Last active
July 6, 2019 16:12
-
-
Save integeruser/a7a4b8dfceef1ad9ded0c9cf7c88c1d9 to your computer and use it in GitHub Desktop.
Benchmarking different ways of reading lines from a file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm> | |
#include <cassert> | |
#include <cstddef> | |
#include <cstdint> | |
#include <cstdlib> | |
#include <chrono> | |
#include <fstream> | |
#include <iomanip> | |
#include <iostream> | |
#include <string> | |
#include <vector> | |
#include <fcntl.h> | |
#include <stdio.h> | |
#include <string.h> | |
#include <unistd.h> | |
/* $(CXX) -std=c++11 -march=native -O3 -o readlines readlines.cpp */

/* Size in bytes (16 KiB) of the scratch buffer used by the buffer-based
 * benchmarks. The expansion is parenthesized so the macro behaves as a single
 * value inside larger expressions such as `x / BUF_SIZE` or `x % BUF_SIZE`. */
#define BUF_SIZE (1024*16)

/* Clock used to time every benchmark. */
using hrc = std::chrono::high_resolution_clock;
void test00(const char* filename) | |
{ | |
size_t num_lines = 0; | |
char buf[BUF_SIZE]; | |
FILE* fp = fopen(filename, "r"); | |
hrc::time_point start_time = hrc::now(); | |
while (fgets(buf, BUF_SIZE, fp)) { | |
++num_lines; | |
} | |
hrc::time_point end_time = hrc::now(); | |
fclose(fp); | |
std::chrono::duration<float> duration = end_time-start_time; | |
std::cout << __func__ << "(fgets): " << num_lines << " lines / " << | |
std::fixed << std::setprecision(3) << duration.count() << " s" << std::endl; | |
} | |
void test01(const char* filename) | |
{ | |
size_t num_lines = 0; | |
std::ifstream file(filename); | |
std::string line; | |
hrc::time_point start_time = hrc::now(); | |
while (std::getline(file, line)) { | |
++num_lines; | |
} | |
hrc::time_point end_time = hrc::now(); | |
file.close(); | |
std::chrono::duration<float> duration = end_time-start_time; | |
std::cout << __func__ << "(getline): " << num_lines << " lines / " << | |
std::fixed << std::setprecision(3) << duration.count() << " s" << std::endl; | |
} | |
void test02(const char* filename) | |
{ | |
size_t num_lines = 0; | |
char buf[BUF_SIZE]; | |
std::ifstream file(filename); | |
hrc::time_point start_time = hrc::now(); | |
while (file) { | |
file.read(buf, BUF_SIZE); | |
const ssize_t bytes_read = file.gcount(); | |
char* line = buf; | |
char* newline; | |
while ((newline = (char*) memchr(line, '\n', (buf+bytes_read)-line))) { | |
line = newline+1; | |
++num_lines; | |
} | |
const ptrdiff_t bytes_used = line-buf; | |
file.seekg(-(bytes_read-bytes_used), std::ios_base::cur); | |
} | |
hrc::time_point end_time = hrc::now(); | |
file.close(); | |
std::chrono::duration<float> duration = end_time-start_time; | |
std::cout << __func__ << "(ifstream): " << num_lines << " lines / " << | |
std::fixed << std::setprecision(3) << duration.count() << " s" << std::endl; | |
} | |
void test03(const char* filename) | |
{ | |
size_t num_lines = 0; | |
char buf[BUF_SIZE]; | |
int fd = open(filename, O_RDONLY); | |
ssize_t bytes_read; | |
hrc::time_point start_time = hrc::now(); | |
while ((bytes_read = read(fd, buf, BUF_SIZE))) { | |
char* line = buf; | |
char* newline; | |
while ((newline = (char*) memchr(line, '\n', (buf+bytes_read)-line))) { | |
line = newline+1; | |
++num_lines; | |
} | |
const ptrdiff_t bytes_used = line-buf; | |
lseek(fd, -(bytes_read-bytes_used), SEEK_CUR); | |
} | |
hrc::time_point end_time = hrc::now(); | |
close(fd); | |
std::chrono::duration<float> duration = end_time-start_time; | |
std::cout << __func__ << "(read+lseek): " << num_lines << " lines / " << | |
std::fixed << std::setprecision(3) << duration.count() << " s" << std::endl; | |
} | |
void test04(const char* filename) | |
{ | |
size_t num_lines = 0; | |
char buf[BUF_SIZE]; | |
int fd = open(filename, O_RDONLY); | |
ssize_t bytes_read; | |
off_t bytes_prev_read = 0; | |
hrc::time_point start_time = hrc::now(); | |
while ((bytes_read = read(fd, buf+bytes_prev_read, BUF_SIZE-bytes_prev_read))) { | |
bytes_read += bytes_prev_read; | |
char* line = buf; | |
char* newline; | |
while ((newline = (char*) memchr(line, '\n', (buf+bytes_read)-line))) { | |
line = newline+1; | |
++num_lines; | |
} | |
const ptrdiff_t bytes_used = line-buf; | |
bytes_prev_read = bytes_read-bytes_used; | |
memcpy(buf, line, bytes_prev_read); | |
} | |
hrc::time_point end_time = hrc::now(); | |
close(fd); | |
std::chrono::duration<float> duration = end_time-start_time; | |
std::cout << __func__ << "(read+memcpy): " << num_lines << " lines / " << | |
std::fixed << std::setprecision(3) << duration.count() << " s" << std::endl; | |
} | |
int main(int argc, char const *argv[]) | |
{ | |
if (argc != 2) { | |
std::cerr << "Usage: fast_readline filename" << std::endl; | |
return EXIT_FAILURE; | |
} | |
const char* filename = argv[1]; | |
test00(filename); | |
test01(filename); | |
test02(filename); | |
test03(filename); | |
test04(filename); | |
return EXIT_SUCCESS; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment