Created
February 23, 2012 21:43
-
-
Save tamasd/1895228 to your computer and use it in GitHub Desktop.
A quick test to see if mmap() or fopen() + fgetln() is faster for linear line-by-line file processing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <time.h> | |
#include <errno.h> | |
#include <string.h> | |
#include <sys/mman.h> | |
#include <sys/types.h> | |
#include <sys/stat.h> | |
#include <sys/fcntl.h> | |
#include <unistd.h> | |
/* Average line length of the file at `filename`, read through mmap(). */
double avg_strlen_mmap(char *filename);
/* Average line length of the file at `filename`, read through stdio. */
double avg_strlen_normal(char *filename);
/*
 * Time both line-length implementations on the file named by argv[1] and
 * print the elapsed wall-clock seconds together with the computed average.
 *
 * Returns EXIT_FAILURE (after printing a usage line to stderr) when no file
 * argument is given, EXIT_SUCCESS otherwise.
 */
int main(int argc, char **argv) {
    // Without this guard argv[1] is NULL and would be passed on as a path.
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <file>\n",
                argc > 0 ? argv[0] : "mmap_vs_stdio");
        return EXIT_FAILURE;
    }

    time_t starttime = time(NULL);
    double avg = avg_strlen_normal(argv[1]);
    time_t endtime = time(NULL);
    // difftime() yields a double, so the format no longer depends on how
    // time_t happens to be defined on the platform.
    printf("Normal file operation speed: %.0f secs, average: %f.\n",
           difftime(endtime, starttime), avg);

    starttime = time(NULL);
    avg = avg_strlen_mmap(argv[1]);
    endtime = time(NULL);
    printf("Memory-mapped file operation speed: %.0f secs, average: %f.\n",
           difftime(endtime, starttime), avg);

    return EXIT_SUCCESS;
}
/*
 * Compute the average line length of a file by memory-mapping it read-only
 * and scanning the mapping for '\n'.
 *
 * A line is a run of bytes ended by '\n'; the '\n' itself is not counted.
 * Trailing bytes after the last '\n' form one more line, so a file that does
 * not end in a newline still has its last line counted.
 *
 * filename: path of the file to measure.
 *
 * Returns the mean number of bytes per line, or 0.0 for an empty file.
 * If open(), fstat() or mmap() fails, a message is written to stderr and the
 * process exits with status 1.
 */
double avg_strlen_mmap(char *filename) {
    int fd = open(filename, O_RDONLY);
    if (fd == -1) {
        fprintf(stderr, "Can't open file.\n");
        exit(1);
    }

    // figure out file size
    struct stat statbuf;
    if (fstat(fd, &statbuf) == -1) {
        fprintf(stderr, "fstat failed\n");
        close(fd);
        exit(1);
    }

    // mmap() rejects a zero-length mapping, and an empty file has no lines.
    if (statbuf.st_size <= 0) {
        close(fd);
        return 0.0;
    }
    size_t length = (size_t)statbuf.st_size;

    void *base = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
    if (base == MAP_FAILED) {
        fprintf(stderr, "Could not mmap\n");
        close(fd);
        exit(1);
    }

    size_t total_chars = 0;
    size_t line_count = 0;
    const char *cursor = base;
    const char *end = cursor + length;

    while (cursor < end) {
        const char *newline = memchr(cursor, '\n', (size_t)(end - cursor));
        // No newline left: the remaining bytes are the final, unterminated line.
        const char *line_end = (newline != NULL) ? newline : end;

        total_chars += (size_t)(line_end - cursor);
        line_count++;

        cursor = (newline != NULL) ? newline + 1 : end;
    }

    munmap(base, length);
    close(fd);

    // length > 0 guarantees at least one pass through the loop above.
    return (double)total_chars / (double)line_count;
}
/*
 * Compute the average line length of a text file using buffered stdio.
 *
 * Lines are read with fgets() instead of the BSD-only fgetln(), so the
 * function builds on any hosted C implementation. A line longer than the
 * internal buffer arrives in several pieces; the pieces are summed and the
 * line is counted once, when its '\n' (or end of file) is reached.
 *
 * A line is a run of bytes ended by '\n'; the '\n' itself is not counted.
 * Trailing bytes after the last '\n' count as one more line.
 *
 * filename: path of the file to measure.
 *
 * Returns the mean number of bytes per line, or 0.0 when the file contains
 * no lines. If the file cannot be opened or a read error occurs, a message
 * is written to stderr and the process exits with status 1.
 */
double avg_strlen_normal(char *filename) {
    enum { CHUNK_SIZE = 4096 };

    FILE *f = fopen(filename, "r");
    if (f == NULL) {
        fprintf(stderr, "Can't open file.\n");
        exit(1);
    }

    char chunk[CHUNK_SIZE];
    size_t total_chars = 0;
    size_t line_count = 0;
    int line_open = 0; // non-zero while a line has started but not yet ended

    while (fgets(chunk, sizeof chunk, f) != NULL) {
        size_t got = strlen(chunk);

        if (got > 0 && chunk[got - 1] == '\n') {
            // The newline closes the current line and is not part of it.
            total_chars += got - 1;
            line_count++;
            line_open = 0;
        } else {
            // Buffer filled (or EOF hit) before a newline: same line continues.
            total_chars += got;
            line_open = 1;
        }
    }

    if (ferror(f)) {
        fprintf(stderr, "Read failed.\n");
        fclose(f);
        exit(1);
    }
    fclose(f);

    // A final line with no trailing newline is still a line.
    if (line_open) {
        line_count++;
    }

    if (line_count == 0) {
        return 0.0;
    }
    return (double)total_chars / (double)line_count;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment