Skip to content

Instantly share code, notes, and snippets.

@tamasd
Created February 23, 2012 21:43
Show Gist options
  • Save tamasd/1895228 to your computer and use it in GitHub Desktop.
Save tamasd/1895228 to your computer and use it in GitHub Desktop.
A quick test to see if mmap() or fopen() + fgetln() is faster for linear line-by-line file processing.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <errno.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/fcntl.h>
#include <unistd.h>
/* Average line length of the named file, computed by mmap()ing it and scanning the bytes for '\n'. */
double avg_strlen_mmap(char*);
/* Average line length of the named file, computed by reading it line by line through stdio. */
double avg_strlen_normal(char*);
/*
 * Benchmark driver: times both line-length implementations on the file named
 * by argv[1] and prints the elapsed wall-clock seconds and the average line
 * length each one computed.
 *
 * Returns EXIT_FAILURE when no file name is given, EXIT_SUCCESS otherwise.
 * Timing uses time(), so the resolution is one second; use a large input file
 * to get a meaningful comparison.
 */
int main (int argc, char **argv) {
    /* Without this guard argv[1] is NULL and both readers would crash. */
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <file>\n",
                (argc > 0 && argv[0] != NULL) ? argv[0] : "avg_strlen");
        return EXIT_FAILURE;
    }

    time_t starttime, endtime;
    double avg;

    starttime = time(NULL);
    avg = avg_strlen_normal(argv[1]);
    endtime = time(NULL);
    /* time_t is not guaranteed to be long; cast so the argument matches %ld. */
    printf("Normal file operation speed: %ld secs, average: %lf.\n",
           (long)(endtime - starttime), avg);

    starttime = time(NULL);
    avg = avg_strlen_mmap(argv[1]);
    endtime = time(NULL);
    printf("Memory-mapped file operation speed: %ld secs, average: %lf.\n",
           (long)(endtime - starttime), avg);

    return EXIT_SUCCESS;
}
/*
 * Compute the average line length (newline excluded) of `filename` by mapping
 * the whole file read-only and scanning it byte by byte.
 *
 * A final line that is not newline-terminated counts as a line, so the result
 * agrees with a line-by-line reader. Returns 0.0 for a file with no lines
 * (for example an empty file).
 *
 * On open/fstat/mmap failure a message is printed and the process exits with
 * status 1.
 */
double avg_strlen_mmap(char *filename) {
    double total_chars = 0;
    double line_count = 0;

    int fd = open(filename, O_RDONLY);
    if (fd == -1) {
        printf("Can't open file.\n");
        exit(1);
    }

    // figure out file size
    struct stat statbuf;
    if (fstat(fd, &statbuf) == -1) {
        printf("fstat failed\n");
        exit(1);
    }
    size_t length = (size_t)statbuf.st_size;

    // mmap() rejects a zero-length mapping, and an empty file has no lines.
    if (length == 0) {
        close(fd);
        return 0.0;
    }

    // use mmap
    void *base = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
    if (base == MAP_FAILED) {
        printf("Could not mmap\n");
        exit(1);
    }

    const char *text = base;
    size_t current_line = 0;
    for (size_t i = 0; i < length; i++) {
        if (text[i] == '\n') {
            total_chars += (double)current_line;
            line_count += 1.0;
            current_line = 0;
        } else {
            current_line++;
        }
    }

    // Trailing data without a final '\n' is still a line of its own.
    if (current_line > 0) {
        total_chars += (double)current_line;
        line_count += 1.0;
    }

    munmap(base, length);
    close(fd);

    // Guard the division so a file without any line yields 0 rather than NaN/inf.
    return line_count > 0 ? total_chars / line_count : 0.0;
}
/*
 * Compute the average line length (newline excluded) of `filename` by reading
 * it sequentially through a buffered stdio stream.
 *
 * Lines are read with ISO C fgets() in fixed-size chunks instead of the
 * BSD-only fgetln(), so the function builds on any hosted C implementation
 * and needs no per-line heap allocation. A line longer than the chunk buffer
 * simply arrives in several pieces whose lengths are summed. A final line
 * that is not newline-terminated counts as a line.
 *
 * Assumes text input: an embedded NUL byte hides the remainder of its chunk.
 *
 * Returns 0.0 for a file with no lines. If the file cannot be opened a
 * message is printed and the process exits with status 1.
 */
double avg_strlen_normal(char *filename) {
    enum { CHUNK_SIZE = 4096 };
    char chunk[CHUNK_SIZE];
    double total_chars = 0;
    double line_count = 0;
    int line_open = 0; /* non-zero while inside a line whose '\n' is not yet seen */

    FILE *f = fopen(filename, "r");
    if (f == NULL) {
        printf("Can't open file.\n");
        exit(1);
    }

    while (fgets(chunk, sizeof chunk, f) != NULL) {
        size_t n = strlen(chunk);
        if (n > 0 && chunk[n - 1] == '\n') {
            /* End of a line: do not count the newline itself. */
            total_chars += (double)(n - 1);
            line_count += 1.0;
            line_open = 0;
        } else {
            /* Partial line: either longer than the buffer or last in the file. */
            total_chars += (double)n;
            line_open = 1;
        }
    }

    /* The file ended in the middle of a line; it still counts as one. */
    if (line_open) {
        line_count += 1.0;
    }

    fclose(f);

    /* Guard the division so an empty file yields 0 rather than NaN. */
    return line_count > 0 ? total_chars / line_count : 0.0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment