Created
September 29, 2023 10:42
-
-
Save saagarjha/1a82fddb67f5870525e104cb2f5454e9 to your computer and use it in GitHub Desktop.
Test whether mmap or read is faster on your computer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// As seen on:
// https://federated.saagarjha.com/notice/AaEMQpJBSbxhLyxYzg
// https://twitter.com/_saagarjha/status/1707423903969341949
// Compiling: gcc mmap_vs_read.c -O3 -o mmap_vs_read
// Usage: ./mmap_vs_read <bigfile> <mmap|read>
#include <fcntl.h> | |
#include <stddef.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <sys/fcntl.h> | |
#include <sys/mman.h> | |
#include <unistd.h> | |
/*
 * XOR-fold every byte of a buffer into a single byte.
 *
 * buffer: start of the bytes to hash; only read, never written. It is not
 *         dereferenced when size is 0.
 * size:   number of bytes to fold.
 *
 * Returns the XOR of all `size` bytes, or 0 for an empty buffer. Because XOR
 * is associative, hashes of consecutive chunks can be XOR-ed together to get
 * the hash of the whole.
 */
unsigned char hash_buffer(const unsigned char *buffer, size_t size) {
	unsigned char hash = 0;
	while (size--) {
		hash ^= *buffer++;
	}
	return hash;
}
/*
 * Hash the contents of a file descriptor by pulling it through read(2) in
 * fixed-size chunks of 16 pages each.
 *
 * fd: descriptor open for reading; it is consumed from its current offset
 *     until end-of-file.
 *
 * Returns the XOR of every byte read. If the page size cannot be queried,
 * the buffer cannot be allocated, or read() fails, a diagnostic is printed
 * to stderr and the process exits with EXIT_FAILURE rather than reporting a
 * hash of partial data.
 */
unsigned char test_read(int fd) {
	long page_size = sysconf(_SC_PAGESIZE);
	if (page_size <= 0) {
		perror("sysconf");
		exit(EXIT_FAILURE);
	}

	size_t buffer_size = (size_t)page_size * 16;
	unsigned char *buffer = malloc(buffer_size);
	if (!buffer) {
		perror("malloc");
		exit(EXIT_FAILURE);
	}

	unsigned char hash = 0;
	ssize_t size;
	// read() returns 0 at end-of-file and -1 on error; both end the loop.
	while ((size = read(fd, buffer, buffer_size)) > 0) {
		hash ^= hash_buffer(buffer, (size_t)size);
	}
	if (size < 0) {
		// Report before free() so errno still belongs to the failed read().
		perror("read");
		free(buffer);
		exit(EXIT_FAILURE);
	}

	free(buffer);
	return hash;
}
/*
 * Hash the contents of a file by mapping the whole thing read-only with
 * mmap(2) and walking the mapping in a single pass.
 *
 * fd: descriptor open for reading on a seekable, mappable file. Its offset
 *     is left at end-of-file, because the size is found with lseek().
 *
 * Returns the XOR of every byte in the file, or 0 for an empty file. If
 * lseek() or mmap() fails, a diagnostic is printed to stderr and the process
 * exits with EXIT_FAILURE.
 */
unsigned char test_mmap(int fd) {
	off_t size = lseek(fd, 0, SEEK_END);
	if (size < 0) {
		perror("lseek");
		exit(EXIT_FAILURE);
	}
	if (size == 0) {
		// mmap() rejects a zero-length mapping; an empty file hashes to 0.
		return 0;
	}

	size_t length = (size_t)size;
	unsigned char *buffer = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0);
	if (buffer == MAP_FAILED) {
		perror("mmap");
		exit(EXIT_FAILURE);
	}

	unsigned char hash = hash_buffer(buffer, length);
	munmap(buffer, length);
	return hash;
}
int main(int argc, char **argv) { | |
int fd = open(*++argv, O_RDONLY); | |
char *choice = *++argv; | |
if (!choice) { | |
return EXIT_FAILURE; | |
} else if (!strcmp(choice, "read")) { | |
return test_read(fd); | |
} else if (!strcmp(choice, "mmap")) { | |
return test_mmap(fd); | |
} else { | |
return EXIT_FAILURE; | |
} | |
} |
Author
saagarjha
commented
Sep 29, 2023
With some slight changes, on a wimpy FreeBSD machine:
% repeat 3 time ./tperf ~/freenas-boot.zfs.gz read
4.195u 1.291s 0:05.48 100.0%5+172k 0+1io 6pf+0w
4.098u 1.387s 0:05.48 99.8%5+173k 0+1io 0pf+0w
4.054u 1.432s 0:05.48 100.0%5+172k 0+1io 0pf+0w
% repeat 3 time ./tperf ~/freenas-boot.zfs.gz mmap
4.628u 2.022s 0:06.66 99.6%5+173k 0+1io 18457pf+0w
4.450u 2.174s 0:06.62 100.0%5+172k 0+1io 18465pf+0w
4.457u 2.118s 0:06.58 99.6%5+173k 0+1io 18454pf+0w
However, in a beefy Mac Pro, with those same changes:
% repeat 3 time ./tperf freenas-boot.zfs.gz mmap
3.090u 1.235s 0:04.52 95.5%0+0k 0+0io 0pf+0w
3.087u 1.187s 0:04.28 99.5%0+0k 0+0io 0pf+0w
3.083u 1.234s 0:04.32 99.7%0+0k 0+0io 0pf+0w
% repeat 3 time ./tperf freenas-boot.zfs.gz read
1.898u 0.817s 0:02.71 99.6%0+0k 0+0io 0pf+0w
1.900u 0.817s 0:02.71 100.0%0+0k 0+0io 0pf+0w
1.901u 0.826s 0:02.72 100.0%0+0k 0+0io 0pf+0w
The changes were to use madvise
and break it up into smaller chunks:
/*
 * Variant of test_mmap that maps the whole file, advises the kernel that it
 * will be read sequentially, and hashes it in chunks of 16 pages each
 * instead of in one pass.
 *
 * fd: descriptor open for reading on a seekable, mappable file. Its offset
 *     is left at end-of-file, because the size is found with lseek().
 *
 * Returns the XOR of every byte in the file, or 0 for an empty file. If the
 * page size cannot be queried, or lseek() or mmap() fails, a diagnostic is
 * printed to stderr and the process exits with EXIT_FAILURE.
 */
unsigned char test_mmap(int fd) {
	off_t size = lseek(fd, 0, SEEK_END);
	if (size < 0) {
		perror("lseek");
		exit(EXIT_FAILURE);
	}
	if (size == 0) {
		// mmap() rejects a zero-length mapping; an empty file hashes to 0.
		return 0;
	}

	long page_size = sysconf(_SC_PAGESIZE);
	if (page_size <= 0) {
		perror("sysconf");
		exit(EXIT_FAILURE);
	}

	// Work in size_t from here on so offsets and lengths compare without
	// mixing signed and unsigned types.
	size_t length = (size_t)size;
	size_t buffer_size = (size_t)page_size * 16;

	unsigned char *file = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0);
	if (file == MAP_FAILED) {
		perror("mmap");
		exit(EXIT_FAILURE);
	}
	// The advice is only a hint; the hash is the same whether or not it is honored.
	(void)posix_madvise(file, length, POSIX_MADV_SEQUENTIAL);

	unsigned char hash = 0;
	size_t offset = 0;
	while (offset < length) {
		size_t remaining = length - offset;
		size_t chunk_size = remaining < buffer_size ? remaining : buffer_size;
		hash ^= hash_buffer(file + offset, chunk_size);
		offset += chunk_size;
	}

	munmap(file, length);
	return hash;
}
My big FreeBSD system isn't booting, and I'm 5000 miles away, so I'll have to put some effort into figuring out why.
And on the big FreeBSD system:
% repeat 5 time ./tperf ./freenas-boot.zfs.gz read
4.899u 1.149s 0:06.04 99.8% 5+168k 0+0io 0pf+0w
4.913u 1.275s 0:06.18 100.0% 5+167k 0+0io 0pf+0w
4.721u 1.416s 0:06.13 100.0% 5+167k 0+0io 0pf+0w
4.940u 1.189s 0:06.13 99.8% 5+167k 0+0io 0pf+0w
4.950u 1.188s 0:06.13 100.0% 5+167k 0+0io 0pf+0w
% repeat 5 time ./tperf ./freenas-boot.zfs.gz mmap
5.526u 1.637s 0:07.16 99.8% 5+168k 0+0io 0pf+0w
5.647u 1.535s 0:07.18 99.8% 5+167k 0+0io 0pf+0w
5.576u 1.575s 0:07.15 99.8% 5+167k 0+0io 0pf+0w
5.593u 1.551s 0:07.14 100.0% 5+167k 0+0io 0pf+0w
5.397u 1.762s 0:07.15 100.0% 5+168k 0+0io 0pf+0w
The big Mac and FreeBSD systems each have hundreds of gigabytes of RAM, and all three systems are SSD-only. (However: on the big FreeBSD system, I used /tmp, which is tmpfs; on the smaller one, which doesn't have as much RAM, the test file was on a ZFS dataset.)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment