Skip to content

Instantly share code, notes, and snippets.

@ccat3z
Last active November 14, 2024 10:24
Show Gist options
  • Save ccat3z/c0d81b0ddeb7276eeff16844528a6876 to your computer and use it in GitHub Desktop.
Save ccat3z/c0d81b0ddeb7276eeff16844528a6876 to your computer and use it in GitHub Desktop.
mmap read large file
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#include <algorithm>
#include <cerrno>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
// Prints this process's resident set size and its shared (file-backed + shmem)
// part to stderr, both in KB.
//
// The numbers come from /proc/self/statm, whose fields are counted in pages,
// so they are scaled by the system page size before being labelled "KB".
// On any failure (file missing, unparsable content, unknown page size) a
// diagnostic is written to stderr and nothing else is printed.
void dump_mem() {
    std::ifstream statm("/proc/self/statm");
    if (!statm.is_open()) {
        std::cerr << "Failed to open /proc/self/statm" << std::endl;
        return;
    }

    // Field order is: size resident shared text lib data dt.
    // Only the 2nd and 3rd are reported; the 1st is read just to skip it.
    long long size_pages = 0;
    long long resident_pages = 0;
    long long shared_pages = 0;
    if (!(statm >> size_pages >> resident_pages >> shared_pages)) {
        std::cerr << "Failed to parse /proc/self/statm" << std::endl;
        return;
    }

    const long page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0) {
        std::cerr << "Failed to query page size" << std::endl;
        return;
    }

    // Convert page counts to KB (multiply first so sub-1KB page sizes would
    // not truncate to zero).
    const long long resident_kb = resident_pages * page_size / 1024;
    const long long shared_kb = shared_pages * page_size / 1024;

    std::cerr << "VmRss: " << resident_kb << "KB RssFile+RssShmem: " << shared_kb << "KB" << std::endl;
}
// Experiment: read "./file" sequentially through a private, pre-populated
// read-only mapping, copying it out in 10 MiB chunks and releasing the pages
// that have already been consumed, while logging memory usage after each
// chunk via dump_mem().
//
// The active release strategy is munmap(); the commented-out snippets are the
// alternative strategies (fadvise / madvise) kept for comparison.
//
// Returns 0 on success and -1 if any system call fails. All resources (file
// descriptor, mapping, copy buffer) are released on every exit path.
int main() {
    const long page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0) {
        std::cerr << "sysconf failed: " << std::strerror(errno) << std::endl;
        return -1;
    }

    const int fd = open("./file", O_RDONLY);
    if (fd < 0) {
        std::cerr << "open failed: " << std::strerror(errno) << std::endl;
        return -1;
    }

    struct stat st;
    int res = fstat(fd, &st);
    if (res != 0) {
        std::cerr << "fstat failed: " << std::strerror(errno) << std::endl;
        close(fd);
        return -1;
    }

    // Work in size_t from here on so offsets and sizes are never compared
    // across signedness.
    const size_t file_sz = static_cast<size_t>(st.st_size);
    if (file_sz == 0) {
        // mmap() rejects zero-length mappings; an empty file has nothing to read.
        close(fd);
        return 0;
    }

    // fadv sequential
    // res = posix_fadvise(fd, 0, st.st_size, POSIX_FADV_SEQUENTIAL);
    // if (res != 0) {
    //     std::cerr << "fadvise failed: " << std::strerror(res) << std::endl;
    //     return -1;
    // }

    void* const mapping = mmap(nullptr, file_sz, PROT_READ, MAP_PRIVATE | MAP_POPULATE, fd, 0);
    // mmap signals failure with MAP_FAILED, not a null pointer.
    if (mapping == MAP_FAILED) {
        std::cerr << "mmap failed: " << std::strerror(errno) << std::endl;
        close(fd);
        return -1;
    }
    char* const data = static_cast<char*>(mapping);

    // madv sequential
    // res = posix_madvise(data, (file_sz - file_sz % page_size), POSIX_MADV_SEQUENTIAL);
    // if (res != 0) {
    //     std::cerr << "madvise failed: " << std::strerror(res) << std::endl;
    //     return -1;
    // }

    const size_t buf_sz = 1024 * 1024 * 10;
    char* const buf = static_cast<char*>(std::malloc(buf_sz));
    if (buf == nullptr) {
        std::cerr << "malloc failed: " << std::strerror(errno) << std::endl;
        munmap(data, file_sz);
        close(fd);
        return -1;
    }

    int exit_code = 0;
    // Start of the region that is still mapped; always a multiple of page_size.
    size_t pos_retain = 0;
    for (size_t pos = 0; pos < file_sz;) {
        const size_t sz = std::min(buf_sz, file_sz - pos);
        memcpy(buf, data + pos, sz);

        // Everything consumed so far that has not been released yet, rounded
        // down to whole pages so a partially-read page stays mapped.
        size_t purge_sz = pos - pos_retain + sz;
        purge_sz -= purge_sz % static_cast<size_t>(page_size);

        // fadv dontneed
        // res = posix_fadvise(fd, pos_retain, purge_sz, POSIX_FADV_DONTNEED);
        // if (res != 0) {
        //     std::cerr << "fadvise failed: " << std::strerror(res) << std::endl;
        //     return -1;
        // }

        // madv dontneed
        // res = madvise(data + pos_retain, purge_sz, MADV_DONTNEED);
        // if (res != 0) {
        //     std::cerr << "madvise failed: " << std::strerror(errno) << std::endl;
        //     return -1;
        // }

        // munmap
        // A zero-length munmap fails with EINVAL, so skip it when less than
        // one full page has been consumed since the last release.
        if (purge_sz != 0) {
            res = munmap(data + pos_retain, purge_sz);
            if (res != 0) {
                std::cerr << "munmap failed: " << std::strerror(errno) << std::endl;
                exit_code = -1;
                break;
            }
        }

        std::cerr << "read " << pos << "+" << sz
                  << " purge: " << pos_retain << "+" << purge_sz << " ";
        pos += sz;
        pos_retain += purge_sz;
        dump_mem();
    }

    // Release whatever is still mapped: the trailing partial page on success,
    // or the unread remainder after an early exit from the loop.
    if (pos_retain < file_sz && munmap(data + pos_retain, file_sz - pos_retain) != 0) {
        std::cerr << "munmap failed: " << std::strerror(errno) << std::endl;
        exit_code = -1;
    }

    std::free(buf);
    close(fd);
    return exit_code;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment