Last active
June 30, 2021 05:04
-
-
Save ymmt2005/4512427 to your computer and use it in GitHub Desktop.
Fastest du for Linux.
This is in fact faster than du(1) as long as all dentries are cached :-p
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Compilation: | |
* | |
* 1. Without e2fslib: | |
* $ gcc -O2 -o due2fs due2fs.c | |
* | |
* 2. With e2fslib | |
* $ sudo apt-get install e2fslibs-dev | |
* $ gcc -DUSE_E2FSLIB -O2 -o due2fs due2fs.c -lext2fs | |
* | |
* Run: | |
* $ sudo due2fs DIRECTORY | |
*/ | |
#define _GNU_SOURCE | |
#include <dirent.h>     /* Defines DT_* constants */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/wait.h>
#ifdef USE_E2FSLIB | |
#include <ext2fs/ext2fs.h> | |
static ext2_filsys g_e2fs; | |
#endif | |
/* Report `msg` together with the current errno text, then terminate the
 * process with a failure status.  Wrapped in do/while(0) so the macro can
 * be used as a single statement after an unbraced `if`. */
#define handle_error(msg)       \
    do {                        \
        perror(msg);            \
        exit(EXIT_FAILURE);     \
    } while (0)
/**
 * Record layout filled in by the getdents64 system call.
 *
 * Reference:
 * http://lxr.free-electrons.com/source/include/linux/dirent.h?v=3.2
 *
 * Records are variable-length: d_reclen gives the byte distance from the
 * start of one record to the start of the next, and d_name is the
 * NUL-terminated entry name stored directly after the fixed header.
 */
struct linux_dirent64 {
    uint64_t       d_ino;    /* inode number */
    int64_t        d_off;    /* offset field as defined by the kernel header */
    unsigned short d_reclen; /* total size of this record in bytes */
    unsigned char  d_type;   /* one of the DT_* constants from <dirent.h> */
    char           d_name[]; /* entry name (C99 flexible array member) */
};
/**
 * Sizing constants.
 *
 * These are enum constants rather than `const size_t` objects so that they
 * are true integer constant expressions: an array dimensioned with
 * EXPECTED_MAXFILES is then an ordinary fixed-size array instead of a
 * variable-length array, and no externally visible objects are defined.
 */
enum {
    /* Bytes budgeted per directory entry when sizing the getdents64 buffer:
     * the fixed record header plus 64 bytes for the name. */
    DIRENT_ONESIZE = sizeof(struct linux_dirent64) + 64,

    /* Number of entries the getdents64 buffer is sized for; also the
     * capacity of the per-directory list of open subdirectory descriptors. */
    EXPECTED_MAXFILES = 1200
};
#ifdef USE_E2FSLIB | |
static inline uint64_t getfilesize(uint64_t ino) { | |
uint64_t size; | |
ext2_file_t f; | |
if( ext2fs_file_open(g_e2fs, ino, 0, &f) != 0 ) { | |
// skip errors | |
return 0; | |
} | |
size = (uint64_t)ext2fs_file_get_size(f); | |
ext2fs_file_close(f); | |
return size; | |
} | |
void open_ext2fs(const char* path) { | |
int p[2]; | |
pid_t child; | |
char cmdline[4096]; | |
int len = 0; | |
int r = snprintf(cmdline, sizeof(cmdline), | |
"/bin/df %s | /usr/bin/tail -1 | /usr/bin/awk '{printf \"%%s\", $1}'", | |
path); | |
if( r == sizeof(cmdline) ) { | |
fprintf(stderr, "path too long\n", path); | |
exit(EXIT_FAILURE); | |
} | |
if( pipe(p) != 0 ) | |
handle_error("pipe"); | |
child = fork(); | |
if( child == -1 ) | |
handle_error("fork"); | |
if( child == 0 ) { | |
// child | |
close(p[0]); | |
if( dup2(p[1], 1) == -1 ) | |
handle_error("dup2"); | |
close(p[1]); | |
execl("/bin/sh", "/bin/sh", "-c", cmdline, (char*)NULL); | |
handle_error("execl"); | |
} | |
close(p[1]); | |
while( 1 ) { | |
ssize_t nread = read(p[0], cmdline+len, sizeof(cmdline) - len); | |
if( nread == -1 ) | |
handle_error("read"); | |
if( nread == 0 ) { | |
cmdline[len] = '\0'; | |
break; | |
} | |
len += nread; | |
if( len == sizeof(cmdline) ) { | |
fprintf(stderr, "too long device name?!\n"); | |
exit(EXIT_FAILURE); | |
} | |
} | |
close(p[0]); | |
fprintf(stderr, "Using device \"%s\"\n", cmdline); | |
if( ext2fs_open(cmdline, 0, 0, 0, unix_io_manager, &g_e2fs) != 0 ) { | |
fprintf(stderr, "ext2fs_open failed.\n"); | |
exit(EXIT_FAILURE); | |
} | |
} | |
#else // USE_E2FSLIB | |
/**
 * Return st_size of the entry `path`, resolved relative to the directory
 * descriptor `dir_fd`.
 *
 * A failing fstatat() is not reported; the entry simply counts as 0 bytes.
 */
static inline uint64_t getfilesize(int dir_fd, const char* path) {
    struct stat info;
    const int rc = fstatat(dir_fd, path, &info, 0);

    // skip errors
    return (rc == 0) ? (uint64_t)info.st_size : 0;
}
#endif | |
static uint64_t diskusage(int dir_fd, char* buf, size_t bufsize) { | |
uint64_t total = 0; | |
ssize_t nread; | |
int bpos; | |
int dir_fds[EXPECTED_MAXFILES]; | |
int n_dir_fds = 0; | |
int fd; | |
int i; | |
struct linux_dirent64 *d; | |
while( 1 ) { | |
nread = syscall(SYS_getdents64, dir_fd, buf, bufsize); | |
if( nread == -1 ) | |
handle_error("getdents"); | |
if( nread == 0 ) | |
break; | |
for( bpos = 0; bpos < nread; ) { | |
d = (struct linux_dirent64 *) (buf + bpos); | |
if( strcmp(d->d_name, ".") == 0 || | |
strcmp(d->d_name, "..") == 0 ) { | |
bpos += d->d_reclen; | |
continue; | |
} | |
if( d->d_type == DT_DIR ) { | |
if( n_dir_fds == EXPECTED_MAXFILES ) { | |
fprintf(stderr, "info: cache is full. flushing...\n"); | |
char* new_buf = (char*)malloc(bufsize); | |
if( ! new_buf ) { | |
free(buf); | |
fprintf(stderr, "malloc failed.\n"); | |
exit(EXIT_FAILURE); | |
} | |
for( i = 0; i < n_dir_fds; ++i ) { | |
total += diskusage(dir_fds[i], new_buf, bufsize); | |
close(dir_fds[i]); | |
} | |
free(new_buf); | |
n_dir_fds = 0; | |
} | |
fd = openat(dir_fd, d->d_name, O_RDONLY|O_DIRECTORY); | |
if( fd == -1 ) | |
handle_error("openat"); | |
dir_fds[n_dir_fds++] = fd; | |
} | |
else if( d->d_type == DT_REG ) { | |
#ifdef USE_E2FSLIB | |
total += getfilesize( d->d_ino ); | |
#else | |
total += getfilesize( dir_fd, d->d_name ); | |
#endif | |
} | |
// ignore other types | |
bpos += d->d_reclen; | |
} | |
} | |
for( i = 0; i < n_dir_fds; ++i ) { | |
total += diskusage(dir_fds[i], buf, bufsize); | |
close(dir_fds[i]); | |
} | |
return total; | |
} | |
int main(int argc, char *argv[]) { | |
int fd; | |
struct rlimit rlim; | |
const size_t bufsize = DIRENT_ONESIZE * EXPECTED_MAXFILES; | |
char* buf = (char*)malloc(bufsize); | |
if( ! buf ) { | |
fprintf(stderr, "malloc failed.\n"); | |
exit(EXIT_FAILURE); | |
} | |
if( argc == 1 ) { | |
printf("Usage: due2fs DIRECTORY\n"); | |
return EXIT_SUCCESS; | |
} | |
rlim.rlim_cur = EXPECTED_MAXFILES * 2; | |
rlim.rlim_max = EXPECTED_MAXFILES * 2; | |
if( setrlimit(RLIMIT_NOFILE, &rlim) != 0 ) | |
handle_error("setrlimit(RLIMIT_NOFILE)"); | |
fd = open(argv[1], O_RDONLY|O_DIRECTORY); | |
if( fd == -1 ) | |
handle_error("open"); | |
#ifdef USE_E2FSLIB | |
open_ext2fs(argv[1]); | |
#endif | |
printf("%llu\n", (unsigned long long)diskusage(fd, buf, bufsize)); | |
close(fd); | |
free(buf); | |
return EXIT_SUCCESS; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment