Created
May 8, 2026 17:06
-
-
Save s1037989/5247ec96abb04bcd1f17ca2d2b54645c to your computer and use it in GitHub Desktop.
Efficiently read stdin in large binary chunks and print only the bytes in the ranges given by the -b argument, similar to how `cut -b` works.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // bytecut.c | |
| // cc -O2 -Wall -Wextra -o bytecut bytecut.c | |
| // $ head -c1024 /dev/urandom | bytecut -b 1-4,9- | wc -c | |
| // 1020 | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <string.h> | |
| #include <errno.h> | |
// One selected span of input bytes, taken from a single item of the -b list.
// Positions count from 1 and both bounds are inclusive.
typedef struct {
    // First selected byte (1-based, inclusive).
    unsigned long long start;
    // Last selected byte (1-based, inclusive); 0 means "through EOF".
    unsigned long long end;
} Range;
| static int cmp_range(const void *a, const void *b) { | |
| const Range *ra = a, *rb = b; | |
| return (ra->start > rb->start) - (ra->start < rb->start); | |
| } | |
// Parse a 1-based byte position from the beginning of `s`.
//
// s     - text that must start with a decimal digit.
// endp  - receives a pointer to the first character after the number.
//
// Returns the parsed value, which is always >= 1.  On any invalid input
// (no leading digit, value 0, or a value that overflows unsigned long long)
// prints a diagnostic to stderr and terminates the process with status 2.
static unsigned long long parse_ull(const char *s, char **endp) {
    unsigned long long v = 0;

    // strtoull() skips leading whitespace and accepts a sign; "-5" would be
    // negated into a huge unsigned value without setting errno.  Insist on a
    // digit so that only plain decimal positions are accepted.
    int ok = (*s >= '0' && *s <= '9');
    if (ok) {
        errno = 0;
        v = strtoull(s, endp, 10);
        // errno catches overflow (ERANGE); 0 is not a valid 1-based position.
        ok = (errno == 0 && v != 0);
    }
    if (!ok) {
        fprintf(stderr, "invalid byte position: %s\n", s);
        exit(2);
    }
    return v;
}
| static Range *parse_ranges(char *spec, size_t *count) { | |
| size_t cap = 16; | |
| Range *ranges = malloc(cap * sizeof *ranges); | |
| if (!ranges) { | |
| perror("malloc"); | |
| exit(1); | |
| } | |
| for (char *tok = strtok(spec, ","); tok; tok = strtok(NULL, ",")) { | |
| char *p = tok; | |
| char *endp; | |
| unsigned long long start = parse_ull(p, &endp); | |
| unsigned long long end = start; | |
| if (*endp == '-') { | |
| p = endp + 1; | |
| if (*p == '\0') { | |
| end = 0; // open-ended | |
| } else { | |
| end = parse_ull(p, &endp); | |
| if (*endp != '\0' || end < start) { | |
| fprintf(stderr, "invalid range: %s\n", tok); | |
| exit(2); | |
| } | |
| } | |
| } else if (*endp != '\0') { | |
| fprintf(stderr, "invalid range: %s\n", tok); | |
| exit(2); | |
| } | |
| if (*count == cap) { | |
| cap *= 2; | |
| ranges = realloc(ranges, cap * sizeof *ranges); | |
| if (!ranges) { | |
| perror("realloc"); | |
| exit(1); | |
| } | |
| } | |
| ranges[(*count)++] = (Range){ start, end }; | |
| } | |
| qsort(ranges, *count, sizeof *ranges, cmp_range); | |
| return ranges; | |
| } | |
| int main(int argc, char **argv) { | |
| char *spec = NULL; | |
| for (int i = 1; i < argc; i++) { | |
| if (strcmp(argv[i], "-b") == 0 && i + 1 < argc) { | |
| spec = argv[++i]; | |
| } else { | |
| fprintf(stderr, "usage: %s -b ranges < input > output\n", argv[0]); | |
| return 2; | |
| } | |
| } | |
| if (!spec) { | |
| fprintf(stderr, "usage: %s -b ranges < input > output\n", argv[0]); | |
| return 2; | |
| } | |
| spec = strdup(spec); | |
| if (!spec) { | |
| perror("strdup"); | |
| return 1; | |
| } | |
| size_t nranges = 0; | |
| Range *ranges = parse_ranges(spec, &nranges); | |
| enum { CHUNK = 1024 * 1024 }; | |
| unsigned char *buf = malloc(CHUNK); | |
| if (!buf) { | |
| perror("malloc"); | |
| return 1; | |
| } | |
| unsigned long long offset = 1; // current chunk start, 1-based | |
| size_t ri = 0; | |
| for (;;) { | |
| size_t n = fread(buf, 1, CHUNK, stdin); | |
| if (n == 0) { | |
| if (ferror(stdin)) { | |
| perror("fread"); | |
| return 1; | |
| } | |
| break; | |
| } | |
| unsigned long long chunk_start = offset; | |
| unsigned long long chunk_end = offset + n - 1; | |
| while (ri < nranges) { | |
| unsigned long long rs = ranges[ri].start; | |
| unsigned long long re = ranges[ri].end ? ranges[ri].end : chunk_end; | |
| if (rs > chunk_end) | |
| break; | |
| unsigned long long s = rs > chunk_start ? rs : chunk_start; | |
| unsigned long long e = re < chunk_end ? re : chunk_end; | |
| if (s <= e) { | |
| size_t pos = (size_t)(s - chunk_start); | |
| size_t len = (size_t)(e - s + 1); | |
| if (fwrite(buf + pos, 1, len, stdout) != len) { | |
| perror("fwrite"); | |
| return 1; | |
| } | |
| } | |
| if (ranges[ri].end && ranges[ri].end <= chunk_end) | |
| ri++; | |
| else | |
| break; | |
| } | |
| offset += n; | |
| } | |
| if (ferror(stdout)) { | |
| perror("stdout"); | |
| return 1; | |
| } | |
| free(buf); | |
| free(ranges); | |
| free(spec); | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment