Skip to content

Instantly share code, notes, and snippets.

@s1037989
Created May 8, 2026 17:06
Show Gist options
  • Select an option

  • Save s1037989/5247ec96abb04bcd1f17ca2d2b54645c to your computer and use it in GitHub Desktop.

Select an option

Save s1037989/5247ec96abb04bcd1f17ca2d2b54645c to your computer and use it in GitHub Desktop.
Efficiently read large binary chunks from stdin and print only the bytes in the ranges specified by the -b argument, similar to how cut works
// bytecut.c
// cc -O2 -Wall -Wextra -o bytecut bytecut.c
// $ head -c1024 /dev/urandom | bytecut -b 1-4,9- | wc -c
// 1020
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
// A single selected span of the input, expressed in cut-style byte positions.
typedef struct {
    // Position of the first selected byte; counting starts at 1.
    unsigned long long start;
    // Position of the last selected byte (inclusive, counting from 1).
    // The value 0 is a sentinel meaning "everything up to EOF".
    unsigned long long end;
} Range;
// qsort() comparator that orders Range entries by ascending start position.
// Returns -1, 0 or 1. Only the start field is compared, so two entries with
// the same start compare equal whatever their end values are.
static int cmp_range(const void *a, const void *b) {
    const Range *lhs = a;
    const Range *rhs = b;

    if (lhs->start < rhs->start) {
        return -1;
    }
    if (lhs->start > rhs->start) {
        return 1;
    }
    return 0;
}
// Parse a positive decimal byte position from the beginning of s.
//
// s:    text to parse; after optional leading whitespace it must start with a
//       decimal digit.
// endp: receives a pointer to the first character following the number.
//
// Returns the parsed value, which is always >= 1. If the text does not start
// with a digit, the value is 0, or the value does not fit in an
// unsigned long long, a message is printed to stderr and the process exits
// with status 2.
static unsigned long long parse_ull(const char *s, char **endp) {
    // strtoull() accepts an optional sign and negates the result in unsigned
    // arithmetic, so "-5" would come back as a huge position with errno left
    // at 0. Look past the whitespace strtoull() would skip and insist on a digit.
    const char *first = s + strspn(s, " \t\n\v\f\r");
    if (*first < '0' || *first > '9') {
        fprintf(stderr, "invalid byte position: %s\n", s);
        exit(2);
    }

    errno = 0;
    unsigned long long v = strtoull(s, endp, 10);
    // errno catches overflow (ERANGE); positions are 1-based so 0 is never valid.
    if (errno || v == 0) {
        fprintf(stderr, "invalid byte position: %s\n", s);
        exit(2);
    }
    return v;
}
static Range *parse_ranges(char *spec, size_t *count) {
size_t cap = 16;
Range *ranges = malloc(cap * sizeof *ranges);
if (!ranges) {
perror("malloc");
exit(1);
}
for (char *tok = strtok(spec, ","); tok; tok = strtok(NULL, ",")) {
char *p = tok;
char *endp;
unsigned long long start = parse_ull(p, &endp);
unsigned long long end = start;
if (*endp == '-') {
p = endp + 1;
if (*p == '\0') {
end = 0; // open-ended
} else {
end = parse_ull(p, &endp);
if (*endp != '\0' || end < start) {
fprintf(stderr, "invalid range: %s\n", tok);
exit(2);
}
}
} else if (*endp != '\0') {
fprintf(stderr, "invalid range: %s\n", tok);
exit(2);
}
if (*count == cap) {
cap *= 2;
ranges = realloc(ranges, cap * sizeof *ranges);
if (!ranges) {
perror("realloc");
exit(1);
}
}
ranges[(*count)++] = (Range){ start, end };
}
qsort(ranges, *count, sizeof *ranges, cmp_range);
return ranges;
}
// Entry point: copy the byte ranges selected by "-b LIST" from stdin to stdout.
//
// The only accepted arguments are "-b" followed by a range list; anything else
// prints a usage message. Input is consumed in fixed-size chunks, and for each
// chunk the parts that intersect the requested ranges are written out.
//
// Exit status: 0 on success, 1 on an I/O or allocation failure, 2 on a usage
// error (parse_ranges also exits with 2 on a malformed list).
int main(int argc, char **argv) {
    char *spec = NULL;
    for (int i = 1; i < argc; i++) {
        if (strcmp(argv[i], "-b") == 0 && i + 1 < argc) {
            spec = argv[++i];
        } else {
            fprintf(stderr, "usage: %s -b ranges < input > output\n", argv[0]);
            return 2;
        }
    }
    if (!spec) {
        fprintf(stderr, "usage: %s -b ranges < input > output\n", argv[0]);
        return 2;
    }

    // parse_ranges() tokenizes its argument destructively, so work on a copy.
    spec = strdup(spec);
    if (!spec) {
        perror("strdup");
        return 1;
    }

    size_t nranges = 0;
    Range *ranges = parse_ranges(spec, &nranges);

    enum { CHUNK = 1024 * 1024 };
    unsigned char *buf = malloc(CHUNK);
    if (!buf) {
        perror("malloc");
        return 1;
    }

    unsigned long long offset = 1; // current chunk start, 1-based
    size_t ri = 0;                 // index of the first range not yet fully emitted

    for (;;) {
        size_t n = fread(buf, 1, CHUNK, stdin);
        if (n == 0) {
            if (ferror(stdin)) {
                perror("fread");
                return 1;
            }
            break; // clean EOF
        }

        unsigned long long chunk_start = offset;
        unsigned long long chunk_end = offset + n - 1;

        while (ri < nranges) {
            unsigned long long rs = ranges[ri].start;
            // An open-ended range (end == 0) reaches at least to the end of
            // whatever chunk is currently in the buffer.
            unsigned long long re = ranges[ri].end ? ranges[ri].end : chunk_end;

            // Ranges are sorted by start, so once one begins past this chunk
            // none of the remaining ones can intersect it either.
            if (rs > chunk_end)
                break;

            // Clip the range to the part that lies inside this chunk.
            unsigned long long s = rs > chunk_start ? rs : chunk_start;
            unsigned long long e = re < chunk_end ? re : chunk_end;
            if (s <= e) {
                size_t pos = (size_t)(s - chunk_start);
                size_t len = (size_t)(e - s + 1);
                if (fwrite(buf + pos, 1, len, stdout) != len) {
                    perror("fwrite");
                    return 1;
                }
            }

            // Move to the next range only when this one ends inside the chunk;
            // otherwise it continues into the next chunk.
            if (ranges[ri].end && ranges[ri].end <= chunk_end)
                ri++;
            else
                break;
        }
        offset += n;
    }

    // stdout is buffered, so a failed write may only surface when the buffer
    // is flushed. Flush explicitly so that failure is reported and reflected
    // in the exit status rather than lost during exit-time cleanup.
    if (fflush(stdout) != 0 || ferror(stdout)) {
        perror("stdout");
        return 1;
    }

    free(buf);
    free(ranges);
    free(spec);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment