Skip to content

Instantly share code, notes, and snippets.

@kempniu
Last active March 16, 2023 23:41
Show Gist options
  • Save kempniu/30c7fa2c1825cde80040 to your computer and use it in GitHub Desktop.
Save kempniu/30c7fa2c1825cde80040 to your computer and use it in GitHub Desktop.
sparsify - copy non-zero blocks of data from source to destination
/*
***********************************************************************
*
* DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
* Version 2, December 2004
*
* Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
*
* Everyone is permitted to copy and distribute verbatim or modified
* copies of this license document, and changing it is allowed as long
* as the name is changed.
*
* DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
*
* 0. You just DO WHAT THE FUCK YOU WANT TO.
*
***********************************************************************
*
* DISCLAIMER: USE AT YOUR OWN RISK. This program may bring
* ancient curses on you, fry your computer, kill kittens and/or
* cause space-time discontinuities. It worked for me. YMMV.
*
***********************************************************************
*/
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
/* Size of the window of the destination that is mmap()ed at any one time.
 * Parenthesized so the macro behaves as a single value inside any
 * expression (e.g. `x % MMAP_SIZE`). */
#define MMAP_SIZE (64 * 1024 * 1024) // 64 MiB
/* Granularity at which the source is read and tested for being all zeros. */
#define BLOCK_SIZE 4096
/* Print `msg` followed by the description of the current errno, then exit
 * with status 1. Wrapped in do { } while (0) so that `DIE("...");` is exactly
 * one statement and is safe in unbraced if/else bodies. */
#define DIE(msg) do { perror(msg); exit(1); } while (0)
/*
 * sparsify: copy only the non-zero data of a source file onto a destination.
 *
 * Usage: <program> /path/to/source /path/to/destination
 *
 * The source is read sequentially in BLOCK_SIZE chunks. A chunk consisting
 * solely of zero bytes is skipped; any other chunk is copied to the same
 * offset of the destination through a MAP_SHARED mapping that covers
 * MMAP_SIZE bytes at a time and is moved forward whenever it is exhausted.
 *
 * The destination is opened with O_RDWR only (no O_CREAT), so it must
 * already exist. NOTE: nothing here checks the destination's size; per the
 * mmap() specification, touching mapped pages that lie wholly beyond the end
 * of the underlying object raises SIGBUS, so the destination is expected to
 * be at least as large as the source.
 *
 * Returns 0 on success. On any failure a perror()-style message is printed
 * and the process exits with status 1 (via DIE).
 */
int main(int argc, char *argv[])
{
    if (argc != 3) {
        fprintf(stderr, "Usage: %s /path/to/source /path/to/destination\n", argv[0]);
        exit(1);
    }

    int srcfd, dstfd;
    unsigned char zeros[BLOCK_SIZE], src[BLOCK_SIZE], *dst;
    ssize_t rb = 0;
    int read_errno = 0;
    off_t offset = 0;       /* absolute position in source/destination */
    size_t mapoffset = 0;   /* position inside the current mapping window */

    if ((srcfd = open(argv[1], O_RDONLY)) == -1) {
        DIE("Failed to open source");
    }
    if ((dstfd = open(argv[2], O_RDWR)) == -1) {
        DIE("Failed to open destination");
    }
    dst = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, dstfd, 0);
    if (dst == MAP_FAILED) {
        DIE("Failed to mmap() destination");
    }

    memset(zeros, 0, sizeof zeros);

    for (;;) {
        size_t filled = 0;

        /* read() may legitimately return fewer bytes than requested or be
         * interrupted by a signal; keep going until the block is full, the
         * source is exhausted (rb == 0) or a real error occurs (rb == -1). */
        while (filled < BLOCK_SIZE) {
            rb = read(srcfd, src + filled, BLOCK_SIZE - filled);
            if (rb == -1 && errno == EINTR)
                continue;
            if (rb <= 0)
                break;
            filled += (size_t) rb;
        }
        if (rb == -1) {
            /* Remember the cause now: the cleanup calls below may change errno. */
            read_errno = errno;
        }

        /* `filled` is smaller than BLOCK_SIZE only for the final chunk, so a
         * source whose size is not a multiple of BLOCK_SIZE keeps its tail. */
        if (filled > 0) {
            if (memcmp(src, zeros, filled) != 0)
                memcpy(dst + mapoffset, src, filled);
            offset += (off_t) filled;
            mapoffset += filled;
        }

        if (rb <= 0)
            break;

        /* Window exhausted: slide the mapping forward to the current offset.
         * These calls are checked explicitly rather than inside assert(), so
         * they are still executed when the program is built with NDEBUG. */
        if (mapoffset == MMAP_SIZE) {
            printf("%16lld bytes processed\n", (long long) offset);
            if (munmap(dst, MMAP_SIZE) != 0) {
                DIE("Failed to munmap() destination");
            }
            dst = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, dstfd, offset);
            if (dst == MAP_FAILED) {
                DIE("Failed to mmap() destination");
            }
            mapoffset = 0;
        }
    }

    if (munmap(dst, MMAP_SIZE) != 0) {
        DIE("Failed to munmap() destination");
    }
    if (close(srcfd) != 0) {
        DIE("Failed to close source");
    }
    if (close(dstfd) != 0) {
        DIE("Failed to close destination");
    }

    if (rb == -1) {
        errno = read_errno;
        DIE("Error reading source");
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment