-
-
Save cpq/4714740 to your computer and use it in GitHub Desktop.
// Copyright (c) Sergey Lyubka, 2013.
// All rights reserved.
// Released under the MIT license.
// This program is used to embed arbitrary data into a C binary. It takes
// a list of files as an input, and produces a .c data file that contains
// the contents of all these files as a collection of char arrays.
// Usage:
//   1. Compile this file:
//        cc -o embed embed.c
//
//   2. Convert a list of files into a single .c file:
//        ./embed file1.data file2.data > embedded_data.c
//
//   3. In your application code, you can access files using this function:
//
//        const char *find_embedded_file(const char *file_name, size_t *size);
//        size_t size;
//        const char *data = find_embedded_file("file1.data", &size);
//
//   4. Build your app with embedded_data.c:
//        cc -o my_app my_app.c embedded_data.c
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Source text of the lookup function appended verbatim to the generated file.
// The emitted function walks the `embedded_files` table (terminated by an
// entry whose name is NULL), compares each entry's name with strcmp(), and on
// a match stores the size through `size` (when non-NULL) and returns the data
// pointer; it returns NULL when no entry matches.
static const char *const code =
  "const char *find_embedded_file(const char *name, size_t *size) {\n"
  " const struct embedded_file *p;\n"
  " for (p = embedded_files; p->name != NULL; p++) {\n"
  " if (!strcmp(p->name, name)) {\n"
  " if (size != NULL) { *size = p->size; }\n"
  " return (const char *) p->data;\n"
  " }\n"
  " }\n"
  " return NULL;\n"
  "}\n";
int main(int argc, char *argv[]) { | |
FILE *fp; | |
int i, j, ch; | |
for (i = 1; i < argc; i++) { | |
if ((fp = fopen(argv[i], "rb")) == NULL) { | |
exit(EXIT_FAILURE); | |
} else { | |
printf("static const unsigned char v%d[] = {", i); | |
for (j = 0; (ch = fgetc(fp)) != EOF; j++) { | |
if ((j % 12) == 0) { | |
printf("%s", "\n "); | |
} | |
printf(" %#04x,", ch); | |
} | |
// Append zero byte at the end, to make text files appear in memory | |
// as nul-terminated strings. | |
printf("%s", " 0x00\n};\n"); | |
fclose(fp); | |
} | |
} | |
printf("%s", "\nconst struct embedded_file {\n"); | |
printf("%s", " const char *name;\n"); | |
printf("%s", " const unsigned char *data;\n"); | |
printf("%s", " size_t size;\n"); | |
printf("%s", "} embedded_files[] = {\n"); | |
for (i = 1; i < argc; i++) { | |
printf(" {\"%s\", v%d, sizeof(v%d) - 1},\n", argv[i], i, i); | |
} | |
printf("%s", " {NULL, NULL, 0}\n"); | |
printf("%s", "};\n\n"); | |
printf("%s", code); | |
return EXIT_SUCCESS; | |
} |
# Copyright (c) Sergey Lyubka, 2013.
# All rights reserved.
# Released under the MIT license.
#
# Same functionality as the C code above, but in Perl.
#
# This program is used to embed arbitrary data into a C binary. It takes
# a list of files as an input, and produces a .c data file that contains
# the contents of all these files as a collection of char arrays.
#
# Usage: perl <this_file> <file1> [file2, ...] > embedded_data.c
# Returns $name rendered as a double-quoted C string literal. Quotes and
# backslashes are backslash-escaped and control characters become three-digit
# octal escapes, so a file name cannot break the generated source.
sub c_string_literal {
    my ($name) = @_;
    $name =~ s/(["\\])/\\$1/g;
    $name =~ s/([\x00-\x1f\x7f])/sprintf('\\%03o', ord($1))/ge;
    return "\"$name\"";
}

# Emit one `static const unsigned char vN[]` array per input file, holding the
# file's bytes followed by a zero byte.
foreach my $i (0 .. $#ARGV) {
    # Lexical filehandle instead of the global bareword FD.
    open(my $fh, '<:raw', $ARGV[$i]) or die "Cannot open $ARGV[$i]: $!\n";
    printf("static const unsigned char v%d[] = {", $i);
    my $byte;
    my $j = 0;
    while (1) {
        # read() returns undef on error and 0 at end of file; tell them apart.
        my $got = read($fh, $byte, 1);
        die "Cannot read $ARGV[$i]: $!\n" unless defined $got;
        last if $got == 0;
        print "\n" if ($j % 12) == 0;
        # An explicit "0x" prefix: the '#' flag omits it for a zero value.
        printf ' 0x%02x,', ord($byte);
        $j++;
    }
    print " 0x00\n};\n";
    close $fh;
}

# Headers and the lookup-table type. The heredoc is single-quoted so nothing
# in the C text is interpolated by Perl.
print <<'EOS';
#include <stddef.h>
#include <string.h>
static const struct embedded_file {
const char *name;
const unsigned char *data;
size_t size;
} embedded_files[] = {
EOS

# One table entry per file; sizeof - 1 excludes the appended zero byte.
foreach my $i (0 .. $#ARGV) {
    print " {", c_string_literal($ARGV[$i]), ", v$i, sizeof(v$i) - 1},\n";
}

# Table terminator and the lookup function.
print <<'EOS';
{NULL, NULL, 0}
};
const char *find_embedded_file(const char *name, size_t *size) {
const struct embedded_file *p;
for (p = embedded_files; p->name != NULL; p++) {
if (!strcmp(p->name, name)) {
if (size != NULL) { *size = p->size; }
return (const char *) p->data;
}
}
return NULL;
}
EOS
Actually - scratch that. I wrote my own embedder in Python, which doesn't trigger compiler warnings about over-long strings. :-)
Thanks for the inspiration!
Oh hey Bjarni!
Haven't heard from you for ages. Cool that you find it useful, I've added the "MIT" license snippet just in case.
So I may seem like a noob...but let's say I'm using this to pack a zip into the binary, how would I recreate the zip file using the const char *data = find_embedded_file("test.zip", &size); function?
I'm basically needing to pack a lot of files as "resources" into a single C binary, then unpack them into files at runtime...I've done this successfully in C#, however I'm not sure how to convert the hex values created in the C file generated from the embed program back into tangible files.
@anonneo If I'm not too late, all you need to do is loop over the file string in your C program and write it into a FILE pointer.
FILE *fp = fopen("some_file.ext", "wb");
for(int i = 0; i < size; i++) {
fputc(some_file[i], fp);
}
fclose(fp);
Sorry about the formatting, markdown code styling is new to me.
@cpq sorry to bother you but I was just wondering how much bigger the data.c output file would be compared to the original binary?
I'm running an ~480 MB zipped archive through and getting a c file that is greater than 2.6 GB. Is this a problem, or is that actually what is supposed to happen?
@pete-the-dev that's about right. Each byte gets transformed into a ` 0xYY,` string, which is 6 characters, so expect a ~6x size increase.
That's a .c file though, remember that when it gets compiled, file data will occupy the original space in the binary, not 6x.
Thanks!
Hi @cpq - this came up when I was googling for a solution to exactly this problem, and I really like how you've done this. It's very clean and elegant!
However, I notice you've marked the sources as "all rights reserved", with no other license details. May I have your permission to include this code in libpagekite, a software library which I release under the Apache and AGPL licenses? Thanks!