Last active
October 7, 2024 22:00
-
-
Save cpq/4714740 to your computer and use it in GitHub Desktop.
How to embed data files into C/C++ executable
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright (c) Sergey Lyubka, 2013. | |
// All rights reserved. | |
// Released under the MIT license. | |
// This program is used to embed arbitrary data into a C binary. It takes | |
// a list of files as an input, and produces a .c data file that contains | |
// contents of all these files as collection of char arrays. | |
// Usage: | |
// 1. Compile this file: | |
// cc -o embed embed.c | |
// | |
// 2. Convert list of files into single .c: | |
// ./embed file1.data file2.data > embedded_data.c | |
// | |
// 3. In your application code, you can access files using this function: | |
// | |
// const char *find_embedded_file(const char *file_name, size_t *size); | |
// size_t size; | |
// const char *data = find_embedded_file("file1.data", &size); | |
// | |
// 4. Build your app with embedded_data.c: | |
// cc -o my_app my_app.c embedded_data.c | |
#include <stdio.h> | |
#include <stdlib.h> | |
// C source text of the lookup function that main() prints at the end of the
// generated file. The emitted function walks the embedded_files table until
// it reaches the entry whose name is NULL; on a name match it stores the
// entry's size through `size` (when `size` is not NULL) and returns the
// entry's data, otherwise it returns NULL.
static const char *code =
    // Signature and table cursor.
    "const char *find_embedded_file(const char *name, size_t *size) {\n"
    " const struct embedded_file *p;\n"
    // Linear scan over the table, stopping at the terminating entry.
    " for (p = embedded_files; p->name != NULL; p++) {\n"
    " if (!strcmp(p->name, name)) {\n"
    // Match: report the size if requested, then hand back the data.
    " if (size != NULL) { *size = p->size; }\n"
    " return (const char *) p->data;\n"
    " }\n" " }\n"
    // No entry matched.
    " return NULL;\n" "}\n";
int main(int argc, char *argv[]) { | |
FILE *fp; | |
int i, j, ch; | |
for (i = 1; i < argc; i++) { | |
if ((fp = fopen(argv[i], "rb")) == NULL) { | |
exit(EXIT_FAILURE); | |
} else { | |
printf("static const unsigned char v%d[] = {", i); | |
for (j = 0; (ch = fgetc(fp)) != EOF; j++) { | |
if ((j % 12) == 0) { | |
printf("%s", "\n "); | |
} | |
printf(" %#04x,", ch); | |
} | |
// Append zero byte at the end, to make text files appear in memory | |
// as nul-terminated strings. | |
printf("%s", " 0x00\n};\n"); | |
fclose(fp); | |
} | |
} | |
printf("%s", "\nconst struct embedded_file {\n"); | |
printf("%s", " const char *name;\n"); | |
printf("%s", " const unsigned char *data;\n"); | |
printf("%s", " size_t size;\n"); | |
printf("%s", "} embedded_files[] = {\n"); | |
for (i = 1; i < argc; i++) { | |
printf(" {\"%s\", v%d, sizeof(v%d) - 1},\n", argv[i], i, i); | |
} | |
printf("%s", " {NULL, NULL, 0}\n"); | |
printf("%s", "};\n\n"); | |
printf("%s", code); | |
return EXIT_SUCCESS; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright (c) Sergey Lyubka, 2013. | |
# All rights reserved. | |
# Released under the MIT license. | |
# | |
# Same functionality as C code above, but in Perl. | |
# | |
# This program is used to embed arbitrary data into a C binary. It takes | |
# a list of files as an input, and produces a .c data file that contains | |
# contents of all these files as collection of char arrays. | |
# | |
# Usage: perl <this_file> <file1> [file2, ...] > embedded_data.c | |
# Returns $text rendered as a C string literal, including the surrounding
# double quotes. Backslash, double quote and question mark are
# backslash-escaped, and every character outside printable ASCII becomes a
# three-digit octal escape, so file names such as Windows paths still yield
# valid C.
# NOTE(review): assumes $text is a byte string (every ord() <= 255), which is
# the default for @ARGV unless Perl is started with -C; confirm if that
# option is ever used.
sub c_string_literal {
    my ($text) = @_;
    $text =~ s/([\\"?])/\\$1/g;
    $text =~ s/([^\x20-\x7e])/sprintf('\\%03o', ord($1))/ge;
    return qq{"$text"};
}

# Pass 1: emit one `unsigned char v<i>[]` array per input file, holding the
# file's bytes followed by one extra zero byte.
foreach my $i (0 .. $#ARGV) {
    my $path = $ARGV[$i];
    # Lexical filehandle instead of a global bareword; raw layer so bytes are
    # read unmodified.
    open(my $fh, '<:raw', $path) or die "Cannot open $path: $!\n";
    printf("static const unsigned char v%d[] = {", $i);
    my $byte;
    my $j = 0;
    while (1) {
        my $got = read($fh, $byte, 1);
        # read() returns undef on error and 0 at end of file; only the latter
        # may end the array normally.
        die "Cannot read $path: $!\n" unless defined $got;
        last if $got == 0;
        # Start a new output line every 12 bytes.
        print "\n" if ($j % 12) == 0;
        printf ' %#04x,', ord($byte);
        $j++;
    }
    print " 0x00\n};\n";
    close $fh;
}

# Pass 2: emit the headers the generated code needs and open the lookup table.
print <<EOS;
#include <stddef.h>
#include <string.h>
static const struct embedded_file {
const char *name;
const unsigned char *data;
size_t size;
} embedded_files[] = {
EOS

# One table row per file; sizeof - 1 excludes the extra terminating zero byte.
foreach my $i (0 .. $#ARGV) {
    print " {" . c_string_literal($ARGV[$i]) . ", v$i, sizeof(v$i) - 1},\n";
}

# Close the table with a NULL-named terminator entry and emit the lookup
# function that scans it.
print <<EOS;
{NULL, NULL, 0}
};
const char *find_embedded_file(const char *name, size_t *size) {
const struct embedded_file *p;
for (p = embedded_files; p->name != NULL; p++) {
if (!strcmp(p->name, name)) {
if (size != NULL) { *size = p->size; }
return (const char *) p->data;
}
}
return NULL;
}
EOS
@cpq sorry to bother you but I was just wondering how much bigger the data.c output file would be compared to the original binary?
I'm running an ~480 MB zipped archive through and getting a c file that is greater than 2.6 GB. Is this a problem, or is that actually what is supposed to happen?
@pete-the-dev that's about right. Each byte gets transformed into a ` 0xYY,` string,
which is 6 characters, so expect a ~6x size increase.
That's a .c file though, remember that when it gets compiled, file data will occupy the original space in the binary, not 6x.
Thanks!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@anonneo If I'm not too late, all you need to do is loop over the file string in your C program and put it into a FILE pointer.
Sorry about the formatting, markdown code styling is new to me.