Last active
October 23, 2024 14:11
-
-
Save niklas-ourmachinery/0c391d378a9d327867d46772c4d7eedc to your computer and use it in GitHub Desktop.
Tool for patching static hash macros in source code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// hash can generate static hashes of strings or patch TM_STATIC_HASH("...", v)
// macros in source code with the correct hash values.
#include <foundation/carray.inl> | |
#include <foundation/git_ignore.h> | |
#include <foundation/log.h> | |
#include <foundation/murmurhash64a.inl> | |
#include <foundation/os.h> | |
#include <foundation/temp_allocator.h> | |
#include <inttypes.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#if defined(TM_OS_WINDOWS) | |
#include <windows.h> | |
#endif | |
static void log_output_debug_string(struct tm_logger_o *inst, enum tm_log_type log_type, const char *msg) | |
{ | |
#if defined(TM_OS_WINDOWS) | |
OutputDebugStringA(msg); | |
#endif | |
} | |
tm_logger_i output_debug_string_logger = { | |
0, | |
log_output_debug_string | |
}; | |
int print_usage(int argc, char **argv) | |
{ | |
tm_logger_api->print(TM_LOG_TYPE_INFO, | |
"Usage: hash [OPTION]... [FILE]...\n" | |
"Checks and fixes static strings in the FILEs. (Defaults to `.`).\n" | |
"\n" | |
" -e [STRING]...\n" | |
" When run with the -e argument, instead of checking files for static\n" | |
" hash strings, the command will compute and print the static hashes of\n" | |
" the strings passed as arguments.\n" | |
"\n" | |
" -h\n" | |
" --help\n" | |
" Display this help and exit.\n" | |
"\n" | |
"hash finds static hash macros in .c, .h and .inl file matching the pattern\n" | |
"\n" | |
" TM_STATIC_HASH(\"...\", ...)\n" | |
"\n" | |
"For every such macro, hash will compute the murmurhash of the string part\n" | |
"and check that it matches the number part. If not, the file will be patched\n" | |
"so that the hash is correct. For example, this:\n" | |
"\n" | |
" #define TM_CI_EDITOR_UI TM_STATIC_HASH(\"tm_ci_editor_ui_i\")\n" | |
"\n" | |
"Will be patched to:\n" | |
"\n" | |
" #define TM_CI_EDITOR_UI TM_STATIC_HASH(\"tm_ci_editor_ui_i\", 0xdd963167d23fc53aULL)\n" | |
"\n" | |
"If a directory is supplied, all *.c, *.h and *.inl files in the directory\n" | |
"(recursively) will be checked. The directory traversal respects .gitignore\n" | |
"files.\n" | |
"\n"); | |
return 0; | |
} | |
void process_file(const char *file, uint64_t size) | |
{ | |
// Read text | |
struct tm_os_file_io_api *io = tm_os_api->file_io; | |
char *text = (char *)tm_allocator_api->system->realloc(tm_allocator_api->system->inst, 0, 0, size + 1, __FILE__, __LINE__, 0); | |
tm_file_o f = io->open_input(file); | |
io->read(f, text, (uint32_t)size); | |
io->close(f); | |
text[size] = 0; | |
char buffer[1024]; | |
bool patched = false; | |
uint64_t lineno = 1; | |
uint64_t o = 0; | |
while (o < size) { | |
if (text[o] == '\n') { | |
++o, ++lineno; | |
continue; | |
} else if (text[o] != 'T') { | |
++o; | |
continue; | |
} | |
if (o + 16 > size) | |
break; | |
// Find static hash pattern | |
if (memcmp(text + o, "TM_STATIC_HASH(\"", 16)) { | |
++o; | |
continue; | |
} | |
const char *macro_s = text + o; | |
const char *string_s = macro_s + 16; | |
const char *string_e = (char *)memchr(string_s, '"', size - o - 16); | |
if (!string_e || string_e - string_s > 512) { | |
++o; | |
continue; | |
} | |
const char *macro_e = (char *)memchr(string_e, ')', (size_t)(text + (int64_t)size - string_e)) + 1; | |
const uint32_t string_len = (uint32_t)(string_e - string_s); | |
const uint32_t macro_len = (uint32_t)(macro_e - macro_s); | |
const uint64_t hash = tm_murmur_hash_64a_inline(string_s, string_len, 0); | |
sprintf(buffer, "TM_STATIC_HASH(\"%.*s\", 0x%" PRIx64 "ULL)", string_len, string_s, hash); | |
const uint32_t buffer_len = (uint32_t)strlen(buffer); | |
if (o + buffer_len > size || 0 == memcmp(macro_s, buffer, buffer_len)) { | |
o += buffer_len; | |
continue; | |
} | |
// A bad hash has been found -- patch it. | |
patched = true; | |
tm_logger_api->printf(TM_LOG_TYPE_INFO, "%s:%d %s", file, lineno, buffer); | |
const int64_t extra = (int64_t)buffer_len - (int64_t)macro_len; | |
text = tm_allocator_api->system->realloc(tm_allocator_api->system->inst, text, size + 1, (uint64_t)((int64_t)size + extra + 1), __FILE__, __LINE__, 0); | |
memmove(text + o + extra, text + o, size + 1 - o); | |
memcpy(text + o, buffer, buffer_len); | |
size = (uint64_t)((int64_t)size + extra); | |
o += buffer_len; | |
} | |
if (patched) { | |
f = io->open_output(file, false); | |
io->write(f, text, (uint32_t)size); | |
io->close(f); | |
} | |
tm_allocator_api->system->realloc(tm_allocator_api->system->inst, text, size + 1, 0, __FILE__, __LINE__, 0); | |
} | |
// Returns 1 if `s` ends with the suffix `e` AND is strictly longer than it,
// otherwise 0. A string that consists of nothing but the suffix does not
// count as a match.
int strendswith(const char *s, const char *e)
{
    const uint64_t s_len = strlen(s);
    const uint64_t e_len = strlen(e);

    if (s_len <= e_len)
        return 0;

    const char *tail = s + (s_len - e_len);
    return strcmp(tail, e) == 0 ? 1 : 0;
}
void process_file_or_dir(const char *file, const char *gitignore) | |
{ | |
if (gitignore && tm_git_ignore_api->match(gitignore, file)) | |
return; | |
struct tm_os_file_system_api *fs = tm_os_api->file_system; | |
tm_file_stat_t stat = fs->stat(file); | |
if (!stat.exists) { | |
tm_logger_api->printf(TM_LOG_TYPE_ERROR, | |
"trim-includes: cannot access '%s': No such file or directory\n\n", file); | |
exit(-1); | |
} else if (stat.is_directory) { | |
TM_INIT_TEMP_ALLOCATOR(ta); | |
if (gitignore == NULL) { | |
const char *gitignore_f = !strcmp(file, ".") ? ".gitignore" : tm_temp_allocator_api->printf(ta, "%s/.gitignore", file); | |
tm_file_stat_t gistat = fs->stat(gitignore_f); | |
if (gistat.exists) { | |
struct tm_os_file_io_api *io = tm_os_api->file_io; | |
char *data = (char *)ta->realloc(ta->inst, 0, 0, gistat.size + 1); | |
tm_file_o f = io->open_input(gitignore_f); | |
int32_t res = io->read(f, data, (uint32_t)gistat.size); | |
io->close(f); | |
data[gistat.size] = 0; | |
gitignore = res < 0 ? "" : data; | |
} else | |
gitignore = ""; | |
} | |
tm_strings_t *entries = fs->directory_entries(file, ta); | |
char *s = (char *)entries + sizeof(tm_strings_t); | |
for (uint32_t i = 0; i < entries->count; ++i) { | |
char *sub = s; | |
s += strlen(s) + 1; | |
if (sub[0] == '.') | |
continue; | |
if (strcmp(file, ".") == 0) | |
process_file_or_dir(sub, gitignore); | |
else { | |
char *joined = tm_temp_allocator_api->printf(ta, "%s/%s", file, sub); | |
process_file_or_dir(joined, gitignore); | |
ta->realloc(ta->inst, joined, strlen(joined + 1), 0); | |
} | |
} | |
TM_SHUTDOWN_TEMP_ALLOCATOR(ta); | |
} else { | |
if (strendswith(file, ".c") || strendswith(file, ".h") || strendswith(file, ".inl")) | |
process_file(file, stat.size); | |
} | |
} | |
void process_string(const char *s) | |
{ | |
const uint64_t hash = tm_murmur_hash_string_inline(s); | |
tm_logger_api->printf(TM_LOG_TYPE_INFO, "TM_STATIC_HASH(\"%s\", 0x%" PRIx64 "ULL)\n", s, hash); | |
} | |
int main(int argc, char **argv) | |
{ | |
tm_logger_api->add_logger(tm_logger_api->printf_logger); | |
tm_logger_api->add_logger(&output_debug_string_logger); | |
bool action_print_usage = false; | |
bool action_compute_hashes = false; | |
bool in_file_list = false; | |
for (int i = 1; i < argc; ++i) { | |
if (in_file_list) { | |
if (action_compute_hashes) | |
process_string(argv[i]); | |
else | |
process_file_or_dir(argv[i], NULL); | |
} else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { | |
action_print_usage = true; | |
} else if (strcmp(argv[i], "-e")) { | |
action_compute_hashes = true; | |
} else if (strcmp(argv[i], "--") == 0) { | |
in_file_list = true; | |
} else if (argv[i][0] == '-') { | |
tm_logger_api->printf(TM_LOG_TYPE_ERROR, | |
"hash: unknown option -- %s\n" | |
"Try 'hash --help' for more information.\n", | |
argv[i]); | |
return -1; | |
} else { | |
in_file_list = true; | |
--i; | |
} | |
} | |
if (action_print_usage) | |
return print_usage(argc, argv); | |
else if (!in_file_list) | |
process_file_or_dir(".", NULL); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment