Skip to content

Instantly share code, notes, and snippets.

@niklas-ourmachinery
Last active October 23, 2024 14:11
Show Gist options
  • Save niklas-ourmachinery/0c391d378a9d327867d46772c4d7eedc to your computer and use it in GitHub Desktop.
Save niklas-ourmachinery/0c391d378a9d327867d46772c4d7eedc to your computer and use it in GitHub Desktop.
Tool for patching static hash macros in source code
// hash can generate static hashes of strings or patch TM_STATIC_HASH("...", v)
// macros in source code with the correct hash values.
#include <foundation/carray.inl>
#include <foundation/git_ignore.h>
#include <foundation/log.h>
#include <foundation/murmurhash64a.inl>
#include <foundation/os.h>
#include <foundation/temp_allocator.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#if defined(TM_OS_WINDOWS)
#include <windows.h>
#endif
// Logger callback that forwards `msg` to OutputDebugStringA() when built with
// TM_OS_WINDOWS defined. On other platforms it does nothing. `inst` and
// `log_type` are not used.
static void log_output_debug_string(struct tm_logger_o *inst, enum tm_log_type log_type, const char *msg)
{
    (void)inst;
    (void)log_type;
#if defined(TM_OS_WINDOWS)
    OutputDebugStringA(msg);
#else
    (void)msg;
#endif
}
// Logger object whose callback is log_output_debug_string(); main() registers
// it with tm_logger_api->add_logger().
// NOTE(review): the leading 0 presumably initializes the logger's instance
// pointer (the callback ignores its `inst` argument) -- confirm against the
// definition of tm_logger_i.
tm_logger_i output_debug_string_logger = {
0,
log_output_debug_string
};
// Prints the command-line help text through the logger at INFO level.
// `argc` and `argv` are accepted for symmetry with main() but are not used.
// Always returns 0 so the result can be used directly as the process exit code.
int print_usage(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    static const char usage_text[] =
        "Usage: hash [OPTION]... [FILE]...\n"
        "Checks and fixes static strings in the FILEs. (Defaults to `.`).\n"
        "\n"
        " -e [STRING]...\n"
        " When run with the -e argument, instead of checking files for static\n"
        " hash strings, the command will compute and print the static hashes of\n"
        " the strings passed as arguments.\n"
        "\n"
        " -h\n"
        " --help\n"
        " Display this help and exit.\n"
        "\n"
        "hash finds static hash macros in .c, .h and .inl file matching the pattern\n"
        "\n"
        " TM_STATIC_HASH(\"...\", ...)\n"
        "\n"
        "For every such macro, hash will compute the murmurhash of the string part\n"
        "and check that it matches the number part. If not, the file will be patched\n"
        "so that the hash is correct. For example, this:\n"
        "\n"
        " #define TM_CI_EDITOR_UI TM_STATIC_HASH(\"tm_ci_editor_ui_i\")\n"
        "\n"
        "Will be patched to:\n"
        "\n"
        " #define TM_CI_EDITOR_UI TM_STATIC_HASH(\"tm_ci_editor_ui_i\", 0xdd963167d23fc53aULL)\n"
        "\n"
        "If a directory is supplied, all *.c, *.h and *.inl files in the directory\n"
        "(recursively) will be checked. The directory traversal respects .gitignore\n"
        "files.\n"
        "\n";

    tm_logger_api->print(TM_LOG_TYPE_INFO, usage_text);
    return 0;
}
void process_file(const char *file, uint64_t size)
{
// Read text
struct tm_os_file_io_api *io = tm_os_api->file_io;
char *text = (char *)tm_allocator_api->system->realloc(tm_allocator_api->system->inst, 0, 0, size + 1, __FILE__, __LINE__, 0);
tm_file_o f = io->open_input(file);
io->read(f, text, (uint32_t)size);
io->close(f);
text[size] = 0;
char buffer[1024];
bool patched = false;
uint64_t lineno = 1;
uint64_t o = 0;
while (o < size) {
if (text[o] == '\n') {
++o, ++lineno;
continue;
} else if (text[o] != 'T') {
++o;
continue;
}
if (o + 16 > size)
break;
// Find static hash pattern
if (memcmp(text + o, "TM_STATIC_HASH(\"", 16)) {
++o;
continue;
}
const char *macro_s = text + o;
const char *string_s = macro_s + 16;
const char *string_e = (char *)memchr(string_s, '"', size - o - 16);
if (!string_e || string_e - string_s > 512) {
++o;
continue;
}
const char *macro_e = (char *)memchr(string_e, ')', (size_t)(text + (int64_t)size - string_e)) + 1;
const uint32_t string_len = (uint32_t)(string_e - string_s);
const uint32_t macro_len = (uint32_t)(macro_e - macro_s);
const uint64_t hash = tm_murmur_hash_64a_inline(string_s, string_len, 0);
sprintf(buffer, "TM_STATIC_HASH(\"%.*s\", 0x%" PRIx64 "ULL)", string_len, string_s, hash);
const uint32_t buffer_len = (uint32_t)strlen(buffer);
if (o + buffer_len > size || 0 == memcmp(macro_s, buffer, buffer_len)) {
o += buffer_len;
continue;
}
// A bad hash has been found -- patch it.
patched = true;
tm_logger_api->printf(TM_LOG_TYPE_INFO, "%s:%d %s", file, lineno, buffer);
const int64_t extra = (int64_t)buffer_len - (int64_t)macro_len;
text = tm_allocator_api->system->realloc(tm_allocator_api->system->inst, text, size + 1, (uint64_t)((int64_t)size + extra + 1), __FILE__, __LINE__, 0);
memmove(text + o + extra, text + o, size + 1 - o);
memcpy(text + o, buffer, buffer_len);
size = (uint64_t)((int64_t)size + extra);
o += buffer_len;
}
if (patched) {
f = io->open_output(file, false);
io->write(f, text, (uint32_t)size);
io->close(f);
}
tm_allocator_api->system->realloc(tm_allocator_api->system->inst, text, size + 1, 0, __FILE__, __LINE__, 0);
}
// Returns 1 if `s` ends with the suffix `e` AND is strictly longer than it,
// otherwise 0. A string that is exactly equal to the suffix does not match.
int strendswith(const char *s, const char *e)
{
    const uint64_t s_len = strlen(s);
    const uint64_t suffix_len = strlen(e);

    // Too short to hold the suffix plus at least one leading character.
    if (s_len <= suffix_len)
        return 0;

    const char *tail = s + (s_len - suffix_len);
    return strcmp(tail, e) == 0 ? 1 : 0;
}
// Processes `file`, which may be a regular file or a directory.
//
// * Paths matched by `gitignore` (the text of a .gitignore file, or NULL if
//   none has been loaded yet) are skipped.
// * A path that does not exist is reported as an error and terminates the
//   process with exit code -1.
// * A directory is traversed recursively. If `gitignore` is NULL, the
//   directory's own .gitignore file is loaded (an empty pattern list is used
//   when it is missing or unreadable) and applied to everything below it.
//   Entries whose name starts with '.' are skipped.
// * A regular file is passed to process_file() if its name ends in .c, .h or
//   .inl.
void process_file_or_dir(const char *file, const char *gitignore)
{
    if (gitignore && tm_git_ignore_api->match(gitignore, file))
        return;

    struct tm_os_file_system_api *fs = tm_os_api->file_system;
    tm_file_stat_t stat = fs->stat(file);
    if (!stat.exists) {
        tm_logger_api->printf(TM_LOG_TYPE_ERROR,
            "hash: cannot access '%s': No such file or directory\n\n", file);
        exit(-1);
    } else if (stat.is_directory) {
        TM_INIT_TEMP_ALLOCATOR(ta);

        if (gitignore == NULL) {
            const char *gitignore_f = !strcmp(file, ".") ? ".gitignore" : tm_temp_allocator_api->printf(ta, "%s/.gitignore", file);
            tm_file_stat_t gistat = fs->stat(gitignore_f);
            if (gistat.exists) {
                struct tm_os_file_io_api *io = tm_os_api->file_io;
                char *data = (char *)ta->realloc(ta->inst, 0, 0, gistat.size + 1);
                tm_file_o f = io->open_input(gitignore_f);
                int32_t res = io->read(f, data, (uint32_t)gistat.size);
                io->close(f);
                data[gistat.size] = 0;
                gitignore = res < 0 ? "" : data;
            } else
                gitignore = "";
        }

        // The entry names are stored back to back, each zero-terminated,
        // directly after the tm_strings_t header.
        tm_strings_t *entries = fs->directory_entries(file, ta);
        char *s = (char *)entries + sizeof(tm_strings_t);
        for (uint32_t i = 0; i < entries->count; ++i) {
            char *sub = s;
            s += strlen(s) + 1;
            if (sub[0] == '.')
                continue;
            if (strcmp(file, ".") == 0)
                process_file_or_dir(sub, gitignore);
            else {
                char *joined = tm_temp_allocator_api->printf(ta, "%s/%s", file, sub);
                process_file_or_dir(joined, gitignore);
                // Release the joined path; its size includes the terminating zero.
                ta->realloc(ta->inst, joined, strlen(joined) + 1, 0);
            }
        }

        TM_SHUTDOWN_TEMP_ALLOCATOR(ta);
    } else {
        if (strendswith(file, ".c") || strendswith(file, ".h") || strendswith(file, ".inl"))
            process_file(file, stat.size);
    }
}
// Logs the complete TM_STATIC_HASH("...", 0x...ULL) macro for the string `s`,
// using the hash returned by tm_murmur_hash_string_inline().
void process_string(const char *s)
{
    tm_logger_api->printf(
        TM_LOG_TYPE_INFO,
        "TM_STATIC_HASH(\"%s\", 0x%" PRIx64 "ULL)\n",
        s,
        tm_murmur_hash_string_inline(s));
}
// Entry point. Parses the command line (see print_usage() for the syntax):
//
// * Options are read until the first non-option argument or "--"; everything
//   after that is treated as the FILE / STRING list and processed immediately.
// * With -e the remaining arguments are strings to hash, otherwise they are
//   files or directories to check and patch.
// * Without any FILE arguments (and without -e), the current directory is
//   processed.
//
// Returns 0 on success and -1 if an unknown option is encountered.
int main(int argc, char **argv)
{
    tm_logger_api->add_logger(tm_logger_api->printf_logger);
    tm_logger_api->add_logger(&output_debug_string_logger);

    bool action_print_usage = false;
    bool action_compute_hashes = false;
    bool in_file_list = false;

    for (int i = 1; i < argc; ++i) {
        if (in_file_list) {
            if (action_compute_hashes)
                process_string(argv[i]);
            else
                process_file_or_dir(argv[i], NULL);
        } else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) {
            action_print_usage = true;
        } else if (strcmp(argv[i], "-e") == 0) {
            action_compute_hashes = true;
        } else if (strcmp(argv[i], "--") == 0) {
            in_file_list = true;
        } else if (argv[i][0] == '-') {
            tm_logger_api->printf(TM_LOG_TYPE_ERROR,
                "hash: unknown option -- %s\n"
                "Try 'hash --help' for more information.\n",
                argv[i]);
            return -1;
        } else {
            // First non-option argument: switch to list mode and revisit this
            // argument so that it is processed by the branch above.
            in_file_list = true;
            --i;
        }
    }

    if (action_print_usage)
        return print_usage(argc, argv);
    else if (!in_file_list && !action_compute_hashes)
        // -e means "hash strings instead of checking files", so never fall back
        // to patching the current directory when it was given without strings.
        process_file_or_dir(".", NULL);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment