Skip to content

Instantly share code, notes, and snippets.

@RealNeGate
Last active December 13, 2022 03:50
Show Gist options
  • Save RealNeGate/397db4aaace43e0499dc8f7b429ccc17 to your computer and use it in GitHub Desktop.
Save RealNeGate/397db4aaace43e0499dc8f7b429ccc17 to your computer and use it in GitHub Desktop.
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <x86intrin.h>
static const char keywords[][16] = {
"auto",
"break",
"case",
"char",
"const",
"continue",
"default",
"do",
"double",
"else",
"enum",
"extern",
"float",
"for",
"goto",
"if",
"inline",
"int",
"long",
"register",
"restrict",
"return",
"short",
"signed",
"sizeof",
"static",
"struct",
"switch",
"typedef",
"union",
"unsigned",
"void",
"volatile",
"while",
"_Alignas",
"_Alignof",
"_Atomic",
"_Bool",
"_Complex",
"_Generic",
"_Imaginary",
"_Pragma",
"_Noreturn",
"_Static_assert",
"_Thread_local",
"_Typeof",
"_Vector",
"__asm__",
"__attribute__",
"__cdecl",
"__stdcall",
"__declspec"
};
// This thing just prints to the console some source code you copy into the code
enum { num_keywords = sizeof(keywords) / sizeof(*keywords) };
uint64_t signatures[num_keywords];
void init(void) {
srand(0);
for (int i = 0; i < num_keywords; i++) {
const char *keyword = keywords[i];
size_t keyword_len = strlen(keyword);
uint64_t signature = 0;
// The 7 high bytes of the signature are characters 0, 2, 4, etc.
for (int j = 0; j < keyword_len && j < 7; j++) {
signature = (uint8_t)keyword[j] + signature * 256;
}
if (strcmp(keyword, "void") == 0) {
printf("A: %lu\n", signature);
}
// The low byte of the signature is the length.
signature = keyword_len + signature * 256;
if (strcmp(keyword, "void") == 0) {
printf("B: %lu\n", signature);
}
for (int j = 0; j < i; j++) {
if (signatures[j] == signature) {
printf("Signature collision.\n");
exit(1);
}
}
if (strcmp(keyword, "void") == 0) {
printf("C: %lu\n", signature);
}
signatures[i] = signature;
}
}
int check(uint64_t a, uint8_t max_collisions) {
uint8_t slot[256] = { 0 };
for (int i = 0; i < num_keywords; i++) {
uint8_t hash = (uint8_t)((a * signatures[i]) >> 56);
if (slot[hash] > max_collisions) return 0;
slot[hash]++;
}
return 1;
}
uint64_t rand16(void) {
return rand();
}
uint64_t rand64(void) {
return rand16() | (rand16() << 16ull) | (rand16() << 32ull) | (rand16() << 48ull);
}
int main(int argc, char **argv) {
init();
uint64_t max_iterations = 1ull << 40;
uint8_t max_collisions = 0;
for (uint64_t i = 0; i < max_iterations; i++) {
uint64_t a = rand64();
// a = 4847631451906204020
if (check(a, max_collisions)) {
printf("a = %lu (%d keywords, %lu tries)\n", a, num_keywords, i);
printf("static const uint8_t values[256] = {\n");
for (int i = 0; i < num_keywords;) {
int end = i + 4;
printf(" ");
for (; i < end && i < num_keywords; i++) {
uint8_t hash = (uint8_t)((a * signatures[i]) >> 56);
printf("[%d] = %d /* %s */, ", hash, i, keywords[i]);
}
printf("\n");
}
printf("};\n");
return 0;
}
}
printf("No hash function with max %d collisions found.\n", max_collisions);
return 1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment