Last active
December 13, 2022 03:50
-
-
Save RealNeGate/397db4aaace43e0499dc8f7b429ccc17 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <stdio.h> | |
#include <stdint.h> | |
#include <string.h> | |
#include <x86intrin.h> | |
static const char keywords[][16] = { | |
"auto", | |
"break", | |
"case", | |
"char", | |
"const", | |
"continue", | |
"default", | |
"do", | |
"double", | |
"else", | |
"enum", | |
"extern", | |
"float", | |
"for", | |
"goto", | |
"if", | |
"inline", | |
"int", | |
"long", | |
"register", | |
"restrict", | |
"return", | |
"short", | |
"signed", | |
"sizeof", | |
"static", | |
"struct", | |
"switch", | |
"typedef", | |
"union", | |
"unsigned", | |
"void", | |
"volatile", | |
"while", | |
"_Alignas", | |
"_Alignof", | |
"_Atomic", | |
"_Bool", | |
"_Complex", | |
"_Generic", | |
"_Imaginary", | |
"_Pragma", | |
"_Noreturn", | |
"_Static_assert", | |
"_Thread_local", | |
"_Typeof", | |
"_Vector", | |
"__asm__", | |
"__attribute__", | |
"__cdecl", | |
"__stdcall", | |
"__declspec" | |
}; | |
// This thing just prints to the console some source code you copy into the code | |
enum { num_keywords = sizeof(keywords) / sizeof(*keywords) }; | |
uint64_t signatures[num_keywords]; | |
void init(void) { | |
srand(0); | |
for (int i = 0; i < num_keywords; i++) { | |
const char *keyword = keywords[i]; | |
size_t keyword_len = strlen(keyword); | |
uint64_t signature = 0; | |
// The 7 high bytes of the signature are characters 0, 2, 4, etc. | |
for (int j = 0; j < keyword_len && j < 7; j++) { | |
signature = (uint8_t)keyword[j] + signature * 256; | |
} | |
if (strcmp(keyword, "void") == 0) { | |
printf("A: %lu\n", signature); | |
} | |
// The low byte of the signature is the length. | |
signature = keyword_len + signature * 256; | |
if (strcmp(keyword, "void") == 0) { | |
printf("B: %lu\n", signature); | |
} | |
for (int j = 0; j < i; j++) { | |
if (signatures[j] == signature) { | |
printf("Signature collision.\n"); | |
exit(1); | |
} | |
} | |
if (strcmp(keyword, "void") == 0) { | |
printf("C: %lu\n", signature); | |
} | |
signatures[i] = signature; | |
} | |
} | |
int check(uint64_t a, uint8_t max_collisions) { | |
uint8_t slot[256] = { 0 }; | |
for (int i = 0; i < num_keywords; i++) { | |
uint8_t hash = (uint8_t)((a * signatures[i]) >> 56); | |
if (slot[hash] > max_collisions) return 0; | |
slot[hash]++; | |
} | |
return 1; | |
} | |
uint64_t rand16(void) { | |
return rand(); | |
} | |
uint64_t rand64(void) { | |
return rand16() | (rand16() << 16ull) | (rand16() << 32ull) | (rand16() << 48ull); | |
} | |
int main(int argc, char **argv) { | |
init(); | |
uint64_t max_iterations = 1ull << 40; | |
uint8_t max_collisions = 0; | |
for (uint64_t i = 0; i < max_iterations; i++) { | |
uint64_t a = rand64(); | |
// a = 4847631451906204020 | |
if (check(a, max_collisions)) { | |
printf("a = %lu (%d keywords, %lu tries)\n", a, num_keywords, i); | |
printf("static const uint8_t values[256] = {\n"); | |
for (int i = 0; i < num_keywords;) { | |
int end = i + 4; | |
printf(" "); | |
for (; i < end && i < num_keywords; i++) { | |
uint8_t hash = (uint8_t)((a * signatures[i]) >> 56); | |
printf("[%d] = %d /* %s */, ", hash, i, keywords[i]); | |
} | |
printf("\n"); | |
} | |
printf("};\n"); | |
return 0; | |
} | |
} | |
printf("No hash function with max %d collisions found.\n", max_collisions); | |
return 1; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment