Last active
August 5, 2025 23:38
-
-
Save jweinst1/d01a2636f06322d69bde39af2a161b5a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
#include <stddef.h> | |
#include <stdio.h> | |
#include <time.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <limits.h> | |
#include <stdbool.h> | |
#include <unordered_map> | |
#include <vector> | |
static std::unordered_map<uint64_t, size_t> GLOBAL_HASH_COUNT; | |
static void addHash(uint64_t h) { | |
auto found = GLOBAL_HASH_COUNT.find(h); | |
if (found != GLOBAL_HASH_COUNT.end()) { | |
found->second += 1; | |
} else { | |
GLOBAL_HASH_COUNT.insert(std::make_pair(h, 1)); | |
} | |
} | |
static size_t printColls() { | |
size_t total = 0; | |
for (const auto& elem: GLOBAL_HASH_COUNT) { | |
if (elem.second > 1) { | |
printf("COLL: %llu = %zu\n", elem.first, elem.second); | |
total += (elem.second - 1); | |
} | |
} | |
printf("TOTAL COLL: %zu\n", total); | |
return total; | |
} | |
// Finalization mix - force all bits of a hash block to avalanche | |
static uint64_t fmix64(uint64_t k) { | |
k ^= k >> 33; | |
k *= 0xff51afd7ed558ccdULL; | |
k ^= k >> 33; | |
k *= 0xc4ceb9fe1a85ec53ULL; | |
k ^= k >> 33; | |
return k; | |
} | |
uint64_t murmur3_64(const void *key, size_t len, uint32_t seed) { | |
const uint8_t *data = (const uint8_t *)key; | |
const int nblocks = len / 16; | |
uint64_t h1 = seed; | |
uint64_t h2 = seed; | |
const uint64_t c1 = 0x87c37b91114253d5ULL; | |
const uint64_t c2 = 0x4cf5ad432745937fULL; | |
//---------- | |
// body | |
const uint64_t *blocks = (const uint64_t *)(data); | |
for (int i = 0; i < nblocks; i++) { | |
uint64_t k1 = blocks[i * 2 + 0]; | |
uint64_t k2 = blocks[i * 2 + 1]; | |
k1 *= c1; k1 = (k1 << 31) | (k1 >> (64 - 31)); k1 *= c2; h1 ^= k1; | |
h1 = (h1 << 27) | (h1 >> (64 - 27)); h1 += h2; h1 = h1 * 5 + 0x52dce729; | |
k2 *= c2; k2 = (k2 << 33) | (k2 >> (64 - 33)); k2 *= c1; h2 ^= k2; | |
h2 = (h2 << 31) | (h2 >> (64 - 31)); h2 += h1; h2 = h2 * 5 + 0x38495ab5; | |
} | |
//---------- | |
// tail | |
const uint8_t *tail = (const uint8_t *)(data + nblocks * 16); | |
uint64_t k1 = 0; | |
uint64_t k2 = 0; | |
switch (len & 15) { | |
case 15: k2 ^= ((uint64_t)tail[14]) << 48; | |
case 14: k2 ^= ((uint64_t)tail[13]) << 40; | |
case 13: k2 ^= ((uint64_t)tail[12]) << 32; | |
case 12: k2 ^= ((uint64_t)tail[11]) << 24; | |
case 11: k2 ^= ((uint64_t)tail[10]) << 16; | |
case 10: k2 ^= ((uint64_t)tail[ 9]) << 8; | |
case 9: k2 ^= ((uint64_t)tail[ 8]) << 0; | |
k2 *= c2; k2 = (k2 << 33) | (k2 >> (64 - 33)); k2 *= c1; h2 ^= k2; | |
case 8: k1 ^= ((uint64_t)tail[ 7]) << 56; | |
case 7: k1 ^= ((uint64_t)tail[ 6]) << 48; | |
case 6: k1 ^= ((uint64_t)tail[ 5]) << 40; | |
case 5: k1 ^= ((uint64_t)tail[ 4]) << 32; | |
case 4: k1 ^= ((uint64_t)tail[ 3]) << 24; | |
case 3: k1 ^= ((uint64_t)tail[ 2]) << 16; | |
case 2: k1 ^= ((uint64_t)tail[ 1]) << 8; | |
case 1: k1 ^= ((uint64_t)tail[ 0]) << 0; | |
k1 *= c1; k1 = (k1 << 31) | (k1 >> (64 - 31)); k1 *= c2; h1 ^= k1; | |
} | |
//---------- | |
// finalization | |
h1 ^= len; | |
h2 ^= len; | |
h1 += h2; | |
h2 += h1; | |
h1 = fmix64(h1); | |
h2 = fmix64(h2); | |
h1 += h2; | |
// Return 64-bit hash (can return h2 or h1 depending on needs) | |
return h1; | |
} | |
void create_rand_hashes(std::vector<uint64_t>& hashes, size_t count, size_t* modder) { | |
static const size_t input_size = 10; | |
char input_string[input_size + 1] = {'\0'}; | |
for (size_t i = 0; i < count; ++i) { | |
for (int j = 0; j < input_size; ++j) | |
{ | |
input_string[j] = rand() % 253; | |
} | |
if (modder != NULL) { | |
hashes.push_back(murmur3_64(input_string, input_size, input_size) % *modder); | |
} else { | |
hashes.push_back(murmur3_64(input_string, input_size, input_size)); | |
} | |
} | |
} | |
struct HashBlock { | |
size_t hash_count; | |
std::vector<uint64_t> hashes; | |
HashBlock() { | |
hash_count = 0; | |
} | |
explicit HashBlock(size_t count, size_t mod = 0): hash_count(count) { | |
create_rand_hashes(hashes, hash_count, mod == 0 ? NULL : &mod); | |
for (const auto& h : hashes) { | |
addHash(h); | |
} | |
} | |
}; | |
int main(int argc, char const *argv[]) | |
{ | |
srand(time(NULL)); | |
std::vector<HashBlock> blocks; | |
for (int i = 0; i < 100; ++i) | |
{ | |
HashBlock h(500, 1024 * 1024 * 10); | |
blocks.push_back(h); | |
} | |
printColls(); | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
#include <stddef.h> | |
#include <stdio.h> | |
#include <time.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <limits.h> | |
#include <stdbool.h> | |
// Finalization mix - force all bits of a hash block to avalanche | |
static uint64_t fmix64(uint64_t k) { | |
k ^= k >> 33; | |
k *= 0xff51afd7ed558ccdULL; | |
k ^= k >> 33; | |
k *= 0xc4ceb9fe1a85ec53ULL; | |
k ^= k >> 33; | |
return k; | |
} | |
uint64_t murmur3_64(const void *key, size_t len, uint32_t seed) { | |
const uint8_t *data = (const uint8_t *)key; | |
const int nblocks = len / 16; | |
uint64_t h1 = seed; | |
uint64_t h2 = seed; | |
const uint64_t c1 = 0x87c37b91114253d5ULL; | |
const uint64_t c2 = 0x4cf5ad432745937fULL; | |
//---------- | |
// body | |
const uint64_t *blocks = (const uint64_t *)(data); | |
for (int i = 0; i < nblocks; i++) { | |
uint64_t k1 = blocks[i * 2 + 0]; | |
uint64_t k2 = blocks[i * 2 + 1]; | |
k1 *= c1; k1 = (k1 << 31) | (k1 >> (64 - 31)); k1 *= c2; h1 ^= k1; | |
h1 = (h1 << 27) | (h1 >> (64 - 27)); h1 += h2; h1 = h1 * 5 + 0x52dce729; | |
k2 *= c2; k2 = (k2 << 33) | (k2 >> (64 - 33)); k2 *= c1; h2 ^= k2; | |
h2 = (h2 << 31) | (h2 >> (64 - 31)); h2 += h1; h2 = h2 * 5 + 0x38495ab5; | |
} | |
//---------- | |
// tail | |
const uint8_t *tail = (const uint8_t *)(data + nblocks * 16); | |
uint64_t k1 = 0; | |
uint64_t k2 = 0; | |
switch (len & 15) { | |
case 15: k2 ^= ((uint64_t)tail[14]) << 48; | |
case 14: k2 ^= ((uint64_t)tail[13]) << 40; | |
case 13: k2 ^= ((uint64_t)tail[12]) << 32; | |
case 12: k2 ^= ((uint64_t)tail[11]) << 24; | |
case 11: k2 ^= ((uint64_t)tail[10]) << 16; | |
case 10: k2 ^= ((uint64_t)tail[ 9]) << 8; | |
case 9: k2 ^= ((uint64_t)tail[ 8]) << 0; | |
k2 *= c2; k2 = (k2 << 33) | (k2 >> (64 - 33)); k2 *= c1; h2 ^= k2; | |
case 8: k1 ^= ((uint64_t)tail[ 7]) << 56; | |
case 7: k1 ^= ((uint64_t)tail[ 6]) << 48; | |
case 6: k1 ^= ((uint64_t)tail[ 5]) << 40; | |
case 5: k1 ^= ((uint64_t)tail[ 4]) << 32; | |
case 4: k1 ^= ((uint64_t)tail[ 3]) << 24; | |
case 3: k1 ^= ((uint64_t)tail[ 2]) << 16; | |
case 2: k1 ^= ((uint64_t)tail[ 1]) << 8; | |
case 1: k1 ^= ((uint64_t)tail[ 0]) << 0; | |
k1 *= c1; k1 = (k1 << 31) | (k1 >> (64 - 31)); k1 *= c2; h1 ^= k1; | |
} | |
//---------- | |
// finalization | |
h1 ^= len; | |
h2 ^= len; | |
h1 += h2; | |
h2 += h1; | |
h1 = fmix64(h1); | |
h2 = fmix64(h2); | |
h1 += h2; | |
// Return 64-bit hash (can return h2 or h1 depending on needs) | |
return h1; | |
} | |
struct number_node { | |
uint64_t hash; | |
uint64_t block; | |
}; | |
struct number_part { | |
size_t size; | |
}; | |
uint64_t* create_rand_hashes(size_t count) { | |
uint64_t* hashes = malloc(count * sizeof(uint64_t)); | |
static const size_t input_size = 10; | |
char input_string[input_size + 1] = {'\0'}; | |
for (size_t i = 0; i < count; ++i) { | |
for (int j = 0; j < input_size; ++j) | |
{ | |
input_string[j] = rand() % 253; | |
} | |
hashes[i] = murmur3_64(input_string, input_size, input_size) % 9999999999; | |
} | |
return hashes; | |
} | |
bool find_min_max(const uint64_t *array, size_t size, uint64_t *min_out, uint64_t *max_out) { | |
if (size == 0 || array == NULL || min_out == NULL || max_out == NULL) { | |
return false; // Invalid input | |
} | |
uint64_t min = UINT64_MAX; | |
uint64_t max = 0; | |
for (size_t i = 0; i < size; ++i) { | |
if (array[i] < min) min = array[i]; | |
if (array[i] > max) max = array[i]; | |
} | |
*min_out = min; | |
*max_out = max; | |
return true; | |
} | |
struct num_range { | |
uint64_t min; | |
uint64_t max; | |
}; | |
static inline uint64_t min_u64(uint64_t a, uint64_t b) { | |
return a < b ? a : b; | |
} | |
static inline uint64_t max_u64(uint64_t a, uint64_t b) { | |
return a > b ? a : b; | |
} | |
uint64_t line_overlap_amount(uint64_t a1, uint64_t a2, uint64_t b1, uint64_t b2) { | |
// Normalize segments | |
if (a1 > a2) { | |
uint64_t tmp = a1; a1 = a2; a2 = tmp; | |
} | |
if (b1 > b2) { | |
uint64_t tmp = b1; b1 = b2; b2 = tmp; | |
} | |
uint64_t start = max_u64(a1, b1); | |
uint64_t end = min_u64(a2, b2); | |
if (start >= end) { | |
return 0; // No overlap | |
} | |
return end - start; | |
} | |
int main(int argc, char const *argv[]) | |
{ | |
srand(time(NULL)); | |
static const size_t repeats = 20; | |
struct num_range* rngs = malloc(repeats * sizeof(struct num_range)); | |
for (int k = 0; k < repeats; ++k) | |
{ | |
uint64_t* hs = create_rand_hashes(50); | |
uint64_t mini = 0; | |
uint64_t maxi = 0; | |
find_min_max(hs, 50, &mini, &maxi); | |
printf("MIN: %llu , MAX: %llu RNG: %llu FACT: %llu\n", mini, maxi, maxi - mini, (maxi - mini) / 64000); | |
rngs[k].min = mini; | |
rngs[k].max = maxi; | |
free(hs); | |
} | |
for (int i = 0; i < repeats; ++i) | |
{ | |
for (int j = i + 1; j < repeats; ++j) | |
{ | |
printf("Pos1: %d Pos2: %d, Overlap: %llu\n", i, j, line_overlap_amount(rngs[i].min, rngs[i].max, rngs[j].min, rngs[j].max)); | |
} | |
} | |
free(rngs); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment