Last active
February 15, 2024 16:54
-
-
Save pervognsen/dffdb7deb202ac051166ff6bc06ff96c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Length-segregated string tables for length < 16. You use a separate overflow table for length >= 16. | |
// By segregating like this you can pack the string data in the table itself tightly without any padding. The datapath | |
// is uniform and efficient for all lengths < 16 by using unaligned 16-byte SIMD loads/compares and masking off the length prefix. | |
// One of the benefits of packing string data tightly for each length table is that you can afford to reduce the load factor | |
// on shorter length tables without hurting space utilization too much. This can push hole-in-one rates into the 95% range without | |
// too much of a negative impact on cache utilization. | |
// Since get() takes a vector register as an argument with the key, you want to shape the upstream code so the string to be queried | |
// is naturally in a vector. For example, in an optimized identifier lexer you should already have a SIMD fast path for length < 16 | |
// so you can scan the identifier, compute the hash and then call get(), all working with the same vector register. The length >= 16 | |
// fallback path in the lexer would then call into the overflow table with a string pointer instead of a vector. | |
typedef __m128i Chars; | |
#define EMPTY _mm_set1_epi8(0) | |
Chars load(const char *src) { | |
return _mm_loadu_epi8(src); | |
} | |
bool equal(Chars lhs, Chars rhs, uint32_t len) { | |
return (_mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) & ((1 << len) - 1)) == (1 << len) - 1; | |
} | |
struct ShortTable { | |
uint32_t mask; | |
char *keys; | |
}; | |
int get_short(ShortTable *table, Chars key, uint32_t len, uint32_t hash) { | |
uint32_t i = hash & table->mask, d = 1; | |
for (;;) { | |
Chars k = load(table->keys + i * len); | |
if (equal(k, key, len)) return i; | |
if (equal(k, EMPTY, len)) return -1; | |
i = (i + d++) & table->mask; | |
} | |
} | |
struct Table { | |
ShortTable tables[15]; | |
}; | |
int get(Table *table, Chars key, uint32_t len, uint32_t hash) { | |
assert(0 < len && len < 16); | |
return get_short(&table->tables[len - 1], key, len, hash); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment