Skip to content

Instantly share code, notes, and snippets.

@DBJDBJ
Created May 15, 2019 08:36
Show Gist options
  • Save DBJDBJ/e15c3817340fbf091427c9e25864f82b to your computer and use it in GitHub Desktop.
Save DBJDBJ/e15c3817340fbf091427c9e25864f82b to your computer and use it in GitHub Desktop.
// 2019-05-13 [email protected]
//
// clang++ prog.cc -Wall -Wextra -std=c++17
//
#include <iostream>
#include <string_view>
#include <type_traits>
using namespace std;
using namespace std::string_view_literals ;
// djb2 string hash, based on http://www.cse.yorku.ca/~oz/hash.html
//
// Hashes the zero-terminated sequence of code units starting at `str`:
//     hash = 5381;  for each unit:  hash = hash * 33 + unit
//
// CHR_ may be any (possibly const) character type: char, wchar_t, char16_t, ...
// `str` must point at a zero-terminated sequence; it is not checked for null.
//
// Each code unit is widened through its *unsigned* counterpart. The reference
// implementation reads `unsigned char`; widening a plain `char` directly would
// sign-extend bytes >= 0x80 wherever `char` is signed and make the result
// depend on the platform.
template<typename CHR_>
constexpr size_t DJB2(CHR_ *str)
{
    using unit_type = std::make_unsigned_t<std::remove_cv_t<CHR_>>;

    unsigned long hash = 5381;
    for (; *str; ++str) {
        // (hash << 5) + hash == hash * 33
        hash = ((hash << 5) + hash) + static_cast<unit_type>(*str);
    }
    return hash;
}
/*
64-bit FNV-1 hash over the code units of a zero-terminated string.
https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function#FNV-1_hash

    hash = FNV_offset_basis
    for each unit_of_data to be hashed
        hash = hash × FNV_prime
        hash = hash XOR unit_of_data
    return hash

CHR may be any (possibly const) character type. For `char` input the units are
bytes and the result is the standard FNV-1 value; for wider character types
each whole code unit is folded in with a single multiply/XOR step.

`charr` must point at a zero-terminated sequence; it is not checked for null.
Returns the hash as a 64-bit unsigned integer.
*/
template<typename CHR>
constexpr auto TFNV1( CHR * charr )
{
    // Standard 64-bit FNV parameters.
    constexpr std::uint64_t fnv_offset_basis = 14695981039346656037ull; // 0xcbf29ce484222325
    constexpr std::uint64_t fnv_prime        = 1099511628211ull;        // 0x100000001b3

    // Widen through the unsigned counterpart so that bytes >= 0x80 are not
    // sign-extended (which would flip the upper bits of the hash on platforms
    // where `char` is signed).
    using unit_type = std::make_unsigned_t<std::remove_cv_t<CHR>>;

    std::uint64_t hash = fnv_offset_basis;
    for (; *charr; ++charr) {
        hash *= fnv_prime;
        hash ^= static_cast<unit_type>(*charr);
    }
    return hash;
}
// Narrow and wide views over the same ASCII-only text.
// Every character here is in the ASCII range, so the narrow and the wide
// literal hold the same sequence of code-unit values.
constexpr std::string_view  n_english{ "English" };
constexpr std::wstring_view w_english{ L"English" };

// Narrow and wide views over the same non-ASCII text.
// The narrow literal is stored in the compiler's narrow execution encoding
// (presumably UTF-8 with the clang++ command line quoted at the top of the
// file -- confirm for other toolchains); the wide literal is stored as wchar_t
// code units, so the two code-unit sequences differ.
constexpr std::string_view  specimen_a{ "한글" };
constexpr std::wstring_view specimen_w{ L"한글" };

// Compile-time djb2 hashes of the ASCII-only pair.
constexpr auto hash_1 = DJB2( n_english.data() );
constexpr auto hash_2 = DJB2( w_english.data() );

// For ASCII-only text the narrow and wide hashes agree, so such strings can
// be compared meaningfully through their hashes.
static_assert( hash_1 == hash_2,
               "narrow and wide ASCII strings must produce the same DJB2 hash" );
// Prints the TFNV1 hash of the narrow and of the wide non-ASCII specimen,
// each preceded by a line break.
int main()
{
    // For text with characters beyond the ASCII range the narrow and wide
    // literals consist of different code units, so their hashes are expected
    // to differ: such strings cannot be compared meaningfully by hash.
    const auto narrow_hash = TFNV1( specimen_a.data() );
    const auto wide_hash   = TFNV1( specimen_w.data() );

    std::cout << std::boolalpha;
    std::cout << std::endl << narrow_hash;
    std::cout << std::endl << wide_hash;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment