Created
May 15, 2019 08:36
-
-
Save DBJDBJ/e15c3817340fbf091427c9e25864f82b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// 2019-05-13 [email protected]
//
// clang++ prog.cc -Wall -Wextra -std=c++17
//
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string_view>
#include <type_traits>

using namespace std;
using namespace std::string_view_literals ;
// djb2 string hash, based on http://www.cse.yorku.ca/~oz/hash.html
//
// Hashes the NUL-terminated sequence of code units starting at `str`:
//     hash = 5381; for each unit: hash = hash * 33 + unit
//
// CHR_ may be any (possibly cv-qualified) character type: char, wchar_t,
// char16_t, char32_t. Every code unit is converted to the unsigned type of
// the same width before it is added, so a plain `char` holding a value
// >= 0x80 contributes 0x80..0xFF whether `char` is signed or unsigned on the
// target. This matches the reference implementation, which reads through
// `unsigned char *`.
//
// @param str  pointer to a NUL-terminated string; a null pointer is treated
//             like an empty string.
// @return     the hash; 5381 for an empty (or null) string.
template<typename CHR_>
constexpr size_t DJB2(CHR_ *str)
{
    using unit_type = std::make_unsigned_t<std::remove_cv_t<CHR_>>;

    // Accumulate in the return type so nothing is truncated on platforms
    // where `unsigned long` is narrower than `size_t`.
    std::size_t hash = 5381;

    if (str == nullptr)
        return hash;

    for (; *str != 0; ++str)
        hash = ((hash << 5) + hash) + static_cast<unit_type>(*str); // hash * 33 + unit

    return hash;
}
/*
FNV-1 hash, 64-bit variant.
https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function#FNV-1_hash

    hash = FNV_offset_basis
    for each byte_of_data to be hashed
        hash = hash × FNV_prime
        hash = hash XOR byte_of_data
    return hash

Hashes the NUL-terminated sequence of code units starting at `charr`.
CHR may be any (possibly cv-qualified) character type. Each whole code unit
is XOR-ed into the hash, so for wide character types this is FNV-1 applied
per code unit rather than per octet. Code units are converted to the unsigned
type of the same width first, so a plain `char` >= 0x80 is never sign-extended
into the upper bits of the hash.

A null pointer is treated like an empty string.
Returns a std::uint64_t; the FNV offset basis for an empty (or null) string.
*/
template<typename CHR>
constexpr auto TFNV1( CHR * charr )
{
    using unit_type = std::make_unsigned_t<std::remove_cv_t<CHR>>;

    // Standard 64-bit FNV parameters.
    constexpr std::uint64_t fnv_prime        = 1099511628211ull;
    constexpr std::uint64_t fnv_offset_basis = 14695981039346656037ull;

    std::uint64_t hash = fnv_offset_basis;

    if (charr == nullptr)
        return hash;

    for (; *charr != 0; ++charr) {
        hash *= fnv_prime;                       // wraps modulo 2^64 by design
        hash ^= static_cast<unit_type>(*charr);
    }
    return hash;
}
// these two will be utf8 encoded each | |
// they contains chars inside the ascii range | |
constexpr auto n_english = "English"sv; | |
constexpr auto w_english = L"English"sv; | |
// these two will be utf8 encoded each | |
// they contains chars beyond the ascii range | |
constexpr auto specimen_a = "한글"sv; | |
constexpr auto specimen_w = L"한글"sv; | |
constexpr auto hash_1 = DJB2(n_english.data()); | |
constexpr auto hash_2 = DJB2(w_english.data()) ; | |
// for UTF8 + ASCII chars this works always | |
// one can meaningfully compare wide and narrow strings | |
static_assert (hash_1 == hash_2) ; | |
int main() | |
{ | |
// for UTF8 + IDEOGRAMS hash are always different of course | |
// one can not meaningfully compare wide and narrow utf8 strings | |
// that contain non ascii chars | |
cout << boolalpha | |
<< endl << TFNV1( specimen_a.data() ) | |
<< endl << TFNV1( specimen_w.data() ) ; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment