Last active
September 26, 2025 00:26
-
-
Save jweinst1/49ea69d5dfd81a9bf5d01a8ff3625ee8 to your computer and use it in GitHub Desktop.
closest sub mask in a mash set
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| >>> bin(0b10100 & ~0b1111) | |
| '0b10000' | |
| >>> bin(0b11000 & 0b1111) | |
| '0b1000' | |
| >>> bin(0b11000 & ~0b1111) | |
| '0b10000' | |
| >>> bin(0b01000 & ~0b1111) | |
| '0b0' | |
| >>> bin(0b11000 & ~0b1111) | |
| '0b10000' | |
| >>> bin(0b11000 & 0b1111) | |
| '0b1000' | |
| >>> bin((0b11000 & ~0b1111) & (0b11000 & 0b1111)) | |
| '0b0' | |
| >>> bin((0b11000 & ~0b1111) & (0b11000 & 0b1111)) | |
| '0b0' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <iostream> | |
| #include <bitset> | |
| #include <array> | |
/**
 * Splits `set` around the bit width of `key`.
 *
 * Let `len` be the number of significant bits in `key` (position of its
 * highest set bit plus one, or 0 when `key` is 0).
 *
 * @param set  Bit set to probe.
 * @param key  Key whose bit width selects the split point.
 * @return {ent_lo, ent_hi} where
 *         ent_lo = the bits of `set` strictly below position `len`, and
 *         ent_hi = the single bit of `set` at position `len`
 *                  (always 0 when `len` is 8, since `set` has only 8 bits).
 */
std::array<uint8_t, 2> closest(uint8_t set, uint8_t key) {
    // __builtin_clz(0) is undefined, so a zero key is given width 0 explicitly.
    // The constant 32 assumes a 32-bit unsigned int, which is the operand
    // type __builtin_clz works on.
    const int len = (key == 0) ? 0 : 32 - __builtin_clz(key);

    // len is at most 8 here, so the shift is always well defined.
    const unsigned boundary = 1u << len;

    // must consider both 1 bit higher and lower
    const uint8_t ent_lo = static_cast<uint8_t>((boundary - 1u) & set);
    const uint8_t ent_hi = static_cast<uint8_t>(boundary & set);
    return {ent_lo, ent_hi};
}
| int main(int argc, char const *argv[]) | |
| { | |
| const auto found = closest(0b10110101, 0b11001); | |
| std::cout << std::bitset<8>(found[0]) << "\n"; | |
| std::cout << std::bitset<8>(found[1]) << "\n"; | |
| //00010101 | |
| //00100000 | |
| return 0; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <iostream> | |
| #include <bitset> | |
| #include <array> | |
/**
 * Splits `set` around the bit width of `key`.
 *
 * Let `len` be the number of significant bits in `key` (position of its
 * highest set bit plus one, or 0 when `key` is 0).
 *
 * @param set  Bit set to probe.
 * @param key  Key whose bit width selects the split point.
 * @return {ent_lo, ent_hi} where
 *         ent_lo = the bits of `set` strictly below position `len`, and
 *         ent_hi = the single bit of `set` at position `len`
 *                  (always 0 when `len` is 8, since `set` has only 8 bits).
 */
std::array<uint8_t, 2> closest(uint8_t set, uint8_t key) {
    // __builtin_clz(0) is undefined, so a zero key is given width 0 explicitly.
    // The constant 32 assumes a 32-bit unsigned int, which is the operand
    // type __builtin_clz works on.
    const int len = (key == 0) ? 0 : 32 - __builtin_clz(key);

    // len is at most 8 here, so the shift is always well defined.
    const unsigned boundary = 1u << len;

    const uint8_t ent_lo = static_cast<uint8_t>((boundary - 1u) & set);
    const uint8_t ent_hi = static_cast<uint8_t>(boundary & set);
    return {ent_lo, ent_hi};
}
// Packs the most significant bit of each of the eight elements into one byte.
// Element 0 ends up in bit 7 of the result and element 7 in bit 0, so every
// result bit records whether its element is
//   1 -> 128 or above
//   0 -> 127 or below
uint8_t arr_to_byte(const std::array<uint8_t, 8>& arr) {
    unsigned packed = 0;
    for (const uint8_t value : arr) {
        // Make room for the next bit, then append this element's top bit.
        packed = (packed << 1) | static_cast<unsigned>(value >> 7);
    }
    return static_cast<uint8_t>(packed);
}
// Sums the squared element-wise differences of two 8-element vectors.
// No square root is taken, so the result is the squared Euclidean distance.
size_t euc_dist(const std::array<uint8_t, 8>& x, const std::array<uint8_t, 8>& y) {
    size_t sum_sq = 0;
    for (size_t idx = 0; idx != x.size(); ++idx) {
        // Widen to int first so the subtraction can go negative.
        const int delta = static_cast<int>(x[idx]) - static_cast<int>(y[idx]);
        sum_sq += static_cast<size_t>(delta * delta);
    }
    return sum_sq;
}
| /** | |
| * This discretizes the vector further into a byte that can be used to index the partition of the mash set. | |
| * This discretization retains some entropy, but does not distort distance in euclidean space | |
| * */ | |
| int main(int argc, char const *argv[]) | |
| { | |
| const auto found = closest(0b10110101, 0b11001); | |
| std::cout << std::bitset<8>(found[0]) << "\n"; | |
| std::cout << std::bitset<8>(found[1]) << "\n"; | |
| const std::array<uint8_t, 8> arr = {221, 45, 129, 170, 48, 58, 92, 240}; | |
| const std::array<uint8_t, 8> arr2 = {121, 145, 129, 70, 97, 18, 32, 210}; | |
| const uint8_t arr1Val = arr_to_byte(arr); | |
| const uint8_t arr2Val = arr_to_byte(arr2); | |
| std::cout << std::bitset<8>(arr_to_byte(arr)) << "\n"; | |
| std::cout << std::bitset<8>(arr_to_byte(arr2)) << "\n"; | |
| std::cout << (size_t)((arr2Val - arr1Val) * (arr2Val - arr1Val)) << "\n"; | |
| std::cout << euc_dist(arr, arr2) << "\n"; | |
| const std::array<uint8_t, 8> arr3 = {21, 45, 129, 170, 48, 58, 82, 240}; | |
| const std::array<uint8_t, 8> arr4 = {121, 145, 29, 70, 197, 19, 32, 210}; | |
| const uint8_t arr3Val = arr_to_byte(arr3); | |
| const uint8_t arr4Val = arr_to_byte(arr4); | |
| std::cout << std::bitset<8>(arr_to_byte(arr3)) << "\n"; | |
| std::cout << std::bitset<8>(arr_to_byte(arr4)) << "\n"; | |
| std::cout << (size_t)((arr4Val - arr3Val) * (arr4Val - arr3Val)) << "\n"; | |
| std::cout << euc_dist(arr3, arr4) << "\n"; | |
| return 0; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| >>> 0b1100 & 0b10100 | |
| 4 | |
| >>> (0b1100 & 0b10100) ^ 0b1100 | |
| 8 | |
| >>> (0b1100 & 0b10100) ^ 0b11100 | |
| 24 | |
| >>> (0b1100 & 0b10100) ^ 0b1100 | |
| 8 | |
| >>> (0b1100 & 0b11100) ^ 0b1100 | |
| 0 | |
| >>> (0b1100 & 0b11100) ^ ~0b1100 | |
| -1 | |
| >>> (0b1100 & 0b11100) ^ 0b11110011 | |
| 255 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment