Skip to content

Instantly share code, notes, and snippets.

@matovitch
Last active December 21, 2015 14:49
Show Gist options
  • Save matovitch/6322796 to your computer and use it in GitHub Desktop.
Save matovitch/6322796 to your computer and use it in GitHub Desktop.
Home made locality sensitive hashing
#ifndef MYLSH_H
#define MYLSH_H
#include <stdlib.h>
#include <stdint.h>
#include <limits.h>
static const uint8_t uint_shuf[256] = {0x7f,0x87,0x95,0x7e,0xf1,0x0c,0x2c,0xc5,
0x8f,0xba,0xf3,0xcd,0x09,0xa2,0x5b,0x8e,
0xd7,0xf0,0x39,0x85,0x3c,0xe7,0xd4,0x7a,
0xa9,0xb0,0xbf,0x08,0x84,0x23,0x4c,0x9d,
0x77,0x7d,0x6c,0x50,0x06,0xc2,0x68,0x40,
0x3d,0x6d,0x03,0x89,0xad,0x33,0x0e,0x99,
0x83,0x71,0x81,0x0b,0xdf,0xe6,0x2b,0xbe,
0x74,0x38,0x59,0x9c,0x5d,0xd6,0x75,0x4a,
0xe3,0xe2,0x73,0xd2,0xec,0x31,0x57,0xb4,
0x86,0xee,0x5e,0x4d,0xe0,0x69,0x6a,0x91,
0x35,0xb6,0x5f,0xa8,0xfe,0x8b,0x48,0x56,
0x76,0x5a,0x8a,0xa3,0xf9,0x94,0x32,0xfa,
0xfc,0x17,0x9a,0x8d,0xaa,0x98,0xf6,0x14,
0xac,0xa0,0x51,0x79,0xf2,0xc7,0xb2,0xbc,
0x82,0x2d,0x10,0xe8,0xf4,0x9f,0x19,0x6b,
0x54,0x20,0x12,0x01,0x27,0x37,0xc8,0xe5,
0xe4,0x67,0xdc,0x52,0x80,0xa6,0x53,0xa7,
0xf5,0x1a,0xf7,0x43,0x61,0xb5,0xcb,0xa5,
0x58,0x1f,0xed,0xb1,0xa4,0xfd,0xae,0x65,
0x16,0x24,0x9b,0x04,0xd9,0x4b,0x2f,0x4f,
0x90,0xdd,0x34,0x62,0x00,0x78,0x63,0xca,
0xd5,0xeb,0x70,0x92,0xcf,0x22,0xf8,0x1b,
0xc4,0x97,0xc9,0xdb,0x45,0x3a,0xd3,0xc1,
0x47,0x7b,0x26,0x4e,0x66,0x1c,0x28,0xb8,
0xbb,0x3f,0x0f,0xc6,0xe1,0x88,0xab,0x6e,
0x42,0xd0,0xa1,0x0d,0xd8,0x55,0xb3,0x13,
0x9e,0x29,0x8c,0x1e,0xce,0xb9,0xc3,0x44,
0xea,0xef,0x07,0x21,0x05,0x0a,0x1d,0x6f,
0xc0,0x3b,0x46,0xde,0x72,0x36,0x11,0xda,
0x93,0x30,0xff,0x2a,0x18,0xcc,0x3e,0x41,
0x25,0x15,0xfb,0x5c,0x2e,0x60,0xe9,0x49,
0xbd,0xb7,0x02,0x64,0x7c,0xaf,0xd1,0x96};
void mylsh(uint8_t* hash,
uint32_t hash_size,
uint8_t* data,
uint32_t data_size) {
uint32_t bit_size = hash_size * CHAR_BIT;
uint32_t circ_size = data_size;
if (circ_size < bit_size)
circ_size *= (bit_size / circ_size << 1);
uint32_t slic_size = circ_size / bit_size;
for (uint32_t i = 0; i < bit_size; i++) {
uint32_t avg = 0;
for (uint32_t j = 0; j < slic_size; j++)
avg += uint_shuf[data[(i * slic_size + j) % data_size]];
if (avg > slic_size * (1 << (CHAR_BIT - 1)))
hash[i/CHAR_BIT] += (1 << (i % CHAR_BIT));
}
}
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment