Skip to content

Instantly share code, notes, and snippets.

@mckelvin
Created August 31, 2012 02:18
Show Gist options
  • Save mckelvin/3547990 to your computer and use it in GitHub Desktop.
Save mckelvin/3547990 to your computer and use it in GitHub Desktop.
SimHash for chromaprint
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
__author__ = 'Myautsai Pan'
import sys
import numpy as np
class SimHash(object):
    '''SimHash of a chromaprint fingerprint.

    Every integer in the fingerprint votes on each bit position: +1 when
    the bit is set, -1 when it is clear.  Bit ``i`` of the resulting hash
    is 1 only when the votes for position ``i`` sum to a strictly positive
    number, so ties and empty input yield 0 bits.

    Attributes:
        hash_bits: width of the hash in bits.
        value: the computed hash, a non-negative integer below
            ``2 ** hash_bits``.
    '''

    def __init__(self, chromaprint=(), bits=32):
        '''Compute the hash of `chromaprint`.

        Args:
            chromaprint: iterable of integers (the hashed fingerprint
                array).  Defaults to an empty, immutable sequence.
            bits: width of the resulting hash.  Only the lowest `bits`
                bits of every input integer are considered.
        '''
        # Honour the requested width instead of always forcing 32 bits.
        self.hash_bits = bits
        self.value = self.cal_hash(chromaprint)

    def cal_hash(self, chromaprint):
        '''Return the `self.hash_bits`-bit SimHash of `chromaprint`.

        Args:
            chromaprint: iterable of integers.

        Returns:
            An integer whose bit ``i`` is set when more inputs have bit
            ``i`` set than clear.
        '''
        votes = [0] * self.hash_bits
        for fp_hash in chromaprint:
            # TODO: validate that fp_hash is an integer.
            for i in range(self.hash_bits):
                # A set bit counts +1, a clear bit counts -1.
                votes[i] += 1 if (fp_hash >> i) & 1 else -1
        sim_hash = 0
        for i, vote in enumerate(votes):
            # Strict majority only: a tie leaves the bit at 0.
            if vote > 0:
                sim_hash |= 1 << i
        return sim_hash

    def __int__(self):
        '''Return the hash value as an int.'''
        return int(self.value)

    def __long__(self):
        '''Return the hash value as a long (hook used by Python 2 only).'''
        try:
            return long(self.value)
        except NameError:
            # Python 3 has no separate `long` type; int is unbounded there.
            return int(self.value)

    def __hash__(self):
        '''Use the hash value itself as the object hash.'''
        return self.value

    def __str__(self):
        '''Return the hash as a `hash_bits`-character binary string,
        most significant bit first.'''
        return ''.join(
            str(self.value >> i & 1)
            for i in reversed(range(self.hash_bits))
        )

    def __repr__(self):
        '''Return a debug string holding the binary and decimal forms.'''
        return ' '.join(
            ('<SimHash', self.__str__(), ' (', str(self.__int__()), ')>'))

    def __call__(self):
        '''Return the raw hash value.'''
        return self.value
def main(argv):
    '''Print the SimHash of a small sample fingerprint.

    Args:
        argv: command-line arguments; accepted for the usual entry-point
            signature but not used.

    Returns:
        0, intended as the process exit status.
    '''
    a = SimHash([1, 2, 8, 8, 8, 8, 7])
    # Single-argument print call: same output on Python 2 and Python 3.
    print('SimHash a: %s' % repr(a))
    return 0
if __name__ == '__main__':
    # Run the demo only when executed as a script, and hand its return
    # value to the interpreter as the process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment