Created
August 31, 2012 02:18
-
-
Save mckelvin/3547990 to your computer and use it in GitHub Desktop.
SimHash for chromaprint
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: UTF-8 -*- | |
__author__ = 'Myautsai Pan' | |
import sys | |
import numpy as np | |
class SimHash(object):
    """SimHash of a chromaprint fingerprint.

    A chromaprint is treated as an iterable of integer sub-hashes. Every
    sub-hash votes on each of the low ``hash_bits`` bit positions (+1 when
    the bit is set, -1 when it is clear); the resulting hash has a bit set
    wherever the votes sum to a strictly positive number.

    Attributes:
        hash_bits: number of bit positions taken into account.
        value: the computed hash as a non-negative integer.
    """

    def __init__(self, chromaprint=None, bits=32):
        """Compute the hash of `chromaprint` using `bits` bit positions.

        Args:
            chromaprint: iterable of integer sub-hashes; ``None`` (the
                default) is treated as an empty fingerprint.
            bits: width of the resulting hash in bits.
        """
        # Honour the requested width instead of a hard-coded 32.
        self.hash_bits = bits
        self.value = self.cal_hash(() if chromaprint is None else chromaprint)

    def cal_hash(self, chromaprint):
        """Return the ``self.hash_bits``-bit SimHash of `chromaprint`.

        Args:
            chromaprint: iterable of integer sub-hashes.

        Returns:
            An integer whose bit ``i`` is 1 when more sub-hashes have bit
            ``i`` set than clear. Ties, and an empty input, give 0.
        """
        bit_positions = range(self.hash_bits)
        votes = [0] * self.hash_bits
        for fp_hash in chromaprint:
            # TODO: validate that fp_hash is an integer.
            for i in bit_positions:
                # Bit i (counted from the least significant bit) votes +1
                # when set and -1 when clear.
                votes[i] += 1 if (fp_hash >> i) & 1 else -1

        sim_hash = 0
        for i in bit_positions:
            if votes[i] > 0:
                sim_hash |= 1 << i
        return sim_hash

    def __int__(self):
        """Return the hash value as an ``int``."""
        return int(self.value)

    def __long__(self):
        """Return the hash value as a ``long`` (Python 2 conversion hook)."""
        return long(self.value)

    def __hash__(self):
        """Hash by value, consistent with ``__eq__``."""
        return self.value

    def __eq__(self, other):
        """Two SimHash objects are equal when width and value both match."""
        if not isinstance(other, SimHash):
            return NotImplemented
        return self.hash_bits == other.hash_bits and self.value == other.value

    def __ne__(self, other):
        """Inverse of ``__eq__`` (Python 2 does not derive it automatically)."""
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __str__(self):
        """Return the hash as a binary string, most significant bit first."""
        return ''.join(
            str((self.value >> i) & 1)
            for i in reversed(range(self.hash_bits))
        )

    def __repr__(self):
        """Return ``<SimHash BITS  ( VALUE )>`` with the binary and decimal forms."""
        return ' '.join(('<SimHash', self.__str__(), ' (', str(self.__int__()), ')>'))

    def __call__(self):
        """Return the hash value; calling the instance is a shortcut for ``.value``."""
        return self.value
def main(argv):
    """Print the SimHash of a small sample fingerprint and return exit status 0.

    `argv` is accepted so the function can be called as ``main(sys.argv)``;
    it is not used.
    """
    a = SimHash([1, 2, 8, 8, 8, 8, 7])
    # A single parenthesised string prints identically on Python 2 and 3.
    print('SimHash a: ' + repr(a))
    return 0
# Run the demo only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == '__main__':
    raise SystemExit(main(sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment