Skip to content

Instantly share code, notes, and snippets.

@crackwitz
Created August 20, 2014 01:30
Show Gist options
  • Save crackwitz/35ef558705699bce7e40 to your computer and use it in GitHub Desktop.
Save crackwitz/35ef558705699bce7e40 to your computer and use it in GitHub Desktop.
import os
import sys
import time
import glob
import hashlib
import binascii
import struct
import base64
# https://en.wikipedia.org/wiki/UUHash
# https://en.wikipedia.org/w/index.php?title=UUHash&oldid=621992370
# MLDonkey source code, file src/utils/lib/fst_hash.c, retrieved 2014-08-20
# http://sourceforge.net/projects/mldonkey/files/mldonkey/3.1.5/mldonkey-3.1.5.tar.bz2
# http://www.opensource.apple.com/source/xnu/xnu-1456.1.26/bsd/libkern/crc32.c
def UUHash(fobj):
    """Compute the UUHash of a seekable binary file object.

    The hash is built from two parts:

    * the MD5 digest of the first ``chunksize`` (307200) bytes of the file;
    * a 32-bit CRC-based "small hash" over sampled chunks.  It is only fed
      data when the file is larger than one chunk: one chunk at offset
      0x100000, then at every doubling of that offset for as long as
      ``offset + 2 * chunksize < filesize``, followed by one final chunk
      (at most ``chunksize`` bytes) taken from the end of the file.  The
      running CRC is finally inverted and XORed with the file size.

    Args:
        fobj: A file-like object opened in binary mode that supports
            ``seek``, ``tell`` and ``read``.  Its position is moved by this
            function and is not restored.

    Returns:
        A 20-byte string: the 16-byte MD5 digest followed by the small hash
        packed as a little-endian unsigned 32-bit integer.

    Side effects:
        Adds the number of bytes actually read to the module-level counter
        ``bytesread``.  Callers that want a per-file figure reset it to 0
        before calling.
    """
    global bytesread
    # The counter is normally reset by the caller before each file.  Create
    # it on first use so that calling UUHash() on its own does not fail with
    # a NameError on the ``+=`` below.
    globals().setdefault("bytesread", 0)

    chunksize = 307200

    # Determine the file size, then rewind to hash the leading chunk.
    fobj.seek(0, os.SEEK_END)
    filesize = fobj.tell()
    fobj.seek(0)

    chunk = fobj.read(chunksize)
    bytesread += len(chunk)
    md5hash = hashlib.md5(chunk).digest()

    smallhash = 0
    if filesize > chunksize:
        # End of the most recently hashed region; used below so that the
        # trailing chunk never reaches back into data already covered.
        lastpos = fobj.tell()
        offset = 0x100000

        # Sample one chunk at each power-of-two offset, stopping while there
        # is still room left for the separate trailing chunk.
        while offset + 2 * chunksize < filesize:
            fobj.seek(offset)
            chunk = fobj.read(chunksize)
            bytesread += len(chunk)
            smallhash = binascii.crc32(chunk, smallhash)
            lastpos = offset + chunksize
            offset <<= 1

        # Trailing chunk: whatever follows the last hashed region, capped at
        # one chunk and always aligned to the end of the file.
        endlen = filesize - lastpos
        if endlen > chunksize:
            endlen = chunksize
        fobj.seek(filesize - endlen)
        chunk = fobj.read(endlen)
        bytesread += len(chunk)
        smallhash = binascii.crc32(chunk, smallhash)

    # NOTE(review): the bitwise NOT presumably converts binascii.crc32's
    # result into the raw CRC register value used by the reference
    # implementation (fst_hash.c) -- confirm against that source.  The
    # modulo folds the result (negative after ``~``, or wider than 32 bits
    # for huge files) into the unsigned range that struct's "<I" accepts.
    smallhash = ((~smallhash) ^ filesize) % 2**32

    return md5hash + struct.pack("<I", smallhash)
# Driver: hash every file named on the command line and print, per file, the
# base64-encoded UUHash followed by how many bytes were read and how long it
# took.

# Expand each argument as a glob pattern.  An argument that matches nothing is
# kept verbatim so that opening it below reports the missing file instead of
# the argument being dropped silently.
files = []
for globbable in sys.argv[1:]:
    files += glob.glob(globbable) or [globbable]

for fname in files:
    # UUHash() accumulates into this module-level counter; reset it per file.
    bytesread = 0

    t0 = time.time()
    # Use a context manager so the handle is closed as soon as the file has
    # been hashed, rather than leaking one open file per argument.
    with open(fname, 'rb') as fobj:
        digest = UUHash(fobj)
    t1 = time.time()

    # For hexadecimal output use binascii.hexlify(digest) instead.
    encoded = base64.b64encode(digest)
    if not isinstance(encoded, str):
        # b64encode returns bytes on Python 3; decode so the text is printed
        # rather than its b'...' representation.
        encoded = encoded.decode('ascii')

    sys.stdout.write("%-28s %s\n" % (encoded, fname))
    sys.stdout.write(
        "bytes read: %s or %s chunks took %.2f secs\n"
        % (bytesread, bytesread / 307200., t1 - t0)
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment