Last active
January 10, 2024 15:45
-
-
Save henryjfry/c265230c7aefbf7c80b31509a9d4cd99 to your computer and use it in GitHub Desktop.
opensubtitles.org - HashFile = filehash = filesize + 64bit sum of the first and last 64k of the file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import struct

# Number of bytes in each hashed region (64 KiB).
__64k = 65536
# struct format character for a signed 64-bit integer ("long long").
__longlong_format_char = 'q'
# Width in bytes of one such integer, as reported by struct in native mode.
__byte_size = struct.calcsize(__longlong_format_char)
# Empty result mapping; the hashing functions in this file store their
# results under these same two keys.
meta = {'filesize': '', 'filehash': ''}
def temp_file():
    """Create an empty temporary file and return its path.

    The file is created with ``tempfile.mkstemp`` so that it still exists
    when this function returns. (Returning the name of a
    ``NamedTemporaryFile`` whose object is then discarded hands back a path
    that has already been deleted, which lets another process claim the
    name before the caller writes to it.)

    Returns:
        str: Absolute path of the newly created, empty file. The caller is
        responsible for removing it when done.
    """
    import tempfile

    descriptor, filename = tempfile.mkstemp()
    # Only the path is needed; release the OS-level handle straight away so
    # the caller can reopen the file on every platform.
    os.close(descriptor)
    return filename
def size_hashFile_url(meta, filepath):
    """Compute the OpenSubtitles size/hash pair for a file served at a URL.

    The hash is the file size plus the sum of every little-endian signed
    64-bit word in the first and last 64 KiB of the file, truncated to
    64 bits. Only those two regions are requested, using a ``Range`` header
    attached to each individual request, so the process-wide urllib opener
    is never modified and no temporary files are written.

    Args:
        meta: Mapping updated in place. ``'filesize'`` receives the size in
            bytes and ``'filehash'`` the hash as 16 lowercase hex digits.
        filepath: URL of the file to hash.

    Returns:
        ``meta`` on success. ``meta`` with only ``'filesize'`` updated when
        either region could not be read in full. The string ``"SizeError"``
        when the file is smaller than 128 KiB (``'filesize'`` is still
        stored in that case).

    Raises:
        TypeError: If the server sends no ``Content-Length`` header.
        OSError: On network failure (``urllib.error.URLError`` is a
            subclass); these are not caught here.
    """
    from urllib import request

    chunk_size = 65536
    # One format string that unpacks a whole 64 KiB region at once.
    words_format = '<%dq' % (chunk_size // struct.calcsize('<q'))

    def read_span(start):
        """Return up to ``chunk_size`` bytes of the file from offset ``start``."""
        # HTTP byte ranges are inclusive at both ends.
        span = 'bytes=%d-%d' % (start, start + chunk_size - 1)
        ranged = request.Request(filepath, headers={'Range': span})
        with request.urlopen(ranged) as response:
            if getattr(response, 'status', None) != 206:
                # The Range header was ignored and the body starts at byte 0:
                # discard everything in front of the wanted offset.
                remaining = start
                while remaining > 0:
                    skipped = response.read(min(remaining, 1 << 20))
                    if not skipped:
                        break
                    remaining -= len(skipped)
            return response.read(chunk_size)

    # Headers stay readable after the response is closed; the body of this
    # first request is not needed.
    with request.urlopen(filepath) as response:
        headers = response.headers

    filesize = int(headers['Content-Length'])
    if filesize < chunk_size * 2:
        # A partial response reports the full size after the "/" of
        # Content-Range; keep the Content-Length value when it is absent.
        try:
            filesize = int(str(headers['Content-Range']).split('/')[1])
        except (IndexError, ValueError):
            pass

    meta['filesize'] = filesize
    if filesize < chunk_size * 2:
        return "SizeError"

    head = read_span(0)
    tail = read_span(filesize - chunk_size)
    if len(head) != chunk_size or len(tail) != chunk_size:
        # Truncated transfer: report the size but leave the hash untouched.
        return meta

    total = (
        filesize
        + sum(struct.unpack(words_format, head))
        + sum(struct.unpack(words_format, tail))
    )
    # Masking once at the end equals wrapping to 64 bits after every addition.
    meta['filehash'] = "%016x" % (total & 0xFFFFFFFFFFFFFFFF)
    return meta
def size_hashFile(meta, filepath):
    """Compute the OpenSubtitles size/hash pair for a local file or a URL.

    The hash is the file size plus the sum of every little-endian signed
    64-bit word in the first and last 64 KiB of the file, truncated to
    64 bits.

    Args:
        meta: Mapping updated in place. ``'filesize'`` receives the size in
            bytes and ``'filehash'`` the hash as 16 lowercase hex digits.
        filepath: Path of a local file, or an ``http://`` / ``https://``
            URL, in which case the work is delegated to
            ``size_hashFile_url``.

    Returns:
        ``meta`` on success. ``meta`` with ``'filehash'`` left untouched
        when the file cannot be opened or read in full. The string
        ``"SizeError"`` when the file is smaller than 128 KiB
        (``'filesize'`` is still stored in that case).
    """
    # Test the scheme prefix rather than searching for "http" anywhere, so a
    # local file whose name merely contains "http" is still hashed from disk.
    if str(filepath).lower().startswith(('http://', 'https://')):
        return size_hashFile_url(meta=meta, filepath=filepath)

    chunk_size = 65536
    # One format string that unpacks a whole 64 KiB region at once.
    words_format = '<%dq' % (chunk_size // struct.calcsize('<q'))

    try:
        # The context manager closes the handle on every exit path,
        # including the early "SizeError" return.
        with open(filepath, "rb") as stream:
            filesize = os.path.getsize(filepath)
            meta['filesize'] = filesize
            if filesize < chunk_size * 2:
                return "SizeError"
            head = stream.read(chunk_size)
            stream.seek(filesize - chunk_size)
            tail = stream.read(chunk_size)
    except IOError:
        return meta

    if len(head) != chunk_size or len(tail) != chunk_size:
        # The file shrank while being read: report no hash instead of
        # failing inside struct.unpack.
        return meta

    total = (
        filesize
        + sum(struct.unpack(words_format, head))
        + sum(struct.unpack(words_format, tail))
    )
    # Masking once at the end equals wrapping to 64 bits after every addition.
    meta['filehash'] = "%016x" % (total & 0xFFFFFFFFFFFFFFFF)
    return meta
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment