Skip to content

Instantly share code, notes, and snippets.

@HelloZeroNet
Created April 3, 2016 20:39
Show Gist options
  • Save HelloZeroNet/8c44dda7802cf05c6c59001d20816da2 to your computer and use it in GitHub Desktop.
Save HelloZeroNet/8c44dda7802cf05c6c59001d20816da2 to your computer and use it in GitHub Desktop.
Bindiff
import difflib
def diff(old, new, limit=False):
    """Build a line-level patch that turns ``old`` into ``new``.

    ``old`` and ``new`` are sequences of lines; they are compared with
    ``difflib.SequenceMatcher``.  The result is a list of ``(op, param)``
    tuples, in order:

    * ``("=", n)`` -- the next ``n`` lines of ``old`` are unchanged
    * ``("-", n)`` -- the next ``n`` lines of ``old`` are removed
    * ``("+", lines)`` -- ``lines`` (a slice of ``new``) are inserted

    A "replace" opcode is emitted as a "-" action followed by a "+" action.

    If ``limit`` is truthy and the summed length of all inserted lines
    exceeds it, ``False`` is returned instead of the action list.
    """
    opcodes = difflib.SequenceMatcher(None, old, new).get_opcodes()
    result = []
    added_size = 0
    for tag, a_start, a_end, b_start, b_end in opcodes:
        # Part 1: what happens to the span of the old sequence.
        if tag in ("equal", "delete", "replace"):
            marker = "=" if tag == "equal" else "-"
            result.append((marker, a_end - a_start))
        # Part 2: what gets inserted from the new sequence.
        if tag in ("insert", "replace"):
            added = new[b_start:b_end]
            result.append(("+", added))
            added_size += sum(len(line) for line in added)
        # Give up as soon as the patch payload outgrows the allowed size.
        if limit and added_size > limit:
            return False
    return result
def sumSize(lines, lines_from, lines_to):
    """Return the combined length of the items in ``lines[lines_from:lines_to]``."""
    total = 0
    for line in lines[lines_from:lines_to]:
        total += len(line)
    return total
def diffBin(old, new, limit=False):
    """Build a patch from ``old`` to ``new`` whose offsets are lengths, not line counts.

    The two line sequences are compared with ``difflib.SequenceMatcher``
    exactly as in a line diff, but the "=" and "-" actions carry the summed
    length of the affected old lines, so the patch can be applied to a file
    object by reading/seeking.  The result is a list of ``(op, param)``:

    * ``("=", size)`` -- the next ``size`` units of the old data are unchanged
    * ``("-", size)`` -- the next ``size`` units of the old data are removed
    * ``("+", lines)`` -- ``lines`` (a slice of ``new``) are inserted

    If ``limit`` is truthy and the summed length of all inserted lines
    exceeds it, ``False`` is returned instead of the action list.
    """
    def old_span_size(start, end):
        # Combined length of old[start:end].
        return sum(len(line) for line in old[start:end])

    opcodes = difflib.SequenceMatcher(None, old, new).get_opcodes()
    result = []
    added_size = 0
    for tag, a_start, a_end, b_start, b_end in opcodes:
        # Part 1: what happens to the span of the old data.
        if tag in ("equal", "delete", "replace"):
            marker = "=" if tag == "equal" else "-"
            result.append((marker, old_span_size(a_start, a_end)))
        # Part 2: what gets inserted from the new data.
        if tag in ("insert", "replace"):
            added = new[b_start:b_end]
            result.append(("+", added))
            added_size += sum(len(line) for line in added)
        # Give up as soon as the patch payload outgrows the allowed size.
        if limit and added_size > limit:
            return False
    return result
def patch(old, actions):
    """Apply a line-level patch to the list ``old`` in place.

    ``actions`` is an iterable of ``(action, param)`` pairs applied at a
    moving cursor that starts at index 0:

    * ``("=", n)`` -- skip over ``n`` unchanged lines
    * ``("-", n)`` -- delete ``n`` lines at the cursor
    * ``("+", lines)`` -- insert ``lines`` at the cursor and move past them

    Returns ``True`` if the cursor ends exactly at the end of the patched
    list (the actions accounted for every line), ``False`` otherwise.
    """
    i = 0
    for action, param in actions:
        if action == "=":  # Same lines
            i += param
        elif action == "-":  # Delete lines
            del old[i:i + param]
        elif action == "+":  # Add lines
            # One slice assignment shifts the tail of the list once, instead
            # of once per inserted line as repeated old.insert() would.
            new_lines = list(param)
            old[i:i] = new_lines
            i += len(new_lines)
    return len(old) == i
def patchFile(f, actions):
    """Apply a length-based patch to the binary file object ``f``.

    ``f`` is read sequentially from its current position; ``actions`` is an
    iterable of ``(action, param)`` pairs:

    * ``("=", size)`` -- copy the next ``size`` bytes of ``f`` to the output
    * ``("-", size)`` -- skip the next ``size`` bytes of ``f``
    * ``("+", lines)`` -- write every item of ``lines`` to the output

    Returns a new in-memory binary buffer holding the patched data.  The
    buffer's position is left at the end of the written data, so callers
    must ``seek(0)`` before reading it back.
    """
    # Function-local import: the buffer then does not depend on the
    # module-level StringIO alias, which is only bound further down the file.
    import io

    out_f = io.BytesIO()
    for action, param in actions:
        if action == "=":  # Same bytes: copy them through
            out_f.write(f.read(param))
        elif action == "-":  # Deleted bytes: skip them in the source
            f.seek(param, 1)  # Seek from current position
        elif action == "+":  # Added lines
            out_f.writelines(param)
    return out_f
def sha512sum(file, blocksize=65536):
    """Return the first 64 hex digits of the SHA-512 digest of ``file``.

    ``file`` is either a path (anything with an ``endswith`` attribute, i.e.
    a string), which is opened in binary mode and closed again before
    returning, or an already open object with a ``read(size)`` method, which
    is read from its current position and left open.  Data is consumed in
    chunks of ``blocksize``.
    """
    opened_here = hasattr(file, "endswith")  # Its a string open it
    if opened_here:
        file = open(file, "rb")
    digest = hashlib.sha512()
    try:
        while True:
            block = file.read(blocksize)
            # An empty read marks EOF; a truthiness test covers both "" and
            # b"", whereas comparing against "" never matches a bytes result.
            if not block:
                break
            digest.update(block)
    finally:
        # Only close what this function opened; caller-owned objects stay open.
        if opened_here:
            file.close()
    return digest.hexdigest()[0:64]  # Truncate to 256bits is good enough
import os, psutil, time, shutil, hashlib, tempfile
import cStringIO as StringIO
s = time.time()
for i in range(100):
diff(list(open("users_archive.json", "rb")), list(open("users_archive2.json", "rb")))
print "Diff", time.time()-s
s = time.time()
for i in range(100):
diffBin(list(open("users_archive.json", "rb")), list(open("users_archive2.json", "rb")))
print "DiffBin", time.time()-s
# SHA512: c8cc7f1dc3e018cd45a3a77accba5105721102539b2af900e34c91c2a1b40d6c
# Patch list: Mem +0.48MB, Peak: 11.37MB Taken: 0.188s
# Patch file: Mem +0.20MB, Peak: 10.25MB Taken: 0.323s
# Patch fbin: Mem +0.16MB, Peak: 10.22MB Taken: 0.132s
# Tempfile
# with Mem +0.45MB, Peak: 10.78MB Taken: 0.155s
# without Mem +0.14MB, Peak: 11.46MB Taken: 0.144s
# Handle on the current process; its memory_info() is sampled before and after
# the timed patch run below to report memory usage.
process = psutil.Process(os.getpid())
actions = [('=', 17), ('-', 1), ('=', 204), ('-', 1), ('=', 273), ('-', 1), ('=', 2743), ('+', [' "luiscuadrado123": "web,1APYAwUqwiJV6V1jv4zyTTSXAACf1gqoZ7,G5678wXeS8aYwULahj0IR7E4cWVReZHtzyPStpcJBxxNbshLuPpH1nNmHX4b47WQd+u0sRxDOiLx8/Lcio/sc8s=",\n', ' "luisdeblas": "web,1dueof4yrxQX1Xp6df1JPWLLmorSbjsE9,HL1/V2feFYbX6OZhUnaX1pbQ3mgSUHV26LkZSawvMkPFTDBSv8NBRBTIQiW3HazUKab8tCHC2AbKCVFb67ku/IA=",\n']), ('=', 657), ('-', 1), ('=', 88), ('+', [' "nx": "web,188ABN6iBoUCERRrtzHfa3V1dsJ16h7v5R,HNhCn5ce28xW6/BdMr9yYK1YMhKEJbXhseauFwI8KDKYdrdAVJtCIjRp9h/DfY8PNARUujAVNoo5hk+BF98hX7E=",\n', ' "nxmc": "web,17CC51W4oQJ8ys5LPgqMPaGFX8oHDpVLqz,G3lJyfHGkBQCygdGcya4ZxKG4do7cBN7niisS4Ow8TlLtxUGah8ab4NSkYzrB+kz2mVb2GdGqMqKnJT4fL6nytM=",\n', ' "nyanloutre": "web,1LJoVy98WbuHAVPb9nqKDLz4AWEiLqmvqh,HI//exjgiro+fqKFZDh05996p/AHLjsFxUaB22YD8WKDAA4ounK8sZ09bL7ACW3c7FLHAFGB1UjTN5bYylPh+38=",\n']), ('=', 405), ('+', [' "punk": "web,15Q2LvRrh8MkZTJaGR1pbHDadRGgWdUnWA,HCwnPwqBDtrIV+ukhIHbbEcohfzrWscezh66UNPMfsO/o6KouSjxnaWm7ejLUH/ie86R6jqoOR47sQ82k1rvIYw=",\n', ' "punkcoder": "web,1ACc4pa2pJam33SkSE1zF1seF3LQKWR24v,G5R3iUyluJHO4gX9mX4H55peZr+JyDDdsmT5651k3IvT/fRn1IapdgHtyqkLrfF5hiQsEeP768oV5dJcqjZo7Fg=",\n', ' "puntenpol": "web,1PVpsmgE8oHs8UhcbxnZj2CsccgUfatNjn,HOi0Dds80AN8OweqJP1HM7doLoFvbj4N9cXrSskxb5iMspL0fMtZUW9RUwfXPEL36Q4ISOnRFyXinVgvlqSJCCg=",\n']), ('=', 337), ('+', [' "salazar": "web,14n5DubeLS35QiWu8xFpDhUhPVGb1hiTLK,HOSuG+Xg42y6ImadGmWge7laKRIYwgOmf5Bm+zpHPc5XCa5hvDtzyw6yt/7PljzfC7zw09qypqSsK62iJQFZYgU=",\n', ' "salikshah": "web,1PN7XWJANFhd9JTdRzETmA7YHogGW2MSrC,HKTGW0Yx34SaoUTZFXAsBIWP1UqLl/cZhCi7/D80nDxj0PaiXkYuiOZrC0U+q8S0WLJ26D04Uy0wkg43FUa8vuk=",\n', ' "salocin": "web,1EE7uwPcuAhkepEpYS4zW8vj3VyrNZQZ43,HJH+ExIdhILwi8RNY1ttRO5+WSr+jpL/SAkZc7JkDDzz6/agT+ilLJjXL6vqpYTMNfIotGuTzOBRSdTlfwschxo=",\n', ' "salparadise": "web,1CVtGismh9rHbnK6yWinCuVogLyXDx79AV,HGIiW7gGxK+IvO7jFmanQMlYoQ9xDyeI5MzyeNBi8NgY4uD+gehTBxC6LRzwEIupUJNTWLSmA+rr1Jj2K1lv8hA=",\n']), ('=', 132), ('+', [' "sexy": 
"web,1Hf7jYBwV4WhVwtHCesJzbGzUmUq2oPacJ,HBrRQPlEHfEG8jBM0V09y18Bh03pA+/NGEM1iEc41HlnLQJPWB3Hjs1RIkXuYaNg0yN9CgYqtf/pR5KLzoFQyhQ=",\n', ' "seymourbutts": "web,1KJuAtdMStBXcDKZfwsdd5KpxFNh6YYC69,HDg00Q0TLS32+H+aneRA3niChuuF9mt60iNyT4osAErMmgXVaKiqWBMbDkKEtVKXqd5luliqE6GGlOAU7z9azvU=",\n', ' "sf": "web,12p6VU9C5wdDJFsVLvn3nJBMeqhJAmAvgm,HEPpE1ZPgniKcJzleziEPbNK5B8I//UsQutvZQ6gVfTygCHfSVLPpQXwLJL8igLbceI7efpFEVncJbnbOyduAeM=",\n']), ('=', 472), ('+', [' "test123321": "web,1PmQNoPZ62WBYuF5Sqog8hnQBbeDaitW4u,G3mJnIXE8kdg+QKA9iRgf4TDPOumbhnGaGCOd1ZV0fibNmEFTvOv0IjF/A19Xbf9464yMX8/ZqcYk0viKsVfv6w=",\n', ' "test1234": "web,1JLGSSH6idqGcVsTtfN6M2fxZuvcmKW3N9,HNFl8/3fER11NLjT6d5Qg5vg+ieDedYAy/IGUqYDa0z35GCgkQUVZDiHkdZTl+hVLz2jWLQ1VcRdUVEnIod8zHE=",\n', ' "test12345": "web,15jG4YnDc7ccAMGmCLuy6cKHKxtkDFmiTr,HDepui2thowYhIRqmOguZeBr2mOgqe6rDut0SBX+FiH6GG3CZG/SM2Ukzho8zMl97S+YeIBjzAcj0fHxQukjHrg=",\n', ' "test123456": "web,18NiN86jQ5pXtzCnmDHQMvB6VYQaJuarDQ,HJewiGmq2w8vBOFGcvfoUltS4UK+RFO5T7ZGSzVbg/ze4dgaXxjcGxHh6T1u59N2c1QaE8y8lVcnipgjcQMkvbo=",\n']), ('=', 744), ('+', [' "z0b": "web,1KhtLHtVe9KsPX2EsvxWNZPLPxMS8Uwuxg,HBN41OzHK1JS7E5AMDXFQAPqSTPQxbnQMmAGd3UIhfdOKX+21pAqAyGWNgyIFwtNQK2OlkEoy0ci6bGSEjGtxx0=",\n']), ('=', 203), ('-', 1), ('=', 79)]
actions_bin = [('=', 2143), ('-', 145), ('=', 29334), ('-', 149), ('=', 39410), ('-', 139), ('=', 395607), ('+', [' "luiscuadrado123": "web,1APYAwUqwiJV6V1jv4zyTTSXAACf1gqoZ7,G5678wXeS8aYwULahj0IR7E4cWVReZHtzyPStpcJBxxNbshLuPpH1nNmHX4b47WQd+u0sRxDOiLx8/Lcio/sc8s=",\n', ' "luisdeblas": "web,1dueof4yrxQX1Xp6df1JPWLLmorSbjsE9,HL1/V2feFYbX6OZhUnaX1pbQ3mgSUHV26LkZSawvMkPFTDBSv8NBRBTIQiW3HazUKab8tCHC2AbKCVFb67ku/IA=",\n']), ('=', 94712), ('-', 143), ('=', 12663), ('+', [' "nx": "web,188ABN6iBoUCERRrtzHfa3V1dsJ16h7v5R,HNhCn5ce28xW6/BdMr9yYK1YMhKEJbXhseauFwI8KDKYdrdAVJtCIjRp9h/DfY8PNARUujAVNoo5hk+BF98hX7E=",\n', ' "nxmc": "web,17CC51W4oQJ8ys5LPgqMPaGFX8oHDpVLqz,G3lJyfHGkBQCygdGcya4ZxKG4do7cBN7niisS4Ow8TlLtxUGah8ab4NSkYzrB+kz2mVb2GdGqMqKnJT4fL6nytM=",\n', ' "nyanloutre": "web,1LJoVy98WbuHAVPb9nqKDLz4AWEiLqmvqh,HI//exjgiro+fqKFZDh05996p/AHLjsFxUaB22YD8WKDAA4ounK8sZ09bL7ACW3c7FLHAFGB1UjTN5bYylPh+38=",\n']), ('=', 58342), ('+', [' "punk": "web,15Q2LvRrh8MkZTJaGR1pbHDadRGgWdUnWA,HCwnPwqBDtrIV+ukhIHbbEcohfzrWscezh66UNPMfsO/o6KouSjxnaWm7ejLUH/ie86R6jqoOR47sQ82k1rvIYw=",\n', ' "punkcoder": "web,1ACc4pa2pJam33SkSE1zF1seF3LQKWR24v,G5R3iUyluJHO4gX9mX4H55peZr+JyDDdsmT5651k3IvT/fRn1IapdgHtyqkLrfF5hiQsEeP768oV5dJcqjZo7Fg=",\n', ' "puntenpol": "web,1PVpsmgE8oHs8UhcbxnZj2CsccgUfatNjn,HOi0Dds80AN8OweqJP1HM7doLoFvbj4N9cXrSskxb5iMspL0fMtZUW9RUwfXPEL36Q4ISOnRFyXinVgvlqSJCCg=",\n']), ('=', 48458), ('+', [' "salazar": "web,14n5DubeLS35QiWu8xFpDhUhPVGb1hiTLK,HOSuG+Xg42y6ImadGmWge7laKRIYwgOmf5Bm+zpHPc5XCa5hvDtzyw6yt/7PljzfC7zw09qypqSsK62iJQFZYgU=",\n', ' "salikshah": "web,1PN7XWJANFhd9JTdRzETmA7YHogGW2MSrC,HKTGW0Yx34SaoUTZFXAsBIWP1UqLl/cZhCi7/D80nDxj0PaiXkYuiOZrC0U+q8S0WLJ26D04Uy0wkg43FUa8vuk=",\n', ' "salocin": "web,1EE7uwPcuAhkepEpYS4zW8vj3VyrNZQZ43,HJH+ExIdhILwi8RNY1ttRO5+WSr+jpL/SAkZc7JkDDzz6/agT+ilLJjXL6vqpYTMNfIotGuTzOBRSdTlfwschxo=",\n', ' "salparadise": "web,1CVtGismh9rHbnK6yWinCuVogLyXDx79AV,HGIiW7gGxK+IvO7jFmanQMlYoQ9xDyeI5MzyeNBi8NgY4uD+gehTBxC6LRzwEIupUJNTWLSmA+rr1Jj2K1lv8hA=",\n']), 
('=', 19050), ('+', [' "sexy": "web,1Hf7jYBwV4WhVwtHCesJzbGzUmUq2oPacJ,HBrRQPlEHfEG8jBM0V09y18Bh03pA+/NGEM1iEc41HlnLQJPWB3Hjs1RIkXuYaNg0yN9CgYqtf/pR5KLzoFQyhQ=",\n', ' "seymourbutts": "web,1KJuAtdMStBXcDKZfwsdd5KpxFNh6YYC69,HDg00Q0TLS32+H+aneRA3niChuuF9mt60iNyT4osAErMmgXVaKiqWBMbDkKEtVKXqd5luliqE6GGlOAU7z9azvU=",\n', ' "sf": "web,12p6VU9C5wdDJFsVLvn3nJBMeqhJAmAvgm,HEPpE1ZPgniKcJzleziEPbNK5B8I//UsQutvZQ6gVfTygCHfSVLPpQXwLJL8igLbceI7efpFEVncJbnbOyduAeM=",\n']), ('=', 68178), ('+', [' "test123321": "web,1PmQNoPZ62WBYuF5Sqog8hnQBbeDaitW4u,G3mJnIXE8kdg+QKA9iRgf4TDPOumbhnGaGCOd1ZV0fibNmEFTvOv0IjF/A19Xbf9464yMX8/ZqcYk0viKsVfv6w=",\n', ' "test1234": "web,1JLGSSH6idqGcVsTtfN6M2fxZuvcmKW3N9,HNFl8/3fER11NLjT6d5Qg5vg+ieDedYAy/IGUqYDa0z35GCgkQUVZDiHkdZTl+hVLz2jWLQ1VcRdUVEnIod8zHE=",\n', ' "test12345": "web,15jG4YnDc7ccAMGmCLuy6cKHKxtkDFmiTr,HDepui2thowYhIRqmOguZeBr2mOgqe6rDut0SBX+FiH6GG3CZG/SM2Ukzho8zMl97S+YeIBjzAcj0fHxQukjHrg=",\n', ' "test123456": "web,18NiN86jQ5pXtzCnmDHQMvB6VYQaJuarDQ,HJewiGmq2w8vBOFGcvfoUltS4UK+RFO5T7ZGSzVbg/ze4dgaXxjcGxHh6T1u59N2c1QaE8y8lVcnipgjcQMkvbo=",\n']), ('=', 107154), ('+', [' "z0b": "web,1KhtLHtVe9KsPX2EsvxWNZPLPxMS8Uwuxg,HBN41OzHK1JS7E5AMDXFQAPqSTPQxbnQMmAGd3UIhfdOKX+21pAqAyGWNgyIFwtNQK2OlkEoy0ci6bGSEjGtxx0=",\n']), ('=', 29484), ('-', 151), ('=', 11048)]
# Memory baseline taken before the timed run: field 0 of memory_info()
# divided by 2**20 (bytes -> MB).  Presumably field 0 is the resident set
# size -- confirm against the installed psutil version.
before = process.memory_info()[0] / float(2 ** 20)
def testPatchFile():
    """Patch users_archive.json via patchFile() and verify the result's checksum.

    Applies the length-based ``actions_bin`` patch to the file, rewinds the
    returned buffer and asserts that its truncated SHA-512 matches the
    expected value.
    """
    # The context manager closes the source file once patching is done; the
    # patched data lives entirely in the buffer returned by patchFile().
    with open("users_archive.json", "rb") as f:
        back = patchFile(f, actions_bin)
    back.seek(0)
    assert sha512sum(back) == "c8cc7f1dc3e018cd45a3a77accba5105721102539b2af900e34c91c2a1b40d6c"
def testPatch():
    """Patch users_archive.json in memory via patch() and verify the checksum.

    Loads the file as a list of lines, applies the line-based ``actions``
    patch in place and asserts that the truncated SHA-512 of the re-joined
    lines matches the expected value.
    """
    # Read all lines inside a context manager so the file handle is closed
    # before patching starts.
    with open("users_archive.json", "rb") as f:
        lines = list(f)
    patch(lines, actions)
    assert sha512sum(StringIO.StringIO("".join(lines))) == "c8cc7f1dc3e018cd45a3a77accba5105721102539b2af900e34c91c2a1b40d6c"
s = time.time()
for i in range(10):
#testPatch()
testPatchFile()
taken = time.time()-s
print "Mem", "+%.2fMB, Peak: %.2fMB" % (process.memory_info()[0] / float(2 ** 20) - before, process.memory_info()[3] / float(2 ** 20)), "Taken: %.3fs" % taken
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment