Skip to content

Instantly share code, notes, and snippets.

@rpendleton
Last active April 19, 2020 23:48
Show Gist options
  • Save rpendleton/3381ac41e7d7b275f81d8c60164d0a81 to your computer and use it in GitHub Desktop.
Save rpendleton/3381ac41e7d7b275f81d8c60164d0a81 to your computer and use it in GitHub Desktop.
finds matching hashes between two Format-List outputs containing Hash and Path columns
#!/usr/bin/env python3
import json
import sys
# Raised by read_list() when it meets a malformed record; the script checks
# it after each input file has been consumed.
error_while_reading = False

# Number of (hash, path) records read from each input file.
plaintiff_count = defendant_count = 0

# hash -> list of plaintiff paths that carry that hash.
plaintiff_files = {}

# hash -> {"plaintiff_paths": [...], "defendant_paths": [...]} for every hash
# that occurs in both input files.
matches = {}
def read_list(f):
    """Yield ``(hash, path)`` pairs parsed from an open text file.

    The input is expected to be a sequence of records, each made of a
    ``Hash: <value>`` line immediately followed by a ``Path: <value>`` line.
    Blank lines before a ``Hash:`` line are skipped; a blank line where the
    ``Path:`` line should be is an error.

    On the first malformed record a diagnostic is printed, the module-level
    ``error_while_reading`` flag is set to ``True`` and iteration stops.
    Pairs yielded before that point have already been handed to the caller.

    Args:
        f: A text-mode file-like object (anything providing ``readline``).

    Yields:
        ``(hash, path)`` tuples of strings with the ``Hash: `` / ``Path: ``
        prefixes and surrounding whitespace removed.
    """
    global error_while_reading

    hash_prefix = "Hash: "
    path_prefix = "Path: "

    while True:
        hash_line = f.readline()
        if not hash_line:
            # readline() returns "" only at end of file.
            break

        hash_line = hash_line.strip()
        if not hash_line:
            # Blank line between records.
            continue

        # The prefix includes the trailing space, so a stripped "Hash:" line
        # with no value is rejected here as well.
        if not hash_line.startswith(hash_prefix):
            error_while_reading = True
            print(f"expected hash but found: {hash_line}")
            break
        digest = hash_line[len(hash_prefix):].lstrip()

        # The path must follow its hash directly; no blank line is allowed.
        path_line = f.readline()
        if not path_line:
            error_while_reading = True
            print("expected path but found eof")
            break

        path_line = path_line.strip()
        if not path_line:
            error_while_reading = True
            print("expected path but found empty line")
            break

        if not path_line.startswith(path_prefix):
            error_while_reading = True
            print(f"expected path but found: {path_line}")
            break
        path = path_line[len(path_prefix):].lstrip()

        yield (digest, path)
# Pass 1: index every plaintiff path by its hash.
# NOTE(review): the file name is spelled "plantiff.txt" (sic). It is left
# unchanged because existing inputs may rely on it -- confirm before renaming.
with open("plantiff.txt") as f:
    for digest, path in read_list(f):
        plaintiff_count += 1
        plaintiff_files.setdefault(digest, []).append(path)

if error_while_reading:
    print("encountered error while reading plaintiff file")
    # Exit non-zero so callers can detect the failure; sys.exit is used
    # because the bare exit() helper is only injected by the site module.
    sys.exit(1)

# Pass 2: record every defendant path whose hash also occurs in the plaintiff
# index. Defendant-only hashes are counted but otherwise ignored.
with open("defendant.txt") as f:
    for digest, path in read_list(f):
        defendant_count += 1
        if digest not in plaintiff_files:
            continue
        if digest in matches:
            matches[digest]["defendant_paths"].append(path)
        else:
            matches[digest] = {
                "plaintiff_paths": plaintiff_files[digest],
                "defendant_paths": [path],
            }

if error_while_reading:
    print("encountered error while reading defendant file")
    sys.exit(1)

# Summary lines, a separator, then the matches as indented JSON on stdout.
print(f"Total plaintiff files: {plaintiff_count}")
print(f"Total defendant files: {defendant_count}")
print(f"Total number of matching hashes: {len(matches)}")
print("--")
json.dump(matches, sys.stdout, indent=2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment