Last active
April 19, 2020 23:48
-
-
Save rpendleton/3381ac41e7d7b275f81d8c60164d0a81 to your computer and use it in GitHub Desktop.
finds matching hashes between two Format-List outputs containing Hash and Path columns
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import json | |
import sys | |
# Set to True by read_list() when it encounters malformed input; the script
# checks it after each listing has been consumed.
error_while_reading = False

# Maps each hash from the plaintiff listing to the list of paths that had it.
plaintiff_files = {}

# Maps each hash found in both listings to a dict with the keys
# "plaintiff_paths" and "defendant_paths" (each a list of paths).
matches = {}

# Number of (hash, path) entries read from each listing.
plaintiff_count = 0
defendant_count = 0
def read_list(f):
    """Yield ``(hash, path)`` tuples parsed from a text listing.

    The listing is read as a sequence of two-line entries: a line starting
    with ``"Hash: "`` immediately followed by a line starting with
    ``"Path: "``. Blank lines are skipped when a hash line is expected, but a
    blank line (or end of file) where a path line is expected is an error.
    Leading/trailing whitespace around each line and each value is stripped.

    On malformed input a diagnostic is printed, the module-level flag
    ``error_while_reading`` is set to ``True`` and the generator stops;
    entries yielded before the error remain valid.

    Args:
        f: An open text-mode file-like object providing ``readline()``.

    Yields:
        tuple[str, str]: The hash value and the path value of each entry.
    """
    global error_while_reading

    hash_prefix = "Hash: "
    path_prefix = "Path: "

    # iter() with a sentinel keeps calling f.readline() until it returns ""
    # (end of file); blank lines still arrive as "\n" and are handled below.
    for raw_hash_line in iter(f.readline, ""):
        hash_line = raw_hash_line.strip()
        if not hash_line:
            # Empty line between entries.
            continue

        if not hash_line.startswith(hash_prefix):
            error_while_reading = True
            print(f"expected hash but found: {hash_line}")
            return
        digest = hash_line[len(hash_prefix):].lstrip()

        # The path must be on the very next line.
        raw_path_line = f.readline()
        if not raw_path_line:
            error_while_reading = True
            print("expected path but found eof")
            return

        path_line = raw_path_line.strip()
        if not path_line:
            error_while_reading = True
            print("expected path but found empty line")
            return

        if not path_line.startswith(path_prefix):
            error_while_reading = True
            print(f"expected path but found: {path_line}")
            return

        yield (digest, path_line[len(path_prefix):].lstrip())
# Index the plaintiff listing: hash -> every path that has that hash.
# NOTE(review): the file name is spelled "plantiff.txt" (no second "i") --
# confirm that this matches the file on disk before renaming it.
with open("plantiff.txt") as f:
    for digest, path in read_list(f):
        plaintiff_count += 1
        plaintiff_files.setdefault(digest, []).append(path)

if error_while_reading:
    print("encountered error while reading plaintiff file")
    # Exit with a non-zero status so callers can detect the failure.
    sys.exit(1)

# Walk the defendant listing and record every hash that also occurs in the
# plaintiff listing, together with the paths from both sides.
with open("defendant.txt") as f:
    for digest, path in read_list(f):
        defendant_count += 1
        if digest not in plaintiff_files:
            continue
        if digest in matches:
            matches[digest]["defendant_paths"].append(path)
        else:
            matches[digest] = {
                "plaintiff_paths": plaintiff_files[digest],
                "defendant_paths": [path],
            }

if error_while_reading:
    print("encountered error while reading defendant file")
    sys.exit(1)

# Summary first, then the matches as indented JSON on stdout.
print(f"Total plaintiff files: {plaintiff_count}")
print(f"Total defendant files: {defendant_count}")
print(f"Total number of matching hashes: {len(matches)}")
print("--")
json.dump(matches, sys.stdout, indent=2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment