rpendleton · April 19, 2020 23:48
diff --git a/compare.py b/compare.py
 #!/usr/bin/env python3
 import json
 import sys

 # Flipped to True by read_list() when it meets a malformed record.
 error_while_reading = False

 # hash -> list of every plaintiff path recorded with that hash.
 plaintiff_files = dict()
 # hash -> {"plaintiff_paths": [...], "defendant_paths": [...]} for hashes
 # that occur in both listings.
 matches = dict()

 # Number of (hash, path) records read from each listing.
 plaintiff_count, defendant_count = 0, 0

 def read_list(f):
     """Generate ``(hash, path)`` tuples from an open listing file.

     The listing is read with ``f.readline()``. Each record is a line
     starting with ``"Hash: "`` immediately followed by a line starting with
     ``"Path: "``; blank lines in front of a hash line are skipped. Both
     values are yielded with the prefix and surrounding whitespace removed.

     On the first malformed record the module-level flag
     ``error_while_reading`` is set to True, a message is printed, and the
     generator stops without yielding that record.
     """
     global error_while_reading

     hash_tag = "Hash: "
     path_tag = "Path: "

     while True:
         first = f.readline()
         if not first:
             # Clean end of file.
             return

         digest = first.strip()
         if not digest:
             # Blank separator line: look for the next record.
             continue

         problem = None
         location = None
         if not digest.startswith(hash_tag):
             problem = f"expected hash but found: {digest}"
         else:
             # The path line is only consumed once the hash line is valid.
             second = f.readline()
             location = second.strip()
             if not second:
                 problem = "expected path but found eof"
             elif not location:
                 problem = "expected path but found empty line"
             elif not location.startswith(path_tag):
                 problem = f"expected path but found: {location}"

         if problem is not None:
             error_while_reading = True
             print(problem)
             return

         yield (
             digest[len(hash_tag):].lstrip(),
             location[len(path_tag):].lstrip(),
         )

 # Script body: index the plaintiff listing by hash, find the defendant
 # entries whose hash also occurs there, then print the totals followed by
 # the matches as JSON.
 #
 # NOTE(review): the input name is spelled "plantiff.txt" while every message
 # says "plaintiff" - confirm the on-disk file name before changing it.
 with open("plantiff.txt") as f:
     for digest, location in read_list(f):
         plaintiff_count += 1
         # Group every plaintiff path under its hash.
         plaintiff_files.setdefault(digest, []).append(location)

 if error_while_reading:
     print("encountered error while reading plaintiff file")
     # Exit non-zero so callers can tell the run failed; the bare exit()
     # helper reported success and only exists when the site module is loaded.
     sys.exit(1)

 with open("defendant.txt") as f:
     for digest, location in read_list(f):
         defendant_count += 1

         if digest in plaintiff_files:
             # First hit for this hash creates the entry, sharing the
             # plaintiff path list; later hits only add defendant paths.
             match = matches.setdefault(
                 digest,
                 {
                     "plaintiff_paths": plaintiff_files[digest],
                     "defendant_paths": [],
                 },
             )
             match["defendant_paths"].append(location)

 if error_while_reading:
     print("encountered error while reading defendant file")
     sys.exit(1)

 print(f"Total plaintiff files: {plaintiff_count}")
 print(f"Total defendant files: {defendant_count}")
 print(f"Total number of matching hashes: {len(matches)}")
 print("--")

 json.dump(matches, sys.stdout, indent=2)
	#!/usr/bin/env python3
	import json
	import sys

	# Becomes True once read_list() reports a malformed record.
	error_while_reading = False

	# Plaintiff paths grouped by hash: hash -> [path, ...].
	plaintiff_files = dict()
	# Hashes present in both listings:
	# hash -> {"plaintiff_paths": [...], "defendant_paths": [...]}.
	matches = dict()

	# Count of records read from each listing.
	plaintiff_count = defendant_count = 0

	def read_list(f):
	    """Yield ``(hash, path)`` pairs parsed from an open listing file.

	    Records are read with ``f.readline()``: a line starting with
	    ``"Hash: "`` must be followed directly by a line starting with
	    ``"Path: "``. Blank lines before a hash line are ignored. The yielded
	    values have the prefix and surrounding whitespace stripped.

	    When a record is malformed the module-level ``error_while_reading``
	    flag is set to True, a description is printed, and iteration ends
	    without yielding the bad record.
	    """
	    global error_while_reading

	    hash_prefix = "Hash: "
	    path_prefix = "Path: "

	    while True:
	        hash_line = f.readline()
	        if not hash_line:
	            # end of file
	            break

	        hash_line = hash_line.strip()
	        if not hash_line:
	            # empty line between records
	            continue

	        if not hash_line.startswith(hash_prefix):
	            error_while_reading = True
	            print(f"expected hash but found: {hash_line}")
	            break

	        digest = hash_line[len(hash_prefix):].lstrip()

	        path_line = f.readline()
	        if not path_line:
	            error_while_reading = True
	            print("expected path but found eof")
	            break

	        path_line = path_line.strip()
	        if not path_line:
	            error_while_reading = True
	            print("expected path but found empty line")
	            break

	        if not path_line.startswith(path_prefix):
	            error_while_reading = True
	            print(f"expected path but found: {path_line}")
	            break

	        yield (digest, path_line[len(path_prefix):].lstrip())

	# Script body: index the plaintiff listing by hash, collect defendant
	# entries whose hash also occurs there, then print the totals followed by
	# the matches as JSON.
	#
	# NOTE(review): the input name is spelled "plantiff.txt" while every
	# message says "plaintiff" - confirm the on-disk file name.
	with open("plantiff.txt") as f:
	    for digest, location in read_list(f):
	        plaintiff_count += 1

	        if digest in plaintiff_files:
	            plaintiff_files[digest].append(location)
	        else:
	            plaintiff_files[digest] = [location]

	if error_while_reading:
	    print("encountered error while reading plaintiff file")
	    # Non-zero status marks the run as failed; bare exit() returned 0 and
	    # is only defined when the site module is loaded.
	    sys.exit(1)

	with open("defendant.txt") as f:
	    for digest, location in read_list(f):
	        defendant_count += 1

	        # Only hashes already seen in the plaintiff listing are matches.
	        if digest in plaintiff_files:
	            if digest in matches:
	                matches[digest]["defendant_paths"].append(location)
	            else:
	                matches[digest] = {
	                    "plaintiff_paths": plaintiff_files[digest],
	                    "defendant_paths": [location],
	                }

	if error_while_reading:
	    print("encountered error while reading defendant file")
	    sys.exit(1)

	print(f"Total plaintiff files: {plaintiff_count}")
	print(f"Total defendant files: {defendant_count}")
	print(f"Total number of matching hashes: {len(matches)}")
	print("--")

	json.dump(matches, sys.stdout, indent=2)