Created
August 10, 2024 07:02
-
-
Save maxfire2008/d271732cb3f5c860b3c4c11950cdcc80 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import difflib
import os
import sys
from collections import defaultdict
def find_xxh3_files(directory):
    """Recursively collect the paths of all ``.xxh3`` files under *directory*.

    Args:
        directory: Root directory handed to ``os.walk``.

    Returns:
        A sorted list of paths (walk root joined with the file name) for
        every file whose name ends with ``.xxh3``.
    """
    # os.walk yields entries in arbitrary filesystem order; sorting keeps the
    # result (and therefore the comparison order) reproducible between runs.
    return sorted(
        os.path.join(root, name)
        for root, _, names in os.walk(directory)
        for name in names
        if name.endswith('.xxh3')
    )
def read_file_content(file_path):
    """Return the text file at *file_path* as a list of lines.

    Args:
        file_path: Path of the file to read.

    Returns:
        The list produced by ``readlines()``; each line keeps its trailing
        newline when it has one.
    """
    # Decode explicitly as UTF-8 instead of the platform's locale encoding so
    # the same file reads identically everywhere; undecodable bytes become
    # U+FFFD rather than aborting the whole comparison.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        return file.readlines()
def find_closest_match(file_content, comparison_files):
    """Find the entry of *comparison_files* most similar to *file_content*.

    Similarity is ``difflib.SequenceMatcher(...).ratio()`` computed between
    *file_content* and each candidate's content.

    Args:
        file_content: Sequence (here: list of lines) to look up.
        comparison_files: Mapping of candidate name -> candidate content.

    Returns:
        A ``(name, similarity)`` tuple for the first candidate with the
        highest ratio. ``(None, 0)`` when the mapping is empty or no
        candidate scores above zero.
    """
    closest_match = None
    highest_similarity = 0
    for comp_file, comp_content in comparison_files.items():
        matcher = difflib.SequenceMatcher(None, file_content, comp_content)
        # real_quick_ratio() and quick_ratio() are cheap upper bounds on
        # ratio(); if even the bound cannot beat the current best, the
        # expensive exact ratio cannot either, so the result is unchanged.
        if (matcher.real_quick_ratio() <= highest_similarity
                or matcher.quick_ratio() <= highest_similarity):
            continue
        similarity = matcher.ratio()
        if similarity > highest_similarity:
            highest_similarity = similarity
            closest_match = comp_file
    return closest_match, highest_similarity
def print_diff(file1, file2):
    """Print a unified diff between the text files *file1* and *file2*.

    Both files are read with ``read_file_content``; the paths are used as
    the ``fromfile`` / ``tofile`` labels in the diff header. Nothing is
    printed when the files are identical.

    Args:
        file1: Path of the "from" file.
        file2: Path of the "to" file.
    """
    file1_content = read_file_content(file1)
    file2_content = read_file_content(file2)
    diff = difflib.unified_diff(file1_content, file2_content, fromfile=file1, tofile=file2)
    # Lines coming from readlines() and the generated headers already end in
    # "\n", so joining them with "\n" double-spaced the output. Only a line
    # that lacks a newline (a file's unterminated last line) gets one added.
    print("".join(line if line.endswith("\n") else line + "\n" for line in diff), end="")
def main(dir1, dir2):
    """Diff every ``.xxh3`` file under *dir1* against its closest match in *dir2*.

    For each file found under *dir1*, the most similar file under *dir2* is
    chosen with ``find_closest_match`` and a unified diff of the pair is
    printed. A message is printed instead when no candidate matches, or when
    either directory contains no ``.xxh3`` files at all.

    Args:
        dir1: Directory whose ``.xxh3`` files are looked up.
        dir2: Directory providing the candidate ``.xxh3`` files.
    """
    sources = find_xxh3_files(dir1)
    candidates = find_xxh3_files(dir2)
    if not (sources and candidates):
        print("No .xxh3 files found in one or both directories.")
        return

    # Load each candidate once up front; every source is compared to all of them.
    dir2_contents = {}
    for candidate in candidates:
        dir2_contents[candidate] = read_file_content(candidate)

    separator = "###################################\n" * 3
    for source in sources:
        best, score = find_closest_match(read_file_content(source), dir2_contents)
        print(separator)
        if not best:
            print(f"No match found for {source}.")
            continue
        print(f"\nComparing {source} to {best} (Similarity: {score:.2f}):")
        print_diff(source, best)
if __name__ == "__main__":
    # Usage: python <script> DIR1 DIR2
    # Each directory is taken from the command line when given. A missing
    # argument falls back to the empty placeholder, which can still be
    # edited in place as before.
    cli_args = sys.argv[1:]
    dir1 = cli_args[0] if len(cli_args) > 0 else ""
    dir2 = cli_args[1] if len(cli_args) > 1 else ""
    main(dir1, dir2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment