Created
March 18, 2025 02:02
-
-
Save naufalso/68bcf08f86cc7f312bd2ccc1ce50dff9 to your computer and use it in GitHub Desktop.
Extract line differences in python.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import difflib
def extract_line_differences(source: str, modified: str) -> dict:
    """
    Extract line-level differences between two multi-line strings.

    Both inputs are split with ``str.splitlines()`` and compared with
    ``difflib.SequenceMatcher``. Each non-equal opcode becomes one entry in
    the returned dictionary:

    - For replacements: key is the differing block of lines in the source and
      value is the corresponding block of lines in the modified string.
    - For deletions: key is the block of lines deleted from the source,
      value is an empty string.
    - For insertions: key is ``"INSERT_AT_LINE_<n>"`` where ``<n>`` is the
      0-based index in the source lines at which the block is inserted, and
      value is the inserted block of lines from the modified string.

    Multi-line blocks are joined with ``"\\n"``.

    Limitations:
        - Because ``splitlines()`` drops line terminators, differences that
          are only in line endings or a trailing newline are not reported.
        - Entries are keyed by the source block text, so if the same block of
          source lines is replaced or deleted at more than one place, the
          later entry overwrites the earlier one.

    Parameters:
        source (str): The original multi-line string.
        modified (str): The modified multi-line string.

    Returns:
        dict: A dictionary with line-level differences (empty when the two
        inputs have identical lines).
    """
    diff_dict = {}

    # Split strings into lists of lines (terminators are discarded).
    source_lines = source.splitlines()
    modified_lines = modified.splitlines()

    # Compare the lists of lines.
    matcher = difflib.SequenceMatcher(None, source_lines, modified_lines)

    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == 'equal':
            continue  # Lines are identical.
        elif tag == 'replace':
            # A block of source lines was replaced by a block of modified lines.
            key = "\n".join(source_lines[i1:i2])
            value = "\n".join(modified_lines[j1:j2])
            diff_dict[key] = value
        elif tag == 'delete':
            # A block of lines in the source was deleted.
            key = "\n".join(source_lines[i1:i2])
            diff_dict[key] = ""
        elif tag == 'insert':
            # New lines were inserted in the modified string; i1 is the
            # 0-based position in the source where they would go.
            key = f"INSERT_AT_LINE_{i1}"
            value = "\n".join(modified_lines[j1:j2])
            diff_dict[key] = value

    return diff_dict
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment