Skip to content

Instantly share code, notes, and snippets.

@naufalso
Created March 18, 2025 02:02
Show Gist options
  • Save naufalso/68bcf08f86cc7f312bd2ccc1ce50dff9 to your computer and use it in GitHub Desktop.
Save naufalso/68bcf08f86cc7f312bd2ccc1ce50dff9 to your computer and use it in GitHub Desktop.
Extract line differences in Python.
import difflib
def extract_line_differences(source: str, modified: str) -> dict:
    """Return the line-level changes that turn ``source`` into ``modified``.

    Both inputs are split with ``str.splitlines()`` and compared using
    ``difflib.SequenceMatcher``. Each non-equal opcode contributes one
    entry to the result:

    - replace: key is the replaced source lines joined with a newline,
      value is the replacing lines from ``modified`` joined the same way.
    - delete: key is the removed source lines joined with a newline,
      value is an empty string.
    - insert: key is ``"INSERT_AT_LINE_<i>"`` where ``<i>`` is the
      0-based index in the source line list at which the new lines go,
      value is the inserted lines joined with a newline.

    Note that entries are stored by key, so if the same block of source
    lines is changed in more than one place, the later entry overwrites
    the earlier one.

    Parameters:
        source (str): The original multi-line string.
        modified (str): The modified multi-line string.

    Returns:
        dict: Mapping that describes each differing block, in the order
        the opcodes are produced.
    """
    old_lines = source.splitlines()
    new_lines = modified.splitlines()
    opcodes = difflib.SequenceMatcher(None, old_lines, new_lines).get_opcodes()

    changes = {}
    for op, src_start, src_end, mod_start, mod_end in opcodes:
        if op == "equal":
            # Unchanged run of lines: nothing to record.
            continue

        removed = "\n".join(old_lines[src_start:src_end])
        added = "\n".join(new_lines[mod_start:mod_end])

        if op == "insert":
            # No source text exists to serve as a key, so use the position.
            changes[f"INSERT_AT_LINE_{src_start}"] = added
        elif op in ("replace", "delete"):
            # For a delete the modified slice is empty, so ``added`` is "".
            changes[removed] = added

    return changes
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment