Skip to content

Instantly share code, notes, and snippets.

@raghavkaul
Last active November 12, 2020 00:32
Show Gist options
  • Save raghavkaul/ddd088902ea8acd7db4157354ed513d6 to your computer and use it in GitHub Desktop.
Save raghavkaul/ddd088902ea8acd7db4157354ed513d6 to your computer and use it in GitHub Desktop.
Parse git unified diff files in Python 3
import re
class Patch:
    """Parse and encapsulate the chunks of a git unified diff/patch file.

    After construction, ``chunks`` is a list with one dict per chunk header
    found in the input, in order of appearance. Each dict has the keys:

    * ``old_start`` (int): where the chunk begins in the old file.
    * ``old_n_lines`` (int): number of lines in the chunk in the old file.
    * ``new_start`` (int): where the chunk begins in the new file.
    * ``new_n_lines`` (int): number of lines in the chunk in the new file.
    * ``chunk_content`` (str): all text after the header's closing ``@@``
      (including anything left on the header line itself) up to the next
      ``@@`` or the end of the input.
    """

    # See: https://stackoverflow.com/questions/987372/what-is-the-format-of-a-patch-file
    # Chunk headers look like: @@ -4,9 +4,12 @@. The following regex parses
    # them. The ",<count>" parts are optional because one-line chunks are
    # written without a count (e.g. "@@ -1 +1 @@").
    ptn = (
        r"@@ -(?P<old_start>\d+)"  # Where the chunk begins in the old file
        r"(?:,(?P<old_n_lines>\d+))? "  # Number of lines in the old file
        r"\+(?P<new_start>\d+)"  # Where the chunk begins in the new file
        r"(?:,(?P<new_n_lines>\d+))? "  # Number of lines in the new file
        # String contents of the chunk. The lookahead is tested *before* each
        # character is consumed so the character right in front of the next
        # "@@" (normally the trailing newline) is kept.
        # NOTE: the content is cut at ANY "@@", so a patched line that itself
        # contains "@@" truncates its chunk.
        r"@@(?P<chunk_content>(?:(?!@@).)+)"
    )

    def __init__(self, patchfile_content: str) -> None:
        """Extract every chunk of ``patchfile_content`` into ``self.chunks``.

        Args:
            patchfile_content: Full text of a unified diff/patch file. Input
                without any chunk header yields an empty ``chunks`` list.
        """
        # Per-instance list: it must be created here, since ``self`` does not
        # exist in the class body.
        self.chunks = []
        # DOTALL lets "." cross newlines so a chunk's content can span lines.
        for match in re.finditer(self.ptn, patchfile_content, re.DOTALL):
            chunk = match.groupdict()
            # These parts of the patch chunk header are integer line numbers
            for start_key in ("old_start", "new_start"):
                chunk[start_key] = int(chunk[start_key])
            # An omitted count means the chunk covers exactly one line.
            for count_key in ("old_n_lines", "new_n_lines"):
                raw_count = chunk[count_key]
                chunk[count_key] = 1 if raw_count is None else int(raw_count)
            self.chunks.append(chunk)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment