Last active
November 12, 2020 00:32
-
-
Save raghavkaul/ddd088902ea8acd7db4157354ed513d6 to your computer and use it in GitHub Desktop.
Parse git unified diff files in Python 3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
class Patch:
    """Parse a git unified diff/patch file into its chunks (hunks).

    After construction, ``chunks`` is a list with one dict per chunk, in the
    order the chunks appear in the input. Each dict has these keys:

    * ``old_start`` (int): line where the chunk begins in the old file.
    * ``old_n_lines`` (int): number of lines the chunk spans in the old file.
    * ``new_start`` (int): line where the chunk begins in the new file.
    * ``new_n_lines`` (int): number of lines the chunk spans in the new file.
    * ``chunk_content`` (str): everything after the header's closing ``@@``
      up to the next chunk header or the end of the input.
    """

    # See: https://stackoverflow.com/questions/987372/what-is-the-format-of-a-patch-file
    # Chunk headers look like: @@ -4,9 +4,12 @@. The following regex parses them.
    # The ",<count>" part is optional in the unified format (e.g. "@@ -3 +3 @@").
    # Flags are inlined so the pattern behaves the same however it is applied:
    #   m -> "^" anchors at the start of every line, so "@@" inside a changed
    #        line is never mistaken for a header
    #   s -> "." also matches newlines, so the content can span lines
    ptn = (
        r"(?ms)"
        r"^@@ -(?P<old_start>\d+)"  # Where the chunk begins in the old file
        r"(?:,(?P<old_n_lines>\d+))? "  # Number of lines in the chunk in the old file
        r"\+(?P<new_start>\d+)"  # Where the chunk begins in the new file
        r"(?:,(?P<new_n_lines>\d+))? "  # Number of lines in the chunk in the new file
        r"@@(?P<chunk_content>.*?)"  # String contents of the chunk
        # Stop (without consuming) at the next chunk header or at end of input.
        r"(?=^@@ -\d+(?:,\d+)? \+\d+(?:,\d+)? @@|\Z)"
    )

    def __init__(self, patchfile_content: str) -> None:
        """Extract every chunk found in ``patchfile_content``.

        Args:
            patchfile_content: Full text of a unified diff. Text without any
                chunk header yields an empty ``chunks`` list.
        """
        # Per-instance list: assigning it in the class body is impossible
        # (there is no ``self`` there) and would share state between instances.
        self.chunks = []
        for match in re.finditer(self.ptn, patchfile_content):
            chunk = match.groupdict()
            # These parts of the patch chunk header are integer line numbers
            for numeric_part in (
                "old_start",
                "old_n_lines",
                "new_start",
                "new_n_lines",
            ):
                raw_value = chunk[numeric_part]
                # An omitted line count means the chunk covers exactly one line.
                chunk[numeric_part] = 1 if raw_value is None else int(raw_value)
            self.chunks.append(chunk)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment