Skip to content

Instantly share code, notes, and snippets.

@Yengas
Last active October 6, 2018 14:27
Show Gist options
  • Save Yengas/de50ed357e01a59656376ee3849318af to your computer and use it in GitHub Desktop.
Save Yengas/de50ed357e01a59656376ee3849318af to your computer and use it in GitHub Desktop.
A Python script that compares two branches/commits to see which files were added/deleted/modified/renamed. It also prints out which line ranges were added/deleted.

Usage

# get the diff between current head and master
git-changed-files-and-lines.py master
# get the diff between master and dev
git-changed-files-and-lines.py master dev

Example Result

{
  "deleted": [
    "file1",
    "file2"
  ],
  "renamed": [
    {
      "to": "file4",
      "from": "file3"
    }
  ],
  "added": [
    "file5",
    "file6"
  ],
  "modified": {
    "test.js": {
      "deleted": [
        [ 433, 465 ]
      ],
      "source_file": "test.js",
      "added": [
        [ 132, 142 ]
      ]
    }
  }
}

More precisely, the schema is:

{{
  deleted: {string[]}, // deleted files paths,
  added: {string[]}, // added files paths, relative to the current directory
  renamed: {{ from: string, to: string }[]} // old and new paths of the renamed files
  /** 
   * modified files:
   * a map of modified files, where source_file is the name the file used to have, usually the same as the map key.
   * added and deleted each hold an array of [start, end] pairs: the starting and ending line of each modification in the file.
   * For example, having [132, 142] in the added key means there were 11 lines added to the file, and in the resulting file
   * the new lines start at line 132. Having [433, 465] in deleted means there were 33 lines removed from the file,
   * and if you were to look at the old version of the file, the removed lines would start at line 433.
   * Each of the starting and ending lines are inclusive.
  **/
  modified: {{ [string]: { source_file: string, deleted: ([number, number])[], added: ([number, number])[] }}}
}}
#!/usr/bin/env python2.7
import sys
import subprocess
import re
import json
# Default refs to compare; both can be overridden from the command line below.
SOURCE = "HEAD"
# The conventional branch name is lowercase; "MASTER" does not resolve on
# case-sensitive ref stores, which made the no-argument invocation fail.
TARGET = "master"

# First line of every per-file section in `git diff` output
FILE_START_REGEX = re.compile(r"^diff \-\-git")
# Hunk header: groups are (old start, old count, new start, new count).
# NOTE: this pattern requires an explicit ",count" on both sides; headers that
# omit the count (git does so when it is 1, e.g. "@@ -5 +5 @@") do not match.
LINE_START_REGEX = re.compile(r"^\@\@ \-([0-9]+)\,([0-9]+) \+([0-9]+)\,([0-9]+) \@\@")
# Removed / added content lines inside a hunk
DELETE_LINE_REGEX = re.compile(r"^\-")
ADD_LINE_REGEX = re.compile(r"^\+")
# "\ No newline at end of file" marker, which is not a content line
NEW_LINE_REGEX = re.compile(r"^\\ No newline")

# Parser states used while walking the diff output
PARSE_NONE = 0
PARSE_HEADER = 1
PARSE_LINES = 2

# Optional CLI overrides: argv[1] is the target ref, argv[2] the source ref
if len(sys.argv) > 1:
    TARGET = sys.argv[1]
if len(sys.argv) > 2:
    SOURCE = sys.argv[2]
# create git diff subprocess with target, source and filter
def createGitDiffSubprocess(target, source, f, nameOnly = True):
    """Start `git diff` for the range `target..source` and return the Popen object.

    `f` is passed verbatim as the value of `--diff-filter=`. When `nameOnly`
    is true, `--name-only` is added before the filter. The child's stdout is
    captured through a pipe; nothing is read or waited on here.
    """
    command = ['git', 'diff']
    if nameOnly:
        command.append("--name-only")
    command.append("--diff-filter=" + f)
    command.append(target + '..' + source)
    return subprocess.Popen(command, stdout=subprocess.PIPE)
# given a subprocess, puts each line of its stdout to an array
def subprocessLinesToArr(proc):
    """Read all of `proc.stdout` and return its lines with trailing whitespace removed.

    The pipe is iterated directly, so reading stops at EOF whether the pipe
    yields str (Python 2) or bytes (Python 3); the previous `''` sentinel never
    matched `b''` and would loop forever on a bytes pipe. Bytes lines are
    decoded so the result can be serialised as JSON. The pipe is closed and
    the process waited on afterwards so the child does not linger.
    """
    lines = []
    try:
        for line in proc.stdout:
            if not isinstance(line, str):
                line = line.decode("utf-8", "replace")
            lines.append(line.rstrip())
    finally:
        proc.stdout.close()
        proc.wait()
    return lines
# get the files that were added
def gitAddedFilesSince(target, source):
    """Return the paths `git diff --name-only --diff-filter=A target..source` lists."""
    diff_proc = createGitDiffSubprocess(target, source, 'A')
    return subprocessLinesToArr(diff_proc)
# get the files that were deleted
def gitDeletedFilesSince(target, source):
    """Return the paths `git diff --name-only --diff-filter=D target..source` lists."""
    diff_proc = createGitDiffSubprocess(target, source, 'D')
    return subprocessLinesToArr(diff_proc)
# get the renamed files by parsing the result of --diff-filter=R
def gitRenamedFilesSince(target, source):
    """Return the renames in `target..source` as a list of {"from", "to"} dicts.

    Runs `git diff --diff-filter=R` and reads the "rename from <path>" and
    "rename to <path>" lines of each file header. The lines are recognised by
    their prefix rather than by their position after "diff --git", so extra
    header lines (for example "old mode" / "new mode") do not shift the parse.
    Only header lines are inspected; everything from the first hunk header
    ("@@") up to the next "diff --git" line is skipped.
    """
    rename_from, rename_to = "rename from ", "rename to "
    proc = createGitDiffSubprocess(target, source, 'R', False)
    results = []
    old_file = None
    in_header = False
    try:
        for line in proc.stdout:
            # Python 3 pipes yield bytes; Python 2 pipes already yield str
            if not isinstance(line, str):
                line = line.decode("utf-8", "replace")
            if FILE_START_REGEX.match(line) is not None:
                # a new file section starts: forget any half-parsed rename
                in_header = True
                old_file = None
            elif not in_header:
                continue
            elif line.startswith("@@"):
                # hunks follow; their content is irrelevant for renames
                in_header = False
            elif line.startswith(rename_from):
                old_file = line[len(rename_from):].rstrip("\n")
            elif line.startswith(rename_to) and old_file is not None:
                new_file = line[len(rename_to):].rstrip("\n")
                results.append({ "from": old_file, "to": new_file })
                old_file = None
    finally:
        # release the pipe and reap the child even if parsing fails
        proc.stdout.close()
        proc.wait()
    return results
# create a result object that holds each file and their added/deleted lines
def createResultObject():
    """Build an accumulator for per-file added/deleted line ranges.

    Returns a dict with two entries:
      "result"        - the mapping being filled: file name ->
                        {"source_file", "added", "deleted"}, where "added" and
                        "deleted" are lists of inclusive [start, end] ranges.
      "addContinuous" - function(current_file, old_file, line, deleted) that
                        records one line number for `current_file`.
    """
    files = {}

    def addContinuous(current_file, old_file, line, deleted):
        # first sighting of this file creates its entry
        entry = files.setdefault(
            current_file,
            { "source_file": old_file, "added": [], "deleted": [] }
        )
        ranges = entry["deleted"] if deleted else entry["added"]
        if ranges and ranges[-1][-1] == line - 1:
            # directly follows the previous range: stretch it by one line
            ranges[-1][-1] = line
        else:
            # gap (or first line): open a new single-line range
            ranges.append([line, line])

    return { "result": files, "addContinuous": addContinuous }
# Hunk header of a unified diff. Git omits the ",count" part when the count is
# exactly 1 (e.g. "@@ -5 +5 @@"), so both counts are optional here.
_HUNK_HEADER_REGEX = re.compile(r"^@@ -([0-9]+)(?:,([0-9]+))? \+([0-9]+)(?:,([0-9]+))? @@")


# extracts the file name from a "--- a/<path>" or "+++ b/<path>" header line
def _diffHeaderPath(line):
    # skip the 4-character marker and the 2-character "a/" or "b/" prefix
    path = line[6:].rstrip("\n")
    # git appends a TAB after names that contain spaces; it is not part of the name
    return path[:-1] if path.endswith("\t") else path


# get the modified files and lines between the target and source refs
def gitModifiedFilesAndLinesSince(target, source):
    """Return added/deleted line ranges for every file modified in `target..source`.

    Runs `git diff --diff-filter=M` and walks the unified diff:
      * "diff --git" starts a file header; inside it the "--- " and "+++ "
        lines give the old and new file names. Other header lines (index,
        mode changes, "Binary files ... differ") are ignored.
      * each "@@ -l,n +r,m @@" hunk header sets the current old/new line
        numbers and how many old/new lines the hunk still contains; a missing
        count means 1.
      * inside a hunk, "+" lines are recorded as added at the new-file line
        number, "-" lines as deleted at the old-file line number, and any
        other line advances both sides.

    Returns the "result" mapping built by createResultObject().
    Raises Exception('Git diff parse error') on a line that fits none of the above.
    """
    proc = createGitDiffSubprocess(target, source, 'M', False)
    parse_status = PARSE_NONE
    current_file, old_file = None, None
    left, right = 0, 0
    leftLinesCount, rightLinesCount = 0, 0
    created = createResultObject()
    addContinuous = created["addContinuous"]
    try:
        for line in proc.stdout:
            # Python 3 pipes yield bytes; Python 2 pipes already yield str
            if not isinstance(line, str):
                line = line.decode("utf-8", "replace")
            if FILE_START_REGEX.match(line) is not None:
                # start parsing the header of the change such as which files have been changed
                parse_status = PARSE_HEADER
                current_file, old_file = None, None
                continue
            match = _HUNK_HEADER_REGEX.match(line)
            # start parsing add/removed lines changes if the hunk header matches
            if match is not None:
                if current_file is None:
                    # a hunk without a preceding "+++ " line cannot be attributed to a file
                    raise Exception('Git diff parse error')
                left = int(match.group(1))
                leftLinesCount = int(match.group(2) or 1)
                right = int(match.group(3))
                rightLinesCount = int(match.group(4) or 1)
                parse_status = PARSE_LINES
                continue
            if parse_status == PARSE_HEADER:
                if line.startswith("--- "):
                    old_file = _diffHeaderPath(line)
                elif line.startswith("+++ "):
                    current_file = _diffHeaderPath(line)
                continue
            if parse_status == PARSE_LINES:
                # "\ No newline at end of file" is a marker, not a content line
                if NEW_LINE_REGEX.match(line) is not None:
                    continue
                # if there are still lines to parse for either add or remove operations, continue
                if leftLinesCount > 0 or rightLinesCount > 0:
                    if ADD_LINE_REGEX.match(line) is not None:
                        # found an added line
                        addContinuous(current_file, old_file, right, False)
                        right += 1
                        rightLinesCount -= 1
                    elif DELETE_LINE_REGEX.match(line) is not None:
                        # found a removed line
                        addContinuous(current_file, old_file, left, True)
                        left += 1
                        leftLinesCount -= 1
                    else:
                        # this line is shared between old/new version of the file
                        right += 1
                        left += 1
                        leftLinesCount -= 1
                        rightLinesCount -= 1
                    continue
            # if we exhausted all lines for removed/added, couldn't find any new hunk header
            # that means something has gone wrong, raise an exception
            raise Exception('Git diff parse error')
    finally:
        # release the pipe and reap the child even if parsing fails
        proc.stdout.close()
        proc.wait()
    return created["result"]
# Gather every change category for TARGET..SOURCE and emit them as one JSON object.
# The parenthesised form prints the same text under Python 2.7.
print(json.dumps({
    "added": gitAddedFilesSince(TARGET, SOURCE),
    "deleted": gitDeletedFilesSince(TARGET, SOURCE),
    "renamed": gitRenamedFilesSince(TARGET, SOURCE),
    "modified": gitModifiedFilesAndLinesSince(TARGET, SOURCE)
}))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment