Created
November 24, 2017 04:07
-
-
Save cpelley/0e47ecef94bc61e4a7a8d1e4bbcafcdc to your computer and use it in GitHub Desktop.
Fetch svn file renames/moves
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2.7 | |
import argparse | |
from difflib import SequenceMatcher | |
import os | |
import subprocess | |
import tempfile | |
def _peg_revision(target):
    """Return the ``REV`` part of an svn target ``URL[@REV]``, or ``''``.

    Only an ``@`` found after the last ``/`` counts as the revision
    separator, so a ``user@host`` authority part is not mistaken for one.
    """
    last_segment = target.rpartition('/')[2]
    if '@' not in last_segment:
        return ''
    return last_segment.rpartition('@')[2]


def _paths_with_status(lines, status):
    """Return the paths of the summary ``lines`` starting with ``status``.

    The first two characters of each line are dropped as status columns
    rather than using ``str.strip(status)``, which would also eat a
    matching letter from the *end* of the path.
    """
    return [line[2:].strip() for line in lines if line.startswith(status)]


def _export_content(url, rev):
    """Return the bytes of ``url@rev``, or ``None`` if no file was exported.

    The export goes into a private scratch directory that is always removed
    afterwards, so nothing is left behind in the temp area.
    """
    # Function-scope import: this helper is the only user of shutil.
    import shutil

    scratch = tempfile.mkdtemp()
    try:
        destination = os.path.join(scratch, 'exported')
        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact. With an empty rev the target ends in a bare
        # '@', which svn reads as "no peg revision".
        # The exit status is deliberately ignored: whether a regular file
        # appeared is what decides success (directories are skipped).
        subprocess.call(
            ['svn', 'export', '-q', '{}@{}'.format(url, rev), destination])
        if not os.path.isfile(destination):
            return None
        # Bytes, so undecodable/binary content cannot raise while reading.
        with open(destination, 'rb') as handle:
            return handle.read()
    finally:
        shutil.rmtree(scratch, ignore_errors=True)


def main(old_url, new_url, threshold):
    """Print the files that look renamed/moved between two svn targets.

    Runs ``svn diff --summarize`` between ``old_url`` and ``new_url``,
    exports every added and removed file and pairs an added file with the
    first still-unmatched removed file whose ``SequenceMatcher`` ratio is
    strictly greater than ``threshold``. Each pair is printed as
    ``<removed>\\t->\\t<added>``.

    Args:
        old_url: Old target in the form ``OLD-URL[@OLDREV]``.
        new_url: New target in the form ``NEW-URL[@NEWREV]``.
        threshold: Similarity fraction above which two files are considered
            the same file.

    Raises:
        subprocess.CalledProcessError: If the ``svn diff`` command fails.
    """
    old_rev = _peg_revision(old_url)
    new_rev = _peg_revision(new_url)

    # The targets are handed to svn exactly as the caller supplied them.
    summary = subprocess.check_output(
        ['svn', 'diff', '--old', old_url, '--new', new_url, '--summarize'],
        universal_newlines=True)
    lines = summary.splitlines()
    added = _paths_with_status(lines, 'A')
    # Removed files that have not been paired with an added file yet.
    unmatched = _paths_with_status(lines, 'D')

    # Content of removed files already exported, so each one is fetched at
    # most once instead of once per added file.
    removed_content = {}
    pairing = {}
    for afile in added:
        added_text = _export_content(afile, new_rev)
        if added_text is None:
            continue
        for index, dfile in enumerate(unmatched):
            if dfile not in removed_content:
                removed_content[dfile] = _export_content(dfile, old_rev)
            deleted_text = removed_content[dfile]
            if deleted_text is None:
                continue
            matcher = SequenceMatcher(None, deleted_text, added_text)
            if matcher.ratio() > threshold:
                pairing[dfile] = afile
                # Drop the match so that it doesn't contribute to the next
                # search; safe because the loop is left immediately.
                del unmatched[index]
                removed_content.pop(dfile, None)
                break

    for key, value in pairing.items():
        print('{}\t->\t{}'.format(key, value))
if __name__ == '__main__':
    # Script entry point: read the two svn targets and the optional
    # similarity threshold from the command line and pass them to main().
    cli = argparse.ArgumentParser(
        description=('List all files that are identified as being '
                     'renamed/moved using a heuristic for determining '
                     'percentage similarity.'))
    cli.add_argument('old', help='OLD-URL[@OLDREV]')
    cli.add_argument('new', help='NEW-URL[@NEWREV]')
    cli.add_argument(
        '--threshold',
        type=float,
        default=0.7,
        help=('Threshold, defaults to 0.7 when not specified. '
              'Similarities between files greater than this fraction are '
              'flagged as a rename/move.'))
    options = cli.parse_args()
    main(options.old, options.new, options.threshold)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment