Skip to content

Instantly share code, notes, and snippets.

@mdamien
Created April 7, 2019 08:59
Show Gist options
  • Save mdamien/2181b37a1945c1384053c0aaf3740403 to your computer and use it in GitHub Desktop.
Save mdamien/2181b37a1945c1384053c0aaf3740403 to your computer and use it in GitHub Desktop.
mwpersistence4git.py
import mwpersistence
import deltas
import mwreverts
import sys
from pathlib import Path
import git
def git_file_history(path):
    """Yield ``(commit, text)`` for each commit that touched *path*, oldest first.

    Args:
        path: Path to a file inside a git working tree.  May be absolute or
            relative to the current working directory.

    Yields:
        ``(commit, filecontents)`` tuples, where ``commit`` is the GitPython
        commit object and ``filecontents`` is the file's content in that
        commit decoded as UTF-8 (undecodable bytes become U+FFFD).  Commits
        whose tree does not contain the file (e.g. the commit that deleted
        it) are skipped.
    """
    repo = git.Repo(path, search_parent_directories=True)
    git_root = Path(repo.git.rev_parse("--show-toplevel")).resolve()

    # Tree lookups need a POSIX-style path relative to the repository root.
    # Resolve only the parent directory so that a tracked symlink is not
    # followed to its target; resolving the directory part makes it comparable
    # with the root reported by git regardless of the caller's cwd.
    target = Path(path)
    file_in_repo = (
        (target.parent.resolve() / target.name).relative_to(git_root).as_posix()
    )

    # Use the root-relative path for the pathspec too: GitPython runs git from
    # the working-tree root, so a cwd-relative path would be misinterpreted.
    commits = list(repo.iter_commits(paths=file_in_repo))

    # iter_commits yields newest first; callers want chronological order.
    for commit in reversed(commits):
        try:
            blob = commit.tree / file_in_repo
        except KeyError:
            # The file does not exist in this commit's tree (it was deleted
            # here), so there is no content to report for this revision.
            continue
        yield commit, blob.data_stream.read().decode('utf-8', errors='replace')
def main(argv):
    """Print a token-level, blame-like view of one git-tracked file.

    Every historic version of the file is fed, oldest first, into an
    ``mwpersistence.DiffState``.  The tokens of the final version are then
    printed, grouped under a header line (author and commit) each time the
    originating commit changes from one token to the next.

    Args:
        argv: Command-line arguments; ``argv[1]`` is the path of the file to
            analyse.  Extra arguments are ignored.

    Returns:
        Process exit status: 0 on success, 1 when the file has no git
        history, 2 when the path argument is missing.
    """
    if len(argv) < 2:
        prog = argv[0] if argv else 'mwpersistence4git.py'
        print('usage: {} PATH'.format(prog), file=sys.stderr)
        return 2

    path = argv[1]
    revisions = list(git_file_history(path))
    if not revisions:
        # Without at least one revision there are no tokens to attribute.
        print('no git history found for {}'.format(path), file=sys.stderr)
        return 1

    state = mwpersistence.DiffState(
        deltas.SegmentMatcher(),
        revert_radius=15,
        revert_detector=mwreverts.Detector(),
    )

    # Replay the history; the revision id handed to the state is the index
    # into ``revisions`` so tokens can be mapped back to commits below.
    tokens = []
    for i, (_, filecontents) in enumerate(revisions):
        tokens, _, _ = state.update(filecontents, revision=i)

    # ``tokens`` now holds the tokens of the newest version of the file.
    last_commit = None
    for token in tokens:
        # The first entry of token.revisions is taken as the revision that
        # introduced the token (see mwpersistence Token docs).
        commit, _ = revisions[token.revisions[0]]
        commit_id = str(commit)
        if last_commit != commit_id:
            # Start a new section whenever the originating commit changes.
            print()
            print(commit.author, commit)
            last_commit = commit_id
        print(token, end='')
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment