Created
July 26, 2012 08:52
-
-
Save aaugustin/3181087 to your computer and use it in GitHub Desktop.
Script used to create the svn_to_git.py file for code.djangoproject.com
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Build a mapping between svn commits and git changesets. | |
Usage: python build_svn_to_git_mapping.py > svn_to_git.py | |
This script should be run in a clone of the git repository, | |
with a checkout of https://code.djangoproject.com/svn/django in ../django-svn. | |
""" | |
import os
import pprint
import re
import sys
# Svn revisions that are allowed to have no git counterpart; they are
# subtracted from the "missing" report further down.
# These commits are svn copy and svn move operations (tags, branches, etc.)
# + stuff on the py3k branch (verified manually).
expected_missing_ids = {
    2016, 2539, 2540, 4385, 4566, 4805, 4806, 4807, 4808, 6609, 6610, 6611,
    7541, 7542, 7543, 7969, 8037, 8253, 8362, 8626, 8881, 8882, 8883, 8889,
    8961, 9094, 9460, 9500, 9501, 9503, 10851, 11357, 11361, 11366, 11470,
    11471, 11473, 11503, 11506, 11508, 11509, 11510, 11511, 11514, 11515,
    11517, 11522, 11612, 11613, 13258, 13277, 13278, 13280, 13283, 13285,
    13303, 13304, 13762, 13763, 15046, 15047, 15476, 15477, 15903, 15904,
    15906, 16742, 16746, 16771, 16772, 16810, 16948, 16949, 16950, 16951,
    17810,
}
# Reduce `git log` to the two kinds of line the pairing loop needs: the
# "commit <sha>" headers and the git-svn-id trailers.  git log indents commit
# message lines, so the trailer is matched with one or more leading spaces.
# The lines are reversed so that each trailer is seen *before* the header of
# the commit it belongs to.
with os.popen(
    r"git log --all --full-history"
    r" | grep -E '^(commit| +git-svn-id)'"
) as pipe:
    git_log = reversed(pipe.readlines())

# One svn revision number per line: grep drops the blank / dashes-only lines
# and sed reduces "rNNN <rest>" to "NNN".
with os.popen(
    r"svn log --quiet ../django-svn"
    r" | grep -v '^-*$'"
    r" | sed 's/^r\([0-9]*\) .*$/\1/'"
) as pipe:
    svn_log = reversed(pipe.readlines())

# Raw strings so that \d and \. reach the regex engine untouched.
# group(1) of svn_id_re is the svn revision number (the digits after '@').
svn_id_re = re.compile(
    r'^ +git-svn-id: http://code\.djangoproject\.com/.*@(\d+) ')
# group(1) of git_id_re is the full 40-character changeset hash.
git_id_re = re.compile(r'^commit ([0-9a-f]{40})$')
# Pair every svn revision with its git changeset.
#
# git_log yields lines in reversed order, so a git-svn-id trailer comes first
# and the "commit <sha>" header of the same commit follows it.  `svn_id` holds
# the revision that is still waiting for its header (None when idle).
mapping = []
svn_id = None
for line in git_log:
    if svn_id is None:
        # look for a svn id
        match = svn_id_re.match(line)
        if match:
            svn_id = match.group(1)
        continue
    # find the git id; anything that is not a commit header (for instance a
    # second git-svn-id line quoted earlier in the same message) is skipped
    # instead of crashing on a failed match.
    match = git_id_re.match(line)
    if match:
        mapping.append((svn_id, match.group(1)))
        svn_id = None
# svn revision (as a string) -> git changeset hash.
mapping_dict = dict(mapping)

# Cross-check the revisions known to svn against those found in the git log.
expected_ids = set(int(line.strip()) for line in svn_log)
actual_ids = set(int(svn_id) for svn_id, _ in mapping)
missing_ids = sorted(expected_ids - actual_ids - expected_missing_ids)
extra_ids = sorted(actual_ids - expected_ids)

# The documented usage redirects stdout into svn_to_git.py, so problems are
# reported on stderr and signalled with a non-zero exit status rather than
# being written into the generated file.
if missing_ids:
    sys.stderr.write("Missing commits:\n")
    for i in missing_ids:
        sys.stderr.write("%d\n" % i)
if extra_ids:
    sys.stderr.write("Extra commits:\n")
    for i in extra_ids:
        sys.stderr.write("%d %s\n" % (i, mapping_dict[str(i)]))
if missing_ids or extra_ids:
    sys.exit(1)

# Emit a list indexed by svn revision number; revisions without a git
# changeset are None.  The highest mapped revision is computed explicitly
# instead of assuming the last log entry carries it, and an empty mapping
# produces an empty list instead of an IndexError.
last_svn_id = max(actual_ids) if actual_ids else -1
svn_ids = range(last_svn_id + 1)
print("# Use svn_to_git[<svn commit id>] to get the git changeset hash")
print("svn_to_git = " + pprint.pformat(
    [mapping_dict.get(str(i)) for i in svn_ids]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment