Skip to content

Instantly share code, notes, and snippets.

@aaugustin
Created July 26, 2012 08:52
Show Gist options
  • Save aaugustin/3181087 to your computer and use it in GitHub Desktop.
Save aaugustin/3181087 to your computer and use it in GitHub Desktop.
Script used to create the svn_to_git.py file for code.djangoproject.com
"""Build a mapping between svn commits and git changesets.
Usage: python build_svn_to_git_mapping.py > svn_to_git.py
This script should be run in a clone of the git repository,
with a checkout of https://code.djangoproject.com/svn/django in ../django-svn.
"""
import os
import pprint
import re
# svn revisions that are known to have no git counterpart: svn copy and
# svn move operations (tags, branches, etc.), plus stuff on the py3k branch
# (verified manually). Grouped by thousands for easier scanning.
expected_missing_ids = {
    2016, 2539, 2540,
    4385, 4566, 4805, 4806, 4807, 4808,
    6609, 6610, 6611,
    7541, 7542, 7543, 7969,
    8037, 8253, 8362, 8626, 8881, 8882, 8883, 8889, 8961,
    9094, 9460, 9500, 9501, 9503,
    10851,
    11357, 11361, 11366, 11470, 11471, 11473,
    11503, 11506, 11508, 11509, 11510, 11511, 11514, 11515, 11517, 11522,
    11612, 11613,
    13258, 13277, 13278, 13280, 13283, 13285, 13303, 13304, 13762, 13763,
    15046, 15047, 15476, 15477, 15903, 15904, 15906,
    16742, 16746, 16771, 16772, 16810, 16948, 16949, 16950, 16951,
    17810,
}
# Shell out to git and svn, keeping only the lines the script needs. Each
# listing is reversed before use, and the pipes are closed as soon as their
# output has been read.

# git: keep the "commit <sha>" headers and the "git-svn-id:" trailers.
# The default `git log` format indents commit-message lines by four spaces,
# so the trailer is anchored with exactly that indent ( {4}) both here and in
# svn_id_re below; a different indent would be text quoted inside a message.
with os.popen(
    r"git log --all --full-history"
    r" | grep -E '^(commit| {4}git-svn-id)'"
) as git_pipe:
    # Reversing puts each trailer *before* the header of its own commit.
    git_log = reversed(git_pipe.readlines())

# svn: drop the lines made only of dashes (and empty lines), then reduce each
# "rNNN ..." entry to the bare revision number NNN.
with os.popen(
    r"svn log --quiet ../django-svn"
    r" | grep -v '^-*$'"
    r" | sed 's/^r\([0-9]*\) .*$/\1/'"
) as svn_pipe:
    svn_log = reversed(svn_pipe.readlines())

# Raw strings so that regex escapes such as \d are not treated as (invalid)
# Python string escapes.
# Group 1 of svn_id_re is the svn revision number following the "@".
svn_id_re = re.compile(
    r'^ {4}git-svn-id: http://code.djangoproject.com/.*@(\d+) '
)
# Group 1 of git_id_re is the full 40-character changeset hash.
git_id_re = re.compile(r'^commit ([0-9a-f]{40})$')
# Pair each svn revision with the git changeset that carries it.
#
# git_log was reversed when it was read, so a commit's "git-svn-id" trailer
# comes first and the "commit <sha>" header of that same commit is the very
# next line. Headers that are not preceded by a trailer are skipped: those
# commits carry no svn revision.
mapping = []  # (svn revision, git hash) pairs, both kept as strings
log_lines = iter(git_log)
for line in log_lines:
    # look for a svn id
    match = svn_id_re.match(line)
    if match is None:
        continue
    svn_id = match.group(1)
    # find the git id: it is on the line right after the trailer
    line = next(log_lines, None)
    if line is None:
        # the log ended right after a trailer; nothing left to pair
        break
    match = git_id_re.match(line)
    if match is None:
        # Fail with the offending line instead of an AttributeError on None.
        raise ValueError(
            "expected a 'commit <sha>' line after the git-svn-id of r%s, "
            "got %r" % (svn_id, line)
        )
    git_id = match.group(1)
    mapping.append((svn_id, git_id))
# Cross-check the pairs found in git against the revisions svn reports, then
# either list the discrepancies or print the svn_to_git lookup table.
# Everything is written to stdout.

# svn id (str) -> git hash; if an id occurs in several pairs the last one wins.
mapping_dict = dict(mapping)
expected_ids = set(int(line.strip()) for line in svn_log)
actual_ids = set(int(svn_id) for svn_id, _ in mapping)
# Revisions listed in expected_missing_ids are not reported as missing.
missing_ids = sorted(expected_ids - actual_ids - expected_missing_ids)
extra_ids = sorted(actual_ids - expected_ids)

if missing_ids:
    print("Missing commits:")
    for i in missing_ids:
        print(str(i))

if extra_ids:
    print("Extra commits:")
    for i in extra_ids:
        print("%s %s" % (i, mapping_dict[str(i)]))

if not missing_ids and not extra_ids:
    if not actual_ids:
        # Without any pair there is no highest revision to size the table by.
        raise SystemExit("No git-svn-id commits were found; nothing to map.")
    # Size the table by the highest revision seen rather than by whichever
    # pair happened to be read last, so that no mapped revision is cut off.
    last_svn_id = max(actual_ids)
    svn_ids = range(last_svn_id + 1)
    print("# Use svn_to_git[<svn commit id>] to get the git changeset hash")
    # Index i holds the hash for svn revision i, or None when that revision
    # has no git changeset.
    print("svn_to_git = " + pprint.pformat(
        [mapping_dict.get(str(i)) for i in svn_ids]
    ))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment