Created
March 14, 2012 03:59
-
-
Save bos/2033956 to your computer and use it in GitHub Desktop.
5x to 8x speedup in "hg convert"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# HG changeset patch | |
# User Bryan O'Sullivan <[email protected]> | |
# Date 1331697209 25200 | |
# Branch stable | |
# Node ID 9c15f20c0418fbad1da202f72dc894372538beba | |
# Parent 6344043924497cd06d781d9014c66802285072e4 | |
imported patch libgit2.patch | |
diff -r 634404392449 -r 9c15f20c0418 hgext/convert/git.py | |
--- a/hgext/convert/git.py Sun Jan 01 13:37:30 2012 -0600 | |
+++ b/hgext/convert/git.py Tue Mar 13 20:53:29 2012 -0700 | |
@@ -12,7 +12,22 @@ | |
from common import NoRepo, commit, converter_source, checktool | |
-class convert_git(converter_source): | |
+class convert_git_base(converter_source): | |
+    def __init__(self, ui, path, rev=None): | |
+        """Locate the Git metadata directory for path and remember it. | |
+ | |
+        path may name a work tree (a directory containing ".git") or the | |
+        metadata directory itself; the resolved directory is stored in | |
+        self.path. Raises NoRepo when no "objects" entry exists there. | |
+ | |
+        rev defaults to None, as it did on convert_git.__init__ before | |
+        this base class was split out. | |
+        """ | |
+        super(convert_git_base, self).__init__(ui, path, rev=rev) | |
+ | |
+        if os.path.isdir(path + "/.git"): | |
+            path += "/.git" | |
+        if not os.path.exists(path + "/objects"): | |
+            raise NoRepo(_("%s does not look like a Git repository") % path) | |
+ | |
+        checktool('git', 'git') | |
+        self.path = path | |
+ | |
+ # pygit2 0.16 has no support for diffing, so we have to shell out | |
+ # to git for diffs. The methods below would otherwise belong in | |
+ # convert_git_plain. | |
+ | |
# Windows does not support GIT_DIR= construct while other systems | |
# cannot remove environment variable. Just assume none have | |
# both issues. | |
@@ -39,47 +54,6 @@ | |
else: | |
return util.popen('GIT_DIR=%s %s' % (self.path, s), 'rb') | |
- def gitread(self, s): | |
- fh = self.gitopen(s) | |
- data = fh.read() | |
- return data, fh.close() | |
- | |
- def __init__(self, ui, path, rev=None): | |
- super(convert_git, self).__init__(ui, path, rev=rev) | |
- | |
- if os.path.isdir(path + "/.git"): | |
- path += "/.git" | |
- if not os.path.exists(path + "/objects"): | |
- raise NoRepo(_("%s does not look like a Git repository") % path) | |
- | |
- checktool('git', 'git') | |
- | |
- self.path = path | |
- | |
- def getheads(self): | |
- if not self.rev: | |
- heads, ret = self.gitread('git rev-parse --branches --remotes') | |
- heads = heads.splitlines() | |
- else: | |
- heads, ret = self.gitread("git rev-parse --verify %s" % self.rev) | |
- heads = [heads[:-1]] | |
- if ret: | |
- raise util.Abort(_('cannot retrieve git heads')) | |
- return heads | |
- | |
- def catfile(self, rev, type): | |
- if rev == hex(nullid): | |
- raise IOError() | |
- data, ret = self.gitread("git cat-file %s %s" % (type, rev)) | |
- if ret: | |
- raise util.Abort(_('cannot read %r object at %s') % (type, rev)) | |
- return data | |
- | |
- def getfile(self, name, rev): | |
- data = self.catfile(rev, "blob") | |
- mode = self.modecache[(name, rev)] | |
- return data, mode | |
- | |
def getchanges(self, version): | |
self.modecache = {} | |
fh = self.gitopen("git diff-tree -z --root -m -r %s" % version) | |
@@ -106,6 +80,105 @@ | |
raise util.Abort(_('cannot read changes in %s') % version) | |
return (changes, {}) | |
+    def getchangedfiles(self, version, i): | |
+        """Return the file names git diff-tree reports for version. | |
+ | |
+        With i set, only names are requested and the diff is taken | |
+        against parent number i + 1 (version^<i+1>). With i of None, | |
+        the name is taken from every output line that contains a tab. | |
+        Raises util.Abort when closing the git pipe reports a failure. | |
+        """ | |
+        if i is not None: | |
+            cmd = ('git diff-tree --name-only --root -r %s "%s^%s" --' | |
+                   % (version, version, i + 1)) | |
+            pipe = self.gitopen(cmd) | |
+            changed = [name.rstrip('\n') for name in pipe] | |
+        else: | |
+            pipe = self.gitopen("git diff-tree --root -m -r %s" % version) | |
+            changed = [] | |
+            for line in pipe: | |
+                # lines without a tab carry no file name | |
+                if "\t" in line: | |
+                    status, name = line[:-1].split("\t") | |
+                    changed.append(name) | |
+        if pipe.close(): | |
+            raise util.Abort(_('cannot read changes in %s') % version) | |
+ | |
+        return changed | |
+ | |
+def hexoid(obj): | |
+    """Return obj.oid hex-encoded as a str.""" | |
+    # pygit2's "hex" property is unicode, but "oid" is str | |
+    raw = obj.oid | |
+    return raw.encode('hex') | |
+ | |
+class convert_git_pygit2(convert_git_base): | |
+    """Git converter source that reads repository data through pygit2. | |
+ | |
+    Heads, commits, file contents and tags are read in-process via | |
+    self.repo; everything else is inherited from convert_git_base. | |
+    """ | |
+ | |
+    hex_nullid = hex(nullid) | |
+ | |
+    def __init__(self, ui, path, rev): | |
+        """Open the repository located by convert_git_base with pygit2. | |
+ | |
+        Raises ImportError when pygit2 is not installed. | |
+        """ | |
+        super(convert_git_pygit2, self).__init__(ui, path, rev=rev) | |
+        # Imported here rather than at module level so that a missing | |
+        # pygit2 surfaces as ImportError from the constructor. | |
+        import pygit2 | |
+        # Open the existing repository instead of (re)initialising one, | |
+        # and use self.path: the base class has already resolved a work | |
+        # tree path to its ".git" directory there. | |
+        self.repo = pygit2.Repository(self.path) | |
+ | |
+    def getheads(self): | |
+        """Return the hex ids of the heads to convert. | |
+ | |
+        Without self.rev, every reference under refs/heads/ or | |
+        refs/remotes/ is resolved. With self.rev, it is first looked up | |
+        as a reference name and, on KeyError, as an object id. | |
+        """ | |
+        if not self.rev: | |
+            return [hexoid(self.repo.lookup_reference(r).resolve()) | |
+                    for r in self.repo.listall_references() | |
+                    if (r.startswith('refs/heads/') or | |
+                        r.startswith('refs/remotes/'))] | |
+        try: | |
+            return [hexoid(self.repo.lookup_reference(self.rev).resolve())] | |
+        except KeyError: | |
+            return [hexoid(self.repo[self.rev])] | |
+ | |
+    def getcommit(self, rev): | |
+        """Return a commit object for the Git commit with hex id rev. | |
+ | |
+        When author and committer differ, a "committer:" line is | |
+        appended to the description. | |
+        """ | |
+        def prettyname(sig): | |
+            # "Name <email>", or just the email when no name is recorded | |
+            if sig.name: | |
+                return self.recode('%s <%s>' % (sig.name, sig.email)) | |
+            return self.recode(sig.email) | |
+ | |
+        c = self.repo[rev.decode('hex')] | |
+        message = c.message | |
+        author = prettyname(c.author) | |
+        committer = prettyname(c.committer) | |
+        if author != committer: | |
+            message += '\ncommitter: %s\n' % committer | |
+        # pygit2 reports the offset in minutes east of UTC, while the | |
+        # "unixtime offset" date format expects seconds west of UTC. | |
+        tz = -c.commit_time_offset * 60 | |
+        return commit(parents=[hexoid(p) for p in c.parents], | |
+                      date='%s %s' % (c.commit_time, tz), | |
+                      author=author, desc=self.recode(message), rev=rev) | |
+ | |
+    def getfile(self, name, rev): | |
+        """Return (data, mode) for the blob with hex id rev. | |
+ | |
+        The mode is read from self.modecache[(name, rev)], which | |
+        getchanges resets on each call. Raises IOError for the null id. | |
+        """ | |
+        if rev == self.hex_nullid: | |
+            raise IOError | |
+        return self.repo[rev.decode('hex')].data, self.modecache[(name, rev)] | |
+ | |
+    def gettags(self): | |
+        """Return a dict mapping each name under refs/tags/ to a hex id.""" | |
+        return dict((r.split('/', 2)[-1], | |
+                     hexoid(self.repo.lookup_reference(r).resolve())) | |
+                    for r in self.repo.listall_references() | |
+                    if r.startswith('refs/tags/')) | |
+ | |
+class convert_git_plain(convert_git_base): | |
+    def gitread(self, s): | |
+        """Run command s via gitopen; return (output, close status).""" | |
+        pipe = self.gitopen(s) | |
+        output = pipe.read() | |
+        status = pipe.close() | |
+        return output, status | |
+ | |
+    def getheads(self): | |
+        """Return the head ids printed by git rev-parse. | |
+ | |
+        With self.rev set, only that revision is verified and returned; | |
+        otherwise all branches and remotes are listed. Raises util.Abort | |
+        when git reports a failure. | |
+        """ | |
+        if self.rev: | |
+            out, status = self.gitread("git rev-parse --verify %s" % self.rev) | |
+            # drop the trailing newline from the single id | |
+            found = [out[:-1]] | |
+        else: | |
+            out, status = self.gitread('git rev-parse --branches --remotes') | |
+            found = out.splitlines() | |
+        if status: | |
+            raise util.Abort(_('cannot retrieve git heads')) | |
+        return found | |
+ | |
+    def catfile(self, rev, type): | |
+        """Return the output of git cat-file for object rev of the given type. | |
+ | |
+        Raises IOError for the null id and util.Abort when git reports | |
+        a failure. | |
+        """ | |
+        if rev == hex(nullid): | |
+            raise IOError() | |
+        cmd = "git cat-file %s %s" % (type, rev) | |
+        contents, status = self.gitread(cmd) | |
+        if not status: | |
+            return contents | |
+        raise util.Abort(_('cannot read %r object at %s') % (type, rev)) | |
+ | |
+    def getfile(self, name, rev): | |
+        """Return (data, mode): blob rev and self.modecache[(name, rev)].""" | |
+        # the blob is read first, then the cached mode is looked up | |
+        return self.catfile(rev, "blob"), self.modecache[(name, rev)] | |
+ | |
def getcommit(self, version): | |
c = self.catfile(version, "commit") # read the commit hash | |
end = c.find("\n\n") | |
@@ -159,24 +232,6 @@ | |
return tags | |
- def getchangedfiles(self, version, i): | |
- changes = [] | |
- if i is None: | |
- fh = self.gitopen("git diff-tree --root -m -r %s" % version) | |
- for l in fh: | |
- if "\t" not in l: | |
- continue | |
- m, f = l[:-1].split("\t") | |
- changes.append(f) | |
- else: | |
- fh = self.gitopen('git diff-tree --name-only --root -r %s "%s^%s" --' | |
- % (version, version, i + 1)) | |
- changes = [f.rstrip('\n') for f in fh] | |
- if fh.close(): | |
- raise util.Abort(_('cannot read changes in %s') % version) | |
- | |
- return changes | |
- | |
def getbookmarks(self): | |
bookmarks = {} | |
@@ -203,3 +258,9 @@ | |
pass | |
return bookmarks | |
+ | |
+def convert_git(ui, path, rev=None): | |
+    """Return a Git converter source for path. | |
+ | |
+    convert_git_pygit2 is tried first; if constructing it raises | |
+    ImportError, convert_git_plain is used instead. | |
+    """ | |
+    try: | |
+        source = convert_git_pygit2(ui, path, rev) | |
+    except ImportError: | |
+        source = convert_git_plain(ui, path, rev) | |
+    return source |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment