Created
December 13, 2010 00:59
-
-
Save madsen/738518 to your computer and use it in GitHub Desktop.
Create a Git repo from a Perl libwin32 Subversion dump file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/perl
#---------------------------------------------------------------------
# git-from-svndump-libwin32.pl
# Copyright 2010 Christopher J. Madsen
#
# Create a Git repo from a libwin32 Subversion dump file
#---------------------------------------------------------------------
use strict; | |
use warnings; | |
use autodie ':io'; | |
use Date::Format 'time2str'; | |
use Date::Parse 'str2time'; | |
use Digest::MD5 'md5_hex'; | |
use String::RewritePrefix (); | |
use Text::Tabs 'expand'; | |
use Time::Local 'timegm'; | |
#===================================================================== | |
# File holding the '<svn username> = real-name <email address>' lines
# that load_authors reads:
my $authorFile = "$ENV{HOME}/.gitauthors";

# svn username => "real-name <email address>", filled in by load_authors:
my %user;

# Node-path prefix => replacement prefix.  Only paths that start with one
# of these keys are imported (see $prefixRE below); want_node hands this
# table to String::RewritePrefix to rewrite the path.
my %pathXlate = (
  # Prefixes that are removed completely:
  'trunk/vc/GUEmap/' => '',
  'trunk/Win32-IPC/' => '', # r198 & r224 directories changed
  # Entries that only lose the leading "trunk/":
  (map { ("trunk/$_" => $_) } qw(
    ppport.h Makefile.PL README Changes
    ChangeNotify/ Event/ IPC/ Mutex/ Semaphore/
  )),
);

# Release tag => release date.  processRev uses the date as the commit
# time of the revision that imported that release.
my %release = (
  'libwin32-0.02'  => 'Sat, 03 May 1997 16:24:47 GMT',
  'libwin32-0.03'  => 'Thu, 08 May 1997 01:35:34 GMT',
  'libwin32-0.04'  => 'Sun, 11 May 1997 08:36:00 GMT',
  'libwin32-0.05'  => 'Sun, 11 May 1997 21:26:39 GMT',
  'libwin32-0.06'  => 'Tue, 13 May 1997 07:57:05 GMT',
  'libwin32-0.07'  => 'Wed, 28 May 1997 07:20:19 GMT',
  'libwin32-0.08'  => 'Sat, 26 Jul 1997 23:48:09 GMT',
  'libwin32-0.09'  => 'Mon, 15 Dec 1997 06:47:56 GMT',
  'libwin32-0.10'  => 'Sat, 07 Feb 1998 04:19:46 GMT',
  'libwin32-0.12'  => 'Tue, 12 May 1998 17:32:48 GMT',
  'libwin32-0.13'  => 'Fri, 18 Sep 1998 23:14:38 GMT',
  'libwin32-0.14'  => 'Sun, 15 Nov 1998 05:23:00 GMT',
  'libwin32-0.15'  => 'Sun, 26 Sep 1999 04:21:18 GMT',
  'libwin32-0.151' => 'Mon, 27 Sep 1999 00:25:21 GMT',
  'libwin32-0.16'  => 'Tue, 23 May 2000 06:16:26 GMT',
  'libwin32-0.171' => 'Fri, 16 Feb 2001 00:00:00 GMT', # ESTIMATED
  'libwin32-0.18'  => 'Tue, 13 Nov 2001 07:57:48 GMT',
  'libwin32-0.191' => 'Tue, 09 Jul 2002 01:11:21 GMT',
  'libwin32-0.24'  => 'Tue, 15 Feb 2005 07:31:38 GMT',
  'libwin32-0.25'  => 'Sat, 17 Sep 2005 19:50:59 GMT',
  'libwin32-0.26'  => 'Sat, 17 Sep 2005 20:42:53 GMT',
  'libwin32-0.27'  => 'Wed, 07 Mar 2007 20:12:57 GMT',
  'libwin32-0.28'  => 'Sat, 15 Sep 2007 00:53:15 GMT',
);

# Repository URL that want_node passes to "svn cat" for copied files:
my $url = 'file:///var/svn/libwin32'; # FIXME

# Matches any Node-path that begins with one of the %pathXlate keys:
my $prefixRE = do {
  my $alternatives = join('|', map { quotemeta $_ } keys %pathXlate);
  qr/^(?:$alternatives)/;
};
# '<svn username> = real-name <email address>' mapping based on git-svnimport: | |
# load_authors()
#
# Reads $authorFile line by line and stores "real-name <email address>"
# in %user under the svn username.  Lines that do not have the
# "username = name <email>" shape are ignored.  There is no explicit
# error check on open/close; the file's "use autodie ':io'" makes a
# failure fatal.
sub load_authors {
  open(my $fh, '<', $authorFile);

  while (my $line = <$fh>) {
    my ($login, $fullName, $address) =
        $line =~ /^(.+?|\(no author\))\s*=\s*(.+?)\s*<(.+)>\s*$/
      or next;

    $user{$login} = "$fullName <$address>";
  }

  close $fh;
} # end load_authors
#--------------------------------------------------------------------- | |
# want_node($revNode, $header, $prop, $contentRef)
#
# Decides whether one node record of the dump should be imported, and
# prepares it for output if so.
#
#   $revNode    - the [ header, props, contentRef ] entry of the revision
#                 record this node belongs to
#   $header     - hashref of the node's header lines (modified in place)
#   $prop       - hashref of the node's properties (modified in place)
#   $contentRef - reference to the node's text content (may be undef)
#
# Returns 1 if the node is wanted, and an empty list / undef otherwise.
# For a wanted node it:
#   * rewrites Node-path according to %pathXlate,
#   * fetches the content of a copied node with "svn cat" when the dump
#     record carries no text of its own,
#   * defaults svn:keywords to 'Date Id Rev' and expands those keywords
#     in the content.
sub want_node
{
  my ($revNode, $header, $prop, $contentRef) = @_;

  my $rev = $revNode->[0]{'Revision-number'} || 0;

  # Only paths under one of the %pathXlate prefixes are of interest:
  return unless $header->{'Node-path'} and $header->{'Node-path'} =~ $prefixRE;
  return if $rev == 169 or $rev == 194; # FIXME Unnecessary changes
  return if $rev > 223 and $header->{'Node-path'} =~ m!^trunk/[^/]*$!; # FIXME

  # A "change" without text is a property-only change:
  return if $header->{'Node-action'} eq 'change'
            and not defined $header->{'Text-content-length'};

  # Added directories carry nothing to emit:
  return if ($header->{'Node-kind'} || '') eq 'dir'
            and $header->{'Node-action'} eq 'add';

  ($header->{'Node-path'}) = String::RewritePrefix->rewrite(
    \%pathXlate, $header->{'Node-path'}
  );

  # A copy without text of its own: fetch the source from the repository.
  if ($header->{'Node-copyfrom-path'} and not $$contentRef) {
    $$contentRef = _svn_cat(
      "$url/$header->{'Node-copyfrom-path'}\@$header->{'Node-copyfrom-rev'}"
    );
  }

  $prop->{'svn:keywords'} ||= 'Date Id Rev'; # FIXME

  if (my $kw = $prop->{'svn:keywords'} and $$contentRef) {
    $kw =~ s/\bRev\b/Rev Revision/;

    # Split on whitespace instead of turning whitespace into '|': a
    # leading or trailing blank/newline in the property would otherwise
    # produce an empty alternative, and the substitution below would
    # then rewrite every literal '$$' in the file.
    my $kwRE = join('|', map { quotemeta $_ } split(' ', $kw));

    if (length $kwRE) {
      my $date = time2str('%Y-%m-%d %T %z', $revNode->[1]{'svn:timestamp'});

      my %value = (
        Rev      => $rev,
        Revision => $rev,
        Date     => $date,
        Id       => "$header->{'Node-path'} $rev $date $revNode->[1]{'svn:author'}",
      );

      $$contentRef =~ s/\$($kwRE)\$/\$$1: $value{$1} \$/g;
    }
  } # end if svn:keywords

  return 1;
} # end want_node

# _svn_cat($target)
#
# Returns the output of "svn cat $target" as a string ('' if there is
# none).  The command is run through a list-form pipe open, so no shell
# is involved and characters such as quotes in $target cannot alter the
# command.  A failure to run svn, or a non-zero exit status, is reported
# with a warning; whatever output was read is still returned.
sub _svn_cat
{
  my ($target) = @_;

  my $data = '';

  # The eval catches both the explicit dies below and the exceptions
  # autodie throws when it is in effect.
  my $ok = eval {
    open(my $svn, '-|', 'svn', 'cat', $target)
        or die "cannot run svn: $!\n";
    binmode $svn;

    my $out = do { local $/; <$svn> };
    $data = $out if defined $out;

    close $svn or die "exit status $?\n";
    1;
  };

  warn "svn cat $target failed: $@" unless $ok;

  return $data;
} # end _svn_cat
#--------------------------------------------------------------------- | |
# parseProps($content)
#
# Parses a property block of the form
#
#   K <key length>\n<key>\n
#   V <value length>\n<value>\n
#   ...
#   PROPS-END\n
#
# and returns a hashref mapping each key to its value.  Keys and values
# are taken by their declared length, so they may contain newlines.
# Dies if a key is not followed by a value or if the block does not end
# with "PROPS-END\n".
sub parseProps
{
  my ($raw) = @_;

  my %props;

  pos($raw) = 0;

  while ($raw =~ /\GK (\d+)\n/gc) {
    my $keyLength = $1;
    my $name      = substr($raw, pos($raw), $keyLength);
    pos($raw) += $keyLength + 1;        # the key plus its trailing newline

    die unless $raw =~ /\GV (\d+)\n/gc;

    my $valueLength = $1;
    $props{$name} = substr($raw, pos($raw), $valueLength);
    pos($raw) += $valueLength + 1;      # the value plus its trailing newline
  }

  die unless $raw =~ /\GPROPS-END\n\z/gc;

  return \%props;
} # end parseProps
#--------------------------------------------------------------------- | |
# Main program: read each dump file named on the command line, group
# the records by revision, and hand every completed revision to
# processRev.

load_authors();

# State used by processRev.  It is declared and initialised here, before
# the read loop, because processRev is called from inside the loop; if
# the initialisation came after the loop it would only run once every
# revision had already been processed.
my $numCommits = 0;
my %seenver; # FIXME

# Records of the revision currently being read.  Element 0 is the
# revision record itself; the rest are the wanted node records.  Each
# entry is [ header hashref, property hashref, reference to content ].
my @changes;
my $rev;
my $header = {};

binmode STDOUT, ':raw';

for my $inFile (@ARGV) {
  open(my $in, '<:raw', $inFile);

  while (<$in>) {
    if (/^$/) {
      # A blank line ends a header block; blank lines with no pending
      # headers are just separators.
      next unless keys %$header;

      my $prop;
      if ($header->{'Prop-content-length'}) {
        read($in, $prop, $header->{'Prop-content-length'});
        $prop = parseProps($prop);
      } else {
        $prop = {};
      }

      my $content;
      if ($header->{'Text-content-length'}) {
        read($in, $content, $header->{'Text-content-length'});

        # Only verify the checksum when the record actually has one:
        my $expectedMD5 = $header->{'Text-content-md5'};
        warn "$inFile:$.: Bad MD5\n"
            if defined $expectedMD5 and $expectedMD5 ne md5_hex($content);
      }

      my $wanted;
      if (defined $header->{'Revision-number'}) {
        # A new revision record: flush the previous revision first.
        processRev(\@changes) if @changes;
        @changes = ();

        $rev = $header->{'Revision-number'};

        my $svnDate = $prop->{'svn:date'};
        $svnDate = '' unless defined $svnDate;

        my @date = $svnDate =~
            /^(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)\.\d+Z$/
          or die "$inFile:$.: r$rev has a missing or malformed svn:date <$svnDate>\n";
        --$date[1];                     # timegm wants a 0-based month
        $prop->{'svn:timestamp'} = timegm(reverse @date);

        $wanted = 1;
      } else {
        $wanted = want_node($changes[0], $header, $prop, \$content);
      }

      push @changes, [ $header, $prop, \$content ] if $wanted;

      $header = {};
    } # end if blank line
    else {
      /^(\S+):\s*(.+)/ or die "Bad <$_>";
      $header->{$1} = $2;
    } # end else not blank
  } # end while $in

  close $in;
} # end for each file

# Flush the last revision:
processRev(\@changes) if @changes;
# processRev(\@changes)
#
# Writes one revision to the currently selected output handle as a
# "commit" command (plus a "tag" command for tagged revisions).
#
#   \@changes - element 0 is the revision record, the rest are node
#               records; each is [ header hashref, property hashref,
#               reference to content ].  The revision record is shifted
#               off the array.
#
# Revision 0 and revisions without node records produce no output.
# Dies if the first record is not a revision record, if the author has
# no entry in %user, or (for revisions before r223) if the log message
# does not name a release listed in %release.  A revision without an
# svn:author property is looked up under the name '(no author)'.
sub processRev
{
  my ($changes) = @_;

  my $revisionInfo = shift @$changes;
  my $rev = $revisionInfo->[0]{'Revision-number'};
  die "processRev: first record is not a revision record"
      unless defined $rev;

  # Skip rev 0:
  unless ($rev) {
    die "r0 unexpectedly contains node records" if @$changes;
    return;
  }

  # Skip revisions that didn't change anything:
  return unless @$changes;

  my $revprop = $revisionInfo->[1];

  # load_authors accepts a '(no author)' entry; use it for revisions
  # that have no svn:author property instead of looking up undef.
  my $svnAuthor = defined $revprop->{'svn:author'}
                ? $revprop->{'svn:author'}
                : '(no author)';
  my $user = $user{$svnAuthor}
      or die "Unknown author for r$rev: $svnAuthor";

  my $time = $revprop->{'svn:timestamp'};
  $time .= time2str(' %z', $time);

  # A revision without a log message gets an empty one:
  $revprop->{'svn:log'} = '' unless defined $revprop->{'svn:log'};

  #FIXME libwin32:
  $revprop->{'svn:log'} =~ s{Moved the Win32::IPC subclasses under Win32-IPC}
                            {Win32-IPC is now a separate distribution};

  my $tag;

  if ($rev < 223) {
    # These revisions are imports of libwin32 releases.  The log is
    # replaced by "Imported <release>" followed by the new entries of
    # each module's Changes file, and the commit is dated by the
    # release date.
    $revprop->{'svn:log'} =~ /(libwin32-0\.\d+)\b/
        or die "r$rev: log names no libwin32 release: $revprop->{'svn:log'}";
    $tag = $1;
    die "r$rev: no release date known for $tag" unless $release{$tag};

    $revprop->{'svn:log'} = "Imported $tag\n";
    $time = str2time($release{$tag}) . ' +0000';

    my %changelog;
    foreach my $c (@$changes) {
      my ($header, $prop, $contentRef) = @$c;

      next unless $header->{'Node-path'} =~ m!^(\w+)/Changes$!
                  and $header->{'Node-action'} ne 'delete';

      my $module = "Win32::$1";

      # Collect the entries for versions not seen in an earlier revision:
      my %ver;
      while ($$contentRef =~ m/^(\d[.\d]+)\b.*\n((?:^[ \t].*\n)+)/gm) {
        $ver{$1} = $2 unless $seenver{$module}{$1}++;
      }

      my @versions = sort { $a <=> $b } keys %ver;

      my $log = '';
      $log .= expand($ver{$_}) for @versions;

      # The heading carries the highest new version.
      # NOTE(review): when the file adds no new version this is undef and
      # an entry with an empty version is still recorded - confirm that
      # is intended.
      my $ver = $versions[-1];

      # NOTE(review): whitespace runs inside these patterns and
      # replacements may have been collapsed in this copy of the script -
      # verify against the author's original.
      $log =~ s/^ +(?=[^-\s])/ /mg;
      $log =~ s/^ +- +/ /mg;
      $log =~ s{\s+\(courtesy\s+Chris\s+Madsen\)}
               {\n (courtesy Christopher J. Madsen)}g;
      $log =~ s/\s*\z/\n/;

      $changelog{"$module $ver"} = $log;
    } # end foreach $c in @$changes

    foreach my $module (sort keys %changelog) {
      $revprop->{'svn:log'} .= "\n$module\n$changelog{$module}";
    }
  }
  elsif ($rev == 224) {
    # Add explicit deletions for these paths to r224:
    push @$changes,
         map { [ +{ 'Node-action' => 'delete', 'Node-path' => $_ } ] }
         qw(ChangeNotify Event IPC Mutex Semaphore ppport.h);
  }
  elsif ($rev == 281) { $tag = '1.06' }
  elsif ($rev == 285) { $tag = '1.07' }

  # print "#begin r$rev\n";
  print "commit refs/heads/master\n";
  print "mark :$rev\n";
  print "committer $user $time\n";
  print_data(\$revprop->{'svn:log'});

  # Only the first commit written by this run names its parent:
  print "from refs/heads/master^0\n" unless $numCommits++;

  foreach my $c (@$changes) {
    my ($header, $prop, $contentRef) = @$c;

    my $path = $header->{'Node-path'};

    if ($header->{'Node-action'} eq 'delete') {
      print "D $path\n";
    } else {
      my $mode = ($prop->{'svn:executable'} ? '755' : '644');
      print "M $mode inline $path\n";
      print_data($contentRef);
    }
  } # end foreach $c in @$changes

  print "\n";
  print "#end r$rev\n";

  #FIXME libwin32:
  if ($tag) {
    my ($tagver, $module);
    if ($tag =~ /libwin32-(.+)/) {
      $module = 'libwin32';  $tagver = $1;
    } else {
      $module = 'Win32-IPC'; $tagver = $tag;
    }

    my $tagmsg = "Tagged $module $tagver (r$rev)";

    print "tag $tag\n";
    print "from :$rev\n";
    print "tagger $user $time\n";
    print_data(\$tagmsg);
  } # end if $tag
} # end processRev
# print_data($contentRef)
#
# Prints the string that $contentRef refers to as a "data" block on the
# currently selected output handle: a "data <length>" line, the string
# itself, and a trailing newline.  A reference to an undefined value is
# printed as an empty string.
sub print_data
{
  my ($ref) = @_;

  my $empty   = '';
  my $payload = defined($$ref) ? $ref : \$empty;

  printf "data %d\n", length ${$payload};
  print ${$payload};
  print "\n";
} # end print_data
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment