Skip to content

Instantly share code, notes, and snippets.

@madsen
Created December 13, 2010 00:59
Show Gist options
  • Save madsen/738518 to your computer and use it in GitHub Desktop.
Save madsen/738518 to your computer and use it in GitHub Desktop.
Create a Git repo from a Perl libwin32 Subversion dump file
#! /usr/bin/perl
#---------------------------------------------------------------------
# git-from-svndump-libwin32.pl
# Copyright 2010 Christopher J. Madsen
#
# Create a Git repo from a libwin32 Subversion dump file
#---------------------------------------------------------------------
use strict;
use warnings;
use autodie ':io';
use Date::Format 'time2str';
use Date::Parse 'str2time';
use Digest::MD5 'md5_hex';
use String::RewritePrefix ();
use Text::Tabs 'expand';
use Time::Local 'timegm';
#=====================================================================
# File holding the svn-username-to-git-identity map read by load_authors.
my $authorFile = "$ENV{HOME}/.gitauthors";
# svn username => "Real Name <email>", filled in by load_authors and
# consulted by processRev for the committer/tagger lines.
my %user;
# Path-prefix rewrite table handed to String::RewritePrefix in want_node.
# A node is only kept if its Node-path starts with one of these keys
# (see $prefixRE below); the matching prefix is replaced by the value.
my %pathXlate = (
'trunk/vc/GUEmap/' => '',
'trunk/ppport.h' => 'ppport.h',
'trunk/Makefile.PL' => 'Makefile.PL',
'trunk/README' => 'README',
'trunk/Changes' => 'Changes',
'trunk/ChangeNotify/' => 'ChangeNotify/',
'trunk/Event/' => 'Event/',
'trunk/IPC/' => 'IPC/',
'trunk/Mutex/' => 'Mutex/',
'trunk/Semaphore/' => 'Semaphore/',
'trunk/Win32-IPC/' => '', # r198 & r224 directories changed
);
# Release name => release date.  For revisions below 223 processRev finds
# the release name in the svn log message, uses it as the tag name, and
# uses the date given here (parsed with str2time) as the commit time.
my %release = (
'libwin32-0.02' => 'Sat, 03 May 1997 16:24:47 GMT',
'libwin32-0.03' => 'Thu, 08 May 1997 01:35:34 GMT',
'libwin32-0.04' => 'Sun, 11 May 1997 08:36:00 GMT',
'libwin32-0.05' => 'Sun, 11 May 1997 21:26:39 GMT',
'libwin32-0.06' => 'Tue, 13 May 1997 07:57:05 GMT',
'libwin32-0.07' => 'Wed, 28 May 1997 07:20:19 GMT',
'libwin32-0.08' => 'Sat, 26 Jul 1997 23:48:09 GMT',
'libwin32-0.09' => 'Mon, 15 Dec 1997 06:47:56 GMT',
'libwin32-0.10' => 'Sat, 07 Feb 1998 04:19:46 GMT',
'libwin32-0.12' => 'Tue, 12 May 1998 17:32:48 GMT',
'libwin32-0.13' => 'Fri, 18 Sep 1998 23:14:38 GMT',
'libwin32-0.14' => 'Sun, 15 Nov 1998 05:23:00 GMT',
'libwin32-0.15' => 'Sun, 26 Sep 1999 04:21:18 GMT',
'libwin32-0.151' => 'Mon, 27 Sep 1999 00:25:21 GMT',
'libwin32-0.16' => 'Tue, 23 May 2000 06:16:26 GMT',
'libwin32-0.171' => 'Fri, 16 Feb 2001 00:00:00 GMT', # ESTIMATED
'libwin32-0.18' => 'Tue, 13 Nov 2001 07:57:48 GMT',
'libwin32-0.191' => 'Tue, 09 Jul 2002 01:11:21 GMT',
'libwin32-0.24' => 'Tue, 15 Feb 2005 07:31:38 GMT',
'libwin32-0.25' => 'Sat, 17 Sep 2005 19:50:59 GMT',
'libwin32-0.26' => 'Sat, 17 Sep 2005 20:42:53 GMT',
'libwin32-0.27' => 'Wed, 07 Mar 2007 20:12:57 GMT',
'libwin32-0.28' => 'Sat, 15 Sep 2007 00:53:15 GMT',
);
# Repository URL that want_node passes to `svn cat` to fetch the text of
# copied files that the dump does not carry inline.
my $url = 'file:///var/svn/libwin32'; # FIXME
# Anchored regex matching any key of %pathXlate at the start of a path.
# Keys are quotemeta'd, so they match literally; want_node uses this only
# as a yes/no test, so the (unordered) alternation order does not matter.
my $prefixRE = join('|', map { quotemeta $_ } keys %pathXlate);
$prefixRE = qr/^(?:$prefixRE)/;
# '<svn username> = real-name <email address>' mapping based on git-svnimport:
# Fill %user from the file named by $authorFile.  Each line of the form
#   svn-name = Real Name <email>
# stores "Real Name <email>" under svn-name; lines that do not have that
# shape are ignored.  Takes no arguments.  Open/close failures are fatal
# because the file enables autodie for I/O.
sub load_authors {
  open(my $fh, '<', $authorFile);

  while (my $line = <$fh>) {
    if ($line =~ /^(.+?|\(no author\))\s*=\s*(.+?)\s*<(.+)>\s*$/) {
      my ($svnName, $realName, $address) = ($1, $2, $3);
      $user{$svnName} = "$realName <$address>";
    }
  }

  close $fh;
} # end load_authors
#---------------------------------------------------------------------
# Decide whether one node record from the dump belongs in the Git history
# and, if it does, normalise the record in place.
#
# Arguments:
#   $revNode    - the [ header, props, \content ] record of the revision
#                 the node belongs to (supplies revision number, timestamp
#                 and author)
#   $header     - hashref of the node's header fields; Node-path is
#                 rewritten through %pathXlate
#   $prop       - hashref of the node's properties; svn:keywords gets a
#                 default value if it is unset
#   $contentRef - reference to the node's text; may be filled in from the
#                 copy source and has svn keywords expanded
#
# Returns 1 when the node should be recorded, nothing (undef in scalar
# context) when it should be dropped.
sub want_node
{
  my ($revNode, $header, $prop, $contentRef) = @_;

  my $rev = $revNode->[0]{'Revision-number'} || 0;

  # Only paths under one of the %pathXlate prefixes are of interest.
  return unless $header->{'Node-path'} and $header->{'Node-path'} =~ $prefixRE;

  # Repository-specific exclusions (hard-coded revision numbers).
  return if $rev == 169 or $rev == 194; # FIXME Unnecessary changes
  return if $rev > 223 and $header->{'Node-path'} =~ m!^trunk/[^/]*$!; # FIXME

  # A "change" without text is a property-only change: nothing to commit.
  return if $header->{'Node-action'} eq 'change'
      and not defined $header->{'Text-content-length'};

  # Directory additions are dropped; only file records are emitted.
  return if ($header->{'Node-kind'} || '') eq 'dir'
      and $header->{'Node-action'} eq 'add';

  ($header->{'Node-path'}) = String::RewritePrefix->rewrite(
    \%pathXlate, $header->{'Node-path'}
  );

  # A copy whose record carries no text at all takes its content from the
  # copy source.  Test for the absence of the header rather than for a
  # false content string: a copied file whose new text is empty or "0"
  # must keep that text instead of being replaced by the source's.
  if ($header->{'Node-copyfrom-path'}
      and not defined $header->{'Text-content-length'}) {
    my $target =
      "$url/$header->{'Node-copyfrom-path'}\@$header->{'Node-copyfrom-rev'}";
    # The command goes through the shell inside single quotes, so any
    # single quote in the path has to be escaped as '\''.
    (my $quoted = $target) =~ s/'/'\\''/g;
    $$contentRef = `svn cat '$quoted'`;
    warn "svn cat $target failed (wait status $?)\n" if $?;
  }

  $prop->{'svn:keywords'} ||= 'Date Id Rev'; # FIXME

  if (my $kw = $prop->{'svn:keywords'} and $$contentRef) {
    $kw =~ s/\bRev\b/Rev Revision/;

    my $date   = time2str('%Y-%m-%d %T %z', $revNode->[1]{'svn:timestamp'});
    my $author = $revNode->[1]{'svn:author'};
    $author = '' unless defined $author;

    my %value = (
      Rev      => $rev,
      Revision => $rev,
      Date     => $date,
      Id       => "$header->{'Node-path'} $rev $date $author",
    );

    # Expand only keywords we have a value for.  Building the pattern from
    # the raw property text would turn stray whitespace into an empty
    # alternative (rewriting "$$") and would expand unsupported keywords
    # such as Author to an undefined value.
    my @known = grep { exists $value{$_} } split ' ', $kw;
    if (@known) {
      my $kwRE = join '|', @known;
      $$contentRef =~ s/\$($kwRE)\$/\$$1: $value{$1} \$/g;
    }
  } # end if svn:keywords

  return 1;
} # end want_node
#---------------------------------------------------------------------
# Parse one property block, i.e. a sequence of
#   K <length>\n<key>\nV <length>\n<value>\n
# pairs terminated by "PROPS-END\n", given as a single string.
# Returns a reference to a hash of key => value.  Dies if a key is not
# followed by a value record or if the block does not end with PROPS-END.
sub parseProps
{
  my ($raw) = @_;
  my %parsed;

  pos($raw) = 0;
  while ($raw =~ /\GK (\d+)\n/gc) {
    my $keyLen = $1;
    my $name   = substr($raw, pos($raw), $keyLen);
    pos($raw) += $keyLen + 1;           # skip the key and its newline

    die unless $raw =~ /\GV (\d+)\n/gc;
    my $valLen = $1;
    $parsed{$name} = substr($raw, pos($raw), $valLen);
    pos($raw) += $valLen + 1;           # skip the value and its newline
  }

  die unless $raw =~ /\GPROPS-END\n\z/gc;

  return \%parsed;
} # end parseProps
#---------------------------------------------------------------------
# Main driver: read every dump file named on the command line, split it
# into records (a header block, then optional property and text sections),
# collect the records of one revision in @changes, and hand each completed
# revision to processRev.
load_authors();

my @changes;      # records of the revision being read; element 0 is the
                  # revision record, the rest are the node records kept
my $header = {};  # header fields of the record currently being read

# Read exactly $length bytes from $in and return them.  Dies if the file
# ends early, so a truncated dump is reported instead of being mis-parsed.
# $what and $inFile are used only for the error message.
sub _read_exact
{
  my ($in, $length, $what, $inFile) = @_;

  my $buffer;
  my $got = read($in, $buffer, $length);
  die sprintf("%s:%s: truncated %s (wanted %d bytes, got %d)\n",
              $inFile, $., $what, $length, defined $got ? $got : 0)
    unless defined $got and $got == $length;

  return $buffer;
} # end _read_exact

binmode STDOUT, ':raw';

for my $inFile (@ARGV) {
  open(my $in, '<:raw', $inFile);

  while (my $line = <$in>) {
    # Non-blank line: one "Name: value" header field.
    if ($line !~ /^$/) {
      $line =~ /^(\S+):\s*(.+)/ or die "Bad <$line>";
      $header->{$1} = $2;
      next;
    }

    # Blank line: end of a header block (or padding between records).
    next unless keys %$header;

    my $prop = $header->{'Prop-content-length'}
      ? parseProps(_read_exact($in, $header->{'Prop-content-length'},
                               'property block', $inFile))
      : {};

    my $content;
    if ($header->{'Text-content-length'}) {
      $content = _read_exact($in, $header->{'Text-content-length'},
                             'text content', $inFile);
      # Verify the checksum only when the record actually supplies one.
      my $md5 = $header->{'Text-content-md5'};
      warn "$inFile:$.: Bad MD5\n"
        if defined $md5 and $md5 ne md5_hex($content);
    }

    my $wanted;
    if (defined $header->{'Revision-number'}) {
      # A new revision starts: flush the previous one first.
      processRev(\@changes) if @changes;
      @changes = ();

      my $stamp = $prop->{'svn:date'};
      my @date = (defined $stamp ? $stamp : '') =~
        /^(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)\.\d+Z$/
          or die "$inFile:$.: missing or malformed svn:date in "
               . "r$header->{'Revision-number'}\n";
      --$date[1];                       # timegm wants a 0-based month
      $prop->{'svn:timestamp'} = timegm(reverse @date);
      $wanted = 1;
    } else {
      $wanted = want_node($changes[0], $header, $prop, \$content);
    }

    push @changes, [ $header, $prop, \$content ] if $wanted;
    $header = {};
  } # end while $in
} # end for each file

# Flush the final revision.
processRev(\@changes) if @changes;
# State kept across processRev calls.  Perl creates these lexicals at
# compile time, but the two statements below run only after the last
# processRev call above has returned.  processRev therefore starts from
# undef / an empty hash, which works because post-incrementing undef
# yields 0.
my $numCommits = 0; # number of commits emitted so far
my %seenver; # FIXME  ($seenver{module}{version}: Changes entry already logged)

# Emit the commit (and, where applicable, the tag) for one revision.
#
# $changes is an arrayref whose first element is the revision record
# [ header, props, \content ] and whose remaining elements are the node
# records kept for that revision.  The revision record is shifted off the
# array, and for r224 extra delete records are appended to it.
#
# Prints to the currently selected output handle.  Returns nothing useful.
# Dies if the revision record is missing, if r0 carries changes, if the
# author has no entry in %user, or (for revisions below 223) if the log
# message names no known release.
sub processRev
{
  my ($changes) = @_;

  my $revisionInfo = shift @$changes;
  my $rev = $revisionInfo->[0]{'Revision-number'};
  die "First record of a revision is not a revision record"
    unless defined $rev;

  # Skip rev 0:
  unless ($rev) {
    die "Revision 0 unexpectedly contains changes" if @$changes;
    return;
  }

  # Skip revisions that didn't change anything:
  return unless @$changes;

  my $revprop = $revisionInfo->[1];

  # Revisions without an svn:author use the "(no author)" entry that
  # load_authors accepts in the authors file.
  my $author = $revprop->{'svn:author'};
  $author = '(no author)' unless defined $author;
  my $user = $user{$author}
    or die "Unknown author for r$rev: $author";

  my $time = $revprop->{'svn:timestamp'};
  $time .= time2str(' %z', $time);

  #FIXME libwin32:
  $revprop->{'svn:log'} =~ s{Moved the Win32::IPC subclasses under Win32-IPC}
                            {Win32-IPC is now a separate distribution};

  my $tag;
  if ($rev < 223) {
    # These revisions are release imports: the log names the release, and
    # the commit message is rebuilt from the new entries in each module's
    # Changes file.
    $revprop->{'svn:log'} =~ /(libwin32-0\.\d+)\b/
      or die $revprop->{'svn:log'};
    $tag = $1;
    die "No release date known for $tag (r$rev)" unless $release{$tag};

    $revprop->{'svn:log'} = "Imported $tag\n";
    $time = str2time($release{$tag}) . ' +0000';

    my %changelog;
    foreach my $c (@$changes) {
      my ($header, $prop, $contentRef) = @$c;
      next unless $header->{'Node-path'} =~ m!^(\w+)/Changes$!
              and $header->{'Node-action'} ne 'delete';
      my $module = "Win32::$1";

      # Collect the version entries not already reported for this module.
      my %ver;
      while ($$contentRef =~ m/^(\d[.\d]+)\b.*\n((?:^[ \t].*\n)+)/gm) {
        $ver{$1} = $2 unless $seenver{$module}{$1}++;
      }

      # A Changes file with no new entries contributes nothing; without
      # this check it would add a heading with an empty version number.
      my @versions = sort { $a <=> $b } keys %ver;
      next unless @versions;

      my $log = '';
      $log .= expand($ver{$_}) for @versions;
      $log =~ s/^ +(?=[^-\s])/ /mg;
      $log =~ s/^ +- +/ /mg;
      $log =~ s{\s+\(courtesy\s+Chris\s+Madsen\)}
               {\n (courtesy Christopher J. Madsen)}g;
      $log =~ s/\s*\z/\n/;

      # The heading carries the highest version that was added.
      $changelog{"$module $versions[-1]"} = $log;
    } # end foreach $c in @$changes

    foreach my $module (sort keys %changelog) {
      $revprop->{'svn:log'} .= "\n$module\n$changelog{$module}";
    }
  }
  elsif ($rev == 224) {
    # Append delete records for these paths to r224's change list.
    push @$changes,
      map { [ { 'Node-action' => 'delete', 'Node-path' => $_ } ] }
        qw(ChangeNotify Event IPC Mutex Semaphore ppport.h);
  }
  elsif ($rev == 281) { $tag = '1.06' }
  elsif ($rev == 285) { $tag = '1.07' }

  # print "#begin r$rev\n";
  print "commit refs/heads/master\n";
  print "mark :$rev\n";
  print "committer $user $time\n";
  print_data(\$revprop->{'svn:log'});
  # Only the first commit names its parent explicitly.
  print "from refs/heads/master^0\n" unless $numCommits++;

  foreach my $c (@$changes) {
    my ($header, $prop, $contentRef) = @$c;
    my $path = $header->{'Node-path'};
    if ($header->{'Node-action'} eq 'delete') {
      print "D $path\n";
    } else {
      my $mode = ($prop->{'svn:executable'} ? '755' : '644');
      print "M $mode inline $path\n";
      print_data($contentRef);
    }
  } # end foreach $c in @$changes

  print "\n";
  print "#end r$rev\n";

  #FIXME libwin32:
  if ($tag) {
    my ($tagver, $module);
    if ($tag =~ /libwin32-(.+)/) {
      $module = 'libwin32'; $tagver = $1;
    } else {
      $module = 'Win32-IPC'; $tagver = $tag;
    }
    my $tagmsg = "Tagged $module $tagver (r$rev)";
    print "tag $tag\n";
    print "from :$rev\n";
    print "tagger $user $time\n";
    print_data(\$tagmsg);
  } # end if $tag
} # end processRev
# Write one length-prefixed data section to the currently selected output
# handle: a "data <length>" line, the string itself, and a newline.
# $contentRef is a reference to the string; a reference to undef is
# written as an empty section.
sub print_data
{
  my ($dataRef) = @_;

  my $bodyRef = defined($$dataRef) ? $dataRef : \'';
  my $size    = length $$bodyRef;

  print "data $size\n", $$bodyRef, "\n";
} # end print_data
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment