Created
July 22, 2009 08:03
-
-
Save jrk/151872 to your computer and use it in GitHub Desktop.
An improved replacement for scp, with resume and advanced compression support. By Andrei Alexandrescu.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# Via: http://erdani.org/code/scpi.html | |
################################################################################ | |
## Copyright (c) 2006 by Andrei Alexandrescu | |
## Permission to use, copy, modify, distribute and sell this software for any | |
## purpose is hereby granted without fee, provided that the above copyright | |
## notice appear in all copies and that both that copyright notice and this | |
## permission notice appear in supporting documentation. | |
## The author makes no representations about the | |
## suitability of this software for any purpose. It is provided "as is" | |
## without express or implied warranty. | |
################################################################################ | |
use strict; | |
use warnings; | |
use Pod::Usage; | |
use Getopt::Long qw(:config no_ignore_case bundling); | |
use IPC::Open2; | |
use File::Basename; | |
use Time::HiRes qw(time sleep); | |
sub CopySpec($$); | |
sub OpenCmdChannel($); | |
sub SysReadLine($); | |
sub FormatSize($;$); | |
sub CopyFile($$$$$); | |
sub ReadAll($\$); | |
sub ReadChunk($\$); | |
sub yap(@); | |
my @sshOptions = ("-e", "none"); | |
my $batchMode = 0; | |
my $ssh = "ssh"; | |
my $limitBandwidth = 2_000_000_000; | |
my $preserveTime = 0; | |
my $quiet = 0; | |
my $recursive = 0; | |
my $verbose = 0; | |
my $restMode = 0; | |
my $compress = ""; | |
my $expand = "cat"; | |
my ($srcRdr, $srcWtr, $dstRdr, $dstWtr); | |
my $sharedCode = q{ | |
my $bufSize = 32 * 1024; | |
sub SysReadLine($) { | |
my ($h) = @_; | |
my $buf = ""; | |
while (sysread($h, my $c, 1)) { | |
$buf .= $c; | |
last if ($c eq "\n"); | |
} | |
$buf; | |
} | |
sub ReadChunk($\$) { | |
my ($h, $buf) = @_; | |
$$buf = ""; | |
my $len = int(SysReadLine($h)); | |
while ((my $diff = $len - length($$buf)) > 0) { | |
sysread($h, $$buf, $diff, length($$buf)) or die "Cannot read from stream"; | |
} | |
$len; | |
} | |
sub WriteChunk($$) { | |
my $h = $_[0]; | |
my $buf = \$_[1]; | |
my $len = length($$buf); | |
syswrite($h, "$len\n") && syswrite($h, $$buf) == $len | |
or die "Cannot write to stream"; | |
$len; | |
} | |
}; | |
eval($sharedCode); | |
die $@ if $@; | |
sub AddSSHOption($$) { push @sshOptions, "-$_[0]"; } | |
sub AddSSHOption2($$) { push @sshOptions, "-$_[0]", "$_[1]"; } | |
GetOptions( | |
"h|help|?" => sub { pod2usage(1); }, | |
"1" => \&AddSSHOption, | |
"2" => \&AddSSHOption, | |
"4" => \&AddSSHOption, | |
"6" => \&AddSSHOption, | |
"B" => \$batchMode, | |
"C" => \&AddSSHOption, | |
"c=s" => \&AddSSHOption2, | |
"F=s" => \&AddSSHOption2, | |
"i=s" => \&AddSSHOption2, | |
"l=i" => sub { $limitBandwidth = 1024 * $_[1] }, | |
"o=s" => \&AddSSHOption2, | |
"P=i" => sub { AddSSHOption2("p", $_[1]); }, | |
"p" => \$preserveTime, | |
"q" => \$quiet, | |
"r" => \$recursive, | |
"S=s" => \$ssh, | |
"v" => \&AddSSHOption, | |
"R" => \$restMode, | |
"b" => sub { $compress = "|bzip2"; $expand = "bunzip2" }, | |
"g" => sub { $compress = "|gzip"; $expand = "gunzip" }, | |
"n" => sub { $compress = ""; $expand = "cat" }, | |
"d" => sub { ++$verbose }, | |
); | |
# main code starts here; scoped for the sake of symbol locality | |
{ | |
pod2usage(1) if (@ARGV < 2); | |
my $dst = pop(@ARGV); | |
my ($dstHost, $dstSpec) = ($dst =~ qr{(?:(.+?)\:)?(.*)}); | |
$dstHost = '' if !$dstHost; | |
$dstSpec = '' if !$dstSpec; | |
for my $src (@ARGV) { | |
# Parse source | |
my ($srcHost, $srcSpec) = ($src =~ qr{(?:(.+?)\:)?(.*)}); | |
$srcHost = '' if !$srcHost; | |
yap("Source host: `$srcHost', target host: `$dstHost'\n"); | |
# Start communicating with the two hosts | |
($srcRdr, $srcWtr) = OpenCmdChannel($srcHost); | |
($dstRdr, $dstWtr) = OpenCmdChannel($dstHost); | |
# Here's the meat | |
CopySpec($srcSpec, $dstSpec); | |
# Cleanuppa | |
close($srcWtr) or warn("Cannot close source channel"); | |
close($dstWtr) or warn("Cannot close target channel"); | |
} | |
} | |
################################################################################ | |
sub CopySpec($$) { | |
my ($srcSpec, $dstSpec) = @_; | |
# support spaces in destination, while allowing other metachars | |
$srcSpec =~ s/(\s)/\\$1/g; | |
yap("Source filespec: `$srcSpec', target filespec: `$dstSpec'\n"); | |
#my $fileInfoFormat = q{"q{%n}, %s, q{%A}, "}; | |
print $srcWtr qq{<stat --format="q{%n}, %s, q{%A}, "}, | |
qq{ -- $srcSpec 2>/dev/null\n}; | |
ReadAll($srcRdr, my $buf); | |
yap("Source names, sizes, attributes: $buf\n"); | |
my @srcFiles = eval("($buf)"); | |
for (my $i = 0; $i < @srcFiles; $i += 3) { | |
my $srcName = $srcFiles[$i]; | |
my $srcSize = $srcFiles[$i + 1]; | |
my $srcAttr = $srcFiles[$i + 2]; | |
if ($srcAttr =~ /^d/) { | |
if (!$recursive) { | |
print "Ignoring directory `$srcName'\n"; | |
next; | |
} | |
yap("Recursive copy\n"); | |
$dstSpec =~ s{/$}{}; | |
my $targetDir = "$dstSpec/" . basename($srcName) . '/'; | |
print $dstWtr qq{<mkdir --parents '$targetDir'; echo \$?\n}; | |
ReadAll($dstRdr, $buf); | |
$buf = int($buf); | |
if ($buf) { | |
warn "Cannot create directory `$targetDir', error code $buf; skipping `$srcName'"; | |
next; | |
} | |
CopySpec($srcName . '/*', $targetDir); | |
next; | |
} | |
my $dstName = $dstSpec; | |
if ($dstName eq "" || $dstName =~ qr{/$} || $dstName =~ qr{(^|/)\.?\.$}) { | |
# it's a directory | |
$dstName =~ s{/?$}{/}; | |
$dstName .= basename($srcName); | |
} | |
my $dstSize = 0; | |
if ($restMode) { | |
print $dstWtr qq{<stat --format='%s' -- '$dstName' 2>/dev/null\n}; | |
ReadAll($dstRdr, $buf); | |
yap("Size of destination file `$dstName': $buf\n"); | |
$dstSize = int($buf) if ($buf); | |
} | |
my $diff = $srcSize - $dstSize; | |
#next if ($restMode && $diff == 0); | |
yap("Copying '$srcName' to '$dstName' from byte $dstSize to byte $srcSize ", | |
"(total: $diff bytes)\n"); | |
if ($restMode && $diff < 0) { | |
print STDERR "Destination '$dstName' exists but is larger than ", | |
"source '$srcName', give up on -R\n"; | |
next; | |
} | |
CopyFile($srcName, $srcSize, $dstSize, $dstName, $restMode); | |
} | |
} | |
sub yap(@) { | |
return if (!$verbose); | |
print join('', @_); | |
} | |
sub OpenCmdChannel($) { | |
my ($host) = @_; | |
# The command above should have no single quote, sigh | |
my $perlCmd = $sharedCode . q{ | |
use strict; | |
use warnings; | |
use bytes; | |
my $buf; | |
$| = 1; | |
binmode(STDIN) or die "Cannot binmode stdin"; | |
binmode(STDOUT) or die "Cannot binmode stdout"; | |
while (my $ln = SysReadLine(\*STDIN)) { | |
my $stream; | |
if ($ln =~ /^</) { | |
$ln = substr($ln, 1); | |
open($stream, "$ln|") or die "Cannot open $ln: $!"; | |
binmode($stream) or die "Cannot binmode stream"; | |
while (sysread($stream, $buf, $bufSize)) { | |
WriteChunk(\*STDOUT, $buf); | |
} | |
WriteChunk(\*STDOUT, ""); | |
} elsif ($ln =~ /^>/) { | |
$ln = substr($ln, 1); | |
open($stream, "|$ln") or die "Cannot open $ln: $!"; | |
binmode($stream) or die "Cannot binmode stream"; | |
while (ReadChunk(\*STDIN, $buf)) { | |
syswrite($stream, $buf) == length($buf) or die "Cannot write!"; | |
} | |
} else { | |
die "Unrecognized command line: $ln"; | |
} | |
close($stream); | |
} | |
}; | |
$perlCmd =~ s/\s+/ /g; | |
my @pipe = $host | |
# open via ssh | |
? ($ssh, @sshOptions, $host, "perl -e '$perlCmd'") | |
# local pipe | |
: ("zsh", "-c", "perl -e '$perlCmd'"); | |
#"zsh", "-c", "perl -e '$perlCmd'" | |
open2(my $rdr, my $wtr, @pipe) or die "Cannot open pipe `@pipe': $!"; | |
binmode($rdr) && binmode($wtr) or die "Cannot binmode channel"; | |
print $wtr qq{<echo ping\n}; | |
ReadAll($rdr, my $response); | |
die "Expected `ping', got `$response' in command `@pipe'" | |
if $response ne "ping\n"; | |
($rdr, $wtr); | |
} | |
sub CopyFile($$$$$) { | |
my ($src, $srcSize, $srcOffset, $dst, $append) = @_; | |
my $redir = $append ? '>>' : '>'; | |
# Send commands to source | |
my $payload = $srcSize - $srcOffset; | |
my $nTh = $srcOffset + 1; | |
print $srcWtr qq{<tail --bytes=+$nTh -- $src $compress\n} | |
or die "Cannot send commands to source"; | |
# Send commands to target | |
print $dstWtr qq{>$expand $redir $dst\n} | |
or die "Cannot send commands to destination"; | |
my $speed = 0; | |
my $eta; | |
my $started = time(); | |
my $bytesSinceLastTime = 0; | |
my $buf; | |
my $accum = $srcOffset; | |
my $moreData = 1; | |
# ok, now do the deed | |
while ($moreData) { | |
$moreData = ReadChunk($srcRdr, $buf); | |
WriteChunk($dstWtr, $buf) == length($buf) or die "Cannot write: $!\n"; | |
$accum += length($buf); | |
$bytesSinceLastTime += length($buf); | |
my $newTime = time(); | |
my $delta = $newTime - $started; | |
next if ($delta < 1 && $moreData); | |
# Compute speed and ETA | |
my $instaSpeed = $bytesSinceLastTime / $delta; | |
$speed = $speed == 0 | |
? $instaSpeed | |
: (19 * $speed + $instaSpeed) / 20; | |
# Throttle bandwidth | |
if ($speed > $limitBandwidth) { | |
my $sleep = $bytesSinceLastTime / $limitBandwidth - $delta; | |
if ($sleep > 0) { | |
sleep($sleep); | |
$speed = $limitBandwidth; | |
} | |
} | |
$started = $newTime; | |
$bytesSinceLastTime = 0; | |
# | |
next if ($quiet); | |
$eta = $speed ? ($srcSize - $accum) / $speed : 0; # seconds | |
# Print the stuff | |
my $p = sprintf("%.2f", $srcSize ? $accum * 100 / $srcSize : 100); | |
my $hrs = int($eta / 3600.); | |
my $mins = int(($eta - 3600 * $hrs) / 60); | |
my $rhs = sprintf(" %.2f%% %s %.0fKB/s %02u:%02u ETA", | |
$p, FormatSize($accum), $speed / 1024., $hrs, $mins); | |
my $fsLen = 80 - length($rhs) - 2; | |
printf STDERR "%-*.*s %s\r", $fsLen, $fsLen, basename($src), $rhs; | |
} | |
print STDERR "\n"; | |
} | |
sub FormatSize($;$) { | |
my ($size, $suffix) = @_; | |
$suffix = $suffix || "B"; | |
if ($size < 1000) { | |
return sprintf("%.5s$suffix", $size); | |
} | |
$size /= 1024.; | |
return FormatSize($size, | |
$suffix eq "B" ? "K" : $suffix eq "K" ? "M" : $suffix eq "M" ? "G" : "T"); | |
} | |
sub ReadAll($\$) { | |
my ($h, $r) = @_; | |
$$r = ""; | |
my $buf; | |
while (ReadChunk($h, $buf)) { | |
$$r .= $buf; | |
} | |
length($$r); | |
} | |
__END__ | |
Generate html with: | |
$ pod2html scpi.pl >scpi.html --css="/style.css" --title=scpi | |
=head1 SCPI: B<S>ECURE B<C>OB<P>Y B<I>MPROVED | |
scpi - scp improved with restartable downloads and advanced compression. | |
=begin html | |
<style> | |
tt, pre, code { | |
font-family:"Courier"; | |
font-size:105%; | |
} | |
</style> | |
<script type="text/javascript"><!-- | |
google_ad_client = "pub-7042296581632428"; | |
google_ad_width = 728; | |
google_ad_height = 90; | |
google_ad_format = "728x90_as"; | |
google_ad_type = "text_image"; | |
google_ad_channel =""; | |
google_color_border = "336699"; | |
google_color_bg = "C1CCCD"; | |
google_color_link = "0000FF"; | |
google_color_url = "008000"; | |
google_color_text = "000000"; | |
//--></script> | |
<script type="text/javascript" | |
src="http://pagead2.googlesyndication.com/pagead/show_ads.js"> | |
</script> | |
=end html | |
=head1 SYNOPSIS | |
C<scpi.pl [options] [[user@]host:]src [...] [[user@]host:]dest> | |
=head1 DESCRIPTION | |
C<scpi> (C<scp> improved) is intended to be a drop-in replacement for the | |
C<scp> utility that comes together with any Secure Shell (ssh) implementation. | |
C<scpi> implements the much-needed restartable downloads that C<scp> famously | |
doesn't support. As a couple of perks, C<scpi> implements a few other | |
niceties such as optional use of advanced compression (C<gzip>/C<bzip2>) | |
and copying multiple files to one destination directory in one shot. | |
Options: | |
-h, -help brief help message | |
-1 passed to ssh | |
-1 passed to ssh | |
-4 passed to ssh | |
-6 passed to ssh | |
-B batch usage (not implemented) | |
-C passed to ssh | |
-c str passed to ssh | |
-F str passed to ssh | |
-i str passed to ssh | |
-l limit bandwidth used | |
-o str passed to ssh | |
-P int passed to ssh | |
-p preserve file times (not implemented) | |
-q don't display progress | |
-r recurse | |
-S str choose the ssh to use | |
-v passed to ssh | |
-R restart mode (resume broken downloads) | |
-n use no compression (default) | |
-b use bzip2 for compressing the data stream | |
(%, speed, and ETA will be inaccurate) | |
-g use gzip for compressing the data stream | |
(%, speed, and ETA will be inaccurate) | |
-d debug (extra output) | |
C<scpi> is implemented in Perl, and requires Perl on the machine it's running, | |
as well as on the source and/or destination hosts. Also, C<scpi> uses the C<ssh> | |
executable on the client machine (but not C<scp>). | |
The main target audience for C<scpi> consists of people who enjoy C<scp>'s | |
security but are unhappy with C<scp>'s lack of a "restart" capability---if | |
the connection drops in the middle of a copy, C<scp> forces redoing the | |
transfer all over again. C<scpi> implements robust restarts. When invoked with | |
the -R flag, C<scpi> assumes that the existing target file, if any, is a | |
valid fragment left from a previous C<scpi> (or C<scp> for that matter) | |
execution, and that the source file hasn't changed in the meantime. | |
It follows that C<scpi> is best at transferring large files. To better support | |
that ability, C<scp> supports C<gzip> or C<bzip2> compression of the data | |
stream, which can be a big win in transmission speed at a low computational | |
cost, particularly over an agglomerated LAN. (You can also pass C<scpi> the | |
-C flag, which C<scpi> passes along to C<ssh> instructing it to compress the | |
stream itself (but not the source and destination files). Generally, the | |
newer C<bzip2> algorithm is expected to be better than either C<gzip> or | |
C<ssh>'s native compression.) | |
=head1 DOWNLOAD | |
C<scpi.pl> is available from http://erdani.org/code/scpi.pl. Save the file to a | |
local Unix (including Cygwin) directory and use it as you'd use C<scp>. | |
=head1 IMPLEMENTATION | |
You don't need to read this unless you plan on changing scpi yourself, or you | |
enjoy cool hacks. | |
In essence, scpi starts ssh through a bidirectional pipe and implements its own | |
little data transfer protocol on top of that pipe. The protocol simply | |
prescribes that data transfers take place in "chunks". Each chunk consists of a | |
number N (in decimal ASCII) followed by a newline, followed by N bytes of data. | |
By convention, if N is zero, that means an entire transfer---in our case, a file | |
of arbitrary size---has ended. Transmitting a large file in chunks has the | |
advantage that the size of the file mustn't be known in advance, advantage | |
exploited in scpi's implementation. | |
The need for such a protocol arises from the requirement of reusing one | |
bidirectional stream (namely ssh's stdin and stdout) for multiple binary | |
transfers. If only one blob of data were to be transferred, closing the | |
stream would naturally signify the end of the data. However, a more | |
sophisticated communication demands a reliable way to detect the size | |
of a data block. | |
With this little protocol in place, scpi implements and uses internally a | |
very simple "command line" interface---a way to execute commands on a | |
remote system and either feed data | |
to their standard input or read their standard output. Simplicity and reuse of | |
good old utilities (such as C<tail> and C<cat>) make scpi robust and | |
easy to debug. | |
=head1 AUTHOR | |
Written by Andrei Alexandrescu. Mosey to http://erdani.org/email to contact | |
the author. | |
=head1 KNOWN BUGS | |
As of this time, the -p (preserve file time) and -B (batch usage) are ignored, | |
the first because the author didn't need it yet, and the second because the | |
author didn't have the time to look into what it really means. | |
C<scpi> has not been tested with file names containing a newline, and it | |
almost sure won't work with them. Quotes and other special characters inside | |
file names are likely to cause trouble as well. | |
The implementation of bandwidth limitation hasn't been really thought through. | |
When the option -l is in effect, there is some bandwith throttling, but perhaps | |
the regularization algorithm could be vastly enhanced. | |
When using -b or -g (compression with C<bzip2> or C<gzip>), the percent | |
transferred, speed, and estimated time of arrival (ETA) will likely be | |
inaccurate. This is because they reflect the amount of compressed, not "real", | |
data transferred. On the bright side, at the end of each file's transfer, the | |
percent complete will show the effective stream compression achieved on that | |
file. | |
=head1 SEE ALSO | |
The documentation for scp shows more details on the options. B<scpi> emulates | |
scp's options (and, just like scp, passes some of them to ssh). | |
=head1 COPYRIGHT | |
Copyright (c) 2006 Andrei Alexandrescu. | |
This is free software; see the source for copying conditions. There is NO | |
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
=cut |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment