Skip to content

Instantly share code, notes, and snippets.

@daler
Created July 23, 2014 15:45
Show Gist options
  • Save daler/0ef88c38c7987c53937c to your computer and use it in GitHub Desktop.
Save daler/0ef88c38c7987c53937c to your computer and use it in GitHub Desktop.
Sync files (downloading only) over FTP with explicit SSL/TLS
#!/usr/bin/env python
import os
import sys
import platform
import argparse
import ftplib
import time
import netrc
from fnmatch import fnmatch
# Program description handed to argparse (``description=usage``); it is shown
# at the top of the ``--help`` output.  The "@" prefix it mentions is the
# temporary name a file is downloaded under before being renamed, and the "."
# progress symbol is emitted by LoggingFile once per 100 MB block.
usage = """
Download a subdirectory from an FTP site over SSL/TLS.
If you have a .netrc file (at ~/.netrc), you don't have to specify username or
password at the command line.
Incomplete downloads are prefixed by a "@"; in verbose mode a "." is printed
for every 100 MB downloaded.
"""
# Author/credit line handed to argparse (``epilog=epilog``); it is shown after
# the argument help.
epilog = "Ryan Dale ([email protected]), July 2014"
# This script is a heavily-modified version of the ftpmirror.py script that
# ships with Python in the Tools/scripts directory.
#
# Differences from ftpmirror.py:
#
# - Don't delete from server
# - Better command line interface (specify skip patterns, only show
# a listing)
# - Use FTPES instead of plain FTP
# - Better architecture (reusable functions; separate line-parsing function;
# auto-detect Linux or Mac)
# - LoggingFile object uses a more reasonable "." as symbol, and only prints
# one every 100MB
#
class ShortLineError(Exception):
    """
    Raised when a line of a UNIX-style LIST response has too few
    whitespace-separated fields to be parsed.
    """
def connection(url, user, pwd, remote_dir='/'):
    """
    Open an FTPES (explicit FTP over TLS) session and change into
    `remote_dir`.

    `url` is the host name handed to ftplib.FTP_TLS, `user` and `pwd` are the
    login credentials, and `remote_dir` is the directory to cwd into after
    login (default '/').

    Returns the connected ftplib.FTP_TLS object.  Any ftplib or socket error
    raised while logging in or changing directory is propagated to the caller
    after the connection has been closed.
    """
    ftps = ftplib.FTP_TLS(url)
    try:
        ftps.login(user, pwd)
        # Protect the data channel as well, so listings and file contents are
        # transferred over TLS too.
        ftps.prot_p()
        ftps.cwd(remote_dir)
    except Exception:
        # Don't leave a half-initialised control connection open when the
        # login or the initial cwd fails.
        ftps.close()
        raise
    return ftps
def makedir(pathname):
    """
    Create directory `pathname`, creating missing parent directories first.

    Does nothing if `pathname` already is a directory.  Each directory is
    created with mode 0o777 (subject to the process umask).

    Raises OSError if a directory cannot be created and does not exist
    afterwards.
    """
    if os.path.isdir(pathname):
        return
    parent = os.path.dirname(pathname)
    if parent:
        makedir(parent)
    try:
        os.mkdir(pathname, 0o777)
    except OSError:
        # The directory may have appeared in the meantime: either another
        # process created it, or `pathname` ends with a separator and the
        # recursive call above already made it.  Only a real failure is
        # re-raised.
        if not os.path.isdir(pathname):
            raise
class LoggingFile(object):
    """
    Write-through wrapper around a file object that prints a symbol ('.' by
    default) to `outfp` every `blocksize` bytes written.

    `fp` receives the data unchanged; `outfp` receives only the progress
    symbols and, on close(), a terminating newline.  close() does NOT close
    `fp` -- the caller keeps ownership of it.
    """

    def __init__(self, fp, blocksize, outfp, symbol='.'):
        self.fp = fp
        # Total number of bytes passed to write() so far.
        self.bytes = 0
        # Number of progress symbols already emitted.
        self.hashes = 0
        self.blocksize = blocksize
        self.outfp = outfp
        self.symbol = symbol

    def write(self, data):
        """
        Forward `data` to the wrapped file, first emitting one symbol for
        every block boundary that this write crosses.
        """
        self.bytes += len(data)
        # Floor division: a symbol stands for a *completed* block.  True
        # division (the Python 3 meaning of "/") would emit a symbol as soon
        # as the first byte of a block arrived.
        completed = self.bytes // self.blocksize
        pending = completed - self.hashes
        if pending > 0:
            self.outfp.write(self.symbol * pending)
            self.outfp.flush()
            self.hashes = completed
        self.fp.write(data)

    def close(self):
        """
        Terminate the line of progress symbols; the wrapped file stays open.
        """
        self.outfp.write('\n')
def writedict(dict, filename):
    """
    Write the mapping `dict` to `filename` as a Python dict literal, one
    ``key: value,`` line per item (both formatted with repr).

    The data is first written to a temporary file named '@' + basename in the
    same directory and then renamed over `filename`; a previous version of
    `filename`, if any, is kept as basename + '~'.

    Raises IOError/OSError if the temporary file cannot be written or the
    final rename fails.
    """
    # NOTE: the parameter is called `dict` for backward compatibility; it
    # shadows the builtin inside this function.
    dirname, fname = os.path.split(filename)
    tempname = os.path.join(dirname, '@' + fname)
    backup = os.path.join(dirname, fname + '~')
    try:
        os.unlink(backup)
    except OSError:
        # No stale backup to remove -- nothing to do.
        pass
    # The with-block guarantees the handle is closed even if a write fails.
    with open(tempname, 'w') as fp:
        fp.write('{\n')
        for key, value in dict.items():
            fp.write('%r: %r,\n' % (key, value))
        fp.write('}\n')
    try:
        os.rename(filename, backup)
    except OSError:
        # First write: there is no previous file to back up.
        pass
    os.rename(tempname, filename)
def parse_line(line, verbose=0):
    """
    Parse a single line from RETRLINES LIST

    Returns a tuple ``(mode, infostuff, filename)``:

    - When the local platform is not Linux the line is treated as a bare
      file name; a trailing '/' marks a subdirectory.  `mode` is 'd' or '-'
      and `infostuff` is the empty string.
    - Otherwise the line is parsed as a UNIX-style long listing.  `mode` is
      the first field (e.g. '-rw-r--r--'), `infostuff` is the list of the four
      fields preceding the name, and `filename` is the name with any
      " -> target" symlink suffix removed.

    `verbose` (default 0): when greater than 1, symlink entries are reported
    on stdout.

    Raises ShortLineError if a UNIX-style line has fewer than 6 fields.
    """
    if 'Linux' not in platform.system():
        # Mac listing has just filenames;
        # trailing / means subdirectory
        filename = line.strip()
        if filename.endswith('/'):
            return 'd', '', filename[:-1]
        return '-', '', filename

    # Parse, assuming a UNIX listing
    words = line.split(None, 8)
    if len(words) < 6:
        raise ShortLineError('line "%s" too short' % line)
    filename = words[-1].lstrip()
    arrow = filename.find(" -> ")
    if arrow >= 0:
        # words[0] had better start with 'l'...
        if verbose > 1:
            print('Found symbolic link %r' % (filename,))
        filename = filename[:arrow]
    return words[0], words[-5:-1], filename
def _askabout(filekind, filename, pwd):
    """Ask on stdin whether to retrieve an item; return True for yes."""
    try:
        read_reply = raw_input  # Python 2
    except NameError:
        read_reply = input  # Python 3
    prompt = 'Retrieve %s %s from %s ? [ny] ' % (filekind, filename, pwd)
    while True:
        reply = read_reply(prompt).strip().lower()
        if reply in ('y', 'ye', 'yes'):
            return True
        if reply in ('', 'n', 'no', 'nop', 'nope'):
            return False
        print('Please answer yes or no.')


def _load_mirrorinfo(infofilename):
    """Read a .mirrorinfo dict; return {} if the file is absent or bad."""
    # Local import so this block does not depend on the file's import list.
    import ast
    try:
        with open(infofilename, 'r') as fp:
            text = fp.read()
    except IOError:
        return {}
    try:
        # SECURITY: this used to be eval().  The file holds a plain dict
        # literal, so literal_eval parses it without executing arbitrary
        # code from a tampered .mirrorinfo.
        info = ast.literal_eval(text)
    except (SyntaxError, ValueError):
        info = None
    if not isinstance(info, dict):
        print('Bad mirror info in %r' % (infofilename,))
        return {}
    return info


def _retrieve(f, filename, tempname, fullname, pwd, verbose):
    """Download `filename` into `tempname`; return (nbytes, seconds) or None."""
    try:
        fp = open(tempname, 'wb')
    except IOError as msg:
        print("Can't create %r: %s" % (tempname, msg))
        return None
    if verbose:
        print('Retrieving %r from %r as %r...' % (filename, pwd, fullname))
        # One progress symbol per 100 MB.
        sink = LoggingFile(fp, 1024*1024*100, sys.stdout)
    else:
        sink = fp
    failure = None
    t0 = time.time()
    try:
        try:
            f.retrbinary('RETR ' + filename, sink.write, 8*1024)
        except ftplib.error_perm as msg:
            failure = msg
        t1 = time.time()
        nbytes = fp.tell()
    finally:
        # Always release the handle, even if the transfer died with a
        # socket error that propagates to the caller.
        fp.close()
        if sink is not fp:
            sink.close()
    if failure is not None:
        print(failure)
        # Discard the partial download instead of installing it over a good
        # local copy and recording it as up to date.
        try:
            os.unlink(tempname)
        except OSError:
            pass
        return None
    return nbytes, t1 - t0


def mirrorsubdir(f, localdir, verbose=1, skip_patterns=None,
                 interactive=False):
    """
    Mirror the current remote directory of FTP connection `f` into
    `localdir`, then recurse into its subdirectories.

    f : connected ftplib FTP object; its current directory is mirrored and
        is restored (via ``cwd('..')``) after each subdirectory.
    localdir : local directory to download into; created if missing.
    verbose : 0 is quiet, 1 reports progress, >1 adds per-line diagnostics.
    skip_patterns : list of fnmatch patterns; matching names are ignored.
        '.', '..' and '.mirrorinfo' are always skipped.
    interactive : when true, ask on stdin before retrieving each file and
        before descending into each subdirectory.

    Each file is downloaded to '@' + name and renamed into place on success.
    The listing fields of every retrieved file are stored in
    ``localdir/.mirrorinfo`` so unchanged files are skipped on later runs.
    Returns None.  Per-file problems are printed and skipped; errors other
    than ftplib.error_perm raised by the FTP connection propagate.
    """
    if skip_patterns is None:
        skip_patterns = []
    skippats = ['.', '..', '.mirrorinfo'] + list(skip_patterns)
    pwd = f.pwd()

    # Set up local dir
    if localdir and not os.path.isdir(localdir):
        if verbose:
            print('Creating local directory %r' % (localdir,))
        try:
            makedir(localdir)
        except OSError:
            print('Failed to establish local directory %r' % (localdir,))
            return

    infofilename = os.path.join(localdir, '.mirrorinfo')
    info = _load_mirrorinfo(infofilename)

    # keep track of subdirs
    subdirs = []
    # text from RETRLINES will be parsed into this list.
    listing = []
    if verbose:
        print('Listing remote directory %r...' % (pwd,))
    f.retrlines('LIST', listing.append)

    for line in listing:
        try:
            mode, infostuff, filename = parse_line(line)
        except ShortLineError:
            # e.g. the "total N" header of a UNIX listing -- not an entry.
            if verbose > 1:
                print('Skipping short line %r' % (line,))
            continue
        if verbose > 1:
            print('--> %r' % (line,))

        matched = next(
            (pat for pat in skippats if fnmatch(filename, pat)), None)
        if matched is not None:
            if verbose > 1:
                print('Skip pattern %r matches %r' % (matched, filename))
            continue

        if mode[0] == 'd':
            if verbose > 1:
                print('Remembering subdirectory %r' % (filename,))
            subdirs.append(filename)
            continue

        if filename in info and info[filename] == infostuff:
            if verbose > 1:
                print('Already have this version of %r' % (filename,))
            continue

        fullname = os.path.join(localdir, filename)
        tempname = os.path.join(localdir, '@' + filename)
        if interactive and not _askabout('file', filename, pwd):
            if filename in info:
                info[filename] = 'Not retrieved'
            continue

        # Remove the leftover of an earlier, interrupted download.
        try:
            os.unlink(tempname)
        except OSError:
            pass

        if mode[0] == 'l':
            # parse_line() strips the link target from the name, so recover
            # it from the raw listing line.
            linkto = line.partition(' -> ')[2]
            if not linkto:
                print("Can't determine link target for %r" % (filename,))
                continue
            if verbose:
                print("Creating symlink %r -> %r" % (filename, linkto))
            try:
                os.symlink(linkto, tempname)
            except (OSError, IOError) as msg:
                print("Can't create %r: %s" % (tempname, msg))
                continue
        else:
            stats = _retrieve(f, filename, tempname, fullname, pwd, verbose)
            if stats is None:
                continue
            nbytes, dt = stats

        # Replace any previous local copy with the finished download.
        try:
            os.unlink(fullname)
        except OSError:
            pass  # Ignore the error
        try:
            os.rename(tempname, fullname)
        except OSError as msg:
            print("Can't rename %r to %r: %s" % (tempname, fullname, msg))
            continue
        info[filename] = infostuff
        writedict(info, infofilename)

        if verbose and mode[0] != 'l':
            kbytes = nbytes / 1024.0
            summary = '%d Kbytes in %d seconds' % (
                int(round(kbytes)), int(round(dt)))
            if dt > 0:
                summary += ' (~%d Kbytes/sec)' % int(round(kbytes / dt))
            print(summary)

    # Recursively mirror subdirectories
    for subdir in subdirs:
        if interactive and not _askabout('subdirectory', subdir, pwd):
            continue
        if verbose:
            print('Processing subdirectory %r' % (subdir,))
        localsubdir = os.path.join(localdir, subdir)
        pwd = f.pwd()
        if verbose > 1:
            print('Remote directory now: %r' % (pwd,))
            print('Remote cwd %r' % (subdir,))
        try:
            f.cwd(subdir)
        except ftplib.error_perm as msg:
            print("Can't chdir to %r : %r" % (subdir, msg))
            continue
        if verbose:
            print('Mirroring as %r' % (localsubdir,))
        mirrorsubdir(f, localsubdir, verbose=verbose,
                     interactive=interactive, skip_patterns=skip_patterns)
        if verbose > 1:
            print('Remote cwd ..')
        f.cwd('..')
        # Sanity check: a symlinked directory could leave us somewhere else
        # after "cd subdir; cd ..", which would corrupt the rest of the walk.
        newpwd = f.pwd()
        if newpwd != pwd:
            print('Ended up in wrong directory after cd + cd ..')
            print('Giving up now.')
            break
        if verbose > 1:
            print('OK.')
def main():
    """
    Command-line entry point: parse arguments, connect to the FTPES server,
    then either print the remote listing (--list) or mirror the remote
    directory into the local one.

    Credentials not given on the command line are taken from ~/.netrc; the
    process exits with status 1 if none can be found for the host.
    """
    ap = argparse.ArgumentParser(description=usage, epilog=epilog)
    ap.add_argument('--user', '-u', help='Username')
    ap.add_argument('--password', '-p', help='Password')
    ap.add_argument('url',
                    help='URL to FTPES server.')
    ap.add_argument('--verbose',
                    help='Verbose level (0, 1, 2); default is %(default)s',
                    type=int, default=1)
    ap.add_argument('--localdir',
                    help='Local dir to save mirror to; default is %(default)s',
                    default=os.getcwd())
    ap.add_argument('--remotedir',
                    help='Remote dir to mirror, default is %(default)s',
                    default='/')
    ap.add_argument('--interactive', action='store_true',
                    help='Enable interactive mode')
    ap.add_argument('--list',
                    help='Just list the remote dir, do not mirror anything',
                    action='store_true')
    # action='append' makes the "multiple times" promise in the help true:
    # every --skip occurrence contributes one list of patterns.
    ap.add_argument('--skip', nargs='+', action='append',
                    help='Specify filename patterns to skip. '
                    'Can be specified multiple times.')
    args = ap.parse_args()

    # Flatten [[pat, ...], [pat, ...]] from repeated --skip options.
    skip_patterns = [pat for group in (args.skip or []) for pat in group]

    if args.user is None or args.password is None:
        try:
            hosts = netrc.netrc(os.path.expanduser('~/.netrc')).hosts
        except (IOError, netrc.NetrcParseError):
            # Missing or unreadable ~/.netrc: same outcome as "host not
            # listed" rather than a traceback.
            hosts = {}
        try:
            user, account, password = hosts[args.url]
        except KeyError:
            print('No information for host "%s" in ~/.netrc. Please specify a '
                  'username and password as commandline arguments' % args.url)
            sys.exit(1)
        # Values given explicitly on the command line take precedence.
        if args.user is None:
            args.user = user
        if args.password is None:
            args.password = password

    f = connection(
        url=args.url,
        user=args.user,
        pwd=args.password,
        remote_dir=args.remotedir
    )
    try:
        if args.list:
            lines = []
            f.retrlines("LIST", lines.append)
            for line in lines:
                try:
                    mode, info, filename = parse_line(line)
                except ShortLineError:
                    # Not a file entry (e.g. a "total N" header line).
                    continue
                if any(fnmatch(filename, pat) for pat in skip_patterns):
                    continue
                print(line)
        else:
            mirrorsubdir(f, args.localdir, args.verbose,
                         skip_patterns=skip_patterns,
                         interactive=args.interactive)
    finally:
        # Say goodbye politely; fall back to a hard close if the server has
        # already gone away.
        try:
            f.quit()
        except ftplib.all_errors:
            f.close()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment