Created
July 23, 2014 15:45
-
-
Save daler/0ef88c38c7987c53937c to your computer and use it in GitHub Desktop.
Sync files (downloading only) over FTP with explicit SSL/TLS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import os | |
import sys | |
import platform | |
import argparse | |
import ftplib | |
import time | |
import netrc | |
from fnmatch import fnmatch | |
# Help text shown by argparse: `usage` is passed as the parser description and
# `epilog` is printed after the option list.
usage = """
Download a subdirectory from an FTP site over SSL/TLS.
If you have a .netrc file (at ~/.netrc), you don't have to specify username or
password at the command line.
Incomplete downloads are prefixed by a "@"; in verbose mode a "." is printed
for every 100 MB downloaded.
"""
epilog = "Ryan Dale ([email protected]), July 2014"
# This script is a heavily-modified version of the ftpmirror.py script that | |
# ships with Python in the Tools/scripts directory. | |
# | |
# Differences from ftpmirror.py: | |
# | |
# - Don't delete from server | |
# - Better command line interface (specify skip patterns, only show | |
# a listing) | |
# - Use FTPES instead of plain FTP | |
# - Better architecture (reusable functions; separate line-parsing function; | |
# auto-detect Linux or Mac) | |
# - LoggingFile object uses a more reasonable "." as symbol, and only prints | |
# one every 100MB | |
# | |
class ShortLineError(Exception):
    """
    Raised when a LIST line has too few whitespace-separated fields to be
    parsed as a UNIX-style directory entry.
    """
def connection(url, user, pwd, remote_dir='/'):
    """
    Open an FTP-over-TLS session, log in and change to `remote_dir`.

    Parameters
    ----------
    url : str
        Host name passed to `ftplib.FTP_TLS`.
    user, pwd : str
        Credentials passed to `login()`.
    remote_dir : str
        Remote directory to `cwd()` into after logging in (default '/').

    Returns the connected `ftplib.FTP_TLS` object.  Any ftplib or socket
    error raised during setup is propagated to the caller after the
    half-open connection has been closed.
    """
    ftps = ftplib.FTP_TLS(url)
    try:
        ftps.login(user, pwd)
        # Ask for a protected (encrypted) data channel as well, so listings
        # and file contents are not sent in the clear.
        ftps.prot_p()
        ftps.cwd(remote_dir)
    except Exception:
        # Do not leak the control socket when login/cwd fails.
        ftps.close()
        raise
    return ftps
def makedir(pathname):
    """
    Create directory `pathname`, creating missing parent directories first.

    Does nothing if `pathname` already is a directory.  Raises OSError if a
    directory cannot be created.
    """
    if os.path.isdir(pathname):
        return
    parent = os.path.dirname(pathname)
    if parent:
        makedir(parent)
    # A path with a trailing separator ("a/b/") has "a/b" as its dirname, so
    # the recursive call above may already have created the target.
    if not os.path.isdir(pathname):
        # 0o777 is the octal spelling accepted by Python 2.6+ and Python 3;
        # the effective mode is still subject to the process umask.
        os.mkdir(pathname, 0o777)
class LoggingFile(object):
    """
    Prints a symbol ('.' by default) every `blocksize` bytes.

    Wraps a writable file object `fp`: every `write()` is forwarded to `fp`,
    and one `symbol` is written to `outfp` for each complete `blocksize`
    bytes seen so far.  `close()` only terminates the progress line on
    `outfp`; it does not close `fp`.
    """

    def __init__(self, fp, blocksize, outfp, symbol='.'):
        self.fp = fp
        self.bytes = 0          # total number of bytes written so far
        self.hashes = 0         # number of symbols already printed
        self.blocksize = blocksize
        self.outfp = outfp
        self.symbol = symbol

    def write(self, data):
        """
        Write `data` to the wrapped file, printing any progress symbols due.
        """
        self.bytes += len(data)
        # Floor division: with "/" Python 3 yields a float, which made the
        # first symbol appear as soon as any data at all had arrived.
        due = self.bytes // self.blocksize
        while self.hashes < due:
            self.outfp.write(self.symbol)
            self.outfp.flush()
            self.hashes += 1
        self.fp.write(data)

    def close(self):
        """
        Finish the progress line with a newline.
        """
        self.outfp.write('\n')
def writedict(dict, filename):
    """
    Write `dict` to `filename` as a Python dict literal, one item per line.

    The text is first written to a temporary file named '@' + basename in
    the same directory, and only then renamed over `filename`.  An existing
    `filename` is kept as a backup with a '~' suffix (replacing any older
    backup).  Keys and values are written with `%r`.
    """
    dirname, fname = os.path.split(filename)
    tempname = os.path.join(dirname, '@' + fname)
    backup = os.path.join(dirname, fname + '~')
    try:
        os.unlink(backup)
    except OSError:
        pass  # no previous backup to remove
    # The context manager closes the temp file even if a write fails.
    with open(tempname, 'w') as fp:
        fp.write('{\n')
        for key, value in dict.items():
            fp.write('%r: %r,\n' % (key, value))
        fp.write('}\n')
    try:
        os.rename(filename, backup)
    except OSError:
        pass  # nothing to back up on the first write
    os.rename(tempname, filename)
def parse_line(line, verbose=0):
    """
    Parse a single line from RETRLINES LIST

    Returns a tuple `(mode, infostuff, filename)`:

    - When `platform.system()` does not contain 'Linux', the line is taken
      to be a bare file name; a trailing '/' marks a subdirectory.  `mode`
      is 'd' or '-' and `infostuff` is ''.
    - Otherwise the line is parsed as a UNIX-style listing: `mode` is the
      first field, `infostuff` is the list of the four fields before the
      name, and `filename` is the name with any " -> target" symlink suffix
      removed.

    `verbose` > 1 reports symbolic links as they are found.  (The original
    code read an undefined global `verbose` here, so every symlink line
    raised NameError.)

    Raises ShortLineError if a UNIX-style line has fewer than 6 fields.
    """
    if 'Linux' not in platform.system():
        # Mac listing has just filenames;
        # trailing / means subdirectory
        filename = line.strip()
        mode = '-'
        if filename[-1:] == '/':
            filename = filename[:-1]
            mode = 'd'
        return mode, '', filename

    # Parse, assuming a UNIX listing.  At most 9 fields, so that a file name
    # containing spaces stays in one piece as the last field.
    words = line.split(None, 8)
    if len(words) < 6:
        raise ShortLineError('line "%s" too short' % line)
    filename = words[-1].lstrip()
    arrow = filename.find(" -> ")
    if arrow >= 0:
        # words[0] had better start with 'l'...
        if verbose > 1:
            print('Found symbolic link %r' % (filename,))
        filename = filename[:arrow]
    infostuff = words[-5:-1]
    mode = words[0]
    return mode, infostuff, filename
def _askabout(filetype, filename, pwd):
    """
    Ask on stdin whether to retrieve an item; return True for yes.
    """
    try:
        read_reply = raw_input  # Python 2
    except NameError:
        read_reply = input      # Python 3
    prompt = 'Retrieve %s %s from %s ? [ny] ' % (filetype, filename, pwd)
    while True:
        reply = read_reply(prompt).strip().lower()
        if reply in ('y', 'ye', 'yes'):
            return True
        if reply in ('', 'n', 'no', 'nop', 'nope'):
            return False
        print('Please answer yes or no.')


def _symlink_target(line):
    """
    Return the text after " -> " in the name field of a UNIX LIST line, or
    None when the line has no such marker.
    """
    words = line.split(None, 8)
    name = words[-1] if words else ''
    arrow = name.find(" -> ")
    if arrow < 0:
        return None
    return name[arrow + 4:]


def mirrorsubdir(f, localdir, verbose=1, skip_patterns=None,
                 interactive=False):
    """
    Download the current remote directory of `f` into `localdir`, then
    recurse into its subdirectories.

    Parameters
    ----------
    f : connected FTP object
        Must provide `pwd`, `cwd`, `retrlines` and `retrbinary`.
    localdir : str
        Local destination directory; created if missing.
    verbose : int
        0 is quiet, 1 reports progress, 2 also reports every decision.
    skip_patterns : list of str, optional
        fnmatch patterns for names to ignore, in addition to '.', '..' and
        '.mirrorinfo'.
    interactive : bool
        Ask before retrieving each file and entering each subdirectory.

    A '.mirrorinfo' file in each local directory records the listing fields
    of every retrieved file; a file whose recorded fields equal the current
    listing is not downloaded again.  Files are downloaded under a
    temporary '@'-prefixed name and renamed once the transfer has finished.
    Returns None.
    """
    if skip_patterns is None:
        skip_patterns = []
    skippats = ['.', '..', '.mirrorinfo'] + list(skip_patterns)
    pwd = f.pwd()

    # Set up local dir
    if localdir and not os.path.isdir(localdir):
        if verbose:
            print('Creating local directory %r' % (localdir,))
        try:
            makedir(localdir)
        except OSError:
            print('Failed to establish local directory %r' % (localdir,))
            return

    # Load the record of what has already been retrieved.
    infofilename = os.path.join(localdir, '.mirrorinfo')
    try:
        with open(infofilename, 'r') as infofile:
            text = infofile.read()
    except IOError:
        text = '{}'
    try:
        # NOTE(review): eval() executes whatever .mirrorinfo contains.  The
        # file is normally written by writedict(), but anyone able to write
        # to the local directory can run code here; consider
        # ast.literal_eval.
        info = eval(text)
    except (SyntaxError, NameError):
        print('Bad mirror info in %r' % (infofilename,))
        info = {}
    if not isinstance(info, dict):
        print('Bad mirror info in %r' % (infofilename,))
        info = {}

    # keep track of subdirs
    subdirs = []

    # text from RETRLINES will be parsed into this list.
    listing = []
    if verbose:
        print('Listing remote directory %r...' % (pwd,))
    f.retrlines('LIST', listing.append)

    for line in listing:
        try:
            mode, infostuff, filename = parse_line(line)
        except ShortLineError:
            # Lines that are not directory entries (too few fields to hold a
            # name) are skipped instead of aborting the whole mirror.
            if verbose > 1:
                print('Skipping unparseable line %r' % (line,))
            continue
        if verbose > 1:
            print('--> %r' % (line,))
        if not filename or not mode:
            # Blank listing line: nothing to retrieve.
            continue

        matched = next(
            (pat for pat in skippats if fnmatch(filename, pat)), None)
        if matched is not None:
            if verbose > 1:
                print('Skip pattern %r matches %r' % (matched, filename))
            continue

        if mode[0] == 'd':
            if verbose > 1:
                print('Remembering subdirectory %r' % (filename,))
            subdirs.append(filename)
            continue

        if filename in info and info[filename] == infostuff:
            if verbose > 1:
                print('Already have this version of %r' % (filename,))
            continue

        fullname = os.path.join(localdir, filename)
        tempname = os.path.join(localdir, '@' + filename)
        if interactive:
            if not _askabout('file', filename, pwd):
                if filename in info:
                    info[filename] = 'Not retrieved'
                continue

        # Remove any leftover from an earlier, interrupted download.
        try:
            os.unlink(tempname)
        except OSError:
            pass

        if mode[0] == 'l':
            linkto = _symlink_target(line)
            if linkto is None:
                print("Can't determine link target for %r" % (filename,))
                continue
            if verbose:
                print("Creating symlink %r -> %r" % (filename, linkto))
            try:
                os.symlink(linkto, tempname)
            except (IOError, OSError) as msg:
                print("Can't create %r: %s" % (tempname, msg))
                continue
        else:
            try:
                fp = open(tempname, 'wb')
            except IOError as msg:
                print("Can't create %r: %s" % (tempname, msg))
                continue
            if verbose:
                print('Retrieving %r from %r as %r...'
                      % (filename, pwd, fullname))
                # One '.' on stdout per 100 MB received.
                fp1 = LoggingFile(fp, 1024 * 1024 * 100, sys.stdout)
            else:
                fp1 = fp
            failed = False
            t0 = time.time()
            try:
                try:
                    f.retrbinary('RETR ' + filename, fp1.write, 8 * 1024)
                except ftplib.error_perm as msg:
                    print(msg)
                    failed = True
                t1 = time.time()
                nbytes = fp.tell()
            finally:
                fp.close()
            if fp1 is not fp:
                fp1.close()
            if failed:
                # The server refused the transfer: do not install the
                # (empty or partial) temp file or record it as up to date,
                # so the file is tried again on the next run.
                try:
                    os.unlink(tempname)
                except OSError:
                    pass
                continue

        # Replace any previous local copy with the finished download.
        try:
            os.unlink(fullname)
        except OSError:
            pass  # Ignore the error
        try:
            os.rename(tempname, fullname)
        except OSError as msg:
            print("Can't rename %r to %r: %s" % (tempname, fullname, msg))
            continue
        info[filename] = infostuff
        writedict(info, infofilename)

        if verbose and mode[0] != 'l':
            dt = t1 - t0
            kbytes = nbytes / 1024.0
            summary = '%d Kbytes in %d seconds' % (
                int(round(kbytes)), int(round(dt)))
            if dt > 0:
                summary += ' (~%d Kbytes/sec)' % int(round(kbytes / dt))
            print(summary)

    # Recursively mirror subdirectories
    for subdir in subdirs:
        if interactive:
            if not _askabout('subdirectory', subdir, pwd):
                continue
        if verbose:
            print('Processing subdirectory %r' % (subdir,))
        localsubdir = os.path.join(localdir, subdir)
        pwd = f.pwd()
        if verbose > 1:
            print('Remote directory now: %r' % (pwd,))
            print('Remote cwd %r' % (subdir,))
        try:
            f.cwd(subdir)
        except ftplib.error_perm as msg:
            print("Can't chdir to %r : %r" % (subdir, msg))
        else:
            if verbose:
                print('Mirroring as %r' % (localsubdir,))
            mirrorsubdir(f, localsubdir, verbose=verbose,
                         interactive=interactive,
                         skip_patterns=skip_patterns)
            if verbose > 1:
                print('Remote cwd ..')
            f.cwd('..')
        # Make sure "cd subdir; cd .." really brought us back; otherwise the
        # remaining subdirectories would be looked up in the wrong place.
        newpwd = f.pwd()
        if newpwd != pwd:
            print('Ended up in wrong directory after cd + cd ..')
            print('Giving up now.')
            break
        if verbose > 1:
            print('OK.')
# Command-line entry point: parse options, find credentials, connect, then
# either print the remote listing or mirror the remote directory.
if __name__ == "__main__":
    ap = argparse.ArgumentParser(description=usage, epilog=epilog)
    ap.add_argument('--user', '-u', help='Username')
    ap.add_argument('--password', '-p', help='Password')
    ap.add_argument('url',
                    help='URL to FTPES server.')
    ap.add_argument('--verbose',
                    help='Verbose level (0, 1, 2); default is %(default)s',
                    type=int, default=1)
    ap.add_argument('--localdir',
                    help='Local dir to save mirror to; default is %(default)s',
                    default=os.getcwd())
    ap.add_argument('--remotedir',
                    help='Remote dir to mirror, default is %(default)s',
                    default='/')
    ap.add_argument('--interactive', action='store_true',
                    help='Enable interactive mode')
    ap.add_argument('--list',
                    help='Just list the remote dir, do not mirror anything',
                    action='store_true')
    # action='append' collects one list per --skip occurrence; without it a
    # later --skip silently replaced the earlier ones, contradicting the help.
    ap.add_argument('--skip', nargs='+', action='append',
                    help='Specify filename patterns to skip. '
                    'Can be specified multiple times.')
    args = ap.parse_args()

    # Flatten [[a, b], [c]] -> [a, b, c]; no --skip at all gives [].
    args.skip = [pat for group in (args.skip or []) for pat in group]

    if args.user is None or args.password is None:
        try:
            # netrc.netrc() raises IOError when ~/.netrc does not exist.
            n = netrc.netrc(os.path.expanduser('~/.netrc'))
            user, account, password = n.hosts[args.url]
        except (IOError, KeyError):
            print('No information for host "%s" in ~/.netrc. Please specify a '
                  'username and password as commandline arguments' % args.url)
            sys.exit(1)
        args.user = user
        args.password = password

    f = connection(
        url=args.url,
        user=args.user,
        pwd=args.password,
        remote_dir=args.remotedir
    )
    try:
        if args.list:
            lines = []
            f.retrlines("LIST", lines.append)
            for line in lines:
                try:
                    mode, info, filename = parse_line(line)
                except ShortLineError:
                    # No file name to match against the skip patterns; show
                    # the line unchanged rather than aborting the listing.
                    print(line)
                    continue
                if any(fnmatch(filename, s) for s in args.skip):
                    continue
                print(line)
        else:
            mirrorsubdir(f, args.localdir, args.verbose,
                         skip_patterns=args.skip,
                         interactive=args.interactive)
    finally:
        # End the session politely; fall back to dropping the socket if the
        # connection is already broken.
        try:
            f.quit()
        except ftplib.all_errors:
            f.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment