Skip to content

Instantly share code, notes, and snippets.

@jlisee
Created May 11, 2013 15:15
Show Gist options
  • Save jlisee/5560229 to your computer and use it in GitHub Desktop.
Save jlisee/5560229 to your computer and use it in GitHub Desktop.
Download and unpack a tarball from the Internet with Python.
# Author: Joseph Lisee <[email protected]>
# License: No rights reserved, code is in the public domain.
# Date: May 2013
__doc__ = """
Downloads and unpacks a tarball from the Internet.
"""
import argparse
import os
import os.path
import sys
import tarfile
from urllib2 import urlopen, URLError, HTTPError
def download_file(url, download_path='.'):
    """
    Downloads the file at `url` into the directory `download_path`.

    The local file is named after the last path component of `url`.
    Based on StackOverflow: http://stackoverflow.com/a/4028894/138948

    Returns True if the file was fetched and written, False if the request
    failed with an HTTPError or URLError (the error is printed). Errors
    raised while opening or writing the local file are not caught here.
    """
    success = False

    # Formulate the path to our local file
    local_path = os.path.join(download_path, os.path.basename(url))

    # Copy in bounded pieces so a large tarball is never held in memory whole
    chunk_size = 64 * 1024

    try:
        # Open the url
        response = urlopen(url)
        try:
            print("downloading " + url)

            # Open our local file for writing and stream the body into it
            with open(local_path, "wb") as local_file:
                for chunk in iter(lambda: response.read(chunk_size), b""):
                    local_file.write(chunk)
        finally:
            # Always release the connection, even if the copy failed
            response.close()

        success = True

    # Handle errors (HTTPError is a subclass of URLError, so it goes first)
    except HTTPError as e:
        print("HTTP Error: %s %s" % (e.code, url))
    except URLError as e:
        print("URL Error: %s %s" % (e.reason, url))

    return success
def _is_inside(directory, target):
    """
    Returns True if `target` resolves to `directory` or to a path below it.
    """
    base = os.path.realpath(directory)
    resolved = os.path.realpath(target)
    # os.path.join(base, '') ends in exactly one separator, so "/out" is not
    # mistaken for a parent of "/out-other"
    return resolved == base or resolved.startswith(os.path.join(base, ''))


def unpack_tarball(tar_url, extract_path='.'):
    """
    Extracts the tar file at the local path `tar_url` into `extract_path`.

    Returns the name of the archive's single top-level entry. If the archive
    is empty or has more than one top-level entry, '.' is returned, meaning
    the contents sit directly in `extract_path`.

    Raises ValueError, before anything is extracted, if a member name would
    land outside `extract_path` (absolute path or '..' components).
    """
    print("extracting " + tar_url)

    # Open and extract; the with-block closes the archive on every path
    with tarfile.open(tar_url, 'r') as tar:
        names = tar.getnames()

        # NOTE(review): this checks member names only; link targets inside
        # the archive are not inspected.
        for name in names:
            if not _is_inside(extract_path, os.path.join(extract_path, name)):
                raise ValueError("refusing to extract %r outside of %r"
                                 % (name, extract_path))

        tar.extractall(extract_path)

    # Get root: collect the first real path component of every member, so the
    # result does not depend on the archive holding an explicit entry for the
    # root directory
    roots = set()
    for name in names:
        parts = [part for part in name.split('/') if part not in ('', '.')]
        if parts:
            roots.add(parts[0])

    if len(roots) == 1:
        return roots.pop()
    return '.'
def download_and_unpack(url, output_path):
    """
    Downloads the tarball at `url` into `output_path` and unpacks it there.

    Returns `output_path` joined with the value returned by unpack_tarball.

    Raises IOError if download_file reports that the download failed, so a
    missing or stale local file is never unpacked by mistake.
    """
    # Download file (it is saved under the last path component of the url)
    tar_filename = os.path.basename(url)
    if not download_file(url, output_path):
        raise IOError("could not download " + url)

    # Unpack file
    tar_path = os.path.join(output_path, tar_filename)
    unpack_dir = unpack_tarball(tar_path, output_path)

    return os.path.join(output_path, unpack_dir)
def main(argv=None):
    """
    Command line entry point.

    Parses `argv` (defaults to sys.argv; the first element is skipped as the
    program name), creates the output directory if it does not exist, then
    downloads and unpacks the given URL and prints the resulting path.
    """
    if argv is None:
        argv = sys.argv

    # Parse arguments
    parser = argparse.ArgumentParser(
        description='Downloads and unpacks a tarball from the Internet.')
    parser.add_argument('url', metavar='URL', type=str,
                        help='File to download and unpack')
    parser.add_argument('--output', dest='output', default=".",
                        help='output directory')
    args = parser.parse_args(argv[1:])

    # Make output directory if needed (--output defaults to ".", so it is
    # never None here)
    if not os.path.exists(args.output):
        os.makedirs(args.output)

    # Do our work!
    print(download_and_unpack(args.url, args.output))
# Run main() only when executed as a script, and hand its return value to
# the interpreter as the process exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment