A simple script for caching packages on S3 and building simple HTML indices.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""pycache -- cache a python package from PyPI on S3.

A simple script to collect a cache of packages locally and sync them
up to an S3 bucket, using directories as namespaces so that different
projects can have different dependencies.

This is just about the simplest thing that could possibly work.
"""
import warnings
warnings.filterwarnings('ignore')  # silence deprecation warnings (e.g. from os.tmpnam below)

import os
import argparse
import datetime
import time
import mimetypes
import itertools

from paver.easy import path
from setuptools.package_index import PackageIndex
import boto
import boto.s3.key
import boto.exception

__cache__ = path("~/.pycache").expanduser().abspath()
if not __cache__.exists():
    __cache__.makedirs()

index = PackageIndex(index_url="http://pypi.it.uwosh.edu/", search_path=[])
html = """<html> | |
<head><title>Index - {project}</title></head> | |
<body> | |
<h1>{project}</h1> | |
{body} | |
</body> | |
</html> | |
""" | |
def main(package=None, project=None, sync=False):
    if project is not None:
        proj_p = __cache__ / project
    else:
        proj_p = __cache__
    if not proj_p.exists():
        proj_p.makedirs()

    if package is not None:
        # Download into a throwaway directory, then copy the resulting
        # file into the project's cache directory.
        tmp = path(os.tmpnam())
        tmp.makedirs()
        dl = index.download(package, tmp)
        if dl is not None:
            fn = path(dl)
            fn.copy(proj_p / fn.name)
        for fn in tmp.listdir():
            fn.remove()
        tmp.removedirs()

    if sync:
        buildIndices()
        syncer = SyncS3(__cache__)
        syncer.sync_s3()
def buildIndices():
    # Write an index.html into the cache root and every project
    # subdirectory, linking each file (and each subdirectory's own
    # index.html) relative to the bucket root.
    for proj_p in itertools.chain((__cache__, ), __cache__.walkdirs()):
        links = ('<li><a href="{project}/{file}">{name}</a></li>'.format(
                     project=proj_p.partition(__cache__)[-1],
                     file=(fn.name + '/index.html') if fn.isdir() else fn.name,
                     name=fn.name)
                 for fn in proj_p.listdir() if fn.name != 'index.html')
        with open((proj_p / 'index.html'), 'w') as fo:
            fo.write(html.format(body="<ul>%s</ul>" % ''.join(links),
                                 project=proj_p.name))
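# For example, a cache populated for one project ("myproject" and the
# tarball name are illustrative) ends up laid out like:
#
#   ~/.pycache/index.html
#   ~/.pycache/myproject/index.html
#   ~/.pycache/myproject/Django-1.2.3.tar.gz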
class SyncS3(object):
    # Fill in your own credentials and bucket name; the values here
    # are placeholders.
    AWS_ACCESS_KEY_ID = '****************'
    AWS_SECRET_ACCESS_KEY = '*****************'
    AWS_BUCKET_NAME = 'pypi.yourdomain.org'
    FILTER_LIST = []

    def __init__(self, directory, prefix=None, do_force=False, verbosity=0):
        self.DIRECTORY = directory
        self.prefix = prefix
        self.do_force = do_force
        self.verbosity = verbosity
        self.upload_count = 0
        self.skip_count = 0

    def sync_s3(self):
        """Walks the cache directory and syncs files to S3."""
        bucket, key = self.open_s3()
        os.path.walk(self.DIRECTORY, self.upload_s3,
                     (bucket, key, self.AWS_BUCKET_NAME, self.DIRECTORY))

    def open_s3(self):
        """Opens a connection to S3, returning the bucket and a reusable key."""
        conn = boto.connect_s3(self.AWS_ACCESS_KEY_ID, self.AWS_SECRET_ACCESS_KEY)
        try:
            bucket = conn.get_bucket(self.AWS_BUCKET_NAME)
        except boto.exception.S3ResponseError:
            bucket = conn.create_bucket(self.AWS_BUCKET_NAME)
        return bucket, boto.s3.key.Key(bucket)
    def upload_s3(self, arg, dirname, names):
        """This is the callback to os.path.walk and where much of the work happens."""
        bucket, key, bucket_name, root_dir = arg  # expand arg tuple

        # Skip directories we don't want to sync
        if os.path.basename(dirname) in self.FILTER_LIST:
            # prevent walk from processing subfiles/subdirs below the ignored one
            del names[:]
            return

        # Later we assume the root directory ends with a trailing slash
        if not root_dir.endswith(os.path.sep):
            root_dir = root_dir + os.path.sep

        for file in names:
            headers = {}
            if file in self.FILTER_LIST:
                continue  # Skip files we don't want to sync

            filename = os.path.join(dirname, file)
            file_key = filename[len(root_dir):]
            if os.path.isdir(filename):
                # A directory is represented on S3 by its index.html
                filename = os.path.join(filename, 'index.html')
                if not os.path.exists(filename):
                    continue
            if self.prefix:
                file_key = '%s/%s' % (self.prefix, file_key)

            # Check if the copy on S3 is older than the local file; if so, upload
            if not self.do_force:
                s3_key = bucket.get_key(file_key)
                if s3_key:
                    s3_datetime = datetime.datetime(*time.strptime(
                        s3_key.last_modified, '%a, %d %b %Y %H:%M:%S %Z')[0:6])
                    local_datetime = datetime.datetime.utcfromtimestamp(
                        os.stat(filename).st_mtime)
                    if local_datetime < s3_datetime:
                        self.skip_count += 1
                        if self.verbosity > 1:
                            print "File %s hasn't been modified since last " \
                                  "being uploaded" % (file_key)
                        continue

            # File is newer; let's process and upload
            if self.verbosity > 0:
                print "Uploading %s..." % (file_key)
            content_type = mimetypes.guess_type(filename)[0]
            if content_type:
                headers['Content-Type'] = content_type
            file_obj = open(filename, 'rb')
            filedata = file_obj.read()
            try:
                key.name = file_key
                key.set_contents_from_string(filedata, headers, replace=True)
                key.set_acl('public-read')
            except boto.exception.S3CreateError, e:
                print "Failed: %s" % e
            except Exception, e:
                print e
                raise
            else:
                self.upload_count += 1
            file_obj.close()
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Cache a python package from PyPI.')
    parser.add_argument('-j', '--project', action='store')
    parser.add_argument('-k', '--package', action='store')
    parser.add_argument('-s', '--sync', action='store_true')
    args = parser.parse_args()
    main(project=args.project, package=args.package, sync=args.sync)
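# Once synced, the bucket serves a plain HTML link page per project,
# which is the kind of page easy_install's --find-links (-f) option
# accepts. A sketch of consuming it (the hostname below derives from
# the placeholder AWS_BUCKET_NAME, not a real endpoint):
#
#   easy_install -f http://pypi.yourdomain.org.s3.amazonaws.com/myproject/index.html SomePackage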