A simple script for caching packages on S3 and building simple HTML indices.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""pycache -- cache a python package from PyPI on S3.

A simple script to collect a cache of packages locally and sync them
up to an S3 bucket, using directories as namespaces so that different
projects can have different dependencies.

This is just about the simplest thing that could possibly work.
"""
import warnings
warnings.filterwarnings('ignore')  # silence deprecation warnings (e.g. from os.tmpnam below)

import os
import argparse
import datetime
import time
import mimetypes
import itertools

from paver.easy import path
from setuptools.package_index import PackageIndex
import boto
import boto.s3.key
import boto.exception

__cache__ = path("~/.pycache").expanduser().abspath()
if not __cache__.exists():
    __cache__.makedirs()

index = PackageIndex(index_url="http://pypi.it.uwosh.edu/", search_path=[])
html = """<html> | |
<head><title>Index - {project}</title></head> | |
<body> | |
<h1>{project}</h1> | |
{body} | |
</body> | |
</html> | |
""" | |
def main(package=None, project=None, sync=False):
    if project is not None:
        proj_p = __cache__ / project
    else:
        proj_p = __cache__
    if not proj_p.exists():
        proj_p.makedirs()

    if package is not None:
        # Download into a throwaway directory, then copy the resulting
        # file into the project's cache directory.
        tmp = path(os.tmpnam())
        tmp.makedirs()
        dl = index.download(package, tmp)
        if dl is not None:
            fn = path(dl)
            fn.copy(proj_p / fn.name)
        for fn in tmp.listdir():
            fn.remove()
        tmp.removedirs()

    if sync:
        buildIndices()
        syncer = SyncS3(__cache__)
        syncer.sync_s3()
def buildIndices():
    # Write an index.html into the cache root and every project
    # subdirectory, linking each file (and each subdirectory's own
    # index.html) relative to the bucket root.
    for proj_p in itertools.chain((__cache__, ), __cache__.walkdirs()):
        links = ('<li><a href="{project}/{file}">{name}</a></li>'.format(
                     project=proj_p.partition(__cache__)[-1],
                     file=(fn.name + '/index.html') if fn.isdir() else fn.name,
                     name=fn.name)
                 for fn in proj_p.listdir() if fn.name != 'index.html')
        with open((proj_p / 'index.html'), 'w') as fo:
            fo.write(html.format(body="<ul>%s</ul>" % ''.join(links),
                                 project=proj_p.name))
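# For example, a cache populated for one project ("myproject" and the
# tarball name are illustrative) ends up laid out like:
#
#   ~/.pycache/index.html
#   ~/.pycache/myproject/index.html
#   ~/.pycache/myproject/Django-1.2.3.tar.gz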
class SyncS3(object):
    # Fill in your own credentials and bucket name; the values here
    # are placeholders.
    AWS_ACCESS_KEY_ID = '****************'
    AWS_SECRET_ACCESS_KEY = '*****************'
    AWS_BUCKET_NAME = 'pypi.yourdomain.org'
    FILTER_LIST = []

    def __init__(self, directory, prefix=None, do_force=False, verbosity=0):
        self.DIRECTORY = directory
        self.prefix = prefix
        self.do_force = do_force
        self.verbosity = verbosity
        self.upload_count = 0
        self.skip_count = 0

    def sync_s3(self):
        """Walks the cache directory and syncs files to S3."""
        bucket, key = self.open_s3()
        os.path.walk(self.DIRECTORY, self.upload_s3,
                     (bucket, key, self.AWS_BUCKET_NAME, self.DIRECTORY))

    def open_s3(self):
        """Opens a connection to S3, returning the bucket and a reusable key."""
        conn = boto.connect_s3(self.AWS_ACCESS_KEY_ID, self.AWS_SECRET_ACCESS_KEY)
        try:
            bucket = conn.get_bucket(self.AWS_BUCKET_NAME)
        except boto.exception.S3ResponseError:
            bucket = conn.create_bucket(self.AWS_BUCKET_NAME)
        return bucket, boto.s3.key.Key(bucket)
    def upload_s3(self, arg, dirname, names):
        """This is the callback to os.path.walk and where much of the work happens."""
        bucket, key, bucket_name, root_dir = arg  # expand arg tuple

        # Skip directories we don't want to sync
        if os.path.basename(dirname) in self.FILTER_LIST:
            # prevent walk from processing subfiles/subdirs below the ignored one
            del names[:]
            return

        # Later we assume the root directory ends with a trailing slash
        if not root_dir.endswith(os.path.sep):
            root_dir = root_dir + os.path.sep

        for file in names:
            headers = {}
            if file in self.FILTER_LIST:
                continue  # Skip files we don't want to sync

            filename = os.path.join(dirname, file)
            file_key = filename[len(root_dir):]
            if os.path.isdir(filename):
                # A directory is represented on S3 by its index.html
                filename = os.path.join(filename, 'index.html')
                if not os.path.exists(filename):
                    continue
            if self.prefix:
                file_key = '%s/%s' % (self.prefix, file_key)

            # Check if the copy on S3 is older than the local file; if so, upload
            if not self.do_force:
                s3_key = bucket.get_key(file_key)
                if s3_key:
                    s3_datetime = datetime.datetime(*time.strptime(
                        s3_key.last_modified, '%a, %d %b %Y %H:%M:%S %Z')[0:6])
                    local_datetime = datetime.datetime.utcfromtimestamp(
                        os.stat(filename).st_mtime)
                    if local_datetime < s3_datetime:
                        self.skip_count += 1
                        if self.verbosity > 1:
                            print "File %s hasn't been modified since last " \
                                  "being uploaded" % (file_key)
                        continue

            # File is newer; let's process and upload
            if self.verbosity > 0:
                print "Uploading %s..." % (file_key)
            content_type = mimetypes.guess_type(filename)[0]
            if content_type:
                headers['Content-Type'] = content_type
            file_obj = open(filename, 'rb')
            filedata = file_obj.read()
            try:
                key.name = file_key
                key.set_contents_from_string(filedata, headers, replace=True)
                key.set_acl('public-read')
            except boto.exception.S3CreateError, e:
                print "Failed: %s" % e
            except Exception, e:
                print e
                raise
            else:
                self.upload_count += 1
            file_obj.close()
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Cache a python package from PyPI.')
    parser.add_argument('-j', '--project', action='store')
    parser.add_argument('-k', '--package', action='store')
    parser.add_argument('-s', '--sync', action='store_true')
    args = parser.parse_args()
    main(project=args.project, package=args.package, sync=args.sync)
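# Once synced, the bucket serves a plain HTML link page per project,
# which is the kind of page easy_install's --find-links (-f) option
# accepts. A sketch of consuming it (the hostname below derives from
# the placeholder AWS_BUCKET_NAME, not a real endpoint):
#
#   easy_install -f http://pypi.yourdomain.org.s3.amazonaws.com/myproject/index.html SomePackage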