@jokull
Created November 22, 2012 15:36
S3 static assets sync that is awesome
# encoding=utf-8
import sys
import datetime
import email
import mimetypes
import os
import time
import gzip
import subprocess
from cStringIO import StringIO
from boto.s3.connection import S3Connection
s3 = S3Connection()
GZIP_CONTENT_TYPES = (
    'text/css',
    'application/javascript',
)
GZIP_SIZE_MIN = 1024  # Per recommendation; gzipping smaller files isn't worth it
EXCLUDE_FILENAMES = ('.DS_Store', '.git')

HEADERS = {
    # HTTP/1.0: absolute expiry date two years from now
    'Expires': email.Utils.formatdate(
        time.mktime((datetime.datetime.now() +
                     datetime.timedelta(days=365 * 2)).timetuple()),
        usegmt=True),
    # HTTP/1.1: relative max-age of two years, in seconds
    'Cache-Control': 'max-age=%d' % (3600 * 24 * 365 * 2),
}
def main():
    try:
        media_root, bucket_root = sys.argv[1:]
    except ValueError:
        sys.exit(u"Error. (Hint: python s3_sync.py public/ s3.bucket.com/static)")

    if '/' in bucket_root:
        bucket_name, prefix = bucket_root.split("/", 1)
    else:
        bucket_name, prefix = bucket_root, ''
    bucket = s3.get_bucket(bucket_name)

    # Checksum the whole tree so the S3 prefix changes whenever any file does.
    # `md5` is the OS X command; on Linux substitute `md5sum`.
    process = subprocess.Popen('tar c %s | md5' % media_root,
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    media_root_md5, stderr = process.communicate()
    if stderr:
        raise Exception(u'Could not get unique folder checksum')

    s3_root = os.path.join(prefix, media_root_md5[:6])
    if not media_root.endswith("/"):
        # No trailing slash: copy the folder itself, not just its contents - like rsync
        s3_root = os.path.join(s3_root, media_root)
    s3_root = s3_root.rstrip("/")  # Normalize

    print "Uploading to //s3.amazonaws.com/%s/%s/" % (bucket_name, s3_root)
    for root, dirs, files in os.walk(media_root):
        for filename in files:
            if [s for s in EXCLUDE_FILENAMES if root.endswith(s)]:
                continue  # e.g. anything inside .git
            if filename in EXCLUDE_FILENAMES:
                continue  # e.g. .DS_Store
            path = os.path.join(root, filename)
            s3_path = os.path.join(os.path.relpath(root, media_root), filename)
            s3_path = os.path.normpath(os.path.join(s3_root, s3_path))
            content_type, _ = mimetypes.guess_type(s3_path)
            byte_length = os.stat(path).st_size
            headers = HEADERS.copy()
            key = bucket.new_key(s3_path)
            with open(path, 'rb') as fp:
                if content_type in GZIP_CONTENT_TYPES and byte_length > GZIP_SIZE_MIN:
                    headers['Content-Encoding'] = 'gzip'
                    compressed = StringIO()
                    with gzip.GzipFile(fileobj=compressed, mode='wb',
                                       compresslevel=9) as gzip_fp:
                        gzip_fp.write(fp.read())
                    # Read back only after GzipFile is closed, so the gzip trailer is written
                    contents = compressed.getvalue()
                else:
                    contents = fp.read()
            if content_type:
                headers['Content-Type'] = content_type
            if os.environ.get('DRYRUN') == "true":
                for header, value in headers.items():
                    print "%s: %s" % (header, value)
                print s3_path
                print
            else:
                key.set_contents_from_string(
                    contents, headers, replace=True, policy='public-read')
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        sys.exit(u"Early exit")
jokull commented Nov 22, 2012

Not only will this script get you laid, but it will give you an A grade on the Y!Slow test.

Usage

Dry run:
$ DRYRUN=true python s3_sync.py public/ s3.domain.com/static
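
A dry run uploads nothing; for each file it only prints the headers it would send, followed by the destination key. The output looks roughly like this (the checksum, date and file path below are made-up examples):

Expires: Sat, 22 Nov 2014 15:36:00 GMT
Cache-Control: max-age=63072000
Content-Encoding: gzip
Content-Type: text/css
static/3fa2b1/css/app.css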

Upload:
$ python s3_sync.py public/ s3.domain.com/static

Features

  • Gzips text-based files (CSS and JavaScript) over 1 KB
  • Sets Content-Type, Expires and Cache-Control headers
  • Familiar rsync trailing-slash behavior: public/ uploads the directory's contents, public uploads the directory itself
  • Computes an MD5 checksum of the static assets and prefixes the S3 keys with the first 6 characters of that checksum - guarantees that visitors to your site receive the new version after every deploy, while caching aggressively otherwise (see the sketch after this list)
  • Prints the S3 HTTP root URI to stdout
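
To make the checksum prefixing concrete, here is a minimal sketch of how the final keys are shaped; it mirrors what the script computes and is not extra API. The bucket and prefix are taken from the usage example above, the checksum and file path are made up:

# Hypothetical values, mirroring what the script computes
bucket_name, prefix = 's3.domain.com', 'static'
media_root_md5 = '3fa2b1c9...'                      # md5 of `tar c public/`, made up here
s3_root = '%s/%s' % (prefix, media_root_md5[:6])    # 'static/3fa2b1'

# public/css/app.css is then uploaded to:
#   //s3.amazonaws.com/s3.domain.com/static/3fa2b1/css/app.css
# Any change to the assets changes the checksum, so every deploy gets a
# fresh prefix and the previous version can stay cached forever.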

No messing around with compiling CoffeeScript or Stylus, concatenating files, or minifying JavaScript; that is the job of a frontend compiler (see Brunch).

jokull commented Apr 1, 2013

Now a proper Python package:
$ pip install ssstatic
