Created
November 29, 2013 19:27
-
-
Save njbair/7710734 to your computer and use it in GitHub Desktop.
Sets a Cache-Control HTTP header on all matching objects in an Amazon S3 bucket
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# Amazon S3 Cache-Control Processor
#
# Programmatically applies the specified Cache-Control metadata to all
# matching objects (files) in an S3 bucket
#
# Uses the excellent boto library <http://docs.pythonboto.org>
import re

import boto
from boto.exception import S3ResponseError
from boto.s3.connection import S3Connection
from boto.s3.key import Key
# A static class to store configuration data
class Config:
    """Static configuration for the Cache-Control processor.

    The attributes are read directly from the class; it is never
    instantiated.
    """

    # the name of the S3 bucket
    bucket = 'gracechurchmentor'

    # a list of Regular Expressions to trigger a skip. Useful for
    # blacklisting files which you don't want to process. Each pattern is
    # applied to the object name with re.search, so '^' anchors it to the
    # start of the name.
    ignore = (
        r'^logs',  # ignore logfiles
        r'^SRS',   # ignore Steve's files
    )

    # value for Cache-Control header
    cache_control = 'max-age=259200, public'
def _is_ignored(name):
    """Return True if *name* matches any pattern in Config.ignore."""
    return any(re.search(pattern, name) for pattern in Config.ignore)


def main():
    """Apply Config.cache_control to every non-ignored object in the bucket.

    Metadata on an existing S3 object cannot be edited in place, and
    Key.set_metadata() only changes the local Key object. The object is
    therefore copied onto itself with a replaced metadata set. Because a
    replacing copy discards whatever metadata is not re-sent, the current
    user metadata and content headers are read first and passed along,
    and preserve_acl=True keeps the object's existing ACL.

    For each processed object "Modifying <name>..." is printed, followed
    by "Success!" if the stored Cache-Control header afterwards equals
    Config.cache_control, or "Failed!" otherwise.

    NOTE(review): server-side encryption settings are not re-applied by
    the copy -- confirm whether this bucket relies on them.
    """
    # instantiate the S3 connection
    conn = boto.connect_s3()
    bucket = conn.get_bucket(Config.bucket)

    # S3 buckets don't technically have hierarchical directory
    # structures; object names merely carry pseudo-directory prefixes, so
    # a single flat listing visits every object without any recursion.
    for listed in bucket.list():
        name = listed.name

        # skip anything on the ignore list
        if _is_ignored(name):
            continue

        print("Modifying %s..." % name)
        try:
            # Keys returned by list() do not carry metadata; fetch the
            # object's headers so they can be re-sent with the copy.
            current = bucket.get_key(name)
            if current is None:
                # the object vanished between listing and processing
                print("Failed!")
                continue

            metadata = dict(current.metadata)
            for header, attr in (
                ('Content-Type', 'content_type'),
                ('Content-Encoding', 'content_encoding'),
                ('Content-Disposition', 'content_disposition'),
                ('Content-Language', 'content_language'),
            ):
                value = getattr(current, attr, None)
                if value:
                    metadata[header] = value
            metadata['Cache-Control'] = Config.cache_control

            # Self-copy with replaced metadata is what actually persists
            # the new header on S3.
            listed.copy(bucket.name, name, metadata=metadata,
                        preserve_acl=True)

            # Re-read from S3 so the check reflects what is stored, not
            # what was set locally.
            updated = bucket.get_key(name)
        except S3ResponseError:
            print("Failed!")
            continue

        if updated is not None and \
                updated.cache_control == Config.cache_control:
            print("Success!")
        else:
            print("Failed!")
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment