Skip to content

Instantly share code, notes, and snippets.

@jayzeng
Created July 30, 2013 20:47
Show Gist options
  • Save jayzeng/6116773 to your computer and use it in GitHub Desktop.
Save jayzeng/6116773 to your computer and use it in GitHub Desktop.
download sample
import os
import logging
import argparse
from boto.s3.connection import S3Connection
def create_dir(type):
    """Ensure that the directory ``type`` exists, creating it if necessary.

    The parameter keeps its original name (which shadows the ``type``
    builtin inside this function) so existing keyword callers keep working.

    Args:
        type: Path of the directory to create. Missing parent directories
            are created as well.

    Raises:
        OSError: If the directory cannot be created, or if the path already
            exists but is not a directory.
    """
    try:
        os.makedirs(type)
    except OSError:
        # Attempt-then-inspect avoids the race between an existence check
        # and the creation call. An already-present directory is the only
        # failure that is safe to ignore.
        if not os.path.isdir(type):
            raise
def get_aws_credentials():
    """Read the AWS credentials from the process environment.

    Returns:
        dict: ``{'key': ..., 'secret': ...}`` taken from the
        ``AWS_ACCESS_KEY`` and ``AWS_SECRET_KEY`` environment variables.
        A value is ``None`` when its variable is not set.
    """
    access_key = os.environ.get('AWS_ACCESS_KEY')
    secret_key = os.environ.get('AWS_SECRET_KEY')
    return dict(key=access_key, secret=secret_key)
def parse_arg(argv=None):
    """Parse the command-line options of the sample downloader.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process command line, which
            is what the original zero-argument call did.

    Returns:
        argparse.Namespace: with the attributes ``month`` and ``day``,
        both as the raw strings given on the command line.

    Raises:
        SystemExit: Raised by argparse when a required option is missing
            or the arguments are otherwise invalid.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', help='stats month in 2013', dest='month',
                        required=True)
    parser.add_argument('-d', help='stats day', dest='day', required=True)
    return parser.parse_args(argv)
def download_sample(month, day, year=2013):
    """Download one day's stat files from S3 into per-type local directories.

    For each stat type a directory of the same name is created in the
    current working directory, and every key listed under that type's
    prefix for the requested day is saved there under its base name.
    Files that already exist locally are not downloaded again. A failure
    on one key is printed and does not stop the remaining downloads.

    Args:
        month: Month number as an int (zero-padded to two digits in the
            key prefix).
        day: Day of month as an int (zero-padded to two digits in the
            key prefix).
        year: Year component of the key prefix. Defaults to 2013, the
            value that was previously hard-coded.
            NOTE(review): assumes other years use the same key layout;
            confirm against the bucket.
    """
    # Connect to the bucket.
    cred = get_aws_credentials()
    conn = S3Connection(cred['key'], cred['secret'])
    bucket = conn.get_bucket('mat-redshift-prod')

    interested_types = ('stat_clicks', 'stat_events', 'stat_installs',
                        'stat_opens')
    # %02d pads single digits and leaves two-digit values alone; the former
    # '0%d' produced '010' for months 10-12 and days 10-31.
    bucket_day = '1_1/%d/%02d/%02d/' % (year, month, day)
    prefix_template = 'qproc_imports/prod/qproc_import_csv/%s/%s'

    for stat_type in interested_types:
        create_dir(stat_type)
        for key in bucket.list(prefix_template % (stat_type, bucket_day)):
            try:
                file_name = key.name.split('/')[-1]
                if not file_name:
                    # Key name ends with '/', so there is no file name to
                    # save it under.
                    continue
                local_path = os.path.join(stat_type, file_name)
                if not os.path.exists(local_path):
                    key.get_contents_to_filename(local_path)
            except Exception as e:
                # Best effort: report the failure and move on to the
                # next key.
                print(e)
if __name__ == "__main__":
    # Script entry point: read -m/-d from the command line, convert both
    # to integers, and fetch that day's files.
    cli_args = parse_arg()
    month, day = int(cli_args.month), int(cli_args.day)
    download_sample(month, day)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment