Created
July 30, 2013 20:47
-
-
Save jayzeng/6116773 to your computer and use it in GitHub Desktop.
download sample
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import logging | |
import argparse | |
from boto.s3.connection import S3Connection | |
def create_dir(type):
    """Ensure that the directory ``type`` exists, creating it if needed.

    Missing parent directories are created as well, and calling this for a
    directory that already exists is a no-op.

    Args:
        type: Path of the directory to create. (The name shadows the
            builtin ``type``; it is kept so existing callers keep working.)

    Raises:
        OSError: If the directory cannot be created, or if the path exists
            but is not a directory.
    """
    try:
        # Create unconditionally instead of checking first: a separate
        # exists() check can race with another process creating the path.
        os.makedirs(type)
    except OSError:
        # An already-existing directory is the desired end state; anything
        # else (permission denied, a regular file in the way) is an error.
        if not os.path.isdir(type):
            raise
def get_aws_credentials():
    """Read the AWS credentials from the process environment.

    Returns:
        A dict with two entries: ``'key'`` holds the value of the
        ``AWS_ACCESS_KEY`` environment variable and ``'secret'`` the value
        of ``AWS_SECRET_KEY``. A variable that is not set yields ``None``.
    """
    env_names = (
        ('key', 'AWS_ACCESS_KEY'),
        ('secret', 'AWS_SECRET_KEY'),
    )
    return dict((label, os.environ.get(var)) for label, var in env_names)
def parse_arg():
    """Parse the command-line options of the script.

    Two options are declared, both required:

    * ``-m`` -- stored as ``month``
    * ``-d`` -- stored as ``day``

    Returns:
        The ``argparse.Namespace`` produced by parsing ``sys.argv``. No
        ``type`` is given for either option, so the values are strings.
    """
    cli = argparse.ArgumentParser()
    # (flag, destination attribute, help text) for every required option.
    required_options = (
        ('-m', 'month', 'stats month in 2013'),
        ('-d', 'day', 'stats day'),
    )
    for flag, dest, help_text in required_options:
        cli.add_argument(flag, help=help_text, dest=dest, required=True)
    return cli.parse_args()
def download_sample(month, day):
    """Download the stat files of one day in 2013 from S3.

    Connects to the ``mat-redshift-prod`` bucket using the credentials from
    ``get_aws_credentials()``. For each stat type (``stat_clicks``,
    ``stat_events``, ``stat_installs``, ``stat_opens``) it lists the keys
    under ``qproc_imports/prod/qproc_import_csv/<type>/1_1/2013/<MM>/<DD>/``
    and downloads each one into a local directory named after the type.
    Files that already exist locally are skipped.

    Args:
        month: Month number as an int; formatted as two digits.
        day: Day of the month as an int; formatted as two digits.

    Errors raised while handling a single key are printed and that key is
    skipped; errors while connecting or opening the bucket propagate.
    """
    # connect to the bucket
    cred = get_aws_credentials()
    conn = S3Connection(cred['key'], cred['secret'])
    bucket = conn.get_bucket('mat-redshift-prod')

    interested_types = ('stat_clicks', 'stat_events', 'stat_installs', 'stat_opens')

    # %02d zero-pads to exactly two digits. The previous '0%d' always
    # prepended a zero, so two-digit values produced e.g. '010' or '030'.
    bucket_day = '1_1/2013/%02d/%02d/' % (month, day)
    prefix_template = 'qproc_imports/prod/qproc_import_csv/%s/%s'

    for stat_type in interested_types:
        create_dir(stat_type)
        for key in bucket.list(prefix_template % (stat_type, bucket_day)):
            try:
                # Keep only the last path component as the local file name.
                file_name = key.name.split('/')[-1]
                local_path = '%s/%s' % (stat_type, file_name)
                if not os.path.exists(local_path):
                    key.get_contents_to_filename(local_path)
            except Exception as e:
                # Best effort: report the failure and continue with the
                # remaining keys instead of aborting the whole run.
                print(e)
if __name__ == "__main__":
    # Script entry point: read -m/-d from the command line, convert both to
    # integers and fetch the files for that date.
    cli_args = parse_arg()
    download_sample(int(cli_args.month), int(cli_args.day))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment