Created
May 4, 2015 12:40
-
-
Save holgerd77/ff83c09fb5c7a8e90638 to your computer and use it in GitHub Desktop.
Script for Farmsubsidy to send folders with exported flat CSV files to S3 bucket
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Taken from: https://gist.github.com/SavvyGuard/6115006
# Usage: python send_folder_to_s3 COUNTRY_DIRNAME
# Dependencies: boto
import os.path
import sys

import boto
import boto.s3
import boto.s3.key
# --- Configuration ----------------------------------------------------------
# Fill these in - you get them when you sign up for S3.
AWS_ACCESS_KEY_ID = ''
AWS_ACCESS_KEY_SECRET = ''

# Fill in info on data to upload.
# Destination bucket name.
bucket_name = 'data.farmsubsidy.org'

# --- Command-line validation ------------------------------------------------
# sys.exit(<string>) writes the message to stderr and exits with status 1, so
# a calling shell script can tell a usage error apart from a successful run
# (a bare sys.exit() would report success).
if len(sys.argv) != 2:
    sys.exit("Please run this script with (exactly) one COUNTRY_DIRNAME as argument!")
if not os.path.isdir(sys.argv[1]):
    sys.exit("Country directory does not exist!")

# Drop trailing slashes (shell tab completion appends one) so the paths built
# below do not end up with a doubled '/' - S3 would treat 'Flat/FR//x.csv' and
# 'Flat/FR/x.csv' as different keys. Fall back to the raw argument if
# stripping would leave nothing (argument consisted only of slashes).
country_dirname = sys.argv[1].rstrip('/') or sys.argv[1]

# Source directory (local), with a trailing slash.
sourceDir = country_dirname + '/'
# Destination directory name (key prefix on S3), with a trailing slash.
destDir = 'Flat/' + country_dirname + '/'

# Max size in bytes before uploading in parts. The script this was taken from
# recommended between 1 and 5 GB; the value used here is 20 MB.
MAX_SIZE = 20 * 1000 * 1000
# Size in bytes of each part when uploading in parts. S3 rejects parts smaller
# than 5 MiB (except the last one), so keep this above that limit.
PART_SIZE = 6 * 1000 * 1000
# Open the S3 connection and look up the destination bucket.
conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_ACCESS_KEY_SECRET)
bucket = conn.get_bucket(bucket_name)

# Collect the names of the files directly inside sourceDir; only the first
# entry produced by os.walk is used, so subdirectories are not descended into.
# The default tuple keeps the list empty if os.walk yields nothing, and
# unpacking into throwaway names avoids overwriting the sourceDir setting.
_, _, top_level_files = next(os.walk(sourceDir), (sourceDir, [], []))
uploadFileNames = list(top_level_files)
def percent_cb(complete, total):
    """Progress callback for the uploads: print one dot per invocation.

    Args:
        complete: Progress value supplied by the caller (unused).
        total: Total value supplied by the caller (unused).
    """
    sys.stdout.write('.')
    # Flush so each dot appears immediately instead of when the buffer fills.
    sys.stdout.flush()
# Upload every collected file to the bucket, then make the uploaded object
# publicly readable.
for filename in uploadFileNames:
    sourcepath = os.path.join(sourceDir, filename)
    # S3 keys always use '/', independent of the local OS path separator, so
    # build the key by plain concatenation (destDir ends with '/').
    destpath = destDir + filename
    print('Uploading %s to Amazon S3 bucket %s' % (sourcepath, bucket_name))

    filesize = os.path.getsize(sourcepath)
    if filesize > MAX_SIZE:
        print("multipart upload")
        mp = bucket.initiate_multipart_upload(destpath)
        try:
            # The with-block guarantees the file handle is closed even when
            # an upload call raises.
            with open(sourcepath, 'rb') as fp:
                part_num = 0
                # Each call consumes up to PART_SIZE bytes from fp, so the
                # file position advances until the whole file has been sent.
                while fp.tell() < filesize:
                    part_num += 1
                    print("uploading part %i" % part_num)
                    mp.upload_part_from_file(fp, part_num, cb=percent_cb,
                                             num_cb=10, size=PART_SIZE)
        except BaseException:
            # Abort (including on Ctrl-C) so the parts already uploaded are
            # discarded instead of lingering in the bucket, then re-raise.
            mp.cancel_upload()
            raise
        mp.complete_upload()
    else:
        print("singlepart upload")
        k = boto.s3.key.Key(bucket)
        k.key = destpath
        k.set_contents_from_filename(sourcepath, cb=percent_cb, num_cb=10)

    # NOTE(review): the original indentation was lost; this is applied after
    # both branches so multipart uploads are made public as well - confirm.
    k = boto.s3.key.Key(bucket)
    k.key = destpath
    k.make_public()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment