Skip to content

Instantly share code, notes, and snippets.

@mario-chaves
Created November 20, 2015 16:42
Show Gist options
  • Save mario-chaves/cf05758544820706e1f7 to your computer and use it in GitHub Desktop.
Save mario-chaves/cf05758544820706e1f7 to your computer and use it in GitHub Desktop.
import os
import boto
import requests
import shutil
import math
from filechunkio import FileChunkIO
from boto.s3.key import Key
def _raise_boto_error(error):
    """Re-raise a boto error as the generic ``Exception`` callers already expect."""
    # Some boto exception classes may not carry both attributes, so fall back
    # instead of failing with AttributeError while building the message.
    status = getattr(error, 'status', 'n/a')
    reason = getattr(error, 'reason', error)
    raise Exception('Error: %s: %s' % (status, reason))


def _get_or_create_bucket(conn, bucket_name):
    """Return the named bucket, creating it as public-read if the lookup fails."""
    try:
        return conn.get_bucket(bucket_name)
    except boto.exception.S3ResponseError:
        # Any failed lookup is treated as "bucket missing"; fall through and
        # try to create it.
        pass
    try:
        bucket = conn.create_bucket(bucket_name)
        bucket.set_canned_acl('public-read')
    except boto.exception.S3CreateError as error:
        _raise_boto_error(error)
    return bucket


def _download_to_file(download_url, download_to):
    """Stream ``download_url`` to the local path ``download_to``."""
    response = requests.get(download_url, stream=True)
    try:
        # Fail loudly on 4xx/5xx rather than storing an error page as the file.
        response.raise_for_status()
        # Ask the raw stream to undo any Content-Encoding (gzip/deflate) so the
        # bytes written to disk are the decoded payload.
        response.raw.decode_content = True
        with open(download_to, 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)
    finally:
        response.close()


def _upload_file(bucket, key_name, file_path, chunk_size):
    """Upload ``file_path`` to ``bucket`` under ``key_name`` with a public-read ACL."""
    source_size = os.stat(file_path).st_size
    if source_size == 0:
        # A multipart upload needs at least one part, so an empty file goes up
        # as a single plain PUT instead.
        key = Key(bucket)
        key.name = key_name
        key.set_contents_from_filename(file_path, policy='public-read')
        return

    multi_part = bucket.initiate_multipart_upload(key_name, policy='public-read')
    try:
        # Integer ceiling division: number of chunk_size pieces covering the file.
        chunk_count = (source_size + chunk_size - 1) // chunk_size
        for i in range(chunk_count):
            offset = chunk_size * i
            byte_set = min(chunk_size, source_size - offset)
            with FileChunkIO(file_path, 'r', offset=offset, bytes=byte_set) as chunk:
                # Part numbers are 1-based.
                multi_part.upload_part_from_file(chunk, part_num=i + 1)
        multi_part.complete_upload()
    except Exception:
        # Abort so already-uploaded parts are not left behind in the bucket,
        # then let the original error propagate.
        multi_part.cancel_upload()
        raise


def download_and_send_to_s3(aws_key, aws_secret_key, bucket_name, download_url,
                            bucket_folder='', download_to='', chunk_size=52428800):
    """Download a file over HTTP and store it in an S3 bucket as public-read.

    The file is streamed to local disk, uploaded to the bucket in
    ``chunk_size`` pieces via a multipart upload, and the local copy is
    removed afterwards (also when the download or upload fails).

    Args:
        aws_key: AWS access key id passed to ``boto.connect_s3``.
        aws_secret_key: AWS secret access key passed to ``boto.connect_s3``.
        bucket_name: Target bucket; created with a public-read ACL if it
            cannot be fetched.
        download_url: URL of the file to fetch. The text after the last
            ``/`` is used as the file name.
        bucket_folder: Optional key prefix; the object is stored as
            ``<bucket_folder>/<file_name>`` when given.
        download_to: Optional local path for the temporary download.
            Defaults to ``/tmp/<file_name>``. The file at this path is
            deleted before returning.
        chunk_size: Size in bytes of each multipart part (default 50 MiB).
            NOTE(review): S3 enforces a minimum part size; confirm the
            service limits before lowering this value.

    Raises:
        ValueError: If no file name can be derived from ``download_url``.
        Exception: If connecting to S3 or creating the bucket fails.
        requests.HTTPError: If the download responds with an error status.
    """
    try:
        conn = boto.connect_s3(aws_key, aws_secret_key)
    except boto.exception.AWSConnectionError as error:
        _raise_boto_error(error)
    bucket_object = _get_or_create_bucket(conn, bucket_name)

    file_name = download_url.split('/')[-1]
    if not file_name:
        raise ValueError('Cannot derive a file name from URL: %s' % download_url)
    if not download_to:
        download_to = os.path.join('/', 'tmp', file_name)
    key_name = bucket_folder + '/' + file_name if bucket_folder else file_name

    try:
        _download_to_file(download_url, download_to)
        _upload_file(bucket_object, key_name, download_to, chunk_size)
    finally:
        # Always drop the local copy, whether or not the transfer succeeded.
        if os.path.exists(download_to):
            os.remove(download_to)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment