-
-
Save deltastateonline/e08970904574fdb41520730cc292f316 to your computer and use it in GitHub Desktop.
Download a file from S3 using "vanilla" standard library Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hashlib
import hmac
import socket
import ssl
from datetime import datetime

try:  # Python 2
    from urllib import quote
    from urlparse import urlsplit
except ImportError:  # Python 3
    from urllib.parse import quote, urlsplit
ALGORITHM = 'AWS4-HMAC-SHA256'
# Backward-compatible alias: the constant was originally published misspelled.
ALGORTHM = ALGORITHM

# Headers that sign_headers() owns; stale copies (in any letter case) are
# dropped before signing so they are never signed twice or with old values.
_MANAGED_HEADERS = ('host', 'x-amz-date', 'x-amz-content-sha256', 'authorization')


def sign(key, msg):
    """Return the raw HMAC-SHA256 digest of the text ``msg`` under the bytes ``key``."""
    return hmac.new(key, msg.encode('utf-8'), hashlib.sha256).digest()


def _canonical_query(query):
    """Return ``query`` as sorted ``name=value`` pairs joined by ``&``.

    The query is assumed to be percent-encoded already; parameters without a
    value (``?acl``) are rendered as ``acl=``.
    """
    pairs = []
    for item in query.split('&'):
        if not item:
            continue
        name, _, value = item.partition('=')
        pairs.append((name, value))
    return '&'.join(name + '=' + value for name, value in sorted(pairs))


def sign_headers(headers, url, access_key, secret_key, region='us-east-1',
                 method='GET', now=None):
    """Add AWS Signature Version 4 headers for an S3 request to ``headers``.

    The payload is declared as ``UNSIGNED-PAYLOAD``. ``headers`` is modified
    in place and also returned.

    Args:
        headers: dict of request headers to sign. ``Host``, ``X-Amz-Date``,
            ``X-Amz-Content-Sha256`` and ``Authorization`` are (re)written.
        url: full request URL; its path and query must already be
            percent-encoded.
        access_key: AWS access key id.
        secret_key: AWS secret access key.
        region: AWS region used in the credential scope.
        method: HTTP method placed in the canonical request.
        now: naive ``datetime`` expressed in UTC used as the signing time;
            defaults to the current UTC time.

    Returns:
        The same ``headers`` dict, now including ``Authorization``.
    """
    parsed_url = urlsplit(url)
    host = parsed_url.netloc

    if now is None:
        # utcnow() is kept (rather than an aware datetime) because this file
        # still supports Python 2.
        now = datetime.utcnow()
    aws_datetime = now.strftime('%Y%m%dT%H%M%SZ')
    aws_date = now.strftime('%Y%m%d')

    # Credential scope and the derived signing key.
    scope = '/'.join([aws_date, region, 's3', 'aws4_request'])
    credential = '/'.join([access_key, scope])
    signing_key = ('AWS4' + secret_key).encode('utf-8')
    for part in (aws_date, region, 's3', 'aws4_request'):
        signing_key = sign(signing_key, part)

    # Remove stale managed headers regardless of their letter case, then set
    # the fresh values under the canonical spellings.
    for name in list(headers):
        if name.lower() in _MANAGED_HEADERS:
            del headers[name]
    headers['Host'] = host
    headers['X-Amz-Date'] = aws_datetime
    headers['X-Amz-Content-Sha256'] = u'UNSIGNED-PAYLOAD'

    # Canonical headers: lower-cased names, whitespace-normalised values,
    # sorted by the *lower-cased* name (sorting the raw keys mis-orders
    # mixed-case input).
    canonical = sorted(
        (name.lower().strip(), ' '.join(str(value).split()))
        for name, value in headers.items()
    )
    signed_headers = ';'.join(name for name, _ in canonical)
    canonical_header_list = [name + ':' + value for name, value in canonical]

    # Canonical request; the empty string yields the blank line that
    # separates the header list from the signed-header names.
    prefix = [method, parsed_url.path or '/', _canonical_query(parsed_url.query)]
    suffix = ['', signed_headers, u'UNSIGNED-PAYLOAD']
    canonical_req = '\n'.join(prefix + canonical_header_list + suffix)

    string_to_sign = '\n'.join([
        ALGORITHM,
        aws_datetime,
        scope,
        hashlib.sha256(canonical_req.encode('utf-8')).hexdigest(),
    ])
    signature = hmac.new(signing_key, string_to_sign.encode('utf-8'),
                         hashlib.sha256).hexdigest()

    headers['Authorization'] = (
        ALGORITHM + ' Credential=' + credential + ', '
        + 'SignedHeaders=' + signed_headers + ', '
        + 'Signature=' + signature
    )
    return headers
def _read_http_response(sock):
    """Read one HTTP response from ``sock`` and return ``(header, body)`` as bytes."""
    buf = b''
    while b'\r\n\r\n' not in buf:
        data = sock.recv(65536)
        if not data:
            break
        buf += data
    header, _, body = buf.partition(b'\r\n\r\n')

    # Use Content-Length when the server provides one so reading stops as soon
    # as the body is complete; otherwise read until the peer closes.
    expected = None
    for line in header.split(b'\r\n')[1:]:
        name, _, value = line.partition(b':')
        if name.strip().lower() == b'content-length':
            try:
                expected = int(value.strip())
            except ValueError:
                expected = None
            break

    parts = [body]
    received = len(body)
    while expected is None or received < expected:
        data = sock.recv(65536)
        if not data:
            break
        parts.append(data)
        received += len(data)
    return header, b''.join(parts)


def download_s3_chunk(bucket, key, start, end, access_key, secret_key,
                      endpoint='https://s3.amazonaws.com', region='us-east-1'):
    """Download part of an S3 stored file using vanilla Python.

    Issues a signed, path-style ``GET /<bucket>/<key>`` with a ``Range``
    header over TLS.

    Args:
        bucket: bucket name.
        key: object key; it is percent-encoded before being sent.
        start: first byte offset of the range (inclusive).
        end: last byte offset of the range (inclusive).
        access_key: AWS access key id.
        secret_key: AWS secret access key.
        endpoint: ``https://host[:port]`` of the S3 endpoint to contact.
        region: AWS region passed to the request signer.

    Returns:
        ``(header, chunk)``: the raw response head (status line and headers)
        and the raw response body, both as bytes.

    Raises:
        ValueError: if ``endpoint`` has no host component.
    """
    endpoint_parts = urlsplit(endpoint)
    host = endpoint_parts.hostname
    if not host:
        raise ValueError('endpoint must look like https://host[:port]: %r' % (endpoint,))
    port = endpoint_parts.port or 443

    # The request line and the signed URL must carry the same encoded path.
    path = '/' + bucket + '/' + quote(key, safe='/~')
    url = 'https://' + endpoint_parts.netloc + path

    headers = {'Range': 'bytes={}-{}'.format(start, end), 'User-Agent': 'ssup'}
    headers = sign_headers(headers, url, access_key, secret_key, region)

    # Raw message to send via socket. "Connection: close" lets the read loop
    # fall back to end-of-stream when no Content-Length is sent, and
    # "identity" guarantees the body is the object's raw bytes.
    request_lines = [
        'GET {} HTTP/1.1'.format(path),
        'Host: {}'.format(headers['Host']),
        'Connection: close',
        'Accept-Encoding: identity',
        'Accept: */*',
        'User-Agent: {}'.format(headers['User-Agent']),
        'X-Amz-Content-Sha256: {}'.format(headers['X-Amz-Content-Sha256']),
        'Range: {}'.format(headers['Range']),
        'X-Amz-Date: {}'.format(headers['X-Amz-Date']),
        'Authorization: {}'.format(headers['Authorization']),
        '',
        '',
    ]
    request = '\r\n'.join(request_lines).encode('utf-8')

    # Default context verifies the server certificate and host name.
    context = ssl.create_default_context()
    raw_sock = socket.create_connection((host, port))
    try:
        tls_sock = context.wrap_socket(raw_sock, server_hostname=host)
    except Exception:
        raw_sock.close()
        raise
    try:
        tls_sock.sendall(request)
        return _read_http_response(tls_sock)
    finally:
        tls_sock.close()
if __name__ == '__main__':
    # Usage: script.py [bucket [key [start [end]]]]
    # Anything not given on the command line falls back to the placeholder
    # values below.
    import os
    from sys import argv as args

    # Credentials: read from the standard AWS environment variables when set
    # so real secrets do not need to be written into this file.
    access_key = os.environ.get('AWS_ACCESS_KEY_ID', 'access')
    secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY', 'secret')

    cli = args[1:]
    # Bucket, key and location info
    bucket = cli[0] if len(cli) > 0 else 'my_bucket'
    key = cli[1] if len(cli) > 1 else 'my_key'
    # Chunk of key to download (inclusive byte offsets)
    start = int(cli[2]) if len(cli) > 2 else 20
    end = int(cli[3]) if len(cli) > 3 else 100

    header, chunk = download_s3_chunk(bucket, key, start, end, access_key, secret_key)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment