AWS Lambda function to gzip-compress files when they are uploaded to S3 (the original file is replaced with the gzipped version)
###
### This gist contains 2 files: settings.json and lambda_function.py
###
### settings.json
{
    "extensions" : ["*.hdr", "*.glb", "*.wasm"]
}
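Note: the entries are standard fnmatch glob patterns matched against the full object key. A quick illustration (the key below is only an example):

import fnmatch
key = "assets/scene.glb"                                  # example object key
patterns = ["*.hdr", "*.glb", "*.wasm"]
print any(fnmatch.fnmatch(key, p) for p in patterns)      # prints True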
### lambda_function.py
'''
This script converts an uncompressed S3 file into a gzip-compressed file. The object is replaced in place: the original file is deleted (overwritten by the gzipped version).
Create a role with S3 (read/write) and CloudWatch Logs access:
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "VisualEditor0",
            "Effect": "Allow",
            "Action": [
                "logs:CreateLogStream",
                "s3:*",
                "logs:PutLogEvents"
            ],
            "Resource": "*"
        },
        {
            "Sid": "VisualEditor1",
            "Effect": "Allow",
            "Action": "logs:CreateLogGroup",
            "Resource": "*"
        }
    ]
}
Install the Lambda in the same region as the bucket, with the Python 2.7 runtime and a 1-minute max execution time.
Change "settings.json" to add or remove the extensions you want to compress.
The trigger is an S3 PUT event (select the bucket the Lambda applies to); output goes to S3 and CloudWatch Logs.
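The notification can also be set up with boto3; a rough sketch (the bucket name and function ARN are placeholders, and the function must already allow s3.amazonaws.com to invoke it):

import boto3
s3 = boto3.client('s3')
s3.put_bucket_notification_configuration(
    Bucket='my-bucket',                # placeholder bucket name
    NotificationConfiguration={
        'LambdaFunctionConfigurations': [{
            'LambdaFunctionArn': 'arn:aws:lambda:eu-west-1:123456789012:function:gzip-on-upload',  # placeholder ARN
            'Events': ['s3:ObjectCreated:Put']
        }]
    })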
How it works:
- on each PUT event (a new file is uploaded to the bucket), an event is sent to the lambda function (note: this does not work with multipart uploads).
- the lambda wakes up and analyzes the incoming file
- reads the metadata of the incoming file
- if the file already has the "gzip" ContentEncoding HTTP header, it is already compressed, so there is no need to recompress it
- if the file is too small (hard-coded: 1024 bytes): no compression
- if the file does not have a recognized extension (see settings.json): no compression
- if the file passes all the previous checks, it is downloaded locally (to /tmp)
- gzip the local version using the OS "gzip" tool (could be improved by using Python's built-in gzip module - TODO, see the sketch below)
- overwrite the file in the bucket with the locally gzipped version
- update the metadata with the previous values plus ContentEncoding set to "gzip"
- delete the locally gzipped version
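
A possible pure-python replacement for the external "gzip" call mentioned in the TODO above (the function name and arguments are illustrative, not part of the current code):

import gzip
import shutil

def gzip_file(src, dst, level=9):
    # stream-copy src into a gzip-compressed dst using only the standard library
    with open(src, 'rb') as f_in, gzip.open(dst, 'wb', level) as f_out:
        shutil.copyfileobj(f_in, f_out)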
'''
import json
import pprint
import boto3
import botocore
import tempfile
import os
import subprocess
import fnmatch
import urllib

def lambda_handler(event, context):
    with open("settings.json") as json_data:
        settings = json.load(json_data)
    #print "EVENT :"
    client = boto3.client('s3')
    s3 = boto3.resource('s3')
    for r in event.get('Records'):
        # pprint.pprint(r)
        bucketName = r.get('s3').get('bucket').get('name')
        objectKey = r.get('s3').get('object').get('key')
        etag = r.get('s3').get('object').get('eTag')
        # object keys in S3 event notifications are URL-encoded (e.g. spaces become '+')
        objectKey = urllib.unquote_plus(objectKey.encode('utf8'))
        print "Retrieving object:"
        print "   bucketName = " + bucketName
        print "   objectKey  = " + objectKey
        uploadedMeta = client.head_object(Bucket=bucketName, Key=objectKey, IfMatch=etag)
        contentEncoding = uploadedMeta.get('ContentEncoding', None)
        size = uploadedMeta.get('ContentLength', 0)
        print "   Current encoding = " + str(contentEncoding)
        print "   Size = " + str(size)
        if (contentEncoding == 'gzip'):
            print(" ==> File is already compressed")
            return True
        match = False
        for ext in settings['extensions']:
            if fnmatch.fnmatch(objectKey, ext):
                match = True
                break
        if (match == False):
            print(" ==> File extension is not activated for compression. See settings.json")
            return True
        if (size < 1024):
            print(" ==> File is too small to be compressed")
            return True
        tmp_in = tempfile.mkdtemp() + '.orig'
        tmp_out = tmp_in + '.gz'  # must be .gz because that is the default gzip output name
        new_objectKey = objectKey + '.gz'  # not used below: the object is overwritten under its original key
        print("Download content to " + tmp_in + " and gzip it to " + tmp_out)
        s3.Bucket(bucketName).download_file(objectKey, tmp_in)
        print("GZipping file")
        # the gzip command replaces tmp_in with tmp_in + '.gz'; its -v report goes to stderr
        print subprocess.check_output(['gzip', '-v', '-f', '-9', tmp_in], stderr=subprocess.STDOUT)
        statinfo = os.stat(tmp_out)
        newsize = statinfo.st_size
        print "New gzipped file size = " + str(newsize)
        if (size - newsize < 1024):
            print "Compression is not efficient, keep original file"
            os.remove(tmp_out)
            return True
        print "Overwriting S3 file with gzipped version"
        # Recreate metadata from original file (including http headers)
        # Todo : keep original upload date
        extraArgs = {
            'ContentEncoding': "gzip"
        }
        for m in ['Metadata', 'CacheControl', 'ContentDisposition', 'ContentLanguage', 'ContentType', 'Expires']:
            if (uploadedMeta.get(m, None) != None):
                extraArgs[m] = uploadedMeta.get(m)
        extraArgs.setdefault('Metadata', {})
        extraArgs['Metadata']['lambda'] = os.environ.get('AWS_LAMBDA_FUNCTION_NAME', '')
        extraArgs['Metadata']['original-size'] = str(size)
        # overwrite the object under its original key with the gzipped version
        s3.Object(bucketName, objectKey).upload_file(
            Filename=tmp_out,
            ExtraArgs=extraArgs)
        # remove local file
        os.remove(tmp_out)
    return {
        'statusCode': 200,
        'body': 'It works'
    }
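
# A minimal sketch for exercising the handler locally (not part of the Lambda deployment).
# The bucket, key and eTag below are placeholders: the object must exist, the eTag must match
# for the head_object IfMatch check to pass, AWS credentials must be configured, and the
# script must be run from the directory containing settings.json.
if __name__ == '__main__':
    fake_event = {
        'Records': [{
            's3': {
                'bucket': {'name': 'my-bucket'},
                'object': {'key': 'models/scene.glb', 'eTag': '00000000000000000000000000000000'}
            }
        }]
    }
    print lambda_handler(fake_event, None)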