Created
March 9, 2018 00:08
-
-
Save jastang/2f443ce989c58ec3d34c44a5b96e6a86 to your computer and use it in GitHub Desktop.
Divide large S3 objects for preprocessing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Walk the object in windows of roughly ``chunksize`` bytes, dispatching one
# CleanS3Object invocation per byte range until the end of the object is hit.
# ``start_byte``, ``end_byte``, ``objectsize``, ``chunksize``, ``bucket`` and
# ``key`` are expected to be initialised before this loop.
while end_byte <= objectsize:
    # Snap the tentative cut point so that line splitting works downstream.
    # NOTE(review): scan_to_eol is defined elsewhere; presumably it returns
    # the offset of a line terminator at or after end_byte -- confirm.
    end_byte = scan_to_eol(end_byte, objectsize, bucket, key)

    # Describe the byte range for the cleaning function.
    ctx = {
        "start_byte": start_byte,
        "end_byte": end_byte,
        "bucket": bucket,
        "key": key,
    }
    payload = json.dumps(ctx)

    # NOTE(review): LogType='Tail' is documented as applying to synchronous
    # invocations only, so it likely has no effect together with
    # InvocationType='Event' -- confirm before relying on it.
    aws_lambda.invoke(
        FunctionName='CleanS3Object',
        InvocationType='Event',
        LogType='Tail',
        Payload=payload,
    )

    # The next range starts immediately after the one just dispatched.
    start_byte = end_byte + 1

    # Stop once the range that was just sent reached the end of the object.
    if end_byte == objectsize:
        break

    # Otherwise advance by one chunk, clamped to the object size.
    end_byte = min(end_byte + chunksize, objectsize)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment