Skip to content

Instantly share code, notes, and snippets.

@SumindaD
Created July 29, 2019 11:11
Show Gist options
  • Save SumindaD/1133c30252b06ee48b28422c7ca25909 to your computer and use it in GitHub Desktop.
Save SumindaD/1133c30252b06ee48b28422c7ca25909 to your computer and use it in GitHub Desktop.
import json
import urllib.parse
import boto3
import os
# Textract only accepts JobTag values of 1-64 characters drawn from
# [a-zA-Z0-9_.\-:]; S3 object keys routinely violate both limits.
_JOB_TAG_ALLOWED_CHARS = frozenset(
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    '0123456789_.-:'
)
_JOB_TAG_MAX_LENGTH = 64
_JOB_TAG_SUFFIX = '_Job'


def _build_job_tag(key):
    """Return a Textract-compatible JobTag derived from an S3 object key.

    Characters outside the allowed set are replaced with ``_`` and the
    key part is cut down so that, together with the ``_Job`` suffix, the
    tag never exceeds 64 characters. When truncation is needed the END of
    the key is kept. Keys that are already valid and short enough yield
    exactly ``key + '_Job'``.
    """
    sanitized = ''.join(
        ch if ch in _JOB_TAG_ALLOWED_CHARS else '_' for ch in key
    )
    room_for_key = _JOB_TAG_MAX_LENGTH - len(_JOB_TAG_SUFFIX)
    return sanitized[-room_for_key:] + _JOB_TAG_SUFFIX


def handle(event, context):
    """Start an asynchronous Textract text-detection job for an S3 object.

    Reads the bucket name and object key from the first record of
    ``event`` and calls ``start_document_text_detection`` for that object.
    The role and SNS topic for the notification channel are taken from
    the ``LAMBDA_ROLE_ARN`` and ``PDF_JOB_SNS_TOPIC_ARN`` environment
    variables.

    Args:
        event: Mapping with a ``Records`` list whose first entry carries
            ``s3.bucket.name`` and ``s3.object.key``.
        context: Unused.

    Returns:
        None.

    Raises:
        KeyError: If ``event`` lacks the expected fields, or a required
            environment variable is missing.
        Exception: Any error raised while creating the client or starting
            the job is logged and re-raised unchanged.
    """
    print("Triggered getTextFromS3PDF event: " + json.dumps(event, indent=2))

    # Only the first record of the event is processed.
    s3_info = event['Records'][0]['s3']
    bucket = s3_info['bucket']['name']
    # URL-decode the key ('+' becomes a space) before handing it to Textract.
    key = urllib.parse.unquote_plus(s3_info['object']['key'], encoding='utf-8')

    try:
        textract = boto3.client('textract')
        textract.start_document_text_detection(
            DocumentLocation={
                'S3Object': {
                    'Bucket': bucket,
                    'Name': key,
                },
            },
            # The raw key cannot be used directly: it may contain '/',
            # spaces or other characters Textract rejects, or be too long.
            JobTag=_build_job_tag(key),
            NotificationChannel={
                'RoleArn': os.environ['LAMBDA_ROLE_ARN'],
                'SNSTopicArn': os.environ['PDF_JOB_SNS_TOPIC_ARN'],
            },
        )
        print('Triggered PDF Processing for ' + key)
    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket))
        # Bare raise keeps the original traceback intact.
        raise
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment