Skip to content

Instantly share code, notes, and snippets.

@SumindaD
Created July 10, 2019 06:49
Show Gist options
  • Select an option

  • Save SumindaD/ed034b2b0126efd6bd31e1a5e5225778 to your computer and use it in GitHub Desktop.

Select an option

Save SumindaD/ed034b2b0126efd6bd31e1a5e5225778 to your computer and use it in GitHub Desktop.
An S3-triggered Lambda function that processes an uploaded text file and writes the result to a new text file in the same S3 location.
import json
import urllib.parse
import boto3
import os
# Emitted once, when this module is imported.
print('Loading function')
# boto3 clients are created at import time and shared by the functions below:
# `s3` reads the source object and writes the result object, `comprehend`
# performs the medical entity detection.
s3 = boto3.client('s3')
comprehend = boto3.client('comprehendmedical')
def executeComprehendService(text):
    """Detect medical entities in ``text`` and format them as a text report.

    Every entity returned by the service is rendered as a short block: the
    entity text, its type, its category, an optional list of trait names,
    and a trailing blank line that separates it from the next entity.

    Args:
        text: The text to send to the ``comprehendmedical`` client.

    Returns:
        The formatted report as one string. It is empty when the response
        contains no entities.
    """
    # NOTE(review): boto3 documents detect_entities as deprecated in favour of
    # detect_entities_v2 -- confirm the response shape before switching.
    response = comprehend.detect_entities(Text=text)

    # Collect lines and join once instead of growing a string with `+=`.
    lines = []
    for entity in response.get("Entities", []):
        lines.append("- {}".format(entity["Text"]))
        lines.append(" Type: {}".format(entity["Type"]))
        lines.append(" Category: {}".format(entity["Category"]))
        # A missing or empty trait list simply omits the "Traits:" section.
        traits = entity.get("Traits")
        if traits:
            lines.append(" Traits:")
            lines.extend(" - {}".format(trait["Name"]) for trait in traits)
        # Blank separator line after every entity.
        lines.append("")
    return "".join(line + "\n" for line in lines)
def writeComprehendMedicalTextToS3File(comprehendText, bucketName, textFileName):
    """Store the formatted report next to the source object in S3.

    The result key keeps the key prefix ("folder" part) of ``textFileName``
    and replaces the file name with ``Comprehend_<name without extension>.txt``.
    For example ``notes/visit.txt`` produces ``notes/Comprehend_visit.txt``
    and ``visit.txt`` produces ``Comprehend_visit.txt``.

    Args:
        comprehendText: Report text to upload as the object body.
        bucketName: Bucket that receives the result object.
        textFileName: Key of the source object the report was derived from.
    """
    print('Loading writeComprehendMedicalTextToS3File')
    # Split on the S3 delimiter so the marker is applied to the file name
    # only; prefixing the whole key would move the result out of the source
    # object's location (e.g. into "Comprehend_notes/").
    prefix, separator, baseName = textFileName.rpartition('/')
    resultName = 'Comprehend_' + os.path.splitext(baseName)[0] + '.txt'
    generateFilePath = prefix + separator + resultName
    s3.put_object(Body=comprehendText, Bucket=bucketName, Key=generateFilePath)
    print('Generated ' + generateFilePath)
def lambda_handler(event, context):
    """Process every S3 object referenced by ``event``.

    For each record the object is downloaded, decoded as UTF-8, analysed
    with ``executeComprehendService`` and the resulting report is uploaded
    with ``writeComprehendMedicalTextToS3File``.

    Args:
        event: Event dict with a ``Records`` list; each record carries the
            bucket name and the URL-encoded object key under ``s3``.
        context: Lambda context object (unused).

    Raises:
        Exception: Any error raised while fetching or processing an object
            is logged and re-raised unchanged.
    """
    for record in event['Records']:
        bucket = record['s3']['bucket']['name']
        # Object keys arrive URL-encoded in the event.
        key = urllib.parse.unquote_plus(record['s3']['object']['key'], encoding='utf-8')

        # Result objects carry 'Comprehend_' in their key; skipping them keeps
        # the function from re-processing its own output. The check is a plain
        # substring test on the whole key on purpose, so it matches the marker
        # wherever it sits in the key.
        if 'Comprehend_' in key:
            continue

        try:
            textFile = s3.get_object(Bucket=bucket, Key=key)
            content = textFile['Body'].read().decode('utf-8')
        except Exception as e:
            print(e)
            print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket))
            raise

        try:
            # The text is analysed as-is: JSON-encoding it would add quotes
            # and escape sequences to the text being analysed. An empty file
            # yields an empty report without calling the service.
            comprehendText = executeComprehendService(content) if content else ''
            writeComprehendMedicalTextToS3File(comprehendText, bucket, key)
        except Exception as e:
            print(e)
            print('Error processing object {} from bucket {}.'.format(key, bucket))
            raise

        print('Processing Done!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment