Created
July 10, 2019 06:49
-
-
Save SumindaD/ed034b2b0126efd6bd31e1a5e5225778 to your computer and use it in GitHub Desktop.
S3-triggered Lambda: when a text file is uploaded, the function processes it and writes the result to a new text file in the same S3 location.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
| import urllib.parse | |
| import boto3 | |
| import os | |
# Printed when the module is first imported, i.e. before any handler call.
print('Loading function')

# boto3 clients are built once at import time and shared by every function
# in this module.
comprehend = boto3.client('comprehendmedical')
s3 = boto3.client('s3')
def executeComprehendService(text):
    """Detect medical entities in *text* and render them as a plain-text report.

    Calls ``comprehend.detect_entities`` and, for every item in the
    response's ``"Entities"`` list, emits the entity text, its type and
    category, and (when present and non-empty) the names of its traits.
    Each entity is followed by one blank line.

    Args:
        text: The string passed as ``Text`` to the Comprehend Medical client.

    Returns:
        The formatted report as a single string; empty when no entities
        were returned.
    """
    # NOTE(review): AWS documents DetectEntities as deprecated in favour of
    # DetectEntitiesV2. The call is left unchanged here because switching
    # may change the returned types/categories -- confirm before migrating.
    response = comprehend.detect_entities(Text=text)

    # Collect lines and join once instead of growing a string with "+=".
    lines = []
    for entity in response["Entities"]:
        lines.append("- {}".format(entity["Text"]))
        lines.append(" Type: {}".format(entity["Type"]))
        lines.append(" Category: {}".format(entity["Category"]))

        # Tolerate entities that carry no "Traits" key at all.
        traits = entity.get("Traits")
        if traits:
            lines.append(" Traits:")
            lines.extend(" - {}".format(trait["Name"]) for trait in traits)

        # Blank line separating this entity from the next one.
        lines.append("")

    return "".join(line + "\n" for line in lines)
def writeComprehendMedicalTextToS3File(comprehendText, bucketName, textFileName):
    """Upload *comprehendText* to S3 under a key derived from *textFileName*.

    The destination key is ``'Comprehend_'`` followed by *textFileName* with
    its last extension stripped, followed by ``'.txt'``. The object is put
    into *bucketName* and progress is reported with ``print``.

    Args:
        comprehendText: Body of the object to write.
        bucketName: Name of the destination bucket.
        textFileName: Key of the source object the result belongs to.
    """
    print('Loading writeComprehendMedicalTextToS3File')

    # NOTE(review): the prefix goes in front of the whole key, so a key with
    # a folder part such as "a/b.txt" yields "Comprehend_a/b.txt" -- confirm
    # that this is the intended destination.
    stem, _extension = os.path.splitext(textFileName)
    outputKey = 'Comprehend_{}.txt'.format(stem)

    s3.put_object(Bucket=bucketName, Key=outputKey, Body=comprehendText)
    print('Generated {}'.format(outputKey))
def lambda_handler(event, context):
    """Process every S3 object referenced by *event* with Comprehend Medical.

    For each record the object is downloaded, decoded as UTF-8, passed to
    ``executeComprehendService`` and the resulting report is stored via
    ``writeComprehendMedicalTextToS3File``. Objects whose key contains
    ``'Comprehend_'`` are skipped so that the reports written by this
    function do not trigger another round of processing.

    Args:
        event: S3 notification payload; ``event['Records']`` is iterated.
        context: Lambda context object (unused).

    Returns:
        None.

    Raises:
        Exception: Any error raised while reading, analysing or writing an
            object is logged and re-raised unchanged.
    """
    # An event may carry several records; handle all of them rather than
    # only the first. An empty list is simply a no-op.
    for record in event['Records']:
        bucket = record['s3']['bucket']['name']
        # Object keys arrive URL-encoded in S3 notifications.
        key = urllib.parse.unquote_plus(record['s3']['object']['key'], encoding='utf-8')

        # Skip files generated by this function itself.
        if 'Comprehend_' in key:
            continue

        try:
            textFile = s3.get_object(Bucket=bucket, Key=key)
            content = textFile['Body'].read().decode('utf-8')
            # Pass the decoded text as-is. Wrapping it in json.dumps() would
            # add surrounding quotes and turn newlines / non-ASCII characters
            # into escape sequences before the text is analysed.
            comprehendText = executeComprehendService(content)
            writeComprehendMedicalTextToS3File(comprehendText, bucket, key)
            print('Processing Done!')
        except Exception as e:
            print(e)
            print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket))
            # Bare raise keeps the original traceback intact.
            raise
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment