Last active
December 13, 2019 14:49
-
-
Save philschmid/d3ff61b4590ac6ff1ef340efb6d09b1c to your computer and use it in GitHub Desktop.
yake_lambda
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_string_from_dict(input_dict=''):
    """Flatten the values of a (possibly nested) dict into one string.

    Every value is rendered as ``" <value>."`` and the pieces are
    concatenated in the dict's iteration order. Keys are ignored.

    Args:
        input_dict: Mapping whose values are ``str``, ``int``, ``dict`` or
            ``list``. Nested dicts are flattened recursively; lists are
            delegated to ``iterate_list``.

    Returns:
        The concatenated string; ``''`` for an empty dict.

    Raises:
        ValueError: If a value has any other type (this includes ``bool``
            and ``None``, because the checks compare exact types).
        Exception: Any other error is printed and re-raised unchanged.
    """
    try:
        pieces = []
        for _key, value in input_dict.items():
            value_type = type(value)
            # Exact type comparison on purpose: subclasses such as bool are
            # not treated as int and fall through to the ValueError below.
            if value_type == str or value_type == int:
                rendered = value
            elif value_type == dict:
                # Recurse into nested dicts.
                rendered = get_string_from_dict(input_dict=value)
            elif value_type == list:
                # Lists may hold strings, ints, dicts or further lists.
                rendered = iterate_list(value)
            else:
                raise ValueError('String Creation not possible')
            pieces.append(f" {rendered}.")
        return "".join(pieces)
    except Exception as e:
        print(e)
        raise e
def iterate_list(value='', rec_function=get_string_from_dict):
    """Flatten the elements of a list into one string.

    Each ``str`` or ``int`` element is rendered as ``" <element>."``.
    ``dict`` elements are flattened with ``get_string_from_dict`` and nested
    lists with a recursive call; their results are wrapped the same way.
    Elements of any other type are skipped silently.

    Args:
        value: Iterable of elements to flatten.
        rec_function: Accepted for callers that pass it, but the body never
            uses it; dicts are always handled by ``get_string_from_dict``.

    Returns:
        The concatenated string; ``''`` when nothing was rendered.
    """
    pieces = []
    for item in value:
        item_type = type(item)
        if item_type == str or item_type == int:
            rendered = item
        elif item_type == dict:
            # Nested dict: flatten its values.
            rendered = get_string_from_dict(input_dict=item)
        elif item_type == list:
            # Nested list: recurse.
            rendered = iterate_list(item)
        else:
            # Unsupported element types are ignored.
            continue
        pieces.append(f" {rendered}.")
    return "".join(pieces)
| # test={ | |
| # 'xx': 'bla stirng 1', | |
| # 'xy': ['string2','string3'], | |
| # 'xz': [{'x':'string4'}], | |
| # 'xa': {'xb':'string5'} | |
| # } | |
| # print(get_string_from_dict(test)) | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from utils.update_insight import update_insight | |
| from utils.get_key_entities import get_key_entities | |
| from utils.predict import predict | |
| import json | |
def extractor(event, context):
    """Extract keywords from the message in ``event`` and store them.

    The JSON document in ``event['body']`` is decoded, its ``'english'``
    entry is passed to ``predict``, and the resulting keywords are written
    with ``update_insight`` under the key built from the message's
    ``'searchString#contentType'`` and ``'createdAt'`` entries.

    Args:
        event: Mapping that must contain a ``'body'`` entry holding a JSON
            string.
        context: Unused.

    Returns:
        Always ``True``. Every error (missing body, malformed JSON, missing
        message keys, failures in the helpers) is printed and swallowed so
        the handler never raises.
    """
    try:
        print(event)
        if 'body' not in event:
            raise ValueError('No message in Event')

        # Decode the message body.
        raw_message = json.loads(event['body'])
        print(raw_message)

        # Extract keywords from the text.
        keyword_dict = predict(raw_message['english'])

        # Update the DynamoDB item. The keyword argument must be `key_words`
        # to match update_insight's signature.
        update_insight(
            key_words=keyword_dict,
            key_dict={
                'searchString#contentType': raw_message['searchString#contentType'],
                'createdAt': raw_message['createdAt'],
            },
        )
    except Exception as e:
        print(e)
    return True
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import yake | |
| #from get_string_from_dict import get_string_from_dict | |
| from .get_string_from_dict import get_string_from_dict | |
# Keyword-extraction settings; predict() passes each one to yake.KeywordExtractor.
language = "en"  # passed as `lan`
max_ngram_size = 3  # passed as `n`
deduplication_thresold = 0.9  # passed as `dedupLim` (spelling kept: predict() references this name)
deduplication_algo = 'seqm'  # passed as `dedupFunc`
windowSize = 1  # passed as `windowsSize`
numOfKeywords = 30  # passed as `top`
def predict(input_dict=''):
    """Extract keywords from the text contained in ``input_dict``.

    The dict is flattened to a single string with ``get_string_from_dict``
    and run through a ``yake.KeywordExtractor`` configured from the
    module-level settings.

    Args:
        input_dict: Dict accepted by ``get_string_from_dict``.

    Returns:
        A list of ``{'keyword': ..., 'score': ...}`` dicts, where the keyword
        is element 1 of each extractor result and the score is ``1`` minus
        element 0.
        NOTE(review): this assumes yake yields ``(score, keyword)`` pairs;
        confirm against the installed yake version.

    Raises:
        Exception: Any error is printed and re-raised.
    """
    try:
        # Flatten the input dict to plain text.
        flattened_text = get_string_from_dict(input_dict)

        # Build the extractor from the module-level settings.
        extractor_options = {
            'lan': language,
            'n': max_ngram_size,
            'dedupLim': deduplication_thresold,
            'dedupFunc': deduplication_algo,
            'windowsSize': windowSize,
            'top': numOfKeywords,
            'features': None,
        }
        model = yake.KeywordExtractor(**extractor_options)

        # Run the extraction and reshape each result into a dict.
        scored_entries = model.extract_keywords(flattened_text)
        result = []
        for entry in scored_entries:
            result.append({'keyword': entry[1], 'score': 1 - entry[0]})
        return result
    except Exception as e:
        print(e)
        raise e
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import boto3 | |
| import json | |
| import decimal | |
| import os | |
| # Helper class to convert a DynamoDB item to JSON. | |
class DecimalEncoder(json.JSONEncoder):
    """JSON encoder that serializes ``decimal.Decimal`` values as numbers.

    Decimals with a fractional part become ``float``; whole-valued Decimals
    become ``int``. Every other unsupported type is deferred to the base
    class, which raises ``TypeError``.
    """

    def default(self, o):
        """Return a JSON-serializable equivalent of ``o``."""
        if isinstance(o, decimal.Decimal):
            # Decimal's % keeps the sign of the dividend, so compare the
            # remainder with `!= 0`; `> 0` would truncate negative
            # fractional values such as Decimal('-1.5') to -1.
            if o % 1 != 0:
                return float(o)
            return int(o)
        return super().default(o)
# Module-level DynamoDB handle, created once when this module is imported.
# Credentials and region are read from the environment variables
# `aws_access_key_id`, `aws_secret_access_key` and `region`.
dynamodb = boto3.resource('dynamodb', aws_access_key_id=os.getenv('aws_access_key_id'),
                          aws_secret_access_key=os.getenv('aws_secret_access_key'), region_name=os.getenv('region'))
# Table name is "<INSIGHTTABLE>-<STAGE>", both taken from the environment.
# os.getenv returns None for an unset variable, which is formatted as the text "None".
table = dynamodb.Table(f"{os.getenv('INSIGHTTABLE')}-{os.getenv('STAGE')}")
| # table = dynamodb.Table("talos-insight-standalone-documents-qa") | |
def _floats_to_decimal(value):
    """Return a copy of ``value`` with every float replaced by a Decimal."""
    if isinstance(value, float):
        # Go through str() so 0.1 becomes Decimal('0.1') rather than the
        # long binary expansion produced by Decimal(0.1).
        return decimal.Decimal(str(value))
    if isinstance(value, dict):
        return {key: _floats_to_decimal(item) for key, item in value.items()}
    if isinstance(value, list):
        return [_floats_to_decimal(item) for item in value]
    return value


def update_insight(key_words='', key_dict=''):
    """Store ``key_words`` on the item identified by ``key_dict``.

    Sets the item's ``key_words`` attribute through ``table.update_item``
    and prints the response.

    Args:
        key_words: Value to store. Floats anywhere inside nested dicts and
            lists are converted to ``decimal.Decimal`` first, because the
            boto3 DynamoDB serializer rejects Python floats.
        key_dict: Primary key of the item to update.

    Returns:
        ``True`` on success. On any failure the error is printed and the
        string ``"type error: <message>"`` is returned instead of raising.
    """
    try:
        response = table.update_item(
            Key=key_dict,
            UpdateExpression="set key_words = :r",
            ExpressionAttributeValues={
                ':r': _floats_to_decimal(key_words),
            },
            ReturnValues="UPDATED_NEW"
        )
        print(response)
        return True
    except Exception as e:
        message = "type error: " + str(e)
        print(message)
        return message
# Example call:
# update_insight(key_words=[{'keyword': 'Siemens', 'score': decimal.Decimal('0.98')}, {'keyword': 'Apple', 'score': decimal.Decimal('0.95')}],
#                key_dict={'searchString#contentType': 'LG#news', 'createdAt': '2019-11-22T14:48:00.000Z'})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment