Skip to content

Instantly share code, notes, and snippets.

@philschmid
Last active December 13, 2019 14:49
Show Gist options
  • Select an option

  • Save philschmid/d3ff61b4590ac6ff1ef340efb6d09b1c to your computer and use it in GitHub Desktop.

Select an option

Save philschmid/d3ff61b4590ac6ff1ef340efb6d09b1c to your computer and use it in GitHub Desktop.
yake_lambda
def get_string_from_dict(input_dict=''):
    """Flatten the values of a (possibly nested) dict into one string.

    Every value is rendered and appended as " <rendered>." in iteration
    order.  str and int values are used as-is, dict values are rendered by a
    recursive call, and list values are rendered by iterate_list.

    Args:
        input_dict: Mapping whose values are str, int, dict or list.

    Returns:
        The concatenated string, or '' for an empty mapping.

    Raises:
        ValueError: If a value has any other type (float, bool, None, ...).
        Exception: Any other error is printed and re-raised unchanged.
    """
    try:
        pieces = []
        for _key, value in input_dict.items():
            # Exact type match on purpose: bool is a subclass of int and is
            # meant to fall through to the ValueError branch.
            if type(value) in (str, int):
                pieces.append(value)
            elif isinstance(value, dict):
                pieces.append(get_string_from_dict(input_dict=value))
            elif isinstance(value, list):
                pieces.append(iterate_list(value))
            else:
                raise ValueError('String Creation not possible')
        # Same output as repeatedly formatting f"{result} {piece}.".
        return ''.join(f" {piece}." for piece in pieces)
    except Exception as e:
        print(e)
        raise
def iterate_list(value='', rec_function=get_string_from_dict):
    """Flatten the items of a list into one string.

    Every rendered item is appended as " <rendered>." in order.  str and int
    items are used as-is, dict items are rendered with ``rec_function``, and
    nested lists are rendered by a recursive call that passes
    ``rec_function`` along.  Items of any other type (float, bool, None, ...)
    are skipped silently.

    Args:
        value: Iterable of items to render.
        rec_function: Callable that turns a dict item into a string;
            defaults to get_string_from_dict.

    Returns:
        The concatenated string, or '' when nothing was rendered.
    """
    pieces = []
    for item in value:
        # Exact type match on purpose: bool is a subclass of int and must
        # keep being skipped.
        if type(item) in (str, int):
            pieces.append(item)
        elif isinstance(item, dict):
            pieces.append(rec_function(item))
        elif isinstance(item, list):
            pieces.append(iterate_list(item, rec_function))
    # Same output as repeatedly formatting f"{temp} {piece}.".
    return ''.join(f" {piece}." for piece in pieces)
# test={
# 'xx': 'bla stirng 1',
# 'xy': ['string2','string3'],
# 'xz': [{'x':'string4'}],
# 'xa': {'xb':'string5'}
# }
# print(get_string_from_dict(test))
from utils.update_insight import update_insight
from utils.get_key_entities import get_key_entities
from utils.predict import predict
import json
def extractor(event, context):
    """Lambda-style handler: extract keywords from the event body and store them.

    Reads the JSON document in ``event['body']``, runs ``predict`` on its
    ``'english'`` entry and hands the result to ``update_insight`` together
    with the item key built from ``'searchString#contentType'`` and
    ``'createdAt'``.

    Args:
        event: Mapping expected to contain a JSON string under ``'body'``.
        context: Unused.

    Returns:
        Always True.  Any failure (missing body, bad JSON, missing fields,
        errors from predict/update_insight) is printed and swallowed.
    """
    try:
        print(event)
        if 'body' not in event:
            raise ValueError('No message in Event')

        raw_message = json.loads(event['body'])
        print(raw_message)

        keyword_dict = predict(raw_message['english'])

        # update_insight's parameter is named `key_words`; any other keyword
        # name raises TypeError, which the except below would hide.
        update_insight(
            key_words=keyword_dict,
            key_dict={
                'searchString#contentType': raw_message['searchString#contentType'],
                'createdAt': raw_message['createdAt'],
            },
        )
        print(keyword_dict)
    except Exception as e:
        # Best-effort handler: report the problem but never fail the invocation.
        print(e)
    return True
import yake
#from get_string_from_dict import get_string_from_dict
from .get_string_from_dict import get_string_from_dict
# Settings handed to yake.KeywordExtractor by predict() below.
language = "en"  # passed as `lan`
max_ngram_size = 3  # passed as `n`
deduplication_thresold = 0.9  # passed as `dedupLim` (identifier spelling is what predict() reads)
deduplication_algo = 'seqm'  # passed as `dedupFunc`
windowSize = 1  # passed as `windowsSize`
numOfKeywords = 30  # passed as `top`
def predict(input_dict=''):
    """Extract keywords from the text contained in ``input_dict``.

    The input is flattened to a single string with get_string_from_dict, fed
    to a freshly built yake.KeywordExtractor configured from the module-level
    settings, and each result is converted to a dict.

    Args:
        input_dict: Nested structure accepted by get_string_from_dict.

    Returns:
        A list of ``{'keyword': result[1], 'score': 1 - result[0]}`` dicts,
        one per extractor result, in the extractor's order.

    Raises:
        Exception: Any error is printed and then re-raised.
    """
    try:
        flattened_text = get_string_from_dict(input_dict)

        extractor_options = {
            'lan': language,
            'n': max_ngram_size,
            'dedupLim': deduplication_thresold,
            'dedupFunc': deduplication_algo,
            'windowsSize': windowSize,
            'top': numOfKeywords,
            'features': None,
        }
        kw_extractor = yake.KeywordExtractor(**extractor_options)

        ranked = kw_extractor.extract_keywords(flattened_text)

        # NOTE(review): assumes each result is indexed (score, keyword);
        # confirm against the installed yake version.
        results = []
        for entry in ranked:
            results.append({'keyword': entry[1], 'score': 1 - entry[0]})
        return results
    except Exception as e:
        print(e)
        raise e
import boto3
import json
import decimal
import os
# Helper class to convert a DynamoDB item to JSON.
class DecimalEncoder(json.JSONEncoder):
    """JSON encoder that serializes decimal.Decimal values as plain numbers.

    Whole-valued Decimals become int, all others become float.  Every other
    type is delegated to json.JSONEncoder.default, which raises TypeError
    for objects it cannot serialize.
    """

    def default(self, o):
        if isinstance(o, decimal.Decimal):
            # Compare with != rather than >: Decimal's remainder keeps the
            # sign of the dividend, so Decimal('-1.5') % 1 is -0.5 and a
            # "> 0" test would truncate negative fractions to int.
            if o % 1 != 0:
                return float(o)
            return int(o)
        return super().default(o)
# DynamoDB resource created at import time. Credentials and region are read
# from the `aws_access_key_id`, `aws_secret_access_key` and `region`
# environment variables; os.getenv yields None for an unset variable and that
# None is what gets passed to boto3.
dynamodb = boto3.resource('dynamodb', aws_access_key_id=os.getenv('aws_access_key_id'),
                          aws_secret_access_key=os.getenv('aws_secret_access_key'), region_name=os.getenv('region'))
# Table name is "<INSIGHTTABLE>-<STAGE>" taken from the environment; an unset
# variable is formatted as the literal text "None".
table = dynamodb.Table(f"{os.getenv('INSIGHTTABLE')}-{os.getenv('STAGE')}")
# table = dynamodb.Table("talos-insight-standalone-documents-qa")
def _floats_to_decimal(value):
    """Return a copy of ``value`` with every float replaced by a Decimal."""
    if isinstance(value, float):
        # Go through str() so the Decimal holds the short repr (0.1) rather
        # than the full binary expansion of the float.
        return decimal.Decimal(str(value))
    if isinstance(value, dict):
        return {key: _floats_to_decimal(item) for key, item in value.items()}
    if isinstance(value, list):
        return [_floats_to_decimal(item) for item in value]
    return value


def update_insight(key_words='', key_dict=''):
    """Store ``key_words`` on the item identified by ``key_dict``.

    Issues ``table.update_item`` with the expression ``set key_words = :r``
    and prints the response.

    Args:
        key_words: Value written to the item's ``key_words`` attribute.
            Floats anywhere inside nested dicts/lists are converted to
            Decimal first, because the boto3 DynamoDB resource rejects
            Python floats.
        key_dict: Primary-key mapping passed as ``Key``.

    Returns:
        True on success.  On any exception the error is printed and the
        string ``"type error: <message>"`` is returned instead of raising;
        note that both outcomes are truthy, so callers must compare with
        ``is True`` to detect failure.
    """
    try:
        response = table.update_item(
            Key=key_dict,
            UpdateExpression="set key_words = :r",
            ExpressionAttributeValues={
                ':r': _floats_to_decimal(key_words),
            },
            ReturnValues="UPDATED_NEW",
        )
        print(response)
        return True
    except Exception as e:
        message = "type error: " + str(e)
        print(message)
        return message
# Example call (keyword names must match the signature: key_words, key_dict):
# update_insight(key_words=[{'keyword': 'Siemens', 'score': 1}, {'keyword': 'Apple', 'score': 1}],
#                key_dict={'searchString#contentType': 'LG#news', 'createdAt': '2019-11-22T14:48:00.000Z'})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment