Last active
December 13, 2019 12:30
-
-
Save philschmid/bd6bfa8888d7d9d7c54c0c7e76dec550 to your computer and use it in GitHub Desktop.
Multilabel categorization of talos with the Google Cloud NLP API and AWS Lambda
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Imports the Google Cloud client library | |
const language = require('@google-cloud/language'); | |
// Shared Natural Language API client, created once at module load and
// reused by every categorizeText call.
const client = new language.LanguageServiceClient();
/**
 * Classifies a plain-text string with the Natural Language API client and
 * returns the categories reported for it.
 *
 * Each category is logged with its `name` and `confidence` before returning.
 *
 * @param document - The plain text to classify.
 * @returns The `categories` list from the classification response, or an
 *   empty array when the response carries none.
 */
export const categorizeText = async (document: string) => {
  // The request payload must not reuse the name `document`: redeclaring the
  // parameter with `const` in the same scope is a SyntaxError.
  const request = {
    document: {
      content: document,
      type: 'PLAIN_TEXT',
    },
  };

  // Classifies text in the document
  const [classification] = await client.classifyText(request);
  const categories = classification.categories || [];

  for (const category of categories) {
    console.log(`Name: ${category.name}, Confidence: ${category.confidence}`);
  }
  return categories;
};
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Converts a single attribute value into its text segment.
 *
 * Strings and numbers are returned unchanged, arrays and objects are
 * flattened recursively, and every other value (including `null`,
 * `undefined` and booleans) becomes an empty segment.
 */
function flattenAttribute(attribute: unknown): string | number {
  if (typeof attribute === 'string' || typeof attribute === 'number') {
    return attribute;
  }
  if (Array.isArray(attribute)) {
    return flattenAttributes(attribute);
  }
  // `typeof null === 'object'`, so null has to be excluded explicitly;
  // otherwise Object.values(null) throws a TypeError.
  if (typeof attribute === 'object' && attribute !== null) {
    return flattenAttributes(Object.values(attribute));
  }
  return '';
}

/** Flattens every value in the list and joins the segments with '. '. */
function flattenAttributes(attributes: readonly unknown[]): string {
  return attributes.map(flattenAttribute).join('. ');
}

/**
 * Builds one string from all values of a (possibly nested) message object.
 *
 * Values are visited in `Object.values` order; nested objects and arrays are
 * flattened depth-first and all segments are joined with '. '.
 *
 * @param message - The object whose values are concatenated.
 * @returns The joined text.
 * @throws Logs and rethrows any error, e.g. the TypeError raised when
 *   `message` itself is `null` or `undefined`.
 */
export const createStringFromMessage = async (message: object): Promise<string> => {
  try {
    return flattenAttributes(Object.values(message));
  } catch (error) {
    console.error(error);
    throw error;
  }
};

/**
 * Builds one string from all elements of a (possibly nested) array, using the
 * same flattening rules as createStringFromMessage.
 *
 * @param inputArray - The elements to concatenate.
 * @returns The joined text.
 * @throws Logs and rethrows any error raised while flattening.
 */
const createStringFromArray = async (inputArray: readonly unknown[]): Promise<string> => {
  try {
    return flattenAttributes(inputArray);
  } catch (error) {
    console.error(error);
    throw error;
  }
};
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"Records": [ | |
{ | |
"messageId": "2e1424d4-f796-459a-8184-9c92662be6da", | |
"receiptHandle": "AQEBzWwaftRI0KuVm4tP+/7q1rGgNqicHq...", | |
"body": "test", | |
"attributes": { | |
"ApproximateReceiveCount": "1", | |
"SentTimestamp": "1545082650636", | |
"SenderId": "AIDAIENQZJOLO23YVJ4VO", | |
"ApproximateFirstReceiveTimestamp": "1545082650649" | |
}, | |
"messageAttributes": {}, | |
"md5OfBody": "098f6bcd4621d373cade4e832627b4f6", | |
"eventSource": "aws:sqs", | |
"eventSourceARN": "arn:aws:sqs:us-east-2:123456789012:my-queue", | |
"awsRegion": "us-east-2" | |
} | |
] | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import {categorizeText} from './utils/categorizeText' | |
import {createStringFromMessage} from './utils/createStringFromMessage' | |
import {Insight} from './models/insight' | |
/** @format */ | |
/**
 * Hard-coded, trimmed-down SQS-style event containing a single record.
 *
 * NOTE(review): `body` is the plain string "test", which is not valid JSON,
 * so calling `JSON.parse` on it throws. Looks like a local-testing stub;
 * confirm whether it is still needed.
 */
const event = {
  "Records": [
    {
      "messageId": "2e1424d4-f796-459a-8184-9c92662be6da",
      "body": "test",
      "eventSource": "aws:sqs",
    },
  ],
};
/**
 * Lambda entry point: categorizes the message carried by the first record of
 * the incoming SQS event and stores the categories on the matching Insight.
 *
 * Steps:
 *  1. Verify the first record comes from SQS and parse its JSON body.
 *  2. Flatten the `english` part of the message into one string.
 *  3. Classify that string.
 *  4. Update the Insight identified by `searchString#contentType` / `createdAt`.
 *
 * NOTE(review): only `Records[0]` is processed; any further records in a
 * batch are ignored. Confirm the queue trigger uses a batch size of 1.
 * NOTE(review): `updateInsight` is not imported in the visible part of this
 * file; confirm the import exists.
 * NOTE(review): `categorizeText` logs `name` and `confidence` per category,
 * which suggests objects rather than strings; confirm the `string[]` type.
 *
 * @param x - The event passed in by Lambda; expected to carry `Records`.
 * @param context - The Lambda context (unused).
 * @returns A status message naming the processed message id.
 */
module.exports.handler = async (
  x: {Records: Array<{messageId: string; body: string; eventSource: string}>},
  context: unknown,
): Promise<string> => {
  // Read the record from the event Lambda actually delivered, not from a
  // module-level sample object.
  const record = x && Array.isArray(x.Records) ? x.Records[0] : undefined;
  const messageId = record ? record.messageId : 'unknown';

  try {
    //
    // checks that the event is an sqs trigger and parses the JSON body
    //
    if (!record || record.eventSource !== 'aws:sqs') {
      // Throw instead of carrying an Error object forward as if it were the
      // message; JSON.stringify keeps the event readable in the log.
      throw new Error(`Error in sqs trigger Message ${JSON.stringify(x)}`);
    }
    const rawMessage = JSON.parse(record.body);
    //
    // creates One String from document for categorizeText
    //
    const stringMessage: string = await createStringFromMessage(rawMessage.english);
    //
    // classifies the string into categories
    //
    const categories: string[] = await categorizeText(stringMessage);
    //
    // stores the categories on the Insight item in Dynamo
    //
    await updateInsight(
      {
        'searchString#contentType': `${rawMessage.searchString}#${rawMessage.contentType}`,
        createdAt: rawMessage.createdAt,
      },
      {categories},
    );
    //
    // finishes function
    //
    return `Successfully processed sqs ${messageId} message.`;
  } catch (error) {
    // NOTE(review): the error is logged but not rethrown, so the invocation
    // still counts as successful and SQS will not redeliver the message.
    // Confirm this best-effort behavior is intended.
    console.error(error);
    return `Failed to process sqs ${messageId} message.`;
  }
};
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import {attribute, table, hashKey, rangeKey} from '@aws/dynamodb-data-mapper-annotations'; | |
/**
 * Data-mapper model for an insight item.
 *
 * The table name is read from the `DYNAMO_TABLE` environment variable when
 * the module is loaded.
 * NOTE(review): if the variable is unset the template literal yields the
 * string "undefined" as table name; confirm it is always configured.
 */
@table(`${process.env.DYNAMO_TABLE}`)
export class Insight {
  /** Hash key; a single attribute literally named `searchString#contentType`. */
  @hashKey()
  'searchString#contentType'!: string;

  /** Categories assigned to the insight; absent until it has been categorized. */
  @attribute()
  categories?: string[];

  /** Range key. */
  @rangeKey()
  createdAt!: string;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import {Insight} from '../models/insight'; | |
import {mapper} from '../config/datamapper'; | |
/**
 * Loads the Insight identified by `key`, merges `payload` into it and writes
 * it back through the data mapper.
 *
 * @param key - Key attributes identifying the item to update.
 * @param payload - The fields to set on the item.
 * @returns `true` once the update has been written.
 * @throws Rejects with the mapper's error when the lookup or the update fails.
 */
export const updateInsight = async (
  key: Insight,
  payload: {categories: string[]},
): Promise<boolean> => {
  // A plain async function propagates a failed lookup to the caller. Inside
  // a `new Promise(async ...)` executor that rejection was lost and the
  // returned promise never settled.
  const existingItem = await mapper.get(Object.assign(new Insight(), key));

  // Merge in place instead of spreading into a new object literal: a spread
  // copy is a plain object without the Insight prototype, which is where the
  // mapper decorators attach the table and schema metadata.
  await mapper.update(Object.assign(existingItem, payload));
  return true;
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment