Last active
February 19, 2024 16:10
-
-
Save ogallagher/5be9bfe5c1ef757cf4faccaac3dc7a55 to your computer and use it in GitHub Desktop.
Sandbox testing of natural language processing APIs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Test natural language APIs.
 * - cloudmersive
 *   - detect language
 *   - tag parts of speech (POS)
 */
import dotenv from 'dotenv' | |
import cm from 'cloudmersive-validate-api-client' | |
import cm_nlp from 'cloudmersive-nlp-api-client' | |
// npm list (dependencies) | |
// load credentials from .env | |
dotenv.config() | |
/**
 * Look up an environment variable, falling back when it is unset or empty.
 *
 * @param {string} key Name of the variable to read from `process.env`.
 * @param {*} [default_val] Value returned when the variable is missing or is
 * the empty string. Defaults to `undefined` when omitted.
 * @returns {*} The variable's value, or `default_val`.
 */
function env_get_or_default(key, default_val) {
  const value = process.env[key]

  // `== null` matches both undefined and null, like the loose comparison it replaces
  if (value == null || value === '') {
    return default_val
  }

  return value
}
/**
 * Load the Cloudmersive API key from the environment and confirm that a
 * request made with it succeeds.
 *
 * The key is read from the `cloudmersive_api_key` environment variable,
 * installed on the shared validate-API client, and then a domain check for
 * `cloudmersive.com` is issued. A successful response is treated as
 * validation of the key.
 *
 * @returns {Promise<string>} Resolves with the API key when the domain check
 * succeeds. Rejects with a message string (not an `Error`) when the key is
 * missing or the request fails.
 */
function cloudmersive_api_validate() {
  return new Promise((res, rej) => {
    const cloudmersive_api_key = env_get_or_default('cloudmersive_api_key')

    if (cloudmersive_api_key === undefined) {
      rej(`cloudmersive api key missing`)
      return
    }

    // Never write the credential itself to the log; its length is enough to
    // confirm that something was loaded.
    console.log(`debug cloudmersive api key loaded (${cloudmersive_api_key.length} chars)`)

    // auth
    const cm_api_client = cm.ApiClient.instance
    cm_api_client.authentications.Apikey.apiKey = cloudmersive_api_key

    // domain check
    const cm_api_domain = new cm.DomainApi()
    cm_api_domain.domainCheck(
      'cloudmersive.com',
      (err, data) => {
        if (err) {
          rej(`failed to perform cloudmersive api domain check. ${err}`)
        }
        else {
          console.log(
            `info cloudmersive domain check passed. ${JSON.stringify(data, undefined, 2)}`
          )
          res(cloudmersive_api_key)
        }
      }
    )
  })
}
/**
 * Call the selected Cloudmersive NLP endpoints and log each response.
 *
 * All three API wrappers are constructed up front; a request is only sent for
 * the endpoints whose flag is truthy. Requests are started in the order
 * extract-entities, detect-language, tag-pos and run concurrently.
 *
 * @param {string} api_key Cloudmersive API key, installed on the shared NLP client.
 * @param {boolean[]} flags Array `[do_extract, do_detect_language, do_tag_pos]`
 * choosing which endpoints to call.
 * @returns {Promise<undefined[]>} Resolves with a three-element array (one
 * `undefined` entry per endpoint) once every selected request has succeeded.
 * Rejects with a message string from the first request that fails.
 */
function cloudmersive_api_nlp(api_key, [do_extract, do_detect_language, do_tag_pos]) {
  const example_sentence = {
    'eng': "my father said hello with a special message to me that I hadn't requested",
    'spa': "mi padre me envió un mensaje diciendo hola que yo no le había pedido",
    'kor': '우리 아버지는 특별한 인사로 "안녕하나" 라고 하는 제가 안 시킨 메시지를 제게 보냈어요'
  }

  /**
   * Adapt one callback-style API call to a promise.
   *
   * @param {boolean} enabled When falsy the request is skipped and the promise resolves immediately.
   * @param {string} label Endpoint name used in the log and error messages.
   * @param {function(function(*, *): void): void} send Issues the request, passing it the node-style callback.
   * @returns {Promise<undefined>}
   */
  function run_request(enabled, label, send) {
    return new Promise((res, rej) => {
      if (!enabled) {
        res()
        return
      }

      send((err, data) => {
        if (err) {
          rej(`failed to fetch from ${label} ${err}`)
        }
        else {
          console.log(
            `info cloudmersive ${label} fetch passed. ${JSON.stringify(data, undefined, 2)}`
          )
          res()
        }
      })
    })
  }

  // auth
  const cm_api_nlp = cm_nlp.ApiClient.instance
  cm_api_nlp.authentications.Apikey.apiKey = api_key

  // extract named entities. TODO what does this do?
  // example results (reference only, not used at runtime)
  const extract_res = {
    'eng': {
      "Successful": true,
      "Entities": []
    }
  }
  const cm_api_extract = new cm_nlp.ExtractEntitiesApi()
  const extract_p = run_request(
    do_extract,
    'extract-entities',
    (callback) => cm_api_extract.extractEntitiesPost(
      {
        'InputString': example_sentence['eng']
      },
      callback
    )
  )

  // detect language.
  // example results (reference only, not used at runtime)
  const detect_language_res = {
    'eng': {
      "Successful": true,
      "DetectedLanguage_ThreeLetterCode": "ENG",
      "DetectedLanguage_FullName": "English"
    },
    'spa': {
      "Successful": true,
      "DetectedLanguage_ThreeLetterCode": "SPA",
      "DetectedLanguage_FullName": "Spanish"
    }
  }
  const cm_api_detect_language = new cm_nlp.LanguageDetectionApi()
  const detect_language_p = run_request(
    do_detect_language,
    'detect-language',
    (callback) => cm_api_detect_language.languageDetectionGetLanguage(
      {
        'textToDetect': example_sentence['eng']
      },
      callback
    )
  )

  // tag parts of speech. Not working for languages other than English.
  // example results (reference only, not used at runtime). POS tags follow
  // [Penn Treebank](https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html) standard
  const tag_pos_res = {
    // looks good!
    'eng': {
      "TaggedSentences": [
        {
          "Words": [
            { "Word": "my", "Tag": "PRP$" },
            { "Word": "father", "Tag": "NN" },
            { "Word": "said", "Tag": "VBD" },
            { "Word": "hello", "Tag": "UH" },
            { "Word": "with", "Tag": "IN" },
            { "Word": "a", "Tag": "DT" },
            { "Word": "special", "Tag": "JJ" },
            { "Word": "message", "Tag": "NN" },
            { "Word": "to", "Tag": "TO" },
            { "Word": "me", "Tag": "PRP" },
            { "Word": "that", "Tag": "IN" },
            { "Word": "I", "Tag": "PRP" },
            { "Word": "had", "Tag": "VBD" },
            { "Word": "n't", "Tag": "RB" },
            { "Word": "requested", "Tag": "VBN" }
          ]
        }
      ]
    },
    // words parsed as FW="foreign word"
    'spa': {
      "TaggedSentences": [
        {
          "Words": [
            { "Word": "mi", "Tag": "FW" },
            { "Word": "padre", "Tag": "FW" },
            { "Word": "me", "Tag": "FW" },
            { "Word": "envió", "Tag": "FW" },
            { "Word": "un", "Tag": "FW" },
            { "Word": "mensaje", "Tag": "FW" },
            { "Word": "diciendo", "Tag": "FW" },
            { "Word": "hola", "Tag": "FW" },
            { "Word": "que", "Tag": "FW" },
            { "Word": "yo", "Tag": "FW" },
            { "Word": "no", "Tag": "DT" },
            { "Word": "le", "Tag": "FW" },
            { "Word": "había", "Tag": "FW" },
            { "Word": "pedido", "Tag": "FW" }
          ]
        }
      ]
    },
    // words parsed as FW="foreign word"
    'kor': {
      "TaggedSentences": [
        {
          "Words": [
            { "Word": "우리", "Tag": "FW" },
            { "Word": "아버지는", "Tag": "FW" },
            { "Word": "특별한", "Tag": "FW" },
            { "Word": "인사로", "Tag": "FW" },
            { "Word": "``", "Tag": "``" },
            { "Word": "안녕하나", "Tag": "NN" },
            { "Word": "''", "Tag": "''" },
            { "Word": "라고", "Tag": "FW" },
            { "Word": "하는", "Tag": "FW" },
            { "Word": "제가", "Tag": "FW" },
            { "Word": "안", "Tag": "FW" },
            { "Word": "시킨", "Tag": "FW" },
            { "Word": "메시지를", "Tag": "FW" },
            { "Word": "제게", "Tag": "FW" },
            { "Word": "보냈어요", "Tag": "FW" }
          ]
        }
      ]
    }
  }
  const cm_api_pos_tagger = new cm_nlp.PosTaggerApi()
  // other methods available on PosTaggerApi: posTaggerTagAdjectives,
  // posTaggerTagAdverbs, posTaggerTagNouns, posTaggerTagPronouns,
  // posTaggerTagVerbs
  const tag_pos_p = run_request(
    do_tag_pos,
    'tag-pos',
    (callback) => cm_api_pos_tagger.posTaggerTagSentence(
      {
        'InputText': example_sentence['spa']
        // confirmed no other attributes (ex. language) supported for PosRequest
      },
      callback
    )
  )

  // Return the combined promise directly. Wrapping it in another
  // `new Promise` whose executor merely returns it would never settle,
  // because a promise executor's return value is ignored.
  return Promise.all([
    extract_p,
    detect_language_p,
    tag_pos_p
  ])
}
// Entry point: validate the API key, then run the NLP requests selected by
// the flags [extract entities, detect language, tag POS].
//
// Failures from either step are handled once, at the end of the chain. A
// rejection handler attached to the first step would recover the chain and
// let the success log below run with an undefined message.
cloudmersive_api_validate()
  .then((api_key) => cloudmersive_api_nlp(api_key, [false, false, true]))
  .then((msg) => {
    console.log(`info ${msg}`)
  })
  .catch((err) => {
    console.log(`error ${err}`)
  })
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment