Created
June 8, 2021 17:58
-
-
Save andymithamclarke/0b63f95d092e3b8682e8bd788ff8b6ef to your computer and use it in GitHub Desktop.
NLP Steps Added to VAERS Study Project
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Configure English as the language of the text.
# Writes the constant category value "en" to ds.lang; the extract_keywords
# and extract_ngrams steps in this recipe pass ds.lang as their language input.
make_constant(ds["Symptom Description"], {
  "value": "en",
  "out_type": "category"
}) -> (ds.lang)
# Parse and extract ADJECTIVES from the SYMPTOM DESCRIPTION column.
# Only the "ADJ" part-of-speech tag is requested; the "entities" and
# "noun_phrases" options are both switched off. The result is written to
# the "Symptom Description - Adjectives" column.
extract_keywords(ds["Symptom Description"], ds.lang, {
  "keywords": {
    "pos_tags": [
      "ADJ"
    ],
    "entities": false,
    "noun_phrases": false
  },
  "extended_language_support": false
}) -> (ds["Symptom Description - Adjectives"])
# Parse and extract NOUNS from the SYMPTOM DESCRIPTION column.
# Only the "NOUN" part-of-speech tag is requested; the "entities" and
# "noun_phrases" options are both switched off. The result is written to
# the "Symptom Description - Nouns" column.
extract_keywords(ds["Symptom Description"], ds.lang, {
  "keywords": {
    "pos_tags": [
      "NOUN"
    ],
    "entities": false,
    "noun_phrases": false
  },
  "extended_language_support": false
}) -> (ds["Symptom Description - Nouns"])
# Parse each free-text column and extract its ngrams.
# Every call takes the text column plus ds.lang and writes its output to a
# new column named "<source column> - Significant terms".
extract_ngrams(ds["Symptom Description"], ds.lang) -> (ds["Symptom Description - Significant terms"])
extract_ngrams(ds.Allergies, ds.lang) -> (ds["Allergies - Significant terms"])
extract_ngrams(ds["Prior Vaccinations"], ds.lang) -> (ds["Prior Vaccinations - Significant terms"])
extract_ngrams(ds["Medical History Notes"], ds.lang) -> (ds["Medical History Notes - Significant terms"])
extract_ngrams(ds["Other Medication"], ds.lang) -> (ds["Other Medication - Significant terms"])
extract_ngrams(ds["Current Illnesses"], ds.lang) -> (ds["Current Illnesses - Significant terms"])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment