Skip to content

Instantly share code, notes, and snippets.

@omundy
Created December 10, 2016 14:34
Show Gist options
  • Save omundy/df021bf3b817301e485a98d233f6ca68 to your computer and use it in GitHub Desktop.
Save omundy/df021bf3b817301e485a98d233f6ca68 to your computer and use it in GitHub Desktop.
NLTK POS Smell Tagging Examples
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## NLTK POS Smell Tagging Examples"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"POS tag reference:\n",
"https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Initialize NLTK and register an extra directory on its data search path.\n",
"import os\n",
"\n",
"import nltk\n",
"\n",
"# Directory appended to nltk.data.path. Set the NLTK_DATA_DIR environment variable\n",
"# to point somewhere else; the fallback is the path this notebook was written with.\n",
"NLTK_DATA_DIR = os.environ.get(\n",
"    \"NLTK_DATA_DIR\", \"/Users/owmundy/Documents/_code/Python/nltk_data\"\n",
")\n",
"\n",
"# Guard the append so re-running this cell does not add duplicate entries.\n",
"if NLTK_DATA_DIR not in nltk.data.path:\n",
"    nltk.data.path.append(NLTK_DATA_DIR)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('My', 'PRP$'),\n",
" ('wife', 'NN'),\n",
" ('bought', 'VBD'),\n",
" ('maple', 'JJ'),\n",
" ('bacon', 'NN'),\n",
" ('hand', 'NN'),\n",
" ('sanitizer', 'NN'),\n",
" ('.', '.'),\n",
" ('I', 'PRP'),\n",
" ('am', 'VBP'),\n",
" ('in', 'IN'),\n",
" ('a', 'DT'),\n",
" ('meeting', 'NN'),\n",
" ('smelling', 'VBG'),\n",
" ('like', 'IN'),\n",
" ('a', 'DT'),\n",
" ('gay', 'NN'),\n",
" ('lumberjack', 'NN')]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# smell_word = VBG = Verb, gerund or present participle\n",
"# smell_object = maple bacon hand sanitizer\n",
"# smell_modifier = \n",
"# smell_smell = gay lumberjack | maple bacon hand sanitizer\n",
"# smell_sentiment = positive\n",
"s = \"My wife bought maple bacon hand sanitizer. I am in a meeting smelling like a gay lumberjack\"\n",
"\n",
"# split the sentence into word tokens\n",
"tokens = nltk.word_tokenize(s)\n",
"# assign a part-of-speech tag to every token\n",
"tagged = nltk.pos_tag(tokens)\n",
"\n",
"# display the (token, tag) pairs\n",
"tagged"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('The', 'DT'),\n",
" ('smell', 'NN'),\n",
" ('of', 'IN'),\n",
" ('hot', 'JJ'),\n",
" ('apple', 'NN'),\n",
" ('cider', 'NN'),\n",
" ('makes', 'VBZ'),\n",
" ('me', 'PRP'),\n",
" ('so', 'IN'),\n",
" ('happy', 'JJ'),\n",
" ('#', '#'),\n",
" ('fall', 'NN')]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# smell_word = NN = Noun, singular or mass\n",
"# smell_object = hot apple cider\n",
"# smell_modifier = happy\n",
"# smell_smell = hot apple cider\n",
"# smell_sentiment = positive\n",
"s = \"The smell of hot apple cider makes me so happy #fall\"\n",
"\n",
"# split the sentence into word tokens\n",
"tokens = nltk.word_tokenize(s)\n",
"# assign a part-of-speech tag to every token\n",
"tagged = nltk.pos_tag(tokens)\n",
"\n",
"# display the (token, tag) pairs\n",
"tagged"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('The', 'DT'),\n",
" ('first', 'JJ'),\n",
" ('rain', 'NN'),\n",
" ('always', 'RB'),\n",
" ('smells', 'VBZ'),\n",
" ('so', 'RB'),\n",
" ('heavenly', 'RB'),\n",
" ('?', '.')]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# smell_word = VBZ = Verb, 3rd person singular present\n",
"# smell_object = first rain\n",
"# smell_modifier = heavenly\n",
"# smell_smell = first rain\n",
"# smell_sentiment = positive\n",
"s = \"The first rain always smells so heavenly ?\"\n",
"\n",
"# split the sentence into word tokens\n",
"tokens = nltk.word_tokenize(s)\n",
"# assign a part-of-speech tag to every token\n",
"tagged = nltk.pos_tag(tokens)\n",
"\n",
"# display the (token, tag) pairs\n",
"tagged"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('Confirmed', 'NNP'),\n",
" ('Structure', 'NNP'),\n",
" ('Fire', 'NNP'),\n",
" ('Ridaucrest', 'NNP'),\n",
" ('Tower', 'NNP'),\n",
" ('via', 'IN'),\n",
" ('Rideau', 'NNP'),\n",
" ('Street', 'NNP'),\n",
" ('.', '.'),\n",
" ('Reportedly', 'RB'),\n",
" ('on', 'IN'),\n",
" ('1st', 'CD'),\n",
" ('floor', 'NN'),\n",
" ('.', '.'),\n",
" ('Strong', 'NNP'),\n",
" ('smell', 'NN'),\n",
" ('of', 'IN'),\n",
" ('burnt', 'JJ'),\n",
" ('plastic/rubber', 'NN'),\n",
" ('.', '.'),\n",
" ('@', 'NN'),\n",
" ('YGKTraffic', 'NN')]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# smell_word = NN = Noun, singular or mass\n",
"# smell_object = Structure Fire \n",
"# smell_modifier = strong\n",
"# smell_smell = burnt plastic\n",
"# smell_sentiment = negative\n",
"s = \"Confirmed Structure Fire Ridaucrest Tower via Rideau Street. Reportedly on 1st floor. Strong smell of burnt plastic/rubber.@YGKTraffic\"\n",
"\n",
"# split the sentence into word tokens\n",
"tokens = nltk.word_tokenize(s)\n",
"# assign a part-of-speech tag to every token\n",
"tagged = nltk.pos_tag(tokens)\n",
"\n",
"# display the (token, tag) pairs\n",
"tagged"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('I', 'PRP'), ('smell', 'VBP'), ('money', 'NN')]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# smell_word = VBP = Verb, non-3rd person singular present\n",
"# smell_object = money\n",
"# smell_modifier = \n",
"# smell_smell = \n",
"# smell_sentiment = \n",
"s = \"I smell money\"\n",
"\n",
"# split the sentence into word tokens\n",
"tokens = nltk.word_tokenize(s)\n",
"# assign a part-of-speech tag to every token\n",
"tagged = nltk.pos_tag(tokens)\n",
"\n",
"# display the (token, tag) pairs\n",
"tagged"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('Can', 'MD'),\n",
" ('you', 'PRP'),\n",
" ('say', 'VB'),\n",
" ('insurance', 'NN'),\n",
" ('fraud', 'NN'),\n",
" ('?', '.'),\n",
" ('That', 'IN'),\n",
" ('what', 'WP'),\n",
" ('I', 'PRP'),\n",
" ('smell', 'VBP'),\n",
" ('https', 'NNS'),\n",
" (':', ':'),\n",
" ('//t.co/EuxLLYPqFN', 'NN')]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# smell_word = VBP = Verb, non-3rd person singular present\n",
"# smell_object = insurance fraud \n",
"# smell_modifier = \n",
"# smell_smell = \n",
"# smell_sentiment = negative\n",
"s = \"Can you say insurance fraud? That what I smell https://t.co/EuxLLYPqFN\"\n",
"\n",
"# split the sentence into word tokens\n",
"tokens = nltk.word_tokenize(s)\n",
"# assign a part-of-speech tag to every token\n",
"tagged = nltk.pos_tag(tokens)\n",
"\n",
"# display the (token, tag) pairs\n",
"tagged"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('@', 'JJ'),\n",
" ('ThePME', 'NNP'),\n",
" ('missing', 'VBG'),\n",
" ('Browns', 'NNP'),\n",
" ('?', '.'),\n",
" ('They', 'PRP'),\n",
" ('stink', 'VBP'),\n",
" ('but', 'CC'),\n",
" ('running', 'VBG'),\n",
" ('the', 'DT'),\n",
" ('ball', 'NN'),\n",
" ('well', 'RB'),\n",
" ('.', '.')]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# smell_word = VBP = Verb, non-3rd person singular present\n",
"# smell_object = They / Browns\n",
"# smell_modifier = \n",
"# smell_smell = \n",
"# smell_sentiment = negative\n",
"s = \"@ThePME missing Browns? They stink but running the ball well.\"\n",
"\n",
"# split the sentence into word tokens\n",
"tokens = nltk.word_tokenize(s)\n",
"# assign a part-of-speech tag to every token\n",
"tagged = nltk.pos_tag(tokens)\n",
"\n",
"# display the (token, tag) pairs\n",
"tagged"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment