omundy · December 10, 2016 14:34
diff --git a/NLTK POS Tagging.ipynb b/NLTK POS Tagging.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## NLTK POS Smell Tagging Examples"
   ]
  },
  {
   "cell_type": "raw",
   "metadata": {},
   "source": [
    "Penn Treebank POS tag reference:\n",
    "https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# initialize library\n",
    "import nltk\n",
    "nltk.data.path.append(\"/Users/owmundy/Documents/_code/Python/nltk_data\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('My', 'PRP$'),\n",
       " ('wife', 'NN'),\n",
       " ('bought', 'VBD'),\n",
       " ('maple', 'JJ'),\n",
       " ('bacon', 'NN'),\n",
       " ('hand', 'NN'),\n",
       " ('sanitizer', 'NN'),\n",
       " ('.', '.'),\n",
       " ('I', 'PRP'),\n",
       " ('am', 'VBP'),\n",
       " ('in', 'IN'),\n",
       " ('a', 'DT'),\n",
       " ('meeting', 'NN'),\n",
       " ('smelling', 'VBG'),\n",
       " ('like', 'IN'),\n",
       " ('a', 'DT'),\n",
       " ('gay', 'NN'),\n",
       " ('lumberjack', 'NN')]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# smell_word = VBG = Verb, gerund or present participle\n",
    "# smell_object = maple bacon hand sanitizer\n",
    "# smell_modifier = \n",
    "# smell_smell = gay lumberjack | maple bacon hand sanitizer\n",
    "# smell_sentiment = positive\n",
    "s = \"My wife bought maple bacon hand sanitizer. I am in a meeting smelling like a gay lumberjack\"\n",
    "tokens = nltk.word_tokenize(s) # tokenize words\n",
    "tagged = nltk.pos_tag(tokens) # parts of speech tagging\n",
    "tagged"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('The', 'DT'),\n",
       " ('smell', 'NN'),\n",
       " ('of', 'IN'),\n",
       " ('hot', 'JJ'),\n",
       " ('apple', 'NN'),\n",
       " ('cider', 'NN'),\n",
       " ('makes', 'VBZ'),\n",
       " ('me', 'PRP'),\n",
       " ('so', 'IN'),\n",
       " ('happy', 'JJ'),\n",
       " ('#', '#'),\n",
       " ('fall', 'NN')]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# smell_word = NN = Noun, singular or mass\n",
    "# smell_object = hot apple cider\n",
    "# smell_modifier = happy\n",
    "# smell_smell = hot apple cider\n",
    "# smell_sentiment = positive\n",
    "s = \"The smell of hot apple cider makes me so happy #fall\"\n",
    "tokens = nltk.word_tokenize(s) # tokenize words\n",
    "tagged = nltk.pos_tag(tokens) # parts of speech tagging\n",
    "tagged"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('The', 'DT'),\n",
       " ('first', 'JJ'),\n",
       " ('rain', 'NN'),\n",
       " ('always', 'RB'),\n",
       " ('smells', 'VBZ'),\n",
       " ('so', 'RB'),\n",
       " ('heavenly', 'RB'),\n",
       " ('?', '.')]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# smell_word = VBZ = Verb, 3rd person singular present\n",
    "# smell_object = first rain\n",
    "# smell_modifier = heavenly\n",
    "# smell_smell = first rain\n",
    "# smell_sentiment = positive\n",
    "s = \"The first rain always smells so heavenly ?\"\n",
    "tokens = nltk.word_tokenize(s) # tokenize words\n",
    "tagged = nltk.pos_tag(tokens) # parts of speech tagging\n",
    "tagged"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('Confirmed', 'NNP'),\n",
       " ('Structure', 'NNP'),\n",
       " ('Fire', 'NNP'),\n",
       " ('Ridaucrest', 'NNP'),\n",
       " ('Tower', 'NNP'),\n",
       " ('via', 'IN'),\n",
       " ('Rideau', 'NNP'),\n",
       " ('Street', 'NNP'),\n",
       " ('.', '.'),\n",
       " ('Reportedly', 'RB'),\n",
       " ('on', 'IN'),\n",
       " ('1st', 'CD'),\n",
       " ('floor', 'NN'),\n",
       " ('.', '.'),\n",
       " ('Strong', 'NNP'),\n",
       " ('smell', 'NN'),\n",
       " ('of', 'IN'),\n",
       " ('burnt', 'JJ'),\n",
       " ('plastic/rubber', 'NN'),\n",
       " ('.', '.'),\n",
       " ('@', 'NN'),\n",
       " ('YGKTraffic', 'NN')]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# smell_word = NN\n",
    "# smell_object = Structure Fire \n",
    "# smell_modifier = strong\n",
    "# smell_smell = burnt plastic\n",
    "# smell_sentiment = negative\n",
    "s = \"Confirmed Structure Fire Ridaucrest Tower via Rideau Street. Reportedly on 1st floor. Strong smell of burnt plastic/rubber.@YGKTraffic\"\n",
    "tokens = nltk.word_tokenize(s) # tokenize words\n",
    "tagged = nltk.pos_tag(tokens) # parts of speech tagging\n",
    "tagged"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('I', 'PRP'), ('smell', 'VBP'), ('money', 'NN')]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# smell_word = VBP = Verb, non-3rd person singular present\n",
    "# smell_object = money\n",
    "# smell_modifier = \n",
    "# smell_smell = \n",
    "# smell_sentiment = \n",
    "s = \"I smell money\"\n",
    "tokens = nltk.word_tokenize(s) # tokenize words\n",
    "tagged = nltk.pos_tag(tokens) # parts of speech tagging\n",
    "tagged"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('Can', 'MD'),\n",
       " ('you', 'PRP'),\n",
       " ('say', 'VB'),\n",
       " ('insurance', 'NN'),\n",
       " ('fraud', 'NN'),\n",
       " ('?', '.'),\n",
       " ('That', 'IN'),\n",
       " ('what', 'WP'),\n",
       " ('I', 'PRP'),\n",
       " ('smell', 'VBP'),\n",
       " ('https', 'NNS'),\n",
       " (':', ':'),\n",
       " ('//t.co/EuxLLYPqFN', 'NN')]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# smell_word = VBP\n",
    "# smell_object = insurance fraud \n",
    "# smell_modifier = \n",
    "# smell_smell = \n",
    "# smell_sentiment = negative\n",
    "s = \"Can you say insurance fraud? That what I smell https://t.co/EuxLLYPqFN\"\n",
    "tokens = nltk.word_tokenize(s) # tokenize words\n",
    "tagged = nltk.pos_tag(tokens) # parts of speech tagging\n",
    "tagged"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('@', 'JJ'),\n",
       " ('ThePME', 'NNP'),\n",
       " ('missing', 'VBG'),\n",
       " ('Browns', 'NNP'),\n",
       " ('?', '.'),\n",
       " ('They', 'PRP'),\n",
       " ('stink', 'VBP'),\n",
       " ('but', 'CC'),\n",
       " ('running', 'VBG'),\n",
       " ('the', 'DT'),\n",
       " ('ball', 'NN'),\n",
       " ('well', 'RB'),\n",
       " ('.', '.')]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# smell_word = VBP\n",
    "# smell_object = They / Browns\n",
    "# smell_modifier = \n",
    "# smell_smell = \n",
    "# smell_sentiment = negative\n",
    "s = \"@ThePME missing Browns?  They stink but running the ball well.\"\n",
    "tokens = nltk.word_tokenize(s) # tokenize words\n",
    "tagged = nltk.pos_tag(tokens) # parts of speech tagging\n",
    "tagged"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## NLTK POS Smell Tagging Examples"
	]
	},
	{
	"cell_type": "raw",
	"metadata": {},
	"source": [
	"Penn Treebank POS tag reference:\n",
	"https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# initialize library\n",
	"import nltk\n",
	"nltk.data.path.append(\"/Users/owmundy/Documents/_code/Python/nltk_data\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[('My', 'PRP$'),\n",
	" ('wife', 'NN'),\n",
	" ('bought', 'VBD'),\n",
	" ('maple', 'JJ'),\n",
	" ('bacon', 'NN'),\n",
	" ('hand', 'NN'),\n",
	" ('sanitizer', 'NN'),\n",
	" ('.', '.'),\n",
	" ('I', 'PRP'),\n",
	" ('am', 'VBP'),\n",
	" ('in', 'IN'),\n",
	" ('a', 'DT'),\n",
	" ('meeting', 'NN'),\n",
	" ('smelling', 'VBG'),\n",
	" ('like', 'IN'),\n",
	" ('a', 'DT'),\n",
	" ('gay', 'NN'),\n",
	" ('lumberjack', 'NN')]"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# smell_word = VBG = Verb, gerund or present participle\n",
	"# smell_object = maple bacon hand sanitizer\n",
	"# smell_modifier = \n",
	"# smell_smell = gay lumberjack | maple bacon hand sanitizer\n",
	"# smell_sentiment = positive\n",
	"s = \"My wife bought maple bacon hand sanitizer. I am in a meeting smelling like a gay lumberjack\"\n",
	"tokens = nltk.word_tokenize(s) # tokenize words\n",
	"tagged = nltk.pos_tag(tokens) # parts of speech tagging\n",
	"tagged"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[('The', 'DT'),\n",
	" ('smell', 'NN'),\n",
	" ('of', 'IN'),\n",
	" ('hot', 'JJ'),\n",
	" ('apple', 'NN'),\n",
	" ('cider', 'NN'),\n",
	" ('makes', 'VBZ'),\n",
	" ('me', 'PRP'),\n",
	" ('so', 'IN'),\n",
	" ('happy', 'JJ'),\n",
	" ('#', '#'),\n",
	" ('fall', 'NN')]"
	]
	},
	"execution_count": 6,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# smell_word = NN = Noun, singular or mass\n",
	"# smell_object = hot apple cider\n",
	"# smell_modifier = happy\n",
	"# smell_smell = hot apple cider\n",
	"# smell_sentiment = positive\n",
	"s = \"The smell of hot apple cider makes me so happy #fall\"\n",
	"tokens = nltk.word_tokenize(s) # tokenize words\n",
	"tagged = nltk.pos_tag(tokens) # parts of speech tagging\n",
	"tagged"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[('The', 'DT'),\n",
	" ('first', 'JJ'),\n",
	" ('rain', 'NN'),\n",
	" ('always', 'RB'),\n",
	" ('smells', 'VBZ'),\n",
	" ('so', 'RB'),\n",
	" ('heavenly', 'RB'),\n",
	" ('?', '.')]"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# smell_word = VBZ = Verb, 3rd person singular present\n",
	"# smell_object = first rain\n",
	"# smell_modifier = heavenly\n",
	"# smell_smell = first rain\n",
	"# smell_sentiment = positive\n",
	"s = \"The first rain always smells so heavenly ?\"\n",
	"tokens = nltk.word_tokenize(s) # tokenize words\n",
	"tagged = nltk.pos_tag(tokens) # parts of speech tagging\n",
	"tagged"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[('Confirmed', 'NNP'),\n",
	" ('Structure', 'NNP'),\n",
	" ('Fire', 'NNP'),\n",
	" ('Ridaucrest', 'NNP'),\n",
	" ('Tower', 'NNP'),\n",
	" ('via', 'IN'),\n",
	" ('Rideau', 'NNP'),\n",
	" ('Street', 'NNP'),\n",
	" ('.', '.'),\n",
	" ('Reportedly', 'RB'),\n",
	" ('on', 'IN'),\n",
	" ('1st', 'CD'),\n",
	" ('floor', 'NN'),\n",
	" ('.', '.'),\n",
	" ('Strong', 'NNP'),\n",
	" ('smell', 'NN'),\n",
	" ('of', 'IN'),\n",
	" ('burnt', 'JJ'),\n",
	" ('plastic/rubber', 'NN'),\n",
	" ('.', '.'),\n",
	" ('@', 'NN'),\n",
	" ('YGKTraffic', 'NN')]"
	]
	},
	"execution_count": 8,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# smell_word = NN\n",
	"# smell_object = Structure Fire \n",
	"# smell_modifier = strong\n",
	"# smell_smell = burnt plastic\n",
	"# smell_sentiment = negative\n",
	"s = \"Confirmed Structure Fire Ridaucrest Tower via Rideau Street. Reportedly on 1st floor. Strong smell of burnt plastic/rubber.@YGKTraffic\"\n",
	"tokens = nltk.word_tokenize(s) # tokenize words\n",
	"tagged = nltk.pos_tag(tokens) # parts of speech tagging\n",
	"tagged"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[('I', 'PRP'), ('smell', 'VBP'), ('money', 'NN')]"
	]
	},
	"execution_count": 9,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# smell_word = VBP = Verb, non-3rd person singular present\n",
	"# smell_object = money\n",
	"# smell_modifier = \n",
	"# smell_smell = \n",
	"# smell_sentiment = \n",
	"s = \"I smell money\"\n",
	"tokens = nltk.word_tokenize(s) # tokenize words\n",
	"tagged = nltk.pos_tag(tokens) # parts of speech tagging\n",
	"tagged"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[('Can', 'MD'),\n",
	" ('you', 'PRP'),\n",
	" ('say', 'VB'),\n",
	" ('insurance', 'NN'),\n",
	" ('fraud', 'NN'),\n",
	" ('?', '.'),\n",
	" ('That', 'IN'),\n",
	" ('what', 'WP'),\n",
	" ('I', 'PRP'),\n",
	" ('smell', 'VBP'),\n",
	" ('https', 'NNS'),\n",
	" (':', ':'),\n",
	" ('//t.co/EuxLLYPqFN', 'NN')]"
	]
	},
	"execution_count": 10,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# smell_word = VBP\n",
	"# smell_object = insurance fraud \n",
	"# smell_modifier = \n",
	"# smell_smell = \n",
	"# smell_sentiment = negative\n",
	"s = \"Can you say insurance fraud? That what I smell https://t.co/EuxLLYPqFN\"\n",
	"tokens = nltk.word_tokenize(s) # tokenize words\n",
	"tagged = nltk.pos_tag(tokens) # parts of speech tagging\n",
	"tagged"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[('@', 'JJ'),\n",
	" ('ThePME', 'NNP'),\n",
	" ('missing', 'VBG'),\n",
	" ('Browns', 'NNP'),\n",
	" ('?', '.'),\n",
	" ('They', 'PRP'),\n",
	" ('stink', 'VBP'),\n",
	" ('but', 'CC'),\n",
	" ('running', 'VBG'),\n",
	" ('the', 'DT'),\n",
	" ('ball', 'NN'),\n",
	" ('well', 'RB'),\n",
	" ('.', '.')]"
	]
	},
	"execution_count": 11,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# smell_word = VBP\n",
	"# smell_object = They / Browns\n",
	"# smell_modifier = \n",
	"# smell_smell = \n",
	"# smell_sentiment = negative\n",
	"s = \"@ThePME missing Browns? They stink but running the ball well.\"\n",
	"tokens = nltk.word_tokenize(s) # tokenize words\n",
	"tagged = nltk.pos_tag(tokens) # parts of speech tagging\n",
	"tagged"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.2"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}