mzeidhassan · April 17, 2018 21:49
diff --git a/textblob.ipynb b/textblob.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import textblob\n",
    "from textblob import TextBlob\n",
    "from textblob.classifiers import NaiveBayesClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'decl'"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Train a small declarative ('decl') vs. interrogative ('inter') sentence classifier.\n",
    "# NaiveBayesClassifier is already imported in the first cell, so it is not re-imported here.\n",
    "train = [\n",
    "    ('i love this sandwich', 'decl'),\n",
    "    ('this is an amazing place', 'decl'),\n",
    "    ('i feel very good about these drinks.', 'decl'),\n",
    "    ('this is my best work', 'decl'),\n",
    "    ('what an awesome view', 'decl'),\n",
    "    ('how are you doing', 'inter'),\n",
    "    ('how old are you', 'inter'),\n",
    "    ('do you plan to go to the cinema', 'inter'),\n",
    "    ('how much does it cost', 'inter'),\n",
    "    ('have you been to Cairo', 'inter'),\n",
    "    ('in which box does the toy exist', 'inter'),\n",
    "    ('i am going to study this evening', 'decl'),\n",
    "    ('are you going to visit your parents today', 'inter'),\n",
    "    ('but this is not going anywhere', 'decl'),\n",
    "    ('where are you going', 'inter'),\n",
    "]\n",
    "\n",
    "# Held-out examples. Nothing in this cell uses them yet; they are meant for\n",
    "# evaluation, e.g. cl.accuracy(test).\n",
    "test = [\n",
    "    ('the beer was good.', 'decl'),\n",
    "    ('in what location was payphone recorded', 'inter'),\n",
    "    ('what was metacritics score', 'inter'),\n",
    "    ('i feel amazing!', 'decl'),\n",
    "    ('Gary is a friend of mine.', 'decl'),\n",
    "    (\"what actor plays leonard's counterpart\", 'inter'),\n",
    "]\n",
    "\n",
    "cl = NaiveBayesClassifier(train)\n",
    "cl.classify('i am planning to go there')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Another way to classify text is to pass a classifier to the `TextBlob` constructor and then call `classify()` on the blob or on each of its sentences."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The juice was good.  :  decl\n",
      "But it was very sweet.  :  inter\n",
      "Are you going to consume it anyway  :  inter\n"
     ]
    }
   ],
   "source": [
    "# TextBlob is imported in the first cell; `cl` is the classifier trained above.\n",
    "# Passing classifier=cl lets every sentence of the blob be classified individually.\n",
    "blob = TextBlob(\"The juice was good. But it was very sweet. Are you going to consume it anyway\", classifier=cl)\n",
    "for sentence in blob.sentences:\n",
    "    print(sentence, \" : \", sentence.classify())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Language Detection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'ar'"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Language detection through TextBlob (powered by the Google Translate API).\n",
    "# NOTE(review): presumably needs network access, and detect_language() is reportedly\n",
    "# deprecated in newer TextBlob releases - confirm against the installed version.\n",
    "ar = TextBlob(\"مرحبا بالجميع\")\n",
    "ar.detect_language()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'zh-CN'"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same TextBlob-based detection, this time on a Chinese sentence.\n",
    "chinese = TextBlob(\"美丽优于丑陋\")\n",
    "chinese.detect_language()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'ja'"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same TextBlob-based detection on a longer Japanese sentence.\n",
    "japanese = TextBlob(\"複数のチャネルにわたり、シームレスで一貫性のあるデジタル・エクスペリエンスを従業員やパートナ、顧客に提供できます\")\n",
    "japanese.detect_language()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'en'"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Language detection using another Python package, 'langdetect'.\n",
    "from langdetect import DetectorFactory, detect\n",
    "\n",
    "# langdetect is non-deterministic by default (most visibly on short inputs);\n",
    "# fixing the seed before the first detect() call makes re-runs reproducible.\n",
    "DetectorFactory.seed = 0\n",
    "detect(\"I love Python\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'de'"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# langdetect on a short German phrase.\n",
    "detect(\"Zwei, Drei, Vier\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'ar'"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# langdetect on a short Arabic phrase.\n",
    "detect(\"هنا القاهرة\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "It is an amazing service.\n"
     ]
    }
   ],
   "source": [
    "# Spelling correction with TextBlob: very basic and not recommended.\n",
    "# If we end up using spaCy, spacy_hunspell could be a better solution:\n",
    "# https://github.com/tokestermw/spacy_hunspell\n",
    "\n",
    "from textblob import Word\n",
    "spell = TextBlob(\"It is an amazng servic.\")\n",
    "corrected = spell.correct()\n",
    "print(corrected)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Tensorflow",
   "language": "python",
   "name": "tensorflow"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"import textblob\n",
	"from textblob import TextBlob\n",
	"from textblob.classifiers import NaiveBayesClassifier"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 37,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'decl'"
	]
	},
	"execution_count": 37,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Train a small declarative ('decl') vs. interrogative ('inter') sentence classifier.\n",
	"# NaiveBayesClassifier is already imported in the first cell, so it is not re-imported here.\n",
	"train = [\n",
	"    ('i love this sandwich', 'decl'),\n",
	"    ('this is an amazing place', 'decl'),\n",
	"    ('i feel very good about these drinks.', 'decl'),\n",
	"    ('this is my best work', 'decl'),\n",
	"    ('what an awesome view', 'decl'),\n",
	"    ('how are you doing', 'inter'),\n",
	"    ('how old are you', 'inter'),\n",
	"    ('do you plan to go to the cinema', 'inter'),\n",
	"    ('how much does it cost', 'inter'),\n",
	"    ('have you been to Cairo', 'inter'),\n",
	"    ('in which box does the toy exist', 'inter'),\n",
	"    ('i am going to study this evening', 'decl'),\n",
	"    ('are you going to visit your parents today', 'inter'),\n",
	"    ('but this is not going anywhere', 'decl'),\n",
	"    ('where are you going', 'inter'),\n",
	"]\n",
	"\n",
	"# Held-out examples. Nothing in this cell uses them yet; they are meant for\n",
	"# evaluation, e.g. cl.accuracy(test).\n",
	"test = [\n",
	"    ('the beer was good.', 'decl'),\n",
	"    ('in what location was payphone recorded', 'inter'),\n",
	"    ('what was metacritics score', 'inter'),\n",
	"    ('i feel amazing!', 'decl'),\n",
	"    ('Gary is a friend of mine.', 'decl'),\n",
	"    (\"what actor plays leonard's counterpart\", 'inter'),\n",
	"]\n",
	"\n",
	"cl = NaiveBayesClassifier(train)\n",
	"cl.classify('i am planning to go there')"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Another way to classify text is to pass a classifier to the `TextBlob` constructor and then call `classify()` on the blob or on each of its sentences."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 42,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"The juice was good.  :  decl\n",
	"But it was very sweet.  :  inter\n",
	"Are you going to consume it anyway  :  inter\n"
	]
	}
	],
	"source": [
	"# TextBlob is imported in the first cell; `cl` is the classifier trained above.\n",
	"# Passing classifier=cl lets every sentence of the blob be classified individually.\n",
	"blob = TextBlob(\"The juice was good. But it was very sweet. Are you going to consume it anyway\", classifier=cl)\n",
	"for sentence in blob.sentences:\n",
	"    print(sentence, \" : \", sentence.classify())"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Language Detection"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 24,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'ar'"
	]
	},
	"execution_count": 24,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Language detection through TextBlob (powered by the Google Translate API).\n",
	"# NOTE(review): presumably needs network access, and detect_language() is reportedly\n",
	"# deprecated in newer TextBlob releases - confirm against the installed version.\n",
	"ar = TextBlob(\"مرحبا بالجميع\")\n",
	"ar.detect_language()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 25,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'zh-CN'"
	]
	},
	"execution_count": 25,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Same TextBlob-based detection, this time on a Chinese sentence.\n",
	"chinese = TextBlob(\"美丽优于丑陋\")\n",
	"chinese.detect_language()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 26,
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'ja'"
	]
	},
	"execution_count": 26,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Same TextBlob-based detection on a longer Japanese sentence.\n",
	"japanese = TextBlob(\"複数のチャネルにわたり、シームレスで一貫性のあるデジタル・エクスペリエンスを従業員やパートナ、顧客に提供できます\")\n",
	"japanese.detect_language()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 31,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'en'"
	]
	},
	"execution_count": 31,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Language detection using another Python package, 'langdetect'.\n",
	"from langdetect import DetectorFactory, detect\n",
	"\n",
	"# langdetect is non-deterministic by default (most visibly on short inputs);\n",
	"# fixing the seed before the first detect() call makes re-runs reproducible.\n",
	"DetectorFactory.seed = 0\n",
	"detect(\"I love Python\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 29,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'de'"
	]
	},
	"execution_count": 29,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# langdetect on a short German phrase.\n",
	"detect(\"Zwei, Drei, Vier\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 30,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'ar'"
	]
	},
	"execution_count": 30,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# langdetect on a short Arabic phrase.\n",
	"detect(\"هنا القاهرة\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 32,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"It is an amazing service.\n"
	]
	}
	],
	"source": [
	"# Spelling correction with TextBlob: very basic and not recommended.\n",
	"# If we end up using spaCy, spacy_hunspell could be a better solution:\n",
	"# https://github.com/tokestermw/spacy_hunspell\n",
	"\n",
	"from textblob import Word\n",
	"spell = TextBlob(\"It is an amazng servic.\")\n",
	"corrected = spell.correct()\n",
	"print(corrected)"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Tensorflow",
	"language": "python",
	"name": "tensorflow"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.2"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}