Skip to content

Instantly share code, notes, and snippets.

@mzeidhassan
Last active April 17, 2018 21:49
Show Gist options
  • Save mzeidhassan/97b4845b651a0bac60dc1f88521289b3 to your computer and use it in GitHub Desktop.
Save mzeidhassan/97b4845b651a0bac60dc1f88521289b3 to your computer and use it in GitHub Desktop.
textblob
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import textblob\n",
"from textblob import TextBlob\n",
"from textblob.classifiers import NaiveBayesClassifier"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'decl'"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Train a basic declarative ('decl') vs. interrogative ('inter') sentence classifier.\n",
"# NaiveBayesClassifier is imported in the first cell, so it is not imported again here.\n",
"\n",
"# Labelled training sentences: (text, label) pairs.\n",
"train = [\n",
"    ('i love this sandwich', 'decl'),\n",
"    ('this is an amazing place', 'decl'),\n",
"    ('i feel very good about these drinks.', 'decl'),\n",
"    ('this is my best work', 'decl'),\n",
"    ('what an awesome view', 'decl'),\n",
"    ('how are you doing', 'inter'),\n",
"    ('how old are you', 'inter'),\n",
"    ('do you plan to go to the cinema', 'inter'),\n",
"    ('how much does it cost', 'inter'),\n",
"    ('have you been to Cairo', 'inter'),\n",
"    ('in which box does the toy exist', 'inter'),\n",
"    ('i am going to study this evening', 'decl'),\n",
"    ('are you going to visit your parents today', 'inter'),\n",
"    ('but this is not going anywhere', 'decl'),\n",
"    ('where are you going', 'inter'),\n",
"]\n",
"\n",
"# Held-out sentences in the same (text, label) format; never shown to the classifier during training.\n",
"test = [\n",
"    ('the beer was good.', 'decl'),\n",
"    ('in what location was payphone recorded', 'inter'),\n",
"    ('what was metacritics score', 'inter'),\n",
"    ('i feel amazing!', 'decl'),\n",
"    ('Gary is a friend of mine.', 'decl'),\n",
"    (\"what actor plays leonard's counterpart\", 'inter'),\n",
"]\n",
"\n",
"cl = NaiveBayesClassifier(train)\n",
"\n",
"# Score the classifier on the held-out sentences; inspect `test_accuracy` for the result.\n",
"# Assigned rather than printed so the cell's displayed value stays the prediction below.\n",
"test_accuracy = cl.accuracy(test)\n",
"\n",
"# Displayed value of the cell: the predicted label for an unseen sentence.\n",
"cl.classify('i am planning to go there')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Another way to classify text is to pass a classifier to the `TextBlob` constructor and then call the blob's `classify()` method."
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The juice was good. : decl\n",
"But it was very sweet. : inter\n",
"Are you going to consume it anyway : inter\n"
]
}
],
"source": [
"# `cl` is the classifier trained in the previous code cell; TextBlob is imported in the first cell.\n",
"blob = TextBlob(\n",
"    \"The juice was good. But it was very sweet. Are you going to consume it anyway\",\n",
"    classifier=cl\n",
")\n",
"\n",
"# Label every sentence of the blob separately and print it next to its predicted class.\n",
"for sentence in blob.sentences:\n",
"    print(sentence, \" : \", sentence.classify())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Language Detection "
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'ar'"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Language detection through TextBlob, which is powered by the Google Translate API.\n",
"# NOTE(review): presumably this needs network access, and detect_language() may be deprecated\n",
"# or missing in newer TextBlob releases - confirm against the installed version.\n",
"ar = TextBlob(\"مرحبا بالجميع\")\n",
"ar.detect_language()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'zh-CN'"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Chinese sample text, detected through the same TextBlob call.\n",
"chinese = TextBlob(\"美丽优于丑陋\")\n",
"chinese.detect_language()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"'ja'"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Japanese sample text (a full sentence), detected through the same TextBlob call.\n",
"japanese = TextBlob(\"複数のチャネルにわたり、シームレスで一貫性のあるデジタル・エクスペリエンスを従業員やパートナ、顧客に提供できます\")\n",
"japanese.detect_language()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'en'"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Language detection using another Python package, `langdetect`.\n",
"from langdetect import DetectorFactory, detect\n",
"\n",
"# langdetect is non-deterministic by default, so very short inputs can be labelled\n",
"# differently from run to run. Fixing the seed makes the results reproducible.\n",
"DetectorFactory.seed = 0\n",
"\n",
"detect(\"I love Python\")"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'de'"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Short German sample, using `detect` imported in the previous langdetect cell.\n",
"detect(\"Zwei, Drei, Vier\")"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'ar'"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Short Arabic sample, using `detect` imported in an earlier langdetect cell.\n",
"detect(\"هنا القاهرة\")"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"It is an amazing service.\n"
]
}
],
"source": [
"# Spelling correction with TextBlob: very basic and not recommended.\n",
"# If we are going to use spaCy, then this could be a better solution:\n",
"# https://github.com/tokestermw/spacy_hunspell\n",
"\n",
"# TextBlob is imported in the first cell; correct() returns the text with misspellings replaced.\n",
"spell = TextBlob(\"It is an amazng servic.\")\n",
"print(spell.correct())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Tensorflow",
"language": "python",
"name": "tensorflow"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment