Last active
April 17, 2018 21:49
-
-
Save mzeidhassan/97b4845b651a0bac60dc1f88521289b3 to your computer and use it in GitHub Desktop.
textblob
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import textblob\n", | |
"from textblob import TextBlob\n", | |
"from textblob.classifiers import NaiveBayesClassifier" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'decl'" | |
] | |
}, | |
"execution_count": 37, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# train a basic declarative/interrogative classifier\n", | |
"train = [\n", | |
" ('i love this sandwich', 'decl'),\n", | |
" ('this is an amazing place', 'decl'),\n", | |
" ('i feel very good about these drinks.', 'decl'),\n", | |
" ('this is my best work', 'decl'),\n", | |
" (\"what an awesome view\", 'decl'),\n", | |
" ('how are you doing', 'inter'),\n", | |
" ('how old are you', 'inter'),\n", | |
" (\"do you plan to go to the cinema\", 'inter'),\n", | |
" ('how much does it cost', 'inter'),\n", | |
" ('have you been to Cairo', 'inter'),\n", | |
" ('in which box does the toy exist', 'inter'),\n", | |
" ('i am going to study this evening', 'decl'),\n", | |
" ('are you going to visit your parents today', 'inter'),\n", | |
" ('but this is not going anywhere', 'decl'),\n", | |
" ('where are you going', 'inter')\n", | |
"\t]\n", | |
"test = [\n", | |
" ('the beer was good.', 'decl'),\n", | |
" ('in what location was payphone recorded', 'inter'),\n", | |
" (\"what was metacritics score\", 'inter'),\n", | |
" (\"i feel amazing!\", 'decl'),\n", | |
" ('Gary is a friend of mine.', 'decl'),\n", | |
" (\"what actor plays leonard's counterpart\", 'inter')\n", | |
"\t]\n", | |
"from textblob.classifiers import NaiveBayesClassifier\n", | |
"cl = NaiveBayesClassifier(train)\n", | |
"cl.classify('i am planning to go there')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Another way to classify text is to pass a classifier into the `TextBlob` constructor and then call `classify()` on the blob or on each of its sentences.\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"The juice was good. : decl\n", | |
"But it was very sweet. : inter\n", | |
"Are you going to consume it anyway : inter\n" | |
] | |
} | |
], | |
"source": [ | |
"from textblob import TextBlob\n", | |
"blob = TextBlob(\"The juice was good. But it was very sweet. Are you going to consume it anyway\", classifier=cl)\n", | |
"for s in blob.sentences:\n", | |
" print((s), \" : \", s.classify())" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Language Detection " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'ar'" | |
] | |
}, | |
"execution_count": 24, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# detect_language() is powered by the Google Translate API, so it requires network access.\n", | |
"\n", | |
"ar = TextBlob(u\"مرحبا بالجميع\")\n", | |
"ar.detect_language()\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'zh-CN'" | |
] | |
}, | |
"execution_count": 25, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"chinese = TextBlob(u\"美丽优于丑陋\")\n", | |
"chinese.detect_language()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'ja'" | |
] | |
}, | |
"execution_count": 26, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"japanese = TextBlob(u\"複数のチャネルにわたり、シームレスで一貫性のあるデジタル・エクスペリエンスを従業員やパートナ、顧客に提供できます\")\n", | |
"japanese.detect_language()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'en'" | |
] | |
}, | |
"execution_count": 31, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Language detection using another Python package, 'langdetect'\n", | |
"from langdetect import detect\n", | |
"detect(\"I love Python\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'de'" | |
] | |
}, | |
"execution_count": 29, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"detect('Zwei, Drei, Vier')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'ar'" | |
] | |
}, | |
"execution_count": 30, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"detect('هنا القاهرة')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"It is an amazing service.\n" | |
] | |
} | |
], | |
"source": [ | |
"# Spelling correction: TextBlob's correct() is very basic and not recommended.\n", | |
"# If we are going to use spaCy, then this could be a better solution:\n", | |
"# https://github.com/tokestermw/spacy_hunspell\n", | |
"\n", | |
"from textblob import Word\n", | |
"spell = TextBlob(\"It is an amazng servic.\")\n", | |
"print(spell.correct())" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Tensorflow", | |
"language": "python", | |
"name": "tensorflow" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment