Last active
May 6, 2019 04:27
-
-
Save jkotra/43d41c2a72d47c160d490e9df7d8be48 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Using TensorFlow backend.\n" | |
] | |
} | |
], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import matplotlib.pyplot as plt\n", | |
"from sklearn.utils import shuffle\n", | |
"\n", | |
"import mmap\n", | |
"import tqdm\n", | |
"\n", | |
"import keras" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>target</th>\n", | |
" <th>id</th>\n", | |
" <th>date</th>\n", | |
" <th>Query</th>\n", | |
" <th>username</th>\n", | |
" <th>tweet</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>654496</th>\n", | |
" <td>0</td>\n", | |
" <td>2239483892</td>\n", | |
" <td>Fri Jun 19 08:35:57 PDT 2009</td>\n", | |
" <td>NO_QUERY</td>\n", | |
" <td>Robscore24</td>\n", | |
" <td>At the doc with my grandma. She's very sick. ...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1440592</th>\n", | |
" <td>4</td>\n", | |
" <td>2061696458</td>\n", | |
" <td>Sat Jun 06 21:24:56 PDT 2009</td>\n", | |
" <td>NO_QUERY</td>\n", | |
" <td>ensetsu</td>\n", | |
" <td>@ashley_is_here for sure</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>728066</th>\n", | |
" <td>0</td>\n", | |
" <td>2262974701</td>\n", | |
" <td>Sat Jun 20 23:39:29 PDT 2009</td>\n", | |
" <td>NO_QUERY</td>\n", | |
" <td>angeloy</td>\n", | |
" <td>@riandawson wow thats rad</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1093740</th>\n", | |
" <td>4</td>\n", | |
" <td>1970089754</td>\n", | |
" <td>Sat May 30 02:24:44 PDT 2009</td>\n", | |
" <td>NO_QUERY</td>\n", | |
" <td>mr_apollo</td>\n", | |
" <td>@xXmIxEdMoDeLXx im bored as hell i probably wo...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1063147</th>\n", | |
" <td>4</td>\n", | |
" <td>1964375791</td>\n", | |
" <td>Fri May 29 14:08:15 PDT 2009</td>\n", | |
" <td>NO_QUERY</td>\n", | |
" <td>snedwan</td>\n", | |
" <td>@KimSherrell I'm still bloody 4th ... your tas...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" target id date Query \\\n", | |
"654496 0 2239483892 Fri Jun 19 08:35:57 PDT 2009 NO_QUERY \n", | |
"1440592 4 2061696458 Sat Jun 06 21:24:56 PDT 2009 NO_QUERY \n", | |
"728066 0 2262974701 Sat Jun 20 23:39:29 PDT 2009 NO_QUERY \n", | |
"1093740 4 1970089754 Sat May 30 02:24:44 PDT 2009 NO_QUERY \n", | |
"1063147 4 1964375791 Fri May 29 14:08:15 PDT 2009 NO_QUERY \n", | |
"\n", | |
" username tweet \n", | |
"654496 Robscore24 At the doc with my grandma. She's very sick. ... \n", | |
"1440592 ensetsu @ashley_is_here for sure \n", | |
"728066 angeloy @riandawson wow thats rad \n", | |
"1093740 mr_apollo @xXmIxEdMoDeLXx im bored as hell i probably wo... \n", | |
"1063147 snedwan @KimSherrell I'm still bloody 4th ... your tas... " | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# NOTE: machine-specific absolute path; point TRAIN_CSV at your local copy of the dataset.\n", | |
"TRAIN_CSV = \"/home/jagadeesh/DS/twitter/train.csv\"\n", | |
"# Fixed seed so the shuffle, and therefore the train/validation split below, is reproducible.\n", | |
"SEED = 42\n", | |
"\n", | |
"cols = [\"target\",\"id\",\"date\",\"Query\",\"username\",\"tweet\"]\n", | |
"# The file is read with header=None, so the column names are supplied explicitly.\n", | |
"train = pd.read_csv(TRAIN_CSV, encoding=\"ISO-8859-1\", names=cols, header=None)\n", | |
"train = shuffle(train, random_state=SEED)\n", | |
"train.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Drop the 'Query' and 'id' columns.\n", | |
"train = train.drop(columns=['Query', 'id'])\n", | |
"\n", | |
"# Replace 4 (positive) with 1. The result is assigned back instead of calling\n", | |
"# replace(inplace=True) on the attribute-accessed column: the in-place form mutates a\n", | |
"# temporary Series and leaves the frame unchanged under pandas copy-on-write.\n", | |
"train['target'] = train['target'].replace(4, 1)\n", | |
"\n", | |
"# Rows 300,000-319,999 (20k rows) of the shuffled frame form the validation set.\n", | |
"val = train[300000:320000]\n", | |
"\n", | |
"# The first 300k rows form the training set.\n", | |
"train = train[:300000]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Negative: 150195 Positive: 149805\n" | |
] | |
} | |
], | |
"source": [ | |
"print(\"Negative:\",len(train[train.target == 0].index),\"Positive:\",len(train[train.target == 1].index))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"target 0\n", | |
"date 0\n", | |
"username 0\n", | |
"tweet 0\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#check for missing values\n", | |
"\n", | |
"train.isna().sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#set target as label\n", | |
"train_labels = train.target.values\n", | |
"val_labels = val.target.values" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"vocab_size: 227424\n" | |
] | |
} | |
], | |
"source": [ | |
"from keras.preprocessing.text import Tokenizer\n", | |
"from keras.preprocessing.sequence import pad_sequences\n", | |
"\n", | |
"t = Tokenizer()\n", | |
"t.fit_on_texts(pd.concat([train,val])['tweet'])\n", | |
"vocab_size = len(t.word_index) + 1\n", | |
"print(\"vocab_size:\", vocab_size)\n", | |
"\n", | |
"train_encoded_docs = t.texts_to_sequences(train.tweet)\n", | |
"val_encoded_docs = t.texts_to_sequences(val.tweet)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "f4cb9aba80e345b086e6fc3b252f6ae3", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"HBox(children=(IntProgress(value=0, max=1193514), HTML(value='')))" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"Loaded 1193514 word vectors.\n" | |
] | |
} | |
], | |
"source": [ | |
"def get_num_lines(file_path):\n", | |
"    \"\"\"Return the number of lines in the file at `file_path`.\n", | |
"\n", | |
"    The file is memory-mapped read-only and counted with repeated readline() calls,\n", | |
"    so a final line without a trailing newline still counts. An empty file yields 0.\n", | |
"    Both the file handle and the mapping are closed before returning.\n", | |
"    \"\"\"\n", | |
"    with open(file_path, \"rb\") as fp:\n", | |
"        # mmap refuses to map a zero-length file, so check the size first.\n", | |
"        fp.seek(0, 2)\n", | |
"        if fp.tell() == 0:\n", | |
"            return 0\n", | |
"        # Read-only mapping: counting lines needs no write access to the file.\n", | |
"        with mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ) as buf:\n", | |
"            # iter(callable, sentinel) keeps calling readline() until it returns b\"\".\n", | |
"            return sum(1 for _ in iter(buf.readline, b\"\"))\n", | |
"\n", | |
"def get_embedding_index(path):\n", | |
"    \"\"\"Read a text file of word vectors into a dict.\n", | |
"\n", | |
"    Each non-blank line is split on whitespace: the first field is the word and the\n", | |
"    remaining fields are parsed as float32 coefficients. A progress bar is shown while\n", | |
"    reading, and the number of loaded vectors is printed at the end.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    path : path of the vectors file.\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    dict mapping each word to a 1-D float32 numpy array.\n", | |
"    \"\"\"\n", | |
"    embeddings_index = dict()\n", | |
"    # `with` closes the file even when a line fails to parse. The encoding is explicit so\n", | |
"    # decoding does not depend on the locale; assumes the file is UTF-8 - TODO confirm.\n", | |
"    with open(path, encoding=\"utf-8\") as f:\n", | |
"        for line in tqdm.tqdm_notebook(f, total=get_num_lines(path)):\n", | |
"            values = line.split()\n", | |
"            if not values:\n", | |
"                continue  # skip blank lines instead of failing on values[0]\n", | |
"            embeddings_index[values[0]] = np.asarray(values[1:], dtype='float32')\n", | |
"    print('Loaded %s word vectors.' % len(embeddings_index))\n", | |
"    return embeddings_index\n", | |
"\n", | |
"def create_weight_matrix(embeddings_index, embedding_dim=None, word_index=None, num_words=None):\n", | |
"    \"\"\"Build the weight matrix handed to the Embedding layer.\n", | |
"\n", | |
"    Row i receives the pretrained vector of the word whose index is i; rows for which\n", | |
"    `embeddings_index` has no vector stay all-zero.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    embeddings_index : dict mapping word -> 1-D vector.\n", | |
"    embedding_dim : number of columns. Defaults to the length of the first vector in\n", | |
"        `embeddings_index`, or 100 when the dict is empty.\n", | |
"    word_index : dict mapping word -> row index. Defaults to the notebook-level\n", | |
"        tokenizer's `t.word_index`.\n", | |
"    num_words : number of rows. Defaults to the notebook-level `vocab_size`.\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    numpy array of shape (num_words, embedding_dim).\n", | |
"    \"\"\"\n", | |
"    # Fall back to the notebook globals only when the caller did not pass a value,\n", | |
"    # so the original one-argument call keeps working.\n", | |
"    if word_index is None:\n", | |
"        word_index = t.word_index\n", | |
"    if num_words is None:\n", | |
"        num_words = vocab_size\n", | |
"    if embedding_dim is None:\n", | |
"        first_vector = next(iter(embeddings_index.values()), None)\n", | |
"        embedding_dim = 100 if first_vector is None else len(first_vector)\n", | |
"\n", | |
"    embedding_matrix = np.zeros((num_words, embedding_dim))\n", | |
"    for word, i in word_index.items():\n", | |
"        embedding_vector = embeddings_index.get(word)\n", | |
"        if embedding_vector is not None:\n", | |
"            embedding_matrix[i] = embedding_vector\n", | |
"    return embedding_matrix\n", | |
"\n", | |
"embeddings_index = get_embedding_index(\"/home/jagadeesh/DS/glove_twitter/glove.twitter.27B.100d.txt\")\n", | |
"embedding_matrix = create_weight_matrix(embeddings_index)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"max_length = 10\n", | |
"\n", | |
"train_padded_docs = pad_sequences(train_encoded_docs, maxlen=max_length, padding='post')\n", | |
"val_padded_docs = pad_sequences(val_encoded_docs, maxlen=max_length, padding='post')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"model = keras.models.Sequential()\n", | |
"\n", | |
"# Take the embedding width and the sequence length from embedding_matrix and max_length\n", | |
"# instead of repeating the literals 100 and 10, so the layer cannot drift out of sync.\n", | |
"model.add(keras.layers.Embedding(vocab_size, embedding_matrix.shape[1], weights=[embedding_matrix], input_length=max_length))\n", | |
"model.add(keras.layers.CuDNNLSTM(50))\n", | |
"model.add(keras.layers.Dense(50, activation='relu'))\n", | |
"model.add(keras.layers.Dropout(0.2))\n", | |
"\n", | |
"# Single sigmoid unit: the output is a score in (0, 1) for the binary target.\n", | |
"model.add(keras.layers.Dense(1,activation='sigmoid'))\n", | |
"\n", | |
"# Stop once val_loss has not improved for 4 epochs and roll back to the best epoch's\n", | |
"# weights; without restore_best_weights the model keeps the last epoch's weights.\n", | |
"es = keras.callbacks.EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)\n", | |
"model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"embedding_4 (Embedding) (None, 10, 100) 22742400 \n", | |
"_________________________________________________________________\n", | |
"cu_dnnlstm_4 (CuDNNLSTM) (None, 50) 30400 \n", | |
"_________________________________________________________________\n", | |
"dense_6 (Dense) (None, 50) 2550 \n", | |
"_________________________________________________________________\n", | |
"dropout_3 (Dropout) (None, 50) 0 \n", | |
"_________________________________________________________________\n", | |
"dense_7 (Dense) (None, 1) 51 \n", | |
"=================================================================\n", | |
"Total params: 22,775,401\n", | |
"Trainable params: 22,775,401\n", | |
"Non-trainable params: 0\n", | |
"_________________________________________________________________\n", | |
"None\n" | |
] | |
} | |
], | |
"source": [ | |
"# summary() prints the table itself and returns None, so wrapping it in print()\n", | |
"# only appends a stray \"None\" line to the output.\n", | |
"model.summary()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Train on 300000 samples, validate on 20000 samples\n", | |
"Epoch 1/50\n", | |
"300000/300000 [==============================] - 4s 13us/step - loss: 0.6373 - acc: 0.6427 - val_loss: 0.5641 - val_acc: 0.7172\n", | |
"Epoch 2/50\n", | |
"300000/300000 [==============================] - 2s 8us/step - loss: 0.5249 - acc: 0.7433 - val_loss: 0.5020 - val_acc: 0.7559\n", | |
"Epoch 3/50\n", | |
"300000/300000 [==============================] - 2s 8us/step - loss: 0.4790 - acc: 0.7739 - val_loss: 0.4831 - val_acc: 0.7691\n", | |
"Epoch 4/50\n", | |
"300000/300000 [==============================] - 2s 8us/step - loss: 0.4535 - acc: 0.7911 - val_loss: 0.4754 - val_acc: 0.7720\n", | |
"Epoch 5/50\n", | |
"300000/300000 [==============================] - 2s 8us/step - loss: 0.4329 - acc: 0.8042 - val_loss: 0.4710 - val_acc: 0.7755\n", | |
"Epoch 6/50\n", | |
"300000/300000 [==============================] - 2s 8us/step - loss: 0.4125 - acc: 0.8156 - val_loss: 0.4751 - val_acc: 0.7768\n", | |
"Epoch 7/50\n", | |
"300000/300000 [==============================] - 2s 8us/step - loss: 0.3896 - acc: 0.8281 - val_loss: 0.4804 - val_acc: 0.7740\n", | |
"Epoch 8/50\n", | |
"300000/300000 [==============================] - 2s 8us/step - loss: 0.3660 - acc: 0.8408 - val_loss: 0.4966 - val_acc: 0.7682\n", | |
"Epoch 9/50\n", | |
"300000/300000 [==============================] - 2s 8us/step - loss: 0.3410 - acc: 0.8532 - val_loss: 0.5168 - val_acc: 0.7673\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"<keras.callbacks.History at 0x7faccbdeccc0>" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model.fit(train_padded_docs, train_labels,validation_data=(val_padded_docs,val_labels), epochs=50, verbose=1,batch_size=10000,callbacks=[es])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pred = model.predict(val_padded_docs)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"predicted = pd.DataFrame(val['tweet'])\n", | |
"predicted['score'] = pred\n", | |
"predicted['result'] = predicted.score.apply(lambda x: \"+\" if x > 0.51 else \"-\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>tweet</th>\n", | |
" <th>score</th>\n", | |
" <th>result</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1404825</th>\n", | |
" <td>@joycethefairy ho ho ho ... watching Skins eh? .. thats a pretty good series ... should i tell you what happens? ;P</td>\n", | |
" <td>0.869579</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>477450</th>\n", | |
" <td>Now, it's off to Wal*Mart with Momala. Gotta call the health food store today and quit. It's not personal...it's just business.</td>\n", | |
" <td>0.262908</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1199199</th>\n", | |
" <td>@pmolyneux Who does the giant foot belong to? Wait till you've had 10 cookies - then see how you feel</td>\n", | |
" <td>0.816340</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>102447</th>\n", | |
" <td>Still feeling icky. Fever is gone but my throat hurts</td>\n", | |
" <td>0.006581</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>920270</th>\n", | |
" <td>@erin82883 happy mothersday to you erin!</td>\n", | |
" <td>0.999443</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>754505</th>\n", | |
" <td>i deleted my Guild Wars builds!!everyone who plays or played Gw knows that this is f*ckin WORST case!!</td>\n", | |
" <td>0.032944</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>404972</th>\n", | |
" <td>im never gonna give my phone to a 2 year old again all they do is rearrange and delete your apps and everything else on your phone greatt</td>\n", | |
" <td>0.914653</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1102071</th>\n", | |
" <td>@sudosushi i'll help if i can mate</td>\n", | |
" <td>0.915159</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>709296</th>\n", | |
" <td>@ROBSTENSHIPPER yeah July 27th, I think. But it was reported that R might not go due to his busy schedule</td>\n", | |
" <td>0.105789</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>189395</th>\n", | |
" <td>@ruchirfalodiya maine kaha morning</td>\n", | |
" <td>0.997870</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>930646</th>\n", | |
" <td>according to marga, i have the upper hand</td>\n", | |
" <td>0.374307</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1161282</th>\n", | |
" <td>@stin_key ?????? ??????,? ??????????? ??????? ?????????? ? ??????????? ??????, ???????.??????? ?????????? ???????? ???????.???? ?????????</td>\n", | |
" <td>0.009762</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>104465</th>\n", | |
" <td>@DominicCArcenas Awww im sorry to hear that. its never fun to go through a break up i might have to too soon</td>\n", | |
" <td>0.404579</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>498130</th>\n", | |
" <td>@digitalla is anyone streaming this like Techzulu? Where can I get the feed if I can't attend?</td>\n", | |
" <td>0.072250</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>842707</th>\n", | |
" <td>ah, welcome als follower @blauwmp ;)</td>\n", | |
" <td>0.998003</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>134442</th>\n", | |
" <td>It was Carmen/Katie love... but I deleted the huge VOBs and just realized that I killed the whole project. Ohh, sadness.</td>\n", | |
" <td>0.013685</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>600827</th>\n", | |
" <td>is gettin the kids ready for daycare amd myself for work</td>\n", | |
" <td>0.121314</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>62589</th>\n", | |
" <td>@essess So ugly. In and out within ten minutes. Going to see another today! All the good places are by Blue Goose, which is so far away</td>\n", | |
" <td>0.226649</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1371573</th>\n", | |
" <td>@jennych4 definately want in...please...taking my son to see kooz on field before the game...well hopefully</td>\n", | |
" <td>0.741964</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>920176</th>\n", | |
" <td>Up and at 'em. Ready for the day! Be encouraged, and encourage someone else!</td>\n", | |
" <td>0.847171</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19637</th>\n", | |
" <td>Too bad I don't give out my number anymore could of had a cutie</td>\n", | |
" <td>0.414295</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>342528</th>\n", | |
" <td>watching state of origin, go queensland!</td>\n", | |
" <td>0.994709</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1508207</th>\n", | |
" <td>@_ANNiCA_: lmfao!! What are you doin punkinhead?? Rio (G.M.B)</td>\n", | |
" <td>0.775441</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1075299</th>\n", | |
" <td>Pizza,coffee and hot irish men on my tv good friday</td>\n", | |
" <td>0.979200</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>363520</th>\n", | |
" <td>@lincua you're not gonna be tmobile anymore?</td>\n", | |
" <td>0.040909</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>974280</th>\n", | |
" <td>@judez_xo sweet ill let her know</td>\n", | |
" <td>0.964585</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>379447</th>\n", | |
" <td>I am awake at 3:23 AM because I'm a bigbby whose terrified of thunder &amp; extreme lightning!</td>\n", | |
" <td>0.160444</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>274666</th>\n", | |
" <td>@goaskalicia i know the feeling. here's hoping you win the lottery!</td>\n", | |
" <td>0.620291</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>567842</th>\n", | |
" <td>@imogenheap Sounds like Santa Clause lost his deer. I hope he's following you on twitter. This will be a bad Christmas, otherwise.</td>\n", | |
" <td>0.583785</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>479560</th>\n", | |
" <td>It´s so boring here ... I didn´t like my school ... teachers are bad there</td>\n", | |
" <td>0.132040</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1548019</th>\n", | |
" <td>@Alyssa_Milano too bad, Ben sheets isn't do anything right now</td>\n", | |
" <td>0.131698</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1567645</th>\n", | |
" <td>@chuckchik91 lol good for u i been out since last thursday</td>\n", | |
" <td>0.479890</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>160113</th>\n", | |
" <td>shift time bbye biochem waaaaahhhhhh!! http://plurk.com/p/wxizo</td>\n", | |
" <td>0.102715</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1159282</th>\n", | |
" <td>I'm so very hungry, any fancy buying me Mc Donalds!</td>\n", | |
" <td>0.046230</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1224430</th>\n", | |
" <td>i post it - http://a-frenzy.com/wp/</td>\n", | |
" <td>0.974048</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5754</th>\n", | |
" <td>I feel like death, can't even believe I'm at work right now</td>\n", | |
" <td>0.083845</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>82540</th>\n", | |
" <td>@spirkee 5/9 MWS BnUp2 updates didn't go through to Twitter or FB.</td>\n", | |
" <td>0.262266</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>753274</th>\n", | |
" <td>@shanedawson get well soon</td>\n", | |
" <td>0.509889</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>567986</th>\n", | |
" <td>back from RDA... its been raining all day got to ride Nessa though XD</td>\n", | |
" <td>0.445876</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>869206</th>\n", | |
" <td>@NJTheDJ. WORD I 2ND THAT</td>\n", | |
" <td>0.956074</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1403415</th>\n", | |
" <td>@mileycyrus YOUR COMING TOO IRELAND immmm soooooo stocked yaayyy</td>\n", | |
" <td>0.938833</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1370551</th>\n", | |
" <td>@TonyPdeLaCouer Didn't know that was you at @SnoohBirdie 's house.</td>\n", | |
" <td>0.209453</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>548520</th>\n", | |
" <td>Dinner with the fam! last day in Cali</td>\n", | |
" <td>0.700747</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>296515</th>\n", | |
" <td>Good Morning :-h woke up early. Six more days 'till school</td>\n", | |
" <td>0.379951</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>297695</th>\n", | |
" <td>Gettin chilly outside ... What a lovely start for summer '09</td>\n", | |
" <td>0.824240</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1536336</th>\n", | |
" <td>Good morning kids... who else out there is recovering from the weekend?</td>\n", | |
" <td>0.494093</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>599901</th>\n", | |
" <td>disappointed that dictionary .com app crashes with #os3 update</td>\n", | |
" <td>0.004565</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>284742</th>\n", | |
" <td>I'm on a diet and on a budget which 1 will be the 1st I will cheat on? Neither!! I'm strong!!! I'm strong????</td>\n", | |
" <td>0.618436</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>275769</th>\n", | |
" <td>Asda just robbed me of 98p!</td>\n", | |
" <td>0.006412</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1211504</th>\n", | |
" <td>@AlexAllTimeLow i hate you for not coming to perth</td>\n", | |
" <td>0.049144</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>95160</th>\n", | |
" <td>is once again tired for no reason...I wonder what's wrong with me</td>\n", | |
" <td>0.202788</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>952110</th>\n", | |
" <td>@mrw00dy comment for you</td>\n", | |
" <td>0.998516</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>975862</th>\n", | |
" <td>going to Ashland tomorrow see you all in three days</td>\n", | |
" <td>0.601973</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4133</th>\n", | |
" <td>still feeling almost entirely overwhelmed by an uncomfortable desire for swift and violent revenge</td>\n", | |
" <td>0.247597</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>557088</th>\n", | |
" <td>looks like cean is down</td>\n", | |
" <td>0.009802</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>194102</th>\n", | |
" <td>i miss you chikedi.</td>\n", | |
" <td>0.009941</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>842571</th>\n", | |
" <td>@Jezriyah sitting here with a tiger card with your name on it. you still want it?</td>\n", | |
" <td>0.617289</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1492573</th>\n", | |
" <td>@chods70 the game is getting harder, and I think I need your expertise</td>\n", | |
" <td>0.324862</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1155714</th>\n", | |
" <td>Received good news from the PA of Boystown &amp; Kids Help Line</td>\n", | |
" <td>0.672338</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>959579</th>\n", | |
" <td>sleeping sort-of early tonight.</td>\n", | |
" <td>0.379771</td>\n", | |
" <td>-</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>20000 rows × 3 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" tweet \\\n", | |
"1404825 @joycethefairy ho ho ho ... watching Skins eh? .. thats a pretty good series ... should i tell you what happens? ;P \n", | |
"477450 Now, it's off to Wal*Mart with Momala. Gotta call the health food store today and quit. It's not personal...it's just business. \n", | |
"1199199 @pmolyneux Who does the giant foot belong to? Wait till you've had 10 cookies - then see how you feel \n", | |
"102447 Still feeling icky. Fever is gone but my throat hurts \n", | |
"920270 @erin82883 happy mothersday to you erin! \n", | |
"754505 i deleted my Guild Wars builds!!everyone who plays or played Gw knows that this is f*ckin WORST case!! \n", | |
"404972 im never gonna give my phone to a 2 year old again all they do is rearrange and delete your apps and everything else on your phone greatt \n", | |
"1102071 @sudosushi i'll help if i can mate \n", | |
"709296 @ROBSTENSHIPPER yeah July 27th, I think. But it was reported that R might not go due to his busy schedule \n", | |
"189395 @ruchirfalodiya maine kaha morning \n", | |
"930646 according to marga, i have the upper hand \n", | |
"1161282 @stin_key ?????? ??????,? ??????????? ??????? ?????????? ? ??????????? ??????, ???????.??????? ?????????? ???????? ???????.???? ????????? \n", | |
"104465 @DominicCArcenas Awww im sorry to hear that. its never fun to go through a break up i might have to too soon \n", | |
"498130 @digitalla is anyone streaming this like Techzulu? Where can I get the feed if I can't attend? \n", | |
"842707 ah, welcome als follower @blauwmp ;) \n", | |
"134442 It was Carmen/Katie love... but I deleted the huge VOBs and just realized that I killed the whole project. Ohh, sadness. \n", | |
"600827 is gettin the kids ready for daycare amd myself for work \n", | |
"62589 @essess So ugly. In and out within ten minutes. Going to see another today! All the good places are by Blue Goose, which is so far away \n", | |
"1371573 @jennych4 definately want in...please...taking my son to see kooz on field before the game...well hopefully \n", | |
"920176 Up and at 'em. Ready for the day! Be encouraged, and encourage someone else! \n", | |
"19637 Too bad I don't give out my number anymore could of had a cutie \n", | |
"342528 watching state of origin, go queensland! \n", | |
"1508207 @_ANNiCA_: lmfao!! What are you doin punkinhead?? Rio (G.M.B) \n", | |
"1075299 Pizza,coffee and hot irish men on my tv good friday \n", | |
"363520 @lincua you're not gonna be tmobile anymore? \n", | |
"974280 @judez_xo sweet ill let her know \n", | |
"379447 I am awake at 3:23 AM because I'm a bigbby whose terrified of thunder & extreme lightning! \n", | |
"274666 @goaskalicia i know the feeling. here's hoping you win the lottery! \n", | |
"567842 @imogenheap Sounds like Santa Clause lost his deer. I hope he's following you on twitter. This will be a bad Christmas, otherwise. \n", | |
"479560 It´s so boring here ... I didn´t like my school ... teachers are bad there \n", | |
"... ... \n", | |
"1548019 @Alyssa_Milano too bad, Ben sheets isn't do anything right now \n", | |
"1567645 @chuckchik91 lol good for u i been out since last thursday \n", | |
"160113 shift time bbye biochem waaaaahhhhhh!! http://plurk.com/p/wxizo \n", | |
"1159282 I'm so very hungry, any fancy buying me Mc Donalds! \n", | |
"1224430 i post it - http://a-frenzy.com/wp/ \n", | |
"5754 I feel like death, can't even believe I'm at work right now \n", | |
"82540 @spirkee 5/9 MWS BnUp2 updates didn't go through to Twitter or FB. \n", | |
"753274 @shanedawson get well soon \n", | |
"567986 back from RDA... its been raining all day got to ride Nessa though XD \n", | |
"869206 @NJTheDJ. WORD I 2ND THAT \n", | |
"1403415 @mileycyrus YOUR COMING TOO IRELAND immmm soooooo stocked yaayyy \n", | |
"1370551 @TonyPdeLaCouer Didn't know that was you at @SnoohBirdie 's house. \n", | |
"548520 Dinner with the fam! last day in Cali \n", | |
"296515 Good Morning :-h woke up early. Six more days 'till school \n", | |
"297695 Gettin chilly outside ... What a lovely start for summer '09 \n", | |
"1536336 Good morning kids... who else out there is recovering from the weekend? \n", | |
"599901 disappointed that dictionary .com app crashes with #os3 update \n", | |
"284742 I'm on a diet and on a budget which 1 will be the 1st I will cheat on? Neither!! I'm strong!!! I'm strong???? \n", | |
"275769 Asda just robbed me of 98p! \n", | |
"1211504 @AlexAllTimeLow i hate you for not coming to perth \n", | |
"95160 is once again tired for no reason...I wonder what's wrong with me \n", | |
"952110 @mrw00dy comment for you \n", | |
"975862 going to Ashland tomorrow see you all in three days \n", | |
"4133 still feeling almost entirely overwhelmed by an uncomfortable desire for swift and violent revenge \n", | |
"557088 looks like cean is down \n", | |
"194102 i miss you chikedi. \n", | |
"842571 @Jezriyah sitting here with a tiger card with your name on it. you still want it? \n", | |
"1492573 @chods70 the game is getting harder, and I think I need your expertise \n", | |
"1155714 Received good news from the PA of Boystown & Kids Help Line \n", | |
"959579 sleeping sort-of early tonight. \n", | |
"\n", | |
" score result \n", | |
"1404825 0.869579 + \n", | |
"477450 0.262908 - \n", | |
"1199199 0.816340 + \n", | |
"102447 0.006581 - \n", | |
"920270 0.999443 + \n", | |
"754505 0.032944 - \n", | |
"404972 0.914653 + \n", | |
"1102071 0.915159 + \n", | |
"709296 0.105789 - \n", | |
"189395 0.997870 + \n", | |
"930646 0.374307 - \n", | |
"1161282 0.009762 - \n", | |
"104465 0.404579 - \n", | |
"498130 0.072250 - \n", | |
"842707 0.998003 + \n", | |
"134442 0.013685 - \n", | |
"600827 0.121314 - \n", | |
"62589 0.226649 - \n", | |
"1371573 0.741964 + \n", | |
"920176 0.847171 + \n", | |
"19637 0.414295 - \n", | |
"342528 0.994709 + \n", | |
"1508207 0.775441 + \n", | |
"1075299 0.979200 + \n", | |
"363520 0.040909 - \n", | |
"974280 0.964585 + \n", | |
"379447 0.160444 - \n", | |
"274666 0.620291 + \n", | |
"567842 0.583785 + \n", | |
"479560 0.132040 - \n", | |
"... ... .. \n", | |
"1548019 0.131698 - \n", | |
"1567645 0.479890 - \n", | |
"160113 0.102715 - \n", | |
"1159282 0.046230 - \n", | |
"1224430 0.974048 + \n", | |
"5754 0.083845 - \n", | |
"82540 0.262266 - \n", | |
"753274 0.509889 - \n", | |
"567986 0.445876 - \n", | |
"869206 0.956074 + \n", | |
"1403415 0.938833 + \n", | |
"1370551 0.209453 - \n", | |
"548520 0.700747 + \n", | |
"296515 0.379951 - \n", | |
"297695 0.824240 + \n", | |
"1536336 0.494093 - \n", | |
"599901 0.004565 - \n", | |
"284742 0.618436 + \n", | |
"275769 0.006412 - \n", | |
"1211504 0.049144 - \n", | |
"95160 0.202788 - \n", | |
"952110 0.998516 + \n", | |
"975862 0.601973 + \n", | |
"4133 0.247597 - \n", | |
"557088 0.009802 - \n", | |
"194102 0.009941 - \n", | |
"842571 0.617289 + \n", | |
"1492573 0.324862 - \n", | |
"1155714 0.672338 + \n", | |
"959579 0.379771 - \n", | |
"\n", | |
"[20000 rows x 3 columns]" | |
] | |
}, | |
"execution_count": 24, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pd.set_option('display.max_colwidth', -1)\n", | |
"predicted" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
note: rough edit.