Last active
December 26, 2019 19:33
-
-
Save akhileshravi/ec8e861903072643ca40a4ddd5358f4c to your computer and use it in GitHub Desktop.
16110007 Assignment 3 NLP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Name: Akhilesh Ravi | |
Roll No.: 16110007 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "NLP_Assignment3_16110007", | |
"provenance": [], | |
"collapsed_sections": [] | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "9dTzSCUXYQyj", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "c335ef41-cb62-450e-a2d8-4bbfbb599a7d" | |
}, | |
"source": [ | |
"from google.colab import drive\n", | |
"drive.mount('/content/gdrive')" | |
], | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "YJ_e3TKg9U_N", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"path = \"/content/gdrive/My Drive/Semester 7/NLP/Assignment3/\"\n", | |
"with open(path + \"train.txt\", 'r') as ftrain:\n", | |
" train_text = ftrain.read()\n", | |
"with open(path + \"test.txt\", 'r') as ftest:\n", | |
" test_text = ftest.read()" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "IIFVs3aW78NQ", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"import nltk\n", | |
"# nltk.download('stopwords')\n", | |
"from nltk.corpus import stopwords\n", | |
"import re" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ac5LT9gO8b44", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"help(stopwords)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "zUoUl3RN86fc", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "a6ac02d2-1312-4aff-cfe9-d8ad82ae4f3a" | |
}, | |
"source": [ | |
"stopwords_en = stopwords.words('english')\n", | |
"exclude = ['very', 'not', 'never', 'no', 'ever', 'nothing', 'really', 'extremely']\n", | |
"for i in exclude:\n", | |
" if i not in stopwords_en:\n", | |
" print(i, end=' ')" | |
], | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"never ever nothing really extremely " | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "2im06gLjauis", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"emojis = ['😂', '❤', '♥', '😍', '😭', '😘', '😊', '👌', '💕', '👏', '😁', '☺', '♡', '👍', '😩', '🙏', '✌', '😏', '😉', '🙌',\n", | |
" '🙈', '💪', '😄', '😒', '💃', '💖', '😃', '😔', '😱', '🎉', '😜', '☯', '🌸', '💜', '💙', '✨', '😳', '💗', '★',\n", | |
" '☀', '😡', '😎', '😢', '💋', '😋', '🙊', '😴', '🎶', '💞', '😌']\n", | |
"emoji_dict = {emojis[i]: i for i in range(len(emojis))}\n", | |
"# 50 most frequently used emojis from https://www.kaggle.com/thomasseleck/emoji-sentiment-data" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "kb4JWfw9-dvK", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"train_id = []\n", | |
"train_data = []\n", | |
"train_hin, train_eng, train_o, train_labels = [], [], [], []\n", | |
"# emoji_train = []\n", | |
"for sample in train_text.split('\\n\\n'):\n", | |
" \n", | |
" lines_sample = sample.split('\\n')\n", | |
" # print(lines_sample[0])\n", | |
" try:\n", | |
" train_labels.append(lines_sample[0].split()[2])\n", | |
" train_id.append(lines_sample[0].split()[1])\n", | |
" except IndexError:\n", | |
" del train_id[-1]\n", | |
" continue\n", | |
" temp, temp_eng, temp_hin, temp_o = [], [], [], []\n", | |
" \n", | |
" \n", | |
" for line in lines_sample[1:]:\n", | |
" t = line.split('\\t')\n", | |
"\n", | |
" if t[1] != 'O':\n", | |
" t[0]=re.sub('[\\W_]+', '', t[0])\n", | |
" if t[1] == 'Eng' and t[0] in stopwords_en and t[0] not in exclude:\n", | |
" continue\n", | |
" if 'http' in t[0]:\n", | |
" continue\n", | |
" temp.append(t[0])\n", | |
" if t[1] == 'Eng':\n", | |
" temp_eng.append(t[0])\n", | |
" elif t[1] == 'Hin':\n", | |
" temp_hin.append(t[0])\n", | |
" elif t[1] == 'O':\n", | |
" temp_o.append(t[0])\n", | |
" if temp == []:\n", | |
" continue\n", | |
" train_data.append(temp)\n", | |
" train_eng.append(temp_eng)\n", | |
" train_hin.append(temp_hin)\n", | |
" train_o.append(temp_o)\n" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Vgrdip_g_Fej", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "bf6b5e72-10b6-4075-9f93-49a79dace711" | |
}, | |
"source": [ | |
"print(len(train_text.split('\\n\\n')))" | |
], | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"15132\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "fZu39o6-0L2i", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "a2488ce9-89d1-4b49-c9d9-df756a66352c" | |
}, | |
"source": [ | |
"len(train_data)" | |
], | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"15131" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 9 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "X3elTZuw0gQb", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"test_id = []\n", | |
"test_data = []\n", | |
"test_hin, test_eng, test_o, test_labels = [], [], [], []\n", | |
"\n", | |
"for sample in test_text.split('\\n\\n'):\n", | |
" \n", | |
" lines_sample = sample.split('\\n')\n", | |
" # print(lines_sample[0])\n", | |
" try:\n", | |
" test_labels.append(lines_sample[0].split()[2])\n", | |
" test_id.append(lines_sample[0].split()[1])\n", | |
" except IndexError:\n", | |
" del test_id[-1]\n", | |
" continue\n", | |
" temp, temp_eng, temp_hin, temp_o = [], [], [], []\n", | |
" \n", | |
" \n", | |
" for line in lines_sample[1:]:\n", | |
" t = line.split('\\t')\n", | |
" t[0] = t[0].lower()\n", | |
" if t[1] != 'O':\n", | |
" t[0]=re.sub('[\\W_]+', '', t[0])\n", | |
" if t[1] == 'Eng' and t[0] in stopwords_en and t[0] not in exclude:\n", | |
" continue\n", | |
" if 'http' in t[0]:\n", | |
" continue\n", | |
" temp.append(t[0])\n", | |
" if t[1] == 'Eng':\n", | |
" temp_eng.append(t[0])\n", | |
" elif t[1] == 'Hin':\n", | |
" temp_hin.append(t[0])\n", | |
" elif t[1] == 'O':\n", | |
" temp_o.append(t[0])\n", | |
" if temp == []:\n", | |
" continue\n", | |
" test_data.append(temp)\n", | |
" test_eng.append(temp_eng)\n", | |
" test_hin.append(temp_hin)\n", | |
" test_o.append(temp_o)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "KoLNJ4VA0wCF", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"train_tweets = [' '.join(i) for i in train_data]\n", | |
"test_tweets = [' '.join(i) for i in test_data]" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "DA0RtaGc36nz", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"train_tweets_dict = {}\n", | |
"test_tweets_dict = {}" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "3wseReG-4oib", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"train_tweets_dict['eng'] = [' '.join(i) for i in train_eng]\n", | |
"test_tweets_dict['eng'] = [' '.join(i) for i in test_eng]" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "uC29oWZv36k9", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"train_tweets_dict['hin'] = [' '.join(i) for i in train_hin]\n", | |
"test_tweets_dict['hin'] = [' '.join(i) for i in test_hin]" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "5K_BLtFW38SK", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"train_tweets_dict['o'] = [' '.join(i) for i in train_o]\n", | |
"test_tweets_dict['o'] = [' '.join(i) for i in test_o]" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "sNh20-xu2Yq7", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "7fc16ef6-f5ce-4b31-afca-1858dc7d81b2" | |
}, | |
"source": [ | |
"np.unique(train_labels)" | |
], | |
"execution_count": 122, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"array(['negative', 'neutral', 'positive'], dtype='<U8')" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 122 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "n3u6_cLo07yx", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 81 | |
}, | |
"outputId": "13537e4a-5ddc-461a-a1db-ed66b752f647" | |
}, | |
"source": [ | |
"from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n", | |
"from keras.preprocessing.text import Tokenizer\n", | |
"from sklearn.model_selection import train_test_split\n", | |
"import pandas as pd\n", | |
"import random\n", | |
"import numpy as np\n", | |
"from keras.preprocessing import sequence\n", | |
"from keras.utils import np_utils\n", | |
"\n", | |
"from keras.models import Sequential\n", | |
"from keras.layers.core import Dense, Dropout, Activation, Lambda\n", | |
"from keras.layers.embeddings import Embedding\n", | |
"from keras.layers.recurrent import LSTM, SimpleRNN, GRU\n", | |
"from keras.preprocessing.text import Tokenizer\n", | |
"from keras import optimizers" | |
], | |
"execution_count": 22, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Using TensorFlow backend.\n" | |
], | |
"name": "stderr" | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/html": [ | |
"<p style=\"color: red;\">\n", | |
"The default version of TensorFlow in Colab will soon switch to TensorFlow 2.x.<br>\n", | |
"We recommend you <a href=\"https://www.tensorflow.org/guide/migrate\" target=\"_blank\">upgrade</a> now \n", | |
"or ensure your notebook will continue to use TensorFlow 1.x via the <code>%tensorflow_version 1.x</code> magic:\n", | |
"<a href=\"https://colab.research.google.com/notebooks/tensorflow_version.ipynb\" target=\"_blank\">more info</a>.</p>\n" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ZJGGWOMY2cBR", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"label_values = {'negative':0, 'neutral':1, 'positive':2}" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "OKygtMNS2KVn", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"y_train = np.array([label_values[i] for i in train_labels])\n", | |
"y_test = np.array([label_values[i] for i in test_labels])" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "CDe4kCvp76Ez", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "WbKqX45L1K9p", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"max_features = 20000\n", | |
"tokenizer1 = Tokenizer(num_words=max_features)\n", | |
"tokenizer1.fit_on_texts(train_tweets)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "kqUXn83O1i0V", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 52 | |
}, | |
"outputId": "5c7ed4a4-f86f-491b-dc28-794d11738843" | |
}, | |
"source": [ | |
"max_len = 250\n", | |
"num_classes = 3\n", | |
"\n", | |
"sequences_train = tokenizer1.texts_to_sequences(train_tweets)\n", | |
"sequences_test = tokenizer1.texts_to_sequences(test_tweets)\n", | |
"\n", | |
"X_train = sequence.pad_sequences(sequences_train, maxlen=max_len)\n", | |
"X_test = sequence.pad_sequences(sequences_test, maxlen=max_len)\n", | |
"\n", | |
"Y_train = np_utils.to_categorical(y_train, num_classes)\n", | |
"Y_test = np_utils.to_categorical(y_test, num_classes)\n", | |
"\n", | |
"print('X_train shape:', X_train.shape)\n", | |
"print('X_test shape:', X_test.shape)" | |
], | |
"execution_count": 28, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"X_train shape: (15131, 250)\n", | |
"X_test shape: (1869, 250)\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "AA6THqVE3Elf", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"input_dim = X_train.shape[1]" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "nQJDXF6T22KB", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"input_dim = X_train.shape[1]\n", | |
"model1 = Sequential()\n", | |
"\n", | |
"model1.add(Dense(input_dim))\n", | |
"model1.add(Dropout(0.2))\n", | |
"model1.add(Activation('relu'))\n", | |
"model1.add(Dense(200))\n", | |
"model1.add(Dropout(0.2))\n", | |
"model1.add(Activation('tanh'))\n", | |
"model1.add(Dense(100))\n", | |
"model1.add(Dropout(0.2))\n", | |
"model1.add(Activation('sigmoid'))\n", | |
"model1.add(Dense(3))\n", | |
"model1.add(Activation('softmax'))\n", | |
"\n", | |
"adam = optimizers.Adam(lr=0.01, decay=1e-6)\n", | |
"\n", | |
"model1.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Cz5Qg-hI2rk_", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 69 | |
}, | |
"outputId": "f121d048-6d4b-419d-82de-a50d24ea7cce" | |
}, | |
"source": [ | |
"model1.fit(X_train, Y_train, batch_size = 256, epochs=1)" | |
], | |
"execution_count": 34, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Epoch 1/1\n", | |
"15131/15131 [==============================] - 1s 70us/step - loss: 0.6535 - acc: 0.6507\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"<keras.callbacks.History at 0x7f0f220a2c18>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 34 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "IUh9VqF13a8q", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "81e6d0c1-ab8f-46ea-ae70-cc547f187fa5" | |
}, | |
"source": [ | |
"preds = model1.predict_classes(X_test, verbose=0)\n", | |
"np.sum(preds==y_test)/len(y_test)" | |
], | |
"execution_count": 35, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0.4002140181915463" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 35 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ILc7YMLjJsaF", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 538 | |
}, | |
"outputId": "8d5c648b-16cf-4850-b3fd-875fa4a5c1e4" | |
}, | |
"source": [ | |
"model1.summary()" | |
], | |
"execution_count": 36, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Model: \"sequential_2\"\n", | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"dense_5 (Dense) (None, 250) 62750 \n", | |
"_________________________________________________________________\n", | |
"dropout_4 (Dropout) (None, 250) 0 \n", | |
"_________________________________________________________________\n", | |
"activation_5 (Activation) (None, 250) 0 \n", | |
"_________________________________________________________________\n", | |
"dense_6 (Dense) (None, 200) 50200 \n", | |
"_________________________________________________________________\n", | |
"dropout_5 (Dropout) (None, 200) 0 \n", | |
"_________________________________________________________________\n", | |
"activation_6 (Activation) (None, 200) 0 \n", | |
"_________________________________________________________________\n", | |
"dense_7 (Dense) (None, 100) 20100 \n", | |
"_________________________________________________________________\n", | |
"dropout_6 (Dropout) (None, 100) 0 \n", | |
"_________________________________________________________________\n", | |
"activation_7 (Activation) (None, 100) 0 \n", | |
"_________________________________________________________________\n", | |
"dense_8 (Dense) (None, 3) 303 \n", | |
"_________________________________________________________________\n", | |
"activation_8 (Activation) (None, 3) 0 \n", | |
"=================================================================\n", | |
"Total params: 133,353\n", | |
"Trainable params: 133,353\n", | |
"Non-trainable params: 0\n", | |
"_________________________________________________________________\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "8fDdsocU4zd4", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"tokenizers = {}\n", | |
"for v in ['eng', 'hin', 'o']:\n", | |
" tokenizers[v] = Tokenizer(num_words=max_features)\n", | |
" tokenizers[v].fit_on_texts(train_tweets_dict[v])" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "gp4HxqeK5NHm", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 52 | |
}, | |
"outputId": "abb433b3-1990-439f-b712-331f6a101cd5" | |
}, | |
"source": [ | |
"sequences_train_dict, sequences_test_dict = {}, {}\n", | |
"X_train_dict, X_test_dict = {}, {}\n", | |
"for v in ['eng', 'hin', 'o']:\n", | |
" sequences_train_dict[v] = tokenizers[v].texts_to_sequences(train_tweets)\n", | |
" sequences_test_dict[v] = tokenizers[v].texts_to_sequences(test_tweets)\n", | |
"\n", | |
" X_train_dict[v] = sequence.pad_sequences(sequences_train_dict[v], maxlen=max_len)\n", | |
" X_test_dict[v] = sequence.pad_sequences(sequences_test_dict[v], maxlen=max_len)\n", | |
"\n", | |
"# Y_train = np_utils.to_categorical(y_train, num_classes)\n", | |
"# Y_test = np_utils.to_categorical(y_test, num_classes)\n", | |
"\n", | |
"X_train2 = np.hstack(tuple([X_train] + [X_train_dict[v] for v in ['eng', 'hin', 'o']]))\n", | |
"X_test2 = np.hstack(tuple([X_test] + [X_test_dict[v] for v in ['eng', 'hin', 'o']]))\n", | |
"\n", | |
"print('X_train2 shape:', X_train2.shape)\n", | |
"print('X_test2 shape:', X_test2.shape)" | |
], | |
"execution_count": 38, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"X_train2 shape: (15131, 1000)\n", | |
"X_test2 shape: (1869, 1000)\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "q01Pe0BFXwXl", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# sum(np.sum(emoji_train, axis=0) > 0)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "w66AmyT36fNY", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"input_dim2 = X_train2.shape[1]\n", | |
"model2 = Sequential()\n", | |
"\n", | |
"model2.add(Dense(input_dim))\n", | |
"model2.add(Dropout(0.2))\n", | |
"model2.add(Activation('relu'))\n", | |
"model2.add(Dense(200))\n", | |
"model2.add(Dropout(0.2))\n", | |
"model2.add(Activation('tanh'))\n", | |
"model2.add(Dense(100))\n", | |
"model2.add(Dropout(0.2))\n", | |
"model2.add(Activation('sigmoid'))\n", | |
"model2.add(Dense(3))\n", | |
"model2.add(Activation('softmax'))\n", | |
"\n", | |
"adam = optimizers.Adam(lr=0.001, decay=1e-6)\n", | |
"\n", | |
"model2.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "k__XRSio68V4", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 69 | |
}, | |
"outputId": "ebcb5fc3-d773-4e93-f679-f244b84c78c4" | |
}, | |
"source": [ | |
"model2.fit(X_train2, Y_train, batch_size = 256, epochs=1)" | |
], | |
"execution_count": 42, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Epoch 1/1\n", | |
"15131/15131 [==============================] - 2s 113us/step - loss: 0.6664 - acc: 0.6436\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"<keras.callbacks.History at 0x7f0f1a1f4cf8>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 42 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "sOXvqSjh7DO1", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "199fef9a-949b-421c-bce9-45a689ee336d" | |
}, | |
"source": [ | |
"preds2 = model2.predict_classes(X_test2, verbose=0)\n", | |
"np.sum(preds2==y_test)/len(y_test)" | |
], | |
"execution_count": 43, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0.4071696094168004" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 43 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "6pAujGiH7T0N", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "82579038-25f0-459f-d89e-3ef10e10a1d0" | |
}, | |
"source": [ | |
"preds2[:10]" | |
], | |
"execution_count": 44, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 44 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "SmkqnBdkLMr6", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 52 | |
}, | |
"outputId": "6077f179-a37a-4102-c20d-c72029482ebe" | |
}, | |
"source": [ | |
"X_train3 = X_train.copy()\n", | |
"X_test3 = X_test.copy()\n", | |
"\n", | |
"emoji_train = np.zeros((X_train.shape[0], 50))\n", | |
"emoji_test = np.zeros((X_test.shape[0], 50))\n", | |
"\n", | |
"i = 0\n", | |
"for sample in train_text.split('\\n\\n'):\n", | |
" \n", | |
" lines_sample = sample.split('\\n')\n", | |
" try:\n", | |
" tmp = lines_sample[0].split()[2]\n", | |
" tmp = lines_sample[0].split()[1]\n", | |
" except IndexError:\n", | |
" continue \n", | |
" \n", | |
" for line in lines_sample[1:]:\n", | |
" t = line.split('\\t')\n", | |
" for ch in t[0]:\n", | |
" if ch in emojis:\n", | |
" emoji_train[i][emoji_dict[ch]] += 1\n", | |
" i += 1\n", | |
"\n", | |
"i = 0\n", | |
"for sample in test_text.split('\\n\\n'):\n", | |
" \n", | |
" lines_sample = sample.split('\\n')\n", | |
" try:\n", | |
" tmp = lines_sample[0].split()[2]\n", | |
" tmp = lines_sample[0].split()[1]\n", | |
" except IndexError:\n", | |
" continue\n", | |
"\n", | |
" for line in lines_sample[1:]:\n", | |
" t = line.split('\\t')\n", | |
" for ch in t[0]:\n", | |
" if ch in emojis:\n", | |
" emoji_test[i][emoji_dict[ch]] += 1\n", | |
" i += 1\n", | |
"\n", | |
"X_train3 = np.hstack((X_train, emoji_train))\n", | |
"X_test3 = np.hstack((X_test, emoji_test))\n", | |
"\n", | |
"print('X_train3 shape:', X_train3.shape)\n", | |
"print('X_test3 shape:', X_test3.shape)" | |
], | |
"execution_count": 45, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"X_train3 shape: (15131, 300)\n", | |
"X_test3 shape: (1869, 300)\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "vH7e1na5aZT4", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "7dfc1f1e-31aa-4644-8640-ada6a9a61b1d" | |
}, | |
"source": [ | |
"sum(np.sum(emoji_train, axis=1)>0)" | |
], | |
"execution_count": 46, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"2112" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 46 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "K7X1G-3VOwnT", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"input_dim3 = X_train3.shape[1]\n", | |
"model3 = Sequential()\n", | |
"\n", | |
"model3.add(Dense(input_dim))\n", | |
"model3.add(Dropout(0.2))\n", | |
"model3.add(Activation('relu'))\n", | |
"model3.add(Dense(200))\n", | |
"model3.add(Dropout(0.2))\n", | |
"model3.add(Activation('tanh'))\n", | |
"model3.add(Dense(100))\n", | |
"model3.add(Dropout(0.2))\n", | |
"model3.add(Activation('sigmoid'))\n", | |
"model3.add(Dense(3))\n", | |
"model3.add(Activation('softmax'))\n", | |
"\n", | |
"adam = optimizers.Adam(lr=0.001, decay=1e-6)\n", | |
"\n", | |
"model3.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "sXYYCFNDPCRI", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 173 | |
}, | |
"outputId": "fe744d46-436c-4319-ae93-90adffb4572a" | |
}, | |
"source": [ | |
"model3.fit(X_train3, Y_train, batch_size = 512, epochs=4)" | |
], | |
"execution_count": 60, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Epoch 1/4\n", | |
"15131/15131 [==============================] - 2s 115us/step - loss: 0.7066 - acc: 0.6207\n", | |
"Epoch 2/4\n", | |
"15131/15131 [==============================] - 1s 38us/step - loss: 0.6396 - acc: 0.6603\n", | |
"Epoch 3/4\n", | |
"15131/15131 [==============================] - 1s 38us/step - loss: 0.6359 - acc: 0.6626\n", | |
"Epoch 4/4\n", | |
"15131/15131 [==============================] - 1s 38us/step - loss: 0.6343 - acc: 0.6631\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"<keras.callbacks.History at 0x7f0f19749be0>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 60 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ETjxCbb8PHMl", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "bc8196c9-d391-4208-ac33-1e8aaf325677" | |
}, | |
"source": [ | |
"preds3 = model3.predict_classes(X_test3, verbose=0)\n", | |
"np.sum(preds3==y_test)/len(y_test)" | |
], | |
"execution_count": 61, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0.32691278758694486" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 61 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "FGYJbkjcYtSn", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "ede4745b-1786-47fd-b332-b33b39553ca7" | |
}, | |
"source": [ | |
"sum(sum(emoji_train))" | |
], | |
"execution_count": 62, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"5102.0" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 62 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "8lGOcNgnPNKM", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "b799dff6-1721-418d-b43a-017a0c0e269c" | |
}, | |
"source": [ | |
"X_train.shape" | |
], | |
"execution_count": 149, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"(15131, 250)" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 149 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "avVAaVEX3l1I", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"from sklearn.tree import DecisionTreeClassifier" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "oBhhwjWO3pvX", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 121 | |
}, | |
"outputId": "1f996a56-f2af-4c3f-8f73-ff032c845a49" | |
}, | |
"source": [ | |
"dtree1 = DecisionTreeClassifier()\n", | |
"dtree1.fit(X_train, Y_train)" | |
], | |
"execution_count": 65, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n", | |
" max_features=None, max_leaf_nodes=None,\n", | |
" min_impurity_decrease=0.0, min_impurity_split=None,\n", | |
" min_samples_leaf=1, min_samples_split=2,\n", | |
" min_weight_fraction_leaf=0.0, presort=False,\n", | |
" random_state=None, splitter='best')" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 65 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "l5rwDIWGKi7T", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "3a3a69e5-afea-456d-f21e-4d02da6c0a9f" | |
}, | |
"source": [ | |
"predsd1 = dtree1.predict(X_test)\n", | |
"predsd1 = np.argmax(predsd1, axis=1)\n", | |
"np.sum(predsd1==y_test)/len(y_test)" | |
], | |
"execution_count": 66, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0.3911182450508293" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 66 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "T2eJDbzBK9fq", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 121 | |
}, | |
"outputId": "3e561970-f213-4824-87a6-a23fab886df6" | |
}, | |
"source": [ | |
"dtree2 = DecisionTreeClassifier()\n", | |
"dtree2.fit(X_train2, Y_train)" | |
], | |
"execution_count": 67, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n", | |
" max_features=None, max_leaf_nodes=None,\n", | |
" min_impurity_decrease=0.0, min_impurity_split=None,\n", | |
" min_samples_leaf=1, min_samples_split=2,\n", | |
" min_weight_fraction_leaf=0.0, presort=False,\n", | |
" random_state=None, splitter='best')" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 67 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "2IPi6lqNK8Ct", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "c63065ce-d635-4d59-a87e-605dd94c86c8" | |
}, | |
"source": [ | |
"predsd2 = dtree2.predict(X_test2)\n", | |
"predsd2 = np.argmax(predsd2, axis=1)\n", | |
"np.sum(predsd2==y_test)/len(y_test)" | |
], | |
"execution_count": 68, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0.38095238095238093" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 68 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "CKjk91n8P0Ju", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 121 | |
}, | |
"outputId": "963755c1-6807-4845-9db5-b43eb9f1a1d1" | |
}, | |
"source": [ | |
"dtree3 = DecisionTreeClassifier()\n", | |
"dtree3.fit(X_train3, Y_train)" | |
], | |
"execution_count": 69, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n", | |
" max_features=None, max_leaf_nodes=None,\n", | |
" min_impurity_decrease=0.0, min_impurity_split=None,\n", | |
" min_samples_leaf=1, min_samples_split=2,\n", | |
" min_weight_fraction_leaf=0.0, presort=False,\n", | |
" random_state=None, splitter='best')" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 69 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Q7R4-ZYoP2_L", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "c7aa9058-d994-4adb-c4f2-1303c84effda" | |
}, | |
"source": [ | |
"predsd3 = dtree3.predict(X_test3)\n", | |
"predsd3 = np.argmax(predsd3, axis=1)\n", | |
"np.sum(predsd3==y_test)/len(y_test)" | |
], | |
"execution_count": 70, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0.39058319957196364" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 70 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "f8grLdKPQRGN", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "876bd815-1b44-443d-c8fb-00440ae0cc5d" | |
}, | |
"source": [ | |
"a = X_test3[(np.sum(emoji_test, axis=1)>0)]\n", | |
"p = y_test[(np.sum(emoji_test, axis=1)>0)]\n", | |
"predsd4 = dtree3.predict(a)\n", | |
"predsd4 = np.argmax(predsd4, axis=1)\n", | |
"np.sum(predsd4==p)/len(p)" | |
], | |
"execution_count": 78, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0.4262295081967213" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 78 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "4A1riiirhQ5p", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"from keras.layers.normalization import BatchNormalization\n", | |
"from keras.layers import SpatialDropout1D\n", | |
"from keras.models import Model\n", | |
"from keras.layers import Input,Flatten, Dense, Embedding, RNN, Conv1D, BatchNormalization, MaxPooling1D, Activation, Dropout, concatenate, Lambda\n", | |
"from keras import optimizers\n", | |
"from keras.layers.convolutional import Convolution1D\n", | |
"from keras import backend as K" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "gqCJcTt9hEoD", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 72 | |
}, | |
"outputId": "8b529ddd-651f-4f8e-d8c6-f3e96cee1811" | |
}, | |
"source": [ | |
"nb_filter = 300\n", | |
"filter_length = 3\n", | |
"hidden_dims = 300 # 250\n", | |
"nb_epoch = 2\n", | |
"\n", | |
"\n", | |
"cmodel1 = Sequential()\n", | |
"cmodel1.add(Embedding(max_features, 300))\n", | |
"cmodel1.add(SpatialDropout1D(0.2))\n", | |
"# we add a Convolution1D, which will learn nb_filter\n", | |
"# word group filters of size filter_length:\n", | |
"cmodel1.add(Convolution1D(nb_filter=nb_filter,\n", | |
" filter_length=filter_length,\n", | |
" border_mode='valid',\n", | |
" activation='tanh',\n", | |
" subsample_length=1))\n", | |
"\n", | |
"#cmodel1.add(BatchNormalization())\n", | |
"from keras import optimizers\n", | |
"def max_1d(X):\n", | |
" return K.max(X, axis=1)\n", | |
"\n", | |
"cmodel1.add(Lambda(max_1d, output_shape=(nb_filter,)))\n", | |
"cmodel1.add(Dense(hidden_dims))\n", | |
"cmodel1.add(Dropout(0.2))\n", | |
"cmodel1.add(Activation('relu'))\n", | |
"cmodel1.add(Dense(num_classes))\n", | |
"cmodel1.add(Activation('sigmoid'))\n", | |
"adam = optimizers.Adam(lr=0.001, decay=1e-6)\n", | |
"cmodel1.compile(loss='binary_crossentropy',\n", | |
" optimizer=adam,\n", | |
" metrics=['accuracy'])" | |
], | |
"execution_count": 106, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:16: UserWarning: Update your `Conv1D` call to the Keras 2 API: `Conv1D(activation=\"tanh\", filters=300, kernel_size=3, strides=1, padding=\"valid\")`\n", | |
" app.launch_new_instance()\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "5SIgsZezhsJv", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 69 | |
}, | |
"outputId": "d31955e9-b6dd-459f-a3ec-51f9fbd2f776" | |
}, | |
"source": [ | |
"cmodel1.fit(X_train3, Y_train, epochs = 1)" | |
], | |
"execution_count": 107, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Epoch 1/1\n", | |
"15131/15131 [==============================] - 237s 16ms/step - loss: 0.5342 - acc: 0.7211\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"<keras.callbacks.History at 0x7f0f16bf6748>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 107 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "bPB2nA7wixq8", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "ae705a5a-4024-42d4-90b1-1460caaa1ea2" | |
}, | |
"source": [ | |
"predsc1 = cmodel1.predict_classes(X_test3, verbose=0)\n", | |
"np.sum(predsc1==y_test)/len(y_test)" | |
], | |
"execution_count": 109, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0.565008025682183" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 109 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "cFj06lng5S12", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "f9b8a66d-bb98-47a2-dd14-e02571cd691b" | |
}, | |
"source": [ | |
"prf(y_test, predsc1, average='micro')" | |
], | |
"execution_count": 167, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"(0.565008025682183, 0.565008025682183, 0.565008025682183, None)" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 167 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "OV8yft9S5n2p", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "8463406e-a582-4ac1-d737-62a6a45dee8f" | |
}, | |
"source": [ | |
"prf(y_test, predsc1, average='macro')" | |
], | |
"execution_count": 168, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"(0.5659138903963613, 0.5840501910447199, 0.5662953882918141, None)" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 168 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "LFglzGiw5rVT", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "d8262ee5-fb84-4959-a23d-3245894fa599" | |
}, | |
"source": [ | |
"prf(y_test, predsc1, average='weighted')" | |
], | |
"execution_count": 169, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"(0.5675574871168725, 0.565008025682183, 0.5569762553083624, None)" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 169 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "iKXK20SYjOZj", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 469 | |
}, | |
"outputId": "df05a7c1-d2ac-45a9-bec8-44a0b7673c43" | |
}, | |
"source": [ | |
"cmodel1.summary()" | |
], | |
"execution_count": 108, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Model: \"sequential_20\"\n", | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"embedding_12 (Embedding) (None, None, 300) 6000000 \n", | |
"_________________________________________________________________\n", | |
"spatial_dropout1d_11 (Spatia (None, None, 300) 0 \n", | |
"_________________________________________________________________\n", | |
"conv1d_9 (Conv1D) (None, None, 300) 270300 \n", | |
"_________________________________________________________________\n", | |
"lambda_9 (Lambda) (None, 300) 0 \n", | |
"_________________________________________________________________\n", | |
"dense_46 (Dense) (None, 300) 90300 \n", | |
"_________________________________________________________________\n", | |
"dropout_32 (Dropout) (None, 300) 0 \n", | |
"_________________________________________________________________\n", | |
"activation_46 (Activation) (None, 300) 0 \n", | |
"_________________________________________________________________\n", | |
"dense_47 (Dense) (None, 3) 903 \n", | |
"_________________________________________________________________\n", | |
"activation_47 (Activation) (None, 3) 0 \n", | |
"=================================================================\n", | |
"Total params: 6,361,503\n", | |
"Trainable params: 6,361,503\n", | |
"Non-trainable params: 0\n", | |
"_________________________________________________________________\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "NWkJByaVs_ow", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 72 | |
}, | |
"outputId": "351f49b0-1849-47cc-ba7f-c09ab01aef60" | |
}, | |
"source": [ | |
"nb_filter = 300\n", | |
"filter_length = 3\n", | |
"hidden_dims = 300 # 250\n", | |
"nb_epoch = 2\n", | |
"\n", | |
"\n", | |
"cmodel1a = Sequential()\n", | |
"cmodel1a.add(Embedding(max_features, 300))\n", | |
"cmodel1a.add(SpatialDropout1D(0.2))\n", | |
"# we add a Convolution1D, which will learn nb_filter\n", | |
"# word group filters of size filter_length:\n", | |
"cmodel1a.add(Convolution1D(nb_filter=nb_filter,\n", | |
" filter_length=filter_length,\n", | |
" border_mode='valid',\n", | |
" activation='tanh',\n", | |
" subsample_length=1))\n", | |
"\n", | |
"cmodel1a.add(Lambda(max_1d, output_shape=(nb_filter,)))\n", | |
"cmodel1a.add(Dense(hidden_dims))\n", | |
"cmodel1a.add(Dropout(0.2))\n", | |
"cmodel1a.add(Activation('relu'))\n", | |
"cmodel1a.add(Dense(num_classes))\n", | |
"cmodel1a.add(Activation('sigmoid'))\n", | |
"adam = optimizers.Adam(lr=0.001, decay=1e-6)\n", | |
"cmodel1a.compile(loss='binary_crossentropy',\n", | |
" optimizer=adam,\n", | |
" metrics=['accuracy'])" | |
], | |
"execution_count": 145, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:16: UserWarning: Update your `Conv1D` call to the Keras 2 API: `Conv1D(activation=\"tanh\", filters=300, kernel_size=3, strides=1, padding=\"valid\")`\n", | |
" app.launch_new_instance()\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "OgoK_J2cjtic", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 104 | |
}, | |
"outputId": "7c41bb80-bfb6-4548-f280-7254855740a8" | |
}, | |
"source": [ | |
"cmodel1a.fit(X_train3, Y_train, epochs = 2, batch_size=256)" | |
], | |
"execution_count": 146, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Epoch 1/2\n", | |
"15131/15131 [==============================] - 173s 11ms/step - loss: 0.5879 - acc: 0.6856\n", | |
"Epoch 2/2\n", | |
"15131/15131 [==============================] - 168s 11ms/step - loss: 0.4583 - acc: 0.7792\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"<keras.callbacks.History at 0x7f0f1368c940>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 146 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "SgnkbkcPtJHB", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "d4ae2a11-387e-4178-fe2a-a2df44faa7c5" | |
}, | |
"source": [ | |
"predsc1a = cmodel1a.predict_classes(X_test3, verbose=0)\n", | |
"np.sum(predsc1a==y_test)/len(y_test)" | |
], | |
"execution_count": 147, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0.5644729802033173" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 147 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "VcAy5qNE207S", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"from sklearn.metrics import precision_recall_fscore_support as prf" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "mvjiLm4z3CDa", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "2e407171-f2e3-436c-9f5f-c873a290962e" | |
}, | |
"source": [ | |
"prf(y_test, predsc1a, average='micro')" | |
], | |
"execution_count": 163, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"(0.5644729802033173, 0.5644729802033173, 0.5644729802033173, None)" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 163 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "RRe5rJKX4IPQ", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "a4045c61-c53d-41f5-a146-1f37c97a9743" | |
}, | |
"source": [ | |
"prf(y_test, predsc1a, average='macro')" | |
], | |
"execution_count": 164, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"(0.5708475748562786, 0.563563693418115, 0.5666094135651139, None)" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 164 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "KUOgZQBe4NQK", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "e9451770-bfdb-45e0-bf94-8b7973afbf0f" | |
}, | |
"source": [ | |
"prf(y_test, predsc1a, average='weighted')" | |
], | |
"execution_count": 166, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"(0.5663970904202026, 0.5644729802033173, 0.5648397339912556, None)" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 166 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab_type": "code", | |
"outputId": "b8acaefe-008c-4714-9919-28d2f304d49a", | |
"id": "anNM5TySjt77", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 72 | |
} | |
}, | |
"source": [ | |
"cmodel2 = Sequential()\n", | |
"cmodel2.add(Embedding(max_features, 500))\n", | |
"cmodel2.add(SpatialDropout1D(0.2))\n", | |
"# we add a Convolution1D, which will learn nb_filter\n", | |
"# word group filters of size filter_length:\n", | |
"cmodel2.add(Convolution1D(nb_filter=nb_filter,\n", | |
" filter_length=filter_length,\n", | |
" border_mode='valid',\n", | |
" activation='tanh',\n", | |
" subsample_length=1))\n", | |
"\n", | |
"cmodel2.add(Lambda(max_1d, output_shape=(nb_filter,)))\n", | |
"cmodel2.add(Dense(hidden_dims))\n", | |
"cmodel2.add(Dropout(0.2))\n", | |
"cmodel2.add(Activation('relu'))\n", | |
"cmodel2.add(Dense(num_classes))\n", | |
"cmodel2.add(Activation('sigmoid'))\n", | |
"adam = optimizers.Adam(lr=0.001, decay=1e-6)\n", | |
"cmodel2.compile(loss='binary_crossentropy',\n", | |
" optimizer=adam,\n", | |
" metrics=['accuracy'])" | |
], | |
"execution_count": 132, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:10: UserWarning: Update your `Conv1D` call to the Keras 2 API: `Conv1D(activation=\"tanh\", filters=300, kernel_size=3, strides=1, padding=\"valid\")`\n", | |
" # Remove the CWD from sys.path while we load stuff.\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "L3s7q3rqj50A", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 69 | |
}, | |
"outputId": "0ff1d71c-e282-4cc7-b846-bacfae25dbb1" | |
}, | |
"source": [ | |
"cmodel2.fit(X_train2, Y_train, epochs = 1)" | |
], | |
"execution_count": 133, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Epoch 1/1\n", | |
"15131/15131 [==============================] - 986s 65ms/step - loss: 0.5525 - acc: 0.7080\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"<keras.callbacks.History at 0x7f0f13d670b8>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 133 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "oB7-3ZwTkGBE", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "5d6905c3-5f8b-4d1d-e381-632aaee0069e" | |
}, | |
"source": [ | |
"predsc2 = cmodel2.predict_classes(X_test3, verbose=0)\n", | |
"np.sum(predsc2==y_test)/len(y_test)" | |
], | |
"execution_count": 134, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0.46441947565543074" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 134 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ch95QsenkLAY", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 469 | |
}, | |
"outputId": "f235abbe-3114-4524-8194-cb9cdd49259b" | |
}, | |
"source": [ | |
"cmodel2.summary()" | |
], | |
"execution_count": 117, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Model: \"sequential_22\"\n", | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"embedding_14 (Embedding) (None, None, 500) 10000000 \n", | |
"_________________________________________________________________\n", | |
"spatial_dropout1d_13 (Spatia (None, None, 500) 0 \n", | |
"_________________________________________________________________\n", | |
"conv1d_11 (Conv1D) (None, None, 300) 450300 \n", | |
"_________________________________________________________________\n", | |
"lambda_11 (Lambda) (None, 300) 0 \n", | |
"_________________________________________________________________\n", | |
"dense_50 (Dense) (None, 300) 90300 \n", | |
"_________________________________________________________________\n", | |
"dropout_34 (Dropout) (None, 300) 0 \n", | |
"_________________________________________________________________\n", | |
"activation_50 (Activation) (None, 300) 0 \n", | |
"_________________________________________________________________\n", | |
"dense_51 (Dense) (None, 3) 903 \n", | |
"_________________________________________________________________\n", | |
"activation_51 (Activation) (None, 3) 0 \n", | |
"=================================================================\n", | |
"Total params: 10,541,503\n", | |
"Trainable params: 10,541,503\n", | |
"Non-trainable params: 0\n", | |
"_________________________________________________________________\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "cItt6ON9TKMF", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"!pip install emoji\n", | |
"import emoji" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "F8jVacTFkbu2", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"new_train_data = []\n", | |
"\n", | |
"for sample in train_text.split('\\n\\n'):\n", | |
" \n", | |
" lines_sample = sample.split('\\n')\n", | |
" try:\n", | |
" tmp = (lines_sample[0].split()[2])\n", | |
" tmp = (lines_sample[0].split()[1])\n", | |
" except IndexError:\n", | |
" continue\n", | |
" temp = []\n", | |
" \n", | |
" for line in lines_sample[1:]:\n", | |
" t = line.split('\\t')\n", | |
"\n", | |
" if t[1] != 'O':\n", | |
" t[0]=re.sub('[\\W_]+', '', t[0])\n", | |
" new = ''\n", | |
" for ch in t[0]:\n", | |
" if ch in emojis:\n", | |
" new += ' ' + emoji.demojize(ch) + ' '\n", | |
" if t[1] == 'Eng' and t[0] in stopwords_en and t[0] not in exclude:\n", | |
" continue\n", | |
" if 'http' in t[0]:\n", | |
" continue\n", | |
" temp.append(t[0])\n", | |
" if temp == []:\n", | |
" continue\n", | |
" new_train_data.append(temp)\n" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "fZOiuRo1k2M_", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"new_test_data = []\n", | |
"# emoji_test = []\n", | |
"for sample in test_text.split('\\n\\n'):\n", | |
" \n", | |
" lines_sample = sample.split('\\n')\n", | |
" try:\n", | |
" tmp = (lines_sample[0].split()[2])\n", | |
" tmp = (lines_sample[0].split()[1])\n", | |
" except IndexError:\n", | |
" continue\n", | |
" temp = []\n", | |
" \n", | |
" for line in lines_sample[1:]:\n", | |
" t = line.split('\\t')\n", | |
"\n", | |
" if t[1] != 'O':\n", | |
" t[0]=re.sub('[\\W_]+', '', t[0])\n", | |
" new = ''\n", | |
" for ch in t[0]:\n", | |
" if ch in emojis:\n", | |
" new += ' ' + emoji.demojize(ch) + ' '\n", | |
" if t[1] == 'Eng' and t[0] in stopwords_en and t[0] not in exclude:\n", | |
" continue\n", | |
" if 'http' in t[0]:\n", | |
" continue\n", | |
" temp.append(t[0])\n", | |
" if temp == []:\n", | |
" continue\n", | |
" new_test_data.append(temp)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "gAVZFFL5kSyE", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"new_train_tweets = [' '.join(i) for i in new_train_data]\n", | |
"new_test_tweets = [' '.join(i) for i in new_test_data]" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "siNMAS_0mUfV", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"max_features = 20000\n", | |
"tokenizer2 = Tokenizer(num_words=max_features)\n", | |
"tokenizer2.fit_on_texts(new_train_tweets)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "m5U5FfAKkTh_", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 52 | |
}, | |
"outputId": "e4529ffe-5fe1-4879-ea03-5896cb3ed70c" | |
}, | |
"source": [ | |
"max_len = 250\n", | |
"num_classes = 3\n", | |
"\n", | |
"new_sequences_train = tokenizer2.texts_to_sequences(new_train_tweets)\n", | |
"new_sequences_test = tokenizer2.texts_to_sequences(new_test_tweets)\n", | |
"\n", | |
"X_train4 = sequence.pad_sequences(new_sequences_train, maxlen=max_len)\n", | |
"X_test4 = sequence.pad_sequences(new_sequences_test, maxlen=max_len)\n", | |
"\n", | |
"# Y_train = np_utils.to_categorical(y_train, num_classes)\n", | |
"# Y_test = np_utils.to_categorical(y_test, num_classes)\n", | |
"\n", | |
"print('X_train4 shape:', X_train4.shape)\n", | |
"print('X_test4 shape:', X_test4.shape)" | |
], | |
"execution_count": 126, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"X_train4 shape: (15131, 250)\n", | |
"X_test4 shape: (1869, 250)\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "fNAfKJRYmyaR", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab_type": "code", | |
"outputId": "2d4f0ab4-1e04-431e-e3c2-20e0f26ca7cc", | |
"id": "mZODp4rXm0G3", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 72 | |
} | |
}, | |
"source": [ | |
"cmodel3 = Sequential()\n", | |
"cmodel3.add(Embedding(max_features, 300))\n", | |
"cmodel3.add(SpatialDropout1D(0.2))\n", | |
"# we add a Convolution1D, which will learn nb_filter\n", | |
"# word group filters of size filter_length:\n", | |
"cmodel3.add(Convolution1D(nb_filter=nb_filter,\n", | |
" filter_length=filter_length,\n", | |
" border_mode='valid',\n", | |
" activation='tanh',\n", | |
" subsample_length=1))\n", | |
"\n", | |
"cmodel3.add(Lambda(max_1d, output_shape=(nb_filter,)))\n", | |
"cmodel3.add(Dense(hidden_dims))\n", | |
"cmodel3.add(Dropout(0.2))\n", | |
"cmodel3.add(Activation('relu'))\n", | |
"cmodel3.add(Dense(num_classes))\n", | |
"cmodel3.add(Activation('sigmoid'))\n", | |
"adam = optimizers.Adam(lr=0.001, decay=1e-6)\n", | |
"cmodel3.compile(loss='binary_crossentropy',\n", | |
" optimizer=adam,\n", | |
" metrics=['accuracy'])" | |
], | |
"execution_count": 127, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:10: UserWarning: Update your `Conv1D` call to the Keras 2 API: `Conv1D(activation=\"tanh\", filters=300, kernel_size=3, strides=1, padding=\"valid\")`\n", | |
" # Remove the CWD from sys.path while we load stuff.\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "69J1sfpOm9ug", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 69 | |
}, | |
"outputId": "ad67b870-dbc5-4ad4-d162-509a99a116f5" | |
}, | |
"source": [ | |
"cmodel3.fit(X_train4, Y_train, epochs = 1)" | |
], | |
"execution_count": 129, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Epoch 1/1\n", | |
"15131/15131 [==============================] - 216s 14ms/step - loss: 0.5303 - acc: 0.7264\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"<keras.callbacks.History at 0x7f0f143d9898>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 129 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "HWM5IozSnrQb", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "5c630355-d5e8-42b8-826a-01d5621d4cbb" | |
}, | |
"source": [ | |
"predsc3 = cmodel3.predict_classes(X_test4, verbose=0)\n", | |
"np.sum(predsc3==y_test)/len(y_test)" | |
], | |
"execution_count": 130, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0.5521669341894061" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 130 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "OSVeBC5uoD_d", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 469 | |
}, | |
"outputId": "ae0b085c-9980-42ba-f598-8307bd3210a3" | |
}, | |
"source": [ | |
"cmodel3.summary()" | |
], | |
"execution_count": 131, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Model: \"sequential_23\"\n", | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"embedding_15 (Embedding) (None, None, 300) 6000000 \n", | |
"_________________________________________________________________\n", | |
"spatial_dropout1d_14 (Spatia (None, None, 300) 0 \n", | |
"_________________________________________________________________\n", | |
"conv1d_12 (Conv1D) (None, None, 300) 270300 \n", | |
"_________________________________________________________________\n", | |
"lambda_12 (Lambda) (None, 300) 0 \n", | |
"_________________________________________________________________\n", | |
"dense_52 (Dense) (None, 300) 90300 \n", | |
"_________________________________________________________________\n", | |
"dropout_35 (Dropout) (None, 300) 0 \n", | |
"_________________________________________________________________\n", | |
"activation_52 (Activation) (None, 300) 0 \n", | |
"_________________________________________________________________\n", | |
"dense_53 (Dense) (None, 3) 903 \n", | |
"_________________________________________________________________\n", | |
"activation_53 (Activation) (None, 3) 0 \n", | |
"=================================================================\n", | |
"Total params: 6,361,503\n", | |
"Trainable params: 6,361,503\n", | |
"Non-trainable params: 0\n", | |
"_________________________________________________________________\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "5-B6qEc6BJJJ", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 191 | |
}, | |
"outputId": "6e4a8b2b-0fbc-429c-a907-2caa8eac5d8e" | |
}, | |
"source": [ | |
"print(train_text[:100])" | |
], | |
"execution_count": 126, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"meta\t3\tnegative\n", | |
"@\tO\n", | |
"AdilNisarButt\tHin\n", | |
"pakistan\tHin\n", | |
"ka\tHin\n", | |
"ghra\tHin\n", | |
"tauq\tHin\n", | |
"he\tEng\n", | |
"Pakistan\tEng\n", | |
"Isra\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "XHN9MSnd5wcC", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 121 | |
}, | |
"outputId": "282da6de-3570-453a-9c8b-31166287f94e" | |
}, | |
"source": [ | |
"a = np.array([[1,1,1,1], [2,2,2,2]])\n", | |
"b = np.array([[3,3],[4,4]])\n", | |
"print(a)\n", | |
"print(b)\n", | |
"print(np.hstack((a,b)))" | |
], | |
"execution_count": 57, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[[1 1 1 1]\n", | |
" [2 2 2 2]]\n", | |
"[[3 3]\n", | |
" [4 4]]\n", | |
"[[1 1 1 1 3 3]\n", | |
" [2 2 2 2 4 4]]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment