Created
September 16, 2019 18:35
-
-
Save shanecandoit/5b1e63ab589a647f4ef373d69766ab5b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Reuters news topic classification (46 categories)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "Using TensorFlow backend.\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "from keras.datasets import reuters" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Downloading data from https://s3.amazonaws.com/text-datasets/reuters.npz\n", | |
| "2113536/2110848 [==============================] - 1s 1us/step\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "(train_data, train_labels), (test_data, test_labels) = reuters.load_data(num_words=10_000)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "((8982,), (8982,), (2246,), (2246,))" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "train_data.shape, train_labels.shape, test_data.shape, test_labels.shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Downloading data from https://s3.amazonaws.com/text-datasets/reuters_word_index.json\n", | |
| "557056/550378 [==============================] - 0s 1us/step\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# decode to words\n", | |
| "word_index = reuters.get_word_index()\n", | |
| "rev_word_index = dict([(val, key) for (key, val) in word_index.items()])\n", | |
| "def decode(news_id):\n", | |
| " res = ' '.join([rev_word_index.get(i - 3, '?') for i in train_data[news_id]])\n", | |
| " return res\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "0\n", | |
| "? ? ? said as a result of its december acquisition of space co it expects earnings per share in 1987 of 1 15 to 1 30 dlrs per share up from 70 cts in 1986 the company said pretax net should rise to nine to 10 mln dlrs from six mln dlrs in 1986 and rental operation revenues to 19 to 22 mln dlrs from 12 5 mln dlrs it said cash flow per share this year should be 2 50 to three dlrs reuter 3\n", | |
| "3\n", | |
| "\n", | |
| "1\n", | |
| "? generale de banque sa lt ? br and lt heller overseas corp of chicago have each taken 50 pct stakes in ? company sa ? factors generale de banque said in a statement it gave no financial details of the transaction sa ? ? turnover in 1986 was 17 5 billion belgian francs reuter 3\n", | |
| "4\n", | |
| "\n", | |
| "2\n", | |
| "? shr 3 28 dlrs vs 22 cts shr diluted 2 99 dlrs vs 22 cts net 46 0 mln vs 3 328 000 avg shrs 14 0 mln vs 15 2 mln year shr 5 41 dlrs vs 1 56 dlrs shr diluted 4 94 dlrs vs 1 50 dlrs net 78 2 mln vs 25 9 mln avg shrs 14 5 mln vs 15 1 mln note earnings per share reflect the two for one split effective january 6 1987 per share amounts are calculated after preferred stock dividends loss continuing operations for the qtr 1986 includes gains of sale of investments in ? corp of 14 mln dlrs and associated companies of 4 189 000 less writedowns of investments in national ? inc of 11 8 mln and ? corp of 15 6 mln reuter 3\n", | |
| "3\n", | |
| "\n", | |
| "3\n", | |
| "? the farmers home administration the u s agriculture department's farm lending arm could lose about seven billion dlrs in outstanding principal on its severely ? borrowers or about one fourth of its farm loan portfolio the general accounting office gao said in remarks prepared for delivery to the senate agriculture committee brian crowley senior associate director of gao also said that a preliminary analysis of proposed changes in ? financial eligibility standards indicated as many as one half of ? borrowers who received new loans from the agency in 1986 would be ? under the proposed system the agency has proposed evaluating ? credit using a variety of financial ratios instead of relying solely on ? ability senate agriculture committee chairman patrick leahy d vt ? the proposed eligibility changes telling ? administrator ? clark at a hearing that they would mark a dramatic shift in the agency's purpose away from being farmers' lender of last resort toward becoming a big city bank but clark defended the new regulations saying the agency had a responsibility to ? its 70 billion dlr loan portfolio in a ? yet ? manner crowley of gao ? ? arm said the proposed credit ? system attempted to ensure that ? would make loans only to borrowers who had a reasonable change of repaying their debt reuter 3\n", | |
| "4\n", | |
| "\n", | |
| "4\n", | |
| "? seton co said its board has received a proposal from chairman and chief executive officer philip d ? to acquire seton for 15 75 dlrs per share in cash seton said the acquisition bid is subject to ? arranging the necessary financing it said he intends to ask other members of senior management to participate the company said ? owns 30 pct of seton stock and other management members another 7 5 pct seton said it has formed an independent board committee to consider the offer and has deferred the annual meeting it had scheduled for march 31 reuter 3\n", | |
| "4\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "for i in range(5):\n", | |
| " print(i)\n", | |
| " print(decode(i))\n", | |
| " print(train_labels[i])\n", | |
| " print()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# multi-hot encode the input sequences (one 10,000-dim 0/1 vector per article)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import numpy as np" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def vectorize_seqs(seqs, dims=10_000):\n", | |
| " results = np.zeros((len(seqs), dims))\n", | |
| " for i, seq in enumerate(seqs):\n", | |
| " results[i, seq] = 1.\n", | |
| " return results" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "x_train = vectorize_seqs(train_data)\n", | |
| "x_test = vectorize_seqs(test_data)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "((8982, 10000), (2246, 10000))" | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "x_train.shape, x_test.shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# one-hot encode the category labels" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def to_one_hot(labels, dims=46):\n", | |
| " results = np.zeros((len(labels), dims))\n", | |
| " for i, label in enumerate(labels):\n", | |
| " results[i, label] = 1.\n", | |
| " return results" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "one_hot_train_labels = to_one_hot(train_labels)\n", | |
| "one_hot_test_labels = to_one_hot(test_labels)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 18, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "((8982, 46), (2246, 46))" | |
| ] | |
| }, | |
| "execution_count": 18, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "one_hot_train_labels.shape, one_hot_test_labels.shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 19, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# model" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from keras import models\n", | |
| "from keras import layers" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 21, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "WARNING: Logging before flag parsing goes to stderr.\n", | |
| "W0916 13:23:31.052654 4663387584 deprecation_wrapper.py:119] From /anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.\n", | |
| "\n", | |
| "W0916 13:23:31.084854 4663387584 deprecation_wrapper.py:119] From /anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.\n", | |
| "\n", | |
| "W0916 13:23:31.087738 4663387584 deprecation_wrapper.py:119] From /anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = models.Sequential()\n", | |
| "model.add(layers.Dense(64, activation='relu', input_shape=(10_000,)))\n", | |
| "model.add(layers.Dense(64, activation='relu'))\n", | |
| "model.add(layers.Dense(46, activation='softmax'))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 22, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "W0916 13:24:08.987415 4663387584 deprecation_wrapper.py:119] From /anaconda3/lib/python3.7/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.\n", | |
| "\n", | |
| "W0916 13:24:09.014467 4663387584 deprecation_wrapper.py:119] From /anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3295: The name tf.log is deprecated. Please use tf.math.log instead.\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model.compile(optimizer='adam',\n", | |
| " loss='categorical_crossentropy',\n", | |
| " metrics=['acc'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 23, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "_________________________________________________________________\n", | |
| "Layer (type) Output Shape Param # \n", | |
| "=================================================================\n", | |
| "dense_1 (Dense) (None, 64) 640064 \n", | |
| "_________________________________________________________________\n", | |
| "dense_2 (Dense) (None, 64) 4160 \n", | |
| "_________________________________________________________________\n", | |
| "dense_3 (Dense) (None, 46) 2990 \n", | |
| "=================================================================\n", | |
| "Total params: 647,214\n", | |
| "Trainable params: 647,214\n", | |
| "Non-trainable params: 0\n", | |
| "_________________________________________________________________\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model.summary()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 24, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# validation: hold out the first 1000 training samples" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 25, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "x_val = x_train[:1000]\n", | |
| "partial_x_train = x_train[1000:]\n", | |
| "\n", | |
| "y_val = one_hot_train_labels[:1000]\n", | |
| "partial_y_train = one_hot_train_labels[1000:]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 26, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "((1000, 10000), (7982, 10000), (1000, 46), (7982, 46))" | |
| ] | |
| }, | |
| "execution_count": 26, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "x_val.shape, partial_x_train.shape, y_val.shape, partial_y_train.shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 27, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# train" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 34, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Train on 7982 samples, validate on 1000 samples\n", | |
| "Epoch 1/9\n", | |
| "7982/7982 [==============================] - 1s 110us/step - loss: 0.0972 - acc: 0.9638 - val_loss: 1.0223 - val_acc: 0.8060\n", | |
| "Epoch 2/9\n", | |
| "7982/7982 [==============================] - 1s 107us/step - loss: 0.0946 - acc: 0.9614 - val_loss: 1.0177 - val_acc: 0.8100\n", | |
| "Epoch 3/9\n", | |
| "7982/7982 [==============================] - 1s 108us/step - loss: 0.0935 - acc: 0.9607 - val_loss: 1.0411 - val_acc: 0.8010\n", | |
| "Epoch 4/9\n", | |
| "7982/7982 [==============================] - 1s 109us/step - loss: 0.0893 - acc: 0.9634 - val_loss: 1.0584 - val_acc: 0.8020\n", | |
| "Epoch 5/9\n", | |
| "7982/7982 [==============================] - 1s 104us/step - loss: 0.0862 - acc: 0.9624 - val_loss: 1.0672 - val_acc: 0.8080\n", | |
| "Epoch 6/9\n", | |
| "7982/7982 [==============================] - 1s 103us/step - loss: 0.0853 - acc: 0.9609 - val_loss: 1.0696 - val_acc: 0.8070\n", | |
| "Epoch 7/9\n", | |
| "7982/7982 [==============================] - 1s 104us/step - loss: 0.0843 - acc: 0.9633 - val_loss: 1.0586 - val_acc: 0.8050\n", | |
| "Epoch 8/9\n", | |
| "7982/7982 [==============================] - 1s 106us/step - loss: 0.0870 - acc: 0.9622 - val_loss: 1.0894 - val_acc: 0.8010\n", | |
| "Epoch 9/9\n", | |
| "7982/7982 [==============================] - 1s 104us/step - loss: 0.0839 - acc: 0.9623 - val_loss: 1.0723 - val_acc: 0.8060\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "history = model.fit(partial_x_train,\n", | |
| " partial_y_train,\n", | |
| " epochs= 9, # 20,\n", | |
| " batch_size=512,\n", | |
| " validation_data=(x_val, y_val))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 35, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# plot" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 36, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import matplotlib.pyplot as plt" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 37, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "image/png": "\n", | |
| "text/plain": [ | |
| "<Figure size 432x288 with 1 Axes>" | |
| ] | |
| }, | |
| "metadata": { | |
| "needs_background": "light" | |
| }, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "loss = history.history['loss']\n", | |
| "val_loss = history.history['val_loss']\n", | |
| "\n", | |
| "epochs = range(1, len(loss)+1)\n", | |
| "\n", | |
| "plt.plot(epochs, loss, 'bo', label='train loss')\n", | |
| "plt.plot(epochs, val_loss, 'b', label='valid loss')\n", | |
| "plt.title('training and validation loss')\n", | |
| "plt.xlabel('epochs')\n", | |
| "plt.ylabel('loss')\n", | |
| "plt.legend()\n", | |
| "\n", | |
| "plt.show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 38, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "image/png": "\n", | |
| "text/plain": [ | |
| "<Figure size 432x288 with 1 Axes>" | |
| ] | |
| }, | |
| "metadata": { | |
| "needs_background": "light" | |
| }, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "plt.clf()\n", | |
| "\n", | |
| "acc = history.history['acc']\n", | |
| "val_acc = history.history['val_acc']\n", | |
| "\n", | |
| "plt.plot(epochs, acc, 'bo', label='training acc')\n", | |
| "plt.plot(epochs, val_acc, 'b', label='validation acc')\n", | |
| "plt.title('training and validation acc')\n", | |
| "plt.xlabel('epochs')\n", | |
| "plt.ylabel('acc')\n", | |
| "plt.legend()\n", | |
| "\n", | |
| "plt.show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 39, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# validation performance peaks at about epoch 9, so train for 9 epochs" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 40, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "2246/2246 [==============================] - 0s 126us/step\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "results = model.evaluate(x_test, one_hot_test_labels)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 42, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[1.1937172574224157, 0.7911843276936776]" | |
| ] | |
| }, | |
| "execution_count": 42, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "results\n", | |
| "# [1.1937172574224157, 0.7911843276936776]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment