An effort to build an RNN from scratch!
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "RNNExercise.ipynb",
"provenance": [],
"toc_visible": true,
"include_colab_link": true
},
"kernelspec": {
"name": "python2",
"display_name": "Python 2"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/gokulanv/73839706217c6f44dace8e45ba631fd8/rnnexercise.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"id": "p_XCXrGOprdR",
"colab_type": "code",
"colab": {}
},
"source": [
"import csv\n",
"import itertools\n",
"import operator\n",
"import numpy as np\n",
"import nltk\n",
"import sys\n",
"from datetime import datetime\n",
"\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "xcWSf7dPp3B6",
"colab_type": "code",
"outputId": "452fc411-d605-49a7-a3e0-0c94fd0e2e53",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
}
},
"source": [
"nltk.download(\"book\")"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"[nltk_data] Downloading collection u'book'\n",
"[nltk_data] | \n",
"[nltk_data] | Downloading package abc to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/abc.zip.\n",
"[nltk_data] | Downloading package brown to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/brown.zip.\n",
"[nltk_data] | Downloading package chat80 to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/chat80.zip.\n",
"[nltk_data] | Downloading package cmudict to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/cmudict.zip.\n",
"[nltk_data] | Downloading package conll2000 to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/conll2000.zip.\n",
"[nltk_data] | Downloading package conll2002 to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/conll2002.zip.\n",
"[nltk_data] | Downloading package dependency_treebank to\n",
"[nltk_data] | /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/dependency_treebank.zip.\n",
"[nltk_data] | Downloading package genesis to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/genesis.zip.\n",
"[nltk_data] | Downloading package gutenberg to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/gutenberg.zip.\n",
"[nltk_data] | Downloading package ieer to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/ieer.zip.\n",
"[nltk_data] | Downloading package inaugural to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/inaugural.zip.\n",
"[nltk_data] | Downloading package movie_reviews to\n",
"[nltk_data] | /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/movie_reviews.zip.\n",
"[nltk_data] | Downloading package nps_chat to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/nps_chat.zip.\n",
"[nltk_data] | Downloading package names to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/names.zip.\n",
"[nltk_data] | Downloading package ppattach to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/ppattach.zip.\n",
"[nltk_data] | Downloading package reuters to /root/nltk_data...\n",
"[nltk_data] | Downloading package senseval to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/senseval.zip.\n",
"[nltk_data] | Downloading package state_union to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/state_union.zip.\n",
"[nltk_data] | Downloading package stopwords to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/stopwords.zip.\n",
"[nltk_data] | Downloading package swadesh to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/swadesh.zip.\n",
"[nltk_data] | Downloading package timit to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/timit.zip.\n",
"[nltk_data] | Downloading package treebank to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/treebank.zip.\n",
"[nltk_data] | Downloading package toolbox to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/toolbox.zip.\n",
"[nltk_data] | Downloading package udhr to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/udhr.zip.\n",
"[nltk_data] | Downloading package udhr2 to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/udhr2.zip.\n",
"[nltk_data] | Downloading package unicode_samples to\n",
"[nltk_data] | /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/unicode_samples.zip.\n",
"[nltk_data] | Downloading package webtext to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/webtext.zip.\n",
"[nltk_data] | Downloading package wordnet to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/wordnet.zip.\n",
"[nltk_data] | Downloading package wordnet_ic to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/wordnet_ic.zip.\n",
"[nltk_data] | Downloading package words to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/words.zip.\n",
"[nltk_data] | Downloading package maxent_treebank_pos_tagger to\n",
"[nltk_data] | /root/nltk_data...\n",
"[nltk_data] | Unzipping taggers/maxent_treebank_pos_tagger.zip.\n",
"[nltk_data] | Downloading package maxent_ne_chunker to\n",
"[nltk_data] | /root/nltk_data...\n",
"[nltk_data] | Unzipping chunkers/maxent_ne_chunker.zip.\n",
"[nltk_data] | Downloading package universal_tagset to\n",
"[nltk_data] | /root/nltk_data...\n",
"[nltk_data] | Unzipping taggers/universal_tagset.zip.\n",
"[nltk_data] | Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] | Unzipping tokenizers/punkt.zip.\n",
"[nltk_data] | Downloading package book_grammars to\n",
"[nltk_data] | /root/nltk_data...\n",
"[nltk_data] | Unzipping grammars/book_grammars.zip.\n",
"[nltk_data] | Downloading package city_database to\n",
"[nltk_data] | /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/city_database.zip.\n",
"[nltk_data] | Downloading package tagsets to /root/nltk_data...\n",
"[nltk_data] | Unzipping help/tagsets.zip.\n",
"[nltk_data] | Downloading package panlex_swadesh to\n",
"[nltk_data] | /root/nltk_data...\n",
"[nltk_data] | Downloading package averaged_perceptron_tagger to\n",
"[nltk_data] | /root/nltk_data...\n",
"[nltk_data] | Unzipping taggers/averaged_perceptron_tagger.zip.\n",
"[nltk_data] | \n",
"[nltk_data] Done downloading collection book\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {
"tags": []
},
"execution_count": 2
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Gy0rm0omp851",
"colab_type": "code",
"outputId": "6098587e-6c0a-47bf-c358-eaba26fdb356",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 188
}
},
"source": [
"vocabulary_size = 8000\n",
"unknown_token = \"UNKNOWN_TOKEN\"\n",
"sentence_start_token = \"SENTENCE_START\"\n",
"sentence_end_token = \"SENTENCE_END\"\n",
"\n",
"# Read the data and append SENTENCE_START and SENTENCE_END tokens\n",
"print \"Reading CSV file...\"\n",
"with open('reddit-comments-2015-08.csv', 'rb') as f:\n",
"    reader = csv.reader(f, skipinitialspace=True)\n",
"    reader.next()\n",
"    # Split full comments into sentences\n",
"    sentences = itertools.chain(*[nltk.sent_tokenize(x[0].decode('utf-8').lower()) for x in reader])\n",
"    # Append SENTENCE_START and SENTENCE_END\n",
"    sentences = [\"%s %s %s\" % (sentence_start_token, x, sentence_end_token) for x in sentences]\n",
"print \"Parsed %d sentences.\" % (len(sentences))\n",
"\n",
"# Tokenize the sentences into words\n",
"tokenized_sentences = [nltk.word_tokenize(sent) for sent in sentences]\n",
"\n",
"# Count the word frequencies\n",
"word_freq = nltk.FreqDist(itertools.chain(*tokenized_sentences))\n",
"print \"Found %d unique words tokens.\" % len(word_freq.items())\n",
"\n",
"# Get the most common words and build index_to_word and word_to_index vectors\n",
"vocab = word_freq.most_common(vocabulary_size-1)\n",
"index_to_word = [x[0] for x in vocab]\n",
"index_to_word.append(unknown_token)\n",
"word_to_index = dict([(w,i) for i,w in enumerate(index_to_word)])\n",
"\n",
"print \"Using vocabulary size %d.\" % vocabulary_size\n",
"print \"The least frequent word in our vocabulary is '%s' and appeared %d times.\" % (vocab[-1][0], vocab[-1][1])\n",
"\n",
"# Replace all words not in our vocabulary with the unknown token\n",
"for i, sent in enumerate(tokenized_sentences):\n",
"    tokenized_sentences[i] = [w if w in word_to_index else unknown_token for w in sent]\n",
"\n",
"print \"\\nExample sentence: '%s'\" % sentences[0]\n",
"print \"\\nExample sentence after Pre-processing: '%s'\" % tokenized_sentences[0]"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"Reading CSV file...\n",
"Parsed 79170 sentences.\n",
"Found 65498 unique words tokens.\n",
"Using vocabulary size 8000.\n",
"The least frequent word in our vocabulary is 'traction' and appeared 10 times.\n",
"\n",
"Example sentence: 'SENTENCE_START i joined a new league this year and they have different scoring rules than i'm used to. SENTENCE_END'\n",
"\n",
"Example sentence after Pre-processing: '[u'SENTENCE_START', u'i', u'joined', u'a', u'new', u'league', u'this', u'year', u'and', u'they', u'have', u'different', u'scoring', u'rules', u'than', u'i', u\"'m\", u'used', u'to', u'.', u'SENTENCE_END']'\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "nmmr5wrUqBOL",
"colab_type": "code",
"colab": {}
},
"source": [
"# Create the training data\n",
"X_train = np.asarray([[word_to_index[w] for w in sent[:-1]] for sent in tokenized_sentences])\n",
"y_train = np.asarray([[word_to_index[w] for w in sent[1:]] for sent in tokenized_sentences])"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "fO9en4KBqnZm",
"colab_type": "code",
"outputId": "aa734323-8246-4dfb-b14d-267816266eab",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 134
}
},
"source": [
"x_example, y_example = X_train[17], y_train[17]\n",
"print \"x:\\n%s\\n%s\" % (\" \".join([index_to_word[x] for x in x_example]), x_example)\n",
"print \"\\ny:\\n%s\\n%s\" % (\" \".join([index_to_word[x] for x in y_example]), y_example)"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"x:\n",
"SENTENCE_START what are n't you understanding about this ? !\n",
"[0, 51, 27, 16, 10, 861, 54, 25, 34, 69]\n",
"\n",
"y:\n",
"what are n't you understanding about this ? ! SENTENCE_END\n",
"[51, 27, 16, 10, 861, 54, 25, 34, 69, 1]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "e6kKpX5oAa3C",
"colab_type": "code",
"colab": {}
},
"source": [
"def softmax(x):\n",
"    xt = np.exp(x - np.max(x))\n",
"    return xt / np.sum(xt)\n",
"\n",
"def save_model_parameters_theano(outfile, model):\n",
"    U, V, W = model.U.get_value(), model.V.get_value(), model.W.get_value()\n",
"    np.savez(outfile, U=U, V=V, W=W)\n",
"    print \"Saved model parameters to %s.\" % outfile\n",
"\n",
"def load_model_parameters_theano(path, model):\n",
"    npzfile = np.load(path)\n",
"    U, V, W = npzfile[\"U\"], npzfile[\"V\"], npzfile[\"W\"]\n",
"    model.hidden_dim = U.shape[0]\n",
"    model.word_dim = U.shape[1]\n",
"    model.U.set_value(U)\n",
"    model.V.set_value(V)\n",
"    model.W.set_value(W)\n",
"    print \"Loaded model parameters from %s. hidden_dim=%d word_dim=%d\" % (path, U.shape[0], U.shape[1])"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "LRNjEp6Jq3bA",
"colab_type": "code",
"colab": {}
},
"source": [
"class RNNPy:\n",
"\n",
"    def __init__(self, word_dim, hidden_dim = 100, bptt_truncate = 4):\n",
"        self.word_dim = word_dim\n",
"        self.hidden_dim = hidden_dim\n",
"        self.bptt_truncate = bptt_truncate\n",
"        self.U = np.random.uniform(-1/np.sqrt(word_dim), 1/np.sqrt(word_dim), (hidden_dim, word_dim))\n",
"        self.V = np.random.uniform(-1/np.sqrt(hidden_dim), 1/np.sqrt(hidden_dim), (word_dim, hidden_dim))\n",
"        self.W = np.random.uniform(-1/np.sqrt(hidden_dim), 1/np.sqrt(hidden_dim), (hidden_dim, hidden_dim))"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "P8dIiXgZvTHF",
"colab_type": "code",
"colab": {}
},
"source": [
"def forward_prop(self, x):\n",
"\n",
"    # x is a sequence of word indices; T is the number of time steps\n",
"    T = len(x)\n",
"    # s holds the hidden states; the extra row s[-1] is the all-zero initial state\n",
"    s = np.zeros((T+1, self.hidden_dim))\n",
"    o = np.zeros((T, self.word_dim))\n",
"\n",
"    for i in range(T):\n",
"        s[i] = np.tanh(self.U[:,x[i]] + np.dot(self.W, s[i-1]))\n",
"        o[i] = softmax(np.dot(self.V, s[i]))\n",
"\n",
"    return [o, s]\n",
"\n",
"RNNPy.forward_prop = forward_prop\n"
],
"execution_count": 0,
"outputs": []
},
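{
"cell_type": "markdown",
"metadata": {},
"source": [
"In symbols, the forward pass above is roughly: at each step $t$, with $x_t$ the one-hot encoding of the $t$-th word index and $s_{-1} = \\mathbf{0}$,\n",
"\n",
"$$s_t = \\tanh(U x_t + W s_{t-1}), \\qquad o_t = \\mathrm{softmax}(V s_t).$$\n",
"\n",
"The code indexes the column `U[:,x[i]]` directly instead of multiplying by a one-hot vector, `s` has shape `(T+1, hidden_dim)` (one extra row for the initial state), and `o` has shape `(T, word_dim)`, one distribution over the vocabulary per time step."
]
},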
{
"cell_type": "code",
"metadata": {
"id": "a9TtQ6xgAzNx",
"colab_type": "code",
"colab": {}
},
"source": [
"def predict(self, x):\n",
"    # Perform forward propagation and return index of the highest score\n",
"    o, s = self.forward_prop(x)\n",
"    return np.argmax(o, axis=1)\n",
"\n",
"RNNPy.predict = predict"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "UC5ehxCfa3lU",
"colab_type": "code",
"outputId": "a9f5b7ba-3408-41ef-8762-9bd4613ea802",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 151
}
},
"source": [
"np.random.seed(10)\n",
"model = RNNPy(vocabulary_size)\n",
"o, s = model.forward_prop(X_train[10])\n",
"print o.shape\n",
"print o"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"(45, 8000)\n",
"[[0. 0. 0. ... 0. 0. 0. ]\n",
" [0.0001254 0.00012548 0.00012455 ... 0.00012493 0.00012458 0.00012458]\n",
" [0.00012389 0.00012525 0.00012473 ... 0.00012546 0.0001259 0.00012535]\n",
" ...\n",
" [0.00012406 0.00012463 0.00012539 ... 0.00012617 0.00012463 0.00012589]\n",
" [0.00012547 0.00012431 0.00012485 ... 0.00012427 0.00012611 0.00012472]\n",
" [0.00012482 0.00012529 0.00012477 ... 0.00012488 0.00012508 0.0001267 ]]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "KCfcuK2Wa3ZB",
"colab_type": "code",
"outputId": "65c94c38-b6f5-47f4-dc03-bc571d054051",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 101
}
},
"source": [
"predictions = model.predict(X_train[10])\n",
"print predictions.shape\n",
"print predictions"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"(45,)\n",
"[ 0 2048 7434 7430 1013 3562 7366 1627 2212 3251 7299 6722 565 238\n",
" 2539 21 6548 261 5274 2082 1835 5376 3522 477 7051 7352 7715 3822\n",
" 6914 5059 3850 6176 743 2082 5561 2182 6569 2800 2752 6821 4437 7021\n",
" 6399 6912 3922]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Cmt4Ohnsa3JC",
"colab_type": "code",
"colab": {}
},
"source": [
"def calculate_total_loss(self, x, y):\n",
"    L = 0\n",
"    # For each sentence...\n",
"    for i in np.arange(len(y)):\n",
"        o, s = self.forward_prop(x[i])\n",
"        # We only care about our prediction of the \"correct\" words\n",
"        correct_word_predictions = o[np.arange(len(y[i])), y[i]]\n",
"        # Add to the loss based on how far off we were\n",
"        L += -1 * np.sum(np.log(correct_word_predictions))\n",
"    return L\n",
"\n",
"def calculate_loss(self, x, y):\n",
"    # Divide the total loss by the number of training tokens\n",
"    N = np.sum([len(y_i) for y_i in y])\n",
"    return self.calculate_total_loss(x, y) / N\n",
"\n",
"RNNPy.calculate_total_loss = calculate_total_loss\n",
"RNNPy.calculate_loss = calculate_loss"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "f_PuasRRa22u",
"colab_type": "code",
"outputId": "84405546-1ff6-47a3-b1a8-234307dada23",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 138
}
},
"source": [
"# Limit to 1000 examples to save time\n",
"print \"Expected Loss for random predictions: %f\" % np.log(vocabulary_size)\n",
"print \"Actual loss: %f\" % model.calculate_loss(X_train[:1000], y_train[:1000])"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"Expected Loss for random predictions: 8.987197\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py:15: DeprecationWarning: Calling np.sum(generator) is deprecated, and in the future will give a different result. Use np.sum(np.fromiter(generator)) or the python sum builtin instead.\n",
"  from ipykernel import kernelapp as app\n",
"/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py:9: RuntimeWarning: divide by zero encountered in log\n",
"  if __name__ == '__main__':\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"Actual loss: inf\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Z8YNtU7MeZIC",
"colab_type": "text"
},
"source": [
"# Backpropagation through time (BPTT)"
]
},
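{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch of the gradients the `bptt` cell below accumulates, assuming the forward pass $s_t = \\tanh(U x_t + W s_{t-1})$, $o_t = \\mathrm{softmax}(V s_t)$ and the cross-entropy loss $E = -\\sum_t \\log o_{t, y_t}$. Writing $\\delta^o_t = o_t - \\mathrm{onehot}(y_t)$ (the `delta_o[t]` in the code):\n",
"\n",
"$$\\frac{\\partial E}{\\partial V} = \\sum_t \\delta^o_t \\, s_t^\\top, \\qquad \\delta_t = V^\\top \\delta^o_t \\odot (1 - s_t^2).$$\n",
"\n",
"Then, stepping back from step $t$ for at most `bptt_truncate` steps, the inner loop adds $\\delta \\, s_{k-1}^\\top$ to $\\partial E/\\partial W$, adds $\\delta$ to column $x_k$ of $\\partial E/\\partial U$, and propagates $\\delta \\leftarrow W^\\top \\delta \\odot (1 - s_{k-1}^2)$."
]
},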
{
"cell_type": "code",
"metadata": {
"id": "TW0YcO5N2k5d",
"colab_type": "code",
"colab": {}
},
"source": [
"def bptt(self, x, y):\n",
"    T = len(y)\n",
"    # Perform forward propagation\n",
"    o, s = self.forward_prop(x)\n",
"    # We accumulate the gradients in these variables\n",
"    dLdU = np.zeros(self.U.shape)\n",
"    dLdV = np.zeros(self.V.shape)\n",
"    dLdW = np.zeros(self.W.shape)\n",
"    delta_o = o\n",
"    delta_o[np.arange(len(y)), y] -= 1.\n",
"    # For each output backwards...\n",
"    for t in np.arange(T)[::-1]:\n",
"        dLdV += np.outer(delta_o[t], s[t].T)\n",
"        # Initial delta calculation\n",
"        delta_t = self.V.T.dot(delta_o[t]) * (1 - (s[t] ** 2))\n",
"        # Backpropagation through time (for at most self.bptt_truncate steps)\n",
"        for bptt_step in np.arange(max(0, t-self.bptt_truncate), t+1)[::-1]:\n",
"            # print \"Backpropagation step t=%d bptt step=%d \" % (t, bptt_step)\n",
"            dLdW += np.outer(delta_t, s[bptt_step-1])\n",
"            dLdU[:,x[bptt_step]] += delta_t\n",
"            # Update delta for next step\n",
"            delta_t = self.W.T.dot(delta_t) * (1 - s[bptt_step-1] ** 2)\n",
"    return [dLdU, dLdV, dLdW]\n",
"\n",
"RNNPy.bptt = bptt"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "gIQAuc5aegP3",
"colab_type": "text"
},
"source": [
"# Gradient Checking"
]
},
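{
"cell_type": "markdown",
"metadata": {},
"source": [
"Gradient checking compares each backpropagated gradient against a centered finite-difference estimate of the loss. For every scalar parameter $\\theta$ (an entry of $U$, $V$ or $W$) the cell below computes\n",
"\n",
"$$\\frac{\\partial L}{\\partial \\theta} \\approx \\frac{L(\\theta + h) - L(\\theta - h)}{2h}$$\n",
"\n",
"with a small $h$ (here $h = 0.001$) and flags a mismatch if the relative error $|g_{bp} - g_{est}| / (|g_{bp}| + |g_{est}|)$ exceeds `error_threshold`. A tiny vocabulary is used so the check stays cheap."
]
},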
{
"cell_type": "code",
"metadata": {
"id": "YkChTWb3QPRM",
"colab_type": "code",
"outputId": "e683a2d3-37ab-4015-f498-2dcfb5973f09",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 168
}
},
"source": [
"def gradient_check(self, x, y, h=0.001, error_threshold=0.01):\n",
"    # Calculate the gradients using backpropagation. We want to check whether these are correct.\n",
"    bptt_gradients = self.bptt(x, y)\n",
"    # List of all parameters we want to check.\n",
"    model_parameters = ['U', 'V', 'W']\n",
"    # Gradient check for each parameter\n",
"    for pidx, pname in enumerate(model_parameters):\n",
"        # Get the actual parameter value from the model, e.g. model.W\n",
"        parameter = operator.attrgetter(pname)(self)\n",
"        print \"Performing gradient check for parameter %s with size %d.\" % (pname, np.prod(parameter.shape))\n",
"        # Iterate over each element of the parameter matrix, e.g. (0,0), (0,1), ...\n",
"        it = np.nditer(parameter, flags=['multi_index'], op_flags=['readwrite'])\n",
"        while not it.finished:\n",
"            ix = it.multi_index\n",
"            # Save the original value so we can reset it later\n",
"            original_value = parameter[ix]\n",
"            # Estimate the gradient using (f(x+h) - f(x-h))/(2*h)\n",
"            parameter[ix] = original_value + h\n",
"            gradplus = self.calculate_total_loss([x],[y])\n",
"            parameter[ix] = original_value - h\n",
"            gradminus = self.calculate_total_loss([x],[y])\n",
"            estimated_gradient = (gradplus - gradminus)/(2*h)\n",
"            # Reset parameter to original value\n",
"            parameter[ix] = original_value\n",
"            # The gradient for this parameter calculated using backpropagation\n",
"            backprop_gradient = bptt_gradients[pidx][ix]\n",
"            # Calculate the relative error: (|x - y|/(|x| + |y|))\n",
"            relative_error = np.abs(backprop_gradient - estimated_gradient)/(np.abs(backprop_gradient) + np.abs(estimated_gradient))\n",
"            # If the error is too large, fail the gradient check\n",
"            if relative_error > error_threshold:\n",
"                print \"Gradient Check ERROR: parameter=%s ix=%s\" % (pname, ix)\n",
"                print \"+h Loss: %f\" % gradplus\n",
"                print \"-h Loss: %f\" % gradminus\n",
"                print \"Estimated gradient: %f\" % estimated_gradient\n",
"                print \"Backpropagation gradient: %f\" % backprop_gradient\n",
"                print \"Relative Error: %f\" % relative_error\n",
"                return\n",
"            it.iternext()\n",
"        print \"Gradient check for parameter %s passed.\" % (pname)\n",
"\n",
"RNNPy.gradient_check = gradient_check\n",
"\n",
"# To avoid performing millions of expensive calculations we use a smaller vocabulary size for checking.\n",
"grad_check_vocab_size = 100\n",
"np.random.seed(10)\n",
"model = RNNPy(grad_check_vocab_size, 10, bptt_truncate=1000)\n",
"model.gradient_check([0,1,2,3], [1,2,3,4])"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"Performing gradient check for parameter U with size 1000.\n",
"Gradient check for parameter U passed.\n",
"Performing gradient check for parameter V with size 1000.\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py:9: RuntimeWarning: divide by zero encountered in log\n",
"  if __name__ == '__main__':\n",
"/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py:22: RuntimeWarning: invalid value encountered in double_scalars\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"Gradient check for parameter V passed.\n",
"Performing gradient check for parameter W with size 100.\n",
"Gradient check for parameter W passed.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "BLVi8OJIe2j4",
"colab_type": "text"
},
"source": [
"## Stochastic gradient descent implementation"
]
},
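{
"cell_type": "markdown",
"metadata": {},
"source": [
"Each SGD step below nudges every parameter against its BPTT gradient for one training sentence:\n",
"\n",
"$$U \\leftarrow U - \\eta \\frac{\\partial L}{\\partial U}, \\qquad V \\leftarrow V - \\eta \\frac{\\partial L}{\\partial V}, \\qquad W \\leftarrow W - \\eta \\frac{\\partial L}{\\partial W},$$\n",
"\n",
"where $\\eta$ is the learning rate. The outer training loop evaluates the loss every few epochs and halves $\\eta$ whenever the loss goes up."
]
},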
{
"cell_type": "code",
"metadata": {
"id": "P190PPNQerJV",
"colab_type": "code",
"colab": {}
},
"source": [
"# Performs one step of SGD.\n",
"def numpy_sgd_step(self, x, y, learning_rate):\n",
"    # Calculate the gradients\n",
"    dLdU, dLdV, dLdW = self.bptt(x, y)\n",
"    # Change parameters according to gradients and learning rate\n",
"    self.U -= learning_rate * dLdU\n",
"    self.V -= learning_rate * dLdV\n",
"    self.W -= learning_rate * dLdW\n",
"\n",
"RNNPy.sgd_step = numpy_sgd_step"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "D53uJ_gee8oe",
"colab_type": "code",
"colab": {}
},
"source": [
"# Outer SGD Loop\n",
"# - model: The RNN model instance\n",
"# - X_train: The training data set\n",
"# - y_train: The training data labels\n",
"# - learning_rate: Initial learning rate for SGD\n",
"# - nepoch: Number of times to iterate through the complete dataset\n",
"# - evaluate_loss_after: Evaluate the loss after this many epochs\n",
"def train_with_sgd(model, X_train, y_train, learning_rate=0.005, nepoch=100, evaluate_loss_after=5):\n",
"    # We keep track of the losses so we can plot them later\n",
"    losses = []\n",
"    num_examples_seen = 0\n",
"    for epoch in range(nepoch):\n",
"        # Optionally evaluate the loss\n",
"        if (epoch % evaluate_loss_after == 0):\n",
"            loss = model.calculate_loss(X_train, y_train)\n",
"            losses.append((num_examples_seen, loss))\n",
"            time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')\n",
"            print \"%s: Loss after num_examples_seen=%d epoch=%d: %f\" % (time, num_examples_seen, epoch, loss)\n",
"            # Adjust the learning rate if loss increases\n",
"            if (len(losses) > 1 and losses[-1][1] > losses[-2][1]):\n",
"                learning_rate = learning_rate * 0.5\n",
"                print \"Setting learning rate to %f\" % learning_rate\n",
"            sys.stdout.flush()\n",
"        # For each training example...\n",
"        for i in range(len(y_train)):\n",
"            # One SGD step\n",
"            model.sgd_step(X_train[i], y_train[i], learning_rate)\n",
"            num_examples_seen += 1"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "tLw_nLKQfABy",
"colab_type": "code",
"outputId": "f89d5c20-2ad8-4e76-8c82-e53cfe87e9e4",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
}
},
"source": [
"np.random.seed(10)\n",
"model = RNNPy(vocabulary_size)\n",
"%timeit model.sgd_step(X_train[10], y_train[10], 0.005)"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"10 loops, best of 3: 153 ms per loop\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "99E8hg4XfFbR",
"colab_type": "code",
"outputId": "9aa679f9-0056-4544-ee5c-3cb5e06c58e2",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 272
}
},
"source": [
"np.random.seed(10)\n",
"# Train on a small subset of the data to see what happens\n",
"model = RNNPy(vocabulary_size)\n",
"losses = train_with_sgd(model, X_train[:100], y_train[:100], nepoch=10, evaluate_loss_after=1)"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py:15: DeprecationWarning: Calling np.sum(generator) is deprecated, and in the future will give a different result. Use np.sum(np.fromiter(generator)) or the python sum builtin instead.\n",
"  from ipykernel import kernelapp as app\n",
"/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py:9: RuntimeWarning: divide by zero encountered in log\n",
"  if __name__ == '__main__':\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"2020-01-15 19:38:44: Loss after num_examples_seen=0 epoch=0: inf\n",
"2020-01-15 19:38:52: Loss after num_examples_seen=100 epoch=1: inf\n",
"2020-01-15 19:39:00: Loss after num_examples_seen=200 epoch=2: inf\n",
"2020-01-15 19:39:09: Loss after num_examples_seen=300 epoch=3: inf\n",
"2020-01-15 19:39:17: Loss after num_examples_seen=400 epoch=4: inf\n",
"2020-01-15 19:39:25: Loss after num_examples_seen=500 epoch=5: inf\n",
"2020-01-15 19:39:33: Loss after num_examples_seen=600 epoch=6: inf\n",
"2020-01-15 19:39:42: Loss after num_examples_seen=700 epoch=7: inf\n",
"2020-01-15 19:39:50: Loss after num_examples_seen=800 epoch=8: inf\n",
"2020-01-15 19:39:58: Loss after num_examples_seen=900 epoch=9: inf\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "GkwgC04jtupa",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
}
]
} |