Skip to content

Instantly share code, notes, and snippets.

@nzw0301
Created July 18, 2016 17:55
Show Gist options
  • Save nzw0301/227c23fa5a7dc463a6bda44eee00d720 to your computer and use it in GitHub Desktop.
Save nzw0301/227c23fa5a7dc463a6bda44eee00d720 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using Theano backend.\n"
]
}
],
"source": [
"'''This example demonstrates the use of fast text in https://arxiv.org/abs/1607.01759.\n",
"NOTE: not using hieraltical softmax and bi-gram features\n",
"Gets to 0.88 test accuracy after 6 epochs. (equal CNN in Keras example: https://github.com/fchollet/keras/blob/master/examples/imdb_cnn.py)\n",
"10s/epoch on Intel i5 1.4Ghz CPU.\n",
"1s/epoch on Tesla K40 GPU.\n",
"'''\n",
" \n",
"import numpy as np\n",
"np.random.seed(1337)\n",
"\n",
"from keras.preprocessing import sequence\n",
"from keras.models import Sequential\n",
"from keras.layers import Dense, Lambda, Embedding\n",
"from keras.datasets import imdb\n",
"from keras import backend as K"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading data...\n",
"20000 train sequences\n",
"5000 test sequences\n",
"Pad sequences (samples x time)\n"
]
}
],
"source": [
"# set parameters:\n",
"max_features = 5000\n",
"maxlen = 400\n",
"batch_size = 32\n",
"embedding_dims = 50\n",
"\n",
"print('Loading data...')\n",
"(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features,\n",
" test_split=0.2)\n",
"\n",
"print(len(X_train), 'train sequences')\n",
"print(len(X_test), 'test sequences')\n",
"\n",
"print('Pad sequences (samples x time)')\n",
"X_train = sequence.pad_sequences(X_train, maxlen=maxlen)\n",
"X_test = sequence.pad_sequences(X_test, maxlen=maxlen)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def means(x):\n",
" return K.mean(x, axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train on 20000 samples, validate on 5000 samples\n",
"Epoch 1/10\n",
"20000/20000 [==============================] - 10s - loss: 0.6322 - acc: 0.7113 - val_loss: 0.5388 - val_acc: 0.7882\n",
"Epoch 2/10\n",
"20000/20000 [==============================] - 9s - loss: 0.4584 - acc: 0.8365 - val_loss: 0.4032 - val_acc: 0.8496\n",
"Epoch 3/10\n",
"20000/20000 [==============================] - 10s - loss: 0.3575 - acc: 0.8712 - val_loss: 0.3462 - val_acc: 0.8674\n",
"Epoch 4/10\n",
"20000/20000 [==============================] - 9s - loss: 0.3080 - acc: 0.8864 - val_loss: 0.3162 - val_acc: 0.8768\n",
"Epoch 5/10\n",
"20000/20000 [==============================] - 9s - loss: 0.2780 - acc: 0.8959 - val_loss: 0.3021 - val_acc: 0.8794\n",
"Epoch 6/10\n",
"20000/20000 [==============================] - 9s - loss: 0.2567 - acc: 0.9040 - val_loss: 0.2918 - val_acc: 0.8824\n",
"Epoch 7/10\n",
"20000/20000 [==============================] - 10s - loss: 0.2402 - acc: 0.9105 - val_loss: 0.2843 - val_acc: 0.8850\n",
"Epoch 8/10\n",
"20000/20000 [==============================] - 10s - loss: 0.2262 - acc: 0.9166 - val_loss: 0.2808 - val_acc: 0.8854\n",
"Epoch 9/10\n",
"20000/20000 [==============================] - 10s - loss: 0.2153 - acc: 0.9213 - val_loss: 0.2792 - val_acc: 0.8868\n",
"Epoch 10/10\n",
"20000/20000 [==============================] - 9s - loss: 0.2060 - acc: 0.9244 - val_loss: 0.2788 - val_acc: 0.8874\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x10e083f28>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = Sequential()\n",
"model.add(Embedding(input_dim=5000, output_dim=embedding_dims, init='glorot_uniform'))\n",
"model.add(Lambda(means, output_shape=(embedding_dims,)))\n",
"\n",
"# We project onto a single unit output layer, and squash it with a sigmoid:\n",
"model.add(Dense(1, activation=\"sigmoid\"))\n",
"\n",
"model.compile(loss='binary_crossentropy',\n",
" optimizer='adam',\n",
" metrics=['accuracy'])\n",
"\n",
"model.fit(X_train, y_train,\n",
" batch_size=batch_size,\n",
" validation_data=(X_test, y_test))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [Root]",
"language": "python",
"name": "Python [Root]"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment