Skip to content

Instantly share code, notes, and snippets.

@ameasure
Created February 11, 2017 19:47
Show Gist options
  • Save ameasure/6f3fbdcccab4f319ab8dea4c62206a73 to your computer and use it in GitHub Desktop.
imdb_soft_attention_lstm.py
# -*- coding: utf-8 -*-
'''Trains an LSTM on the IMDB sentiment classification task with soft attention.
Experiments with max_features=10000, max_len=80
1) MLP-dropout-tanh attention: 83.59 at epoch 4
2) MLP-dropout-relu attention: 83.26 at epoch 3
3) MLP-tanh attention: 82.91 at epoch 4
4) GlobalMaxPooling1D attention: 82.44 at epoch 7
'''
from __future__ import print_function
import numpy as np
np.random.seed(1337) # for reproducibility
from keras.preprocessing import sequence
from keras.models import Model
from keras.layers import Dense, Activation, Embedding, GlobalMaxPooling1D, Input
from keras.layers import LSTM, TimeDistributed, Dropout, Reshape, merge
from keras.datasets import imdb
# Hyperparameters: vocabulary size, sequence cap, and minibatch size.
max_features = 10000
maxlen = 80  # keep only the first 80 words of each review (top max_features vocab)
batch_size = 128

# --- Data: fetch the IMDB corpus and pad every review to a fixed length. ---
print('Loading data...')
(train_x, train_y), (test_x, test_y) = imdb.load_data(nb_words=max_features)
print(len(train_x), 'train sequences')
print(len(test_x), 'test sequences')

print('Pad sequences (samples x time)')
train_x = sequence.pad_sequences(train_x, maxlen=maxlen)
test_x = sequence.pad_sequences(test_x, maxlen=maxlen)
print('X_train shape:', train_x.shape)
print('X_test shape:', test_x.shape)

# --- Model: LSTM encoder followed by a soft-attention pooling layer. ---
print('Build model...')
words = Input(shape=(maxlen,), dtype='float32')
embedded = Embedding(max_features, 128)(words)
# return_sequences=True exposes every timestep's hidden state to the attention MLP.
encoded = LSTM(128, dropout_W=0.5, dropout_U=0.5, return_sequences=True)(embedded)
# Attention: a per-timestep MLP produces one score per position, softmax
# normalizes the scores over time, and the dot-merge forms the
# attention-weighted sum of the encoder states.
scores = TimeDistributed(Dense(1))(TimeDistributed(Dense(128, activation='relu'))(encoded))
weights = Activation('softmax')(Reshape((maxlen,))(scores))
context = merge([weights, encoded], mode='dot', dot_axes=(1, 1))
prediction = Dense(1, activation='sigmoid')(context)

model = Model(input=words, output=prediction)
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# --- Train with held-out validation, then report final test metrics. ---
print('Train...')
model.fit(train_x, train_y, batch_size=batch_size, nb_epoch=15,
          validation_data=(test_x, test_y))
score, acc = model.evaluate(test_x, test_y,
                            batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment