imdb_soft_attention_lstm.py
# -*- coding: utf-8 -*-
'''Trains an LSTM on the IMDB sentiment classification task with soft attention.
Experiments with max_features=10000, maxlen=80 (best test accuracy, %):
1) MLP-dropout-tanh attention: 83.59 at epoch 4
2) MLP-dropout-relu attention: 83.26 at epoch 3
3) MLP-tanh attention: 82.91 at epoch 4
4) GlobalMaxPooling1D attention: 82.44 at epoch 7
'''
from __future__ import print_function
import numpy as np
np.random.seed(1337)  # for reproducibility

# Keras 1.x API: nb_words/nb_epoch, dropout_W/dropout_U and the merge() function
# were renamed or removed in Keras 2.
from keras.preprocessing import sequence
from keras.models import Model
from keras.layers import Dense, Activation, Embedding, GlobalMaxPooling1D, Input
from keras.layers import LSTM, TimeDistributed, Dropout, Reshape, merge
from keras.datasets import imdb
max_features = 10000
maxlen = 80  # cut texts after this number of words (among top max_features most common words)
batch_size = 128

print('Loading data...')
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

print('Pad sequences (samples x time)')
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)
print('Build model...')
input_layer = Input(shape=(maxlen,), dtype='float32')
embedding = Embedding(max_features, 128)(input_layer)
# return_sequences=True keeps the full sequence of hidden states, (batch, maxlen, 128),
# so the attention layer can weight every timestep rather than only the last one.
encoder = LSTM(128, dropout_W=0.5, dropout_U=0.5, return_sequences=True)(embedding)

# begin attention layer: score each timestep, softmax the scores over time,
# then take the score-weighted sum of the LSTM states.
h1 = TimeDistributed(Dense(128, activation='relu'))(encoder)  # per-timestep MLP
h2 = TimeDistributed(Dense(1))(h1)  # one unnormalized score per timestep
r2 = Reshape((maxlen,))(h2)  # (batch, maxlen, 1) -> (batch, maxlen)
attention = Activation('softmax')(r2)  # attention weights, sum to 1 over time
attended_encoding = merge([attention, encoder], mode='dot', dot_axes=(1, 1))  # weighted sum of states: (batch, 128)
# end attention
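# --- Alternative blocks from the docstring's experiments (not active here) ---
# The "GlobalMaxPooling1D attention" run replaces the whole attention block with
# max pooling over time, which is presumably why GlobalMaxPooling1D is imported:
#
#   attended_encoding = GlobalMaxPooling1D()(encoder)
#
# The "MLP-dropout-*" runs presumably add a Dropout layer between the two
# TimeDistributed Dense layers (Dropout is likewise imported but unused), e.g.:
#
#   h1 = TimeDistributed(Dense(128, activation='tanh'))(encoder)
#   h1 = Dropout(0.5)(h1)
#   h2 = TimeDistributed(Dense(1))(h1)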
out = Dense(1, activation='sigmoid')(attended_encoding)
model = Model(input=input_layer, output=out)
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

print('Train...')
model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=15,
          validation_data=(X_test, y_test))
score, acc = model.evaluate(X_test, y_test,
                            batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
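# --- Optional: inspect the learned attention weights (not in the original gist) ---
# Because the functional API lets any intermediate tensor be a model output, a
# second Model sharing the trained layers can expose the softmax weights directly.
attention_model = Model(input=input_layer, output=attention)
attention_weights = attention_model.predict(X_test[:1])  # shape (1, maxlen)
print('Attention weights for first test review:', attention_weights)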