# 0.1176554
def homework(train_X, train_Y, tokenizer_en, tokenizer_ja):
    import numpy as np
    from keras.models import Model
    from keras.layers import Input, Embedding, Dense, LSTM

    emb_dim = 256
    hid_dim = 256
    en_vocab_size = len(tokenizer_en.word_index) + 1
    ja_vocab_size = len(tokenizer_ja.word_index) + 1
    seqX_len = len(train_X[0])
    seqY_len = len(train_Y[0])

    # Encoder: embed the English input and keep only the final LSTM states
    encoder_inputs = Input(shape=(seqX_len,))
    encoder_embedded = Embedding(en_vocab_size, emb_dim, mask_zero=True)(encoder_inputs)
    _, *encoder_states = LSTM(hid_dim, return_state=True)(encoder_embedded)

    # Decoder: teacher forcing, initialized with the encoder states
    decoder_inputs = Input(shape=(seqY_len,))
    decoder_embedding = Embedding(ja_vocab_size, emb_dim)
    decoder_embedded = decoder_embedding(decoder_inputs)
    decoder_lstm = LSTM(hid_dim, return_sequences=True, return_state=True)
    decoder_outputs, _, _ = decoder_lstm(decoder_embedded, initial_state=encoder_states)
    decoder_dense = Dense(ja_vocab_size, activation='softmax')
    decoder_outputs = decoder_dense(decoder_outputs)

    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy')

    # Targets are the decoder inputs shifted left by one step, padded with 0
    train_target = np.hstack((train_Y[:, 1:], np.zeros((len(train_Y), 1), dtype=np.int32)))
    model.fit([train_X, train_Y], np.expand_dims(train_target, -1),
              batch_size=128, epochs=10, verbose=2, validation_split=0.2)

    # Separate models for inference: the encoder returns its states,
    # the decoder consumes one token plus the previous states
    encoder_model = Model(encoder_inputs, encoder_states)

    decoder_states_inputs = [Input(shape=(hid_dim,)), Input(shape=(hid_dim,))]
    decoder_inputs = Input(shape=(1,))
    decoder_embedded = decoder_embedding(decoder_inputs)
    decoder_outputs, *decoder_states = decoder_lstm(decoder_embedded, initial_state=decoder_states_inputs)
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)

    def decode_sequence(input_seq, bos_eos, max_output_length):
        states_value = encoder_model.predict(input_seq)
        target_seq = np.array(bos_eos[0])  # index corresponding to bos_eos[0] = "<s>"
        output_seq = bos_eos[0][:]
        while True:
            output_tokens, *states_value = decoder_model.predict([target_seq] + states_value)
            sampled_token_index = [np.argmax(output_tokens[0, -1, :])]
            output_seq += sampled_token_index
            if (sampled_token_index == bos_eos[1] or len(output_seq) > max_output_length):
                break
            target_seq = np.array(sampled_token_index)
        return output_seq

    return decode_sequence
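For reference, a minimal sketch of how the decode_sequence closure returned by homework() might be called. The way bos_eos is built, the choice of test sentence, and the index-to-word mapping are assumptions about the surrounding homework harness, not part of the gist.

# Hypothetical usage of the closure returned by homework(); the tokenizer
# calls, bos_eos convention and test sentence are assumed, not from the gist.
import numpy as np

decode_sequence = homework(train_X, train_Y, tokenizer_en, tokenizer_ja)

# Assumed convention: bos_eos = [[index of "<s>"], [index of "</s>"]]
bos_eos = tokenizer_ja.texts_to_sequences(["<s>", "</s>"])

test_seq = train_X[:1]  # one padded English sentence, shape (1, seqX_len)
predicted = decode_sequence(test_seq, bos_eos, max_output_length=100)

# Map the predicted indices back to Japanese words (index 0 is padding)
index_to_word = {v: k for k, v in tokenizer_ja.word_index.items()}
print(' '.join(index_to_word.get(i, '?') for i in predicted))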
# IndexError (this attention variant raised an IndexError when run)
def homework(train_X, train_Y, tokenizer_en, tokenizer_ja):
    import numpy as np
    from keras.models import Model
    from keras.layers import Input, Activation, Embedding, Dense, LSTM, concatenate, dot

    # Constants
    emb_dim = 256
    hid_dim = 256
    att_dim = 256
    en_vocab_size = len(tokenizer_en.word_index) + 1
    ja_vocab_size = len(tokenizer_ja.word_index) + 1
    seqX_len = len(train_X[0])
    seqY_len = len(train_Y[0])

    # ----- Model construction ----- #
    # Encoder: keep the full hidden-state sequence for attention
    encoder_inputs = Input(shape=(seqX_len,))
    encoder_embedded = Embedding(en_vocab_size, emb_dim, mask_zero=True)(encoder_inputs)
    encoded_seq, *encoder_states = LSTM(hid_dim, return_sequences=True, return_state=True)(encoder_embedded)

    # Decoder
    decoder_inputs = Input(shape=(seqY_len,))
    decoder_embedding = Embedding(ja_vocab_size, emb_dim)
    decoder_embedded = decoder_embedding(decoder_inputs)
    decoder_lstm = LSTM(hid_dim, return_sequences=True, return_state=True)
    decoded_seq, _, _ = decoder_lstm(decoder_embedded, initial_state=encoder_states)

    # Attention: score, softmax over source positions, context, then tanh layer
    score_dense = Dense(hid_dim)
    score = score_dense(decoded_seq)
    score = dot([score, encoded_seq], axes=(2, 2))
    attention = Activation('softmax')(score)
    context = dot([attention, encoded_seq], axes=(2, 1))
    concat = concatenate([context, decoded_seq], axis=2)
    attention_dense = Dense(att_dim, activation='tanh')
    attentional = attention_dense(concat)
    output_dense = Dense(ja_vocab_size, activation='softmax')
    outputs = output_dense(attentional)

    model = Model([encoder_inputs, decoder_inputs], outputs)
    model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy')

    # Targets are the decoder inputs shifted left by one step, padded with 0
    train_target = np.hstack((train_Y[:, 1:], np.zeros((len(train_Y), 1), dtype=np.int32)))
    model.fit([train_X, train_Y], np.expand_dims(train_target, -1),
              batch_size=128, epochs=10, verbose=2, validation_split=0.2)

    # ----- Generation with the trained model ----- #
    encoder_model = Model(encoder_inputs, [encoded_seq] + encoder_states)

    decoder_states_inputs = [Input(shape=(hid_dim,)), Input(shape=(hid_dim,))]
    decoder_inputs = Input(shape=(1,))
    decoder_embedded = decoder_embedding(decoder_inputs)
    decoded_seq, *decoder_states = decoder_lstm(decoder_embedded, initial_state=decoder_states_inputs)
    decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoded_seq] + decoder_states)

    # Attention applied one decoder step at a time, reusing the trained layers
    encoded_seq_in, decoded_seq_in = Input(shape=(seqX_len, hid_dim)), Input(shape=(1, hid_dim))
    score = score_dense(decoded_seq_in)
    score = dot([score, encoded_seq_in], axes=(2, 2))
    attention = Activation('softmax')(score)
    context = dot([attention, encoded_seq_in], axes=(2, 1))
    concat = concatenate([context, decoded_seq_in], axis=2)
    attentional = attention_dense(concat)
    attention_outputs = output_dense(attentional)
    attention_model = Model([encoded_seq_in, decoded_seq_in], [attention_outputs, attention])

    def decode_sequence(input_seq, bos_eos, max_output_length=1000):
        encoded_seq, *states_value = encoder_model.predict(input_seq)
        target_seq = np.array(bos_eos[0])  # index corresponding to bos_eos[0] = "<s>"
        output_seq = bos_eos[0][:]  # copy so the caller's list is not mutated
        attention_seq = np.empty((0, len(input_seq[0])))
        while True:
            decoded_seq, *states_value = decoder_model.predict([target_seq] + states_value)
            output_tokens, attention = attention_model.predict([encoded_seq, decoded_seq])
            sampled_token_index = [np.argmax(output_tokens[0, -1, :])]
            output_seq += sampled_token_index
            # Attention weights are collected here for inspection but not returned
            attention_seq = np.append(attention_seq, attention[0], axis=0)
            if (sampled_token_index == bos_eos[1] or len(output_seq) > max_output_length):
                break
            target_seq = np.array(sampled_token_index)
        return output_seq

    # ----- decode_sequence ----- #
    return decode_sequence
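To make the attention wiring above easier to follow, here is a small standalone NumPy sketch of what the score / softmax / context / tanh steps compute for a single decoder time step. The shapes, random weights, and weight scaling are illustrative assumptions, not values taken from the trained model, and the bias terms of the Dense layers are omitted.

# NumPy sketch of the Luong-style attention used above, for one decoder step;
# shapes and weights are illustrative assumptions, not from the model.
import numpy as np

seqX_len, hid_dim = 18, 256
encoded_seq = np.random.randn(seqX_len, hid_dim)         # encoder hidden states h_s
decoded_step = np.random.randn(1, hid_dim)               # current decoder state h_t
W_score = np.random.randn(hid_dim, hid_dim) / hid_dim    # stands in for score_dense

# score(t, s) = h_t W h_s, then softmax over source positions s
score = decoded_step @ W_score @ encoded_seq.T           # shape (1, seqX_len)
score = score - score.max(axis=-1, keepdims=True)        # numerical stability
attention = np.exp(score) / np.exp(score).sum(axis=-1, keepdims=True)

# context vector: attention-weighted sum of encoder states
context = attention @ encoded_seq                        # shape (1, hid_dim)

# attentional vector: tanh(W_c [context; h_t]), as in attention_dense above
W_c = np.random.randn(2 * hid_dim, hid_dim) / hid_dim
attentional = np.tanh(np.concatenate([context, decoded_step], axis=-1) @ W_c)
print(attention.shape, context.shape, attentional.shape)  # (1, 18) (1, 256) (1, 256)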