Created
April 22, 2016 17:22
-
-
Save lukedeo/84746bbbfd1be2280ecd227a4cfcdec3 to your computer and use it in GitHub Desktop.
heard u liek to cnn
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import csv | |
# import os | |
# import numpy as np | |
# from cervantes.box import WordVectorBox | |
# from cervantes.language import OneLevelEmbedding | |
# from cervantes.nn.models import RNNClassifier | |
# from cervantes.box import EnglishCharBox | |
import keras.backend as K
from keras.layers import (
    Convolution1D,
    Dense,
    Dropout,
    Embedding,
    GRU,
    Highway,
    Input,
    Lambda,
    Reshape,
    TimeDistributed,
    merge,
)
from keras.models import Model, Sequential
# Hyperparameters for the character-level document model.

# Convolution window widths; one convolutional encoder is built per width.
NGRAMS = [1, 2, 3, 4, 5]
# Number of convolution filters used by each encoder.
NFILTERS = 32 * 3

# Fixed input geometry: every document is WORDS_PER_DOCUMENT words of
# CHARACTERS_PER_WORD characters each.
CHARACTERS_PER_WORD = 15
WORDS_PER_DOCUMENT = 300

# Size of the character vocabulary given to the embedding layer.
# NOTE(review): the "+ 2" presumably reserves ids for special symbols
# (e.g. padding / unknown) -- confirm against the data pipeline.
NUMBER_CHARACTERS = 30 + 2
EMBEDDING_DIM = 100

# A document enters the model as one flat sequence of character ids ...
INPUT_SHAPE = (CHARACTERS_PER_WORD * WORDS_PER_DOCUMENT,)
# ... and is reshaped to (words, characters, embedding) after embedding.
EMBEDDING_SHAPE = (WORDS_PER_DOCUMENT, CHARACTERS_PER_WORD, EMBEDDING_DIM)
# Model input: one document as a flat sequence of int32 character ids.
doc = Input(shape=INPUT_SHAPE, dtype='int32')

# Embed every character id, then fold the flat sequence into a
# (words, characters-per-word, embedding) grid so later layers can
# operate word by word.
embedded = Sequential([
    Embedding(
        input_dim=NUMBER_CHARACTERS,
        output_dim=EMBEDDING_DIM,
        input_length=INPUT_SHAPE[0],
    ),
    Reshape(EMBEDDING_SHAPE),
])(doc)
def sub_model(n):
    """Build a convolution + max-pooling encoder for a single word.

    Args:
        n: Width of the convolution window (the filter length passed to
            ``Convolution1D``).

    Returns:
        A ``Sequential`` model whose input has shape
        ``EMBEDDING_SHAPE[1:]`` (one word's embedded characters) and whose
        output is declared as ``(NFILTERS,)``.
    """
    return Sequential([
        Convolution1D(
            NFILTERS, n,
            activation='relu',
            input_shape=EMBEDDING_SHAPE[1:],
        ),
        # Reduce over axis 1 (the first non-batch axis of the convolution
        # output), keeping the maximum response of each filter.
        Lambda(
            lambda x: K.max(x, axis=1),
            output_shape=(NFILTERS,),
        ),
    ])
# Word-level representation: apply one encoder per window width to every
# word (TimeDistributed iterates over the word axis), concatenate the
# per-width features along the last axis, then apply dropout.
rep = Dropout(0.5)(
    merge(
        [TimeDistributed(sub_model(n))(embedded) for n in NGRAMS],
        mode='concat',
        concat_axis=-1,
    )
)

# Document-level representation: read the word sequence with one GRU in
# each direction, concatenate the two outputs, then apply dropout.
rep = Dropout(0.5)(
    merge(
        [GRU(90)(rep), GRU(90, go_backwards=True)(rep)],
        mode='concat',
        concat_axis=-1,
    )
)
# Classifier head: highway layer, then a 64-unit dense layer, with dropout
# in between, ending in a single sigmoid unit.
# NOTE(review): a single sigmoid output suggests a binary target -- confirm
# against the training code.
output = Highway(activation='relu')(rep)
output = Dropout(0.5)(output)
output = Dense(64, activation='relu')(output)
output = Dropout(0.4)(output)
output = Dense(1, activation='sigmoid')(output)

# Full model mapping the flat character-id input to the sigmoid output.
nn = Model(input=doc, output=output)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment