import os

import numpy as np
from keras.layers import Input, Dense, Flatten, Conv1D, MaxPooling1D, Embedding
from keras.models import Model


def get_embedding_layer(vocab_index, learned_embeddings, max_words, embedding_dim, max_seq_len, do_train):
    """
    Creates the Embedding layer using the learned embeddings and the word index map
    of all unique words in the text corpus.

    ||Params||
    vocab_index: Map of all unique words in the text corpus as keys and their index as values.
    learned_embeddings: Learned embedding representation from GloVe/Word2Vec.
    max_words: Maximum number of words to be used from the vocab.
    embedding_dim: Size of the learned embedding (e.g. 100/300/600).
    max_seq_len: Maximum length of input text used for training/validation.
    do_train: Boolean flag indicating whether the embedding vectors are to be trained/altered.

    ||Return||
    embedding_layer: Keras Embedding layer initialized with the learned embeddings.
    """
    vocab_size = min(max_words, len(vocab_index))
    embedding_matrix = np.zeros((vocab_size, embedding_dim))
    for word, i in vocab_index.items():
        # Guard against indices beyond the matrix: vocab_size can be smaller
        # than max_words when the corpus vocabulary is small.
        if i >= vocab_size:
            continue
        embedding_vector = learned_embeddings.get(word)
        if embedding_vector is not None:
            # Words not found in the learned embeddings stay all-zeros.
            embedding_matrix[i] = embedding_vector
    embedding_layer = Embedding(vocab_size,
                                embedding_dim,
                                weights=[embedding_matrix],
                                input_length=max_seq_len,
                                trainable=do_train)
    return embedding_layer
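
# Usage sketch for get_embedding_layer (illustrative, not part of the original
# gist): vocab_index can come from a fitted Keras Tokenizer, and
# learned_embeddings from a GloVe text file. The helper name and the default
# glove_path below are assumptions for the example.
def example_build_embedding_layer(texts, glove_path='glove.6B.100d.txt'):
    from keras.preprocessing.text import Tokenizer

    tokenizer = Tokenizer(num_words=20000)
    tokenizer.fit_on_texts(texts)            # texts: list of raw strings
    vocab_index = tokenizer.word_index       # word -> 1-based index

    # Parse the GloVe file into a {word: vector} map.
    learned_embeddings = {}
    with open(glove_path) as f:
        for line in f:
            values = line.split()
            learned_embeddings[values[0]] = np.asarray(values[1:], dtype='float32')

    return get_embedding_layer(vocab_index, learned_embeddings,
                               max_words=20000, embedding_dim=100,
                               max_seq_len=1000, do_train=False)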

def model_nlp_classify_cnn(max_seq_len, embedding_layer, num_labels):
    """
    Model for NLP classification using a CNN.

    ||Params||
    max_seq_len: Maximum length of input text used for training/validation.
    embedding_layer: Keras Embedding layer.
    num_labels: Number of labels/categories in the output.

    ||Return||
    model: NLP classification model using a CNN.
    """
    sequence_input = Input(shape=(max_seq_len,), dtype='int32')
    embedded_sequences = embedding_layer(sequence_input)
    x = Conv1D(128, 5, activation='relu')(embedded_sequences)
    x = MaxPooling1D(5)(x)
    x = Conv1D(128, 5, activation='relu')(x)
    x = MaxPooling1D(5)(x)
    x = Conv1D(128, 5, activation='relu')(x)
    # Pool size 35 assumes max_seq_len = 1000, collapsing the sequence to length 1.
    x = MaxPooling1D(35)(x)
    x = Flatten()(x)
    x = Dense(128, activation='relu')(x)
    predictions = Dense(num_labels, activation='softmax')(x)
    model = Model(sequence_input, predictions)
    return model
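
# Shape sanity check (illustrative): with max_seq_len = 1000, each Conv1D with
# kernel size 5 trims the length by 4 and each pool divides it, giving
# 1000 -> 996 -> 199 -> 195 -> 39 -> 35 -> 1, so the final MaxPooling1D(35)
# acts as a global pool. num_labels=20 below is an assumed example value.
def example_build_model(embedding_layer):
    model = model_nlp_classify_cnn(max_seq_len=1000,
                                   embedding_layer=embedding_layer,
                                   num_labels=20)
    model.summary()  # prints layer output shapes to verify the arithmetic above
    return model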

def train_save_model(model, x_train, y_train, x_val, y_val, optimizer='rmsprop', save_path="./"):
    """
    Trains the model and saves the trained model.

    ||Params||
    model: Model for NLP classification.
    x_train, y_train: Training sequences and their one-hot labels.
    x_val, y_val: Validation sequences and their one-hot labels.
    optimizer: Optimizer algorithm to be used for training the network.
               Defaults to rmsprop; SGD and Adam are a few other options.
    save_path: Path for saving the trained model. Defaults to the current directory.
    """
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['acc'])
    model.fit(x_train, y_train,
              batch_size=128,
              epochs=10,
              validation_data=(x_val, y_val))
    # Save architecture + weights in one HDF5 file via Keras model.save;
    # the file name is an arbitrary choice.
    model.save(os.path.join(save_path, 'nlp_cnn_model.h5'))
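
# End-to-end sketch (illustrative): turn raw texts into padded index sequences,
# one-hot the labels, then wire the three helpers together. The names `texts`,
# `labels`, and `learned_embeddings` are assumed inputs, not defined in the gist.
def example_pipeline(texts, labels, learned_embeddings):
    from keras.preprocessing.text import Tokenizer
    from keras.preprocessing.sequence import pad_sequences
    from keras.utils import to_categorical

    tokenizer = Tokenizer(num_words=20000)
    tokenizer.fit_on_texts(texts)
    sequences = tokenizer.texts_to_sequences(texts)
    data = pad_sequences(sequences, maxlen=1000)   # shape: (num_samples, 1000)
    targets = to_categorical(labels)               # one-hot label matrix

    # Simple 80/20 train/validation split.
    split = int(0.8 * len(data))
    x_train, x_val = data[:split], data[split:]
    y_train, y_val = targets[:split], targets[split:]

    embedding_layer = get_embedding_layer(tokenizer.word_index, learned_embeddings,
                                          max_words=20000, embedding_dim=100,
                                          max_seq_len=1000, do_train=False)
    model = model_nlp_classify_cnn(1000, embedding_layer, targets.shape[1])
    train_save_model(model, x_train, y_train, x_val, y_val)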