This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import nltk | |
from nltk.corpus import stopwords | |
from nltk.stem.wordnet import WordNetLemmatizer | |
def clean_text(text): | |
text = text.lower() | |
# '@' mention. Even though @ adds some information to the message, | |
# this information doesn't add value when building the classification model | |
text = re.sub(r'@[A-Za-z0-9_]+','', text) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# Turn the raw messages into fixed-length integer sequences for the model.
# `df` and `output_columns_all` are defined outside this snippet
# (presumably a pandas DataFrame and a list of its target columns -- TODO confirm).

# Size of the vocabulary: the tokenizer only emits indices for the most
# frequent words and silently drops everything rarer.
vocabulary_size = 20000

# Learn the word index from the corpus, then encode every message as a
# list of word indices.
tokenizer = Tokenizer(num_words=vocabulary_size)
tokenizer.fit_on_texts(df['message'])
sequences = tokenizer.texts_to_sequences(df['message'])

# Pads sequences to the same length: MAXLEN (longer ones are truncated).
MAXLEN = 50
X = pad_sequences(sequences, maxlen=MAXLEN)

# Targets: one column per model output.
y = df[output_columns_all]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Shared trunk of the network, feeding every output head:
# embedding -> dropout -> 1-D convolution -> max-pooling -> LSTM -> dropout.
# `x` is rebound at each step and ends up as the tensor the heads attach to.
main_input = Input(shape=(MAXLEN,), dtype='int32', name='main_input')

# Map each word index to a 50-dimensional vector.
# NOTE(review): `input_length` is deprecated in Keras 3 -- drop it when
# upgrading; confirm against the installed Keras version.
x = Embedding(input_dim=vocabulary_size, output_dim=50, input_length=MAXLEN)(main_input)
x = Dropout(0.3)(x)

# 64 filters over windows of 5 tokens, then downsample by a factor of 4
# before the recurrent layer.
x = Conv1D(64, 5, activation='relu')(x)
x = MaxPooling1D(pool_size=4)(x)

# Collapse the sequence into a single 100-dimensional representation.
x = LSTM(100)(x)
x = Dropout(0.3)(x)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build one sigmoid head per binary target column on top of the shared
# tensor `x`, recording each head's loss and metric under its layer name.
output_array = []   # output tensors, in the order the heads are created
metrics_array = {}  # layer name -> metric identifier
loss_array = {}     # layer name -> loss identifier

# Only the position of each column is needed to name its head.
for i, _column in enumerate(output_columns_binary):
    name = f'binary_output_{i}'
    # A Dense layer is created for each output
    binary_output = Dense(1, activation='sigmoid', name=name)(x)
    output_array.append(binary_output)
    metrics_array[name] = 'binary_accuracy'
    loss_array[name] = 'binary_crossentropy'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Single 3-class softmax head on the shared tensor `x`. The "sparse" loss
# and metric take integer class labels rather than one-hot vectors.
categorical_output = Dense(3, activation='softmax', name='categorical_output')(x)
output_array.append(categorical_output)
metrics_array['categorical_output'] = 'sparse_categorical_accuracy'
loss_array['categorical_output'] = 'sparse_categorical_crossentropy'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Per-class loss weights, keyed by class label.
weight_binary = {0: 0.5, 1: 7}  # values obtained through calculations
weight_categorical = {0: 1.4, 1: 0.43, 2: 7}

# Output layer name -> class-weight mapping for that output.
classes_weights = {}

# Every binary head (classes 0 or 1) gets the same weights. The names must
# match the `binary_output_{i}` layer names used when the heads are built.
# Note that all entries share the one `weight_binary` dict object.
for i, _column in enumerate(output_columns_binary):
    name = f'binary_output_{i}'
    classes_weights[name] = weight_binary
# There is only 1 output in output_columns_categorical, having classes (0, 1 or 2) | |
for i, dense_layer in enumerate(output_columns_categorical): | |
name = 'categorical_output' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Assemble the multi-output model from the shared input and the list of
# output heads, then train it. Losses, metrics and class weights are dicts
# keyed by output layer name.
model = Model(inputs=main_input, outputs=output_array)
model.compile(
    optimizer='adadelta',
    loss=loss_array,
    metrics=metrics_array,
)

# NOTE(review): recent tf.keras releases reject `class_weight` for models
# with more than one output -- confirm the installed Keras version accepts a
# per-output dict, otherwise switch to per-sample weights.
# The trailing semicolon is kept from the original; presumably it suppresses
# the echoed return value in a notebook cell.
model.fit(
    X_train, y_train_output,
    epochs=40, batch_size=512,
    class_weight=classes_weights, verbose=0,
);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Deployment environment name. Optional: falls back to "dev" when the caller
# does not set it. Typed as string for consistency with the other variables.
variable "environment" {
  description = "Env"
  type        = string
  default     = "dev"
}
# Application name. Required: it has no default, so the caller must supply it.
variable "name" {
  description = "Application Name"
  type        = string
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
resource "aws_sqs_queue" "queue" { | |
name = "apigateway-queue" | |
delay_seconds = 0 | |
max_message_size = 262144 | |
message_retention_seconds = 86400 | |
receive_wait_time_seconds = 10 | |
tags = { | |
Product = local.app_name | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"Version": "2012-10-17", | |
"Statement": [ | |
{ | |
"Effect": "Allow", | |
"Action": [ | |
"logs:CreateLogGroup", | |
"logs:CreateLogStream", | |
"logs:DescribeLogGroups", | |
"logs:DescribeLogStreams", |
OlderNewer