Skip to content

Instantly share code, notes, and snippets.

@DanielDaCosta
DanielDaCosta / text_preprocessing.py
Last active May 6, 2020 02:01
Text Preprocessing of MultiLabel Classifier
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
def clean_text(text):
text = text.lower()
# '@' mention. Even though @ adds some information to the message,
# this information doesn't add value to building the classification model
text = re.sub(r'@[A-Za-z0-9_]+','', text)
@DanielDaCosta
DanielDaCosta / Keras_tokenizer.py
Created May 6, 2020 02:07
Keras tokenizer for Medium
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
# Sizes shared by the tokenizer, the padding step and the model input.
vocabulary_size = 20000  # passed to the tokenizer as num_words
MAXLEN = 50  # length every sequence is padded to

# Fit the tokenizer on the message column, then convert that same column
# into sequences with the fitted tokenizer.
tokenizer = Tokenizer(num_words=vocabulary_size)
tokenizer.fit_on_texts(df['message'])
sequences = tokenizer.texts_to_sequences(df['message'])

# Pads sequences to the same length: MAXLEN
X = pad_sequences(sequences, maxlen=MAXLEN)

# Columns selected by output_columns_all (presumably the training targets).
y = df[output_columns_all]
@DanielDaCosta
DanielDaCosta / Model.py
Created May 6, 2020 02:08
Model Architecture Keras
# Shared trunk of the network. The final tensor is left in `x`, which is
# what the output layers are attached to.

# Input: int32 vectors of length MAXLEN.
main_input = Input(shape=(MAXLEN,), dtype='int32', name='main_input')

# Embedding with 50-dimensional outputs, followed by dropout.
embedded = Embedding(
    input_dim=vocabulary_size,
    output_dim=50,
    input_length=MAXLEN,
)(main_input)
embedded = Dropout(0.3)(embedded)

# 1D convolution and max pooling ahead of the recurrent layer.
conv_features = Conv1D(64, 5, activation='relu')(embedded)
conv_features = MaxPooling1D(pool_size=4)(conv_features)

# LSTM over the pooled features, followed by a second dropout.
x = LSTM(100)(conv_features)
x = Dropout(0.3)(x)
@DanielDaCosta
DanielDaCosta / output_binary.py
Created May 6, 2020 02:10
Output Binary Crossentropy Medium
# Containers filled per output: the output tensors, plus the metric and
# loss to use for each output, keyed by the output layer's name.
output_array = []
metrics_array = {}
loss_array = {}

# A Dense layer is created for each entry of output_columns_binary. Only
# the position is needed (it names the layer); the entry itself is unused.
for i, _ in enumerate(output_columns_binary):
    name = f'binary_output_{i}'
    binary_output = Dense(1, activation='sigmoid', name=name)(x)
    output_array.append(binary_output)
    metrics_array[name] = 'binary_accuracy'
    loss_array[name] = 'binary_crossentropy'
@DanielDaCosta
DanielDaCosta / output_categorical.py
Created May 6, 2020 02:11
Output sparse_categorical_crossentropy Medium
# Three-unit softmax head on top of `x`, registered under the layer name
# 'categorical_output' with its own loss and metric.
categorical_output = Dense(
    3,
    activation='softmax',
    name='categorical_output',
)(x)
output_array.append(categorical_output)
loss_array['categorical_output'] = 'sparse_categorical_crossentropy'
metrics_array['categorical_output'] = 'sparse_categorical_accuracy'
@DanielDaCosta
DanielDaCosta / class_weight.py
Created May 6, 2020 02:12
Class Weight Medium
# Per-class weights, keyed by class label.
weight_binary = {0: 0.5, 1: 7}  # values obtained through calculations
weight_categorical = {0: 1.4, 1: 0.43, 2: 7}

# There are 35 outputs in output_columns_binary, having classes (0 or 1).
# Each one is keyed by its output name 'binary_output_<i>' and all of them
# share the same weight_binary mapping.
classes_weights = {
    f'binary_output_{i}': weight_binary
    for i, _ in enumerate(output_columns_binary)
}
# There is only 1 output in output_columns_categorical, having classes (0, 1 or 2)
for i, dense_layer in enumerate(output_columns_categorical):
name = 'categorical_output'
@DanielDaCosta
DanielDaCosta / model_instance.py
Created May 6, 2020 02:13
Model Instance Medium
# Assemble the model from the single input and the collected output heads.
model = Model(inputs=main_input, outputs=output_array)

# Losses and metrics are supplied per output via loss_array / metrics_array.
model.compile(
    optimizer='adadelta',
    loss=loss_array,
    metrics=metrics_array,
)

# Train silently (verbose=0) with the class weights in classes_weights.
# The trailing semicolon is kept from the original; presumably it
# suppresses the echoed return value in a notebook -- TODO confirm.
model.fit(
    X_train,
    y_train_output,
    epochs=40,
    batch_size=512,
    class_weight=classes_weights,
    verbose=0,
);
# Environment name; defaults to "dev" when no value is supplied.
# Typed as string to match the other variable declaration in this file.
variable "environment" {
  type        = string
  description = "Env"
  default     = "dev"
}
# Application name. No default is declared, so a value must be provided.
variable "name" {
  type        = string
  description = "Application Name"
}
resource "aws_sqs_queue" "queue" {
name = "apigateway-queue"
delay_seconds = 0
max_message_size = 262144
message_retention_seconds = 86400
receive_wait_time_seconds = 10
tags = {
Product = local.app_name
}
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"logs:CreateLogGroup",
"logs:CreateLogStream",
"logs:DescribeLogGroups",
"logs:DescribeLogStreams",