// convert raw text into a fixed-length sequence of token ids,
// using the word2index vocabulary returned by loadVocab
function tokenise(text) {
  text = text.toLowerCase();
  const splitted_text = text.split(' ');
  const tokens = [];
  splitted_text.forEach(element => {
    if (word2index[element] !== undefined) {
      tokens.push(word2index[element]);
    }
  });
  // pad with zeros up to maxLen so the input length matches the model's input;
  // front-padding here is an assumption, mirroring Keras pad_sequences' default 'pre' mode
  while (tokens.length < maxLen) {
    tokens.unshift(0);
  }
  return tokens;
}
// fetch the word-to-index vocabulary (exported from the Keras tokeniser) as a JS object
async function loadVocab(vocabPath) {
  const word2index = await (await fetch(vocabPath)).json();
  return word2index;
}
<!DOCTYPE html>
<html>
  <head>
    <meta charset="UTF-8">
    <title>Text Classifier</title>
    <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@latest"></script>
  </head>
  <body>
    <h1>Text Classifier</h1>
    <div>
      <!-- text input and prediction output for the classifier go here -->
    </div>
  </body>
</html>
import json

# tokeniser.to_json() already returns a JSON string, so write it out directly
# instead of json.dump-ing it (which would double-encode it)
with open('tokeniser.json', 'w') as file:
    file.write(tokeniser.to_json())
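The browser-side loadVocab above expects a plain word-to-index mapping; a minimal sketch of exporting one from the fitted tokeniser (the word2index.json filename is an assumption, chosen to match whatever vocabPath is passed to loadVocab):

# export the tokeniser's word -> index mapping for the JavaScript tokenise/loadVocab functions
with open('word2index.json', 'w') as f:
    json.dump(tokeniser.word_index, f)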
# save the trained Keras model in HDF5 format
saved_model_path = "modelCNN.h5"
keras_model.save(saved_model_path)
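To serve the network from the browser page above it also has to be exported in TensorFlow.js format; a minimal sketch, assuming the tensorflowjs Python package is installed (the tfjs_model output directory name is illustrative):

import tensorflowjs as tfjs

# write the model out in TensorFlow.js Layers format (model.json plus weight shards)
tfjs.converters.save_keras_model(keras_model, 'tfjs_model')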
# predict on the held-out test set and report per-class precision, recall and F1
y_pred = to_categorical(np.argmax(keras_model.predict(tokenised_text_test), axis=1), num_classes=n_labels)
print(classification_report(y_test, y_pred, target_names=list(labels.values()), digits=4))
# build the model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dropout, Conv1D, MaxPool1D, GlobalMaxPooling1D, Dense

keras_model = Sequential()
keras_model.add(Embedding(vocab_size, output_dim=emb_dim, input_length=max_len))
keras_model.add(Dropout(dropout_rate))
keras_model.add(Conv1D(50, 3, activation='relu', padding='same', strides=1))
keras_model.add(MaxPool1D())
keras_model.add(Dropout(dropout_rate))
keras_model.add(Conv1D(100, 3, activation='relu', padding='same', strides=1))
keras_model.add(MaxPool1D())
keras_model.add(Dropout(dropout_rate))
# classification head and compile step (an assumed completion: the raw-logit output
# matches the from_logits=True loss defined with the hyperparameters)
keras_model.add(GlobalMaxPooling1D())
keras_model.add(Dense(n_labels))
keras_model.compile(optimizer=opt, loss=loss, metrics=[metric])
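None of these snippets shows the actual training call; a minimal sketch, assuming the padded sequences tokenised_text and one-hot labels y from the preprocessing snippets below (the epoch count, batch size and validation split are illustrative):

# train the CNN on the padded token sequences and one-hot labels
history = keras_model.fit(
    tokenised_text, y,
    epochs=10,
    batch_size=32,
    validation_split=0.1,
)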
# hyperparameters, loss, metric and optimiser
import tensorflow as tf

emb_dim = 64
dropout_rate = 0.3
n_labels = y.shape[1]
learning_rate = 0.0006

# from_logits=True: the model's final Dense layer is expected to output raw logits (no softmax)
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.CategoricalAccuracy('accuracy')
opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)
from sklearn import preprocessing
from tensorflow.keras.utils import to_categorical
# encode the string labels as integers, then one-hot encode them
encoded_labels = preprocessing.LabelEncoder()
y = encoded_labels.fit_transform(train_data['label'])
y = to_categorical(y)
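The evaluation snippet above refers to a labels mapping when printing the classification report; a sketch of how it could be built from the fitted encoder (the construction is an assumption, only the name labels comes from the original code):

# map each integer class index back to its original label string
labels = dict(enumerate(encoded_labels.classes_))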
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# fit the tokeniser on the training texts and turn them into padded integer sequences
tokeniser = Tokenizer()
tokeniser.fit_on_texts(train_data['Text'])
tokenised_text = tokeniser.texts_to_sequences(train_data['Text'])
tokenised_text = pad_sequences(tokenised_text, maxlen=max_len)
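The Embedding layer and the padding step both rely on vocab_size and max_len, which never appear in these snippets; a minimal sketch of how they could be set (the max_len value is purely illustrative):

# one row per known word, plus one for the reserved padding/index-0 slot
vocab_size = len(tokeniser.word_index) + 1
# maximum sequence length used for padding and as the Embedding input_length
max_len = 100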