Skip to content

Instantly share code, notes, and snippets.

@huangsam
Created June 14, 2020 00:08
Show Gist options
  • Save huangsam/e2871d4599fdd9629bd972393a5f7ba3 to your computer and use it in GitHub Desktop.
Save huangsam/e2871d4599fdd9629bd972393a5f7ba3 to your computer and use it in GitHub Desktop.
Use TensorFlow to detect sarcasm in sentences
# Colab -> https://goo.gle/tfw-sarcembed
# GitHub -> https://goo.gle/2PH90ea
import json
# Load the sarcasm dataset. The code below iterates the parsed JSON and reads
# the "headline" and "is_sarcastic" entries of every item.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default encoding (which is not UTF-8 everywhere).
with open("sarcasm.json", "r", encoding="utf-8") as f:
    datastore = json.load(f)

# Parallel lists: sentences[i] is a headline and labels[i] is its label.
sentences = [item["headline"] for item in datastore]
labels = [item["is_sarcastic"] for item in datastore]
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
# --- Hyperparameters ---------------------------------------------------------
vocab_size = 10000       # Tokenizer `num_words` and first Embedding argument
embedding_dim = 16       # second Embedding argument
max_length = 100         # `maxlen` for pad_sequences / Embedding `input_length`
trunc_type = "post"      # `truncating` mode handed to pad_sequences
padding_type = "post"    # `padding` mode handed to pad_sequences
oov_tok = "<OOV>"        # `oov_token` handed to the Tokenizer
training_size = 20000    # number of leading examples used for training

# --- Train / test split ------------------------------------------------------
# Positional split (no shuffling): the first `training_size` examples train
# the model, everything after that index is held out for validation.
training_sentences, testing_sentences = (
    sentences[:training_size],
    sentences[training_size:],
)
training_labels, testing_labels = (
    labels[:training_size],
    labels[training_size:],
)
# Build the vocabulary from the training sentences only; the testing sentences
# are encoded with that same fitted tokenizer further down.
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Both splits are padded/truncated with exactly the same settings.
_pad_kwargs = dict(maxlen=max_length, padding=padding_type, truncating=trunc_type)

# Text -> integer sequences -> fixed-length padded sequences.
training_sequences = tokenizer.texts_to_sequences(training_sentences)
training_padded = pad_sequences(training_sequences, **_pad_kwargs)

testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences, **_pad_kwargs)
# Need this block to get it to work with TensorFlow 2.x
import numpy as np
# Rebind the features and labels of each split as NumPy arrays. The labels are
# plain Python list slices up to this point; the padded sequences come from
# pad_sequences.
training_padded, training_labels = (
    np.array(training_padded),
    np.array(training_labels),
)
testing_padded, testing_labels = (
    np.array(testing_padded),
    np.array(testing_labels),
)
# Binary classifier: embed each token, average the embeddings over the
# sequence, then a small dense hidden layer and one sigmoid output unit.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length))
model.add(tf.keras.layers.GlobalAveragePooling1D())
model.add(tf.keras.layers.Dense(24, activation="relu"))
model.add(tf.keras.layers.Dense(1, activation="sigmoid"))

# Single sigmoid output paired with binary cross-entropy; accuracy is tracked
# so it shows up in the training history.
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Print the layer-by-layer summary.
model.summary()
# Train for a fixed number of epochs, using the held-out split as validation
# data. The returned History object is kept for plotting afterwards.
num_epochs = 30
history = model.fit(
    x=training_padded,
    y=training_labels,
    epochs=num_epochs,
    validation_data=(testing_padded, testing_labels),
    verbose=2,
)
import matplotlib.pyplot as plt
def plot_graphs(history, string):
    """Plot a metric and its validation counterpart, then save the figure.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch values (here, the value returned by ``model.fit``).
        string: Metric name to plot, e.g. ``"accuracy"``. The series stored
            under ``"val_" + string`` is drawn on the same axes.

    The figure is written to ``plot_<string>.png`` in the working directory.
    """
    # Close the current pyplot figure so this plot starts from a clean slate.
    plt.close()

    train_series = history.history[string]
    plt.plot(train_series)

    val_name = "val_" + string
    val_series = history.history[val_name]
    plt.plot(val_series)

    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.legend([string, val_name])

    plt.savefig(f"plot_{string}.png")
# Save one plot per tracked metric: plot_accuracy.png and plot_loss.png.
for metric_name in ("accuracy", "loss"):
    plot_graphs(history, metric_name)
# Try the trained model on a few hand-written headlines. They go through the
# same fitted tokenizer and the same padding settings as the training data.
new_sentence = [
    "granny starting to fear spiders in the garden might be real",
    "game of thrones season finale showing this sunday night",
    "kitty wants sushi really bad",
]
new_sequences = tokenizer.texts_to_sequences(new_sentence)
new_padded = pad_sequences(
    new_sequences,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)

# One sigmoid output per input sentence.
predictions = model.predict(new_padded)
print(predictions)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment