Justin Evans (eustin)
import tensorflow as tf

# Toy corpus: each sentence pairs a name with its category.
sentences = [
    "snoopy dog",
    "milo dog",
    "dumbo elephant",
    "portugal country",
    "brazil country",
]

# Fit a word-level tokeniser on the corpus and inspect its vocabulary.
tokeniser = tf.keras.preprocessing.text.Tokenizer()
tokeniser.fit_on_texts(sentences)
print(tokeniser.word_index)
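
The tokeniser assigns the lowest indices to the most frequent words, which is why "dog" and "country" come first in word_index here. A quick way to see the counts behind that ordering (an addition, though word_counts is a standard attribute of a fitted Tokenizer):

# Word frequencies gathered during fit_on_texts; the index order above
# follows these counts, most frequent first.
print(tokeniser.word_counts)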
# Index 0 is reserved for padding, so the vocabulary size is the largest
# word index plus one.
VOCAB_SIZE = max(tokeniser.index_word) + 1
print(f"VOCAB_SIZE: {VOCAB_SIZE}")

# Convert each sentence to its sequence of integer word indices.
sequences = tokeniser.texts_to_sequences(sentences)
for x in sequences:
    print(x)
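
These integer sequences are what an embedding layer consumes. A minimal sketch of that next step, assuming TF 2.x; the layer and its 2-d output size are illustrative additions, not part of the original snippets:

# Illustrative only: map each word index to a trainable 2-d vector.
embedding_layer = tf.keras.layers.Embedding(input_dim=VOCAB_SIZE, output_dim=2)

# Look up the vectors for the first tokenised sentence ("snoopy dog").
vectors = embedding_layer(tf.constant(sequences[0]))
print(vectors.numpy())  # shape (2, 2): two words, two dimensions each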
# A separate, smaller example comparing one-hot vectors with dense embeddings.
# The token list itself is not shown in the original snippets; the indices used
# below (snoopy=0, is=1, beagle=3) and the (4, 2) embedding matrix imply four
# tokens, assumed here to come from the sentence "snoopy is a beagle".
tokens = ["snoopy", "is", "a", "beagle"]

# Map each integer index to its token.
index_word = {i: x for i, x in enumerate(tokens)}
print(index_word)

# One-hot encode every index: a single 1 at the token's own position,
# 0s everywhere else.
num_classes = len(index_word)
index_one_hot = {i: tf.one_hot(i, depth=num_classes) for i in index_word}

for k, v in index_one_hot.items():
    word = index_word[k]
    one_hot_vector = v.numpy()
    print(f"{word:<6}: {one_hot_vector}")
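
An aside, not in the original snippets: stacked row-wise, these one-hot vectors are just the identity matrix, which is all a one-hot encoding is.

# The four one-hot vectors stacked as rows equal tf.eye(num_classes).
print(tf.eye(num_classes).numpy())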
# Euclidean distance between the one-hot vectors for "snoopy" and "beagle"...
snoopy_vec = index_one_hot[0]
beagle_vec = index_one_hot[3]
snoopy_vs_beagle = tf.sqrt(tf.reduce_sum(tf.square(snoopy_vec - beagle_vec)))
print(snoopy_vs_beagle.numpy())

# ...and between "snoopy" and "is". Every distinct pair of one-hot vectors is
# exactly sqrt(2) apart, so one-hot encodings carry no notion of similarity.
is_vec = index_one_hot[1]
snoopy_vs_is = tf.sqrt(tf.reduce_sum(tf.square(snoopy_vec - is_vec)))
print(snoopy_vs_is.numpy())

# Dense embeddings instead: a small random vector per token, drawn from the
# same uniform range Keras uses to initialise Embedding layers by default.
embeddings = tf.random.uniform((4, 2), minval=-0.05, maxval=0.05).numpy()
print(embeddings)

# The same distances between embedding vectors are no longer forced to be equal.
snoopy_vs_beagle = tf.sqrt(tf.reduce_sum(tf.square(embeddings[0] - embeddings[3])))
snoopy_vs_is = tf.sqrt(tf.reduce_sum(tf.square(embeddings[0] - embeddings[1])))
print(snoopy_vs_beagle.numpy())
print(snoopy_vs_is.numpy())
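
Euclidean distance is only one way to compare vectors; cosine similarity, which looks at direction rather than magnitude, is a common alternative. A minimal sketch over the same vectors (an illustrative helper, not part of the original snippets):

# Illustrative helper: cosine similarity between two 1-d vectors.
def cosine_similarity(a, b):
    a = tf.convert_to_tensor(a)
    b = tf.convert_to_tensor(b)
    return tf.reduce_sum(a * b) / (tf.norm(a) * tf.norm(b))

print(cosine_similarity(embeddings[0], embeddings[3]).numpy())
print(cosine_similarity(embeddings[0], embeddings[1]).numpy())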