Justin Evans (eustin)
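
These snippets build toy word embeddings for two search queries and the search results they return. They assume NumPy, TensorFlow, and inputs like the following; the query and search-result strings here are hypothetical stand-ins, not the original data:

import numpy as np
import tensorflow as tf

# hypothetical placeholder inputs assumed by the snippets below
query_1 = "dogs"
query_2 = "doggos and puppers"
bing_search_results = ["dogs are the best", "i love dogs"]
google_search_results = ["doggos and puppers are the best", "i love doggos"]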
# fit a single tokeniser over the queries and both sets of search results
combined_texts = [query_1, *bing_search_results, query_2, *google_search_results]
tokeniser = tf.keras.preprocessing.text.Tokenizer()
tokeniser.fit_on_texts(combined_texts)
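
The tokeniser lowercases and strips punctuation by default, and it numbers words from 1, leaving index 0 free for padding. That reserved slot is why the vocabulary size is the largest index plus one: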
# add one to account for the padding word at index 0
vocab_size = max(tokeniser.index_word) + 1
print(vocab_size)
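
Printing the fitted index is a quick sanity check on what the tokeniser learned: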
for idx, word in tokeniser.index_word.items():
    print(f"index {idx} - {word}")
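
Rather than training them, each word gets a random 2-dimensional embedding vector; row 0 belongs to the padding index: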
EMBEDDING_DIMS = 2
embeddings = np.random.randn(vocab_size, EMBEDDING_DIMS).astype(np.float32)
print(embeddings)
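
texts_to_sequences returns one list of indices per input text, so indexing the embedding matrix with that list yields an array of shape (1, number of tokens, EMBEDDING_DIMS):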
# look up the embedding for the single-word query
query_1_embedding_index = tokeniser.texts_to_sequences([query_1])
query_1_embeddings = np.array([embeddings[x] for x in query_1_embedding_index])
print(query_1_embeddings)

# the same lookup for the multi-word query gives one vector per word
query_2_embedding_indices = tokeniser.texts_to_sequences([query_2])
query_2_embeddings = np.array([embeddings[x] for x in query_2_embedding_indices])
print(query_2_embeddings)
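
Averaging over the word axis (mean pooling) collapses the multi-word query to a single vector; keepdims=True keeps the result the same shape as the query 1 lookup so the two can be stacked: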
query_2_embeddings_avg = tf.reduce_mean(query_2_embeddings, axis=1, keepdims=True).numpy()
print(query_2_embeddings_avg)
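
Each query is now represented by a single embedding vector, so the two can be combined into one array with one entry per query: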
# stack the two query representations; np.vstack is equivalent to the deprecated np.row_stack
query_embeddings = np.vstack([query_1_embeddings, query_2_embeddings_avg])
print(query_embeddings.shape)
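
The same index-then-look-up pattern turns every search-result document into a matrix with one row per word: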
# convert each set of search results into lists of word indices
docs_sequences = []
for docs_list in [bing_search_results, google_search_results]:
    docs_sequences.append(tokeniser.texts_to_sequences(docs_list))

# look up an embedding matrix for every document in each set
docs_embeddings = []
for docs_set in docs_sequences:
    this_docs_set = []
    for doc in docs_set:
        this_doc_embeddings = np.array([embeddings[idx] for idx in doc])
        this_docs_set.append(this_doc_embeddings)
    docs_embeddings.append(this_docs_set)
for doc_embeddings in docs_embeddings[0]:
    print()
    print(doc_embeddings)
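
The padding word that the vocabulary accounts for comes into play once the variable-length documents are padded to a common length. A minimal sketch, assuming the Keras pad_sequences helper; the zeros it fills with select row 0 of the embedding matrix:

from tensorflow.keras.preprocessing.sequence import pad_sequences

# pad the first set of documents to the length of its longest member
padded = pad_sequences(docs_sequences[0], padding="post")
print(padded)  # zeros mark the padding word, i.e. embedding row 0
print(embeddings[padded].shape)  # (num docs, max length, EMBEDDING_DIMS)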