Created
April 5, 2019 09:32
-
-
Save hakanilter/b49299b68b58163e594770077021df08 to your computer and use it in GitHub Desktop.
TensorFlow Universal Sentence Encoder
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
import tensorflow_hub as hub | |
import numpy as np | |
import os | |
import pandas as pd | |
from scipy import spatial | |
from operator import itemgetter | |
# Alternative (smaller) encoder, kept for reference:
# module_url = "https://tfhub.dev/google/universal-sentence-encoder/2"
module_url = "https://tfhub.dev/google/universal-sentence-encoder-large/3"

# Point the TF Hub cache at a fixed directory (cache on host). This is set
# before the module is created so the setting is in place when it is loaded.
os.environ["TFHUB_CACHE_DIR"] = "/content/gdrive/My Drive/"

# Import the Universal Sentence Encoder's TF Hub module.
embed = hub.Module(module_url)

# Only emit TensorFlow log messages at WARN level.
tf.logging.set_verbosity(tf.logging.WARN)
def progress(i):
    """Print a single-line spinner followed by the counter ``i``.

    The output starts with a carriage return and has no trailing newline,
    so successive calls overwrite the same terminal line. The spinner glyph
    is chosen from ``- \\ | /`` by ``i % 4``.

    Args:
        i: Integer progress counter to display.
    """
    # The backslash is escaped explicitly: a bare "\|" is an invalid escape
    # sequence and triggers a SyntaxWarning on recent Python versions.
    spinner = '-\\|/'
    # flush=True because the line has no newline; without it a line-buffered
    # stdout may not show the update until much later.
    print('\r{} {}'.format(spinner[i % 4], i), end='', flush=True)
def create_embeddings(messages, block_size=1000):
    """Embed ``messages`` with the module-level ``embed`` TF Hub module.

    The messages are processed in consecutive batches of at most
    ``block_size`` items and the per-batch results are stacked with
    ``np.vstack``. Progress is reported through ``progress``.

    Args:
        messages: Sliceable, sized collection of texts (presumably strings;
            confirm against the caller).
        block_size: Maximum number of messages evaluated per session run.

    Returns:
        A NumPy array holding the vertically stacked batch embeddings.

    Raises:
        ValueError: If ``messages`` is empty or ``block_size`` is not a
            positive integer.
    """
    total = len(messages)
    if total == 0:
        raise ValueError("messages must contain at least one item")
    if block_size < 1:
        raise ValueError("block_size must be a positive integer")

    # Build the embedding op once and feed each batch through a placeholder.
    # Calling embed(...) inside the loop would add a fresh copy of the ops to
    # the graph on every iteration, growing memory and slowing each run.
    batch_input = tf.placeholder(tf.string, shape=[None])
    batch_embedding = embed(batch_input)

    embeddings = []
    # The context manager closes the session even if a batch fails.
    with tf.Session() as session:
        session.run([tf.global_variables_initializer(), tf.tables_initializer()])
        for start in range(0, total, block_size):
            end = min(start + block_size, total)
            progress(start)
            # list(...) so that array-like inputs are fed as plain sequences.
            batch = list(messages[start:end])
            embeddings.append(
                session.run(batch_embedding, feed_dict={batch_input: batch})
            )
        progress(total)

    return np.vstack(embeddings)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment