@hakanilter
Created April 5, 2019 09:32
Tensorflow Universal Sentence Encoder

Batch-embeds a list of sentences with the Universal Sentence Encoder TF Hub module (TensorFlow 1.x) and returns the embeddings as a single NumPy matrix.

import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import os
import pandas as pd
from scipy import spatial
from operator import itemgetter
#module_url = "https://tfhub.dev/google/universal-sentence-encoder/2"
module_url = "https://tfhub.dev/google/universal-sentence-encoder-large/3"
# Import the Universal Sentence Encoder's TF Hub module
os.environ["TFHUB_CACHE_DIR"] = '/content/gdrive/My Drive/' # cache on host
embed = hub.Module(module_url)
tf.logging.set_verbosity(tf.logging.WARN)
def progress(i):
    # Simple in-place progress indicator: spinner character plus current index
    print('\r{} {}'.format('-\\|/'[i % 4], i), end='')
def create_embeddings(messages, block_size=1000):
    # Embed the messages in blocks of `block_size` to keep memory usage bounded,
    # then stack the per-block results into one (num_messages, embedding_dim) matrix.
    session = tf.Session()
    session.run([tf.global_variables_initializer(), tf.tables_initializer()])
    embeddings = list()
    for i in range(0, len(messages), block_size):
        start = i
        end = min(i + block_size, len(messages))
        progress(start)
        embeddings.append(session.run(embed(messages[start:end])))
        progress(end)
    message_embeddings = np.vstack(embeddings)
    del embeddings
    session.close()
    return message_embeddings
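
The gist stops at create_embeddings, so here is a minimal usage sketch showing one way the embeddings might be consumed. The example sentences, the query string, and the find_similar helper are illustrative assumptions (not part of the original gist); the sketch reuses the scipy.spatial and operator.itemgetter imports above to rank messages by cosine similarity to a query.

# Hypothetical usage sketch: embed a few sentences, then rank them against a query.
messages = [
    "How do I reset my password?",
    "The weather is nice today.",
    "I forgot my login credentials.",
]
message_embeddings = create_embeddings(messages)

def find_similar(query, messages, message_embeddings, top_n=3):
    # Embed the query with the same helper (opens its own session; fine for a sketch)
    query_embedding = create_embeddings([query])[0]
    # Cosine similarity = 1 - cosine distance
    scored = [
        (msg, 1.0 - spatial.distance.cosine(query_embedding, emb))
        for msg, emb in zip(messages, message_embeddings)
    ]
    # Highest similarity first
    return sorted(scored, key=itemgetter(1), reverse=True)[:top_n]

print(find_similar("password recovery", messages, message_embeddings))

Note that each call to create_embeddings opens a new TensorFlow session and adds fresh ops to the default graph, so in practice the query would typically be embedded in the same batch (or the same session) as the messages rather than via a separate call.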