py_binary(
    name = "word2vec",
    srcs = ["word2vec.py"],
    deps = [":gen_word2vec", ...],
    ...
)
native.py_library(
    name = "gen_word2vec",
    srcs = ["gen_word2vec.py"],
    ...
)
native.genrule(
    name = "gen_word2vec_pygenrule",
    tools = ["gen_gen_word2vec_py_wrappers_cc"],
    outs = ["gen_word2vec.py"],
    ...
)
native.cc_binary(
    name = "gen_gen_word2vec_py_wrappers_cc",
    deps = [":word2vec_ops", ...],
    ...
)
tf_gen_op_wrapper_py(
    name = "gen_word2vec",
    out = "gen_word2vec.py",
    deps = [":word2vec_ops"],
)
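
Once built, the generated gen_word2vec.py wrapper imports like any other Python module. A minimal sketch, assuming the tutorial's tensorflow.models.embedding package layout:

# Sketch: using the generated op wrapper (the package path is assumed
# from the word2vec tutorial's layout, not stated in this snippet).
from tensorflow.models.embedding import gen_word2vec as word2vec

# The wrapper exposes one Python function per op registered in
# word2vec_ops, e.g. word2vec.skipgram(...).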
node {
  name: "Cast_1"
  op: "Cast"
  input: "Skipgram:4"
  attr {
    key: "DstT"
    value {
      type: DT_FLOAT
    }
  }
}
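
In Python, a node like this comes from a tf.cast call on the Skipgram op's fifth output (index 4, total_words_processed, per the op registration below). A minimal sketch:

import tensorflow as tf

# Sketch: the kind of Python call that serializes to the Cast node
# above. A placeholder stands in for "Skipgram:4", the Skipgram op's
# fifth output (total_words_processed, an int64 scalar).
words = tf.placeholder(tf.int64, shape=[])
words_float = tf.cast(words, tf.float32)  # emits a Cast node with DstT=DT_FLOAT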
lr = opts.learning_rate * tf.maximum(
    0.0001, 1.0 - tf.cast(self._words, tf.float32) / words_to_train)
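
This decays the learning rate linearly as training consumes words, with a floor of 0.01% of the initial rate. A worked sketch in plain Python, with opts.learning_rate = 0.2 and words_to_train = 1e6 assumed purely for illustration:

# Worked example of the linear decay schedule (values assumed, not
# from the original gist).
learning_rate = 0.2
words_to_train = 1e6

def lr_at(words_processed):
    # Linear decay toward zero, clipped at 0.01% of the initial rate.
    return learning_rate * max(0.0001, 1.0 - words_processed / words_to_train)

print(lr_at(0))        # 0.2   (start of training)
print(lr_at(500000))   # 0.1   (halfway through)
print(lr_at(1000000))  # 2e-05 (floor reached)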
REGISTER_OP("Skipgram")
    .Output("vocab_word: string")
    .Output("vocab_freq: int32")
    .Output("words_per_epoch: int64")
    .Output("current_epoch: int32")
    .Output("total_words_processed: int64")
    .Output("examples: int32")
    .Output("labels: int32")
    .Attr("filename: string")
    .Attr("batch_size: int")
call_skipgram.py
Call the skipgram op to load training data
(words, counts, words_per_epoch, self._epoch, self._words, examples,
 labels) = word2vec.skipgram(filename=opts.train_data,
                             batch_size=opts.batch_size,
                             window_size=opts.window_size,
                             min_count=opts.min_count,
                             subsample=opts.subsample)
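
The returned values are symbolic tensors, so fetching an actual training batch means evaluating them in a session. A minimal sketch (TF 1.x-era API, matching the tutorial's vintage):

import tensorflow as tf

# Sketch: evaluate the skipgram outputs to pull one training batch.
# `examples` and `labels` are the tensors returned above; each run
# advances the op's internal cursor through the training file.
with tf.Session() as session:
    example_ids, label_ids = session.run([examples, labels])
    # int32 vectors of length opts.batch_size: center-word ids and
    # their sampled context-word ids.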
dictionary.py
Naive dictionary approach to representing words
# example input:
sentence = 'The quick brown fox jumped over the lazy dog.'

# tokenize and normalize words, building the set of all vocabulary
# ever seen (a dict with True values serves as the set here)
words_set = {}

def tokenize(sentence):
    # strip the trailing period, lowercase, and split on spaces
    return map(str.lower, sentence[0:-1].split(' '))

for word in tokenize(sentence):
    words_set[word] = True
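
Word embeddings are looked up by integer id, so the natural next step is assigning each vocabulary word a stable index. A minimal sketch extending the example above (the word_ids and encoded names are hypothetical, not from the original gist):

# Assign each word a stable integer id (hypothetical continuation).
word_ids = {word: i for i, word in enumerate(sorted(words_set))}

# Represent the sentence as a list of ids for embedding lookup.
encoded = [word_ids[word] for word in tokenize(sentence)]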