
# in tensorflow/models/embedding/BUILD: generate the Python wrapper for the word2vec C++ ops
tf_gen_op_wrapper_py(
    name = "gen_word2vec",
    out = "gen_word2vec.py",
    deps = [":word2vec_ops"],
)
# the tf_gen_op_wrapper_py macro expands (roughly) into these native rules:
native.cc_binary(
    name = "gen_gen_word2vec_py_wrappers_cc",
    deps = [":word2vec_ops", ...],
    ...
)
native.genrule(
    name = "gen_word2vec_pygenrule",
    tools = ["gen_gen_word2vec_py_wrappers_cc"],
    outs = ["gen_word2vec.py"],
    ...
)
native.py_library(
    name = "gen_word2vec",
    srcs = ["gen_word2vec.py"],
    ...
)
py_binary(
    name = "word2vec",
    srcs = ["word2vec.py"],
    deps = [":gen_word2vec", ...],
    ...
)
// word2vec_kernels.cc: the kernel behind the Skipgram op
class SkipgramOp : public OpKernel {
 public:
  explicit SkipgramOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
    // validate the attrs and save them as op state
    string filename;
    OP_REQUIRES_OK(ctx, ctx->GetAttr("filename", &filename));
    // ...
  }

  void Compute(OpKernelContext* ctx) override {
    // build a batch of int32 example/label tensors of length batch_size_
    Tensor examples(DT_INT32, TensorShape({batch_size_}));
    // ...
  }
  // ...
};
# build
bazel build -c dbg tensorflow/models/embedding:all
# copy the generated gen_word2vec.py from bazel-out/../genfiles into the project
# in word2vec.py, fix the Python 2/3 division error:
#   error: TypeError: unsupported operand type(s) for /: 'Tensor' and 'int'
#   fix: comment out the line `from __future__ import division`
# in word2vec.py, change the import to pull in gen_word2vec from the local folder:
#   before: from tensorflow.models.embedding import gen_word2vec as word2vec
#   after:  import gen_word2vec as word2vec
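A quick sanity check after these patches (a sketch; it assumes the copied gen_word2vec.py now sits next to word2vec.py): the local wrapper should import cleanly and expose skipgram().

import gen_word2vec as word2vec
print(word2vec.skipgram.__doc__)  # "Parses a text file and creates a batch of examples."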
def _InitOpDefLibrary():
  op_list = op_def_pb2.OpList()
  text_format.Merge(_InitOpDefLibrary.op_list_ascii, op_list)
  op_def_registry.register_op_list(op_list)
  op_def_lib = op_def_library.OpDefLibrary()
  op_def_lib.add_op_list(op_list)
  return op_def_lib

_InitOpDefLibrary.op_list_ascii = """op {
  ...
}"""
def skipgram(filename, batch_size, window_size=None, min_count=None,
             subsample=None, name=None):
  r"""Parses a text file and creates a batch of examples.
  ...
  """
  return _op_def_lib.apply_op("Skipgram", filename=filename,
                              batch_size=batch_size, window_size=window_size,
                              min_count=min_count, subsample=subsample,
                              name=name)
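skipgram() relies on a module-level _op_def_lib; in the generated file it is created once at import time, roughly like this (at the bottom of gen_word2vec.py):

_op_def_lib = _InitOpDefLibrary()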
(vocab_word, vocab_freq, words_per_epoch, current_epoch,
 total_words_processed, examples, labels) = word2vec.skipgram(
    filename=opts.train_data,
    batch_size=opts.batch_size,
    window_size=opts.window_size,
    min_count=opts.min_count,
    subsample=opts.subsample)
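A minimal sketch of pulling one batch from these tensors (assuming the pre-1.0 tf.Session API this code targets; tf is tensorflow, already imported at the top of word2vec.py). Each run() call advances the op's internal file reader and returns the next batch:

with tf.Session() as sess:
  # examples and labels are int32 vectors of length batch_size
  ex, lb = sess.run([examples, labels])
  print(ex.shape, lb.shape)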