Skip to content

Instantly share code, notes, and snippets.

@kevinrobinson
Created December 6, 2015 16:51
Show Gist options
  • Select an option

  • Save kevinrobinson/8707dc3a4a7e599d2627 to your computer and use it in GitHub Desktop.

Select an option

Save kevinrobinson/8707dc3a4a7e599d2627 to your computer and use it in GitHub Desktop.
Built gen_word2vec.py, 3972c79
"""Python wrappers around Brain.
This file is MACHINE GENERATED! Do not edit.
"""
from google.protobuf import text_format
from tensorflow.core.framework import op_def_pb2
from tensorflow.python.framework import op_def_registry
from tensorflow.python.framework import ops
from tensorflow.python.ops import op_def_library
def neg_train(w_in, w_out, examples, labels, lr, vocab_count,
              num_negative_samples, name=None):
    r"""Create a "NegTrain" op, which trains via negative sampling.

    Args:
      w_in: A `Tensor` of type mutable `float32`. Input word embedding.
      w_out: A `Tensor` of type mutable `float32`. Output word embedding.
      examples: A `Tensor` of type `int32`. A vector of word ids.
      labels: A `Tensor` of type `int32`. A vector of word ids.
      lr: A `Tensor` of type `float32`.
      vocab_count: A list of `ints`. Count of words in the vocabulary.
      num_negative_samples: An `int`. Number of negative samples per example.
      name: A name for the operation (optional).

    Returns:
      The created Operation.
    """
    # Arguments declared as `input_arg` in the "NegTrain" op definition.
    tensor_inputs = {
        "w_in": w_in,
        "w_out": w_out,
        "examples": examples,
        "labels": labels,
        "lr": lr,
    }
    # Arguments declared as `attr` in the "NegTrain" op definition.
    attrs = {
        "vocab_count": vocab_count,
        "num_negative_samples": num_negative_samples,
    }
    call_kwargs = dict(tensor_inputs, **attrs)
    return _op_def_lib.apply_op("NegTrain", name=name, **call_kwargs)
# Register `None` as the shape function for the "NegTrain" op.
# NOTE(review): presumably `None` means "no static shape inference" for this
# op -- confirm against the ops.RegisterShape implementation in use.
ops.RegisterShape("NegTrain")(None)
def skipgram(filename, batch_size, window_size=None, min_count=None,
             subsample=None, name=None):
    r"""Create a "Skipgram" op that parses a text file into example batches.

    Args:
      filename: A `string`. The corpus's text file name.
      batch_size: An `int`. The size of produced batch.
      window_size: An optional `int`. Defaults to `5`.
        The number of words to predict to the left and right of the target.
      min_count: An optional `int`. Defaults to `5`.
        The minimum number of word occurrences for it to be included in the
        vocabulary.
      subsample: An optional `float`. Defaults to `0.001`.
        Threshold for word occurrence. Words that appear with higher
        frequency will be randomly down-sampled. Set to 0 to disable.
      name: A name for the operation (optional).

    Returns:
      A tuple of `Tensor` objects (vocab_word, vocab_freq, words_per_epoch,
      current_epoch, total_words_processed, examples, labels).
      vocab_word: A `Tensor` of type `string`. A vector of words in the
        corpus.
      vocab_freq: A `Tensor` of type `int32`. Frequencies of words. Sorted in
        the non-ascending order.
      words_per_epoch: A `Tensor` of type `int64`. Number of words per epoch
        in the data file.
      current_epoch: A `Tensor` of type `int32`. The current epoch number.
      total_words_processed: A `Tensor` of type `int64`. The total number of
        words processed so far.
      examples: A `Tensor` of type `int32`. A vector of word ids.
      labels: A `Tensor` of type `int32`. A vector of word ids.
    """
    # Attributes without a default in the "Skipgram" op definition.
    required_attrs = {
        "filename": filename,
        "batch_size": batch_size,
    }
    # Attributes that carry a `default_value` in the op definition; they are
    # forwarded unchanged, including when left as `None`.
    optional_attrs = {
        "window_size": window_size,
        "min_count": min_count,
        "subsample": subsample,
    }
    call_kwargs = dict(required_attrs, **optional_attrs)
    return _op_def_lib.apply_op("Skipgram", name=name, **call_kwargs)
# Register `None` as the shape function for the "Skipgram" op.
# NOTE(review): presumably `None` means "no static shape inference" for this
# op -- confirm against the ops.RegisterShape implementation in use.
ops.RegisterShape("Skipgram")(None)
def _InitOpDefLibrary():
    """Parse the embedded op definitions and return a library holding them.

    Reads the text-format protobuf stored on the function attribute
    `_InitOpDefLibrary.op_list_ascii`, merges it into a fresh `OpList`,
    registers that list with `op_def_registry`, and adds it to a new
    `OpDefLibrary`.

    Returns:
      The populated `op_def_library.OpDefLibrary` instance.
    """
    parsed_ops = op_def_pb2.OpList()
    # Merge fills `parsed_ops` in place from the text-format definition.
    text_format.Merge(_InitOpDefLibrary.op_list_ascii, parsed_ops)
    op_def_registry.register_op_list(parsed_ops)
    library = op_def_library.OpDefLibrary()
    library.add_op_list(parsed_ops)
    return library
# Text-format `OpList` protobuf describing the "NegTrain" and "Skipgram" ops.
# It is stored as an attribute on _InitOpDefLibrary, which reads it through
# text_format.Merge, so this assignment must run before that function is
# called. The literal is runtime data: do not reformat or edit it by hand.
_InitOpDefLibrary.op_list_ascii = """op {
name: "NegTrain"
input_arg {
name: "w_in"
type: DT_FLOAT
is_ref: true
}
input_arg {
name: "w_out"
type: DT_FLOAT
is_ref: true
}
input_arg {
name: "examples"
type: DT_INT32
}
input_arg {
name: "labels"
type: DT_INT32
}
input_arg {
name: "lr"
type: DT_FLOAT
}
attr {
name: "vocab_count"
type: "list(int)"
}
attr {
name: "num_negative_samples"
type: "int"
}
}
op {
name: "Skipgram"
output_arg {
name: "vocab_word"
type: DT_STRING
}
output_arg {
name: "vocab_freq"
type: DT_INT32
}
output_arg {
name: "words_per_epoch"
type: DT_INT64
}
output_arg {
name: "current_epoch"
type: DT_INT32
}
output_arg {
name: "total_words_processed"
type: DT_INT64
}
output_arg {
name: "examples"
type: DT_INT32
}
output_arg {
name: "labels"
type: DT_INT32
}
attr {
name: "filename"
type: "string"
}
attr {
name: "batch_size"
type: "int"
}
attr {
name: "window_size"
type: "int"
default_value {
i: 5
}
}
attr {
name: "min_count"
type: "int"
default_value {
i: 5
}
}
attr {
name: "subsample"
type: "float"
default_value {
f: 0.001
}
}
}
"""
# Build the op library once at import time; neg_train() and skipgram() create
# their ops through this module-level instance.
_op_def_lib = _InitOpDefLibrary()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment