Created
December 6, 2015 16:51
-
-
Save kevinrobinson/8707dc3a4a7e599d2627 to your computer and use it in GitHub Desktop.
Built gen_word2vec.py, 3972c79
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """Python wrappers around Brain. | |
| This file is MACHINE GENERATED! Do not edit. | |
| """ | |
| from google.protobuf import text_format | |
| from tensorflow.core.framework import op_def_pb2 | |
| from tensorflow.python.framework import op_def_registry | |
| from tensorflow.python.framework import ops | |
| from tensorflow.python.ops import op_def_library | |
| def neg_train(w_in, w_out, examples, labels, lr, vocab_count, | |
| num_negative_samples, name=None): | |
| r"""Training via negative sampling. | |
| Args: | |
| w_in: A `Tensor` of type mutable `float32`. input word embedding. | |
| w_out: A `Tensor` of type mutable `float32`. output word embedding. | |
| examples: A `Tensor` of type `int32`. A vector of word ids. | |
| labels: A `Tensor` of type `int32`. A vector of word ids. | |
| lr: A `Tensor` of type `float32`. | |
| vocab_count: A list of `ints`. Count of words in the vocabulary. | |
| num_negative_samples: An `int`. Number of negative samples per exaple. | |
| name: A name for the operation (optional). | |
| Returns: | |
| The created Operation. | |
| """ | |
| return _op_def_lib.apply_op("NegTrain", w_in=w_in, w_out=w_out, | |
| examples=examples, labels=labels, lr=lr, | |
| vocab_count=vocab_count, | |
| num_negative_samples=num_negative_samples, | |
| name=name) | |
| ops.RegisterShape("NegTrain")(None) | |
| def skipgram(filename, batch_size, window_size=None, min_count=None, | |
| subsample=None, name=None): | |
| r"""Parses a text file and creates a batch of examples. | |
| Args: | |
| filename: A `string`. The corpus's text file name. | |
| batch_size: An `int`. The size of produced batch. | |
| window_size: An optional `int`. Defaults to `5`. | |
| The number of words to predict to the left and right of the target. | |
| min_count: An optional `int`. Defaults to `5`. | |
| The minimum number of word occurrences for it to be included in the | |
| vocabulary. | |
| subsample: An optional `float`. Defaults to `0.001`. | |
| Threshold for word occurrence. Words that appear with higher | |
| frequency will be randomly down-sampled. Set to 0 to disable. | |
| name: A name for the operation (optional). | |
| Returns: | |
| A tuple of `Tensor` objects (vocab_word, vocab_freq, words_per_epoch, current_epoch, total_words_processed, examples, labels). | |
| vocab_word: A `Tensor` of type `string`. A vector of words in the corpus. | |
| vocab_freq: A `Tensor` of type `int32`. Frequencies of words. Sorted in the non-ascending order. | |
| words_per_epoch: A `Tensor` of type `int64`. Number of words per epoch in the data file. | |
| current_epoch: A `Tensor` of type `int32`. The current epoch number. | |
| total_words_processed: A `Tensor` of type `int64`. The total number of words processed so far. | |
| examples: A `Tensor` of type `int32`. A vector of word ids. | |
| labels: A `Tensor` of type `int32`. A vector of word ids. | |
| """ | |
| return _op_def_lib.apply_op("Skipgram", filename=filename, | |
| batch_size=batch_size, window_size=window_size, | |
| min_count=min_count, subsample=subsample, | |
| name=name) | |
| ops.RegisterShape("Skipgram")(None) | |
| def _InitOpDefLibrary(): | |
| op_list = op_def_pb2.OpList() | |
| text_format.Merge(_InitOpDefLibrary.op_list_ascii, op_list) | |
| op_def_registry.register_op_list(op_list) | |
| op_def_lib = op_def_library.OpDefLibrary() | |
| op_def_lib.add_op_list(op_list) | |
| return op_def_lib | |
| _InitOpDefLibrary.op_list_ascii = """op { | |
| name: "NegTrain" | |
| input_arg { | |
| name: "w_in" | |
| type: DT_FLOAT | |
| is_ref: true | |
| } | |
| input_arg { | |
| name: "w_out" | |
| type: DT_FLOAT | |
| is_ref: true | |
| } | |
| input_arg { | |
| name: "examples" | |
| type: DT_INT32 | |
| } | |
| input_arg { | |
| name: "labels" | |
| type: DT_INT32 | |
| } | |
| input_arg { | |
| name: "lr" | |
| type: DT_FLOAT | |
| } | |
| attr { | |
| name: "vocab_count" | |
| type: "list(int)" | |
| } | |
| attr { | |
| name: "num_negative_samples" | |
| type: "int" | |
| } | |
| } | |
| op { | |
| name: "Skipgram" | |
| output_arg { | |
| name: "vocab_word" | |
| type: DT_STRING | |
| } | |
| output_arg { | |
| name: "vocab_freq" | |
| type: DT_INT32 | |
| } | |
| output_arg { | |
| name: "words_per_epoch" | |
| type: DT_INT64 | |
| } | |
| output_arg { | |
| name: "current_epoch" | |
| type: DT_INT32 | |
| } | |
| output_arg { | |
| name: "total_words_processed" | |
| type: DT_INT64 | |
| } | |
| output_arg { | |
| name: "examples" | |
| type: DT_INT32 | |
| } | |
| output_arg { | |
| name: "labels" | |
| type: DT_INT32 | |
| } | |
| attr { | |
| name: "filename" | |
| type: "string" | |
| } | |
| attr { | |
| name: "batch_size" | |
| type: "int" | |
| } | |
| attr { | |
| name: "window_size" | |
| type: "int" | |
| default_value { | |
| i: 5 | |
| } | |
| } | |
| attr { | |
| name: "min_count" | |
| type: "int" | |
| default_value { | |
| i: 5 | |
| } | |
| } | |
| attr { | |
| name: "subsample" | |
| type: "float" | |
| default_value { | |
| f: 0.001 | |
| } | |
| } | |
| } | |
| """ | |
| _op_def_lib = _InitOpDefLibrary() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment