Skip to content

Instantly share code, notes, and snippets.

@gaphex
Last active June 23, 2019 11:58
Show Gist options
  • Select an option

  • Save gaphex/883af614b27c9964125b4e683d3bd033 to your computer and use it in GitHub Desktop.

Select an option

Save gaphex/883af614b27c9964125b4e683d3bd033 to your computer and use it in GitHub Desktop.
Extracting features from text
def batch(iterable, n=1):
    """Yield consecutive slices of ``iterable`` holding at most ``n`` items.

    Args:
        iterable: A sequence that supports ``len()`` and slicing (e.g. a
            list, tuple or str). Plain one-shot iterators are not supported.
        n: Maximum number of items per slice; must be at least 1.

    Yields:
        ``iterable[start:stop]`` for ``start = 0, n, 2n, ...``. The final
        slice may be shorter than ``n``. Nothing is yielded for an empty
        sequence.

    Raises:
        ValueError: If ``n`` is smaller than 1. Because this is a generator,
            the error surfaces when iteration starts, not at call time.
    """
    if n < 1:
        # A negative step makes range() empty, which would silently drop
        # every item; a zero step is rejected by range() anyway.
        raise ValueError("batch size n must be >= 1, got {!r}".format(n))

    total = len(iterable)
    for start in range(0, total, n):
        stop = min(start + n, total)
        yield iterable[start:stop]
def build_vectorizer(_estimator, _input_fn_builder, batch_size=128):
    """Build a ``vectorize(text)`` closure backed by one prediction stream.

    A single ``DataContainer`` is created and handed to ``_input_fn_builder``;
    the resulting input function is passed to ``_estimator.predict`` once.
    The returned closure then feeds batches through that container and pulls
    one prediction per batch from the shared stream.

    Args:
        _estimator: Object exposing ``predict(input_fn, yield_single_examples=...)``
            that returns an iterator of predictions.
            NOTE(review): presumably a ``tf.estimator.Estimator`` -- confirm.
        _input_fn_builder: Callable that takes the container and returns the
            input function given to ``_estimator.predict``.
        batch_size: Maximum number of items sent to the estimator per step.

    Returns:
        The ``vectorize`` function described below.
    """
    feed = DataContainer()
    input_fn = _input_fn_builder(feed)
    # The prediction iterator is created once and shared by every call to
    # vectorize(); each next() on it is paired with one container.set().
    predict_fn = _estimator.predict(
        input_fn,
        yield_single_examples=False,
    )

    def vectorize(text, verbose=False):
        """Return the stacked ``'output'`` predictions for ``text``.

        Args:
            text: Sequence of inputs; must support ``len()`` and slicing.
            verbose: When true, advance a progress bar after every batch.

        Returns:
            ``np.vstack`` of the ``'output'`` entry of each batch prediction.
        """
        progress = Progbar(len(text))
        outputs = []
        for chunk in batch(text, batch_size):
            # Publish the batch first, then pull the matching prediction.
            feed.set(chunk)
            prediction = next(predict_fn)
            outputs.append(prediction['output'])
            if verbose:
                progress.add(len(chunk))

        return np.vstack(outputs)

    return vectorize
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment