gaphex · November 17, 2019 15:29
diff --git a/build_bert_preprocessor.py b/build_bert_preprocessor.py
 def build_preprocessor(voc_path, seq_len, lower=True):
     """
     Create a text preprocessing function for BERT.

     A FullTokenizer is constructed once from `voc_path` (with
     `do_lower_case=lower`) and captured by the returned closure, so it is
     not rebuilt on every call.

     Returns a callable that accepts a single string or a collection of
     strings and returns whatever `features_to_arrays` produces -- expected
     to be three np.arrays [input_ids, input_mask, segment_ids] (that helper
     is defined elsewhere; confirm against its implementation).
     """
     bert_tokenizer = FullTokenizer(vocab_file=voc_path, do_lower_case=lower)

     def strings_to_arrays(sents):
         # Promote a scalar to an array and flatten any nesting to 1-D.
         flat_sents = np.atleast_1d(sents).reshape((-1,))
         # Materialize every example before feature conversion.
         example_list = list(read_examples(flat_sents))
         feats = convert_examples_to_features(
             example_list, seq_len, bert_tokenizer
         )
         return features_to_arrays(feats)

     return strings_to_arrays
	def build_preprocessor(voc_path, seq_len, lower=True):
		"""
		Build a text preprocessing pipeline for BERT.

		The tokenizer is created once here from `voc_path` (lower-casing
		controlled by `lower`) and reused by the returned function.

		Returns a function which takes a string or a collection of strings
		and passes them through `read_examples`,
		`convert_examples_to_features` (using `seq_len` and the tokenizer)
		and `features_to_arrays`. The result is expected to be a list of
		three np.arrays [input_ids, input_mask, segment_ids]; those helpers
		are defined elsewhere, so verify against them.
		"""
		tokenizer = FullTokenizer(vocab_file=voc_path, do_lower_case=lower)

		def strings_to_arrays(sents):
			# atleast_1d lets a bare string through; reshape flattens to 1-D.
			sents = np.atleast_1d(sents).reshape((-1,))

			# Collect all examples up front for the feature converter.
			examples = list(read_examples(sents))

			features = convert_examples_to_features(examples, seq_len, tokenizer)
			return features_to_arrays(features)

		return strings_to_arrays
No results found