sanusart · August 29, 2019 20:44
diff --git a/create_training_data.py b/create_training_data.py
 import spacy
 from spacy.matcher import Matcher
 from spacy.lang.en import English

 # Pipeline object and a token Matcher that shares its vocabulary
 nlp = English()
 matcher = Matcher(nlp.vocab)

 # Token patterns registered under the "GADGET" rule:
 #   pattern1 -> a token whose lowercase form is "iphone", then one whose
 #               lowercase form is "x"
 #   pattern2 -> "iphone", then an all-digit token marked optional ("?")
 pattern1 = [
     {"LOWER": "iphone"},
     {"LOWER": "x"},
 ]
 pattern2 = [
     {"LOWER": "iphone"},
     {"IS_DIGIT": True, "OP": "?"},
 ]
 # NOTE(review): the positional None is the on_match callback slot of the
 # spaCy v2 signature; confirm the target spaCy version before upgrading.
 matcher.add("GADGET", None, pattern1, pattern2)

 # Build the training set: one (text, {"entities": [...]}) tuple per text
 # in TEXTS, labelling every matcher hit as "GADGET".
 TRAINING_DATA = []

 for doc in nlp.pipe(TEXTS):
     # Turn the raw (match_id, start, end) matches into spans of the doc
     matched = [doc[start:end] for _match_id, start, end in matcher(doc)]
     # pattern2's second token is optional, so it also fires on the bare
     # "iphone" inside "iPhone X" and the raw matches overlap. Keep only the
     # longest non-overlapping spans so each stretch of text is labelled once.
     spans = spacy.util.filter_spans(matched)
     # (start character, end character, label) for every surviving span
     entities = [(span.start_char, span.end_char, "GADGET") for span in spans]
     # Texts without any match still yield an example with an empty list
     training_example = (doc.text, {"entities": entities})
     TRAINING_DATA.append(training_example)

 # Structure of the resulting training data: a list of
 # (text, {"entities": [(start_char, end_char, label), ...]}) tuples.
 # The tuples must be comma-separated; without the commas Python parses
 # `(...)(...)` as a call on a tuple and raises TypeError.
 # NOTE(review): this literal rebinds TRAINING_DATA; it illustrates the
 # shape of the data rather than extending a previously built list.
 TRAINING_DATA = [
     ("How to preorder the iPhone X", {'entities': [(20, 28, 'GADGET')]}),
     ("Examples without labels are also needed", {'entities': []}),
     # And many more examples...
 ]
diff --git a/mvfzf b/mvfzf
 hghj
	import spacy
	from spacy.matcher import Matcher
	from spacy.lang.en import English

	# Pipeline object and a token Matcher that shares its vocabulary
	nlp = English()
	matcher = Matcher(nlp.vocab)

	# Token patterns registered under the "GADGET" rule:
	#   pattern1 -> a token whose lowercase form is "iphone", then one whose
	#               lowercase form is "x"
	#   pattern2 -> "iphone", then an all-digit token marked optional ("?")
	pattern1 = [
	    {"LOWER": "iphone"},
	    {"LOWER": "x"},
	]
	pattern2 = [
	    {"LOWER": "iphone"},
	    {"IS_DIGIT": True, "OP": "?"},
	]
	# NOTE(review): the positional None is the on_match callback slot of the
	# spaCy v2 signature; confirm the target spaCy version before upgrading.
	matcher.add("GADGET", None, pattern1, pattern2)

	# Build the training set: one (text, {"entities": [...]}) tuple per text
	# in TEXTS, labelling every matcher hit as "GADGET".
	TRAINING_DATA = []

	for doc in nlp.pipe(TEXTS):
	    # The loop body must be indented under the `for`; at the same level
	    # as the header Python rejects the script with an IndentationError.
	    # Turn the raw (match_id, start, end) matches into spans of the doc
	    matched = [doc[start:end] for _match_id, start, end in matcher(doc)]
	    # pattern2's second token is optional, so it also fires on the bare
	    # "iphone" inside "iPhone X" and the raw matches overlap. Keep only the
	    # longest non-overlapping spans so each stretch of text is labelled once.
	    spans = spacy.util.filter_spans(matched)
	    # (start character, end character, label) for every surviving span
	    entities = [(span.start_char, span.end_char, "GADGET") for span in spans]
	    # Texts without any match still yield an example with an empty list
	    training_example = (doc.text, {"entities": entities})
	    TRAINING_DATA.append(training_example)

	# Structure of the resulting training data: a list of
	# (text, {"entities": [(start_char, end_char, label), ...]}) tuples.
	# The tuples must be comma-separated; without the commas Python parses
	# `(...)(...)` as a call on a tuple and raises TypeError.
	# NOTE(review): this literal rebinds TRAINING_DATA; it illustrates the
	# shape of the data rather than extending a previously built list.
	TRAINING_DATA = [
	    ("How to preorder the iPhone X", {'entities': [(20, 28, 'GADGET')]}),
	    ("Examples without labels are also needed", {'entities': []}),
	    # And many more examples...
	]