DataTurks · May 27, 2018 10:35 · MarkHeatzig · Aug 6, 2019
diff --git a/train_spacy_NER.py b/train_spacy_NER.py
 import spacy
 ################### Train Spacy NER.###########
 def train_spacy(n_iter=1):
     """Train a blank English spaCy NER model on DataTurks annotations.

     Loads the training examples from "dataturks_downloaded.json" through
     convert_dataturks_to_spacy, registers every entity label found in them,
     trains only the NER component, prints the accumulated losses after each
     pass and finally prints the entities predicted for a sample sentence.

     Each training example is unpacked as (text, annotations), where
     annotations is a dict whose 'entities' items carry the label at
     index 2 (presumably (start, end, label) tuples - confirm against
     convert_dataturks_to_spacy).

     Args:
         n_iter: Number of passes over the training data. Defaults to 1,
             which matches the previously hard-coded behaviour.
     """
     # Imported locally so the function does not rely on a module-level
     # `import random` being present in the file.
     import random

     TRAIN_DATA = convert_dataturks_to_spacy("dataturks_downloaded.json")
     nlp = spacy.blank('en')  # create blank Language class

     # nlp.create_pipe works for built-ins that are registered with spaCy.
     # Reuse the component when it is already in the pipeline so that `ner`
     # is always bound before labels are added to it.
     if 'ner' not in nlp.pipe_names:
         ner = nlp.create_pipe('ner')
         nlp.add_pipe(ner, last=True)
     else:
         ner = nlp.get_pipe('ner')

     # add labels; examples without an 'entities' key contribute none
     for _, annotations in TRAIN_DATA:
         for ent in annotations.get('entities', []):
             ner.add_label(ent[2])

     # get names of other pipes to disable them during training
     other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
     with nlp.disable_pipes(*other_pipes):  # only train NER
         optimizer = nlp.begin_training()
         for itn in range(n_iter):
             print("Statring iteration " + str(itn))
             random.shuffle(TRAIN_DATA)
             losses = {}  # reset per pass so each printout covers one pass
             for text, annotations in TRAIN_DATA:
                 nlp.update(
                     [text],  # batch of texts
                     [annotations],  # batch of annotations
                     drop=0.2,  # dropout - make it harder to memorise data
                     sgd=optimizer,  # callable to update weights
                     losses=losses)
             print(losses)

     # do prediction on a sample sentence with the freshly trained model
     doc = nlp("Samsing mobiles below $100")
     print("Entities= " + str([str(ent.text) + "_" + str(ent.label_) for ent in doc.ents]))
	import spacy
	################### Train Spacy NER.###########
	def train_spacy(n_iter=1):
	    """Train a blank English spaCy NER model on DataTurks annotations.

	    Loads the training examples from "dataturks_downloaded.json" through
	    convert_dataturks_to_spacy, registers every entity label found in them,
	    trains only the NER component, prints the accumulated losses after each
	    pass and finally prints the entities predicted for a sample sentence.

	    Each training example is unpacked as (text, annotations), where
	    annotations is a dict whose 'entities' items carry the label at
	    index 2 (presumably (start, end, label) tuples - confirm against
	    convert_dataturks_to_spacy).

	    Args:
	        n_iter: Number of passes over the training data. Defaults to 1,
	            which matches the previously hard-coded behaviour.
	    """
	    # Imported locally so the function does not rely on a module-level
	    # `import random` being present in the file.
	    import random

	    TRAIN_DATA = convert_dataturks_to_spacy("dataturks_downloaded.json")
	    nlp = spacy.blank('en')  # create blank Language class

	    # nlp.create_pipe works for built-ins that are registered with spaCy.
	    # Reuse the component when it is already in the pipeline so that `ner`
	    # is always bound before labels are added to it.
	    if 'ner' not in nlp.pipe_names:
	        ner = nlp.create_pipe('ner')
	        nlp.add_pipe(ner, last=True)
	    else:
	        ner = nlp.get_pipe('ner')

	    # add labels; examples without an 'entities' key contribute none
	    for _, annotations in TRAIN_DATA:
	        for ent in annotations.get('entities', []):
	            ner.add_label(ent[2])

	    # get names of other pipes to disable them during training
	    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
	    with nlp.disable_pipes(*other_pipes):  # only train NER
	        optimizer = nlp.begin_training()
	        for itn in range(n_iter):
	            print("Statring iteration " + str(itn))
	            random.shuffle(TRAIN_DATA)
	            losses = {}  # reset per pass so each printout covers one pass
	            for text, annotations in TRAIN_DATA:
	                nlp.update(
	                    [text],  # batch of texts
	                    [annotations],  # batch of annotations
	                    drop=0.2,  # dropout - make it harder to memorise data
	                    sgd=optimizer,  # callable to update weights
	                    losses=losses)
	            print(losses)

	    # do prediction on a sample sentence with the freshly trained model
	    doc = nlp("Samsing mobiles below $100")
	    print("Entities= " + str([str(ent.text) + "_" + str(ent.label_) for ent in doc.ents]))
No results found