paulmwatson · July 22, 2020 13:52
diff --git a/spacy_sentence_with_abbreviations.py b/spacy_sentence_with_abbreviations.py
 import spacy
 from spacy.attrs import ORTH, LEMMA

 # Demo: show how sentence splitting changes once abbreviations that end in a
 # period are registered as tokenizer special cases.
 text = 'Sgt. Maj. A. Grinston found approx. 2.2 miles up a creek on Mt. Toohigh.'

 nlp = spacy.load('en_core_web_lg')

 # Baseline run with the stock pipeline; the recorded output shows the text
 # being split after several of the abbreviations.
 print([sent.text for sent in nlp(text).sents])
 #=> ['Sgt.', 'Maj.', 'A. Grinston found approx.', '2.2 miles up a creek on Mt. Toohigh.']

 # Abbreviation -> lemma. Each entry becomes a single-token special case whose
 # ORTH is the abbreviation including its trailing period.
 # NOTE(review): newer spaCy releases reportedly accept only ORTH and NORM in
 # tokenizer special cases -- confirm LEMMA is allowed by the installed version.
 abbreviation_lemmas = {
     'Sgt.': 'sergeant',
     'Maj.': 'major',
     'approx.': 'approximately',
 }
 for abbreviation, lemma in abbreviation_lemmas.items():
     nlp.tokenizer.add_special_case(
         abbreviation, [{ORTH: abbreviation, LEMMA: lemma}]
     )

 # Same text again after registering the special cases; the recorded output
 # shows it coming back as one sentence.
 print([sent.text for sent in nlp(text).sents])
 #=> ['Sgt. Maj. A. Grinston found approx. 2.2 miles up a creek on Mt. Toohigh.']
	import spacy
	from spacy.attrs import ORTH, LEMMA

	# Demo: show how sentence splitting changes once abbreviations that end in a
	# period are registered as tokenizer special cases.
	text = 'Sgt. Maj. A. Grinston found approx. 2.2 miles up a creek on Mt. Toohigh.'

	nlp = spacy.load('en_core_web_lg')

	# Baseline run with the stock pipeline; the recorded output shows the text
	# being split after several of the abbreviations.
	print([sent.text for sent in nlp(text).sents])
	#=> ['Sgt.', 'Maj.', 'A. Grinston found approx.', '2.2 miles up a creek on Mt. Toohigh.']

	# Abbreviation -> lemma. Each entry becomes a single-token special case whose
	# ORTH is the abbreviation including its trailing period.
	# NOTE(review): newer spaCy releases reportedly accept only ORTH and NORM in
	# tokenizer special cases -- confirm LEMMA is allowed by the installed version.
	abbreviation_lemmas = {
		'Sgt.': 'sergeant',
		'Maj.': 'major',
		'approx.': 'approximately',
	}
	for abbreviation, lemma in abbreviation_lemmas.items():
		nlp.tokenizer.add_special_case(
			abbreviation, [{ORTH: abbreviation, LEMMA: lemma}]
		)

	# Same text again after registering the special cases; the recorded output
	# shows it coming back as one sentence.
	print([sent.text for sent in nlp(text).sents])
	#=> ['Sgt. Maj. A. Grinston found approx. 2.2 miles up a creek on Mt. Toohigh.']