zeddee · February 4, 2020 01:33
diff --git a/learn-spacy-one.py b/learn-spacy-one.py
 # from https://course.spacy.io/chapter1
 # Walk-through of spaCy's basic objects: Doc, Token, Span, and a few
 # lexical attributes, followed by a small "find the percentages" loop.
 from spacy.lang.en import English

 # Create the English language object and process a short sample text.
 nlp = English()
 doc = nlp("hello world!!! one 123")

 # Print the text of every token, one per line.
 for token in doc:
     print(token.text)

 print(doc)

 # a slice from a Doc is a Span object
 span = doc[1:4]
 print(span.text)

 # lexical attributes
 print("Lexical attributes")
 print("========================================")
 print("Index:\t", [token.i for token in doc])
 print("Text:\t", [token.text for token in doc])
 # Label fixed from "is_alphs" so it names the attribute actually printed.
 print("is_alpha:\t", [token.is_alpha for token in doc])
 print("is_punct:\t", [token.is_punct for token in doc])
 print("like_num:\t", [token.like_num for token in doc])

 doc = nlp(
     "In 1990, more than 60% of people in East Asia were in extreme poverty. "
     "Now less than 4% are."
 )

 # Iterate over the tokens in the doc
 # to find percentage figures in 'doc'
 for token in doc:
     # Only tokens that resemble a number can start a percentage figure.
     if not token.like_num:
         continue
     # A number-like token at the very end of the doc has no successor;
     # skip it instead of letting doc[token.i + 1] raise IndexError.
     if token.i + 1 >= len(doc):
         continue
     # The figure is a percentage when the next token's text equals '%'.
     next_token = doc[token.i + 1]
     if next_token.text == "%":
         print("Percentage found:", token.text)
	# from https://course.spacy.io/chapter1
	# Walk-through of spaCy's basic objects: Doc, Token, Span, and a few
	# lexical attributes, followed by a small "find the percentages" loop.
	# NOTE: the nested indentation of this copy had been flattened; the loop
	# and conditional bodies below are re-indented so the script parses again.
	from spacy.lang.en import English

	# Create the English language object and process a short sample text.
	nlp = English()
	doc = nlp("hello world!!! one 123")

	# Print the text of every token, one per line.
	for token in doc:
	    print(token.text)

	print(doc)

	# a slice from a Doc is a Span object
	span = doc[1:4]
	print(span.text)

	# lexical attributes
	print("Lexical attributes")
	print("========================================")
	print("Index:\t", [token.i for token in doc])
	print("Text:\t", [token.text for token in doc])
	# Label fixed from "is_alphs" so it names the attribute actually printed.
	print("is_alpha:\t", [token.is_alpha for token in doc])
	print("is_punct:\t", [token.is_punct for token in doc])
	print("like_num:\t", [token.like_num for token in doc])

	doc = nlp(
	    "In 1990, more than 60% of people in East Asia were in extreme poverty. "
	    "Now less than 4% are."
	)

	# Iterate over the tokens in the doc
	# to find percentage figures in 'doc'
	for token in doc:
	    # Only tokens that resemble a number can start a percentage figure.
	    if not token.like_num:
	        continue
	    # A number-like token at the very end of the doc has no successor;
	    # skip it instead of letting doc[token.i + 1] raise IndexError.
	    if token.i + 1 >= len(doc):
	        continue
	    # The figure is a percentage when the next token's text equals '%'.
	    next_token = doc[token.i + 1]
	    if next_token.text == "%":
	        print("Percentage found:", token.text)