Skip to content

Instantly share code, notes, and snippets.

@zeddee
Created February 4, 2020 01:33
Show Gist options
  • Save zeddee/4f347f438393b5f5876576a8e197b053 to your computer and use it in GitHub Desktop.
Save zeddee/4f347f438393b5f5876576a8e197b053 to your computer and use it in GitHub Desktop.
# from https://course.spacy.io/chapter1
from spacy.lang.en import English
# Instantiate the English language class and process a short sample string.
nlp = English()
doc = nlp("hello world!!! one 123")

# Iterating over a Doc yields its tokens; print each token's text.
for token in doc:
    print(token.text)

# Print the Doc object as a whole.
print(doc)

# A slice of a Doc is a Span object; this one covers token indices 1 to 3.
span = doc[1:4]
print(span.text)
# Lexical attributes: print one list per attribute, holding one entry for
# each token in `doc`, so the rows can be compared position by position.
print("Lexical attributes")
print("========================================")
print("Index:\t", [token.i for token in doc])
print("Text:\t", [token.text for token in doc])
# Label matches the attribute name being printed (`is_alpha`).
print("is_alpha:\t", [token.is_alpha for token in doc])
print("is_punct:\t", [token.is_punct for token in doc])
print("like_num:\t", [token.like_num for token in doc])
doc = nlp(
    "In 1990, more than 60% of people in East Asia were in extreme poverty. "
    "Now less than 4% are."
)

# Find percentage figures in `doc`: a token that resembles a number and is
# immediately followed by a "%" token.
for token in doc:
    # Only number-like tokens can start a percentage figure.
    if not token.like_num:
        continue

    # A number-like token at the very end of the doc has no successor;
    # indexing one past the end would raise IndexError, so skip it.
    next_index = token.i + 1
    if next_index >= len(doc):
        continue

    # Report the number when the token right after it is the percent sign.
    next_token = doc[next_index]
    if next_token.text == "%":
        print("Percentage found:", token.text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment