Skip to content

Instantly share code, notes, and snippets.

View aniruddha27's full-sized avatar

Aniruddha Bhandari aniruddha27

View GitHub Profile
# Run the pipeline on one example sentence, build a PNG from the resulting
# doc with visualise_spacy_tree.create_png, and display that image.
doc = nlp(' Last year, I spoke about the Ujjwala programme , through which, I am happy to report, 50 million free liquid-gas connections have been provided so far')
png = visualise_spacy_tree.create_png(doc)
display(Image(png))
# to extract initiatives using pattern matching
def all_schemes(text,check):
schemes = []
doc = nlp(text)
# initiatives
prog_list = ['programme','scheme',
'initiative','campaign',
# to check if keywords like 'programme', 'scheme', etc. are present in a sentence
def prog_sent(text):
patterns = [r'\b(?i)'+'plan'+r'\b',
r'\b(?i)'+'programme'+r'\b',
r'\b(?i)'+'scheme'+r'\b',
r'\b(?i)'+'campaign'+r'\b',
r'\b(?i)'+'initiative'+r'\b',
r'\b(?i)'+'conference'+r'\b',
# function to find sentences containing PMs of India
def find_names(text):
names = []
# spacy doc
doc = nlp(text)
# pattern
pattern = [{'LOWER':'prime'},
import spacy
from spacy.matcher import Matcher
from spacy import displacy
import visualise_spacy_tree
from IPython.display import Image, display
# load english language model
nlp = spacy.load('en_core_web_sm',disable=['ner','textcat'])
# split sentences
def sentences(text):
    """Split *text* into pieces at every full stop or question mark.

    The delimiter characters are dropped. Nothing else is cleaned up:
    leading/trailing whitespace and empty pieces (for example the one
    after a trailing '.') are returned unchanged.

    Returns a new list of strings.
    """
    # re.split already hands back a fresh list, so it is returned directly.
    return re.split('[.?]', text)
# sentences
# function to preprocess speech
def clean(text):
# removing paragraph numbers
text = re.sub('[0-9]+.\t','',str(text))
# removing new line characters
text = re.sub('\n ','',str(text))
text = re.sub('\n',' ',str(text))
# removing apostrophes
text = re.sub("'s",'',str(text))
# Print the text of every token whose dependency label marks it as the
# subject ('nsubj') or the direct object ('dobj') of the parsed sentence.
for token in doc:
    if token.dep_ in ('nsubj', 'dobj'):
        print(token.text)
from spacy import displacy
# Draw the dependency parse of `doc` with displaCy (style='dep');
# jupyter=True is passed so the output is rendered in the notebook.
displacy.render(doc, style='dep',jupyter=True)
# Example sentence used to demonstrate part-of-speech tagging.
text = "The children love cream biscuits"

# Run the spaCy pipeline over the sentence.
doc = nlp(text)

# Show each token next to its coarse part-of-speech tag.
for token in doc:
    print(token.text, '->', token.pos_)