This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Parse a sample sentence with the loaded spaCy pipeline.
doc = nlp(' Last year, I spoke about the Ujjwala programme , through which, I am happy to report, 50 million free liquid-gas connections have been provided so far')
# Build a PNG from the parsed doc via visualise_spacy_tree and show it
# with IPython's display machinery.
png = visualise_spacy_tree.create_png(doc)
display(Image(png))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# to extract initiatives using pattern matching | |
def all_schemes(text,check): | |
schemes = [] | |
doc = nlp(text) | |
# initiatives | |
prog_list = ['programme','scheme', | |
'initiative','campaign', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# to check if keywords like 'programs', 'schemes', etc. are present in sentences
def prog_sent(text): | |
patterns = [r'\b(?i)'+'plan'+r'\b', | |
r'\b(?i)'+'programme'+r'\b', | |
r'\b(?i)'+'scheme'+r'\b', | |
r'\b(?i)'+'campaign'+r'\b', | |
r'\b(?i)'+'initiative'+r'\b', | |
r'\b(?i)'+'conference'+r'\b', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# function to find sentences containing PMs of India | |
def find_names(text): | |
names = [] | |
# spacy doc | |
doc = nlp(text) | |
# pattern | |
pattern = [{'LOWER':'prime'}, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

import spacy
import visualise_spacy_tree
from IPython.display import Image, display
from spacy import displacy
from spacy.matcher import Matcher
# Load the small English model with the 'ner' and 'textcat' pipeline
# components disabled.
# NOTE(review): presumably only tagging/parsing is needed here - confirm.
nlp = spacy.load('en_core_web_sm', disable=['ner', 'textcat'])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# split sentences | |
def sentences(text):
    """Split *text* into pieces at every '.' and '?' character.

    The delimiter characters are dropped. Pieces are returned exactly as
    found: surrounding whitespace is kept and empty pieces are included
    (for example, text ending in '.' yields a trailing '').

    Args:
        text: The string to split.

    Returns:
        A list of the substrings between delimiters, in order.
    """
    # re.split already returns a fresh list, so no copy loop is needed.
    return re.split('[.?]', text)
# sentences |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# function to preprocess speech | |
def clean(text): | |
# removing paragraph numbers | |
text = re.sub('[0-9]+.\t','',str(text)) | |
# removing new line characters | |
text = re.sub('\n ','',str(text)) | |
text = re.sub('\n',' ',str(text)) | |
# removing apostrophes | |
text = re.sub("'s",'',str(text)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Print the text of every token whose dependency label marks it as a
# subject ('nsubj') or an object ('dobj'); both cases print the same thing,
# so a single membership test covers them.
for token in doc:
    if token.dep_ in ('nsubj', 'dobj'):
        print(token.text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from spacy import displacy

# Render the dependency parse of `doc` ('dep' style) inline in the notebook.
displacy.render(doc, style='dep', jupyter=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
text = "The children love cream biscuits"
# create spacy doc
doc = nlp(text)
# Print each token next to its part-of-speech tag (token.pos_).
for token in doc:
    print(token.text, '->', token.pos_)