Skip to content

Instantly share code, notes, and snippets.

View aniruddha27's full-sized avatar

Aniruddha Bhandari aniruddha27

View GitHub Profile
def rule2_mod(text,index):
doc = nlp(text)
phrase = ''
for token in doc:
if token.i == index:
# function for rule 2
def rule2(text):
doc = nlp(text)
pat = []
# iterate over tokens
for token in doc:
phrase = ''
# create a df containing sentence and its output for rule 1
row_list = []
# df2 contains all the sentences from all the speeches
for i in range(len(df2)):
sent = df2.loc[i,'Sent']
year = df2.loc[i,'Year']
output = rule1(sent)
dict1 = {'Year':year,'Sent':sent,'Output':output}
# Collect one record per sentence in df3: the sentence, its year and the
# output of rule1 for that sentence.
# The 'Year' and 'Sent' columns are walked in lockstep instead of indexing
# with df3.loc[i, ...] for i in range(len(df3)); positional iteration does not
# depend on df3 carrying the default 0..n-1 index labels.
row_list = []
for year, sent in zip(df3['Year'], df3['Sent']):
    output = rule1(sent)
    row_list.append({'Year': year, 'Sent': sent, 'Output': output})
# function for rule 1: noun(subject), verb, noun(object)
def rule1(text):
doc = nlp(text)
sent = []
for token in doc:
# if the token is a verb
# Take the sentence stored under row label 9 of df3 and echo it.
text = df3.loc[9,'Sent']
print(text)
# Run the sentence through the nlp pipeline (presumably a loaded spaCy model
# -- its definition is not visible here).
doc = nlp(text)
# Draw the parsed sentence with the 'dep' style; jupyter=True asks displacy
# to render for a notebook.
displacy.render(doc, style='dep',jupyter=True)
# function to check output percentage for a rule
def output_per(df, out_col):
    """Return the share of rows whose rule output is non-empty.

    Args:
        df: DataFrame holding one row per sentence.
        out_col: Name of the column containing the rule output for each
            row; every value must support ``len()``.

    Returns:
        The number of rows with a non-empty output divided by the total
        number of rows, as a fraction between 0 and 1. An empty ``df``
        yields ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    total = len(df)
    if total == 0:
        # Nothing to measure; avoid dividing by zero.
        return 0.0

    matched = sum(1 for out in df[out_col] if len(out) != 0)
    return matched / total
from random import randint
def rand_sent(df):
    """Render the dependency parse of a randomly chosen sentence.

    Args:
        df: DataFrame with a 'Sent' column. Rows are looked up by label via
            ``df.loc``, so this assumes the default 0..n-1 index -- confirm
            against the caller.

    Returns:
        The row label that was picked.

    Raises:
        ValueError: If ``df`` is empty (``randint`` receives an empty range).
    """
    # randint includes both endpoints, so the upper bound must be
    # len(df) - 1; using len(df) could pick a label one past the last row.
    index = randint(0, len(df) - 1)
    print('Index = ',index)
    # The first character of the stored sentence is dropped before parsing
    # (presumably a leading space left over from sentence splitting).
    doc = nlp(df.loc[index,'Sent'][1:])
    displacy.render(doc, style='dep',jupyter=True)
    return index
row_list = []
# df2 contains all sentences from all speeches
for i in range(len(df2)):
sent = df2.loc[i,'Sent']
if (',' not in sent) and (len(sent.split()) <= 15):
year = df2.loc[i,'Year']
length = len(sent.split())
# rule to extract initiative name
def sent_subtree(text):
# pattern match for schemes or initiatives
patterns = [r'\b(?i)'+'plan'+r'\b',
r'\b(?i)'+'programme'+r'\b',
r'\b(?i)'+'scheme'+r'\b',
r'\b(?i)'+'campaign'+r'\b',
r'\b(?i)'+'initiative'+r'\b',
r'\b(?i)'+'conference'+r'\b',