Skip to content

Instantly share code, notes, and snippets.

View aniruddha27's full-sized avatar

Aniruddha Bhandari aniruddha27

View GitHub Profile
# Select the rows of df_rule2_all whose 'Output' entry is non-empty.
# A single boolean-mask selection replaces the row-by-row DataFrame.append
# loop: DataFrame.append was removed in pandas 2.0 and copied the whole
# frame on every iteration.
has_output2 = df_rule2_all['Output'].map(len) != 0
df_show2 = df_rule2_all.loc[has_output2].copy()
# Reset the index in place; the previous row labels are kept as a column.
df_show2.reset_index(inplace=True)
# create a df containing sentence and its output for rule 2
row_list = []
# df2 contains all the sentences from all the speeches
for i in range(len(df2)):
sent = df2.loc[i,'Sent']
year = df2.loc[i,'Year']
output = rule2(sent)
dict1 = {'Year':year,'Sent':sent,'Output':output}
# create a df containing sentence and its output for rule 2
row_list = []
for i in range(len(df3)):
sent = df3.loc[i,'Sent']
year = df3.loc[i,'Year']
# rule
output = rule2(sent)
# separate subject, verb and object
verb_dict = dict()
dis_dict = dict()
dis_list = []
# iterating over all the sentences
for i in range(len(df_show)):
# sentence containing the output
# Select the rows of df_rule1_all whose 'Output' entry is non-empty.
# A single boolean-mask selection replaces the row-by-row DataFrame.append
# loop: DataFrame.append was removed in pandas 2.0 and copied the whole
# frame on every iteration.
has_output1 = df_rule1_all['Output'].map(len) != 0
df_show = df_rule1_all.loc[has_output1].copy()
# Reset the index in place; the previous row labels are kept as a column.
df_show.reset_index(inplace=True)
# create a dataframe containing sentences
df2 = pd.DataFrame(columns=['Sent','Year','Len'])
row_list = []
for i in range(len(df)):
for sent in df.loc[i,'sent']:
wordcount = len(sent.split())
year = df.loc[i,'Year']
# rule 3 function
def rule3_mod(text):
doc = nlp(text)
sent = []
for token in doc:
if token.pos_=='ADP':
# rule 0
def rule0(text, index):
doc = nlp(text)
token = doc[index]
entity = ''
for sub_tok in token.children:
# rule 3 function
def rule3(text):
doc = nlp(text)
sent = []
for token in doc:
# look for prepositions
# rule 1 modified function
def rule1_mod(text):
doc = nlp(text)
sent = []
for token in doc:
# root word
if (token.pos_=='VERB'):