This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Select the rows of df_rule2_all whose 'Output' is non-empty.
#
# A boolean mask replaces the previous row-by-row DataFrame.append loop:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it
# copied the whole frame on every iteration. The mask also does not rely on
# df_rule2_all having a default 0..n-1 index.
nonempty_rule2 = df_rule2_all['Output'].map(len) != 0

# reset_index() is called without drop=True on purpose: like the earlier
# reset_index(inplace=True), it keeps the original row labels in an
# 'index' column of the result.
df_show2 = df_rule2_all.loc[nonempty_rule2].reset_index()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# create a df containing sentence and its output for rule 2 | |
row_list = [] | |
# df2 contains all the sentences from all the speeches | |
for i in range(len(df2)): | |
sent = df2.loc[i,'Sent'] | |
year = df2.loc[i,'Year'] | |
output = rule2(sent) | |
dict1 = {'Year':year,'Sent':sent,'Output':output} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# create a df containing sentence and its output for rule 2 | |
row_list = [] | |
for i in range(len(df3)): | |
sent = df3.loc[i,'Sent'] | |
year = df3.loc[i,'Year'] | |
# rule | |
output = rule2(sent) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# separate subject, verb and object | |
verb_dict = dict() | |
dis_dict = dict() | |
dis_list = [] | |
# iterating over all the sentences | |
for i in range(len(df_show)): | |
# sentence containing the output |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Select the rows of df_rule1_all whose 'Output' is non-empty.
#
# A boolean mask replaces the previous row-by-row DataFrame.append loop:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it
# copied the whole frame on every iteration. The mask also does not rely on
# df_rule1_all having a default 0..n-1 index.
nonempty_rule1 = df_rule1_all['Output'].map(len) != 0

# reset_index() is called without drop=True on purpose: like the earlier
# reset_index(inplace=True), it keeps the original row labels in an
# 'index' column of the result.
df_show = df_rule1_all.loc[nonempty_rule1].reset_index()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# create a dataframe containing sentences | |
df2 = pd.DataFrame(columns=['Sent','Year','Len']) | |
row_list = [] | |
for i in range(len(df)): | |
for sent in df.loc[i,'sent']: | |
wordcount = len(sent.split()) | |
year = df.loc[i,'Year'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# rule 3 function | |
def rule3_mod(text): | |
doc = nlp(text) | |
sent = [] | |
for token in doc: | |
if token.pos_=='ADP': |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# rule 0 | |
def rule0(text, index): | |
doc = nlp(text) | |
token = doc[index] | |
entity = '' | |
for sub_tok in token.children: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# rule 3 function | |
def rule3(text): | |
doc = nlp(text) | |
sent = [] | |
for token in doc: | |
# look for prepositions |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# rule 1 modified function | |
def rule1_mod(text): | |
doc = nlp(text) | |
sent = [] | |
for token in doc: | |
# root word | |
if (token.pos_=='VERB'): |