Aniruddha Bhandari aniruddha27

Data Engineer

aniruddha27 / nlp_ie_22.py

Created June 5, 2020 21:44

aniruddha27 / nlp_ie_21.py

Created June 5, 2020 21:41

aniruddha27 / nlp_ie_20.py

Created June 5, 2020 21:33

	# Build the rows (year, sentence, rule 1 output) for a df of rule 1 results.
	row_list = []

	# df2 contains all the sentences from all the speeches.
	# The two columns are walked positionally rather than through
	# df2.loc[i, ...], so the loop does not rely on df2 having a default
	# 0..n-1 index.
	for year, sent in zip(df2['Year'], df2['Sent']):
		output = rule1(sent)
		dict1 = {'Year':year,'Sent':sent,'Output':output}
		# Store the record; without this step row_list would stay empty.
		row_list.append(dict1)

aniruddha27 / nlp_ie_19.py

Created June 5, 2020 21:32

	# Build the rows (year, sentence, rule 1 output) for a df of rule 1 results.
	row_list = []

	# The two columns of df3 are walked positionally rather than through
	# df3.loc[i, ...], so the loop does not rely on df3 having a default
	# 0..n-1 index.
	for year, sent in zip(df3['Year'], df3['Sent']):
		output = rule1(sent)
		dict1 = {'Year':year,'Sent':sent,'Output':output}
		row_list.append(dict1)

aniruddha27 / nlp_ie_18.py

Last active June 28, 2020 17:14

	# function for rule 1: noun(subject), verb, noun(object)
	def rule1(text):

	doc = nlp(text)

	sent = []

	for token in doc:

	# if the token is a verb

aniruddha27 / nlp_ie_17.py

Created June 5, 2020 21:15

	# Fetch the sentence stored under row label 9 of df3 and echo it.
	text = df3.loc[9, 'Sent']
	print(text)

	# Run the sentence through nlp, then hand the result to displacy using
	# the 'dep' style with Jupyter output enabled.
	doc = nlp(text)
	displacy.render(
		doc,
		style='dep',
		jupyter=True,
	)

aniruddha27 / nlp_ie_16.py

Created June 5, 2020 21:12

	# function to check output percentage for a rule
	def output_per(df,out_col):

	result = 0

	for out in df[out_col]:
	if len(out)!=0:
	result+=1

	per = result/len(df)

aniruddha27 / nlp_ie_15.py

Created June 5, 2020 21:12

	from random import randint
	def rand_sent(df):
		"""Render one randomly chosen sentence of df and return its index.

		A random integer in 0..len(df)-1 is drawn and printed, the 'Sent'
		value stored under that label (minus its first character) is passed
		through nlp, and the result is rendered with displacy in 'dep' style.

		Returns the drawn index. Raises ValueError if df is empty, because
		there is no valid index to draw.
		"""
		# randint includes BOTH endpoints, so the upper bound has to be
		# len(df) - 1; len(df) itself is not a row label of a 0..n-1 index
		# and would make df.loc raise KeyError.
		index = randint(0, len(df) - 1)
		print('Index = ',index)
		# [1:] drops the first character of the stored sentence
		# (presumably a leading space -- confirm against how 'Sent' is built).
		doc = nlp(df.loc[index,'Sent'][1:])
		displacy.render(doc, style='dep',jupyter=True)

		return index

aniruddha27 / nlp_ie_14.py

Created June 5, 2020 21:11

	# accumulator list; nothing is appended to it within the visible lines
	row_list = []
	# df2 contains all sentences from all speeches
	# NOTE(review): range(len(df2)) combined with df2.loc[i, ...] assumes df2
	# has a default 0..n-1 index -- confirm.
	for i in range(len(df2)):
	sent = df2.loc[i,'Sent']

	# only sentences that contain no comma and have at most 15
	# whitespace-separated words pass this filter
	if (',' not in sent) and (len(sent.split()) <= 15):

	year = df2.loc[i,'Year']
	# number of whitespace-separated words in the sentence
	length = len(sent.split())
	# NOTE(review): the snippet ends here in this view; the remaining lines
	# of the loop body are not visible.

aniruddha27 / nlp_ie_13.py

Created June 5, 2020 21:03

	# rule to extract initiative name
	def sent_subtree(text):

	# pattern match for schemes or initiatives
	patterns = [r'\b(?i)'+'plan'+r'\b',
	r'\b(?i)'+'programme'+r'\b',
	r'\b(?i)'+'scheme'+r'\b',
	r'\b(?i)'+'campaign'+r'\b',
	r'\b(?i)'+'initiative'+r'\b',
	r'\b(?i)'+'conference'+r'\b',