parulagg27 · April 6, 2020 22:29
diff --git a/holidays.py b/holidays.py
 from selenium import webdriver
 from selenium.webdriver.support.ui import Select
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.firefox.options import Options
 from bs4 import BeautifulSoup
 import numpy as np
 import pandas as pd
 import sys


 def parse_holidays(given_year, driver, final_list):
 	"""Query the holiday form once per month and collect the common holidays.

 	For every month name the form on the page currently loaded in ``driver`` is filled in
 	(month, year, regional office option with value "0") and submitted through the
 	"btnGo" button. The rows of the first table of the resulting page are handed to
 	``find_common_holidays`` and its result is appended to ``final_list``.

 	Args:
 		given_year: year to select; converted with ``str()`` because it is matched
 			against the visible text of the "drYear" drop-down.
 		driver: Selenium WebDriver that already displays the search form.
 		final_list: list that is extended in place with one result list per month.

 	Returns:
 		The dict that is also printed: the first entry of each non-empty monthly
 		result mapped to the remaining entries of that result.
 	"""
 	month_names = (
 		'January', 'February', 'March', 'April', 'May', 'June',
 		'July', 'August', 'September', 'October', 'November', 'December',
 	)
 	year_text = str(given_year)

 	for month_name in month_names:
 		# The form elements are looked up again on every pass rather than cached
 		# (presumably because submitting the form reloads the page - verify).
 		Select(driver.find_element_by_id("drMonth")).select_by_visible_text(month_name)
 		Select(driver.find_element_by_id("drYear")).select_by_visible_text(year_text)
 		Select(driver.find_element_by_name("drRegionalOffice")).select_by_value("0")
 		driver.find_element_by_id("btnGo").click()

 		# NOTE(review): the page source is read right after the click without an explicit
 		# wait; confirm the result table is always present at this point.
 		soup = BeautifulSoup(driver.page_source, 'html.parser')
 		rows = soup.find_all('table')[0].find_all('tr')
 		final_list.append(find_common_holidays(table=rows))

 	# An empty monthly result cannot be indexed with [0] (IndexError), so such months
 	# are left out of the summary.
 	# NOTE(review): months whose result starts with the same entry share a key, and the
 	# later month overwrites the earlier one - verify the first header is unique per month.
 	holidays_by_key = {item[0]: item[1:] for item in final_list if item}
 	print(holidays_by_key)
 	return holidays_by_key


 def find_common_holidays(table):
 	"""Return the headers of the table columns that are filled in on every data row.

 	The first row of ``table`` supplies the column headers (the holiday dates, going by
 	the original comments); every later row is a data row (one per state, going by the
 	original comments). A cell counts as blank when it is missing or when its text is
 	exactly a non-breaking space (u'\\xa0').

 	Args:
 		table: iterable of row objects. Each row is called with 'td' to obtain its
 			cells, and each cell is called with ``text=True`` to obtain its text
 			fragments (the caller in this file passes BeautifulSoup ``tr`` tags).

 	Returns:
 		list of ``str``: the header of every column without a blank cell, in table
 		order. An empty ``table`` gives an empty list.
 	"""
 	# One list of cell texts per row; the fragments of a cell are joined into one string.
 	rows = [[''.join(td(text=True)) for td in tr('td')] for tr in table]
 	if not rows:
 		# Without a header row there is nothing to report (indexing row 0 would fail).
 		return []

 	df = pd.DataFrame(rows)
 	headers = df.iloc[0].tolist()
 	body = df.iloc[1:]  # the data less the header row

 	# Columns are addressed by position, not by header text, so repeated header texts
 	# cannot turn a column lookup into a multi-column frame.
 	blank = body.isnull() | body.eq(u'\xa0')
 	has_blank = blank.any().tolist()

 	return [str(header) for header, incomplete in zip(headers, has_blank) if not incomplete]


 # Year to query, read from the first command-line argument
 # (raises IndexError when the script is started without one).
 given_year = sys.argv[1]


 def run(given_year):
 	"""Open the holiday matrix page in headless Firefox and parse ``given_year``.

 	Args:
 		given_year: year forwarded unchanged to ``parse_holidays``.
 	"""
 	options = Options()
 	options.headless = True
 	driver = webdriver.Firefox(options=options)
 	try:
 		driver.get("https://rbi.org.in/Scripts/HolidayMatrixDisplay.aspx")

 		final_list = []
 		parse_holidays(given_year=given_year, driver=driver, final_list=final_list)
 	finally:
 		# quit() ends the whole WebDriver session instead of only closing the current
 		# window, and the finally block runs it even when loading or parsing raises.
 		driver.quit()


 # Starts the scrape at module level, so it also runs whenever this file is imported.
 run(given_year)
diff --git a/requirements.txt b/requirements.txt
 beautifulsoup4==4.7.1
 numpy==1.16.4
 pandas==0.24.2
 selenium==3.141.0
	from selenium import webdriver
	from selenium.webdriver.support.ui import Select
	from selenium.webdriver.support.ui import WebDriverWait
	from selenium.webdriver.firefox.options import Options
	from bs4 import BeautifulSoup
	import numpy as np
	import pandas as pd
	import sys


	def parse_holidays(given_year, driver, final_list):
		"""Query the holiday form once per month and collect the common holidays.

		For every month name the form on the page currently loaded in ``driver`` is filled in
		(month, year, regional office option with value "0") and submitted through the
		"btnGo" button. The rows of the first table of the resulting page are handed to
		``find_common_holidays`` and its result is appended to ``final_list``.

		Args:
			given_year: year to select; converted with ``str()`` because it is matched
				against the visible text of the "drYear" drop-down.
			driver: Selenium WebDriver that already displays the search form.
			final_list: list that is extended in place with one result list per month.

		Returns:
			The dict that is also printed: the first entry of each non-empty monthly
			result mapped to the remaining entries of that result.
		"""
		month_names = (
			'January', 'February', 'March', 'April', 'May', 'June',
			'July', 'August', 'September', 'October', 'November', 'December',
		)
		year_text = str(given_year)

		for month_name in month_names:
			# The form elements are looked up again on every pass rather than cached
			# (presumably because submitting the form reloads the page - verify).
			Select(driver.find_element_by_id("drMonth")).select_by_visible_text(month_name)
			Select(driver.find_element_by_id("drYear")).select_by_visible_text(year_text)
			Select(driver.find_element_by_name("drRegionalOffice")).select_by_value("0")
			driver.find_element_by_id("btnGo").click()

			# NOTE(review): the page source is read right after the click without an explicit
			# wait; confirm the result table is always present at this point.
			soup = BeautifulSoup(driver.page_source, 'html.parser')
			rows = soup.find_all('table')[0].find_all('tr')
			final_list.append(find_common_holidays(table=rows))

		# An empty monthly result cannot be indexed with [0] (IndexError), so such months
		# are left out of the summary.
		# NOTE(review): months whose result starts with the same entry share a key, and the
		# later month overwrites the earlier one - verify the first header is unique per month.
		holidays_by_key = {item[0]: item[1:] for item in final_list if item}
		print(holidays_by_key)
		return holidays_by_key


	def find_common_holidays(table):
		"""Return the headers of the table columns that are filled in on every data row.

		The first row of ``table`` supplies the column headers (the holiday dates, going by
		the original comments); every later row is a data row (one per state, going by the
		original comments). A cell counts as blank when it is missing or when its text is
		exactly a non-breaking space (u'\\xa0').

		Args:
			table: iterable of row objects. Each row is called with 'td' to obtain its
				cells, and each cell is called with ``text=True`` to obtain its text
				fragments (the caller in this file passes BeautifulSoup ``tr`` tags).

		Returns:
			list of ``str``: the header of every column without a blank cell, in table
			order. An empty ``table`` gives an empty list.
		"""
		# One list of cell texts per row; the fragments of a cell are joined into one string.
		rows = [[''.join(td(text=True)) for td in tr('td')] for tr in table]
		if not rows:
			# Without a header row there is nothing to report (indexing row 0 would fail).
			return []

		df = pd.DataFrame(rows)
		headers = df.iloc[0].tolist()
		body = df.iloc[1:]  # the data less the header row

		# Columns are addressed by position, not by header text, so repeated header texts
		# cannot turn a column lookup into a multi-column frame.
		blank = body.isnull() | body.eq(u'\xa0')
		has_blank = blank.any().tolist()

		return [str(header) for header, incomplete in zip(headers, has_blank) if not incomplete]


	# Year to query, read from the first command-line argument
	# (raises IndexError when the script is started without one).
	given_year = sys.argv[1]


	def run(given_year):
		"""Open the holiday matrix page in headless Firefox and parse ``given_year``.

		Args:
			given_year: year forwarded unchanged to ``parse_holidays``.
		"""
		options = Options()
		options.headless = True
		driver = webdriver.Firefox(options=options)
		try:
			driver.get("https://rbi.org.in/Scripts/HolidayMatrixDisplay.aspx")

			final_list = []
			parse_holidays(given_year=given_year, driver=driver, final_list=final_list)
		finally:
			# quit() ends the whole WebDriver session instead of only closing the current
			# window, and the finally block runs it even when loading or parsing raises.
			driver.quit()


	# Starts the scrape at module level, so it also runs whenever this file is imported.
	run(given_year)
	beautifulsoup4==4.7.1
	numpy==1.16.4
	pandas==0.24.2
	selenium==3.141.0