leonardreidy · October 8, 2016 20:32
diff --git a/Extract-Salary-Calculator-Metrocodes-from-HTML.py b/Extract-Salary-Calculator-Metrocodes-from-HTML.py
 # BeautifulSoup is used to extract data from a html source file
 from bs4 import BeautifulSoup

 # tablib is used to output the data to an Excel file with a minimum of hassle
 import tablib

 # open the file of interest, a file containing the html source for
 # the select/options tags you want to extract data from
 # INFILE is a placeholder for a filename
 infile = open(INFILE, 'r')
 soup = BeautifulSoup(infile)

 # find all option elements
 opts = soup.findAll('option')

 # create an empty dictionary to hold the option values and the select text as 
 # key/value pairs
 optsArr = {}

 # iterate through options elements and append key/value pairs extracted
 # from those elements to the new optsArr
 # the keys are extracted from the element text
 # the values are extracted from the option attribute values
 for i in opts:
  optsArr.update({i.text.encode('utf-8'):i['value']})
  
 # NOTE - at this point, the value in the resulting key/value pair is a 
 # string, not an integer as it ought to be if it is to be useful
 # the next step is to convert those strings to integers
 # But first we have to remove a superfluous element from the optsArr
 # dict, the Select text option which is not itself a choice but rather
 # an informational cue which will cause the subsequent code to throw 
 # an error

 del optsArr['Choose State/Metro ---------------->']
 for key,value in optsArr.iteritems():
  optsArr[key] = int(value)

 # the result of these steps should be a dictionary containing key/value 
 # pairs for which the following criteria are satisfied:
 # the keys are metro areas and the values are the integers that encode
 # them

 ################################################################################
 # The following step is important and I have isolated it because it is a       #
 # useful example of the use of tablib which I'm almost certainly going         #
 # to use again.                                                                #
 ################################################################################

 # define headers for the output file
 headers = ('metro_name', 'metro_code')

 # assign the dict above as a list of tuples to the data variable
 data = optsArr.items()

 # create a tablib table of tabular data with headers
 data = tablib.Dataset(*data, headers=headers)

 # export to Excel file
 open('salary-calc-metro-codes.xls','wb').write(data.xls)
	# BeautifulSoup is used to extract data from a html source file
	from bs4 import BeautifulSoup

	# tablib is used to output the data to an Excel file with a minimum of hassle
	import tablib

	# open the file of interest, a file containing the html source for
	# the select/options tags you want to extract data from
	# INFILE is a placeholder for a filename
	infile = open(INFILE, 'r')
	soup = BeautifulSoup(infile)

	# find all option elements
	opts = soup.findAll('option')

	# create an empty dictionary to hold the option values and the select text as
	# key/value pairs
	optsArr = {}

	# iterate through options elements and append key/value pairs extracted
	# from those elements to the new optsArr
	# the keys are extracted from the element text
	# the values are extracted from the option attribute values
	for i in opts:
	optsArr.update({i.text.encode('utf-8'):i['value']})

	# NOTE - at this point, the value in the resulting key/value pair is a
	# string, not an integer as it ought to be if it is to be useful
	# the next step is to convert those strings to integers
	# But first we have to remove a superfluous element from the optsArr
	# dict, the Select text option which is not itself a choice but rather
	# an informational cue which will cause the subsequent code to throw
	# an error

	del optsArr['Choose State/Metro ---------------->']
	for key,value in optsArr.iteritems():
	optsArr[key] = int(value)

	# the result of these steps should be a dictionary containing key/value
	# pairs for which the following criteria are satisfied:
	# the keys are metro areas and the values are the integers that encode
	# them

	################################################################################
	# The following step is important and I have isolated it because it is a #
	# useful example of the use of tablib which I'm almost certainly going #
	# to use again. #
	################################################################################

	# define headers for the output file
	headers = ('metro_name', 'metro_code')

	# assign the dict above as a list of tuples to the data variable
	data = optsArr.items()

	# create a tablib table of tabular data with headers
	data = tablib.Dataset(*data, headers=headers)

	# export to Excel file
	open('salary-calc-metro-codes.xls','wb').write(data.xls)