cjwinchester · August 29, 2015 14:27
diff --git a/ne-auditor-budget-dump.py b/ne-auditor-budget-dump.py
 """

 Pings the Nebraska Auditor of Public Accounts' budget database -- http://www.nebraska.gov/auditor/reports/index.cgi?budget=1 -- and returns a cleaner, transposed csv (or JSON) of summary budget data for every political subdivision in Nebraska.

 ~~ ARGUMENTS ~~
 outfile: base name of the file to write to (".txt" or ".json" is appended)
 delimit: delimiter for outfile, or pass "json" to write a JSON file instead
 budget_year_list: list of budget years you want to grab data for, in the form ["2013-2014", "2014-2015"]

 """

 from mechanize import Browser
 import csv
 import json

 def getBudgetDump(outfile, delimit, budget_year_list=None):
    """Query the Nebraska Auditor budget form and write the result to disk.

    Opens the budget report page, selects every option of the
    "countyselect" control plus the requested budget years, switches on the
    form's "csv" flag and submits it. Each response line has the
    '=HYPERLINK("' and '")' wrappers and the " County" suffix removed, then
    the parsed CSV is transposed and written out.

    Arguments:
        outfile: base name of the output file; ".txt" is appended for
            delimited output and ".json" for JSON output.
        delimit: delimiter handed to csv.writer, or "json" (any case) to
            write a JSON array of objects instead.
        budget_year_list: list of budget years to request, in the form
            ["2013-2014", "2014-2015"]. Defaults to an empty selection.

    Returns:
        None. The data is written to outfile + ".txt" or outfile + ".json".
    """
    # Build a fresh list per call rather than sharing one mutable default.
    if budget_year_list is None:
        budget_year_list = []

    mech = Browser()
    mech.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    mech.set_handle_robots(False)

    baseurl = "http://www.nebraska.gov/auditor/reports/index.cgi?budget=1"

    # The page has to be opened before its query form can be selected.
    mech.open(baseurl)
    mech.select_form("queryform")

    countyselect = mech.form.find_control("countyselect")
    mech["countyselect"] = [item.name for item in countyselect.items]
    mech["year"] = budget_year_list

    # Clear the control's readonly flag before assigning the export value.
    mech.form.find_control("csv").readonly = False
    mech["csv"] = "1"

    response = mech.submit()
    cleanrows = [
        line.replace('=HYPERLINK("', '').replace('")', '').replace(" County", "")
        for line in response.readlines()
    ]

    # Materialise the transposed rows: they are sliced below, which needs a
    # real sequence even where zip() hands back an iterator.
    transposed = list(zip(*csv.reader(cleanrows, delimiter=',')))

    if delimit.lower() != "json":
        with open(outfile + ".txt", 'wb') as subs:
            csv.writer(subs, delimiter=delimit).writerows(transposed)
    else:
        # JSON keys, in the column order of the transposed rows.
        fields = (
            'county',
            'subdivision',
            'name',
            'year',
            'pdf',
            'property_tax_principal_int_on_bonds',
            'property_tax_other',
            'total_property_tax_request',
            'valuation',
            'outstanding_debt_principal',
            'outstanding_debt_interest',
            'outstanding_debt_total',
            'total_resources',
            'total_disbursments_transfers',
            'unused_budget_authority',
        )

        # The first transposed row is skipped (presumably the column labels).
        # Indexing by position keeps the IndexError on a short row.
        records = [
            {key: row[index] for index, key in enumerate(fields)}
            for row in transposed[1:]
        ]

        with open(outfile + ".json", 'wb') as subs:
            subs.write(json.dumps(records))

 # Dump the 2014-2015 budget summaries to a pipe-delimited text file.
 getBudgetDump(
    outfile="budget-dump",
    delimit="|",
    budget_year_list=["2014-2015"],
 )
	"""

	Pings the Nebraska Auditor of Public Accounts' budget database -- http://www.nebraska.gov/auditor/reports/index.cgi?budget=1 -- and returns a cleaner, transposed csv (or JSON) of summary budget data for every political subdivision in Nebraska.

	~~ ARGUMENTS ~~
	outfile: base name of the file to write to (".txt" or ".json" is appended)
	delimit: delimiter for outfile, or pass "json" to write a JSON file instead
	budget_year_list: list of budget years you want to grab data for, in the form ["2013-2014", "2014-2015"]

	"""

	from mechanize import Browser
	import csv
	import json

	def getBudgetDump(outfile, delimit, budget_year_list=None):
		"""Query the Nebraska Auditor budget form and write the result to disk.

		Opens the budget report page, selects every option of the
		"countyselect" control plus the requested budget years, switches on the
		form's "csv" flag and submits it. Each response line has the
		'=HYPERLINK("' and '")' wrappers and the " County" suffix removed, then
		the parsed CSV is transposed and written out.

		Arguments:
			outfile: base name of the output file; ".txt" is appended for
				delimited output and ".json" for JSON output.
			delimit: delimiter handed to csv.writer, or "json" (any case) to
				write a JSON array of objects instead.
			budget_year_list: list of budget years to request, in the form
				["2013-2014", "2014-2015"]. Defaults to an empty selection.

		Returns:
			None. The data is written to outfile + ".txt" or outfile + ".json".
		"""
		# Build a fresh list per call rather than sharing one mutable default.
		if budget_year_list is None:
			budget_year_list = []

		mech = Browser()
		mech.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
		mech.set_handle_robots(False)

		baseurl = "http://www.nebraska.gov/auditor/reports/index.cgi?budget=1"

		# The page has to be opened before its query form can be selected.
		mech.open(baseurl)
		mech.select_form("queryform")

		countyselect = mech.form.find_control("countyselect")
		mech["countyselect"] = [item.name for item in countyselect.items]
		mech["year"] = budget_year_list

		# Clear the control's readonly flag before assigning the export value.
		mech.form.find_control("csv").readonly = False
		mech["csv"] = "1"

		response = mech.submit()
		cleanrows = [
			line.replace('=HYPERLINK("', '').replace('")', '').replace(" County", "")
			for line in response.readlines()
		]

		# Materialise the transposed rows: they are sliced below, which needs a
		# real sequence even where zip() hands back an iterator.
		transposed = list(zip(*csv.reader(cleanrows, delimiter=',')))

		if delimit.lower() != "json":
			with open(outfile + ".txt", 'wb') as subs:
				csv.writer(subs, delimiter=delimit).writerows(transposed)
		else:
			# JSON keys, in the column order of the transposed rows.
			fields = (
				'county',
				'subdivision',
				'name',
				'year',
				'pdf',
				'property_tax_principal_int_on_bonds',
				'property_tax_other',
				'total_property_tax_request',
				'valuation',
				'outstanding_debt_principal',
				'outstanding_debt_interest',
				'outstanding_debt_total',
				'total_resources',
				'total_disbursments_transfers',
				'unused_budget_authority',
			)

			# The first transposed row is skipped (presumably the column labels).
			# Indexing by position keeps the IndexError on a short row.
			records = [
				{key: row[index] for index, key in enumerate(fields)}
				for row in transposed[1:]
			]

			with open(outfile + ".json", 'wb') as subs:
				subs.write(json.dumps(records))

	# Dump the 2014-2015 budget summaries to a pipe-delimited text file.
	# csv.writer needs a one-character delimiter, so pass a bare pipe; the
	# backslash-escaped form was two characters long.
	getBudgetDump("budget-dump", "|", ["2014-2015"])