Last active
August 29, 2015 14:27
-
-
Save cjwinchester/ae44b6b16b2f1597636a to your computer and use it in GitHub Desktop.
Programmatic-ish access to budget summary data for every Nebraska political subdivision, transposed or jsonified into something a human can use.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Pings the Nebraska Auditor of Public Accounts' budget database -- http://www.nebraska.gov/auditor/reports/index.cgi?budget=1 -- and returns a cleaner, transposed csv (or JSON) of summary budget data for every political subdivision in Nebraska. | |
| ~~ ARGUMENTS ~~ | |
| outfile: name of file to write to | |
| delimit: delimiter for outfile, or pass "json" to return JSON | |
| budget_year_list: list of budget years you want to grab data for, in the form ["2013-2014", "2014-2015"] | |
| """ | |
| from mechanize import Browser | |
| import csv | |
| import json | |
# Keys of each JSON record, listed in the column order of the transposed
# export (position i in this tuple is read from row[i]).
_BUDGET_FIELDS = (
    'county',
    'subdivision',
    'name',
    'year',
    'pdf',
    'property_tax_principal_int_on_bonds',
    'property_tax_other',
    'total_property_tax_request',
    'valuation',
    'outstanding_debt_principal',
    'outstanding_debt_interest',
    'outstanding_debt_total',
    'total_resources',
    'total_disbursments_transfers',
    'unused_budget_authority',
)


def _clean_budget_line(line):
    """Return one raw export line as text with the spreadsheet markup removed."""
    if bytes is not str and isinstance(line, bytes):
        # On Python 3 the HTTP response yields bytes, which cannot be mixed
        # with the str arguments below.
        # NOTE(review): assumes the export is UTF-8 -- confirm against the site.
        line = line.decode("utf-8", "replace")
    return line.replace('=HYPERLINK("', '').replace('")', '').replace(" County", "")


def _open_budget_outfile(path):
    """Open *path* for writing in the mode this interpreter's csv module expects."""
    if str is bytes:
        # Python 2: csv.writer wants a binary-mode file.
        return open(path, 'wb')
    # Python 3: csv.writer wants a text-mode file with newline translation off.
    return open(path, 'w', newline='', encoding='utf-8')


def getBudgetDump(outfile, delimit, budget_year_list=None):
    """Download the budget summary export and write it out transposed.

    Submits the "queryform" form on the auditor's budget page with every
    county selected and the csv flag set, strips the ``=HYPERLINK("...")``
    wrappers and the text " County" from each returned line, transposes the
    resulting table, and writes it to disk.

    Args:
        outfile: Output path without extension. ".txt" is appended for
            delimited output, ".json" for JSON output.
        delimit: Delimiter for the delimited output file, or the string
            "json" (any letter case) to write a JSON list of records instead.
        budget_year_list: Budget years to request, in the form
            ["2013-2014", "2014-2015"]. Defaults to an empty selection.

    Returns:
        None. The result is written to ``outfile + ".txt"`` or
        ``outfile + ".json"``.

    Raises:
        IndexError: In JSON mode, if a transposed row has fewer columns than
            there are record fields.
    """
    # A fresh list per call; a mutable default would be shared between calls.
    years = [] if budget_year_list is None else list(budget_year_list)

    mech = Browser()
    mech.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    mech.set_handle_robots(False)
    mech.open("http://www.nebraska.gov/auditor/reports/index.cgi?budget=1")

    mech.select_form("queryform")
    countyselect = mech.form.find_control("countyselect")
    # Select every county the form offers.
    mech["countyselect"] = [item.name for item in countyselect.items]
    mech["year"] = years
    # The csv control is read-only in the form as served; unlock it so the
    # flag can be set before submitting.
    mech.form.find_control("csv").readonly = False
    mech["csv"] = "1"
    response = mech.submit()

    cleanrows = [_clean_budget_line(line) for line in response.readlines()]
    # Materialize the transposition: zip() is lazy on Python 3 and the JSON
    # branch needs to slice it.
    transposed = list(zip(*csv.reader(cleanrows, delimiter=',')))

    if delimit.lower() != "json":
        with _open_budget_outfile(outfile + ".txt") as subs:
            csv.writer(subs, delimiter=delimit).writerows(transposed)
    else:
        # The first transposed row is skipped; the remaining rows become records.
        records = [
            {key: row[position] for position, key in enumerate(_BUDGET_FIELDS)}
            for row in transposed[1:]
        ]
        with _open_budget_outfile(outfile + ".json") as subs:
            subs.write(json.dumps(records))
# Script entry: write the 2014-2015 budget data to "budget-dump.txt",
# pipe-delimited.
getBudgetDump(
    outfile="budget-dump",
    delimit="|",
    budget_year_list=["2014-2015"],
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment