Simple script I used to make a map from ACS survey data variable names to their descriptive table names.
#!/usr/bin/env python
# Simple script I used to make a map from ACS
# survey data variable names to their descriptive table names.
# It made my life a bit easier when searching for table names
# programmatically.
# Here is the ACS variables site:
# http://api.census.gov/data/2014/acs5/variables.html
import os
import json
import subprocess

from bs4 import BeautifulSoup

# replace with your desired location
HOME = os.path.expanduser("~")
HTML_FILE = os.path.join(HOME, "variables.html")
MAP_HOME = os.path.join(HOME, "census_table_variable_map.json")

# We're going to chunk the parsed html so that we get one
# row of the html table per chunk. 5 is the number of cells
# per row of the html table.
# See: http://api.census.gov/data/2014/acs5/variables.html
CHUNK_SIZE = 5

def chunker(l, n=CHUNK_SIZE):
    # yield successive n-sized slices of the list l
    for i in range(0, len(l), n):
        yield l[i:i + n]
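# Illustration of the chunking step (placeholder values, not real ACS cells):
#   chunker(["NAME_1", "label_1", "concept_1", "x", "y",
#            "NAME_2", "label_2", "concept_2", "x", "y"])
#   yields ["NAME_1", "label_1", "concept_1", "x", "y"] and then
#   ["NAME_2", "label_2", "concept_2", "x", "y"],
#   i.e. one html table row per chunk.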
site = "http://api.census.gov/data/2014/acs5/variables.html"

# run wget; the page will be saved under "variables.html"
# (-O writes the downloaded document to HTML_FILE rather than a log)
subprocess.check_output(["wget", site, "-O", HTML_FILE])

with open(HTML_FILE, "r") as f:
    soup = BeautifulSoup(f.read(), "lxml")

# grab the text of every table cell, then regroup into rows
tds = [td.text for td in soup.find_all("td")]
chunks = chunker(tds)

table_map = {}
# each chunk is one table row: variable name, label, concept, ...
for c in chunks:
    table_map[c[0]] = {"col_name": c[1], "table_name": c[2]}

# write the map to disk
with open(MAP_HOME, "w") as out:
    json.dump(table_map, out)
# done!
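Once the script has run, the resulting JSON file can be loaded back and queried directly. The snippet below is a minimal usage sketch, assuming the map was written to the same MAP_HOME path as above; the variable name used in the lookup is just a placeholder, not something taken from the data.

import os
import json

HOME = os.path.expanduser("~")
MAP_HOME = os.path.join(HOME, "census_table_variable_map.json")

with open(MAP_HOME, "r") as f:
    table_map = json.load(f)

# look up a variable name programmatically (placeholder key shown here)
entry = table_map.get("B01001_001E", {})
print(entry.get("col_name"), "->", entry.get("table_name"))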