Skip to content

Instantly share code, notes, and snippets.

@cjwinchester
Last active March 16, 2016 23:13
Show Gist options
  • Save cjwinchester/809c283b20ae712616e9 to your computer and use it in GitHub Desktop.
Save cjwinchester/809c283b20ae712616e9 to your computer and use it in GitHub Desktop.
How common is your city name? Use Census data to find out.
# city-getter.sh -- build the Census place/state lookup file, then ask the
# fabfile for the 2010 population of every place matching a city name.
#
# Usage: sh city-getter.sh "Rock Springs"
#
# Needs wget, csvkit (csvjoin/csvcut/csvsort/csvformat) and fab on PATH.
# Exits non-zero when no city is given, a download fails, or the merged
# file does not have the same number of lines as the national places file.
city=$1

# Download URL $1 to file $2 unless a non-empty copy is already on disk.
# `wget -O` creates the output file even when the transfer fails, so a
# failed download is removed to let the next run retry it.
fetch() {
    if [ -s "$2" ]; then
        return 0
    fi
    if ! wget -q -O "$2" "$1"; then
        rm -f "$2"
        echo "Could not download $1" >&2
        return 1
    fi
}

# POSIX `[ ]` tests are used throughout so the script really does run
# under `sh`, as the usage message advertises.
if [ -z "$city" ]; then
    echo "Pass a city name as an argument: sh city-getter.sh \"Rock Springs\""
    exit 1
fi

echo "$city"

fetch http://www2.census.gov/geo/docs/reference/state.txt states.txt || exit 1
echo "Fetched state file"

fetch http://www2.census.gov/geo/docs/reference/codes/files/national_places.txt places.txt || exit 1
echo "Fetched national places file"

# Join places (column 2) to states (column 1), keep the six columns the
# fabfile reads, sort by the last one and write the result pipe-delimited.
csvjoin -e "iso-8859-1" -d "|" -c 2,1 places.txt states.txt | csvcut -c 8,9,10,7,4,3 | csvsort -c 6 | csvformat -D "|" > places_with_state_names.txt
echo "Merged place file with states"

# Some `wc` implementations pad the count with spaces; strip the padding
# here instead of piping the messages through xargs, which rejects the
# apostrophe in "don't" as an unmatched quote.
national_count=$(wc -l < places.txt | tr -d '[:space:]')
joined_count=$(wc -l < places_with_state_names.txt | tr -d '[:space:]')

if [ "$national_count" -eq "$joined_count" ]; then
    echo "Line counts match: $national_count"
    echo "Grabbing $city data ..."
    fab getCities:"$city"
else
    echo "Line counts don't match. Places: $national_count / Joined: $joined_count"
    exit 1
fi
import os
import csv
import json
import fabric
import requests
from slugify import slugify
import time
# Switch off Fabric's "status" output level for tasks in this fabfile.
fabric.state.output.status = False
def getCities(city_name):
    """Write a CSV of every Census place whose name contains ``city_name``.

    Reads the pipe-delimited ``places_with_state_names.txt`` from the
    current directory, looks up the P0010001 value of each matching place
    in the 2010 SF1 Census API and writes the matching rows, plus a
    ``POP_2010`` column, to ``<slugified city name>.csv``.

    Matching is a case-insensitive substring test against ``PLACENAME``.
    When a lookup fails (network error, HTTP error status, or a response
    that is not the expected JSON table) the row is still written, with an
    empty ``POP_2010``, and a warning is printed, so one bad place does
    not abort the whole run.

    Args:
        city_name: Name, or part of a name, to search for.

    Raises:
        KeyError: If the ``CENSUS_API_KEY`` environment variable is unset.
        IOError: If ``places_with_state_names.txt`` cannot be opened.
    """
    import sys

    api_url_template = (
        "http://api.census.gov/data/2010/sf1?get=P0010001"
        "&for=place:%s&in=state:%s&key=%s"
    )
    api_key = os.environ['CENSUS_API_KEY']
    slugified_name = slugify(city_name).lower()
    needle = city_name.upper()

    # The csv module wants binary files on Python 2 and text files opened
    # with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        read_mode, write_mode = "r", "w"
        open_kwargs = {"newline": "", "encoding": "utf-8"}
    else:
        read_mode, write_mode = "rb", "wb"
        open_kwargs = {}

    with open("places_with_state_names.txt", read_mode,
              **open_kwargs) as infile, \
            open(slugified_name + ".csv", write_mode,
                 **open_kwargs) as outfile:
        reader = csv.DictReader(infile, delimiter='|')
        # Build a new list so the reader's own fieldnames stay untouched.
        headers = list(reader.fieldnames or []) + ['POP_2010']
        writer = csv.DictWriter(outfile, fieldnames=headers)
        writer.writeheader()

        counter = 0
        for row in reader:
            if needle not in row['PLACENAME'].upper():
                continue

            print("Fetching population data for %s %s"
                  % (row['PLACENAME'], row['STATE_NAME']))
            urlstring = api_url_template % (
                row['PLACEFP'], row['STATE'], api_key)
            try:
                r = requests.get(urlstring, timeout=30)
                r.raise_for_status()
                # Row 0 of the response is the header; row 1 has the value.
                row['POP_2010'] = r.json()[1][0]
            except (requests.RequestException, ValueError,
                    IndexError) as err:
                # Print only the exception type: the message of a requests
                # error can include the URL, which carries the API key.
                print("No population data for %s %s (%s)"
                      % (row['PLACENAME'], row['STATE_NAME'],
                         type(err).__name__))
                row['POP_2010'] = ''

            writer.writerow(row)
            counter += 1
            # Pause between API calls.
            time.sleep(1)

        print("Found %d matches" % counter)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment