Skip to content

Instantly share code, notes, and snippets.

@dfeldman
Last active February 25, 2016 01:24
Show Gist options
  • Save dfeldman/1c7d2298a6ef22fa6510 to your computer and use it in GitHub Desktop.
analyzethis-example
# Query ACS 5-year data for one ZIP code via the Census Bureau API
# (uses the `census` PyPI package; module name is lowercase, class is Census).
# BUG FIX: original read `import Census`, which raises ImportError -- the
# call below uses the lowercase module name `census`.
import census

# Get your own key!
# NOTE(review): API key is hard-coded in source control; rotate it and load
# from config/env instead.
c = census.Census("b99ef7ede80606207d3a3836bafdc00dd90a244d")
# B19001_001E = total household count, for ZIP code tabulation area 55406.
c.acs.zipcode('B19001_001E', 'zip:55406')
# Available variables: http://api.census.gov/data/2014/acs5/variables.html
# Get output like:
# [{u'zip code tabulation area': u'55406', u'B19001_001E': u'14900'}]
import pyzipcode, geopy.distance, json, requests
# Module-level ZIP-code database, loaded once and reused by the helpers below.
zipcodes = pyzipcode.ZipCodeDatabase()
def zip_to_lat_long(zip_code):
    """Return (latitude, longitude) for a US ZIP code string.

    Unknown ZIP codes yield the sentinel (123, 0) -- a point far from any
    US location, so downstream nearest-station ranking still works instead
    of crashing.
    """
    try:
        entry = zipcodes[zip_code]
        return (entry.latitude, entry.longitude)
    # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
    # KeyboardInterrupt.  Keep the best-effort fallback but catch only
    # ordinary lookup failures.
    except Exception:
        return (123, 0)
def distance(pt1, pt2):
    """Vincenty (ellipsoidal) distance in miles between two (lat, long) pairs."""
    measurement = geopy.distance.vincenty(pt1, pt2)
    return measurement.miles
def find_closest_station(zip_code):
stations = json.loads(open('stations.txt').read())[0]
if zip_code in stations:
return zip_code
target_lat_long = zip_to_lat_long(zip_code)
lat_longs = [ (z['lat'], z['long']) for z in stations]
distances = [ distance(target_lat_long, x) for x in lat_longs ]
closest_station = stations[ distances.index(min(distances)) ]
print closest_station
return closest_station['id']
#return closest_zip_code
def do_one_query(station, offset):
url = "http://www.ncdc.noaa.gov/cdo-web/api/v2/data"
querystring = {"datasetid":"GHCND","stationid":station,"startdate":"2016-01-01","enddate":"2016-02-01", "offset":offset, "limit":1000}
headers = {
'token': "szBhJoPZNaVEgCudZbVAPPkFrdEebPIt",
}
response = requests.request("GET", url, headers=headers, params=querystring)
js=json.loads( response.text)
print response.text
if 'results' not in js: return []
# Select a different attribute from : ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt
temps = [ (z['date'], float(z['value'])/10, z['datatype']) for z in js['results'] if z['datatype'] == "TMAX"]
return temps
station = find_closest_station('55406')
print do_one_query(station, 0)
# HINT: Temperatures are in Celsius degrees * 10
import requests, json
# NOAA Climate Data Online v2 station-listing endpoint, shared by the
# query helpers below.
url = "http://www.ncdc.noaa.gov/cdo-web/api/v2/stations"
headers = {
# NOTE(review): API token hard-coded and checked into source control;
# rotate it and load from config/env.
'token': "szBhJoPZNaVEgCudZbVAPPkFrdEebPIt",
}
def do_one_query(offset):
    """Fetch one page (up to 1000 rows) of GHCND stations with TEMP data.

    Returns a list of {'id', 'lat', 'long'} dicts, or [] once the API has
    no more results at this offset.  Uses the module-level `url` and
    `headers` for the NOAA stations endpoint.
    """
    querystring = {
        "locationcategoryid": "ZIP",
        "sortfield": "name",
        "limit": "1000",
        "sortorder": "desc",
        "datacategoryid": "TEMP",
        "datasetid": "GHCND",
        "startdate": "2015-01-01",
        "enddate": "2015-03-01",
        "offset": str(offset),
    }
    response = requests.request("GET", url, headers=headers, params=querystring)
    js = json.loads(response.text)
    if 'results' not in js:
        return []
    return [{'id': row['id'], 'lat': row['latitude'], 'long': row['longitude']}
            for row in js['results']]
def do_all_queries():
stations = []
offset=0
while True:
result = do_one_query(offset)
if result == []: break
stations.append(result)
offset += len(result)
print "got ", len(result), "results"
return stations
# Crawl the full station list once and cache it to disk for later lookups.
stations = do_all_queries()
with open('stations.txt', 'w') as outfile:
    json.dump(stations, outfile)
import urllib2, BeautifulSoup

# Fetch a City Pages event listing -- spoofing a browser User-Agent so the
# request is not rejected -- and pull out the <div class="when"> showtime
# block.
page = urllib2.Request("http://www.citypages.com/event/will-durst-7994215")
browser_ua = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'
page.add_header('User-Agent', browser_ua)
opener = urllib2.build_opener()
page_html = opener.open(page).read()
soup = BeautifulSoup.BeautifulSoup(page_html)
soup.find('div', {'class': 'when'})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment