Skip to content

Instantly share code, notes, and snippets.

@dfeldman
Last active February 25, 2016 01:24
Show Gist options
  • Save dfeldman/1c7d2298a6ef22fa6510 to your computer and use it in GitHub Desktop.
analyzethis-example
# Query ACS 5-year data for one ZIP code via the Census Bureau API
# (uses the `census` PyPI package; module name is lowercase, class is Census).
# BUG FIX: original read `import Census`, which raises ImportError -- the
# call below uses the lowercase module name `census`.
import census

# Get your own key!
# NOTE(review): API key is hard-coded in source control; rotate it and load
# from config/env instead.
c = census.Census("b99ef7ede80606207d3a3836bafdc00dd90a244d")
# B19001_001E = total household count, for ZIP code tabulation area 55406.
c.acs.zipcode('B19001_001E', 'zip:55406')
# Available variables: http://api.census.gov/data/2014/acs5/variables.html
# Get output like:
# [{u'zip code tabulation area': u'55406', u'B19001_001E': u'14900'}]
import pyzipcode, geopy.distance, json, requests
# Module-level ZIP-code database, loaded once and reused by the helpers below.
zipcodes = pyzipcode.ZipCodeDatabase()
def zip_to_lat_long(zip_code):
    """Return (latitude, longitude) for a US ZIP code string.

    Unknown ZIP codes yield the sentinel (123, 0) -- a point far from any
    US location, so downstream nearest-station ranking still works instead
    of crashing.
    """
    try:
        entry = zipcodes[zip_code]
        return (entry.latitude, entry.longitude)
    # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
    # KeyboardInterrupt.  Keep the best-effort fallback but catch only
    # ordinary lookup failures.
    except Exception:
        return (123, 0)
def distance(pt1, pt2):
    """Vincenty (ellipsoidal) distance in miles between two (lat, long) pairs."""
    measurement = geopy.distance.vincenty(pt1, pt2)
    return measurement.miles
def find_closest_station(zip_code):
stations = json.loads(open('stations.txt').read())[0]
if zip_code in stations:
return zip_code
target_lat_long = zip_to_lat_long(zip_code)
lat_longs = [ (z['lat'], z['long']) for z in stations]
distances = [ distance(target_lat_long, x) for x in lat_longs ]
closest_station = stations[ distances.index(min(distances)) ]
print closest_station
return closest_station['id']
#return closest_zip_code
def do_one_query(station, offset):
url = "http://www.ncdc.noaa.gov/cdo-web/api/v2/data"
querystring = {"datasetid":"GHCND","stationid":station,"startdate":"2016-01-01","enddate":"2016-02-01", "offset":offset, "limit":1000}
headers = {
'token': "szBhJoPZNaVEgCudZbVAPPkFrdEebPIt",
}
response = requests.request("GET", url, headers=headers, params=querystring)
js=json.loads( response.text)
print response.text
if 'results' not in js: return []
# Select a different attribute from : ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt
temps = [ (z['date'], float(z['value'])/10, z['datatype']) for z in js['results'] if z['datatype'] == "TMAX"]
return temps
station = find_closest_station('55406')
print do_one_query(station, 0)
# HINT: Temperatures are in Celsius degrees * 10
import requests, json
# NOAA Climate Data Online v2 station-listing endpoint, shared by the
# query helpers below.
url = "http://www.ncdc.noaa.gov/cdo-web/api/v2/stations"
headers = {
# NOTE(review): API token hard-coded and checked into source control;
# rotate it and load from config/env.
'token': "szBhJoPZNaVEgCudZbVAPPkFrdEebPIt",
}
def do_one_query(offset):
    """Fetch one page (up to 1000 rows) of GHCND stations with TEMP data.

    Returns a list of {'id', 'lat', 'long'} dicts, or [] once the API has
    no more results at this offset.  Uses the module-level `url` and
    `headers` for the NOAA stations endpoint.
    """
    querystring = {
        "locationcategoryid": "ZIP",
        "sortfield": "name",
        "limit": "1000",
        "sortorder": "desc",
        "datacategoryid": "TEMP",
        "datasetid": "GHCND",
        "startdate": "2015-01-01",
        "enddate": "2015-03-01",
        "offset": str(offset),
    }
    response = requests.request("GET", url, headers=headers, params=querystring)
    js = json.loads(response.text)
    if 'results' not in js:
        return []
    return [{'id': row['id'], 'lat': row['latitude'], 'long': row['longitude']}
            for row in js['results']]
def do_all_queries():
stations = []
offset=0
while True:
result = do_one_query(offset)
if result == []: break
stations.append(result)
offset += len(result)
print "got ", len(result), "results"
return stations
# Crawl the full station list once and cache it to disk for later lookups.
stations = do_all_queries()
with open('stations.txt', 'w') as outfile:
    json.dump(stations, outfile)
import urllib2, BeautifulSoup

# Fetch a City Pages event listing -- spoofing a browser User-Agent so the
# request is not rejected -- and pull out the <div class="when"> showtime
# block.
page = urllib2.Request("http://www.citypages.com/event/will-durst-7994215")
browser_ua = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'
page.add_header('User-Agent', browser_ua)
opener = urllib2.build_opener()
page_html = opener.open(page).read()
soup = BeautifulSoup.BeautifulSoup(page_html)
soup.find('div', {'class': 'when'})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment