Use the gist https://gist.github.com/jsundram/1918435
on this data: https://data.sfgov.org/Public-Works/Street-Tree-List/tkzw-k3nq
with neighborhood shapefiles from: http://www.zillow.com/howto/api/neighborhood-boundaries.htm
Minor changes to the gist were required.
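The adapted script and the shape_index helper module it imports are below. To show how the pieces fit together first, here is a minimal sketch of a single-point lookup (assumptions: the Zillow California shapefile is unpacked under ./ca/, and the coordinates are an illustrative point near San Francisco City Hall, not from the original):

import shape_index

# Per the script below: field 3 of each Zillow shaperecord is the
# neighborhood name, field 4 is a numeric id usable as an index key.
shapes, names = shape_index.read_shapefile('./ca/ZillowNeighborhoods-CA', 4, lambda l: l[3])
idx = shape_index.make_index(shapes)

hood_id = shape_index.query_index(idx, shapes, 37.7793, -122.4192)  # (lat, lng)
print(names.get(hood_id))  # neighborhood name, or None if no polygon matched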
# Adapted driver script: tags each SF street tree with its Zillow neighborhood.
import csv

import shape_index


def main():
    # Field 3 of each Zillow shaperecord is the neighborhood name;
    # field 4 is a numeric id used as the index key.
    name = lambda l: l[3]
    shapes, names = shape_index.read_shapefile('./ca/ZillowNeighborhoods-CA', 4, name)
    idx = shape_index.make_index(shapes)

    with open('Street_Tree_List.csv', newline='') as f, \
            open('Street_Tree_List_Neighborly.csv', 'w', newline='') as g:
        r = csv.DictReader(f)
        w = csv.DictWriter(g, r.fieldnames + ['Neighborhood'])
        w.writeheader()
        rows, parse_errors = 0, 0
        for row in r:
            try:
                lat = float(row['Latitude'])
                lng = float(row['Longitude'])
            except ValueError:
                parse_errors += 1
                continue
            rows += 1
            # query_index returns the numeric id of the containing shape, or None.
            hood = shape_index.query_index(idx, shapes, lat, lng)
            row['Neighborhood'] = names.get(hood, None)
            w.writerow(row)
    print("%d rows succeeded, %d had errors" % (rows, parse_errors))
    print("Wrote Street_Tree_List_Neighborly.csv")


if __name__ == '__main__':
    main()
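Run the script from a directory containing Street_Tree_List.csv and the unpacked shapefile; it writes Street_Tree_List_Neighborly.csv with a Neighborhood column appended (blank where no neighborhood polygon contains the point). The shape_index module it imports follows.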
# shape_index.py: map (lat, lng) points to shapes via an rtree index.
import json
from collections import defaultdict

# To get rtree:
#   brew install spatialindex
#   pip install rtree
from rtree import index

# To get shapefile:
#   pip install pyshp
import shapefile


def read_shapefile(shape_dir, ixId=3, name=None):
    """ixId is the index of a field in the shaperecord that can be used as a numeric id."""
    sf = shapefile.Reader(shape_dir)
    shapes = {}
    shape_names = {}
    for (s, sr) in zip(sf.shapes(), sf.shapeRecords()):
        ix = int(sr.record[ixId])
        shapes[ix] = s
        if name:
            shape_names[ix] = name(sr.record)
    return shapes, shape_names


def make_index(county_shapes):
    # Index each shape's bounding box under its numeric id.
    idx = index.Index()
    for (countyId, s) in county_shapes.items():
        idx.insert(countyId, s.bbox)
    return idx


def query_index(idx, shapes, lat, lng, e=.0000001):
    # The rtree quickly narrows the search to shapes whose bounding boxes
    # contain the point; an exact point-in-polygon test decides among them.
    for hit in idx.intersection((lng - e, lat - e, lng + e, lat + e)):
        if hit_test(lng, lat, shapes[hit].points):
            return hit


def hit_test(x, y, polygon):
    # Ray casting: cast a ray from (x, y) toward +x and count edge
    # crossings; an odd count means the point is inside.
    inside = False
    x1, y1 = polygon[0]
    for i in list(range(len(polygon))) + [0]:  # the trailing 0 closes the ring
        x2, y2 = polygon[i]
        if min(y1, y2) < y <= max(y1, y2) and x <= max(x1, x2):
            xints = (y - y1) * (x2 - x1) / (y2 - y1) + x1
            if x1 == x2 or x <= xints:
                inside = not inside
        x1, y1 = x2, y2
    return inside
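
# Quick sanity check for hit_test (illustrative values, not from the original):
# >>> square = [(0, 0), (1, 0), (1, 1), (0, 1)]
# >>> hit_test(0.5, 0.5, square)
# True
# >>> hit_test(2.0, 0.5, square)
# False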

def process(your_data):
    # Count how many of your (lat, lng) points fall in each county, using the
    # Census TIGER shapefile from:
    # http://www2.census.gov/geo/tiger/TIGER2009/tl_2009_us_county.zip
    shapes, names = read_shapefile('./us_county')
    idx = make_index(shapes)
    summary = defaultdict(int)
    for (lat, lng) in your_data:
        countyId = query_index(idx, shapes, lat, lng)
        summary[countyId] += 1
    json.dump(summary, open('data.json', 'w'))  # scale this later
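
# A minimal usage sketch for process() (assumptions: the TIGER county
# shapefile linked above is unpacked as ./us_county, and these two
# coordinates are illustrative, not from the original gist):
if __name__ == '__main__':
    process([(37.7793, -122.4192),   # San Francisco (approx.)
             (34.0522, -118.2437)])  # Los Angeles (approx.)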