Use the gist https://gist.github.com/jsundram/1918435
on this data: https://data.sfgov.org/Public-Works/Street-Tree-List/tkzw-k3nq
with neighborhood shapefiles from: http://www.zillow.com/howto/api/neighborhood-boundaries.htm
Minor changes to the gist were required.
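The adapted script and the shape_index helper module it imports are below. To show how the pieces fit together first, here is a minimal sketch of a single-point lookup (assumptions: the Zillow California shapefile is unpacked under ./ca/, and the coordinates are an illustrative point near San Francisco City Hall, not from the original):

import shape_index

# Per the script below: field 3 of each Zillow shaperecord is the
# neighborhood name, field 4 is a numeric id usable as an index key.
shapes, names = shape_index.read_shapefile('./ca/ZillowNeighborhoods-CA', 4, lambda l: l[3])
idx = shape_index.make_index(shapes)

hood_id = shape_index.query_index(idx, shapes, 37.7793, -122.4192)  # (lat, lng)
print(names.get(hood_id))  # neighborhood name, or None if no polygon matched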
# Adapted driver script: tags each SF street tree with its Zillow neighborhood.
import csv

import shape_index


def main():
    # Field 3 of each Zillow shaperecord is the neighborhood name;
    # field 4 is a numeric id used as the index key.
    name = lambda l: l[3]
    shapes, names = shape_index.read_shapefile('./ca/ZillowNeighborhoods-CA', 4, name)
    idx = shape_index.make_index(shapes)

    with open('Street_Tree_List.csv', newline='') as f, \
            open('Street_Tree_List_Neighborly.csv', 'w', newline='') as g:
        r = csv.DictReader(f)
        w = csv.DictWriter(g, r.fieldnames + ['Neighborhood'])
        w.writeheader()
        rows, parse_errors = 0, 0
        for row in r:
            try:
                lat = float(row['Latitude'])
                lng = float(row['Longitude'])
            except ValueError:
                parse_errors += 1
                continue
            rows += 1
            # query_index returns the numeric id of the containing shape, or None.
            hood = shape_index.query_index(idx, shapes, lat, lng)
            row['Neighborhood'] = names.get(hood, None)
            w.writerow(row)
    print("%d rows succeeded, %d had errors" % (rows, parse_errors))
    print("Wrote Street_Tree_List_Neighborly.csv")


if __name__ == '__main__':
    main()
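Run the script from a directory containing Street_Tree_List.csv and the unpacked shapefile; it writes Street_Tree_List_Neighborly.csv with a Neighborhood column appended (blank where no neighborhood polygon contains the point). The shape_index module it imports follows.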
# shape_index.py: map (lat, lng) points to shapes via an rtree index.
import json
from collections import defaultdict

# To get rtree:
#   brew install spatialindex
#   pip install rtree
from rtree import index

# To get shapefile:
#   pip install pyshp
import shapefile


def read_shapefile(shape_dir, ixId=3, name=None):
    """ixId is the index of a field in the shaperecord that can be used as a numeric id."""
    sf = shapefile.Reader(shape_dir)
    shapes = {}
    shape_names = {}
    for (s, sr) in zip(sf.shapes(), sf.shapeRecords()):
        ix = int(sr.record[ixId])
        shapes[ix] = s
        if name:
            shape_names[ix] = name(sr.record)
    return shapes, shape_names


def make_index(county_shapes):
    # Index each shape's bounding box under its numeric id.
    idx = index.Index()
    for (countyId, s) in county_shapes.items():
        idx.insert(countyId, s.bbox)
    return idx


def query_index(idx, shapes, lat, lng, e=.0000001):
    # The rtree quickly narrows the search to shapes whose bounding boxes
    # contain the point; an exact point-in-polygon test decides among them.
    for hit in idx.intersection((lng - e, lat - e, lng + e, lat + e)):
        if hit_test(lng, lat, shapes[hit].points):
            return hit


def hit_test(x, y, polygon):
    # Ray casting: cast a ray from (x, y) toward +x and count edge
    # crossings; an odd count means the point is inside.
    inside = False
    x1, y1 = polygon[0]
    for i in list(range(len(polygon))) + [0]:  # the trailing 0 closes the ring
        x2, y2 = polygon[i]
        if min(y1, y2) < y <= max(y1, y2) and x <= max(x1, x2):
            xints = (y - y1) * (x2 - x1) / (y2 - y1) + x1
            if x1 == x2 or x <= xints:
                inside = not inside
        x1, y1 = x2, y2
    return inside
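
# Quick sanity check for hit_test (illustrative values, not from the original):
# >>> square = [(0, 0), (1, 0), (1, 1), (0, 1)]
# >>> hit_test(0.5, 0.5, square)
# True
# >>> hit_test(2.0, 0.5, square)
# False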

def process(your_data):
    # Count how many of your (lat, lng) points fall in each county, using the
    # Census TIGER shapefile from:
    # http://www2.census.gov/geo/tiger/TIGER2009/tl_2009_us_county.zip
    shapes, names = read_shapefile('./us_county')
    idx = make_index(shapes)
    summary = defaultdict(int)
    for (lat, lng) in your_data:
        countyId = query_index(idx, shapes, lat, lng)
        summary[countyId] += 1
    json.dump(summary, open('data.json', 'w'))  # scale this later
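
# A minimal usage sketch for process() (assumptions: the TIGER county
# shapefile linked above is unpacked as ./us_county, and these two
# coordinates are illustrative, not from the original gist):
if __name__ == '__main__':
    process([(37.7793, -122.4192),   # San Francisco (approx.)
             (34.0522, -118.2437)])  # Los Angeles (approx.)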