Created
March 23, 2016 21:32
-
-
Save fscottfoti/a066a5ee6984a6c21573 to your computer and use it in GitHub Desktop.
script to turn parcels into binary h5 file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pymongo import MongoClient | |
from bson.objectid import ObjectId | |
import json | |
import time | |
from string import join | |
import pandas as pd | |
import cPickle | |
# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
MONGO = True        # True: insert into MongoDB; False: write to parcels.json
JURIS = None        # when set, records whose "juris" differs are skipped
FEASIBILITY = True  # selects the feasibility collection/CSV over parcels

# Target collection id and the CSV that supplies each feature's properties.
if FEASIBILITY:
    cid = "hMm5FqbDCPa4ube6Y"  # feasibility
    csvname = "output/feasibility.csv"
else:
    cid = "ZC7yyAyA8jkDFnRtf"  # parcels
    csvname = "output/parcels.csv"

# Open the output sink: a MongoDB database handle or a local file.
if not MONGO:
    outf = open("parcels.json", "w")
else:
    client = MongoClient()
    # client.drop_database("baus")
    db = client.togethermap

# Buffer of features awaiting export, and a count of pickle records read.
features = []
cnt = 0

# Attribute table, indexed by geom_id for per-parcel lookups.
df = pd.read_csv(csvname, index_col="geom_id")

# Log the start time.
print(time.ctime())
def export_features(features):
    """Flush one batch of feature dicts to the configured output sink.

    When the module-level ``MONGO`` flag is true the batch is inserted into
    ``db.places``; otherwise each feature is written to ``outf`` as one JSON
    document per line.

    Args:
        features: list of JSON-serialisable feature dicts. An empty list is
            a no-op.
    """
    # Nothing to do for an empty batch; this also avoids handing an empty
    # list to insert_many.
    if not features:
        return
    if MONGO:
        db.places.insert_many(features)
    else:
        # Terminate every record (including the last) with a newline so that
        # consecutive batches do not run together on a single line.
        outf.write("\n".join(json.dumps(f) for f in features) + "\n")
# Load the (geom_id, geojson-string) pairs and export them in batches of 10k,
# attaching each parcel's attribute row from ``df`` as the feature properties.
#
# NOTE(review): unpickling can execute arbitrary code; only load a
# parcels.pickle that comes from a trusted source.
with open("output/parcels.pickle", "rb") as pickle_file:
    parcels = cPickle.load(pickle_file)

try:
    for geom_id, geojson in parcels:
        cnt += 1
        if cnt % 10000 == 0:
            print("Done reading rec %d" % cnt)

        # Flush a full buffer before processing the current record.
        if len(features) == 10000:
            print("Exporting 10k recs")
            export_features(features)
            print("Done exporting 10k recs")
            features = []

        try:
            rec = df.loc[geom_id]
        except KeyError:
            # don't need to keep it, it's not in parcels.csv
            continue

        if JURIS and rec["juris"] != JURIS:
            continue

        f = json.loads(geojson)
        f["properties"] = rec.to_dict()
        f["properties"]["geom_id"] = geom_id
        # Drop the feature's own id (if any); a fresh '_id' is assigned below.
        f.pop("id", None)
        f["creatorUID"] = "ceTir2NKMN87Gq7wj"
        f["creator"] = "Fletcher Foti"
        f["createDate"] = "2015-08-29T05:10:00.446Z"
        f["updateDate"] = "2015-08-29T05:10:00.446Z"
        f["collectionId"] = cid
        f['_id'] = str(ObjectId())
        f["post_count"] = 0
        features.append(f)

    # Export whatever is left over from the final, partial batch.
    if features:
        export_features(features)
finally:
    # In file mode, make sure buffered output reaches disk even on error.
    if not MONGO:
        outf.close()

print(time.ctime())
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import fiona | |
from shapely.geometry import shape | |
import pandas as pd | |
import json | |
import time | |
from string import join | |
import cPickle | |
# Log the wall-clock time at which the script starts.
print(time.ctime())
def add_bbox(p):
    """Store the bounding rectangle of ``p['geometry']`` under ``p['bbox']``.

    The rectangle is written as a GeoJSON-style ``Polygon`` dict whose single
    ring visits the four corners of the geometry's bounds and closes on the
    starting corner.

    Args:
        p: feature mapping with a ``'geometry'`` entry accepted by
            ``shapely.geometry.shape``. It is modified in place.

    Returns:
        The same object ``p``, with the ``'bbox'`` key set.
    """
    x_lo, y_lo, x_hi, y_hi = shape(p['geometry']).bounds
    ring = [
        [x_lo, y_lo],
        [x_lo, y_hi],
        [x_hi, y_hi],
        [x_hi, y_lo],
        [x_lo, y_lo],  # repeat the first corner to close the ring
    ]
    p['bbox'] = {"type": "Polygon", "coordinates": [ring]}
    return p
# Read every parcel from the shapefile, reduce it to geometry + bounding box,
# and pickle the resulting list of (geom_id, geojson-string) pairs.
#
# NOTE: the HDF5 store is still created (truncating any existing parcels.h5),
# but the code that writes to it is disabled below. It is now always closed
# so the file handle is not leaked.
store = pd.HDFStore("parcels.h5", "w")
features = []
try:
    with fiona.drivers():
        with fiona.open('/home/ubuntu/data/parcels4326.shp') as shp:
            for f in shp:
                # Records without a geometry cannot be given a bounding box.
                if f["geometry"] is None:
                    continue
                geom_id = int(f["properties"]["GEOM_ID"])
                # Discard the shapefile attributes; only the id is kept.
                f["properties"] = {}
                f = add_bbox(f)
                features.append((geom_id, json.dumps(f)))
                # Progress output every 5000 collected features.
                if len(features) % 5000 == 0:
                    print(len(features))

    # Binary mode plus a context manager: the file is flushed and closed
    # deterministically once the dump finishes.
    with open("parcels.pickle", "wb") as pickle_file:
        cPickle.dump(features, pickle_file)

    # Disabled alternative output into the HDF5 store:
    # geomids, geojson = zip(*features)
    # s = pd.Series(geojson, index=geomids)
    # store["parcels"] = pd.DataFrame({"geojson": s})
finally:
    store.close()

print(time.ctime())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment