Created
March 27, 2021 03:57
-
-
Save allenday/049871a2c78f792afc0d363a0f0feda2 to your computer and use it in GitHub Desktop.
earth-engine-to-bigquery
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
CREDENTIALS="credentials.json" | |
COLLECTION="projects/earthengine-public/assets/WorldPop/GP/100m/pop" | |
YEAR=2020 | |
export "GOOGLE_APPLICATION_CREDENTIALS=$CREDENTIALS" | |
IMAGES=$(ogrinfo -ro -al "EEDA:" -oo "COLLECTION=$COLLECTION" -where "year=$YEAR" | grep 'gdal_dataset (String) = ' | cut -d '=' -f2 | tr -d ' ') | |
for IMAGE in $IMAGES | |
do | |
NAME=$(basename "$IMAGE") | |
echo "Fetching Image: $IMAGE -> ${NAME}" | |
#gdal_translate -of NetCDF "$IMAGE" "${NAME}.nc" | |
gdal_translate "$IMAGE" "${NAME}.tif" | |
done |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# See: https://medium.com/@lakshmanok/how-to-load-geojson-files-into-bigquery-gis-9dc009802fb4 | |
import os | |
import sys | |
import json | |
#import glob | |
#for fname in glob.glob('*.geojson'): | |
for fname in sys.argv[1:]: | |
name = os.path.splitext(fname)[0] | |
print (fname, name) | |
with open(fname, 'r') as ifp: | |
with open(f'{name}.json', 'w') as ofp: | |
features = json.load(ifp)['features'] | |
# new-line-separated JSON | |
for obj in features: | |
props = obj['properties'] | |
props['geometry'] = json.dumps({ | |
'type': 'Polygon', | |
'coordinates': obj['geometry']['coordinates'][0] | |
}) | |
print(props, file=ofp) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import os | |
import sys | |
#import glob | |
import xarray as xr | |
import numpy as np | |
import geopandas as gpd | |
from shapely.geometry import box | |
import json | |
#from geolib import geohash | |
import geohash | |
# check geohash accuracy | |
# PostGIS example from https://postgis.net/docs/ST_GeoHash.html | |
assert (geohash.encode(48, -126, 20) == 'c0w3hf1s70w3hf1s70w3') | |
# Google BigQuery example from https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_geohash | |
assert (geohash.encode(47.62, -122.35, 10) == 'c22yzugqw7') | |
#for fname in glob.glob('*.tif'): | |
for fname in sys.argv[1:]: | |
name = os.path.splitext(fname)[0] | |
print (fname, name) | |
with xr.open_rasterio(fname).squeeze(drop=True) as da: | |
# define raster resolution | |
yres, xres = da.res | |
# split too big datasets to limit RAM consumption | |
size = da.y.shape[0] | |
chunksize = 1000 | |
chunks = int(np.round(size/chunksize + 0.5)) | |
print (f'{chunks} chunks:', end=' ', flush=True) | |
for chunk in range(chunks): | |
print (chunk+1, end=' ', flush=True) | |
# convert dataarray chunk to dataframe | |
df = da[chunk*chunksize:(chunk+1)*chunksize].to_dataframe(name='population').reset_index() | |
# drop NoDATA values | |
df = df[df['population']>0] | |
# further processing impossible when dataframe is empty (no valid pixels) | |
if len(df) == 0: | |
continue | |
# add some attributes | |
df['id'] = name | |
# calculate geohash for point geometry | |
df['geohash'] = df.apply(lambda row: geohash.encode(row.y, row.x, 10), axis=1) | |
# convert point geometry to bounding box | |
# box(minx, miny, maxx, maxy, ccw=True) | |
df['geometry'] = df.apply(lambda row: box(row.x-xres, row.y-yres, row.x+xres, row.y+yres), axis=1) | |
# remove separate coordinates x, y | |
del df['x'] | |
del df['y'] | |
# convert dataframe to geodataframe and reproject to global decimal degree coordinates | |
gdf = gpd.GeoDataFrame(df).set_crs(da.crs.replace('+init=','')).to_crs('epsg:4326') | |
# save to file as geojson | |
gdf.to_file(f'{name}_{chunk+1:02}.geojson', driver='GeoJSON', index=False) | |
print ('', flush=True) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment