Last active
March 17, 2022 13:56
-
-
Save michimau/cc72010eaf7c15d2e8b57a7de694861e to your computer and use it in GitHub Desktop.
gis data ingestion into citus postgresql cluster for nomad job usage
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
DBHOST=$NOMAD_META_db_host | |
DBNAME=$NOMAD_META_db_name | |
DBPORT=$NOMAD_META_db_port | |
DBUSER=$NOMAD_META_db_user | |
DBPASSWORD=$NOMAD_META_db_password | |
DBSCHEMA=$NOMAD_META_db_schema | |
FILEURL=$NOMAD_META_fileurl | |
wget $NOMAD_META_fileurl -P /tmp | |
FILEPATH=/tmp/`basename $NOMAD_META_fileurl` | |
CHECK_DISK_FREE_SPACE = false | |
#DBSCHEMA=a_multi_test | |
#FILEPATH=/dataset/RiverBasinDistrict/RiverBasinDistrict.shp | |
SHARDSNUMBER=32 | |
REPLICATIONFACTOR=4 | |
PG_USE_COPY=YES | |
#display_usage() { | |
# echo "Usage example: importsqlite.sh SCHEMANAME FILEPATHNAME" | |
# echo -e "\nUsage: $0 [arguments] \n" | |
#} | |
# | |
#if [ $# -le 1 ] | |
# then | |
# display_usage | |
# exit 1 | |
#fi | |
function import_raster { | |
#echo inside import_raster | |
LAYER_LOWERCASE=$(echo $LAYER | tr '[:upper:]' '[:lower:]' | tr "-" "_") | |
gdal_translate -of GTIFF $FILEPATH -oo TABLE=$LAYER $LAYER.tif | |
raster2pgsql -I -C -M -F -t 100x100 $LAYER.tif $DBSCHEMA.$LAYER_LOWERCASE > $LAYER.sql | |
PGPASSWORD=$DBPASSWORD psql -v ON_ERROR_STOP=ON -U $DBUSER -p $DBPORT -h $DBHOST -d $DBNAME -f $LAYER.sql | |
rm $LAYER.tif $LAYER.sql | |
# echo DISTRIBUTING TABLE \"$DBSCHEMA\".$LAYER_LOWERCASE | |
# PGPASSWORD=$DBPASSWORD psql -q -U $DBUSER -p $DBPORT -h $DBHOST -d $DBNAME -c "SET citus.shard_replication_factor = $REPLICATIONFACTOR; SET citus.shard_count=$SHARDSNUMBER; SELECT create_distributed_table('$DBSCHEMA.$LAYER_LOWERCASE', 'rid');" > /dev/null | |
# echo TRUNCATING LOCAL TABLE $DBSCHEMA.$LAYER_LOWERCASE | |
# PGPASSWORD=$DBPASSWORD psql -q -U $DBUSER -p $DBPORT -h $DBHOST -d $DBNAME -c "SELECT truncate_local_data_after_distributing_table('$DBSCHEMA.$LAYER_LOWERCASE');" > /dev/null | |
} | |
function distribute_table () { | |
echo | |
echo DISTRIBUTING TABLE $DBSCHEMA.$LAYER_LOWERCASE | |
PGPASSWORD=$DBPASSWORD psql -q -U $DBUSER -p $DBPORT -h $DBHOST -d $DBNAME -c "SET citus.shard_replication_factor = $REPLICATIONFACTOR; SET citus.shard_count = $SHARDSNUMBER; SELECT create_distributed_table('$DBSCHEMA.$LAYER_LOWERCASE', '$1');" | |
echo REBALANCING TABLE $DBSCHEMA.$LAYER_LOWERCASE | |
psql -v -U $DBUSER -p $DBPORT -h $DBHOST $DBNAME -c "SELECT rebalance_table_shards('$DBSCHEMA.$LAYER_LOWERCASE', rebalance_strategy := 'by_disk_size');" | |
#echo | |
#echo TRUNCATING LOCAL TABLE $DBSCHEMA.$LAYER_LOWERCASE | |
#PGPASSWORD=$DBPASSWORD psql -q -U $DBUSER -p $DBPORT -h $DBHOST -d $DBNAME -c "SELECT truncate_local_data_after_distributing_table('$DBSCHEMA.$LAYER_LOWERCASE');" | |
echo | |
if [ "$1" = "ogc_fid" ]; then | |
echo CREATING INDEX: $LAYERNAME_wkb_geometry_geom_idx ON TABLE $DBSCHEMA.$LAYER_LOWERCASE | |
PGPASSWORD=$DBPASSWORD psql -q -U $DBUSER -p $DBPORT -h $DBHOST -d $DBNAME -c "CREATE INDEX $LAYER_LOWERCASE_wkb_geometry_geom_idx ON \"$DBSCHEMA\".$LAYER_LOWERCASE USING gist (wkb_geometry)" | |
fi | |
#psql -U $DBUSER -p $DBPORT -h $DBHOST $DBN-d $DBNAME -c "VACUUM ANALYZE $DBSCHEMA.$LAYER_LOWERCASE;" | |
echo DONE WITH CREATING INDEX AND VACUUMING | |
} | |
echo PROCESSING $FILEPATH | |
PGPASSWORD=$DBPASSWORD psql -q -U $DBUSER -p $DBPORT -h $DBHOST -d $DBNAME -c "CREATE SCHEMA IF NOT EXISTS \"$DBSCHEMA\"" > /dev/null | |
#GIS LAYERS PROCESSING | |
LAYERS=$(ogrinfo -oo LIST_ALL_TABLES=YES $FILEPATH | grep ": " | awk '{print $2}' | tr ' ' '\n' | sed "/Open/d") | |
if [ ! -z "$LAYERS" ] | |
then | |
echo GIS LAYERS: $LAYERS | |
for LAYER in $LAYERS; do | |
LAYER_LOWERCASE=$(echo $LAYER | tr '[:upper:]' '[:lower:]' | tr "-" "_") | |
[[ $LAYER_LOWERCASE =~ ^(spatialindex|geom_cols_ref_sys|geometry_columns|geometry_columns_auth|spatial_ref_sys|spatialite_history|sqlite_sequence|views_geometry_columns|virts_geometry_columns)$ ]] && continue | |
#echo LAYER: $LAYER | |
#echo | |
EXISTS=$(PGPASSWORD=$DBPASSWORD psql -q -U $DBUSER -p $DBPORT -h $DBHOST -d $DBNAME -c "SELECT EXISTS ( SELECT FROM pg_tables WHERE schemaname='$DBSCHEMA' AND tablename='$LAYER_LOWERCASE')" | grep 't' | xargs) | |
echo GIS EXISTS=$EXISTS | |
if [ ! -z "$EXISTS" ] | |
then | |
echo OGR2OGR-ING LAYER $LAYER FROM $FILEPATH | |
echo | |
ogr2ogr -progress -f PostgreSQL PG:"dbname='$DBNAME' host='$DBHOST' port='$DBPORT' user='$DBUSER' password='$DBPASSWORD'" -gt 65536 -makevalid -dim XY -lco GEOMETRY_NAME=wkb_geometry -lco FID=ogc_fid -lco PRECISION=N | |
O -lco PG_USE_COPY=YES -lco SCHEMA=$DBSCHEMA -nlt PROMOTE_TO_MULTI $FILEPATH $LAYER | |
#-overwrite | |
distribute_table "ogc_fid" | |
fi | |
done | |
fi | |
#RASTER LAYERS PROCESSING | |
echo gdalinfo $FILEPATH | |
LAYERS=$(gdalinfo $FILEPATH | grep : | grep DATASET | awk -F':' ' { print $NF } ') | |
echo RASTER LAYERS $LAYERS | |
if [ ! -z "$LAYERS" ] | |
then | |
for LAYER in $LAYERS; do | |
LAYER_LOWERCASE=$(echo $LAYER | tr '[:upper:]' '[:lower:]' | tr "-" "_") | |
#echo LAYER_LOWERCASE=$LAYER_LOWERCASE | |
EXISTS=$(PGPASSWORD=$DBPASSWORD psql -q -U $DBUSER -p $DBPORT -h $DBHOST -d $DBNAME -c "SELECT EXISTS ( SELECT FROM pg_tables WHERE schemaname='$DBSCHEMA' AND tablename='$LAYER_LOWERCASE')" | grep ' t' | xargs) | |
#echo EXISTS=$EXISTS | |
#[[ $EXISTS =~ ^(t)$ ]] && continue | |
if [ -z "$EXISTS" ] | |
then | |
echo GDAIL-INGO $DBSCHEMA.$LAYER | |
echo raster from gdpg/sqlite | |
import_raster | |
fi | |
distribute_table "rid" | |
done | |
fi | |
#PURE RASTER IMPORT | |
PURERASTER=$(ogrinfo $FILEPATH | grep PROJCRS) | |
echo PURERASTER=$PURERASTER | |
if [ -z "$PURERASTER" ] | |
then | |
echo "inside pureraster" | |
LAYER=`basename $FILEPATH | sed 's/\(.*\)\..*/\1/'` | |
echo LAYER=$LAYER | |
LAYER_LOWERCASE=$(echo $LAYER | tr '[:upper:]' '[:lower:]' | tr "-" "_") | |
echo LAYER_LOWERCASE=$LAYER_LOWERCASE | |
EXISTS=$(PGPASSWORD=$DBPASSWORD psql -q -U $DBUSER -p $DBPORT -h $DBHOST -d $DBNAME -c "SELECT EXISTS ( SELECT FROM pg_tables WHERE schemaname='$DBSCHEMA' AND tablename='$LAYER_LOWERCASE')" | grep ' t' | xargs) | |
echo EXISTS=$EXISTS | |
if [ -z "$EXISTS" ] | |
then | |
echo GDAl-ING $DBSCHEMA.$LAYER | |
import_raster | |
fi | |
distribute_table "rid" | |
fi | |
PGPASSWORD=$DBPASSWORD psql -q -U $DBUSER -p $DBPORT -h $DBHOST -d $DBNAME -c "SELECT Populate_Geometry_Columns('\"$DBSCHEMA\".\"$LAYER_LOWERCASE\"'::regclass);" > /dev/null | |
PGPASSWORD=$DBPASSWORD psql -q -U $DBUSER -p $DBPORT -h $DBHOST -d $DBNAME -c "GRANT USAGE ON SCHEMA \"$DBSCHEMA\" TO gr_gis_sdi_ldap" > /dev/null | |
PGPASSWORD=$DBPASSWORD psql -q -U $DBUSER -p $DBPORT -h $DBHOST -d $DBNAME -c "SELECT run_command_on_workers(\$cmd\$ GRANT USAGE ON SCHEMA \"$DBSCHEMA\" TO gr_gis_sdi_ldap \$cmd\$)" > /dev/null | |
PGPASSWORD=$DBPASSWORD psql -q -U $DBUSER -p $DBPORT -h $DBHOST -d $DBNAME -c "GRANT SELECT ON ALL TABLES IN SCHEMA \"$DBSCHEMA\" TO gr_gis_sdi_ldap" > /dev/null | |
PGPASSWORD=$DBPASSWORD psql -q -U $DBUSER -p $DBPORT -h $DBHOST -d $DBNAME -c "SELECT run_command_on_workers(\$cmd\$ GRANT SELECT ON ALL TABLES IN SCHEMA \"$DBSCHEMA\" TO gr_gis_sdi_ldap \$cmd\$)" > /dev/null |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment