Created
January 10, 2023 20:20
-
-
Save chris/a0274694d99e904921de3a797d7583b4 to your computer and use it in GitHub Desktop.
Script to import large raster data set in chunks via VRT files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# | |
# Script to produce tiled raster import of a large data source, so that we can | |
# import in chunks, and be able to handle failures. This takes the lat/long | |
# mins and maxes for a rectangular area, and a VRT tile size and produces VRTs | |
# for each tile. It also outputs a shell script to import those VRTs along with | |
# outputting the last "rid" (raster ID in DB) after completing a tile, so that | |
# if it fails, we know the items to delete (so we can restart the failed tile, | |
# and not have duplicate records from what was partially imported). | |
# | |
# See also blog writeup on this: | |
# | |
# Note, you will need to update table names, min/max and tile size values, etc. | |
# | |
# Note, this does the index creation and raster constraints after all data has been imported. | |
# | |
# Update these values for your desired DB, geographic dimensions, etc: | |
DBTABLE=north_america | |
PSQL="psql -p 5432 -U postgres -d my_database" | |
XMIN=-180 | |
YMIN=13 | |
XMAX=-50 | |
YMAX=73 | |
TILESIZE=10 | |
VRTPREFIX=nageo_tile | |
SCRIPTFILE=import_${VRTPREFIX}.sh | |
RAST_TILE_SIZE="30x30" | |
FIRSTTILE=true | |
echo "#!/bin/bash | |
# | |
# Script to import tiles of raster data for geography range: | |
# XMIN: ${XMIN} | |
# YMIN: ${YMIN} | |
# XMAX: ${XMAX} | |
# YMAX: ${YMAX} | |
# tile size: ${TILESIZE} | |
# Importing into DB table: ${DBTABLE} | |
set -e | |
import_first_vrt () { | |
raster2pgsql -Y -s 4326 -e -t ${RAST_TILE_SIZE} \$1 ${DBTABLE} | ${PSQL} -q -v ON_ERROR_STOP=1 | |
} | |
append_vrt () { | |
raster2pgsql -a -Y -s 4326 -e -t ${RAST_TILE_SIZE} \$1 ${DBTABLE} | ${PSQL} -q -v ON_ERROR_STOP=1 | |
} | |
last_rid () { | |
${PSQL} -t -c \"SELECT MAX(rid) FROM ${DBTABLE};\" | |
} | |
notify () { | |
# TBD: some mechanism to notify you the tile is done - I use SMS | |
} | |
" > $SCRIPTFILE | |
numvrts=$((($XMAX-$XMIN)/$TILESIZE * ($YMAX-$YMIN)/$TILESIZE)) | |
echo "Using database: ${PSQL}" | |
echo "Generating ${numvrts} VRTs and the import script..." | |
for (( y=$YMIN; y<$YMAX; y+=$TILESIZE )); do | |
for (( x=$XMIN; x<$XMAX; x+=$TILESIZE )); do | |
x2=$(($x+$TILESIZE)) | |
y2=$(($y+$TILESIZE)) | |
vrtfile=${VRTPREFIX}_${x}_${y}_${x2}_${y2}.vrt | |
gdalbuildvrt -te ${x}.0 ${y}.0 ${x2}.0 ${y2}.0 $vrtfile *.tif | |
echo "# Tile: ${vrtfile}" >> $SCRIPTFILE | |
if [ "$FIRSTTILE" = true ] ; then | |
echo import_first_vrt ${vrtfile} >> $SCRIPTFILE | |
else | |
echo append_vrt ${vrtfile} >> $SCRIPTFILE | |
fi | |
echo "echo Completed tile $vrtfile" >> $SCRIPTFILE | |
echo "echo -n last rid: " >> $SCRIPTFILE | |
echo "last_rid" >> $SCRIPTFILE | |
echo "notify \"Done with tile ${vrtfile}\"" >> $SCRIPTFILE | |
FIRSTTILE=false | |
done | |
done | |
echo " | |
# Complete | |
echo 'Completed all tiles.' | |
notify \"Done with all of data import!\" | |
" >> $SCRIPTFILE | |
echo " | |
echo 'Creating gist index...' | |
${PSQL} -c "CREATE INDEX ${DBTABLE}_st_convexhull_idx ON ${DBTABLE} USING gist (st_convexhull(rast));" | |
echo 'Done creating gist index on table.' | |
notify 'Done creating gist index on table.' | |
echo 'Setting raster constraints on table...' | |
${PSQL} -c \"SELECT AddRasterConstraints('${DBTABLE}', 'rast');\" | |
echo 'Done setting raster constraints on table.' | |
notify 'Done setting raster constraints on table.' | |
" >> $SCRIPTFILE | |
chmod +x $SCRIPTFILE | |
echo "Saved import script as: ${SCRIPTFILE}" | |
echo "VRT files are: ${VRTPREFIX}_*.vrt" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment