Skip to content

Instantly share code, notes, and snippets.

@chris
Created January 10, 2023 20:20
Show Gist options
  • Save chris/a0274694d99e904921de3a797d7583b4 to your computer and use it in GitHub Desktop.
Save chris/a0274694d99e904921de3a797d7583b4 to your computer and use it in GitHub Desktop.
Script to import large raster data set in chunks via VRT files.
#!/bin/bash
#
# Script to produce tiled raster import of a large data source, so that we can
# import in chunks, and be able to handle failures. This takes the lat/long
# mins and maxes for a rectangular area, and a VRT tile size and produces VRTs
# for each tile. It also outputs a shell script to import those VRTs along with
# outputting the last "rid" (raster ID in DB) after completing a tile, so that
# if it fails, we know the items to delete (so we can restart the failed tile,
# and not have duplicate records from what was partially imported).
#
# See also blog writeup on this:
#
# Note, you will need to update table names, min/max and tile size values, etc.
#
# Note, this does the index creation and raster constraints after all data has been imported.
#
# Update these values for your desired DB, geographic dimensions, etc:
DBTABLE=north_america
PSQL="psql -p 5432 -U postgres -d my_database"
XMIN=-180
YMIN=13
XMAX=-50
YMAX=73
TILESIZE=10
VRTPREFIX=nageo_tile
SCRIPTFILE=import_${VRTPREFIX}.sh
RAST_TILE_SIZE="30x30"
FIRSTTILE=true
echo "#!/bin/bash
#
# Script to import tiles of raster data for geography range:
# XMIN: ${XMIN}
# YMIN: ${YMIN}
# XMAX: ${XMAX}
# YMAX: ${YMAX}
# tile size: ${TILESIZE}
# Importing into DB table: ${DBTABLE}
set -e
import_first_vrt () {
raster2pgsql -Y -s 4326 -e -t ${RAST_TILE_SIZE} \$1 ${DBTABLE} | ${PSQL} -q -v ON_ERROR_STOP=1
}
append_vrt () {
raster2pgsql -a -Y -s 4326 -e -t ${RAST_TILE_SIZE} \$1 ${DBTABLE} | ${PSQL} -q -v ON_ERROR_STOP=1
}
last_rid () {
${PSQL} -t -c \"SELECT MAX(rid) FROM ${DBTABLE};\"
}
notify () {
# TBD: some mechanism to notify you the tile is done - I use SMS
}
" > $SCRIPTFILE
numvrts=$((($XMAX-$XMIN)/$TILESIZE * ($YMAX-$YMIN)/$TILESIZE))
echo "Using database: ${PSQL}"
echo "Generating ${numvrts} VRTs and the import script..."
for (( y=$YMIN; y<$YMAX; y+=$TILESIZE )); do
for (( x=$XMIN; x<$XMAX; x+=$TILESIZE )); do
x2=$(($x+$TILESIZE))
y2=$(($y+$TILESIZE))
vrtfile=${VRTPREFIX}_${x}_${y}_${x2}_${y2}.vrt
gdalbuildvrt -te ${x}.0 ${y}.0 ${x2}.0 ${y2}.0 $vrtfile *.tif
echo "# Tile: ${vrtfile}" >> $SCRIPTFILE
if [ "$FIRSTTILE" = true ] ; then
echo import_first_vrt ${vrtfile} >> $SCRIPTFILE
else
echo append_vrt ${vrtfile} >> $SCRIPTFILE
fi
echo "echo Completed tile $vrtfile" >> $SCRIPTFILE
echo "echo -n last rid: " >> $SCRIPTFILE
echo "last_rid" >> $SCRIPTFILE
echo "notify \"Done with tile ${vrtfile}\"" >> $SCRIPTFILE
FIRSTTILE=false
done
done
echo "
# Complete
echo 'Completed all tiles.'
notify \"Done with all of data import!\"
" >> $SCRIPTFILE
echo "
echo 'Creating gist index...'
${PSQL} -c "CREATE INDEX ${DBTABLE}_st_convexhull_idx ON ${DBTABLE} USING gist (st_convexhull(rast));"
echo 'Done creating gist index on table.'
notify 'Done creating gist index on table.'
echo 'Setting raster constraints on table...'
${PSQL} -c \"SELECT AddRasterConstraints('${DBTABLE}', 'rast');\"
echo 'Done setting raster constraints on table.'
notify 'Done setting raster constraints on table.'
" >> $SCRIPTFILE
chmod +x $SCRIPTFILE
echo "Saved import script as: ${SCRIPTFILE}"
echo "VRT files are: ${VRTPREFIX}_*.vrt"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment