Skip to content

Instantly share code, notes, and snippets.

@arbakker
Last active October 12, 2021 14:35
Show Gist options
  • Save arbakker/5d9efe24b06c0f6be799bfc3b2ec2f76 to your computer and use it in GitHub Desktop.
Save arbakker/5d9efe24b06c0f6be799bfc3b2ec2f76 to your computer and use it in GitHub Desktop.
Bash script to order features in GPGK by geohash using #spatialite, #bash, #sqlite3

Geohash GeoPackage

Bash script to reorder feature tables of GeoPackage based on Geohash. Requires sqlite3 and libsqlite3-mod-spatialite version => 5.0.1. To run in Docker:

wget https://service.pdok.nl/cbs/vk100/atom/v1_0/downloads/cbs_vk100_2020.zip
mkdir -p data
unzip cbs_vk100_2020.zip -d data && rm cbs_vk100_2020.zip
docker build . -t "spl" 
docker run -v $(pwd)/data:/data  spl /geohash-gpkg /data/cbs_vk100_2020.gpkg /data/cbs_vk100_2020-geohash.gpkg
FROM debian:sid-slim
# software-properties-common provides add-apt-repository
# which we need in order to install a more recent release
# of libsqlite3-mod-spatialite from the sid distribution
RUN apt-get update && \
apt-get install -y --no-install-recommends \
libsqlite3-mod-spatialite \
sqlite3 && \
apt-get remove -y software-properties-common && \
apt clean && \
rm -rf /var/lib/apt && \
rm -rf /var/lib/dpkg
ADD geohash-gpkg.sh /geohash-gpkg
#!/usr/bin/env bash
set -euo pipefail
GPKG_PATH_IN=$1
GPKG_PATH_OUT=$2
# requires libsqlite3-mod-spatialite version=>5.0.0-1
# init geopackage to reproject geoms with spatialite, see https://gis.stackexchange.com/questions/412884/spatialite-does-not-use-gpkg-spatial-ref-sys-in-gpkg-mode
INIT_QUERY="
DROP TABLE IF EXISTS spatial_ref_sys;
CREATE TABLE spatial_ref_sys (
srid INTEGER NOT NULL PRIMARY KEY,
auth_name VARCHAR(256),
auth_srid INTEGER,
srtext VARCHAR(2048),
proj4text VARCHAR(2048)
);
INSERT INTO spatial_ref_sys SELECT
srs_id AS srid,
organization AS auth_name,
organization_coordsys_id AS auth_srid,
definition AS srtext,
NULL
FROM gpkg_spatial_ref_sys;
"
function execute-query(){
query=$1
gpkg=$2
silent=${3:-true}
query="
select load_extension('mod_spatialite');
select EnableGpkgMode();
${query}
"
if [[ $silent == "true" ]];then
sqlite3 "$gpkg" "$query" > /dev/null
else
sqlite3 "$gpkg" "$query"
fi
}
function get-columns(){
ddl="$1"
re='^.*\s\((.*)\)$'
if [[ $ddl =~ $re ]];then
columns_ddl="${BASH_REMATCH[1]}"
fi
columns=""
ifs_=$IFS
IFS=","
for column_ddl in $columns_ddl;do
re='^\s?"(.*?)".*$'
if [[ $column_ddl =~ $re ]];then
column="${BASH_REMATCH[1]}"
if [[ $column != "fid" ]] && [[ $column != "mygeohash" ]];then # filter out fid and geohash - fid need to be regenerated, geohash needs to be omitted
columns="${columns}, \"${column}\""
fi
fi
done
IFS=$ifs_
columns="${columns:2}" # removes first two chars
echo "$columns"
}
function main(){
cp "$GPKG_PATH_IN" "$GPKG_PATH_OUT"
execute-query "$INIT_QUERY" "$GPKG_PATH_OUT"
for layer in $(execute-query "select table_name from gpkg_contents where data_type='features'" "$GPKG_PATH_OUT" "false");do
ddl_bak_table=$(
execute-query "select sql from sqlite_master where name='${layer}' and type='table';" "$GPKG_PATH_OUT" "false" | \
sed -e "s|${layer}|${layer}_bak|g"
) # get ddl of table before adding geohash column
execute-query "alter table ${layer} add column \"mygeohash\" STRING;" "$GPKG_PATH_OUT"
execute-query "update ${layer} SET mygeohash = (ST_GeoHash(ST_Transform(geom, 4326), 10));" "$GPKG_PATH_OUT" # 10 is chosen as a good granularity to cluster around, depending on the granularity of the data this can be tweaked
columns=$(get-columns "$ddl_bak_table")
insert_query="insert into ${layer}_bak(${columns}) SELECT ${columns} FROM ${layer} order by mygeohash;"
execute-query "$ddl_bak_table" "$GPKG_PATH_OUT"
execute-query "$insert_query" "$GPKG_PATH_OUT"
execute-query "drop table ${layer};" "$GPKG_PATH_OUT"
execute-query "alter table ${layer}_bak rename to ${layer};" "$GPKG_PATH_OUT"
done
echo "INFO: succesfully geohashed GeoPackage: ${GPKG_PATH_OUT}"
}
main
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment