|
#!/usr/bin/env bash |
|
set -euo pipefail |
|
GPKG_PATH_IN=$1 |
|
GPKG_PATH_OUT=$2 |
|
|
|
# requires libsqlite3-mod-spatialite version=>5.0.0-1 |
|
|
|
# init geopackage to reproject geoms with spatialite, see https://gis.stackexchange.com/questions/412884/spatialite-does-not-use-gpkg-spatial-ref-sys-in-gpkg-mode |
|
INIT_QUERY=" |
|
DROP TABLE IF EXISTS spatial_ref_sys; |
|
CREATE TABLE spatial_ref_sys ( |
|
srid INTEGER NOT NULL PRIMARY KEY, |
|
auth_name VARCHAR(256), |
|
auth_srid INTEGER, |
|
srtext VARCHAR(2048), |
|
proj4text VARCHAR(2048) |
|
); |
|
INSERT INTO spatial_ref_sys SELECT |
|
srs_id AS srid, |
|
organization AS auth_name, |
|
organization_coordsys_id AS auth_srid, |
|
definition AS srtext, |
|
NULL |
|
FROM gpkg_spatial_ref_sys; |
|
" |
|
|
|
function execute-query(){ |
|
query=$1 |
|
gpkg=$2 |
|
silent=${3:-true} |
|
query=" |
|
select load_extension('mod_spatialite'); |
|
select EnableGpkgMode(); |
|
${query} |
|
" |
|
if [[ $silent == "true" ]];then |
|
sqlite3 "$gpkg" "$query" > /dev/null |
|
else |
|
sqlite3 "$gpkg" "$query" |
|
fi |
|
} |
|
|
|
function get-columns(){ |
|
ddl="$1" |
|
re='^.*\s\((.*)\)$' |
|
if [[ $ddl =~ $re ]];then |
|
columns_ddl="${BASH_REMATCH[1]}" |
|
fi |
|
columns="" |
|
ifs_=$IFS |
|
IFS="," |
|
for column_ddl in $columns_ddl;do |
|
re='^\s?"(.*?)".*$' |
|
if [[ $column_ddl =~ $re ]];then |
|
column="${BASH_REMATCH[1]}" |
|
if [[ $column != "fid" ]] && [[ $column != "mygeohash" ]];then # filter out fid and geohash - fid need to be regenerated, geohash needs to be omitted |
|
columns="${columns}, \"${column}\"" |
|
fi |
|
fi |
|
done |
|
IFS=$ifs_ |
|
columns="${columns:2}" # removes first two chars |
|
echo "$columns" |
|
} |
|
|
|
function main(){ |
|
cp "$GPKG_PATH_IN" "$GPKG_PATH_OUT" |
|
execute-query "$INIT_QUERY" "$GPKG_PATH_OUT" |
|
for layer in $(execute-query "select table_name from gpkg_contents where data_type='features'" "$GPKG_PATH_OUT" "false");do |
|
ddl_bak_table=$( |
|
execute-query "select sql from sqlite_master where name='${layer}' and type='table';" "$GPKG_PATH_OUT" "false" | \ |
|
sed -e "s|${layer}|${layer}_bak|g" |
|
) # get ddl of table before adding geohash column |
|
execute-query "alter table ${layer} add column \"mygeohash\" STRING;" "$GPKG_PATH_OUT" |
|
execute-query "update ${layer} SET mygeohash = (ST_GeoHash(ST_Transform(geom, 4326), 10));" "$GPKG_PATH_OUT" # 10 is chosen as a good granularity to cluster around, depending on the granularity of the data this can be tweaked |
|
columns=$(get-columns "$ddl_bak_table") |
|
insert_query="insert into ${layer}_bak(${columns}) SELECT ${columns} FROM ${layer} order by mygeohash;" |
|
execute-query "$ddl_bak_table" "$GPKG_PATH_OUT" |
|
execute-query "$insert_query" "$GPKG_PATH_OUT" |
|
execute-query "drop table ${layer};" "$GPKG_PATH_OUT" |
|
execute-query "alter table ${layer}_bak rename to ${layer};" "$GPKG_PATH_OUT" |
|
done |
|
echo "INFO: succesfully geohashed GeoPackage: ${GPKG_PATH_OUT}" |
|
} |
|
|
|
main |