Skip to content

Instantly share code, notes, and snippets.

@clintonlunn
Created May 9, 2025 05:08
Show Gist options
  • Save clintonlunn/bcb765199fe63a3f39e5e7ef4f44f0ec to your computer and use it in GitHub Desktop.
Save clintonlunn/bcb765199fe63a3f39e5e7ef4f44f0ec to your computer and use it in GitHub Desktop.
    graph TD
        %% Top-down flowchart. This section only declares nodes and subgraphs;
        %% the edges between them and the style classes are defined further down.

        %% Entry point: the user and the vector formats they may submit.
        USER_NODE[("User<br>Submits Vector Data<br>(e.g., SHP, GPKG, GeoJSON, CSV, KML, FlatGeobuf, validated GeoParquet)")]

        %% Ingestion side: upload handling, format conversion, storage, cataloguing, transformation.
        subgraph "Cloud Ingestion & Data Lake Pipeline"
            FB_FUNCTION["Firebase Function (TS/Python)<br>Receives Upload<br>Validates & Routes Data<br>Orchestrates Formatting"]

            %% Conversion stage: raw input, optional DuckDB pre-processing, GDAL/OGR conversion.
            %% NOTE(review): Mermaid documents that a subgraph's direction is ignored when any of its
            %% nodes is linked to the outside (INPUT_DATA and GDAL_CONVERTER are) - confirm rendering.
            subgraph "File Formatting Firebase Function (Invoked by Firebase Function)"
                direction TB
                INPUT_DATA[("Received User Data<br>(Diverse Formats as submitted)")] 
                DUCKDB_PREP["DuckDB (Optional)<br>CSV/Tabular Pre-processing<br>for complex cases"]
                GDAL_CONVERTER["GDAL/OGR<br>Core Geospatial Conversion Engine<br>Handles SHP, GPKG, FGB, GeoJSON, CSV w/WKT, GeoParquet validation/standardization<br>Output: Standardized GeoParquet"]
            end
            
            STANDARDIZED_GP["Standardized GeoParquet Output"]
            
            GCS_STORAGE["Google Cloud Storage (GCS)<br>Stores Standardized GeoParquet Files"]
            
            BIGQUERY_CATALOG[("Google BigQuery<br>(Acts as Apache Iceberg Catalog)")]
            ICEBERG_TABLES["Apache Iceberg Tables<br>(GeoParquet on GCS, Managed by BigQuery Catalog)<br><br><i>Capabilities: Data Versioning/Time Travel, Auditability via Snapshots</i>"]

            DBT_TRANSFORM["dbt (Data Build Tool)<br>SQL-based Transformations on Iceberg Data via BigQuery<br><i>Handles: View/Materialized View Creation, Data Modeling, Testing</i>"]
            ANALYSIS_READY_DATA["Analysis-Ready Datasets<br>(Transformed Iceberg Tables / BigQuery Views)"]
        end

        %% Consumption side: desktop GIS, OGC/feature web services, and other analysis tools.
        subgraph "Data Consumption & Access Interfaces"
            direction TB 

            %% Desktop path: direct BigQuery reads or exported FGDB/SHP files.
            subgraph "Direct Query & Desktop GIS"
                direction LR
                ARCGIS_PRO[("ArcGIS Pro Desktop")]
                BQ_CONNECTOR["BigQuery Connector<br>(Direct Read-Only Query)"]
                EXPORT_PROCESS["Export Process<br>(BQ->GCS->GDAL->FGDB/SHP on GCS)"]
                DOWNLOADED_FILES[("Downloaded Files<br>(FGDB, SHP)")]
            end

            %% Web path: GeoServer for OGC services, optional KoopJS for feature services.
            subgraph "Web Services & Online Platforms"
                direction TB
                subgraph "OGC Services via GeoServer (Existing Setup)"
                    direction LR
                    GEOSERVER_INSTANCE[("Existing GeoServer Instance")]
                    GEOSERVER_DATA_BACKEND[("GeoServer Data Backend<br><b>Option 1:</b> Direct BigQuery Query<br><b>Option 2:</b> Intermediate PostGIS DB")]
                    OGC_CLIENTS_GEO[("OGC Clients via GeoServer<br>(Web Maps, GIS Software)")]
                end
                subgraph "Feature Services & ArcGIS Online via KoopJS (Optional)"
                    direction LR
                    KOOPJS_SERVICE[("KoopJS Service<br>(Hosted on GCE/GKE/Cloud Run)")]
                    KOOP_DATA_BACKEND[("Koop Data Backend<br>e.g., BigQuery or GCS Exports")]
                    ARCGIS_ONLINE[("ArcGIS Online")]
                    WEB_APPS_KOOP[("Custom Web Apps<br>Consuming Feature Services")]
                end
            end
            
            OTHER_ANALYSIS_TOOLS[("Other Analysis Tools / Dashboards<br>(Querying BigQuery/Iceberg)")]
        end

        %% Define Connections and Data Flow
        %% Every edge label is wrapped in double quotes so that punctuation inside it
        %% (parentheses, commas, slashes, periods) is always read as label text.

        %% Ingestion: upload -> formatting -> standardized GeoParquet
        USER_NODE -- "Uploads Data" --> FB_FUNCTION
        FB_FUNCTION -- "Passes Data to" --> INPUT_DATA

        INPUT_DATA -- "If complex CSV requiring pre-processing" --> DUCKDB_PREP
        DUCKDB_PREP -- "Prepared Tabular Data" --> GDAL_CONVERTER
        INPUT_DATA -- "Geo-formats, Simple CSVs w/WKT, Validated GeoParquet" --> GDAL_CONVERTER

        GDAL_CONVERTER -- "Generates" --> STANDARDIZED_GP

        %% Storage and catalog
        STANDARDIZED_GP -- "Stored in" --> GCS_STORAGE
        GCS_STORAGE -- "Data Files for" --> ICEBERG_TABLES
        BIGQUERY_CATALOG -- "Manages Metadata for" --> ICEBERG_TABLES

        %% Transformation. #quot; is Mermaid's entity code for a literal double quote,
        %% so the label renders with quotation marks around the word Raw.
        ICEBERG_TABLES -- "#quot;Raw#quot; Source for" --> DBT_TRANSFORM
        DBT_TRANSFORM -- "Creates/Updates" --> ANALYSIS_READY_DATA

        %% ArcGIS Pro Consumption
        ANALYSIS_READY_DATA -- "Queried by (or raw Iceberg via BQ)" --> BQ_CONNECTOR
        BQ_CONNECTOR -- "Enables Direct Read in" --> ARCGIS_PRO
        ANALYSIS_READY_DATA -- "Data for" --> EXPORT_PROCESS
        EXPORT_PROCESS -- "Creates" --> DOWNLOADED_FILES
        DOWNLOADED_FILES -- "Used by" --> ARCGIS_PRO

        %% GeoServer Consumption
        ANALYSIS_READY_DATA -- "Data for (or raw Iceberg via BQ)" --> GEOSERVER_DATA_BACKEND
        GEOSERVER_DATA_BACKEND -- "Serves Data to" --> GEOSERVER_INSTANCE
        GEOSERVER_INSTANCE -- "Publishes OGC (WMS, WFS)" --> OGC_CLIENTS_GEO
        %% The next two edges start at the consuming product, not at the data source.
        ARCGIS_PRO -- "Can also consume OGC" --> OGC_CLIENTS_GEO
        ARCGIS_ONLINE -- "Can also consume OGC" --> OGC_CLIENTS_GEO


        %% KoopJS / ArcGIS Online Consumption
        ANALYSIS_READY_DATA -- "Data for (or raw Iceberg via BQ)" --> KOOP_DATA_BACKEND
        KOOP_DATA_BACKEND -- "Serves Data to" --> KOOPJS_SERVICE
        KOOPJS_SERVICE -- "Publishes Feature Services, etc." --> ARCGIS_ONLINE
        KOOPJS_SERVICE -- "Also to" --> WEB_APPS_KOOP
        %% This edge also starts at the consumer (ArcGIS Pro) and points at the service.
        ARCGIS_PRO -- "Can also consume services from" --> KOOPJS_SERVICE


        %% Other Analysis
        ANALYSIS_READY_DATA -- "Queried by (or raw Iceberg via BQ)" --> OTHER_ANALYSIS_TOOLS

        %% Styling: classDef declares a named style, class assigns it to node IDs.
        %% These statements are active; comment them out if the renderer does not support classes.
        classDef userNode fill:#E1EFFE,stroke:#2962FF,color:#000
        classDef firebaseNode fill:#FFF3E0,stroke:#FF9800,color:#000
        %% formattingModule is not assigned to any node in this diagram.
        %% SVG shapes take rounded corners from rx/ry rather than border-radius.
        classDef formattingModule fill:#E0F2F1,stroke:#00796B,color:#000,rx:10px,ry:10px
        classDef toolNode fill:#F5F5F5,stroke:#616161,color:#000
        classDef dataNode fill:#E3F2FD,stroke:#1E88E5,color:#000
        classDef storageNode fill:#FFFDE7,stroke:#FBC02D,color:#000
        classDef catalogNode fill:#FFEBEE,stroke:#D32F2F,color:#000
        classDef dbtNode fill:#E8EAF6,stroke:#3F51B5,color:#000
        %% consumptionSubgraph is not assigned to anything in this diagram.
        classDef consumptionSubgraph fill:#FAFAFA,stroke:#BDBDBD,color:#000
        classDef gisNode fill:#E8F5E9,stroke:#4CAF50,color:#000
        classDef accessMethodNode fill:#EDE7F6,stroke:#651FFF,color:#000
        classDef webServiceNode fill:#E0F7FA,stroke:#00ACC1,color:#000

        %% Each node is assigned exactly one class so its fill and stroke are unambiguous.
        class USER_NODE userNode;
        class FB_FUNCTION firebaseNode;
        class INPUT_DATA,DUCKDB_PREP,GDAL_CONVERTER toolNode;
        class STANDARDIZED_GP,ANALYSIS_READY_DATA,DOWNLOADED_FILES dataNode;
        class GCS_STORAGE storageNode;
        class BIGQUERY_CATALOG,ICEBERG_TABLES catalogNode;
        class DBT_TRANSFORM dbtNode;
        class ARCGIS_PRO,ARCGIS_ONLINE,OGC_CLIENTS_GEO,WEB_APPS_KOOP gisNode;
        %% The service hosts (GeoServer, KoopJS) are styled as webServiceNode below,
        %% so they are deliberately left out of accessMethodNode.
        class EXPORT_PROCESS,BQ_CONNECTOR,GEOSERVER_DATA_BACKEND,KOOP_DATA_BACKEND,OTHER_ANALYSIS_TOOLS accessMethodNode;
        class GEOSERVER_INSTANCE,KOOPJS_SERVICE webServiceNode;
Loading
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment