Skip to content

Instantly share code, notes, and snippets.

@Youssef-Harby
Created October 2, 2024 09:57
Show Gist options
  • Save Youssef-Harby/3c9b14bda8dc4e7cac1f9ef7a9c195dc to your computer and use it in GitHub Desktop.
Save Youssef-Harby/3c9b14bda8dc4e7cac1f9ef7a9c195dc to your computer and use it in GitHub Desktop.
Overture Maps Data Downloader with Optional GeoJSON Clipping

Overture Maps Data Downloader with Optional GeoJSON Clipping

This Python script downloads geospatial data from Overture Maps, based on user-defined themes and data types (e.g., buildings, transportation). The script can download specific data within a given bounding box (bbox) and export it in GeoParquet or GeoPackage format.

Features:

Allows downloading multiple themes/types (e.g., buildings, transportation) using the overturemaps Python library.
Bounding box (bbox) filtering to limit data to a specific geographic extent.
Optionally clips geometries to the exact boundaries of a GeoJSON file, if provided.
Supports output in GeoParquet or GeoPackage formats.
Includes extensive logging to track the progress of downloads and data export.
Handles CRS (Coordinate Reference System) issues: downloaded data with no CRS is assigned EPSG:4326, and the clipping GeoJSON is reprojected to EPSG:4326 when it uses a different CRS.

Usage:

Define your bbox, output format, and optional GeoJSON clipping path at the top of the script.
The script will iterate over specified themes and data types, fetching data and saving it to the defined output directory.

Dependencies:

overturemaps
geopandas
from overturemaps import core
from pathlib import Path
import logging
import sys
import geopandas as gpd
# Configure the root logger: INFO and above, timestamped, written to stdout
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)],
)
# Shared root-logger handle used by the rest of the script
logger = logging.getLogger()
# Define variables at the top
# Example bbox; presumably (min_lon, min_lat, max_lon, max_lat) in EPSG:4326 -- confirm against overturemaps
bbox = (37.13879, 22.50602, 42.27389, 27.46549)
# Desired output format. Only 'geoparquet' and 'gpkg' are handled by the
# save step in download_overture_data; the value is also used verbatim as
# the output file extension.
output_format = "geoparquet"
# Output directory for saved files
output_dir = Path("output_directory")
# Path to a GeoJSON file for clipping (set to None to disable clipping)
clip_geojson_path = None

# Map of themes to their corresponding data types
theme_to_type = {
    "addresses": ["address"],
    "base": ["infrastructure", "land", "land_cover", "land_use", "water"],
    "buildings": ["building", "building_part"],
    "divisions": ["division", "division_area", "division_boundary"],
    "places": ["place"],
    "transportation": ["segment", "connector"],
}

# Ensure the output directory exists (parents are created; an existing directory is fine)
output_dir.mkdir(parents=True, exist_ok=True)
# Load GeoJSON for clipping if defined
if clip_geojson_path:
    try:
        clip_gdf = gpd.read_file(clip_geojson_path)
        logger.info(f"Loaded clipping GeoJSON from {clip_geojson_path}")
        # Ensure the clip_gdf is in EPSG:4326
        if clip_gdf.crs is None:
            # A frame without a CRS cannot be reprojected (to_crs raises),
            # which would land in the except below and disable clipping.
            # Assign EPSG:4326 instead, the same fallback the download step
            # uses for data that arrives without a CRS.
            logger.warning("No CRS found for clipping GeoJSON, assigning EPSG:4326")
            clip_gdf = clip_gdf.set_crs("EPSG:4326")
        elif clip_gdf.crs != "EPSG:4326":
            clip_gdf = clip_gdf.to_crs("EPSG:4326")
            logger.info("Reprojected clipping GeoJSON to EPSG:4326")
    except Exception as e:
        # Best effort: if the clip file cannot be loaded or reprojected,
        # log it and carry on without clipping.
        logger.error(f"Error loading GeoJSON for clipping: {e}")
        clip_gdf = None
else:
    clip_gdf = None
    logger.info("No GeoJSON clipping file defined. Using full extent.")
# Function to download and optionally clip data using overturemaps
def download_overture_data(theme, data_type):
    """Download one Overture data type for the configured bbox and save it.

    Relies on the module-level ``bbox``, ``clip_gdf``, ``output_dir`` and
    ``output_format`` settings. Any exception raised while fetching,
    clipping or writing is logged with its traceback and not re-raised, so
    a failure for one type does not abort the caller.

    Args:
        theme: Theme name, used in log messages and the output file name.
        data_type: Overture type name passed to ``core.geodataframe``.

    Returns:
        None.
    """
    # Reject unsupported formats before fetching anything: an unknown format
    # would otherwise download the data, write no file, and still report
    # that the data was saved.
    if output_format not in ("geoparquet", "gpkg"):
        logger.error(
            f"Unsupported output format '{output_format}' for {theme}/{data_type}; "
            "expected 'geoparquet' or 'gpkg'"
        )
        return

    try:
        logger.info(f"Downloading {theme} ({data_type}) data for bbox: {bbox}")
        # Fetch data using the overturemaps library
        gdf = core.geodataframe(data_type, bbox=bbox)

        # Check if the GeoDataFrame has a CRS; assign EPSG:4326 if missing
        if gdf.crs is None:
            logger.warning(f"No CRS found for {theme} ({data_type}), assigning EPSG:4326")
            gdf.set_crs("EPSG:4326", inplace=True)
        logger.info(f"Fetched {len(gdf)} records for {theme} ({data_type})")

        # Clip the data to the GeoJSON boundaries if provided
        if clip_gdf is not None:
            logger.info(f"Clipping {theme} ({data_type}) data to GeoJSON boundaries")
            gdf = gpd.clip(gdf, clip_gdf)
            logger.info(f"Clipped {theme} ({data_type}) to {len(gdf)} records after clipping")

        # Define output file path; the format name doubles as the extension
        output_path = output_dir / f"{theme}_{data_type}.{output_format}"

        # Save the geodataframe to the chosen format
        logger.info(f"Saving {theme} ({data_type}) data to {output_path}")
        if output_format == "geoparquet":
            gdf.to_parquet(output_path)
        else:  # "gpkg" -- the only other value that passes the check above
            gdf.to_file(output_path, driver="GPKG")
        logger.info(f"Data for {theme} ({data_type}) saved successfully at {output_path}")
    except Exception as e:
        # Top-level boundary for one theme/type: log with traceback and let
        # the caller continue with the next one.
        logger.error(f"Failed to download or process data for {theme}/{data_type}: {e}", exc_info=True)
# Iterate over all themes and export the data for each type
for theme, data_types in theme_to_type.items():
    logger.info(f"Processing theme: {theme}")
    for data_type in data_types:
        logger.info(f"Starting download for {theme}/{data_type}")
        download_overture_data(theme, data_type)
        # download_overture_data logs and swallows its own errors, so getting
        # here does not prove the download succeeded; keep the wording neutral.
        logger.info(f"Finished processing {theme}/{data_type}")

# Same reasoning for the final summary: individual failures are only
# visible as ERROR lines earlier in the log.
logger.info("Finished processing all themes. Check the log above for any reported failures.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment