import logging
import sys
from pathlib import Path

import geopandas as gpd
from overturemaps import core
# Set up logging: INFO and above go to stdout, prefixed with timestamp and level.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[logging.StreamHandler(sys.stdout)],
)

# The root logger is used directly by every step of this script.
logger = logging.getLogger()
# Define variables at the top

# Example bbox; handed unchanged to core.geodataframe().
# NOTE(review): presumably (min_lon, min_lat, max_lon, max_lat) in WGS84 -- confirm.
bbox = (37.13879, 22.50602, 42.27389, 27.46549)

# Desired output format. The save step branches on "geoparquet" and "gpkg";
# the value is also used verbatim as the output file extension.
output_format = "geoparquet"

# Directory that receives one output file per theme/data type.
output_dir = Path("output_directory")

# Path to a GeoJSON file used for clipping (set to None to disable clipping).
clip_geojson_path = None
# Map of themes to their corresponding data types.
# The driver loop downloads every data type listed here, in this order.
theme_to_type = {
    "addresses": ["address"],
    "base": ["infrastructure", "land", "land_cover", "land_use", "water"],
    "buildings": ["building", "building_part"],
    "divisions": ["division", "division_area", "division_boundary"],
    "places": ["place"],
    "transportation": ["segment", "connector"],
}
# Ensure the output directory exists (parents are created as needed; an
# already-existing directory is not an error).
output_dir.mkdir(parents=True, exist_ok=True)
# Load GeoJSON for clipping if defined.
# clip_gdf stays None whenever clipping is disabled or the file cannot be used,
# in which case the downloaded data is kept at its full extent.
clip_gdf = None
if clip_geojson_path:
    try:
        clip_gdf = gpd.read_file(clip_geojson_path)
        logger.info(f"Loaded clipping GeoJSON from {clip_geojson_path}")

        # Ensure the clip_gdf is in EPSG:4326
        if clip_gdf.crs is None:
            # to_crs() cannot reproject geometries that have no CRS; treat a
            # CRS-less file as EPSG:4326, the same fallback applied to
            # downloaded data that arrives without a CRS.
            logger.warning("No CRS found for clipping GeoJSON, assigning EPSG:4326")
            clip_gdf = clip_gdf.set_crs("EPSG:4326")
        elif clip_gdf.crs != "EPSG:4326":
            clip_gdf = clip_gdf.to_crs("EPSG:4326")
            logger.info("Reprojected clipping GeoJSON to EPSG:4326")
    except Exception as e:
        # Best effort: report the problem and continue without clipping.
        logger.error(f"Error loading GeoJSON for clipping: {e}")
        clip_gdf = None
else:
    logger.info("No GeoJSON clipping file defined. Using full extent.")
# Writers for each supported `output_format` value; each takes the
# GeoDataFrame and the destination path.
_OUTPUT_WRITERS = {
    "geoparquet": lambda frame, path: frame.to_parquet(path),
    "gpkg": lambda frame, path: frame.to_file(path, driver="GPKG"),
    "geojson": lambda frame, path: frame.to_file(path, driver="GeoJSON"),
}


# Function to download and optionally clip data using overturemaps
def download_overture_data(theme, data_type):
    """Download one Overture data type for the configured bbox and save it.

    The data is fetched with ``core.geodataframe``, clipped to ``clip_gdf``
    when one is loaded, and written to
    ``output_dir / f"{theme}_{data_type}.{output_format}"``.

    Errors are logged rather than raised, so one failing dataset does not
    stop the caller from processing the others.

    Args:
        theme: Theme name; used only for log messages and the file name.
        data_type: Overture type passed to ``core.geodataframe``.

    Returns:
        True if the file was written, False if the output format is
        unsupported or any step failed.
    """
    # Reject an unknown format before fetching: otherwise the download would
    # run, nothing would be written, and success would still be reported.
    write_output = _OUTPUT_WRITERS.get(output_format)
    if write_output is None:
        logger.error(
            f"Unsupported output format '{output_format}' for {theme}/{data_type}; "
            f"expected one of {sorted(_OUTPUT_WRITERS)}"
        )
        return False

    try:
        logger.info(f"Downloading {theme} ({data_type}) data for bbox: {bbox}")

        # Fetch data using the overturemaps library
        gdf = core.geodataframe(data_type, bbox=bbox)

        # Check if the GeoDataFrame has a CRS; assign EPSG:4326 if missing
        if gdf.crs is None:
            logger.warning(f"No CRS found for {theme} ({data_type}), assigning EPSG:4326")
            gdf = gdf.set_crs("EPSG:4326")

        logger.info(f"Fetched {len(gdf)} records for {theme} ({data_type})")

        # Clip the data to the GeoJSON boundaries if provided
        if clip_gdf is not None:
            logger.info(f"Clipping {theme} ({data_type}) data to GeoJSON boundaries")
            gdf = gpd.clip(gdf, clip_gdf)
            logger.info(f"Clipped {theme} ({data_type}) to {len(gdf)} records after clipping")

        # Define output file path; the format name doubles as the extension
        output_path = output_dir / f"{theme}_{data_type}.{output_format}"

        # Save the geodataframe to the chosen format
        logger.info(f"Saving {theme} ({data_type}) data to {output_path}")
        write_output(gdf, output_path)

        logger.info(f"Data for {theme} ({data_type}) saved successfully at {output_path}")
        return True

    except Exception as e:
        # Top-level boundary for one dataset: log with traceback and report failure.
        logger.error(f"Failed to download or process data for {theme}/{data_type}: {e}", exc_info=True)
        return False
# Iterate over all themes and export the data for each type
for theme, data_types in theme_to_type.items():
    logger.info(f"Processing theme: {theme}")
    for data_type in data_types:
        logger.info(f"Starting download for {theme}/{data_type}")
        download_overture_data(theme, data_type)
        logger.info(f"Completed download for {theme}/{data_type}")

# download_overture_data logs its own errors and this loop does not track
# them, so the closing message must not claim that everything succeeded.
logger.info("Finished processing all themes. Check the log above for any datasets that failed.")