Created
November 5, 2024 08:50
-
-
Save thedivtagguy/d2f58da8a81a954c14eb734f5c2d0c35 to your computer and use it in GitHub Desktop.
An easier way to download building footprints from the Microsoft Buildings dataset. Find your quadkey here: https://aman.bh/blog/2023/building-footprints
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import geopandas as gpd | |
from shapely.geometry import shape, box | |
import json | |
import inquirer | |
import argparse | |
from typing import Optional, Tuple, Dict | |
import sys | |
import webbrowser | |
import re | |
def parse_bbox_string(bbox_str: str) -> Optional[Tuple[float, float, float, float]]:
    """
    Parse a bounding box string copied from bboxfinder.com.

    Accepted formats (whitespace around the numbers is ignored):
    - 78.359299,17.402408,78.568039,17.615883
    - Long,Lat: (78.359299,17.402408) (78.568039,17.615883)

    Args:
        bbox_str: Raw text pasted by the user.

    Returns:
        A 4-tuple of floats in the order they appear in the input, or None
        when the text matches neither format or contains a non-finite value
        (nan / inf).
    """
    import math  # function-scope import: the module does not import math

    coords = None

    # Try simple comma-separated format
    if ',' in bbox_str and '(' not in bbox_str:
        try:
            parsed = [float(part.strip()) for part in bbox_str.split(',')]
        except ValueError:
            parsed = []
        if len(parsed) == 4:
            coords = parsed

    # Try bboxfinder's "Long,Lat" format; allow spaces inside the parentheses
    # so "(78.3, 17.4) (78.5, 17.6)" is accepted as well.
    if coords is None:
        number = r'\s*([0-9.-]+)\s*'
        pair = rf'\({number},{number}\)'
        match = re.search(pair + r'\s*' + pair, bbox_str)
        if match:
            try:
                coords = [float(group) for group in match.groups()]
            except ValueError:
                # The character class also matches junk such as "1.2.3".
                coords = None

    if coords is None:
        return None

    # float() happily parses "nan" and "inf"; such values compare False
    # against everything and would slip through later min/max checks.
    if not all(math.isfinite(value) for value in coords):
        return None

    return (coords[0], coords[1], coords[2], coords[3])
def prompt_for_bounds_with_bboxfinder() -> Optional[Tuple[float, float, float, float]]:
    """Open bboxfinder.com and ask the user to paste a bounding box.

    The prompt repeats until the pasted text parses into a box whose first
    coordinate pair is strictly smaller than its second pair, or until the
    user types 'cancel'.

    Returns:
        The parsed 4-tuple of coordinates, or None if the user cancelled.
    """
    instructions = (
        "\nOpening bboxfinder.com in your default browser...",
        "Instructions:",
        "1. Draw a rectangle on the map to select your area",
        "2. Copy the coordinates from the 'Box' field at the bottom",
        "3. Paste them here when ready",
    )
    for line in instructions:
        print(line)

    webbrowser.open('http://bboxfinder.com')

    while True:
        pasted = input("\nPaste the coordinates from bboxfinder.com (or type 'cancel' to skip): ").strip()
        if pasted.lower() == 'cancel':
            return None

        bounds = parse_bbox_string(pasted)
        if bounds is not None:
            min_lon, min_lat, max_lon, max_lat = bounds
            # Validate that min is less than max on both axes
            if min_lon >= max_lon or min_lat >= max_lat:
                print("Error: Minimum coordinates must be less than maximum coordinates")
                continue
            return bounds

        # Unparseable input: explain the expected format and ask again.
        print("Invalid coordinate format. Please copy the coordinates exactly as shown in bboxfinder.com")
        print("Expected format: longitude,latitude,longitude,latitude")
        print("Example: 78.359299,17.402408,78.568039,17.615883")
def get_available_regions() -> list:
    """Return the sorted, de-duplicated region names from the dataset index.

    If the index cannot be downloaded or read, the error is printed and a
    short hard-coded list of regions is returned instead.
    """
    index_url = "https://minedbuildings.blob.core.windows.net/global-buildings/dataset-links.csv"
    try:
        locations = pd.read_csv(index_url)['Location']
        return sorted(locations.unique())
    except Exception as e:
        print(f"Error fetching regions: {e}")
        # Fallback list
        return ['India', 'United States', 'Canada', 'Australia']
def process_buildings(region: str, quadkey: int, bounds: Optional[Tuple] = None) -> Optional[gpd.GeoDataFrame]:
    """
    Download the building footprints of one tile, optionally crop them, and
    save them as GeoJSON.

    The tile is looked up in the dataset index by region and quadkey. The
    result is written to ``<region>_<quadkey>[_cropped].geojson`` in the
    current working directory, with the region lower-cased and its spaces
    replaced by underscores.

    Args:
        region: Value matched against the index's ``Location`` column.
        quadkey: Value matched against the index's ``QuadKey`` column.
        bounds: Optional (minx, miny, maxx, maxy) box, unpacked into
            ``shapely.geometry.box``. When given, footprints are clipped to it.

    Returns:
        The GeoDataFrame that was written, or None if any step failed. Errors
        are printed, not raised.
    """
    try:
        # Load dataset links
        dataset_links = pd.read_csv("https://minedbuildings.blob.core.windows.net/global-buildings/dataset-links.csv")

        # Select tile
        matching_tiles = dataset_links[(dataset_links.Location == region) & (dataset_links.QuadKey == quadkey)]
        if matching_tiles.empty:
            raise ValueError(f"No data found for region {region} and quadkey {quadkey}")

        # NOTE(review): only the first matching index row is processed; confirm
        # this is intended if a region/quadkey pair can match several rows.
        tile = matching_tiles.iloc[0]

        print(f"Loading data from {tile.Url}...")
        df = pd.read_json(tile.Url, lines=True)

        print("Converting geometries...")
        geometries = []
        # Walk the geometry column directly: iterrows() builds a Series per
        # row, which is needlessly slow on tiles with many buildings.
        for idx, raw_geometry in df['geometry'].items():
            try:
                geometries.append(shape(raw_geometry))
            except Exception as e:
                # Best effort: one malformed footprint should not abort the tile.
                print(f"Warning: Skipping invalid geometry at index {idx}: {e}")

        print("Creating GeoDataFrame...")
        gdf = gpd.GeoDataFrame(geometry=geometries, crs=4326)

        if bounds:
            print("Cropping to bounding box...")
            bbox = box(*bounds)
            # Drop footprints outside the box first, then clip the rest to it.
            gdf = gdf[gdf.intersects(bbox)].copy()
            gdf['geometry'] = gdf.geometry.intersection(bbox)
            gdf = gdf[~gdf.geometry.is_empty]

        # Generate output filename
        output_filename = f"{region.lower().replace(' ', '_')}_{quadkey}"
        if bounds:
            output_filename += "_cropped"
        output_filename += ".geojson"

        print(f"Writing results to {output_filename}...")
        gdf.to_file(output_filename, driver="GeoJSON")
        print(f"Successfully processed {len(gdf)} buildings")
        return gdf
    except Exception as e:
        print(f"Error: {str(e)}")
        return None
def main():
    """Command-line entry point: collect region, quadkey and optional bounds,
    then process the tile.

    With ``--non-interactive`` the region and quadkey come from ``--region``
    and ``--quadkey``; otherwise they are collected with inquirer prompts. In
    both modes the user is asked whether to crop to a bounding box.

    Exits with status 1 if the interactive prompt is aborted or if processing
    fails.
    """
    parser = argparse.ArgumentParser(description='Process building footprints from global dataset')
    parser.add_argument('--non-interactive', action='store_true',
                        help='Run in non-interactive mode (requires --region and --quadkey)')
    parser.add_argument('--region', help='Region name')
    parser.add_argument('--quadkey', type=int, help='Quadkey identifier')
    args = parser.parse_args()

    if args.non_interactive:
        # Test the quadkey against None: 0 is a parseable integer and must not
        # be mistaken for a missing argument.
        if not args.region or args.quadkey is None:
            parser.error("Non-interactive mode requires --region and --quadkey arguments")
        region = args.region
        quadkey = args.quadkey

        # Optional bounds prompt
        use_bounds = input("Do you want to specify a bounding box using bboxfinder.com? (y/N): ").lower().strip()
        bounds = prompt_for_bounds_with_bboxfinder() if use_bounds == 'y' else None
    else:
        # Interactive mode
        regions = get_available_regions()
        questions = [
            inquirer.List('region',
                          message="Select a region",
                          choices=regions),
            # isdecimal() rather than isdigit(): isdigit() also accepts
            # characters such as superscript digits that int() cannot parse.
            inquirer.Text('quadkey',
                          message="Enter the quadkey",
                          validate=lambda _, x: x.isdecimal()),
            inquirer.Confirm('use_bounds',
                             message="Do you want to specify a bounding box using bboxfinder.com?",
                             default=False),
        ]
        answers = inquirer.prompt(questions)
        if not answers:
            sys.exit(1)

        region = answers['region']
        quadkey = int(answers['quadkey'])
        bounds = prompt_for_bounds_with_bboxfinder() if answers['use_bounds'] else None

    # Process the buildings
    print(f"\nProcessing buildings for {region} (Quadkey: {quadkey})")
    if bounds:
        print(f"Using bounding box: {bounds}")

    result = process_buildings(region, quadkey, bounds)
    if result is None:
        # process_buildings already printed the error; report the failure to
        # the calling shell instead of exiting with status 0.
        sys.exit(1)
    print("\nProcessing completed successfully!")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment