Skip to content

Instantly share code, notes, and snippets.

@legel
Created July 19, 2025 03:47
Show Gist options
  • Select an option

  • Save legel/bd762053b5d88d78bfb469e50a34a12a to your computer and use it in GitHub Desktop.

Select an option

Save legel/bd762053b5d88d78bfb469e50a34a12a to your computer and use it in GitHub Desktop.
Download Satellite Embedding V1 annual images from Earth Engine as gridded GeoTIFF patches
import ee
import json
import math
from typing import List, Tuple, Dict
import time
# Initialize the Earth Engine client once, at import time, before any other
# ee.* call in this file is made.
# NOTE(review): '...' looks like a placeholder for a Cloud project ID --
# replace it with a real project before running.
ee.Initialize(project='...')
class EmbeddingPatchExporter:
    """Tile a GeoJSON region into square patches and export embeddings per patch.

    The region's bounding box is covered by a grid of ``patch_size_km`` cells,
    numbered row-major from the north-west corner (row 0 is the northernmost
    row, column 0 the westernmost column). ``export_patches`` then starts one
    Earth Engine Drive export per (year, row, col).

    Limitations: the grid uses a flat "kilometres per degree" approximation
    evaluated at the region's mid-latitude, and regions crossing the
    antimeridian are not handled.
    """

    # Approximate kilometres per degree of latitude. Both the grid sizing and
    # the patch-bounds maths use this single value so they cannot disagree.
    _KM_PER_DEGREE_LAT = 111.0

    def __init__(self, geojson_path: str, patch_size_km: int = 10, test_mode: bool = True):
        """
        Initialize the exporter with a GeoJSON region and patch size.

        Args:
            geojson_path: Path to a GeoJSON file defining the region. A
                FeatureCollection (its first feature is used), a single
                Feature, or a bare geometry are all accepted.
            patch_size_km: Size of each patch in kilometers (default 10km).
                Must be positive.
            test_mode: If True, only export one patch for testing.

        Raises:
            ValueError: If ``patch_size_km`` is not positive or the GeoJSON
                contains no usable coordinates.
        """
        if patch_size_km <= 0:
            raise ValueError(f"patch_size_km must be positive, got {patch_size_km!r}")

        self.patch_size_km = patch_size_km
        self.patch_size_m = patch_size_km * 1000
        self.test_mode = test_mode

        # Load GeoJSON (RFC 7946 mandates UTF-8).
        with open(geojson_path, 'r', encoding='utf-8') as f:
            geojson_data = json.load(f)

        # Bounding box over every position of the region's geometry.
        positions = self._extract_positions(geojson_data)
        lons = [p[0] for p in positions]
        lats = [p[1] for p in positions]
        self.min_lon = min(lons)
        self.max_lon = max(lons)
        self.min_lat = min(lats)
        self.max_lat = max(lats)
        print(f"Region bounds: ({self.min_lat:.4f}, {self.min_lon:.4f}) to ({self.max_lat:.4f}, {self.max_lon:.4f})")

        # Reported region size: great-circle lengths of the southern and
        # western edges of the bounding box.
        self.width_km = self._haversine_distance(self.min_lat, self.min_lon, self.min_lat, self.max_lon)
        self.height_km = self._haversine_distance(self.min_lat, self.min_lon, self.max_lat, self.min_lon)

        # Grid dimensions use the SAME degree<->km conversion as
        # _get_patch_bounds. Sizing the grid from the haversine lengths above
        # could leave the grid short of the bounding box (e.g. in the southern
        # hemisphere, where the min-latitude edge is the shortest one).
        km_per_degree_lat, km_per_degree_lon = self._km_per_degree()
        self.cols = math.ceil((self.max_lon - self.min_lon) * km_per_degree_lon / patch_size_km)
        self.rows = math.ceil((self.max_lat - self.min_lat) * km_per_degree_lat / patch_size_km)
        print(f"Region size: {self.width_km:.1f} x {self.height_km:.1f} km")
        print(f"Grid size: {self.cols} x {self.rows} = {self.cols * self.rows} patches")

        # Years to process
        self.years = [2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024]
        # Track processed source images (not used by the methods of this class).
        self.processed_images = set()

    @staticmethod
    def _extract_positions(geojson_data: Dict) -> List[Tuple[float, float]]:
        """Return every (lon, lat) position of the region described by the GeoJSON.

        Raises:
            ValueError: If no geometry or no coordinates can be found.
        """
        if not isinstance(geojson_data, dict):
            raise ValueError("GeoJSON root must be an object")

        if 'features' in geojson_data:
            features = geojson_data['features']
            if not features:
                raise ValueError("GeoJSON FeatureCollection contains no features")
            geometry = features[0].get('geometry')
        elif geojson_data.get('type') == 'Feature':
            geometry = geojson_data.get('geometry')
        else:
            geometry = geojson_data

        if not geometry:
            raise ValueError("GeoJSON feature has no geometry")
        if geometry.get('type') == 'GeometryCollection':
            coords = [g.get('coordinates', []) for g in geometry.get('geometries', [])]
        else:
            coords = geometry.get('coordinates', [])

        def walk(node):
            # A position is a sequence whose first element is a number;
            # anything else is a nesting level (ring, polygon, ...).
            if node and isinstance(node[0], (int, float)):
                yield node[0], node[1]
            else:
                for child in node:
                    yield from walk(child)

        positions = list(walk(coords))
        if not positions:
            raise ValueError("GeoJSON geometry contains no coordinates")
        return positions

    def _km_per_degree(self) -> Tuple[float, float]:
        """Return (km per degree of latitude, km per degree of longitude).

        The longitude factor is evaluated at the region's mid-latitude.
        """
        mid_lat = (self.min_lat + self.max_lat) / 2
        km_per_degree_lat = self._KM_PER_DEGREE_LAT
        km_per_degree_lon = self._KM_PER_DEGREE_LAT * math.cos(math.radians(mid_lat))
        return km_per_degree_lat, km_per_degree_lon

    def _haversine_distance(self, lat1: float, lon1: float, lat2: float, lon2: float) -> float:
        """Calculate the great-circle distance between two points in kilometers.

        Inputs are in decimal degrees.
        """
        R = 6371  # Earth's radius in kilometers
        lat1_rad = math.radians(lat1)
        lat2_rad = math.radians(lat2)
        delta_lat = math.radians(lat2 - lat1)
        delta_lon = math.radians(lon2 - lon1)
        a = math.sin(delta_lat / 2) ** 2 + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(delta_lon / 2) ** 2
        # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
        # points, which would make sqrt(1 - a) raise a domain error.
        a = min(1.0, max(0.0, a))
        c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
        return R * c

    def _get_patch_bounds(self, row: int, col: int) -> Tuple[float, float, float, float]:
        """Get the bounds for a specific patch.

        Args:
            row: Row index, counted southwards from the region's max latitude.
            col: Column index, counted eastwards from the region's min longitude.

        Returns:
            (min_lat, min_lon, max_lat, max_lon) of the patch in degrees.
        """
        km_per_degree_lat, km_per_degree_lon = self._km_per_degree()
        patch_min_lon = self.min_lon + (col * self.patch_size_km / km_per_degree_lon)
        patch_max_lon = self.min_lon + ((col + 1) * self.patch_size_km / km_per_degree_lon)
        patch_max_lat = self.max_lat - (row * self.patch_size_km / km_per_degree_lat)
        patch_min_lat = self.max_lat - ((row + 1) * self.patch_size_km / km_per_degree_lat)
        return patch_min_lat, patch_min_lon, patch_max_lat, patch_max_lon

    def _export_patch(self, dataset, bands: List[str], year: int, row: int, col: int, crs: str):
        """Start the Drive export of one patch for one year.

        Returns:
            (task, filename) when an export was started, or None when the
            collection holds no image intersecting the patch in that year.
        """
        patch_min_lat, patch_min_lon, patch_max_lat, patch_max_lon = self._get_patch_bounds(row, col)
        patch_bounds = ee.Geometry.Rectangle([
            patch_min_lon, patch_min_lat,
            patch_max_lon, patch_max_lat,
        ])

        # Images of that year that intersect this patch.
        year_images = (dataset
                       .filterDate(f'{year}-01-01', f'{year + 1}-01-01')
                       .filterBounds(patch_bounds))
        if year_images.size().getInfo() == 0:
            print(f" Patch {row}_{col} ({year}): No images found, skipping")
            return None

        # filterBounds keeps every image that merely intersects the patch, so
        # a patch lying across several source images needs all of them:
        # mosaic them rather than exporting only the first match.
        image = year_images.mosaic()

        filename = (
            f"embedding_{year}_patch_{row:03d}_{col:03d}"
            f"_lat{patch_min_lat:.4f}_{patch_max_lat:.4f}"
            f"_lon{patch_min_lon:.4f}_{patch_max_lon:.4f}"
        )
        task = ee.batch.Export.image.toDrive(
            image=image.select(bands),
            description=f'{year}_patch_{row:03d}_{col:03d}',
            folder='earth_engine_exports',
            fileNamePrefix=filename,
            region=patch_bounds,
            scale=10,
            crs=crs,
            fileFormat='GeoTIFF',
            formatOptions={
                'cloudOptimized': True
            },
            maxPixels=1e9
        )
        task.start()
        print(f" Started export for patch {row}_{col} ({year}): {patch_min_lat:.4f},{patch_min_lon:.4f} to {patch_max_lat:.4f},{patch_max_lon:.4f}")
        return task, filename

    def export_patches(self, crs: str = 'EPSG:32617') -> List:
        """Export patches for all years.

        In test mode only the first grid cell of the first configured year is
        exported, and the method returns as soon as that export has started.

        Args:
            crs: Output CRS of the exported GeoTIFFs. The default, UTM zone
                17N, suits Florida; pass the zone matching your own region.

        Returns:
            The list of started export tasks.
        """
        dataset = ee.ImageCollection('GOOGLE/SATELLITE_EMBEDDING/V1/ANNUAL')
        # All 64 embedding bands: A00 .. A63.
        all_bands = [f'A{i:02d}' for i in range(64)]

        # Patches are visited top to bottom, left to right.
        cells = [(row, col) for row in range(self.rows) for col in range(self.cols)]
        years = self.years
        if self.test_mode:
            years = years[:1]
            cells = cells[:1]

        tasks = []
        patch_count = 0
        for year in years:
            print(f"\nProcessing year {year}...")
            for row, col in cells:
                try:
                    started = self._export_patch(dataset, all_bands, year, row, col, crs)
                except Exception as e:
                    # Deliberately broad: one failing patch (quota, network,
                    # server error) must not abort the rest of the batch.
                    print(f" Error processing patch {row}_{col} ({year}): {e}")
                    continue
                if started is None:
                    continue

                task, filename = started
                tasks.append(task)
                patch_count += 1
                if self.test_mode:
                    print("\n*** TEST MODE: Only exporting one patch ***")
                    print(f"Filename: {filename}")
                    return tasks

        print(f"\nTotal patches exported: {patch_count}")
        print("Check https://code.earthengine.google.com/tasks for progress")
        return tasks
# Script entry point: build the exporter for the configured region, then
# start the export tasks.
if __name__ == "__main__":
    # test_mode=False exports every patch; True would stop after one patch.
    exporter = EmbeddingPatchExporter(geojson_path='...', patch_size_km=10, test_mode=False)
    tasks = exporter.export_patches()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment