-
-
Save dnouri/4cae6b4f043930563ce1568fae0e2326 to your computer and use it in GitHub Desktop.
| #!/usr/bin/env python3 | |
| # /// script | |
| # requires-python = ">=3.10" | |
| # dependencies = [ | |
| # "pyopensky>=2.15", | |
| # "duckdb>=1.1.0", | |
| # "pandas>=2.0.0", | |
| # "matplotlib>=3.5.0", | |
| # "seaborn>=0.12.0", | |
| # "tabulate>=0.9.0", | |
| # "tqdm>=4.65.0", | |
| # "traffic>=2.10.0", | |
| # "requests>=2.31.0" | |
| # ] | |
| # /// | |
| """Flight Analysis Tool - OpenSky Network Data with DuckDB | |
| Analyzes flight patterns through any country's airspace using OpenSky | |
| Network's Trino database. Features batch querying, intelligent caching, | |
| and automatic aircraft type enrichment from ADS-B Exchange database. | |
| Usage: | |
| # Analyze German airspace for 2024 | |
| uv run german_flights_analysis.py --country Germany --start 2024-01-01 --end 2024-12-31 | |
| # Analyze UK flights with cache refresh | |
| uv run german_flights_analysis.py --country "United Kingdom" --clear-cache | |
| # Custom airports or ICAO prefixes | |
| uv run german_flights_analysis.py --airports LFPG LFPO LFPB | |
| Features: | |
| - Efficient batch SQL queries for all airports | |
| - Multi-country support (Germany, UK, France, Spain, Italy, Netherlands) | |
| - Automatic cache management (30-day expiration) | |
| - Aircraft type enrichment (615k aircraft database) | |
| - Rich visualizations with manufacturer analysis | |
| First Run: | |
| - Opens browser for OAuth authentication | |
| - Downloads ADS-B Exchange aircraft database | |
| - Queries all airports efficiently | |
| - Caches results as Parquet files with SHA256 checksums | |
| Subsequent Runs: | |
| - Uses cached data if valid (<30 days old) | |
| - Regenerates analysis and visualizations | |
| - No authentication needed if cache is fresh | |
| Output: | |
| - Console: Comprehensive flight statistics | |
| - flight_analysis_visualization.png: Routes, airlines, duration analysis | |
| - aircraft_type_analysis.png: Aircraft types, manufacturers, fleet diversity | |
| Requirements: | |
| - OpenSky Network account (free): https://opensky-network.org/data/apply | |
| - Python 3.10+ with uv package manager | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import hashlib | |
| from datetime import datetime, timedelta | |
| from pathlib import Path | |
| import duckdb | |
| import matplotlib.pyplot as plt | |
| import pandas as pd | |
| import requests | |
| import seaborn as sns | |
| from pyopensky.trino import Trino | |
| from tabulate import tabulate | |
| from traffic.data import aircraft as aircraft_db | |
| sns.set_style("whitegrid") | |
| plt.rcParams["figure.figsize"] = (12, 6) | |
| # Constants | |
| CACHE_MAX_AGE_DAYS = 30 | |
| SECTION_HEADER_WIDTH = 60 | |
| REQUEST_TIMEOUT_SECONDS = 30 | |
| GERMAN_AIRPORTS = { | |
| "EDDF": "Frankfurt", | |
| "EDDM": "Munich", | |
| "EDDB": "Berlin Brandenburg", | |
| "EDDH": "Hamburg", | |
| "EDDK": "Cologne/Bonn", | |
| "EDDL": "Düsseldorf", | |
| "EDDS": "Stuttgart", | |
| "EDDW": "Bremen", | |
| "EDDN": "Nuremberg", | |
| "EDDP": "Leipzig/Halle", | |
| "EDDT": "Berlin Tegel (closed)", | |
| "EDDE": "Erfurt", | |
| "EDDG": "Münster/Osnabrück", | |
| "EDDR": "Saarbrücken", | |
| } | |
| AIRLINE_NAMES = { | |
| "DLH": "Lufthansa", | |
| "EWG": "Eurowings", | |
| "RYR": "Ryanair", | |
| "EZY": "easyJet", | |
| "BAW": "British Airways", | |
| "AFR": "Air France", | |
| "KLM": "KLM", | |
| "SWR": "SWISS", | |
| "AUA": "Austrian", | |
| "TUI": "TUI", | |
| "CFG": "Condor", | |
| "UAE": "Emirates", | |
| "THY": "Turkish Airlines", | |
| } | |
| # Mapping of country names to ICAO prefixes | |
| COUNTRY_PREFIXES = { | |
| "Germany": "ED", | |
| "United Kingdom": "EG", | |
| "France": "LF", | |
| "Spain": "LE", | |
| "Italy": "LI", | |
| "Netherlands": "EH", | |
| "Belgium": "EB", | |
| "Switzerland": "LS", | |
| "Austria": "LO", | |
| "Poland": "EP", | |
| "Czech Republic": "LK", | |
| "Denmark": "EK", | |
| "Sweden": "ES", | |
| "Norway": "EN", | |
| "Finland": "EF", | |
| "Ireland": "EI", | |
| "Portugal": "LP", | |
| "Greece": "LG", | |
| "Turkey": "LT", | |
| "Hungary": "LH", | |
| } | |
| def get_process_flights_sql(icao_prefix): | |
| """Generate process flights SQL with configurable ICAO prefix.""" | |
| return f""" | |
| CREATE OR REPLACE TABLE flights_processed AS | |
| SELECT | |
| *, | |
| firstseen as firstseen_dt, | |
| lastseen as lastseen_dt, | |
| EPOCH(lastseen - firstseen) / 60.0 as flight_duration_min, | |
| origin || ' → ' || destination as route, | |
| COALESCE(ga_origin.name, origin) as origin_name, | |
| COALESCE(ga_dest.name, destination) as destination_name, | |
| origin LIKE '{icao_prefix}%' AND destination LIKE '{icao_prefix}%' as is_domestic, | |
| SUBSTRING(callsign, 1, 3) as airline_code, | |
| EXTRACT(hour FROM firstseen) as hour_of_day | |
| FROM {{table_name}} | |
| LEFT JOIN airport_names ga_origin ON origin = ga_origin.code | |
| LEFT JOIN airport_names ga_dest ON destination = ga_dest.code | |
| WHERE callsign IS NOT NULL AND callsign != '' | |
| """ | |
| ROUTES_ANALYSIS_SQL = """ | |
| SELECT | |
| route, | |
| origin_name || ' → ' || destination_name as route_name, | |
| is_domestic, | |
| COUNT(*) as flight_count, | |
| AVG(flight_duration_min) as avg_duration_min, | |
| COUNT(DISTINCT callsign) as unique_callsigns | |
| FROM flights_processed | |
| WHERE origin IS NOT NULL | |
| AND destination IS NOT NULL | |
| AND origin != '' | |
| AND destination != '' | |
| GROUP BY route, route_name, is_domestic | |
| ORDER BY flight_count DESC | |
| """ | |
| AIRCRAFT_ANALYSIS_SQL = """ | |
| SELECT | |
| icao24, | |
| COUNT(*) as flight_count, | |
| COUNT(DISTINCT route) as num_routes, | |
| STRING_AGG(DISTINCT callsign, ', ') as callsigns, | |
| FIRST(callsign) as primary_callsign | |
| FROM flights_processed | |
| GROUP BY icao24 | |
| ORDER BY flight_count DESC | |
| """ | |
| AIRLINES_ANALYSIS_SQL = """ | |
| SELECT | |
| airline_code, | |
| COALESCE(an.name, 'Other') as airline_name, | |
| COUNT(*) as flight_count, | |
| COUNT(DISTINCT route) as unique_routes, | |
| SUM(CASE WHEN is_domestic THEN 1 ELSE 0 END) as domestic_flights, | |
| SUM(CASE WHEN NOT is_domestic THEN 1 ELSE 0 END) as international_flights | |
| FROM flights_processed | |
| LEFT JOIN airline_names an ON airline_code = an.code | |
| GROUP BY airline_code, airline_name | |
| ORDER BY flight_count DESC | |
| """ | |
| AIRCRAFT_TYPES_SQL = """ | |
| SELECT | |
| COALESCE(typecode, 'Unknown') as typecode, | |
| COALESCE(model, 'Unknown') as model, | |
| COUNT(*) as flight_count, | |
| COUNT(DISTINCT icao24) as unique_aircraft, | |
| COUNT(DISTINCT route) as routes_served, | |
| COUNT(DISTINCT airline_code) as airlines_using | |
| FROM flights_processed | |
| GROUP BY typecode, model | |
| ORDER BY flight_count DESC | |
| """ | |
| ROUTE_AIRCRAFT_TYPES_SQL = """ | |
| WITH top_routes AS ( | |
| SELECT | |
| route, | |
| origin_name || ' → ' || destination_name as route_name, | |
| COUNT(*) as total_flights | |
| FROM flights_processed | |
| GROUP BY route, origin_name, destination_name | |
| ORDER BY total_flights DESC | |
| LIMIT 10 | |
| ) | |
| SELECT | |
| fp.route, | |
| tr.route_name, | |
| COALESCE(fp.typecode, 'Unknown') as typecode, | |
| COUNT(*) as flight_count, | |
| AVG(fp.flight_duration_min) as avg_duration | |
| FROM flights_processed fp | |
| JOIN top_routes tr ON fp.route = tr.route | |
| GROUP BY fp.route, tr.route_name, fp.typecode | |
| ORDER BY fp.route, flight_count DESC | |
| """ | |
| class CacheManager: | |
| def __init__(self, cache_dir: Path): | |
| self.cache_dir = cache_dir | |
| self.cache_dir.mkdir(exist_ok=True) | |
| def get_cache_path( | |
| self, | |
| start_date: str, | |
| end_date: str, | |
| airports: list[str] | None = None, | |
| ) -> Path: | |
| airports_str = "_".join(airports) if airports else "all_german" | |
| cache_key = f"flights_{start_date}_{end_date}_{airports_str}" | |
| hash_key = hashlib.sha256(cache_key.encode()).hexdigest()[:8] | |
| return self.cache_dir / f"flights_{hash_key}.parquet" | |
| def load_cache(self, cache_path: Path, conn: duckdb.DuckDBPyConnection) -> int: | |
| conn.execute( | |
| f"CREATE OR REPLACE TABLE flights AS " | |
| f"SELECT * FROM read_parquet('{cache_path}')", | |
| ) | |
| count = conn.execute("SELECT COUNT(*) FROM flights").fetchone()[0] | |
| print(f"✓ Loaded {count:,} flights from cache") | |
| return count | |
| def save_cache(self, df: pd.DataFrame, cache_path: Path) -> None: | |
| print(f"💾 Saving {len(df):,} flights to cache...") | |
| df.to_parquet(cache_path, index=False) | |
| def download_adsbx_database(self) -> dict[str, dict] | None: | |
| """Download and cache ADS-B Exchange aircraft database using DuckDB. | |
| Returns a dictionary mapping ICAO24 codes to aircraft information. | |
| The database is cached for 24 hours to avoid unnecessary downloads. | |
| """ | |
| # ADS-B Exchange provides daily updates of their aircraft database | |
| # File format: NDJSON (newline-delimited JSON), gzipped | |
| today = datetime.now().strftime("%Y%m%d") | |
| adsbx_cache = self.cache_dir / f"adsbx_aircraft_{today}.json.gz" | |
| # Check if today's file already exists | |
| if not adsbx_cache.exists(): | |
| # Remove old ADS-B Exchange cache files | |
| for old_file in self.cache_dir.glob("adsbx_aircraft_*.json.gz"): | |
| old_file.unlink() | |
| # Download fresh database | |
| url = "https://downloads.adsbexchange.com/downloads/basic-ac-db.json.gz" | |
| print("📥 Downloading ADS-B Exchange aircraft database...") | |
| response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS) | |
| response.raise_for_status() | |
| adsbx_cache.write_bytes(response.content) | |
| print(f"✓ Downloaded {len(response.content) / 1024 / 1024:.1f} MB") | |
| con = duckdb.connect(":memory:") | |
| # Read compressed NDJSON directly | |
| con.execute(f""" | |
| CREATE TABLE adsbx_raw AS | |
| SELECT * FROM read_json_auto( | |
| '{adsbx_cache}', | |
| compression='gzip', | |
| format='newline_delimited' | |
| ) | |
| """) | |
| result = con.execute(""" | |
| SELECT | |
| LOWER(icao) as icao24, | |
| reg as registration, | |
| short_type as typecode, | |
| model, | |
| manufacturer, | |
| ownop as owner | |
| FROM adsbx_raw | |
| WHERE icao IS NOT NULL AND icao != '' | |
| """).fetchall() | |
| aircraft_lookup = {} | |
| for row in result: | |
| icao24 = row[0] | |
| aircraft_lookup[icao24] = { | |
| "registration": row[1], | |
| "typecode": row[2], | |
| "model": row[3], | |
| "manufacturer": row[4], | |
| "owner": row[5], | |
| } | |
| con.close() | |
| print(f"✓ Loaded {len(aircraft_lookup):,} aircraft from ADS-B Exchange") | |
| return aircraft_lookup | |
| class FlightDataFetcher: | |
| def __init__(self): | |
| print("Connecting to OpenSky Network...") | |
| self.trino = Trino() | |
| # Load ADS-B Exchange database as fallback for aircraft types | |
| cache_mgr = CacheManager(Path("cache")) | |
| self.adsbx_aircraft = cache_mgr.download_adsbx_database() | |
| def fetch_all_airports_batch( | |
| self, | |
| start_date: str, | |
| end_date: str, | |
| airports: list[str], | |
| limit: int | None = None, | |
| ) -> pd.DataFrame | None: | |
| """Fetch flights for multiple airports in a single batch query.""" | |
| # Build SQL query with all airports at once | |
| airport_conditions = " OR ".join( | |
| [f"origin = '{ap}' OR destination = '{ap}'" for ap in airports] | |
| ) | |
| # Use rawdata for custom SQL query | |
| query = f""" | |
| SELECT | |
| callsign, | |
| icao24, | |
| firstseen, | |
| lastseen, | |
| origin, | |
| destination, | |
| day | |
| FROM flights_data4 | |
| WHERE ({airport_conditions}) | |
| AND day >= '{start_date}' | |
| AND day <= '{end_date}' | |
| """ | |
| if limit: | |
| query += f" LIMIT {limit}" | |
| df = self.trino.rawdata(query) | |
| return df if df is not None and not df.empty else None | |
| def fetch_all_airports( | |
| self, | |
| start_date: str, | |
| end_date: str, | |
| specific_airports: list[str] | None = None, | |
| limit: int | None = None, | |
| ) -> pd.DataFrame | None: | |
| if specific_airports: | |
| airports = specific_airports | |
| else: | |
| # For backward compatibility, use German airports if no airports specified | |
| airports = [code for code in GERMAN_AIRPORTS if code != "EDDT"] | |
| # Use batch query for all airports | |
| df = self.fetch_all_airports_batch(start_date, end_date, airports, limit) | |
| if df is None or df.empty: | |
| print("❌ No flight data found") | |
| return None | |
| print(" ✓ Query successful") | |
| df = df.drop_duplicates() | |
| # pyopensky returns 'departure' and 'arrival' columns | |
| if "departure" in df.columns: | |
| df = df.rename( | |
| columns={ | |
| "departure": "origin", | |
| "arrival": "destination", | |
| } | |
| ) | |
| return df | |
| def enrich_with_aircraft_types(self, df: pd.DataFrame) -> pd.DataFrame: | |
| """Enrich flight data with aircraft types. | |
| Uses traffic library first, then ADS-B Exchange fallback. | |
| """ | |
| if df is None or df.empty: | |
| return df | |
| print("📊 Enriching with aircraft type data...") | |
| # Get unique ICAO24 codes | |
| unique_icao24 = df["icao24"].unique() | |
| # Track lookup statistics | |
| traffic_found = 0 | |
| adsbx_found = 0 | |
| not_found = 0 | |
| # Lookup aircraft types with fallback strategy | |
| type_data = [] | |
| for icao24 in unique_icao24: | |
| aircraft_info = {"icao24": icao24} | |
| # First try: traffic library (OpenSky's own database) | |
| ac = aircraft_db.get(icao24) | |
| if ac: | |
| aircraft_info.update( | |
| { | |
| "typecode": ac.get("typecode"), | |
| "model": ac.get("model"), | |
| "registration": ac.get("registration"), | |
| "operator": ac.get("operator"), | |
| } | |
| ) | |
| traffic_found += 1 | |
| type_data.append(aircraft_info) | |
| continue | |
| # Second try: ADS-B Exchange database fallback | |
| # Why fallback: ADS-B Exchange has 20% more aircraft (615k vs 520k) | |
| # and includes military, private, and recently delivered aircraft | |
| if self.adsbx_aircraft and icao24.lower() in self.adsbx_aircraft: | |
| adsbx_data = self.adsbx_aircraft[icao24.lower()] | |
| aircraft_info.update( | |
| { | |
| "typecode": adsbx_data.get("typecode"), | |
| "model": adsbx_data.get("model"), | |
| "registration": adsbx_data.get("registration"), | |
| "operator": adsbx_data.get("owner"), | |
| } | |
| ) | |
| adsbx_found += 1 | |
| else: | |
| not_found += 1 | |
| type_data.append(aircraft_info) | |
| # Create type dataframe and merge | |
| type_df = pd.DataFrame(type_data) | |
| df = df.merge(type_df, on="icao24", how="left") | |
| # Log statistics showing data source effectiveness | |
| print("✓ Aircraft type enrichment complete:") | |
| print(f" - Traffic library: {traffic_found:,} aircraft") | |
| if self.adsbx_aircraft: | |
| print(f" - ADS-B Exchange: {adsbx_found:,} aircraft") | |
| print(f" - Unknown: {not_found:,} aircraft") | |
| print( | |
| f" - Total coverage: " | |
| f"{(traffic_found + adsbx_found) / len(unique_icao24) * 100:.1f}%" | |
| ) | |
| return df | |
| class FlightAnalyzer: | |
| def __init__(self, icao_prefix, airports_dict): | |
| self.conn = duckdb.connect(":memory:") | |
| self.icao_prefix = icao_prefix | |
| self.airports_dict = airports_dict | |
| def load_reference_data(self) -> None: | |
| # Load airport names | |
| self.conn.execute( | |
| """ | |
| CREATE OR REPLACE TABLE airport_names AS | |
| SELECT * FROM VALUES | |
| """ | |
| + ",".join(f"('{k}', '{v}')" for k, v in self.airports_dict.items()) | |
| + " AS t(code, name)" | |
| ) | |
| self.conn.execute( | |
| """ | |
| CREATE OR REPLACE TABLE airline_names AS | |
| SELECT * FROM VALUES | |
| """ | |
| + ",".join(f"('{k}', '{v}')" for k, v in AIRLINE_NAMES.items()) | |
| + " AS t(code, name)" | |
| ) | |
| def process_flights(self, table_name: str) -> int: | |
| print("Processing flight data...") | |
| self.load_reference_data() | |
| process_sql = get_process_flights_sql(self.icao_prefix) | |
| self.conn.execute(process_sql.format(table_name=table_name)) | |
| count = self.conn.execute("SELECT COUNT(*) FROM flights_processed").fetchone()[ | |
| 0 | |
| ] | |
| print(f"✓ Processed {count:,} flights") | |
| return count | |
| def analyze_routes(self) -> pd.DataFrame: | |
| return self.conn.execute(ROUTES_ANALYSIS_SQL).df() | |
| def analyze_aircraft(self) -> pd.DataFrame: | |
| return self.conn.execute(AIRCRAFT_ANALYSIS_SQL).df() | |
| def analyze_airlines(self) -> pd.DataFrame: | |
| return self.conn.execute(AIRLINES_ANALYSIS_SQL).df() | |
| def has_typecode_column(self) -> bool: | |
| """Check if typecode column exists in processed flights.""" | |
| columns = self.conn.execute("PRAGMA table_info(flights_processed)").df() | |
| return "typecode" in columns["name"].values | |
| def analyze_aircraft_types(self) -> pd.DataFrame | None: | |
| """Analyze aircraft types if typecode data is available.""" | |
| if not self.has_typecode_column(): | |
| return None | |
| return self.conn.execute(AIRCRAFT_TYPES_SQL).df() | |
| def analyze_route_aircraft_types(self) -> pd.DataFrame | None: | |
| """Analyze aircraft types used on top routes.""" | |
| if not self.has_typecode_column(): | |
| return None | |
| return self.conn.execute(ROUTE_AIRCRAFT_TYPES_SQL).df() | |
| def get_summary_stats(self) -> dict: | |
| stats = self.conn.execute(""" | |
| SELECT | |
| MIN(DATE(firstseen)) as start_date, | |
| MAX(DATE(firstseen)) as end_date, | |
| COUNT(*) as total_flights, | |
| COUNT(DISTINCT icao24) as unique_aircraft, | |
| COUNT(DISTINCT route) as unique_routes, | |
| COUNT(DISTINCT airline_code) as unique_airlines, | |
| AVG(flight_duration_min) as avg_duration_min, | |
| SUM(CASE WHEN is_domestic THEN 1 ELSE 0 END) * 100.0 / COUNT(*) | |
| as domestic_pct | |
| FROM flights_processed | |
| """).fetchone() | |
| return { | |
| "start_date": stats[0], | |
| "end_date": stats[1], | |
| "total_flights": stats[2], | |
| "unique_aircraft": stats[3], | |
| "unique_routes": stats[4], | |
| "unique_airlines": stats[5], | |
| "avg_duration_min": stats[6], | |
| "domestic_pct": stats[7], | |
| } | |
| def get_visualization_data(self) -> dict: | |
| return { | |
| "hourly": self.conn.execute(""" | |
| SELECT hour_of_day, COUNT(*) as flight_count | |
| FROM flights_processed | |
| GROUP BY hour_of_day | |
| ORDER BY hour_of_day | |
| """).df(), | |
| "duration": self.conn.execute(""" | |
| SELECT flight_duration_min | |
| FROM flights_processed | |
| WHERE flight_duration_min > 0 AND flight_duration_min < 500 | |
| """).df(), | |
| "domestic": self.conn.execute(""" | |
| SELECT | |
| CASE WHEN is_domestic = true THEN 'Domestic' | |
| ELSE 'International' END as type, | |
| COUNT(*) as count | |
| FROM flights_processed | |
| WHERE is_domestic IS NOT NULL | |
| GROUP BY type | |
| """).df(), | |
| "airports": self.conn.execute(""" | |
| WITH activity AS ( | |
| SELECT origin as airport | |
| FROM flights_processed | |
| WHERE origin LIKE 'ED%' | |
| UNION ALL | |
| SELECT destination as airport | |
| FROM flights_processed | |
| WHERE destination LIKE 'ED%' | |
| ) | |
| SELECT | |
| airport, | |
| COUNT(*) as total_flights | |
| FROM activity | |
| GROUP BY airport | |
| ORDER BY total_flights DESC | |
| LIMIT 10 | |
| """).df(), | |
| } | |
| class FlightVisualizer: | |
| @staticmethod | |
| def create_visualizations( | |
| route_stats: pd.DataFrame, | |
| airline_stats: pd.DataFrame, | |
| viz_data: dict, | |
| country_name: str = "German", | |
| ) -> None: | |
| print("\n📊 Creating visualizations...") | |
| fig, axes = plt.subplots(2, 3, figsize=(18, 10)) | |
| fig.suptitle( | |
| f"{country_name} Airport Flight Analysis", | |
| fontsize=16, | |
| fontweight="bold", | |
| ) | |
| FlightVisualizer._plot_routes(axes[0, 0], route_stats) | |
| FlightVisualizer._plot_domestic_intl(axes[0, 1], viz_data["domestic"]) | |
| FlightVisualizer._plot_duration(axes[0, 2], viz_data["duration"]) | |
| FlightVisualizer._plot_airlines(axes[1, 0], airline_stats) | |
| FlightVisualizer._plot_hourly(axes[1, 1], viz_data["hourly"]) | |
| FlightVisualizer._plot_airports(axes[1, 2], viz_data["airports"]) | |
| plt.tight_layout() | |
| FlightVisualizer._save_plots("flight_analysis_visualization") | |
| plt.close() | |
| @staticmethod | |
| def _plot_routes(ax, route_stats: pd.DataFrame) -> None: | |
| top_routes = route_stats.head(10) | |
| ax.barh(range(len(top_routes)), top_routes["flight_count"]) | |
| ax.set_yticks(range(len(top_routes))) | |
| route_labels = [ | |
| r[:30] + "..." if len(r) > 30 else r for r in top_routes["route_name"] | |
| ] | |
| ax.set_yticklabels(route_labels, fontsize=8) | |
| ax.set_xlabel("Number of Flights") | |
| ax.set_title("Top 10 Routes") | |
| ax.invert_yaxis() | |
| @staticmethod | |
| def _plot_domestic_intl(ax, domestic_data: pd.DataFrame) -> None: | |
| colors = ["#2E86AB", "#A23B72"] | |
| ax.pie( | |
| domestic_data["count"], | |
| labels=domestic_data["type"], | |
| autopct="%1.1f%%", | |
| colors=colors, | |
| startangle=90, | |
| ) | |
| ax.set_title("Domestic vs International Flights") | |
| @staticmethod | |
| def _plot_duration(ax, duration_data: pd.DataFrame) -> None: | |
| ax.hist( | |
| duration_data["flight_duration_min"], | |
| bins=30, | |
| color="#2E86AB", | |
| edgecolor="black", | |
| ) | |
| ax.set_xlabel("Flight Duration (minutes)") | |
| ax.set_ylabel("Number of Flights") | |
| ax.set_title("Flight Duration Distribution") | |
| mean_duration = duration_data["flight_duration_min"].mean() | |
| ax.axvline( | |
| mean_duration, | |
| color="red", | |
| linestyle="--", | |
| label=f"Mean: {mean_duration:.1f} min", | |
| ) | |
| ax.legend() | |
| @staticmethod | |
| def _plot_airlines(ax, airline_stats: pd.DataFrame) -> None: | |
| top_airlines = airline_stats.head(10) | |
| ax.bar( | |
| range(len(top_airlines)), | |
| top_airlines["flight_count"], | |
| color="#A23B72", | |
| ) | |
| ax.set_xticks(range(len(top_airlines))) | |
| ax.set_xticklabels( | |
| top_airlines["airline_name"], | |
| rotation=45, | |
| ha="right", | |
| fontsize=8, | |
| ) | |
| ax.set_ylabel("Number of Flights") | |
| ax.set_title("Top 10 Airlines") | |
| @staticmethod | |
| def _plot_hourly(ax, hourly_data: pd.DataFrame) -> None: | |
| ax.plot( | |
| hourly_data["hour_of_day"], | |
| hourly_data["flight_count"], | |
| marker="o", | |
| color="#2E86AB", | |
| ) | |
| ax.set_xlabel("Hour of Day (UTC)") | |
| ax.set_ylabel("Number of Flights") | |
| ax.set_title("Flights by Hour of Day") | |
| ax.grid(visible=True, alpha=0.3) | |
| @staticmethod | |
| def _plot_airports( | |
| ax, airport_activity: pd.DataFrame, country_name: str = "German" | |
| ) -> None: | |
| # Just use airport codes as labels | |
| # Could be enhanced to use a lookup dictionary if available | |
| airport_labels = list(airport_activity["airport"]) | |
| ax.barh( | |
| range(len(airport_activity)), | |
| airport_activity["total_flights"], | |
| color="#52B788", | |
| ) | |
| ax.set_yticks(range(len(airport_activity))) | |
| ax.set_yticklabels(airport_labels, fontsize=8) | |
| ax.set_xlabel("Total Flights (Arrivals + Departures)") | |
| ax.set_title(f"Top {country_name} Airports by Activity") | |
| ax.invert_yaxis() | |
| @staticmethod | |
| def _save_plots(base_name: str = "flight_analysis_visualization") -> None: | |
| output_file = Path(f"{base_name}.png") | |
| plt.savefig(output_file, dpi=150, bbox_inches="tight") | |
| print(f"📊 Visualization saved to {output_file}") | |
| output_file_hq = Path(f"{base_name}_hq.png") | |
| plt.savefig(output_file_hq, dpi=300, bbox_inches="tight") | |
| print(f"📊 High-resolution version saved to {output_file_hq}") | |
| @staticmethod | |
| def create_aircraft_type_visualizations( | |
| aircraft_type_stats: pd.DataFrame | None, | |
| route_type_stats: pd.DataFrame | None, | |
| country_name: str = "German", | |
| ) -> None: | |
| """Create social media-worthy aircraft type visualizations.""" | |
| if aircraft_type_stats is None or aircraft_type_stats.empty: | |
| print("⚠️ No aircraft type data available for visualization") | |
| return | |
| print("\n✈️ Creating aircraft type visualizations...") | |
| # Set style for professional look | |
| plt.style.use("seaborn-v0_8-darkgrid") | |
| fig = plt.figure(figsize=(20, 12)) | |
| fig.suptitle( | |
| f"{country_name} Airspace Aircraft Analysis 2024", | |
| fontsize=24, | |
| fontweight="bold", | |
| y=0.98, | |
| ) | |
| # Create subplots | |
| gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3) | |
| # 1. Top Aircraft Types (horizontal bar) | |
| ax1 = fig.add_subplot(gs[0, :]) | |
| FlightVisualizer._plot_aircraft_types_bar(ax1, aircraft_type_stats) | |
| # 2. Manufacturer Distribution (pie/donut) | |
| ax2 = fig.add_subplot(gs[1, 0]) | |
| FlightVisualizer._plot_manufacturer_distribution(ax2, aircraft_type_stats) | |
| # 3. Aircraft Size Distribution | |
| ax3 = fig.add_subplot(gs[1, 1]) | |
| FlightVisualizer._plot_aircraft_size_distribution(ax3, aircraft_type_stats) | |
| # 4. Airlines per Aircraft Type | |
| ax4 = fig.add_subplot(gs[1, 2]) | |
| FlightVisualizer._plot_airlines_per_type(ax4, aircraft_type_stats) | |
| # 5. Route-Aircraft Type Matrix (if available) | |
| if route_type_stats is not None and not route_type_stats.empty: | |
| ax5 = fig.add_subplot(gs[2, :]) | |
| FlightVisualizer._plot_route_aircraft_matrix(ax5, route_type_stats) | |
| # Adjust layout to minimize padding | |
| plt.subplots_adjust(hspace=0.35, wspace=0.3, top=0.94, bottom=0.06) | |
| FlightVisualizer._save_plots("aircraft_type_analysis") | |
| plt.close() | |
| @staticmethod | |
| def _plot_aircraft_types_bar( | |
| ax, aircraft_type_stats: pd.DataFrame, country_name: str = "German" | |
| ) -> None: | |
| """Horizontal bar chart of top aircraft types.""" | |
| top_types = aircraft_type_stats.head(15) | |
| # Create color gradient based on flight count | |
| colors = plt.cm.viridis( | |
| top_types["flight_count"] / top_types["flight_count"].max() | |
| ) | |
| bars = ax.barh(range(len(top_types)), top_types["flight_count"], color=colors) | |
| # Add value labels on bars | |
| for _i, (bar, flights) in enumerate(zip(bars, top_types["flight_count"])): | |
| ax.text( | |
| bar.get_width() + 0.5, | |
| bar.get_y() + bar.get_height() / 2, | |
| f"{int(flights)}", | |
| va="center", | |
| fontsize=10, | |
| ) | |
| # Format | |
| ax.set_yticks(range(len(top_types))) | |
| labels = [ | |
| f"{row['typecode']} " | |
| f"({row['model'][:20] if row['model'] != 'Unknown' else ''})" | |
| for _, row in top_types.iterrows() | |
| ] | |
| ax.set_yticklabels(labels, fontsize=11) | |
| ax.set_xlabel("Number of Flights", fontsize=12) | |
| ax.set_title( | |
| f"Top Aircraft Types in {country_name} Airspace", | |
| fontsize=14, | |
| fontweight="bold", | |
| ) | |
| ax.invert_yaxis() | |
| ax.grid(axis="x", alpha=0.3) | |
| @staticmethod | |
| def _get_manufacturer_distribution(aircraft_type_stats: pd.DataFrame) -> tuple: | |
| """Get manufacturer flight counts.""" | |
| stats = aircraft_type_stats | |
| airbus = stats[stats["typecode"].str.startswith("A", na=False)][ | |
| "flight_count" | |
| ].sum() | |
| boeing = stats[stats["typecode"].str.startswith("B", na=False)][ | |
| "flight_count" | |
| ].sum() | |
| embraer = stats[stats["typecode"].str.startswith("E", na=False)][ | |
| "flight_count" | |
| ].sum() | |
| other = stats[~stats["typecode"].str.contains("^[ABE]", na=False, regex=True)][ | |
| "flight_count" | |
| ].sum() | |
| return [airbus, boeing, embraer, other] | |
| @staticmethod | |
| def _plot_manufacturer_distribution(ax, aircraft_type_stats: pd.DataFrame) -> None: | |
| """Donut chart showing Airbus vs Boeing vs Others.""" | |
| sizes = FlightVisualizer._get_manufacturer_distribution(aircraft_type_stats) | |
| labels = ["Airbus", "Boeing", "Embraer", "Other"] | |
| colors = ["#00C9FF", "#92FE9D", "#FC466B", "#3F5EFB"] | |
| # Create donut | |
| wedges, texts, autotexts = ax.pie( | |
| sizes, | |
| labels=labels, | |
| colors=colors, | |
| autopct="%1.1f%%", | |
| startangle=90, | |
| pctdistance=0.85, | |
| ) | |
| # Draw circle for donut | |
| centre_circle = plt.Circle((0, 0), 0.70, fc="white") | |
| ax.add_artist(centre_circle) | |
| # Add total in center | |
| total = sum(sizes) | |
| ax.text( | |
| 0, | |
| 0, | |
| f"{int(total):,}\nFlights", | |
| ha="center", | |
| va="center", | |
| fontsize=14, | |
| fontweight="bold", | |
| ) | |
| ax.set_title("Manufacturer Market Share", fontsize=12, fontweight="bold") | |
| @staticmethod | |
| def _categorize_aircraft_size(typecode): | |
| """Categorize aircraft by size.""" | |
| if pd.isna(typecode) or typecode == "Unknown": | |
| return "Unknown" | |
| # Wide-body | |
| if typecode in [ | |
| "A330", | |
| "A340", | |
| "A343", | |
| "A350", | |
| "A380", | |
| "B747", | |
| "B777", | |
| "B77W", | |
| "B787", | |
| "B788", | |
| "B789", | |
| ]: | |
| return "Wide-body" | |
| # Narrow-body | |
| if typecode in [ | |
| "A319", | |
| "A320", | |
| "A20N", | |
| "A321", | |
| "A21N", | |
| "B737", | |
| "B738", | |
| "B38M", | |
| "B39M", | |
| ]: | |
| return "Narrow-body" | |
| # Regional | |
| if typecode.startswith(("E", "CRJ")) or typecode in [ | |
| "AT43", | |
| "AT72", | |
| "AT76", | |
| "DH8D", | |
| ]: | |
| return "Regional" | |
| return "Business/GA" | |
| @staticmethod | |
| def _plot_aircraft_size_distribution(ax, aircraft_type_stats: pd.DataFrame) -> None: | |
| """Distribution by aircraft size category.""" | |
| aircraft_type_stats["size_category"] = aircraft_type_stats["typecode"].apply( | |
| FlightVisualizer._categorize_aircraft_size | |
| ) | |
| size_dist = ( | |
| aircraft_type_stats.groupby("size_category")["flight_count"] | |
| .sum() | |
| .sort_values(ascending=True) | |
| ) | |
| colors = ["#FF6B6B", "#4ECDC4", "#45B7D1", "#96CEB4", "#FFEAA7"] | |
| bars = ax.bar( | |
| range(len(size_dist)), size_dist.values, color=colors[: len(size_dist)] | |
| ) | |
| # Add value labels | |
| for bar, val in zip(bars, size_dist.values): | |
| height = bar.get_height() | |
| ax.text( | |
| bar.get_x() + bar.get_width() / 2, | |
| height + 0.5, | |
| f"{int(val)}", | |
| ha="center", | |
| va="bottom", | |
| fontsize=10, | |
| ) | |
| ax.set_xticks(range(len(size_dist))) | |
| ax.set_xticklabels(size_dist.index, rotation=45, ha="right") | |
| ax.set_ylabel("Number of Flights", fontsize=12) | |
| ax.set_title("Aircraft Size Categories", fontsize=12, fontweight="bold") | |
| ax.grid(axis="y", alpha=0.3) | |
| @staticmethod | |
| def _plot_airlines_per_type(ax, aircraft_type_stats: pd.DataFrame) -> None: | |
| """Airlines using each aircraft type.""" | |
| top_types = aircraft_type_stats.head(10) | |
| x = range(len(top_types)) | |
| width = 0.35 | |
| # Two bars: unique aircraft and airlines using | |
| bars1 = ax.bar( | |
| [i - width / 2 for i in x], | |
| top_types["unique_aircraft"], | |
| width, | |
| label="Unique Aircraft", | |
| color="#3498db", | |
| ) | |
| bars2 = ax.bar( | |
| [i + width / 2 for i in x], | |
| top_types["airlines_using"], | |
| width, | |
| label="Airlines Operating", | |
| color="#e74c3c", | |
| ) | |
| # Add value labels | |
| for bars in [bars1, bars2]: | |
| for bar in bars: | |
| height = bar.get_height() | |
| if height > 0: | |
| ax.text( | |
| bar.get_x() + bar.get_width() / 2, | |
| height + 0.1, | |
| f"{int(height)}", | |
| ha="center", | |
| va="bottom", | |
| fontsize=9, | |
| ) | |
| ax.set_xlabel("Aircraft Type", fontsize=12) | |
| ax.set_ylabel("Count", fontsize=12) | |
| ax.set_title("Fleet Diversity by Type", fontsize=12, fontweight="bold") | |
| ax.set_xticks(x) | |
| ax.set_xticklabels(top_types["typecode"], rotation=45, ha="right") | |
| ax.legend() | |
| ax.grid(axis="y", alpha=0.3) | |
| @staticmethod | |
| def _plot_route_aircraft_matrix(ax, route_type_stats: pd.DataFrame) -> None: | |
| """Stacked bar showing aircraft types on top routes.""" | |
| # Pivot data for stacked bar | |
| pivot = route_type_stats.pivot_table( | |
| index="route_name", columns="typecode", values="flight_count", fill_value=0 | |
| ) | |
| # Get top 8 routes | |
| top_routes = ( | |
| route_type_stats.groupby("route_name")["flight_count"] | |
| .sum() | |
| .nlargest(8) | |
| .index | |
| ) | |
| pivot = pivot.loc[top_routes] | |
| # Sort columns by total flights | |
| col_order = pivot.sum().sort_values(ascending=False).index[:10] | |
| pivot = pivot[col_order] | |
| # Create stacked bar | |
| pivot.plot(kind="barh", stacked=True, ax=ax, colormap="tab20", width=0.7) | |
| ax.set_xlabel("Number of Flights", fontsize=12) | |
| ax.set_ylabel("") | |
| ax.set_title( | |
| "Aircraft Types Operating on Top Routes", fontsize=14, fontweight="bold" | |
| ) | |
| ax.legend( | |
| title="Aircraft Type", bbox_to_anchor=(1.05, 1), loc="upper left", ncol=2 | |
| ) | |
| ax.grid(axis="x", alpha=0.3) | |
| # Format route labels | |
| labels = [ | |
| str(label.get_text())[:30] | |
| if len(str(label.get_text())) > 30 | |
| else str(label.get_text()) | |
| for label in ax.get_yticklabels() | |
| ] | |
| ax.set_yticklabels(labels, fontsize=10) | |
| def print_section_header(title: str) -> None: | |
| """Print a formatted section header.""" | |
| print("\n" + "=" * SECTION_HEADER_WIDTH) | |
| print(title) | |
| print("=" * SECTION_HEADER_WIDTH) | |
| def print_summary( | |
| stats: dict, | |
| route_stats: pd.DataFrame, | |
| aircraft_stats: pd.DataFrame, | |
| airline_stats: pd.DataFrame, | |
| aircraft_type_stats: pd.DataFrame | None = None, | |
| route_type_stats: pd.DataFrame | None = None, | |
| ) -> None: | |
| print_section_header("FLIGHT DATA SUMMARY") | |
| print(f"\n📅 Period: {stats['start_date']} to {stats['end_date']}") | |
| print(f"✈️ Total flights: {stats['total_flights']:,}") | |
| print(f"🛫 Unique aircraft (ICAO24): {stats['unique_aircraft']:,}") | |
| print(f"📍 Unique routes: {stats['unique_routes']:,}") | |
| print(f"🏢 Unique airlines: {stats['unique_airlines']:,}") | |
| print(f"\n🏠 Domestic flights: {stats['domestic_pct']:.1f}%") | |
| print(f"🌍 International flights: {100 - stats['domestic_pct']:.1f}%") | |
| print(f"\n⏱️ Average flight duration: {stats['avg_duration_min']:.1f} minutes") | |
| print_section_header("TOP 10 ROUTES") | |
| top_routes = route_stats.head(10)[ | |
| ["route_name", "flight_count", "avg_duration_min"] | |
| ] | |
| top_routes["avg_duration_min"] = top_routes["avg_duration_min"].round(1) | |
| print( | |
| tabulate( | |
| top_routes, | |
| headers=["Route", "Flights", "Avg Duration (min)"], | |
| tablefmt="grid", | |
| showindex=False, | |
| ), | |
| ) | |
| print_section_header("TOP 10 AIRLINES") | |
| top_airlines = airline_stats.head(10)[ | |
| ["airline_name", "flight_count", "unique_routes"] | |
| ] | |
| print( | |
| tabulate( | |
| top_airlines, | |
| headers=["Airline", "Flights", "Routes"], | |
| tablefmt="grid", | |
| showindex=False, | |
| ), | |
| ) | |
| print_section_header("TOP 5 BUSIEST AIRCRAFT") | |
| top_aircraft = aircraft_stats.head(5)[ | |
| ["icao24", "flight_count", "primary_callsign", "num_routes"] | |
| ] | |
| print( | |
| tabulate( | |
| top_aircraft, | |
| headers=["ICAO24", "Flights", "Primary Callsign", "Routes"], | |
| tablefmt="grid", | |
| showindex=False, | |
| ), | |
| ) | |
| if aircraft_type_stats is not None and not aircraft_type_stats.empty: | |
| print_section_header("TOP 10 AIRCRAFT TYPES") | |
| top_types = aircraft_type_stats.head(10)[ | |
| ["typecode", "model", "flight_count", "unique_aircraft", "airlines_using"] | |
| ] | |
| print( | |
| tabulate( | |
| top_types, | |
| headers=["Type", "Model", "Flights", "Aircraft", "Airlines"], | |
| tablefmt="grid", | |
| showindex=False, | |
| ), | |
| ) | |
| if route_type_stats is not None and not route_type_stats.empty: | |
| print_section_header("AIRCRAFT TYPES ON TOP ROUTES") | |
| # Group by route and show top types per route | |
| for route in route_type_stats["route_name"].unique()[:5]: | |
| route_data = route_type_stats[route_type_stats["route_name"] == route] | |
| if len(route_data) > 0: | |
| print(f"\n{route}:") | |
| types_summary = route_data.head(3)[["typecode", "flight_count"]] | |
| for _, row in types_summary.iterrows(): | |
| print(f" - {row['typecode']}: {row['flight_count']} flights") | |
| def parse_args() -> argparse.Namespace: | |
| parser = argparse.ArgumentParser( | |
| description="Analyze airport flight data from OpenSky Network", | |
| formatter_class=argparse.RawDescriptionHelpFormatter, | |
| epilog=""" | |
| Examples: | |
| # Analyze German airports (default) | |
| %(prog)s --days 7 | |
| # Analyze UK airports | |
| %(prog)s --country "United Kingdom" --airports EGLL EGKK EGCC | |
| # Analyze French airports | |
| %(prog)s --country France --airports LFPG LFPO LFML | |
| # Country not in mapping - use ICAO prefix directly | |
| %(prog)s --country "Romania (LR)" --airports LROP LRTR | |
| # Analyze specific German airports | |
| %(prog)s --airports EDDF EDDM | |
| # Clear cache and reload | |
| %(prog)s --clear-cache --days 7 | |
| """, | |
| ) | |
| default_end = datetime.now().date() | |
| default_start = default_end - timedelta(days=7) | |
| parser.add_argument( | |
| "--start", | |
| type=str, | |
| default=default_start.isoformat(), | |
| help=f"Start date YYYY-MM-DD (default: {default_start})", | |
| ) | |
| parser.add_argument( | |
| "--end", | |
| type=str, | |
| default=default_end.isoformat(), | |
| help=f"End date YYYY-MM-DD (default: {default_end})", | |
| ) | |
| parser.add_argument( | |
| "--days", | |
| type=int, | |
| help="Alternative: analyze last N days from today", | |
| ) | |
| parser.add_argument( | |
| "--airports", | |
| nargs="+", | |
| help="Specific airport codes (e.g., EDDF EDDM)", | |
| ) | |
| parser.add_argument( | |
| "--country", | |
| type=str, | |
| default="Germany", | |
| help="Country name or 'Name (XX)' for custom ICAO prefix", | |
| ) | |
| parser.add_argument( | |
| "--limit", | |
| type=int, | |
| help="Limit flights per airport (for testing)", | |
| ) | |
| parser.add_argument( | |
| "--force", | |
| action="store_true", | |
| help="Force refresh data (bypass cache)", | |
| ) | |
| parser.add_argument( | |
| "--clear-cache", | |
| action="store_true", | |
| help="Clear all cache files before running", | |
| ) | |
| parser.add_argument( | |
| "--cache-dir", | |
| type=Path, | |
| default=Path("./cache"), | |
| help="Cache directory (default: ./cache)", | |
| ) | |
| args = parser.parse_args() | |
| if args.days: | |
| args.end = datetime.now().date().isoformat() | |
| args.start = (datetime.now().date() - timedelta(days=args.days)).isoformat() | |
| # Parse country to determine ICAO prefix | |
| if "(" in args.country and args.country.endswith(")"): | |
| # Format: "Country Name (XX)" where XX is ICAO prefix | |
| args.country_name = args.country.split("(")[0].strip() | |
| args.icao_prefix = args.country.split("(")[1].rstrip(")").strip() | |
| elif args.country in COUNTRY_PREFIXES: | |
| # Known country - use mapping | |
| args.country_name = args.country | |
| args.icao_prefix = COUNTRY_PREFIXES[args.country] | |
| else: | |
| parser.error( | |
| f"Unknown country '{args.country}'. Use format 'Country (XX)' where XX is ICAO prefix, " | |
| f"or one of: {', '.join(COUNTRY_PREFIXES.keys())}" | |
| ) | |
| # Validate arguments for non-German countries | |
| if args.icao_prefix != "ED" and not args.airports: | |
| parser.error(f"--airports required when analyzing {args.country_name}") | |
| return args | |
| def build_airports_dict(icao_prefix, airports_list=None): | |
| """Build airports dictionary based on prefix.""" | |
| # Only return German airport names, otherwise empty dict | |
| # The SQL will use COALESCE to fall back to codes | |
| if icao_prefix == "ED" and not airports_list: | |
| return GERMAN_AIRPORTS | |
| else: | |
| return {} | |
| def main(): | |
| args = parse_args() | |
| # Clear cache if requested | |
| if args.clear_cache: | |
| cache_dir = Path(args.cache_dir) | |
| if cache_dir.exists(): | |
| print("Clearing cache...") | |
| for file in cache_dir.glob("*.parquet"): | |
| file.unlink() | |
| print(f" Removed {file.name}") | |
| for file in cache_dir.glob("*.json.gz"): | |
| file.unlink() | |
| print(f" Removed {file.name}") | |
| print("✓ Cache cleared") | |
| # Build dynamic title | |
| title = f"{args.country_name} Flight Routes Analysis" | |
| padding = (SECTION_HEADER_WIDTH - len(title)) // 2 | |
| print("╔" + "═" * SECTION_HEADER_WIDTH + "╗") | |
| print( | |
| f"║{' ' * padding}{title}{' ' * (SECTION_HEADER_WIDTH - len(title) - padding)}║" | |
| ) | |
| print("╚" + "═" * SECTION_HEADER_WIDTH + "╝") | |
| print() | |
| # Build airports dictionary | |
| airports_dict = build_airports_dict(args.icao_prefix, args.airports) | |
| # Initialize components with country configuration | |
| cache_manager = CacheManager(args.cache_dir) | |
| fetcher = FlightDataFetcher() | |
| analyzer = FlightAnalyzer(icao_prefix=args.icao_prefix, airports_dict=airports_dict) | |
| cache_path = cache_manager.get_cache_path(args.start, args.end, args.airports) | |
| # Check cache age - files older than CACHE_MAX_AGE_DAYS are considered stale | |
| use_cache = False | |
| if not args.force and cache_path.exists(): | |
| cache_age_days = ( | |
| datetime.now() - datetime.fromtimestamp(cache_path.stat().st_mtime) | |
| ).days | |
| if cache_age_days > CACHE_MAX_AGE_DAYS: | |
| print(f"⚠️ Cache is {cache_age_days} days old - refreshing...") | |
| else: | |
| use_cache = True | |
| if use_cache: | |
| cache_manager.load_cache(cache_path, analyzer.conn) | |
| table_name = "flights" | |
| else: | |
| print(f"Querying flights from {args.start} to {args.end}...") | |
| print( | |
| f"Airports: {args.airports or f'All {args.country_name} ({args.icao_prefix}*)'}" | |
| ) | |
| df = fetcher.fetch_all_airports( | |
| args.start, | |
| args.end, | |
| args.airports, | |
| args.limit, | |
| ) | |
| if df is None or df.empty: | |
| print("⚠️ No flights found for the specified criteria") | |
| return | |
| # Enrich with aircraft types before caching | |
| df = fetcher.enrich_with_aircraft_types(df) | |
| cache_manager.save_cache(df, cache_path) | |
| analyzer.conn.execute("CREATE OR REPLACE TABLE flights AS SELECT * FROM df") | |
| print(f"✓ Retrieved and cached {len(df):,} flights") | |
| table_name = "flights" | |
| analyzer.process_flights(table_name) | |
| route_stats = analyzer.analyze_routes() | |
| aircraft_stats = analyzer.analyze_aircraft() | |
| airline_stats = analyzer.analyze_airlines() | |
| aircraft_type_stats = analyzer.analyze_aircraft_types() | |
| route_type_stats = analyzer.analyze_route_aircraft_types() | |
| stats = analyzer.get_summary_stats() | |
| print_summary( | |
| stats, | |
| route_stats, | |
| aircraft_stats, | |
| airline_stats, | |
| aircraft_type_stats, | |
| route_type_stats, | |
| ) | |
| viz_data = analyzer.get_visualization_data() | |
| FlightVisualizer.create_visualizations(route_stats, airline_stats, viz_data) | |
| # Create aircraft type visualizations if data is available | |
| if aircraft_type_stats is not None and not aircraft_type_stats.empty: | |
| FlightVisualizer.create_aircraft_type_visualizations( | |
| aircraft_type_stats, route_type_stats | |
| ) | |
| print("\n✅ Analysis complete!") | |
| print("📂 Cache files are stored in ./cache/") | |
| if __name__ == "__main__": | |
| main() |
dnouri
commented
Sep 7, 2025
╔════════════════════════════════════════════════════════════╗
║ Germany Flight Routes Analysis ║
╚════════════════════════════════════════════════════════════╝
Connecting to OpenSky Network...
✓ Loaded 615,656 aircraft from ADS-B Exchange
✓ Loaded 1,402,717 flights from cache
Processing flight data...
✓ Processed 1,402,530 flights
============================================================
FLIGHT DATA SUMMARY
============================================================
📅 Period: 2023-12-31 to 2024-12-30
✈️ Total flights: 1,402,530
🛫 Unique aircraft (ICAO24): 13,078
📍 Unique routes: 14,343
🏢 Unique airlines: 1,735
🏠 Domestic flights: 12.3%
🌍 International flights: 87.7%
⏱️ Average flight duration: 136.1 minutes
============================================================
TOP 10 ROUTES
============================================================
+--------------------------------+-----------+----------------------+
| Route | Flights | Avg Duration (min) |
+================================+===========+======================+
| EGLL → Frankfurt | 5425 | 64.1 |
+--------------------------------+-----------+----------------------+
| Frankfurt → EGLL | 5384 | 77.2 |
+--------------------------------+-----------+----------------------+
| Munich → Hamburg | 4756 | 65.4 |
+--------------------------------+-----------+----------------------+
| Hamburg → Munich | 4734 | 63.6 |
+--------------------------------+-----------+----------------------+
| EGLL → Munich | 4647 | 82.1 |
+--------------------------------+-----------+----------------------+
| Frankfurt → Berlin Brandenburg | 4641 | 46.6 |
+--------------------------------+-----------+----------------------+
| Berlin Brandenburg → Frankfurt | 4638 | 47.7 |
+--------------------------------+-----------+----------------------+
| Munich → EGLL | 4584 | 96.9 |
+--------------------------------+-----------+----------------------+
| Düsseldorf → Munich | 4430 | 51.7 |
+--------------------------------+-----------+----------------------+
| Munich → Düsseldorf | 4405 | 52.2 |
+--------------------------------+-----------+----------------------+
============================================================
TOP 10 AIRLINES
============================================================
+------------------+-----------+----------+
| Airline | Flights | Routes |
+==================+===========+==========+
| Lufthansa | 391067 | 1418 |
+------------------+-----------+----------+
| Eurowings | 125502 | 1159 |
+------------------+-----------+----------+
| Ryanair | 58614 | 557 |
+------------------+-----------+----------+
| Other | 43365 | 197 |
+------------------+-----------+----------+
| Condor | 39169 | 560 |
+------------------+-----------+----------+
| Other | 36933 | 412 |
+------------------+-----------+----------+
| Other | 36333 | 305 |
+------------------+-----------+----------+
| KLM | 28546 | 77 |
+------------------+-----------+----------+
| Turkish Airlines | 24854 | 162 |
+------------------+-----------+----------+
| Austrian | 24838 | 54 |
+------------------+-----------+----------+
============================================================
TOP 5 BUSIEST AIRCRAFT
============================================================
+----------+-----------+--------------------+----------+
| ICAO24 | Flights | Primary Callsign | Routes |
+==========+===========+====================+==========+
| 3c4dd6 | 2337 | DLH2AN | 182 |
+----------+-----------+--------------------+----------+
| 3c4dd8 | 2335 | DLH8EJ | 196 |
+----------+-----------+--------------------+----------+
| 300781 | 2255 | DLA5PR | 113 |
+----------+-----------+--------------------+----------+
| 3c4dc4 | 2247 | DLH4MF | 199 |
+----------+-----------+--------------------+----------+
| 3c4dc1 | 2212 | DLH4RK | 184 |
+----------+-----------+--------------------+----------+
============================================================
TOP 10 AIRCRAFT TYPES
============================================================
+---------+---------------+-----------+------------+------------+
| Type | Model | Flights | Aircraft | Airlines |
+=========+===============+===========+============+============+
| Unknown | Unknown | 150812 | 2034 | 786 |
+---------+---------------+-----------+------------+------------+
| A320 | A320 214SL | 74069 | 80 | 16 |
+---------+---------------+-----------+------------+------------+
| A321 | A321 231 | 55623 | 75 | 9 |
+---------+---------------+-----------+------------+------------+
| A320 | | 54436 | 243 | 96 |
+---------+---------------+-----------+------------+------------+
| A320 | A320 214 | 53564 | 166 | 41 |
+---------+---------------+-----------+------------+------------+
| CRJ9 | CRJ 900 LR NG | 39807 | 24 | 10 |
+---------+---------------+-----------+------------+------------+
| A20N | | 34047 | 208 | 37 |
+---------+---------------+-----------+------------+------------+
| A319 | A319 112 | 33408 | 42 | 16 |
+---------+---------------+-----------+------------+------------+
| A319 | A319 114 | 32975 | 19 | 2 |
+---------+---------------+-----------+------------+------------+
| B738 | | 30432 | 289 | 75 |
+---------+---------------+-----------+------------+------------+
Implement flight pattern analysis for German airports
Analyzes 1.4 million flights (2024 data) across Germany's 13 major airports using
OpenSky Network and ADS-B Exchange databases to reveal aviation patterns and trends.
Key Insights Discovered:
• Lufthansa dominates with 391,067 flights (28% market share)
• International flights comprise 88% of German air traffic
• London-Frankfurt is busiest route with 10,809 annual flights
• Individual aircraft fly up to 2,337 times per year (6+ flights/day)
• Munich-Hamburg leads domestic routes with 9,490 flights
Aviation Intelligence Provided:
• Market share breakdown across 1,735 airlines
• Route frequency analysis for 14,343 unique routes
• Fleet composition from 13,078 individual aircraft
• Aircraft type distribution (Airbus A320 family dominates)
• Temporal patterns and flight duration statistics
Visualizations Generated:
• Airport activity heatmap showing Frankfurt as primary hub
• Domestic vs international traffic pie chart
• Top routes frequency chart with average durations
• Aircraft type distribution including manufacturer market share
• Fleet diversity metrics and size categories (wide/narrow/regional)
Data Sources:
• OpenSky Network: Real-time flight tracking via OAuth API
• ADS-B Exchange: 615,000 aircraft database for type identification
• Coverage: 90% aircraft type identification (improved from 85%)
Outputs flight statistics to console and saves high-resolution PNG charts
for aviation analysis, route planning, and market research purposes.