Created
September 3, 2025 17:58
-
-
Save mesmacosta/308599458b5313e1d1bf10261c067654 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -- =================================================================================== | |
| -- Configuration Block: Adjust these parameters for your analysis | |
| -- =================================================================================== | |
| DECLARE reservation_admin_project_id STRING DEFAULT 'your-reservation-admin-project'; | |
| DECLARE region STRING DEFAULT 'us'; | |
| DECLARE start_timestamp TIMESTAMP DEFAULT TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY); | |
| DECLARE end_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP(); | |
| -- Note: Adjust the cost based on your BigQuery Edition and commitment level. | |
| -- Example: Enterprise Edition Pay-as-you-go is ~$0.06 per slot-hour. | |
| DECLARE cost_per_slot_hour FLOAT64 DEFAULT 0.06; | |
| -- =================================================================================== | |
| -- Query Logic: Do not modify below this line | |
| -- =================================================================================== | |
| WITH | |
| -- Step 1: Calculate the total number of slots UTILIZED by all jobs within each | |
| -- reservation, aggregated to a one-second granularity. This represents the | |
| -- actual work done. | |
| utilized_slots_per_second AS ( | |
| SELECT | |
| period_start, | |
| reservation_id, | |
| -- Sum slot-milliseconds from all jobs in the same second and convert to slots. | |
| SUM(period_slot_ms) / 1000 AS utilized_slots | |
| FROM | |
| -- Note: Use JOBS_TIMELINE_BY_PROJECT or JOBS_TIMELINE_BY_ORGANIZATION as needed. | |
| `region-us`.INFORMATION_SCHEMA.JOBS_TIMELINE | |
| WHERE | |
| job_creation_time BETWEEN start_timestamp AND end_timestamp | |
| AND period_start BETWEEN start_timestamp AND end_timestamp | |
| AND reservation_id IS NOT NULL | |
| GROUP BY | |
| 1, | |
| 2 | |
| ), | |
| -- Step 2: Calculate the total number of slots BILLED for each reservation at a | |
| -- one-second granularity. This is achieved by unnesting the per_second_details | |
| -- struct from the RESERVATIONS_TIMELINE view. | |
| billed_slots_per_second AS ( | |
| SELECT | |
| details.start_time AS period_start, | |
| res.reservation_id, | |
| -- Baseline slots are a fixed cost, not part of autoscaler waste, but are included | |
| -- for a complete picture of total provisioned capacity. | |
| details.slots_assigned AS billed_baseline_slots, | |
| -- These are the slots added by the autoscaler. This is the capacity we are | |
| -- analyzing for waste. | |
| details.autoscale_current_slots AS billed_autoscaled_slots | |
| FROM | |
| `your-reservation-admin-project.region-us`.INFORMATION_SCHEMA.RESERVATIONS_TIMELINE AS res, | |
| UNNEST(per_second_details) AS details | |
| WHERE | |
| res.period_start BETWEEN TIMESTAMP_TRUNC(start_timestamp, MINUTE) AND end_timestamp | |
| ), | |
| -- Step 3: Join the utilized and billed data. A FULL OUTER JOIN is crucial to | |
| -- capture seconds where slots were billed but had zero utilization (pure waste). | |
| waste_calculation_per_second AS ( | |
| SELECT | |
| COALESCE(u.period_start, b.period_start) AS period_start, | |
| COALESCE(u.reservation_id, b.reservation_id) AS reservation_id, | |
| IFNULL(u.utilized_slots, 0) AS utilized_slots, | |
| IFNULL(b.billed_baseline_slots, 0) AS billed_baseline_slots, | |
| IFNULL(b.billed_autoscaled_slots, 0) AS billed_autoscaled_slots, | |
| -- Calculate total billed capacity for context. | |
| IFNULL(b.billed_baseline_slots, 0) + IFNULL(b.billed_autoscaled_slots, 0) AS total_billed_slots | |
| FROM | |
| utilized_slots_per_second AS u | |
| FULL OUTER JOIN billed_slots_per_second AS b | |
| ON u.period_start = b.period_start AND u.reservation_id = b.reservation_id | |
| ) | |
| -- Step 4: Final aggregation to summarize waste over the entire analysis period | |
| -- for each reservation. | |
| SELECT | |
| reservation_id, | |
| -- Total utilized slot-hours: The sum of all work done by jobs. | |
| ROUND(SUM(utilized_slots) / 3600, 2) AS total_utilized_slot_hours, | |
| -- Total billed autoscaled slot-hours: The capacity added and billed by the autoscaler. | |
| ROUND(SUM(billed_autoscaled_slots) / 3600, 2) AS total_billed_autoscaled_slot_hours, | |
| -- Total wasted autoscaled slot-hours: The core metric. This is the billed autoscaled | |
| -- capacity that was not used by any job. | |
| ROUND( | |
| SUM( | |
| -- We only count waste against the autoscaled portion. If utilization exceeds | |
| -- the autoscaled slots, it means baseline slots are being used, which is not waste. | |
| GREATEST(0, billed_autoscaled_slots - GREATEST(0, utilized_slots - billed_baseline_slots)) | |
| ) / 3600, | |
| 2 | |
| ) AS total_wasted_autoscaled_slot_hours, | |
| -- Waste Percentage: The proportion of billed autoscaled capacity that was wasted. | |
| -- This is a key performance indicator (KPI) for autoscaler efficiency. | |
| SAFE_DIVIDE( | |
| SUM( | |
| GREATEST(0, billed_autoscaled_slots - GREATEST(0, utilized_slots - billed_baseline_slots)) | |
| ), | |
| SUM(billed_autoscaled_slots) | |
| ) AS waste_percentage, | |
| -- Potential Cost Savings: The financial impact of the calculated waste. | |
| ROUND( | |
| SUM( | |
| GREATEST(0, billed_autoscaled_slots - GREATEST(0, utilized_slots - billed_baseline_slots)) | |
| ) / 3600 * cost_per_slot_hour, | |
| 2 | |
| ) AS potential_cost_savings_usd | |
| FROM | |
| waste_calculation_per_second | |
| WHERE | |
| reservation_id IS NOT NULL | |
| GROUP BY | |
| 1 | |
| ORDER BY | |
| potential_cost_savings_usd DESC; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment