Skip to content

Instantly share code, notes, and snippets.

@mesmacosta
Created September 3, 2025 17:58
Show Gist options
  • Select an option

  • Save mesmacosta/308599458b5313e1d1bf10261c067654 to your computer and use it in GitHub Desktop.

Select an option

Save mesmacosta/308599458b5313e1d1bf10261c067654 to your computer and use it in GitHub Desktop.
-- ===================================================================================
-- Configuration Block: Adjust these parameters for your analysis
-- ===================================================================================
DECLARE reservation_admin_project_id STRING DEFAULT 'your-reservation-admin-project';
DECLARE region STRING DEFAULT 'us';
DECLARE start_timestamp TIMESTAMP DEFAULT TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY);
DECLARE end_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP();
-- Note: Adjust the cost based on your BigQuery Edition and commitment level.
-- Example: Enterprise Edition Pay-as-you-go is ~$0.06 per slot-hour.
DECLARE cost_per_slot_hour FLOAT64 DEFAULT 0.06;
-- ===================================================================================
-- Query Logic: Do not modify below this line
-- ===================================================================================
WITH
-- Step 1: Calculate the total number of slots UTILIZED by all jobs within each
-- reservation, aggregated to a one-second granularity. This represents the
-- actual work done.
utilized_slots_per_second AS (
SELECT
period_start,
reservation_id,
-- Sum slot-milliseconds from all jobs in the same second and convert to slots.
SUM(period_slot_ms) / 1000 AS utilized_slots
FROM
-- Note: Use JOBS_TIMELINE_BY_PROJECT or JOBS_TIMELINE_BY_ORGANIZATION as needed.
`region-us`.INFORMATION_SCHEMA.JOBS_TIMELINE
WHERE
job_creation_time BETWEEN start_timestamp AND end_timestamp
AND period_start BETWEEN start_timestamp AND end_timestamp
AND reservation_id IS NOT NULL
GROUP BY
1,
2
),
-- Step 2: Calculate the total number of slots BILLED for each reservation at a
-- one-second granularity. This is achieved by unnesting the per_second_details
-- struct from the RESERVATIONS_TIMELINE view.
billed_slots_per_second AS (
SELECT
details.start_time AS period_start,
res.reservation_id,
-- Baseline slots are a fixed cost, not part of autoscaler waste, but are included
-- for a complete picture of total provisioned capacity.
details.slots_assigned AS billed_baseline_slots,
-- These are the slots added by the autoscaler. This is the capacity we are
-- analyzing for waste.
details.autoscale_current_slots AS billed_autoscaled_slots
FROM
`your-reservation-admin-project.region-us`.INFORMATION_SCHEMA.RESERVATIONS_TIMELINE AS res,
UNNEST(per_second_details) AS details
WHERE
res.period_start BETWEEN TIMESTAMP_TRUNC(start_timestamp, MINUTE) AND end_timestamp
),
-- Step 3: Join the utilized and billed data. A FULL OUTER JOIN is crucial to
-- capture seconds where slots were billed but had zero utilization (pure waste).
waste_calculation_per_second AS (
SELECT
COALESCE(u.period_start, b.period_start) AS period_start,
COALESCE(u.reservation_id, b.reservation_id) AS reservation_id,
IFNULL(u.utilized_slots, 0) AS utilized_slots,
IFNULL(b.billed_baseline_slots, 0) AS billed_baseline_slots,
IFNULL(b.billed_autoscaled_slots, 0) AS billed_autoscaled_slots,
-- Calculate total billed capacity for context.
IFNULL(b.billed_baseline_slots, 0) + IFNULL(b.billed_autoscaled_slots, 0) AS total_billed_slots
FROM
utilized_slots_per_second AS u
FULL OUTER JOIN billed_slots_per_second AS b
ON u.period_start = b.period_start AND u.reservation_id = b.reservation_id
)
-- Step 4: Final aggregation to summarize waste over the entire analysis period
-- for each reservation.
SELECT
reservation_id,
-- Total utilized slot-hours: The sum of all work done by jobs.
ROUND(SUM(utilized_slots) / 3600, 2) AS total_utilized_slot_hours,
-- Total billed autoscaled slot-hours: The capacity added and billed by the autoscaler.
ROUND(SUM(billed_autoscaled_slots) / 3600, 2) AS total_billed_autoscaled_slot_hours,
-- Total wasted autoscaled slot-hours: The core metric. This is the billed autoscaled
-- capacity that was not used by any job.
ROUND(
SUM(
-- We only count waste against the autoscaled portion. If utilization exceeds
-- the autoscaled slots, it means baseline slots are being used, which is not waste.
GREATEST(0, billed_autoscaled_slots - GREATEST(0, utilized_slots - billed_baseline_slots))
) / 3600,
2
) AS total_wasted_autoscaled_slot_hours,
-- Waste Percentage: The proportion of billed autoscaled capacity that was wasted.
-- This is a key performance indicator (KPI) for autoscaler efficiency.
SAFE_DIVIDE(
SUM(
GREATEST(0, billed_autoscaled_slots - GREATEST(0, utilized_slots - billed_baseline_slots))
),
SUM(billed_autoscaled_slots)
) AS waste_percentage,
-- Potential Cost Savings: The financial impact of the calculated waste.
ROUND(
SUM(
GREATEST(0, billed_autoscaled_slots - GREATEST(0, utilized_slots - billed_baseline_slots))
) / 3600 * cost_per_slot_hour,
2
) AS potential_cost_savings_usd
FROM
waste_calculation_per_second
WHERE
reservation_id IS NOT NULL
GROUP BY
1
ORDER BY
potential_cost_savings_usd DESC;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment