Created
May 3, 2023 03:13
-
-
Save williamtsoi1/045e1a1fce4e77804073015b3d4ebbe9 to your computer and use it in GitHub Desktop.
BigQuery Slot Utilization (By Organization)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- This script will look into your GCP organization's BigQuery usage and provide slot utilization by buckets and minutes. | |
-- The data gathered will be for the last month (720 hours prior to end_date). | |
-- Note that this script does not gather slot utilization from existing BigQuery reservations. This is only for on-demand slot utilization. | |
-- | |
-- Required permissions: | |
-- You need to have the bigquery.jobs.listAll permission for the GCP organization in order to run this query. | |
-- Details here: https://cloud.google.com/bigquery/docs/information-schema-jobs-timeline-by-organization#required_permissions | |
-- | |
-- Instructions: | |
-- 1. Modify the end_date variable to match the month period that you wish to analyze | |
-- 2. Change the table `region-US`.INFORMATION_SCHEMA.JOBS_TIMELINE_BY_ORGANIZATION to match the region that you wish to analyze. | |
-- For example, for Hong Kong region (asia-east2) the table should be `region-asia-east2`.INFORMATION_SCHEMA.JOBS_TIMELINE_BY_ORGANIZATION | |
-- | |
-- How to interpret the results: | |
-- The query will return results in this format | |
-- +--------+---------+ | |
-- | bucket | minutes | | |
-- +--------+---------+ | |
-- | 100.0 | 25810 | | |
-- | 200.0 | 7263 | | |
-- | 300.0 | 3397 | | |
-- | 400.0 | 2919 | | |
-- | ... | ... | | |
-- +--------+---------+ | |
-- | |
-- This result means that out of the previous 730 hours (total 43600 minutes), the organization used 0-100 slots in 25810 of these minutes, | |
-- the organization used 100-200 slots in 7263 of these minutes etc. | |
DECLARE end_date TIMESTAMP; | |
SET end_date = "2023-05-03"; | |
WITH | |
snapshot_data AS ( | |
SELECT | |
period_start, | |
period_slot_ms | |
FROM | |
`region-US`.INFORMATION_SCHEMA.JOBS_TIMELINE_BY_ORGANIZATION | |
WHERE | |
period_start BETWEEN TIMESTAMP_SUB(end_date, INTERVAL 730 HOUR) | |
AND TIMESTAMP(end_date) | |
AND (statement_type != "SCRIPT" | |
OR statement_type IS NULL) | |
AND period_slot_ms > 0 | |
-- for on demand usage | |
AND reservation_id IS NULL AND job_type = "QUERY" | |
), | |
data_by_time AS ( | |
SELECT | |
TIMESTAMP_TRUNC(period_start, SECOND ) AS usage_time_sec, | |
IFNULL(SUM(period_slot_ms) / 1000, 0) AS usage_slot_sec | |
FROM | |
snapshot_data | |
GROUP BY | |
usage_time_sec), | |
max_per_minute_data_by_time AS ( | |
SELECT | |
TIMESTAMP_TRUNC(usage_time_sec, MINUTE ) AS usage_time, | |
MAX(usage_slot_sec) AS max_slot_per_minute, | |
FROM | |
data_by_time | |
GROUP BY | |
usage_time ), | |
max_per_minute_data_by_time_with_zeros AS ( | |
SELECT | |
usage_time, | |
IFNULL(slot_usage.max_slot_per_minute,0) AS max_slot_per_minute | |
FROM | |
UNNEST(GENERATE_TIMESTAMP_ARRAY(TIMESTAMP_SUB(end_date, INTERVAL 730 HOUR), TIMESTAMP_SUB(end_date, INTERVAL 1 MINUTE), INTERVAL 1 MINUTE)) AS usage_time | |
LEFT JOIN ( | |
SELECT | |
* | |
FROM | |
max_per_minute_data_by_time ) AS slot_usage | |
USING | |
(usage_time)), | |
all_buckets AS ( | |
SELECT | |
GENERATE_ARRAY(0, MAX(max_slot_per_minute) + 100, 100) AS bucket | |
FROM | |
max_per_minute_data_by_time ) | |
SELECT | |
bucket, | |
COUNTIF(max_slot_per_minute <= bucket | |
AND max_slot_per_minute > bucket - 100) AS minutes, | |
FROM | |
max_per_minute_data_by_time_with_zeros, | |
UNNEST(( | |
SELECT | |
* | |
FROM | |
all_buckets)) AS bucket | |
GROUP BY | |
bucket |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment