Skip to content

Instantly share code, notes, and snippets.

@dsaiztc
Forked from iconara/queries.sql
Last active July 2, 2019 15:51
Show Gist options
  • Save dsaiztc/7a10912c4959c8782c1c94a3de5aac74 to your computer and use it in GitHub Desktop.
Save dsaiztc/7a10912c4959c8782c1c94a3de5aac74 to your computer and use it in GitHub Desktop.
Low level Redshift cheat sheet
-- Table information like sortkeys, unsorted percentage
-- see http://docs.aws.amazon.com/redshift/latest/dg/r_SVV_TABLE_INFO.html
SELECT * FROM svv_table_info;
-- Table sizes in GB
SELECT t.name, COUNT(tbl) / 1000.0 AS gb
FROM (
SELECT DISTINCT datname, id, name
FROM stv_tbl_perm
JOIN pg_database ON pg_database.oid = db_id
) AS t
JOIN stv_blocklist ON tbl = t.id
GROUP BY t.name ORDER BY gb DESC;
-- Table column metadata
SELECT * FROM pg_table_def
WHERE schemaname = 'public'
AND tablename = …;
-- Vacuum progress
-- see http://docs.aws.amazon.com/redshift/latest/dg/r_SVV_VACUUM_PROGRESS.html
SELECT * FROM svv_vacuum_progress;
-- The size in MB of each column of each table (actually the number of blocks, but blocks are 1 MB)
-- see http://stackoverflow.com/questions/33388587/how-can-i-find-out-the-size-of-each-column-in-a-redshift-table
SELECT
TRIM(name) as table_name,
TRIM(pg_attribute.attname) AS column_name,
COUNT(1) AS size
FROM
svv_diskusage JOIN pg_attribute ON
svv_diskusage.col = pg_attribute.attnum-1 AND
svv_diskusage.tbl = pg_attribute.attrelid
GROUP BY 1, 2
ORDER BY 1, 2;
-- List users and groups
SELECT * FROM pg_user;
SELECT * FROM pg_group;
-- List all databases
SELECT * FROM pg_database;
-- List the 100 last load errors
-- see http://docs.aws.amazon.com/redshift/latest/dg/r_STL_LOAD_ERRORS.html
SELECT *
FROM stl_load_errors
ORDER BY starttime DESC
LIMIT 100;
-- Convert a millisecond resolution number to a TIMESTAMP
SELECT TIMESTAMP 'epoch' + (millisecond_timestamp/1000 * INTERVAL '1 second') FROM …;
-- Get the full SQL from a query ID
SELECT LISTAGG(text) WITHIN GROUP (ORDER BY sequence) AS sql
FROM STL_QUERYTEXT
WHERE query = …;
-- Get the full SQL, plus more query details from a query ID
-- filter on xid to see all (including Redshift internal) operations in the transaction
WITH query_sql AS (
SELECT
query,
LISTAGG(text) WITHIN GROUP (ORDER BY sequence) AS sql
FROM stl_querytext
GROUP BY 1
)
SELECT
q.query,
userid,
xid,
pid,
starttime,
endtime,
DATEDIFF(milliseconds, starttime, endtime)/1000.0 AS duration,
TRIM(database) AS database,
(CASE aborted WHEN 1 THEN TRUE ELSE FALSE END) AS aborted,
sql
FROM
stl_query q JOIN query_sql qs ON (q.query = qs.query)
WHERE
q.query = …
ORDER BY starttime;
-- Show the most recently executed DDL statements
SELECT
starttime,
xid,
LISTAGG(text) WITHIN GROUP (ORDER BY sequence) AS sql
FROM stl_ddltext
GROUP BY 1, 2
ORDER BY 1 DESC;
-- Query duration stats per database, user and query group; including the max, median, 99 percentile, etc.
-- Change which duration to use (queue, exec or total) by commenting out the right lines below
WITH
durations1 AS (
SELECT
TRIM("database") AS db,
TRIM(u.usename) AS "user",
TRIM(label) AS query_group,
DATE_TRUNC('day', starttime) AS day,
-- total_queue_time/1000000.0 AS duration,
-- total_exec_time/1000000.0 AS duration,
(total_queue_time + total_exec_time)/1000000.0 AS duration
FROM stl_query q, stl_wlm_query w, pg_user u
WHERE q.query = w.query
AND q.userid = u.usesysid
AND aborted = 0
),
durations2 AS (
SELECT
db,
"user",
query_group,
day,
duration,
PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY duration) OVER (PARTITION BY db, "user", query_group, day) AS median,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY duration) OVER (PARTITION BY db, "user", query_group, day) AS p75,
PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY duration) OVER (PARTITION BY db, "user", query_group, day) AS p90,
PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY duration) OVER (PARTITION BY db, "user", query_group, day) AS p95,
PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY duration) OVER (PARTITION BY db, "user", query_group, day) AS p99,
PERCENTILE_CONT(0.999) WITHIN GROUP (ORDER BY duration) OVER (PARTITION BY db, "user", query_group, day) AS p999
FROM durations1
)
SELECT
db,
"user",
query_group,
day,
MIN(duration) AS min,
AVG(duration) AS avg,
MAX(median) AS median,
MAX(p75) AS p75,
MAX(p90) AS p90,
MAX(p95) AS p95,
MAX(p99) AS p99,
MAX(p999) AS p999,
MAX(duration) AS max
FROM durations2
GROUP BY 1, 2, 3, 4
ORDER BY 1, 2, 3, 4;
-- Currently executing and recently executed queries with status, duration, database, etc.
SELECT
r.pid,
TRIM(status) AS status,
TRIM(db_name) AS db,
TRIM(user_name) AS "user",
TRIM(label) AS query_group,
r.starttime AS start_time,
r.duration,
r.query AS sql
FROM stv_recents r LEFT JOIN stv_inflight i ON r.pid = i.pid;
-- Show the remote host and port of running queries
SELECT
recents.pid,
TRIM(db_name) AS db,
TRIM(user_name) AS "user",
TRIM(label) AS query_group,
recents.starttime AS start_time,
recents.duration,
recents.query AS sql,
TRIM(remotehost) AS remote_host,
TRIM(remoteport) AS remote_port
FROM stv_recents recents
LEFT JOIN stl_connection_log connections ON (recents.pid = connections.pid)
LEFT JOIN stv_inflight inflight ON recents.pid = inflight.pid
WHERE TRIM(status) = 'Running'
AND event = 'initiating session';
-- Show permissions
WITH
users AS (
SELECT usename AS user_name FROM pg_user
),
objects AS (
SELECT
schemaname AS schema_name,
'table' AS object_type,
tablename AS object_name,
schemaname + '.' + tablename AS full_object_name
FROM pg_tables
WHERE schemaname NOT IN ('pg_internal')
UNION
SELECT
schemaname AS schema_name,
'view' AS object_type,
viewname AS object_name,
schemaname + '.' + viewname AS full_object_name
FROM pg_views
WHERE schemaname NOT IN ('pg_internal')
)
SELECT
schema_name,
object_name,
object_type,
user_name,
HAS_TABLE_PRIVILEGE(users.user_name, full_object_name, 'select') AS "select",
HAS_TABLE_PRIVILEGE(users.user_name, full_object_name, 'insert') AS "insert",
HAS_TABLE_PRIVILEGE(users.user_name, full_object_name, 'update') AS "update",
HAS_TABLE_PRIVILEGE(users.user_name, full_object_name, 'delete') AS "delete",
HAS_TABLE_PRIVILEGE(users.user_name, full_object_name, 'references') AS "references"
FROM users, objects
ORDER BY full_object_name;
-- Find tables that need vacuum or analyze
SELECT "database", "schema", "table", unsorted, stats_off
FROM svv_table_info
WHERE unsorted > 20
OR stats_off > 20
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment