Created
November 12, 2024 16:50
-
-
Save FranckPachot/7b978f9aa7dc013e989b454be015d078 to your computer and use it in GitHub Desktop.
IndexingWorkshop-DSS2024.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# start a YugabyteDB node | |
docker run -d --name yb yugabytedb/yugabyte:2.23.0.0-b710 \ | |
yugabyted start --enable_pg_parity_tech_preview --background=false | |
# start PostgreSQL client | |
docker run --rm -it --link yb:yb \ | |
-e PGHOST=yb -e PGPORT=5433 -e PGUSER=yugabyte postgres psql | |
select version(); | |
/* | |
Start here on arm | |
*/ | |
set client_min_messages to error; | |
\pset pager off | |
select version(); | |
drop table if exists large_table ; | |
create extension if not exists pgcrypto; | |
create table large_table ( | |
id uuid primary key default gen_random_uuid() | |
,col1 int not null | |
,col2 int not null | |
,col3 text | |
,col4 int not null | |
,col5 text | |
,col6 text | |
); | |
insert into large_table (col1, col2, col3, col4, col5, col6) | |
select | |
(random() * 3)::int, | |
(random() * 50)::int, | |
md5(random()::text), | |
(random() * 5)::int, | |
md5(random()::text), | |
md5(random()::text) | |
from generate_series(1, 1000000); | |
analyze large_table ; | |
prepare query as | |
select col4, col5, col3, col6 | |
from large_table | |
where col1 = $1 | |
and col2 between $2 and $3 | |
and col4 in ( 1, 3, 5 ) | |
order by col2 asc, col3 desc | |
limit 10; | |
execute query(1,24,42); | |
explain (analyze, costs off, dist , summary off) | |
execute query(1,24,42); | |
-- Concepts | |
create index on large_table (col1 asc); | |
explain (analyze, costs off, summary off) | |
execute query(1,24,42); | |
deallocate query; | |
prepare query as /*+ noseqscan(large_table) */ | |
select col4, col5, col3, col6 | |
from large_table | |
where col1 = $1 | |
and col2 between $2 and $3 | |
and col4 in ( 1, 3, 5 ) | |
order by col2 asc, col3 desc | |
limit 10; | |
explain (analyze, costs off, summary off, dist) | |
execute query(1,24,42); | |
create index on large_table (col1 asc, col2 asc); | |
explain (analyze, costs on, summary off, dist) | |
execute query(1,24,42); | |
create index on large_table (col1 asc, col2 asc, col3 desc); | |
explain (analyze, costs off, summary off, dist) | |
execute query(1,24,42); | |
create index on large_table (col1 asc, col2 asc, col3 desc) include (col4); | |
explain (analyze, costs off, summary off, dist) | |
execute query(1,24,42); | |
explain (analyze, costs on, summary off, dist, verbose) | |
execute query(1,24,42); | |
create index on large_table (col1 asc, col2 asc, col3 desc) include (col4, col5, col6); | |
explain (analyze, costs off, summary off, dist, verbose) | |
execute query(1,24,42); | |
-- Sounds too good? | |
\d large_table | |
explain (analyze, costs off, summary on, dist) | |
update large_table set col1=4 where col1=2 and col2=1; | |
explain (analyze, costs off, summary on, dist) | |
update large_table set col4=4 where col1=2 and col2=2; | |
drop index large_table_col1_col2_col3_col4_col5_col6_idx; | |
drop index large_table_col1_col2_col3_col4_idx; | |
drop index large_table_col1_col2_idx; | |
drop index large_table_col1_col2_col3_idx; | |
-- Trade-offs | |
create index on large_table (col1 asc, col4 asc, col2 asc, col3 asc ) include (col5, col6); | |
explain (analyze, costs off, summary off, dist, verbose) | |
execute query(1,24,42); | |
explain (analyze, dist, costs off, summary off) | |
select col4, col5, col3, col6 | |
from large_table | |
where col1 = 1 | |
and col2 between 24 and 42 | |
and col4 in ( 1, 3, 5 ) | |
order by col4 | |
limit 10; | |
create index on large_table (col1 asc, col2 asc, col3 desc) | |
include (col4, col5, col6) | |
where col4 in (1,3,5); | |
explain (analyze, costs on, summary off, dist, verbose) | |
execute query(1,24,42); | |
--> What about the query planner? | |
select attname, n_distinct, most_common_vals, most_common_freqs | |
from pg_stats where tablename ='large_table' and attname in ('col1','col4'); | |
select indexrelname,idx_scan,idx_tup_read,idx_tup_fetch from pg_stat_user_indexes | |
where relname ='large_table'; | |
create statistics col5_col6 on col5, col6 from large_table; | |
create statistics col1_col2_col4 on col1, col2, col3, col4 from large_table; | |
analyze large_table; | |
select stxname, stxkeys, stxddependencies, stxdndistinct | |
from pg_statistic_ext join pg_statistic_ext_data on (oid = stxoid); | |
-------------------------------------------------------------------------------------------------------------------- | |
-- A complete Example | |
--drop schema if exists demo cascade; | |
create schema if not exists demo; | |
create table demo.events ( | |
id uuid not null, | |
system_id uuid not null, | |
event_type varchar(100) not null, | |
created_at timestamptz not null, | |
updated_at timestamptz not null, | |
state varchar(100) not null, | |
customer_id uuid, | |
device uuid, | |
primary key (id) | |
); | |
create index events_system_id on demo.events | |
(system_id); | |
-- execution plan | |
\c | |
\d demo.events | |
select pg_get_indexdef('demo.events_system_id'::regclass::oid); | |
explain (costs off) | |
/*+ indexscan(events events_system_id) */ | |
select * | |
from demo.events | |
where events.system_id = '0188e8d7-dead-73c1-b218-7cf069921b5b' | |
and ( | |
events.event_type in ( | |
'onboarding', | |
'monitoring', | |
'transferring' | |
) | |
and | |
events.state in ( | |
'event_not_started', | |
'event_in_progress', | |
'event_monitoring_not_started' | |
) | |
) | |
order by events.created_at asc limit 100; | |
-- testing with data | |
create extension if not exists pgcrypto; | |
--- use non-transactional writes for fast bulk load in yugabytedb | |
set yb_disable_transactional_writes=on; | |
insert into demo.events (id, system_id, event_type, created_at, updated_at, state, customer_id, device) | |
select | |
gen_random_uuid(), -- id | |
case when random()>0.8 then '0188e8d7-dead-73c1-b218-7cf069921b5b' else gen_random_uuid() end, -- system_id | |
(array['onboarding', 'monitoring', 'transferring', 'maintenance', 'upgrade'])[floor(random() * 5 + 1)], -- event_type | |
now() - interval '1 year' * random(), -- created_at (random time within the last year) | |
now() - interval '1 year' * random(), -- updated_at (random time within the last year) | |
(array['event_not_started', 'event_in_progress', 'event_monitoring_not_started', 'event_monitoring_in_progress', 'event_payment_screening_not_started', 'event_payment_screening_in_progress', 'event_transferring_not_started', 'event_transferring_in_progress', 'event_completed', 'event_cancelled'])[floor(random() * 10 + 1)], -- state | |
gen_random_uuid(), -- customer_id | |
gen_random_uuid() -- device | |
from generate_series(1, 1000000); | |
--- reset the transactional writes to be fully acid | |
set yb_disable_transactional_writes=off; | |
analyze demo.events; | |
-- explain analyze | |
explain (analyze, dist, verbose, costs off) | |
/*+ indexscan(events events_system_id) */ | |
select * | |
from demo.events | |
where events.system_id = '0188e8d7-dead-73c1-b218-7cf069921b5b' | |
and ( | |
events.event_type in ( | |
'onboarding', | |
'monitoring', | |
'transferring' | |
) | |
and | |
events.state in ( | |
'event_not_started', | |
'event_in_progress', | |
'event_monitoring_not_started' | |
) | |
) | |
order by events.created_at asc limit 100; | |
-- access by single "system_id" and multi-value filter on "state" and "event_type" by query xyz | |
create index i1_ise on demo.events (system_id, state, event_type) | |
--; | |
select pg_get_indexdef('demo.i1_ise'::regclass::oid); | |
explain (analyze, dist, verbose, costs off) | |
/*+ indexscan(events i1_ise ) */ | |
select * | |
from demo.events | |
where events.system_id = '0188e8d7-dead-73c1-b218-7cf069921b5b' | |
and ( | |
events.event_type in ( | |
'onboarding', | |
'monitoring', | |
'transferring' | |
) | |
and | |
events.state in ( | |
'event_not_started', | |
'event_in_progress', | |
'event_monitoring_not_started' | |
) | |
) | |
order by events.created_at asc limit 100; | |
-- index condition: access, skip scan, and filter | |
explain (analyze, dist, debug, verbose, costs off, summary off) | |
/*+ indexscan(events i1_ise ) */ | |
select count(*) from demo.events | |
where events.system_id = '0188e8d7-dead-73c1-b218-7cf069921b5b'; | |
explain (analyze, dist, debug, verbose, costs off, summary off) | |
/*+ indexscan(events i1_ise ) */ | |
select count(*) from demo.events | |
where events.system_id = '0188e8d7-dead-73c1-b218-7cf069921b5b' | |
and ( | |
events.event_type in ( | |
'onboarding', | |
'monitoring', | |
'transferring' | |
) | |
and | |
events.state in ( | |
'event_not_started', | |
'event_in_progress', | |
'event_monitoring_not_started' | |
) | |
) | |
; | |
select | |
case when | |
events.system_id = '0188e8d7-dead-73c1-b218-7cf069921b5b' | |
and ( | |
events.event_type in ( 'onboarding', 'monitoring', 'transferring') | |
and | |
events.state in ( 'event_not_started', 'event_in_progress', 'event_monitoring_not_started') | |
) then '✅' else '❌' end keep | |
, system_id, state, event_type | |
,count(*) | |
from demo.events | |
where system_id = '0188e8d7-dead-73c1-b218-7cf069921b5b' | |
group by system_id, state, event_type | |
order by system_id, state, event_type | |
; | |
-- partial index | |
create index i1_i_partial on demo.events (system_id) | |
where ( | |
events.event_type in ( | |
'onboarding', | |
'monitoring', | |
'transferring' | |
) | |
and | |
events.state in ( | |
'event_not_started', | |
'event_in_progress', | |
'event_monitoring_not_started' | |
) | |
) | |
--; | |
select pg_get_indexdef('demo.i1_i_partial'::regclass::oid); | |
explain (analyze, dist, debug, verbose, costs off, summary off) | |
/*+ indexscan(events i1_i_partial ) */ | |
select count(*) from demo.events | |
where events.system_id = '0188e8d7-dead-73c1-b218-7cf069921b5b' | |
and ( | |
events.event_type in ( | |
'onboarding', | |
'monitoring', | |
'transferring' | |
) | |
and | |
events.state in ( | |
'event_not_started', | |
'event_in_progress', | |
'event_monitoring_not_started' | |
) | |
) | |
; | |
explain (analyze, dist, verbose, costs off) | |
/*+ indexscan(events i1_i_partial ) */ | |
select * | |
from demo.events | |
where events.system_id = '0188e8d7-dead-73c1-b218-7cf069921b5b' | |
and ( | |
events.event_type in ( | |
'onboarding', | |
'monitoring', | |
'transferring' | |
) | |
and | |
events.state in ( | |
'event_not_started', | |
'event_in_progress', | |
'event_monitoring_not_started' | |
) | |
) | |
order by events.created_at asc limit 100; | |
-- fetch size | |
\dconfig yb_fetch* | |
set yb_fetch_row_limit = 100000; | |
explain (analyze, dist, verbose, costs off) | |
/*+ indexscan(events i1_i_partial ) */ | |
select * | |
from demo.events | |
where events.system_id = '0188e8d7-dead-73c1-b218-7cf069921b5b' | |
and ( | |
events.event_type in ( | |
'onboarding', | |
'monitoring', | |
'transferring' | |
) | |
and | |
events.state in ( | |
'event_not_started', | |
'event_in_progress', | |
'event_monitoring_not_started' | |
) | |
) | |
order by events.created_at asc limit 100; | |
-- order-preserving index | |
create index i1_icse | |
on demo.events (system_id, created_at, state, event_type) | |
--; | |
select pg_get_indexdef('demo.i1_icse'::regclass::oid); | |
explain (analyze, dist, verbose, costs off) | |
/*+ indexscan(events i1_icse ) */ | |
select * | |
from demo.events | |
where events.system_id = '0188e8d7-dead-73c1-b218-7cf069921b5b' | |
and ( | |
events.event_type in ( | |
'onboarding', | |
'monitoring', | |
'transferring' | |
) | |
and | |
events.state in ( | |
'event_not_started', | |
'event_in_progress', | |
'event_monitoring_not_started' | |
) | |
) | |
order by events.created_at asc limit 100; | |
-- index only scan | |
create index i1_icse_covering | |
on demo.events (system_id, created_at, state, event_type) | |
include (id, updated_at, customer_id, device) | |
--; | |
select pg_get_indexdef('demo.i1_icse_covering'::regclass::oid); | |
explain (analyze, dist, debug, verbose, costs off) | |
/*+ indexscan(events i1_icse_covering ) */ | |
select * | |
from demo.events | |
where events.system_id = '0188e8d7-dead-73c1-b218-7cf069921b5b' | |
and ( | |
events.event_type in ( | |
'onboarding', | |
'monitoring', | |
'transferring' | |
) | |
and | |
events.state in ( | |
'event_not_started', | |
'event_in_progress', | |
'event_monitoring_not_started' | |
) | |
) | |
order by events.created_at asc limit 100; | |
-- the best index for this query | |
create index i1_icse_best on demo.events (system_id, created_at) include (id, updated_at, customer_id, device, event_type, state) | |
where ( | |
events.event_type in ( | |
'onboarding', | |
'monitoring', | |
'transferring' | |
) | |
and | |
events.state in ( | |
'event_not_started', | |
'event_in_progress', | |
'event_monitoring_not_started' | |
) | |
) | |
--; | |
select pg_get_indexdef('demo.i1_icse_best'::regclass::oid); | |
explain (analyze, dist, debug, verbose, costs off) | |
/*+ indexscan(events i1_icse_best ) */ | |
select * | |
from demo.events | |
where events.system_id = '0188e8d7-dead-73c1-b218-7cf069921b5b' | |
and ( | |
events.event_type in ( | |
'onboarding', | |
'monitoring', | |
'transferring' | |
) | |
and | |
events.state in ( | |
'event_not_started', | |
'event_in_progress', | |
'event_monitoring_not_started' | |
) | |
) | |
order by events.created_at asc limit 100; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment