Last active
April 24, 2023 11:32
-
-
Save FranckPachot/828ea961d7f09b7cf5582a3e84faaf86 to your computer and use it in GitHub Desktop.
For the next time you hear that "Joins Don't Scale": Joining to a 7M rows table to return 20K rows with text search filters in 50 milliseconds on @yugabyteDB and that scales as the plan doesn't depend on the table size😎 SQL databases rocks 🚀
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- load names | |
\! wget -cO/var/tmp/baby-names.csv "https://github.com/hadley/data-baby-names/blob/master/baby-names.csv?raw=true" | |
drop table if exists baby_names; | |
create table baby_names ( year int , name text, percent float, sex text, primary key (name,year, sex)); | |
\copy baby_names from '/var/tmp/baby-names.csv' with ( skip 1, format csv ); | |
-- joins scale | |
drop table if exists users,messages cascade; | |
create table users ( | |
primary key (user_id) | |
, user_id bigint generated always as identity (cache 100) | |
, username text | |
); | |
create index users_username on users( upper(username) asc , user_id ); | |
create table messages ( | |
primary key (user_id, message_id) | |
, user_id bigint references users | |
, message_id bigint generated always as identity (cache 100) | |
, send_time timestamptz default now() | |
, message text | |
); | |
insert into users (username) select distinct name from baby_names; | |
create extension if not exists orafce; | |
prepare gen_messages(int) as | |
insert into messages (user_id, send_time, message) | |
select user_id , now() - generate_series * interval '1 minute' , dbms_random.string('p',100) | |
from users , generate_series(1,$1) | |
; | |
execute gen_messages(10); | |
analyze messages, users; | |
set yb_enable_optimizer_statistics to on; | |
set yb_bnl_batch_size to 1024; | |
prepare get_messages(text,int) as | |
select username, send_time from users join messages using(user_id) | |
where upper(username) like upper($1) | |
and send_time > ( now() - $2 * interval '1 days' ) | |
; | |
execute gen_messages(10); | |
execute gen_messages(100); | |
execute gen_messages(1000); | |
explain (analyze, costs off) | |
execute get_messages('Fra%',3); | |
analyze messages, users; | |
select relname, reltuples from pg_class where relname in ('users','messages'); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment