Last active
March 11, 2025 20:30
-
-
Save ppKrauss/226fd3fb0dca0a6c6d4f875cb93aa2db to your computer and use it in GitHub Desktop.
BitString vs ltree benchmark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- for https://dba.stackexchange.com/q/345559/90651 | |
------------------ | |
CREATE EXTENSION IF NOT EXISTS ltree; | |
DROP SCHEMA IF EXISTS treetest CASCADE; | |
CREATE SCHEMA treetest; | |
CREATE FUNCTION treetest.hiddenBig_to_vBit(x bigint) RETURNS varbit AS $f$ | |
SELECT substring( x_bin from 1+position(B'1' in x_bin) ) | |
FROM (select x::bit(64)) t(x_bin) -- WHERE $1>7 AND $1<4611686018427387904 | |
$f$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE; | |
COMMENT ON FUNCTION treetest.hiddenBig_to_vBit(bigint) | |
IS 'Decodes hidden_bit Bigint bitStrings (to proper Varbit bitStrings).' | |
; | |
CREATE FUNCTION treetest.vBit_to_hiddenBig(x varbit) RETURNS bigint AS $f$ | |
SELECT overlay( b'0'::bit(64) PLACING (b'1' || x) FROM 64-length(x) )::bigint | |
$f$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE; | |
COMMENT ON FUNCTION treetest.vbit_to_hiddenbig(varbit) | |
IS 'Encodes proper VarBit bitString into a hidden_bit Bigint bitString.' | |
; | |
CREATE or replace FUNCTION treetest.str_addpoints(text) RETURNS text AS $f$ | |
SELECT string_agg(x,'.') FROM regexp_split_to_table($1,'') t(x) | |
$f$ language SQL IMMUTABLE; | |
CREATE or replace FUNCTION treetest.MaxBits(p_x varbit, p_bitlen int default 22) RETURNS varbit AS $f$ | |
SELECT substring(p_x || '11111111111111111111111', 1, p_bitlen) | |
$f$ language SQL IMMUTABLE; | |
---------------------- | |
-- extra functions | |
CREATE FUNCTION treetest.cutlast1s(p_x varbit) RETURNS varbit AS $f$ | |
SELECT CASE -- calculates hsucc() | |
WHEN len>0 AND get_bit(p_x,len-1)::boolean THEN treetest.cutlast1s( substring(p_x,1,len-1) ) | |
ELSE CASE WHEN len=0 THEN ''::varbit ELSE substring(p_x,1,len-1) || '1'::bit(1) END | |
END | |
FROM (SELECT length(p_x)) t(len) | |
$f$ LANGUAGE SQL IMMUTABLE; | |
COMMENT ON FUNCTION treetest.cutlast1s(varbit) | |
IS 'cut last 1s and replace remaining zero by 1. For use in hsucc().' | |
; | |
CREATE FUNCTION treetest.hsucc(p_x varbit, p_bits int DEFAULT 64) RETURNS varbit AS $f$ | |
SELECT CASE | |
WHEN length(p_x)<p_bits THEN p_x || '0'::bit(1) | |
ELSE treetest.cutlast1s(p_x) | |
END | |
$f$ LANGUAGE SQL IMMUTABLE; | |
COMMENT ON FUNCTION treetest.hsucc(varbit,int) | |
IS 'Hierarchical successor. See also lexicographic order and https://en.wikipedia.org/wiki/Successor_function' | |
; | |
CREATE OPERATOR + ( | |
leftarg = VARBIT, | |
rightarg = INT, | |
procedure = treetest.hsucc | |
); -- e.g. SELECT ((b'01'+4)+4)+4; | |
----- | |
CREATE FUNCTION treetest.IsDescendantOf(VARBIT, VARBIT) | |
returns boolean language sql immutable as $f$ | |
SELECT $1 BETWEEN $2 AND treetest.MaxBits($2) | |
$f$; | |
CREATE OPERATOR <@ ( | |
leftarg = VARBIT, | |
rightarg = VARBIT, | |
procedure = treetest.IsDescendantOf | |
-- commutator = @> | |
); | |
---------------------- | |
-- -- GENERATE SERIES: | |
CREATE FUNCTION treetest.generatep_hb_series(bit_len int) RETURNS setof bigint as $f$ | |
SELECT i::bigint | maxval as x | |
FROM (SELECT (2^bit_len)::bigint) t(maxval), | |
LATERAL generate_series(0,maxval-1) s(i) | |
$f$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE; | |
COMMENT ON FUNCTION treetest.generatep_hb_series | |
IS 'Obtain a sequency of hidden_bit codes of fixed length, from zero to 2^bit_len-1.' | |
; | |
CREATE FUNCTION treetest.generate_hb_series( | |
bit_len int, | |
p_non_recursive boolean default false | |
) RETURNS setof bigint as $f$ | |
DECLARE | |
s text; | |
BEGIN | |
IF p_non_recursive THEN | |
s := 'SELECT * FROM treetest.generatep_hb_series('|| bit_len::text ||')'; | |
ELSE | |
s := 'SELECT * FROM treetest.generatep_hb_series(1)'; | |
FOR i IN 2..bit_len LOOP | |
s := s || ' UNION ALL SELECT * FROM treetest.generatep_hb_series('|| i::text ||')'; | |
END LOOP; | |
END IF; | |
RETURN QUERY EXECUTE s; | |
END; | |
$f$ LANGUAGE PLpgSQL IMMUTABLE; | |
COMMENT ON FUNCTION treetest.generate_hb_series(int,boolean) | |
IS 'Obtain a sequency of all hidden_bit codes, for generate_vbit_series().' | |
; | |
CREATE or replace FUNCTION treetest.generate_vbit_series( | |
bit_len int, | |
p_non_recursive boolean default false | |
) RETURNS setof varbit as $f$ | |
SELECT treetest.hiddenbig_to_vbit(hb) | |
FROM treetest.generate_hb_series( | |
CASE WHEN bit_len>62 THEN 62 WHEN bit_len<=0 THEN 1 ELSE bit_len END, | |
p_non_recursive | |
) t(hb) | |
WHERE bit_len>=1 | |
UNION ALL | |
SELECT x FROM ( VALUES (''::varbit) ) s(x) WHERE bit_len=0 | |
ORDER BY 1 | |
$f$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE; | |
COMMENT ON FUNCTION treetest.generate_vbit_series(int,boolean) | |
IS 'Obtain a sequency of all bitStrings of bit_len, with 63>bit_len>0.' | |
; | |
-- SELECT * FROM treetest.generate_vbit_series(14); | |
----------------- | |
drop VIEW treetest.table_disk_usage; | |
CREATE VIEW treetest.table_disk_usage AS | |
SELECT | |
relname, | |
pg_size_pretty(table_size) AS size, | |
table_size as size_bytes | |
FROM ( | |
SELECT | |
pg_catalog.pg_namespace.nspname AS schema_name, | |
relname, | |
pg_relation_size(pg_catalog.pg_class.oid) AS table_size | |
FROM pg_catalog.pg_class | |
JOIN pg_catalog.pg_namespace ON relnamespace = pg_catalog.pg_namespace.oid | |
) t | |
WHERE schema_name='treetest' | |
ORDER BY table_size DESC; | |
-- SELECT * FROM treetest.table_disk_usage; | |
------------------------------ | |
------------------------------ | |
------------------------------ | |
-- DEMO | |
CREATE TABLE treetest.fruit ( | |
id SERIAL NOT NULL PRIMARY KEY, | |
binpath varbit NOT NULL, | |
charpath ltree NOT NULL, | |
info text | |
); | |
INSERT INTO treetest.fruit (binpath, charpath, info) VALUES | |
-- 2 taxons and 1 subtaxon. Reference specimens as tree-roots: | |
( b'00', '0.0', 'apple "0"'), | |
( b'01', '0.1', 'green apple ""'), | |
( b'10', '1.0', 'orange "0"') | |
; | |
-- Succecsors, as set elements, by ID where the prefix is the taxon: | |
INSERT INTO treetest.fruit (binpath, charpath, info) VALUES | |
( b'00'+4, '0.0.0', 'apple "00"'), | |
( (b'00'+4)+4, '0.0.0.0', 'apple "000"'), | |
( b'01'+3, '0.1.0', 'green apple "0"'), | |
( (b'01'+3)+3, '0.1.1', 'green apple "1"'), | |
( b'10'+3, '1.0.0', 'orange "00"'), | |
( (b'10'+3)+3, '1.0.1', 'orange "01"') | |
; | |
-------------- | |
-- all fruits: | |
SELECT * FROM treetest.fruit | |
ORDER BY binpath; | |
--find all oranges: | |
SELECT * FROM treetest.fruit | |
WHERE charpath <@ '1'::ltree; | |
SELECT * FROM treetest.fruit | |
WHERE binpath <@ b'1'; | |
--find all green apples: | |
SELECT * FROM treetest.fruit | |
WHERE charpath <@ '0.1'::ltree; | |
SELECT * FROM treetest.fruit | |
WHERE binpath <@ b'01'; | |
--find all apples: | |
SELECT * FROM treetest.fruit | |
WHERE charpath <@ '0'::ltree; | |
SELECT * FROM treetest.fruit | |
WHERE binpath <@ b'0'; | |
------------------------------ | |
------------------------------ | |
------------------------------ | |
-- BENCHMARK! | |
CREATE TABLE treetest.fruit_vbit ( | |
id bigint NOT NULL PRIMARY KEY, | |
binpath varbit NOT NULL | |
); | |
INSERT INTO treetest.fruit_vbit | |
SELECT treetest.vBit_to_hiddenBig(x), x | |
FROM treetest.generate_vbit_series(20) t(x) | |
; -- 32766 rows | |
CREATE TABLE treetest.fruit_ltree ( | |
id bigint NOT NULL PRIMARY KEY, | |
charpath ltree NOT NULL | |
); | |
INSERT INTO treetest.fruit_ltree | |
SELECT treetest.vBit_to_hiddenBig(x), treetest.str_addpoints(x::text)::ltree | |
FROM treetest.generate_vbit_series(20) t(x) | |
; -- 32766 rows | |
----------------------- | |
--find all apples: | |
-- \timing | |
SELECT COUNT(*) n_t1_r1 FROM treetest.fruit_ltree | |
WHERE charpath <@ '1.0'::ltree; | |
SELECT COUNT(*) n_t1_r2 FROM treetest.fruit_vbit | |
WHERE binpath BETWEEN b'10' AND treetest.MaxBits(b'10'); | |
------------- | |
SELECT COUNT(*) n_t2_r1 FROM treetest.fruit_ltree | |
WHERE charpath <@ '1.0.1.1.0.1.0.1.1.0.0.1'::ltree; | |
SELECT COUNT(*) n_t2_r2 FROM treetest.fruit_vbit | |
WHERE binpath BETWEEN b'101101011001' AND treetest.MaxBits(b'101101011001'); | |
-- 14 bits, test1: | |
-- n_t1_r1=8191; Time: ~14 ms | |
-- n_t1_r2=8191; Time: ~9 ms | |
-- 14 bits, test2: | |
-- n_t2_r1=7; Time: ~12 ms | |
-- n_t2_r2=7; Time: ~8 ms | |
---- | |
-- 16 bits, test1: | |
-- n_t1_r1=32767; Time: ~34 ms | |
-- n_t1_r2=32767; Time: ~27 ms | |
-- 16 bits, test2: | |
-- n_t2_r1=31; Time: ~21 ms | |
-- n_t2_r2=31; Time: ~10 ms | |
---- | |
-- 22 bits, test1: rever | |
-- n_t1_r1=2097151; Time: ~1545 ms | |
-- n_t1_r2=2097151; Time: ~278 ms | |
-- 22 bits, test2: rever | |
-- n_t2_r1=2047; Time: ~8.5 ms | |
-- n_t2_r2=2047; Time: ~0.5 ms | |
--- INDEXING: | |
CREATE INDEX fruit_ltree_idx1 ON treetest.fruit_ltree USING GIST (charpath); | |
CREATE UNIQUE INDEX fruit_vbit_idx1 ON treetest.fruit_vbit (binpath); | |
-- 22 bits, test1: | |
-- n_t1_r1=2097151; Time: ~1080 ms | |
-- n_t1_r2=2097151; Time: ~260 ms | |
-- 22 bits, test2: | |
-- n_t2_r1=2047; Time: ~0.78 ms | |
-- n_t2_r2=2047; Time: ~0.53 ms | |
--------- DISC USAGE: | |
-- relname | size | size_bytes | |
-- ----------------+------------+------------ | |
-- fruit_ltree_idx1 | 3380 MB | 3544358912 | |
-- fruit_vbit_idx1 | 180 MB | 188440576 | |
-- fruit_ltree | 1720 MB | 1803550720 | |
-- fruit_vbit | 354 MB | 371458048 | |
-- fruit_vbit_pkey | 252 MB | 264232960 | |
-- fruit_ltree_pkey | 252 MB | 264232960 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment