Skip to content

Instantly share code, notes, and snippets.

@ppKrauss
Last active March 11, 2025 20:30
Show Gist options
  • Save ppKrauss/226fd3fb0dca0a6c6d4f875cb93aa2db to your computer and use it in GitHub Desktop.
Save ppKrauss/226fd3fb0dca0a6c6d4f875cb93aa2db to your computer and use it in GitHub Desktop.
BitString vs ltree benchmark
-- for https://dba.stackexchange.com/q/345559/90651
------------------
CREATE EXTENSION IF NOT EXISTS ltree;
DROP SCHEMA IF EXISTS treetest CASCADE;
CREATE SCHEMA treetest;
CREATE FUNCTION treetest.hiddenBig_to_vBit(x bigint) RETURNS varbit AS $f$
SELECT substring( x_bin from 1+position(B'1' in x_bin) )
FROM (select x::bit(64)) t(x_bin) -- WHERE $1>7 AND $1<4611686018427387904
$f$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE;
COMMENT ON FUNCTION treetest.hiddenBig_to_vBit(bigint)
IS 'Decodes hidden_bit Bigint bitStrings (to proper Varbit bitStrings).'
;
CREATE FUNCTION treetest.vBit_to_hiddenBig(x varbit) RETURNS bigint AS $f$
SELECT overlay( b'0'::bit(64) PLACING (b'1' || x) FROM 64-length(x) )::bigint
$f$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE;
COMMENT ON FUNCTION treetest.vbit_to_hiddenbig(varbit)
IS 'Encodes proper VarBit bitString into a hidden_bit Bigint bitString.'
;
CREATE or replace FUNCTION treetest.str_addpoints(text) RETURNS text AS $f$
SELECT string_agg(x,'.') FROM regexp_split_to_table($1,'') t(x)
$f$ language SQL IMMUTABLE;
CREATE or replace FUNCTION treetest.MaxBits(p_x varbit, p_bitlen int default 22) RETURNS varbit AS $f$
SELECT substring(p_x || '11111111111111111111111', 1, p_bitlen)
$f$ language SQL IMMUTABLE;
----------------------
-- extra functions
CREATE FUNCTION treetest.cutlast1s(p_x varbit) RETURNS varbit AS $f$
SELECT CASE -- calculates hsucc()
WHEN len>0 AND get_bit(p_x,len-1)::boolean THEN treetest.cutlast1s( substring(p_x,1,len-1) )
ELSE CASE WHEN len=0 THEN ''::varbit ELSE substring(p_x,1,len-1) || '1'::bit(1) END
END
FROM (SELECT length(p_x)) t(len)
$f$ LANGUAGE SQL IMMUTABLE;
COMMENT ON FUNCTION treetest.cutlast1s(varbit)
IS 'cut last 1s and replace remaining zero by 1. For use in hsucc().'
;
CREATE FUNCTION treetest.hsucc(p_x varbit, p_bits int DEFAULT 64) RETURNS varbit AS $f$
SELECT CASE
WHEN length(p_x)<p_bits THEN p_x || '0'::bit(1)
ELSE treetest.cutlast1s(p_x)
END
$f$ LANGUAGE SQL IMMUTABLE;
COMMENT ON FUNCTION treetest.hsucc(varbit,int)
IS 'Hierarchical successor. See also lexicographic order and https://en.wikipedia.org/wiki/Successor_function'
;
CREATE OPERATOR + (
leftarg = VARBIT,
rightarg = INT,
procedure = treetest.hsucc
); -- e.g. SELECT ((b'01'+4)+4)+4;
-----
CREATE FUNCTION treetest.IsDescendantOf(VARBIT, VARBIT)
returns boolean language sql immutable as $f$
SELECT $1 BETWEEN $2 AND treetest.MaxBits($2)
$f$;
CREATE OPERATOR <@ (
leftarg = VARBIT,
rightarg = VARBIT,
procedure = treetest.IsDescendantOf
-- commutator = @>
);
----------------------
-- -- GENERATE SERIES:
CREATE FUNCTION treetest.generatep_hb_series(bit_len int) RETURNS setof bigint as $f$
SELECT i::bigint | maxval as x
FROM (SELECT (2^bit_len)::bigint) t(maxval),
LATERAL generate_series(0,maxval-1) s(i)
$f$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE;
COMMENT ON FUNCTION treetest.generatep_hb_series
IS 'Obtain a sequency of hidden_bit codes of fixed length, from zero to 2^bit_len-1.'
;
CREATE FUNCTION treetest.generate_hb_series(
bit_len int,
p_non_recursive boolean default false
) RETURNS setof bigint as $f$
DECLARE
s text;
BEGIN
IF p_non_recursive THEN
s := 'SELECT * FROM treetest.generatep_hb_series('|| bit_len::text ||')';
ELSE
s := 'SELECT * FROM treetest.generatep_hb_series(1)';
FOR i IN 2..bit_len LOOP
s := s || ' UNION ALL SELECT * FROM treetest.generatep_hb_series('|| i::text ||')';
END LOOP;
END IF;
RETURN QUERY EXECUTE s;
END;
$f$ LANGUAGE PLpgSQL IMMUTABLE;
COMMENT ON FUNCTION treetest.generate_hb_series(int,boolean)
IS 'Obtain a sequency of all hidden_bit codes, for generate_vbit_series().'
;
CREATE or replace FUNCTION treetest.generate_vbit_series(
bit_len int,
p_non_recursive boolean default false
) RETURNS setof varbit as $f$
SELECT treetest.hiddenbig_to_vbit(hb)
FROM treetest.generate_hb_series(
CASE WHEN bit_len>62 THEN 62 WHEN bit_len<=0 THEN 1 ELSE bit_len END,
p_non_recursive
) t(hb)
WHERE bit_len>=1
UNION ALL
SELECT x FROM ( VALUES (''::varbit) ) s(x) WHERE bit_len=0
ORDER BY 1
$f$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE;
COMMENT ON FUNCTION treetest.generate_vbit_series(int,boolean)
IS 'Obtain a sequency of all bitStrings of bit_len, with 63>bit_len>0.'
;
-- SELECT * FROM treetest.generate_vbit_series(14);
-----------------
drop VIEW treetest.table_disk_usage;
CREATE VIEW treetest.table_disk_usage AS
SELECT
relname,
pg_size_pretty(table_size) AS size,
table_size as size_bytes
FROM (
SELECT
pg_catalog.pg_namespace.nspname AS schema_name,
relname,
pg_relation_size(pg_catalog.pg_class.oid) AS table_size
FROM pg_catalog.pg_class
JOIN pg_catalog.pg_namespace ON relnamespace = pg_catalog.pg_namespace.oid
) t
WHERE schema_name='treetest'
ORDER BY table_size DESC;
-- SELECT * FROM treetest.table_disk_usage;
------------------------------
------------------------------
------------------------------
-- DEMO
CREATE TABLE treetest.fruit (
id SERIAL NOT NULL PRIMARY KEY,
binpath varbit NOT NULL,
charpath ltree NOT NULL,
info text
);
INSERT INTO treetest.fruit (binpath, charpath, info) VALUES
-- 2 taxons and 1 subtaxon. Reference specimens as tree-roots:
( b'00', '0.0', 'apple "0"'),
( b'01', '0.1', 'green apple ""'),
( b'10', '1.0', 'orange "0"')
;
-- Succecsors, as set elements, by ID where the prefix is the taxon:
INSERT INTO treetest.fruit (binpath, charpath, info) VALUES
( b'00'+4, '0.0.0', 'apple "00"'),
( (b'00'+4)+4, '0.0.0.0', 'apple "000"'),
( b'01'+3, '0.1.0', 'green apple "0"'),
( (b'01'+3)+3, '0.1.1', 'green apple "1"'),
( b'10'+3, '1.0.0', 'orange "00"'),
( (b'10'+3)+3, '1.0.1', 'orange "01"')
;
--------------
-- all fruits:
SELECT * FROM treetest.fruit
ORDER BY binpath;
--find all oranges:
SELECT * FROM treetest.fruit
WHERE charpath <@ '1'::ltree;
SELECT * FROM treetest.fruit
WHERE binpath <@ b'1';
--find all green apples:
SELECT * FROM treetest.fruit
WHERE charpath <@ '0.1'::ltree;
SELECT * FROM treetest.fruit
WHERE binpath <@ b'01';
--find all apples:
SELECT * FROM treetest.fruit
WHERE charpath <@ '0'::ltree;
SELECT * FROM treetest.fruit
WHERE binpath <@ b'0';
------------------------------
------------------------------
------------------------------
-- BENCHMARK!
CREATE TABLE treetest.fruit_vbit (
id bigint NOT NULL PRIMARY KEY,
binpath varbit NOT NULL
);
INSERT INTO treetest.fruit_vbit
SELECT treetest.vBit_to_hiddenBig(x), x
FROM treetest.generate_vbit_series(20) t(x)
; -- 32766 rows
CREATE TABLE treetest.fruit_ltree (
id bigint NOT NULL PRIMARY KEY,
charpath ltree NOT NULL
);
INSERT INTO treetest.fruit_ltree
SELECT treetest.vBit_to_hiddenBig(x), treetest.str_addpoints(x::text)::ltree
FROM treetest.generate_vbit_series(20) t(x)
; -- 32766 rows
-----------------------
--find all apples:
-- \timing
SELECT COUNT(*) n_t1_r1 FROM treetest.fruit_ltree
WHERE charpath <@ '1.0'::ltree;
SELECT COUNT(*) n_t1_r2 FROM treetest.fruit_vbit
WHERE binpath BETWEEN b'10' AND treetest.MaxBits(b'10');
-------------
SELECT COUNT(*) n_t2_r1 FROM treetest.fruit_ltree
WHERE charpath <@ '1.0.1.1.0.1.0.1.1.0.0.1'::ltree;
SELECT COUNT(*) n_t2_r2 FROM treetest.fruit_vbit
WHERE binpath BETWEEN b'101101011001' AND treetest.MaxBits(b'101101011001');
-- 14 bits, test1:
-- n_t1_r1=8191; Time: ~14 ms
-- n_t1_r2=8191; Time: ~9 ms
-- 14 bits, test2:
-- n_t2_r1=7; Time: ~12 ms
-- n_t2_r2=7; Time: ~8 ms
----
-- 16 bits, test1:
-- n_t1_r1=32767; Time: ~34 ms
-- n_t1_r2=32767; Time: ~27 ms
-- 16 bits, test2:
-- n_t2_r1=31; Time: ~21 ms
-- n_t2_r2=31; Time: ~10 ms
----
-- 22 bits, test1: rever
-- n_t1_r1=2097151; Time: ~1545 ms
-- n_t1_r2=2097151; Time: ~278 ms
-- 22 bits, test2: rever
-- n_t2_r1=2047; Time: ~8.5 ms
-- n_t2_r2=2047; Time: ~0.5 ms
--- INDEXING:
CREATE INDEX fruit_ltree_idx1 ON treetest.fruit_ltree USING GIST (charpath);
CREATE UNIQUE INDEX fruit_vbit_idx1 ON treetest.fruit_vbit (binpath);
-- 22 bits, test1:
-- n_t1_r1=2097151; Time: ~1080 ms
-- n_t1_r2=2097151; Time: ~260 ms
-- 22 bits, test2:
-- n_t2_r1=2047; Time: ~0.78 ms
-- n_t2_r2=2047; Time: ~0.53 ms
--------- DISC USAGE:
-- relname | size | size_bytes
-- ----------------+------------+------------
-- fruit_ltree_idx1 | 3380 MB | 3544358912
-- fruit_vbit_idx1 | 180 MB | 188440576
-- fruit_ltree | 1720 MB | 1803550720
-- fruit_vbit | 354 MB | 371458048
-- fruit_vbit_pkey | 252 MB | 264232960
-- fruit_ltree_pkey | 252 MB | 264232960
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment