Created
November 21, 2019 02:55
-
-
Save macdice/b7eb7015846b8384972c2c7cfd6d2c22 to your computer and use it in GitHub Desktop.
Experimenting with batch and bucket functions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
postgres=# create or replace function hash_combine(seed bigint, hash bigint) | |
returns bigint as | |
$$ | |
select (seed # (hash + 2654435769 + (seed << 6) + (seed >> 2))) | |
& 4294967295; | |
$$ language sql; | |
CREATE FUNCTION | |
postgres=# create or replace function batch_by_rehash(nbatch int, nbucket int, hash bigint) | |
returns int as | |
$$ | |
select (hash_combine(0, hash) % nbatch)::int; | |
$$ language sql; | |
CREATE FUNCTION | |
postgres=# create or replace function make_unsigned(i int) | |
returns bigint as | |
$$ | |
select case when i < 0 then 4294967296::bigint + i else i end; | |
$$ language sql; | |
CREATE FUNCTION | |
postgres=# select count(*) as non_empty_buckets, | |
min(collisions) as min_chain, | |
max(collisions) as max_chain, | |
avg(collisions) as avg_chain, | |
stddev(collisions) as stddev_chain | |
from ( | |
with hashes as (select make_unsigned(hashint8(generate_series(1::bigint, 7000000000::bigint))) hash) | |
select hash % 1048576, | |
count(*) as collisions | |
from hashes | |
where batch_by_rehash(4096, 1048576, hash) = 0 | |
group by 1 | |
) ss; | |
non_empty_buckets | min_chain | max_chain | avg_chain | stddev_chain | |
-------------------+-----------+-----------+-----------------------+------------------ | |
256 | 6212 | 7001 | 6674.5625000000000000 | 110.581298280549 | |
(1 row) | |
postgres=# create or replace function batch_by_high_bits(hash bigint) | |
returns int as | |
$$ | |
select (hash >> 19)::int; | |
$$ language sql; | |
CREATE FUNCTION | |
postgres=# select count(*) as non_empty_buckets, | |
min(collisions) as min_chain, | |
max(collisions) as max_chain, | |
avg(collisions) as avg_chain, | |
stddev(collisions) as stddev_chain | |
from ( | |
with hashes as (select make_unsigned(hashint8(generate_series(1::bigint, 7000000000::bigint))) hash) | |
select hash % 4194304, | |
count(*) as collisions | |
from hashes | |
where batch_by_high_bits(hash) = 0 | |
group by 1 | |
) ss; | |
non_empty_buckets | min_chain | max_chain | avg_chain | stddev_chain | |
-------------------+-----------+-----------+--------------------+-------------------- | |
329685 | 1 | 17 | 2.5945645085460364 | 1.4777061278208620 | |
(1 row) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment