Created
June 2, 2021 08:55
-
-
Save joelonsql/b0cb40aac4b3f5f5fffa099569269581 to your computer and use it in GitHub Desktop.
HyperLogLog demo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- | |
-- Here comes some general advise on how to use HyperLogLog | |
-- for someone who wants to implement a YouTube-like service | |
-- using the awesome https://github.com/citusdata/postgresql-hll PostgreSQL extension | |
-- | |
CREATE TABLE counter ( | |
id text NOT NULL, | |
sketch hll NOT NULL DEFAULT hll_empty(), | |
PRIMARY KEY (id) | |
); | |
-- register new thing to count | |
INSERT INTO counter (id) VALUES ('my_cool_cat_video'); | |
-- | |
-- get the HLL-sketch datastructure | |
-- and also estimate the cardinality (=number of unique views) for it | |
-- | |
-- read-only operation, thus horizontally scalable | |
-- | |
SELECT | |
sketch, | |
hll_cardinality(sketch) | |
FROM counter WHERE id = 'my_cool_cat_video'; | |
-- | |
-- each time the video is watched, | |
-- the client should mutate the sketch-data structure | |
-- | |
SELECT hll_add([current sketch value], hll_hash_bigint([the userid that watched the video])); | |
-- | |
-- if the new value returned by hll_add() is different than the [current sketch value], | |
-- which will happen more and more infrequent over time, thus it scales well, | |
-- this will cause a write operation to the database: | |
-- | |
UPDATE counter SET sketch = [new sketch value] WHERE id = 'my_cool_cat_video'; | |
-- | |
-- the hll_add(...,hll_hash_bigint()) is read-only can be computed by any PostgreSQL instance, | |
-- running anywhere, and doesn't need to run on the same instance that keeps the actual | |
-- value to update, thus is scales horizontally. | |
-- | |
-- | |
-- Example: | |
-- | |
INSERT INTO counter (id) VALUES ('my_cool_cat_video'); | |
SELECT | |
sketch, | |
hll_cardinality(sketch) | |
FROM counter WHERE id = 'my_cool_cat_video'; | |
/* | |
sketch | hll_cardinality | |
----------+----------------- | |
\x118b7f | 0 | |
(1 row) | |
*/ | |
SELECT hll_add('\x118b7f'::hll, hll_hash_bigint(12345)); | |
/* | |
hll_add | |
-------------------------- | |
\x128b7f3cd2dbd3ee832997 | |
(1 row) | |
*/ | |
SELECT hll_cardinality('\x128b7f3cd2dbd3ee832997'::hll); | |
/* | |
hll_cardinality | |
----------------- | |
1 | |
(1 row) | |
*/ | |
-- | |
-- same user watches same video again, value is unchanged: | |
-- | |
SELECT hll_add('\x128b7f3cd2dbd3ee832997'::hll, hll_hash_bigint(12345)); | |
/* | |
hll_add | |
-------------------------- | |
\x128b7f3cd2dbd3ee832997 | |
(1 row) | |
*/ | |
-- | |
-- some other user watches the same video, value this time changes: | |
-- | |
SELECT hll_add('\x128b7f3cd2dbd3ee832997'::hll, hll_hash_bigint(54321)); | |
/* | |
hll_add | |
------------------------------------------ | |
\x128b7ffdda8f47ee4c2b8f3cd2dbd3ee832997 | |
(1 row) | |
*/ | |
SELECT hll_cardinality('\x128b7ffdda8f47ee4c2b8f3cd2dbd3ee832997'::hll); | |
/* | |
hll_cardinality | |
----------------- | |
2 | |
(1 row) | |
*/ | |
-- | |
-- after thousands of users have watched the video, | |
-- the counter will not be 100% accurate and will just be an estimation, | |
-- but a good estimation with predictable error: | |
-- "The HyperLogLog algorithm is able to estimate cardinalities of > 109 with a typical accuracy (standard error) of 2%, using 1.5 kB of memory." | |
-- https://en.wikipedia.org/wiki/HyperLogLog | |
-- | |
-- | |
-- Simulate 10000 users watching the video: | |
-- | |
DO $$ | |
DECLARE | |
_sketch hll; | |
BEGIN | |
_sketch := hll_empty(); | |
FOR i IN 1..10000 LOOP | |
_sketch := hll_add(_sketch, hll_hash_bigint(i)); | |
END LOOP; | |
RAISE NOTICE 'count estimation: %, sketch value: %', hll_cardinality(_sketch), _sketch; | |
END | |
$$; | |
/* | |
NOTICE: count estimation: 9923.837380245724, sketch value: \x148b7f10c46114621884540c43210681106210c82220a31884539c433088618c6618c4310462214a2210621048310063288822144228c6409044188c120421310a421022194e320cc3110a2088c520c4118d04208a410c23110c31904420862114661846348822188853944320842188832902210c661144220c2510c8329cc119426184683086420825398e208ce311081008e4110e3094a4188832112721882218221108220c652904321083304a51048230c4910cc2120c339ca20104219421194a4190a329127188a413043198a820c8318c25110a3210431908121042194c2091021244208c861906410c812086318c4638086288e221465388851142219c6320c6200ca42008319ca218866418a20886328445210821904808c641086218c8218d023104318082110631206628c8218c6310465408c72104319c63188881904331045190c46106218cc7304812108420c6338c6510c4220c85094251988339043110883144418c6619467210a218442108642088610822208611188318ca331823194c61906708c82104671886220863095842986410c6311442208a641442208432944311863214c209423190a81148228c6418ca73946420c83304e8594c4210642804338883084854104431064290603884318824388243144320ca320844184422142440c4618c6120c23211053086228443208822104610c8518c84288633186229501210441148228842214631946419ce21248311062214c21246231445284e41988508962110c319c432888418c65204453108518ca210c861182721085114a320c8318c8810864110621106228881214e5304a3388c720c81194a211863191471148419cc419cc1190431188210c4118c62108831842732082090412946211c2220c23388c409028188a329468198e011c81290a21188320ce320c42210620142818469111041906438c83314a2214e211c222844520862490a32088120c4510c82110861906229c821884221902295041944310c83194631882811883110c2188a619483108632104210c8121862288441948529081188631886718886198232886220844188821884510c4321065508a510063294641148318843190831b88800c6308c6328c673886311066210631908521c8329465109011086328c421882228ce220c44206012106310863390c62048538c6311084208632842220c4419063190c1190470946510c4418042110840886730c872186228863518433848218c42290e120c62084e51908430cc5308422084268c45210831108228883208e43104320d0310c42120831186210c84198632a4431082130ca6194c328466218c320c0710c83208a12102220c4418c231946221464110a3194a61046728c83110a519464108633886310c6221cc21886321464204631884520d01194620a0a6304c411047204a22908720c22190e4294a2284c4298c538ca730cc51804119885194601186318c630946420c8239862088222186248c413906608c4810c6511483128620a4c619823214641286441c8428461288a32088131c4518c6628c6138cc30882318c432884628c8328c43208c308cc2390431086319ca10a484190a418c242846410c432108618462188a21944528c671206420866288642990420c4318c43388651844110845108843082628864084630906611043190a4090a30844218c263144418486198a3194a228c4308cc2090c310d84288620a0a21048411464108c4108c308c421146511084108431046511044118412146410824 | |
*/ | |
-- | |
-- if we now simulate letting userid 10001 watch the video, | |
-- it is likely the value will not change, since it only changes | |
-- for some values, which statistically over time makes a good estimation. | |
-- | |
-- let's see if the HLL value is unchanged, when we simulate userid 10001 watching the video: | |
-- | |
SELECT | |
'\x148b7f10c46114621884540c43210681106210c82220a31884539c433088618c6618c4310462214a2210621048310063288822144228c6409044188c120421310a421022194e320cc3110a2088c520c4118d04208a410c23110c31904420862114661846348822188853944320842188832902210c661144220c2510c8329cc119426184683086420825398e208ce311081008e4110e3094a4188832112721882218221108220c652904321083304a51048230c4910cc2120c339ca20104219421194a4190a329127188a413043198a820c8318c25110a3210431908121042194c2091021244208c861906410c812086318c4638086288e221465388851142219c6320c6200ca42008319ca218866418a20886328445210821904808c641086218c8218d023104318082110631206628c8218c6310465408c72104319c63188881904331045190c46106218cc7304812108420c6338c6510c4220c85094251988339043110883144418c6619467210a218442108642088610822208611188318ca331823194c61906708c82104671886220863095842986410c6311442208a641442208432944311863214c209423190a81148228c6418ca73946420c83304e8594c4210642804338883084854104431064290603884318824388243144320ca320844184422142440c4618c6120c23211053086228443208822104610c8518c84288633186229501210441148228842214631946419ce21248311062214c21246231445284e41988508962110c319c432888418c65204453108518ca210c861182721085114a320c8318c8810864110621106228881214e5304a3388c720c81194a211863191471148419cc419cc1190431188210c4118c62108831842732082090412946211c2220c23388c409028188a329468198e011c81290a21188320ce320c42210620142818469111041906438c83314a2214e211c222844520862490a32088120c4510c82110861906229c821884221902295041944310c83194631882811883110c2188a619483108632104210c8121862288441948529081188631886718886198232886220844188821884510c4321065508a510063294641148318843190831b88800c6308c6328c673886311066210631908521c8329465109011086328c421882228ce220c44206012106310863390c62048538c6311084208632842220c4419063190c1190470946510c4418042110840886730c872186228863518433848218c42290e120c62084e51908430cc5308422084268c45210831108228883208e43104320d0310c42120831186210c84198632a4431082130ca6194c328466218c320c0710c83208a12102220c4418c231946221464110a3194a61046728c83110a519464108633886310c6221cc21886321464204631884520d01194620a0a6304c411047204a22908720c22190e4294a2284c4298c538ca730cc51804119885194601186318c630946420c8239862088222186248c413906608c4810c6511483128620a4c619823214641286441c8428461288a32088131c4518c6628c6138cc30882318c432884628c8328c43208c308cc2390431086319ca10a484190a418c242846410c432108618462188a21944528c671206420866288642990420c4318c43388651844110845108843082628864084630906611043190a4090a30844218c263144418486198a3194a228c4308cc2090c310d84288620a0a21048411464108c4108c308c421146511084108431046511044118412146410824'::hll | |
= | |
hll_add | |
( | |
'\x148b7f10c46114621884540c43210681106210c82220a31884539c433088618c6618c4310462214a2210621048310063288822144228c6409044188c120421310a421022194e320cc3110a2088c520c4118d04208a410c23110c31904420862114661846348822188853944320842188832902210c661144220c2510c8329cc119426184683086420825398e208ce311081008e4110e3094a4188832112721882218221108220c652904321083304a51048230c4910cc2120c339ca20104219421194a4190a329127188a413043198a820c8318c25110a3210431908121042194c2091021244208c861906410c812086318c4638086288e221465388851142219c6320c6200ca42008319ca218866418a20886328445210821904808c641086218c8218d023104318082110631206628c8218c6310465408c72104319c63188881904331045190c46106218cc7304812108420c6338c6510c4220c85094251988339043110883144418c6619467210a218442108642088610822208611188318ca331823194c61906708c82104671886220863095842986410c6311442208a641442208432944311863214c209423190a81148228c6418ca73946420c83304e8594c4210642804338883084854104431064290603884318824388243144320ca320844184422142440c4618c6120c23211053086228443208822104610c8518c84288633186229501210441148228842214631946419ce21248311062214c21246231445284e41988508962110c319c432888418c65204453108518ca210c861182721085114a320c8318c8810864110621106228881214e5304a3388c720c81194a211863191471148419cc419cc1190431188210c4118c62108831842732082090412946211c2220c23388c409028188a329468198e011c81290a21188320ce320c42210620142818469111041906438c83314a2214e211c222844520862490a32088120c4510c82110861906229c821884221902295041944310c83194631882811883110c2188a619483108632104210c8121862288441948529081188631886718886198232886220844188821884510c4321065508a510063294641148318843190831b88800c6308c6328c673886311066210631908521c8329465109011086328c421882228ce220c44206012106310863390c62048538c6311084208632842220c4419063190c1190470946510c4418042110840886730c872186228863518433848218c42290e120c62084e51908430cc5308422084268c45210831108228883208e43104320d0310c42120831186210c84198632a4431082130ca6194c328466218c320c0710c83208a12102220c4418c231946221464110a3194a61046728c83110a519464108633886310c6221cc21886321464204631884520d01194620a0a6304c411047204a22908720c22190e4294a2284c4298c538ca730cc51804119885194601186318c630946420c8239862088222186248c413906608c4810c6511483128620a4c619823214641286441c8428461288a32088131c4518c6628c6138cc30882318c432884628c8328c43208c308cc2390431086319ca10a484190a418c242846410c432108618462188a21944528c671206420866288642990420c4318c43388651844110845108843082628864084630906611043190a4090a30844218c263144418486198a3194a228c4308cc2090c310d84288620a0a21048411464108c4108c308c421146511084108431046511044118412146410824'::hll, | |
hll_hash_bigint(10001) | |
) AS test_if_unchanged | |
; | |
/* | |
test_if_unchanged | |
------------------- | |
t | |
(1 row) | |
*/ | |
-- | |
-- Yes! We were lucky, the hash of 10001 didn't cause any update of the value, | |
-- we thus didn't need to update the value in the database, and saved a write-operation this time. | |
-- | |
-- | |
-- The HyperLogLog extension allows controlling how often such updates occur, | |
-- and how good estimations is desired, using parameters to the hll_empty() function, | |
-- see the documentation for details. By default, it keeps exact count for small values, | |
-- which is probably a good, since it's more notable if there are differences for small values, | |
-- since a user might know some friend watched their video, but the counter is not updated. | |
-- But for bigger values, it switches over to estimation, which is probably OK since users | |
-- won't notice if the value is off by a few values. | |
-- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment