Created
January 21, 2015 16:52
-
-
Save atamborrino/5e464085bb130c095055 to your computer and use it in GitHub Desktop.
PostgreSQL aggregation function for Algebird's HyperLogLog monoid (serialized as a bytea)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- sum_hll(a, b): merge two serialized HyperLogLog sketches stored as bytea.
--
-- Bytes 0 and 1 are treated as a header that must be identical in both
-- inputs; every following byte of the result is the larger of the two
-- corresponding input bytes (byte-wise maximum).
-- NOTE(review): presumably bytes 2..n-1 are the dense HLL registers of
-- Algebird's serialization -- confirm against the writer of these values.
--
-- Parameters:
--   a, b  serialized sketches of identical length and identical 2-byte header.
-- Returns:
--   a bytea of the same length holding the merged sketch; NULL when either
--   argument is NULL (the function is declared STRICT).
-- Raises:
--   an exception when an input is shorter than the 2-byte header, when the
--   headers differ, or when the lengths differ.
CREATE OR REPLACE FUNCTION sum_hll(a bytea, b bytea) RETURNS bytea AS $$
DECLARE
    merged bytea := a;   -- start from a; only bytes where b is larger get overwritten
    byte_b integer;      -- current byte of b, read once per iteration
BEGIN
    -- Guard the header reads below: get_byte() on a too-short value would
    -- otherwise abort with a generic "index out of valid range" error.
    IF length(a) < 2 OR length(b) < 2 THEN
        RAISE EXCEPTION 'HLL ERROR: HLL IS SHORTER THAN ITS 2-BYTE HEADER. CANNOT SUM.';
    END IF;

    IF get_byte(a, 0) <> get_byte(b, 0) OR get_byte(a, 1) <> get_byte(b, 1) THEN
        RAISE EXCEPTION 'HLL ERROR: FIRST 2 BYTES OF HLLs ARE NOT EQUAL. CANNOT SUM.';
    END IF;

    IF length(a) <> length(b) THEN
        RAISE EXCEPTION 'HLL ERROR: HLLs LENGTH ARE NOT EQUAL. CANNOT SUM.';
    END IF;

    -- get_byte/set_byte offsets are zero-based, so the payload spans 2 .. length - 1.
    FOR i IN 2..(length(a) - 1) LOOP
        byte_b := get_byte(b, i);
        IF get_byte(a, i) < byte_b THEN
            merged := set_byte(merged, i, byte_b);
        END IF;
    END LOOP;

    RETURN merged;
END;
$$ LANGUAGE plpgsql STRICT IMMUTABLE;
-- agg_sum_hll(bytea): aggregate that folds a set of serialized HLL values
-- into one by repeatedly applying sum_hll.
-- No INITCOND is given and sum_hll is STRICT, so PostgreSQL uses the first
-- non-null input as the initial state and skips null inputs; the result is
-- NULL when there are no non-null inputs.
CREATE AGGREGATE agg_sum_hll (bytea) (
    SFUNC = sum_hll,
    STYPE = bytea
);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This version is extremely slow; it should be moved to a C implementation.