Skip to content

Instantly share code, notes, and snippets.

@atamborrino
Created January 21, 2015 16:52
Show Gist options
  • Save atamborrino/5e464085bb130c095055 to your computer and use it in GitHub Desktop.
Save atamborrino/5e464085bb130c095055 to your computer and use it in GitHub Desktop.
PostgreSQL aggregation function for Algebird's HyperLogLog monoid (serialized as a bytea)
-- sum_hll(a, b): merge two serialized HyperLogLog sketches stored as bytea.
--
-- The first two bytes are treated as a header that must be identical in both
-- inputs; every following byte of the result is the larger of the two bytes
-- found at that position in a and b.
-- NOTE(review): presumably the header carries the serialization format and
-- precision, and each following byte is one register at a fixed position --
-- confirm against the serializer that produces these values.
--
-- Parameters:
--   a, b  serialized sketches of equal length (at least 2 bytes).
-- Returns:
--   a bytea of the same length as the inputs holding the merged sketch.
--   Because the function is STRICT, it returns NULL when either input is NULL.
-- Raises:
--   an exception when either input is shorter than the 2-byte header, when
--   the headers differ, or when the lengths differ.
CREATE OR REPLACE FUNCTION sum_hll(a bytea, b bytea) RETURNS bytea AS $$
DECLARE
    header_size CONSTANT integer := 2;
    local_result bytea := a;
BEGIN
    -- Guard the header reads below: get_byte() on an input shorter than the
    -- header would otherwise abort with a generic index-out-of-range error.
    IF length(a) < header_size OR length(b) < header_size THEN
        RAISE EXCEPTION 'HLL ERROR: HLL IS SHORTER THAN THE 2-BYTE HEADER. CANNOT SUM.';
    END IF;

    IF get_byte(a, 0) <> get_byte(b, 0) OR get_byte(a, 1) <> get_byte(b, 1) THEN
        RAISE EXCEPTION 'HLL ERROR: FIRST 2 BYTES OF HLLs ARE NOT EQUAL. CANNOT SUM.';
    END IF;

    IF length(a) <> length(b) THEN
        RAISE EXCEPTION 'HLL ERROR: HLLs LENGTH ARE NOT EQUAL. CANNOT SUM.';
    END IF;

    -- get_byte()/set_byte() offsets are zero-based, so the payload spans
    -- header_size .. length - 1. The result starts as a copy of a; a byte is
    -- overwritten only where b holds the larger value.
    FOR i IN header_size..(length(a) - 1) LOOP
        IF get_byte(a, i) < get_byte(b, i) THEN
            local_result := set_byte(local_result, i, get_byte(b, i));
        END IF;
    END LOOP;

    RETURN local_result;
END;
$$ LANGUAGE plpgsql STRICT IMMUTABLE;
-- agg_sum_hll(bytea): aggregate that folds a set of serialized HLL sketches
-- into one by repeatedly applying sum_hll.
--
-- No INITCOND is given and sum_hll is STRICT, so PostgreSQL seeds the state
-- with the first non-NULL input and skips NULL inputs; the aggregate returns
-- NULL when there are no non-NULL inputs.
--
-- CREATE AGGREGATE has no IF NOT EXISTS form, so drop any previous definition
-- first to keep this script re-runnable, like the CREATE OR REPLACE FUNCTION
-- used for sum_hll.
DROP AGGREGATE IF EXISTS agg_sum_hll (bytea);
CREATE AGGREGATE agg_sum_hll (bytea) (
    SFUNC = sum_hll,
    STYPE = bytea
);
@atamborrino
Copy link
Author

This version is extremely slow; it needs to be reimplemented in C.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment