Created
January 19, 2021 16:49
-
-
Save tyrauber/5b6d85d8af8c80659f74af2aa295a7d7 to your computer and use it in GitHub Desktop.
numPadEnc && numPadDec : Numeric Compression in Postgres
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE OR REPLACE FUNCTION numPadEnc( | |
IN source NUMERIC DEFAULT (extract(epoch from now())*1000000)::bigint, | |
IN prefix TEXT DEFAULT '', | |
IN pad TEXT DEFAULT '123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', | |
IN seed INTEGER DEFAULT floor(random() * 10 + 1)::int | |
) RETURNS TEXT AS $$ | |
DECLARE | |
calc TEXT := source::text; | |
output TEXT := prefix; | |
chr TEXT := ''; | |
n TEXT := ''; | |
msg TEXT :=''; | |
BEGIN | |
WHILE LENGTH(calc) > 0 LOOP | |
n := n || SUBSTRING(calc,1,1); | |
calc := SUBSTRING(calc,2,length(calc)); | |
IF (n='0') OR ((n || SUBSTRING(calc,1,1))::integer > LENGTH(pad)) OR (length(calc)=0) THEN | |
chr := SUBSTRING(pad, n::integer, 1); | |
output := output || COALESCE(NULLIF(chr, ''), '0'); | |
n := ''; | |
end IF; | |
IF (seed = LENGTH(calc) AND seed != 0) THEN | |
output := output||'_'; | |
END IF; | |
END LOOP; | |
RETURN output; | |
END; | |
$$ LANGUAGE plpgsql VOLATILE; | |
CREATE OR REPLACE FUNCTION numPadDec( | |
IN source TEXT DEFAULT '', | |
IN prefix TEXT DEFAULT '', | |
IN pad TEXT DEFAULT '123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' | |
) RETURNS TEXT AS $$ | |
DECLARE | |
calc TEXT := REGEXP_REPLACE(REGEXP_REPLACE(source, prefix, ''), '_', '', 'g'); | |
output TEXT := ''; | |
chr TEXT := ''; | |
n TEXT := ''; | |
msg TEXT :=''; | |
BEGIN | |
FOREACH n IN ARRAY regexp_split_to_array(calc, '') | |
LOOP | |
output := output || strpos(pad, n); | |
END LOOP; | |
RETURN output; | |
END; | |
$$ LANGUAGE plpgsql VOLATILE; | |
create or replace function print(IN ext TEXT, INOUT text TEXT) returns text as $$ | |
begin | |
raise notice '% %', ext,text; | |
end | |
$$ language plpgsql; | |
CREATE OR REPLACE FUNCTION numPadTest( | |
IN source TEXT DEFAULT '0', | |
IN prefix TEXT DEFAULT '', | |
IN pad TEXT DEFAULT '123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', | |
IN seed INTEGER DEFAULT floor(random() * 10 + 1)::int | |
) RETURNS TEXT AS $$ | |
DECLARE | |
calc NUMERIC := source::numeric; | |
msg TEXT :=''; | |
enc TEXT := ''; | |
dec TEXT := ''; | |
BEGIN | |
WHILE (true) LOOP | |
calc := calc::numeric+1; | |
enc := numPadEnc(calc::numeric, prefix, pad, seed); | |
dec := numPadDec(enc, prefix, pad); | |
IF (calc::text != dec) THEN | |
msg := print('source '|| calc || ' enc ' || enc || ' dec ' || dec, (calc::text = dec)::text ); | |
exit; | |
END IF; | |
END LOOP; | |
RETURN msg; | |
END | |
$$ LANGUAGE plpgsql VOLATILE; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
numPadEnc && numPadDec
Numeric Compression in Postgres
Generate short unique letter / number strings, without having conflicts or having to hit the db,
by encrypting time since the epoch using a number pad, and a randomized seed.
Use numPadEnc() as a default to a column:
numPadEnc() takes a source numeric, or uses milliseconds since the epoch, for example (1611072681016452),
iterates through it, chunking numbers less than the pad's length and converting them using the number
as the index in the pad.
For example, with the default pad
123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
Note: Chunks cannot start with zero, so zero automatically gets converted to 0;
In order to prevent timing collisions, an underscore may be inserted at random within the output.
A prefix can be provided, for example "u_" for users, to make the ids globally unique across many tables.
The string can be decrypted back to the original.
Why not just use incrementing numerics? Because you expose the total number of records,
the order in which the records were produced, and at large numbers, the encrypted output can
be considerable shorter than the original numeric, up to 50%;
The test validates encrypted and decryption, by iterating through numbers.
Got a better approach for generating shorter unique letter/number strings? Let's hear it.