Last active
August 21, 2024 20:31
-
-
Save david-sanabria/0d3ff67eb56d2750502aed4186d6a4a7 to your computer and use it in GitHub Desktop.
Base62 Encode/Decode functions for PostgreSQL.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* This script will create base62_encode() and base62_decode() in the current schema of a postgresql database. | |
* Give it a star if you find it useful. | |
*/ | |
CREATE OR REPLACE FUNCTION base62_encode( long_number bigint ) | |
RETURNS text | |
AS $BODY$ | |
/* | |
* base62_encode() | |
* | |
* This function accepts a small or big number (base 10) and reduces its length into a string | |
* that is URI-safe using the upper and lower case 26-letter English alphabet | |
* as well as the numbers 0 - 9. The result is returned as a text string that can be decoded | |
* based to base10 using the base62_decode() function. | |
* | |
* You can find a handy explainer at https://helloacm.com/base62/ | |
* | |
* | |
* HISTORY | |
* 2018-03-13 david sanabria, office of systems integration | |
* - New function | |
* | |
*/ | |
declare | |
k_base constant integer := 62; | |
k_alphabet constant text[] := string_to_array( '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'::text, null); | |
v_return_text text := ''; | |
v_remainder integer; | |
v_interim bigint; | |
begin | |
v_interim := abs( long_number ); -- Negative Numbers (sign) are ignored | |
--Conversion Loop | |
loop | |
v_remainder := v_interim % k_base; | |
v_interim := v_interim / k_base; | |
v_return_text := ''|| k_alphabet[ (v_remainder + 1) ] || v_return_text ; | |
exit when v_interim <= 0; | |
end loop ; | |
return v_return_text; | |
end;$BODY$ | |
LANGUAGE plpgsql | |
immutable /* Makes no changes to data in tables */ | |
returns null ON NULL INPUT /* Don't bother to call if the value is NULL */ | |
SECURITY INVOKER /* No reason to use DEFINER for security */ | |
cost 5 /* A made up number. Any advice? */ | |
; | |
CREATE OR REPLACE FUNCTION base62_decode( encoded_text text ) | |
RETURNS bigint | |
AS $BODY$ | |
/* | |
* base62_decode() | |
* | |
* This function accepts a string that has been base62 encoded. Any characters that are not valid | |
* are simply ignored, so you can safely pass a formatted string and still get a valid result like you | |
* do with a UUID field. | |
* | |
* HISTORY | |
* 2018-03-13 david sanabria, office of systems integration | |
* - New function | |
* | |
*/ | |
declare | |
k_base constant integer := 62; | |
k_alphabet constant text := '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'; | |
v_encoded_arr text[]; | |
v_return_result bigint := 0; | |
v_interim bigint; | |
v_index integer; -- Pointer to input array | |
v_token text; | |
v_power integer := 0; -- reverse pointer, used for position exponent (e.g. 2^32) | |
begin | |
-- check for guard values | |
if encoded_text is null or length( encoded_text ) = 0 then | |
return null; | |
end if; | |
-- reverse the input string to make the exponent math simpler below | |
v_encoded_arr := string_to_array( reverse( encoded_text ) , null ); | |
--Conversion Loop | |
foreach v_token in array v_encoded_arr | |
loop | |
v_index := strpos( k_alphabet, v_token ); | |
if v_index = 0 then | |
raise notice 'Token ignored "%"', v_token; | |
--ignore invalid tokens, which allows formatted strings to be processed (e.g. '{abc-1Lg}') | |
else | |
v_return_result := v_return_result + ( ( v_index - 1) * pow( k_base, v_power) ); | |
v_power := 1 + v_power; --increment after each valid loop | |
end if; | |
end loop; | |
return v_return_result; | |
end;$BODY$ | |
LANGUAGE plpgsql | |
immutable /* Makes no changes to data in tables */ | |
returns null ON NULL INPUT /* Don't bother to call if the value is NULL */ | |
SECURITY INVOKER /* No reason to use DEFINER for security */ | |
cost 5 /* A made up number. Any advice? */ | |
; | |
/* | |
-- TEST Query | |
-- * Requires base62_decode() and base62_encode() | |
-- * Tested in PostgreSQL v9.6.7 | |
with arr as ( | |
select | |
62::integer as v_base | |
,73::bigint as v_test | |
,74400::bigint as v_test2 | |
,'JM0'::text as v_test2_b | |
,09::bigint as v_test3 | |
,999888777666::bigint as v_test4 | |
, string_to_array( '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'::text, null) as v_alphabet | |
) | |
select (v_test % v_base)::integer as test_mod | |
, (v_test / v_base)::bigint as test_div | |
,abs( v_test ) as test_abs | |
,v_alphabet[ (v_test % v_base) + 1 ] as test_arr | |
,cwsdoc.base62_encode( v_test2 ) as test_encode | |
,cwsdoc.base62_encode( null ) as test_encode2 | |
,cwsdoc.base62_encode( v_test4 ) as test_encode4 | |
,cwsdoc.base62_decode( cwsdoc.base62_encode( v_test ) ) as test_decode1 | |
,cwsdoc.base62_decode( cwsdoc.base62_encode( v_test2 ) ) as test_decode2 | |
,cwsdoc.base62_decode( cwsdoc.base62_encode( v_test3 ) ) as test_decode3 | |
,cwsdoc.base62_decode( cwsdoc.base62_encode( v_test4 ) ) as test_decode4 | |
,cwsdoc.base62_decode( null ) as test_decode5 | |
,cwsdoc.base62_decode( '{JM-0}' ) as test_decode6 | |
,arr.* | |
from arr; | |
*/ |
In my case, I'd like to encode a hash, larger than a bigint (which is 8 bytes AFAIK), so what would be the variation that encodes from a bytea
and decodes to a bytea
? I'm trying to move existing code in C++ that runs client side, to the server-side, that is used to convert arbitrarily long names into DB-specific ROLE names, which uses base62 to ensure unicity of the names per-DB (since ROLE names are cluster-wide).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I figured out the problem where some decodes are incorrect has to do with inlining
v_interim
in the source code. If you change the body of the else statement inbase62_decode
to include thev_interim
variable then it works without a problem:(and yes
select base62_encode(base62_decode('qltxDQbGP7')); -- returns qltxDQbGP7 correctly
)