Skip to content

Instantly share code, notes, and snippets.

@storeman
Last active September 24, 2024 08:49
Show Gist options
  • Save storeman/cecb10bab249f43562cddabc1d9dd7c9 to your computer and use it in GitHub Desktop.
Save storeman/cecb10bab249f43562cddabc1d9dd7c9 to your computer and use it in GitHub Desktop.
PHP unserialize in Postgresql to json
/**
Decode a php serialized value to json. This function only supports basic
data types:
- arrays (will always become a json object)
- booleans
- integers
- floats
- strings
- NULL
The php_unserialize(text) function is a helper function which extracts the first value
found in the string and returns a ROW( vartype, varval, serializedlength)
The php_unserialize_to_json(text) function returns the json value extracted from
the serialized string.
*/
---
--- This function is the helper function
---
CREATE OR REPLACE FUNCTION public.php_unserialize(str text)
RETURNS json AS
$BODY$
DECLARE
vartype CHAR;
varlength INT;
jsonstr TEXT;
varcount INT;
jsonval JSONB;
arrkey JSON;
arrval JSON;
-- String length of the serialized data
serialized_string_length INT;
BEGIN
CASE substring(str, 1, 1)
WHEN 'a' THEN -- array
-- init object
jsonval := '{}'::jsonb;
-- remove the "a" and ":" characters
str := substring(str, 3);
-- Detect number of values in array
varlength := substring(str, 1, position(':' IN str) - 1)::INT;
-- Base size of array is 5 (a:[size]:{})
serialized_string_length := 5 + char_length(varlength::TEXT);
-- If no values, return empty object, as this always returns objects
IF varlength = 0 THEN
return json_build_array('array', '{}'::JSON, serialized_string_length)::JSON;
END IF;
-- remove the array size and ":{"
str := substring(str, char_length(varlength::TEXT) + 3);
-- Find the number of variables specified
FOR varcount IN 1 .. varlength LOOP
-- Find the value of the key and remove it from base string
arrkey := php_unserialize(str);
str := substring(str, (arrkey->>2)::INT + 1);
-- Find the value of the value and remove it from base string
arrval := php_unserialize(str);
str := substring(str, (arrval->>2)::INT + 1);
serialized_string_length := serialized_string_length + (arrkey->>2)::INT + (arrval->>2)::INT;
-- Append value
jsonval := jsonval || jsonb_build_object(arrkey->>1, arrval->1);
END LOOP;
return json_build_array('array', jsonval, serialized_string_length);
WHEN 'b' THEN -- boolean
return json_build_array('bool',(CASE substring(str, 3, 1) WHEN '1' THEN TRUE ELSE FALSE END)::BOOL, 4);
WHEN 'd' THEN -- float
return json_build_array('float', substring(str, 3, position(';' IN str) - 3)::DOUBLE PRECISION, position(';' IN str));
WHEN 'i' THEN -- int
return json_build_array('int', substring(str, 3, position(';' IN str) - 3)::INT, position(';' IN str));
WHEN 'N' THEN -- null
return json_build_array('null', NULL, 2);
WHEN 's' THEN -- string
varlength := regexp_replace(substring(str, 3, position(':' IN str)), '[^0-9]+', '', 'g')::INT;
return json_build_array('string', substring(str, char_length(varlength::TEXT) + 5, varlength)::TEXT, position(';' IN str));
ELSE
RAISE EXCEPTION 'Unable to decode serialized value, unsupported type: %', substr(str, 1, 1);
END CASE;
END;
$BODY$
LANGUAGE plpgsql IMMUTABLE
COST 10;
---
--- The main function
---
CREATE OR REPLACE FUNCTION public.php_unserialize_to_json(str text)
RETURNS json AS
$BODY$
DECLARE
varlength INT;
BEGIN
CASE substring(str, 1, 1)
WHEN 'a' THEN
return php_unserialize(str)->1;
WHEN 'b' THEN
return php_unserialize(str)->1;
WHEN 'd' THEN
return php_unserialize(str)->1;
WHEN 'i' THEN
return php_unserialize(str)->1;
WHEN 'N' THEN
return php_unserialize(str)->1;
WHEN 's' THEN
return php_unserialize(str)->1;
ELSE
RETURN NULL;
END CASE;
END;
$BODY$
LANGUAGE plpgsql IMMUTABLE
COST 10;
CREATE OR REPLACE FUNCTION public.test_php_unserialize_to_json()
RETURNS void AS
$BODY$
DECLARE
BEGIN
IF
php_unserialize_to_json('N;')::TEXT
<> 'null'
THEN
RAISE EXCEPTION 'Expected null, got "%"', php_unserialize_to_json('N;')::TEXT;
END IF;
IF
php_unserialize_to_json('b:1;')::TEXT
<> 'true'
THEN
RAISE EXCEPTION 'Expected true, got "%"', php_unserialize_to_json('b:1;')::TEXT;
END IF;
IF
php_unserialize_to_json('b:0;')::TEXT
<> 'false'
THEN
RAISE EXCEPTION 'Expected false, got "%"', php_unserialize_to_json('b:0;')::TEXT;
END IF;
IF
php_unserialize_to_json('i:1337;')::TEXT
<> '1337'
THEN
RAISE EXCEPTION 'An integer 1337, got "%"', php_unserialize_to_json('i:1337;')::TEXT;
END IF;
IF
php_unserialize_to_json('d:1.337;')::TEXT
<> '1.337'
THEN
RAISE EXCEPTION 'An double 1.337, got "%"', php_unserialize_to_json('d:1.337;')::TEXT;
END IF;
IF
php_unserialize_to_json('s:0:"";')::TEXT
<> '""'
THEN
RAISE EXCEPTION 'An empty string, got "%"', php_unserialize_to_json('s:0:"";')::TEXT;
END IF;
IF
php_unserialize_to_json('s:8:"a string";')::TEXT
<> '"a string"'
THEN
RAISE EXCEPTION 'Expected value "a string", got "%"', php_unserialize_to_json('s:8:"a string";')::TEXT;
END IF;
IF
(php_unserialize_to_json('s:15:"a longer string";'))::TEXT
<> '"a longer string"'
THEN
RAISE EXCEPTION 'Expected value "a longer string", got "%"', php_unserialize_to_json('s:15:"a longer string";')::TEXT;
END IF;
IF
(php_unserialize_to_json('a:0:{}'))::TEXT
<> '{}'
THEN
RAISE EXCEPTION 'Expected empty array, got "%"', php_unserialize_to_json('a:0:{}')::TEXT;
END IF;
IF
(php_unserialize_to_json('a:1:{i:0;s:5:"array";}'))::TEXT
<> ('{"0": "array"}'::JSON)::TEXT
THEN
RAISE EXCEPTION 'Expected array with one value, got "%"', php_unserialize_to_json('a:1:{i:0;s:5:"array";}')::TEXT;
END IF;
IF
(php_unserialize_to_json('a:2:{i:0;i:123;i:1;b:1;}'))::TEXT
<> ('{"0": 123, "1": true}'::JSON)::TEXT
THEN
RAISE EXCEPTION 'Expected array with one value, got "%"', php_unserialize_to_json('a:2:{i:0;i:123;i:1;b:1;}')::TEXT;
END IF;
IF
(php_unserialize_to_json('a:1:{i:0;i:123;}'))::TEXT
<> ('{"0": 123}'::JSON)::TEXT
THEN
RAISE EXCEPTION 'Expected array with one value, got "%"', php_unserialize_to_json('a:1:{i:0;i:123;}')::TEXT;
END IF;
IF
php_unserialize_to_json('a:5:{i:0;d:1;s:1:"k";a:3:{i:0;s:1:"a";i:1;s:1:"b";s:3:"sub";a:3:{i:0;s:1:"a";i:1;s:1:"b";i:2;s:1:"c";}}i:1;N;i:2;b:1;i:3;b:0;}')::TEXT
<> ('{"0": 1, "1": null, "2": true, "3": false, "k": {"0": "a", "1": "b", "sub": {"0": "a", "1": "b", "2": "c"}}}'::JSON)::TEXT
THEN
RAISE EXCEPTION 'Unexpected complex value, got: "%"', php_unserialize_to_json('a:5:{i:0;d:1;s:1:"k";a:3:{i:0;s:1:"a";i:1;s:1:"b";s:3:"sub";a:3:{i:0;s:1:"a";i:1;s:1:"b";i:2;s:1:"c";}}i:1;N;i:2;b:1;i:3;b:0;}')::TEXT;
END IF;
-- SELECT php_unserialize_to_json('a:2:{i:0;d:1;s:1:"k";a:2:{i:0;s:1:"a";i:1;s:1:"b";}}')::TEXT;
END;
$BODY$
LANGUAGE plpgsql IMMUTABLE
COST 10;
SELECT public.test_php_unserialize_to_json();
DROP FUNCTION public.test_php_unserialize_to_json();
@aburgosc
Copy link

aburgosc commented Mar 7, 2024

I modded your gist for support Objects (stdClass) and strings separated by ";" ("text1;text2;text3;text4")

      `CREATE OR REPLACE FUNCTION public.php_unserialize(str text)
        RETURNS json AS
      $BODY$
      DECLARE
        vartype CHAR;
        varlength INT;
        jsonstr TEXT;
        varcount INT;
        jsonval JSONB;
        arrkey JSON;
        arrval JSON;
        -- String length of the serialized data
        serialized_string_length INT;
      BEGIN
        CASE substring(str, 1, 1)
          WHEN 'a' THEN -- array
            -- init object
            jsonval := '{}'::jsonb;
            
            -- remove the "a" and ":" characters
            str := substring(str, 3);
            
            -- Detect number of values in array
            varlength := substring(str, 1, position(':' IN str) - 1)::INT;
            
            -- Base size of array is 5 (a:[size]:{})
            serialized_string_length := 5 + char_length(varlength::TEXT);
            
            -- If no values, return empty object, as this always returns objects
            IF varlength = 0 THEN 
              return json_build_array('array', '{}'::JSON, serialized_string_length)::JSON;
            END IF;
      
            -- remove the array size and ":{"
            str := substring(str, char_length(varlength::TEXT) + 3);
            
            -- Find the number of variables specified
            FOR varcount IN 1 .. varlength LOOP
              -- Find the value of the key and remove it from base string
              arrkey := php_unserialize(str);
              str := substring(str, (arrkey->>2)::INT + 1);
      
              -- Find the value of the value and remove it from base string
              arrval := php_unserialize(str);
              str := substring(str, (arrval->>2)::INT + 1);
      
              serialized_string_length := serialized_string_length + (arrkey->>2)::INT + (arrval->>2)::INT;
      
              -- Append value
              jsonval := jsonval || jsonb_build_object(arrkey->>1, arrval->1);
            END LOOP;
            
            return json_build_array('array', jsonval, serialized_string_length);
          WHEN 'O' THEN -- Object
            -- init object
            jsonval := '{}'::jsonb;
            
            -- remove the "a" and ":" characters
            str := substring(str, 16);
            
            -- Detect number of values in array
            varlength := substring(str, 1, position(':' IN str) - 1)::INT;
            
            -- Base size of array is 5 (a:[size]:{})
            serialized_string_length := 5 + char_length(varlength::TEXT);
            
            -- If no values, return empty object, as this always returns objects
            IF varlength = 0 THEN 
              return json_build_array('array', '{}'::JSON, serialized_string_length)::JSON;
            END IF;
      
            -- remove the array size and ":{"
            str := substring(str, char_length(varlength::TEXT) + 3);
            
            -- Find the number of variables specified
            FOR varcount IN 1 .. varlength LOOP
              -- Find the value of the key and remove it from base string
              arrkey := php_unserialize(str);
              str := substring(str, (arrkey->>2)::INT + 1);
      
              -- Find the value of the value and remove it from base string
              arrval := php_unserialize(str);
              str := substring(str, (arrval->>2)::INT + 1);
      
              serialized_string_length := serialized_string_length + (arrkey->>2)::INT + (arrval->>2)::INT;
      
              -- Append value
              jsonval := jsonval || jsonb_build_object(arrkey->>1, arrval->1);
            END LOOP;
            
            return json_build_array('array', jsonval, serialized_string_length);
          WHEN 'b' THEN -- boolean
            return json_build_array('bool',(CASE substring(str, 3, 1) WHEN '1' THEN TRUE ELSE FALSE END)::BOOL, 4);
          WHEN 'd' THEN -- float
            return json_build_array('float', substring(str, 3, position(';' IN str) - 3)::DOUBLE PRECISION, position(';' IN str));
          WHEN 'i' THEN -- int
            return json_build_array('int', substring(str, 3, position(';' IN str) - 3)::INT, position(';' IN str));
          WHEN 'N' THEN -- null
            return json_build_array('null', NULL, 2);
          WHEN 's' THEN -- string
            varlength := substring(str, 3, position(':' IN substring(str, 3)) - 1)::INT;
            return json_build_array('string', substring(str, char_length(varlength::TEXT) + 5, varlength)::TEXT, position('";' IN str) + 1);
          ELSE
            RAISE EXCEPTION 'Unable to decode serialized value, unsupported type: %', substr(str, 1, 1);
        END CASE;
      END;
      $BODY$
        LANGUAGE plpgsql IMMUTABLE
        COST 10;
        
      ---
      --- The main function
      ---
      CREATE OR REPLACE FUNCTION public.php_unserialize_to_json(str text)
        RETURNS json AS
      $BODY$
      DECLARE
        varlength INT;
      BEGIN
        CASE substring(str, 1, 1)
          WHEN 'a' THEN
            return php_unserialize(str)->1;
          WHEN 'O' THEN
            return php_unserialize(str)->1;
          WHEN 'b' THEN
            return php_unserialize(str)->1;
          WHEN 'd' THEN
            return php_unserialize(str)->1;
          WHEN 'i' THEN
            return php_unserialize(str)->1;
          WHEN 'N' THEN
            return php_unserialize(str)->1;
          WHEN 's' THEN
            return php_unserialize(str)->1;
          ELSE
            RETURN NULL;
        END CASE;
      END;
      $BODY$
        LANGUAGE plpgsql IMMUTABLE
        COST 10;`

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment