Skip to content

Instantly share code, notes, and snippets.

@storeman
Last active September 24, 2024 08:49
Show Gist options
  • Save storeman/cecb10bab249f43562cddabc1d9dd7c9 to your computer and use it in GitHub Desktop.
Save storeman/cecb10bab249f43562cddabc1d9dd7c9 to your computer and use it in GitHub Desktop.
PHP unserialize in Postgresql to json
/**
Decode a php serialized value to json. This function only supports basic
data types:
- arrays (will always become a json object)
- booleans
- integers
- floats
- strings
- NULL
The php_unserialize(text) function is a helper function which extracts the first value
found in the string and returns a ROW( vartype, varval, serializedlength)
The php_unserialize_to_json(text) function returns the json value extracted from
the serialized string.
*/
---
--- This function is the helper function
---
CREATE OR REPLACE FUNCTION public.php_unserialize(str text)
RETURNS json AS
$BODY$
DECLARE
vartype CHAR;
varlength INT;
jsonstr TEXT;
varcount INT;
jsonval JSONB;
arrkey JSON;
arrval JSON;
-- String length of the serialized data
serialized_string_length INT;
BEGIN
CASE substring(str, 1, 1)
WHEN 'a' THEN -- array
-- init object
jsonval := '{}'::jsonb;
-- remove the "a" and ":" characters
str := substring(str, 3);
-- Detect number of values in array
varlength := substring(str, 1, position(':' IN str) - 1)::INT;
-- Base size of array is 5 (a:[size]:{})
serialized_string_length := 5 + char_length(varlength::TEXT);
-- If no values, return empty object, as this always returns objects
IF varlength = 0 THEN
return json_build_array('array', '{}'::JSON, serialized_string_length)::JSON;
END IF;
-- remove the array size and ":{"
str := substring(str, char_length(varlength::TEXT) + 3);
-- Find the number of variables specified
FOR varcount IN 1 .. varlength LOOP
-- Find the value of the key and remove it from base string
arrkey := php_unserialize(str);
str := substring(str, (arrkey->>2)::INT + 1);
-- Find the value of the value and remove it from base string
arrval := php_unserialize(str);
str := substring(str, (arrval->>2)::INT + 1);
serialized_string_length := serialized_string_length + (arrkey->>2)::INT + (arrval->>2)::INT;
-- Append value
jsonval := jsonval || jsonb_build_object(arrkey->>1, arrval->1);
END LOOP;
return json_build_array('array', jsonval, serialized_string_length);
WHEN 'b' THEN -- boolean
return json_build_array('bool',(CASE substring(str, 3, 1) WHEN '1' THEN TRUE ELSE FALSE END)::BOOL, 4);
WHEN 'd' THEN -- float
return json_build_array('float', substring(str, 3, position(';' IN str) - 3)::DOUBLE PRECISION, position(';' IN str));
WHEN 'i' THEN -- int
return json_build_array('int', substring(str, 3, position(';' IN str) - 3)::INT, position(';' IN str));
WHEN 'N' THEN -- null
return json_build_array('null', NULL, 2);
WHEN 's' THEN -- string
varlength := regexp_replace(substring(str, 3, position(':' IN str)), '[^0-9]+', '', 'g')::INT;
return json_build_array('string', substring(str, char_length(varlength::TEXT) + 5, varlength)::TEXT, position(';' IN str));
ELSE
RAISE EXCEPTION 'Unable to decode serialized value, unsupported type: %', substr(str, 1, 1);
END CASE;
END;
$BODY$
LANGUAGE plpgsql IMMUTABLE
COST 10;
---
--- The main function
---
CREATE OR REPLACE FUNCTION public.php_unserialize_to_json(str text)
RETURNS json AS
$BODY$
DECLARE
varlength INT;
BEGIN
CASE substring(str, 1, 1)
WHEN 'a' THEN
return php_unserialize(str)->1;
WHEN 'b' THEN
return php_unserialize(str)->1;
WHEN 'd' THEN
return php_unserialize(str)->1;
WHEN 'i' THEN
return php_unserialize(str)->1;
WHEN 'N' THEN
return php_unserialize(str)->1;
WHEN 's' THEN
return php_unserialize(str)->1;
ELSE
RETURN NULL;
END CASE;
END;
$BODY$
LANGUAGE plpgsql IMMUTABLE
COST 10;
CREATE OR REPLACE FUNCTION public.test_php_unserialize_to_json()
RETURNS void AS
$BODY$
DECLARE
BEGIN
IF
php_unserialize_to_json('N;')::TEXT
<> 'null'
THEN
RAISE EXCEPTION 'Expected null, got "%"', php_unserialize_to_json('N;')::TEXT;
END IF;
IF
php_unserialize_to_json('b:1;')::TEXT
<> 'true'
THEN
RAISE EXCEPTION 'Expected true, got "%"', php_unserialize_to_json('b:1;')::TEXT;
END IF;
IF
php_unserialize_to_json('b:0;')::TEXT
<> 'false'
THEN
RAISE EXCEPTION 'Expected false, got "%"', php_unserialize_to_json('b:0;')::TEXT;
END IF;
IF
php_unserialize_to_json('i:1337;')::TEXT
<> '1337'
THEN
RAISE EXCEPTION 'An integer 1337, got "%"', php_unserialize_to_json('i:1337;')::TEXT;
END IF;
IF
php_unserialize_to_json('d:1.337;')::TEXT
<> '1.337'
THEN
RAISE EXCEPTION 'An double 1.337, got "%"', php_unserialize_to_json('d:1.337;')::TEXT;
END IF;
IF
php_unserialize_to_json('s:0:"";')::TEXT
<> '""'
THEN
RAISE EXCEPTION 'An empty string, got "%"', php_unserialize_to_json('s:0:"";')::TEXT;
END IF;
IF
php_unserialize_to_json('s:8:"a string";')::TEXT
<> '"a string"'
THEN
RAISE EXCEPTION 'Expected value "a string", got "%"', php_unserialize_to_json('s:8:"a string";')::TEXT;
END IF;
IF
(php_unserialize_to_json('s:15:"a longer string";'))::TEXT
<> '"a longer string"'
THEN
RAISE EXCEPTION 'Expected value "a longer string", got "%"', php_unserialize_to_json('s:15:"a longer string";')::TEXT;
END IF;
IF
(php_unserialize_to_json('a:0:{}'))::TEXT
<> '{}'
THEN
RAISE EXCEPTION 'Expected empty array, got "%"', php_unserialize_to_json('a:0:{}')::TEXT;
END IF;
IF
(php_unserialize_to_json('a:1:{i:0;s:5:"array";}'))::TEXT
<> ('{"0": "array"}'::JSON)::TEXT
THEN
RAISE EXCEPTION 'Expected array with one value, got "%"', php_unserialize_to_json('a:1:{i:0;s:5:"array";}')::TEXT;
END IF;
IF
(php_unserialize_to_json('a:2:{i:0;i:123;i:1;b:1;}'))::TEXT
<> ('{"0": 123, "1": true}'::JSON)::TEXT
THEN
RAISE EXCEPTION 'Expected array with one value, got "%"', php_unserialize_to_json('a:2:{i:0;i:123;i:1;b:1;}')::TEXT;
END IF;
IF
(php_unserialize_to_json('a:1:{i:0;i:123;}'))::TEXT
<> ('{"0": 123}'::JSON)::TEXT
THEN
RAISE EXCEPTION 'Expected array with one value, got "%"', php_unserialize_to_json('a:1:{i:0;i:123;}')::TEXT;
END IF;
IF
php_unserialize_to_json('a:5:{i:0;d:1;s:1:"k";a:3:{i:0;s:1:"a";i:1;s:1:"b";s:3:"sub";a:3:{i:0;s:1:"a";i:1;s:1:"b";i:2;s:1:"c";}}i:1;N;i:2;b:1;i:3;b:0;}')::TEXT
<> ('{"0": 1, "1": null, "2": true, "3": false, "k": {"0": "a", "1": "b", "sub": {"0": "a", "1": "b", "2": "c"}}}'::JSON)::TEXT
THEN
RAISE EXCEPTION 'Unexpected complex value, got: "%"', php_unserialize_to_json('a:5:{i:0;d:1;s:1:"k";a:3:{i:0;s:1:"a";i:1;s:1:"b";s:3:"sub";a:3:{i:0;s:1:"a";i:1;s:1:"b";i:2;s:1:"c";}}i:1;N;i:2;b:1;i:3;b:0;}')::TEXT;
END IF;
-- SELECT php_unserialize_to_json('a:2:{i:0;d:1;s:1:"k";a:2:{i:0;s:1:"a";i:1;s:1:"b";}}')::TEXT;
END;
$BODY$
LANGUAGE plpgsql IMMUTABLE
COST 10;
SELECT public.test_php_unserialize_to_json();
DROP FUNCTION public.test_php_unserialize_to_json();
@Ciloe
Copy link

Ciloe commented Jul 23, 2019

In function php_unserialize replace last CASE to

   WHEN 's' THEN -- string
      varlength := substring(str, 3, position(':' IN substring(str, 3)) - 1)::INT;
      return json_build_array('string', substring(str, char_length(varlength::TEXT) + 5, varlength)::TEXT, position(';' IN str));
    ELSE

This correct the length string when more than 9

@storeman
Copy link
Author

@Ciloe

Thanks, fair point. I updated the gist.

@galiganu
Copy link

Hi
It fails to unserialize a simple array:

SELECT php_unserialize_to_json('a:1:{s:3:"abc";s:10:"abcde12345";}')::TEXT

Output is:

{"abc": "\""}

Looks like arrays with string keys are getting messed up.

Thanks,
gali

@roldan
Copy link

roldan commented Feb 4, 2022

Hi @storeman ,

thanks for this solution! but I noticed that this works only with arrays with items length < 10. For example (using @galiganu example) :

SELECT php_unserialize_to_json('a:1:{s:3:"abc";s:10:"abcde12345"}');
 php_unserialize_to_json 
-------------------------
 {"abc": "\""}
(1 row)

This is not working as expected, but if I change "10" to "9" it works better:

SELECT php_unserialize_to_json('a:1:{s:3:"abc";s:9:"abcde12345"}');
 php_unserialize_to_json 
-------------------------
 {"abc": "abcde1234"}
(1 row)

So I replaced this:

WHEN 's' THEN -- string
      varlength := substring(str, 3, position(':' IN str) - 1)::INT;

with this (removed the "-1" and added a 'regexp_replace' to get only numbers):

WHEN 's' THEN -- string
      varlength := regexp_replace(substring(str, 3, position(':' IN str)), '[^0-9]+', '', 'g')::INT;

and now works as expected:

SELECT php_unserialize_to_json('a:1:{s:3:"abc";s:10:"abcde12345";}')::TEXT
;
 php_unserialize_to_json 
-------------------------
 {"abc": "abcde12345"}
(1 row)

@storeman
Copy link
Author

storeman commented Feb 11, 2022

Thanks @roldan, @galiganu for letting me know. There were also some issues with the types of integers, floats, bools and nulls. These are now correctly returned in the json. I also added a test-function which runs and deletes after valid testing.

@synchrone
Copy link

Line #50
return json_build_array('array', '{}'::TEXT[], serialized_string_length)::JSON;
will return [] in the end (at least for me on postgres13), and that will break functions expecting an object and not an array

@storeman
Copy link
Author

@synchrone I understand, a consistent return type is better. I changed it and updated the test.

@bluengreen
Copy link

Awesome function! This is a life saver migrating wordpress data into pg. I ran into a case where it fails for strings larger than 99 chars. I changed the function to this for strings position(':"' IN str)-1 the :" is a better stop I think. These were strings that had "https://" in them as well.

    WHEN 's' THEN -- string
      varlength := regexp_replace(substring(str, 3, position(':"' IN str)-1), '[^0-9]+', '', 'g')::INT;

@lucasmatiasfrias
Copy link

lucasmatiasfrias commented Jul 8, 2022

Hi guys i am trying to parse a json that has html content and i get the following error:
select php_unserialize_to_json('a:4:{s:7:"content";s:331:"<p dir="ltr" style="text-align:left;">Buenas noches, mi nombre es Ovando Yésica. Estoy en mi primer año de la carrera de LGTI. Me interese por esta carrera por que hice un curso en operación de computadoras y me despertó la curiosidad de aprender más sobre la industria de la tecnología. Les deseo buena cursada a todos!!</p>";s:14:"pathnamehashes";a:0:{}s:12:"discussionid";i:327611;s:13:"triggeredfrom";s:20:"forum_add_discussion";}')

ERROR: Unable to decode serialized value, unsupported type: "
CONTEXT: función PL/pgSQL php_unserialize(text) en la línea 66 en RAISE
función PL/pgSQL php_unserialize(text) en la línea 38 en asignación
función PL/pgSQL php_unserialize_to_json(text) en la línea 7 en RETURN
SQL state: P0001

Can you help me? Thanks

@Ciloe
Copy link

Ciloe commented Jul 9, 2022

Yes, you need to escape this char " in your HTML : <p dir="ltr"

@mikluxa
Copy link

mikluxa commented Oct 27, 2022

select substring((regexp_matches(db_fieldname,'("\d+")','g'))[1] from '"(\d+)"') from db_tablename

@aburgosc
Copy link

aburgosc commented Mar 7, 2024

I modded your gist for support Objects (stdClass) and strings separated by ";" ("text1;text2;text3;text4")

      `CREATE OR REPLACE FUNCTION public.php_unserialize(str text)
        RETURNS json AS
      $BODY$
      DECLARE
        vartype CHAR;
        varlength INT;
        jsonstr TEXT;
        varcount INT;
        jsonval JSONB;
        arrkey JSON;
        arrval JSON;
        -- String length of the serialized data
        serialized_string_length INT;
      BEGIN
        CASE substring(str, 1, 1)
          WHEN 'a' THEN -- array
            -- init object
            jsonval := '{}'::jsonb;
            
            -- remove the "a" and ":" characters
            str := substring(str, 3);
            
            -- Detect number of values in array
            varlength := substring(str, 1, position(':' IN str) - 1)::INT;
            
            -- Base size of array is 5 (a:[size]:{})
            serialized_string_length := 5 + char_length(varlength::TEXT);
            
            -- If no values, return empty object, as this always returns objects
            IF varlength = 0 THEN 
              return json_build_array('array', '{}'::JSON, serialized_string_length)::JSON;
            END IF;
      
            -- remove the array size and ":{"
            str := substring(str, char_length(varlength::TEXT) + 3);
            
            -- Find the number of variables specified
            FOR varcount IN 1 .. varlength LOOP
              -- Find the value of the key and remove it from base string
              arrkey := php_unserialize(str);
              str := substring(str, (arrkey->>2)::INT + 1);
      
              -- Find the value of the value and remove it from base string
              arrval := php_unserialize(str);
              str := substring(str, (arrval->>2)::INT + 1);
      
              serialized_string_length := serialized_string_length + (arrkey->>2)::INT + (arrval->>2)::INT;
      
              -- Append value
              jsonval := jsonval || jsonb_build_object(arrkey->>1, arrval->1);
            END LOOP;
            
            return json_build_array('array', jsonval, serialized_string_length);
          WHEN 'O' THEN -- Object
            -- init object
            jsonval := '{}'::jsonb;
            
            -- remove the "a" and ":" characters
            str := substring(str, 16);
            
            -- Detect number of values in array
            varlength := substring(str, 1, position(':' IN str) - 1)::INT;
            
            -- Base size of array is 5 (a:[size]:{})
            serialized_string_length := 5 + char_length(varlength::TEXT);
            
            -- If no values, return empty object, as this always returns objects
            IF varlength = 0 THEN 
              return json_build_array('array', '{}'::JSON, serialized_string_length)::JSON;
            END IF;
      
            -- remove the array size and ":{"
            str := substring(str, char_length(varlength::TEXT) + 3);
            
            -- Find the number of variables specified
            FOR varcount IN 1 .. varlength LOOP
              -- Find the value of the key and remove it from base string
              arrkey := php_unserialize(str);
              str := substring(str, (arrkey->>2)::INT + 1);
      
              -- Find the value of the value and remove it from base string
              arrval := php_unserialize(str);
              str := substring(str, (arrval->>2)::INT + 1);
      
              serialized_string_length := serialized_string_length + (arrkey->>2)::INT + (arrval->>2)::INT;
      
              -- Append value
              jsonval := jsonval || jsonb_build_object(arrkey->>1, arrval->1);
            END LOOP;
            
            return json_build_array('array', jsonval, serialized_string_length);
          WHEN 'b' THEN -- boolean
            return json_build_array('bool',(CASE substring(str, 3, 1) WHEN '1' THEN TRUE ELSE FALSE END)::BOOL, 4);
          WHEN 'd' THEN -- float
            return json_build_array('float', substring(str, 3, position(';' IN str) - 3)::DOUBLE PRECISION, position(';' IN str));
          WHEN 'i' THEN -- int
            return json_build_array('int', substring(str, 3, position(';' IN str) - 3)::INT, position(';' IN str));
          WHEN 'N' THEN -- null
            return json_build_array('null', NULL, 2);
          WHEN 's' THEN -- string
            varlength := substring(str, 3, position(':' IN substring(str, 3)) - 1)::INT;
            return json_build_array('string', substring(str, char_length(varlength::TEXT) + 5, varlength)::TEXT, position('";' IN str) + 1);
          ELSE
            RAISE EXCEPTION 'Unable to decode serialized value, unsupported type: %', substr(str, 1, 1);
        END CASE;
      END;
      $BODY$
        LANGUAGE plpgsql IMMUTABLE
        COST 10;
        
      ---
      --- The main function
      ---
      CREATE OR REPLACE FUNCTION public.php_unserialize_to_json(str text)
        RETURNS json AS
      $BODY$
      DECLARE
        varlength INT;
      BEGIN
        CASE substring(str, 1, 1)
          WHEN 'a' THEN
            return php_unserialize(str)->1;
          WHEN 'O' THEN
            return php_unserialize(str)->1;
          WHEN 'b' THEN
            return php_unserialize(str)->1;
          WHEN 'd' THEN
            return php_unserialize(str)->1;
          WHEN 'i' THEN
            return php_unserialize(str)->1;
          WHEN 'N' THEN
            return php_unserialize(str)->1;
          WHEN 's' THEN
            return php_unserialize(str)->1;
          ELSE
            RETURN NULL;
        END CASE;
      END;
      $BODY$
        LANGUAGE plpgsql IMMUTABLE
        COST 10;`

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment