-
-
Save storeman/cecb10bab249f43562cddabc1d9dd7c9 to your computer and use it in GitHub Desktop.
/** | |
Decode a php serialized value to json. This function only supports basic | |
data types: | |
- arrays (will always become a json object) | |
- booleans | |
- integers | |
- floats | |
- strings | |
- NULL | |
The php_unserialize(text) function is a helper function which extracts the first value | |
found in the string and returns a ROW( vartype, varval, serializedlength) | |
The php_unserialize_to_json(text) function returns the json value extracted from | |
the serialized string. | |
*/ | |
--- | |
--- This function is the helper function | |
--- | |
CREATE OR REPLACE FUNCTION public.php_unserialize(str text) | |
RETURNS json AS | |
$BODY$ | |
DECLARE | |
vartype CHAR; | |
varlength INT; | |
jsonstr TEXT; | |
varcount INT; | |
jsonval JSONB; | |
arrkey JSON; | |
arrval JSON; | |
-- String length of the serialized data | |
serialized_string_length INT; | |
BEGIN | |
CASE substring(str, 1, 1) | |
WHEN 'a' THEN -- array | |
-- init object | |
jsonval := '{}'::jsonb; | |
-- remove the "a" and ":" characters | |
str := substring(str, 3); | |
-- Detect number of values in array | |
varlength := substring(str, 1, position(':' IN str) - 1)::INT; | |
-- Base size of array is 5 (a:[size]:{}) | |
serialized_string_length := 5 + char_length(varlength::TEXT); | |
-- If no values, return empty object, as this always returns objects | |
IF varlength = 0 THEN | |
return json_build_array('array', '{}'::JSON, serialized_string_length)::JSON; | |
END IF; | |
-- remove the array size and ":{" | |
str := substring(str, char_length(varlength::TEXT) + 3); | |
-- Find the number of variables specified | |
FOR varcount IN 1 .. varlength LOOP | |
-- Find the value of the key and remove it from base string | |
arrkey := php_unserialize(str); | |
str := substring(str, (arrkey->>2)::INT + 1); | |
-- Find the value of the value and remove it from base string | |
arrval := php_unserialize(str); | |
str := substring(str, (arrval->>2)::INT + 1); | |
serialized_string_length := serialized_string_length + (arrkey->>2)::INT + (arrval->>2)::INT; | |
-- Append value | |
jsonval := jsonval || jsonb_build_object(arrkey->>1, arrval->1); | |
END LOOP; | |
return json_build_array('array', jsonval, serialized_string_length); | |
WHEN 'b' THEN -- boolean | |
return json_build_array('bool',(CASE substring(str, 3, 1) WHEN '1' THEN TRUE ELSE FALSE END)::BOOL, 4); | |
WHEN 'd' THEN -- float | |
return json_build_array('float', substring(str, 3, position(';' IN str) - 3)::DOUBLE PRECISION, position(';' IN str)); | |
WHEN 'i' THEN -- int | |
return json_build_array('int', substring(str, 3, position(';' IN str) - 3)::INT, position(';' IN str)); | |
WHEN 'N' THEN -- null | |
return json_build_array('null', NULL, 2); | |
WHEN 's' THEN -- string | |
varlength := regexp_replace(substring(str, 3, position(':' IN str)), '[^0-9]+', '', 'g')::INT; | |
return json_build_array('string', substring(str, char_length(varlength::TEXT) + 5, varlength)::TEXT, position(';' IN str)); | |
ELSE | |
RAISE EXCEPTION 'Unable to decode serialized value, unsupported type: %', substr(str, 1, 1); | |
END CASE; | |
END; | |
$BODY$ | |
LANGUAGE plpgsql IMMUTABLE | |
COST 10; | |
--- | |
--- The main function | |
--- | |
CREATE OR REPLACE FUNCTION public.php_unserialize_to_json(str text) | |
RETURNS json AS | |
$BODY$ | |
DECLARE | |
varlength INT; | |
BEGIN | |
CASE substring(str, 1, 1) | |
WHEN 'a' THEN | |
return php_unserialize(str)->1; | |
WHEN 'b' THEN | |
return php_unserialize(str)->1; | |
WHEN 'd' THEN | |
return php_unserialize(str)->1; | |
WHEN 'i' THEN | |
return php_unserialize(str)->1; | |
WHEN 'N' THEN | |
return php_unserialize(str)->1; | |
WHEN 's' THEN | |
return php_unserialize(str)->1; | |
ELSE | |
RETURN NULL; | |
END CASE; | |
END; | |
$BODY$ | |
LANGUAGE plpgsql IMMUTABLE | |
COST 10; | |
CREATE OR REPLACE FUNCTION public.test_php_unserialize_to_json() | |
RETURNS void AS | |
$BODY$ | |
DECLARE | |
BEGIN | |
IF | |
php_unserialize_to_json('N;')::TEXT | |
<> 'null' | |
THEN | |
RAISE EXCEPTION 'Expected null, got "%"', php_unserialize_to_json('N;')::TEXT; | |
END IF; | |
IF | |
php_unserialize_to_json('b:1;')::TEXT | |
<> 'true' | |
THEN | |
RAISE EXCEPTION 'Expected true, got "%"', php_unserialize_to_json('b:1;')::TEXT; | |
END IF; | |
IF | |
php_unserialize_to_json('b:0;')::TEXT | |
<> 'false' | |
THEN | |
RAISE EXCEPTION 'Expected false, got "%"', php_unserialize_to_json('b:0;')::TEXT; | |
END IF; | |
IF | |
php_unserialize_to_json('i:1337;')::TEXT | |
<> '1337' | |
THEN | |
RAISE EXCEPTION 'An integer 1337, got "%"', php_unserialize_to_json('i:1337;')::TEXT; | |
END IF; | |
IF | |
php_unserialize_to_json('d:1.337;')::TEXT | |
<> '1.337' | |
THEN | |
RAISE EXCEPTION 'An double 1.337, got "%"', php_unserialize_to_json('d:1.337;')::TEXT; | |
END IF; | |
IF | |
php_unserialize_to_json('s:0:"";')::TEXT | |
<> '""' | |
THEN | |
RAISE EXCEPTION 'An empty string, got "%"', php_unserialize_to_json('s:0:"";')::TEXT; | |
END IF; | |
IF | |
php_unserialize_to_json('s:8:"a string";')::TEXT | |
<> '"a string"' | |
THEN | |
RAISE EXCEPTION 'Expected value "a string", got "%"', php_unserialize_to_json('s:8:"a string";')::TEXT; | |
END IF; | |
IF | |
(php_unserialize_to_json('s:15:"a longer string";'))::TEXT | |
<> '"a longer string"' | |
THEN | |
RAISE EXCEPTION 'Expected value "a longer string", got "%"', php_unserialize_to_json('s:15:"a longer string";')::TEXT; | |
END IF; | |
IF | |
(php_unserialize_to_json('a:0:{}'))::TEXT | |
<> '{}' | |
THEN | |
RAISE EXCEPTION 'Expected empty array, got "%"', php_unserialize_to_json('a:0:{}')::TEXT; | |
END IF; | |
IF | |
(php_unserialize_to_json('a:1:{i:0;s:5:"array";}'))::TEXT | |
<> ('{"0": "array"}'::JSON)::TEXT | |
THEN | |
RAISE EXCEPTION 'Expected array with one value, got "%"', php_unserialize_to_json('a:1:{i:0;s:5:"array";}')::TEXT; | |
END IF; | |
IF | |
(php_unserialize_to_json('a:2:{i:0;i:123;i:1;b:1;}'))::TEXT | |
<> ('{"0": 123, "1": true}'::JSON)::TEXT | |
THEN | |
RAISE EXCEPTION 'Expected array with one value, got "%"', php_unserialize_to_json('a:2:{i:0;i:123;i:1;b:1;}')::TEXT; | |
END IF; | |
IF | |
(php_unserialize_to_json('a:1:{i:0;i:123;}'))::TEXT | |
<> ('{"0": 123}'::JSON)::TEXT | |
THEN | |
RAISE EXCEPTION 'Expected array with one value, got "%"', php_unserialize_to_json('a:1:{i:0;i:123;}')::TEXT; | |
END IF; | |
IF | |
php_unserialize_to_json('a:5:{i:0;d:1;s:1:"k";a:3:{i:0;s:1:"a";i:1;s:1:"b";s:3:"sub";a:3:{i:0;s:1:"a";i:1;s:1:"b";i:2;s:1:"c";}}i:1;N;i:2;b:1;i:3;b:0;}')::TEXT | |
<> ('{"0": 1, "1": null, "2": true, "3": false, "k": {"0": "a", "1": "b", "sub": {"0": "a", "1": "b", "2": "c"}}}'::JSON)::TEXT | |
THEN | |
RAISE EXCEPTION 'Unexpected complex value, got: "%"', php_unserialize_to_json('a:5:{i:0;d:1;s:1:"k";a:3:{i:0;s:1:"a";i:1;s:1:"b";s:3:"sub";a:3:{i:0;s:1:"a";i:1;s:1:"b";i:2;s:1:"c";}}i:1;N;i:2;b:1;i:3;b:0;}')::TEXT; | |
END IF; | |
-- SELECT php_unserialize_to_json('a:2:{i:0;d:1;s:1:"k";a:2:{i:0;s:1:"a";i:1;s:1:"b";}}')::TEXT; | |
END; | |
$BODY$ | |
LANGUAGE plpgsql IMMUTABLE | |
COST 10; | |
SELECT public.test_php_unserialize_to_json(); | |
DROP FUNCTION public.test_php_unserialize_to_json(); |
Just realised if the string length is more than 9 it fails and raise exception.
In function php_unserialize
replace last CASE to
WHEN 's' THEN -- string
varlength := substring(str, 3, position(':' IN substring(str, 3)) - 1)::INT;
return json_build_array('string', substring(str, char_length(varlength::TEXT) + 5, varlength)::TEXT, position(';' IN str));
ELSE
This correct the length string when more than 9
Thanks, fair point. I updated the gist.
Hi
It fails to unserialize a simple array:
SELECT php_unserialize_to_json('a:1:{s:3:"abc";s:10:"abcde12345";}')::TEXT
Output is:
{"abc": "\""}
Looks like arrays with string keys are getting messed up.
Thanks,
gali
Hi @storeman ,
thanks for this solution! but I noticed that this works only with arrays with items length < 10. For example (using @galiganu example) :
SELECT php_unserialize_to_json('a:1:{s:3:"abc";s:10:"abcde12345"}');
php_unserialize_to_json
-------------------------
{"abc": "\""}
(1 row)
This is not working as expected, but if I change "10" to "9" it works better:
SELECT php_unserialize_to_json('a:1:{s:3:"abc";s:9:"abcde12345"}');
php_unserialize_to_json
-------------------------
{"abc": "abcde1234"}
(1 row)
So I replaced this:
WHEN 's' THEN -- string
varlength := substring(str, 3, position(':' IN str) - 1)::INT;
with this (removed the "-1" and added a 'regexp_replace' to get only numbers):
WHEN 's' THEN -- string
varlength := regexp_replace(substring(str, 3, position(':' IN str)), '[^0-9]+', '', 'g')::INT;
and now works as expected:
SELECT php_unserialize_to_json('a:1:{s:3:"abc";s:10:"abcde12345";}')::TEXT
;
php_unserialize_to_json
-------------------------
{"abc": "abcde12345"}
(1 row)
Line #50
return json_build_array('array', '{}'::TEXT[], serialized_string_length)::JSON;
will return []
in the end (at least for me on postgres13), and that will break functions expecting an object and not an array
@synchrone I understand, a consistent return type is better. I changed it and updated the test.
Awesome function! This is a life saver migrating wordpress data into pg. I ran into a case where it fails for strings larger than 99 chars. I changed the function to this for strings position(':"' IN str)-1
the :" is a better stop I think. These were strings that had "https://" in them as well.
WHEN 's' THEN -- string
varlength := regexp_replace(substring(str, 3, position(':"' IN str)-1), '[^0-9]+', '', 'g')::INT;
Hi guys i am trying to parse a json that has html content and i get the following error:
select php_unserialize_to_json('a:4:{s:7:"content";s:331:"<p dir="ltr" style="text-align:left;">Buenas noches, mi nombre es Ovando Yésica. Estoy en mi primer año de la carrera de LGTI. Me interese por esta carrera por que hice un curso en operación de computadoras y me despertó la curiosidad de aprender más sobre la industria de la tecnología. Les deseo buena cursada a todos!!</p>";s:14:"pathnamehashes";a:0:{}s:12:"discussionid";i:327611;s:13:"triggeredfrom";s:20:"forum_add_discussion";}')
ERROR: Unable to decode serialized value, unsupported type: "
CONTEXT: función PL/pgSQL php_unserialize(text) en la línea 66 en RAISE
función PL/pgSQL php_unserialize(text) en la línea 38 en asignación
función PL/pgSQL php_unserialize_to_json(text) en la línea 7 en RETURN
SQL state: P0001
Can you help me? Thanks
Yes, you need to escape this char "
in your HTML : <p dir="ltr"
select substring((regexp_matches(db_fieldname,'("\d+")','g'))[1] from '"(\d+)"') from db_tablename
I modded your gist for support Objects (stdClass) and strings separated by ";" ("text1;text2;text3;text4")
`CREATE OR REPLACE FUNCTION public.php_unserialize(str text)
RETURNS json AS
$BODY$
DECLARE
vartype CHAR;
varlength INT;
jsonstr TEXT;
varcount INT;
jsonval JSONB;
arrkey JSON;
arrval JSON;
-- String length of the serialized data
serialized_string_length INT;
BEGIN
CASE substring(str, 1, 1)
WHEN 'a' THEN -- array
-- init object
jsonval := '{}'::jsonb;
-- remove the "a" and ":" characters
str := substring(str, 3);
-- Detect number of values in array
varlength := substring(str, 1, position(':' IN str) - 1)::INT;
-- Base size of array is 5 (a:[size]:{})
serialized_string_length := 5 + char_length(varlength::TEXT);
-- If no values, return empty object, as this always returns objects
IF varlength = 0 THEN
return json_build_array('array', '{}'::JSON, serialized_string_length)::JSON;
END IF;
-- remove the array size and ":{"
str := substring(str, char_length(varlength::TEXT) + 3);
-- Find the number of variables specified
FOR varcount IN 1 .. varlength LOOP
-- Find the value of the key and remove it from base string
arrkey := php_unserialize(str);
str := substring(str, (arrkey->>2)::INT + 1);
-- Find the value of the value and remove it from base string
arrval := php_unserialize(str);
str := substring(str, (arrval->>2)::INT + 1);
serialized_string_length := serialized_string_length + (arrkey->>2)::INT + (arrval->>2)::INT;
-- Append value
jsonval := jsonval || jsonb_build_object(arrkey->>1, arrval->1);
END LOOP;
return json_build_array('array', jsonval, serialized_string_length);
WHEN 'O' THEN -- Object
-- init object
jsonval := '{}'::jsonb;
-- remove the "a" and ":" characters
str := substring(str, 16);
-- Detect number of values in array
varlength := substring(str, 1, position(':' IN str) - 1)::INT;
-- Base size of array is 5 (a:[size]:{})
serialized_string_length := 5 + char_length(varlength::TEXT);
-- If no values, return empty object, as this always returns objects
IF varlength = 0 THEN
return json_build_array('array', '{}'::JSON, serialized_string_length)::JSON;
END IF;
-- remove the array size and ":{"
str := substring(str, char_length(varlength::TEXT) + 3);
-- Find the number of variables specified
FOR varcount IN 1 .. varlength LOOP
-- Find the value of the key and remove it from base string
arrkey := php_unserialize(str);
str := substring(str, (arrkey->>2)::INT + 1);
-- Find the value of the value and remove it from base string
arrval := php_unserialize(str);
str := substring(str, (arrval->>2)::INT + 1);
serialized_string_length := serialized_string_length + (arrkey->>2)::INT + (arrval->>2)::INT;
-- Append value
jsonval := jsonval || jsonb_build_object(arrkey->>1, arrval->1);
END LOOP;
return json_build_array('array', jsonval, serialized_string_length);
WHEN 'b' THEN -- boolean
return json_build_array('bool',(CASE substring(str, 3, 1) WHEN '1' THEN TRUE ELSE FALSE END)::BOOL, 4);
WHEN 'd' THEN -- float
return json_build_array('float', substring(str, 3, position(';' IN str) - 3)::DOUBLE PRECISION, position(';' IN str));
WHEN 'i' THEN -- int
return json_build_array('int', substring(str, 3, position(';' IN str) - 3)::INT, position(';' IN str));
WHEN 'N' THEN -- null
return json_build_array('null', NULL, 2);
WHEN 's' THEN -- string
varlength := substring(str, 3, position(':' IN substring(str, 3)) - 1)::INT;
return json_build_array('string', substring(str, char_length(varlength::TEXT) + 5, varlength)::TEXT, position('";' IN str) + 1);
ELSE
RAISE EXCEPTION 'Unable to decode serialized value, unsupported type: %', substr(str, 1, 1);
END CASE;
END;
$BODY$
LANGUAGE plpgsql IMMUTABLE
COST 10;
---
--- The main function
---
CREATE OR REPLACE FUNCTION public.php_unserialize_to_json(str text)
RETURNS json AS
$BODY$
DECLARE
varlength INT;
BEGIN
CASE substring(str, 1, 1)
WHEN 'a' THEN
return php_unserialize(str)->1;
WHEN 'O' THEN
return php_unserialize(str)->1;
WHEN 'b' THEN
return php_unserialize(str)->1;
WHEN 'd' THEN
return php_unserialize(str)->1;
WHEN 'i' THEN
return php_unserialize(str)->1;
WHEN 'N' THEN
return php_unserialize(str)->1;
WHEN 's' THEN
return php_unserialize(str)->1;
ELSE
RETURN NULL;
END CASE;
END;
$BODY$
LANGUAGE plpgsql IMMUTABLE
COST 10;`
Hi, Thanks for this function, i tried on a sample below
a:9:{s:8:"playerId";s:12:"omp-id-84560";s:4:"type";s:5:"audio";s:6:"player";s:5:"html5";s:9:"timestamp";N;s:4:"page";s:47:"https://lea.gov/view.php?id=1216541§ion=3.2";s:3:"src";s:51:"https://lea.gov/pluginfile.php/22639/14j_aug041.mp3";s:7:"browser";s:6:"chrome";s:8:"isMobile";b:0;s:5:"value";i:10;}
expected to get something like this
{"playerId":"omp-id-84560","type":"audio","player":"html5","timestamp":null,"page":"https://lea.gov/view.php?id=1216541§ion=3.2","src":"https://lea.gov/pluginfile.php/22639/14j_aug041.mp3","browser":"chrome","isMobile":false,"value":10}
but when i use the helper fucnction php_unserialize_to_json i get
{ "src": ""http", "playerId": """, "timestamp": "null", "value": "10", "player": "html5", "type": "audio", "page": ""htt", "isMobile": "false", "browser": "chrome" }
You see the src, page and playerid outputs are different. Not sure why, any help on this would be very much appreciated. thanks