Last active
December 18, 2015 20:59
-
-
Save masakielastic/5843820 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Minimal JsonSerializable wrapper: json_encode() sees exactly the array
 * that was passed to the constructor.
 */
class TestObject implements JsonSerializable {
    /**
     * Payload returned unchanged by jsonSerialize().
     * Declared explicitly so no dynamic property is created on assignment.
     *
     * @var array
     */
    public $array;

    /**
     * @param array $array Data to expose to json_encode().
     */
    public function __construct(array $array) {
        $this->array = $array;
    }

    /**
     * @return array The array given to the constructor.
     */
    #[\ReturnTypeWillChange]
    public function jsonSerialize() {
        return $this->array;
    }
}
// Ill-formed UTF-8 sample from Table 3-8, "Use of U+FFFD in UTF-8 Conversion"
// (http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf).
$str = "a"."\xF1\x80\x80"."\xE1\x80"."\xC2"."b"."\x80"."c"."\x80"."\xBF"."d";
$array = [$str => $str];
$object = new TestObject($array);

// REPLACEMENT CHARACTER (U+FFFD) is "\xEF\xBF\xBD" in UTF-8; the expectations
// below use its \ufffd escape.
$escapedString = '"a\ufffd\ufffd\ufffdb\ufffdc\ufffd\ufffdd"';
$escapedMap = '{"a\ufffd\ufffd\ufffdb\ufffdc\ufffd\ufffdd":"a\ufffd\ufffd\ufffdb\ufffdc\ufffd\ufffdd"}';
$unescapedString = 'null';
$unescapedMap = '{null:null}';

// The same three inputs are encoded under four flag combinations.
$inputs = [$str, $array, $object];
$checks = [];

// 1) No flags: the script expects json_encode() to return false.
foreach ($inputs as $input) {
    $checks[] = false === json_encode($input);
}

// 2) JSON_PARTIAL_OUTPUT_ON_ERROR: expects \ufffd substitution.
$wanted = [$escapedString, $escapedMap, $escapedMap];
foreach ($inputs as $index => $input) {
    $checks[] = $wanted[$index] === json_encode($input, JSON_PARTIAL_OUTPUT_ON_ERROR);
}

// 3) JSON_UNESCAPED_UNICODE alone: expects false again.
foreach ($inputs as $input) {
    $checks[] = false === json_encode($input, JSON_UNESCAPED_UNICODE);
}

// 4) Both flags together: expects null placeholders.
$wanted = [$unescapedString, $unescapedMap, $unescapedMap];
foreach ($inputs as $index => $input) {
    $checks[] = $wanted[$index] === json_encode($input, JSON_UNESCAPED_UNICODE | JSON_PARTIAL_OUTPUT_ON_ERROR);
}

// Dump all twelve results in the order they were collected.
call_user_func_array('var_dump', $checks);
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- a/ext/json/json.c | |
+++ b/ext/json/json.c | |
@@ -358,7 +358,7 @@ static void json_encode_array(smart_str *buf, zval **val, in | |
} | |
/* }}} */ | |
-static int json_utf8_to_utf16(unsigned short *utf16, char utf8[], int len) /* { | |
+static int json_utf8_to_utf16(unsigned short *utf16, char utf8[], int len, int | |
{ | |
size_t pos = 0, us; | |
int j, status; | |
@@ -368,7 +368,12 @@ static int json_utf8_to_utf16(unsigned short *utf16, char u | |
for (j=0 ; pos < len ; j++) { | |
us = php_next_utf8_char((const unsigned char *)utf8, len | |
if (status != SUCCESS) { | |
- return -1; | |
+ if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) { | |
+ JSON_G(error_code) = PHP_JSON_ERROR_UTF8; | |
+ us = 0xfffd; | |
+ } else { | |
+ return -1; | |
+ } | |
} | |
/* From http://en.wikipedia.org/wiki/UTF16 */ | |
if (us >= 0x10000) { | |
@@ -433,7 +438,7 @@ static void json_escape_string(smart_str *buf, char *s, int | |
} | |
utf16 = (options & PHP_JSON_UNESCAPED_UNICODE) ? NULL : (unsigned short | |
- ulen = json_utf8_to_utf16(utf16, s, len); | |
+ ulen = json_utf8_to_utf16(utf16, s, len, options TSRMLS_CC); | |
if (ulen <= 0) { | |
if (utf16) { | |
efree(utf16); | |
@@ -670,7 +675,7 @@ PHP_JSON_API void php_json_decode_ex(zval *return_value, cha | |
utf16 = (unsigned short *) safe_emalloc((str_len+1), sizeof(unsigned sho | |
- utf16_len = json_utf8_to_utf16(utf16, str, str_len); | |
+ utf16_len = json_utf8_to_utf16(utf16, str, str_len, options TSRMLS_CC); | |
if (utf16_len <= 0) { | |
if (utf16) { | |
efree(utf16); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- a/ext/json/tests/bug43941.phpt | |
+++ b/ext/json/tests/bug43941.phpt | |
@@ -4,17 +4,30 @@ Bug #43941 (json_encode() invalid UTF-8) | |
<?php if (!extension_loaded("json")) print "skip"; ?> | |
--FILE-- | |
<?php | |
+class JsonTest implements JsonSerializable { | |
+ private $test; | |
+ public function __construct($test) { | |
+ $this->test = $test; | |
+ } | |
+ public function jsonSerialize() { | |
+ return $this->test; | |
+ } | |
+} | |
+ | |
+$data = array("abc", "ab\xE0" => "ab\xE0c"); | |
var_dump(json_encode("abc")); | |
var_dump(json_encode("ab\xE0")); | |
var_dump(json_encode("ab\xE0", JSON_PARTIAL_OUTPUT_ON_ERROR)); | |
-var_dump(json_encode(array("ab\xE0", "ab\xE0c", "abc"), JSON_PARTIAL_OUTPUT_ON_ | |
+var_dump(json_encode($data, JSON_PARTIAL_OUTPUT_ON_ERROR)); | |
+var_dump(json_encode(new JsonTest($data), JSON_PARTIAL_OUTPUT_ON_ERROR)); | |
echo "Done\n"; | |
?> | |
--EXPECTF-- | |
string(5) ""abc"" | |
bool(false) | |
-string(4) "null" | |
-string(17) "[null,null,"abc"]" | |
+string(10) ""ab\ufffd"" | |
+string(34) "{"0":"abc","ab\ufffd":"ab\ufffdc"}" | |
+string(34) "{"0":"abc","ab\ufffd":"ab\ufffdc"}" | |
Done |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- a/ext/json/tests/bug61537.phpt | |
+++ b/ext/json/tests/bug61537.phpt | |
@@ -27,13 +27,13 @@ var_dump(json_last_error(), json_last_error_msg()); | |
bool(false) | |
int(5) | |
string(56) "Malformed UTF-8 characters, possibly incorrectly encoded" | |
-string(4) "null" | |
+string(8) ""\ufffd"" | |
int(5) | |
string(56) "Malformed UTF-8 characters, possibly incorrectly encoded" | |
bool(false) | |
int(5) | |
string(56) "Malformed UTF-8 characters, possibly incorrectly encoded" | |
-string(4) "null" | |
+string(51) ""an invalid sequen\ufffd in the middle of a string"" | |
int(5) | |
-string(56) "Malformed UTF-8 characters, possibly incorrectly encoded" | |
+string(56) "Malformed UTF-8 characters, possibly incorrectly encoded" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Fallback definitions so the script still parses when a constant is not
// provided by the running PHP build.
if (!defined('ENT_SUBSTITUTE')) {
    define('ENT_SUBSTITUTE', 8);
}
if (!defined('JSON_PARTIAL_OUTPUT_ON_ERROR')) {
    define('JSON_PARTIAL_OUTPUT_ON_ERROR', 512);
}
// Convenience mask combining all four JSON_HEX_* escaping flags.
if (!defined('JSON_QUOTES')) {
    define('JSON_QUOTES', JSON_HEX_TAG | JSON_HEX_APOS | JSON_HEX_AMP | JSON_HEX_QUOT);
}

// The five HTML-significant characters: & " ' < >
$string = '&"\'<>';
// Entity-encoded form expected from htmlspecialchars() with ENT_QUOTES.
$expected = '&amp;&quot;&#039;&lt;&gt;';
// \uXXXX-escaped form expected from json_encode() with the JSON_HEX_* flags.
$expected2 = '"\u0026\u0022\u0027\u003C\u003E"';

var_dump(
    $expected === htmlspecialchars($string, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'),
    // Flags spelled out individually ...
    $expected2 === json_encode(
        $string, JSON_HEX_TAG | JSON_HEX_APOS | JSON_HEX_AMP | JSON_HEX_QUOT
        | JSON_PARTIAL_OUTPUT_ON_ERROR),
    // ... and through the JSON_QUOTES mask; both are compared against the same string.
    $expected2 === json_encode($string, JSON_QUOTES | JSON_PARTIAL_OUTPUT_ON_ERROR)
);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment