Skip to content

Instantly share code, notes, and snippets.

@masakielastic
Last active December 18, 2015 20:59
Show Gist options
  • Save masakielastic/5843820 to your computer and use it in GitHub Desktop.
Save masakielastic/5843820 to your computer and use it in GitHub Desktop.
<?php
/**
 * Minimal JsonSerializable wrapper around an array.
 *
 * json_encode() on an instance serializes whatever jsonSerialize() returns,
 * i.e. the array handed to the constructor.
 */
class TestObject implements JsonSerializable {
    /**
     * Payload returned by jsonSerialize().
     *
     * Declared explicitly instead of being created on first assignment:
     * dynamic properties are deprecated as of PHP 8.2. Kept public so the
     * property stays reachable from outside exactly as the implicit one was.
     *
     * @var array
     */
    public $array;

    /**
     * @param array $array Data to expose through jsonSerialize().
     */
    public function __construct(array $array) {
        $this->array = $array;
    }

    /**
     * Returns the wrapped array unchanged.
     *
     * The attribute below silences the PHP 8.1+ tentative-return-type
     * deprecation; on PHP < 8 the line is parsed as a '#' comment, so the
     * class still loads on older versions.
     *
     * @return array
     */
    #[\ReturnTypeWillChange]
    public function jsonSerialize() {
        return $this->array;
    }
}
// Ill-formed UTF-8 sample modelled on "Table 3-8. Use of U+FFFD in UTF-8
// Conversion" (http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf).
// Built piece by piece so each ill-formed sequence stays visible.
$str = implode('', [
    "a",
    "\xF1\x80\x80",
    "\xE1\x80",
    "\xC2",
    "b",
    "\x80",
    "c",
    "\x80",
    "\xBF",
    "d",
]);
$array = [$str => $str];
$object = new TestObject($array);

// Expected encodings. In $expected / $expected2 every ill-formed sequence is
// written as \ufffd, the escape for REPLACEMENT CHARACTER (U+FFFD, UTF-8
// bytes "\xEF\xBF\xBD").
$expected = '"a\ufffd\ufffd\ufffdb\ufffdc\ufffd\ufffdd"';
$expected2 = '{"a\ufffd\ufffd\ufffdb\ufffdc\ufffd\ufffdd":"a\ufffd\ufffd\ufffdb\ufffdc\ufffd\ufffdd"}';
$expected3 = 'null';
$expected4 = '{null:null}';

$partial = JSON_PARTIAL_OUTPUT_ON_ERROR;
$unescaped = JSON_UNESCAPED_UNICODE;

// One boolean per check, in this order: no flags, partial output,
// unescaped unicode, then both flags combined -- each for the string,
// the array and the JsonSerializable object.
var_dump(
    json_encode($str) === false,
    json_encode($array) === false,
    json_encode($object) === false,
    json_encode($str, $partial) === $expected,
    json_encode($array, $partial) === $expected2,
    json_encode($object, $partial) === $expected2,
    json_encode($str, $unescaped) === false,
    json_encode($array, $unescaped) === false,
    json_encode($object, $unescaped) === false,
    json_encode($str, $unescaped | $partial) === $expected3,
    json_encode($array, $unescaped | $partial) === $expected4,
    json_encode($object, $unescaped | $partial) === $expected4
);
--- a/ext/json/json.c
+++ b/ext/json/json.c
@@ -358,7 +358,7 @@ static void json_encode_array(smart_str *buf, zval **val, in
}
/* }}} */
-static int json_utf8_to_utf16(unsigned short *utf16, char utf8[], int len) /* {
+static int json_utf8_to_utf16(unsigned short *utf16, char utf8[], int len, int
{
size_t pos = 0, us;
int j, status;
@@ -368,7 +368,12 @@ static int json_utf8_to_utf16(unsigned short *utf16, char u
for (j=0 ; pos < len ; j++) {
us = php_next_utf8_char((const unsigned char *)utf8, len
if (status != SUCCESS) {
- return -1;
+ if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR)
+ JSON_G(error_code) = PHP_JSON_ERROR_UTF8;
+ us = 0xfffd;
+ } else {
+ return -1;
+ }
}
/* From http://en.wikipedia.org/wiki/UTF16 */
if (us >= 0x10000) {
@@ -433,7 +438,7 @@ static void json_escape_string(smart_str *buf, char *s, int
}
utf16 = (options & PHP_JSON_UNESCAPED_UNICODE) ? NULL : (unsigned short
- ulen = json_utf8_to_utf16(utf16, s, len);
+ ulen = json_utf8_to_utf16(utf16, s, len, options TSRMLS_CC);
if (ulen <= 0) {
if (utf16) {
efree(utf16);
@@ -670,7 +675,7 @@ PHP_JSON_API void php_json_decode_ex(zval *return_value, cha
utf16 = (unsigned short *) safe_emalloc((str_len+1), sizeof(unsigned sho
- utf16_len = json_utf8_to_utf16(utf16, str, str_len);
+ utf16_len = json_utf8_to_utf16(utf16, str, str_len, options TSRMLS_CC);
if (utf16_len <= 0) {
if (utf16) {
efree(utf16);
--- a/ext/json/tests/bug43941.phpt
+++ b/ext/json/tests/bug43941.phpt
@@ -4,17 +4,30 @@ Bug #43941 (json_encode() invalid UTF-8)
<?php if (!extension_loaded("json")) print "skip"; ?>
--FILE--
<?php
+class JsonTest implements JsonSerializable {
+ private $test;
+ public function __construct($test) {
+ $this->test = $test;
+ }
+ public function jsonSerialize() {
+ return $this->test;
+ }
+}
+
+$data = array("abc", "ab\xE0" => "ab\xE0c");
var_dump(json_encode("abc"));
var_dump(json_encode("ab\xE0"));
var_dump(json_encode("ab\xE0", JSON_PARTIAL_OUTPUT_ON_ERROR));
-var_dump(json_encode(array("ab\xE0", "ab\xE0c", "abc"), JSON_PARTIAL_OUTPUT_ON_
+var_dump(json_encode($data, JSON_PARTIAL_OUTPUT_ON_ERROR));
+var_dump(json_encode(new JsonTest($data), JSON_PARTIAL_OUTPUT_ON_ERROR));
echo "Done\n";
?>
--EXPECTF--
string(5) ""abc""
bool(false)
-string(4) "null"
-string(17) "[null,null,"abc"]"
+string(10) ""ab\ufffd""
+string(34) "{"0":"abc","ab\ufffd":"ab\ufffdc"}"
+string(34) "{"0":"abc","ab\ufffd":"ab\ufffdc"}"
Done
--- a/ext/json/tests/bug61537.phpt
+++ b/ext/json/tests/bug61537.phpt
@@ -27,13 +27,13 @@ var_dump(json_last_error(), json_last_error_msg());
bool(false)
int(5)
string(56) "Malformed UTF-8 characters, possibly incorrectly encoded"
-string(4) "null"
+string(8) ""\ufffd""
int(5)
string(56) "Malformed UTF-8 characters, possibly incorrectly encoded"
bool(false)
int(5)
string(56) "Malformed UTF-8 characters, possibly incorrectly encoded"
-string(4) "null"
+string(51) ""an invalid sequen\ufffd in the middle of a string""
int(5)
-string(56) "Malformed UTF-8 characters, possibly incorrectly encoded"
+string(56) "Malformed UTF-8 characters, possibly incorrectly encoded"
<?php
// Define fallbacks only when the running PHP does not already provide
// these constants.
defined('ENT_SUBSTITUTE') || define('ENT_SUBSTITUTE', 8);
defined('JSON_PARTIAL_OUTPUT_ON_ERROR') || define('JSON_PARTIAL_OUTPUT_ON_ERROR', 512);
// Shorthand combining the four JSON_HEX_* flags.
defined('JSON_QUOTES') || define('JSON_QUOTES', JSON_HEX_TAG | JSON_HEX_APOS | JSON_HEX_AMP | JSON_HEX_QUOT);

// Input containing the characters &, ", ', < and >, with the output
// expected from htmlspecialchars() and from json_encode() respectively.
$string = '&"\'<>';
$expected = '&amp;&quot;&#039;&lt;&gt;';
$expected2 = '"\u0026\u0022\u0027\u003C\u003E"';

$htmlFlags = ENT_QUOTES | ENT_SUBSTITUTE;
$jsonFlags = JSON_HEX_TAG | JSON_HEX_APOS | JSON_HEX_AMP | JSON_HEX_QUOT
    | JSON_PARTIAL_OUTPUT_ON_ERROR;

// Three booleans: HTML escaping, JSON escaping with the flags spelled out,
// and JSON escaping through the JSON_QUOTES shorthand.
var_dump(
    htmlspecialchars($string, $htmlFlags, 'UTF-8') === $expected,
    json_encode($string, $jsonFlags) === $expected2,
    json_encode($string, JSON_QUOTES | JSON_PARTIAL_OUTPUT_ON_ERROR) === $expected2
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment