Last active
December 19, 2015 14:58
-
-
Save masakielastic/5973095 to your computer and use it in GitHub Desktop.
This gist is not maintained. See https://github.com/masakielastic/patches/tree/master/php_bugs_65082 for the latest information.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/ext/json/json.c b/ext/json/json.c | |
index 5360841..1f2ede1 100644 | |
--- a/ext/json/json.c | |
+++ b/ext/json/json.c | |
@@ -103,6 +103,8 @@ static PHP_MINIT_FUNCTION(json) | |
REGISTER_LONG_CONSTANT("JSON_PRETTY_PRINT", PHP_JSON_PRETTY_PRINT, CONST_CS | CONST_PERSISTENT); | |
REGISTER_LONG_CONSTANT("JSON_UNESCAPED_UNICODE", PHP_JSON_UNESCAPED_UNICODE, CONST_CS | CONST_PERSISTENT); | |
REGISTER_LONG_CONSTANT("JSON_PARTIAL_OUTPUT_ON_ERROR", PHP_JSON_PARTIAL_OUTPUT_ON_ERROR, CONST_CS | CONST_PERSISTENT); | |
+ REGISTER_LONG_CONSTANT("JSON_NOTUTF8_SUBSTITUTE", PHP_JSON_NOTUTF8_SUBSTITUTE, CONST_CS | CONST_PERSISTENT); | |
+ REGISTER_LONG_CONSTANT("JSON_NOTUTF8_IGNORE", PHP_JSON_NOTUTF8_IGNORE, CONST_CS | CONST_PERSISTENT); | |
REGISTER_LONG_CONSTANT("JSON_ERROR_NONE", PHP_JSON_ERROR_NONE, CONST_CS | CONST_PERSISTENT); | |
REGISTER_LONG_CONSTANT("JSON_ERROR_DEPTH", PHP_JSON_ERROR_DEPTH, CONST_CS | CONST_PERSISTENT); | |
@@ -358,47 +360,44 @@ static void json_encode_array(smart_str *buf, zval **val, int options TSRMLS_DC) | |
} | |
/* }}} */ | |
-static int json_utf8_to_utf16(unsigned short *utf16, char utf8[], int len) /* {{{ */ | |
+static int json_utf8_to_utf16(unsigned short *utf16, char utf8[], int utf8_len, int options) /* {{{ */ | |
{ | |
- size_t pos = 0, us; | |
- int j, status; | |
- | |
- if (utf16) { | |
- /* really convert the utf8 string */ | |
- for (j=0 ; pos < len ; j++) { | |
- us = php_next_utf8_char((const unsigned char *)utf8, len, &pos, &status); | |
- if (status != SUCCESS) { | |
- return -1; | |
- } | |
- /* From http://en.wikipedia.org/wiki/UTF16 */ | |
- if (us >= 0x10000) { | |
- us -= 0x10000; | |
- utf16[j++] = (unsigned short)((us >> 10) | 0xd800); | |
- utf16[j] = (unsigned short)((us & 0x3ff) | 0xdc00); | |
+ size_t pos = 0; | |
+ unsigned int code_point; | |
+ int len, status; | |
+ | |
+ for (len = 0; pos < utf8_len; len++) { | |
+ code_point = php_next_utf8_char((const unsigned char *) utf8, utf8_len, &pos, &status); | |
+ if (status == FAILURE) { | |
+ if (options & PHP_JSON_NOTUTF8_IGNORE) { | |
+ /* ignore this invalid character */ | |
+ len--; | |
+ continue; | |
+ } else if (options & PHP_JSON_NOTUTF8_SUBSTITUTE) { | |
+ /* Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) */ | |
+ code_point = 0xfffd; | |
} else { | |
- utf16[j] = (unsigned short)us; | |
- } | |
- } | |
- } else { | |
- /* Only check if utf8 string is valid, and compute utf16 lenght */ | |
- for (j=0 ; pos < len ; j++) { | |
- us = php_next_utf8_char((const unsigned char *)utf8, len, &pos, &status); | |
- if (status != SUCCESS) { | |
return -1; | |
} | |
- if (us >= 0x10000) { | |
- j++; | |
- } | |
+ } | |
+ /* From http://en.wikipedia.org/wiki/UTF16 */ | |
+ if (code_point < 0x10000) { | |
+ utf16[len] = (unsigned short) code_point; | |
+ } else { | |
+ code_point -= 0x10000; | |
+ utf16[len++] = (unsigned short) ((code_point >> 10) | 0xd800); | |
+ utf16[len] = (unsigned short) ((code_point & 0x3ff) | 0xdc00); | |
} | |
} | |
- return j; | |
+ | |
+ return len; | |
} | |
/* }}} */ | |
static void json_escape_string(smart_str *buf, char *s, int len, int options TSRMLS_DC) /* {{{ */ | |
{ | |
- int pos = 0, ulen = 0; | |
+ int pos = 0; | |
unsigned short us; | |
unsigned short *utf16; | |
size_t newlen; | |
@@ -432,13 +431,13 @@ static void json_escape_string(smart_str *buf, char *s, int len, int options TSR | |
} | |
- utf16 = (options & PHP_JSON_UNESCAPED_UNICODE) ? NULL : (unsigned short *) safe_emalloc(len, sizeof(unsigned short), 0); | |
- ulen = json_utf8_to_utf16(utf16, s, len); | |
- if (ulen <= 0) { | |
+ utf16 = (unsigned short *) safe_emalloc(len, sizeof(unsigned short), 0); | |
+ len = json_utf8_to_utf16(utf16, s, len, options); | |
+ if (len <= 0) { | |
if (utf16) { | |
efree(utf16); | |
} | |
- if (ulen < 0) { | |
+ if (len < 0) { | |
JSON_G(error_code) = PHP_JSON_ERROR_UTF8; | |
smart_str_appendl(buf, "null", 4); | |
} else { | |
@@ -446,9 +445,6 @@ static void json_escape_string(smart_str *buf, char *s, int len, int options TSR | |
} | |
return; | |
} | |
- if (!(options & PHP_JSON_UNESCAPED_UNICODE)) { | |
- len = ulen; | |
- } | |
/* pre-allocate for string length plus 2 quotes */ | |
smart_str_alloc(buf, len+2, 0); | |
@@ -456,7 +452,7 @@ static void json_escape_string(smart_str *buf, char *s, int len, int options TSR | |
while (pos < len) | |
{ | |
- us = (options & PHP_JSON_UNESCAPED_UNICODE) ? s[pos++] : utf16[pos++]; | |
+ us = utf16[pos++]; | |
switch (us) | |
{ | |
@@ -533,13 +529,39 @@ static void json_escape_string(smart_str *buf, char *s, int len, int options TSR | |
break; | |
default: | |
- if (us >= ' ' && ((options & PHP_JSON_UNESCAPED_UNICODE) || (us & 127) == us)) { | |
- smart_str_appendc(buf, (unsigned char) us); | |
+ if (options & PHP_JSON_UNESCAPED_UNICODE) { | |
+ | |
+ if (us < 0x20) { | |
+ smart_str_appendl(buf, "\\u", 2); | |
+ smart_str_appendc(buf, digits[(us >> 12) & 0xf]); | |
+ smart_str_appendc(buf, digits[(us >> 8) & 0xf]); | |
+ smart_str_appendc(buf, digits[(us >> 4) & 0xf]); | |
+ smart_str_appendc(buf, digits[(us & 0xf)]); | |
+ } else if (us < 0x80) { | |
+ smart_str_appendc(buf, us); | |
+ } else if (us < 0x800) { | |
+ smart_str_appendc(buf, 0xc0 | (us >> 6)); | |
+ smart_str_appendc(buf, 0x80 | (us & 0x3f)); | |
+ } else if (us >= 0xd800 && us <= 0xdbff) { | |
+ unsigned int utf32; | |
+ utf32 = ((us & 0x3ff) << 10) + (utf16[pos++] & 0x3ff) + 0x10000; | |
+ smart_str_appendc(buf, 0xf0 | (utf32 >> 18)); | |
+ smart_str_appendc(buf, 0x80 | ((utf32 >> 12) & 0x3f)); | |
+ smart_str_appendc(buf, 0x80 | ((utf32 >> 6) & 0x3f)); | |
+ smart_str_appendc(buf, 0x80 | (utf32 & 0x3f)); | |
+ } else { | |
+ smart_str_appendc(buf, 0xe0 | (us >> 12)); | |
+ smart_str_appendc(buf, 0x80 | ((us >> 6) & 0x3f)); | |
+ smart_str_appendc(buf, 0x80 | (us & 0x3f)); | |
+ } | |
+ | |
+ } else if (us >= 0x20 && us < 0x80) { | |
+ smart_str_appendc(buf, (unsigned char) us); | |
} else { | |
smart_str_appendl(buf, "\\u", 2); | |
- smart_str_appendc(buf, digits[(us & 0xf000) >> 12]); | |
- smart_str_appendc(buf, digits[(us & 0xf00) >> 8]); | |
- smart_str_appendc(buf, digits[(us & 0xf0) >> 4]); | |
+ smart_str_appendc(buf, digits[(us >> 12) & 0xf]); | |
+ smart_str_appendc(buf, digits[(us >> 8) & 0xf]); | |
+ smart_str_appendc(buf, digits[(us >> 4) & 0xf]); | |
smart_str_appendc(buf, digits[(us & 0xf)]); | |
} | |
break; | |
@@ -670,7 +692,7 @@ PHP_JSON_API void php_json_decode_ex(zval *return_value, char *str, int str_len, | |
utf16 = (unsigned short *) safe_emalloc((str_len+1), sizeof(unsigned short), 1); | |
- utf16_len = json_utf8_to_utf16(utf16, str, str_len); | |
+ utf16_len = json_utf8_to_utf16(utf16, str, str_len, options); | |
if (utf16_len <= 0) { | |
if (utf16) { | |
efree(utf16); | |
@@ -869,4 +891,4 @@ static PHP_FUNCTION(json_last_error_msg) | |
* End: | |
* vim600: noet sw=4 ts=4 fdm=marker | |
* vim<600: noet sw=4 ts=4 | |
- */ | |
+ */ | |
\ No newline at end of file | |
diff --git a/ext/json/php_json.h b/ext/json/php_json.h | |
index ec707ce..2360278 100644 | |
--- a/ext/json/php_json.h | |
+++ b/ext/json/php_json.h | |
@@ -65,6 +65,8 @@ extern zend_class_entry *php_json_serializable_ce; | |
#define PHP_JSON_PRETTY_PRINT (1<<7) | |
#define PHP_JSON_UNESCAPED_UNICODE (1<<8) | |
#define PHP_JSON_PARTIAL_OUTPUT_ON_ERROR (1<<9) | |
+#define PHP_JSON_NOTUTF8_SUBSTITUTE (1<<10) | |
+#define PHP_JSON_NOTUTF8_IGNORE (1<<11) | |
/* Internal flags */ | |
#define PHP_JSON_OUTPUT_ARRAY 0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/ext/json/json.c b/ext/json/json.c | |
index 5360841..1b60032 100644 | |
--- a/ext/json/json.c | |
+++ b/ext/json/json.c | |
@@ -103,6 +103,8 @@ static PHP_MINIT_FUNCTION(json) | |
REGISTER_LONG_CONSTANT("JSON_PRETTY_PRINT", PHP_JSON_PRETTY_PRINT, CONST_CS | CONST_PERSISTENT); | |
REGISTER_LONG_CONSTANT("JSON_UNESCAPED_UNICODE", PHP_JSON_UNESCAPED_UNICODE, CONST_CS | CONST_PERSISTENT); | |
REGISTER_LONG_CONSTANT("JSON_PARTIAL_OUTPUT_ON_ERROR", PHP_JSON_PARTIAL_OUTPUT_ON_ERROR, CONST_CS | CONST_PERSISTENT); | |
+ REGISTER_LONG_CONSTANT("JSON_NOTUTF8_SUBSTITUTE", PHP_JSON_NOTUTF8_SUBSTITUTE, CONST_CS | CONST_PERSISTENT); | |
+ REGISTER_LONG_CONSTANT("JSON_NOTUTF8_IGNORE", PHP_JSON_NOTUTF8_IGNORE, CONST_CS | CONST_PERSISTENT); | |
REGISTER_LONG_CONSTANT("JSON_ERROR_NONE", PHP_JSON_ERROR_NONE, CONST_CS | CONST_PERSISTENT); | |
REGISTER_LONG_CONSTANT("JSON_ERROR_DEPTH", PHP_JSON_ERROR_DEPTH, CONST_CS | CONST_PERSISTENT); | |
@@ -358,48 +360,76 @@ static void json_encode_array(smart_str *buf, zval **val, int options TSRMLS_DC) | |
} | |
/* }}} */ | |
-static int json_utf8_to_utf16(unsigned short *utf16, char utf8[], int len) /* {{{ */ | |
+static int json_utf8_to_utf32(unsigned int *utf32, char utf8[], int utf8_len, int options) /* {{{ */ | |
{ | |
- size_t pos = 0, us; | |
- int j, status; | |
- | |
- if (utf16) { | |
- /* really convert the utf8 string */ | |
- for (j=0 ; pos < len ; j++) { | |
- us = php_next_utf8_char((const unsigned char *)utf8, len, &pos, &status); | |
- if (status != SUCCESS) { | |
- return -1; | |
- } | |
- /* From http://en.wikipedia.org/wiki/UTF16 */ | |
- if (us >= 0x10000) { | |
- us -= 0x10000; | |
- utf16[j++] = (unsigned short)((us >> 10) | 0xd800); | |
- utf16[j] = (unsigned short)((us & 0x3ff) | 0xdc00); | |
+ size_t pos = 0; | |
+ unsigned int code_point; | |
+ int len, status; | |
+ | |
+ for (len = 0; pos < utf8_len; len++) { | |
+ code_point = php_next_utf8_char((const unsigned char *) utf8, utf8_len, &pos, &status); | |
+ if (status == FAILURE) { | |
+ | |
+ if (options & PHP_JSON_NOTUTF8_IGNORE) { | |
+ /* ignore this invalid character */ | |
+ len--; | |
+ continue; | |
+ } else if (options & PHP_JSON_NOTUTF8_SUBSTITUTE) { | |
+ /* Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) */ | |
+ code_point = 0xfffd; | |
} else { | |
- utf16[j] = (unsigned short)us; | |
+ return -1; | |
} | |
} | |
- } else { | |
- /* Only check if utf8 string is valid, and compute utf16 lenght */ | |
- for (j=0 ; pos < len ; j++) { | |
- us = php_next_utf8_char((const unsigned char *)utf8, len, &pos, &status); | |
- if (status != SUCCESS) { | |
+ | |
+ utf32[len] = code_point; | |
+ | |
+ } | |
+ | |
+ return len; | |
+} | |
+/* }}} */ | |
+ | |
+static int json_utf8_to_utf16(unsigned short *utf16, char utf8[], int utf8_len, int options) /* {{{ */ | |
+{ | |
+ size_t pos = 0; | |
+ unsigned int code_point; | |
+ int len, status; | |
+ | |
+ for (len = 0; pos < utf8_len; len++) { | |
+ code_point = php_next_utf8_char((const unsigned char *) utf8, utf8_len, &pos, &status); | |
+ if (status == FAILURE) { | |
+ if (options & PHP_JSON_NOTUTF8_IGNORE) { | |
+ /* ignore this invalid character */ | |
+ len--; | |
+ continue; | |
+ } else if (options & PHP_JSON_NOTUTF8_SUBSTITUTE) { | |
+ /* Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) */ | |
+ code_point = 0xfffd; | |
+ } else { | |
return -1; | |
} | |
- if (us >= 0x10000) { | |
- j++; | |
- } | |
+ } | |
+ /* From http://en.wikipedia.org/wiki/UTF16 */ | |
+ if (code_point < 0x10000) { | |
+ utf16[len] = (unsigned short) code_point; | |
+ } else { | |
+ code_point -= 0x10000; | |
+ utf16[len++] = (unsigned short) ((code_point >> 10) | 0xd800); | |
+ utf16[len] = (unsigned short) ((code_point & 0x3ff) | 0xdc00); | |
} | |
} | |
- return j; | |
+ | |
+ return len; | |
} | |
/* }}} */ | |
static void json_escape_string(smart_str *buf, char *s, int len, int options TSRMLS_DC) /* {{{ */ | |
{ | |
- int pos = 0, ulen = 0; | |
- unsigned short us; | |
+ int pos = 0; | |
+ unsigned int us; | |
+ unsigned int *utf32; | |
unsigned short *utf16; | |
size_t newlen; | |
@@ -432,13 +462,27 @@ static void json_escape_string(smart_str *buf, char *s, int len, int options TSR | |
} | |
- utf16 = (options & PHP_JSON_UNESCAPED_UNICODE) ? NULL : (unsigned short *) safe_emalloc(len, sizeof(unsigned short), 0); | |
- ulen = json_utf8_to_utf16(utf16, s, len); | |
- if (ulen <= 0) { | |
- if (utf16) { | |
- efree(utf16); | |
+ if (options & PHP_JSON_UNESCAPED_UNICODE) { | |
+ utf32 = (unsigned int *) safe_emalloc(len, sizeof(unsigned int), 0); | |
+ len = json_utf8_to_utf32(utf32, s, len, options); | |
+ } else { | |
+ utf16 = (unsigned short *) safe_emalloc(len, sizeof(unsigned short), 0); | |
+ len = json_utf8_to_utf16(utf16, s, len, options); | |
+ } | |
+ | |
+ if (len <= 0) { | |
+ | |
+ if (options & PHP_JSON_UNESCAPED_UNICODE) { | |
+ if (utf32) { | |
+ efree(utf32); | |
+ } | |
+ } else { | |
+ if (utf16) { | |
+ efree(utf16); | |
+ } | |
} | |
- if (ulen < 0) { | |
+ | |
+ if (len < 0) { | |
JSON_G(error_code) = PHP_JSON_ERROR_UTF8; | |
smart_str_appendl(buf, "null", 4); | |
} else { | |
@@ -446,9 +490,6 @@ static void json_escape_string(smart_str *buf, char *s, int len, int options TSR | |
} | |
return; | |
} | |
- if (!(options & PHP_JSON_UNESCAPED_UNICODE)) { | |
- len = ulen; | |
- } | |
/* pre-allocate for string length plus 2 quotes */ | |
smart_str_alloc(buf, len+2, 0); | |
@@ -456,7 +497,7 @@ static void json_escape_string(smart_str *buf, char *s, int len, int options TSR | |
while (pos < len) | |
{ | |
- us = (options & PHP_JSON_UNESCAPED_UNICODE) ? s[pos++] : utf16[pos++]; | |
+ us = (options & PHP_JSON_UNESCAPED_UNICODE) ? utf32[pos++]: utf16[pos++]; | |
switch (us) | |
{ | |
@@ -533,13 +574,37 @@ static void json_escape_string(smart_str *buf, char *s, int len, int options TSR | |
break; | |
default: | |
- if (us >= ' ' && ((options & PHP_JSON_UNESCAPED_UNICODE) || (us & 127) == us)) { | |
- smart_str_appendc(buf, (unsigned char) us); | |
+ if (options & PHP_JSON_UNESCAPED_UNICODE) { | |
+ | |
+ if (us < 0x20) { | |
+ smart_str_appendl(buf, "\\u", 2); | |
+ smart_str_appendc(buf, digits[(us >> 12) & 0xf]); | |
+ smart_str_appendc(buf, digits[(us >> 8) & 0xf]); | |
+ smart_str_appendc(buf, digits[(us >> 4) & 0xf]); | |
+ smart_str_appendc(buf, digits[(us & 0xf)]); | |
+ } else if (us < 0x80) { | |
+ smart_str_appendc(buf, us); | |
+ } else if (us < 0x800) { | |
+ smart_str_appendc(buf, 0xc0 | (us >> 6)); | |
+ smart_str_appendc(buf, 0x80 | (us & 0x3f)); | |
+ } else if (us < 0x10000) { | |
+ smart_str_appendc(buf, 0xe0 | (us >> 12)); | |
+ smart_str_appendc(buf, 0x80 | ((us >> 6) & 0x3f)); | |
+ smart_str_appendc(buf, 0x80 | (us & 0x3f)); | |
+ } else { | |
+ smart_str_appendc(buf, 0xf0 | (us >> 18)); | |
+ smart_str_appendc(buf, 0x80 | ((us >> 12) & 0x3f)); | |
+ smart_str_appendc(buf, 0x80 | ((us >> 6) & 0x3f)); | |
+ smart_str_appendc(buf, 0x80 | (us & 0x3f)); | |
+ } | |
+ | |
+ } else if (us >= 0x20 && us < 0x80) { | |
+ smart_str_appendc(buf, (unsigned char) us); | |
} else { | |
smart_str_appendl(buf, "\\u", 2); | |
- smart_str_appendc(buf, digits[(us & 0xf000) >> 12]); | |
- smart_str_appendc(buf, digits[(us & 0xf00) >> 8]); | |
- smart_str_appendc(buf, digits[(us & 0xf0) >> 4]); | |
+ smart_str_appendc(buf, digits[(us >> 12) & 0xf]); | |
+ smart_str_appendc(buf, digits[(us >> 8) & 0xf]); | |
+ smart_str_appendc(buf, digits[(us >> 4) & 0xf]); | |
smart_str_appendc(buf, digits[(us & 0xf)]); | |
} | |
break; | |
@@ -547,9 +612,17 @@ static void json_escape_string(smart_str *buf, char *s, int len, int options TSR | |
} | |
smart_str_appendc(buf, '"'); | |
- if (utf16) { | |
- efree(utf16); | |
+ | |
+ if (options & PHP_JSON_UNESCAPED_UNICODE) { | |
+ if (utf32) { | |
+ efree(utf32); | |
+ } | |
+ } else { | |
+ if (utf16) { | |
+ efree(utf16); | |
+ } | |
} | |
+ | |
} | |
/* }}} */ | |
@@ -670,7 +743,7 @@ PHP_JSON_API void php_json_decode_ex(zval *return_value, char *str, int str_len, | |
utf16 = (unsigned short *) safe_emalloc((str_len+1), sizeof(unsigned short), 1); | |
- utf16_len = json_utf8_to_utf16(utf16, str, str_len); | |
+ utf16_len = json_utf8_to_utf16(utf16, str, str_len, options); | |
if (utf16_len <= 0) { | |
if (utf16) { | |
efree(utf16); | |
@@ -869,4 +942,4 @@ static PHP_FUNCTION(json_last_error_msg) | |
* End: | |
* vim600: noet sw=4 ts=4 fdm=marker | |
* vim<600: noet sw=4 ts=4 | |
- */ | |
+ */ | |
\ No newline at end of file | |
diff --git a/ext/json/php_json.h b/ext/json/php_json.h | |
index ec707ce..2360278 100644 | |
--- a/ext/json/php_json.h | |
+++ b/ext/json/php_json.h | |
@@ -65,6 +65,8 @@ extern zend_class_entry *php_json_serializable_ce; | |
#define PHP_JSON_PRETTY_PRINT (1<<7) | |
#define PHP_JSON_UNESCAPED_UNICODE (1<<8) | |
#define PHP_JSON_PARTIAL_OUTPUT_ON_ERROR (1<<9) | |
+#define PHP_JSON_NOTUTF8_SUBSTITUTE (1<<10) | |
+#define PHP_JSON_NOTUTF8_IGNORE (1<<11) | |
/* Internal flags */ | |
#define PHP_JSON_OUTPUT_ARRAY 0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
W1 = 110110yyyyyyyyyy | |
W2 = 110111xxxxxxxxxx | |
yyyyyyyyyy = W1 & 0x3FF | |
xxxxxxxxxx = W2 & 0x3FF | |
U = U' + 0x10000 | |
= yyyyyyyyyyxxxxxxxxxx + 0x10000 | |
= yyyyyyyyyy0000000000 + xxxxxxxxxx + 0x10000 | |
= ((W1 & 0x3FF) << 10) + (W2 & 0x3FF) + 0x10000 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Test fixture: wraps an arbitrary value and returns it from jsonSerialize(),
// so that json_encode() on a JsonTest instance encodes the wrapped value.
class JsonTest implements JsonSerializable { | |
private $test; | |
// Store the value that jsonSerialize() will hand back.
public function __construct($test) { | |
$this->test = $test; | |
} | |
// Return the stored value unchanged.
public function jsonSerialize() { | |
return $this->test; | |
} | |
} | |
// Dumps one boolean per comparison below; each compares an expected literal
// with the result of json_encode() / json_decode().
var_dump( | |
// Input containing a stray 0x80 byte, encoded with the SUBSTITUTE and IGNORE
// flags, each with and without JSON_UNESCAPED_UNICODE.
'{"a\ufffd":"a\ufffd"}' === json_encode(new JsonTest(["a\x80" => "a\x80"]), JSON_NOTUTF8_SUBSTITUTE), | |
'{"'."a\xEF\xBF\xBD".'":"'."a\xEF\xBF\xBD".'"}' === json_encode(new JsonTest(["a\x80" => "a\x80"]), JSON_UNESCAPED_UNICODE | JSON_NOTUTF8_SUBSTITUTE), | |
'{"a":"a"}' === json_encode(new JsonTest(["a\x80" => "a\x80"]), JSON_NOTUTF8_IGNORE), | |
'{"a":"a"}' === json_encode(new JsonTest(["a\x80" => "a\x80"]), JSON_UNESCAPED_UNICODE | JSON_NOTUTF8_IGNORE), | |
// 1- to 4-byte UTF-8 sequences compared against their own bytes under JSON_UNESCAPED_UNICODE.
// https://en.wikipedia.org/wiki/UTF-8#Examples | |
// U+0024 | |
'"'."\x24".'"' === json_encode("\x24", JSON_UNESCAPED_UNICODE), | |
// U+00A2 | |
'"'."\xC2\xA2".'"' === json_encode("\xC2\xA2", JSON_UNESCAPED_UNICODE), | |
// U+20AC | |
'"'."\xE2\x82\xAC".'"' === json_encode("\xE2\x82\xAC", JSON_UNESCAPED_UNICODE), | |
// U+24B62 | |
'"'."\xF0\xA4\xAD\xA2".'"' === json_encode("\xF0\xA4\xAD\xA2", JSON_UNESCAPED_UNICODE), | |
// The same stray-byte input passed through json_decode() with each flag as the options argument.
"a\xEF\xBF\xBD" === json_decode('"'."a\x80".'"', false, 512, JSON_NOTUTF8_SUBSTITUTE), | |
"a" === json_decode('"'."a\x80".'"', false, 512, JSON_NOTUTF8_IGNORE) | |
); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment