Created
July 12, 2013 15:36
-
-
Save masakielastic/5985383 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/ext/json/JSON_parser.c b/ext/json/JSON_parser.c
index dd832a7..3749998 100644
--- a/ext/json/JSON_parser.c
+++ b/ext/json/JSON_parser.c
@@ -353,40 +353,78 @@ use_string:
 
+/*
+ * Append the UTF-8 encoding of one UTF-16 code unit to buf.
+ *
+ * A high surrogate (0xD800-0xDBFF) is first written in the generic 3-byte
+ * form, ED A0..AF 80..BF. The next call looks at the last three bytes of
+ * buf to detect it:
+ *  - a low surrogate (0xDC00-0xDFFF) after it replaces those bytes with the
+ *    4-byte encoding of the combined code point;
+ *  - any other unit after it replaces those bytes with U+FFFD (EF BF BD)
+ *    and is then encoded normally;
+ *  - a low surrogate with no high surrogate before it becomes U+FFFD and
+ *    the existing contents of buf are left untouched.
+ * A high surrogate that is not followed by another call keeps its 3-byte
+ * form.
+ */
 static void utf16_to_utf8(smart_str *buf, unsigned short utf16)
 {
-	if (utf16 < 0x80)
-	{
-		smart_str_appendc(buf, (unsigned char) utf16);
-	}
-	else if (utf16 < 0x800)
-	{
-		smart_str_appendc(buf, 0xc0 | (utf16 >> 6));
-		smart_str_appendc(buf, 0x80 | (utf16 & 0x3f));
-	}
-	else if ((utf16 & 0xfc00) == 0xdc00
-				&& buf->len >= 3
-				&& ((unsigned char) buf->c[buf->len - 3]) == 0xed
-				&& ((unsigned char) buf->c[buf->len - 2] & 0xf0) == 0xa0
-				&& ((unsigned char) buf->c[buf->len - 1] & 0xc0) == 0x80)
-	{
-		/* found surrogate pair */
-		unsigned long utf32;
-
-		utf32 = (((buf->c[buf->len - 2] & 0xf) << 16)
-					| ((buf->c[buf->len - 1] & 0x3f) << 10)
-					| (utf16 & 0x3ff)) + 0x10000;
-		buf->len -= 3;
-
-		smart_str_appendc(buf, (unsigned char) (0xf0 | (utf32 >> 18)));
-		smart_str_appendc(buf, 0x80 | ((utf32 >> 12) & 0x3f));
-		smart_str_appendc(buf, 0x80 | ((utf32 >> 6) & 0x3f));
-		smart_str_appendc(buf, 0x80 | (utf32 & 0x3f));
-	}
-	else
-	{
-		smart_str_appendc(buf, 0xe0 | (utf16 >> 12));
-		smart_str_appendc(buf, 0x80 | ((utf16 >> 6) & 0x3f));
-		smart_str_appendc(buf, 0x80 | (utf16 & 0x3f));
+	/* Non-zero when buf ends with a provisionally written high surrogate. */
+	int after_high_surrogate = buf->len >= 3
+		&& ((unsigned char) buf->c[buf->len - 3]) == 0xed
+		&& ((unsigned char) buf->c[buf->len - 2] & 0xf0) == 0xa0
+		&& ((unsigned char) buf->c[buf->len - 1] & 0xc0) == 0x80;
+
+	if ((utf16 & 0xfc00) == 0xdc00)
+	{
+		if (after_high_surrogate)
+		{
+			/* found surrogate pair */
+			unsigned long utf32;
+
+			utf32 = (((buf->c[buf->len - 2] & 0xf) << 16)
+				| ((buf->c[buf->len - 1] & 0x3f) << 10)
+				| (utf16 & 0x3ff)) + 0x10000;
+			buf->len -= 3;
+
+			smart_str_appendc(buf, (unsigned char) (0xf0 | (utf32 >> 18)));
+			smart_str_appendc(buf, 0x80 | ((utf32 >> 12) & 0x3f));
+			smart_str_appendc(buf, 0x80 | ((utf32 >> 6) & 0x3f));
+			smart_str_appendc(buf, 0x80 | (utf32 & 0x3f));
+		}
+		else
+		{
+			/*
+			 * Unpaired low surrogate: the bytes already in buf belong to
+			 * earlier characters (or there are fewer than three), so they
+			 * must not be dropped. Only append U+FFFD.
+			 */
+			smart_str_appendl(buf, "\xef\xbf\xbd", 3);
+		}
+		return;
+	}
+
+	if (after_high_surrogate)
+	{
+		/* Unpaired high surrogate: replace its three bytes with U+FFFD. */
+		buf->len -= 3;
+		smart_str_appendl(buf, "\xef\xbf\xbd", 3);
+	}
+
+	if (utf16 < 0x80)
+	{
+		smart_str_appendc(buf, (unsigned char) utf16);
+	}
+	else if (utf16 < 0x800)
+	{
+		smart_str_appendc(buf, 0xc0 | (utf16 >> 6));
+		smart_str_appendc(buf, 0x80 | (utf16 & 0x3f));
+	}
+	else
+	{
+		smart_str_appendc(buf, 0xe0 | (utf16 >> 12));
+		smart_str_appendc(buf, 0x80 | ((utf16 >> 6) & 0x3f));
+		smart_str_appendc(buf, 0x80 | (utf16 & 0x3f));
 	}
 }
 
static void attach_zval(JSON_parser jp, int up, int cur, smart_str *key, int assoc TSRMLS_DC) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Expected results of json_decode() for \u escapes that hold UTF-16
// surrogates. Each value printed by var_dump() is expected to be bool(true).
var_dump(
    // A low surrogate on its own decodes to U+FFFD.
    "\xef\xbf\xbd" === json_decode('"\udc00"'),
    // A high surrogate followed by something other than a low surrogate
    // becomes U+FFFD; the second high surrogate, being last, keeps its
    // 3-byte form.
    "\xef\xbf\xbd"."\xed\xa0\x80" === json_decode('"\ud800\ud800"'),
    // A high surrogate at the end of the string keeps its 3-byte form.
    "\xed\xa0\x80" === json_decode('"\ud800"'),
    // A well-formed pair decodes to a single 4-byte sequence (U+10000).
    "\xf0\x90\x80\x80" === json_decode('"\ud800\udc00"'),
    // A lone low surrogate must not disturb the text decoded before it.
    "abc\xef\xbf\xbd" === json_decode('"abc\udc00"')
);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment