Created
October 21, 2009 09:10
-
-
Save nobu/214980 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Index: ext/syck/emitter.c | |
=================================================================== | |
--- ext/syck/emitter.c (revision 25425) | |
+++ ext/syck/emitter.c (working copy) | |
@@ -535,4 +535,64 @@ void syck_emit_indent( SyckEmitter *e ) | |
#define SCAN_DOCSEP 8192 | |
+static const unsigned long utf8_limits[] = { /* smallest value allowed for a sequence of N bytes */ | |
+ 0x0, /* 1 */ | |
+ 0x80, /* 2 */ | |
+ 0x800, /* 3 */ | |
+ 0x10000, /* 4 */ | |
+ 0x200000, /* 5 */ | |
+ 0x4000000, /* 6 */ | |
+ 0x80000000, /* 7 */ | |
+}; | |
+ | |
+/* Returns 1 if p starts a well-formed UTF-8 sequence, else 0. *lenp holds the | |
+ * bytes available on entry; it is set to the sequence length on success (1 for | |
+ * a 7-bit byte), or to 1 when the lead byte itself is invalid. */ | |
+static int | |
+is_utf8(const char *p, long *lenp) | |
+{ | |
+ int c; | |
+ unsigned long uv; | |
+ long n; | |
+ | |
+ if (*lenp < 1) return 0; /* empty input: *p must not be read */ | |
+ c = *p++ & 0xff; | |
+ uv = c; | |
+ if (!(uv & 0x80)) { | |
+ *lenp = 1; | |
+ return 1; | |
+ } | |
+ if (!(uv & 0x40)) { /* 10xxxxxx cannot start a sequence */ | |
+ *lenp = 1; | |
+ return 0; | |
+ } | |
+ | |
+ if (!(uv & 0x20)) { n = 2; uv &= 0x1f; } | |
+ else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; } | |
+ else if (!(uv & 0x08)) { n = 4; uv &= 0x07; } | |
+ else if (!(uv & 0x04)) { n = 5; uv &= 0x03; } | |
+ else if (!(uv & 0x02)) { n = 6; uv &= 0x01; } | |
+ else { | |
+ *lenp = 1; | |
+ return 0; | |
+ } | |
+ if (n > *lenp) { /* sequence is cut off by the end of the input */ | |
+ return 0; | |
+ } | |
+ *lenp = n--; | |
+ while (n--) { | |
+ c = *p++ & 0xff; | |
+ if ((c & 0xc0) != 0x80) { | |
+ *lenp -= n + 1; /* leaves the count of bytes before the bad one */ | |
+ return 0; | |
+ } | |
+ uv = uv << 6 | (c & 0x3f); | |
+ } | |
+ n = *lenp - 1; | |
+ if (uv < utf8_limits[n]) { /* overlong encoding */ | |
+ return 0; | |
+ } | |
+ return 1; | |
+} | |
+ | |
/* | |
* Basic printable test for LATIN-1 characters. | |
@@ -589,4 +649,9 @@ syck_scan_scalar( int req_width, const c | |
( cursor[i] >= 0x20 && cursor[i] <= 0x7E ) ) | |
) { | |
+ long n = len - i; /* bytes left from the current position */ | |
+ if ( is_utf8(cursor + i, &n) && n > 1 ) { /* test the byte being scanned; a 1-byte hit here is a non-printable 7-bit byte and must still be flagged */ | |
+ i += n - 1; /* presumably the enclosing loop supplies the final +1 -- confirm */ | |
+ continue; | |
+ } | |
flags |= SCAN_NONPRINT; | |
} | |
@@ -901,5 +966,13 @@ syck_emit_2quoted( SyckEmitter *e, int w | |
default: | |
+ { | |
+ long n = str + len - mark; /* bytes left from mark to the end of the scalar */ | |
+ if (is_utf8(mark, &n) && n > 1) { /* only multibyte sequences bypass escaping; single bytes keep going through syck_emitter_escape */ | |
+ syck_emitter_write( e, mark, n ); | |
+ mark += n; | |
+ continue; | |
+ } | |
syck_emitter_escape( e, mark, 1 ); | |
+ } | |
break; | |
} | |
Index: lib/yaml/rubytypes.rb | |
=================================================================== | |
--- lib/yaml/rubytypes.rb (revision 25425) | |
+++ lib/yaml/rubytypes.rb (working copy) | |
@@ -64,6 +64,5 @@ class Struct | |
end | |
if not struct_type | |
- struct_def = [ tag.split( ':', 4 ).last ] | |
- struct_type = Struct.new( *struct_def.concat( val.keys.collect { |k| k.intern } ) ) | |
+ struct_type = Struct.new( tag.split( ':', 4 ).last, *val.keys.map( &:intern ) ) # last tag segment first, then one member per key | |
end | |
@@ -144,5 +143,10 @@ class String | |
end | |
def is_binary_data? | |
- self.count("^ -~\t\r\n").fdiv(self.size) > 0.3 || self.index("\x00") unless self.empty? | |
+ # UTF-8/US-ASCII: binary only if malformed; otherwise any char outside printable ASCII, tab, CR, LF. | |
+ if [ Encoding::UTF_8, Encoding::US_ASCII ].include?( encoding ) | |
+ !valid_encoding? | |
+ elsif !empty? | |
+ !!( /[^ -~\t\r\n]/ =~ self ) | |
+ end | |
end | |
def String.yaml_new( klass, tag, val ) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment