Skip to content

Instantly share code, notes, and snippets.

@nobu
Created October 21, 2009 09:10
Show Gist options
  • Save nobu/214980 to your computer and use it in GitHub Desktop.
Save nobu/214980 to your computer and use it in GitHub Desktop.
Index: ext/syck/emitter.c
===================================================================
--- ext/syck/emitter.c (revision 25425)
+++ ext/syck/emitter.c (working copy)
@@ -535,4 +535,64 @@ void syck_emit_indent( SyckEmitter *e )
#define SCAN_DOCSEP 8192
+static const unsigned long utf8_limits[] = { /* smallest value is_utf8 accepts for an N-byte sequence, indexed by N - 1 */
+ 0x0, /* 1 byte */
+ 0x80, /* 2 bytes */
+ 0x800, /* 3 bytes */
+ 0x10000, /* 4 bytes */
+ 0x200000, /* 5 bytes */
+ 0x4000000, /* 6 bytes */
+ 0x80000000, /* 7 bytes; is_utf8 decodes at most 6, so it never reads this entry */
+};
+
+/*
+ * Test whether the bytes at p start with one well-formed UTF-8 sequence;
+ * *lenp holds the number of readable bytes on entry.  Returns 1 with the
+ * sequence length in *lenp, or 0 on failure.  On failure *lenp is 1 for
+ * a bad lead byte, the number of bytes before a bad continuation byte,
+ * the full length for an overlong form, and unchanged when the input is
+ * empty or shorter than the lead byte announces.
+ */
+static int
+is_utf8(const char *p, long *lenp)
+{
+    unsigned long uv;
+    long n;
+    int c;
+
+    /* Check the length first: p[0] must not be read from an empty range. */
+    if (*lenp < 1) return 0;
+    c = *p++ & 0xff;
+    uv = c;
+    if (!(uv & 0x80)) { /* 0xxxxxxx: single-byte character */
+        *lenp = 1;
+        return 1;
+    }
+    if (!(uv & 0x40)) { /* 10xxxxxx: a continuation byte cannot lead */
+        *lenp = 1;
+        return 0;
+    }
+
+    /* The run of high 1 bits in the lead byte gives the sequence length. */
+    if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
+    else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
+    else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
+    else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
+    else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
+    else { /* 0xfe and 0xff never start a sequence */
+        *lenp = 1;
+        return 0;
+    }
+    if (n > *lenp) return 0; /* input ends before the sequence does */
+    *lenp = n--;
+    while (n--) {
+        c = *p++ & 0xff;
+        if ((c & 0xc0) != 0x80) {
+            *lenp -= n + 1; /* keep only the bytes before this one */
+            return 0;
+        }
+        uv = uv << 6 | (c & 0x3f);
+    }
+    /* Reject overlong forms: the value must really need this many bytes. */
+    return uv >= utf8_limits[*lenp - 1];
+}
+
/*
* Basic printable test for LATIN-1 characters.
@@ -589,4 +649,9 @@ syck_scan_scalar( int req_width, const c
( cursor[i] >= 0x20 && cursor[i] <= 0x7E ) )
) {
+ long n = len - i;
+ if ( is_utf8(cursor, &n) ) {
+ i += n - 1;
+ continue;
+ }
flags |= SCAN_NONPRINT;
}
@@ -901,5 +966,13 @@ syck_emit_2quoted( SyckEmitter *e, int w
default:
+ {
+ long n = str + len - mark;
+ if (is_utf8(mark, &n)) {
+ syck_emitter_write( e, mark, n );
+ mark += n;
+ continue;
+ }
syck_emitter_escape( e, mark, 1 );
+ }
break;
}
Index: lib/yaml/rubytypes.rb
===================================================================
--- lib/yaml/rubytypes.rb (revision 25425)
+++ lib/yaml/rubytypes.rb (working copy)
@@ -64,6 +64,5 @@ class Struct
end
if not struct_type
- struct_def = [ tag.split( ':', 4 ).last ]
- struct_type = Struct.new( *struct_def.concat( val.keys.collect { |k| k.intern } ) )
+ struct_type = Struct.new( tag.split( ':', 4 ).last, *val.keys.collect { |k| k.intern } )
end
@@ -144,5 +143,10 @@ class String
end
def is_binary_data?
- self.count("^ -~\t\r\n").fdiv(self.size) > 0.3 || self.index("\x00") unless self.empty?
+ case encoding
+ when Encoding::UTF_8, Encoding::US_ASCII
+ !valid_encoding? # binary only when the bytes are invalid for the declared encoding
+ else
+ not /[^ -~\t\r\n]/ !~ self unless self.empty? # double negative: true if any char is outside space..~, tab, CR, LF; nil for ""
+ end
end
def String.yaml_new( klass, tag, val )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment