Created November 10, 2012 02:24
-
-
Save tmm1/4049587 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/encoding.c b/encoding.c | |
index b8c5f6d..5863c96 100644 | |
--- a/encoding.c | |
+++ b/encoding.c | |
@@ -35,7 +35,7 @@ int rb_encdb_alias(const char *alias, const char *orig); | |
#pragma GCC visibility pop | |
#endif | |
-static ID id_encoding; | |
+static ID id_encoding, id_handler; | |
VALUE rb_cEncoding; | |
static VALUE rb_encoding_list; | |
@@ -736,18 +736,19 @@ rb_enc_get(VALUE obj) | |
} | |
rb_encoding* | |
-rb_enc_check(VALUE str1, VALUE str2) | |
+rb_enc_check_internal(VALUE str1, VALUE str2, const char *source) | |
{ | |
- rb_encoding *enc = rb_enc_compatible(str1, str2); | |
+ rb_encoding *enc = rb_enc_compatible_internal(str1, str2, source); | |
if (!enc) | |
- rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s", | |
+ rb_raise(rb_eEncCompatError, "incompatible character encodings in %s: %s and %s", | |
+ source, | |
rb_enc_name(rb_enc_get(str1)), | |
rb_enc_name(rb_enc_get(str2))); | |
return enc; | |
} | |
rb_encoding* | |
-rb_enc_compatible(VALUE str1, VALUE str2) | |
+rb_enc_compatible_internal(VALUE str1, VALUE str2, const char *source) | |
{ | |
int idx1, idx2; | |
rb_encoding *enc1, *enc2; | |
@@ -806,6 +807,13 @@ rb_enc_compatible(VALUE str1, VALUE str2) | |
if (cr2 == ENC_CODERANGE_7BIT) { | |
return enc1; | |
} | |
+ CONST_ID(id_handler, "handler"); | |
+ if (idx1 == ENCINDEX_UTF_8 && | |
+ idx2 == ENCINDEX_ASCII && | |
+ rb_respond_to(rb_eEncCompatError, id_handler)) { | |
+ rb_funcall(rb_eEncCompatError, id_handler, 3, ID2SYM(rb_intern(source)), str1, str2); | |
+ return enc1; | |
+ } | |
} | |
if (cr1 == ENC_CODERANGE_7BIT) | |
return enc2; | |
diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h | |
index 058462f..76c57dd 100644 | |
--- a/include/ruby/encoding.h | |
+++ b/include/ruby/encoding.h | |
@@ -90,8 +90,10 @@ int rb_enc_find_index(const char *name); | |
int rb_to_encoding_index(VALUE); | |
rb_encoding* rb_to_encoding(VALUE); | |
rb_encoding* rb_enc_get(VALUE); | |
-rb_encoding* rb_enc_compatible(VALUE,VALUE); | |
-rb_encoding* rb_enc_check(VALUE,VALUE); | |
+rb_encoding* rb_enc_compatible_internal(VALUE,VALUE,const char*); | |
+#define rb_enc_compatible(a,b) rb_enc_compatible_internal(a,b,__func__) | |
+rb_encoding* rb_enc_check_internal(VALUE,VALUE,const char*); | |
+#define rb_enc_check(a,b) rb_enc_check_internal(a,b,__func__) | |
VALUE rb_enc_associate_index(VALUE, int); | |
VALUE rb_enc_associate(VALUE, rb_encoding*); | |
void rb_enc_copy(VALUE dst, VALUE src); | |
diff --git a/string.c b/string.c | |
index 134d65b..0951e95 100644 | |
--- a/string.c | |
+++ b/string.c | |
@@ -848,7 +848,7 @@ rb_str_shared_replace(VALUE str, VALUE str2) | |
ENC_CODERANGE_SET(str, cr); | |
} | |
-static ID id_to_s; | |
+static ID id_to_s, id_handler; | |
VALUE | |
rb_obj_as_string(VALUE obj) | |
@@ -1947,13 +1947,26 @@ rb_enc_cr_str_buf_cat(VALUE str, const char *ptr, long len, | |
if (ptr_cr_ret) | |
*ptr_cr_ret = ptr_cr; | |
- if (str_encindex != ptr_encindex && | |
+ if (str_encindex == rb_utf8_encindex() && | |
+ ptr_encindex == rb_ascii8bit_encindex() && | |
+ str_cr == ENC_CODERANGE_VALID && | |
+ ptr_cr == ENC_CODERANGE_VALID && | |
+ rb_respond_to(rb_eEncCompatError, id_handler)) { | |
+ rb_funcall(rb_eEncCompatError, id_handler, 3, ID2SYM(rb_intern(__func__)), str, rb_enc_str_new(ptr, len, rb_enc_from_index(ptr_encindex))); | |
+ } | |
+ else if (str_encindex != ptr_encindex && | |
str_cr != ENC_CODERANGE_7BIT && | |
ptr_cr != ENC_CODERANGE_7BIT) { | |
incompatible: | |
- rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s", | |
+ rb_raise(rb_eEncCompatError, "incompatible character encodings: %s (len: %ld, coderange: %s, \"%.5s...\") and %s (len: %ld, coderange: %s, \"%.5s...\")", | |
rb_enc_name(rb_enc_from_index(str_encindex)), | |
- rb_enc_name(rb_enc_from_index(ptr_encindex))); | |
+ RSTRING_LEN(str), | |
+ str_cr == ENC_CODERANGE_UNKNOWN ? "unknown" : str_cr == ENC_CODERANGE_7BIT ? "7bit" : str_cr == ENC_CODERANGE_VALID ? "valid" : "broken", | |
+ RSTRING_PTR(str), | |
+ rb_enc_name(rb_enc_from_index(ptr_encindex)), | |
+ len, | |
+ ptr_cr == ENC_CODERANGE_UNKNOWN ? "unknown" : ptr_cr == ENC_CODERANGE_7BIT ? "7bit" : ptr_cr == ENC_CODERANGE_VALID ? "valid" : "broken", | |
+ ptr); | |
} | |
if (str_cr == ENC_CODERANGE_UNKNOWN) { | |
@@ -7883,6 +7896,7 @@ Init_String(void) | |
rb_define_method(rb_cString, "ascii_only?", rb_str_is_ascii_only_p, 0); | |
id_to_s = rb_intern("to_s"); | |
+ id_handler = rb_intern("handler"); | |
rb_fs = Qnil; | |
rb_define_variable("$;", &rb_fs); |
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.