Skip to content

Instantly share code, notes, and snippets.

@tmm1
Created November 10, 2012 02:24
Show Gist options
  • Save tmm1/4049587 to your computer and use it in GitHub Desktop.
diff --git a/encoding.c b/encoding.c
index b8c5f6d..5863c96 100644
--- a/encoding.c
+++ b/encoding.c
@@ -35,7 +35,7 @@ int rb_encdb_alias(const char *alias, const char *orig);
#pragma GCC visibility pop
#endif
-static ID id_encoding;
+static ID id_encoding, id_handler;
VALUE rb_cEncoding;
static VALUE rb_encoding_list;
@@ -736,18 +736,19 @@ rb_enc_get(VALUE obj)
}
rb_encoding*
-rb_enc_check(VALUE str1, VALUE str2)
+rb_enc_check_internal(VALUE str1, VALUE str2, const char *source)
{
- rb_encoding *enc = rb_enc_compatible(str1, str2);
+ rb_encoding *enc = rb_enc_compatible_internal(str1, str2, source);
if (!enc)
- rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
+ rb_raise(rb_eEncCompatError, "incompatible character encodings in %s: %s and %s",
+ source,
rb_enc_name(rb_enc_get(str1)),
rb_enc_name(rb_enc_get(str2)));
return enc;
}
rb_encoding*
-rb_enc_compatible(VALUE str1, VALUE str2)
+rb_enc_compatible_internal(VALUE str1, VALUE str2, const char *source)
{
int idx1, idx2;
rb_encoding *enc1, *enc2;
@@ -806,6 +807,13 @@ rb_enc_compatible(VALUE str1, VALUE str2)
if (cr2 == ENC_CODERANGE_7BIT) {
return enc1;
}
+ CONST_ID(id_handler, "handler");
+ if (idx1 == ENCINDEX_UTF_8 &&
+ idx2 == ENCINDEX_ASCII &&
+ rb_respond_to(rb_eEncCompatError, id_handler)) {
+ rb_funcall(rb_eEncCompatError, id_handler, 3, ID2SYM(rb_intern(source)), str1, str2);
+ return enc1;
+ }
}
if (cr1 == ENC_CODERANGE_7BIT)
return enc2;
diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h
index 058462f..76c57dd 100644
--- a/include/ruby/encoding.h
+++ b/include/ruby/encoding.h
@@ -90,8 +90,10 @@ int rb_enc_find_index(const char *name);
int rb_to_encoding_index(VALUE);
rb_encoding* rb_to_encoding(VALUE);
rb_encoding* rb_enc_get(VALUE);
-rb_encoding* rb_enc_compatible(VALUE,VALUE);
-rb_encoding* rb_enc_check(VALUE,VALUE);
+rb_encoding* rb_enc_compatible_internal(VALUE,VALUE,const char*);
+#define rb_enc_compatible(a,b) rb_enc_compatible_internal(a,b,__func__)
+rb_encoding* rb_enc_check_internal(VALUE,VALUE,const char*);
+#define rb_enc_check(a,b) rb_enc_check_internal(a,b,__func__)
VALUE rb_enc_associate_index(VALUE, int);
VALUE rb_enc_associate(VALUE, rb_encoding*);
void rb_enc_copy(VALUE dst, VALUE src);
diff --git a/string.c b/string.c
index 134d65b..0951e95 100644
--- a/string.c
+++ b/string.c
@@ -848,7 +848,7 @@ rb_str_shared_replace(VALUE str, VALUE str2)
ENC_CODERANGE_SET(str, cr);
}
-static ID id_to_s;
+static ID id_to_s, id_handler;
VALUE
rb_obj_as_string(VALUE obj)
@@ -1947,13 +1947,26 @@ rb_enc_cr_str_buf_cat(VALUE str, const char *ptr, long len,
if (ptr_cr_ret)
*ptr_cr_ret = ptr_cr;
- if (str_encindex != ptr_encindex &&
+ if (str_encindex == rb_utf8_encindex() &&
+ ptr_encindex == rb_ascii8bit_encindex() &&
+ str_cr == ENC_CODERANGE_VALID &&
+ ptr_cr == ENC_CODERANGE_VALID &&
+ rb_respond_to(rb_eEncCompatError, id_handler)) {
+ rb_funcall(rb_eEncCompatError, id_handler, 3, ID2SYM(rb_intern(__func__)), str, rb_enc_str_new(ptr, len, rb_enc_from_index(ptr_encindex)));
+ }
+ else if (str_encindex != ptr_encindex &&
str_cr != ENC_CODERANGE_7BIT &&
ptr_cr != ENC_CODERANGE_7BIT) {
incompatible:
- rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
+ rb_raise(rb_eEncCompatError, "incompatible character encodings: %s (len: %ld, coderange: %s, \"%.5s...\") and %s (len: %ld, coderange: %s, \"%.5s...\")",
rb_enc_name(rb_enc_from_index(str_encindex)),
- rb_enc_name(rb_enc_from_index(ptr_encindex)));
+ RSTRING_LEN(str),
+ str_cr == ENC_CODERANGE_UNKNOWN ? "unknown" : str_cr == ENC_CODERANGE_7BIT ? "7bit" : str_cr == ENC_CODERANGE_VALID ? "valid" : "broken",
+ RSTRING_PTR(str),
+ rb_enc_name(rb_enc_from_index(ptr_encindex)),
+ len,
+ ptr_cr == ENC_CODERANGE_UNKNOWN ? "unknown" : ptr_cr == ENC_CODERANGE_7BIT ? "7bit" : ptr_cr == ENC_CODERANGE_VALID ? "valid" : "broken",
+ ptr);
}
if (str_cr == ENC_CODERANGE_UNKNOWN) {
@@ -7883,6 +7896,7 @@ Init_String(void)
rb_define_method(rb_cString, "ascii_only?", rb_str_is_ascii_only_p, 0);
id_to_s = rb_intern("to_s");
+ id_handler = rb_intern("handler");
rb_fs = Qnil;
rb_define_variable("$;", &rb_fs);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment