Created
December 8, 2015 06:14
-
-
Save ippeiukai/d8267d16761bc09c8c4b to your computer and use it in GitHub Desktop.
Monkey patch for Ruby's JSON that escapes characters taking more than 3 bytes in UTF-8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'json'

require 'active_support'
require 'active_support/concern'
require 'active_support/core_ext'
require 'active_support/json'
# Patch String#to_json to \u-escape every character outside the BMP (4 bytes in UTF-8)
# Mixin that wraps String#to_json so that characters encoded as 4 bytes in
# UTF-8 (code points outside the BMP) are emitted as ASCII-only JSON escapes,
# while every other character is encoded by the original to_json.
#
# After inclusion the including class has:
#   to_json                              -> the wrapper defined here
#   to_json_without_escape_4byte_utf8    -> the to_json that existed before
module StringToJsonWithEscape4byteUtf8
  extend ActiveSupport::Concern

  included do
    # Hand-rolled equivalent of
    #   alias_method_chain :to_json, :escape_4byte_utf8
    # alias_method_chain is not available in newer ActiveSupport releases, so
    # the two aliases are spelled out; both method names stay the same.
    alias_method :to_json_without_escape_4byte_utf8, :to_json
    alias_method :to_json, :to_json_with_escape_4byte_utf8
    # The wrapper is defined under `private` below; to_json itself must stay
    # callable from outside.
    public :to_json
  end

  private

  # Replacement for to_json.
  #
  # @param opts [Hash, #ascii_only?] options hash or generator state; passed
  #   through untouched to the original to_json
  # @return [String] the JSON string literal (including surrounding quotes)
  def to_json_with_escape_4byte_utf8(opts = {})
    # A state object that already asks for ASCII-only output escapes everything
    # on its own; nothing to add.
    return to_json_without_escape_4byte_utf8(opts) if opts.respond_to?(:ascii_only?) && opts.ascii_only?
    # Anything that is not UTF-8 is escaped wholesale.
    return to_json_with_ascii_only if encoding.name != 'UTF-8'

    # Split into consecutive runs of characters that share the same
    # "needs escaping" flag, in a single pass over the string.
    runs = each_char.chunk { |char| char.bytesize > 3 }.to_a

    return to_json_without_escape_4byte_utf8(opts) if runs.none? { |needs_escape, _| needs_escape }
    return to_json_with_ascii_only if runs.all? { |needs_escape, _| needs_escape }

    # Mixed content: encode each run separately (every run is homogeneous, so
    # the recursive to_json call lands in one of the two branches above), drop
    # the quotes around each piece and wrap the concatenation in one pair.
    inner = runs.map { |_, run_chars| run_chars.join.to_json(opts)[1..-2] }.join
    %("#{inner}")
  end

  # Encodes the receiver with every non-ASCII character escaped.
  # The string is wrapped in an array for JSON.generate and the surrounding
  # "[" and "]" are stripped again.
  #
  # @return [String] the JSON string literal (including surrounding quotes)
  def to_json_with_ascii_only
    JSON.generate([self], ascii_only: true)[1..-2]
  end
end
# Install the patch on String; `send` is used so this also works where
# Module#include is private.
String.send(:include, StringToJsonWithEscape4byteUtf8)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment