Created
December 28, 2013 01:36
-
-
Save hrp/8155066 to your computer and use it in GitHub Desktop.
Dump non-ascii benchmark
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding: utf-8 | |
require 'benchmark' | |
require 'iconv' | |
# Monkey-patches String with several alternative ways of dropping non-ASCII
# characters so that they can be benchmarked against each other.
class String
  # Replaces, in place, every character whose code point is outside 33..127
  # with +replacement+. Note that this range excludes the space character and
  # all control characters, so those are replaced as well.
  #
  # Mutates the receiver (as the original implementation did).
  #
  # @param replacement [String] text substituted for each rejected character
  # @return [String] the receiver's new contents
  def remove_nonascii(replacement = '')
    original_chars = chars
    clear
    original_chars.each do |ch|
      # `ord` yields the code point. The previous `ch[0].to_i` parsed the
      # character as a decimal number (0..9), so every character was rejected.
      concat(ch.ord.between?(33, 127) ? ch : replacement)
    end
    to_s
  end

  # Returns a copy with every non-ASCII character replaced by +replacement+.
  #
  # The previous pattern only covered U+0080..U+00FF, so characters above the
  # Latin-1 range were left untouched.
  #
  # @param replacement [String] text substituted for each non-ASCII character
  # @return [String]
  def remove_non_ascii(replacement = "")
    gsub(/\P{ASCII}/, replacement)
  end

  # Converts the receiver from UTF-8 to US-ASCII via Iconv, ignoring
  # characters that cannot be represented.
  #
  # NOTE(review): depends on the `Iconv` constant loaded by `require 'iconv'`
  # at the top of the file; presumably this needs the iconv gem on Ruby
  # versions that no longer bundle it -- confirm for the target Ruby.
  #
  # @return [String]
  def iconv
    Iconv.conv('US-ASCII//IGNORE', 'UTF-8', self)
  end

  # Transcodes the receiver to ASCII with String#encode, dropping anything
  # that cannot be represented.
  #
  # Options are passed as real keyword arguments: handing String#encode a
  # Hash positionally is not treated as options on Ruby 3.0+.
  #
  # @return [String] an ASCII-encoded copy
  def with_encode
    encode(
      'ASCII',
      invalid: :replace,       # Replace invalid byte sequences
      undef: :replace,         # Replace anything not defined in ASCII
      replace: '',             # Replace above with this
      universal_newline: true  # Always break lines with \n
    )
  end

  # Keeps only the ASCII characters of the receiver.
  #
  # NOTE: the character class below matches "(", ")" and whitespace, and the
  # resulting spaces are then deleted, so this variant also removes all
  # whitespace and parentheses. That behaviour is kept unchanged here.
  #
  # @return [String]
  def with_chars
    chars.select(&:ascii_only?).join.strip.gsub(/[(\s)]+/, " ").gsub(" ", "").strip
  end

  # Returns a copy with every non-ASCII character removed using a Unicode
  # property regular expression.
  #
  # @return [String]
  def regex
    gsub(/\P{ASCII}/, '')
  end
end
# Sample input: a run of non-ASCII symbols followed by the word "ascii",
# repeated three times.
utf8 = '☼☹☼✿☺☻☹☃⌇♒♒⌨☝♡“¥¥ß©®@÷π≠ascii' * 3

# Sanity check: print what each strategy produces before timing it.
# (Not exercised: utf8.force_encoding(Encoding.find('ASCII')) and
#  utf8.encode(Encoding.find('ASCII'), undef: :replace).)
%i[with_encode with_chars iconv regex].each do |strategy|
  puts utf8.public_send(strategy)
end

# Number of times each strategy is invoked inside its benchmark report.
iterations = 10_000

Benchmark.bm do |bench|
  bench.report('iconv') do
    iterations.times { utf8.iconv }
  end
  bench.report('encod') do
    iterations.times { utf8.with_encode }
  end
  bench.report('chars') do
    iterations.times { utf8.with_chars }
  end
  # Not benchmarked: utf8.force_encoding('US-ASCII')
  bench.report('regex') do
    iterations.times { utf8.regex }
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment