Created
December 14, 2008 14:32
-
-
Save runeb/35698 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby -wKU
# http://blog.grayproductions.net/articles/the_unicode_character_set_and_encodings
# Mixin that checks whether the remaining bytes of a readable, seekable
# stream are well-formed UTF-8.
#
# The host object must respond to +pos+, +eof?+, +read+ and +seek+.
module UTF8Checksum
  # Scans from the current position to the end of the stream and reports
  # whether every byte sequence is well-formed UTF-8 as defined by RFC 3629:
  # no stray or missing continuation bytes, no overlong encodings, no UTF-16
  # surrogates and nothing above U+10FFFF.
  #
  # The stream position is restored before returning, even when an
  # exception is raised while reading.
  #
  # @return [Boolean] true when the rest of the stream is valid UTF-8 (an
  #   empty remainder counts as valid), false otherwise
  def is_utf8?
    where_we_were = pos
    begin
      until eof?
        chunk = read(1)
        break if chunk.nil? || chunk.empty?

        # unpack always yields Integer byte values; String#[] returns a
        # one-character String on Ruby 1.9+, which "%08b" cannot format.
        lead = chunk.unpack("C").first
        next if lead < 0x80 # single-byte (ASCII) character

        # For every legal lead byte: the number of continuation bytes that
        # must follow and the values the first of them may take. The
        # narrowed ranges exclude overlong forms (E0, F0), UTF-16
        # surrogates (ED) and code points above U+10FFFF (F4).
        rule =
          case lead
          when 0xC2..0xDF             then [1, (0x80..0xBF)]
          when 0xE0                   then [2, (0xA0..0xBF)]
          when 0xE1..0xEC, 0xEE..0xEF then [2, (0x80..0xBF)]
          when 0xED                   then [2, (0x80..0x9F)]
          when 0xF0                   then [3, (0x90..0xBF)]
          when 0xF1..0xF3             then [3, (0x80..0xBF)]
          when 0xF4                   then [3, (0x80..0x8F)]
          end
        # 0x80..0xC1 and 0xF5..0xFF can never start a character.
        return false unless rule

        tail_size, second_byte = rule
        tail = (read(tail_size) || "").unpack("C*")
        return false unless tail.size == tail_size # sequence cut off by EOF
        return false unless second_byte.include?(tail.first)
        return false unless tail[1..-1].all? { |byte| (0x80..0xBF).include?(byte) }
      end
      true
    ensure
      seek(where_we_were)
    end
  end
end
# Give every IO instance (files, pipes, sockets, ...) the #is_utf8? check.
IO.send(:include, UTF8Checksum)

# ARGF is not an IO instance, so it has to be extended on its own.
ARGF.extend(UTF8Checksum)
class String
  # Runs the stream-based UTF8Checksum#is_utf8? over this string by
  # wrapping it in a StringIO, and returns that method's result.
  def is_utf8?
    require "stringio" # loaded lazily, only once a String is actually checked
    stream = StringIO.new(self)
    stream.extend(UTF8Checksum)
    stream.is_utf8?
  end
end
# Command-line entry point: only runs when this file is executed directly,
# not when it is required as a library.
if $PROGRAM_NAME == __FILE__
  # Check the input named on the command line (or standard input).
  valid = ARGF.is_utf8?
  p valid
  # Exit status 0 when the check passed, 1 when it did not.
  exit(valid ? 0 : 1)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment