Created
October 6, 2015 14:01
-
-
Save zydeco/0042af7f8e52ca68c510 to your computer and use it in GitHub Desktop.
line-based iconv with fallback encoding
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# iconv line by line with fallback encoding
require 'optparse'

# Default settings; the command-line switches parsed below overwrite entries.
#   from     - encoding each input line is first checked against
#   to       - encoding of the text written to stdout
#   fallback - encoding tried when a line is not valid in `from`
#   silent   - when truthy, the per-line error report on stderr is skipped
options = {
  from: Encoding::UTF_8,
  to: Encoding::UTF_8,
  fallback: Encoding::ISO_8859_1,
  silent: false
}
def find_encoding(name) | |
begin | |
Encoding.find(name) | |
rescue | |
puts "Invalid encoding name: #{name}" | |
exit 1 | |
end | |
end | |
# Command-line switches. Each encoding switch resolves its argument through
# find_encoding and stores the result in `options`.
parser = OptionParser.new do |opts|
  opts.banner = 'Usage: liconv [-f ENCODING] [-b ENCODING] [-t ENCODING]'

  opts.on('-l', '--list', 'show list of encodings') do
    # One line per encoding: all of its names, sorted case-insensitively.
    puts Encoding.list.map { |enc| enc.names.join(' ') }.sort_by(&:downcase)
    exit 0
  end

  opts.on('-fENCODING', '--from=ENCODING', 'input encoding') do |v|
    options[:from] = find_encoding(v)
  end

  opts.on('-tENCODING', '--to=ENCODING', 'output encoding') do |v|
    options[:to] = find_encoding(v)
  end

  opts.on('-bENCODING', '--fallback=ENCODING', 'fallback input encoding') do |v|
    options[:fallback] = find_encoding(v)
  end

  opts.on('-s', '--silent', 'suppress error messages') do |v|
    options[:silent] = v
  end

  opts.on('-h', '--help', 'show this help') do
    puts opts
    exit 0
  end
end

# parse! strips the recognised switches from ARGV. An unknown or malformed
# switch is reported on stderr together with the usage text instead of
# terminating with an uncaught exception.
begin
  parser.parse!
rescue OptionParser::ParseError => e
  $stderr.puts e.message
  $stderr.puts parser
  exit 1
end
# Convert the input line by line (ARGF: files left in ARGV, otherwise stdin).
# For each line:
#   1. if its bytes are valid in `from`, transcode it to `to`;
#   2. otherwise, if they are valid in `fallback`, transcode from that;
#   3. otherwise treat it as ASCII, replace the invalid bytes, and record
#      the line number as an error.
# Exit status is 0 when no line hit case 3, 1 otherwise.
from, to, fallback = options.values_at(:from, :to, :fallback)
error_lines = []

ARGF.each_line.with_index(1) do |line, lineno|
  # Lines arrive tagged with the default external encoding, so the bytes are
  # re-tagged as `from` before validating; comparing line.encoding to `from`
  # would never match for any other `from`.
  if line.force_encoding(from).valid_encoding?
    # Transcode here too, so the output encoding applies to every line.
    # NOTE: encode raises Encoding::UndefinedConversionError when `to`
    # cannot represent a character of the line.
    puts line.encode(to)
  elsif fallback && line.force_encoding(fallback).valid_encoding?
    puts line.encode(to)
  else
    puts line.force_encoding(Encoding::ASCII).encode!(to, invalid: :replace)
    error_lines << lineno
  end
end

unless options[:silent]
  error_lines.each { |lineno| STDERR.puts "input:#{lineno}: invalid encoding" }
end

exit error_lines.empty? ? 0 : 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment