Skip to content

Instantly share code, notes, and snippets.

@ishideo
Created October 16, 2017 06:02
Show Gist options
  • Save ishideo/35a82717dddadcabbf627509b1505a62 to your computer and use it in GitHub Desktop.
Save ishideo/35a82717dddadcabbf627509b1505a62 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
# Usage: fixheader.rb --in_csv "./abc.csv" [--in_skip_lines N]
require 'csv'
require 'optparse'
# Rewrites the header row of a CSV file in place.
#
# When the header row starts with a double quote, its fields are parsed,
# repeated column names are numbered ("name", "name.1", "name.2", ...) and the
# row is written back via Array#to_csv. All other lines are left unchanged.
#
# Command-line options (read from ARGV by #optparse):
#   --in_csv FILE         path of the CSV file to rewrite (required)
#   --in_skip_lines N     number of lines preceding the header row (default 0)
class Header
  # Parses the command line and, if the header row is double-quoted, rewrites
  # the file.
  #
  # @raise [ArgumentError] when --in_csv is not supplied
  def initialize
    params = optparse
    raise ArgumentError, 'missing required option: --in_csv FILE' if params['in_csv'].nil?

    @in_csv = params['in_csv'].encode('UTF-8')
    @in_skip_lines = params['in_skip_lines'].to_i
    return unless has_double_quote?(open_header(@in_csv, @in_skip_lines))

    save_file(@in_csv, fix(@in_csv, @in_skip_lines))
  end

  # Reads +file+ and returns its lines with the header row replaced by a
  # version whose duplicate column names are numbered.
  #
  # The header is parsed with CSV.parse_line so that a missing trailing
  # newline, trailing empty columns and escaped quotes are handled correctly.
  #
  # @param file [String] path of the CSV file
  # @param skip_lines [Integer] zero-based index of the header row
  # @return [Array<String>] every line of the file, header row rewritten
  # @raise [CSV::MalformedCSVError] when the header row is not valid CSV
  def fix(file, skip_lines)
    lines = open_file(file)
    header = lines[skip_lines]
    return lines if header.nil? # header index is past the end of the file

    # Unquoted empty fields parse as nil; treat them like empty names.
    words = Array(CSV.parse_line(header)).map(&:to_s)
    return lines if words.empty?

    numbered = words.uniq.map { |word| array_autonum(word, words) }
    # Each transposed row holds exactly one non-nil entry: the final name.
    lines[skip_lines] = array_zip(numbered).map(&:join).to_csv
    lines
  end

  # Transposes an array of arrays, padding with nil to the length of the
  # first row.
  #
  # @param array [Array<Array>] rows to transpose
  # @return [Array<Array>] the transposed rows, or [] for empty input
  def array_zip(array)
    return [] if array.empty?

    head, *tail = array
    head.zip(*tail)
  end

  # Builds a column list, parallel to +words+, for a single column name: the
  # first occurrence keeps its name, later occurrences become "name.1",
  # "name.2", ...; positions holding any other name are nil.
  #
  # Implemented as a loop so that empty column names and very wide headers
  # cannot exhaust the stack.
  #
  # @param uniq_keyword [#to_s] the column name to number
  # @param words [Array<#to_s>] all column names of the header
  # @param count [Integer] number of occurrences already seen
  # @param array [Array] accumulator the results are appended to
  # @return [Array<String, nil>] +array+ with one entry appended per word
  def array_autonum(uniq_keyword, words, count = 0, array = [])
    keyword = uniq_keyword.to_s
    words.each do |word|
      name = word.to_s
      if name == keyword
        array.push(count.zero? ? name : "#{name}.#{count}")
        count += 1
      else
        array.push(nil)
      end
    end
    array
  end

  # Strips the line terminator, if any, and then the first and last character
  # (the outer double quotes of a fully quoted row).
  #
  # No longer called by #fix; kept so existing callers continue to work.
  #
  # @param str [String] a raw line
  # @return [String] the line without terminator and outer characters
  def chop_both(str)
    str.chomp[1..-2].to_s
  end

  # Reads every line of +file+ as UTF-8 text.
  #
  # @param file [String] path of the file
  # @return [Array<String>] the lines, including their terminators
  def open_file(file)
    File.readlines(file, mode: 'rt:UTF-8')
  end

  # Returns the line at zero-based index +skip_lines+ of +file+, stopping as
  # soon as it has been read.
  #
  # @param file [String] path of the file
  # @param skip_lines [Integer] number of lines preceding the header row
  # @return [String] the header line, or '' when the file has no such line
  def open_header(file, skip_lines)
    File.foreach(file, mode: 'rt:UTF-8').with_index do |line, index|
      return line if index == skip_lines
    end
    ''
  end

  # Tells whether +str+ begins with a double quote.
  #
  # @param str [String] the text to inspect
  # @return [Boolean]
  def has_double_quote?(str)
    str.start_with?('"')
  end

  # Overwrites +file+ with the concatenated +lines+ and prints a summary.
  #
  # @param file [String] path of the file to write
  # @param lines [Array<#to_s>] the complete new content, line by line
  def save_file(file, lines)
    File.write(file, lines.join)
    print_log
  end

  # Prints the options used for this run to standard output.
  def print_log
    puts "in_csv: #{@in_csv}"
    # NOTE(review): the label says "in_column" but the value is the
    # --in_skip_lines option; the text is kept unchanged for compatibility.
    puts "in_column: #{@in_skip_lines}"
    puts 'Done.'
    puts '--------------------------------------------------'
  end

  # Parses ARGV for the --in_csv and --in_skip_lines long options.
  #
  # @return [Hash{String => String, nil}] option values keyed by option name
  def optparse
    ARGV.getopts('', 'in_csv:', 'in_skip_lines:0')
  end
end
# Run the header fixer only when this file is executed as a script.
Header.new if __FILE__ == $PROGRAM_NAME
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment