Created
January 29, 2009 14:04
-
-
Save chsh/54549 to your computer and use it in GitHub Desktop.
sample ruby script for reading excel file using rjb + poi-3.5
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| require 'rubygems' | |
| require 'rjb' | |
| JARS = Dir.glob('./javalib/**/*.jar').join(':') | |
| Rjb::load(JARS) | |
| class CellConverter | |
| DateUtil = Rjb::import 'org.apache.poi.ss.usermodel.DateUtil' | |
| CELL_TYPE_BLANK = 3 | |
| CELL_TYPE_BOOLEAN = 4 | |
| CELL_TYPE_ERROR = 5 | |
| CELL_TYPE_FORMULA = 2 | |
| CELL_TYPE_NUMERIC = 0 | |
| CELL_TYPE_STRING = 1 | |
| def self.regulate(cell) | |
| case cell.getCellType() | |
| when CELL_TYPE_BLANK; '' | |
| when CELL_TYPE_BOOLEAN; cell.getBooleanCellValue() | |
| when CELL_TYPE_ERROR; nil | |
| when CELL_TYPE_FORMULA; '' | |
| when CELL_TYPE_NUMERIC | |
| if DateUtil.isCellDateFormatted(cell) || isJapaneseDateFormat(cell) | |
| p = cell.getDateCellValue() | |
| puts "DT:#{p}" | |
| Time.at(p.getTime()/1000) | |
| else | |
| val = cell.getNumericCellValue() | |
| if val.to_i.to_f == val | |
| val.to_i | |
| else | |
| val | |
| end | |
| end | |
| when CELL_TYPE_STRING | |
| cell.getStringCellValue() | |
| else | |
| raise "Unexpected cell type: #{cell.getType()}" | |
| end | |
| end | |
| private | |
| def self.isJapaneseDateFormat(cell) | |
| style = cell.getCellStyle() | |
| return true if [30, 31, 32, 33, 55, 56, 57, 58].include?(style.getDataFormat()) | |
| format_string = style.getDataFormatString() | |
| return false unless format_string | |
| hasJapaneseDateTimeExpression(format_string) | |
| end | |
| def self.hasJapaneseDateTimeExpression(str) | |
| str =~ /[年月日時分秒]/ | |
| end | |
| end | |
| class POIBridge | |
| # JARS = Dir.glob('./javalib/**/*.jar').join(':') | |
| # Rjb::load(JARS) | |
| FileInputStream = Rjb::import('java.io.FileInputStream') | |
| WorkbookFactory = Rjb::import('org.apache.poi.ss.usermodel.WorkbookFactory') | |
| Workbook = Rjb::import('org.apache.poi.ss.usermodel.Workbook') | |
| HSSFCellStyle = Rjb::import('org.apache.poi.hssf.usermodel.HSSFCellStyle') | |
| def initialize | |
| end | |
| CELL_TYPE_BLANK = 3 | |
| CELL_TYPE_BOOLEAN = 4 | |
| CELL_TYPE_ERROR = 5 | |
| CELL_TYPE_FORMULA = 2 | |
| CELL_TYPE_NUMERIC = 0 | |
| CELL_TYPE_STRING = 1 | |
| def parse_file(filename) | |
| inp = FileInputStream.new_with_sig('Ljava.lang.String;', filename) | |
| wb = WorkbookFactory.create(inp) | |
| return [] if wb.getNumberOfSheets() == 0 | |
| sh = wb.getSheetAt(0) | |
| rows = [] | |
| (sh.getLastRowNum() +1).times.each do |i| | |
| row = sh.getRow(i) | |
| unless row | |
| rows << [] | |
| next | |
| end | |
| cn_first = row.getFirstCellNum() | |
| if cn_first < 0 | |
| rows << [] | |
| next | |
| end | |
| cells = [] | |
| cn_first.times do | |
| cells << nil | |
| end | |
| (cn_first ... row.getLastCellNum()).each do |i| | |
| cell = row.getCell(i) | |
| unless cell | |
| cells << nil | |
| next | |
| end | |
| cv = CellConverter.regulate(cell) | |
| cells << cv | |
| end | |
| rows << cells | |
| end | |
| rows | |
| end | |
| end | |
| pb = POIBridge.new | |
| rows = pb.parse_file(ARGV[0]) | |
| rows.each do |row| | |
| puts row.join('|') | |
| end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment