Skip to content

Instantly share code, notes, and snippets.

@ejwinter
Last active August 10, 2023 02:39
Show Gist options
  • Save ejwinter/f1fa13aa0434eb0b8ee029c89f015f4c to your computer and use it in GitHub Desktop.
Save ejwinter/f1fa13aa0434eb0b8ee029c89f015f4c to your computer and use it in GitHub Desktop.
Turn an Excel (xlsx) file with a single table in it into a Guava table with a header indexed
/**
* This class takes an Excel spreadsheet and converts it into a Guava Table format.
* When other formats of the file are to be supported (like CSV, TSV etc.,) implement the apply method with that format.
*/
@Component
public class ExcelToTableLoader implements TableGenerator<File> {
private static final Logger logger = org.slf4j.LoggerFactory.getLogger(ExcelToTableLoader.class);
private Table<Integer, String, String> processRows(Iterator<Row> rowIterator) {
Map<Integer,String> header = extractHeader(rowIterator.next());
List<Map<String,String>> cells = toCellStream(rowIterator)
.map(row -> extractRow(header, row))
.filter(row -> hasContent(row))
.collect(Collectors.toList());
return toTable(cells);
}
private Table<Integer, String, String> toTable(List<Map<String, String>> rows) {
ImmutableTable.Builder<Integer, String, String> table = ImmutableTable.builder();
int rowCounter = 0;
for (Map<String, String> row : rows) {
for (Map.Entry<String, String> columns : row.entrySet()) {
table.put(rowCounter, columns.getKey(), columns.getValue());
}
rowCounter++;
}
return table.build();
}
private boolean hasContent(Map<String, String> row) {
return row.entrySet().stream()
.anyMatch(column -> column.getValue()!=null && !column.getValue().isEmpty());
}
private static Map<String,String> extractRow(Map<Integer, String> headers, Row row) {
return headers.entrySet().stream().collect(Collectors.toMap(h -> h.getValue(), h -> getCellValue(row.getCell(h.getKey()))));
}
private static Map<Integer, String> extractHeader(Row headerRow) {
//create stream over header row and turn into map
Stream<Cell> cellStream = StreamSupport.stream(headerRow.spliterator(), false);
return Collections.unmodifiableMap(
cellStream
.filter(cell -> cell.getStringCellValue()!=null && !cell.getStringCellValue().isEmpty())
.collect(Collectors.toMap(column->column.getColumnIndex(), column->getCellValue(column)))
);
}
private static String getCellValue(Cell cell) {
switch(cell.getCellTypeEnum()){
case BOOLEAN:
return Boolean.toString(cell.getBooleanCellValue());
case NUMERIC:
return Double.toString(cell.getNumericCellValue());
case BLANK:
return "";
default:
return cell.getStringCellValue();
}
}
private static Stream<Row> toCellStream(Iterator<Row> iterator){
Iterable<Row> asIterable = () -> iterator;
return StreamSupport.stream(asIterable.spliterator(), false);
}
/*
* Process each row and put the values into the table
*/
@Override
public Table<Integer, String, String> apply(File inputFile) throws TableImportException{
Table<Integer, String, String> inputTable = HashBasedTable.create();
FileInputStream file = null;
try {
file = new FileInputStream(inputFile);//Resources
//Get the workbook instance for XLS file
XSSFWorkbook workbook = new XSSFWorkbook(file);
//Get first sheet from the workbook
XSSFSheet sheet = workbook.getSheetAt(0);
if(sheet == null){
throw new TableImportException("There was no sheets in the workbook.");
}
//Iterate through each rows from first sheet
Iterator<Row> rowIterator = sheet.iterator();
if(!rowIterator.hasNext()){
throw new TableImportException("There were now rows in the workbook.");
}
inputTable = processRows(rowIterator);
} catch (IOException e) {
throw new TableImportException(e);
}finally{
if(file != null){
try {
file.close();
} catch (IOException e) {
logger.warn(e.getMessage(), e);
}
}
}
return inputTable;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment