Skip to content

Instantly share code, notes, and snippets.

@ChristopherDavenport
Last active May 20, 2021 18:05
Show Gist options
  • Save ChristopherDavenport/4520197ec60b093ec1ec0b98031364e7 to your computer and use it in GitHub Desktop.
Save ChristopherDavenport/4520197ec60b093ec1ec0b98031364e7 to your computer and use it in GitHub Desktop.
XLSX to Cormorant
import java.nio.file.Paths
import cats.effect.IOApp
import cats.effect.{ExitCode, IO}
import cats.syntax.all._
object Main extends IOApp {
// Will be noticeably cleaner on fs2 3
def run(args: List[String]): IO[ExitCode] = {
import cats.effect._
Blocker[IO].use(blocker =>
POI.fromStream(
fs2.io.file.readAll[IO](Paths.get("/Users/you/Downloads/test.xlsx"), blocker, 4096)
)
).flatTap(csv => IO(println(csv)))
.as(ExitCode.Success)
}
}
import java.io.File
import java.io.FileInputStream
import org.apache.poi.xssf.usermodel.XSSFWorkbook
object ImpureMain {
def main(args: Array[String]): Unit = {
val file = new File("/Users/you/Downloads/test.xlsx")
val fis = new FileInputStream(file)
val wb = new XSSFWorkbook(fis)
val sheet = wb.getSheetAt(0)
val csv = POI.fromSheetComplete(sheet)
println(csv)
}
}
import cats.syntax.all._
import org.apache.poi.ss.usermodel.Cell
import org.apache.poi.ss.usermodel.Row
import org.apache.poi.xssf.usermodel.XSSFSheet
import org.apache.poi.xssf.usermodel.XSSFWorkbook
import io.chrisdavenport.cormorant.CSV
import org.apache.poi.ss.usermodel.CellType._NONE
import org.apache.poi.ss.usermodel.CellType.BOOLEAN
import org.apache.poi.ss.usermodel.CellType.STRING
import org.apache.poi.ss.usermodel.CellType.NUMERIC
import org.apache.poi.ss.usermodel.CellType.BLANK
import org.apache.poi.ss.usermodel.CellType.ERROR
import org.apache.poi.ss.usermodel.CellType.FORMULA
object POI {
private def fromSheetInternal(sheet: XSSFSheet): Either[Throwable, List[List[String]]] = Either.catchNonFatal{
import collection.JavaConverters._
val lb : scala.collection.mutable.ListBuffer[List[String]] =
scala.collection.mutable.ListBuffer.empty[List[String]]
val evaluator = sheet.getWorkbook().getCreationHelper().createFormulaEvaluator()
sheet.iterator().asScala.foreach{row =>
val lr: scala.collection.mutable.ListBuffer[String] =
scala.collection.mutable.ListBuffer.empty[String]
row.cellIterator.asScala.foreach{ cell =>
val string = cell.getCellType match {
case BOOLEAN => cell.getBooleanCellValue().toString()
case STRING => cell.getStringCellValue()
case NUMERIC => cell.getNumericCellValue().toString()
case BLANK => "" // TODO: Maybe an Error, Not sure
case ERROR => throw new Throwable(s"No Errors Allowed - $cell")
case FORMULA =>
// Evaluate Formulas to Values
cell.getCachedFormulaResultType() match {
case BOOLEAN => cell.getBooleanCellValue().toString()
case STRING => cell.getStringCellValue()
case NUMERIC => cell.getNumericCellValue().toString()
case BLANK => "" // TODO: Maybe an Error, Not sure
case ERROR => throw new Throwable(s"No Errors Allowed - $cell")
case FORMULA => throw new Throwable(s"No Formulas Allowed as Formula Results - $cell")
case _NONE => throw new Throwable(s"No _None allowed - Not sure what this is $cell")
}
case _NONE => throw new Throwable(s"No _None allowed - Not sure what this is $cell")
}
lr += string
}
val out = lr.toList
lb += out
}
lb.toList
}
private def fromLL(l: List[List[String]]): Either[Throwable, CSV.Complete] =
l.toNel.toRight(new Throwable("No Rows Found")).flatMap(l =>
l.zipWithIndex.traverse{ case (l, int) => l.toNel.toRight(new Throwable(s"No Columns Found For Row $int"))}
).map{
case cats.data.NonEmptyList(h, body) =>
val headers = CSV.Headers(h.map(CSV.Header))
val rows = CSV.Rows(body.map(row => CSV.Row(row.map(CSV.Field(_)))))
CSV.Complete(headers, rows)
}
def fromSheetComplete(sheet: XSSFSheet): Either[Throwable, CSV.Complete] =
fromSheetInternal(sheet).flatMap(fromLL)
def fromWorkbookComplete(wb: XSSFWorkbook, sheet: Either[Int, String] = Left(1)): Either[Throwable, CSV.Complete] =
Either.catchNonFatal(sheet.fold(wb.getSheetAt(_), wb.getSheet(_)))
.flatMap(fromSheetComplete)
import cats.effect._
import fs2._
def fromStream[F[_]: ConcurrentEffect](s: Stream[F, Byte], sheet: Either[Int, String] = Left(1)): F[CSV.Complete] =
s.through(fs2.io.toInputStream)
.map(is => new XSSFWorkbook(is))
.evalMap(wb => fromWorkbookComplete(wb, sheet).liftTo[F])
.compile
.lastOrError
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment