Skip to content

Instantly share code, notes, and snippets.

@teoadal
Last active December 26, 2024 06:04
Show Gist options
  • Save teoadal/cbb40e599a4842b9e66154c18d0b54f3 to your computer and use it in GitHub Desktop.
Save teoadal/cbb40e599a4842b9e66154c18d0b54f3 to your computer and use it in GitHub Desktop.
using BenchmarkDotNet.Order;
using ClosedXML.Excel;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using MiniExcelLibs;
using NPOI.HSSF.UserModel;
using OfficeOpenXml;
using SpreadCheetah;
using Cell = SpreadCheetah.Cell;
using OpenXmlCell = DocumentFormat.OpenXml.Spreadsheet.Cell;
using OpenXmlCellValue = DocumentFormat.OpenXml.Spreadsheet.CellValues;
namespace ConsoleApp1;
[SimpleJob(RuntimeMoniker.Net90)]
[Orderer(SummaryOrderPolicy.FastestToSlowest)]
[MeanColumn, MemoryDiagnoser]
public class XlsxBench
{
private const string SheetName = "Test_Book";
private const int NumberOfColumns = 10;
private static readonly Stream OutputStream = new MemoryStream(6_000_000);
[Params(20_000)] public int NumberOfRows { get; set; }
private List<string> _columnNames = null!;
private List<Dictionary<string, object>> _miniExcelSheet = null!;
private List<List<string>> _values = null!;
[Benchmark]
public long ClosedXml()
{
OutputStream.Seek(0, SeekOrigin.Begin);
using var workbook = new XLWorkbook();
var worksheet = workbook.Worksheets.Add(SheetName);
for (var row = 0; row < _values.Count; row++)
{
var rowValues = _values[row];
for (var col = 0; col < rowValues.Count; col++)
{
worksheet
.Cell(row + 1, col + 1)
.SetValue(rowValues[col]);
}
}
workbook.SaveAs(OutputStream);
return OutputStream.Position;
}
[Benchmark]
public long ClosedXml_Batch()
{
OutputStream.Seek(0, SeekOrigin.Begin);
using var workbook = new XLWorkbook();
var worksheet = workbook.Worksheets.Add(SheetName);
worksheet
.Cell(1, 1)
.InsertData(_values);
workbook.SaveAs(OutputStream);
return OutputStream.Position;
}
[Benchmark]
public long EpPlus4()
{
OutputStream.Seek(0, SeekOrigin.Begin);
using var package = new ExcelPackage();
var worksheet = package.Workbook.Worksheets.Add(SheetName);
for (var row = 0; row < _values.Count; row++)
{
var columns = _values[row];
for (var col = 0; col < columns.Count; ++col)
{
worksheet.Cells[row + 1, col + 1].Value = columns[col];
}
}
package.SaveAs(OutputStream);
return OutputStream.Position;
}
[Benchmark]
public long MinExcel()
{
OutputStream.Seek(0, SeekOrigin.Begin);
var values = new List<Dictionary<string, object>>(NumberOfRows);
foreach (var rowValues in _values)
{
var row = new Dictionary<string, object>(NumberOfColumns);
for (var i = 0; i < rowValues.Count; i++)
{
var columnName = _columnNames[i];
var rowValue = rowValues[i];
row.Add(columnName, rowValue);
}
values.Add(row);
}
OutputStream.SaveAs(values);
return OutputStream.Position;
}
[Benchmark]
public long MinExcel_Cached()
{
OutputStream.Seek(0, SeekOrigin.Begin);
for (var rowNumber = 0; rowNumber < _values.Count; rowNumber++)
{
var rowValues = _values[rowNumber];
var rowData = _miniExcelSheet[rowNumber];
for (var i = 0; i < rowValues.Count; i++)
{
var columnName = _columnNames[i];
var rowValue = rowValues[i];
rowData.Add(columnName, rowValue);
}
}
OutputStream.SaveAs(_miniExcelSheet);
foreach (var row in _miniExcelSheet)
{
row.Clear();
}
return OutputStream.Position;
}
[Benchmark]
public long Npoi()
{
OutputStream.Seek(0, SeekOrigin.Begin);
var workbook = new HSSFWorkbook();
var sheet = workbook.CreateSheet(SheetName);
// This Where the Data row starts from
var rowIndex = 0;
//Iteration through some collection
foreach (var rowValues in _values)
{
var currentRow = sheet.CreateRow(rowIndex);
for (var colIndex = 0; colIndex < rowValues.Count; colIndex++)
{
var cell = currentRow.CreateCell(colIndex);
cell.SetCellValue(rowValues[colIndex]);
}
rowIndex++;
}
workbook.Write(OutputStream);
return OutputStream.Position;
}
[Benchmark]
public long OpenXml_Sax()
{
OutputStream.Seek(0, SeekOrigin.Begin);
// Is a stream-based processor.
// You only have a tiny part in memory at any time and you "sniff" the XML stream by implementing callback code for events like tagStarted() etc.
// It uses almost no memory, but you can't do "DOM" stuff, like use xpath or traverse trees.
// https://en.wikipedia.org/wiki/Simple_API_for_XML
using var xl = SpreadsheetDocument.Create(OutputStream, SpreadsheetDocumentType.Workbook);
var workbookPart = xl.AddWorkbookPart();
var wsp = workbookPart.AddNewPart<WorksheetPart>();
var oxw = OpenXmlWriter.Create(wsp);
oxw.WriteStartElement(new Worksheet());
oxw.WriteStartElement(new SheetData());
var rowObject = new Row();
var cellAttributes = new[] { new OpenXmlAttribute("t", "", "inlineStr") };
var cell = new OpenXmlCell();
var inlineString = new InlineString();
for (var row = 0; row < NumberOfRows; row++)
{
var rowAttributes = new[] { new OpenXmlAttribute("r", "", (row + 1).ToString()) };
oxw.WriteStartElement(rowObject, rowAttributes);
var columns = _values[row];
foreach (var col in columns)
{
oxw.WriteStartElement(cell, cellAttributes);
oxw.WriteStartElement(inlineString);
oxw.WriteElement(new Text(col));
oxw.WriteEndElement();
oxw.WriteEndElement();
}
oxw.WriteEndElement();
}
oxw.WriteEndElement();
oxw.WriteEndElement();
oxw.Close();
oxw = OpenXmlWriter.Create(workbookPart);
oxw.WriteStartElement(new Workbook());
oxw.WriteStartElement(new Sheets());
oxw.WriteElement(new Sheet
{
Name = "Sheet1",
SheetId = 1,
Id = workbookPart.GetIdOfPart(wsp)
});
oxw.WriteEndElement();
oxw.WriteEndElement();
oxw.Close();
return OutputStream.Position;
}
[Benchmark]
public long OpenXml_Dom()
{
OutputStream.Seek(0, SeekOrigin.Begin);
// You load the whole thing into memory - it's a massive memory hog.
// You can blow memory with even medium sized documents.
// But you can use xpath and traverse the tree etc.
// https://en.wikipedia.org/wiki/Document_Object_Model
using var xl = SpreadsheetDocument.Create(OutputStream, SpreadsheetDocumentType.Workbook);
var workbookpart = xl.AddWorkbookPart();
workbookpart.Workbook = new Workbook();
var worksheetPart = workbookpart.AddNewPart<WorksheetPart>();
var sheetData = new SheetData();
worksheetPart.Worksheet = new Worksheet();
worksheetPart.Worksheet.AppendChild(sheetData);
var sheets = workbookpart.Workbook.AppendChild(new Sheets());
var sheet = new Sheet
{
Id = workbookpart.GetIdOfPart(worksheetPart),
SheetId = 1,
Name = SheetName
};
var cells = new OpenXmlElement[NumberOfColumns];
for (var row = 0; row < _values.Count; ++row)
{
var rowValues = _values[row];
for (var col = 0; col < rowValues.Count; ++col)
{
var inlineString = new InlineString();
inlineString.AppendChild(new Text(rowValues[col]));
var cell = new OpenXmlCell { DataType = OpenXmlCellValue.InlineString };
cell.AppendChild(inlineString);
cells[col] = cell;
}
var rowObject = new Row(cells) { RowIndex = (uint)row + 1 };
sheetData.AppendChild(rowObject);
}
sheets.AppendChild(sheet);
workbookpart.Workbook.Save();
return OutputStream.Position;
}
[Benchmark(Baseline = true)]
public async Task SpreadCheetah()
{
OutputStream.Seek(0, SeekOrigin.Begin);
await using var spreadsheet = await Spreadsheet.CreateNewAsync(OutputStream);
await spreadsheet.StartWorksheetAsync(SheetName);
foreach (var rowValues in _values)
{
var rowValuesCount = rowValues.Count;
var rowCells = new List<Cell>(rowValuesCount);
for (var col = 0; col < rowValuesCount; col++)
{
rowCells.Add(new Cell(rowValues[col]));
}
await spreadsheet.AddRowAsync(rowCells);
}
await spreadsheet.FinishAsync();
}
[Benchmark]
public async Task SpreadCheetahDataCell()
{
OutputStream.Seek(0, SeekOrigin.Begin);
await using var spreadsheet = await Spreadsheet.CreateNewAsync(OutputStream);
await spreadsheet.StartWorksheetAsync(SheetName);
foreach (var rowValues in _values)
{
var rowValuesCount = rowValues.Count;
var rowCells = new DataCell[rowValuesCount];
for (var col = 0; col < rowValuesCount; col++)
{
rowCells[col] = new DataCell(rowValues[col]);
}
await spreadsheet.AddRowAsync(rowCells);
}
await spreadsheet.FinishAsync();
}
[GlobalSetup]
public void Setup()
{
_columnNames = Enumerable.Range(0, NumberOfColumns).Select(c => $"Column{c}").ToList();
_values = new List<List<string>>(NumberOfRows);
for (var row = 1; row <= NumberOfRows; row++)
{
var cells = new List<string>(NumberOfColumns);
_values.Add(cells);
for (var col = 1; col <= NumberOfColumns; col++)
{
cells.Add($"{col}-{row}");
}
}
_miniExcelSheet = new List<Dictionary<string, object>>(NumberOfRows);
foreach (var _ in _values)
{
_miniExcelSheet.Add(new Dictionary<string, object>(NumberOfColumns));
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment