Last active
December 26, 2024 06:04
-
-
Save teoadal/cbb40e599a4842b9e66154c18d0b54f3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using BenchmarkDotNet.Order; | |
using ClosedXML.Excel; | |
using DocumentFormat.OpenXml; | |
using DocumentFormat.OpenXml.Packaging; | |
using DocumentFormat.OpenXml.Spreadsheet; | |
using MiniExcelLibs; | |
using NPOI.HSSF.UserModel; | |
using OfficeOpenXml; | |
using SpreadCheetah; | |
using Cell = SpreadCheetah.Cell; | |
using OpenXmlCell = DocumentFormat.OpenXml.Spreadsheet.Cell; | |
using OpenXmlCellValue = DocumentFormat.OpenXml.Spreadsheet.CellValues; | |
namespace ConsoleApp1; | |
[SimpleJob(RuntimeMoniker.Net90)] | |
[Orderer(SummaryOrderPolicy.FastestToSlowest)] | |
[MeanColumn, MemoryDiagnoser] | |
public class XlsxBench | |
{ | |
private const string SheetName = "Test_Book"; | |
private const int NumberOfColumns = 10; | |
private static readonly Stream OutputStream = new MemoryStream(6_000_000); | |
[Params(20_000)] public int NumberOfRows { get; set; } | |
private List<string> _columnNames = null!; | |
private List<Dictionary<string, object>> _miniExcelSheet = null!; | |
private List<List<string>> _values = null!; | |
[Benchmark] | |
public long ClosedXml() | |
{ | |
OutputStream.Seek(0, SeekOrigin.Begin); | |
using var workbook = new XLWorkbook(); | |
var worksheet = workbook.Worksheets.Add(SheetName); | |
for (var row = 0; row < _values.Count; row++) | |
{ | |
var rowValues = _values[row]; | |
for (var col = 0; col < rowValues.Count; col++) | |
{ | |
worksheet | |
.Cell(row + 1, col + 1) | |
.SetValue(rowValues[col]); | |
} | |
} | |
workbook.SaveAs(OutputStream); | |
return OutputStream.Position; | |
} | |
[Benchmark] | |
public long ClosedXml_Batch() | |
{ | |
OutputStream.Seek(0, SeekOrigin.Begin); | |
using var workbook = new XLWorkbook(); | |
var worksheet = workbook.Worksheets.Add(SheetName); | |
worksheet | |
.Cell(1, 1) | |
.InsertData(_values); | |
workbook.SaveAs(OutputStream); | |
return OutputStream.Position; | |
} | |
[Benchmark] | |
public long EpPlus4() | |
{ | |
OutputStream.Seek(0, SeekOrigin.Begin); | |
using var package = new ExcelPackage(); | |
var worksheet = package.Workbook.Worksheets.Add(SheetName); | |
for (var row = 0; row < _values.Count; row++) | |
{ | |
var columns = _values[row]; | |
for (var col = 0; col < columns.Count; ++col) | |
{ | |
worksheet.Cells[row + 1, col + 1].Value = columns[col]; | |
} | |
} | |
package.SaveAs(OutputStream); | |
return OutputStream.Position; | |
} | |
[Benchmark] | |
public long MinExcel() | |
{ | |
OutputStream.Seek(0, SeekOrigin.Begin); | |
var values = new List<Dictionary<string, object>>(NumberOfRows); | |
foreach (var rowValues in _values) | |
{ | |
var row = new Dictionary<string, object>(NumberOfColumns); | |
for (var i = 0; i < rowValues.Count; i++) | |
{ | |
var columnName = _columnNames[i]; | |
var rowValue = rowValues[i]; | |
row.Add(columnName, rowValue); | |
} | |
values.Add(row); | |
} | |
OutputStream.SaveAs(values); | |
return OutputStream.Position; | |
} | |
[Benchmark] | |
public long MinExcel_Cached() | |
{ | |
OutputStream.Seek(0, SeekOrigin.Begin); | |
for (var rowNumber = 0; rowNumber < _values.Count; rowNumber++) | |
{ | |
var rowValues = _values[rowNumber]; | |
var rowData = _miniExcelSheet[rowNumber]; | |
for (var i = 0; i < rowValues.Count; i++) | |
{ | |
var columnName = _columnNames[i]; | |
var rowValue = rowValues[i]; | |
rowData.Add(columnName, rowValue); | |
} | |
} | |
OutputStream.SaveAs(_miniExcelSheet); | |
foreach (var row in _miniExcelSheet) | |
{ | |
row.Clear(); | |
} | |
return OutputStream.Position; | |
} | |
[Benchmark] | |
public long Npoi() | |
{ | |
OutputStream.Seek(0, SeekOrigin.Begin); | |
var workbook = new HSSFWorkbook(); | |
var sheet = workbook.CreateSheet(SheetName); | |
// This Where the Data row starts from | |
var rowIndex = 0; | |
//Iteration through some collection | |
foreach (var rowValues in _values) | |
{ | |
var currentRow = sheet.CreateRow(rowIndex); | |
for (var colIndex = 0; colIndex < rowValues.Count; colIndex++) | |
{ | |
var cell = currentRow.CreateCell(colIndex); | |
cell.SetCellValue(rowValues[colIndex]); | |
} | |
rowIndex++; | |
} | |
workbook.Write(OutputStream); | |
return OutputStream.Position; | |
} | |
[Benchmark] | |
public long OpenXml_Sax() | |
{ | |
OutputStream.Seek(0, SeekOrigin.Begin); | |
// Is a stream-based processor. | |
// You only have a tiny part in memory at any time and you "sniff" the XML stream by implementing callback code for events like tagStarted() etc. | |
// It uses almost no memory, but you can't do "DOM" stuff, like use xpath or traverse trees. | |
// https://en.wikipedia.org/wiki/Simple_API_for_XML | |
using var xl = SpreadsheetDocument.Create(OutputStream, SpreadsheetDocumentType.Workbook); | |
var workbookPart = xl.AddWorkbookPart(); | |
var wsp = workbookPart.AddNewPart<WorksheetPart>(); | |
var oxw = OpenXmlWriter.Create(wsp); | |
oxw.WriteStartElement(new Worksheet()); | |
oxw.WriteStartElement(new SheetData()); | |
var rowObject = new Row(); | |
var cellAttributes = new[] { new OpenXmlAttribute("t", "", "inlineStr") }; | |
var cell = new OpenXmlCell(); | |
var inlineString = new InlineString(); | |
for (var row = 0; row < NumberOfRows; row++) | |
{ | |
var rowAttributes = new[] { new OpenXmlAttribute("r", "", (row + 1).ToString()) }; | |
oxw.WriteStartElement(rowObject, rowAttributes); | |
var columns = _values[row]; | |
foreach (var col in columns) | |
{ | |
oxw.WriteStartElement(cell, cellAttributes); | |
oxw.WriteStartElement(inlineString); | |
oxw.WriteElement(new Text(col)); | |
oxw.WriteEndElement(); | |
oxw.WriteEndElement(); | |
} | |
oxw.WriteEndElement(); | |
} | |
oxw.WriteEndElement(); | |
oxw.WriteEndElement(); | |
oxw.Close(); | |
oxw = OpenXmlWriter.Create(workbookPart); | |
oxw.WriteStartElement(new Workbook()); | |
oxw.WriteStartElement(new Sheets()); | |
oxw.WriteElement(new Sheet | |
{ | |
Name = "Sheet1", | |
SheetId = 1, | |
Id = workbookPart.GetIdOfPart(wsp) | |
}); | |
oxw.WriteEndElement(); | |
oxw.WriteEndElement(); | |
oxw.Close(); | |
return OutputStream.Position; | |
} | |
[Benchmark] | |
public long OpenXml_Dom() | |
{ | |
OutputStream.Seek(0, SeekOrigin.Begin); | |
// You load the whole thing into memory - it's a massive memory hog. | |
// You can blow memory with even medium sized documents. | |
// But you can use xpath and traverse the tree etc. | |
// https://en.wikipedia.org/wiki/Document_Object_Model | |
using var xl = SpreadsheetDocument.Create(OutputStream, SpreadsheetDocumentType.Workbook); | |
var workbookpart = xl.AddWorkbookPart(); | |
workbookpart.Workbook = new Workbook(); | |
var worksheetPart = workbookpart.AddNewPart<WorksheetPart>(); | |
var sheetData = new SheetData(); | |
worksheetPart.Worksheet = new Worksheet(); | |
worksheetPart.Worksheet.AppendChild(sheetData); | |
var sheets = workbookpart.Workbook.AppendChild(new Sheets()); | |
var sheet = new Sheet | |
{ | |
Id = workbookpart.GetIdOfPart(worksheetPart), | |
SheetId = 1, | |
Name = SheetName | |
}; | |
var cells = new OpenXmlElement[NumberOfColumns]; | |
for (var row = 0; row < _values.Count; ++row) | |
{ | |
var rowValues = _values[row]; | |
for (var col = 0; col < rowValues.Count; ++col) | |
{ | |
var inlineString = new InlineString(); | |
inlineString.AppendChild(new Text(rowValues[col])); | |
var cell = new OpenXmlCell { DataType = OpenXmlCellValue.InlineString }; | |
cell.AppendChild(inlineString); | |
cells[col] = cell; | |
} | |
var rowObject = new Row(cells) { RowIndex = (uint)row + 1 }; | |
sheetData.AppendChild(rowObject); | |
} | |
sheets.AppendChild(sheet); | |
workbookpart.Workbook.Save(); | |
return OutputStream.Position; | |
} | |
[Benchmark(Baseline = true)] | |
public async Task SpreadCheetah() | |
{ | |
OutputStream.Seek(0, SeekOrigin.Begin); | |
await using var spreadsheet = await Spreadsheet.CreateNewAsync(OutputStream); | |
await spreadsheet.StartWorksheetAsync(SheetName); | |
foreach (var rowValues in _values) | |
{ | |
var rowValuesCount = rowValues.Count; | |
var rowCells = new List<Cell>(rowValuesCount); | |
for (var col = 0; col < rowValuesCount; col++) | |
{ | |
rowCells.Add(new Cell(rowValues[col])); | |
} | |
await spreadsheet.AddRowAsync(rowCells); | |
} | |
await spreadsheet.FinishAsync(); | |
} | |
[Benchmark] | |
public async Task SpreadCheetahDataCell() | |
{ | |
OutputStream.Seek(0, SeekOrigin.Begin); | |
await using var spreadsheet = await Spreadsheet.CreateNewAsync(OutputStream); | |
await spreadsheet.StartWorksheetAsync(SheetName); | |
foreach (var rowValues in _values) | |
{ | |
var rowValuesCount = rowValues.Count; | |
var rowCells = new DataCell[rowValuesCount]; | |
for (var col = 0; col < rowValuesCount; col++) | |
{ | |
rowCells[col] = new DataCell(rowValues[col]); | |
} | |
await spreadsheet.AddRowAsync(rowCells); | |
} | |
await spreadsheet.FinishAsync(); | |
} | |
[GlobalSetup] | |
public void Setup() | |
{ | |
_columnNames = Enumerable.Range(0, NumberOfColumns).Select(c => $"Column{c}").ToList(); | |
_values = new List<List<string>>(NumberOfRows); | |
for (var row = 1; row <= NumberOfRows; row++) | |
{ | |
var cells = new List<string>(NumberOfColumns); | |
_values.Add(cells); | |
for (var col = 1; col <= NumberOfColumns; col++) | |
{ | |
cells.Add($"{col}-{row}"); | |
} | |
} | |
_miniExcelSheet = new List<Dictionary<string, object>>(NumberOfRows); | |
foreach (var _ in _values) | |
{ | |
_miniExcelSheet.Add(new Dictionary<string, object>(NumberOfColumns)); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment