Skip to content

Instantly share code, notes, and snippets.

@conholdate-gists
Created October 22, 2024 10:45
Show Gist options
  • Save conholdate-gists/4d9238cccde54f2ff920aa90dbdf6128 to your computer and use it in GitHub Desktop.
Save conholdate-gists/4d9238cccde54f2ff920aa90dbdf6128 to your computer and use it in GitHub Desktop.
Extract Table from PDF in C#
// Extract the tables of a PDF document and print them to the console.
// Every table is introduced by a "Table" line; each row is written on its own
// line, with every text fragment of every cell followed by a '|' separator.

// Load source PDF document
var filePath = "input.pdf";
// Document is disposable: the using statement releases it once extraction is done.
using (Aspose.Pdf.Document pdfDocument = new Aspose.Pdf.Document(filePath))
{
    foreach (var page in pdfDocument.Pages)
    {
        // A fresh absorber is created for each page before visiting it.
        Aspose.Pdf.Text.TableAbsorber absorber = new Aspose.Pdf.Text.TableAbsorber();
        absorber.Visit(page);

        foreach (AbsorbedTable table in absorber.TableList)
        {
            Console.WriteLine("Table");
            foreach (AbsorbedRow row in table.RowList)
            {
                foreach (AbsorbedCell cell in row.CellList)
                {
                    foreach (TextFragment fragment in cell.TextFragments)
                    {
                        // A fragment is made of segments; concatenate them to get its full text.
                        var sb = new StringBuilder();
                        foreach (TextSegment seg in fragment.Segments)
                        {
                            sb.Append(seg.Text);
                        }

                        // Interpolated string ($ prefix) so the fragment text is printed,
                        // not the literal "{sb.ToString()}|".
                        Console.Write($"{sb}|");
                    }
                }

                // End of the row: move to the next console line.
                Console.WriteLine();
            }
        }
    }
}
// Convert a whole PDF document to a CSV file.

// Load PDF document; the using statement disposes it after the conversion.
using (Document pdfDocument = new Document("input.pdf"))
{
    // Instantiate the ExcelSaveOptions object and select CSV as the output format
    ExcelSaveOptions excelSave = new ExcelSaveOptions { Format = ExcelSaveOptions.ExcelFormat.CSV };
    // Save the output in CSV format; the .csv extension matches the selected format
    // (the previous .xlsx name would have held CSV data under a spreadsheet extension).
    pdfDocument.Save("PDFToXLS_out.csv", excelSave);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment