Created
October 22, 2024 10:45
-
-
Save conholdate-gists/4d9238cccde54f2ff920aa90dbdf6128 to your computer and use it in GitHub Desktop.
Extract Table from PDF in C#
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Walks every page of the source PDF, absorbs the tables found on it, and prints
// each table row to the console as pipe-delimited text (one console line per row).
var filePath = "input.pdf";

// Load source PDF document; the using statement disposes it once extraction finishes.
using (Aspose.Pdf.Document pdfDocument = new Aspose.Pdf.Document(filePath))
{
    foreach (var page in pdfDocument.Pages)
    {
        // Create an absorber and run it over the current page.
        Aspose.Pdf.Text.TableAbsorber absorber = new Aspose.Pdf.Text.TableAbsorber();
        absorber.Visit(page);

        foreach (AbsorbedTable table in absorber.TableList)
        {
            // Header line marking the start of a new table in the output.
            Console.WriteLine("Table");

            foreach (AbsorbedRow row in table.RowList)
            {
                foreach (AbsorbedCell cell in row.CellList)
                {
                    foreach (TextFragment fragment in cell.TextFragments)
                    {
                        // Concatenate the fragment's segments into a single string.
                        var sb = new StringBuilder();
                        foreach (TextSegment seg in fragment.Segments)
                        {
                            sb.Append(seg.Text);
                        }

                        // Interpolated string: without the leading '$' the literal text
                        // "{sb.ToString()}|" would be printed instead of the cell content.
                        Console.Write($"{sb}|");
                    }
                }

                // Terminate the current row.
                Console.WriteLine();
            }
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Converts the whole PDF document to CSV using the Excel save options.
// Load PDF document; the using statement disposes it after the export completes.
using (Document pdfDocument = new Document("input.pdf"))
{
    // Instantiate the ExcelSaveOptions object and select CSV as the output format
    ExcelSaveOptions excelSave = new ExcelSaveOptions { Format = ExcelSaveOptions.ExcelFormat.CSV };

    // Save the output in CSV format; the file extension matches the format chosen above
    // (previously CSV content was written to a file named *.xlsx).
    pdfDocument.Save("PDFToCSV_out.csv", excelSave);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment