Last active
April 25, 2025 03:46
-
-
Save aspose-com-gists/0072f3b49f4d3d9a7975b3382cee347f to your computer and use it in GitHub Desktop.
How to Search in PDF Using C#
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Searches every page of sample.pdf for the word "invoice" regardless of case,
// then prints the number of matches and the text/page of each one.
// The document is opened in a using block so the resources it holds are
// released as soon as the search results have been printed.
using (Document pdfDocument = new Document("sample.pdf"))
{
    // "(?i)" is the inline regex flag that makes the pattern case-insensitive
    TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber("(?i)INVOICE");

    // Passing true asks the absorber to interpret the phrase as a regular expression
    TextSearchOptions textSearchOptions = new TextSearchOptions(true);
    textFragmentAbsorber.TextSearchOptions = textSearchOptions;

    // Run the absorber over all pages of the document
    pdfDocument.Pages.Accept(textFragmentAbsorber);

    // Collect the matches and report how many were found
    TextFragmentCollection textFragments = textFragmentAbsorber.TextFragments;
    Console.WriteLine($"Found {textFragments.Count} instance(s) of the keyword.");

    // List each match together with the page it was found on
    foreach (TextFragment fragment in textFragments)
    {
        Console.WriteLine($"Text: {fragment.Text} | Page: {fragment.Page.Number}");
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Searches sample.pdf for the phrase "invoice" and, for every match, prints its
// text, page number, position, and font details.
// The document is opened in a using block so the resources it holds are
// released once all matches have been printed.
using (Document pdfDocument = new Document("sample.pdf"))
{
    TextFragmentAbsorber absorber = new TextFragmentAbsorber("invoice");

    // Run the absorber over all pages of the document
    pdfDocument.Pages.Accept(absorber);

    // Matched fragments gathered by the absorber
    TextFragmentCollection fragments = absorber.TextFragments;

    // Print one record per match, separated by a dashed line
    foreach (TextFragment fragment in fragments)
    {
        Console.WriteLine($"Text: {fragment.Text}");
        Console.WriteLine($"Page: {fragment.Page.Number}");
        Console.WriteLine($"Position - X: {fragment.Position.XIndent}, Y: {fragment.Position.YIndent}");
        Console.WriteLine($"Font: {fragment.TextState.Font.FontName}, Size: {fragment.TextState.FontSize}");
        Console.WriteLine("------------");
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Searches sample.pdf for date-like strings of the form NN/NN/NNNN and prints
// each match with the page it appears on.
// The document is opened in a using block so the resources it holds are
// released once the matches have been printed.
using (Document pdfDocument = new Document("sample.pdf"))
{
    // The constructor argument already enables regular-expression matching,
    // so no additional property assignment is needed.
    TextSearchOptions searchOptions = new TextSearchOptions(true);

    // Pattern: two digits, slash, two digits, slash, four digits
    TextFragmentAbsorber absorber = new TextFragmentAbsorber(@"\d{2}/\d{2}/\d{4}", searchOptions);

    // Run the absorber over all pages of the document
    pdfDocument.Pages.Accept(absorber);

    // Report every string that matched the pattern
    foreach (TextFragment fragment in absorber.TextFragments)
    {
        Console.WriteLine($"Found date: {fragment.Text} on Page {fragment.Page.Number}");
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Finds every occurrence of "John Doe" in sample.pdf, covers each one with a
// black redaction, and writes the result to SearchAndRedact.pdf.
// The document is opened in a using block so the resources it holds are
// released after the redacted copy has been saved.
using (Document pdfDocument = new Document("sample.pdf"))
{
    TextFragmentAbsorber absorber = new TextFragmentAbsorber("John Doe");

    // Run the absorber over all pages of the document
    pdfDocument.Pages.Accept(absorber);

    foreach (TextFragment fragment in absorber.TextFragments)
    {
        // Use the fragment's own bounding rectangle as the redaction area.
        // Rebuilding the box from Position plus width/height can misplace it
        // whenever Position is not the lower-left corner of that rectangle.
        Aspose.Pdf.Rectangle rect = fragment.Rectangle;

        // Color is qualified the same way as Rectangle so the snippet does not
        // become ambiguous when System.Drawing is also imported.
        RedactionAnnotation redaction = new RedactionAnnotation(fragment.Page, rect)
        {
            FillColor = Aspose.Pdf.Color.Black,
            Color = Aspose.Pdf.Color.Black
        };

        // The annotation is added to the page first, then applied
        fragment.Page.Annotations.Add(redaction);
        redaction.Redact();
    }

    // Save the modified document under a new name
    pdfDocument.Save("SearchAndRedact.pdf");
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Searches every page of sample.pdf for the phrase "invoice", then prints the
// number of matches and the text/page of each one.
// The document is opened in a using block so the resources it holds are
// released as soon as the search results have been printed.
using (Document pdfDocument = new Document("sample.pdf"))
{
    // Absorber configured with the plain search phrase
    TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber("invoice");

    // Run the absorber over all pages of the document
    pdfDocument.Pages.Accept(textFragmentAbsorber);

    // Collect the matches and report how many were found
    TextFragmentCollection textFragments = textFragmentAbsorber.TextFragments;
    Console.WriteLine($"Found {textFragments.Count} instance(s) of the keyword.");

    // List each match together with the page it was found on
    foreach (TextFragment fragment in textFragments)
    {
        Console.WriteLine($"Text: {fragment.Text} | Page: {fragment.Page.Number}");
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Searches every page of sample.pdf for "car" as a whole word, then prints the
// number of matches and the text/page of each one.
// The document is opened in a using block so the resources it holds are
// released as soon as the search results have been printed.
using (Document pdfDocument = new Document("sample.pdf"))
{
    // \b on both sides anchors the pattern to word boundaries, so "car" is
    // matched on its own but not inside longer words such as "cargo"
    TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber(@"\bcar\b");

    // Passing true asks the absorber to interpret the phrase as a regular expression
    TextSearchOptions textSearchOptions = new TextSearchOptions(true);
    textFragmentAbsorber.TextSearchOptions = textSearchOptions;

    // Run the absorber over all pages of the document
    pdfDocument.Pages.Accept(textFragmentAbsorber);

    // Collect the matches and report how many were found
    TextFragmentCollection textFragments = textFragmentAbsorber.TextFragments;
    Console.WriteLine($"Found {textFragments.Count} instance(s) of the keyword.");

    // List each match together with the page it was found on
    foreach (TextFragment fragment in textFragments)
    {
        Console.WriteLine($"Text: {fragment.Text} | Page: {fragment.Page.Number}");
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment