Skip to content

Instantly share code, notes, and snippets.

@aspose-com-gists
Last active April 25, 2025 03:46
Show Gist options
  • Save aspose-com-gists/0072f3b49f4d3d9a7975b3382cee347f to your computer and use it in GitHub Desktop.
Save aspose-com-gists/0072f3b49f4d3d9a7975b3382cee347f to your computer and use it in GitHub Desktop.
How to Search in PDF Using C#
// Case-insensitive search: find every occurrence of "INVOICE" in sample.pdf,
// regardless of letter case, and print each match with its page number.
// The document is opened in a using block so it is disposed when the search is done.
using (Document pdfDocument = new Document("sample.pdf"))
{
    // Passing true enables regular-expression matching, which the inline (?i)
    // (ignore-case) flag in the pattern below relies on.
    TextSearchOptions textSearchOptions = new TextSearchOptions(true);

    // Create a text absorber with a case-insensitive regular expression
    TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber("(?i)INVOICE", textSearchOptions);

    // Apply the absorber to all pages
    pdfDocument.Pages.Accept(textFragmentAbsorber);

    // Retrieve the matched text fragments
    TextFragmentCollection textFragments = textFragmentAbsorber.TextFragments;

    // Output the number of matches found
    Console.WriteLine($"Found {textFragments.Count} instance(s) of the keyword.");

    // Loop through and display each found text fragment
    foreach (TextFragment fragment in textFragments)
    {
        Console.WriteLine($"Text: {fragment.Text} | Page: {fragment.Page.Number}");
    }
}
// Match details: search sample.pdf for "invoice" and, for every match, print
// its text, page number, position and font information.
// The document is opened in a using block so it is disposed when the search is done.
using (Document pdfDocument = new Document("sample.pdf"))
{
    // Search all pages for the phrase
    TextFragmentAbsorber absorber = new TextFragmentAbsorber("invoice");
    pdfDocument.Pages.Accept(absorber);

    // Get matched fragments
    TextFragmentCollection fragments = absorber.TextFragments;

    // Print text, page, position and font for each match
    foreach (TextFragment fragment in fragments)
    {
        Console.WriteLine($"Text: {fragment.Text}");
        Console.WriteLine($"Page: {fragment.Page.Number}");
        Console.WriteLine($"Position - X: {fragment.Position.XIndent}, Y: {fragment.Position.YIndent}");
        Console.WriteLine($"Font: {fragment.TextState.Font.FontName}, Size: {fragment.TextState.FontSize}");
        // Visual separator between matches
        Console.WriteLine("------------");
    }
}
// Pattern search: find every date written as dd/dd/dddd (two digits, two digits,
// four digits separated by slashes) in sample.pdf and print it with its page number.
// The document is opened in a using block so it is disposed when the search is done.
using (Document pdfDocument = new Document("sample.pdf"))
{
    // Enable regex in search options. The constructor argument already turns
    // regular-expression matching on, so no extra property initializer is needed.
    TextSearchOptions searchOptions = new TextSearchOptions(true);

    // Create absorber with date pattern
    TextFragmentAbsorber absorber = new TextFragmentAbsorber(@"\d{2}/\d{2}/\d{4}", searchOptions);

    // Apply absorber to pages
    pdfDocument.Pages.Accept(absorber);

    // Loop and print found patterns
    foreach (TextFragment fragment in absorber.TextFragments)
    {
        Console.WriteLine($"Found date: {fragment.Text} on Page {fragment.Page.Number}");
    }
}
// Search and redact: find every occurrence of "John Doe" in sample.pdf, black it
// out with a redaction annotation, and save the result as SearchAndRedact.pdf.
// The document is opened in a using block so it is disposed after saving.
using (Document pdfDocument = new Document("sample.pdf"))
{
    // Search for the name "John Doe"
    TextFragmentAbsorber absorber = new TextFragmentAbsorber("John Doe");
    pdfDocument.Pages.Accept(absorber);

    // Loop through found fragments and redact
    foreach (TextFragment fragment in absorber.TextFragments)
    {
        // Redact the fragment's own bounding rectangle rather than rebuilding one
        // from Position plus width/height.
        // NOTE(review): Position is presumably not guaranteed to be the lower-left
        // corner of the bounding box - confirm against the Aspose.PDF documentation.
        Aspose.Pdf.Rectangle rect = fragment.Rectangle;

        // Create a redaction annotation. Color is fully qualified, like Rectangle
        // above, so it cannot be confused with System.Drawing.Color.
        RedactionAnnotation redaction = new RedactionAnnotation(fragment.Page, rect)
        {
            FillColor = Aspose.Pdf.Color.Black,
            Color = Aspose.Pdf.Color.Black
        };

        // Add and apply redaction
        fragment.Page.Annotations.Add(redaction);
        redaction.Redact();
    }

    // Save the modified document
    pdfDocument.Save("SearchAndRedact.pdf");
}
// Plain keyword search: find every occurrence of "invoice" in sample.pdf and
// print each match with its page number.
// The document is opened in a using block so it is disposed when the search is done.
using (Document pdfDocument = new Document("sample.pdf"))
{
    // Create a text absorber with the search keyword
    TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber("invoice");

    // Apply the absorber to all pages
    pdfDocument.Pages.Accept(textFragmentAbsorber);

    // Get the matched text fragments
    TextFragmentCollection textFragments = textFragmentAbsorber.TextFragments;

    // Print how many times the keyword was found
    Console.WriteLine($"Found {textFragments.Count} instance(s) of the keyword.");

    // Loop through and print each found text
    foreach (TextFragment fragment in textFragments)
    {
        Console.WriteLine($"Text: {fragment.Text} | Page: {fragment.Page.Number}");
    }
}
// Whole-word search: find "car" in sample.pdf only where it stands as a complete
// word (the \b anchors are regex word boundaries), and print each match with its
// page number.
// The document is opened in a using block so it is disposed when the search is done.
using (Document pdfDocument = new Document("sample.pdf"))
{
    // Passing true enables regular-expression matching, required for the \b anchors.
    TextSearchOptions textSearchOptions = new TextSearchOptions(true);

    // Create a text absorber with a regular expression pattern for whole word match
    TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber(@"\bcar\b", textSearchOptions);

    // Apply the absorber to all pages
    pdfDocument.Pages.Accept(textFragmentAbsorber);

    // Retrieve the matched text fragments
    TextFragmentCollection textFragments = textFragmentAbsorber.TextFragments;

    // Output the number of matches found
    Console.WriteLine($"Found {textFragments.Count} instance(s) of the keyword.");

    // Loop through and display each found text fragment
    foreach (TextFragment fragment in textFragments)
    {
        Console.WriteLine($"Text: {fragment.Text} | Page: {fragment.Page.Number}");
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment