Skip to content

Instantly share code, notes, and snippets.

@ADITYATIWARI342005
Created December 31, 2025 06:07
Show Gist options
  • Select an option

  • Save ADITYATIWARI342005/90ddaed20fb198db6185a497167d821d to your computer and use it in GitHub Desktop.

Select an option

Save ADITYATIWARI342005/90ddaed20fb198db6185a497167d821d to your computer and use it in GitHub Desktop.
// libFuzzer harness for Poppler's GLib structure-tree (tagged PDF) API
#include <stdint.h>
#include <stdlib.h>
#include <poppler.h>
// Walk every element reachable from `iter` -- each sibling at this level and,
// recursively, each sibling's children -- calling the structure-element
// accessors on it so the fuzzer reaches their code paths.
//
// iter: iterator positioned on the first sibling to visit. NULL is accepted
//       and treated as an empty level. The caller keeps ownership of `iter`
//       and is responsible for freeing it.
//
// The string accessors hand back newly allocated strings that the caller
// owns, so every result is passed straight to g_free() (which accepts NULL).
// Discarding them would leak on every element and drown real findings in
// LeakSanitizer noise.
static void exercise_structure_tree(PopplerStructureElementIter *iter)
{
    if (!iter) {
        return;
    }

    do {
        PopplerStructureElement *element = poppler_structure_element_iter_get_element(iter);
        if (!element) {
            // `continue` inside do/while jumps to the loop condition, so the
            // iterator still advances to the next sibling.
            continue;
        }

        // Plain enum result: nothing to release.
        (void)poppler_structure_element_get_kind(element);

        // Caller-owned strings: release each one immediately.
        g_free(poppler_structure_element_get_id(element));
        g_free(poppler_structure_element_get_title(element));
        g_free(poppler_structure_element_get_language(element));
        g_free(poppler_structure_element_get_abbreviation(element));
        g_free(poppler_structure_element_get_alt_text(element));
        g_free(poppler_structure_element_get_actual_text(element));

        // Text extraction is only exercised on content elements, once without
        // and once with the recursive flag.
        if (poppler_structure_element_is_content(element)) {
            g_free(poppler_structure_element_get_text(element,
                                                      POPPLER_STRUCTURE_GET_TEXT_NONE));
            g_free(poppler_structure_element_get_text(element,
                                                      POPPLER_STRUCTURE_GET_TEXT_RECURSIVE));
        }

        // Descend into this element's children; the child iterator is owned
        // here and freed once the subtree has been visited.
        PopplerStructureElementIter *child_iter = poppler_structure_element_iter_get_child(iter);
        if (child_iter) {
            exercise_structure_tree(child_iter);
            poppler_structure_element_iter_free(child_iter);
        }

        g_object_unref(element);
    } while (poppler_structure_element_iter_next(iter));
}
// Fuzzer entry point
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
{
GError *err = NULL;
// Use the newer API (poppler_document_new_from_bytes)
GBytes *bytes = g_bytes_new_static(data, size);
PopplerDocument *doc = poppler_document_new_from_bytes(bytes, NULL, &err);
g_bytes_unref(bytes);
if (doc == NULL) {
if (err) {
g_error_free(err);
}
return 0;
}
// Try to create iterator - if it fails, no structure tree exists
PopplerStructureElementIter *root_iter = poppler_structure_element_iter_new(doc);
if (root_iter) {
exercise_structure_tree(root_iter);
poppler_structure_element_iter_free(root_iter);
}
g_object_unref(doc);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment