Skip to content

Instantly share code, notes, and snippets.

View documentprocessing's full-sized avatar

Document Processing documentprocessing

View GitHub Profile
@documentprocessing
documentprocessing / custom-template-processing-with-pandoc-api.java
Created June 21, 2025 08:06
Custom Template Processing with Pandoc API
import com.github.davidmoten.pandoc.Pandoc;
import java.nio.file.*;
import java.util.*;
/**
* Generates legal contracts using custom Pandoc templates with dynamic variables.
* Features:
* - YAML front-matter injection for client-specific terms
* - Conditional clauses via Pandoc filters
* - Multi-format output (PDF for signing, DOCX for editing)
@documentprocessing
documentprocessing / html-to-docx-conversion-pandoc.java
Created June 21, 2025 08:03
HTML to DOCX Conversion using Java
import com.github.davidmoten.pandoc.Pandoc;
import java.io.*;
/**
* Converts HTML business reports to MS Word (DOCX) with corporate styling.
* Features:
* - Preserves tables, images, and CSS classes from HTML
* - Applies custom DOCX template (e.g., company-branded styles)
* - Handles embedded base64 images
* - Post-processes with Office-Open-XML (OOXML) hooks
@documentprocessing
documentprocessing / convert-markdown-to-pdf-with-latext-support.java
Created June 21, 2025 07:57
Converting Markdown to PDF with LaTeX support in Java
import com.github.davidmoten.pandoc.Pandoc;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
/**
* Converts academic Markdown (with LaTeX math and citations) to PDF using Pandoc.
* Requires:
* - Pandoc installed (v2.11+)
* - LaTeX distribution (e.g., TeX Live/MiKTeX)
@documentprocessing
documentprocessing / add-annotations-to-pdf.java
Created June 10, 2025 06:47
Add Annotations to PDF using PDF Clown API in Java
// Import PDF Clown annotation and action classes
import org.pdfclown.documents.Document;
import org.pdfclown.documents.Page;
import org.pdfclown.documents.interaction.annotations.Link;
import org.pdfclown.documents.interaction.actions.GoToURI;
import org.pdfclown.files.File;
import org.pdfclown.documents.interaction.navigation.document.Destination;
import org.pdfclown.documents.interaction.navigation.document.LocalDestination;
import java.awt.geom.Rectangle2D;
@documentprocessing
documentprocessing / extract-text-from-pdf-using-pdfclown.java
Created June 10, 2025 06:44
Extract Text from PDF with PDF Clown Java API
// Import PDF Clown text extraction classes
import org.pdfclown.documents.Document;
import org.pdfclown.documents.Page;
import org.pdfclown.files.File;
import org.pdfclown.tools.TextExtractor;
public class ExtractTextFromPDF {
public static void main(String[] args) {
try {
// 1. Load an existing PDF file
@documentprocessing
documentprocessing / create-pdf-with-pdfclown-api.java
Created June 10, 2025 06:42
Create PDF using PDF Clown API for Java
// Import PDF Clown core classes
import org.pdfclown.documents.Document;
import org.pdfclown.documents.Page;
import org.pdfclown.documents.contents.composition.PrimitiveComposer;
import org.pdfclown.documents.contents.fonts.StandardType1Font;
import org.pdfclown.files.File;
import java.awt.geom.Point2D;
public class CreateBasicPDF {
@documentprocessing
documentprocessing / generate-pdf-a-compliant-docuement.java
Created June 2, 2025 16:13
Generate PDF/A Compliant Document in Java
import com.lowagie.text.Document;
import com.lowagie.text.Font;
import com.lowagie.text.FontFactory;
import com.lowagie.text.Paragraph;
import com.lowagie.text.pdf.PdfAConformanceLevel;
import com.lowagie.text.pdf.PdfAWriter;
import com.lowagie.text.pdf.PdfReader;
import com.lowagie.text.pdf.ICC_Profile;
import java.io.FileOutputStream;
import java.io.InputStream;
@documentprocessing
documentprocessing / add-table-to-pdf.java
Created June 2, 2025 16:11
Add a Table to a PDF with OpenPDF Java API
import com.lowagie.text.Document;
import com.lowagie.text.Font;
import com.lowagie.text.FontFactory;
import com.lowagie.text.Paragraph;
import com.lowagie.text.pdf.PdfPTable;
import com.lowagie.text.pdf.PdfWriter;
import java.io.FileOutputStream;
public class PdfTableExample {
public static void main(String[] args) {
@documentprocessing
documentprocessing / create-simple-pdf.java
Created June 2, 2025 16:08
Create Simple PDF in Java
import com.lowagie.text.Document;
import com.lowagie.text.Font;
import com.lowagie.text.FontFactory;
import com.lowagie.text.Paragraph;
import com.lowagie.text.pdf.PdfWriter;
import java.io.FileOutputStream;
public class SimplePdfCreator {
public static void main(String[] args) {
// Step 1: Create a Document object
@documentprocessing
documentprocessing / modify-html-of-an-element.java
Created May 27, 2025 15:28
Modify HTML of an Element in Java with jsoup API
// Snippet: edit the HTML of elements selected from `doc`, showing the markup after each step.
// NOTE(review): `doc` is not defined in this snippet — presumably a parsed jsoup Document; confirm.
// NOTE(review): first() presumably yields null when the selector matches nothing — verify and guard if needed.
// Step 1: take the first <div> and set its inner HTML.
Element div = doc.select("div").first(); // <div></div>
div.html("<p>lorem ipsum</p>"); // <div><p>lorem ipsum</p></div>
// Step 2: add one paragraph before and one after the existing content.
div.prepend("<p>First</p>");
div.append("<p>Last</p>");
// now: <div><p>First</p><p>lorem ipsum</p><p>Last</p></div>
// Step 3: take the first <span> and wrap it in the supplied <li><a> markup.
Element span = doc.select("span").first(); // <span>One</span>
span.wrap("<li><a href='http://example.com/'></a></li>");
// now: <li><a href="http://example.com/"><span>One</span></a></li>