Skip to content

Instantly share code, notes, and snippets.

@jclement
Last active April 8, 2026 21:48
Show Gist options
  • Select an option

  • Save jclement/23f4514a1f0da1347d3f89926c23b68f to your computer and use it in GitHub Desktop.

Select an option

Save jclement/23f4514a1f0da1347d3f89926c23b68f to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
# Strict mode: exit on unhandled errors (-e), on unset variables (-u), and
# treat a pipeline as failed when any stage fails (pipefail).
set -euo pipefail
# pdfify — Convert Markdown to beautiful PDF via Docker
# Supports: images, mermaid diagrams, tables, code blocks, Obsidian callouts
# Usage: ./pdfify <file.md> [file2.md ...] [options]
# NOTE(review): this header (settings, colors, _sha256) is repeated verbatim
# right after this block; the later assignments win. Possibly a paste artifact.
# Version string reported by --version and --help.
VERSION="1.2.0"
# Name of the Docker image used for (and removed by) this tool.
IMAGE_NAME="pdfify"
# Gist that hosts this script; queried through the GitHub API by the updater.
GIST_ID="23f4514a1f0da1347d3f89926c23b68f"
# Direct raw URL of the script, used when the API lookup yields no URL.
GIST_RAW="https://gist.githubusercontent.com/jclement/${GIST_ID}/raw/pdfify.sh"
# Absolute path of this script file (directory resolved via cd + pwd).
SELF="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
# --- Colors ---
# ANSI escape sequences kept as literal backslash text; they only become
# real escapes when printed through `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
MAGENTA='\033[0;35m'
CYAN='\033[0;36m'
BOLD='\033[1m'
DIM='\033[2m'
RESET='\033[0m'
# --- Portable SHA-256 (macOS has shasum, Linux often has sha256sum) ---
# Runs `shasum -a 256` on the given arguments (or stdin) with its stderr
# discarded; if that command fails for any reason, runs `sha256sum` with the
# same arguments instead.
_sha256() { shasum -a 256 "$@" 2>/dev/null || sha256sum "$@"; }
#!/usr/bin/env bash
set -o errexit -o nounset -o pipefail
# pdfify — Markdown in, nicely typeset PDF out; the rendering runs in Docker.
# Handles: images, mermaid diagrams, tables, code blocks, Obsidian callouts
# Usage: ./pdfify <file.md> [file2.md ...] [options]

# --- Identity and update source ---
VERSION='1.2.0'
IMAGE_NAME='pdfify'
GIST_ID='23f4514a1f0da1347d3f89926c23b68f'
GIST_RAW="https://gist.githubusercontent.com/jclement/${GIST_ID}/raw/pdfify.sh"
# Absolute path of the running script.
SELF="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"

# --- Colors ---
# Stored as literal backslash sequences; the printing helpers expand them.
RESET="\033[0m"
BOLD="\033[1m"
DIM="\033[2m"
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[0;33m"
BLUE="\033[0;34m"
MAGENTA="\033[0;35m"
CYAN="\033[0;36m"
# --- Portable SHA-256 (macOS has shasum, Linux often has sha256sum) ---
_sha256() { shasum -a 256 "$@" 2>/dev/null || sha256sum "$@"; }
# --- Console output helpers ---
# Each helper joins its arguments ("$*"), wraps them in color codes and prints
# one decorated line on stdout. Backslash escapes in the color variables and in
# the message itself are expanded while printing.

# Section-level status line: "::" marker followed by bold text.
info() {
  printf '%b\n' "${BLUE}::${RESET} ${BOLD}$*${RESET}"
}

# Positive outcome, prefixed with a check mark.
success() {
  printf '%b\n' "${GREEN}✓${RESET} $*"
}

# Non-fatal problem, prefixed with a warning sign.
warn() {
  printf '%b\n' "${YELLOW}⚠${RESET} $*"
}

# Secondary detail, prefixed with an arrow.
detail() {
  printf '%b\n' " ${DIM}→${RESET} $*"
}

# Banner: blank line, rule, bold title, rule, blank line.
header() {
  local rule="${MAGENTA}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}"
  printf '%b\n' "\n${rule}"
  printf '%b\n' "${MAGENTA} ${BOLD}$*${RESET}"
  printf '%b\n' "${rule}\n"
}
# --- Self-update ---
# Download the newest pdfify from the gist and install it over this script.
# Globals: GIST_ID, GIST_RAW, SELF and the color variables (all read-only here)
# Outputs: progress and error messages on stdout
# Exits:   0 after a successful update; 1 when the download fails or the
#          downloaded file is empty or does not start with a shebang
do_update() {
  info "Updating pdfify from gist..."
  local raw_url tmp
  # Ask the gist API for the current raw URL. This lookup is best-effort: under
  # `set -e` + `pipefail` a failed curl (offline, rate-limited) or an unmatched
  # grep would otherwise abort the script at this assignment, before the
  # GIST_RAW fallback below could ever be used.
  raw_url=$(curl -fsSL "https://api.github.com/gists/${GIST_ID}" 2>/dev/null \
    | grep '"raw_url"' | head -1 | sed 's/.*"raw_url": *"//;s/".*//') || raw_url=""
  if [[ -z "$raw_url" ]]; then
    raw_url="$GIST_RAW"
  fi
  tmp=$(mktemp)
  if curl -fsSL "$raw_url" -o "$tmp" 2>/dev/null; then
    # Sanity check before overwriting ourselves: non-empty and starts with "#!/".
    if [[ -s "$tmp" ]] && head -1 "$tmp" | grep -q '^#!/'; then
      chmod +x "$tmp"
      mv "$tmp" "$SELF"
      success "Updated to latest version"
      detail "${CYAN}${SELF}${RESET}"
    else
      rm -f "$tmp"
      echo -e "${RED}Error:${RESET} Downloaded file doesn't look like a script"
      exit 1
    fi
  else
    rm -f "$tmp"
    echo -e "${RED}Error:${RESET} Failed to download update"
    exit 1
  fi
  exit 0
}
# Best-effort check for a published version that differs from this script.
# Resolves the gist's raw URL through the GitHub API, hashes the remote file
# and the local script ($SELF), and prints a one-line hint when the hashes
# differ. Network or lookup failures end the check quietly with status 0.
check_for_update() {
  local gist_url theirs ours
  gist_url=$(
    curl -fsSL --connect-timeout 2 --max-time 3 "https://api.github.com/gists/${GIST_ID}" 2>/dev/null \
      | grep '"raw_url"' \
      | head -1 \
      | sed 's/.*"raw_url": *"//;s/".*//'
  ) || return 0
  if [[ -z "$gist_url" ]]; then
    return 0
  fi
  theirs=$(
    curl -fsSL --connect-timeout 2 --max-time 5 "$gist_url" 2>/dev/null \
      | _sha256 \
      | cut -d' ' -f1
  ) || return 0
  ours=$(_sha256 < "$SELF" | cut -d' ' -f1)
  # Nothing to report when the remote hash is missing or matches ours.
  if [[ -z "$theirs" || "$theirs" == "$ours" ]]; then
    return 0
  fi
  printf '%b\n' "${YELLOW}⚠${RESET} ${DIM}A newer version of pdfify is available. Run ${CYAN}pdfify --update${DIM} to upgrade.${RESET}"
}
# --- Args ---
# Mode switches toggled by flags (0 = off, 1 = on).
REBUILD=0 WATCH=0 OPEN=0 PREVIEW=0
# Explicit output path from --out (empty = derive from the input name).
OUT_FILE=''
# Name of the value-taking option still waiting for its value, if any.
NEXT_KEY=''
# Every argument that is not a recognised option (the input files).
POSITIONAL=()
# Per-document settings given on the command line. These win over frontmatter;
# an empty string means "not given here, fall back to the frontmatter value".
CLI_TOC_LEVEL='' CLI_NUMBERS='' CLI_NUMBER_FROM=''
CLI_TITLE='' CLI_SUBTITLE='' CLI_AUTHOR=''
CLI_HEADER='' CLI_FOOTER=''
CLI_DATE='' CLI_WATERMARK=''
# Walk the command line once. Options that take a value are accepted both as
# "--opt=value" and as "--opt value"; the second form records the option name
# in NEXT_KEY so the following argument is consumed as its value. Anything
# that is not a recognised option is collected in POSITIONAL.
for arg in "$@"; do
# Previous argument was a value-taking option: this argument is its value.
if [[ -n "$NEXT_KEY" ]]; then
case "$NEXT_KEY" in
toc-level) CLI_TOC_LEVEL="$arg" ;;
number-from) CLI_NUMBER_FROM="$arg" ;;
out) OUT_FILE="$arg" ;;
title) CLI_TITLE="$arg" ;;
subtitle) CLI_SUBTITLE="$arg" ;;
author) CLI_AUTHOR="$arg" ;;
header) CLI_HEADER="$arg" ;;
footer) CLI_FOOTER="$arg" ;;
date) CLI_DATE="$arg" ;;
watermark) CLI_WATERMARK="$arg" ;;
esac
NEXT_KEY=""
continue
fi
case "$arg" in
--rebuild) REBUILD=1 ;;
# do_update exits the script itself, so arguments after --update are not seen.
--update) do_update ;;
--watch) WATCH=1 ;;
--open) OPEN=1 ;;
# Preview implies opening the result.
--preview) PREVIEW=1; OPEN=1 ;;
--no-numbers) CLI_NUMBERS="false" ;;
--numbers) CLI_NUMBERS="true" ;;
# --clean removes the Docker image and exits immediately.
--clean) echo -e "${BLUE}::${RESET} ${BOLD}Removing Docker image ${CYAN}${IMAGE_NAME}${RESET}..."
docker rmi "$IMAGE_NAME" >/dev/null 2>&1 && echo -e "${GREEN}✓${RESET} Image removed" || echo -e "${DIM}Image not found${RESET}"
exit 0 ;;
--toc-level) NEXT_KEY="toc-level" ;;
--toc-level=*) CLI_TOC_LEVEL="${arg#*=}" ;;
--number-from) NEXT_KEY="number-from" ;;
--number-from=*) CLI_NUMBER_FROM="${arg#*=}" ;;
--out) NEXT_KEY="out" ;;
--out=*) OUT_FILE="${arg#*=}" ;;
--title) NEXT_KEY="title" ;;
--title=*) CLI_TITLE="${arg#*=}" ;;
--subtitle) NEXT_KEY="subtitle" ;;
--subtitle=*) CLI_SUBTITLE="${arg#*=}" ;;
--author) NEXT_KEY="author" ;;
--author=*) CLI_AUTHOR="${arg#*=}" ;;
--header) NEXT_KEY="header" ;;
--header=*) CLI_HEADER="${arg#*=}" ;;
--footer) NEXT_KEY="footer" ;;
--footer=*) CLI_FOOTER="${arg#*=}" ;;
--date) NEXT_KEY="date" ;;
--date=*) CLI_DATE="${arg#*=}" ;;
--watermark) NEXT_KEY="watermark" ;;
--watermark=*) CLI_WATERMARK="${arg#*=}" ;;
--version) echo "pdfify v${VERSION}"; exit 0 ;;
# Full usage text; printed to stdout, then the script exits successfully.
--help|-h) echo -e "${BOLD}pdfify${RESET} v${VERSION} — Markdown to PDF"
echo ""
echo -e "${BOLD}Usage:${RESET} pdfify ${CYAN}<file.md> [file2.md ...]${RESET} [options]"
echo ""
echo -e "${BOLD}Options:${RESET}"
echo -e " ${DIM}--out FILE${RESET} Output file (single input only)"
echo -e " ${DIM}--toc-level N${RESET} TOC depth: 0=none, 1=H1, 2=H2, 3=H3 (default: 3)"
echo -e " ${DIM}--numbers${RESET} Enable numbered headings (default)"
echo -e " ${DIM}--no-numbers${RESET} Disable numbered headings"
echo -e " ${DIM}--number-from N${RESET} Start numbering at heading level N (default: 2)"
echo -e " ${DIM}--open${RESET} Open PDF after generation"
echo -e " ${DIM}--preview${RESET} Render to /tmp and open (no permanent file)"
echo -e " ${DIM}--watch${RESET} Watch for changes and regenerate"
echo -e " ${DIM}--rebuild${RESET} Force rebuild the Docker image"
echo -e " ${DIM}--clean${RESET} Remove the Docker image"
echo -e " ${DIM}--update${RESET} Update pdfify to latest version from gist"
echo -e " ${DIM}--version${RESET} Show version"
echo ""
echo -e "${BOLD}Overrides${RESET} (CLI trumps frontmatter):"
echo -e " ${DIM}--title TEXT${RESET} ${DIM}--subtitle TEXT${RESET}"
echo -e " ${DIM}--author TEXT${RESET} ${DIM}--header TEXT${RESET}"
echo -e " ${DIM}--footer TEXT${RESET} ${DIM}--date TEXT${RESET}"
echo -e " ${DIM}--watermark TEXT${RESET}"
echo ""
echo -e "${BOLD}Frontmatter:${RESET}"
echo -e " title, subtitle, author, header, footer, toc-level, date,"
echo -e " numbersections (true/false), numberfrom (1-4), watermark,"
echo -e " pagebreak (true/false — page break before each top-level heading)"
exit 0 ;;
# Everything else, including unrecognised "--flags", is treated as an input file.
*) POSITIONAL+=("$arg") ;;
esac
done
# --- Argument validation ---
# A value-taking option given as the very last argument (for example
# `pdfify doc.md --out`) leaves NEXT_KEY set after parsing. Reject it rather
# than silently dropping the option.
if [[ -n "${NEXT_KEY:-}" ]]; then
  echo -e "${RED}Error:${RESET} --${NEXT_KEY} requires a value"
  exit 1
fi
# At least one input file is required.
if [[ ${#POSITIONAL[@]} -lt 1 ]]; then
  echo -e "${BOLD}Usage:${RESET} pdfify ${CYAN}<file.md> [file2.md ...]${RESET} [options]"
  echo -e " Run ${CYAN}pdfify --help${RESET} for all options"
  exit 1
fi
# One explicit output path cannot serve several inputs.
if [[ -n "$OUT_FILE" && ${#POSITIONAL[@]} -gt 1 ]]; then
  echo -e "${RED}Error:${RESET} --out cannot be used with multiple input files"
  exit 1
fi
# --- Open helper ---
# Open a generated PDF with the desktop's default viewer.
# Arguments: $1 - path of the PDF to open
# Outputs:   a warning on stderr when no opener command is available
# Returns:   the opener's exit status; 0 when no opener was found (opening
#            the file is a convenience, not a reason to fail the run)
#
# macOS uses `open`. Elsewhere `xdg-open` is tried first, because a command
# named `open` is not guaranteed to be a desktop opener on other systems;
# `open` remains the last resort.
open_pdf() {
  local pdf="$1" opener=""
  if [[ "$(uname -s 2>/dev/null || true)" == "Darwin" ]] && command -v open >/dev/null 2>&1; then
    opener="open"
  elif command -v xdg-open >/dev/null 2>&1; then
    opener="xdg-open"
  elif command -v open >/dev/null 2>&1; then
    opener="open"
  fi
  if [[ -z "$opener" ]]; then
    printf 'pdfify: no PDF opener found (need "open" or "xdg-open"); not opening %s\n' "$pdf" >&2
    return 0
  fi
  "$opener" "$pdf"
}
# Banner. It is printed only after argument parsing, so --help, --version,
# --clean and --update (which all exit earlier) never show it.
header "pdfify v${VERSION}"
# --- Embedded Dockerfile ---
# The image definition is kept inside the script so pdfify stays a single
# file. The heredoc delimiter is quoted, so nothing in the body is expanded by
# this shell (e.g. ${style} is left for the image's own RUN step). The text is
# later fed to `docker build` on stdin.
# Image contents: node:20-slim base, pandoc, TeX Live packages (incl. XeTeX),
# Chromium, fonts, Roboto Mono downloaded from GitHub (a failed font download
# is ignored), mermaid-cli, plus mermaid and puppeteer config files in /opt.
DOCKERFILE=$(cat <<'DOCKERFILE_END'
FROM node:20-slim
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update -qq && \
apt-get install -y --no-install-recommends \
pandoc \
texlive-latex-recommended \
texlive-latex-extra \
texlive-fonts-recommended \
texlive-fonts-extra \
texlive-xetex \
lmodern \
librsvg2-bin \
chromium \
ca-certificates \
fonts-liberation \
fonts-roboto \
fonts-roboto-unhinted \
fonts-noto-color-emoji \
wget \
fontconfig \
&& rm -rf /var/lib/apt/lists/*
RUN mkdir -p /usr/share/fonts/truetype/roboto-mono && \
for style in Regular Bold Italic BoldItalic Medium MediumItalic Light LightItalic; do \
wget -q "https://github.com/googlefonts/RobotoMono/raw/main/fonts/ttf/RobotoMono-${style}.ttf" \
-O "/usr/share/fonts/truetype/roboto-mono/RobotoMono-${style}.ttf" 2>/dev/null || true; \
done && \
fc-cache -f
RUN npm install -g @mermaid-js/mermaid-cli
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
ENV CHROME_PATH=/usr/bin/chromium
RUN echo '{"maxTextSize": 90000, "flowchart": {"useMaxWidth": true}, "theme": "base", "themeVariables": {"primaryColor": "#3B82F6", "primaryBorderColor": "#1E40AF", "primaryTextColor": "#1E293B", "lineColor": "#475569", "xyChart": {"backgroundColor": "transparent", "plotColorPalette": "#2563EB,#DC2626,#16A34A,#D97706,#9333EA,#0891B2"}}}' > /opt/mermaid-config.json
RUN echo '{"args": ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu"]}' > /opt/puppeteer-config.json
WORKDIR /work
ENTRYPOINT ["/bin/bash"]
DOCKERFILE_END
)
# --- Build Docker image ---
# Ensure the Docker image named by IMAGE_NAME exists: optionally drop it first
# (--rebuild), reuse it when present, otherwise build it from the embedded
# Dockerfile with condensed progress output. If the build does not produce the
# image, it is run once more with full output for diagnosis and the script
# exits with status 1.
echo ""
if [[ $REBUILD -eq 1 ]]; then
  info "Removing existing Docker image ${CYAN}${IMAGE_NAME}${RESET}..."
  # The image may simply not exist yet; that is not an error for a rebuild.
  docker rmi "$IMAGE_NAME" >/dev/null 2>&1 || true
  success "Image removed"
fi
if docker image inspect "$IMAGE_NAME" >/dev/null 2>&1; then
  info "Docker image ${GREEN}${IMAGE_NAME}${RESET} found ${DIM}(cached)${RESET}"
  success "Reusing existing image"
else
  info "Building Docker image ${CYAN}${IMAGE_NAME}${RESET}..."
  detail "Installing: pandoc, XeLaTeX, mermaid-cli, Chromium, fonts"
  detail "This takes 2-3 minutes on first run (cached after)"
  echo ""
  BUILD_CTX=$(mktemp -d)
  # Filter the legacy builder's output down to step headers, the final tag line
  # and error lines. The trailing `|| true` is essential: with errexit and
  # pipefail a failed `docker build` would otherwise terminate the script right
  # here, leaking BUILD_CTX and skipping the verbose retry below. Success is
  # decided by the `docker image inspect` that follows, not by this status.
  printf '%s\n' "$DOCKERFILE" | DOCKER_BUILDKIT=0 docker build -t "$IMAGE_NAME" -f - "$BUILD_CTX" 2>&1 | while IFS= read -r line; do
    if [[ "$line" =~ ^Step\ ([0-9]+)/([0-9]+) ]]; then
      echo -e " ${CYAN}[${BASH_REMATCH[1]}/${BASH_REMATCH[2]}]${RESET} ${DIM}${line#*: }${RESET}"
    elif [[ "$line" == *"Successfully tagged"* ]]; then
      echo -e " ${GREEN}${line}${RESET}"
    elif [[ "$line" == *"ERROR"* || "$line" == *"error"* ]]; then
      echo -e " ${RED}${line}${RESET}"
    fi
  done || true
  rm -rf "$BUILD_CTX"
  if ! docker image inspect "$IMAGE_NAME" >/dev/null 2>&1; then
    echo -e "\n${RED}Docker build failed. Re-running with full output:${RESET}\n"
    BUILD_CTX=$(mktemp -d)
    # Diagnostic run only; its status is ignored so the temp directory is
    # always cleaned up before exiting with a failure status.
    printf '%s\n' "$DOCKERFILE" | DOCKER_BUILDKIT=0 docker build -t "$IMAGE_NAME" -f - "$BUILD_CTX" || true
    rm -rf "$BUILD_CTX"
    exit 1
  fi
  success "Docker image built"
fi
# === Per-file conversion ===
convert_file() {
local INPUT_PATH="$1"
local OUTPUT_OVERRIDE="$2"
# --- Resolve paths ---
local INPUT INPUT_DIR INPUT_FILE OUTPUT OUTPUT_DIR OUTPUT_FILE
INPUT="$(cd "$(dirname "$INPUT_PATH")" && pwd)/$(basename "$INPUT_PATH")"
if [[ ! -f "$INPUT" ]]; then
echo -e "${RED}Error:${RESET} $INPUT_PATH not found"
return 1
fi
INPUT_DIR="$(dirname "$INPUT")"
INPUT_FILE="$(basename "$INPUT")"
OUTPUT="${OUTPUT_OVERRIDE:-${INPUT%.md}.pdf}"
OUTPUT_DIR="$(cd "$(dirname "$OUTPUT")" 2>/dev/null && pwd || (mkdir -p "$(dirname "$OUTPUT")" && cd "$(dirname "$OUTPUT")" && pwd))"
OUTPUT="${OUTPUT_DIR}/$(basename "$OUTPUT")"
OUTPUT_FILE="$(basename "$OUTPUT")"
# Preview mode: write temp file in input dir (Docker-mountable), move to /tmp after
local PREVIEW_FINAL=""
if [[ $PREVIEW -eq 1 ]]; then
local base="${INPUT_FILE%.md}"
PREVIEW_FINAL="/tmp/pdfify-preview-${base}.pdf"
OUTPUT_FILE=".pdfify-preview-${base}.pdf"
OUTPUT="${OUTPUT_DIR}/${OUTPUT_FILE}"
fi
info "Input: ${CYAN}${INPUT}${RESET}"
if [[ -n "$PREVIEW_FINAL" ]]; then
info "Output: ${CYAN}${PREVIEW_FINAL}${RESET} ${DIM}(preview)${RESET}"
else
info "Output: ${CYAN}${OUTPUT}${RESET}"
fi
# --- Parse YAML frontmatter ---
local FM_TITLE="" FM_SUBTITLE="" FM_AUTHOR="" FM_FOOTER="" FM_HEADER=""
local FM_TOC_LEVEL="" FM_DATE="" FM_NUMBERSECTIONS="" FM_NUMBERFROM="" FM_WATERMARK=""
local FM_DATE_HASH="" FM_DATE_DIRTY="" FM_DATE_LABEL=""
if head -1 "$INPUT" | grep -q '^---'; then
local FM_BLOCK
FM_BLOCK=$(awk 'NR==1 && /^---/{found=1; next} found && /^---/{exit} found{print}' "$INPUT")
extract_fm() { echo "$FM_BLOCK" | sed -n "s/^$1:[[:space:]]*//p" | sed 's/^["'"'"']\(.*\)["'"'"']$/\1/'; }
FM_TITLE=$(extract_fm "title")
FM_AUTHOR=$(extract_fm "author")
FM_SUBTITLE=$(extract_fm "subtitle")
FM_FOOTER=$(extract_fm "footer")
FM_HEADER=$(extract_fm "header")
FM_TOC_LEVEL=$(extract_fm "toc-level")
FM_DATE=$(extract_fm "date")
FM_NUMBERSECTIONS=$(extract_fm "numbersections")
FM_NUMBERFROM=$(extract_fm "numberfrom")
FM_WATERMARK=$(extract_fm "watermark")
FM_PAGEBREAK=$(extract_fm "pagebreak")
fi
# --- CLI overrides frontmatter ---
[[ -n "$CLI_TITLE" ]] && FM_TITLE="$CLI_TITLE"
[[ -n "$CLI_SUBTITLE" ]] && FM_SUBTITLE="$CLI_SUBTITLE"
[[ -n "$CLI_AUTHOR" ]] && FM_AUTHOR="$CLI_AUTHOR"
[[ -n "$CLI_FOOTER" ]] && FM_FOOTER="$CLI_FOOTER"
[[ -n "$CLI_HEADER" ]] && FM_HEADER="$CLI_HEADER"
[[ -n "$CLI_DATE" ]] && FM_DATE="$CLI_DATE"
[[ -n "$CLI_WATERMARK" ]] && FM_WATERMARK="$CLI_WATERMARK"
[[ -n "$CLI_TOC_LEVEL" ]] && FM_TOC_LEVEL="$CLI_TOC_LEVEL"
[[ -n "$CLI_NUMBER_FROM" ]] && FM_NUMBERFROM="$CLI_NUMBER_FROM"
[[ -n "$CLI_NUMBERS" ]] && FM_NUMBERSECTIONS="$CLI_NUMBERS"
# --- Auto-detect document structure ---
# Count H1 headings (outside code blocks)
local H1_COUNT=0 IN_CODE_SCAN=0 FIRST_H1_TEXT=""
while IFS= read -r scanline || [[ -n "$scanline" ]]; do
[[ "$scanline" =~ ^\`\`\` ]] && { if [[ $IN_CODE_SCAN -eq 0 ]]; then IN_CODE_SCAN=1; else IN_CODE_SCAN=0; fi; continue; }
if [[ $IN_CODE_SCAN -eq 0 && "$scanline" =~ ^#\ ]]; then
H1_COUNT=$((H1_COUNT + 1))
[[ $H1_COUNT -eq 1 ]] && FIRST_H1_TEXT="${scanline#\# }"
fi
done < "$INPUT"
local FILE_TOC_LEVEL="${FM_TOC_LEVEL:-3}"
local FILE_NUMBERS=1
[[ "$FM_NUMBERSECTIONS" == "false" ]] && FILE_NUMBERS=0
local FILE_PAGEBREAK=1
[[ "$FM_PAGEBREAK" == "false" ]] && FILE_PAGEBREAK=0
# Auto-determine numberfrom based on structure (if not explicitly set)
local FILE_NUMBER_FROM="${FM_NUMBERFROM:-}"
local HIDE_FIRST_H1=0
if [[ -z "$FILE_NUMBER_FROM" ]]; then
if [[ $H1_COUNT -eq 1 ]]; then
# Single H1 = document title; number from H2, hide H1 in body
FILE_NUMBER_FROM=2
HIDE_FIRST_H1=1
# Use H1 text as title if no title set
[[ -z "$FM_TITLE" ]] && FM_TITLE="$FIRST_H1_TEXT"
detail "Auto: ${DIM}single H1 detected → using as title, numbering from H2${RESET}"
else
# Multiple H1s = sections; number from H1
FILE_NUMBER_FROM=1
detail "Auto: ${DIM}${H1_COUNT} H1s detected → numbering from H1${RESET}"
fi
fi
# Default date: current date/time
# Set to "none" in frontmatter or --date to suppress
FM_DATE_HASH="${FM_DATE_HASH:-}"
FM_DATE_DIRTY="${FM_DATE_DIRTY:-}"
if [[ "$FM_DATE" == "none" || "$FM_DATE" == "false" ]]; then
FM_DATE=""
elif [[ -z "$FM_DATE" && -z "$CLI_DATE" ]]; then
FM_DATE="$(date +"%Y-%m-%d %H:%M")"
fi
echo ""
[[ -n "$FM_TITLE" ]] && detail "Title: ${CYAN}${FM_TITLE}${RESET}"
[[ -n "$FM_SUBTITLE" ]] && detail "Subtitle: ${CYAN}${FM_SUBTITLE}${RESET}"
[[ -n "$FM_AUTHOR" ]] && detail "Author: ${CYAN}${FM_AUTHOR}${RESET}"
[[ -n "$FM_HEADER" ]] && detail "Header: ${CYAN}${FM_HEADER}${RESET}"
[[ -n "$FM_FOOTER" ]] && detail "Footer: ${CYAN}${FM_FOOTER}${RESET}"
detail "Date: ${CYAN}${FM_DATE}${RESET}"
detail "TOC: ${CYAN}level ${FILE_TOC_LEVEL}${RESET}"
detail "Numbered: ${CYAN}$([ $FILE_NUMBERS -eq 1 ] && echo "yes (from H${FILE_NUMBER_FROM})" || echo no)${RESET}"
[[ -n "$FM_WATERMARK" ]] && detail "Watermark: ${CYAN}${FM_WATERMARK}${RESET}"
# --- Git hash for source file ---
local GIT_STAMP=""
# --- Discover images referenced in the markdown ---
echo ""
info "Scanning ${CYAN}${INPUT_FILE}${RESET} for assets..."
IMAGES=()
while IFS= read -r img; do
[[ -z "$img" ]] && continue
[[ "$img" =~ ^https?:// ]] && continue
if [[ -f "$INPUT_DIR/$img" ]]; then
IMAGES+=("$img")
success "Image: ${CYAN}${img}${RESET} ${DIM}($(du -h "$INPUT_DIR/$img" | cut -f1 | tr -d ' '))${RESET}"
else
warn "Image: ${YELLOW}${img}${RESET} ${RED}(not found)${RESET}"
fi
done < <(sed -n 's/.*!\[[^]]*\](\([^)]*\)).*/\1/p' "$INPUT"; sed -n 's/.*src="\([^"]*\)".*/\1/p' "$INPUT")
MERMAID_COUNT=$(grep -c '```mermaid' "$INPUT" || true)
if [[ $MERMAID_COUNT -gt 0 ]]; then
success "Mermaid diagrams: ${CYAN}${MERMAID_COUNT}${RESET}"
fi
CALLOUT_COUNT=$(grep -c '> \[!' "$INPUT" || true)
if [[ $CALLOUT_COUNT -gt 0 ]]; then
success "Callouts: ${CYAN}${CALLOUT_COUNT}${RESET}"
fi
TABLE_COUNT=$(grep -c '^|' "$INPUT" || true)
CODE_COUNT=$(grep -c '```' "$INPUT" || true)
CODE_COUNT=$(( (CODE_COUNT - MERMAID_COUNT * 2) / 2 ))
[[ $TABLE_COUNT -gt 0 ]] && detail "Tables: ${TABLE_COUNT} rows"
[[ $CODE_COUNT -gt 0 ]] && detail "Code blocks: ~${CODE_COUNT}"
echo ""
info "Found ${GREEN}${#IMAGES[@]}${RESET} image(s), ${GREEN}${MERMAID_COUNT}${RESET} mermaid diagram(s), ${GREEN}${CALLOUT_COUNT}${RESET} callout(s)"
# --- Write the conversion script to a temp file (mounted into Docker) ---
CONVERT_SCRIPT="${INPUT_DIR}/.pdfify-convert-$$.sh"
trap 'rm -f "$CONVERT_SCRIPT"' EXIT
cat > "$CONVERT_SCRIPT" <<'INNER_SCRIPT'
#!/bin/bash
set -euo pipefail
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
BOLD='\033[1m'
DIM='\033[2m'
RESET='\033[0m'
info() { echo -e "${BLUE}::${RESET} ${BOLD}$*${RESET}"; }
success() { echo -e "${GREEN}✓${RESET} $*"; }
detail() { echo -e " ${DIM}→${RESET} $*"; }
INPUT_FILE="$1"
OUTPUT_FILE="$2"
WORKDIR="/work"
cd "$WORKDIR"
# --- Step 0: Strip first H1 if it's being used as document title ---
HIDE_FIRST_H1="${HIDE_FIRST_H1:-0}"
EFFECTIVE_INPUT="$INPUT_FILE"
if [[ "$HIDE_FIRST_H1" == "1" ]]; then
STRIPPED=$(mktemp /tmp/pdfify-stripped-XXXXXX.md)
FOUND_H1=0
IN_CODE_BLK=0
IN_FMATTER=0
while IFS= read -r line || [[ -n "$line" ]]; do
[[ "$line" =~ ^\`\`\` ]] && { if [[ $IN_CODE_BLK -eq 0 ]]; then IN_CODE_BLK=1; else IN_CODE_BLK=0; fi; }
if [[ "$line" == "---" && $IN_CODE_BLK -eq 0 ]]; then
if [[ $IN_FMATTER -eq 0 && $FOUND_H1 -eq 0 ]]; then IN_FMATTER=1; else IN_FMATTER=0; fi
fi
# Skip the first H1 (and any blank line immediately after)
if [[ $FOUND_H1 -eq 0 && $IN_CODE_BLK -eq 0 && $IN_FMATTER -eq 0 && "$line" =~ ^#\ ]]; then
FOUND_H1=1
continue
fi
# Skip blank line right after removed H1
if [[ $FOUND_H1 -eq 1 && -z "$line" ]]; then
FOUND_H1=2
continue
fi
[[ $FOUND_H1 -eq 1 ]] && FOUND_H1=2
echo "$line" >> "$STRIPPED"
done < "$INPUT_FILE"
EFFECTIVE_INPUT="$(basename "$STRIPPED")"
detail "Stripped first H1 (promoted to title)"
fi
# --- Step 1: Pre-process Obsidian callouts ---
info "Pre-processing callouts..."
CALLOUT_MD=$(mktemp /tmp/pdfify-callout-XXXXXX.md)
IN_CALLOUT=0
CALLOUT_TYPE=""
CALLOUT_TITLE=""
CALLOUT_BUF=""
CALLOUT_COUNT=0
flush_callout() {
if [[ $IN_CALLOUT -eq 1 && -n "$CALLOUT_TYPE" ]]; then
CALLOUT_COUNT=$((CALLOUT_COUNT + 1))
local latex_type
case "${CALLOUT_TYPE,,}" in
info|note) latex_type="calloutinfo" ;;
tip|hint) latex_type="callouttip" ;;
warning|caution) latex_type="calloutwarning" ;;
danger|error|bug) latex_type="calloutdanger" ;;
example) latex_type="calloutexample" ;;
quote|cite) latex_type="calloutquote" ;;
*) latex_type="calloutinfo" ;;
esac
echo "" >> "$CALLOUT_MD"
echo '```{=latex}' >> "$CALLOUT_MD"
echo "\\begin{${latex_type}}{${CALLOUT_TITLE}}" >> "$CALLOUT_MD"
echo '```' >> "$CALLOUT_MD"
echo "" >> "$CALLOUT_MD"
echo "$CALLOUT_BUF" >> "$CALLOUT_MD"
echo "" >> "$CALLOUT_MD"
echo '```{=latex}' >> "$CALLOUT_MD"
echo "\\end{${latex_type}}" >> "$CALLOUT_MD"
echo '```' >> "$CALLOUT_MD"
echo "" >> "$CALLOUT_MD"
fi
IN_CALLOUT=0
CALLOUT_TYPE=""
CALLOUT_TITLE=""
CALLOUT_BUF=""
}
while IFS= read -r line || [[ -n "$line" ]]; do
if [[ "$line" =~ ^\>\ *\[!([a-zA-Z]+)\]\ *(.*) ]]; then
flush_callout
IN_CALLOUT=1
CALLOUT_TYPE="${BASH_REMATCH[1]}"
CALLOUT_TITLE="${BASH_REMATCH[2]:-${BASH_REMATCH[1]^}}"
continue
fi
if [[ $IN_CALLOUT -eq 1 ]]; then
if [[ "$line" =~ ^\>\ ?(.*) ]]; then
CALLOUT_BUF="${CALLOUT_BUF}${BASH_REMATCH[1]}
"
continue
else
flush_callout
fi
fi
echo "$line" >> "$CALLOUT_MD"
done < "${STRIPPED:-$INPUT_FILE}"
flush_callout
if [[ $CALLOUT_COUNT -gt 0 ]]; then
success "Converted $CALLOUT_COUNT callout(s)"
fi
# --- Step 1b+1c: Inject page breaks (after TOC, before each H1) ---
BREAK_INJECTED=$(mktemp /tmp/pdfify-breaks-XXXXXX.md)
H1_COUNT=0
IN_FM=0
IN_CODE=0
DONE_TOC_BREAK=0
while IFS= read -r line || [[ -n "$line" ]]; do
# Track code blocks (``` opens/closes)
if [[ "$line" =~ ^\`\`\` ]]; then
if [[ $IN_CODE -eq 0 ]]; then IN_CODE=1; else IN_CODE=0; fi
echo "$line" >> "$BREAK_INJECTED"
continue
fi
# Track frontmatter (only at start of file)
if [[ "$line" == "---" && $IN_CODE -eq 0 ]]; then
if [[ $IN_FM -eq 0 && $H1_COUNT -eq 0 ]]; then IN_FM=1; else IN_FM=0; fi
echo "$line" >> "$BREAK_INJECTED"
continue
fi
if [[ $IN_CODE -eq 0 && $IN_FM -eq 0 ]]; then
# Before first content after frontmatter: inject TOC page break
if [[ $DONE_TOC_BREAK -eq 0 && "$TOC_LEVEL" -gt 0 && -n "$line" ]]; then
echo "" >> "$BREAK_INJECTED"
echo '```{=latex}' >> "$BREAK_INJECTED"
echo '\newpage' >> "$BREAK_INJECTED"
echo '```' >> "$BREAK_INJECTED"
echo "" >> "$BREAK_INJECTED"
DONE_TOC_BREAK=1
fi
# Page break before each top-level section (except first)
# Build the marker: numberfrom=1 → "# ", numberfrom=2 → "## "
BREAK_HASHES=$(printf '#%.0s' $(seq 1 "$FILE_NUMBER_FROM"))
if [[ "$line" == "${BREAK_HASHES} "* ]]; then
# Make sure it's exactly that level, not deeper
NEXT_CHAR="${line:${#BREAK_HASHES}:1}"
if [[ "$NEXT_CHAR" != "#" ]]; then
H1_COUNT=$((H1_COUNT + 1))
if [[ $H1_COUNT -gt 1 && $FILE_PAGEBREAK -eq 1 ]]; then
echo "" >> "$BREAK_INJECTED"
echo '```{=latex}' >> "$BREAK_INJECTED"
echo '\newpage' >> "$BREAK_INJECTED"
echo '```' >> "$BREAK_INJECTED"
echo "" >> "$BREAK_INJECTED"
fi
fi
fi
fi
echo "$line" >> "$BREAK_INJECTED"
done < "$CALLOUT_MD"
rm -f "$CALLOUT_MD"
CALLOUT_MD="$BREAK_INJECTED"
# --- Step 2: Pre-render Mermaid blocks to PNG ---
info "Pre-rendering Mermaid diagrams..."
TEMP_MD=$(mktemp /tmp/pdfify-XXXXXX.md)
MERMAID_COUNT=0
IN_MERMAID=0
MERMAID_BUF=""
while IFS= read -r line || [[ -n "$line" ]]; do
if [[ "$line" =~ ^\`\`\`mermaid ]]; then
IN_MERMAID=1
MERMAID_BUF=""
continue
fi
if [[ $IN_MERMAID -eq 1 ]]; then
if [[ "$line" =~ ^\`\`\` ]]; then
IN_MERMAID=0
MERMAID_COUNT=$((MERMAID_COUNT + 1))
MERMAID_FILE="/tmp/mermaid-${MERMAID_COUNT}.mmd"
MERMAID_PNG="/tmp/mermaid-${MERMAID_COUNT}.png"
echo "$MERMAID_BUF" > "$MERMAID_FILE"
detail "Rendering diagram ${CYAN}#${MERMAID_COUNT}${RESET}..."
mmdc -i "$MERMAID_FILE" \
-o "$MERMAID_PNG" \
-w 1600 \
-b transparent \
-c /opt/mermaid-config.json \
-p /opt/puppeteer-config.json \
2>/dev/null || {
echo -e " ${YELLOW}⚠${RESET} Diagram $MERMAID_COUNT failed — inserting as code block"
echo '```' >> "$TEMP_MD"
echo "$MERMAID_BUF" >> "$TEMP_MD"
echo '```' >> "$TEMP_MD"
continue
}
SIZE=$(du -h "$MERMAID_PNG" 2>/dev/null | cut -f1 | tr -d ' ')
success "Diagram #${MERMAID_COUNT} rendered ${DIM}(${SIZE})${RESET}"
echo "" >> "$TEMP_MD"
echo "![Diagram ${MERMAID_COUNT}](${MERMAID_PNG})\\" >> "$TEMP_MD"
echo "" >> "$TEMP_MD"
else
MERMAID_BUF="${MERMAID_BUF}${line}
"
fi
else
echo "$line" >> "$TEMP_MD"
fi
done < "$CALLOUT_MD"
# --- Strip YAML frontmatter so pandoc doesn't generate its own title ---
# pdfify already parses frontmatter above; letting pandoc see it causes a
# duplicate title (pandoc's \maketitle + pdfify's custom title banner).
if head -1 "$TEMP_MD" | grep -q '^---'; then
STRIPPED_FM=$(mktemp /tmp/pdfify-nofm-XXXXXX.md)
awk 'NR==1 && /^---/{skip=1; next} skip && /^---/{skip=0; next} !skip' "$TEMP_MD" > "$STRIPPED_FM"
mv "$STRIPPED_FM" "$TEMP_MD"
fi
# --- Lua filter: protect brackets in headings for titlesec ---
# Square brackets in headings break titlesec (\SQSPL@scan error) because LaTeX
# interprets [ as the start of an optional argument.
BRACKET_FILTER=$(mktemp /tmp/pdfify-bracket-filter-XXXXXX.lua)
cat > "$BRACKET_FILTER" <<'LUAFILTER'
-- Protect square brackets in headings to prevent titlesec \SQSPL@scan errors.
-- Brackets in headings make titlesec think they are optional arguments.
-- We replace [ and ] with \lbrack/\rbrack in all inline types.
function Header(el)
if FORMAT ~= "latex" and FORMAT ~= "pdf" then return nil end
el = el:walk {
Str = function(s)
if s.text:find("[%[%]]") then
local t = s.text:gsub("%[", "\\lbrack{}"):gsub("%]", "\\rbrack{}")
return pandoc.RawInline("latex", t)
end
end,
Code = function(c)
-- All code in headings must use \oldtexttt to bypass seqsplit
-- (seqsplit in titlesec moving arguments causes \SQSPL@scan errors)
local t = c.text
t = t:gsub("\\", "\\textbackslash ")
t = t:gsub("%%", "\\%%")
t = t:gsub("%#", "\\#")
t = t:gsub("%$", "\\$")
t = t:gsub("%&", "\\&")
t = t:gsub("_", "\\_")
t = t:gsub("%{", "\\{")
t = t:gsub("%}", "\\}")
t = t:gsub("~", "\\textasciitilde{}")
t = t:gsub("%^", "\\textasciicircum{}")
t = t:gsub("%[", "\\lbrack{}"):gsub("%]", "\\rbrack{}")
return pandoc.RawInline("latex", "\\oldtexttt{" .. t .. "}")
end
}
return el
end
LUAFILTER
echo ""
info "Generating PDF with Pandoc + XeLaTeX..."
detail "Engine: xelatex"
detail "Font: Roboto / Roboto Mono"
detail "Margins: 0.5in, Font size: 10pt"
echo ""
# Write LaTeX preamble for modern styling
PREAMBLE=$(mktemp /tmp/pdfify-preamble-XXXXXX.tex)
cat > "$PREAMBLE" <<'LATEX'
% --- Modern color scheme ---
\usepackage{xcolor}
\definecolor{accent}{HTML}{374151}
\definecolor{accentdark}{HTML}{111827}
\definecolor{codebg}{HTML}{F8F9FA}
\definecolor{codeborder}{HTML}{E2E8F0}
\definecolor{headrulecolor}{HTML}{E2E8F0}
% --- Callout colors ---
\definecolor{infobg}{HTML}{EFF6FF}
\definecolor{infobar}{HTML}{3B82F6}
\definecolor{infofg}{HTML}{1E40AF}
\definecolor{tipbg}{HTML}{F0FDF4}
\definecolor{tipbar}{HTML}{22C55E}
\definecolor{tipfg}{HTML}{166534}
\definecolor{warningbg}{HTML}{FFFBEB}
\definecolor{warningbar}{HTML}{F59E0B}
\definecolor{warningfg}{HTML}{92400E}
\definecolor{dangerbg}{HTML}{FEF2F2}
\definecolor{dangerbar}{HTML}{EF4444}
\definecolor{dangerfg}{HTML}{991B1B}
\definecolor{examplebg}{HTML}{F5F3FF}
\definecolor{examplebar}{HTML}{8B5CF6}
\definecolor{examplefg}{HTML}{5B21B6}
\definecolor{quotecallbg}{HTML}{F8F9FA}
\definecolor{quotecallbar}{HTML}{6B7280}
\definecolor{quotecallfg}{HTML}{374151}
% --- Code block wrapping and styling ---
\usepackage{fvextra}
\DefineVerbatimEnvironment{Highlighting}{Verbatim}{
breaklines,
breakanywhere,
commandchars=\\\{\},
fontsize=\small
}
% Background on code blocks via mdframed
\usepackage[framemethod=tikz]{mdframed}
% Override pandoc's Shaded environment (define first if pandoc didn't)
\makeatletter
\@ifundefined{Shaded}{\newenvironment{Shaded}{}{}}{}
\makeatother
\renewenvironment{Shaded}{%
\begin{mdframed}[
backgroundcolor=codebg,
hidealllines=true,
roundcorner=4pt,
innertopmargin=8pt,
innerbottommargin=8pt,
innerleftmargin=10pt,
innerrightmargin=10pt,
skipabove=10pt,
skipbelow=10pt
]
}{%
\end{mdframed}
}
% --- Callout environments ---
\newenvironment{calloutbase}[3]{%
\begin{mdframed}[
backgroundcolor=#1,
linecolor=#2,
linewidth=3pt,
topline=false,
bottomline=false,
rightline=false,
innertopmargin=12pt,
innerbottommargin=12pt,
innerleftmargin=12pt,
innerrightmargin=12pt,
skipabove=12pt,
skipbelow=12pt,
roundcorner=0pt
]
\textbf{\color{#2}#3}\par\smallskip\setlength{\parindent}{0pt}
}{%
\end{mdframed}
}
\newenvironment{calloutinfo}[1]{\begin{calloutbase}{infobg}{infobar}{#1}}{\end{calloutbase}}
\newenvironment{callouttip}[1]{\begin{calloutbase}{tipbg}{tipbar}{#1}}{\end{calloutbase}}
\newenvironment{calloutwarning}[1]{\begin{calloutbase}{warningbg}{warningbar}{#1}}{\end{calloutbase}}
\newenvironment{calloutdanger}[1]{\begin{calloutbase}{dangerbg}{dangerbar}{#1}}{\end{calloutbase}}
\newenvironment{calloutexample}[1]{\begin{calloutbase}{examplebg}{examplebar}{#1}}{\end{calloutbase}}
\newenvironment{calloutquote}[1]{\begin{calloutbase}{quotecallbg}{quotecallbar}{#1}}{\end{calloutbase}}
% --- PDF bookmarks (sidebar navigation in PDF viewers) ---
\usepackage{bookmark}
\bookmarksetup{
numbered=false,
open,
openlevel=2
}
% --- Title banner ---
\definecolor{titlebg}{HTML}{E5E7EB}
% --- Page break after TOC ---
\let\oldtableofcontents\tableofcontents
\renewcommand{\tableofcontents}{\oldtableofcontents\clearpage}
% --- TOC styling ---
\usepackage{tocloft}
\setlength{\cftbeforetoctitleskip}{0.5em}
\renewcommand{\cfttoctitlefont}{\LARGE\bfseries\color{accentdark}\scshape}
\renewcommand{\cftaftertoctitle}{\par\vspace{2pt}{\color{headrulecolor}\hrule height 1pt}\vspace{10pt}}
\renewcommand{\cftsecfont}{\bfseries\color{accentdark}}
\renewcommand{\cftsecpagefont}{\bfseries\color{accentdark}}
\renewcommand{\cftsubsecfont}{\color{accent}}
\renewcommand{\cftsubsecpagefont}{\color{accent}}
\renewcommand{\cftsubsubsecfont}{\small\color{accent}}
\renewcommand{\cftsubsubsecpagefont}{\small\color{accent}}
\renewcommand{\cftsecleader}{\cftdotfill{\cftsecdotsep}}
\renewcommand{\cftsecdotsep}{\cftdotsep}
\setlength{\cftbeforesecskip}{6pt}
\setlength{\cftbeforesubsecskip}{2pt}
% --- Heading font ---
\newfontfamily\headingfont{Roboto}[BoldFont={Roboto Bold}]
% --- Symbol fallback (arrows, etc.) ---
\usepackage{newunicodechar}
\newfontfamily\fallbackfont{Liberation Sans}[Scale=MatchLowercase]
\newunicodechar{→}{{\fallbackfont →}}
\newunicodechar{←}{{\fallbackfont ←}}
\newunicodechar{↔}{{\fallbackfont ↔}}
\newunicodechar{⇒}{{\fallbackfont ⇒}}
\newunicodechar{⇐}{{\fallbackfont ⇐}}
\newunicodechar{✓}{{\fallbackfont ✓}}
\newunicodechar{✗}{{\fallbackfont ✗}}
% --- Modern section headings (tight, bold, dark) ---
\usepackage{titlesec}
% H1: # headings — large, small caps, dark, with rule
\titleformat{\section}
{\LARGE\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=5}\scshape}
{\thesection}{0.5em}{}[\vspace{2pt}{\color{headrulecolor}\titlerule[1pt]}]
\titlespacing*{\section}{0pt}{20pt}{10pt}
% H2: ## headings
\titleformat{\subsection}
{\Large\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=-1}}
{\thesubsection}{0.5em}{}
\titlespacing*{\subsection}{0pt}{16pt}{8pt}
% H3: ### headings
\titleformat{\subsubsection}
{\large\bfseries\color{accent}}
{\thesubsubsection}{0.5em}{}
\titlespacing*{\subsubsection}{0pt}{12pt}{6pt}
% H4: #### headings
\titleformat{\paragraph}[hang]
{\normalsize\bfseries\color{accent}}
{\theparagraph}{0.5em}{}
\titlespacing*{\paragraph}{0pt}{10pt}{4pt}
%%SECNUMDEPTH_PLACEHOLDER%%
% --- Page style (header/footer injected by pdfify) ---
\usepackage{fancyhdr}
\pagestyle{fancy}
\fancyhf{}
\renewcommand{\headrulewidth}{0pt}
\renewcommand{\footrulewidth}{0pt}
\setlength{\headheight}{14pt}
%%HEADER_PLACEHOLDER%%
%%FOOTER_PLACEHOLDER%%
% Make plain style identical to fancy (so title/TOC pages get the same footer)
\fancypagestyle{plain}{\fancyhf{}\renewcommand{\headrulewidth}{0pt}\renewcommand{\footrulewidth}{0pt}%%FOOTER_PLAIN%%}
% --- Blockquote styling (plain > quotes, not callouts) ---
\usepackage{etoolbox}
\renewenvironment{quote}{%
\begin{mdframed}[
backgroundcolor=infobg,
linecolor=infobar,
linewidth=3pt,
topline=false,
bottomline=false,
rightline=false,
innertopmargin=12pt,
innerbottommargin=12pt,
innerleftmargin=12pt,
innerrightmargin=12pt,
skipabove=10pt,
skipbelow=10pt,
roundcorner=0pt
]%
}{%
\end{mdframed}%
}
% --- Table styling ---
\usepackage{booktabs}
\usepackage{colortbl}
\usepackage{longtable}
\usepackage{tabularx}
\arrayrulecolor{codeborder}
% Alternating row shading
\definecolor{tablerowgray}{HTML}{F3F4F6}
\let\oldlongtable\longtable
\let\endoldlongtable\endlongtable
\renewenvironment{longtable}{\rowcolors{2}{white}{tablerowgray}\oldlongtable}{\endoldlongtable}
% Allow line breaks in table cells and shrink monospace to fit
\usepackage{array}
\renewcommand{\arraystretch}{1.4}
\let\oldtexttt\texttt
\renewcommand{\texttt}[1]{{\small\oldtexttt{\seqsplit{#1}}}}
\usepackage{seqsplit}
\setlength{\tabcolsep}{4pt}
% --- Images constrained to page ---
\usepackage{grffile}
\usepackage[export]{adjustbox}
\let\oldincludegraphics\includegraphics
\renewcommand{\includegraphics}[2][]{%
\oldincludegraphics[max width=\textwidth,max height=0.45\textheight,keepaspectratio,#1]{#2}%
}
% --- Figures don't float ---
\usepackage{float}
\floatplacement{figure}{H}
% --- Caption styling ---
\usepackage{caption}
\captionsetup{labelformat=empty,font={small,color=gray},skip=4pt}
% --- Tighter lists ---
\usepackage{enumitem}
\setlist{nosep,leftmargin=1.5em}
% --- Links ---
\usepackage{hyperref}
\hypersetup{
colorlinks=true,
linkcolor=accent,
urlcolor=accent,
citecolor=accent
}
% --- Horizontal rules ---
\renewcommand{\rule}[2]{\textcolor{headrulecolor}{\vrule width \textwidth height 0.5pt}}
LATEX
TOC_LEVEL="${TOC_LEVEL:-3}"
FM_FOOTER="${FM_FOOTER:-}"
FM_HEADER="${FM_HEADER:-}"
FM_AUTHOR="${FM_AUTHOR:-}"
FM_DATE="${FM_DATE:-}"
FM_DATE_LABEL="${FM_DATE_LABEL:-}"
FM_DATE_HASH="${FM_DATE_HASH:-}"
FM_DATE_DIRTY="${FM_DATE_DIRTY:-}"
FILE_NUMBERS="${FILE_NUMBERS:-1}"
FILE_NUMBER_FROM="${FILE_NUMBER_FROM:-2}"
FILE_PAGEBREAK="${FILE_PAGEBREAK:-1}"
# latex_escape TEXT
# Print TEXT on stdout with every LaTeX-special character turned into its
# literal form. The work is done by sed rather than bash parameter expansion
# because "}" inside a ${var//pat/rep} replacement confuses bash's brace
# parsing.
latex_escape() {
  # Rule order matters: backslashes are parked behind a placeholder first and
  # only expanded to \textbackslash{} at the very end, so neither the
  # backslashes added by the other rules nor the braces of \textbackslash{}
  # get escaped a second time.
  local -a rules=(
    's/\\/@@BSLASH@@/g'
    's/&/\\&/g'
    's/%/\\%/g'
    's/\$/\\$/g'
    's/#/\\#/g'
    's/_/\\_/g'
    's/{/\\{/g'
    's/}/\\}/g'
    's/~/\\textasciitilde{}/g'
    's/\^/\\textasciicircum{}/g'
    's/@@BSLASH@@/\\textbackslash{}/g'
  )
  local -a sed_args=()
  local rule
  for rule in "${rules[@]}"; do
    sed_args+=(-e "$rule")
  done
  printf '%s' "$1" | sed "${sed_args[@]}"
}
# Inject title banner into preamble
FM_TITLE="${FM_TITLE:-}"
FM_TITLE_TEX="$(latex_escape "$FM_TITLE")"
FM_SUBTITLE_TEX="$(latex_escape "${FM_SUBTITLE:-}")"
FM_AUTHOR_TEX="$(latex_escape "${FM_AUTHOR:-}")"
{
if [[ -n "$FM_TITLE" ]]; then
cat <<'TITLE_STATIC'
\makeatletter
\renewcommand{\maketitle}{%
\thispagestyle{fancy}%
\vspace*{-\topskip}%
\vspace*{-\headsep}%
\vspace*{-\headheight}%
\vspace*{-0.55in}%
\noindent\hspace*{-0.5in}%
\fcolorbox{titlebg}{titlebg}{%
\parbox{\dimexpr\paperwidth-2\fboxsep-2\fboxrule}{%
\hspace*{0.3in}\begin{minipage}{\dimexpr\textwidth}%
\vspace{20pt}%
TITLE_STATIC
echo " {\\fontsize{28}{34}\\selectfont\\bfseries\\color{black}${FM_TITLE_TEX}}\\\\[6pt]%"
FM_SUBTITLE="${FM_SUBTITLE:-}"
if [[ -n "$FM_SUBTITLE" ]]; then
echo " {\\fontsize{14}{18}\\selectfont\\color{black}${FM_SUBTITLE_TEX}}\\\\[8pt]%"
fi
if [[ -n "$FM_AUTHOR" ]]; then
echo " {\\fontsize{11}{14}\\selectfont\\color{black}${FM_AUTHOR_TEX}}\\\\[6pt]%"
fi
if [[ -n "$FM_DATE" ]]; then
DATE_VAL=""
if [[ -n "$FM_DATE_HASH" ]]; then
DATE_VAL="${FM_DATE% · *} · {\\texttt{${FM_DATE_HASH}}}"
else
DATE_VAL="${FM_DATE}"
fi
DIRTY_PART=""
if [[ -n "${FM_DATE_DIRTY:-}" ]]; then
DIRTY_PART=" {\\color{gray}\\itshape (dirty)}"
fi
if [[ -n "$FM_DATE_LABEL" ]]; then
echo " {\\fontsize{10}{12}\\selectfont\\color{black}${DATE_VAL} {\\color{gray}--- ${FM_DATE_LABEL}}${DIRTY_PART}}\\\\[4pt]%"
else
echo " {\\fontsize{10}{12}\\selectfont\\color{black}${DATE_VAL}${DIRTY_PART}}\\\\[4pt]%"
fi
fi
cat <<'TITLE_END'
\vspace{6pt}%
\end{minipage}%
}%
}%
\par\vspace{20pt}%
}
\makeatother
TITLE_END
echo '\AtBeginDocument{\maketitle}'
else
echo '\renewcommand{\maketitle}{}'
fi
} >> "$PREAMBLE"
# Inject header/footer into preamble
GIT_STAMP="${GIT_STAMP:-}"
FOOTER_L=""
FOOTER_C=""
FOOTER_R="\\\\fancyfoot[R]{\\\\color{gray}\\\\small Page \\\\thepage\\\\ of \\\\pageref*{LastPage}}"
[[ -n "$FM_FOOTER" ]] && FOOTER_L="\\\\fancyfoot[L]{\\\\color{gray}\\\\small ${FM_FOOTER}}"
sed -i "s|%%FOOTER_PLACEHOLDER%%|\\\\usepackage{lastpage}${FOOTER_L}${FOOTER_C}${FOOTER_R}|" "$PREAMBLE"
sed -i "s|%%FOOTER_PLAIN%%|${FOOTER_L}${FOOTER_C}${FOOTER_R}|" "$PREAMBLE"
if [[ -n "$FM_HEADER" ]]; then
sed -i "s|%%HEADER_PLACEHOLDER%%|\\\\fancyhead[C]{\\\\color{gray}\\\\small ${FM_HEADER}}|" "$PREAMBLE"
else
sed -i "s|%%HEADER_PLACEHOLDER%%||" "$PREAMBLE"
fi
# Inject watermark if set
FM_WATERMARK="${FM_WATERMARK:-}"
if [[ -n "$FM_WATERMARK" ]]; then
cat >> "$PREAMBLE" <<WATERMARK
\\usepackage{eso-pic}
\\usepackage{tikz}
\\AddToShipoutPictureFG{%
\\begin{tikzpicture}[remember picture,overlay]
\\node[rotate=45,opacity=0.12,scale=10,text=red] at (current page.center) {\\textsf{\\textbf{\\MakeUppercase{${FM_WATERMARK}}}}};
\\end{tikzpicture}%
}
WATERMARK
detail "Watermark: ${CYAN}${FM_WATERMARK}${RESET}"
fi
# Build TOC flags
# When numbering is on, headings shift by -1, so TOC depth needs +1 to compensate
TOC_FLAGS=()
if [[ "$TOC_LEVEL" -gt 0 ]]; then
TOC_FLAGS+=(--toc --toc-depth="$TOC_LEVEL")
detail "TOC depth: ${CYAN}${TOC_LEVEL}${RESET}"
else
detail "TOC: ${DIM}disabled${RESET}"
fi
AUTHOR_FLAGS=()
if [[ -n "$FM_AUTHOR" ]]; then
AUTHOR_FLAGS+=(-M "author=$FM_AUTHOR")
fi
# Numbered sections
NUMBER_FLAGS=()
if [[ "$FILE_NUMBERS" == "1" ]]; then
NUMBER_FLAGS+=(--number-sections)
# numberfrom controls which heading level starts getting numbers
# pandoc: section=1, subsection=2, subsubsection=3
cat >> "$PREAMBLE" <<SECNUM
\\setcounter{secnumdepth}{4}
SECNUM
if [[ "$FILE_NUMBER_FROM" -ge 2 ]]; then
# H1 (\section) unnumbered, H2 numbered as 1, 2, 3
cat >> "$PREAMBLE" <<'SECNUM2'
\makeatletter
\renewcommand{\thesection}{}
\renewcommand{\thesubsection}{\arabic{subsection}}
\renewcommand{\thesubsubsection}{\thesubsection.\arabic{subsubsection}}
% Remove section number from titleformat without changing style
\titleformat{\section}
{\LARGE\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=5}\scshape}
{}{0em}{}[\vspace{2pt}{\color{headrulecolor}\titlerule[1pt]}]
\makeatother
SECNUM2
fi
if [[ "$FILE_NUMBER_FROM" -ge 3 ]]; then
cat >> "$PREAMBLE" <<'SECNUM3'
\renewcommand{\thesubsection}{}
\renewcommand{\thesubsubsection}{\arabic{subsubsection}}
\titleformat{\subsection}
{\Large\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=-1}}
{}{0em}{}
SECNUM3
fi
fi
# Remove placeholder
sed -i 's|%%SECNUMDEPTH_PLACEHOLDER%%||' "$PREAMBLE"
pandoc "$TEMP_MD" \
-o "$OUTPUT_FILE" \
--pdf-engine=xelatex \
--lua-filter="$BRACKET_FILTER" \
--resource-path=".:$WORKDIR" \
--columns=72 \
-V geometry:"margin=0.5in,includehead,includefoot" \
-V fontsize=10pt \
-V mainfont="Roboto" \
-V monofont="Roboto Mono" \
"${TOC_FLAGS[@]}" \
"${AUTHOR_FLAGS[@]}" \
"${NUMBER_FLAGS[@]}" \
--highlight-style=tango \
-H "$PREAMBLE" \
--standalone
rm -f "$TEMP_MD" "$CALLOUT_MD" "$PREAMBLE" "${BRACKET_FILTER:-}" "${STRIPPED:-}" /tmp/mermaid-*.mmd /tmp/mermaid-*.png
PAGES=$(strings "$OUTPUT_FILE" 2>/dev/null | grep -c '/Type /Page' || echo "?")
SIZE=$(du -h "$OUTPUT_FILE" | cut -f1 | tr -d ' ')
success "PDF generated: ${CYAN}${SIZE}${RESET}, ~${CYAN}${PAGES}${RESET} pages"
INNER_SCRIPT
chmod +x "$CONVERT_SCRIPT"
# --- Run Docker ---
echo ""
info "Launching Docker container..."
detail "Mounting: ${CYAN}${INPUT_DIR}${RESET} → /work ${DIM}(read-only)${RESET}"
detail "Output: ${CYAN}${OUTPUT_DIR}${RESET} → /output"
echo ""
CONVERT_BASENAME="$(basename "$CONVERT_SCRIPT")"
docker run --rm \
-v "$INPUT_DIR:/work:ro" \
-v "$OUTPUT_DIR:/output" \
-e "TOC_LEVEL=$FILE_TOC_LEVEL" \
-e "FM_FOOTER=$FM_FOOTER" \
-e "FM_HEADER=$FM_HEADER" \
-e "FM_AUTHOR=$FM_AUTHOR" \
-e "FM_TITLE=$FM_TITLE" \
-e "FM_SUBTITLE=$FM_SUBTITLE" \
-e "FM_DATE=$FM_DATE" \
-e "FM_DATE_LABEL=${FM_DATE_LABEL:-}" \
-e "FM_DATE_HASH=${FM_DATE_HASH:-}" \
-e "FM_DATE_DIRTY=${FM_DATE_DIRTY:-}" \
-e "GIT_STAMP=${GIT_STAMP:-}" \
-e "FILE_NUMBERS=$FILE_NUMBERS" \
-e "FILE_NUMBER_FROM=$FILE_NUMBER_FROM" \
-e "HIDE_FIRST_H1=$HIDE_FIRST_H1" \
-e "FM_WATERMARK=$FM_WATERMARK" \
-e "FILE_PAGEBREAK=$FILE_PAGEBREAK" \
--tmpfs /tmp:exec \
"$IMAGE_NAME" "/work/$CONVERT_BASENAME" "$INPUT_FILE" "/output/$OUTPUT_FILE" \
|| {
echo ""
echo -e " ${RED}${BOLD}Error producing PDF.${RESET} Docker/pandoc exited with a non-zero status."
echo ""
return 1
}
# Move preview file to /tmp and clean up
if [[ -n "$PREVIEW_FINAL" ]]; then
mv "$OUTPUT" "$PREVIEW_FINAL"
OUTPUT="$PREVIEW_FINAL"
fi
echo ""
echo -e " ${GREEN}${BOLD}PDF created:${RESET} ${CYAN}${OUTPUT}${RESET}"
echo ""
# Open if requested
if [[ $OPEN -eq 1 ]]; then
open_pdf "$OUTPUT"
fi
}
# --- Process each input file ---
# run_all: convert every path in POSITIONAL with convert_file (OUT_FILE is
# passed as the output override) and print a summary banner via header.
# A failing file is counted, not propagated: the remaining files are still
# converted and the outcome is reported only through the banner text.
run_all() {
  local FAILED=0
  # Keep the loop variable local so a batch run does not clobber a global
  # of the same name.
  local input_file
  for input_file in "${POSITIONAL[@]}"; do
    convert_file "$input_file" "$OUT_FILE" || FAILED=$((FAILED + 1))
  done
  if [[ $FAILED -eq 0 ]]; then
    header "Complete! (${#POSITIONAL[@]} file(s))"
  else
    header "${FAILED} of ${#POSITIONAL[@]} file(s) failed"
  fi
}
run_all
# --- Watch mode ---
# Poll every input file every 2 seconds and re-run the whole batch when the
# SHA-256 of any of them changes. Runs until interrupted.
if [[ $WATCH -eq 1 ]]; then
  info "Watching for changes... ${DIM}(Ctrl+C to stop)${RESET}"
  echo ""
  # Baseline checksums live in an indexed array parallel to POSITIONAL
  # (indexed arrays work on bash 3, which lacks associative arrays). Keeping
  # them in memory means no temp file, no EXIT trap that would replace the
  # cleanup trap installed by convert_file, and no file path being
  # interpreted as a grep regular expression.
  WATCH_HASHES=()
  for WATCH_IDX in "${!POSITIONAL[@]}"; do
    WATCH_HASHES[$WATCH_IDX]=""
    if [[ -r "${POSITIONAL[$WATCH_IDX]}" ]]; then
      WATCH_HASHES[$WATCH_IDX]="$(_sha256 < "${POSITIONAL[$WATCH_IDX]}" | cut -d' ' -f1 || true)"
    fi
  done
  while true; do
    sleep 2
    CHANGED=0
    for WATCH_IDX in "${!POSITIONAL[@]}"; do
      # Editors that save by rename can make the file vanish for a moment;
      # skip this round instead of letting `set -e` kill the watcher.
      if [[ ! -r "${POSITIONAL[$WATCH_IDX]}" ]]; then
        continue
      fi
      NEW_HASH="$(_sha256 < "${POSITIONAL[$WATCH_IDX]}" | cut -d' ' -f1 || true)"
      if [[ -z "$NEW_HASH" ]]; then
        continue
      fi
      if [[ "$NEW_HASH" != "${WATCH_HASHES[$WATCH_IDX]}" ]]; then
        CHANGED=1
        # Update stored checksum
        WATCH_HASHES[$WATCH_IDX]="$NEW_HASH"
      fi
    done
    if [[ $CHANGED -eq 1 ]]; then
      echo ""
      info "Change detected — rebuilding..."
      echo ""
      run_all
    fi
  done
fi
# Check for updates (runs after success, fast timeout)
check_for_update
# Console output helpers. Each prints its arguments (joined by spaces) behind
# a colored marker; backslash escapes in the text are interpreted.
# info: bold message with a "::" prefix.
info() {
  printf '%b\n' "${BLUE}::${RESET} ${BOLD}$*${RESET}"
}
# success: message with a check-mark prefix.
success() {
  printf '%b\n' "${GREEN}✓${RESET} $*"
}
# warn: message with a warning-sign prefix.
warn() {
  printf '%b\n' "${YELLOW}⚠${RESET} $*"
}
# detail: secondary message with an arrow prefix.
detail() {
  printf '%b\n' " ${DIM}→${RESET} $*"
}
# header: banner with the message between two rules, surrounded by blank lines.
header() {
  local bar="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  printf '%b\n' "\n${MAGENTA}${bar}${RESET}"
  printf '%b\n' "${MAGENTA} ${BOLD}$*${RESET}"
  printf '%b\n' "${MAGENTA}${bar}${RESET}\n"
}
# --- Self-update ---
# do_update: download the latest revision of the gist and install it over
# $SELF, then exit the script (0 on success, 1 when the download fails or the
# payload does not look like a script).
do_update() {
  info "Updating pdfify from gist..."
  local raw_url tmp
  # Ask the gist API for the current raw URL. This lookup is best-effort:
  # under `set -e -o pipefail` a failing curl or grep would otherwise abort
  # the script on this very assignment, and the GIST_RAW fallback below would
  # never be reached.
  raw_url=$(curl -fsSL "https://api.github.com/gists/${GIST_ID}" 2>/dev/null | grep '"raw_url"' | head -1 | sed 's/.*"raw_url": *"//;s/".*//') || raw_url=""
  if [[ -z "$raw_url" ]]; then
    raw_url="$GIST_RAW"
  fi
  tmp=$(mktemp)
  if curl -fsSL "$raw_url" -o "$tmp" 2>/dev/null; then
    # Accept the download only if it is non-empty and starts with a shebang.
    if [[ -s "$tmp" ]] && head -1 "$tmp" | grep -q '^#!/'; then
      chmod +x "$tmp"
      # mv replaces the file as a whole, so the running script is never
      # overwritten in place.
      mv "$tmp" "$SELF"
      success "Updated to latest version"
      detail "${CYAN}${SELF}${RESET}"
    else
      rm -f "$tmp"
      echo -e "${RED}Error:${RESET} Downloaded file doesn't look like a script"
      exit 1
    fi
  else
    rm -f "$tmp"
    echo -e "${RED}Error:${RESET} Failed to download update"
    exit 1
  fi
  exit 0
}
# check_for_update: best-effort notice that the published gist differs from
# the running script. Compares the SHA-256 of the gist's raw file with that
# of $SELF and prints a hint when they differ. Every step that can fail
# returns 0 quietly, so this check can never abort the script under `set -e`.
check_for_update() {
  local remote_hash local_hash raw_url
  # Short timeouts keep the check from stalling the run when offline.
  raw_url=$(curl -fsSL --connect-timeout 2 --max-time 3 "https://api.github.com/gists/${GIST_ID}" 2>/dev/null | grep '"raw_url"' | head -1 | sed 's/.*"raw_url": *"//;s/".*//') || return 0
  if [[ -z "$raw_url" ]]; then
    return 0
  fi
  remote_hash=$(curl -fsSL --connect-timeout 2 --max-time 5 "$raw_url" 2>/dev/null | _sha256 | cut -d' ' -f1) || return 0
  # Same guard as the remote steps: an unreadable $SELF must not turn a
  # finished conversion into a failure.
  if [[ ! -r "$SELF" ]]; then
    return 0
  fi
  local_hash=$(_sha256 < "$SELF" | cut -d' ' -f1) || return 0
  if [[ -n "$remote_hash" && "$remote_hash" != "$local_hash" ]]; then
    echo -e "${YELLOW}⚠${RESET} ${DIM}A newer version of pdfify is available. Run ${CYAN}pdfify --update${DIM} to upgrade.${RESET}"
  fi
}
# --- Args (CLI overrides frontmatter; "" means "use frontmatter default") ---
REBUILD=0
WATCH=0
OPEN=0
PREVIEW=0
OUT_FILE=""
# Name of the value-taking option whose value is expected as the next argument
NEXT_KEY=""
POSITIONAL=()
# CLI overrides — empty string means "not set, defer to frontmatter"
CLI_TOC_LEVEL=""
CLI_NUMBERS=""
CLI_NUMBER_FROM=""
CLI_TITLE=""
CLI_SUBTITLE=""
CLI_AUTHOR=""
CLI_HEADER=""
CLI_FOOTER=""
CLI_DATE=""
CLI_WATERMARK=""
for arg in "$@"; do
  # Second half of a "--key value" pair: store the value and move on.
  if [[ -n "$NEXT_KEY" ]]; then
    case "$NEXT_KEY" in
      toc-level) CLI_TOC_LEVEL="$arg" ;;
      number-from) CLI_NUMBER_FROM="$arg" ;;
      out) OUT_FILE="$arg" ;;
      title) CLI_TITLE="$arg" ;;
      subtitle) CLI_SUBTITLE="$arg" ;;
      author) CLI_AUTHOR="$arg" ;;
      header) CLI_HEADER="$arg" ;;
      footer) CLI_FOOTER="$arg" ;;
      date) CLI_DATE="$arg" ;;
      watermark) CLI_WATERMARK="$arg" ;;
    esac
    NEXT_KEY=""
    continue
  fi
  case "$arg" in
    --rebuild) REBUILD=1 ;;
    --update) do_update ;;
    --watch) WATCH=1 ;;
    --open) OPEN=1 ;;
    --preview) PREVIEW=1; OPEN=1 ;;
    --no-numbers) CLI_NUMBERS="false" ;;
    --numbers) CLI_NUMBERS="true" ;;
    --clean)
      echo -e "${BLUE}::${RESET} ${BOLD}Removing Docker image ${CYAN}${IMAGE_NAME}${RESET}..."
      if docker rmi "$IMAGE_NAME" >/dev/null 2>&1; then
        echo -e "${GREEN}✓${RESET} Image removed"
      else
        echo -e "${DIM}Image not found${RESET}"
      fi
      exit 0 ;;
    --toc-level) NEXT_KEY="toc-level" ;;
    --toc-level=*) CLI_TOC_LEVEL="${arg#*=}" ;;
    --number-from) NEXT_KEY="number-from" ;;
    --number-from=*) CLI_NUMBER_FROM="${arg#*=}" ;;
    --out) NEXT_KEY="out" ;;
    --out=*) OUT_FILE="${arg#*=}" ;;
    --title) NEXT_KEY="title" ;;
    --title=*) CLI_TITLE="${arg#*=}" ;;
    --subtitle) NEXT_KEY="subtitle" ;;
    --subtitle=*) CLI_SUBTITLE="${arg#*=}" ;;
    --author) NEXT_KEY="author" ;;
    --author=*) CLI_AUTHOR="${arg#*=}" ;;
    --header) NEXT_KEY="header" ;;
    --header=*) CLI_HEADER="${arg#*=}" ;;
    --footer) NEXT_KEY="footer" ;;
    --footer=*) CLI_FOOTER="${arg#*=}" ;;
    --date) NEXT_KEY="date" ;;
    --date=*) CLI_DATE="${arg#*=}" ;;
    --watermark) NEXT_KEY="watermark" ;;
    --watermark=*) CLI_WATERMARK="${arg#*=}" ;;
    --version) echo "pdfify v${VERSION}"; exit 0 ;;
    --help|-h)
      echo -e "${BOLD}pdfify${RESET} v${VERSION} — Markdown to PDF"
      echo ""
      echo -e "${BOLD}Usage:${RESET} pdfify ${CYAN}<file.md> [file2.md ...]${RESET} [options]"
      echo ""
      echo -e "${BOLD}Options:${RESET}"
      echo -e " ${DIM}--out FILE${RESET} Output file (single input only)"
      echo -e " ${DIM}--toc-level N${RESET} TOC depth: 0=none, 1=H1, 2=H2, 3=H3 (default: 3)"
      echo -e " ${DIM}--numbers${RESET} Enable numbered headings (default)"
      echo -e " ${DIM}--no-numbers${RESET} Disable numbered headings"
      echo -e " ${DIM}--number-from N${RESET} Start numbering at heading level N (default: 2)"
      echo -e " ${DIM}--open${RESET} Open PDF after generation"
      echo -e " ${DIM}--preview${RESET} Render to /tmp and open (no permanent file)"
      echo -e " ${DIM}--watch${RESET} Watch for changes and regenerate"
      echo -e " ${DIM}--rebuild${RESET} Force rebuild the Docker image"
      echo -e " ${DIM}--clean${RESET} Remove the Docker image"
      echo -e " ${DIM}--update${RESET} Update pdfify to latest version from gist"
      echo -e " ${DIM}--version${RESET} Show version"
      echo ""
      echo -e "${BOLD}Overrides${RESET} (CLI trumps frontmatter):"
      echo -e " ${DIM}--title TEXT${RESET} ${DIM}--subtitle TEXT${RESET}"
      echo -e " ${DIM}--author TEXT${RESET} ${DIM}--header TEXT${RESET}"
      echo -e " ${DIM}--footer TEXT${RESET} ${DIM}--date TEXT${RESET}"
      echo -e " ${DIM}--watermark TEXT${RESET}"
      echo ""
      echo -e "${BOLD}Frontmatter:${RESET}"
      echo -e " title, subtitle, author, header, footer, toc-level, date,"
      echo -e " numbersections (true/false), numberfrom (1-4), watermark"
      exit 0 ;;
    *) POSITIONAL+=("$arg") ;;
  esac
done
# A value-taking option given as the very last argument (e.g. "pdfify a.md
# --out") would otherwise be dropped without any notice.
if [[ -n "$NEXT_KEY" ]]; then
  echo -e "${RED}Error:${RESET} --${NEXT_KEY} requires a value"
  exit 1
fi
# These two values are later compared numerically, so reject anything that
# is not a plain non-negative integer up front.
if [[ -n "$CLI_TOC_LEVEL" && ! "$CLI_TOC_LEVEL" =~ ^[0-9]+$ ]]; then
  echo -e "${RED}Error:${RESET} --toc-level must be a non-negative integer"
  exit 1
fi
if [[ -n "$CLI_NUMBER_FROM" && ! "$CLI_NUMBER_FROM" =~ ^[0-9]+$ ]]; then
  echo -e "${RED}Error:${RESET} --number-from must be a non-negative integer"
  exit 1
fi
if [[ ${#POSITIONAL[@]} -lt 1 ]]; then
  echo -e "${BOLD}Usage:${RESET} pdfify ${CYAN}<file.md> [file2.md ...]${RESET} [options]"
  echo -e " Run ${CYAN}pdfify --help${RESET} for all options"
  exit 1
fi
if [[ -n "$OUT_FILE" && ${#POSITIONAL[@]} -gt 1 ]]; then
  echo -e "${RED}Error:${RESET} --out cannot be used with multiple input files"
  exit 1
fi
# --- Open helper ---
# open_pdf FILE: hand FILE to the first available opener command (`open`,
# then `xdg-open`). Opening is best-effort and always returns 0: this is the
# last step of a conversion, and a viewer that cannot start must not make an
# already-written PDF count as a failed conversion.
open_pdf() {
  local pdf="$1"
  local opener
  for opener in open xdg-open; do
    if command -v "$opener" >/dev/null 2>&1; then
      "$opener" "$pdf" || warn "Could not open ${pdf} with ${opener}"
      return 0
    fi
  done
  warn "No PDF opener found (tried open, xdg-open)"
  return 0
}
header "pdfify v${VERSION}"
# --- Embedded Dockerfile ---
# Image recipe: pandoc + XeLaTeX + fonts + Chromium + mermaid-cli on top of
# node:20-slim. The heredoc delimiter is quoted, so the text is stored
# verbatim with no shell expansion.
DOCKERFILE=$(cat <<'DOCKERFILE_END'
FROM node:20-slim
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update -qq && \
    apt-get install -y --no-install-recommends \
    pandoc \
    texlive-latex-recommended \
    texlive-latex-extra \
    texlive-fonts-recommended \
    texlive-fonts-extra \
    texlive-xetex \
    lmodern \
    librsvg2-bin \
    chromium \
    ca-certificates \
    fonts-liberation \
    fonts-roboto \
    fonts-roboto-unhinted \
    fonts-noto-color-emoji \
    wget \
    fontconfig \
    && rm -rf /var/lib/apt/lists/*
RUN mkdir -p /usr/share/fonts/truetype/roboto-mono && \
    for style in Regular Bold Italic BoldItalic Medium MediumItalic Light LightItalic; do \
    wget -q "https://github.com/googlefonts/RobotoMono/raw/main/fonts/ttf/RobotoMono-${style}.ttf" \
    -O "/usr/share/fonts/truetype/roboto-mono/RobotoMono-${style}.ttf" 2>/dev/null || true; \
    done && \
    fc-cache -f
# These must be set before npm install: puppeteer reads the skip flags at
# install time, and the system chromium installed above is used instead.
ENV PUPPETEER_SKIP_DOWNLOAD=true
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
ENV CHROME_PATH=/usr/bin/chromium
RUN npm install -g @mermaid-js/mermaid-cli
RUN echo '{"maxTextSize": 90000, "flowchart": {"useMaxWidth": true}, "theme": "default"}' > /opt/mermaid-config.json
RUN echo '{"args": ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu"]}' > /opt/puppeteer-config.json
WORKDIR /work
ENTRYPOINT ["/bin/bash"]
DOCKERFILE_END
)
# --- Build Docker image ---
# Make sure the image IMAGE_NAME exists: optionally remove it first
# (--rebuild), reuse it when present, otherwise build it from the embedded
# Dockerfile using an empty temporary directory as build context.
echo ""
if [[ $REBUILD -eq 1 ]]; then
  info "Removing existing Docker image ${CYAN}${IMAGE_NAME}${RESET}..."
  docker rmi "$IMAGE_NAME" >/dev/null 2>&1 || true
  success "Image removed"
fi
if docker image inspect "$IMAGE_NAME" >/dev/null 2>&1; then
  info "Docker image ${GREEN}${IMAGE_NAME}${RESET} found ${DIM}(cached)${RESET}"
  success "Reusing existing image"
else
  info "Building Docker image ${CYAN}${IMAGE_NAME}${RESET}..."
  detail "Installing: pandoc, XeLaTeX, mermaid-cli, Chromium, fonts"
  detail "This takes 2-3 minutes on first run (cached after)"
  echo ""
  BUILD_CTX=$(mktemp -d)
  # First attempt: show only step markers, the final tag line and error lines.
  # The pipeline status is deliberately ignored (`|| true`): with
  # `set -e -o pipefail` a failed build would otherwise terminate the script
  # right here, skipping both the context cleanup and the verbose re-run
  # below. Success is decided by the image inspect that follows.
  echo "$DOCKERFILE" | DOCKER_BUILDKIT=0 docker build -t "$IMAGE_NAME" -f - "$BUILD_CTX" 2>&1 | while IFS= read -r line; do
    if [[ "$line" =~ ^Step\ ([0-9]+)/([0-9]+) ]]; then
      echo -e " ${CYAN}[${BASH_REMATCH[1]}/${BASH_REMATCH[2]}]${RESET} ${DIM}${line#*: }${RESET}"
    elif [[ "$line" == *"Successfully tagged"* ]]; then
      echo -e " ${GREEN}${line}${RESET}"
    elif [[ "$line" == *"ERROR"* || "$line" == *"error"* ]]; then
      echo -e " ${RED}${line}${RESET}"
    fi
  done || true
  rm -rf "$BUILD_CTX"
  if ! docker image inspect "$IMAGE_NAME" >/dev/null 2>&1; then
    echo -e "\n${RED}Docker build failed. Re-running with full output:${RESET}\n"
    BUILD_CTX=$(mktemp -d)
    # Expected to fail again; keep going so the context is removed before exit.
    echo "$DOCKERFILE" | DOCKER_BUILDKIT=0 docker build -t "$IMAGE_NAME" -f - "$BUILD_CTX" || true
    rm -rf "$BUILD_CTX"
    exit 1
  fi
  success "Docker image built"
fi
# === Per-file conversion ===
convert_file() {
local INPUT_PATH="$1"
local OUTPUT_OVERRIDE="$2"
# --- Resolve paths ---
local INPUT INPUT_DIR INPUT_FILE OUTPUT OUTPUT_DIR OUTPUT_FILE
INPUT="$(cd "$(dirname "$INPUT_PATH")" && pwd)/$(basename "$INPUT_PATH")"
if [[ ! -f "$INPUT" ]]; then
echo -e "${RED}Error:${RESET} $INPUT_PATH not found"
return 1
fi
INPUT_DIR="$(dirname "$INPUT")"
INPUT_FILE="$(basename "$INPUT")"
OUTPUT="${OUTPUT_OVERRIDE:-${INPUT%.md}.pdf}"
OUTPUT_DIR="$(cd "$(dirname "$OUTPUT")" 2>/dev/null && pwd || (mkdir -p "$(dirname "$OUTPUT")" && cd "$(dirname "$OUTPUT")" && pwd))"
OUTPUT="${OUTPUT_DIR}/$(basename "$OUTPUT")"
OUTPUT_FILE="$(basename "$OUTPUT")"
# Preview mode: write temp file in input dir (Docker-mountable), move to /tmp after
local PREVIEW_FINAL=""
if [[ $PREVIEW -eq 1 ]]; then
local base="${INPUT_FILE%.md}"
PREVIEW_FINAL="/tmp/pdfify-preview-${base}.pdf"
OUTPUT_FILE=".pdfify-preview-${base}.pdf"
OUTPUT="${OUTPUT_DIR}/${OUTPUT_FILE}"
fi
info "Input: ${CYAN}${INPUT}${RESET}"
if [[ -n "$PREVIEW_FINAL" ]]; then
info "Output: ${CYAN}${PREVIEW_FINAL}${RESET} ${DIM}(preview)${RESET}"
else
info "Output: ${CYAN}${OUTPUT}${RESET}"
fi
# --- Parse YAML frontmatter ---
local FM_TITLE="" FM_SUBTITLE="" FM_AUTHOR="" FM_FOOTER="" FM_HEADER=""
local FM_TOC_LEVEL="" FM_DATE="" FM_NUMBERSECTIONS="" FM_NUMBERFROM="" FM_WATERMARK=""
local FM_DATE_HASH="" FM_DATE_DIRTY="" FM_DATE_LABEL=""
if head -1 "$INPUT" | grep -q '^---'; then
local FM_BLOCK
FM_BLOCK=$(awk 'NR==1 && /^---/{found=1; next} found && /^---/{exit} found{print}' "$INPUT")
extract_fm() { echo "$FM_BLOCK" | sed -n "s/^$1:[[:space:]]*//p" | sed 's/^["'"'"']\(.*\)["'"'"']$/\1/'; }
FM_TITLE=$(extract_fm "title")
FM_AUTHOR=$(extract_fm "author")
FM_SUBTITLE=$(extract_fm "subtitle")
FM_FOOTER=$(extract_fm "footer")
FM_HEADER=$(extract_fm "header")
FM_TOC_LEVEL=$(extract_fm "toc-level")
FM_DATE=$(extract_fm "date")
FM_NUMBERSECTIONS=$(extract_fm "numbersections")
FM_NUMBERFROM=$(extract_fm "numberfrom")
FM_WATERMARK=$(extract_fm "watermark")
fi
# --- CLI overrides frontmatter ---
[[ -n "$CLI_TITLE" ]] && FM_TITLE="$CLI_TITLE"
[[ -n "$CLI_SUBTITLE" ]] && FM_SUBTITLE="$CLI_SUBTITLE"
[[ -n "$CLI_AUTHOR" ]] && FM_AUTHOR="$CLI_AUTHOR"
[[ -n "$CLI_FOOTER" ]] && FM_FOOTER="$CLI_FOOTER"
[[ -n "$CLI_HEADER" ]] && FM_HEADER="$CLI_HEADER"
[[ -n "$CLI_DATE" ]] && FM_DATE="$CLI_DATE"
[[ -n "$CLI_WATERMARK" ]] && FM_WATERMARK="$CLI_WATERMARK"
[[ -n "$CLI_TOC_LEVEL" ]] && FM_TOC_LEVEL="$CLI_TOC_LEVEL"
[[ -n "$CLI_NUMBER_FROM" ]] && FM_NUMBERFROM="$CLI_NUMBER_FROM"
[[ -n "$CLI_NUMBERS" ]] && FM_NUMBERSECTIONS="$CLI_NUMBERS"
# --- Auto-detect document structure ---
# Count H1 headings (outside code blocks)
local H1_COUNT=0 IN_CODE_SCAN=0 FIRST_H1_TEXT=""
while IFS= read -r scanline || [[ -n "$scanline" ]]; do
[[ "$scanline" =~ ^\`\`\` ]] && { if [[ $IN_CODE_SCAN -eq 0 ]]; then IN_CODE_SCAN=1; else IN_CODE_SCAN=0; fi; continue; }
if [[ $IN_CODE_SCAN -eq 0 && "$scanline" =~ ^#\ ]]; then
H1_COUNT=$((H1_COUNT + 1))
[[ $H1_COUNT -eq 1 ]] && FIRST_H1_TEXT="${scanline#\# }"
fi
done < "$INPUT"
local FILE_TOC_LEVEL="${FM_TOC_LEVEL:-3}"
local FILE_NUMBERS=1
[[ "$FM_NUMBERSECTIONS" == "false" ]] && FILE_NUMBERS=0
# Auto-determine numberfrom based on structure (if not explicitly set)
local FILE_NUMBER_FROM="${FM_NUMBERFROM:-}"
local HIDE_FIRST_H1=0
if [[ -z "$FILE_NUMBER_FROM" ]]; then
if [[ $H1_COUNT -eq 1 ]]; then
# Single H1 = document title; number from H2, hide H1 in body
FILE_NUMBER_FROM=2
HIDE_FIRST_H1=1
# Use H1 text as title if no title set
[[ -z "$FM_TITLE" ]] && FM_TITLE="$FIRST_H1_TEXT"
detail "Auto: ${DIM}single H1 detected → using as title, numbering from H2${RESET}"
else
# Multiple H1s = sections; number from H1
FILE_NUMBER_FROM=1
detail "Auto: ${DIM}${H1_COUNT} H1s detected → numbering from H1${RESET}"
fi
fi
# Default date: current date/time
# Set to "none" in frontmatter or --date to suppress
FM_DATE_HASH="${FM_DATE_HASH:-}"
FM_DATE_DIRTY="${FM_DATE_DIRTY:-}"
if [[ "$FM_DATE" == "none" || "$FM_DATE" == "false" ]]; then
FM_DATE=""
elif [[ -z "$FM_DATE" && -z "$CLI_DATE" ]]; then
FM_DATE="$(date +"%Y-%m-%d %H:%M")"
fi
echo ""
[[ -n "$FM_TITLE" ]] && detail "Title: ${CYAN}${FM_TITLE}${RESET}"
[[ -n "$FM_SUBTITLE" ]] && detail "Subtitle: ${CYAN}${FM_SUBTITLE}${RESET}"
[[ -n "$FM_AUTHOR" ]] && detail "Author: ${CYAN}${FM_AUTHOR}${RESET}"
[[ -n "$FM_HEADER" ]] && detail "Header: ${CYAN}${FM_HEADER}${RESET}"
[[ -n "$FM_FOOTER" ]] && detail "Footer: ${CYAN}${FM_FOOTER}${RESET}"
detail "Date: ${CYAN}${FM_DATE}${RESET}"
detail "TOC: ${CYAN}level ${FILE_TOC_LEVEL}${RESET}"
detail "Numbered: ${CYAN}$([ $FILE_NUMBERS -eq 1 ] && echo "yes (from H${FILE_NUMBER_FROM})" || echo no)${RESET}"
[[ -n "$FM_WATERMARK" ]] && detail "Watermark: ${CYAN}${FM_WATERMARK}${RESET}"
# --- Git hash for source file ---
local GIT_STAMP=""
# --- Discover images referenced in the markdown ---
echo ""
info "Scanning ${CYAN}${INPUT_FILE}${RESET} for assets..."
IMAGES=()
while IFS= read -r img; do
[[ -z "$img" ]] && continue
[[ "$img" =~ ^https?:// ]] && continue
if [[ -f "$INPUT_DIR/$img" ]]; then
IMAGES+=("$img")
success "Image: ${CYAN}${img}${RESET} ${DIM}($(du -h "$INPUT_DIR/$img" | cut -f1 | tr -d ' '))${RESET}"
else
warn "Image: ${YELLOW}${img}${RESET} ${RED}(not found)${RESET}"
fi
done < <(sed -n 's/.*!\[[^]]*\](\([^)]*\)).*/\1/p' "$INPUT"; sed -n 's/.*src="\([^"]*\)".*/\1/p' "$INPUT")
MERMAID_COUNT=$(grep -c '```mermaid' "$INPUT" || true)
if [[ $MERMAID_COUNT -gt 0 ]]; then
success "Mermaid diagrams: ${CYAN}${MERMAID_COUNT}${RESET}"
fi
CALLOUT_COUNT=$(grep -c '> \[!' "$INPUT" || true)
if [[ $CALLOUT_COUNT -gt 0 ]]; then
success "Callouts: ${CYAN}${CALLOUT_COUNT}${RESET}"
fi
TABLE_COUNT=$(grep -c '^|' "$INPUT" || true)
CODE_COUNT=$(grep -c '```' "$INPUT" || true)
CODE_COUNT=$(( (CODE_COUNT - MERMAID_COUNT * 2) / 2 ))
[[ $TABLE_COUNT -gt 0 ]] && detail "Tables: ${TABLE_COUNT} rows"
[[ $CODE_COUNT -gt 0 ]] && detail "Code blocks: ~${CODE_COUNT}"
echo ""
info "Found ${GREEN}${#IMAGES[@]}${RESET} image(s), ${GREEN}${MERMAID_COUNT}${RESET} mermaid diagram(s), ${GREEN}${CALLOUT_COUNT}${RESET} callout(s)"
# --- Write the conversion script to a temp file (mounted into Docker) ---
CONVERT_SCRIPT="${INPUT_DIR}/.pdfify-convert-$$.sh"
trap 'rm -f "$CONVERT_SCRIPT"' EXIT
cat > "$CONVERT_SCRIPT" <<'INNER_SCRIPT'
#!/bin/bash
set -euo pipefail
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
BOLD='\033[1m'
DIM='\033[2m'
RESET='\033[0m'
info() { echo -e "${BLUE}::${RESET} ${BOLD}$*${RESET}"; }
success() { echo -e "${GREEN}✓${RESET} $*"; }
detail() { echo -e " ${DIM}→${RESET} $*"; }
INPUT_FILE="$1"
OUTPUT_FILE="$2"
WORKDIR="/work"
cd "$WORKDIR"
# --- Step 0: Strip first H1 if it's being used as document title ---
HIDE_FIRST_H1="${HIDE_FIRST_H1:-0}"
EFFECTIVE_INPUT="$INPUT_FILE"
if [[ "$HIDE_FIRST_H1" == "1" ]]; then
STRIPPED=$(mktemp /tmp/pdfify-stripped-XXXXXX.md)
FOUND_H1=0
IN_CODE_BLK=0
IN_FMATTER=0
while IFS= read -r line || [[ -n "$line" ]]; do
[[ "$line" =~ ^\`\`\` ]] && { if [[ $IN_CODE_BLK -eq 0 ]]; then IN_CODE_BLK=1; else IN_CODE_BLK=0; fi; }
if [[ "$line" == "---" && $IN_CODE_BLK -eq 0 ]]; then
if [[ $IN_FMATTER -eq 0 && $FOUND_H1 -eq 0 ]]; then IN_FMATTER=1; else IN_FMATTER=0; fi
fi
# Skip the first H1 (and any blank line immediately after)
if [[ $FOUND_H1 -eq 0 && $IN_CODE_BLK -eq 0 && $IN_FMATTER -eq 0 && "$line" =~ ^#\ ]]; then
FOUND_H1=1
continue
fi
# Skip blank line right after removed H1
if [[ $FOUND_H1 -eq 1 && -z "$line" ]]; then
FOUND_H1=2
continue
fi
[[ $FOUND_H1 -eq 1 ]] && FOUND_H1=2
echo "$line" >> "$STRIPPED"
done < "$INPUT_FILE"
EFFECTIVE_INPUT="$(basename "$STRIPPED")"
detail "Stripped first H1 (promoted to title)"
fi
# --- Step 1: Pre-process Obsidian callouts ---
info "Pre-processing callouts..."
CALLOUT_MD=$(mktemp /tmp/pdfify-callout-XXXXXX.md)
IN_CALLOUT=0
CALLOUT_TYPE=""
CALLOUT_TITLE=""
CALLOUT_BUF=""
CALLOUT_COUNT=0
# flush_callout: append the callout collected so far to $CALLOUT_MD and reset
# the collector state (IN_CALLOUT, CALLOUT_TYPE, CALLOUT_TITLE, CALLOUT_BUF).
# Nothing is written unless a callout with a non-empty type is open. The
# callout body is emitted as ordinary markdown between two raw-LaTeX fences
# that open and close the environment chosen from the callout type.
# NOTE(review): CALLOUT_TITLE is inserted into the \begin line verbatim; it is
# not LaTeX-escaped here.
flush_callout() {
  if [[ $IN_CALLOUT -eq 1 && -n "$CALLOUT_TYPE" ]]; then
    CALLOUT_COUNT=$((CALLOUT_COUNT + 1))
    # Unknown types (and info/note) use the info environment.
    local env_name="calloutinfo"
    case "${CALLOUT_TYPE,,}" in
      tip|hint) env_name="callouttip" ;;
      warning|caution) env_name="calloutwarning" ;;
      danger|error|bug) env_name="calloutdanger" ;;
      example) env_name="calloutexample" ;;
      quote|cite) env_name="calloutquote" ;;
    esac
    # One printf, one output line per argument.
    printf '%s\n' \
      "" \
      '```{=latex}' \
      "\\begin{${env_name}}{${CALLOUT_TITLE}}" \
      '```' \
      "" \
      "$CALLOUT_BUF" \
      "" \
      '```{=latex}' \
      "\\end{${env_name}}" \
      '```' \
      "" >> "$CALLOUT_MD"
  fi
  IN_CALLOUT=0
  CALLOUT_TYPE=""
  CALLOUT_TITLE=""
  CALLOUT_BUF=""
}
while IFS= read -r line || [[ -n "$line" ]]; do
if [[ "$line" =~ ^\>\ *\[!([a-zA-Z]+)\]\ *(.*) ]]; then
flush_callout
IN_CALLOUT=1
CALLOUT_TYPE="${BASH_REMATCH[1]}"
CALLOUT_TITLE="${BASH_REMATCH[2]:-${BASH_REMATCH[1]^}}"
continue
fi
if [[ $IN_CALLOUT -eq 1 ]]; then
if [[ "$line" =~ ^\>\ ?(.*) ]]; then
CALLOUT_BUF="${CALLOUT_BUF}${BASH_REMATCH[1]}
"
continue
else
flush_callout
fi
fi
echo "$line" >> "$CALLOUT_MD"
done < "${STRIPPED:-$INPUT_FILE}"
flush_callout
if [[ $CALLOUT_COUNT -gt 0 ]]; then
success "Converted $CALLOUT_COUNT callout(s)"
fi
# --- Step 1b+1c: Inject page breaks (after TOC, before each H1) ---
BREAK_INJECTED=$(mktemp /tmp/pdfify-breaks-XXXXXX.md)
H1_COUNT=0
IN_FM=0
IN_CODE=0
DONE_TOC_BREAK=0
while IFS= read -r line || [[ -n "$line" ]]; do
# Track code blocks (``` opens/closes)
if [[ "$line" =~ ^\`\`\` ]]; then
if [[ $IN_CODE -eq 0 ]]; then IN_CODE=1; else IN_CODE=0; fi
echo "$line" >> "$BREAK_INJECTED"
continue
fi
# Track frontmatter (only at start of file)
if [[ "$line" == "---" && $IN_CODE -eq 0 ]]; then
if [[ $IN_FM -eq 0 && $H1_COUNT -eq 0 ]]; then IN_FM=1; else IN_FM=0; fi
echo "$line" >> "$BREAK_INJECTED"
continue
fi
if [[ $IN_CODE -eq 0 && $IN_FM -eq 0 ]]; then
# Before first content after frontmatter: inject TOC page break
if [[ $DONE_TOC_BREAK -eq 0 && "$TOC_LEVEL" -gt 0 && -n "$line" ]]; then
echo "" >> "$BREAK_INJECTED"
echo '```{=latex}' >> "$BREAK_INJECTED"
echo '\newpage' >> "$BREAK_INJECTED"
echo '```' >> "$BREAK_INJECTED"
echo "" >> "$BREAK_INJECTED"
DONE_TOC_BREAK=1
fi
# Page break before each top-level section (except first)
# Build the marker: numberfrom=1 → "# ", numberfrom=2 → "## "
BREAK_HASHES=$(printf '#%.0s' $(seq 1 "$FILE_NUMBER_FROM"))
if [[ "$line" == "${BREAK_HASHES} "* ]]; then
# Make sure it's exactly that level, not deeper
NEXT_CHAR="${line:${#BREAK_HASHES}:1}"
if [[ "$NEXT_CHAR" != "#" ]]; then
H1_COUNT=$((H1_COUNT + 1))
if [[ $H1_COUNT -gt 1 ]]; then
echo "" >> "$BREAK_INJECTED"
echo '```{=latex}' >> "$BREAK_INJECTED"
echo '\newpage' >> "$BREAK_INJECTED"
echo '```' >> "$BREAK_INJECTED"
echo "" >> "$BREAK_INJECTED"
fi
fi
fi
fi
echo "$line" >> "$BREAK_INJECTED"
done < "$CALLOUT_MD"
rm -f "$CALLOUT_MD"
CALLOUT_MD="$BREAK_INJECTED"
# --- Step 2: Pre-render Mermaid blocks to PNG ---
info "Pre-rendering Mermaid diagrams..."
TEMP_MD=$(mktemp /tmp/pdfify-XXXXXX.md)
MERMAID_COUNT=0
IN_MERMAID=0
MERMAID_BUF=""
while IFS= read -r line || [[ -n "$line" ]]; do
if [[ "$line" =~ ^\`\`\`mermaid ]]; then
IN_MERMAID=1
MERMAID_BUF=""
continue
fi
if [[ $IN_MERMAID -eq 1 ]]; then
if [[ "$line" =~ ^\`\`\` ]]; then
IN_MERMAID=0
MERMAID_COUNT=$((MERMAID_COUNT + 1))
MERMAID_FILE="/tmp/mermaid-${MERMAID_COUNT}.mmd"
MERMAID_PNG="/tmp/mermaid-${MERMAID_COUNT}.png"
echo "$MERMAID_BUF" > "$MERMAID_FILE"
detail "Rendering diagram ${CYAN}#${MERMAID_COUNT}${RESET}..."
mmdc -i "$MERMAID_FILE" \
-o "$MERMAID_PNG" \
-w 1600 \
-b transparent \
-c /opt/mermaid-config.json \
-p /opt/puppeteer-config.json \
2>/dev/null || {
echo -e " ${YELLOW}⚠${RESET} Diagram $MERMAID_COUNT failed — inserting as code block"
echo '```' >> "$TEMP_MD"
echo "$MERMAID_BUF" >> "$TEMP_MD"
echo '```' >> "$TEMP_MD"
continue
}
SIZE=$(du -h "$MERMAID_PNG" 2>/dev/null | cut -f1 | tr -d ' ')
success "Diagram #${MERMAID_COUNT} rendered ${DIM}(${SIZE})${RESET}"
echo "" >> "$TEMP_MD"
echo "![Diagram ${MERMAID_COUNT}](${MERMAID_PNG})\\" >> "$TEMP_MD"
echo "" >> "$TEMP_MD"
else
MERMAID_BUF="${MERMAID_BUF}${line}
"
fi
else
echo "$line" >> "$TEMP_MD"
fi
done < "$CALLOUT_MD"
# --- Lua filter: protect brackets in headings for titlesec ---
# Square brackets in headings break titlesec (\SQSPL@scan error) because LaTeX
# interprets [ as the start of an optional argument.
# The filter below is written to a temp file and handed to pandoc via
# --lua-filter. It rewrites heading text that contains brackets, and all inline
# code in headings, as raw LaTeX. Because raw LaTeX bypasses pandoc's own
# escaping, every LaTeX special character is escaped by the filter itself.
BRACKET_FILTER=$(mktemp /tmp/pdfify-bracket-filter-XXXXXX.lua)
cat > "$BRACKET_FILTER" <<'LUAFILTER'
-- Protect square brackets in headings to prevent titlesec \SQSPL@scan errors.
-- Brackets in headings make titlesec think they are optional arguments.
-- We replace [ and ] with \lbrack/\rbrack in all inline types.

-- LaTeX spelling of every character that is special in running text.
local LATEX_ESCAPES = {
  ["\\"] = "\\textbackslash{}",
  ["%"] = "\\%",
  ["#"] = "\\#",
  ["$"] = "\\$",
  ["&"] = "\\&",
  ["_"] = "\\_",
  ["{"] = "\\{",
  ["}"] = "\\}",
  ["~"] = "\\textasciitilde{}",
  ["^"] = "\\textasciicircum{}",
  ["["] = "\\lbrack{}",
  ["]"] = "\\rbrack{}",
}

-- Escape `text` for use inside a RawInline. One pass over the input, so the
-- backslashes and braces of a replacement are never escaped a second time.
-- Characters without a table entry are kept unchanged.
local function latex_escape(text)
  return (text:gsub(".", LATEX_ESCAPES))
end

function Header(el)
  if FORMAT ~= "latex" and FORMAT ~= "pdf" then return nil end
  el = el:walk {
    Str = function(s)
      -- Only strings with brackets need raw LaTeX; everything else is left
      -- to pandoc's normal escaping.
      if s.text:find("[%[%]]") then
        return pandoc.RawInline("latex", latex_escape(s.text))
      end
    end,
    Code = function(c)
      -- All code in headings must use \oldtexttt to bypass seqsplit
      -- (seqsplit in titlesec moving arguments causes \SQSPL@scan errors)
      return pandoc.RawInline("latex", "\\oldtexttt{" .. latex_escape(c.text) .. "}")
    end
  }
  return el
end
LUAFILTER
echo ""
info "Generating PDF with Pandoc + XeLaTeX..."
detail "Engine: xelatex"
detail "Font: Roboto / Roboto Mono"
detail "Margins: 0.5in, Font size: 10pt"
echo ""
# Write LaTeX preamble for modern styling
PREAMBLE=$(mktemp /tmp/pdfify-preamble-XXXXXX.tex)
cat > "$PREAMBLE" <<'LATEX'
% --- Modern color scheme ---
\usepackage{xcolor}
\definecolor{accent}{HTML}{374151}
\definecolor{accentdark}{HTML}{111827}
\definecolor{codebg}{HTML}{F8F9FA}
\definecolor{codeborder}{HTML}{E2E8F0}
\definecolor{headrulecolor}{HTML}{E2E8F0}
% --- Callout colors ---
\definecolor{infobg}{HTML}{EFF6FF}
\definecolor{infobar}{HTML}{3B82F6}
\definecolor{infofg}{HTML}{1E40AF}
\definecolor{tipbg}{HTML}{F0FDF4}
\definecolor{tipbar}{HTML}{22C55E}
\definecolor{tipfg}{HTML}{166534}
\definecolor{warningbg}{HTML}{FFFBEB}
\definecolor{warningbar}{HTML}{F59E0B}
\definecolor{warningfg}{HTML}{92400E}
\definecolor{dangerbg}{HTML}{FEF2F2}
\definecolor{dangerbar}{HTML}{EF4444}
\definecolor{dangerfg}{HTML}{991B1B}
\definecolor{examplebg}{HTML}{F5F3FF}
\definecolor{examplebar}{HTML}{8B5CF6}
\definecolor{examplefg}{HTML}{5B21B6}
\definecolor{quotecallbg}{HTML}{F8F9FA}
\definecolor{quotecallbar}{HTML}{6B7280}
\definecolor{quotecallfg}{HTML}{374151}
% --- Code block wrapping and styling ---
\usepackage{fvextra}
\DefineVerbatimEnvironment{Highlighting}{Verbatim}{
breaklines,
breakanywhere,
commandchars=\\\{\},
fontsize=\small
}
% Background on code blocks via mdframed
\usepackage[framemethod=tikz]{mdframed}
% Override pandoc's Shaded environment
\renewenvironment{Shaded}{%
\begin{mdframed}[
backgroundcolor=codebg,
hidealllines=true,
roundcorner=4pt,
innertopmargin=8pt,
innerbottommargin=8pt,
innerleftmargin=10pt,
innerrightmargin=10pt,
skipabove=10pt,
skipbelow=10pt
]
}{%
\end{mdframed}
}
% --- Callout environments ---
\newenvironment{calloutbase}[3]{%
\begin{mdframed}[
backgroundcolor=#1,
linecolor=#2,
linewidth=3pt,
topline=false,
bottomline=false,
rightline=false,
innertopmargin=12pt,
innerbottommargin=12pt,
innerleftmargin=12pt,
innerrightmargin=12pt,
skipabove=12pt,
skipbelow=12pt,
roundcorner=0pt
]
\textbf{\color{#2}#3}\par\smallskip\setlength{\parindent}{0pt}
}{%
\end{mdframed}
}
\newenvironment{calloutinfo}[1]{\begin{calloutbase}{infobg}{infobar}{#1}}{\end{calloutbase}}
\newenvironment{callouttip}[1]{\begin{calloutbase}{tipbg}{tipbar}{#1}}{\end{calloutbase}}
\newenvironment{calloutwarning}[1]{\begin{calloutbase}{warningbg}{warningbar}{#1}}{\end{calloutbase}}
\newenvironment{calloutdanger}[1]{\begin{calloutbase}{dangerbg}{dangerbar}{#1}}{\end{calloutbase}}
\newenvironment{calloutexample}[1]{\begin{calloutbase}{examplebg}{examplebar}{#1}}{\end{calloutbase}}
\newenvironment{calloutquote}[1]{\begin{calloutbase}{quotecallbg}{quotecallbar}{#1}}{\end{calloutbase}}
% --- PDF bookmarks (sidebar navigation in PDF viewers) ---
\usepackage{bookmark}
\bookmarksetup{
numbered=false,
open,
openlevel=2
}
% --- Title banner ---
\definecolor{titlebg}{HTML}{E5E7EB}
% --- Page break after TOC ---
\let\oldtableofcontents\tableofcontents
\renewcommand{\tableofcontents}{\oldtableofcontents\clearpage}
% --- TOC styling ---
\usepackage{tocloft}
\setlength{\cftbeforetoctitleskip}{0.5em}
\renewcommand{\cfttoctitlefont}{\LARGE\bfseries\color{accentdark}\scshape}
\renewcommand{\cftaftertoctitle}{\par\vspace{2pt}{\color{headrulecolor}\hrule height 1pt}\vspace{10pt}}
\renewcommand{\cftsecfont}{\bfseries\color{accentdark}}
\renewcommand{\cftsecpagefont}{\bfseries\color{accentdark}}
\renewcommand{\cftsubsecfont}{\color{accent}}
\renewcommand{\cftsubsecpagefont}{\color{accent}}
\renewcommand{\cftsubsubsecfont}{\small\color{accent}}
\renewcommand{\cftsubsubsecpagefont}{\small\color{accent}}
\renewcommand{\cftsecleader}{\cftdotfill{\cftsecdotsep}}
\renewcommand{\cftsecdotsep}{\cftdotsep}
\setlength{\cftbeforesecskip}{6pt}
\setlength{\cftbeforesubsecskip}{2pt}
% --- Heading font ---
\newfontfamily\headingfont{Roboto}[BoldFont={Roboto Bold}]
% --- Symbol fallback (arrows, etc.) ---
\usepackage{newunicodechar}
\newfontfamily\fallbackfont{Liberation Sans}[Scale=MatchLowercase]
\newunicodechar{→}{{\fallbackfont →}}
\newunicodechar{←}{{\fallbackfont ←}}
\newunicodechar{↔}{{\fallbackfont ↔}}
\newunicodechar{⇒}{{\fallbackfont ⇒}}
\newunicodechar{⇐}{{\fallbackfont ⇐}}
\newunicodechar{✓}{{\fallbackfont ✓}}
\newunicodechar{✗}{{\fallbackfont ✗}}
% --- Modern section headings (tight, bold, dark) ---
\usepackage{titlesec}
% H1: # headings — large, small caps, dark, with rule
\titleformat{\section}
{\LARGE\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=5}\scshape}
{\thesection}{0.5em}{}[\vspace{2pt}{\color{headrulecolor}\titlerule[1pt]}]
\titlespacing*{\section}{0pt}{20pt}{10pt}
% H2: ## headings
\titleformat{\subsection}
{\Large\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=-1}}
{\thesubsection}{0.5em}{}
\titlespacing*{\subsection}{0pt}{16pt}{8pt}
% H3: ### headings
\titleformat{\subsubsection}
{\large\bfseries\color{accent}}
{\thesubsubsection}{0.5em}{}
\titlespacing*{\subsubsection}{0pt}{12pt}{6pt}
% H4: #### headings
\titleformat{\paragraph}[hang]
{\normalsize\bfseries\color{accent}}
{\theparagraph}{0.5em}{}
\titlespacing*{\paragraph}{0pt}{10pt}{4pt}
%%SECNUMDEPTH_PLACEHOLDER%%
% --- Page style (header/footer injected by pdfify) ---
\usepackage{fancyhdr}
\pagestyle{fancy}
\fancyhf{}
\renewcommand{\headrulewidth}{0pt}
\renewcommand{\footrulewidth}{0pt}
\setlength{\headheight}{14pt}
%%HEADER_PLACEHOLDER%%
%%FOOTER_PLACEHOLDER%%
% Make plain style identical to fancy (so title/TOC pages get the same footer)
\fancypagestyle{plain}{\fancyhf{}\renewcommand{\headrulewidth}{0pt}\renewcommand{\footrulewidth}{0pt}%%FOOTER_PLAIN%%}
% --- Blockquote styling (plain > quotes, not callouts) ---
\usepackage{etoolbox}
\renewenvironment{quote}{%
\begin{mdframed}[
backgroundcolor=infobg,
linecolor=infobar,
linewidth=3pt,
topline=false,
bottomline=false,
rightline=false,
innertopmargin=12pt,
innerbottommargin=12pt,
innerleftmargin=12pt,
innerrightmargin=12pt,
skipabove=10pt,
skipbelow=10pt,
roundcorner=0pt
]%
}{%
\end{mdframed}%
}
% --- Table styling ---
\usepackage{booktabs}
\usepackage{colortbl}
\usepackage{longtable}
\usepackage{tabularx}
\arrayrulecolor{codeborder}
% Allow line breaks in table cells and shrink monospace to fit
\usepackage{array}
\renewcommand{\arraystretch}{1.4}
\let\oldtexttt\texttt
\renewcommand{\texttt}[1]{{\small\oldtexttt{\seqsplit{#1}}}}
\usepackage{seqsplit}
\setlength{\tabcolsep}{4pt}
% --- Images constrained to page ---
\usepackage{grffile}
\usepackage[export]{adjustbox}
\let\oldincludegraphics\includegraphics
\renewcommand{\includegraphics}[2][]{%
\oldincludegraphics[max width=\textwidth,max height=0.45\textheight,keepaspectratio,#1]{#2}%
}
% --- Figures don't float ---
\usepackage{float}
\floatplacement{figure}{H}
% --- Caption styling ---
\usepackage{caption}
\captionsetup{labelformat=empty,font={small,color=gray},skip=4pt}
% --- Tighter lists ---
\usepackage{enumitem}
\setlist{nosep,leftmargin=1.5em}
% --- Links ---
\usepackage{hyperref}
\hypersetup{
colorlinks=true,
linkcolor=accent,
urlcolor=accent,
citecolor=accent
}
% --- Horizontal rules ---
\renewcommand{\rule}[2]{\textcolor{headrulecolor}{\vrule width \textwidth height 0.5pt}}
LATEX
# Settings arrive through the environment; give each one that is unset or
# empty its default so later code can reference it unconditionally.
: "${TOC_LEVEL:=3}"
: "${FM_FOOTER:=}"
: "${FM_HEADER:=}"
: "${FM_AUTHOR:=}"
: "${FM_DATE:=}"
: "${FM_DATE_LABEL:=}"
: "${FM_DATE_HASH:=}"
: "${FM_DATE_DIRTY:=}"
: "${FILE_NUMBERS:=1}"
: "${FILE_NUMBER_FROM:=2}"
# Escape LaTeX special characters in a text field.
# Arguments: $1 - plain text
# Outputs:   the text with \ & % $ # _ { } ~ ^ replaced by their LaTeX
#            spellings, written to stdout without a trailing newline
# The input is mapped one character at a time in a single pass. This avoids
# both the brace-parsing pitfalls of ${var//…/…} replacements and the need for
# a temporary placeholder for backslashes (a placeholder would corrupt input
# that happens to contain the placeholder text itself).
latex_escape() {
  local text=$1 out="" ch i
  for ((i = 0; i < ${#text}; i++)); do
    ch=${text:i:1}
    case "$ch" in
      '\') out+='\textbackslash{}' ;;
      '&') out+='\&' ;;
      '%') out+='\%' ;;
      '$') out+='\$' ;;
      '#') out+='\#' ;;
      '_') out+='\_' ;;
      '{') out+='\{' ;;
      '}') out+='\}' ;;
      '~') out+='\textasciitilde{}' ;;
      '^') out+='\textasciicircum{}' ;;
      *) out+=$ch ;;
    esac
  done
  printf '%s' "$out"
}
# Inject title banner into preamble
# When FM_TITLE is non-empty, \maketitle is redefined to draw a paper-width
# box filled with the titlebg color that holds the title and, when set, the
# subtitle, author and date lines; \AtBeginDocument{\maketitle} is appended so
# the banner is emitted at the start of the document. When FM_TITLE is empty,
# \maketitle is redefined to produce nothing.
# Everything printed inside the { ... } group below is appended to $PREAMBLE.
# NOTE(review): title, subtitle and author pass through latex_escape, but
# FM_DATE, FM_DATE_LABEL and FM_DATE_HASH are inserted verbatim — confirm the
# caller guarantees they contain no LaTeX special characters.
FM_TITLE="${FM_TITLE:-}"
FM_TITLE_TEX="$(latex_escape "$FM_TITLE")"
FM_SUBTITLE_TEX="$(latex_escape "${FM_SUBTITLE:-}")"
FM_AUTHOR_TEX="$(latex_escape "${FM_AUTHOR:-}")"
{
if [[ -n "$FM_TITLE" ]]; then
# Static opening of the banner (quoted heredoc: nothing is expanded).
cat <<'TITLE_STATIC'
\makeatletter
\renewcommand{\maketitle}{%
\thispagestyle{fancy}%
\vspace*{-\topskip}%
\vspace*{-\headsep}%
\vspace*{-\headheight}%
\vspace*{-0.55in}%
\noindent\hspace*{-0.5in}%
\fcolorbox{titlebg}{titlebg}{%
\parbox{\dimexpr\paperwidth-2\fboxsep-2\fboxrule}{%
\hspace*{0.3in}\begin{minipage}{\dimexpr\textwidth}%
\vspace{20pt}%
TITLE_STATIC
# Dynamic lines. Inside the double quotes "\\" yields one backslash and
# "\\\\" yields the LaTeX line break "\\".
echo " {\\fontsize{28}{34}\\selectfont\\bfseries\\color{black}${FM_TITLE_TEX}}\\\\[6pt]%"
FM_SUBTITLE="${FM_SUBTITLE:-}"
if [[ -n "$FM_SUBTITLE" ]]; then
echo " {\\fontsize{14}{18}\\selectfont\\color{black}${FM_SUBTITLE_TEX}}\\\\[8pt]%"
fi
if [[ -n "$FM_AUTHOR" ]]; then
echo " {\\fontsize{11}{14}\\selectfont\\color{black}${FM_AUTHOR_TEX}}\\\\[6pt]%"
fi
if [[ -n "$FM_DATE" ]]; then
DATE_VAL=""
if [[ -n "$FM_DATE_HASH" ]]; then
# Drop everything from the last " · " in FM_DATE and re-append the hash in
# monospace.
DATE_VAL="${FM_DATE% · *} · {\\texttt{${FM_DATE_HASH}}}"
else
DATE_VAL="${FM_DATE}"
fi
DIRTY_PART=""
if [[ -n "${FM_DATE_DIRTY:-}" ]]; then
DIRTY_PART=" {\\color{gray}\\itshape (dirty)}"
fi
# The optional label is appended in gray after an em dash.
if [[ -n "$FM_DATE_LABEL" ]]; then
echo " {\\fontsize{10}{12}\\selectfont\\color{black}${DATE_VAL} {\\color{gray}--- ${FM_DATE_LABEL}}${DIRTY_PART}}\\\\[4pt]%"
else
echo " {\\fontsize{10}{12}\\selectfont\\color{black}${DATE_VAL}${DIRTY_PART}}\\\\[4pt]%"
fi
fi
# Static closing of the banner.
cat <<'TITLE_END'
\vspace{6pt}%
\end{minipage}%
}%
}%
\par\vspace{20pt}%
}
\makeatother
TITLE_END
echo '\AtBeginDocument{\maketitle}'
else
# No title: make \maketitle a no-op.
echo '\renewcommand{\maketitle}{}'
fi
} >> "$PREAMBLE"
# Inject header/footer into preamble
# The %%HEADER_PLACEHOLDER%%, %%FOOTER_PLACEHOLDER%% and %%FOOTER_PLAIN%%
# markers in $PREAMBLE are filled in with sed (delimiter "|").

# Make arbitrary text safe for the replacement side of a sed "s|…|…|" command.
# Arguments: $1 - text to insert
# Outputs:   the text with \, & and | backslash-escaped, so sed inserts it
#            verbatim instead of treating & as "the match", | as the
#            delimiter, or \x as an escape sequence
# NOTE: multi-line values are not supported.
sed_escape_replacement() {
  printf '%s' "$1" | sed \
    -e 's/\\/\\\\/g' \
    -e 's/&/\\\&/g' \
    -e 's/|/\\|/g'
}

GIT_STAMP="${GIT_STAMP:-}"
FOOTER_L=""
FOOTER_C=""
# Fixed fragments are written pre-escaped for sed: "\\\\" in double quotes is
# "\\" for sed, which becomes a single backslash in the file.
FOOTER_R="\\\\fancyfoot[R]{\\\\color{gray}\\\\small Page \\\\thepage\\\\ of \\\\pageref*{LastPage}}"
if [[ -n "$FM_FOOTER" ]]; then
  FOOTER_L="\\\\fancyfoot[L]{\\\\color{gray}\\\\small $(sed_escape_replacement "$FM_FOOTER")}"
fi
sed -i "s|%%FOOTER_PLACEHOLDER%%|\\\\usepackage{lastpage}${FOOTER_L}${FOOTER_C}${FOOTER_R}|" "$PREAMBLE"
sed -i "s|%%FOOTER_PLAIN%%|${FOOTER_L}${FOOTER_C}${FOOTER_R}|" "$PREAMBLE"
if [[ -n "$FM_HEADER" ]]; then
  sed -i "s|%%HEADER_PLACEHOLDER%%|\\\\fancyhead[C]{\\\\color{gray}\\\\small $(sed_escape_replacement "$FM_HEADER")}|" "$PREAMBLE"
else
  sed -i "s|%%HEADER_PLACEHOLDER%%||" "$PREAMBLE"
fi
# NOTE(review): header/footer text reaches the .tex file verbatim, i.e. it is
# not passed through latex_escape — confirm whether these fields are meant to
# accept raw LaTeX or should be escaped like the title fields.
# Inject watermark if set
# A non-empty FM_WATERMARK appends an eso-pic/TikZ overlay to $PREAMBLE that
# places the upper-cased text, rotated 45 degrees, in faint red at the centre
# of each shipped-out page. The text is inserted as-is.
FM_WATERMARK="${FM_WATERMARK:-}"
if [[ -n "$FM_WATERMARK" ]]; then
  {
    printf '%s\n' \
      '\usepackage{eso-pic}' \
      '\usepackage{tikz}' \
      '\AddToShipoutPictureFG{%' \
      '\begin{tikzpicture}[remember picture,overlay]'
    printf '%s%s%s\n' \
      '\node[rotate=45,opacity=0.12,scale=10,text=red] at (current page.center) {\textsf{\textbf{\MakeUppercase{' \
      "$FM_WATERMARK" \
      '}}}};'
    printf '%s\n' \
      '\end{tikzpicture}%' \
      '}'
  } >> "$PREAMBLE"
  detail "Watermark: ${CYAN}${FM_WATERMARK}${RESET}"
fi
# Optional pandoc arguments, collected in arrays so that an unused option
# contributes no words at all to the final command line.

# Author metadata is passed only when an author was supplied.
AUTHOR_FLAGS=()
[[ -z "$FM_AUTHOR" ]] || AUTHOR_FLAGS+=(-M "author=$FM_AUTHOR")

# A positive TOC_LEVEL turns the table of contents on at that depth;
# any other value disables it.
TOC_FLAGS=()
if [[ "$TOC_LEVEL" -gt 0 ]]; then
  TOC_FLAGS+=(--toc "--toc-depth=${TOC_LEVEL}")
  detail "TOC depth: ${CYAN}${TOC_LEVEL}${RESET}"
else
  detail "TOC: ${DIM}disabled${RESET}"
fi
# Numbered sections
# FILE_NUMBERS=1 enables pandoc's --number-sections and raises secnumdepth to
# 4. FILE_NUMBER_FROM selects the first heading level that shows a number
# (pandoc: section=1, subsection=2, subsubsection=3):
#   >= 2  \section gets no number; subsections count 1, 2, 3, ...
#   >= 3  additionally \subsection gets no number; subsubsections count 1, 2, ...
# Each step re-issues \titleformat with an empty label so the heading style
# itself is unchanged.
NUMBER_FLAGS=()
if [[ "$FILE_NUMBERS" == "1" ]]; then
  NUMBER_FLAGS+=(--number-sections)
  printf '%s\n' '\setcounter{secnumdepth}{4}' >> "$PREAMBLE"
  if [[ "$FILE_NUMBER_FROM" -ge 2 ]]; then
    # H1 (\section) unnumbered, H2 numbered as 1, 2, 3
    printf '%s\n' \
      '\makeatletter' \
      '\renewcommand{\thesection}{}' \
      '\renewcommand{\thesubsection}{\arabic{subsection}}' \
      '\renewcommand{\thesubsubsection}{\thesubsection.\arabic{subsubsection}}' \
      '% Remove section number from titleformat without changing style' \
      '\titleformat{\section}' \
      '{\LARGE\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=5}\scshape}' \
      '{}{0em}{}[\vspace{2pt}{\color{headrulecolor}\titlerule[1pt]}]' \
      '\makeatother' \
      >> "$PREAMBLE"
    # A start level of 3 or more implies the level-2 overrides above, so the
    # extra overrides are layered on top of them.
    if [[ "$FILE_NUMBER_FROM" -ge 3 ]]; then
      printf '%s\n' \
        '\renewcommand{\thesubsection}{}' \
        '\renewcommand{\thesubsubsection}{\arabic{subsubsection}}' \
        '\titleformat{\subsection}' \
        '{\Large\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=-1}}' \
        '{}{0em}{}' \
        >> "$PREAMBLE"
    fi
  fi
fi
# Remove placeholder
sed -i -e 's|%%SECNUMDEPTH_PLACEHOLDER%%||' "$PREAMBLE"
# Print an approximate page count for the PDF at $1, or "?" when none can be
# determined (unreadable file, or page objects stored in compressed streams).
# Counts "/Type /Page" objects but not the "/Type /Pages" tree nodes.
# grep -c prints 0 AND exits non-zero when nothing matches, so the fallback is
# applied to the captured value instead of being echoed after it.
count_pdf_pages() {
  local count
  count=$(strings "$1" 2>/dev/null | grep -cE '/Type /Page([^s]|$)') || count=""
  if [[ "$count" =~ ^[1-9][0-9]*$ ]]; then
    printf '%s' "$count"
  else
    printf '?'
  fi
}

# Render the pre-processed markdown to PDF with XeLaTeX, using the generated
# preamble (-H) and the heading Lua filter.
pandoc "$TEMP_MD" \
  -o "$OUTPUT_FILE" \
  --pdf-engine=xelatex \
  --lua-filter="$BRACKET_FILTER" \
  --resource-path=".:$WORKDIR" \
  --columns=72 \
  -V geometry:"margin=0.5in,includehead,includefoot" \
  -V fontsize=10pt \
  -V mainfont="Roboto" \
  -V monofont="Roboto Mono" \
  "${TOC_FLAGS[@]}" \
  "${AUTHOR_FLAGS[@]}" \
  "${NUMBER_FLAGS[@]}" \
  --highlight-style=tango \
  -H "$PREAMBLE" \
  --standalone
# Remove intermediate files and the rendered Mermaid sources/images.
rm -f "$TEMP_MD" "$CALLOUT_MD" "$PREAMBLE" "${BRACKET_FILTER:-}" "${STRIPPED:-}" /tmp/mermaid-*.mmd /tmp/mermaid-*.png
PAGES=$(count_pdf_pages "$OUTPUT_FILE")
SIZE=$(du -h "$OUTPUT_FILE" | cut -f1 | tr -d ' ')
success "PDF generated: ${CYAN}${SIZE}${RESET}, ~${CYAN}${PAGES}${RESET} pages"
INNER_SCRIPT
chmod +x "$CONVERT_SCRIPT"
# --- Run Docker ---
echo ""
info "Launching Docker container..."
detail "Mounting: ${CYAN}${INPUT_DIR}${RESET} → /work ${DIM}(read-only)${RESET}"
detail "Output: ${CYAN}${OUTPUT_DIR}${RESET} → /output"
echo ""
CONVERT_BASENAME="$(basename "$CONVERT_SCRIPT")"
docker run --rm \
-v "$INPUT_DIR:/work:ro" \
-v "$OUTPUT_DIR:/output" \
-e "TOC_LEVEL=$FILE_TOC_LEVEL" \
-e "FM_FOOTER=$FM_FOOTER" \
-e "FM_HEADER=$FM_HEADER" \
-e "FM_AUTHOR=$FM_AUTHOR" \
-e "FM_TITLE=$FM_TITLE" \
-e "FM_SUBTITLE=$FM_SUBTITLE" \
-e "FM_DATE=$FM_DATE" \
-e "FM_DATE_LABEL=${FM_DATE_LABEL:-}" \
-e "FM_DATE_HASH=${FM_DATE_HASH:-}" \
-e "FM_DATE_DIRTY=${FM_DATE_DIRTY:-}" \
-e "GIT_STAMP=${GIT_STAMP:-}" \
-e "FILE_NUMBERS=$FILE_NUMBERS" \
-e "FILE_NUMBER_FROM=$FILE_NUMBER_FROM" \
-e "HIDE_FIRST_H1=$HIDE_FIRST_H1" \
-e "FM_WATERMARK=$FM_WATERMARK" \
--tmpfs /tmp:exec \
"$IMAGE_NAME" "/work/$CONVERT_BASENAME" "$INPUT_FILE" "/output/$OUTPUT_FILE" \
|| {
echo ""
echo -e " ${RED}${BOLD}Error producing PDF.${RESET} Docker/pandoc exited with a non-zero status."
echo ""
return 1
}
# Move preview file to /tmp and clean up
if [[ -n "$PREVIEW_FINAL" ]]; then
mv "$OUTPUT" "$PREVIEW_FINAL"
OUTPUT="$PREVIEW_FINAL"
fi
echo ""
echo -e " ${GREEN}${BOLD}PDF created:${RESET} ${CYAN}${OUTPUT}${RESET}"
echo ""
# Open if requested
if [[ $OPEN -eq 1 ]]; then
open_pdf "$OUTPUT"
fi
}
# --- Process each input file ---
# Runs convert_file on every path in POSITIONAL (OUT_FILE is passed along as
# the second argument), keeps going after a failure, and finishes with a
# summary header reporting either completion or the number of failures.
run_all() {
  local failures=0
  for input_file in "${POSITIONAL[@]}"; do
    if ! convert_file "$input_file" "$OUT_FILE"; then
      failures=$((failures + 1))
    fi
  done
  case "$failures" in
    0) header "Complete! (${#POSITIONAL[@]} file(s))" ;;
    *) header "${failures} of ${#POSITIONAL[@]} file(s) failed" ;;
  esac
}
run_all
# --- Watch mode ---

# Print the SHA-256 of the file at $1.
# Returns non-zero (printing nothing) when the file cannot be read, e.g. while
# an editor is in the middle of replacing it, so callers can skip that round
# instead of aborting under `set -e`.
_watch_fingerprint() {
  local path=$1 digest
  [[ -r "$path" ]] || return 1
  digest=$(_sha256 < "$path") || return 1
  printf '%s\n' "${digest%% *}"
}

if [[ $WATCH -eq 1 ]]; then
  info "Watching for changes... ${DIM}(Ctrl+C to stop)${RESET}"
  echo ""
  # Last seen checksum per input, kept in two parallel indexed arrays
  # (bash 3 has no associative arrays). Looking entries up by index rather
  # than by grepping for the path keeps paths with regex metacharacters safe.
  WATCH_PATHS=()
  WATCH_HASHES=()
  for f in "${POSITIONAL[@]}"; do
    fpath="$(cd "$(dirname "$f")" && pwd)/$(basename "$f")"
    WATCH_PATHS+=("$fpath")
    # An unreadable file starts with an empty hash and triggers a rebuild as
    # soon as it becomes readable.
    WATCH_HASHES+=("$(_watch_fingerprint "$fpath" || true)")
  done
  while true; do
    sleep 2
    CHANGED=0
    for i in "${!WATCH_PATHS[@]}"; do
      # File temporarily missing/unreadable: try again on the next poll.
      NEW_HASH=$(_watch_fingerprint "${WATCH_PATHS[$i]}") || continue
      if [[ "$NEW_HASH" != "${WATCH_HASHES[$i]}" ]]; then
        CHANGED=1
        WATCH_HASHES[$i]=$NEW_HASH
      fi
    done
    if [[ $CHANGED -eq 1 ]]; then
      echo ""
      info "Change detected — rebuilding..."
      echo ""
      run_all
    fi
  done
fi
# Check for updates (runs after success, fast timeout)
check_for_update
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment