Skip to content

Instantly share code, notes, and snippets.

@max-mapper
Last active May 5, 2025 21:02
Show Gist options
  • Save max-mapper/989cdf90ed3a849299be9bc86e6a1535 to your computer and use it in GitHub Desktop.
Save max-mapper/989cdf90ed3a849299be9bc86e6a1535 to your computer and use it in GitHub Desktop.
Batch CLI to convert PDFs to page images and process them with mokuro
import sys
import os
import pyvips
def process_image(image_path):
    """
    Split a (possibly multi-page) image into one JPG file per page.

    Pages are written as ``page-000.jpg``, ``page-001.jpg``, ... into a
    subfolder located next to the input file and named after the input's
    basename without its extension.

    Args:
        image_path (str): The path to the input image file.

    On any failure the error is printed to stderr and the process exits
    with status 1.
    """
    try:
        # Use the current directory if the path is just a filename
        image_dir = os.path.dirname(image_path) or "."
        # Basename of the input image (without extension)
        image_basename = os.path.splitext(os.path.basename(image_path))[0]
        output_subfolder = os.path.join(image_dir, image_basename)
        # exist_ok avoids the check-then-create race of a separate exists() test
        os.makedirs(output_subfolder, exist_ok=True)

        # Load once to discover how many pages the file has
        image = pyvips.Image.new_from_file(image_path)
        has_pages = 'n-pages' in image.get_fields()
        n_pages = image.get('n-pages') if has_pages else 1

        for i in range(n_pages):
            if has_pages:
                page_image = pyvips.Image.new_from_file(image_path, page=i)
            else:
                # Loaders that do not report 'n-pages' have no 'page' option,
                # so reuse the image that is already loaded.
                page_image = image
            output_file_path = os.path.join(output_subfolder, f"page-{i:03}.jpg")
            page_image.write_to_file(output_file_path)
            print(f"Wrote page {i+1} to {output_file_path}")
        print("Image processing complete.")
    except Exception as e:
        # Top-level CLI boundary: report on stderr and signal failure to the caller
        print(f"Error processing image: {e}", file=sys.stderr)
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: exactly one positional argument (the image path)
    # is accepted; anything else prints the usage line and exits with 1.
    arguments = sys.argv[1:]
    if len(arguments) != 1:
        print("Usage: python process_image.py <image_file_path>")
        sys.exit(1)
    (image_file_path,) = arguments
    process_image(image_file_path)
#!/bin/bash
# Batch driver: run convert.py on every PDF found under an input directory,
# then run mokuro over the output directory.
#
# Usage: <this script> <input_directory>

# Check if the input directory is provided as an argument
if [ -z "$1" ]; then
    echo "Error: Input directory is required as the first argument."
    echo "Usage: $0 <input_directory>"
    exit 1
fi

INPUTDIR="$1" # Set INPUTDIR from the first argument

# Directory handed to mokuro as --parent-dir
OUTPUTDIR="outputdir"

# Check if the input and output directories exist
if [ ! -d "$INPUTDIR" ]; then
    echo "Error: Input directory '$INPUTDIR' does not exist."
    exit 1
fi
if [ ! -d "$OUTPUTDIR" ]; then
    echo "Error: Output directory '$OUTPUTDIR' does not exist."
    exit 1
fi

# Loop through all PDF files in the input directory.
# "$INPUTDIR" is quoted so paths containing spaces or glob characters work.
find "$INPUTDIR" -type f -name "*.pdf" -print0 | while IFS= read -r -d '' PDF; do
    # Get the filename without the extension
    FILENAME=$(basename "$PDF" .pdf)

    # NOTE(review): assumes convert.py is the Python page splitter that
    # accompanies this script, which writes page-NNN.jpg files into a folder
    # named after the PDF, next to the PDF itself -- confirm.
    PAGEDIR="$(dirname "$PDF")/$FILENAME"

    # Check only for the first page to decide whether the PDF was already processed
    if [ ! -f "$PAGEDIR/page-000.jpg" ]; then
        echo "Processing PDF: $PDF"
        # stdin comes from /dev/null so the child cannot consume the file
        # list that is being piped into this loop.
        if ./bin/python convert.py "$PDF" < /dev/null; then
            echo "Successfully converted $PDF to page images in $PAGEDIR/"
        else
            echo "Error: Failed to convert $PDF."
        fi
    else
        echo "Skipping PDF: $PDF (already processed)"
    fi
done

# NOTE(review): mokuro only scans $OUTPUTDIR; the page folders end up there
# only when the PDFs themselves live under $OUTPUTDIR -- confirm intended layout.
./bin/mokuro --parent-dir "$OUTPUTDIR" --disable_confirmation true
echo "Finished processing all PDFs."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment