Last active
May 5, 2025 21:02
-
-
Save max-mapper/989cdf90ed3a849299be9bc86e6a1535 to your computer and use it in GitHub Desktop.
Batch CLI for converting PDFs to page images and processing them with mokuro
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import os | |
import pyvips | |
def process_image(image_path: str) -> None:
    """
    Split a (possibly multi-page) image into one JPG file per page.

    The pages are written as ``page-000.jpg``, ``page-001.jpg``, ... into a
    subfolder that sits next to the input file and is named after the input
    file's basename (extension removed).

    Args:
        image_path (str): The path to the input image file.

    Raises:
        SystemExit: With exit code 1 if anything goes wrong while loading
            the input or writing a page; the error is reported on stderr.
    """
    try:
        # Open the input first: if the path is missing or unreadable we fail
        # here, before any output directory has been created, so a bad
        # argument does not leave a stray empty folder behind.
        image = pyvips.Image.new_from_file(image_path)

        # A bare filename has no directory component; fall back to the
        # current directory in that case.
        image_dir = os.path.dirname(image_path) or "."

        # Basename of the input without its extension names the subfolder.
        image_basename = os.path.splitext(os.path.basename(image_path))[0]
        output_subfolder = os.path.join(image_dir, image_basename)

        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(output_subfolder, exist_ok=True)

        # Inputs that do not expose an 'n-pages' field are treated as a
        # single page.
        n_pages = image.get('n-pages') if 'n-pages' in image.get_fields() else 1

        for i in range(n_pages):
            # Each page is loaded on its own via the loader's page option.
            page_image = pyvips.Image.new_from_file(image_path, page=i)
            output_file_path = os.path.join(output_subfolder, f"page-{i:03}.jpg")
            page_image.write_to_file(output_file_path)
            print(f"Wrote page {i+1} to {output_file_path}")

        print("Image processing complete.")
    except Exception as e:
        # Top-level boundary of the CLI: report on stderr and signal failure
        # through the exit status so calling scripts can detect it.
        print(f"Error processing image: {e}", file=sys.stderr)
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: exactly one positional argument is expected,
    # the path of the file to split into pages.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python process_image.py <image_file_path>")
        sys.exit(1)
    process_image(cli_args[0])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Batch driver: runs convert.py on every PDF found under an input directory,
# then runs mokuro over the output directory.
#
# Usage: <this script> <input_directory>

# Check if the input directory is provided as an argument
if [ -z "$1" ]; then
    echo "Error: Input directory is required as the first argument."
    echo "Usage: $0 <input_directory>"
    exit 1
fi

INPUTDIR="$1" # Set INPUTDIR from the first argument

# Directory that is handed to mokuro via --parent-dir
OUTPUTDIR="outputdir"

# Check if the input and output directories exist
if [ ! -d "$INPUTDIR" ]; then
    echo "Error: Input directory '$INPUTDIR' does not exist."
    exit 1
fi
if [ ! -d "$OUTPUTDIR" ]; then
    echo "Error: Output directory '$OUTPUTDIR' does not exist."
    exit 1
fi

# Loop through all PDF files in the input directory.
# "$INPUTDIR" is quoted so paths containing spaces or glob characters are
# passed to find as a single argument; -print0 / read -d '' keeps file names
# with spaces or newlines intact.
find "$INPUTDIR" -type f -name "*.pdf" -print0 | while IFS= read -r -d '' PDF; do
    # Get the filename without the extension
    FILENAME=$(basename "$PDF" .pdf)

    # NOTE(review): assumes convert.py is the page-splitting script that
    # writes page-NNN.jpg into a folder named after the PDF, next to the PDF
    # itself - confirm. Under that assumption the first page lands here:
    PAGEDIR="$(dirname "$PDF")/$FILENAME"

    # Skip PDFs whose first page already exists. Both the original marker
    # (output001.png under OUTPUTDIR) and the converter's page-000.jpg are
    # accepted, so previously processed PDFs are not converted again.
    if [ ! -f "$OUTPUTDIR/$FILENAME/output001.png" ] && [ ! -f "$PAGEDIR/page-000.jpg" ]; then
        echo "Processing PDF: $PDF"
        # stdin is redirected so the converter can never consume the file
        # list that the surrounding loop is reading from find.
        if ./bin/python convert.py "$PDF" < /dev/null; then
            echo "Successfully converted $PDF to PNGs in $OUTPUTDIR/$FILENAME/"
        else
            echo "Error: Failed to convert $PDF to PNGs."
        fi
    else
        echo "Skipping PDF: $PDF (already processed)"
    fi
done

# NOTE(review): mokuro is pointed at OUTPUTDIR; this only picks up the
# converted pages if the PDFs live under that directory - confirm the
# intended layout.
./bin/mokuro --parent-dir "$OUTPUTDIR" --disable_confirmation true
echo "Finished processing all PDFs."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment