max-mapper · May 5, 2025 21:02
diff --git a/convert.py b/convert.py
 import sys
 import os
 import pyvips

 def process_image(image_path):
    """
    Split a multi-page image into one JPG file per page.

    Each page is written as ``page-NNN.jpg`` (zero-based index, zero-padded
    to three digits) inside a subfolder created next to the input file and
    named after the input file's basename without its extension.

    On any exception the error is reported on stderr and the process exits
    with status 1.

    Args:
        image_path (str): The path to the input image file.
    """
    try:
        # A bare filename has no directory component; use the current directory.
        image_dir = os.path.dirname(image_path) or "."

        # Output goes to <image_dir>/<basename without extension>/
        image_basename = os.path.splitext(os.path.basename(image_path))[0]
        output_subfolder = os.path.join(image_dir, image_basename)

        # exist_ok=True replaces the exists()-then-makedirs() pair, which
        # could fail if the folder appeared between the two calls.
        os.makedirs(output_subfolder, exist_ok=True)

        # Load once to discover how many pages the file reports.
        image = pyvips.Image.new_from_file(image_path)
        has_pages = 'n-pages' in image.get_fields()
        n_pages = image.get('n-pages') if has_pages else 1

        for i in range(n_pages):
            if has_pages:
                page_image = pyvips.Image.new_from_file(image_path, page=i)
            else:
                # NOTE(review): loaders that do not report 'n-pages' may not
                # accept a ``page`` argument, so reuse the image loaded above.
                page_image = image
            output_file_path = os.path.join(output_subfolder, f"page-{i:03}.jpg")
            page_image.write_to_file(output_file_path)
            # The message is 1-based for readability; file names stay 0-based.
            print(f"Wrote page {i+1} to {output_file_path}")

        print("Image processing complete.")

    except Exception as e:
        # Top-level script boundary: report and signal failure via exit code.
        print(f"Error processing image: {e}", file=sys.stderr)
        sys.exit(1)


 if __name__ == "__main__":
    # Exactly one positional argument is expected: the image to split.
    if len(sys.argv) != 2:
        # Show the name the script was actually invoked with instead of a
        # hard-coded file name that does not match this script.
        print(f"Usage: python {sys.argv[0]} <image_file_path>", file=sys.stderr)
        sys.exit(1)

    image_file_path = sys.argv[1]
    process_image(image_file_path)
diff --git a/convert.sh b/convert.sh
 #!/bin/bash
 #
 # Convert every PDF found under an input directory into per-page JPG images
 # by calling convert.py on each one, then run mokuro over the output directory.
 #
 # Usage: <this script> <input_directory>

 # Check if the input directory is provided as an argument
 if [ -z "$1" ]; then
  echo "Error: Input directory is required as the first argument." >&2
  echo "Usage: $0 <input_directory>" >&2
  exit 1
 fi

 INPUTDIR="$1" # Set INPUTDIR from the first argument

 # Directory handed to mokuro below; it must already exist.
 OUTPUTDIR="outputdir"

 # Check if the input and output directories exist
 if [ ! -d "$INPUTDIR" ]; then
  echo "Error: Input directory '$INPUTDIR' does not exist." >&2
  exit 1
 fi
 if [ ! -d "$OUTPUTDIR" ]; then
  echo "Error: Output directory '$OUTPUTDIR' does not exist." >&2
  exit 1
 fi

 # Loop through all PDF files in the input directory.
 # NUL-delimited find/read keeps file names with spaces or newlines intact;
 # "$INPUTDIR" is quoted so a path containing spaces is not word-split.
 find "$INPUTDIR" -type f -name "*.pdf" -print0 | while IFS= read -r -d '' PDF; do
  # Get the filename without the extension
  FILENAME=$(basename "$PDF" .pdf)
  # convert.py writes its pages to <directory of the PDF>/<basename>/page-NNN.jpg,
  # so that is where an earlier run's output has to be looked for.
  PAGEDIR="$(dirname "$PDF")/$FILENAME"
  if [ ! -f "$PAGEDIR/page-000.jpg" ]; then # check only for the first page.
    echo "Processing PDF: $PDF"

    # Branch directly on the converter's exit status.
    if ./bin/python convert.py "$PDF"; then
      echo "Successfully converted $PDF to JPGs in $PAGEDIR/"
    else
      echo "Error: Failed to convert $PDF to JPGs." >&2
    fi
  else
    echo "Skipping PDF: $PDF (already processed)"
  fi
 done

 # NOTE(review): pages are written next to each PDF, so mokuro only sees them
 # when the PDFs live under $OUTPUTDIR - confirm this is the intended layout.
 ./bin/mokuro --parent-dir "$OUTPUTDIR" --disable_confirmation true

 echo "Finished processing all PDFs."
	import sys
	import os
	import pyvips

	def process_image(image_path):
	    """
	    Split a multi-page image into one JPG file per page.

	    Each page is written as ``page-NNN.jpg`` (zero-based index, zero-padded
	    to three digits) inside a subfolder created next to the input file and
	    named after the input file's basename without its extension.

	    On any exception the error is reported on stderr and the process exits
	    with status 1.

	    Args:
	        image_path (str): The path to the input image file.
	    """
	    try:
	        # A bare filename has no directory component; use the current directory.
	        image_dir = os.path.dirname(image_path) or "."

	        # Output goes to <image_dir>/<basename without extension>/
	        image_basename = os.path.splitext(os.path.basename(image_path))[0]
	        output_subfolder = os.path.join(image_dir, image_basename)

	        # exist_ok=True replaces the exists()-then-makedirs() pair, which
	        # could fail if the folder appeared between the two calls.
	        os.makedirs(output_subfolder, exist_ok=True)

	        # Load once to discover how many pages the file reports.
	        image = pyvips.Image.new_from_file(image_path)
	        has_pages = 'n-pages' in image.get_fields()
	        n_pages = image.get('n-pages') if has_pages else 1

	        for i in range(n_pages):
	            if has_pages:
	                page_image = pyvips.Image.new_from_file(image_path, page=i)
	            else:
	                # NOTE(review): loaders that do not report 'n-pages' may not
	                # accept a ``page`` argument, so reuse the image loaded above.
	                page_image = image
	            output_file_path = os.path.join(output_subfolder, f"page-{i:03}.jpg")
	            page_image.write_to_file(output_file_path)
	            # The message is 1-based for readability; file names stay 0-based.
	            print(f"Wrote page {i+1} to {output_file_path}")

	        print("Image processing complete.")

	    except Exception as e:
	        # Top-level script boundary: report and signal failure via exit code.
	        print(f"Error processing image: {e}", file=sys.stderr)
	        sys.exit(1)


	if __name__ == "__main__":
	    # Exactly one positional argument is expected: the image to split.
	    if len(sys.argv) != 2:
	        # Show the name the script was actually invoked with instead of a
	        # hard-coded file name that does not match this script.
	        print(f"Usage: python {sys.argv[0]} <image_file_path>", file=sys.stderr)
	        sys.exit(1)

	    image_file_path = sys.argv[1]
	    process_image(image_file_path)
	#!/bin/bash
	#
	# Convert every PDF found under an input directory into per-page JPG images
	# by calling convert.py on each one, then run mokuro over the output directory.
	#
	# Usage: <this script> <input_directory>

	# Check if the input directory is provided as an argument
	if [ -z "$1" ]; then
	  echo "Error: Input directory is required as the first argument." >&2
	  echo "Usage: $0 <input_directory>" >&2
	  exit 1
	fi

	INPUTDIR="$1" # Set INPUTDIR from the first argument

	# Directory handed to mokuro below; it must already exist.
	OUTPUTDIR="outputdir"

	# Check if the input and output directories exist
	if [ ! -d "$INPUTDIR" ]; then
	  echo "Error: Input directory '$INPUTDIR' does not exist." >&2
	  exit 1
	fi
	if [ ! -d "$OUTPUTDIR" ]; then
	  echo "Error: Output directory '$OUTPUTDIR' does not exist." >&2
	  exit 1
	fi

	# Loop through all PDF files in the input directory.
	# The pipe must be a real, unescaped `|`: an escaped one would be passed to
	# find as a literal argument instead of feeding the while loop.
	# "$INPUTDIR" is quoted so a path containing spaces is not word-split.
	find "$INPUTDIR" -type f -name "*.pdf" -print0 | while IFS= read -r -d '' PDF; do
	  # Get the filename without the extension
	  FILENAME=$(basename "$PDF" .pdf)
	  # convert.py writes its pages to <directory of the PDF>/<basename>/page-NNN.jpg,
	  # so that is where an earlier run's output has to be looked for.
	  PAGEDIR="$(dirname "$PDF")/$FILENAME"
	  if [ ! -f "$PAGEDIR/page-000.jpg" ]; then # check only for the first page.
	    echo "Processing PDF: $PDF"

	    # Branch directly on the converter's exit status.
	    if ./bin/python convert.py "$PDF"; then
	      echo "Successfully converted $PDF to JPGs in $PAGEDIR/"
	    else
	      echo "Error: Failed to convert $PDF to JPGs." >&2
	    fi
	  else
	    echo "Skipping PDF: $PDF (already processed)"
	  fi
	done

	# NOTE(review): pages are written next to each PDF, so mokuro only sees them
	# when the PDFs live under $OUTPUTDIR - confirm this is the intended layout.
	./bin/mokuro --parent-dir "$OUTPUTDIR" --disable_confirmation true

	echo "Finished processing all PDFs."