simonsan · January 4, 2025 13:10
diff --git a/ocrs.sh b/ocrs.sh
 #!/bin/bash

 # Print the command-line synopsis and the option summary to stdout, then
 # terminate the script with exit status 1.
 usage() {
     printf '%s\n' \
         "Usage: $0 [-r|--recursive] [-o|--output-dir <directory>] <file|directory>" \
         "Options:" \
         "  -r, --recursive      Process images in subdirectories (only applies to directories)" \
         "  -o, --output-dir     Directory to store all OCR text files (default: same as image)"
     exit 1
 }

 # Abort early when no 'ocrs' command can be resolved, telling the user how
 # to install it.
 command -v ocrs >/dev/null 2>&1 || {
     echo "Error: 'ocrs' command not found"
     echo "Please install it using: cargo install ocrs-cli"
     echo "If you don't have cargo installed, first install Rust from https://rustup.rs"
     exit 1
 }

 # Initialize variables
 recursive=false   # becomes "true" when -r/--recursive is given
 INPUT=""          # the single image file or directory to process
 OUTPUT_DIR=""     # optional directory that collects every generated .txt file

 # Parse command line arguments.
 # Options and the input path may appear in any order. Exactly one input path
 # is accepted; a second positional argument prints the usage text and exits.
 while [[ $# -gt 0 ]]; do
     case "$1" in
         -r|--recursive)
             recursive=true
             shift
             ;;
         -o|--output-dir)
             # Without this guard a trailing "-o" made "shift 2" fail without
             # consuming anything, so the loop never terminated.
             if [[ $# -lt 2 ]]; then
                 echo "Error: option '$1' requires a directory argument" >&2
                 usage
             fi
             OUTPUT_DIR="$2"
             shift 2
             ;;
         -h|--help)
             usage
             ;;
         -?*)
             # Unknown options used to be taken silently as the input path.
             # A file whose name starts with '-' can be passed as ./-name.
             echo "Error: unknown option '$1'" >&2
             usage
             ;;
         *)
             if [ -z "$INPUT" ]; then
                 INPUT="$1"
             else
                 usage
             fi
             shift
             ;;
     esac
 done

 # An input path is mandatory; show the usage text when it is missing.
 [ -n "$INPUT" ] || usage

 # The input path has to exist.
 if ! [ -e "$INPUT" ]; then
     echo "Error: '$INPUT' does not exist"
     exit 1
 fi

 # When an output directory was requested and is not there yet, create it
 # (including parents) and stop if that fails.
 if [ -n "$OUTPUT_DIR" ] && [ ! -d "$OUTPUT_DIR" ]; then
     if ! mkdir -p "$OUTPUT_DIR"; then
         echo "Error: Could not create output directory '$OUTPUT_DIR'"
         exit 1
     fi
 fi

 # Result counters, all starting at zero.
 processed=0 skipped=0 errors=0

 # Compute the path of the text file that receives the OCR result of an image.
 # Globals:
 #   OUTPUT_DIR (read) - when empty, the text file is placed next to the image
 #   INPUT      (read) - when it is a directory, the file name is derived from
 #                       the image's path relative to that directory
 # Arguments:
 #   $1 - path of the image
 # Outputs:
 #   the text file path on stdout
 get_output_path() {
     local img="$1"
     local rel_path

     # No output directory: the result lives beside the image.
     if [ -z "$OUTPUT_DIR" ]; then
         printf '%s\n' "${img}.txt"
         return
     fi

     if [ -d "$INPUT" ]; then
         rel_path=$(realpath --relative-to="$INPUT" "$img")
     else
         rel_path=$(basename -- "$img")
     fi

     # Flatten the relative path into a single file name: '/' becomes '_'.
     printf '%s\n' "$OUTPUT_DIR/${rel_path//\//_}.txt"
 }

 # Run OCR on one image and store the recognised text.
 # Globals:
 #   OUTPUT_DIR (read) - when non-empty, the text file starts with an
 #                       "Original file:" header naming the image
 # Arguments:
 #   $1 - path of the image
 # Outputs:
 #   progress messages on stdout; appends one marker line to
 #   /tmp/ocr_progress: "s" (skipped), "p" (processed) or "e" (error)
 process_image() {
     local img="$1"
     local out_file img_abs_path output
     local write_ok=true

     out_file=$(get_output_path "$img")

     # An existing result is never overwritten.
     if [ -f "$out_file" ]; then
         echo "Skipping: $img (output file already exists)"
         echo "s" >> /tmp/ocr_progress
         return 0
     fi

     echo "Processing: $img"
     # stderr is captured too so that a failure message can be shown below.
     if ! output=$(ocrs "$img" 2>&1); then
         echo "Error processing: $img"
         printf '%s\n' "$output"
         echo "e" >> /tmp/ocr_progress
         # Clean up the potentially empty or corrupted output file
         rm -f -- "$out_file"
         return 0
     fi

     if [ -n "$OUTPUT_DIR" ]; then
         # Add the original file path and the OCR output to the file
         img_abs_path=$(realpath "$img")
         {
             printf 'Original file: %s\n\n' "$img_abs_path"
             printf '%s\n' "$output"
         } > "$out_file" || write_ok=false
     else
         # Write output to file next to image
         printf '%s\n' "$output" > "$out_file" || write_ok=false
     fi

     # A result that could not be stored is a failure, not a processed image.
     if [ "$write_ok" = true ]; then
         echo "p" >> /tmp/ocr_progress
     else
         echo "Error: could not write '$out_file'"
         echo "e" >> /tmp/ocr_progress
         rm -f -- "$out_file"
     fi
 }

 # Decide whether a path names a supported image, judged by its extension.
 # Arguments:
 #   $1 - file path
 # Returns:
 #   0 for jpg, jpeg, png, tiff or bmp (compared case-insensitively), else 1
 is_image() {
     local file="$1"
     local name="${file##*/}"

     # A name without any '.' has no extension. Previously the whole name was
     # compared, so a file literally called "png" was accepted.
     case "$name" in
         *.*) ;;
         *) return 1 ;;
     esac

     case "$(lowercase "${name##*.}")" in
         jpg|jpeg|png|tiff|bmp) return 0 ;;
         *) return 1 ;;
     esac
 }

 # Convert string to lowercase.
 # Arguments:
 #   $1 - text to convert
 # Outputs:
 #   the lower-cased text on stdout, followed by a newline
 lowercase() {
     # printf instead of echo: echo treats inputs such as "-E" or "-n" as its
     # own options and would print nothing for them.
     printf '%s\n' "$1" | tr '[:upper:]' '[:lower:]'
 }

 # Run OCR on every supported image directly inside a directory and, in
 # recursive mode, descend into its subdirectories.
 # Globals:
 #   recursive (read) - "true" enables descending into subdirectories
 # Arguments:
 #   $1 - directory to scan
 process_directory() {
     local directory="$1"
     local types=("jpg" "jpeg" "png" "tiff" "bmp")
     local type img subdir

     # Images located directly in this directory, one extension at a time.
     for type in "${types[@]}"; do
         while IFS= read -r -d '' img; do
             process_image "$img"
         done < <(find "$directory" -maxdepth 1 -type f -iname "*.$type" -print0)
     done

     # Process subdirectories if recursive flag is set
     if [ "$recursive" = true ]; then
         # Only direct children are listed: the recursive call takes care of
         # deeper levels. Listing the whole tree here as well made nested
         # directories get processed more than once.
         while IFS= read -r -d '' subdir; do
             process_directory "$subdir"
         done < <(find "$directory" -mindepth 1 -maxdepth 1 -type d -print0)
     fi
 }

 # Create temporary file for counting. process_image appends one marker line
 # per image to it: "p" (processed), "s" (skipped) or "e" (error).
 rm -f /tmp/ocr_progress
 touch /tmp/ocr_progress
 # Remove the marker file on every exit path; without this the early
 # "exit 1" branches below left it behind in /tmp.
 trap 'rm -f /tmp/ocr_progress' EXIT

 # Start processing
 echo "Starting OCR processing..."
 if [ -f "$INPUT" ]; then
     # Single file: it must carry a supported image extension.
     if is_image "$INPUT"; then
         process_image "$INPUT"
     else
         echo "Error: '$INPUT' is not a supported image file"
         exit 1
     fi
 elif [ -d "$INPUT" ]; then
     echo "Directory: $INPUT"
     echo "Recursive mode: $recursive"
     process_directory "$INPUT"
 else
     echo "Error: '$INPUT' is neither a file nor a directory"
     exit 1
 fi

 if [ -n "$OUTPUT_DIR" ]; then
     echo "Output directory: $OUTPUT_DIR"
 fi
 echo "-------------------"

 # Count final results: every marker line holds a single letter, so the
 # number of matching lines is the number of images with that outcome.
 processed=$(grep -c "p" /tmp/ocr_progress)
 skipped=$(grep -c "s" /tmp/ocr_progress)
 errors=$(grep -c "e" /tmp/ocr_progress)
 rm -f /tmp/ocr_progress

 echo "-------------------"
 echo "Processing complete!"
 echo "Files processed: $processed"
 echo "Files skipped: $skipped"
 echo "Files failed: $errors"
	#!/bin/bash

	# Print the command-line synopsis and the option summary to stdout, then
	# terminate the script with exit status 1.
	usage() {
	    # The alternatives in the synopsis are separated by a plain '|'; the
	    # backslash-escaped form printed stray backslashes.
	    echo "Usage: $0 [-r|--recursive] [-o|--output-dir <directory>] <file|directory>"
	    echo "Options:"
	    echo " -r, --recursive Process images in subdirectories (only applies to directories)"
	    echo " -o, --output-dir Directory to store all OCR text files (default: same as image)"
	    exit 1
	}

	# Abort early when no 'ocrs' command can be resolved, telling the user how
	# to install it.
	command -v ocrs >/dev/null 2>&1 || {
	    echo "Error: 'ocrs' command not found"
	    echo "Please install it using: cargo install ocrs-cli"
	    echo "If you don't have cargo installed, first install Rust from https://rustup.rs"
	    exit 1
	}

	# Initialize variables
	recursive=false   # becomes "true" when -r/--recursive is given
	INPUT=""          # the single image file or directory to process
	OUTPUT_DIR=""     # optional directory that collects every generated .txt file

	# Parse command line arguments.
	# Options and the input path may appear in any order. Exactly one input path
	# is accepted; a second positional argument prints the usage text and exits.
	while [[ $# -gt 0 ]]; do
	    case "$1" in
	        # Alternatives need an unescaped '|'; the escaped form only matched
	        # the literal text "-r|--recursive".
	        -r|--recursive)
	            recursive=true
	            shift
	            ;;
	        -o|--output-dir)
	            # Without this guard a trailing "-o" made "shift 2" fail without
	            # consuming anything, so the loop never terminated.
	            if [[ $# -lt 2 ]]; then
	                echo "Error: option '$1' requires a directory argument" >&2
	                usage
	            fi
	            OUTPUT_DIR="$2"
	            shift 2
	            ;;
	        -h|--help)
	            usage
	            ;;
	        -?*)
	            # Unknown options used to be taken silently as the input path.
	            # A file whose name starts with '-' can be passed as ./-name.
	            echo "Error: unknown option '$1'" >&2
	            usage
	            ;;
	        *)
	            if [ -z "$INPUT" ]; then
	                INPUT="$1"
	            else
	                usage
	            fi
	            shift
	            ;;
	    esac
	done

	# Check if input is provided
	if [ -z "$INPUT" ]; then
	    usage
	fi

	# Check if input exists
	if [ ! -e "$INPUT" ]; then
	    echo "Error: '$INPUT' does not exist"
	    exit 1
	fi

	# Check and create output directory if specified
	if [ -n "$OUTPUT_DIR" ]; then
	    if [ ! -d "$OUTPUT_DIR" ]; then
	        # A real '||' is required here: with the operator backslash-escaped,
	        # "||" and "{" were handed to mkdir as directory names and the
	        # closing brace was a syntax error.
	        mkdir -p "$OUTPUT_DIR" || {
	            echo "Error: Could not create output directory '$OUTPUT_DIR'"
	            exit 1
	        }
	    fi
	fi

	# Initialize counters
	processed=0
	skipped=0
	errors=0

	# Compute the path of the text file that receives the OCR result of an image.
	# Globals:
	#   OUTPUT_DIR (read) - when empty, the text file is placed next to the image
	#   INPUT      (read) - when it is a directory, the file name is derived from
	#                       the image's path relative to that directory
	# Arguments:
	#   $1 - path of the image
	# Outputs:
	#   the text file path on stdout
	get_output_path() {
	    local img="$1"
	    local rel_path

	    # No output directory: the result lives beside the image.
	    if [ -z "$OUTPUT_DIR" ]; then
	        printf '%s\n' "${img}.txt"
	        return
	    fi

	    if [ -d "$INPUT" ]; then
	        rel_path=$(realpath --relative-to="$INPUT" "$img")
	    else
	        rel_path=$(basename -- "$img")
	    fi

	    # Flatten the relative path into a single file name: '/' becomes '_'.
	    # Done with parameter expansion; the escaped pipe to tr only printed
	    # the pipe text instead of translating anything.
	    printf '%s\n' "$OUTPUT_DIR/${rel_path//\//_}.txt"
	}

	# Run OCR on one image and store the recognised text.
	# Globals:
	#   OUTPUT_DIR (read) - when non-empty, the text file starts with an
	#                       "Original file:" header naming the image
	# Arguments:
	#   $1 - path of the image
	# Outputs:
	#   progress messages on stdout; appends one marker line to
	#   /tmp/ocr_progress: "s" (skipped), "p" (processed) or "e" (error)
	process_image() {
	    local img="$1"
	    local out_file img_abs_path output
	    local write_ok=true

	    out_file=$(get_output_path "$img")

	    # An existing result is never overwritten.
	    if [ -f "$out_file" ]; then
	        echo "Skipping: $img (output file already exists)"
	        echo "s" >> /tmp/ocr_progress
	        return 0
	    fi

	    echo "Processing: $img"
	    # stderr is captured too so that a failure message can be shown below.
	    if ! output=$(ocrs "$img" 2>&1); then
	        echo "Error processing: $img"
	        printf '%s\n' "$output"
	        echo "e" >> /tmp/ocr_progress
	        # Clean up the potentially empty or corrupted output file
	        rm -f -- "$out_file"
	        return 0
	    fi

	    if [ -n "$OUTPUT_DIR" ]; then
	        # Add the original file path and the OCR output to the file
	        img_abs_path=$(realpath "$img")
	        {
	            printf 'Original file: %s\n\n' "$img_abs_path"
	            printf '%s\n' "$output"
	        } > "$out_file" || write_ok=false
	    else
	        # Write output to file next to image
	        printf '%s\n' "$output" > "$out_file" || write_ok=false
	    fi

	    # A result that could not be stored is a failure, not a processed image.
	    if [ "$write_ok" = true ]; then
	        echo "p" >> /tmp/ocr_progress
	    else
	        echo "Error: could not write '$out_file'"
	        echo "e" >> /tmp/ocr_progress
	        rm -f -- "$out_file"
	    fi
	}

	# Decide whether a path names a supported image, judged by its extension.
	# Arguments:
	#   $1 - file path
	# Returns:
	#   0 for jpg, jpeg, png, tiff or bmp (compared case-insensitively), else 1
	is_image() {
	    local file="$1"
	    local name="${file##*/}"

	    # A name without any '.' has no extension; comparing the whole name
	    # would accept a file literally called "png".
	    case "$name" in
	        *.*) ;;
	        *) return 1 ;;
	    esac

	    # The alternatives are separated by an unescaped '|'; in escaped form
	    # the pattern was one literal string that no extension could match.
	    case "$(lowercase "${name##*.}")" in
	        jpg|jpeg|png|tiff|bmp) return 0 ;;
	        *) return 1 ;;
	    esac
	}

	# Convert string to lowercase.
	# Arguments:
	#   $1 - text to convert
	# Outputs:
	#   the lower-cased text on stdout, followed by a newline
	lowercase() {
	    # The text must really be piped into tr; with the pipe escaped, echo
	    # merely printed the tr command line. printf is used instead of echo
	    # because echo treats inputs such as "-E" or "-n" as its own options.
	    printf '%s\n' "$1" | tr '[:upper:]' '[:lower:]'
	}

	# Run OCR on every supported image directly inside a directory and, in
	# recursive mode, descend into its subdirectories.
	# Globals:
	#   recursive (read) - "true" enables descending into subdirectories
	# Arguments:
	#   $1 - directory to scan
	process_directory() {
	    local directory="$1"
	    local types=("jpg" "jpeg" "png" "tiff" "bmp")
	    local type img subdir

	    # Images located directly in this directory, one extension at a time.
	    for type in "${types[@]}"; do
	        while IFS= read -r -d '' img; do
	            process_image "$img"
	        done < <(find "$directory" -maxdepth 1 -type f -iname "*.$type" -print0)
	    done

	    # Process subdirectories if recursive flag is set
	    if [ "$recursive" = true ]; then
	        # Only direct children are listed: the recursive call takes care of
	        # deeper levels. Listing the whole tree here as well made nested
	        # directories get processed more than once.
	        while IFS= read -r -d '' subdir; do
	            process_directory "$subdir"
	        done < <(find "$directory" -mindepth 1 -maxdepth 1 -type d -print0)
	    fi
	}

	# Create temporary file for counting. process_image appends one marker line
	# per image to it: "p" (processed), "s" (skipped) or "e" (error).
	rm -f /tmp/ocr_progress
	touch /tmp/ocr_progress
	# Remove the marker file on every exit path; without this the early
	# "exit 1" branches below left it behind in /tmp.
	trap 'rm -f /tmp/ocr_progress' EXIT

	# Start processing
	echo "Starting OCR processing..."
	if [ -f "$INPUT" ]; then
	    # Single file: it must carry a supported image extension.
	    if is_image "$INPUT"; then
	        process_image "$INPUT"
	    else
	        echo "Error: '$INPUT' is not a supported image file"
	        exit 1
	    fi
	elif [ -d "$INPUT" ]; then
	    echo "Directory: $INPUT"
	    echo "Recursive mode: $recursive"
	    process_directory "$INPUT"
	else
	    echo "Error: '$INPUT' is neither a file nor a directory"
	    exit 1
	fi

	if [ -n "$OUTPUT_DIR" ]; then
	    echo "Output directory: $OUTPUT_DIR"
	fi
	echo "-------------------"

	# Count final results: every marker line holds a single letter, so the
	# number of matching lines is the number of images with that outcome.
	processed=$(grep -c "p" /tmp/ocr_progress)
	skipped=$(grep -c "s" /tmp/ocr_progress)
	errors=$(grep -c "e" /tmp/ocr_progress)
	rm -f /tmp/ocr_progress

	echo "-------------------"
	echo "Processing complete!"
	echo "Files processed: $processed"
	echo "Files skipped: $skipped"
	echo "Files failed: $errors"
No results found