Created
August 3, 2025 10:29
-
-
Save FNGarvin/2c3e43953d31955d76069cae2c2aa3ae to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# detect_and_crop.py
# FNGarvin (c) 2025
#
# This script uses a pre-trained YOLOv8 model to detect objects in an image
# specified as a command-line argument. For each detected object meeting or
# exceeding a confidence threshold, it generates an ImageMagick 'convert'
# command to crop the object. These commands are printed to the console and
# optionally saved to a shell script for later review and execution.
#
# Usage:
#   python detect_and_crop.py <image_path> [output_script_name.sh]
#
# Arguments:
#   <image_path>:            Path to the input image file.
#   [output_script_name.sh]: Optional. If provided, the generated ImageMagick
#                            commands will be saved to this file as a shell script.
#                            If omitted, commands are only printed to stdout.
#
# Requires:
#   - ultralytics library (pip install ultralytics)
#   - ImageMagick (convert command)
#
import os | |
import sys | |
from collections import defaultdict | |
from ultralytics import YOLO | |
from shlex import quote as shell_quote # Import shell_quote for robust path handling | |
def _comment_safe(text):
    """Return *text* flattened to one line so it cannot escape a shell comment."""
    return " ".join(str(text).splitlines())


def _crop_geometry(x_center, y_center, width, height):
    """Return an ImageMagick ``WxH+X+Y`` geometry for a centre-based pixel box.

    The box edges are computed from the untruncated values and rounded once,
    so the crop does not drift the way separately truncated centre/size values
    do. Edges left of / above the image origin are clamped to 0 and the size
    is reduced accordingly; the size is never smaller than 1x1.
    """
    left = max(0, round(x_center - width / 2))
    top = max(0, round(y_center - height / 2))
    right = round(x_center + width / 2)
    bottom = round(y_center + height / 2)
    crop_width = max(1, right - left)
    crop_height = max(1, bottom - top)
    return f"{crop_width}x{crop_height}+{left}+{top}"


def detect_and_crop(image_path, output_dir="cropped_objects", confidence_threshold=0.65, script_output_path=None):
    """
    Detect objects in an image with YOLO and emit ImageMagick crop commands.

    One 'convert ... -crop' command is generated for every detection whose
    confidence is at least ``confidence_threshold``. Commands are printed to
    stdout and, when ``script_output_path`` is given, also written to that
    file, which is then made executable. No cropping is performed here.

    Args:
        image_path (str): Path to the input image. If it does not exist an
            error is printed to stderr and the process exits with status 1.
        output_dir (str): Directory the generated commands write crops into.
            It is created immediately only when a script file is requested;
            the script itself also starts with a matching ``mkdir -p``.
            Defaults to "cropped_objects".
        confidence_threshold (float): Minimum confidence score for a
            detection to produce a command. Defaults to 0.65.
        script_output_path (str, optional): Path of the shell script to
            write. If None, or if the file cannot be opened/written, commands
            are only printed to stdout.
    """
    if not os.path.exists(image_path):
        print(f"Error: Image file not found at '{image_path}'", file=sys.stderr)
        sys.exit(1)

    # Load a pre-trained YOLO model and run inference on the image.
    model = YOLO("yolov8n.pt")
    results = model(image_path)

    output_script_file = None
    if script_output_path:
        # Create the output directory up front so it exists even if the
        # generated script is executed much later.
        if not os.path.exists(output_dir):
            os.makedirs(output_dir, exist_ok=True)
            print(f"Created output directory: {output_dir}")
        try:
            output_script_file = open(script_output_path, 'w', encoding="utf-8")
            output_script_file.write("#!/bin/bash\n")
            output_script_file.write(
                "# ImageMagick crop commands generated by detect_and_crop.py from "
                f"{_comment_safe(os.path.basename(image_path))}\n\n"
            )
            # Ensure the output directory exists when the script runs.
            output_script_file.write(f"mkdir -p {shell_quote(output_dir)}\n\n")
            print(f"ImageMagick commands will also be saved to: {script_output_path}")
        except OSError as e:
            print(f"Warning: Could not open script file '{script_output_path}' for writing: {e}", file=sys.stderr)
            # Do not leak a handle that opened but failed on the header write.
            if output_script_file is not None:
                output_script_file.close()
            output_script_file = None  # Revert to printing only

    base_name = os.path.splitext(os.path.basename(image_path))[0]
    generated_commands_count = 0
    class_counters = defaultdict(int)  # per-class running index for file names

    try:
        for r in results:
            boxes = r.boxes
            names = r.names
            if len(boxes) == 0:
                print(f"No objects detected in '{image_path}' with current model and settings.")
                if output_script_file:
                    output_script_file.write(
                        f"# No objects detected in '{_comment_safe(image_path)}' with current model and settings.\n"
                    )
                continue

            for box in boxes:
                confidence = float(box.conf[0])
                class_name = names[int(box.cls[0])]

                if confidence < confidence_threshold:
                    print(f"Skipping {class_name} with confidence {confidence:.2f} (below threshold {confidence_threshold:.2f})\n")
                    continue

                # box.xywh holds [x_center, y_center, width, height] in pixels;
                # keep the fractional values until the geometry is computed.
                x_center, y_center, width, height = (float(v) for v in box.xywh[0])
                crop_geometry = _crop_geometry(x_center, y_center, width, height)

                class_counters[class_name] += 1
                output_filename = f"{base_name}.{class_name}{class_counters[class_name]:03d}.jpg"
                output_path = os.path.join(output_dir, output_filename)

                # Input path first, output path last; both shell-quoted so
                # spaces and metacharacters in paths are safe.
                crop_command = (
                    f"convert {shell_quote(image_path)} "
                    f"-crop \"{crop_geometry}\" "
                    f"-quality 95 "
                    f"{shell_quote(output_path)}"
                )

                print(f"Generated command for {class_name} (Confidence: {confidence:.2f}):")
                print(f" {crop_command}\n")
                if output_script_file:
                    output_script_file.write(f"# {class_name} (Confidence: {confidence:.2f})\n")
                    output_script_file.write(f"{crop_command}\n\n")
                generated_commands_count += 1

        if generated_commands_count == 0:
            print("No ImageMagick commands were generated based on the confidence threshold.")
            if output_script_file:
                output_script_file.write("# No ImageMagick commands were generated based on the confidence threshold.\n")
        else:
            print(f"\nFinished. Total ImageMagick commands generated: {generated_commands_count}")
    finally:
        # Always release the script handle, even if detection or a write fails.
        if output_script_file is not None:
            output_script_file.close()

    if output_script_file is not None:
        os.chmod(script_output_path, 0o755)  # Make the script executable
        print(f"Remember to review '{script_output_path}' before executing it.")
if __name__ == "__main__":
    # Command-line entry point: one required image path, one optional
    # shell-script path to receive the generated commands.
    cli_args = sys.argv[1:]
    if len(cli_args) not in (1, 2):
        script_name = os.path.basename(sys.argv[0])
        print(f"Usage: python {script_name} <image_path> [output_script_name.sh]")
        sys.exit(1)

    image_file = cli_args[0]
    # Without a second argument the commands are only printed.
    script_file = None
    if len(cli_args) == 2:
        script_file = cli_args[1]

    detect_and_crop(image_file, script_output_path=script_file)
#END OF detect_and_crop.py |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment