Last active
December 31, 2024 14:16
-
-
Save mikkohei13/4566d80bb3b2bf03bc28a12537c86054 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Python script that creates a CSV file listing image file paths and their corresponding categories.
# Should work on both Linux and Windows, and with Unicode filenames.
'''
Input format (semicolon-delimited CSV with a header row; each directory
is resolved relative to BASE_DIR):

directory;category
./lepidoptera/adult;adult
./lepidoptera/adult_specimen;adult_specimen
./lepidoptera/egg;egg
'''
import os | |
import csv | |
import sys | |
from pathlib import Path | |
# Root folder; every directory listed in the input CSV is joined onto it.
BASE_DIR = "./insect_images"

# Input CSV (directory;category mappings) and output CSV (filepath;category).
INPUT_CSV = "directories.csv"
OUTPUT_CSV = "images_with_categories.csv"
def validate_directory(directory_path):
    """
    Ensure that ``directory_path`` refers to an existing directory.

    Prints an error message and terminates the script with exit status 1
    when the path is missing, or when it exists but is not a directory
    (for example a regular file). Returns ``None`` when the path is valid.

    Args:
        directory_path: Path to check.

    Raises:
        SystemExit: With code 1 if the path is missing or not a directory.
    """
    if not os.path.exists(directory_path):
        print(f"Error: Directory '{directory_path}' does not exist.")
        sys.exit(1)
    # A plain existence check would let a regular file through, and the
    # later directory walk would then silently produce no images for it.
    if not os.path.isdir(directory_path):
        print(f"Error: '{directory_path}' is not a directory.")
        sys.exit(1)
def read_categories(input_csv):
    """
    Read the directory-to-category mappings from the input CSV file.

    The file is read as semicolon-delimited text whose header row names the
    ``directory`` and ``category`` columns.

    Args:
        input_csv: Path of the CSV file to read.

    Returns:
        A dict mapping ``os.path.join(BASE_DIR, <directory>)`` to the
        category string. If the same directory appears in several rows,
        the last row wins.

    Raises:
        KeyError: If the header lacks a ``directory`` or ``category`` column.
    """
    categories = {}
    # "utf-8-sig" decodes plain UTF-8 and also drops a leading byte-order
    # mark, which would otherwise become part of the first header name and
    # make row['directory'] fail. newline="" lets the csv module handle
    # line endings itself.
    with open(input_csv, mode="r", newline="", encoding="utf-8-sig") as file:
        reader = csv.DictReader(file, delimiter=';')
        for row in reader:
            dir_path = os.path.join(BASE_DIR, row['directory'])
            categories[dir_path] = row['category']
    return categories
def get_image_files(directory_path):
    """
    Collect the paths of all JPEG images beneath a directory.

    The tree rooted at ``directory_path`` is traversed with ``os.walk``.
    Every file whose name ends in ``.jpg`` or ``.jpeg`` (compared
    case-insensitively) is returned as ``os.path.join(<folder>, <name>)``.

    Args:
        directory_path: Directory to search, including its subdirectories.

    Returns:
        A list of matching file paths, in the order ``os.walk`` yields them.
    """
    wanted_suffixes = (".jpg", ".jpeg")
    found = []
    for folder, _subdirs, names in os.walk(directory_path):
        # Lower-case only for the comparison; the stored path keeps the
        # file name's original casing.
        found.extend(
            os.path.join(folder, name)
            for name in names
            if name.lower().endswith(wanted_suffixes)
        )
    return found
def generate_image_category_csv(categories, output_csv):
    """
    Generate a CSV file with image file paths and their corresponding categories.

    Writes a semicolon-delimited, UTF-8 encoded file with the header
    ``filepath;category`` followed by one row per image found under each
    directory in ``categories``.

    Args:
        categories: Mapping of directory path to category name.
        output_csv: Path of the CSV file to create (overwritten if present).

    Raises:
        SystemExit: Raised by ``validate_directory`` when a listed directory
            fails validation.
    """
    # Validate every directory before opening the output file: opening in
    # "w" mode truncates it, so a failure halfway through would otherwise
    # leave an existing result destroyed or only partially rewritten.
    for directory in categories:
        validate_directory(directory)

    with open(output_csv, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file, delimiter=';')
        writer.writerow(["filepath", "category"])
        for directory, category in categories.items():
            for image_file in get_image_files(directory):
                # Normalize path separators to forward slashes so the output
                # looks the same on Windows and Linux.
                normalized_path = Path(image_file).as_posix()
                writer.writerow([normalized_path, category])
def main():
    """
    Run the script: load the mappings, write the image list, report success.

    Reads the directory-to-category mappings from ``INPUT_CSV``, writes the
    image/category rows to ``OUTPUT_CSV`` and prints a confirmation message.
    """
    # Feed the parsed mappings straight into the CSV generator.
    generate_image_category_csv(read_categories(INPUT_CSV), OUTPUT_CSV)
    print(f"Image list with categories saved to '{OUTPUT_CSV}'.")
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment