don1138 · November 7, 2023 01:40
diff --git a/extract_metadata.py b/extract_metadata.py
 # Extract Metadata v1.3.0
 # This script reads the images in a directory and all subdirectories, copies the EXIF metadata, and saves it to a text file with the same name as the source image.
 import os
 import shlex
 from PIL import Image, ExifTags

 # Ask the user for the parent directory path and strip surrounding whitespace
 PARENT_DIRECTORY_input = input("Enter the parent directory path: ").strip()

 # Undo shell-style quoting/escaping so a path pasted into the prompt
 # (e.g. '/Users/me/My Photos' or /Users/me/My\ Photos) becomes the plain
 # directory name.
 try:
    PARENT_DIRECTORY_components = shlex.split(PARENT_DIRECTORY_input)
 except ValueError:
    # shlex raises ValueError on an unbalanced quote (for example a folder
    # name containing a single apostrophe); use the text exactly as typed
    # instead of crashing before any work is done.
    PARENT_DIRECTORY_components = [PARENT_DIRECTORY_input]
 PARENT_DIRECTORY = ' '.join(PARENT_DIRECTORY_components)
 # NOTE(review): shlex.split defaults to POSIX mode, where a backslash is an
 # escape character, so a Windows-style path such as C:\Photos would lose its
 # separators here -- confirm whether Windows paths need to be supported.

 # Metadata tags that are never written to the output text files
 EXCLUDE_TAGS = ["ExifOffset"]

 # Running totals for written and skipped files, reported at the end of the run
 files_written = 0
 files_skipped = 0

 def extract_metadata(image_path):
    """Return the EXIF metadata of an image as a ``{tag_name: value}`` dict.

    Numeric EXIF tag ids are translated to names through ``ExifTags.TAGS``;
    ids that are missing from that table are kept unchanged as keys.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The metadata dict, or an empty dict when the image has no EXIF data
        or reading it failed (the error is printed, never raised).
    """
    try:
        # The context manager closes the file handle that Image.open keeps,
        # so walking a large directory tree does not pile up open files.
        with Image.open(image_path) as image:
            # _getexif is a private Pillow method and is not defined by every
            # image class (presumably GIF lacks it -- verify against the
            # installed Pillow). A missing method just means "no EXIF here",
            # not an error worth reporting.
            read_exif = getattr(image, "_getexif", None)
            exif_data = read_exif() if read_exif is not None else None
        if not exif_data:
            return {}
        return {
            ExifTags.TAGS.get(tag, tag): value
            for tag, value in exif_data.items()
        }
    except Exception as e:
        # Deliberately broad: this is a best-effort batch tool, so one
        # unreadable file is reported and the run continues.
        print(f"Error extracting metadata from {image_path}: {e}")
        return {}

 def save_metadata_to_txt(image_path, metadata):
    """Write an image's metadata to a ``.txt`` file next to the image.

    The output path is the image path with its extension replaced by
    ``.txt``. If that file already exists, ``-1``, ``-2``, ... is appended to
    the base name until an unused name is found. Every tag that is not
    listed in ``EXCLUDE_TAGS`` is written as a ``key: value`` line, and the
    text ``UserComment: UNICODE`` is removed from the output.

    Updates the module-level ``files_written`` / ``files_skipped`` counters.

    Args:
        image_path: Path of the source image.
        metadata: Mapping of tag name to value. An empty mapping is reported
            as skipped and no file is written.
    """
    global files_written, files_skipped  # Module-level run totals

    if not metadata:
        print(f"× No data found in {image_path} -- skipped")
        files_skipped += 1
        return

    base_name = os.path.splitext(image_path)[0]
    txt_path = base_name + ".txt"

    # Never overwrite an existing text file: take the first free "-N" suffix.
    count = 1
    while os.path.exists(txt_path):
        txt_path = f"{base_name}-{count}.txt"
        count += 1

    lines = []
    for key, value in metadata.items():
        if key in EXCLUDE_TAGS:
            continue
        if isinstance(value, bytes):
            try:
                value = value.decode("utf-8")
            except UnicodeDecodeError:
                # latin-1 maps every possible byte, so this cannot fail.
                value = value.decode("latin-1")
            value = value.replace("\x00", "")  # Remove null characters
        lines.append(f"{key}: {value}\n")

    # Strip the "UserComment: UNICODE" marker in memory so the file is written
    # exactly once, instead of being written, read back and rewritten. Building
    # the text first also means a formatting error cannot leave a partial file.
    content = "".join(lines).replace("UserComment: UNICODE", "")

    with open(txt_path, "w", encoding="utf-8") as txt_file:
        txt_file.write(content)
    print(f"Metadata saved to {txt_path}")
    files_written += 1

 def main():
    """Export metadata for every image found under ``PARENT_DIRECTORY``.

    The directory tree is walked recursively. Files whose names end in
    .png, .jpg, .jpeg or .gif (case-insensitive) are processed; names that
    start with a dot are ignored. The written/skipped totals are printed
    once the walk is finished.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif')

    for folder, _subfolders, filenames in os.walk(PARENT_DIRECTORY):
        for filename in filenames:
            if not filename.lower().endswith(image_suffixes):
                continue
            # A name beginning with "._" also begins with ".", so a single
            # check covers both kinds of hidden file.
            if filename.startswith('.'):
                continue
            image_path = os.path.join(folder, filename)
            save_metadata_to_txt(image_path, extract_metadata(image_path))

    # Report the totals gathered in the module-level counters
    print(f"\nFiles written: {files_written}")
    print(f"Files skipped: {files_skipped}\n")

 # Run the export only when executed as a script, not when imported
 if __name__ == "__main__":
    main()
	# Extract Metadata v1.3.0
	# This script reads the images in a directory and all subdirectories, copies the EXIF metadata, and saves it to a text file with the same name as the source image.
	import os
	import shlex
	from PIL import Image, ExifTags

	# Ask the user for the parent directory path and strip surrounding whitespace
	PARENT_DIRECTORY_input = input("Enter the parent directory path: ").strip()

	# Undo shell-style quoting/escaping so a path pasted into the prompt
	# (e.g. '/Users/me/My Photos' or /Users/me/My\ Photos) becomes the plain
	# directory name.
	try:
	    PARENT_DIRECTORY_components = shlex.split(PARENT_DIRECTORY_input)
	except ValueError:
	    # shlex raises ValueError on an unbalanced quote (for example a folder
	    # name containing a single apostrophe); use the text exactly as typed
	    # instead of crashing before any work is done.
	    PARENT_DIRECTORY_components = [PARENT_DIRECTORY_input]
	PARENT_DIRECTORY = ' '.join(PARENT_DIRECTORY_components)
	# NOTE(review): shlex.split defaults to POSIX mode, where a backslash is an
	# escape character, so a Windows-style path such as C:\Photos would lose its
	# separators here -- confirm whether Windows paths need to be supported.

	# Metadata tags that are never written to the output text files
	EXCLUDE_TAGS = ["ExifOffset"]

	# Running totals for written and skipped files, reported at the end of the run
	files_written = 0
	files_skipped = 0

	def extract_metadata(image_path):
	    """Return the EXIF metadata of an image as a ``{tag_name: value}`` dict.

	    Numeric EXIF tag ids are translated to names through ``ExifTags.TAGS``;
	    ids that are missing from that table are kept unchanged as keys.

	    Args:
	        image_path: Path of the image file to read.

	    Returns:
	        The metadata dict, or an empty dict when the image has no EXIF data
	        or reading it failed (the error is printed, never raised).
	    """
	    try:
	        # The context manager closes the file handle that Image.open keeps,
	        # so walking a large directory tree does not pile up open files.
	        with Image.open(image_path) as image:
	            # _getexif is a private Pillow method and is not defined by
	            # every image class (presumably GIF lacks it -- verify against
	            # the installed Pillow). A missing method just means "no EXIF
	            # here", not an error worth reporting.
	            read_exif = getattr(image, "_getexif", None)
	            exif_data = read_exif() if read_exif is not None else None
	        if not exif_data:
	            return {}
	        return {
	            ExifTags.TAGS.get(tag, tag): value
	            for tag, value in exif_data.items()
	        }
	    except Exception as e:
	        # Deliberately broad: this is a best-effort batch tool, so one
	        # unreadable file is reported and the run continues.
	        print(f"Error extracting metadata from {image_path}: {e}")
	        return {}

	def save_metadata_to_txt(image_path, metadata):
	    """Write an image's metadata to a ``.txt`` file next to the image.

	    The output path is the image path with its extension replaced by
	    ``.txt``. If that file already exists, ``-1``, ``-2``, ... is appended
	    to the base name until an unused name is found. Every tag that is not
	    listed in ``EXCLUDE_TAGS`` is written as a ``key: value`` line, and the
	    text ``UserComment: UNICODE`` is removed from the output.

	    Updates the module-level ``files_written`` / ``files_skipped`` counters.

	    Args:
	        image_path: Path of the source image.
	        metadata: Mapping of tag name to value. An empty mapping is
	            reported as skipped and no file is written.
	    """
	    global files_written, files_skipped  # Module-level run totals

	    if not metadata:
	        print(f"× No data found in {image_path} -- skipped")
	        files_skipped += 1
	        return

	    base_name = os.path.splitext(image_path)[0]
	    txt_path = base_name + ".txt"

	    # Never overwrite an existing text file: take the first free "-N" suffix.
	    count = 1
	    while os.path.exists(txt_path):
	        txt_path = f"{base_name}-{count}.txt"
	        count += 1

	    lines = []
	    for key, value in metadata.items():
	        if key in EXCLUDE_TAGS:
	            continue
	        if isinstance(value, bytes):
	            try:
	                value = value.decode("utf-8")
	            except UnicodeDecodeError:
	                # latin-1 maps every possible byte, so this cannot fail.
	                value = value.decode("latin-1")
	            value = value.replace("\x00", "")  # Remove null characters
	        lines.append(f"{key}: {value}\n")

	    # Strip the "UserComment: UNICODE" marker in memory so the file is
	    # written exactly once, instead of being written, read back and
	    # rewritten. Building the text first also means a formatting error
	    # cannot leave a partial file.
	    content = "".join(lines).replace("UserComment: UNICODE", "")

	    with open(txt_path, "w", encoding="utf-8") as txt_file:
	        txt_file.write(content)
	    print(f"Metadata saved to {txt_path}")
	    files_written += 1

	def main():
	    """Export metadata for every image found under ``PARENT_DIRECTORY``.

	    The directory tree is walked recursively. Files whose names end in
	    .png, .jpg, .jpeg or .gif (case-insensitive) are processed; names that
	    start with a dot are ignored. The written/skipped totals are printed
	    once the walk is finished.
	    """
	    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif')

	    for folder, _subfolders, filenames in os.walk(PARENT_DIRECTORY):
	        for filename in filenames:
	            if not filename.lower().endswith(image_suffixes):
	                continue
	            # A name beginning with "._" also begins with ".", so a single
	            # check covers both kinds of hidden file.
	            if filename.startswith('.'):
	                continue
	            image_path = os.path.join(folder, filename)
	            save_metadata_to_txt(image_path, extract_metadata(image_path))

	    # Report the totals gathered in the module-level counters
	    print(f"\nFiles written: {files_written}")
	    print(f"Files skipped: {files_skipped}\n")

	# Run the export only when executed as a script, not when imported
	if __name__ == "__main__":
	    main()