Last active
November 7, 2023 01:40
-
-
Save don1138/7dcf338f27614c01bf0f6bcc3bbbc74e to your computer and use it in GitHub Desktop.
Extract metadata from image files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extract Metadata v1.3.0
# This script reads the images in a directory and all of its subdirectories, extracts their EXIF metadata, and saves it to a text file with the same base name as the source image (a numeric suffix is added if that file already exists).
import os | |
import shlex | |
from PIL import Image, ExifTags | |
# Ask the user for the parent directory path and strip surrounding whitespace
PARENT_DIRECTORY_input = input("Enter the parent directory path: ").strip()

# shlex.split removes shell-style quoting and backslash escapes from the input;
# the resulting pieces are re-joined with single spaces to form the path.
try:
    PARENT_DIRECTORY_components = shlex.split(PARENT_DIRECTORY_input)
except ValueError:
    # shlex rejects unbalanced quotes and dangling escapes (for example an
    # apostrophe in a folder name). Use the input as typed instead of crashing.
    PARENT_DIRECTORY_components = [PARENT_DIRECTORY_input]
PARENT_DIRECTORY = ' '.join(PARENT_DIRECTORY_components)

# Metadata tags that are never written to the output file
EXCLUDE_TAGS = ["ExifOffset"]

# Counters for written and skipped files, updated by save_metadata_to_txt()
files_written = 0
files_skipped = 0
def extract_metadata(image_path):
    """Return the EXIF metadata of an image as a ``{tag_name: value}`` dict.

    Numeric EXIF tag IDs are translated to names through ``ExifTags.TAGS``;
    IDs without a known name are kept as the key unchanged.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A dict of metadata. It is empty when the image carries no EXIF data,
        when the image object offers no EXIF reader, or when reading fails
        (in which case the error is printed rather than raised).
    """
    try:
        # The context manager closes the underlying file as soon as the EXIF
        # data has been read, instead of leaving one open handle per image.
        with Image.open(image_path) as image:
            # Not every Pillow image class provides _getexif(); look it up
            # defensively so such images count as "no data", not as errors.
            read_exif = getattr(image, "_getexif", None)
            exif_data = read_exif() if read_exif is not None else None

        if not exif_data:
            return {}
        return {
            ExifTags.TAGS.get(tag, tag): value
            for tag, value in exif_data.items()
        }
    except Exception as e:
        # Deliberately broad: one unreadable or corrupt file must not abort
        # the whole directory walk.
        print(f"Error extracting metadata from {image_path}: {e}")
        return {}
def save_metadata_to_txt(image_path, metadata):
    """Write *metadata* to a text file next to *image_path*.

    The output file has the image's base name with a ``.txt`` extension. If
    that file already exists, ``-1``, ``-2``, ... is appended to the base
    name until an unused name is found. Each entry is written as
    ``key: value`` on its own line, except keys listed in ``EXCLUDE_TAGS``.

    Args:
        image_path: Path of the source image; only used to derive the output
            path and for log messages.
        metadata: Mapping of tag name to value. ``bytes`` values are decoded
            as UTF-8 (falling back to Latin-1) and stripped of NUL characters.

    Side effects:
        Increments the module-level ``files_written`` or ``files_skipped``
        counter and prints a status line.
    """
    global files_written, files_skipped  # Module-level counters

    if not metadata:
        print(f"× No data found in {image_path} -- skipped")
        files_skipped += 1
        return  # Nothing to write

    # Pick an output path that does not overwrite an existing file.
    base_name = os.path.splitext(image_path)[0]
    txt_path = base_name + ".txt"
    count = 1
    while os.path.exists(txt_path):
        txt_path = f"{base_name}-{count}.txt"
        count += 1

    lines = []
    for key, value in metadata.items():
        if key in EXCLUDE_TAGS:
            continue
        if isinstance(value, bytes):
            try:
                value = value.decode("utf-8")
            except UnicodeDecodeError:
                # Latin-1 maps every byte to a character, so this cannot fail.
                value = value.decode("latin-1")
            value = value.replace("\x00", "")  # Remove null characters
        lines.append(f"{key}: {value}\n")

    content = "".join(lines)
    # Normalize carriage returns embedded in values so the file has
    # consistent line endings.
    content = content.replace("\r\n", "\n").replace("\r", "\n")
    # Drop the "UserComment: UNICODE" prefix so only the comment text remains.
    content = content.replace("UserComment: UNICODE", "")

    # Build the full text first and write it once, so the file on disk never
    # holds a half-processed version.
    with open(txt_path, "w", encoding="utf-8") as txt_file:
        txt_file.write(content)

    print(f"Metadata saved to {txt_path}")
    files_written += 1
def main():
    """Walk PARENT_DIRECTORY and save a metadata text file for each image.

    Every file whose name ends in .png, .jpg, .jpeg or .gif (case-insensitive)
    and does not start with "." is passed through extract_metadata() and
    save_metadata_to_txt(). The written/skipped totals are printed at the end.

    If PARENT_DIRECTORY is not an existing directory, an error is printed and
    nothing else happens; os.walk would otherwise yield nothing and the run
    would look like a successful scan of an empty folder.
    """
    if not os.path.isdir(PARENT_DIRECTORY):
        print(f"Error: '{PARENT_DIRECTORY}' is not a directory")
        return

    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif')
    for subdir, _, files in os.walk(PARENT_DIRECTORY):
        for file_name in files:
            # Skip hidden files; this also covers names starting with "._".
            if file_name.startswith('.'):
                continue
            if not file_name.lower().endswith(image_suffixes):
                continue
            image_path = os.path.join(subdir, file_name)
            metadata = extract_metadata(image_path)
            save_metadata_to_txt(image_path, metadata)

    # Print out the counts of written and skipped files
    print(f"\nFiles written: {files_written}")
    print(f"Files skipped: {files_skipped}\n")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment