Skip to content

Instantly share code, notes, and snippets.

@itzmeanjan
Last active August 29, 2025 17:10
Show Gist options
  • Save itzmeanjan/ba8ab2a8f340ff5a5107c711744ea7c7 to your computer and use it in GitHub Desktop.
Save itzmeanjan/ba8ab2a8f340ff5a5107c711744ea7c7 to your computer and use it in GitHub Desktop.
Using Content Addressable Identifier (CID) for Fun and Profit
blake3==1.0.5
pillow==11.3.0
#!/usr/bin/python
"""
Simple Python script for sorting images (mostly wallpapers) into one of two buckets
- wide
- narrow
It's done based on form-factor of the image. I needed this script after I downloaded
a lot of wallpapers, from various sources listed on Reddit thread https://www.reddit.com/r/hyprland/comments/1n1r6bw/where_do_you_guys_go_for_wallpapers
I found an issue, most of these wallpapers had conflicting names and some popular wallpapers were repetitive.
Though I needed only unique wallpapers and as I was sorting them into only two buckets, I needed names to be collision-free.
Hence the introduction of the pseudo-random, 32-byte (64 hex characters) wide wallpaper file name, which is nothing but the 32-byte
BLAKE3 digest of the image file content - this is both content-addressable and, for all practical purposes, collision-free. This helped me in getting rid of duplicate wallpapers.
More on content-addressable identifiers @ https://github.com/ipfs/ipfs-docs/blob/5d7b1bbdbd63b1711dd9dcae03a8b5fb235d7aaf/docs/concepts/content-addressing.md
"""
from PIL import Image
import os
import shutil
import sys
import blake3
from pathlib import Path
# Digest length, in bytes, requested from BLAKE3 when deriving the new file
# name; the name is hex-encoded, so it is twice as many characters long.
PSEUDO_RANDOM_FILE_NAME_BYTE_LENGTH = 32
# Destination directories for the two buckets, given relative to the current
# working directory.
TARGET_DIRECTORY_FOR_WIDE_IMAGES = './wide'
TARGET_DIRECTORY_FOR_NARROW_IMAGES = './narrow'
# Following https://stackoverflow.com/a/58920124
# NOTE(review): per the linked answer, clearing this limit is what stops Pillow
# from rejecting very large images - confirm against the Pillow docs.
Image.MAX_IMAGE_PIXELS = None
def compute_blake3_hash_of_file(file_path: Path) -> str:
    """Return the hex-encoded BLAKE3 digest of a file's content.

    The file is fed to the hasher in fixed-size chunks instead of being read
    with a single call, so memory use stays bounded no matter how large the
    image is. Incremental hashing yields the same digest as hashing the whole
    content at once.

    Args:
        file_path: Path of the file to hash.

    Returns:
        Hex string encoding a digest of PSEUDO_RANDOM_FILE_NAME_BYTE_LENGTH
        bytes (two hex characters per byte).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    chunk_size = 1024 * 1024  # 1 MiB per read keeps peak memory small.
    hasher = blake3.blake3()
    with open(file_path, 'rb') as fd:
        # read() returns b'' at end of file, which ends the loop.
        while chunk := fd.read(chunk_size):
            hasher.update(chunk)
    return hasher.hexdigest(PSEUDO_RANDOM_FILE_NAME_BYTE_LENGTH)
def is_image_wide(image_path: str) -> bool:
    """Tell whether the image at *image_path* is wider than it is tall.

    The image is opened inside a ``with`` block so the underlying file handle
    is released as soon as the dimensions have been read; otherwise one handle
    per inspected image stays open until garbage collection.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        True when width is strictly greater than height; square and portrait
        images yield False.

    Raises:
        Whatever ``Image.open`` raises for a missing or unreadable file.
    """
    with Image.open(image_path) as img:
        width, height = img.size
    return width > height
def find_image_files(dir_path: str) -> set[str]:
    """Recursively collect paths of files whose names end in an image extension.

    Args:
        dir_path: Directory to walk, including all of its sub-directories.

    Returns:
        Set of paths, each built by joining the walked directory with the file
        name. Extension matching is case-insensitive.
    """
    recognised_suffixes = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp', '.svg')
    return {
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(dir_path)
        for name in names
        if name.lower().endswith(recognised_suffixes)
    }
def extract_wide_and_narrow_images(image_paths: set[str]) -> tuple[set[str], set[str]]:
    """Split image paths into a wide bucket and a narrow bucket.

    An image is wide when ``is_image_wide`` returns True for it; every other
    readable image (square ones included) is narrow. A file that cannot be
    opened as an image is reported and left out of both buckets, so a single
    unsupported or corrupt file no longer aborts the whole run.

    Args:
        image_paths: Paths of candidate image files.

    Returns:
        A ``(wide_images, narrow_images)`` tuple of disjoint sets.
    """
    wide_images: set[str] = set()
    narrow_images: set[str] = set()
    for image_path in image_paths:
        try:
            wide = is_image_wide(image_path)
        except OSError as e:
            # Pillow's "cannot identify image file" error and missing-file
            # errors are both OSError subclasses.
            print(f"Skipping unreadable image '{image_path}': {e}")
            continue
        if wide:
            wide_images.add(image_path)
        else:
            narrow_images.add(image_path)
    return (wide_images, narrow_images)
def move_images(images: set[str], target_dir: str):
    """Move images into *target_dir*, renaming each to its content digest.

    The new name is the BLAKE3 hex digest of the file content followed by the
    original file suffix. When a file with that name already exists in the
    target directory, the source is treated as a duplicate and left where it
    is. Failures are reported per file, naming the offending path, and do not
    stop the remaining moves.

    Args:
        images: Paths of the images to move.
        target_dir: Destination directory; created if it does not exist.
    """
    os.makedirs(target_dir, exist_ok=True)
    target_dir_path = Path(target_dir)  # Loop-invariant, so built once.
    for image in images:
        try:
            image_path = Path(image)
            pseudo_random_file_name = compute_blake3_hash_of_file(image_path) + image_path.suffix
            pseudo_random_file_path = target_dir_path.joinpath(pseudo_random_file_name)
            if pseudo_random_file_path.exists():
                # Same digest and suffix already present: duplicate content.
                continue
            shutil.move(image_path, pseudo_random_file_path)
        except Exception as e:
            # Deliberately best-effort: one bad file must not abort the batch.
            print(f"Error: failed to move '{image}': {e}")
def main():
    """Sort the images under the directory named on the command line.

    Expects exactly one argument: the source directory. After an interactive
    confirmation it finds the images, splits them into wide and narrow, and
    moves each group into its target directory.

    Exits with status 1 on a bad argument count, a source path that is not an
    existing directory, or an unrecognised answer; exits with status 0
    otherwise. ``sys.exit`` is used rather than the ``exit`` helper, which is
    only installed by the ``site`` module and is meant for interactive use.
    """
    if len(sys.argv) != 2:
        print("Expecting path to directory of source images")
        sys.exit(1)

    source_dir_path = sys.argv[1]
    # isdir, not exists: a path to a regular file is not a usable source.
    if not os.path.isdir(source_dir_path):
        print(f"Source directory '{source_dir_path}' doesn't exist")
        sys.exit(1)

    print(f"Will find images in {os.path.abspath(source_dir_path)}")
    print(f"Will put wide images in {os.path.abspath(TARGET_DIRECTORY_FOR_WIDE_IMAGES)}")
    print(f"Will put narrow images in {os.path.abspath(TARGET_DIRECTORY_FOR_NARROW_IMAGES)}")

    answer = input("Go? (y/n): ").lower().strip()
    if answer in ('n', 'no'):
        print("Ok")
        sys.exit(0)
    if answer not in ('y', 'yes'):
        print("Invalid input. Please enter 'y', 'yes', 'n', or 'no'.")
        sys.exit(1)

    images = find_image_files(source_dir_path)
    if not images:
        print("No images found!")
        sys.exit(0)
    print(f"Found total {len(images)} images")

    (wide, narrow) = extract_wide_and_narrow_images(images)
    print(f"{len(wide)}/{len(images)} images are of wide form-factor")
    print(f"{len(narrow)}/{len(images)} images are of narrow form-factor")

    move_images(wide, TARGET_DIRECTORY_FOR_WIDE_IMAGES)
    print("Moved all wide images to target directory")
    move_images(narrow, TARGET_DIRECTORY_FOR_NARROW_IMAGES)
    print("Moved all narrow images to target directory")
    sys.exit(0)


if __name__ == '__main__':
    main()
@itzmeanjan
Copy link
Author

HOW TO USE?

git clone https://gist.github.com/ba8ab2a8f340ff5a5107c711744ea7c7.git
pushd ba8ab2a8f340ff5a5107c711744ea7c7

python -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt

# It will create two directories in current working directory `wide` and `narrow`.
# Then it will recursively find all images (with well-known file extensions)
# in wallpaper-collection directory, put them into either bucket (actually directory), 
# while renaming each wallpaper to its corresponding BLAKE3 digest, preserving file extension.
python sort_images.py path/to/wallpaper-collection

deactivate
popd

This gist accompanies my blog post @ https://itzmeanjan.in/pages/using-cid-for-fun-and-profit.html

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment