Created
October 4, 2024 19:33
-
-
Save dreness/0676628bafe2b38fec5de0337e31fd43 to your computer and use it in GitHub Desktop.
Create browsable lists of your photos ordered by score for various kinds of image analysis that Photos.app already does
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# #!/usr/bin/env python | |
import sqlite3 | |
import pandas as pd | |
import os | |
from glob import glob | |
""" | |
This script reads a Photos SQLite database to produce views of the top scored assets in each category. | |
""" | |
ALLOWED_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.heic', '.heif', '.tiff', '.gif'} | |
SKIP_TABLES = {'ZDETECTIONTRAIT', 'ZMEMORY', 'ZFAILURESCORE', 'ZMOMENT', | |
'ZQUESTION', 'ZVISUALSEARCHATTRIBUTES', 'ZDETECTEDFACE', | |
'ZNOISESCORE', 'ZPHOTOSHIGHLIGHT', 'ZINTRUSIVEOBJECTPRESENCESCORE'} | |
# Connect to the SQLite database and retrieve score columns | |
def get_score_columns(conn): | |
query = "SELECT name FROM sqlite_master WHERE type='table';" | |
tables = pd.read_sql_query(query, conn) | |
score_info = {} | |
for table_name in tables['name']: | |
if table_name in SKIP_TABLES: | |
continue | |
columns = pd.read_sql_query(f"PRAGMA table_info({table_name})", conn) | |
score_columns = columns[columns['name'].str.contains('SCORE', case=False)]['name'].tolist() | |
if score_columns: | |
score_info[table_name] = score_columns | |
return score_info | |
# Extract assets with SCORE values and filenames | |
def get_assets_with_score_values(conn, score_columns): | |
asset_filenames = pd.read_sql_query("SELECT Z_PK, ZFILENAME FROM ZASSET", conn).set_index('Z_PK')['ZFILENAME'] | |
assets_data = [] | |
for table, columns in score_columns.items(): | |
asset_id_column = 'Z_PK' if table == "ZASSET" else "ZASSET" | |
for column in columns: | |
query = f"SELECT {asset_id_column}, {column} FROM {table} WHERE {column} IS NOT NULL" | |
df = pd.read_sql_query(query, conn) | |
df = df.rename(columns={asset_id_column: 'AssetID', column: f"{table}_{column}"}) | |
assets_data.append(df.set_index('AssetID')) | |
df_assets = pd.concat(assets_data, axis=1).reset_index() | |
df_assets['Filename'] = df_assets['AssetID'].map(asset_filenames.to_dict()).fillna('Unknown') | |
return df_assets.fillna(0) | |
def process_photo_scores(db_path, output_csv_path): | |
with sqlite3.connect(db_path) as conn: | |
score_columns = get_score_columns(conn) | |
df_assets = get_assets_with_score_values(conn, score_columns) | |
df_assets.to_csv(output_csv_path, index=False) | |
def has_allowed_extension(filename): | |
return any(filename.lower().endswith(ext) for ext in ALLOWED_EXTENSIONS) | |
def create_ordered_symlinks_for_scored_assets(originals_dir, symlinks_dir, assets_df, top_n=50): | |
os.makedirs(symlinks_dir, exist_ok=True) | |
score_cols = [col for col in assets_df.columns if 'SCORE' in col] | |
for score_col in score_cols: | |
print(f"\nProcessing {score_col}") | |
table_dir = os.path.join(symlinks_dir, score_col) | |
os.makedirs(table_dir, exist_ok=True) | |
sorted_assets = assets_df[assets_df['Filename'].apply(has_allowed_extension)].sort_values(by=score_col, ascending=False).head(top_n) | |
for i, asset in sorted_assets.iterrows(): | |
basename = os.path.splitext(asset['Filename'])[0] | |
source_path = glob(os.path.join(originals_dir, basename[0], f"{basename}*")) | |
if source_path: | |
dest_path = os.path.join(table_dir, f"{i + 1}_{asset['Filename']}") | |
try: | |
os.symlink(source_path[0], dest_path) | |
print(".", end="") | |
if i + 1 == top_n: | |
print("x", end="") | |
continue | |
except FileExistsError: | |
pass # Ignore if the symlink already exists | |
def main(): | |
# Queries the Photos database; writes a CSV file with asset scores | |
process_photo_scores('/Users/andre/Desktop/Photos copy.sqlite', 'assets_with_scores.csv') | |
# let's go pandas | |
assets_with_scores = pd.read_csv('assets_with_scores.csv') | |
# Create folders of symbolic links for top scored assets in each category | |
create_ordered_symlinks_for_scored_assets( | |
'/Users/andre/Pictures/Photos Library.photoslibrary/resources/derivatives', | |
'/Users/andre/Desktop/Scored_Assets', | |
assets_with_scores, | |
top_n=30 | |
) | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment