Created
March 24, 2025 20:10
-
-
Save Querela/c24b1b7d3ce520df6ae6c667151942af to your computer and use it in GitHub Desktop.
[python][EXIF] EXIF aggregation statistics reporter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
EXIF statistics script. | |
Will use a JSON dump of EXIF data exteacted via `exiftool`, group vales based on various criteria and plot the results. | |
Extract (and cache) EXIF data from images files: | |
$ exiftool -json -g -n images/DSCF*.JPG > exif-data.json | |
Run statistics: | |
$ python3 stats.py < exif-data.json | |
Or directly: | |
$ exiftool -json -g -n images/DSCF*.JPG | python3 stats.py | |
""" | |
import sys | |
import json | |
import math | |
import collections | |
import itertools | |
# Full block character; plot() repeats it once per whole unit of bar length.
C = "█"
# Partial block characters; plot() indexes this by the tenths digit of a bar's
# fractional length. Index 0 is a space, which plot() strips away.
CS = " ▏▎▎▍▌▌▋▊▉"
def extract(data, field, bins=None):
    """Collect the values of one EXIF field, optionally snapped to bins.

    Args:
        data: Iterable of per-image dicts; only those with an "EXIF" key
            are considered.
        field: Name of the key to read from each record's "EXIF" dict.
            Records whose "EXIF" dict lacks it are skipped.
        bins: Optional bin values handed on to ``bin_values``.

    Returns:
        The result of ``bin_values`` applied to the collected values.
    """
    collected = []
    for entry in data:
        # sanity check: ignore records without an EXIF group
        if "EXIF" not in entry:
            continue
        exif = entry["EXIF"]
        # only records that actually carry the target field contribute
        if field in exif:
            collected.append(exif[field])

    return bin_values(collected, bins)
def deduplicate(values):
    """Drop records that share an EXIF "DateTimeOriginal" with an earlier one.

    The first record for each timestamp is kept and input order is preserved.
    Records without an "EXIF" group or without a "DateTimeOriginal" entry
    cannot be identified as duplicates and are always kept.

    Args:
        values: List of per-image dicts.

    Returns:
        A new list with duplicates removed. When anything was removed, a
        one-line summary is printed.
    """
    seen = set()
    filtered = []
    for entry in values:
        key = entry.get("EXIF", {}).get("DateTimeOriginal")
        if key is None:
            # no timestamp to compare on: keep rather than crash or drop
            filtered.append(entry)
            continue
        if key in seen:
            continue
        seen.add(key)
        filtered.append(entry)

    # Count the argument itself, not a module-level variable named `data`.
    total_all = len(values)
    total_filtered = len(filtered)
    if total_filtered != total_all:
        print(f"Filtered unique: {total_all} to {total_filtered}")
    return filtered
def bin_values(values, bins=None):
    """Snap each numeric value to the nearest entry of ``bins``.

    Args:
        values: List of values to bin.
        bins: Candidate bin values. When falsy, no binning happens.

    Returns:
        ``values`` unchanged when there are no bins, no values, or the first
        value is not an int/float; otherwise a new list holding, for each
        value, the closest bin (the smaller bin wins on equal distance).
    """
    nothing_to_do = (
        not bins
        or not values
        or not isinstance(values[0], (int, float))
    )
    if nothing_to_do:
        return values

    snapped = []
    for value in values:
        # order candidates by distance first, then by the bin value itself
        nearest = min(bins, key=lambda candidate: (abs(candidate - value), candidate))
        snapped.append(nearest)
    return snapped
def group_data(data, field):
    """Group records by the value of one EXIF field.

    Records without an "EXIF" group, or whose "EXIF" dict lacks ``field``,
    are skipped (the same policy ``extract`` applies) instead of raising
    KeyError.

    Args:
        data: Iterable of per-image dicts.
        field: Name of the "EXIF" key to group on.

    Returns:
        Dict mapping each distinct field value to the list of records that
        carry it. Keys appear in sorted order; records within a group keep
        their input order.
    """
    def keyfunc(entry):
        return entry["EXIF"][field]

    # sanity check: only records that actually have the grouping field
    usable = [e for e in data if "EXIF" in e and field in e["EXIF"]]
    # groupby only merges adjacent items, so sort by the same key first
    usable.sort(key=keyfunc)
    return {key: list(members) for key, members in itertools.groupby(usable, keyfunc)}
def plot(values, bar_width=50, sortfunc=None, min_width_key=7, min_width_num=5):
    """Print a horizontal text bar chart of how often each value occurs.

    One line is printed per distinct value (label, count, percentage, bar),
    followed by a footer with the total count. Nothing is printed when there
    are no values or only one distinct value.

    Args:
        values: Iterable of hashable values to count.
        bar_width: Bar length, in characters, given to the most frequent value.
        sortfunc: Optional key function applied to ``(value, count)`` tuples
            to order the rows; ``None`` sorts the tuples themselves.
        min_width_key: Minimum width of the label column.
        min_width_num: Minimum width of the count column.
    """
    # stats
    cnt = collections.Counter(values)
    # Empty input has nothing to draw (and would break max() below); a single
    # distinct value is deliberately not plotted.
    if len(cnt) <= 1:
        return

    # visualization
    # number of entries
    total = sum(cnt.values())
    # precompute percentages (also used for bar length)
    percs = {binv: (num / total * 100) for binv, num in cnt.items()}

    # minimum and maximum for normalization
    perc_min = 0
    # With many rare values the largest share can round to 0; clamp to 1 so
    # the normalization below never divides by zero.
    perc_max = max(round(max(percs.values())), 1)
    percs_barlen = {
        binv: (((round(perc) - perc_min) / (perc_max - perc_min)) * bar_width)
        for binv, perc in percs.items()
    }

    # column widths: widest entry, but never below the requested minimum
    bin_maxlen = max(max(len(str(binv)) for binv in cnt), min_width_key)
    num_maxlen = max(max(len(str(val)) for val in cnt.values()), min_width_num)

    # most_common() order is the tie-breaker because sorted() is stable
    results = sorted(cnt.most_common(), key=sortfunc)
    for binv, num in results:
        barlen = percs_barlen[binv]
        # whole cells, then one partial cell chosen by the tenths digit
        bar = f"{C*math.floor(barlen)}{CS[int((barlen % 1) * 10)].strip()}"
        print(f"{binv:{bin_maxlen}}: {num:{num_maxlen}d} ({percs[binv]:4.1f}%) {bar}")
    print(f" ♡ {total} images")
if __name__ == "__main__":
    # Report driver: read the exiftool JSON dump from stdin, drop duplicate
    # shots, then print overall charts followed by per-lens charts.

    # load data from pipe
    data = json.load(sys.stdin)
    data = deduplicate(data)

    # focal lengths are snapped to these values before counting
    BINS_FocalLength = [16, 80, 18, 23, 27, 33, 35, 45, 50, 56, 60, 70, 100, 150, 200, 250, 300]
    # exposure-time bins: whole seconds plus the usual 1/n fractions
    # (loop-invariant, so built once instead of once per lens)
    BINS_ExposureTime = [1, 2, 3, 4, 5, 10, 15, 30, 60] + [
        1 / v
        for v in [1, 2, 4, 8, 15, 30, 60, 125, 250, 500, 1000, 2000, 4000, 8000, 16000, 32000, 64000]
    ]

    print()
    print("## Overall stats")
    print()

    field = "LensModel"
    values = extract(data, field)
    print(f"### EXIF.{field}")
    plot(values)
    print()

    field = "FocalLength"
    values = extract(data, field, BINS_FocalLength)
    print(f"### EXIF.{field}")
    plot(values)
    print()

    data_grouped = group_data(data, "LensModel")
    for lens, lens_data in data_grouped.items():
        print(f"## Lens Stats - {lens}")
        print()

        field = "FocalLength"
        values = extract(lens_data, field, BINS_FocalLength)
        print(f"### EXIF.{field} - {lens}")
        plot(values)
        print()

        field = "FNumber"
        values = extract(lens_data, field)
        print(f"### EXIF.{field} - {lens}")
        plot(values)
        print()

        field = "ISO"
        values = extract(lens_data, field)
        print(f"### EXIF.{field} - {lens}")
        plot(values)
        print()

        field = "ExposureTime"
        values = extract(lens_data, field)
        values = bin_values(values, BINS_ExposureTime)
        # Label sub-second times as 1/n and longer ones as n". round() rather
        # than int(): 1/(1/n) is not guaranteed to round-trip exactly in
        # floating point, and truncation would turn 124.999... into 1/124.
        values = [f"1/{round(1/value)}" if value < 1 else f"{int(value)}\"" for value in values]
        print(f"### EXIF.{field} - {lens}")
        # order rows by the exposure time in seconds parsed back from the label
        plot(values, sortfunc=lambda x: 1/int(x[0][2:]) if "/" in x[0] else int(x[0][:-1]))
        print()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment