Skip to content

Instantly share code, notes, and snippets.

@Querela
Created March 24, 2025 20:10
Show Gist options
  • Save Querela/c24b1b7d3ce520df6ae6c667151942af to your computer and use it in GitHub Desktop.
Save Querela/c24b1b7d3ce520df6ae6c667151942af to your computer and use it in GitHub Desktop.
[python][EXIF] EXIF aggregation statistics reporter
"""
EXIF statistics script.
Uses a JSON dump of EXIF data extracted via `exiftool`, groups values based on various criteria and plots the results.
Extract (and cache) EXIF data from image files:
$ exiftool -json -g -n images/DSCF*.JPG > exif-data.json
Run statistics:
$ python3 stats.py < exif-data.json
Or directly:
$ exiftool -json -g -n images/DSCF*.JPG | python3 stats.py
"""
import sys
import json
import math
import collections
import itertools
# Full block character; plot() repeats it once per whole cell of a bar.
C = "█"
# Partial block characters; plot() indexes this by the tenths digit (0-9) of
# the fractional bar length. Index 0 is a space, which plot() strips away.
CS = " ▏▎▎▍▌▌▋▊▉"
def extract(data, field, bins=None):
    """Collect the values of one EXIF field, optionally snapped to bins.

    Args:
        data: Iterable of per-image dicts (as loaded from the JSON dump).
        field: Name of the key to read from each entry's "EXIF" group.
        bins: Optional bin values; passed through to ``bin_values``.

    Returns:
        The list of field values (binned by ``bin_values`` when possible).
        Entries without an "EXIF" group, or whose "EXIF" group does not
        contain ``field``, are skipped.
    """
    # Optional manual filter (disabled): restrict to a single lens, e.g.
    #   if field != "LensModel": keep only entries whose
    #   e["EXIF"]["LensModel"] == "XF16-80mmF4 R OIS WR"
    collected = []
    for entry in data:
        # sanity check: ignore entries that carry no EXIF group at all
        if "EXIF" not in entry:
            continue
        exif = entry["EXIF"]
        # extract target value (only where the field is present)
        if field in exif:
            collected.append(exif[field])

    # binning of values
    return bin_values(collected, bins)
def deduplicate(values):
    """Drop entries whose EXIF ``DateTimeOriginal`` was already seen.

    The first entry for each timestamp is kept and the input order is
    preserved. Entries that have no "EXIF" group or no "DateTimeOriginal"
    value cannot be compared and are kept as they are.

    Args:
        values: List of per-image dicts (as loaded from the JSON dump).

    Returns:
        A new list with the duplicates removed. When anything was removed,
        a short summary line is printed.
    """
    seen = set()
    filtered = []
    for value in values:
        key = value.get("EXIF", {}).get("DateTimeOriginal")
        if key is None:
            # no timestamp to compare against -> cannot be a known duplicate
            filtered.append(value)
            continue
        if key in seen:
            continue
        seen.add(key)
        filtered.append(value)

    # Count the argument itself, not the module-level ``data`` global.
    total_all = len(values)
    total_filtered = len(filtered)
    if total_filtered != total_all:
        print(f"Filtered unique: {total_all} to {total_filtered}")
    return filtered
def bin_values(values, bins=None):
    """Replace each numeric value with the nearest entry of ``bins``.

    Args:
        values: List of values to bin.
        bins: Candidate bin values. When empty or ``None`` no binning is done.

    Returns:
        ``values`` itself (unchanged) when there are no bins, no values, or
        the first value is not an ``int``/``float``; otherwise a new list in
        which every value is replaced by its closest bin. On equal distance
        the smaller bin wins.
    """
    # binning only makes sense with bins and numeric input
    if not (bins and values and isinstance(values[0], (int, float))):
        return values

    def nearest(value):
        # order candidates by distance first, then by the bin value itself
        return min(bins, key=lambda candidate: (abs(candidate - value), candidate))

    return list(map(nearest, values))
def group_data(data, field):
    """Group entries by the value of one EXIF field.

    Entries without an "EXIF" group, or whose "EXIF" group does not contain
    ``field``, are skipped (the same rule ``extract`` applies) instead of
    raising ``KeyError``.

    Args:
        data: Iterable of per-image dicts (as loaded from the JSON dump).
        field: Name of the key inside each entry's "EXIF" group to group by.

    Returns:
        A dict mapping each distinct field value to the list of entries that
        have it. Keys appear in sorted order; entries within a group keep
        their input order.
    """
    def keyfunc(entry):
        return entry["EXIF"][field]

    # sanity check: only entries that actually carry the grouping field
    usable = [e for e in data if "EXIF" in e and field in e["EXIF"]]
    # groupby only merges adjacent items, so sort by the same key first
    usable.sort(key=keyfunc)
    return {
        key: list(members)
        for key, members in itertools.groupby(usable, keyfunc)
    }
def plot(values, bar_width=50, sortfunc=None, min_width_key=7, min_width_num=5):
    """Print a horizontal text bar chart of how often each value occurs.

    One line is printed per distinct value (label, count, percentage, bar),
    followed by a line with the total number of values. Nothing is printed
    when ``values`` is empty or contains only a single distinct value.

    Args:
        values: List of hashable values to count.
        bar_width: Bar length (in characters) of the most frequent value.
        sortfunc: Optional ``key`` function for ordering the rows; it
            receives ``(value, count)`` tuples. Defaults to natural tuple
            ordering.
        min_width_key: Minimum column width of the value labels.
        min_width_num: Minimum column width of the counts.
    """
    # stats
    cnt = collections.Counter(values)
    # nothing to compare: no values at all, or only one distinct value
    if len(cnt) <= 1:
        return

    # visualization
    # number of entries
    total = len(values)
    # precompute percentages (also used for bar length)
    percs = {binv: (num / total * 100) for binv, num in cnt.items()}

    # minimum and maximum for normalization
    perc_max = round(max(percs.values()))
    perc_min = 0
    # With very many distinct values the largest share can round to 0 %;
    # fall back to 1 so the normalization below never divides by zero.
    perc_span = (perc_max - perc_min) or 1
    percs_barlen = {
        binv: ((round(perc) - perc_min) / perc_span) * bar_width
        for binv, perc in percs.items()
    }

    # column widths: widest label / count, but never below the minimums
    bin_maxlen = max(min_width_key, max(len(str(binv)) for binv in cnt))
    num_maxlen = max(min_width_num, max(len(str(num)) for num in cnt.values()))

    # most_common() first, so rows with equal sort keys keep frequency order
    results = sorted(cnt.most_common(), key=sortfunc)
    for binv, num in results:
        barlen = percs_barlen[binv]
        # whole cells plus one partial cell for the fractional remainder
        partial = CS[int((barlen % 1) * 10)].strip()
        bar = f"{C * math.floor(barlen)}{partial}"
        print(f"{binv:{bin_maxlen}}: {num:{num_maxlen}d} ({percs[binv]:4.1f}%) {bar}")
    print(f" ♡ {total} images")
if __name__ == "__main__":
    # load data from pipe (JSON produced by `exiftool -json -g -n`)
    data = json.load(sys.stdin)
    data = deduplicate(data)

    print()
    print("## Overall stats")
    print()

    field = "LensModel"
    values = extract(data, field)
    print(f"### EXIF.{field}")
    plot(values)
    print()

    BINS_FocalLength = [16, 80, 18, 23, 27, 33, 35, 45, 50, 56, 60, 70, 100, 150, 200, 250, 300]
    # whole seconds plus the usual fractional shutter speeds (1/1 ... 1/64000)
    BINS_ExposureTime = [1, 2, 3, 4, 5, 10, 15, 30, 60] + [
        1 / v
        for v in [1, 2, 4, 8, 15, 30, 60, 125, 250, 500, 1000, 2000, 4000, 8000, 16000, 32000, 64000]
    ]

    def exposure_label(value):
        """Format a binned exposure time as `1/N` (below 1 s) or `N"`."""
        if value < 1:
            # round() instead of int(): the float reciprocal may land just
            # below the intended integer, which int() would truncate.
            return f"1/{round(1 / value)}"
        return f"{int(value)}\""

    def exposure_sort_key(item):
        """Sort `(label, count)` rows by exposure duration in seconds."""
        label = item[0]
        return 1 / int(label[2:]) if "/" in label else int(label[:-1])

    field = "FocalLength"
    values = extract(data, field, BINS_FocalLength)
    print(f"### EXIF.{field}")
    plot(values)
    print()

    data_grouped = group_data(data, "LensModel")
    for lens, lens_data in data_grouped.items():
        print(f"## Lens Stats - {lens}")
        print()

        # fields that are plotted as-is (optionally binned)
        for field, bins in (("FocalLength", BINS_FocalLength), ("FNumber", None), ("ISO", None)):
            values = extract(lens_data, field, bins)
            print(f"### EXIF.{field} - {lens}")
            plot(values)
            print()

        # exposure time: bin, then render as human-readable shutter speeds
        field = "ExposureTime"
        values = extract(lens_data, field, BINS_ExposureTime)
        values = [exposure_label(value) for value in values]
        print(f"### EXIF.{field} - {lens}")
        plot(values, sortfunc=exposure_sort_key)
        print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment