Skip to content

Instantly share code, notes, and snippets.

@SqrtRyan
Created February 24, 2026 06:57
Show Gist options
  • Select an option

  • Save SqrtRyan/9b2402d50f4fc19c8db592dba229ab4a to your computer and use it in GitHub Desktop.

Select an option

Save SqrtRyan/9b2402d50f4fc19c8db592dba229ab4a to your computer and use it in GitHub Desktop.
"""
Nanobanana edit quality filter — minimal, self-contained.
pip install numpy pillow imagehash fire rp
>>> from filter_sample import filter_pair
>>> filter_pair('before.png', 'after.png')
True # True = keep (good edit), False = reject (bad edit)
Derived from .claude_auto_score/best_detector.py.
Ryan Burgert 2026
"""
import numpy as np
from PIL import Image
import imagehash
from itertools import combinations
def _split_grid(image, rows, cols, cell_w, cell_h):
    """Crop *image* into rows*cols cells of cell_w x cell_h, in row-major order."""
    return [
        image.crop((c * cell_w, r * cell_h, (c + 1) * cell_w, (r + 1) * cell_h))
        for r in range(rows)
        for c in range(cols)
    ]


def _gray_thumbnail(cell):
    """Return *cell* as a 64x64 grayscale float32 array scaled to [0, 1]."""
    small = cell.convert("L").resize((64, 64), Image.BILINEAR)
    return np.array(small, dtype=np.float32) / 255


def filter_pair(
    path_before,
    path_after,
    *,
    rows: int = 4,
    cols: int = 4,
    detect_no_edit: bool = True,
    frozen_thresh: float = 0.575,
    flicker_phash_thresh: float = 34,
    flicker_jump_thresh: float = 0.007261,
    no_edit_thresh: float = 0.023247,
    inconsistency_thresh: float = 0.580564,
) -> bool:
    """
    Return True if the edit is good (keep), False if bad (reject).

    Loads two keyframe grid images, splits each into rows*cols cells
    (row-major order), computes features, and applies the failure rules
    below. Any rule firing = reject. No side effects other than reading
    the two image files.

    Performance on 222 labeled samples:
        detect_no_edit=True:  91.4% recall, 68% precision
            (stops more failures but also stops more good ones)
        detect_no_edit=False: 84.9% recall, 81% precision
            (fewer FPs but misses subtle no-edits)

    The no-edit rule (C1) catches samples where nothing meaningful was
    changed, but it also flags ~20 legitimate subtle edits (relighting, film
    grain, small object additions) as failures. Set detect_no_edit=False to
    disable this rule if you'd rather keep subtle edits at the cost of
    missing some no-edit failures.

    Glossary:
        CV       = Coefficient of Variation (std / mean)
        FP       = False Positive
        MAE      = Mean Absolute Error
        pf       = per-frame (variable prefix)
        phash    = Perceptual Image Hash
        pw       = pairwise (variable prefix)
        no-edits = A failure mode where the output looks too similar to the input

    Args:
        path_before: path to original keyframe grid image
        path_after: path to edited keyframe grid image
        rows: number of rows in the grid (must be >= 1)
        cols: number of columns in the grid (must be >= 1; rows * cols >= 2)
        detect_no_edit: if True, reject samples with very small pixel changes (C1 rule).
            True = higher recall (91.4%), more false positives (~40).
            False = higher precision (81%), misses subtle no-edits.
        frozen_thresh: phash_ratio below this = frozen/duplicated output
        flicker_phash_thresh: per-frame phash distance at or above this (AND jump) = flickering
        flicker_jump_thresh: max consecutive diff jump above this (AND phash) = flickering
        no_edit_thresh: max pixel diff below this = nothing was edited
        inconsistency_thresh: per-frame diff CV above this = inconsistent edit

    Returns:
        True to keep the pair, False to reject it.

    Raises:
        ValueError: if rows or cols is not positive, if the grid has fewer
            than two cells (the pairwise and consecutive-frame features are
            undefined for a single cell), or if the "before" image is too
            small to be split into rows x cols cells.
        Errors raised while opening or decoding the images propagate unchanged.

    >>> # filter_pair('before.png', 'after.png') -> bool
    """
    # Validate the grid before touching the filesystem so bad arguments fail
    # fast with a clear message instead of a ZeroDivisionError or a NumPy
    # "zero-size array" error deep inside the feature computation.
    if rows < 1 or cols < 1:
        raise ValueError(
            f"rows and cols must be positive, got rows={rows}, cols={cols}"
        )
    n = rows * cols
    if n < 2:
        raise ValueError(
            "grid must contain at least two cells; pairwise and "
            "consecutive-frame features are undefined for a single cell"
        )

    # Context managers release the file handles deterministically; convert()
    # returns an independent in-memory image that outlives the `with` block.
    with Image.open(path_before) as img:
        before = img.convert("RGB")
    with Image.open(path_after) as img:
        after = img.convert("RGB")

    # NOTE: the cell geometry is derived from the "before" image only and the
    # same boxes are applied to "after".
    w, h = before.size
    cell_w, cell_h = w // cols, h // rows
    if cell_w < 1 or cell_h < 1:
        raise ValueError(
            f"image of size {w}x{h} is too small for a {rows}x{cols} grid"
        )

    cells_b = _split_grid(before, rows, cols, cell_w, cell_h)
    cells_a = _split_grid(after, rows, cols, cell_w, cell_h)

    # Perceptual hashes; subtracting two hashes yields their Hamming distance.
    hashes_b = [imagehash.phash(cell, hash_size=8) for cell in cells_b]
    hashes_a = [imagehash.phash(cell, hash_size=8) for cell in cells_a]

    # phash_ratio: how similar after-frames are to each other vs before-frames
    # (mean pairwise distance among "after" cells / same among "before" cells).
    pw_b = np.mean([x - y for x, y in combinations(hashes_b, 2)])
    pw_a = np.mean([x - y for x, y in combinations(hashes_a, 2)])
    # NOTE: when all "before" cells hash identically (pw_b == 0) the ratio is
    # defined as 0.0, which trips the frozen rule for any positive threshold.
    phash_ratio = pw_a / pw_b if pw_b > 0 else 0.0

    # Per-frame phash distance (before[i] vs after[i])
    pf_phash_max = max(hb - ha for hb, ha in zip(hashes_b, hashes_a))

    # Per-frame pixel MAE (grayscale, 64x64, float)
    diffs = [
        float(np.mean(np.abs(_gray_thumbnail(ca) - _gray_thumbnail(cb))))
        for cb, ca in zip(cells_b, cells_a)
    ]
    pf_max_diff = max(diffs)
    # The epsilon keeps the CV finite when every per-frame diff is zero.
    pf_cv = float(np.std(diffs) / (np.mean(diffs) + 1e-10))
    # Largest change in edit magnitude between consecutive (row-major) cells.
    max_jump = float(np.max(np.abs(np.diff(diffs))))

    # F1: Frozen/duplicated output
    if phash_ratio < frozen_thresh:
        return False

    # F5a: Flickering edit (both conditions must hold)
    if pf_phash_max >= flicker_phash_thresh and max_jump > flicker_jump_thresh:
        return False

    # C1: No meaningful edit (optional — high FP rate on subtle edits)
    if detect_no_edit and pf_max_diff < no_edit_thresh:
        return False

    # C2: Inconsistent edit magnitude
    if pf_cv > inconsistency_thresh:
        return False

    return True
if __name__ == "__main__":
    # Command-line entry point: hand filter_pair to python-fire so its
    # parameters can be supplied as command-line arguments.
    # `fire` is imported lazily because only the CLI needs it.
    from fire import Fire

    Fire(filter_pair)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment