Created
February 24, 2026 06:57
-
-
Save SqrtRyan/51d9f4f24b559af3abc68f194cc2a164 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Nanobanana edit quality filter — minimal, self-contained. | |
| pip install numpy pillow imagehash fire | |
| >>> from filter_sample import filter_pair | |
| >>> filter_pair('before.png', 'after.png') | |
| True # True = keep (good edit), False = reject (bad edit) | |
| Derived from .claude_auto_score/best_detector.py. | |
| Ryan Burgert 2026 | |
| """ | |
| import numpy as np | |
| from PIL import Image | |
| import imagehash | |
| from itertools import combinations | |
def _load_rgb(path):
    """Open the image at *path* and return an in-memory RGB copy.

    The underlying file handle is closed before returning; ``convert``
    loads the pixel data, so the returned image no longer needs the file.
    """
    with Image.open(path) as img:
        return img.convert('RGB')


def _split_grid(img, rows, cols):
    """Cut *img* into ``rows * cols`` equally sized cells in row-major order.

    Cell size is derived from *img*'s own dimensions (integer division, so
    any remainder pixels on the right/bottom edge are dropped).
    """
    width, height = img.size
    cell_w, cell_h = width // cols, height // rows
    if cell_w == 0 or cell_h == 0:
        raise ValueError(
            'image of size {}x{} is too small for a {}x{} grid'.format(
                width, height, rows, cols
            )
        )
    return [
        img.crop((c * cell_w, r * cell_h, (c + 1) * cell_w, (r + 1) * cell_h))
        for r in range(rows)
        for c in range(cols)
    ]


def _gray_thumbnail(cell):
    """Return *cell* as a 64x64 grayscale float32 array scaled to [0, 1]."""
    small = cell.convert('L').resize((64, 64), Image.BILINEAR)
    return np.array(small, dtype=np.float32) / 255


def filter_pair(
    path_before,
    path_after,
    *,
    rows=4,
    cols=4,
    detect_no_edit=True,
    frozen_thresh=0.575,
    flicker_phash_thresh=34,
    flicker_jump_thresh=0.007261,
    no_edit_thresh=0.023247,
    inconsistency_thresh=0.580564,
):
    """Return True if the edit is good (keep), False if it is bad (reject).

    Loads two keyframe grid images, splits each into ``rows * cols`` cells
    (row-major order), computes per-cell features and applies the failure
    rules below. Any rule firing rejects the pair.

    Rules:
        F1  frozen/duplicated output: ``phash_ratio < frozen_thresh``, where
            phash_ratio is the mean pairwise phash distance among the
            after-cells divided by the same quantity for the before-cells.
            If the before-cells have zero mean pairwise distance the ratio
            is taken as 0.0, so the pair is rejected for any positive
            ``frozen_thresh``.
        F5a flickering edit: the largest before-vs-after phash distance is
            ``>= flicker_phash_thresh`` AND the largest jump between
            consecutive per-cell pixel differences is
            ``> flicker_jump_thresh``.
        C1  no meaningful edit (only when ``detect_no_edit``): the largest
            per-cell pixel MAE is ``< no_edit_thresh``.
        C2  inconsistent edit magnitude: the CV of the per-cell pixel MAEs
            is ``> inconsistency_thresh``.

    Performance on 222 labeled samples:
        detect_no_edit=True:  91.4% recall, 68% precision
            (stops more failures but also stops more good ones)
        detect_no_edit=False: 84.9% recall, 81% precision
            (fewer FPs but misses subtle no-edits)

    The no-edit rule (C1) catches samples where nothing meaningful was
    changed, but it also flags ~20 legitimate subtle edits (relighting,
    film grain, small object additions) as failures. Set
    ``detect_no_edit=False`` to disable this rule if you would rather keep
    subtle edits at the cost of missing some no-edit failures.

    Glossary:
        CV       = Coefficient of Variation (std / mean)
        FP       = False Positive
        MAE      = Mean Absolute Error
        pf       = per-frame (variable prefix)
        phash    = Perceptual Image Hash
        pw       = pairwise (variable prefix)
        no-edits = a failure mode where the output looks too similar to
                   the input

    Args:
        path_before: path to the original keyframe grid image.
        path_after: path to the edited keyframe grid image. It may have a
            different pixel size than the original; each image is split
            into cells using its own dimensions.
        rows: number of rows in the grid (>= 1).
        cols: number of columns in the grid (>= 1); ``rows * cols`` must be
            at least 2 because the frozen and flicker features compare
            cells with each other.
        detect_no_edit: if True, apply rule C1.
            True = higher recall (91.4%), more false positives (~40).
            False = higher precision (81%), misses subtle no-edits.
        frozen_thresh: threshold for rule F1.
        flicker_phash_thresh: phash-distance threshold for rule F5a.
        flicker_jump_thresh: consecutive-diff jump threshold for rule F5a.
        no_edit_thresh: threshold for rule C1.
        inconsistency_thresh: threshold for rule C2.

    Returns:
        bool: True to keep the edit, False to reject it.

    Raises:
        ValueError: if the grid shape is invalid (``rows`` or ``cols`` below
            1, or fewer than two cells in total), or if an image is smaller
            than the grid.
    """
    # Validate the grid before touching the filesystem so that bad arguments
    # fail fast with a clear message instead of a ZeroDivisionError or an
    # opaque numpy "zero-size array" error further down.
    if rows < 1 or cols < 1:
        raise ValueError(
            'rows and cols must both be >= 1, got rows={!r}, cols={!r}'.format(
                rows, cols
            )
        )
    if rows * cols < 2:
        raise ValueError(
            'the grid must contain at least 2 cells, got rows={!r}, cols={!r}'.format(
                rows, cols
            )
        )

    # Each image is gridded with its own geometry, so an edited grid that was
    # saved at a different resolution is still split along its cell borders.
    cells_b = _split_grid(_load_rgb(path_before), rows, cols)
    cells_a = _split_grid(_load_rgb(path_after), rows, cols)

    # Perceptual hashes; subtracting two hashes yields their Hamming distance.
    hashes_b = [imagehash.phash(cell, hash_size=8) for cell in cells_b]
    hashes_a = [imagehash.phash(cell, hash_size=8) for cell in cells_a]

    # phash_ratio: how similar after-frames are to each other vs before-frames.
    pw_b = np.mean([x - y for x, y in combinations(hashes_b, 2)])
    pw_a = np.mean([x - y for x, y in combinations(hashes_a, 2)])
    phash_ratio = pw_a / pw_b if pw_b > 0 else 0.0

    # Per-frame phash distance (before[i] vs after[i]).
    pf_phash_max = max(hb - ha for hb, ha in zip(hashes_b, hashes_a))

    # Per-frame pixel MAE (grayscale, 64x64, float).
    diffs = [
        float(np.mean(np.abs(_gray_thumbnail(cell_a) - _gray_thumbnail(cell_b))))
        for cell_b, cell_a in zip(cells_b, cells_a)
    ]
    pf_max_diff = max(diffs)
    # The small epsilon keeps the CV finite when every diff is exactly zero.
    pf_cv = float(np.std(diffs) / (np.mean(diffs) + 1e-10))
    max_jump = float(np.max(np.abs(np.diff(diffs))))

    # F1: Frozen/duplicated output
    if phash_ratio < frozen_thresh:
        return False

    # F5a: Flickering edit
    if pf_phash_max >= flicker_phash_thresh and max_jump > flicker_jump_thresh:
        return False

    # C1: No meaningful edit (optional - high FP rate on subtle edits)
    if detect_no_edit and pf_max_diff < no_edit_thresh:
        return False

    # C2: Inconsistent edit magnitude
    if pf_cv > inconsistency_thresh:
        return False

    return True
if __name__ == '__main__':
    # Command-line entry point: python-fire exposes filter_pair's parameters
    # as command-line arguments. Imported here so that merely importing this
    # module does not require `fire` to be installed.
    from fire import Fire

    Fire(filter_pair)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment