Skip to content

Instantly share code, notes, and snippets.

@greg-randall
Created June 8, 2026 15:00
Show Gist options
  • Select an option

  • Save greg-randall/35ebd41c98e1a69c91c6d76d46b63b88 to your computer and use it in GitHub Desktop.

Select an option

Save greg-randall/35ebd41c98e1a69c91c6d76d46b63b88 to your computer and use it in GitHub Desktop.
detect_faces_overlay.py
#!/usr/bin/env python3
"""Run all four face detectors and produce a single overlay image per input.
Each model's bounding box is drawn in a distinct colour as a semi-transparent
fill (opacity set by ``ALPHA``) with a more opaque outline (``LINE_ALPHA``),
and a legend is rendered at the bottom of the image.
Output: ``{stem}_overlay{ext}``
"""
import sys
import urllib.request
from pathlib import Path
import cv2
import numpy as np
# --- Configuration -----------------------------------------------------------
# File extensions (lower-case, leading dot) that count as input images.
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".webp", ".bmp", ".tiff", ".tif"}
# One (name, colour-BGR) pair per detector; the name is the key into BUILDERS.
# colour order chosen for contrast: orange, blue, green, red
DETECTOR_DEFS = [
("mediapipe", (0, 165, 255)),
("retinaface", (255, 100, 0)),
("scrfd", (0, 220, 0)),
("yolo", (0, 0, 255)),
]
DETECTORS = [] # NOTE(review): not populated or read anywhere in the visible code (main() builds its own local list) — confirm before relying on it
ALPHA = 0.45 # opacity of filled boxes (0 = invisible, 1 = solid)
LINE_ALPHA = 0.85 # box outline is more opaque for visibility
LEGEND_HEIGHT = 50 # height in pixels of the legend bar appended below the image
FONT = cv2.FONT_HERSHEY_DUPLEX # font used for the legend labels
# ---------------------------------------------------------------------------
# Detector builders
# ---------------------------------------------------------------------------
def build_mediapipe():
    """Create a MediaPipe-backed face detector.

    The ``mediapipe`` import is deferred to call time so that a missing
    package only disables this one detector.

    Returns:
        A function ``detect(img_bgr)`` that returns a list of
        ``(x1, y1, x2, y2, conf)`` tuples in pixel coordinates.
    """
    import mediapipe as mp

    detector = mp.solutions.face_detection.FaceDetection(
        model_selection=1, min_detection_confidence=0.5)

    def detect(img_bgr):
        # MediaPipe is fed an RGB frame, so convert from OpenCV's BGR first.
        frame = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
        frame.flags.writeable = False
        outcome = detector.process(frame)
        if not outcome.detections:
            return []

        height, width = img_bgr.shape[:2]
        boxes = []
        for hit in outcome.detections:
            # Relative box: fractions of the frame size, scaled to pixels below.
            rel = hit.location_data.relative_bounding_box
            left = max(0, int(rel.xmin * width))
            top = max(0, int(rel.ymin * height))
            right = min(width - 1, int((rel.xmin + rel.width) * width))
            bottom = min(height - 1, int((rel.ymin + rel.height) * height))
            boxes.append((left, top, right, bottom, float(hit.score[0])))
        return boxes

    return detect
def build_retinaface():
    """Create a RetinaFace-backed face detector.

    This builder must run before the MediaPipe one: per the note in
    ``main()``, RetinaFace's Keras model conflicts with MediaPipe's TF Lite
    backend when loaded second.

    Returns:
        A function ``detect(img_bgr)`` that returns a list of
        ``(x1, y1, x2, y2, conf)`` tuples in pixel coordinates, clamped to
        the image bounds.
    """
    from retinaface import RetinaFace

    def detect(img_bgr):
        h, w = img_bgr.shape[:2]
        # Pass the OpenCV BGR frame unchanged: the library treats ndarray
        # input like the result of cv2.imread and does its own BGR->RGB swap,
        # so converting here would feed the model channel-swapped pixels.
        result = RetinaFace.detect_faces(img_bgr)
        faces = []
        # Anything other than a dict is treated as "no faces found".
        if isinstance(result, dict):
            for val in result.values():
                if isinstance(val, dict) and "facial_area" in val:
                    x1, y1, x2, y2 = val["facial_area"]
                    # Fall back to a near-certain score when none is reported.
                    score = float(val.get("score", 0.999))
                    faces.append((
                        max(0, int(x1)), max(0, int(y1)),
                        min(w - 1, int(x2)), min(h - 1, int(y2)),
                        score,
                    ))
        return faces

    return detect
def build_scrfd():
    """Create an InsightFace (SCRFD) face detector.

    Loads the ``buffalo_l`` model pack restricted to its detection module.

    Returns:
        A function ``detect(img_bgr)`` that returns a list of
        ``(x1, y1, x2, y2, conf)`` tuples in pixel coordinates, clamped to
        the image bounds.
    """
    from insightface.app import FaceAnalysis

    app = FaceAnalysis(name="buffalo_l", allowed_modules=["detection"])
    # NOTE(review): ctx_id=-1 presumably selects CPU execution — confirm.
    app.prepare(ctx_id=-1)

    def detect(img_bgr):
        h, w = img_bgr.shape[:2]
        # InsightFace expects OpenCV-style BGR frames and swaps channels
        # internally, so the frame is passed through without conversion.
        detections = app.get(img_bgr)
        faces = []
        for d in detections:
            x1, y1, x2, y2 = d.bbox.astype(int).tolist()
            conf = float(d.det_score)
            faces.append((max(0, x1), max(0, y1),
                          min(w - 1, x2), min(h - 1, y2), conf))
        return faces

    return detect
def build_yolo():
    """Create a YOLOv11-face detector (weights from akanametov/yolo-face).

    The weights are cached under ``~/.cache/face-detect`` and downloaded on
    first use.  The download goes to a temporary ``.part`` file that is
    renamed into place only once complete, so an interrupted transfer can
    never leave a truncated file that later runs would mistake for a valid
    model.

    Returns:
        A function ``detect(img_bgr)`` that returns a list of
        ``(x1, y1, x2, y2, conf)`` tuples in pixel coordinates, clamped to
        the image bounds.

    Raises:
        Whatever ``urllib.request.urlretrieve`` or ``YOLO(...)`` raise when
        the download or model load fails; ``main()`` reports it as a skip.
    """
    from ultralytics import YOLO

    model_url = (
        "https://github.com/akanametov/yolo-face/releases/download/1.0.0/"
        "yolov11n-face.pt"
    )
    model_path = Path.home() / ".cache" / "face-detect" / "yolov11n-face.pt"
    if not model_path.exists():
        model_path.parent.mkdir(parents=True, exist_ok=True)
        print(f" Downloading YOLOv11-face model to {model_path} ...")
        # Download next to the final path, then rename in one step.  A
        # leftover .part file from a failed attempt is simply overwritten.
        partial_path = model_path.with_name(model_path.name + ".part")
        urllib.request.urlretrieve(model_url, str(partial_path))
        partial_path.replace(model_path)
        print(" Download complete.")
    # NOTE(review): the downloaded checkpoint is not checksum-verified, and
    # .pt files are typically pickle-based — confirm the source is trusted.
    model = YOLO(str(model_path))

    def detect(img_bgr):
        h, w = img_bgr.shape[:2]
        results = model(img_bgr, verbose=False)
        faces = []
        for r in results:
            if r.boxes is None:
                continue
            for box in r.boxes:
                x1, y1, x2, y2 = box.xyxy[0].tolist()
                conf = float(box.conf[0])
                faces.append((max(0, int(x1)), max(0, int(y1)),
                              min(w - 1, int(x2)), min(h - 1, int(y2)), conf))
        return faces

    return detect
# Maps each detector name used in DETECTOR_DEFS to the zero-argument builder
# that returns its detect(img_bgr) function; main() looks builders up here.
BUILDERS = {
"mediapipe": build_mediapipe,
"retinaface": build_retinaface,
"scrfd": build_scrfd,
"yolo": build_yolo,
}
# ---------------------------------------------------------------------------
# Drawing
# ---------------------------------------------------------------------------
def draw_overlay(img_bgr, all_faces, legend_entries):
    """Draw semi-transparent boxes for every model plus a legend bar.

    Args:
        img_bgr: 3-channel BGR image; it is not modified.
        all_faces: list of ``(name, colour, [(x1, y1, x2, y2, conf), ...])``
            tuples, one per model.
        legend_entries: list of ``(name, colour)`` pairs shown in the legend.

    Returns:
        A new uint8 BGR image: the blended input with a legend bar of
        ``LEGEND_HEIGHT`` rows stacked underneath.

    Where boxes from several models overlap, the model drawn last determines
    the colour of the shared pixels.
    """
    h, w = img_bgr.shape[:2]

    # --- Build overlays for filled rectangles and outlines -------------------
    overlay_fill = np.zeros((h, w, 3), dtype=np.uint8)
    overlay_line = np.zeros((h, w, 3), dtype=np.uint8)
    # Explicit coverage masks record where boxes were drawn.  Deriving the
    # mask from non-zero overlay pixels would silently drop a black colour.
    fill_cover = np.zeros((h, w), dtype=np.uint8)
    line_cover = np.zeros((h, w), dtype=np.uint8)
    for _name, colour, faces in all_faces:
        for (x1, y1, x2, y2, _conf) in faces:
            cv2.rectangle(overlay_fill, (x1, y1), (x2, y2), colour, -1)
            cv2.rectangle(fill_cover, (x1, y1), (x2, y2), 255, -1)
            cv2.rectangle(overlay_line, (x1, y1), (x2, y2), colour, 2)
            cv2.rectangle(line_cover, (x1, y1), (x2, y2), 255, 2)

    # Alpha-blend ONLY where boxes were drawn (the rest stays original).
    img = img_bgr.astype(np.float32)
    fill_mask = fill_cover.astype(bool)
    if fill_mask.any():
        img[fill_mask] = (img[fill_mask] * (1 - ALPHA)
                          + overlay_fill[fill_mask].astype(np.float32) * ALPHA)
    # Outlines go on top of the fills so they stay clearly visible.
    line_mask = line_cover.astype(bool)
    if line_mask.any():
        img[line_mask] = (img[line_mask] * (1 - LINE_ALPHA)
                          + overlay_line[line_mask].astype(np.float32) * LINE_ALPHA)
    img = img.clip(0, 255).astype(np.uint8)

    # --- Legend bar at the bottom --------------------------------------------
    legend = np.full((LEGEND_HEIGHT, w, 3), 30, dtype=np.uint8)  # dark grey
    n = len(legend_entries)
    cell_w = w // max(n, 1)  # max() guards against an empty legend
    for i, (name, colour) in enumerate(legend_entries):
        x0 = i * cell_w
        # colour swatch with a thin white border
        cv2.rectangle(legend, (x0 + 8, 8), (x0 + 36, LEGEND_HEIGHT - 8), colour, -1)
        cv2.rectangle(legend, (x0 + 8, 8), (x0 + 36, LEGEND_HEIGHT - 8), (255, 255, 255), 1)
        # label
        cv2.putText(legend, name, (x0 + 46, LEGEND_HEIGHT // 2 + 6),
                    FONT, 0.55, (255, 255, 255), 1, cv2.LINE_AA)
    return np.vstack([img, legend])
def find_image_files(folder):
    """Return the original images in *folder*, sorted by path.

    A file is kept when its extension (case-insensitive) is in
    ``IMAGE_EXTENSIONS`` and its stem does not END with one of the output
    markers written by previous runs (e.g. ``photo_overlay.jpg``).  Matching
    on the end of the stem keeps originals such as ``my_yolo_trip.jpg`` that
    merely contain a marker somewhere in their name.

    Args:
        folder: ``pathlib.Path`` of the directory to scan (not recursive).

    Returns:
        A list of ``Path`` objects; empty when nothing qualifies.
    """
    skip_suffixes = ("_mediapipe", "_retinaface", "_scrfd", "_yolo",
                     "_boundingbox", "_overlay")
    images = []
    for entry in sorted(folder.iterdir()):
        if not entry.is_file():
            continue
        if entry.suffix.lower() not in IMAGE_EXTENSIONS:
            continue
        # str.endswith accepts a tuple, so one call checks every marker.
        if entry.stem.endswith(skip_suffixes):
            continue
        images.append(entry)
    return images
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main():
    """Run every available detector on each image in the current directory.

    For each input image an overlay is written next to it as
    ``{stem}_overlay{ext}``.  Detectors that fail to initialise are skipped;
    a detector that fails on a single image is reported and left out of that
    image's overlay.  Exits with status 0 when there are no images and with
    status 1 when no detector could be initialised.
    """
    folder = Path.cwd()
    images = find_image_files(folder)
    if not images:
        print("No image files found.")
        sys.exit(0)
    print(f"Found {len(images)} image(s).\n")

    # --- Initialise detectors ------------------------------------------------
    # RetinaFace MUST load first — its Keras model conflicts with MediaPipe's
    # TF Lite backend otherwise.
    init_sequence = sorted(DETECTOR_DEFS,
                           key=lambda x: (0 if x[0] == "retinaface" else 1, x[0]))
    print("Initialising detectors ...")
    detectors = []  # (name, colour, detect_fn)
    legend_entries = []  # (name, colour) — only for models that loaded
    for name, colour in init_sequence:
        # Broad catch is deliberate: any import, download or model-load
        # failure should disable just this detector, not the whole run.
        try:
            fn = BUILDERS[name]()
        except Exception as exc:
            print(f" [SKIP] {name}: {exc}")
            continue
        detectors.append((name, colour, fn))
        legend_entries.append((name, colour))
        print(f" {name} ready.")
    if not detectors:
        print("No detectors available.")
        sys.exit(1)
    print(f"\nColours: {' | '.join(n for n, _ in legend_entries)}")
    print()

    # --- Process images ------------------------------------------------------
    for img_path in images:
        img_bgr = cv2.imread(str(img_path))
        if img_bgr is None:
            print(f"[SKIP] Could not read: {img_path.name}")
            continue
        # Drop an alpha channel if present; the detectors take 3-channel BGR.
        if img_bgr.ndim >= 3 and img_bgr.shape[2] == 4:
            img_bgr = img_bgr[:, :, :3]
        print(f"Processing: {img_path.name} ({img_bgr.shape[1]}x{img_bgr.shape[0]})")
        all_faces = []
        for name, colour, detect_fn in detectors:
            try:
                faces = detect_fn(img_bgr)
            except Exception as exc:
                print(f" {name:14s}: ERROR — {exc}")
                continue
            all_faces.append((name, colour, faces))
            print(f" {name:14s}: {len(faces)} face(s)")
        out_img = draw_overlay(img_bgr, all_faces, legend_entries)
        out_path = folder / f"{img_path.stem}_overlay{img_path.suffix}"
        # cv2.imwrite reports failure through its return value, so check it
        # instead of announcing an output file that was never written.
        if cv2.imwrite(str(out_path), out_img):
            print(f" -> {out_path.name}")
        else:
            print(f" [ERROR] Could not write: {out_path.name}")
    print("\nDone.")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment