Skip to content

Instantly share code, notes, and snippets.

@atadams
Created January 26, 2026 21:54
Show Gist options
  • Select an option

  • Save atadams/1583976080e3b96d38fdd6d3e88255f5 to your computer and use it in GitHub Desktop.

Select an option

Save atadams/1583976080e3b96d38fdd6d3e88255f5 to your computer and use it in GitHub Desktop.
Hist.py
import os
import rawpy
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import pandas as pd
from PIL import Image
# Weights used to collapse an RGB triplet into a single luminosity value.
_LUMA_WEIGHTS = (0.2989, 0.5870, 0.1140)
# Number of distinct intensity levels in the 8-bit data being plotted.
_LEVELS = 256


def _to_luminosity(rgb):
    """Collapse an (H, W, >=3) RGB array into an 8-bit luminosity plane."""
    luma = np.dot(rgb[:, :, :3], _LUMA_WEIGHTS)
    # Round rather than truncate: the weights sum to 0.9999, so a plain
    # uint8 cast would map pure white (255, 255, 255) to 254 and the
    # luminosity histogram could never show clipping at level 255.
    return np.clip(np.rint(luma), 0, _LEVELS - 1).astype(np.uint8)


def _draw_histogram(ax, data, label, is_combined):
    """Draw a per-level histogram of ``data`` on ``ax``.

    When ``is_combined`` is true, ``data`` is indexed as (H, W, channel) and
    the first three channels are overlaid in red, green and blue; otherwise
    ``data`` is flattened and drawn as a single series.
    """
    # One bin per integer level.  Without an explicit range matplotlib spreads
    # the 256 bins between the data's own min and max, which misaligns bins
    # with integer levels and produces comb/gap artefacts.
    hist_kwargs = {"bins": _LEVELS, "range": (0, _LEVELS)}
    if is_combined:
        for channel_idx, channel in enumerate(("red", "green", "blue")):
            ax.hist(
                data[:, :, channel_idx].ravel(),
                color=channel,
                alpha=0.4,
                label=channel.capitalize(),
                **hist_kwargs,
            )
        # The per-channel labels are only visible once a legend is drawn.
        ax.legend(loc="upper right")
    else:
        ax.hist(
            data.ravel(),
            color="black" if label == "Luminosity" else label.lower(),
            alpha=0.7,
            **hist_kwargs,
        )
    # The bar for level 255 occupies [255, 256], so the axis must reach 256.
    ax.set_xlim([0, _LEVELS])


def _save_audit_chart(out_dir, bins, bucket_labels, data_raw, data_jpeg,
                      label, filename, is_combined=False):
    """Render one audit figure and save it as ``<out_dir>/<filename>.png``.

    The figure has the RAW histogram (log y-axis) and the JPEG histogram on
    the top row, and a table of the first ten ``bins`` bucket counts for each
    source on the bottom row.
    """
    fig = plt.figure(figsize=(16, 10), facecolor="#f0f0f0")
    try:
        gs = gridspec.GridSpec(2, 2, height_ratios=[3, 1], figure=fig)

        # Top-left: RAW histogram.
        ax_raw = fig.add_subplot(gs[0, 0])
        _draw_histogram(ax_raw, data_raw, label, is_combined)
        ax_raw.set_title(f"RAW {label} (Sensor Linearity)")
        ax_raw.set_yscale("log")

        # Top-right: JPEG histogram.
        ax_jpeg = fig.add_subplot(gs[0, 1])
        _draw_histogram(ax_jpeg, data_jpeg, label, is_combined)
        ax_jpeg.set_title(f"JPEG {label} (Payload Tonal Curve)")

        # Bottom row: bucket-count tables for RAW and JPEG.
        sources = ((data_raw, "RAW"), (data_jpeg, "JPEG"))
        for col, (data, title) in enumerate(sources):
            ax_tbl = fig.add_subplot(gs[1, col])
            ax_tbl.axis("off")
            # For the combined chart the table bins the per-pixel mean of
            # the channels; otherwise it bins the single plane directly.
            values = np.mean(data, axis=2) if is_combined else data
            counts, _ = np.histogram(values, bins=bins)
            df = pd.DataFrame(
                list(zip(bucket_labels[:10], counts[:10])),
                columns=["Range", "Pixel Count"],
            )
            tbl = ax_tbl.table(
                cellText=df.values,
                colLabels=df.columns,
                loc="center",
                cellLoc="center",
            )
            tbl.set_fontsize(9)
            ax_tbl.set_title(f"{title} Shadow Buckets (First 100 levels)")

        fig.tight_layout()
        fig.savefig(os.path.join(out_dir, f"{filename}.png"), dpi=150)
    finally:
        # Always release the figure so a failure on one image does not leak
        # figures across a batch run.
        plt.close(fig)


def generate_forensic_histograms(img_path, out_root="/Volumes/data/Histogram"):
    """Write five histogram audit charts (PNG) for one image file.

    The file is decoded twice: through rawpy (camera white balance, no
    auto-brightening, 8-bit output) and through Pillow converted to RGB.
    Charts are produced for luminosity, red, green, blue and combined RGB,
    and saved in ``<out_root>/<image base name>_Full_Audit``.

    Args:
        img_path: Path of the image file to analyse.
        out_root: Directory under which the per-image audit folder is
            created.  Defaults to the original hard-coded location.

    Returns:
        None.  If the output directory cannot be created or the image cannot
        be read, a message is printed and the function returns early.
    """
    base_name = os.path.splitext(os.path.basename(img_path))[0]
    # Volume paths on macOS need the exact capitalisation of the mount point.
    out_dir = os.path.join(out_root, f"{base_name}_Full_Audit")
    try:
        os.makedirs(out_dir, exist_ok=True)
    except OSError as e:
        print(f"Error creating directory {out_dir}: {e}")
        return

    # 1. Extraction: rawpy decode and Pillow decode of the same path.
    # NOTE(review): Pillow is handed the same path as rawpy; what it decodes
    # from a RAW container depends on Pillow's format support - confirm it
    # yields the intended embedded JPEG rendition.
    try:
        with rawpy.imread(img_path) as raw:
            # no_auto_bright keeps rawpy from rescaling brightness.
            raw_rgb = raw.postprocess(
                no_auto_bright=True,
                use_camera_wb=True,
                output_bps=8,
            )
        with Image.open(img_path) as img:
            jpeg_rgb = np.array(img.convert("RGB"))
    except Exception as e:
        # Broad on purpose: this is the per-file boundary of a batch run and
        # a single unreadable file should be reported, not abort the batch.
        print(f"Failed to read {img_path}: {e}")
        return

    # 2. Luminosity (grayscale) planes.
    raw_lum = _to_luminosity(raw_rgb)
    jpeg_lum = _to_luminosity(jpeg_rgb)

    # 3. Table buckets: width-10 intervals labelled "0-9", "10-19", ...
    bins = np.arange(0, 261, 10)
    bucket_labels = [f"{lo}-{hi - 1}" for lo, hi in zip(bins[:-1], bins[1:])]

    # 4. The five outputs.
    _save_audit_chart(out_dir, bins, bucket_labels,
                      raw_lum, jpeg_lum, "Luminosity", "01_Luminosity_Audit")
    channel_charts = (("Red", "02_Red_Audit"),
                      ("Green", "03_Green_Audit"),
                      ("Blue", "04_Blue_Audit"))
    for channel_idx, (label, filename) in enumerate(channel_charts):
        _save_audit_chart(out_dir, bins, bucket_labels,
                          raw_rgb[:, :, channel_idx],
                          jpeg_rgb[:, :, channel_idx],
                          label, filename)
    _save_audit_chart(out_dir, bins, bucket_labels,
                      raw_rgb, jpeg_rgb, "RGB Combined",
                      "05_Combined_RGB_Audit", is_combined=True)

    print(f"[✓] Generated 5 Forensic Outputs for {os.path.basename(img_path)}")
# --- EXECUTION BLOCK ---
if __name__ == "__main__":
    import sys

    # The directory may be given as the first command-line argument;
    # otherwise fall back to the placeholder that must be edited by hand.
    scan_dir = sys.argv[1] if len(sys.argv) > 1 else "<enter directory location>"
    print(f"Scanning directory: {scan_dir}")
    # isdir rather than exists: a path to a regular file would pass an
    # existence check and then make os.listdir raise.
    if os.path.isdir(scan_dir):
        # All CR2 files (case-insensitive extension match), sorted so the
        # batch runs in a reproducible order.
        files = sorted(
            f for f in os.listdir(scan_dir) if f.lower().endswith(".cr2")
        )
        if not files:
            print(f"No .CR2 files found in {scan_dir}")
        else:
            print(f"Found {len(files)} files. Starting batch processing...")
            for filename in files:
                full_path = os.path.join(scan_dir, filename)
                generate_forensic_histograms(full_path)
            print("Batch processing complete.")
    else:
        print(f"Error: Directory not found: {scan_dir}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment