Skip to content

Instantly share code, notes, and snippets.

@atucom
Created June 11, 2025 21:38
Show Gist options
  • Save atucom/e99fb154c1ace450b9409650cfe45043 to your computer and use it in GitHub Desktop.
Save atucom/e99fb154c1ace450b9409650cfe45043 to your computer and use it in GitHub Desktop.
Find the z-score outliers (dumps that differ significantly from the others) in a directory of firmware dumps
import os
import argparse
import numpy as np
from scipy.stats import zscore
def load_firmware_dumps(folder_path):
    """Read every regular file in *folder_path* into one byte matrix.

    Directory entries are visited in sorted name order; anything that is
    not a regular file (e.g. a sub-directory) is skipped.

    Args:
        folder_path: Directory containing the firmware dump files.

    Returns:
        A ``(file_names, matrix)`` tuple where ``file_names`` is the list of
        file names that were read and ``matrix`` is a ``uint8`` array with
        one row per file and one column per byte offset.

    Raises:
        ValueError: If the directory contains no regular files, or if the
            files do not all have the same length.
        OSError: Propagated from ``os.listdir`` / ``open`` on I/O failure.
    """
    file_names = []
    firmware_data = []
    for fname in sorted(os.listdir(folder_path)):
        path = os.path.join(folder_path, fname)
        if not os.path.isfile(path):
            continue
        with open(path, "rb") as f:
            firmware_data.append(np.frombuffer(f.read(), dtype=np.uint8))
        file_names.append(fname)

    # Without this check an empty directory would fall through to the size
    # comparison below and report a misleading "not all the same size" error.
    if not firmware_data:
        raise ValueError(f"No firmware dump files found in {folder_path!r}.")

    # np.stack needs equally sized rows; fail early with a clear message.
    if len({len(d) for d in firmware_data}) != 1:
        raise ValueError("Firmware dumps are not all the same size.")
    return file_names, np.stack(firmware_data)
def identify_outliers(data, z_thresh=2.5, anomaly_ratio_thresh=0.001):
    """Flag rows whose bytes deviate from the other rows unusually often.

    For every column (byte offset) the absolute z-score of each row's value
    is computed against all rows. A row is an outlier when the fraction of
    its columns with ``|z| > z_thresh`` exceeds ``anomaly_ratio_thresh``.

    Args:
        data: 2-D array-like, one row per dump and one column per byte.
        z_thresh: Per-byte absolute z-score above which a byte is anomalous.
        anomaly_ratio_thresh: Fraction of anomalous bytes above which a row
            is reported as an outlier.

    Returns:
        A ``(outlier_indices, anomaly_ratios)`` tuple: the row indices that
        were flagged (ascending) and the anomaly ratio of every row.

    Note:
        ``zscore`` uses the population standard deviation by default, so a
        single deviating value among ``N`` rows has ``|z| <= sqrt(N - 1)``.
        With the default ``z_thresh`` of 2.5 at least 8 rows are needed
        before any byte can be counted as anomalous.
    """
    data = np.asarray(data)
    n_rows, n_bytes = data.shape

    if n_bytes == 0:
        # Zero-length dumps: avoid the 0/0 division that would otherwise
        # produce NaN ratios and a RuntimeWarning.
        anomaly_ratios = np.zeros(n_rows, dtype=float)
    else:
        # Columns that are identical in every row have zero variance and
        # come back as NaN; NaN > z_thresh is False, so they never count.
        with np.errstate(divide="ignore", invalid="ignore"):
            z_scores = np.abs(zscore(data, axis=0))
            anomaly_counts = (z_scores > z_thresh).sum(axis=1)
        anomaly_ratios = anomaly_counts / n_bytes

    outlier_indices = np.where(anomaly_ratios > anomaly_ratio_thresh)[0]
    return outlier_indices, anomaly_ratios
def analyze_firmware_dumps(folder_path, z_thresh, anomaly_ratio_thresh):
    """Load the dumps in *folder_path*, print a per-file report, return outliers.

    Args:
        folder_path: Directory passed to ``load_firmware_dumps``.
        z_thresh: Per-byte z-score threshold passed to ``identify_outliers``.
        anomaly_ratio_thresh: Anomalous-byte ratio threshold passed to
            ``identify_outliers``.

    Returns:
        The names of the files flagged as outliers, in the order of their
        indices as returned by ``identify_outliers``.
    """
    file_names, firmware_matrix = load_firmware_dumps(folder_path)
    outlier_indices, anomaly_ratios = identify_outliers(
        firmware_matrix, z_thresh, anomaly_ratio_thresh
    )

    # Build the lookup once: `i in ndarray` would rescan the whole index
    # array element-wise for every file in the report loop.
    flagged = {int(i) for i in outlier_indices}

    print("\nFirmware Analysis Summary:")
    print("-" * 50)
    for i, fname in enumerate(file_names):
        status = "OUTLIER" if i in flagged else "OK"
        print(f"{fname:30} | Status: {status:8} | Anomaly Ratio: {anomaly_ratios[i]:.6f}")
    print("-" * 50)
    return [file_names[i] for i in outlier_indices]
def main(argv=None):
    """Command-line entry point: parse options and run the analysis.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default) argparse reads the process command line.

    Exits with argparse's usage error when ``--input`` is not an existing
    directory, instead of surfacing a raw ``os.listdir`` traceback.
    """
    parser = argparse.ArgumentParser(
        description="Detect statistically anomalous firmware dumps of same size."
    )
    parser.add_argument("--input", "-i", required=True, help="Path to firmware dumps directory")
    parser.add_argument("--z_thresh", type=float, default=2.5, help="Z-score threshold per byte (default: 2.5)")
    parser.add_argument("--anomaly_ratio_thresh", type=float, default=0.001,
                        help="Min ratio of anomalous bytes to flag a file (default: 0.001)")
    args = parser.parse_args(argv)

    # Validate up front so a typo in the path yields a usage message.
    if not os.path.isdir(args.input):
        parser.error(f"--input is not a directory: {args.input}")

    analyze_firmware_dumps(args.input, args.z_thresh, args.anomaly_ratio_thresh)
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment