Created
June 11, 2025 21:38
-
-
Save atucom/e99fb154c1ace450b9409650cfe45043 to your computer and use it in GitHub Desktop.
Find the z-score outliers (dumps that differ significantly from the others) in a directory of firmware dumps.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import os

import numpy as np
from scipy.stats import zscore
def load_firmware_dumps(folder_path):
    """Load every regular file in *folder_path* as one row of a byte matrix.

    Entries are visited in sorted name order, so row ``i`` of the returned
    matrix always corresponds to ``file_names[i]``. Anything that is not a
    regular file (e.g. a subdirectory) is skipped.

    Args:
        folder_path: Directory containing the firmware dumps.

    Returns:
        A ``(file_names, matrix)`` tuple: the list of file names that were
        read, and a 2-D ``np.uint8`` array with one row per dump and one
        column per byte offset.

    Raises:
        ValueError: If the directory contains no regular files, or if the
            dumps do not all have the same size.
    """
    file_names = []
    firmware_data = []
    for fname in sorted(os.listdir(folder_path)):
        path = os.path.join(folder_path, fname)
        if not os.path.isfile(path):
            continue
        with open(path, "rb") as f:
            firmware_data.append(np.frombuffer(f.read(), dtype=np.uint8))
        file_names.append(fname)

    # An empty directory would otherwise trip the size check below and be
    # reported, misleadingly, as a size mismatch.
    if not firmware_data:
        raise ValueError(f"No firmware dumps found in {folder_path!r}.")
    # np.stack needs equally sized rows; fail with a clear message instead.
    if len({len(dump) for dump in firmware_data}) != 1:
        raise ValueError("Firmware dumps are not all the same size.")
    return file_names, np.stack(firmware_data)
def identify_outliers(data, z_thresh=2.5, anomaly_ratio_thresh=0.001):
    """Flag rows whose bytes deviate from the other rows unusually often.

    Each column (byte offset) is z-scored across all rows (dumps). A byte is
    "anomalous" when its absolute z-score exceeds ``z_thresh``; a row is an
    outlier when its fraction of anomalous bytes exceeds
    ``anomaly_ratio_thresh``.

    Args:
        data: 2-D array with one row per dump and one column per byte offset.
        z_thresh: Absolute z-score above which a single byte is anomalous.
        anomaly_ratio_thresh: Fraction of anomalous bytes above which a row
            is reported as an outlier.

    Returns:
        A ``(outlier_indices, anomaly_ratios)`` tuple: the row indices of the
        outliers, and the per-row fraction of anomalous bytes.

    Note:
        With the population standard deviation that ``zscore`` uses by
        default, one value among ``n`` can reach at most ``sqrt(n - 1)``, so
        the default ``z_thresh`` of 2.5 can only trigger when at least 8
        dumps are compared.
    """
    n_rows, n_bytes = data.shape[0], data.shape[1]
    if n_bytes == 0:
        # Zero-length dumps have nothing to compare; avoid the 0/0 division.
        return np.array([], dtype=np.intp), np.zeros(n_rows)

    # Columns identical across all dumps have zero spread, so their z-score
    # is NaN; NaN > z_thresh is False, so such bytes never count as anomalous.
    z_scores = np.abs(zscore(data, axis=0))
    exceeds = z_scores > z_thresh
    anomaly_ratios = exceeds.sum(axis=1) / n_bytes
    outlier_indices = np.flatnonzero(anomaly_ratios > anomaly_ratio_thresh)
    return outlier_indices, anomaly_ratios
def analyze_firmware_dumps(folder_path, z_thresh, anomaly_ratio_thresh):
    """Load the dumps in *folder_path*, print a per-file report, return outliers.

    Args:
        folder_path: Directory containing the firmware dumps.
        z_thresh: Per-byte absolute z-score threshold, forwarded to
            ``identify_outliers``.
        anomaly_ratio_thresh: Per-file anomalous-byte ratio threshold,
            forwarded to ``identify_outliers``.

    Returns:
        The names of the files flagged as outliers, in the order of the
        returned outlier indices.
    """
    file_names, firmware_matrix = load_firmware_dumps(folder_path)
    outlier_indices, anomaly_ratios = identify_outliers(
        firmware_matrix, z_thresh, anomaly_ratio_thresh
    )
    # Build the membership set once instead of scanning the index array for
    # every file in the report loop.
    flagged = {int(idx) for idx in outlier_indices}

    print("\nFirmware Analysis Summary:")
    print("-" * 50)
    for i, fname in enumerate(file_names):
        status = "OUTLIER" if i in flagged else "OK"
        print(f"{fname:30} | Status: {status:8} | Anomaly Ratio: {anomaly_ratios[i]:.6f}")
    print("-" * 50)
    return [file_names[i] for i in outlier_indices]
def main(argv=None):
    """Parse command-line options and run the firmware dump analysis.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads the arguments from ``sys.argv``.

    Raises:
        SystemExit: On invalid arguments, including an ``--input`` path that
            is not an existing directory (exit status 2, via argparse).
    """
    parser = argparse.ArgumentParser(
        description="Detect statistically anomalous firmware dumps of same size."
    )
    parser.add_argument("--input", "-i", required=True, help="Path to firmware dumps directory")
    parser.add_argument("--z_thresh", type=float, default=2.5, help="Z-score threshold per byte (default: 2.5)")
    parser.add_argument("--anomaly_ratio_thresh", type=float, default=0.001,
                        help="Min ratio of anomalous bytes to flag a file (default: 0.001)")
    args = parser.parse_args(argv)

    # Report a bad path as a usage error rather than a raw traceback from
    # os.listdir further down.
    if not os.path.isdir(args.input):
        parser.error(f"--input is not a directory: {args.input}")

    analyze_firmware_dumps(args.input, args.z_thresh, args.anomaly_ratio_thresh)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment