Created
February 6, 2025 18:41
-
-
Save isaaccorley/9424f71996b18e4b00b3ac54824d8e77 to your computer and use it in GitHub Desktop.
InfraredSolarModules RandomForest 2ez
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"!wget https://raw.githubusercontent.com/RaptorMaps/InfraredSolarModules/master/2020-02-14_InfraredSolarModules.zip\n", | |
"!unzip 2020-02-14_InfraredSolarModules.zip" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"!pip install numpy pillow tqdm scikit-learn" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"100%|██████████| 20000/20000 [00:03<00:00, 5425.94it/s]" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(20000, 960) (20000,)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"import os\n", | |
"import json\n", | |
"import numpy as np\n", | |
"from PIL import Image\n", | |
"from tqdm import tqdm\n", | |
"from sklearn.ensemble import RandomForestClassifier\n", | |
"from sklearn.model_selection import train_test_split\n", | |
"\n", | |
"\n", | |
"root = \"InfraredSolarModules\"\n", | |
"with open(os.path.join(root, \"module_metadata.json\"), \"r\") as f:\n", | |
" data = json.load(f)\n", | |
"\n", | |
"classes = sorted(list(set([v[\"anomaly_class\"] for v in data.values()])))\n", | |
"cls2idx = {cls: i for i, cls in enumerate(classes)}\n", | |
"images = [os.path.join(root, v[\"image_filepath\"]) for v in data.values()]\n", | |
"x = np.stack([np.array(Image.open(image)) for image in tqdm(images)])\n", | |
"x = x.reshape(x.shape[0], -1)\n", | |
"y = np.array([cls2idx[v[\"anomaly_class\"]] for v in data.values()])\n", | |
"y_binary = np.array([0 if v[\"anomaly_class\"] == \"No-Anomaly\" else 1 for v in data.values()])\n", | |
"print(x.shape, y.shape)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def train(x, y, test_size=0.1, seed=0):\n", | |
" X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=test_size, random_state=seed, stratify=y)\n", | |
" clf = RandomForestClassifier(random_state=seed, n_jobs=-1)\n", | |
" clf.fit(X_train, y_train)\n", | |
" y_pred_train = clf.predict(X_train)\n", | |
" y_pred_test = clf.predict(X_test)\n", | |
" train_acc = (y_pred_train == y_train).mean()\n", | |
" test_acc = (y_pred_test == y_test).mean()\n", | |
" return train_acc, test_acc" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Train accuracy: 0.9997222222222222\n", | |
"Test accuracy: 0.6725\n", | |
"Train accuracy: 0.9998333333333334\n", | |
"Test accuracy: 0.657\n", | |
"Train accuracy: 0.9997222222222222\n", | |
"Test accuracy: 0.6765\n", | |
"Train accuracy: 0.9996666666666667\n", | |
"Test accuracy: 0.6595\n", | |
"Train accuracy: 0.9996111111111111\n", | |
"Test accuracy: 0.6775\n", | |
"Train accuracy: 0.9997222222222222\n", | |
"Test accuracy: 0.6565\n", | |
"Train accuracy: 0.9997222222222222\n", | |
"Test accuracy: 0.6575\n", | |
"Train accuracy: 0.9997222222222222\n", | |
"Test accuracy: 0.6675\n", | |
"Train accuracy: 0.9996666666666667\n", | |
"Test accuracy: 0.668\n", | |
"Train accuracy: 0.9997222222222222\n", | |
"Test accuracy: 0.6695\n", | |
"Train accuracy (averaged across seeds): 0.999711111111111 5.4433105395173477e-05\n", | |
"Test accuracy: (averaged across seeds) 0.6662 0.007672027111526655\n" | |
] | |
} | |
], | |
"source": [ | |
"train_acc, test_acc = [], []\n", | |
"\n", | |
"for seed in range(10):\n", | |
" train_acc_, test_acc_ = train(x, y, test_size=0.1, seed=seed)\n", | |
" train_acc.append(train_acc_)\n", | |
" test_acc.append(test_acc_)\n", | |
" print(\"Train accuracy:\", train_acc_)\n", | |
" print(\"Test accuracy:\", test_acc_)\n", | |
"\n", | |
"print(\"Train accuracy (averaged across seeds):\", np.mean(train_acc), np.std(train_acc)) \n", | |
"print(\"Test accuracy: (averaged across seeds)\", np.mean(test_acc), np.std(test_acc))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Train accuracy: 1.0\n", | |
"Test accuracy: 0.8445\n", | |
"Train accuracy: 1.0\n", | |
"Test accuracy: 0.8405\n", | |
"Train accuracy: 1.0\n", | |
"Test accuracy: 0.8495\n", | |
"Train accuracy: 1.0\n", | |
"Test accuracy: 0.8505\n", | |
"Train accuracy: 0.9999444444444444\n", | |
"Test accuracy: 0.8315\n", | |
"Train accuracy: 1.0\n", | |
"Test accuracy: 0.841\n", | |
"Train accuracy: 1.0\n", | |
"Test accuracy: 0.828\n", | |
"Train accuracy: 0.9999444444444444\n", | |
"Test accuracy: 0.8385\n", | |
"Train accuracy: 1.0\n", | |
"Test accuracy: 0.8415\n", | |
"Train accuracy: 1.0\n", | |
"Test accuracy: 0.84\n", | |
"Train accuracy (averaged across seeds): 0.999988888888889 2.2222222222234576e-05\n", | |
"Test accuracy: (averaged across seeds) 0.84055 0.006631176366226449\n" | |
] | |
} | |
], | |
"source": [ | |
"train_acc, test_acc = [], []\n", | |
"\n", | |
"for seed in range(10):\n", | |
" train_acc_, test_acc_ = train(x, y_binary, test_size=0.1, seed=seed)\n", | |
" train_acc.append(train_acc_)\n", | |
" test_acc.append(test_acc_)\n", | |
" print(\"Train accuracy:\", train_acc_)\n", | |
" print(\"Test accuracy:\", test_acc_)\n", | |
"\n", | |
"print(\"Train accuracy (averaged across seeds):\", np.mean(train_acc), np.std(train_acc)) \n", | |
"print(\"Test accuracy: (averaged across seeds)\", np.mean(test_acc), np.std(test_acc))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "torchenv", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.14" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment