Created
August 11, 2019 02:45
-
-
Save kohnakagawa/ecc0dfdf3e81c0a69531fb1602101bdf to your computer and use it in GitHub Desktop.
Malware Data Science chapter 8の内容をEmberで使われている特徴量で実施した場合の結果
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import sys\n", | |
"import os\n", | |
"import glob\n", | |
"import sklearn\n", | |
"import ember\n", | |
"import numpy as np\n", | |
"import yara\n", | |
"import pickle" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# glob returns matches in arbitrary, filesystem-dependent order; sorting keeps the sample\n", | |
"# order (and therefore the seeded train/test split built from it) stable across machines.\n", | |
"b_paths = sorted(glob.glob(os.path.join(\"data\", \"benignware\", \"*\")))\n", | |
"m_paths = sorted(glob.glob(os.path.join(\"data\", \"malware\", \"*\")))\n", | |
"# Labels line up index-for-index with data_paths: 0 for benignware paths, 1 for malware paths.\n", | |
"labels = [0] * len(b_paths) + [1] * len(m_paths)\n", | |
"data_paths = b_paths + m_paths" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# YARA rule IsPeFile: requires \"MZ\" at offset 0 and the 32-bit value 0x4550 at the offset stored at 0x3C.\n", | |
"pe_rule_source = 'rule IsPeFile {strings:$mz = \"MZ\"condition:$mz at 0 and uint32(uint32(0x3C)) == 0x4550}'\n", | |
"rule = yara.compile(source=pe_rule_source)\n", | |
"# Keep only the (path, label) pairs whose file yields a truthy match result for the rule.\n", | |
"path_labels = [(path, label) for path, label in zip(data_paths, labels) if rule.match(path)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def read_bytes(path):\n", | |
"    \"\"\"Return the full contents of the file at `path` as bytes, closing the handle afterwards.\"\"\"\n", | |
"    with open(path, \"rb\") as f:\n", | |
"        return f.read()\n", | |
"\n", | |
"\n", | |
"fextractor = ember.PEFeatureExtractor()\n", | |
"# One feature vector per file that matched the YARA rule, in path_labels order.\n", | |
"fvector = np.array([fextractor.feature_vector(bytez=read_bytes(p)) for p, _ in path_labels])\n", | |
"# Rebinds `labels` to an array aligned row-for-row with fvector (files that did not match were dropped).\n", | |
"labels = np.array([label for _, label in path_labels])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Serialize the feature matrix to fvector.pickle in the working directory.\n", | |
"# NOTE(review): only fvector is written here; labels are not persisted alongside it.\n", | |
"with open(\"fvector.pickle\", \"wb\") as cache_file:\n", | |
"    pickle.dump(fvector, cache_file)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from sklearn.model_selection import train_test_split\n", | |
"\n", | |
"# X / y alias the feature matrix and the label vector.\n", | |
"X, y = fvector, labels\n", | |
"# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable.\n", | |
"X_train, X_test, y_train, y_test = train_test_split(\n", | |
"    X, y, test_size=0.2, random_state=42\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", | |
" \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n" | |
] | |
} | |
], | |
"source": [ | |
"from sklearn.base import clone\n", | |
"from sklearn.ensemble import RandomForestClassifier\n", | |
"from sklearn.metrics import accuracy_score\n", | |
"from sklearn.model_selection import KFold\n", | |
"\n", | |
"# n_estimators=10 pins the value this notebook was originally run with (and avoids the\n", | |
"# default-change FutureWarning); random_state makes the forest reproducible.\n", | |
"clf = RandomForestClassifier(n_estimators=10, random_state=42)\n", | |
"kf = KFold(n_splits=4)\n", | |
"results = []\n", | |
"for train_idx, test_idx in kf.split(X_train, y_train):\n", | |
"    # Fit an unfitted copy per fold so the folds are independent and `clf` is not left\n", | |
"    # trained on whichever partial split happened to come last.\n", | |
"    fold_clf = clone(clf)\n", | |
"    fold_clf.fit(X_train[train_idx], y_train[train_idx])\n", | |
"    y_pred = fold_clf.predict(X_train[test_idx])\n", | |
"    results.append(accuracy_score(y_train[test_idx], y_pred))\n", | |
"print(\"CV accuracy: %.4f +/- %.4f\" % (np.mean(results), np.std(results)))\n", | |
"\n", | |
"# Final model: train on the whole training split before it is scored on X_test.\n", | |
"clf.fit(X_train, y_train);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0.978494623655914\n" | |
] | |
} | |
], | |
"source": [ | |
"# Accuracy of the fitted classifier on the held-out test split.\n", | |
"y_pred = clf.predict(X_test)\n", | |
"test_accuracy = accuracy_score(y_test, y_pred)\n", | |
"print(test_accuracy)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from sklearn.metrics import auc, roc_curve\n", | |
"\n", | |
"# Column 1 of predict_proba is used as the score for label 1 (malware), the positive class here.\n", | |
"class_probs = clf.predict_proba(X_test)\n", | |
"y_pred_prob = class_probs[:, 1]\n", | |
"fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)\n", | |
"# Area under the ROC curve, computed from the (fpr, tpr) points.\n", | |
"roc_auc = auc(fpr, tpr)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<Figure size 640x480 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"import matplotlib.pyplot as plt\n", | |
"\n", | |
"fig, ax = plt.subplots()\n", | |
"ax.plot(fpr, tpr, 'b', label='AUC = %0.5f' % roc_auc)\n", | |
"ax.set_title('Receiver Operating Characteristic')\n", | |
"# Zoom the x-axis to false-positive rates between 0 and 0.03.\n", | |
"ax.set_xlim([0, 0.03])\n", | |
"ax.set_ylim([0, 1])\n", | |
"ax.set_xlabel('False Positive Rate')\n", | |
"ax.set_ylabel('True Positive Rate')\n", | |
"ax.legend(loc='lower right')\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<matplotlib.axes._subplots.AxesSubplot at 0x7f7908ba6668>" | |
] | |
}, | |
"execution_count": 26, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAa8AAAD4CAYAAABbl2n6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAcZUlEQVR4nO3deZwdVZ3+8c/DviSGTZgAQmtgCFsSSUBZBXH4ubEoYUAWiTpmcEURfi4wGBwcYRB/riwRMSBxxIBIAGURkE2C6YYshEVkERF+IlsIhGASnvnjnuCl6U53ktv3prqf9+uVV1edOlX1PbdDHk5V3Xtlm4iIiCpZpdUFRERELKuEV0REVE7CKyIiKifhFRERlZPwioiIylmt1QUMFBtttJHb2tpaXUZERKV0dHQ8ZfuNndsTXk3S1tZGe3t7q8uIiKgUSX/qqj2XDSMionIy82qSex97mtEnXNjqMiIimqrjjA/3yXEz84qIiMpJeEVEROUkvCIionISXhERUTkJr4iIqJyVIrwktUm6u4v2r0l611L2O0jSdn1bXURErGxWivDqju2Tbf9mKV0OAhoSXpLytoGIiIpYmcJrVUk/lDRH0rWS1pY0SdJYAEmnSbpH0ixJ35S0G3AAcIakGZKGSRolaVrpc5mk9cu+O5e2GZLOWDLLkzRO0lRJNwDXSxok6XpJd0qaLenA0q9N0n2lnj9ImizpXZJuk/SApF1a9JpFRAxIK1N4bQ38wPb2wHPAwUs2SNoQ+ACwve0RwKm2fwdMBU6wPcr2g8CFwBdLn9nAV8shfgz8u+1RwOJO590JGGv7HcAC4AO2dwL2Ac6UpNJvK+BMYHj5cziwB3A88JWuBiRpvKR2Se2L5s9b7hcmIiJea2UKr4dtzyjLHUBb3ba51ILlR5I+CMzvvLOkIcB6tm8qTRcAe0laDxhs+/bS/tNOu15n+5klhwH+S9Is4DfAZsAmdfXNtv0KMAe43raphWQbXbA90fYY22NWW2dwz69ARET0ysoUXi/XLS+m7qOrbC8CdgEuAd4PXN3A875Yt3wE8EZgdJml/RVYq4v6Xqlbf4V8zFZERFOtTOHVLUmDgCG2fwV8HhhZNs0DBgPYngs8K2nPsu0o4CbbzwHzJL2ttB+2lFMNAZ60vVDSPsCWDR5KREQ0QFVmDIOByyWtRe3S3nGl/WfADyV9FhgLHA2cI2kd4CHgI6Xfx0q/V4CbqF2G7Mpk4ApJs4F24L6+GExERKwY1W7b9G+SBtl+oSx/CRhq+9hm1rDuP73Zw486pZmnjIhouRX9VHlJHbbHdG6vysxrRb1P0pepjfdPwLjWlhMREStiQISX7YuBi1tdR0RENEYlHtiIiIiol/CKiIjKGRCXDVcG226+Ie199HXYEREDTWZeERFROQmviIionIRXRERUTsIrIiIqJw9sNMnfn5jDo1/bsdVlRCdbnDy71SVExHLIzCsiIion4RUREZWT8IqIiMpJeEVEROUkvCIionL6NLwktUm6exn6f6WP6thb0m59ceyIiGi+lW3m1SfhBewNdBlekvJ2gYiIimlGeK0mabKkeyVdIum9kn65ZKOkf5F0maTTgLUlzZA0uWw7UtLvS9u5klYt7ftJul3SnZKmSBpU2h+RdEppny1puKQ24Bjg8+U4e0qaJOkcSXcA/y1pA0m/lDRL0jRJI8rxJkj6STnXA5I+XtovlHRQ3RgmSzqwCa9lRETQnPDaBjjL9rbA88D2wHBJbyzbPwKcb/tLwEu2R9k+QtK2wKHA7rZHAYuBIyRtBJwEvMv2TkA7cFzd+Z4q7WcDx9t+BDgH+H/l2LeUfpsDu9k+DjgFuMv2CGqzvwvrjjcCeCewK3CypE2BH1G+jVnSEGqzuqsa8WJFRETPmhFef7Z9W1m+CNgd+AlwpKT1qIXCr7vYb19gNDBd0oyy/hbg7cB2wG2l/Whgy7r9flF+dg
BtS6lriu3FZXmPUhO2bwA2lPSGsu1y2y/Zfgq4EdjF9k3A1iWAPwRcantR5xNIGi+pXVL7My8u7rw5IiKWUzPu97iL9R8DVwALqIXI6/7hBwRcYPvLr2mU9geus/2hbs73cvm5mKWP78WeCq+rt6v1C4EjgcOozR5fv6M9EZgIMGKztTsfJyIillMzZl5bSNq1LB8O3Gr7ceBxapf/flzXd6Gk1cvy9cBYSRsDlPtSWwLTgN0lbVXa15X0zz3UMA8YvJTttwBHlOPtTe3S4/Nl24GS1pK0IbUHP6aX9knA5wBs39PD+SMiooGaEV73A5+SdC+wPrV7UQCTqV1SvLeu70RglqTJJRBOAq6VNAu4Dhhq+2/U7jf9T2m/HRjeQw1XAB9Y8sBGF9snAKPL8U6jdilyiVnULhdOA/6zBC+2/wrcy2vDNyIimkB2a65mSfo+tYckftSSAnpB0gTgBdvf7GLbOsBsYCfbc3s61ojN1vaV/75V44uMFZJPlY9YuUnqsD2mc3tL3uclqYPaU3wXteL8K0rSu6jNur7Xm+CKiIjGaskbdG2PbsV5l5XtCd20/4bXPuEYERFNtLJ9wkZERESPEl4REVE5+Vy/Jllj6PZscXJ7q8uIiOgXMvOKiIjKSXhFRETlJLwiIqJyEl4REVE5eWCjSe578j52/97urS4j+tBtn7mt504R0RCZeUVEROUkvCIionISXhERUTkJr4iIqJyEV0REVE7CKyIiKqffhpekz5UvjOxu+3mStmtmTRER0Rj9NryAzwFdhpekVW3/m+17mlxTREQ0QL8IL0nrSrpK0kxJd0v6KrApcKOkG0ufFySdKWkmsKuk30oaU7ft62X/aZI2Ke3DyvpsSadKeqG0D5V0s6QZ5Xx7tmjoEREDUr8IL+DdwOO2R9reAfg28Diwj+19Sp91gTtKn1s77b8uMM32SOBm4OOl/TvAd2zvCDxW1/9w4Brbo4CRwIyuipI0XlK7pPaFLyxswDAjIgL6T3jNBv5F0umS9rQ9t4s+i4FLu9n/78CVZbkDaCvLuwJTyvJP6/pPBz4iaQKwo+15XR3U9kTbY2yPWX3Q6r0eTERELF2/CC/bfwB2ohZip0o6uYtuC2wv7uYQC227LC+mh898tH0zsBfwF2CSpA8vX+UREbE8+kV4SdoUmG/7IuAMakE2Dxi8goeeBhxclg+rO9+WwF9t/xA4r5wvIiKapL98qvyOwBmSXgEWAp+gdsnvakmP1933WlafAy6SdCJwNbDkcuTewAmSFgIvAJl5RUQ0kf5xtSw6K+8Te8m2JR0GfMj2gctzrEFbDPLIE0Y2tsBYqeQrUSIaT1KH7TGd2/vLzKuvjAa+L0nAc8BHW1xPRESQ8Foq27dQexQ+IiJWIv3igY2IiBhYMvNqkuEbD889kYiIBsnMKyIiKifhFRERlZPwioiIykl4RURE5SS8mmTe/fdz017vaHUZERH9QsIrIiIqJ+EVERGVk/CKiIjKSXhFRETlJLwiIqJyEl4REVE5Ca8VJOm3kl73XTMREdF3El49UE1ep4iIlUhl/1GW9B+S7pd0q6T/kXS8pGGSrpbUIekWScNL30mSvivpd5IekjS27jgnSJouaZakU0pbWzn2hcDdwJsknS2pXdKcJf0iIqI1KvmVKJJ2Bg6m9kWRqwN3Ah3AROAY2w9IehtwFvDOsttQYA9gODAVuETSfsDWwC6AgKmS9gIeLe1H255Wznmi7WckrQpcL2mE7Vk91DkeGA+wyZprNmz8EREDXSXDC9gduNz2AmCBpCuAtYDdgCmSlvSrT4xf2n4FuEfSJqVtv/LnrrI+iFpoPQr8aUlwFf9awmg1akG4HbDU8LI9kVqgss3gwV6egUZExOtVNby6sgrwnO1R3Wx/uW5ZdT+/Yfvc+o6S2oAX69bfDBwP7Gz7WUmTqIVlRES0QFXved0G7C9pLUmDgPcD84GHJR0Crz5oMbKH41wDfLQcA0mbSdq4i3
5voBZmc8us7T2NGkhERCy7Ss68bE+XNJXaZbu/ArOBucARwNmSTqJ2L+xnwMylHOdaSdsCt5dLjS8ARwKLO/WbKeku4D7gz9TCMyIiWkR2NW/FSBpk+wVJ6wA3A+Nt39nqurqzzeDBnvjWnXjHzTe1upSIiMqQ1GH7de+lreTMq5goaTtq954uWJmDKyIiGquy4WX78FbXEBERrVHVBzYiImIAS3hFRETlJLyaZPA22+RhjYiIBkl4RURE5SS8IiKichJeERFROQmvJnnysbl8/wtXtLqMiIh+IeEVERGVk/CKiIjKSXhFRETlJLwiIqJyEl4REVE5Ca+IiKicfhFektok3d0Hx31E0kaNPm5ERKyYfhFeKwtJq7a6hoiIgaA/hdeqkn4oaY6kayWtLWmYpKsldUi6RdJwAEn7S7pD0l2SfiNpk9K+Ydl3jqTzAC05uKQjJf1e0gxJ5y4JKkkvSDpT0kxg11YMPCJioOlP4bU18APb2wPPAQcDE4HP2B4NHA+cVfreCrzd9luBnwH/t7R/Fbi1HOMyYAsASdsChwK72x4FLAaOKPusC9xhe6TtW+sLkjReUruk9hfmz+2TQUdEDESV/SblLjxse0ZZ7gDagN2AKdKrE6g1y8/NgYslDQXWAB4u7XsBHwSwfZWkZ0v7vsBoYHo51trAk2XbYuDSrgqyPZFagLLFP23tFRteREQs0Z/C6+W65cXAJsBzZabU2feAb9meKmlvYEIPxxZwge0vd7Ftge3Fy1FvREQsp/502bCz54GHJR0CoJqRZdsQ4C9l+ei6fW4GDi/93wOsX9qvB8ZK2rhs20DSln1cf0REdKM/hxfU7kt9rDxMMQc4sLRPoHY5sQN4qq7/KcBekuZQu3z4KIDte4CTgGslzQKuA4Y2ZQQREfE6/eKyoe1HgB3q1r9Zt/ndXfS/HLi8i/angf26OcfFwMVdtA9a9oojImJF9PeZV0RE9EMJr4iIqJyEV0REVE7CKyIiKifh1SQbbz6ET5+5f6vLiIjoFxJeERFROQmviIionIRXRERUTsIrIiIqp198wkYVPPHwg3z9yLErdIwTL7qkQdVERFRbZl4REVE5Ca+IiKichFdERFROwisiIion4RUREZUz4MNLUpukw+vWx0j6bitrioiIpRvw4QW0Aa+Gl+12259tXTkREdGTyoeXpHUlXSVppqS7JR0qabSkmyR1SLpG0tDSdytJvyl975Q0DDgN2FPSDEmfl7S3pCtL/w0k/VLSLEnTJI0o7RMknS/pt5IekpSwi4hoov7wJuV3A4/bfh+ApCHAr4EDbf9N0qHA14GPApOB02xfJmktauH9JeB42+8v++9dd+xTgLtsHyTpncCFwKiybTiwDzAYuF/S2bYX1hcmaTwwHmDIOms3fuQREQNUfwiv2cCZkk4HrgSeBXYArpMEsCrwhKTBwGa2LwOwvQCg9OnOHsDBpf8NkjaU9Iay7SrbLwMvS3oS2AR4rH5n2xOBiQCbbbi+GzDWiIigH4SX7T9I2gl4L3AqcAMwx/au9f1KeDXSy3XLi+kHr2VERFX0h3temwLzbV8EnAG8DXijpF3L9tUlbW97HvCYpINK+5qS1gHmUbv015VbgCNK/72Bp2w/36cDioiIHvWH2cKOwBmSXgEWAp8AFgHfLfe/VgO+DcwBjgLOlfS10vcQYBawWNJMYBJwV92xJwDnS5oFzAeObsaAIiJi6WTnVkwzbLbh+v7ke/ZdoWPkU+UjYqCR1GF7TOf2yl82jIiIgSfhFRERlZPwioiIykl4RURE5fSHpw0rYeibh+WBi4iIBsnMKyIiKifhFRERlZPwioiIykl4RURE5eSBjSZZ8MQ87v36DU0737YnvrNp54qIaLbMvCIionISXhERUTkJr4iIqJyEV0REVE7CKyIiKqey4SXpd00+X5ukw5t5zoiI6Fplw8v2bs06l6TVgDYg4RURsRKo7Pu8JL1ge5CkvYFTgOeAHYGfA7
OBY4G1gYNsPyhpErAAGAO8ATjO9pWS1gLOLu2LSvuNksYBHwQGAasCawLbSpoBXABcC/wYWIPa/wQcbPuBZow9ImKgq2x4dTIS2BZ4BngIOM/2LpKOBT4DfK70awN2AYYBN0raCvgUYNs7ShoOXCvpn0v/nYARtp8pIXm87fcDSPoe8B3bkyWtQS3gXkPSeGA8wNAhG/fBsCMiBqbKXjbsZLrtJ2y/DDxIbVYEtRlYW12/n9t+pcyQHgKGA3sAFwHYvg/4E7AkvK6z/Uw357wd+IqkLwJb2n6pcwfbE22PsT1mg3XXW7ERRkTEq/pLeL1ct/xK3forvHZ26U77dV7v7MXuNtj+KXAA8BLwK0n5PKaIiCbpL+HVW4dIWkXSMOAtwP3ALcARAOVy4RalvbN5wOAlK5LeAjxk+7vA5cCIPq49IiKK/nLPq7ceBX5P7YGNY2wvkHQWcLak2dQe2Bhn+2VJnfedBSyWNBOYRO0BjqMkLQT+P/BfTRpDRMSAJ7unK2f9Q3na8Erbl7Ti/Dtsto2nfPLspp0vnyofEf2BpA7bYzq3D7TLhhER0Q8MmMuGtse1uoaIiGiMzLwiIqJyEl4REVE5A+ayYautNXRwHqKIiGiQzLwiIqJyEl4REVE5Ca+IiKichFdERFROHthokscff5wJEya8pq3zekRE9E5mXhERUTkJr4iIqJyEV0REVE7CKyIiKifhFRERlZPw6iVJ4yRt2uo6IiIi4dUrklYFxgEJr4iIlUClw0vSkZJ+L2mGpHMlbSnpAUkbSVpF0i2S9pPUJuk+SZMl3SvpEknrlGPsK+kuSbMlnS9pzdL+iKTTJd0JfAgYA0wu51pb0mmS7pE0S9I3W/gyREQMOJUNL0nbAocCu9seBSwG3gGcDpwNfAG4x/a1ZZdtgLNsbws8D3xS0lrAJOBQ2ztSe9P2J+pO87TtnWxfBLQDR5RzrQN8ANje9gjg1L4dbURE1KtseAH7AqOB6ZJmlPW32D4PeANwDHB8Xf8/276tLF8E7EEt0B62/YfSfgGwV90+F3dz7rnAAuBHkj4IzO+qk6Txktoltc+f32WXiIhYDlX+eCgBF9j+8msaa5cDNy+rg4B5Zdmd9u+83pUXu2q0vUjSLtQCcyzwaeB1X9ZleyIwEWDTTTftzfkiIqIXqjzzuh4YK2ljAEkbSNqS2mXDycDJwA/r+m8hadeyfDhwK3A/0CZpq9J+FHBTN+ebBwwu5xoEDLH9K+DzwMiGjSoiInpU2ZmX7XsknQRcK2kVYCFwHLAztftgiyUdLOkjwI3UgupTks4H7gHOtr2gbJ8iaTVgOnBON6ecBJwj6SXgPcDl5Z6ZynkjIqJJKhteALYv5vX3pd5et/2DAJLagEW2j+ziGNcDb+2iva3T+qXApXVNuyxn2RERsYKqfNkwIiIGqErPvHrL9iPADq2uIyIiGiMzr4iIqJyEV0REVI7svP2oGcaMGeP29vZWlxERUSmSOmyP6dyemVdERFROwisiIion4RUREZWT8IqIiMoZEO/zWhk8++y9/HzKin8ox78e8vsGVBMRUW2ZeUVEROUkvCIionISXhERUTkJr4iIqJyEV0REVE7CKyIiKmeZw0vSBEnHL2X7QZK2W47jvlB+birpkmXdv8G1HCPpw42oISIiGq8vZl4HAcscGEvYftz22M7tkpbnPWnLXIuk1WyfY/vC5ThfREQ0Qa/CS9KJkv4g6VZgm9L2cUnTJc2UdKmkdSTtBhwAnCFphqRhXfUr+79Z0u2SZks6te5cbZLuLsvjJE2VdANwfWk7oRxvlqRT6vb7cGmbKekn3dQyStK00u8ySeuXfX8r6duS2oFj62eXZb+rJXVIukXS8NJ+iKS7y/luXsHfQ0RELIMew0vSaOAwYBTwXmDnsukXtne2PRK4F/iY7d8BU4ETbI+y/WBX/cr+3wHOtr0j8MRSStgJGGv7HZL2A7YGdin1jJa0l6TtgZOAd5bzHNtNLR
cCX7Q9ApgNfLXuPGvYHmP7zE7nnwh8xvZo4HjgrNJ+MvB/yvkO6Oa1Gy+pXVL7888vWsoQIyJiWfTmUtyewGW25wNImlradygzpvWAQcA13ezfXb/dgYPL8k+A07vZ/zrbz5Tl/cqfu8r6IGphNhKYYvspgLr+r5I0BFjP9k2l6QJgSl2Xi7vYZxCwGzBF0pLmNcvP24BJkn4O/KKrwm1PpBZ+DBu2br44LSKiQVbksw0nAQfZnilpHLD3cvTrzT/oL9YtC/iG7XPrO0j6TK8q7v15llgFeM72qM4bbB8j6W3A+4AOSaNtP92AOiIioge9ued1M3CQpLUlDQb2L+2DgSckrQ4cUdd/XtlGD/1uo3Y5kk7tS3MN8NEyI0LSZpI2Bm4ADpG0YWnfoHMttucCz0ras2w7CriJpbD9PPCwpEPKcSVpZFkeZvsO2ycDfwPe1MsxRETECuoxvGzfSe2S2kzg18D0suk/gDuohdB9dbv8DDhB0l2Shi2l37HApyTNBjbrTbG2rwV+Ctxe9rsEGGx7DvB14CZJM4FvdVPL0dQe4JhF7Z7Z13px2iOAj5XjzgEOLO1nlIdN7gZ+R+31iYiIJpCdWzHNMGzYuv7Gaduv8HHylSgRMZBI6rA9pnN7PmEjIiIqJ+EVERGVk/CKiIjKWZFH5WMZrL/+trlfFRHRIJl5RURE5SS8IiKicvKofJNImgfc3+o6mmwj4KlWF9ECA3HcGfPA0exxb2n7jZ0bc8+ree7v6r0K/Zmk9oE2ZhiY486YB46VZdy5bBgREZWT8IqIiMpJeDXPxFYX0AIDccwwMMedMQ8cK8W488BGRERUTmZeERFROQmviIionIRXg0l6t6T7Jf1R0pe62L6mpIvL9jsktTW/ysbqxZj3knSnpEWSxraixkbrxZiPk3SPpFmSrpe0ZSvqbLRejPuY8j13MyTdKmm7VtTZSD2Nua7fwZIsqeWPka+oXvyex0n6W/k9z5D0b00v0nb+NOgPsCrwIPAWYA1qX1C5Xac+nwTOKcuHARe3uu4mjLkNGAFcCIxtdc1NGvM+wDpl+RNV/z0vw7jfULd8AHB1q+vu6zGXfoOpfev8NGBMq+tuwu95HPD9VtaZmVdj7QL80fZDtv9O7ZucD+zU50DggrJ8CbCvJDWxxkbrccy2H7E9C3ilFQX2gd6M+Ubb88vqNGDzJtfYF3oz7ufrVtcFqv5EWG/+mwb4T+B0YEEzi+sjvR1zSyW8Gmsz4M9164+Vti772F4EzAU2bEp1faM3Y+5vlnXMHwN+3acVNUevxi3pU5IeBP4b+GyTausrPY5Z0k7Am2xf1czC+lBv/34fXC6LXyLpTc0p7R8SXhF9SNKRwBjgjFbX0iy2f2B7GPBF4KRW19OXJK0CfAv4QqtrabIrgDbbI4Dr+MfVpKZJeDXWX4D6/wPZvLR12UfSasAQ4OmmVNc3ejPm/qZXY5b0LuBE4ADbLzeptr60rL/rnwEH9WlFfa+nMQ8GdgB+K+kR4O3A1Io/tNHj79n203V/p88DRjeptlclvBprOrC1pDdLWoPaAxlTO/WZChxdlscCN7jcAa2o3oy5v+lxzJLeCpxLLbiebEGNfaE34966bvV9wANNrK8vLHXMtufa3sh2m+02avc3D7Dd3ppyG6I3v+ehdasHAPc2sT4gnyrfULYXSfo0cA21J3bOtz1H0teAdttTgR8BP5H0R+AZan8xKqs3Y5a0M3AZsD6wv6RTbG/fwrJXSC9/z2cAg4Ap5XmcR20f0LKiG6CX4/50mXEuBJ7lH/+jVkm9HHO/0ssxf1bSAcAiav+OjWt2nfl4qIiIqJxcNoyIiMpJeEVEROUkvCIionISXhERUTkJr4iIqJyEV0REVE7CKyIiKud/Af95FsLTkTi5AAAAAElFTkSuQmCC\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
] | |
}, | |
"metadata": { | |
"needs_background": "light" | |
}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"import seaborn as sns\n", | |
"\n", | |
"fti = clf.feature_importances_\n", | |
"names = [f.name for f in fextractor.features]\n", | |
"\n", | |
"# Walk the feature groups in order, recording the [start, end) slice each one occupies.\n", | |
"# Assumes the feature vector concatenates the groups in fextractor.features order - TODO confirm.\n", | |
"feature_dim_names = []\n", | |
"idx = 0\n", | |
"for f in fextractor.features:\n", | |
"    idx_end = idx + f.dim\n", | |
"    feature_dim_names.append((f.name, idx, idx_end))\n", | |
"    idx = idx_end\n", | |
"\n", | |
"# Importance of a group = sum of the per-dimension importances inside its slice.\n", | |
"importances = [np.sum(fti[idx_beg:idx_end]) for _, idx_beg, idx_end in feature_dim_names]\n", | |
"sns.barplot(x=importances, y=names)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
AUCを比較すると、Emberのほうが優れていた。
Malware Data Scienceのモデルだと AUC が0.9951
Emberだと AUC が0.9972