Skip to content

Instantly share code, notes, and snippets.

@redoxate
Created January 5, 2021 18:42
Show Gist options
  • Save redoxate/c5cb93e0d5cf8f67d98eeeb20d949f98 to your computer and use it in GitHub Desktop.
Save redoxate/c5cb93e0d5cf8f67d98eeeb20d949f98 to your computer and use it in GitHub Desktop.
classification_APS.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "classification_APS.ipynb",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyM0qoC0a1/ExAP4qMEjxFil",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/redoxate/c5cb93e0d5cf8f67d98eeeb20d949f98/classification_aps.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"id": "vfxAtXT_gbeu"
},
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.metrics import confusion_matrix\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.neighbors import KNeighborsClassifier\n"
],
"execution_count": 98,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "llFVfVvor71S"
},
"source": [
"data = pd.DataFrame(pd.read_csv('aps_failure_training_set.csv', encoding='utf-8'))"
],
"execution_count": 99,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "v2WOZuo-sO1T"
},
"source": [
"data = data.replace(['na'],np.nan)\n",
"data = data.replace(['neg'],0)\n",
"data = data.replace(['pos'],1)"
],
"execution_count": 100,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Ny-keFEE3BEo"
},
"source": [
"imputer = SimpleImputer(missing_values=np.nan, strategy='mean')\n",
"#imputer.fit(data)\n",
"#data = imputer.transform(data)\n",
"data = imputer.fit_transform(data)\n",
"y = data[:,0]\n",
"X = data[:,1:]\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.20, random_state=42)\n",
"\n"
],
"execution_count": 101,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 0
},
"id": "bVyLgG3A9Pal",
"outputId": "8c44cb16-8414-4bbc-db1c-2173cd3d423e"
},
"source": [
"for n_voisins in [3,4,5,6,7]:\n",
" KN = KNeighborsClassifier(n_neighbors=n_voisins)\n",
" KN.fit(X_train,y_train)\n",
" y_kn_predict= KN.predict(X_test)\n",
"\n",
" tn, fp, fn, tp = confusion_matrix(y_test, y_kn_predict).ravel()\n",
" print('KN avec n_voisins : ', n_voisins , (tn, fp, fn, tp))"
],
"execution_count": 103,
"outputs": [
{
"output_type": "stream",
"text": [
"KN avec n_voisins : 3 (11745, 43, 115, 97)\n",
"KN avec n_voisins : 4 (11764, 24, 141, 71)\n",
"KN avec n_voisins : 5 (11747, 41, 115, 97)\n",
"KN avec n_voisins : 6 (11763, 25, 143, 69)\n",
"KN avec n_voisins : 7 (11750, 38, 118, 94)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 0
},
"id": "t8UYjaMxCSlx",
"outputId": "5d58a69f-1e7f-4ac5-febe-052b54c29613"
},
"source": [
"\n",
"\n",
"clf = DecisionTreeClassifier()\n",
"clf = clf.fit(X_train,y_train)\n",
"y_clf_predict = clf.predict(X_test)\n",
"\n",
"tn, fp, fn, tp = confusion_matrix(y_test, y_clf_predict).ravel()\n",
"print('DecisionTree: ', (tn, fp, fn, tp))\n"
],
"execution_count": 91,
"outputs": [
{
"output_type": "stream",
"text": [
"DecisionTree : (11713, 75, 69, 143)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 0
},
"id": "GHG3s7b9IYfb",
"outputId": "bf51fa5f-bef2-46c1-ef52-926cdafc513f"
},
"source": [
"\n",
"from sklearn.model_selection import GridSearchCV\n",
"\n",
"\n",
"logisticRegr = LogisticRegression()\n",
"param_grid = [ \n",
" {'solver' : ['lbfgs','newton-cg','liblinear','sag','saga'],\n",
" 'penalty' : ['l1', 'l2', 'elasticnet', 'none'],\n",
" 'max_iter' : [100, 1000,2500, 5000]\n",
" }\n",
"]\n",
"logisticRegr\n"
],
"execution_count": 108,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
" intercept_scaling=1, l1_ratio=None, max_iter=100,\n",
" multi_class='auto', n_jobs=None, penalty='l2',\n",
" random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n",
" warm_start=False)"
]
},
"metadata": {
"tags": []
},
"execution_count": 108
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 0
},
"id": "38wqorNOkOKQ",
"outputId": "179f681c-bf73-4be6-9e23-65c3e059157a"
},
"source": [
"clf = GridSearchCV(logisticRegr, param_grid = param_grid, cv = 3, verbose=True, n_jobs=-1)\n",
"best_clf = clf.fit(X_train,y_train)\n",
"best_clf.best_estimator_"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Fitting 3 folds for each of 80 candidates, totalling 240 fits\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n",
"[Parallel(n_jobs=-1)]: Done 46 tasks | elapsed: 29.4min\n"
],
"name": "stderr"
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment