ashishpatel26 · March 18, 2020 05:59
diff --git a/Stacking.ipynb b/Stacking.ipynb
 {
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Stacking.ipynb",
      "provenance": [],
      "authorship_tag": "ABX9TyP6uxGd1ww4vJjfbwztW4cP",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/ashishpatel26/Ensemble-Learning-Algorithm-Medium/blob/master/Stacking.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "COhWmSXqImMq",
        "colab_type": "text"
      },
      "source": [
        "# Import the IRIS dataset from sklearn"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "LmAaNZs-In8M",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "from\tsklearn\timport\tdatasets"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ZG42X93cIqS-",
        "colab_type": "text"
      },
      "source": [
        "# Import the Random Forest, Logistic Regression, Naive Bayes and KNN classifier classes used to build the stacking ensemble"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "gfrzPwfZIpD1",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "from sklearn.ensemble\timport RandomForestClassifier\n",
        "from sklearn.linear_model\timport LogisticRegression\n",
        "from sklearn.naive_bayes\timport GaussianNB\n",
        "from sklearn.neighbors import KNeighborsClassifier"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "9QJhDmDaJYE7",
        "colab_type": "text"
      },
      "source": [
        "# Import numpy for array based operations"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "puO_NK5LJWvT",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import numpy as np"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "gUjuiqWTJe2L",
        "colab_type": "text"
      },
      "source": [
        "# Load the dataset"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "J5FFTMLiJdRK",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "iris = datasets.load_iris()"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "cTcZm1U2Jmfb",
        "colab_type": "text"
      },
      "source": [
        "# Extract data and target out of dataset"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "RuxlCFJrJlGx",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "X,y = iris.data[:,1:3], iris.target"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Z5CHi94aJ8rw",
        "colab_type": "text"
      },
      "source": [
        "# We will define a method to calculate the accuracy of the predicted output against the known labels"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "a30zMP9XJyiR",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "def CalculateAccuracy(y_test, pred_label):\n",
        "  \"\"\"Return the percentage (0-100) of predictions that match the known labels.\n",
        "\n",
        "  Args:\n",
        "    y_test: array-like of known labels.\n",
        "    pred_label: array-like of predicted labels, one per entry of y_test.\n",
        "\n",
        "  Returns:\n",
        "    float: 100 * (number of matching labels) / (number of labels).\n",
        "\n",
        "  Raises:\n",
        "    ValueError: if the inputs do not hold the same number of labels.\n",
        "    ZeroDivisionError: if y_test is empty.\n",
        "  \"\"\"\n",
        "  # Flatten both inputs so an (n, 1) column of predictions is compared\n",
        "  # element-wise instead of being broadcast against the labels to (n, n).\n",
        "  y_true = np.asarray(y_test).ravel()\n",
        "  y_pred = np.asarray(pred_label).ravel()\n",
        "  if y_true.shape != y_pred.shape:\n",
        "    raise ValueError(\n",
        "        \"y_test and pred_label must hold the same number of labels, got \"\n",
        "        + str(y_true.size) + \" and \" + str(y_pred.size))\n",
        "  # Compare with != instead of subtracting so non-numeric labels also work.\n",
        "  n_total = y_true.size\n",
        "  n_correct = n_total - np.count_nonzero(y_pred != y_true)\n",
        "  return 100 * n_correct / float(n_total)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "T_ej07aBKYPo",
        "colab_type": "text"
      },
      "source": [
        "# Create a KNN classifier with 2 nearest neighbors"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "dbbkJNMRJ5Oo",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "clf1\t=\tKNeighborsClassifier(n_neighbors=2)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "vIRqPjz1KcFs",
        "colab_type": "text"
      },
      "source": [
        "# We will create a random forest classifier with 2 decision trees"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ib8tYBIfKZuG",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "clf2\t=\tRandomForestClassifier(n_estimators\t=\t2,random_state=1)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "cLXFrxIvKgMz",
        "colab_type": "text"
      },
      "source": [
        "# Create a Naive Bayes classifier"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "dnC5KuMaKd0u",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "clf3\t=\tGaussianNB()"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "7cxS0kTwKk2f",
        "colab_type": "text"
      },
      "source": [
        "# Finally create a logistic regression classifier to combine the predictions from the above classifiers."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "hRhzOv5lKhqW",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "lr\t=\tLogisticRegression()"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "2tedDwXAKrit",
        "colab_type": "text"
      },
      "source": [
        "# Now we will train all first-level classifiers"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "4s4fzZhiKmr1",
        "colab_type": "code",
        "outputId": "80486c43-76f8-4168-f895-3c7eb8a51a0a",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "clf1.fit(X,\ty) \n",
        "clf2.fit(X,\ty) \n",
        "clf3.fit(X,\ty)"
      ],
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "GaussianNB(priors=None, var_smoothing=1e-09)"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 11
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "8EhDjAnnKx1z",
        "colab_type": "text"
      },
      "source": [
        "# Predict the labels for the input data with each classifier, print their accuracy and store the predictions in arrays (f1, f2, f3)"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "aOF0lKoFKs_F",
        "colab_type": "code",
        "outputId": "c9bcebee-2c3f-4fa3-c4e0-6d663c540f49",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "f1 = clf1.predict(X) \n",
        "acc1 = CalculateAccuracy(y,\tf1) \n",
        "print(\"accuracy from KNN: \"+str(acc1))"
      ],
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "accuracy from KNN: 96.66666666666667\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "F_JMQ-Z2K8Zc",
        "colab_type": "code",
        "outputId": "4a6b046e-fda7-4fb0-dd53-0881e4395d6f",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "f2 = clf2.predict(X) \n",
        "acc2 = CalculateAccuracy(y,\tf2) \n",
        "print(\"accuracy from Random Forest: \"+str(acc2))"
      ],
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "accuracy from Random Forest: 94.66666666666667\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "EmBxIl3RLKfb",
        "colab_type": "code",
        "outputId": "29af354b-159a-48d1-c8c3-7eb4929a3f47",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "f3 = clf3.predict(X) \n",
        "acc3 = CalculateAccuracy(y,\tf3) \n",
        "print(\"accuracy from Naive Bayes: \"+str(acc3))"
      ],
      "execution_count": 14,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "accuracy from Naive Bayes: 92.0\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "1xHG9NGDLatn",
        "colab_type": "text"
      },
      "source": [
        "# Combine the predictions into a single array and transpose the array to match the input shape of our classifier."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Aa8W_r6ULPTk",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "f\t=\t[f1,f2,f3] \n",
        "f\t= np.transpose(f)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "8Lf29p2-LiEO",
        "colab_type": "text"
      },
      "source": [
        "# Now train the final (logistic regression) classifier on the combined predictions"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "YWaO4ygrLeK6",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "lr.fit(f,\ty) \n",
        "final\t=\tlr.predict(f)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Ep5EeixqLn1Z",
        "colab_type": "text"
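      },
      "source": [
        "# Calculate and print the accuracy of the final classifier.\n",
        "\n",
        "Note: every accuracy in this notebook is measured on the same samples the classifiers were trained on, so these figures are optimistic; evaluate on held-out data for a fair comparison."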
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "2gbjrT4VLlCa",
        "colab_type": "code",
        "outputId": "086bdc20-1f87-49e0-b072-fb9c86334433",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "acc4 = CalculateAccuracy(y,\tfinal) \n",
        "print(\"accuracy from Stacking:\"+str(acc4))"
      ],
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "accuracy from Stacking:97.33333333333333\n"
          ],
          "name": "stdout"
        }
      ]
    }
  ]
 }
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"name": "Stacking.ipynb",
	"provenance": [],
	"authorship_tag": "ABX9TyP6uxGd1ww4vJjfbwztW4cP",
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/github/ashishpatel26/Ensemble-Learning-Algorithm-Medium/blob/master/Stacking.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "COhWmSXqImMq",
	"colab_type": "text"
	},
	"source": [
	"# Import the IRIS dataset from sklearn"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "LmAaNZs-In8M",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"from\tsklearn\timport\tdatasets"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "ZG42X93cIqS-",
	"colab_type": "text"
	},
	"source": [
	"# Import the Random Forest, Logistic Regression, Naive Bayes and KNN classifier classes used to build the stacking ensemble"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "gfrzPwfZIpD1",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"from sklearn.ensemble\timport RandomForestClassifier\n",
	"from sklearn.linear_model\timport LogisticRegression\n",
	"from sklearn.naive_bayes\timport GaussianNB\n",
	"from sklearn.neighbors import KNeighborsClassifier"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "9QJhDmDaJYE7",
	"colab_type": "text"
	},
	"source": [
	"# Import numpy for array based operations"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "puO_NK5LJWvT",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"import numpy as np"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "gUjuiqWTJe2L",
	"colab_type": "text"
	},
	"source": [
	"# Load the dataset"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "J5FFTMLiJdRK",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"iris = datasets.load_iris()"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "cTcZm1U2Jmfb",
	"colab_type": "text"
	},
	"source": [
	"# Extract data and target out of dataset"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "RuxlCFJrJlGx",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"X,y = iris.data[:,1:3], iris.target"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "Z5CHi94aJ8rw",
	"colab_type": "text"
	},
	"source": [
	"# We will define a method to calculate the accuracy of the predicted output against the known labels"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "a30zMP9XJyiR",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"def CalculateAccuracy(y_test, pred_label):\n",
	"  \"\"\"Return the percentage (0-100) of predictions that match the known labels.\n",
	"\n",
	"  Args:\n",
	"    y_test: array-like of known labels.\n",
	"    pred_label: array-like of predicted labels, one per entry of y_test.\n",
	"\n",
	"  Returns:\n",
	"    float: 100 * (number of matching labels) / (number of labels).\n",
	"\n",
	"  Raises:\n",
	"    ValueError: if the inputs do not hold the same number of labels.\n",
	"    ZeroDivisionError: if y_test is empty.\n",
	"  \"\"\"\n",
	"  # Flatten both inputs so an (n, 1) column of predictions is compared\n",
	"  # element-wise instead of being broadcast against the labels to (n, n).\n",
	"  y_true = np.asarray(y_test).ravel()\n",
	"  y_pred = np.asarray(pred_label).ravel()\n",
	"  if y_true.shape != y_pred.shape:\n",
	"    raise ValueError(\n",
	"        \"y_test and pred_label must hold the same number of labels, got \"\n",
	"        + str(y_true.size) + \" and \" + str(y_pred.size))\n",
	"  # Compare with != instead of subtracting so non-numeric labels also work.\n",
	"  n_total = y_true.size\n",
	"  n_correct = n_total - np.count_nonzero(y_pred != y_true)\n",
	"  return 100 * n_correct / float(n_total)"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "T_ej07aBKYPo",
	"colab_type": "text"
	},
	"source": [
	"# Create a KNN classifier with 2 nearest neighbors"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "dbbkJNMRJ5Oo",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"clf1\t=\tKNeighborsClassifier(n_neighbors=2)"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "vIRqPjz1KcFs",
	"colab_type": "text"
	},
	"source": [
	"# We will create a random forest classifier with 2 decision trees"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "ib8tYBIfKZuG",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"clf2\t=\tRandomForestClassifier(n_estimators\t=\t2,random_state=1)"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "cLXFrxIvKgMz",
	"colab_type": "text"
	},
	"source": [
	"# Create a Naive Bayes classifier"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "dnC5KuMaKd0u",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"clf3\t=\tGaussianNB()"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "7cxS0kTwKk2f",
	"colab_type": "text"
	},
	"source": [
	"# Finally create a logistic regression classifier to combine the predictions from the above classifiers."
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "hRhzOv5lKhqW",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"lr\t=\tLogisticRegression()"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "2tedDwXAKrit",
	"colab_type": "text"
	},
	"source": [
	"# Now we will train all first-level classifiers"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "4s4fzZhiKmr1",
	"colab_type": "code",
	"outputId": "80486c43-76f8-4168-f895-3c7eb8a51a0a",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	}
	},
	"source": [
	"clf1.fit(X,\ty) \n",
	"clf2.fit(X,\ty) \n",
	"clf3.fit(X,\ty)"
	],
	"execution_count": 11,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"GaussianNB(priors=None, var_smoothing=1e-09)"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 11
	}
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "8EhDjAnnKx1z",
	"colab_type": "text"
	},
	"source": [
	"# Predict the labels for the input data with each classifier, print their accuracy and store the predictions in arrays (f1, f2, f3)"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "aOF0lKoFKs_F",
	"colab_type": "code",
	"outputId": "c9bcebee-2c3f-4fa3-c4e0-6d663c540f49",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	}
	},
	"source": [
	"f1 = clf1.predict(X) \n",
	"acc1 = CalculateAccuracy(y,\tf1) \n",
	"print(\"accuracy from KNN: \"+str(acc1))"
	],
	"execution_count": 12,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"accuracy from KNN: 96.66666666666667\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "F_JMQ-Z2K8Zc",
	"colab_type": "code",
	"outputId": "4a6b046e-fda7-4fb0-dd53-0881e4395d6f",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	}
	},
	"source": [
	"f2 = clf2.predict(X) \n",
	"acc2 = CalculateAccuracy(y,\tf2) \n",
	"print(\"accuracy from Random Forest: \"+str(acc2))"
	],
	"execution_count": 13,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"accuracy from Random Forest: 94.66666666666667\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "EmBxIl3RLKfb",
	"colab_type": "code",
	"outputId": "29af354b-159a-48d1-c8c3-7eb4929a3f47",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	}
	},
	"source": [
	"f3 = clf3.predict(X) \n",
	"acc3 = CalculateAccuracy(y,\tf3) \n",
	"print(\"accuracy from Naive Bayes: \"+str(acc3))"
	],
	"execution_count": 14,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"accuracy from Naive Bayes: 92.0\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "1xHG9NGDLatn",
	"colab_type": "text"
	},
	"source": [
	"# Combine the predictions into a single array and transpose the array to match the input shape of our classifier."
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "Aa8W_r6ULPTk",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"f\t=\t[f1,f2,f3] \n",
	"f\t= np.transpose(f)"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "8Lf29p2-LiEO",
	"colab_type": "text"
	},
	"source": [
	"# Now train the final (logistic regression) classifier on the combined predictions"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "YWaO4ygrLeK6",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"lr.fit(f,\ty) \n",
	"final\t=\tlr.predict(f)"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "Ep5EeixqLn1Z",
	"colab_type": "text"
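	},
	"source": [
	"# Calculate and print the accuracy of the final classifier.\n",
	"\n",
	"Note: every accuracy in this notebook is measured on the same samples the classifiers were trained on, so these figures are optimistic; evaluate on held-out data for a fair comparison."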
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "2gbjrT4VLlCa",
	"colab_type": "code",
	"outputId": "086bdc20-1f87-49e0-b072-fb9c86334433",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	}
	},
	"source": [
	"acc4 = CalculateAccuracy(y,\tfinal) \n",
	"print(\"accuracy from Stacking:\"+str(acc4))"
	],
	"execution_count": 17,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"accuracy from Stacking:97.33333333333333\n"
	],
	"name": "stdout"
	}
	]
	}
	]
	}