ashishpatel26 · August 15, 2019 06:32
diff --git a/Bagging.ipynb b/Bagging.ipynb
 {
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Bagging.ipynb",
      "version": "0.3.2",
      "provenance": [],
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/ashishpatel26/Ensemble-Learning-Algorithm-Medium/blob/master/Bagging.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "cKUJ2Od19iHG",
        "colab_type": "text"
      },
      "source": [
        "# Import all the required packages (NumPy and scikit-learn)"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "HhZBMk6j8Ofs",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Import All the required packages from sklearn\n",
        "import numpy as np\n",
        "from sklearn import model_selection\n",
        "from sklearn.ensemble import BaggingClassifier\n",
        "from sklearn.tree import DecisionTreeClassifier\n",
        "from sklearn.datasets import load_iris"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "sUnIoRRn9kJn",
        "colab_type": "text"
      },
      "source": [
        "# Load data"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "LVn8bCLi85vq",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Load the iris dataset and unpack its features (`data`) and class labels (`target`)\n",
        "iris = load_iris()\n",
        "X, Y = iris.data, iris.target"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "iNSYEzi-9sZk",
        "colab_type": "text"
      },
      "source": [
        "# Split data into training and testing sets"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "IATJrgBr89Pz",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Hold out 30% of the samples for the final evaluation; the fixed random_state\n",
        "# makes the split repeatable.\n",
        "X_fit, X_eval, y_fit, y_test = model_selection.train_test_split(\n",
        "    X, Y, test_size=0.30, random_state=1\n",
        ")"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "J8OfMB4z9vCg",
        "colab_type": "text"
      },
      "source": [
        "# Create 10 cross-validation folds (sub-samples) to train multiple models"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "bbybfEus9ERU",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "seed = 7\n",
        "# KFold only accepts random_state together with shuffle=True: scikit-learn >= 0.24\n",
        "# raises ValueError otherwise, and older releases silently ignored the argument.\n",
        "# The folds are taken in order here, so random_state is not passed; `seed` is\n",
        "# still used for the bagging model.\n",
        "kfold = model_selection.KFold(n_splits=10)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "t6XlIQQe9yxx",
        "colab_type": "text"
      },
      "source": [
        "# Define a decision tree classifier"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "8JuPaqgO9HM1",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Ensemble size, and the base learner each bagged model is built from\n",
        "num_trees = 100\n",
        "cart = DecisionTreeClassifier()"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "8_XibRu792Z2",
        "colab_type": "text"
      },
      "source": [
        "# Create the bagging classification model"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "NLqtRBEO9I3j",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# The base learner is passed positionally: its keyword was renamed from\n",
        "# `base_estimator` to `estimator` in scikit-learn 1.2 (old name removed in 1.4),\n",
        "# so the positional form runs on both old and new releases.\n",
        "model = BaggingClassifier(cart, n_estimators=num_trees, random_state=seed)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Zf7FrqNN949D",
        "colab_type": "text"
      },
      "source": [
        "# Train different models and print their accuracy"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Wctsy98h9K9z",
        "colab_type": "code",
        "outputId": "1db54ab0-77dc-425a-a852-bec98cd5cf9c",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 187
        }
      },
      "source": [
        "# Cross-validate the bagging model on the training split: one score per fold\n",
        "results = model_selection.cross_val_score(model, X_fit, y_fit, cv=kfold)\n",
        "for fold, score in enumerate(results):\n",
        "    print(\"Model: \" + str(fold) + \" Accuracy is: \" + str(score))"
      ],
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Model: 0 Accuracy is: 1.0\n",
            "Model: 1 Accuracy is: 1.0\n",
            "Model: 2 Accuracy is: 1.0\n",
            "Model: 3 Accuracy is: 0.9090909090909091\n",
            "Model: 4 Accuracy is: 1.0\n",
            "Model: 5 Accuracy is: 1.0\n",
            "Model: 6 Accuracy is: 0.9\n",
            "Model: 7 Accuracy is: 1.0\n",
            "Model: 8 Accuracy is: 1.0\n",
            "Model: 9 Accuracy is: 0.7\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "6QMgyGK-9OUz",
        "colab_type": "code",
        "outputId": "80fcd9f0-1ef3-4c9c-ab54-7c6d93fe3f09",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "# Average of the per-fold scores computed above\n",
        "mean_accuracy = results.mean()\n",
        "print(\"Mean Accuracy is: \" + str(mean_accuracy))"
      ],
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Mean Accuracy is: 0.9509090909090908\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "LcRuIQ9n9SFV",
        "colab_type": "code",
        "outputId": "724add97-40dc-46d6-ff87-96177eb70307",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "# Fit on the full training split, then score the held-out samples\n",
        "model.fit(X_fit, y_fit)\n",
        "pred_label = model.predict(X_eval)\n",
        "n_test = len(y_test)\n",
        "n_wrong = np.count_nonzero(pred_label != y_test)\n",
        "acc = 100 * (n_test - n_wrong) / n_test  # percentage of correct predictions\n",
        "print('accuracy is: ' + str(acc))"
      ],
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "accuracy is: 95.55555555555556\n"
          ],
          "name": "stdout"
        }
      ]
    }
  ]
 }
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"name": "Bagging.ipynb",
	"version": "0.3.2",
	"provenance": [],
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/github/ashishpatel26/Ensemble-Learning-Algorithm-Medium/blob/master/Bagging.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "cKUJ2Od19iHG",
	"colab_type": "text"
	},
	"source": [
	"# Import all the required packages (NumPy and scikit-learn)"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "HhZBMk6j8Ofs",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"# Import All the required packages from sklearn\n",
	"import numpy as np\n",
	"from sklearn import model_selection\n",
	"from sklearn.ensemble import BaggingClassifier\n",
	"from sklearn.tree import DecisionTreeClassifier\n",
	"from sklearn.datasets import load_iris"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "sUnIoRRn9kJn",
	"colab_type": "text"
	},
	"source": [
	"# Load data"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "LVn8bCLi85vq",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"# Load the iris dataset and unpack its features (`data`) and class labels (`target`)\n",
	"iris = load_iris()\n",
	"X, Y = iris.data, iris.target"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "iNSYEzi-9sZk",
	"colab_type": "text"
	},
	"source": [
	"# Split data into training and testing sets"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "IATJrgBr89Pz",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"# Hold out 30% of the samples for the final evaluation; the fixed random_state\n",
	"# makes the split repeatable.\n",
	"X_fit, X_eval, y_fit, y_test = model_selection.train_test_split(\n",
	"    X, Y, test_size=0.30, random_state=1\n",
	")"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "J8OfMB4z9vCg",
	"colab_type": "text"
	},
	"source": [
	"# Create 10 cross-validation folds (sub-samples) to train multiple models"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "bbybfEus9ERU",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"seed = 7\n",
	"# KFold only accepts random_state together with shuffle=True: scikit-learn >= 0.24\n",
	"# raises ValueError otherwise, and older releases silently ignored the argument.\n",
	"# The folds are taken in order here, so random_state is not passed; `seed` is\n",
	"# still used for the bagging model.\n",
	"kfold = model_selection.KFold(n_splits=10)"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "t6XlIQQe9yxx",
	"colab_type": "text"
	},
	"source": [
	"# Define a decision tree classifier"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "8JuPaqgO9HM1",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"# Ensemble size, and the base learner each bagged model is built from\n",
	"num_trees = 100\n",
	"cart = DecisionTreeClassifier()"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "8_XibRu792Z2",
	"colab_type": "text"
	},
	"source": [
	"# Create the bagging classification model"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "NLqtRBEO9I3j",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"# The base learner is passed positionally: its keyword was renamed from\n",
	"# `base_estimator` to `estimator` in scikit-learn 1.2 (old name removed in 1.4),\n",
	"# so the positional form runs on both old and new releases.\n",
	"model = BaggingClassifier(cart, n_estimators=num_trees, random_state=seed)"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "Zf7FrqNN949D",
	"colab_type": "text"
	},
	"source": [
	"# Train different models and print their accuracy"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "Wctsy98h9K9z",
	"colab_type": "code",
	"outputId": "1db54ab0-77dc-425a-a852-bec98cd5cf9c",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 187
	}
	},
	"source": [
	"# Cross-validate the bagging model on the training split: one score per fold\n",
	"results = model_selection.cross_val_score(model, X_fit, y_fit, cv=kfold)\n",
	"for fold, score in enumerate(results):\n",
	"    print(\"Model: \" + str(fold) + \" Accuracy is: \" + str(score))"
	],
	"execution_count": 10,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"Model: 0 Accuracy is: 1.0\n",
	"Model: 1 Accuracy is: 1.0\n",
	"Model: 2 Accuracy is: 1.0\n",
	"Model: 3 Accuracy is: 0.9090909090909091\n",
	"Model: 4 Accuracy is: 1.0\n",
	"Model: 5 Accuracy is: 1.0\n",
	"Model: 6 Accuracy is: 0.9\n",
	"Model: 7 Accuracy is: 1.0\n",
	"Model: 8 Accuracy is: 1.0\n",
	"Model: 9 Accuracy is: 0.7\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "6QMgyGK-9OUz",
	"colab_type": "code",
	"outputId": "80fcd9f0-1ef3-4c9c-ab54-7c6d93fe3f09",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	}
	},
	"source": [
	"# Average of the per-fold scores computed above\n",
	"mean_accuracy = results.mean()\n",
	"print(\"Mean Accuracy is: \" + str(mean_accuracy))"
	],
	"execution_count": 11,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"Mean Accuracy is: 0.9509090909090908\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "LcRuIQ9n9SFV",
	"colab_type": "code",
	"outputId": "724add97-40dc-46d6-ff87-96177eb70307",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	}
	},
	"source": [
	"# Fit on the full training split, then score the held-out samples\n",
	"model.fit(X_fit, y_fit)\n",
	"pred_label = model.predict(X_eval)\n",
	"n_test = len(y_test)\n",
	"n_wrong = np.count_nonzero(pred_label != y_test)\n",
	"acc = 100 * (n_test - n_wrong) / n_test  # percentage of correct predictions\n",
	"print('accuracy is: ' + str(acc))"
	],
	"execution_count": 12,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"accuracy is: 95.55555555555556\n"
	],
	"name": "stdout"
	}
	]
	}
	]
	}