Prathmeshp20 · October 7, 2021 18:13
diff --git a/Assignment15 - Random Forest.ipynb b/Assignment15 - Random Forest.ipynb
 {
  "cells": [
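    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## Fraud Check\n\nLabel each record `Good` (Taxable.Income > 30000) or `Risky` (Taxable.Income <= 30000) and estimate random-forest accuracy with 10-fold cross-validation."
    },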
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "import pandas as pd\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.ensemble import RandomForestClassifier",
      "execution_count": 1,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Location of the fraud dataset; edit this one line when running on another machine.\nFRAUD_CSV = 'C:/Users/Prathmesh/Downloads/Fraud_check.csv'\nfraud = pd.read_csv(FRAUD_CSV)",
      "execution_count": 2,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "fraud1 = pd.get_dummies(fraud, columns = ['Undergrad', 'Marital.Status', 'Urban'])",
      "execution_count": 3,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "fraud1.loc[fraud1[\"Taxable.Income\"]>30000,\"income\"]=\"Good\"\nfraud1.loc[fraud1[\"Taxable.Income\"]<=30000,\"income\"]=\"Risky\"",
      "execution_count": 4,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "fraud1.head()",
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 5,
          "data": {
            "text/plain": "   Taxable.Income  City.Population  Work.Experience  Undergrad_NO  \\\n0           68833            50047               10             1   \n1           33700           134075               18             0   \n2           36925           160205               30             1   \n3           50190           193264               15             0   \n4           81002            27533               28             1   \n\n   Undergrad_YES  Marital.Status_Divorced  Marital.Status_Married  \\\n0              0                        0                       0   \n1              1                        1                       0   \n2              0                        0                       1   \n3              1                        0                       0   \n4              0                        0                       1   \n\n   Marital.Status_Single  Urban_NO  Urban_YES income  \n0                      1         0          1   Good  \n1                      0         0          1   Good  \n2                      0         0          1   Good  \n3                      1         0          1   Good  \n4                      0         1          0   Good  ",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Taxable.Income</th>\n      <th>City.Population</th>\n      <th>Work.Experience</th>\n      <th>Undergrad_NO</th>\n      <th>Undergrad_YES</th>\n      <th>Marital.Status_Divorced</th>\n      <th>Marital.Status_Married</th>\n      <th>Marital.Status_Single</th>\n      <th>Urban_NO</th>\n      <th>Urban_YES</th>\n      <th>income</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>68833</td>\n      <td>50047</td>\n      <td>10</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>1</td>\n      <td>Good</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>33700</td>\n      <td>134075</td>\n      <td>18</td>\n      <td>0</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>Good</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>36925</td>\n      <td>160205</td>\n      <td>30</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>Good</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>50190</td>\n      <td>193264</td>\n      <td>15</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>1</td>\n      <td>Good</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>81002</td>\n      <td>27533</td>\n      <td>28</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      
<td>Good</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Pick predictors by name rather than by position: everything except the raw\n# Taxable.Income and the income label derived from it. A positional slice would\n# silently shift if the dummy columns ever changed in number or order.\nx = fraud1.drop(columns=['Taxable.Income', 'income'])\ny = fraud1['income']",
      "execution_count": 6,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Passed to RandomForestClassifier as max_features in the next cell.\nmax_features = 3\n# random_state is only honoured when shuffle=True; without it scikit-learn 0.23 warns and 0.24+ raises.\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "num_trees = 80\n# Seed the forest so the cross-validated accuracy is the same on every run.\nmodel = RandomForestClassifier(n_estimators=num_trees, max_features=max_features, random_state=7)\n# One accuracy score per fold; report their mean.\nresults = cross_val_score(model, x, y, cv=kfold)\nprint(results.mean())",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    },
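    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## Company Data\n\nLabel each record `High` = 1 (Sales > 8) or 0 (Sales <= 8) and estimate random-forest accuracy with 10-fold cross-validation."
    },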
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Location of the company dataset; edit this one line when running on another machine.\nCOMPANY_CSV = 'C:/Users/Prathmesh/Downloads/Company_Data.csv'\ncompany = pd.read_csv(COMPANY_CSV)",
      "execution_count": 12,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "company1 = pd.get_dummies(company, columns = ['ShelveLoc', 'Urban', 'US'])",
      "execution_count": 13,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "company1.loc[company1[\"Sales\"]>8,\"High\"]=1\ncompany1.loc[company1[\"Sales\"]<=8,\"High\"]=0",
      "execution_count": 18,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = company1.iloc[:,1:15]\ny = company1['High']",
      "execution_count": 19,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Passed to RandomForestClassifier as max_features in the next cell.\nmax_features = 4\n# random_state is only honoured when shuffle=True; without it scikit-learn 0.23 warns and 0.24+ raises.\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "num_trees = 100\n# Seed the forest so the cross-validated accuracy is the same on every run.\nmodel = RandomForestClassifier(n_estimators=num_trees, max_features=max_features, random_state=7)\n# One accuracy score per fold; report their mean.\nresults = cross_val_score(model, x, y, cv=kfold)\nprint(results.mean())",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3",
      "language": "python"
    },
    "language_info": {
      "name": "python",
      "version": "3.8.5",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    },
    "gist": {
      "id": "",
      "data": {
        "description": "Assignment15 - RandomForest.ipynb",
        "public": true
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 4
 }
	{
	"cells": [
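	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "## Fraud Check\n\nLabel each record `Good` (Taxable.Income > 30000) or `Risky` (Taxable.Income <= 30000) and estimate random-forest accuracy with 10-fold cross-validation."
	},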
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "import pandas as pd\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.ensemble import RandomForestClassifier",
	"execution_count": 1,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Location of the fraud dataset; edit this one line when running on another machine.\nFRAUD_CSV = 'C:/Users/Prathmesh/Downloads/Fraud_check.csv'\nfraud = pd.read_csv(FRAUD_CSV)",
	"execution_count": 2,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "fraud1 = pd.get_dummies(fraud, columns = ['Undergrad', 'Marital.Status', 'Urban'])",
	"execution_count": 3,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "fraud1.loc[fraud1[\"Taxable.Income\"]>30000,\"income\"]=\"Good\"\nfraud1.loc[fraud1[\"Taxable.Income\"]<=30000,\"income\"]=\"Risky\"",
	"execution_count": 4,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "fraud1.head()",
	"execution_count": 5,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 5,
	"data": {
	"text/plain": " Taxable.Income City.Population Work.Experience Undergrad_NO \\\n0 68833 50047 10 1 \n1 33700 134075 18 0 \n2 36925 160205 30 1 \n3 50190 193264 15 0 \n4 81002 27533 28 1 \n\n Undergrad_YES Marital.Status_Divorced Marital.Status_Married \\\n0 0 0 0 \n1 1 1 0 \n2 0 0 1 \n3 1 0 0 \n4 0 0 1 \n\n Marital.Status_Single Urban_NO Urban_YES income \n0 1 0 1 Good \n1 0 0 1 Good \n2 0 0 1 Good \n3 1 0 1 Good \n4 0 1 0 Good ",
	"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Taxable.Income</th>\n <th>City.Population</th>\n <th>Work.Experience</th>\n <th>Undergrad_NO</th>\n <th>Undergrad_YES</th>\n <th>Marital.Status_Divorced</th>\n <th>Marital.Status_Married</th>\n <th>Marital.Status_Single</th>\n <th>Urban_NO</th>\n <th>Urban_YES</th>\n <th>income</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>68833</td>\n <td>50047</td>\n <td>10</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>Good</td>\n </tr>\n <tr>\n <th>1</th>\n <td>33700</td>\n <td>134075</td>\n <td>18</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>Good</td>\n </tr>\n <tr>\n <th>2</th>\n <td>36925</td>\n <td>160205</td>\n <td>30</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>Good</td>\n </tr>\n <tr>\n <th>3</th>\n <td>50190</td>\n <td>193264</td>\n <td>15</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>Good</td>\n </tr>\n <tr>\n <th>4</th>\n <td>81002</td>\n <td>27533</td>\n <td>28</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>Good</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Pick predictors by name rather than by position: everything except the raw\n# Taxable.Income and the income label derived from it. A positional slice would\n# silently shift if the dummy columns ever changed in number or order.\nx = fraud1.drop(columns=['Taxable.Income', 'income'])\ny = fraud1['income']",
	"execution_count": 6,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Passed to RandomForestClassifier as max_features in the next cell.\nmax_features = 3\n# random_state is only honoured when shuffle=True; without it scikit-learn 0.23 warns and 0.24+ raises.\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "num_trees = 80\n# Seed the forest so the cross-validated accuracy is the same on every run.\nmodel = RandomForestClassifier(n_estimators=num_trees, max_features=max_features, random_state=7)\n# One accuracy score per fold; report their mean.\nresults = cross_val_score(model, x, y, cv=kfold)\nprint(results.mean())",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	},
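	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "## Company Data\n\nLabel each record `High` = 1 (Sales > 8) or 0 (Sales <= 8) and estimate random-forest accuracy with 10-fold cross-validation."
	},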
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Location of the company dataset; edit this one line when running on another machine.\nCOMPANY_CSV = 'C:/Users/Prathmesh/Downloads/Company_Data.csv'\ncompany = pd.read_csv(COMPANY_CSV)",
	"execution_count": 12,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "company1 = pd.get_dummies(company, columns = ['ShelveLoc', 'Urban', 'US'])",
	"execution_count": 13,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "company1.loc[company1[\"Sales\"]>8,\"High\"]=1\ncompany1.loc[company1[\"Sales\"]<=8,\"High\"]=0",
	"execution_count": 18,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "x = company1.iloc[:,1:15]\ny = company1['High']",
	"execution_count": 19,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Passed to RandomForestClassifier as max_features in the next cell.\nmax_features = 4\n# random_state is only honoured when shuffle=True; without it scikit-learn 0.23 warns and 0.24+ raises.\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "num_trees = 100\n# Seed the forest so the cross-validated accuracy is the same on every run.\nmodel = RandomForestClassifier(n_estimators=num_trees, max_features=max_features, random_state=7)\n# One accuracy score per fold; report their mean.\nresults = cross_val_score(model, x, y, cv=kfold)\nprint(results.mean())",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	}
	],
	"metadata": {
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3",
	"language": "python"
	},
	"language_info": {
	"name": "python",
	"version": "3.8.5",
	"mimetype": "text/x-python",
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"pygments_lexer": "ipython3",
	"nbconvert_exporter": "python",
	"file_extension": ".py"
	},
	"gist": {
	"id": "",
	"data": {
	"description": "Assignment15 - RandomForest.ipynb",
	"public": true
	}
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}