Skip to content

Instantly share code, notes, and snippets.

@Prathmeshp20
Created October 7, 2021 18:13
Show Gist options
  • Save Prathmeshp20/52b03eafa0730391788bb17170948884 to your computer and use it in GitHub Desktop.
Save Prathmeshp20/52b03eafa0730391788bb17170948884 to your computer and use it in GitHub Desktop.
Assignment15 - RandomForest.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
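{
"metadata": {},
"cell_type": "markdown",
"source": "## Fraud Check\n\nClassify each record as `Good` (`Taxable.Income` > 30000) or `Risky` (`Taxable.Income` <= 30000) with a random forest, scored by 10-fold cross-validation."
},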
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "import os\nfrom pathlib import Path\n\nimport pandas as pd\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import cross_val_score",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Folder holding the input CSVs. Defaults to the original location; set the DATA_DIR\n# environment variable to run the notebook on another machine without editing code.\nDATA_DIR = Path(os.environ.get('DATA_DIR', 'C:/Users/Prathmesh/Downloads'))\nfraud = pd.read_csv(DATA_DIR / 'Fraud_check.csv')",
"execution_count": 2,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# One-hot encode the three categorical columns; every other column is kept as it is.\nfraud1 = pd.get_dummies(\n    data=fraud,\n    columns=['Undergrad', 'Marital.Status', 'Urban'],\n)",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Derive the target label: taxable income above 30000 is Good, 30000 or less is Risky.\nfraud1.loc[fraud1[\"Taxable.Income\"].gt(30000), \"income\"] = \"Good\"\nfraud1.loc[fraud1[\"Taxable.Income\"].le(30000), \"income\"] = \"Risky\"",
"execution_count": 4,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Preview the encoded frame together with the new income label.\nfraud1.head(n=5)",
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 5,
"data": {
"text/plain": " Taxable.Income City.Population Work.Experience Undergrad_NO \\\n0 68833 50047 10 1 \n1 33700 134075 18 0 \n2 36925 160205 30 1 \n3 50190 193264 15 0 \n4 81002 27533 28 1 \n\n Undergrad_YES Marital.Status_Divorced Marital.Status_Married \\\n0 0 0 0 \n1 1 1 0 \n2 0 0 1 \n3 1 0 0 \n4 0 0 1 \n\n Marital.Status_Single Urban_NO Urban_YES income \n0 1 0 1 Good \n1 0 0 1 Good \n2 0 0 1 Good \n3 1 0 1 Good \n4 0 1 0 Good ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Taxable.Income</th>\n <th>City.Population</th>\n <th>Work.Experience</th>\n <th>Undergrad_NO</th>\n <th>Undergrad_YES</th>\n <th>Marital.Status_Divorced</th>\n <th>Marital.Status_Married</th>\n <th>Marital.Status_Single</th>\n <th>Urban_NO</th>\n <th>Urban_YES</th>\n <th>income</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>68833</td>\n <td>50047</td>\n <td>10</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>Good</td>\n </tr>\n <tr>\n <th>1</th>\n <td>33700</td>\n <td>134075</td>\n <td>18</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>Good</td>\n </tr>\n <tr>\n <th>2</th>\n <td>36925</td>\n <td>160205</td>\n <td>30</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>Good</td>\n </tr>\n <tr>\n <th>3</th>\n <td>50190</td>\n <td>193264</td>\n <td>15</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>Good</td>\n </tr>\n <tr>\n <th>4</th>\n <td>81002</td>\n <td>27533</td>\n <td>28</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>Good</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Features are every column except the target and Taxable.Income, which the target is\n# derived from. Selecting by name yields the same nine columns as the positional slice 1:10\n# but does not silently change if the column order does.\nx = fraud1.drop(columns=['Taxable.Income', 'income'])\ny = fraud1['income']",
"execution_count": 6,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# random_state only takes effect when shuffle=True; without it scikit-learn warned that the\n# seed was ignored (and raises from 0.24), so shuffle explicitly to get seeded, reproducible folds.\nseed = 7\nmax_features = 3\nkfold = KFold(n_splits=10, shuffle=True, random_state=seed)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Seed the forest as well so repeated runs report the same mean accuracy.\n# (The output previously stored for this cell, 0.7367, came from unshuffled folds and an unseeded forest.)\nnum_trees = 80\nmodel = RandomForestClassifier(n_estimators=num_trees, max_features=max_features, random_state=seed)\nresults = cross_val_score(model, x, y, cv=kfold)\nprint(results.mean())",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
},
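{
"metadata": {},
"cell_type": "markdown",
"source": "## Company Data\n\nPredict whether `Sales` is high (`Sales` > 8) with a random forest, scored by 10-fold cross-validation."
},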
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Folder holding the input CSVs. Defaults to the original location; set the DATA_DIR\n# environment variable to run the notebook on another machine without editing code.\nDATA_DIR = Path(os.environ.get('DATA_DIR', 'C:/Users/Prathmesh/Downloads'))\ncompany = pd.read_csv(DATA_DIR / 'Company_Data.csv')",
"execution_count": 12,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# One-hot encode the three categorical columns; every other column is kept as it is.\ncompany1 = pd.get_dummies(\n    data=company,\n    columns=['ShelveLoc', 'Urban', 'US'],\n)",
"execution_count": 13,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Derive the binary target: 1 when Sales is above 8, 0 when Sales is 8 or less.\ncompany1.loc[company1[\"Sales\"].gt(8), \"High\"] = 1\ncompany1.loc[company1[\"Sales\"].le(8), \"High\"] = 0",
"execution_count": 18,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Predictors are the columns at positions 1 through 14; the derived High flag is the target.\nx = company1.iloc[:, slice(1, 15)]\ny = company1.loc[:, 'High']",
"execution_count": 19,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# random_state only takes effect when shuffle=True; without it scikit-learn warned that the\n# seed was ignored (and raises from 0.24), so shuffle explicitly to get seeded, reproducible folds.\nseed = 7\nmax_features = 4\nkfold = KFold(n_splits=10, shuffle=True, random_state=seed)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Seed the forest as well so repeated runs report the same mean accuracy.\n# (The output previously stored for this cell, 0.82, came from unshuffled folds and an unseeded forest.)\nnum_trees = 100\nmodel = RandomForestClassifier(n_estimators=num_trees, max_features=max_features, random_state=seed)\nresults = cross_val_score(model, x, y, cv=kfold)\nprint(results.mean())",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.8.5",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"gist": {
"id": "",
"data": {
"description": "Assignment15 - RandomForest.ipynb",
"public": true
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment