Created
October 7, 2021 18:13
-
-
Save Prathmeshp20/52b03eafa0730391788bb17170948884 to your computer and use it in GitHub Desktop.
Assignment15 - RandomForest.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
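{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## Fraud Check\n\nLabel each record as `Good` (taxable income above 30000) or `Risky` (30000 or below) and estimate random-forest accuracy with 10-fold cross-validation." | |
}, | |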
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import os\nfrom pathlib import Path\n\nimport pandas as pd\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import cross_val_score", | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Folder holding the CSV files; override it with the DATA_DIR environment variable.\n# The default is the original download location, so existing runs are unchanged.\ndata_dir = Path(os.environ.get('DATA_DIR', 'C:/Users/Prathmesh/Downloads'))\nfraud = pd.read_csv(data_dir / 'Fraud_check.csv')", | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# One-hot encode the three categorical columns; the remaining columns are kept as they are.\nfraud1 = pd.get_dummies(data=fraud, columns=['Undergrad', 'Marital.Status', 'Urban'])", | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Derive the target label from Taxable.Income using a 30000 cut-off.\nabove_cutoff = fraud1[\"Taxable.Income\"] > 30000\nat_or_below_cutoff = fraud1[\"Taxable.Income\"] <= 30000\nfraud1.loc[above_cutoff, \"income\"] = \"Good\"\nfraud1.loc[at_or_below_cutoff, \"income\"] = \"Risky\"", | |
"execution_count": 4, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "fraud1.head()", | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 5, | |
"data": { | |
"text/plain": " Taxable.Income City.Population Work.Experience Undergrad_NO \\\n0 68833 50047 10 1 \n1 33700 134075 18 0 \n2 36925 160205 30 1 \n3 50190 193264 15 0 \n4 81002 27533 28 1 \n\n Undergrad_YES Marital.Status_Divorced Marital.Status_Married \\\n0 0 0 0 \n1 1 1 0 \n2 0 0 1 \n3 1 0 0 \n4 0 0 1 \n\n Marital.Status_Single Urban_NO Urban_YES income \n0 1 0 1 Good \n1 0 0 1 Good \n2 0 0 1 Good \n3 1 0 1 Good \n4 0 1 0 Good ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Taxable.Income</th>\n <th>City.Population</th>\n <th>Work.Experience</th>\n <th>Undergrad_NO</th>\n <th>Undergrad_YES</th>\n <th>Marital.Status_Divorced</th>\n <th>Marital.Status_Married</th>\n <th>Marital.Status_Single</th>\n <th>Urban_NO</th>\n <th>Urban_YES</th>\n <th>income</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>68833</td>\n <td>50047</td>\n <td>10</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>Good</td>\n </tr>\n <tr>\n <th>1</th>\n <td>33700</td>\n <td>134075</td>\n <td>18</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>Good</td>\n </tr>\n <tr>\n <th>2</th>\n <td>36925</td>\n <td>160205</td>\n <td>30</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>Good</td>\n </tr>\n <tr>\n <th>3</th>\n <td>50190</td>\n <td>193264</td>\n <td>15</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>Good</td>\n </tr>\n <tr>\n <th>4</th>\n <td>81002</td>\n <td>27533</td>\n <td>28</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>Good</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Features are every column except Taxable.Income (the label is derived from it,\n# so keeping it would leak the answer) and the label itself. Selecting by name\n# instead of by position keeps the split correct if the column order changes.\nx = fraud1.drop(columns=['Taxable.Income', 'income'])\ny = fraud1['income']", | |
"execution_count": 6, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "max_features = 3\n# random_state only takes effect when shuffling is enabled; scikit-learn 0.24+\n# raises an error if a seed is passed while shuffle is False.\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)", | |
"execution_count": 7, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "num_trees = 80\n# Seed the forest so the cross-validated score is reproducible between runs.\nmodel = RandomForestClassifier(n_estimators=num_trees, max_features=max_features, random_state=7)\n# Mean accuracy over the folds defined by kfold.\nresults = cross_val_score(model, x, y, cv=kfold)\nprint(results.mean())", | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "0.7366666666666667\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
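{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## Company Data\n\nFlag each record as high sales (`High` = 1 when `Sales` is above 8, 0 when it is 8 or below) and estimate random-forest accuracy with 10-fold cross-validation." | |
}, | |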
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Folder holding the CSV files; override it with the DATA_DIR environment variable.\n# The default is the original download location, so existing runs are unchanged.\ndata_dir = Path(os.environ.get('DATA_DIR', 'C:/Users/Prathmesh/Downloads'))\ncompany = pd.read_csv(data_dir / 'Company_Data.csv')", | |
"execution_count": 12, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# One-hot encode the three categorical columns; the remaining columns are kept as they are.\ncompany1 = pd.get_dummies(data=company, columns=['ShelveLoc', 'Urban', 'US'])", | |
"execution_count": 13, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Binary target: 1 when Sales is above 8, 0 when Sales is 8 or below.\nabove_cutoff = company1[\"Sales\"] > 8\nat_or_below_cutoff = company1[\"Sales\"] <= 8\ncompany1.loc[above_cutoff, \"High\"] = 1\ncompany1.loc[at_or_below_cutoff, \"High\"] = 0", | |
"execution_count": 18, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Features are every column except Sales (the label is derived from it, so\n# keeping it would leak the answer) and the label itself.\n# NOTE(review): assumes the previous positional slice 1:15 covered exactly these\n# columns, i.e. Sales is the first column of the CSV - confirm.\nx = company1.drop(columns=['Sales', 'High'])\ny = company1['High']", | |
"execution_count": 19, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "max_features = 4\n# random_state only takes effect when shuffling is enabled; scikit-learn 0.24+\n# raises an error if a seed is passed while shuffle is False.\nkfold = KFold(n_splits=10, shuffle=True, random_state=7)", | |
"execution_count": 20, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "num_trees = 100\n# Seed the forest so the cross-validated score is reproducible between runs.\nmodel = RandomForestClassifier(n_estimators=num_trees, max_features=max_features, random_state=7)\n# Mean accuracy over the folds defined by kfold.\nresults = cross_val_score(model, x, y, cv=kfold)\nprint(results.mean())", | |
"execution_count": 23, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "0.82\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.8.5", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"gist": { | |
"id": "", | |
"data": { | |
"description": "Assignment15 - RandomForest.ipynb", | |
"public": true | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment