Created
October 18, 2021 17:54
-
-
Save Prathmeshp20/740b1cd7dcea6fea9c6aaaadce12d7c9 to your computer and use it in GitHub Desktop.
Assignment17-SVM-SalaryData.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import pandas as pd\nimport numpy as np\nfrom sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\nfrom sklearn.preprocessing import StandardScaler\n\nfrom sklearn import svm\nfrom sklearn.svm import SVC\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.metrics import classification_report\n\n\nfrom sklearn.metrics import accuracy_score, confusion_matrix\nfrom sklearn.model_selection import train_test_split, cross_val_score", | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Load the training split of the salary data and preview its first rows.\n# NOTE(review): absolute, machine-specific path - this cell fails on any other machine until the path is edited.\ntrain_csv_path = \"C:/Users/Prathmesh/Downloads/SalaryData_Train(1).csv\"\nSalary_train = pd.read_csv(train_csv_path)\nSalary_train.head()", | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 2, | |
"data": { | |
"text/plain": " age workclass education educationno maritalstatus \\\n0 39 State-gov Bachelors 13 Never-married \n1 50 Self-emp-not-inc Bachelors 13 Married-civ-spouse \n2 38 Private HS-grad 9 Divorced \n3 53 Private 11th 7 Married-civ-spouse \n4 28 Private Bachelors 13 Married-civ-spouse \n\n occupation relationship race sex capitalgain \\\n0 Adm-clerical Not-in-family White Male 2174 \n1 Exec-managerial Husband White Male 0 \n2 Handlers-cleaners Not-in-family White Male 0 \n3 Handlers-cleaners Husband Black Male 0 \n4 Prof-specialty Wife Black Female 0 \n\n capitalloss hoursperweek native Salary \n0 0 40 United-States <=50K \n1 0 13 United-States <=50K \n2 0 40 United-States <=50K \n3 0 40 United-States <=50K \n4 0 40 Cuba <=50K ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>age</th>\n <th>workclass</th>\n <th>education</th>\n <th>educationno</th>\n <th>maritalstatus</th>\n <th>occupation</th>\n <th>relationship</th>\n <th>race</th>\n <th>sex</th>\n <th>capitalgain</th>\n <th>capitalloss</th>\n <th>hoursperweek</th>\n <th>native</th>\n <th>Salary</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>39</td>\n <td>State-gov</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Never-married</td>\n <td>Adm-clerical</td>\n <td>Not-in-family</td>\n <td>White</td>\n <td>Male</td>\n <td>2174</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>1</th>\n <td>50</td>\n <td>Self-emp-not-inc</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Married-civ-spouse</td>\n <td>Exec-managerial</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>13</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>2</th>\n <td>38</td>\n <td>Private</td>\n <td>HS-grad</td>\n <td>9</td>\n <td>Divorced</td>\n <td>Handlers-cleaners</td>\n <td>Not-in-family</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>3</th>\n <td>53</td>\n <td>Private</td>\n <td>11th</td>\n <td>7</td>\n <td>Married-civ-spouse</td>\n <td>Handlers-cleaners</td>\n <td>Husband</td>\n <td>Black</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>4</th>\n <td>28</td>\n <td>Private</td>\n <td>Bachelors</td>\n <td>13</td>\n <td>Married-civ-spouse</td>\n <td>Prof-specialty</td>\n <td>Wife</td>\n <td>Black</td>\n <td>Female</td>\n 
<td>0</td>\n <td>0</td>\n <td>40</td>\n <td>Cuba</td>\n <td><=50K</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Load the held-out test split of the salary data and preview its first rows.\n# NOTE(review): absolute, machine-specific path - this cell fails on any other machine until the path is edited.\ntest_csv_path = \"C:/Users/Prathmesh/Downloads/SalaryData_Test(1).csv\"\nSalary_test = pd.read_csv(test_csv_path)\nSalary_test.head()", | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 3, | |
"data": { | |
"text/plain": " age workclass education educationno maritalstatus \\\n0 25 Private 11th 7 Never-married \n1 38 Private HS-grad 9 Married-civ-spouse \n2 28 Local-gov Assoc-acdm 12 Married-civ-spouse \n3 44 Private Some-college 10 Married-civ-spouse \n4 34 Private 10th 6 Never-married \n\n occupation relationship race sex capitalgain \\\n0 Machine-op-inspct Own-child Black Male 0 \n1 Farming-fishing Husband White Male 0 \n2 Protective-serv Husband White Male 0 \n3 Machine-op-inspct Husband Black Male 7688 \n4 Other-service Not-in-family White Male 0 \n\n capitalloss hoursperweek native Salary \n0 0 40 United-States <=50K \n1 0 50 United-States <=50K \n2 0 40 United-States >50K \n3 0 40 United-States >50K \n4 0 30 United-States <=50K ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>age</th>\n <th>workclass</th>\n <th>education</th>\n <th>educationno</th>\n <th>maritalstatus</th>\n <th>occupation</th>\n <th>relationship</th>\n <th>race</th>\n <th>sex</th>\n <th>capitalgain</th>\n <th>capitalloss</th>\n <th>hoursperweek</th>\n <th>native</th>\n <th>Salary</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>25</td>\n <td>Private</td>\n <td>11th</td>\n <td>7</td>\n <td>Never-married</td>\n <td>Machine-op-inspct</td>\n <td>Own-child</td>\n <td>Black</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>1</th>\n <td>38</td>\n <td>Private</td>\n <td>HS-grad</td>\n <td>9</td>\n <td>Married-civ-spouse</td>\n <td>Farming-fishing</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>50</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>2</th>\n <td>28</td>\n <td>Local-gov</td>\n <td>Assoc-acdm</td>\n <td>12</td>\n <td>Married-civ-spouse</td>\n <td>Protective-serv</td>\n <td>Husband</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>>50K</td>\n </tr>\n <tr>\n <th>3</th>\n <td>44</td>\n <td>Private</td>\n <td>Some-college</td>\n <td>10</td>\n <td>Married-civ-spouse</td>\n <td>Machine-op-inspct</td>\n <td>Husband</td>\n <td>Black</td>\n <td>Male</td>\n <td>7688</td>\n <td>0</td>\n <td>40</td>\n <td>United-States</td>\n <td>>50K</td>\n </tr>\n <tr>\n <th>4</th>\n <td>34</td>\n <td>Private</td>\n <td>10th</td>\n <td>6</td>\n <td>Never-married</td>\n <td>Other-service</td>\n <td>Not-in-family</td>\n <td>White</td>\n <td>Male</td>\n <td>0</td>\n 
<td>0</td>\n <td>30</td>\n <td>United-States</td>\n <td><=50K</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "string_col=['workclass','education','maritalstatus','occupation','relationship','race','sex','native']\nfrom sklearn import preprocessing\n\n# Integer-encode every categorical column with ONE mapping shared by both splits.\n# LabelEncoder numbers the sorted set of values it was fitted on, so fitting it\n# separately on train and on test gives the same category different codes\n# whenever the two splits do not contain exactly the same categories.\nlabel_encoder=preprocessing.LabelEncoder()\nfor i in string_col:\n    # Fit on the union of both splits: every category seen in either split gets a\n    # code, and transform() cannot fail on a value missing from the other split.\n    # fit() discards the state learned for the previous column.\n    label_encoder.fit(pd.concat([Salary_train[i], Salary_test[i]], ignore_index=True))\n    Salary_train[i]=label_encoder.transform(Salary_train[i])\n    Salary_test[i]=label_encoder.transform(Salary_test[i])", | |
"execution_count": 4, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Preview the training frame after the categorical columns were label-encoded.\nSalary_train.head(n=5)", | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 5, | |
"data": { | |
"text/plain": " age workclass education educationno maritalstatus occupation \\\n0 39 5 9 13 4 0 \n1 50 4 9 13 2 3 \n2 38 2 11 9 0 5 \n3 53 2 1 7 2 5 \n4 28 2 9 13 2 9 \n\n relationship race sex capitalgain capitalloss hoursperweek native \\\n0 1 4 1 2174 0 40 37 \n1 0 4 1 0 0 13 37 \n2 1 4 1 0 0 40 37 \n3 0 2 1 0 0 40 37 \n4 5 2 0 0 0 40 4 \n\n Salary \n0 <=50K \n1 <=50K \n2 <=50K \n3 <=50K \n4 <=50K ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>age</th>\n <th>workclass</th>\n <th>education</th>\n <th>educationno</th>\n <th>maritalstatus</th>\n <th>occupation</th>\n <th>relationship</th>\n <th>race</th>\n <th>sex</th>\n <th>capitalgain</th>\n <th>capitalloss</th>\n <th>hoursperweek</th>\n <th>native</th>\n <th>Salary</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>39</td>\n <td>5</td>\n <td>9</td>\n <td>13</td>\n <td>4</td>\n <td>0</td>\n <td>1</td>\n <td>4</td>\n <td>1</td>\n <td>2174</td>\n <td>0</td>\n <td>40</td>\n <td>37</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>1</th>\n <td>50</td>\n <td>4</td>\n <td>9</td>\n <td>13</td>\n <td>2</td>\n <td>3</td>\n <td>0</td>\n <td>4</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>13</td>\n <td>37</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>2</th>\n <td>38</td>\n <td>2</td>\n <td>11</td>\n <td>9</td>\n <td>0</td>\n <td>5</td>\n <td>1</td>\n <td>4</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>37</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>3</th>\n <td>53</td>\n <td>2</td>\n <td>1</td>\n <td>7</td>\n <td>2</td>\n <td>5</td>\n <td>0</td>\n <td>2</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>37</td>\n <td><=50K</td>\n </tr>\n <tr>\n <th>4</th>\n <td>28</td>\n <td>2</td>\n <td>9</td>\n <td>13</td>\n <td>2</td>\n <td>9</td>\n <td>5</td>\n <td>2</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>4</td>\n <td><=50K</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Work on a small leading slice of each split: the first 300 training rows\n# and the first 150 test rows.\ntrain_rows = Salary_train.iloc[:300]\ntest_rows = Salary_test.iloc[:150]\n\n# Columns 0-12 are the features; column 13 (Salary in the frames shown above) is the target.\nx_train, y_train = train_rows.iloc[:, :13], train_rows.iloc[:, 13]\nx_test, y_test = test_rows.iloc[:, :13], test_rows.iloc[:, 13]", | |
"execution_count": 6, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from sklearn.pipeline import Pipeline\n\n# An RBF kernel is distance based, so columns on very different numeric ranges\n# (capitalgain vs. the small integer category codes) must be standardised first.\n# The scaler sits inside the pipeline so that, during cross-validation, it is\n# fitted on the training part of each fold only.\nclf = Pipeline([('scaler', StandardScaler()), ('svc', SVC())])\n\n# Pipeline parameters are addressed as <step name>__<parameter>.\n# The original C and gamma candidates are kept; smaller gammas and 'scale' are\n# added so the search is not limited to very narrow kernels.\nparam_grid = [{'svc__kernel':['rbf'],'svc__gamma':[50,5,10,0.5,0.1,0.01,'scale'],'svc__C':[15,14,13,12,11,10,0.1,0.001] }]\ngsv = GridSearchCV(clf,param_grid,cv=10)\ngsv.fit(x_train,y_train)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Winning hyper-parameter combination and the cross-validation score it achieved.\n(gsv.best_params_, gsv.best_score_)", | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 8, | |
"data": { | |
"text/plain": "({'C': 15, 'gamma': 50, 'kernel': 'rbf'}, 0.7566666666666666)" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Take the model straight from the grid search instead of re-typing C and gamma\n# by hand: with the default refit=True, GridSearchCV has already refitted the\n# best parameter combination on all of x_train, so no extra fit() is needed and\n# the evaluated model always matches what the search selected.\nclf = gsv.best_estimator_\ny_pred = clf.predict(x_test)\nacc = accuracy_score(y_test, y_pred) * 100\nprint(\"Accuracy =\", acc)\n# Per-class precision and recall: accuracy alone cannot reveal a model that only\n# ever predicts the majority class. zero_division=0 reports 0 (without a warning)\n# for a class that is never predicted.\nprint(classification_report(y_test, y_pred, zero_division=0))\nconfusion_matrix(y_test, y_pred)", | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "Accuracy = 78.0\n", | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"execution_count": 9, | |
"data": { | |
"text/plain": "array([[117, 0],\n [ 33, 0]], dtype=int64)" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.8.5", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"gist": { | |
"id": "", | |
"data": { | |
"description": "Assignment17-SVM-SalaryData.ipynb", | |
"public": true | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment