Created
February 13, 2017 08:53
-
-
Save soumikghosal/34ce215b047056e258b31e17f7dd05b2 to your computer and use it in GitHub Desktop.
Implementing Naive Bayes without using sklearn.naive_bayes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data=pd.read_csv(\"C:\\\\Users\\\\COM\\\\Desktop\\\\Test\\\\Q2-tennis.csv\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df=pd.DataFrame(data)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Outlook</th>\n", | |
" <th>Temp.</th>\n", | |
" <th>Humidity</th>\n", | |
" <th>Windy</th>\n", | |
" <th>Play</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>sunny</td>\n", | |
" <td>hot</td>\n", | |
" <td>high</td>\n", | |
" <td>false</td>\n", | |
" <td>no</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>sunny</td>\n", | |
" <td>hot</td>\n", | |
" <td>high</td>\n", | |
" <td>true</td>\n", | |
" <td>no</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>overcast</td>\n", | |
" <td>hot</td>\n", | |
" <td>high</td>\n", | |
" <td>false</td>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>rainy</td>\n", | |
" <td>mild</td>\n", | |
" <td>high</td>\n", | |
" <td>false</td>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>rainy</td>\n", | |
" <td>cool</td>\n", | |
" <td>normal</td>\n", | |
" <td>false</td>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Outlook Temp. Humidity Windy Play\n", | |
"0 sunny hot high false no\n", | |
"1 sunny hot high true no\n", | |
"2 overcast hot high false yes\n", | |
"3 rainy mild high false yes\n", | |
"4 rainy cool normal false yes" | |
] | |
}, | |
"execution_count": 36, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"RangeIndex(start=0, stop=14, step=1)" | |
] | |
}, | |
"execution_count": 37, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.index" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Index(['Outlook', 'Temp.', 'Humidity', 'Windy', 'Play'], dtype='object')" | |
] | |
}, | |
"execution_count": 38, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.columns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"yes 0.642857\n", | |
"no 0.357143\n", | |
"Name: Play, dtype: float64" | |
] | |
}, | |
"execution_count": 39, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"g=pd.value_counts(df.Play)/len(df.index)\n", | |
"g" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Frequency tables: counts of each feature value against Play, with row and column totals" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>Play</th>\n", | |
" <th>no</th>\n", | |
" <th>yes</th>\n", | |
" <th>All</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Outlook</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>overcast</th>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>rainy</th>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>sunny</th>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>All</th>\n", | |
" <td>5</td>\n", | |
" <td>9</td>\n", | |
" <td>14</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Play no yes All\n", | |
"Outlook \n", | |
"overcast 0 4 4\n", | |
"rainy 2 3 5\n", | |
"sunny 3 2 5\n", | |
"All 5 9 14" | |
] | |
}, | |
"execution_count": 41, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Outlook_play=pd.crosstab(df.Outlook,df.Play,margins='TRUE')\n", | |
"Outlook_play" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Smoothing: replace every 0 count in the Outlook table with 1 so that no conditional probability is 0, then recompute the affected totals" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"for i in range(len(Outlook_play.index)):\n", | |
" for j in range(len(Outlook_play.columns)):\n", | |
" if(Outlook_play.ix[i,j]==0):\n", | |
" Outlook_play.ix[i,j]=1\n", | |
" \n", | |
"Outlook_play.ix[0,2]=Outlook_play.ix[0,1]+Outlook_play.ix[0,0]\n", | |
"Outlook_play.ix[3,0]=Outlook_play.ix[2,0]+Outlook_play.ix[1,0]+Outlook_play.ix[0,0]\n", | |
"Outlook_play.ix[3,2]=Outlook_play.ix[3,0]+Outlook_play.ix[3,1]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 44, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>Play</th>\n", | |
" <th>no</th>\n", | |
" <th>yes</th>\n", | |
" <th>All</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Outlook</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>overcast</th>\n", | |
" <td>1</td>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>rainy</th>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>sunny</th>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>All</th>\n", | |
" <td>6</td>\n", | |
" <td>9</td>\n", | |
" <td>15</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Play no yes All\n", | |
"Outlook \n", | |
"overcast 1 4 5\n", | |
"rainy 2 3 5\n", | |
"sunny 3 2 5\n", | |
"All 6 9 15" | |
] | |
}, | |
"execution_count": 44, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Outlook_play" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 45, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>Play</th>\n", | |
" <th>no</th>\n", | |
" <th>yes</th>\n", | |
" <th>All</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Temp.</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>cool</th>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>hot</th>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mild</th>\n", | |
" <td>2</td>\n", | |
" <td>4</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>All</th>\n", | |
" <td>5</td>\n", | |
" <td>9</td>\n", | |
" <td>14</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Play no yes All\n", | |
"Temp. \n", | |
"cool 1 3 4\n", | |
"hot 2 2 4\n", | |
"mild 2 4 6\n", | |
"All 5 9 14" | |
] | |
}, | |
"execution_count": 45, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Temp_play=pd.crosstab(df['Temp.'],df.Play,margins='TRUE')\n", | |
"Temp_play" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 46, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>Play</th>\n", | |
" <th>no</th>\n", | |
" <th>yes</th>\n", | |
" <th>All</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Humidity</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>high</th>\n", | |
" <td>4</td>\n", | |
" <td>3</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>normal</th>\n", | |
" <td>1</td>\n", | |
" <td>6</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>All</th>\n", | |
" <td>5</td>\n", | |
" <td>9</td>\n", | |
" <td>14</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Play no yes All\n", | |
"Humidity \n", | |
"high 4 3 7\n", | |
"normal 1 6 7\n", | |
"All 5 9 14" | |
] | |
}, | |
"execution_count": 46, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Humidity_play=pd.crosstab(df.Humidity,df.Play,margins='TRUE')\n", | |
"Humidity_play" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 47, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>Play</th>\n", | |
" <th>no</th>\n", | |
" <th>yes</th>\n", | |
" <th>All</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Windy</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>false</th>\n", | |
" <td>2</td>\n", | |
" <td>6</td>\n", | |
" <td>8</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>true</th>\n", | |
" <td>3</td>\n", | |
" <td>3</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>All</th>\n", | |
" <td>5</td>\n", | |
" <td>9</td>\n", | |
" <td>14</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Play no yes All\n", | |
"Windy \n", | |
"false 2 6 8\n", | |
"true 3 3 6\n", | |
"All 5 9 14" | |
] | |
}, | |
"execution_count": 47, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Windy_play=pd.crosstab(df.Windy,df.Play,margins='TRUE')\n", | |
"Windy_play" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"metadata": { | |
"collapsed": false | |
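}, | |
"outputs": [], | |
"source": [ | |
"# Convert the frequency tables to probabilities: P(x|c) in the 'no' and 'yes' columns, P(x) in the 'All' column, P(c) in the 'All' row" | |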
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"Outlook_play.ix[0:3:,1]=Outlook_play.ix[0:3,1]/Outlook_play.ix[3,1]\n", | |
"Outlook_play.ix[0:3:,0]=Outlook_play.ix[0:3,0]/Outlook_play.ix[3,0]\n", | |
"Outlook_play.ix[0:3:,2]=Outlook_play.ix[0:3,2]/Outlook_play.ix[3,2]\n", | |
"Outlook_play.ix[3,:]=Outlook_play.ix[3,:]/Outlook_play.ix[3,2]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 50, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>Play</th>\n", | |
" <th>no</th>\n", | |
" <th>yes</th>\n", | |
" <th>P(x)</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Outlook</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>overcast</th>\n", | |
" <td>0.166667</td>\n", | |
" <td>0.444444</td>\n", | |
" <td>0.333333</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>rainy</th>\n", | |
" <td>0.333333</td>\n", | |
" <td>0.333333</td>\n", | |
" <td>0.333333</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>sunny</th>\n", | |
" <td>0.500000</td>\n", | |
" <td>0.222222</td>\n", | |
" <td>0.333333</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>P(c)</th>\n", | |
" <td>0.400000</td>\n", | |
" <td>0.600000</td>\n", | |
" <td>1.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Play no yes P(x)\n", | |
"Outlook \n", | |
"overcast 0.166667 0.444444 0.333333\n", | |
"rainy 0.333333 0.333333 0.333333\n", | |
"sunny 0.500000 0.222222 0.333333\n", | |
"P(c) 0.400000 0.600000 1.000000" | |
] | |
}, | |
"execution_count": 50, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Outlook_play.rename(columns={'All':'P(x)'} , index={'All':'P(c)'})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 51, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"Temp_play.ix[0:3:,1]=Temp_play.ix[0:3,1]/Temp_play.ix[3,1]\n", | |
"Temp_play.ix[0:3:,0]=Temp_play.ix[0:3,0]/Temp_play.ix[3,0]\n", | |
"Temp_play.ix[0:3:,2]=Temp_play.ix[0:3,2]/Temp_play.ix[3,2]\n", | |
"Temp_play.ix[3,:]=Temp_play.ix[3,:]/Temp_play.ix[3,2]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 52, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>Play</th>\n", | |
" <th>no</th>\n", | |
" <th>yes</th>\n", | |
" <th>P(x)</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Temp.</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>cool</th>\n", | |
" <td>0.200000</td>\n", | |
" <td>0.333333</td>\n", | |
" <td>0.285714</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>hot</th>\n", | |
" <td>0.400000</td>\n", | |
" <td>0.222222</td>\n", | |
" <td>0.285714</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mild</th>\n", | |
" <td>0.400000</td>\n", | |
" <td>0.444444</td>\n", | |
" <td>0.428571</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>P(c)</th>\n", | |
" <td>0.357143</td>\n", | |
" <td>0.642857</td>\n", | |
" <td>1.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Play no yes P(x)\n", | |
"Temp. \n", | |
"cool 0.200000 0.333333 0.285714\n", | |
"hot 0.400000 0.222222 0.285714\n", | |
"mild 0.400000 0.444444 0.428571\n", | |
"P(c) 0.357143 0.642857 1.000000" | |
] | |
}, | |
"execution_count": 52, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Temp_play.rename(columns={'All':'P(x)'} , index={'All':'P(c)'})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 53, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"Humidity_play.ix[0:2:,1]=Humidity_play.ix[0:2,1]/Humidity_play.ix[2,1]\n", | |
"Humidity_play.ix[0:2:,0]=Humidity_play.ix[0:2,0]/Humidity_play.ix[2,0]\n", | |
"Humidity_play.ix[0:2:,2]=Humidity_play.ix[0:2,2]/Humidity_play.ix[2,2]\n", | |
"Humidity_play.ix[2,:]=Humidity_play.ix[2,:]/Humidity_play.ix[2,2]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>Play</th>\n", | |
" <th>no</th>\n", | |
" <th>yes</th>\n", | |
" <th>P(x)</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Humidity</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>high</th>\n", | |
" <td>0.800000</td>\n", | |
" <td>0.333333</td>\n", | |
" <td>0.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>normal</th>\n", | |
" <td>0.200000</td>\n", | |
" <td>0.666667</td>\n", | |
" <td>0.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>P(c)</th>\n", | |
" <td>0.357143</td>\n", | |
" <td>0.642857</td>\n", | |
" <td>1.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Play no yes P(x)\n", | |
"Humidity \n", | |
"high 0.800000 0.333333 0.5\n", | |
"normal 0.200000 0.666667 0.5\n", | |
"P(c) 0.357143 0.642857 1.0" | |
] | |
}, | |
"execution_count": 54, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Humidity_play.rename(columns={'All':'P(x)'} , index={'All':'P(c)'})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 55, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"Windy_play.ix[0:2:,1]=Windy_play.ix[0:2,1]/Windy_play.ix[2,1]\n", | |
"Windy_play.ix[0:2:,0]=Windy_play.ix[0:2,0]/Windy_play.ix[2,0]\n", | |
"Windy_play.ix[0:2:,2]=Windy_play.ix[0:2,2]/Windy_play.ix[2,2]\n", | |
"Windy_play.ix[2,:]=Windy_play.ix[2,:]/Windy_play.ix[2,2]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 56, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>Play</th>\n", | |
" <th>no</th>\n", | |
" <th>yes</th>\n", | |
" <th>P(x)</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Windy</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>false</th>\n", | |
" <td>0.400000</td>\n", | |
" <td>0.666667</td>\n", | |
" <td>0.571429</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>true</th>\n", | |
" <td>0.600000</td>\n", | |
" <td>0.333333</td>\n", | |
" <td>0.428571</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>P(c)</th>\n", | |
" <td>0.357143</td>\n", | |
" <td>0.642857</td>\n", | |
" <td>1.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Play no yes P(x)\n", | |
"Windy \n", | |
"false 0.400000 0.666667 0.571429\n", | |
"true 0.600000 0.333333 0.428571\n", | |
"P(c) 0.357143 0.642857 1.000000" | |
] | |
}, | |
"execution_count": 56, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Windy_play.rename(columns={'All':'P(x)'} , index={'All':'P(c)'})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 57, | |
"metadata": { | |
"collapsed": true | |
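}, | |
"outputs": [], | |
"source": [ | |
"# Prediction: for each row, multiply the per-feature conditional probabilities by the class prior for 'yes' and for 'no', normalise, and keep the more probable class (an exact tie appends nothing)" | |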
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 58, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"pred_play=[]\n", | |
"for i in range(len(df.index)):\n", | |
" pcxy=Outlook_play.ix[df.ix[i,0],'yes']*Temp_play.ix[df.ix[i,1],'yes']*Humidity_play.ix[df.ix[i,2],'yes']*Windy_play.ix[df.ix[i,3],'yes']*g['yes']\n", | |
" pcxn=Outlook_play.ix[df.ix[i,0],'no']*Temp_play.ix[df.ix[i,1],'no']*Humidity_play.ix[df.ix[i,2],'no']*Windy_play.ix[df.ix[i,3],'no']*g['no']\n", | |
" yes_prob=pcxy/(pcxy+pcxn)\n", | |
" no_prob=pcxn/(pcxy+pcxn)\n", | |
" if(yes_prob > no_prob):\n", | |
" pred_play.append(\"yes\")\n", | |
" elif(yes_prob < no_prob):\n", | |
" pred_play.append(\"no\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 59, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>no</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>no</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>no</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>no</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 0\n", | |
"0 no\n", | |
"1 no\n", | |
"2 yes\n", | |
"3 yes\n", | |
"4 yes\n", | |
"5 yes\n", | |
"6 yes\n", | |
"7 no\n", | |
"8 yes\n", | |
"9 yes\n", | |
"10 yes\n", | |
"11 yes\n", | |
"12 yes\n", | |
"13 no" | |
] | |
}, | |
"execution_count": 59, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pred_play=pd.DataFrame(pred_play)\n", | |
"pred_play" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 60, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Play</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>no</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>no</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>no</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>no</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>yes</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>no</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Play\n", | |
"0 no\n", | |
"1 no\n", | |
"2 yes\n", | |
"3 yes\n", | |
"4 yes\n", | |
"5 no\n", | |
"6 yes\n", | |
"7 no\n", | |
"8 yes\n", | |
"9 yes\n", | |
"10 yes\n", | |
"11 yes\n", | |
"12 yes\n", | |
"13 no" | |
] | |
}, | |
"execution_count": 60, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Expected=df[['Play']]\n", | |
"Expected" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Calculating the accuracy: fraction of predicted labels that match the actual Play column" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 61, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from sklearn import metrics" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 62, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0.928571428571\n" | |
] | |
} | |
], | |
"source": [ | |
"print(metrics.accuracy_score(Expected,pred_play))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"anaconda-cloud": {}, | |
"kernelspec": { | |
"display_name": "Python [default]", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
• Implement k-fold cross-validation (e.g., 5-fold) for the Naïve Bayesian classifier on a given dataset. Calculate the average accuracy of the classifier over the k folds and report the results.