Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save analyticsindiamagazine/836da3f0b03da3d8ade0f52aeeb12526 to your computer and use it in GitHub Desktop.
Save analyticsindiamagazine/836da3f0b03da3d8ade0f52aeeb12526 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "6BZ-jLOunczC"
},
"outputs": [],
"source": [
"#1 Importing essential libraries\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "WKtERLr9n09B"
},
"outputs": [],
"source": [
"#2 Read the salary records from Salary_Data.csv (relative path) into a DataFrame\n",
"dataset = pd.read_csv(filepath_or_buffer='Salary_Data.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>YearsExperience</th>\n",
" <th>Salary</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.1</td>\n",
" <td>39343.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.3</td>\n",
" <td>46205.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.5</td>\n",
" <td>37731.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2.0</td>\n",
" <td>43525.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2.2</td>\n",
" <td>39891.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2.9</td>\n",
" <td>56642.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>3.0</td>\n",
" <td>60150.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>3.2</td>\n",
" <td>54445.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>3.2</td>\n",
" <td>64445.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>3.7</td>\n",
" <td>57189.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>3.9</td>\n",
" <td>63218.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>4.0</td>\n",
" <td>55794.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>4.0</td>\n",
" <td>56957.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>4.1</td>\n",
" <td>57081.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>4.5</td>\n",
" <td>61111.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>4.9</td>\n",
" <td>67938.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>5.1</td>\n",
" <td>66029.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>5.3</td>\n",
" <td>83088.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>5.9</td>\n",
" <td>81363.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>6.0</td>\n",
" <td>93940.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>6.8</td>\n",
" <td>91738.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>7.1</td>\n",
" <td>98273.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>7.9</td>\n",
" <td>101302.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>8.2</td>\n",
" <td>113812.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>8.7</td>\n",
" <td>109431.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>9.0</td>\n",
" <td>105582.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>9.5</td>\n",
" <td>116969.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>9.6</td>\n",
" <td>112635.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>10.3</td>\n",
" <td>122391.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>10.5</td>\n",
" <td>121872.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" YearsExperience Salary\n",
"0 1.1 39343.0\n",
"1 1.3 46205.0\n",
"2 1.5 37731.0\n",
"3 2.0 43525.0\n",
"4 2.2 39891.0\n",
"5 2.9 56642.0\n",
"6 3.0 60150.0\n",
"7 3.2 54445.0\n",
"8 3.2 64445.0\n",
"9 3.7 57189.0\n",
"10 3.9 63218.0\n",
"11 4.0 55794.0\n",
"12 4.0 56957.0\n",
"13 4.1 57081.0\n",
"14 4.5 61111.0\n",
"15 4.9 67938.0\n",
"16 5.1 66029.0\n",
"17 5.3 83088.0\n",
"18 5.9 81363.0\n",
"19 6.0 93940.0\n",
"20 6.8 91738.0\n",
"21 7.1 98273.0\n",
"22 7.9 101302.0\n",
"23 8.2 113812.0\n",
"24 8.7 109431.0\n",
"25 9.0 105582.0\n",
"26 9.5 116969.0\n",
"27 9.6 112635.0\n",
"28 10.3 122391.0\n",
"29 10.5 121872.0"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Render the loaded DataFrame as this cell's output\n",
"dataset"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x1112bbbe0>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"#Plotting the relation between salary and experience\n",
"# The title lets the figure stand on its own; the trailing semicolon keeps\n",
"# the returned Axes repr out of the cell output.\n",
"dataset.plot(x='YearsExperience', y='Salary', title='Salary vs. Years of Experience');"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "G5WuUWRFn4j8"
},
"outputs": [],
"source": [
"#3 Split the columns: all but the last are features, the last is the target\n",
"X, y = (\n",
"    dataset.iloc[:, :-1].values,  # independent variable: YearsExperience\n",
"    dataset.iloc[:, -1].values,   # dependent variable: Salary\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "-bZ82kVbn7Ga"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Idependent Variable (Experience):\n",
"\n",
" [[ 1.1]\n",
" [ 1.3]\n",
" [ 1.5]\n",
" [ 2. ]\n",
" [ 2.2]\n",
" [ 2.9]\n",
" [ 3. ]\n",
" [ 3.2]\n",
" [ 3.2]\n",
" [ 3.7]\n",
" [ 3.9]\n",
" [ 4. ]\n",
" [ 4. ]\n",
" [ 4.1]\n",
" [ 4.5]\n",
" [ 4.9]\n",
" [ 5.1]\n",
" [ 5.3]\n",
" [ 5.9]\n",
" [ 6. ]\n",
" [ 6.8]\n",
" [ 7.1]\n",
" [ 7.9]\n",
" [ 8.2]\n",
" [ 8.7]\n",
" [ 9. ]\n",
" [ 9.5]\n",
" [ 9.6]\n",
" [10.3]\n",
" [10.5]]\n",
"\n",
"Dependent Variable (Salary):\n",
"\n",
" [ 39343. 46205. 37731. 43525. 39891. 56642. 60150. 54445. 64445.\n",
" 57189. 63218. 55794. 56957. 57081. 61111. 67938. 66029. 83088.\n",
" 81363. 93940. 91738. 98273. 101302. 113812. 109431. 105582. 116969.\n",
" 112635. 122391. 121872.]\n"
]
}
],
"source": [
"# Print the feature matrix and the target vector under labelled headings\n",
"print(\"\\nIndependent Variable (Experience):\\n\\n\", X)\n",
"print(\"\\nDependent Variable (Salary):\\n\\n\", y)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "BFMIwI5Zn9MX"
},
"outputs": [],
"source": [
"#4 Hold out one third of the rows as a test set; random_state fixes the shuffle\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=1/3, random_state=0\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "YcY06YGDn_dz"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"Training Set :\n",
"----------------\n",
"\n",
"X = \n",
" [[ 2.9]\n",
" [ 5.1]\n",
" [ 3.2]\n",
" [ 4.5]\n",
" [ 8.2]\n",
" [ 6.8]\n",
" [ 1.3]\n",
" [10.5]\n",
" [ 3. ]\n",
" [ 2.2]\n",
" [ 5.9]\n",
" [ 6. ]\n",
" [ 3.7]\n",
" [ 3.2]\n",
" [ 9. ]\n",
" [ 2. ]\n",
" [ 1.1]\n",
" [ 7.1]\n",
" [ 4.9]\n",
" [ 4. ]]\n",
"y = \n",
" [ 56642. 66029. 64445. 61111. 113812. 91738. 46205. 121872. 60150.\n",
" 39891. 81363. 93940. 57189. 54445. 105582. 43525. 39343. 98273.\n",
" 67938. 56957.]\n",
"\n",
"\n",
"Test Set :\n",
"----------------\n",
"\n",
"X = \n",
" [[ 1.5]\n",
" [10.3]\n",
" [ 4.1]\n",
" [ 3.9]\n",
" [ 9.5]\n",
" [ 8.7]\n",
" [ 9.6]\n",
" [ 4. ]\n",
" [ 5.3]\n",
" [ 7.9]]\n",
"y = \n",
" [ 37731. 122391. 57081. 63218. 116969. 109431. 112635. 55794. 83088.\n",
" 101302.]\n"
]
}
],
"source": [
"# Print the features and targets of each partition under its own heading\n",
"partitions = (\n",
"    (\"\\n\\nTraining Set :\\n----------------\\n\", X_train, y_train),\n",
"    (\"\\n\\nTest Set :\\n----------------\\n\", X_test, y_test),\n",
")\n",
"for heading, features, targets in partitions:\n",
"    print(heading)\n",
"    print(\"X = \\n\", features)\n",
"    print(\"y = \\n\", targets)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "8XbYFyk8oCrH"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"Predictions = [ 40835.10590871 123079.39940819 65134.55626083 63265.36777221\n",
" 115602.64545369 108125.8914992 116537.23969801 64199.96201652\n",
" 76349.68719258 100649.1375447 ]\n"
]
}
],
"source": [
"# II. Simple Linear Regressor\n",
"\n",
"#5 Bring in scikit-learn's linear regression estimator\n",
"from sklearn.linear_model import LinearRegression\n",
"\n",
"#6 Fit the estimator on the training partition (fit returns the estimator itself)\n",
"regressor = LinearRegression().fit(X_train, y_train)\n",
"\n",
"#7 Predict the salaries of the held-out test partition\n",
"y_Pred = regressor.predict(X_test)\n",
"print(\"\\n\\nPredictions = \", y_Pred)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "ZAwiJVWuoHEX"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Prediction Accuracy = 0.9749154407708353\n",
"\n",
"Actual vs Predicted Salaries \n",
"-------------------------\n",
"\n",
"Actual :\n",
" [ 37731. 122391. 57081. 63218. 116969. 109431. 112635. 55794. 83088.\n",
" 101302.]\n",
"Predicted :\n",
" [ 40835.10590871 123079.39940819 65134.55626083 63265.36777221\n",
" 115602.64545369 108125.8914992 116537.23969801 64199.96201652\n",
" 76349.68719258 100649.1375447 ]\n"
]
}
],
"source": [
"#8 Evaluating the predictions with the R-squared score\n",
"# r2_score is the coefficient of determination of a regression fit, not a\n",
"# classification accuracy, so the printed label names the metric itself.\n",
"from sklearn import metrics\n",
"print(\"R-squared (coefficient of determination) = \", metrics.r2_score(y_test, y_Pred))\n",
"\n",
"#9 Comparing Actual and Predicted Salaries for the test set\n",
"print(\"\\nActual vs Predicted Salaries \\n-------------------------\\n\")\n",
"print(\"Actual :\\n \", y_test)\n",
"print(\"Predicted :\\n \", y_Pred)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"#Scatter the actual test salaries and the model's predictions on shared axes\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"fig, ax = plt.subplots()\n",
"ax.scatter(X_test, y_test, s=70, label='Actual')\n",
"ax.scatter(X_test, y_Pred, s=90, marker='^', label='Predicted')\n",
"ax.set_xlabel('Years of Experience')\n",
"ax.set_ylabel('Salary')\n",
"ax.legend()\n",
"plt.show()"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "LinearRegression.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment