Last active
August 21, 2021 14:47
-
-
Save MainakRepositor/52cc3353ab8c454154c40c02d000cfab to your computer and use it in GitHub Desktop.
ML 1.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "ML 1.ipynb", | |
"provenance": [], | |
"authorship_tag": "ABX9TyO21KfcJ5D36hbc7lxJPqW9", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/MainakRepositor/52cc3353ab8c454154c40c02d000cfab/ml-1.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "wkSGsQKkufyG" | |
}, | |
"source": [ | |
"<center>\n", | |
"<h1>Simple Linear Regression</h1>\n", | |
"</center>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "g-51WYNluqi0" | |
}, | |
"source": [ | |
"### 1. Importing necessary libraries" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "4ZJykaM4uc2E", | |
"outputId": "a8a38797-b9bf-465f-b618-7733ff957e5e" | |
}, | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import matplotlib.pyplot as plt\n", | |
"%matplotlib inline\n", | |
"\n", | |
"## For ML and SLR\n", | |
"from sklearn.model_selection import train_test_split\n", | |
"from sklearn.linear_model import LinearRegression\n", | |
"print(\"All packages are included successfully!\")" | |
], | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"All packages are included successfully!\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "eHuf1xe4uwgM" | |
}, | |
"source": [ | |
"### 2. Importing the dataset" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 236 | |
}, | |
"id": "zuC0MsLNuuJS", | |
"outputId": "19ccdcfc-7c77-4afc-e008-ba43978af93c" | |
}, | |
"source": [ | |
"# Location of the salary data set (columns: YearsExperience, Salary)\n", | |
"url = 'https://raw.githubusercontent.com/MainakRepositor/Datasets-/master/Salary_Data.csv'\n", | |
"# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0;\n", | |
"# `on_bad_lines='skip'` is the supported way to skip malformed rows (needs pandas >= 1.3).\n", | |
"df = pd.read_csv(url, on_bad_lines='skip')\n", | |
"print(\"Displaying the top 5 rows of the dataset :\\n\")\n", | |
"# Last expression of the cell, so the frame is shown with the rich HTML display\n", | |
"df.head()" | |
], | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Displaying the top 5 rows of the dataset :\n", | |
"\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>YearsExperience</th>\n", | |
" <th>Salary</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1.1</td>\n", | |
" <td>39343.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1.3</td>\n", | |
" <td>46205.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1.5</td>\n", | |
" <td>37731.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2.0</td>\n", | |
" <td>43525.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>2.2</td>\n", | |
" <td>39891.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" YearsExperience Salary\n", | |
"0 1.1 39343.0\n", | |
"1 1.3 46205.0\n", | |
"2 1.5 37731.0\n", | |
"3 2.0 43525.0\n", | |
"4 2.2 39891.0" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 2 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "yAxlTAU1u35h" | |
}, | |
"source": [ | |
"### 3. Data exploration" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "2cSNB1Hju0oC", | |
"outputId": "053e1baf-abf7-4d65-8818-6c337c86a276" | |
}, | |
"source": [ | |
"# Dimensions of the loaded frame as a (rows, columns) tuple\n", | |
"df.shape" | |
], | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"(30, 2)" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 3 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "M4UGg0aBu8_w" | |
}, | |
"source": [ | |
"### 4. Checking for missing values" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "SKfdUz2Lu50Z", | |
"outputId": "e46dbe61-0af6-47cc-acb2-de9222a35761" | |
}, | |
"source": [ | |
"# True when at least one cell anywhere in the frame is missing\n", | |
"has_missing = df.isnull().values.any()\n", | |
"print(\"Null values present : \", has_missing)\n" | |
], | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Null values present : False\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "p6kI6Rn-vFyD" | |
}, | |
"source": [ | |
"### 5. Building the Simple Linear Regression model" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "lPWf5F5DvBp6", | |
"outputId": "0c7f0252-e8a2-44c5-fea6-538437655089" | |
}, | |
"source": [ | |
"# Feature matrix: every column except the last, reshaped to a single-column 2-D array\n", | |
"x = df.iloc[:, :-1].values.reshape(-1, 1)\n", | |
"# Target vector: the last column as a 1-D array\n", | |
"y = df.iloc[:, -1].values\n", | |
"\n", | |
"print(\"Show x and y\")\n", | |
"for banner, values in ((\"--------X---------\\n\", x), (\"--------Y---------\\n\", y)):\n", | |
"    print(banner)\n", | |
"    print(values)\n", | |
"print(\"\\n\\n\")\n", | |
"for label, values in ((\"x\", x), (\"y\", y)):\n", | |
"    print(\"type of data of \" + label + \" :\", type(values))" | |
], | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Show x and y\n", | |
"--------X---------\n", | |
"\n", | |
"[[ 1.1]\n", | |
" [ 1.3]\n", | |
" [ 1.5]\n", | |
" [ 2. ]\n", | |
" [ 2.2]\n", | |
" [ 2.9]\n", | |
" [ 3. ]\n", | |
" [ 3.2]\n", | |
" [ 3.2]\n", | |
" [ 3.7]\n", | |
" [ 3.9]\n", | |
" [ 4. ]\n", | |
" [ 4. ]\n", | |
" [ 4.1]\n", | |
" [ 4.5]\n", | |
" [ 4.9]\n", | |
" [ 5.1]\n", | |
" [ 5.3]\n", | |
" [ 5.9]\n", | |
" [ 6. ]\n", | |
" [ 6.8]\n", | |
" [ 7.1]\n", | |
" [ 7.9]\n", | |
" [ 8.2]\n", | |
" [ 8.7]\n", | |
" [ 9. ]\n", | |
" [ 9.5]\n", | |
" [ 9.6]\n", | |
" [10.3]\n", | |
" [10.5]]\n", | |
"--------Y---------\n", | |
"\n", | |
"[ 39343. 46205. 37731. 43525. 39891. 56642. 60150. 54445. 64445.\n", | |
" 57189. 63218. 55794. 56957. 57081. 61111. 67938. 66029. 83088.\n", | |
" 81363. 93940. 91738. 98273. 101302. 113812. 109431. 105582. 116969.\n", | |
" 112635. 122391. 121872.]\n", | |
"\n", | |
"\n", | |
"\n", | |
"type of data of x : <class 'numpy.ndarray'>\n", | |
"type of data of y : <class 'numpy.ndarray'>\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "yBOpmZwBvU0F" | |
}, | |
"source": [ | |
"### 6. Setting up the regressor and splitting the dataset" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "cLYD7VouvI10", | |
"outputId": "40c706fa-0f16-4c6d-df43-a59fe2126309" | |
}, | |
"source": [ | |
"# Hold out one third of the rows for testing; the fixed seed keeps the split reproducible\n", | |
"x_train, x_test, y_train, y_test = train_test_split(\n", | |
"    x, y, test_size=1/3, random_state=0\n", | |
")\n", | |
"# Fit the linear model on the training portion only\n", | |
"reg = LinearRegression()\n", | |
"reg.fit(x_train, y_train)" | |
], | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 8 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "tF1DEnF-vhZy" | |
}, | |
"source": [ | |
"### 7. Predicting on the test set and obtaining the regressor coefficient (slope) and intercept" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "ljLRlR9QvXnh", | |
"outputId": "e9ee4836-fcff-4cf4-fc39-7f5d12372346" | |
}, | |
"source": [ | |
"# Model predictions for the held-out test inputs\n", | |
"y_pred = reg.predict(x_test)\n", | |
"print(y_pred)" | |
], | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[ 40835.10590871 123079.39940819 65134.55626083 63265.36777221\n", | |
" 115602.64545369 108125.8914992 116537.23969801 64199.96201652\n", | |
" 76349.68719258 100649.1375447 ]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "IcDuoBxmvkpA", | |
"outputId": "f935e6dc-00c2-4f79-f4d0-13793b6774f6" | |
}, | |
"source": [ | |
"# Fitted slope of the regression line (array with one entry per input feature)\n", | |
"reg.coef_" | |
], | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"array([9345.94244312])" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 10 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "bS_Hdgg1vsBj", | |
"outputId": "35c84474-1e24-4a0d-cce1-fcaf9b4370a7" | |
}, | |
"source": [ | |
"# Fitted intercept of the regression line (the prediction at x = 0)\n", | |
"reg.intercept_" | |
], | |
"execution_count": 11, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"26816.192244031183" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 11 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "yKrkxg6avxlI" | |
}, | |
"source": [ | |
"### 8. Making a prediction" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "okY_E1-QvuEb", | |
"outputId": "58e13970-8c9f-48de-f7cd-36842143ab18" | |
}, | |
"source": [ | |
"# Predict the salary for 8 years of experience.\n", | |
"# predict() takes a 2-D (n_samples, n_features) input, hence the nested list.\n", | |
"\n", | |
"reg.predict([[8]])" | |
], | |
"execution_count": 13, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"array([101583.73178901])" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 13 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "CODnKQlkv0Bn", | |
"outputId": "8686973f-4524-4770-bd10-48a09a780be4" | |
}, | |
"source": [ | |
"# Mathematical verification: evaluate y = coefficient * x + intercept by hand\n", | |
"x_value = 8\n", | |
"slope, intercept = reg.coef_, reg.intercept_\n", | |
"print(\"coefficient = \", slope)\n", | |
"print(\"intercept = \", intercept)\n", | |
"print(\"value of x = \", x_value)\n", | |
"print(\"Result (y) = \", slope * x_value + intercept)" | |
], | |
"execution_count": 14, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"coefficient = [9345.94244312]\n", | |
"intercept = 26816.192244031183\n", | |
"value of x = 8\n", | |
"Result (y) = [101583.73178901]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "9QLYGJk7wq_D" | |
}, | |
"source": [ | |
"#### The predicted results are equal to the mathematically calculated values" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "CSl7fM7Bwy6I" | |
}, | |
"source": [ | |
"### 9. Predicting Accuracy" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "jLboRNcVweOo", | |
"outputId": "32a1f237-ea7d-4686-e15d-b967e32522fb" | |
}, | |
"source": [ | |
"# Comparing reg.predict([[8]]) with coef_*8 + intercept_ only compares the model with\n", | |
"# itself (always 100 %), and assigning the result to `y` would clobber the target array.\n", | |
"# Score the model on the held-out test set instead: R^2 is the share of the variance\n", | |
"# in y_test that the predictions explain (1.0 means a perfect fit).\n", | |
"r2 = reg.score(x_test, y_test)\n", | |
"print(\"Test-set R^2 score (accuracy) : \", round(r2 * 100, 2), \"%\")" | |
], | |
"execution_count": 15, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Predicted result accuracy percentage : 100 %\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "0TUIUOS2w6s8" | |
}, | |
"source": [ | |
"### 10. Displaying test values" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "XVWOmBoLwxnN", | |
"outputId": "992d88fe-1c85-44c8-d93a-362ec833e792" | |
}, | |
"source": [ | |
"# Inputs that were held out from training by train_test_split\n", | |
"print(x_test)" | |
], | |
"execution_count": 16, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[[ 1.5]\n", | |
" [10.3]\n", | |
" [ 4.1]\n", | |
" [ 3.9]\n", | |
" [ 9.5]\n", | |
" [ 8.7]\n", | |
" [ 9.6]\n", | |
" [ 4. ]\n", | |
" [ 5.3]\n", | |
" [ 7.9]]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "pbILzGKBw9P3", | |
"outputId": "d1a39081-6046-4628-b050-18126dbdfb87" | |
}, | |
"source": [ | |
"# True targets for the held-out inputs, in the same order as x_test\n", | |
"print(y_test)" | |
], | |
"execution_count": 17, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[ 37731. 122391. 57081. 63218. 116969. 109431. 112635. 55794. 83088.\n", | |
" 101302.]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "tY_hwLrMxDPy" | |
}, | |
"source": [ | |
"### 11. Visualizing the results" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 503 | |
}, | |
"id": "_R5cBdAAw_7u", | |
"outputId": "cb21107e-2ce5-49cd-ee65-c2025aaa01c9" | |
}, | |
"source": [ | |
"# Training points (red) with the fitted regression line (blue)\n", | |
"fig, ax = plt.subplots(figsize=(20, 7))\n", | |
"ax.scatter(x_train, y_train, color='red')\n", | |
"ax.plot(x_train, reg.predict(x_train), color='blue')\n", | |
"ax.set_xlabel('Years of Experience', size=18, color='indigo')\n", | |
"ax.set_ylabel('Salary', size=18, color='indigo')\n", | |
"ax.set_title('Salary vs Experience regression graph (Training set)\\n', size=24, color='orange', fontweight='bold')\n", | |
"plt.show()" | |
], | |
"execution_count": 18, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"image/png": "\n", | |
"text/plain": [ | |
"<Figure size 1440x504 with 1 Axes>" | |
] | |
}, | |
"metadata": { | |
"tags": [], | |
"needs_background": "light" | |
} | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 503 | |
}, | |
"id": "IzMplqERxI9Z", | |
"outputId": "ea00279c-92a7-41c5-f448-6c47c37ca32d" | |
}, | |
"source": [ | |
"# Held-out test points (green) against the regression line fitted on the training data\n", | |
"plt.figure(figsize=(20,7))\n", | |
"plt.scatter(x_test,y_test,color='green')\n", | |
"# The line is drawn over the training inputs on purpose: it is the same fitted model,\n", | |
"# and the training inputs span the x-range it was fitted on.\n", | |
"plt.plot(x_train,reg.predict(x_train),color='black')\n", | |
"plt.xlabel('Years of Experience',size=18,color='indigo')\n", | |
"plt.ylabel('Salary',size=18,color='indigo')\n", | |
"# Title fixed: this figure shows the test set, not the training set\n", | |
"plt.title('Salary vs Experience regression graph (Test set)\\n',size=24,color='green',fontweight='bold')\n", | |
"plt.show()" | |
], | |
"execution_count": 19, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"image/png": "\n", | |
"text/plain": [ | |
"<Figure size 1440x504 with 1 Axes>" | |
] | |
}, | |
"metadata": { | |
"tags": [], | |
"needs_background": "light" | |
} | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "pvXsE4VOxNEg" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment