Created
July 28, 2018 23:03
-
-
Save jcms2665/7f93614d58ca352b01ae59a78c3e8d03 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2018-07-19T19:04:58-05:00\n", | |
"\n", | |
"CPython 3.6.5\n", | |
"IPython 6.4.0\n", | |
"\n", | |
"compiler : MSC v.1900 64 bit (AMD64)\n", | |
"system : Windows\n", | |
"release : 10\n", | |
"machine : AMD64\n", | |
"processor : AMD64 Family 21 Model 112 Stepping 0, AuthenticAMD\n", | |
"CPU cores : 2\n", | |
"interpreter: 64bit\n" | |
] | |
} | |
], | |
"source": [ | |
"%load_ext watermark\n", | |
"%watermark" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 53, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"%matplotlib inline\n", | |
"import matplotlib.pyplot as plt\n", | |
"plt.rcParams['figure.figsize']=(12,12)\n", | |
"plt.rcParams['font.size']=16" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" El volumen de la unidad C no tiene etiqueta.\n", | |
" El número de serie del volumen es: C893-DDFA\n", | |
"\n", | |
" Directorio de C:\\Users\\jmartinez\\Desktop\\Escritorio2\\AnacondaJC\n", | |
"\n", | |
"19/07/2018 07:08 p. m. <DIR> .\n", | |
"19/07/2018 07:08 p. m. <DIR> ..\n", | |
"19/07/2018 07:04 p. m. <DIR> .ipynb_checkpoints\n", | |
"07/07/2018 06:59 p. m. <DIR> curso_data_science-master\n", | |
"17/07/2018 09:22 p. m. 10,557 ejercicio.ipynb\n", | |
"12/07/2018 11:54 p. m. 6,603 phyton y numpy.ipynb\n", | |
"12/07/2018 11:17 p. m. 2,568 primer programa con jupyter.ipynb\n", | |
"13/07/2018 12:06 a. m. 2,963 Untitled.ipynb\n", | |
"19/07/2018 07:08 p. m. 1,510 Untitled1.ipynb\n", | |
"17/07/2018 02:46 p. m. 16,848,926 vehicles.csv\n", | |
"18/07/2018 09:29 a. m. 1,468,901 vehicles.csv.zip\n", | |
"17/07/2018 09:15 p. m. 4,269,212 vehiculos-original.csv\n", | |
"17/07/2018 09:08 p. m. 1,199,229 watermarrk.ipynb\n", | |
"16/07/2018 05:25 p. m. 224,831 wcp.png\n", | |
" 10 archivos 24,035,300 bytes\n", | |
" 4 dirs 720,719,024,128 bytes libres\n" | |
] | |
} | |
], | |
"source": [ | |
"ls" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Path is relative to the kernel's working directory instead of one user's absolute C:/Users/... location.\n", | |
"# Assumes the working directory is the folder containing curso_data_science-master (as the directory listing above shows).\n", | |
"ruta_datos=\"curso_data_science-master/Secciones/Seccion5.Machine_Learning/Regresion_Lineal_practica/data/vehiculos_procesado.csv\"\n", | |
"# Only the four columns used by the regression are loaded.\n", | |
"vehiculos=pd.read_csv(ruta_datos,\n", | |
"                      usecols=[\"consumo\",\"co2\",\"cilindros\",\"desplazamiento\"])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"litros_por_galon=3.78541" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"vehiculos[\"consumo_litos_milla\"]=litros_por_galon/vehiculos.consumo" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(35539, 5)" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"vehiculos.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>desplazamiento</th>\n", | |
" <th>cilindros</th>\n", | |
" <th>consumo</th>\n", | |
" <th>co2</th>\n", | |
" <th>consumo_litos_milla</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2.5</td>\n", | |
" <td>4.0</td>\n", | |
" <td>17</td>\n", | |
" <td>522.764706</td>\n", | |
" <td>0.222671</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>4.2</td>\n", | |
" <td>6.0</td>\n", | |
" <td>13</td>\n", | |
" <td>683.615385</td>\n", | |
" <td>0.291185</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2.5</td>\n", | |
" <td>4.0</td>\n", | |
" <td>16</td>\n", | |
" <td>555.437500</td>\n", | |
" <td>0.236588</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" desplazamiento cilindros consumo co2 consumo_litos_milla\n", | |
"0 2.5 4.0 17 522.764706 0.222671\n", | |
"1 4.2 6.0 13 683.615385 0.291185\n", | |
"2 2.5 4.0 16 555.437500 0.236588" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"vehiculos.head(3)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pct_entrenamiento=0.8" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Random training subset (fraction pct_entrenamiento of the rows).\n", | |
"# A fixed random_state makes the split, and every number derived from it, reproducible after a kernel restart.\n", | |
"vehiculos_training=vehiculos.sample(frac=pct_entrenamiento, random_state=42)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Test set = every row whose index label was not drawn into the training sample.\n", | |
"vehiculos_test=vehiculos.drop(vehiculos_training.index)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(28431, 5)\n" | |
] | |
} | |
], | |
"source": [ | |
"print(vehiculos_training.shape)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(7108, 5)\n" | |
] | |
} | |
], | |
"source": [ | |
"print(vehiculos_test.shape)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 45, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"variables_independientes=[\"desplazamiento\",\"cilindros\",\"consumo_litos_milla\"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 46, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"variable_dependiente=\"co2\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 47, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Feature matrix as a NumPy array; .values replaces the deprecated DataFrame.as_matrix().\n", | |
"X = vehiculos_training[variables_independientes].values" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Target vector as a NumPy array; .values replaces the deprecated Series.as_matrix().\n", | |
"Y = vehiculos_training[variable_dependiente].values" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"X_T = X.T" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 52, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[3.1 , 6. , 0.21030056],\n", | |
" [5. , 8. , 0.27038643],\n", | |
" [4.3 , 6. , 0.25236067],\n", | |
" ...,\n", | |
" [3.3 , 6. , 0.18025762],\n", | |
" [4.3 , 6. , 0.23658813],\n", | |
" [3. , 6. , 0.22267118]])" | |
] | |
}, | |
"execution_count": 52, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"X" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 55, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Least-squares coefficients from the normal equations (X'X) b = X'Y.\n", | |
"# Solving the linear system directly avoids forming an explicit inverse, which is less accurate numerically.\n", | |
"# NOTE(review): X has no column of ones, so this is a fit through the origin, while the intercept is\n", | |
"# later derived as mean(Y) - betas . mean(X); that formula matches OLS only for betas fitted on centered\n", | |
"# data. Confirm this combination is intended.\n", | |
"betas = np.linalg.solve(X_T@X, X_T@Y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 56, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([ 3.74255395e+00, -5.40919129e-01, 2.30466918e+03])" | |
] | |
}, | |
"execution_count": 56, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"betas" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 57, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"470.6694731294241" | |
] | |
}, | |
"execution_count": 57, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Y.mean()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 58, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.36737696715539414" | |
] | |
}, | |
"execution_count": 58, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Intercept: mean of Y minus the coefficients applied to the per-column means of the training features.\n", | |
"# .values replaces the deprecated Series.as_matrix().\n", | |
"alfa=Y.mean()-np.dot(betas,vehiculos_training[variables_independientes].mean().values)\n", | |
"alfa" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 59, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def predecir(r):\n", | |
"    \"\"\"Return the linear-model prediction alfa + dot(r, betas).\n", | |
"\n", | |
"    r: feature values in the same order as the coefficients in betas.\n", | |
"       May be a pandas Series (one row), a DataFrame or 2-D array\n", | |
"       (one prediction per row), or a plain sequence of numbers.\n", | |
"    Returns a scalar for a single row, or a 1-D array for several rows.\n", | |
"    Reads the notebook-level alfa and betas computed in earlier cells.\n", | |
"    \"\"\"\n", | |
"    # np.asarray lets callers pass a Series, DataFrame, list or ndarray;\n", | |
"    # keeping r on the left of @ makes a 2-D input yield one value per row.\n", | |
"    return alfa + np.asarray(r) @ betas" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment