Skip to content

Instantly share code, notes, and snippets.

@jcms2665
Created July 28, 2018 23:03
Show Gist options
  • Save jcms2665/7f93614d58ca352b01ae59a78c3e8d03 to your computer and use it in GitHub Desktop.
Save jcms2665/7f93614d58ca352b01ae59a78c3e8d03 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2018-07-19T19:04:58-05:00\n",
"\n",
"CPython 3.6.5\n",
"IPython 6.4.0\n",
"\n",
"compiler : MSC v.1900 64 bit (AMD64)\n",
"system : Windows\n",
"release : 10\n",
"machine : AMD64\n",
"processor : AMD64 Family 21 Model 112 Stepping 0, AuthenticAMD\n",
"CPU cores : 2\n",
"interpreter: 64bit\n"
]
}
],
"source": [
"%load_ext watermark\n",
"%watermark"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Notebook-wide plotting defaults, applied in a single call.\n",
"plt.rcParams.update({'figure.figsize': (12, 12), 'font.size': 16})"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" El volumen de la unidad C no tiene etiqueta.\n",
" El número de serie del volumen es: C893-DDFA\n",
"\n",
" Directorio de C:\\Users\\jmartinez\\Desktop\\Escritorio2\\AnacondaJC\n",
"\n",
"19/07/2018 07:08 p. m. <DIR> .\n",
"19/07/2018 07:08 p. m. <DIR> ..\n",
"19/07/2018 07:04 p. m. <DIR> .ipynb_checkpoints\n",
"07/07/2018 06:59 p. m. <DIR> curso_data_science-master\n",
"17/07/2018 09:22 p. m. 10,557 ejercicio.ipynb\n",
"12/07/2018 11:54 p. m. 6,603 phyton y numpy.ipynb\n",
"12/07/2018 11:17 p. m. 2,568 primer programa con jupyter.ipynb\n",
"13/07/2018 12:06 a. m. 2,963 Untitled.ipynb\n",
"19/07/2018 07:08 p. m. 1,510 Untitled1.ipynb\n",
"17/07/2018 02:46 p. m. 16,848,926 vehicles.csv\n",
"18/07/2018 09:29 a. m. 1,468,901 vehicles.csv.zip\n",
"17/07/2018 09:15 p. m. 4,269,212 vehiculos-original.csv\n",
"17/07/2018 09:08 p. m. 1,199,229 watermarrk.ipynb\n",
"16/07/2018 05:25 p. m. 224,831 wcp.png\n",
" 10 archivos 24,035,300 bytes\n",
" 4 dirs 720,719,024,128 bytes libres\n"
]
}
],
"source": [
"ls"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# The path is relative to the notebook's folder (the kernel's working directory),\n",
"# so the notebook is not tied to one user's machine.\n",
"ruta_datos = (\n",
"    \"curso_data_science-master/Secciones/Seccion5.Machine_Learning/\"\n",
"    \"Regresion_Lineal_practica/data/vehiculos_procesado.csv\"\n",
")\n",
"\n",
"# Load only the columns used by the regression below.\n",
"vehiculos = pd.read_csv(\n",
"    ruta_datos,\n",
"    usecols=[\"consumo\", \"co2\", \"cilindros\", \"desplazamiento\"],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"litros_por_galon=3.78541"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Derived column: litros_por_galon divided element-wise by the `consumo` column.\n",
"vehiculos[\"consumo_litos_milla\"] = litros_por_galon / vehiculos[\"consumo\"]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(35539, 5)"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vehiculos.shape"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>desplazamiento</th>\n",
" <th>cilindros</th>\n",
" <th>consumo</th>\n",
" <th>co2</th>\n",
" <th>consumo_litos_milla</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2.5</td>\n",
" <td>4.0</td>\n",
" <td>17</td>\n",
" <td>522.764706</td>\n",
" <td>0.222671</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4.2</td>\n",
" <td>6.0</td>\n",
" <td>13</td>\n",
" <td>683.615385</td>\n",
" <td>0.291185</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2.5</td>\n",
" <td>4.0</td>\n",
" <td>16</td>\n",
" <td>555.437500</td>\n",
" <td>0.236588</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" desplazamiento cilindros consumo co2 consumo_litos_milla\n",
"0 2.5 4.0 17 522.764706 0.222671\n",
"1 4.2 6.0 13 683.615385 0.291185\n",
"2 2.5 4.0 16 555.437500 0.236588"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vehiculos.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"pct_entrenamiento=0.8"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Draw the training rows at random. A fixed random_state makes the split, and\n",
"# therefore every number computed from it, identical on each run.\n",
"vehiculos_training = vehiculos.sample(frac=pct_entrenamiento, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# The test set is every row whose index label was not drawn for training.\n",
"vehiculos_test = vehiculos.drop(vehiculos_training.index)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(28431, 5)\n"
]
}
],
"source": [
"print(vehiculos_training.shape)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(7108, 5)\n"
]
}
],
"source": [
"print(vehiculos_test.shape)"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"variables_independientes=[\"desplazamiento\",\"cilindros\",\"consumo_litos_milla\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"variable_dependiente=\"co2\""
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"# Predictor matrix as a NumPy array, one row per training vehicle.\n",
"# .values replaces DataFrame.as_matrix(), which pandas deprecated and later removed.\n",
"X = vehiculos_training[variables_independientes].values"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"# Response vector as a NumPy array, aligned row-for-row with X.\n",
"# .values replaces Series.as_matrix(), which pandas deprecated and later removed.\n",
"Y = vehiculos_training[variable_dependiente].values"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"X_T = X.T"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[3.1 , 6. , 0.21030056],\n",
" [5. , 8. , 0.27038643],\n",
" [4.3 , 6. , 0.25236067],\n",
" ...,\n",
" [3.3 , 6. , 0.18025762],\n",
" [4.3 , 6. , 0.23658813],\n",
" [3. , 6. , 0.22267118]])"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"# Center predictors and response before solving. The intercept is later computed\n",
"# as mean(Y) - betas . mean(X), which is only the least-squares intercept when\n",
"# the slopes come from centered data; slopes fitted on the raw X correspond to a\n",
"# model forced through the origin.\n",
"X_centrada = X - X.mean(axis=0)\n",
"Y_centrada = Y - Y.mean()\n",
"\n",
"# Solve the normal equations directly instead of forming an explicit inverse,\n",
"# which is less accurate when the matrix is poorly conditioned.\n",
"betas = np.linalg.solve(X_centrada.T @ X_centrada, X_centrada.T @ Y_centrada)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 3.74255395e+00, -5.40919129e-01, 2.30466918e+03])"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"betas"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"470.6694731294241"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Y.mean()"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.36737696715539414"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Intercept from the sample means: alfa = mean(Y) - betas . mean(X).\n",
"# .values replaces Series.as_matrix(), which pandas deprecated and later removed.\n",
"medias_X = vehiculos_training[variables_independientes].mean().values\n",
"alfa = Y.mean() - np.dot(betas, medias_X)\n",
"alfa"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"def predecir(r):\n",
"    \"\"\"Evaluate the fitted linear model ``alfa + betas . r``.\n",
"\n",
"    Reads the notebook-level ``alfa`` (intercept) and ``betas`` (coefficients).\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    r : array-like\n",
"        One observation (pandas Series, list or 1-D array holding one value per\n",
"        coefficient, in the same order as ``betas``) or several observations\n",
"        (DataFrame or 2-D array with one row per observation).\n",
"\n",
"    Returns\n",
"    -------\n",
"    A scalar for a single observation, or a 1-D array with one prediction per\n",
"    row for 2-D input.\n",
"    \"\"\"\n",
"    # np.asarray accepts Series, DataFrames, lists and arrays alike; keeping the\n",
"    # data on the left of the dot product lets 2-D input work as well.\n",
"    return alfa + np.dot(np.asarray(r), betas)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment