Created
April 14, 2020 21:42
-
-
Save FavioVazquez/c8d4ee1c04b1fcdaab681044b514a15e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "Untitled1.ipynb", | |
"provenance": [] | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "NBXEXw3tn7rI", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"import pandas as pd" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "TP1GRmepn9qV", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"df = pd.read_csv(\"kc_house_data.csv\")" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "TcvfqTQXoYXX", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 224 | |
}, | |
"outputId": "a25b73ed-838a-4bff-9999-630cf190955b" | |
}, | |
"source": [ | |
"df.head()" | |
], | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>date</th>\n", | |
" <th>price</th>\n", | |
" <th>bedrooms</th>\n", | |
" <th>bathrooms</th>\n", | |
" <th>sqft_living</th>\n", | |
" <th>sqft_lot</th>\n", | |
" <th>floors</th>\n", | |
" <th>waterfront</th>\n", | |
" <th>view</th>\n", | |
" <th>condition</th>\n", | |
" <th>grade</th>\n", | |
" <th>sqft_above</th>\n", | |
" <th>sqft_basement</th>\n", | |
" <th>yr_built</th>\n", | |
" <th>yr_renovated</th>\n", | |
" <th>zipcode</th>\n", | |
" <th>lat</th>\n", | |
" <th>long</th>\n", | |
" <th>sqft_living15</th>\n", | |
" <th>sqft_lot15</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>7129300520</td>\n", | |
" <td>20141013T000000</td>\n", | |
" <td>221900.0</td>\n", | |
" <td>3</td>\n", | |
" <td>1.00</td>\n", | |
" <td>1180</td>\n", | |
" <td>5650</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>7</td>\n", | |
" <td>1180</td>\n", | |
" <td>0</td>\n", | |
" <td>1955</td>\n", | |
" <td>0</td>\n", | |
" <td>98178</td>\n", | |
" <td>47.5112</td>\n", | |
" <td>-122.257</td>\n", | |
" <td>1340</td>\n", | |
" <td>5650</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>6414100192</td>\n", | |
" <td>20141209T000000</td>\n", | |
" <td>538000.0</td>\n", | |
" <td>3</td>\n", | |
" <td>2.25</td>\n", | |
" <td>2570</td>\n", | |
" <td>7242</td>\n", | |
" <td>2.0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>7</td>\n", | |
" <td>2170</td>\n", | |
" <td>400</td>\n", | |
" <td>1951</td>\n", | |
" <td>1991</td>\n", | |
" <td>98125</td>\n", | |
" <td>47.7210</td>\n", | |
" <td>-122.319</td>\n", | |
" <td>1690</td>\n", | |
" <td>7639</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>5631500400</td>\n", | |
" <td>20150225T000000</td>\n", | |
" <td>180000.0</td>\n", | |
" <td>2</td>\n", | |
" <td>1.00</td>\n", | |
" <td>770</td>\n", | |
" <td>10000</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>6</td>\n", | |
" <td>770</td>\n", | |
" <td>0</td>\n", | |
" <td>1933</td>\n", | |
" <td>0</td>\n", | |
" <td>98028</td>\n", | |
" <td>47.7379</td>\n", | |
" <td>-122.233</td>\n", | |
" <td>2720</td>\n", | |
" <td>8062</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2487200875</td>\n", | |
" <td>20141209T000000</td>\n", | |
" <td>604000.0</td>\n", | |
" <td>4</td>\n", | |
" <td>3.00</td>\n", | |
" <td>1960</td>\n", | |
" <td>5000</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>5</td>\n", | |
" <td>7</td>\n", | |
" <td>1050</td>\n", | |
" <td>910</td>\n", | |
" <td>1965</td>\n", | |
" <td>0</td>\n", | |
" <td>98136</td>\n", | |
" <td>47.5208</td>\n", | |
" <td>-122.393</td>\n", | |
" <td>1360</td>\n", | |
" <td>5000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1954400510</td>\n", | |
" <td>20150218T000000</td>\n", | |
" <td>510000.0</td>\n", | |
" <td>3</td>\n", | |
" <td>2.00</td>\n", | |
" <td>1680</td>\n", | |
" <td>8080</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>8</td>\n", | |
" <td>1680</td>\n", | |
" <td>0</td>\n", | |
" <td>1987</td>\n", | |
" <td>0</td>\n", | |
" <td>98074</td>\n", | |
" <td>47.6168</td>\n", | |
" <td>-122.045</td>\n", | |
" <td>1800</td>\n", | |
" <td>7503</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id date price ... long sqft_living15 sqft_lot15\n", | |
"0 7129300520 20141013T000000 221900.0 ... -122.257 1340 5650\n", | |
"1 6414100192 20141209T000000 538000.0 ... -122.319 1690 7639\n", | |
"2 5631500400 20150225T000000 180000.0 ... -122.233 2720 8062\n", | |
"3 2487200875 20141209T000000 604000.0 ... -122.393 1360 5000\n", | |
"4 1954400510 20150218T000000 510000.0 ... -122.045 1800 7503\n", | |
"\n", | |
"[5 rows x 21 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 3 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "xCQUrg79oZWx", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 317 | |
}, | |
"outputId": "589cd0fb-5b1f-4926-9948-7addae49eca8" | |
}, | |
"source": [ | |
"df.describe()" | |
], | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>price</th>\n", | |
" <th>bedrooms</th>\n", | |
" <th>bathrooms</th>\n", | |
" <th>sqft_living</th>\n", | |
" <th>sqft_lot</th>\n", | |
" <th>floors</th>\n", | |
" <th>waterfront</th>\n", | |
" <th>view</th>\n", | |
" <th>condition</th>\n", | |
" <th>grade</th>\n", | |
" <th>sqft_above</th>\n", | |
" <th>sqft_basement</th>\n", | |
" <th>yr_built</th>\n", | |
" <th>yr_renovated</th>\n", | |
" <th>zipcode</th>\n", | |
" <th>lat</th>\n", | |
" <th>long</th>\n", | |
" <th>sqft_living15</th>\n", | |
" <th>sqft_lot15</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>count</th>\n", | |
" <td>2.161300e+04</td>\n", | |
" <td>2.161300e+04</td>\n", | |
" <td>21613.000000</td>\n", | |
" <td>21613.000000</td>\n", | |
" <td>21613.000000</td>\n", | |
" <td>2.161300e+04</td>\n", | |
" <td>21613.000000</td>\n", | |
" <td>21613.000000</td>\n", | |
" <td>21613.000000</td>\n", | |
" <td>21613.000000</td>\n", | |
" <td>21613.000000</td>\n", | |
" <td>21613.000000</td>\n", | |
" <td>21613.000000</td>\n", | |
" <td>21613.000000</td>\n", | |
" <td>21613.000000</td>\n", | |
" <td>21613.000000</td>\n", | |
" <td>21613.000000</td>\n", | |
" <td>21613.000000</td>\n", | |
" <td>21613.000000</td>\n", | |
" <td>21613.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td>4.580302e+09</td>\n", | |
" <td>5.400881e+05</td>\n", | |
" <td>3.370842</td>\n", | |
" <td>2.114757</td>\n", | |
" <td>2079.899736</td>\n", | |
" <td>1.510697e+04</td>\n", | |
" <td>1.494309</td>\n", | |
" <td>0.007542</td>\n", | |
" <td>0.234303</td>\n", | |
" <td>3.409430</td>\n", | |
" <td>7.656873</td>\n", | |
" <td>1788.390691</td>\n", | |
" <td>291.509045</td>\n", | |
" <td>1971.005136</td>\n", | |
" <td>84.402258</td>\n", | |
" <td>98077.939805</td>\n", | |
" <td>47.560053</td>\n", | |
" <td>-122.213896</td>\n", | |
" <td>1986.552492</td>\n", | |
" <td>12768.455652</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td>2.876566e+09</td>\n", | |
" <td>3.671272e+05</td>\n", | |
" <td>0.930062</td>\n", | |
" <td>0.770163</td>\n", | |
" <td>918.440897</td>\n", | |
" <td>4.142051e+04</td>\n", | |
" <td>0.539989</td>\n", | |
" <td>0.086517</td>\n", | |
" <td>0.766318</td>\n", | |
" <td>0.650743</td>\n", | |
" <td>1.175459</td>\n", | |
" <td>828.090978</td>\n", | |
" <td>442.575043</td>\n", | |
" <td>29.373411</td>\n", | |
" <td>401.679240</td>\n", | |
" <td>53.505026</td>\n", | |
" <td>0.138564</td>\n", | |
" <td>0.140828</td>\n", | |
" <td>685.391304</td>\n", | |
" <td>27304.179631</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td>1.000102e+06</td>\n", | |
" <td>7.500000e+04</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>290.000000</td>\n", | |
" <td>5.200000e+02</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>290.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1900.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>98001.000000</td>\n", | |
" <td>47.155900</td>\n", | |
" <td>-122.519000</td>\n", | |
" <td>399.000000</td>\n", | |
" <td>651.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td>2.123049e+09</td>\n", | |
" <td>3.219500e+05</td>\n", | |
" <td>3.000000</td>\n", | |
" <td>1.750000</td>\n", | |
" <td>1427.000000</td>\n", | |
" <td>5.040000e+03</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>3.000000</td>\n", | |
" <td>7.000000</td>\n", | |
" <td>1190.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1951.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>98033.000000</td>\n", | |
" <td>47.471000</td>\n", | |
" <td>-122.328000</td>\n", | |
" <td>1490.000000</td>\n", | |
" <td>5100.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td>3.904930e+09</td>\n", | |
" <td>4.500000e+05</td>\n", | |
" <td>3.000000</td>\n", | |
" <td>2.250000</td>\n", | |
" <td>1910.000000</td>\n", | |
" <td>7.618000e+03</td>\n", | |
" <td>1.500000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>3.000000</td>\n", | |
" <td>7.000000</td>\n", | |
" <td>1560.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1975.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>98065.000000</td>\n", | |
" <td>47.571800</td>\n", | |
" <td>-122.230000</td>\n", | |
" <td>1840.000000</td>\n", | |
" <td>7620.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td>7.308900e+09</td>\n", | |
" <td>6.450000e+05</td>\n", | |
" <td>4.000000</td>\n", | |
" <td>2.500000</td>\n", | |
" <td>2550.000000</td>\n", | |
" <td>1.068800e+04</td>\n", | |
" <td>2.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>4.000000</td>\n", | |
" <td>8.000000</td>\n", | |
" <td>2210.000000</td>\n", | |
" <td>560.000000</td>\n", | |
" <td>1997.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>98118.000000</td>\n", | |
" <td>47.678000</td>\n", | |
" <td>-122.125000</td>\n", | |
" <td>2360.000000</td>\n", | |
" <td>10083.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td>9.900000e+09</td>\n", | |
" <td>7.700000e+06</td>\n", | |
" <td>33.000000</td>\n", | |
" <td>8.000000</td>\n", | |
" <td>13540.000000</td>\n", | |
" <td>1.651359e+06</td>\n", | |
" <td>3.500000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>4.000000</td>\n", | |
" <td>5.000000</td>\n", | |
" <td>13.000000</td>\n", | |
" <td>9410.000000</td>\n", | |
" <td>4820.000000</td>\n", | |
" <td>2015.000000</td>\n", | |
" <td>2015.000000</td>\n", | |
" <td>98199.000000</td>\n", | |
" <td>47.777600</td>\n", | |
" <td>-121.315000</td>\n", | |
" <td>6210.000000</td>\n", | |
" <td>871200.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id price ... sqft_living15 sqft_lot15\n", | |
"count 2.161300e+04 2.161300e+04 ... 21613.000000 21613.000000\n", | |
"mean 4.580302e+09 5.400881e+05 ... 1986.552492 12768.455652\n", | |
"std 2.876566e+09 3.671272e+05 ... 685.391304 27304.179631\n", | |
"min 1.000102e+06 7.500000e+04 ... 399.000000 651.000000\n", | |
"25% 2.123049e+09 3.219500e+05 ... 1490.000000 5100.000000\n", | |
"50% 3.904930e+09 4.500000e+05 ... 1840.000000 7620.000000\n", | |
"75% 7.308900e+09 6.450000e+05 ... 2360.000000 10083.000000\n", | |
"max 9.900000e+09 7.700000e+06 ... 6210.000000 871200.000000\n", | |
"\n", | |
"[8 rows x 20 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 4 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "SI7bX8fxofLB", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 493 | |
}, | |
"outputId": "bc91f405-45e6-4b4d-9341-3937b4c035df" | |
}, | |
"source": [ | |
"df.info()" | |
], | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"RangeIndex: 21613 entries, 0 to 21612\n", | |
"Data columns (total 21 columns):\n", | |
" # Column Non-Null Count Dtype \n", | |
"--- ------ -------------- ----- \n", | |
" 0 id 21613 non-null int64 \n", | |
" 1 date 21613 non-null object \n", | |
" 2 price 21613 non-null float64\n", | |
" 3 bedrooms 21613 non-null int64 \n", | |
" 4 bathrooms 21613 non-null float64\n", | |
" 5 sqft_living 21613 non-null int64 \n", | |
" 6 sqft_lot 21613 non-null int64 \n", | |
" 7 floors 21613 non-null float64\n", | |
" 8 waterfront 21613 non-null int64 \n", | |
" 9 view 21613 non-null int64 \n", | |
" 10 condition 21613 non-null int64 \n", | |
" 11 grade 21613 non-null int64 \n", | |
" 12 sqft_above 21613 non-null int64 \n", | |
" 13 sqft_basement 21613 non-null int64 \n", | |
" 14 yr_built 21613 non-null int64 \n", | |
" 15 yr_renovated 21613 non-null int64 \n", | |
" 16 zipcode 21613 non-null int64 \n", | |
" 17 lat 21613 non-null float64\n", | |
" 18 long 21613 non-null float64\n", | |
" 19 sqft_living15 21613 non-null int64 \n", | |
" 20 sqft_lot15 21613 non-null int64 \n", | |
"dtypes: float64(5), int64(15), object(1)\n", | |
"memory usage: 3.5+ MB\n" | |
], | |
"name": "stdout" | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment