Skip to content

Instantly share code, notes, and snippets.

@FavioVazquez
Created April 14, 2020 21:42
Show Gist options
  • Save FavioVazquez/c8d4ee1c04b1fcdaab681044b514a15e to your computer and use it in GitHub Desktop.
Save FavioVazquez/c8d4ee1c04b1fcdaab681044b514a15e to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Untitled1.ipynb",
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "code",
"metadata": {
"id": "NBXEXw3tn7rI",
"colab_type": "code",
"colab": {}
},
"source": [
"import pandas as pd"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "TP1GRmepn9qV",
"colab_type": "code",
"colab": {}
},
"source": [
"df = pd.read_csv(\"kc_house_data.csv\")"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "TcvfqTQXoYXX",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 224
},
"outputId": "a25b73ed-838a-4bff-9999-630cf190955b"
},
"source": [
"df.head()"
],
"execution_count": 3,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>date</th>\n",
" <th>price</th>\n",
" <th>bedrooms</th>\n",
" <th>bathrooms</th>\n",
" <th>sqft_living</th>\n",
" <th>sqft_lot</th>\n",
" <th>floors</th>\n",
" <th>waterfront</th>\n",
" <th>view</th>\n",
" <th>condition</th>\n",
" <th>grade</th>\n",
" <th>sqft_above</th>\n",
" <th>sqft_basement</th>\n",
" <th>yr_built</th>\n",
" <th>yr_renovated</th>\n",
" <th>zipcode</th>\n",
" <th>lat</th>\n",
" <th>long</th>\n",
" <th>sqft_living15</th>\n",
" <th>sqft_lot15</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7129300520</td>\n",
" <td>20141013T000000</td>\n",
" <td>221900.0</td>\n",
" <td>3</td>\n",
" <td>1.00</td>\n",
" <td>1180</td>\n",
" <td>5650</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>1180</td>\n",
" <td>0</td>\n",
" <td>1955</td>\n",
" <td>0</td>\n",
" <td>98178</td>\n",
" <td>47.5112</td>\n",
" <td>-122.257</td>\n",
" <td>1340</td>\n",
" <td>5650</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>6414100192</td>\n",
" <td>20141209T000000</td>\n",
" <td>538000.0</td>\n",
" <td>3</td>\n",
" <td>2.25</td>\n",
" <td>2570</td>\n",
" <td>7242</td>\n",
" <td>2.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>2170</td>\n",
" <td>400</td>\n",
" <td>1951</td>\n",
" <td>1991</td>\n",
" <td>98125</td>\n",
" <td>47.7210</td>\n",
" <td>-122.319</td>\n",
" <td>1690</td>\n",
" <td>7639</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>5631500400</td>\n",
" <td>20150225T000000</td>\n",
" <td>180000.0</td>\n",
" <td>2</td>\n",
" <td>1.00</td>\n",
" <td>770</td>\n",
" <td>10000</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>770</td>\n",
" <td>0</td>\n",
" <td>1933</td>\n",
" <td>0</td>\n",
" <td>98028</td>\n",
" <td>47.7379</td>\n",
" <td>-122.233</td>\n",
" <td>2720</td>\n",
" <td>8062</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2487200875</td>\n",
" <td>20141209T000000</td>\n",
" <td>604000.0</td>\n",
" <td>4</td>\n",
" <td>3.00</td>\n",
" <td>1960</td>\n",
" <td>5000</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>1050</td>\n",
" <td>910</td>\n",
" <td>1965</td>\n",
" <td>0</td>\n",
" <td>98136</td>\n",
" <td>47.5208</td>\n",
" <td>-122.393</td>\n",
" <td>1360</td>\n",
" <td>5000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1954400510</td>\n",
" <td>20150218T000000</td>\n",
" <td>510000.0</td>\n",
" <td>3</td>\n",
" <td>2.00</td>\n",
" <td>1680</td>\n",
" <td>8080</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>8</td>\n",
" <td>1680</td>\n",
" <td>0</td>\n",
" <td>1987</td>\n",
" <td>0</td>\n",
" <td>98074</td>\n",
" <td>47.6168</td>\n",
" <td>-122.045</td>\n",
" <td>1800</td>\n",
" <td>7503</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id date price ... long sqft_living15 sqft_lot15\n",
"0 7129300520 20141013T000000 221900.0 ... -122.257 1340 5650\n",
"1 6414100192 20141209T000000 538000.0 ... -122.319 1690 7639\n",
"2 5631500400 20150225T000000 180000.0 ... -122.233 2720 8062\n",
"3 2487200875 20141209T000000 604000.0 ... -122.393 1360 5000\n",
"4 1954400510 20150218T000000 510000.0 ... -122.045 1800 7503\n",
"\n",
"[5 rows x 21 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 3
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "xCQUrg79oZWx",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 317
},
"outputId": "589cd0fb-5b1f-4926-9948-7addae49eca8"
},
"source": [
"df.describe()"
],
"execution_count": 4,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>price</th>\n",
" <th>bedrooms</th>\n",
" <th>bathrooms</th>\n",
" <th>sqft_living</th>\n",
" <th>sqft_lot</th>\n",
" <th>floors</th>\n",
" <th>waterfront</th>\n",
" <th>view</th>\n",
" <th>condition</th>\n",
" <th>grade</th>\n",
" <th>sqft_above</th>\n",
" <th>sqft_basement</th>\n",
" <th>yr_built</th>\n",
" <th>yr_renovated</th>\n",
" <th>zipcode</th>\n",
" <th>lat</th>\n",
" <th>long</th>\n",
" <th>sqft_living15</th>\n",
" <th>sqft_lot15</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>2.161300e+04</td>\n",
" <td>2.161300e+04</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>2.161300e+04</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>4.580302e+09</td>\n",
" <td>5.400881e+05</td>\n",
" <td>3.370842</td>\n",
" <td>2.114757</td>\n",
" <td>2079.899736</td>\n",
" <td>1.510697e+04</td>\n",
" <td>1.494309</td>\n",
" <td>0.007542</td>\n",
" <td>0.234303</td>\n",
" <td>3.409430</td>\n",
" <td>7.656873</td>\n",
" <td>1788.390691</td>\n",
" <td>291.509045</td>\n",
" <td>1971.005136</td>\n",
" <td>84.402258</td>\n",
" <td>98077.939805</td>\n",
" <td>47.560053</td>\n",
" <td>-122.213896</td>\n",
" <td>1986.552492</td>\n",
" <td>12768.455652</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>2.876566e+09</td>\n",
" <td>3.671272e+05</td>\n",
" <td>0.930062</td>\n",
" <td>0.770163</td>\n",
" <td>918.440897</td>\n",
" <td>4.142051e+04</td>\n",
" <td>0.539989</td>\n",
" <td>0.086517</td>\n",
" <td>0.766318</td>\n",
" <td>0.650743</td>\n",
" <td>1.175459</td>\n",
" <td>828.090978</td>\n",
" <td>442.575043</td>\n",
" <td>29.373411</td>\n",
" <td>401.679240</td>\n",
" <td>53.505026</td>\n",
" <td>0.138564</td>\n",
" <td>0.140828</td>\n",
" <td>685.391304</td>\n",
" <td>27304.179631</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000102e+06</td>\n",
" <td>7.500000e+04</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>290.000000</td>\n",
" <td>5.200000e+02</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>290.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1900.000000</td>\n",
" <td>0.000000</td>\n",
" <td>98001.000000</td>\n",
" <td>47.155900</td>\n",
" <td>-122.519000</td>\n",
" <td>399.000000</td>\n",
" <td>651.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>2.123049e+09</td>\n",
" <td>3.219500e+05</td>\n",
" <td>3.000000</td>\n",
" <td>1.750000</td>\n",
" <td>1427.000000</td>\n",
" <td>5.040000e+03</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>3.000000</td>\n",
" <td>7.000000</td>\n",
" <td>1190.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1951.000000</td>\n",
" <td>0.000000</td>\n",
" <td>98033.000000</td>\n",
" <td>47.471000</td>\n",
" <td>-122.328000</td>\n",
" <td>1490.000000</td>\n",
" <td>5100.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>3.904930e+09</td>\n",
" <td>4.500000e+05</td>\n",
" <td>3.000000</td>\n",
" <td>2.250000</td>\n",
" <td>1910.000000</td>\n",
" <td>7.618000e+03</td>\n",
" <td>1.500000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>3.000000</td>\n",
" <td>7.000000</td>\n",
" <td>1560.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1975.000000</td>\n",
" <td>0.000000</td>\n",
" <td>98065.000000</td>\n",
" <td>47.571800</td>\n",
" <td>-122.230000</td>\n",
" <td>1840.000000</td>\n",
" <td>7620.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>7.308900e+09</td>\n",
" <td>6.450000e+05</td>\n",
" <td>4.000000</td>\n",
" <td>2.500000</td>\n",
" <td>2550.000000</td>\n",
" <td>1.068800e+04</td>\n",
" <td>2.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>4.000000</td>\n",
" <td>8.000000</td>\n",
" <td>2210.000000</td>\n",
" <td>560.000000</td>\n",
" <td>1997.000000</td>\n",
" <td>0.000000</td>\n",
" <td>98118.000000</td>\n",
" <td>47.678000</td>\n",
" <td>-122.125000</td>\n",
" <td>2360.000000</td>\n",
" <td>10083.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>9.900000e+09</td>\n",
" <td>7.700000e+06</td>\n",
" <td>33.000000</td>\n",
" <td>8.000000</td>\n",
" <td>13540.000000</td>\n",
" <td>1.651359e+06</td>\n",
" <td>3.500000</td>\n",
" <td>1.000000</td>\n",
" <td>4.000000</td>\n",
" <td>5.000000</td>\n",
" <td>13.000000</td>\n",
" <td>9410.000000</td>\n",
" <td>4820.000000</td>\n",
" <td>2015.000000</td>\n",
" <td>2015.000000</td>\n",
" <td>98199.000000</td>\n",
" <td>47.777600</td>\n",
" <td>-121.315000</td>\n",
" <td>6210.000000</td>\n",
" <td>871200.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id price ... sqft_living15 sqft_lot15\n",
"count 2.161300e+04 2.161300e+04 ... 21613.000000 21613.000000\n",
"mean 4.580302e+09 5.400881e+05 ... 1986.552492 12768.455652\n",
"std 2.876566e+09 3.671272e+05 ... 685.391304 27304.179631\n",
"min 1.000102e+06 7.500000e+04 ... 399.000000 651.000000\n",
"25% 2.123049e+09 3.219500e+05 ... 1490.000000 5100.000000\n",
"50% 3.904930e+09 4.500000e+05 ... 1840.000000 7620.000000\n",
"75% 7.308900e+09 6.450000e+05 ... 2360.000000 10083.000000\n",
"max 9.900000e+09 7.700000e+06 ... 6210.000000 871200.000000\n",
"\n",
"[8 rows x 20 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "SI7bX8fxofLB",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 493
},
"outputId": "bc91f405-45e6-4b4d-9341-3937b4c035df"
},
"source": [
"df.info()"
],
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 21613 entries, 0 to 21612\n",
"Data columns (total 21 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 id 21613 non-null int64 \n",
" 1 date 21613 non-null object \n",
" 2 price 21613 non-null float64\n",
" 3 bedrooms 21613 non-null int64 \n",
" 4 bathrooms 21613 non-null float64\n",
" 5 sqft_living 21613 non-null int64 \n",
" 6 sqft_lot 21613 non-null int64 \n",
" 7 floors 21613 non-null float64\n",
" 8 waterfront 21613 non-null int64 \n",
" 9 view 21613 non-null int64 \n",
" 10 condition 21613 non-null int64 \n",
" 11 grade 21613 non-null int64 \n",
" 12 sqft_above 21613 non-null int64 \n",
" 13 sqft_basement 21613 non-null int64 \n",
" 14 yr_built 21613 non-null int64 \n",
" 15 yr_renovated 21613 non-null int64 \n",
" 16 zipcode 21613 non-null int64 \n",
" 17 lat 21613 non-null float64\n",
" 18 long 21613 non-null float64\n",
" 19 sqft_living15 21613 non-null int64 \n",
" 20 sqft_lot15 21613 non-null int64 \n",
"dtypes: float64(5), int64(15), object(1)\n",
"memory usage: 3.5+ MB\n"
],
"name": "stdout"
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment