Created
December 4, 2024 03:45
-
-
Save sohang3112/41d77a6df15331990e7b47bd4a09a379 to your computer and use it in GitHub Desktop.
Regression (assuming exponential population growth) Indian population size as of 2021 (since last census was in 2011)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Estimating Indian population size\n", | |
| "Since the last census in India was in 2011, the precise current population is not known; however, there are various expert estimates.\n", | |
| "\n", | |
| "**TLDR**: Regression (assuming exponential population increase) predicted *1423.2 million* as of 2021. This is fairly close to [this expert prediction](https://www.reuters.com/world/india/india-have-29-mln-more-people-than-china-by-mid-2023-un-estimate-shows-2023-04-19/) of *1428.6 million* as of mid-2023." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# plotly says mime type rendering requires nbformat>=4.2\n", | |
| "# (comment kept on its own line: text after a line magic is passed to the magic/shell, not ignored by Python)\n", | |
| "%pip install \"nbformat>=4.2\"" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 18, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "from sklearn.linear_model import LinearRegression # imported explicitly: `import sklearn` alone doesn't reliably load the linear_model submodule (sklearn.linear_model then raises AttributeError)\n", | |
| "from sklearn.preprocessing import FunctionTransformer\n", | |
| "from sklearn.pipeline import Pipeline, make_pipeline\n", | |
| "import statsmodels\n", | |
| "import numpy as np\n", | |
| "import matplotlib.pyplot as plt\n", | |
| "import plotly.express as px" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>year</th>\n", | |
| " <th>population_million</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1921</td>\n", | |
| " <td>251.32</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>1931</td>\n", | |
| " <td>278.98</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>1941</td>\n", | |
| " <td>318.16</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>1951</td>\n", | |
| " <td>361.09</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>1961</td>\n", | |
| " <td>439.23</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>1971</td>\n", | |
| " <td>548.16</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>1981</td>\n", | |
| " <td>683.33</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>1991</td>\n", | |
| " <td>846.42</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>2001</td>\n", | |
| " <td>1028.74</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>2011</td>\n", | |
| " <td>1210.19</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " year population_million\n", | |
| "2 1921 251.32\n", | |
| "3 1931 278.98\n", | |
| "4 1941 318.16\n", | |
| "5 1951 361.09\n", | |
| "6 1961 439.23\n", | |
| "7 1971 548.16\n", | |
| "8 1981 683.33\n", | |
| "9 1991 846.42\n", | |
| "10 2001 1028.74\n", | |
| "11 2011 1210.19" | |
| ] | |
| }, | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Census counts as (year, population in millions); copied from above wikipedia image\n", | |
| "df = pd.DataFrame(\n", | |
| "    data=[\n", | |
| "        (1901, 238.4),\n", | |
| "        (1911, 252.09),\n", | |
| "        (1921, 251.32),\n", | |
| "        (1931, 278.98),\n", | |
| "        (1941, 318.16),\n", | |
| "        (1951, 361.09),\n", | |
| "        (1961, 439.23),\n", | |
| "        (1971, 548.16),\n", | |
| "        (1981, 683.33),\n", | |
| "        (1991, 846.42),\n", | |
| "        (2001, 1028.74),\n", | |
| "        (2011, 1210.19),\n", | |
| "    ],\n", | |
| "    columns=['year', 'population_million'],\n", | |
| ").iloc[2:]  # drop first 2 yrs (outliers) - increase then decrease\n", | |
| "xfuture = [2021, 2031, 2041]  # years to predict for\n", | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "0.9257610324940422 [10.66369697] -20368.266242424244\n", | |
| "Predicted (future): [1183.06533333 1289.70230303 1396.33927273]\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.plotly.v1+json": { | |
| "config": { | |
| "plotlyServerURL": "https://plot.ly" | |
| }, | |
| "data": [ | |
| { | |
| "hovertemplate": "year=%{x}<br>population_million=%{y}<extra></extra>", | |
| "legendgroup": "", | |
| "marker": { | |
| "color": "#636efa", | |
| "symbol": "circle" | |
| }, | |
| "mode": "markers", | |
| "name": "", | |
| "orientation": "v", | |
| "showlegend": false, | |
| "type": "scatter", | |
| "x": [ | |
| 1921, | |
| 1931, | |
| 1941, | |
| 1951, | |
| 1961, | |
| 1971, | |
| 1981, | |
| 1991, | |
| 2001, | |
| 2011 | |
| ], | |
| "xaxis": "x", | |
| "y": [ | |
| 251.32, | |
| 278.98, | |
| 318.16, | |
| 361.09, | |
| 439.23, | |
| 548.16, | |
| 683.33, | |
| 846.42, | |
| 1028.74, | |
| 1210.19 | |
| ], | |
| "yaxis": "y" | |
| }, | |
| { | |
| "hovertemplate": "year=%{x}<br>y=%{y}<extra></extra>", | |
| "legendgroup": "", | |
| "line": { | |
| "color": "#636efa", | |
| "dash": "solid" | |
| }, | |
| "marker": { | |
| "symbol": "circle" | |
| }, | |
| "mode": "lines", | |
| "name": "", | |
| "orientation": "v", | |
| "showlegend": false, | |
| "type": "scatter", | |
| "x": [ | |
| 1921, | |
| 1931, | |
| 1941, | |
| 1951, | |
| 1961, | |
| 1971, | |
| 1981, | |
| 1991, | |
| 2001, | |
| 2011 | |
| ], | |
| "xaxis": "x", | |
| "y": [ | |
| 116.69563636363455, | |
| 223.33260606060503, | |
| 329.9695757575755, | |
| 436.60654545454236, | |
| 543.2435151515128, | |
| 649.8804848484833, | |
| 756.5174545454538, | |
| 863.1544242424206, | |
| 969.7913939393911, | |
| 1076.4283636363616 | |
| ], | |
| "yaxis": "y" | |
| } | |
| ], | |
| "layout": { | |
| "legend": { | |
| "tracegroupgap": 0 | |
| }, | |
| "template": { | |
| "data": { | |
| "bar": [ | |
| { | |
| "error_x": { | |
| "color": "#2a3f5f" | |
| }, | |
| "error_y": { | |
| "color": "#2a3f5f" | |
| }, | |
| "marker": { | |
| "line": { | |
| "color": "#E5ECF6", | |
| "width": 0.5 | |
| }, | |
| "pattern": { | |
| "fillmode": "overlay", | |
| "size": 10, | |
| "solidity": 0.2 | |
| } | |
| }, | |
| "type": "bar" | |
| } | |
| ], | |
| "barpolar": [ | |
| { | |
| "marker": { | |
| "line": { | |
| "color": "#E5ECF6", | |
| "width": 0.5 | |
| }, | |
| "pattern": { | |
| "fillmode": "overlay", | |
| "size": 10, | |
| "solidity": 0.2 | |
| } | |
| }, | |
| "type": "barpolar" | |
| } | |
| ], | |
| "carpet": [ | |
| { | |
| "aaxis": { | |
| "endlinecolor": "#2a3f5f", | |
| "gridcolor": "white", | |
| "linecolor": "white", | |
| "minorgridcolor": "white", | |
| "startlinecolor": "#2a3f5f" | |
| }, | |
| "baxis": { | |
| "endlinecolor": "#2a3f5f", | |
| "gridcolor": "white", | |
| "linecolor": "white", | |
| "minorgridcolor": "white", | |
| "startlinecolor": "#2a3f5f" | |
| }, | |
| "type": "carpet" | |
| } | |
| ], | |
| "choropleth": [ | |
| { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| }, | |
| "type": "choropleth" | |
| } | |
| ], | |
| "contour": [ | |
| { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| }, | |
| "colorscale": [ | |
| [ | |
| 0, | |
| "#0d0887" | |
| ], | |
| [ | |
| 0.1111111111111111, | |
| "#46039f" | |
| ], | |
| [ | |
| 0.2222222222222222, | |
| "#7201a8" | |
| ], | |
| [ | |
| 0.3333333333333333, | |
| "#9c179e" | |
| ], | |
| [ | |
| 0.4444444444444444, | |
| "#bd3786" | |
| ], | |
| [ | |
| 0.5555555555555556, | |
| "#d8576b" | |
| ], | |
| [ | |
| 0.6666666666666666, | |
| "#ed7953" | |
| ], | |
| [ | |
| 0.7777777777777778, | |
| "#fb9f3a" | |
| ], | |
| [ | |
| 0.8888888888888888, | |
| "#fdca26" | |
| ], | |
| [ | |
| 1, | |
| "#f0f921" | |
| ] | |
| ], | |
| "type": "contour" | |
| } | |
| ], | |
| "contourcarpet": [ | |
| { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| }, | |
| "type": "contourcarpet" | |
| } | |
| ], | |
| "heatmap": [ | |
| { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| }, | |
| "colorscale": [ | |
| [ | |
| 0, | |
| "#0d0887" | |
| ], | |
| [ | |
| 0.1111111111111111, | |
| "#46039f" | |
| ], | |
| [ | |
| 0.2222222222222222, | |
| "#7201a8" | |
| ], | |
| [ | |
| 0.3333333333333333, | |
| "#9c179e" | |
| ], | |
| [ | |
| 0.4444444444444444, | |
| "#bd3786" | |
| ], | |
| [ | |
| 0.5555555555555556, | |
| "#d8576b" | |
| ], | |
| [ | |
| 0.6666666666666666, | |
| "#ed7953" | |
| ], | |
| [ | |
| 0.7777777777777778, | |
| "#fb9f3a" | |
| ], | |
| [ | |
| 0.8888888888888888, | |
| "#fdca26" | |
| ], | |
| [ | |
| 1, | |
| "#f0f921" | |
| ] | |
| ], | |
| "type": "heatmap" | |
| } | |
| ], | |
| "heatmapgl": [ | |
| { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| }, | |
| "colorscale": [ | |
| [ | |
| 0, | |
| "#0d0887" | |
| ], | |
| [ | |
| 0.1111111111111111, | |
| "#46039f" | |
| ], | |
| [ | |
| 0.2222222222222222, | |
| "#7201a8" | |
| ], | |
| [ | |
| 0.3333333333333333, | |
| "#9c179e" | |
| ], | |
| [ | |
| 0.4444444444444444, | |
| "#bd3786" | |
| ], | |
| [ | |
| 0.5555555555555556, | |
| "#d8576b" | |
| ], | |
| [ | |
| 0.6666666666666666, | |
| "#ed7953" | |
| ], | |
| [ | |
| 0.7777777777777778, | |
| "#fb9f3a" | |
| ], | |
| [ | |
| 0.8888888888888888, | |
| "#fdca26" | |
| ], | |
| [ | |
| 1, | |
| "#f0f921" | |
| ] | |
| ], | |
| "type": "heatmapgl" | |
| } | |
| ], | |
| "histogram": [ | |
| { | |
| "marker": { | |
| "pattern": { | |
| "fillmode": "overlay", | |
| "size": 10, | |
| "solidity": 0.2 | |
| } | |
| }, | |
| "type": "histogram" | |
| } | |
| ], | |
| "histogram2d": [ | |
| { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| }, | |
| "colorscale": [ | |
| [ | |
| 0, | |
| "#0d0887" | |
| ], | |
| [ | |
| 0.1111111111111111, | |
| "#46039f" | |
| ], | |
| [ | |
| 0.2222222222222222, | |
| "#7201a8" | |
| ], | |
| [ | |
| 0.3333333333333333, | |
| "#9c179e" | |
| ], | |
| [ | |
| 0.4444444444444444, | |
| "#bd3786" | |
| ], | |
| [ | |
| 0.5555555555555556, | |
| "#d8576b" | |
| ], | |
| [ | |
| 0.6666666666666666, | |
| "#ed7953" | |
| ], | |
| [ | |
| 0.7777777777777778, | |
| "#fb9f3a" | |
| ], | |
| [ | |
| 0.8888888888888888, | |
| "#fdca26" | |
| ], | |
| [ | |
| 1, | |
| "#f0f921" | |
| ] | |
| ], | |
| "type": "histogram2d" | |
| } | |
| ], | |
| "histogram2dcontour": [ | |
| { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| }, | |
| "colorscale": [ | |
| [ | |
| 0, | |
| "#0d0887" | |
| ], | |
| [ | |
| 0.1111111111111111, | |
| "#46039f" | |
| ], | |
| [ | |
| 0.2222222222222222, | |
| "#7201a8" | |
| ], | |
| [ | |
| 0.3333333333333333, | |
| "#9c179e" | |
| ], | |
| [ | |
| 0.4444444444444444, | |
| "#bd3786" | |
| ], | |
| [ | |
| 0.5555555555555556, | |
| "#d8576b" | |
| ], | |
| [ | |
| 0.6666666666666666, | |
| "#ed7953" | |
| ], | |
| [ | |
| 0.7777777777777778, | |
| "#fb9f3a" | |
| ], | |
| [ | |
| 0.8888888888888888, | |
| "#fdca26" | |
| ], | |
| [ | |
| 1, | |
| "#f0f921" | |
| ] | |
| ], | |
| "type": "histogram2dcontour" | |
| } | |
| ], | |
| "mesh3d": [ | |
| { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| }, | |
| "type": "mesh3d" | |
| } | |
| ], | |
| "parcoords": [ | |
| { | |
| "line": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "type": "parcoords" | |
| } | |
| ], | |
| "pie": [ | |
| { | |
| "automargin": true, | |
| "type": "pie" | |
| } | |
| ], | |
| "scatter": [ | |
| { | |
| "fillpattern": { | |
| "fillmode": "overlay", | |
| "size": 10, | |
| "solidity": 0.2 | |
| }, | |
| "type": "scatter" | |
| } | |
| ], | |
| "scatter3d": [ | |
| { | |
| "line": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "marker": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "type": "scatter3d" | |
| } | |
| ], | |
| "scattercarpet": [ | |
| { | |
| "marker": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "type": "scattercarpet" | |
| } | |
| ], | |
| "scattergeo": [ | |
| { | |
| "marker": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "type": "scattergeo" | |
| } | |
| ], | |
| "scattergl": [ | |
| { | |
| "marker": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "type": "scattergl" | |
| } | |
| ], | |
| "scattermapbox": [ | |
| { | |
| "marker": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "type": "scattermapbox" | |
| } | |
| ], | |
| "scatterpolar": [ | |
| { | |
| "marker": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "type": "scatterpolar" | |
| } | |
| ], | |
| "scatterpolargl": [ | |
| { | |
| "marker": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "type": "scatterpolargl" | |
| } | |
| ], | |
| "scatterternary": [ | |
| { | |
| "marker": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "type": "scatterternary" | |
| } | |
| ], | |
| "surface": [ | |
| { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| }, | |
| "colorscale": [ | |
| [ | |
| 0, | |
| "#0d0887" | |
| ], | |
| [ | |
| 0.1111111111111111, | |
| "#46039f" | |
| ], | |
| [ | |
| 0.2222222222222222, | |
| "#7201a8" | |
| ], | |
| [ | |
| 0.3333333333333333, | |
| "#9c179e" | |
| ], | |
| [ | |
| 0.4444444444444444, | |
| "#bd3786" | |
| ], | |
| [ | |
| 0.5555555555555556, | |
| "#d8576b" | |
| ], | |
| [ | |
| 0.6666666666666666, | |
| "#ed7953" | |
| ], | |
| [ | |
| 0.7777777777777778, | |
| "#fb9f3a" | |
| ], | |
| [ | |
| 0.8888888888888888, | |
| "#fdca26" | |
| ], | |
| [ | |
| 1, | |
| "#f0f921" | |
| ] | |
| ], | |
| "type": "surface" | |
| } | |
| ], | |
| "table": [ | |
| { | |
| "cells": { | |
| "fill": { | |
| "color": "#EBF0F8" | |
| }, | |
| "line": { | |
| "color": "white" | |
| } | |
| }, | |
| "header": { | |
| "fill": { | |
| "color": "#C8D4E3" | |
| }, | |
| "line": { | |
| "color": "white" | |
| } | |
| }, | |
| "type": "table" | |
| } | |
| ] | |
| }, | |
| "layout": { | |
| "annotationdefaults": { | |
| "arrowcolor": "#2a3f5f", | |
| "arrowhead": 0, | |
| "arrowwidth": 1 | |
| }, | |
| "autotypenumbers": "strict", | |
| "coloraxis": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "colorscale": { | |
| "diverging": [ | |
| [ | |
| 0, | |
| "#8e0152" | |
| ], | |
| [ | |
| 0.1, | |
| "#c51b7d" | |
| ], | |
| [ | |
| 0.2, | |
| "#de77ae" | |
| ], | |
| [ | |
| 0.3, | |
| "#f1b6da" | |
| ], | |
| [ | |
| 0.4, | |
| "#fde0ef" | |
| ], | |
| [ | |
| 0.5, | |
| "#f7f7f7" | |
| ], | |
| [ | |
| 0.6, | |
| "#e6f5d0" | |
| ], | |
| [ | |
| 0.7, | |
| "#b8e186" | |
| ], | |
| [ | |
| 0.8, | |
| "#7fbc41" | |
| ], | |
| [ | |
| 0.9, | |
| "#4d9221" | |
| ], | |
| [ | |
| 1, | |
| "#276419" | |
| ] | |
| ], | |
| "sequential": [ | |
| [ | |
| 0, | |
| "#0d0887" | |
| ], | |
| [ | |
| 0.1111111111111111, | |
| "#46039f" | |
| ], | |
| [ | |
| 0.2222222222222222, | |
| "#7201a8" | |
| ], | |
| [ | |
| 0.3333333333333333, | |
| "#9c179e" | |
| ], | |
| [ | |
| 0.4444444444444444, | |
| "#bd3786" | |
| ], | |
| [ | |
| 0.5555555555555556, | |
| "#d8576b" | |
| ], | |
| [ | |
| 0.6666666666666666, | |
| "#ed7953" | |
| ], | |
| [ | |
| 0.7777777777777778, | |
| "#fb9f3a" | |
| ], | |
| [ | |
| 0.8888888888888888, | |
| "#fdca26" | |
| ], | |
| [ | |
| 1, | |
| "#f0f921" | |
| ] | |
| ], | |
| "sequentialminus": [ | |
| [ | |
| 0, | |
| "#0d0887" | |
| ], | |
| [ | |
| 0.1111111111111111, | |
| "#46039f" | |
| ], | |
| [ | |
| 0.2222222222222222, | |
| "#7201a8" | |
| ], | |
| [ | |
| 0.3333333333333333, | |
| "#9c179e" | |
| ], | |
| [ | |
| 0.4444444444444444, | |
| "#bd3786" | |
| ], | |
| [ | |
| 0.5555555555555556, | |
| "#d8576b" | |
| ], | |
| [ | |
| 0.6666666666666666, | |
| "#ed7953" | |
| ], | |
| [ | |
| 0.7777777777777778, | |
| "#fb9f3a" | |
| ], | |
| [ | |
| 0.8888888888888888, | |
| "#fdca26" | |
| ], | |
| [ | |
| 1, | |
| "#f0f921" | |
| ] | |
| ] | |
| }, | |
| "colorway": [ | |
| "#636efa", | |
| "#EF553B", | |
| "#00cc96", | |
| "#ab63fa", | |
| "#FFA15A", | |
| "#19d3f3", | |
| "#FF6692", | |
| "#B6E880", | |
| "#FF97FF", | |
| "#FECB52" | |
| ], | |
| "font": { | |
| "color": "#2a3f5f" | |
| }, | |
| "geo": { | |
| "bgcolor": "white", | |
| "lakecolor": "white", | |
| "landcolor": "#E5ECF6", | |
| "showlakes": true, | |
| "showland": true, | |
| "subunitcolor": "white" | |
| }, | |
| "hoverlabel": { | |
| "align": "left" | |
| }, | |
| "hovermode": "closest", | |
| "mapbox": { | |
| "style": "light" | |
| }, | |
| "paper_bgcolor": "white", | |
| "plot_bgcolor": "#E5ECF6", | |
| "polar": { | |
| "angularaxis": { | |
| "gridcolor": "white", | |
| "linecolor": "white", | |
| "ticks": "" | |
| }, | |
| "bgcolor": "#E5ECF6", | |
| "radialaxis": { | |
| "gridcolor": "white", | |
| "linecolor": "white", | |
| "ticks": "" | |
| } | |
| }, | |
| "scene": { | |
| "xaxis": { | |
| "backgroundcolor": "#E5ECF6", | |
| "gridcolor": "white", | |
| "gridwidth": 2, | |
| "linecolor": "white", | |
| "showbackground": true, | |
| "ticks": "", | |
| "zerolinecolor": "white" | |
| }, | |
| "yaxis": { | |
| "backgroundcolor": "#E5ECF6", | |
| "gridcolor": "white", | |
| "gridwidth": 2, | |
| "linecolor": "white", | |
| "showbackground": true, | |
| "ticks": "", | |
| "zerolinecolor": "white" | |
| }, | |
| "zaxis": { | |
| "backgroundcolor": "#E5ECF6", | |
| "gridcolor": "white", | |
| "gridwidth": 2, | |
| "linecolor": "white", | |
| "showbackground": true, | |
| "ticks": "", | |
| "zerolinecolor": "white" | |
| } | |
| }, | |
| "shapedefaults": { | |
| "line": { | |
| "color": "#2a3f5f" | |
| } | |
| }, | |
| "ternary": { | |
| "aaxis": { | |
| "gridcolor": "white", | |
| "linecolor": "white", | |
| "ticks": "" | |
| }, | |
| "baxis": { | |
| "gridcolor": "white", | |
| "linecolor": "white", | |
| "ticks": "" | |
| }, | |
| "bgcolor": "#E5ECF6", | |
| "caxis": { | |
| "gridcolor": "white", | |
| "linecolor": "white", | |
| "ticks": "" | |
| } | |
| }, | |
| "title": { | |
| "x": 0.05 | |
| }, | |
| "xaxis": { | |
| "automargin": true, | |
| "gridcolor": "white", | |
| "linecolor": "white", | |
| "ticks": "", | |
| "title": { | |
| "standoff": 15 | |
| }, | |
| "zerolinecolor": "white", | |
| "zerolinewidth": 2 | |
| }, | |
| "yaxis": { | |
| "automargin": true, | |
| "gridcolor": "white", | |
| "linecolor": "white", | |
| "ticks": "", | |
| "title": { | |
| "standoff": 15 | |
| }, | |
| "zerolinecolor": "white", | |
| "zerolinewidth": 2 | |
| } | |
| } | |
| }, | |
| "title": { | |
| "text": "Direct Regression" | |
| }, | |
| "xaxis": { | |
| "anchor": "y", | |
| "domain": [ | |
| 0, | |
| 1 | |
| ], | |
| "tickmode": "array", | |
| "tickvals": [ | |
| 1921, | |
| 1931, | |
| 1941, | |
| 1951, | |
| 1961, | |
| 1971, | |
| 1981, | |
| 1991, | |
| 2001, | |
| 2011 | |
| ], | |
| "title": { | |
| "text": "year" | |
| } | |
| }, | |
| "yaxis": { | |
| "anchor": "x", | |
| "domain": [ | |
| 0, | |
| 1 | |
| ], | |
| "title": { | |
| "text": "population_million" | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# Baseline: fit a straight line (population vs. year) and plot it over the census data\n", | |
| "X, y = df[['year']], df['population_million'] # training data\n", | |
| "reg = LinearRegression().fit(X, y)\n", | |
| "# score() is R^2 (coefficient of determination), not the correlation coefficient\n", | |
| "print(reg.score(X, y), reg.coef_, reg.intercept_)\n", | |
| "yfuture = reg.predict(pd.DataFrame({'year': xfuture})) # same column name as training X\n", | |
| "print('Predicted (future):', yfuture)\n", | |
| "ypred = reg.predict(X) # fitted values for the census years\n", | |
| "fig = px.scatter(df, x='year', y='population_million', title='Direct Regression')\n", | |
| "fig.add_trace(px.line(df, x='year', y=ypred).data[0]) # overlay fitted line on the scatter\n", | |
| "fig.update_xaxes(tickmode='array', tickvals=df['year']) # show exact x axis labels\n", | |
| "fig.show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "0.9257610324940422 [0.01836867] -29.86242627878029\n", | |
| "Predicted: [1423.18457705 1710.1544759 2054.98877561]\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.plotly.v1+json": { | |
| "config": { | |
| "plotlyServerURL": "https://plot.ly" | |
| }, | |
| "data": [ | |
| { | |
| "hovertemplate": "year=%{x}<br>population_million=%{y}<extra></extra>", | |
| "legendgroup": "", | |
| "marker": { | |
| "color": "#636efa", | |
| "symbol": "circle" | |
| }, | |
| "mode": "markers", | |
| "name": "", | |
| "orientation": "v", | |
| "showlegend": false, | |
| "type": "scatter", | |
| "x": [ | |
| 1921, | |
| 1931, | |
| 1941, | |
| 1951, | |
| 1961, | |
| 1971, | |
| 1981, | |
| 1991, | |
| 2001, | |
| 2011 | |
| ], | |
| "xaxis": "x", | |
| "y": [ | |
| 251.32, | |
| 278.98, | |
| 318.16, | |
| 361.09, | |
| 439.23, | |
| 548.16, | |
| 683.33, | |
| 846.42, | |
| 1028.74, | |
| 1210.19 | |
| ], | |
| "yaxis": "y" | |
| }, | |
| { | |
| "hovertemplate": "year=%{x}<br>y=%{y}<extra></extra>", | |
| "legendgroup": "", | |
| "line": { | |
| "color": "#636efa", | |
| "dash": "solid" | |
| }, | |
| "marker": { | |
| "symbol": "circle" | |
| }, | |
| "mode": "lines", | |
| "name": "", | |
| "orientation": "v", | |
| "showlegend": false, | |
| "type": "scatter", | |
| "x": [ | |
| 1921, | |
| 1931, | |
| 1941, | |
| 1951, | |
| 1961, | |
| 1971, | |
| 1981, | |
| 1991, | |
| 2001, | |
| 2011 | |
| ], | |
| "xaxis": "x", | |
| "y": [ | |
| 226.7358020977741, | |
| 272.45464366229646, | |
| 327.39219905437756, | |
| 393.40732299837845, | |
| 472.73368832788066, | |
| 568.0553640355123, | |
| 682.5976328255831, | |
| 820.2361210516876, | |
| 985.6279335352224, | |
| 1184.3692303130058 | |
| ], | |
| "yaxis": "y" | |
| } | |
| ], | |
| "layout": { | |
| "legend": { | |
| "tracegroupgap": 0 | |
| }, | |
| "template": { | |
| "data": { | |
| "bar": [ | |
| { | |
| "error_x": { | |
| "color": "#2a3f5f" | |
| }, | |
| "error_y": { | |
| "color": "#2a3f5f" | |
| }, | |
| "marker": { | |
| "line": { | |
| "color": "#E5ECF6", | |
| "width": 0.5 | |
| }, | |
| "pattern": { | |
| "fillmode": "overlay", | |
| "size": 10, | |
| "solidity": 0.2 | |
| } | |
| }, | |
| "type": "bar" | |
| } | |
| ], | |
| "barpolar": [ | |
| { | |
| "marker": { | |
| "line": { | |
| "color": "#E5ECF6", | |
| "width": 0.5 | |
| }, | |
| "pattern": { | |
| "fillmode": "overlay", | |
| "size": 10, | |
| "solidity": 0.2 | |
| } | |
| }, | |
| "type": "barpolar" | |
| } | |
| ], | |
| "carpet": [ | |
| { | |
| "aaxis": { | |
| "endlinecolor": "#2a3f5f", | |
| "gridcolor": "white", | |
| "linecolor": "white", | |
| "minorgridcolor": "white", | |
| "startlinecolor": "#2a3f5f" | |
| }, | |
| "baxis": { | |
| "endlinecolor": "#2a3f5f", | |
| "gridcolor": "white", | |
| "linecolor": "white", | |
| "minorgridcolor": "white", | |
| "startlinecolor": "#2a3f5f" | |
| }, | |
| "type": "carpet" | |
| } | |
| ], | |
| "choropleth": [ | |
| { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| }, | |
| "type": "choropleth" | |
| } | |
| ], | |
| "contour": [ | |
| { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| }, | |
| "colorscale": [ | |
| [ | |
| 0, | |
| "#0d0887" | |
| ], | |
| [ | |
| 0.1111111111111111, | |
| "#46039f" | |
| ], | |
| [ | |
| 0.2222222222222222, | |
| "#7201a8" | |
| ], | |
| [ | |
| 0.3333333333333333, | |
| "#9c179e" | |
| ], | |
| [ | |
| 0.4444444444444444, | |
| "#bd3786" | |
| ], | |
| [ | |
| 0.5555555555555556, | |
| "#d8576b" | |
| ], | |
| [ | |
| 0.6666666666666666, | |
| "#ed7953" | |
| ], | |
| [ | |
| 0.7777777777777778, | |
| "#fb9f3a" | |
| ], | |
| [ | |
| 0.8888888888888888, | |
| "#fdca26" | |
| ], | |
| [ | |
| 1, | |
| "#f0f921" | |
| ] | |
| ], | |
| "type": "contour" | |
| } | |
| ], | |
| "contourcarpet": [ | |
| { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| }, | |
| "type": "contourcarpet" | |
| } | |
| ], | |
| "heatmap": [ | |
| { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| }, | |
| "colorscale": [ | |
| [ | |
| 0, | |
| "#0d0887" | |
| ], | |
| [ | |
| 0.1111111111111111, | |
| "#46039f" | |
| ], | |
| [ | |
| 0.2222222222222222, | |
| "#7201a8" | |
| ], | |
| [ | |
| 0.3333333333333333, | |
| "#9c179e" | |
| ], | |
| [ | |
| 0.4444444444444444, | |
| "#bd3786" | |
| ], | |
| [ | |
| 0.5555555555555556, | |
| "#d8576b" | |
| ], | |
| [ | |
| 0.6666666666666666, | |
| "#ed7953" | |
| ], | |
| [ | |
| 0.7777777777777778, | |
| "#fb9f3a" | |
| ], | |
| [ | |
| 0.8888888888888888, | |
| "#fdca26" | |
| ], | |
| [ | |
| 1, | |
| "#f0f921" | |
| ] | |
| ], | |
| "type": "heatmap" | |
| } | |
| ], | |
| "heatmapgl": [ | |
| { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| }, | |
| "colorscale": [ | |
| [ | |
| 0, | |
| "#0d0887" | |
| ], | |
| [ | |
| 0.1111111111111111, | |
| "#46039f" | |
| ], | |
| [ | |
| 0.2222222222222222, | |
| "#7201a8" | |
| ], | |
| [ | |
| 0.3333333333333333, | |
| "#9c179e" | |
| ], | |
| [ | |
| 0.4444444444444444, | |
| "#bd3786" | |
| ], | |
| [ | |
| 0.5555555555555556, | |
| "#d8576b" | |
| ], | |
| [ | |
| 0.6666666666666666, | |
| "#ed7953" | |
| ], | |
| [ | |
| 0.7777777777777778, | |
| "#fb9f3a" | |
| ], | |
| [ | |
| 0.8888888888888888, | |
| "#fdca26" | |
| ], | |
| [ | |
| 1, | |
| "#f0f921" | |
| ] | |
| ], | |
| "type": "heatmapgl" | |
| } | |
| ], | |
| "histogram": [ | |
| { | |
| "marker": { | |
| "pattern": { | |
| "fillmode": "overlay", | |
| "size": 10, | |
| "solidity": 0.2 | |
| } | |
| }, | |
| "type": "histogram" | |
| } | |
| ], | |
| "histogram2d": [ | |
| { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| }, | |
| "colorscale": [ | |
| [ | |
| 0, | |
| "#0d0887" | |
| ], | |
| [ | |
| 0.1111111111111111, | |
| "#46039f" | |
| ], | |
| [ | |
| 0.2222222222222222, | |
| "#7201a8" | |
| ], | |
| [ | |
| 0.3333333333333333, | |
| "#9c179e" | |
| ], | |
| [ | |
| 0.4444444444444444, | |
| "#bd3786" | |
| ], | |
| [ | |
| 0.5555555555555556, | |
| "#d8576b" | |
| ], | |
| [ | |
| 0.6666666666666666, | |
| "#ed7953" | |
| ], | |
| [ | |
| 0.7777777777777778, | |
| "#fb9f3a" | |
| ], | |
| [ | |
| 0.8888888888888888, | |
| "#fdca26" | |
| ], | |
| [ | |
| 1, | |
| "#f0f921" | |
| ] | |
| ], | |
| "type": "histogram2d" | |
| } | |
| ], | |
| "histogram2dcontour": [ | |
| { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| }, | |
| "colorscale": [ | |
| [ | |
| 0, | |
| "#0d0887" | |
| ], | |
| [ | |
| 0.1111111111111111, | |
| "#46039f" | |
| ], | |
| [ | |
| 0.2222222222222222, | |
| "#7201a8" | |
| ], | |
| [ | |
| 0.3333333333333333, | |
| "#9c179e" | |
| ], | |
| [ | |
| 0.4444444444444444, | |
| "#bd3786" | |
| ], | |
| [ | |
| 0.5555555555555556, | |
| "#d8576b" | |
| ], | |
| [ | |
| 0.6666666666666666, | |
| "#ed7953" | |
| ], | |
| [ | |
| 0.7777777777777778, | |
| "#fb9f3a" | |
| ], | |
| [ | |
| 0.8888888888888888, | |
| "#fdca26" | |
| ], | |
| [ | |
| 1, | |
| "#f0f921" | |
| ] | |
| ], | |
| "type": "histogram2dcontour" | |
| } | |
| ], | |
| "mesh3d": [ | |
| { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| }, | |
| "type": "mesh3d" | |
| } | |
| ], | |
| "parcoords": [ | |
| { | |
| "line": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "type": "parcoords" | |
| } | |
| ], | |
| "pie": [ | |
| { | |
| "automargin": true, | |
| "type": "pie" | |
| } | |
| ], | |
| "scatter": [ | |
| { | |
| "fillpattern": { | |
| "fillmode": "overlay", | |
| "size": 10, | |
| "solidity": 0.2 | |
| }, | |
| "type": "scatter" | |
| } | |
| ], | |
| "scatter3d": [ | |
| { | |
| "line": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "marker": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "type": "scatter3d" | |
| } | |
| ], | |
| "scattercarpet": [ | |
| { | |
| "marker": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "type": "scattercarpet" | |
| } | |
| ], | |
| "scattergeo": [ | |
| { | |
| "marker": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "type": "scattergeo" | |
| } | |
| ], | |
| "scattergl": [ | |
| { | |
| "marker": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "type": "scattergl" | |
| } | |
| ], | |
| "scattermapbox": [ | |
| { | |
| "marker": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "type": "scattermapbox" | |
| } | |
| ], | |
| "scatterpolar": [ | |
| { | |
| "marker": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "type": "scatterpolar" | |
| } | |
| ], | |
| "scatterpolargl": [ | |
| { | |
| "marker": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "type": "scatterpolargl" | |
| } | |
| ], | |
| "scatterternary": [ | |
| { | |
| "marker": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "type": "scatterternary" | |
| } | |
| ], | |
| "surface": [ | |
| { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| }, | |
| "colorscale": [ | |
| [ | |
| 0, | |
| "#0d0887" | |
| ], | |
| [ | |
| 0.1111111111111111, | |
| "#46039f" | |
| ], | |
| [ | |
| 0.2222222222222222, | |
| "#7201a8" | |
| ], | |
| [ | |
| 0.3333333333333333, | |
| "#9c179e" | |
| ], | |
| [ | |
| 0.4444444444444444, | |
| "#bd3786" | |
| ], | |
| [ | |
| 0.5555555555555556, | |
| "#d8576b" | |
| ], | |
| [ | |
| 0.6666666666666666, | |
| "#ed7953" | |
| ], | |
| [ | |
| 0.7777777777777778, | |
| "#fb9f3a" | |
| ], | |
| [ | |
| 0.8888888888888888, | |
| "#fdca26" | |
| ], | |
| [ | |
| 1, | |
| "#f0f921" | |
| ] | |
| ], | |
| "type": "surface" | |
| } | |
| ], | |
| "table": [ | |
| { | |
| "cells": { | |
| "fill": { | |
| "color": "#EBF0F8" | |
| }, | |
| "line": { | |
| "color": "white" | |
| } | |
| }, | |
| "header": { | |
| "fill": { | |
| "color": "#C8D4E3" | |
| }, | |
| "line": { | |
| "color": "white" | |
| } | |
| }, | |
| "type": "table" | |
| } | |
| ] | |
| }, | |
| "layout": { | |
| "annotationdefaults": { | |
| "arrowcolor": "#2a3f5f", | |
| "arrowhead": 0, | |
| "arrowwidth": 1 | |
| }, | |
| "autotypenumbers": "strict", | |
| "coloraxis": { | |
| "colorbar": { | |
| "outlinewidth": 0, | |
| "ticks": "" | |
| } | |
| }, | |
| "colorscale": { | |
| "diverging": [ | |
| [ | |
| 0, | |
| "#8e0152" | |
| ], | |
| [ | |
| 0.1, | |
| "#c51b7d" | |
| ], | |
| [ | |
| 0.2, | |
| "#de77ae" | |
| ], | |
| [ | |
| 0.3, | |
| "#f1b6da" | |
| ], | |
| [ | |
| 0.4, | |
| "#fde0ef" | |
| ], | |
| [ | |
| 0.5, | |
| "#f7f7f7" | |
| ], | |
| [ | |
| 0.6, | |
| "#e6f5d0" | |
| ], | |
| [ | |
| 0.7, | |
| "#b8e186" | |
| ], | |
| [ | |
| 0.8, | |
| "#7fbc41" | |
| ], | |
| [ | |
| 0.9, | |
| "#4d9221" | |
| ], | |
| [ | |
| 1, | |
| "#276419" | |
| ] | |
| ], | |
| "sequential": [ | |
| [ | |
| 0, | |
| "#0d0887" | |
| ], | |
| [ | |
| 0.1111111111111111, | |
| "#46039f" | |
| ], | |
| [ | |
| 0.2222222222222222, | |
| "#7201a8" | |
| ], | |
| [ | |
| 0.3333333333333333, | |
| "#9c179e" | |
| ], | |
| [ | |
| 0.4444444444444444, | |
| "#bd3786" | |
| ], | |
| [ | |
| 0.5555555555555556, | |
| "#d8576b" | |
| ], | |
| [ | |
| 0.6666666666666666, | |
| "#ed7953" | |
| ], | |
| [ | |
| 0.7777777777777778, | |
| "#fb9f3a" | |
| ], | |
| [ | |
| 0.8888888888888888, | |
| "#fdca26" | |
| ], | |
| [ | |
| 1, | |
| "#f0f921" | |
| ] | |
| ], | |
| "sequentialminus": [ | |
| [ | |
| 0, | |
| "#0d0887" | |
| ], | |
| [ | |
| 0.1111111111111111, | |
| "#46039f" | |
| ], | |
| [ | |
| 0.2222222222222222, | |
| "#7201a8" | |
| ], | |
| [ | |
| 0.3333333333333333, | |
| "#9c179e" | |
| ], | |
| [ | |
| 0.4444444444444444, | |
| "#bd3786" | |
| ], | |
| [ | |
| 0.5555555555555556, | |
| "#d8576b" | |
| ], | |
| [ | |
| 0.6666666666666666, | |
| "#ed7953" | |
| ], | |
| [ | |
| 0.7777777777777778, | |
| "#fb9f3a" | |
| ], | |
| [ | |
| 0.8888888888888888, | |
| "#fdca26" | |
| ], | |
| [ | |
| 1, | |
| "#f0f921" | |
| ] | |
| ] | |
| }, | |
| "colorway": [ | |
| "#636efa", | |
| "#EF553B", | |
| "#00cc96", | |
| "#ab63fa", | |
| "#FFA15A", | |
| "#19d3f3", | |
| "#FF6692", | |
| "#B6E880", | |
| "#FF97FF", | |
| "#FECB52" | |
| ], | |
| "font": { | |
| "color": "#2a3f5f" | |
| }, | |
| "geo": { | |
| "bgcolor": "white", | |
| "lakecolor": "white", | |
| "landcolor": "#E5ECF6", | |
| "showlakes": true, | |
| "showland": true, | |
| "subunitcolor": "white" | |
| }, | |
| "hoverlabel": { | |
| "align": "left" | |
| }, | |
| "hovermode": "closest", | |
| "mapbox": { | |
| "style": "light" | |
| }, | |
| "paper_bgcolor": "white", | |
| "plot_bgcolor": "#E5ECF6", | |
| "polar": { | |
| "angularaxis": { | |
| "gridcolor": "white", | |
| "linecolor": "white", | |
| "ticks": "" | |
| }, | |
| "bgcolor": "#E5ECF6", | |
| "radialaxis": { | |
| "gridcolor": "white", | |
| "linecolor": "white", | |
| "ticks": "" | |
| } | |
| }, | |
| "scene": { | |
| "xaxis": { | |
| "backgroundcolor": "#E5ECF6", | |
| "gridcolor": "white", | |
| "gridwidth": 2, | |
| "linecolor": "white", | |
| "showbackground": true, | |
| "ticks": "", | |
| "zerolinecolor": "white" | |
| }, | |
| "yaxis": { | |
| "backgroundcolor": "#E5ECF6", | |
| "gridcolor": "white", | |
| "gridwidth": 2, | |
| "linecolor": "white", | |
| "showbackground": true, | |
| "ticks": "", | |
| "zerolinecolor": "white" | |
| }, | |
| "zaxis": { | |
| "backgroundcolor": "#E5ECF6", | |
| "gridcolor": "white", | |
| "gridwidth": 2, | |
| "linecolor": "white", | |
| "showbackground": true, | |
| "ticks": "", | |
| "zerolinecolor": "white" | |
| } | |
| }, | |
| "shapedefaults": { | |
| "line": { | |
| "color": "#2a3f5f" | |
| } | |
| }, | |
| "ternary": { | |
| "aaxis": { | |
| "gridcolor": "white", | |
| "linecolor": "white", | |
| "ticks": "" | |
| }, | |
| "baxis": { | |
| "gridcolor": "white", | |
| "linecolor": "white", | |
| "ticks": "" | |
| }, | |
| "bgcolor": "#E5ECF6", | |
| "caxis": { | |
| "gridcolor": "white", | |
| "linecolor": "white", | |
| "ticks": "" | |
| } | |
| }, | |
| "title": { | |
| "x": 0.05 | |
| }, | |
| "xaxis": { | |
| "automargin": true, | |
| "gridcolor": "white", | |
| "linecolor": "white", | |
| "ticks": "", | |
| "title": { | |
| "standoff": 15 | |
| }, | |
| "zerolinecolor": "white", | |
| "zerolinewidth": 2 | |
| }, | |
| "yaxis": { | |
| "automargin": true, | |
| "gridcolor": "white", | |
| "linecolor": "white", | |
| "ticks": "", | |
| "title": { | |
| "standoff": 15 | |
| }, | |
| "zerolinecolor": "white", | |
| "zerolinewidth": 2 | |
| } | |
| } | |
| }, | |
| "title": { | |
| "text": "Regression after taking log(y)" | |
| }, | |
| "xaxis": { | |
| "anchor": "y", | |
| "domain": [ | |
| 0, | |
| 1 | |
| ], | |
| "tickmode": "array", | |
| "tickvals": [ | |
| 1921, | |
| 1931, | |
| 1941, | |
| 1951, | |
| 1961, | |
| 1971, | |
| 1981, | |
| 1991, | |
| 2001, | |
| 2011 | |
| ], | |
| "title": { | |
| "text": "year" | |
| } | |
| }, | |
| "yaxis": { | |
| "anchor": "x", | |
| "domain": [ | |
| 0, | |
| 1 | |
| ], | |
| "title": { | |
| "text": "population_million" | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "X, y = df[['year']], df['population_million'] # training data\n", | |
| "reg_log = LinearRegression().fit(X, np.log(y))\n", | |
| "print(reg.score(X,y), reg_log.coef_, reg_log.intercept_) # 0.9 correlation obtained is nearly perfect\n", | |
| "print('Predicted:', np.exp(reg_log.predict(pd.DataFrame({'year': [2021,2031,2041]}))))\n", | |
| "ypred = np.exp(reg_log.predict(X))\n", | |
| "#plt.scatter(df['year'], df['population_million'])\n", | |
| "fig = px.scatter(df, x='year', y='population_million', title='Regression after taking log(y)')\n", | |
| "fig.add_trace(px.line(df, x='year', y=ypred).data[0])\n", | |
| "fig.update_xaxes(tickmode='array', tickvals=df['year']) # show exact x axis labels\n", | |
| "fig.show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# residual plot: TODO\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>0</th>\n", | |
| " <th>1</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>const</th>\n", | |
| " <td>-25209.089813</td>\n", | |
| " <td>-15527.442672</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>year</th>\n", | |
| " <td>8.201689</td>\n", | |
| " <td>13.125705</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " 0 1\n", | |
| "const -25209.089813 -15527.442672\n", | |
| "year 8.201689 13.125705" | |
| ] | |
| }, | |
| "execution_count": 21, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Source: https://stackoverflow.com/a/74673133/12947681\n", | |
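| "# Using statsmodels to get 95% confidence intervals. NOTE: conf_int() below returns intervals for the fitted coefficients (const, year) of the linear (non-log) model, not upper/lower bounds of our 2021 prediction. TODO: for prediction bounds, use lr.get_prediction(...).summary_frame(alpha) instead.\n", | |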
| "import statsmodels.api as sm\n", | |
| "alpha = 0.05 # 95% confidence interval\n", | |
| "lr = sm.OLS(y, sm.add_constant(X)).fit()\n", | |
| "conf_interval = lr.conf_int(alpha)\n", | |
| "conf_interval" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 22, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\u001b[0;31mType:\u001b[0m RegressionResultsWrapper\n", | |
| "\u001b[0;31mString form:\u001b[0m <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7f99ea874ec0>\n", | |
| "\u001b[0;31mFile:\u001b[0m ~/.local/lib/python3.13/site-packages/statsmodels/regression/linear_model.py\n", | |
| "\u001b[0;31mDocstring:\u001b[0m \n", | |
| "Results class for for an OLS model.\n", | |
| "\n", | |
| "Parameters\n", | |
| "----------\n", | |
| "model : RegressionModel\n", | |
| " The regression model instance.\n", | |
| "params : ndarray\n", | |
| " The estimated parameters.\n", | |
| "normalized_cov_params : ndarray\n", | |
| " The normalized covariance parameters.\n", | |
| "scale : float\n", | |
| " The estimated scale of the residuals.\n", | |
| "cov_type : str\n", | |
| " The covariance estimator used in the results.\n", | |
| "cov_kwds : dict\n", | |
| " Additional keywords used in the covariance specification.\n", | |
| "use_t : bool\n", | |
| " Flag indicating to use the Student's t in inference.\n", | |
| "**kwargs\n", | |
| " Additional keyword arguments used to initialize the results.\n", | |
| "\n", | |
| "See Also\n", | |
| "--------\n", | |
| "RegressionResults\n", | |
| " Results store for WLS and GLW models.\n", | |
| "\n", | |
| "Notes\n", | |
| "-----\n", | |
| "Most of the methods and attributes are inherited from RegressionResults.\n", | |
| "The special methods that are only available for OLS are:\n", | |
| "\n", | |
| "- get_influence\n", | |
| "- outlier_test\n", | |
| "- el_test\n", | |
| "- conf_int_el\n", | |
| "\u001b[0;31mClass docstring:\u001b[0m\n", | |
| "Class which wraps a statsmodels estimation Results class and steps in to\n", | |
| "reattach metadata to results (if available)" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "lr?" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 23, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\u001b[0;31mType:\u001b[0m OLS\n", | |
| "\u001b[0;31mString form:\u001b[0m <statsmodels.regression.linear_model.OLS object at 0x7f99ea874ad0>\n", | |
| "\u001b[0;31mFile:\u001b[0m ~/.local/lib/python3.13/site-packages/statsmodels/regression/linear_model.py\n", | |
| "\u001b[0;31mDocstring:\u001b[0m \n", | |
| "Ordinary Least Squares\n", | |
| "\n", | |
| "Parameters\n", | |
| "----------\n", | |
| "endog : array_like\n", | |
| " A 1-d endogenous response variable. The dependent variable.\n", | |
| "exog : array_like\n", | |
| " A nobs x k array where `nobs` is the number of observations and `k`\n", | |
| " is the number of regressors. An intercept is not included by default\n", | |
| " and should be added by the user. See\n", | |
| " :func:`statsmodels.tools.add_constant`.\n", | |
| "missing : str\n", | |
| " Available options are 'none', 'drop', and 'raise'. If 'none', no nan\n", | |
| " checking is done. If 'drop', any observations with nans are dropped.\n", | |
| " If 'raise', an error is raised. Default is 'none'.\n", | |
| "hasconst : None or bool\n", | |
| " Indicates whether the RHS includes a user-supplied constant. If True,\n", | |
| " a constant is not checked for and k_constant is set to 1 and all\n", | |
| " result statistics are calculated as if a constant is present. If\n", | |
| " False, a constant is not checked for and k_constant is set to 0.\n", | |
| "**kwargs\n", | |
| " Extra arguments that are used to set model properties when using the\n", | |
| " formula interface.\n", | |
| "\n", | |
| "Attributes\n", | |
| "----------\n", | |
| "weights : scalar\n", | |
| " Has an attribute weights = array(1.0) due to inheritance from WLS.\n", | |
| "\n", | |
| "See Also\n", | |
| "--------\n", | |
| "WLS : Fit a linear model using Weighted Least Squares.\n", | |
| "GLS : Fit a linear model using Generalized Least Squares.\n", | |
| "\n", | |
| "Notes\n", | |
| "-----\n", | |
| "No constant is added by the model unless you are using formulas.\n", | |
| "\n", | |
| "Examples\n", | |
| "--------\n", | |
| ">>> import statsmodels.api as sm\n", | |
| ">>> import numpy as np\n", | |
| ">>> duncan_prestige = sm.datasets.get_rdataset(\"Duncan\", \"carData\")\n", | |
| ">>> Y = duncan_prestige.data['income']\n", | |
| ">>> X = duncan_prestige.data['education']\n", | |
| ">>> X = sm.add_constant(X)\n", | |
| ">>> model = sm.OLS(Y,X)\n", | |
| ">>> results = model.fit()\n", | |
| ">>> results.params\n", | |
| "const 10.603498\n", | |
| "education 0.594859\n", | |
| "dtype: float64\n", | |
| "\n", | |
| ">>> results.tvalues\n", | |
| "const 2.039813\n", | |
| "education 6.892802\n", | |
| "dtype: float64\n", | |
| "\n", | |
| ">>> print(results.t_test([1, 0]))\n", | |
| " Test for Constraints\n", | |
| "==============================================================================\n", | |
| " coef std err t P>|t| [0.025 0.975]\n", | |
| "------------------------------------------------------------------------------\n", | |
| "c0 10.6035 5.198 2.040 0.048 0.120 21.087\n", | |
| "==============================================================================\n", | |
| "\n", | |
| ">>> print(results.f_test(np.identity(2)))\n", | |
| "<F test: F=array([[159.63031026]]), p=1.2607168903696672e-20,\n", | |
| " df_denom=43, df_num=2>" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "lr.model?" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "tensorflow_py312", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.12.7" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment