Created
September 9, 2018 03:47
-
-
Save demacdolincoln/2a35ccb09db5f3d43ddccc1d38763278 to your computer and use it in GitHub Desktop.
simples regressão linear em Julia
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"using DataFrames, CSV, Query" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = CSV.read(\"co2_por_nacao.csv\");" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table class=\"data-frame\"><thead><tr><th></th><th>Nation</th><th>Year</th><th>Total CO2 emissions from fossil-fuels and cement production (thousand metric tons of C)</th><th>Emissions from solid fuel consumption</th><th>Emissions from liquid fuel consumption</th><th>Emissions from gas fuel consumption</th><th>Emissions from cement production</th><th>Emissions from gas flaring</th><th>Per capita CO2 emissions (metric tons of carbon)</th><th>Emissions from bunker fuels (not included in the totals)</th></tr></thead><tbody><tr><th>1</th><td>AFGHANISTAN</td><td>1949</td><td>4</td><td>4</td><td>0</td><td>0</td><td>0</td><td>missing</td><td>missing</td><td>0</td></tr><tr><th>2</th><td>AFGHANISTAN</td><td>1950</td><td>23</td><td>6</td><td>18</td><td>0</td><td>0</td><td>0</td><td>0.0</td><td>0</td></tr><tr><th>3</th><td>AFGHANISTAN</td><td>1951</td><td>25</td><td>7</td><td>18</td><td>0</td><td>0</td><td>0</td><td>0.0</td><td>0</td></tr><tr><th>4</th><td>AFGHANISTAN</td><td>1952</td><td>25</td><td>9</td><td>17</td><td>0</td><td>0</td><td>0</td><td>0.0</td><td>0</td></tr><tr><th>5</th><td>AFGHANISTAN</td><td>1953</td><td>29</td><td>10</td><td>18</td><td>0</td><td>0</td><td>0</td><td>0.0</td><td>0</td></tr><tr><th>6</th><td>AFGHANISTAN</td><td>1954</td><td>29</td><td>12</td><td>18</td><td>0</td><td>0</td><td>0</td><td>0.0</td><td>0</td></tr></tbody></table>" | |
], | |
"text/plain": [ | |
"6×10 DataFrame. Omitted printing of 8 columns\n", | |
"│ Row │ Nation │ Year │\n", | |
"├─────┼─────────────┼──────┤\n", | |
"│ 1 │ AFGHANISTAN │ 1949 │\n", | |
"│ 2 │ AFGHANISTAN │ 1950 │\n", | |
"│ 3 │ AFGHANISTAN │ 1951 │\n", | |
"│ 4 │ AFGHANISTAN │ 1952 │\n", | |
"│ 5 │ AFGHANISTAN │ 1953 │\n", | |
"│ 6 │ AFGHANISTAN │ 1954 │" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"head(df)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"col = Symbol(\"Total CO2 emissions from fossil-fuels and cement production (thousand metric tons of C)\")\n", | |
"nation_str = \"JAPAN\";" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"nation_df = @from i in df begin\n", | |
" @where i.Nation == nation_str\n", | |
" @select i\n", | |
" @collect DataFrame\n", | |
"end\n", | |
";" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table class=\"data-frame\"><thead><tr><th></th><th>variable</th><th>mean</th><th>min</th><th>median</th><th>max</th><th>nunique</th><th>nmissing</th><th>eltype</th></tr></thead><tbody><tr><th>1</th><td>Nation</td><td></td><td>JAPAN</td><td></td><td>JAPAN</td><td>1</td><td>0</td><td>CategoricalString{UInt32}</td></tr><tr><th>2</th><td>Year</td><td>1981.5</td><td>1950</td><td>1981.5</td><td>2013</td><td></td><td>0</td><td>Int64</td></tr><tr><th>3</th><td>Total CO2 emissions from fossil-fuels and cement production (thousand metric tons of C)</td><td>2.22702e5</td><td>27991</td><td>249744.0</td><td>345244</td><td></td><td>0</td><td>Int64</td></tr><tr><th>4</th><td>Emissions from solid fuel consumption</td><td>70233.7</td><td>26127</td><td>64468.5</td><td>125349</td><td></td><td>0</td><td>Int64</td></tr><tr><th>5</th><td>Emissions from liquid fuel consumption</td><td>1.24351e5</td><td>1220</td><td>1.53707e5</td><td>189585</td><td></td><td>0</td><td>Int64</td></tr><tr><th>6</th><td>Emissions from gas fuel consumption</td><td>20150.4</td><td>37</td><td>13419.5</td><td>66389</td><td></td><td>0</td><td>Int64</td></tr><tr><th>7</th><td>Emissions from cement production</td><td>7961.31</td><td>607</td><td>9275.0</td><td>12851</td><td></td><td>0</td><td>Int64</td></tr><tr><th>8</th><td>Emissions from gas flaring</td><td>5.6875</td><td>0</td><td>0.0</td><td>30</td><td></td><td>0</td><td>Int64</td></tr><tr><th>9</th><td>Per capita CO2 emissions (metric tons of carbon)</td><td>1.89266</td><td>0.34</td><td>2.195</td><td>2.73</td><td></td><td>0</td><td>Float64</td></tr><tr><th>10</th><td>Emissions from bunker fuels (not included in the totals)</td><td>8173.27</td><td>429</td><td>8651.5</td><td>18070</td><td></td><td>0</td><td>Int64</td></tr></tbody></table>" | |
], | |
"text/plain": [ | |
"10×8 DataFrame. Omitted printing of 8 columns\n", | |
"│ Row │ ├─────┼\n", | |
"│ 1 │ │ 2 │ │ 3 │ │ 4 │ │ 5 │ │ 6 │ │ 7 │ │ 8 │ │ 9 │ │ 10 │ " | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"describe(nation_df)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# calculando os coeficientes para regressão linear:\n", | |
"\n", | |
"$$ \\beta = \\frac{\\sum^n_{i=1} x_i y_i - \\frac{1}{n} \\sum^n_{i=1} x_i \\sum^n_{j=1} y_j}{\\sum^n_{i=1} (x^2_i) - \\frac{1}{n}(\\sum^n_{i=1} x_i)^2} $$\n", | |
"\n", | |
"$$ \\alpha = \\frac{\\sum^n_{i=1} y_i}{n} - \\beta \\frac{\\sum^n_{i=1} x_i}{n} $$\n", | |
"\n", | |
"$$ f(x) = \\alpha + \\beta x$$\n", | |
"\n", | |
"*fonte: https://pt.wikipedia.org/wiki/Regress%C3%A3o_linear_simples*" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x, y = nation_df[:Year], nation_df[col]\n", | |
"n = length(x)\n", | |
";" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"5492.159088827839" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"β = (sum(x.*y) -1/n * sum(x) * sum(y) ) / (sum(x.^2) -1/n * sum(x)^2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"-1.0660010843887363e7" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"α = sum(y)/n - β * sum(x)/n" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Julia 1.0.0", | |
"language": "julia", | |
"name": "julia-1.0" | |
}, | |
"language_info": { | |
"file_extension": ".jl", | |
"mimetype": "application/julia", | |
"name": "julia", | |
"version": "1.0.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment