Last active
December 27, 2022 18:46
-
-
Save ftfarias/a4dae91f5374db3d9b35bf9a71afd4cc to your computer and use it in GitHub Desktop.
how to do a linear regression in SQL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
drop table if exists linear_test; | |
create table linear_test ( | |
x numeric(19,4), | |
y numeric(19,4), | |
workspace_id text | |
); | |
insert into linear_test (x,y, workspace_id) values (1, 10, 'a'), (2, 20, 'a'), (3, 30, 'a'), (4, 40, 'a'), (5, 50, 'a'); | |
insert into linear_test (x,y, workspace_id) values (5, 10, 'b'), (6, 20, 'b'), (7, 30, 'b'), (8, 40, 'b'), (9, 50, 'b'); | |
insert into linear_test (x,y, workspace_id) values (1, -1, 'c'), (2, -2, 'c'), (3, -3, 'c'), (4, -4, 'c'), (5, -5, 'c'); | |
select * from linear_test; | |
with base_table as ( | |
select | |
workspace_id, | |
x as x_axis, | |
y | |
from linear_test | |
), | |
sums_table as (select distinct | |
workspace_id, | |
sum(x_axis) over (partition by workspace_id) as sum_x, | |
sum(x_axis * x_axis) over (partition by workspace_id) as sum_x2, | |
sum(y) over (partition by workspace_id) as sum_y, | |
sum(x_axis * y) over (partition by workspace_id) as sum_x_y, | |
COUNT(x_axis) over (partition by workspace_id) as num_rows | |
from base_table | |
) | |
select | |
workspace_id, | |
num_rows, | |
sum_x, | |
sum_x2, | |
sum_y, | |
sum_x_y, | |
((sum_y * sum_x2) - (sum_x * sum_x_y)) / nullif((num_rows * sum_x2) - (sum_x * sum_x),0) as a_order_count, | |
((num_rows * sum_x_y ) - (sum_x * sum_y )) / nullif((num_rows * sum_x2) - (sum_x * sum_x),0) as b_order_count | |
from sums_table | |
order by workspace_id |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment