-
-
Save NikolayIT/d86118a3a0cb3f5ed63d674a350d75f2 to your computer and use it in GitHub Desktop.
namespace LinearRegression
{
    using System;
    using System.Diagnostics;

    /// <summary>
    /// Console sample that fits a least-squares line to a small set of
    /// (year, value) pairs and prints the fit together with an extrapolated value.
    /// </summary>
    public static class Program
    {
        /// <summary>
        /// Fits a line to the sample data below, prints R-squared, intercept and slope,
        /// then prints the value the fitted line gives for x = 2017.
        /// </summary>
        public static void Main()
        {
            var xValues = new double[]
            {
                1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
                2005, 2006, 2007, 2008, 2009
            };
            var yValues = new double[]
            {
                8669269, 8595500, 8484900, 8459800, 8427400, 8384700, 8340900, 8283200, 8230400, 8190900,
                8149468, 7932984, 7845841, 7801273, 7761049, 7720000, 7679290, 7640238, 7606551,
                7563710
            };

            double rSquared, intercept, slope;
            LinearRegression(xValues, yValues, out rSquared, out intercept, out slope);

            Console.WriteLine($"R-squared = {rSquared}");
            Console.WriteLine($"Intercept = {intercept}");
            Console.WriteLine($"Slope = {slope}");

            // Extrapolate with y = slope * x + intercept.
            var predictedValue = (slope * 2017) + intercept;
            Console.WriteLine($"Prediction for 2017: {predictedValue}");
        }

        /// <summary>
        /// Fits a line to a collection of (x,y) points using ordinary least squares.
        /// </summary>
        /// <remarks>
        /// The sums of squares are accumulated around the means (two passes over the data)
        /// instead of being derived from raw sums of x*x, y*y and x*y. The raw-sum form
        /// subtracts two nearly equal large numbers and loses precision when the values are
        /// large compared to their spread (for example x values that are calendar years).
        /// No exception is thrown for degenerate input: with no points, or when the x values
        /// (or, for <paramref name="rSquared"/>, the y values) have no spread, the affected
        /// outputs are not finite or not meaningful, because the divisions below are
        /// floating-point divisions by zero.
        /// </remarks>
        /// <param name="xVals">The x-axis values.</param>
        /// <param name="yVals">The y-axis values.</param>
        /// <param name="rSquared">The r^2 value of the line.</param>
        /// <param name="yIntercept">The y-intercept value of the line (i.e. y = ax + b, yIntercept is b).</param>
        /// <param name="slope">The slope of the line (i.e. y = ax + b, slope is a).</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="xVals"/> or <paramref name="yVals"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">The two arrays have different lengths.</exception>
        public static void LinearRegression(
            double[] xVals,
            double[] yVals,
            out double rSquared,
            out double yIntercept,
            out double slope)
        {
            if (xVals == null)
            {
                throw new ArgumentNullException(nameof(xVals));
            }

            if (yVals == null)
            {
                throw new ArgumentNullException(nameof(yVals));
            }

            if (xVals.Length != yVals.Length)
            {
                // ArgumentException derives from Exception, so callers that caught the
                // previously thrown base Exception keep working; the message is unchanged.
                throw new ArgumentException("Input values should be with the same length.");
            }

            var count = xVals.Length;

            // First pass: means of x and y.
            double sumOfX = 0;
            double sumOfY = 0;
            for (var i = 0; i < count; i++)
            {
                sumOfX += xVals[i];
                sumOfY += yVals[i];
            }

            var meanX = sumOfX / count;
            var meanY = sumOfY / count;

            // Second pass: sums of squared deviations and of cross deviations.
            double ssX = 0;
            double ssY = 0;
            double sCo = 0;
            for (var i = 0; i < count; i++)
            {
                var dx = xVals[i] - meanX;
                var dy = yVals[i] - meanY;
                ssX += dx * dx;
                ssY += dy * dy;
                sCo += dx * dy;
            }

            slope = sCo / ssX;
            yIntercept = meanY - (slope * meanX);

            // r^2 = Sxy^2 / (Sxx * Syy), the square of the correlation coefficient.
            rSquared = (sCo * sCo) / (ssX * ssY);
        }
    }
}
This is extremely hard to understand; could you add comments, please?
Thanks a lot!
@SIMOMEGA, these are fairly standard statistics concepts, so I'm not sure how much documentation can really be added to the code here. This might help, even though it has lots of math notation: https://en.wikipedia.org/wiki/Simple_linear_regression#Numerical_example
For my usage I did quickly convert it to returning an object which I thought I'd share:
/// <summary>
/// Holds the components of a fitted line y = ax + b and evaluates that line.
/// </summary>
public class LinearRegressionComponents
{
    /// <summary>Gets or sets the r^2 value of the line; gives an idea of the accuracy given the input values.</summary>
    public double rSquared { get; set; }

    /// <summary>Gets or sets the y-intercept of the line (b in y = ax + b).</summary>
    public double yIntercept { get; set; }

    /// <summary>Gets or sets the slope of the line (a in y = ax + b).</summary>
    public double slope { get; set; }

    /// <summary>Evaluates the line at the given x value.</summary>
    /// <param name="input">The x value to evaluate the line at.</param>
    /// <returns><c>slope * input + yIntercept</c>.</returns>
    public double CalculatePrediction(double input) => yIntercept + (slope * input);
}
Cheers for the code snippet. The "ssY" variable does not seem to be used anywhere?
Under what license is this code published here? Can I use it commercially?
@manscrober I give you permission to use it however you want.
Thank you. Very simple to implement.
Thank you! Is the code free for commercial use available to all? If not can I use it too?
Yes everyone can use it. It can be considered as having MIT license.
Thank you very much!
Hi @NikolayIT
I noticed that the variable ssY is not used. Is there a reason for this?
@dittytwo Yes, you are right. It's not needed.
Thanks a lot! :)
If anyone wants to calculate the sample standard deviation (sigma) with this, just put this at the bottom and also add it as another out parameter: stdDev = Math.Sqrt(ssY / (count - 1));
Hi,
Suppose we removed the last value of each of the above arrays (2009 and 7563710).
What formula would I use to calculate the x-result (approx. 2009) when the y-input is 7563710?
This version works, but it performs some unnecessary calculations and more divisions than needed. For example, sumOfX * sumOfY is computed once to get rNumerator and then computed again for the intermediate variable sCo.
A more readable and faster way to do this is to accumulate the sums of squares and the sum of the codeviates while we are in the loop, then store the derived quantities in intermediate variables. This lets us get the answer without recalculating the same products, and it also avoids the overhead of a function call into the Math library.
The time complexity is the same in theory, but in practice the code below is significantly faster.
``
/// <summary>
/// Fits a least-squares line y = ax + b to (x,y) points given as single-precision arrays,
/// accumulating all sums in double precision in a single pass.
/// </summary>
/// <remarks>
/// No exception is thrown for degenerate input: with no points, or when the x values
/// (or, for <paramref name="rSquared"/>, the y values) have no spread, the affected
/// outputs are not finite or not meaningful, because the divisions below are
/// floating-point divisions by zero.
/// </remarks>
/// <param name="xVals">The x-axis values.</param>
/// <param name="yVals">The y-axis values.</param>
/// <param name="rSquared">The r^2 value of the line.</param>
/// <param name="yIntercept">The y-intercept of the line (b in y = ax + b).</param>
/// <param name="slope">The slope of the line (a in y = ax + b).</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="xVals"/> or <paramref name="yVals"/> is null.
/// </exception>
/// <exception cref="ArgumentException">The two arrays have different lengths.</exception>
public static void LinearRegression(float[] xVals, float[] yVals, out double rSquared, out double yIntercept, out double slope)
{
    if (xVals == null)
    {
        throw new ArgumentNullException(nameof(xVals));
    }

    if (yVals == null)
    {
        throw new ArgumentNullException(nameof(yVals));
    }

    if (xVals.Length != yVals.Length)
    {
        // Without this check a shorter yVals fails mid-loop with an index error
        // and a longer yVals is silently truncated.
        throw new ArgumentException("Input values should be with the same length.");
    }

    double sumOfX = 0;
    double sumOfY = 0;
    double sumOfXSq = 0;
    double sumOfYSq = 0;
    double sumCodeviates = 0;
    int count = xVals.Length;

    // Compute all required sums in a single loop.
    for (var i = 0; i < count; i++)
    {
        double x = xVals[i];
        double y = yVals[i];
        sumOfX += x;
        sumOfY += y;
        sumOfXSq += x * x;
        sumOfYSq += y * y;
        sumCodeviates += x * y;
    }

    // Multiply by the reciprocal of count instead of dividing repeatedly.
    double reciprocalCount = 1.0 / count;
    double meanX = sumOfX * reciprocalCount;
    double meanY = sumOfY * reciprocalCount;

    // Sums of squared deviations and of cross deviations about the means.
    // These are the un-normalised sums, i.e. not yet divided by count.
    double ssX = sumOfXSq - (sumOfX * meanX);
    double ssY = sumOfYSq - (sumOfY * meanY);
    double sumOfCrossDeviations = sumCodeviates - (meanY * sumOfX);

    double reciprocalSSX = 1.0 / ssX;

    slope = sumOfCrossDeviations * reciprocalSSX;
    yIntercept = meanY - (slope * meanX);

    // r^2 = Sxy^2 / (Sxx * Syy).
    rSquared = sumOfCrossDeviations * sumOfCrossDeviations * reciprocalSSX / ssY;
}
``
30% speed increase on 12 Core AMD Ryzen.
Thank you for this code!!