Created
January 22, 2016 19:42
-
-
Save sudipto80/3b99f6bbe9b21b76386d to your computer and use it in GitHub Desktop.
Linear Regression
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#load "...\packages\MathNet.Numerics.FSharp.3.10.0\MathNet.Numerics.fsx" | |
open MathNet.Numerics.LinearAlgebra | |
open System.IO | |
// Sample vector literal; it is not referenced by the code visible in this script.
let velocities = vector [23.; 4.; 5.; 2.]
//let y = matrix [[1.;3.]
//                [1.;5.]
//                [1.;4.]]
// Load the CSV file and build a dense matrix with one matrix row per file line.
// Every comma-separated field is converted with `float`, so a header line or any
// non-numeric field raises while loading.
// Please change the path so that it points to the file on your local disk.
let rows =
    File.ReadAllLines("C:\\mpg.csv")
    |> Array.map (fun line -> line.Split(',') |> Array.map float)
let mpgData = DenseMatrix.ofRowArrays rows
//let myMat = matrix [[1. ;2.; 3.] | |
// [4. ;5.; 2.] | |
// [7. ;0.8; 9.]] | |
// | |
//let myMat' = myMat.Inverse() | |
//let myMat = matrix [[1.;2.;3.] | |
// [4.;5.;2.] | |
// [7.;0.8;9.]] | |
// | |
//let myMat' = myMat.Inverse() | |
//let myMat = matrix [[1. ;2.; 3.] | |
// [4. ;5.; 2.] | |
// [7. ;0.8; 9.]] | |
// | |
//let myMatTrace = myMat.Trace() | |
// 3x3 example matrix used for the decompositions below.
let myMat = matrix [[1.0;2.0;3.0]
                    [4.0;5.0;2.0]
                    [7.0;0.8;9.0]]
// QR decomposition of the example matrix.
let qr = myMat.QR()
// Singular value decomposition; `true` requests the singular vectors as well.
let svdR = myMat.Svd(true)
// Gets the singular values of the matrix as a vector.
// NOTE(review): the previous comment said they come "in ascending value";
// confirm the ordering against the Math.NET documentation before relying on it.
let s = svdR.S
// Gets the transposed right singular vectors
// (transpose of V, an n-by-n unitary matrix)
let v' = svdR.VT
// Gets the left singular vectors (U - m-by-m unitary matrix)
let u = svdR.U
// Returns the singular values as a diagonal Matrix<T>.
let w = svdR.W
// Rebuild the original matrix from its factors: U * W * VT
let myMatSVD = u * w * v'
printfn "%A" myMatSVD
////Represent the number of Disk-I/Os | |
//let x = [14;16;27;42;39;50;83] | |
////Represent the time processor takes | |
//let y = [02;05;07;09;10;13;20] | |
//Locate the file "FsPlotBootstrap.fsx" and provide that path here | |
#load "..\packages\FsPlot.0.6.6\FsPlotBootstrap.fsx" | |
open System | |
open FsPlot.Data | |
open FsPlot.Highcharts.Charting | |
// Observed data: x is the number of disk I/Os, y the processor time taken.
let x = [14;16;27;42;39;50;83]
let y = [02;05;07;09;10;13;20]
// Second response series; it is not referenced by the code visible in this script.
let y' = [3;4;5;7;23;21;34]
// Intercept (b0) and slope (b1) of the regression line for the data above.
// These two bindings used to be commented out, which left b0 and b1 undefined
// when regressionPairs was evaluated.
let b0 = -0.00828236493374135
let b1 = 0.243756371049949
// One point on the fitted line for every observed x.
let regressionPairs = x |> List.map (fun xElem -> (xElem, b0 + b1 * float xElem))
// Observed (x, y) points.
let pairs = List.zip x y
let scatter = Series.Scatter pairs
let regressionLine = Series.Line regressionPairs
// Observations as a scatter series combined with the regression line.
let chart =
    [scatter; regressionLine]
    |> Chart.Combine
    |> Chart.WithNames ["Actual data"; "Regression Line"]
    |> Chart.WithTitle "Processor Time and Disk I/O"
    |> Chart.WithLegend true
open MathNet.Numerics.LinearRegression | |
open MathNet.Numerics.Fit | |
open MathNet.Numerics.LinearAlgebra | |
// Predictor (xV) and response (yV) samples as float arrays for SimpleRegression.Fit.
let xV = [|14.;16.;27.;42.;39.;50.;83.|]
let yV = [|02.;05.;07.;09.;10.;13.;20.|]
// Fitted line y = b0 + b1 * x: b0 is the intercept and b1 the slope.
let (b0, b1) = SimpleRegression.Fit(xV, yV)
// Returns a list of `count` pseudo-random integers, each drawn from
// 40 (inclusive) to 100 (exclusive). A new unseeded System.Random is created
// on every call, so the values are not reproducible between calls.
let genRandomTemps count =
    let rnd = System.Random()
    List.init count (fun _ -> rnd.Next(40, 100))
// 50 random integer temperatures used as the predictor.
let temps = genRandomTemps 50
// Constant subtracted from every temperature in the formula below
// (presumably the dew point; confirm against the accompanying text).
let t_d = 19
// Response values computed directly from the formula 100 - 5 * (t - t_d).
let RH_Formula =
    temps
    |> List.map (fun t -> float (100 - 5 * (t - t_d)))
    |> List.toArray
// The same temperatures as floats.
let temp_Array =
    temps
    |> List.map float
    |> List.toArray
// (temperature, formula value) samples for the fit.
let from_Formula = Array.zip temp_Array RH_Formula
// Fitted line: value = rhB0 + rhB1 * temperature.
let (rhB0, rhB1) = SimpleRegression.Fit from_Formula
// One point on the fitted line for every sampled temperature.
let regressionPairs = temp_Array |> Array.map (fun t -> (t, rhB0 + rhB1 * t))
let formulaSpots = Series.Scatter from_Formula
let regressionLine = Series.Line regressionPairs
// Formula values as a scatter series combined with the fitted line.
let chart =
    [formulaSpots; regressionLine]
    |> Chart.Combine
    |> Chart.WithNames ["Actual data"; "Regression Line"]
    |> Chart.WithTitle "Predicting Relative Humidity"
    |> Chart.WithLegend true
//
//Here we have the predictor variables: one observation per row
let X = matrix [[1. ;2.; 3.]
                [4. ;5.; 2.]
                [7. ;0.8; 9.]]
//This is the new set of predictor values for an observation
let X_unseen = vector [4.;5.;1.89]
//These are the values for the set of observations seen (a 3x1 column)
let Y = matrix [[3.]
                [4.]
                [5.]]
//Calculating theta from the normal equation: theta = (X^T X)^-1 X^T Y
let theta = ((X.Transpose() * X).Inverse() * X.Transpose()) * Y
//Calculating the prediction for the new Y for the new set of
//predictor values: theta^T * X_unseen
let predicted_Y = theta.Transpose() * X_unseen
//
//Read the data set: only the first 350 lines are used, and from each line only
//the first 6 comma-separated fields, converted to float.
//The 350 lines are taken before parsing so that later lines are never parsed.
//Seq.take raises when the file has fewer than 350 lines or a line has fewer
//than 6 fields.
let rows =
    File.ReadAllLines("C:\\mpgdata.csv")
    |> Seq.take 350
    |> Seq.map (fun line ->
        line.Split(',')
        |> Seq.take 6
        |> Seq.map float
        |> Seq.toArray)
    |> Seq.toArray
//A matrix is created with all the numeric columns and 350 rows
let created1 = DenseMatrix.ofRowArrays rows
//Values for the predicted variable are extracted (column 0).
let milesPerGallon = created1.Column(0)
//After extracting the predicted column let's remove it
//to get the matrix to calculate the theta
let created2 = created1.RemoveColumn(0)
//Storing predicted values in another variable
let Y_MPG = milesPerGallon
//Calculating Theta as per the given formula: (X^T X)^-1 X^T y
let Theta_MPG =
    (created2.Transpose() * created2).Inverse()
        * created2.Transpose() * milesPerGallon
//Details on an unknown car (one value per remaining predictor column)
let unknownCarDetails = vector [4.;140.;90.;2264.;15.5]
//Calculating the predicted mpg value of the new unknown car
//(dot product of the coefficients with the car's predictor values)
let predictedMPG = Theta_MPG * unknownCarDetails
//
//Example observations, one per row
let m = matrix [[2.;3.;4.;5.]
                [4.;55.;2.;4.]
                [3.;4.;2.;3.]
                [2.;4.;2.;1.]]
//Query point the weights are computed for
let x_n = vector [1.;2.2;31.;4.1]
//The weight matrix: a diagonal matrix whose i-th entry is the Euclidean
//distance between row i of m and x_n.
let W = DiagonalMatrix.ofDiagArray [| for i in 0 .. m.RowCount - 1 -> (m.Row i).Subtract(x_n).L2Norm() |]
//
//Experiment with several values of tau
let tau = 1.
//The weight matrix again, now with every distance divided by 2 * tau^2.
//W is bound a second time here; the snippets are presumably meant to be
//evaluated one after another in F# Interactive.
//NOTE(review): these weights grow with the distance from x_n. The usual locally
//weighted regression kernel is exp(-distance^2 / (2 * tau^2)), which decays with
//distance - confirm which form is intended.
let W = DiagonalMatrix.ofDiagArray [| for i in 0 .. m.RowCount - 1 -> (m.Row i).Subtract(x_n).L2Norm() / (2. * tau ** 2.) |]
//
//Predictor values of the car to predict for
let unknownCarDetails = vector [4.;140.;90.;2264.;15.5]
//One weight per observation: distance between the observation and the unknown
//car, divided by 2 * tau^2.
let values = [| for i in 0 .. created2.RowCount - 1 -> (created2.Row i).Subtract(unknownCarDetails).L2Norm() / (2. * tau ** 2.) |]
//Diagonal weight matrix built from those values
let Weights_MPG = DiagonalMatrix.ofDiagArray values
//Weighted normal equation: theta = (X^T W X)^-1 X^T W y
let Theta_MPG =
    (created2.Transpose() * Weights_MPG * created2).Inverse()
        * created2.Transpose() * Weights_MPG * milesPerGallon
//Predicted mpg of the unknown car (dot product with the coefficients)
let predictedMPG = Theta_MPG * unknownCarDetails
//---
//(row index, actual mpg) for every observation
let mpgPairs = [| for i in 0 .. created2.RowCount - 1 -> (i, milesPerGallon.At(i)) |]
//(row index, predicted mpg): dot product of the coefficients with the row
let predictedMPGPairs = [| for i in 0 .. created2.RowCount - 1 -> (i, Theta_MPG * created2.Row(i)) |]
let scatterMPG = Series.Scatter mpgPairs
let linearRegSpline = Series.Line predictedMPGPairs
//Actual values as a scatter series combined with the predicted values as a line
let chartMPG =
    [scatterMPG; linearRegSpline]
    |> Chart.Combine
    |> Chart.WithNames ["Actual data"; "Multiple Regression Line"]
    |> Chart.WithTitle "Miles per gallon prediction using Multiple Linear Regression"
    |> Chart.WithLegend true
//(actual, predicted, actual - predicted) for the first 5 observations.
//Only those 5 rows are evaluated, and each prediction is computed once.
let mpgResiduals =
    [| for i in 0 .. 4 do
           let actual = milesPerGallon.At(i)
           let predicted = Theta_MPG * created2.Row(i)
           yield (actual, predicted, actual - predicted) |]
////
//Absolute residual |actual - predicted| per observation, as a column series
let mpgResidualPairs = Series.Column [| for i in 0 .. created2.RowCount - 1 -> (i, abs (milesPerGallon.At(i) - Theta_MPG * created2.Row(i))) |]
//Predicted mpg per observation, as a line series
let predictedMPGPairs = Series.Line [| for i in 0 .. created2.RowCount - 1 -> (i, Theta_MPG * created2.Row(i)) |]
//Actual mpg per observation, as a spline series (not added to the chart below)
let actualMPGRecords = Series.Spline [| for i in 0 .. created2.RowCount - 1 -> (i, milesPerGallon.At(i)) |]
let scatterMPG = Series.Scatter mpgPairs
//Residuals, actual values and predictions combined in one chart
let chartMPGResidue =
    [mpgResidualPairs; scatterMPG; predictedMPGPairs]
    |> Chart.Combine
    |> Chart.WithNames ["Residuals"; "Actual data"; "Multiple Regression Line"]
    |> Chart.WithTitle "Miles per gallon prediction using Multiple Linear Regression"
    |> Chart.WithLegend true
//
//Let's say we have details about several houses
//With "bedrooms","Area","Bathroom" count as listed below
let houseDetails = matrix [[3.5;4000.;3.]
                           [5.;4542.;3.]
                           [3.;2545.;4.]
                           [2.;1150.;2.]
                           [2.;1220.;2.]
                           [1.;734.;1.]]
//Regularisation strength added to the diagonal of X^T X
let lambda = 11.
//The house to predict a price for
let newHouseDetails = vector [1.;750.;1.]
//Known price of each house above, in row order
let prices = vector [3400.;2102.;1334.;3432.;5342.;782.;]
//Identity matrix with one row/column per predictor
let I = DenseMatrix.identity<float> houseDetails.ColumnCount
//Ridge regression: theta = (X^T X + lambda * I)^-1 X^T y
let ridgeRegressionTheta =
    (houseDetails.Transpose() * houseDetails + lambda * I).Inverse()
        * houseDetails.Transpose()
        * prices
//Predicted price: dot product of the new house's details with theta
let newHousePredictedPrice = newHouseDetails * ridgeRegressionTheta
//// | |
//Locate these files and provide correct paths to all these files | |
#r @"...\packages\Accord.2.15.0\lib\net45\Accord.dll" | |
#r @"...\packages\Accord.Math.2.15.0\lib\net45\Accord.Math.dll" | |
#r @"...\packages\Accord.Statistics.2.15.0\lib\net45\Accord.Statistics.dll" | |
#r @"...\packages\MathNet.Numerics.FSharp.3.10.0\lib\net40\MathNet.Numerics.FSharp.dll" | |
#r @"...\packages\MathNet.Numerics.3.10.0\lib\net40\MathNet.Numerics.dll" | |
#load "...\packages\MathNet.Numerics.FSharp.3.10.0\MathNet.Numerics.fsx" | |
open Accord.Statistics | |
open Accord.Statistics.Models.Regression.Linear | |
open MathNet.Numerics.LinearRegression | |
open MathNet.Numerics.LinearAlgebra | |
open MathNet.Numerics.LinearAlgebra.Double | |
open MathNet.Numerics | |
//Input set of values: three observations with three input variables each
let inputs = [|[|1.;1.;1.|];[|2.;1.;1.|];[|3.;1.;1.|]|]
//Outputs for the observations above: two output variables each
let outputs = [|[|2.;3.|];[|4.;6.|];[|6.;9.|]|]
//This is a regression that takes an input variable set of 3 values
//each and projects the result to a two variable output.
//Thus we need a 3 x 2 regression model
let regression = new MultivariateLinearRegression (3, 2)
//Fit the model; the value returned by Regress is kept as the fit error
let error = regression.Regress (inputs, outputs)
printfn "%A" regression.Coefficients
//Let's say we have a new set of values as per the given data
let newInput = DenseMatrix.OfColumns [[2.4;1.2;1.4]]
//Creating a theta from these coefficients
let theta = DenseMatrix.OfArray regression.Coefficients
//Calculating the predicted value for this new input set.
let newOutputs = theta.Transpose() * newInput
//Feature scaling: average and range (max - min) of each houseDetails column,
//written out by hand for bedrooms (column 0), area (column 1) and bathrooms (column 2).
let avgBedRooms = houseDetails.Column 0 |> Seq.average
let avgArea = houseDetails.Column 1 |> Seq.average
let avgBathRooms = houseDetails.Column 2 |> Seq.average
let rangeBedRooms =
    let bedRooms = houseDetails.Column 0
    Seq.max bedRooms - Seq.min bedRooms
let rangeArea =
    let area = houseDetails.Column 1
    Seq.max area - Seq.min area
let rangeBathRooms =
    let bathRooms = houseDetails.Column 2
    Seq.max bathRooms - Seq.min bathRooms
//However, you can't do this by hand for a very large matrix.
//So the following code does that programmatically for a matrix of any size.
//scaleFeatures rescales a vector in place and returns that same vector:
//for every index i of avgs, element i becomes (element i - avgs.[i]) / ranges.[i].
//  avgs   - value subtracted at each position
//  ranges - divisor at each position; a zero yields an infinite or NaN entry
//  column - the vector to rescale; it is mutated
//Positions at or beyond avgs.Length are left unchanged. Because avgs and ranges
//are indexed by the position inside the vector, the result is a per-feature
//scaling only when position i of the vector holds feature i.
let scaleFeatures (avgs: float []) (ranges: float []) (column : Vector<float>) =
    for i in 0 .. avgs.Length - 1 do
        column.Storage.At(i, (column.Storage.At(i) - avgs.[i]) / ranges.[i])
    column
//Finding averages for all columns
let allAvgs = [| for i in 0 .. houseDetails.ColumnCount - 1 -> houseDetails.Column i |> Seq.average |]
//Finding ranges (max - min) for all columns
let allRanges = [| for i in 0 .. houseDetails.ColumnCount - 1 -> (houseDetails.Column i |> Seq.max) - (houseDetails.Column i |> Seq.min) |]
let allColumns = [ for i in 0 .. houseDetails.ColumnCount - 1 -> houseDetails.Column i ]
//Scaled column values: column j is shifted by its own average and divided by
//its own range.
//scaleFeatures is deliberately not applied to whole columns: it indexes avgs and
//ranges by the position inside the vector it receives, so on a column it scaled
//row i with column i's statistics and left the rows beyond ColumnCount unscaled.
let scaledColumns =
    allColumns
    |> List.mapi (fun j column -> column.Subtract(allAvgs.[j]).Divide(allRanges.[j]))
//Creating a matrix from scaled values.
let scaledHouseDetails = DenseMatrix.ofColumns scaledColumns
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment