Created
October 18, 2015 15:38
-
-
Save MartinBodocky/0ce5580e5a312b465b9b to your computer and use it in GitHub Desktop.
PCA analysis on more real data with Accord.Net with Deedle and FSharp.Charting.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// data from http://archive.ics.uci.edu/ml/machine-learning-databases/secom/ | |
// reference accord framework | |
#r "../packages/Accord.3.0.2/lib/net45/Accord.dll" | |
#r "../packages/Accord.Controls.3.0.2/lib/net45/Accord.Controls.dll" | |
#r "../packages/Accord.IO.3.0.2/lib/net45/Accord.IO.dll" | |
#r "../packages/Accord.Math.3.0.2/lib/net45/Accord.Math.dll" | |
#r "../packages/Accord.Statistics.3.0.2/lib/net45/Accord.Statistics.dll" | |
// reference Deedle with FSharp.Charting
#r "../packages/Deedle.1.2.4/lib/net40/Deedle.dll" | |
#r "../packages/FSharp.Charting.0.90.12/lib/net40/FSharp.Charting.dll" | |
#I "../packages/FSharp.Charting.0.90.12" | |
#I "../packages/Deedle.1.2.4" | |
#load "FSharp.Charting.fsx" | |
#load "Deedle.fsx" | |
open Deedle | |
open System | |
open Accord | |
open Accord.Controls | |
open Accord.Math | |
open Accord.Math.Comparers | |
open Accord.Math.Decompositions | |
open Accord.Statistics | |
open Accord.Statistics.Analysis | |
open FSharp.Charting | |
// Load the SECOM data set (space-separated values, no header row).
// __SOURCE_DIRECTORY__ carries no trailing separator, so concatenating it with
// "../../data/..." fuses the script folder name with the first ".." segment and
// only works when path normalization happens to collapse that bogus segment.
// Path.Combine spells out the location that fused path normalizes to:
// <script dir>/../data/secom.data.txt.
// NOTE(review): confirm the data folder sits next to the packages folder.
let frame1 =
    let dataPath = System.IO.Path.Combine(__SOURCE_DIRECTORY__, "..", "data", "secom.data.txt")
    Frame.ReadCsv(dataPath, hasHeaders = false, separators = " ")
// Impute missing data: each gap in a column is filled with that column's mean.
// Columns are processed one at a time and written back into frame1 in place.
for columnKey in frame1.ColumnKeys do
    let original = frame1.GetColumn<float>(columnKey)
    // mean of the column as it currently stands in the frame
    let columnMean = Stats.mean original
    let filled = original.FillMissing(columnMean)
    frame1.ReplaceColumn(columnKey, filled)
// Get the data from the frame as a dense 2-D float array.
let matrix : float [,] = frame1.ToArray2D()
// Use the analysis by correlation (Standardize), which is better suited to data whose
// columns have very different measurement units. The method is supplied through the
// constructor: in F#, `pca.Method = AnalysisMethod.Standardize` is an equality
// comparison whose boolean result is discarded, not an assignment, so writing it that
// way leaves the analysis on its default method.
let pca = new PrincipalComponentAnalysis(matrix, AnalysisMethod.Standardize)
// Run the analysis.
pca.Compute()
// Show the component proportions, capping the X axis at 20.
Chart.Line(pca.ComponentProportions).WithXAxis(Max=20.0)
// Transform the data if needed.
let pcaFinalData = pca.Transform(matrix)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment