Skip to content

Instantly share code, notes, and snippets.

@MartinBodocky
Created October 18, 2015 15:38
Show Gist options
  • Save MartinBodocky/0ce5580e5a312b465b9b to your computer and use it in GitHub Desktop.
Save MartinBodocky/0ce5580e5a312b465b9b to your computer and use it in GitHub Desktop.
PCA analysis on more real data with Accord.Net with Deedle and FSharp.Charting.
// data from http://archive.ics.uci.edu/ml/machine-learning-databases/secom/
// reference accord framework
#r "../packages/Accord.3.0.2/lib/net45/Accord.dll"
#r "../packages/Accord.Controls.3.0.2/lib/net45/Accord.Controls.dll"
#r "../packages/Accord.IO.3.0.2/lib/net45/Accord.IO.dll"
#r "../packages/Accord.Math.3.0.2/lib/net45/Accord.Math.dll"
#r "../packages/Accord.Statistics.3.0.2/lib/net45/Accord.Statistics.dll"
// reference Deedle together with FSharp.Charting
#r "../packages/Deedle.1.2.4/lib/net40/Deedle.dll"
#r "../packages/FSharp.Charting.0.90.12/lib/net40/FSharp.Charting.dll"
#I "../packages/FSharp.Charting.0.90.12"
#I "../packages/Deedle.1.2.4"
#load "FSharp.Charting.fsx"
#load "Deedle.fsx"
open Deedle
open System
open Accord
open Accord.Controls
open Accord.Math
open Accord.Math.Comparers
open Accord.Math.Decompositions
open Accord.Statistics
open Accord.Statistics.Analysis
open FSharp.Charting
// Load the space-separated, header-less SECOM data file into a Deedle frame.
// NOTE(review): the relative part is appended to __SOURCE_DIRECTORY__ with no
// separator in between (presumably the directory has no trailing separator) —
// confirm where this resolves on the target platform; System.IO.Path.Combine
// may be what was intended.
let frame1 =
    let dataFile = __SOURCE_DIRECTORY__ + "../../data/secom.data.txt"
    Frame.ReadCsv(dataFile, hasHeaders = false, separators = " ")
// Impute gaps: in every column, each missing entry is overwritten with the
// mean that Stats.mean reports for that column. The frame is updated in place.
for columnKey in frame1.ColumnKeys do
    // Read the column as floats (Stats.mean works on float series).
    let series = frame1.GetColumn<float>(columnKey)
    let columnMean = Stats.mean series
    // Swap the original column for its gap-filled copy.
    let filled = series.FillMissing(columnMean)
    frame1.ReplaceColumn(columnKey, filled)
// Get the data from the frame as a dense 2D float array.
let matrix : float [,] = frame1.ToArray2D()
// Run the analysis on standardized data (analysis by correlation), which is
// better suited to variables measured in very different units.
// The method is passed to the constructor: the previous statement
// `pca.Method = AnalysisMethod.Standardize` was an equality test in F#
// (assignment is `<-`), so its result was discarded and the method never set.
// NOTE(review): standardizing presumably divides by each column's standard
// deviation — confirm the data set has no constant columns, or drop them first.
let pca = new PrincipalComponentAnalysis(matrix, AnalysisMethod.Standardize)
// Compute the principal components.
pca.Compute()
// Plot the per-component proportions reported by the analysis as a line chart,
// capping the X axis at 20.
pca.ComponentProportions
|> Chart.Line
|> Chart.WithXAxis(Max = 20.0)
// Project the input matrix through the computed analysis, if the transformed
// data is needed downstream.
let pcaFinalData = pca.Transform matrix
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment