Created
January 22, 2016 19:14
-
-
Save sudipto80/e599ab069981736ffa1d to your computer and use it in GitHub Desktop.
Anomaly Detection
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Finds the median of a list of numbers.
// The values are sorted first, so the input may be in any order. For an even
// number of values the result is the mean of the two middle values.
// Raises ArgumentException when the list is empty.
let median (numbers : float list) =
    // An array gives constant-time access to the middle element(s).
    let sorted = numbers |> List.toArray |> Array.sort
    match sorted.Length with
    | 0 -> invalidArg "numbers" "Cannot compute the median of an empty list."
    | count ->
        let mid = count / 2
        if count % 2 = 0 then (sorted.[mid - 1] + sorted.[mid]) / 2.0
        else sorted.[mid]
// Finds the outlier fences derived from the inter quartile range (IQR).
// Returns (q1 - 1.5 * iqr, q3 + 1.5 * iqr), where q1 and q3 are the medians of
// the lower and upper halves of the sorted data and iqr = q3 - q1. When the
// number of values is odd, the middle value belongs to neither half.
// Raises ArgumentException when fewer than two values are supplied.
let getIQRRange (numbers : float list) =
    let sorted = List.sort numbers
    let count = List.length sorted
    if count < 2 then
        invalidArg "numbers" "At least two values are required to compute quartiles."
    // Split by position, not by comparing against the median value: filtering
    // on "< median" / "> median" would drop every value equal to the median and
    // can leave a half empty when the data contains repeated values.
    let half = count / 2
    let lower = sorted |> Seq.take half |> Seq.toList
    let upper = sorted |> Seq.skip (count - half) |> Seq.toList
    let q1 = median lower
    let q3 = median upper
    let iqr = q3 - q1
    (q1 - 1.5 * iqr, q3 + 1.5 * iqr)
// Finds the zero-based indices of the values that fall strictly outside the
// (lower, upper) range returned by getIQRRange.
let findOutliers numbers =
    let lowerFence, upperFence = getIQRRange numbers
    let isOutlier value = value < lowerFence || value > upperFence
    numbers
    |> List.mapi (fun position value -> if isOutlier value then Some position else None)
    |> List.choose id
// Standard deviation of the list: the square root of the summed squared
// deviations from the mean divided by the element count (not count - 1).
let stdDevList list =
    let mean = List.average list
    let sumOfSquares = list |> List.sumBy (fun value -> (float value - mean) ** 2.0)
    sqrt (sumOfSquares / float (List.length list))
// Absolute z-score of every element of xs: |x - mean| divided by the standard
// deviation reported by stdDevList. The output keeps the order of xs.
let zScores xs =
    let mean = List.average xs
    let spread = stdDevList xs
    [ for value in xs -> abs (value - mean) / spread ]
// Returns the zero-based indices of the elements of xs whose absolute z-score
// exceeds the threshold
//     ((n - 1) / sqrt n) * sqrt (t^2 / (n - 2 + t^2))
// where n is the number of elements.
// NOTE(review): t is presumably the critical t value for Grubbs' test - confirm
// against the caller.
let findAnomalies (xs:float list) t =
    let n = float xs.Length
    let threshold = ((n - 1.) / sqrt n) * sqrt (t ** 2. / (n - 2. + t ** 2.))
    // Walk the z-scores directly; indexing a list by position inside the loop
    // would cost O(n) per element.
    zScores xs
    |> List.mapi (fun i z -> if z > threshold then Some i else None)
    |> List.choose id
#load "...\packages\MathNet.Numerics.FSharp.3.10.0\MathNet.Numerics.fsx" | |
open MathNet.Numerics.LinearAlgebra | |
// Returns the mean value of each column.
// x is a list of rows; the result has one entry per column of the first row.
// Raises ArgumentException when x is empty or when a row is shorter than the
// first row.
let meanOf(x:(float list)list)=
    match x with
    | [] -> invalidArg "x" "Cannot compute column means of an empty matrix."
    | firstRow :: _ ->
        // Average down each column j across all rows.
        [ for j in 0 .. firstRow.Length - 1 ->
            x |> List.averageBy (fun row -> row.[j]) ]
// Gets the covariance matrix of the given matrix.
// x is a list of n rows (observations) with k columns (features). The data is
// centred by subtracting the column means, and the result is the k x k matrix
// (xC^T * xC) / n. The divisor is n, not n - 1.
let getCovarianceMatrix (x:(float list)list)=
    let n = x.Length // Number of rows
    let mean = meanOf(x) // Column means: a vector of k elements
    // repmats is the mean row repeated n times, so it can be subtracted from x
    let repmats = DenseMatrix.ofRowList (List.replicate n mean)
    let xC = (DenseMatrix.ofRowList x) - repmats
    // Divide each element by n. DivideByThis would compute n / element instead.
    (xC.Transpose() * xC).Divide(float n)
// Converts multivariate data to univariate data
// so that Grubbs' test can be used.
// Each row x is paired with the scalar (x - mean) * S^-1 * (x - mean)^T, where
// mean comes from meanOf and S from getCovarianceMatrix (the squared
// Mahalanobis distance of the row from the mean).
let toUnivariate (xs:(float list)list) =
    // The inverse covariance and the mean row are the same for every row, so
    // compute them once instead of once per element.
    let sInverse = (getCovarianceMatrix xs).Inverse()
    let xBar = DenseMatrix.ofRowList [meanOf xs]
    xs
    |> List.map (fun x ->
        let deviation = DenseMatrix.ofRowList [x] - xBar
        (x, ((deviation * sInverse) * deviation.Transpose()).At(0,0)))
// Demo: reduce four two-dimensional points to univariate scores and print them.
let ys =
    [[2.;2.];[2.;5.];[6.;5.];[100.;345.]]
    |> toUnivariate
printfn "ys = %A" ys
// Pairs each observed value in xs with its chi-square term
// (observed - expected)^2 / expected, taking the expected values from es.
// The terms are returned individually, not summed.
// Raises ArgumentException when xs and es have different lengths.
let chiSquareStatistic (xs : float list) (es : float list) =
    List.map2
        (fun observed expected -> (observed, ((observed - expected) ** 2.0) / expected))
        xs
        es
// Calculates mu j: the average of the j-th element over every row of x.
let mu(x:(float list)list)(j:int)=
    List.averageBy (fun (row : float list) -> row.[j]) x
// Finds the square of the standard deviation of the j-th feature
// (sigma squared j): the average squared deviation of column j from its mean.
let sigmaSqr(x:(float list)list)(j:int)=
    // The column mean does not depend on the row, so compute it once rather
    // than once per row.
    let mean = mu x j
    x |> List.averageBy (fun xrow -> (xrow.[j] - mean) ** 2.0)
// Calculates the product of the probabilities for each feature.
// trainingSet holds one row per example and one column per feature; xtest is
// the point being scored. For each feature j of xtest the factor is the
// Gaussian density
//     1 / (sqrt(2 * pi) * sigma_j) * exp(-(x_j - mu_j)^2 / (2 * sigma_j^2))
// with mu_j from mu and sigma_j^2 from sigmaSqr. A feature whose variance is
// zero makes its factor non-finite, because the variance is used as a divisor.
let px (trainingSet:(float list)list)(xtest:float list)=
    let root2pi = sqrt (2.0 * System.Math.PI)
    xtest
    // Iterate over the features of the test point, not over the training rows.
    |> List.mapi (fun j xj ->
        let mean = mu trainingSet j
        let variance = sigmaSqr trainingSet j
        let deviation = xj - mean
        let exponent = -(deviation * deviation) / (2.0 * variance)
        exp exponent / (root2pi * sqrt variance))
    |> List.fold (fun product factor -> product * factor) 1.0
// Sample data and every contiguous window of windowSize consecutive values,
// in order of starting position.
let data = [1;45;1;3;54;1;45;24;5;23;5;5]
let windowSize = 3
let series =
    [ for start in 0 .. data.Length - windowSize ->
        // Take windowSize items (not a hard-coded 3) so the window length
        // follows the setting above.
        data |> Seq.skip start |> Seq.take windowSize |> Seq.toList ]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment