Last active
December 7, 2016 08:27
-
-
Save mathias-brandewinder/05683d63bfa67c8b706ce458035c0b81 to your computer and use it in GitHub Desktop.
Gradient Boosting exploration
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// blog post: brandewinder.com/2016/08/06/gradient-boosting-part-1 | |
// https://en.wikipedia.org/wiki/Gradient_boosting#Algorithm | |
(* | |
Exploring the dataset | |
*) | |
#I "./packages/" | |
#r "fsharp.data/lib/net40/fsharp.data.dll" | |
open FSharp.Data | |
#r "xplot.googlecharts/lib/net45/xplot.googlecharts.dll" | |
#r "google.datatable.net.wrapper/lib/google.datatable.net.wrapper.dll" | |
open XPlot.GoogleCharts | |
/// Typed access to the red wine quality data set (semicolon-separated CSV).
type Wine = CsvProvider<"data/winequality-red.csv", ";", InferRows = 1500>

/// The data set itself, loaded from the same file.
let reds = Wine.Load("data/winequality-red.csv")

/// One row of the data set, i.e. one wine.
type Observation = Wine.Row

/// A feature extracts a numeric value from an observation.
type Feature = Observation -> float

// Features used in this exploration; each converts one column to a float.
let ``Alcohol Level`` : Feature = fun wine -> float wine.Alcohol
let ``Volatile Acidity`` : Feature = fun wine -> float wine.``Volatile acidity``
let ``Fixed Acidity`` : Feature = fun wine -> float wine.``Fixed acidity``
// Shared scatter plot styling: large, mostly transparent points.
let options = Configuration.Options(dataOpacity = 0.25, pointSize = 10)

// One scatter plot of quality against each feature, shown in turn.
[
    "Alcohol Level", ``Alcohol Level``
    "Volatile Acidity", ``Volatile Acidity``
    "Fixed Acidity", ``Fixed Acidity``
]
|> List.iter (fun (name, feature) ->
    reds.Rows
    |> Seq.map (fun wine -> feature wine, wine.Quality)
    |> Chart.Scatter
    |> Chart.WithOptions options
    |> Chart.WithTitle (name + " vs. Quality")
    |> Chart.WithXTitle name
    |> Chart.WithYTitle "Quality"
    |> Chart.Show)
(* | |
Stumps | |
*) | |
/// An observation paired with the value we want to predict for it.
type Example = Observation * float

/// A model: maps an observation to a predicted value.
type Predictor = Observation -> float

/// Learns a stump on `feature`: the returned predictor answers with the
/// average label of the examples at or below `threshold`, or with the
/// average label of the examples above it, depending on which side the
/// observation falls.
/// Both averages are computed once, up front; Seq.averageBy raises on an
/// empty input, so each side of the threshold needs at least one example.
let learnStump (sample:Example seq) (feature:Feature) threshold =
    let averageLabelWhere keep =
        sample
        |> Seq.filter (fun (obs, _) -> keep (feature obs))
        |> Seq.averageBy snd
    let under = averageLabelWhere (fun value -> value <= threshold)
    let over = averageLabelWhere (fun value -> value > threshold)
    fun obs -> if feature obs <= threshold then under else over
/// Every wine, labelled with its quality converted to a float.
let redSample =
    seq { for row in reds.Rows -> row, float row.Quality }

// A first stump: split on alcohol level at 11.0.
let testStump = learnStump redSample ``Alcohol Level`` 11.0

// For each wine: its alcohol level and the stump's prediction.
let predicted =
    seq { for (wine, _) in redSample -> ``Alcohol Level`` wine, testStump wine }

predicted
|> Seq.sortBy fst
|> Chart.Line
|> Chart.WithTitle "Alcohol Level vs. Quality"
|> Chart.WithXTitle "Alcohol Level"
|> Chart.WithYTitle "Quality"
|> Chart.Show
(* | |
Picking the best stump | |
2 issues: quality, and possible splits | |
*) | |
/// Quality of a predictor over a sample: the sum of the squared
/// differences between each label and the predicted value.
let sumOfSquares (sample:Example seq) predictor =
    let squaredError (obs:Observation, lbl:float) =
        pown (lbl - predictor obs) 2
    Seq.sumBy squaredError sample

sumOfSquares redSample testStump
/// Candidate thresholds for `feature`: evenly spaced values lying strictly
/// between the smallest and the largest feature value found in `sample`,
/// obtained by cutting that interval into `n + 1` equal parts.
/// Returns an empty list when every example has the same feature value:
/// the width would then be 0, which is not a valid range step, and no
/// threshold could separate the examples anyway.
let evenSplits (sample:Example seq) (feature:Feature) (n:int) =
    let values = sample |> Seq.map (fst >> feature)
    let min = values |> Seq.min
    let max = values |> Seq.max
    if min = max
    then []
    else
        let width = (max-min) / (float (n + 1))
        [ min + width .. width .. max - width ]
// Candidate thresholds across the range of alcohol levels.
let alcoholSplits = evenSplits redSample ``Alcohol Level`` 10

// Learn one stump per candidate threshold, and keep the one with the
// smallest sum of squared errors over the sample.
let bestStump =
    let stumps =
        [ for split in alcoholSplits -> learnStump redSample ``Alcohol Level`` split ]
    stumps |> List.minBy (fun stump -> sumOfSquares redSample stump)

sumOfSquares redSample bestStump

seq { for (wine, _) in redSample -> ``Alcohol Level`` wine, bestStump wine }
|> Seq.sortBy fst
|> Chart.Line
|> Chart.WithTitle "Alcohol Level vs. Quality"
|> Chart.WithXTitle "Alcohol Level"
|> Chart.WithYTitle "Quality"
|> Chart.Show
(* | |
Analyzing the residuals | |
*) | |
// Residual of the best stump for each wine (actual quality minus
// predicted quality), paired with the wine's alcohol level.
let stumpResiduals =
    seq {
        for (wine, quality) in redSample ->
            ``Alcohol Level`` wine, quality - bestStump wine
    }

stumpResiduals
|> Chart.Scatter
|> Chart.WithOptions options
|> Chart.WithTitle "Alcohol Level vs. Residuals"
|> Chart.WithXTitle "Alcohol Level"
|> Chart.WithYTitle "Residuals"
|> Chart.Show

// alternate chart: one point per alcohol level, showing the average
// residual of the observations sharing that level
stumpResiduals
|> Seq.groupBy fst
|> Seq.map (fun (level, points) -> level, points |> Seq.averageBy snd)
|> Chart.Scatter
|> Chart.WithOptions options
|> Chart.WithTitle "Alcohol Level vs. Residuals"
|> Chart.WithXTitle "Alcohol Level"
|> Chart.WithYTitle "Residuals"
|> Chart.Show
(* | |
Fitting another stump on the residuals | |
*) | |
// What the best stump leaves unexplained: each wine, labelled with the
// difference between its actual quality and the stump's prediction.
let residualsSample =
    redSample
    |> Seq.map (fun (obs,lbl) -> obs, lbl - (obs |> bestStump))

// The candidate stumps are learnt on the residuals, so they must also be
// ranked by their error against the residuals. Scoring them against the
// original quality labels (redSample) would compare residual-sized
// predictions to quality-sized labels and select the wrong stump.
let residualsStump =
    alcoholSplits
    |> List.map (learnStump residualsSample ``Alcohol Level``)
    |> List.minBy (sumOfSquares residualsSample)

// Stacked model: first stump, corrected by the stump fitted to its residuals.
let combined = fun obs -> bestStump obs + residualsStump obs

sumOfSquares redSample combined
// predictions of the two stacked stumps, by alcohol level
seq { for (wine, _) in redSample -> ``Alcohol Level`` wine, combined wine }
|> Seq.sortBy fst
|> Chart.Line
|> Chart.WithTitle "Alcohol Level vs. Quality"
|> Chart.WithXTitle "Alcohol Level"
|> Chart.WithYTitle "Quality"
|> Chart.Show

// residuals left by the stacked stumps
seq { for (wine, quality) in redSample -> ``Alcohol Level`` wine, quality - combined wine }
|> Chart.Scatter
|> Chart.WithOptions options
|> Chart.WithTitle "Alcohol Level vs. Residuals"
|> Chart.WithXTitle "Alcohol Level"
|> Chart.WithYTitle "Residuals"
|> Chart.Show
(* | |
Iteratively adding stumps | |
*) | |
/// Boosts stumps on a single feature. Starts from a predictor that always
/// answers with the average label of `sample`, then, `depth` times over,
/// fits the best stump to the residuals of the current predictor and adds
/// that stump to it. Returns the resulting predictor.
let learn (sample:Example seq) (feature:Feature) (depth:int) =
    let splits = evenSplits sample feature 10
    let rec next iterationsLeft predictor =
        // we have reached depth 0: we are done
        if iterationsLeft = 0
        then predictor
        else
            // compute new residuals; cached, because every candidate stump
            // below enumerates them several times, and each enumeration of
            // the lazy sequence would otherwise re-run the whole chain of
            // predictors built so far
            let newSample =
                sample
                |> Seq.map (fun (obs,y) -> obs, y - predictor obs)
                |> Seq.cache
            // learn possible stumps against residuals,
            // and pick the one with smallest error
            let newStump =
                splits
                |> Seq.map (learnStump newSample feature)
                |> Seq.minBy (sumOfSquares newSample)
            // create new predictor
            let newPredictor = fun obs -> predictor obs + newStump obs
            // ... and keep going
            next (iterationsLeft - 1) newPredictor
    // initialize with a predictor that
    // predicts the average sample value
    let baseValue = sample |> Seq.map snd |> Seq.average
    let basePredictor = fun (obs:Observation) -> baseValue
    next depth basePredictor
// Ten rounds of boosting on alcohol level.
let model = learn redSample ``Alcohol Level`` 10

sumOfSquares redSample model

seq { for (wine, _) in redSample -> ``Alcohol Level`` wine, model wine }
|> Seq.sortBy fst
|> Chart.Line
|> Chart.WithTitle "Alcohol Level vs. Quality"
|> Chart.WithXTitle "Alcohol Level"
|> Chart.WithYTitle "Quality"
|> Chart.Show

// increasing depth: error over the sample after 1, 2, ... 15 rounds
seq {
    for depth in 1 .. 15 ->
        depth, sumOfSquares redSample (learn redSample ``Alcohol Level`` depth)
}
|> Chart.Column
|> Chart.Show
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// blog post: brandewinder.com/2016/08/14/gradient-boosting-part-2 | |
//https://en.wikipedia.org/wiki/Gradient_boosting#Algorithm | |
(* | |
Exploring the dataset | |
*) | |
#I "./packages/" | |
#r "fsharp.data/lib/net40/fsharp.data.dll" | |
open FSharp.Data | |
#r "xplot.googlecharts/lib/net45/xplot.googlecharts.dll" | |
#r "google.datatable.net.wrapper/lib/google.datatable.net.wrapper.dll" | |
open XPlot.GoogleCharts | |
/// Typed access to the red wine quality data set (semicolon-separated CSV).
type Wine = CsvProvider<"data/winequality-red.csv", ";", InferRows = 1500>

/// The data set itself, loaded from the same file.
let reds = Wine.Load("data/winequality-red.csv")

/// One row of the data set, i.e. one wine.
type Observation = Wine.Row

/// A feature extracts a numeric value from an observation.
type Feature = Observation -> float

// One feature per measurement column, each converted to a float.
let ``Alcohol Level`` : Feature = fun wine -> float wine.Alcohol
let ``Chlorides`` : Feature = fun wine -> float wine.Chlorides
let ``Citric Acid`` : Feature = fun wine -> float wine.``Citric acid``
let ``Density`` : Feature = fun wine -> float wine.Density
let ``Fixed Acidity`` : Feature = fun wine -> float wine.``Fixed acidity``
let ``Free Sulfur Dioxide`` : Feature = fun wine -> float wine.``Free sulfur dioxide``
let ``PH`` : Feature = fun wine -> float wine.PH
let ``Residual Sugar`` : Feature = fun wine -> float wine.``Residual sugar``
let ``Total Sulfur Dioxide`` : Feature = fun wine -> float wine.``Total sulfur dioxide``
let ``Volatile Acidity`` : Feature = fun wine -> float wine.``Volatile acidity``
(* | |
Trees | |
*) | |
/// An observation paired with the value we want to predict for it.
type Example = Observation * float

/// A model: maps an observation to a predicted value.
type Predictor = Observation -> float

/// A regression tree. A Leaf holds a prediction; a Branch holds a
/// feature and a split value, followed by the sub-tree for observations
/// at or under the split, and the sub-tree for observations over it.
type Tree =
    | Leaf of float
    | Branch of (Feature * float) * Tree * Tree

// A hand-built tree: alcohol level first, then volatile acidity for
// the wines at or under 10.5 alcohol.
let exampleTree =
    let lowAlcohol = Branch((``Volatile Acidity``, 0.8), Leaf 6.0, Leaf 3.0)
    let highAlcohol = Leaf 5.5
    Branch((``Alcohol Level``, 10.5), lowAlcohol, highAlcohol)
/// Walks the tree for one observation: at each branch, goes to the
/// `under` side when the feature value is at or below the split and to
/// the `over` side otherwise, until a leaf supplies the prediction.
let rec predict (tree:Tree) (obs:Observation) =
    match tree with
    | Leaf value -> value
    | Branch ((feature, split), under, over) ->
        let subTree = if feature obs <= split then under else over
        predict subTree obs

predict exampleTree (Seq.head reds.Rows)

// A tree, partially applied, is a Predictor.
let examplePredictor = predict exampleTree
/// Quality of a predictor over a sample: the sum of the squared
/// differences between each label and the predicted value.
let sumOfSquares (sample:Example seq) predictor =
    let squaredError (obs:Observation, lbl:float) =
        pown (lbl - predictor obs) 2
    Seq.sumBy squaredError sample

/// Every wine, labelled with its quality converted to a float.
let redSample =
    seq { for row in reds.Rows -> row, float row.Quality }

sumOfSquares redSample examplePredictor
(* | |
Learning a Tree | |
*) | |
/// Learns a stump on `feature`: the returned predictor answers with the
/// average label of the examples at or below `threshold`, or with the
/// average label of the examples above it, depending on which side the
/// observation falls.
/// Both averages are computed once, up front; Seq.averageBy raises on an
/// empty input, so each side of the threshold needs at least one example.
let learnStump (sample:Example seq) (feature:Feature) threshold =
    let averageLabelWhere keep =
        sample
        |> Seq.filter (fun (obs, _) -> keep (feature obs))
        |> Seq.averageBy snd
    let under = averageLabelWhere (fun value -> value <= threshold)
    let over = averageLabelWhere (fun value -> value > threshold)
    fun obs -> if feature obs <= threshold then under else over
/// Candidate thresholds for `feature`: evenly spaced values lying strictly
/// between the smallest and the largest feature value found in `sample`,
/// obtained by cutting that interval into `n + 1` equal parts.
/// NOTE(review): when every example has the same feature value the width
/// is 0, which is not a valid range step. evenSplitter, further down,
/// guards against that case; this version deliberately does not.
let evenSplits (sample:Example seq) (feature:Feature) (n:int) =
    let values = seq { for (obs, _) in sample -> feature obs }
    let lowest = Seq.min values
    let highest = Seq.max values
    let width = (highest - lowest) / float (n + 1)
    [ lowest + width .. width .. highest - width ]
/// First draft of tree learning. At depth 0, produces a leaf predicting
/// the average label of `sample`. Otherwise, tries every feature / split
/// combination as a stump, keeps the one with the smallest sum of squared
/// errors, partitions the sample around it, and learns a sub-tree on each
/// part with one less level of depth.
/// This draft has no stopping rule besides `depth`; learnTree, below, is
/// the cleaned-up version.
let rec draftLearnTree (sample:Example seq) (features:Feature list) (depth:int) =
    match depth with
    | 0 -> Leaf (Seq.averageBy snd sample)
    | _ ->
        // error of the stump learnt for a given feature and split
        let stumpError (feature, split) =
            sumOfSquares sample (learnStump sample feature split)
        let (bestFeature, bestSplit) =
            features
            // all feature * split combinations
            |> Seq.collect (fun feature ->
                evenSplits sample feature 10
                |> Seq.map (fun split -> feature, split))
            // the combination with the smallest error
            |> Seq.minBy stumpError
        let under =
            sample |> Seq.filter (fun (obs, _) -> bestFeature obs <= bestSplit)
        let over =
            sample |> Seq.filter (fun (obs, _) -> bestFeature obs > bestSplit)
        Branch(
            (bestFeature, bestSplit),
            draftLearnTree under features (depth - 1),
            draftLearnTree over features (depth - 1))

// replicate the original stump
let originalStump = draftLearnTree redSample [ ``Alcohol Level`` ] 1
predict originalStump |> sumOfSquares redSample

let deeperTree = draftLearnTree redSample [ ``Alcohol Level``; ``Volatile Acidity`` ] 4
predict deeperTree |> sumOfSquares redSample

// problem!
// NOTE(review): presumably some sub-sample ends up with a single distinct
// alcohol level, which evenSplits cannot split - confirm by running it.
let explodingTree = draftLearnTree redSample [ ``Alcohol Level`` ] 5
(* | |
Cleaning things up | |
*) | |
/// Partitions a sample around a split: the examples whose feature value
/// is at or below `split`, and those whose value is above it. Both parts
/// are lazy filters over `sample`.
let underOver (sample:Example seq) (feat:Feature,split:float) =
    let examplesWhere test =
        sample |> Seq.filter (fun (obs, _) -> test (feat obs))
    examplesWhere (fun value -> value <= split),
    examplesWhere (fun value -> value > split)
/// Proposes candidate split values for a feature, given a sample.
type Splitter = Example seq -> Feature -> float list

/// Measures how costly (how badly explained) a sample is.
type Cost = Example seq -> float

/// Learns a regression tree of at most `depth` levels.
/// At each level, every feature / split candidate proposed by `splitter`
/// is evaluated by adding up the `cost` of the two parts it creates; the
/// cheapest candidate that strictly improves on the cost of the
/// un-split sample becomes a branch, and a sub-tree is learnt on each
/// part. When depth is exhausted, or no candidate improves the cost, the
/// result is a leaf predicting the average label of the sample.
let rec learnTree (splitter:Splitter,cost:Cost) (sample:Example seq) (features:Feature list) (depth:int) =
    // a leaf predicts the average label of the examples that reach it
    let leaf () = Leaf (sample |> Seq.averageBy snd)
    if depth = 0
    then leaf ()
    else
        let initialCost = cost sample
        // Candidates are built into a list, so that the (expensive)
        // evaluation of each split happens exactly once.
        let candidates =
            [
                for feature in features do
                    for split in splitter sample feature do
                        let under,over = underOver sample (feature,split)
                        // Cache both parts: they are enumerated several
                        // times (cost, then recursion), and un-cached lazy
                        // filters would stack up at each level of the tree.
                        let under,over = Seq.cache under, Seq.cache over
                        // A split leaving one part empty separates
                        // nothing, and a cost relying on an average cannot
                        // be computed over an empty sample: skip it.
                        if not (Seq.isEmpty under || Seq.isEmpty over) then
                            let splitCost = cost under + cost over
                            // retain only candidates with strict cost improvement
                            if splitCost < initialCost then
                                yield (feature,split),(under,over),splitCost
            ]
        match candidates with
        | [] -> leaf ()
        | _ ->
            let ((bestFeature,bestSplit),(under,over),_) =
                candidates
                |> List.minBy (fun (_,_,splitCost) -> splitCost)
            let underTree = learnTree (splitter,cost) under features (depth - 1)
            let overTree = learnTree (splitter,cost) over features (depth - 1)
            Branch((bestFeature,bestSplit),underTree,overTree)
/// Splitter proposing evenly spaced values lying strictly between the
/// smallest and the largest feature value found in `sample`, obtained by
/// cutting that interval into `n + 1` equal parts. Proposes nothing when
/// all the examples share the same feature value.
let evenSplitter n (sample:Example seq) (feature:Feature) =
    let values = sample |> Seq.map (fun (obs, _) -> feature obs)
    match Seq.min values, Seq.max values with
    | lowest, highest when lowest = highest -> []
    | lowest, highest ->
        let width = (highest - lowest) / float (n + 1)
        [ lowest + width .. width .. highest - width ]
/// Cost of a sample: sum of the squared distances between each label
/// and the average label.
let sumOfSquaresCost (sample:Example seq) =
    let labels = sample |> Seq.map snd
    let avg = Seq.average labels
    labels |> Seq.sumBy (fun lbl -> pown (lbl - avg) 2)

// alternate cost specification
/// Cost of a sample: sum of the absolute distances between each label
/// and the average label.
let manhattanCost (sample:Example seq) =
    let labels = sample |> Seq.map snd
    let avg = Seq.average labels
    labels |> Seq.sumBy (fun lbl -> abs (lbl - avg))
// Same two features as the draft, with the cleaned-up learner.
let stableTree =
    let twoFeatures = [ ``Alcohol Level``; ``Volatile Acidity`` ]
    learnTree (evenSplitter 10,sumOfSquaresCost) redSample twoFeatures 10

predict stableTree |> sumOfSquares redSample

// we include every feature available
let features =
    [ ``Alcohol Level``; ``Chlorides``; ``Citric Acid``; ``Density``
      ``Fixed Acidity``; ``Free Sulfur Dioxide``; ``PH``; ``Residual Sugar``
      ``Total Sulfur Dioxide``; ``Volatile Acidity`` ]

let fullTree = learnTree (evenSplitter 5,sumOfSquaresCost) redSample features 10

predict fullTree |> sumOfSquares redSample

// plotting actual vs. predicted values
let options = Configuration.Options(dataOpacity = 0.25, pointSize = 10)

seq { for (wine, quality) in redSample -> quality, predict fullTree wine }
|> Chart.Scatter
|> Chart.WithOptions options
|> Chart.WithTitle "Wine Quality: Actual vs. Predicted"
|> Chart.WithXTitle "Actual"
|> Chart.WithYTitle "Predicted"
|> Chart.Show
(* | |
Over-fitting? | |
*) | |
// we split the sample in halves: first half to train, second half to test
let sampleSize = Seq.length redSample
let training = Seq.take (sampleSize/2) redSample
let testing = Seq.skip (sampleSize/2) redSample

// careful - this takes a bit of time :)
// one tree per depth from 1 to 10, each trained on the training sample
let trees =
    [ 1 .. 10 ]
    |> List.map (fun depth ->
        depth, learnTree (evenSplitter 10,sumOfSquaresCost) training features depth)

// we evaluate errors, on the training and the testing samples
let trainingError =
    [ for (d, tree) in trees -> d, sumOfSquares training (predict tree) ]
let testingError =
    [ for (d, tree) in trees -> d, sumOfSquares testing (predict tree) ]

[ trainingError; testingError ]
|> Chart.Line
|> Chart.WithLabels ["Train"; "Test"]
|> Chart.WithTitle "Over-Fitting Analysis"
|> Chart.WithXTitle "Depth"
|> Chart.WithYTitle "Error"
|> Chart.Show
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// blog post: brandewinder.com/2016/09/03/gradient-boosting-part-3 | |
(* | |
Dependencies | |
*) | |
#I "./packages/" | |
#r "fsharp.data/lib/net40/fsharp.data.dll" | |
open FSharp.Data | |
#r "xplot.googlecharts/lib/net45/xplot.googlecharts.dll" | |
#r "google.datatable.net.wrapper/lib/google.datatable.net.wrapper.dll" | |
open XPlot.GoogleCharts | |
#r "fsalg/lib/fsalg.dll" | |
#r "diffsharp/lib/diffsharp.dll" | |
open DiffSharp.Numerical | |
// Styling shared by the actual vs. predicted scatter plots: large,
// mostly transparent points, with both axes spanning 0 to 10.
let scatterOptions =
    Configuration.Options(
        dataOpacity = 0.25,
        pointSize = 10,
        hAxis = Axis(minValue = 0, maxValue = 10),
        vAxis = Axis(minValue = 0, maxValue = 10))
(* | |
Declaring our core types and importing the data. | |
*) | |
/// Typed access to the red wine quality data set (semicolon-separated CSV).
type Wine = CsvProvider<"data/winequality-red.csv", ";", InferRows = 1500>

/// One row of the data set, i.e. one wine.
type Observation = Wine.Row

/// A feature extracts a numeric value from an observation.
type Feature = Observation -> float

/// An observation paired with the value we want to predict for it.
type Example = Observation * float

/// A model: maps an observation to a predicted value.
type Predictor = Observation -> float

/// Every wine, labelled with its quality converted to a float.
let redSample =
    let wines = Wine.Load("data/winequality-red.csv")
    seq { for row in wines.Rows -> row, float row.Quality }
(* | |
Creating features for that dataset | |
*) | |
// One feature per measurement column, each converted to a float.
let ``Alcohol Level`` : Feature = fun wine -> float wine.Alcohol
let ``Chlorides`` : Feature = fun wine -> float wine.Chlorides
let ``Citric Acid`` : Feature = fun wine -> float wine.``Citric acid``
let ``Density`` : Feature = fun wine -> float wine.Density
let ``Fixed Acidity`` : Feature = fun wine -> float wine.``Fixed acidity``
let ``Free Sulfur Dioxide`` : Feature = fun wine -> float wine.``Free sulfur dioxide``
let ``PH`` : Feature = fun wine -> float wine.PH
let ``Residual Sugar`` : Feature = fun wine -> float wine.``Residual sugar``
let ``Total Sulfur Dioxide`` : Feature = fun wine -> float wine.``Total sulfur dioxide``
let ``Volatile Acidity`` : Feature = fun wine -> float wine.``Volatile acidity``

/// Every feature available, in the order used for learning.
let features =
    [ ``Alcohol Level``; ``Chlorides``; ``Citric Acid``; ``Density``
      ``Fixed Acidity``; ``Free Sulfur Dioxide``; ``PH``; ``Residual Sugar``
      ``Total Sulfur Dioxide``; ``Volatile Acidity`` ]
(* | |
Basic regression tree implementation | |
*) | |
/// A regression tree. A Leaf holds a prediction; a Branch holds a
/// feature and a split value, followed by the sub-tree for observations
/// at or under the split, and the sub-tree for observations over it.
type Tree =
    | Leaf of float
    | Branch of (Feature * float) * Tree * Tree

/// Walks the tree for one observation: at each branch, goes to the
/// `under` side when the feature value is at or below the split and to
/// the `over` side otherwise, until a leaf supplies the prediction.
let rec predict (tree:Tree) (obs:Observation) =
    match tree with
    | Leaf value -> value
    | Branch ((feature, split), under, over) ->
        let subTree = if feature obs <= split then under else over
        predict subTree obs
/// Partitions a sample around a split: the examples whose feature value
/// is at or below `split`, and those whose value is above it. Both parts
/// are lazy filters over `sample`.
let underOver (sample:Example seq) (feat:Feature,split:float) =
    let examplesWhere test =
        sample |> Seq.filter (fun (obs, _) -> test (feat obs))
    examplesWhere (fun value -> value <= split),
    examplesWhere (fun value -> value > split)
/// Proposes candidate split values for a feature, given a sample.
type Splitter = Example seq -> Feature -> float list

/// Measures how costly (how badly explained) a sample is.
type Cost = Example seq -> float

/// Learns a regression tree of at most `depth` levels.
/// At each level, every feature / split candidate proposed by `splitter`
/// is evaluated by the `cost` of the two parts it creates, each weighted
/// by its share of the examples; the cheapest candidate that strictly
/// improves on the cost of the un-split sample becomes a branch, and a
/// sub-tree is learnt on each part. When depth is exhausted, or no
/// candidate improves the cost, the result is a leaf predicting the
/// average label of the sample.
let rec learnTree (splitter:Splitter,cost:Cost) (sample:Example seq) (features:Feature list) (depth:int) =
    // a leaf predicts the average label of the examples that reach it
    let leaf () = Leaf (sample |> Seq.averageBy snd)
    if depth = 0
    then leaf ()
    else
        let initialCost = cost sample
        // Candidates are built into a list, so that the (expensive)
        // evaluation of each split happens exactly once.
        let candidates =
            [
                for feature in features do
                    for split in splitter sample feature do
                        let under,over = underOver sample (feature,split)
                        // Cache both parts: they are enumerated several
                        // times (size, cost, then recursion), and un-cached
                        // lazy filters would stack up at each level of the
                        // tree.
                        let under,over = Seq.cache under, Seq.cache over
                        // A split leaving one part empty separates
                        // nothing, and a cost relying on an average cannot
                        // be computed over an empty sample: skip it.
                        if not (Seq.isEmpty under || Seq.isEmpty over) then
                            let underSize = under |> Seq.length |> float
                            let overSize = over |> Seq.length |> float
                            let size = underSize + overSize
                            let weightedCost = (underSize / size) * (cost under) + (overSize / size) * (cost over)
                            // retain only candidates with strict cost improvement
                            if weightedCost < initialCost then
                                yield (feature,split),(under,over),weightedCost
            ]
        match candidates with
        | [] -> leaf ()
        | _ ->
            let ((bestFeature,bestSplit),(under,over),_) =
                candidates
                |> List.minBy (fun (_,_,splitCost) -> splitCost)
            let underTree = learnTree (splitter,cost) under features (depth - 1)
            let overTree = learnTree (splitter,cost) over features (depth - 1)
            Branch((bestFeature,bestSplit),underTree,overTree)
/// Splitter proposing evenly spaced values lying strictly between the
/// smallest and the largest feature value found in `sample`, obtained by
/// cutting that interval into `n + 1` equal parts. Proposes nothing when
/// all the examples share the same feature value.
let evenSplitter n (sample:Example seq) (feature:Feature) =
    let values = seq { for (obs, _) in sample -> feature obs }
    let lowest, highest = Seq.min values, Seq.max values
    if lowest <> highest
    then
        let width = (highest - lowest) / float (n + 1)
        [ lowest + width .. width .. highest - width ]
    else []
/// Cost of a sample: sum of the squared distances between each label
/// and the average label.
let sumOfSquaresCost (sample:Example seq) =
    let labels = sample |> Seq.map snd
    let avg = Seq.average labels
    labels |> Seq.sumBy (fun lbl -> pown (lbl - avg) 2)
// A single tree of depth 3 over every feature, as a reference point.
let fullTree = learnTree (evenSplitter 5,sumOfSquaresCost) redSample features 3

/// Quality of a predictor over a sample: the average of the squared
/// differences between each label and the predicted value.
let averageSquareError (sample:Example seq) predictor =
    let squaredError (obs:Observation, lbl:float) =
        pown (lbl - predictor obs) 2
    Seq.averageBy squaredError sample

predict fullTree |> averageSquareError redSample

seq { for (wine, quality) in redSample -> quality, predict fullTree wine }
|> Chart.Scatter
|> Chart.WithOptions scatterOptions
|> Chart.WithTitle "Wine Quality: Actual vs. Predicted (Tree)"
|> Chart.WithXTitle "Actual"
|> Chart.WithYTitle "Predicted"
|> Chart.Show
(* | |
Gradient Boosting | |
*) | |
/// A learner turns a sample into a predictor.
type Learner = Example seq -> Predictor

/// Boosting on residuals. Starts from a predictor that always answers
/// with the average label of `sample`, then, `depth` times over, asks
/// `learner` for a predictor of the current residuals and adds it to the
/// current predictor. Returns the resulting predictor.
let learn (sample:Example seq) (learner:Learner) (depth:int) =
    let rec next iterationsLeft predictor =
        // we have reached depth 0: we are done
        if iterationsLeft = 0
        then predictor
        else
            // compute new residuals; cached, because the learner may
            // enumerate its sample many times, and each enumeration of
            // the lazy sequence would otherwise re-run the whole chain of
            // predictors built so far
            let newSample =
                sample
                |> Seq.map (fun (obs,y) -> obs, y - predictor obs)
                |> Seq.cache
            // learn a predictor against residuals,
            let residualsPredictor = learner newSample
            // create new predictor
            let newPredictor =
                fun obs -> predictor obs + residualsPredictor obs
            // ... and keep going
            next (iterationsLeft - 1) newPredictor
    // initialize with a predictor that
    // predicts the average sample value
    let baseValue = sample |> Seq.map snd |> Seq.average
    let basePredictor = fun (obs:Observation) -> baseValue
    next depth basePredictor
/// Learner producing a depth-3 regression tree over every feature,
/// exposed as a predictor.
let treeLearner (sample:Example seq) =
    let tree = learnTree (evenSplitter 5,sumOfSquaresCost) sample features 3
    predict tree

// evaluate boosting at different depth
[ for depth in 1 .. 5 ->
    let model = learn redSample treeLearner depth
    depth, averageSquareError redSample model ]
(* | |
True Gradient Boosting, using pseudo-residuals | |
*) | |
/// A loss function: the penalty incurred for a given prediction error.
type Loss = float -> float

/// Draft of gradient boosting with pseudo-residuals. Works like boosting
/// on residuals, except that the learner is trained on the derivative of
/// `loss`, evaluated at each residual (actual minus predicted), rather
/// than on the residual itself. The new predictor is added as-is, without
/// any weighting.
let draftBoostedLearn (sample:Example seq) (learner:Learner) (loss:Loss) (depth:int) =
    let pseudoResiduals = diff loss
    let rec next iterationsLeft predictor =
        // we have reached depth 0: we are done
        if iterationsLeft = 0
        then predictor
        else
            // compute new residuals; cached, because the learner may
            // enumerate its sample many times, and each enumeration of
            // the lazy sequence would otherwise re-run both the chain of
            // predictors built so far and the differentiation
            let newSample =
                sample
                |> Seq.map (fun (obs,y) ->
                    obs,
                    pseudoResiduals (y - predictor obs))
                |> Seq.cache
            // learn a tree against residuals,
            let residualsPredictor = learner newSample
            // create new predictor
            let newPredictor =
                fun obs ->
                    predictor obs + residualsPredictor obs
            // ... and keep going
            next (iterationsLeft - 1) newPredictor
    // initialize with a predictor that
    // predicts the average sample value
    let baseValue = sample |> Seq.map snd |> Seq.average
    let basePredictor = fun (obs:Observation) -> baseValue
    next depth basePredictor
// we should have the same results as before | |
/// Half the squared error.
let squareLoss : Loss = fun error -> 0.5 * pown error 2

// we should have the same results as before
[ for depth in 1 .. 5 ->
    let model = draftBoostedLearn redSample treeLearner squareLoss depth
    depth, averageSquareError redSample model ]

// illustration: differentiating the square loss function
// does produce the residuals.
let diffSquareLoss = diff squareLoss

[ for x in -5.0 .. 0.1 .. 5.0 -> x, diffSquareLoss x ]
|> Chart.Line
|> Chart.Show
(* | |
Optimal combination of predictors | |
*) | |
/// Combines two predictors: the prediction of `f1`, plus the prediction
/// of `f2` weighted by `gamma`.
let combination f1 f2 gamma : Predictor =
    fun obs ->
        let baseline = f1 obs
        let correction = f2 obs
        baseline + gamma * correction
/// Minimizes `f` by gradient descent. Starting from `x0`, repeatedly
/// moves against the derivative of `f` (computed by `diff`), scaled by
/// the step size `eta`, until the magnitude of the derivative drops below
/// `epsilon`; returns the position reached. Prints every intermediate
/// position.
/// Fails, rather than looping forever, when the derivative is no longer
/// a finite number (e.g. when `eta` is too large and the search
/// diverges): such a value can never compare as smaller than `epsilon`.
let gradientDescent f x0 eta epsilon =
    let rec desc x =
        let g = diff f x
        if System.Double.IsNaN g || System.Double.IsInfinity g
        then failwithf "gradientDescent diverged: gradient is %f at x = %f" g x
        elif abs g < epsilon
        then x
        else
            printfn "%.3f" x
            desc (x - eta * g)
    desc x0

// illustration
// (annotated, so that foo is a float function even when this line is
// evaluated on its own in the interactive session)
let foo (x:float) = pown x 2
let min_foo = gradientDescent foo 10. 0.1 0.0001
/// Finds, by gradient descent starting from 1.0, the weight gamma for
/// which the combination `f1 + gamma * f2` has the smallest total `loss`
/// over `sample`.
let optimalGamma (sample:Example seq) f1 f2 (loss:Loss) =
    let combine gamma = combination f1 f2 gamma
    let costOf gamma =
        sample
        |> Seq.sumBy (fun (obs,y) ->
            // the loss is applied to (actual - predicted), the same
            // convention as the pseudo-residuals in boostedLearn, so that
            // a loss which is not symmetric around 0 is handled consistently
            y - combine gamma obs |> loss)
    gradientDescent costOf 1.0 0.001 0.01
/// Gradient boosting. Starts from a predictor that always answers with
/// the average label of `sample`, then, `depth` times over:
/// trains `learner` on the pseudo-residuals (the derivative of `loss`,
/// evaluated at actual minus predicted), finds the weight gamma that
/// minimizes the loss of the combined predictor, and adds the new
/// predictor, weighted by gamma, to the current one.
let boostedLearn (sample:Example seq) (learner:Learner) (loss:Loss) (depth:int) =
    let pseudoResiduals = diff loss
    let rec next iterationsLeft predictor =
        // we have reached depth 0: we are done
        if iterationsLeft = 0
        then predictor
        else
            // compute new residuals; cached, because the learner may
            // enumerate its sample many times, and each enumeration of
            // the lazy sequence would otherwise re-run both the chain of
            // predictors built so far and the differentiation
            let newSample =
                sample
                |> Seq.map (fun (obs,y) ->
                    obs,
                    pseudoResiduals (y - predictor obs))
                |> Seq.cache
            // learn a tree against residuals,
            let residualsPredictor = learner newSample
            // find optimal gamma
            let gamma = optimalGamma sample predictor residualsPredictor loss
            // create new predictor: predictor + gamma * residualsPredictor
            let newPredictor = combination predictor residualsPredictor gamma
            // ... and keep going
            next (iterationsLeft - 1) newPredictor
    // initialize with a predictor that
    // predicts the average sample value
    let baseValue = sample |> Seq.map snd |> Seq.average
    let basePredictor = fun (obs:Observation) -> baseValue
    next depth basePredictor
// error of the boosted model, for 1 to 5 rounds of boosting
[ for depth in 1 .. 5 ->
    let model = boostedLearn redSample treeLearner squareLoss depth
    depth, averageSquareError redSample model ]

let ssrPredictor = boostedLearn redSample treeLearner squareLoss 5

seq { for (wine, quality) in redSample -> quality, ssrPredictor wine }
|> Chart.Scatter
|> Chart.WithOptions scatterOptions
|> Chart.WithTitle "Wine Quality: Actual vs. Predicted (SSR)"
|> Chart.WithXTitle "Actual"
|> Chart.WithYTitle "Predicted"
|> Chart.Show
(* | |
Using a more complex Loss function, the Huber Loss | |
*) | |
// https://en.wikipedia.org/wiki/Huber_loss#Definition | |
/// Huber loss: half the squared error while the magnitude of the error
/// `x` does not exceed `delta`, and `delta * (|x| - delta / 2)` beyond.
let huber delta x =
    match abs x with
    | magnitude when magnitude <= delta -> 0.5 * pown x 2
    | magnitude -> delta * (magnitude - 0.5 * delta)
// shape of the Huber loss (delta = 1.0) for errors between -5 and 5
[ for x in -5.0 .. 0.1 .. 5.0 -> x, huber 1.0 x ]
|> Chart.Line
|> Chart.Show
// illustration: differentiating the Huber loss function | |
// produces the residuals, clipped to the range [-delta, +delta]. | |
let diffHuber = diff (huber 1.0)

[ for x in -5.0 .. 0.1 .. 5.0 -> x, diffHuber x ]
|> Chart.Line
|> Chart.Show

// error of the model boosted with the Huber loss, for 1 to 5 rounds
[ for depth in 1 .. 5 ->
    let model = boostedLearn redSample treeLearner (huber 1.0) depth
    depth, averageSquareError redSample model ]

let huberPredictor = boostedLearn redSample treeLearner (huber 1.0) 5

seq { for (wine, quality) in redSample -> quality, huberPredictor wine }
|> Chart.Scatter
|> Chart.WithOptions scatterOptions
|> Chart.WithTitle "Wine Quality: Actual vs. Predicted (Huber 1.0)"
|> Chart.WithXTitle "Actual"
|> Chart.WithYTitle "Predicted"
|> Chart.Show
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
how to download the file winequality-red.csv