Created
March 27, 2016 23:51
-
-
Save mathias-brandewinder/4830fe68ebb208329ebd to your computer and use it in GitHub Desktop.
Kaggle Home Depot notes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// One search/product pairing: the search terms text and the title of
/// the product it is matched against.
type Observation =
    { /// Raw search terms text.
      SearchTerms: string
      /// Title of the product.
      ProductTitle: string }
    /// Length of the search terms string, converted to float.
    member this.SearchLength = float this.SearchTerms.Length
/// Relevance value attached to an observation.
type Relevance = float
/// Maps an observation to a relevance value.
type Predictor = Observation -> Relevance
/// A relevance value paired with the observation it belongs to.
type Example = Relevance * Observation
/// Builds a predictor from an array of examples.
type Learner = Example array -> Predictor
/// Baseline learner: ignores the observation and always predicts the mean
/// relevance of the examples it was trained on.
/// Averaging raises if the sample is empty.
let trivialModel : Learner =
    fun examples ->
        // Evaluated once per training call; every prediction reuses it.
        let meanRelevance = examples |> Seq.averageBy fst
        fun (_: Observation) -> meanRelevance
/// A feature turns an observation into a single number.
type Feature = Observation -> float

/// Applies every feature to the observation and returns the resulting
/// values, in the same order as `features`.
let extractFeatures (features: Feature[]) (obs: Observation) =
    [| for feature in features -> feature obs |]
/// Feature: length of the search terms string.
let ``Search Terms characters`` : Feature =
    fun obs -> float obs.SearchTerms.Length
/// Feature: number of distinct characters that occur in both the search
/// terms and the product title. Characters are compared exactly, so the
/// match is case-sensitive and whitespace counts like any other character.
let ``Matching characters between title and search terms`` : Feature =
    fun obs ->
        // Reduce a string to the set of distinct characters it contains.
        let distinctChars (text: string) = Set.ofSeq text
        let shared =
            Set.intersect (distinctChars obs.SearchTerms) (distinctChars obs.ProductTitle)
        float (Set.count shared)
#I "../packages" | |
#r @"FSharp.Data/lib/net40/FSharp.Data.dll" | |
open FSharp.Data | |
/// Typed view over the training CSV; the schema is inferred from the file.
type Training = CsvProvider<"""../data/train.csv""">

/// Every row exposed by the provider's sample, converted to a
/// (relevance, observation) example.
let training =
    [|
        for row in Training.GetSample().Rows do
            let observation =
                { SearchTerms = row.Search_term
                  ProductTitle = row.Product_title }
            yield (float row.Relevance, observation)
    |]
#r @"Accord/lib/net45/Accord.dll" | |
#r @"Accord.Math/lib/net45/Accord.Math.dll" | |
#r @"Accord.Statistics/lib/net45/Accord.Statistics.dll" | |
open Accord.Statistics.Models.Regression | |
open Accord.Statistics.Models.Regression.Fitting | |
/// Features fed to the logistic learner; their order fixes the order of
/// the regression inputs.
let model : Feature[] =
    [| ``Search Terms characters``
       ``Matching characters between title and search terms`` |]
/// Learner that fits a logistic regression over the features listed in
/// `model` and returns a predictor based on the fitted regression.
///
/// Labels are rescaled with (x - 1) / 2 before fitting and predictions are
/// mapped back with (x * 2) + 1 (presumably relevance lies in 1..3, which
/// maps onto 0..1 -- confirm against the data).
///
/// Fitting repeats `teacher.Run` until the value it returns drops below
/// `tolerance`. The loop is also stopped when the iteration budget is spent
/// or the returned value is NaN, so a fit that never reaches the tolerance
/// cannot hang the script; in that case the regression is returned in
/// whatever state the last pass left it.
let logisticModel : Learner =
    fun sample ->
        let inputsCount = model.Length
        let regression = LogisticRegression(inputsCount)
        let teacher = IterativeReweightedLeastSquares(regression)

        let labelNormalize x = (x - 1.) / 2.
        let labelDenormalize x = (x * 2.) + 1.

        // Split the examples into a feature matrix and a vector of rescaled labels.
        let input, output =
            sample
            |> Array.map (fun (label, obs) -> extractFeatures model obs, labelNormalize label)
            |> Array.unzip

        let tolerance = 0.01
        let maxIterations = 1000

        let rec learn iteration =
            let error = teacher.Run(input, output)
            let converged = error < tolerance
            // NaN never compares below the tolerance, so it must be caught explicitly.
            let giveUp = System.Double.IsNaN error || iteration >= maxIterations
            if converged || giveUp then regression else learn (iteration + 1)

        let logPredictor = learn 1

        let predictor (obs: Observation) =
            obs
            |> extractFeatures model
            |> logPredictor.Compute
            |> labelDenormalize

        predictor
/// Predictor obtained by running the logistic learner on the training set.
let logisticPredictor = logisticModel training

// Print actual vs. predicted relevance for the first 10 training examples.
for (actual, obs) in Seq.take 10 training do
    printfn "Actual: %.2f, Predicted: %.2f" actual (logisticPredictor obs)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment