Created
July 8, 2022 19:46
-
-
Save luisquintanilla/0227b62ea07cc6bc20174b38cf7aa3c1 to your computer and use it in GitHub Desktop.
F# AutoML 2.0 Taxi Fare Experiment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Get packages | |
#r "nuget:Microsoft.Data.Analysis,0.20.0-preview.22313.1" | |
#r "nuget:Microsoft.ML.AutoML,0.20.0-preview.22313.1" | |
// Import packages | |
open System.Text.Json; | |
open Microsoft.Data.Analysis; | |
open Microsoft.ML; | |
open Microsoft.ML.AutoML; | |
open Microsoft.ML.Data; | |
open Microsoft.ML.Trainers.FastTree; | |
open type Microsoft.ML.Transforms.OneHotEncodingEstimator; | |
open System.Net.Http | |
open Microsoft.FSharp.Core | |
// Downloads the resource at `url` and yields its body as a string.
// Returns an Async<string>; nothing is requested until the workflow is run.
let getData (url: string) =
    async {
        // The client is disposed once the download has completed.
        use httpClient = new HttpClient()
        return! httpClient.GetStringAsync(url) |> Async.AwaitTask
    }
// Download the taxi-fare CSV (blocking until it arrives) and parse it into a DataFrame
let df =
    let csvText =
        getData "https://github.com/dotnet/csharp-notebooks/raw/main/machine-learning/data/taxi-fare.csv"
        |> Async.RunSynchronously
    DataFrame.LoadCsvFromString(csvText)
// Initialize the MLContext shared by every ML.NET operation below
let ctx = new MLContext()
// Split into train / validation / test sets:
// first hold out 20% of the data, then halve that hold-out into
// a validation set and a test set.
let trainTestData = ctx.Data.TrainTestSplit(df, testFraction = 0.2)

let validationTestData =
    ctx.Data.TrainTestSplit(trainTestData.TestSet, testFraction = 0.5)

let trainSet, validationSet, testSet =
    trainTestData.TrainSet, validationTestData.TrainSet, validationTestData.TestSet
// Utility function to cast a pipeline component to IEstimator<ITransformer>.
// It accepts any value (an individual transform or a whole EstimatorChain),
// boxes it, and downcasts it; the downcast fails at runtime if the value
// is not compatible with IEstimator<ITransformer>.
let toIEstimator (est: 'a) =
    box est :?> IEstimator<ITransformer>
// Pipeline containing preprocessing transforms:
//   1. one-hot encode the categorical columns (in place, binary output),
//   2. replace missing values in the numeric columns (in place),
//   3. concatenate every prepared column into the "Features" vector.
let preprocessingPipeline =
    EstimatorChain()
        .Append(ctx.Transforms.Categorical.OneHotEncoding(
            [|
                InputOutputColumnPair(@"vendor_id", @"vendor_id")
                InputOutputColumnPair(@"payment_type", @"payment_type")
            |],
            outputKind = OutputKind.Binary))
        .Append(ctx.Transforms.ReplaceMissingValues(
            [|
                InputOutputColumnPair(@"rate_code", @"rate_code")
                InputOutputColumnPair(@"passenger_count", @"passenger_count")
                InputOutputColumnPair(@"trip_time_in_secs", @"trip_time_in_secs")
                InputOutputColumnPair(@"trip_distance", @"trip_distance")
            |]))
        // The one-hot encoded columns must be part of "Features"; otherwise
        // the encoding step above is computed but never seen by the trainer.
        .Append(ctx.Transforms.Concatenate(
            "Features",
            [|
                "vendor_id"
                "payment_type"
                "rate_code"
                "passenger_count"
                "trip_time_in_secs"
                "trip_distance"
            |]))
// AutoML sweepable regression estimator that predicts the "fare_amount" label
let autoMLEstimator =
    ctx.Auto().Regression(labelColumnName = "fare_amount")
// Final pipeline: preprocessing followed by the AutoML sweepable estimator.
// Note that the preprocessing chain is cast with the toIEstimator function
// before the AutoML sweepable estimator is appended.
let pipeline =
    let preprocessing = toIEstimator preprocessingPipeline
    preprocessing.Append(autoMLEstimator)
// Configure experiment settings: the pipeline to sweep, a 30-second
// training budget, the train/validation datasets, and R-squared computed
// from the "fare_amount" and "Score" columns as the evaluation metric.
let experiment =
    let newExperiment = ctx.Auto().CreateExperiment()

    newExperiment
        .SetPipeline(pipeline)
        .SetTrainingTimeInSeconds(30u)
        .SetDataset(trainSet, validationSet)
        .SetEvaluateMetric(RegressionMetric.RSquared, "fare_amount", "Score")
// Configure progress handler: print the raw text of every log event
// raised by the MLContext.
let f (e: LoggingEventArgs) =
    printfn "%s" e.RawMessage

ctx.Log.Add f
// Wraps AutoMLExperiment.RunAsync in an F# async workflow.
// The experiment is not started until the returned workflow is run.
let runExperimentAsync (autoMLExp: AutoMLExperiment) =
    async { return! autoMLExp.RunAsync() |> Async.AwaitTask }
// Run the experiment, blocking until it finishes
let expResults =
    runExperimentAsync experiment
    |> Async.RunSynchronously

// Display the metric of the experiment result
expResults.Metric
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment