Skip to content

Instantly share code, notes, and snippets.

@luisquintanilla
Created July 8, 2022 19:46
Show Gist options
  • Save luisquintanilla/0227b62ea07cc6bc20174b38cf7aa3c1 to your computer and use it in GitHub Desktop.
Save luisquintanilla/0227b62ea07cc6bc20174b38cf7aa3c1 to your computer and use it in GitHub Desktop.
F# AutoML 2.0 Taxi Fare Experiment
// Get packages
#r "nuget:Microsoft.Data.Analysis,0.20.0-preview.22313.1"
#r "nuget:Microsoft.ML.AutoML,0.20.0-preview.22313.1"
// Import packages
open System.Text.Json;
open Microsoft.Data.Analysis;
open Microsoft.ML;
open Microsoft.ML.AutoML;
open Microsoft.ML.Data;
open Microsoft.ML.Trainers.FastTree;
open type Microsoft.ML.Transforms.OneHotEncodingEstimator;
open System.Net.Http
open Microsoft.FSharp.Core
// Downloads the resource at `url` and yields its body as a string.
// The returned workflow is cold: no request is made until it is run.
let getData (url: string) =
    async {
        // One client per download; `use` disposes it when the workflow finishes.
        use http = new HttpClient()
        return! http.GetStringAsync(url) |> Async.AwaitTask
    }
// Fetch the taxi-fare CSV (blocking) and parse the text into a DataFrame
let df =
    let csvText =
        getData "https://github.com/dotnet/csharp-notebooks/raw/main/machine-learning/data/taxi-fare.csv"
        |> Async.RunSynchronously
    DataFrame.LoadCsvFromString(csvText)
// Initialize the MLContext used by every ML.NET call below
let ctx = MLContext()

// Split into train / validation / test sets.
// 20% of the rows are held out first, then that hold-out is split in half,
// giving roughly 80% / 10% / 10%.
let trainTestData = ctx.Data.TrainTestSplit(df, testFraction = 0.2)
let validationTestData = ctx.Data.TrainTestSplit(trainTestData.TestSet, testFraction = 0.5)

let trainSet = trainTestData.TrainSet
let validationSet = validationTestData.TrainSet
let testSet = validationTestData.TestSet
// Casts a pipeline element (a single transform or a whole EstimatorChain)
// to IEstimator<ITransformer>.
// The value is boxed and then downcast, so an argument that does not
// implement that interface fails at run time, not at compile time.
let toIEstimator (est: 'a) =
    box est :?> IEstimator<ITransformer>
// Pipeline containing the preprocessing transforms:
//   1. one-hot encode (binary output) the categorical columns in place,
//   2. replace missing values in the numeric columns in place,
//   3. concatenate every prepared column into "Features".
// The encoded categorical columns are part of "Features"; leaving them out
// would make step 1 dead work and hide those columns from the trainer.
let preprocessingPipeline =
    // Input and output names are identical, so each column is overwritten.
    let categoricalColumns =
        [| InputOutputColumnPair(@"vendor_id", @"vendor_id")
           InputOutputColumnPair(@"payment_type", @"payment_type") |]

    let numericColumns =
        [| InputOutputColumnPair(@"rate_code", @"rate_code")
           InputOutputColumnPair(@"passenger_count", @"passenger_count")
           InputOutputColumnPair(@"trip_time_in_secs", @"trip_time_in_secs")
           InputOutputColumnPair(@"trip_distance", @"trip_distance") |]

    // Every column produced by the two steps above, categorical ones first.
    let featureColumns =
        [| "vendor_id"
           "payment_type"
           "rate_code"
           "passenger_count"
           "trip_time_in_secs"
           "trip_distance" |]

    EstimatorChain()
        .Append(ctx.Transforms.Categorical.OneHotEncoding(categoricalColumns, outputKind = OutputKind.Binary))
        .Append(ctx.Transforms.ReplaceMissingValues(numericColumns))
        .Append(ctx.Transforms.Concatenate("Features", featureColumns))
// AutoML sweepable regression estimator, with "fare_amount" as the label column
let autoMLEstimator = ctx.Auto().Regression(labelColumnName = "fare_amount")

// Final pipeline.
// The preprocessing chain has to be cast with toIEstimator before the
// AutoML sweepable estimator is appended to it.
let pipeline =
    let preprocessing = toIEstimator preprocessingPipeline
    preprocessing.Append(autoMLEstimator)
// Experiment settings: sweep `pipeline` with a 30-second training budget,
// using trainSet and validationSet as the datasets, and evaluate with the
// R-squared regression metric on the "fare_amount" and "Score" columns.
let experiment =
    let newExperiment = ctx.Auto().CreateExperiment()

    newExperiment
        .SetPipeline(pipeline)
        .SetTrainingTimeInSeconds(30u)
        .SetDataset(trainSet, validationSet)
        .SetEvaluateMetric(RegressionMetric.RSquared, "fare_amount", "Score")
// Progress handler: print the raw message of each MLContext log event
let f (e: LoggingEventArgs) : unit =
    printfn $"{e.RawMessage}"

// Subscribe the handler to the context's log event
ctx.Log.Add f
// Wraps AutoMLExperiment.RunAsync in an F# async workflow.
// RunAsync is not called until the returned workflow is started.
let runExperimentAsync (autoMLExp: AutoMLExperiment) =
    async {
        return! autoMLExp.RunAsync() |> Async.AwaitTask
    }
// Run the experiment, blocking the current thread until it completes
let expResults =
    runExperimentAsync experiment
    |> Async.RunSynchronously

// Display the metric carried by the experiment result
expResults.Metric
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment