Created
October 31, 2023 05:00
-
-
Save jonas1ara/13fde4be109ff1771f227f8740cbd3a4 to your computer and use it in GitHub Desktop.
Sentiment analysis using ML.NET and F#
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open Shared | |
open System | |
open System.IO | |
open Microsoft.ML | |
open Shared.Models.MLModel | |
// Locations of the training data and of the persisted model.

/// Directory that contains this source file.
let baseDirectory = __SOURCE_DIRECTORY__

/// Parent of the source directory; the MLData folder is resolved relative to it.
let baseDirectory' = Directory.GetParent(baseDirectory)

/// Directory part of the first command-line argument
/// (by .NET convention, the path of the running program).
let appPath = Path.GetDirectoryName(Environment.GetCommandLineArgs().[0])

/// Folder holding the labelled data set.
let baseDatasetsLocation = Path.Combine(baseDirectory'.FullName, "MLData")

/// Full path of the labelled data file.
/// Built with Path.Combine so the platform's directory separator is used
/// instead of a hard-coded '/'.
let dataPath = Path.Combine(baseDatasetsLocation, "yelp_labelled.csv")

/// Folder the trained model is written to (same MLData folder as the data set).
let baseModelsPath = Path.Combine(baseDirectory'.FullName, "MLData")

/// Full path of the serialized model archive.
let modelPath = Path.Combine(baseModelsPath, "SentimentModel.zip")
/// Combines `relativePath` with the directory that contains the executing assembly.
/// Because Path.Combine is used, an already-rooted `relativePath` is returned unchanged.
let absolutePath relativePath =
    let assemblyLocation = Reflection.Assembly.GetExecutingAssembly().Location
    let assemblyDirectory = FileInfo(assemblyLocation).Directory
    Path.Combine(assemblyDirectory.FullName, relativePath)
/// Loads the labelled data from `dataPath`, trains a FastTree binary classifier on
/// featurized text, prints evaluation metrics for the held-out test split and
/// writes the trained model to `modelPath`.
let buildTrainEvaluateAndSaveModel (mlContext : MLContext) =
    // STEP 1: Load the data (first row is a header) and hold out 20% of it for testing
    let allData = mlContext.Data.LoadFromTextFile<SentimentIssue>(dataPath, hasHeader = true)
    let split = mlContext.Data.TrainTestSplit(allData, testFraction = 0.2)
    let trainSet, testSet = split.TrainSet, split.TestSet

    // STEP 2: Turn the "Text" column into the "Features" column
    let featurizer = mlContext.Transforms.Text.FeaturizeText("Features", "Text")

    // STEP 3: Choose the learner and chain it after the featurizer
    let trainer =
        mlContext.BinaryClassification.Trainers.FastTree(labelColumnName = "Label", featureColumnName = "Features")
    let pipeline = featurizer.Append(trainer)

    // STEP 4: Fit the whole pipeline on the training split
    printfn "=============== Training the model ==============="
    let model = pipeline.Fit(trainSet)

    // STEP 5: Score the test split and report the resulting metrics
    printfn "===== Evaluating Model's accuracy with Test data ====="
    let scored = model.Transform(testSet)
    let metrics = mlContext.BinaryClassification.Evaluate(scored, "Label", "Score")
    Common.ConsoleHelper.printBinaryClassificationMetrics (trainer.ToString()) metrics

    // STEP 6: Persist the trained model, together with the training schema, as a .ZIP file
    use modelStream = new FileStream(modelPath, FileMode.Create, FileAccess.Write, FileShare.Write)
    mlContext.Model.Save(model, trainSet.Schema, modelStream)
    printfn "The model is saved to %s" (absolutePath modelPath)
/// Tests a single prediction: loads the model from the file at `modelPath`,
/// scores one hard-coded sample statement and prints the predicted sentiment
/// together with its probability.
let testSinglePrediction (mlContext : MLContext) =
    let sampleStatement = { Label = true; Text = "Sentiment analysis" }

    use stream = new FileStream(modelPath, FileMode.Open, FileAccess.Read, FileShare.Read)
    // Load also returns the input schema; it is not needed here, so it is discarded.
    let trainedModel, _ = mlContext.Model.Load(stream)

    // Create the prediction engine for the loaded model.
    // The engine is IDisposable, so bind it with `use` to release it on exit.
    use predEngine = mlContext.Model.CreatePredictionEngine<SentimentIssue, SentimentPrediction>(trainedModel)

    // Score
    let resultprediction = predEngine.Predict(sampleStatement)

    printfn "=============== Single Prediction ==============="
    printfn
        "Text: %s | Prediction: %s sentiment | Probability: %f"
        sampleStatement.Text
        (if resultprediction.Prediction then "Positive" else "Negative")
        resultprediction.Probability
    printfn "=================================================="
/// Program entry point: trains, evaluates and saves the model, then reloads it
/// to score a single sample. Always returns exit code 0.
[<EntryPoint>]
let main argv =
    // A fixed seed keeps results repeatable/deterministic across multiple trainings.
    let context = MLContext(seed = Nullable 1)

    // Create, train, evaluate and save the model.
    context |> buildTrainEvaluateAndSaveModel
    Common.ConsoleHelper.consoleWriteHeader "=============== End of training process ==============="

    // Make a single test prediction, loading the model from the .ZIP file.
    context |> testSinglePrediction
    Common.ConsoleHelper.consoleWriteHeader "=============== End of process ==============="

    0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Just create an F# console app targeting .NET 6 or higher, and inside it create a directory called MLData that contains the yelp_labelled data file. The file can be a .txt, .csv or .tsv; the choice is up to your preference.