-
-
Save YuriyGuts/7201f4f131398913b68a9d9bd85b4566 to your computer and use it in GitHub Desktop.
// This is an example of a C# application that uploads a CSV dataset to DataRobot,
// runs a classification project in autopilot mode and prints the leaderboard
// using the V2 REST API. Here you can also see an example of interacting with the
// asynchronous API routes (upload project, set target).
// Since the WebClient class has limited HTTP functionality, we use the newer
// System.Net.Http.HttpClient class (shipped in the Microsoft.Net.Http package).
// Its methods are async; as a result, the C# code uses asynchronous operations too.
// You may need to install the following packages for this code to work:
// * Newtonsoft.Json (aka Json.NET)
// * Microsoft.Net.Http
// * Microsoft.CSharp
using System; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.Linq; | |
using System.Net; | |
using System.Net.Http; | |
using System.Text; | |
using System.Threading.Tasks; | |
using Newtonsoft.Json; | |
namespace DataRobotV2ApiAutopilotDemo
{
    /// <summary>
    /// Console demo that creates a DataRobot project from a CSV file, sets the target,
    /// waits for autopilot to finish and prints the leaderboard through the V2 REST API.
    /// Any failed HTTP request is printed and then raised as an
    /// <see cref="HttpRequestException"/> so the workflow stops at the first error.
    /// </summary>
    class Program
    {
        // Root of the REST API; every request URL below is built from it.
        private static readonly string apiRootUrl = "https://app.datarobot.com/api/v2";

        // Placeholder token sent in the Authorization header; replace before running.
        private static readonly string apiToken = "token1234567890abcdef";

        // Path of the CSV file uploaded to create the project.
        private static readonly string dataset = "/path/to/dataset.csv";

        // Name of the column passed as the project target.
        private static readonly string target = "targetColumnName";

        // Pause between two consecutive polls of a status route.
        private static readonly TimeSpan AsyncPingDelay = TimeSpan.FromSeconds(5);

        /// <summary>
        /// Writes the HTTP status code and the response body of <paramref name="response"/> to the console.
        /// </summary>
        /// <param name="response">The response to report.</param>
        private static async Task PrintHttpError(HttpResponseMessage response)
        {
            var body = response.Content == null
                ? string.Empty
                : await response.Content.ReadAsStringAsync();
            Console.WriteLine($"ERROR! HTTP code {(int)response.StatusCode}, content: {body}");
        }

        /// <summary>
        /// Prints the error and throws when <paramref name="response"/> does not carry a success status code.
        /// </summary>
        /// <param name="response">The response to validate.</param>
        /// <param name="operation">Human-readable name of the operation, used in the exception message.</param>
        /// <exception cref="HttpRequestException">The status code is not a success code.</exception>
        private static async Task EnsureSuccess(HttpResponseMessage response, string operation)
        {
            if (response.IsSuccessStatusCode)
            {
                return;
            }

            await PrintHttpError(response);
            throw new HttpRequestException($"{operation} failed with HTTP code {(int)response.StatusCode}.");
        }

        /// <summary>
        /// Returns the first value of the Location header of <paramref name="response"/>.
        /// </summary>
        /// <param name="response">The response expected to carry a Location header.</param>
        /// <exception cref="HttpRequestException">The header is missing or empty.</exception>
        private static string GetLocationHeader(HttpResponseMessage response)
        {
            IEnumerable<string> values;
            var location = response.Headers.TryGetValues("Location", out values)
                ? values.FirstOrDefault()
                : null;
            if (string.IsNullOrEmpty(location))
            {
                throw new HttpRequestException(
                    $"The response (HTTP code {(int)response.StatusCode}) did not include a Location header.");
            }

            return location;
        }

        /// <summary>
        /// Polls <paramref name="url"/> every <see cref="AsyncPingDelay"/> until the server answers
        /// with HTTP 303 (See Other), then returns the Location header of that answer.
        /// </summary>
        /// <param name="client">HTTP client used for the polling requests.</param>
        /// <param name="url">Status URL to poll.</param>
        /// <returns>The URL the finished job resolved to.</returns>
        /// <exception cref="HttpRequestException">
        /// A poll returned a 4xx/5xx status code, or the 303 answer had no Location header.
        /// </exception>
        private static async Task<string> WaitForAsyncResolution(HttpClient client, string url)
        {
            Console.WriteLine($"Waiting for job {url} to complete...");
            while (true)
            {
                using (var response = await client.GetAsync(url))
                {
                    var statusCode = (int)response.StatusCode;
                    Console.Write($"Job status code: {statusCode}");

                    if (response.StatusCode == HttpStatusCode.SeeOther)
                    {
                        var resultLocation = GetLocationHeader(response);
                        Console.WriteLine($" - job complete, resolved to {resultLocation}");
                        return resultLocation;
                    }

                    // An error status will not turn into a 303 by itself; stop instead of polling forever.
                    if (statusCode >= 400)
                    {
                        Console.WriteLine(" - status request failed");
                        await PrintHttpError(response);
                        throw new HttpRequestException($"Polling {url} failed with HTTP code {statusCode}.");
                    }

                    Console.WriteLine(" - not done yet, will check later");
                }

                // The response is disposed before sleeping so its connection is released.
                await Task.Delay(AsyncPingDelay);
            }
        }

        /// <summary>
        /// Creates a project by posting <paramref name="datasetFileName"/> as multipart form data
        /// to the projects route and waits until the asynchronous job resolves.
        /// </summary>
        /// <param name="client">HTTP client used for the requests.</param>
        /// <param name="datasetFileName">Path of the dataset file; its file name is sent as the upload file name.</param>
        /// <returns>The URL the project-creation job resolved to.</returns>
        /// <exception cref="HttpRequestException">The upload or the status polling failed.</exception>
        private static async Task<string> UploadDataset(HttpClient client, string datasetFileName)
        {
            Console.WriteLine($"Creating a project from dataset: {datasetFileName}");
            var requestUrl = $"{apiRootUrl}/projects/";
            var projectName = Path.GetFileName(datasetFileName);

            string statusUrl;
            using (var content = new MultipartFormDataContent())
            {
                // Stream the file instead of loading it into memory; disposing the
                // multipart content also disposes the stream content and its file stream.
                content.Add(new StreamContent(File.OpenRead(datasetFileName)), "file", projectName);
                using (var response = await client.PostAsync(requestUrl, content))
                {
                    await EnsureSuccess(response, "Uploading the dataset");
                    statusUrl = GetLocationHeader(response);
                }
            }

            var projectUrl = await WaitForAsyncResolution(client, statusUrl);
            Console.WriteLine($"Project created at URL: {projectUrl}");
            return projectUrl;
        }

        /// <summary>
        /// Extracts the last non-empty path segment of <paramref name="projectUrl"/>,
        /// so the URL may end with or without a trailing slash.
        /// </summary>
        /// <param name="projectUrl">Project URL whose last segment is the project ID.</param>
        /// <returns>The project ID.</returns>
        /// <exception cref="ArgumentException">The URL is empty or contains no path segment.</exception>
        private static string GetProjectIdFromUrl(string projectUrl)
        {
            if (string.IsNullOrWhiteSpace(projectUrl))
            {
                throw new ArgumentException("The project URL must not be empty.", nameof(projectUrl));
            }

            var segments = projectUrl.Split(new[] { '/' }, StringSplitOptions.RemoveEmptyEntries);
            if (segments.Length == 0)
            {
                throw new ArgumentException("The project URL contains no path segments.", nameof(projectUrl));
            }

            return segments[segments.Length - 1];
        }

        /// <summary>
        /// Sends a PATCH request to the project's aim route with the target and modeling options,
        /// then waits until the asynchronous job resolves.
        /// </summary>
        /// <param name="client">HTTP client used for the requests.</param>
        /// <param name="projectId">ID of the project to configure.</param>
        /// <param name="targetName">Name of the target column.</param>
        /// <exception cref="HttpRequestException">The request or the status polling failed.</exception>
        private static async Task SetTarget(HttpClient client, string projectId, string targetName)
        {
            Console.WriteLine($"Setting target '{targetName}' for project {projectId}");
            var requestUrl = $"{apiRootUrl}/projects/{projectId}/aim/";
            var method = new HttpMethod("PATCH");
            var payload = new Dictionary<string, object>
            {
                {"mode", "auto"},
                {"target", targetName},
                {"metric", "LogLoss"},
                // These fields are not required but we specify them just to run the project faster.
                {"quickrun", true},
                {"cvMethod", "random"},
                {"validationType", "TVH"},
                {"validationPct", 16},
                {"holdoutPct", 20},
            };

            string statusUrl;
            using (var request = new HttpRequestMessage(method, requestUrl))
            {
                request.Content = new StringContent(
                    JsonConvert.SerializeObject(payload), Encoding.UTF8, "application/json");
                using (var response = await client.SendAsync(request))
                {
                    await EnsureSuccess(response, "Setting the project target");
                    statusUrl = GetLocationHeader(response);
                }
            }

            await WaitForAsyncResolution(client, statusUrl);
        }

        /// <summary>
        /// Polls the project's status route every <see cref="AsyncPingDelay"/> until the
        /// <c>autopilotDone</c> field of the JSON answer is true.
        /// </summary>
        /// <param name="client">HTTP client used for the requests.</param>
        /// <param name="projectId">ID of the project to watch.</param>
        /// <exception cref="HttpRequestException">A status request failed.</exception>
        private static async Task WaitForAutopilot(HttpClient client, string projectId)
        {
            Console.WriteLine("Waiting for autopilot to complete...");
            var requestUrl = $"{apiRootUrl}/projects/{projectId}/status/";
            while (true)
            {
                Console.Write("Checking autopilot status... ");

                bool isAutopilotDone;
                using (var response = await client.GetAsync(requestUrl))
                {
                    // Without this check an error body would be parsed and the loop would never end.
                    await EnsureSuccess(response, "Checking the autopilot status");
                    dynamic responseJson = JsonConvert.DeserializeObject(await response.Content.ReadAsStringAsync());
                    isAutopilotDone = responseJson.autopilotDone == true;
                }

                if (isAutopilotDone)
                {
                    Console.WriteLine("done");
                    return;
                }

                Console.WriteLine("not done yet, will check later");
                await Task.Delay(AsyncPingDelay);
            }
        }

        /// <summary>
        /// Requests the project's models route and prints the model type and the LogLoss
        /// validation score of every entry in the JSON answer.
        /// </summary>
        /// <param name="client">HTTP client used for the request.</param>
        /// <param name="projectId">ID of the project whose models are listed.</param>
        /// <exception cref="HttpRequestException">The request failed.</exception>
        private static async Task PrintLeaderboard(HttpClient client, string projectId)
        {
            Console.WriteLine("Retrieving the leaderboard...");
            var requestUrl = $"{apiRootUrl}/projects/{projectId}/models/";
            using (var response = await client.GetAsync(requestUrl))
            {
                await EnsureSuccess(response, "Retrieving the leaderboard");
                dynamic responseJson = JsonConvert.DeserializeObject(await response.Content.ReadAsStringAsync());
                foreach (dynamic model in responseJson)
                {
                    Console.WriteLine($"Model: {model.modelType}, Validation score: {model.metrics.LogLoss.validation}");
                }
            }
        }

        /// <summary>
        /// Runs the whole demo: upload the dataset, set the target, wait for autopilot
        /// and print the leaderboard, all with one shared <see cref="HttpClient"/>.
        /// </summary>
        /// <exception cref="HttpRequestException">Any of the steps failed.</exception>
        public static async Task RunDataRobotAutopilotDemo()
        {
            var httpClientHandler = new HttpClientHandler
            {
                // We will check for HTTP 303 status codes in the async routes manually.
                AllowAutoRedirect = false
            };

            using (var client = new HttpClient(httpClientHandler))
            {
                client.DefaultRequestHeaders.Add("Authorization", $"Token {apiToken}");

                // POST /projects/
                var projectUrl = await UploadDataset(client, dataset);
                var projectId = GetProjectIdFromUrl(projectUrl);

                // PATCH /projects/projectId/aim/
                await SetTarget(client, projectId, target);

                // GET /projects/projectId/status/
                await WaitForAutopilot(client, projectId);

                // GET /projects/projectId/models/
                await PrintLeaderboard(client, projectId);
            }
        }

        /// <summary>
        /// Entry point: runs the demo synchronously, reports an HTTP failure on standard error
        /// (setting exit code 1) and waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            try
            {
                // The demo is I/O-bound, so it is awaited directly rather than through Task.Run.
                RunDataRobotAutopilotDemo().GetAwaiter().GetResult();
                Console.WriteLine("Done! Press any key to exit.");
            }
            catch (HttpRequestException ex)
            {
                Console.Error.WriteLine($"Demo failed: {ex.Message}");
                Console.WriteLine("Press any key to exit.");
                Environment.ExitCode = 1;
            }

            Console.ReadKey(true);
        }
    }
}
Hi @iluveu28,
Yes, this is for creating a project and running the autopilot to find the best model. For predictions, there are multiple options:
- Using model deployments (recommended). In this case, you can use something like this to make predictions (assuming a .csv file with the data to score):
using System;
using System.Net;
using System.Text;
using System.IO;
namespace DataRobot.PredictionAPI.Examples.CSharp
{
    /// <summary>
    /// Console example that uploads a dataset file to a deployment's predictions route
    /// and prints the response body.
    /// </summary>
    class Program
    {
        // {0} is the prediction server host, {1} is the deployment ID.
        private static readonly string urlTemplate = "https://{0}/predApi/v1.0/deployments/{1}/predictions";
        private static readonly string predictionsHost = "INSERT-SERVER-URL-HERE.datarobot.com";
        // Placeholder: user name used for the Basic authorization header.
        private static readonly string userName = "INSERT-DATAROBOT-USERNAME-HERE";
        private static readonly string userApiToken = "INSERT-DATAROBOT-USER-API-TOKEN-HERE";
        // Sent as the "datarobot-key" request header.
        private static readonly string datarobotKey = "INSERT-PREDICTION-SERVER-SECRET-HERE";
        private static readonly string deploymentId = "INSERT-DEPLOYMENT-ID-HERE";

        /// <summary>
        /// Uploads the file named by the first argument and writes the response to standard output.
        /// </summary>
        /// <param name="args">Command-line arguments; <c>args[0]</c> is the path of the dataset to score.</param>
        /// <returns>
        /// 0 on success, 1 when the path argument is missing or the file does not exist,
        /// 2 when the request fails with a <see cref="WebException"/>.
        /// </returns>
        static int Main(string[] args)
        {
            if (args.Length == 0)
            {
                Console.Error.WriteLine("Error: please provide the path to the dataset to score.");
                return 1;
            }

            var filePath = args[0];
            if (!File.Exists(filePath))
            {
                Console.Error.WriteLine("Error: dataset file not found: " + filePath);
                return 1;
            }

            var uriString = string.Format(urlTemplate, predictionsHost, deploymentId);
            using (var client = new WebClient())
            {
                var authToken = string.Format("{0}:{1}", userName, userApiToken);
                var authTokenBytes = Encoding.ASCII.GetBytes(authToken);
                client.Headers.Add("Content-Type", "text/csv; charset=utf-8");
                client.Encoding = Encoding.UTF8;
                client.Headers.Add("Authorization", "Basic " + Convert.ToBase64String(authTokenBytes));
                client.Headers.Add("datarobot-key", datarobotKey);
                try
                {
                    byte[] responseBytes = client.UploadFile(uriString, "POST", filePath);
                    var responseStr = Encoding.UTF8.GetString(responseBytes);
                    Console.WriteLine(responseStr);
                }
                catch (WebException ex)
                {
                    Console.Error.WriteLine("Error when requesting predictions: " + ex.Message);

                    // Response is null when the failure happened before any HTTP response
                    // was received, so only read the body when there is one.
                    if (ex.Response != null)
                    {
                        using (var responseStream = ex.Response.GetResponseStream())
                        {
                            if (responseStream != null)
                            {
                                using (var reader = new StreamReader(responseStream))
                                {
                                    Console.Error.WriteLine(reader.ReadToEnd());
                                }
                            }
                        }
                    }

                    return 2;
                }
            }

            return 0;
        }
    }
}
- Predicting on the modeling workers (equivalent to drag-and-drop predictions on the UI). This is not recommended for production-sensitive workloads, but possible. In this case, you'll have to call the following API routes:
# Upload a dataset for predictions (async)
POST /api/v2/projects/(projectId)/predictionDatasets/fileUploads/
# Launch a prediction job (async)
POST /api/v2/projects/(projectId)/predictions/
# Retrieve predictions when the job is completed
GET /api/v2/projects/(projectId)/predictions/(predictionId)/
Unfortunately, I don't have a C# example for this method but if you're a DataRobot customer, you can contact your CFDS or support if you need assistance with this. They'll be able to provide more help depending on what kind of environment you're using.
Thanks a lot for your swift response. What I'm trying to achieve is this: I will retrain the model via the UI every now and then, whenever the accuracy drops.
The prediction part is what my customers will use. I will create a UI and a web API that will call the DataRobot API. The customers will upload their prediction CSV file via my custom UI; it will be processed by DataRobot using the pre-trained model above, and the results will be returned either online, by polling the DataRobot API every 5 seconds, or offline later, via email or stored on a shared drive somewhere. With this use case, would option 1 suffice or do I have to go with option 2?
@iluveu28 I would recommend option 1 for all cases, and option 2 only if it is not possible to use option 1. Model deployments, in addition to making predictions with lower latency, will allow you to track service health, monitor models for data drift / prediction drift, and allow easier model replacement. Option 2 does not have those features and is better suited for one-off batch predictions.
Do you happen to have code for option 1 that uses HttpClient instead of WebClient? The CSV file will have to be passed as bytes or as a stream.
@YuriyGuts Do you know why the /models API returns a maximum of only 10 models, versus the 20+ models shown in the web portal? Is there any way I can increase the limit?
Sorry, I got it working by removing the quickrun flag
Actually, where can I find the list of all REST API endpoints? I could only find the deployment API details in the official docs.
Can you create an example using manual auto pilot and fix it to use a specific algo?
@iluveu28 The complete REST API documentation for each release is available at the Support Portal, available to DataRobot customers: https://support.datarobot.com/hc/en-us/sections/203291123-Referenced-API-Files
It should also be distributed in the release documentation for on-premise releases. Unfortunately, I'm not at liberty to disclose the details of the enterprise product in unofficial channels. If you are a DataRobot customer or are affiliated with a DataRobot customer, I would highly recommend going through the official support channels for technical questions, which will ensure a more timely response and a better quality of support. In addition, we've recently launched the Community Portal that is also maintained by DataRobot employees and is a good place for a wide variety of product questions.
Thanks Yuri, is there a chance you could extend your sample code above to demonstrate manual mode? Btw I added you to linkedIn, I'm a big customer of yours, DR is a great product! :)
This is the training part, right? Do you have sample code for the prediction part?