-
-
Save YuriyGuts/7201f4f131398913b68a9d9bd85b4566 to your computer and use it in GitHub Desktop.
// This is an example of a C# application that uploads a CSV dataset to DataRobot,
// runs a classification project in autopilot mode and prints the leaderboard
// using the V2 REST API. Here you can also see an example of interacting with the
// asynchronous API routes (upload project, set target).
// Since the WebClient class has limited HTTP functionality, we use the newer
// System.Net.Http.HttpClient class (shipped in the Microsoft.Net.Http package).
// Its methods are async; as a result, the C# code uses asynchronous operations too.
// You may need to install the following packages for this code to work:
// * Newtonsoft.Json (aka Json.NET)
// * Microsoft.Net.Http
// * Microsoft.CSharp
using System; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.Linq; | |
using System.Net; | |
using System.Net.Http; | |
using System.Text; | |
using System.Threading.Tasks; | |
using Newtonsoft.Json; | |
namespace DataRobotV2ApiAutopilotDemo | |
{ | |
class Program | |
{ | |
// Sample configuration. None of these values is reassigned after start-up,
// so they are declared readonly.

// Root URL of the DataRobot V2 REST API; request URLs are built by appending a route to it.
private static readonly string apiRootUrl = "https://app.datarobot.com/api/v2";

// API token sent in the "Authorization: Token ..." header (placeholder value in this sample).
private static readonly string apiToken = "token1234567890abcdef";

// Path of the CSV file uploaded to create the project.
private static readonly string dataset = "/path/to/dataset.csv";

// Name of the dataset column used as the modeling target.
private static readonly string target = "targetColumnName";

// Pause between two consecutive status checks while polling.
private static readonly TimeSpan AsyncPingDelay = TimeSpan.FromSeconds(5);
/// <summary>
/// Writes the numeric HTTP status code and the body of a response to the console.
/// </summary>
/// <param name="response">The HTTP response to report.</param>
private static async Task PrintHttpError(HttpResponseMessage response)
{
    var body = await response.Content.ReadAsStringAsync();
    var message = $"ERROR! HTTP code {(int)response.StatusCode}, content: {body}";
    Console.WriteLine(message);
}
/// <summary>
/// Polls an asynchronous job status URL until the server answers with HTTP 303 (See Other)
/// and returns the value of the <c>Location</c> header of that response.
/// </summary>
/// <param name="client">HTTP client used for polling; automatic redirects must be disabled
/// on it, otherwise the 303 response is never observed here.</param>
/// <param name="url">Status URL of the job to wait for.</param>
/// <returns>The URL the finished job resolved to.</returns>
/// <exception cref="HttpRequestException">The status route answered with a 4xx/5xx code.</exception>
/// <exception cref="InvalidOperationException">The 303 response carried no Location header.</exception>
private static async Task<string> WaitForAsyncResolution(HttpClient client, string url)
{
    Console.WriteLine($"Waiting for job {url} to complete...");
    while (true)
    {
        // Dispose every polling response so connections are released while we wait.
        using (var response = await client.GetAsync(url))
        {
            var statusCode = (int)response.StatusCode;
            Console.Write($"Job status code: {statusCode}");

            if (response.StatusCode == HttpStatusCode.SeeOther)
            {
                IEnumerable<string> locations;
                var resultLocation = response.Headers.TryGetValues("Location", out locations)
                    ? locations.FirstOrDefault()
                    : null;
                if (string.IsNullOrEmpty(resultLocation))
                {
                    Console.WriteLine();
                    throw new InvalidOperationException(
                        $"Job {url} answered with HTTP 303 but without a Location header.");
                }

                Console.WriteLine($" - job complete, resolved to {resultLocation}");
                return resultLocation;
            }

            // An error status will not turn into a 303 by waiting; fail instead of polling forever.
            if (statusCode >= 400)
            {
                Console.WriteLine(" - request failed");
                await PrintHttpError(response);
                throw new HttpRequestException(
                    $"Polling job {url} failed with HTTP status code {statusCode}.");
            }

            Console.WriteLine(" - not done yet, will check later");
        }

        await Task.Delay(AsyncPingDelay);
    }
}
/// <summary>
/// Creates a project by uploading a dataset file as multipart form data to the
/// <c>/projects/</c> route, then waits for the asynchronous creation job to finish.
/// </summary>
/// <param name="client">HTTP client used for the upload and the subsequent polling.</param>
/// <param name="datasetFileName">Path of the file to upload; its file name is used as the
/// multipart file name.</param>
/// <returns>The URL returned by the finished creation job.</returns>
/// <exception cref="HttpRequestException">The upload request was rejected.</exception>
/// <exception cref="InvalidOperationException">The response carried no Location header.</exception>
private static async Task<string> UploadDataset(HttpClient client, string datasetFileName)
{
    Console.WriteLine($"Creating a project from dataset: {datasetFileName}");
    var requestUrl = $"{apiRootUrl}/projects/";
    var projectName = Path.GetFileName(datasetFileName);

    string statusUrl;
    using (var content = new MultipartFormDataContent())
    {
        content.Add(new ByteArrayContent(File.ReadAllBytes(datasetFileName)), "file", projectName);
        using (var response = await client.PostAsync(requestUrl, content))
        {
            if (!response.IsSuccessStatusCode)
            {
                await PrintHttpError(response);
                // Stop here: a failed response has no status URL to poll.
                throw new HttpRequestException(
                    $"Creating a project failed with HTTP status code {(int)response.StatusCode}.");
            }

            IEnumerable<string> locations;
            statusUrl = response.Headers.TryGetValues("Location", out locations)
                ? locations.FirstOrDefault()
                : null;
            if (string.IsNullOrEmpty(statusUrl))
            {
                throw new InvalidOperationException(
                    "The project creation response did not contain a Location header.");
            }
        }
    }

    // The upload content is disposed above, so the file bytes are not kept alive while polling.
    var projectUrl = await WaitForAsyncResolution(client, statusUrl);
    Console.WriteLine($"Project created at URL: {projectUrl}");
    return projectUrl;
}
/// <summary>
/// Extracts the project ID, i.e. the last non-empty path segment, from a project URL.
/// Works whether or not the URL ends with a slash.
/// </summary>
/// <param name="projectUrl">Project URL, e.g. <c>.../projects/{id}/</c>.</param>
/// <returns>The last path segment of <paramref name="projectUrl"/>.</returns>
/// <exception cref="ArgumentException">The URL is null, blank or has no usable segment.</exception>
private static string GetProjectIdFromUrl(string projectUrl)
{
    if (string.IsNullOrWhiteSpace(projectUrl))
    {
        throw new ArgumentException("Project URL must not be empty.", nameof(projectUrl));
    }

    // Drop trailing slashes first so that ".../id" and ".../id/" yield the same segment.
    var trimmedUrl = projectUrl.Trim().TrimEnd('/');
    var projectId = trimmedUrl.Substring(trimmedUrl.LastIndexOf('/') + 1);
    if (projectId.Length == 0)
    {
        throw new ArgumentException(
            $"Cannot extract a project ID from URL '{projectUrl}'.", nameof(projectUrl));
    }

    return projectId;
}
/// <summary>
/// Sends a PATCH request to the project's <c>/aim/</c> route with the target and modeling
/// options, then waits for the resulting asynchronous job to finish.
/// </summary>
/// <param name="client">HTTP client used for the request and the subsequent polling.</param>
/// <param name="projectId">ID of the project to configure.</param>
/// <param name="targetName">Name of the target column.</param>
/// <exception cref="HttpRequestException">The request was rejected.</exception>
/// <exception cref="InvalidOperationException">The response carried no Location header.</exception>
private static async Task SetTarget(HttpClient client, string projectId, string targetName)
{
    Console.WriteLine($"Setting target '{targetName}' for project {projectId}");
    var requestUrl = $"{apiRootUrl}/projects/{projectId}/aim/";
    var method = new HttpMethod("PATCH");
    var payload = new Dictionary<string, object>
    {
        {"mode", "auto"},
        {"target", targetName},
        {"metric", "LogLoss"},
        // These fields are not required but we specify them just to run the project faster.
        {"quickrun", true},
        {"cvMethod", "random"},
        {"validationType", "TVH"},
        {"validationPct", 16},
        {"holdoutPct", 20},
    };

    string statusUrl;
    using (var request = new HttpRequestMessage(method, requestUrl))
    {
        request.Content = new StringContent(JsonConvert.SerializeObject(payload), Encoding.UTF8, "application/json");
        using (var response = await client.SendAsync(request))
        {
            if (!response.IsSuccessStatusCode)
            {
                await PrintHttpError(response);
                // Stop here: a failed response has no status URL to poll.
                throw new HttpRequestException(
                    $"Setting the target failed with HTTP status code {(int)response.StatusCode}.");
            }

            IEnumerable<string> locations;
            statusUrl = response.Headers.TryGetValues("Location", out locations)
                ? locations.FirstOrDefault()
                : null;
            if (string.IsNullOrEmpty(statusUrl))
            {
                throw new InvalidOperationException(
                    "The set-target response did not contain a Location header.");
            }
        }
    }

    await WaitForAsyncResolution(client, statusUrl);
}
/// <summary>
/// Polls the project's <c>/status/</c> route until the <c>autopilotDone</c> field of the
/// JSON response is true, pausing <c>AsyncPingDelay</c> between checks.
/// </summary>
/// <param name="client">HTTP client used for polling.</param>
/// <param name="projectId">ID of the project to watch.</param>
/// <exception cref="HttpRequestException">The status route answered with a non-success code.</exception>
/// <exception cref="InvalidOperationException">The status response had no JSON content.</exception>
private static async Task WaitForAutopilot(HttpClient client, string projectId)
{
    Console.WriteLine("Waiting for autopilot to complete...");
    var requestUrl = $"{apiRootUrl}/projects/{projectId}/status/";
    while (true)
    {
        Console.Write("Checking autopilot status... ");

        bool isAutopilotDone;
        using (var response = await client.GetAsync(requestUrl))
        {
            if (!response.IsSuccessStatusCode)
            {
                Console.WriteLine("failed");
                await PrintHttpError(response);
                // An error payload has no autopilotDone field; polling on would never end.
                throw new HttpRequestException(
                    $"Checking the autopilot status failed with HTTP status code {(int)response.StatusCode}.");
            }

            object parsedBody = JsonConvert.DeserializeObject(await response.Content.ReadAsStringAsync());
            if (parsedBody == null)
            {
                throw new InvalidOperationException("The autopilot status response was empty.");
            }

            dynamic responseJson = parsedBody;
            isAutopilotDone = responseJson.autopilotDone == true;
        }

        if (isAutopilotDone)
        {
            Console.WriteLine("done");
            return;
        }

        Console.WriteLine("not done yet, will check later");
        await Task.Delay(AsyncPingDelay);
    }
}
/// <summary>
/// Requests the project's <c>/models/</c> route and prints the model type and the
/// LogLoss validation score of every returned model.
/// </summary>
/// <param name="client">HTTP client used for the request.</param>
/// <param name="projectId">ID of the project whose models are listed.</param>
/// <exception cref="HttpRequestException">The request was rejected.</exception>
/// <exception cref="InvalidOperationException">The response had no JSON content.</exception>
private static async Task PrintLeaderboard(HttpClient client, string projectId)
{
    Console.WriteLine("Retrieving the leaderboard...");
    var requestUrl = $"{apiRootUrl}/projects/{projectId}/models/";
    using (var response = await client.GetAsync(requestUrl))
    {
        if (!response.IsSuccessStatusCode)
        {
            await PrintHttpError(response);
            // Do not try to enumerate an error payload as if it were the model list.
            throw new HttpRequestException(
                $"Retrieving the leaderboard failed with HTTP status code {(int)response.StatusCode}.");
        }

        object parsedBody = JsonConvert.DeserializeObject(await response.Content.ReadAsStringAsync());
        if (parsedBody == null)
        {
            throw new InvalidOperationException("The leaderboard response was empty.");
        }

        dynamic responseJson = parsedBody;
        foreach (dynamic model in responseJson)
        {
            Console.WriteLine($"Model: {model.modelType}, Validation score: {model.metrics.LogLoss.validation}");
        }
    }
}
/// <summary>
/// Runs the whole demo: uploads the dataset, sets the target, waits for autopilot
/// to finish and prints the leaderboard, all through one shared HTTP client.
/// </summary>
public static async Task RunDataRobotAutopilotDemo()
{
    // Automatic redirects stay off because the async routes are resolved by
    // checking for HTTP 303 status codes manually.
    var handler = new HttpClientHandler();
    handler.AllowAutoRedirect = false;

    using (var client = new HttpClient(handler))
    {
        var authorizationValue = $"Token {apiToken}";
        client.DefaultRequestHeaders.Add("Authorization", authorizationValue);

        // Step 1: POST /projects/
        var createdProjectUrl = await UploadDataset(client, dataset);
        var id = GetProjectIdFromUrl(createdProjectUrl);

        // Step 2: PATCH /projects/projectId/aim/
        await SetTarget(client, id, target);

        // Step 3: GET /projects/projectId/status/
        await WaitForAutopilot(client, id);

        // Step 4: GET /projects/projectId/models/
        await PrintLeaderboard(client, id);
    }
}
/// <summary>
/// Entry point: runs the asynchronous demo to completion, then waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
public static void Main(string[] args)
{
    // Block the synchronous entry point until the asynchronous demo has finished.
    var demoRun = Task.Run(async () => await RunDataRobotAutopilotDemo());
    demoRun.GetAwaiter().GetResult();

    Console.WriteLine("Done! Press any key to exit.");
    Console.ReadKey(true);
}
} | |
} |
Sorry, I got it working by removing the quickrun flag
Actually, where do you find the list of all REST API endpoints? I could only find the deploy API details in the official docs.
Can you create an example using manual auto pilot and fix it to use a specific algo?
@iluveu28 The complete REST API documentation for each release is available at the Support Portal, available to DataRobot customers: https://support.datarobot.com/hc/en-us/sections/203291123-Referenced-API-Files
It should also be distributed in the release documentation for on-premise releases. Unfortunately, I'm not at liberty to disclose the details of the enterprise product in unofficial channels. If you are a DataRobot customer or are affiliated with a DataRobot customer, I would highly recommend going through the official support channels for technical questions, which will ensure a more timely response and a better quality of support. In addition, we've recently launched the Community Portal that is also maintained by DataRobot employees and is a good place for a wide variety of product questions.
Thanks Yuri, is there a chance you could extend your sample code above to demonstrate manual mode? Btw, I added you on LinkedIn. I'm a big customer of yours, DR is a great product! :)
@YuriyGuts Do you know why the /models API only returns a maximum of 10 models vs. 20+ models via the web portal? Is there any way I can increase the limit?