use core::marker::PhantomData;
use core::time::Duration;
use alloc::boxed::Box;
use alloc::string::ToString;
use alloc::vec::Vec;
use burn_common::benchmark::{Benchmark, BenchmarkDurations};
use crate::channel::ComputeChannel;
use crate::client::ComputeClient;
use crate::server::ComputeServer;
use crate::tune::{AutotuneOperation, AutotuneOperationSet, TuneBenchmark, TuneCache};
/// Executes autotune benchmarking and caching
///
/// `S` is the compute server whose `AutotuneKey` indexes the cache; `C` is the
/// channel type used to reach that server.
#[derive(Debug, Default)]
pub struct Tuner<S, C>
where
    S: ComputeServer,
{
    /// Cache of autotune outcomes, keyed by the server's `AutotuneKey`.
    tune_cache: TuneCache<S::AutotuneKey>,
    /// Zero-sized marker tying the tuner to `C` without storing a channel.
    _channel: PhantomData<C>,
}
impl<S: ComputeServer, C: ComputeChannel<S>> Tuner<S, C> {
    /// Returns a tuner with empty cache
    pub fn new() -> Self {
        Self {
            tune_cache: TuneCache::new(),
            _channel: PhantomData,
        }
    }

    /// Executes the operation selected for `autotune_operation_set`.
    ///
    /// The cache is consulted first. On a hit, the operation it returns is
    /// executed directly. On a miss, the set is handed to `autotuning`, which
    /// benchmarks the candidates and returns the operation to execute.
    pub(crate) fn execute_autotune(
        &mut self,
        autotune_operation_set: Box<dyn AutotuneOperationSet<S::AutotuneKey>>,
        client: &ComputeClient<S, C>,
    ) {
        let operation = match self.tune_cache.try_cache(autotune_operation_set) {
            super::TuneCacheResult::Hit(ops) => ops,
            super::TuneCacheResult::Miss(set) => self.autotuning(set, client),
        };

        AutotuneOperation::execute(operation);
    }

    /// Benchmarks every candidate of the set, caches the index of the fastest
    /// one under the set's key, and returns the operation the cache then yields.
    ///
    /// # Panics
    ///
    /// Panics if the set provides no candidates, or if the cache still reports
    /// a miss right after the insertion.
    fn autotuning(
        &mut self,
        autotune_operation_set: Box<dyn AutotuneOperationSet<S::AutotuneKey>>,
        client: &ComputeClient<S, C>,
    ) -> Box<dyn AutotuneOperation> {
        let key = autotune_operation_set.key();
        let autotunables = autotune_operation_set.autotunables();

        // Run all autotune benchmarks. The name is captured first because the
        // operation is moved into its benchmark.
        let (names, results): (Vec<_>, Vec<BenchmarkDurations>) = autotunables
            .into_iter()
            .map(|op| {
                let name = op.name().to_string();
                (name, self.run_benchmark(op, client))
            })
            .unzip();

        for (name, result) in names.iter().zip(&results) {
            log::info!("Benchmark result {name}-{key} => {result}");
        }

        // Finds the fastest operation, stores it and returns it
        let fastest_index = self.find_fastest(results);
        // `names` and `results` were built pairwise, so the index is in range.
        let fastest_name = &names[fastest_index];
        log::info!("Fastest result {fastest_name}-{key}");

        self.tune_cache.cache_insert(key, fastest_index);

        match self.tune_cache.try_cache(autotune_operation_set) {
            super::TuneCacheResult::Hit(ops) => ops,
            super::TuneCacheResult::Miss(_) => panic!("We just inserted, should not miss"),
        }
    }

    /// Runs a single benchmark for `operation` using a clone of `client` and
    /// returns the measured durations.
    fn run_benchmark(
        &mut self,
        operation: Box<dyn AutotuneOperation>,
        client: &ComputeClient<S, C>,
    ) -> BenchmarkDurations {
        TuneBenchmark::new(operation, client.clone()).run()
    }

    /// Returns the index of the result with the smallest median duration.
    ///
    /// When several results share the smallest median, the first one wins.
    /// Any non-empty input yields an index, even if every median is
    /// `Duration::MAX`.
    ///
    /// # Panics
    ///
    /// Panics if `results` is empty.
    fn find_fastest(&self, results: Vec<BenchmarkDurations>) -> usize {
        let fastest: Option<(usize, Duration)> = results
            .iter()
            .map(|result| result.median_duration())
            .enumerate()
            // `min_by_key` keeps the first element among equal minima.
            .min_by_key(|&(_, median)| median);

        fastest
            .map(|(index, _)| index)
            .expect("At least one kernel needed. ")
    }
}
Associated Context | |
---|---|
Type | Code Snippet ( .rs ) |
Associated Tags | Tune Benchmarking Compute Server Tune Cache Autotune Operation Set Client-Side Rendering (MVC) Hit/Miss Mode MV Interaction Scalable Data Warp crystal-lang serde nim-lang |
💡 Smart Description | This code defines a struct called "Tuner" that is responsible for executing autotune benchmarking and caching. It has a cache for storing the results of autotune operations, keyed by the compute server's autotune key. The "execute_autotune" method takes an autotune operation set and a compute client: on a cache hit it executes the cached operation; on a cache miss it benchmarks every candidate operation, logs each benchmark result, caches the index of the fastest candidate, and then executes it. |
🔎 Suggested Searches | ComputeServer Autotune benchmarking and caching Tuner with autotuning operation set in ComputeServer C Generate tunables for compute server c Using Terraform to create a new instance of ComputeServerC Performing automatic testing Compute Server Rust Tuner struct execute_autotune method Rust Tuner struct autotuning method Rust Tuner struct run_benchmark method Rust Tuner struct find_fastest method Rust Tuner struct new method |
Related Links | https://kubernetes.io/docs/tasks/access-application-cluster/port-forward-access-application-cluster/ https://learn.microsoft.com/en-us/windows-server/security/tls/tls-registry-settings https://doc.rust-lang.org/alloc/vec/struct.Vec.html https://doc.rust-lang.org/core/marker/struct.PhantomData.html https://doc.rust-lang.org/alloc/boxed/struct.Box.html https://doc.rust-lang.org/core/time/struct.Duration.html |
Related People | Ankesh Bharti |
Sensitive Information | No Sensitive Information Detected |
Shareable Link | https://user-b3bc8d01-9ad8-45e2-893b-fe22c6a3662d-gnltj66l2a-uc.a.run.app/?p=5369488e6b |