Skip to content

Instantly share code, notes, and snippets.

@indiv0
Created December 10, 2023 04:57
Show Gist options
  • Save indiv0/94a443f953622ca471cb68e366bd6406 to your computer and use it in GitHub Desktop.
day_6_1_gpu.rs
use std::borrow;
use wgpu::util::DeviceExt as _;
// ===========
// === Run ===
// ===========
/// Parses the puzzle input and computes the answer on the GPU.
///
/// Blocks the current thread until the GPU computation completes; panics if
/// no suitable GPU adapter is available.
pub fn run(s: &str) -> u64 {
    let numbers = parse_input(s);
    pollster::block_on(execute_gpu(&numbers)).unwrap()
}
// ===================
// === Parse Input ===
// ===================
#[inline(always)]
pub fn parse_input(s: &str) -> [u32; 8] {
    /// Converts an ASCII digit byte (`b'0'..=b'9'`) to its numeric value.
    const fn digit(b: u8) -> u32 {
        (b - b'0') as u32
    }
    // The input is two fixed-width lines (times, then distances). Both line
    // prefixes are assumed to occupy the same number of columns, so the
    // longer "Distance: " prefix length is used for both.
    const PREFIX: &str = "Distance: ";
    const PREFIX_LEN: usize = PREFIX.len();
    // Columns of whitespace between numeric fields.
    const SPACE_LEN: usize = 3;
    const NL_LEN: usize = 1;
    // Field widths are 3, 4, 4, 4 columns; digits are right-aligned.
    const LINE_LEN: usize = PREFIX_LEN + 3 + SPACE_LEN + 4 + SPACE_LEN + 4 + SPACE_LEN + 4 + NL_LEN;
    const TOTAL_LEN: usize = LINE_LEN * 2;
    let s = s.as_bytes();
    // Exclusive upper bound: the original `..=LINE_LEN` grabbed one byte past
    // the first line that was never read (and required one extra input byte).
    let t = &s[PREFIX_LEN..LINE_LEN];
    // Each time is a 2-digit number right-aligned in its field.
    let times = [
        digit(t[1]) * 10 + digit(t[2]),
        digit(t[8]) * 10 + digit(t[9]),
        digit(t[15]) * 10 + digit(t[16]),
        digit(t[22]) * 10 + digit(t[23]),
    ];
    let d = &s[(LINE_LEN + PREFIX_LEN)..TOTAL_LEN - NL_LEN];
    // The first distance is 3 digits; the remaining three are 4 digits.
    let distances = [
        digit(d[0]) * 100 + digit(d[1]) * 10 + digit(d[2]),
        digit(d[6]) * 1000 + digit(d[7]) * 100 + digit(d[8]) * 10 + digit(d[9]),
        digit(d[13]) * 1000 + digit(d[14]) * 100 + digit(d[15]) * 10 + digit(d[16]),
        digit(d[20]) * 1000 + digit(d[21]) * 100 + digit(d[22]) * 10 + digit(d[23]),
    ];
    // Layout expected by the shader: four times followed by four distances.
    [
        times[0],
        times[1],
        times[2],
        times[3],
        distances[0],
        distances[1],
        distances[2],
        distances[3],
    ]
}
// Converts an ASCII digit byte (`b'0'..=b'9'`) to its numeric `u8` value.
// The caller is responsible for passing only ASCII digits; other bytes
// produce meaningless values (or a debug-mode underflow panic).
#[inline(always)]
const fn b_to_u8(b: u8) -> u8 {
    const ASCII_ZERO: u8 = b'0';
    b - ASCII_ZERO
}
// Converts an ASCII digit byte (`b'0'..=b'9'`) to its numeric value as a
// `u16`, for building multi-digit numbers without overflow.
#[inline(always)]
const fn b_to_u16(b: u8) -> u16 {
    (b - b'0') as u16
}
// ===================
// === Execute GPU ===
// ===================
/// Runs the race simulation on the first available GPU adapter.
///
/// `numbers` holds the four race times followed by the four record
/// distances (as produced by `parse_input`). Returns `None` if no suitable
/// GPU adapter is found; panics if device creation fails.
async fn execute_gpu(numbers: &[u32]) -> Option<u64> {
    // Instantiates instance of WebGPU.
    let instance = wgpu::Instance::default();
    // `request_adapter` instantiates the general connection to the GPU.
    let adapter = instance
        .request_adapter(&wgpu::RequestAdapterOptions::default())
        .await?;
    // `request_device` instantiates the feature specific connection to the GPU,
    // defining some parameters, `features` being the available features.
    let (device, queue) = adapter
        .request_device(
            &wgpu::DeviceDescriptor {
                label: None,
                features: wgpu::Features::empty(),
                limits: wgpu::Limits::downlevel_defaults(),
            },
            None,
        )
        .await
        .unwrap();
    let wins = execute_gpu_inner(&device, &queue, numbers).await?;
    // Multiply the per-race win counts together. Accumulate in `u64` so the
    // product cannot overflow `u32` before the widening cast (the original
    // folded in `u32` and only widened the finished product).
    Some(wins.iter().take(4).fold(1u64, |acc, &n| acc * n as u64))
}
// Dispatches the race-simulation compute shader and reads back the results.
//
// Uploads `numbers` to a GPU storage buffer, runs one shader workgroup per
// race (`numbers.len() / 2` dispatches), then copies the buffer back and
// returns its full contents. With the 8-element layout from `parse_input`,
// the shader overwrites indices 0..4 with the per-race win counts while
// indices 4..8 keep the original distances.
//
// NOTE(review): on a buffer-mapping failure this panics instead of
// returning `None` — confirm that is intentional.
async fn execute_gpu_inner(
device: &wgpu::Device,
queue: &wgpu::Queue,
numbers: &[u32],
) -> Option<Vec<u32>> {
// WGSL compute shader; derivation of the quadratic-root formula is in the
// comments embedded in the shader source itself.
const SHADER: &str = r#"@group(0)
@binding(0)
var<storage, read_write> v_indices: array<u32>; // this is used as both input and output for convenience
// Let,
// a = acceleration
// v = velocity
// t_a = time for acceleration
// t_m = time for movement
// T = total time
// d = distance travelled
// D = race record distance + 1
//
// We have,
// d = (a * t_a) * t_m
// = (a * t_a) * (T - t_a)
// = a * t_a * T - a * t_a * t_a
// = -a * t_a^2 + a * t_a * T
// T = t_a + t_m => t_m = T - t_a
//
// The roots of f(t_a) = D gives us the values of t_a, between which, d will be greater or equal to
// D.
//
// t_a = ( aT +- sqrt( (aT)^2 - 4aD ) ) / 2
// = ( aT +- det ) / 2
//
// where det = sqrt( (aT)^2 - 4aD )
//
// => t1 = (aT - det) / 2
// and t2 = (aT + det) / 2
//
// The number of ways to win is the number of valid integer time values between the two roots =
// ceil(t1) - floor(t2) + 1
fn simulate_race(t_base: u32, d_base: u32) -> u32 {
//assert!(t <= 999);
//assert!(d <= 9999);
var t: f32 = f32(t_base);
var d: f32 = f32(d_base) + 0.5;
let at = t;
// FIXME [NP]: use square?
let det = sqrt(at * at - 4. * d);
//assert!(det <= 50.);
let t1 = ceil((at - det) / 2.);
let t2 = floor((at + det) / 2.);
let n = t2 - t1 + 1.;
//assert!(n.is_sign_positive());
return u32(n);
}
@compute
@workgroup_size(1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
var y = global_id.x + 4u;
v_indices[global_id.x] = simulate_race(v_indices[global_id.x], v_indices[y]);
}"#;
// Loads the shader from WGSL.
let cs_module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
label: None,
source: wgpu::ShaderSource::Wgsl(borrow::Cow::Borrowed(SHADER)),
});
// Gets the size in bytes of the input slice; used for all three buffers.
let size = std::mem::size_of_val(numbers) as wgpu::BufferAddress;
// Instantiates buffer without data.
// `usage` of buffer specifies how it can be used:
// `BufferUsages::MAP_READ` allows it to be read (outside the shader).
// `BufferUsages::COPY_DST` allows it to be the destination of the copy.
let staging_buffer = device.create_buffer(&wgpu::BufferDescriptor {
label: None,
size,
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
// Instantiates buffer with data (`numbers`).
// Usage allowing the buffer to be:
// A storage buffer (can be bound within a bind group and thus available to a shader).
// The destination of a copy.
// The source of a copy.
let storage_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("Storage Buffer"),
contents: bytemuck::cast_slice(numbers),
usage: wgpu::BufferUsages::STORAGE
| wgpu::BufferUsages::COPY_DST
| wgpu::BufferUsages::COPY_SRC,
});
// A bind group defines how buffers are accessed by shaders.
// It is to WebGPU what a descriptor set is to Vulkan.
// `binding` here refers to the `binding` of a buffer in the shader (`layout(set = 0, binding = 0) buffer`).
// A pipeline specifies the operation of a shader.
// Instantiates the pipeline. `layout: None` lets wgpu derive the bind group
// layout from the shader module itself.
let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
label: None,
layout: None,
module: &cs_module,
entry_point: "main",
});
// Instantiates the bind group, once again specifying the binding of buffers.
let bind_group_layout = compute_pipeline.get_bind_group_layout(0);
let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
label: None,
layout: &bind_group_layout,
entries: &[wgpu::BindGroupEntry {
binding: 0,
resource: storage_buffer.as_entire_binding(),
}],
});
// A command encoder executes one or many pipelines.
// It is to WebGPU what a command buffer is to Vulkan.
let mut encoder =
device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
{
// Scoped so the compute pass (which borrows the encoder) ends before
// `copy_buffer_to_buffer` below.
let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
label: None,
timestamp_writes: None,
});
cpass.set_pipeline(&compute_pipeline);
cpass.set_bind_group(0, &bind_group, &[]);
cpass.insert_debug_marker("compute race iterations");
cpass.dispatch_workgroups(numbers.len() as u32 / 2, 1, 1); // Number of cells to run, the (x,y,z) size of item being processed
}
// Adds a copy operation to the command encoder.
// Will copy data from storage buffer on GPU to staging buffer on CPU.
encoder.copy_buffer_to_buffer(&storage_buffer, 0, &staging_buffer, 0, size);
// Submits command encoder for processing.
queue.submit(Some(encoder.finish()));
// Note that we're not calling `.await` here.
let buffer_slice = staging_buffer.slice(..);
// Sets the buffer up for mapping; the map result is delivered through a
// bounded flume channel so we can await it below.
let (sender, receiver) = flume::bounded(1);
buffer_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap());
// Poll the device in a blocking manner so that our future resolves.
// In an actual application, `device.poll(...)` should
// be called in an event loop or on another thread.
device.poll(wgpu::Maintain::Wait);
// Awaits until `buffer_future` can be read from.
if let Ok(Ok(())) = receiver.recv_async().await {
// Gets contents of buffer.
let data = buffer_slice.get_mapped_range();
// Since contents are got in bytes, this converts these bytes back to u32.
let result = bytemuck::cast_slice(&data).to_vec();
// With the current interface, we have to make sure all mapped views are
// dropped before we unmap the buffer.
drop(data);
staging_buffer.unmap(); // Unmaps buffer from memory
// If you are familiar with C++ these 2 lines can be thought of similarly to:
// delete myPointer;
// myPointer = NULL;
// It effectively frees the memory
// Returns data from buffer
Some(result)
} else {
panic!("failed to run compute on gpu!")
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment