#gpu #kernel #rust
- GPU kernels in Rust
- Comptime
- Automatic vectorization
- Instruction and shape specialization
- Loop unrolling
| symbols_real = [] | |
| symbols_imag = [] | |
| base_freq = 100 # This is in Hertz | |
| symbol_rate = 23.6 | |
| num_tones = 16 # This particular MFSK modulation contains 16 tones, i.e. (500-100)/23.6 =~ 16.95 | |
| # But it's apparently odd to have an odd number of frequencies | |
| # So, I settled on '16'. Turns out that was right. | |
| tone_zero = int(round(base_freq/symbol_rate)) # the first or lowest tone in the sequence |
| tokens_real = [] | |
| tokens_imag = [] | |
| for i in range(0, len(symbols_real)-1): | |
| if symbols_real[i] == 15: | |
| token = symbols_real[i:i+8] | |
| tokens_real.append(token) | |
| for i in range(0, len(symbols_imag)-1): | |
| if symbols_imag[i] == 15: |
| real_tokens = [] | |
| imag_tokens = [] | |
| for i in range(0, len(symbols_real)): | |
| if symbols_real[i] == 15: | |
| token = symbols_real[i:i+9] | |
| checksum = np.sum(token) % 16 | |
| if checksum <= 4 or checksum >= 13 : | |
| q = (''.join([format(symbol, 'x') for symbol in token[1:7]])) | |
| # print(q) |
| #[derive(Debug)] | |
| #[repr(C)] | |
| /// Constructs a device-tree `node`, given a name and buffer. The buffer must be adequately sized. | |
| pub struct RawNodeConstructor<'a> { | |
| fdt_begin_node: u32, | |
| node_name: &'a [u8], | |
| } | |
| impl<'a> RawNodeConstructor<'a> { | |
| pub fn make_raw_node(buf: &'a mut [u8], name: &'a str) -> Result<Self> { |
| [START_KERNEL_COMPILATION] | |
| name: gelu::gelu_array::GeluArray< | |
| cubecl_core::frontend::element::float::F32, | |
| cubecl_wgpu::runtime::WgpuRuntime, | |
| > | |
| cube_dim: (4, 1, 1) | |
| shared_memory: 0 bytes | |
| info: ( | |
| KernelSettings { |
| // 🦀 Generated by Rust Macro Expand 🦀 | |
| // 🦀 Timestamp: 16/09/2024, 12:50:30 🦀 | |
| #![allow(warnings)] | |
| #![feature(print_internals)] | |
| #![feature(panic_internals)] | |
| #![feature(prelude_import)] | |
| #[prelude_import] | |
| use std::prelude::rust_2021::*; | |
| #[macro_use] | |
| extern crate std; |
| KernelDefinition { inputs: [Binding { location: Storage, visibility: Read, item: Item { elem: Float(F32), vectorization: Some(1) | |
| }, size: None | |
| } | |
| ], outputs: [Binding { location: Storage, visibility: ReadWrite, item: Item { elem: Float(F32), vectorization: Some(1) | |
| }, size: None | |
| } | |
| ], named: [("info", Binding { location: Storage, visibility: Read, item: Item { elem: UInt, vectorization: None | |
| }, size: None | |
| }) | |
| ], cube_dim: CubeDim { x: 4, y: 1, z: 1 |
Plan for Building a Backend in Cranelift
- `/cranelift/codegen/src/isa`: where each backend resides.
- `TargetIsa`: Specifies the target architecture’s interface.
- `LowerBackend`: Manages instruction lowering for the architecture.

#mlir #llvm #compiler