Last active
December 31, 2024 05:22
-
-
Save sandersaares/e0f059e7ae553e85ee64c34c334f50e6 to your computer and use it in GitHub Desktop.
vectorized-rust
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pub fn criterion_benchmark(c: &mut Criterion) { | |
const X_A: u64 = 94; | |
const X_B: u64 = 22; | |
const X: u64 = 8400 + 94 * 123456; | |
const Y_A: u64 = 34; | |
const Y_B: u64 = 67; | |
const Y: u64 = 5400 + 34 * 123456; | |
c.bench_function("naive", |b| { | |
b.iter(|| { | |
_ = black_box(naive_solver::solve( | |
black_box(X_A), | |
black_box(X_B), | |
black_box(X), | |
black_box(Y_A), | |
black_box(Y_B), | |
black_box(Y), | |
)); | |
}); | |
}); | |
c.bench_function("faster_4_u64", |b| { | |
b.iter(|| { | |
_ = black_box(faster_solver_u64::solve::<4>( | |
black_box(X_A), | |
black_box(X_B), | |
black_box(X), | |
black_box(Y_A), | |
black_box(Y_B), | |
black_box(Y), | |
)); | |
}); | |
}); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pub fn solve<const CHUNK_SIZE: usize>(x_a: u64, x_b: u64, x: u64, y_a: u64, y_b: u64, y: u64, | |
) -> Option<Solution> | |
where | |
LaneCount<CHUNK_SIZE>: SupportedLaneCount, | |
{ | |
// .. snip .. | |
let max_a = (x / x_a).min(y / y_a); | |
let full_chunk_count = max_a / CHUNK_SIZE as u64; | |
for chunk_index in 0..full_chunk_count { | |
// .. snip .. | |
} | |
// .. snip .. | |
None | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pub fn solve<const CHUNK_SIZE: usize>(x_a: u64, x_b: u64, x: u64, y_a: u64, y_b: u64, y: u64, | |
) -> Option<Solution> | |
where | |
LaneCount<CHUNK_SIZE>: SupportedLaneCount, | |
{ | |
// .. snip .. | |
let max_a = (x / x_a).min(y / y_a); | |
let full_chunk_count = max_a / CHUNK_SIZE as u64; | |
// .. snip .. | |
// Values for A that fall into the final partial chunk. May be an empty range. | |
let mut partial_chunk_candidates = (CHUNK_SIZE as u64 * full_chunk_count)..=max_a; | |
// If there was any part of the sequence that didn't fit into a full chunk, | |
// we process it with the naive algorithm. | |
partial_chunk_candidates.find_map(|a| evaluate_naive(a, x_a, x_b, x, y_a, y_b, y)) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pub fn solve<const CHUNK_SIZE: usize>(x_a: u64, x_b: u64, x: u64, y_a: u64, y_b: u64, y: u64, | |
) -> Option<Solution> | |
where | |
LaneCount<CHUNK_SIZE>: SupportedLaneCount, | |
{ | |
// We will prepare vector versions of our constants | |
// as vector operations require vectors for input. | |
let x_a_vec = Simd::splat(x_a); | |
let x_b_vec = Simd::splat(x_b); | |
let x_vec = Simd::splat(x); | |
let y_a_vec = Simd::splat(y_a); | |
let y_b_vec = Simd::splat(y_b); | |
let y_vec = Simd::splat(y); | |
// .. snip .. | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pub fn solve(x_a: u64, x_b: u64, x: u64, y_a: u64, y_b: u64, y: u64) -> Option<Solution> { | |
let max_a = (x / x_a).min(y / y_a); | |
(0..=max_a).find_map(|a| evaluate_naive(a, x_a, x_b, x, y_a, y_b, y)) | |
} | |
pub(crate) fn evaluate_naive( | |
a_candidate: u64, | |
x_a: u64, | |
x_b: u64, | |
x: u64, | |
y_a: u64, | |
y_b: u64, | |
y: u64, | |
) -> Option<Solution> { | |
// If we assume the given A, what is the expected value of Xb * B? | |
let b_component_x = x - x_a * a_candidate; | |
// If not evenly divisible, there is no solution with this A. | |
if b_component_x % x_b != 0 { | |
return None; | |
} | |
let b_component_y = y - y_a * a_candidate; | |
// If not evenly divisible, there is no solution with this A. | |
if b_component_y % y_b != 0 { | |
return None; | |
} | |
let b_x = b_component_x / x_b; | |
let b_y = b_component_y / y_b; | |
// If values for B do not match, there is no solution with this A. | |
if b_x != b_y { | |
return None; | |
} | |
Some(Solution { | |
a: a_candidate, | |
b: b_x, | |
}) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#[derive(Debug, PartialEq, Eq)] | |
pub struct Solution { | |
pub a: u64, | |
pub b: u64, | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fn main() { | |
let start = Instant::now(); | |
let solution = faster_solver_u64::solve::<4>( | |
black_box(26), | |
black_box(67), | |
black_box(12748 + 10000000000000), | |
black_box(66), | |
black_box(21), | |
black_box(12176 + 10000000000000), | |
); | |
assert_eq!( | |
solution, | |
Some(Solution { | |
a: 118679050709, | |
b: 103199174542 | |
}) | |
); | |
let duration = start.elapsed(); | |
println!("Time elapsed: {} milliseconds", duration.as_millis()); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fn evaluate_chunk<const CHUNK_SIZE: usize>( | |
a_candidates: Simd<u64, CHUNK_SIZE>, | |
x_a: Simd<u64, CHUNK_SIZE>, | |
x_b: Simd<u64, CHUNK_SIZE>, | |
x: Simd<u64, CHUNK_SIZE>, | |
y_a: Simd<u64, CHUNK_SIZE>, | |
y_b: Simd<u64, CHUNK_SIZE>, | |
y: Simd<u64, CHUNK_SIZE>, | |
) -> Mask<i64, CHUNK_SIZE> | |
where | |
LaneCount<CHUNK_SIZE>: SupportedLaneCount, | |
{ | |
// If we assume the given A, what is the expected value of Xb * B? | |
let b_component_x = x - x_a * a_candidates; | |
// Probe what a matching value for B might be for our current A. | |
let b_x = b_component_x / x_b; | |
// If not evenly divisible to yield a B, there is no solution with this A. | |
let is_evenly_divisible_x = (b_x * x_b).simd_eq(b_component_x); | |
// Do the same for Y. | |
let b_component_y = y - y_a * a_candidates; | |
let b_y = b_component_y / y_b; | |
let is_evenly_divisible_y = (b_y * y_b).simd_eq(b_component_y); | |
// Both the X and Y equations must yield the same value for B. | |
let is_equal_x_y = b_x.simd_eq(b_y); | |
// Combine all the conditions to yield an "is valid solution" boolean. | |
is_evenly_divisible_x & is_evenly_divisible_y & is_equal_x_y | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pub fn solve<const CHUNK_SIZE: usize>(x_a: u64, x_b: u64, x: u64, y_a: u64, y_b: u64, y: u64 | |
) -> Option<Solution> | |
where | |
LaneCount<CHUNK_SIZE>: SupportedLaneCount, | |
{ | |
// We will prepare vector versions of our constants | |
// as vector operations require vectors for input. | |
let x_a_vec = Simd::splat(x_a); | |
let x_b_vec = Simd::splat(x_b); | |
let x_vec = Simd::splat(x); | |
let y_a_vec = Simd::splat(y_a); | |
let y_b_vec = Simd::splat(y_b); | |
let y_vec = Simd::splat(y); | |
// This is a vector with the offsets used to go from a vector with the first | |
// candidate A (splatted to all lanes) to all the candidates in a chunk. | |
let mut candidate_offsets = Simd::splat(0); | |
for lane_index in 0..CHUNK_SIZE { | |
candidate_offsets[lane_index] = lane_index as u64; | |
} | |
let max_a = (x / x_a).min(y / y_a); | |
let full_chunk_count = max_a / CHUNK_SIZE as u64; | |
for chunk_index in 0..full_chunk_count { | |
let first_candidate_in_chunk = chunk_index * CHUNK_SIZE as u64; | |
// This gives us a vector with all the candidate A values in this chunk. | |
// e.g. [0, 1, 2, 3] for the first chunk. | |
let a_candidates = | |
Simd::<_, CHUNK_SIZE>::splat(first_candidate_in_chunk) + candidate_offsets; | |
let result = evaluate_chunk(a_candidates, x_a_vec, x_b_vec, x_vec, y_a_vec, y_b_vec, y_vec); | |
// We get booleans as output of the evaluation, answering "is this A a valid solution". | |
// Notably, the evaluation result does not give the actual numeric value for B. | |
// Therefore, if we found a valid solution, we still need to calculate B, | |
// which we do by falling back to the naive algorithm. | |
if result.any() { | |
let solution_index = result | |
.to_array() | |
.iter() | |
.position(|&x| x) | |
.expect("we verified that at least one element is true"); | |
let a = a_candidates.as_array()[solution_index]; | |
return Some( | |
evaluate_naive(a as u64, x_a, x_b, x, y_a, y_b, y) | |
.expect("we verified that this is a valid solution"), | |
); | |
} | |
} | |
// Values for A that fall into the final partial chunk. May be an empty range. | |
let mut partial_chunk_candidates = (CHUNK_SIZE as u64 * full_chunk_count)..=max_a; | |
// If there was any part of the sequence that didn't fit into a full chunk, | |
// we process it with the naive algorithm. | |
partial_chunk_candidates.find_map(|a| evaluate_naive(a, x_a, x_b, x, y_a, y_b, y)) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment