Skip to content

Instantly share code, notes, and snippets.

@MikuroXina
Created November 25, 2024 08:15
Show Gist options
  • Save MikuroXina/24c350cd645bf0c225594883805bbb76 to your computer and use it in GitHub Desktop.
Save MikuroXina/24c350cd645bf0c225594883805bbb76 to your computer and use it in GitHub Desktop.
Emulations of rounding modes with Rust.
#![feature(float_next_up_down)]
//! Source: http://verifiedby.me/adiary/pub/kashi/image/201406/nas2014.pdf
/// Floating point number with mathematical error.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
pub struct F64E {
/// An actual result by the default rounding mode.
base: f64,
/// Mathematical error for an ideal result.
error: f64,
}
/// Adds two `f64`s into a `F64E`.
pub fn two_sum(a: f64, b: f64) -> F64E {
let base = a + b;
if a.abs() > b.abs() {
let tmp = base - a;
F64E {
base,
error: b - tmp,
}
} else {
let tmp = base - b;
F64E {
base,
error: a - tmp,
}
}
}
fn split(a: f64) -> F64E {
let tmp = a * (2.0f64.powi(27) + 1.0);
let base = tmp - (tmp - a);
let error = a - base;
F64E { base, error }
}
/// Products two `f64`s into a `F64E`.
pub fn two_product(a: f64, b: f64) -> F64E {
let (a_shift, b_shift) = if a.abs() > 2.0f64.powi(996) {
(a * 2.0f64.powi(-28), b * 2.0f64.powi(28))
} else if b.abs() > 2.0f64.powi(996) {
(a * 2.0f64.powi(28), b * 2.0f64.powi(-28))
} else {
(a, b)
};
let base = a * b;
let F64E {
base: a_base,
error: a_error,
} = split(a_shift);
let F64E {
base: b_base,
error: b_error,
} = split(b_shift);
let error = if base.abs() > 2.0f64.powi(1023) {
a_error * b_error
- (((base * 0.5 - (a_base * 0.5) * b_base) * 2.0 - a_error * b_base) - a_base * b_error)
} else {
a_error * b_error - (((base - a_base * b_base) - a_error * b_base) - a_base * b_error)
};
F64E { base, error }
}
/// Adds two `f64`s by rounding up.
pub fn add_up(a: f64, b: f64) -> f64 {
let F64E { base, error } = two_sum(a, b);
if base.is_infinite() {
if base.is_sign_positive() {
return base;
}
if a.is_infinite() && a.is_sign_negative() {
return base;
}
if b.is_infinite() && b.is_sign_negative() {
return base;
}
return f64::MIN;
}
if error > 0.0 {
base.next_up()
} else {
base
}
}
/// Adds two `f64`s by rounding down.
pub fn add_down(a: f64, b: f64) -> f64 {
let F64E { base, error } = two_sum(a, b);
if base.is_infinite() {
if base.is_sign_positive() {
if a.is_infinite() && a.is_sign_positive() {
return base;
}
if b.is_infinite() && b.is_sign_positive() {
return base;
}
return f64::MAX;
}
return base;
}
if error < 0.0 {
base.next_down()
} else {
base
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment