Last active
August 15, 2024 23:00
-
-
Save Lohann/b3dd04848c295fa450267b2846052657 to your computer and use it in GitHub Desktop.
Example on how to convert, parse and display custom float types.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Author: Lohann Paterno Coutinho Ferreira | |
// | |
use num_bigint::BigUint; // ^0.4.6 | |
use num_traits::{One, Zero}; // ^0.2.19 | |
use num_integer::Integer; // ^0.1.46 | |
#[allow(clippy::unwrap_used, clippy::cast_possible_truncation)] | |
fn main() { | |
/// Decimal precision used when printing float and rational values. | |
const PRECISION: u32 = 32; | |
/// Default rounding mode used when converting rational to float. | |
const ROUNDING: Rounding = Rounding::NearestTiesEven; | |
////////////// | |
// Rational // | |
////////////// | |
let num = BigUint::from(123_456_789_123_456_789u64); | |
let den = BigUint::from(1_000_000_000u64); | |
let value = rational_to_string(num.clone(), den.clone(), PRECISION); | |
println!(" Rational: {num:>27} / {den:<17} = {value}"); | |
/////////////////////////////// | |
// IEEE-754 single precision // | |
/////////////////////////////// | |
let value = IEEE754SinglePrecision::rational_to_float(num.clone(), den.clone(), ROUNDING).unwrap(); | |
let (mantissa, exponent) = IEEE754SinglePrecision::into_parts(value); | |
let float = f32::from_bits(value as u32); | |
println!("IEEE-754 (f32): {value:<#018x} -> 2^{exponent:<3} * {mantissa:<17} = {float:<.0$}", PRECISION as usize); | |
/////////////////////////////// | |
// IEEE-754 double precision // | |
/////////////////////////////// | |
let value = IEEE754DoublePrecision::rational_to_float(num.clone(), den.clone(), ROUNDING).unwrap(); | |
let (mantissa, exponent) = IEEE754DoublePrecision::into_parts(value); | |
let float = f64::from_bits(value); | |
println!("IEEE-754 (f64): {value:#018x} -> 2^{exponent:<3} * {mantissa:<17} = {float:.0$}", PRECISION as usize); | |
//////////////// | |
// UFloat9x56 // | |
//////////////// | |
let float = UFloat9x56::rational_to_float(num, den, ROUNDING).unwrap(); | |
let (mantissa, exponent) = UFloat9x56::into_parts(float); | |
// Obs: we convert to f64 to print the value, but we lose 3bit precision from mantissa | |
let value = UFloat9x56::to_string(float, PRECISION); | |
println!(" UFloat9x56: {float:#018x} -> 2^{exponent:<3} * {mantissa:<17} = {value}"); | |
} | |
#[derive(Debug, PartialEq, Eq)] | |
pub enum ConversionError { | |
/// The denominator is zero | |
DivisionByZero, | |
} | |
/// Basic float conversion and helper methods, supports custom 64bit floats. | |
/// OBS: Doesn't support NaNs, Subnormal numbers, infinities and negative values. | |
pub trait Float { | |
/// Number of bits for represent the exponent | |
const EXPONENT_DIGITS: u32; | |
/// Number of bits used to represent the mantissa | |
const MANTISSA_DIGITS: u32; | |
/// The zero exponent offset | |
const EXPONENT_OFFSET: u32; | |
/// Wether the float supports `not a number` or not. | |
const HAS_NAN: bool; | |
/// Convert a rational number to float | |
/// # Errors | |
/// Returns an error if the value is too big or too tiny to be represented. | |
#[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation, clippy::unwrap_used, clippy::missing_errors_doc)] | |
fn rational_to_float<N, D>(num: N, den: D, rounding: Rounding) -> Result<u64, ConversionError> where N: Into<BigUint>, D: Into<BigUint> { | |
let num = num.into(); | |
let den = den.into(); | |
if den.is_zero() { | |
return Err(ConversionError::DivisionByZero); | |
} | |
// Trivial case for zero | |
if num.is_zero() { | |
return Ok(0); | |
} | |
// Compute the number of bits required to represent the numerator and denominator. | |
let num_bits = num.bits() as i32; | |
let den_bits = den.bits() as i32; | |
// Try an exponent that will make the mantissa fit in the available bits. | |
let mut exponent = (Self::MANTISSA_DIGITS as i32) - (num_bits - den_bits); | |
// Extract the mantissa by computing: numerator * 2^exponent / denominator. | |
let mut mantissa = mul_div(exponent, num.clone(), den.clone(), rounding); | |
// If the mantissa is too large, adjust the exponent. | |
let mantissa_max = (BigUint::one() << Self::MANTISSA_DIGITS) - 1u32; | |
if mantissa > mantissa_max { | |
exponent -= 1; | |
mantissa = mul_div(exponent, num, den, rounding); | |
} | |
// Normalize mantissa to be a value between 1~0 | |
exponent = -exponent; | |
exponent += (Self::MANTISSA_DIGITS - 1) as i32; | |
exponent += Self::EXPONENT_OFFSET as i32; | |
// Verify if the float is subnormal number (msb is not set) | |
if exponent < 0 { | |
mantissa = mul_div(exponent, mantissa, BigUint::one() << exponent.unsigned_abs(), rounding); | |
exponent = 0; | |
} | |
let mut exponent = u64::from(exponent.unsigned_abs()); | |
// If the the exponent is too big to be represented, bound it to the maximum value. | |
let max_exponent = (1 << Self::EXPONENT_DIGITS) - 1; | |
if exponent > max_exponent { | |
exponent = max_exponent; | |
if Self::HAS_NAN { | |
// Set the mantissa to zero, to represent infinity instead of NaN | |
mantissa = BigUint::zero(); | |
} else { | |
// Set mantissa to the maximum value | |
mantissa.clone_from(&mantissa_max); | |
} | |
} | |
// Remove most significant bit from mantissa (it is always set) | |
mantissa &= mantissa_max >> 1; | |
// Shift the exponent to the right position | |
exponent <<= Self::MANTISSA_DIGITS - 1; | |
// Convert mantissa and exponent to float | |
Ok(mantissa.to_u64_digits().first().unwrap_or(&0u64) | exponent) | |
} | |
/// Retrieve the mantissa and exponent | |
#[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] | |
#[must_use] | |
fn into_parts(float: u64) -> (u64, i32) { | |
let mantissa_msb = 1 << (Self::MANTISSA_DIGITS - 1); | |
let mut exp = (float >> (Self::MANTISSA_DIGITS - 1)) as i32; | |
let mut mantissa = float & (mantissa_msb - 1); | |
// Doesn't set the msb if the float is a subnormal number | |
if exp > 0 { | |
mantissa |= mantissa_msb; | |
} | |
exp -= Self::EXPONENT_OFFSET as i32; | |
if mantissa > 0 { | |
exp -= (Self::MANTISSA_DIGITS - 1) as i32; | |
} | |
(mantissa, exp) | |
} | |
/// Converts a float to a rational number | |
#[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] | |
#[must_use] | |
fn to_rational(float: u64) -> (BigUint, BigUint) { | |
let (mantissa, exponent) = Self::into_parts(float); | |
// If mantissa is zero, return 0 | |
if mantissa == 0 { | |
return (0u32.into(), 1u32.into()); | |
} | |
if exponent > 0 { | |
let mantissa = BigUint::from(mantissa) << exponent.unsigned_abs(); | |
(mantissa, BigUint::from(1u32)) | |
} else { | |
let exponent = BigUint::from(1u8) << exponent.unsigned_abs(); | |
(BigUint::from(mantissa), exponent) | |
} | |
} | |
/// Generic function for formating float values, equivalent to: | |
/// OBS: negative values, NaNs, Infinite, and subnormal numbers are ignored. | |
/// ```rust | |
/// let precision = 32usize; | |
/// let value = 0.1f64; | |
/// let _ = format!("{value:.0$}", precision); | |
/// ``` | |
#[must_use] | |
fn to_string(float: u64, precision: u32) -> String { | |
let (numerator, denominator) = Self::to_rational(float); | |
rational_to_string(numerator, denominator, precision) | |
} | |
} | |
// Ref: https://github.com/Analog-Labs/analog-gmp/blob/5653d1dc756e93072e060ab0bbc0b8822931e2c0/src/utils/Float9x56.sol#L13-L117 | |
pub enum UFloat9x56 {} | |
impl Float for UFloat9x56 { | |
/// Number of bits for represent the exponent | |
const EXPONENT_DIGITS: u32 = 9; | |
/// Number of bits for represent the mantissa (msb is always set) | |
const MANTISSA_DIGITS: u32 = 56; | |
/// The zero exponent in raw format. | |
/// which is: 0x8000000000000000 >> 55 | |
const EXPONENT_OFFSET: u32 = 256; | |
/// `UFloat9x56` doesn't support NaN or infinity. | |
const HAS_NAN: bool = false; | |
} | |
// Ref: https://en.wikipedia.org/wiki/IEEE_754 | |
pub enum IEEE754SinglePrecision {} | |
impl Float for IEEE754SinglePrecision { | |
/// Number of bits used to represent the exponent | |
const EXPONENT_DIGITS: u32 = 8; | |
/// Number of bits used to represent the mantissa (msb is always set) | |
const MANTISSA_DIGITS: u32 = 24; | |
/// The zero exponent in raw format. | |
/// which is: 0x3f800000 >> 23 | |
const EXPONENT_OFFSET: u32 = 127; | |
/// Is +infinity when exponent = 128 and mantissa = 0 | |
const HAS_NAN: bool = true; | |
} | |
// Ref: https://en.wikipedia.org/wiki/Double-precision_floating-point_format | |
pub enum IEEE754DoublePrecision {} | |
impl Float for IEEE754DoublePrecision { | |
/// Number of bits used to represent the exponent | |
const EXPONENT_DIGITS: u32 = 11; | |
/// Number of bits for represent the mantissa (msb is always set) | |
const MANTISSA_DIGITS: u32 = 53; | |
/// The zero exponent in raw format. | |
/// which is: 0x3ff0000000000000 >> 52 | |
const EXPONENT_OFFSET: u32 = 1023; | |
/// Is +infinity when exponent = 1024 and mantissa = 0 | |
const HAS_NAN: bool = true; | |
} | |
/// Rounding mode used when converting rational to float. | |
#[derive(Debug, Clone, Copy, PartialEq, Eq)] | |
pub enum Rounding { | |
/// Rounds towards zero | |
Down, | |
/// Rounds to the nearest value, if the number falls midway, | |
/// it is rounded to the nearest value with an even least significant digit. | |
NearestTiesEven, | |
/// Rounds to the nearest value; if the number falls midway, | |
/// it is rounded to the nearest value above. | |
NearestTiesAwayFromZero, | |
/// Rounds towards infinite | |
Up, | |
} | |
/// Computes: numerator * 2^exponent / denominator. | |
#[allow(clippy::cast_possible_truncation)] | |
#[inline] | |
fn mul_div<N, D>(exponent: i32, numerator: N, denominator: D, rounding: Rounding) -> BigUint where N: Into<BigUint>, D: Into<BigUint> { | |
let mut numerator = numerator.into(); | |
let denominator = denominator.into(); | |
// Multiply the numerator by 2^exponent | |
if exponent >= 0 { | |
numerator <<= exponent.unsigned_abs(); | |
} else { | |
numerator >>= exponent.unsigned_abs(); | |
}; | |
// Round the result | |
let remainder = match rounding { | |
Rounding::Down => BigUint::zero(), | |
Rounding::NearestTiesEven => { | |
let mut rem = &denominator >> 1u32; | |
if !rem.is_zero() && denominator.is_even() { | |
// Round towards even | |
let is_odd = mul_div(0, numerator.clone(), denominator.clone(), Rounding::Up).is_odd(); | |
rem -= u32::from(is_odd); | |
} | |
rem | |
}, | |
Rounding::NearestTiesAwayFromZero => &denominator >> 1u32, | |
Rounding::Up => &denominator - BigUint::one(), | |
}; | |
(numerator + remainder) / denominator | |
} | |
/// Convert a rational to decimal with arbitrary precision. | |
fn rational_to_string<N: Into<BigUint>, D: Into<BigUint>>(num: N, den: D, precision: u32) -> String { | |
let num = num.into(); | |
let den = den.into(); | |
// Split integer and fraction parts | |
let mut integer = &num / &den; | |
let mut fraction = &num % &den; | |
// Decimals counter | |
let mut decimals = 0; | |
// Extract decimals digits | |
while decimals < precision { | |
fraction *= 10u8; | |
integer *= 10u8; | |
integer += &fraction / &den; | |
fraction = &fraction % &den; | |
decimals += 1; | |
// Round last digit to nearest | |
if decimals == precision && fraction.pow(2) >= den { | |
integer += 1u32; | |
} | |
} | |
// format the result | |
let denominator = BigUint::from(10u8).pow(decimals); | |
format!("{}.{:02$}", &integer / &denominator, integer % denominator, decimals as usize) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment