Lohann · August 15, 2024 23:00
diff --git a/float_utils.rs b/float_utils.rs
 // Author: Lohann Paterno Coutinho Ferreira
 //
 use num_bigint::BigUint; // ^0.4.6
 use num_traits::{One, Zero}; // ^0.2.19
 use num_integer::Integer; // ^0.1.46

 #[allow(clippy::unwrap_used, clippy::cast_possible_truncation)]
 fn main() {
    /// Decimal precision used when printing float and rational values.
    const PRECISION: u32 = 32;
    /// Default rounding mode used when converting rational to float.
    const ROUNDING: Rounding = Rounding::NearestTiesEven;

    //////////////
    // Rational //
    //////////////
    let num = BigUint::from(123_456_789_123_456_789u64);
    let den = BigUint::from(1_000_000_000u64);
    let value = rational_to_string(num.clone(), den.clone(), PRECISION);
    println!("      Rational: {num:>27} / {den:<17} = {value}");

    ///////////////////////////////
    // IEEE-754 single precision //
    ///////////////////////////////
    let value = IEEE754SinglePrecision::rational_to_float(num.clone(), den.clone(), ROUNDING).unwrap();
    let (mantissa, exponent) = IEEE754SinglePrecision::into_parts(value);
    let float = f32::from_bits(value as u32);
    println!("IEEE-754 (f32): {value:<#018x} -> 2^{exponent:<3} * {mantissa:<17} = {float:<.0$}", PRECISION as usize);

    ///////////////////////////////
    // IEEE-754 double precision //
    ///////////////////////////////
    let value = IEEE754DoublePrecision::rational_to_float(num.clone(), den.clone(), ROUNDING).unwrap();
    let (mantissa, exponent) = IEEE754DoublePrecision::into_parts(value);
    let float = f64::from_bits(value);
    println!("IEEE-754 (f64): {value:#018x} -> 2^{exponent:<3} * {mantissa:<17} = {float:.0$}", PRECISION as usize);

    ////////////////
    // UFloat9x56 //
    ////////////////
    let float = UFloat9x56::rational_to_float(num, den, ROUNDING).unwrap();
    let (mantissa, exponent) = UFloat9x56::into_parts(float);
    // Obs: we convert to f64 to print the value, but we lose 3bit precision from mantissa
    let value = UFloat9x56::to_string(float, PRECISION);
    println!("    UFloat9x56: {float:#018x} -> 2^{exponent:<3} * {mantissa:<17} = {value}");
 }

 #[derive(Debug, PartialEq, Eq)]
 pub enum ConversionError {
    /// The denominator is zero
    DivisionByZero,
 }

 /// Basic float conversion and helper methods, supports custom 64bit floats.
 /// OBS: Doesn't support NaNs, Subnormal numbers, infinities and negative values.
 pub trait Float {
    /// Number of bits for represent the exponent
    const EXPONENT_DIGITS: u32;
    /// Number of bits used to represent the mantissa
    const MANTISSA_DIGITS: u32;
    /// The zero exponent offset
    const EXPONENT_OFFSET: u32;
    /// Wether the float supports `not a number` or not.
    const HAS_NAN: bool;

    /// Convert a rational number to float
    /// # Errors
    /// Returns an error if the value is too big or too tiny to be represented.
    #[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation, clippy::unwrap_used, clippy::missing_errors_doc)]
    fn rational_to_float<N, D>(num: N, den: D, rounding: Rounding) -> Result<u64, ConversionError> where N: Into<BigUint>, D: Into<BigUint> {
        let num = num.into();
        let den = den.into();

        if den.is_zero() {
            return Err(ConversionError::DivisionByZero);
        }

        // Trivial case for zero
        if num.is_zero() {
            return Ok(0);
        }

        // Compute the number of bits required to represent the numerator and denominator.
        let num_bits = num.bits() as i32;
        let den_bits = den.bits() as i32;

        // Try an exponent that will make the mantissa fit in the available bits.
        let mut exponent = (Self::MANTISSA_DIGITS as i32) - (num_bits - den_bits);

        // Extract the mantissa by computing: numerator * 2^exponent / denominator.
        let mut mantissa = mul_div(exponent, num.clone(), den.clone(), rounding);
        
        // If the mantissa is too large, adjust the exponent.
        let mantissa_max = (BigUint::one() << Self::MANTISSA_DIGITS) - 1u32;
        if mantissa > mantissa_max {
            exponent -= 1;
            mantissa = mul_div(exponent, num, den, rounding);
        }

        // Normalize mantissa to be a value between 1~0
        exponent = -exponent;
        exponent += (Self::MANTISSA_DIGITS - 1) as i32;
        exponent += Self::EXPONENT_OFFSET as i32;
        
        // Verify if the float is subnormal number (msb is not set)
        if exponent < 0 {
            mantissa = mul_div(exponent, mantissa, BigUint::one() << exponent.unsigned_abs(), rounding);
            exponent = 0;
        }
        let mut exponent = u64::from(exponent.unsigned_abs());
        
        // If the the exponent is too big to be represented, bound it to the maximum value.
        let max_exponent = (1 << Self::EXPONENT_DIGITS) - 1;
        if exponent > max_exponent {
            exponent = max_exponent;
            if Self::HAS_NAN {
                // Set the mantissa to zero, to represent infinity instead of NaN
                mantissa = BigUint::zero();
            } else {
                // Set mantissa to the maximum value
                mantissa.clone_from(&mantissa_max);
            }
        }

        // Remove most significant bit from mantissa (it is always set)
        mantissa &= mantissa_max >> 1;
                    
        // Shift the exponent to the right position
        exponent <<= Self::MANTISSA_DIGITS - 1;
        
        // Convert mantissa and exponent to float
        Ok(mantissa.to_u64_digits().first().unwrap_or(&0u64) | exponent)
    }

    /// Retrieve the mantissa and exponent
    #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
    #[must_use]
    fn into_parts(float: u64) -> (u64, i32) {
        let mantissa_msb = 1 << (Self::MANTISSA_DIGITS - 1);
        let mut exp = (float >> (Self::MANTISSA_DIGITS - 1)) as i32;
        let mut mantissa = float & (mantissa_msb - 1);

        // Doesn't set the msb if the float is a subnormal number
        if exp > 0 {
            mantissa |= mantissa_msb;
        }
        exp -= Self::EXPONENT_OFFSET as i32;
        if mantissa > 0 {
            exp -= (Self::MANTISSA_DIGITS - 1) as i32;
        }
        (mantissa, exp)
    }

    /// Converts a float to a rational number
    #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
    #[must_use]
    fn to_rational(float: u64) -> (BigUint, BigUint) {
        let (mantissa, exponent) = Self::into_parts(float);

        // If mantissa is zero, return 0
        if mantissa == 0 {
            return (0u32.into(), 1u32.into());
        }
        
        if exponent > 0 {
            let mantissa = BigUint::from(mantissa) << exponent.unsigned_abs();
            (mantissa, BigUint::from(1u32))
        } else {
            let exponent =  BigUint::from(1u8) << exponent.unsigned_abs();
            (BigUint::from(mantissa), exponent)
        }
    }

    /// Generic function for formating float values, equivalent to:
    /// OBS: negative values, NaNs, Infinite, and subnormal numbers are ignored.
    /// ```rust
    /// let precision = 32usize;
    /// let value = 0.1f64;
    /// let _ = format!("{value:.0$}", precision);
    /// ```
    #[must_use]
    fn to_string(float: u64, precision: u32) -> String {
        let (numerator, denominator) = Self::to_rational(float);
        rational_to_string(numerator, denominator, precision)
    }
 }

 // Ref: https://github.com/Analog-Labs/analog-gmp/blob/5653d1dc756e93072e060ab0bbc0b8822931e2c0/src/utils/Float9x56.sol#L13-L117
 pub enum UFloat9x56 {}
 impl Float for UFloat9x56 {
    /// Number of bits for represent the exponent
    const EXPONENT_DIGITS: u32 = 9;
    /// Number of bits for represent the mantissa (msb is always set)
    const MANTISSA_DIGITS: u32 = 56;
    /// The zero exponent in raw format.
    /// which is: 0x8000000000000000 >> 55
    const EXPONENT_OFFSET: u32 = 256;
    /// `UFloat9x56` doesn't support NaN or infinity.
    const HAS_NAN: bool = false;
 }

 // Ref: https://en.wikipedia.org/wiki/IEEE_754
 pub enum IEEE754SinglePrecision {}
 impl Float for IEEE754SinglePrecision {
    /// Number of bits used to represent the exponent
    const EXPONENT_DIGITS: u32 = 8;
    /// Number of bits used to represent the mantissa (msb is always set)
    const MANTISSA_DIGITS: u32 = 24;
    /// The zero exponent in raw format.
    /// which is: 0x3f800000 >> 23
    const EXPONENT_OFFSET: u32 = 127;
    /// Is +infinity when exponent = 128 and mantissa = 0
    const HAS_NAN: bool = true;
 }

 // Ref: https://en.wikipedia.org/wiki/Double-precision_floating-point_format
 pub enum IEEE754DoublePrecision {}
 impl Float for IEEE754DoublePrecision {
    /// Number of bits used to represent the exponent
    const EXPONENT_DIGITS: u32 = 11;
    /// Number of bits for represent the mantissa (msb is always set)
    const MANTISSA_DIGITS: u32 = 53;
    /// The zero exponent in raw format.
    /// which is: 0x3ff0000000000000 >> 52
    const EXPONENT_OFFSET: u32 = 1023;
    /// Is +infinity when exponent = 1024 and mantissa = 0
    const HAS_NAN: bool = true;
 }

 /// Rounding mode used when converting rational to float.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum Rounding {
    /// Rounds towards zero
    Down,
    /// Rounds to the nearest value, if the number falls midway,
    /// it is rounded to the nearest value with an even least significant digit.
    NearestTiesEven,
    /// Rounds to the nearest value; if the number falls midway,
    /// it is rounded to the nearest value above.
    NearestTiesAwayFromZero,
    /// Rounds towards infinite
    Up,
 }

 /// Computes: numerator * 2^exponent / denominator.
 #[allow(clippy::cast_possible_truncation)]
 #[inline]
 fn mul_div<N, D>(exponent: i32, numerator: N, denominator: D, rounding: Rounding) -> BigUint where N: Into<BigUint>, D: Into<BigUint> {
    let mut numerator = numerator.into();
    let denominator = denominator.into();

    // Multiply the numerator by 2^exponent
    if exponent >= 0 {
        numerator <<= exponent.unsigned_abs();
    } else {
        numerator >>= exponent.unsigned_abs();
    };

    // Round the result
    let remainder = match rounding {
        Rounding::Down => BigUint::zero(),
        Rounding::NearestTiesEven => {
            let mut rem = &denominator >> 1u32;
            if !rem.is_zero() && denominator.is_even() {
                // Round towards even
                let is_odd = mul_div(0, numerator.clone(), denominator.clone(), Rounding::Up).is_odd();
                rem -= u32::from(is_odd);
            }
            rem
        },
        Rounding::NearestTiesAwayFromZero => &denominator >> 1u32,
        Rounding::Up => &denominator - BigUint::one(),
    };
    
    (numerator + remainder) / denominator
 }

 /// Convert a rational to decimal with arbitrary precision.
 fn rational_to_string<N: Into<BigUint>, D: Into<BigUint>>(num: N, den: D, precision: u32) -> String {
    let num = num.into();
    let den = den.into();

    // Split integer and fraction parts
    let mut integer = &num / &den;
    let mut fraction = &num % &den;

    // Decimals counter
    let mut decimals = 0;
    
    // Extract decimals digits
    while decimals < precision {
        fraction *= 10u8;
        integer *= 10u8;
        integer += &fraction / &den;
        fraction = &fraction % &den;
        decimals += 1;

        // Round last digit to nearest
        if decimals == precision && fraction.pow(2) >= den {
            integer += 1u32;
        }
    }

    // format the result
    let denominator = BigUint::from(10u8).pow(decimals);
    format!("{}.{:02$}", &integer / &denominator, integer % denominator, decimals as usize)
 }
	// Author: Lohann Paterno Coutinho Ferreira
	//
	use num_bigint::BigUint; // ^0.4.6
	use num_traits::{One, Zero}; // ^0.2.19
	use num_integer::Integer; // ^0.1.46

	#[allow(clippy::unwrap_used, clippy::cast_possible_truncation)]
	fn main() {
	/// Decimal precision used when printing float and rational values.
	const PRECISION: u32 = 32;
	/// Default rounding mode used when converting rational to float.
	const ROUNDING: Rounding = Rounding::NearestTiesEven;

	//////////////
	// Rational //
	//////////////
	let num = BigUint::from(123_456_789_123_456_789u64);
	let den = BigUint::from(1_000_000_000u64);
	let value = rational_to_string(num.clone(), den.clone(), PRECISION);
	println!(" Rational: {num:>27} / {den:<17} = {value}");

	///////////////////////////////
	// IEEE-754 single precision //
	///////////////////////////////
	let value = IEEE754SinglePrecision::rational_to_float(num.clone(), den.clone(), ROUNDING).unwrap();
	let (mantissa, exponent) = IEEE754SinglePrecision::into_parts(value);
	let float = f32::from_bits(value as u32);
	println!("IEEE-754 (f32): {value:<#018x} -> 2^{exponent:<3} * {mantissa:<17} = {float:<.0$}", PRECISION as usize);

	///////////////////////////////
	// IEEE-754 double precision //
	///////////////////////////////
	let value = IEEE754DoublePrecision::rational_to_float(num.clone(), den.clone(), ROUNDING).unwrap();
	let (mantissa, exponent) = IEEE754DoublePrecision::into_parts(value);
	let float = f64::from_bits(value);
	println!("IEEE-754 (f64): {value:#018x} -> 2^{exponent:<3} * {mantissa:<17} = {float:.0$}", PRECISION as usize);

	////////////////
	// UFloat9x56 //
	////////////////
	let float = UFloat9x56::rational_to_float(num, den, ROUNDING).unwrap();
	let (mantissa, exponent) = UFloat9x56::into_parts(float);
	// Obs: we convert to f64 to print the value, but we lose 3bit precision from mantissa
	let value = UFloat9x56::to_string(float, PRECISION);
	println!(" UFloat9x56: {float:#018x} -> 2^{exponent:<3} * {mantissa:<17} = {value}");
	}

	#[derive(Debug, PartialEq, Eq)]
	pub enum ConversionError {
	/// The denominator is zero
	DivisionByZero,
	}

	/// Basic float conversion and helper methods, supports custom 64bit floats.
	/// OBS: Doesn't support NaNs, Subnormal numbers, infinities and negative values.
	pub trait Float {
	/// Number of bits for represent the exponent
	const EXPONENT_DIGITS: u32;
	/// Number of bits used to represent the mantissa
	const MANTISSA_DIGITS: u32;
	/// The zero exponent offset
	const EXPONENT_OFFSET: u32;
	/// Wether the float supports `not a number` or not.
	const HAS_NAN: bool;

	/// Convert a rational number to float
	/// # Errors
	/// Returns an error if the value is too big or too tiny to be represented.
	#[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation, clippy::unwrap_used, clippy::missing_errors_doc)]
	fn rational_to_float<N, D>(num: N, den: D, rounding: Rounding) -> Result<u64, ConversionError> where N: Into<BigUint>, D: Into<BigUint> {
	let num = num.into();
	let den = den.into();

	if den.is_zero() {
	return Err(ConversionError::DivisionByZero);
	}

	// Trivial case for zero
	if num.is_zero() {
	return Ok(0);
	}

	// Compute the number of bits required to represent the numerator and denominator.
	let num_bits = num.bits() as i32;
	let den_bits = den.bits() as i32;

	// Try an exponent that will make the mantissa fit in the available bits.
	let mut exponent = (Self::MANTISSA_DIGITS as i32) - (num_bits - den_bits);

	// Extract the mantissa by computing: numerator * 2^exponent / denominator.
	let mut mantissa = mul_div(exponent, num.clone(), den.clone(), rounding);

	// If the mantissa is too large, adjust the exponent.
	let mantissa_max = (BigUint::one() << Self::MANTISSA_DIGITS) - 1u32;
	if mantissa > mantissa_max {
	exponent -= 1;
	mantissa = mul_div(exponent, num, den, rounding);
	}

	// Normalize mantissa to be a value between 1~0
	exponent = -exponent;
	exponent += (Self::MANTISSA_DIGITS - 1) as i32;
	exponent += Self::EXPONENT_OFFSET as i32;

	// Verify if the float is subnormal number (msb is not set)
	if exponent < 0 {
	mantissa = mul_div(exponent, mantissa, BigUint::one() << exponent.unsigned_abs(), rounding);
	exponent = 0;
	}
	let mut exponent = u64::from(exponent.unsigned_abs());

	// If the the exponent is too big to be represented, bound it to the maximum value.
	let max_exponent = (1 << Self::EXPONENT_DIGITS) - 1;
	if exponent > max_exponent {
	exponent = max_exponent;
	if Self::HAS_NAN {
	// Set the mantissa to zero, to represent infinity instead of NaN
	mantissa = BigUint::zero();
	} else {
	// Set mantissa to the maximum value
	mantissa.clone_from(&mantissa_max);
	}
	}

	// Remove most significant bit from mantissa (it is always set)
	mantissa &= mantissa_max >> 1;

	// Shift the exponent to the right position
	exponent <<= Self::MANTISSA_DIGITS - 1;

	// Convert mantissa and exponent to float
	Ok(mantissa.to_u64_digits().first().unwrap_or(&0u64) \| exponent)
	}

	/// Retrieve the mantissa and exponent
	#[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
	#[must_use]
	fn into_parts(float: u64) -> (u64, i32) {
	let mantissa_msb = 1 << (Self::MANTISSA_DIGITS - 1);
	let mut exp = (float >> (Self::MANTISSA_DIGITS - 1)) as i32;
	let mut mantissa = float & (mantissa_msb - 1);

	// Doesn't set the msb if the float is a subnormal number
	if exp > 0 {
	mantissa \|= mantissa_msb;
	}
	exp -= Self::EXPONENT_OFFSET as i32;
	if mantissa > 0 {
	exp -= (Self::MANTISSA_DIGITS - 1) as i32;
	}
	(mantissa, exp)
	}

	/// Converts a float to a rational number
	#[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
	#[must_use]
	fn to_rational(float: u64) -> (BigUint, BigUint) {
	let (mantissa, exponent) = Self::into_parts(float);

	// If mantissa is zero, return 0
	if mantissa == 0 {
	return (0u32.into(), 1u32.into());
	}

	if exponent > 0 {
	let mantissa = BigUint::from(mantissa) << exponent.unsigned_abs();
	(mantissa, BigUint::from(1u32))
	} else {
	let exponent = BigUint::from(1u8) << exponent.unsigned_abs();
	(BigUint::from(mantissa), exponent)
	}
	}

	/// Generic function for formating float values, equivalent to:
	/// OBS: negative values, NaNs, Infinite, and subnormal numbers are ignored.
	/// ```rust
	/// let precision = 32usize;
	/// let value = 0.1f64;
	/// let _ = format!("{value:.0$}", precision);
	/// ```
	#[must_use]
	fn to_string(float: u64, precision: u32) -> String {
	let (numerator, denominator) = Self::to_rational(float);
	rational_to_string(numerator, denominator, precision)
	}
	}

	// Ref: https://github.com/Analog-Labs/analog-gmp/blob/5653d1dc756e93072e060ab0bbc0b8822931e2c0/src/utils/Float9x56.sol#L13-L117
	pub enum UFloat9x56 {}
	impl Float for UFloat9x56 {
	/// Number of bits for represent the exponent
	const EXPONENT_DIGITS: u32 = 9;
	/// Number of bits for represent the mantissa (msb is always set)
	const MANTISSA_DIGITS: u32 = 56;
	/// The zero exponent in raw format.
	/// which is: 0x8000000000000000 >> 55
	const EXPONENT_OFFSET: u32 = 256;
	/// `UFloat9x56` doesn't support NaN or infinity.
	const HAS_NAN: bool = false;
	}

	// Ref: https://en.wikipedia.org/wiki/IEEE_754
	pub enum IEEE754SinglePrecision {}
	impl Float for IEEE754SinglePrecision {
	/// Number of bits used to represent the exponent
	const EXPONENT_DIGITS: u32 = 8;
	/// Number of bits used to represent the mantissa (msb is always set)
	const MANTISSA_DIGITS: u32 = 24;
	/// The zero exponent in raw format.
	/// which is: 0x3f800000 >> 23
	const EXPONENT_OFFSET: u32 = 127;
	/// Is +infinity when exponent = 128 and mantissa = 0
	const HAS_NAN: bool = true;
	}

	// Ref: https://en.wikipedia.org/wiki/Double-precision_floating-point_format
	pub enum IEEE754DoublePrecision {}
	impl Float for IEEE754DoublePrecision {
	/// Number of bits used to represent the exponent
	const EXPONENT_DIGITS: u32 = 11;
	/// Number of bits for represent the mantissa (msb is always set)
	const MANTISSA_DIGITS: u32 = 53;
	/// The zero exponent in raw format.
	/// which is: 0x3ff0000000000000 >> 52
	const EXPONENT_OFFSET: u32 = 1023;
	/// Is +infinity when exponent = 1024 and mantissa = 0
	const HAS_NAN: bool = true;
	}

	/// Rounding mode used when converting rational to float.
	#[derive(Debug, Clone, Copy, PartialEq, Eq)]
	pub enum Rounding {
	/// Rounds towards zero
	Down,
	/// Rounds to the nearest value, if the number falls midway,
	/// it is rounded to the nearest value with an even least significant digit.
	NearestTiesEven,
	/// Rounds to the nearest value; if the number falls midway,
	/// it is rounded to the nearest value above.
	NearestTiesAwayFromZero,
	/// Rounds towards infinite
	Up,
	}

	/// Computes: numerator * 2^exponent / denominator.
	#[allow(clippy::cast_possible_truncation)]
	#[inline]
	fn mul_div<N, D>(exponent: i32, numerator: N, denominator: D, rounding: Rounding) -> BigUint where N: Into<BigUint>, D: Into<BigUint> {
	let mut numerator = numerator.into();
	let denominator = denominator.into();

	// Multiply the numerator by 2^exponent
	if exponent >= 0 {
	numerator <<= exponent.unsigned_abs();
	} else {
	numerator >>= exponent.unsigned_abs();
	};

	// Round the result
	let remainder = match rounding {
	Rounding::Down => BigUint::zero(),
	Rounding::NearestTiesEven => {
	let mut rem = &denominator >> 1u32;
	if !rem.is_zero() && denominator.is_even() {
	// Round towards even
	let is_odd = mul_div(0, numerator.clone(), denominator.clone(), Rounding::Up).is_odd();
	rem -= u32::from(is_odd);
	}
	rem
	},
	Rounding::NearestTiesAwayFromZero => &denominator >> 1u32,
	Rounding::Up => &denominator - BigUint::one(),
	};

	(numerator + remainder) / denominator
	}

	/// Convert a rational to decimal with arbitrary precision.
	fn rational_to_string<N: Into<BigUint>, D: Into<BigUint>>(num: N, den: D, precision: u32) -> String {
	let num = num.into();
	let den = den.into();

	// Split integer and fraction parts
	let mut integer = &num / &den;
	let mut fraction = &num % &den;

	// Decimals counter
	let mut decimals = 0;

	// Extract decimals digits
	while decimals < precision {
	fraction *= 10u8;
	integer *= 10u8;
	integer += &fraction / &den;
	fraction = &fraction % &den;
	decimals += 1;

	// Round last digit to nearest
	if decimals == precision && fraction.pow(2) >= den {
	integer += 1u32;
	}
	}

	// format the result
	let denominator = BigUint::from(10u8).pow(decimals);
	format!("{}.{:02$}", &integer / &denominator, integer % denominator, decimals as usize)
	}