Skip to content

Instantly share code, notes, and snippets.

@Lohann
Last active August 15, 2024 23:00
Show Gist options
  • Save Lohann/b3dd04848c295fa450267b2846052657 to your computer and use it in GitHub Desktop.
Save Lohann/b3dd04848c295fa450267b2846052657 to your computer and use it in GitHub Desktop.
Example on how to convert, parse and display custom float types.
// Author: Lohann Paterno Coutinho Ferreira
//
use num_bigint::BigUint; // ^0.4.6
use num_traits::{One, Zero}; // ^0.2.19
use num_integer::Integer; // ^0.1.46
#[allow(clippy::unwrap_used, clippy::cast_possible_truncation)]
fn main() {
/// Decimal precision used when printing float and rational values.
const PRECISION: u32 = 32;
/// Default rounding mode used when converting rational to float.
const ROUNDING: Rounding = Rounding::NearestTiesEven;
//////////////
// Rational //
//////////////
let num = BigUint::from(123_456_789_123_456_789u64);
let den = BigUint::from(1_000_000_000u64);
let value = rational_to_string(num.clone(), den.clone(), PRECISION);
println!(" Rational: {num:>27} / {den:<17} = {value}");
///////////////////////////////
// IEEE-754 single precision //
///////////////////////////////
let value = IEEE754SinglePrecision::rational_to_float(num.clone(), den.clone(), ROUNDING).unwrap();
let (mantissa, exponent) = IEEE754SinglePrecision::into_parts(value);
let float = f32::from_bits(value as u32);
println!("IEEE-754 (f32): {value:<#018x} -> 2^{exponent:<3} * {mantissa:<17} = {float:<.0$}", PRECISION as usize);
///////////////////////////////
// IEEE-754 double precision //
///////////////////////////////
let value = IEEE754DoublePrecision::rational_to_float(num.clone(), den.clone(), ROUNDING).unwrap();
let (mantissa, exponent) = IEEE754DoublePrecision::into_parts(value);
let float = f64::from_bits(value);
println!("IEEE-754 (f64): {value:#018x} -> 2^{exponent:<3} * {mantissa:<17} = {float:.0$}", PRECISION as usize);
////////////////
// UFloat9x56 //
////////////////
let float = UFloat9x56::rational_to_float(num, den, ROUNDING).unwrap();
let (mantissa, exponent) = UFloat9x56::into_parts(float);
// Obs: we convert to f64 to print the value, but we lose 3bit precision from mantissa
let value = UFloat9x56::to_string(float, PRECISION);
println!(" UFloat9x56: {float:#018x} -> 2^{exponent:<3} * {mantissa:<17} = {value}");
}
#[derive(Debug, PartialEq, Eq)]
pub enum ConversionError {
/// The denominator is zero
DivisionByZero,
}
/// Basic float conversion and helper methods, supports custom 64bit floats.
/// OBS: Doesn't support NaNs, Subnormal numbers, infinities and negative values.
pub trait Float {
/// Number of bits for represent the exponent
const EXPONENT_DIGITS: u32;
/// Number of bits used to represent the mantissa
const MANTISSA_DIGITS: u32;
/// The zero exponent offset
const EXPONENT_OFFSET: u32;
/// Wether the float supports `not a number` or not.
const HAS_NAN: bool;
/// Convert a rational number to float
/// # Errors
/// Returns an error if the value is too big or too tiny to be represented.
#[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation, clippy::unwrap_used, clippy::missing_errors_doc)]
fn rational_to_float<N, D>(num: N, den: D, rounding: Rounding) -> Result<u64, ConversionError> where N: Into<BigUint>, D: Into<BigUint> {
let num = num.into();
let den = den.into();
if den.is_zero() {
return Err(ConversionError::DivisionByZero);
}
// Trivial case for zero
if num.is_zero() {
return Ok(0);
}
// Compute the number of bits required to represent the numerator and denominator.
let num_bits = num.bits() as i32;
let den_bits = den.bits() as i32;
// Try an exponent that will make the mantissa fit in the available bits.
let mut exponent = (Self::MANTISSA_DIGITS as i32) - (num_bits - den_bits);
// Extract the mantissa by computing: numerator * 2^exponent / denominator.
let mut mantissa = mul_div(exponent, num.clone(), den.clone(), rounding);
// If the mantissa is too large, adjust the exponent.
let mantissa_max = (BigUint::one() << Self::MANTISSA_DIGITS) - 1u32;
if mantissa > mantissa_max {
exponent -= 1;
mantissa = mul_div(exponent, num, den, rounding);
}
// Normalize mantissa to be a value between 1~0
exponent = -exponent;
exponent += (Self::MANTISSA_DIGITS - 1) as i32;
exponent += Self::EXPONENT_OFFSET as i32;
// Verify if the float is subnormal number (msb is not set)
if exponent < 0 {
mantissa = mul_div(exponent, mantissa, BigUint::one() << exponent.unsigned_abs(), rounding);
exponent = 0;
}
let mut exponent = u64::from(exponent.unsigned_abs());
// If the the exponent is too big to be represented, bound it to the maximum value.
let max_exponent = (1 << Self::EXPONENT_DIGITS) - 1;
if exponent > max_exponent {
exponent = max_exponent;
if Self::HAS_NAN {
// Set the mantissa to zero, to represent infinity instead of NaN
mantissa = BigUint::zero();
} else {
// Set mantissa to the maximum value
mantissa.clone_from(&mantissa_max);
}
}
// Remove most significant bit from mantissa (it is always set)
mantissa &= mantissa_max >> 1;
// Shift the exponent to the right position
exponent <<= Self::MANTISSA_DIGITS - 1;
// Convert mantissa and exponent to float
Ok(mantissa.to_u64_digits().first().unwrap_or(&0u64) | exponent)
}
/// Retrieve the mantissa and exponent
#[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
#[must_use]
fn into_parts(float: u64) -> (u64, i32) {
let mantissa_msb = 1 << (Self::MANTISSA_DIGITS - 1);
let mut exp = (float >> (Self::MANTISSA_DIGITS - 1)) as i32;
let mut mantissa = float & (mantissa_msb - 1);
// Doesn't set the msb if the float is a subnormal number
if exp > 0 {
mantissa |= mantissa_msb;
}
exp -= Self::EXPONENT_OFFSET as i32;
if mantissa > 0 {
exp -= (Self::MANTISSA_DIGITS - 1) as i32;
}
(mantissa, exp)
}
/// Converts a float to a rational number
#[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
#[must_use]
fn to_rational(float: u64) -> (BigUint, BigUint) {
let (mantissa, exponent) = Self::into_parts(float);
// If mantissa is zero, return 0
if mantissa == 0 {
return (0u32.into(), 1u32.into());
}
if exponent > 0 {
let mantissa = BigUint::from(mantissa) << exponent.unsigned_abs();
(mantissa, BigUint::from(1u32))
} else {
let exponent = BigUint::from(1u8) << exponent.unsigned_abs();
(BigUint::from(mantissa), exponent)
}
}
/// Generic function for formating float values, equivalent to:
/// OBS: negative values, NaNs, Infinite, and subnormal numbers are ignored.
/// ```rust
/// let precision = 32usize;
/// let value = 0.1f64;
/// let _ = format!("{value:.0$}", precision);
/// ```
#[must_use]
fn to_string(float: u64, precision: u32) -> String {
let (numerator, denominator) = Self::to_rational(float);
rational_to_string(numerator, denominator, precision)
}
}
// Ref: https://github.com/Analog-Labs/analog-gmp/blob/5653d1dc756e93072e060ab0bbc0b8822931e2c0/src/utils/Float9x56.sol#L13-L117
pub enum UFloat9x56 {}
impl Float for UFloat9x56 {
/// Number of bits for represent the exponent
const EXPONENT_DIGITS: u32 = 9;
/// Number of bits for represent the mantissa (msb is always set)
const MANTISSA_DIGITS: u32 = 56;
/// The zero exponent in raw format.
/// which is: 0x8000000000000000 >> 55
const EXPONENT_OFFSET: u32 = 256;
/// `UFloat9x56` doesn't support NaN or infinity.
const HAS_NAN: bool = false;
}
// Ref: https://en.wikipedia.org/wiki/IEEE_754
pub enum IEEE754SinglePrecision {}
impl Float for IEEE754SinglePrecision {
/// Number of bits used to represent the exponent
const EXPONENT_DIGITS: u32 = 8;
/// Number of bits used to represent the mantissa (msb is always set)
const MANTISSA_DIGITS: u32 = 24;
/// The zero exponent in raw format.
/// which is: 0x3f800000 >> 23
const EXPONENT_OFFSET: u32 = 127;
/// Is +infinity when exponent = 128 and mantissa = 0
const HAS_NAN: bool = true;
}
// Ref: https://en.wikipedia.org/wiki/Double-precision_floating-point_format
pub enum IEEE754DoublePrecision {}
impl Float for IEEE754DoublePrecision {
/// Number of bits used to represent the exponent
const EXPONENT_DIGITS: u32 = 11;
/// Number of bits for represent the mantissa (msb is always set)
const MANTISSA_DIGITS: u32 = 53;
/// The zero exponent in raw format.
/// which is: 0x3ff0000000000000 >> 52
const EXPONENT_OFFSET: u32 = 1023;
/// Is +infinity when exponent = 1024 and mantissa = 0
const HAS_NAN: bool = true;
}
/// Rounding mode used when converting rational to float.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Rounding {
/// Rounds towards zero
Down,
/// Rounds to the nearest value, if the number falls midway,
/// it is rounded to the nearest value with an even least significant digit.
NearestTiesEven,
/// Rounds to the nearest value; if the number falls midway,
/// it is rounded to the nearest value above.
NearestTiesAwayFromZero,
/// Rounds towards infinite
Up,
}
/// Computes: numerator * 2^exponent / denominator.
#[allow(clippy::cast_possible_truncation)]
#[inline]
fn mul_div<N, D>(exponent: i32, numerator: N, denominator: D, rounding: Rounding) -> BigUint where N: Into<BigUint>, D: Into<BigUint> {
let mut numerator = numerator.into();
let denominator = denominator.into();
// Multiply the numerator by 2^exponent
if exponent >= 0 {
numerator <<= exponent.unsigned_abs();
} else {
numerator >>= exponent.unsigned_abs();
};
// Round the result
let remainder = match rounding {
Rounding::Down => BigUint::zero(),
Rounding::NearestTiesEven => {
let mut rem = &denominator >> 1u32;
if !rem.is_zero() && denominator.is_even() {
// Round towards even
let is_odd = mul_div(0, numerator.clone(), denominator.clone(), Rounding::Up).is_odd();
rem -= u32::from(is_odd);
}
rem
},
Rounding::NearestTiesAwayFromZero => &denominator >> 1u32,
Rounding::Up => &denominator - BigUint::one(),
};
(numerator + remainder) / denominator
}
/// Convert a rational to decimal with arbitrary precision.
fn rational_to_string<N: Into<BigUint>, D: Into<BigUint>>(num: N, den: D, precision: u32) -> String {
let num = num.into();
let den = den.into();
// Split integer and fraction parts
let mut integer = &num / &den;
let mut fraction = &num % &den;
// Decimals counter
let mut decimals = 0;
// Extract decimals digits
while decimals < precision {
fraction *= 10u8;
integer *= 10u8;
integer += &fraction / &den;
fraction = &fraction % &den;
decimals += 1;
// Round last digit to nearest
if decimals == precision && fraction.pow(2) >= den {
integer += 1u32;
}
}
// format the result
let denominator = BigUint::from(10u8).pow(decimals);
format!("{}.{:02$}", &integer / &denominator, integer % denominator, decimals as usize)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment