Last active
March 24, 2020 11:27
-
-
Save jepers/0be2290ae7177af1a371fe1d0059ea7f to your computer and use it in GitHub Desktop.
A Float8 type
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ============================================================================ | |
// This is an attempt at implementing a Float8 type by Jens Persson. | |
// It seems to work. | |
// This file is in the form of a command line program which will | |
// do some basic checks and print all Float8 values. | |
// --------------------------------------------------------------------------- | |
// Use it in any way you like, please let me know of any issues or | |
// improvements here: https://forums.swift.org/t/33337/38 | |
// =========================================================================== | |
// ---------------------------------------------------------------------------- | |
// About Avoiding Accidental Infinite Recursion. | |
// ---------------------------------------------------------------------------- | |
// When implementing something like `Float8`, it's easy to cause unintentional | |
// infinite recursion, especially in the presence of default implementations, | |
// and when "cheating" by eg converting to `Float`, doing some work, and | |
// then converting the result back to `Float8`, as discussed here: | |
// https://forums.swift.org/t/33337/8 | |
// https://forums.swift.org/t/33337/9 | |
// | |
// So we'll implement eg `Float8.init(_ value: Float)` ourselves, rather than | |
// using the default implementation, to avoid the risk of infinite recursion | |
// now or in the future. And our implementation must not call any member of | |
// `Float8` that might result in a call back to it. Members that have to be | |
// avoided (depending on how we implement them) might include operators and | |
// literal initializers, which might be tricky to spot or remember: | |
// `let a: Float8 = -0.0` and `someFloat8 = 0` and `someFloat8 = -someOtherF8`. | |
// | |
// ---------------------------------------------------------------------------- | |
// But anyway, to summarize, and to keep it relatively simple and manageable:
// ---------------------------------------------------------------------------- | |
// | |
// * All members (of `Float8`) are separated into "layers"(/extensions). | |
// | |
// * A member defined in layer N is only allowed to call members defined in | |
// layer N-1, ie: A member of layer N must not call any member in layer >= N. | |
// | |
// * Checking for disallowed calls can be done manually by commenting out all | |
// but the checked member within the same layer, and commenting out all | |
// higher layers. Or I guess it could be automated in some way. | |
// | |
// * But note that these precautions won't help with the problem of default
//   implementations. We must identify and implement all of these ourselves.
// | |
// ---------------------------------------------------------------------------- | |
import Darwin
// ---------------------------------------------------------------------------- | |
// MARK: - Member Layer 0 | |
// ---------------------------------------------------------------------------- | |
/// An 8-bit floating point type (which might not work as expected, though I
/// haven't found any issues so far).
///
/// This type has been put together by an amateur looking at this:
/// * https://en.wikipedia.org/wiki/Single-precision_floating-point_format
/// * http://www.cs.jhu.edu/~jorgev/cs333/readings/8-Bit_Floating_Point.pdf
/// * https://raw.githubusercontent.com/apple/swift/master/stdlib/public/core/FloatingPointTypes.swift.gyb
/// and by piggybacking on `Float32` as much as possible while trying to avoid
/// the risk of infinite recursion.
///
/// `Float8` has 4 exponent bits and 3 significand bits; the bit layout used in
/// the examples below is `sign_exponent_significand`.
///
/// ```
/// Exponent bias 7
/// Exponent bit pattern:   0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
/// Exponent:             sub -6 -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7  inf/nan
///
/// 0_0000_001 = 0x01 = 2**(-6) * (0 + 1/8) = 0.001953125 (least nonzero magnitude)
/// 0_0000_111 = 0x07 = 2**(-6) * (0 + 7/8) = 0.013671875 (greatest subnormal magnitude)
/// 0_0001_000 = 0x08 = 2**(-6) * (1 + 0/8) = 0.015625    (least normal nonzero magnitude)
/// 0_0111_000 = 0x38 = 2**( 0) * (1 + 0/8) = 1.0
/// 0_1110_111 = 0x77 = 2**( 7) * (1 + 7/8) = 240.0       (greatest finite magnitude)
/// ```
struct Float8 {
    /// The raw 8-bit encoding of this value. Readable everywhere, but only
    /// settable from within this file.
    private(set) var bitPattern: UInt8

    /// Creates a value whose encoding is exactly `bitPattern`; no validation
    /// or normalization is performed.
    init(bitPattern: UInt8) {
        self.bitPattern = bitPattern
    }
}
// ---------------------------------------------------------------------------- | |
// MARK: - Member Layer 1 | |
// ---------------------------------------------------------------------------- | |
extension Float8 {
    typealias Exponent = Int
    typealias RawSignificand = UInt8
    typealias RawExponent = UInt
    typealias Stride = Float8
    typealias Magnitude = Float8
    typealias FloatLiteralType = Float32
    typealias IntegerLiteralType = Int64

    // ---- Format parameters -------------------------------------------------

    static var exponentBitCount: Int { 4 }
    static var significandBitCount: Int { 3 }
    /// (1 << (exponentBitCount - 1)) - 1
    static var _exponentBias: UInt { 7 }
    /// The all-ones exponent field, shared by infinities and NaNs.
    /// Read-only, like every other constant in this layer.
    static var _infinityExponent: UInt { 0b1111 }
    /// Mask selecting the significand field of a bit pattern.
    /// Read-only, like every other constant in this layer.
    static var _significandMask: UInt8 { 0b111 }

    // ---- Special values ----------------------------------------------------

    static var nan: Float8 { Float8(bitPattern: 0b0_1111_110) }
    static var signalingNaN: Float8 { Float8(bitPattern: 0b0_1111_010) }
    static var infinity: Float8 { Float8(bitPattern: 0b0_1111_000) }
    static var _negativeInfinity: Float8 { Float8(bitPattern: 0b1_1111_000) }
    static var zero: Float8 { Float8(bitPattern: 0) }
    static var _negativeZero: Float8 { Float8(bitPattern: 0b1_0000_000) }
    static var one: Float8 { Float8(bitPattern: 0b0_0111_000) }

    /// 0.015625
    static var leastNormalMagnitude: Float8 {
        Float8(bitPattern: 0b0_0001_000)
    }

    /// 0.001953125
    static var leastNonzeroMagnitude: Float8 {
        Float8(bitPattern: 0b0_0000_001)
    }

    /// 240.0
    static var greatestFiniteMagnitude: Float8 {
        Float8(bitPattern: 0b0_1110_111)
    }

    /// The mathematical constant pi approximated by the closest representable
    /// `Float8` value less than pi, which is `3.0`.
    static var pi: Float8 { Float8(bitPattern: 0b0_1000_100) }

    // ---- Field accessors (pure bit operations) -----------------------------

    /// The 4-bit exponent field, shifted down past the significand field.
    var exponentBitPattern: UInt { UInt((bitPattern &>> 3) & 0b1111) }

    /// The 3-bit significand field.
    var significandBitPattern: UInt8 { bitPattern & 0b111 }

    /// `.minus` when the top bit is set, `.plus` otherwise.
    var sign: FloatingPointSign { bitPattern & 128 == 0 ? .plus : .minus }

    var isCanonical: Bool { true }

    /// True for both `+0` and `-0`: everything except the sign bit is clear.
    var isZero: Bool {
        bitPattern & 0b0_1111_111 == 0
    }

    /// Negation flips the sign bit and nothing else.
    static prefix func - (operand: Float8) -> Float8 {
        // I have verified that the corresponding implementation is valid for
        // all bit patterns of `Float32`.
        return Float8(bitPattern: operand.bitPattern ^ 0b1_0000_000)
    }
}
// ---------------------------------------------------------------------------- | |
// MARK: - Member Layer 3 | |
// ---------------------------------------------------------------------------- | |
extension Float8 {
    /// The most significant significand bit; set in quiet NaNs and clear in
    /// signaling NaNs.
    private static var _quietNaNMask: UInt8 {
        1 &<< UInt8(significandBitCount - 1)
    }

    /// True unless the exponent field is all ones (infinity or NaN).
    var isFinite: Bool {
        exponentBitPattern < (1 << Float8.exponentBitCount) &- 1
    }
}
// ---------------------------------------------------------------------------- | |
// MARK: - Member Layer 4 | |
// ---------------------------------------------------------------------------- | |
extension Float8 {
    /// Finite with a nonzero exponent field.
    var isNormal: Bool {
        exponentBitPattern > 0 && isFinite
    }

    /// Zero exponent field with a nonzero significand field.
    var isSubnormal: Bool {
        exponentBitPattern == 0 && significandBitPattern != 0
    }

    /// All-ones exponent field with a zero significand field.
    var isInfinite: Bool {
        !isFinite && significandBitPattern == 0
    }

    /// All-ones exponent field with a nonzero significand field.
    var isNaN: Bool {
        !isFinite && significandBitPattern != 0
    }
}
// ---------------------------------------------------------------------------- | |
// MARK: - Member Layer 5 | |
// ---------------------------------------------------------------------------- | |
extension Float8 {
    /// A NaN whose quiet bit is clear.
    var isSignalingNaN: Bool {
        isNaN && (significandBitPattern & Self._quietNaNMask) == 0
    }

    /// The unbiased binary exponent.
    ///
    /// Returns `Int.max` for infinities and NaNs and `Int.min` for zeros.
    /// For subnormals the exponent is lowered by the number of leading zero
    /// bits in the significand field, so that it describes the highest set
    /// significand bit.
    var exponent: Int {
        if !isFinite { return .max }
        if isZero { return .min }
        let provisional = Int(exponentBitPattern) - Int(Self._exponentBias)
        if isNormal { return provisional }
        // Subnormal: count how far the leading set bit sits below the
        // implicit-bit position.
        let shift = Self.significandBitCount
            - significandBitPattern._binaryLogarithm()
        return provisional + 1 - shift
    }
}
// ---------------------------------------------------------------------------- | |
// MARK: - Member Layer 6 | |
// ---------------------------------------------------------------------------- | |
extension Float8 {
    /// Assembles a value from its three fields.
    ///
    /// - Parameters:
    ///   - sign: The sign of the result.
    ///   - exponentBitPattern: The biased exponent; only the low 4 bits are
    ///     used.
    ///   - significandBitPattern: The significand field; only the low 3 bits
    ///     are used.
    init(sign: FloatingPointSign,
         exponentBitPattern: UInt,
         significandBitPattern: UInt8)
    {
        let signBits = sign == .minus
            ? Float8._negativeZero.bitPattern
            : Float8.zero.bitPattern
        let exponentBits = UInt8(truncatingIfNeeded:
            (exponentBitPattern & 0b1111)) &<< Self.significandBitCount
        let significandBits = significandBitPattern & Self._significandMask
        self.init(bitPattern: signBits | exponentBits | significandBits)
    }

    /// The least representable value that compares greater than this value.
    ///
    /// NaNs are returned quieted, `+infinity` is returned unchanged, and both
    /// zeros step to `leastNonzeroMagnitude`.
    var nextUp: Float8 {
        // I've verified that this implementation works the same as the
        // one in the standard library for `Float`.
        // ------------------------------------------------------------
        // Silence signaling NaNs, map -0 to +0:
        // (Can't use `let x = v + 0` here)
        var x = self
        if x.isSignalingNaN {
            x = Float8(bitPattern: x.bitPattern | Float8.nan.bitPattern)
        } else if x.isZero {
            x = .zero
        }
        // `x < .infinity` is false exactly for NaN and +infinity. Testing that
        // on the bits keeps this member from calling into the comparison
        // operators, which are implemented via conversion to `Float`.
        guard !x.isNaN, x.bitPattern != Float8.infinity.bitPattern else {
            return x
        }
        // +1 for non-negative encodings, -1 (all ones) for negative ones:
        // the arithmetic shift smears the sign bit, `| 1` makes it odd.
        let increment = Int8(bitPattern: x.bitPattern) &>> 7 | 1
        return Float8(bitPattern: x.bitPattern &+ UInt8(bitPattern: increment))
    }

    /// The greatest representable value that compares less than this value,
    /// obtained by mirroring `nextUp` around zero.
    var nextDown: Float8 {
        return -((-self).nextUp)
    }
}
// ---------------------------------------------------------------------------- | |
// MARK: - Member Layer 7 | |
// ---------------------------------------------------------------------------- | |
extension Float8 {
    /// Converts an integer to the nearest `Float8`, rounding ties to even.
    ///
    /// - Parameter source: The integer to convert.
    /// - Returns: The converted value and whether the conversion was exact.
    ///   Magnitudes too large to represent become an infinity of the
    ///   appropriate sign (reported as inexact).
    private static func _convert<Source: BinaryInteger>(from source: Source)
        -> (value: Self, exact: Bool)
    {
        // --------------------------------------------------------------------
        // Copied with modifications from stdlib impl in FloatingPoint.swift
        // --------------------------------------------------------------------
        // Zero is really extra simple, and saves us from trying to normalize
        // a value that cannot be normalized.
        if _fastPath(source == 0) { return (Self.zero, true) }
        // The sign is needed on every exit path below, so work it out once.
        let isNegative = Source.isSigned && source < 0
        let resultSign: FloatingPointSign = isNegative ? .minus : .plus
        let overflow: Self = isNegative ? ._negativeInfinity : .infinity
        // We now have a non-zero value; convert it to a strictly positive
        // value by taking the magnitude.
        let magnitude = source.magnitude
        var exponent = magnitude._binaryLogarithm()
        // If the exponent would be larger than the largest representable
        // exponent, the result is just an infinity of the appropriate sign.
        guard exponent <= Self.greatestFiniteMagnitude.exponent else {
            return (overflow, false)
        }
        // If exponent <= significandBitCount, we don't need to round it to
        // construct the significand; we just need to left-shift it into place;
        // the result is always exact as we've accounted for exponent-too-large
        // already and no rounding can occur.
        if exponent <= Self.significandBitCount {
            let shift = Self.significandBitCount &- exponent
            let significand = RawSignificand(magnitude) &<< shift
            let value = Self(
                sign: resultSign,
                exponentBitPattern: Self._exponentBias + RawExponent(exponent),
                significandBitPattern: significand
            )
            return (value, true)
        }
        // exponent > significandBitCount, so we need to do a rounding right
        // shift, and adjust exponent if needed
        let shift = exponent &- Self.significandBitCount
        let halfway = (1 as Source.Magnitude) << (shift - 1)
        let mask = 2 * halfway - 1
        let fraction = magnitude & mask
        var significand =
            RawSignificand(truncatingIfNeeded: magnitude >> shift)
                & Self._significandMask
        if fraction > halfway || (fraction == halfway && significand & 1 == 1) {
            let (incremented, carry) = significand.addingReportingOverflow(1)
            significand = incremented
            // Rounding up out of the significand field bumps the exponent;
            // the initializer below masks the significand back to zero.
            if carry || significand > Self._significandMask {
                exponent += 1
                guard exponent <= Self.greatestFiniteMagnitude.exponent else {
                    return (overflow, false)
                }
            }
        }
        return (Self(
            sign: resultSign,
            exponentBitPattern: Self._exponentBias + RawExponent(exponent),
            significandBitPattern: significand
        ), fraction == 0)
    }

    /// Converts a value of another binary floating-point type to the nearest
    /// `Float8`.
    ///
    /// - Parameter source: The value to convert. `Source` must not be
    ///   `Float8` itself (checked by a precondition).
    /// - Returns: The converted value and whether the conversion was exact.
    ///   NaNs are never reported as exact.
    static func _convert<Source: BinaryFloatingPoint>(from source: Source)
        -> (value: Self, exact: Bool)
    {
        // --------------------------------------------------------------------
        // Copied with modifications from stdlib impl in FloatingPoint.swift
        // --------------------------------------------------------------------
        // NOTE: The stdlib implementation has/had a bug:
        // https://forums.swift.org/t/33337/31
        // The following code has the fix:
        precondition(Source.self != Self.self)
        guard _fastPath(!source.isZero) else {
            return (source.sign == .minus
                ? ._negativeZero
                : .zero, true)
        }
        guard _fastPath(source.isFinite) else {
            if source.isInfinite {
                return (source.sign == .minus
                    ? ._negativeInfinity
                    : .infinity, true)
            }
            // IEEE 754 requires that any NaN payload be propagated,
            // if possible.
            let payload_ =
                source.significandBitPattern &
                    ~(Source.nan.significandBitPattern |
                        Source.signalingNaN.significandBitPattern)
            let mask =
                Self.greatestFiniteMagnitude.significandBitPattern &
                    ~(Self.nan.significandBitPattern |
                        Self.signalingNaN.significandBitPattern)
            let payload = RawSignificand(truncatingIfNeeded: payload_) & mask
            // Although
            // .signalingNaN.exponentBitPattern == .nan.exponentBitPattern,
            // we do not *need* to rely on this relation, and therefore we
            // do not.
            let value = source.isSignalingNaN
                ? Self(
                    sign: source.sign,
                    exponentBitPattern: Self.signalingNaN.exponentBitPattern,
                    significandBitPattern: payload |
                        Self.signalingNaN.significandBitPattern)
                : Self(
                    sign: source.sign,
                    exponentBitPattern: Self.nan.exponentBitPattern,
                    significandBitPattern: payload |
                        Self.nan.significandBitPattern)
            // We define exactness by equality after roundtripping; since NaN
            // is never equal to itself, it can never be converted exactly.
            return (value, false)
        }
        let exponent = source.exponent
        var exemplar = Self.leastNormalMagnitude
        let exponentBitPattern: Self.RawExponent
        let leadingBitIndex: Int
        let shift: Int
        let significandBitPattern: Self.RawSignificand
        if exponent < exemplar.exponent {
            // The floating-point result is either zero or subnormal.
            exemplar = Self.leastNonzeroMagnitude
            let minExponent = exemplar.exponent
            if exponent + 1 < minExponent {
                return (source.sign == .minus ? ._negativeZero : .zero, false)
            }
            if _slowPath(exponent + 1 == minExponent) {
                // Although the most significant bit (MSB) of a subnormal
                // source significand is explicit, Swift BinaryFloatingPoint
                // APIs actually omit any explicit MSB from the count
                // represented in significandWidth. For instance:
                //
                //   Double.leastNonzeroMagnitude.significandWidth == 0
                //
                // Therefore, we do not need to adjust our work here for a
                // subnormal source.
                return source.significandWidth == 0
                    ? (source.sign == .minus ? ._negativeZero : .zero, false)
                    : (source.sign == .minus
                        ? Self(bitPattern: exemplar.bitPattern | 0b10000000)
                        : exemplar, false)
            }
            exponentBitPattern = 0 as Self.RawExponent
            leadingBitIndex = Int(Self.Exponent(exponent) - minExponent)
            shift =
                leadingBitIndex &-
                (source.significandWidth &+
                    source.significandBitPattern.trailingZeroBitCount)
            let leadingBit = source.isNormal
                ? (1 as Self.RawSignificand) << leadingBitIndex
                : 0
            significandBitPattern = leadingBit | (shift >= 0
                ? Self.RawSignificand(source.significandBitPattern) << shift
                : Self.RawSignificand(source.significandBitPattern >> -shift))
        } else {
            // The floating-point result is either normal or infinite.
            exemplar = Self.greatestFiniteMagnitude
            if exponent > exemplar.exponent {
                return (source.sign == .minus ? ._negativeInfinity : .infinity,
                        false)
            }
            exponentBitPattern = exponent < 0
                ? (1 as Self).exponentBitPattern - Self.RawExponent(-exponent)
                : (1 as Self).exponentBitPattern + Self.RawExponent(exponent)
            leadingBitIndex = exemplar.significandWidth
            shift =
                leadingBitIndex &-
                (source.significandWidth &+
                    source.significandBitPattern.trailingZeroBitCount)
            let sourceLeadingBit = source.isSubnormal
                ? (1 as Source.RawSignificand) <<
                    (source.significandWidth &+
                        source.significandBitPattern.trailingZeroBitCount)
                : 0
            significandBitPattern = shift >= 0
                ? Self.RawSignificand(
                    sourceLeadingBit ^ source.significandBitPattern) << shift
                : Self.RawSignificand(
                    (sourceLeadingBit ^ source.significandBitPattern) >> -shift)
        }
        let value = Self(
            sign: source.sign,
            exponentBitPattern: exponentBitPattern,
            significandBitPattern: significandBitPattern)
        if source.significandWidth <= leadingBitIndex {
            return (value, true)
        }
        // We promise to round to the closest representation, and if two
        // representable values are equally close, the value with more trailing
        // zeros in its significand bit pattern. Therefore, we must take a look
        // at the bits that we've just truncated.
        let ulp = (1 as Source.RawSignificand) << -shift
        let truncatedBits = source.significandBitPattern & (ulp - 1)
        if truncatedBits < ulp / 2 {
            return (value, false)
        }
        let rounded = source.sign == .minus ? value.nextDown : value.nextUp
        guard _fastPath(
            truncatedBits != ulp / 2 ||
                significandBitPattern.trailingZeroBitCount <
                rounded.significandBitPattern.trailingZeroBitCount)
        else { return (value, false) }
        return (rounded, false)
    }
}
// ---------------------------------------------------------------------------- | |
// MARK: - Member Layer 8 | |
// ---------------------------------------------------------------------------- | |
extension Float8 {
    /// Creates the `Float8` nearest to the given integer.
    init<Source: BinaryInteger>(_ value: Source) {
        self = Self._convert(from: value).value
    }

    /// Creates a `Float8` from the given integer, or returns `nil` if the
    /// integer cannot be represented exactly.
    init?<Source: BinaryInteger>(exactly value: Source) {
        let (converted, isExact) = Self._convert(from: value)
        guard isExact else { return nil }
        self = converted
    }

    /// Implemented explicitly (rather than relying on a default) so that
    /// integer literals always go through `_convert`.
    init(integerLiteral value: Int64) {
        self = Self._convert(from: value).value
    }

    /// Creates the `Float8` nearest to the given floating-point value.
    init<Source: BinaryFloatingPoint>(_ value: Source) {
        self = Self._convert(from: value).value
    }

    /// Creates a `Float8` from the given floating-point value, or returns
    /// `nil` if the value cannot be represented exactly.
    init?<Source: BinaryFloatingPoint>(exactly value: Source) {
        let (converted, isExact) = Self._convert(from: value)
        guard isExact else { return nil }
        self = converted
    }
}
// ---------------------------------------------------------------------------- | |
// MARK: - Member Layer 9 | |
// ---------------------------------------------------------------------------- | |
extension Float8 {
    /// Builds the value via `Float`'s initializer of the same shape and
    /// converts the result back to `Float8`.
    init(sign: FloatingPointSign, exponent: Int, significand: Float8) {
        self.init(Float(sign: sign,
                        exponent: exponent,
                        significand: significand.float))
    }

    init(floatLiteral value: Float) {
        // There was an infinite recursion here for eg `Float8(-Float(0))`,
        // but not for `Float8(-Float(1))` or `Float8(Float(0))`.
        // This check takes care of that particular case, but are there more?
        // if value == -Float(0) { self.init(bitPattern: 0b1_0000_000) }
        // else { self.init(value) }
        self.init(value)
    }

    /// This value widened to `Float`.
    ///
    /// For NaNs the sign, the quiet/signaling kind and the payload bit are
    /// carried over.
    var float: Float {
        // This will be used a lot later (see code further down) when promoting
        // to Float, doing arithmetic, converting back the result to Float8.
        // We cannot use the following (I guess) since we have no control over
        // which members of Float8 it will call (now or in the future):
        //
        //   return Float.init(self)
        //
        // So we'll have to implement it ourselves:
        if self.isFinite {
            // The implicit leading bit: 1 for normals, 0 for zeros/subnormals.
            var zeroOrOne: Float = self.isZero ? 0.0 : 1.0
            var exp = Float(exponentBitPattern) - Float(Self._exponentBias)
            if isSubnormal {
                zeroOrOne = 0.0
                exp += 1
            }
            let fraction: Float = Float(bitPattern & 0b111) / 8.0
            let fsign = sign == .minus ? -Float(1) : Float(1)
            return fsign * powf(Float(2), exp) * (zeroOrOne + fraction)
        } else if self.isInfinite {
            return self.sign == .minus ? -Float.infinity : Float.infinity
        }
        // NaN: keep whatever significand bits are not part of the NaN markers.
        let payload_ = self.significandBitPattern &
            ~(Float8.nan.significandBitPattern |
                Float8.signalingNaN.significandBitPattern)
        let mask = Float.greatestFiniteMagnitude.significandBitPattern &
            ~(Float.nan.significandBitPattern |
                Float.signalingNaN.significandBitPattern)
        let payload = UInt32(payload_) & mask
        let nanBitPattern = isSignalingNaN
            ? Float.signalingNaN.bitPattern
            : Float.nan.bitPattern
        let signBit: UInt32 = sign == .minus ? UInt32(1) &<< UInt32(31) : 0
        return Float(bitPattern: nanBitPattern | payload | signBit)
    }

    func distance(to other: Float8) -> Float8 {
        return Float8.init(other.float - self.float)
    }

    func advanced(by n: Float8) -> Float8 {
        return Float8.init(self.float + n.float)
    }

    /// This value with its sign bit cleared.
    var magnitude: Float8 {
        // Clearing the sign bit is exact for every bit pattern and avoids a
        // round trip through `Float` and the conversion machinery.
        return Float8(bitPattern: bitPattern & 0b0_1111_111)
    }
}
// TODO: Sort members of this extension into appropriate "layers":
extension Float8: BinaryFloatingPoint {
    /// The significand scaled into the exponent-0 binade.
    ///
    /// NaNs are returned unchanged; zeros and infinities return the positive
    /// value with the same exponent field.
    var significand: Float8 {
        if isNaN { return self }
        if isNormal {
            return Float8(sign: .plus,
                          exponentBitPattern: Self._exponentBias,
                          significandBitPattern: significandBitPattern)
        }
        if isSubnormal {
            // Shift the leading set bit up into the implicit-bit position;
            // the initializer masks it off again.
            let shift = Self.significandBitCount
                - significandBitPattern._binaryLogarithm()
            return Float8(
                sign: .plus,
                exponentBitPattern: Self._exponentBias,
                significandBitPattern: significandBitPattern &<< shift
            )
        }
        // zero or infinity.
        return Float8(
            sign: .plus,
            exponentBitPattern: exponentBitPattern,
            significandBitPattern: 0
        )
    }

    /// The unit in the last place of this value; NaN for non-finite values.
    var ulp: Float8 {
        guard isFinite else { return .nan }
        if isNormal {
            // Keep only the exponent field, then scale down by 2**-3.
            let bitPattern_ = bitPattern & Self.infinity.bitPattern
            return Float8(bitPattern: bitPattern_) * 0x1p-3
        }
        return .leastNormalMagnitude * 0x1p-3
    }

    /// The power of two with the same sign and exponent as this value; NaN
    /// for non-finite values.
    var binade: Float8 {
        guard isFinite else { return Float8.nan }
        if isSubnormal {
            // Keep the sign and only the highest set significand bit.
            let shifts = (bitPattern & 0b0_0000_111).leadingZeroBitCount
            let signBit = bitPattern & 0b1_0000_000
            return Float8(bitPattern: signBit | (UInt8(1) &<< (7 &- shifts)))
        }
        // Keep sign and exponent fields, clear the significand field.
        return Float8(bitPattern:
            bitPattern & (Float8._negativeInfinity).bitPattern)
    }

    /// The number of significand bits needed to represent this value, not
    /// counting the leading bit; `-1` for zeros, infinities and NaNs.
    var significandWidth: Int {
        let trailingZeroBits = significandBitPattern.trailingZeroBitCount
        if isNormal {
            guard significandBitPattern != 0 else { return 0 }
            return Self.significandBitCount &- trailingZeroBits
        }
        if isSubnormal {
            let leadingZeroBits = significandBitPattern.leadingZeroBitCount
            return Self.RawSignificand.bitWidth &-
                (trailingZeroBits &+ leadingZeroBits &+ 1)
        }
        return -1
    }

    // Everything below promotes to `Float`, lets `Float` do the work, and
    // converts the result back to `Float8`.

    mutating func round(_ rule: FloatingPointRoundingRule) {
        var f = self.float
        f.round(rule)
        self = Float8(f)
    }

    static func - (lhs: Float8, rhs: Float8) -> Float8 {
        return Float8(lhs.float - rhs.float)
    }

    static func * (lhs: Float8, rhs: Float8) -> Float8 {
        return Float8(lhs.float * rhs.float)
    }

    static func *= (lhs: inout Float8, rhs: Float8) {
        var f = lhs.float
        f *= rhs.float
        lhs = Float8(f)
    }

    static func / (lhs: Float8, rhs: Float8) -> Float8 {
        return Float8(lhs.float / rhs.float)
    }

    static func /= (lhs: inout Float8, rhs: Float8) {
        var f = lhs.float
        f /= rhs.float
        lhs = Float8(f)
    }

    static func += (lhs: inout Float8, rhs: Float8) {
        var f = lhs.float
        f += rhs.float
        lhs = Float8(f)
    }

    static func + (lhs: Float8, rhs: Float8) -> Float8 {
        let r = lhs.float + rhs.float
        return Float8.init(r)
    }

    static func -= (lhs: inout Float8, rhs: Float8) {
        var f = lhs.float
        f -= rhs.float
        lhs = Float8(f)
    }

    mutating func formRemainder(dividingBy other: Float8) {
        var f = self.float
        f.formRemainder(dividingBy: other.float)
        self = Float8(f)
    }

    mutating func formTruncatingRemainder(dividingBy other: Float8) {
        var f = self.float
        f.formTruncatingRemainder(dividingBy: other.float)
        self = Float8(f)
    }

    mutating func formSquareRoot() {
        var f = self.float
        f.formSquareRoot()
        self = Float8(f)
    }

    mutating func addProduct(_ lhs: Float8, _ rhs: Float8) {
        var f = self.float
        f.addProduct(lhs.float, rhs.float)
        self = Float8(f)
    }

    func isEqual(to other: Float8) -> Bool {
        return self.float.isEqual(to: other.float)
    }

    func isLess(than other: Float8) -> Bool {
        return self.float.isLess(than: other.float)
    }

    func isLessThanOrEqualTo(_ other: Float8) -> Bool {
        return self.float.isLessThanOrEqualTo(other.float)
    }
}
extension Float8: CustomStringConvertible, LosslessStringConvertible {
    /// The description of this value widened to `Float`.
    var description: String { return "\(self.float)" }

    /// Parses `description` as a `Float` and converts it to `Float8`.
    ///
    /// Fails if the text is not a valid `Float`, or if the resulting
    /// `Float8` does not print back as exactly the same text.
    init?(_ description: String) {
        guard let f32 = Float(description) else { return nil }
        let f8 = Float8(f32)
        guard f8.description == description else { return nil }
        self = f8
    }
}
//----------------------------------------------------------------------------- | |
// MARK: - Demo | |
//----------------------------------------------------------------------------- | |
extension String {
    /// Returns this string padded on the left to at least `minCount`
    /// characters.
    ///
    /// - Parameters:
    ///   - minCount: The minimum character count of the result.
    ///   - char: The padding character; a space by default.
    /// - Returns: `self` unchanged if it already has `minCount` or more
    ///   characters, otherwise `self` prefixed with the missing padding.
    func leftPadded(to minCount: Int, with char: Character = " ") -> String {
        let missing = minCount - count
        guard missing > 0 else { return self }
        return String(repeating: char, count: missing) + self
    }
}
extension BinaryFloatingPoint {
    /// The value's encoding rendered as `sign_exponent_significand` in binary,
    /// with the exponent and significand fields zero-padded to their bit
    /// counts.
    var segmentedBinaryString: String {
        let signDigit = self.sign == .plus ? "0" : "1"
        let exponentDigits = String(exponentBitPattern, radix: 2)
            .leftPadded(to: Self.exponentBitCount, with: "0")
        let significandDigits = String(significandBitPattern, radix: 2)
            .leftPadded(to: Self.significandBitCount, with: "0")
        return [signDigit, exponentDigits, significandDigits]
            .joined(separator: "_")
    }
}
extension LosslessStringConvertible {
    /// Returns this value's `description`, padded on the left to at least
    /// `minCount` characters with `char` (a space by default).
    func leftPadded(to minCount: Int, with char: Character = " ") -> String {
        return description.leftPadded(to: minCount, with: char)
    }
}
extension Float8 {
    /// Runs a set of sanity checks (as preconditions) and prints a table of
    /// all 256 `Float8` bit patterns followed by a walk over all finite
    /// values from both ends of the range.
    static func test() {
        // --------------------------------------------------------------------
        // See https://forums.swift.org/t/33337/19 and
        // https://forums.swift.org/t/33337/23
        // for details about this.
        precondition(Float8.significandBitCount >= 2) // at least 3 bits, one
                                                      // of which may be implicit.
        precondition(Float8.exponentBitCount >= 2)
        // IEEE-754 imposes the following constraints on the exponent field:
        let emin = Float8.leastNormalMagnitude.exponent
        let emax = Float8.greatestFiniteMagnitude.exponent
        precondition(emin <= emax)
        precondition(emax >= 2)
        precondition(emin == 1 - emax)
        precondition(emax == (1 << (Float8.exponentBitCount - 1)) - 1)
        // --------------------------------------------------------------------
        // Some other checks:
        // If x is -leastNonzeroMagnitude, then x.nextUp is -0.0.
        do {
            var x = Float8.leastNonzeroMagnitude.nextDown
            precondition(x.isZero && x.sign == .plus)
            x = (-Float8.leastNonzeroMagnitude).nextUp
            precondition(x.isZero && x.sign == .minus)
        }
        precondition(Float8.leastNonzeroMagnitude.significandWidth == 0)
        precondition(Float8.leastNonzeroMagnitude.nextUp.significandWidth == 0)
        precondition(Float8.leastNonzeroMagnitude.nextUp.nextUp.significandWidth == 1)
        precondition(Float8(-Float(0)).ulp > 0)
        precondition((0 as Float8).binade == -Float8(0) + -0.0)
        precondition(Double(Float8.zero.ulp) ==
            Double(sign: .plus,
                   exponent: 1 - Int(Float8._exponentBias),
                   significand: 1.0 / Double(1 << Float8.significandBitCount)))
        precondition(Float8.leastNonzeroMagnitude.binade * -1 == -Float8.zero.ulp)
        precondition(-Float8.leastNonzeroMagnitude.nextUp.binade == -(Float8.zero.ulp * 2 + -0.0))
        do {
            let a = Float8.greatestFiniteMagnitude
            let b = a.ulp / 2
            precondition(a + b == .infinity)
            precondition(a + b.nextDown == a)
            precondition(-a - b == -.infinity)
            precondition(-a - b.nextDown == -a)
        }
        // Print all values:
        var finCount = 0
        var infCount = 0
        var nanCount = 0
        // Header titles are right-aligned to the same widths as the row
        // fields printed below, so the columns line up.
        let columns: [(title: String, width: Int)] = [
            ("N", 4), ("Float8", 12), ("bitPattern", 12), ("exponent", 9),
            ("significand", 12), ("binade", 12), ("ulp", 12),
        ]
        print(columns
            .map { $0.title.leftPadded(to: $0.width) }
            .joined(separator: " "))
        print("-------------------------------------------------------------------------------")
        for byteValue: UInt8 in .min ... .max {
            let v = Float8(bitPattern: byteValue)
            let expStr: String
            switch v.exponent {
            case .min: expStr = "Int.min"
            case .max: expStr = "Int.max"
            default: expStr = v.exponent.description
            }
            print(
                byteValue.leftPadded(to: 4),
                v.leftPadded(to: 12),
                v.segmentedBinaryString.leftPadded(to: 12),
                expStr.leftPadded(to: 9),
                v.significand.leftPadded(to: 12),
                v.binade.leftPadded(to: 12),
                v.ulp.leftPadded(to: 12),
                v.isSubnormal ? "subnormal" : v.isNormal ? "normal" : "n/a"
            )
            if v.isFinite { finCount += 1 }
            if v.isNaN { nanCount += 1 }
            if v.isInfinite { infCount += 1 }
        }
        print("Number of finite values:", finCount)
        print("Number of infinite values:", infCount)
        print("Number of NaNs:", nanCount)
        precondition(finCount + infCount + nanCount == 256)
        print("--")
        // Walk upward from -240 and downward from 240 in lock step, checking
        // that the two values stay exact negatives of each other.
        var w: (Float8, Float8) = (-240, 240)
        while true {
            print(w.0.leftPadded(to: 12),
                  w.0.segmentedBinaryString.leftPadded(to: 12),
                  w.1.segmentedBinaryString.leftPadded(to: 12),
                  w.1.leftPadded(to: 12)
            )
            precondition(w.0 + w.1 == -0.0) // use -0.0 just to check
            precondition((w.0 - w.1).sign == w.0.sign)
            precondition(w.0.nextUp.nextDown == w.0)
            if w.0.nextUp.isInfinite || w.1.nextDown.isInfinite { break }
            w = (w.0.nextUp, w.1.nextDown)
        }
        precondition(w == (240, -240))
        print("--")
    }
}
// Entry point of the command line program: run the checks and print the table.
Float8.test()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment