matu3ba · March 17, 2022 22:29
diff --git a/parse_float.patch b/parse_float.patch
 diff --git a/lib/std/fmt/parse_float.zig b/lib/std/fmt/parse_float.zig
 index 585c23ed5..585400587 100644
 --- a/lib/std/fmt/parse_float.zig
 +++ b/lib/std/fmt/parse_float.zig
 @@ -1,36 +1,1111 @@
 -// Adapted from https://github.com/grzegorz-kraszewski/stringtofloat.
 +// Code ported from musl libc 8f12c4e110acb3bbbdc8abfb3a552c3ced718039
 +// and then modified to use softfloat and to assume f128 for everything
 
 -// MIT License
 -//
 -// Copyright (c) 2016 Grzegorz Kraszewski
 -//
 -// Permission is hereby granted, free of charge, to any person obtaining a copy
 -// of this software and associated documentation files (the "Software"), to deal
 -// in the Software without restriction, including without limitation the rights
 -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 -// copies of the Software, and to permit persons to whom the Software is
 -// furnished to do so, subject to the following conditions:
 -//
 -// The above copyright notice and this permission notice shall be included in all
 -// copies or substantial portions of the Software.
 -//
 -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 -// SOFTWARE.
 -//
 +const std = @import("std");
 +const builtin = @import("builtin");
 +const ascii = std.ascii;
 +const endian = builtin.target.cpu.arch.endian();
 
 -// Be aware that this implementation has the following limitations:
 +// TODO port
 +//#define shcnt(f) ((f)->shcnt + ((f)->rpos - (f)->buf))
 +//#define shlim(f, lim) __shlim((f), (lim))
 +//#define shgetc(f) (((f)->rpos != (f)->shend) ? *(f)->rpos++ : __shgetc(f))
 +//#define shunget(f) ((f)->shlim>=0 ? (void)(f)->rpos-- : (void)0)
 //
 -// - Is not round-trip accurate for all values
 -// - Only supports round-to-zero
 -// - Does not handle denormals
 +//#define sh_fromstring(f, s) \
 +//    ((f)->buf = (f)->rpos = (void *)(s), (f)->rend = (void*)-1)
 
 -const std = @import("std");
 -const ascii = std.ascii;
 +const LD_B1B_DIG = 4;
 +const KMAX = 2048;
 +
 +const MASK = (KMAX - 1);
 +
 +const F_PERM = 1;
 +const F_NORD = 4;
 +const F_NOWR = 8;
 +const F_EOF = 16;
 +const F_ERR = 32;
 +const F_SVB = 64;
 +const F_APP = 128;
 +
 +const EOF = -1;
 +
 +const LDBL_MANT_DIG = 113;
 +const LDBL_MIN_EXP = -16381;
 +const LDBL_MAX_EXP = 16384;
 +const LDBL_DIG = 33;
 +const LDBL_MIN_10_EXP = -4931;
 +const LDBL_MAX_10_EXP = 4932;
 +const DECIMAL_DIG = 36;
 +
 +const LdshapeLittle = union {
 +    f: f128,
 +    i: struct {
 +        lo: u64,
 +        mid: u32,
 +        top: u16,
 +        se: u16,
 +    },
 +    i2: struct {
 +        lo: u64,
 +        hi: u64,
 +    },
 +};
 +
 +const LdshapeBig = union {
 +    f: f128,
 +    i: struct {
 +        se: u16,
 +        top: u16,
 +        mid: u32,
 +        lo: u64,
 +    },
 +    @"i2": struct {
 +        hi: u64,
 +        lo: u64,
 +    },
 +};
 +
 +const Ldshape = switch (endian) {
 +    .Little => LdshapeLittle,
 +    .Big => LdshapeBig,
 +};
 +
 +const MuslFILE = struct {
 +    flags: u16, // unsigned
 +    rpos: *u8, // unsigned char*
 +    rend: *u8, // unsigned char*
 +    wend: *u8, // unsigned char*
 +    wpos: *u8, // unsigned char*
 +    wbase: *u8, // unsigned char*
 +    buf: *u8, // unsigned char*
 +    buf_size: usize, // size_t
 +    mode: i32, // int
 +    shend: *u8, // unsigned char*
 +    shlim: isize, // off_t
 +    shcnt: isize, // off_t
 +};
 +
 +fn shlimHelp(f: *MuslFILE, lim: isize) void {
 +    f.shlim = lim;
 +    f.shcnt = f.buf - f.rpos;
 +    // If lim is nonzero, rend must be a valid pointer.
 +    if (lim and f.rend - f.rpos > lim) {
 +        f.shend = f.rpos + lim;
 +    } else {
 +        f.shend = f.rend;
 +    }
 +}
 +
 +fn toReadHelp(f: *MuslFILE) i32 {
 +    f.mode |= f.mode - 1;
 +    if (f.wpos != f.wbase) f.write(f, 0, 0); // TODO fix this
 +    f.wpos = 0;
 +    f.wbase = 0;
 +    f.wend = 0;
 +    if (f.flags & F_NORD) {
 +        f.flags |= F_ERR;
 +        return EOF;
 +    }
 +    f.rend = f.buf + f.buf_size;
 +    f.rpos = f.rend;
 +    if (f.flags & F_EOF > 0) {
 +        return EOF;
 +    } else {
 +        return 0;
 +    }
 +}
 +
 +fn uflow(f: *MuslFILE) i32 {
 +    var c: u8 = undefined;
 +    if (!toReadHelp(f) and f.read(f, &c, 1) == 1)
 +        return c;
 +    return EOF;
 +}
 +
 +// TODO below
 +fn shgetc(f: *MuslFILE) i32 {
 +    var c: i32 = undefined;
 +    var cnt: isize = shcnt(f);
 +    // TODO is it possible to further simplify the following statement?
 +    // if ((f->shlim && cnt >= f->shlim) || (c=__uflow(f)) < 0) {
 +    if(f.shlim and cnt >= f.shlim) {
 +        f.shcnt = f.buf - f.rpos + cnt;
 +        f.shend = f.rpos;
 +        f.shlim = -1;
 +        return EOF;
 +
 +    } else {
 +        c = uflow(f);
 +        if(c<0) {
 +            f.shcnt = f.buf - f.rpos + cnt;
 +            f.shend = f.rpos;
 +            f.shlim = -1;
 +            return EOF;
 +        }
 +    }
 +    cnt+=1;
 +    if (f.shlim && f.rend - f.rpos > f.shlim - cnt) {
 +        f.shend = f.rpos + (f.shlim - cnt);
 +    } else {
 +        f.shend = f.rend;
 +    }
 +    f.shcnt = f.buf - f.rpos + cnt;
 +    if (f.rpos[-1] != c) f.rpos[-1] = c;
 +    return c;
 +}
 +
 +fn scanexp(f: *MuslFILE, pok: i32) i64 {
 +    var c: i32 = undefined;
 +    var x: i32 = undefined;
 +    var y: i64 = undefined;
 +    var neg: i32 = 0;
 +
 +    c = shgetc(f);
 +    if (c=='+' or c=='-') {
 +        neg = (c=='-');
 +        c = shgetc(f);
 +        if (c-'0'>=10 and pok) // 10U
 +            shunget(f);
 +    }
 +    if (c-'0'>=10 and c!='_') { //10U
 +        shunget(f);
 +        return LLONG_MIN;
 +    }
 +    x = 0;
 +    while(true) {
 +        c = shgetc(f);
 +        if (c=='_') {
 +            continue;
 +        } else if (c-'0'<10 and x<INT_MAX/10) { // 10U
 +            x = 10*x + c-'0';
 +        } else {
 +            break;
 +        }
 +    }
 +    y=x;
 +    while(true) {
 +        c = shgetc(f);
 +        if (c=='_') {
 +            continue;
 +        } else if (c-'0'<10 and y<LLONG_MAX/100) { // 10U
 +            y = 10*y + c-'0';
 +        } else {
 +            break;
 +        }
 +    }
 +    while(c-'0'<10 or c=='_') { // 10U
 +        c = shgetc(f);
 +    }
 +    shunget(f);
 +    if (neg != 0) { // return neg ? -y : y;
 +        return -y;
 +    }else {
 +        return y;
 +    }
 +}
 +
 +fn copysignf128(x: f128, y: f128) f128 {
 +{
 +    var ux = Ldshape {
 +        .f = x,
 +    };
 +    var uy = Ldshape {
 +        .f = y,
 +    };
 +    ux.i.se &= 0x7fff;
 +    ux.i.se |= uy.i.se & 0x8000;
 +    return ux.f;
 +}
 +
 +fn mul_eq_f128_float(x: *f128, op_float: f32) void { // float op_float
 +    //x *= 0x1p120f;
 +    var op_f32: f32 = undefined;
 +    memcpy(&op_f32, &op_float, sizeof(float)); // FIXME
 +    var op_f128: f128 = undefined;
 +    f32_to_f128M(op_f32, &op_f128); // FIXME
 +    var new_value: f128 = undefined;
 +    f128M_mul(x, &op_f128, &new_value); // FIXME
 +    x.* = new_value;
 +}
 +
 +fn dbl_to_f128(x: f64) f128 {
 +    var x_f64: f64;
 +    memcpy(&x_f64, &x, sizeof(double)); // FIXME
 +    var result: f128 = undefined;
 +    f64_to_f128M(x_f64, &result); // FIXME
 +    return result;
 +}
 +
 +fn fmodf128(x: f128, y: f128) f128
 +{
 +    var ux = Ldshape {
 +        .f = x,
 +    };
 +    var uy = Ldshape {
 +        .f = y,
 +    };
 +    var ex: i32 = ux.i.se & 0x7fff;
 +    var ey: i32 = uy.i.se & 0x7fff;
 +    var sx: i32 = ux.i.se & 0x8000;
 +
 +    var zero: f128;
 +    ui32_to_f128M(0, &zero); // FIXME
 +    // if (y == 0 || isnan(y) || ex == 0x7fff)
 +    if (f128M_eq(&y, &zero) || f128M_isSignalingNaN(&y) || ex == 0x7fff) { // FIXME
 +        //return (x*y)/(x*y);
 +        var x_times_y: f128;
 +        f128M_mul(&x, &y, &x_times_y); // FIXME
 +        var result: f128;
 +        f128M_div(&x_times_y, &x_times_y, &result); // FIXME
 +        return result;
 +    }
 +    ux.i.se = ex;
 +    uy.i.se = ey;
 +    //if (ux.f <= uy.f) {
 +    if (f128M_le(&ux.f, &uy.f)) {
 +        //if (ux.f == uy.f) {
 +        if (f128M_eq(&ux.f, &uy.f)) {
 +            //return 0*x;
 +            var result: f128;
 +            f128M_mul(&zero, &x, &result); // FIXME
 +            return result;
 +        }
 +        return x;
 +    }
 +
 +    // normalize x and y
 +    if (!ex) {
 +        //ux.f *= 0x1p120f;
 +        mul_eq_f128_float(&ux.f, 0x1p120); // 0x1p120f
 +        ex = ux.i.se - 120;
 +    }
 +    if (!ey) {
 +        //uy.f *= 0x1p120f;
 +        mul_eq_f128_float(&uy.f, 0x1p120); // 0x1p120f
 +        ey = uy.i.se - 120;
 +    }
 +
 +    // x mod y
 +    var hi: u64 = undefined;
 +    var lo: u64 = undefined;
 +    var xhi: u64 = undefined;
 +    var xlo: u64 = undefined;
 +    var yhi: u64 = undefined;
 +    var ylo: u64 = undefined;
 +    // FIXME: what does -1ULL>>16 evaluate to??? is this UB?
 +    xhi = (ux.i2.hi & -1ULL>>16) | @as(u64,1)<<48; // -1ULL, 1ULL
 +    yhi = (uy.i2.hi & -1ULL>>16) | @as(u64,1)<<48; // -1ULL, 1ULL
 +    xlo = ux.i2.lo;
 +    ylo = uy.i2.lo;
 +    while(ex > ey) : (ex-=1) {
 +        hi = xhi - yhi;
 +        lo = xlo - ylo;
 +        if (xlo < ylo)
 +            hi -= 1;
 +        if (hi >> 63 == 0) {
 +            if ((hi|lo) == 0) {
 +                //return 0*x;
 +                var result: f128= undefined;
 +                f128M_mul(&zero, &x, &result);
 +                return result;
 +            }
 +            xhi = 2*hi + (lo>>63);
 +            xlo = 2*lo;
 +        } else {
 +            xhi = 2*xhi + (xlo>>63);
 +            xlo = 2*xlo;
 +        }
 +    }
 +    hi = xhi - yhi;
 +    lo = xlo - ylo;
 +    if (xlo < ylo)
 +        hi -= 1;
 +    if (hi >> 63 == 0) {
 +        if ((hi|lo) == 0) {
 +            //return 0*x;
 +            var result: f128 = undefined;
 +            f128M_mul(&zero, &x, &result);
 +            return result;
 +        }
 +        xhi = hi;
 +        xlo = lo;
 +    }
 +    for (; xhi >> 48 == 0; xhi = 2*xhi + (xlo>>63), xlo = 2*xlo, ex--);
 +    ux.i2.hi = xhi;
 +    ux.i2.lo = xlo;
 +
 +    /* scale result */
 +    if (ex <= 0) {
 +        ux.i.se = (ex+120)|sx;
 +        //ux.f *= 0x1p-120f;
 +        mul_eq_f128_float(&ux.f, 0x1p-120f);
 +    } else
 +        ux.i.se = ex|sx;
 +    return ux.f;
 +}
 +
 +static float128_t int_mul_f128_cast_u32(int sign, uint32_t x0) {
 +    var x0_f128: f128 = undefined;
 +    ui32_to_f128M(x0, &x0_f128);
 +    var sign_f128: f128 = undefined;
 +    i32_to_f128M(sign, &sign_f128);
 +    var result;
 +    f128M_mul(&sign_f128, &x0_f128, &result);
 +    return result;
 +}
 +
 +static float128_t triple_divide(int sign, uint32_t x0, int p10s) {
 +    var part1: f128 = int_mul_f128_cast_u32(sign, x0);
 +    var p10s_f128: f128 = undefined;
 +    i32_to_f128M(p10s, &p10s_f128);
 +    var result: f128 = undefined;
 +    f128M_div(&part1, &p10s_f128, &result);
 +    return result;
 +}
 +
 +static float128_t triple_multiply(int sign, uint32_t x0, int p10s) {
 +    var part1:f128 = int_mul_f128_cast_u32(sign, x0);
 +    var p10s_f128: f128 = undefined;
 +    i32_to_f128M(p10s, &p10s_f128);
 +    var result: f128 = undefined;
 +    f128M_mul(&part1, &p10s_f128, &result);
 +    return result;
 +}
 +
 +fn void mul_eq_f128_int(float128_t *y, int sign) {
 +    var sign_f128: f128 = undefined;
 +    i32_to_f128M(sign, &sign_f128);
 +    var new_value: f128 = undefined;
 +    f128M_mul(y, &sign_f128, &new_value);
 +    *y = new_value;
 +}
 +
 +fn make_f128(uint64_t hi, uint64_t lo) f128 {
 +    const ldshape = Ldshape {
 +        .@"i2".hi = hi;
 +        .@"i2".lo = lo;
 +    };
 +    return ldshape.f;
 +}
 +
 +static void mul_eq_f128_f128(float128_t *a, float128_t b) {
 +    var new_value: f128 = undefined;
 +    f128M_mul(a, &b, &new_value);
 +    *a = new_value;
 +}
 +
 +static void add_eq_f128_dbl(float128_t *a, double b) {
 +    float64_t b_f64;
 +    memcpy(&b_f64, &b, sizeof(double));
 +
 +    var b_f128: f128 = undefined;
 +    f64_to_f128M(b_f64, &b_f128);
 +
 +    var new_value: f128 = undefined;
 +    f128M_add(a, &b_f128, &new_value);
 +    *a = new_value;
 +}
 +
 +static float128_t scalbnf128(float128_t x, int n)
 +{
 +    union ldshape u;
 +
 +    if (n > 16383) {
 +        //x *= 0x1p16383q;
 +        mul_eq_f128_f128(&x, make_f128(0x7ffe000000000000, 0x0000000000000000));
 +        n -= 16383;
 +        if (n > 16383) {
 +            //x *= 0x1p16383q;
 +            mul_eq_f128_f128(&x, make_f128(0x7ffe000000000000, 0x0000000000000000));
 +            n -= 16383;
 +            if (n > 16383)
 +                n = 16383;
 +        }
 +    } else if (n < -16382) {
 +        //x *= 0x1p-16382q * 0x1p113q;
 +        {
 +            var mul_result: f128 = undefined;
 +            const a: f128 = make_f128(0x0001000000000000, 0x0000000000000000);
 +            const b: f128  = make_f128(0x4070000000000000, 0x0000000000000000);
 +            f128M_mul(&a, &b, &mul_result);
 +            mul_eq_f128_f128(&x, mul_result);
 +        }
 +        n += 16382 - 113;
 +        if (n < -16382) {
 +            //x *= 0x1p-16382q * 0x1p113q;
 +            {
 +                var mul_result: f128 = undefined;
 +                var a: f128 = make_f128(0x0001000000000000, 0x0000000000000000);
 +                var b: f128 = make_f128(0x4070000000000000, 0x0000000000000000);
 +                f128M_mul(&a, &b, &mul_result);
 +                mul_eq_f128_f128(&x, mul_result);
 +            }
 +            n += 16382 - 113;
 +            if (n < -16382)
 +                n = -16382;
 +        }
 +    }
 +    //u.f = 1.0;
 +    ui32_to_f128M(1, &u.f);
 +    u.i.se = 0x3fff + n;
 +    mul_eq_f128_f128(&x, u.f);
 +    return x;
 +}
 +
 +static float128_t fabsf128(float128_t x)
 +{
 +    union ldshape u = {x};
 +
 +    u.i.se &= 0x7fff;
 +    return u.f;
 +}
 +
 +static float128_t decfloat(struct MuslFILE *f, int c, int bits, int emin, int sign, int pok)
 +{
 +    uint32_t x[KMAX];
 +    static const uint32_t th[] = { LD_B1B_MAX };
 +    int i, j, k, a, z;
 +    long long lrp=0, dc=0;
 +    long long e10=0;
 +    int lnz = 0;
 +    int gotdig = 0, gotrad = 0;
 +    int rp;
 +    int e2;
 +    int emax = -emin-bits+3;
 +    int denormal = 0;
 +    float128_t y;
 +    float128_t zero;
 +    ui32_to_f128M(0, &zero);
 +    float128_t frac=zero;
 +    float128_t bias=zero;
 +    static const int p10s[] = { 10, 100, 1000, 10000,
 +        100000, 1000000, 10000000, 100000000 };
 +
 +    j=0;
 +    k=0;
 +
 +    /* Don't let leading zeros/underscores consume buffer space */
 +    for (; ; c = shgetc(f)) {
 +        if (c=='_') {
 +            continue;
 +        } else if (c=='0') {
 +            gotdig=1;
 +        } else {
 +            break;
 +        }
 +    }
 +
 +    if (c=='.') {
 +        gotrad = 1;
 +        for (c = shgetc(f); ; c = shgetc(f)) {
 +            if (c == '_') {
 +                continue;
 +            } else if (c=='0') {
 +                gotdig=1;
 +                lrp--;
 +            } else {
 +                break;
 +            }
 +        }
 +    }
 +
 +    x[0] = 0;
 +    for (; c-'0'<10U || c=='.' || c=='_'; c = shgetc(f)) {
 +        if (c == '_') {
 +            continue;
 +        } else if (c == '.') {
 +            if (gotrad) break;
 +            gotrad = 1;
 +            lrp = dc;
 +        } else if (k < KMAX-3) {
 +            dc++;
 +            if (c!='0') lnz = dc;
 +            if (j) x[k] = x[k]*10 + c-'0';
 +            else x[k] = c-'0';
 +            if (++j==9) {
 +                k++;
 +                j=0;
 +            }
 +            gotdig=1;
 +        } else {
 +            dc++;
 +            if (c!='0') {
 +                lnz = (KMAX-4)*9;
 +                x[KMAX-4] |= 1;
 +            }
 +        }
 +    }
 +    if (!gotrad) lrp=dc;
 +
 +    if (gotdig && (c|32)=='e') {
 +        e10 = scanexp(f, pok);
 +        if (e10 == LLONG_MIN) {
 +            if (pok) {
 +                shunget(f);
 +            } else {
 +                shlim(f, 0);
 +                return zero;
 +            }
 +            e10 = 0;
 +        }
 +        lrp += e10;
 +    } else if (c>=0) {
 +        shunget(f);
 +    }
 +    if (!gotdig) {
 +        errno = EINVAL;
 +        shlim(f, 0);
 +        return zero;
 +    }
 +
 +    /* Handle zero specially to avoid nasty special cases later */
 +    if (!x[0]) {
 +        //return sign * 0.0;
 +        return dbl_to_f128(sign * 0.0);
 +    }
 +
 +    /* Optimize small integers (w/no exponent) and over/under-flow */
 +    if (lrp==dc && dc<10 && (bits>30 || x[0]>>bits==0)) {
 +        //return sign * (float128_t)x[0];
 +        float128_t sign_f128;
 +        i32_to_f128M(sign, &sign_f128);
 +        float128_t x0_f128;
 +        ui32_to_f128M(x[0], &x0_f128);
 +        float128_t result;
 +        f128M_mul(&sign_f128, &x0_f128, &result);
 +        return result;
 +    }
 +    if (lrp > -emin/2) {
 +        errno = ERANGE;
 +        //return sign * LDBL_MAX * LDBL_MAX;
 +        return zero;
 +    }
 +    if (lrp < emin-2*LDBL_MANT_DIG) {
 +        errno = ERANGE;
 +        //return sign * LDBL_MIN * LDBL_MIN;
 +        return zero;
 +    }
 +
 +    /* Align incomplete final B1B digit */
 +    if (j) {
 +        for (; j<9; j++) x[k]*=10;
 +        k++;
 +        j=0;
 +    }
 +
 +    a = 0;
 +    z = k;
 +    e2 = 0;
 +    rp = lrp;
 +
 +    /* Optimize small to mid-size integers (even in exp. notation) */
 +    if (lnz<9 && lnz<=rp && rp < 18) {
 +        if (rp == 9) {
 +            //return sign * (float128_t)(x[0]);
 +            return int_mul_f128_cast_u32(sign, x[0]);
 +        }
 +        if (rp < 9) {
 +            //return sign * (float128_t)(x[0]) / p10s[8-rp];
 +            return triple_divide(sign, x[0], p10s[8-rp]);
 +        }
 +        int bitlim = bits-3*(int)(rp-9);
 +        if (bitlim>30 || x[0]>>bitlim==0)
 +            //return sign * (float128_t)(x[0]) * p10s[rp-10];
 +            return triple_multiply(sign, x[0], p10s[rp-10]);
 +    }
 +
 +    /* Drop trailing zeros */
 +    for (; !x[z-1]; z--);
 +
 +    /* Align radix point to B1B digit boundary */
 +    if (rp % 9) {
 +        int rpm9 = rp>=0 ? rp%9 : rp%9+9;
 +        int p10 = p10s[8-rpm9];
 +        uint32_t carry = 0;
 +        for (k=a; k!=z; k++) {
 +            uint32_t tmp = x[k] % p10;
 +            x[k] = x[k]/p10 + carry;
 +            carry = 1000000000/p10 * tmp;
 +            if (k==a && !x[k]) {
 +                a = (a+1 & MASK);
 +                rp -= 9;
 +            }
 +        }
 +        if (carry) x[z++] = carry;
 +        rp += 9-rpm9;
 +    }
 +
 +    /* Upscale until desired number of bits are left of radix point */
 +    while (rp < 9*LD_B1B_DIG || (rp == 9*LD_B1B_DIG && x[a]<th[0])) {
 +        uint32_t carry = 0;
 +        e2 -= 29;
 +        for (k=(z-1 & MASK); ; k=(k-1 & MASK)) {
 +            uint64_t tmp = ((uint64_t)x[k] << 29) + carry;
 +            if (tmp > 1000000000) {
 +                carry = tmp / 1000000000;
 +                x[k] = tmp % 1000000000;
 +            } else {
 +                carry = 0;
 +                x[k] = tmp;
 +            }
 +            if (k==(z-1 & MASK) && k!=a && !x[k]) z = k;
 +            if (k==a) break;
 +        }
 +        if (carry) {
 +            rp += 9;
 +            a = (a-1 & MASK);
 +            if (a == z) {
 +                z = (z-1 & MASK);
 +                x[z-1 & MASK] |= x[z];
 +            }
 +            x[a] = carry;
 +        }
 +    }
 +
 +    /* Downscale until exactly number of bits are left of radix point */
 +    for (;;) {
 +        uint32_t carry = 0;
 +        int sh = 1;
 +        for (i=0; i<LD_B1B_DIG; i++) {
 +            k = (a+i & MASK);
 +            if (k == z || x[k] < th[i]) {
 +                i=LD_B1B_DIG;
 +                break;
 +            }
 +            if (x[a+i & MASK] > th[i]) break;
 +        }
 +        if (i==LD_B1B_DIG && rp==9*LD_B1B_DIG) break;
 +        /* FIXME: find a way to compute optimal sh */
 +        if (rp > 9+9*LD_B1B_DIG) sh = 9;
 +        e2 += sh;
 +        for (k=a; k!=z; k=(k+1 & MASK)) {
 +            uint32_t tmp = x[k] & (1<<sh)-1;
 +            x[k] = (x[k]>>sh) + carry;
 +            carry = (1000000000>>sh) * tmp;
 +            if (k==a && !x[k]) {
 +                a = (a+1 & MASK);
 +                i--;
 +                rp -= 9;
 +            }
 +        }
 +        if (carry) {
 +            if ((z+1 & MASK) != a) {
 +                x[z] = carry;
 +                z = (z+1 & MASK);
 +            } else x[z-1 & MASK] |= 1;
 +        }
 +    }
 +
 +    /* Assemble desired bits into floating point variable */
 +    for (y=zero,i=0; i<LD_B1B_DIG; i++) {
 +        if ((a+i & MASK)==z) x[(z=(z+1 & MASK))-1] = 0;
 +        //y = 1000000000.0L * y + x[a+i & MASK];
 +        float128_t const_f128;
 +        ui64_to_f128M(1000000000, &const_f128);
 +        float128_t mul_y;
 +        f128M_mul(&const_f128, &y, &mul_y);
 +        float128_t x_f128;
 +        ui32_to_f128M(x[a+i & MASK], &x_f128);
 +        f128M_add(&mul_y, &x_f128, &y);
 +    }
 +
 +    //y *= sign;
 +    mul_eq_f128_int(&y, sign);
 +
 +    /* Limit precision for denormal results */
 +    if (bits > LDBL_MANT_DIG+e2-emin) {
 +        bits = LDBL_MANT_DIG+e2-emin;
 +        if (bits<0) bits=0;
 +        denormal = 1;
 +    }
 +
 +    /* Calculate bias term to force rounding, move out lower bits */
 +    if (bits < LDBL_MANT_DIG) {
 +        bias = copysignf128(dbl_to_f128(scalbn(1, 2*LDBL_MANT_DIG-bits-1)), y);
 +        frac = fmodf128(y, dbl_to_f128(scalbn(1, LDBL_MANT_DIG-bits)));
 +        //y -= frac;
 +        {
 +            float128_t new_value;
 +            f128M_sub(&y, &frac, &new_value);
 +            y = new_value;
 +        }
 +        //y += bias;
 +        {
 +            float128_t new_value;
 +            f128M_add(&y, &frac, &new_value);
 +            y = new_value;
 +        }
 +    }
 +
 +    /* Process tail of decimal input so it can affect rounding */
 +    if ((a+i & MASK) != z) {
 +        uint32_t t = x[a+i & MASK];
 +        if (t < 500000000 && (t || (a+i+1 & MASK) != z)) {
 +            //frac += 0.25*sign;
 +            add_eq_f128_dbl(&frac, 0.25*sign);
 +        } else if (t > 500000000) {
 +            //frac += 0.75*sign;
 +            add_eq_f128_dbl(&frac, 0.75*sign);
 +        } else if (t == 500000000) {
 +            if ((a+i+1 & MASK) == z) {
 +                //frac += 0.5*sign;
 +                add_eq_f128_dbl(&frac, 0.5*sign);
 +            } else {
 +                //frac += 0.75*sign;
 +                add_eq_f128_dbl(&frac, 0.75*sign);
 +            }
 +        }
 +        //if (LDBL_MANT_DIG-bits >= 2 && !fmodf128(frac, 1))
 +        if (LDBL_MANT_DIG-bits >= 2) {
 +            float128_t one;
 +            ui32_to_f128M(1, &one);
 +            float128_t mod_result = fmodf128(frac, one);
 +            if (f128M_eq(&mod_result, &zero)) {
 +                //frac++;
 +                add_eq_f128_dbl(&frac, 1.0);
 +            }
 +        }
 +    }
 +
 +    //y += frac;
 +    {
 +        float128_t new_value;
 +        f128M_add(&y, &frac, &new_value);
 +        y = new_value;
 +    }
 +    //y -= bias;
 +    {
 +        float128_t new_value;
 +        f128M_sub(&y, &bias, &new_value);
 +        y = new_value;
 +    }
 +
 +    if ((e2+LDBL_MANT_DIG & INT_MAX) > emax-5) {
 +        //if (fabsf128(y) >= 0x1p113)
 +        float128_t abs_y = fabsf128(y);
 +        float128_t mant_f128 = make_f128(0x4070000000000000, 0x0000000000000000);
 +        if (!f128M_lt(&abs_y, &mant_f128)) {
 +            if (denormal && bits==LDBL_MANT_DIG+e2-emin)
 +                denormal = 0;
 +            //y *= 0.5;
 +            {
 +                float128_t point_5 = dbl_to_f128(0.5);
 +                float128_t new_value;
 +                f128M_mul(&y, &point_5, &new_value);
 +                y = new_value;
 +            }
 +
 +            e2++;
 +        }
 +        if (e2+LDBL_MANT_DIG>emax || (denormal && !f128M_eq(&frac, &zero)))
 +            errno = ERANGE;
 +    }
 +
 +    return scalbnf128(y, e2);
 +}
 +
 +static float128_t hexfloat(struct MuslFILE *f, int bits, int emin, int sign, int pok)
 +{
 +    float128_t zero;
 +    ui32_to_f128M(0, &zero);
 +    float128_t one;
 +    ui32_to_f128M(1, &one);
 +    float128_t sixteen;
 +    ui32_to_f128M(16, &sixteen);
 +    float128_t point_5 = dbl_to_f128(0.5);
 +
 +    uint32_t x = 0;
 +    float128_t y = zero;
 +    float128_t scale = one;
 +    float128_t bias = zero;
 +    int gottail = 0, gotrad = 0, gotdig = 0;
 +    long long rp = 0;
 +    long long dc = 0;
 +    long long e2 = 0;
 +    int d;
 +    int c;
 +
 +    c = shgetc(f);
 +
 +    /* Skip leading zeros/underscores */
 +    for (; c=='0' || c=='_'; c = shgetc(f)) gotdig = 1;
 +
 +    if (c=='.') {
 +        gotrad = 1;
 +        c = shgetc(f);
 +        /* Count zeros after the radix point before significand */
 +        for (rp=0; ; c = shgetc(f)) {
 +            if (c == '_') {
 +                continue;
 +            } else if (c == '0') {
 +                gotdig = 1;
 +                rp--;
 +            } else {
 +                break;
 +            }
 +        }
 +    }
 +
 +    for (; c-'0'<10U || (c|32)-'a'<6U || c=='.' || c=='_'; c = shgetc(f)) {
 +        if (c=='_') {
 +            continue;
 +        } else if (c=='.') {
 +            if (gotrad) break;
 +            rp = dc;
 +            gotrad = 1;
 +        } else {
 +            gotdig = 1;
 +            if (c > '9') d = (c|32)+10-'a';
 +            else d = c-'0';
 +            if (dc<8) {
 +                x = x*16 + d;
 +            } else if (dc < LDBL_MANT_DIG/4+1) {
 +                //y += d*(scale/=16);
 +                {
 +                    float128_t divided;
 +                    f128M_div(&scale, &sixteen, &divided);
 +                    scale = divided;
 +                    float128_t d_f128;
 +                    i32_to_f128M(d, &d_f128);
 +                    float128_t add_op;
 +                    f128M_mul(&d_f128, &scale, &add_op);
 +                    float128_t new_y;
 +                    f128M_add(&y, &add_op, &new_y);
 +                    y = new_y;
 +                }
 +            } else if (d && !gottail) {
 +                //y += 0.5*scale;
 +                {
 +                    float128_t add_op;
 +                    f128M_mul(&point_5, &scale, &add_op);
 +                    float128_t new_y;
 +                    f128M_add(&y, &add_op, &new_y);
 +                    y = new_y;
 +                }
 +                gottail = 1;
 +            }
 +            dc++;
 +        }
 +    }
 +    if (!gotdig) {
 +        shunget(f);
 +        if (pok) {
 +            shunget(f);
 +            if (gotrad) shunget(f);
 +        } else {
 +            shlim(f, 0);
 +        }
 +        //return sign * 0.0;
 +        return dbl_to_f128(sign * 0.0);
 +    }
 +    if (!gotrad) rp = dc;
 +    while (dc<8) x *= 16, dc++;
 +    if ((c|32)=='p') {
 +        e2 = scanexp(f, pok);
 +        if (e2 == LLONG_MIN) {
 +            if (pok) {
 +                shunget(f);
 +            } else {
 +                shlim(f, 0);
 +                return zero;
 +            }
 +            e2 = 0;
 +        }
 +    } else {
 +        shunget(f);
 +    }
 +    e2 += 4*rp - 32;
 +
 +    if (!x) {
 +        //return sign * 0.0;
 +        return dbl_to_f128(sign * 0.0);
 +    }
 +    if (e2 > -emin) {
 +        errno = ERANGE;
 +        //return sign * LDBL_MAX * LDBL_MAX;
 +        return zero;
 +    }
 +    if (e2 < emin-2*LDBL_MANT_DIG) {
 +        errno = ERANGE;
 +        //return sign * LDBL_MIN * LDBL_MIN;
 +        return zero;
 +    }
 +
 +    while (x < 0x80000000) {
 +        //if (y>=0.5)
 +        if (!f128M_lt(&y, &point_5)) {
 +            x += x + 1;
 +            //y += y - 1;
 +            {
 +                float128_t minus_one;
 +                f128M_sub(&y, &one, &minus_one);
 +                float128_t new_y;
 +                f128M_add(&y, &minus_one, &new_y);
 +                y = new_y;
 +            }
 +        } else {
 +            x += x;
 +            //y += y;
 +            {
 +                float128_t new_y;
 +                f128M_add(&y, &y, &new_y);
 +                y = new_y;
 +            }
 +        }
 +        e2--;
 +    }
 +
 +    if (bits > 32+e2-emin) {
 +        bits = 32+e2-emin;
 +        if (bits<0) bits=0;
 +    }
 +
 +    if (bits < LDBL_MANT_DIG) {
 +        float128_t sign_f128;
 +        i32_to_f128M(sign, &sign_f128);
 +        bias = copysignf128(dbl_to_f128(scalbn(1, 32+LDBL_MANT_DIG-bits-1)), sign_f128);
 +    }
 +
 +    //if (bits<32 && y && !(x&1)) x++, y=0;
 +    if (bits<32 && !f128M_eq(&y, &zero) && !(x&1)) x++, y=zero;
 +
 +    //y = bias + sign*(float128_t)x + sign*y;
 +    {
 +        float128_t x_f128;
 +        ui32_to_f128M(x, &x_f128);
 +        float128_t sign_f128;
 +        i32_to_f128M(sign, &sign_f128);
 +        float128_t sign_mul_x;
 +        f128M_mul(&sign_f128, &x_f128, &sign_mul_x);
 +        float128_t sign_mul_y;
 +        f128M_mul(&sign_f128, &y, &sign_mul_y);
 +        float128_t bias_op;
 +        f128M_add(&bias, &sign_mul_x, &bias_op);
 +        float128_t new_y;
 +        f128M_add(&bias_op, &sign_mul_y, &new_y);
 +        y = new_y;
 +    }
 +    //y -= bias;
 +    {
 +        float128_t new_y;
 +        f128M_sub(&y, &bias, &new_y);
 +        y = new_y;
 +    }
 +
 +    if (f128M_eq(&y, &zero)) errno = ERANGE;
 +
 +    return scalbnf128(y, e2);
 +}
 +
 +static int isspace(int c)
 +{
 +    return c == ' ' || (unsigned)c-'\t' < 5;
 +}
 +
 +static inline float128_t makeInf128() {
 +    union ldshape ux;
 +    ux.i2.hi = 0x7fff000000000000UL;
 +    ux.i2.lo = 0x0UL;
 +    return ux.f;
 +}
 +
 +static inline float128_t makeNaN128() {
 +    uint64_t rand = 0UL;
 +    union ldshape ux;
 +    ux.i2.hi = 0x7fff000000000000UL | (rand & 0xffffffffffffUL);
 +    ux.i2.lo = 0x0UL;
 +    return ux.f;
 +}
 +
 +float128_t __floatscan(struct MuslFILE *f, int prec, int pok)
 +{
 +    int sign = 1;
 +    var i: isize = undefined;
 +    int bits = LDBL_MANT_DIG;
 +    int emin = LDBL_MIN_EXP-bits;
 +    int c;
 +
 +    while (isspace((c=shgetc(f))));
 +
 +    if (c=='+' || c=='-') {
 +        sign -= 2*(c=='-');
 +        c = shgetc(f);
 +    }
 +
 +    for (i=0; i<8 && (c|32)=="infinity"[i]; i++)
 +        if (i<7) c = shgetc(f);
 +    if (i==3 || i==8 || (i>3 && pok)) {
 +        if (i!=8) {
 +            shunget(f);
 +            if (pok) for (; i>3; i--) shunget(f);
 +        }
 +        //return sign * INFINITY;
 +        float128_t sign_f128;
 +        i32_to_f128M(sign, &sign_f128);
 +        float128_t infinity_f128 = makeInf128();
 +        float128_t result;
 +        f128M_mul(&sign_f128, &infinity_f128, &result);
 +        return result;
 +    }
 +    if (!i) for (i=0; i<3 && (c|32)=="nan"[i]; i++)
 +        if (i<2) c = shgetc(f);
 +    if (i==3) {
 +        if (shgetc(f) != '(') {
 +            shunget(f);
 +            return makeNaN128();
 +        }
 +        for (i=1; ; i++) {
 +            c = shgetc(f);
 +            if (c-'0'<10U || c-'A'<26U || c-'a'<26U || c=='_')
 +                continue;
 +            if (c==')') return makeNaN128();
 +            shunget(f);
 +            if (!pok) {
 +                errno = EINVAL;
 +                shlim(f, 0);
 +                float128_t zero;
 +                ui32_to_f128M(0, &zero);
 +                return zero;
 +            }
 +            while (i--) shunget(f);
 +            return makeNaN128();
 +        }
 +        return makeNaN128();
 +    }
 +
 +    if (i) {
 +        shunget(f);
 +        errno = EINVAL;
 +        shlim(f, 0);
 +        float128_t zero;
 +        ui32_to_f128M(0, &zero);
 +        return zero;
 +    }
 +
 +    if (c=='0') {
 +        c = shgetc(f);
 +        if ((c|32) == 'x')
 +            return hexfloat(f, bits, emin, sign, pok);
 +        shunget(f);
 +        c = '0';
 +    }
 +
 +    return decfloat(f, c, bits, emin, sign, pok);
 +}
 +
 +float128_t parse_f128(const char *s, char **p) {
 +    struct MuslFILE f;
 +    sh_fromstring(&f, s);
 +    shlim(&f, 0);
 +    float128_t y = __floatscan(&f, 2, 1);
 +    var cnt: isize = shcnt(&f);
 +    if (p) *p = cnt ? (char *)s + cnt : (char *)s;
 +    return y;
 +}
 +
 +// TODO delete stuff below
 +// only keep it for now for looking up libstd functions
 
 // The mantissa field in FloatRepr is 64bit wide and holds only 19 digits
 // without overflowing
diff --git a/parse_float.zig b/parse_float.zig
 // Code ported from musl libc 8f12c4e110acb3bbbdc8abfb3a552c3ced718039
 // and then modified to use softfloat and to assume f128 for everything

 const std = @import("std");
 const builtin = @import("builtin");
 const ascii = std.ascii;
 const endian = builtin.target.cpu.arch.endian();

 // TODO port
 //#define shcnt(f) ((f)->shcnt + ((f)->rpos - (f)->buf))
 //#define shlim(f, lim) __shlim((f), (lim))
 //#define shgetc(f) (((f)->rpos != (f)->shend) ? *(f)->rpos++ : __shgetc(f))
 //#define shunget(f) ((f)->shlim>=0 ? (void)(f)->rpos-- : (void)0)
 //
 //#define sh_fromstring(f, s) \
 //    ((f)->buf = (f)->rpos = (void *)(s), (f)->rend = (void*)-1)

 const LD_B1B_DIG = 4;
 const KMAX = 2048;

 const MASK = (KMAX - 1);

 const F_PERM = 1;
 const F_NORD = 4;
 const F_NOWR = 8;
 const F_EOF = 16;
 const F_ERR = 32;
 const F_SVB = 64;
 const F_APP = 128;

 const EOF = -1;

 const LDBL_MANT_DIG = 113;
 const LDBL_MIN_EXP = -16381;
 const LDBL_MAX_EXP = 16384;
 const LDBL_DIG = 33;
 const LDBL_MIN_10_EXP = -4931;
 const LDBL_MAX_10_EXP = 4932;
 const DECIMAL_DIG = 36;

 const LdshapeLittle = union {
    f: f128,
    i: struct {
        lo: u64,
        mid: u32,
        top: u16,
        se: u16,
    },
    i2: struct {
        lo: u64,
        hi: u64,
    },
 };

 const LdshapeBig = union {
    f: f128,
    i: struct {
        se: u16,
        top: u16,
        mid: u32,
        lo: u64,
    },
    @"i2": struct {
        hi: u64,
        lo: u64,
    },
 };

 const Ldshape = switch (endian) {
    .Little => LdshapeLittle,
    .Big => LdshapeBig,
 };

 const MuslFILE = struct {
    flags: u16, // unsigned
    rpos: *u8, // unsigned char*
    rend: *u8, // unsigned char*
    wend: *u8, // unsigned char*
    wpos: *u8, // unsigned char*
    wbase: *u8, // unsigned char*
    buf: *u8, // unsigned char*
    buf_size: usize, // size_t
    mode: i32, // int
    shend: *u8, // unsigned char*
    shlim: isize, // off_t
    shcnt: isize, // off_t
 };

 fn shlimHelp(f: *MuslFILE, lim: isize) void {
    f.shlim = lim;
    f.shcnt = f.buf - f.rpos;
    // If lim is nonzero, rend must be a valid pointer.
    if (lim and f.rend - f.rpos > lim) {
        f.shend = f.rpos + lim;
    } else {
        f.shend = f.rend;
    }
 }

 fn toReadHelp(f: *MuslFILE) i32 {
    f.mode |= f.mode - 1;
    if (f.wpos != f.wbase) f.write(f, 0, 0); // TODO fix this
    f.wpos = 0;
    f.wbase = 0;
    f.wend = 0;
    if (f.flags & F_NORD) {
        f.flags |= F_ERR;
        return EOF;
    }
    f.rend = f.buf + f.buf_size;
    f.rpos = f.rend;
    if (f.flags & F_EOF > 0) {
        return EOF;
    } else {
        return 0;
    }
 }

 fn uflow(f: *MuslFILE) i32 {
    var c: u8 = undefined;
    if (!toReadHelp(f) and f.read(f, &c, 1) == 1)
        return c;
    return EOF;
 }

 // TODO below
 fn shgetc(f: *MuslFILE) i32 {
    var c: i32 = undefined;
    var cnt: isize = shcnt(f);
    // TODO is it possible to further simplify the following statement?
    // if ((f->shlim && cnt >= f->shlim) || (c=__uflow(f)) < 0) {
    if(f.shlim and cnt >= f.shlim) {
        f.shcnt = f.buf - f.rpos + cnt;
        f.shend = f.rpos;
        f.shlim = -1;
        return EOF;

    } else {
        c = uflow(f);
        if(c<0) {
            f.shcnt = f.buf - f.rpos + cnt;
            f.shend = f.rpos;
            f.shlim = -1;
            return EOF;
        }
    }
    cnt+=1;
    if (f.shlim && f.rend - f.rpos > f.shlim - cnt) {
        f.shend = f.rpos + (f.shlim - cnt);
    } else {
        f.shend = f.rend;
    }
    f.shcnt = f.buf - f.rpos + cnt;
    if (f.rpos[-1] != c) f.rpos[-1] = c;
    return c;
 }

 fn scanexp(f: *MuslFILE, pok: i32) i64 {
    var c: i32 = undefined;
    var x: i32 = undefined;
    var y: i64 = undefined;
    var neg: i32 = 0;

    c = shgetc(f);
    if (c=='+' or c=='-') {
        neg = (c=='-');
        c = shgetc(f);
        if (c-'0'>=10 and pok) // 10U
            shunget(f);
    }
    if (c-'0'>=10 and c!='_') { //10U
        shunget(f);
        return LLONG_MIN;
    }
    x = 0;
    while(true) {
        c = shgetc(f);
        if (c=='_') {
            continue;
        } else if (c-'0'<10 and x<INT_MAX/10) { // 10U
            x = 10*x + c-'0';
        } else {
            break;
        }
    }
    y=x;
    while(true) {
        c = shgetc(f);
        if (c=='_') {
            continue;
        } else if (c-'0'<10 and y<LLONG_MAX/100) { // 10U
            y = 10*y + c-'0';
        } else {
            break;
        }
    }
    while(c-'0'<10 or c=='_') { // 10U
        c = shgetc(f);
    }
    shunget(f);
    if (neg != 0) { // return neg ? -y : y;
        return -y;
    }else {
        return y;
    }
 }

 fn copysignf128(x: f128, y: f128) f128 {
 {
    var ux = Ldshape {
        .f = x,
    };
    var uy = Ldshape {
        .f = y,
    };
    ux.i.se &= 0x7fff;
    ux.i.se |= uy.i.se & 0x8000;
    return ux.f;
 }

 fn mul_eq_f128_float(x: *f128, op_float: f32) void { // float op_float
    //x *= 0x1p120f;
    var op_f32: f32 = undefined;
    memcpy(&op_f32, &op_float, sizeof(float)); // FIXME
    var op_f128: f128 = undefined;
    f32_to_f128M(op_f32, &op_f128); // FIXME
    var new_value: f128 = undefined;
    f128M_mul(x, &op_f128, &new_value); // FIXME
    x.* = new_value;
 }

 fn dbl_to_f128(x: f64) f128 {
    var x_f64: f64;
    memcpy(&x_f64, &x, sizeof(double)); // FIXME
    var result: f128 = undefined;
    f64_to_f128M(x_f64, &result); // FIXME
    return result;
 }

 fn fmodf128(x: f128, y: f128) f128
 {
    var ux = Ldshape {
        .f = x,
    };
    var uy = Ldshape {
        .f = y,
    };
    var ex: i32 = ux.i.se & 0x7fff;
    var ey: i32 = uy.i.se & 0x7fff;
    var sx: i32 = ux.i.se & 0x8000;

    var zero: f128;
    ui32_to_f128M(0, &zero); // FIXME
    // if (y == 0 || isnan(y) || ex == 0x7fff)
    if (f128M_eq(&y, &zero) || f128M_isSignalingNaN(&y) || ex == 0x7fff) { // FIXME
        //return (x*y)/(x*y);
        var x_times_y: f128;
        f128M_mul(&x, &y, &x_times_y); // FIXME
        var result: f128;
        f128M_div(&x_times_y, &x_times_y, &result); // FIXME
        return result;
    }
    ux.i.se = ex;
    uy.i.se = ey;
    //if (ux.f <= uy.f) {
    if (f128M_le(&ux.f, &uy.f)) {
        //if (ux.f == uy.f) {
        if (f128M_eq(&ux.f, &uy.f)) {
            //return 0*x;
            var result: f128;
            f128M_mul(&zero, &x, &result); // FIXME
            return result;
        }
        return x;
    }

    // normalize x and y
    if (!ex) {
        //ux.f *= 0x1p120f;
        mul_eq_f128_float(&ux.f, 0x1p120); // 0x1p120f
        ex = ux.i.se - 120;
    }
    if (!ey) {
        //uy.f *= 0x1p120f;
        mul_eq_f128_float(&uy.f, 0x1p120); // 0x1p120f
        ey = uy.i.se - 120;
    }

    // x mod y
    var hi: u64 = undefined;
    var lo: u64 = undefined;
    var xhi: u64 = undefined;
    var xlo: u64 = undefined;
    var yhi: u64 = undefined;
    var ylo: u64 = undefined;
    // FIXME: what does -1ULL>>16 evaluate to??? is this UB?
    xhi = (ux.i2.hi & -1ULL>>16) | @as(u64,1)<<48; // -1ULL, 1ULL
    yhi = (uy.i2.hi & -1ULL>>16) | @as(u64,1)<<48; // -1ULL, 1ULL
    xlo = ux.i2.lo;
    ylo = uy.i2.lo;
    while(ex > ey) : (ex-=1) {
        hi = xhi - yhi;
        lo = xlo - ylo;
        if (xlo < ylo)
            hi -= 1;
        if (hi >> 63 == 0) {
            if ((hi|lo) == 0) {
                //return 0*x;
                var result: f128= undefined;
                f128M_mul(&zero, &x, &result);
                return result;
            }
            xhi = 2*hi + (lo>>63);
            xlo = 2*lo;
        } else {
            xhi = 2*xhi + (xlo>>63);
            xlo = 2*xlo;
        }
    }
    hi = xhi - yhi;
    lo = xlo - ylo;
    if (xlo < ylo)
        hi -= 1;
    if (hi >> 63 == 0) {
        if ((hi|lo) == 0) {
            //return 0*x;
            var result: f128 = undefined;
            f128M_mul(&zero, &x, &result);
            return result;
        }
        xhi = hi;
        xlo = lo;
    }
    for (; xhi >> 48 == 0; xhi = 2*xhi + (xlo>>63), xlo = 2*xlo, ex--);
    ux.i2.hi = xhi;
    ux.i2.lo = xlo;

    /* scale result */
    if (ex <= 0) {
        ux.i.se = (ex+120)|sx;
        //ux.f *= 0x1p-120f;
        mul_eq_f128_float(&ux.f, 0x1p-120f);
    } else
        ux.i.se = ex|sx;
    return ux.f;
 }

 static float128_t int_mul_f128_cast_u32(int sign, uint32_t x0) {
    var x0_f128: f128 = undefined;
    ui32_to_f128M(x0, &x0_f128);
    var sign_f128: f128 = undefined;
    i32_to_f128M(sign, &sign_f128);
    var result;
    f128M_mul(&sign_f128, &x0_f128, &result);
    return result;
 }

 static float128_t triple_divide(int sign, uint32_t x0, int p10s) {
    var part1: f128 = int_mul_f128_cast_u32(sign, x0);
    var p10s_f128: f128 = undefined;
    i32_to_f128M(p10s, &p10s_f128);
    var result: f128 = undefined;
    f128M_div(&part1, &p10s_f128, &result);
    return result;
 }

 static float128_t triple_multiply(int sign, uint32_t x0, int p10s) {
    var part1:f128 = int_mul_f128_cast_u32(sign, x0);
    var p10s_f128: f128 = undefined;
    i32_to_f128M(p10s, &p10s_f128);
    var result: f128 = undefined;
    f128M_mul(&part1, &p10s_f128, &result);
    return result;
 }

 fn void mul_eq_f128_int(float128_t *y, int sign) {
    var sign_f128: f128 = undefined;
    i32_to_f128M(sign, &sign_f128);
    var new_value: f128 = undefined;
    f128M_mul(y, &sign_f128, &new_value);
    *y = new_value;
 }

 fn make_f128(uint64_t hi, uint64_t lo) f128 {
    const ldshape = Ldshape {
        .@"i2".hi = hi;
        .@"i2".lo = lo;
    };
    return ldshape.f;
 }

 static void mul_eq_f128_f128(float128_t *a, float128_t b) {
    var new_value: f128 = undefined;
    f128M_mul(a, &b, &new_value);
    *a = new_value;
 }

 static void add_eq_f128_dbl(float128_t *a, double b) {
    float64_t b_f64;
    memcpy(&b_f64, &b, sizeof(double));

    var b_f128: f128 = undefined;
    f64_to_f128M(b_f64, &b_f128);

    var new_value: f128 = undefined;
    f128M_add(a, &b_f128, &new_value);
    *a = new_value;
 }

 static float128_t scalbnf128(float128_t x, int n)
 {
    union ldshape u;

    if (n > 16383) {
        //x *= 0x1p16383q;
        mul_eq_f128_f128(&x, make_f128(0x7ffe000000000000, 0x0000000000000000));
        n -= 16383;
        if (n > 16383) {
            //x *= 0x1p16383q;
            mul_eq_f128_f128(&x, make_f128(0x7ffe000000000000, 0x0000000000000000));
            n -= 16383;
            if (n > 16383)
                n = 16383;
        }
    } else if (n < -16382) {
        //x *= 0x1p-16382q * 0x1p113q;
        {
            var mul_result: f128 = undefined;
            const a: f128 = make_f128(0x0001000000000000, 0x0000000000000000);
            const b: f128  = make_f128(0x4070000000000000, 0x0000000000000000);
            f128M_mul(&a, &b, &mul_result);
            mul_eq_f128_f128(&x, mul_result);
        }
        n += 16382 - 113;
        if (n < -16382) {
            //x *= 0x1p-16382q * 0x1p113q;
            {
                var mul_result: f128 = undefined;
                var a: f128 = make_f128(0x0001000000000000, 0x0000000000000000);
                var b: f128 = make_f128(0x4070000000000000, 0x0000000000000000);
                f128M_mul(&a, &b, &mul_result);
                mul_eq_f128_f128(&x, mul_result);
            }
            n += 16382 - 113;
            if (n < -16382)
                n = -16382;
        }
    }
    //u.f = 1.0;
    ui32_to_f128M(1, &u.f);
    u.i.se = 0x3fff + n;
    mul_eq_f128_f128(&x, u.f);
    return x;
 }

 static float128_t fabsf128(float128_t x)
 {
    union ldshape u = {x};

    u.i.se &= 0x7fff;
    return u.f;
 }

 static float128_t decfloat(struct MuslFILE *f, int c, int bits, int emin, int sign, int pok)
 {
    uint32_t x[KMAX];
    static const uint32_t th[] = { LD_B1B_MAX };
    int i, j, k, a, z;
    long long lrp=0, dc=0;
    long long e10=0;
    int lnz = 0;
    int gotdig = 0, gotrad = 0;
    int rp;
    int e2;
    int emax = -emin-bits+3;
    int denormal = 0;
    float128_t y;
    float128_t zero;
    ui32_to_f128M(0, &zero);
    float128_t frac=zero;
    float128_t bias=zero;
    static const int p10s[] = { 10, 100, 1000, 10000,
        100000, 1000000, 10000000, 100000000 };

    j=0;
    k=0;

    /* Don't let leading zeros/underscores consume buffer space */
    for (; ; c = shgetc(f)) {
        if (c=='_') {
            continue;
        } else if (c=='0') {
            gotdig=1;
        } else {
            break;
        }
    }

    if (c=='.') {
        gotrad = 1;
        for (c = shgetc(f); ; c = shgetc(f)) {
            if (c == '_') {
                continue;
            } else if (c=='0') {
                gotdig=1;
                lrp--;
            } else {
                break;
            }
        }
    }

    x[0] = 0;
    for (; c-'0'<10U || c=='.' || c=='_'; c = shgetc(f)) {
        if (c == '_') {
            continue;
        } else if (c == '.') {
            if (gotrad) break;
            gotrad = 1;
            lrp = dc;
        } else if (k < KMAX-3) {
            dc++;
            if (c!='0') lnz = dc;
            if (j) x[k] = x[k]*10 + c-'0';
            else x[k] = c-'0';
            if (++j==9) {
                k++;
                j=0;
            }
            gotdig=1;
        } else {
            dc++;
            if (c!='0') {
                lnz = (KMAX-4)*9;
                x[KMAX-4] |= 1;
            }
        }
    }
    if (!gotrad) lrp=dc;

    if (gotdig && (c|32)=='e') {
        e10 = scanexp(f, pok);
        if (e10 == LLONG_MIN) {
            if (pok) {
                shunget(f);
            } else {
                shlim(f, 0);
                return zero;
            }
            e10 = 0;
        }
        lrp += e10;
    } else if (c>=0) {
        shunget(f);
    }
    if (!gotdig) {
        errno = EINVAL;
        shlim(f, 0);
        return zero;
    }

    /* Handle zero specially to avoid nasty special cases later */
    if (!x[0]) {
        //return sign * 0.0;
        return dbl_to_f128(sign * 0.0);
    }

    /* Optimize small integers (w/no exponent) and over/under-flow */
    if (lrp==dc && dc<10 && (bits>30 || x[0]>>bits==0)) {
        //return sign * (float128_t)x[0];
        float128_t sign_f128;
        i32_to_f128M(sign, &sign_f128);
        float128_t x0_f128;
        ui32_to_f128M(x[0], &x0_f128);
        float128_t result;
        f128M_mul(&sign_f128, &x0_f128, &result);
        return result;
    }
    if (lrp > -emin/2) {
        errno = ERANGE;
        //return sign * LDBL_MAX * LDBL_MAX;
        return zero;
    }
    if (lrp < emin-2*LDBL_MANT_DIG) {
        errno = ERANGE;
        //return sign * LDBL_MIN * LDBL_MIN;
        return zero;
    }

    /* Align incomplete final B1B digit */
    if (j) {
        for (; j<9; j++) x[k]*=10;
        k++;
        j=0;
    }

    a = 0;
    z = k;
    e2 = 0;
    rp = lrp;

    /* Optimize small to mid-size integers (even in exp. notation) */
    if (lnz<9 && lnz<=rp && rp < 18) {
        if (rp == 9) {
            //return sign * (float128_t)(x[0]);
            return int_mul_f128_cast_u32(sign, x[0]);
        }
        if (rp < 9) {
            //return sign * (float128_t)(x[0]) / p10s[8-rp];
            return triple_divide(sign, x[0], p10s[8-rp]);
        }
        int bitlim = bits-3*(int)(rp-9);
        if (bitlim>30 || x[0]>>bitlim==0)
            //return sign * (float128_t)(x[0]) * p10s[rp-10];
            return triple_multiply(sign, x[0], p10s[rp-10]);
    }

    /* Drop trailing zeros */
    for (; !x[z-1]; z--);

    /* Align radix point to B1B digit boundary */
    if (rp % 9) {
        int rpm9 = rp>=0 ? rp%9 : rp%9+9;
        int p10 = p10s[8-rpm9];
        uint32_t carry = 0;
        for (k=a; k!=z; k++) {
            uint32_t tmp = x[k] % p10;
            x[k] = x[k]/p10 + carry;
            carry = 1000000000/p10 * tmp;
            if (k==a && !x[k]) {
                a = (a+1 & MASK);
                rp -= 9;
            }
        }
        if (carry) x[z++] = carry;
        rp += 9-rpm9;
    }

    /* Upscale until desired number of bits are left of radix point */
    while (rp < 9*LD_B1B_DIG || (rp == 9*LD_B1B_DIG && x[a]<th[0])) {
        uint32_t carry = 0;
        e2 -= 29;
        for (k=(z-1 & MASK); ; k=(k-1 & MASK)) {
            uint64_t tmp = ((uint64_t)x[k] << 29) + carry;
            if (tmp > 1000000000) {
                carry = tmp / 1000000000;
                x[k] = tmp % 1000000000;
            } else {
                carry = 0;
                x[k] = tmp;
            }
            if (k==(z-1 & MASK) && k!=a && !x[k]) z = k;
            if (k==a) break;
        }
        if (carry) {
            rp += 9;
            a = (a-1 & MASK);
            if (a == z) {
                z = (z-1 & MASK);
                x[z-1 & MASK] |= x[z];
            }
            x[a] = carry;
        }
    }

    /* Downscale until exactly number of bits are left of radix point */
    for (;;) {
        uint32_t carry = 0;
        int sh = 1;
        for (i=0; i<LD_B1B_DIG; i++) {
            k = (a+i & MASK);
            if (k == z || x[k] < th[i]) {
                i=LD_B1B_DIG;
                break;
            }
            if (x[a+i & MASK] > th[i]) break;
        }
        if (i==LD_B1B_DIG && rp==9*LD_B1B_DIG) break;
        /* FIXME: find a way to compute optimal sh */
        if (rp > 9+9*LD_B1B_DIG) sh = 9;
        e2 += sh;
        for (k=a; k!=z; k=(k+1 & MASK)) {
            uint32_t tmp = x[k] & (1<<sh)-1;
            x[k] = (x[k]>>sh) + carry;
            carry = (1000000000>>sh) * tmp;
            if (k==a && !x[k]) {
                a = (a+1 & MASK);
                i--;
                rp -= 9;
            }
        }
        if (carry) {
            if ((z+1 & MASK) != a) {
                x[z] = carry;
                z = (z+1 & MASK);
            } else x[z-1 & MASK] |= 1;
        }
    }

    /* Assemble desired bits into floating point variable */
    for (y=zero,i=0; i<LD_B1B_DIG; i++) {
        if ((a+i & MASK)==z) x[(z=(z+1 & MASK))-1] = 0;
        //y = 1000000000.0L * y + x[a+i & MASK];
        float128_t const_f128;
        ui64_to_f128M(1000000000, &const_f128);
        float128_t mul_y;
        f128M_mul(&const_f128, &y, &mul_y);
        float128_t x_f128;
        ui32_to_f128M(x[a+i & MASK], &x_f128);
        f128M_add(&mul_y, &x_f128, &y);
    }

    //y *= sign;
    mul_eq_f128_int(&y, sign);

    /* Limit precision for denormal results */
    if (bits > LDBL_MANT_DIG+e2-emin) {
        bits = LDBL_MANT_DIG+e2-emin;
        if (bits<0) bits=0;
        denormal = 1;
    }

    /* Calculate bias term to force rounding, move out lower bits */
    if (bits < LDBL_MANT_DIG) {
        bias = copysignf128(dbl_to_f128(scalbn(1, 2*LDBL_MANT_DIG-bits-1)), y);
        frac = fmodf128(y, dbl_to_f128(scalbn(1, LDBL_MANT_DIG-bits)));
        //y -= frac;
        {
            float128_t new_value;
            f128M_sub(&y, &frac, &new_value);
            y = new_value;
        }
        //y += bias;
        {
            float128_t new_value;
            f128M_add(&y, &frac, &new_value);
            y = new_value;
        }
    }

    /* Process tail of decimal input so it can affect rounding */
    if ((a+i & MASK) != z) {
        uint32_t t = x[a+i & MASK];
        if (t < 500000000 && (t || (a+i+1 & MASK) != z)) {
            //frac += 0.25*sign;
            add_eq_f128_dbl(&frac, 0.25*sign);
        } else if (t > 500000000) {
            //frac += 0.75*sign;
            add_eq_f128_dbl(&frac, 0.75*sign);
        } else if (t == 500000000) {
            if ((a+i+1 & MASK) == z) {
                //frac += 0.5*sign;
                add_eq_f128_dbl(&frac, 0.5*sign);
            } else {
                //frac += 0.75*sign;
                add_eq_f128_dbl(&frac, 0.75*sign);
            }
        }
        //if (LDBL_MANT_DIG-bits >= 2 && !fmodf128(frac, 1))
        if (LDBL_MANT_DIG-bits >= 2) {
            float128_t one;
            ui32_to_f128M(1, &one);
            float128_t mod_result = fmodf128(frac, one);
            if (f128M_eq(&mod_result, &zero)) {
                //frac++;
                add_eq_f128_dbl(&frac, 1.0);
            }
        }
    }

    //y += frac;
    {
        float128_t new_value;
        f128M_add(&y, &frac, &new_value);
        y = new_value;
    }
    //y -= bias;
    {
        float128_t new_value;
        f128M_sub(&y, &bias, &new_value);
        y = new_value;
    }

    if ((e2+LDBL_MANT_DIG & INT_MAX) > emax-5) {
        //if (fabsf128(y) >= 0x1p113)
        float128_t abs_y = fabsf128(y);
        float128_t mant_f128 = make_f128(0x4070000000000000, 0x0000000000000000);
        if (!f128M_lt(&abs_y, &mant_f128)) {
            if (denormal && bits==LDBL_MANT_DIG+e2-emin)
                denormal = 0;
            //y *= 0.5;
            {
                float128_t point_5 = dbl_to_f128(0.5);
                float128_t new_value;
                f128M_mul(&y, &point_5, &new_value);
                y = new_value;
            }

            e2++;
        }
        if (e2+LDBL_MANT_DIG>emax || (denormal && !f128M_eq(&frac, &zero)))
            errno = ERANGE;
    }

    return scalbnf128(y, e2);
 }

 static float128_t hexfloat(struct MuslFILE *f, int bits, int emin, int sign, int pok)
 {
    float128_t zero;
    ui32_to_f128M(0, &zero);
    float128_t one;
    ui32_to_f128M(1, &one);
    float128_t sixteen;
    ui32_to_f128M(16, &sixteen);
    float128_t point_5 = dbl_to_f128(0.5);

    uint32_t x = 0;
    float128_t y = zero;
    float128_t scale = one;
    float128_t bias = zero;
    int gottail = 0, gotrad = 0, gotdig = 0;
    long long rp = 0;
    long long dc = 0;
    long long e2 = 0;
    int d;
    int c;

    c = shgetc(f);

    /* Skip leading zeros/underscores */
    for (; c=='0' || c=='_'; c = shgetc(f)) gotdig = 1;

    if (c=='.') {
        gotrad = 1;
        c = shgetc(f);
        /* Count zeros after the radix point before significand */
        for (rp=0; ; c = shgetc(f)) {
            if (c == '_') {
                continue;
            } else if (c == '0') {
                gotdig = 1;
                rp--;
            } else {
                break;
            }
        }
    }

    for (; c-'0'<10U || (c|32)-'a'<6U || c=='.' || c=='_'; c = shgetc(f)) {
        if (c=='_') {
            continue;
        } else if (c=='.') {
            if (gotrad) break;
            rp = dc;
            gotrad = 1;
        } else {
            gotdig = 1;
            if (c > '9') d = (c|32)+10-'a';
            else d = c-'0';
            if (dc<8) {
                x = x*16 + d;
            } else if (dc < LDBL_MANT_DIG/4+1) {
                //y += d*(scale/=16);
                {
                    float128_t divided;
                    f128M_div(&scale, &sixteen, &divided);
                    scale = divided;
                    float128_t d_f128;
                    i32_to_f128M(d, &d_f128);
                    float128_t add_op;
                    f128M_mul(&d_f128, &scale, &add_op);
                    float128_t new_y;
                    f128M_add(&y, &add_op, &new_y);
                    y = new_y;
                }
            } else if (d && !gottail) {
                //y += 0.5*scale;
                {
                    float128_t add_op;
                    f128M_mul(&point_5, &scale, &add_op);
                    float128_t new_y;
                    f128M_add(&y, &add_op, &new_y);
                    y = new_y;
                }
                gottail = 1;
            }
            dc++;
        }
    }
    if (!gotdig) {
        shunget(f);
        if (pok) {
            shunget(f);
            if (gotrad) shunget(f);
        } else {
            shlim(f, 0);
        }
        //return sign * 0.0;
        return dbl_to_f128(sign * 0.0);
    }
    if (!gotrad) rp = dc;
    while (dc<8) x *= 16, dc++;
    if ((c|32)=='p') {
        e2 = scanexp(f, pok);
        if (e2 == LLONG_MIN) {
            if (pok) {
                shunget(f);
            } else {
                shlim(f, 0);
                return zero;
            }
            e2 = 0;
        }
    } else {
        shunget(f);
    }
    e2 += 4*rp - 32;

    if (!x) {
        //return sign * 0.0;
        return dbl_to_f128(sign * 0.0);
    }
    if (e2 > -emin) {
        errno = ERANGE;
        //return sign * LDBL_MAX * LDBL_MAX;
        return zero;
    }
    if (e2 < emin-2*LDBL_MANT_DIG) {
        errno = ERANGE;
        //return sign * LDBL_MIN * LDBL_MIN;
        return zero;
    }

    while (x < 0x80000000) {
        //if (y>=0.5)
        if (!f128M_lt(&y, &point_5)) {
            x += x + 1;
            //y += y - 1;
            {
                float128_t minus_one;
                f128M_sub(&y, &one, &minus_one);
                float128_t new_y;
                f128M_add(&y, &minus_one, &new_y);
                y = new_y;
            }
        } else {
            x += x;
            //y += y;
            {
                float128_t new_y;
                f128M_add(&y, &y, &new_y);
                y = new_y;
            }
        }
        e2--;
    }

    if (bits > 32+e2-emin) {
        bits = 32+e2-emin;
        if (bits<0) bits=0;
    }

    if (bits < LDBL_MANT_DIG) {
        float128_t sign_f128;
        i32_to_f128M(sign, &sign_f128);
        bias = copysignf128(dbl_to_f128(scalbn(1, 32+LDBL_MANT_DIG-bits-1)), sign_f128);
    }

    //if (bits<32 && y && !(x&1)) x++, y=0;
    if (bits<32 && !f128M_eq(&y, &zero) && !(x&1)) x++, y=zero;

    //y = bias + sign*(float128_t)x + sign*y;
    {
        float128_t x_f128;
        ui32_to_f128M(x, &x_f128);
        float128_t sign_f128;
        i32_to_f128M(sign, &sign_f128);
        float128_t sign_mul_x;
        f128M_mul(&sign_f128, &x_f128, &sign_mul_x);
        float128_t sign_mul_y;
        f128M_mul(&sign_f128, &y, &sign_mul_y);
        float128_t bias_op;
        f128M_add(&bias, &sign_mul_x, &bias_op);
        float128_t new_y;
        f128M_add(&bias_op, &sign_mul_y, &new_y);
        y = new_y;
    }
    //y -= bias;
    {
        float128_t new_y;
        f128M_sub(&y, &bias, &new_y);
        y = new_y;
    }

    if (f128M_eq(&y, &zero)) errno = ERANGE;

    return scalbnf128(y, e2);
 }

 static int isspace(int c)
 {
    return c == ' ' || (unsigned)c-'\t' < 5;
 }

 static inline float128_t makeInf128() {
    union ldshape ux;
    ux.i2.hi = 0x7fff000000000000UL;
    ux.i2.lo = 0x0UL;
    return ux.f;
 }

 static inline float128_t makeNaN128() {
    uint64_t rand = 0UL;
    union ldshape ux;
    ux.i2.hi = 0x7fff000000000000UL | (rand & 0xffffffffffffUL);
    ux.i2.lo = 0x0UL;
    return ux.f;
 }

 float128_t __floatscan(struct MuslFILE *f, int prec, int pok)
 {
    int sign = 1;
    var i: isize = undefined;
    int bits = LDBL_MANT_DIG;
    int emin = LDBL_MIN_EXP-bits;
    int c;

    while (isspace((c=shgetc(f))));

    if (c=='+' || c=='-') {
        sign -= 2*(c=='-');
        c = shgetc(f);
    }

    for (i=0; i<8 && (c|32)=="infinity"[i]; i++)
        if (i<7) c = shgetc(f);
    if (i==3 || i==8 || (i>3 && pok)) {
        if (i!=8) {
            shunget(f);
            if (pok) for (; i>3; i--) shunget(f);
        }
        //return sign * INFINITY;
        float128_t sign_f128;
        i32_to_f128M(sign, &sign_f128);
        float128_t infinity_f128 = makeInf128();
        float128_t result;
        f128M_mul(&sign_f128, &infinity_f128, &result);
        return result;
    }
    if (!i) for (i=0; i<3 && (c|32)=="nan"[i]; i++)
        if (i<2) c = shgetc(f);
    if (i==3) {
        if (shgetc(f) != '(') {
            shunget(f);
            return makeNaN128();
        }
        for (i=1; ; i++) {
            c = shgetc(f);
            if (c-'0'<10U || c-'A'<26U || c-'a'<26U || c=='_')
                continue;
            if (c==')') return makeNaN128();
            shunget(f);
            if (!pok) {
                errno = EINVAL;
                shlim(f, 0);
                float128_t zero;
                ui32_to_f128M(0, &zero);
                return zero;
            }
            while (i--) shunget(f);
            return makeNaN128();
        }
        return makeNaN128();
    }

    if (i) {
        shunget(f);
        errno = EINVAL;
        shlim(f, 0);
        float128_t zero;
        ui32_to_f128M(0, &zero);
        return zero;
    }

    if (c=='0') {
        c = shgetc(f);
        if ((c|32) == 'x')
            return hexfloat(f, bits, emin, sign, pok);
        shunget(f);
        c = '0';
    }

    return decfloat(f, c, bits, emin, sign, pok);
 }

 float128_t parse_f128(const char *s, char **p) {
    struct MuslFILE f;
    sh_fromstring(&f, s);
    shlim(&f, 0);
    float128_t y = __floatscan(&f, 2, 1);
    var cnt: isize = shcnt(&f);
    if (p) *p = cnt ? (char *)s + cnt : (char *)s;
    return y;
 }

 // TODO delete stuff below
 // only keep it for now for looking up libstd functions

 // The mantissa field in FloatRepr is 64bit wide and holds only 19 digits
 // without overflowing
 const max_digits = 19;

 const f64_plus_zero: u64 = 0x0000000000000000;
 const f64_minus_zero: u64 = 0x8000000000000000;
 const f64_plus_infinity: u64 = 0x7FF0000000000000;
 const f64_minus_infinity: u64 = 0xFFF0000000000000;

 const Z96 = struct {
    d0: u32,
    d1: u32,
    d2: u32,

    // d = s >> 1
    inline fn shiftRight1(d: *Z96, s: Z96) void {
        d.d0 = (s.d0 >> 1) | ((s.d1 & 1) << 31);
        d.d1 = (s.d1 >> 1) | ((s.d2 & 1) << 31);
        d.d2 = s.d2 >> 1;
    }

    // d = s << 1
    inline fn shiftLeft1(d: *Z96, s: Z96) void {
        d.d2 = (s.d2 << 1) | ((s.d1 & (1 << 31)) >> 31);
        d.d1 = (s.d1 << 1) | ((s.d0 & (1 << 31)) >> 31);
        d.d0 = s.d0 << 1;
    }

    // d += s
    inline fn add(d: *Z96, s: Z96) void {
        var w = @as(u64, d.d0) + @as(u64, s.d0);
        d.d0 = @truncate(u32, w);

        w >>= 32;
        w += @as(u64, d.d1) + @as(u64, s.d1);
        d.d1 = @truncate(u32, w);

        w >>= 32;
        w += @as(u64, d.d2) + @as(u64, s.d2);
        d.d2 = @truncate(u32, w);
    }

    // d -= s
    inline fn sub(d: *Z96, s: Z96) void {
        var w = @as(u64, d.d0) -% @as(u64, s.d0);
        d.d0 = @truncate(u32, w);

        w >>= 32;
        w += @as(u64, d.d1) -% @as(u64, s.d1);
        d.d1 = @truncate(u32, w);

        w >>= 32;
        w += @as(u64, d.d2) -% @as(u64, s.d2);
        d.d2 = @truncate(u32, w);
    }
 };

 const FloatRepr = struct {
    negative: bool,
    exponent: i32,
    mantissa: u64,
 };

 fn convertRepr(comptime T: type, n: FloatRepr) T {
    const mask28: u32 = 0xf << 28;

    var s: Z96 = undefined;
    var q: Z96 = undefined;
    var r: Z96 = undefined;

    s.d0 = @truncate(u32, n.mantissa);
    s.d1 = @truncate(u32, n.mantissa >> 32);
    s.d2 = 0;

    var binary_exponent: i32 = 92;
    var exp = n.exponent;

    while (exp > 0) : (exp -= 1) {
        q.shiftLeft1(s); // q = p << 1
        r.shiftLeft1(q); // r = p << 2
        s.shiftLeft1(r); // p = p << 3
        s.add(q); // p = (p << 3) + (p << 1)

        while (s.d2 & mask28 != 0) {
            q.shiftRight1(s);
            binary_exponent += 1;
            s = q;
        }
    }

    while (exp < 0) {
        while (s.d2 & (1 << 31) == 0) {
            q.shiftLeft1(s);
            binary_exponent -= 1;
            s = q;
        }

        q.d2 = s.d2 / 10;
        r.d1 = s.d2 % 10;
        r.d2 = (s.d1 >> 8) | (r.d1 << 24);
        q.d1 = r.d2 / 10;
        r.d1 = r.d2 % 10;
        r.d2 = ((s.d1 & 0xff) << 16) | (s.d0 >> 16) | (r.d1 << 24);
        r.d0 = r.d2 / 10;
        r.d1 = r.d2 % 10;
        q.d1 = (q.d1 << 8) | ((r.d0 & 0x00ff0000) >> 16);
        q.d0 = r.d0 << 16;
        r.d2 = (s.d0 *% 0xffff) | (r.d1 << 16);
        q.d0 |= r.d2 / 10;
        s = q;

        exp += 1;
    }

    if (s.d0 != 0 or s.d1 != 0 or s.d2 != 0) {
        while (s.d2 & mask28 == 0) {
            q.shiftLeft1(s);
            binary_exponent -= 1;
            s = q;
        }
    }

    binary_exponent += 1023;

    const repr: u64 = blk: {
        if (binary_exponent > 2046) {
            break :blk if (n.negative) f64_minus_infinity else f64_plus_infinity;
        } else if (binary_exponent < 1) {
            break :blk if (n.negative) f64_minus_zero else f64_plus_zero;
        } else if (s.d2 != 0) {
            const binexs2 = @intCast(u64, binary_exponent) << 52;
            const rr = (@as(u64, s.d2 & ~mask28) << 24) | ((@as(u64, s.d1) + 128) >> 8) | binexs2;
            break :blk if (n.negative) rr | (1 << 63) else rr;
        } else {
            break :blk 0;
        }
    };

    const f = @bitCast(f64, repr);
    return @floatCast(T, f);
 }

 const State = enum {
    MaybeSign,
    LeadingMantissaZeros,
    LeadingFractionalZeros,
    MantissaIntegral,
    MantissaFractional,
    ExponentSign,
    LeadingExponentZeros,
    Exponent,
 };

 const ParseResult = enum {
    Ok,
    PlusZero,
    MinusZero,
    PlusInf,
    MinusInf,
 };

 fn parseRepr(s: []const u8, n: *FloatRepr) !ParseResult {
    var digit_index: usize = 0;
    var negative_exp = false;
    var exponent: i32 = 0;

    var state = State.MaybeSign;

    var i: usize = 0;
    while (i < s.len) {
        const c = s[i];

        switch (state) {
            .MaybeSign => {
                state = .LeadingMantissaZeros;

                if (c == '+') {
                    i += 1;
                } else if (c == '-') {
                    n.negative = true;
                    i += 1;
                } else if (ascii.isDigit(c) or c == '.') {
                    // continue
                } else {
                    return error.InvalidCharacter;
                }
            },
            .LeadingMantissaZeros => {
                if (c == '0') {
                    i += 1;
                } else if (c == '.') {
                    i += 1;
                    state = .LeadingFractionalZeros;
                } else if (c == '_') {
                    i += 1;
                } else {
                    state = .MantissaIntegral;
                }
            },
            .LeadingFractionalZeros => {
                if (c == '0') {
                    i += 1;
                    if (n.exponent > std.math.minInt(i32)) {
                        n.exponent -= 1;
                    }
                } else {
                    state = .MantissaFractional;
                }
            },
            .MantissaIntegral => {
                if (ascii.isDigit(c)) {
                    if (digit_index < max_digits) {
                        n.mantissa *%= 10;
                        n.mantissa += c - '0';
                        digit_index += 1;
                    } else if (n.exponent < std.math.maxInt(i32)) {
                        n.exponent += 1;
                    }

                    i += 1;
                } else if (c == '.') {
                    i += 1;
                    state = .MantissaFractional;
                } else if (c == '_') {
                    i += 1;
                } else {
                    state = .MantissaFractional;
                }
            },
            .MantissaFractional => {
                if (ascii.isDigit(c)) {
                    if (digit_index < max_digits) {
                        n.mantissa *%= 10;
                        n.mantissa += c - '0';
                        n.exponent -%= 1;
                        digit_index += 1;
                    }

                    i += 1;
                } else if (c == 'e' or c == 'E') {
                    i += 1;
                    state = .ExponentSign;
                } else if (c == '_') {
                    i += 1;
                } else {
                    state = .ExponentSign;
                }
            },
            .ExponentSign => {
                if (c == '+') {
                    i += 1;
                } else if (c == '_') {
                    return error.InvalidCharacter;
                } else if (c == '-') {
                    negative_exp = true;
                    i += 1;
                }

                state = .LeadingExponentZeros;
            },
            .LeadingExponentZeros => {
                if (c == '0') {
                    i += 1;
                } else if (c == '_') {
                    i += 1;
                } else {
                    state = .Exponent;
                }
            },
            .Exponent => {
                if (ascii.isDigit(c)) {
                    if (exponent < std.math.maxInt(i32) / 10) {
                        exponent *= 10;
                        exponent += @intCast(i32, c - '0');
                    }

                    i += 1;
                } else if (c == '_') {
                    i += 1;
                } else {
                    return error.InvalidCharacter;
                }
            },
        }
    }

    if (negative_exp) exponent = -exponent;
    n.exponent += exponent;

    if (n.mantissa == 0) {
        return if (n.negative) .MinusZero else .PlusZero;
    } else if (n.exponent > 309) {
        return if (n.negative) .MinusInf else .PlusInf;
    } else if (n.exponent < -328) {
        return if (n.negative) .MinusZero else .PlusZero;
    }

    return .Ok;
 }

 fn caseInEql(a: []const u8, b: []const u8) bool {
    if (a.len != b.len) return false;

    for (a) |_, i| {
        if (ascii.toUpper(a[i]) != ascii.toUpper(b[i])) {
            return false;
        }
    }

    return true;
 }

 pub const ParseFloatError = error{InvalidCharacter};

 pub fn parseFloat(comptime T: type, s: []const u8) ParseFloatError!T {
    if (s.len == 0 or (s.len == 1 and (s[0] == '+' or s[0] == '-'))) {
        return error.InvalidCharacter;
    }

    if (caseInEql(s, "nan")) {
        return std.math.nan(T);
    } else if (caseInEql(s, "inf") or caseInEql(s, "+inf")) {
        return std.math.inf(T);
    } else if (caseInEql(s, "-inf")) {
        return -std.math.inf(T);
    }

    var r = FloatRepr{
        .negative = false,
        .exponent = 0,
        .mantissa = 0,
    };

    return switch (try parseRepr(s, &r)) {
        .Ok => convertRepr(T, r),
        .PlusZero => 0.0,
        .MinusZero => -@as(T, 0.0),
        .PlusInf => std.math.inf(T),
        .MinusInf => -std.math.inf(T),
    };
 }

 test "fmt.parseFloat" {
    const testing = std.testing;
    const expect = testing.expect;
    const expectEqual = testing.expectEqual;
    const approxEqAbs = std.math.approxEqAbs;
    const epsilon = 1e-7;

    inline for ([_]type{ f16, f32, f64, f128 }) |T| {
        const Z = std.meta.Int(.unsigned, @typeInfo(T).Float.bits);

        try testing.expectError(error.InvalidCharacter, parseFloat(T, ""));
        try testing.expectError(error.InvalidCharacter, parseFloat(T, "   1"));
        try testing.expectError(error.InvalidCharacter, parseFloat(T, "1abc"));
        try testing.expectError(error.InvalidCharacter, parseFloat(T, "+"));
        try testing.expectError(error.InvalidCharacter, parseFloat(T, "-"));

        try expectEqual(try parseFloat(T, "0"), 0.0);
        try expectEqual(try parseFloat(T, "0"), 0.0);
        try expectEqual(try parseFloat(T, "+0"), 0.0);
        try expectEqual(try parseFloat(T, "-0"), 0.0);

        try expectEqual(try parseFloat(T, "0e0"), 0);
        try expectEqual(try parseFloat(T, "2e3"), 2000.0);
        try expectEqual(try parseFloat(T, "1e0"), 1.0);
        try expectEqual(try parseFloat(T, "-2e3"), -2000.0);
        try expectEqual(try parseFloat(T, "-1e0"), -1.0);
        try expectEqual(try parseFloat(T, "1.234e3"), 1234);

        try expect(approxEqAbs(T, try parseFloat(T, "3.141"), 3.141, epsilon));
        try expect(approxEqAbs(T, try parseFloat(T, "-3.141"), -3.141, epsilon));

        try expectEqual(try parseFloat(T, "1e-700"), 0);
        try expectEqual(try parseFloat(T, "1e+700"), std.math.inf(T));

        try expectEqual(@bitCast(Z, try parseFloat(T, "nAn")), @bitCast(Z, std.math.nan(T)));
        try expectEqual(try parseFloat(T, "inF"), std.math.inf(T));
        try expectEqual(try parseFloat(T, "-INF"), -std.math.inf(T));

        try expectEqual(try parseFloat(T, "0.4e0066999999999999999999999999999999999999999999999999999"), std.math.inf(T));
        try expect(approxEqAbs(T, try parseFloat(T, "0_1_2_3_4_5_6.7_8_9_0_0_0e0_0_1_0"), @as(T, 123456.789000e10), epsilon));

        if (T != f16) {
            try expect(approxEqAbs(T, try parseFloat(T, "1e-2"), 0.01, epsilon));
            try expect(approxEqAbs(T, try parseFloat(T, "1234e-2"), 12.34, epsilon));

            try expect(approxEqAbs(T, try parseFloat(T, "123142.1"), 123142.1, epsilon));
            try expect(approxEqAbs(T, try parseFloat(T, "-123142.1124"), @as(T, -123142.1124), epsilon));
            try expect(approxEqAbs(T, try parseFloat(T, "0.7062146892655368"), @as(T, 0.7062146892655368), epsilon));
            try expect(approxEqAbs(T, try parseFloat(T, "2.71828182845904523536"), @as(T, 2.718281828459045), epsilon));
        }
    }
 }