kimwalisch · March 20, 2016 10:19
diff --git a/libdivide-without-power-of-2.cpp b/libdivide-without-power-of-2.cpp
 struct libdivide_u64_t libdivide_u64_gen(uint64_t d) {
    struct libdivide_u64_t result;

    const uint32_t floor_log_2_d = 63 - libdivide__count_leading_zeros64(d);
    
    uint64_t proposed_m, rem;
    uint8_t more;
    proposed_m = libdivide_128_div_64_to_64(1ULL << floor_log_2_d, 0, d, &rem); //== (1 << (64 + floor_log_2_d)) / d

    LIBDIVIDE_ASSERT(rem > 0 && rem < d);
    const uint64_t e = d - rem;

 /* This power works if e < 2**floor_log_2_d. */
 if (e < (1ULL << floor_log_2_d)) {
        /* This power works */
        more = floor_log_2_d;
    }
    else {
        /* We have to use the general 65-bit algorithm.  We need to compute (2**power) / d. However, we already have (2**(power-1))/d and its remainder.  By doubling both, and then correcting the remainder, we can compute the larger division. */
        proposed_m += proposed_m; //don't care about overflow here - in fact, we expect it
        const uint64_t twice_rem = rem + rem;
        if (twice_rem >= d || twice_rem < rem) proposed_m += 1;
        more = floor_log_2_d | LIBDIVIDE_ADD_MARKER;
    }
    result.magic = 1 + proposed_m;
    result.more = more;
    //result.more's shift should in general be ceil_log_2_d.  But if we used the smaller power, we subtract one from the shift because we're using the smaller power. If we're using the larger power, we subtract one from the shift because it's taken care of by the add indicator.  So floor_log_2_d happens to be correct in both cases, which is why we do it outside of the if statement.

    return result;
 }

 uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) {
    uint8_t more = denom->more;
    uint64_t q = libdivide__mullhi_u64(denom->magic, numer);
    if (more & LIBDIVIDE_ADD_MARKER) {
        uint64_t t = ((numer - q) >> 1) + q;
        return t >> (more & LIBDIVIDE_64_SHIFT_MASK);
    }
    else {
        return q >> more; //all upper bits are 0 - don't need to mask them off
    }
 }
	struct libdivide_u64_t libdivide_u64_gen(uint64_t d) {
	struct libdivide_u64_t result;

	const uint32_t floor_log_2_d = 63 - libdivide__count_leading_zeros64(d);

	uint64_t proposed_m, rem;
	uint8_t more;
	proposed_m = libdivide_128_div_64_to_64(1ULL << floor_log_2_d, 0, d, &rem); //== (1 << (64 + floor_log_2_d)) / d

	LIBDIVIDE_ASSERT(rem > 0 && rem < d);
	const uint64_t e = d - rem;

	/* This power works if e < 2*floor_log_2_d. /
	if (e < (1ULL << floor_log_2_d)) {
	/* This power works */
	more = floor_log_2_d;
	}
	else {
	/* We have to use the general 65-bit algorithm. We need to compute (2power) / d. However, we already have (2(power-1))/d and its remainder. By doubling both, and then correcting the remainder, we can compute the larger division. */
	proposed_m += proposed_m; //don't care about overflow here - in fact, we expect it
	const uint64_t twice_rem = rem + rem;
	if (twice_rem >= d \|\| twice_rem < rem) proposed_m += 1;
	more = floor_log_2_d \| LIBDIVIDE_ADD_MARKER;
	}
	result.magic = 1 + proposed_m;
	result.more = more;
	//result.more's shift should in general be ceil_log_2_d. But if we used the smaller power, we subtract one from the shift because we're using the smaller power. If we're using the larger power, we subtract one from the shift because it's taken care of by the add indicator. So floor_log_2_d happens to be correct in both cases, which is why we do it outside of the if statement.

	return result;
	}

	uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) {
	uint8_t more = denom->more;
	uint64_t q = libdivide__mullhi_u64(denom->magic, numer);
	if (more & LIBDIVIDE_ADD_MARKER) {
	uint64_t t = ((numer - q) >> 1) + q;
	return t >> (more & LIBDIVIDE_64_SHIFT_MASK);
	}
	else {
	return q >> more; //all upper bits are 0 - don't need to mask them off
	}
	}
No results found