Created
January 25, 2024 20:07
-
-
Save fredrik-johansson/2f88376c5dd9dc0b668e7b6d91b9d266 to your computer and use it in GitHub Desktop.
mulhigh.c
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <stdint.h> | |
| #include "profiler.h" | |
| #include "mpn_extras.h" | |
| #include "crt_helpers.h" | |
| #include "ulong_extras.h" | |
| FLINT_FORCE_INLINE mp_limb_t | |
| flint_mpn_inline_addmul_1(mp_ptr res, mp_srcptr a, mp_size_t n, mp_limb_t c) | |
| { | |
| mp_limb_t hi, lo, cy_limb; | |
| slong i; | |
| umul_ppmm(hi, lo, a[0], c); | |
| add_ssaaaa(cy_limb, res[0], hi, lo, 0, res[0]); | |
| for (i = 1; i < n; i++) | |
| { | |
| umul_ppmm(hi, lo, a[i], c); | |
| add_ssaaaa(hi, lo, hi, lo, 0, res[i]); | |
| add_ssaaaa(cy_limb, res[i], hi, lo, 0, cy_limb); | |
| } | |
| return cy_limb; | |
| } | |
| #undef NN_DOTREV_S3_1X1 | |
| /* {s0,s1,s2} = u[0]v[n-1] + u[1]v[n-2] + ... */ | |
| /* Assumes n >= 2 */ | |
| #define NN_DOTREV_S3_1X1(s2, s1, s0, u, v, n) \ | |
| do { \ | |
| mp_limb_t __dt0, __dt1, __ds0, __ds1, __ds2; \ | |
| slong __i; \ | |
| FLINT_ASSERT((n) >= 2); \ | |
| umul_ppmm(__ds1, __ds0, (u)[0], (v)[(n) - 1]); \ | |
| umul_ppmm(__dt1, __dt0, (u)[1], (v)[(n) - 2]); \ | |
| add_sssaaaaaa(__ds2, __ds1, __ds0, 0, __ds1, __ds0, 0, __dt1, __dt0); \ | |
| for (__i = 2; __i < (n); __i++) \ | |
| { \ | |
| umul_ppmm(__dt1, __dt0, (u)[__i], (v)[(n) - 1 - __i]); \ | |
| add_sssaaaaaa(__ds2, __ds1, __ds0, __ds2, __ds1, __ds0, 0, __dt1, __dt0); \ | |
| } \ | |
| (s0) = __ds0; (s1) = __ds1; (s2) = __ds2; \ | |
| } while (0) \ | |
| /* {r0,r1,r2} = {s0,s1,s2} + u[0]v[n-1] + u[1]v[n-2] + ... */ | |
| /* Assumes n >= 1. May have s2 != 0, but the final sum is assumed to fit in 3 limbs. */ | |
| #define NN_DOTREV_S3_A3_1X1(r2, r1, r0, s2, s1, s0, u, v, n) \ | |
| do { \ | |
| mp_limb_t __dt0, __dt1, __ds0, __ds1, __ds2; \ | |
| slong __i; \ | |
| FLINT_ASSERT((n) >= 1); \ | |
| __ds0 = (s0); __ds1 = (s1); __ds2 = (s2); \ | |
| for (__i = 0; __i < (n); __i++) \ | |
| { \ | |
| umul_ppmm(__dt1, __dt0, (u)[__i], (v)[(n) - 1 - __i]); \ | |
| add_sssaaaaaa(__ds2, __ds1, __ds0, __ds2, __ds1, __ds0, 0, __dt1, __dt0); \ | |
| } \ | |
| (r0) = __ds0; (r1) = __ds1; (r2) = __ds2; \ | |
| } while (0) \ | |
| #define NN_MUL_1X1 umul_ppmm | |
| /* {r0,r1} = {s0,s1} + x * y, with no carry-out. */ | |
| #define NN_ADDMUL_S2_A2_1X1(r1, r0, s1, s0, x, y) \ | |
| do { \ | |
| mp_limb_t __dt0, __dt1; \ | |
| umul_ppmm(__dt1, __dt0, (x), (y)); \ | |
| add_ssaaaa(r1, r0, s1, s0, __dt1, __dt0); \ | |
| } while (0); \ | |
| void flint_mpn_mulhigh_1(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| NN_MUL_1X1(res[1], res[0], u[0], v[0]); | |
| } | |
| void flint_mpn_mulhigh_2(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| mp_limb_t a, b; | |
| NN_DOTREV_S3_1X1(b, a, res[1], u, v, 2); | |
| NN_ADDMUL_S2_A2_1X1(res[3], res[2], b, a, u[1], v[1]); | |
| } | |
| void flint_mpn_mulhigh_3(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| mp_limb_t a, b; | |
| NN_DOTREV_S3_1X1(b, a, res[2], u, v, 3); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[3], 0, b, a, u + 1, v + 1, 2); | |
| NN_ADDMUL_S2_A2_1X1(res[5], res[4], b, a, u[2], v[2]); | |
| } | |
| void flint_mpn_mulhigh_4(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| mp_limb_t a, b; | |
| NN_DOTREV_S3_1X1(b, a, res[3], u, v, 4); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[4], 0, b, a, u + 1, v + 1, 3); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[5], 0, b, a, u + 2, v + 2, 2); | |
| NN_ADDMUL_S2_A2_1X1(res[7], res[6], b, a, u[3], v[3]); | |
| } | |
| void flint_mpn_mulhigh_5(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| mp_limb_t a, b; | |
| NN_DOTREV_S3_1X1(b, a, res[4], u, v, 5); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[5], 0, b, a, u + 1, v + 1, 4); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[6], 0, b, a, u + 2, v + 2, 3); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[7], 0, b, a, u + 3, v + 3, 2); | |
| NN_ADDMUL_S2_A2_1X1(res[9], res[8], b, a, u[4], v[4]); | |
| } | |
| void flint_mpn_mulhigh_6(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| mp_limb_t a, b; | |
| NN_DOTREV_S3_1X1(b, a, res[5], u, v, 6); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[6], 0, b, a, u + 1, v + 1, 5); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[7], 0, b, a, u + 2, v + 2, 4); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[8], 0, b, a, u + 3, v + 3, 3); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[9], 0, b, a, u + 4, v + 4, 2); | |
| NN_ADDMUL_S2_A2_1X1(res[11], res[10], b, a, u[5], v[5]); | |
| } | |
| void flint_mpn_mulhigh_7(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| mp_limb_t a, b; | |
| NN_DOTREV_S3_1X1(b, a, res[6], u, v, 7); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[7], 0, b, a, u + 1, v + 1, 6); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[8], 0, b, a, u + 2, v + 2, 5); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[9], 0, b, a, u + 3, v + 3, 4); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[10], 0, b, a, u + 4, v + 4, 3); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[11], 0, b, a, u + 5, v + 5, 2); | |
| NN_ADDMUL_S2_A2_1X1(res[13], res[12], b, a, u[6], v[6]); | |
| } | |
| void flint_mpn_mulhigh_8(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| mp_limb_t a, b; | |
| NN_DOTREV_S3_1X1(b, a, res[7], u, v, 8); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[8], 0, b, a, u + 1, v + 1, 7); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[9], 0, b, a, u + 2, v + 2, 6); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[10], 0, b, a, u + 3, v + 3, 5); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[11], 0, b, a, u + 4, v + 4, 4); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[12], 0, b, a, u + 5, v + 5, 3); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[13], 0, b, a, u + 6, v + 6, 2); | |
| NN_ADDMUL_S2_A2_1X1(res[15], res[14], b, a, u[7], v[7]); | |
| } | |
| void flint_mpn_mulhigh_9(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| mp_limb_t a, b; | |
| NN_DOTREV_S3_1X1(b, a, res[8], u, v, 9); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[9], 0, b, a, u + 1, v + 1, 8); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[10], 0, b, a, u + 2, v + 2, 7); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[11], 0, b, a, u + 3, v + 3, 6); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[12], 0, b, a, u + 4, v + 4, 5); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[13], 0, b, a, u + 5, v + 5, 4); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[14], 0, b, a, u + 6, v + 6, 3); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[15], 0, b, a, u + 7, v + 7, 2); | |
| NN_ADDMUL_S2_A2_1X1(res[17], res[16], b, a, u[8], v[8]); | |
| } | |
| void flint_mpn_mulhigh_10(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| mp_limb_t a, b; | |
| NN_DOTREV_S3_1X1(b, a, res[9], u, v, 10); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[10], 0, b, a, u + 1, v + 1, 9); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[11], 0, b, a, u + 2, v + 2, 8); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[12], 0, b, a, u + 3, v + 3, 7); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[13], 0, b, a, u + 4, v + 4, 6); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[14], 0, b, a, u + 5, v + 5, 5); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[15], 0, b, a, u + 6, v + 6, 4); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[16], 0, b, a, u + 7, v + 7, 3); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[17], 0, b, a, u + 8, v + 8, 2); | |
| NN_ADDMUL_S2_A2_1X1(res[19], res[18], b, a, u[9], v[9]); | |
| } | |
| void flint_mpn_mulhigh_11(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| mp_limb_t a, b; | |
| NN_DOTREV_S3_1X1(b, a, res[10], u, v, 11); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[11], 0, b, a, u + 1, v + 1, 10); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[12], 0, b, a, u + 2, v + 2, 9); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[13], 0, b, a, u + 3, v + 3, 8); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[14], 0, b, a, u + 4, v + 4, 7); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[15], 0, b, a, u + 5, v + 5, 6); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[16], 0, b, a, u + 6, v + 6, 5); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[17], 0, b, a, u + 7, v + 7, 4); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[18], 0, b, a, u + 8, v + 8, 3); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[19], 0, b, a, u + 9, v + 9, 2); | |
| NN_ADDMUL_S2_A2_1X1(res[21], res[20], b, a, u[10], v[10]); | |
| } | |
| void flint_mpn_mulhigh_12(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| mp_limb_t a, b; | |
| NN_DOTREV_S3_1X1(b, a, res[11], u, v, 12); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[12], 0, b, a, u + 1, v + 1, 11); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[13], 0, b, a, u + 2, v + 2, 10); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[14], 0, b, a, u + 3, v + 3, 9); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[15], 0, b, a, u + 4, v + 4, 8); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[16], 0, b, a, u + 5, v + 5, 7); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[17], 0, b, a, u + 6, v + 6, 6); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[18], 0, b, a, u + 7, v + 7, 5); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[19], 0, b, a, u + 8, v + 8, 4); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[20], 0, b, a, u + 9, v + 9, 3); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[21], 0, b, a, u + 10, v + 10, 2); | |
| NN_ADDMUL_S2_A2_1X1(res[23], res[22], b, a, u[11], v[11]); | |
| } | |
| void flint_mpn_mulhigh_13(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| mp_limb_t a, b; | |
| NN_DOTREV_S3_1X1(b, a, res[12], u, v, 13); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[13], 0, b, a, u + 1, v + 1, 12); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[14], 0, b, a, u + 2, v + 2, 11); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[15], 0, b, a, u + 3, v + 3, 10); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[16], 0, b, a, u + 4, v + 4, 9); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[17], 0, b, a, u + 5, v + 5, 8); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[18], 0, b, a, u + 6, v + 6, 7); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[19], 0, b, a, u + 7, v + 7, 6); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[20], 0, b, a, u + 8, v + 8, 5); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[21], 0, b, a, u + 9, v + 9, 4); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[22], 0, b, a, u + 10, v + 10, 3); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[23], 0, b, a, u + 11, v + 11, 2); | |
| NN_ADDMUL_S2_A2_1X1(res[25], res[24], b, a, u[12], v[12]); | |
| } | |
| void flint_mpn_mulhigh_14(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| mp_limb_t a, b; | |
| NN_DOTREV_S3_1X1(b, a, res[13], u, v, 14); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[14], 0, b, a, u + 1, v + 1, 13); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[15], 0, b, a, u + 2, v + 2, 12); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[16], 0, b, a, u + 3, v + 3, 11); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[17], 0, b, a, u + 4, v + 4, 10); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[18], 0, b, a, u + 5, v + 5, 9); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[19], 0, b, a, u + 6, v + 6, 8); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[20], 0, b, a, u + 7, v + 7, 7); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[21], 0, b, a, u + 8, v + 8, 6); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[22], 0, b, a, u + 9, v + 9, 5); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[23], 0, b, a, u + 10, v + 10, 4); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[24], 0, b, a, u + 11, v + 11, 3); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[25], 0, b, a, u + 12, v + 12, 2); | |
| NN_ADDMUL_S2_A2_1X1(res[27], res[26], b, a, u[13], v[13]); | |
| } | |
| void flint_mpn_mulhigh_15(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| mp_limb_t a, b; | |
| NN_DOTREV_S3_1X1(b, a, res[14], u, v, 15); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[15], 0, b, a, u + 1, v + 1, 14); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[16], 0, b, a, u + 2, v + 2, 13); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[17], 0, b, a, u + 3, v + 3, 12); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[18], 0, b, a, u + 4, v + 4, 11); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[19], 0, b, a, u + 5, v + 5, 10); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[20], 0, b, a, u + 6, v + 6, 9); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[21], 0, b, a, u + 7, v + 7, 8); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[22], 0, b, a, u + 8, v + 8, 7); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[23], 0, b, a, u + 9, v + 9, 6); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[24], 0, b, a, u + 10, v + 10, 5); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[25], 0, b, a, u + 11, v + 11, 4); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[26], 0, b, a, u + 12, v + 12, 3); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[27], 0, b, a, u + 13, v + 13, 2); | |
| NN_ADDMUL_S2_A2_1X1(res[29], res[28], b, a, u[14], v[14]); | |
| } | |
| void flint_mpn_mulhigh_16(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| mp_limb_t a, b; | |
| NN_DOTREV_S3_1X1(b, a, res[15], u, v, 16); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[16], 0, b, a, u + 1, v + 1, 15); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[17], 0, b, a, u + 2, v + 2, 14); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[18], 0, b, a, u + 3, v + 3, 13); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[19], 0, b, a, u + 4, v + 4, 12); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[20], 0, b, a, u + 5, v + 5, 11); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[21], 0, b, a, u + 6, v + 6, 10); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[22], 0, b, a, u + 7, v + 7, 9); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[23], 0, b, a, u + 8, v + 8, 8); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[24], 0, b, a, u + 9, v + 9, 7); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[25], 0, b, a, u + 10, v + 10, 6); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[26], 0, b, a, u + 11, v + 11, 5); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[27], 0, b, a, u + 12, v + 12, 4); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[28], 0, b, a, u + 13, v + 13, 3); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[29], 0, b, a, u + 14, v + 14, 2); | |
| NN_ADDMUL_S2_A2_1X1(res[31], res[30], b, a, u[15], v[15]); | |
| } | |
| void flint_mpn_mulhigh_17(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| mp_limb_t a, b; | |
| NN_DOTREV_S3_1X1(b, a, res[16], u, v, 17); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[17], 0, b, a, u + 1, v + 1, 16); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[18], 0, b, a, u + 2, v + 2, 15); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[19], 0, b, a, u + 3, v + 3, 14); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[20], 0, b, a, u + 4, v + 4, 13); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[21], 0, b, a, u + 5, v + 5, 12); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[22], 0, b, a, u + 6, v + 6, 11); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[23], 0, b, a, u + 7, v + 7, 10); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[24], 0, b, a, u + 8, v + 8, 9); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[25], 0, b, a, u + 9, v + 9, 8); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[26], 0, b, a, u + 10, v + 10, 7); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[27], 0, b, a, u + 11, v + 11, 6); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[28], 0, b, a, u + 12, v + 12, 5); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[29], 0, b, a, u + 13, v + 13, 4); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[30], 0, b, a, u + 14, v + 14, 3); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[31], 0, b, a, u + 15, v + 15, 2); | |
| NN_ADDMUL_S2_A2_1X1(res[33], res[32], b, a, u[16], v[16]); | |
| } | |
| void flint_mpn_mulhigh_18(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| mp_limb_t a, b; | |
| NN_DOTREV_S3_1X1(b, a, res[17], u, v, 18); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[18], 0, b, a, u + 1, v + 1, 17); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[19], 0, b, a, u + 2, v + 2, 16); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[20], 0, b, a, u + 3, v + 3, 15); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[21], 0, b, a, u + 4, v + 4, 14); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[22], 0, b, a, u + 5, v + 5, 13); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[23], 0, b, a, u + 6, v + 6, 12); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[24], 0, b, a, u + 7, v + 7, 11); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[25], 0, b, a, u + 8, v + 8, 10); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[26], 0, b, a, u + 9, v + 9, 9); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[27], 0, b, a, u + 10, v + 10, 8); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[28], 0, b, a, u + 11, v + 11, 7); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[29], 0, b, a, u + 12, v + 12, 6); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[30], 0, b, a, u + 13, v + 13, 5); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[31], 0, b, a, u + 14, v + 14, 4); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[32], 0, b, a, u + 15, v + 15, 3); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[33], 0, b, a, u + 16, v + 16, 2); | |
| NN_ADDMUL_S2_A2_1X1(res[35], res[34], b, a, u[17], v[17]); | |
| } | |
| void flint_mpn_mulhigh_19(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| mp_limb_t a, b; | |
| NN_DOTREV_S3_1X1(b, a, res[18], u, v, 19); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[19], 0, b, a, u + 1, v + 1, 18); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[20], 0, b, a, u + 2, v + 2, 17); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[21], 0, b, a, u + 3, v + 3, 16); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[22], 0, b, a, u + 4, v + 4, 15); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[23], 0, b, a, u + 5, v + 5, 14); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[24], 0, b, a, u + 6, v + 6, 13); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[25], 0, b, a, u + 7, v + 7, 12); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[26], 0, b, a, u + 8, v + 8, 11); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[27], 0, b, a, u + 9, v + 9, 10); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[28], 0, b, a, u + 10, v + 10, 9); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[29], 0, b, a, u + 11, v + 11, 8); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[30], 0, b, a, u + 12, v + 12, 7); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[31], 0, b, a, u + 13, v + 13, 6); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[32], 0, b, a, u + 14, v + 14, 5); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[33], 0, b, a, u + 15, v + 15, 4); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[34], 0, b, a, u + 16, v + 16, 3); | |
| NN_DOTREV_S3_A3_1X1(b, a, res[35], 0, b, a, u + 17, v + 17, 2); | |
| NN_ADDMUL_S2_A2_1X1(res[37], res[36], b, a, u[18], v[18]); | |
| } | |
| void | |
| flint_mpn_mulhigh_n_classical(mp_ptr res, mp_srcptr u, mp_srcptr v, mp_size_t n) | |
| { | |
| slong i; | |
| res += n - 1; | |
| umul_ppmm(res[1], res[0], u[n - 1], v[0]); | |
| for (i = 2 ; i <= n ; i++) | |
| res[i] = mpn_addmul_1(res, u + n - i, i, v[i - 1]); | |
| } | |
| void flint_mpn_mulhigh_n_basecase(mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n) | |
| { | |
| switch (n) | |
| { | |
| case 0: FLINT_UNREACHABLE; | |
| case 1: flint_mpn_mulhigh_1(rp, np, mp); break; | |
| case 2: flint_mpn_mulhigh_2(rp, np, mp); break; | |
| case 3: flint_mpn_mulhigh_3(rp, np, mp); break; | |
| case 4: flint_mpn_mulhigh_4(rp, np, mp); break; | |
| case 5: flint_mpn_mulhigh_5(rp, np, mp); break; | |
| case 6: flint_mpn_mulhigh_6(rp, np, mp); break; | |
| case 7: flint_mpn_mulhigh_7(rp, np, mp); break; | |
| case 8: flint_mpn_mulhigh_8(rp, np, mp); break; | |
| case 9: flint_mpn_mulhigh_9(rp, np, mp); break; | |
| case 10: flint_mpn_mulhigh_10(rp, np, mp); break; | |
| case 11: flint_mpn_mulhigh_11(rp, np, mp); break; | |
| case 12: flint_mpn_mulhigh_12(rp, np, mp); break; | |
| case 13: flint_mpn_mulhigh_13(rp, np, mp); break; | |
| case 14: flint_mpn_mulhigh_14(rp, np, mp); break; | |
| case 15: flint_mpn_mulhigh_15(rp, np, mp); break; | |
| case 16: flint_mpn_mulhigh_16(rp, np, mp); break; | |
| case 17: flint_mpn_mulhigh_17(rp, np, mp); break; | |
| case 18: flint_mpn_mulhigh_18(rp, np, mp); break; | |
| case 19: flint_mpn_mulhigh_19(rp, np, mp); break; | |
| default: flint_mpn_mulhigh_n_classical(rp, np, mp, n); break; | |
| } | |
| } | |
| #if 0 | |
| FLINT_FORCE_INLINE | |
| void flint_mpn_mulhigh_1(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| umul_ppmm(res[1], res[0], u[0], v[0]); | |
| } | |
| FLINT_FORCE_INLINE | |
| void flint_mpn_mulhigh_2(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| umul_ppmm(res[1], res[0], u[1], v[0]); | |
| res[2] = flint_mpn_inline_addmul_1(res, u + 0, 2, v[1]); | |
| } | |
| FLINT_FORCE_INLINE | |
| void flint_mpn_mulhigh_3(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| umul_ppmm(res[1], res[0], u[2], v[0]); | |
| res[2] = flint_mpn_inline_addmul_1(res, u + 1, 2, v[1]); | |
| res[3] = flint_mpn_inline_addmul_1(res, u + 0, 3, v[2]); | |
| } | |
| FLINT_FORCE_INLINE | |
| void flint_mpn_mulhigh_4(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| umul_ppmm(res[1], res[0], u[3], v[0]); | |
| res[2] = flint_mpn_inline_addmul_1(res, u + 2, 2, v[1]); | |
| res[3] = flint_mpn_inline_addmul_1(res, u + 1, 3, v[2]); | |
| res[4] = flint_mpn_inline_addmul_1(res, u + 0, 4, v[3]); | |
| } | |
| void flint_mpn_mulhigh_5(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| umul_ppmm(res[1], res[0], u[4], v[0]); | |
| res[2] = flint_mpn_inline_addmul_1(res, u + 3, 2, v[1]); | |
| res[3] = flint_mpn_inline_addmul_1(res, u + 2, 3, v[2]); | |
| res[4] = flint_mpn_inline_addmul_1(res, u + 1, 4, v[3]); | |
| res[5] = flint_mpn_inline_addmul_1(res, u + 0, 5, v[4]); | |
| } | |
| void flint_mpn_mulhigh_6a(mp_ptr res, mp_srcptr u, mp_srcptr v) | |
| { | |
| umul_ppmm(res[1], res[0], u[5], v[0]); | |
| res[2] = flint_mpn_inline_addmul_1(res, u + 4, 2, v[1]); | |
| res[3] = flint_mpn_inline_addmul_1(res, u + 3, 3, v[2]); | |
| res[4] = flint_mpn_inline_addmul_1(res, u + 2, 4, v[3]); | |
| res[5] = flint_mpn_inline_addmul_1(res, u + 1, 5, v[4]); | |
| res[6] = flint_mpn_inline_addmul_1(res, u + 0, 6, v[5]); | |
| } | |
| #endif | |
| /* Tuning values against GMP 6.3.0 and fft_small on Zen3. */ | |
| /* Table stores k / 4 so that entries can fit in a byte. */ | |
| #define KTAB_LEN 800 | |
| static unsigned char flint_mpn_mulhigh_ktab[KTAB_LEN] = { | |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
| 0, 0, 12, 0, 0, 0, 0, 0, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 12, 12, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, | |
| 14, 14, 14, 14, 16, 14, 14, 16, 16, 16, 16, 16, 16, 17, 19, 16, 19, 19, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 19, | |
| 20, 19, 22, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 27, 24, 24, 24, 24, 25, 24, 27, 24, 24, 27, 24, 27, 27, 24, 27, 27, 27, 27, | |
| 27, 27, 26, 27, 24, 27, 27, 27, 27, 26, 27, 29, 28, 29, 28, 27, 27, 27, 29, 29, 30, 28, 27, 27, 27, 28, 28, 27, 33, 27, 27, 33, | |
| 28, 34, 36, 30, 29, 35, 36, 37, 35, 36, 35, 35, 36, 37, 36, 37, 36, 36, 37, 37, 36, 37, 38, 37, 37, 34, 39, 38, 36, 37, 37, 38, | |
| 37, 36, 37, 37, 36, 37, 37, 38, 37, 38, 38, 37, 36, 38, 36, 35, 36, 37, 43, 47, 47, 47, 39, 47, 47, 38, 47, 47, 47, 47, 43, 47, | |
| 47, 47, 46, 47, 47, 47, 43, 47, 47, 47, 43, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 51, 47, 47, 47, 47, 47, 47, 47, 55, 47, 55, | |
| 51, 55, 46, 47, 51, 51, 51, 51, 55, 51, 51, 51, 55, 47, 51, 51, 47, 47, 55, 55, 55, 47, 55, 54, 51, 55, 55, 54, 63, 59, 55, 59, | |
| 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 64, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 67, 55, 64, 67, 64, 67, 67, 63, | |
| 67, 66, 59, 66, 67, 67, 66, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 63, 67, 67, 67, 67, 67, 67, 71, 71, 71, 67, 67, 72, 67, | |
| 67, 67, 67, 71, 67, 75, 72, 75, 75, 75, 71, 76, 75, 76, 75, 71, 71, 75, 82, 75, 82, 75, 75, 75, 76, 82, 75, 75, 82, 75, 82, 82, | |
| 76, 82, 75, 82, 82, 82, 82, 82, 76, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, | |
| 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 94, 110, 108, 112, 109, 110, 112, 108, 111, 109, | |
| 112, 110, 111, 112, 113, 110, 112, 112, 110, 113, 114, 115, 115, 113, 112, 117, 115, 118, 115, 118, 116, 119, 115, 119, 117, 120, 120, 118, 119, 119, 120, 119, | |
| 118, 119, 120, 119, 119, 118, 120, 120, 119, 120, 120, 120, 120, 120, 119, 119, 120, 119, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, | |
| 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, | |
| 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 141, 120, 140, 136, 140, 140, 139, 138, 139, 140, 141, 140, 144, 141, 142, 144, 144, 144, | |
| 141, 144, 144, 144, 144, 142, 144, 144, 144, 144, 144, 144, 144, 143, 143, 144, 143, 144, 144, 144, 144, 144, 144, 144, 144, 143, 144, 144, 144, 144, 144, 143, | |
| 144, 144, 143, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 141, 144, 144, 144, 144, 144, 144, 144, 144, 144, | |
| 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 142, 143, 144, 144, 144, 144, 142, 144, 143, 143, 144, 144, 144, 140, 141, 141, 141, | |
| 144, 143, 139, 143, 142, 144, 139, 142, 139, 141, 140, 141, 144, 142, 142, 143, 143, 144, 143, 144, 143, 144, 141, 143, 143, 143, 141, 142, 144, 143, 142, 144, | |
| 140, 144, 143, 140, 142, 141, 141, 144, 142, 142, 144, 143, 144, 143, 142, 142, 142, 142, 143, 143, 143, 144, 144, 144, 140, 141, 142, 140, 143, 142, 142, 144, | |
| 144, 144, 144, 255, 255, 255, 255, 255, 255, 143, 255, 255, 255, 255, 255, 194, 255, 194, 193, 193, 193, 194, 195, 194, 193, 195, 255, 194, 194, 193, 194, 195, | |
| }; | |
| void flint_mpn_mulhigh_n(mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n); | |
| void | |
| flint_mpn_mulhigh_n_recursive(mp_ptr res, mp_srcptr u, mp_srcptr v, mp_size_t n, int k) | |
| { | |
| mp_limb_t cy; | |
| mp_size_t l; | |
| if (k == 0) | |
| { | |
| flint_mpn_mulhigh_n_basecase(res, u, v, n); | |
| return; | |
| } | |
| else if (k == 255) | |
| { | |
| flint_mpn_mul_n(res, u, v, n); | |
| return; | |
| } | |
| /* Required for correctness (see MPFR). */ | |
| k = FLINT_MAX(k, (n + 4) / 2); | |
| k = FLINT_MIN(k, n - 1); | |
| l = n - k; | |
| flint_mpn_mul_n(res + 2 * l, u + l, v + l, k); | |
| flint_mpn_mulhigh_n(res, u + k, v, l); | |
| cy = mpn_add_n(res + n - 1, res + n - 1, res + l - 1, l + 1); | |
| flint_mpn_mulhigh_n(res, u, v + k, l); | |
| cy += mpn_add_n(res + n - 1, res + n - 1, res + l - 1, l + 1); | |
| mpn_add_1(res + n + l, res + n + l, k, cy); | |
| } | |
| void | |
| flint_mpn_mulhigh_n(mp_ptr res, mp_srcptr u, mp_srcptr v, mp_size_t n) | |
| { | |
| int k; | |
| switch (n) | |
| { | |
| case 1: flint_mpn_mulhigh_1(res, u, v); break; | |
| case 2: flint_mpn_mulhigh_2(res, u, v); break; | |
| case 3: flint_mpn_mulhigh_3(res, u, v); break; | |
| case 4: flint_mpn_mulhigh_4(res, u, v); break; | |
| case 5: flint_mpn_mulhigh_5(res, u, v); break; | |
| case 6: flint_mpn_mulhigh_6(res, u, v); break; | |
| case 7: flint_mpn_mulhigh_7(res, u, v); break; | |
| case 8: flint_mpn_mulhigh_8(res, u, v); break; | |
| case 9: flint_mpn_mulhigh_9(res, u, v); break; | |
| case 10: flint_mpn_mulhigh_10(res, u, v); break; | |
| case 11: flint_mpn_mulhigh_11(res, u, v); break; | |
| case 12: flint_mpn_mulhigh_12(res, u, v); break; | |
| case 13: flint_mpn_mulhigh_13(res, u, v); break; | |
| case 14: flint_mpn_mulhigh_14(res, u, v); break; | |
| case 15: flint_mpn_mulhigh_15(res, u, v); break; | |
| case 16: flint_mpn_mulhigh_16(res, u, v); break; | |
| case 17: flint_mpn_mulhigh_17(res, u, v); break; | |
| case 18: flint_mpn_mulhigh_18(res, u, v); break; | |
| case 19: flint_mpn_mulhigh_19(res, u, v); break; | |
| default: | |
| { | |
| if (n < KTAB_LEN) | |
| { | |
| k = (int) flint_mpn_mulhigh_ktab[n]; | |
| if (k != 255) | |
| k *= 4; | |
| if (k == 0) | |
| flint_mpn_mulhigh_n_basecase(res, u, v, n); | |
| else | |
| flint_mpn_mulhigh_n_recursive(res, u, v, n, k); | |
| } | |
| else | |
| { | |
| flint_mpn_mul_n(res, u, v, n); | |
| } | |
| } | |
| } | |
| } | |
| void mpfr_mulhigh_n (mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n); | |
| void mpfr_sqrhigh_n(mp_ptr rp, mp_srcptr np, mp_size_t n); | |
| #undef TIMEIT_END_REPEAT | |
| #undef TIMEIT_STOP_VALUES | |
| #define TIMEIT_END_REPEAT(__timer, __reps) \ | |
| } \ | |
| timeit_stop(__timer); \ | |
| if (__timer->cpu >= 10) \ | |
| break; \ | |
| __reps *= 10; \ | |
| } \ | |
| } while (0); | |
| #define TIMEIT_STOP_VALUES(tcpu, twall) \ | |
| TIMEIT_END_REPEAT(__timer, __reps) \ | |
| (tcpu) = __timer->cpu*0.001 / __reps; \ | |
| (twall) = __timer->wall*0.001 / __reps; \ | |
| } while (0); | |
| slong next_r(slong r) | |
| { | |
| if (r <= 15) | |
| return r + 1; | |
| if ((r & (r - 1)) == 0) | |
| r = (3 * r) / 2; | |
| else | |
| r = (4 * r) / 3; | |
| return r; | |
| } | |
| int main() | |
| { | |
| int iter; | |
| flint_rand_t state; | |
| flint_randinit(state); | |
| for (iter = 0; iter < 100000; iter++) | |
| { | |
| slong i, N; | |
| mp_ptr X, Y, R1, R2, ERR, TOL; | |
| if (n_randint(state, 100) == 0) | |
| N = 1 + n_randtest(state) % 1000; | |
| else if (n_randint(state, 10) == 0) | |
| N = 1 + n_randtest(state) % 100; | |
| else | |
| N = 1 + n_randtest(state) % 20; | |
| X = flint_malloc(sizeof(mp_limb_t) * N); | |
| Y = flint_malloc(sizeof(mp_limb_t) * N); | |
| R1 = flint_malloc(sizeof(mp_limb_t) * 2 * N); | |
| R2 = flint_malloc(sizeof(mp_limb_t) * 2 * N); | |
| ERR = flint_malloc(sizeof(mp_limb_t) * (N + 1)); | |
| TOL = flint_malloc(sizeof(mp_limb_t) * (N + 1)); | |
| for (i = 0; i < N; i++) | |
| { | |
| X[i] = n_randtest(state); | |
| Y[i] = n_randtest(state); | |
| } | |
| for (i = 0; i < 2 * N; i++) | |
| R1[i] = n_randtest(state); | |
| flint_mpn_mulhigh_n(R1, X, Y, N); | |
| flint_mpn_mul_n(R2, X, Y, N); | |
| ERR[N] = mpn_sub_n(ERR, R2 + N, R1 + N, N); | |
| mpn_zero(TOL + 1, N); | |
| TOL[0] = N - 1; | |
| if (mpn_cmp(ERR, TOL, N + 1) > 0) | |
| { | |
| flint_printf("FAIL: N = %wd\n", N); | |
| flint_printf("X = "); flint_mpn_debug(X, N); | |
| flint_printf("Y = "); flint_mpn_debug(Y, N); | |
| flint_printf("R1 = "); flint_mpn_debug(R1, 2 * N); | |
| flint_printf("R2 = "); flint_mpn_debug(R2, 2 * N); | |
| flint_printf("ERR = "); flint_mpn_debug(ERR, N + 1); | |
| flint_printf("TOL = "); flint_mpn_debug(TOL, N + 1); | |
| flint_abort(); | |
| } | |
| flint_free(X); | |
| flint_free(Y); | |
| flint_free(R1); | |
| flint_free(R2); | |
| flint_free(ERR); | |
| flint_free(TOL); | |
| } | |
| flint_printf("OK\n"); | |
| { | |
| slong i, n; | |
| mp_limb_t x[4000], y[2000], z[2000]; | |
| for (i = 0; i < 2000; i++) | |
| { | |
| x[i] = n_randtest(state); | |
| y[i] = n_randtest(state); | |
| z[i] = n_randtest(state); | |
| } | |
| flint_printf("\n"); | |
| flint_printf("n, flint_mpn_mul_n, flint_mpn_mul_n / mpfr, flint_mpn_mul_n / flint_mpn_mulhigh_n\n"); | |
| for (n = 1; n <= 2000; n = next_r(n)) | |
| { | |
| double t1, t2, t3, tt, __attribute__((unused)) __; | |
| TIMEIT_START | |
| flint_mpn_mul_n(z, x, y, n); | |
| TIMEIT_STOP_VALUES(__, t1) | |
| TIMEIT_START | |
| flint_mpn_mul_n(z, x, y, n); | |
| TIMEIT_STOP_VALUES(__, tt) | |
| t1 = FLINT_MIN(t1, tt); | |
| TIMEIT_START | |
| mpfr_mulhigh_n(z, x, y, n); | |
| TIMEIT_STOP_VALUES(__, t2) | |
| TIMEIT_START | |
| mpfr_mulhigh_n(z, x, y, n); | |
| TIMEIT_STOP_VALUES(__, tt) | |
| t2 = FLINT_MIN(t2, tt); | |
| TIMEIT_START | |
| flint_mpn_mulhigh_n(z, x, y, n); | |
| TIMEIT_STOP_VALUES(__, t3) | |
| TIMEIT_START | |
| flint_mpn_mulhigh_n(z, x, y, n); | |
| TIMEIT_STOP_VALUES(__, tt) | |
| t3 = FLINT_MIN(t3, tt); | |
| flint_printf("%wd %g %.3f %.3f %s\n", n, t1, t1 / t2, t1 / t3, (t3 < 1.01 * t2 ? " " : "!!!!!")); | |
| } | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment