August 24, 2013 21:00
diff --git a/product.c b/product.c
 /*

 My results:

  Given COUNT floats, each float ranging from [1/MAG, MAG]:

     Naive product overflows easily (depends on MAG and COUNT)

     Divide & conquer product can still overflow

     Divide & conquer product has significantly less error vs naive
        product as COUNT grows

     exp(sum(log())) has 5x-10x the error of divide & conquer product

     Computing exp(sum(log())) with divide & conquer for sum has
        larger error than div&conq product if MAG > 1.35, and
        smaller error if MAG < 1.35

     Using doubles gives exact float results for all four methods
        with all COUNT and MAG that I tried (they deviate from
        each other further down, but I don't know which one is
        right)

     Using doubles, naive product and div&conq product could still
        overflow, e.g. COUNT=262144, MAG = 3.3

 Recommendations for multiplying many floats:

   If doubles are available and overflow is possible, use exp(sum(log))

   If doubles are available and overflow is unlikely, use d&c product

   If doubles are not available and range is narrow, use d&c exp(sum(log))

   If doubles are not available and range is wide, use SORTED d&c product

 SORTED d&c product:

   Sort numbers from smallest abs value to largest abs value

   Multiply first number by last number, second by second-last, etc.

   Now you have half as many numbers; sort and repeat

 I just made that up just now, but I bet there's something like that
 that's well known.

 ===============================================================================

 Summary of raw data:

 In each of the following runs, we run 100 iterations of COUNT numbers,
 with each number ranging from -exp(WIDTH) to exp(WIDTH).

 We take divide-and-conquer with doubles as the reference result (so that
 row will always have 0 error), except when that underflows/overflows.

 The raw data is below; first a summary.

   2048: div & conquer overlows less, roughly same error as naive product
         exp-sum-log never overflows, but has 5-10x the error as d&c
         div & conquer exp-sum-log has 2-3x error as div & conquer product
            depending on range of numbers
         doubles never overflow, all agree to float precision

   8192: div & conquer overflows less AND significantly less error than naive
         exp-sum-log never overflows, 10x error for div&conquer product
         div & conquer exp-sum-log has 2-5x error as div & conquer product,
            depending on range of numbers

  65536: naive float products almost always overflow
         start seeing occasional overflows with doubles even with narrow ranges
         still similar error ratios

 262144: float div & conquer still overflows even with numbers only [0.75,1.35)
         otherwise, still same error ratios
         note that for range [0.75,1.35), d&c expsum actually produces
             about the same errors as d&c product. but for wider ranges,
             it's still worse

 So, summing up the results:

    expsumlog with doubles is likely to produce float results with +-1 ULP
       and never overflow

    divide & conquer with doubles is highly unlikely to overflow, but might

    divide & conquer with
         
 The first column shows the algorithm. The second column shows the number
 of iterations on which the algorithm underflowed (to 0) or overflowed (to
 infinity). The third column shows the max and average errors discounting
 iterations that under/overflowed.

 #define COUNT  2048
 #define WIDTH  2

 naive float | 0u  5o| max: 0.0000027179  avg: 0.0000008707  rms: 0.0000010886
 div & conq f | 0u  2o| max: 0.0000031137  avg: 0.0000008258  rms: 0.0000010245
 expsumlog f | 0u  0o| max: 0.0000236833  avg: 0.0000066556  rms: 0.0000083713
 d&c expsum f | 0u  0o| max: 0.0000163776  avg: 0.0000029277  rms: 0.0000041276
 naive double | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 div & conq d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 expsumlog d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 d&c expsum d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000


 #define COUNT  2048
 #define WIDTH  1

 naive float | 0u  0o| max: 0.0000049017  avg: 0.0000009353  rms: 0.0000011751
 div & conq f | 0u  0o| max: 0.0000043426  avg: 0.0000009181  rms: 0.0000011571
 expsumlog f | 0u  0o| max: 0.0000302809  avg: 0.0000040743  rms: 0.0000054612
 d&c expsum f | 0u  0o| max: 0.0000107832  avg: 0.0000017204  rms: 0.0000022063
 naive double | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 div & conq d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 expsumlog d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 d&c expsum d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
                      // this is actually result for 1000 iterations, not 100



 #define COUNT  8192
 #define WIDTH  2

 naive float |20u 22o| max: 0.0177300752  avg: 0.0003093051  rms: 0.0023280929
 div & conq f | 0u 34o| max: 0.0000068044  avg: 0.0000019087  rms: 0.0000024460
 expsumlog f | 0u  0o| max: 0.0000850223  avg: 0.0000212589  rms: 0.0000280062
 d&c expsum f | 0u  0o| max: 0.0000240424  avg: 0.0000073712  rms: 0.0000093041
 naive double | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 div & conq d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 expsumlog d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 d&c expsum d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000

 #define COUNT  8192
 #define WIDTH  1

 naive float | 6u 24o| max: 0.0937609580  avg: 0.0002542558  rms: 0.0039971759
 div & conq f | 0u 26o| max: 0.0000076250  avg: 0.0000018105  rms: 0.0000022902
 expsumlog f | 0u  0o| max: 0.0000846776  avg: 0.0000120225  rms: 0.0000157857
 d&c expsum f | 0u  0o| max: 0.0000190889  avg: 0.0000039110  rms: 0.0000050194
 naive double | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 div & conq d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 expsumlog d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 d&c expsum d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000



 #define COUNT  65536
 #define WIDTH  2

 naive float |50u 48o| max: 0.0051862393  avg: 0.0025944349  rms: 0.0036672254
 div & conq f | 0u 98o| max: 0.0000088420  avg: 0.0000086192  rms: 0.0000086221
 expsumlog f | 0u  0o| max: 0.0002821505  avg: 0.0000928288  rms: 0.0001134311
 d&c expsum f | 0u  0o| max: 0.0000851772  avg: 0.0000212911  rms: 0.0000273464
 naive double | 1u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 div & conq d | 0u  1o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 expsumlog d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 d&c expsum d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000


 #define COUNT  65536
 #define WIDTH  1.5

 naive float |45u 48o| max: 0.0808237638  avg: 0.0115511928  rms: 0.0305485122
 div & conq f | 0u 89o| max: 0.0000091207  avg: 0.0000029542  rms: 0.0000040769
 expsumlog f | 0u  0o| max: 0.0003306693  avg: 0.0000729488  rms: 0.0001005624
 d&c expsum f | 0u  0o| max: 0.0000765506  avg: 0.0000189131  rms: 0.0000249135
 naive double | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 div & conq d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 expsumlog d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 d&c expsum d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000


 #define COUNT  65536
 #define WIDTH  1.2

 naive float |42u 42o| max: 0.0012234093  avg: 0.0001408503  rms: 0.0003806614
 div & conq f | 0u 81o| max: 0.0000119770  avg: 0.0000057240  rms: 0.0000063431
 expsumlog f | 0u  0o| max: 0.0003976301  avg: 0.0000669214  rms: 0.0000911918
 d&c expsum f | 0u  0o| max: 0.0000555896  avg: 0.0000145403  rms: 0.0000189150
 naive double | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 div & conq d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 expsumlog d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 d&c expsum d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000


 #define COUNT  65536
 #define WIDTH  1

 naive float |33u 38o| max: 0.0937657292  avg: 0.0037975804  rms: 0.0175970931
 div & conq f | 0u 62o| max: 0.0000200516  avg: 0.0000062828  rms: 0.0000078558
 expsumlog f | 0u  0o| max: 0.0002676856  avg: 0.0000628544  rms: 0.0000858625
 d&c expsum f | 0u  0o| max: 0.0000590484  avg: 0.0000127801  rms: 0.0000172268
 naive double | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 div & conq d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 expsumlog d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 d&c expsum d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000


 #define COUNT  (65536*4)
 #define WIDTH  1.2

 naive float |44u 56o| max:      n/a      avg:      n/a      rms:      n/a
 div & conq f | 0u 99o| max: 0.0000085888  avg: 0.0000085888  rms: 0.0000085888
 expsumlog f | 0u  0o| max: 0.0007992241  avg: 0.0002292591  rms: 0.0002983976
 d&c expsum f | 0u  0o| max: 0.0000821443  avg: 0.0000313129  rms: 0.0000382251
 naive double | 1u  1o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 div & conq d | 0u  2o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 expsumlog d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 d&c expsum d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000


 #define COUNT  (65536*4)
 #define WIDTH  0.75

 naive float |43u 52o| max: 0.2921414687  avg: 0.0618006679  rms: 0.1308632012
 div & conq f | 0u 94o| max: 0.0000226060  avg: 0.0000127530  rms: 0.0000152179
 expsumlog f | 0u  0o| max: 0.0005427947  avg: 0.0001458992  rms: 0.0001869438
 d&c expsum f | 0u  0o| max: 0.0000655081  avg: 0.0000192601  rms: 0.0000234380
 naive double | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 div & conq d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 expsumlog d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 d&c expsum d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000


 #define COUNT  (65536*4)
 #define WIDTH  0.5

 naive float | 0u 57o| max: 1.96452e+030  avg: 4.79043e+028  rms: 2.99939e+029
 div & conq f | 0u 67o| max: 0.0000284797  avg: 0.0000110656  rms: 0.0000135814
 expsumlog f | 0u  0o| max: 0.0004995333  avg: 0.0001182828  rms: 0.0001606016
 d&c expsum f | 0u  0o| max: 0.0000397396  avg: 0.0000123350  rms: 0.0000153791
 naive double | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 div & conq d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 expsumlog d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 d&c expsum d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000


 #define COUNT  (65536*4)
 #define WIDTH  0.3

 naive float |10u 22o| max: 0.0964692639  avg: 0.0027056422  rms: 0.0157422156
 div & conq f | 0u 25o| max: 0.0000254426  avg: 0.0000087956  rms: 0.0000109531
 expsumlog f | 0u  0o| max: 0.0003534482  avg: 0.0000743110  rms: 0.0001025451
 d&c expsum f | 0u  0o| max: 0.0000277140  avg: 0.0000088533  rms: 0.0000110160
 naive double | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 div & conq d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 expsumlog d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000
 d&c expsum d | 0u  0o| max: 0.0000000000  avg: 0.0000000000  rms: 0.0000000000

 */


 #include <float.h>
 #define STB_DEFINE
 #include "stb.h"

 #define COUNT  (65536*4)        // number of numbers to compute product of; must be pow2
 #define WIDTH  0.1              // range of numbers, larger causes more overflows

 float numbers[COUNT];
 float helpers[COUNT];
 double dhelpers[COUNT];

 int main(int argc, char **argv)
 {
   int i,j,k;
   double sum[8] = { 0 };
   double sum_squared[8] = {0 };
   double worst[8] = { 0 };
   int overflow[8] = { 0 };
   int underflow[8] = { 0 };
   int valid[8] = { 0 };

   for (k=0; k < 100; ++k) {
      double result[8];
      double correct;
      float product;
      double dproduct;

      // generate random numbers that have a controlled running
      // product and that would cancel out to 1 in the end if the
      // individual numbers weren't rounded to floats.
      // constructed to make sure the divconq approach below
      // doesn't ever take advantage of that cancellation
      for (i=0; i < COUNT/2; ++i) {
         uint32 r = stb_rand() & 0x7fffffff;
         float  t = r / (float) 0x7fffffff;
         numbers[i] = (float) exp(stb_lerp(t,-WIDTH,WIDTH));
         if (numbers[i] == 0) { --i; continue; }
         numbers[COUNT-1-i] = 1.0f / numbers[i];
      }

      // direct multiplication as doubles or floats
      dproduct = 1;
      product = 1;
      for (i=0; i < COUNT; ++i) {
         dproduct *= numbers[i];
         product *= numbers[i];
      }

      result[0] = product;
      result[4] = dproduct;

      // divide&conquer via powers of two
      for (i=0; i < COUNT; ++i) {
         helpers[i] = numbers[i];
         dhelpers[i] = numbers[i];
      }

      for (j=COUNT; j >= 2; j >>= 1) {
         for (i=0; i < j; i += 2) {
            helpers[i>>1] = helpers[i]*helpers[i+1];
            dhelpers[i>>1] = dhelpers[i]*dhelpers[i+1];
         }
      }

      result[1] = helpers[0];
      result[5] = dhelpers[0];

      // running sums of logs
      product = 0;
      dproduct = 0;
      for (i=0; i < COUNT; ++i) {
         product += (float) log(numbers[i]);
         dproduct += (float) log(numbers[i]);
      }
      result[2] = (float) exp(product);
      result[6] = exp(dproduct);

      // divide&conquer via powers of two of sums of logs
      for (i=0; i < COUNT; ++i) {
         helpers[i] = (float) log(numbers[i]);
         dhelpers[i] = (float) log(numbers[i]);
      }

      for (j=COUNT; j >= 2; j >>= 1) {
         for (i=0; i < j; i += 2) {
            helpers[i>>1] = helpers[i]+helpers[i+1];
            dhelpers[i>>1] = dhelpers[i]+dhelpers[i+1];
         }
      }

      result[3] = exp(helpers[0]);
      result[7] = (float) exp(dhelpers[0]);

      // assume divide-and-conquer on doubles is correct result
      correct = result[5];

      // unless it over/underflowed
      if (_isnan(correct) || !_finite(correct) || correct==0)
         correct = result[4];
      if (_isnan(correct) || !_finite(correct) || correct==0)
         correct = result[7];

      assert(!_isnan(correct) && _finite(correct) && correct != 0);

      for (i=0; i < 8; ++i) {
         if (_isnan(result[i]) || !_finite(result[i]))
            ++overflow[i];
         else if (result[i] < 0.0001)
            ++underflow[i];
         else { 
            double error = fabs(result[i] - correct);
            assert(!_isnan(error));
            assert(_finite(error));
            ++valid[i];
            if (error > worst[i])
               worst[i] = error;
            sum[i] += error;
            sum_squared[i] += error * error;
            assert(!_isnan(sum[i]));
            assert(_finite(sum[i]));
         }
      }
   }

   #define STRINGIFY(x) STRINGIFY_(x)
   #define STRINGIFY_(x) #x

   printf("#define COUNT  " STRINGIFY(COUNT) "\n");
   printf("#define WIDTH  " STRINGIFY(WIDTH) "\n\n");

   for (i=0; i < 8; ++i) {
      char *name[8] = {
         "naive float",
         "div & conq f",
         "expsumlog f",
         "d&c expsum f",
         "naive double",
         "div & conq d",
         "expsumlog d",
         "d&c expsum d",
      };
      printf("%12s |%2du %2do| max: %12.10f  avg: %12.10f  rms: %12.10f\n",
           name[i], underflow[i], overflow[i], worst[i], sum[i]/valid[i], sqrt(sum_squared[i]/valid[i]));
   }

   return 0;
 }
	/*

	My results:

	Given COUNT floats, each float ranging from [1/MAG, MAG]:

	Naive product overflows easily (depends on MAG and COUNT)

	Divide & conquer product can still overflow

	Divide & conquer product has significantly less error vs naive
	product as COUNT grows

	exp(sum(log())) has 5x-10x the error of divide & conquer product

	Computing exp(sum(log())) with divide & conquer for sum has
	larger error than div&conq product if MAG > 1.35, and
	smaller error if MAG < 1.35

	Using doubles gives exact float results for all four methods
	with all COUNT and MAG that I tried (they deviate from
	each other further down, but I don't know which one is
	right)

	Using doubles, naive product and div&conq product could still
	overflow, e.g. COUNT=262144, MAG = 3.3

	Recommendations for multiplying many floats:

	If doubles are available and overflow is possible, use exp(sum(log))

	If doubles are available and overflow is unlikely, use d&c product

	If doubles are not available and range is narrow, use d&c exp(sum(log))

	If doubles are not available and range is wide, use SORTED d&c product

	SORTED d&c product:

	Sort numbers from smallest abs value to largest abs value

	Multiply first number by last number, second by second-last, etc.

	Now you have half as many numbers; sort and repeat

	I just made that up just now, but I bet there's something like that
	that's well known.

	===============================================================================

	Summary of raw data:

	In each of the following runs, we run 100 iterations of COUNT numbers,
	with each number ranging from -exp(WIDTH) to exp(WIDTH).

	We take divide-and-conquer with doubles as the reference result (so that
	row will always have 0 error), except when that underflows/overflows.

	The raw data is below; first a summary.

	2048: div & conquer overlows less, roughly same error as naive product
	exp-sum-log never overflows, but has 5-10x the error as d&c
	div & conquer exp-sum-log has 2-3x error as div & conquer product
	depending on range of numbers
	doubles never overflow, all agree to float precision

	8192: div & conquer overflows less AND significantly less error than naive
	exp-sum-log never overflows, 10x error for div&conquer product
	div & conquer exp-sum-log has 2-5x error as div & conquer product,
	depending on range of numbers

	65536: naive float products almost always overflow
	start seeing occasional overflows with doubles even with narrow ranges
	still similar error ratios

	262144: float div & conquer still overflows even with numbers only [0.75,1.35)
	otherwise, still same error ratios
	note that for range [0.75,1.35), d&c expsum actually produces
	about the same errors as d&c product. but for wider ranges,
	it's still worse

	So, summing up the results:

	expsumlog with doubles is likely to produce float results with +-1 ULP
	and never overflow

	divide & conquer with doubles is highly unlikely to overflow, but might

	divide & conquer with

	The first column shows the algorithm. The second column shows the number
	of iterations on which the algorithm underflowed (to 0) or overflowed (to
	infinity). The third column shows the max and average errors discounting
	iterations that under/overflowed.

	#define COUNT 2048
	#define WIDTH 2

	naive float \| 0u 5o\| max: 0.0000027179 avg: 0.0000008707 rms: 0.0000010886
	div & conq f \| 0u 2o\| max: 0.0000031137 avg: 0.0000008258 rms: 0.0000010245
	expsumlog f \| 0u 0o\| max: 0.0000236833 avg: 0.0000066556 rms: 0.0000083713
	d&c expsum f \| 0u 0o\| max: 0.0000163776 avg: 0.0000029277 rms: 0.0000041276
	naive double \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	div & conq d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	expsumlog d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	d&c expsum d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000


	#define COUNT 2048
	#define WIDTH 1

	naive float \| 0u 0o\| max: 0.0000049017 avg: 0.0000009353 rms: 0.0000011751
	div & conq f \| 0u 0o\| max: 0.0000043426 avg: 0.0000009181 rms: 0.0000011571
	expsumlog f \| 0u 0o\| max: 0.0000302809 avg: 0.0000040743 rms: 0.0000054612
	d&c expsum f \| 0u 0o\| max: 0.0000107832 avg: 0.0000017204 rms: 0.0000022063
	naive double \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	div & conq d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	expsumlog d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	d&c expsum d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	// this is actually result for 1000 iterations, not 100



	#define COUNT 8192
	#define WIDTH 2

	naive float \|20u 22o\| max: 0.0177300752 avg: 0.0003093051 rms: 0.0023280929
	div & conq f \| 0u 34o\| max: 0.0000068044 avg: 0.0000019087 rms: 0.0000024460
	expsumlog f \| 0u 0o\| max: 0.0000850223 avg: 0.0000212589 rms: 0.0000280062
	d&c expsum f \| 0u 0o\| max: 0.0000240424 avg: 0.0000073712 rms: 0.0000093041
	naive double \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	div & conq d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	expsumlog d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	d&c expsum d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000

	#define COUNT 8192
	#define WIDTH 1

	naive float \| 6u 24o\| max: 0.0937609580 avg: 0.0002542558 rms: 0.0039971759
	div & conq f \| 0u 26o\| max: 0.0000076250 avg: 0.0000018105 rms: 0.0000022902
	expsumlog f \| 0u 0o\| max: 0.0000846776 avg: 0.0000120225 rms: 0.0000157857
	d&c expsum f \| 0u 0o\| max: 0.0000190889 avg: 0.0000039110 rms: 0.0000050194
	naive double \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	div & conq d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	expsumlog d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	d&c expsum d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000



	#define COUNT 65536
	#define WIDTH 2

	naive float \|50u 48o\| max: 0.0051862393 avg: 0.0025944349 rms: 0.0036672254
	div & conq f \| 0u 98o\| max: 0.0000088420 avg: 0.0000086192 rms: 0.0000086221
	expsumlog f \| 0u 0o\| max: 0.0002821505 avg: 0.0000928288 rms: 0.0001134311
	d&c expsum f \| 0u 0o\| max: 0.0000851772 avg: 0.0000212911 rms: 0.0000273464
	naive double \| 1u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	div & conq d \| 0u 1o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	expsumlog d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	d&c expsum d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000


	#define COUNT 65536
	#define WIDTH 1.5

	naive float \|45u 48o\| max: 0.0808237638 avg: 0.0115511928 rms: 0.0305485122
	div & conq f \| 0u 89o\| max: 0.0000091207 avg: 0.0000029542 rms: 0.0000040769
	expsumlog f \| 0u 0o\| max: 0.0003306693 avg: 0.0000729488 rms: 0.0001005624
	d&c expsum f \| 0u 0o\| max: 0.0000765506 avg: 0.0000189131 rms: 0.0000249135
	naive double \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	div & conq d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	expsumlog d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	d&c expsum d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000


	#define COUNT 65536
	#define WIDTH 1.2

	naive float \|42u 42o\| max: 0.0012234093 avg: 0.0001408503 rms: 0.0003806614
	div & conq f \| 0u 81o\| max: 0.0000119770 avg: 0.0000057240 rms: 0.0000063431
	expsumlog f \| 0u 0o\| max: 0.0003976301 avg: 0.0000669214 rms: 0.0000911918
	d&c expsum f \| 0u 0o\| max: 0.0000555896 avg: 0.0000145403 rms: 0.0000189150
	naive double \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	div & conq d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	expsumlog d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	d&c expsum d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000


	#define COUNT 65536
	#define WIDTH 1

	naive float \|33u 38o\| max: 0.0937657292 avg: 0.0037975804 rms: 0.0175970931
	div & conq f \| 0u 62o\| max: 0.0000200516 avg: 0.0000062828 rms: 0.0000078558
	expsumlog f \| 0u 0o\| max: 0.0002676856 avg: 0.0000628544 rms: 0.0000858625
	d&c expsum f \| 0u 0o\| max: 0.0000590484 avg: 0.0000127801 rms: 0.0000172268
	naive double \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	div & conq d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	expsumlog d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	d&c expsum d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000


	#define COUNT (65536*4)
	#define WIDTH 1.2

	naive float \|44u 56o\| max: n/a avg: n/a rms: n/a
	div & conq f \| 0u 99o\| max: 0.0000085888 avg: 0.0000085888 rms: 0.0000085888
	expsumlog f \| 0u 0o\| max: 0.0007992241 avg: 0.0002292591 rms: 0.0002983976
	d&c expsum f \| 0u 0o\| max: 0.0000821443 avg: 0.0000313129 rms: 0.0000382251
	naive double \| 1u 1o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	div & conq d \| 0u 2o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	expsumlog d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	d&c expsum d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000


	#define COUNT (65536*4)
	#define WIDTH 0.75

	naive float \|43u 52o\| max: 0.2921414687 avg: 0.0618006679 rms: 0.1308632012
	div & conq f \| 0u 94o\| max: 0.0000226060 avg: 0.0000127530 rms: 0.0000152179
	expsumlog f \| 0u 0o\| max: 0.0005427947 avg: 0.0001458992 rms: 0.0001869438
	d&c expsum f \| 0u 0o\| max: 0.0000655081 avg: 0.0000192601 rms: 0.0000234380
	naive double \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	div & conq d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	expsumlog d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	d&c expsum d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000


	#define COUNT (65536*4)
	#define WIDTH 0.5

	naive float \| 0u 57o\| max: 1.96452e+030 avg: 4.79043e+028 rms: 2.99939e+029
	div & conq f \| 0u 67o\| max: 0.0000284797 avg: 0.0000110656 rms: 0.0000135814
	expsumlog f \| 0u 0o\| max: 0.0004995333 avg: 0.0001182828 rms: 0.0001606016
	d&c expsum f \| 0u 0o\| max: 0.0000397396 avg: 0.0000123350 rms: 0.0000153791
	naive double \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	div & conq d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	expsumlog d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	d&c expsum d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000


	#define COUNT (65536*4)
	#define WIDTH 0.3

	naive float \|10u 22o\| max: 0.0964692639 avg: 0.0027056422 rms: 0.0157422156
	div & conq f \| 0u 25o\| max: 0.0000254426 avg: 0.0000087956 rms: 0.0000109531
	expsumlog f \| 0u 0o\| max: 0.0003534482 avg: 0.0000743110 rms: 0.0001025451
	d&c expsum f \| 0u 0o\| max: 0.0000277140 avg: 0.0000088533 rms: 0.0000110160
	naive double \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	div & conq d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	expsumlog d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000
	d&c expsum d \| 0u 0o\| max: 0.0000000000 avg: 0.0000000000 rms: 0.0000000000

	*/


	#include <float.h>
	#define STB_DEFINE
	#include "stb.h"

	#define COUNT (65536*4) // number of numbers to compute product of; must be pow2
	#define WIDTH 0.1 // range of numbers, larger causes more overflows

	float numbers[COUNT];
	float helpers[COUNT];
	double dhelpers[COUNT];

	int main(int argc, char **argv)
	{
	int i,j,k;
	double sum[8] = { 0 };
	double sum_squared[8] = {0 };
	double worst[8] = { 0 };
	int overflow[8] = { 0 };
	int underflow[8] = { 0 };
	int valid[8] = { 0 };

	for (k=0; k < 100; ++k) {
	double result[8];
	double correct;
	float product;
	double dproduct;

	// generate random numbers that have a controlled running
	// product and that would cancel out to 1 in the end if the
	// individual numbers weren't rounded to floats.
	// constructed to make sure the divconq approach below
	// doesn't ever take advantage of that cancellation
	for (i=0; i < COUNT/2; ++i) {
	uint32 r = stb_rand() & 0x7fffffff;
	float t = r / (float) 0x7fffffff;
	numbers[i] = (float) exp(stb_lerp(t,-WIDTH,WIDTH));
	if (numbers[i] == 0) { --i; continue; }
	numbers[COUNT-1-i] = 1.0f / numbers[i];
	}

	// direct multiplication as doubles or floats
	dproduct = 1;
	product = 1;
	for (i=0; i < COUNT; ++i) {
	dproduct *= numbers[i];
	product *= numbers[i];
	}

	result[0] = product;
	result[4] = dproduct;

	// divide&conquer via powers of two
	for (i=0; i < COUNT; ++i) {
	helpers[i] = numbers[i];
	dhelpers[i] = numbers[i];
	}

	for (j=COUNT; j >= 2; j >>= 1) {
	for (i=0; i < j; i += 2) {
	helpers[i>>1] = helpers[i]*helpers[i+1];
	dhelpers[i>>1] = dhelpers[i]*dhelpers[i+1];
	}
	}

	result[1] = helpers[0];
	result[5] = dhelpers[0];

	// running sums of logs
	product = 0;
	dproduct = 0;
	for (i=0; i < COUNT; ++i) {
	product += (float) log(numbers[i]);
	dproduct += (float) log(numbers[i]);
	}
	result[2] = (float) exp(product);
	result[6] = exp(dproduct);

	// divide&conquer via powers of two of sums of logs
	for (i=0; i < COUNT; ++i) {
	helpers[i] = (float) log(numbers[i]);
	dhelpers[i] = (float) log(numbers[i]);
	}

	for (j=COUNT; j >= 2; j >>= 1) {
	for (i=0; i < j; i += 2) {
	helpers[i>>1] = helpers[i]+helpers[i+1];
	dhelpers[i>>1] = dhelpers[i]+dhelpers[i+1];
	}
	}

	result[3] = exp(helpers[0]);
	result[7] = (float) exp(dhelpers[0]);

	// assume divide-and-conquer on doubles is correct result
	correct = result[5];

	// unless it over/underflowed
	if (_isnan(correct) \|\| !_finite(correct) \|\| correct==0)
	correct = result[4];
	if (_isnan(correct) \|\| !_finite(correct) \|\| correct==0)
	correct = result[7];

	assert(!_isnan(correct) && _finite(correct) && correct != 0);

	for (i=0; i < 8; ++i) {
	if (_isnan(result[i]) \|\| !_finite(result[i]))
	++overflow[i];
	else if (result[i] < 0.0001)
	++underflow[i];
	else {
	double error = fabs(result[i] - correct);
	assert(!_isnan(error));
	assert(_finite(error));
	++valid[i];
	if (error > worst[i])
	worst[i] = error;
	sum[i] += error;
	sum_squared[i] += error * error;
	assert(!_isnan(sum[i]));
	assert(_finite(sum[i]));
	}
	}
	}

	#define STRINGIFY(x) STRINGIFY_(x)
	#define STRINGIFY_(x) #x

	printf("#define COUNT " STRINGIFY(COUNT) "\n");
	printf("#define WIDTH " STRINGIFY(WIDTH) "\n\n");

	for (i=0; i < 8; ++i) {
	char *name[8] = {
	"naive float",
	"div & conq f",
	"expsumlog f",
	"d&c expsum f",
	"naive double",
	"div & conq d",
	"expsumlog d",
	"d&c expsum d",
	};
	printf("%12s \|%2du %2do\| max: %12.10f avg: %12.10f rms: %12.10f\n",
	name[i], underflow[i], overflow[i], worst[i], sum[i]/valid[i], sqrt(sum_squared[i]/valid[i]));
	}

	return 0;
	}