robstewart57 · May 16, 2017 10:46
diff --git a/mean-space-thoughts.txt b/mean-space-thoughts.txt
 consider this Haskell:

 import Prelude hiding (sum,length)

 mean_good :: [Int] -> Double
 mean_good xs = go 0 0 xs
  where
    go !accum !denom [] =
      fromIntegral accum / fromIntegral denom
    go !accum !denom (!x:xs) =
      go (accum+x) (denom+1) xs

 sum xs = foldr (+) 0 xs

 length xs = foldr (const (+1)) 0 xs

 mean_naive :: [Int] -> Double
 mean_naive xs = fromIntegral (sum xs)
                / fromIntegral (length xs)


 Observations:

 Haskell reductions
 ~~~~~~~~~~~~~~~~~~

 mean_good [1,2,3]
 => go 0 0 xs
 => go 0 0 (1:xs)
 => go 1 1 (2:xs)
 => go 3 2 (3:xs)
 => go 6 3 []
 => 6 / 3
 => 2

 -- accumulator then denominator
 mean_naive [1,2,3]
 => sum xs / length xs
 => foldr (+) 0 xs / length xs
 => (1 + (foldr (+) 0 (2:ys))) / length xs
 => (1 + (2 + (foldr (+) 0 (3:ys))) / length xs
 => (1 + (2 + (3 (foldr (+) 0 [])))) / length xs
 => 6 / foldr (const (+1)) 0 (1:ys)
 => 6 / (1 + (foldr (const (+1)) 0 (2:ys)))
 => 6 / (1 + (1 + (foldr (const (+1)) 0 (3:ys))))
 => 6 / (1 + (1 + (1 + foldr (const (+1) 0 []))))
 => 6 / 3
 => 2

 -- interleave the reduction of accumulator and denominator
 mean_naive [1,2,3]
 => sum xs / length xs
 => foldr (+) 0 xs / length xs
 => (1 + (foldr (+) 0 (2:ys))) / length xs
 => (1 + (foldr (+) 0 (2:ys))) / foldr (const (+1)) 0 (1:zs)
 =>
 => -- GC the head of xs (before the tail [2,3] is manifested in the heap)
 => (1 + (2 + (foldr (+) 0 (3:ys))) / foldr (const (+1)) 0 (1:zs)
 => (1 + (2 + (foldr (+) 0 (3:ys))) / (1 + (foldr (const (+1)) 0 (2:ys)))
 =>
 => -- GC the head of `tail xs` (before the tail [3] is manifested in the heap)
 => (1 + (2 + (3 (foldr (+) 0 [])))) / (1 + (foldr (const (+1)) 0 (2:ys)))
 => (1 + (2 + (3 (foldr (+) 0 [])))) / (1 + (1 + (foldr (const (+1)) 0 (3:ys))))
 => (1 + (2 + (3 (foldr (+) 0 [])))) / (1 + (1 + (1 + foldr (const (+1) 0 []))))
 => 6 / (1 + (1 + (1 + foldr (const (+1) 0 []))))
 => 6 / 3
 => 2

 Dataflow analysis
 ~~~~~~~~~~~~~~~~~

 Can dataflow analysis help recover the bad space performance of mean_naive
 back to the space performance of mean_good?

                +-----+
          /---->| sum |-----\
          |     +-----+     |  10   +------+
 [4,3,2,1] |                 \------>|      |  2.5
 ----------+                         | mean |------->
          |                 /------>|      |
          |    +--------+   |   4   +------+
          \--->| length |---/
               +--------+

 The rate at which `sum` and `length` consume the list is the same.

 If `sum` consumes the list entirely before `length` consumes the same list,
 then the list has to stay in the heap for this time. In the dataflow model,
 this would equate the "depth" of the wire to carry [1,2,3,4] to `length` is 4.

 The "depth" of a dataflow wire here would equate to the object size on the heap.

 Dataflow analysis of `sum` and `length` would see that both functions consume
 from their input and discard the head of the the input at the same rate. The
 lazy population of their input wires could therefore result in the wire "depth"
 for their inputs could be reduced to 1, eliminating the memory space cost of
 `mean_naive` -- the space cost of `mean_naive`, when the dataflow wires have
 depth=1, is constant regardless of the input list size.

 -[1]-> sum    (folded state=1)
 -[1]-> length (folded state=1)
 -[2]-> sum    (folded state=3)
 -[2]-> length (folded state=2)
 -[3]-> sum    (folded state=6)
 -[3]-> length (folded state=3)
 -[4]-> sum    (folded state=10)
 -[4]-> length (folded state=4)
	consider this Haskell:

	import Prelude hiding (sum,length)

	mean_good :: [Int] -> Double
	mean_good xs = go 0 0 xs
	where
	go !accum !denom [] =
	fromIntegral accum / fromIntegral denom
	go !accum !denom (!x:xs) =
	go (accum+x) (denom+1) xs

	sum xs = foldr (+) 0 xs

	length xs = foldr (const (+1)) 0 xs

	mean_naive :: [Int] -> Double
	mean_naive xs = fromIntegral (sum xs)
	/ fromIntegral (length xs)


	Observations:

	Haskell reductions
	~~~~~~~~~~~~~~~~~~

	mean_good [1,2,3]
	=> go 0 0 xs
	=> go 0 0 (1:xs)
	=> go 1 1 (2:xs)
	=> go 3 2 (3:xs)
	=> go 6 3 []
	=> 6 / 3
	=> 2

	-- accumulator then denominator
	mean_naive [1,2,3]
	=> sum xs / length xs
	=> foldr (+) 0 xs / length xs
	=> (1 + (foldr (+) 0 (2:ys))) / length xs
	=> (1 + (2 + (foldr (+) 0 (3:ys))) / length xs
	=> (1 + (2 + (3 (foldr (+) 0 [])))) / length xs
	=> 6 / foldr (const (+1)) 0 (1:ys)
	=> 6 / (1 + (foldr (const (+1)) 0 (2:ys)))
	=> 6 / (1 + (1 + (foldr (const (+1)) 0 (3:ys))))
	=> 6 / (1 + (1 + (1 + foldr (const (+1) 0 []))))
	=> 6 / 3
	=> 2

	-- interleave the reduction of accumulator and denominator
	mean_naive [1,2,3]
	=> sum xs / length xs
	=> foldr (+) 0 xs / length xs
	=> (1 + (foldr (+) 0 (2:ys))) / length xs
	=> (1 + (foldr (+) 0 (2:ys))) / foldr (const (+1)) 0 (1:zs)
	=>
	=> -- GC the head of xs (before the tail [2,3] is manifested in the heap)
	=> (1 + (2 + (foldr (+) 0 (3:ys))) / foldr (const (+1)) 0 (1:zs)
	=> (1 + (2 + (foldr (+) 0 (3:ys))) / (1 + (foldr (const (+1)) 0 (2:ys)))
	=>
	=> -- GC the head of `tail xs` (before the tail [3] is manifested in the heap)
	=> (1 + (2 + (3 (foldr (+) 0 [])))) / (1 + (foldr (const (+1)) 0 (2:ys)))
	=> (1 + (2 + (3 (foldr (+) 0 [])))) / (1 + (1 + (foldr (const (+1)) 0 (3:ys))))
	=> (1 + (2 + (3 (foldr (+) 0 [])))) / (1 + (1 + (1 + foldr (const (+1) 0 []))))
	=> 6 / (1 + (1 + (1 + foldr (const (+1) 0 []))))
	=> 6 / 3
	=> 2

	Dataflow analysis
	~~~~~~~~~~~~~~~~~

	Can dataflow analysis help recover the bad space performance of mean_naive
	back to the space performance of mean_good?

	+-----+
	/---->\| sum \|-----\
	\| +-----+ \| 10 +------+
	[4,3,2,1] \| \------>\| \| 2.5
	----------+ \| mean \|------->
	\| /------>\| \|
	\| +--------+ \| 4 +------+
	\--->\| length \|---/
	+--------+

	The rate at which `sum` and `length` consume the list is the same.

	If `sum` consumes the list entirely before `length` consumes the same list,
	then the list has to stay in the heap for this time. In the dataflow model,
	this would equate the "depth" of the wire to carry [1,2,3,4] to `length` is 4.

	The "depth" of a dataflow wire here would equate to the object size on the heap.

	Dataflow analysis of `sum` and `length` would see that both functions consume
	from their input and discard the head of the the input at the same rate. The
	lazy population of their input wires could therefore result in the wire "depth"
	for their inputs could be reduced to 1, eliminating the memory space cost of
	`mean_naive` -- the space cost of `mean_naive`, when the dataflow wires have
	depth=1, is constant regardless of the input list size.

	-[1]-> sum (folded state=1)
	-[1]-> length (folded state=1)
	-[2]-> sum (folded state=3)
	-[2]-> length (folded state=2)
	-[3]-> sum (folded state=6)
	-[3]-> length (folded state=3)
	-[4]-> sum (folded state=10)
	-[4]-> length (folded state=4)