The original code (~7.2s on my laptop).
import System.Random
import System.CPUTime
-- | Total water trapped between columns of the given heights:
-- each column holds water up to the lower of the tallest column
-- on its left and the tallest on its right.
rainfall :: [Int] -> Int
rainfall heights = sum [min l r - h | (l, r, h) <- zip3 fromLeft fromRight heights]
  where
    fromLeft  = scanl1 max heights  -- running maximum from the left
    fromRight = scanr1 max heights  -- running maximum from the right
-- | Generate 10M random heights, then time and print the rainfall total.
main :: IO ()
main = do
  g <- getStdGen
  -- NB: hs is a lazy list, so random-number generation actually happens
  -- inside the timed region below.
  let hs = take 10000000 (randomRs (0, 200) g :: [Int])
  startTime <- getCPUTime
  let n = rainfall hs
  -- Printing forces the (lazy) computation before the clock is stopped.
  print n
  finishTime <- getCPUTime
  -- getCPUTime reports picoseconds; divide by 10^12 for seconds.
  print (fromIntegral (finishTime - startTime) / 1e12)
The biggest problem with the original code is that the random number generation is being benchmarked (which is unlike the C# code).
We will fix it in a very crude way by printing the sum of the list, so that every element is evaluated.
This reduces the time to 3.3s.
import System.Random
import System.CPUTime
-- | Amount of water trapped between columns of the given heights.
rainfall :: [Int] -> Int
rainfall xs = sum (zipWith (-) mins xs)
  where
    -- Water level above each column: the lower of the tallest column
    -- to its left and the tallest to its right.
    mins = zipWith min maxl maxr
    maxl = scanl1 max xs  -- running maximum from the left
    maxr = scanr1 max xs  -- running maximum from the right
-- | Generate 10M random heights, force them, then time the rainfall sum.
main :: IO ()
main = do
  g <- getStdGen
  let hs = take 10000000 (randomRs (0, 200) g :: [Int])
  -- Force the whole list up front so RNG cost stays outside the timed region.
  print (sum hs)
  startTime <- getCPUTime
  let n = rainfall hs
  -- Printing forces the computation before the clock is stopped.
  print n
  finishTime <- getCPUTime
  -- getCPUTime is in picoseconds; divide by 10^12 for seconds.
  print (fromIntegral (finishTime - startTime) / 1e12)
Next, to make this comparison somewhat fair, we should use similar data structures across all languages.
We switch the list to an unboxed vector. This mostly requires putting a few V.
prefixes on the existing code.
After doing this the runtime is ~0.1s.
import System.Random
import System.CPUTime
import Data.Vector.Unboxed (Vector)
import qualified Data.Vector.Unboxed as V
-- | Amount of water trapped between columns; unboxed-vector version.
rainfall :: Vector Int -> Int
rainfall xs = V.sum (V.zipWith (-) mins xs)
  where
    -- Water level over each column: min of the running maxima from each side.
    mins = V.zipWith min maxl maxr
    maxl = V.scanl1 max xs  -- running maximum from the left
    maxr = V.scanr1 max xs  -- running maximum from the right
-- | As before, but build an unboxed vector of heights before timing.
main :: IO ()
main = do
  g <- getStdGen
  let hs = V.fromList (take 10000000 (randomRs (0, 200) g :: [Int]))
  -- Summing forces the whole vector so construction is outside the timing.
  print (V.sum hs)
  startTime <- getCPUTime
  let n = rainfall hs
  -- Printing forces the computation before the clock is stopped.
  print n
  finishTime <- getCPUTime
  -- getCPUTime is in picoseconds; divide by 10^12 for seconds.
  print (fromIntegral (finishTime - startTime) / 1e12)
We're also making an intermediate vector we aren't using so get rid of that.
(This doesn't seem to make any discernible difference to the runtime, but it makes the next part a little clearer.)
import System.Random
import System.CPUTime
import Data.Vector.Unboxed (Vector)
import qualified Data.Vector.Unboxed as V
-- | Trapped-water total, fusing the min and subtraction into a single
-- zipWith3 pass (no intermediate "mins" vector).
rainfall :: Vector Int -> Int
rainfall xs = V.sum (V.zipWith3 (\l r x -> (min l r) - x) maxl maxr xs)
  where
    maxl = V.scanl1 max xs  -- running maximum from the left
    maxr = V.scanr1 max xs  -- running maximum from the right
-- | Build and force the input vector, then time the single-pass rainfall.
main :: IO ()
main = do
  g <- getStdGen
  let hs = V.fromList (take 10000000 (randomRs (0, 200) g :: [Int]))
  -- Summing forces the vector so construction is outside the timed region.
  print (V.sum hs)
  startTime <- getCPUTime
  let n = rainfall hs
  -- Printing forces the computation before the clock is stopped.
  print n
  finishTime <- getCPUTime
  -- getCPUTime is in picoseconds; divide by 10^12 for seconds.
  print (fromIntegral (finishTime - startTime) / 1e12)
Now we can spark off the computations of maxl and maxr in parallel.
This reduces the runtime to ~0.07s. I think we're running into timing quanta at this point because the time is pretty repeatable.
import System.Random
import System.CPUTime
import Control.Parallel (par)
import Data.Vector.Unboxed (Vector)
import qualified Data.Vector.Unboxed as V
-- | Parallel variant: spark the two scans so they can be evaluated on
-- other capabilities while the combining pass runs.
rainfall :: Vector Int -> Int
rainfall xs = maxl `par` maxr `par` V.sum (V.zipWith3 (\l r x -> (min l r) - x) maxl maxr xs)
  where
    -- NOTE(review): `par` only evaluates its spark to WHNF; for an unboxed
    -- vector that presumably builds the whole array, but confirm the sparks
    -- aren't fizzling (e.g. with +RTS -s).
    maxl = V.scanl1 max xs
    maxr = V.scanr1 max xs
-- | Build and force the input vector, then time the parallel rainfall.
main :: IO ()
main = do
  g <- getStdGen
  let hs = V.fromList (take 10000000 (randomRs (0, 200) g :: [Int]))
  -- Summing forces the vector so construction is outside the timed region.
  print (V.sum hs)
  -- NOTE(review): getCPUTime measures CPU time, which is summed over all
  -- threads; wall-clock time would be the fairer measure for the parallel
  -- version.
  startTime <- getCPUTime
  let n = rainfall hs
  -- Printing forces the computation before the clock is stopped.
  print n
  finishTime <- getCPUTime
  -- getCPUTime is in picoseconds; divide by 10^12 for seconds.
  print (fromIntegral (finishTime - startTime) / 1e12)
... so we should be using Criterion for benchmarking.
If we do that, we have:
import Control.Parallel (par)
import Criterion.Main
import Data.Vector.Unboxed (Vector)
import qualified Data.Vector.Unboxed as V
import System.Random
-- | The original list-based implementation, kept for comparison.
original_rainfall :: [Int] -> Int
original_rainfall xs = sum (zipWith (-) mins xs)
  where
    -- Water level above each column: the lower of the running maxima.
    mins = zipWith min maxl maxr
    maxl = scanl1 max xs  -- running maximum from the left
    maxr = scanr1 max xs  -- running maximum from the right
-- | Unboxed-vector version with the min/subtract fused into one zipWith3.
rainfall :: Vector Int -> Int
rainfall xs = V.sum (V.zipWith3 (\l r x -> (min l r) - x) maxl maxr xs)
  where
    maxl = V.scanl1 max xs  -- running maximum from the left
    maxr = V.scanr1 max xs  -- running maximum from the right
-- | Vector version with the two scans sparked in parallel via `par`.
par_rainfall :: Vector Int -> Int
par_rainfall xs = maxl `par` maxr `par` V.sum (V.zipWith3 (\l r x -> (min l r) - x) maxl maxr xs)
  where
    maxl = V.scanl1 max xs
    maxr = V.scanr1 max xs
-- | 10M pseudo-random heights in [0, 200] from the global generator.
makeList :: IO [Int]
makeList = take 10000000 . randomRs (0, 200) <$> getStdGen
-- | Benchmark the variants with Criterion. The list-to-vector conversion
-- is moved into 'env' so the conversion cost (and forcing of the vector)
-- happens during setup rather than inside the benchmarked closure.
main :: IO ()
main = do
  defaultMain
    [ env (V.fromList <$> makeList) (\hs -> bench "rainfall" (nf rainfall hs))
    , env (V.fromList <$> makeList) (\hs -> bench "par rainfall" (nf par_rainfall hs))
    , env makeList (\hs -> bench "original" (nf original_rainfall hs))
    ]
With the results:
benchmarking rainfall
time 109.2 ms (106.7 ms .. 112.0 ms)
0.999 R² (0.995 R² .. 1.000 R²)
mean 109.0 ms (107.8 ms .. 110.9 ms)
std dev 2.278 ms (943.4 us .. 2.976 ms)
benchmarking par rainfall
time 54.04 ms (51.66 ms .. 57.73 ms)
0.992 R² (0.983 R² .. 0.999 R²)
mean 54.31 ms (53.10 ms .. 56.17 ms)
std dev 2.739 ms (1.964 ms .. 3.796 ms)
variance introduced by outliers: 14% (moderately inflated)
benchmarking original
time 2.705 s (2.287 s .. 3.473 s)
0.991 R² (0.979 R² .. 1.000 R²)
mean 2.913 s (2.774 s .. 2.995 s)
std dev 125.6 ms (0.0 s .. 140.6 ms)
variance introduced by outliers: 19% (moderately inflated)
This shows the parallelism gives a nice improvement in runtime.
We can also use a strict scan, which again improves the times:
import Control.Parallel (par)
import Criterion.Main
import Data.Vector.Unboxed (Vector)
import qualified Data.Vector.Unboxed as V
import System.Random
-- | The original list-based implementation, kept for comparison.
original_rainfall :: [Int] -> Int
original_rainfall xs = sum (zipWith (-) mins xs)
  where
    -- Water level above each column: the lower of the running maxima.
    mins = zipWith min maxl maxr
    maxl = scanl1 max xs  -- running maximum from the left
    maxr = scanr1 max xs  -- running maximum from the right
-- | Unboxed-vector version with the min/subtract fused into one zipWith3.
rainfall :: Vector Int -> Int
rainfall xs = V.sum (V.zipWith3 (\l r x -> (min l r) - x) maxl maxr xs)
  where
    maxl = V.scanl1 max xs  -- running maximum from the left
    maxr = V.scanr1 max xs  -- running maximum from the right
-- | Vector version with the two scans sparked in parallel via `par`.
par_rainfall :: Vector Int -> Int
par_rainfall xs = maxl `par` maxr `par` V.sum (V.zipWith3 (\l r x -> (min l r) - x) maxl maxr xs)
  where
    maxl = V.scanl1 max xs
    maxr = V.scanr1 max xs
-- | Same single-pass combine, but with strict scans (scanl1'/scanr1'),
-- which force each element as the scan proceeds.
rainfall' :: Vector Int -> Int
rainfall' xs = V.sum (V.zipWith3 (\l r x -> (min l r) - x) maxl maxr xs)
  where
    maxl = V.scanl1' max xs
    maxr = V.scanr1' max xs
-- | Strict scans combined with `par` sparks for the two scans.
par_rainfall' :: Vector Int -> Int
par_rainfall' xs = maxl `par` maxr `par` V.sum (V.zipWith3 (\l r x -> (min l r) - x) maxl maxr xs)
  where
    maxl = V.scanl1' max xs
    maxr = V.scanr1' max xs
-- | 10M pseudo-random heights in [0, 200] from the global generator.
makeList :: IO [Int]
makeList = take 10000000 . randomRs (0, 200) <$> getStdGen
-- | Benchmark all five variants with Criterion. The list-to-vector
-- conversion is moved into 'env' so the conversion cost (and forcing of
-- the vector) happens during setup rather than inside the benchmarked
-- closure.
main :: IO ()
main = do
  defaultMain
    [ env (V.fromList <$> makeList) (\hs -> bench "rainfall" (nf rainfall hs))
    , env (V.fromList <$> makeList) (\hs -> bench "par rainfall" (nf par_rainfall hs))
    , env (V.fromList <$> makeList) (\hs -> bench "rainfall'" (nf rainfall' hs))
    , env (V.fromList <$> makeList) (\hs -> bench "par rainfall'" (nf par_rainfall' hs))
    , env makeList (\hs -> bench "original" (nf original_rainfall hs))
    ]
Output:
benchmarking rainfall
time 104.9 ms (103.9 ms .. 105.7 ms)
1.000 R² (1.000 R² .. 1.000 R²)
mean 107.0 ms (106.3 ms .. 107.9 ms)
std dev 1.232 ms (886.3 us .. 1.652 ms)
benchmarking par rainfall
time 52.11 ms (51.38 ms .. 53.00 ms)
1.000 R² (0.999 R² .. 1.000 R²)
mean 51.45 ms (51.10 ms .. 51.77 ms)
std dev 615.7 us (470.1 us .. 793.8 us)
benchmarking rainfall'
time 61.60 ms (60.70 ms .. 62.77 ms)
0.999 R² (0.999 R² .. 1.000 R²)
mean 61.38 ms (61.09 ms .. 61.86 ms)
std dev 651.5 us (405.4 us .. 852.0 us)
benchmarking par rainfall'
time 46.47 ms (39.22 ms .. 50.79 ms)
0.961 R² (0.903 R² .. 0.999 R²)
mean 63.87 ms (58.51 ms .. 72.60 ms)
std dev 12.68 ms (10.14 ms .. 16.46 ms)
variance introduced by outliers: 65% (severely inflated)
benchmarking original
time 3.515 s (2.165 s .. 4.630 s)
0.983 R² (0.964 R² .. 1.000 R²)
mean 3.210 s (2.808 s .. 3.412 s)
std dev 348.0 ms (543.9 as .. 350.5 ms)
variance introduced by outliers: 23% (moderately inflated)
Quite possibly the times are just rather variable on my laptop. (You can see the variance is much higher than for the sequential version, so this could be tricking criterion's time calculation a bit.)