timjb · February 7, 2015 01:35
diff --git a/MinRunBenchs.hs b/MinRunBenchs.hs
 {-# LANGUAGE BangPatterns #-}

 module Main (main) where

 import Criterion.Main
 import Data.Bits

 main :: IO ()
 main = defaultMain benchmarks
  where
    benchmarks =
      [ bgroup "minRun1"
        [ bench "1" $ whnf minRun1 1
        , bench "1k" $ whnf minRun1 1000
        , bench "1M" $ whnf minRun1 1000000
        ]
      , bgroup "minRun2"
        [ bench "1" $ whnf minRun2 1
        , bench "1k" $ whnf minRun2 1000
        , bench "1M" $ whnf minRun2 1000000
        ]
      ]


 -- | Computes the minimum run size for the sort. The goal is to choose a size
 -- such that there are almost if not exactly 2^n chunks of that size in the
 -- array.
 minRun1 :: Int -> Int
 minRun1 n0 = (n0 `unsafeShiftR` extra) + if (lowMask .&. n0) > 0 then 1 else 0
 where
 -- smear the bits down from the most significant bit
 !n1 = n0 .|. unsafeShiftR n0 1
 !n2 = n1 .|. unsafeShiftR n1 2
 !n3 = n2 .|. unsafeShiftR n2 4
 !n4 = n3 .|. unsafeShiftR n3 8
 !n5 = n4 .|. unsafeShiftR n4 16
 !n6 = n5 .|. unsafeShiftR n5 32

 -- mask for the bits lower than the 6 highest bits
 !lowMask = n6 `unsafeShiftR` 6

 !extra = popCount lowMask


 -- | Given `N`, this function calculates `minRun` in the range [32,65] s.t. in
 --     q, r = divmod(N, minRun)
 -- `q` is a power of 2 (or slightly less than a power of 2).
 minRun2 :: Int -> Int
 minRun2 = loop 0
  where
    -- Take the first 6 bits of `N` and add one if one of the remaining bits
    -- are set.
    loop !r n | n < 64 = r + n
    loop !r n = loop (r .|. (n .&. 1)) (n `shiftR` 1)
diff --git a/results.txt b/results.txt
 benchmarking minRun1/1
 time                 18.12 ns   (18.01 ns .. 18.24 ns)
                     0.999 R²   (0.999 R² .. 1.000 R²)
 mean                 18.10 ns   (17.91 ns .. 18.29 ns)
 std dev              646.5 ps   (476.5 ps .. 1.044 ns)
 variance introduced by outliers: 58% (severely inflated)

 benchmarking minRun1/1k
 time                 18.42 ns   (18.28 ns .. 18.59 ns)
                     0.999 R²   (0.999 R² .. 1.000 R²)
 mean                 18.51 ns   (18.34 ns .. 18.68 ns)
 std dev              582.4 ps   (471.9 ps .. 746.7 ps)
 variance introduced by outliers: 52% (severely inflated)

 benchmarking minRun1/1M
 time                 18.48 ns   (18.36 ns .. 18.60 ns)
                     0.999 R²   (0.999 R² .. 1.000 R²)
 mean                 18.62 ns   (18.47 ns .. 18.78 ns)
 std dev              531.6 ps   (413.2 ps .. 742.5 ps)
 variance introduced by outliers: 47% (moderately inflated)

 benchmarking minRun2/1
 time                 10.98 ns   (10.90 ns .. 11.04 ns)
                     1.000 R²   (0.999 R² .. 1.000 R²)
 mean                 11.02 ns   (10.94 ns .. 11.16 ns)
 std dev              370.5 ps   (232.5 ps .. 647.8 ps)
 variance introduced by outliers: 56% (severely inflated)

 benchmarking minRun2/1k
 time                 15.75 ns   (15.64 ns .. 15.85 ns)
                     0.999 R²   (0.999 R² .. 1.000 R²)
 mean                 15.79 ns   (15.64 ns .. 15.97 ns)
 std dev              553.7 ps   (434.6 ps .. 756.8 ps)
 variance introduced by outliers: 57% (severely inflated)

 benchmarking minRun2/1M
 time                 28.46 ns   (28.18 ns .. 28.79 ns)
                     0.999 R²   (0.998 R² .. 0.999 R²)
 mean                 28.49 ns   (28.17 ns .. 28.95 ns)
 std dev              1.233 ns   (973.5 ps .. 1.584 ns)
 variance introduced by outliers: 66% (severely inflated)
	{-# LANGUAGE BangPatterns #-}

	module Main (main) where

	import Criterion.Main
	import Data.Bits

	main :: IO ()
	main = defaultMain benchmarks
	where
	benchmarks =
	[ bgroup "minRun1"
	[ bench "1" $ whnf minRun1 1
	, bench "1k" $ whnf minRun1 1000
	, bench "1M" $ whnf minRun1 1000000
	]
	, bgroup "minRun2"
	[ bench "1" $ whnf minRun2 1
	, bench "1k" $ whnf minRun2 1000
	, bench "1M" $ whnf minRun2 1000000
	]
	]


	-- \| Computes the minimum run size for the sort. The goal is to choose a size
	-- such that there are almost if not exactly 2^n chunks of that size in the
	-- array.
	minRun1 :: Int -> Int
	minRun1 n0 = (n0 `unsafeShiftR` extra) + if (lowMask .&. n0) > 0 then 1 else 0
	where
	-- smear the bits down from the most significant bit
	!n1 = n0 .\|. unsafeShiftR n0 1
	!n2 = n1 .\|. unsafeShiftR n1 2
	!n3 = n2 .\|. unsafeShiftR n2 4
	!n4 = n3 .\|. unsafeShiftR n3 8
	!n5 = n4 .\|. unsafeShiftR n4 16
	!n6 = n5 .\|. unsafeShiftR n5 32

	-- mask for the bits lower than the 6 highest bits
	!lowMask = n6 `unsafeShiftR` 6

	!extra = popCount lowMask


	-- \| Given `N`, this function calculates `minRun` in the range [32,65] s.t. in
	-- q, r = divmod(N, minRun)
	-- `q` is a power of 2 (or slightly less than a power of 2).
	minRun2 :: Int -> Int
	minRun2 = loop 0
	where
	-- Take the first 6 bits of `N` and add one if one of the remaining bits
	-- are set.
	loop !r n \| n < 64 = r + n
	loop !r n = loop (r .\|. (n .&. 1)) (n `shiftR` 1)
	benchmarking minRun1/1
	time 18.12 ns (18.01 ns .. 18.24 ns)
	0.999 R² (0.999 R² .. 1.000 R²)
	mean 18.10 ns (17.91 ns .. 18.29 ns)
	std dev 646.5 ps (476.5 ps .. 1.044 ns)
	variance introduced by outliers: 58% (severely inflated)

	benchmarking minRun1/1k
	time 18.42 ns (18.28 ns .. 18.59 ns)
	0.999 R² (0.999 R² .. 1.000 R²)
	mean 18.51 ns (18.34 ns .. 18.68 ns)
	std dev 582.4 ps (471.9 ps .. 746.7 ps)
	variance introduced by outliers: 52% (severely inflated)

	benchmarking minRun1/1M
	time 18.48 ns (18.36 ns .. 18.60 ns)
	0.999 R² (0.999 R² .. 1.000 R²)
	mean 18.62 ns (18.47 ns .. 18.78 ns)
	std dev 531.6 ps (413.2 ps .. 742.5 ps)
	variance introduced by outliers: 47% (moderately inflated)

	benchmarking minRun2/1
	time 10.98 ns (10.90 ns .. 11.04 ns)
	1.000 R² (0.999 R² .. 1.000 R²)
	mean 11.02 ns (10.94 ns .. 11.16 ns)
	std dev 370.5 ps (232.5 ps .. 647.8 ps)
	variance introduced by outliers: 56% (severely inflated)

	benchmarking minRun2/1k
	time 15.75 ns (15.64 ns .. 15.85 ns)
	0.999 R² (0.999 R² .. 1.000 R²)
	mean 15.79 ns (15.64 ns .. 15.97 ns)
	std dev 553.7 ps (434.6 ps .. 756.8 ps)
	variance introduced by outliers: 57% (severely inflated)

	benchmarking minRun2/1M
	time 28.46 ns (28.18 ns .. 28.79 ns)
	0.999 R² (0.998 R² .. 0.999 R²)
	mean 28.49 ns (28.17 ns .. 28.95 ns)
	std dev 1.233 ns (973.5 ps .. 1.584 ns)
	variance introduced by outliers: 66% (severely inflated)