Created
December 18, 2020 18:31
-
-
Save noughtmare/f2478b9ea7a466d33b3f0185dc51f0dd to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE BangPatterns #-} | |
module Main where | |
import qualified Data.ByteString as S -- S for strict (hmm...) | |
import qualified Data.ByteString.Internal as S | |
import qualified Data.ByteString.Unsafe as S | |
import qualified Data.ByteString.Lazy.Internal as L | |
import Foreign | |
import Gauge.Main | |
import Data.ByteString.Lazy.Char8 as BL | |
import Data.ByteString.Char8 as B | |
import GHC.Base (unsafeChr) | |
myDropWhileInline :: (Char -> Bool) -> BL.ByteString -> BL.ByteString | |
myDropWhileInline f cs0 = dropWhile' cs0 | |
where dropWhile' L.Empty = L.Empty | |
dropWhile' (L.Chunk c cs) = | |
case findIndexOrEnd (not . f . unsafeChr . fromIntegral) c of | |
n | n < S.length c -> L.Chunk (S.unsafeDrop n c) cs | |
| otherwise -> dropWhile' cs | |
{-# INLINE myDropWhileInline #-} | |
myDropWhileNoInline :: (Char -> Bool) -> BL.ByteString -> BL.ByteString | |
myDropWhileNoInline f cs0 = dropWhile' cs0 | |
where dropWhile' L.Empty = L.Empty | |
dropWhile' (L.Chunk c cs) = | |
case findIndexOrEnd (not . f . unsafeChr . fromIntegral) c of | |
n | n < S.length c -> L.Chunk (S.unsafeDrop n c) cs | |
| otherwise -> dropWhile' cs | |
{-# NOINLINE myDropWhileNoInline #-} | |
findIndexOrEnd :: (Word8 -> Bool) -> B.ByteString -> Int | |
findIndexOrEnd k (S.BS x l) = | |
S.accursedUnutterablePerformIO $ withForeignPtr x g | |
where | |
g ptr = go 0 where | |
go !n | n >= l = return l | |
| otherwise = do w <- peek $ ptr `plusPtr` n | |
if k w | |
then return n | |
else go (n+1) | |
{-# INLINE findIndexOrEnd #-} | |
myFindBytInline :: BL.ByteString -> BL.ByteString | |
myFindBytInline bs = myDropWhileInline (/= ' ') bs | |
myFindByt :: BL.ByteString -> BL.ByteString | |
myFindByt bs = myDropWhileNoInline (/= ' ') bs | |
findBytL :: BL.ByteString -> BL.ByteString | |
findBytL bs = BL.dropWhile (/= ' ') bs | |
findByt :: B.ByteString -> B.ByteString | |
findByt bs = B.dropWhile (/= ' ') bs | |
main :: IO () | |
main = defaultMain | |
[ bench "lazy - custom dropWhile - inline" $ nfIO | |
$ myFindBytInline <$> BL.readFile "test.txt" | |
, bench "lazy - custom dropWhile - no inline" $ nfIO | |
$ myFindByt <$> BL.readFile "test.txt" | |
, bench "lazy" $ nfIO | |
$ findBytL <$> BL.readFile "test.txt" | |
, bench "strict" $ nfIO | |
$ findByt <$> B.readFile "test.txt" | |
] |
Results:
benchmarking lazy - custom dropWhile - inline ... took 19.74 s, total 56 iterations
benchmarked lazy - custom dropWhile - inline
time 357.5 ms (355.8 ms .. 358.6 ms)
1.000 R² (1.000 R² .. 1.000 R²)
mean 359.5 ms (358.6 ms .. 360.8 ms)
std dev 1.805 ms (811.2 μs .. 2.834 ms)
benchmarking lazy - custom dropWhile - no inline ... took 59.03 s, total 56 iterations
benchmarked lazy - custom dropWhile - no inline
time 1.073 s (1.070 s .. 1.075 s)
1.000 R² (1.000 R² .. 1.000 R²)
mean 1.073 s (1.072 s .. 1.075 s)
std dev 1.701 ms (911.8 μs .. 2.607 ms)
benchmarking lazy ... took 81.56 s, total 56 iterations
benchmarked lazy
time 1.486 s (1.482 s .. 1.491 s)
1.000 R² (1.000 R² .. 1.000 R²)
mean 1.482 s (1.480 s .. 1.484 s)
std dev 3.951 ms (2.876 ms .. 5.626 ms)
benchmarking strict ... took 28.26 s, total 56 iterations
benchmarked strict
time 492.1 ms (490.3 ms .. 494.0 ms)
1.000 R² (1.000 R² .. 1.000 R²)
mean 522.6 ms (510.7 ms .. 547.5 ms)
std dev 28.34 ms (12.14 ms .. 45.58 ms)
variance introduced by outliers: 18% (moderately inflated)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Generate test.txt with
python -c "a = 'aaa' * 100000000; print(a + ' ' + a)" > test.txt
.