Skip to content

Instantly share code, notes, and snippets.

@michaelt
Created February 14, 2014 03:55
Show Gist options
  • Save michaelt/8995507 to your computer and use it in GitHub Desktop.
Save michaelt/8995507 to your computer and use it in GitHub Desktop.
utf8-bench
{-# LANGUAGE RankNTypes #-}
import Data.Conduit
import qualified Data.Conduit.Text as CT
import Criterion.Main
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.Conduit.List as CL
import Data.Text.StreamDecoding
import Data.Text.Encoding (decodeUtf8)
import qualified Pipes.Text as PT
import qualified Pipes as P
import Pipes (lift,next)
import Lens.Family (view)
import Control.Monad (void)
import Data.Functor.Identity
lengthT :: Monad m => Consumer T.Text m Int
lengthT = CL.fold (\x y -> x + T.length y) 0
main :: IO ()
main = do
bs <- S.readFile "utf8-bench.hs"
let bss = replicate 1000 bs
src = mapM_ yield bss
producer = P.each bss
lbs = L.fromChunks bss
defaultMain
[ -- bench "old conduit" $ whnf (\src' -> runException_ $ src' $$ OT.decode OT.utf8 =$ lengthT) src
bench "lazy text" $ whnf (TL.length . TLE.decodeUtf8) lbs
, bench "new conduit" $ whnf (\src' -> runException_ $ src' $$ CT.decode CT.utf8 =$ lengthT) src
, bench "stream" $ whnf calcLen bss
, bench "pipes" $ whnf (runIdentity . PT.length . void . view PT.decodeUtf8) producer
, bench "pipesnew" $ whnf (runIdentity . PT.length . void . decodePipe) producer
]
calcLen [] = 0
calcLen (bs0:bss0) =
loop (streamUtf8 bs0) bss0 0
where
loop (DecodeResultSuccess t next) bss total =
let total' = total + T.length t
in case bss of
[] -> total'
bs:bss' -> total' `seq` loop (next bs) bss' total'
decodePipe :: Monad m => P.Producer S.ByteString m r -> P.Producer T.Text m (P.Producer S.ByteString m r)
decodePipe = go streamUtf8 where
go dec0 p = do
x <- lift (next p)
case x of Left r -> return (return r)
Right (chunk, p') -> case dec0 chunk of
DecodeResultSuccess text dec -> do P.yield text
go dec p'
DecodeResultFailure text bs -> do P.yield text
return (do P.yield bs
p')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment