Created
February 14, 2014 03:55
-
-
Save michaelt/8995507 to your computer and use it in GitHub Desktop.
utf8-bench
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE RankNTypes #-} | |
import Data.Conduit | |
import qualified Data.Conduit.Text as CT | |
import Criterion.Main | |
import qualified Data.Text as T | |
import qualified Data.Text.Lazy as TL | |
import qualified Data.Text.Lazy.Encoding as TLE | |
import qualified Data.ByteString as S | |
import qualified Data.ByteString.Lazy as L | |
import qualified Data.Conduit.List as CL | |
import Data.Text.StreamDecoding | |
import Data.Text.Encoding (decodeUtf8) | |
import qualified Pipes.Text as PT | |
import qualified Pipes as P | |
import Pipes (lift,next) | |
import Lens.Family (view) | |
import Control.Monad (void) | |
import Data.Functor.Identity | |
lengthT :: Monad m => Consumer T.Text m Int | |
lengthT = CL.fold (\x y -> x + T.length y) 0 | |
main :: IO () | |
main = do | |
bs <- S.readFile "utf8-bench.hs" | |
let bss = replicate 1000 bs | |
src = mapM_ yield bss | |
producer = P.each bss | |
lbs = L.fromChunks bss | |
defaultMain | |
[ -- bench "old conduit" $ whnf (\src' -> runException_ $ src' $$ OT.decode OT.utf8 =$ lengthT) src | |
bench "lazy text" $ whnf (TL.length . TLE.decodeUtf8) lbs | |
, bench "new conduit" $ whnf (\src' -> runException_ $ src' $$ CT.decode CT.utf8 =$ lengthT) src | |
, bench "stream" $ whnf calcLen bss | |
, bench "pipes" $ whnf (runIdentity . PT.length . void . view PT.decodeUtf8) producer | |
, bench "pipesnew" $ whnf (runIdentity . PT.length . void . decodePipe) producer | |
] | |
calcLen [] = 0 | |
calcLen (bs0:bss0) = | |
loop (streamUtf8 bs0) bss0 0 | |
where | |
loop (DecodeResultSuccess t next) bss total = | |
let total' = total + T.length t | |
in case bss of | |
[] -> total' | |
bs:bss' -> total' `seq` loop (next bs) bss' total' | |
decodePipe :: Monad m => P.Producer S.ByteString m r -> P.Producer T.Text m (P.Producer S.ByteString m r) | |
decodePipe = go streamUtf8 where | |
go dec0 p = do | |
x <- lift (next p) | |
case x of Left r -> return (return r) | |
Right (chunk, p') -> case dec0 chunk of | |
DecodeResultSuccess text dec -> do P.yield text | |
go dec p' | |
DecodeResultFailure text bs -> do P.yield text | |
return (do P.yield bs | |
p') | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment