@chrisdone
Last active November 9, 2019 12:35
Lexing efficiently with Zepto on War & Peace

This is a small experiment to see whether one can:

  1. Lex a file efficiently, retaining line/column and indentation information.
  2. Consume little or no memory (aside from the input itself), and
  3. Still have the flexibility to perform zero-cost operations, SAX-style: folds (counting tokens), doing nothing (a simple pass), or printing.

This proves that one could, e.g., run in ST and write the tokens to a mutable Storable vector, allowing the caller to process them later, with the cost of computing each token's line/column/indentation already paid.
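A minimal sketch (not part of the gist) of that "write to a mutable vector" idea: each token is flattened to six Ints and written consecutively into a mutable unboxed array. `IOUArray` from the bundled array package stands in for a Storable vector here; the shape of the code is the same. All names below are hypothetical.

```haskell
-- Sketch: flatten each token's positions to six Ints
-- (start line/column/indentation, end line/column/indentation)
-- stored consecutively in a mutable unboxed array.
import Data.Array.IO (IOUArray, newArray, readArray, writeArray)

-- | Write token number @tok@ at offset @tok * 6@.
writeTokenAt :: IOUArray Int Int -> Int -> (Int, Int, Int, Int, Int, Int) -> IO ()
writeTokenAt arr tok (sl, sc, si, el, ec, ei) = do
  let base = tok * 6
  mapM_
    (\(off, v) -> writeArray arr (base + off) v)
    (zip [0 ..] [sl, sc, si, el, ec, ei])

-- | Read token number @tok@ back out.
readTokenAt :: IOUArray Int Int -> Int -> IO (Int, Int, Int, Int, Int, Int)
readTokenAt arr tok = do
  let base = tok * 6
  [sl, sc, si, el, ec, ei] <- mapM (readArray arr) [base .. base + 5]
  pure (sl, sc, si, el, ec, ei)

-- | Round-trip one token through the array.
demo :: IO (Int, Int, Int, Int, Int, Int)
demo = do
  arr <- newArray (0, 5) 0 :: IO (IOUArray Int Int)
  writeTokenAt arr 0 (1, 1, 0, 1, 4, 0)
  readTokenAt arr 0
```

A real sink would also store the token's byte offset and length into the input, so the ByteString slice can be reconstructed without copying.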

The input file is war-and-peace.txt, which is 6MB. Simply reading the file takes 27ms; counting all words (non-space runs) in the file takes 36ms. So lexing itself accounts for roughly 9ms, in the absence of more rigorous gauge-based benchmarking. There are 1,132,619 "words" in the file.
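To make "counting words" concrete, here is a naive base-only baseline (my sketch, not the gist's code): `words` splits on whitespace, so its length is the same non-space-run count that the lexer's count mode produces, just without positions and much more slowly.

```haskell
-- Naive baseline: count maximal non-space runs via Prelude's words.
countWords :: String -> Int
countWords = length . words
```

Usage would be something like `countWords <$> readFile path`, with the path supplied by the caller.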

chris@precision:~/Work/chrisdone/sandbox$ stack ghc -- -O2 hask-tok3.hs -o tok -rtsopts && ./tok ~/Work/chrisdone/writer/test/assets/war-and-peace.txt silent +RTS -s
[1 of 2] Compiling Zepto            ( Zepto.hs, Zepto.o )
[2 of 2] Compiling Main             ( hask-tok3.hs, hask-tok3.o )
Linking tok ...
       6,655,592 bytes allocated in the heap
          11,184 bytes copied during GC
          46,720 bytes maximum residency (1 sample(s))
          31,104 bytes maximum slop
               9 MB total memory in use (0 MB lost due to fragmentation)

                                     Tot time (elapsed)  Avg pause  Max pause
  Gen  0         1 colls,     0 par    0.000s   0.000s     0.0000s    0.0000s
  Gen  1         1 colls,     0 par    0.000s   0.000s     0.0001s    0.0001s

  INIT    time    0.000s  (  0.000s elapsed)
  MUT     time    0.027s  (  0.030s elapsed)
  GC      time    0.000s  (  0.000s elapsed)
  EXIT    time    0.000s  (  0.000s elapsed)
  Total   time    0.027s  (  0.031s elapsed)

  %GC     time       0.3%  (0.3% elapsed)

  Alloc rate    250,210,225 bytes per MUT second

  Productivity  99.4% of total user, 99.5% of total elapsed

Counting all words in the file:

chris@precision:~/Work/chrisdone/sandbox$ stack ghc -- -O2 hask-tok3.hs -o tok -rtsopts && ./tok ~/Work/chrisdone/writer/test/assets/war-and-peace.txt count +RTS -s
[2 of 2] Compiling Main             ( hask-tok3.hs, hask-tok3.o )
Linking tok ...
1132619
       6,665,680 bytes allocated in the heap
          11,256 bytes copied during GC
          55,016 bytes maximum residency (1 sample(s))
          35,096 bytes maximum slop
               9 MB total memory in use (0 MB lost due to fragmentation)

                                     Tot time (elapsed)  Avg pause  Max pause
  Gen  0         1 colls,     0 par    0.000s   0.000s     0.0000s    0.0000s
  Gen  1         1 colls,     0 par    0.000s   0.000s     0.0001s    0.0001s

  INIT    time    0.000s  (  0.000s elapsed)
  MUT     time    0.035s  (  0.044s elapsed)
  GC      time    0.000s  (  0.000s elapsed)
  EXIT    time    0.000s  (  0.000s elapsed)
  Total   time    0.036s  (  0.044s elapsed)

  %GC     time       0.4%  (0.4% elapsed)

  Alloc rate    189,279,872 bytes per MUT second

  Productivity  99.0% of total user, 99.2% of total elapsed

Pessimism

Here is a tight loop in the fast file lexer. I'm just adding up all the integer values to make sure that the values produced by the couple parser are actually used.

simple_count :: P (State Int) ()
simple_count = do
  (Token { byteString
         , start = Point {line, column, indentation}
         , end = Point { line = line1
                       , column = column2
                       , indentation = indentation2
                       }
         }, end) <- couple
  lift
    (modify
       (+ (line + column + indentation + line1 + column2 + indentation2)))
  unless end simple_count
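The reason this can run in constant space is that the accumulator is forced on every step. A base-only model of the same shape (my sketch, hypothetical names) is an explicit tail loop with a strict Int accumulator, which is roughly what GHC specializes simple_count into:

```haskell
{-# LANGUAGE BangPatterns #-}

-- Model of the simple_count pattern: a tail-recursive loop with a
-- strict accumulator, summing each token's six position fields.
sumPositions :: [(Int, Int, Int, Int, Int, Int)] -> Int
sumPositions = go 0
  where
    go !acc [] = acc
    go !acc ((l, c, i, l1, c1, i1) : rest) =
      go (acc + l + c + i + l1 + c1 + i1) rest
```

The bang on `acc` is what keeps the loop from building a chain of `(+)` thunks; Control.Monad.State.Strict plays the same role in the real code.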

It runs with very good time and space characteristics (the file is 6MB, hence the 6MB allocation):

$ stack ghc -- -O2 hask-tok3.hs -o tok -rtsopts && ./tok ~/Work/chrisdone/writer/test/assets/war-and-peace.txt count +RTS -s
[2 of 2] Compiling Main             ( hask-tok3.hs, hask-tok3.o )
Linking tok ...
13106856
       6,665,720 bytes allocated in the heap
          11,256 bytes copied during GC
          55,016 bytes maximum residency (1 sample(s))
          35,096 bytes maximum slop
               9 MB total memory in use (0 MB lost due to fragmentation)

                                     Tot time (elapsed)  Avg pause  Max pause
  Gen  0         1 colls,     0 par    0.000s   0.000s     0.0000s    0.0000s
  Gen  1         1 colls,     0 par    0.000s   0.000s     0.0001s    0.0001s

  INIT    time    0.000s  (  0.000s elapsed)
  MUT     time    0.028s  (  0.032s elapsed)
  GC      time    0.000s  (  0.000s elapsed)
  EXIT    time    0.000s  (  0.000s elapsed)
  Total   time    0.028s  (  0.032s elapsed)

  %GC     time       0.3%  (0.3% elapsed)

  Alloc rate    237,146,719 bytes per MUT second

  Productivity  99.5% of total user, 99.5% of total elapsed

We used constant memory to do our work, with 99.5% productivity. Just one garbage collection!

This is expected, as the code compiles down to a loop over a few unboxed state variables:

$s$wsimple_count_rcRY
= \ (sc_scFc :: GHC.Prim.Int#)
    (sc1_scFb :: GHC.Prim.Int#)
    (sc2_scFa :: GHC.Prim.Int#)
    (sc3_scF9 :: GHC.Prim.Int#)
    (sc4_scF8 :: GHC.Prim.Int#)
    (sc5_scF7 :: GHC.Prim.Int#)
    (sc6_scF6 :: GHC.ForeignPtr.ForeignPtrContents)
    (sc7_scF5 :: GHC.Prim.Addr#) ->
    ...

So the nested data structures I was using are a "for free" abstraction.

However, watch what happens if I put an INLINE pragma on the declaration:

$ stack ghc -- -O2 hask-tok3.hs -o tok -rtsopts && ./tok ~/Work/chrisdone/writer/test/assets/war-and-peace.txt count +RTS -s
[2 of 2] Compiling Main             ( hask-tok3.hs, hask-tok3.o )
Linking tok ...
13106856
   653,869,784 bytes allocated in the heap
   319,353,904 bytes copied during GC
    85,539,056 bytes maximum residency (7 sample(s))
     1,447,176 bytes maximum slop
           191 MB total memory in use (0 MB lost due to fragmentation)

                                   Tot time (elapsed)  Avg pause  Max pause
Gen  0       609 colls,     0 par    0.082s   0.089s     0.0001s    0.0010s
Gen  1         7 colls,     0 par    0.053s   0.080s     0.0114s    0.0355s

INIT    time    0.000s  (  0.000s elapsed)
MUT     time    0.103s  (  0.112s elapsed)
GC      time    0.135s  (  0.169s elapsed)
EXIT    time    0.000s  (  0.000s elapsed)
Total   time    0.238s  (  0.281s elapsed)

%GC     time      56.7%  (60.2% elapsed)

Alloc rate    6,343,261,939 bytes per MUT second

Productivity  43.3% of total user, 39.8% of total elapsed
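For reference, the pessimizing change was nothing more than a pragma like the following. The text above doesn't say which declaration it went on, so attaching it to simple_count is an assumption on my part:

```haskell
simple_count :: P (State Int) ()
simple_count = do
  ...
{-# INLINE simple_count #-}
```

Marking a self-recursive binding INLINE can defeat the specialization that produced the tight unboxed loop shown earlier, which is consistent with the 100x jump in allocation above.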

Be careful and measure your performance as demonstrated here.

hask-tok3.hs:

{-# OPTIONS_GHC -Wall #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
module Main (main) where

import Control.Applicative
import Control.DeepSeq
import Control.Exception
import Control.Monad.State.Strict
import Data.ByteString (ByteString)
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import Data.Functor.Identity
import Data.Maybe
import Data.Word
import GHC.Generics
import Prelude hiding (dropWhile)
import System.Environment
import Zepto

data Point =
  Point
    { line :: !Int
    , column :: !Int
    , indentation :: !Int
    }
  deriving (Show, Eq, Generic)

instance NFData Point

newtype P m a = P (ZeptoT (StateT Point m) a)
  deriving (Functor, Applicative, Monad, Alternative, MonadIO)

instance MonadTrans P where
  lift m = P (lift (lift m))
  {-# INLINE lift #-}

data Token =
  Token
    { byteString :: {-# UNPACK #-}!ByteString
    , start :: {-# UNPACK #-}!Point
    , end :: {-# UNPACK #-}!Point
    } deriving (Show, Generic)

instance NFData Token

takeToken :: Monad m => (Word8 -> Bool) -> P m Token
takeToken p = do
  start@(Point {line}) <- P (lift get)
  byteString <- P (Zepto.takeWhile p)
  let !newlines = S8.count '\n' byteString
      !lastLine =
        if newlines == 0
          then byteString
          else fromMaybe
                 byteString
                 (fmap
                    (flip S.drop byteString . (+ 1))
                    (S8.elemIndexEnd '\n' byteString))
      !indentation = S.length (S.takeWhile (== 32) lastLine)
      !column = S.length lastLine
      !end = start {line = line + newlines, column, indentation}
  P (lift (put end))
  pure (Token {start, end, byteString})
{-# INLINE takeToken #-}

dropWhile :: Monad m => (Word8 -> Bool) -> P m ()
dropWhile p = void (P (Zepto.takeWhile p))
{-# INLINE dropWhile #-}

run :: Monad m => P m a -> ByteString -> m (Either String a)
run (P m) i =
  evalStateT (parseT m i) (Point {line = 1, column = 1, indentation = 0})
{-# INLINE run #-}

simple :: P IO ()
simple = do
  word *> spaces
  end <- P atEnd
  unless end simple

simple_ :: Monad m => P m ()
simple_ = do
  (_, end) <- couple
  unless end simple_

simple_count :: P (State Int) ()
simple_count = do
  (Token { start = Point {line, column, indentation}
         , end = Point { line = line1
                       , column = column2
                       , indentation = indentation2
                       }
         }, end) <- couple
  lift
    (modify
       (+ (line + column + indentation + line1 + column2 + indentation2)))
  unless end simple_count

couple :: Monad m => P m (Token, Bool)
couple = do
  token <- word' <* spaces
  end <- P atEnd
  pure (token, end)
{-# INLINE couple #-}

word :: P IO ()
word = do
  token <- takeToken (not . isSpace8)
  liftIO (print token)

word' :: Monad m => P m Token
word' = do
  !w <- takeToken (not . isSpace8)
  pure w

spaces :: Monad m => P m ()
spaces = dropWhile isSpace8
{-# INLINE spaces #-}

isSpace8 :: (Eq a, Num a) => a -> Bool
isSpace8 c = c == 13 || c == 32 || c == 10

main :: IO ()
main = do
  fp:mode:_ <- getArgs
  case mode of
    "print" -> S.readFile fp >>= void . run simple
    "silent" -> void (S.readFile fp >>= evaluate . runIdentity . run simple_)
    "count" -> void (S.readFile fp >>= print . flip execState 0 . run simple_count)
    _ -> pure ()

-- With only evaluate
--        6,655,456 bytes allocated in the heap
--           11,184 bytes copied during GC
--           46,720 bytes maximum residency (1 sample(s))
--           31,104 bytes maximum slop
--                9 MB total memory in use (0 MB lost due to fragmentation)
--
-- With simple_
--        6,655,592 bytes allocated in the heap
--           11,184 bytes copied during GC
--           46,720 bytes maximum residency (1 sample(s))
--           31,104 bytes maximum slop
--                9 MB total memory in use (0 MB lost due to fragmentation)
--
--                                      Tot time (elapsed)  Avg pause  Max pause
--   Gen  0         1 colls,     0 par    0.000s   0.000s     0.0000s    0.0000s
--   Gen  1         1 colls,     0 par    0.000s   0.000s     0.0001s    0.0001s
--
--   INIT    time    0.000s  (  0.000s elapsed)
--   MUT     time    0.027s  (  0.030s elapsed)
--   GC      time    0.000s  (  0.000s elapsed)
--   EXIT    time    0.000s  (  0.000s elapsed)
--   Total   time    0.027s  (  0.031s elapsed)
--
--   %GC     time       0.3%  (0.3% elapsed)
--
--   Alloc rate    250,210,225 bytes per MUT second
--
--   Productivity  99.4% of total user, 99.5% of total elapsed
Zepto.hs:

{-# LANGUAGE CPP #-}
#if __GLASGOW_HASKELL__ >= 702
{-# LANGUAGE Trustworthy #-} -- Data.ByteString.Unsafe
#endif
{-# LANGUAGE BangPatterns #-}

-- |
-- Module      : Data.Attoparsec.Zepto
-- Copyright   : Bryan O'Sullivan 2007-2015
-- License     : BSD3
--
-- Maintainer  : [email protected]
-- Stability   : experimental
-- Portability : unknown
--
-- A tiny, highly specialized combinator parser for 'B.ByteString'
-- strings.
--
-- While the main attoparsec module generally performs well, this
-- module is particularly fast for simple non-recursive loops that
-- should not normally result in failed parses.
--
-- /Warning/: on more complex inputs involving recursion or failure,
-- parsers based on this module may be as much as /ten times slower/
-- than regular attoparsec! You should /only/ use this module when you
-- have benchmarks that prove that its use speeds your code up.
module Zepto
  ( Parser
  , ZeptoT
  , parse
  , parseT
  , atEnd
  , string
  , take
  , takeWhile
  , gets
  ) where

import Control.Applicative
import Control.Monad (MonadPlus(..), ap)
import qualified Control.Monad.Fail as Fail
import Control.Monad.IO.Class (MonadIO(..))
import Control.Monad.Trans
import Data.ByteString (ByteString)
import qualified Data.ByteString as B
import qualified Data.ByteString.Unsafe as B
import Data.Functor.Identity (Identity(runIdentity))
import Data.Monoid as Mon (Monoid(..))
import Data.Semigroup (Semigroup(..))
import Data.Word (Word8)
import Prelude hiding (take, takeWhile)

newtype S = S {
    input :: ByteString
  }

data Result a = Fail String
              | OK !a S

-- | A simple parser.
--
-- This monad is strict in its state, and the monadic bind operator
-- ('>>=') evaluates each result to weak head normal form before
-- passing it along.
newtype ZeptoT m a = Parser {
    runParser :: S -> m (Result a)
  }

type Parser a = ZeptoT Identity a

instance MonadTrans ZeptoT where
  lift m = Parser (\s -> fmap (\a -> OK a s) m)
  {-# INLINE lift #-}

instance Monad m => Functor (ZeptoT m) where
  fmap f m = Parser $ \s -> do
    result <- runParser m s
    case result of
      OK a s'  -> return (OK (f a) s')
      Fail err -> return (Fail err)
  {-# INLINE fmap #-}

instance MonadIO m => MonadIO (ZeptoT m) where
  liftIO act = Parser $ \s -> do
    result <- liftIO act
    return (OK result s)
  {-# INLINE liftIO #-}

instance Monad m => Monad (ZeptoT m) where
  return = pure
  {-# INLINE return #-}
  m >>= k = Parser $ \s -> do
    result <- runParser m s
    case result of
      OK a s'  -> runParser (k a) s'
      Fail err -> return (Fail err)
  {-# INLINE (>>=) #-}
#if !(MIN_VERSION_base(4,13,0))
  fail = Fail.fail
  {-# INLINE fail #-}
#endif

instance Monad m => Fail.MonadFail (ZeptoT m) where
  fail msg = Parser $ \_ -> return (Fail msg)
  {-# INLINE fail #-}

instance Monad m => MonadPlus (ZeptoT m) where
  mzero = fail "mzero"
  {-# INLINE mzero #-}
  mplus a b = Parser $ \s -> do
    result <- runParser a s
    case result of
      ok@(OK _ _) -> return ok
      _           -> runParser b s
  {-# INLINE mplus #-}

instance (Monad m) => Applicative (ZeptoT m) where
  pure a = Parser $ \s -> return (OK a s)
  {-# INLINE pure #-}
  (<*>) = ap
  {-# INLINE (<*>) #-}

gets :: Monad m => (S -> a) -> ZeptoT m a
gets f = Parser $ \s -> return (OK (f s) s)
{-# INLINE gets #-}

put :: Monad m => S -> ZeptoT m ()
put s = Parser $ \_ -> return (OK () s)
{-# INLINE put #-}

-- | Run a parser.
parse :: Parser a -> ByteString -> Either String a
parse p bs =
  case runIdentity (runParser p (S bs)) of
    OK a _   -> Right a
    Fail err -> Left err
{-# INLINE parse #-}

-- | Run a parser on top of the given base monad.
parseT :: Monad m => ZeptoT m a -> ByteString -> m (Either String a)
parseT p bs = do
  result <- runParser p (S bs)
  case result of
    OK a _   -> return (Right a)
    Fail err -> return (Left err)
{-# INLINE parseT #-}

instance Monad m => Semigroup (ZeptoT m a) where
  (<>) = mplus
  {-# INLINE (<>) #-}

instance Monad m => Mon.Monoid (ZeptoT m a) where
  mempty = fail "mempty"
  {-# INLINE mempty #-}
  mappend = (<>)
  {-# INLINE mappend #-}

instance Monad m => Alternative (ZeptoT m) where
  empty = fail "empty"
  {-# INLINE empty #-}
  (<|>) = mplus
  {-# INLINE (<|>) #-}

-- | Consume input while the predicate returns 'True'.
takeWhile :: Monad m => (Word8 -> Bool) -> ZeptoT m ByteString
takeWhile p = do
  (h, t) <- gets (B.span p . input)
  put (S t)
  return h
{-# INLINE takeWhile #-}

-- | Consume @n@ bytes of input.
take :: Monad m => Int -> ZeptoT m ByteString
take !n = do
  s <- gets input
  if B.length s >= n
    then put (S (B.unsafeDrop n s)) >> return (B.unsafeTake n s)
    else fail "insufficient input"
{-# INLINE take #-}

-- | Match a string exactly.
string :: Monad m => ByteString -> ZeptoT m ()
string s = do
  i <- gets input
  if s `B.isPrefixOf` i
    then put (S (B.unsafeDrop (B.length s) i)) >> return ()
    else fail "string"
{-# INLINE string #-}

-- | Indicate whether the end of the input has been reached.
atEnd :: Monad m => ZeptoT m Bool
atEnd = do
  i <- gets input
  return $! B.null i
{-# INLINE atEnd #-}