Skip to content

Instantly share code, notes, and snippets.

@m-renaud
Last active August 29, 2015 14:10
Show Gist options
  • Save m-renaud/d518234854c27e4ec837 to your computer and use it in GitHub Desktop.
Save m-renaud/d518234854c27e4ec837 to your computer and use it in GitHub Desktop.
Fast Int Parsing
{-# LANGUAGE BangPatterns #-}
module FastParse (parseInts) where
import Prelude hiding (length)
import Control.Applicative hiding (empty)
import Control.Monad
import Data.Char
import Data.Maybe
import Data.Sequence
import Data.Attoparsec.ByteString.Char8
import qualified Data.ByteString.Unsafe as BU
import qualified Data.ByteString.Char8 as S -- 'Strict'
import qualified Data.ByteString.Lazy.Char8 as L -- 'Lazy'
parseInts :: L.ByteString -> Seq Int
parseInts ss = buildSeq empty (L.toChunks ss)
buildSeq :: Seq Int -> [S.ByteString] -> Seq Int
buildSeq ints [] = ints
buildSeq ints (s:t:ts)
| S.last s /= '\n' = buildSeq (processChunk ints s') ts'
where
(s',r) = S.breakEnd (=='\n') s
(r',rs) = S.break (=='\n') t
ts' = S.concat [r,r',S.singleton '\n'] : BU.unsafeTail rs : ts
buildSeq ints (s:ss) = buildSeq (processChunk ints s) ss
processChunk :: Seq Int -> S.ByteString -> Seq Int
processChunk l s = fst $ S.foldl' f (l, 0) s
where f(!ll, !n) '\n' = (ll |> n, 0)
f(!ll, !n) w = (ll, 10*n+ord w-ord '0')
-- | Version of many' written for Seq.
seqMany' :: (MonadPlus m) => m a -> m (Seq a)
seqMany' p = many_p
where many_p = some_p `mplus` return mzero
some_p = liftM2 (<|) p many_p
-- | This version uses Attoparsec for each chunk.
processChunk' :: Seq Int -> S.ByteString -> Seq Int
processChunk' l s = case parseOnly (many' (decimal <* char '\n')) s of
Left err -> fail err
Right ns -> l >< (fromList ns)
{-# LANGUAGE BangPatterns #-}
import qualified Data.ByteString.Char8 as S
import qualified Data.Vector as U
main = S.getContents >>= print . U.length . parse
-- Fill a new vector from a file containing a list of numbers.
parse = U.unfoldr step
where
step !s = case S.readInt s of
Nothing -> Nothing
Just (!k, !t) -> Just (k, S.tail t)
module Main where
import Prelude hiding (foldl)
import FastParse
import qualified Data.ByteString.Lazy.Char8 as L -- 'Lazy'
import qualified Data.IntSet as IS
import Data.Foldable
import Data.Sequence
main = do
n <- readLn :: IO Int
ss <- L.getContents
let ints = parseInts ss
intSet = foldl' (flip IS.insert) IS.empty ints
print $ IS.findMin intSet
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment