Last active
August 29, 2015 14:10
-
-
Save m-renaud/d518234854c27e4ec837 to your computer and use it in GitHub Desktop.
Fast Int Parsing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE BangPatterns #-} | |
module FastParse (parseInts) where | |
import Prelude hiding (length) | |
import Control.Applicative hiding (empty) | |
import Control.Monad | |
import Data.Char | |
import Data.Maybe | |
import Data.Sequence | |
import Data.Attoparsec.ByteString.Char8 | |
import qualified Data.ByteString.Unsafe as BU | |
import qualified Data.ByteString.Char8 as S -- 'Strict' | |
import qualified Data.ByteString.Lazy.Char8 as L -- 'Lazy' | |
parseInts :: L.ByteString -> Seq Int | |
parseInts ss = buildSeq empty (L.toChunks ss) | |
buildSeq :: Seq Int -> [S.ByteString] -> Seq Int | |
buildSeq ints [] = ints | |
buildSeq ints (s:t:ts) | |
| S.last s /= '\n' = buildSeq (processChunk ints s') ts' | |
where | |
(s',r) = S.breakEnd (=='\n') s | |
(r',rs) = S.break (=='\n') t | |
ts' = S.concat [r,r',S.singleton '\n'] : BU.unsafeTail rs : ts | |
buildSeq ints (s:ss) = buildSeq (processChunk ints s) ss | |
processChunk :: Seq Int -> S.ByteString -> Seq Int | |
processChunk l s = fst $ S.foldl' f (l, 0) s | |
where f(!ll, !n) '\n' = (ll |> n, 0) | |
f(!ll, !n) w = (ll, 10*n+ord w-ord '0') | |
-- | Version of many' written for Seq. | |
seqMany' :: (MonadPlus m) => m a -> m (Seq a) | |
seqMany' p = many_p | |
where many_p = some_p `mplus` return mzero | |
some_p = liftM2 (<|) p many_p | |
-- | This version uses Attoparsec for each chunk. | |
processChunk' :: Seq Int -> S.ByteString -> Seq Int | |
processChunk' l s = case parseOnly (many' (decimal <* char '\n')) s of | |
Left err -> fail err | |
Right ns -> l >< (fromList ns) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE BangPatterns #-} | |
import qualified Data.ByteString.Char8 as S | |
import qualified Data.Vector as U | |
main = S.getContents >>= print . U.length . parse | |
-- Fill a new vector from a file containing a list of numbers. | |
parse = U.unfoldr step | |
where | |
step !s = case S.readInt s of | |
Nothing -> Nothing | |
Just (!k, !t) -> Just (k, S.tail t) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Main where | |
import Prelude hiding (foldl) | |
import FastParse | |
import qualified Data.ByteString.Lazy.Char8 as L -- 'Lazy' | |
import qualified Data.IntSet as IS | |
import Data.Foldable | |
import Data.Sequence | |
main = do | |
n <- readLn :: IO Int | |
ss <- L.getContents | |
let ints = parseInts ss | |
intSet = foldl' (flip IS.insert) IS.empty ints | |
print $ IS.findMin intSet |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment