Created
March 3, 2011 10:28
-
-
Save gregorycollins/852598 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE BangPatterns #-} | |
----------------------------------------------------------------------------- | |
-- | | |
-- Copyright: 2010 John Millikin | |
-- License: MIT | |
-- | |
-- Maintainer: [email protected] | |
-- Portability: portable | |
-- | |
----------------------------------------------------------------------------- | |
module Main (main) where | |
import qualified Control.Exception as Exc | |
import Data.Enumerator as E | |
import qualified Data.List as L | |
import qualified Data.Enumerator.Binary as EB | |
import qualified Data.Enumerator.Text as ET | |
import qualified Data.ByteString as B | |
import qualified Data.ByteString.Char8 as B8 | |
import qualified Data.Text as T | |
-- support imports | |
import Control.Exception as E | |
import Data.List | |
import Control.Monad (unless, forM_) | |
import Control.Monad.Trans | |
import System.IO | |
import System.Console.GetOpt | |
import System.Environment | |
import System.Exit | |
-- support wc modes -c (bytes), -m (characters), and -l (lines) | |
-- iterBytes simply counts how many bytes are in each chunk, accumulates this | |
-- count, and returns it when EOF is received | |
iterBytes :: Monad m => Iteratee B.ByteString m Int | |
iterBytes = continue (step 0) where | |
step acc EOF = yield acc EOF | |
step acc (Chunks xs) = continue $ step $! Data.List.foldl' foldStep acc xs | |
foldStep acc bytes = acc + (B.length bytes) | |
-- iterLines is similar, except it only counts newlines ('\n') | |
-- | |
-- Because it's basically the same as 'iterBytes', we use it to demonstrate | |
-- the 'liftFoldL\'' helper function. | |
iterLines :: Monad m => Iteratee B.ByteString m Int | |
iterLines = E.continue (k 0) | |
where | |
k !acc EOF = return acc | |
k !acc (Chunks xs) = let !nls = L.foldl' f 0 xs + acc in | |
E.continue (k nls) | |
f !acc !bs = let !x = acc + B8.count '\n' bs in x | |
-- iterChars is a bit more complicated. It has to decode the input (for now, | |
-- assuming UTF-8) before performing any counting. Leftover bytes, not part | |
-- of a valid UTF-8 character, are yielded as surplus | |
-- | |
-- Note the use of joinI. 'ET.decode' is an enumeratee, which means it returns | |
-- an iteratee yielding an inner step. 'joinI' "collapses" an enumeratee's | |
-- return value, much as 'join' does to monadic values. | |
iterChars :: Monad m => Iteratee B.ByteString m Int | |
iterChars = joinI (ET.decode ET.utf8 $$ count) where | |
count = E.foldl' (\acc t -> acc + (T.length t)) 0 | |
enumFile2 :: FilePath -> Enumerator B.ByteString IO b | |
enumFile2 path = enum | |
where | |
tryStep get io = do | |
tried <- liftIO (Exc.try get) | |
case tried of | |
Right t -> io t | |
Left err -> throwError (err :: Exc.SomeException) | |
withHandle = tryStep (openBinaryFile path ReadMode) | |
enum step = withHandle $ \h -> do | |
Iteratee $ Exc.finally | |
(runIteratee (EB.enumHandle (2^16) h step)) | |
(hClose h) | |
main :: IO () | |
main = do | |
(mode, files) <- getMode | |
-- Exactly matching wc's output is too annoying, so this example | |
-- will just print one line per file, and support counting at most | |
-- one statistic per run | |
let iter = case mode of | |
OptionBytes -> iterBytes | |
OptionLines -> iterLines | |
OptionChars -> iterChars | |
forM_ files $ \filename -> do | |
putStr $ filename ++ ": " | |
-- see cat.hs for commented implementation of 'Data.Enumerator.IO.enumFile' | |
eitherStat <- run (enumFile2 filename $$ iter) | |
putStrLn $ case eitherStat of | |
Left err -> "ERROR: " ++ show err | |
Right stat -> show stat | |
-- uninteresting option parsing follows | |
data Option | |
= OptionBytes | |
| OptionChars | |
| OptionLines | |
optionInfo :: [OptDescr Option] | |
optionInfo = | |
[ Option ['c'] ["bytes"] (NoArg OptionBytes) "count bytes" | |
, Option ['m'] ["chars"] (NoArg OptionChars) "count characters" | |
, Option ['l'] ["lines"] (NoArg OptionLines) "count lines" | |
] | |
usage :: String -> String | |
usage name = "Usage: " ++ name ++ " <MODE> [FILES]" | |
getMode :: IO (Option, [FilePath]) | |
getMode = do | |
args <- getArgs | |
let (options, files, errors) = getOpt Permute optionInfo args | |
unless (null errors && not (null options) && not (null files)) $ do | |
name <- getProgName | |
hPutStrLn stderr $ concat errors | |
hPutStrLn stderr $ usageInfo (usage name) optionInfo | |
exitFailure | |
return (Prelude.head options, files) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment