Last active
December 24, 2019 15:36
-
-
Save michaelt/6c6843e6dd8030e95d58 to your computer and use it in GitHub Desktop.
simple shell-like programs using Data.ByteString.Streaming, following the io-streams tutorial
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- These examples are based on the tutorial module in the io-streams package | |
{-#LANGUAGE OverloadedStrings #-} | |
import Streaming | |
import Streaming.Prelude (yield, next, each, for, with, subst) | |
import qualified Streaming.Prelude as S | |
import qualified Data.ByteString.Char8 as B | |
import Data.ByteString.Streaming (ByteString) | |
import qualified Data.ByteString.Streaming.Char8 as Q | |
import System.IO (withFile, IOMode(..)) | |
import Control.Monad | |
import Control.Applicative | |
import Data.Monoid | |
cat :: FilePath -> IO () | |
cat file = withFile file ReadMode $ \h -> Q.stdout (Q.fromHandle h) | |
echo :: IO () | |
echo = Q.stdout Q.stdin | |
-- Send the first n lines to stdout. | |
-- Note that this streams even if it hits a 10 terabyte "line" | |
head :: Int -> FilePath -> IO () | |
head n file = withFile file ReadMode $ \h -> | |
Q.stdout -- IO () -- stream to IO.stdout | |
$ Q.unlines -- ByteString m () -- insert '\n' between bytestream layers | |
$ takes n -- Stream (ByteString m) m () -- nb. "takes" is 'functor general' | |
$ Q.lines -- Stream (ByteString m) m () -- divided into Stream layers | |
$ Q.fromHandle h -- ByteString m () -- raw bytes | |
yes :: IO () | |
yes = Q.stdout $ Q.cycle "y\n" -- uses OverloadedStrings instance for 'ByteString m ()' | |
grep :: B.ByteString -> FilePath -> IO () | |
grep pat file = withFile file ReadMode $ \h -> do | |
let raw :: ByteString IO () -- get raw bytes | |
raw = Q.fromHandle h | |
segmented :: Stream (ByteString IO) IO () -- divide on newlines | |
segmented = Q.lines raw | |
individualized :: Stream (Of B.ByteString) IO () | |
individualized = mapped Q.toStrict segmented -- danger: concatenate 'real' bytestrings! | |
matching :: Stream (Of B.ByteString) IO () -- filter out matching bytestrings | |
matching = S.filter (B.isInfixOf pat) individualized | |
deindividualized :: Stream (ByteString IO) IO () -- restream (implicitly using | |
deindividualized = with matching Q.chunk -- the new chunk structure) | |
unsegmented :: ByteString IO () -- add newlines | |
unsegmented = Q.unlines deindividualized | |
Q.stdout unsegmented -- stream to IO.stdout | |
-- or, more compactly: | |
grep' :: B.ByteString -> FilePath -> IO () | |
grep' pat file = withFile file ReadMode $ \h -> do | |
Q.stdout | |
$ Q.unlines | |
$ subst Q.chunk | |
$ S.filter (B.isInfixOf pat) | |
$ mapped Q.toStrict | |
$ Q.lines | |
$ Q.fromHandle h | |
-- >>> grep "grep" "examples.hs" | |
-- grep :: B.ByteString -> FilePath -> IO () | |
-- grep pat file = withFile file ReadMode $ \h -> do | |
-- grep' pat file = withFile file ReadMode $ \h -> do | |
data Option = Bytes | Words | Lines | |
len = S.fold (\n _ -> n + 1) 0 id | |
wc :: Option -> FilePath -> IO () | |
wc opt file = withFile file ReadMode $ \h -> | |
do n <- count (Q.fromHandle h) | |
print n | |
where | |
count is = case opt of | |
Bytes -> Q.length is | |
Words -> S.sum $ mapped blank_layer $ Q.words is | |
Lines -> S.sum $ mapped blank_layer $ Q.lines is | |
blank_layer :: Monad m => ByteString m r -> m (Of Int r) | |
blank_layer = liftM (1 :>) . Q.effects | |
-- replace each layer with (1 :> ...); here we do not accumlate strict-bs words | |
-- >>> wc Words "examples.hs" | |
-- 801 :> () | |
-- exercise: write `wc` to permit a list of options, using `foldl` to combine | |
-- the different folds. This would require a more direct implementation | |
-- of what might be called `line_count :: Fold Char Int` and `word_count :: Fold Char Int` | |
paste file file' = withFile file ReadMode $ \h -> | |
withFile file' ReadMode $ \h' -> | |
Q.stdout | |
$ Q.unlines | |
$ let left = Q.lines (Q.fromHandle h) | |
right = Q.lines (Q.fromHandle h' ) | |
center = repeats $ Q.chunk "\t" | |
in left <|> center <|> right | |
-- >>> paste "nums.txt" "numbers.txt" | |
-- 1.1 | |
-- 1 2.2 | |
-- 2 | |
-- 3 3.3 | |
-- 4 | |
-- 5 4.4 | |
-- 6 5.5 -- this program is ziplike and breaks where either does. | |
-- number the lines of a file | |
nl :: FilePath -> IO () | |
nl file = withFile file ReadMode $ \h -> | |
Q.stdout -- IO () / \ | |
$ Q.unlines -- ByteString IO () | | |
$ (\bss -> with bss Q.chunk) -- Stream (ByteString IO) IO () | | |
$ S.zipWith nlpad (each [1..]) -- Stream (Of B.ByteString) IO () | | |
$ mapped Q.toStrict -- Stream (Of B.ByteString) IO () | | |
$ Q.lines -- Stream (ByteString IO) IO () | | |
$ Q.fromHandle h -- ByteString IO () | | |
where | |
nlpad :: Int -> B.ByteString -> B.ByteString | |
nlpad n bs = padding <> B.pack (show n <> " ") <> bs | |
where | |
len = length (show n); diff = 9 - len | |
padding = if diff > 0 then B.replicate diff ' ' else B.singleton ' ' | |
nl_streaming :: FilePath -> IO () | |
nl_streaming file = withFile file ReadMode $ \h -> | |
Q.stdout | |
$ Q.unlines | |
$ interleaves numerals -- interleaves might instead be called 'fuseLayers' or something | |
$ Q.lines -- note proper streaming | |
$ Q.fromHandle h | |
where | |
numerals :: Monad m => Stream (ByteString m) m () | |
numerals = maps trans (each [1..]) where | |
trans (n:>r) = Q.chunk stuff >> return r | |
where | |
len = length (show n); diff = 9 - len | |
padding = if diff > 0 then B.replicate diff ' ' else B.singleton ' ' | |
stuff = padding <> B.pack (show n ++ " ") |
How would you modify nl
to only number non-blank lines?
Shouldn't takes n
in line 27 be Streaming.Prelude.take n
?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Ha, I just noticed this. I just called it Q because I ran out of letters. T=Text, S = Streaming; P = Pipes; L = Control.Foldl; B=ByteString ... so I hit on R=Data.ByteString.Streaming and Q=Data.ByteString.Streaming.Char8. I guess it is a little strange, but I became accustomed to using L for Foldl, following the documentation, and it is very sensible once you get a little accustomed to it.