-
-
Save michaelt/88b89b7fcab5bd8a47a7 to your computer and use it in GitHub Desktop.
unauthorized pipes solution to http://www.reddit.com/r/haskell/comments/2ejzst/streaming_tabseparated_logfile_analysis_with/ following Michael S's model
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
server user timestamp event | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
bar 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
baz 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
baz 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
baz 1 2014-10-10 {"tag": "Something else", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
bar 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
bar 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
bar 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
baz 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Something else", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE OverloadedStrings #-} | |
import Pipes | |
import Pipes.Group | |
import qualified Pipes.Prelude as P | |
import qualified Pipes.ByteString as PB | |
import qualified Pipes.Attoparsec as PA | |
import Data.Aeson | |
import Data.Aeson.Parser | |
import Data.Aeson.Types | |
import Data.Monoid | |
import Control.Monad.Trans.State.Strict | |
import Data.Text (Text) | |
import Data.Function (on) | |
import Lens.Family -- from `lens-family`or Control.Lens from `lens` | |
import qualified Pipes.Internal as I -- for the missing fold' | |
main = runEffect $ toCount PB.stdin >-> P.print where | |
toCount = folds (\(_,n) (server,o) -> (server,n+o)) ("",0) id | |
. view (groupsBy (on (==) fst)) | |
. concats | |
. maps parseLogin | |
. drops 1 | |
. view PB.lines | |
parseLogin p = do | |
let divided = p ^. PB.break (== 9) . PB.splitAt 80 . to concat_bytes | |
(server, rest) <- lift divided | |
good <- rest >-> P.drain -- drop any excess that comes before tab | |
(me,p) <- lift $ runStateT (PA.parse json) $ skip_fields good | |
case me of Just (Right a) -> | |
case parseEither (withObject "" (.: "tag")) a of | |
Right tag | tag == login -> yield (server, 1::Int) | |
_ -> yield (server, 0) | |
_ -> return () | |
p >-> P.drain | |
where login = "Login" :: Text | |
concat_bytes = fold' (<>) mempty id | |
skip_field = PB.drop 1 . PB.dropWhile (/= 9) | |
skip_fields = skip_field . skip_field . PB.drop 1 | |
fold' :: Monad m | |
=> (x -> a -> x) -> x -> (x -> b) | |
-> Producer a m r -> m (b,r) | |
fold' step begin done p0 = loop p0 begin | |
where | |
loop p x = case p of | |
I.Request v _ -> I.closed v | |
I.Respond a fu -> loop (fu ()) $! step x a | |
I.M m -> m >>= \p' -> loop p' x | |
I.Pure r -> return (done x, r) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ cat input.txt | ./tsvlines2 | |
("foo",2) | |
("bar",1) | |
("baz",2) | |
("foo",1) | |
("baz",0) | |
("foo",1) | |
("bar",3) | |
("baz",1) | |
("foo",4) | |
$ cat input.txt | python tsvlines.py | |
server numLogins | |
foo 2 | |
bar 1 | |
baz 2 | |
foo 1 | |
baz 0 | |
foo 1 | |
bar 3 | |
baz 1 | |
foo 4 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment