Last active
April 12, 2016 23:47
-
-
Save snoyberg/a422d558e9142e37d2aa to your computer and use it in GitHub Desktop.
Conduit solution to: http://www.reddit.com/r/haskell/comments/2ejzst/streaming_tabseparated_logfile_analysis_with/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module-template: ! 'module MODULE_NAME where | |
' | |
extensions: {} | |
environment: default | |
cabal-file: project.cabal | |
version: 1 | |
ghc-args: [] | |
excluded-modules: [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
server user timestamp event | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
bar 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
bar 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
bar 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
bar 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
baz 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
baz 1 2014-10-10 {"tag": "Something else", "contents": {"foo": "bar"}} | |
baz 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
baz 1 2014-10-10 {"tag": "Something else", "contents": {"foo": "bar"}} | |
baz 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
baz 1 2014-10-10 {"tag": "Something else", "contents": {"foo": "bar"}} | |
baz 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
baz 1 2014-10-10 {"tag": "Something else", "contents": {"foo": "bar"}} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE NoImplicitPrelude, OverloadedStrings #-} | |
import ClassyPrelude.Conduit | |
import Data.Conduit.Attoparsec (sinkParser) | |
import Data.Aeson.Parser (json) | |
import Data.Aeson.Types (parseEither) | |
import Data.Aeson (withObject, (.:)) | |
main :: IO () | |
main = do | |
runResourceT | |
$ sourceFile "input.txt" | |
$$ (dropLine >> convertLines) | |
=$ countResults | |
-- Use lineAsciiC to stream in all the contents of the line. | |
-- Then we just ignore those contents. | |
dropLine = lineAsciiC $ return () | |
convertLines = do | |
-- peek to see if there are more contents. If so, convert the | |
-- next line and then loop | |
peekC >>= maybe (return ()) (const $ convertLine >> convertLines) | |
-- Stream the contents of a single line | |
convertLine = lineAsciiC $ do | |
-- Get the serverName: everything up until the first tab. | |
-- takeCE 80 is to prevent a memory attack | |
-- The CE nomenclature means "conduit elementwise", since | |
-- we're dealing with chunked data (ByteStrings) | |
serverName <- takeWhileCE (/= 9) =$= takeCE 80 =$= foldC | |
dropField -- user | |
dropField -- timestamp | |
dropCE 1 -- drop the tab after timestamp | |
val <- sinkParser json -- stream into aeson's parser | |
tag <- case parseEither getTag val of | |
Left e -> error e -- could do nicer error handling if we wanted | |
Right tag -> return tag | |
when ((tag :: Text) == "Login") $ yield $ decodeUtf8 serverName | |
dropField = do | |
dropCE 1 | |
dropWhileCE (/= 9) | |
getTag = withObject "" (.: "tag") | |
countResults = do | |
mname <- peekC | |
case mname of | |
Nothing -> return () | |
Just name -> do | |
cnt <- takeWhileC (== name) =$= lengthC | |
liftIO $ putStrLn $ name ++ ": " ++ tshow cnt | |
countResults |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment