Skip to content

Instantly share code, notes, and snippets.

@tmountain
Created November 27, 2017 15:04
Show Gist options
  • Save tmountain/935ca8392755e6d548bf450fbbc5e391 to your computer and use it in GitHub Desktop.
Save tmountain/935ca8392755e6d548bf450fbbc5e391 to your computer and use it in GitHub Desktop.
module Main where
import Data.Char (isSpace)
import qualified Data.List as D
import qualified Data.List.Split as S
import qualified Data.Map.Strict as M
import Data.Maybe (fromMaybe)
import qualified Text.Read as TR
data Score = Score
{ domainScore :: Int
, pageScore :: Int
, tokenScore :: Int
} deriving (Show, Eq)
addScore :: Score -> Score -> Score
addScore newScore oldScore =
Score
{ tokenScore = tokenScore newScore + tokenScore oldScore
, pageScore = pageScore newScore + pageScore oldScore
, domainScore = domainScore newScore + domainScore oldScore
}
-- utils for removing unwanted chars from input data
delete :: Eq a => a -> [a] -> [a]
delete deleted xs = [x | x <- xs, x /= deleted]
delQuote :: String -> String
delQuote = delete '"'
-- functions for sanitizing and cleaning data
trim :: String -> String
trim = f . f
where
f = reverse . dropWhile isSpace
sanitize :: String -> String
sanitize = delQuote . trim
incTid :: String -> Int -> Score
incTid tid score =
case tid of
"tokens" -> Score {domainScore = 0, pageScore = 0, tokenScore = score}
"pages" -> Score {domainScore = 0, pageScore = score, tokenScore = 0}
"score" -> Score {domainScore = score, pageScore = 0, tokenScore = 0}
_ -> Score {domainScore = 0, pageScore = 0, tokenScore = 0}
foldScore :: String -> M.Map String Score -> M.Map String Score
foldScore l m =
let [token, scoreStr] = S.splitOn "\t" l
score = fromMaybe 0 $ TR.readMaybe scoreStr
[domain, tid] = S.splitOn ":" token
in M.insertWith addScore domain (incTid tid score) m
toString :: (String, Score) -> String
toString (k, v) =
k ++
"\t" ++
(show (domainScore v)) ++
"\t" ++ (show (pageScore v)) ++ "\t" ++ (show (tokenScore v))
validLine :: String -> Bool
validLine l =
(D.isInfixOf ":tokens" l || D.isInfixOf ":pages" l || D.isInfixOf ":score" l) &&
(length . filter (== ':') $ l) == 1
tokens :: String -> String
tokens input =
let allLines = lines input
freqs =
foldr (foldScore . sanitize) (M.fromList []) $ filter validLine allLines
result = map toString $ M.assocs freqs
in unlines result
main :: IO ()
main = interact tokens
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment