Created
April 13, 2010 12:26
-
-
Save kowey/364550 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- | Run the scoring pipeline over the directory @d@ and print the results.
--
-- Every entry of @d@ whose extension is @.en@ contributes its base name;
-- those base names are handed to 'readSentencePairs' or 'readDocPair'
-- depending on @unitType@.  Progress messages go to 'stderr', and each
-- intermediate result is forced with 'rnf' before the message for the next
-- step is printed.  The final @(key, score)@ entries are rendered with
-- 'showEntry' and written to standard output, one per line.
go :: LlrType -> FilePath -> IO ()
go unitType d = do
  entries <- getDirectoryContents d
  -- Base names of the ".en" files found in the directory.
  let stems = [ takeBaseName f | f <- entries, takeExtension f == ".en" ]
  hPutStrLn stderr "Reading input files... (step 1)"
  pairs <- case unitType of
    SentenceLevel -> fmap concat (mapM (readSentencePairs d) stems)
    DocumentLevel -> mapM (readDocPair d) stems
  -- Force the input completely before announcing the next step.
  rnf pairs `seq` hPutStrLn stderr "Computing frequencies... (step 2)"
  let freqs = frequencies pairs
  rnf freqs `seq` hPutStrLn stderr "Generating list of key pairs... (step 3)"
  let candidates = allPairs freqs
  rnf candidates `seq` hPutStrLn stderr "Computing LLR scores... (step 4 [last]). This could take a while!"
  let scored = map (\k -> (k, logLinear freqs k)) candidates
      -- Parallel evaluation in chunks of 250 entries; use @scored@ directly
      -- for the sequential version.
      scoredInParallel = scored `using` parListChunk 250 rdeepseq
  putStr (unlines (map showEntry scoredInParallel))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment