Created
January 30, 2016 19:27
-
-
Save darkf/5e04b7ed74f40e2a497a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE BangPatterns #-} | |
{-# LANGUAGE OverloadedStrings #-} | |
{-# LANGUAGE DeriveGeneric #-} | |
import Data.List | |
import Data.Ord (comparing) | |
import Control.Applicative ((<|>)) | |
import Criterion.Main | |
import GHC.Generics (Generic) | |
import Control.DeepSeq | |
import Data.Maybe (fromJust) | |
import Data.Word (Word8) | |
import qualified Data.ByteString as BS | |
import qualified Data.ByteString.Internal as BS (c2w, w2c) | |
data HTree a = Node !Int !(HTree a) !(HTree a) | Leaf !a !Int deriving (Eq, Show, Generic) | |
instance NFData a => NFData (HTree a) | |
--nubBS :: BS.ByteString -> BS.ByteString | |
--nubBS = BS.pack . nub . BS.unpack | |
frequencies :: BS.ByteString -> [(Int, Word8)] | |
-- frequencies !xs = sortBy (comparing snd) . map (\xs -> (BS.length xs, BS.head xs)) . BS.group . BS.sort $ xs | |
frequencies xs = map (\c -> (BS.count c xs, c)) $ nub $ BS.unpack xs | |
probability :: HTree a -> Int | |
probability !(Node p _ _) = p | |
probability !(Leaf _ p) = p | |
initialTree :: [(Int, a)] -> HTree a | |
initialTree !ps = f (tail ps) ((\(p, a) -> Leaf a p) $ head ps) | |
where | |
f [] node = node | |
f [(p, a)] node = Node (probability node + p) (Leaf a p) node | |
f ((p_a, c_a) : (p_b, c_b) : ps) node = | |
f ps $ Node (p_a + p_b) (Node (probability node + p_a) node (Leaf c_a p_a)) (Leaf c_b p_b) | |
encode :: Eq a => HTree a -> a -> Maybe [Bool] | |
encode !tree !x = | |
case tree of | |
Node _ l r -> fmap (False :) (encode l x) <|> fmap (True :) (encode r x) | |
Leaf c _ | c == x -> Just [] | |
Leaf _ _ -> Nothing | |
encodes :: HTree Word8 -> BS.ByteString -> Maybe [Bool] | |
encodes !tree = mconcat . map (encode tree) . BS.unpack | |
encodes' :: HTree Word8 -> BS.ByteString -> [Bool] | |
encodes' !tree !xs = foldr (\x ys -> fromJust (encode tree x) ++ ys) [] $ BS.unpack xs | |
encoding :: HTree Word8 -> [[Bool]] | |
encoding !tree = map (fromJust . encode tree) [0..255] | |
encodes'' :: [[Bool]] -> BS.ByteString -> [Bool] | |
encodes'' !enc = concatMap ((enc !!) . fromIntegral) . BS.unpack | |
main = do | |
!xs <- BS.readFile "pg2600.txt" | |
let !tree_xs = xs `deepseq` initialTree (frequencies xs) | |
--let e = encodes tree_xs xs | |
--let !e' = e `deepseq` e | |
--print e' | |
--print $ length $ encodes' tree_xs xs | |
let !enc = encoding tree_xs | |
defaultMain [ bgroup "pg2600" [ bench "1" $ nf (encodes' tree_xs) xs | |
, bench "2" $ nf (encodes'' enc) xs | |
] ] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment