Created
November 7, 2014 23:32
-
-
Save orchid-hybrid/1f6db4bdb2683a820b62 to your computer and use it in GitHub Desktop.
Compressor.hs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.List | |
import Data.Ord | |
import Data.Bits | |
import Data.Char | |
data DInst | |
= Emit Char | |
| Backref Int Int | |
deriving (Eq, Show) | |
encode :: DInst -> [Bool] | |
encode (Emit c) = True : toBinary 8 (ord c) | |
encode (Backref i l) = [False] ++ concatMap escape (toBinary' i) ++ [False, True] | |
++ concatMap escape (toBinary' l) ++ [False, True] | |
where escape True = [True] | |
escape False = [False, False] | |
toBinary len n = map ((>0) . (n .&.)) . takeWhile (<= n) . map (2^) $ [0..len-1] | |
toBinary' n = map ((>0) . (n .&.)) . takeWhile (<= n) . map (2^) $ [0..] | |
deflate :: [DInst] -> String | |
deflate instructions = result where | |
result = go 0 instructions result | |
go _ [] _ = "" | |
go i (Emit c:rest) r = c : go (i+1) rest r | |
go i (Backref idx len:rest) r | i >= idx + len = ref ++ go (i + len) rest r | |
where ref = take len . drop idx $ r | |
go _ _ _ = error "invalid backref" | |
{- | |
-- Run length encoding for free from int-e | |
deflate :: [DInst] -> String | |
deflate instructions = result where | |
result = go 0 instructions result | |
go _ [] _ = "" | |
go i (Emit c:rest) r = c : go (i+1) rest r | |
go i (Backref idx len:rest) r | idx > 0 && idx <= i = ref ++ go (i + len) rest r | |
where ref = take len . drop (i-idx) $ r | |
go _ _ _ = error "invalid backref" | |
test1 = deflate [Emit 'f', Emit 'o', Emit 'o', Backref 3 3] | |
test2 = deflate [Emit 'f', Emit 'o', Backref 1 4] | |
test3 = deflate [Emit 'f', Emit 'o', Emit 'o', Backref 0 1, Emit 'x', Emit 'x', Emit 'x'] | |
--} | |
test1 = deflate [Emit 'f', Emit 'o', Emit 'o', Backref 0 3] | |
test2 = deflate [Emit 'f', Emit 'o', Backref 1 6] | |
compress [] l = [] | |
compress (x:xs) l = case findBackref x xs l of | |
Nothing -> Emit x : compress xs (updateBackrefs l x) | |
Just ((idx, len), (l', xs')) -> Backref idx len : compress xs' l' | |
findBackref x xs l = evaluateBackrefs | |
. sortBy (flip (comparing snd)) | |
. map (\(a,b) -> (a,startingWith (x:xs) b)) | |
. filter ((== x) . head . snd) | |
. filter (not . null . snd) | |
. zip [0..] | |
. tails $ l | |
where startingWith [] _ = 0 | |
startingWith (x:xs) (y:ys) | x == y = 1 + startingWith xs ys | |
startingWith _ _ = 0 | |
evaluateBackrefs [] = Nothing | |
evaluateBackrefs ((i,len):_) | len >= 3 = Just ((i,len), (l ++ take len (x:xs), drop len (x:xs))) | |
evaluateBackrefs _ = Nothing | |
updateBackrefs l x = l ++ [x] | |
{- | |
*Main Data.List> let s = "foofoobaaaaaaaaaaaaaaaaaaaaaaaaaarbazzerbazzerbazzerbazzerbazzermoooaaaaaaaa" | |
*Main Data.List> s | |
"foofoobaaaaaaaaaaaaaaaaaaaaaaaaaarbazzerbazzerbazzerbazzerbazzermoooaaaaaaaa" | |
*Main Data.List> deflate . compress s $ "" | |
"foofoobaaaaaaaaaaaaaaaaaaaaaaaaaarbazzerbazzerbazzerbazzerbazzermoooaaaaaaaa" | |
*Main Data.List> (deflate . compress s $ "") == s | |
True | |
*Main Data.List> compress s $ "" | |
[Emit 'f',Emit 'o',Emit 'o',Backref 0 3,Emit 'b',Emit 'a',Emit 'a',Emit 'a',Backref 7 3,Backref 7 6,Backref 7 12,Emit 'a',Emit 'a',Emit 'r',Emit 'b',Emit 'a',Emit 'z',Emit 'z',Emit 'e',Backref 33 6,Backref 33 12,Backref 33 7,Emit 'm',Emit 'o',Emit 'o',Emit 'o',Backref 7 8] | |
-} | |
{-- | |
*Main Data.List> length . concatMap encode . compress s $ "" | |
268 | |
*Main Data.List> (8*) . length $ s | |
608 | |
*Main Data.List> s | |
"foofoobaaaaaaaaaaaaaaaaaaaaaaaaaarbazzerbazzerbazzerbazzerbazzermoooaaaaaaaa" | |
*Main Data.List> s <- readFile "Compressor.hs" | |
*Main Data.List> (8*) . length $ s | |
26728 | |
*Main Data.List> length . concatMap encode . compress s $ "" | |
10973 | |
*Main Data.List> s == (deflate . compress s $ "") | |
True | |
--} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment