Skip to content

Instantly share code, notes, and snippets.

@orchid-hybrid
Created November 7, 2014 23:32
Show Gist options
  • Save orchid-hybrid/1f6db4bdb2683a820b62 to your computer and use it in GitHub Desktop.
Save orchid-hybrid/1f6db4bdb2683a820b62 to your computer and use it in GitHub Desktop.
Compressor.hs
import Data.List
import Data.Ord
import Data.Bits
import Data.Char
data DInst
= Emit Char
| Backref Int Int
deriving (Eq, Show)
encode :: DInst -> [Bool]
encode (Emit c) = True : toBinary 8 (ord c)
encode (Backref i l) = [False] ++ concatMap escape (toBinary' i) ++ [False, True]
++ concatMap escape (toBinary' l) ++ [False, True]
where escape True = [True]
escape False = [False, False]
toBinary len n = map ((>0) . (n .&.)) . takeWhile (<= n) . map (2^) $ [0..len-1]
toBinary' n = map ((>0) . (n .&.)) . takeWhile (<= n) . map (2^) $ [0..]
deflate :: [DInst] -> String
deflate instructions = result where
result = go 0 instructions result
go _ [] _ = ""
go i (Emit c:rest) r = c : go (i+1) rest r
go i (Backref idx len:rest) r | i >= idx + len = ref ++ go (i + len) rest r
where ref = take len . drop idx $ r
go _ _ _ = error "invalid backref"
{-
-- Run length encoding for free from int-e
deflate :: [DInst] -> String
deflate instructions = result where
result = go 0 instructions result
go _ [] _ = ""
go i (Emit c:rest) r = c : go (i+1) rest r
go i (Backref idx len:rest) r | idx > 0 && idx <= i = ref ++ go (i + len) rest r
where ref = take len . drop (i-idx) $ r
go _ _ _ = error "invalid backref"
test1 = deflate [Emit 'f', Emit 'o', Emit 'o', Backref 3 3]
test2 = deflate [Emit 'f', Emit 'o', Backref 1 4]
test3 = deflate [Emit 'f', Emit 'o', Emit 'o', Backref 0 1, Emit 'x', Emit 'x', Emit 'x']
--}
test1 = deflate [Emit 'f', Emit 'o', Emit 'o', Backref 0 3]
test2 = deflate [Emit 'f', Emit 'o', Backref 1 6]
compress [] l = []
compress (x:xs) l = case findBackref x xs l of
Nothing -> Emit x : compress xs (updateBackrefs l x)
Just ((idx, len), (l', xs')) -> Backref idx len : compress xs' l'
findBackref x xs l = evaluateBackrefs
. sortBy (flip (comparing snd))
. map (\(a,b) -> (a,startingWith (x:xs) b))
. filter ((== x) . head . snd)
. filter (not . null . snd)
. zip [0..]
. tails $ l
where startingWith [] _ = 0
startingWith (x:xs) (y:ys) | x == y = 1 + startingWith xs ys
startingWith _ _ = 0
evaluateBackrefs [] = Nothing
evaluateBackrefs ((i,len):_) | len >= 3 = Just ((i,len), (l ++ take len (x:xs), drop len (x:xs)))
evaluateBackrefs _ = Nothing
updateBackrefs l x = l ++ [x]
{-
*Main Data.List> let s = "foofoobaaaaaaaaaaaaaaaaaaaaaaaaaarbazzerbazzerbazzerbazzerbazzermoooaaaaaaaa"
*Main Data.List> s
"foofoobaaaaaaaaaaaaaaaaaaaaaaaaaarbazzerbazzerbazzerbazzerbazzermoooaaaaaaaa"
*Main Data.List> deflate . compress s $ ""
"foofoobaaaaaaaaaaaaaaaaaaaaaaaaaarbazzerbazzerbazzerbazzerbazzermoooaaaaaaaa"
*Main Data.List> (deflate . compress s $ "") == s
True
*Main Data.List> compress s $ ""
[Emit 'f',Emit 'o',Emit 'o',Backref 0 3,Emit 'b',Emit 'a',Emit 'a',Emit 'a',Backref 7 3,Backref 7 6,Backref 7 12,Emit 'a',Emit 'a',Emit 'r',Emit 'b',Emit 'a',Emit 'z',Emit 'z',Emit 'e',Backref 33 6,Backref 33 12,Backref 33 7,Emit 'm',Emit 'o',Emit 'o',Emit 'o',Backref 7 8]
-}
{--
*Main Data.List> length . concatMap encode . compress s $ ""
268
*Main Data.List> (8*) . length $ s
608
*Main Data.List> s
"foofoobaaaaaaaaaaaaaaaaaaaaaaaaaarbazzerbazzerbazzerbazzerbazzermoooaaaaaaaa"
*Main Data.List> s <- readFile "Compressor.hs"
*Main Data.List> (8*) . length $ s
26728
*Main Data.List> length . concatMap encode . compress s $ ""
10973
*Main Data.List> s == (deflate . compress s $ "")
True
--}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment