Skip to content

Instantly share code, notes, and snippets.

@monoid
Created December 29, 2014 17:52
Show Gist options
  • Select an option

  • Save monoid/7e5bf172a4fd7192a556 to your computer and use it in GitHub Desktop.

Select an option

Save monoid/7e5bf172a4fd7192a556 to your computer and use it in GitHub Desktop.
Variable-lenght encoding
import Data.Bits
import qualified Data.ByteString as BS
import Data.Maybe
import Data.Word
import Data.List.Split -- main, для профилирования
{-
8-кодирование: число разбивается на группы по 7 бит, начиная с младших битов,
и записываются в файл. В последней группе 8-й бит установлен в 1 как признак
конца группы.
Правильно это называется Variable length quantity.
-}
eightBytes :: Maybe Int -> Maybe (Word8, Maybe Int)
eightBytes Nothing = Nothing
eightBytes (Just n) = if n < 0x80 then
Just (toEnum (n .|. 0x80), Nothing)
else
Just (toEnum (n .&. 0x7F), Just $ shiftR n 7)
packEight :: Int -> BS.ByteString
packEight = BS.unfoldr eightBytes . Just
-- v ++ [new_a] выглядит подозрительно даже с учётом ленивости.
uneightBytes :: ([Int], Int, Int) -> Word8 -> ([Int], Int, Int)
uneightBytes (parsed, acc, n) b =
if (b .&. 0x80) == 0 then
(parsed, new_acc, n + 7)
else
(parsed ++ [new_acc], 0, 0)
where new_acc = acc .|. ((fromEnum (b .&. 0x7F)) `shiftL` n)
unpackEights :: BS.ByteString -> [Int]
unpackEights bytes = values
where (values, a, n) = BS.foldl uneightBytes ([], 0, 0) bytes
-- TODO: if n > 0, raise error
uneight :: Word8 -> ([Int], Maybe Int) -> ([Int], Maybe Int)
uneight bt (parsed, acc) =
if (bt .&. 0x80) == 0 then
case acc of
Just n -> (parsed, Just $ (n `shiftL` 7) .|. fromEnum bt)
Nothing -> error "Incomplete 8bit sequence."
else
((maybeToList acc) ++ parsed, Just $ fromEnum (bt .&. 0x7F))
unpackEights' :: BS.ByteString -> [Int]
unpackEights' bytes = maybeToList acc ++ values
where (values, acc) = BS.foldr uneight ([], Nothing) bytes
unpackEights'' :: BS.ByteString -> [Int]
unpackEights'' bytes = uneight'' bytes 0 0 0 $ BS.length bytes
uneight'' :: BS.ByteString -> Int -> Int -> Int -> Int -> [Int]
uneight'' bytes !acc !n !pos !end
| (pos == end) = []
| otherwise = if (bt .&. 0x80) == 0 then
-- Хвостовая рекурсия...
(uneight'' bytes $! new_acc) (n+7) (pos+1) end
else
new_acc : uneight'' bytes 0 0 (pos + 1) end
where bt = BS.index bytes pos
new_acc = (acc .|. ((fromEnum (bt .&. 0x7F) `shiftL` n)))
main = do
let vals = map BS.concat $ chunksOf 1000 $ map packEight [0..100000000]
let unvals = concatMap (take 15 . unpackEights) vals
putStrLn $ show $ sum unvals
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment