lashtear · August 1, 2016 22:12
diff --git a/Creepsay.hs b/Creepsay.hs
 {-# OPTIONS -Wall #-}
 module Creepsay where

 -- ^ This file has some random scribblings for Screeps Diplomacy
 -- protocol discussion.  Most of the things in here are not directly
 -- of use, but might help for assembling reference examples, once we
 -- figure out how the various layers should work.
 --
 -- The 62k suffix entries are "base62k" encoding, i.e. using all of
 -- the UCS2 space except for the 0xd800-0xdfff region historically
 -- reserved for surrogate pairs.  This allows the ten "character"
 -- creep message to convey about 159.54 bits; (log base 2 of
 -- (62*1024)^10).  Most people find this too big a pain to bother
 -- with.
 --
 -- The 32k suffix entries are for the raw "base32k" encoding, where
 -- each 10 char packet covers a simple 150 bits of data.
 --
 -- The 18B suffix entries are for the next proposal, essentially 32k
 -- but with each frame limited to 144bits (exactly 18 bytes) and
 -- leaving the 6 reserved bits zero.

 import qualified Data.ByteString    as B
 import           Data.List
 import qualified Data.Text          as T
 import qualified Data.Text.Encoding as E
 import           Data.Word          (Word8)

 -- for playing with compression
 -- import qualified Codec.Compression.Zlib.Raw as Z
 -- import qualified Data.ByteString.Lazy       as LB

 -- | Not used, but allows for easy examples with compressed streams
 compress :: B.ByteString -> B.ByteString
 compress = id -- LB.toStrict . Z.compress . LB.fromStrict

 -- | Not used, but allows for easy examples with compressed streams
 decompress :: B.ByteString -> B.ByteString
 decompress = id -- LB.toStrict . Z.decompress . LB.fromStrict

 -- | Remap avoiding surrogate pair.
 encodePiece62k :: Int -> Int
 encodePiece62k n
  | n >= 0x0000 && n < 0xd800 = n
  | n >= 0xd800 && n < 0xf800 = n + 0x0800
  | True = undefined

 -- | Inverse of encodePiece
 decodePiece62k :: Int -> Int
 decodePiece62k n
  | n >= 0x0000 && n < 0xd800 = n
  | n >= 0xe000 && n <= 0xffff = n - 0x0800
  | True = undefined

 -- | Map an arbitrary precision integer to a sequence of encodable
 -- pieces.  Note that the divisor is not a power of two, so this
 -- results in fractional numbers of bits per output word.
 encodeNum62k :: Integer -> [Int]
 encodeNum62k n
  | n == 0 = []
  | True =
    case n `quotRem` 0xf800 of
      (n',r) -> (encodePiece62k $ fromInteger r):(encodeNum62k n')

 -- | Inverse of encodeNum62k
 decodeNum62k :: [Int] -> Integer
 decodeNum62k ps = foldr (\p acc->acc*0xf800+(fromIntegral $ decodePiece62k p)) 0 ps

 -- | Map an arbitrary precision integer to a sequence of 15bit words.
 encodeNum32k :: Integer -> [Int]
 encodeNum32k n
  | n == 0 = []
  | True =
    case n `quotRem` 32768 of
      (n',r) -> (fromInteger r):(encodeNum32k n')

 -- | Map an arbitrary precision integer to a sequence of 15bit words.
 decodeNum32k :: [Int] -> Integer
 decodeNum32k ps = foldr (\p acc->acc*32768+(fromIntegral p)) 0 ps

 -- | Map a Word8 list to an arbitrary precision integer.
 listToInteger :: [Word8] -> Integer
 listToInteger = foldr (\n acc->(fromIntegral n)+acc*256) 0

 -- | Map an arbitrary precision integer to a Word8 list.
 integerToList :: Integer -> [Word8]
 integerToList = unfoldr (\n->if n == 0
                            then Nothing
                            else case n `quotRem` 256 of
                                   (q,r) -> Just ((fromIntegral r), q))

 -- | Break a list into sublists of n pieces.
 chunk :: Int -> [a] -> [[a]]
 chunk n = unfoldr (\x -> case splitAt n x of
                          ([],[]) -> Nothing
                          (xn,xs) -> Just (xn,xs))

 encodeString62k :: String -> [[Int]]
 encodeString62k =
  (chunk 10) . encodeNum62k . listToInteger . B.unpack . compress . E.encodeUtf8 . T.pack

 encodeString32k :: String -> [[Int]]
 encodeString32k =
  (chunk 10) . encodeNum32k . listToInteger . B.unpack . compress . E.encodeUtf8 . T.pack

 decodeString62k :: [[Int]] -> String
 decodeString62k =
  T.unpack . E.decodeUtf8 . decompress . B.pack . integerToList . decodeNum62k . concat

 decodeString32k :: [[Int]] -> String
 decodeString32k =
  T.unpack . E.decodeUtf8 . decompress . B.pack . integerToList . decodeNum32k . concat

 encodeString18B :: String -> [[Int]]
 encodeString18B =
  (map (encodeNum32k . listToInteger)) . (chunk 18) . B.unpack . compress . E.encodeUtf8 . T.pack

 decodeString18B :: [[Int]] -> String
 decodeString18B =
  T.unpack . E.decodeUtf8 . decompress . B.pack . concatMap (integerToList . decodeNum32k)

 -- *Main> encodeString62k "This is a test of longer, multi-tick messages."
 -- [[8276,41399,53787,16551,11573,21382,15878,25184,7257,59183],
 --  [18515,53732,48523,18842,58308,33984,61606,27609,15210,33060],
 --  [48195,47858,23909]]
 -- *Main> encodeString32k "This is a test of longer, multi-tick messages."
 -- [[26708,26322,9345,923,1554,11908,7385,4154,26223,22592],
 --  [14781,11067,18214,9221,7515,14902,11625,21224,11661,26883],
 --  [13910,11886,23000,14770,46]]
 -- *Main> encodeString18B "This is a test of longer, multi-tick messages."
 --  [[26708,26322,9345,923,1554,11908,7385,4154,26223,64],
 --   [28524,20188,18837,355,22226,3470,23133,14870,25449,214],
 --   [27936,26314,1485,11067,26422,5]]
 --
 -- And of course decode* inverts to the original string.
	{-# OPTIONS -Wall #-}
	module Creepsay where

	-- ^ This file has some random scribblings for Screeps Diplomacy
	-- protocol discussion. Most of the things in here are not directly
	-- of use, but might help for assembling reference examples, once we
	-- figure out how the various layers should work.
	--
	-- The 62k suffix entries are "base62k" encoding, i.e. using all of
	-- the UCS2 space except for the 0xd800-0xdfff region historically
	-- reserved for surrogate pairs. This allows the ten "character"
	-- creep message to convey about 159.54 bits; (log base 2 of
	-- (62*1024)^10). Most people find this too big a pain to bother
	-- with.
	--
	-- The 32k suffix entries are for the raw "base32k" encoding, where
	-- each 10 char packet covers a simple 150 bits of data.
	--
	-- The 18B suffix entries are for the next proposal, essentially 32k
	-- but with each frame limited to 144bits (exactly 18 bytes) and
	-- leaving the 6 reserved bits zero.

	import qualified Data.ByteString as B
	import Data.List
	import qualified Data.Text as T
	import qualified Data.Text.Encoding as E
	import Data.Word (Word8)

	-- for playing with compression
	-- import qualified Codec.Compression.Zlib.Raw as Z
	-- import qualified Data.ByteString.Lazy as LB

	-- \| Not used, but allows for easy examples with compressed streams
	compress :: B.ByteString -> B.ByteString
	compress = id -- LB.toStrict . Z.compress . LB.fromStrict

	-- \| Not used, but allows for easy examples with compressed streams
	decompress :: B.ByteString -> B.ByteString
	decompress = id -- LB.toStrict . Z.decompress . LB.fromStrict

	-- \| Remap avoiding surrogate pair.
	encodePiece62k :: Int -> Int
	encodePiece62k n
	\| n >= 0x0000 && n < 0xd800 = n
	\| n >= 0xd800 && n < 0xf800 = n + 0x0800
	\| True = undefined

	-- \| Inverse of encodePiece
	decodePiece62k :: Int -> Int
	decodePiece62k n
	\| n >= 0x0000 && n < 0xd800 = n
	\| n >= 0xe000 && n <= 0xffff = n - 0x0800
	\| True = undefined

	-- \| Map an arbitrary precision integer to a sequence of encodable
	-- pieces. Note that the divisor is not a power of two, so this
	-- results in fractional numbers of bits per output word.
	encodeNum62k :: Integer -> [Int]
	encodeNum62k n
	\| n == 0 = []
	\| True =
	case n `quotRem` 0xf800 of
	(n',r) -> (encodePiece62k $ fromInteger r):(encodeNum62k n')

	-- \| Inverse of encodeNum62k
	decodeNum62k :: [Int] -> Integer
	decodeNum62k ps = foldr (\p acc->acc*0xf800+(fromIntegral $ decodePiece62k p)) 0 ps

	-- \| Map an arbitrary precision integer to a sequence of 15bit words.
	encodeNum32k :: Integer -> [Int]
	encodeNum32k n
	\| n == 0 = []
	\| True =
	case n `quotRem` 32768 of
	(n',r) -> (fromInteger r):(encodeNum32k n')

	-- \| Map an arbitrary precision integer to a sequence of 15bit words.
	decodeNum32k :: [Int] -> Integer
	decodeNum32k ps = foldr (\p acc->acc*32768+(fromIntegral p)) 0 ps

	-- \| Map a Word8 list to an arbitrary precision integer.
	listToInteger :: [Word8] -> Integer
	listToInteger = foldr (\n acc->(fromIntegral n)+acc*256) 0

	-- \| Map an arbitrary precision integer to a Word8 list.
	integerToList :: Integer -> [Word8]
	integerToList = unfoldr (\n->if n == 0
	then Nothing
	else case n `quotRem` 256 of
	(q,r) -> Just ((fromIntegral r), q))

	-- \| Break a list into sublists of n pieces.
	chunk :: Int -> [a] -> [[a]]
	chunk n = unfoldr (\x -> case splitAt n x of
	([],[]) -> Nothing
	(xn,xs) -> Just (xn,xs))

	encodeString62k :: String -> [[Int]]
	encodeString62k =
	(chunk 10) . encodeNum62k . listToInteger . B.unpack . compress . E.encodeUtf8 . T.pack

	encodeString32k :: String -> [[Int]]
	encodeString32k =
	(chunk 10) . encodeNum32k . listToInteger . B.unpack . compress . E.encodeUtf8 . T.pack

	decodeString62k :: [[Int]] -> String
	decodeString62k =
	T.unpack . E.decodeUtf8 . decompress . B.pack . integerToList . decodeNum62k . concat

	decodeString32k :: [[Int]] -> String
	decodeString32k =
	T.unpack . E.decodeUtf8 . decompress . B.pack . integerToList . decodeNum32k . concat

	encodeString18B :: String -> [[Int]]
	encodeString18B =
	(map (encodeNum32k . listToInteger)) . (chunk 18) . B.unpack . compress . E.encodeUtf8 . T.pack

	decodeString18B :: [[Int]] -> String
	decodeString18B =
	T.unpack . E.decodeUtf8 . decompress . B.pack . concatMap (integerToList . decodeNum32k)

	-- *Main> encodeString62k "This is a test of longer, multi-tick messages."
	-- [[8276,41399,53787,16551,11573,21382,15878,25184,7257,59183],
	-- [18515,53732,48523,18842,58308,33984,61606,27609,15210,33060],
	-- [48195,47858,23909]]
	-- *Main> encodeString32k "This is a test of longer, multi-tick messages."
	-- [[26708,26322,9345,923,1554,11908,7385,4154,26223,22592],
	-- [14781,11067,18214,9221,7515,14902,11625,21224,11661,26883],
	-- [13910,11886,23000,14770,46]]
	-- *Main> encodeString18B "This is a test of longer, multi-tick messages."
	-- [[26708,26322,9345,923,1554,11908,7385,4154,26223,64],
	-- [28524,20188,18837,355,22226,3470,23133,14870,25449,214],
	-- [27936,26314,1485,11067,26422,5]]
	--
	-- And of course decode* inverts to the original string.