Created
August 5, 2013 09:15
-
-
Save hvr/6154533 to your computer and use it in GitHub Desktop.
Compact heap representation a `ByteString` can be converted to/from.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE MagicHash #-} | |
module Data.ByteString.Raw (RawByteString, empty, fromByteString, toByteString) where | |
import qualified Data.ByteString as B | |
import Data.ByteString.Internal | |
import GHC.Prim | |
import GHC.ForeignPtr | |
import GHC.Types | |
import System.IO.Unsafe (unsafePerformIO) | |
-- | Compact heap representation that a 'ByteString' can be converted
-- to and from.
--
-- This type is only useful together with 'ByteString', but it has a
-- different cost model.
--
-- The representation avoids both the 'ForeignPtr' indirection and the
-- offset/length slice bookkeeping that a shared 'ByteString' carries.
-- It is therefore a good fit when many small strings have to be stored
-- in data records or used as keys in container types. The flip side is
-- that string operations on a 'RawByteString' would require
-- reallocation, so none are provided: convert to a 'ByteString' and
-- operate on that instead.
--
-- The type can be unpacked into a parent constructor with the @UNPACK@
-- pragma. It then has an overhead of only 3 words (beyond the
-- word-padded storage of the byte-string payload), since it is
-- basically just a pointer to a 'MutableByteArray#'. In contrast, a
-- single non-shared unpacked 'ByteString' field exhibits an overhead of
-- 8 words.
--
-- As an optimization, all zero-length strings are mapped to the
-- singleton value 'empty'.
data RawByteString
    = RBS !(MutableByteArray# RealWorld)
-- | The single shared 'RawByteString' that every zero-length
-- 'ByteString' is mapped to (and which maps back to the empty
-- 'ByteString').
--
-- The backing array is obtained from a zero-byte
-- 'mallocPlainForeignPtrBytes' allocation performed under
-- 'unsafePerformIO'. The @NOINLINE@ pragma keeps that allocation from
-- being duplicated at use sites.
empty :: RawByteString
empty = unsafePerformIO allocateEmpty
  where
    allocateEmpty = do
        ForeignPtr _ (PlainPtr zeroLen#) <- mallocPlainForeignPtrBytes 0
        return $! RBS zeroLen#
{-# NOINLINE empty #-}
-- | Extract a 'RawByteString' from a 'ByteString'.
--
-- Zero-length input is mapped to the singleton 'empty'.
--
-- If possible, the internally used 'MutableByteArray#' is shared with
-- the original 'ByteString'. Sharing requires that the 'ByteString' is
-- backed by a 'PlainPtr' byte array, starts at the very beginning of
-- that array (offset 0 and payload address equal to the array's
-- contents address), and spans the whole array. In every other case
-- (slices, other kinds of 'ForeignPtr' contents) a compact copy is
-- created via 'B.copy' and that copy's array is used instead.
--
-- The copy is expected to always be shareable. Should that assumption
-- ever fail, an error is raised instead of copying again, so this
-- function cannot loop.
fromByteString :: ByteString -> RawByteString
fromByteString (PS _ _ 0) = empty
fromByteString bs =
    case shareBuffer bs of
      Just shared -> shared
      Nothing     ->
        case shareBuffer (B.copy bs) of
          Just compact -> compact
          Nothing      -> error "Data.ByteString.Raw.fromByteString: B.copy did not yield a compact buffer"
{-# INLINE fromByteString #-}

-- | Reuse the byte array behind a 'ByteString' when it covers exactly
-- the string's payload; 'Nothing' means the caller has to copy first.
shareBuffer :: ByteString -> Maybe RawByteString
shareBuffer (PS fp@(ForeignPtr _ (PlainPtr mbarr#)) 0 l)
    -- Addresses are compared through 'ForeignPtr' equality rather than
    -- a raw primop, so a pointer that does not start at the array base
    -- simply results in a copy.
    | fp == arrayStart && l == arraySize = Just (RBS mbarr#)
  where
    arraySize  = I# (sizeofMutableByteArray# mbarr#)
    arrayStart = ForeignPtr (byteArrayContents# (unsafeCoerce# mbarr#)) (PlainPtr mbarr#)
shareBuffer _ = Nothing
-- | Turn a 'RawByteString' back into a 'ByteString'.
--
-- A zero-length array yields 'B.empty'. Otherwise the underlying
-- 'MutableByteArray#' is wrapped, without copying, in a 'ForeignPtr'
-- with 'PlainPtr' contents, and the result spans the whole array
-- (offset 0, length equal to the array size).
toByteString :: RawByteString -> ByteString
toByteString (RBS buf#) =
    case I# (sizeofMutableByteArray# buf#) of
      0   -> B.empty
      len -> PS (ForeignPtr payload# (PlainPtr buf#)) 0 len
  where
    -- Address of the array's first payload byte.
    payload# = byteArrayContents# (unsafeCoerce# buf#)
{-# INLINE toByteString #-}
-- TODO: compare the underlying byte arrays directly instead of going
-- through 'ByteString'.
--
-- | Two 'RawByteString's are equal when their 'ByteString' views are.
instance Eq RawByteString where
    lhs == rhs = asBytes lhs == asBytes rhs
      where
        asBytes = toByteString
-- Convenience instance: a 'RawByteString' is shown exactly like the
-- 'ByteString' it converts to.
instance Show RawByteString where
    showsPrec prec = showsPrec prec . toByteString
-- Convenience instance: input is parsed as a 'ByteString' and every
-- successful parse is converted with 'fromByteString'.
instance Read RawByteString where
    readsPrec prec input = map convert (readsPrec prec input)
      where
        convert (parsed, rest) = (fromByteString parsed, rest)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment