Skip to content

Instantly share code, notes, and snippets.

@hvr
Created August 5, 2013 09:15
Show Gist options
  • Save hvr/6154533 to your computer and use it in GitHub Desktop.
Save hvr/6154533 to your computer and use it in GitHub Desktop.
Compact heap representation a `ByteString` can be converted to/from.
{-# LANGUAGE MagicHash #-}
module Data.ByteString.Raw (RawByteString, empty, fromByteString, toByteString) where
import qualified Data.ByteString as B
import Data.ByteString.Internal
import GHC.Prim
import GHC.ForeignPtr
import GHC.Types
import System.IO.Unsafe (unsafePerformIO)
-- |Compact heap representation a 'ByteString' can be converted to/from.
--
-- This data type depends on the 'ByteString' type to be useful but has
-- a different cost-model.
--
-- This representation avoids the 'ForeignPtr' indirection, and the
-- offset/length slice representation for shared 'ByteString', and is
-- therefore suitable if you need to store many small strings in a
-- data records or for use as keys in container types. On the other
-- hand, string operations on 'RawByteString' would require
-- reallocations, and thus are not supported. If you need to perform
-- such operations convert and operate on 'ByteString's instead.
--
-- This structure can supports @{-# UNPACK -#}@, and then only has an
-- overhead of 3 words (beyond the word-padded storage of the
-- byte-string payload), as it's basically just a pointer to a
-- 'MutableByteArray#'. In contrast, a single non-shared unpacked
-- 'ByteString' field exhibits an overhead of 8 words.
--
-- As an optimization, all zero-length strings are mapped to the
-- singleton value 'empty'.
data RawByteString = RBS !(MutableByteArray# RealWorld)
-- |Singleton value the 'empty' 'ByteString' is mapped to/from.
empty :: RawByteString
empty = unsafePerformIO $ do
(ForeignPtr _ (PlainPtr mbarr#)) <- mallocPlainForeignPtrBytes 0
return $! RBS mbarr#
{-# NOINLINE empty #-}
-- |Extract 'RawByteString' from 'ByteString'
--
-- If possible, the internally used 'MutableByteArray#' is shared with
-- the original 'ByteString'. However, if necessary, a compact copy of
-- the 'ByteString' will be created via 'B.copy'.
fromByteString :: ByteString -> RawByteString
fromByteString bs@(PS _ _ 0) = empty
fromByteString bs@(PS (ForeignPtr addr (PlainPtr mbarr#)) 0 l)
| neAddr# addr' addr = error "internal error" -- optional sanity check
| l' == l = RBS mbarr#
| otherwise = fromByteString (B.copy bs) -- we assume this doesn't lead to
where
l' = I# (sizeofMutableByteArray# mbarr#)
addr' = byteArrayContents# (unsafeCoerce# mbarr#)
{-# INLINE fromByteString #-}
-- |Convert a 'RawByteString' back into a 'ByteString'.
--
-- This wraps the 'RawByteString' into a 'ForeignPtr'
toByteString :: RawByteString -> ByteString
toByteString (RBS mbarr#) | l == 0 = B.empty
| otherwise = PS fp 0 l
where
l = I# (sizeofMutableByteArray# mbarr#)
addr = byteArrayContents# (unsafeCoerce# mbarr#)
fp = ForeignPtr addr (PlainPtr mbarr#)
{-# INLINE toByteString #-}
-- TODO: implement direct version
instance Eq RawByteString where
x == y = toByteString x == toByteString y
-- for convenience
instance Show RawByteString where
showsPrec p rbs = showsPrec p (toByteString rbs)
instance Read RawByteString where
readsPrec p str = [ (fromByteString x, y) | (x, y) <- readsPrec p str ]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment