Created
January 4, 2018 07:24
-
-
Save adituv/d1df4059f8409cdc567032c520053e38 to your computer and use it in GitHub Desktop.
Text to Foundation's String
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE BangPatterns #-} | |
{-# LANGUAGE NoImplicitPrelude #-} | |
{-# LANGUAGE TypeFamilies #-} | |
module Foundation.Interop.Text where | |
import Foundation | |
import Foundation.Array | |
import Foundation.Bits | |
import Foundation.Collection | |
import Foundation.Primitive | |
import Foundation.String | |
import Control.Monad.ST | |
import qualified Data.Text as T | |
import qualified Data.Text.Array as A | |
import qualified Data.Text.Internal as T | |
-- | Human-readable description of why a conversion failed.
type ErrMsg = String

-- | Convert a @text@ 'T.Text' into a Foundation 'String'.
--
-- The 16-bit code units in the slice @[off, off + len)@ of the text's
-- backing array are each split into two bytes (low byte first), collected
-- into a @UArray Word8@, and handed to 'fromBytes' with the 'UTF16'
-- encoding.
--
-- Returns 'Left' when the builder reports an error or when 'fromBytes'
-- reports a validation failure (the failure is rendered with 'show');
-- otherwise returns the converted string.
--
-- Errors should never happen assuming text's implementation is correct,
-- so an "unsafe" variant without the 'Either' may be worth adding.
--
-- NOTE(review): bytes are emitted low byte first, which presumably matches
-- what 'fromBytes' 'UTF16' expects only on little-endian hosts -- confirm.
fromText :: T.Text -> Either ErrMsg String
fromText (T.Text bytes off len) = do
    -- Each code unit contributes two bytes, so size the builder for the
    -- whole output up front; passing just @len@ covers only half of it and
    -- forces an extra chunk allocation for every non-empty input.
    array <- runST (build (len * 2) (copyUnits off (off + len)))
    let (str, failure, _) = fromBytes UTF16 array
    case failure of
        Just err -> Left (show err)
        Nothing  -> pure str
  where
    -- Append the code units in @[ix, end)@ to the builder, two bytes each.
    copyUnits :: Int -> Int -> Builder (UArray Word8) (MUArray Word8) Word8 (ST s) String ()
    copyUnits !ix !end
        | ix >= end = pure ()
        | otherwise = do
            let unit = A.unsafeIndex bytes ix
            append $ integralDownsize (unit .&. 0xFF)      -- low byte
            append $ integralDownsize (unit `shiftR` 8)    -- high byte
            copyUnits (ix + 1) end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Welp, this is an order of magnitude slower than `fromList . unpack`. Guess I'm sticking to the naive way - it's definitely fast enough. (X-axis is execution time/ms. The third benchmark is roughly one paragraph of text and takes 14us naively or ~280us with this code.)