Created
November 8, 2013 21:22
-
-
Save michaelt/7377887 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE LambdaCase #-}

import Data.Word (Word8)

import qualified Data.ByteString as B
import Data.Text (Text(..))
import qualified Data.Text as T
import Data.Text.Encoding
import Data.Text.Encoding.Error
import qualified Data.Text.IO as T
import Pipes
import qualified Pipes.ByteString as BP
import Pipes.Parse
-- | Run the demo: stream the chunks produced by @splits a'@ through
-- 'decode' and write each decoded 'Text' chunk to stdout with 'T.putStr'.
main :: IO ()
main = runEffect $ for (splits a' >-> decode) (lift . T.putStr)
-- | UTF-8 encoding of the four-character sample @"你好世界"@.
a :: B.ByteString
a = encodeUtf8 (T.pack "你好世界")

-- | UTF-8 encoding of the two-character sample @"你好"@; this is the
-- input that 'main' splits and decodes.
a' :: B.ByteString
a' = encodeUtf8 (T.pack "你好")

-- | Shorthand for 'B.singleton': a one-byte 'B.ByteString'.
sing :: Word8 -> B.ByteString
sing = B.singleton
-- | Tracing UTF-8 decoder pipe: 'decodeWith' specialised to the 'lenient'
-- error handler.
decode :: Pipe B.ByteString Text IO r
decode = decodeWith lenient
-- | Decode-error handler that never drops input: it always returns a
-- replacement character.
--
-- The first argument (an error description) is ignored. The second is the
-- offending byte, if any:
--
-- * byte 228 is replaced by @\'X\'@
-- * byte 229 is replaced by @\'Y\'@
-- * anything else, including 'Nothing', is replaced by @\'?\'@
--
-- 228 and 229 are the first bytes of the two characters encoded in @a'@,
-- so the marker shows which character's lead byte was rejected.
lenient :: String -> Maybe Word8 -> Maybe Char
lenient _description offending = Just $ case offending of
  Just 228 -> 'X'
  Just 229 -> 'Y'
  _        -> '?'
-- | Streaming UTF-8 decoder that traces every step to stdout.
--
-- For each 'B.ByteString' chunk awaited from upstream, the chunk is fed to
-- the current continuation of 'streamDecodeUtf8With'. A blank line and
-- four trace lines are then printed: the 1-based chunk number, the raw
-- input chunk, the decoded text re-encoded as UTF-8, and the leftover
-- bytes reported by the decoder. After that the decoded 'Text' is yielded
-- downstream. The pipe loops forever, so it ends only when upstream does.
--
-- The argument is the handler passed to 'streamDecodeUtf8With' for
-- invalid input.
decodeWith :: OnDecodeError
           -> Pipe B.ByteString Text IO r
decodeWith onErr = go (1 :: Integer) (streamDecodeUtf8With onErr)
  where
    -- @n@ is the chunk counter; @dec@ is the decoder continuation that
    -- carries any undecoded bytes from earlier chunks.
    go n dec = do
      chunk <- await
      case dec chunk of
        Some text leftover dec' -> do
          lift $ do
            putChar '\n'
            putStr "Chunk "
            print n
            putStr "Input Bytestring: "
            print chunk
            putStr "Text output as ByteString: "
            print (encodeUtf8 text)
            putStr "ByteString leftover: "
            print leftover
          yield text
          go (n + 1) dec'
-- | Yield every two-way split of a 'B.ByteString' as two consecutive
-- chunks.
--
-- For each offset @i@ from @0@ to @length - 1@, in increasing order, the
-- first @i@ bytes are yielded and then the remaining bytes. The split at
-- the full length (whole input followed by an empty chunk) is not
-- produced, and an empty input yields nothing.
splits :: Monad m => B.ByteString -> Producer B.ByteString m ()
splits bs = mapM_ emitSplit [0 .. B.length bs - 1]
  where
    -- Yield the prefix and suffix of @bs@ around offset @i@.
    emitSplit i = do
      let (front, back) = B.splitAt i bs
      yield front
      yield back
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Chunk 1 | |
Input Bytestring: "" | |
Text output as ByteString: "" | |
ByteString leftover: "" | |
Chunk 2 | |
Input Bytestring: "\228\189\160\229\165\189" | |
Text output as ByteString: "\228\189\160\229\165\189" | |
ByteString leftover: "" | |
你好 | |
Chunk 3 | |
Input Bytestring: "\228" | |
Text output as ByteString: "" | |
ByteString leftover: "\228" | |
Chunk 4 | |
Input Bytestring: "\189\160\229\165\189" | |
Text output as ByteString: "\228\189\160\229\165\189" | |
ByteString leftover: "" | |
你好 | |
Chunk 5 | |
Input Bytestring: "\228\189" | |
Text output as ByteString: "" | |
ByteString leftover: "\228\189" | |
Chunk 6 | |
Input Bytestring: "\160\229\165\189" | |
Text output as ByteString: "\228\189\160\229\165\189" | |
ByteString leftover: "" | |
你好 | |
Chunk 7 | |
Input Bytestring: "\228\189\160" | |
Text output as ByteString: "\228\189\160" | |
ByteString leftover: "" | |
你 | |
Chunk 8 | |
Input Bytestring: "\229\165\189" | |
Text output as ByteString: "\229\165\189" | |
ByteString leftover: "" | |
好 | |
Chunk 9 | |
Input Bytestring: "\228\189\160\229" | |
Text output as ByteString: "\228\189\160" | |
ByteString leftover: "\229" | |
你 | |
Chunk 10 | |
Input Bytestring: "\165\189" | |
Text output as ByteString: "\229\165\189" | |
ByteString leftover: "" | |
好 | |
Chunk 11 | |
Input Bytestring: "\228\189\160\229\165" | |
Text output as ByteString: "\228\189\160" | |
ByteString leftover: "\229\165" | |
你 | |
Chunk 12 | |
Input Bytestring: "\189" | |
Text output as ByteString: "\229\165\189" | |
ByteString leftover: "" | |
好 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment