Created
February 13, 2014 17:29
-
-
Save michaelt/8979818 to your computer and use it in GitHub Desktop.
streamUtf8 tests
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-#LANGUAGE ScopedTypeVariables#-} | |
import Test.QuickCheck hiding ((.&.)) | |
import Test.Framework (Test, testGroup, defaultMain) | |
import Test.Framework.Providers.QuickCheck2 (testProperty) | |
import Debug.Trace (trace) | |
import Control.Exception (SomeException, evaluate, try) | |
import System.IO.Unsafe (unsafePerformIO) | |
import Data.Bits ((.&.)) | |
import Data.Char (chr) | |
import Data.String | |
import Data.Char (chr) | |
import Data.List (intersperse) | |
import Control.Monad | |
import Data.Either (partitionEithers) | |
import qualified Data.ByteString as B | |
import qualified Data.Text as T | |
import qualified Data.Text.Encoding as E | |
import Data.Text.StreamDecoding | |
main :: IO () | |
main = defaultMain [tests] | |
-- >>> :main -a 10000 -t badmiddle | |
tests = testGroup "stream_utf8" [ | |
testProperty "t_utf8_stream_badmiddle" t_utf8_stream_badmiddle, | |
testProperty "t_utf8_stream_badend" t_utf8_stream_badend, | |
testProperty "t_utf8_stream_badstart" t_utf8_stream_badstart] | |
(<>) = B.append | |
chunk n bs = let (a,b) = B.splitAt n bs in if B.null a then [] else a : chunk n b | |
space = intersperse B.empty | |
setup = do | |
Positive n <- arbitrary | |
Positive k <- arbitrary | |
Positive u <- arbitrary | |
txt <- genUnicode :: Gen T.Text | |
let chunkSize = mod n 7 + 1 :: Int | |
vecSize = mod k 7 + 1 :: Int | |
spaces = foldr (.) id (take (mod u 2) (repeat space)) | |
return (spaces, txt, chunkSize, vecSize) | |
t_utf8_stream_badend = do | |
(spaces, txt, chunkSize, vecSize) <- setup | |
forAll (vector vecSize) $ | |
(roundtrip' . spaces . chunk chunkSize . appendBytes txt) | |
`eq` (appendBytes txt) | |
where appendBytes txt bts = E.encodeUtf8 txt <> B.pack bts | |
t_utf8_stream_badmiddle = do | |
(spaces, txt, chunkSize, vecSize) <- setup | |
forAll (vector vecSize) $ | |
(roundtrip' . spaces . chunk chunkSize . insertBytes txt) | |
`eq` (insertBytes txt) | |
where insertBytes txt bts = let n = T.length txt | |
(a,b) = T.splitAt n txt | |
in E.encodeUtf8 a <> B.pack bts <> E.encodeUtf8 b | |
t_utf8_stream_badstart = do | |
(spaces, txt, chunkSize, vecSize) <- setup | |
forAll (vector vecSize) $ | |
(roundtrip' . spaces . chunk chunkSize . affixBytes txt) | |
`eq` (affixBytes txt) | |
where affixBytes txt bts = B.pack bts <> E.encodeUtf8 txt | |
roundtrip' :: [B.ByteString] -> B.ByteString | |
roundtrip' bss = let (ts,bs) = twolists bss in B.concat $ map E.encodeUtf8 ts ++ bs | |
where | |
twolists :: [B.ByteString] -> ([T.Text],[B.ByteString]) | |
twolists = partitionEithers . mark streamUtf8 where | |
mark dec [] = case dec B.empty of | |
DecodeResultSuccess t dec' -> [] | |
DecodeResultFailure t bs' -> Left t : Right bs': [] | |
mark dec (bs:bss) = case dec bs of | |
DecodeResultSuccess t dec' -> Left t : mark dec' bss | |
DecodeResultFailure t bs' -> Left t : map Right (bs':bss) | |
roundtrip :: [B.ByteString] -> B.ByteString | |
roundtrip bss = go streamUtf8 B.empty bss where | |
go dec acc [] = case dec B.empty of | |
DecodeResultSuccess t dec' -> acc | |
DecodeResultFailure t bs' -> acc <> bs' | |
go dec acc (bs:bss) = case dec bs of | |
DecodeResultSuccess t dec' -> go dec' (acc <> E.encodeUtf8 t) bss | |
DecodeResultFailure t bs' -> acc <> E.encodeUtf8 t <> bs' <> B.concat bss | |
-- Helpers from the `text` test suite | |
-- Ensure that two potentially bottom values (in the sense of crashing | |
-- for some inputs, not looping infinitely) either both crash, or both | |
-- give comparable results for some input. | |
(=^=) :: (Eq a, Show a) => a -> a -> Bool | |
i =^= j = unsafePerformIO $ do | |
x <- try (evaluate i) | |
y <- try (evaluate j) | |
case (x,y) of | |
(Left (_ :: SomeException), Left (_ :: SomeException)) | |
-> return True | |
(Right a, Right b) -> return (a == b) | |
e -> trace ("*** Divergence: " ++ show e) return False | |
infix 4 =^= | |
{-# NOINLINE (=^=) #-} | |
-- Do two functions give the same answer? | |
eq :: (Eq a, Show a) => (t -> a) -> (t -> a) -> t -> Bool | |
eq a b s = a s =^= b s | |
instance Arbitrary B.ByteString where | |
arbitrary = B.pack `fmap` arbitrary | |
genUnicode :: IsString a => Gen a | |
genUnicode = fmap fromString string where | |
string = sized $ \n -> | |
do k <- choose (0,n) | |
sequence [ char | _ <- [1..k] ] | |
excluding :: [a -> Bool] -> Gen a -> Gen a | |
excluding bad gen = loop | |
where | |
loop = do | |
x <- gen | |
if or (map ($ x) bad) | |
then loop | |
else return x | |
reserved = [lowSurrogate, highSurrogate, noncharacter] | |
lowSurrogate c = c >= 0xDC00 && c <= 0xDFFF | |
highSurrogate c = c >= 0xD800 && c <= 0xDBFF | |
noncharacter c = masked == 0xFFFE || masked == 0xFFFF | |
where | |
masked = c .&. 0xFFFF | |
ascii = choose (0,0x7F) | |
plane0 = choose (0xF0, 0xFFFF) | |
plane1 = oneof [ choose (0x10000, 0x10FFF) | |
, choose (0x11000, 0x11FFF) | |
, choose (0x12000, 0x12FFF) | |
, choose (0x13000, 0x13FFF) | |
, choose (0x1D000, 0x1DFFF) | |
, choose (0x1F000, 0x1FFFF) | |
] | |
plane2 = oneof [ choose (0x20000, 0x20FFF) | |
, choose (0x21000, 0x21FFF) | |
, choose (0x22000, 0x22FFF) | |
, choose (0x23000, 0x23FFF) | |
, choose (0x24000, 0x24FFF) | |
, choose (0x25000, 0x25FFF) | |
, choose (0x26000, 0x26FFF) | |
, choose (0x27000, 0x27FFF) | |
, choose (0x28000, 0x28FFF) | |
, choose (0x29000, 0x29FFF) | |
, choose (0x2A000, 0x2AFFF) | |
, choose (0x2B000, 0x2BFFF) | |
, choose (0x2F000, 0x2FFFF) | |
] | |
plane14 = choose (0xE0000, 0xE0FFF) | |
planes = [ascii, plane0, plane1, plane2, plane14] | |
char = chr `fmap` excluding reserved (oneof planes) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment