Last active
August 29, 2015 14:21
-
-
Save ScottPJones/4e6e8938f0559998f9fc to your computer and use it in GitHub Desktop.
Tests for string conversions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# issue #11004 (#10959)
module TestConvert | |
# Running totals maintained by the test macros: how many checks were executed
# and how many of them failed.
global counttest = 0
global counterr = 0

# Add one to the number of tests run and return the updated total.
function incrtest()
    global counttest
    counttest += 1
end

# Add one to the number of failed tests and return the updated total.
function increrr()
    global counterr
    counterr += 1
end

# Module-level bindings for `T` and `byt`.
# NOTE(review): these look like placeholders so that references to `T` and
# `byt` made outside the test loops still resolve to something - confirm
# before removing them.
T = ""
byt = 0x0
# @testn name expr
#
# Run one named boolean check.
#
#   nam - expression producing the label printed for this check.  It is
#         escaped, so any `$...` interpolation inside it is evaluated in the
#         caller's scope (e.g. against the caller's loop variables) rather
#         than against same-named globals of this module.
#   arg - expression evaluated in the caller's scope; it must yield a Bool.
#
# Every use bumps the test counter through incrtest().  A false result also
# bumps the failure counter through increrr().  The outcome is printed as
# "Pass: " or "Fail: " followed by the label.
macro testn(nam, arg)
    quote
        incrtest()
        # The escaped expression already runs in the caller's scope; passing
        # its value through eval() again added nothing.
        if $(esc(arg))
            print("Pass: ")
        else
            increrr()
            print("Fail: ")
        end
        println($(esc(nam)))
    end
end
# @test_throws err name expr
#
# Run one named check that expects `expr` to throw an exception of type `err`.
#
#   err - expected exception type, evaluated in the caller's scope.
#   nam - expression producing the label printed for this check.  It is
#         escaped, so any `$...` interpolation inside it is evaluated in the
#         caller's scope rather than against same-named globals of this module.
#   arg - expression evaluated in the caller's scope.
#
# Every use bumps the test counter through incrtest().  The check passes only
# when `arg` throws an exception that `isa` `err`.  Returning normally, or
# throwing some other exception type, counts as a failure and bumps the
# failure counter through increrr().  The outcome is printed as "Pass: " or
# "Fail: " followed by the label.
macro test_throws(err, nam, arg)
    quote
        incrtest()
        try
            $(esc(arg))
            # Reaching this point means nothing was thrown.
            increrr()
            print("Fail: ")
        catch ex
            # Only the requested exception type counts as a pass; previously
            # `err` was ignored and any exception at all was accepted.
            if isa(ex, $(esc(err)))
                print("Pass: ")
            else
                increrr()
                print("Fail: ")
            end
        end
        println($(esc(nam)))
    end
end
# Sample strings used by the conversion tests below.

# Plain ASCII literal.
strAscii = "abcdefgh"

# The first 8 characters of a literal that ends in U+00FF, i.e. "abcdefgh".
# NOTE(review): presumably sliced this way to get ASCII-only content held in
# a UTF-8 string type - confirm.
strA_8 = ("abcdefgh\uff")[1:8]

# Strings whose widest character needs 2 (U+00FF, U+07FF), 3 (U+7FFF) and
# 4 (U+7FFFF) bytes in UTF-8.
strL_8 = "abcdef\uff\uff"
str2_8 = "abcd\uff\uff\u7ff\u7ff"
str3_8 = "abcd\uff\uff\u7fff\u7fff"
str4_8 = "abcd\uff\u7ff\u7fff\U7ffff"

# Raw byte strings.  After "abcd" each one holds U+00FF (c3 bf), U+07FF (df bf)
# and U+7FFF (e7 bf bf), followed by:
#   strS_8 - U+D800 and U+DC00, each encoded separately as 3 bytes
#            (the surrogate pair for U+10000)
#   strC_8 - U+10000 itself
#   strZ_8 - the overlong 2-byte encoding of NUL (c0 80)
#   strz_8 - a plain NUL byte
strS_8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\xed\xa0\x80\xed\xb0\x80")
strC_8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\U10000")
strZ_8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\xc0\x80")
strz_8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\0")

# UTF-16 and UTF-32 counterparts, each converted from its UTF-8 original.
strA_16 = utf16(strA_8);  strA_32 = utf32(strA_8)
strL_16 = utf16(strL_8);  strL_32 = utf32(strL_8)
str2_16 = utf16(str2_8);  str2_32 = utf32(str2_8)
str3_16 = utf16(str3_8);  str3_32 = utf32(str3_8)
str4_16 = utf16(str4_8);  str4_32 = utf32(str4_8)
strS_16 = utf16(strS_8);  strS_32 = utf32(strS_8)
# An ASCII string must still compare equal to itself after conversion to each
# of the three Unicode string types.
@testn("utf8(strAscii) == strAscii", utf8(strAscii) == strAscii)
@testn("utf16(strAscii) == strAscii", utf16(strAscii) == strAscii)
@testn("utf32(strAscii) == strAscii", utf32(strAscii) == strAscii)

# For every sample string, convert between each pair of encodings and compare
# the result with the matching pre-built string.
for (strUTF8, strUTF16, strUTF32) in zip((strA_8, strL_8, str2_8, str3_8, str4_8),
                                         (strA_16, strL_16, str2_16, str3_16, str4_16),
                                         (strA_32, strL_32, str2_32, str3_32, str4_32))
    println("strUTF8 = $strUTF8, strUTF16 = $strUTF16, strUTF32 = $strUTF32")
    @testn("utf16(strUTF8) == strUTF16", utf16(strUTF8) == strUTF16)
    @testn("utf32(strUTF8) == strUTF32", utf32(strUTF8) == strUTF32)
    @testn("utf8(strUTF16) == strUTF8", utf8(strUTF16) == strUTF8)
    @testn("utf32(strUTF16) == strUTF32", utf32(strUTF16) == strUTF32)
    @testn("utf8(strUTF32) == strUTF8", utf8(strUTF32) == strUTF8)
    @testn("utf16(strUTF32) == strUTF16", utf16(strUTF32) == strUTF16)
end

# Surrogate pairs: every conversion of the surrogate-encoded string should
# compare equal to the string that holds the real character U+10000.
@testn("utf16(strS_8) == strC_8", utf16(strS_8) == strC_8)
@testn("utf32(strS_8) == strC_8", utf32(strS_8) == strC_8)
@testn("utf8(strS_16) == strC_8", utf8(strS_16) == strC_8)
@testn("utf32(strS_16) == strC_8", utf32(strS_16) == strC_8)
@testn("utf8(strS_32) == strC_8", utf8(strS_32) == strC_8)
@testn("utf16(strS_32) == strC_8", utf16(strS_32) == strC_8)

# Overlong-encoded \0: converting it should give the same result as a plain
# NUL byte.
# Currently broken! (in utf8.jl)
@testn("utf8(strZ_8) == strz_8", utf8(strZ_8) == strz_8)
@testn("utf16(strZ_8) == strz_8", utf16(strZ_8) == strz_8)
@testn("utf32(strZ_8) == strz_8", utf32(strZ_8) == strz_8)
# Test invalid sequences
#
# For each target string type, hand convert() a series of malformed UTF-8 byte
# sequences wrapped in a UTF8String and expect an ArgumentError every time.
for T in (UTF8String, UTF16String, UTF32String)
    try
        # Continuation byte not after lead
        for byt in 0x80:0xbf
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt))]))" convert(T, UTF8String(UInt8[byt]))
        end
        # Test lead bytes
        for byt in 0xc0:0xff
            # Single lead byte at end of string
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt))]))" convert(T, UTF8String(UInt8[byt]))
            # Lead followed by non-continuation character < 0x80
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0]))" convert(T, UTF8String(UInt8[byt,0]))
            # Lead followed by non-continuation character > 0xbf
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0xc0]))" convert(T, UTF8String(UInt8[byt,0xc0]))
        end
        # Test overlong 2-byte
        # (0xc0 0x80, the overlong NUL, is left out of this range.)
        for byt in 0x81:0xbf
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xc0,0x$(hex(byt))]))" convert(T, UTF8String(UInt8[0xc0,byt]))
        end
        for byt in 0x80:0xbf
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xc1,0x$(hex(byt))]))" convert(T, UTF8String(UInt8[0xc1,byt]))
        end
        # Test overlong 3-byte
        for byt in 0x80:0x9f
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xe0,0x$(hex(byt)),0x80]))" convert(T, UTF8String(UInt8[0xe0,byt,0x80]))
        end
        # Test overlong 4-byte
        # The lead byte must be 0xf0: an overlong 4-byte form is 0xf0 followed
        # by a second byte in 0x80:0x8f.  0xef is a 3-byte lead, so with it the
        # sequence was a 3-byte character plus a stray continuation byte.
        for byt in 0x80:0x8f
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xf0,0x$(hex(byt)),0x80,0x80]))" convert(T, UTF8String(UInt8[0xf0,byt,0x80,0x80]))
        end
        # Test 4-byte > 0x10ffff
        for byt in 0x90:0xbf
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xf4,0x$(hex(byt)),0x80,0x80]))" convert(T, UTF8String(UInt8[0xf4,byt,0x80,0x80]))
        end
        for byt in 0xf5:0xf7
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80,0x80]))" convert(T, UTF8String(UInt8[byt,0x80,0x80,0x80]))
        end
        # Test 5-byte
        for byt in 0xf8:0xfb
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80,0x80,0x80]))" convert(T, UTF8String(UInt8[byt,0x80,0x80,0x80,0x80]))
        end
        # Test 6-byte
        for byt in 0xfc:0xfd
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80,0x80,0x80,0x80]))" convert(T, UTF8String(UInt8[byt,0x80,0x80,0x80,0x80,0x80]))
        end
        # Test 7-byte
        @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xfe,0x80,0x80,0x80,0x80,0x80,0x80]))" convert(T, UTF8String(UInt8[0xfe,0x80,0x80,0x80,0x80,0x80,0x80]))
        # Three and above byte sequences
        for byt in 0xe0:0xef
            # Lead followed by only 1 continuation byte
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80]))" convert(T, UTF8String(UInt8[byt,0x80]))
            # Lead ended by non-continuation character < 0x80
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0]))" convert(T, UTF8String(UInt8[byt,0x80,0]))
            # Lead ended by non-continuation character > 0xbf
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0xc0]))" convert(T, UTF8String(UInt8[byt,0x80,0xc0]))
        end
        # 3-byte encoded surrogate character(s)
        # Single surrogate
        @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xed,0xa0,0x80]))" convert(T, UTF8String(UInt8[0xed,0xa0,0x80]))
        # Not followed by surrogate
        @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xed,0xa0,0x80,0xed,0x80,0x80]))" convert(T, UTF8String(UInt8[0xed,0xa0,0x80,0xed,0x80,0x80]))
        # Trailing surrogate first
        @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xed,0xb0,0x80,0xed,0xb0,0x80]))" convert(T, UTF8String(UInt8[0xed,0xb0,0x80,0xed,0xb0,0x80]))
        # Followed by lead surrogate
        @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xed,0xa0,0x80,0xed,0xa0,0x80]))" convert(T, UTF8String(UInt8[0xed,0xa0,0x80,0xed,0xa0,0x80]))
        # Four byte sequences
        for byt in 0xf0:0xf4
            # Lead followed by only 2 continuation bytes
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80]))" convert(T, UTF8String(UInt8[byt,0x80,0x80]))
            # Lead followed by non-continuation character < 0x80
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80,0]))" convert(T, UTF8String(UInt8[byt,0x80,0x80,0]))
            # Lead followed by non-continuation character > 0xbf
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80,0xc0]))" convert(T, UTF8String(UInt8[byt,0x80,0x80,0xc0]))
        end
    catch ex
        # Anything that escapes the individual checks is reported, then
        # re-raised unchanged.
        # NOTE(review): `byt` here looks like it resolves to the module-level
        # binding rather than to the inner loop variables, so the byte shown
        # may not be the one being tested - confirm.
        println("Error checking $T: 0x$(hex(byt))")
        rethrow()
    end
end

# Summary: total number of checks executed and how many of them failed.
println("Out of $(TestConvert.counttest) tests, $(TestConvert.counterr) failed")
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment