Skip to content

Instantly share code, notes, and snippets.

@ScottPJones
Last active August 29, 2015 14:27
Show Gist options
  • Save ScottPJones/8feed7aa12f4ab25e76b to your computer and use it in GitHub Desktop.
Save ScottPJones/8feed7aa12f4ab25e76b to your computer and use it in GitHub Desktop.
Code to test performance of new pure Julia reverse(str::UTF8String), along with test results from my MacBook Pro
julia> dotest(1000000)
Length of string: 0
ASCII reverse: 0.056237 seconds (2.00 M allocations: 91.553 MB, 11.00% gc time)
UTF-8 oldreverse: 0.037658 seconds (2.00 M allocations: 91.553 MB, 7.40% gc time)
UTF-8 newreverse: 0.003620 seconds
UTF-16 reverse: 0.046076 seconds (2.00 M allocations: 91.553 MB, 8.44% gc time)
UTF-32 reverse: 0.054243 seconds (2.00 M allocations: 91.553 MB, 6.92% gc time)
0.142451 seconds (6.00 M allocations: 274.666 MB, 7.32% gc time)
Length of string: 1
ASCII reverse: 0.042259 seconds (2.00 M allocations: 91.553 MB, 7.25% gc time)
ASCII values:
UTF-8 oldreverse: 0.041105 seconds (2.00 M allocations: 91.553 MB, 9.24% gc time)
UTF-8 newreverse: 0.003491 seconds
UTF-16 reverse: 0.044414 seconds (2.00 M allocations: 91.553 MB, 6.95% gc time)
UTF-32 reverse: 0.048348 seconds (2.00 M allocations: 91.553 MB, 6.17% gc time)
Latin1 values:
UTF-8 oldreverse: 0.041937 seconds (2.00 M allocations: 91.553 MB, 9.69% gc time)
UTF-8 newreverse: 0.046335 seconds (2.00 M allocations: 91.553 MB, 7.21% gc time)
UTF-16 reverse: 0.045165 seconds (2.00 M allocations: 91.553 MB, 6.76% gc time)
UTF-32 reverse: 0.050539 seconds (2.00 M allocations: 91.553 MB, 6.23% gc time)
UCS2 values:
UTF-8 oldreverse: 0.039621 seconds (2.00 M allocations: 91.553 MB, 7.60% gc time)
UTF-8 newreverse: 0.045446 seconds (2.00 M allocations: 91.553 MB, 8.49% gc time)
UTF-16 reverse: 0.045310 seconds (2.00 M allocations: 91.553 MB, 6.72% gc time)
UTF-32 reverse: 0.047398 seconds (2.00 M allocations: 91.553 MB, 7.43% gc time)
Chars > 0xffff:
UTF-8 oldreverse: 0.045028 seconds (2.00 M allocations: 91.553 MB, 7.92% gc time)
UTF-8 newreverse: 0.043877 seconds (2.00 M allocations: 91.553 MB, 7.21% gc time)
UTF-16 reverse: 0.047826 seconds (2.00 M allocations: 91.553 MB, 6.64% gc time)
UTF-32 reverse: 0.056674 seconds (2.00 M allocations: 91.553 MB, 7.96% gc time)
Length of string: 4
ASCII reverse: 0.046714 seconds (2.00 M allocations: 91.553 MB, 6.93% gc time)
ASCII values:
UTF-8 oldreverse: 0.052762 seconds (2.00 M allocations: 91.553 MB, 7.88% gc time)
UTF-8 newreverse: 0.043381 seconds (2.00 M allocations: 91.553 MB, 7.98% gc time)
UTF-16 reverse: 0.046619 seconds (2.00 M allocations: 91.553 MB, 6.97% gc time)
UTF-32 reverse: 0.067789 seconds (2.00 M allocations: 106.812 MB, 13.26% gc time)
Latin1 values:
UTF-8 oldreverse: 0.047569 seconds (2.00 M allocations: 91.553 MB, 9.38% gc time)
UTF-8 newreverse: 0.046444 seconds (2.00 M allocations: 91.553 MB, 9.37% gc time)
UTF-16 reverse: 0.051302 seconds (2.00 M allocations: 91.553 MB, 7.80% gc time)
UTF-32 reverse: 0.060555 seconds (2.00 M allocations: 106.812 MB, 14.66% gc time)
UCS2 values:
UTF-8 oldreverse: 0.046441 seconds (2.00 M allocations: 91.553 MB, 8.19% gc time)
UTF-8 newreverse: 0.051044 seconds (2.00 M allocations: 91.553 MB, 7.14% gc time)
UTF-16 reverse: 0.047345 seconds (2.00 M allocations: 91.553 MB, 7.17% gc time)
UTF-32 reverse: 0.062245 seconds (2.00 M allocations: 106.812 MB, 13.33% gc time)
Chars > 0xffff:
UTF-8 oldreverse: 0.047050 seconds (2.00 M allocations: 91.553 MB, 8.19% gc time)
UTF-8 newreverse: 0.046330 seconds (2.00 M allocations: 91.553 MB, 9.19% gc time)
UTF-16 reverse: 0.051295 seconds (2.00 M allocations: 91.553 MB, 6.55% gc time)
UTF-32 reverse: 0.060346 seconds (2.00 M allocations: 106.812 MB, 13.86% gc time)
Length of string: 8
ASCII reverse: 0.053933 seconds (2.00 M allocations: 91.553 MB, 7.04% gc time)
ASCII values:
UTF-8 oldreverse: 0.050015 seconds (2.00 M allocations: 91.553 MB, 6.98% gc time)
UTF-8 newreverse: 0.044817 seconds (2.00 M allocations: 91.553 MB, 7.01% gc time)
UTF-16 reverse: 0.055804 seconds (2.00 M allocations: 106.812 MB, 14.63% gc time)
UTF-32 reverse: 0.087160 seconds (2.00 M allocations: 122.071 MB, 23.70% gc time)
Latin1 values:
UTF-8 oldreverse: 0.053177 seconds (2.00 M allocations: 91.553 MB, 7.72% gc time)
UTF-8 newreverse: 0.049747 seconds (2.00 M allocations: 91.553 MB, 6.83% gc time)
UTF-16 reverse: 0.059836 seconds (2.00 M allocations: 106.812 MB, 15.06% gc time)
UTF-32 reverse: 0.075300 seconds (2.00 M allocations: 122.071 MB, 22.69% gc time)
UCS2 values:
UTF-8 oldreverse: 0.047666 seconds (2.00 M allocations: 91.553 MB, 13.84% gc time)
UTF-8 newreverse: 0.054100 seconds (2.00 M allocations: 91.553 MB, 6.70% gc time)
UTF-16 reverse: 0.057785 seconds (2.00 M allocations: 106.812 MB, 14.92% gc time)
UTF-32 reverse: 0.078539 seconds (2.00 M allocations: 122.071 MB, 21.44% gc time)
Chars > 0xffff:
UTF-8 oldreverse: 0.047186 seconds (2.00 M allocations: 91.553 MB, 11.27% gc time)
UTF-8 newreverse: 0.045796 seconds (2.00 M allocations: 91.553 MB, 8.85% gc time)
UTF-16 reverse: 0.056153 seconds (2.00 M allocations: 106.812 MB, 14.88% gc time)
UTF-32 reverse: 0.071291 seconds (2.00 M allocations: 122.071 MB, 22.02% gc time)
Length of string: 16
ASCII reverse: 0.060178 seconds (2.00 M allocations: 106.812 MB, 15.36% gc time)
ASCII values:
UTF-8 oldreverse: 0.059139 seconds (2.00 M allocations: 106.812 MB, 13.68% gc time)
UTF-8 newreverse: 0.064110 seconds (2.00 M allocations: 106.812 MB, 14.58% gc time)
UTF-16 reverse: 0.082799 seconds (2.00 M allocations: 122.071 MB, 24.45% gc time)
UTF-32 reverse: 0.091677 seconds (2.00 M allocations: 152.588 MB, 27.74% gc time)
Latin1 values:
UTF-8 oldreverse: 0.066855 seconds (2.00 M allocations: 106.812 MB, 16.39% gc time)
UTF-8 newreverse: 0.066047 seconds (2.00 M allocations: 106.812 MB, 13.95% gc time)
UTF-16 reverse: 0.083560 seconds (2.00 M allocations: 122.071 MB, 24.83% gc time)
UTF-32 reverse: 0.089040 seconds (2.00 M allocations: 152.588 MB, 29.19% gc time)
UCS2 values:
UTF-8 oldreverse: 0.059305 seconds (2.00 M allocations: 106.812 MB, 14.27% gc time)
UTF-8 newreverse: 0.061206 seconds (2.00 M allocations: 106.812 MB, 10.40% gc time)
UTF-16 reverse: 0.088623 seconds (2.00 M allocations: 122.071 MB, 23.10% gc time)
UTF-32 reverse: 0.089503 seconds (2.00 M allocations: 152.588 MB, 30.00% gc time)
Chars > 0xffff:
UTF-8 oldreverse: 0.068810 seconds (2.00 M allocations: 106.812 MB, 17.35% gc time)
UTF-8 newreverse: 0.059569 seconds (2.00 M allocations: 106.812 MB, 14.30% gc time)
UTF-16 reverse: 0.081272 seconds (2.00 M allocations: 122.071 MB, 19.47% gc time)
UTF-32 reverse: 0.088320 seconds (2.00 M allocations: 152.588 MB, 26.54% gc time)
Length of string: 64
ASCII reverse: 0.128005 seconds (2.00 M allocations: 152.588 MB, 22.81% gc time)
ASCII values:
UTF-8 oldreverse: 0.128985 seconds (2.00 M allocations: 152.588 MB, 21.58% gc time)
UTF-8 newreverse: 0.142336 seconds (2.00 M allocations: 152.588 MB, 21.13% gc time)
UTF-16 reverse: 0.152780 seconds (2.00 M allocations: 213.623 MB, 26.39% gc time)
UTF-32 reverse: 0.170482 seconds (2.00 M allocations: 396.729 MB, 36.47% gc time)
Latin1 values:
UTF-8 oldreverse: 0.146011 seconds (2.00 M allocations: 152.588 MB, 19.97% gc time)
UTF-8 newreverse: 0.146322 seconds (2.00 M allocations: 152.588 MB, 19.15% gc time)
UTF-16 reverse: 0.146836 seconds (2.00 M allocations: 213.623 MB, 27.09% gc time)
UTF-32 reverse: 0.167517 seconds (2.00 M allocations: 396.729 MB, 35.38% gc time)
UCS2 values:
UTF-8 oldreverse: 0.133692 seconds (2.00 M allocations: 152.588 MB, 19.01% gc time)
UTF-8 newreverse: 0.141955 seconds (2.00 M allocations: 152.588 MB, 20.95% gc time)
UTF-16 reverse: 0.150047 seconds (2.00 M allocations: 213.623 MB, 28.45% gc time)
UTF-32 reverse: 0.169460 seconds (2.00 M allocations: 396.729 MB, 36.12% gc time)
Chars > 0xffff:
UTF-8 oldreverse: 0.144327 seconds (2.00 M allocations: 152.588 MB, 21.39% gc time)
UTF-8 newreverse: 0.168502 seconds (2.00 M allocations: 152.588 MB, 17.93% gc time)
UTF-16 reverse: 0.164903 seconds (2.00 M allocations: 213.623 MB, 27.13% gc time)
UTF-32 reverse: 0.192607 seconds (2.00 M allocations: 396.729 MB, 36.02% gc time)
Length of string: 256
ASCII reverse: 0.340366 seconds (2.00 M allocations: 396.729 MB, 22.01% gc time)
ASCII values:
UTF-8 oldreverse: 0.367536 seconds (2.00 M allocations: 396.729 MB, 21.02% gc time)
UTF-8 newreverse: 0.346424 seconds (2.00 M allocations: 396.729 MB, 21.23% gc time)
UTF-16 reverse: 0.415075 seconds (2.00 M allocations: 610.352 MB, 27.36% gc time)
UTF-32 reverse: 0.436749 seconds (2.00 M allocations: 1.103 GB, 39.53% gc time)
Latin1 values:
UTF-8 oldreverse: 0.426764 seconds (2.00 M allocations: 396.729 MB, 18.80% gc time)
UTF-8 newreverse: 0.424360 seconds (2.00 M allocations: 396.729 MB, 17.64% gc time)
UTF-16 reverse: 0.432054 seconds (2.00 M allocations: 610.352 MB, 27.65% gc time)
UTF-32 reverse: 0.405953 seconds (2.00 M allocations: 1.103 GB, 39.80% gc time)
UCS2 values:
UTF-8 oldreverse: 0.370457 seconds (2.00 M allocations: 396.729 MB, 21.41% gc time)
UTF-8 newreverse: 0.436571 seconds (2.00 M allocations: 396.729 MB, 17.26% gc time)
UTF-16 reverse: 0.423437 seconds (2.00 M allocations: 610.352 MB, 27.63% gc time)
UTF-32 reverse: 0.425747 seconds (2.00 M allocations: 1.103 GB, 39.11% gc time)
Chars > 0xffff:
UTF-8 oldreverse: 0.375635 seconds (2.00 M allocations: 396.729 MB, 19.48% gc time)
UTF-8 newreverse: 0.527642 seconds (2.00 M allocations: 396.729 MB, 14.54% gc time)
UTF-16 reverse: 0.454972 seconds (2.00 M allocations: 656.129 MB, 27.24% gc time)
UTF-32 reverse: 0.418369 seconds (2.00 M allocations: 1.103 GB, 40.37% gc time)
Length of string: 1024
ASCII reverse: 1.137994 seconds (2.00 M allocations: 1.103 GB, 18.84% gc time)
ASCII values:
UTF-8 oldreverse: 1.197267 seconds (2.00 M allocations: 1.103 GB, 17.88% gc time)
UTF-8 newreverse: 1.196598 seconds (2.00 M allocations: 1.103 GB, 18.38% gc time)
UTF-16 reverse: 1.413917 seconds (2.00 M allocations: 2.012 GB, 20.07% gc time)
UTF-32 reverse: 1.198975 seconds (2.00 M allocations: 3.919 GB, 27.13% gc time)
Latin1 values:
UTF-8 oldreverse: 1.390881 seconds (2.00 M allocations: 1.103 GB, 15.10% gc time)
UTF-8 newreverse: 1.545459 seconds (2.00 M allocations: 1.103 GB, 14.62% gc time)
UTF-16 reverse: 1.518551 seconds (2.00 M allocations: 2.012 GB, 21.30% gc time)
UTF-32 reverse: 1.242489 seconds (2.00 M allocations: 3.919 GB, 27.60% gc time)
UCS2 values:
UTF-8 oldreverse: 1.248608 seconds (2.00 M allocations: 1.177 GB, 18.47% gc time)
UTF-8 newreverse: 1.538900 seconds (2.00 M allocations: 1.177 GB, 14.86% gc time)
UTF-16 reverse: 1.453261 seconds (2.00 M allocations: 2.012 GB, 21.23% gc time)
UTF-32 reverse: 1.282385 seconds (2.00 M allocations: 3.919 GB, 27.68% gc time)
Chars > 0xffff:
UTF-8 oldreverse: 1.299516 seconds (2.00 M allocations: 1.281 GB, 18.30% gc time)
UTF-8 newreverse: 2.049694 seconds (2.00 M allocations: 1.281 GB, 11.97% gc time)
UTF-16 reverse: 1.456468 seconds (2.00 M allocations: 2.131 GB, 20.06% gc time)
UTF-32 reverse: 1.260085 seconds (2.00 M allocations: 4.157 GB, 26.16% gc time)
Length of string: 4096
ASCII reverse: 4.105720 seconds (2.00 M allocations: 3.919 GB, 12.61% gc time)
ASCII values:
UTF-8 oldreverse: 4.276641 seconds (2.00 M allocations: 3.919 GB, 12.21% gc time)
UTF-8 newreverse: 4.267139 seconds (2.00 M allocations: 3.919 GB, 12.28% gc time)
UTF-16 reverse: 4.728655 seconds (2.00 M allocations: 7.734 GB, 14.42% gc time)
UTF-32 reverse: 4.057931 seconds (3.00 M allocations: 15.348 GB, 24.56% gc time)
Latin1 values:
UTF-8 oldreverse: 5.540127 seconds (2.00 M allocations: 4.157 GB, 9.46% gc time)
UTF-8 newreverse: 5.604333 seconds (2.00 M allocations: 4.157 GB, 9.42% gc time)
UTF-16 reverse: 4.815122 seconds (2.00 M allocations: 7.734 GB, 14.70% gc time)
UTF-32 reverse: 4.165398 seconds (3.00 M allocations: 15.348 GB, 24.82% gc time)
UCS2 values:
UTF-8 oldreverse: 4.628135 seconds (2.00 M allocations: 4.396 GB, 11.86% gc time)
UTF-8 newreverse: 5.866495 seconds (2.00 M allocations: 4.396 GB, 9.44% gc time)
UTF-16 reverse: 4.537087 seconds (2.00 M allocations: 7.734 GB, 13.68% gc time)
UTF-32 reverse: 3.897756 seconds (3.00 M allocations: 15.348 GB, 24.05% gc time)
Chars > 0xffff:
UTF-8 oldreverse: 4.716892 seconds (2.00 M allocations: 4.634 GB, 12.02% gc time)
UTF-8 newreverse: 7.894531 seconds (2.00 M allocations: 4.634 GB, 7.21% gc time)
UTF-16 reverse: 5.014368 seconds (2.00 M allocations: 8.211 GB, 14.32% gc time)
UTF-32 reverse: 4.348015 seconds (3.00 M allocations: 16.302 GB, 24.73% gc time)
Length of string: 16384
ASCII reverse: 15.248985 seconds (3.00 M allocations: 15.348 GB, 10.29% gc time)
ASCII values:
UTF-8 oldreverse: 16.403243 seconds (3.00 M allocations: 15.348 GB, 10.08% gc time)
UTF-8 newreverse: 16.085859 seconds (3.00 M allocations: 15.348 GB, 9.38% gc time)
UTF-16 reverse: 16.989447 seconds (3.00 M allocations: 30.607 GB, 11.32% gc time)
UTF-32 reverse: 17.613437 seconds (3.00 M allocations: 61.125 GB, 22.62% gc time)
Latin1 values:
UTF-8 oldreverse: 22.172378 seconds (3.00 M allocations: 16.302 GB, 7.29% gc time)
UTF-8 newreverse: 21.677226 seconds (3.00 M allocations: 16.302 GB, 7.49% gc time)
UTF-16 reverse: 18.306207 seconds (3.00 M allocations: 30.607 GB, 12.79% gc time)
UTF-32 reverse: 16.821477 seconds (3.00 M allocations: 61.125 GB, 22.92% gc time)
UCS2 values:
UTF-8 oldreverse: 17.363041 seconds (3.00 M allocations: 17.256 GB, 9.44% gc time)
UTF-8 newreverse: 22.434713 seconds (3.00 M allocations: 17.256 GB, 7.24% gc time)
UTF-16 reverse: 17.906505 seconds (3.00 M allocations: 30.607 GB, 12.29% gc time)
UTF-32 reverse: 17.722014 seconds (3.00 M allocations: 61.125 GB, 23.17% gc time)
Chars > 0xffff:
UTF-8 oldreverse: 17.909246 seconds (3.00 M allocations: 18.209 GB, 8.86% gc time)
UTF-8 newreverse: 29.814591 seconds (3.00 M allocations: 18.209 GB, 5.34% gc time)
UTF-16 reverse: 17.806809 seconds (3.00 M allocations: 32.514 GB, 11.04% gc time)
UTF-32 reverse: 18.531684 seconds (3.00 M allocations: 64.939 GB, 23.12% gc time)
# Compatibility shim: when running on a Julia version older than 0.4, define
# `AbstractString` as an alias for `String` so that the `T <: AbstractString`
# method signatures in this file can resolve the name.
VERSION < v"0.4-" && (typealias AbstractString String)
# Raise a `UnicodeError` constructed from `msg`, `pos` and `ch`.
# Declared `@noinline` so the throw is not inlined into its callers.
@noinline function throw_uni_error(msg, pos, ch)
    throw(UnicodeError(msg, pos, ch))
end
# Pure-Julia reversal of a `UTF8String`.
#
# Walks the input bytes front to back and writes each encoded character,
# with its bytes kept in their original order, into the output buffer from
# the back towards the front. The width of a character is decided from its
# first byte only; continuation bytes are copied without being validated.
#
# Returns `s` itself when it holds at most one byte, otherwise a new
# `UTF8String`. Calls `throw_uni_error` with `Base.UTF_ERR_SHORT` when a
# multi-byte lead byte is followed by fewer bytes than its width requires.
function newreverse(s::UTF8String)
    src = s.data
    nbytes = length(src)
    nbytes <= 1 && return s
    dst = Vector{UInt8}(nbytes)
    # `tail` counts the output bytes still unwritten. Because input and output
    # advance by the same amount, it is also the number of unread input bytes,
    # so the `tail < 0` checks below run before any read past the end of `src`.
    tail = nbytes
    idx = 1
    @inbounds while tail > 0
        lead = src[idx]
        if lead <= 0x7f
            # Single-byte character.
            dst[tail] = lead
            tail -= 1
            idx += 1
        elseif lead <= 0xdf
            # Handled as the first byte of a two-byte sequence.
            tail -= 2
            tail < 0 && throw_uni_error(Base.UTF_ERR_SHORT, idx, lead)
            dst[tail + 1] = lead
            dst[tail + 2] = src[idx + 1]
            idx += 2
        elseif lead < 0xf0
            # Handled as the first byte of a three-byte sequence.
            tail -= 3
            tail < 0 && throw_uni_error(Base.UTF_ERR_SHORT, idx, lead)
            dst[tail + 1] = lead
            dst[tail + 2] = src[idx + 1]
            dst[tail + 3] = src[idx + 2]
            idx += 3
        else
            # Everything from 0xf0 upwards is handled as a four-byte sequence.
            tail -= 4
            tail < 0 && throw_uni_error(Base.UTF_ERR_SHORT, idx, lead)
            dst[tail + 1] = lead
            dst[tail + 2] = src[idx + 1]
            dst[tail + 3] = src[idx + 2]
            dst[tail + 4] = src[idx + 3]
            idx += 4
        end
    end
    UTF8String(dst)
end
# Reverse a `UTF8String` by calling the C routine `u8_reverse` via `ccall`.
#
# A buffer of the same size as the input data is allocated and passed as the
# destination. When the C call returns 1, a `UnicodeError` carrying
# `Base.UTF_ERR_INVALID_8` is thrown; otherwise the filled buffer is wrapped
# in a new `UTF8String`.
function oldreverse(s::UTF8String)
    src = s.data
    dst = similar(src)
    status = ccall(:u8_reverse, Cint, (Ptr{UInt8}, Ptr{UInt8}, Csize_t),
                   dst, src, length(dst))
    status == 1 && throw(UnicodeError(Base.UTF_ERR_INVALID_8,0,0))
    UTF8String(dst)
end
# Benchmark driver: call `reverse(str)` `max` times and return the value
# produced by the last call.
# `max` is expected to be at least 1; with no iterations `result` is never
# assigned.
function tst{T <: AbstractString}(str::T, max::Int)
    local result
    for rep in 1:max
        result = reverse(str)
    end
    return result
end
# Benchmark driver: call `oldreverse(str)` `max` times and return the value
# produced by the last call.
# `max` is expected to be at least 1; with no iterations `result` is never
# assigned.
function tstold{T <: AbstractString}(str::T, max::Int)
    local result
    for rep in 1:max
        result = oldreverse(str)
    end
    return result
end
# Benchmark driver: call `newreverse(str)` `max` times and return the value
# produced by the last call.
# `max` is expected to be at least 1; with no iterations `result` is never
# assigned.
function tstnew{T <: AbstractString}(str::T, max::Int)
    local result
    for rep in 1:max
        result = newreverse(str)
    end
    return result
end
# Print `@time` measurements for `n` reversals of `strUTF8` using, in order:
# `tstold`, `tstnew`, `tst` on the UTF-16 conversion and `tst` on the UTF-32
# conversion. Each measurement is preceded by a label on the same line.
# Returns an empty string.
function tstrev(n::Int, strUTF8::UTF8String)
    print("UTF-8 oldreverse: ")
    @time tstold(strUTF8, n)

    print("UTF-8 newreverse: ")
    @time tstnew(strUTF8, n)

    # The `utf16`/`utf32` conversions sit inside the timed expressions, so
    # their cost is part of the reported figures.
    print("UTF-16 reverse: ")
    @time tst(utf16(strUTF8), n)

    print("UTF-32 reverse: ")
    @time tst(utf32(strUTF8), n)

    println()
    return ""
end
# Run one round of measurements for a family of equally structured strings.
#
# Prints the length of `strAscii`, times `n` calls of `reverse` on it via
# `tst`, then prints a heading and runs `tstrev` for each of the four UTF-8
# samples in turn. Returns whatever the final `tstrev` call returns.
function tstreverse(n,strAscii,strA_UTF8,strL_UTF8,str3_UTF8,str4_UTF8)
    println("Length of string: ", length(strAscii))
    print("ASCII reverse: ")
    @time tst(strAscii, n)

    cases = (("ASCII values:", strA_UTF8),
             ("Latin1 values:", strL_UTF8),
             ("UCS2 values:", str3_UTF8),
             ("Chars > 0xffff:", str4_UTF8))
    local res
    for (heading, sample) in cases
        println(heading)
        res = tstrev(n, sample)
    end
    return res
end
# Run the whole benchmark suite, performing `n` reversals per measurement.
#
# Measurements are printed for the empty string, then for strings of 1, 4, 8
# and 16 characters, then for strings of 64, 256, 1024, 4096 and 16384
# characters. At every length there is one pure-ASCII string and four UTF-8
# strings whose first character (repeated along with the ASCII tail in the
# longer strings) is U+0061, U+00FF, U+7FFF and U+7FFFF respectively.
function dotest(n)
    strA = "a"
    strL = "\uff"
    str3 = "\u7fff"
    # `\u` reads at most four hex digits, so "\u7ffff" would be U+7FFF followed
    # by the letter 'f' and the four-byte code path would never be exercised.
    # `\U` accepts up to eight digits and yields the intended U+7FFFF.
    str4 = "\U0007ffff"
    ascstr = "bcdefghijklmnop"

    println("Length of string: 0")
    print("ASCII reverse: ")
    @time tst("",n)
    # The outer `@time` adds one more line with the combined cost of the four
    # measurements that `tstrev` prints itself.
    @time tstrev(n, utf8(""))

    # Short strings: the leading character plus 0, 3, 7 or 15 ASCII characters.
    for extra in ("", ascstr[1:3], ascstr[1:7], ascstr)
        tstreverse(n,strA*extra,utf8(strA*extra),strL*extra,str3*extra,str4*extra)
    end

    # Long strings: start from the 16-character versions and quadruple the
    # length on each of the five rounds.
    strA *= ascstr
    strL *= ascstr
    str3 *= ascstr
    str4 *= ascstr
    for i=1:5
        strA ^= 4
        strL ^= 4
        str3 ^= 4
        str4 ^= 4
        tstreverse(n,strA,utf8(strA),strL,str3,str4)
    end
end
""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment