Created
April 17, 2019 22:19
-
-
Save quinnj/712821c3718ac434af466d5775a238c6 to your computer and use it in GitHub Desktop.
Argument type perf difference
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Foo

"""
    memcmp(a::Ptr{UInt8}, b::Ptr{UInt8}, len::Integer) -> Bool

Return `true` when the `len` bytes starting at `a` and at `b` are identical.
Thin wrapper over the C library `memcmp`; the caller must keep both memory
regions alive and valid for `len` bytes.
"""
memcmp(a::Ptr{UInt8}, b::Ptr{UInt8}, len::Integer) =
    ccall(:memcmp, Cint, (Ptr{UInt8}, Ptr{UInt8}, Csize_t), a, b, len) == 0

"""
    delimlen(delim)

Number of bytes occupied by `delim`: `1` for a single `UInt8`, or the stored
length for a `(pointer, length)` tuple.
"""
delimlen(delim::UInt8) = 1
delimlen(delim::Tuple{Ptr{UInt8}, Int}) = delim[2]

"""
    checkdelim(delim, b, buf, pos, eof) -> Bool

Return `true` when the delimiter is found at `pos`. For a `UInt8` delimiter only
the current byte `b` is compared. For a `(pointer, length)` delimiter the bytes
of `buf` starting at `pos` are compared, provided the delimiter fits before `eof`.
"""
checkdelim(delim::UInt8, b, buf, pos, eof) = delim == b
@inline function checkdelim(delim::Tuple{Ptr{UInt8}, Int}, b, buf, pos, eof)
    # the delimiter must fit entirely before `eof` to be a candidate
    pos + delim[2] <= eof || return false
    return memcmp(pointer(buf, pos), delim[1], delim[2])
end

"""
    overflowval(T)

Largest accumulator value for which `T(10) * x + digit` (digit in `0:9`) cannot
overflow `T`; above it the parser switches to overflow-checked arithmetic.
"""
overflowval(::Type{T}) where {T <: Integer} = div(typemax(T) - T(9), T(10))

# Parsing results are reported as a bit set stored in an Int16. Every invalid
# flag includes the sign bit (INVALID), so an invalid code is always negative.
const ReturnCode = Int16

"""
    ok(code::ReturnCode) -> Bool

`true` when `code` is strictly positive, i.e. at least one flag is set and the
`INVALID` sign bit is clear.
"""
ok(x::ReturnCode) = x > 0

const SUCCESS = 0b0000000000000000 % ReturnCode
const INVALID = 0b1000000000000000 % ReturnCode
# success flags
const OK = 0b0000000000000001 % ReturnCode
const SENTINEL = 0b0000000000000010 % ReturnCode
# property flags
const QUOTED = 0b0000000000000100 % ReturnCode
const DELIMITED = 0b0000000000001000 % ReturnCode
const NEWLINE = 0b0000000000010000 % ReturnCode
const EOF = 0b0000000000100000 % ReturnCode
const ESCAPED_STRING = 0b0000001000000000 % ReturnCode
# invalid flags
const INVALID_QUOTED_FIELD = 0b1000000001000000 % ReturnCode
const INVALID_DELIMITER = 0b1000000010000000 % ReturnCode
const OVERFLOW = 0b1000000100000000 % ReturnCode

"""
    defaultparser(T, buf, pos, eof, sentinel, wh1, wh2, oq, cq, e, ignorerepeated, delim)

Parse one integer field of type `T` from `buf`, starting at index `pos`.

# Arguments
- `buf`: indexable byte buffer that also supports `pointer(buf, i)`.
- `pos`: index of the first byte of the field.
- `eof`: one past the last readable index; `pos >= eof` means the input is exhausted.
- `sentinel`: `nothing` (no sentinel handling), `missing` (an empty field counts as a
  sentinel), or an iterable of `(Ptr{UInt8}, Int)` pairs, sorted longest first.
- `wh1`, `wh2`: bytes treated as whitespace around the value.
- `oq`, `cq`, `e`: opening quote, closing quote and escape bytes.
- `ignorerepeated`: when `true`, consecutive delimiters are consumed as one.
- `delim`: a single byte or a `(Ptr{UInt8}, Int)` multi-byte delimiter.

# Returns
`(x, code, len)` where `x::T` is the parsed value (`zero(T)` when no digits were
read; unspecified when `OVERFLOW` is set), `code` is a `ReturnCode` bit set and
`len` is the distance `pos` advanced. When parsing stops at a single delimiter or
at a newline, `pos` is left on that byte; with `ignorerepeated` it is advanced
past the delimiters that were consumed.
"""
@inline function defaultparser(
    ::Type{T}, buf, pos, eof, sentinel,
    wh1::UInt8, wh2::UInt8, oq::UInt8, cq::UInt8, e::UInt8,
    ignorerepeated::Bool, delim::Union{UInt8, Tuple{Ptr{UInt8}, Int}},
) where {T <: Integer}
    startpos = pos
    code = SUCCESS
    x = zero(T)
    neg = false
    quoted = false
    sentinelpos = 0
    # true only when an explicit list of sentinel strings was supplied
    hassentinel = sentinel !== nothing && sentinel !== missing
    if pos >= eof
        code = (sentinel === missing ? SENTINEL : INVALID) | EOF
        @goto donedone
    end
    @inbounds b = buf[pos]
    # strip leading whitespace
    while b == wh1 || b == wh2
        pos += 1
        if pos == eof
            code = INVALID | EOF
            @goto donedone
        end
        @inbounds b = buf[pos]
    end
    # check for start of quoted field
    quoted = b == oq
    if quoted
        code = QUOTED
        pos += 1
        if pos == eof
            # input ended right after the opening quote
            code |= INVALID_QUOTED_FIELD | EOF
            @goto donedone
        end
        @inbounds b = buf[pos]
        # ignore whitespace within quoted field
        while b == wh1 || b == wh2
            pos += 1
            if pos == eof
                code |= INVALID_QUOTED_FIELD | EOF
                @goto donedone
            end
            @inbounds b = buf[pos]
        end
    end
    # check for sentinel values if applicable
    if hassentinel
        startptr = pointer(buf, pos)
        # sentinel is an iterable of Tuple{Ptr{UInt8}, Int}, sorted from longest to shortest
        for (ptr, len) in sentinel
            if pos + len <= eof && memcmp(startptr, ptr, len)
                sentinelpos = pos + len
                break
            end
        end
    end
    # start actual int parsing
    neg = b == UInt8('-')
    pos += neg || b == UInt8('+')
    if pos == eof
        # eof right after a sign character: only a matched sentinel can rescue the field
        if hassentinel && sentinelpos > 0
            pos = sentinelpos
            code |= SENTINEL | EOF
        else
            code |= INVALID | EOF
        end
        if quoted
            # eof in the middle of a quoted field
            code |= INVALID_QUOTED_FIELD
        end
        @goto donedone
    end
    # `digit` is the byte shifted so that '0'..'9' map to 0..9; UInt8 wraparound
    # sends every other byte above 0x09. The raw byte is re-read at `donevalue`.
    @inbounds digit = buf[pos] - UInt8('0')
    if digit > 0x09
        # character isn't a digit, check for sentinels, otherwise INVALID value
        if sentinel === missing && pos == startpos
            # nothing consumed yet: zero-width sentinel match (empty field)
            code |= SENTINEL
        elseif hassentinel && sentinelpos > 0
            pos = sentinelpos
            code |= SENTINEL
        else
            code |= INVALID
        end
        @goto donevalue
    end
    while true
        # convert the digit to T so the accumulator never changes type
        x = T(10) * x + T(digit)
        pos += 1
        if pos == eof
            x = ifelse(neg, -x, x)
            # a sentinel at least as long as the digits parsed wins over the number
            if hassentinel && sentinelpos >= pos
                pos = sentinelpos
                code |= SENTINEL | EOF
            else
                code |= OK | EOF
            end
            if quoted
                # but if we're inside a quoted field, that's invalid
                code |= INVALID_QUOTED_FIELD
            end
            @goto donedone
        end
        @inbounds digit = buf[pos] - UInt8('0')
        if digit > 0x09
            # detected a non-digit, time to bail on value parsing
            if hassentinel && sentinelpos >= pos
                pos = sentinelpos
                code |= SENTINEL
            else
                code |= OK
            end
            x = ifelse(neg, -x, x)
            @goto donevalue
        end
        x > overflowval(T) && break
    end
    # extra loop because we got too close to overflowing while parsing digits
    while true
        x, ov_mul = Base.mul_with_overflow(x, T(10))
        x, ov_add = Base.add_with_overflow(x, T(digit))
        if ov_mul | ov_add
            # we overflowed, check for valid sentinel, otherwise mark as OVERFLOW
            if hassentinel && sentinelpos >= pos
                pos = sentinelpos
                code |= SENTINEL
            else
                code |= OVERFLOW
            end
            @goto donevalue
        end
        pos += 1
        if pos == eof
            if hassentinel && sentinelpos >= pos
                pos = sentinelpos
                code |= SENTINEL | EOF
            else
                code |= OK | EOF
            end
            x = ifelse(neg, -x, x)
            if quoted
                code |= INVALID_QUOTED_FIELD
            end
            @goto donedone
        end
        @inbounds digit = buf[pos] - UInt8('0')
        if digit > 0x09
            if hassentinel && sentinelpos >= pos
                pos = sentinelpos
                code |= SENTINEL
            else
                code |= OK
            end
            x = ifelse(neg, -x, x)
            @goto donevalue
        end
    end
    @label donevalue
    # donevalue means we finished parsing a value or sentinel.
    # A sentinel match may have moved `pos` all the way to eof.
    if pos >= eof
        code |= EOF
        if quoted
            code |= INVALID_QUOTED_FIELD
        end
        @goto donedone
    end
    # reload the raw byte at `pos`: the digit loops only kept the shifted value,
    # and a sentinel match may have moved `pos`
    @inbounds b = buf[pos]
    # strip trailing whitespace
    while b == wh1 || b == wh2
        pos += 1
        if pos == eof
            code |= EOF
            @goto donedone
        end
        @inbounds b = buf[pos]
    end
    # for quoted fields, find the closing quote character
    # we should be positioned on the closing quote if everything is as it should be;
    # if it isn't found immediately, something's wrong, so mark INVALID
    if quoted
        same = cq == e
        isfirst = true
        while true
            # `b` is the byte at the previous position; `pos` now points one past it
            pos += 1
            if same && b == e
                if pos == eof
                    # quote was the last byte: it closes the field
                    code |= EOF
                    if !isfirst
                        code |= INVALID
                    end
                    @goto donedone
                elseif buf[pos] != cq
                    # not a doubled (escaped) quote, so this closes the field
                    if !isfirst
                        code |= INVALID
                    end
                    break
                end
                # doubled quote: skip the escaped character
                pos += 1
            elseif b == e
                if pos == eof
                    code |= INVALID_QUOTED_FIELD | EOF
                    @goto donedone
                end
                # skip the escaped character
                pos += 1
            elseif b == cq
                if !isfirst
                    code |= INVALID
                end
                break
            end
            if pos == eof
                code |= INVALID_QUOTED_FIELD | EOF
                @goto donedone
            end
            isfirst = false
            @inbounds b = buf[pos]
        end
        # the closing quote may have been the very last byte of the input
        if pos >= eof
            code |= EOF
            @goto donedone
        end
        @inbounds b = buf[pos]
        # ignore whitespace after quoted field
        while b == wh1 || b == wh2
            pos += 1
            if pos == eof
                code |= EOF
                @goto donedone
            end
            @inbounds b = buf[pos]
        end
    end
    # look for a delimiter or newline at the current byte; while neither is found,
    # flag the field INVALID_DELIMITER and skip forward one byte at a time
    while true
        if !ignorerepeated
            if checkdelim(delim, b, buf, pos, eof)
                # found the delimiter we were looking for
                code |= DELIMITED
                @goto donedone
            end
        else
            matched = false
            while checkdelim(delim, b, buf, pos, eof)
                matched = true
                pos += delimlen(delim)
                if pos >= eof
                    code |= EOF
                    break
                end
                @inbounds b = buf[pos]
            end
            if matched
                code |= DELIMITED
                @goto donedone
            end
        end
        # didn't find delimiter, but let's check for a newline character
        if b == UInt8('\n')
            code |= NEWLINE | ifelse(pos + 1 == eof, EOF, SUCCESS)
            @goto donedone
        elseif b == UInt8('\r')
            # only peek for "\r\n" when a following byte exists
            if pos + 1 < eof && buf[pos + 1] == UInt8('\n')
                pos += 1
            end
            code |= NEWLINE | ifelse(pos + 1 == eof, EOF, SUCCESS)
            @goto donedone
        end
        # unexpected byte after the value: the field is invalid, keep scanning
        code |= INVALID_DELIMITER
        pos += 1
        if pos == eof
            code |= EOF
            @goto donedone
        end
        @inbounds b = buf[pos]
    end
    @label donedone
    return x, code, pos - startpos
end

end # module
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment