Created
August 8, 2016 02:58
-
-
Save c42f/9999dc6f9b63a9bd4ea4237a95876475 to your computer and use it in GitHub Desktop.
Prototyping for sscanf in Julia
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
/*
 * Reference C benchmark: skip the first (header) line of the geoid data file,
 * then parse each "GEO ..." record with fscanf and store the first float field
 * of every record in a growable array.
 *
 * Returns 0 once parsing stops at the first record that does not yield all 11
 * fields (including end of file).  Returns 1 if the file cannot be opened, the
 * header line cannot be skipped, or memory cannot be allocated.
 */
int main()
{
    FILE* f = fopen("AUSGeoid09_GDA94_V1.01_DOV.txt", "r");
    if (!f)
        return 1;
    /* The header conversion is assignment-suppressed, so success returns 0. */
    if (fscanf(f, "%*[^\n]\n") != 0)
    {
        fclose(f);
        return 1;
    }
    int hlen = 10;
    int hindex = 0;
    int status = 0;
    float* height = malloc(hlen*sizeof(float));
    if (!height)
    {
        fclose(f);
        return 1;
    }
    while (1)
    {
        float a1;
        char a2;
        int a3;
        int a4;
        float a5;
        char a6;
        int a7;
        int a8;
        float a9;
        float a10;
        float a11;
        if (fscanf(f, "GEO %f %c %d %d %f %c%d %d %f %f %f\n",
                   &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, &a9, &a10, &a11) != 11)
        {
            break;
        }
        if (hindex >= hlen)
        {
            /* Grow through a temporary so the old buffer is not lost (and can
               still be freed) if realloc fails. */
            float* grown = realloc(height, (size_t)hlen*2*sizeof(float));
            if (!grown)
            {
                status = 1;
                break;
            }
            height = grown;
            hlen *= 2;
        }
        height[hindex] = a1;
        hindex += 1;
    }
    /* Release everything acquired above on every exit path. */
    free(height);
    fclose(f);
    return status;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Prototype wrapper around the C `sscanf` function.  A `ScanF` object holds a
# NUL-terminated format string and records, in its type parameter, the tuple of
# C types that the format's conversions produce.
module A

# `Types` is a `Tuple{...}` type listing one C type per conversion in `fmt`.
# `fmt` holds the bytes of the format string, terminated by a 0 byte so it can
# be handed directly to C.
type ScanF{Types}
    fmt::Vector{UInt8}
    # TODO: Persistent Refs and other Buffers?
end

# Build a `ScanF` from a format string.
#
# The string is scanned for conversions matching `%[a-zA-Z]`; `%f`, `%d` and
# `%c` map to `Cfloat`, `Cint` and `Cchar` respectively.  Any other matched
# conversion throws an `ErrorException`.  An empty format string is accepted
# and produces a format consisting of just the terminator.
function ScanF(formatstr)
    match_range = 1:1
    argtypes = Any[]
    while true
        # Resume the search at the conversion letter of the previous match;
        # that character cannot itself start a new `%` conversion.
        match_range = search(formatstr, r"%[a-zA-Z]", last(match_range))
        if isempty(match_range)
            break
        end
        fmtchar = formatstr[last(match_range)]
        if fmtchar == 'f'
            push!(argtypes, Cfloat)
        elseif fmtchar == 'd'
            push!(argtypes, Cint)
        elseif fmtchar == 'c'
            push!(argtypes, Cchar)
        else
            throw(ErrorException("Format spec $(formatstr[match_range]) not implemented"))
        end
    end
    fmt = Vector{UInt8}(formatstr)
    # Guard against the empty string before inspecting the last byte.
    if isempty(fmt) || fmt[end] != 0
        push!(fmt, 0)
    end
    ScanF{Tuple{argtypes...}}(fmt)
end

# Display the format string (without its internal NUL terminator) and the
# tuple of C types it produces.
function Base.show{ArgTypes}(io::IO, scanf::ScanF{ArgTypes})
    nbytes = length(scanf.fmt)
    while nbytes > 0 && scanf.fmt[nbytes] == 0
        nbytes -= 1
    end
    formatstr = ASCIIString(scanf.fmt[1:nbytes])
    types = (ArgTypes.parameters...)
    print(io, "ScanF(\"$formatstr\") -> $types")
end

# String macro `scanf"..."`: constructs the `ScanF` object at macro expansion
# time, so the format is parsed once rather than on every evaluation.
macro scanf_str(fmt)
    ScanF(fmt)
end

# Read one line from `io` and parse it with `sscanf` using the format in
# `scanf`.  Returns a tuple with one value per conversion.
#
# The method body is generated per `ScanF{ArgTypes}` type: it allocates one
# `Ref` per conversion, passes them to `sscanf`, and dereferences them into the
# result tuple.
@generated function Base.read{ArgTypes}(io::IO, scanf::ScanF{ArgTypes})
    scantypes = (ArgTypes.parameters...)
    nargs = length(scantypes)
    args = [Symbol("arg$i") for i in 1:nargs]
    # ccall signature: input string, format bytes, then one pointer per field.
    sscanf_types = :((Cstring, Ptr{UInt8}, $([:(Ptr{$p}) for p in scantypes]...)))
    argptrs = [:($(args[i]) = Ref{$(scantypes[i])}()) for i = 1:nargs]
    results_dereferenced = [Expr(:ref, args[i]) for i=1:nargs]
    quote
        nargs = $(length(args))
        str = readline(io)
        $(argptrs...)
        result = ccall((:sscanf, :libc), Cint, $sscanf_types, str, scanf.fmt, $(args...))
        # FIXME: `result` (the number of converted fields) is not checked, so a
        # line that does not match the format still returns the current `Ref`
        # contents.  Candidate handling that was sketched here:
        #if result != nargs
            #Nullable{typeof(($(results_dereferenced...)...))}()
            #error(string("Converted ", result, " arguments, expected ", nargs))
        #end
        ($(results_dereferenced...),)
    end
end

export @scanf_str

end
#------------------------------------------------------------------------------ | |
# Baseline benchmark: walk the data file one line at a time without parsing it.
# Only the code of each line's first character is kept (as a Float64), so the
# timing is dominated by `readline` itself.  Reading stops at the first empty
# string returned by `readline`.
function bench_readline()
    open("AUSGeoid09_GDA94_V1.01_DOV.txt") do stream
        firstchars = Float64[]
        current = readline(stream)
        while !isempty(current)
            push!(firstchars, Int(current[1]))
            current = readline(stream)
        end
        firstchars
    end
end
# Benchmark the prototype ScanF object above.  Every line up to end of file is
# handed to `read(io, ::ScanF)`, and the first parsed field of each result
# tuple is collected as a Float64.
function bench_scanf()
    function parse_all(stream)
        parsed = Float64[]
        while !eof(stream)
            fields = read(stream, A.@scanf_str("GEO %f %c %d %d %f %c%d %d %f %f %f"))
            push!(parsed, first(fields))
        end
        return parsed
    end
    return open(parse_all, "AUSGeoid09_GDA94_V1.01_DOV.txt")
end
# Experimental in-place readline, adapted from base/io.jl: bytes read from `io`
# are appended to `line` up to and including the next '\n', or until end of
# stream.  `line` is not cleared first; the same vector is returned.
#
# Very slow relative to readline(::IOStream), since the latter has magic
# optimization in jl_readuntil via the internal private streambuf in the C
# code :-(
function readline!(line::Vector{UInt8}, io::IO)
    newline = UInt8('\n')
    while !eof(io)
        push!(line, read(io, UInt8))
        # Stop as soon as the byte just appended is the line terminator.
        line[end] == newline && break
    end
    return line
end
# Explicitly write out everything needed to do the ccall, so we can see what
# kind of performance is possible.
#
# Each line of the data file is passed to C `sscanf` with a fixed 11-field
# format.  The first field of every line that converts all 11 fields is
# collected as a Float64.  Lines that do not fully match the format (such as a
# header line) are skipped instead of contributing whatever value the output
# `Ref` happened to hold.
function bench_explicit()
    open("AUSGeoid09_GDA94_V1.01_DOV.txt") do io
        heights = Float64[]
        # NUL-terminated copy of the format string, passed to C as raw bytes.
        fmt = Vector{UInt8}("GEO %f %c %d %d %f %c%d %d %f %f %f"); push!(fmt, 0x00)
        # Number of conversions in `fmt`; sscanf returns this on a full match.
        nfields = 11
        # Output slots are allocated once and reused for every line.
        a1 = Ref{Cfloat}()
        a2 = Ref{Cchar}()
        a3 = Ref{Cint}()
        a4 = Ref{Cint}()
        a5 = Ref{Cfloat}()
        a6 = Ref{Cchar}()
        a7 = Ref{Cint}()
        a8 = Ref{Cint}()
        a9 = Ref{Cfloat}()
        a10 = Ref{Cfloat}()
        a11 = Ref{Cfloat}()
        while true
            # Note calling readline!() here to avoid allocating the line buffer
            # every time is a big performance hole (see readline!()
            # implementation above).
            line = readline(io)
            if isempty(line)
                break
            end
            result = ccall((:sscanf, :libc), Cint,
                (Cstring, Ptr{UInt8}, Ptr{Cfloat}, Ptr{Cchar}, Ptr{Cint}, Ptr{Cint}, Ptr{Cfloat}, Ptr{Cchar}, Ptr{Cint}, Ptr{Cint}, Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}),
                line, fmt, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11
            )
            # Only trust the outputs when every field was converted.
            if result != nfields
                continue
            end
            push!(heights, a1[])
        end
        heights
    end
end
# Driver: when the data file is present in the working directory, time each of
# the three benchmarks in turn; otherwise print where the data can be fetched.
if isfile("AUSGeoid09_GDA94_V1.01_DOV.txt")
    println("bench_readline() - read and discard lines")
    @time bench_readline()

    println("bench_scanf() - parse lines with prototype scanf wrapper")
    @time bench_scanf()

    println("bench_explicit() - parse lines with bare ccall")
    @time bench_explicit()
else
    println("Test data available at ftp://ftp.ga.gov.au/geodesy-outgoing/gravity/ausgeoid/AUSGeoid09_V1.01/AUSGeoid09_GDA94_V1.01_DOV.zip")
end
# Evaluate to `nothing` so the script's last value is not a benchmark result.
nothing
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment