Skip to content

Instantly share code, notes, and snippets.

@c42f
Created August 8, 2016 02:58
Show Gist options
  • Save c42f/9999dc6f9b63a9bd4ea4237a95876475 to your computer and use it in GitHub Desktop.
Save c42f/9999dc6f9b63a9bd4ea4237a95876475 to your computer and use it in GitHub Desktop.
Prototyping an sscanf wrapper in Julia
#include <stdio.h>
#include <stdlib.h>
/*
 * Reference benchmark: open the data file, discard its first line, then parse
 * every following "GEO ..." record with fscanf and store the first float of
 * each record in a growable array.
 *
 * Returns 0 when the read loop ends normally (first record that does not yield
 * all 11 fields, or end of file) and 1 when the file cannot be opened, the
 * first line cannot be skipped, or memory cannot be allocated.
 */
int main()
{
    FILE* f = fopen("AUSGeoid09_GDA94_V1.01_DOV.txt", "r");
    if (!f)
        return 1;
    /* "%*[^\n]\n" reads and discards everything up to and including the first
     * newline; it performs no assignments, so success is a return of 0. */
    if (fscanf(f, "%*[^\n]\n") != 0)
    {
        fclose(f);
        return 1;
    }
    int hlen = 10;
    int hindex = 0;
    int status = 0;
    float* height = malloc((size_t)hlen*sizeof(float));
    if (!height)
    {
        fclose(f);
        return 1;
    }
    while (1)
    {
        float a1;
        char a2;
        int a3;
        int a4;
        float a5;
        char a6;
        int a7;
        int a8;
        float a9;
        float a10;
        float a11;
        /* Stop at the first record that does not convert all 11 fields. */
        if (fscanf(f, "GEO %f %c %d %d %f %c%d %d %f %f %f\n",
                   &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, &a9, &a10, &a11) != 11)
        {
            break;
        }
        if (hindex >= hlen)
        {
            /* Grow through a temporary so the old buffer is not lost (and
             * written through as NULL) when realloc fails. */
            float* grown = realloc(height, (size_t)hlen*2*sizeof(float));
            if (!grown)
            {
                status = 1;
                break;
            }
            height = grown;
            hlen *= 2;
        }
        height[hindex] = a1;
        hindex += 1;
    }
    free(height);
    fclose(f);
    return status;
}
module A

# Compiled description of a C scanf-style format string.
#
# The type parameter `Types` is a `Tuple{...}` type listing, in order, the C
# type produced by each conversion found in the format string.
type ScanF{Types}
    # Bytes of the format string, terminated by a 0 byte when built via
    # `ScanF(formatstr)`; handed to `sscanf` as a `Ptr{UInt8}`.
    fmt::Vector{UInt8}
    # TODO: Persistent Refs and other Buffers?
end

# Build a `ScanF` from `formatstr`.
#
# Scans the string for `%<letter>` conversions and records the matching C type
# for each: `%f` -> Cfloat, `%d` -> Cint, `%c` -> Cchar.  Any other conversion
# letter throws an `ErrorException`.  The stored byte vector always ends with
# a 0 byte.
function ScanF(formatstr)
    match_range = 1:1
    argtypes = Any[]
    while true
        # Resume searching at the last character of the previous match.
        match_range = search(formatstr, r"%[a-zA-Z]", last(match_range))
        if isempty(match_range)
            break
        end
        fmtchar = formatstr[last(match_range)]
        if fmtchar == 'f'
            push!(argtypes, Cfloat)
        elseif fmtchar == 'd'
            push!(argtypes, Cint)
        elseif fmtchar == 'c'
            push!(argtypes, Cchar)
        else
            throw(ErrorException("Format spec $(formatstr[match_range]) not implemented"))
        end
    end
    fmt = Vector{UInt8}(formatstr)
    # An empty format has no last byte to inspect: check emptiness first so
    # that `ScanF("")` gets a terminator instead of raising a BoundsError.
    if isempty(fmt) || fmt[end] != 0
        push!(fmt, 0)
    end
    ScanF{Tuple{argtypes...}}(fmt)
end

# Print as `ScanF("<format>") -> (<types>...)`.
function Base.show{ArgTypes}(io::IO, scanf::ScanF{ArgTypes})
    # Leave the trailing 0 terminator out of the displayed format text.
    bytes = scanf.fmt
    if !isempty(bytes) && bytes[end] == 0
        bytes = bytes[1:end-1]
    end
    formatstr = ASCIIString(bytes)
    types = (ArgTypes.parameters...,)
    print(io, "ScanF(\"$formatstr\") -> $types")
end

# String macro `scanf"..."`: the `ScanF` object is constructed while the macro
# is expanded and the resulting object is what the macro returns.
macro scanf_str(fmt)
    ScanF(fmt)
end

# Read one line from `io` and parse it with `sscanf` using the format held in
# `scanf`.  Returns a tuple with one value per conversion, typed according to
# `ArgTypes`.
#
# Generated per `ArgTypes`: the generator emits one `Ref` per conversion and a
# `ccall` whose argument-type tuple lists a `Ptr` to each conversion type.
@generated function Base.read{ArgTypes}(io::IO, scanf::ScanF{ArgTypes})
    scantypes = (ArgTypes.parameters...,)
    nargs = length(scantypes)
    args = [Symbol("arg$i") for i in 1:nargs]
    sscanf_types = :((Cstring, Ptr{UInt8}, $([:(Ptr{$p}) for p in scantypes]...)))
    argptrs = [:($(args[i]) = Ref{$(scantypes[i])}()) for i = 1:nargs]
    results_dereferenced = [Expr(:ref, args[i]) for i=1:nargs]
    quote
        nargs = $(length(args))
        str = readline(io)
        $(argptrs...)
        result = ccall((:sscanf, :libc), Cint, $sscanf_types, str, scanf.fmt, $(args...))
        # FIXME: `result` (the number of conversions sscanf performed) is not
        # compared against `nargs`, so a line that only partially matches is
        # not reported.  Open question from the prototype: return an empty
        # Nullable of the result tuple type, or raise an error such as
        # "Converted <result> arguments, expected <nargs>".
        ($(results_dereferenced...),)
    end
end

export @scanf_str

end
#------------------------------------------------------------------------------
# Baseline benchmark: walk the data file one `readline` at a time with no
# parsing.  Only the code of each line's first character is kept (as a
# Float64); the loop stops at the first empty string returned by `readline`.
function bench_readline()
    open("AUSGeoid09_GDA94_V1.01_DOV.txt") do stream
        firstcodes = Float64[]
        current = readline(stream)
        while !isempty(current)
            push!(firstcodes, Int(current[1]))
            current = readline(stream)
        end
        firstcodes
    end
end
# Benchmark for the prototype `ScanF` wrapper defined in module `A`: parse the
# data file record by record until end of file and collect the first parsed
# field of every record.
function bench_scanf()
    open("AUSGeoid09_GDA94_V1.01_DOV.txt") do stream
        # The string macro yields the ScanF object itself, so naming it once
        # here refers to the same parser on every iteration.
        parser = A.@scanf_str("GEO %f %c %d %d %f %c%d %d %f %f %f")
        firstfields = Float64[]
        while !eof(stream)
            record = read(stream, parser)
            push!(firstfields, record[1])
        end
        firstfields
    end
end
# Experimental in-place line reader, adapted from base/io.jl.
#
# Appends bytes from `io` onto `line` (existing contents are kept) up to and
# including the next '\n', or until `io` is exhausted, and returns `line`.
#
# Author's note: this is much slower than readline(::IOStream), which gets a
# dedicated fast path in jl_readuntil through the private stream buffer on the
# C side.
function readline!(line::Vector{UInt8}, io::IO)
    while !eof(io)
        push!(line, read(io, UInt8))
        # The byte just appended is the line terminator: stop here.
        line[end] == UInt8('\n') && break
    end
    return line
end
# Hand-written version of the sscanf call: every Ref and the full ccall
# signature are spelled out so the achievable performance of the bare call can
# be measured.  Returns the first parsed field of every line as Float64s.
function bench_explicit()
    open("AUSGeoid09_GDA94_V1.01_DOV.txt") do stream
        # Format bytes followed by an explicit 0 terminator for C.
        fmt = Vector{UInt8}("GEO %f %c %d %d %f %c%d %d %f %f %f")
        push!(fmt, 0x00)
        # One output slot per conversion, allocated once and reused per line.
        f1 = Ref{Cfloat}()
        c2 = Ref{Cchar}()
        i3 = Ref{Cint}()
        i4 = Ref{Cint}()
        f5 = Ref{Cfloat}()
        c6 = Ref{Cchar}()
        i7 = Ref{Cint}()
        i8 = Ref{Cint}()
        f9 = Ref{Cfloat}()
        f10 = Ref{Cfloat}()
        f11 = Ref{Cfloat}()
        heights = Float64[]
        # readline() allocates a fresh line each time.  Switching to the
        # buffer-reusing readline!() defined earlier in this file would avoid
        # that allocation, but is itself a big performance hole.
        line = readline(stream)
        while !isempty(line)
            # The conversion count returned by sscanf is not inspected.
            ccall((:sscanf, :libc), Cint,
                  (Cstring, Ptr{UInt8},
                   Ptr{Cfloat}, Ptr{Cchar}, Ptr{Cint}, Ptr{Cint},
                   Ptr{Cfloat}, Ptr{Cchar}, Ptr{Cint}, Ptr{Cint},
                   Ptr{Cfloat}, Ptr{Cfloat}, Ptr{Cfloat}),
                  line, fmt, f1, c2, i3, i4, f5, c6, i7, i8, f9, f10, f11)
            push!(heights, f1[])
            line = readline(stream)
        end
        heights
    end
end
# Driver: time the three benchmarks when the data file is present; otherwise
# print where the test data can be downloaded.
if isfile("AUSGeoid09_GDA94_V1.01_DOV.txt")
    println("bench_readline() - read and discard lines")
    @time bench_readline()

    println("bench_scanf() - parse lines with prototype scanf wrapper")
    @time bench_scanf()

    println("bench_explicit() - parse lines with bare ccall")
    @time bench_explicit()
else
    println("Test data available at ftp://ftp.ga.gov.au/geodesy-outgoing/gravity/ausgeoid/AUSGeoid09_V1.01/AUSGeoid09_GDA94_V1.01_DOV.zip")
end
# Keep the script's final value as `nothing`.
nothing
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment