Last active
December 15, 2015 23:29
-
-
Save quinnj/5340631 to your computer and use it in GitHub Desktop.
* Implemented the spectral-norm benchmark from: http://benchmarksgame.alioth.debian.org/u32/performance.php?test=spectralnorm
* From my timings, this implementation consistently ran in 10-12s, putting us ahead of all other dynamic languages (and represents a significant speed up compared to the previous version at https://github.com/JuliaLang/ju…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# The Computer Language Benchmarks Game | |
# spectral-norm benchmark | |
# http://shootout.alioth.debian.org/u32/performance.php?test=spectralnorm | |
# | |
# Based on the Javascript program | |
# | |
include("timing.jl") | |
A(i,j) = 1.0 / ((i+j)*(i+j+1.0)/2.0+i+1.0) | |
function Au(u,w) | |
n = length(u) | |
for i = 1:n, j = 1:n | |
j == 1 && (w[i] = 0.0) | |
w[i] += A(i-1,j-1) * u[j] | |
end | |
end | |
function Atu(w,v) | |
n = length(w) | |
for i = 1:n, j = 1:n | |
j == 1 && (v[i] = 0.0) | |
v[i] += A(j-1,i-1) * w[j] | |
end | |
end | |
function AtAu(u,v,w) | |
Au(u,w) | |
Atu(w,v) | |
end | |
function approximate(n) | |
u = ones(Float64,n) | |
v = zeros(Float64,n) | |
w = zeros(Float64,n) | |
vv = vBv = 0.0 | |
for i = 1:10 | |
AtAu(u,v,w) | |
AtAu(v,u,w) | |
end | |
for i = 1:n | |
vBv += u[i]*v[i] | |
vv += v[i]*v[i] | |
end | |
return sqrt(vBv/vv) | |
end | |
function spectralnorm(N) | |
@printf("%.09f\n", approximate(N)) | |
end | |
if length(ARGS) >= 1 | |
N = int(ARGS[1]) | |
else | |
N = 100 | |
end | |
spectralnorm(N) | |
# @assert round(spectralnorm(100),9) == 1.274219991 | |
# @timeit spectralnorm(500) "spectralnorm(n=500)" | |
# @timeit spectralnorm(3000) "spectralnorm(n=3000)" | |
# @timeit spectralnorm(5500) "spectralnorm(n=5500)" | |
# macro timeit(ex,name) | |
# @gensym t i | |
# quote | |
# $t = Inf | |
# for $i=1:5 | |
# $t = min($t, @elapsed $ex) | |
# end | |
# @printf("julia, %s: best of 5 took %.1f ms\n", $name, $t*1000) | |
# end | |
# end |
It's not impossible that it is already getting inlined automatically.
Here's the disassemble output, which is not easy to read:
; julia> disassemble(Au,(Vector{Float64},Vector{Float64}))
define %jl_value_t* @julia_Au(%jl_value_t*, %jl_value_t**, i32) {
top:
%3 = load %jl_value_t** %1, !dbg !5508
%4 = getelementptr inbounds %jl_value_t* %3, i32 0, i32 0, !dbg !5515
%5 = getelementptr %jl_value_t** %4, i64 2, !dbg !5515
%6 = load %jl_value_t** %5, !dbg !5515
%7 = ptrtoint %jl_value_t* %6 to i64, !dbg !5515
%8 = icmp slt i64 %7, 1, !dbg !5516
br i1 %8, label %L15, label %L1.preheader.lr.ph, !dbg !5516
L1.preheader.lr.ph: ; preds = %top
%9 = getelementptr %jl_value_t** %1, i64 1, !dbg !5508
%10 = load %jl_value_t** %9, !dbg !5508
%11 = getelementptr inbounds %jl_value_t* %10, i32 0, i32 0, !dbg !5517
%12 = getelementptr %jl_value_t** %11, i64 2, !dbg !5517
%13 = getelementptr %jl_value_t** %11, i64 1, !dbg !5517
%14 = getelementptr %jl_value_t** %4, i64 1, !dbg !5518
br label %if2.lr.ph, !dbg !5516
if2.lr.ph: ; preds = %L13, %L1.preheader.lr.ph
%"#s1.017" = phi i64 [ 1, %L1.preheader.lr.ph ], [ %54, %L13 ]
%15 = add i64 %"#s1.017", -1, !dbg !5517
%16 = sitofp i64 %15 to double, !dbg !5518
br label %if2, !dbg !5516
if2: ; preds = %if2.lr.ph, %idxend10
%"#s3.016" = phi i64 [ 1, %if2.lr.ph ], [ %52, %idxend10 ]
%17 = icmp eq i64 %"#s3.016", 1, !dbg !5517
br i1 %17, label %if3, label %L4, !dbg !5517
if3: ; preds = %if2
%18 = load %jl_value_t** %12, !dbg !5517
%19 = ptrtoint %jl_value_t* %18 to i64, !dbg !5517
%20 = icmp ult i64 %15, %19, !dbg !5517
br i1 %20, label %idxend, label %oob, !dbg !5517
oob: ; preds = %if3
%21 = load %jl_value_t** @jl_bounds_exception, !dbg !5517
call void @jl_throw_with_superfluous_argument(%jl_value_t* %21, i32 4), !dbg !5517
unreachable, !dbg !5517
idxend: ; preds = %if3
%22 = load %jl_value_t** %13, !dbg !5517
%23 = bitcast %jl_value_t* %22 to double*, !dbg !5517
%24 = getelementptr double* %23, i64 %15, !dbg !5517
store double 0.000000e+00, double* %24, !dbg !5517
br label %L4, !dbg !5517
L4: ; preds = %if2, %idxend
%25 = add i64 %"#s3.016", -1, !dbg !5518
%26 = load %jl_value_t** %12, !dbg !5518
%27 = ptrtoint %jl_value_t* %26 to i64, !dbg !5518
%28 = icmp ult i64 %15, %27, !dbg !5518
br i1 %28, label %idxend6, label %oob5, !dbg !5518
oob5: ; preds = %L4
%29 = load %jl_value_t** @jl_bounds_exception, !dbg !5518
call void @jl_throw_with_superfluous_argument(%jl_value_t* %29, i32 5), !dbg !5518
unreachable, !dbg !5518
idxend6: ; preds = %L4
%30 = load %jl_value_t** %5, !dbg !5518
%31 = ptrtoint %jl_value_t* %30 to i64, !dbg !5518
%32 = icmp ult i64 %25, %31, !dbg !5518
br i1 %32, label %idxend10, label %oob7, !dbg !5518
oob7: ; preds = %idxend6
%33 = load %jl_value_t** @jl_bounds_exception, !dbg !5518
call void @jl_throw_with_superfluous_argument(%jl_value_t* %33, i32 5), !dbg !5518
unreachable, !dbg !5518
idxend10: ; preds = %idxend6
%34 = add i64 %25, %15, !dbg !5518
%35 = sitofp i64 %34 to double, !dbg !5518
%36 = fadd double %35, 1.000000e+00, !dbg !5518
%37 = fmul double %35, %36, !dbg !5518
%38 = load %jl_value_t** %14, !dbg !5518
%39 = fmul double %37, 5.000000e-01, !dbg !5518
%40 = bitcast %jl_value_t* %38 to double*, !dbg !5518
%41 = fadd double %16, %39, !dbg !5518
%42 = load %jl_value_t** %13, !dbg !5518
%43 = getelementptr double* %40, i64 %25, !dbg !5518
%44 = fadd double %41, 1.000000e+00, !dbg !5518
%45 = bitcast %jl_value_t* %42 to double*, !dbg !5518
%46 = load double* %43, !dbg !5518
%47 = fdiv double 1.000000e+00, %44, !dbg !5518
%48 = getelementptr double* %45, i64 %15, !dbg !5518
%49 = fmul double %46, %47, !dbg !5518
%50 = load double* %48, !dbg !5518
%51 = fadd double %50, %49, !dbg !5518
store double %51, double* %48, !dbg !5518
%52 = add i64 %"#s3.016", 1, !dbg !5518
%53 = icmp sgt i64 %52, %7, !dbg !5516
br i1 %53, label %L13, label %if2, !dbg !5516
L13: ; preds = %idxend10
%54 = add i64 %"#s1.017", 1, !dbg !5518
%55 = icmp sgt i64 %54, %7, !dbg !5516
br i1 %55, label %L15, label %if2.lr.ph, !dbg !5516
L15: ; preds = %L13, %top
ret %jl_value_t* inttoptr (i64 4312184832 to %jl_value_t*), !dbg !5518
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This looks pretty slick. The only thing I could imagine (maybe) giving a speedup would be inlining
A(i,j) = 1.0 / ((i+j)*(i+j+1.0)/2.0+i+1.0)
.