Skip to content

Instantly share code, notes, and snippets.

@ArchRobison
Created March 27, 2014 15:01
Show Gist options
  • Select an option

  • Save ArchRobison/9809496 to your computer and use it in GitHub Desktop.

Select an option

Save ArchRobison/9809496 to your computer and use it in GitHub Desktop.
Example of @simd and 32-bit floating point.
$ cat v.jl
# Add 1 to every element of `arr`, modifying the array in place.
# The method only accepts one-dimensional Float32 arrays (see the signature).
function vadd_one!(arr::Array{Float32, 1})
# @simd marks the loop as a candidate for vectorization; iterations must be independent.
@simd for i = 1:length(arr)
# @inbounds skips the bounds check on arr[i]; i stays within 1:length(arr).
@inbounds arr[i] += 1
end
end
# Print the LLVM IR generated for the Array{Float32, 1} method of vadd_one!.
code_llvm(vadd_one!,(Array{Float32, 1},))
$ julia v.jl
define void @"julia_vadd_one!15588"(%jl_value_t*) {
top:
%1 = getelementptr inbounds %jl_value_t* %0, i64 2, i32 0, !dbg !175
%2 = load %jl_value_t** %1, align 8, !dbg !175, !tbaa %jtbaa_arraylen
%3 = ptrtoint %jl_value_t* %2 to i64, !dbg !175
%4 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %3, i64 1), !dbg !175
%5 = extractvalue { i64, i1 } %4, 1, !dbg !175
br i1 %5, label %fail, label %pass, !dbg !175
fail: ; preds = %top
%6 = load %jl_value_t** @jl_overflow_exception, align 8, !dbg !175
call void @jl_throw_with_superfluous_argument(%jl_value_t* %6, i32 41), !dbg !175
unreachable, !dbg !175
pass: ; preds = %top
%7 = extractvalue { i64, i1 } %4, 0, !dbg !175
%8 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %7, i64 1), !dbg !175
%9 = extractvalue { i64, i1 } %8, 1, !dbg !175
br i1 %9, label %fail1, label %pass2, !dbg !175
fail1: ; preds = %pass
%10 = load %jl_value_t** @jl_overflow_exception, align 8, !dbg !175
call void @jl_throw_with_superfluous_argument(%jl_value_t* %10, i32 41), !dbg !175
unreachable, !dbg !175
pass2: ; preds = %pass
%11 = icmp sge %jl_value_t* %2, inttoptr (i64 1 to %jl_value_t*), !dbg !175
%12 = extractvalue { i64, i1 } %8, 0, !dbg !175
%13 = select i1 %11, i64 %12, i64 0, !dbg !175
%14 = icmp slt i64 %13, 1, !dbg !180
br i1 %14, label %L5, label %L.preheader, !dbg !180
L.preheader: ; preds = %pass2
%15 = getelementptr inbounds %jl_value_t* %0, i64 1, i32 0, !dbg !181
%16 = load %jl_value_t** %15, align 8, !dbg !181, !tbaa %jtbaa_arrayptr, !llvm.mem.parallel_loop_access !183
%17 = bitcast %jl_value_t* %16 to float*, !dbg !181
%n.vec = and i64 %13, -8
%end.idx.rnd.down12 = or i64 %n.vec, 1
%cmp.zero = icmp eq i64 %end.idx.rnd.down12, 1
br i1 %cmp.zero, label %middle.block, label %vector.body
vector.body: ; preds = %L.preheader, %vector.body
%index = phi i64 [ %index.next, %vector.body ], [ 1, %L.preheader ]
%18 = add i64 %index, -1
%19 = getelementptr float* %17, i64 %18, !dbg !181
%20 = bitcast float* %19 to <4 x float>*
%wide.load = load <4 x float>* %20, align 4
%.sum = add i64 %index, 3
%21 = getelementptr float* %17, i64 %.sum
%22 = bitcast float* %21 to <4 x float>*
%wide.load7 = load <4 x float>* %22, align 4
%23 = fadd <4 x float> %wide.load, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
%24 = fadd <4 x float> %wide.load7, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
store <4 x float> %23, <4 x float>* %20, align 4
store <4 x float> %24, <4 x float>* %22, align 4
%index.next = add i64 %index, 8
%25 = icmp eq i64 %index.next, %end.idx.rnd.down12
br i1 %25, label %middle.block, label %vector.body
middle.block: ; preds = %vector.body, %L.preheader
%resume.val = phi i64 [ 1, %L.preheader ], [ %end.idx.rnd.down12, %vector.body ]
%end.idx = add i64 %13, 1
%cmp.n = icmp eq i64 %end.idx, %resume.val
br i1 %cmp.n, label %L5, label %L
L: ; preds = %middle.block, %L
%i.0 = phi i64 [ %30, %L ], [ %resume.val, %middle.block ]
%26 = add i64 %i.0, -1, !dbg !181
%27 = getelementptr float* %17, i64 %26, !dbg !181
%28 = load float* %27, align 4, !dbg !181, !tbaa %jtbaa_user, !llvm.mem.parallel_loop_access !183
%29 = fadd float %28, 1.000000e+00, !dbg !181
store float %29, float* %27, align 4, !dbg !181, !tbaa %jtbaa_user, !llvm.mem.parallel_loop_access !183
%30 = add i64 %i.0, 1, !dbg !186
%exitcond = icmp eq i64 %i.0, %13, !dbg !187
br i1 %exitcond, label %L5, label %L, !dbg !187, !llvm.loop.parallel !184, !llvm.vectorizer.already_vectorized !184
L5: ; preds = %middle.block, %L, %pass2
ret void, !dbg !187
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment