Created
March 27, 2014 15:01
-
-
Save ArchRobison/9809496 to your computer and use it in GitHub Desktop.
Example of @simd and 32-bit floating point.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| $ cat v.jl | |
# vadd_one!(arr): add 1 to every element of `arr`, modifying the array in place.
# The signature only accepts one-dimensional Float32 arrays (Array{Float32, 1}).
# The last expression is the `for` loop, so the function is called for its effect on `arr`.
| function vadd_one!(arr::Array{Float32, 1}) | |
# @simd annotates the loop so the compiler may reorder/vectorize its iterations; the IR
# listing that follows in this transcript contains <4 x float> loads, adds and stores.
# NOTE(review): `1:length(arr)` is valid here because the argument is restricted to Array;
# `eachindex(arr)` is the more general idiom, but the source is left as-is because the
# IR shown below was produced from exactly this code.
| @simd for i = 1:length(arr) | |
# @inbounds disables bounds checking for this indexing expression; every i produced by
# 1:length(arr) is a valid index of `arr`.
| @inbounds arr[i] += 1 | |
| end | |
| end | |
# Print the LLVM IR generated for the vadd_one! method taking an Array{Float32, 1};
# the second argument is the tuple of argument types selecting that method.
| code_llvm(vadd_one!,(Array{Float32, 1},)) | |
| $ julia v.jl | |
; LLVM IR printed for vadd_one!. The only parameter (%0) is a %jl_value_t* and the
; function returns void.
| define void @"julia_vadd_one!15588"(%jl_value_t*) { | |
| top: | |
; Load the field tagged %jtbaa_arraylen from the object behind %0 and reinterpret it as
; an i64 length, then compute length - 1 with a signed-overflow check.
| %1 = getelementptr inbounds %jl_value_t* %0, i64 2, i32 0, !dbg !175 | |
| %2 = load %jl_value_t** %1, align 8, !dbg !175, !tbaa %jtbaa_arraylen | |
| %3 = ptrtoint %jl_value_t* %2 to i64, !dbg !175 | |
| %4 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %3, i64 1), !dbg !175 | |
| %5 = extractvalue { i64, i1 } %4, 1, !dbg !175 | |
| br i1 %5, label %fail, label %pass, !dbg !175 | |
; Overflow path for the subtraction: load the overflow exception object and throw it.
| fail: ; preds = %top | |
| %6 = load %jl_value_t** @jl_overflow_exception, align 8, !dbg !175 | |
| call void @jl_throw_with_superfluous_argument(%jl_value_t* %6, i32 41), !dbg !175 | |
| unreachable, !dbg !175 | |
; Subtraction succeeded: add 1 back, again with a signed-overflow check
; (presumably the element count of the 1:length(arr) range -- TODO confirm).
| pass: ; preds = %top | |
| %7 = extractvalue { i64, i1 } %4, 0, !dbg !175 | |
| %8 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %7, i64 1), !dbg !175 | |
| %9 = extractvalue { i64, i1 } %8, 1, !dbg !175 | |
| br i1 %9, label %fail1, label %pass2, !dbg !175 | |
; Overflow path for the addition; same throw sequence as %fail.
| fail1: ; preds = %pass | |
| %10 = load %jl_value_t** @jl_overflow_exception, align 8, !dbg !175 | |
| call void @jl_throw_with_superfluous_argument(%jl_value_t* %10, i32 41), !dbg !175 | |
| unreachable, !dbg !175 | |
; %13 = (length >= 1) ? recomputed count : 0. It is the last 1-based index the loops
; below process (see %exitcond). Return straight away when %13 < 1.
| pass2: ; preds = %pass | |
| %11 = icmp sge %jl_value_t* %2, inttoptr (i64 1 to %jl_value_t*), !dbg !175 | |
| %12 = extractvalue { i64, i1 } %8, 0, !dbg !175 | |
| %13 = select i1 %11, i64 %12, i64 0, !dbg !175 | |
| %14 = icmp slt i64 %13, 1, !dbg !180 | |
| br i1 %14, label %L5, label %L.preheader, !dbg !180 | |
; Load the pointer tagged %jtbaa_arrayptr and view it as float*.
; %n.vec is %13 rounded down to a multiple of 8 (and with -8). Its low three bits are
; zero, so "or 1" yields %n.vec + 1: the 1-based index at which the vector loop stops.
; When %n.vec is 0 (fewer than 8 elements) the vector loop is skipped.
| L.preheader: ; preds = %pass2 | |
| %15 = getelementptr inbounds %jl_value_t* %0, i64 1, i32 0, !dbg !181 | |
| %16 = load %jl_value_t** %15, align 8, !dbg !181, !tbaa %jtbaa_arrayptr, !llvm.mem.parallel_loop_access !183 | |
| %17 = bitcast %jl_value_t* %16 to float*, !dbg !181 | |
| %n.vec = and i64 %13, -8 | |
| %end.idx.rnd.down12 = or i64 %n.vec, 1 | |
| %cmp.zero = icmp eq i64 %end.idx.rnd.down12, 1 | |
| br i1 %cmp.zero, label %middle.block, label %vector.body | |
; Vectorized loop: each iteration handles 8 floats as two <4 x float> halves.
; %index is 1-based, so the first half starts at element offset %index - 1 and the
; second half at %index + 3. Loads and stores use align 4 (element alignment only).
| vector.body: ; preds = %L.preheader, %vector.body | |
| %index = phi i64 [ %index.next, %vector.body ], [ 1, %L.preheader ] | |
| %18 = add i64 %index, -1 | |
| %19 = getelementptr float* %17, i64 %18, !dbg !181 | |
| %20 = bitcast float* %19 to <4 x float>* | |
| %wide.load = load <4 x float>* %20, align 4 | |
| %.sum = add i64 %index, 3 | |
| %21 = getelementptr float* %17, i64 %.sum | |
| %22 = bitcast float* %21 to <4 x float>* | |
| %wide.load7 = load <4 x float>* %22, align 4 | |
; Add 1.0 to every lane of both halves, then write them back to the same addresses.
| %23 = fadd <4 x float> %wide.load, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> | |
| %24 = fadd <4 x float> %wide.load7, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> | |
| store <4 x float> %23, <4 x float>* %20, align 4 | |
| store <4 x float> %24, <4 x float>* %22, align 4 | |
; Advance by 8 elements and loop until the rounded-down end index is reached.
| %index.next = add i64 %index, 8 | |
| %25 = icmp eq i64 %index.next, %end.idx.rnd.down12 | |
| br i1 %25, label %middle.block, label %vector.body | |
; %resume.val is the first index not handled by the vector loop (1 if it was skipped).
; If it equals %13 + 1 nothing remains and the function returns; otherwise the scalar
; loop below processes the tail.
| middle.block: ; preds = %vector.body, %L.preheader | |
| %resume.val = phi i64 [ 1, %L.preheader ], [ %end.idx.rnd.down12, %vector.body ] | |
| %end.idx = add i64 %13, 1 | |
| %cmp.n = icmp eq i64 %end.idx, %resume.val | |
| br i1 %cmp.n, label %L5, label %L | |
; Scalar remainder loop: one float per iteration, for indices %resume.val through %13
; inclusive. The branch metadata marks this loop as already vectorized.
| L: ; preds = %middle.block, %L | |
| %i.0 = phi i64 [ %30, %L ], [ %resume.val, %middle.block ] | |
| %26 = add i64 %i.0, -1, !dbg !181 | |
| %27 = getelementptr float* %17, i64 %26, !dbg !181 | |
| %28 = load float* %27, align 4, !dbg !181, !tbaa %jtbaa_user, !llvm.mem.parallel_loop_access !183 | |
| %29 = fadd float %28, 1.000000e+00, !dbg !181 | |
| store float %29, float* %27, align 4, !dbg !181, !tbaa %jtbaa_user, !llvm.mem.parallel_loop_access !183 | |
| %30 = add i64 %i.0, 1, !dbg !186 | |
| %exitcond = icmp eq i64 %i.0, %13, !dbg !187 | |
| br i1 %exitcond, label %L5, label %L, !dbg !187, !llvm.loop.parallel !184, !llvm.vectorizer.already_vectorized !184 | |
; Common exit for the empty-array case, the fully vectorized case and the scalar tail.
| L5: ; preds = %middle.block, %L, %pass2 | |
| ret void, !dbg !187 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment