Last active
March 27, 2018 17:22
-
-
Save ejpcmac/9b864fd5da528041f7111840463f8aa2 to your computer and use it in GitHub Desktop.
Rust loop performance
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; test_loop::sum_manual | |
; Function Attrs: noinline uwtable | |
; LLVM IR emitted for the hand-written counter loop variant.
; Arguments: the unnamed first argument (%0) is a String pointer that is only
; forwarded to to_string at the end; %rng is forwarded to next_u32 on every
; iteration; %n is the 8-bit iteration count.
define internal fastcc void @_ZN9test_loop10sum_manual17h519a3902bc8dc8acE(%"alloc::string::String"* noalias nocapture dereferenceable(24), i64** dereferenceable(8) %rng, i8 %n) unnamed_addr #5 { | |
start: | |
; %sum is a 4-byte stack slot; it is initialised to 0 and later handed to
; to_string by pointer.
%sum = alloca i32, align 4 | |
%1 = bitcast i32* %sum to i8* | |
call void @llvm.lifetime.start(i64 4, i8* nonnull %1) | |
store i32 0, i32* %sum, align 4 | |
; n == 0 skips the loop entirely and goes straight to the epilogue (bb2).
%2 = icmp eq i8 %n, 0 | |
br i1 %2, label %bb2, label %bb3.preheader | |
bb3.preheader: ; preds = %start | |
br label %bb3 | |
; Loop exit edge: store the accumulated total and compute total + 1.
bb1.bb2_crit_edge: ; preds = %bb3 | |
store i32 %6, i32* %sum, align 4 | |
%phitmp = add i32 %6, 1 | |
br label %bb2 | |
; Epilogue: the value written to %sum is the constant 1 when coming from
; %start (n == 0), otherwise total + 1 from the loop exit edge.
bb2: ; preds = %start, %bb1.bb2_crit_edge | |
%3 = phi i32 [ 1, %start ], [ %phitmp, %bb1.bb2_crit_edge ] | |
store i32 %3, i32* %sum, align 4 | |
; call <T as alloc::string::ToString>::to_string | |
call fastcc void @"_ZN45_$LT$T$u20$as$u20$alloc..string..ToString$GT$9to_string17h3ff76f5625d5cd43E"(%"alloc::string::String"* noalias nocapture nonnull dereferenceable(24) %0, i32* noalias nonnull readonly dereferenceable(4) %sum) | |
call void @llvm.lifetime.end(i64 4, i8* nonnull %1) | |
ret void | |
; Loop body: %4 is the running total, %i.02 the loop counter. Each iteration
; adds one next_u32 result to the total, bumps the counter, and exits once
; the counter equals %n.
bb3: ; preds = %bb3.preheader, %bb3 | |
%4 = phi i32 [ %6, %bb3 ], [ 0, %bb3.preheader ] | |
%i.02 = phi i8 [ %7, %bb3 ], [ 0, %bb3.preheader ] | |
; call <rand::ThreadRng as rand::Rng>::next_u32 | |
%5 = tail call i32 @"_ZN45_$LT$rand..ThreadRng$u20$as$u20$rand..Rng$GT$8next_u3217h0077021a89e53d20E"(i64** nonnull dereferenceable(8) %rng) | |
%6 = add i32 %4, %5 | |
%7 = add nuw i8 %i.02, 1 | |
%exitcond = icmp eq i8 %7, %n | |
br i1 %exitcond, label %bb1.bb2_crit_edge, label %bb3 | |
} | |
; test_loop::sum_for | |
; Function Attrs: noinline uwtable | |
; LLVM IR emitted for the `for _ in 0..n` variant.
; Arguments: the unnamed first argument (%0) is a String pointer that is only
; forwarded to to_string at the end; %rng is forwarded to next_u32 on every
; iteration; %n is the 8-bit iteration count.
; Unlike the other variants in this listing, the loop header here keeps an
; explicit overflow-checked increment (llvm.uadd.with.overflow.i8) and a
; separate `icmp ult` bound test, so there are two exits from the loop.
define internal fastcc void @_ZN9test_loop7sum_for17h10e61573495e47d9E(%"alloc::string::String"* noalias nocapture dereferenceable(24), i64** dereferenceable(8) %rng, i8 %n) unnamed_addr #5 personality i32 (i32, i32, i64, %"unwind::libunwind::_Unwind_Exception"*, %"unwind::libunwind::_Unwind_Context"*)* @rust_eh_personality { | |
start: | |
%sum = alloca i32, align 4 | |
%1 = bitcast i32* %sum to i8* | |
call void @llvm.lifetime.start(i64 4, i8* nonnull %1) | |
; n == 0 skips the loop entirely and goes straight to bb4.
%2 = icmp eq i8 %n, 0 | |
br i1 %2, label %bb4, label %"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit.preheader" | |
"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit.preheader": ; preds = %start | |
br label %"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit" | |
; Loop header: %iter.sroa.0.04 is the current iterator value and %storemerge3
; the running total. The iterator value is incremented with an overflow check;
; if the overflow bit (%4) is set the loop exits, otherwise bb6 runs.
"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit": ; preds = %"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit.preheader", %bb6 | |
%iter.sroa.0.04 = phi i8 [ %iter.sroa.0.0., %bb6 ], [ 0, %"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit.preheader" ] | |
%storemerge3 = phi i32 [ %8, %bb6 ], [ 0, %"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit.preheader" ] | |
%3 = tail call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %iter.sroa.0.04, i8 1) #8 | |
%4 = extractvalue { i8, i1 } %3, 1 | |
br i1 %4, label %bb4.loopexit, label %bb6 | |
; Merges the total from whichever of the two loop exits was taken.
bb4.loopexit: ; preds = %bb6, %"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit" | |
%storemerge.lcssa.ph = phi i32 [ %storemerge3, %"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit" ], [ %8, %bb6 ] | |
br label %bb4 | |
; Epilogue: the total is 0 when coming from %start (n == 0); 1 is added here
; in both cases before the value is stored and passed to to_string.
bb4: ; preds = %bb4.loopexit, %start | |
%storemerge.lcssa = phi i32 [ 0, %start ], [ %storemerge.lcssa.ph, %bb4.loopexit ] | |
%5 = add i32 %storemerge.lcssa, 1 | |
store i32 %5, i32* %sum, align 4 | |
; call <T as alloc::string::ToString>::to_string | |
call fastcc void @"_ZN45_$LT$T$u20$as$u20$alloc..string..ToString$GT$9to_string17h3ff76f5625d5cd43E"(%"alloc::string::String"* noalias nocapture nonnull dereferenceable(24) %0, i32* noalias nonnull readonly dereferenceable(4) %sum) | |
call void @llvm.lifetime.end(i64 4, i8* nonnull %1) | |
ret void | |
; Loop body. This block is only entered when %4 is false, so the select below
; always yields the incremented value %6. One next_u32 result is added to the
; total, then the loop continues while the new iterator value is below %n.
bb6: ; preds = %"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit" | |
%6 = extractvalue { i8, i1 } %3, 0 | |
%iter.sroa.0.0. = select i1 %4, i8 %iter.sroa.0.04, i8 %6 | |
; call <rand::ThreadRng as rand::Rng>::next_u32 | |
%7 = tail call i32 @"_ZN45_$LT$rand..ThreadRng$u20$as$u20$rand..Rng$GT$8next_u3217h0077021a89e53d20E"(i64** nonnull dereferenceable(8) %rng) | |
%8 = add i32 %storemerge3, %7 | |
%9 = icmp ult i8 %iter.sroa.0.0., %n | |
br i1 %9, label %"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit", label %bb4.loopexit | |
} | |
; test_loop::sum_mut_fold | |
; Function Attrs: noinline uwtable | |
; LLVM IR emitted for the `fold` variant that adds 1 after the fold.
; Arguments: the unnamed first argument (%0) is a String pointer that is only
; forwarded to to_string at the end; the unnamed second argument (%1) is
; forwarded to next_u32 on every iteration; %n is the 8-bit iteration count.
define internal fastcc void @_ZN9test_loop12sum_mut_fold17h5ec7beccd9c6eebcE(%"alloc::string::String"* noalias nocapture dereferenceable(24), i64** dereferenceable(8), i8 %n) unnamed_addr #5 personality i32 (i32, i32, i64, %"unwind::libunwind::_Unwind_Exception"*, %"unwind::libunwind::_Unwind_Context"*)* @rust_eh_personality { | |
start: | |
%sum = alloca i32, align 4 | |
%2 = bitcast i32* %sum to i8* | |
call void @llvm.lifetime.start(i64 4, i8* nonnull %2) | |
; n == 0 skips the loop entirely and goes straight to the fold exit block.
%3 = icmp eq i8 %n, 0 | |
br i1 %3, label %_ZN4core4iter8iterator8Iterator4fold17heb7f8159379347e6E.exit, label %bb7.i.i.i.preheader | |
bb7.i.i.i.preheader: ; preds = %start | |
br label %bb7.i.i.i | |
; Loop body: %.val.i4.i.i is the counter and %accum.03.i.i the running total.
; Each iteration bumps the counter, adds one next_u32 result to the total,
; and exits once the counter equals %n.
bb7.i.i.i: ; preds = %bb7.i.i.i.preheader, %bb7.i.i.i | |
%.val.i4.i.i = phi i8 [ %4, %bb7.i.i.i ], [ 0, %bb7.i.i.i.preheader ] | |
%accum.03.i.i = phi i32 [ %6, %bb7.i.i.i ], [ 0, %bb7.i.i.i.preheader ] | |
%4 = add nuw i8 %.val.i4.i.i, 1 | |
; call <rand::ThreadRng as rand::Rng>::next_u32 | |
%5 = tail call i32 @"_ZN45_$LT$rand..ThreadRng$u20$as$u20$rand..Rng$GT$8next_u3217h0077021a89e53d20E"(i64** nonnull dereferenceable(8) %1) | |
%6 = add i32 %5, %accum.03.i.i | |
%exitcond = icmp eq i8 %4, %n | |
br i1 %exitcond, label %_ZN4core4iter8iterator8Iterator4fold17heb7f8159379347e6E.exit.loopexit, label %bb7.i.i.i | |
; Loop exit edge: compute total + 1.
_ZN4core4iter8iterator8Iterator4fold17heb7f8159379347e6E.exit.loopexit: ; preds = %bb7.i.i.i | |
%phitmp = add i32 %6, 1 | |
br label %_ZN4core4iter8iterator8Iterator4fold17heb7f8159379347e6E.exit | |
; Epilogue: the stored value is the constant 1 when coming from %start
; (n == 0), otherwise total + 1 from the loop exit edge.
_ZN4core4iter8iterator8Iterator4fold17heb7f8159379347e6E.exit: ; preds = %_ZN4core4iter8iterator8Iterator4fold17heb7f8159379347e6E.exit.loopexit, %start | |
%accum.0.lcssa.i.i = phi i32 [ 1, %start ], [ %phitmp, %_ZN4core4iter8iterator8Iterator4fold17heb7f8159379347e6E.exit.loopexit ] | |
store i32 %accum.0.lcssa.i.i, i32* %sum, align 4 | |
; call <T as alloc::string::ToString>::to_string | |
call fastcc void @"_ZN45_$LT$T$u20$as$u20$alloc..string..ToString$GT$9to_string17h3ff76f5625d5cd43E"(%"alloc::string::String"* noalias nocapture nonnull dereferenceable(24) %0, i32* noalias nonnull readonly dereferenceable(4) %sum) | |
call void @llvm.lifetime.end(i64 4, i8* nonnull %2) | |
ret void | |
} | |
; test_loop::sum_fold | |
; Function Attrs: noinline uwtable | |
; LLVM IR emitted for the `fold` variant that adds the trailing 1 inside the
; closure. All three arguments are unnamed: %0 is a String pointer that is
; only forwarded to to_string at the end, %1 is forwarded to next_u32 on every
; iteration, and %2 is the 8-bit iteration count.
; NOTE: when %2 is 0 the stored result is 0, whereas the other three functions
; in this listing produce 1 on their zero-iteration path.
define internal fastcc void @_ZN9test_loop8sum_fold17h46523101744f15d5E(%"alloc::string::String"* noalias nocapture dereferenceable(24), i64** dereferenceable(8), i8) unnamed_addr #5 personality i32 (i32, i32, i64, %"unwind::libunwind::_Unwind_Exception"*, %"unwind::libunwind::_Unwind_Context"*)* @rust_eh_personality { | |
start: | |
%sum = alloca i32, align 4 | |
%3 = bitcast i32* %sum to i8* | |
call void @llvm.lifetime.start(i64 4, i8* nonnull %3) | |
; Count == 0 skips the loop entirely and goes straight to the fold exit block.
%4 = icmp eq i8 %2, 0 | |
br i1 %4, label %_ZN4core4iter8iterator8Iterator4fold17h6ce2bb06d0419cffE.exit, label %bb3.i.lr.ph.i.i | |
; Loop preheader: count - 1 is computed once, outside the loop.
bb3.i.lr.ph.i.i: ; preds = %start | |
%5 = add i8 %2, -1 | |
br label %bb7.i.i.i | |
; Loop body: %.val.i4.i.i is the counter and %accum.03.i.i the running total.
; In addition to the next_u32 result, every iteration adds a 0/1 value that
; is 1 exactly when (count - 1) <= counter, i.e. on the final iteration.
bb7.i.i.i: ; preds = %bb7.i.i.i, %bb3.i.lr.ph.i.i | |
%.val.i4.i.i = phi i8 [ 0, %bb3.i.lr.ph.i.i ], [ %6, %bb7.i.i.i ] | |
%accum.03.i.i = phi i32 [ 0, %bb3.i.lr.ph.i.i ], [ %9, %bb7.i.i.i ] | |
%6 = add nuw i8 %.val.i4.i.i, 1 | |
; call <rand::ThreadRng as rand::Rng>::next_u32 | |
%7 = tail call i32 @"_ZN45_$LT$rand..ThreadRng$u20$as$u20$rand..Rng$GT$8next_u3217h0077021a89e53d20E"(i64** nonnull dereferenceable(8) %1), !noalias !39 | |
%not..i.i.i.i = icmp ule i8 %5, %.val.i4.i.i | |
%..i.i.i.i = zext i1 %not..i.i.i.i to i32 | |
%8 = add i32 %..i.i.i.i, %accum.03.i.i | |
%9 = add i32 %8, %7 | |
%exitcond = icmp eq i8 %6, %2 | |
br i1 %exitcond, label %_ZN4core4iter8iterator8Iterator4fold17h6ce2bb06d0419cffE.exit.loopexit, label %bb7.i.i.i | |
_ZN4core4iter8iterator8Iterator4fold17h6ce2bb06d0419cffE.exit.loopexit: ; preds = %bb7.i.i.i | |
br label %_ZN4core4iter8iterator8Iterator4fold17h6ce2bb06d0419cffE.exit | |
; Epilogue: the stored value is 0 when coming from %start (count == 0),
; otherwise the loop's final total; no further add happens here.
_ZN4core4iter8iterator8Iterator4fold17h6ce2bb06d0419cffE.exit: ; preds = %_ZN4core4iter8iterator8Iterator4fold17h6ce2bb06d0419cffE.exit.loopexit, %start | |
%accum.0.lcssa.i.i = phi i32 [ 0, %start ], [ %9, %_ZN4core4iter8iterator8Iterator4fold17h6ce2bb06d0419cffE.exit.loopexit ] | |
store i32 %accum.0.lcssa.i.i, i32* %sum, align 4 | |
; call <T as alloc::string::ToString>::to_string | |
call fastcc void @"_ZN45_$LT$T$u20$as$u20$alloc..string..ToString$GT$9to_string17h3ff76f5625d5cd43E"(%"alloc::string::String"* noalias nocapture nonnull dereferenceable(24) %0, i32* noalias nonnull readonly dereferenceable(4) %sum) | |
call void @llvm.lifetime.end(i64 4, i8* nonnull %3) | |
ret void | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extern crate rand; | |
use rand::Rng; | |
fn main() { | |
let mut rng = rand::thread_rng(); | |
let n = rng.gen::<u8>(); | |
sum_manual(&mut rng, n); | |
sum_for(&mut rng, n); | |
sum_mut_fold(&mut rng, n); | |
sum_fold(&mut rng, n); | |
} | |
#[inline(never)] | |
fn sum_manual<R: Rng>(rng: &mut R, n: u8) -> String { | |
let mut sum = 0; | |
let mut i = 0; | |
while i < n { | |
sum += rng.gen::<i32>(); | |
i += 1; | |
} | |
sum += 1; | |
sum.to_string() | |
} | |
#[inline(never)] | |
fn sum_for<R: Rng>(rng: &mut R, n: u8) -> String { | |
let mut sum = 0; | |
for _ in 0..n { | |
sum += rng.gen::<i32>(); | |
} | |
sum += 1; | |
sum.to_string() | |
} | |
// It seems this one is the shorter in the LLVM IR. | |
#[inline(never)] | |
fn sum_mut_fold<R: Rng>(rng: &mut R, n: u8) -> String { | |
let mut sum = (0..n).fold(0, |acc, _| acc + rng.gen::<i32>()); | |
sum += 1; | |
sum.to_string() | |
} | |
#[inline(never)] | |
fn sum_fold<R: Rng>(rng: &mut R, n: u8) -> String { | |
let sum = (0..n).fold(0, |acc, i| { | |
acc + rng.gen::<i32>() + if i < n - 1 { 0 } else { 1 } | |
}); | |
sum.to_string() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment