Skip to content

Instantly share code, notes, and snippets.

@ejpcmac
Last active March 27, 2018 17:22
Show Gist options
  • Save ejpcmac/9b864fd5da528041f7111840463f8aa2 to your computer and use it in GitHub Desktop.
Save ejpcmac/9b864fd5da528041f7111840463f8aa2 to your computer and use it in GitHub Desktop.
Rust loop performance
; test_loop::sum_manual
; IR for the hand-written counter-loop variant.
; Arguments: %0 (unnamed) is the String out-pointer that to_string writes into,
; %rng is the generator handle passed to next_u32, %n is the i8 iteration count.
; Function Attrs: noinline uwtable
define internal fastcc void @_ZN9test_loop10sum_manual17h519a3902bc8dc8acE(%"alloc::string::String"* noalias nocapture dereferenceable(24), i64** dereferenceable(8) %rng, i8 %n) unnamed_addr #5 {
start:
; Stack slot for the result; its address is handed to to_string in bb2.
%sum = alloca i32, align 4
%1 = bitcast i32* %sum to i8*
call void @llvm.lifetime.start(i64 4, i8* nonnull %1)
store i32 0, i32* %sum, align 4
; n == 0: skip the loop and go straight to the epilogue in bb2.
%2 = icmp eq i8 %n, 0
br i1 %2, label %bb2, label %bb3.preheader
bb3.preheader: ; preds = %start
br label %bb3
bb1.bb2_crit_edge: ; preds = %bb3
; Loop exit edge. The store of %6 is overwritten by the store in bb2 before
; %sum is read; %phitmp is the accumulated sum plus the trailing 1.
store i32 %6, i32* %sum, align 4
%phitmp = add i32 %6, 1
br label %bb2
bb2: ; preds = %start, %bb1.bb2_crit_edge
; Epilogue: the result is 1 when the loop was skipped, otherwise sum + 1.
%3 = phi i32 [ 1, %start ], [ %phitmp, %bb1.bb2_crit_edge ]
store i32 %3, i32* %sum, align 4
; call <T as alloc::string::ToString>::to_string
call fastcc void @"_ZN45_$LT$T$u20$as$u20$alloc..string..ToString$GT$9to_string17h3ff76f5625d5cd43E"(%"alloc::string::String"* noalias nocapture nonnull dereferenceable(24) %0, i32* noalias nonnull readonly dereferenceable(4) %sum)
call void @llvm.lifetime.end(i64 4, i8* nonnull %1)
ret void
bb3: ; preds = %bb3.preheader, %bb3
; Loop body: %4 is the running sum and %i.02 the i8 counter; both start at 0.
%4 = phi i32 [ %6, %bb3 ], [ 0, %bb3.preheader ]
%i.02 = phi i8 [ %7, %bb3 ], [ 0, %bb3.preheader ]
; call <rand::ThreadRng as rand::Rng>::next_u32
%5 = tail call i32 @"_ZN45_$LT$rand..ThreadRng$u20$as$u20$rand..Rng$GT$8next_u3217h0077021a89e53d20E"(i64** nonnull dereferenceable(8) %rng)
; Plain add without nsw/nuw flags: the sum wraps on overflow.
%6 = add i32 %4, %5
%7 = add nuw i8 %i.02, 1
; Leave the loop once the incremented counter equals n.
%exitcond = icmp eq i8 %7, %n
br i1 %exitcond, label %bb1.bb2_crit_edge, label %bb3
}
; test_loop::sum_for
; IR for the range-iterator loop variant (the loop labels come from the inlined
; Range<A> Iterator::next).
; Arguments: %0 (unnamed) is the String out-pointer that to_string writes into,
; %rng is the generator handle passed to next_u32, %n is the i8 iteration count.
; Function Attrs: noinline uwtable
define internal fastcc void @_ZN9test_loop7sum_for17h10e61573495e47d9E(%"alloc::string::String"* noalias nocapture dereferenceable(24), i64** dereferenceable(8) %rng, i8 %n) unnamed_addr #5 personality i32 (i32, i32, i64, %"unwind::libunwind::_Unwind_Exception"*, %"unwind::libunwind::_Unwind_Context"*)* @rust_eh_personality {
start:
; Stack slot for the result; its address is handed to to_string in bb4.
%sum = alloca i32, align 4
%1 = bitcast i32* %sum to i8*
call void @llvm.lifetime.start(i64 4, i8* nonnull %1)
; n == 0: skip the loop and go straight to the epilogue in bb4.
%2 = icmp eq i8 %n, 0
br i1 %2, label %bb4, label %"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit.preheader"
"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit.preheader": ; preds = %start
br label %"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit"
"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit": ; preds = %"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit.preheader", %bb6
; Loop header: %iter.sroa.0.04 is the i8 range counter and %storemerge3 the
; running sum; both start at 0.
%iter.sroa.0.04 = phi i8 [ %iter.sroa.0.0., %bb6 ], [ 0, %"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit.preheader" ]
%storemerge3 = phi i32 [ %8, %bb6 ], [ 0, %"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit.preheader" ]
; Step the counter with an overflow-checked i8 increment.
%3 = tail call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %iter.sroa.0.04, i8 1) #8
%4 = extractvalue { i8, i1 } %3, 1
; Overflow bit set: leave the loop without calling next_u32 again.
br i1 %4, label %bb4.loopexit, label %bb6
bb4.loopexit: ; preds = %bb6, %"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit"
; Merges the running sum from the two loop exits (overflow exit and normal exit).
%storemerge.lcssa.ph = phi i32 [ %storemerge3, %"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit" ], [ %8, %bb6 ]
br label %bb4
bb4: ; preds = %bb4.loopexit, %start
; Epilogue: the sum is 0 when the loop was skipped; the trailing +1 is applied
; here on every path before the value is stored.
%storemerge.lcssa = phi i32 [ 0, %start ], [ %storemerge.lcssa.ph, %bb4.loopexit ]
%5 = add i32 %storemerge.lcssa, 1
store i32 %5, i32* %sum, align 4
; call <T as alloc::string::ToString>::to_string
call fastcc void @"_ZN45_$LT$T$u20$as$u20$alloc..string..ToString$GT$9to_string17h3ff76f5625d5cd43E"(%"alloc::string::String"* noalias nocapture nonnull dereferenceable(24) %0, i32* noalias nonnull readonly dereferenceable(4) %sum)
call void @llvm.lifetime.end(i64 4, i8* nonnull %1)
ret void
bb6: ; preds = %"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit"
; Loop body. This block is only reached when %4 is false, so the select below
; yields the incremented counter %6.
%6 = extractvalue { i8, i1 } %3, 0
%iter.sroa.0.0. = select i1 %4, i8 %iter.sroa.0.04, i8 %6
; call <rand::ThreadRng as rand::Rng>::next_u32
%7 = tail call i32 @"_ZN45_$LT$rand..ThreadRng$u20$as$u20$rand..Rng$GT$8next_u3217h0077021a89e53d20E"(i64** nonnull dereferenceable(8) %rng)
; Plain add without nsw/nuw flags: the sum wraps on overflow.
%8 = add i32 %storemerge3, %7
; Continue while the incremented counter is still below n (unsigned compare).
%9 = icmp ult i8 %iter.sroa.0.0., %n
br i1 %9, label %"_ZN4core4iter5range93_$LT$impl$u20$core..iter..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h48a4445c50a27a2eE.exit", label %bb4.loopexit
}
; test_loop::sum_mut_fold
; IR for the fold-then-increment variant (the loop labels come from the inlined
; Iterator::fold).
; Arguments: %0 (unnamed) is the String out-pointer that to_string writes into,
; %1 (unnamed) is the generator handle passed to next_u32, %n is the i8
; iteration count.
; Function Attrs: noinline uwtable
define internal fastcc void @_ZN9test_loop12sum_mut_fold17h5ec7beccd9c6eebcE(%"alloc::string::String"* noalias nocapture dereferenceable(24), i64** dereferenceable(8), i8 %n) unnamed_addr #5 personality i32 (i32, i32, i64, %"unwind::libunwind::_Unwind_Exception"*, %"unwind::libunwind::_Unwind_Context"*)* @rust_eh_personality {
start:
; Stack slot for the result; its address is handed to to_string in the exit block.
%sum = alloca i32, align 4
%2 = bitcast i32* %sum to i8*
call void @llvm.lifetime.start(i64 4, i8* nonnull %2)
; n == 0: skip the loop and go straight to the exit block.
%3 = icmp eq i8 %n, 0
br i1 %3, label %_ZN4core4iter8iterator8Iterator4fold17heb7f8159379347e6E.exit, label %bb7.i.i.i.preheader
bb7.i.i.i.preheader: ; preds = %start
br label %bb7.i.i.i
bb7.i.i.i: ; preds = %bb7.i.i.i.preheader, %bb7.i.i.i
; Loop body: %.val.i4.i.i is the i8 counter and %accum.03.i.i the fold
; accumulator; both start at 0.
%.val.i4.i.i = phi i8 [ %4, %bb7.i.i.i ], [ 0, %bb7.i.i.i.preheader ]
%accum.03.i.i = phi i32 [ %6, %bb7.i.i.i ], [ 0, %bb7.i.i.i.preheader ]
%4 = add nuw i8 %.val.i4.i.i, 1
; call <rand::ThreadRng as rand::Rng>::next_u32
%5 = tail call i32 @"_ZN45_$LT$rand..ThreadRng$u20$as$u20$rand..Rng$GT$8next_u3217h0077021a89e53d20E"(i64** nonnull dereferenceable(8) %1)
; Plain add without nsw/nuw flags: the accumulator wraps on overflow.
%6 = add i32 %5, %accum.03.i.i
; Leave the loop once the incremented counter equals n.
%exitcond = icmp eq i8 %4, %n
br i1 %exitcond, label %_ZN4core4iter8iterator8Iterator4fold17heb7f8159379347e6E.exit.loopexit, label %bb7.i.i.i
_ZN4core4iter8iterator8Iterator4fold17heb7f8159379347e6E.exit.loopexit: ; preds = %bb7.i.i.i
; Loop exit edge: applies the trailing +1 to the folded sum.
%phitmp = add i32 %6, 1
br label %_ZN4core4iter8iterator8Iterator4fold17heb7f8159379347e6E.exit
_ZN4core4iter8iterator8Iterator4fold17heb7f8159379347e6E.exit: ; preds = %_ZN4core4iter8iterator8Iterator4fold17heb7f8159379347e6E.exit.loopexit, %start
; Epilogue: the result is 1 when the loop was skipped, otherwise sum + 1.
%accum.0.lcssa.i.i = phi i32 [ 1, %start ], [ %phitmp, %_ZN4core4iter8iterator8Iterator4fold17heb7f8159379347e6E.exit.loopexit ]
store i32 %accum.0.lcssa.i.i, i32* %sum, align 4
; call <T as alloc::string::ToString>::to_string
call fastcc void @"_ZN45_$LT$T$u20$as$u20$alloc..string..ToString$GT$9to_string17h3ff76f5625d5cd43E"(%"alloc::string::String"* noalias nocapture nonnull dereferenceable(24) %0, i32* noalias nonnull readonly dereferenceable(4) %sum)
call void @llvm.lifetime.end(i64 4, i8* nonnull %2)
ret void
}
; test_loop::sum_fold
; IR for the single-fold variant, where the +1 is folded into the last
; iteration of the loop instead of being applied afterwards.
; Arguments (all unnamed): %0 is the String out-pointer that to_string writes
; into, %1 is the generator handle passed to next_u32, %2 is the i8 iteration
; count.
; Function Attrs: noinline uwtable
define internal fastcc void @_ZN9test_loop8sum_fold17h46523101744f15d5E(%"alloc::string::String"* noalias nocapture dereferenceable(24), i64** dereferenceable(8), i8) unnamed_addr #5 personality i32 (i32, i32, i64, %"unwind::libunwind::_Unwind_Exception"*, %"unwind::libunwind::_Unwind_Context"*)* @rust_eh_personality {
start:
; Stack slot for the result; its address is handed to to_string in the exit block.
%sum = alloca i32, align 4
%3 = bitcast i32* %sum to i8*
call void @llvm.lifetime.start(i64 4, i8* nonnull %3)
; Count == 0: skip the loop and go straight to the exit block.
%4 = icmp eq i8 %2, 0
br i1 %4, label %_ZN4core4iter8iterator8Iterator4fold17h6ce2bb06d0419cffE.exit, label %bb3.i.lr.ph.i.i
bb3.i.lr.ph.i.i: ; preds = %start
; count - 1, i.e. the counter value of the last iteration; computed once
; outside the loop.
%5 = add i8 %2, -1
br label %bb7.i.i.i
bb7.i.i.i: ; preds = %bb7.i.i.i, %bb3.i.lr.ph.i.i
; Loop body: %.val.i4.i.i is the i8 counter and %accum.03.i.i the fold
; accumulator; both start at 0.
%.val.i4.i.i = phi i8 [ 0, %bb3.i.lr.ph.i.i ], [ %6, %bb7.i.i.i ]
%accum.03.i.i = phi i32 [ 0, %bb3.i.lr.ph.i.i ], [ %9, %bb7.i.i.i ]
%6 = add nuw i8 %.val.i4.i.i, 1
; call <rand::ThreadRng as rand::Rng>::next_u32
%7 = tail call i32 @"_ZN45_$LT$rand..ThreadRng$u20$as$u20$rand..Rng$GT$8next_u3217h0077021a89e53d20E"(i64** nonnull dereferenceable(8) %1), !noalias !39
; Per-iteration extra work: compare the counter against count - 1 and add the
; zero-extended result (1 on the last iteration, else 0) to the accumulator.
%not..i.i.i.i = icmp ule i8 %5, %.val.i4.i.i
%..i.i.i.i = zext i1 %not..i.i.i.i to i32
%8 = add i32 %..i.i.i.i, %accum.03.i.i
; Plain add without nsw/nuw flags: the accumulator wraps on overflow.
%9 = add i32 %8, %7
; Leave the loop once the incremented counter equals the count.
%exitcond = icmp eq i8 %6, %2
br i1 %exitcond, label %_ZN4core4iter8iterator8Iterator4fold17h6ce2bb06d0419cffE.exit.loopexit, label %bb7.i.i.i
_ZN4core4iter8iterator8Iterator4fold17h6ce2bb06d0419cffE.exit.loopexit: ; preds = %bb7.i.i.i
br label %_ZN4core4iter8iterator8Iterator4fold17h6ce2bb06d0419cffE.exit
_ZN4core4iter8iterator8Iterator4fold17h6ce2bb06d0419cffE.exit: ; preds = %_ZN4core4iter8iterator8Iterator4fold17h6ce2bb06d0419cffE.exit.loopexit, %start
; Epilogue: the result is 0 when the loop was skipped (count == 0), because the
; +1 only happens inside the loop body.
%accum.0.lcssa.i.i = phi i32 [ 0, %start ], [ %9, %_ZN4core4iter8iterator8Iterator4fold17h6ce2bb06d0419cffE.exit.loopexit ]
store i32 %accum.0.lcssa.i.i, i32* %sum, align 4
; call <T as alloc::string::ToString>::to_string
call fastcc void @"_ZN45_$LT$T$u20$as$u20$alloc..string..ToString$GT$9to_string17h3ff76f5625d5cd43E"(%"alloc::string::String"* noalias nocapture nonnull dereferenceable(24) %0, i32* noalias nonnull readonly dereferenceable(4) %sum)
call void @llvm.lifetime.end(i64 4, i8* nonnull %3)
ret void
}
extern crate rand;
use rand::Rng;
/// Picks a random `u8` iteration count and runs every summation variant once
/// with it, sharing a single thread-local generator between them.
fn main() {
    let mut generator = rand::thread_rng();
    let count: u8 = generator.gen();

    // The returned strings are deliberately discarded: the calls exist only so
    // that each non-inlined variant is kept in the compiled output.
    sum_manual(&mut generator, count);
    sum_for(&mut generator, count);
    sum_mut_fold(&mut generator, count);
    sum_fold(&mut generator, count);
}
/// Sums `n` random `i32` values drawn from `rng` using a hand-written `while`
/// loop, adds one, and returns the total formatted as a decimal string.
///
/// All additions wrap on overflow. Summing arbitrary random `i32` values
/// overflows almost immediately, and plain `+=` would panic in builds with
/// overflow checks enabled (the default for debug builds).
///
/// Returns `"1"` when `n` is zero.
#[inline(never)]
fn sum_manual<R: Rng>(rng: &mut R, n: u8) -> String {
    // The explicit type is required so that `wrapping_add` resolves.
    let mut sum: i32 = 0;
    let mut i: u8 = 0;
    // `i` never exceeds `n`, so the counter increment itself cannot overflow.
    while i < n {
        sum = sum.wrapping_add(rng.gen::<i32>());
        i += 1;
    }
    sum.wrapping_add(1).to_string()
}
/// Sums `n` random `i32` values drawn from `rng` using a `for` loop over a
/// range, adds one, and returns the total formatted as a decimal string.
///
/// All additions wrap on overflow. Summing arbitrary random `i32` values
/// overflows almost immediately, and plain `+=` would panic in builds with
/// overflow checks enabled (the default for debug builds).
///
/// Returns `"1"` when `n` is zero.
#[inline(never)]
fn sum_for<R: Rng>(rng: &mut R, n: u8) -> String {
    // The explicit type is required so that `wrapping_add` resolves.
    let mut sum: i32 = 0;
    for _ in 0..n {
        sum = sum.wrapping_add(rng.gen::<i32>());
    }
    sum.wrapping_add(1).to_string()
}
// It seems this one is the shortest in the LLVM IR.
/// Sums `n` random `i32` values drawn from `rng` with `Iterator::fold`, then
/// adds one through a mutable binding and returns the total formatted as a
/// decimal string.
///
/// All additions wrap on overflow. Summing arbitrary random `i32` values
/// overflows almost immediately, and plain `+` would panic in builds with
/// overflow checks enabled (the default for debug builds).
///
/// Returns `"1"` when `n` is zero.
#[inline(never)]
fn sum_mut_fold<R: Rng>(rng: &mut R, n: u8) -> String {
    let mut sum = (0..n).fold(0i32, |acc: i32, _| acc.wrapping_add(rng.gen::<i32>()));
    sum = sum.wrapping_add(1);
    sum.to_string()
}
/// Sums `n` random `i32` values drawn from `rng` with a single
/// `Iterator::fold` and no mutable binding, adds one, and returns the total
/// formatted as a decimal string.
///
/// The trailing `+ 1` is supplied as the fold's initial accumulator rather
/// than inside the closure on the last iteration. That way the result is
/// `"1"` when `n` is zero (an empty range never runs the closure), and no
/// per-iteration "is this the last element" comparison is needed.
///
/// All additions wrap on overflow, so the order in which the one is added
/// does not affect the result, and no panic occurs in builds with overflow
/// checks enabled.
#[inline(never)]
fn sum_fold<R: Rng>(rng: &mut R, n: u8) -> String {
    (0..n)
        .fold(1i32, |acc: i32, _| acc.wrapping_add(rng.gen::<i32>()))
        .to_string()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment