Skip to content

Instantly share code, notes, and snippets.

@pczarn
Created January 6, 2015 12:57
Show Gist options
  • Save pczarn/2bb4e5c98e063312bd42 to your computer and use it in GitHub Desktop.
Save pczarn/2bb4e5c98e063312bd42 to your computer and use it in GitHub Desktop.
/// A hand-rolled (pointer, length) pair describing a run of bytes.
///
/// Holds the same two pieces of information as a `&[u8]` but carries no
/// lifetime, so nothing in this type guarantees that `start` stays valid.
pub struct MySlice {
/// Address of the first byte of the run.
start: *const u8,
/// Number of bytes in the run. `usize` replaces the pre-1.0 `uint` type,
/// which no longer exists in the language.
len: usize,
}
/// Walks `val` one byte position at a time and returns a `MySlice` describing
/// the run that was covered.
///
/// As written, the two cursors `tmp1` and `tmp2` start at the same address and
/// are advanced in lockstep, so the "start a new run" branch can never be taken.
/// The function therefore returns `start == val.as_ptr()` and
/// `len == val.len()` for every input, including the empty slice (where the
/// outer loop body never runs and `len` stays 0).
///
/// NOTE(review): this reads like a reduced test case for inspecting generated
/// code rather than production logic — confirm before restructuring it.
// `inline(never)` asks the compiler to keep this function out of line.
#[inline(never)]
pub fn merge_slice(val: &[u8]) -> MySlice {
let mut val_ptr = val.as_ptr();
// Number of bytes not yet visited.
let mut len = val.len();
// Nothing inside this block performs an unsafe operation; the struct literal
// alone would not need `unsafe`.
let mut state = unsafe {
MySlice {
start: val_ptr,
len: 0,
}
};
// SAFETY: each cursor is advanced by one exactly once per remaining byte, so it
// ends at most one past the end of `val`. The pointers are only compared, never
// dereferenced.
unsafe {
while len != 0 {
// Both cursors begin at the same address...
let mut tmp1 = val_ptr;
let mut tmp2 = val_ptr;
loop {
// ...and move forward together, so they always stay equal.
tmp1 = tmp1.offset(1);
tmp2 = tmp2.offset(1);
// One more byte consumed from the input, one more byte in the current run.
len -= 1;
state.len += 1;
// Never true (see above): this whole branch is unreachable as written.
if tmp1 != tmp2 {
// NOTE(review): `black_box` is not defined in this listing; presumably an
// optimization barrier defined elsewhere — verify.
black_box(&state);
// Would start a fresh, empty run at the second cursor.
val_ptr = tmp2;
state = MySlice {
start: val_ptr,
len: 0,
};
break;
}
// Input exhausted: hand back the run accumulated so far.
if len == 0 {
return state;
}
}
}
}
// Reached only when `val` is empty (or via the unreachable `break` path).
state
}
/// Runs `merge_slice` on a fixed five-byte input and passes a reference to the
/// result to `black_box`.
fn main() {
let mut val_slice: &[u8] = &[12, 23, 34, 45, 12];
// Takes a reference to the slice and dereferences it again at the call site.
let mut r = &val_slice;
// NOTE(review): `black_box` is not defined in this listing; presumably it keeps
// the result from being optimized away — confirm.
black_box(&merge_slice(*r));
}
; LLVM IR for `merge_slice`.
; %0 is the sret out-pointer for the returned MySlice; %1 points at the
; { i8*, i64 } pair holding the slice argument.
; No comparison of two cursors and no call to black_box appears anywhere in
; this function: what remains is a loop that counts the length down to zero
; while counting a second value up, which is then stored into the struct's
; second field.
; Function Attrs: noinline nounwind uwtable
define internal fastcc void @_ZN11merge_slice20ha8074868c3655a38MdaE(%struct.MySlice* noalias nocapture sret dereferenceable(16), { i8*, i64 }* noalias nocapture dereferenceable(16)) unnamed_addr #1 {
entry-block:
%state = alloca %struct.MySlice, align 8
%2 = bitcast { i8*, i64 }* %1 to i8*
; Load field 0 (i8*) and field 1 (i64) of the slice argument.
%arg.sroa.0.0..sroa_idx = getelementptr inbounds { i8*, i64 }* %1, i64 0, i32 0
%arg.sroa.0.0.copyload = load i8** %arg.sroa.0.0..sroa_idx, align 8
%arg.sroa.5.0..sroa_idx32 = getelementptr inbounds { i8*, i64 }* %1, i64 0, i32 1
%arg1.sroa.4.0.copyload = load i64* %arg.sroa.5.0..sroa_idx32, align 8
%3 = bitcast %struct.MySlice* %state to i8*
call void @llvm.lifetime.start(i64 16, i8* %3)
; Initialise %state: field 0 = loaded pointer, field 1 = 0.
%4 = getelementptr inbounds %struct.MySlice* %state, i64 0, i32 0
store i8* %arg.sroa.0.0.copyload, i8** %4, align 8
%5 = getelementptr inbounds %struct.MySlice* %state, i64 0, i32 1
store i64 0, i64* %5, align 8
; A zero length skips the loop entirely.
%6 = icmp eq i64 %arg1.sroa.4.0.copyload, 0
br i1 %6, label %while_exit, label %next-block.preheader
next-block.preheader: ; preds = %entry-block
br label %next-block
while_exit: ; preds = %entry-block
; Zero-length path: copy the freshly initialised %state to the sret slot.
%7 = bitcast %struct.MySlice* %0 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* %3, i64 16, i32 8, i1 false)
call void @llvm.lifetime.end(i64 16, i8* %3)
br label %clean_custom_
next-block: ; preds = %next-block.preheader, %next-block
; Loop body: %8 counts up from 0, %len.1 counts down from the loaded length;
; the loop exits when the down-counter reaches 0.
%8 = phi i64 [ %10, %next-block ], [ 0, %next-block.preheader ]
%len.1 = phi i64 [ %9, %next-block ], [ %arg1.sroa.4.0.copyload, %next-block.preheader ]
%9 = add i64 %len.1, -1
%10 = add i64 %8, 1
%11 = icmp eq i64 %9, 0
br i1 %11, label %then-block-319-, label %next-block
then-block-319-: ; preds = %next-block
; Store the final up-counter into %state field 1, then copy %state to the sret slot.
%.lcssa = phi i64 [ %10, %next-block ]
store i64 %.lcssa, i64* %5, align 8
%12 = bitcast %struct.MySlice* %0 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %12, i8* %3, i64 16, i32 8, i1 false)
call void @llvm.lifetime.end(i64 16, i8* %3)
br label %clean_custom_
clean_custom_: ; preds = %while_exit, %then-block-319-
call void @llvm.lifetime.end(i64 16, i8* %2)
ret void
}
# x86-64 (AT&T syntax) listing for the `merge_slice` symbol.
# %rdi holds the address the result is written to; %rsi holds the address the
# two 8-byte input fields are read from.
_ZN11merge_slice20ha8074868c3655a38MdaE:
.cfi_startproc
# Stack-limit check: if %rsp is not above the value at %fs:112, call
# __morestack and return.
cmpq %fs:112, %rsp
ja .LBB0_2
movabsq $16, %r10
movabsq $0, %r11
callq __morestack
retq
.LBB0_2:
# Reserve 16 bytes of stack for the local struct.
subq $16, %rsp
.Ltmp0:
.cfi_def_cfa_offset 24
# Load the first and second 8-byte fields of the input.
movq (%rsi), %rcx
movq 8(%rsi), %rax
# Local struct: first field = loaded first field, second field = 0.
movq %rcx, (%rsp)
movq $0, 8(%rsp)
# %rcx becomes the up-counter; a zero second field skips the loop.
xorl %ecx, %ecx
testq %rax, %rax
je .LBB0_3
.align 16, 0x90
.LBB0_4:
# Counting loop: increment %rcx until it equals %rax.
incq %rcx
cmpq %rcx, %rax
jne .LBB0_4
# Write the final count to the local struct and to the result's second field.
movq %rcx, 8(%rsp)
movq %rcx, 8(%rdi)
movq (%rsp), %rax
jmp .LBB0_6
.LBB0_3:
# Zero-length path: copy the local struct's fields out unchanged.
movq (%rsp), %rax
movq 8(%rsp), %rcx
movq %rcx, 8(%rdi)
.LBB0_6:
# Write the first field of the result and return the result address in %rax.
movq %rax, (%rdi)
movq %rdi, %rax
addq $16, %rsp
retq
.Ltmp1:
.size _ZN11merge_slice20ha8074868c3655a38MdaE, .Ltmp1-_ZN11merge_slice20ha8074868c3655a38MdaE
.cfi_endproc
@dotdash
Copy link

dotdash commented Feb 5, 2015

; ModuleID = 'a-src.0.rs'
target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%struct.MySlice = type { i8*, i64 }

; LLVM IR for `merge_slice` in which the counting loop has been unrolled.
; %0 is the sret out-pointer for the returned MySlice; %1 points at the
; { i8*, i64 } pair holding the slice argument.
; Structure: a prologue loop (`next-block.prol`) runs (length & 7) iterations,
; then the main loop (`next-block`) performs eight decrement/increment pairs per
; iteration. As in the non-unrolled form, no call to black_box appears here.
; Function Attrs: noinline nounwind uwtable
define internal fastcc void @_ZN11merge_slice20h2f5d58798877d641oaaE(%struct.MySlice* noalias nocapture sret dereferenceable(16), { i8*, i64 }* noalias nocapture dereferenceable(16)) unnamed_addr #0 {
entry-block:
  %state = alloca %struct.MySlice, align 8
  %2 = bitcast { i8*, i64 }* %1 to i8*
  ; Load field 0 (i8*) and field 1 (i64) of the slice argument.
  %arg.sroa.0.0..sroa_idx = getelementptr inbounds { i8*, i64 }* %1, i64 0, i32 0
  %arg.sroa.0.0.copyload = load i8** %arg.sroa.0.0..sroa_idx, align 8
  %arg.sroa.5.0..sroa_idx36 = getelementptr inbounds { i8*, i64 }* %1, i64 0, i32 1
  %arg1.sroa.4.0.copyload = load i64* %arg.sroa.5.0..sroa_idx36, align 8
  %3 = bitcast %struct.MySlice* %state to i8*
  call void @llvm.lifetime.start(i64 16, i8* %3)
  ; Initialise %state: field 0 = loaded pointer, field 1 = 0.
  %4 = getelementptr inbounds %struct.MySlice* %state, i64 0, i32 0
  store i8* %arg.sroa.0.0.copyload, i8** %4, align 8
  %5 = getelementptr inbounds %struct.MySlice* %state, i64 0, i32 1
  store i64 0, i64* %5, align 8
  ; A zero length skips all loops.
  %6 = icmp eq i64 %arg1.sroa.4.0.copyload, 0
  br i1 %6, label %while_exit, label %next-block.preheader

next-block.preheader:                             ; preds = %entry-block
  ; %xtraiter = length & 7: the number of iterations handled by the prologue loop.
  %xtraiter = and i64 %arg1.sroa.4.0.copyload, 7
  %lcmp.mod = icmp ne i64 %xtraiter, 0
  %lcmp.overflow = icmp eq i64 %arg1.sroa.4.0.copyload, 0
  %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod
  br i1 %lcmp.or, label %next-block.prol, label %next-block.preheader.split

next-block.prol:                                  ; preds = %next-block.prol, %next-block.preheader
  ; Prologue loop: one decrement/increment pair per iteration, repeated until
  ; %prol.iter counts down to 0.
  %7 = phi i64 [ %9, %next-block.prol ], [ 0, %next-block.preheader ]
  %len.1.prol = phi i64 [ %8, %next-block.prol ], [ %arg1.sroa.4.0.copyload, %next-block.preheader ]
  %prol.iter = phi i64 [ %xtraiter, %next-block.preheader ], [ %prol.iter.sub, %next-block.prol ]
  %8 = add i64 %len.1.prol, -1
  %9 = add i64 %7, 1
  %10 = icmp eq i64 %8, 0
  %prol.iter.sub = sub i64 %prol.iter, 1
  %prol.iter.cmp = icmp ne i64 %prol.iter.sub, 0
  br i1 %prol.iter.cmp, label %next-block.prol, label %next-block.preheader.split, !llvm.loop !0

next-block.preheader.split:                       ; preds = %next-block.prol, %next-block.preheader
  ; Carry the prologue's counters forward; a length below 8 skips the main loop.
  %.lcssa.unr = phi i64 [ 0, %next-block.preheader ], [ %9, %next-block.prol ]
  %.unr = phi i64 [ 0, %next-block.preheader ], [ %9, %next-block.prol ]
  %len.1.unr = phi i64 [ %arg1.sroa.4.0.copyload, %next-block.preheader ], [ %8, %next-block.prol ]
  %11 = icmp ult i64 %arg1.sroa.4.0.copyload, 8
  br i1 %11, label %then-block-109-, label %next-block.preheader.split.split

next-block.preheader.split.split:                 ; preds = %next-block.preheader.split
  br label %next-block

while_exit:                                       ; preds = %entry-block
  ; Zero-length path: copy the freshly initialised %state to the sret slot.
  %12 = bitcast %struct.MySlice* %0 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %12, i8* %3, i64 16, i32 8, i1 false)
  call void @llvm.lifetime.end(i64 16, i8* %3)
  br label %clean_custom_

next-block:                                       ; preds = %next-block, %next-block.preheader.split.split
  ; Main loop: eight (-1, +1) pairs per iteration; exits when the down-counter
  ; %28 reaches 0.
  %13 = phi i64 [ %.unr, %next-block.preheader.split.split ], [ %29, %next-block ]
  %len.1 = phi i64 [ %len.1.unr, %next-block.preheader.split.split ], [ %28, %next-block ]
  %14 = add i64 %len.1, -1
  %15 = add i64 %13, 1
  %16 = add i64 %14, -1
  %17 = add i64 %15, 1
  %18 = add i64 %16, -1
  %19 = add i64 %17, 1
  %20 = add i64 %18, -1
  %21 = add i64 %19, 1
  %22 = add i64 %20, -1
  %23 = add i64 %21, 1
  %24 = add i64 %22, -1
  %25 = add i64 %23, 1
  %26 = add i64 %24, -1
  %27 = add i64 %25, 1
  %28 = add i64 %26, -1
  %29 = add i64 %27, 1
  %30 = icmp eq i64 %28, 0
  br i1 %30, label %then-block-109-.unr-lcssa, label %next-block

then-block-109-.unr-lcssa:                        ; preds = %next-block
  %.lcssa.ph = phi i64 [ %29, %next-block ]
  br label %then-block-109-

then-block-109-:                                  ; preds = %next-block.preheader.split, %then-block-109-.unr-lcssa
  ; Store the final up-counter into %state field 1, then copy %state to the sret slot.
  %.lcssa = phi i64 [ %.lcssa.unr, %next-block.preheader.split ], [ %.lcssa.ph, %then-block-109-.unr-lcssa ]
  store i64 %.lcssa, i64* %5, align 8
  %31 = bitcast %struct.MySlice* %0 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %31, i8* %3, i64 16, i32 8, i1 false)
  call void @llvm.lifetime.end(i64 16, i8* %3)
  br label %clean_custom_

clean_custom_:                                    ; preds = %while_exit, %then-block-109-
  call void @llvm.lifetime.end(i64 16, i8* %2)
  ret void
}

; Function Attrs: nounwind
declare void @llvm.lifetime.start(i64, i8* nocapture) unnamed_addr #1

; Function Attrs: nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) unnamed_addr #1

; Function Attrs: nounwind
declare void @llvm.lifetime.end(i64, i8* nocapture) unnamed_addr #1

; LLVM IR for the Rust-level `main`: builds a 5-byte array on the stack, wraps
; it in a { pointer, length } pair, calls `merge_slice`, and then passes the
; address of the result through an empty inline-asm statement.
; Function Attrs: nounwind uwtable
define internal void @_ZN4main20h383ce47de90dc4cc0baE() unnamed_addr #2 {
entry-block:
  %dummy.i = alloca %struct.MySlice*, align 8
  %0 = alloca [5 x i8], align 1
  %1 = alloca %struct.MySlice, align 8
  %arg = alloca { i8*, i64 }, align 8
  ; Fill the 5-byte array with 12, 23, 34, 45, 12.
  %.sub = getelementptr inbounds [5 x i8]* %0, i64 0, i64 0
  call void @llvm.lifetime.start(i64 1, i8* %.sub)
  store i8 12, i8* %.sub, align 1
  %2 = getelementptr inbounds [5 x i8]* %0, i64 0, i64 1
  store i8 23, i8* %2, align 1
  %3 = getelementptr inbounds [5 x i8]* %0, i64 0, i64 2
  store i8 34, i8* %3, align 1
  %4 = getelementptr inbounds [5 x i8]* %0, i64 0, i64 3
  store i8 45, i8* %4, align 1
  %5 = getelementptr inbounds [5 x i8]* %0, i64 0, i64 4
  store i8 12, i8* %5, align 1
  %6 = bitcast %struct.MySlice* %1 to i8*
  call void @llvm.lifetime.start(i64 16, i8* %6)
  %7 = bitcast { i8*, i64 }* %arg to i8*
  call void @llvm.lifetime.start(i64 16, i8* %7)
  ; Build the slice argument: pointer to the array's first byte, length 5.
  %val_slice.sroa.0.0..sroa_idx = getelementptr inbounds { i8*, i64 }* %arg, i64 0, i32 0
  store i8* %.sub, i8** %val_slice.sroa.0.0..sroa_idx, align 8
  %val_slice.sroa.4.0..sroa_idx11 = getelementptr inbounds { i8*, i64 }* %arg, i64 0, i32 1
  store i64 5, i64* %val_slice.sroa.4.0..sroa_idx11, align 8
  ; The result is written into %1 through the sret argument.
  call fastcc void @_ZN11merge_slice20h2f5d58798877d641oaaE(%struct.MySlice* noalias nocapture sret dereferenceable(16) %1, { i8*, i64 }* noalias nocapture dereferenceable(16) %arg)
  call void @llvm.lifetime.end(i64 16, i8* %7)
  ; Store the result's address in %dummy.i and feed that slot to an empty asm
  ; statement. The !noalias scope on these instructions (!2 -> !3/!4) is named
  ; after black_box, which suggests this is that function inlined — TODO confirm.
  %8 = bitcast %struct.MySlice** %dummy.i to i8*
  call void @llvm.lifetime.start(i64 8, i8* %8) #4, !noalias !2
  store %struct.MySlice* %1, %struct.MySlice** %dummy.i, align 8, !noalias !2
  call void asm "", "r,~{dirflag},~{fpsr},~{flags}"(%struct.MySlice** %dummy.i) #4, !srcloc !5
  call void @llvm.lifetime.end(i64 8, i8* %8) #4, !noalias !2
  call void @llvm.lifetime.end(i64 16, i8* %6)
  call void @llvm.lifetime.end(i64 5, i8* %.sub)
  ret void
}

; Entry symbol `main`: tail-calls the `rt::lang_start` symbol with a pointer to
; the Rust-level main function plus its own two incoming arguments, and returns
; that call's result.
define i64 @main(i64, i8**) unnamed_addr #3 {
top:
  %2 = tail call i64 @_ZN2rt10lang_start20he353c2582d278f49WQFE(i8* bitcast (void ()* @_ZN4main20h383ce47de90dc4cc0baE to i8*), i64 %0, i8** %1)
  ret i64 %2
}

declare i64 @_ZN2rt10lang_start20he353c2582d278f49WQFE(i8*, i64, i8**) unnamed_addr #3

attributes #0 = { noinline nounwind uwtable "split-stack" }
attributes #1 = { nounwind "split-stack" }
attributes #2 = { nounwind uwtable "split-stack" }
attributes #3 = { "split-stack" }
attributes #4 = { nounwind }

!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.unroll.disable"}
!2 = !{!3}
!3 = distinct !{!3, !4, !"_ZN9black_box18h90424135468576568E: argument 0"}
!4 = distinct !{!4, !"_ZN9black_box18h90424135468576568E"}
!5 = !{i32 301}

@pczarn
Copy link
Author

pczarn commented May 15, 2019

@dotdash Could you elaborate on your comment from 4 years ago? (Also, related material: https://gist.github.com/pczarn/62a953ea9ad40d264487)

@dotdash
Copy link

dotdash commented May 16, 2019

@pczarn Sorry, I don't remember what that comment was about. Seems that the IR in my version has the core loop unrolled, but I have no idea whatsoever what caused that difference, i.e. whether I used a different opt-level or modified the compiler or...

@pczarn
Copy link
Author

pczarn commented May 17, 2019

@dotdash OK, no worries. Next month I will investigate what causes the difference w.r.t. unrolling the core loop.

I will experiment and try to make one-shot hashing and/or adaptive hashing happen.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment