Skip to content

Instantly share code, notes, and snippets.

@pczarn
Created January 6, 2015 12:57
Show Gist options
  • Save pczarn/2bb4e5c98e063312bd42 to your computer and use it in GitHub Desktop.
Save pczarn/2bb4e5c98e063312bd42 to your computer and use it in GitHub Desktop.
/// A raw (pointer, length) description of a run of bytes.
///
/// Unlike `&[u8]`, this carries no lifetime: nothing ties `start` to the
/// memory it was derived from, so reading through it is the user's
/// responsibility.
pub struct MySlice {
    /// Address of the first byte of the run.
    start: *const u8,
    /// Number of bytes in the run.
    // `usize` replaces the pre-1.0 `uint` alias, which was renamed and then
    // removed from the language; the width and signedness are unchanged.
    len: usize,
}
/// Steps through `val` one byte at a time and returns a `MySlice` whose
/// `start` is `val.as_ptr()` and whose `len` is the number of bytes stepped
/// over.
///
/// The two cursors `tmp1` and `tmp2` start at the same address and advance in
/// lockstep, so the `tmp1 != tmp2` branch is never taken: the function always
/// leaves through `return state` (non-empty input) or the trailing `state`
/// (empty input), with `state.len == val.len()`.
///
/// NOTE(review): the odd shape looks deliberate -- this reads as a minimized
/// codegen reproducer, presumably kept out-of-line via `#[inline(never)]` so
/// its generated code can be inspected. Confirm before simplifying.
#[inline(never)]
pub fn merge_slice(val: &[u8]) -> MySlice {
let mut val_ptr = val.as_ptr();
// Bytes still to be stepped over.
let mut len = val.len();
// The struct literal performs no unsafe operation; this `unsafe` wrapper is
// redundant.
let mut state = unsafe {
MySlice {
start: val_ptr,
len: 0,
}
};
// Needed for the raw-pointer `offset` calls below.
unsafe {
while len != 0 {
// Both cursors start at the same address...
let mut tmp1 = val_ptr;
let mut tmp2 = val_ptr;
loop {
// ...and are advanced identically, so they stay equal.
tmp1 = tmp1.offset(1);
tmp2 = tmp2.offset(1);
len -= 1;
state.len += 1;
// Never true (see above): the run is never handed to `black_box` and
// `state` is never restarted at a new address.
if tmp1 != tmp2 {
// `black_box` is not defined in the visible snippet -- presumably the
// `test` crate's optimization barrier; TODO confirm.
black_box(&state);
val_ptr = tmp2;
state = MySlice {
start: val_ptr,
len: 0,
};
break;
}
// Input exhausted: `state.len` now equals the original `val.len()`.
if len == 0 {
return state;
}
}
}
}
// Reached only for empty input, where the `while` body never runs.
state
}
/// Driver: runs `merge_slice` over a fixed five-byte slice and passes a
/// reference to the result to `black_box`.
fn main() {
    let bytes: &[u8] = &[12, 23, 34, 45, 12];
    black_box(&merge_slice(bytes));
}
; LLVM IR emitted for merge_slice. %0 is the sret slot that receives the
; MySlice result; %1 points at the { i8*, i64 } (pointer, length) pair of the
; slice argument.
; Function Attrs: noinline nounwind uwtable
define internal fastcc void @_ZN11merge_slice20ha8074868c3655a38MdaE(%struct.MySlice* noalias nocapture sret dereferenceable(16), { i8*, i64 }* noalias nocapture dereferenceable(16)) unnamed_addr #1 {
entry-block:
%state = alloca %struct.MySlice, align 8
%2 = bitcast { i8*, i64 }* %1 to i8*
; Load the slice's data pointer and length out of %1.
%arg.sroa.0.0..sroa_idx = getelementptr inbounds { i8*, i64 }* %1, i64 0, i32 0
%arg.sroa.0.0.copyload = load i8** %arg.sroa.0.0..sroa_idx, align 8
%arg.sroa.5.0..sroa_idx32 = getelementptr inbounds { i8*, i64 }* %1, i64 0, i32 1
%arg1.sroa.4.0.copyload = load i64* %arg.sroa.5.0..sroa_idx32, align 8
%3 = bitcast %struct.MySlice* %state to i8*
call void @llvm.lifetime.start(i64 16, i8* %3)
; Initialise the stack copy of `state`: field 0 = data pointer, field 1 = 0.
%4 = getelementptr inbounds %struct.MySlice* %state, i64 0, i32 0
store i8* %arg.sroa.0.0.copyload, i8** %4, align 8
%5 = getelementptr inbounds %struct.MySlice* %state, i64 0, i32 1
store i64 0, i64* %5, align 8
; Length 0: skip the loop entirely.
%6 = icmp eq i64 %arg1.sroa.4.0.copyload, 0
br i1 %6, label %while_exit, label %next-block.preheader
next-block.preheader: ; preds = %entry-block
br label %next-block
; Empty-input exit: copy the untouched `state` into the sret slot.
while_exit: ; preds = %entry-block
%7 = bitcast %struct.MySlice* %0 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* %3, i64 16, i32 8, i1 false)
call void @llvm.lifetime.end(i64 16, i8* %3)
br label %clean_custom_
; Counting loop: %8 counts up from 0 while %len.1 counts down from the input
; length. No pointer comparison and no call to black_box appear in this
; listing; the loop body is only the two adds and the exit test.
next-block: ; preds = %next-block.preheader, %next-block
%8 = phi i64 [ %10, %next-block ], [ 0, %next-block.preheader ]
%len.1 = phi i64 [ %9, %next-block ], [ %arg1.sroa.4.0.copyload, %next-block.preheader ]
%9 = add i64 %len.1, -1
%10 = add i64 %8, 1
%11 = icmp eq i64 %9, 0
br i1 %11, label %then-block-319-, label %next-block
; Loop finished: store the final count into field 1 of `state`, then copy
; `state` into the sret slot.
then-block-319-: ; preds = %next-block
%.lcssa = phi i64 [ %10, %next-block ]
store i64 %.lcssa, i64* %5, align 8
%12 = bitcast %struct.MySlice* %0 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %12, i8* %3, i64 16, i32 8, i1 false)
call void @llvm.lifetime.end(i64 16, i8* %3)
br label %clean_custom_
; Common exit shared by both paths.
clean_custom_: ; preds = %while_exit, %then-block-319-
call void @llvm.lifetime.end(i64 16, i8* %2)
ret void
}
# x86-64 (AT&T syntax) code for merge_slice. %rdi holds the address the
# result is written to; %rsi points at the (pointer, length) pair of the
# slice argument.
_ZN11merge_slice20ha8074868c3655a38MdaE:
.cfi_startproc
# Stack-limit check: when %rsp is not above the value at %fs:112, call
# __morestack and return.
cmpq %fs:112, %rsp
ja .LBB0_2
movabsq $16, %r10
movabsq $0, %r11
callq __morestack
retq
.LBB0_2:
# Reserve 16 bytes of stack for the local MySlice.
subq $16, %rsp
.Ltmp0:
.cfi_def_cfa_offset 24
# %rcx = slice data pointer, %rax = slice length.
movq (%rsi), %rcx
movq 8(%rsi), %rax
# Stack copy of the struct: offset 0 = data pointer, offset 8 = 0.
movq %rcx, (%rsp)
movq $0, 8(%rsp)
# %rcx is reused as the counter, starting from 0.
xorl %ecx, %ecx
# Length 0: skip the loop.
testq %rax, %rax
je .LBB0_3
.align 16, 0x90
# Counting loop: increment %rcx until it equals the length in %rax.
.LBB0_4:
incq %rcx
cmpq %rcx, %rax
jne .LBB0_4
# Store the final count to the stack copy and to offset 8 of the result, then
# reload the data pointer from the stack copy.
movq %rcx, 8(%rsp)
movq %rcx, 8(%rdi)
movq (%rsp), %rax
jmp .LBB0_6
# Empty-input path: copy both fields of the stack copy out unchanged.
.LBB0_3:
movq (%rsp), %rax
movq 8(%rsp), %rcx
movq %rcx, 8(%rdi)
# Common exit: write the data pointer to offset 0 of the result and return the
# result address in %rax.
.LBB0_6:
movq %rax, (%rdi)
movq %rdi, %rax
addq $16, %rsp
retq
.Ltmp1:
.size _ZN11merge_slice20ha8074868c3655a38MdaE, .Ltmp1-_ZN11merge_slice20ha8074868c3655a38MdaE
.cfi_endproc
@pczarn
Copy link
Author

pczarn commented May 15, 2019

@dotdash could you elaborate on your comment from 4 years ago? (Also related stuff https://gist.github.com/pczarn/62a953ea9ad40d264487 )

@dotdash
Copy link

dotdash commented May 16, 2019

@pczarn Sorry, I don't remember what that comment was about. Seems that the IR in my version has the core loop unrolled, but I have no idea whatsoever what caused that difference, i.e. whether I used a different opt-level or modified the compiler or...

@pczarn
Copy link
Author

pczarn commented May 17, 2019

@dotdash OK, no worries. Next month I will investigate what causes the difference w.r.t. unrolling the core loop.

I will experiment and try to make one-shot hashing and/or adaptive hashing happen.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment