Created
January 6, 2015 12:57
-
-
Save pczarn/2bb4e5c98e063312bd42 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Raw pointer-and-length pair describing a run of bytes.
///
/// `merge_slice` fills it in by hand instead of using a `&[u8]`.
pub struct MySlice { | |
/// Pointer to the first byte of the run.
start: *const u8, | |
/// Number of bytes counted so far for the run.
len: uint, | |
} | |
/// Walks `val` one byte position at a time and returns a `MySlice` whose
/// `start` is `val.as_ptr()` and whose `len` is the number of positions
/// stepped over.
///
/// As written, `tmp1` and `tmp2` both start at `val_ptr` and advance in
/// lockstep, so the `tmp1 != tmp2` branch is never taken and the result
/// covers the whole input. For an empty `val` the outer loop is skipped and
/// the initial `MySlice { start, len: 0 }` is returned.
///
/// NOTE(review): this looks like a reduced code-generation test case; the
/// exact statement order is presumably deliberate, so do not "simplify" it.
// Not inlined, so the function stays a separate symbol in the output.
#[inline(never)] | |
pub fn merge_slice(val: &[u8]) -> MySlice { | |
let mut val_ptr = val.as_ptr(); | |
// Remaining number of bytes still to be stepped over.
let mut len = val.len(); | |
// Run currently being accumulated; starts empty at the front of `val`.
let mut state = unsafe { | |
MySlice { | |
start: val_ptr, | |
len: 0, | |
} | |
}; | |
// SAFETY: each `offset(1)` below is executed only while `len != 0`, and
// `len` is decremented once per step, so the pointers advance at most
// `val.len()` times from `val.as_ptr()`.
unsafe { | |
while len != 0 { | |
let mut tmp1 = val_ptr; | |
let mut tmp2 = val_ptr; | |
loop { | |
tmp1 = tmp1.offset(1); | |
tmp2 = tmp2.offset(1); | |
len -= 1; | |
state.len += 1; | |
// Both pointers were advanced identically, so they are still equal here.
if tmp1 != tmp2 { | |
// `black_box` is not defined in this snippet; presumably an
// optimization barrier supplied by the surrounding crate -- confirm.
black_box(&state); | |
// Start a fresh, empty run at the new position.
val_ptr = tmp2; | |
state = MySlice { | |
start: val_ptr, | |
len: 0, | |
}; | |
break; | |
} | |
// Input exhausted: hand back the run accumulated so far.
if len == 0 { | |
return state; | |
} | |
} | |
} | |
} | |
state | |
} | |
/// Driver for the test case: calls `merge_slice` on a fixed five-byte slice
/// and passes the result to `black_box`.
fn main() { | |
let mut val_slice: &[u8] = &[12, 23, 34, 45, 12]; | |
// The slice is reached through an extra reference and dereferenced at the call.
let mut r = &val_slice; | |
// `black_box` is not defined in this snippet; presumably it keeps the
// result from being optimized away -- confirm.
black_box(&merge_slice(*r)); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; LLVM IR emitted for `merge_slice`.
; %0 is the `sret` slot for the returned MySlice; %1 points at the
; { i8*, i64 } pair holding the input slice's pointer and length.
; Function Attrs: noinline nounwind uwtable | |
define internal fastcc void @_ZN11merge_slice20ha8074868c3655a38MdaE(%struct.MySlice* noalias nocapture sret dereferenceable(16), { i8*, i64 }* noalias nocapture dereferenceable(16)) unnamed_addr #1 { | |
entry-block: | |
%state = alloca %struct.MySlice, align 8 | |
%2 = bitcast { i8*, i64 }* %1 to i8* | |
; Load the slice's data pointer and length out of the argument pair.
%arg.sroa.0.0..sroa_idx = getelementptr inbounds { i8*, i64 }* %1, i64 0, i32 0 | |
%arg.sroa.0.0.copyload = load i8** %arg.sroa.0.0..sroa_idx, align 8 | |
%arg.sroa.5.0..sroa_idx32 = getelementptr inbounds { i8*, i64 }* %1, i64 0, i32 1 | |
%arg1.sroa.4.0.copyload = load i64* %arg.sroa.5.0..sroa_idx32, align 8 | |
%3 = bitcast %struct.MySlice* %state to i8* | |
call void @llvm.lifetime.start(i64 16, i8* %3) | |
; Initialise %state to { start = data pointer, len = 0 }.
%4 = getelementptr inbounds %struct.MySlice* %state, i64 0, i32 0 | |
store i8* %arg.sroa.0.0.copyload, i8** %4, align 8 | |
%5 = getelementptr inbounds %struct.MySlice* %state, i64 0, i32 1 | |
store i64 0, i64* %5, align 8 | |
; Empty input: skip the loop and return the initial %state.
%6 = icmp eq i64 %arg1.sroa.4.0.copyload, 0 | |
br i1 %6, label %while_exit, label %next-block.preheader | |
next-block.preheader: ; preds = %entry-block | |
br label %next-block | |
while_exit: ; preds = %entry-block | |
; Copy %state into the return slot.
%7 = bitcast %struct.MySlice* %0 to i8* | |
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* %3, i64 16, i32 8, i1 false) | |
call void @llvm.lifetime.end(i64 16, i8* %3) | |
br label %clean_custom_ | |
; Core loop: %len.1 counts the remaining length down while %8 counts up from 0.
; No pointer comparison or byte load appears in this loop.
next-block: ; preds = %next-block.preheader, %next-block | |
%8 = phi i64 [ %10, %next-block ], [ 0, %next-block.preheader ] | |
%len.1 = phi i64 [ %9, %next-block ], [ %arg1.sroa.4.0.copyload, %next-block.preheader ] | |
%9 = add i64 %len.1, -1 | |
%10 = add i64 %8, 1 | |
%11 = icmp eq i64 %9, 0 | |
br i1 %11, label %then-block-319-, label %next-block | |
then-block-319-: ; preds = %next-block | |
; Loop finished: store the final count into %state's len field, then copy
; %state into the return slot.
%.lcssa = phi i64 [ %10, %next-block ] | |
store i64 %.lcssa, i64* %5, align 8 | |
%12 = bitcast %struct.MySlice* %0 to i8* | |
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %12, i8* %3, i64 16, i32 8, i1 false) | |
call void @llvm.lifetime.end(i64 16, i8* %3) | |
br label %clean_custom_ | |
clean_custom_: ; preds = %while_exit, %then-block-319- | |
call void @llvm.lifetime.end(i64 16, i8* %2) | |
ret void | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# x86-64 (AT&T syntax) assembly emitted for `merge_slice`.
# %rdi = address of the returned MySlice, %rsi = address of the input
# (pointer, length) pair.
_ZN11merge_slice20ha8074868c3655a38MdaE: | |
.cfi_startproc | |
# Prologue: compare %rsp against the limit at %fs:112 and call
# __morestack when %rsp is not above it.
cmpq %fs:112, %rsp | |
ja .LBB0_2 | |
movabsq $16, %r10 | |
movabsq $0, %r11 | |
callq __morestack | |
retq | |
.LBB0_2: | |
subq $16, %rsp | |
.Ltmp0: | |
.cfi_def_cfa_offset 24 | |
# Load the data pointer (%rcx) and length (%rax) from the argument pair.
movq (%rsi), %rcx | |
movq 8(%rsi), %rax | |
# Stack copy of the result: start = data pointer, len = 0.
movq %rcx, (%rsp) | |
movq $0, 8(%rsp) | |
# %rcx now becomes the counter, starting at zero.
xorl %ecx, %ecx | |
# Empty input: skip the counting loop.
testq %rax, %rax | |
je .LBB0_3 | |
.align 16, 0x90 | |
# Core loop: increment %rcx until it equals the length in %rax.
.LBB0_4: | |
incq %rcx | |
cmpq %rcx, %rax | |
jne .LBB0_4 | |
# Store the final count as `len` in the stack copy and in the result.
movq %rcx, 8(%rsp) | |
movq %rcx, 8(%rdi) | |
movq (%rsp), %rax | |
jmp .LBB0_6 | |
.LBB0_3: | |
# Empty-input path: copy the initial `len` from the stack copy to the result.
movq (%rsp), %rax | |
movq 8(%rsp), %rcx | |
movq %rcx, 8(%rdi) | |
.LBB0_6: | |
# Store `start` into the result and return the result's address in %rax.
movq %rax, (%rdi) | |
movq %rdi, %rax | |
addq $16, %rsp | |
retq | |
.Ltmp1: | |
.size _ZN11merge_slice20ha8074868c3655a38MdaE, .Ltmp1-_ZN11merge_slice20ha8074868c3655a38MdaE | |
.cfi_endproc |
dotdash
commented
Feb 5, 2015
@dotdash could you elaborate on your comment from 4 years ago? (Also related stuff https://gist.github.com/pczarn/62a953ea9ad40d264487 )
@pczarn Sorry, I don't remember what that comment was about. Seems that the IR in my version has the core loop unrolled, but I have no idea whatsoever what caused that difference, i.e. whether I used a different opt-level or modified the compiler or...
@dotdash OK, no worries. Next month I will investigate what causes the difference with respect to unrolling the core loop.
I will experiment and try to make one-shot hashing and/or adaptive hashing happen.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment