Skip to content

Instantly share code, notes, and snippets.

@Jutho
Created January 13, 2019 13:02
Show Gist options
  • Save Jutho/93be2e4ff71944bc7a5e33b3052d82db to your computer and use it in GitHub Desktop.
Save Jutho/93be2e4ff71944bc7a5e33b3052d82db to your computer and use it in GitHub Desktop.
gen_diff vs rec_diff
; Function rec_diff
; NOTE(review): the `define` line is missing from this listing, so %0 and %1
; are never declared here; presumably they are the function's two pointer
; arguments -- confirm against the full IR.
; From their uses below: %1 points to a [20 x i64] in addrspace(11) and is only
; read (called `in` in these comments); %0 points to a [19 x i64] and is only
; written (called `out`).
; Net effect: out[i] = in[i+1] - in[i] for i = 0..18, done as four <4 x i64>
; subtractions, one scalar subtraction and one <2 x i64> subtraction.
;
; Element addresses &in[1], &in[0], &in[5], &in[9], &in[13], &in[17], &in[18]
; are all computed up front; in[0] is loaded as a scalar (%4).
%2 = getelementptr [20 x i64], [20 x i64] addrspace(11)* %1, i64 0, i64 1
%3 = getelementptr [20 x i64], [20 x i64] addrspace(11)* %1, i64 0, i64 0
%4 = load i64, i64 addrspace(11)* %3, align 8
%5 = getelementptr [20 x i64], [20 x i64] addrspace(11)* %1, i64 0, i64 5
%6 = getelementptr [20 x i64], [20 x i64] addrspace(11)* %1, i64 0, i64 9
%7 = getelementptr [20 x i64], [20 x i64] addrspace(11)* %1, i64 0, i64 13
%8 = getelementptr [20 x i64], [20 x i64] addrspace(11)* %1, i64 0, i64 17
%9 = getelementptr [20 x i64], [20 x i64] addrspace(11)* %1, i64 0, i64 18
; Chunk 0: %11 = in[1..4]. %12 holds in[0] in lane 0 (other lanes undef); the
; shuffle takes lane 0 of %12 and lanes 0..2 of %11, giving %13 = in[0..3].
; %14 = in[1..4] - in[0..3].
%10 = bitcast i64 addrspace(11)* %2 to <4 x i64> addrspace(11)*
%11 = load <4 x i64>, <4 x i64> addrspace(11)* %10, align 8
%12 = insertelement <4 x i64> undef, i64 %4, i32 0
%13 = shufflevector <4 x i64> %12, <4 x i64> %11, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
%14 = sub <4 x i64> %11, %13
; Chunk 1: %16 = in[5..8]. Mask <3,4,5,6> takes the last lane of the previous
; load followed by the first three lanes of this one, so %17 = in[4..7].
; %18 = in[5..8] - in[4..7].
%15 = bitcast i64 addrspace(11)* %5 to <4 x i64> addrspace(11)*
%16 = load <4 x i64>, <4 x i64> addrspace(11)* %15, align 8
%17 = shufflevector <4 x i64> %11, <4 x i64> %16, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
%18 = sub <4 x i64> %16, %17
; Chunk 2: %20 = in[9..12], %21 = in[8..11], %22 = in[9..12] - in[8..11].
%19 = bitcast i64 addrspace(11)* %6 to <4 x i64> addrspace(11)*
%20 = load <4 x i64>, <4 x i64> addrspace(11)* %19, align 8
%21 = shufflevector <4 x i64> %16, <4 x i64> %20, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
%22 = sub <4 x i64> %20, %21
; Chunk 3: %24 = in[13..16], %25 = in[12..15], %26 = in[13..16] - in[12..15].
%23 = bitcast i64 addrspace(11)* %7 to <4 x i64> addrspace(11)*
%24 = load <4 x i64>, <4 x i64> addrspace(11)* %23, align 8
%25 = shufflevector <4 x i64> %20, <4 x i64> %24, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
%26 = sub <4 x i64> %24, %25
; Scalar step: %27 = in[17], %28 = last lane of %24 = in[16],
; %29 = in[17] - in[16].
%27 = load i64, i64 addrspace(11)* %8, align 8
%28 = extractelement <4 x i64> %24, i32 3
%29 = sub i64 %27, %28
; Tail: %31 = in[18..19]. %32 holds in[17] in lane 0; mask <0,2> takes lane 0
; of %32 and lane 0 of %31, so %33 = in[17..18]. %34 = in[18..19] - in[17..18].
%30 = bitcast i64 addrspace(11)* %9 to <2 x i64> addrspace(11)*
%31 = load <2 x i64>, <2 x i64> addrspace(11)* %30, align 8
%32 = insertelement <2 x i64> undef, i64 %27, i32 0
%33 = shufflevector <2 x i64> %32, <2 x i64> %31, <2 x i32> <i32 0, i32 2>
%34 = sub <2 x i64> %31, %33
; Write the 19 differences to out: four <4 x i64> stores at element offsets
; 0, 4, 8 and 12, a scalar store at offset 16, and a <2 x i64> store at
; offset 17. All stores use align 8.
%35 = bitcast [19 x i64]* %0 to <4 x i64>*
store <4 x i64> %14, <4 x i64>* %35, align 8
%.sroa.5.0..sroa_idx4 = getelementptr inbounds [19 x i64], [19 x i64]* %0, i64 0, i64 4
%36 = bitcast i64* %.sroa.5.0..sroa_idx4 to <4 x i64>*
store <4 x i64> %18, <4 x i64>* %36, align 8
%.sroa.9.0..sroa_idx8 = getelementptr inbounds [19 x i64], [19 x i64]* %0, i64 0, i64 8
%37 = bitcast i64* %.sroa.9.0..sroa_idx8 to <4 x i64>*
store <4 x i64> %22, <4 x i64>* %37, align 8
%.sroa.13.0..sroa_idx12 = getelementptr inbounds [19 x i64], [19 x i64]* %0, i64 0, i64 12
%38 = bitcast i64* %.sroa.13.0..sroa_idx12 to <4 x i64>*
store <4 x i64> %26, <4 x i64>* %38, align 8
%.sroa.17.0..sroa_idx16 = getelementptr inbounds [19 x i64], [19 x i64]* %0, i64 0, i64 16
store i64 %29, i64* %.sroa.17.0..sroa_idx16, align 8
%.sroa.18.0..sroa_idx17 = getelementptr inbounds [19 x i64], [19 x i64]* %0, i64 0, i64 17
%39 = bitcast i64* %.sroa.18.0..sroa_idx17 to <2 x i64>*
store <2 x i64> %34, <2 x i64>* %39, align 8
ret void
}
; Function gen_diff
; NOTE(review): the `define` line is missing from this listing, so %0 and %1
; are never declared here; presumably they are the function's two pointer
; arguments -- confirm against the full IR.
; From their uses below: %1 points to a [20 x i64] in addrspace(11) and is only
; read (called `in` in these comments); %0 points to a [19 x i64] and is only
; written (called `out`).
; Net effect: out[i] = in[i+1] - in[i] for i = 0..18, done as four <4 x i64>
; subtractions, one scalar subtraction and one <2 x i64> subtraction.
; In this listing each getelementptr sits directly before the load that uses it.
;
; Chunk 0: %4 = in[0], %6 = in[1..4]. %7 holds in[0] in lane 0 (other lanes
; undef); the shuffle takes lane 0 of %7 and lanes 0..2 of %6, giving
; %8 = in[0..3]. %9 = in[1..4] - in[0..3].
%2 = getelementptr [20 x i64], [20 x i64] addrspace(11)* %1, i64 0, i64 1
%3 = getelementptr [20 x i64], [20 x i64] addrspace(11)* %1, i64 0, i64 0
%4 = load i64, i64 addrspace(11)* %3, align 8
%5 = bitcast i64 addrspace(11)* %2 to <4 x i64> addrspace(11)*
%6 = load <4 x i64>, <4 x i64> addrspace(11)* %5, align 8
%7 = insertelement <4 x i64> undef, i64 %4, i32 0
%8 = shufflevector <4 x i64> %7, <4 x i64> %6, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
%9 = sub <4 x i64> %6, %8
; Chunk 1: %12 = in[5..8]. Mask <3,4,5,6> takes the last lane of the previous
; load followed by the first three lanes of this one, so %13 = in[4..7].
; %14 = in[5..8] - in[4..7].
%10 = getelementptr [20 x i64], [20 x i64] addrspace(11)* %1, i64 0, i64 5
%11 = bitcast i64 addrspace(11)* %10 to <4 x i64> addrspace(11)*
%12 = load <4 x i64>, <4 x i64> addrspace(11)* %11, align 8
%13 = shufflevector <4 x i64> %6, <4 x i64> %12, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
%14 = sub <4 x i64> %12, %13
; Chunk 2: %17 = in[9..12], %18 = in[8..11], %19 = in[9..12] - in[8..11].
%15 = getelementptr [20 x i64], [20 x i64] addrspace(11)* %1, i64 0, i64 9
%16 = bitcast i64 addrspace(11)* %15 to <4 x i64> addrspace(11)*
%17 = load <4 x i64>, <4 x i64> addrspace(11)* %16, align 8
%18 = shufflevector <4 x i64> %12, <4 x i64> %17, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
%19 = sub <4 x i64> %17, %18
; Chunk 3: %22 = in[13..16], %23 = in[12..15], %24 = in[13..16] - in[12..15].
%20 = getelementptr [20 x i64], [20 x i64] addrspace(11)* %1, i64 0, i64 13
%21 = bitcast i64 addrspace(11)* %20 to <4 x i64> addrspace(11)*
%22 = load <4 x i64>, <4 x i64> addrspace(11)* %21, align 8
%23 = shufflevector <4 x i64> %17, <4 x i64> %22, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
%24 = sub <4 x i64> %22, %23
; Scalar step: %26 = in[17], %27 = last lane of %22 = in[16],
; %28 = in[17] - in[16].
%25 = getelementptr [20 x i64], [20 x i64] addrspace(11)* %1, i64 0, i64 17
%26 = load i64, i64 addrspace(11)* %25, align 8
%27 = extractelement <4 x i64> %22, i32 3
%28 = sub i64 %26, %27
; Tail: %31 = in[18..19]. %32 holds in[17] in lane 0; mask <0,2> takes lane 0
; of %32 and lane 0 of %31, so %33 = in[17..18]. %34 = in[18..19] - in[17..18].
%29 = getelementptr [20 x i64], [20 x i64] addrspace(11)* %1, i64 0, i64 18
%30 = bitcast i64 addrspace(11)* %29 to <2 x i64> addrspace(11)*
%31 = load <2 x i64>, <2 x i64> addrspace(11)* %30, align 8
%32 = insertelement <2 x i64> undef, i64 %26, i32 0
%33 = shufflevector <2 x i64> %32, <2 x i64> %31, <2 x i32> <i32 0, i32 2>
%34 = sub <2 x i64> %31, %33
; Write the 19 differences to out: four <4 x i64> stores at element offsets
; 0, 4, 8 and 12, a scalar store at offset 16, and a <2 x i64> store at
; offset 17. All stores use align 8.
%35 = bitcast [19 x i64]* %0 to <4 x i64>*
store <4 x i64> %9, <4 x i64>* %35, align 8
%.sroa.5.0..sroa_idx4 = getelementptr inbounds [19 x i64], [19 x i64]* %0, i64 0, i64 4
%36 = bitcast i64* %.sroa.5.0..sroa_idx4 to <4 x i64>*
store <4 x i64> %14, <4 x i64>* %36, align 8
%.sroa.9.0..sroa_idx8 = getelementptr inbounds [19 x i64], [19 x i64]* %0, i64 0, i64 8
%37 = bitcast i64* %.sroa.9.0..sroa_idx8 to <4 x i64>*
store <4 x i64> %19, <4 x i64>* %37, align 8
%.sroa.13.0..sroa_idx12 = getelementptr inbounds [19 x i64], [19 x i64]* %0, i64 0, i64 12
%38 = bitcast i64* %.sroa.13.0..sroa_idx12 to <4 x i64>*
store <4 x i64> %24, <4 x i64>* %38, align 8
%.sroa.17.0..sroa_idx16 = getelementptr inbounds [19 x i64], [19 x i64]* %0, i64 0, i64 16
store i64 %28, i64* %.sroa.17.0..sroa_idx16, align 8
%.sroa.18.0..sroa_idx17 = getelementptr inbounds [19 x i64], [19 x i64]* %0, i64 0, i64 17
%39 = bitcast i64* %.sroa.18.0..sroa_idx17 to <2 x i64>*
store <2 x i64> %34, <2 x i64>* %39, align 8
ret void
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment