Skip to content

Instantly share code, notes, and snippets.

@yoffy
yoffy / resize.cpp
Last active June 14, 2020 13:23
bilinear image resampling (with AVX2)
// clang++ -c -std=c++11 -Wall -Wextra -Ofast -g -march=native -D NDEBUG
#include <stdint.h>
#include <math.h>
#include <vector>
#include <immintrin.h>
struct image
{
int width;
int height;
; clang++-4.0 -I/home/yoffy/src/libiqo/include -I/home/yoffy/src/libiqo/src/../include -Wall -Wextra -Wconversion -Wno-sign-conversion -std=c++98 -fno-exceptions -Wall -Wextra -Wconversion -Wno-sign-conversion -Ofast -march=core2 -msse4.1 -mtune=westmere -S -mllvm --x86-asm-syntax=intel /home/yoffy/src/libiqo/src/IQOAreaResizerImpl_SSE4_1.cpp
.LBB5_2: # =>This Loop Header: Depth=1
# Child Loop BB5_4 Depth 2
test r14d, r14d
jle .LBB5_6
# BB#3: # in Loop: Header=BB5_2 Depth=1
movdqu xmm5, xmmword ptr [r8 + 4*r15]
mov rbp, r15
# clang++-4.0 -I/home/yoffy/src/libiqo/include -I/home/yoffy/src/libiqo/src/../include -Wall -Wextra -Wconversion -Wno-sign-conversion -std=c++98 -fno-exceptions -Wall -Wextra -Wconversion -Wno-sign-conversion -Ofast -march=core2 -msse4.1 -mtune=westmere -S -mllvm --x86-asm-syntax=intel /home/yoffy/src/libiqo/src/IQOLanczosResizerImpl_SSE4_1.cpp
.LBB7_11: # =>This Loop Header: Depth=1
# Child Loop BB7_12 Depth 2
mov rax, qword ptr [rsp - 16] # 8-byte Reload
movdqu xmm1, xmmword ptr [rax + 4*r15]
movdqu xmm2, xmmword ptr [rax + 4*r15 + 16]
mov rax, qword ptr [rsp - 8] # 8-byte Reload
lea rdi, [rax + 4*r12]
xorps xmm0, xmm0