This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Loop of 16x16 blocks */ | |
for (y = 0; y < heightComp[compCtr]; y += 16) | |
{ | |
... | |
for (x = 0; x < widthComp[compCtr]; x += 16) | |
{ | |
... | |
for (blkId = 0; blkId < 4; blkId++) | |
{ | |
yOffset8x8 = (blkId >> 1) * 8; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
int16_t fg_compute_block_avg(int16_t *dstSampleBlk8, uint32_t widthComp, uint16_t *pNumSamples, | |
uint8_t ySize, uint8_t xSize, uint8_t bitDepth) | |
{ | |
uint32_t blockAvg = 0; | |
uint16_t numSamples = 0; | |
uint8_t k, l; | |
for (k = 0; k < ySize; k++) | |
{ | |
for (l = 0; l < xSize; l++) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
int16_t fg_compute_block_avg_sse4(int16_t *dstSampleBlk8, uint32_t widthComp, uint16_t *pNumSamples, | |
uint8_t ySize, uint8_t xSize, uint8_t bitDepth) | |
{ | |
uint16_t blockAvg = 0; | |
uint16_t numSamples = 0; | |
__m128i acc = _mm_setzero_si128(); | |
for (int i = 0; i < ySize; i+=1, numSamples+=8) | |
{ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
void fg_blend_stripe_sse4(int16_t *dstSampleOffsetY, int16_t *srcSampleOffsetY, int32_t *grainStripe, uint32_t widthComp, uint32_t blockHeight, uint8_t bitDepth) | |
{ | |
uint32_t k, l; | |
// Prepare SIMD SSE4 ov_clip_uintp2 | |
__m128i mask = _mm_set1_epi32((1 << bitDepth)); | |
__m128i not_mask = _mm_xor_si128(mask, mask); | |
not_mask = _mm_sub_epi32(not_mask, mask); | |
mask = _mm_sub_epi32(mask, _mm_set1_epi32(1)); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Configuration macros defined ahead of the include below.
; NOTE(review): presumably read by x86inc.asm (target architecture and the
; prefix applied to exported symbol names) -- confirm against that file.
%define ARCH_X86_64 1
%define private_prefix asm
%include "x86inc.asm"

; Read-only constant vectors; the section is declared with 16-byte alignment.
; Each label below names four consecutive 32-bit values (16 bytes).
section .rodata align=16
zeros_vec:    dd 0, 0, 0, 0         ; all lanes 0
ones_vec:     dd 1, 1, 1, 1         ; all lanes 1
neg_ones_vec: dd -1, -1, -1, -1     ; all lanes -1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
static inline uint32_t ov_clip_uintp2(int32_t val, uint32_t a) | |
{ | |
if (val > 0) { | |
int32_t mask = (1 << a) - 1; | |
int32_t overflow = !!(val & (~mask)); | |
return ((-overflow) & mask) | (val & mask); | |
} else { | |
return 0; | |
} | |
#if 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
void fg_simulate_grain_blk8x8_sse4(int32_t *grainStripe, uint32_t grainStripeOffsetBlk8, | |
uint32_t width, uint8_t log2ScaleFactor, int16_t scaleFactor, uint32_t kOffset, uint32_t lOffset, uint8_t h, uint8_t v, uint32_t xSize) | |
{ | |
uint32_t idx_offset_l1, idx_offset_l2, idx_offset_l3, idx_offset_l4; | |
uint32_t grainStripeOffsetBlk8_l1, grainStripeOffsetBlk8_l2, grainStripeOffsetBlk8_l3, grainStripeOffsetBlk8_l4; | |
uint32_t idx_offset = ( h*NUM_CUT_OFF_FREQ + v ) * DATA_BASE_SIZE * DATA_BASE_SIZE; | |
__m128i scale = _mm_set_epi32(scaleFactor, scaleFactor, scaleFactor, scaleFactor); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
void fg_simulate_grain_blk8x8(int32_t *grainStripe, uint32_t grainStripeOffsetBlk8, | |
uint32_t width, uint8_t log2ScaleFactor, int16_t scaleFactor, uint32_t kOffset, uint32_t lOffset, uint8_t h, uint8_t v, uint32_t xSize) | |
{ | |
uint32_t k, l; | |
uint32_t idx, idx_offset, idx_offset_l, grainStripeOffsetBlk8_l; | |
idx_offset = ( h*NUM_CUT_OFF_FREQ + v ) * DATA_BASE_SIZE * DATA_BASE_SIZE; | |
for (l = 0; l < 8; l++) /* y direction */ | |
{ | |
idx_offset_l = idx_offset + (l + lOffset) * DATA_BASE_SIZE; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
struct BenchWrapper | |
{ | |
size_t nb_fct_call; | |
size_t iter_per_fct; | |
// Define function arguments here | |
int (*func)(const OVSEI *sei, OVFrame **frame); | |
}; | |
void bench_decorator(struct BenchWrapper bw, char* name, const OVSEI *sei, OVFrame **frame) | |
{ |