Created
June 1, 2016 08:37
-
-
Save wjchen/4e99fd23ba1c940466bc7a2837a3ff09 to your computer and use it in GitHub Desktop.
yuyv to bgr
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <arm_neon.h> | |
/* Saturate a signed intermediate value to the 0..255 range of one channel. */
static unsigned char yuyv2bgr_clamp_u8(int v)
{
    if (v < 0) {
        return 0;
    }
    if (v > 255) {
        return 255;
    }
    return (unsigned char)v;
}

/**
 * Convert packed YUYV bytes to packed 8-bit BGR (scalar reference version).
 *
 * Every complete 4-byte input group {Y0, U, Y1, V} yields two output pixels
 * (6 bytes): {B0, G0, R0, B1, G1, R1}. Both pixels share the group's U and V.
 *
 * @param yuv  input buffer, at least `len` readable bytes
 * @param len  number of input bytes; a trailing partial group (len % 4 bytes)
 *             is ignored rather than read past the end of the buffer
 * @param bgr  output buffer, at least (len / 4) * 6 writable bytes
 *
 * Each channel is saturated to 0..255 instead of wrapping on overflow.
 */
void yuyv2bgr(const unsigned char *yuv, int len, unsigned char *bgr)
{
    /* Q14 fixed-point chroma weights (value / 16384): roughly 1.403, 0.344,
     * 0.714 and 1.773 -- these look like the usual BT.601-style factors. */
    enum {
        COEF_R_V = 22987,
        COEF_G_U = -5636,
        COEF_G_V = -11698,
        COEF_B_U = 29049,
        COEF_SHIFT = 14
    };

    const unsigned char *src = yuv;
    unsigned char *dst = bgr;

    /* Count down the remaining bytes so only whole groups are consumed. */
    for (int remaining = len; remaining >= 4; remaining -= 4) {
        const int y0 = src[0];
        const int u = src[1] - 128;
        const int y1 = src[2];
        const int v = src[3] - 128;

        /* The products may be negative; like the original code this relies on
         * '>>' being an arithmetic shift for negative ints (implementation-
         * defined in C, true for all mainstream compilers). */
        const int r = (COEF_R_V * v) >> COEF_SHIFT;
        const int g = (COEF_G_U * u + COEF_G_V * v) >> COEF_SHIFT;
        const int b = (COEF_B_U * u) >> COEF_SHIFT;

        dst[0] = yuyv2bgr_clamp_u8(y0 + b);
        dst[1] = yuyv2bgr_clamp_u8(y0 + g);
        dst[2] = yuyv2bgr_clamp_u8(y0 + r);
        dst[3] = yuyv2bgr_clamp_u8(y1 + b);
        dst[4] = yuyv2bgr_clamp_u8(y1 + g);
        dst[5] = yuyv2bgr_clamp_u8(y1 + r);

        src += 4;
        dst += 6;
    }
}
/* Saturate a signed intermediate value to 0..255 (used by the scalar tail). */
static unsigned char yuyv2bgr_neon_clamp_u8(int v)
{
    if (v < 0) {
        return 0;
    }
    if (v > 255) {
        return 255;
    }
    return (unsigned char)v;
}

/* Add luma to a chroma term (both as two 32-bit halves) and narrow the eight
 * sums to bytes, saturating to 0..255 at both narrowing steps. */
static inline uint8x8_t yuyv2bgr_neon_pack_u8(int32x4_t y_lo, int32x4_t y_hi,
                                              int32x4_t c_lo, int32x4_t c_hi)
{
    const uint16x4_t lo = vqmovun_s32(vaddq_s32(y_lo, c_lo));
    const uint16x4_t hi = vqmovun_s32(vaddq_s32(y_hi, c_hi));

    /* vqmovn (not vmovn): values above 255 must clamp, not wrap. */
    return vqmovn_u16(vcombine_u16(lo, hi));
}

/**
 * Convert packed YUYV bytes to packed 8-bit BGR using NEON intrinsics.
 *
 * Every complete 4-byte input group {Y0, U, Y1, V} yields two output pixels
 * (6 bytes): {B0, G0, R0, B1, G1, R1}. Eight groups (32 input bytes, 48 output
 * bytes) are processed per vector iteration; remaining complete groups are
 * handled by a scalar loop using the same fixed-point arithmetic.
 *
 * @param yuv  input buffer, at least `len` readable bytes
 * @param len  number of input bytes; a trailing partial group (len % 4 bytes)
 *             is ignored
 * @param bgr  output buffer, at least (len / 4) * 6 writable bytes
 *
 * Each channel is saturated to 0..255.
 */
void yuyv2bgr_neon(const unsigned char *yuv, int len, unsigned char *bgr)
{
    /* Q14 fixed-point chroma weights (value / 16384). */
    enum {
        COEF_R_V = 22987,
        COEF_G_U = -5636,
        COEF_G_V = -11698,
        COEF_B_U = 29049,
        COEF_SHIFT = 14
    };

    const int16x8_t half = vdupq_n_s16(128);
    const unsigned char *src = yuv;
    unsigned char *dst = bgr;
    int remaining = len;

    /* Vector path: only runs while a full 32-byte block is available, so no
     * load or store goes past the caller's buffers. */
    for (; remaining >= 32; remaining -= 32, src += 32, dst += 48) {
        /* De-interleave: val[0]=Y0, val[1]=U, val[2]=Y1, val[3]=V. */
        const uint8x8x4_t px = vld4_u8(src);

        const int16x8_t y_even = vreinterpretq_s16_u16(vmovl_u8(px.val[0]));
        const int16x8_t y_odd = vreinterpretq_s16_u16(vmovl_u8(px.val[2]));
        const int16x8_t u =
            vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(px.val[1])), half);
        const int16x8_t v =
            vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(px.val[3])), half);

        /* (29049 * (U - 128)) >> 14 */
        const int32x4_t b_lo =
            vshrq_n_s32(vmull_n_s16(vget_low_s16(u), COEF_B_U), COEF_SHIFT);
        const int32x4_t b_hi =
            vshrq_n_s32(vmull_n_s16(vget_high_s16(u), COEF_B_U), COEF_SHIFT);

        /* (-5636 * (U - 128) - 11698 * (V - 128)) >> 14 */
        const int32x4_t g_lo = vshrq_n_s32(
            vaddq_s32(vmull_n_s16(vget_low_s16(u), COEF_G_U),
                      vmull_n_s16(vget_low_s16(v), COEF_G_V)),
            COEF_SHIFT);
        const int32x4_t g_hi = vshrq_n_s32(
            vaddq_s32(vmull_n_s16(vget_high_s16(u), COEF_G_U),
                      vmull_n_s16(vget_high_s16(v), COEF_G_V)),
            COEF_SHIFT);

        /* (22987 * (V - 128)) >> 14 */
        const int32x4_t r_lo =
            vshrq_n_s32(vmull_n_s16(vget_low_s16(v), COEF_R_V), COEF_SHIFT);
        const int32x4_t r_hi =
            vshrq_n_s32(vmull_n_s16(vget_high_s16(v), COEF_R_V), COEF_SHIFT);

        const int32x4_t ye_lo = vmovl_s16(vget_low_s16(y_even));
        const int32x4_t ye_hi = vmovl_s16(vget_high_s16(y_even));
        const int32x4_t yo_lo = vmovl_s16(vget_low_s16(y_odd));
        const int32x4_t yo_hi = vmovl_s16(vget_high_s16(y_odd));

        /* Lane k of *_even is pixel 2k, lane k of *_odd is pixel 2k+1. */
        const uint8x8_t b_even = yuyv2bgr_neon_pack_u8(ye_lo, ye_hi, b_lo, b_hi);
        const uint8x8_t g_even = yuyv2bgr_neon_pack_u8(ye_lo, ye_hi, g_lo, g_hi);
        const uint8x8_t r_even = yuyv2bgr_neon_pack_u8(ye_lo, ye_hi, r_lo, r_hi);
        const uint8x8_t b_odd = yuyv2bgr_neon_pack_u8(yo_lo, yo_hi, b_lo, b_hi);
        const uint8x8_t g_odd = yuyv2bgr_neon_pack_u8(yo_lo, yo_hi, g_lo, g_hi);
        const uint8x8_t r_odd = yuyv2bgr_neon_pack_u8(yo_lo, yo_hi, r_lo, r_hi);

        /* Zip even/odd lanes back into pixel order: val[0] holds pixels 0..7,
         * val[1] holds pixels 8..15 of this block. */
        const uint8x8x2_t b = vzip_u8(b_even, b_odd);
        const uint8x8x2_t g = vzip_u8(g_even, g_odd);
        const uint8x8x2_t r = vzip_u8(r_even, r_odd);

        /* Byte-wise interleaved stores: no pointer cast to a wider type and
         * no dependence on byte order inside 16-bit lanes. */
        uint8x8x3_t first_half;
        first_half.val[0] = b.val[0];
        first_half.val[1] = g.val[0];
        first_half.val[2] = r.val[0];
        vst3_u8(dst, first_half);

        uint8x8x3_t second_half;
        second_half.val[0] = b.val[1];
        second_half.val[1] = g.val[1];
        second_half.val[2] = r.val[1];
        vst3_u8(dst + 24, second_half);
    }

    /* Scalar tail for the complete groups that do not fill a vector block.
     * Relies on '>>' being an arithmetic shift for negative ints, matching
     * vshrq_n_s32 above. */
    for (; remaining >= 4; remaining -= 4, src += 4, dst += 6) {
        const int y0 = src[0];
        const int cu = src[1] - 128;
        const int y1 = src[2];
        const int cv = src[3] - 128;

        const int cr = (COEF_R_V * cv) >> COEF_SHIFT;
        const int cg = (COEF_G_U * cu + COEF_G_V * cv) >> COEF_SHIFT;
        const int cb = (COEF_B_U * cu) >> COEF_SHIFT;

        dst[0] = yuyv2bgr_neon_clamp_u8(y0 + cb);
        dst[1] = yuyv2bgr_neon_clamp_u8(y0 + cg);
        dst[2] = yuyv2bgr_neon_clamp_u8(y0 + cr);
        dst[3] = yuyv2bgr_neon_clamp_u8(y1 + cb);
        dst[4] = yuyv2bgr_neon_clamp_u8(y1 + cg);
        dst[5] = yuyv2bgr_neon_clamp_u8(y1 + cr);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment