Created
July 27, 2019 08:01
-
-
Save AllanChen/7b802048bc3021bf9f33d223f3031620 to your computer and use it in GitHub Desktop.
Neon vs cpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
//  ViewController.m
//  compare
//
//  Created by Allan on 27/7/2019.
//  Copyright © 2019 Allan. All rights reserved.
//
#import "ViewController.h" | |
#include <arm_neon.h> | |
@interface ViewController ()
@end

@implementation ViewController

// Benchmark configuration shared by -neonTest and -cppTest so that both
// kernels evaluate the same function, y = x * kAlpha + kBeta, over the same
// number of elements.
static const int kElementCount = 1024 * 1024 * 100;  // 400 MiB per float buffer
static const float kFillValue = 100.0f;
// A multiplier other than 1.0f is used on purpose: x * 1.0f is an identity
// the optimiser may fold away in the non-asm code paths.
static const float kAlpha = 2.0f;
static const float kBeta = 3.0f;

#pragma mark - Lifecycle

/// Runs the benchmark once the view has loaded. Only one variant is enabled
/// at a time; swap the commented line to time the scalar version instead.
- (void)viewDidLoad {
    [super viewDidLoad];
    // [self cppTest];
    [self neonTest];
}

#pragma mark - Benchmarks

/// Times y = x * kAlpha + kBeta over kElementCount floats using NEON.
///
/// On AArch64 the main loop is hand-written inline assembly that handles four
/// floats per iteration; on other NEON targets the same loop is expressed with
/// intrinsics. Any elements left over (count % 4) are handled by a scalar
/// tail loop, which is also the whole computation when NEON is unavailable.
///
/// The printed time covers buffer allocation, the fill loop and the kernel.
/// Buffers are allocated with C++ new[], so this file must be compiled as
/// Objective-C++.
- (void)neonTest {
    double tic = CACurrentMediaTime();

    const int total = kElementCount;
    float *input = new float[total];
    float *output = new float[total];
    for (int i = 0; i < total; i++) {
        input[i] = kFillValue;
    }

    // Cursors advanced by the vector loop; the scalar tail continues from
    // wherever they end up.
    const float *inputptr = input;
    float *outputptr = output;
    int remain = total;

#if __ARM_NEON
    int nn = total >> 2;  // number of full 4-float vectors
    remain = total & 3;   // elements left for the scalar tail

    float32x4_t _alpha = vdupq_n_f32(kAlpha);
    float32x4_t _beta = vdupq_n_f32(kBeta);

#if __aarch64__
    if (nn > 0) {
        // All three scalar operands are read-write: the loop counter is
        // decremented and both pointers are post-incremented by the
        // ld1/st1 instructions.
        asm volatile(
            "0:                                     \n"
            "prfm   pldl1keep, [%[src], #64]        \n"
            "ld1    {v2.4s}, [%[src]], #16          \n"
            "fmul   v3.4s, v2.4s, %[alpha].4s       \n"
            "fadd   v4.4s, v3.4s, %[beta].4s        \n"
            "subs   %w[count], %w[count], #1        \n"
            "st1    {v4.4s}, [%[dst]], #16          \n"
            "bne    0b                              \n"
            : [count] "+r"(nn),
              [dst] "+r"(outputptr),
              [src] "+r"(inputptr)
            : [alpha] "w"(_alpha),
              [beta] "w"(_beta)
            : "cc", "memory", "v2", "v3", "v4");
    }
#else
    // Same multiply-then-add sequence as the AArch64 assembly above.
    for (; nn > 0; nn--) {
        float32x4_t _v = vld1q_f32(inputptr);
        _v = vaddq_f32(vmulq_f32(_v, _alpha), _beta);
        vst1q_f32(outputptr, _v);
        inputptr += 4;
        outputptr += 4;
    }
#endif  // __aarch64__
#endif  // __ARM_NEON

    // Scalar tail: only the elements the vector loop did not cover.
    for (int i = 0; i < remain; i++) {
        outputptr[i] = inputptr[i] * kAlpha + kBeta;
    }

    printf("neon -- cost %.4f ms\n", 1000 * (CACurrentMediaTime() - tic));

    delete[] input;
    delete[] output;
}

/// Times the same y = x * kAlpha + kBeta computation with a plain scalar
/// loop, as the baseline for -neonTest.
///
/// The printed time covers buffer allocation, the fill loop and the kernel.
- (void)cppTest {
    double tic = CACurrentMediaTime();

    const int m_total = kElementCount;
    float *input = new float[m_total];
    float *output = new float[m_total];
    for (int i = 0; i < m_total; i++) {
        input[i] = kFillValue;
    }

    for (int i = 0; i < m_total; i++) {
        output[i] = input[i] * kAlpha + kBeta;
    }

    printf("cpp cost %.4f ms\n", 1000 * (CACurrentMediaTime() - tic));

    delete[] input;
    delete[] output;
}

@end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment