Displaying 2 results from an estimated 2 matches for "vfp16".
Did you mean:
fp16
2019 Sep 05
2
ARM vectorized fp16 support
...s, v1.4s, v0.4s // fp32 is GOOD
mov v0.16b, v2.16b
ret
$ cat vfp32.c
#include <arm_neon.h>
float32x4_t test_vfma_lane_f16(float32x4_t a, float32x4_t b, float32x4_t c) {
c += a * b;
return c;
}
$ clang -O3 -march=armv8.2-a+fp16fml -ffast-math -S -o- vfp16.c
test_vfma_lane_f16: // @test_vfma_lane_f16
fmul v0.4h, v1.4h, v0.4h
fadd v0.4h, v0.4h, v2.4h // fp16 does NOT use FMLA
ret
$ cat vfp16.c
#include <arm_neon.h>
float16x4_t test_vfma_lane_f16(float16x4_t a, float...
2019 Sep 05
2
ARM vectorized fp16 support
...mov v0.16b, v2.16b
> ret
> $ cat vfp32.c
> #include <arm_neon.h>
> float32x4_t test_vfma_lane_f16(float32x4_t a, float32x4_t b, float32x4_t c) {
> c += a * b;
> return c;
> }
>
> $ clang -O3 -march=armv8.2-a+fp16fml -ffast-math -S -o- vfp16.c
> test_vfma_lane_f16: // @test_vfma_lane_f16
> fmul v0.4h, v1.4h, v0.4h
> fadd v0.4h, v0.4h, v2.4h // fp16 does NOT use FMLA
> ret
> $ cat vfp16.c
> #include <arm_neon.h>
> float16x4_t test...