Displaying 3 results from an estimated 3 matches for "float16x4_t".
2019 Sep 05
2
ARM vectorized fp16 support
...3 -march=armv8.2-a+fp16fml -ffast-math -S -o- vfp16.c
test_vfma_lane_f16: // @test_vfma_lane_f16
fmul v0.4h, v1.4h, v0.4h
fadd v0.4h, v0.4h, v2.4h // fp16 does NOT use FMLA
ret
$ cat vfp16.c
#include <arm_neon.h>
float16x4_t test_vfma_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
c += a * b;
return c;
}
--
Yizhi Liu
2019 Sep 05
2
ARM vectorized fp16 support
...ath -S -o- vfp16.c
> test_vfma_lane_f16: // @test_vfma_lane_f16
> fmul v0.4h, v1.4h, v0.4h
> fadd v0.4h, v0.4h, v2.4h // fp16 does NOT use FMLA
> ret
> $ cat vfp16.c
> #include <arm_neon.h>
> float16x4_t test_vfma_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
> c += a * b;
> return c;
> }
>
> --
> Yizhi Liu
> _______________________________________________
> LLVM Developers mailing list
> llvm-dev at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman...
2019 Jul 12
2
[cfe-dev] ARM float16 intrinsic test
...M;AArch64"
[arm.cpp]
#define vst4_lane_f16(__p0, __p1, __p2) __extension__ ({ \
float16x4x4_t __s1 = __p1; \
__builtin_neon_vst4_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], \
__s1.val[3], __p2, 8); \
})
typedef __fp16 float16_t;
typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
typedef struct float16x4x4_t {
float16x4_t val[4];
} float16x4x4_t;
void test_vst4_lane_f16(float16_t * a, float16x4x4_t b) {
vst4_lane_f16(a, b, 3);
}
I tried:
$$COMP_ROOT/clang -cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu
-target-cpu swift -fallow-half-arguments-and-returns -targ...