search for: s_s32x4

Displaying 5 results from an estimated 5 matches for "s_s32x4".

Did you mean: t_s32x4
2017 Apr 26
2
2 patches related to silk_biquad_alt() optimization
...vget_high_s32(t_s32x4)); /* S[ {0,1} ] = silk_SMLAWB( S[ {0,1} ], B_Q28[ {1,2} ], in[ k ] ); */ } static inline void silk_biquad_alt_stride2_kernel(const int32x4_t A_Q28_s32x4, const int32x4_t B_Q28_s32x4, const int32x2_t t_s32x2, const int32x4_t inval_s32x4, int32x4_t *S_s32x4, int32x2_t *out32_Q14_s32x2) { int32x4_t t_s32x4, out32_Q14_s32x4; *out32_Q14_s32x2 = vadd_s32(vget_low_s32(*S_s32x4), t_s32x2); /* silk_SMLAWB( S{0,1}, B_Q28[ 0 ], in[ k * 2 + {0,1} ] ) */ *S_s32x4 = vcombine_s32(vget_high_s32(*S_s32x4), vdup_n_s32(0));...
2017 May 15
2
2 patches related to silk_biquad_alt() optimization
...{0,1} ] = silk_SMLAWB( S[ {0,1} ], > B_Q28[ {1,2} ], in[ k ] ); */ > } > > static inline void silk_biquad_alt_stride2_kernel(const int32x4_t > A_Q28_s32x4, const int32x4_t B_Q28_s32x4, const int32x2_t t_s32x2, > const int32x4_t inval_s32x4, int32x4_t *S_s32x4, int32x2_t > *out32_Q14_s32x2) > { > int32x4_t t_s32x4, out32_Q14_s32x4; > > *out32_Q14_s32x2 = vadd_s32(vget_low_s32(*S_s32x4), t_s32x2); > /* silk_SMLAWB( S{0,1}, B_Q28[ 0 ], in[ k * 2 + {0,1} ] ) >...
2017 May 08
0
2 patches related to silk_biquad_alt() optimization
.../* S[ {0,1} ] = silk_SMLAWB( S[ {0,1} ], B_Q28[ {1,2} ], > in[ k ] ); */ > } > > static inline void silk_biquad_alt_stride2_kernel(const int32x4_t > A_Q28_s32x4, const int32x4_t B_Q28_s32x4, const int32x2_t t_s32x2, const > int32x4_t inval_s32x4, int32x4_t *S_s32x4, int32x2_t *out32_Q14_s32x2) > { > int32x4_t t_s32x4, out32_Q14_s32x4; > > *out32_Q14_s32x2 = vadd_s32(vget_low_s32(*S_s32x4), t_s32x2); > /* silk_SMLAWB( S{0,1}, B_Q28[ 0 ], in[ k * 2 + {0,1} ] ) > */ > *S_s32x4 = vcombine_s32(vge...
2017 May 17
0
2 patches related to silk_biquad_alt() optimization
...1} ], > > B_Q28[ {1,2} ], in[ k ] ); */ > > } > > > > static inline void silk_biquad_alt_stride2_kernel(const int32x4_t > > A_Q28_s32x4, const int32x4_t B_Q28_s32x4, const int32x2_t t_s32x2, > > const int32x4_t inval_s32x4, int32x4_t *S_s32x4, int32x2_t > > *out32_Q14_s32x2) > > { > > int32x4_t t_s32x4, out32_Q14_s32x4; > > > > *out32_Q14_s32x2 = vadd_s32(vget_low_s32(*S_s32x4), t_s32x2); > > /* silk_SMLAWB( S{0,1}, B_Q28[ 0 ], in[ k * 2 + {0,1} ] ) > >...
2017 Apr 25
2
2 patches related to silk_biquad_alt() optimization
On Mon, Apr 24, 2017 at 5:52 PM, Jean-Marc Valin <jmvalin at jmvalin.ca> wrote: > On 24/04/17 08:03 PM, Linfeng Zhang wrote: > > Tested on my chromebook, when stride (channel) == 1, the optimization > > has no gain compared with C function. > > You mean that the Neon code is the same speed as the C code for > stride==1? This is not terribly surprising for an IIRC