Displaying 5 results from an estimated 5 matches for "s_s32x4".
Did you mean:
t_s32x4
2017 Apr 26
2
2 patches related to silk_biquad_alt() optimization
...vget_high_s32(t_s32x4));
/* S[ {0,1} ] = silk_SMLAWB( S[ {0,1} ], B_Q28[ {1,2} ],
in[ k ] ); */
}
static inline void silk_biquad_alt_stride2_kernel(const int32x4_t
A_Q28_s32x4, const int32x4_t B_Q28_s32x4, const int32x2_t t_s32x2, const
int32x4_t inval_s32x4, int32x4_t *S_s32x4, int32x2_t *out32_Q14_s32x2)
{
int32x4_t t_s32x4, out32_Q14_s32x4;
*out32_Q14_s32x2 = vadd_s32(vget_low_s32(*S_s32x4), t_s32x2);
/* silk_SMLAWB( S{0,1}, B_Q28[ 0 ], in[ k * 2 + {0,1} ] )
*/
*S_s32x4 = vcombine_s32(vget_high_s32(*S_s32x4),
vdup_n_s32(0));...
2017 May 15
2
2 patches related to silk_biquad_alt() optimization
...{0,1} ] = silk_SMLAWB( S[ {0,1} ],
> B_Q28[ {1,2} ], in[ k ] ); */
> }
>
> static inline void silk_biquad_alt_stride2_kernel(const int32x4_t
> A_Q28_s32x4, const int32x4_t B_Q28_s32x4, const int32x2_t t_s32x2,
> const int32x4_t inval_s32x4, int32x4_t *S_s32x4, int32x2_t
> *out32_Q14_s32x2)
> {
> int32x4_t t_s32x4, out32_Q14_s32x4;
>
> *out32_Q14_s32x2 = vadd_s32(vget_low_s32(*S_s32x4), t_s32x2);
> /* silk_SMLAWB( S{0,1}, B_Q28[ 0 ], in[ k * 2 + {0,1} ] )
>...
2017 May 08
0
2 patches related to silk_biquad_alt() optimization
.../* S[ {0,1} ] = silk_SMLAWB( S[ {0,1} ], B_Q28[ {1,2} ],
> in[ k ] ); */
> }
>
> static inline void silk_biquad_alt_stride2_kernel(const int32x4_t
> A_Q28_s32x4, const int32x4_t B_Q28_s32x4, const int32x2_t t_s32x2, const
> int32x4_t inval_s32x4, int32x4_t *S_s32x4, int32x2_t *out32_Q14_s32x2)
> {
> int32x4_t t_s32x4, out32_Q14_s32x4;
>
> *out32_Q14_s32x2 = vadd_s32(vget_low_s32(*S_s32x4), t_s32x2);
> /* silk_SMLAWB( S{0,1}, B_Q28[ 0 ], in[ k * 2 + {0,1} ] )
> */
> *S_s32x4 = vcombine_s32(vge...
2017 May 17
0
2 patches related to silk_biquad_alt() optimization
...1} ],
> > B_Q28[ {1,2} ], in[ k ] ); */
> > }
> >
> > static inline void silk_biquad_alt_stride2_kernel(const int32x4_t
> > A_Q28_s32x4, const int32x4_t B_Q28_s32x4, const int32x2_t t_s32x2,
> > const int32x4_t inval_s32x4, int32x4_t *S_s32x4, int32x2_t
> > *out32_Q14_s32x2)
> > {
> > int32x4_t t_s32x4, out32_Q14_s32x4;
> >
> > *out32_Q14_s32x2 = vadd_s32(vget_low_s32(*S_s32x4), t_s32x2);
> > /* silk_SMLAWB( S{0,1}, B_Q28[ 0 ], in[ k * 2 + {0,1} ] )
> >...
2017 Apr 25
2
2 patches related to silk_biquad_alt() optimization
On Mon, Apr 24, 2017 at 5:52 PM, Jean-Marc Valin <jmvalin at jmvalin.ca> wrote:
> On 24/04/17 08:03 PM, Linfeng Zhang wrote:
> > Tested on my chromebook, when stride (channel) == 1, the optimization
> > has no gain compared with C function.
>
> You mean that the Neon code is the same speed as the C code for
> stride==1? This is not terribly surprising for an IIR