Hi Team, Can anyone take a look? We are already using this optimization in production. Best regards, Zheng On Thu, 15 Sept 2022 at 19:04, Zheng Lv <lvzheng at google.com> wrote:> This makes kiss_twiddle_cpx 4-byte aligned (instead of 2-byte) for > fixed-point builds. Tested with an armv6j+nofp development board, CELT > encoding becomes 1.4x as fast, and decoding over 2x. > > Performance gain is mostly attributed to the proper alignment of the > static const array mdct_twiddles960. > > Co-authored-by: David Gao <davidgao at google.com> > --- > celt/kiss_fft.h | 12 +++++++++++- > 1 file changed, 11 insertions(+), 1 deletion(-) > > diff --git a/celt/kiss_fft.h b/celt/kiss_fft.h > index bffa2bfa..267f72f9 100644 > --- a/celt/kiss_fft.h > +++ b/celt/kiss_fft.h > @@ -49,31 +49,41 @@ extern "C" { > #ifdef FIXED_POINT > #include "arch.h" > > # define kiss_fft_scalar opus_int32 > # define kiss_twiddle_scalar opus_int16 > > +/* Some 32-bit CPUs would load/store a kiss_twiddle_cpx with a single > memory > + * access, and could benefit from additional alignment. > + */ > +# define KISS_TWIDDLE_CPX_ALIGNMENT (sizeof(opus_int32)) > > #else > # ifndef kiss_fft_scalar > /* default is float */ > # define kiss_fft_scalar float > # define kiss_twiddle_scalar float > # define KF_SUFFIX _celt_single > # endif > #endif > > +#if defined(__GNUC__) && defined(KISS_TWIDDLE_CPX_ALIGNMENT) > +#define KISS_TWIDDLE_CPX_ALIGNED > __attribute__((aligned(KISS_TWIDDLE_CPX_ALIGNMENT))) > +#else > +#define KISS_TWIDDLE_CPX_ALIGNED > +#endif > + > typedef struct { > kiss_fft_scalar r; > kiss_fft_scalar i; > }kiss_fft_cpx; > > typedef struct { > kiss_twiddle_scalar r; > kiss_twiddle_scalar i; > -}kiss_twiddle_cpx; > +} KISS_TWIDDLE_CPX_ALIGNED kiss_twiddle_cpx; > > #define MAXFACTORS 8 > /* e.g. an fft of length 128 has 4 factors > as far as kissfft is concerned > 4*4*4*2 > */ > -- > 2.37.2.789.g6183377224-goog > >-------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.xiph.org/pipermail/opus/attachments/20221024/5e831677/attachment.htm>
Ulrich Windl
2022-Oct-24 07:27 UTC
[opus] Antw: [EXT] Re: [PATCH] Make CELT FFT twiddle complex type aligned
>>> Zheng Lv <lvzheng at google.com> schrieb am 24.10.2022 um 04:31 in Nachricht<CACNzCb422LVKKUzRSYJnDH09uaVgOABB1w6z48zYUXMPQvQAmg at mail.gmail.com>:> Hi Team, > > Can anyone take a look? We are already using this optimization in > production. > > Best regards, > Zheng > > On Thu, 15 Sept 2022 at 19:04, Zheng Lv <lvzheng at google.com> wrote: > >> This makes kiss_twiddle_cpx 4-byte aligned (instead of 2-byte) for >> fixed-point builds. Tested with an armv6j+nofp development board, CELT >> encoding becomes 1.4x as fast, and decoding over 2x. >> >> Performance gain is mostly attributed to the proper alignment of the >> static const array mdct_twiddles960. >> >> Co-authored-by: David Gao <davidgao at google.com> >> --- >> celt/kiss_fft.h | 12 +++++++++++- >> 1 file changed, 11 insertions(+), 1 deletion(-) >> >> diff --git a/celt/kiss_fft.h b/celt/kiss_fft.h >> index bffa2bfa..267f72f9 100644 >> --- a/celt/kiss_fft.h >> +++ b/celt/kiss_fft.h >> @@ -49,31 +49,41 @@ extern "C" { >> #ifdef FIXED_POINT >> #include "arch.h" >> >> # define kiss_fft_scalar opus_int32 >> # define kiss_twiddle_scalar opus_int16 >> >> +/* Some 32-bit CPUs would load/store a kiss_twiddle_cpx with a single >> memory >> + * access, and could benefit from additional alignment. >> + */ >> +# define KISS_TWIDDLE_CPX_ALIGNMENT (sizeof(opus_int32)) >> >> #else >> # ifndef kiss_fft_scalar >> /* default is float */ >> # define kiss_fft_scalar float >> # define kiss_twiddle_scalar float >> # define KF_SUFFIX _celt_single >> # endif >> #endif >> >> +#if defined(__GNUC__) && defined(KISS_TWIDDLE_CPX_ALIGNMENT) >> +#define KISS_TWIDDLE_CPX_ALIGNED >> __attribute__((aligned(KISS_TWIDDLE_CPX_ALIGNMENT))) >> +#else >> +#define KISS_TWIDDLE_CPX_ALIGNED >> +#endif >> + >> typedef struct { >> kiss_fft_scalar r; >> kiss_fft_scalar i; >> }kiss_fft_cpx; >> >> typedef struct { >> kiss_twiddle_scalar r; >> kiss_twiddle_scalar i; >> -}kiss_twiddle_cpx; >> +} KISS_TWIDDLE_CPX_ALIGNED kiss_twiddle_cpx;I'm only surprised that you do align the typedef and not the actual data!>> >> #define MAXFACTORS 8 >> /* e.g. an fft of length 128 has 4 factors >> as far as kissfft is concerned >> 4*4*4*2 >> */ >> -- >> 2.37.2.789.g6183377224-goog >> >>