Timothy B. Terriberry
2015-Dec-08 17:13 UTC
[opus] [Aarch64 v2 02/18] Reorganize ARM CPU #ifdefs.
Jonathan Lennox wrote:> -# if defined(FIXED_POINT) > +# if defined(FIXED_POINT) && \ > + ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \ > + (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \ > + (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP))) > opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, > const opus_val16 *, opus_val32 *, int , int) = {Maybe I'm missing something, but...> -/*Is run-time CPU detection enabled on this platform?*/ > -# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_ASM) \ > - || (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) \ > - && !defined(OPUS_ARM_PRESUME_NEON_INTR))) > +# if defined(OPUS_HAVE_RTCD) && \ > + (defined(FIXED_POINT) && \ > + ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \ > + (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \ > + (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))) || \ > + (!defined(FIXED_POINT) && \ > + (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))) > extern > # if defined(FIXED_POINT) > opus_val32 > # else > void > # endif > (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, > const opus_val16 *, opus_val32 *, int, int);Shouldn't the first case have a corresponding update in the #else clause for the !defined(FIXED_POINT) case?> celt_pitch_xcorr_c, /* ARMv4 */ > diff --git a/celt/arm/pitch_arm.h b/celt/arm/pitch_arm.h > index eaf61c9..bd41774 100644 > --- a/celt/arm/pitch_arm.h > +++ b/celt/arm/pitch_arm.h > @@ -46,7 +46,13 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y, > opus_val32 *xcorr, int len, int max_pitch); > # endif > > -# if !defined(OPUS_HAVE_RTCD) > +# if (defined(OPUS_ARM_MAY_HAVE_NEON) || \ > + defined(OPUS_ARM_MAY_HAVE_MEDIA) || \ > + defined(OPUS_ARM_MAY_HAVE_EDSP)) && \ > + (!defined(OPUS_HAVE_RTCD) || \ > + defined(OPUS_ARM_PRESUME_NEON) || \ > + (defined(OPUS_ARM_PRESUME_MEDIA) && !defined(OPUS_ARM_MAY_HAVE_NEON)) || \ > + (defined(OPUS_ARM_PRESUME_EDSP) && !defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_MAY_HAVE_MEDIA))) > # define OVERRIDE_PITCH_XCORR (1) > # define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ > ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch)) > @@ -66,10 +72,13 @@ void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y, > > #endif /* end !FIXED_POINT */ >And shouldn't this be the inverse of the previous two?
Jonathan Lennox
2015-Dec-10 15:49 UTC
[opus] [Aarch64 v2 02/18] Reorganize ARM CPU #ifdefs.
> On Dec 8, 2015, at 12:13 PM, Timothy B. Terriberry <tterribe at xiph.org> wrote: > > Jonathan Lennox wrote: >> -# if defined(FIXED_POINT) >> +# if defined(FIXED_POINT) && \ >> + ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \ >> + (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \ >> + (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP))) >> opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, >> const opus_val16 *, opus_val32 *, int , int) = { > > Maybe I'm missing something, but... > >> -/*Is run-time CPU detection enabled on this platform?*/ >> -# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_ASM) \ >> - || (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) \ >> - && !defined(OPUS_ARM_PRESUME_NEON_INTR))) >> +# if defined(OPUS_HAVE_RTCD) && \ >> + (defined(FIXED_POINT) && \ >> + ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \ >> + (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \ >> + (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))) || \ >> + (!defined(FIXED_POINT) && \ >> + (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))) >> extern >> # if defined(FIXED_POINT) >> opus_val32 >> # else >> void >> # endif >> (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, >> const opus_val16 *, opus_val32 *, int, int); > > Shouldn't the first case have a corresponding update in the #else clause > for the !defined(FIXED_POINT) case?Hm, yes, you?re right ? there are cases where we?ll end up emitting a (useless and unused, but harmless) CELT_PITCH_XCORR_IMPL object unnecessarily. I?ll submit an updated version of the patch.> >> celt_pitch_xcorr_c, /* ARMv4 */ >> diff --git a/celt/arm/pitch_arm.h b/celt/arm/pitch_arm.h >> index eaf61c9..bd41774 100644 >> --- a/celt/arm/pitch_arm.h >> +++ b/celt/arm/pitch_arm.h >> @@ -46,7 +46,13 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y, >> opus_val32 *xcorr, int len, int max_pitch); >> # endif >> >> -# if !defined(OPUS_HAVE_RTCD) >> +# if (defined(OPUS_ARM_MAY_HAVE_NEON) || \ >> + defined(OPUS_ARM_MAY_HAVE_MEDIA) || \ >> + defined(OPUS_ARM_MAY_HAVE_EDSP)) && \ >> + (!defined(OPUS_HAVE_RTCD) || \ >> + defined(OPUS_ARM_PRESUME_NEON) || \ >> + (defined(OPUS_ARM_PRESUME_MEDIA) && !defined(OPUS_ARM_MAY_HAVE_NEON)) || \ >> + (defined(OPUS_ARM_PRESUME_EDSP) && !defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_MAY_HAVE_MEDIA))) >> # define OVERRIDE_PITCH_XCORR (1) >> # define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ >> ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch)) >> @@ -66,10 +72,13 @@ void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y, >> >> #endif /* end !FIXED_POINT */ >> > > And shouldn't this be the inverse of the previous two?I think it?s logically equivalent (under the assumption that the MAY_HAVE and PRESUME defines are ordered), but the different representation is confusing. I?ll align them.
Jonathan Lennox
2015-Dec-14 21:06 UTC
[opus] [Aarch64 v3 02/18] Reorganize ARM CPU #ifdefs.
This is an updated version of entry 2/18 of the previous Aarch64 patch series, after Tim's comments. Should be no behavioral change. All other patches in the series are unchanged. --- celt/arm/arm_celt_map.c | 7 ++++++- celt/arm/pitch_arm.h | 49 +++++++++++++++++++++++++++++-------------------- 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/celt/arm/arm_celt_map.c b/celt/arm/arm_celt_map.c index ee6c244..85a48e8 100644 --- a/celt/arm/arm_celt_map.c +++ b/celt/arm/arm_celt_map.c @@ -36,6 +36,9 @@ #if defined(OPUS_HAVE_RTCD) # if defined(FIXED_POINT) +# if ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \ + (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \ + (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP))) opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, const opus_val16 *, opus_val32 *, int , int) = { celt_pitch_xcorr_c, /* ARMv4 */ @@ -43,8 +46,10 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */ MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */ }; + +# endif # else /* !FIXED_POINT */ -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR) void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, const opus_val16 *, opus_val32 *, int, int) = { celt_pitch_xcorr_c, /* ARMv4 */ diff --git a/celt/arm/pitch_arm.h b/celt/arm/pitch_arm.h index eaf61c9..2264f71 100644 --- a/celt/arm/pitch_arm.h +++ b/celt/arm/pitch_arm.h @@ -46,7 +46,21 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch); # endif -# if !defined(OPUS_HAVE_RTCD) +# if defined(OPUS_HAVE_RTCD) && \ + ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \ + (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \ + (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP))) +extern opus_val32 +(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, + const opus_val16 *, opus_val32 *, int, int); +# define OVERRIDE_PITCH_XCORR (1) +# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ + ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \ + xcorr, len, max_pitch)) + +# elif defined(OPUS_ARM_PRESUME_EDSP) || \ + defined(OPUS_ARM_PRESUME_MEDIA) || \ + defined(OPUS_ARM_PRESUME_NEON) # define OVERRIDE_PITCH_XCORR (1) # define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch)) @@ -57,32 +71,27 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y, #if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch); -#if !defined(OPUS_HAVE_RTCD) || defined(OPUS_ARM_PRESUME_NEON_INTR) -#define OVERRIDE_PITCH_XCORR (1) -# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ - ((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch)) #endif -#endif - -#endif /* end !FIXED_POINT */ -/*Is run-time CPU detection enabled on this platform?*/ -# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_ASM) \ - || (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) \ - && !defined(OPUS_ARM_PRESUME_NEON_INTR))) -extern -# if defined(FIXED_POINT) -opus_val32 -# else -void -# endif +# if defined(OPUS_HAVE_RTCD) && \ + (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)) +extern void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, const opus_val16 *, opus_val32 *, int, int); -# define OVERRIDE_PITCH_XCORR +# define OVERRIDE_PITCH_XCORR (1) # define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \ xcorr, len, max_pitch)) -# endif + +# elif defined(OPUS_ARM_PRESUME_NEON_INTR) + +# define OVERRIDE_PITCH_XCORR (1) +# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ + ((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch)) + +# endif + +#endif /* end !FIXED_POINT */ #endif -- 2.5.4 (Apple Git-61)
Maybe Matching Threads
- [Aarch64 v2 02/18] Reorganize ARM CPU #ifdefs.
- [Aarch64 v2 02/18] Reorganize ARM CPU #ifdefs.
- [PATCH 2/8] Reorganize pitch_arm.h, so RTCD works for intrinsics functions as well.
- Opus floating-point NEON jump table question
- Opus floating-point NEON jump table question