Timothy B. Terriberry
2015-Dec-08  17:13 UTC
[opus] [Aarch64 v2 02/18] Reorganize ARM CPU #ifdefs.
Jonathan Lennox wrote:> -# if defined(FIXED_POINT) > +# if defined(FIXED_POINT) && \ > + ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \ > + (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \ > + (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP))) > opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, > const opus_val16 *, opus_val32 *, int , int) = {Maybe I'm missing something, but...> -/*Is run-time CPU detection enabled on this platform?*/ > -# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_ASM) \ > - || (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) \ > - && !defined(OPUS_ARM_PRESUME_NEON_INTR))) > +# if defined(OPUS_HAVE_RTCD) && \ > + (defined(FIXED_POINT) && \ > + ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \ > + (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \ > + (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))) || \ > + (!defined(FIXED_POINT) && \ > + (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))) > extern > # if defined(FIXED_POINT) > opus_val32 > # else > void > # endif > (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, > const opus_val16 *, opus_val32 *, int, int);Shouldn't the first case have a corresponding update in the #else clause for the !defined(FIXED_POINT) case?> celt_pitch_xcorr_c, /* ARMv4 */ > diff --git a/celt/arm/pitch_arm.h b/celt/arm/pitch_arm.h > index eaf61c9..bd41774 100644 > --- a/celt/arm/pitch_arm.h > +++ b/celt/arm/pitch_arm.h > @@ -46,7 +46,13 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y, > opus_val32 *xcorr, int len, int max_pitch); > # endif > > -# if !defined(OPUS_HAVE_RTCD) > +# if (defined(OPUS_ARM_MAY_HAVE_NEON) || \ > + defined(OPUS_ARM_MAY_HAVE_MEDIA) || \ > + defined(OPUS_ARM_MAY_HAVE_EDSP)) && \ > + (!defined(OPUS_HAVE_RTCD) || \ > + defined(OPUS_ARM_PRESUME_NEON) || \ > + (defined(OPUS_ARM_PRESUME_MEDIA) && !defined(OPUS_ARM_MAY_HAVE_NEON)) || \ > + (defined(OPUS_ARM_PRESUME_EDSP) && !defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_MAY_HAVE_MEDIA))) > # define OVERRIDE_PITCH_XCORR (1) > # define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ > ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch)) > @@ -66,10 +72,13 @@ void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y, > > #endif /* end !FIXED_POINT */ >And shouldn't this be the inverse of the previous two?
Jonathan Lennox
2015-Dec-10  15:49 UTC
[opus] [Aarch64 v2 02/18] Reorganize ARM CPU #ifdefs.
> On Dec 8, 2015, at 12:13 PM, Timothy B. Terriberry <tterribe at xiph.org> wrote: > > Jonathan Lennox wrote: >> -# if defined(FIXED_POINT) >> +# if defined(FIXED_POINT) && \ >> + ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \ >> + (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \ >> + (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP))) >> opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, >> const opus_val16 *, opus_val32 *, int , int) = { > > Maybe I'm missing something, but... > >> -/*Is run-time CPU detection enabled on this platform?*/ >> -# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_ASM) \ >> - || (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) \ >> - && !defined(OPUS_ARM_PRESUME_NEON_INTR))) >> +# if defined(OPUS_HAVE_RTCD) && \ >> + (defined(FIXED_POINT) && \ >> + ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \ >> + (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \ >> + (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))) || \ >> + (!defined(FIXED_POINT) && \ >> + (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))) >> extern >> # if defined(FIXED_POINT) >> opus_val32 >> # else >> void >> # endif >> (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, >> const opus_val16 *, opus_val32 *, int, int); > > Shouldn't the first case have a corresponding update in the #else clause > for the !defined(FIXED_POINT) case?Hm, yes, you?re right ? there are cases where we?ll end up emitting a (useless and unused, but harmless) CELT_PITCH_XCORR_IMPL object unnecessarily. I?ll submit an updated version of the patch.> >> celt_pitch_xcorr_c, /* ARMv4 */ >> diff --git a/celt/arm/pitch_arm.h b/celt/arm/pitch_arm.h >> index eaf61c9..bd41774 100644 >> --- a/celt/arm/pitch_arm.h >> +++ b/celt/arm/pitch_arm.h >> @@ -46,7 +46,13 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y, >> opus_val32 *xcorr, int len, int max_pitch); >> # endif >> >> -# if !defined(OPUS_HAVE_RTCD) >> +# if (defined(OPUS_ARM_MAY_HAVE_NEON) || \ >> + defined(OPUS_ARM_MAY_HAVE_MEDIA) || \ >> + defined(OPUS_ARM_MAY_HAVE_EDSP)) && \ >> + (!defined(OPUS_HAVE_RTCD) || \ >> + defined(OPUS_ARM_PRESUME_NEON) || \ >> + (defined(OPUS_ARM_PRESUME_MEDIA) && !defined(OPUS_ARM_MAY_HAVE_NEON)) || \ >> + (defined(OPUS_ARM_PRESUME_EDSP) && !defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_MAY_HAVE_MEDIA))) >> # define OVERRIDE_PITCH_XCORR (1) >> # define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ >> ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch)) >> @@ -66,10 +72,13 @@ void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y, >> >> #endif /* end !FIXED_POINT */ >> > > And shouldn't this be the inverse of the previous two?I think it?s logically equivalent (under the assumption that the MAY_HAVE and PRESUME defines are ordered), but the different representation is confusing. I?ll align them.
Jonathan Lennox
2015-Dec-14  21:06 UTC
[opus] [Aarch64 v3 02/18] Reorganize ARM CPU #ifdefs.
This is an updated version of entry 2/18 of the previous Aarch64 patch series,
after Tim's comments.
Should be no behavioral change.  All other patches in the series are unchanged.
---
 celt/arm/arm_celt_map.c |  7 ++++++-
 celt/arm/pitch_arm.h    | 49 +++++++++++++++++++++++++++++--------------------
 2 files changed, 35 insertions(+), 21 deletions(-)
diff --git a/celt/arm/arm_celt_map.c b/celt/arm/arm_celt_map.c
index ee6c244..85a48e8 100644
--- a/celt/arm/arm_celt_map.c
+++ b/celt/arm/arm_celt_map.c
@@ -36,6 +36,9 @@
 #if defined(OPUS_HAVE_RTCD)
 
 # if defined(FIXED_POINT)
+#  if ((defined(OPUS_ARM_MAY_HAVE_NEON) &&
!defined(OPUS_ARM_PRESUME_NEON)) || \
+		(defined(OPUS_ARM_MAY_HAVE_MEDIA) &&
!defined(OPUS_ARM_PRESUME_MEDIA)) || \
+		(defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
 opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
     const opus_val16 *, opus_val32 *, int , int) = {
   celt_pitch_xcorr_c,               /* ARMv4 */
@@ -43,8 +46,10 @@ opus_val32 (*const
CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
   MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
   MAY_HAVE_NEON(celt_pitch_xcorr)   /* NEON */
 };
+
+#  endif
 # else /* !FIXED_POINT */
-#  if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+#  if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) &&
!defined(OPUS_ARM_PRESUME_NEON_INTR)
 void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
     const opus_val16 *, opus_val32 *, int, int) = {
   celt_pitch_xcorr_c,              /* ARMv4 */
diff --git a/celt/arm/pitch_arm.h b/celt/arm/pitch_arm.h
index eaf61c9..2264f71 100644
--- a/celt/arm/pitch_arm.h
+++ b/celt/arm/pitch_arm.h
@@ -46,7 +46,21 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const
opus_val16 *_y,
     opus_val32 *xcorr, int len, int max_pitch);
 #  endif
 
-#  if !defined(OPUS_HAVE_RTCD)
+#  if defined(OPUS_HAVE_RTCD) && \
+	((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON))
|| \
+		(defined(OPUS_ARM_MAY_HAVE_MEDIA) &&
!defined(OPUS_ARM_PRESUME_MEDIA)) || \
+		(defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
+extern opus_val32
+(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+      const opus_val16 *, opus_val32 *, int, int);
+#   define OVERRIDE_PITCH_XCORR (1)
+#   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
+        xcorr, len, max_pitch))
+
+#  elif defined(OPUS_ARM_PRESUME_EDSP) || \
+	defined(OPUS_ARM_PRESUME_MEDIA) || \
+	defined(OPUS_ARM_PRESUME_NEON)
 #   define OVERRIDE_PITCH_XCORR (1)
 #   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
   ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch))
@@ -57,32 +71,27 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const
opus_val16 *_y,
 #if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
 void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
                                  opus_val32 *xcorr, int len, int max_pitch);
-#if !defined(OPUS_HAVE_RTCD) || defined(OPUS_ARM_PRESUME_NEON_INTR)
-#define OVERRIDE_PITCH_XCORR (1)
-#   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
-   ((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch))
 #endif
-#endif
-
-#endif /* end !FIXED_POINT */
 
-/*Is run-time CPU detection enabled on this platform?*/
-# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_ASM) \
-   || (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) \
-   && !defined(OPUS_ARM_PRESUME_NEON_INTR)))
-extern
-#  if defined(FIXED_POINT)
-opus_val32
-#  else
-void
-#  endif
+#  if defined(OPUS_HAVE_RTCD) && \
+	(defined(OPUS_ARM_MAY_HAVE_NEON_INTR) &&
!defined(OPUS_ARM_PRESUME_NEON_INTR))
+extern void
 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
       const opus_val16 *, opus_val32 *, int, int);
 
-#  define OVERRIDE_PITCH_XCORR
+#  define OVERRIDE_PITCH_XCORR (1)
 #  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
   ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
         xcorr, len, max_pitch))
-# endif
+
+#  elif defined(OPUS_ARM_PRESUME_NEON_INTR)
+
+#   define OVERRIDE_PITCH_XCORR (1)
+#   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+   ((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch))
+
+#  endif
+
+#endif /* end !FIXED_POINT */
 
 #endif
-- 
2.5.4 (Apple Git-61)
Maybe Matching Threads
- [Aarch64 v2 02/18] Reorganize ARM CPU #ifdefs.
- [Aarch64 v2 02/18] Reorganize ARM CPU #ifdefs.
- [PATCH 2/8] Reorganize pitch_arm.h, so RTCD works for intrinsics functions as well.
- Opus floating-point NEON jump table question
- Opus floating-point NEON jump table question