Jonathan Lennox
2016-May-31 19:52 UTC
[opus] [PATCH 1/2] Modify autoconf tests for intrinsics to stop clang from optimizing them away.
---
configure.ac | 30 +++++++++++++++++++++---------
1 file changed, 21 insertions(+), 9 deletions(-)
diff --git a/configure.ac b/configure.ac
index a67aa37..c722556 100644
--- a/configure.ac
+++ b/configure.ac
@@ -472,6 +472,7 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
[[
static float32x4_t A0, A1, SUMM;
SUMM = vmlaq_f32(SUMM, A0, A1);
+ return (int)vgetq_lane_f32(SUMM, 0);
]]
)
AS_IF([test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1" && test x"$OPUS_ARM_PRESUME_NEON_INTR" != x"1"],
@@ -521,10 +522,13 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
[OPUS_X86_MAY_HAVE_SSE],
[OPUS_X86_PRESUME_SSE],
[[#include <xmmintrin.h>
+ #include <time.h>
]],
[[
- static __m128 mtest;
- mtest = _mm_setzero_ps();
+ __m128 mtest;
+ mtest = _mm_set1_ps((float)time(NULL));
+ mtest = _mm_mul_ps(mtest, mtest);
+ return _mm_cvtss_si32(mtest);
]]
)
AS_IF([test x"$OPUS_X86_MAY_HAVE_SSE" = x"1" && test x"$OPUS_X86_PRESUME_SSE" != x"1"],
@@ -539,10 +543,13 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
[OPUS_X86_MAY_HAVE_SSE2],
[OPUS_X86_PRESUME_SSE2],
[[#include <emmintrin.h>
+ #include <time.h>
]],
[[
- static __m128i mtest;
- mtest = _mm_setzero_si128();
+ __m128i mtest;
+ mtest = _mm_set1_epi32((int)time(NULL));
+ mtest = _mm_mul_epu32(mtest, mtest);
+ return _mm_cvtsi128_si32(mtest);
]]
)
AS_IF([test x"$OPUS_X86_MAY_HAVE_SSE2" = x"1" && test x"$OPUS_X86_PRESUME_SSE2" != x"1"],
@@ -557,11 +564,13 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
[OPUS_X86_MAY_HAVE_SSE4_1],
[OPUS_X86_PRESUME_SSE4_1],
[[#include <smmintrin.h>
+ #include <time.h>
]],
[[
- static __m128i mtest;
- mtest = _mm_setzero_si128();
- mtest = _mm_cmpeq_epi64(mtest, mtest);
+ __m128i mtest;
+ mtest = _mm_set1_epi32((int)time(NULL));
+ mtest = _mm_mul_epi32(mtest, mtest);
+ return _mm_cvtsi128_si32(mtest);
]]
)
AS_IF([test x"$OPUS_X86_MAY_HAVE_SSE4_1" = x"1" && test x"$OPUS_X86_PRESUME_SSE4_1" != x"1"],
@@ -576,10 +585,13 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
[OPUS_X86_MAY_HAVE_AVX],
[OPUS_X86_PRESUME_AVX],
[[#include <immintrin.h>
+ #include <time.h>
]],
[[
- static __m256 mtest;
- mtest = _mm256_setzero_ps();
+ __m256 mtest;
+ mtest = _mm256_set1_ps((float)time(NULL));
+ mtest = _mm256_addsub_ps(mtest, mtest);
+ return _mm_cvtss_si32(_mm256_extractf128_ps(mtest, 0));
]]
)
AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1" && test x"$OPUS_X86_PRESUME_AVX" != x"1"],
--
2.7.4 (Apple Git-66)
Jonathan Lennox
2016-May-31 19:52 UTC
[opus] [PATCH 2/2] List intrinsics flags being tested after existing cflags, so they override other options.
---
m4/opus-intrinsics.m4 | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/m4/opus-intrinsics.m4 b/m4/opus-intrinsics.m4
index b93ddd3..a262ca1 100644
--- a/m4/opus-intrinsics.m4
+++ b/m4/opus-intrinsics.m4
@@ -15,7 +15,7 @@ AC_DEFUN([OPUS_CHECK_INTRINSICS],
$4=0
AC_MSG_RESULT([no])
AC_MSG_CHECKING([if compiler supports $1 intrinsics with $2])
- save_CFLAGS="$CFLAGS"; CFLAGS="$2 $CFLAGS"
+ save_CFLAGS="$CFLAGS"; CFLAGS="$CFLAGS $2"
AC_LINK_IFELSE([AC_LANG_PROGRAM($5, $6)],
[
AC_MSG_RESULT([yes])
--
2.7.4 (Apple Git-66)
Jean-Marc Valin
2016-May-31 21:59 UTC
[opus] [PATCH 1/2] Modify autoconf tests for intrinsics to stop clang from optimizing them away.
Hi Jonathan,

Thanks for the patch. It's now in master.

Cheers,

Jean-Marc

On 05/31/2016 03:52 PM, Jonathan Lennox wrote:
> ---
> configure.ac | 30 +++++++++++++++++++++---------
> 1 file changed, 21 insertions(+), 9 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index a67aa37..c722556 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -472,6 +472,7 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
> [[
> static float32x4_t A0, A1, SUMM;
> SUMM = vmlaq_f32(SUMM, A0, A1);
> + return (int)vgetq_lane_f32(SUMM, 0);
> ]]
> )
> AS_IF([test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1" && test x"$OPUS_ARM_PRESUME_NEON_INTR" != x"1"],
> @@ -521,10 +522,13 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
> [OPUS_X86_MAY_HAVE_SSE],
> [OPUS_X86_PRESUME_SSE],
> [[#include <xmmintrin.h>
> + #include <time.h>
> ]],
> [[
> - static __m128 mtest;
> - mtest = _mm_setzero_ps();
> + __m128 mtest;
> + mtest = _mm_set1_ps((float)time(NULL));
> + mtest = _mm_mul_ps(mtest, mtest);
> + return _mm_cvtss_si32(mtest);
> ]]
> )
> AS_IF([test x"$OPUS_X86_MAY_HAVE_SSE" = x"1" && test x"$OPUS_X86_PRESUME_SSE" != x"1"],
> @@ -539,10 +543,13 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
> [OPUS_X86_MAY_HAVE_SSE2],
> [OPUS_X86_PRESUME_SSE2],
> [[#include <emmintrin.h>
> + #include <time.h>
> ]],
> [[
> - static __m128i mtest;
> - mtest = _mm_setzero_si128();
> + __m128i mtest;
> + mtest = _mm_set1_epi32((int)time(NULL));
> + mtest = _mm_mul_epu32(mtest, mtest);
> + return _mm_cvtsi128_si32(mtest);
> ]]
> )
> AS_IF([test x"$OPUS_X86_MAY_HAVE_SSE2" = x"1" && test x"$OPUS_X86_PRESUME_SSE2" != x"1"],
> @@ -557,11 +564,13 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
> [OPUS_X86_MAY_HAVE_SSE4_1],
> [OPUS_X86_PRESUME_SSE4_1],
> [[#include <smmintrin.h>
> + #include <time.h>
> ]],
> [[
> - static __m128i mtest;
> - mtest = _mm_setzero_si128();
> - mtest = _mm_cmpeq_epi64(mtest, mtest);
> + __m128i mtest;
> + mtest = _mm_set1_epi32((int)time(NULL));
> + mtest = _mm_mul_epi32(mtest, mtest);
> + return _mm_cvtsi128_si32(mtest);
> ]]
> )
> AS_IF([test x"$OPUS_X86_MAY_HAVE_SSE4_1" = x"1" && test x"$OPUS_X86_PRESUME_SSE4_1" != x"1"],
> @@ -576,10 +585,13 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
> [OPUS_X86_MAY_HAVE_AVX],
> [OPUS_X86_PRESUME_AVX],
> [[#include <immintrin.h>
> + #include <time.h>
> ]],
> [[
> - static __m256 mtest;
> - mtest = _mm256_setzero_ps();
> + __m256 mtest;
> + mtest = _mm256_set1_ps((float)time(NULL));
> + mtest = _mm256_addsub_ps(mtest, mtest);
> + return _mm_cvtss_si32(_mm256_extractf128_ps(mtest, 0));
> ]]
> )
> AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1" && test x"$OPUS_X86_PRESUME_AVX" != x"1"],
>
Reasonably Related Threads
- [PATCH] Fix x86 build if we presume SSE4.1 (and earlier), but not AVX.
- [RFC PATCH v3] Intrinsics/RTCD related fixes. Mostly x86.
- [RFC PATCHv2] Intrinsics/RTCD related fixes. Mostly x86.
- Patch cleaning up Opus x86 intrinsics configury
- [RFC PATCH v1 0/4] Enable aarch64 intrinsics/Ne10