Jonathan Lennox
2016-May-31 19:52 UTC
[opus] [PATCH 1/2] Modify autoconf tests for intrinsics to stop clang from optimizing them away.
---
 configure.ac | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/configure.ac b/configure.ac
index a67aa37..c722556 100644
--- a/configure.ac
+++ b/configure.ac
@@ -472,6 +472,7 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
          [[
              static float32x4_t A0, A1, SUMM;
              SUMM = vmlaq_f32(SUMM, A0, A1);
+             return (int)vgetq_lane_f32(SUMM, 0);
          ]]
       )
       AS_IF([test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1" && test x"$OPUS_ARM_PRESUME_NEON_INTR" != x"1"],
@@ -521,10 +522,13 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
          [OPUS_X86_MAY_HAVE_SSE],
          [OPUS_X86_PRESUME_SSE],
          [[#include <xmmintrin.h>
+           #include <time.h>
          ]],
          [[
-             static __m128 mtest;
-             mtest = _mm_setzero_ps();
+             __m128 mtest;
+             mtest = _mm_set1_ps((float)time(NULL));
+             mtest = _mm_mul_ps(mtest, mtest);
+             return _mm_cvtss_si32(mtest);
          ]]
       )
       AS_IF([test x"$OPUS_X86_MAY_HAVE_SSE" = x"1" && test x"$OPUS_X86_PRESUME_SSE" != x"1"],
@@ -539,10 +543,13 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
          [OPUS_X86_MAY_HAVE_SSE2],
          [OPUS_X86_PRESUME_SSE2],
          [[#include <emmintrin.h>
+           #include <time.h>
          ]],
          [[
-             static __m128i mtest;
-             mtest = _mm_setzero_si128();
+             __m128i mtest;
+             mtest = _mm_set1_epi32((int)time(NULL));
+             mtest = _mm_mul_epu32(mtest, mtest);
+             return _mm_cvtsi128_si32(mtest);
          ]]
       )
       AS_IF([test x"$OPUS_X86_MAY_HAVE_SSE2" = x"1" && test x"$OPUS_X86_PRESUME_SSE2" != x"1"],
@@ -557,11 +564,13 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
          [OPUS_X86_MAY_HAVE_SSE4_1],
          [OPUS_X86_PRESUME_SSE4_1],
          [[#include <smmintrin.h>
+           #include <time.h>
          ]],
          [[
-             static __m128i mtest;
-             mtest = _mm_setzero_si128();
-             mtest = _mm_cmpeq_epi64(mtest, mtest);
+             __m128i mtest;
+             mtest = _mm_set1_epi32((int)time(NULL));
+             mtest = _mm_mul_epi32(mtest, mtest);
+             return _mm_cvtsi128_si32(mtest);
          ]]
       )
       AS_IF([test x"$OPUS_X86_MAY_HAVE_SSE4_1" = x"1" && test x"$OPUS_X86_PRESUME_SSE4_1" != x"1"],
@@ -576,10 +585,13 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
          [OPUS_X86_MAY_HAVE_AVX],
          [OPUS_X86_PRESUME_AVX],
          [[#include <immintrin.h>
+           #include <time.h>
          ]],
          [[
-             static __m256 mtest;
-             mtest = _mm256_setzero_ps();
+             __m256 mtest;
+             mtest = _mm256_set1_ps((float)time(NULL));
+             mtest = _mm256_addsub_ps(mtest, mtest);
+             return _mm_cvtss_si32(_mm256_extractf128_ps(mtest, 0));
          ]]
       )
       AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1" && test x"$OPUS_X86_PRESUME_AVX" != x"1"],
-- 
2.7.4 (Apple Git-66)
Jonathan Lennox
2016-May-31 19:52 UTC
[opus] [PATCH 2/2] List intrinsics flags being tested after existing cflags, so they override other options.
---
 m4/opus-intrinsics.m4 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/m4/opus-intrinsics.m4 b/m4/opus-intrinsics.m4
index b93ddd3..a262ca1 100644
--- a/m4/opus-intrinsics.m4
+++ b/m4/opus-intrinsics.m4
@@ -15,7 +15,7 @@ AC_DEFUN([OPUS_CHECK_INTRINSICS],
       $4=0
       AC_MSG_RESULT([no])
       AC_MSG_CHECKING([if compiler supports $1 intrinsics with $2])
-      save_CFLAGS="$CFLAGS"; CFLAGS="$2 $CFLAGS"
+      save_CFLAGS="$CFLAGS"; CFLAGS="$CFLAGS $2"
       AC_LINK_IFELSE([AC_LANG_PROGRAM($5, $6)],
       [
          AC_MSG_RESULT([yes])
-- 
2.7.4 (Apple Git-66)
Jean-Marc Valin
2016-May-31 21:59 UTC
[opus] [PATCH 1/2] Modify autoconf tests for intrinsics to stop clang from optimizing them away.
Hi Jonathan,

Thanks for the patch. It's now in master.

Cheers,

Jean-Marc

On 05/31/2016 03:52 PM, Jonathan Lennox wrote:
> ---
>  configure.ac | 30 +++++++++++++++++++++---------
>  1 file changed, 21 insertions(+), 9 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index a67aa37..c722556 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -472,6 +472,7 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
>           [[
>               static float32x4_t A0, A1, SUMM;
>               SUMM = vmlaq_f32(SUMM, A0, A1);
> +             return (int)vgetq_lane_f32(SUMM, 0);
>           ]]
>        )
>        AS_IF([test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1" && test x"$OPUS_ARM_PRESUME_NEON_INTR" != x"1"],
> @@ -521,10 +522,13 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
>           [OPUS_X86_MAY_HAVE_SSE],
>           [OPUS_X86_PRESUME_SSE],
>           [[#include <xmmintrin.h>
> +           #include <time.h>
>           ]],
>           [[
> -             static __m128 mtest;
> -             mtest = _mm_setzero_ps();
> +             __m128 mtest;
> +             mtest = _mm_set1_ps((float)time(NULL));
> +             mtest = _mm_mul_ps(mtest, mtest);
> +             return _mm_cvtss_si32(mtest);
>           ]]
>        )
>        AS_IF([test x"$OPUS_X86_MAY_HAVE_SSE" = x"1" && test x"$OPUS_X86_PRESUME_SSE" != x"1"],
> @@ -539,10 +543,13 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
>           [OPUS_X86_MAY_HAVE_SSE2],
>           [OPUS_X86_PRESUME_SSE2],
>           [[#include <emmintrin.h>
> +           #include <time.h>
>           ]],
>           [[
> -             static __m128i mtest;
> -             mtest = _mm_setzero_si128();
> +             __m128i mtest;
> +             mtest = _mm_set1_epi32((int)time(NULL));
> +             mtest = _mm_mul_epu32(mtest, mtest);
> +             return _mm_cvtsi128_si32(mtest);
>           ]]
>        )
>        AS_IF([test x"$OPUS_X86_MAY_HAVE_SSE2" = x"1" && test x"$OPUS_X86_PRESUME_SSE2" != x"1"],
> @@ -557,11 +564,13 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
>           [OPUS_X86_MAY_HAVE_SSE4_1],
>           [OPUS_X86_PRESUME_SSE4_1],
>           [[#include <smmintrin.h>
> +           #include <time.h>
>           ]],
>           [[
> -             static __m128i mtest;
> -             mtest = _mm_setzero_si128();
> -             mtest = _mm_cmpeq_epi64(mtest, mtest);
> +             __m128i mtest;
> +             mtest = _mm_set1_epi32((int)time(NULL));
> +             mtest = _mm_mul_epi32(mtest, mtest);
> +             return _mm_cvtsi128_si32(mtest);
>           ]]
>        )
>        AS_IF([test x"$OPUS_X86_MAY_HAVE_SSE4_1" = x"1" && test x"$OPUS_X86_PRESUME_SSE4_1" != x"1"],
> @@ -576,10 +585,13 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
>           [OPUS_X86_MAY_HAVE_AVX],
>           [OPUS_X86_PRESUME_AVX],
>           [[#include <immintrin.h>
> +           #include <time.h>
>           ]],
>           [[
> -             static __m256 mtest;
> -             mtest = _mm256_setzero_ps();
> +             __m256 mtest;
> +             mtest = _mm256_set1_ps((float)time(NULL));
> +             mtest = _mm256_addsub_ps(mtest, mtest);
> +             return _mm_cvtss_si32(_mm256_extractf128_ps(mtest, 0));
>           ]]
>        )
>        AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1" && test x"$OPUS_X86_PRESUME_AVX" != x"1"],
>
Reasonably Related Threads
- [PATCH] Fix x86 build if we presume SSE4.1 (and earlier), but not AVX.
- [RFC PATCH v3] Intrinsics/RTCD related fixes. Mostly x86.
- [RFC PATCHv2] Intrinsics/RTCD related fixes. Mostly x86.
- Patch cleaning up Opus x86 intrinsics configury
- [RFC PATCH v1 0/4] Enable aarch64 intrinsics/Ne10