thr3ads.net - Speex dev - [Speex-dev] [PATCH] Fix miscompile of SSE resampler [Oct 2009]

If this information is useful, please help other people find it:
Share via:

Thorvald Natvig

2009-Oct-26 16:41 UTC

[Speex-dev] [PATCH] Fix miscompile of SSE resampler

From: Thorvald Natvig <slicer at users.sourceforge.net>

Some optimizing compilers miscompile the current SSE optimizations when
full optimizations are enabled. By using output value pointer instead of
a return value, we can bypass this misbehaviour.
---
 libspeex/resample.c     |    8 ++++----
 libspeex/resample_sse.h |   24 ++++++++----------------
 2 files changed, 12 insertions(+), 20 deletions(-)

diff --git a/libspeex/resample.c b/libspeex/resample.c
index 7b5a308..8131380 100644
--- a/libspeex/resample.c
+++ b/libspeex/resample.c
@@ -361,7 +361,7 @@ static int resampler_basic_direct_single(SpeexResamplerState
*st, spx_uint32_t c
       sum = accum[0] + accum[1] + accum[2] + accum[3];
 */
 #else
-      sum = inner_product_single(sinc, iptr, N);
+      inner_product_single(&sum, sinc, iptr, N);
 #endif
 
       out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 15), 32767);
@@ -412,7 +412,7 @@ static int resampler_basic_direct_double(SpeexResamplerState
*st, spx_uint32_t c
       }
       sum = accum[0] + accum[1] + accum[2] + accum[3];
 #else
-      sum = inner_product_double(sinc, iptr, N);
+      inner_product_double(&sum, sinc, iptr, N);
 #endif
 
       out[out_stride * out_sample++] = PSHR32(sum, 15);
@@ -472,7 +472,7 @@ static int
resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
       sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) +
MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) +
MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) +
MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
 #else
       cubic_coef(frac, interp);
-      sum = interpolate_product_single(iptr, st->sinc_table +
st->oversample + 4 - offset - 2, N, st->oversample, interp);
+      interpolate_product_single(&sum, iptr, st->sinc_table +
st->oversample + 4 - offset - 2, N, st->oversample, interp);
 #endif
       
       out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 14), 32767);
@@ -534,7 +534,7 @@ static int
resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
       sum = MULT16_32_Q15(interp[0],accum[0]) +
MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) +
MULT16_32_Q15(interp[3],accum[3]);
 #else
       cubic_coef(frac, interp);
-      sum = interpolate_product_double(iptr, st->sinc_table +
st->oversample + 4 - offset - 2, N, st->oversample, interp);
+      interpolate_product_double(&sum, iptr, st->sinc_table +
st->oversample + 4 - offset - 2, N, st->oversample, interp);
 #endif
       
       out[out_stride * out_sample++] = PSHR32(sum,15);
diff --git a/libspeex/resample_sse.h b/libspeex/resample_sse.h
index 64be8a1..86ff35e 100644
--- a/libspeex/resample_sse.h
+++ b/libspeex/resample_sse.h
@@ -37,10 +37,9 @@
 #include <xmmintrin.h>
 
 #define OVERRIDE_INNER_PRODUCT_SINGLE
-static inline float inner_product_single(const float *a, const float *b,
unsigned int len)
+static inline void inner_product_single(float *ret, const float *a, const float
*b, unsigned int len)
 {
    int i;
-   float ret;
    __m128 sum = _mm_setzero_ps();
    for (i=0;i<len;i+=8)
    {
@@ -49,14 +48,12 @@ static inline float inner_product_single(const float *a,
const float *b, unsigne
    }
    sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
    sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
-   _mm_store_ss(&ret, sum);
-   return ret;
+   _mm_store_ss(ret, sum);
 }
 
 #define OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
-static inline float interpolate_product_single(const float *a, const float *b,
unsigned int len, const spx_uint32_t oversample, float *frac) {
+static inline void interpolate_product_single(float *ret, const float *a, const
float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
   int i;
-  float ret;
   __m128 sum = _mm_setzero_ps();
   __m128 f = _mm_loadu_ps(frac);
   for(i=0;i<len;i+=2)
@@ -67,18 +64,16 @@ static inline float interpolate_product_single(const float
*a, const float *b, u
    sum = _mm_mul_ps(f, sum);
    sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
    sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
-   _mm_store_ss(&ret, sum);
-   return ret;
+   _mm_store_ss(ret, sum);
 }
 
 #ifdef _USE_SSE2
 #include <emmintrin.h>
 #define OVERRIDE_INNER_PRODUCT_DOUBLE
 
-static inline double inner_product_double(const float *a, const float *b,
unsigned int len)
+static inline void inner_product_double(double *ret, const float *a, const
float *b, unsigned int len)
 {
    int i;
-   double ret;
    __m128d sum = _mm_setzero_pd();
    __m128 t;
    for (i=0;i<len;i+=8)
@@ -92,14 +87,12 @@ static inline double inner_product_double(const float *a,
const float *b, unsign
       sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
    }
    sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
-   _mm_store_sd(&ret, sum);
-   return ret;
+   _mm_store_sd(ret, sum);
 }
 
 #define OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
-static inline double interpolate_product_double(const float *a, const float *b,
unsigned int len, const spx_uint32_t oversample, float *frac) {
+static inline void interpolate_product_double(double *ret, const float *a,
const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
   int i;
-  double ret;
   __m128d sum;
   __m128d sum1 = _mm_setzero_pd();
   __m128d sum2 = _mm_setzero_pd();
@@ -121,8 +114,7 @@ static inline double interpolate_product_double(const float
*a, const float *b,
   sum2 = _mm_mul_pd(f2, sum2);
   sum = _mm_add_pd(sum1, sum2);
   sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
-  _mm_store_sd(&ret, sum);
-  return ret;
+  _mm_store_sd(ret, sum);
 }
 
 #endif
-- 
1.6.4.msysgit.0.19.gd78f4

Jeff Wallace

2009-Oct-26 18:26 UTC

head link

[Speex-dev] [PATCH] Fix miscompile of SSE resampler

Out of curiosity, what compiler(s) got it wrong?

On Mon, Oct 26, 2009 at 9:41 AM, Thorvald Natvig <thorvald at
natvig.com>wrote:
> From: Thorvald Natvig <slicer at users.sourceforge.net>
>
> Some optimizing compilers miscompile the current SSE optimizations when
> full optimizations are enabled. By using output value pointer instead of
> a return value, we can bypass this misbehaviour.
> ---
>  libspeex/resample.c     |    8 ++++----
>  libspeex/resample_sse.h |   24 ++++++++----------------
>  2 files changed, 12 insertions(+), 20 deletions(-)
>
> diff --git a/libspeex/resample.c b/libspeex/resample.c
> index 7b5a308..8131380 100644
> --- a/libspeex/resample.c
> +++ b/libspeex/resample.c
> @@ -361,7 +361,7 @@ static int
> resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t c
>       sum = accum[0] + accum[1] + accum[2] + accum[3];
>  */
>  #else
> -      sum = inner_product_single(sinc, iptr, N);
> +      inner_product_single(&sum, sinc, iptr, N);
>  #endif
>
>       out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 15), 32767);
> @@ -412,7 +412,7 @@ static int
> resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t c
>       }
>       sum = accum[0] + accum[1] + accum[2] + accum[3];
>  #else
> -      sum = inner_product_double(sinc, iptr, N);
> +      inner_product_double(&sum, sinc, iptr, N);
>  #endif
>
>       out[out_stride * out_sample++] = PSHR32(sum, 15);
> @@ -472,7 +472,7 @@ static int
> resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
>       sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) +
> MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) +
> MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) +
> MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
>  #else
>       cubic_coef(frac, interp);
> -      sum = interpolate_product_single(iptr, st->sinc_table +
> st->oversample + 4 - offset - 2, N, st->oversample, interp);
> +      interpolate_product_single(&sum, iptr, st->sinc_table +
> st->oversample + 4 - offset - 2, N, st->oversample, interp);
>  #endif
>
>       out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 14), 32767);
> @@ -534,7 +534,7 @@ static int
> resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
>       sum = MULT16_32_Q15(interp[0],accum[0]) +
> MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) +
> MULT16_32_Q15(interp[3],accum[3]);
>  #else
>       cubic_coef(frac, interp);
> -      sum = interpolate_product_double(iptr, st->sinc_table +
> st->oversample + 4 - offset - 2, N, st->oversample, interp);
> +      interpolate_product_double(&sum, iptr, st->sinc_table +
> st->oversample + 4 - offset - 2, N, st->oversample, interp);
>  #endif
>
>       out[out_stride * out_sample++] = PSHR32(sum,15);
> diff --git a/libspeex/resample_sse.h b/libspeex/resample_sse.h
> index 64be8a1..86ff35e 100644
> --- a/libspeex/resample_sse.h
> +++ b/libspeex/resample_sse.h
> @@ -37,10 +37,9 @@
>  #include <xmmintrin.h>
>
>  #define OVERRIDE_INNER_PRODUCT_SINGLE
> -static inline float inner_product_single(const float *a, const float *b,
> unsigned int len)
> +static inline void inner_product_single(float *ret, const float *a, const
> float *b, unsigned int len)
>  {
>    int i;
> -   float ret;
>    __m128 sum = _mm_setzero_ps();
>    for (i=0;i<len;i+=8)
>    {
> @@ -49,14 +48,12 @@ static inline float inner_product_single(const float
> *a, const float *b, unsigne
>    }
>    sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
>    sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
> -   _mm_store_ss(&ret, sum);
> -   return ret;
> +   _mm_store_ss(ret, sum);
>  }
>
>  #define OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
> -static inline float interpolate_product_single(const float *a, const float
> *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
> +static inline void interpolate_product_single(float *ret, const float *a,
> const float *b, unsigned int len, const spx_uint32_t oversample, float
> *frac) {
>   int i;
> -  float ret;
>   __m128 sum = _mm_setzero_ps();
>   __m128 f = _mm_loadu_ps(frac);
>   for(i=0;i<len;i+=2)
> @@ -67,18 +64,16 @@ static inline float interpolate_product_single(const
> float *a, const float *b, u
>    sum = _mm_mul_ps(f, sum);
>    sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
>    sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
> -   _mm_store_ss(&ret, sum);
> -   return ret;
> +   _mm_store_ss(ret, sum);
>  }
>
>  #ifdef _USE_SSE2
>  #include <emmintrin.h>
>  #define OVERRIDE_INNER_PRODUCT_DOUBLE
>
> -static inline double inner_product_double(const float *a, const float *b,
> unsigned int len)
> +static inline void inner_product_double(double *ret, const float *a, const
> float *b, unsigned int len)
>  {
>    int i;
> -   double ret;
>    __m128d sum = _mm_setzero_pd();
>    __m128 t;
>    for (i=0;i<len;i+=8)
> @@ -92,14 +87,12 @@ static inline double inner_product_double(const float
> *a, const float *b, unsign
>       sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
>    }
>    sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
> -   _mm_store_sd(&ret, sum);
> -   return ret;
> +   _mm_store_sd(ret, sum);
>  }
>
>  #define OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
> -static inline double interpolate_product_double(const float *a, const
> float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
> +static inline void interpolate_product_double(double *ret, const float *a,
> const float *b, unsigned int len, const spx_uint32_t oversample, float
> *frac) {
>   int i;
> -  double ret;
>   __m128d sum;
>   __m128d sum1 = _mm_setzero_pd();
>   __m128d sum2 = _mm_setzero_pd();
> @@ -121,8 +114,7 @@ static inline double interpolate_product_double(const
> float *a, const float *b,
>   sum2 = _mm_mul_pd(f2, sum2);
>   sum = _mm_add_pd(sum1, sum2);
>   sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
> -  _mm_store_sd(&ret, sum);
> -  return ret;
> +  _mm_store_sd(ret, sum);
>  }
>
>  #endif
> --
> 1.6.4.msysgit.0.19.gd78f4
>
> _______________________________________________
> Speex-dev mailing list
> Speex-dev at xiph.org
> http://lists.xiph.org/mailman/listinfo/speex-dev
>-------------- next part --------------
An HTML attachment was scrubbed...
URL:
http://lists.xiph.org/pipermail/speex-dev/attachments/20091026/d08c6510/attachment-0001.htm

Reasonably Related Threads

Search for more apparently analagous threads

Speex dev - Oct 2009 - [PATCH] Fix miscompile of SSE resampler

[Speex-dev] [PATCH] Fix miscompile of SSE resampler

[Speex-dev] [PATCH] Fix miscompile of SSE resampler

Reasonably Related Threads