Displaying 4 results from an estimated 4 matches for "inner_product_double".
2009 Oct 26
1
[PATCH] Fix miscompile of SSE resampler
...e(&sum, sinc, iptr, N);
#endif
out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 15), 32767);
@@ -412,7 +412,7 @@ static int resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t c
}
sum = accum[0] + accum[1] + accum[2] + accum[3];
#else
- sum = inner_product_double(sinc, iptr, N);
+ inner_product_double(&sum, sinc, iptr, N);
#endif
out[out_stride * out_sample++] = PSHR32(sum, 15);
@@ -472,7 +472,7 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MU...
2008 Nov 26
1
SSE2 code won't compile in VC
...th
_USE_SSE2 defined because it refuses to cast __m128 to __m128d and vice
versa. While there are intrinsics to do the casts, I thought it would be
simpler to just use an intrinsic that accomplishes the same thing
without all the casting. Thanks,
--John
@@ -91,7 +91,7 @@ static inline double inner_product_double(const float
*a, const float *b, unsign
sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
}
- sum = _mm_add_sd(sum, (__m128d) _mm_movehl_ps((__m128) sum, (__m128)
sum));
+ sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
_m...
2008 May 03
2
Resampler (no api)
...m_loadu_ps(b+(i+1)*oversample)));
+ }
+ sum = _mm_mul_ps(f, sum);
+ sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
+ sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
+ _mm_store_ss(&ret, sum);
+ return ret;
+}
+
+#ifdef _USE_SSE2
+#include <emmintrin.h>
+#define OVERRIDE_INNER_PRODUCT_DOUBLE
+
+static inline double inner_product_double(const float *a, const float *b, unsigned int len)
+{
+ int i;
+ double ret;
+ __m128d sum = _mm_setzero_pd();
+ __m128 t;
+ for (i=0;i<len;i+=8)
+ {
+ t = _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i));
+ sum = _mm_add_pd(sum,...
2008 May 03
0
Resampler, memory only variant
...m_loadu_ps(b+(i+1)*oversample)));
+ }
+ sum = _mm_mul_ps(f, sum);
+ sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
+ sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
+ _mm_store_ss(&ret, sum);
+ return ret;
+}
+
+#ifdef _USE_SSE2
+#include <emmintrin.h>
+#define OVERRIDE_INNER_PRODUCT_DOUBLE
+
+static inline double inner_product_double(const float *a, const float *b, unsigned int len)
+{
+ int i;
+ double ret;
+ __m128d sum = _mm_setzero_pd();
+ __m128 t;
+ for (i=0;i<len;i+=8)
+ {
+ t = _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i));
+ sum = _mm_add_pd(sum,...