Displaying 4 results from an estimated 4 matches for "_mm_store_sd".
Did you mean:
_mm_store_ss
2009 Oct 26
1
[PATCH] Fix miscompile of SSE resampler
...28d sum = _mm_setzero_pd();
__m128 t;
for (i=0;i<len;i+=8)
@@ -92,14 +87,12 @@ static inline double inner_product_double(const float *a, const float *b, unsign
sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
}
sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
- _mm_store_sd(&ret, sum);
- return ret;
+ _mm_store_sd(ret, sum);
}
#define OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
-static inline double interpolate_product_double(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
+static inline void interpolate_product_doub...
2008 Nov 26
1
SSE2 code won't compile in VC
...le(const float
*a, const float *b, unsign
sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
}
- sum = _mm_add_sd(sum, (__m128d) _mm_movehl_ps((__m128) sum, (__m128)
sum));
+ sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
_mm_store_sd(&ret, sum);
return ret;
}
@@ -120,7 +120,7 @@ static inline double
interpolate_product_double(const float *a, const float *b,
sum1 = _mm_mul_pd(f1, sum1);
sum2 = _mm_mul_pd(f2, sum2);
sum = _mm_add_pd(sum1, sum2);
- sum = _mm_add_sd(sum, (__m128d) _mm_movehl_ps((__m128) sum, (__...
2008 May 03
2
Resampler (no api)
...vtps_pd(_mm_movehl_ps(t, t)));
+
+ t = _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4));
+ sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
+ sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
+ }
+ sum = _mm_add_sd(sum, (__m128d) _mm_movehl_ps((__m128) sum, (__m128) sum));
+ _mm_store_sd(&ret, sum);
+ return ret;
+}
+
+#define OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
+static inline double interpolate_product_double(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
+ int i;
+ double ret;
+ __m128d sum;
+ __m128d sum1 = _mm_setzero_p...
2008 May 03
0
Resampler, memory only variant
...vtps_pd(_mm_movehl_ps(t, t)));
+
+ t = _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4));
+ sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
+ sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
+ }
+ sum = _mm_add_sd(sum, (__m128d) _mm_movehl_ps((__m128) sum, (__m128) sum));
+ _mm_store_sd(&ret, sum);
+ return ret;
+}
+
+#define OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
+static inline double interpolate_product_double(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
+ int i;
+ double ret;
+ __m128d sum;
+ __m128d sum1 = _mm_setzero_p...