Displaying 3 results from an estimated 3 matches for "_mm_setzero_pd".
Did you mean:
_mm_setzero_ps
2009 Oct 26
1
[PATCH] Fix miscompile of SSE resampler
...mintrin.h>
#define OVERRIDE_INNER_PRODUCT_DOUBLE
-static inline double inner_product_double(const float *a, const float *b, unsigned int len)
+static inline void inner_product_double(double *ret, const float *a, const float *b, unsigned int len)
{
int i;
- double ret;
__m128d sum = _mm_setzero_pd();
__m128 t;
for (i=0;i<len;i+=8)
@@ -92,14 +87,12 @@ static inline double inner_product_double(const float *a, const float *b, unsign
sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
}
sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
- _mm_store_sd(&ret, s...
2008 May 03
2
Resampler (no api)
...m, sum, 0x55));
+ _mm_store_ss(&ret, sum);
+ return ret;
+}
+
+#ifdef _USE_SSE2
+#include <emmintrin.h>
+#define OVERRIDE_INNER_PRODUCT_DOUBLE
+
+static inline double inner_product_double(const float *a, const float *b, unsigned int len)
+{
+ int i;
+ double ret;
+ __m128d sum = _mm_setzero_pd();
+ __m128 t;
+ for (i=0;i<len;i+=8)
+ {
+ t = _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i));
+ sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
+ sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
+
+ t = _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4));
+ s...
2008 May 03
0
Resampler, memory only variant
...m, sum, 0x55));
+ _mm_store_ss(&ret, sum);
+ return ret;
+}
+
+#ifdef _USE_SSE2
+#include <emmintrin.h>
+#define OVERRIDE_INNER_PRODUCT_DOUBLE
+
+static inline double inner_product_double(const float *a, const float *b, unsigned int len)
+{
+ int i;
+ double ret;
+ __m128d sum = _mm_setzero_pd();
+ __m128 t;
+ for (i=0;i<len;i+=8)
+ {
+ t = _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i));
+ sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
+ sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
+
+ t = _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4));
+ s...