Displaying 7 results from an estimated 7 matches for "_mm_set1_epi16".
2018 Aug 22
7
[7.0.0 Release] rc2 has been tagged
Dear testers,
7.0.0-rc2 was just tagged (from branch revision r340437).
There have been a bunch of merges since rc1, and hopefully many of the
issues with the previous candidate are fixed in this one.
Please run the test script, share the results, and upload binaries.
I will publish source tarballs, docs, and binaries on the web page
once they're ready.
Thanks,
Hans
2020 May 18
6
[PATCH] SSE2/SSSE3 optimized version of get_checksum1() for x86-64
...);
+}
+
+static inline __m128i sse_mulu_odd_epi8(__m128i a, __m128i b) {
+ return _mm_mullo_epi16(
+ _mm_srli_epi16(a, 8),
+ _mm_srai_epi16(b, 8)
+ );
+}
+
+static inline __m128i sse_mulu_even_epi8(__m128i a, __m128i b) {
+ return _mm_mullo_epi16(
+ _mm_and_si128(a, _mm_set1_epi16(0xFF)),
+ _mm_srai_epi16(_mm_slli_si128(b, 1), 8)
+ );
+}
+#endif
+
+static inline __m128i sse_hadds_epi16(__m128i a, __m128i b) {
+#ifdef __SSSE3__
+ return _mm_hadds_epi16(a, b);
+#else
+ return _mm_adds_epi16(
+ sse_interleave_even_epi16(a, b),
+ sse_interleave_odd_...
2020 May 18
0
[PATCH] SSE2/SSSE3 optimized version of get_checksum1() for x86-64
...d_epi8(__m128i a, __m128i b) {
> + return _mm_mullo_epi16(
> + _mm_srli_epi16(a, 8),
> + _mm_srai_epi16(b, 8)
> + );
> +}
> +
> +static inline __m128i sse_mulu_even_epi8(__m128i a, __m128i b) {
> + return _mm_mullo_epi16(
> + _mm_and_si128(a, _mm_set1_epi16(0xFF)),
> + _mm_srai_epi16(_mm_slli_si128(b, 1), 8)
> + );
> +}
> +#endif
> +
> +static inline __m128i sse_hadds_epi16(__m128i a, __m128i b) {
> +#ifdef __SSSE3__
> + return _mm_hadds_epi16(a, b);
> +#else
> + return _mm_adds_epi16(
> + sse_in...
2020 May 19
5
[PATCHv2] SSE2/SSSE3 optimized version of get_checksum1() for x86-64
...dd_epi8(__m128i a, __m128i b) {
+ return _mm_mullo_epi16(
+ _mm_srli_epi16(a, 8),
+ _mm_srai_epi16(b, 8)
+ );
+}
+
+__attribute__ ((target ("sse2"))) static inline __m128i
sse_mulu_even_epi8(__m128i a, __m128i b) {
+ return _mm_mullo_epi16(
+ _mm_and_si128(a, _mm_set1_epi16(0xFF)),
+ _mm_srai_epi16(_mm_slli_si128(b, 1), 8)
+ );
+}
+
+__attribute__ ((target ("sse2"))) static inline __m128i
sse_hadds_epi16(__m128i a, __m128i b) {
+ return _mm_adds_epi16(
+ sse_interleave_even_epi16(a, b),
+ sse_interleave_odd_epi16(a, b)
+ );
+}
+...
2020 May 18
2
[PATCH] SSE2/SSSE3 optimized version of get_checksum1() for x86-64
...> + return _mm_mullo_epi16(
>> + _mm_srli_epi16(a, 8),
>> + _mm_srai_epi16(b, 8)
>> + );
>> +}
>> +
>> +static inline __m128i sse_mulu_even_epi8(__m128i a, __m128i b) {
>> + return _mm_mullo_epi16(
>> + _mm_and_si128(a, _mm_set1_epi16(0xFF)),
>> + _mm_srai_epi16(_mm_slli_si128(b, 1), 8)
>> + );
>> +}
>> +#endif
>> +
>> +static inline __m128i sse_hadds_epi16(__m128i a, __m128i b) {
>> +#ifdef __SSSE3__
>> + return _mm_hadds_epi16(a, b);
>> +#else
>> + ret...
2020 May 20
0
[PATCHv2] SSE2/SSSE3 optimized version of get_checksum1() for x86-64
...mm_mullo_epi16(
> + _mm_srli_epi16(a, 8),
> + _mm_srai_epi16(b, 8)
> + );
> +}
> +
> +__attribute__ ((target ("sse2"))) static inline __m128i
> sse_mulu_even_epi8(__m128i a, __m128i b) {
> + return _mm_mullo_epi16(
> + _mm_and_si128(a, _mm_set1_epi16(0xFF)),
> + _mm_srai_epi16(_mm_slli_si128(b, 1), 8)
> + );
> +}
> +
> +__attribute__ ((target ("sse2"))) static inline __m128i
> sse_hadds_epi16(__m128i a, __m128i b) {
> + return _mm_adds_epi16(
> + sse_interleave_even_epi16(a, b),
> +...
2020 May 18
3
[PATCH] SSE2/SSSE3 optimized version of get_checksum1() for x86-64
What do you base this on?
Per https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html :
"For the x86-32 compiler, you must use -march=cpu-type, -msse or
-msse2 switches to enable SSE extensions and make this option
effective. For the x86-64 compiler, these extensions are enabled by
default."
That reads to me like we're fine for SSE2. As stated in my comments,
SSSE3 support must be