search for: sse_load_si128

Displaying 6 results from an estimated 6 matches for "sse_load_si128".

2020 May 18
6
[PATCH] SSE2/SSSE3 optimized version of get_checksum1() for x86-64
...e CFLAGS="-mssse3 -O2" + */ + +#ifdef __x86_64__ +#ifdef __SSE2__ + +#include "rsync.h" + +#ifdef __SSSE3__ +#include <immintrin.h> +#else +#include <tmmintrin.h> +#endif + +/* Compatibility functions to let our SSSE3 algorithm run on SSE2 */ + +static inline __m128i sse_load_si128(void const* buf) { +#ifdef __SSSE3__ + return _mm_lddqu_si128(buf); // same as loadu on all but the oldest SSSE3 CPUs +#else + return _mm_loadu_si128(buf); +#endif +} + +#ifndef __SSSE3__ +static inline __m128i sse_interleave_odd_epi16(__m128i a, __m128i b) { + return _mm_packs_epi32( +...
2020 May 20
0
[PATCHv2] SSE2/SSSE3 optimized version of get_checksum1() for x86-64
...ifdef __cplusplus > + > +#include "rsync.h" > + > +#ifdef ENABLE_SSE2 > + > +#include <immintrin.h> > + > +/* Compatibility functions to let our SSSE3 algorithm run on SSE2 */ > + > +__attribute__ ((target ("sse2"))) static inline __m128i > sse_load_si128(__m128i_u* buf) { > + return _mm_loadu_si128(buf); > +} > + > +__attribute__ ((target ("ssse3"))) static inline __m128i > sse_load_si128(__m128i_u* buf) { > + return _mm_lddqu_si128(buf); // same as loadu on all but the > oldest SSSE3 CPUs > +} > + > +...
2020 May 19
5
[PATCHv2] SSE2/SSSE3 optimized version of get_checksum1() for x86-64
...ction of CPU capabilities. + */ + +#ifdef __x86_64__ +#ifdef __cplusplus + +#include "rsync.h" + +#ifdef ENABLE_SSE2 + +#include <immintrin.h> + +/* Compatibility functions to let our SSSE3 algorithm run on SSE2 */ + +__attribute__ ((target ("sse2"))) static inline __m128i sse_load_si128(__m128i_u* buf) { + return _mm_loadu_si128(buf); +} + +__attribute__ ((target ("ssse3"))) static inline __m128i sse_load_si128(__m128i_u* buf) { + return _mm_lddqu_si128(buf); // same as loadu on all but the oldest SSSE3 CPUs +} + +__attribute__ ((target ("sse2"))) static...
2020 May 18
0
[PATCH] SSE2/SSSE3 optimized version of get_checksum1() for x86-64
...#ifdef __SSE2__ > + > +#include "rsync.h" > + > +#ifdef __SSSE3__ > +#include <immintrin.h> > +#else > +#include <tmmintrin.h> > +#endif > + > +/* Compatibility functions to let our SSSE3 algorithm run on SSE2 */ > + > +static inline __m128i sse_load_si128(void const* buf) { > +#ifdef __SSSE3__ > + return _mm_lddqu_si128(buf); // same as loadu on all but the > oldest SSSE3 CPUs > +#else > + return _mm_loadu_si128(buf); > +#endif > +} > + > +#ifndef __SSSE3__ > +static inline __m128i sse_interleave_odd_epi16(__m128...
2020 May 18
3
[PATCH] SSE2/SSSE3 optimized version of get_checksum1() for x86-64
What do you base this on? Per https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html : "For the x86-32 compiler, you must use -march=cpu-type, -msse or -msse2 switches to enable SSE extensions and make this option effective. For the x86-64 compiler, these extensions are enabled by default." That reads to me like we're fine for SSE2. As stated in my comments, SSSE3 support must be
2020 May 18
2
[PATCH] SSE2/SSSE3 optimized version of get_checksum1() for x86-64
..."rsync.h" >> + >> +#ifdef __SSSE3__ >> +#include <immintrin.h> >> +#else >> +#include <tmmintrin.h> >> +#endif >> + >> +/* Compatibility functions to let our SSSE3 algorithm run on SSE2 */ >> + >> +static inline __m128i sse_load_si128(void const* buf) { >> +#ifdef __SSSE3__ >> + return _mm_lddqu_si128(buf); // same as loadu on all but the >> oldest SSSE3 CPUs >> +#else >> + return _mm_loadu_si128(buf); >> +#endif >> +} >> + >> +#ifndef __SSSE3__ >> +static inline...