Displaying 2 results from an estimated 2 matches for "wordmask".
Did you mean:
wordmark
2013 Jun 30
0
[PATCH v2] nv50: H.264/MPEG2 decoding support via VP2, available on NV84-NV96, NVA0
...that there are a lot of 0's, and things go
+ * a lot faster if one skips over them.
+ */
+
+#ifdef PIPE_ARCH_SSE
+/* This returns a 16-bit bit-mask in which each pair of bits is 11 if the
+ * corresponding (16-bit) word in blocks is zero, and 00 if it is non-zero. */
+#define wordmask(blocks, zero) \
+ (uint64_t)(_mm_movemask_epi8( \
+ _mm_cmpeq_epi16( \
+ zero, _mm_load_si128((__m128i *)(blocks)))))
+
+ __m128i zero = _mm_setzero_si128();
+
+ /* TODO: Look into doing the inverse quantization in terms of SSE
+...
2013 Jun 27
4
[PATCH] nv50: H.264/MPEG2 decoding support via VP2, available on NV84-NV96, NVA0
...block_index++) {
+ if ((macrob->coded_block_pattern & mask) == 0)
+ continue;
+
+ count = 0;
+
+ /*
+ * The observation here is that there are a lot of 0's, and things go
+ * a lot faster if one skips over them.
+ */
+
+#ifdef __SSE4_2__
+#define wordmask(blocks, zero) \
+ _mm_cmpestrm(_mm_load_si128((__m128i *)(blocks)), 8, zero, 8, 0x19)
+
+ __m128i zero = _mm_setzero_si128();
+
+
+ __m128i tmask1, tmask2, tmask3, tmask4, tmask5, tmask6, tmask7, tmask8,
+ tmask9, tmask10, tmask11, tmask12;
+ /*
+ * Shuffle all the...