Displaying 4 results from an estimated 4 matches for "punpckhbw".
2005 Aug 17
2
MMX loop filter for theora-exp
..."lea (%1,%1,2),%%esi\n" /* esi = _ystride*3 */
+"movq (%0,%%esi),%%mm4\n" /* mm4 = _pix[0..7 + _ystride*3] */
+"movq %%mm7,%%mm6\n" /* mm6 = _pix[0..7] */
+"punpcklbw %%mm0,%%mm6\n" /* expand unsigned _pix[0..3] to 16 bits */
+"movq %%mm4,%%mm5\n"
+"punpckhbw %%mm0,%%mm7\n" /* expand unsigned _pix[4..7] to 16 bits */
+"punpcklbw %%mm0,%%mm4\n" /* expand other arrays too */
+"punpckhbw %%mm0,%%mm5\n"
+"psubw %%mm4,%%mm6\n" /* mm6 = mm6 - mm4 */
+"psubw %%mm5,%%mm7\n" /* mm7 = mm7 - mm5 */
+ /* mm7:mm6 = _p[0...
2004 Aug 24
5
MMX/mmxext optimisations
quite some speed improvement indeed.
attached the updated patch to apply to svn/trunk.
j
-------------- next part --------------
A non-text attachment was scrubbed...
Name: theora-mmx.patch.gz
Type: application/x-gzip
Size: 8648 bytes
Desc: not available
Url : http://lists.xiph.org/pipermail/theora-dev/attachments/20040824/5a5f2731/theora-mmx.patch-0001.bin
2005 Mar 23
3
[PATCH] promised MMX patches rc1
...(
+" movl $0x7, %%eax \n\t" /* 8x loop */
+" pxor %%mm0, %%mm0 \n\t" /* zero mm0 */
+" .balign 16 \n\t"
+"1: movq (%4), %%mm2 \n\t" /* load mm2 with _src */
+" movq %%mm2, %%mm3 \n\t" /* copy mm2 to mm3 */
+" punpckhbw %%mm0, %%mm2 \n\t" /* expand high part of _src to 16 bits */
+" punpcklbw %%mm0, %%mm3 \n\t" /* expand low part of _src to 16 bits */
+" paddsw (%1), %%mm3 \n\t" /* add low part with low part of residue */
+" paddsw 8(%1), %%mm2 \n\t" /* high with high */...
2005 Mar 23
0
[PATCH]
...(
+" movl $0x7, %%eax \n\t" /* 8x loop */
+" pxor %%mm0, %%mm0 \n\t" /* zero mm0 */
+" .balign 16 \n\t"
+"1: movq (%4), %%mm2 \n\t" /* load mm2 with _src */
+" movq %%mm2, %%mm3 \n\t" /* copy mm2 to mm3 */
+" punpckhbw %%mm0, %%mm2 \n\t" /* expand high part of _src to 16 bits */
+" punpcklbw %%mm0, %%mm3 \n\t" /* expand low part of _src to 16 bits */
+" paddsw (%1), %%mm3 \n\t" /* add low part with low part of residue */
+" paddsw 8(%1), %%mm2 \n\t" /* high with high */...