This is a slightly modified version of the original VGA patch that
removes changes to the configure script to check for SSE2 capabilities.
SSE2 is now only checked at run time.

Signed-off-by: Don Dugger <donald.d.dugger@intel.com>

-- 
Don Dugger
"Censeo Toto nos in Kansa esse decisse." - D. Gale
Donald.D.Dugger@intel.com
Ph: (303)440-1368

diff -r c445d4a0dd76 tools/ioemu/hw/vga.c
--- a/tools/ioemu/hw/vga.c	Tue Mar 14 19:33:45 2006 +0100
+++ b/tools/ioemu/hw/vga.c	Thu Mar 16 14:15:07 2006 -0700
@@ -21,6 +21,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
+#include <signal.h>
+#include <setjmp.h>
+#include <emmintrin.h>
+
 #include "vl.h"
 #include "vga_int.h"
 
@@ -149,6 +153,8 @@ static uint8_t expand4to8[16];
 
 VGAState *vga_state;
 int vga_io_memory;
+
+int sse2_ok = 1;
 
 static uint32_t vga_ioport_read(void *opaque, uint32_t addr)
 {
@@ -1340,6 +1346,110 @@ void vga_invalidate_scanlines(VGAState *
     }
 }
 
+/*
+ * Word-at-a-time fallback for vram_dirty() on hosts without usable SSE2.
+ * Compares n bytes of VRAM at `offset' with the shadow copy; on the first
+ * mismatch the rest of the range is copied into the shadow and 1 is
+ * returned.  Returns 0 if the range is unchanged.
+ */
+static inline int cmp_vram(VGAState *s, int offset, int n)
+{
+    long *vp, *sp;
+
+    if (s->vram_shadow == NULL)
+        return 1;
+    vp = (long *)(s->vram_ptr + offset);
+    sp = (long *)(s->vram_shadow + offset);
+    while ((n -= sizeof(*vp)) >= 0)
+        if (*vp++ != *sp++) {
+            /* n was already reduced for this word, so add it back */
+            memcpy(sp - 1, vp - 1, n + sizeof(*vp));
+            return 1;
+        }
+    return 0;
+}
+
+/* Execute CPUID leaf `op' and return the EDX result. */
+static inline unsigned int cpuid_edx(unsigned int op)
+{
+    unsigned int eax, edx;
+
+    __asm__("cpuid"
+            : "=a" (eax), "=d" (edx)
+            : "0" (op)
+            : "bx", "cx");
+    return edx;
+}
+
+jmp_buf sse_jbuf;
+
+/* SIGILL handler used only while check_sse2() runs its probe. */
+void intr(int sig)
+{
+
+    sse2_ok = 0;
+    longjmp(sse_jbuf, 1);
+    return;
+}
+
+/*
+ * Decide at run time whether vram_dirty() may use SSE2: CPUID.1:EDX bit 26
+ * must be set and an XMM instruction must execute without raising SIGILL.
+ * The previous SIGILL disposition is restored afterwards so that a later,
+ * unrelated SIGILL cannot longjmp() into a stale frame.
+ */
+void check_sse2(void)
+{
+    struct sigaction probe, saved;
+
+    if ((cpuid_edx(1) & 0x4000000) == 0) {
+        sse2_ok = 0;
+        return;
+    }
+    memset(&probe, 0, sizeof(probe));
+    probe.sa_handler = intr;
+    sigemptyset(&probe.sa_mask);
+    /* SA_NODEFER: SIGILL must not stay blocked after intr() longjmp()s */
+    probe.sa_flags = SA_NODEFER;
+    if (sigaction(SIGILL, &probe, &saved) < 0) {
+        sse2_ok = 0;
+        return;
+    }
+    if (setjmp(sse_jbuf) == 0)
+        __asm__("xorps %xmm0,%xmm0\n");
+    sigaction(SIGILL, &saved, NULL);
+}
+
+/*
+ * Compare n bytes of VRAM at `offset' with the shadow copy, 16 bytes at a
+ * time.  On the first mismatch the rest of the range is copied into the
+ * shadow and 1 is returned; 0 means unchanged.  With no shadow buffer every
+ * range is reported dirty.
+ */
+int vram_dirty(VGAState *s, int offset, int n)
+{
+    __m128i *sp, *vp;
+
+    if (s->vram_shadow == NULL)
+        return 1;
+    if (sse2_ok == 0)
+        return cmp_vram(s, offset, n);
+    vp = (__m128i *)(s->vram_ptr + offset);
+    sp = (__m128i *)(s->vram_shadow + offset);
+    while ((n -= sizeof(*vp)) >= 0) {
+        if (_mm_movemask_epi8(_mm_cmpeq_epi8(*sp, *vp)) != 0xffff) {
+            while (n >= 0) {
+                _mm_store_si128(sp++, _mm_load_si128(vp++));
+                n -= sizeof(*vp);
+            }
+            return 1;
+        }
+        sp++;
+        vp++;
+    }
+    return 0;
+}
+
 /* 
  * graphic modes
  */
@@ -1434,6 +1544,10 @@ static void vga_draw_graphic(VGAState *s
     printf("w=%d h=%d v=%d line_offset=%d cr[0x09]=0x%02x cr[0x17]=0x%02x linecmp=%d sr[0x01]=0x%02x\n",
            width, height, v, line_offset, s->cr[9], s->cr[0x17], s->line_compare, s->sr[0x01]);
 #endif
+    /* report every VRAM page that differs from the shadow copy as dirty */
+    for (y = 0; y < s->vram_size; y += TARGET_PAGE_SIZE)
+        if (vram_dirty(s, y, TARGET_PAGE_SIZE))
+            cpu_physical_memory_set_dirty(s->vram_offset + y);
     addr1 = (s->start_addr * 4);
     bwidth = width * 4;
     y_start = -1;
@@ -1536,8 +1650,17 @@ static void vga_draw_blank(VGAState *s, 
 
 void vga_update_display(void)
 {
+    static unsigned int loop;
     VGAState *s = vga_state;
     int full_update, graphic_mode;
+
+    /*
+     * Only update the display every other time.  The responsiveness is
+     * acceptable and it cuts down on the overhead of the VRAM compare
+     * in vram_dirty().
+     */
+    if (loop++ & 1)
+        return;
 
     if (s->ds->depth == 0) {
         /* nothing to do */
@@ -1569,7 +1692,6 @@ void vga_update_display(void)
             full_update = 1;
         }
 
-        full_update = 1;
         switch(graphic_mode) {
         case GMODE_TEXT:
             vga_draw_text(s, full_update);
@@ -1874,7 +1996,13 @@ void vga_common_init(VGAState *s, Displa
 #else
     s->vram_ptr = qemu_malloc(vga_ram_size);
 #endif
-
+    check_sse2();
+    /* over-allocate so the shadow can be rounded up to a page boundary */
+    s->vram_shadow = qemu_malloc(vga_ram_size + TARGET_PAGE_SIZE + 1);
+    if (s->vram_shadow == NULL)
+        fprintf(stderr, "Cannot allocate %d bytes for VRAM shadow, mouse will be slow\n", vga_ram_size);
+    else
+        s->vram_shadow = (uint8_t *)((long)(s->vram_shadow + TARGET_PAGE_SIZE - 1) & ~(TARGET_PAGE_SIZE - 1));
     s->vram_offset = vga_ram_offset;
     s->vram_size = vga_ram_size;
     s->ds = ds;
diff -r c445d4a0dd76 tools/ioemu/hw/vga_int.h
--- a/tools/ioemu/hw/vga_int.h	Tue Mar 14 19:33:45 2006 +0100
+++ b/tools/ioemu/hw/vga_int.h	Thu Mar 16 14:15:07 2006 -0700
@@ -76,6 +76,7 @@
 
 #define VGA_STATE_COMMON                                                \
     uint8_t *vram_ptr;                                                  \
+    uint8_t *vram_shadow;                                               \
     unsigned long vram_offset;                                          \
     unsigned int vram_size;                                             \
     uint32_t latch;                                                     \
diff -r c445d4a0dd76 tools/ioemu/target-i386-dm/Makefile
--- a/tools/ioemu/target-i386-dm/Makefile	Tue Mar 14 19:33:45 2006 +0100
+++ b/tools/ioemu/target-i386-dm/Makefile	Thu Mar 16 14:15:07 2006 -0700
@@ -13,7 +13,7 @@ VPATH+=:$(SRC_PATH)/linux-user
 VPATH+=:$(SRC_PATH)/linux-user
 DEFINES+=-I$(SRC_PATH)/linux-user -I$(SRC_PATH)/linux-user/$(TARGET_ARCH)
 endif
-CFLAGS+=-g -fno-strict-aliasing
+CFLAGS+=-g -fno-strict-aliasing -msse2
 LDFLAGS=-g
 LIBS
 HELPER_CFLAGS=$(CFLAGS)

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel