Add a shadow VRAM to track changes to the real VRAM. When the guest
OS was given write access to the VRAM, the device model tracked all
VRAM changes by updating the entire screen on every output loop,
causing significant overhead (a CPU-bound loop in a guest slows down
by about 35%) and significant mouse latency (VNC uses the same data
path for mouse events and video updates). With the shadow VRAM, only
modified pages need to be updated, and the comparison of the shadow
VRAM to the real VRAM adds only ~4% overhead while eliminating the
mouse latency.
Signed-off-by: Don Dugger <donald.d.dugger@intel.com>
--
Don Dugger
"Censeo Toto nos in Kansa esse decisse." - D. Gale
Donald.D.Dugger@intel.com
Ph: (303)440-1368
diff -r 7d8efd4f1ac7 tools/ioemu/configure
--- a/tools/ioemu/configure Tue Mar 14 15:18:35 2006 +0100
+++ b/tools/ioemu/configure Tue Mar 14 15:47:32 2006 -0700
@@ -160,6 +160,8 @@ for opt do
;;
--fmod-inc=*) fmod_inc=${opt#--fmod-inc=}
;;
+ --disable-sse2) have_sse2="no"
+ ;;
--disable-vnc) vnc="no"
;;
--enable-mingw32) mingw32="yes" ;
cross_prefix="i386-mingw32-"
@@ -225,6 +227,44 @@ if $cc -fno-reorder-blocks -fno-optimize
have_gcc3_options="yes"
fi
+# SSE2 - check if supported
+cat > $TMPC <<EOF
+#include <signal.h>
+#include <emmintrin.h>
+static inline unsigned int cpuid_edx(unsigned int op)
+{
+ unsigned int eax, edx;
+
+ __asm__("cpuid"
+ : "=a" (eax), "=d" (edx)
+ : "0" (op)
+ : "bx", "cx");
+ return edx;
+}
+
+void intr(int sig)
+{
+ exit(1);
+}
+
+int main(void) {
+ if ((cpuid_edx(1) & 0x4000000) == 0)
+ exit(1);
+ signal(SIGILL, intr);
+ __asm__("xorps %xmm0,%xmm0\n");
+ exit(0);
+}
+EOF
+
+if test -z "$have_sse2" ; then
+ have_sse2="no"
+ if $cc -msse2 -o $TMPE $TMPC 2> /dev/null ; then
+ if $TMPE ; then
+ have_sse2="yes"
+ fi
+ fi
+fi
+
##########################################
# VNC probe
@@ -234,6 +274,29 @@ if libvncserver-config --version >& /dev
vnc=yes
else
vnc=no
+fi
+
+fi
+
+if test "$vnc" = "yes" ; then
+
+# check for eager event handling
+# The probe is only compiled, never run: it succeeds when the installed
+# LibVNCServer headers declare the handleEventsEagerly field.
+cat > $TMPC <<EOF
+#include "rfb/rfb.h"
+int main(void) {
+ rfbScreenInfoPtr screen;
+
+ screen->handleEventsEagerly = 1;
+}
+EOF
+
+if $cc `libvncserver-config --cflags` -o $TMPO $TMPC 2> /dev/null ; then
+ have_eager_events="yes"
+else
+ echo "!!"
+ echo "!! Slow VNC mouse, LibVNCServer doesn't support eager events"
+ echo "!!"
+ have_eager_events="no"
fi
fi
@@ -315,6 +378,7 @@ echo " --enable-fmod enable
echo " --enable-fmod enable FMOD audio output driver"
echo " --fmod-lib path to FMOD library"
echo " --fmod-inc path to FMOD includes"
+echo " --disable-sse2 disable use of SSE2 instructions"
echo ""
echo "NOTE: The object files are build at the place where configure is
launched"
exit 1
@@ -361,6 +425,7 @@ echo "VNC support $vnc"
echo "VNC support $vnc"
echo "SDL support $sdl"
echo "SDL static link $sdl_static"
+echo "SSE2 support $have_sse2"
echo "mingw32 support $mingw32"
echo "Adlib support $adlib"
echo -n "FMOD support $fmod"
@@ -392,6 +457,9 @@ echo "configdir=$configdir" >> $config_m
echo "configdir=$configdir" >> $config_mak
echo "LIBDIR=$libdir" >> $config_mak
echo "#define CONFIG_QEMU_SHAREDIR \"$datadir\"" >>
$config_h
+if test "$have_sse2" = "yes" ; then
+ echo "LOCAL_CFLAGS=-msse2" >>$config_mak
+fi
#echo "MAKE=$make" >> $config_mak
#echo "CC=$cc" >> $config_mak
#if test "$have_gcc3_options" = "yes" ; then
@@ -458,6 +526,10 @@ echo "SRC_PATH=$source_path" >> $config_
echo "SRC_PATH=$source_path" >> $config_mak
echo "TARGET_DIRS=$target_list" >> $config_mak
+if test "$have_sse2" = "yes" ; then
+ echo "#define USE_SSE2 1" >>$config_h
+fi
+
# XXX: suppress that
if [ "$bsd" = "yes" ] ; then
echo "#define O_LARGEFILE 0" >> $config_h
@@ -472,6 +544,9 @@ if test "$vnc" = "yes"; then
vnc_cflags="/usr/include"
fi
echo "VNC_CFLAGS=$vnc_cflags" >> $config_mak
+ if test "$have_eager_events" = "yes" ; then
+ echo "#define VNC_EAGER_EVENTS 1" >> $config_h
+ fi
fi
if test "$sdl" = "yes"; then
diff -r 7d8efd4f1ac7 tools/ioemu/hw/vga.c
--- a/tools/ioemu/hw/vga.c Tue Mar 14 15:18:35 2006 +0100
+++ b/tools/ioemu/hw/vga.c Tue Mar 14 15:47:32 2006 -0700
@@ -1340,6 +1340,100 @@ void vga_invalidate_scanlines(VGAState *
}
}
+/*
+ * Compare `n' bytes of the real VRAM, starting at byte `offset', against the
+ * shadow copy.
+ *
+ * Returns 0 when the two ranges are identical.  Returns 1 when they differ;
+ * in that case the rest of the range, from the first differing position
+ * onwards, is copied into the shadow so the next call starts from a
+ * synchronized state.  Also returns 1 when no shadow buffer exists, so the
+ * caller treats the range as changed.
+ *
+ * Declared `static inline' rather than `extern inline': with GNU89 inline
+ * semantics `extern inline' emits no out-of-line body, so any call the
+ * compiler decides not to inline would become an undefined reference.
+ */
+static inline int cmp_vram(VGAState *s, int offset, int n)
+{
+    long *vp, *sp;
+    int tail;
+
+    if (s->vram_shadow == NULL)
+        return 1;
+    vp = (long *)(s->vram_ptr + offset);
+    sp = (long *)(s->vram_shadow + offset);
+    while ((n -= sizeof(*vp)) >= 0) {
+        if (*vp++ != *sp++) {
+            /* n was already reduced for this word, so add it back. */
+            memcpy(sp - 1, vp - 1, n + sizeof(*vp));
+            return 1;
+        }
+    }
+    /*
+     * The loop leaves n negative; whatever is left is a tail shorter than
+     * one long, which the word-wise loop above could not examine.
+     */
+    tail = n + (int)sizeof(*vp);
+    if (tail > 0 && memcmp(vp, sp, tail) != 0) {
+        memcpy(sp, vp, tail);
+        return 1;
+    }
+    return 0;
+}
+
+#ifdef USE_SSE2
+#include <signal.h>
+#include <setjmp.h>
+#include <emmintrin.h>
+
+int sse2_ok = 1;
+
+/*
+ * Execute the x86 CPUID instruction with `op' loaded into EAX and return
+ * the value the instruction leaves in EDX.  The EAX result is captured but
+ * not used; EBX and ECX are declared as clobbered.
+ */
+static inline unsigned int cpuid_edx(unsigned int op)
+{
+ unsigned int eax, edx;
+
+ /* The "0" input constraint places op in the same register as output 0. */
+ __asm__("cpuid"
+ : "=a" (eax), "=d" (edx)
+ : "0" (op)
+ : "bx", "cx");
+ return edx;
+}
+
+/* Jump target used to recover from a SIGILL raised by the probe below. */
+jmp_buf sse_jbuf;
+
+/*
+ * SIGILL handler that is installed only while check_sse2() runs its probe
+ * instruction.  Records that SSE2 cannot be used and unwinds back into
+ * check_sse2().
+ */
+void intr(int sig)
+{
+    (void)sig;
+    sse2_ok = 0;
+    longjmp(sse_jbuf, 1);
+}
+
+/*
+ * Decide at run time whether the SSE2 compare path may be used, leaving the
+ * answer in sse2_ok.
+ *
+ * First the 0x4000000 feature bit in EDX of CPUID leaf 1 is tested; then one
+ * XMM instruction is executed with a temporary SIGILL handler in place, to
+ * catch the case where the instruction still faults.
+ *
+ * The previous SIGILL disposition is restored before returning.  Leaving
+ * intr() installed would make any later SIGILL longjmp() into this
+ * function's stack frame after it has returned.  SA_NODEFER keeps SIGILL
+ * out of the signal mask while the handler runs, so leaving the handler via
+ * longjmp() does not leave the signal blocked.
+ */
+void check_sse2()
+{
+    struct sigaction probe, saved;
+
+    if ((cpuid_edx(1) & 0x4000000) == 0) {
+        sse2_ok = 0;
+        return;
+    }
+
+    memset(&probe, 0, sizeof(probe));
+    probe.sa_handler = intr;
+    sigemptyset(&probe.sa_mask);
+    probe.sa_flags = SA_NODEFER;
+    if (sigaction(SIGILL, &probe, &saved) != 0) {
+        /* Cannot probe safely; fall back to the generic compare. */
+        sse2_ok = 0;
+        return;
+    }
+
+    if (setjmp(sse_jbuf) == 0)
+        __asm__("xorps %xmm0,%xmm0\n");
+
+    sigaction(SIGILL, &saved, NULL);
+}
+
+/*
+ * SSE2 variant of the VRAM dirty check.
+ *
+ * Walks `n' bytes of VRAM at `offset' in 16-byte steps, comparing each chunk
+ * with the shadow copy.  At the first chunk that differs, that chunk and
+ * every remaining chunk of the range are copied into the shadow and 1 is
+ * returned.  Returns 0 when all chunks match, and 1 when there is no shadow
+ * buffer.  Defers to cmp_vram() when the run-time probe cleared sse2_ok.
+ */
+int vram_dirty(VGAState *s, int offset, int n)
+{
+    __m128i *shadow, *vram;
+
+    if (s->vram_shadow == NULL)
+        return 1;
+    if (sse2_ok == 0)
+        return cmp_vram(s, offset, n);
+
+    vram = (__m128i *)(s->vram_ptr + offset);
+    shadow = (__m128i *)(s->vram_shadow + offset);
+
+    while ((n -= sizeof(*vram)) >= 0) {
+        int same = _mm_movemask_epi8(_mm_cmpeq_epi8(*shadow, *vram)) == 0xffff;
+
+        if (same) {
+            shadow++;
+            vram++;
+            continue;
+        }
+
+        /* n >= 0 here, so at least the differing chunk is copied. */
+        do {
+            _mm_store_si128(shadow++, _mm_load_si128(vram++));
+            n -= sizeof(*vram);
+        } while (n >= 0);
+        return 1;
+    }
+    return 0;
+}
+#else // USE_SSE2
+/*
+ * Build without USE_SSE2: the dirty check is exactly the comparison
+ * performed by cmp_vram().
+ */
+int vram_dirty(VGAState *s, int offset, int n)
+{
+    int dirty = cmp_vram(s, offset, n);
+
+    return dirty;
+}
+
+/* Nothing to probe when the SSE2 path is not compiled in. */
+void check_sse2()
+{
+}
+#endif // USE_SSE2
+
/*
* graphic modes
*/
@@ -1434,6 +1528,9 @@ static void vga_draw_graphic(VGAState *s
printf("w=%d h=%d v=%d line_offset=%d cr[0x09]=0x%02x cr[0x17]=0x%02x
linecmp=%d sr[0x01]=0x%02x\n",
width, height, v, line_offset, s->cr[9], s->cr[0x17],
s->line_compare, s->sr[0x01]);
#endif
+ for (y = 0; y < s->vram_size; y += TARGET_PAGE_SIZE)
+ if (vram_dirty(s, y, TARGET_PAGE_SIZE))
+ cpu_physical_memory_set_dirty(s->vram_offset + y);
addr1 = (s->start_addr * 4);
bwidth = width * 4;
y_start = -1;
@@ -1536,8 +1633,17 @@ static void vga_draw_blank(VGAState *s,
void vga_update_display(void)
{
+ static int loop;
VGAState *s = vga_state;
int full_update, graphic_mode;
+
+ /*
+ * Only update the display every other time. The responsiveness is
+ * acceptable and it cuts down on the overhead of the VRAM compare
+ * in vram_dirty().
+ */
+ if (loop++ & 1)
+ return;
if (s->ds->depth == 0) {
/* nothing to do */
@@ -1569,7 +1675,6 @@ void vga_update_display(void)
full_update = 1;
}
- full_update = 1;
switch(graphic_mode) {
case GMODE_TEXT:
vga_draw_text(s, full_update);
@@ -1874,7 +1979,10 @@ void vga_common_init(VGAState *s, Displa
#else
s->vram_ptr = qemu_malloc(vga_ram_size);
#endif
-
+ check_sse2();
+ if ((s->vram_shadow = qemu_malloc(vga_ram_size+TARGET_PAGE_SIZE+1)) ==
NULL)
+ fprintf(stderr, "Cannot allocate %d bytes for VRAM shadow, mouse will be
slow\n", vga_ram_size);
+ s->vram_shadow = (uint8_t *)((long)(s->vram_shadow + TARGET_PAGE_SIZE
- 1) & ~(TARGET_PAGE_SIZE - 1));
s->vram_offset = vga_ram_offset;
s->vram_size = vga_ram_size;
s->ds = ds;
diff -r 7d8efd4f1ac7 tools/ioemu/hw/vga_int.h
--- a/tools/ioemu/hw/vga_int.h Tue Mar 14 15:18:35 2006 +0100
+++ b/tools/ioemu/hw/vga_int.h Tue Mar 14 15:47:32 2006 -0700
@@ -76,6 +76,7 @@
#define VGA_STATE_COMMON \
uint8_t *vram_ptr; \
+ uint8_t *vram_shadow; \
unsigned long vram_offset; \
unsigned int vram_size; \
uint32_t latch; \
diff -r 7d8efd4f1ac7 tools/ioemu/target-i386-dm/Makefile
--- a/tools/ioemu/target-i386-dm/Makefile Tue Mar 14 15:18:35 2006 +0100
+++ b/tools/ioemu/target-i386-dm/Makefile Tue Mar 14 15:47:32 2006 -0700
@@ -13,7 +13,7 @@ VPATH+=:$(SRC_PATH)/linux-user
VPATH+=:$(SRC_PATH)/linux-user
DEFINES+=-I$(SRC_PATH)/linux-user -I$(SRC_PATH)/linux-user/$(TARGET_ARCH)
endif
-CFLAGS+=-g -fno-strict-aliasing
+CFLAGS+=-g -fno-strict-aliasing $(LOCAL_CFLAGS)
LDFLAGS=-g
LIBS HELPER_CFLAGS=$(CFLAGS)
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel