Stefano Stabellini
2012-Jan-31 16:11 UTC
[PATCH v4 0/6] prevent QEMU from waking up needlessly
Hi all,
this small patch series prevents QEMU from waking up needlessly on Xen several times a second just to check some timers.

The first patch stops QEMU from emulating the PIT on Xen; the second patch disables rtc_clock entirely.

The third patch makes use of a new mechanism to receive buffered io event notifications from Xen, so that QEMU doesn't need to check the buffered io page for data 10 times a second for the entire life of the VM.

The fourth patch fixes win32_rearm_timer and mm_rearm_timer, which risk an overflow if INT64_MAX is passed as delta.

The fifth patch changes qemu_next_alarm_deadline to check the expire time of a clock only if the clock is enabled.

Finally, the last patch increases the default select timeout to 1h: nothing should rely on the select timeout being 1 sec, so we might as well increase it to 1h.

Changes in v4:
- do not initialize pcspk on xen;
- disable rtc_clock only when it points to host_clock (the default);
- make sure it compiles on older Xen versions.

Changes in v3:
- added a new patch to fix win32_rearm_timer and mm_rearm_timer, which risk an overflow if INT64_MAX is passed as delta.

Shortlog and diffstat follow:

Stefano Stabellini (6):
      xen: do not initialize the interval timer and PCSPK emulator
      xen: disable rtc_clock
      xen: introduce an event channel for buffered io event notifications
      timers: the rearm function should be able to handle delta = INT64_MAX
      qemu_next_alarm_deadline: check the expire time of a clock only if it is enabled
      qemu_calculate_timeout: increase minimum timeout to 1h

 hw/pc.c      |    9 ++++++---
 qemu-timer.c |   30 ++++++++++++++++++------------
 xen-all.c    |   49 +++++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 67 insertions(+), 21 deletions(-)

A git tree, based on fd39941ac78fbe969e292eeb91415ec548bd97a6, is available here:

git://xenbits.xen.org/people/sstabellini/qemu-dm.git timers-4

Cheers,

Stefano
Stefano Stabellini
2012-Jan-31 16:12 UTC
[PATCH v4 1/6] xen: do not initialize the interval timer and PCSPK emulator
PIT and PCSPK are emulated by the hypervisor so we don't need to emulate them in Qemu: this patch prevents Qemu from waking up needlessly at PIT_FREQ on Xen.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 hw/pc.c |    9 ++++++---
 1 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/hw/pc.c b/hw/pc.c
index 31608d3..18abee0 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -44,6 +44,7 @@
 #include "ui/qemu-spice.h"
 #include "memory.h"
 #include "exec-memory.h"
+#include "arch_init.h"
 
 /* output Bochs bios info messages */
 //#define DEBUG_BIOS
@@ -1137,7 +1138,7 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
     DriveInfo *fd[MAX_FD];
     qemu_irq rtc_irq = NULL;
     qemu_irq *a20_line;
-    ISADevice *i8042, *port92, *vmmouse, *pit;
+    ISADevice *i8042, *port92, *vmmouse, *pit = NULL;
     qemu_irq *cpu_exit_irq;
 
     register_ioport_write(0x80, 1, 1, ioport80_write, NULL);
@@ -1158,8 +1159,10 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
 
     qemu_register_boot_set(pc_boot_set, *rtc_state);
 
-    pit = pit_init(isa_bus, 0x40, 0);
-    pcspk_init(pit);
+    if (!xen_available()) {
+        pit = pit_init(isa_bus, 0x40, 0);
+        pcspk_init(pit);
+    }
 
     for(i = 0; i < MAX_SERIAL_PORTS; i++) {
         if (serial_hds[i]) {
--
1.7.2.5
Stefano Stabellini
2012-Jan-31 16:12 UTC
[PATCH v4 2/6] xen: disable rtc_clock

rtc_clock is only used by the RTC emulator (mc146818rtc.c), however Xen has its own RTC emulator in the hypervisor so we can disable it.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 xen-all.c |    4 ++++
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/xen-all.c b/xen-all.c
index fd39168..101c962 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -514,6 +514,10 @@ void xen_vcpu_init(void)
         qemu_register_reset(xen_reset_vcpu, first_cpu);
         xen_reset_vcpu(first_cpu);
     }
+    /* if rtc_clock is left to default (host_clock), disable it */
+    if (rtc_clock == host_clock) {
+        qemu_clock_enable(rtc_clock, false);
+    }
 }
 
 /* get the ioreq packets from share mem */
--
1.7.2.5
Stefano Stabellini
2012-Jan-31 16:12 UTC
[PATCH v4 3/6] xen: introduce an event channel for buffered io event notifications
Use the newly introduced HVM_PARAM_BUFIOREQ_EVTCHN to receive notifications for buffered io events.

After the first notification is received leave the event channel masked and setup a timer to process the rest of the batch.

Once we have completed processing the batch, unmask the event channel and delete the timer.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 xen-all.c |   45 +++++++++++++++++++++++++++++++++++++++------
 1 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/xen-all.c b/xen-all.c
index 101c962..0ce8002 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -59,6 +59,9 @@ static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu)
 }
 # define FMT_ioreq_size "u"
 #endif
+#ifndef HVM_PARAM_BUFIOREQ_EVTCHN
+#define HVM_PARAM_BUFIOREQ_EVTCHN 26
+#endif
 
 #define BUFFER_IO_MAX_DELAY  100
 
@@ -77,6 +80,8 @@ typedef struct XenIOState {
     QEMUTimer *buffered_io_timer;
     /* the evtchn port for polling the notification, */
     evtchn_port_t *ioreq_local_port;
+    /* evtchn local port for buffered io */
+    evtchn_port_t bufioreq_local_port;
     /* the evtchn fd for polling */
     XenEvtchn xce_handle;
     /* which vcpu we are serving */
@@ -549,6 +554,12 @@ static ioreq_t *cpu_get_ioreq(XenIOState *state)
     evtchn_port_t port;
 
     port = xc_evtchn_pending(state->xce_handle);
+    if (port == state->bufioreq_local_port) {
+        qemu_mod_timer(state->buffered_io_timer,
+                BUFFER_IO_MAX_DELAY + qemu_get_clock_ms(rt_clock));
+        return NULL;
+    }
+
     if (port != -1) {
         for (i = 0; i < smp_cpus; i++) {
             if (state->ioreq_local_port[i] == port) {
@@ -697,16 +708,18 @@ static void handle_ioreq(ioreq_t *req)
     }
 }
 
-static void handle_buffered_iopage(XenIOState *state)
+static int handle_buffered_iopage(XenIOState *state)
 {
     buf_ioreq_t *buf_req = NULL;
     ioreq_t req;
     int qw;
 
     if (!state->buffered_io_page) {
-        return;
+        return 0;
     }
 
+    memset(&req, 0x00, sizeof(req));
+
     while (state->buffered_io_page->read_pointer != state->buffered_io_page->write_pointer) {
         buf_req = &state->buffered_io_page->buf_ioreq[
             state->buffered_io_page->read_pointer % IOREQ_BUFFER_SLOT_NUM];
@@ -731,15 +744,21 @@ static void handle_buffered_iopage(XenIOState *state)
         xen_mb();
         state->buffered_io_page->read_pointer += qw ? 2 : 1;
     }
+
+    return req.count;
 }
 
 static void handle_buffered_io(void *opaque)
 {
     XenIOState *state = opaque;
 
-    handle_buffered_iopage(state);
-    qemu_mod_timer(state->buffered_io_timer,
-                   BUFFER_IO_MAX_DELAY + qemu_get_clock_ms(rt_clock));
+    if (handle_buffered_iopage(state)) {
+        qemu_mod_timer(state->buffered_io_timer,
+                BUFFER_IO_MAX_DELAY + qemu_get_clock_ms(rt_clock));
+    } else {
+        qemu_del_timer(state->buffered_io_timer);
+        xc_evtchn_unmask(state->xce_handle, state->bufioreq_local_port);
+    }
 }
 
 static void cpu_handle_ioreq(void *opaque)
@@ -869,7 +888,6 @@ static void xen_main_loop_prepare(XenIOState *state)
 
     state->buffered_io_timer = qemu_new_timer_ms(rt_clock, handle_buffered_io,
                                                  state);
-    qemu_mod_timer(state->buffered_io_timer, qemu_get_clock_ms(rt_clock));
 
     if (evtchn_fd != -1) {
         qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, state);
@@ -921,6 +939,7 @@ int xen_hvm_init(void)
 {
     int i, rc;
     unsigned long ioreq_pfn;
+    unsigned long bufioreq_evtchn;
     XenIOState *state;
 
     state = g_malloc0(sizeof (XenIOState));
@@ -970,6 +989,20 @@ int xen_hvm_init(void)
         state->ioreq_local_port[i] = rc;
     }
 
+    rc = xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_BUFIOREQ_EVTCHN,
+            &bufioreq_evtchn);
+    if (rc < 0) {
+        fprintf(stderr, "failed to get HVM_PARAM_BUFIOREQ_EVTCHN\n");
+        return -1;
+    }
+    rc = xc_evtchn_bind_interdomain(state->xce_handle, xen_domid,
+            (uint32_t)bufioreq_evtchn);
+    if (rc == -1) {
+        fprintf(stderr, "bind interdomain ioctl error %d\n", errno);
+        return -1;
+    }
+    state->bufioreq_local_port = rc;
+
     /* Init RAM management */
     xen_map_cache_init();
     xen_ram_init(ram_size);
--
1.7.2.5
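To make the control flow easier to follow, here is a small standalone model of the scheme described above: the event channel fires once, stays masked while a timer drains the buffered-ioreq ring, and is unmasked again once the ring is empty. The ring, flags and helper names below are simplified stand-ins, not the Xen/QEMU data structures.

/* Toy model of the buffered-io notification scheme; illustrative names only. */
#include <stdio.h>
#include <stdbool.h>

struct ring {
    unsigned read_pointer;
    unsigned write_pointer;
};

static bool timer_armed;
static bool evtchn_masked;

static int drain(struct ring *r)            /* stands in for handle_buffered_iopage() */
{
    int count = 0;
    while (r->read_pointer != r->write_pointer) {
        r->read_pointer++;                   /* handle_ioreq() would run here */
        count++;
    }
    return count;
}

static void on_timer(struct ring *r)         /* stands in for handle_buffered_io() */
{
    if (drain(r)) {
        timer_armed = true;                  /* more of the batch may still arrive */
    } else {
        timer_armed = false;                 /* batch done: stop polling ... */
        evtchn_masked = false;               /* ... and wait for the next notification */
    }
}

static void on_notification(struct ring *r)  /* stands in for the new branch in cpu_get_ioreq() */
{
    (void)r;
    evtchn_masked = true;                    /* port stays masked for the whole batch */
    timer_armed = true;                      /* the timer takes over from here */
}

int main(void)
{
    struct ring r = { .read_pointer = 0, .write_pointer = 3 };

    on_notification(&r);
    while (timer_armed) {
        on_timer(&r);
    }
    printf("ring drained, masked=%d, timer armed=%d\n", evtchn_masked, timer_armed);
    return 0;
}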
Stefano Stabellini
2012-Jan-31 16:12 UTC
[PATCH v4 4/6] timers: the rearm function should be able to handle delta = INT64_MAX
Fix win32_rearm_timer and mm_rearm_timer: they should be able to handle INT64_MAX as a delta parameter without overflowing.

Also, the next deadline in ms should be calculated rounding down rather than up (see unix_rearm_timer and dynticks_rearm_timer).

Finally ChangeTimerQueueTimer takes an unsigned long and timeSetEvent takes an unsigned int as delta, so cast the ms delta to the appropriate unsigned integer.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 qemu-timer.c |   18 +++++++++++++-----
 1 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/qemu-timer.c b/qemu-timer.c
index a22f27e..29410f1 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -696,13 +696,17 @@ static void mm_stop_timer(struct qemu_alarm_timer *t)
 
 static void mm_rearm_timer(struct qemu_alarm_timer *t, int64_t delta)
 {
-    int nearest_delta_ms = (delta + 999999) / 1000000;
+    int64_t nearest_delta_ms = delta / 1000000;
     if (nearest_delta_ms < 1) {
         nearest_delta_ms = 1;
     }
+    /* UINT_MAX can be 32 bit */
+    if (nearest_delta_ms > UINT_MAX) {
+        nearest_delta_ms = UINT_MAX;
+    }
 
     timeKillEvent(mm_timer);
-    mm_timer = timeSetEvent(nearest_delta_ms,
+    mm_timer = timeSetEvent((unsigned int) nearest_delta_ms,
                             mm_period,
                             mm_alarm_handler,
                             (DWORD_PTR)t,
@@ -757,16 +761,20 @@ static void win32_rearm_timer(struct qemu_alarm_timer *t,
                               int64_t nearest_delta_ns)
 {
     HANDLE hTimer = t->timer;
-    int nearest_delta_ms;
+    int64_t nearest_delta_ms;
     BOOLEAN success;
 
-    nearest_delta_ms = (nearest_delta_ns + 999999) / 1000000;
+    nearest_delta_ms = nearest_delta_ns / 1000000;
    if (nearest_delta_ms < 1) {
        nearest_delta_ms = 1;
    }
+    /* ULONG_MAX can be 32 bit */
+    if (nearest_delta_ms > ULONG_MAX) {
+        nearest_delta_ms = ULONG_MAX;
+    }
    success = ChangeTimerQueueTimer(NULL,
                                    hTimer,
-                                   nearest_delta_ms,
+                                   (unsigned long) nearest_delta_ms,
                                    3600000);
 
    if (!success) {
--
1.7.2.5
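To see the overflow concretely: with delta = INT64_MAX the old rounding-up form (delta + 999999) overflows a signed 64-bit integer, and even the rounded-down quotient, roughly 9.2e12 ms, cannot fit in the 32-bit parameter that timeSetEvent and ChangeTimerQueueTimer expect. A standalone illustration (not QEMU code, helper name invented):

/* Standalone illustration of why the delta is divided first and then
 * clamped before narrowing to a 32-bit type. */
#include <stdio.h>
#include <stdint.h>
#include <limits.h>

static unsigned int delta_ns_to_ms(int64_t delta_ns)
{
    /* round down, like unix_rearm_timer/dynticks_rearm_timer */
    int64_t ms = delta_ns / 1000000;

    if (ms < 1) {
        ms = 1;
    }
    /* UINT_MAX can be 32 bit: clamp before the cast below */
    if (ms > UINT_MAX) {
        ms = UINT_MAX;
    }
    return (unsigned int)ms;
}

int main(void)
{
    int64_t delta = INT64_MAX;

    /* The old form, (delta + 999999) / 1000000, is signed overflow for
     * deltas near INT64_MAX; even without it, the quotient is far larger
     * than UINT_MAX, hence the clamp. */
    printf("INT64_MAX / 1000000  = %lld ms\n", (long long)(delta / 1000000));
    printf("clamped 32-bit value = %u ms\n", delta_ns_to_ms(delta));
    return 0;
}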
Stefano Stabellini
2012-Jan-31 16:12 UTC
[PATCH v4 5/6] qemu_next_alarm_deadline: check the expire time of a clock only if it is enabled
Also delta in qemu_next_alarm_deadline is a 64 bit value so set the default to INT64_MAX instead of INT32_MAX.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 qemu-timer.c |   10 ++++------
 1 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/qemu-timer.c b/qemu-timer.c
index 29410f1..de20852 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -106,23 +106,21 @@ static inline int alarm_has_dynticks(struct qemu_alarm_timer *t)
 
 static int64_t qemu_next_alarm_deadline(void)
 {
-    int64_t delta;
+    int64_t delta = INT64_MAX;
     int64_t rtdelta;
 
-    if (!use_icount && vm_clock->active_timers) {
+    if (!use_icount && vm_clock->enabled && vm_clock->active_timers) {
         delta = vm_clock->active_timers->expire_time -
                      qemu_get_clock_ns(vm_clock);
-    } else {
-        delta = INT32_MAX;
     }
-    if (host_clock->active_timers) {
+    if (host_clock->enabled && host_clock->active_timers) {
         int64_t hdelta = host_clock->active_timers->expire_time -
                          qemu_get_clock_ns(host_clock);
         if (hdelta < delta) {
             delta = hdelta;
         }
     }
-    if (rt_clock->active_timers) {
+    if (rt_clock->enabled && rt_clock->active_timers) {
         rtdelta = (rt_clock->active_timers->expire_time -
                        qemu_get_clock_ns(rt_clock));
         if (rtdelta < delta) {
--
1.7.2.5
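Together with patch 2 this is what stops the RTC wakeups: a disabled clock no longer shortens the alarm deadline. A simplified standalone model (stand-in structs, not the QEMU ones) of the logic after this patch:

/* Simplified model of qemu_next_alarm_deadline() after this patch:
 * only enabled clocks with pending timers shorten the deadline. */
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

struct clock {
    bool enabled;
    bool has_active_timer;
    int64_t next_expire_ns;
};

static int64_t next_alarm_deadline(struct clock **clocks, int n, int64_t now_ns)
{
    int64_t delta = INT64_MAX;                     /* default, as in the patch */

    for (int i = 0; i < n; i++) {
        if (clocks[i]->enabled && clocks[i]->has_active_timer) {
            int64_t d = clocks[i]->next_expire_ns - now_ns;
            if (d < delta) {
                delta = d;
            }
        }
    }
    return delta;
}

int main(void)
{
    struct clock rtc = { .enabled = true, .has_active_timer = true, .next_expire_ns = 1000000 };
    struct clock vm  = { .enabled = true, .has_active_timer = false };
    struct clock *clocks[] = { &rtc, &vm };

    printf("deadline, RTC enabled : %lld ns\n",
           (long long)next_alarm_deadline(clocks, 2, 0));

    rtc.enabled = false;   /* what xen_vcpu_init() now does when rtc_clock == host_clock */
    printf("deadline, RTC disabled: %lld ns\n",
           (long long)next_alarm_deadline(clocks, 2, 0));
    return 0;
}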
Stefano Stabellini
2012-Jan-31 16:12 UTC
[PATCH v4 6/6] qemu_calculate_timeout: increase minimum timeout to 1h
There is no reason why the minimum timeout should be 1sec, it could easily be 1h and we would save lots of cpu cycles.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 qemu-timer.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/qemu-timer.c b/qemu-timer.c
index de20852..84b970e 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -823,6 +823,6 @@ fail:
 
 int qemu_calculate_timeout(void)
 {
-    return 1000;
+    return 1000*60*60;
 }
 
--
1.7.2.5
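For context, the returned value is the number of milliseconds the main loop may block in select() when nothing else is pending; a rough, paraphrased sketch (not QEMU's actual main-loop code) of how it is used:

/* Paraphrased sketch of how the returned value becomes a select() timeout.
 * In QEMU an alarm-timer signal or a readable fd ends the wait much earlier. */
#include <stdio.h>
#include <sys/time.h>

static int qemu_calculate_timeout(void)
{
    return 1000 * 60 * 60;   /* milliseconds: one hour instead of one second */
}

int main(void)
{
    int timeout_ms = qemu_calculate_timeout();
    struct timeval tv = {
        .tv_sec  = timeout_ms / 1000,
        .tv_usec = (timeout_ms % 1000) * 1000,
    };

    /* In QEMU this tv would be handed to select(); with nothing pending the
     * process now sleeps for up to an hour instead of waking every second. */
    printf("select timeout: %ld s %ld us\n", (long)tv.tv_sec, (long)tv.tv_usec);
    return 0;
}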