George Dunlap
2013-Mar-04 11:02 UTC
[PATCH v3 RESEND 1/2] xen, credit2: Avoid extra c2t calculation in csched_runtime
csched_runtime() needs to call the c2t() function to change credits into time. The c2t() function, however, is expensive, as it requires an integer division. c2t() was being called twice, once for the main vcpu's credit and once for the difference between its credit and that of the next vcpu in the queue. But this is unnecessary; by calculating in "credit" first, we can make it so that we just do one conversion later in the algorithm. This also adds more documentation describing the intended algorithm, along with a relevant assertion. The effect of the new code should be the same as the old code. v3: - ASSERT that rt_credit >= 0, since there's a possibility for credits to be equal v2: - Change rt_credit into an int - ASSERT() that rt_credit > 0, with explanation Spotted-by: Jan Beulich <JBeulich@suse.com> Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> --- xen/common/sched_credit2.c | 48 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c index b0af010..804049e 100644 --- a/xen/common/sched_credit2.c +++ b/xen/common/sched_credit2.c @@ -1505,31 +1505,57 @@ csched_dom_destroy(const struct scheduler *ops, struct domain *dom) static s_time_t csched_runtime(const struct scheduler *ops, int cpu, struct csched_vcpu *snext) { - s_time_t time = CSCHED_MAX_TIMER; + s_time_t time; + int rt_credit; /* Proposed runtime measured in credits */ struct csched_runqueue_data *rqd = RQD(ops, cpu); struct list_head *runq = &rqd->runq; if ( is_idle_vcpu(snext->vcpu) ) return CSCHED_MAX_TIMER; - /* Basic time */ - time = c2t(rqd, snext->credit, snext); + /* General algorithm: + * 1) Run until snext''s credit will be 0 + * 2) But if someone is waiting, run until snext''s credit is equal + * to his + * 3) But never run longer than MAX_TIMER or shorter than MIN_TIMER. + */ + + /* 1) Basic time: Run until credit is 0.
*/ + rt_credit = snext->credit; - /* Next guy on runqueue */ + /* 2) If there''s someone waiting whose credit is positive, + * run until your credit ~= his */ if ( ! list_empty(runq) ) { - struct csched_vcpu *svc = __runq_elem(runq->next); - s_time_t ntime; + struct csched_vcpu *swait = __runq_elem(runq->next); - if ( ! is_idle_vcpu(svc->vcpu) ) + if ( ! is_idle_vcpu(swait->vcpu) + && swait->credit > 0 ) { - ntime = c2t(rqd, snext->credit - svc->credit, snext); - - if ( time > ntime ) - time = ntime; + rt_credit = snext->credit - swait->credit; } } + /* + * snext is about to be scheduled; so: + * + * 1. if snext->credit were less than 0 when it was taken off the + * runqueue, then csched_schedule() should have called + * reset_credit(). So at this point snext->credit must be greater + * than 0. + * + * 2. snext''s credit must be greater than or equal to anyone else + * in the queue, so snext->credit - swait->credit must be greater + * than or equal to 0. + */ + ASSERT(rt_credit >= 0); + + /* FIXME: See if we can eliminate this conversion if we know time + * will be outside (MIN,MAX). Probably requires pre-calculating + * credit values of MIN,MAX per vcpu, since each vcpu burns credit + * at a different rate. */ + time = c2t(rqd, rt_credit, snext); + /* Check limits */ if ( time < CSCHED_MIN_TIMER ) time = CSCHED_MIN_TIMER; -- 1.7.9.5
George Dunlap
2013-Mar-04 11:02 UTC
[PATCH v3 RESEND 2/2] credit2: track residual from divisions done during accounting
This should help with under-accounting of vCPU-s running for extremely short periods of time, but becoming runnable again at a high frequency. Don't bother subtracting the residual from the runtime, as it can only ever add up to one nanosecond, and will end up being debited during the next reset interval anyway. Original-patch-by: Jan Beulich <jbeulich@suse.com> Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> --- xen/common/sched_credit2.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c index 804049e..a7bd2ee 100644 --- a/xen/common/sched_credit2.c +++ b/xen/common/sched_credit2.c @@ -21,7 +21,7 @@ #include <xen/perfc.h> #include <xen/sched-if.h> #include <xen/softirq.h> -#include <asm/atomic.h> +#include <asm/div64.h> #include <xen/errno.h> #include <xen/trace.h> #include <xen/cpu.h> @@ -205,7 +205,7 @@ struct csched_runqueue_data { struct list_head runq; /* Ordered list of runnable vms */ struct list_head svc; /* List of all vcpus assigned to this runqueue */ - int max_weight; + unsigned int max_weight; cpumask_t idle, /* Currently idle */ tickled; /* Another cpu in the queue is already targeted for this one */ @@ -244,7 +244,8 @@ struct csched_vcpu { struct csched_dom *sdom; struct vcpu *vcpu; - int weight; + unsigned int weight; + unsigned int residual; int credit; s_time_t start_time; /* When we were scheduled (used for credit) */ @@ -271,11 +272,19 @@ struct csched_dom { /* * Time-to-credit, credit-to-time. + * + * We keep track of the "residual" time to make sure that frequent short + * schedules still get accounted for in the end. + * * FIXME: Do pre-calculated division?
*/ -static s_time_t t2c(struct csched_runqueue_data *rqd, s_time_t time, struct csched_vcpu *svc) +static void t2c_update(struct csched_runqueue_data *rqd, s_time_t time, + struct csched_vcpu *svc) { - return time * rqd->max_weight / svc->weight; + uint64_t val = time * rqd->max_weight + svc->residual; + + svc->residual = do_div(val, svc->weight); + svc->credit -= val; } static s_time_t c2t(struct csched_runqueue_data *rqd, s_time_t credit, struct csched_vcpu *svc) @@ -636,8 +645,7 @@ void burn_credits(struct csched_runqueue_data *rqd, struct csched_vcpu *svc, s_t delta = now - svc->start_time; if ( delta > 0 ) { - /* This will round down; should we consider rounding up...? */ - svc->credit -= t2c(rqd, delta, svc); + t2c_update(rqd, delta, svc); svc->start_time = now; d2printk("b d%dv%d c%d\n", -- 1.7.9.5