George Dunlap
2013-Feb-26 17:08 UTC
[PATCH 1/2] xen, credit2: Avoid extra c2t calcuation in csched_runtime
csched_runtime() needs to call the ct2() function to change credits into time. The c2t() function, however, is expensive, as it requires an integer division. c2t() was being called twice, once for the main vcpu''s credit and once for the difference between its credit and the next in the queue. But this is unnecessary; by calculating in "credit" first, we can make it so that we just do one conversion later in the algorithm. This also adds more documentation describing the intended algorithm, along with a relevant assertion.. The effect of the new code should be the same as the old code. v2: - Change rt_credit into an int - ASSERT() that rt_credit > 0, with explanation Spotted-by: Jan Beulich <JBeulich@suse.com> Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> xen,credit2: Avoid extra c2t calcuation in csched_runtime csched_runtime() needs to call the ct2() function to change credits into time. The c2t() function, however, is expensive, as it requires an integer division. c2t() was being called twice, once for the main vcpu''s credit and once for the difference between its credit and the next in the queue. But this is unnecessary; by calculating in "credit" first, we can make it so that we just do one conversion later in the algorithm. This also adds more documentation describing the intended algorithm, along with a relevant assertion.. The effect of the new code should be the same as the old code. v2: - Change rt_credit into an int - ASSERT() that rt_credit > 0, with explanation Spotted-by: Jan Beulich <JBeulich@suse.com> Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> --- xen/common/sched_credit2.c | 48 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c index b0af010..804049e 100644 --- a/xen/common/sched_credit2.c +++ b/xen/common/sched_credit2.c @@ -1505,31 +1505,57 @@ csched_dom_destroy(const struct scheduler *ops, struct domain *dom) static s_time_t csched_runtime(const struct scheduler *ops, int cpu, struct csched_vcpu *snext) { - s_time_t time = CSCHED_MAX_TIMER; + s_time_t time; + int rt_credit; /* Proposed runtime measured in credits */ struct csched_runqueue_data *rqd = RQD(ops, cpu); struct list_head *runq = &rqd->runq; if ( is_idle_vcpu(snext->vcpu) ) return CSCHED_MAX_TIMER; - /* Basic time */ - time = c2t(rqd, snext->credit, snext); + /* General algorithm: + * 1) Run until snext''s credit will be 0 + * 2) But if someone is waiting, run until snext''s credit is equal + * to his + * 3) But never run longer than MAX_TIMER or shorter than MIN_TIMER. + */ + + /* 1) Basic time: Run until credit is 0. */ + rt_credit = snext->credit; - /* Next guy on runqueue */ + /* 2) If there''s someone waiting whose credit is positive, + * run until your credit ~= his */ if ( ! list_empty(runq) ) { - struct csched_vcpu *svc = __runq_elem(runq->next); - s_time_t ntime; + struct csched_vcpu *swait = __runq_elem(runq->next); - if ( ! is_idle_vcpu(svc->vcpu) ) + if ( ! is_idle_vcpu(swait->vcpu) + && swait->credit > 0 ) { - ntime = c2t(rqd, snext->credit - svc->credit, snext); - - if ( time > ntime ) - time = ntime; + rt_credit = snext->credit - swait->credit; } } + /* + * snext is about to be scheduled; so: + * + * 1. if snext->credit were less than 0 when it was taken off the + * runqueue, then csched_schedule() should have called + * reset_credit(). So at this point snext->credit must be greater + * than 0. + * + * 2. snext''s credit must be greater than or equal to anyone else + * in the queue, so snext->credit - swait->credit must be greater + * than or equal to 0. + */ + ASSERT(rt_credit >= 0); + + /* FIXME: See if we can eliminate this conversion if we know time + * will be outside (MIN,MAX). Probably requires pre-calculating + * credit values of MIN,MAX per vcpu, since each vcpu burns credit + * at a different rate. */ + time = c2t(rqd, rt_credit, snext); + /* Check limits */ if ( time < CSCHED_MIN_TIMER ) time = CSCHED_MIN_TIMER; -- 1.7.9.5
George Dunlap
2013-Feb-26 17:08 UTC
[PATCH 2/2] credit2: track residual from divisions done during accounting
This should help with under-accounting of vCPU-s running for extremly short periods of time, but becoming runnable again at a high frequency. Original-patch-by: Jan Beulich <jbeulich@suse.com> Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> --- xen/common/sched_credit2.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c index 804049e..189b56a 100644 --- a/xen/common/sched_credit2.c +++ b/xen/common/sched_credit2.c @@ -21,7 +21,7 @@ #include <xen/perfc.h> #include <xen/sched-if.h> #include <xen/softirq.h> -#include <asm/atomic.h> +#include <asm/div64.h> #include <xen/errno.h> #include <xen/trace.h> #include <xen/cpu.h> @@ -205,7 +205,7 @@ struct csched_runqueue_data { struct list_head runq; /* Ordered list of runnable vms */ struct list_head svc; /* List of all vcpus assigned to this runqueue */ - int max_weight; + unsigned int max_weight; cpumask_t idle, /* Currently idle */ tickled; /* Another cpu in the queue is already targeted for this one */ @@ -244,7 +244,8 @@ struct csched_vcpu { struct csched_dom *sdom; struct vcpu *vcpu; - int weight; + unsigned int weight; + unsigned int residual; int credit; s_time_t start_time; /* When we were scheduled (used for credit) */ @@ -271,16 +272,24 @@ struct csched_dom { /* * Time-to-credit, credit-to-time. + * + * We keep track of the "residual" time to make sure that frequent short + * schedules still get accounted for in the end. + * * FIXME: Do pre-calculated division? */ -static s_time_t t2c(struct csched_runqueue_data *rqd, s_time_t time, struct csched_vcpu *svc) +static void t2c_update(struct csched_runqueue_data *rqd, s_time_t time, + struct csched_vcpu *svc) { - return time * rqd->max_weight / svc->weight; + uint64_t val = time * rqd->max_weight + svc->residual; + + svc->residual = do_div(val, svc->weight); + svc->credit -= val; } static s_time_t c2t(struct csched_runqueue_data *rqd, s_time_t credit, struct csched_vcpu *svc) { - return credit * svc->weight / rqd->max_weight; + return (credit * svc->weight) / rqd->max_weight; } /* @@ -636,8 +645,7 @@ void burn_credits(struct csched_runqueue_data *rqd, struct csched_vcpu *svc, s_t delta = now - svc->start_time; if ( delta > 0 ) { - /* This will round down; should we consider rounding up...? */ - svc->credit -= t2c(rqd, delta, svc); + t2c_update(rqd, delta, svc); svc->start_time = now; d2printk("b d%dv%d c%d\n", -- 1.7.9.5
Jan Beulich
2013-Feb-27 09:34 UTC
Re: [PATCH 1/2] xen, credit2: Avoid extra c2t calcuation in csched_runtime
>>> On 26.02.13 at 18:08, George Dunlap <george.dunlap@eu.citrix.com> wrote: > csched_runtime() needs to call the ct2() function to change credits > into time. The c2t() function, however, is expensive, as it requires > an integer division. > > c2t() was being called twice, once for the main vcpu''s credit and once > for the difference between its credit and the next in the queue. But > this is unnecessary; by calculating in "credit" first, we can make it > so that we just do one conversion later in the algorithm. > > This also adds more documentation describing the intended algorithm, > along with a relevant assertion.. > > The effect of the new code should be the same as the old code. > > v2: > - Change rt_credit into an int > - ASSERT() that rt_credit > 0, with explanation > > Spotted-by: Jan Beulich <JBeulich@suse.com> > Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>Acked-by: Jan Beulich <jbeulich@suse.com> (apart from the redundant description below)> xen,credit2: Avoid extra c2t calcuation in csched_runtime > > csched_runtime() needs to call the ct2() function to change credits > into time. The c2t() function, however, is expensive, as it requires > an integer division. > > c2t() was being called twice, once for the main vcpu''s credit and once > for the difference between its credit and the next in the queue. But > this is unnecessary; by calculating in "credit" first, we can make it > so that we just do one conversion later in the algorithm. > > This also adds more documentation describing the intended algorithm, > along with a relevant assertion.. > > The effect of the new code should be the same as the old code. > > v2: > - Change rt_credit into an int > - ASSERT() that rt_credit > 0, with explanation > > Spotted-by: Jan Beulich <JBeulich@suse.com> > Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> > --- > xen/common/sched_credit2.c | 48 ++++++++++++++++++++++++++++++++++---------- > 1 file changed, 37 insertions(+), 11 deletions(-) > > diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c > index b0af010..804049e 100644 > --- a/xen/common/sched_credit2.c > +++ b/xen/common/sched_credit2.c > @@ -1505,31 +1505,57 @@ csched_dom_destroy(const struct scheduler *ops, > struct domain *dom) > static s_time_t > csched_runtime(const struct scheduler *ops, int cpu, struct csched_vcpu > *snext) > { > - s_time_t time = CSCHED_MAX_TIMER; > + s_time_t time; > + int rt_credit; /* Proposed runtime measured in credits */ > struct csched_runqueue_data *rqd = RQD(ops, cpu); > struct list_head *runq = &rqd->runq; > > if ( is_idle_vcpu(snext->vcpu) ) > return CSCHED_MAX_TIMER; > > - /* Basic time */ > - time = c2t(rqd, snext->credit, snext); > + /* General algorithm: > + * 1) Run until snext''s credit will be 0 > + * 2) But if someone is waiting, run until snext''s credit is equal > + * to his > + * 3) But never run longer than MAX_TIMER or shorter than MIN_TIMER. > + */ > + > + /* 1) Basic time: Run until credit is 0. */ > + rt_credit = snext->credit; > > - /* Next guy on runqueue */ > + /* 2) If there''s someone waiting whose credit is positive, > + * run until your credit ~= his */ > if ( ! list_empty(runq) ) > { > - struct csched_vcpu *svc = __runq_elem(runq->next); > - s_time_t ntime; > + struct csched_vcpu *swait = __runq_elem(runq->next); > > - if ( ! is_idle_vcpu(svc->vcpu) ) > + if ( ! is_idle_vcpu(swait->vcpu) > + && swait->credit > 0 ) > { > - ntime = c2t(rqd, snext->credit - svc->credit, snext); > - > - if ( time > ntime ) > - time = ntime; > + rt_credit = snext->credit - swait->credit; > } > } > > + /* > + * snext is about to be scheduled; so: > + * > + * 1. if snext->credit were less than 0 when it was taken off the > + * runqueue, then csched_schedule() should have called > + * reset_credit(). So at this point snext->credit must be greater > + * than 0. > + * > + * 2. snext''s credit must be greater than or equal to anyone else > + * in the queue, so snext->credit - swait->credit must be greater > + * than or equal to 0. > + */ > + ASSERT(rt_credit >= 0); > + > + /* FIXME: See if we can eliminate this conversion if we know time > + * will be outside (MIN,MAX). Probably requires pre-calculating > + * credit values of MIN,MAX per vcpu, since each vcpu burns credit > + * at a different rate. */ > + time = c2t(rqd, rt_credit, snext); > + > /* Check limits */ > if ( time < CSCHED_MIN_TIMER ) > time = CSCHED_MIN_TIMER; > -- > 1.7.9.5 > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel
Jan Beulich
2013-Feb-27 09:40 UTC
Re: [PATCH 2/2] credit2: track residual from divisions done during accounting
>>> On 26.02.13 at 18:08, George Dunlap <george.dunlap@eu.citrix.com> wrote: > @@ -271,16 +272,24 @@ struct csched_dom { > > /* > * Time-to-credit, credit-to-time. > + * > + * We keep track of the "residual" time to make sure that frequent short > + * schedules still get accounted for in the end. > + * > * FIXME: Do pre-calculated division? > */ > -static s_time_t t2c(struct csched_runqueue_data *rqd, s_time_t time, struct csched_vcpu *svc) > +static void t2c_update(struct csched_runqueue_data *rqd, s_time_t time, > + struct csched_vcpu *svc) > { > - return time * rqd->max_weight / svc->weight; > + uint64_t val = time * rqd->max_weight + svc->residual; > + > + svc->residual = do_div(val, svc->weight); > + svc->credit -= val; > } > > static s_time_t c2t(struct csched_runqueue_data *rqd, s_time_t credit, struct csched_vcpu *svc) > { > - return credit * svc->weight / rqd->max_weight; > + return (credit * svc->weight) / rqd->max_weight;So you dropped the subtraction of svc->residual here - why? And if indeed intended, the insertion of parentheses here could be dropped? Jan
George Dunlap
2013-Feb-27 11:35 UTC
Re: [PATCH 2/2] credit2: track residual from divisions done during accounting
On Wed, Feb 27, 2013 at 9:40 AM, Jan Beulich <JBeulich@suse.com> wrote:> >>> On 26.02.13 at 18:08, George Dunlap <george.dunlap@eu.citrix.com> > wrote: > > @@ -271,16 +272,24 @@ struct csched_dom { > > > > /* > > * Time-to-credit, credit-to-time. > > + * > > + * We keep track of the "residual" time to make sure that frequent short > > + * schedules still get accounted for in the end. > > + * > > * FIXME: Do pre-calculated division? > > */ > > -static s_time_t t2c(struct csched_runqueue_data *rqd, s_time_t time, > struct csched_vcpu *svc) > > +static void t2c_update(struct csched_runqueue_data *rqd, s_time_t time, > > + struct csched_vcpu *svc) > > { > > - return time * rqd->max_weight / svc->weight; > > + uint64_t val = time * rqd->max_weight + svc->residual; > > + > > + svc->residual = do_div(val, svc->weight); > > + svc->credit -= val; > > } > > > > static s_time_t c2t(struct csched_runqueue_data *rqd, s_time_t credit, > struct csched_vcpu *svc) > > { > > - return credit * svc->weight / rqd->max_weight; > > + return (credit * svc->weight) / rqd->max_weight; > > So you dropped the subtraction of svc->residual here - why? > > And if indeed intended, the insertion of parentheses here could be > dropped? >Well for one I think the equation was wrong -- the residual is units of "time", so you should have subtracted the residual after dividing by max_weight. (From a mathematical perspective, svc->weight / rqd->max_weight is the ratio that changes units of "credit" into units of "time; if we were using floating point ops I might put parentheses around that ratio to make it more clear that''s what''s going on.) I had originally thought that there was no real need to subtract the time off the end -- we don''t do it for credit1, after all, and as long as we''re adding up the residuals it will all "come out in the wash", so to speak. But on further reflection I think it''s probably not a bad idea. I''ll respin the patch with the modified equation. -George _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Jan Beulich
2013-Feb-27 12:48 UTC
Re: [PATCH 2/2] credit2: track residual from divisions done during accounting
>>> On 27.02.13 at 13:31, George Dunlap <George.Dunlap@eu.citrix.com> wrote: > On Wed, Feb 27, 2013 at 11:35 AM, George Dunlap <George.Dunlap@eu.citrix.com >> wrote: > >> On Wed, Feb 27, 2013 at 9:40 AM, Jan Beulich <JBeulich@suse.com> wrote: >> >>> >>> On 26.02.13 at 18:08, George Dunlap <george.dunlap@eu.citrix.com> >>> wrote: >>> > @@ -271,16 +272,24 @@ struct csched_dom { >>> > >>> > /* >>> > * Time-to-credit, credit-to-time. >>> > + * >>> > + * We keep track of the "residual" time to make sure that frequent >>> short >>> > + * schedules still get accounted for in the end. >>> > + * >>> > * FIXME: Do pre-calculated division? >>> > */ >>> > -static s_time_t t2c(struct csched_runqueue_data *rqd, s_time_t time, >>> struct csched_vcpu *svc) >>> > +static void t2c_update(struct csched_runqueue_data *rqd, s_time_t time, >>> > + struct csched_vcpu *svc) >>> > { >>> > - return time * rqd->max_weight / svc->weight; >>> > + uint64_t val = time * rqd->max_weight + svc->residual; >>> > + >>> > + svc->residual = do_div(val, svc->weight); >>> > + svc->credit -= val; >>> > } >>> > >>> > static s_time_t c2t(struct csched_runqueue_data *rqd, s_time_t credit, >>> struct csched_vcpu *svc) >>> > { >>> > - return credit * svc->weight / rqd->max_weight; >>> > + return (credit * svc->weight) / rqd->max_weight; >>> >>> So you dropped the subtraction of svc->residual here - why? >>> >>> And if indeed intended, the insertion of parentheses here could be >>> dropped? >>> >> >> Well for one I think the equation was wrong -- the residual is units of >> "time", so you should have subtracted the residual after dividing by >> max_weight. (From a mathematical perspective, svc->weight / >> rqd->max_weight is the ratio that changes units of "credit" into units of >> "time; if we were using floating point ops I might put parentheses around >> that ratio to make it more clear that''s what''s going on.) >> > > > Hmm, on further investigation, I think I''ve convinced myself that this > reasoning is wrong, and your original equation was correct. > > However, the difference will only ever end up to be one nanosecond, which > (if used) will be subtracted from the credit after the reset; so I think we > might as well do without the subtraction here.Oh, I didn''t realize this would only be a nanosecond - my understanding was that it would be up to (but not including) a credit unit. Jan
George Dunlap
2013-Feb-27 14:08 UTC
Re: [PATCH 2/2] credit2: track residual from divisions done during accounting
On 27/02/13 12:48, Jan Beulich wrote:>>>> On 27.02.13 at 13:31, George Dunlap <George.Dunlap@eu.citrix.com> wrote: >> On Wed, Feb 27, 2013 at 11:35 AM, George Dunlap <George.Dunlap@eu.citrix.com >>> wrote: >>> On Wed, Feb 27, 2013 at 9:40 AM, Jan Beulich <JBeulich@suse.com> wrote: >>> >>>>>>> On 26.02.13 at 18:08, George Dunlap <george.dunlap@eu.citrix.com> >>>> wrote: >>>>> @@ -271,16 +272,24 @@ struct csched_dom { >>>>> >>>>> /* >>>>> * Time-to-credit, credit-to-time. >>>>> + * >>>>> + * We keep track of the "residual" time to make sure that frequent >>>> short >>>>> + * schedules still get accounted for in the end. >>>>> + * >>>>> * FIXME: Do pre-calculated division? >>>>> */ >>>>> -static s_time_t t2c(struct csched_runqueue_data *rqd, s_time_t time, >>>> struct csched_vcpu *svc) >>>>> +static void t2c_update(struct csched_runqueue_data *rqd, s_time_t time, >>>>> + struct csched_vcpu *svc) >>>>> { >>>>> - return time * rqd->max_weight / svc->weight; >>>>> + uint64_t val = time * rqd->max_weight + svc->residual; >>>>> + >>>>> + svc->residual = do_div(val, svc->weight); >>>>> + svc->credit -= val; >>>>> } >>>>> >>>>> static s_time_t c2t(struct csched_runqueue_data *rqd, s_time_t credit, >>>> struct csched_vcpu *svc) >>>>> { >>>>> - return credit * svc->weight / rqd->max_weight; >>>>> + return (credit * svc->weight) / rqd->max_weight; >>>> So you dropped the subtraction of svc->residual here - why? >>>> >>>> And if indeed intended, the insertion of parentheses here could be >>>> dropped? >>>> >>> Well for one I think the equation was wrong -- the residual is units of >>> "time", so you should have subtracted the residual after dividing by >>> max_weight. (From a mathematical perspective, svc->weight / >>> rqd->max_weight is the ratio that changes units of "credit" into units of >>> "time; if we were using floating point ops I might put parentheses around >>> that ratio to make it more clear that''s what''s going on.) >>> >> >> Hmm, on further investigation, I think I''ve convinced myself that this >> reasoning is wrong, and your original equation was correct. >> >> However, the difference will only ever end up to be one nanosecond, which >> (if used) will be subtracted from the credit after the reset; so I think we >> might as well do without the subtraction here. > Oh, I didn''t realize this would only be a nanosecond - my > understanding was that it would be up to (but not including) a > credit unit.The "residual" will always be less than the svc->weight, which will always be less than or equal to rqd->max_weight. So adding it in to the equation before dividing by rqd->max_weight can at most increase the result by 1. Empiraclly, I''ve been playing around with values in a spreadsheet to make sure the logic was behaving as one would expect, and the difference between the calculation of c2t with and without the residual was never more than 1. -George
George Dunlap
2013-Feb-27 14:10 UTC
[PATCH 1/2] xen, credit2: Avoid extra c2t calcuation in csched_runtime
csched_runtime() needs to call the ct2() function to change credits into time. The c2t() function, however, is expensive, as it requires an integer division. c2t() was being called twice, once for the main vcpu''s credit and once for the difference between its credit and the next in the queue. But this is unnecessary; by calculating in "credit" first, we can make it so that we just do one conversion later in the algorithm. This also adds more documentation describing the intended algorithm, along with a relevant assertion.. The effect of the new code should be the same as the old code. v3: - ASSERT that rt_credt >= 0, since there''s a possibility for credits to be equal v2: - Change rt_credit into an int - ASSERT() that rt_credit > 0, with explanation Spotted-by: Jan Beulich <JBeulich@suse.com> Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> --- xen/common/sched_credit2.c | 48 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c index b0af010..804049e 100644 --- a/xen/common/sched_credit2.c +++ b/xen/common/sched_credit2.c @@ -1505,31 +1505,57 @@ csched_dom_destroy(const struct scheduler *ops, struct domain *dom) static s_time_t csched_runtime(const struct scheduler *ops, int cpu, struct csched_vcpu *snext) { - s_time_t time = CSCHED_MAX_TIMER; + s_time_t time; + int rt_credit; /* Proposed runtime measured in credits */ struct csched_runqueue_data *rqd = RQD(ops, cpu); struct list_head *runq = &rqd->runq; if ( is_idle_vcpu(snext->vcpu) ) return CSCHED_MAX_TIMER; - /* Basic time */ - time = c2t(rqd, snext->credit, snext); + /* General algorithm: + * 1) Run until snext''s credit will be 0 + * 2) But if someone is waiting, run until snext''s credit is equal + * to his + * 3) But never run longer than MAX_TIMER or shorter than MIN_TIMER. + */ + + /* 1) Basic time: Run until credit is 0. */ + rt_credit = snext->credit; - /* Next guy on runqueue */ + /* 2) If there''s someone waiting whose credit is positive, + * run until your credit ~= his */ if ( ! list_empty(runq) ) { - struct csched_vcpu *svc = __runq_elem(runq->next); - s_time_t ntime; + struct csched_vcpu *swait = __runq_elem(runq->next); - if ( ! is_idle_vcpu(svc->vcpu) ) + if ( ! is_idle_vcpu(swait->vcpu) + && swait->credit > 0 ) { - ntime = c2t(rqd, snext->credit - svc->credit, snext); - - if ( time > ntime ) - time = ntime; + rt_credit = snext->credit - swait->credit; } } + /* + * snext is about to be scheduled; so: + * + * 1. if snext->credit were less than 0 when it was taken off the + * runqueue, then csched_schedule() should have called + * reset_credit(). So at this point snext->credit must be greater + * than 0. + * + * 2. snext''s credit must be greater than or equal to anyone else + * in the queue, so snext->credit - swait->credit must be greater + * than or equal to 0. + */ + ASSERT(rt_credit >= 0); + + /* FIXME: See if we can eliminate this conversion if we know time + * will be outside (MIN,MAX). Probably requires pre-calculating + * credit values of MIN,MAX per vcpu, since each vcpu burns credit + * at a different rate. */ + time = c2t(rqd, rt_credit, snext); + /* Check limits */ if ( time < CSCHED_MIN_TIMER ) time = CSCHED_MIN_TIMER; -- 1.7.9.5
George Dunlap
2013-Feb-27 14:10 UTC
[PATCH 2/2] credit2: track residual from divisions done during accounting
This should help with under-accounting of vCPU-s running for extremly short periods of time, but becoming runnable again at a high frequency. Don''t bother subtracting the residual from the runtime, as it can only ever add up to one nanosecond, and will end up being debited during the next reset interval anyway. Original-patch-by: Jan Beulich <jbeulich@suse.com> Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> --- xen/common/sched_credit2.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c index 804049e..eb544db 100644 --- a/xen/common/sched_credit2.c +++ b/xen/common/sched_credit2.c @@ -21,7 +21,7 @@ #include <xen/perfc.h> #include <xen/sched-if.h> #include <xen/softirq.h> -#include <asm/atomic.h> +#include <asm/div64.h> #include <xen/errno.h> #include <xen/trace.h> #include <xen/cpu.h> @@ -205,7 +205,7 @@ struct csched_runqueue_data { struct list_head runq; /* Ordered list of runnable vms */ struct list_head svc; /* List of all vcpus assigned to this runqueue */ - int max_weight; + unsigned int max_weight; cpumask_t idle, /* Currently idle */ tickled; /* Another cpu in the queue is already targeted for this one */ @@ -244,7 +244,8 @@ struct csched_vcpu { struct csched_dom *sdom; struct vcpu *vcpu; - int weight; + unsigned int weight; + unsigned int residual; int credit; s_time_t start_time; /* When we were scheduled (used for credit) */ @@ -271,14 +272,22 @@ struct csched_dom { /* * Time-to-credit, credit-to-time. + * + * We keep track of the "residual" time to make sure that frequent short + * schedules still get accounted for in the end. + * * FIXME: Do pre-calculated division? */ -static s_time_t t2c(struct csched_runqueue_data *rqd, s_time_t time, struct csched_vcpu *svc) +static void t2c_update(struct csched_runqueue_data *rqd, s_time_t time, + struct csched_vcpu *svc) { - return time * rqd->max_weight / svc->weight; + uint64_t val = time * rqd->max_weight + svc->residual; + + svc->residual = do_div(val, svc->weight); + svc->credit -= val; } -static s_time_t c2t(struct csched_runqueue_data *rqd, s_time_t credit, struct csched_vcpu *svc) +static s_time_t c2t_adjusted(struct csched_runqueue_data *rqd, s_time_t credit, struct csched_vcpu *svc) { return credit * svc->weight / rqd->max_weight; } @@ -636,8 +645,7 @@ void burn_credits(struct csched_runqueue_data *rqd, struct csched_vcpu *svc, s_t delta = now - svc->start_time; if ( delta > 0 ) { - /* This will round down; should we consider rounding up...? */ - svc->credit -= t2c(rqd, delta, svc); + t2c_update(rqd, delta, svc); svc->start_time = now; d2printk("b d%dv%d c%d\n", -- 1.7.9.5
George Dunlap
2013-Feb-27 14:27 UTC
[PATCH 1/2] xen, credit2: Avoid extra c2t calcuation in csched_runtime
csched_runtime() needs to call the ct2() function to change credits into time. The c2t() function, however, is expensive, as it requires an integer division. c2t() was being called twice, once for the main vcpu''s credit and once for the difference between its credit and the next in the queue. But this is unnecessary; by calculating in "credit" first, we can make it so that we just do one conversion later in the algorithm. This also adds more documentation describing the intended algorithm, along with a relevant assertion.. The effect of the new code should be the same as the old code. v3: - ASSERT that rt_credt >= 0, since there''s a possibility for credits to be equal v2: - Change rt_credit into an int - ASSERT() that rt_credit > 0, with explanation Spotted-by: Jan Beulich <JBeulich@suse.com> Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> --- xen/common/sched_credit2.c | 48 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c index b0af010..804049e 100644 --- a/xen/common/sched_credit2.c +++ b/xen/common/sched_credit2.c @@ -1505,31 +1505,57 @@ csched_dom_destroy(const struct scheduler *ops, struct domain *dom) static s_time_t csched_runtime(const struct scheduler *ops, int cpu, struct csched_vcpu *snext) { - s_time_t time = CSCHED_MAX_TIMER; + s_time_t time; + int rt_credit; /* Proposed runtime measured in credits */ struct csched_runqueue_data *rqd = RQD(ops, cpu); struct list_head *runq = &rqd->runq; if ( is_idle_vcpu(snext->vcpu) ) return CSCHED_MAX_TIMER; - /* Basic time */ - time = c2t(rqd, snext->credit, snext); + /* General algorithm: + * 1) Run until snext''s credit will be 0 + * 2) But if someone is waiting, run until snext''s credit is equal + * to his + * 3) But never run longer than MAX_TIMER or shorter than MIN_TIMER. + */ + + /* 1) Basic time: Run until credit is 0. */ + rt_credit = snext->credit; - /* Next guy on runqueue */ + /* 2) If there''s someone waiting whose credit is positive, + * run until your credit ~= his */ if ( ! list_empty(runq) ) { - struct csched_vcpu *svc = __runq_elem(runq->next); - s_time_t ntime; + struct csched_vcpu *swait = __runq_elem(runq->next); - if ( ! is_idle_vcpu(svc->vcpu) ) + if ( ! is_idle_vcpu(swait->vcpu) + && swait->credit > 0 ) { - ntime = c2t(rqd, snext->credit - svc->credit, snext); - - if ( time > ntime ) - time = ntime; + rt_credit = snext->credit - swait->credit; } } + /* + * snext is about to be scheduled; so: + * + * 1. if snext->credit were less than 0 when it was taken off the + * runqueue, then csched_schedule() should have called + * reset_credit(). So at this point snext->credit must be greater + * than 0. + * + * 2. snext''s credit must be greater than or equal to anyone else + * in the queue, so snext->credit - swait->credit must be greater + * than or equal to 0. + */ + ASSERT(rt_credit >= 0); + + /* FIXME: See if we can eliminate this conversion if we know time + * will be outside (MIN,MAX). Probably requires pre-calculating + * credit values of MIN,MAX per vcpu, since each vcpu burns credit + * at a different rate. */ + time = c2t(rqd, rt_credit, snext); + /* Check limits */ if ( time < CSCHED_MIN_TIMER ) time = CSCHED_MIN_TIMER; -- 1.7.9.5
George Dunlap
2013-Feb-27 14:27 UTC
[PATCH 2/2] credit2: track residual from divisions done during accounting
This should help with under-accounting of vCPU-s running for extremly short periods of time, but becoming runnable again at a high frequency. Don''t bother subtracting the residual from the runtime, as it can only ever add up to one nanosecond, and will end up being debited during the next reset interval anyway. Original-patch-by: Jan Beulich <jbeulich@suse.com> Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> --- xen/common/sched_credit2.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c index 804049e..a7bd2ee 100644 --- a/xen/common/sched_credit2.c +++ b/xen/common/sched_credit2.c @@ -21,7 +21,7 @@ #include <xen/perfc.h> #include <xen/sched-if.h> #include <xen/softirq.h> -#include <asm/atomic.h> +#include <asm/div64.h> #include <xen/errno.h> #include <xen/trace.h> #include <xen/cpu.h> @@ -205,7 +205,7 @@ struct csched_runqueue_data { struct list_head runq; /* Ordered list of runnable vms */ struct list_head svc; /* List of all vcpus assigned to this runqueue */ - int max_weight; + unsigned int max_weight; cpumask_t idle, /* Currently idle */ tickled; /* Another cpu in the queue is already targeted for this one */ @@ -244,7 +244,8 @@ struct csched_vcpu { struct csched_dom *sdom; struct vcpu *vcpu; - int weight; + unsigned int weight; + unsigned int residual; int credit; s_time_t start_time; /* When we were scheduled (used for credit) */ @@ -271,11 +272,19 @@ struct csched_dom { /* * Time-to-credit, credit-to-time. + * + * We keep track of the "residual" time to make sure that frequent short + * schedules still get accounted for in the end. + * * FIXME: Do pre-calculated division? */ -static s_time_t t2c(struct csched_runqueue_data *rqd, s_time_t time, struct csched_vcpu *svc) +static void t2c_update(struct csched_runqueue_data *rqd, s_time_t time, + struct csched_vcpu *svc) { - return time * rqd->max_weight / svc->weight; + uint64_t val = time * rqd->max_weight + svc->residual; + + svc->residual = do_div(val, svc->weight); + svc->credit -= val; } static s_time_t c2t(struct csched_runqueue_data *rqd, s_time_t credit, struct csched_vcpu *svc) @@ -636,8 +645,7 @@ void burn_credits(struct csched_runqueue_data *rqd, struct csched_vcpu *svc, s_t delta = now - svc->start_time; if ( delta > 0 ) { - /* This will round down; should we consider rounding up...? */ - svc->credit -= t2c(rqd, delta, svc); + t2c_update(rqd, delta, svc); svc->start_time = now; d2printk("b d%dv%d c%d\n", -- 1.7.9.5
George Dunlap
2013-Feb-27 14:30 UTC
Re: [PATCH 2/2] credit2: track residual from divisions done during accounting
Gah, sorry -- ignore this one... On Wed, Feb 27, 2013 at 2:10 PM, George Dunlap <george.dunlap@eu.citrix.com>wrote:> This should help with under-accounting of vCPU-s running for extremly > short periods of time, but becoming runnable again at a high frequency. > > Don''t bother subtracting the residual from the runtime, as it can only ever > add up to one nanosecond, and will end up being debited during the next > reset interval anyway. > > Original-patch-by: Jan Beulich <jbeulich@suse.com> > Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> > --- > xen/common/sched_credit2.c | 24 ++++++++++++++++-------- > 1 file changed, 16 insertions(+), 8 deletions(-) > > diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c > index 804049e..eb544db 100644 > --- a/xen/common/sched_credit2.c > +++ b/xen/common/sched_credit2.c > @@ -21,7 +21,7 @@ > #include <xen/perfc.h> > #include <xen/sched-if.h> > #include <xen/softirq.h> > -#include <asm/atomic.h> > +#include <asm/div64.h> > #include <xen/errno.h> > #include <xen/trace.h> > #include <xen/cpu.h> > @@ -205,7 +205,7 @@ struct csched_runqueue_data { > > struct list_head runq; /* Ordered list of runnable vms */ > struct list_head svc; /* List of all vcpus assigned to this runqueue > */ > - int max_weight; > + unsigned int max_weight; > > cpumask_t idle, /* Currently idle */ > tickled; /* Another cpu in the queue is already > targeted for this one */ > @@ -244,7 +244,8 @@ struct csched_vcpu { > struct csched_dom *sdom; > struct vcpu *vcpu; > > - int weight; > + unsigned int weight; > + unsigned int residual; > > int credit; > s_time_t start_time; /* When we were scheduled (used for credit) */ > @@ -271,14 +272,22 @@ struct csched_dom { > > /* > * Time-to-credit, credit-to-time. > + * > + * We keep track of the "residual" time to make sure that frequent short > + * schedules still get accounted for in the end. > + * > * FIXME: Do pre-calculated division? > */ > -static s_time_t t2c(struct csched_runqueue_data *rqd, s_time_t time, > struct csched_vcpu *svc) > +static void t2c_update(struct csched_runqueue_data *rqd, s_time_t time, > + struct csched_vcpu *svc) > { > - return time * rqd->max_weight / svc->weight; > + uint64_t val = time * rqd->max_weight + svc->residual; > + > + svc->residual = do_div(val, svc->weight); > + svc->credit -= val; > } > > -static s_time_t c2t(struct csched_runqueue_data *rqd, s_time_t credit, > struct csched_vcpu *svc) > +static s_time_t c2t_adjusted(struct csched_runqueue_data *rqd, s_time_t > credit, struct csched_vcpu *svc) > { > return credit * svc->weight / rqd->max_weight; > } > @@ -636,8 +645,7 @@ void burn_credits(struct csched_runqueue_data *rqd, > struct csched_vcpu *svc, s_t > delta = now - svc->start_time; > > if ( delta > 0 ) { > - /* This will round down; should we consider rounding up...? */ > - svc->credit -= t2c(rqd, delta, svc); > + t2c_update(rqd, delta, svc); > svc->start_time = now; > > d2printk("b d%dv%d c%d\n", > -- > 1.7.9.5 > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel