Laurent Vivier
2007-Aug-20 06:14 UTC
[PATCH 2/4] Introduce a new fields "gtime" and "cgtime" in task_struct and signal_struct
[PATCH 2/4] like for cpustat, introduce the "gtime" (guest time of the task) and "cgtime" (guest time of the task children) fields for the tasks. Modify signal_struct and task_struct. Modify /proc/<pid>/stat to display these new fields. Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net> -- ------------- Laurent.Vivier@bull.net -------------- "Software is hard" - Donald Knuth -------------- next part -------------- Index: kvm/fs/proc/array.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- kvm.orig/fs/proc/array.c 2007-08-20 11:11:30.000000000 +0200 +++ kvm/fs/proc/array.c 2007-08-20 13:04:03.000000000 +0200 @@ -354,6 +354,15 @@ static clock_t task_stime(struct task_st return stime; } +#ifdef CONFIG_GUEST_ACCOUNTING +static clock_t task_gtime(struct task_struct *p) +{ + clock_t gtime =3D cputime_to_clock_t(p->gtime); + + return gtime; +} +#endif + static int do_task_stat(struct task_struct *task, char *buffer, int whole) { unsigned long vsize, eip, esp, wchan =3D ~0UL; @@ -369,6 +378,10 @@ static int do_task_stat(struct task_stru unsigned long cmin_flt =3D 0, cmaj_flt =3D 0; unsigned long min_flt =3D 0, maj_flt =3D 0; cputime_t cutime, cstime; +#ifdef CONFIG_GUEST_ACCOUNTING + cputime_t cgtime; + clock_t gtime; +#endif clock_t utime, stime; unsigned long rsslim =3D 0; char tcomm[sizeof(task->comm)]; @@ -388,6 +401,10 @@ static int do_task_stat(struct task_stru sigemptyset(&sigign); sigemptyset(&sigcatch); cutime =3D cstime =3D cputime_zero; +#ifdef CONFIG_GUEST_ACCOUNTING + cgtime =3D cputime_zero; + gtime =3D 0; +#endif utime =3D stime =3D 0; rcu_read_lock(); @@ -406,6 +423,9 @@ static int do_task_stat(struct task_stru cmaj_flt =3D sig->cmaj_flt; cutime =3D sig->cutime; cstime =3D sig->cstime; +#ifdef CONFIG_GUEST_ACCOUNTING + cgtime =3D sig->cgtime; +#endif rsslim =3D sig->rlim[RLIMIT_RSS].rlim_cur; /* add up live thread stats at the group level */ @@ -416,6 +436,9 @@ static int do_task_stat(struct task_stru maj_flt +=3D t->maj_flt; utime +=3D task_utime(t); stime +=3D task_stime(t); +#ifdef CONFIG_GUEST_ACCOUNTING + gtime +=3D task_gtime(t); +#endif t =3D next_thread(t); } while (t !=3D task); @@ -423,6 +446,9 @@ static int do_task_stat(struct task_stru maj_flt +=3D sig->maj_flt; utime +=3D cputime_to_clock_t(sig->utime); stime +=3D cputime_to_clock_t(sig->stime); +#ifdef CONFIG_GUEST_ACCOUNTING + gtime +=3D cputime_to_clock_t(sig->gtime); +#endif } sid =3D signal_session(sig); @@ -440,6 +466,9 @@ static int do_task_stat(struct task_stru maj_flt =3D task->maj_flt; utime =3D task_utime(task); stime =3D task_stime(task); +#ifdef CONFIG_GUEST_ACCOUNTING + gtime =3D task_gtime(task); +#endif } /* scale priority and nice values from timeslices to -20..20 */ @@ -455,9 +484,13 @@ static int do_task_stat(struct task_stru /* convert nsec -> ticks */ start_time =3D nsec_to_clock_t(start_time); - res =3D sprintf(buffer, "%d (%s) %c %d %d %d %d %d %u %lu \ -%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ -%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu\n", + res =3D sprintf(buffer, "%d (%s) %c %d %d %d %d %d %u %lu " +"%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu " +#ifdef CONFIG_GUEST_ACCOUNTING +"%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n", +#else +"%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu\n", +#endif task->pid, tcomm, state, @@ -502,7 +535,13 @@ static int do_task_stat(struct task_stru task_cpu(task), task->rt_priority, task->policy, +#ifdef CONFIG_GUEST_ACCOUNTING + (unsigned long long)delayacct_blkio_ticks(task), + gtime, + cputime_to_clock_t(cgtime)); +#else (unsigned long long)delayacct_blkio_ticks(task)); +#endif if (mm) mmput(mm); return res; Index: kvm/include/linux/sched.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- kvm.orig/include/linux/sched.h 2007-08-20 11:11:30.000000000 +0200 +++ kvm/include/linux/sched.h 2007-08-20 13:00:02.000000000 +0200 @@ -515,6 +515,10 @@ struct signal_struct { * in __exit_signal, except for the group leader. */ cputime_t utime, stime, cutime, cstime; +#ifdef CONFIG_GUEST_ACCOUNTING + cputime_t gtime; + cputime_t cgtime; +#endif unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; unsigned long inblock, oublock, cinblock, coublock; @@ -1019,6 +1023,9 @@ struct task_struct { unsigned int rt_priority; cputime_t utime, stime; +#ifdef CONFIG_GUEST_ACCOUNTING + cputime_t gtime; +#endif unsigned long nvcsw, nivcsw; /* context switch counts */ struct timespec start_time; /* monotonic time */ struct timespec real_start_time; /* boot based time */ Index: kvm/kernel/exit.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- kvm.orig/kernel/exit.c 2007-08-20 11:11:30.000000000 +0200 +++ kvm/kernel/exit.c 2007-08-20 12:32:08.000000000 +0200 @@ -120,6 +120,9 @@ static void __exit_signal(struct task_st */ sig->utime =3D cputime_add(sig->utime, tsk->utime); sig->stime =3D cputime_add(sig->stime, tsk->stime); +#ifdef CONFIG_GUEST_ACCOUNTING + sig->gtime =3D cputime_add(sig->gtime, tsk->gtime); +#endif sig->min_flt +=3D tsk->min_flt; sig->maj_flt +=3D tsk->maj_flt; sig->nvcsw +=3D tsk->nvcsw; @@ -1255,6 +1258,13 @@ static int wait_task_zombie(struct task_ cputime_add(p->stime, cputime_add(sig->stime, sig->cstime))); +#ifdef CONFIG_GUEST_ACCOUNTING + psig->cgtime =3D + cputime_add(psig->cgtime, + cputime_add(p->gtime, + cputime_add(sig->gtime, + sig->cgtime))); +#endif psig->cmin_flt +=3D p->min_flt + sig->min_flt + sig->cmin_flt; psig->cmaj_flt +=3D Index: kvm/kernel/fork.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- kvm.orig/kernel/fork.c 2007-08-20 11:11:30.000000000 +0200 +++ kvm/kernel/fork.c 2007-08-20 12:38:55.000000000 +0200 @@ -877,6 +877,10 @@ static inline int copy_signal(unsigned l sig->tty_old_pgrp =3D NULL; sig->utime =3D sig->stime =3D sig->cutime =3D sig->cstime =3D cputime_zero; +#ifdef CONFIG_GUEST_ACCOUNTING + sig->gtime =3D cputime_zero; + sig->cgtime =3D cputime_zero; +#endif sig->nvcsw =3D sig->nivcsw =3D sig->cnvcsw =3D sig->cnivcsw =3D 0; sig->min_flt =3D sig->maj_flt =3D sig->cmin_flt =3D sig->cmaj_flt =3D 0; sig->inblock =3D sig->oublock =3D sig->cinblock =3D sig->coublock =3D 0; @@ -1045,6 +1049,9 @@ static struct task_struct *copy_process( p->utime =3D cputime_zero; p->stime =3D cputime_zero; +#ifdef CONFIG_GUEST_ACCOUNTING + p->gtime =3D cputime_zero; +#endif #ifdef CONFIG_TASK_XACCT p->rchar =3D 0; /* I/O counter: bytes read */
Christian Borntraeger
2007-Aug-20 06:53 UTC
[kvm-devel] [PATCH 2/4] Introduce a new fields "gtime" and "cgtime" in task_struct and signal_struct
Am Montag, 20. August 2007 schrieb Laurent Vivier:> Index: kvm/fs/proc/array.c > ==================================================================> --- kvm.orig/fs/proc/array.c????2007-08-20 11:11:30.000000000 +0200 > +++ kvm/fs/proc/array.c?2007-08-20 13:04:03.000000000 +0200Just a heads up, this patch collides with this fix in mm: http://marc.info/?l=linux-mm-commits&m=118737949222362&w=2 If Ingo accepts this fix, your patch should be adopted in array.c to use cputime_t for gtime as well. Lets see what Ingo thinks. Christian
Laurent Vivier
2007-Aug-20 07:17 UTC
[kvm-devel] [PATCH 2/4] Introduce a new fields "gtime" and "cgtime" in task_struct and signal_struct
Christian Borntraeger wrote:> Am Montag, 20. August 2007 schrieb Laurent Vivier: >> Index: kvm/fs/proc/array.c >> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D >> --- kvm.orig/fs/proc/array.c 2007-08-20 11:11:30.000000000 +0200 >> +++ kvm/fs/proc/array.c 2007-08-20 13:04:03.000000000 +0200 > > Just a heads up, this patch collides with this fix in mm: > http://marc.info/?l=3Dlinux-mm-commits&m=3D118737949222362&w=3D2 > > If Ingo accepts this fix, your patch should be adopted in array.c to use > cputime_t for gtime as well. Lets see what Ingo thinks.Thank you for the comment. No problem for me to rewrite the patch according the fix in the mm. Laurent -- ------------- Laurent.Vivier@bull.net -------------- "Software is hard" - Donald Knuth -------------- next part -------------- A non-text attachment was scrubbed... Name: signature.asc Type: application/pgp-signature Size: 189 bytes Desc: OpenPGP digital signature Url : http://lists.linux-foundation.org/pipermail/virtualization/attachments/20070820/8ebc5e1f/signature.pgp
Ingo Molnar
2009-Aug-05 06:59 UTC
[PATCH 2/4] Introduce a new fields "gtime" and "cgtime" in task_struct and signal_struct
* Laurent Vivier <Laurent.Vivier at bull.net> wrote:> [PATCH 2/4] like for cpustat, introduce the "gtime" (guest time of > the task) and "cgtime" (guest time of the task children) fields > for the tasks. Modify signal_struct and task_struct. Modify > /proc/<pid>/stat to display these new fields.> --- kvm.orig/include/linux/sched.h 2007-08-20 11:11:30.000000000 +0200 > +++ kvm/include/linux/sched.h 2007-08-20 13:00:02.000000000 +0200 > @@ -515,6 +515,10 @@ struct signal_struct { > * in __exit_signal, except for the group leader. > */ > cputime_t utime, stime, cutime, cstime; > +#ifdef CONFIG_GUEST_ACCOUNTING > + cputime_t gtime; > + cputime_t cgtime; > +#endifA handful of general (and less general) observations about these patches: 1- The code is very ugly due to being an #ifdef fest. Please always try to avoid them. 2- cputime_t is very coarse on x86: measured in jiffies. This means that with a default HZ of 250 we'll have units of 4 msecs. That's almost useless to rely on in new instrumentation: an irq can come in and out without accounting noticing it, etc. If we do some new statistics then it should be a lot better than jiffies granular. 3- stime of vcpu tasks/threads already approximates 'guest time' adequately. (as Jeremy observed it as well) Yes, it mixes 'true guest mode' and 'host mode' system time, but then again due to the jiffies granularity we have a _far_ bigger skew going on already. 4- namespace collision: 'gtime' is already used as 'group time' in a few places. One of the two things needs to be renamed. 5- tracepoints and perfcounters could be used to measure guest time precisely, in a low-overhead mode. These issues need to be addressed in a meaningful way. #2 probably means a revamping of cputime_t handling on x86 - of not just the gtime. But #3 is worth keeping in mind as well. I think #5 is the most capable solution by a wide margin - we need just a single tracepoint to emit 'nsecs spent in guest mode' information and that's it. It would be a far smaller patch. The tracepoint might even sample the guest RIP and hence could be used as a VM-exit profiler and 'perf record -e kvm:vm_exit + perf report' could be used to examine/profile/trace guest exit reasons. Ingo
Possibly Parallel Threads
- [PATCH 2/4] Introduce a new fields "gtime" and "cgtime" in task_struct and signal_struct
- [PATCH/RFC 2/4]Introduce a new field "guest" in task_struct
- [PATCH/RFC 2/4]Introduce a new field "guest" in task_struct
- [PATCH 0/4] Virtual Machine Time Accounting
- [PATCH 0/4] Virtual Machine Time Accounting