David Vrabel
2012-Oct-01 17:52 UTC
[PATCH 0 of 2] xenalyze: decode new hypercall trace records
This series allows xenalyze to decode the new PV_HYPERCALL_V2 and PV_HYPERCALL_SUBCALL trace records being proposed for Xen 4.3. David
David Vrabel
2012-Oct-01 17:52 UTC
[PATCH 1 of 2] xenalyze: decode PV_HYPERCALL_V2 records
Newer version of Xen produce TRC_PV_HYPERCALL_V2 records instead of the older TRC_PV_HYPERCALL format. This updated format doesn''t included the IP but it does include select hypercall arguments. Signed-off-by: David Vrabel <david.vrabel@citrix.com> diff --git a/pv.h b/pv.h new file mode 100644 --- /dev/null +++ b/pv.h @@ -0,0 +1,41 @@ +/* + * PV event decoding. + * + * Copyright (C) 2012 Citrix Systems R&D Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + */ +#ifndef __PV_H + +#include "analyze.h" +#include "trace.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ARG_MISSING 0x0 +#define ARG_32BIT 0x1 +#define ARG_64BIT 0x2 + +#define MMU_UPDATE_PREEMPTED (~(~0U>>1)) + +static inline uint32_t pv_hypercall_op(const struct record_info *ri) +{ + return ri->d[0] & ~TRC_PV_HYPERCALL_V2_ARG_MASK; +} + +static inline int pv_hypercall_arg_present(const struct record_info *ri, int arg) +{ + return (ri->d[0] >> (20 + 2*arg)) & 0x3; +} + +uint64_t pv_hypercall_arg(const struct record_info *ri, int i); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/xenalyze.c b/xenalyze.c --- a/xenalyze.c +++ b/xenalyze.c @@ -32,6 +32,7 @@ #include "trace.h" #include "analyze.h" #include "mread.h" +#include "pv.h" #include <errno.h> #include <strings.h> #include <string.h> @@ -1485,6 +1486,7 @@ enum { PV_GDT_LDT_MAPPING_FAULT, PV_PTWR_EMULATION, PV_PTWR_EMULATION_PAE, + PV_HYPERCALL_V2 = 13, PV_MAX }; @@ -1499,7 +1501,9 @@ char *pv_name[PV_MAX] = { [PV_PAGING_FIXUP]="paging fixup", [PV_GDT_LDT_MAPPING_FAULT]="gdt/ldt mapping fault", [PV_PTWR_EMULATION]="ptwr", - [PV_PTWR_EMULATION_PAE]="ptwr(pae)" + [PV_PTWR_EMULATION_PAE]="ptwr(pae)", + [PV_HYPERCALL_V2]="hypercall", + [PV_HYPERCALL_SUBCALL]="hypercall (subcall)", }; #define PV_HYPERCALL_MAX 56 @@ -6500,10 +6504,20 @@ void pv_summary(struct pv_data *pv) { printf("PV events:\n"); for(i=0; i<PV_MAX; i++) { - if(pv->count[i]) - printf(" %s %d\n", pv_name[i], pv->count[i]); + int count; + + count = pv->count[i]; + if (i == PV_HYPERCALL_V2) + count += pv->count[PV_HYPERCALL_SUBCALL]; + + if (count == 0) + continue; + + printf(" %s %d\n", pv_name[i], count); + switch(i) { case PV_HYPERCALL: + case PV_HYPERCALL_V2: for(j=0; j<PV_HYPERCALL_MAX; j++) { if(pv->hypercall_count[j]) printf(" %-29s[%2d]: %6d\n", @@ -6523,6 +6537,168 @@ void pv_summary(struct pv_data *pv) { } } +uint64_t pv_hypercall_arg(const struct record_info *ri, int arg) +{ + int i, word; + + for (i = 0, word = 1; i < 6 && word < ri->extra_words; i++) { + int present = pv_hypercall_arg_present(ri, i); + + /* Is this the argument we''re looking for? */ + if (i == arg) { + switch (present) { + case ARG_MISSING: + return 0; + case ARG_32BIT: + return ri->d[word]; + case ARG_64BIT: + return ((uint64_t)ri->d[word + 1] << 32) | ri->d[word]; + } + } + + /* Skip over any words for this argument. */ + word += present; + } + + return 0; +} + +static void pv_hypercall_print_args(const struct record_info *ri) +{ + int i, word; + + for (i = 0, word = 1; i < 6 && word < ri->extra_words; i++) { + int present = pv_hypercall_arg_present(ri, i); + + switch (present) { + case ARG_MISSING: + printf(" ??"); + break; + case ARG_32BIT: + printf(" %08x", ri->d[word]); + break; + case ARG_64BIT: + printf(" %016"PRIu64"", ((uint64_t)ri->d[word + 1] << 32) | ri->d[word]); + break; + } + + word += present; + } +} + +static const char *grant_table_op_cmd_to_str(uint32_t cmd) +{ + const char *cmd_str[] = { + "map_grant_ref", "unmap_grant_ref", "setup_table", "dump_table", + "transfer", "copy", "query_size", "unmap_and_replace", + "set_version", "get_status_frames", "get_version", "swap_grant_ref", + }; + static char buf[32]; + + if (cmd <= 11) + return cmd_str[cmd]; + + snprintf(buf, sizeof(buf), "unknown (%d)", cmd); + return buf; +} + +static const char *vcpu_op_cmd_to_str(uint32_t cmd) +{ + const char *cmd_str[] = { + "initialise", "up", "down", "is_up", "get_runstate_info", + "register_runstate_memory_area", "set_periodic_timer", + "stop_periodic_timer", "set_singleshot_timer", "stop_singleshot_timer", + "register_vcpu_info", "send_nmi", "get_physid", + "register_vcpu_time_memory_area", + }; + static char buf[32]; + + if (cmd <= 13) + return cmd_str[cmd]; + + snprintf(buf, sizeof(buf), "unknown (%d)", cmd); + return buf; +} + +static const char *sched_op_cmd_to_str(uint32_t cmd) +{ + const char *cmd_str[] = { + "yield", "block", "shutdown", "poll", "remote_shutdown", "shutdown_code", + "watchdog", + }; + static char buf[32]; + + if (cmd <= 6) + return cmd_str[cmd]; + + snprintf(buf, sizeof(buf), "unknown (%d)", cmd); + return buf; +} + +void pv_hypercall_v2_process(struct record_info *ri, struct pv_data *pv) +{ + int op = pv_hypercall_op(ri); + + if(opt.summary_info) { + if(op < PV_HYPERCALL_MAX) + pv->hypercall_count[op]++; + } + + if(opt.dump_all) { + if(op < HYPERCALL_MAX) + printf(" %s hypercall %2x (%s)", + ri->dump_header, op, hypercall_name[op]); + else + printf(" %s hypercall %2x", + ri->dump_header, op); + switch(op) { + case HYPERCALL_mmu_update: + { + uint32_t count = pv_hypercall_arg(ri, 1); + printf(" %d updates%s", count & ~MMU_UPDATE_PREEMPTED, + (count & MMU_UPDATE_PREEMPTED) ? " (preempted)" : ""); + } + break; + case HYPERCALL_multicall: + { + uint32_t calls = pv_hypercall_arg(ri, 1); + printf(" %d calls", calls); + } + break; + case HYPERCALL_grant_table_op: + { + uint32_t cmd = pv_hypercall_arg(ri, 0); + uint32_t count = pv_hypercall_arg(ri, 2); + printf(" %s %d ops", grant_table_op_cmd_to_str(cmd), count); + } + break; + case HYPERCALL_vcpu_op: + { + uint32_t cmd = pv_hypercall_arg(ri, 0); + uint32_t vcpuid = pv_hypercall_arg(ri, 1); + printf(" %s vcpu %d", vcpu_op_cmd_to_str(cmd), vcpuid); + } + break; + case HYPERCALL_mmuext_op: + { + uint32_t count = pv_hypercall_arg(ri, 1); + printf(" %d ops", count); + } + break; + case HYPERCALL_sched_op: + { + uint32_t cmd = pv_hypercall_arg(ri, 0); + printf(" %s", sched_op_cmd_to_str(cmd)); + } + break; + default: + pv_hypercall_print_args(ri); + break; + } + printf("\n"); + } +} + void pv_process(struct pcpu_info *p) { struct record_info *ri = &p->ri; @@ -6555,9 +6731,9 @@ void pv_process(struct pcpu_info *p) case PV_PTWR_EMULATION_PAE: pv_ptwr_emulation_process(ri, pv); break; - case PV_PAGE_FAULT: - //pv_pf_process(ri, pv); - //break; + case PV_HYPERCALL_V2: + pv_hypercall_v2_process(ri, pv); + break; default: pv_generic_process(ri, pv); break;
David Vrabel
2012-Oct-01 17:52 UTC
[PATCH 2 of 2] xenalyze: decode PV_HYPERCALL_SUBCALL events
Decode the PV_HYPERCALL_SUBCALL events which are used for calls within a multicall hypercall. They are indented so its easier to see which multicall they were part of. Signed-off-by: David Vrabel <david.vrabel@citrix.com> --- diff --git a/xenalyze.c b/xenalyze.c --- a/xenalyze.c +++ b/xenalyze.c @@ -1487,6 +1487,7 @@ enum { PV_PTWR_EMULATION, PV_PTWR_EMULATION_PAE, PV_HYPERCALL_V2 = 13, + PV_HYPERCALL_SUBCALL = 14, PV_MAX }; @@ -6635,7 +6636,8 @@ static const char *sched_op_cmd_to_str(u return buf; } -void pv_hypercall_v2_process(struct record_info *ri, struct pv_data *pv) +void pv_hypercall_v2_process(struct record_info *ri, struct pv_data *pv, + const char *indent) { int op = pv_hypercall_op(ri); @@ -6646,11 +6648,11 @@ void pv_hypercall_v2_process(struct reco if(opt.dump_all) { if(op < HYPERCALL_MAX) - printf(" %s hypercall %2x (%s)", - ri->dump_header, op, hypercall_name[op]); + printf(" %s%s hypercall %2x (%s)", + ri->dump_header, indent, op, hypercall_name[op]); else - printf(" %s hypercall %2x", - ri->dump_header, op); + printf(" %s%s hypercall %2x", + ri->dump_header, indent, op); switch(op) { case HYPERCALL_mmu_update: { @@ -6732,7 +6734,10 @@ void pv_process(struct pcpu_info *p) pv_ptwr_emulation_process(ri, pv); break; case PV_HYPERCALL_V2: - pv_hypercall_v2_process(ri, pv); + pv_hypercall_v2_process(ri, pv, ""); + break; + case PV_HYPERCALL_SUBCALL: + pv_hypercall_v2_process(ri, pv, " "); break; default: pv_generic_process(ri, pv);
George Dunlap
2012-Oct-03 10:33 UTC
Re: [PATCH 1 of 2] xenalyze: decode PV_HYPERCALL_V2 records
On Mon, Oct 1, 2012 at 6:52 PM, David Vrabel <david.vrabel@citrix.com> wrote:> Newer version of Xen produce TRC_PV_HYPERCALL_V2 records instead of > the older TRC_PV_HYPERCALL format. This updated format doesn''t > included the IP but it does include select hypercall arguments. > > Signed-off-by: David Vrabel <david.vrabel@citrix.com>Thanks -- I did a bunch of optimization work on xenalyze late last year, so I''m afraid I''m going to be pretty picky about some things to avoid losing that effort. :-)> diff --git a/pv.h b/pv.h > new file mode 100644 > --- /dev/null > +++ b/pv.h > @@ -0,0 +1,41 @@ > +/* > + * PV event decoding. > + * > + * Copyright (C) 2012 Citrix Systems R&D Ltd. > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms and conditions of the GNU General Public License, > + * version 2, as published by the Free Software Foundation. > + */ > +#ifndef __PV_H > + > +#include "analyze.h" > +#include "trace.h" > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +#define ARG_MISSING 0x0 > +#define ARG_32BIT 0x1 > +#define ARG_64BIT 0x2 > + > +#define MMU_UPDATE_PREEMPTED (~(~0U>>1)) > + > +static inline uint32_t pv_hypercall_op(const struct record_info *ri) > +{ > + return ri->d[0] & ~TRC_PV_HYPERCALL_V2_ARG_MASK; > +} > + > +static inline int pv_hypercall_arg_present(const struct record_info *ri, int arg) > +{ > + return (ri->d[0] >> (20 + 2*arg)) & 0x3; > +}Try to avoid an integer multiply here; (arg<<1) or (arg+arg) please.> @@ -6523,6 +6537,168 @@ void pv_summary(struct pv_data *pv) { > } > } > > +uint64_t pv_hypercall_arg(const struct record_info *ri, int arg) > +{ > + int i, word; > + > + for (i = 0, word = 1; i < 6 && word < ri->extra_words; i++) { > + int present = pv_hypercall_arg_present(ri, i); > + > + /* Is this the argument we''re looking for? */ > + if (i == arg) { > + switch (present) { > + case ARG_MISSING: > + return 0; > + case ARG_32BIT: > + return ri->d[word]; > + case ARG_64BIT: > + return ((uint64_t)ri->d[word + 1] << 32) | ri->d[word]; > + } > + } > + > + /* Skip over any words for this argument. */ > + word += present; > + }So at the moment, this is only called if you''re in dump mode, which is fairly slow already; but it may at some point be called in summary mode, which I''d like to keep pretty fast. This little loop will basically be O(n^2) with the number of recorded arguments. Since we''re expecting the hypercall arguments xenalyze asks for to match the arguments put into the trace, it seems like we should be able to do better. Would it make sense to do something like pass in an array of what arguments it expects to get, and then have the function double-check that they match properly, and then replaces the "expected argument number" with "traced value" (returning an error if they don''t match)? Alternately, since pv_hypercall_arg() returns 0 for a non-existent arg anyway, couldn''t you have an array, initialized to 0, and filled in as appropriate?> +static void pv_hypercall_print_args(const struct record_info *ri) > +{ > + int i, word; > + > + for (i = 0, word = 1; i < 6 && word < ri->extra_words; i++) { > + int present = pv_hypercall_arg_present(ri, i); > + > + switch (present) { > + case ARG_MISSING: > + printf(" ??"); > + break; > + case ARG_32BIT: > + printf(" %08x", ri->d[word]); > + break; > + case ARG_64BIT: > + printf(" %016"PRIu64"", ((uint64_t)ri->d[word + 1] << 32) | ri->d[word]); > + break; > + } > + > + word += present; > + } > +} > + > +static const char *grant_table_op_cmd_to_str(uint32_t cmd) > +{ > + const char *cmd_str[] = { > + "map_grant_ref", "unmap_grant_ref", "setup_table", "dump_table", > + "transfer", "copy", "query_size", "unmap_and_replace", > + "set_version", "get_status_frames", "get_version", "swap_grant_ref", > + }; > + static char buf[32]; > + > + if (cmd <= 11) > + return cmd_str[cmd]; > + > + snprintf(buf, sizeof(buf), "unknown (%d)", cmd);Just FYI, in my analysis, time spent in libc''s vprintf() takes up the vast majority of the time of xenalyze''s dump mode -- even just doing things like printing %s (which you would think would be more or less a strcpy). In this case (and the others in this patch) the sprintf() should be the uncommon case, so they''re fine. But just a heads-up that I''ll be picky about sprintfs() in hot paths. :-) Thanks, -George
George Dunlap
2012-Oct-03 10:35 UTC
Re: [PATCH 2 of 2] xenalyze: decode PV_HYPERCALL_SUBCALL events
On Mon, Oct 1, 2012 at 6:52 PM, David Vrabel <david.vrabel@citrix.com> wrote:> Decode the PV_HYPERCALL_SUBCALL events which are used for calls within > a multicall hypercall. They are indented so its easier to see which > multicall they were part of. > > Signed-off-by: David Vrabel <david.vrabel@citrix.com>Acked-by: George Dunlap <george.dunlap@eu.citrix.com>> --- > > diff --git a/xenalyze.c b/xenalyze.c > --- a/xenalyze.c > +++ b/xenalyze.c > @@ -1487,6 +1487,7 @@ enum { > PV_PTWR_EMULATION, > PV_PTWR_EMULATION_PAE, > PV_HYPERCALL_V2 = 13, > + PV_HYPERCALL_SUBCALL = 14, > PV_MAX > }; > > @@ -6635,7 +6636,8 @@ static const char *sched_op_cmd_to_str(u > return buf; > } > > -void pv_hypercall_v2_process(struct record_info *ri, struct pv_data *pv) > +void pv_hypercall_v2_process(struct record_info *ri, struct pv_data *pv, > + const char *indent) > { > int op = pv_hypercall_op(ri); > > @@ -6646,11 +6648,11 @@ void pv_hypercall_v2_process(struct reco > > if(opt.dump_all) { > if(op < HYPERCALL_MAX) > - printf(" %s hypercall %2x (%s)", > - ri->dump_header, op, hypercall_name[op]); > + printf(" %s%s hypercall %2x (%s)", > + ri->dump_header, indent, op, hypercall_name[op]); > else > - printf(" %s hypercall %2x", > - ri->dump_header, op); > + printf(" %s%s hypercall %2x", > + ri->dump_header, indent, op); > switch(op) { > case HYPERCALL_mmu_update: > { > @@ -6732,7 +6734,10 @@ void pv_process(struct pcpu_info *p) > pv_ptwr_emulation_process(ri, pv); > break; > case PV_HYPERCALL_V2: > - pv_hypercall_v2_process(ri, pv); > + pv_hypercall_v2_process(ri, pv, ""); > + break; > + case PV_HYPERCALL_SUBCALL: > + pv_hypercall_v2_process(ri, pv, " "); > break; > default: > pv_generic_process(ri, pv); > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel
David Vrabel
2012-Oct-04 12:19 UTC
Re: [PATCH 1 of 2] xenalyze: decode PV_HYPERCALL_V2 records
On 03/10/12 11:33, George Dunlap wrote:> On Mon, Oct 1, 2012 at 6:52 PM, David Vrabel <david.vrabel@citrix.com> wrote: >> Newer version of Xen produce TRC_PV_HYPERCALL_V2 records instead of >> the older TRC_PV_HYPERCALL format. This updated format doesn''t >> included the IP but it does include select hypercall arguments. >> >> Signed-off-by: David Vrabel <david.vrabel@citrix.com> > > Thanks -- I did a bunch of optimization work on xenalyze late last > year, so I''m afraid I''m going to be pretty picky about some things to > avoid losing that effort. :-)>> +static inline int pv_hypercall_arg_present(const struct record_info *ri, int arg) >> +{ >> + return (ri->d[0] >> (20 + 2*arg)) & 0x3; >> +} > > Try to avoid an integer multiply here; (arg<<1) or (arg+arg) please.The compiler does this for me.>> @@ -6523,6 +6537,168 @@ void pv_summary(struct pv_data *pv) { >> } >> } >> >> +uint64_t pv_hypercall_arg(const struct record_info *ri, int arg) >> +{ >> + int i, word; >> + >> + for (i = 0, word = 1; i < 6 && word < ri->extra_words; i++) { >> + int present = pv_hypercall_arg_present(ri, i); >> + >> + /* Is this the argument we''re looking for? */ >> + if (i == arg) { >> + switch (present) { >> + case ARG_MISSING: >> + return 0; >> + case ARG_32BIT: >> + return ri->d[word]; >> + case ARG_64BIT: >> + return ((uint64_t)ri->d[word + 1] << 32) | ri->d[word]; >> + } >> + } >> + >> + /* Skip over any words for this argument. */ >> + word += present; >> + } > > > Alternately, since pv_hypercall_arg() returns 0 for a non-existent arg > anyway, couldn''t you have an array, initialized to 0, and filled in as > appropriate?I''ve done this. David