This is a series of optimizations to xenalyze. Taken together, they speed up summary-mode analysis of my 700MiB test file from over 100 seconds down to around 10 seconds.
George Dunlap
2012-Jan-26 17:20 UTC
[PATCH 1 of 8] xenalyze: Improve record-sorting algorithm
The old method of finding the next record was to search through all active pcpus to find the one with the lowest tsc value. This was simple but incredibly wasteful, especially as frequently the same cpu had several records to process in a row, and only a small subset of pcpus was active at any one time. This patch introduces a sorted list, s.t. the top of the list is always the next record to process. After that record is processed, the next record is found, and the record is then "bubbled down" to its appropriate place on the list. Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> diff -r d8c1c4df4535 -r f8c3c44be309 xenalyze.c --- a/xenalyze.c Thu Jan 12 14:27:56 2012 +0100 +++ b/xenalyze.c Thu Jan 26 17:11:34 2012 +0000 @@ -6522,7 +6522,7 @@ void shadow_process(struct pcpu_info *p) if(sevt.minor <= PF_XEN_LAST_FAULT) { shadow_fault_generic_process(ri, h); } else { - fprintf(warn, "Warning: processing shadow as generic\n"); + warn_once("Warning: processing shadow as generic\n"); process_generic(ri); } break; @@ -8558,6 +8558,10 @@ void base_process(struct pcpu_info *p) { /* Non-compat only */ +void record_order_insert(struct pcpu_info *new); +void record_order_remove(struct pcpu_info *rem); +void record_order_bubble(struct pcpu_info *last); + struct cpu_change_data { int cpu; unsigned window_size; @@ -8624,6 +8628,8 @@ loff_t scan_for_new_pcpu(loff_t offset) p->file_offset = offset; p->next_cpu_change_offset = offset; + record_order_insert(p); + offset += r + cd->window_size; sched_default_vcpu_activate(p); @@ -8675,6 +8681,9 @@ void deactivate_pcpu(struct pcpu_info *p lose_vcpu(p->current, p->last_tsc); } p->active = 0; + + record_order_remove(p); + if ( p->pid == P.max_active_pcpu ) { int i, max_active_pcpu = -1; @@ -8840,11 +8849,6 @@ void process_cpu_change(struct pcpu_info if(r->cpu > P.max_active_pcpu) P.max_active_pcpu = r->cpu; - fprintf(warn, "%s: Activating pcpu %d at offset %lld\n", - __func__, r->cpu, (unsigned long long)p->file_offset); - - sched_default_vcpu_activate(p2); - /* Taking this record as the first record should make everything * run swimmingly. */ p2->ri = *ri; @@ -8852,6 +8856,13 @@ void process_cpu_change(struct pcpu_info p2->ri.d = p2->ri.rec.u.notsc.data; p2->file_offset = p->file_offset; p2->next_cpu_change_offset = p->file_offset; + + fprintf(warn, "%s: Activating pcpu %d at offset %lld\n", + __func__, r->cpu, (unsigned long long)p->file_offset); + + record_order_insert(p2); + + sched_default_vcpu_activate(p2); } p->last_cpu_change_pid = r->cpu; @@ -8979,6 +8990,7 @@ int toplevel_assert_check(int toplevel, if ( ! (v->data_type == VCPU_DATA_NONE || v->data_type == mask.vcpu_data_mode) ) { + /* This may happen for track_dirty_vram, which causes a SHADOW_WRMAP_BF trace f/ dom0 */ fprintf(warn, "WARNING: Unexpected vcpu data type for d%dv%d on proc %d! Expected %d got %d. Not processing\n", v->d->did, v->vid, p->pid, mask.vcpu_data_mode, @@ -9317,31 +9329,102 @@ char * pcpu_string(int pcpu) return s; } +/* Null terminated */ +struct pcpu_info *record_order[MAX_CPUS+1] = { 0 }; + +/* In the case of identical tsc values, the old algorithm would favor the + * pcpu with the lowest number. By default the new algorithm favors the + * pcpu which has been processed most recently. + * + * I think the second way is better; but it''s good to be able to use the + * old ordering, at very lest to verify that there are no (other) ordering + * differences. 
Enabling the below flag will cause the insertion / bubble + * routines to order by pcpu id as well as tsc, preserving the old order. */ +//#define PRESERVE_PCPU_ORDERING + +/* Steady state: + * + Entire list is in order, except (potentially) for the first entry + * + last is pointing to the first entry. + */ +void record_order_bubble(struct pcpu_info *last) +{ + int i; + + /* Find the pcpu to "bubble". This is usually the + * first one, but if other pcpus have been activated, it may + * not be. */ + for(i=0; record_order[i] && record_order[i]!=last; i++); + + assert(record_order[i]); + + /* Now bubble it down */ + for( ; + record_order[i+1] + && ( record_order[i+1]->order_tsc < last->order_tsc +#ifdef PRESERVE_PCPU_ORDERING + || ( record_order[i+1]->order_tsc == last->order_tsc + && record_order[i+1]->pid < last->pid ) +#endif + ) ; + i++) + record_order[i]=record_order[i+1]; + record_order[i]=last; +} + +void record_order_insert(struct pcpu_info *new) +{ + int i; + struct pcpu_info *p=NULL, *t=NULL; + + /* Sanity check: Make sure it''s not already in there */ + for(i=0; record_order[i]; i++) + assert(record_order[i]!=new); + + /* Find where to insert it */ + for(i=0; + record_order[i] + && ( record_order[i]->order_tsc < new->order_tsc +#ifdef PRESERVE_PCPU_ORDERING + || ( record_order[i]->order_tsc == new->order_tsc + && record_order[i]->pid < new->pid ) +#endif + ) ; + i++) + ; + + /* And insert it */ + for( p=new; p ; i++) + { + t=record_order[i]; + record_order[i]=p; + p=t; + } +} + +void record_order_remove(struct pcpu_info *rem) +{ + int i; + + /* Find where the record is */ + for(i=0; record_order[i] && record_order[i]!=rem; i++) + ; + + /* Sanity check: Make sure it''s actually there! */ + assert(record_order[i]); + + /* And move everyone forward */ + for(; (record_order[i]=record_order[i+1]); i++) + ; +} + struct pcpu_info * choose_next_record(void) { - int i; - struct pcpu_info * p, *min_p=NULL; - loff_t min_offset = 0; - - /* Need to: - * - find the pcpu with the lowest order_tsc - * - Find the lowest file offset - */ - for(i=0; i<=P.max_active_pcpu; i++) - { - p = P.pcpu+i; - if(!p->active) - continue; - - if(!min_p || p->order_tsc < min_p->order_tsc) - min_p = p; - - if(!min_offset || p->file_offset < min_offset) - min_offset = p->file_offset; - } - - if(opt.progress && min_offset >= G.progress.update_offset) - progress_update(min_offset); + struct pcpu_info *min_p=NULL; + + min_p=record_order[0]; + + if(opt.progress && min_p && min_p->file_offset >= G.progress.update_offset) + progress_update(min_p->file_offset); /* If there are active pcpus, make sure we chose one */ assert(min_p || (P.max_active_pcpu==-1)); @@ -9372,6 +9455,9 @@ void process_records(void) { else read_record(G.fd, p); + /* Update this pcpu in the processing order */ + if ( p->active ) + record_order_bubble(p); } }
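To see the data structure in isolation, here is a minimal standalone sketch of the idea (not code from the patch): a NULL-terminated array kept sorted by order_tsc, so the next record to process is always element 0, with the head re-sorted ("bubbled down") after its next record has been read. The struct name, DEMO_CPUS, and the tsc values are purely illustrative; the real code also has an optional PRESERVE_PCPU_ORDERING tie-break, omitted here.

#include <assert.h>
#include <stdio.h>

#define DEMO_CPUS 8

struct demo_pcpu { int pid; unsigned long long order_tsc; };

/* NULL-terminated, sorted by order_tsc (smallest first) */
static struct demo_pcpu *order[DEMO_CPUS + 1];

static void demo_insert(struct demo_pcpu *new)
{
    int i;
    struct demo_pcpu *p = new, *t;

    /* Find the insertion point... */
    for (i = 0; order[i] && order[i]->order_tsc < new->order_tsc; i++)
        ;
    /* ...and shift the tail down by one to make room */
    for (; p; i++) {
        t = order[i];
        order[i] = p;
        p = t;
    }
}

static void demo_bubble(struct demo_pcpu *last)
{
    int i;

    /* 'last' is normally at the head, but may have moved */
    for (i = 0; order[i] && order[i] != last; i++)
        ;
    assert(order[i]);
    /* Slide later entries up until 'last' is back in order */
    for (; order[i + 1] && order[i + 1]->order_tsc < last->order_tsc; i++)
        order[i] = order[i + 1];
    order[i] = last;
}

int main(void)
{
    struct demo_pcpu a = { 0, 100 }, b = { 1, 50 }, c = { 2, 75 };

    demo_insert(&a);
    demo_insert(&b);
    demo_insert(&c);

    /* pcpu 1 (tsc 50) is at the head; pretend its record was processed
     * and its next record carries tsc 120, then re-sort only that entry. */
    b.order_tsc = 120;
    demo_bubble(&b);

    for (int i = 0; order[i]; i++)
        printf("pcpu %d tsc %llu\n", order[i]->pid, order[i]->order_tsc);
    return 0;
}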
George Dunlap
2012-Jan-26 17:20 UTC
[PATCH 2 of 8] xenalyze: Remove spurious dump_header construction
The h->dump_header information was being filled out on summary runs, even though it's not used. Use it only if opt.dump_all is enabled. Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> diff -r f8c3c44be309 -r 2ab3da778828 xenalyze.c --- a/xenalyze.c Thu Jan 26 17:11:34 2012 +0000 +++ b/xenalyze.c Thu Jan 26 17:16:32 2012 +0000 @@ -5327,7 +5327,7 @@ void hvm_vmexit_process(struct record_in h->post_process = hvm_generic_postprocess; h->inflight.generic.event = 0; - { + if ( opt.dump_all) { struct time_struct t; char * c; int len, r;
Remove a vestigal option that hasn''t been used or maintained in years. Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> diff -r 2ab3da778828 -r 6772e9e46ab2 xenalyze.c --- a/xenalyze.c Thu Jan 26 17:16:32 2012 +0000 +++ b/xenalyze.c Thu Jan 26 17:16:35 2012 +0000 @@ -156,7 +156,6 @@ struct { scatterplot_irq:1, histogram_interrupt_eip:1, interval_mode:1, - dump_cooked:1, dump_all:1, dump_raw_process:1, dump_raw_reads:1, @@ -231,7 +230,6 @@ struct { .scatterplot_rdtsc=0, .scatterplot_irq=0, .histogram_interrupt_eip=0, - .dump_cooked = 0, .dump_all = 0, .dump_raw_process = 0, .dump_raw_reads = 0, @@ -3425,30 +3423,6 @@ void hvm_pf_xen_postprocess(struct hvm_d /* Set summary handler */ hvm_set_summary_handler(h, hvm_pf_xen_summary, NULL); } - - if(opt.dump_cooked) - { - switch(e->pf_case) - { - case PF_XEN_EMULATE: - printf(" %s pf_xen:emulate va %llx ec %x eip %llx%s lvl %d corr %llx\n", - h->dump_header, e->va, e->error_code, - h->rip, find_symbol(h->rip), - e->pt_level, e->corresponding_va); - break; - case PF_XEN_MMIO: - printf(" %s pf_xen:mmio va %llx ec %x eip %llx%s data %x\n", - h->dump_header, e->va, e->error_code, - h->rip, find_symbol(h->rip), - e->data); - break; - default: - printf(" %s pf_xen va %llx ec %x eip %llx%s\n", - h->dump_header, e->va, e->error_code, - h->rip, find_symbol(h->rip)); - break; - } - } } void hvm_pf_xen_process(struct record_info *ri, struct hvm_data *h) { @@ -3610,7 +3584,7 @@ void hvm_vlapic_icr_handler(struct hvm_d o->count++; - if((opt.dump_all || opt.dump_cooked) + if((opt.dump_all) #if 0 && (ov->runstate.state != RUNSTATE_RUNNING || ov->hvm.vmexit_valid) @@ -3624,7 +3598,7 @@ void hvm_vlapic_icr_handler(struct hvm_d } if(m->is_write) { - if(opt.dump_all || opt.dump_cooked) { + if(opt.dump_all) { printf(" [vla] d%dv%d icr vec %d %s\n", h->v->d->did, h->v->vid, icr.vec, @@ -3655,7 +3629,7 @@ void hvm_vlapic_icr_handler(struct hvm_d } } else { /* Read */ - if(opt.dump_all || opt.dump_cooked) { + if(opt.dump_all) { printf(" [vla] d%dv%d icr status %s\n", h->v->d->did, h->v->vid, icr.delivery_status?"pending":"idle"); @@ -3683,7 +3657,7 @@ void hvm_vlapic_inject(struct vcpu_data } void hvm_vlapic_eoi_handler(struct hvm_data *h) { - if(opt.dump_all || opt.dump_cooked) + if(opt.dump_all) printf(" [vla] d%dv%d eoi\n", h->v->d->did, h->v->vid); } @@ -3834,7 +3808,7 @@ void hvm_inj_virq_process(struct record_ int vector, fake; } *r = (typeof(r))h->d; - if(opt.dump_cooked | opt.dump_all) { + if(opt.dump_all) { printf(" %s inj_virq vec %u %s\n", ri->dump_header, r->vector, r->fake?"fake":"real"); @@ -3956,24 +3930,12 @@ void hvm_io_address_summary(struct io_ad void hvm_io_write_postprocess(struct hvm_data *h) { - if(opt.dump_cooked) - { - printf(" %s io_write port %x val %x\n", - h->dump_header, h->inflight.io.port, - h->inflight.io.val); - } if(opt.with_pio_enumeration) update_io_address(&h->summary.io.pio, h->inflight.io.port, 1, h->arc_cycles, 0); } void hvm_io_read_postprocess(struct hvm_data *h) { - if(opt.dump_cooked) - { - printf(" %s io_read port %x val %x\n", - h->dump_header, h->inflight.io.port, - h->inflight.io.val); - } if(opt.with_pio_enumeration) update_io_address(&h->summary.io.pio, h->inflight.io.port, 0, h->arc_cycles, 0); if(opt.scatterplot_io && h->inflight.io.port == opt.scatterplot_io_port) @@ -4275,24 +4237,6 @@ void hvm_cr_write_postprocess(struct hvm } flush=1; } - - if(opt.dump_cooked) - { - printf(" %s cr_write cr3 val %llx oval %llx (%d resyncs) %s\n", - h->dump_header, - new_val, - oval, - h->resyncs, - flush?"flush":""); - 
} - } else { - if(opt.dump_cooked) - { - printf(" %s cr_write cr3 val %llx (%d resyncs)\n", - h->dump_header, - h->inflight.cr_write.val, - h->resyncs); - } } if(opt.summary_info) { @@ -4313,14 +4257,6 @@ void hvm_cr_write_postprocess(struct hvm if(!flush) cr3_switch(new_val, h); } else { - if(opt.dump_cooked) - { - printf(" %s cr_write cr%d val %llx\n", - h->dump_header, - h->inflight.cr_write.cr, - h->inflight.cr_write.val); - } - if(opt.summary_info) { if(h->inflight.cr_write.cr < CR_MAX) @@ -4420,14 +4356,6 @@ void hvm_msr_write_summary(struct hvm_da void hvm_msr_write_postprocess(struct hvm_data *h) { - if(opt.dump_cooked) - { - printf(" %s msr_write msr %d val %llx\n", - h->dump_header, - h->inflight.msr.addr, - h->inflight.msr.val); - } - if(opt.summary_info) { } @@ -4466,14 +4394,6 @@ void hvm_msr_read_summary(struct hvm_dat void hvm_msr_read_postprocess(struct hvm_data *h) { - if(opt.dump_cooked) - { - printf(" %s msr_read msr %d val %llx\n", - h->dump_header, - h->inflight.msr.addr, - h->inflight.msr.val); - } - if(opt.summary_info) { } @@ -4564,7 +4484,7 @@ void hvm_inj_exc_process(struct record_i unsigned vec, ec; } *r = (typeof(r))h->d; - if ( opt.dump_cooked || opt.dump_all ) + if ( opt.dump_all ) { if(r->vec < HVM_TRAP_MAX) printf(" %3u.%09u %s inj_exc trap %s ec %x\n", @@ -4620,7 +4540,7 @@ void hvm_intr_process(struct hvm_data *h h->inflight.intr.vec = vec; - if ( opt.dump_cooked || opt.dump_all ) + if ( opt.dump_all ) { if ( vec < EXTERNAL_INTERRUPT_MAX && hvm_extint_vector_name[vec] ) @@ -4742,7 +4662,7 @@ void hvm_pf_inject_process(struct record ec = r->x32.ec; } - if ( opt.dump_cooked || opt.dump_all ) + if ( opt.dump_all ) { printf(" %3u.%09u %s pf_inject%s guest_cr2 %llx guest_ec %x\n", ri->t.s, ri->t.ns, pcpu_string(ri->cpu), @@ -4938,7 +4858,7 @@ void hvm_handler_process(struct record_i hvm_pf_inject_process(ri, h); break; case TRC_HVM_REINJ_VIRQ: - if ( opt.dump_cooked || opt.dump_all ) + if ( opt.dump_all ) { printf(" %3u.%09u %s inj_virq vec %u\n", ri->t.s, ri->t.ns, pcpu_string(ri->cpu), @@ -4964,7 +4884,7 @@ void hvm_handler_process(struct record_i } else if(opt.with_cr3_enumeration) { fprintf(warn, "Warning: destroy_proc: don''t know current cr3\n"); } - if ( opt.dump_cooked || opt.dump_all ) + if ( opt.dump_all ) { printf(" %3u.%09u %s destroy_proc cur_cr3 %llx\n", ri->t.s, ri->t.ns, pcpu_string(ri->cpu), h->v->cr3.val); @@ -5681,18 +5601,6 @@ void shadow_emulate_postprocess(struct h hvm_update_short_summary(h, HVM_SHORT_SUMMARY_EMULATE); } - if ( opt.dump_cooked ) - { - printf(" %s emulate va %llx eip %llx%s lvl %d/%d corr %llx wval %llx flags %s\n", - h->dump_header, e->va, - h->rip, find_symbol(h->rip), - e->pt_level, h->v->guest_paging_levels, - e->corresponding_va, - e->wval, - flag_string(e)); - } - - if(opt.scatterplot_unpin_promote) { if(e->flag_early_unshadow) scatterplot_vs_time(h->exit_tsc, -10); @@ -5906,14 +5814,6 @@ void shadow_unsync_postprocess(struct hv update_summary(&h->summary.pf_xen_unsync[h->resyncs], h->arc_cycles); } - - if(opt.dump_cooked) - { - printf(" %s unsync gfn %llx %s (%d resyncs)\n", - h->dump_header, e->gfn, - h->resyncs?"(resync)":"", - h->resyncs); - } } @@ -6013,17 +5913,6 @@ void shadow_fixup_postprocess(struct hvm hvm_update_short_summary(h, HVM_SHORT_SUMMARY_FIXUP); } - if ( opt.dump_cooked ) - { - printf(" %s fixup%s va %llx eip %llx%s gl1e %llx flags %s\n", - h->dump_header, - e->flag_unsync?":unsync":"", - e->va, - h->rip, find_symbol(h->rip), - e->gl1e, - flag_string(e)); - } - if(opt.scatterplot_unpin_promote) { 
if(h->prealloc_unpin) scatterplot_vs_time(h->exit_tsc, 0); @@ -6133,8 +6022,6 @@ void shadow_fixup_process(struct record_ void shadow_mmio_postprocess(struct hvm_data *h) { struct pf_xen_extra *e = &h->inflight.pf_xen; - struct mmio_info *m = &h->inflight.mmio; - if ( opt.summary_info ) { if(e->pf_case) @@ -6148,28 +6035,6 @@ void shadow_mmio_postprocess(struct hvm_ if(opt.with_mmio_enumeration) enumerate_mmio(h); - - if ( opt.dump_cooked ) - { - if(m->data_valid) - printf(" %s %smmio %s va %llx eip %llx%s gpa %llx data %x\n", - h->dump_header, - (e->pf_case==PF_XEN_FAST_MMIO)?"fast ":"", - m->is_write?"write":"read", - e->va, - h->rip, find_symbol(h->rip), - m->gpa, - m->data); - else - printf(" %s %smmio %s va %llx eip %llx%s gpa %llx (no data)\n", - h->dump_header, - (e->pf_case==PF_XEN_FAST_MMIO)?"fast ":"", - m->is_write?"write":"read", - m->va, - h->rip, find_symbol(h->rip), - m->gpa); - } - } void shadow_mmio_process(struct record_info *ri, struct hvm_data *h) @@ -6403,7 +6268,7 @@ void shadow_resync_process(struct record unsigned long long gfn; } *r = (typeof(r))ri->d; - if(opt.dump_all || opt.dump_cooked) + if(opt.dump_all) printf(" %s oos resync %s gfn %llx\n", ri->dump_header, (ri->event == TRC_SHADOW_RESYNC_FULL)?"full":"only", @@ -6417,7 +6282,7 @@ void shadow_prealloc_unpin_process(struc unsigned long long gfn; } *r = (typeof(r))ri->d; - if(opt.dump_all || opt.dump_cooked) + if(opt.dump_all) printf(" %s prealloc-unpin gfn %llx\n", ri->dump_header, r->gfn); @@ -6435,7 +6300,7 @@ void shadow_wrmap_bf_process(struct reco unsigned long long gfn; } *r = (typeof(r))ri->d; - if(opt.dump_all || opt.dump_cooked) + if(opt.dump_all) printf(" %s wrmap-bf gfn %llx\n", ri->dump_header, r->gfn); @@ -6569,7 +6434,7 @@ void pv_hypercall_process(struct record_ pv->hypercall_count[eax]++; } - if(opt.dump_cooked || opt.dump_all) { + if(opt.dump_all) { if(eax < HYPERCALL_MAX) printf(" %s hypercall %2x (%s) eip %llx\n", ri->dump_header, eax, @@ -6616,7 +6481,7 @@ void pv_trap_process(struct record_info pv->trap_count[trapnr]++; } - if(opt.dump_cooked || opt.dump_all) { + if(opt.dump_all) { printf(" %s trap %x eip %llx", ri->dump_header, trapnr, eip); if(use_ec) @@ -6673,7 +6538,7 @@ void pv_ptwr_emulation_process(struct re return; } - if ( opt.dump_cooked || opt.dump_all ) + if ( opt.dump_all ) { printf(" %s ptwr l1e %llx eip %llx addr %llx\n", ri->dump_header, @@ -6711,7 +6576,7 @@ void pv_update_va_mapping_process(struct e.flags = r->x32.flags; } - if ( opt.dump_cooked || opt.dump_all ) + if ( opt.dump_all ) { printf(" %s update_va_mapping l1e %llx va %llx flags %llx\n", ri->dump_header, @@ -6721,7 +6586,7 @@ void pv_update_va_mapping_process(struct void pv_generic_process(struct record_info *ri, struct pv_data *pv) { union pv_event pevt = { .event = ri->event }; - if ( opt.dump_cooked || opt.dump_all ) { + if ( opt.dump_all ) { if(pevt.minor < PV_MAX && pv_name[pevt.minor]) printf(" %s %s", ri->dump_header, @@ -7145,7 +7010,7 @@ void sched_runstate_process(struct pcpu_ perfctrs = (ri->extra_words == 5); - if(opt.dump_cooked || opt.dump_all) { + if(opt.dump_all) { if( perfctrs ) { printf(" %s %s {%lld,%lld} d%uv%u %s->%s\n", ri->dump_header, @@ -7199,9 +7064,6 @@ void sched_runstate_process(struct pcpu_ if(sevt.new_runstate == RUNSTATE_RUNNABLE && v->data_type == VCPU_DATA_HVM && v->hvm.vmexit_valid) { - if(opt.dump_cooked) - printf("%s: d%dv%d changing to state runnable, closing vmexit\n", - __func__, r->dom, r->vcpu); hvm_close_vmexit(&v->hvm, ri->tsc); } @@ -7408,7 +7270,7 @@ update: cpi = 
((double)run_cycles) / run_instr; - if(opt.dump_cooked || opt.dump_all) { + if(opt.dump_all) { printf(" cpi: %2.2lf ( %lld / %lld )\n", cpi, run_cycles, run_instr); } @@ -7485,7 +7347,7 @@ void sched_switch_process(struct pcpu_in unsigned int prev_dom, prev_vcpu, next_dom, next_vcpu; } * r = (typeof(r))ri->d; - if(opt.dump_cooked || opt.dump_all) + if(opt.dump_all) printf("%s sched_switch prev d%uv%u next d%uv%u\n", ri->dump_header, r->prev_dom, r->prev_vcpu, @@ -7786,7 +7648,7 @@ void mem_process(struct pcpu_info *p) { mem_pod_superpage_splinter_process(p); break; default: - if(opt.dump_all || opt.dump_cooked) { + if(opt.dump_all) { dump_generic(stdout, ri); } @@ -7848,7 +7710,7 @@ void pm_process(struct pcpu_info *p) { pcpu_string_draw(p); break; default: - if(opt.dump_all || opt.dump_cooked) { + if(opt.dump_all) { dump_generic(stdout, ri); } break; @@ -8209,7 +8071,7 @@ void irq_process(struct pcpu_info *p) { case TRC_HW_IRQ_CLEAR_VECTOR: case TRC_HW_IRQ_MOVE_FINISH : default: - if(opt.dump_all || opt.dump_cooked) { + if(opt.dump_all) { dump_generic(stdout, ri); } break; @@ -8311,7 +8173,7 @@ void process_generic(struct record_info error(ERR_STRICT, ri); - if(opt.dump_cooked || opt.dump_all) { + if(opt.dump_all) { dump_generic(stdout, ri); } } @@ -8384,7 +8246,7 @@ void process_lost_records(struct pcpu_in first_tsc = r->first_tsc; - if(opt.dump_cooked || opt.dump_all) + if(opt.dump_all) { if(p->current) printf(" %s lost_records count %d d%uv%u (cur d%dv%d) first_tsc %lld\n", @@ -8490,7 +8352,7 @@ void process_lost_records_end(struct pcp * Update the information. */ if(ri->tsc > p->lost_record.tsc) { - if(opt.dump_cooked || opt.dump_all) + if(opt.dump_all) printf(" %s lost_records end ---\n", pcpu_string(p->pid)); @@ -8500,7 +8362,7 @@ void process_lost_records_end(struct pcp int did = p->lost_record.did, vid = p->lost_record.vid; - if(opt.dump_cooked || opt.dump_all) + if(opt.dump_all) printf(" %s lost_records end d%dv%d---\n", pcpu_string(p->pid), did, vid); @@ -8523,7 +8385,7 @@ void process_lost_records_end(struct pcp p->lost_record.seen_valid_schedule=0; /* Let next vcpu_next_update know that this one was inferred */ } else { - if(opt.dump_cooked || opt.dump_all) + if(opt.dump_all) printf(" %s lost_records end (domain invalid)---\n", pcpu_string(p->pid)); } @@ -9026,7 +8888,7 @@ void process_record(struct pcpu_info *p) process_record_tsc(p->order_tsc, ri); - if(opt.dump_cooked || opt.dump_all) + if(opt.dump_all) create_dump_header(ri, p); @@ -9592,7 +9454,6 @@ void init_pcpus(void) { enum { OPT_NULL=0, /* Dumping info */ - OPT_DUMP_COOKED, OPT_DUMP_RAW_READS, OPT_DUMP_RAW_PROCESS, OPT_DUMP_NO_PROCESSING, @@ -9765,10 +9626,6 @@ error_t cmd_parser(int key, char *arg, s switch (key) { /* Dump group */ - case OPT_DUMP_COOKED: - opt.dump_cooked = 1; - G.output_defined = 1; - break; case OPT_DUMP_ALL: opt.dump_all = 1; G.output_defined = 1; @@ -10157,11 +10014,6 @@ error_t cmd_parser(int key, char *arg, s const struct argp_option cmd_opts[] = { /* Dump group */ - { .name = "dump-cooked", - .key = OPT_DUMP_COOKED, - .group = OPT_GROUP_DUMP, - .doc = "Dump a sanitized summary of vmexit/vmentry.", }, - { .name = "dump-all", .key = OPT_DUMP_ALL, .group = OPT_GROUP_DUMP, @@ -10453,7 +10305,7 @@ int main(int argc, char *argv[]) { if (G.symbol_file != NULL) parse_symbol_file(G.symbol_file); - if(opt.dump_cooked || opt.dump_all) + if(opt.dump_all) warn = stdout; init_pcpus();
George Dunlap
2012-Jan-26 17:21 UTC
[PATCH 4 of 8] xenalyze: Enable -O2 optimization level
-O3 is marginally better, but the automatic inlining obscures where the guest is spending its time. Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> diff -r 6772e9e46ab2 -r 0663e3e8f69d Makefile --- a/Makefile Thu Jan 26 17:16:35 2012 +0000 +++ b/Makefile Thu Jan 26 17:16:59 2012 +0000 @@ -1,6 +1,6 @@ CC = gcc -CFLAGS += -g +CFLAGS += -g -O2 CFLAGS += -fno-strict-aliasing CFLAGS += -std=gnu99 CFLAGS += -Wall -Wstrict-prototypes
George Dunlap
2012-Jan-26 17:21 UTC
[PATCH 5 of 8] xenalyze: Rework math to remove two 64-bit divisions
abs_cycles_to_time is called on every record for dump mode; it has four 64-bit divisions (well, 3 divisions and 1 mod), which map to library functions on a 32-bit platform. A simple rework of the math can eliminate two of those. Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> diff -r 0663e3e8f69d -r 4b3639bd3255 xenalyze.c --- a/xenalyze.c Thu Jan 26 17:16:59 2012 +0000 +++ b/xenalyze.c Thu Jan 26 17:17:19 2012 +0000 @@ -1976,16 +1976,19 @@ void cpumask_union(cpu_mask_t *d, const /* -- Time code -- */ void cycles_to_time(unsigned long long c, struct time_struct *t) { - t->time = ((c) * 1000) / (opt.cpu_hz / 1000000 ); - t->s = t->time / 1000000000; - t->ns = t->time % 1000000000; + t->time = ((c - P.f.first_tsc) * 1000 * 1000000) / opt.cpu_hz; + t->s = t->time / 1000000000; + t->ns = t->time - (t->s * 1000000000); } void abs_cycles_to_time(unsigned long long ac, struct time_struct *t) { if(ac > P.f.first_tsc) { - t->time = ((ac - P.f.first_tsc) * 1000) / (opt.cpu_hz / 1000000 ); - t->s = t->time / 1000000000; - t->ns = t->time % 1000000000; + /* t->time = ((ac - P.f.first_tsc) * 1000) / (opt.cpu_hz / 1000000 ); */ + /* t->s = t->time / 1000000000; */ + /* t->ns = t->time % 1000000000; */ + t->time = ((ac - P.f.first_tsc) * 1000 * 1000000) / opt.cpu_hz; + t->s = t->time / 1000000000; + t->ns = t->time - (t->s * 1000000000); } else { t->time = t->s = t->ns = 0; }
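As a rough illustration of the arithmetic change (not code from the patch): on a 32-bit build each 64-bit division or modulus typically compiles to a libgcc helper call (__udivdi3 / __umoddi3), so folding the two constant scalings into a single division by cpu_hz and replacing the final modulus with a multiply-and-subtract cuts those calls per conversion from four to two. The sample values below are made up, and the sketch assumes the scaled product still fits in 64 bits.

#include <stdio.h>

int main(void)
{
    unsigned long long cpu_hz = 2400000000ULL;  /* DEFAULT_CPU_HZ */
    unsigned long long delta  = 3600000000ULL;  /* cycles since first_tsc */
    unsigned long long t, s, ns;

    /* Old form: cpu_hz/1000000, the main divide, a divide and a mod */
    t  = (delta * 1000) / (cpu_hz / 1000000);
    s  = t / 1000000000;
    ns = t % 1000000000;
    printf("old: %llu.%09llu\n", s, ns);

    /* New form: one divide for the scale, one for seconds, no mod */
    t  = (delta * 1000 * 1000000) / cpu_hz;
    s  = t / 1000000000;
    ns = t - (s * 1000000000);
    printf("new: %llu.%09llu\n", s, ns);

    return 0;
}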
George Dunlap
2012-Jan-26 17:21 UTC
[PATCH 6 of 8] xenalyze: Eliminate unnecessary cycles_to_time calculation
The time elements are only really accessed in dump mode, but they cost a significant amount to calculate per record. Only calculate them if we're in dump mode. Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> diff -r 4b3639bd3255 -r 108d02354403 xenalyze.c --- a/xenalyze.c Thu Jan 26 17:17:19 2012 +0000 +++ b/xenalyze.c Thu Jan 26 17:17:53 2012 +0000 @@ -8601,9 +8601,6 @@ void process_record_tsc(tsc_t order_tsc, P.now = tsc; } - - /* Convert to s/ns once for convenience */ - abs_cycles_to_time(ri->tsc, &ri->t); } /* Standardized part of dump output */ @@ -8615,6 +8612,8 @@ void create_dump_header(struct record_in len = DUMP_HEADER_MAX; c = ri->dump_header; + abs_cycles_to_time(ri->tsc, &ri->t); + if ( ri->t.time ) { r=snprintf(c, len, "%3u.%09u", ri->t.s, ri->t.ns);
George Dunlap
2012-Jan-26 17:21 UTC
[PATCH 7 of 8] xenalyze: Introduce more efficient read mechanism
The pread functionality was convenient for reading at arbitrary offsets within a file, but it was incredibly inefficient; after recent optimizations, a summary analysis was spending 30% of its time doing system calls, and indications were that a big chunk of the rest of the overhead was due to cache misses coming out of that path. This patch introduces a read using mmap. Because the file may be too big to map on 32-bit systems, I use a "mapcache" of 8 different 2MiB buffers. If an offset is in what''s already mapped, I just copy it; if not, I unmap one of the buffers and map it instead. This optimization alone took the summary processing time for my test trace from 34s down to 10s. Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> diff -r 108d02354403 -r f86ebfe66384 Makefile --- a/Makefile Thu Jan 26 17:17:53 2012 +0000 +++ b/Makefile Thu Jan 26 17:18:13 2012 +0000 @@ -13,7 +13,7 @@ CFLAGS += -Werror BIN = xenalyze dump-raw -HDRS = trace.h analyze.h +HDRS = trace.h analyze.h mread.h all: $(BIN) @@ -23,3 +23,6 @@ clean: %: %.c $(HDRS) Makefile $(CC) $(CFLAGS) -o $@ $< + +xenalyze: xenalyze.o mread.o + $(CC) $(CFLAGS) -o $@ $^ diff -r 108d02354403 -r f86ebfe66384 mread.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mread.c Thu Jan 26 17:18:13 2012 +0000 @@ -0,0 +1,160 @@ +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <errno.h> +#include "mread.h" + +mread_handle_t mread_init(int fd) +{ + struct stat64 s; + mread_handle_t h; + + h=malloc(sizeof(struct mread_ctrl)); + + if (!h) + { + perror("malloc"); + exit(1); + } + + bzero(h, sizeof(struct mread_ctrl)); + + h->fd = fd; + + fstat64(fd, &s); + h->file_size = s.st_size; + + return h; +} + +ssize_t mread64(mread_handle_t h, void *rec, ssize_t len, loff_t offset) +{ + /* Idea: have a "cache" of N mmaped regions. If the offset is + * in one of the regions, just copy it. If not, evict one of the + * regions and map the appropriate range. + * + * Basic algorithm: + * - See if the offset is in one of the regions + * - If not, map it + * - evict an old region + * - map the new region + * - Copy + */ + char * b=NULL; + int bind=-1; + loff_t boffset=0; + ssize_t bsize; + +#define dprintf(x...) 
+//#define dprintf fprintf + + dprintf(warn, "%s: offset %llx len %d\n", __func__, + offset, len); + if ( offset > h->file_size ) + { + dprintf(warn, " offset > file size %llx, returning 0\n", + h->file_size); + return 0; + } + if ( offset + len > h->file_size ) + { + dprintf(warn, " offset+len > file size %llx, truncating\n", + h->file_size); + len = h->file_size - offset; + } + + /* Try to find the offset in our range */ + dprintf(warn, " Trying last, %d\n", last); + if ( h->map[h->last].buffer + && (offset & MREAD_BUF_MASK) == h->map[h->last].start_offset ) + { + bind=h->last; + goto copy; + } + + /* Scan to see if it''s anywhere else */ + dprintf(warn, " Scanning\n"); + for(bind=0; bind<MREAD_MAPS; bind++) + if ( h->map[bind].buffer + && (offset & MREAD_BUF_MASK) == h->map[bind].start_offset ) + { + dprintf(warn, " Found, index %d\n", bind); + break; + } + + /* If we didn''t find it, evict someone and map it */ + if ( bind == MREAD_MAPS ) + { + dprintf(warn, " Clock\n"); + while(1) + { + h->clock++; + if(h->clock >= MREAD_MAPS) + h->clock=0; + dprintf(warn, " %d\n", h->clock); + if(h->map[h->clock].buffer == NULL) + { + dprintf(warn, " Buffer null, using\n"); + break; + } + if(!h->map[h->clock].accessed) + { + dprintf(warn, " Not accessed, using\n"); + break; + } + h->map[h->clock].accessed=0; + } + if(h->map[h->clock].buffer) + { + dprintf(warn, " Unmapping\n"); + munmap(h->map[h->clock].buffer, MREAD_BUF_SIZE); + } + /* FIXME: Try MAP_HUGETLB? */ + /* FIXME: Make sure this works on large files... */ + h->map[h->clock].start_offset = offset & MREAD_BUF_MASK; + dprintf(warn, " Mapping %llx from offset %llx\n", + MREAD_BUF_SIZE, h->map[h->clock].start_offset); + h->map[h->clock].buffer = mmap(NULL, MREAD_BUF_SIZE, PROT_READ, + MAP_SHARED, + h->fd, + h->map[h->clock].start_offset); + dprintf(warn, " mmap returned %p\n", h->map[h->clock].buffer); + if ( h->map[h->clock].buffer == MAP_FAILED ) + { + h->map[h->clock].buffer = NULL; + perror("mmap"); + exit(1); + } + bind = h->clock; + } + + h->last=bind; +copy: + h->map[bind].accessed=1; + b=h->map[bind].buffer; + boffset=offset - h->map[bind].start_offset; + if ( boffset + len > MREAD_BUF_SIZE ) + bsize = MREAD_BUF_SIZE - boffset; + else + bsize = len; + dprintf(warn, " Using index %d, buffer at %p, buffer offset %llx len %d\n", + bind, b, boffset, bsize); + + bcopy(b+boffset, rec, len); + + /* Handle the boundary case; make sure this is after doing anything + * with the static variables*/ + if ( len > bsize ) + { + dprintf(warn, " Finishing up by reading l %d o %llx\n", + len-bsize, offset+bsize); + mread64(h, rec+bsize, len-bsize, offset+bsize); + } + + /* FIXME: ?? 
*/ + return len; +#undef dprintf +} diff -r 108d02354403 -r f86ebfe66384 mread.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mread.h Thu Jan 26 17:18:13 2012 +0000 @@ -0,0 +1,18 @@ +#define MREAD_MAPS 8 +#define MREAD_BUF_SHIFT 9 +#define PAGE_SHIFT 12 +#define MREAD_BUF_SIZE (1ULL<<(PAGE_SHIFT+MREAD_BUF_SHIFT)) +#define MREAD_BUF_MASK (~(MREAD_BUF_SIZE-1)) +typedef struct mread_ctrl { + int fd; + loff_t file_size; + struct mread_buffer { + char * buffer; + loff_t start_offset; + int accessed; + } map[MREAD_MAPS]; + int clock, last; +} *mread_handle_t; + +mread_handle_t mread_init(int fd); +ssize_t mread64(mread_handle_t h, void *dst, ssize_t len, loff_t offset); diff -r 108d02354403 -r f86ebfe66384 xenalyze.c --- a/xenalyze.c Thu Jan 26 17:17:53 2012 +0000 +++ b/xenalyze.c Thu Jan 26 17:18:13 2012 +0000 @@ -31,11 +31,15 @@ #include <unistd.h> #include "trace.h" #include "analyze.h" +#include "mread.h" #include <errno.h> #include <strings.h> #include <string.h> #include <assert.h> +struct mread_ctrl; + + typedef unsigned long long tsc_t; #define DEFAULT_CPU_HZ 2400000000LL @@ -60,6 +64,7 @@ struct array_struct { /* -- Global variables -- */ struct { int fd; + struct mread_ctrl *mh; struct symbol_struct * symbols; char * symbol_file; char * trace_file; @@ -257,18 +262,23 @@ struct { /* -- on-disk trace buffer definitions -- */ struct trace_record { - unsigned event:28, - extra_words:3, - cycle_flag:1; union { struct { - uint32_t tsc_lo, tsc_hi; - uint32_t data[7]; - } tsc; - struct { - uint32_t data[7]; - } notsc; - } u; + unsigned event:28, + extra_words:3, + cycle_flag:1; + union { + struct { + uint32_t tsc_lo, tsc_hi; + uint32_t data[7]; + } tsc; + struct { + uint32_t data[7]; + } notsc; + } u; + }; + uint32_t raw[8]; + }; }; FILE *warn = NULL; @@ -1943,7 +1953,7 @@ char * pcpu_string(int pcpu); void pcpu_string_draw(struct pcpu_info *p); void process_generic(struct record_info *ri); void dump_generic(FILE *f, struct record_info *ri); -ssize_t __read_record(int fd, struct trace_record *rec, loff_t offset); +ssize_t __read_record(struct trace_record *rec, loff_t offset); void error(enum error_level l, struct record_info *ri); void update_io_address(struct io_address ** list, unsigned int pa, int dir, tsc_t arc_cycles, unsigned int va); @@ -8123,17 +8133,26 @@ void dump_raw(char * s, struct record_in int i; if(ri->rec.cycle_flag) - printf("%s %x %d %lld [", + printf("%s %7x %d %14lld [", s, ri->event, ri->extra_words, ri->tsc); else - printf("%s %x %d - [", - s, ri->event, ri->extra_words); - - for(i=0; i<ri->extra_words; i++) { - printf(" %x", ri->d[i]); + printf("%s %7x %d %14s [", + s, ri->event, ri->extra_words, "-"); + + for(i=0; i<7; i++) { + if ( i < ri->extra_words ) + printf(" %8x", ri->d[i]); + else + printf(" "); } - printf(" ]\n"); + printf(" ] | "); + + for (i=0; i<8; i++) { + printf(" %08x", ri->rec.raw[i]); + } + + printf(" |\n"); } void error(enum error_level l, struct record_info *ri) @@ -8455,7 +8474,7 @@ loff_t scan_for_new_pcpu(loff_t offset) struct trace_record rec; struct cpu_change_data *cd; - r=__read_record(G.fd, &rec, offset); + r=__read_record(&rec, offset); if(r==0) return 0; @@ -9028,11 +9047,11 @@ void progress_finish(void) { } } -ssize_t __read_record(int fd, struct trace_record *rec, loff_t offset) +ssize_t __read_record(struct trace_record *rec, loff_t offset) { ssize_t r, rsize; - r=pread64(G.fd, rec, sizeof(*rec), offset); + r=mread64(G.mh, rec, sizeof(*rec), offset); if(r < 0) { /* Read error */ @@ -9110,14 +9129,14 @@ void 
__fill_in_record_info(struct pcpu_i ri->cpu = p->pid; } -ssize_t read_record(int fd, struct pcpu_info * p) { +ssize_t read_record(struct pcpu_info * p) { loff_t * offset; struct record_info *ri; offset = &p->file_offset; ri = &p->ri; - ri->size = __read_record(fd, &ri->rec, *offset); + ri->size = __read_record(&ri->rec, *offset); if(ri->size) { __fill_in_record_info(p); @@ -9317,7 +9336,7 @@ void process_records(void) { } } else - read_record(G.fd, p); + read_record(p); /* Update this pcpu in the processing order */ if ( p->active ) @@ -10304,6 +10323,10 @@ int main(int argc, char *argv[]) { fstat64(G.fd, &s); G.file_size = s.st_size; } + + if ( (G.mh = mread_init(G.fd)) == NULL ) + perror("mread"); + if (G.symbol_file != NULL) parse_symbol_file(G.symbol_file);
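For context, here is a minimal, hypothetical caller of the new interface, using only the two functions declared in mread.h above (mread_init and mread64). The file name, the fixed 32-byte stride, and the sequential walk are illustrative assumptions rather than part of the patch; sequential access is the pattern the eight-entry clock cache of 2MiB mappings is built for, where most reads hit the last-used buffer and cost only a copy.

#define _GNU_SOURCE    /* for loff_t */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include "mread.h"

int main(int argc, char *argv[])
{
    const char *path = (argc > 1) ? argv[1] : "trace.bin";
    int fd = open(path, O_RDONLY);
    if (fd < 0) { perror("open"); return 1; }

    mread_handle_t mh = mread_init(fd);

    unsigned char rec[32];   /* sizeof(struct trace_record) in xenalyze */
    loff_t offset = 0;
    ssize_t r;

    /* mread64 copies out of whichever mapping covers the offset,
     * remapping one of the eight buffers only on a miss */
    while ((r = mread64(mh, rec, sizeof(rec), offset)) > 0)
        offset += r;

    printf("read %lld bytes\n", (long long)offset);
    close(fd);
    return 0;
}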
George Dunlap
2012-Jan-26 17:21 UTC
[PATCH 8 of 8] xenalyze: Get rid of redundant hvm dump_header
No need to have a second dump_header. Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> diff -r f86ebfe66384 -r 84d9c7ac3cbf xenalyze.c --- a/xenalyze.c Thu Jan 26 17:18:13 2012 +0000 +++ b/xenalyze.c Thu Jan 26 17:18:43 2012 +0000 @@ -1435,7 +1435,6 @@ struct hvm_data { tsc_t exit_tsc, arc_cycles, entry_tsc; unsigned long long rip; unsigned exit_reason, event_handler; - char dump_header[256]; int short_summary_done:1, prealloc_unpin:1, wrmap_bf:1; /* Immediate processing */ @@ -3800,11 +3799,11 @@ void hvm_mmio_assist_process(struct reco { if(m->data_valid) printf("]%s mmio_assist %c gpa %llx data %x\n", - h->dump_header, + ri->dump_header, mevt.write?''w'':''r'', m->gpa, m->data); else - printf("]%s mmio_assist %c gpa %llx (no data)\n", h->dump_header, + printf("]%s mmio_assist %c gpa %llx (no data)\n", ri->dump_header, mevt.write?''w'':''r'', m->gpa); } @@ -3995,7 +3994,7 @@ void hvm_io_assist_process(struct record if(opt.dump_all) { printf(" %s io %s port %x val %x\n", - h->dump_header, + ri->dump_header, mevt.write?"write":"read", r->x32.port, r->x32.data); @@ -4348,13 +4347,13 @@ void hvm_cr_write_process(struct record_ { if(cr == 3 && h->v->cr3.val) { printf("]%s cr_write cr3 val %llx oval %llx %s\n", - h->dump_header, + ri->dump_header, val, h->v->cr3.val, (h->v->cr3.val == val)?"flush":"switch"); } else { printf(" %s cr_write cr%d val %llx\n", - h->dump_header, + ri->dump_header, cr, val); } @@ -4392,7 +4391,7 @@ void hvm_msr_write_process(struct record if(opt.dump_all) { printf(" %s msr_write addr %x val %llx\n", - h->dump_header, + ri->dump_header, r->addr, r->val); } @@ -4430,7 +4429,7 @@ void hvm_msr_read_process(struct record_ if(opt.dump_all) { printf(" %s msr_read addr %x val %llx\n", - h->dump_header, + ri->dump_header, r->addr, r->val); } @@ -4476,12 +4475,12 @@ void hvm_vmcall_process(struct record_in if(opt.dump_all) { if(r->eax < HYPERCALL_MAX) printf(" %s vmcall %2x (%s)\n", - h->dump_header, + ri->dump_header, r->eax, hypercall_name[r->eax]); else printf(" %s vmcall %2x\n", - h->dump_header, + ri->dump_header, r->eax); } @@ -4534,7 +4533,7 @@ void hvm_intr_summary(struct hvm_data *h } -void hvm_intr_process(struct hvm_data *h) +void hvm_intr_process(struct record_info *ri, struct hvm_data *h) { unsigned vec = *(unsigned *)h->d; @@ -4558,12 +4557,12 @@ void hvm_intr_process(struct hvm_data *h if ( vec < EXTERNAL_INTERRUPT_MAX && hvm_extint_vector_name[vec] ) printf(" %s intr vec %s(%x)\n", - h->dump_header, + ri->dump_header, hvm_extint_vector_name[vec], vec); else printf(" %s intr vec %x\n", - h->dump_header, vec); + ri->dump_header, vec); } if(opt.scatterplot_interrupt_eip @@ -4695,7 +4694,7 @@ void hvm_npf_process(struct record_info if ( opt.dump_all ) printf(" %s npf gpa %llx q %x mfn %llx t %d\n", - h->dump_header, + ri->dump_header, (unsigned long long)r->gpa, r->qualification, (unsigned long long)r->mfn, r->p2mt); } @@ -4708,7 +4707,7 @@ void hvm_rdtsc_process(struct record_inf if ( opt.dump_all ) printf(" %s rdtsc %llx %lld %s\n", - h->dump_header, + ri->dump_header, (unsigned long long)r->tsc, (unsigned long long)r->tsc, h->last_rdtsc > r->tsc ? 
"BACKWARDS" : ""); @@ -4922,7 +4921,7 @@ needs_vmexit: switch(ri->event) { /* Records adding to the vmexit reason */ case TRC_HVM_INTR: - hvm_intr_process(h); + hvm_intr_process(ri, h); break; case TRC_HVM_PF_XEN: case TRC_HVM_PF_XEN64: @@ -5259,50 +5258,6 @@ void hvm_vmexit_process(struct record_in h->post_process = hvm_generic_postprocess; h->inflight.generic.event = 0; - - if ( opt.dump_all) { - struct time_struct t; - char * c; - int len, r; - - abs_cycles_to_time(h->exit_tsc, &t); - - len = DUMP_HEADER_MAX; - c = h->dump_header; - - if ( t.time ) - { - r=snprintf(c, len, "%3u.%09u", t.s, t.ns); - c+=r; - len-=r; - } - else - { - r=snprintf(c, - len, - " "); - c+=r; - len-=r; - } - - r = snprintf(c, len, " %s", pcpu_string(ri->cpu)); - c+=r; - len-=r; - - if ( v ) - { - r = snprintf(c, len, " d%dv%d", v->d->did, v->vid); - c+=r; - len-=r; - } - else - { - r = snprintf(c, len, " d?v?"); - c+=r; - len-=r; - } - - } } void hvm_close_vmexit(struct hvm_data *h, tsc_t tsc) { @@ -5844,7 +5799,7 @@ void shadow_unsync_process(struct record if(opt.dump_all) printf("]%s shadow unsync gmfn %llx va %llx pt_level %d corr %llx\n", - h->dump_header, + ri->dump_header, e->gmfn, e->va, e->pt_level, @@ -5873,7 +5828,7 @@ void shadow_emulate_other_process(struct if(opt.dump_all) printf("]%s shadow %s gfn %llx va %llx\n", - h->dump_header, + ri->dump_header, pf_xen_name[sevt.minor], e->gfn, e->va); @@ -6016,14 +5971,14 @@ void shadow_fixup_process(struct record_ { if ( e->flag_unsync ) printf("]%s fixup:unsync va %llx gl1e %llx corr %llx flags (%x)%s\n", - h->dump_header, + ri->dump_header, e->va, e->gl1e, e->corresponding_va, e->flags, flag_string(e)); else printf("]%s fixup va %llx gl1e %llx flags (%x)%s\n", - h->dump_header, + ri->dump_header, e->va, e->gl1e, e->flags, flag_string(e)); } @@ -6103,7 +6058,7 @@ void shadow_mmio_process(struct record_i if(opt.dump_all) printf("]%s %smmio va %llx\n", - h->dump_header, + ri->dump_header, (e->pf_case==PF_XEN_FAST_MMIO)?"fast ":"", e->va); @@ -6200,7 +6155,7 @@ void shadow_propagate_process(struct rec if(opt.dump_all) printf("]%s propagate va %llx gl1e %llx flags (%x)%s\n", - h->dump_header, + ri->dump_header, e->va, e->gl1e, e->flags, flag_string(e));