Jan Beulich
2009-May-20 13:20 UTC
[Xen-devel] [PATCH] adjust the way cpumasks get passed around
Rather than passing cpumasks by value in all cases (which is problematic for large NR_CPUS configurations), pass them by value only if they are not wider than a pointer; in all other cases, pass them ''by reference'' (i.e. through a pointer to a const cpumask). On x86 this changes send_IPI_mask() to always only send IPIs to remote CPUs (meaning any caller needing to handle the current CPU as well has to do so on its own). Since the patch touches on_selected_cpus() parameters anyway, it at once removes that function''s (and then for consistency also on_each_cpu()''s as well as smp_call_function()''s similar) dead "retry" parameter. Signed-off-by: Jan Beulich <jbeulich@novell.com> --- 2009-05-19.orig/xen/arch/ia64/linux-xen/mca.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/ia64/linux-xen/mca.c 2009-05-19 16:42:41.000000000 +0200 @@ -956,7 +956,7 @@ ia64_mca_cmc_vector_enable (void *dummy) static void ia64_mca_cmc_vector_disable_keventd(void *unused) { - on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 1, 0); + on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 0); } /* @@ -968,7 +968,7 @@ ia64_mca_cmc_vector_disable_keventd(void static void ia64_mca_cmc_vector_enable_keventd(void *unused) { - on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 1, 0); + on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 0); } #endif /* !XEN */ --- 2009-05-19.orig/xen/arch/ia64/linux-xen/perfmon.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/ia64/linux-xen/perfmon.c 2009-05-19 16:42:41.000000000 +0200 @@ -6895,7 +6895,7 @@ pfm_install_alt_pmu_interrupt(pfm_intr_h } /* save the current system wide pmu states */ - ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 0, 1); + ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 1); if (ret) { DPRINT(("on_each_cpu() failed: %d\n", ret)); goto cleanup_reserve; @@ -6940,7 +6940,7 @@ pfm_remove_alt_pmu_interrupt(pfm_intr_ha pfm_alt_intr_handler = NULL; - ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 0, 1); + ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1); if (ret) { DPRINT(("on_each_cpu() failed: %d\n", ret)); } @@ -7499,7 +7499,7 @@ xenpfm_context_load(XEN_GUEST_HANDLE(pfa BUG_ON(in_irq()); spin_lock(&xenpfm_context_lock); - smp_call_function(&xenpfm_context_load_cpu, &arg, 1, 1); + smp_call_function(&xenpfm_context_load_cpu, &arg, 1); xenpfm_context_load_cpu(&arg); spin_unlock(&xenpfm_context_lock); for_each_online_cpu(cpu) { @@ -7553,7 +7553,7 @@ xenpfm_context_unload(void) return error; } - smp_call_function(&xenpfm_context_unload_cpu, &arg, 1, 1); + smp_call_function(&xenpfm_context_unload_cpu, &arg, 1); xenpfm_context_unload_cpu(&arg); spin_unlock(&xenpfm_context_lock); for_each_online_cpu(cpu) { --- 2009-05-19.orig/xen/arch/ia64/linux-xen/smp.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/ia64/linux-xen/smp.c 2009-05-19 16:42:41.000000000 +0200 @@ -57,19 +57,18 @@ //#if CONFIG_SMP || IA64 #if CONFIG_SMP //Huh? This seems to be used on ia64 even if !CONFIG_SMP -void smp_send_event_check_mask(cpumask_t mask) +void smp_send_event_check_mask(cpumask_ref_t mask) { int cpu; /* Not for me. 
*/ - cpu_clear(smp_processor_id(), mask); - if (cpus_empty(mask)) + if (cpumask_subset(mask, cpumask_of(smp_processor_id()))) return; //printf("smp_send_event_check_mask called\n"); for (cpu = 0; cpu < NR_CPUS; ++cpu) - if (cpu_isset(cpu, mask)) + if (cpumask_test(cpu, mask) && cpu != smp_processor_id()) platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); } #endif @@ -275,7 +274,7 @@ smp_send_reschedule (int cpu) void smp_flush_tlb_all (void) { - on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1); + on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1); } void @@ -298,7 +297,7 @@ smp_flush_tlb_mm (struct mm_struct *mm) * anyhow, and once a CPU is interrupted, the cost of local_flush_tlb_all() is * rather trivial. */ - on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1, 1); + on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1); } #endif @@ -373,7 +372,6 @@ EXPORT_SYMBOL(smp_call_function_single); * [SUMMARY] Run a function on all other CPUs. * <func> The function to run. This must be fast and non-blocking. * <info> An arbitrary pointer to pass to the function. - * <nonatomic> currently unused. * <wait> If true, wait (atomically) until function has completed on other CPUs. * [RETURNS] 0 on success, else a negative status code. * @@ -384,7 +382,7 @@ EXPORT_SYMBOL(smp_call_function_single); * hardware interrupt handler or from a bottom half handler. */ int -smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait) +smp_call_function (void (*func) (void *info), void *info, int wait) { struct call_data_struct data; int cpus = num_online_cpus()-1; @@ -438,11 +436,11 @@ EXPORT_SYMBOL(smp_call_function); #ifdef XEN int -on_selected_cpus(cpumask_t selected, void (*func) (void *info), void *info, - int retry, int wait) +on_selected_cpus(cpumask_ref_t selected, void (*func) (void *info), + void *info, int wait) { struct call_data_struct data; - unsigned int cpu, nr_cpus = cpus_weight(selected); + unsigned int cpu, nr_cpus = cpumask_weight(selected); ASSERT(local_irq_is_enabled()); @@ -460,7 +458,7 @@ on_selected_cpus(cpumask_t selected, voi call_data = &data; wmb(); - for_each_cpu_mask(cpu, selected) + for_each_cpu_mask(cpu, cpumask_deref(selected)) send_IPI_single(cpu, IPI_CALL_FUNC); while (atomic_read(wait ? 
&data.finished : &data.started) != nr_cpus) --- 2009-05-19.orig/xen/arch/ia64/linux-xen/sn/kernel/sn2_smp.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/ia64/linux-xen/sn/kernel/sn2_smp.c 2009-05-19 16:42:41.000000000 +0200 @@ -239,8 +239,8 @@ sn2_global_tlb_purge(unsigned long start flush_data.start = start; flush_data.end = end; flush_data.nbits = nbits; - on_selected_cpus(selected_cpus, sn_flush_ptcga_cpu, - &flush_data, 1, 1); + on_selected_cpus(cpumask_ref(selected_cpus), + sn_flush_ptcga_cpu, &flush_data, 1); } spin_unlock(&sn2_ptcg_lock2); } --- 2009-05-19.orig/xen/arch/ia64/xen/flushtlb.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/ia64/xen/flushtlb.c 2009-05-19 16:42:41.000000000 +0200 @@ -70,7 +70,7 @@ void new_tlbflush_clock_period(void) { /* flush all vhpt of physical cpu and mTLB */ - on_each_cpu(tlbflush_clock_local_flush, NULL, 1, 1); + on_each_cpu(tlbflush_clock_local_flush, NULL, 1); /* * if global TLB shootdown is finished, increment tlbflush_time --- 2009-05-19.orig/xen/arch/ia64/xen/fw_emul.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/ia64/xen/fw_emul.c 2009-05-20 14:47:08.000000000 +0200 @@ -845,8 +845,7 @@ xen_pal_emulator(unsigned long index, u6 .progress = 0, .status = 0 }; - smp_call_function(remote_pal_cache_flush, - (void *)&args, 1, 1); + smp_call_function(remote_pal_cache_flush, &args, 1); if (args.status != 0) panic_domain(NULL, "PAL_CACHE_FLUSH ERROR, " "remote status %lx", args.status); @@ -945,7 +944,7 @@ xen_pal_emulator(unsigned long index, u6 /* must be performed on all remote processors in the coherence domain. */ smp_call_function(remote_pal_prefetch_visibility, - (void *)in1, 1, 1); + (void *)in1, 1); status = 1; /* no more necessary on remote processor */ } break; @@ -953,7 +952,7 @@ xen_pal_emulator(unsigned long index, u6 status = ia64_pal_mc_drain(); /* FIXME: All vcpus likely call PAL_MC_DRAIN. That causes the congestion. */ - smp_call_function(remote_pal_mc_drain, NULL, 1, 1); + smp_call_function(remote_pal_mc_drain, NULL, 1); break; case PAL_BRAND_INFO: if (in1 == 0) { --- 2009-05-19.orig/xen/arch/ia64/xen/mm.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/ia64/xen/mm.c 2009-05-19 16:42:41.000000000 +0200 @@ -3182,7 +3182,7 @@ int get_page_type(struct page_info *page if ( unlikely(!cpus_empty(mask)) ) { perfc_incr(need_flush_tlb_flush); - flush_tlb_mask(mask); + flush_tlb_mask(cpumask_ref(mask)); } /* We lose existing type, back pointer, and validity. */ --- 2009-05-19.orig/xen/arch/ia64/xen/vhpt.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/ia64/xen/vhpt.c 2009-05-20 14:44:35.000000000 +0200 @@ -513,9 +513,9 @@ void domain_flush_tlb_vhpt(struct domain { /* Very heavy... 
*/ if (HAS_PERVCPU_VHPT(d) || is_hvm_domain(d)) - on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1); + on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1); else - on_each_cpu((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1); + on_each_cpu((void (*)(void *))flush_tlb_vhpt_all, d, 1); cpus_clear (d->domain_dirty_cpumask); } @@ -532,7 +532,7 @@ void flush_tlb_for_log_dirty(struct doma thash_purge_all(v); } smp_call_function((void (*)(void *))local_flush_tlb_all, - NULL, 1, 1); + NULL, 1); } else if (HAS_PERVCPU_VHPT(d)) { for_each_vcpu (d, v) { if (!v->is_initialised) @@ -541,29 +541,28 @@ void flush_tlb_for_log_dirty(struct doma vcpu_purge_tr_entry(&PSCBX(v,itlb)); vcpu_vhpt_flush(v); } - on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1); + on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1); } else { - on_each_cpu((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1); + on_each_cpu((void (*)(void *))flush_tlb_vhpt_all, d, 1); } cpus_clear (d->domain_dirty_cpumask); } -void flush_tlb_mask(cpumask_t mask) +void flush_tlb_mask(cpumask_ref_t mask) { int cpu; cpu = smp_processor_id(); - if (cpu_isset (cpu, mask)) { - cpu_clear(cpu, mask); + if (cpumask_test(cpu, mask)) flush_tlb_vhpt_all (NULL); - } - if (cpus_empty(mask)) + if (cpumask_subset(mask, cpumask_of(cpu))) return; - for_each_cpu_mask (cpu, mask) - smp_call_function_single - (cpu, (void (*)(void *))flush_tlb_vhpt_all, NULL, 1, 1); + for_each_cpu_mask (cpu, cpumask_deref(mask)) + if (cpu != smp_processor_id()) + smp_call_function_single + (cpu, (void (*)(void *))flush_tlb_vhpt_all, NULL, 1, 1); } #ifdef PERF_COUNTERS --- 2009-05-19.orig/xen/arch/x86/acpi/cpufreq/cpufreq.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/acpi/cpufreq/cpufreq.c 2009-05-19 16:44:31.000000000 +0200 @@ -186,7 +186,7 @@ static void drv_read(struct drv_cmd *cmd if (likely(cpu_isset(smp_processor_id(), cmd->mask))) do_drv_read((void *)cmd); else - on_selected_cpus( cmd->mask, do_drv_read, (void *)cmd, 0, 1); + on_selected_cpus(cpumask_ref(cmd->mask), do_drv_read, cmd, 1); } static void drv_write(struct drv_cmd *cmd) @@ -195,7 +195,7 @@ static void drv_write(struct drv_cmd *cm cpu_isset(smp_processor_id(), cmd->mask)) do_drv_write((void *)cmd); else - on_selected_cpus( cmd->mask, do_drv_write, (void *)cmd, 0, 0); + on_selected_cpus(cpumask_ref(cmd->mask), do_drv_write, cmd, 0); } static u32 get_cur_val(cpumask_t mask) @@ -274,7 +274,6 @@ static unsigned int get_measured_perf(un struct cpufreq_policy *policy; struct perf_pair readin, cur, *saved; unsigned int perf_percent; - cpumask_t cpumask; unsigned int retval; if (!cpu_online(cpu)) @@ -303,9 +302,7 @@ static unsigned int get_measured_perf(un if (cpu == smp_processor_id()) { read_measured_perf_ctrs((void *)&readin); } else { - cpumask = cpumask_of_cpu(cpu); - on_selected_cpus(cpumask, read_measured_perf_ctrs, - (void *)&readin, 0, 1); + on_selected_cpus(cpumask_of(cpu), read_measured_perf_ctrs, &readin, 1); } cur.aperf.whole = readin.aperf.whole - saved->aperf.whole; --- 2009-05-19.orig/xen/arch/x86/acpi/cpufreq/powernow.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/acpi/cpufreq/powernow.c 2009-05-19 16:42:41.000000000 +0200 @@ -121,7 +121,7 @@ static int powernow_cpufreq_target(struc cmd.val = next_perf_state; - on_selected_cpus( cmd.mask, transition_pstate, (void *) &cmd, 0, 0); + on_selected_cpus(cpumask_ref(cmd.mask), transition_pstate, &cmd, 0); perf->state = next_perf_state; policy->cur = freqs.new; --- 
2009-05-19.orig/xen/arch/x86/cpu/amd.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/cpu/amd.c 2009-05-19 16:42:41.000000000 +0200 @@ -246,7 +246,7 @@ static void check_disable_c1e(unsigned i { /* C1E is sometimes enabled during entry to ACPI mode. */ if ((port == acpi_smi_cmd) && (value == acpi_enable_value)) - on_each_cpu(disable_c1e, NULL, 1, 1); + on_each_cpu(disable_c1e, NULL, 1); } static void __devinit init_amd(struct cpuinfo_x86 *c) --- 2009-05-19.orig/xen/arch/x86/cpu/mcheck/amd_nonfatal.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/cpu/mcheck/amd_nonfatal.c 2009-05-19 16:42:41.000000000 +0200 @@ -133,7 +133,7 @@ void mce_amd_checkregs(void *info) */ static void mce_amd_work_fn(void *data) { - on_each_cpu(mce_amd_checkregs, data, 1, 1); + on_each_cpu(mce_amd_checkregs, data, 1); if (adjust > 0) { if (!guest_enabled_event(dom0->vcpu[0], VIRQ_MCA) ) { --- 2009-05-19.orig/xen/arch/x86/cpu/mcheck/mce.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/cpu/mcheck/mce.c 2009-05-19 16:48:12.000000000 +0200 @@ -1162,8 +1162,7 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u if (log_cpus == NULL) return x86_mcerr("do_mca cpuinfo", -ENOMEM); - if (on_each_cpu(do_mc_get_cpu_info, log_cpus, - 1, 1) != 0) { + if (on_each_cpu(do_mc_get_cpu_info, log_cpus, 1)) { xfree(log_cpus); return x86_mcerr("do_mca cpuinfo", -EIO); } @@ -1205,8 +1204,8 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u add_taint(TAINT_ERROR_INJECT); - on_selected_cpus(cpumask_of_cpu(target), - x86_mc_msrinject, mc_msrinject, 1, 1); + on_selected_cpus(cpumask_of(target), x86_mc_msrinject, + mc_msrinject, 1); break; @@ -1225,8 +1224,8 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u add_taint(TAINT_ERROR_INJECT); - on_selected_cpus(cpumask_of_cpu(target), x86_mc_mceinject, - mc_mceinject, 1, 1); + on_selected_cpus(cpumask_of(target), x86_mc_mceinject, + mc_mceinject, 1); break; default: --- 2009-05-19.orig/xen/arch/x86/cpu/mcheck/mce_intel.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/cpu/mcheck/mce_intel.c 2009-05-19 16:42:41.000000000 +0200 @@ -632,7 +632,7 @@ static void __cpu_mcheck_distribute_cmci void cpu_mcheck_distribute_cmci(void) { if (cmci_support && !mce_disabled) - on_each_cpu(__cpu_mcheck_distribute_cmci, NULL, 0, 0); + on_each_cpu(__cpu_mcheck_distribute_cmci, NULL, 0); } static void clear_cmci(void) --- 2009-05-19.orig/xen/arch/x86/cpu/mcheck/non-fatal.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/cpu/mcheck/non-fatal.c 2009-05-19 16:42:41.000000000 +0200 @@ -69,7 +69,7 @@ static void mce_checkregs (void *info) static void mce_work_fn(void *data) { - on_each_cpu(mce_checkregs, NULL, 1, 1); + on_each_cpu(mce_checkregs, NULL, 1); if (variable_period) { if (adjust) --- 2009-05-19.orig/xen/arch/x86/cpu/mtrr/main.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/cpu/mtrr/main.c 2009-05-19 16:42:41.000000000 +0200 @@ -229,7 +229,7 @@ static void set_mtrr(unsigned int reg, u atomic_set(&data.gate,0); /* Start the ball rolling on other CPUs */ - if (smp_call_function(ipi_handler, &data, 1, 0) != 0) + if (smp_call_function(ipi_handler, &data, 0) != 0) panic("mtrr: timed out waiting for other CPUs\n"); local_irq_save(flags); @@ -688,7 +688,7 @@ void mtrr_save_state(void) if (cpu == 0) mtrr_save_fixed_ranges(NULL); else - on_selected_cpus(cpumask_of_cpu(0), mtrr_save_fixed_ranges, NULL, 1, 1); + on_selected_cpus(cpumask_of(0), mtrr_save_fixed_ranges, NULL, 1); put_cpu(); } --- 2009-05-19.orig/xen/arch/x86/crash.c 2009-05-20 
14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/crash.c 2009-05-19 16:42:41.000000000 +0200 @@ -58,10 +58,8 @@ static int crash_nmi_callback(struct cpu */ static void smp_send_nmi_allbutself(void) { - cpumask_t allbutself = cpu_online_map; - cpu_clear(smp_processor_id(), allbutself); - if ( !cpus_empty(allbutself) ) - send_IPI_mask(allbutself, APIC_DM_NMI); + if ( !cpumask_subset(cpu_online_ref, cpumask_of(smp_processor_id())) ) + send_IPI_mask(cpu_online_ref, APIC_DM_NMI); } static void nmi_shootdown_cpus(void) --- 2009-05-19.orig/xen/arch/x86/domain.c 2009-05-19 16:40:37.000000000 +0200 +++ 2009-05-19/xen/arch/x86/domain.c 2009-05-19 16:42:41.000000000 +0200 @@ -1316,7 +1316,7 @@ void context_switch(struct vcpu *prev, s if ( unlikely(!cpu_isset(cpu, dirty_mask) && !cpus_empty(dirty_mask)) ) { /* Other cpus call __sync_lazy_execstate from flush ipi handler. */ - flush_tlb_mask(dirty_mask); + flush_tlb_mask(cpumask_ref(dirty_mask)); } if ( is_hvm_vcpu(prev) && !list_empty(&prev->arch.hvm_vcpu.tm_list) ) @@ -1410,7 +1410,7 @@ void sync_vcpu_execstate(struct vcpu *v) (void)__sync_lazy_execstate(); /* Other cpus call __sync_lazy_execstate from flush ipi handler. */ - flush_tlb_mask(v->vcpu_dirty_cpumask); + flush_tlb_mask(cpumask_ref(v->vcpu_dirty_cpumask)); } struct migrate_info { --- 2009-05-19.orig/xen/arch/x86/genapic/x2apic.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/genapic/x2apic.c 2009-05-19 16:42:41.000000000 +0200 @@ -56,7 +56,7 @@ unsigned int cpu_mask_to_apicid_x2apic(c return cpu_physical_id(first_cpu(cpumask)); } -void send_IPI_mask_x2apic(cpumask_t cpumask, int vector) +void send_IPI_mask_x2apic(cpumask_ref_t cpumask, int vector) { unsigned int cpu, cfg; unsigned long flags; @@ -76,8 +76,9 @@ void send_IPI_mask_x2apic(cpumask_t cpum local_irq_save(flags); cfg = APIC_DM_FIXED | 0 /* no shorthand */ | APIC_DEST_PHYSICAL | vector; - for_each_cpu_mask ( cpu, cpumask ) - apic_wrmsr(APIC_ICR, cfg, cpu_physical_id(cpu)); + for_each_cpu_mask ( cpu, cpumask_deref(cpumask) ) + if ( cpu != smp_processor_id() ) + apic_wrmsr(APIC_ICR, cfg, cpu_physical_id(cpu)); local_irq_restore(flags); } --- 2009-05-19.orig/xen/arch/x86/hpet.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/hpet.c 2009-05-20 08:41:08.000000000 +0200 @@ -617,7 +617,7 @@ void hpet_disable_legacy_broadcast(void) spin_unlock_irq(&legacy_hpet_event.lock); - smp_send_event_check_mask(cpu_online_map); + smp_send_event_check_mask(cpu_online_ref); } void hpet_broadcast_enter(void) --- 2009-05-19.orig/xen/arch/x86/hvm/hvm.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/hvm/hvm.c 2009-05-19 16:42:41.000000000 +0200 @@ -971,7 +971,7 @@ int hvm_set_cr0(unsigned long value) if ( !v->domain->arch.hvm_domain.is_in_uc_mode ) { /* Flush physical caches. */ - on_each_cpu(local_flush_cache, NULL, 1, 1); + on_each_cpu(local_flush_cache, NULL, 1); hvm_set_uc_mode(v, 1); } spin_unlock(&v->domain->arch.hvm_domain.uc_lock); @@ -2408,7 +2408,7 @@ static int hvmop_flush_tlb_all(void) paging_update_cr3(v); /* Flush all dirty TLBs. */ - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(d->domain_dirty_cpumask)); /* Done. 
*/ for_each_vcpu ( d, v ) --- 2009-05-19.orig/xen/arch/x86/hvm/svm/svm.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/hvm/svm/svm.c 2009-05-19 16:42:41.000000000 +0200 @@ -1173,7 +1173,7 @@ static void wbinvd_ipi(void *info) static void svm_wbinvd_intercept(void) { if ( has_arch_pdevs(current->domain) ) - on_each_cpu(wbinvd_ipi, NULL, 1, 1); + on_each_cpu(wbinvd_ipi, NULL, 1); } static void svm_vmexit_do_invalidate_cache(struct cpu_user_regs *regs) --- 2009-05-19.orig/xen/arch/x86/hvm/vmx/vmcs.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/hvm/vmx/vmcs.c 2009-05-19 16:42:41.000000000 +0200 @@ -264,7 +264,7 @@ static void vmx_clear_vmcs(struct vcpu * int cpu = v->arch.hvm_vmx.active_cpu; if ( cpu != -1 ) - on_selected_cpus(cpumask_of_cpu(cpu), __vmx_clear_vmcs, v, 1, 1); + on_selected_cpus(cpumask_of(cpu), __vmx_clear_vmcs, v, 1); } static void vmx_load_vmcs(struct vcpu *v) @@ -899,7 +899,7 @@ void vmx_do_resume(struct vcpu *v) { int cpu = v->arch.hvm_vmx.active_cpu; if ( cpu != -1 ) - on_selected_cpus(cpumask_of_cpu(cpu), wbinvd_ipi, NULL, 1, 1); + on_selected_cpus(cpumask_of(cpu), wbinvd_ipi, NULL, 1); } vmx_clear_vmcs(v); --- 2009-05-19.orig/xen/arch/x86/hvm/vmx/vmx.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/hvm/vmx/vmx.c 2009-05-19 16:42:41.000000000 +0200 @@ -1219,7 +1219,7 @@ void ept_sync_domain(struct domain *d) if ( d->arch.hvm_domain.hap_enabled && d->vcpu[0] ) { ASSERT(local_irq_is_enabled()); - on_each_cpu(__ept_sync_domain, d, 1, 1); + on_each_cpu(__ept_sync_domain, d, 1); } } @@ -2130,7 +2130,7 @@ static void vmx_wbinvd_intercept(void) return; if ( cpu_has_wbinvd_exiting ) - on_each_cpu(wbinvd_ipi, NULL, 1, 1); + on_each_cpu(wbinvd_ipi, NULL, 1); else wbinvd(); } @@ -2163,7 +2163,7 @@ static void ept_handle_violation(unsigne { paging_mark_dirty(d, mfn_x(mfn)); p2m_change_type(d, gfn, p2m_ram_logdirty, p2m_ram_rw); - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(d->domain_dirty_cpumask)); } return; } --- 2009-05-19.orig/xen/arch/x86/irq.c 2009-05-20 11:46:56.000000000 +0200 +++ 2009-05-19/xen/arch/x86/irq.c 2009-05-20 11:47:10.000000000 +0200 @@ -516,7 +516,7 @@ static void __pirq_guest_eoi(struct doma } if ( !cpus_empty(cpu_eoi_map) ) - on_selected_cpus(cpu_eoi_map, set_eoi_ready, desc, 1, 0); + on_selected_cpus(cpumask_ref(cpu_eoi_map), set_eoi_ready, desc, 0); } int pirq_guest_eoi(struct domain *d, int irq) @@ -755,7 +755,7 @@ static irq_guest_action_t *__pirq_guest_ { cpu_eoi_map = action->cpu_eoi_map; spin_unlock_irq(&desc->lock); - on_selected_cpus(cpu_eoi_map, set_eoi_ready, desc, 1, 0); + on_selected_cpus(cpumask_ref(cpu_eoi_map), set_eoi_ready, desc, 0); spin_lock_irq(&desc->lock); } break; @@ -793,7 +793,7 @@ static irq_guest_action_t *__pirq_guest_ { BUG_ON(action->ack_type != ACKTYPE_EOI); spin_unlock_irq(&desc->lock); - on_selected_cpus(cpu_eoi_map, set_eoi_ready, desc, 1, 1); + on_selected_cpus(cpumask_ref(cpu_eoi_map), set_eoi_ready, desc, 1); spin_lock_irq(&desc->lock); } --- 2009-05-19.orig/xen/arch/x86/machine_kexec.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/machine_kexec.c 2009-05-19 16:42:41.000000000 +0200 @@ -91,7 +91,6 @@ static void __machine_reboot_kexec(void void machine_reboot_kexec(xen_kexec_image_t *image) { int reboot_cpu_id; - cpumask_t reboot_cpu; reboot_cpu_id = 0; @@ -100,9 +99,8 @@ void machine_reboot_kexec(xen_kexec_imag if ( reboot_cpu_id != smp_processor_id() ) { - cpus_clear(reboot_cpu); - cpu_set(reboot_cpu_id, reboot_cpu); - 
on_selected_cpus(reboot_cpu, __machine_reboot_kexec, image, 1, 0); + on_selected_cpus(cpumask_of(reboot_cpu_id), __machine_reboot_kexec, + image, 0); for (;;) ; /* nothing */ } --- 2009-05-19.orig/xen/arch/x86/mm.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/mm.c 2009-05-19 16:42:41.000000000 +0200 @@ -510,7 +510,7 @@ static void invalidate_shadow_ldt(struct /* Rid TLBs of stale mappings (guest mappings and shadow mappings). */ if ( flush ) - flush_tlb_mask(v->vcpu_dirty_cpumask); + flush_tlb_mask(cpumask_ref(v->vcpu_dirty_cpumask)); out: spin_unlock(&v->arch.shadow_ldt_lock); @@ -1235,7 +1235,7 @@ static void pae_flush_pgd( paging_update_cr3(v); cpus_or(m, m, v->vcpu_dirty_cpumask); } - flush_tlb_mask(m); + flush_tlb_mask(cpumask_ref(m)); } /* If below 4GB then the pgdir is not shadowed in low memory. */ @@ -1260,7 +1260,7 @@ static void pae_flush_pgd( spin_unlock(&cache->lock); } - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(d->domain_dirty_cpumask)); } #else # define pae_flush_pgd(mfn, idx, nl3e) ((void)0) @@ -2275,7 +2275,7 @@ static int __get_page_type(struct page_i ((nx & PGT_type_mask) == PGT_writable_page)) ) { perfc_incr(need_flush_tlb_flush); - flush_tlb_mask(mask); + flush_tlb_mask(cpumask_ref(mask)); } /* We lose existing type and validity. */ @@ -2476,7 +2476,7 @@ static void process_deferred_ops(void) if ( deferred_ops & (DOP_FLUSH_ALL_TLBS|DOP_FLUSH_TLB) ) { if ( deferred_ops & DOP_FLUSH_ALL_TLBS ) - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(d->domain_dirty_cpumask)); else flush_tlb_local(); } @@ -2811,9 +2811,9 @@ int do_mmuext_op( } pmask = vcpumask_to_pcpumask(d, vmask); if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI ) - flush_tlb_mask(pmask); + flush_tlb_mask(cpumask_ref(pmask)); else - flush_tlb_one_mask(pmask, op.arg1.linear_addr); + flush_tlb_one_mask(cpumask_ref(pmask), op.arg1.linear_addr); break; } @@ -2822,7 +2822,8 @@ int do_mmuext_op( break; case MMUEXT_INVLPG_ALL: - flush_tlb_one_mask(d->domain_dirty_cpumask, op.arg1.linear_addr); + flush_tlb_one_mask(cpumask_ref(d->domain_dirty_cpumask), + op.arg1.linear_addr); break; case MMUEXT_FLUSH_CACHE: @@ -3639,7 +3640,7 @@ int do_update_va_mapping(unsigned long v pmask = vcpumask_to_pcpumask(d, vmask); if ( cpu_isset(smp_processor_id(), pmask) ) this_cpu(percpu_mm_info).deferred_ops &= ~DOP_FLUSH_TLB; - flush_tlb_mask(pmask); + flush_tlb_mask(cpumask_ref(pmask)); break; } break; @@ -3657,7 +3658,7 @@ int do_update_va_mapping(unsigned long v flush_tlb_one_local(va); break; case UVMF_ALL: - flush_tlb_one_mask(d->domain_dirty_cpumask, va); + flush_tlb_one_mask(cpumask_ref(d->domain_dirty_cpumask), va); break; default: if ( unlikely(!is_pv_32on64_domain(d) ? @@ -3667,7 +3668,7 @@ int do_update_va_mapping(unsigned long v pmask = vcpumask_to_pcpumask(d, vmask); if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_TLB ) cpu_clear(smp_processor_id(), pmask); - flush_tlb_one_mask(pmask, va); + flush_tlb_one_mask(cpumask_ref(pmask), va); break; } break; --- 2009-05-19.orig/xen/arch/x86/mm/hap/hap.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/mm/hap/hap.c 2009-05-19 16:42:41.000000000 +0200 @@ -64,7 +64,7 @@ int hap_enable_log_dirty(struct domain * /* set l1e entries of P2M table to be read-only. 
*/ p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty); - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(d->domain_dirty_cpumask)); return 0; } @@ -83,7 +83,7 @@ void hap_clean_dirty_bitmap(struct domai { /* set l1e entries of P2M table to be read-only. */ p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty); - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(d->domain_dirty_cpumask)); } /************************************************/ @@ -643,7 +643,7 @@ hap_write_p2m_entry(struct vcpu *v, unsi safe_write_pte(p, new); if ( (old_flags & _PAGE_PRESENT) && (level == 1 || (level == 2 && (old_flags & _PAGE_PSE))) ) - flush_tlb_mask(v->domain->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(v->domain->domain_dirty_cpumask)); #if CONFIG_PAGING_LEVELS == 3 /* install P2M in monitor table for PAE Xen */ --- 2009-05-19.orig/xen/arch/x86/mm/shadow/common.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/mm/shadow/common.c 2009-05-19 16:42:41.000000000 +0200 @@ -695,7 +695,7 @@ static int oos_remove_write_access(struc } if ( ftlb ) - flush_tlb_mask(v->domain->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(v->domain->domain_dirty_cpumask)); return 0; } @@ -1145,7 +1145,7 @@ sh_validate_guest_pt_write(struct vcpu * rc = sh_validate_guest_entry(v, gmfn, entry, size); if ( rc & SHADOW_SET_FLUSH ) /* Need to flush TLBs to pick up shadow PT changes */ - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(d->domain_dirty_cpumask)); if ( rc & SHADOW_SET_ERROR ) { /* This page is probably not a pagetable any more: tear it out of the @@ -1393,7 +1393,7 @@ static void _shadow_prealloc( /* See if that freed up enough space */ if ( space_is_available(d, order, count) ) { - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(d->domain_dirty_cpumask)); return; } } @@ -1447,7 +1447,7 @@ static void shadow_blow_tables(struct do pagetable_get_mfn(v->arch.shadow_table[i])); /* Make sure everyone sees the unshadowings */ - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(d->domain_dirty_cpumask)); } void shadow_blow_tables_per_domain(struct domain *d) @@ -1554,7 +1554,7 @@ mfn_t shadow_alloc(struct domain *d, if ( unlikely(!cpus_empty(mask)) ) { perfc_incr(shadow_alloc_tlbflush); - flush_tlb_mask(mask); + flush_tlb_mask(cpumask_ref(mask)); } /* Now safe to clear the page for reuse */ p = sh_map_domain_page(page_to_mfn(sp+i)); @@ -2803,7 +2803,7 @@ void sh_remove_shadows(struct vcpu *v, m /* Need to flush TLBs now, so that linear maps are safe next time we * take a fault. 
*/ - flush_tlb_mask(v->domain->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(v->domain->domain_dirty_cpumask)); if ( do_locking ) shadow_unlock(v->domain); } @@ -3435,7 +3435,7 @@ shadow_write_p2m_entry(struct vcpu *v, u { sh_remove_all_shadows_and_parents(v, mfn); if ( sh_remove_all_mappings(v, mfn) ) - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(d->domain_dirty_cpumask)); } } @@ -3474,7 +3474,7 @@ shadow_write_p2m_entry(struct vcpu *v, u } omfn = _mfn(mfn_x(omfn) + 1); } - flush_tlb_mask(flushmask); + flush_tlb_mask(cpumask_ref(flushmask)); if ( npte ) unmap_domain_page(npte); @@ -3752,7 +3752,7 @@ int shadow_track_dirty_vram(struct domai } } if ( flush_tlb ) - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(d->domain_dirty_cpumask)); goto out; out_sl1ma: --- 2009-05-19.orig/xen/arch/x86/mm/shadow/multi.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/mm/shadow/multi.c 2009-05-19 16:42:41.000000000 +0200 @@ -3146,7 +3146,7 @@ static int sh_page_fault(struct vcpu *v, */ perfc_incr(shadow_rm_write_flush_tlb); atomic_inc(&d->arch.paging.shadow.gtable_dirty_version); - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(d->domain_dirty_cpumask)); } #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) @@ -4135,7 +4135,7 @@ sh_update_cr3(struct vcpu *v, int do_loc * (old) shadow linear maps in the writeable mapping heuristics. */ #if GUEST_PAGING_LEVELS == 2 if ( sh_remove_write_access(v, gmfn, 2, 0) != 0 ) - flush_tlb_mask(v->domain->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(v->domain->domain_dirty_cpumask)); sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow); #elif GUEST_PAGING_LEVELS == 3 /* PAE guests have four shadow_table entries, based on the @@ -4158,7 +4158,7 @@ sh_update_cr3(struct vcpu *v, int do_loc } } if ( flush ) - flush_tlb_mask(v->domain->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(v->domain->domain_dirty_cpumask)); /* Now install the new shadows. 
*/ for ( i = 0; i < 4; i++ ) { @@ -4179,7 +4179,7 @@ sh_update_cr3(struct vcpu *v, int do_loc } #elif GUEST_PAGING_LEVELS == 4 if ( sh_remove_write_access(v, gmfn, 4, 0) != 0 ) - flush_tlb_mask(v->domain->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(v->domain->domain_dirty_cpumask)); sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow); #else #error This should never happen --- 2009-05-19.orig/xen/arch/x86/oprofile/nmi_int.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/oprofile/nmi_int.c 2009-05-19 16:42:41.000000000 +0200 @@ -186,7 +186,7 @@ static void nmi_cpu_setup(void * dummy) int nmi_setup_events(void) { - on_each_cpu(nmi_cpu_setup, NULL, 0, 1); + on_each_cpu(nmi_cpu_setup, NULL, 1); return 0; } @@ -207,7 +207,7 @@ int nmi_reserve_counters(void) /* We need to serialize save and setup for HT because the subset * of msrs are distinct for save and setup operations */ - on_each_cpu(nmi_save_registers, NULL, 0, 1); + on_each_cpu(nmi_save_registers, NULL, 1); return 0; } @@ -256,7 +256,7 @@ static void nmi_cpu_shutdown(void * dumm void nmi_release_counters(void) { - on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); + on_each_cpu(nmi_cpu_shutdown, NULL, 1); release_lapic_nmi(); free_msrs(); } @@ -274,7 +274,7 @@ static void nmi_cpu_start(void * dummy) int nmi_start(void) { - on_each_cpu(nmi_cpu_start, NULL, 0, 1); + on_each_cpu(nmi_cpu_start, NULL, 1); return 0; } @@ -306,7 +306,7 @@ static void nmi_cpu_stop(void * dummy) void nmi_stop(void) { - on_each_cpu(nmi_cpu_stop, NULL, 0, 1); + on_each_cpu(nmi_cpu_stop, NULL, 1); } --- 2009-05-19.orig/xen/arch/x86/shutdown.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/shutdown.c 2009-05-19 16:42:41.000000000 +0200 @@ -91,7 +91,7 @@ void machine_halt(void) watchdog_disable(); console_start_sync(); local_irq_enable(); - smp_call_function(__machine_halt, NULL, 1, 0); + smp_call_function(__machine_halt, NULL, 0); __machine_halt(NULL); } @@ -310,8 +310,8 @@ void machine_restart(unsigned int delay_ if ( get_apic_id() != boot_cpu_physical_apicid ) { /* Send IPI to the boot CPU (logical cpu 0). */ - on_selected_cpus(cpumask_of_cpu(0), __machine_restart, - &delay_millisecs, 1, 0); + on_selected_cpus(cpumask_of(0), __machine_restart, + &delay_millisecs, 0); for ( ; ; ) halt(); } --- 2009-05-19.orig/xen/arch/x86/smp.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/smp.c 2009-05-19 16:42:41.000000000 +0200 @@ -84,12 +84,14 @@ void apic_wait_icr_idle(void) cpu_relax(); } -void send_IPI_mask_flat(cpumask_t cpumask, int vector) +void send_IPI_mask_flat(cpumask_ref_t cpumask, int vector) { - unsigned long mask = cpus_addr(cpumask)[0]; + unsigned long mask = cpus_addr(cpumask_deref(cpumask))[0]; unsigned long cfg; unsigned long flags; + mask &= ~(1UL << smp_processor_id()); + /* An IPI with no target generates a send accept error from P5/P6 APICs. */ WARN_ON(mask == 0); @@ -119,15 +121,18 @@ void send_IPI_mask_flat(cpumask_t cpumas local_irq_restore(flags); } -void send_IPI_mask_phys(cpumask_t mask, int vector) +void send_IPI_mask_phys(cpumask_ref_t mask, int vector) { unsigned long cfg, flags; unsigned int query_cpu; local_irq_save(flags); - for_each_cpu_mask ( query_cpu, mask ) + for_each_cpu_mask ( query_cpu, cpumask_deref(mask) ) { + if ( query_cpu == smp_processor_id() ) + continue; + /* * Wait for idle. 
*/ @@ -170,20 +175,18 @@ fastcall void smp_invalidate_interrupt(v irq_exit(); } -void flush_area_mask(cpumask_t mask, const void *va, unsigned int flags) +void flush_area_mask(cpumask_ref_t mask, const void *va, unsigned int flags) { ASSERT(local_irq_is_enabled()); - if ( cpu_isset(smp_processor_id(), mask) ) - { + if ( cpumask_test(smp_processor_id(), mask) ) flush_area_local(va, flags); - cpu_clear(smp_processor_id(), mask); - } - if ( !cpus_empty(mask) ) + if ( !cpumask_subset(mask, cpumask_of(smp_processor_id())) ) { spin_lock(&flush_lock); - flush_cpumask = mask; + cpus_andnot(flush_cpumask, cpumask_deref(mask), + cpumask_of_cpu(smp_processor_id())); flush_va = va; flush_flags = flags; send_IPI_mask(mask, INVALIDATE_TLB_VECTOR); @@ -201,17 +204,16 @@ void new_tlbflush_clock_period(void) /* Flush everyone else. We definitely flushed just before entry. */ allbutself = cpu_online_map; cpu_clear(smp_processor_id(), allbutself); - flush_mask(allbutself, FLUSH_TLB); + flush_mask(cpumask_ref(allbutself), FLUSH_TLB); /* No need for atomicity: we are the only possible updater. */ ASSERT(tlbflush_clock == 0); tlbflush_clock++; } -void smp_send_event_check_mask(cpumask_t mask) +void smp_send_event_check_mask(cpumask_ref_t mask) { - cpu_clear(smp_processor_id(), mask); - if ( !cpus_empty(mask) ) + if ( !cpumask_subset(mask, cpumask_of(smp_processor_id())) ) send_IPI_mask(mask, EVENT_CHECK_VECTOR); } @@ -225,7 +227,7 @@ struct call_data_struct { int wait; atomic_t started; atomic_t finished; - cpumask_t selected; + cpumask_ref_t selected; }; static DEFINE_SPINLOCK(call_lock); @@ -234,23 +236,21 @@ static struct call_data_struct *call_dat int smp_call_function( void (*func) (void *info), void *info, - int retry, int wait) { cpumask_t allbutself = cpu_online_map; cpu_clear(smp_processor_id(), allbutself); - return on_selected_cpus(allbutself, func, info, retry, wait); + return on_selected_cpus(cpumask_ref(allbutself), func, info, wait); } int on_selected_cpus( - cpumask_t selected, + cpumask_ref_t selected, void (*func) (void *info), void *info, - int retry, int wait) { struct call_data_struct data; - unsigned int nr_cpus = cpus_weight(selected); + unsigned int nr_cpus = cpumask_weight(selected); ASSERT(local_irq_is_enabled()); @@ -258,7 +258,7 @@ int on_selected_cpus( if ( unlikely(!cpu_has_apic) ) { ASSERT(num_online_cpus() == 1); - if ( cpu_isset(0, selected) ) + if ( cpumask_test(0, selected) ) { local_irq_disable(); func(info); @@ -279,9 +279,18 @@ int on_selected_cpus( spin_lock(&call_lock); - call_data = &data; + if ( !cpumask_subset(selected, cpumask_of(smp_processor_id())) ) + { + call_data = &data; - send_IPI_mask(selected, CALL_FUNCTION_VECTOR); + send_IPI_mask(selected, CALL_FUNCTION_VECTOR); + } + + if ( cpumask_test(smp_processor_id(), selected) ) + { + func(info); + --nr_cpus; + } while ( atomic_read(wait ? &data.finished : &data.started) != nr_cpus ) cpu_relax(); @@ -323,7 +332,7 @@ void smp_send_stop(void) { int timeout = 10; - smp_call_function(stop_this_cpu, NULL, 1, 0); + smp_call_function(stop_this_cpu, NULL, 0); /* Wait 10ms for all other CPUs to go offline. 
*/ while ( (num_online_cpus() > 1) && (timeout-- > 0) ) @@ -349,7 +358,7 @@ fastcall void smp_call_function_interrup ack_APIC_irq(); perfc_incr(ipis); - if ( !cpu_isset(smp_processor_id(), call_data->selected) ) + if ( !cpumask_test(smp_processor_id(), call_data->selected) ) return; irq_enter(); --- 2009-05-19.orig/xen/arch/x86/time.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/time.c 2009-05-19 16:49:40.000000000 +0200 @@ -1189,11 +1189,11 @@ static void time_calibration(void *unuse }; /* @wait=1 because we must wait for all cpus before freeing @r. */ - on_selected_cpus(r.cpu_calibration_map, + on_selected_cpus(cpumask_ref(r.cpu_calibration_map), opt_consistent_tscs ? time_calibration_tsc_rendezvous : time_calibration_std_rendezvous, - &r, 0, 1); + &r, 1); } void init_percpu_time(void) --- 2009-05-19.orig/xen/arch/x86/x86_32/traps.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/arch/x86/x86_32/traps.c 2009-05-19 16:42:41.000000000 +0200 @@ -403,7 +403,7 @@ static long register_guest_callback(stru case CALLBACKTYPE_sysenter_deprecated: if ( !cpu_has_sep ) ret = -EINVAL; - else if ( on_each_cpu(do_update_sysenter, ®->address, 1, 1) != 0 ) + else if ( on_each_cpu(do_update_sysenter, ®->address, 1) != 0 ) ret = -EIO; break; --- 2009-05-19.orig/xen/common/Makefile 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/common/Makefile 2009-05-19 16:42:41.000000000 +0200 @@ -1,4 +1,5 @@ obj-y += bitmap.o +obj-y += cpu.o obj-y += domctl.o obj-y += domain.o obj-y += event_channel.o --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ 2009-05-19/xen/common/cpu.c 2009-05-19 16:42:41.000000000 +0200 @@ -0,0 +1,26 @@ +#include <xen/config.h> +#include <xen/cpumask.h> + +/* + * cpu_bit_bitmap[] is a special, "compressed" data structure that + * represents all NR_CPUS bits binary values of 1<<nr. + * + * It is used by cpumask_of() to get a constant address to a CPU + * mask value that has a single bit set only. + */ + +/* cpu_bit_bitmap[0] is empty - so we can back into it */ +#define MASK_DECLARE_1(x) [x+1][0] = 1UL << (x) +#define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1) +#define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2) +#define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4) + +const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = { + + MASK_DECLARE_8(0), MASK_DECLARE_8(8), + MASK_DECLARE_8(16), MASK_DECLARE_8(24), +#if BITS_PER_LONG > 32 + MASK_DECLARE_8(32), MASK_DECLARE_8(40), + MASK_DECLARE_8(48), MASK_DECLARE_8(56), +#endif +}; --- 2009-05-19.orig/xen/common/gdbstub.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/common/gdbstub.c 2009-05-19 16:42:41.000000000 +0200 @@ -672,7 +672,7 @@ static void gdb_smp_pause(void) atomic_set(&gdb_smp_paused_count, 0); - smp_call_function(gdb_pause_this_cpu, NULL, /* dont wait! */0, 0); + smp_call_function(gdb_pause_this_cpu, NULL, /* dont wait! 
*/0); /* Wait 100ms for all other CPUs to enter pause loop */ while ( (atomic_read(&gdb_smp_paused_count) < (num_online_cpus() - 1)) --- 2009-05-19.orig/xen/common/grant_table.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/common/grant_table.c 2009-05-19 16:42:41.000000000 +0200 @@ -715,7 +715,7 @@ gnttab_unmap_grant_ref( goto fault; } - flush_tlb_mask(current->domain->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(current->domain->domain_dirty_cpumask)); for ( i = 0; i < partial_done; i++ ) __gnttab_unmap_common_complete(&(common[i])); @@ -727,7 +727,7 @@ gnttab_unmap_grant_ref( return 0; fault: - flush_tlb_mask(current->domain->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(current->domain->domain_dirty_cpumask)); for ( i = 0; i < partial_done; i++ ) __gnttab_unmap_common_complete(&(common[i])); @@ -774,7 +774,7 @@ gnttab_unmap_and_replace( goto fault; } - flush_tlb_mask(current->domain->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(current->domain->domain_dirty_cpumask)); for ( i = 0; i < partial_done; i++ ) __gnttab_unmap_common_complete(&(common[i])); @@ -786,7 +786,7 @@ gnttab_unmap_and_replace( return 0; fault: - flush_tlb_mask(current->domain->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(current->domain->domain_dirty_cpumask)); for ( i = 0; i < partial_done; i++ ) __gnttab_unmap_common_complete(&(common[i])); @@ -1123,7 +1123,7 @@ gnttab_transfer( #ifndef __ia64__ /* IA64 implicitly replaces the old page in steal_page(). */ guest_physmap_remove_page(d, gop.mfn, mfn, 0); #endif - flush_tlb_mask(d->domain_dirty_cpumask); + flush_tlb_mask(cpumask_ref(d->domain_dirty_cpumask)); /* Find the target domain. */ if ( unlikely((e = rcu_lock_domain_by_id(gop.domid)) == NULL) ) --- 2009-05-19.orig/xen/common/keyhandler.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/common/keyhandler.c 2009-05-19 16:50:35.000000000 +0200 @@ -119,7 +119,7 @@ static void dump_registers(unsigned char if ( cpu == smp_processor_id() ) continue; printk("\n*** Dumping CPU%d host state: ***\n", cpu); - on_selected_cpus(cpumask_of_cpu(cpu), __dump_execstate, NULL, 1, 1); + on_selected_cpus(cpumask_of(cpu), __dump_execstate, NULL, 1); } printk("\n"); @@ -263,7 +263,7 @@ static void read_clocks(unsigned char ke spin_lock(&lock); - smp_call_function(read_clocks_slave, NULL, 0, 0); + smp_call_function(read_clocks_slave, NULL, 0); local_irq_disable(); read_clocks_cpumask = cpu_online_map; --- 2009-05-19.orig/xen/common/page_alloc.c 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/common/page_alloc.c 2009-05-19 16:42:41.000000000 +0200 @@ -422,7 +422,7 @@ static struct page_info *alloc_heap_page if ( unlikely(!cpus_empty(mask)) ) { perfc_incr(need_flush_tlb_flush); - flush_tlb_mask(mask); + flush_tlb_mask(cpumask_ref(mask)); } return pg; --- 2009-05-19.orig/xen/include/asm-ia64/tlbflush.h 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/include/asm-ia64/tlbflush.h 2009-05-19 16:42:41.000000000 +0200 @@ -39,7 +39,7 @@ void domain_flush_tlb_vhpt(struct domain void flush_tlb_for_log_dirty(struct domain *d); /* Flush v-tlb on cpus set in mask for current domain. */ -void flush_tlb_mask(cpumask_t mask); +void flush_tlb_mask(cpumask_ref_t mask); /* Flush local machine TLB. 
*/ void local_flush_tlb_all (void); --- 2009-05-19.orig/xen/include/asm-x86/flushtlb.h 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/include/asm-x86/flushtlb.h 2009-05-19 16:42:41.000000000 +0200 @@ -90,12 +90,12 @@ void flush_area_local(const void *va, un #define flush_local(flags) flush_area_local(NULL, flags) /* Flush specified CPUs'' TLBs/caches */ -void flush_area_mask(cpumask_t, const void *va, unsigned int flags); +void flush_area_mask(cpumask_ref_t, const void *va, unsigned int flags); #define flush_mask(mask, flags) flush_area_mask(mask, NULL, flags) /* Flush all CPUs'' TLBs/caches */ -#define flush_area_all(va, flags) flush_area_mask(cpu_online_map, va, flags) -#define flush_all(flags) flush_mask(cpu_online_map, flags) +#define flush_area_all(va, flags) flush_area_mask(cpu_online_ref, va, flags) +#define flush_all(flags) flush_mask(cpu_online_ref, flags) /* Flush local TLBs */ #define flush_tlb_local() \ @@ -111,8 +111,8 @@ void flush_area_mask(cpumask_t, const vo /* Flush all CPUs'' TLBs */ #define flush_tlb_all() \ - flush_tlb_mask(cpu_online_map) + flush_tlb_mask(cpu_online_ref) #define flush_tlb_one_all(v) \ - flush_tlb_one_mask(cpu_online_map, v) + flush_tlb_one_mask(cpu_online_ref, v) #endif /* __FLUSHTLB_H__ */ --- 2009-05-19.orig/xen/include/asm-x86/genapic.h 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/include/asm-x86/genapic.h 2009-05-19 16:42:41.000000000 +0200 @@ -35,7 +35,7 @@ struct genapic { void (*clustered_apic_check)(void); cpumask_t (*target_cpus)(void); unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); - void (*send_IPI_mask)(cpumask_t mask, int vector); + void (*send_IPI_mask)(cpumask_ref_t mask, int vector); }; #define APICFUNC(x) .x = x @@ -52,7 +52,7 @@ void init_apic_ldr_flat(void); void clustered_apic_check_flat(void); cpumask_t target_cpus_flat(void); unsigned int cpu_mask_to_apicid_flat(cpumask_t cpumask); -void send_IPI_mask_flat(cpumask_t mask, int vector); +void send_IPI_mask_flat(cpumask_ref_t mask, int vector); #define GENAPIC_FLAT \ .int_delivery_mode = dest_LowestPrio, \ .int_dest_mode = 1 /* logical delivery */, \ @@ -66,7 +66,7 @@ void init_apic_ldr_x2apic(void); void clustered_apic_check_x2apic(void); cpumask_t target_cpus_x2apic(void); unsigned int cpu_mask_to_apicid_x2apic(cpumask_t cpumask); -void send_IPI_mask_x2apic(cpumask_t mask, int vector); +void send_IPI_mask_x2apic(cpumask_ref_t mask, int vector); #define GENAPIC_X2APIC \ .int_delivery_mode = dest_Fixed, \ .int_dest_mode = 0 /* physical delivery */, \ @@ -80,7 +80,7 @@ void init_apic_ldr_phys(void); void clustered_apic_check_phys(void); cpumask_t target_cpus_phys(void); unsigned int cpu_mask_to_apicid_phys(cpumask_t cpumask); -void send_IPI_mask_phys(cpumask_t mask, int vector); +void send_IPI_mask_phys(cpumask_ref_t mask, int vector); #define GENAPIC_PHYS \ .int_delivery_mode = dest_Fixed, \ .int_dest_mode = 0 /* physical delivery */, \ --- 2009-05-19.orig/xen/include/xen/cpumask.h 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/include/xen/cpumask.h 2009-05-19 16:42:41.000000000 +0200 @@ -80,7 +80,16 @@ #include <xen/kernel.h> typedef struct { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; -extern cpumask_t _unused_cpumask_arg_; + +#if NR_CPUS <= BITS_PER_LONG +typedef cpumask_t cpumask_ref_t; +#define cpumask_ref(m) (m) +#define cpumask_deref(r) (r) +#else +typedef const cpumask_t *cpumask_ref_t; +#define cpumask_ref(m) (&(m)) +#define cpumask_deref(r) (*(r)) +#endif #define cpu_set(cpu, dst) __cpu_set((cpu), &(dst)) static inline void __cpu_set(int 
cpu, volatile cpumask_t *dstp) @@ -108,6 +117,7 @@ static inline void __cpus_clear(cpumask_ /* No static inline type checking - see Subtlety (1) above. */ #define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits) +#define cpumask_test(cpu, cpumask) cpu_isset(cpu, cpumask_deref(cpumask)) #define cpu_test_and_set(cpu, cpumask) __cpu_test_and_set((cpu), &(cpumask)) static inline int __cpu_test_and_set(int cpu, cpumask_t *addr) @@ -164,6 +174,8 @@ static inline int __cpus_equal(const cpu return bitmap_equal(src1p->bits, src2p->bits, nbits); } +#define cpumask_equal(m1, m2) cpus_equal(cpumask_deref(m1), cpumask_deref(m2)) + #define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS) static inline int __cpus_intersects(const cpumask_t *src1p, const cpumask_t *src2p, int nbits) @@ -171,6 +183,9 @@ static inline int __cpus_intersects(cons return bitmap_intersects(src1p->bits, src2p->bits, nbits); } +#define cpumask_intersects(m1, m2) cpus_intersects(cpumask_deref(m1), \ + cpumask_deref(m2)) + #define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS) static inline int __cpus_subset(const cpumask_t *src1p, const cpumask_t *src2p, int nbits) @@ -178,24 +193,32 @@ static inline int __cpus_subset(const cp return bitmap_subset(src1p->bits, src2p->bits, nbits); } +#define cpumask_subset(m1, m2) cpus_subset(cpumask_deref(m1), cpumask_deref(m2)) + #define cpus_empty(src) __cpus_empty(&(src), NR_CPUS) static inline int __cpus_empty(const cpumask_t *srcp, int nbits) { return bitmap_empty(srcp->bits, nbits); } +#define cpumask_empty(m) cpus_empty(cpumask_deref(m)) + #define cpus_full(cpumask) __cpus_full(&(cpumask), NR_CPUS) static inline int __cpus_full(const cpumask_t *srcp, int nbits) { return bitmap_full(srcp->bits, nbits); } +#define cpumask_full(m) cpus_full(cpumask_deref(m)) + #define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS) static inline int __cpus_weight(const cpumask_t *srcp, int nbits) { return bitmap_weight(srcp->bits, nbits); } +#define cpumask_weight(cpumask) cpus_weight(cpumask_deref(cpumask)) + #define cpus_shift_right(dst, src, n) \ __cpus_shift_right(&(dst), &(src), (n), NR_CPUS) static inline void __cpus_shift_right(cpumask_t *dstp, @@ -244,17 +267,24 @@ static inline int __cycle_cpu(int n, con return nxt; } -#define cpumask_of_cpu(cpu) \ -({ \ - typeof(_unused_cpumask_arg_) m; \ - if (sizeof(m) == sizeof(unsigned long)) { \ - m.bits[0] = 1UL<<(cpu); \ - } else { \ - cpus_clear(m); \ - cpu_set((cpu), m); \ - } \ - m; \ -}) +/* + * Special-case data structure for "single bit set only" constant CPU masks. + * + * We pre-generate all the 64 (or 32) possible bit positions, with enough + * padding to the left and the right, and return the constant pointer + * appropriately offset. 
+ */ +extern const unsigned long + cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)]; + +static inline const cpumask_t *get_cpu_mask(unsigned int cpu) +{ + const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; + return (const cpumask_t *)(p - cpu / BITS_PER_LONG); +} + +#define cpumask_of(cpu) cpumask_ref(*get_cpu_mask(cpu)) +#define cpumask_of_cpu(cpu) (*get_cpu_mask(cpu)) #define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) @@ -372,6 +402,10 @@ extern cpumask_t cpu_possible_map; extern cpumask_t cpu_online_map; extern cpumask_t cpu_present_map; +#define cpu_possible_ref cpumask_ref(cpu_possible_map) +#define cpu_online_ref cpumask_ref(cpu_online_map) +#define cpu_present_ref cpumask_ref(cpu_present_map) + #if NR_CPUS > 1 #define num_online_cpus() cpus_weight(cpu_online_map) #define num_possible_cpus() cpus_weight(cpu_possible_map) --- 2009-05-19.orig/xen/include/xen/smp.h 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/include/xen/smp.h 2009-05-19 16:42:41.000000000 +0200 @@ -9,9 +9,9 @@ */ extern void smp_send_stop(void); -extern void smp_send_event_check_mask(cpumask_t mask); +extern void smp_send_event_check_mask(cpumask_ref_t mask); #define smp_send_event_check_cpu(cpu) \ - smp_send_event_check_mask(cpumask_of_cpu(cpu)) + smp_send_event_check_mask(cpumask_of(cpu)) /* * Prepare machine for booting other CPUs. @@ -34,17 +34,15 @@ extern void smp_cpus_done(unsigned int m extern int smp_call_function( void (*func) (void *info), void *info, - int retry, int wait); /* * Call a function on a selection of processors */ extern int on_selected_cpus( - cpumask_t selected, + cpumask_ref_t selected, void (*func) (void *info), void *info, - int retry, int wait); /* @@ -59,10 +57,9 @@ void smp_prepare_boot_cpu(void); static inline int on_each_cpu( void (*func) (void *info), void *info, - int retry, int wait) { - return on_selected_cpus(cpu_online_map, func, info, retry, wait); + return on_selected_cpus(cpu_online_ref, func, info, wait); } #define smp_processor_id() raw_smp_processor_id() --- 2009-05-19.orig/xen/include/xen/softirq.h 2009-05-20 14:40:35.000000000 +0200 +++ 2009-05-19/xen/include/xen/softirq.h 2009-05-19 16:42:41.000000000 +0200 @@ -39,7 +39,7 @@ static inline void cpumask_raise_softirq cpu_clear(cpu, mask); } - smp_send_event_check_mask(mask); + smp_send_event_check_mask(cpumask_ref(mask)); } static inline void cpu_raise_softirq(unsigned int cpu, unsigned int nr) _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
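The cpu_bit_bitmap[] table added in xen/common/cpu.c above is what lets cpumask_of() hand out a constant pointer to a mask with exactly one bit set, without materialising NR_CPUS full-width masks. Below is a minimal standalone sketch of that trick; the reduced NR_CPUS, the LP64 assumption and the main() driver are illustrative additions, while the table, the MASK_DECLARE_* macros and get_cpu_mask() mirror the code added by the patch.

/* Standalone illustration of the cpu_bit_bitmap/cpumask_of() technique
 * introduced by the patch. Only the indexing trick is taken from the
 * patch; the scaffolding around it is for demonstration only. */
#include <stdio.h>

#define NR_CPUS          256
#define BITS_PER_LONG    64                      /* assumes an LP64 build */
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

typedef struct { unsigned long bits[BITS_TO_LONGS(NR_CPUS)]; } cpumask_t;

/* Row r (1..BITS_PER_LONG) has only bit r-1 set in its first word; row 0
 * is all zeroes so the returned pointer may safely back up into it. */
#define MASK_DECLARE_1(x) [x+1][0] = 1UL << (x)
#define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
#define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
#define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)

static const unsigned long
cpu_bit_bitmap[BITS_PER_LONG + 1][BITS_TO_LONGS(NR_CPUS)] = {
    MASK_DECLARE_8(0),  MASK_DECLARE_8(8),
    MASK_DECLARE_8(16), MASK_DECLARE_8(24),
    MASK_DECLARE_8(32), MASK_DECLARE_8(40),
    MASK_DECLARE_8(48), MASK_DECLARE_8(56),
};

/* Select the row whose first word has bit (cpu % BITS_PER_LONG) set, then
 * step the pointer back by (cpu / BITS_PER_LONG) words so that this word
 * ends up at the position covering 'cpu' within the returned mask. */
static const cpumask_t *get_cpu_mask(unsigned int cpu)
{
    const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
    return (const cpumask_t *)(p - cpu / BITS_PER_LONG);
}

int main(void)
{
    unsigned int cpu = 70;                       /* any CPU number will do */
    const cpumask_t *m = get_cpu_mask(cpu);

    /* Bit 70 lives in word 1 at position 6, so this prints 0x40. */
    printf("word %u = %#lx\n", cpu / BITS_PER_LONG,
           m->bits[cpu / BITS_PER_LONG]);
    return 0;
}

Because every possible single-bit mask is a slice of this one shared read-only table, cpumask_of(cpu) can be passed wherever the new interfaces want a cpumask_ref_t without any per-call copying or on-stack mask construction.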
Keir Fraser
2009-May-20 13:39 UTC
Re: [Xen-devel] [PATCH] adjust the way cpumasks get passed around
On 20/05/2009 06:20, "Jan Beulich" <JBeulich@novell.com> wrote:

> Rather than passing cpumasks by value in all cases (which is problematic
> for large NR_CPUS configurations), pass them by value only if they are
> not wider than a pointer; in all other cases, pass them 'by reference'
> (i.e. through a pointer to a const cpumask).
>
> On x86 this changes send_IPI_mask() to always only send IPIs to remote
> CPUs (meaning any caller needing to handle the current CPU as well has
> to do so on its own).
>
> Since the patch touches on_selected_cpus() parameters anyway, it at
> once removes that function's (and then for consistency also
> on_each_cpu()'s as well as smp_call_function()'s similar) dead "retry"
> parameter.

Removing the unused retry parameters belongs in a separate patch.

I don't like the conditional pass-by-ref/value approach. We end up with yet
another layer of macro abstraction and names in cpumask.h, and it means
different builds may yield different bugs (pass-by-ref is riskier since the
cpumask can change under the callee's feet, and this would be the rarer case
for testing since it requires a wide cpumask at build time).

So, I can agree with using pass-by-ref, but then please just make a patch to
do that always. And put the unrelated cleanups in a separate patch please.

 -- Keir
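For readers following the objection above, this is the conditional scheme being debated, reduced to a self-contained sketch: with a narrow NR_CPUS the mask travels by value (a private snapshot), otherwise as a const pointer (no copy, but the storage can change under the callee while it iterates, which is the risk Keir points out). The #if/typedef/cpumask_ref()/cpumask_deref() lines mirror the cpumask.h hunk of the patch; the NR_CPUS value, count_bits() and main() are illustrative assumptions.

/* Sketch of the conditional pass-by-value/pass-by-reference scheme from
 * the patch's cpumask.h changes, reduced to a standalone file. NR_CPUS is
 * chosen to exercise the by-reference branch. */
#include <stdio.h>

#define NR_CPUS          128          /* wider than a long: reference branch */
#define BITS_PER_LONG    64
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

typedef struct { unsigned long bits[BITS_TO_LONGS(NR_CPUS)]; } cpumask_t;

#if NR_CPUS <= BITS_PER_LONG
typedef cpumask_t cpumask_ref_t;          /* no wider than a pointer: copy */
#define cpumask_ref(m)   (m)
#define cpumask_deref(r) (r)
#else
typedef const cpumask_t *cpumask_ref_t;   /* wide mask: pass a const pointer */
#define cpumask_ref(m)   (&(m))
#define cpumask_deref(r) (*(r))
#endif

/* A callee written against the new interface works unchanged either way. */
static int count_bits(cpumask_ref_t mask)
{
    int i, n = 0;
    for (i = 0; i < NR_CPUS; i++)
        if (cpumask_deref(mask).bits[i / BITS_PER_LONG] &
            (1UL << (i % BITS_PER_LONG)))
            n++;
    return n;
}

int main(void)
{
    cpumask_t m = { { 0x5, 0x1 } };   /* CPUs 0, 2 and 64 set (NR_CPUS=128) */

    printf("weight = %d\n", count_bits(cpumask_ref(m)));  /* prints 3 */
    return 0;
}

Keir's counter-proposal is effectively to drop the #if and keep only the by-reference branch, so that every build exercises the same calling convention.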