Puthiyaparambil, Aravindh
2005-Aug-06 16:03 UTC
RE: [Xen-devel] Genapic in 32-bit Dom0 (Was : More Problems booting 32-bit Domain 0 on ES7000 x86_64)
Chris, Is there any way for us to see an early copy of this patch? I think we need to come up with a way for apic_xen to accommodate clustered apics too or there might be issues running on large systems. Thanks Aravindh > -----Original Message----- > From: Chris Wright [mailto:chrisw@osdl.org] > Sent: Friday, August 05, 2005 3:29 PM > To: Puthiyaparambil, Aravindh > Cc: Chris Wright; Keir Fraser; Magolan, John F; xen- > devel@lists.xensource.com; Subrahmanian, Raj; Vessey, Bruce A > Subject: Re: [Xen-devel] Genapic in 32-bit Dom0 (Was : More Problems > booting 32-bit Domain 0 on ES7000 x86_64) > > * Puthiyaparambil, Aravindh (aravindh.puthiyaparambil@unisys.com) wrote: > > Ooops that could cause problems for us on the ES7000 x86_64 as we run in > > clustered apic mode. With a uniprocessor Dom0 I see that it picks up > > clustered apic. From a cursory look at the genapic code in Dom0 I don't > > see a difference between UP and SMP clustered_apic_check(). Am I looking > > at the wrong place? > > Yes, that patch hasn't been merged yet. > > Here's the snippet that's likely to cause a problem as it stands now: > > +print: > + /* hardcode to xen apic functions */ > + genapic = &apic_xen; > + printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); > > thanks, > -chris _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Chris Wright
2005-Aug-08 07:23 UTC
Re: [Xen-devel] Genapic in 32-bit Dom0 (Was : More Problems booting 32-bit Domain 0 on ES7000 x86_64)
* Puthiyaparambil, Aravindh (aravindh.puthiyaparambil@unisys.com) wrote:> Is there any way for us to see an early copy of this patch? I think we > need to come up with way for apic_xen to accommodate clustered apics too > or there might be issues running on large systems.Here''s an update from the last copy I made (about one week old I think). Many thanks to Xin who found my last bug so that 64-bit syscalls actually worked! I''d run this one quite successfully on dom0 (before refreshing to a newer Xen snapshot, this actual patch is not more than compile tested). I believe Xin is continuing on to work on domU (which conincidentally has apic compilation issues), and may have a more up-to-date patch. thanks, -chris Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S ==================================================================--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S +++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S @@ -43,7 +43,6 @@ startup_64: ENTRY(_start) cld - movq init_rsp(%rip),%rsp /* Copy the necessary stuff from xen_start_info structure. */ movq $xen_start_info_union,%rdi movq $64,%rcx /* sizeof (union xen_start_info_union) / sizeof (long) */ @@ -54,6 +53,7 @@ ENTRY(_start) cld #endif /* CONFIG_SMP */ + movq init_rsp(%rip),%rsp /* zero EFLAGS after setting rsp */ pushq $0 popfq @@ -140,6 +140,7 @@ ENTRY(cpu_gdt_table) .quad 0,0 /* TSS */ .quad 0,0 /* LDT */ .quad 0,0,0 /* three TLS descriptors */ + .quad 0 /* unused now? __KERNEL16_CS - 16bit PM for S3 wakeup. 
*/ gdt_end: /* asm/segment.h:GDT_ENTRIES must match this */ Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile ==================================================================--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile +++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile @@ -25,10 +25,10 @@ obj-$(CONFIG_ACPI_BOOT) += acpi/ c-obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_X86_CPUID) += cpuid.o -#obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o +obj-$(CONFIG_SMP) += smp.o smpboot.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o c-obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o -obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o +obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o genapic_xen.o c-obj-$(CONFIG_X86_IO_APIC) += genapic.o genapic_cluster.o genapic_flat.o #obj-$(CONFIG_PM) += suspend.o #obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o @@ -54,7 +54,7 @@ intel_cacheinfo-y += ../../../i386/kern quirks-y += ../../i386/kernel/quirks.o c-link := init_task.o -s-link := vsyscall.o +s-link := vsyscall.o $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-obj-m) $(c-link)) $(patsubst %.o,$(obj)/%.S,$(s-obj-y) $(s-link)): @ln -fsn $(srctree)/arch/x86_64/kernel/$(notdir $@) $@ Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c ==================================================================--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c +++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c @@ -259,13 +259,13 @@ void __init cpu_init (void) * Initialize the per-CPU GDT with the boot GDT, * and set up the GDT descriptor: */ +#ifndef CONFIG_XEN if (cpu) { memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE); } cpu_gdt_descr[cpu].size = GDT_SIZE; cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu]; -#if 0 asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu])); asm volatile("lidt %0" :: "m" (idt_descr)); #endif Index: 
xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c ==================================================================--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c +++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c @@ -599,6 +599,17 @@ static void __init print_memory_map(char } } +void __init smp_alloc_memory(void) +{ + int cpu; + + for (cpu = 1; cpu < NR_CPUS; cpu++) { + cpu_gdt_descr[cpu].address = (unsigned long) + alloc_bootmem_low_pages(PAGE_SIZE); + /* XXX free unused pages later */ + } +} + void __init setup_arch(char **cmdline_p) { int i, j; @@ -738,6 +749,9 @@ void __init setup_arch(char **cmdline_p) } } #endif +#ifdef CONFIG_SMP + smp_alloc_memory(); +#endif paging_init(); #ifdef CONFIG_X86_LOCAL_APIC /* Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S ==================================================================--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S +++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S @@ -8,11 +8,14 @@ #define sizeof_vcpu_shift 3 #ifdef CONFIG_SMP -#define preempt_disable(reg) incl threadinfo_preempt_count(reg) -#define preempt_enable(reg) decl threadinfo_preempt_count(reg) +//#define preempt_disable(reg) incl threadinfo_preempt_count(reg) +//#define preempt_enable(reg) decl threadinfo_preempt_count(reg) +#define preempt_disable(reg) +#define preempt_enable(reg) #define XEN_GET_VCPU_INFO(reg) preempt_disable(%rbp) ; \ movq %gs:pda_cpunumber,reg ; \ - shl $sizeof_vcpu_shift,reg ; \ + shl $32, reg ; \ + shr $32-sizeof_vcpu_shift,reg ; \ addq HYPERVISOR_shared_info,reg #define XEN_PUT_VCPU_INFO(reg) preempt_enable(%rbp) ; \ #define XEN_PUT_VCPU_INFO_fixup .byte 0xff,0xff,0xff Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c ==================================================================--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c +++ 
xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c @@ -20,7 +20,11 @@ */ atomic_t irq_err_count; - +#ifdef CONFIG_X86_IO_APIC +#ifdef APIC_MISMATCH_DEBUG +atomic_t irq_mis_count; +#endif +#endif /* * Generic, controller-independent functions: Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c ==================================================================--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c +++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c @@ -28,6 +28,9 @@ #include <asm/mmu_context.h> #include <asm/proto.h> #include <asm/apicdef.h> +#include <asm-xen/evtchn.h> + +#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg) /* * Smarter SMP flushing macros. @@ -103,6 +106,7 @@ static inline void leave_mm (unsigned lo * 2) Leave the mm if we are in the lazy tlb mode. */ +#if 0 /* Xen */ asmlinkage void smp_invalidate_interrupt (void) { unsigned long cpu; @@ -251,6 +255,20 @@ void flush_tlb_all(void) on_each_cpu(do_flush_tlb_all, NULL, 1, 1); } +#else +asmlinkage void smp_invalidate_interrupt (void) +{ return; } +void flush_tlb_current_task(void) +{ xen_tlb_flush_mask(¤t->mm->cpu_vm_mask); } +void flush_tlb_mm (struct mm_struct * mm) +{ xen_tlb_flush_mask(&mm->cpu_vm_mask); } +void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) +{ xen_invlpg_mask(&vma->vm_mm->cpu_vm_mask, va); } +void flush_tlb_all(void) +{ xen_tlb_flush_all(); } +#endif /* Xen */ + + void smp_kdb_stop(void) { send_IPI_allbutself(KDB_VECTOR); @@ -310,13 +328,13 @@ static void __smp_call_function (void (* /* Wait for response */ while (atomic_read(&data.started) != cpus) - cpu_relax(); + barrier(); if (!wait) return; while (atomic_read(&data.finished) != cpus) - cpu_relax(); + barrier(); } /* @@ -350,7 +368,11 @@ void smp_stop_cpu(void) */ cpu_clear(smp_processor_id(), cpu_online_map); local_irq_disable(); +#ifdef CONFIG_XEN + xxprint("stop_this_cpu disable_local_APIC\n"); +#else 
disable_local_APIC(); +#endif local_irq_enable(); } @@ -364,8 +386,10 @@ static void smp_really_stop_cpu(void *du void smp_send_stop(void) { int nolock = 0; +#ifndef CONFIG_XEN if (reboot_force) return; +#endif /* Don''t deadlock on the call lock in panic */ if (!spin_trylock(&call_lock)) { /* ignore locking because we have paniced anyways */ @@ -376,7 +400,11 @@ void smp_send_stop(void) spin_unlock(&call_lock); local_irq_disable(); +#ifdef CONFIG_XEN + xxprint("stop_this_cpu disable_local_APIC\n"); +#else disable_local_APIC(); +#endif local_irq_enable(); } @@ -385,18 +413,17 @@ void smp_send_stop(void) * all the work is done automatically when * we return from the interrupt. */ -asmlinkage void smp_reschedule_interrupt(void) +asmlinkage irqreturn_t smp_reschedule_interrupt(void) { - ack_APIC_irq(); + return IRQ_HANDLED; } -asmlinkage void smp_call_function_interrupt(void) +asmlinkage irqreturn_t smp_call_function_interrupt(void) { void (*func) (void *info) = call_data->func; void *info = call_data->info; int wait = call_data->wait; - ack_APIC_irq(); /* * Notify initiating CPU that I''ve grabbed the data and am * about to execute the function @@ -413,6 +440,8 @@ asmlinkage void smp_call_function_interr mb(); atomic_inc(&call_data->finished); } + + return IRQ_HANDLED; } int safe_smp_processor_id(void) @@ -422,7 +451,10 @@ int safe_smp_processor_id(void) if (disable_apic) return 0; - apicid = hard_smp_processor_id(); +#ifdef CONFIG_XEN + return smp_processor_id(); +#endif + if (x86_cpu_to_apicid[apicid] == apicid) return apicid; Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c ==================================================================--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c +++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c @@ -953,6 +953,17 @@ void __init trap_init(void) cpu_init(); } +void smp_trap_init(trap_info_t *trap_ctxt) +{ + trap_info_t *t = trap_table; + + for (t = trap_table; 
t->address; t++) { + trap_ctxt[t->vector].flags = t->flags; + trap_ctxt[t->vector].cs = t->cs; + trap_ctxt[t->vector].address = t->address; + } +} + /* Actual parsing is done early in setup.c. */ static int __init oops_dummy(char *s) Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c ==================================================================--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c +++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c @@ -47,6 +47,7 @@ #include <linux/bootmem.h> #include <linux/thread_info.h> #include <linux/module.h> +#include <linux/interrupt.h> #include <linux/delay.h> #include <linux/mc146818rtc.h> @@ -57,6 +58,8 @@ #include <asm/tlbflush.h> #include <asm/proto.h> #include <asm/nmi.h> +#include <asm/mpspec.h> +#include <asm/arch_hooks.h> /* Change for real CPU hotplug. Note other files need to be fixed first too. */ @@ -96,6 +99,7 @@ cpumask_t cpu_sibling_map[NR_CPUS] __cac cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; EXPORT_SYMBOL(cpu_core_map); +#ifndef CONFIG_XEN /* * Trampoline 80x86 program as an array. */ @@ -115,6 +119,7 @@ static unsigned long __cpuinit setup_tra memcpy(tramp, trampoline_data, trampoline_end - trampoline_data); return virt_to_phys(tramp); } +#endif /* * The bootstrap kernel entry code has set these up. Save them for @@ -130,6 +135,7 @@ static void __cpuinit smp_store_cpu_info print_cpu_info(c); } +#ifndef CONFIG_XEN /* * New Funky TSC sync algorithm borrowed from IA64. 
* Main advantage is that it doesn''t reset the TSCs fully and @@ -331,6 +337,7 @@ static __init int notscsync_setup(char * return 0; } __setup("notscsync", notscsync_setup); +#endif static atomic_t init_deasserted __cpuinitdata; @@ -343,6 +350,7 @@ void __cpuinit smp_callin(void) int cpuid, phys_id; unsigned long timeout; +#ifndef CONFIG_XEN /* * If waken up by an INIT in an 82489DX configuration * we may get here before an INIT-deassert IPI reaches @@ -352,10 +360,11 @@ void __cpuinit smp_callin(void) while (!atomic_read(&init_deasserted)) cpu_relax(); +#endif /* * (This works even if the APIC is not enabled.) */ - phys_id = GET_APIC_ID(apic_read(APIC_ID)); + phys_id = smp_processor_id(); cpuid = smp_processor_id(); if (cpu_isset(cpuid, cpu_callin_map)) { panic("smp_callin: phys CPU#%d, CPU#%d already present??\n", @@ -389,6 +398,7 @@ void __cpuinit smp_callin(void) cpuid); } +#ifndef CONFIG_XEN /* * the boot CPU has finished the init stage and is spinning * on callin_map until we finish. We are free to set up this @@ -398,6 +408,7 @@ void __cpuinit smp_callin(void) Dprintk("CALLIN, before setup_local_APIC().\n"); setup_local_APIC(); +#endif /* * Get our bogomips. 
@@ -405,7 +416,9 @@ void __cpuinit smp_callin(void) calibrate_delay(); Dprintk("Stack at about %p\n",&cpuid); +#ifndef CONFIG_XEN disable_APIC_timer(); +#endif /* * Save our processor parameters @@ -418,6 +431,26 @@ void __cpuinit smp_callin(void) cpu_set(cpuid, cpu_callin_map); } +static irqreturn_t ldebug_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + return IRQ_HANDLED; +} + +static DEFINE_PER_CPU(int, ldebug_irq); +static char ldebug_name[NR_CPUS][15]; + +void ldebug_setup(void) +{ + int cpu = smp_processor_id(); + + per_cpu(ldebug_irq, cpu) = bind_virq_to_irq(VIRQ_DEBUG); + sprintf(ldebug_name[cpu], "ldebug%d", cpu); + BUG_ON(request_irq(per_cpu(ldebug_irq, cpu), ldebug_interrupt, + SA_INTERRUPT, ldebug_name[cpu], NULL)); +} + +extern void local_setup_timer(void); + /* * Setup code on secondary processor (after comming out of the trampoline) */ @@ -434,6 +467,7 @@ void __cpuinit start_secondary(void) /* otherwise gcc will move up the smp_processor_id before the cpu_init */ barrier(); +#ifndef CONFIG_XEN Dprintk("cpu %d: setting up apic clock\n", smp_processor_id()); setup_secondary_APIC_clock(); @@ -446,6 +480,12 @@ void __cpuinit start_secondary(void) } enable_APIC_timer(); +#else + local_setup_timer(); + ldebug_setup(); + smp_intr_init(); + local_irq_enable(); +#endif /* * Allow the master to continue. @@ -453,10 +493,12 @@ void __cpuinit start_secondary(void) cpu_set(smp_processor_id(), cpu_online_map); mb(); +#ifndef CONFIG_XEN /* Wait for TSC sync to not schedule things before. We still process interrupts, which could see an inconsistent time in that window unfortunately. */ tsc_sync_wait(); +#endif cpu_idle(); } @@ -502,6 +544,7 @@ static void inquire_remote_apic(int apic } #endif +#ifndef CONFIG_XEN /* * Kick the secondary to wake up. */ @@ -627,6 +670,7 @@ static int __cpuinit wakeup_secondary_vi return (send_status | accept_status); } +#endif /* * Boot one CPU. 
@@ -637,6 +681,14 @@ static int __cpuinit do_boot_cpu(int cpu unsigned long boot_error; int timeout; unsigned long start_rip; +#ifdef CONFIG_XEN + vcpu_guest_context_t ctxt; + extern void startup_64_smp(void); + extern void hypervisor_callback(void); + extern void failsafe_callback(void); + extern void smp_trap_init(trap_info_t *); + int i; +#endif /* * We can''t use kernel_thread since we must avoid to * reschedule the child. @@ -649,7 +701,7 @@ static int __cpuinit do_boot_cpu(int cpu cpu_pda[cpu].pcurrent = idle; - start_rip = setup_trampoline(); + start_rip = (unsigned long)startup_64_smp; init_rsp = idle->thread.rsp; per_cpu(init_tss,cpu).rsp0 = init_rsp; @@ -666,6 +718,95 @@ static int __cpuinit do_boot_cpu(int cpu atomic_set(&init_deasserted, 0); +#ifdef CONFIG_XEN + if (cpu_gdt_descr[0].size > PAGE_SIZE) + BUG(); + cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size; + memcpy((void *)cpu_gdt_descr[cpu].address, + (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size); + + memset(&ctxt, 0, sizeof(ctxt)); + + ctxt.flags = VGCF_IN_KERNEL; + ctxt.user_regs.ds = __USER_DS; + ctxt.user_regs.es = __USER_DS; + ctxt.user_regs.fs = 0; + ctxt.user_regs.gs = 0; + ctxt.user_regs.ss = __KERNEL_DS|0x3; + ctxt.user_regs.cs = __KERNEL_CS|0x3; + ctxt.user_regs.rip = start_rip; + ctxt.user_regs.rsp = idle->thread.rsp; + ctxt.user_regs.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12); + + /* FPU is set up to default initial state. */ + memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); + + /* Virtual IDT is empty at start-of-day. */ + for ( i = 0; i < 256; i++ ) + { + ctxt.trap_ctxt[i].vector = i; + ctxt.trap_ctxt[i].cs = FLAT_KERNEL_CS; + } + smp_trap_init(ctxt.trap_ctxt); + + /* No LDT. 
*/ + ctxt.ldt_ents = 0; + + { + unsigned long va; + int f; + + for (va = cpu_gdt_descr[cpu].address, f = 0; + va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size; + va += PAGE_SIZE, f++) { + ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT; + make_page_readonly((void *)va); + } + ctxt.gdt_ents = GDT_ENTRIES; + } + + /* Ring 1 stack is the initial stack. */ + ctxt.kernel_ss = __KERNEL_DS; + ctxt.kernel_sp = idle->thread.rsp; + + /* Callback handlers. */ + ctxt.event_callback_eip = (unsigned long)hypervisor_callback; + ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; + ctxt.syscall_callback_eip = (unsigned long)system_call; + + ctxt.ctrlreg[3] = (unsigned long)virt_to_machine(init_level4_pgt); + + boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt); + + if (!boot_error) { + /* + * allow APs to start initializing. + */ + Dprintk("Before Callout %d.\n", cpu); + cpu_set(cpu, cpu_callout_map); + Dprintk("After Callout %d.\n", cpu); + + /* + * Wait 5s total for a response + */ + for (timeout = 0; timeout < 50000; timeout++) { + if (cpu_isset(cpu, cpu_callin_map)) + break; /* It has booted */ + udelay(100); + } + + if (cpu_isset(cpu, cpu_callin_map)) { + /* number CPUs logically, starting from 1 (BSP is 0) */ + Dprintk("OK.\n"); + printk("CPU%d: ", cpu); + print_cpu_info(&cpu_data[cpu]); + Dprintk("CPU has booted.\n"); + } else { + boot_error= 1; + } + } + x86_cpu_to_apicid[cpu] = apicid; +#else Dprintk("Setting warm reset code and vector.\n"); CMOS_WRITE(0xa, 0xf); @@ -729,6 +870,7 @@ static int __cpuinit do_boot_cpu(int cpu #endif } } +#endif if (boot_error) { cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */ @@ -857,11 +999,13 @@ static __cpuinit void enforce_max_cpus(u */ static int __cpuinit smp_sanity_check(unsigned max_cpus) { +#ifndef CONFIG_XEN if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { printk("weird, boot CPU (#%d) not listed by the 
BIOS.\n", hard_smp_processor_id()); physid_set(hard_smp_processor_id(), phys_cpu_present_map); } +#endif /* * If we couldn''t find an SMP configuration at boot time, @@ -876,6 +1020,7 @@ static int __cpuinit smp_sanity_check(un return -1; } +#ifndef CONFIG_XEN /* * Should not be necessary because the MP table should list the boot * CPU too, but we do it for the sake of robustness anyway. @@ -896,16 +1041,17 @@ static int __cpuinit smp_sanity_check(un nr_ioapics = 0; return -1; } +#endif /* * If SMP should be disabled, then really disable it! */ if (!max_cpus) { + HYPERVISOR_shared_info->n_vcpu = 1; printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); nr_ioapics = 0; return -1; } - return 0; } @@ -928,7 +1074,7 @@ void __cpuinit smp_prepare_cpus(unsigned */ for (i = 0; i < NR_CPUS; i++) { int apicid = cpu_present_to_apicid(i); - if (physid_isset(apicid, phys_cpu_present_map)) { + if (i < HYPERVISOR_shared_info->n_vcpu) { cpu_set(i, cpu_present_map); /* possible map would be different if we supported real CPU hotplug. */ @@ -942,7 +1088,9 @@ void __cpuinit smp_prepare_cpus(unsigned return; } + smp_intr_init(); +#ifndef CONFIG_XEN /* * Switch from PIC to APIC mode. */ @@ -954,6 +1102,7 @@ void __cpuinit smp_prepare_cpus(unsigned GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id); /* Or can we switch back to PIC here? */ } +#endif /* * Now start the IO-APICs @@ -967,7 +1116,9 @@ void __cpuinit smp_prepare_cpus(unsigned * Set up local APIC timer on boot CPU. 
*/ +#ifndef CONFIG_XEN setup_boot_APIC_clock(); +#endif } /* @@ -1021,6 +1172,7 @@ int __cpuinit __cpu_up(unsigned int cpu) */ void __cpuinit smp_cpus_done(unsigned int max_cpus) { +#ifndef CONFIG_XEN zap_low_mappings(); smp_cleanup_boot(); @@ -1028,8 +1180,60 @@ void __cpuinit smp_cpus_done(unsigned in setup_ioapic_dest(); #endif - detect_siblings(); time_init_gtod(); check_nmi_watchdog(); +#endif + detect_siblings(); +} + +extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *); +extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *); + +static DEFINE_PER_CPU(int, resched_irq); +static DEFINE_PER_CPU(int, callfunc_irq); +static char resched_name[NR_CPUS][15]; +static char callfunc_name[NR_CPUS][15]; + +void smp_intr_init(void) +{ + int cpu = smp_processor_id(); + + per_cpu(resched_irq, cpu) + bind_ipi_to_irq(RESCHEDULE_VECTOR); + sprintf(resched_name[cpu], "resched%d", cpu); + BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt, + SA_INTERRUPT, resched_name[cpu], NULL)); + + per_cpu(callfunc_irq, cpu) + bind_ipi_to_irq(CALL_FUNCTION_VECTOR); + sprintf(callfunc_name[cpu], "callfunc%d", cpu); + BUG_ON(request_irq(per_cpu(callfunc_irq, cpu), + smp_call_function_interrupt, + SA_INTERRUPT, callfunc_name[cpu], NULL)); +} + +static void smp_intr_exit(void) +{ + int cpu = smp_processor_id(); + + free_irq(per_cpu(resched_irq, cpu), NULL); + unbind_ipi_from_irq(RESCHEDULE_VECTOR); + + free_irq(per_cpu(callfunc_irq, cpu), NULL); + unbind_ipi_from_irq(CALL_FUNCTION_VECTOR); +} + +void smp_suspend(void) +{ + /* XXX todo: take down time and ipi''s on all cpus */ + local_teardown_timer_irq(); + smp_intr_exit(); +} + +void smp_resume(void) +{ + /* XXX todo: restore time and ipi''s on all cpus */ + smp_intr_init(); + local_setup_timer_irq(); } Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/mpparse.c ==================================================================--- 
xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/mpparse.c +++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/mpparse.c @@ -105,7 +105,6 @@ static int __init mpf_checksum(unsigned return sum & 0xFF; } -#ifndef CONFIG_XEN static void __init MP_processor_info (struct mpc_config_processor *m) { int ver; @@ -162,12 +161,6 @@ static void __init MP_processor_info (st x86_cpu_to_apicid[num_processors - found_bsp] = m->mpc_apicid; } } -#else -void __init MP_processor_info (struct mpc_config_processor *m) -{ - num_processors++; -} -#endif /* CONFIG_XEN */ static void __init MP_bus_info (struct mpc_config_bus *m) { @@ -702,7 +695,6 @@ void __init mp_register_lapic ( if (id == boot_cpu_physical_apicid) boot_cpu = 1; -#ifndef CONFIG_XEN processor.mpc_type = MP_PROCESSOR; processor.mpc_apicid = id; processor.mpc_apicver = 0x10; /* TBD: lapic version */ @@ -713,7 +705,6 @@ void __init mp_register_lapic ( processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; processor.mpc_reserved[0] = 0; processor.mpc_reserved[1] = 0; -#endif MP_processor_info(&processor); } Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic.c ==================================================================--- /dev/null +++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic.c @@ -0,0 +1,109 @@ +/* + * Copyright 2004 James Cleverdon, IBM. + * Subject to the GNU Public License, v.2 + * + * Generic APIC sub-arch probe layer. + * + * Hacked for x86-64 by James Cleverdon from i386 architecture code by + * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and + * James Cleverdon. 
+ */ +#include <linux/config.h> +#include <linux/threads.h> +#include <linux/cpumask.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/ctype.h> +#include <linux/init.h> +#include <linux/module.h> + +#include <asm/smp.h> +#include <asm/ipi.h> + +#if defined(CONFIG_ACPI_BUS) +#include <acpi/acpi_bus.h> +#endif + +/* which logical CPU number maps to which CPU (physical APIC ID) */ +u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; +EXPORT_SYMBOL(x86_cpu_to_apicid); +u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; + +extern struct genapic apic_cluster; +extern struct genapic apic_flat; +extern struct genapic apic_xen; + +struct genapic *genapic = &apic_xen; + +/* + * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. + */ +void __init clustered_apic_check(void) +{ + long i; + u8 clusters, max_cluster; + u8 id; + u8 cluster_cnt[NUM_APIC_CLUSTERS]; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + /* AMD always uses flat mode right now */ + genapic = &apic_flat; + goto print; + } + +#if defined(CONFIG_ACPI_BUS) + /* + * Some x86_64 machines use physical APIC mode regardless of how many + * procs/clusters are present (x86_64 ES7000 is an example). + */ + if (acpi_fadt.revision > FADT2_REVISION_ID) + if (acpi_fadt.force_apic_physical_destination_mode) { + genapic = &apic_cluster; + goto print; + } +#endif + + memset(cluster_cnt, 0, sizeof(cluster_cnt)); + + for (i = 0; i < NR_CPUS; i++) { + id = bios_cpu_apicid[i]; + if (id != BAD_APICID) + cluster_cnt[APIC_CLUSTERID(id)]++; + } + + clusters = 0; + max_cluster = 0; + for (i = 0; i < NUM_APIC_CLUSTERS; i++) { + if (cluster_cnt[i] > 0) { + ++clusters; + if (cluster_cnt[i] > max_cluster) + max_cluster = cluster_cnt[i]; + } + } + + /* + * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode, + * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical + * else physical mode. 
+ * (We don''t use lowest priority delivery + HW APIC IRQ steering, so + * can ignore the clustered logical case and go straight to physical.) + */ + if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) + genapic = &apic_flat; + else + genapic = &apic_cluster; + +print: + /* hardcode to xen apic functions */ + genapic = &apic_xen; + printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); +} + +/* Same for both flat and clustered. */ + +extern void xen_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest); + +void send_IPI_self(int vector) +{ + xen_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); +} Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic_xen.c ==================================================================--- /dev/null +++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic_xen.c @@ -0,0 +1,157 @@ +/* + * Copyright 2004 James Cleverdon, IBM. + * Subject to the GNU Public License, v.2 + * + * Xen APIC subarch code. Maximum 8 CPUs, logical delivery. + * + * Hacked for x86-64 by James Cleverdon from i386 architecture code by + * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and + * James Cleverdon. + * + * Hacked to pieces for Xen by Chris Wright. 
+ */ +#include <linux/config.h> +#include <linux/threads.h> +#include <linux/cpumask.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/ctype.h> +#include <linux/init.h> +#include <asm/smp.h> +#include <asm/ipi.h> +#include <asm-xen/evtchn.h> + +DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]); + +static inline void __send_IPI_one(unsigned int cpu, int vector) +{ + unsigned int evtchn; + Dprintk("%s\n", __FUNCTION__); + + evtchn = per_cpu(ipi_to_evtchn, cpu)[vector]; + if (evtchn) + notify_via_evtchn(evtchn); + else + printk("send_IPI to unbound port %d/%d", cpu, vector); + +} + +void xen_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) +{ + int cpu; + + switch (shortcut) { + case APIC_DEST_SELF: + __send_IPI_one(smp_processor_id(), vector); + break; + case APIC_DEST_ALLBUT: + for (cpu = 0; cpu < NR_CPUS; ++cpu) { + if (cpu == smp_processor_id()) + continue; + if (cpu_isset(cpu, cpu_online_map)) { + __send_IPI_one(cpu, vector); + } + } + break; + case APIC_DEST_ALLINC: + for (cpu = 0; cpu < NR_CPUS; ++cpu) { + if (cpu_isset(cpu, cpu_online_map)) { + __send_IPI_one(cpu, vector); + } + } + break; + default: + printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut, + vector); + break; + } + +} + +static cpumask_t xen_target_cpus(void) +{ + return cpu_online_map; +} + +/* + * Set up the logical destination ID. + * Do nothing, not called now. + */ +static void xen_init_apic_ldr(void) +{ + Dprintk("%s\n", __FUNCTION__); + return; +} + +static void xen_send_IPI_allbutself(int vector) +{ + /* + * if there are no other CPUs in the system then + * we get an APIC send error if we try to broadcast. + * thus we have to avoid sending IPIs in this case. 
+ */ + Dprintk("%s\n", __FUNCTION__); + if (num_online_cpus() > 1) + xen_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_LOGICAL); +} + +static void xen_send_IPI_all(int vector) +{ + Dprintk("%s\n", __FUNCTION__); + xen_send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); +} + +static void xen_send_IPI_mask(cpumask_t cpumask, int vector) +{ + unsigned long mask = cpus_addr(cpumask)[0]; + unsigned int cpu; + unsigned long flags; + + Dprintk("%s\n", __FUNCTION__); + local_irq_save(flags); + WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); + + for (cpu = 0; cpu < NR_CPUS; ++cpu) { + if (cpu_isset(cpu, cpumask)) { + __send_IPI_one(cpu, vector); + } + } + local_irq_restore(flags); +} + +static int xen_apic_id_registered(void) +{ + /* better be set */ + Dprintk("%s\n", __FUNCTION__); + return physid_isset(smp_processor_id(), phys_cpu_present_map); +} + +static unsigned int xen_cpu_mask_to_apicid(cpumask_t cpumask) +{ + Dprintk("%s\n", __FUNCTION__); + return cpus_addr(cpumask)[0] & APIC_ALL_CPUS; +} + +static unsigned int phys_pkg_id(int index_msb) +{ + u32 ebx; + + Dprintk("%s\n", __FUNCTION__); + ebx = cpuid_ebx(1); + return ((ebx >> 24) & 0xFF) >> index_msb; +} + +struct genapic apic_xen = { + .name = "xen", + .int_delivery_mode = dest_LowestPrio, + .int_dest_mode = (APIC_DEST_LOGICAL != 0), + .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST, + .target_cpus = xen_target_cpus, + .apic_id_registered = xen_apic_id_registered, + .init_apic_ldr = xen_init_apic_ldr, + .send_IPI_all = xen_send_IPI_all, + .send_IPI_allbutself = xen_send_IPI_allbutself, + .send_IPI_mask = xen_send_IPI_mask, + .cpu_mask_to_apicid = xen_cpu_mask_to_apicid, + .phys_pkg_id = phys_pkg_id, +}; _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel