# This is a BitKeeper generated diff -Nru style patch. # # ChangeSet # 2005/05/07 21:32:49-07:00 kmacy@curly.lab.netapp.com # get AP booting working # currently crashing in init_secondary - will fix after adding SMP debug support # Signed-off-by: Kip Macy <kmacy@fsmware.com> # # freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h # 2005/05/07 21:32:47-07:00 kmacy@curly.lab.netapp.com +4 -0 # add declaration for per-cpu clock init # # freebsd-5.3-xen-sparse/i386-xen/include/pmap.h # 2005/05/07 21:32:47-07:00 kmacy@curly.lab.netapp.com +1 -0 # make pmap_lazyfix_action global # # freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h # 2005/05/07 21:32:47-07:00 kmacy@curly.lab.netapp.com +6 -1 # add IPI fields # # freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h # 2005/05/07 21:32:47-07:00 kmacy@curly.lab.netapp.com +16 -0 # add boot_vcpu call # # freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c # 2005/05/07 21:32:47-07:00 kmacy@curly.lab.netapp.com +0 -2 # make PANIC_IF declaration global # # freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c # 2005/05/07 21:32:47-07:00 kmacy@curly.lab.netapp.com +0 -1 # make pmap_lazyfix_action global # # freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c # 2005/05/07 21:32:47-07:00 kmacy@curly.lab.netapp.com +229 -55 # add support for booting APs # # freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c # 2005/05/07 21:32:47-07:00 kmacy@curly.lab.netapp.com +78 -46 # do per-cpu GDT initialization up-front # # freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c # 2005/05/07 21:32:47-07:00 kmacy@curly.lab.netapp.com +15 -8 # special case AST IPI # # freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c # 2005/05/07 21:32:46-07:00 kmacy@curly.lab.netapp.com +60 -14 # add per-cpu clock support # diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c 2005-05-06 21:37:31 -07:00 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c 
2005-05-06 21:37:31 -07:00 @@ -87,6 +87,12 @@ /* XEN specific defines */ #include <machine/xen_intr.h> +#include <vm/vm.h> /* needed by machine/pmap.h */ +#include <vm/pmap.h> /* needed by machine/pmap.h */ +#include <machine/pmap.h> /* needed by xen-os.h */ +#include <machine/hypervisor-ifs.h> +#include <machine/xen-os.h> /* needed by xenfunc.h */ +#include <machine/xenfunc.h> /* * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we @@ -129,7 +135,15 @@ static uint32_t shadow_time_version; static struct timeval shadow_tv; +#define DEFINE_PER_CPU(type, name) \ + __typeof__(type) per_cpu__##name + +#define per_cpu(var, cpu) (*((void)cpu, &per_cpu__##var)) + + static uint64_t processed_system_time;/* System time (ns) at last processing. */ +static DEFINE_PER_CPU(uint64_t, processed_system_time); + #define NS_PER_TICK (1000000000ULL/hz) @@ -202,18 +216,19 @@ static void clkintr(struct clockframe *frame) { - int64_t delta; + int64_t cpu_delta, delta; + int cpu = smp_processor_id(); long ticks = 0; - do { __get_time_values_from_xen(); - delta = (int64_t)(shadow_system_time + - xen_get_offset() * 1000 - - processed_system_time); + delta = cpu_delta = (int64_t)shadow_system_time + + (int64_t)xen_get_offset() * 1000; + delta -= processed_system_time; + cpu_delta -= per_cpu(processed_system_time, cpu); } while (!TIME_VALUES_UP_TO_DATE); - if (unlikely(delta < 0)) { + if (unlikely(delta < 0) || unlikely(cpu_delta < 0)) { printk("Timer ISR: Time went backwards: %lld\n", delta); return; } @@ -225,15 +240,28 @@ delta -= NS_PER_TICK; processed_system_time += NS_PER_TICK; } - - if (ticks > 0) { - if (frame) - timer_func(frame); -#ifdef SMP - if (timer_func == hardclock && frame) - forward_hardclock(); + /* Local CPU jiffy work. 
*/ + while (cpu_delta >= NS_PER_TICK) { + cpu_delta -= NS_PER_TICK; + per_cpu(processed_system_time, cpu) += NS_PER_TICK; +#if 0 + update_process_times(user_mode(regs)); + profile_tick(CPU_PROFILING, regs); #endif } + if (ticks > 0) { + if (frame) timer_func(frame); + } + + if (cpu != 0) + return; + /* + * Take synchronised time from Xen once a minute if we're not + * synchronised ourselves, and we haven't chosen to keep an independent + * time base. + */ + + /* XXX TODO */ } #include "opt_ddb.h" @@ -429,7 +457,7 @@ * Start clocks running. */ void -cpu_initclocks() +cpu_initclocks(void) { int diag; int time_irq = bind_virq_to_irq(VIRQ_TIMER); @@ -445,7 +473,25 @@ /* initialize xen values */ __get_time_values_from_xen(); processed_system_time = shadow_system_time; + per_cpu(processed_system_time, 0) = processed_system_time; + +} + +#ifdef SMP +void +ap_cpu_initclocks(void) +{ + int irq; + int cpu = smp_processor_id(); + + per_cpu(processed_system_time, cpu) = shadow_system_time; + + irq = bind_virq_to_irq(VIRQ_TIMER); + PCPU_SET(time_irq, irq); + PANIC_IF(intr_add_handler("clk", irq, (driver_intr_t *)clkintr, + NULL, INTR_TYPE_CLK | INTR_FAST, NULL)); } +#endif void cpu_startprofclock(void) diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c 2005-05-06 21:37:31 -07:00 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c 2005-05-06 21:37:31 -07:00 @@ -79,9 +79,14 @@ l2 &= ~(1 << l2i); port = (l1i << 5) + l2i; + irq = evtchn_to_irq[port]; +#ifdef SMP + if (irq == PCPU_GET(cpuast)) + continue; +#endif if ( (owned = mtx_owned(&sched_lock)) != 0 ) mtx_unlock_spin_flags(&sched_lock, MTX_QUIET); - if ( (irq = evtchn_to_irq[port]) != -1 ) { + if ( irq != -1 ) { struct intsrc *isrc = intr_lookup_source(irq); intr_execute_handlers(isrc, frame); } else { @@ -584,6 +589,7 @@ PCPU_GET(virq_to_irq)[i] = -1; } + static void evtchn_init(void *dummy __unused) { 
@@ -591,13 +597,6 @@ struct xenpic *xp; struct xenpic_intsrc *pin; - /* - * xenpic_lock: in order to allow an interrupt to occur in a critical - * section, to set pcpu->ipending (etc...) properly, we - * must be able to get the icu lock, so it can't be - * under witness. - */ - mtx_init(&irq_mapping_update_lock, "xp", NULL, MTX_DEF); /* XXX -- expedience hack */ PCPU_SET(virq_to_irq, (int *)&virq_to_irq[0]); @@ -657,3 +656,11 @@ } SYSINIT(evtchn_init, SI_SUB_INTR, SI_ORDER_ANY, evtchn_init, NULL); + /* + * xenpic_lock: in order to allow an interrupt to occur in a critical + * section, to set pcpu->ipending (etc...) properly, we + * must be able to get the icu lock, so it can't be + * under witness. + */ + +MTX_SYSINIT(irq_mapping_update_lock, &irq_mapping_update_lock, "xp", MTX_DEF|MTX_NOWITNESS); diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c 2005-05-06 21:37:31 -07:00 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c 2005-05-06 21:37:31 -07:00 @@ -78,6 +78,7 @@ #include <sys/sched.h> #include <sys/sysent.h> #include <sys/sysctl.h> +#include <sys/smp.h> #include <sys/ucontext.h> #include <sys/vmmeter.h> #include <sys/bus.h> @@ -883,14 +884,6 @@ static void cpu_idle_default(void) { -#if 0 - /* - * we must absolutely guarentee that hlt is the - * absolute next instruction after sti or we - * introduce a timing window. 
- */ - __asm __volatile("sti; hlt"); -#endif idle_block(); enable_intr(); } @@ -1376,6 +1369,7 @@ unsigned long *xen_machine_phys = ((unsigned long *)VADDR(1008, 0)); int preemptable; int gdt_set; +static int ncpus; /* Linux infection */ #define PAGE_OFFSET KERNBASE @@ -1387,6 +1381,10 @@ int i; vm_paddr_t pdir_shadow_ma, KPTphys; vm_offset_t *pdir_shadow; +#ifdef SMP + int j; +#endif + #ifdef WRITABLE_PAGETABLES printk("using writable pagetables\n"); HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); @@ -1447,18 +1445,19 @@ #ifdef SMP +#if 0 /* allocate cpu0 private page */ cpu0prvpage = (KERNBASE + (tmpindex << PAGE_SHIFT)); tmpindex++; - +#endif /* allocate SMP page table */ SMPpt = (unsigned long *)(KERNBASE + (tmpindex << PAGE_SHIFT)); - +#if 0 /* Map the private page into the SMP page table */ SMPpt[0] = vtomach(cpu0prvpage) | PG_RW | PG_M | PG_V | PG_A; - +#endif /* map SMP page table RO */ - PT_SET_MA(SMPpt, vtomach(SMPpt) & ~PG_RW); + PT_SET_MA(SMPpt, *vtopte((vm_offset_t)SMPpt) & ~PG_RW); /* put the page table into the page directory */ xen_queue_pt_update((vm_paddr_t)(IdlePTD + MPPTDI), @@ -1496,44 +1495,61 @@ tmpindex++; HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = (unsigned long)xen_phys_machine; + ncpus = HYPERVISOR_shared_info->n_vcpu; +#ifdef SMP + for (i = 0; i < ncpus; i++) { + int npages = (sizeof(struct privatespace) + 1)/PAGE_SIZE; + for (j = 0; j < npages; j++) { + vm_paddr_t ma = xpmap_ptom(tmpindex << PAGE_SHIFT); + tmpindex++; + PT_SET_VA_MA(SMPpt + i*npages + j, ma | PG_A | PG_V | PG_RW | PG_M, FALSE); + } + } + xen_flush_queue(); +#endif init_first = tmpindex; } + +trap_info_t trap_table[] = { + { 0, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)}, + { 1, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)}, + { 3, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)}, + { 4, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)}, + /* This is UPL on Linux and KPL on BSD */ + { 5, 
3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)}, + { 6, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)}, + { 7, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)}, + /* + * { 8, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)}, + * no handler for double fault + */ + { 9, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)}, + {10, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)}, + {11, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)}, + {12, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)}, + {13, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)}, + {14, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)}, + {15, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)}, + {16, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)}, + {17, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)}, + {18, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)}, + {19, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)}, + {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)}, + { 0, 0, 0, 0 } +}; + void init386(void) { int gsel_tss, metadata_missing, off, x, error; struct pcpu *pc; unsigned long gdtmachpfn; - trap_info_t trap_table[] = { - { 0, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)}, - { 1, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)}, - { 3, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)}, - { 4, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)}, - /* This is UPL on Linux and KPL on BSD */ - { 5, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)}, - { 6, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)}, - { 7, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)}, - /* - * { 8, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)}, - * no handler for double fault - */ - { 9, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)}, - 
{10, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)}, - {11, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)}, - {12, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)}, - {13, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)}, - {14, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)}, - {15, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)}, - {16, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)}, - {17, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)}, - {18, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)}, - {19, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)}, - {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)}, - { 0, 0, 0, 0 } - }; +#ifdef SMP + int i; +#endif proc0.p_uarea = proc0uarea; thread0.td_kstack = proc0kstack; thread0.td_pcb = (struct pcb *) @@ -1583,26 +1599,42 @@ gdt_segs[GDATA_SEL].ssd_limit = atop(0 - ((1 << 26) - (1 << 22) + (1 << 16))); #endif #ifdef SMP - /* this correspond to the cpu private page as mapped into the SMP page - * table in initvalues + /* XXX this will blow up if there are more than 512/NGDT vcpus - will never + * be an issue in the real world but should add an assert on general principles + * we'll likely blow up when we hit LAST_RESERVED_GDT_ENTRY, at which point we + * would need to start allocating more pages for the GDT */ pc = &SMP_prvspace[0].pcpu; - gdt_segs[GPRIV_SEL].ssd_limit = - atop(sizeof(struct privatespace) - 1); + for (i = 0; i < ncpus; i++) { + cpu_add(i, (i == 0)); + + gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[i]; + gdt_segs[GPRIV_SEL].ssd_limit = + atop(sizeof(struct privatespace) - 1); + gdt_segs[GPROC0_SEL].ssd_base = + (int) &SMP_prvspace[i].pcpu.pc_common_tss; + SMP_prvspace[i].pcpu.pc_prvspace = + &SMP_prvspace[i].pcpu; + + for (x = 0; x < NGDT; x++) { + ssdtosd(&gdt_segs[x], &gdt[i * NGDT + x].sd); + } + } #else pc = &__pcpu; gdt_segs[GPRIV_SEL].ssd_limit =
atop(sizeof(struct pcpu) - 1); -#endif gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); +#endif + PT_SET_MA(gdt, *vtopte((unsigned long)gdt) & ~PG_RW); gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT; - if ((error = HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1))) - panic("set_gdt failed"); + PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1) != 0); + lgdt_finish(); gdt_set = 1; diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c 2005-05-06 21:37:31 -07:00 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c 2005-05-06 21:37:31 -07:00 @@ -83,7 +83,16 @@ #include <machine/specialreg.h> #include <machine/privatespace.h> + +/* XEN includes */ #include <machine/xenfunc.h> +#include <machine/xen_intr.h> + +void Xhypervisor_callback(void); +void failsafe_callback(void); + +/***************/ + #define WARMBOOT_TARGET 0 #define WARMBOOT_OFF (KERNBASE + 0x0467) @@ -94,6 +103,10 @@ #define BIOS_RESET (0x0f) #define BIOS_WARM (0x0a) + +#undef POSTCODE +#define POSTCODE(x) + /* * this code MUST be enabled here and in mpboot.s. * it follows the very early stages of AP boot by placing values in CMOS ram. @@ -175,6 +188,8 @@ /* SMP page table page */ extern pt_entry_t *SMPpt; +extern trap_info_t trap_table[]; + struct pcb stoppcbs[MAXCPU]; /* Variables needed for SMP tlb shootdown. */ @@ -208,7 +223,9 @@ static void set_logical_apic_ids(void); static int start_all_aps(void); +#if 0 static void install_ap_tramp(void); +#endif static int start_ap(int apic_id); static void release_aps(void *dummy); @@ -314,6 +331,7 @@ cpu_mp_probe(void) { + mp_ncpus = HYPERVISOR_shared_info->n_vcpu; /* * Always record BSP in CPU map so that the mbuf init code works * correctly. 
@@ -342,20 +360,24 @@ return (1); } -/* - * Initialize the IPI handlers and start up the AP's. - */ -void -cpu_mp_start(void) +static void +cpu_mp_ipi_init(void) { - int i; - - POSTCODE(MP_START_POST); - - /* Initialize the logical ID to APIC ID table. */ - for (i = 0; i < MAXCPU; i++) - cpu_apic_ids[i] = -1; - + int irq; + int cpu = smp_processor_id(); + /* + * these are not needed by XenFreeBSD - from Keir: + * For TLB-flush related IPIs, Xen has hypercalls + * you should use instead. You can pass a pointer + * to a vcpu bitmap to update_va_mapping(), and to + * MMUEXT_flush_tlb_multi and MMUEXT_invlpg_multi. + * Xen will then make sure that those vcpus get + * flushed appropriately before returning to the + * caller. + * There is also no indication that we need to forward + * clock interrupts. + */ +#if 0 /* Install an inter-CPU IPI for TLB invalidation */ setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); @@ -371,22 +393,69 @@ /* Install an inter-CPU IPI for forwarding statclock() */ setidt(IPI_STATCLOCK, IDTVEC(statclock), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - +#endif + + /* + * These can all be consolidated. For now leaving + * as individual IPIs. 
+ * + */ +#if 0 /* Install an inter-CPU IPI for lazy pmap release */ setidt(IPI_LAZYPMAP, IDTVEC(lazypmap), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#else + irq = bind_ipi_on_cpu_to_irq(cpu, IPI_LAZYPMAP); + PCPU_SET(lazypmap, irq); + PANIC_IF(intr_add_handler("pmap_lazyfix", irq, + (driver_intr_t *)pmap_lazyfix_action, + NULL, INTR_TYPE_CLK | INTR_FAST, NULL)); +#endif +#if 0 /* Install an inter-CPU IPI for all-CPU rendezvous */ setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#else + irq = bind_ipi_on_cpu_to_irq(cpu, IPI_RENDEZVOUS); + PCPU_SET(rendezvous, irq); + PANIC_IF(intr_add_handler("smp_rendezvous", irq, + (driver_intr_t *)smp_rendezvous_action, + NULL, INTR_TYPE_CLK | INTR_FAST, NULL)); +#endif +#if 0 /* Install an inter-CPU IPI for forcing an additional software trap */ setidt(IPI_AST, IDTVEC(cpuast), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - +#else + irq = bind_ipi_on_cpu_to_irq(cpu, IPI_AST); + PCPU_SET(cpuast, irq); +#endif + /* XXX ignore for now */ +#if 0 /* Install an inter-CPU IPI for CPU stop/restart */ setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#endif + +} + +SYSINIT(ipi_setup, SI_SUB_INTR, SI_ORDER_ANY, cpu_mp_ipi_init, NULL); + +/* + * Initialize the IPI handlers and start up the AP's. + */ +void +cpu_mp_start(void) /* --- Start here --- */ +{ + int i; + + POSTCODE(MP_START_POST); + + /* Initialize the logical ID to APIC ID table. */ + for (i = 0; i < MAXCPU; i++) + cpu_apic_ids[i] = -1; /* Set boot_cpu_id if needed. 
*/ @@ -437,35 +506,44 @@ void init_secondary(void) { - int gsel_tss; - int x, myid; + int myid; + unsigned long gdtmachpfn; + printk("MADE IT!!"); + #if 0 u_int cr0; #endif + /* Steps to booting SMP on xen as gleaned from XenLinux: + * - cpu_init() - processor specific initialization + * - smp_callin() + * - wait 2s for BP to finish its startup sequence + * - map_cpu_to_logical_apicid() + * - save cpuid info + * - set bit in callin map to let master (BP?) continue + * - local setup timer() - per cpu timer initialization + * - ldebug_setup() - bind debug IRQ to local CPU. + * - smp_intr_init() - IPI setup that we do in cpu_mp_start + * - local_irq_enable() - enable interrupts locally + * - cpu_set(id, map) - announce that we're up + * - cpu_idle() - make us schedulable + */ + + /* bootAP is set in start_ap() to our ID. */ myid = bootAP; - gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid]; - gdt_segs[GPROC0_SEL].ssd_base = - (int) &SMP_prvspace[myid].pcpu.pc_common_tss; - SMP_prvspace[myid].pcpu.pc_prvspace = - &SMP_prvspace[myid].pcpu; - for (x = 0; x < NGDT; x++) { - ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); - } + gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT; + PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, LAST_RESERVED_GDT_ENTRY + 1) != 0); -#if 0 - r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; - r_gdt.rd_base = (int) &gdt[myid * NGDT]; - lgdt(&r_gdt); /* does magic intra-segment return */ + + lgdt_finish(); - lidt(&r_idt); - lldt(_default_ldt); -#endif + PCPU_SET(cpuid, myid); + + + set_user_ldt((struct mdproc *)_default_ldt); PCPU_SET(currentldt, _default_ldt); - gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); - gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); @@ -557,6 +635,13 @@ while (smp_started == 0) ia32_pause(); + /* need to wait until now to setup the IPIs as SI_SUB_CPU 
is + * much earlier than SI_SUB_INTR + */ + ap_evtchn_init(myid); + ap_cpu_initclocks(); + cpu_mp_ipi_init(); + /* ok, now grab sched_lock and enter the scheduler */ mtx_lock_spin(&sched_lock); @@ -610,28 +695,35 @@ static int start_all_aps(void) { -#ifndef PC98 - u_char mpbiosreason; -#endif - u_long mpbioswarmvec; struct pcpu *pc; char *stack; - uintptr_t kptbase; - int i, pg, apic_id, cpu; + int i, apic_id, cpu; + + /* + * This function corresponds most closely to + * smp_boot_cpus in XenLinux - the sequence there + * is: + * - check if SMP config is found - if not: + * - clear the I/O APIC IRQs + * - map cpu to logical apicid + * - exit + * - smp_intr_init - IPI initialization + * - map cpu to logical apicid + * - boot each of the vcpus + * - clear and then construct the cpu sibling [logical CPUs] map. + * + */ POSTCODE(START_ALL_APS_POST); mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); - +#if 0 /* install the AP 1st level boot code */ install_ap_tramp(); /* save the current value of the warm-start vector */ mpbioswarmvec = *((u_long *) WARMBOOT_OFF); -#ifndef PC98 - outb(CMOS_REG, BIOS_RESET); - mpbiosreason = inb(CMOS_DATA); -#endif + /* set up temporary P==V mapping for AP boot */ /* XXX this is a hack, we should boot the AP on its own stack/PTD */ @@ -640,7 +732,7 @@ PTD[i] = (pd_entry_t)(PG_V | PG_RW | ((kptbase + i * PAGE_SIZE) & PG_FRAME)); invltlb(); - +#endif /* start each AP */ for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) { if (!cpu_info[apic_id].cpu_present || @@ -650,7 +742,7 @@ /* save APIC ID for this logical ID */ cpu_apic_ids[cpu] = apic_id; - +#if 0 /* first page of AP's private space */ pg = cpu * i386_btop(sizeof(struct privatespace)); @@ -665,11 +757,14 @@ for (i = 0; i < KSTACK_PAGES; i++) SMPpt[pg + 1 + i] = (pt_entry_t) (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack)); +#endif + pc = &SMP_prvspace[cpu].pcpu; /* prime data page for it to use */ pcpu_init(pc, cpu, sizeof(struct pcpu)); pc->pc_apic_id = apic_id; +#if 0 /* 
setup a vector to our boot code */ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); @@ -677,7 +772,7 @@ outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ #endif - +#endif bootSTK = &SMP_prvspace[cpu].idlekstack[KSTACK_PAGES * PAGE_SIZE]; bootAP = cpu; @@ -700,13 +795,10 @@ /* build our map of 'other' CPUs */ PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); +#if 0 /* restore the warmstart vector */ *(u_long *) WARMBOOT_OFF = mpbioswarmvec; -#ifndef PC98 - outb(CMOS_REG, BIOS_RESET); - outb(CMOS_DATA, mpbiosreason); #endif - /* * Set up the idle context for the BSP. Similar to above except * that some was done by locore, some by pmap.c and some is implicit @@ -739,7 +831,7 @@ extern void MPentry(void); extern u_int MP_GDT; extern u_int mp_gdtbase; - +#if 0 static void install_ap_tramp(void) { @@ -791,6 +883,21 @@ *dst16 = (u_int) boot_address & 0xffff; *dst8 = ((u_int) boot_address >> 16) & 0xff; } +#endif + +static int +cpu_mp_trap_init(trap_info_t *trap_ctxt) +{ + + trap_info_t *t = trap_table; + + for (t = trap_table; t->address; t++) { + trap_ctxt[t->vector].flags = t->flags; + trap_ctxt[t->vector].cs = t->cs; + trap_ctxt[t->vector].address = t->address; + } + return 0x80 /*SYSCALL_VECTOR*/; +} /* * This function starts the AP (application processor) identified @@ -802,8 +909,25 @@ static int start_ap(int apic_id) { - int vector, ms; - int cpus; + int vector, ms, i; + int cpus, boot_error; + vcpu_guest_context_t ctxt; + + /* + * This is the FreeBSD equivalent to do_boot_cpu(apicid) in + * smpboot.c. + * its initialization sequence consists of: + * - fork_idle(cpu) to create separate idle context + * - initialization of idle's context to start_secondary + * - initialization of cpu ctxt to start in startup_32_smp + * - then we call HYPERVISOR_boot_vcpu with the cpu index and + * a pointer to the context. 
+ * - on boot success we: + * - set ourselves in the callout_map + * - wait up to 5 seconds for us to be set in the callin map + * - set x86_cpu_to_apicid[cpu] = apicid; + * + */ POSTCODE(START_AP_POST); @@ -813,6 +937,55 @@ /* used as a watchpoint to signal AP startup */ cpus = mp_naps; + memset(&ctxt, 0, sizeof(ctxt)); + + ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL); + ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL); + ctxt.user_regs.fs = 0; + ctxt.user_regs.gs = 0; + ctxt.user_regs.ss = __KERNEL_DS; + ctxt.user_regs.cs = __KERNEL_CS; + ctxt.user_regs.eip = (unsigned long)init_secondary; + ctxt.user_regs.esp = (unsigned long)bootSTK; +#ifdef notyet + ctxt.user_regs.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12); +#else + ctxt.user_regs.eflags = (1<<9) | (1<<2); +#endif + /* FPU is set up to default initial state. */ + memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); + + /* Virtual IDT is empty at start-of-day. */ + for ( i = 0; i < 256; i++ ) + { + ctxt.trap_ctxt[i].vector = i; + ctxt.trap_ctxt[i].cs = FLAT_KERNEL_CS; + } + ctxt.fast_trap_idx = cpu_mp_trap_init(ctxt.trap_ctxt); + + /* No LDT. */ + ctxt.ldt_ents = 0; + + /* Ring 1 stack is the initial stack. */ + ctxt.kernel_ss = __KERNEL_DS; + ctxt.kernel_sp = (unsigned long)bootSTK; + + /* Callback handlers. */ + ctxt.event_callback_cs = __KERNEL_CS; + ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback; + ctxt.failsafe_callback_cs = __KERNEL_CS; + ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; + + ctxt.pt_base = (vm_paddr_t)IdlePTD; + + boot_error = HYPERVISOR_boot_vcpu(bootAP, &ctxt); + + + if (boot_error) + printk("Houston we have a problem\n"); + else + printk("boot_vcpu succeeded\n"); +#if 0 /* * first we do an INIT/RESET IPI this INIT IPI might be run, reseting * and running the target CPU. 
OR this INIT IPI might be latched (P5 @@ -862,6 +1035,7 @@ APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | vector, apic_id); lapic_ipi_wait(-1); +#endif DELAY(200); /* wait ~200uS */ /* Wait up to 5 seconds for it to start. */ diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c 2005-05-06 21:37:31 -07:00 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c 2005-05-06 21:37:31 -07:00 @@ -1374,7 +1374,6 @@ static u_int lazyptd; static volatile u_int lazywait; -void pmap_lazyfix_action(void); void pmap_lazyfix_action(void) diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c 2005-05-06 21:37:31 -07:00 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c 2005-05-06 21:37:31 -07:00 @@ -380,8 +380,6 @@ (void)HYPERVISOR_console_write(buf, ret); } -#define PANIC_IF(exp) if (unlikely(exp)) {printk("%s failed\n",#exp); panic("%s: %s:%d", #exp, __FILE__, __LINE__);} - #define XPQUEUE_SIZE 128 #ifdef SMP diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h 2005-05-06 21:37:31 -07:00 +++ b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h 2005-05-06 21:37:31 -07:00 @@ -441,4 +441,20 @@ return ret; } +static inline int +HYPERVISOR_boot_vcpu( + unsigned long vcpu, vcpu_guest_context_t *ctxt) +{ + int ret; + unsigned long ign1, ign2; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1), "=c" (ign2) + : "0" (__HYPERVISOR_boot_vcpu), "1" (vcpu), "2" (ctxt) + : "memory"); + + return ret; +} + #endif /* __HYPERVISOR_H__ */ diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h b/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h --- 
a/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h 2005-05-06 21:37:31 -07:00 +++ b/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h 2005-05-06 21:37:31 -07:00 @@ -53,7 +53,12 @@ int *pc_ipi_to_evtchn; \ int *pc_virq_to_irq; \ u_int pc_cr2; \ - u_int pc_pdir + u_int pc_pdir; \ + u_int pc_lazypmap; \ + u_int pc_rendezvous; \ + u_int pc_cpuast; \ + u_int pc_time_irq; \ + uint64_t pc_processed_system_time; #if defined(lint) diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h b/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h 2005-05-06 21:37:31 -07:00 +++ b/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h 2005-05-06 21:37:31 -07:00 @@ -343,6 +343,7 @@ void pmap_invalidate_page(pmap_t, vm_offset_t); void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t); void pmap_invalidate_all(pmap_t); +void pmap_lazyfix_action(void); void pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len); void pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len); diff -Nru a/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h b/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h 2005-05-06 21:37:31 -07:00 +++ b/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h 2005-05-06 21:37:31 -07:00 @@ -61,6 +61,9 @@ void xen_machphys_update(unsigned long, unsigned long); void xen_update_descriptor(union descriptor *, union descriptor *); void lldt(u_short sel); +void ap_cpu_initclocks(void); + + /* * Invalidate a patricular VA on all cpus * @@ -79,5 +82,6 @@ } +#define PANIC_IF(exp) if (unlikely(exp)) {printk("%s failed\n",#exp); panic("%s: %s:%d", #exp, __FILE__, __LINE__);} #endif /* _XEN_XENFUNC_H_ */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel