Jan Beulich
2009-Jun-10 14:48 UTC
[Xen-devel] [PATCH 3/3] x86_64: allow more vCPU-s per guest
Since the shared info layout is fixed, guests are required to use VCPUOP_register_vcpu_info prior to booting any vCPU beyond the traditional limit of 32. MAX_VIRT_CPUS, being an implementation detail of the hypervisor, is no longer being exposed in the public headers. The tools changes are clearly incomplete (and done only so things would build again), and the current state of the tools (using scalar variables all over the place to represent vCPU bitmaps) very likely doesn't permit booting DomU-s with more than the traditional number of vCPU-s. Testing of the extended functionality was done with Dom0 (96 vCPU-s, as well as 128 vCPU-s out of which the kernel elected - by way of a simple kernel side patch - to use only some, resulting in a sparse bitmap). ia64 changes only to make things build, and build-tested only (and the tools part only as far as the build would go without encountering unrelated problems in the blktap code). Signed-off-by: Jan Beulich <jbeulich@novell.com> --- 2009-06-10.orig/docs/src/interface.tex 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/docs/src/interface.tex 2009-06-10 10:48:29.000000000 +0200 @@ -462,7 +462,7 @@ The structure is declared in {\bf xen/in \scriptsize \begin{verbatim} typedef struct shared_info { - vcpu_info_t vcpu_info[MAX_VIRT_CPUS]; + vcpu_info_t vcpu_info[XEN_LEGACY_MAX_VCPUS]; /* * A domain can create "event channels" on which it can send and receive --- 2009-06-10.orig/tools/debugger/xenitp/xenitp.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/tools/debugger/xenitp/xenitp.c 2009-06-10 10:48:29.000000000 +0200 @@ -955,7 +955,7 @@ char *parse_arg (char **buf) return res; } -vcpu_guest_context_any_t vcpu_ctx_any[MAX_VIRT_CPUS]; +vcpu_guest_context_any_t *vcpu_ctx_any; int vcpu_setcontext (int vcpu) { @@ -1584,11 +1584,23 @@ void xenitp (int vcpu) { int ret; struct sigaction sa; - - cur_ctx = &vcpu_ctx_any[vcpu].c; + xc_dominfo_t dominfo; xc_handle = xc_interface_open (); /* for accessing control interface */ + 
ret = xc_domain_getinfo (xc_handle, domid, 1, &dominfo); + if (ret < 0) { + perror ("xc_domain_getinfo"); + exit (-1); + } + + vcpu_ctx_any = calloc (sizeof(vcpu_ctx_any), dominfo.max_vcpu_id + 1); + if (!vcpu_ctx_any) { + perror ("vcpu context array alloc"); + exit (-1); + } + cur_ctx = &vcpu_ctx_any[vcpu].c; + if (xc_domain_setdebugging (xc_handle, domid, 1) != 0) perror ("setdebugging"); --- 2009-06-10.orig/tools/include/xen-foreign/structs.py 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/tools/include/xen-foreign/structs.py 2009-06-10 10:48:29.000000000 +0200 @@ -53,6 +53,6 @@ defines = [ "__i386__", # all archs "xen_pfn_to_cr3", - "MAX_VIRT_CPUS", + "XEN_LEGACY_MAX_VCPUS", "MAX_GUEST_CMDLINE" ]; --- 2009-06-10.orig/tools/libxc/ia64/xc_ia64_linux_restore.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/tools/libxc/ia64/xc_ia64_linux_restore.c 2009-06-10 10:48:29.000000000 +0200 @@ -174,7 +174,7 @@ xc_ia64_recv_shared_info(int xc_handle, /* clear any pending events and the selector */ memset(&(shared_info->evtchn_pending[0]), 0, sizeof (shared_info->evtchn_pending)); - for (i = 0; i < MAX_VIRT_CPUS; i++) + for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) shared_info->vcpu_info[i].evtchn_pending_sel = 0; if (start_info_pfn != NULL) --- 2009-06-10.orig/tools/libxc/ia64/xc_ia64_linux_save.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/tools/libxc/ia64/xc_ia64_linux_save.c 2009-06-10 10:48:29.000000000 +0200 @@ -238,7 +238,7 @@ xc_ia64_pv_send_context(int xc_handle, i /* vcpu map */ uint64_t *vcpumap = NULL; - if (xc_ia64_send_vcpumap(xc_handle, io_fd, dom, info, MAX_VIRT_CPUS, + if (xc_ia64_send_vcpumap(xc_handle, io_fd, dom, info, XEN_LEGACY_MAX_VCPUS, &vcpumap)) goto out; @@ -308,7 +308,7 @@ xc_ia64_hvm_send_context(int xc_handle, return -1; /* vcpu map */ - if (xc_ia64_send_vcpumap(xc_handle, io_fd, dom, info, MAX_VIRT_CPUS, + if (xc_ia64_send_vcpumap(xc_handle, io_fd, dom, info, XEN_LEGACY_MAX_VCPUS, &vcpumap)) goto out; --- 
2009-06-10.orig/tools/libxc/xc_core.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/tools/libxc/xc_core.c 2009-06-10 10:48:29.000000000 +0200 @@ -430,7 +430,7 @@ xc_domain_dumpcore_via_callback(int xc_h int nr_vcpus = 0; char *dump_mem, *dump_mem_start = NULL; - vcpu_guest_context_any_t ctxt[MAX_VIRT_CPUS]; + vcpu_guest_context_any_t *ctxt = NULL; struct xc_core_arch_context arch_ctxt; char dummy[PAGE_SIZE]; int dummy_len; @@ -495,6 +495,13 @@ xc_domain_dumpcore_via_callback(int xc_h goto out; } + ctxt = calloc(sizeof(*ctxt), info.max_vcpu_id + 1); + if ( !ctxt ) + { + PERROR("Could not allocate vcpu context array", domid); + goto out; + } + for ( i = 0; i <= info.max_vcpu_id; i++ ) { if ( xc_vcpu_getcontext(xc_handle, domid, i, &ctxt[nr_vcpus]) == 0 ) @@ -900,6 +907,8 @@ out: xc_core_shdr_free(sheaders); if ( strtab != NULL ) xc_core_strtab_free(strtab); + if ( ctxt != NULL ) + free(ctxt); if ( dump_mem_start != NULL ) free(dump_mem_start); if ( live_shinfo != NULL ) --- 2009-06-10.orig/tools/libxc/xc_core_ia64.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/tools/libxc/xc_core_ia64.c 2009-06-10 10:48:29.000000000 +0200 @@ -251,13 +251,10 @@ xc_core_arch_map_p2m(int xc_handle, unsi void xc_core_arch_context_init(struct xc_core_arch_context* arch_ctxt) { - int i; - arch_ctxt->mapped_regs_size (XMAPPEDREGS_SIZE < PAGE_SIZE) ? 
PAGE_SIZE: XMAPPEDREGS_SIZE; arch_ctxt->nr_vcpus = 0; - for ( i = 0; i < MAX_VIRT_CPUS; i++ ) - arch_ctxt->mapped_regs[i] = NULL; + arch_ctxt->mapped_regs = NULL; xc_ia64_p2m_init(&arch_ctxt->p2m_table); } @@ -269,6 +266,7 @@ xc_core_arch_context_free(struct xc_core for ( i = 0; i < arch_ctxt->nr_vcpus; i++ ) if ( arch_ctxt->mapped_regs[i] != NULL ) munmap(arch_ctxt->mapped_regs[i], arch_ctxt->mapped_regs_size); + free(arch_ctxt->mapped_regs); xc_ia64_p2m_unmap(&arch_ctxt->p2m_table); } @@ -289,6 +287,21 @@ xc_core_arch_context_get(struct xc_core_ errno = ENOENT; return -1; } + if ( !(arch_ctxt->nr_vcpus & (arch_ctxt->nr_vcpus - 1)) ) { + unsigned int nr = arch_ctxt->nr_vcpus ? arch_ctxt->nr_vcpus << 1 : 1; + mapped_regs_t** new = realloc(arch_ctxt->mapped_regs, + nr * sizeof(*new)); + + if ( !new ) + { + PERROR("Could not alloc mapped regs pointer array"); + return -1; + } + memset(new + arch_ctxt->nr_vcpus, 0, + (nr - arch_ctxt->nr_vcpus) * sizeof(*new)); + arch_ctxt->mapped_regs = new; + } + mapped_regs = xc_map_foreign_range(xc_handle, domid, arch_ctxt->mapped_regs_size, PROT_READ, ctxt->privregs_pfn); --- 2009-06-10.orig/tools/libxc/xc_core_ia64.h 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/tools/libxc/xc_core_ia64.h 2009-06-10 10:48:29.000000000 +0200 @@ -29,7 +29,7 @@ struct xc_core_arch_context { size_t mapped_regs_size; int nr_vcpus; - mapped_regs_t* mapped_regs[MAX_VIRT_CPUS]; + mapped_regs_t** mapped_regs; struct xen_ia64_p2m_table p2m_table; }; --- 2009-06-10.orig/tools/libxc/xc_dom_ia64.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/tools/libxc/xc_dom_ia64.c 2009-06-10 10:48:29.000000000 +0200 @@ -87,7 +87,7 @@ int shared_info_ia64(struct xc_dom_image xc_dom_printf("%s: called\n", __FUNCTION__); memset(shared_info, 0, sizeof(*shared_info)); - for (i = 0; i < MAX_VIRT_CPUS; i++) + for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) shared_info->vcpu_info[i].evtchn_upcall_mask = 1; shared_info->arch.start_info_pfn = dom->start_info_pfn; 
shared_info->arch.memmap_info_num_pages = 1; //XXX --- 2009-06-10.orig/tools/libxc/xc_dom_x86.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/tools/libxc/xc_dom_x86.c 2009-06-10 10:48:29.000000000 +0200 @@ -498,7 +498,7 @@ static int shared_info_x86_32(struct xc_ xc_dom_printf("%s: called\n", __FUNCTION__); memset(shared_info, 0, sizeof(*shared_info)); - for ( i = 0; i < MAX_VIRT_CPUS; i++ ) + for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ ) shared_info->vcpu_info[i].evtchn_upcall_mask = 1; return 0; } @@ -511,7 +511,7 @@ static int shared_info_x86_64(struct xc_ xc_dom_printf("%s: called\n", __FUNCTION__); memset(shared_info, 0, sizeof(*shared_info)); - for ( i = 0; i < MAX_VIRT_CPUS; i++ ) + for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ ) shared_info->vcpu_info[i].evtchn_upcall_mask = 1; return 0; } --- 2009-06-10.orig/tools/libxc/xc_domain_restore.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/tools/libxc/xc_domain_restore.c 2009-06-10 10:48:29.000000000 +0200 @@ -1146,7 +1146,7 @@ int xc_domain_restore(int xc_handle, int /* clear any pending events and the selector */ MEMSET_ARRAY_FIELD(new_shared_info, evtchn_pending, 0); - for ( i = 0; i < MAX_VIRT_CPUS; i++ ) + for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ ) SET_FIELD(new_shared_info, vcpu_info[i].evtchn_pending_sel, 0); /* mask event channels */ --- 2009-06-10.orig/tools/libxc/xc_private.h 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/tools/libxc/xc_private.h 2009-06-10 10:48:29.000000000 +0200 @@ -191,11 +191,6 @@ void *xc_map_foreign_ranges(int xc_handl size_t size, int prot, size_t chunksize, privcmd_mmap_entry_t entries[], int nentries); -void *map_domain_va_core(unsigned long domfd, int cpu, void *guest_va, - vcpu_guest_context_any_t *ctxt); -int xc_waitdomain_core(int xc_handle, int domain, int *status, - int options, vcpu_guest_context_any_t *ctxt); - void bitmap_64_to_byte(uint8_t *bp, const uint64_t *lp, int nbits); void bitmap_byte_to_64(uint64_t *lp, const uint8_t *bp, int nbits); --- 
2009-06-10.orig/tools/libxc/xc_ptrace.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/tools/libxc/xc_ptrace.c 2009-06-10 10:48:29.000000000 +0200 @@ -42,7 +42,8 @@ static int current_is_hvm; static uint64_t online_cpumap; static uint64_t regs_valid; -static vcpu_guest_context_any_t ctxt[MAX_VIRT_CPUS]; +static unsigned int nr_vcpu_ids; +static vcpu_guest_context_any_t *ctxt; #define FOREACH_CPU(cpumap, i) for ( cpumap = online_cpumap; (i = xc_ffs64(cpumap)); cpumap &= ~(1 << (index - 1)) ) @@ -101,6 +102,21 @@ paging_enabled(vcpu_guest_context_any_t return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG); } +vcpu_guest_context_any_t *xc_ptrace_get_vcpu_ctxt(unsigned int nr_cpus) +{ + if (nr_cpus > nr_vcpu_ids) { + vcpu_guest_context_any_t *new; + + new = realloc(ctxt, nr_cpus * sizeof(*ctxt)); + if (!new) + return NULL; + ctxt = new; + nr_vcpu_ids = nr_cpus; + } + + return ctxt; +} + /* * Fetch registers for all online cpus and set the cpumap * to indicate which cpus are online @@ -113,6 +129,9 @@ get_online_cpumap(int xc_handle, struct { int i, online; + if (!xc_ptrace_get_vcpu_ctxt(d->max_vcpu_id + 1)) + return -ENOMEM; + *cpumap = 0; for (i = 0; i <= d->max_vcpu_id; i++) { fetch_regs(xc_handle, i, &online); @@ -261,7 +280,7 @@ xc_ptrace( case PTRACE_PEEKDATA: if (current_isfile) guest_va = (unsigned long *)map_domain_va_core( - current_domid, cpu, addr, ctxt); + current_domid, cpu, addr); else guest_va = (unsigned long *)map_domain_va( xc_handle, cpu, addr, PROT_READ); @@ -277,7 +296,7 @@ xc_ptrace( /* XXX assume that all CPUs have the same address space */ if (current_isfile) guest_va = (unsigned long *)map_domain_va_core( - current_domid, cpu, addr, ctxt); + current_domid, cpu, addr); else guest_va = (unsigned long *)map_domain_va( xc_handle, cpu, addr, PROT_READ|PROT_WRITE); @@ -433,7 +452,7 @@ xc_waitdomain( int options) { if (current_isfile) - return xc_waitdomain_core(xc_handle, domain, status, options, ctxt); + return xc_waitdomain_core(xc_handle, domain, 
status, options); return __xc_waitdomain(xc_handle, domain, status, options); } --- 2009-06-10.orig/tools/libxc/xc_ptrace.h 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/tools/libxc/xc_ptrace.h 2009-06-10 10:48:29.000000000 +0200 @@ -157,4 +157,9 @@ struct gdb_regs { } #endif +void *map_domain_va_core(unsigned long domfd, int cpu, void *guest_va); +int xc_waitdomain_core(int xc_handle, int domain, int *status, int options); +vcpu_guest_context_any_t *xc_ptrace_get_vcpu_ctxt(unsigned int nr_cpus); + + #endif /* XC_PTRACE */ --- 2009-06-10.orig/tools/libxc/xc_ptrace_core.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/tools/libxc/xc_ptrace_core.c 2009-06-10 10:48:29.000000000 +0200 @@ -12,6 +12,44 @@ #include <time.h> #include <inttypes.h> +static unsigned int max_nr_vcpus; +static unsigned long *cr3; +static unsigned long *cr3_phys; +static unsigned long **cr3_virt; +static unsigned long *pde_phys; +static unsigned long **pde_virt; +static unsigned long *page_phys; +static unsigned long **page_virt; + +static vcpu_guest_context_t * +ptrace_core_get_vcpu_ctxt(unsigned int nr_vcpus) +{ + if (nr_vcpus > max_nr_vcpus) { + void *new; + +#define REALLOC(what) \ + new = realloc(what, nr_vcpus * sizeof(*what)); \ + if (!new) \ + return NULL; \ + memset(what + max_nr_vcpus, 0, \ + (nr_vcpus - max_nr_vcpus) * sizeof(*what)); \ + what = new + + REALLOC(cr3); + REALLOC(cr3_phys); + REALLOC(cr3_virt); + REALLOC(pde_phys); + REALLOC(pde_virt); + REALLOC(page_phys); + REALLOC(page_virt); + +#undef REALLOC + max_nr_vcpus = nr_vcpus; + } + + return &xc_ptrace_get_vcpu_ctxt(nr_vcpus)->c; +} + /* Leave the code for the old format as is. 
*/ /* --- compatible layer for old format ------------------------------------- */ /* XXX application state */ @@ -21,7 +59,6 @@ static long nr_pages_compat = 0; static unsigned long *p2m_array_compat = NULL; static unsigned long *m2p_array_compat = NULL; static unsigned long pages_offset_compat; -static unsigned long cr3_compat[MAX_VIRT_CPUS]; /* --------------------- */ @@ -34,23 +71,15 @@ map_mtop_offset_compat(unsigned long ma) static void * -map_domain_va_core_compat(unsigned long domfd, int cpu, void *guest_va, - vcpu_guest_context_t *ctxt) +map_domain_va_core_compat(unsigned long domfd, int cpu, void *guest_va) { unsigned long pde, page; unsigned long va = (unsigned long)guest_va; void *v; - static unsigned long cr3_phys[MAX_VIRT_CPUS]; - static unsigned long *cr3_virt[MAX_VIRT_CPUS]; - static unsigned long pde_phys[MAX_VIRT_CPUS]; - static unsigned long *pde_virt[MAX_VIRT_CPUS]; - static unsigned long page_phys[MAX_VIRT_CPUS]; - static unsigned long *page_virt[MAX_VIRT_CPUS]; - - if (cr3_compat[cpu] != cr3_phys[cpu]) + if (cr3[cpu] != cr3_phys[cpu]) { - cr3_phys[cpu] = cr3_compat[cpu]; + cr3_phys[cpu] = cr3[cpu]; if (cr3_virt[cpu]) munmap(cr3_virt[cpu], PAGE_SIZE); v = mmap( @@ -93,7 +122,7 @@ map_domain_va_core_compat(unsigned long map_mtop_offset_compat(page_phys[cpu])); if (v == MAP_FAILED) { - IPRINTF("cr3 %lx pde %lx page %lx pti %lx\n", cr3_compat[cpu], pde, page, l1_table_offset_i386(va)); + IPRINTF("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page, l1_table_offset_i386(va)); page_phys[cpu] = 0; return NULL; } @@ -107,11 +136,11 @@ xc_waitdomain_core_compat( int xc_handle, int domfd, int *status, - int options, - vcpu_guest_context_t *ctxt) + int options) { int nr_vcpus; int i; + vcpu_guest_context_t *ctxt; xc_core_header_t header; if ( nr_pages_compat == 0 ) @@ -132,12 +161,18 @@ xc_waitdomain_core_compat( nr_vcpus = header.xch_nr_vcpus; pages_offset_compat = header.xch_pages_offset; + if ((ctxt = ptrace_core_get_vcpu_ctxt(nr_vcpus)) == 
NULL) + { + IPRINTF("Could not allocate vcpu context array\n"); + return -1; + } + if (read(domfd, ctxt, sizeof(vcpu_guest_context_t)*nr_vcpus) ! sizeof(vcpu_guest_context_t)*nr_vcpus) return -1; for (i = 0; i < nr_vcpus; i++) - cr3_compat[i] = ctxt[i].ctrlreg[3]; + cr3[i] = ctxt[i].ctrlreg[3]; if ((p2m_array_compat = malloc(nr_pages_compat * sizeof(unsigned long))) == NULL) { @@ -375,7 +410,6 @@ static uint64_t* pfn_array = NULL; /* fo static uint64_t pfn_array_size = 0; static long nr_pages = 0; static uint64_t pages_offset; -static unsigned long cr3[MAX_VIRT_CPUS]; static const struct xen_dumpcore_elfnote_format_version_desc known_format_version[] @@ -413,21 +447,13 @@ map_gmfn_to_offset_elf(unsigned long gmf } static void * -map_domain_va_core_elf(unsigned long domfd, int cpu, void *guest_va, - vcpu_guest_context_t *ctxt) +map_domain_va_core_elf(unsigned long domfd, int cpu, void *guest_va) { unsigned long pde, page; unsigned long va = (unsigned long)guest_va; unsigned long offset; void *v; - static unsigned long cr3_phys[MAX_VIRT_CPUS]; - static unsigned long *cr3_virt[MAX_VIRT_CPUS]; - static unsigned long pde_phys[MAX_VIRT_CPUS]; - static unsigned long *pde_virt[MAX_VIRT_CPUS]; - static unsigned long page_phys[MAX_VIRT_CPUS]; - static unsigned long *page_virt[MAX_VIRT_CPUS]; - if (cr3[cpu] != cr3_phys[cpu]) { if (cr3_virt[cpu]) @@ -498,10 +524,10 @@ xc_waitdomain_core_elf( int xc_handle, int domfd, int *status, - int options, - vcpu_guest_context_t *ctxt) + int options) { int i; + vcpu_guest_context_t *ctxt; struct elf_core ecore; struct xen_dumpcore_elfnote_none *none; @@ -527,14 +553,13 @@ xc_waitdomain_core_elf( if ((header->header.xch_magic != XC_CORE_MAGIC && header->header.xch_magic != XC_CORE_MAGIC_HVM) || header->header.xch_nr_vcpus == 0 || - header->header.xch_nr_vcpus >= MAX_VIRT_CPUS || header->header.xch_nr_pages == 0 || header->header.xch_page_size != PAGE_SIZE) goto out; current_is_auto_translated_physmap (header->header.xch_magic == 
XC_CORE_MAGIC_HVM); nr_pages = header->header.xch_nr_pages; - + /* .note.Xen: xen_version */ if (elf_core_search_note(&ecore, XEN_DUMPCORE_ELFNOTE_NAME, XEN_ELFNOTE_DUMPCORE_XEN_VERSION, @@ -561,6 +586,9 @@ xc_waitdomain_core_elf( format_version->format_version.version); } + if ((ctxt = ptrace_core_get_vcpu_ctxt(header->header.xch_nr_vcpus)) == NULL) + goto out; + /* .xen_prstatus: read vcpu_guest_context_t*/ if (elf_core_read_sec_by_name(&ecore, XEN_DUMPCORE_SEC_PRSTATUS, (char*)ctxt) < 0) @@ -621,12 +649,10 @@ out: typedef int (*xc_waitdomain_core_t)(int xc_handle, int domfd, int *status, - int options, - vcpu_guest_context_t *ctxt); + int options); typedef void *(*map_domain_va_core_t)(unsigned long domfd, int cpu, - void *guest_va, - vcpu_guest_context_t *ctxt); + void *guest_va); struct xc_core_format_type { xc_waitdomain_core_t waitdomain_core; map_domain_va_core_t map_domain_va_core; @@ -642,25 +668,22 @@ static const struct xc_core_format_type static const struct xc_core_format_type* current_format_type = NULL; void * -map_domain_va_core(unsigned long domfd, int cpu, void *guest_va, - vcpu_guest_context_any_t *ctxt) +map_domain_va_core(unsigned long domfd, int cpu, void *guest_va) { if (current_format_type == NULL) return NULL; - return (current_format_type->map_domain_va_core)(domfd, cpu, guest_va, - &ctxt->c); + return (current_format_type->map_domain_va_core)(domfd, cpu, guest_va); } int -xc_waitdomain_core(int xc_handle, int domfd, int *status, int options, - vcpu_guest_context_any_t *ctxt) +xc_waitdomain_core(int xc_handle, int domfd, int *status, int options) { int ret; int i; for (i = 0; i < NR_FORMAT_TYPE; i++) { ret = (format_type[i].waitdomain_core)(xc_handle, domfd, status, - options, &ctxt->c); + options); if (ret == 0) { current_format_type = &format_type[i]; break; --- 2009-06-10.orig/xen/arch/ia64/xen/dom_fw_common.c 2009-01-08 10:30:05.000000000 +0100 +++ 2009-06-10/xen/arch/ia64/xen/dom_fw_common.c 2009-06-10 16:20:38.000000000 +0200 @@ 
-28,6 +28,7 @@ #include "ia64/xc_dom_ia64_util.h" #define ia64_fc(addr) asm volatile ("fc %0" :: "r"(addr) : "memory") +#define MAX_VIRT_CPUS XEN_LEGACY_MAX_VCPUS /* XXX */ #endif /* __XEN__ */ #include <xen/acpi.h> --- 2009-06-10.orig/xen/arch/ia64/xen/domain.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/arch/ia64/xen/domain.c 2009-06-10 10:48:29.000000000 +0200 @@ -2225,13 +2225,6 @@ int __init construct_dom0(struct domain for ( i = 1; i < MAX_VIRT_CPUS; i++ ) d->shared_info->vcpu_info[i].evtchn_upcall_mask = 1; - if (dom0_max_vcpus == 0) - dom0_max_vcpus = MAX_VIRT_CPUS; - if (dom0_max_vcpus > num_online_cpus()) - dom0_max_vcpus = num_online_cpus(); - if (dom0_max_vcpus > MAX_VIRT_CPUS) - dom0_max_vcpus = MAX_VIRT_CPUS; - printk ("Dom0 max_vcpus=%d\n", dom0_max_vcpus); for ( i = 1; i < dom0_max_vcpus; i++ ) if (alloc_vcpu(d, i, i) == NULL) @@ -2306,6 +2299,24 @@ int __init construct_dom0(struct domain return 0; } +struct vcpu *__init alloc_dom0_vcpu0(void) +{ + if (dom0_max_vcpus == 0) + dom0_max_vcpus = MAX_VIRT_CPUS; + if (dom0_max_vcpus > num_online_cpus()) + dom0_max_vcpus = num_online_cpus(); + if (dom0_max_vcpus > MAX_VIRT_CPUS) + dom0_max_vcpus = MAX_VIRT_CPUS; + + dom0->vcpu = xmalloc_array(struct vcpu *, dom0_max_vcpus); + if ( !dom0->vcpu ) + return NULL; + memset(dom0->vcpu, 0, dom0_max_vcpus * sizeof(*dom0->vcpu)); + dom0->max_vcpus = dom0_max_vcpus; + + return alloc_vcpu(dom0, 0, 0); +} + void machine_restart(unsigned int delay_millisecs) { mdelay(delay_millisecs); --- 2009-06-10.orig/xen/arch/ia64/xen/xensetup.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/arch/ia64/xen/xensetup.c 2009-06-10 10:48:29.000000000 +0200 @@ -570,7 +570,11 @@ skip_move: scheduler_init(); idle_vcpu[0] = (struct vcpu*) ia64_r13; idle_domain = domain_create(IDLE_DOMAIN_ID, 0, 0); - if ( (idle_domain == NULL) || (alloc_vcpu(idle_domain, 0, 0) == NULL) ) + if ( idle_domain == NULL ) + BUG(); + idle_domain->vcpu = idle_vcpu; + idle_domain->max_vcpus = 
NR_CPUS; + if ( alloc_vcpu(idle_domain, 0, 0) == NULL ) BUG(); alloc_dom_xen_and_dom_io(); @@ -657,7 +661,7 @@ printk("num_online_cpus=%d, max_cpus=%d\ if (dom0 == NULL) panic("Error creating domain 0\n"); domain_set_vhpt_size(dom0, dom0_vhpt_size_log2); - dom0_vcpu0 = alloc_vcpu(dom0, 0, 0); + dom0_vcpu0 = alloc_dom0_vcpu0(); if (dom0_vcpu0 == NULL || vcpu_late_initialise(dom0_vcpu0) != 0) panic("Cannot allocate dom0 vcpu 0\n"); --- 2009-06-10.orig/xen/arch/x86/domain.c 2009-06-10 10:48:19.000000000 +0200 +++ 2009-06-10/xen/arch/x86/domain.c 2009-06-10 10:48:29.000000000 +0200 @@ -263,7 +263,7 @@ int switch_native(struct domain *d) d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0; - for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ ) + for ( vcpuid = 0; vcpuid < d->max_vcpus; vcpuid++ ) { if (d->vcpu[vcpuid]) release_compat_l4(d->vcpu[vcpuid]); @@ -285,7 +285,7 @@ int switch_compat(struct domain *d) d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 1; - for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ ) + for ( vcpuid = 0; vcpuid < d->max_vcpus; vcpuid++ ) { if ( (d->vcpu[vcpuid] != NULL) && (setup_compat_l4(d->vcpu[vcpuid]) != 0) ) @@ -423,12 +423,13 @@ int arch_domain_create(struct domain *d, #else /* __x86_64__ */ - d->arch.mm_perdomain_pt_pages = xmalloc_array(struct page_info *, - PDPT_L2_ENTRIES); - if ( !d->arch.mm_perdomain_pt_pages ) + BUILD_BUG_ON(PDPT_L2_ENTRIES * sizeof(*d->arch.mm_perdomain_pt_pages) + != PAGE_SIZE); + pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d))); + if ( !pg ) goto fail; - memset(d->arch.mm_perdomain_pt_pages, 0, - PDPT_L2_ENTRIES * sizeof(*d->arch.mm_perdomain_pt_pages)); + d->arch.mm_perdomain_pt_pages = page_to_virt(pg); + clear_page(d->arch.mm_perdomain_pt_pages); pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d))); if ( pg == NULL ) @@ -523,7 +524,8 @@ int arch_domain_create(struct domain *d, free_domheap_page(virt_to_page(d->arch.mm_perdomain_l2)); if ( d->arch.mm_perdomain_l3 ) 
free_domheap_page(virt_to_page(d->arch.mm_perdomain_l3)); - xfree(d->arch.mm_perdomain_pt_pages); + if ( d->arch.mm_perdomain_pt_pages ) + free_domheap_page(virt_to_page(d->arch.mm_perdomain_pt_pages)); #else free_xenheap_pages(d->arch.mm_perdomain_pt, pdpt_order); #endif @@ -556,7 +558,7 @@ void arch_domain_destroy(struct domain * if ( perdomain_pt_page(d, i) ) free_domheap_page(perdomain_pt_page(d, i)); } - xfree(d->arch.mm_perdomain_pt_pages); + free_domheap_page(virt_to_page(d->arch.mm_perdomain_pt_pages)); free_domheap_page(virt_to_page(d->arch.mm_perdomain_l2)); free_domheap_page(virt_to_page(d->arch.mm_perdomain_l3)); #endif @@ -872,7 +874,13 @@ map_vcpu_info(struct vcpu *v, unsigned l new_info = (vcpu_info_t *)(mapping + offset); - memcpy(new_info, v->vcpu_info, sizeof(*new_info)); + if ( v->vcpu_info ) + memcpy(new_info, v->vcpu_info, sizeof(*new_info)); + else + { + memset(new_info, 0, sizeof(*new_info)); + __vcpu_info(v, new_info, evtchn_upcall_mask) = 1; + } v->vcpu_info = new_info; v->arch.vcpu_info_mfn = mfn; --- 2009-06-10.orig/xen/arch/x86/domain_build.c 2009-06-10 10:46:31.000000000 +0200 +++ 2009-06-10/xen/arch/x86/domain_build.c 2009-06-10 15:02:42.000000000 +0200 @@ -82,9 +82,25 @@ static void __init parse_dom0_mem(const } custom_param("dom0_mem", parse_dom0_mem); -static unsigned int opt_dom0_max_vcpus; +static unsigned int __initdata opt_dom0_max_vcpus; integer_param("dom0_max_vcpus", opt_dom0_max_vcpus); +struct vcpu *__init alloc_dom0_vcpu0(void) +{ + if ( opt_dom0_max_vcpus == 0 ) + opt_dom0_max_vcpus = num_online_cpus(); + if ( opt_dom0_max_vcpus > MAX_VIRT_CPUS ) + opt_dom0_max_vcpus = MAX_VIRT_CPUS; + + dom0->vcpu = xmalloc_array(struct vcpu *, opt_dom0_max_vcpus); + if ( !dom0->vcpu ) + return NULL; + memset(dom0->vcpu, 0, opt_dom0_max_vcpus * sizeof(*dom0->vcpu)); + dom0->max_vcpus = opt_dom0_max_vcpus; + + return alloc_vcpu(dom0, 0, 0); +} + static unsigned int opt_dom0_shadow; boolean_param("dom0_shadow", opt_dom0_shadow); @@ -701,13 
+717,9 @@ int __init construct_dom0( #endif /* __x86_64__ */ /* Mask all upcalls... */ - for ( i = 0; i < MAX_VIRT_CPUS; i++ ) + for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ ) shared_info(d, vcpu_info[i].evtchn_upcall_mask) = 1; - if ( opt_dom0_max_vcpus == 0 ) - opt_dom0_max_vcpus = num_online_cpus(); - if ( opt_dom0_max_vcpus > MAX_VIRT_CPUS ) - opt_dom0_max_vcpus = MAX_VIRT_CPUS; printk("Dom0 has maximum %u VCPUs\n", opt_dom0_max_vcpus); for ( i = 1; i < opt_dom0_max_vcpus; i++ ) --- 2009-06-10.orig/xen/arch/x86/domctl.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/arch/x86/domctl.c 2009-06-10 15:03:07.000000000 +0200 @@ -574,7 +574,8 @@ long arch_do_domctl( goto sendtrigger_out; ret = -ESRCH; - if ( (v = d->vcpu[domctl->u.sendtrigger.vcpu]) == NULL ) + if ( domctl->u.sendtrigger.vcpu >= d->max_vcpus || + (v = d->vcpu[domctl->u.sendtrigger.vcpu]) == NULL ) goto sendtrigger_out; switch ( domctl->u.sendtrigger.trigger ) @@ -963,7 +964,7 @@ long arch_do_domctl( goto ext_vcpucontext_out; ret = -ESRCH; - if ( (evc->vcpu >= MAX_VIRT_CPUS) || + if ( (evc->vcpu >= d->max_vcpus) || ((v = d->vcpu[evc->vcpu]) == NULL) ) goto ext_vcpucontext_out; @@ -1085,7 +1086,7 @@ long arch_do_domctl( break; ret = -EINVAL; - if ( (domctl->u.debug_op.vcpu >= MAX_VIRT_CPUS) || + if ( (domctl->u.debug_op.vcpu >= d->max_vcpus) || ((v = d->vcpu[domctl->u.debug_op.vcpu]) == NULL) ) goto debug_op_out; --- 2009-06-10.orig/xen/arch/x86/hvm/hvm.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/arch/x86/hvm/hvm.c 2009-06-10 10:49:48.000000000 +0200 @@ -364,7 +364,7 @@ void hvm_domain_relinquish_resources(str /* Stop all asynchronous timer actions. */ rtc_deinit(d); - if ( d->vcpu[0] != NULL ) + if ( d->vcpu != NULL && d->vcpu[0] != NULL ) { pit_deinit(d); pmtimer_deinit(d); @@ -504,7 +504,7 @@ static int hvm_load_cpu_ctxt(struct doma /* Which vcpu is this? 
*/ vcpuid = hvm_load_instance(h); - if ( vcpuid >= MAX_VIRT_CPUS || (v = d->vcpu[vcpuid]) == NULL ) + if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL ) { gdprintk(XENLOG_ERR, "HVM restore: domain has no vcpu %u\n", vcpuid); return -EINVAL; @@ -2279,7 +2279,7 @@ static void hvm_s3_suspend(struct domain domain_pause(d); domain_lock(d); - if ( d->is_dying || (d->vcpu[0] == NULL) || + if ( d->is_dying || (d->vcpu == NULL) || (d->vcpu[0] == NULL) || test_and_set_bool(d->arch.hvm_domain.is_s3_suspended) ) { domain_unlock(d); @@ -2654,7 +2654,7 @@ long do_hvm_op(unsigned long op, XEN_GUE goto param_fail2; rc = -EINVAL; - if ( d->vcpu[0] == NULL ) + if ( d->vcpu == NULL || d->vcpu[0] == NULL ) goto param_fail2; if ( shadow_mode_enabled(d) ) --- 2009-06-10.orig/xen/arch/x86/hvm/mtrr.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/arch/x86/hvm/mtrr.c 2009-06-10 11:03:58.000000000 +0200 @@ -676,7 +676,7 @@ static int hvm_load_mtrr_msr(struct doma struct hvm_hw_mtrr hw_mtrr; vcpuid = hvm_load_instance(h); - if ( vcpuid >= MAX_VIRT_CPUS || (v = d->vcpu[vcpuid]) == NULL ) + if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL ) { gdprintk(XENLOG_ERR, "HVM restore: domain has no vcpu %u\n", vcpuid); return -EINVAL; @@ -720,7 +720,8 @@ uint8_t epte_get_entry_emt( *igmt = 0; - if ( (current->domain != d) && ((v = d->vcpu[0]) == NULL) ) + if ( (current->domain != d) && + ((d->vcpu == NULL) || ((v = d->vcpu[0]) == NULL)) ) return MTRR_TYPE_WRBACK; if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_IDENT_PT] ) --- 2009-06-10.orig/xen/arch/x86/hvm/vioapic.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/arch/x86/hvm/vioapic.c 2009-06-10 11:07:43.000000000 +0200 @@ -339,7 +339,8 @@ static void vioapic_deliver(struct hvm_h /* Force round-robin to pick VCPU 0 */ if ( (irq == hvm_isa_irq_to_gsi(0)) && pit_channel0_enabled() ) { - v = vioapic_domain(vioapic)->vcpu[0]; + v = vioapic_domain(vioapic)->vcpu ? 
+ vioapic_domain(vioapic)->vcpu[0] : NULL; target = v ? vcpu_vlapic(v) : NULL; } else @@ -367,12 +368,14 @@ static void vioapic_deliver(struct hvm_h if ( !(deliver_bitmask & (1 << bit)) ) continue; deliver_bitmask &= ~(1 << bit); + if ( vioapic_domain(vioapic)->vcpu == NULL ) + v = NULL; #ifdef IRQ0_SPECIAL_ROUTING /* Do not deliver timer interrupts to VCPU != 0 */ - if ( (irq == hvm_isa_irq_to_gsi(0)) && pit_channel0_enabled() ) + else if ( (irq == hvm_isa_irq_to_gsi(0)) && pit_channel0_enabled() ) v = vioapic_domain(vioapic)->vcpu[0]; - else #endif + else v = vioapic_domain(vioapic)->vcpu[bit]; if ( v != NULL ) { @@ -392,7 +395,8 @@ static void vioapic_deliver(struct hvm_h if ( !(deliver_bitmask & (1 << bit)) ) continue; deliver_bitmask &= ~(1 << bit); - if ( ((v = vioapic_domain(vioapic)->vcpu[bit]) != NULL) && + if ( (vioapic_domain(vioapic)->vcpu != NULL) && + ((v = vioapic_domain(vioapic)->vcpu[bit]) != NULL) && !test_and_set_bool(v->nmi_pending) ) vcpu_kick(v); } --- 2009-06-10.orig/xen/arch/x86/hvm/vlapic.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/arch/x86/hvm/vlapic.c 2009-06-10 10:48:29.000000000 +0200 @@ -384,7 +384,7 @@ struct vlapic *apic_lowest_prio(struct d struct vlapic *vlapic, *target = NULL; struct vcpu *v; - if ( unlikely((v = d->vcpu[old]) == NULL) ) + if ( unlikely(!d->vcpu) || unlikely((v = d->vcpu[old]) == NULL) ) return NULL; do { @@ -913,7 +913,7 @@ static int lapic_load_hidden(struct doma /* Which vlapic to load? */ vcpuid = hvm_load_instance(h); - if ( vcpuid >= MAX_VIRT_CPUS || (v = d->vcpu[vcpuid]) == NULL ) + if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL ) { gdprintk(XENLOG_ERR, "HVM restore: domain has no vlapic %u\n", vcpuid); return -EINVAL; @@ -936,7 +936,7 @@ static int lapic_load_regs(struct domain /* Which vlapic to load? 
*/ vcpuid = hvm_load_instance(h); - if ( vcpuid >= MAX_VIRT_CPUS || (v = d->vcpu[vcpuid]) == NULL ) + if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL ) { gdprintk(XENLOG_ERR, "HVM restore: domain has no vlapic %u\n", vcpuid); return -EINVAL; --- 2009-06-10.orig/xen/arch/x86/hvm/vmx/vmx.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/arch/x86/hvm/vmx/vmx.c 2009-06-10 10:48:29.000000000 +0200 @@ -1221,7 +1221,7 @@ static void __ept_sync_domain(void *info void ept_sync_domain(struct domain *d) { /* Only if using EPT and this domain has some VCPUs to dirty. */ - if ( d->arch.hvm_domain.hap_enabled && d->vcpu[0] ) + if ( d->arch.hvm_domain.hap_enabled && d->vcpu && d->vcpu[0] ) { ASSERT(local_irq_is_enabled()); on_each_cpu(__ept_sync_domain, d, 1); @@ -1399,7 +1399,7 @@ static struct hvm_function_table vmx_fun }; static unsigned long *vpid_bitmap; -#define VPID_BITMAP_SIZE ((1u << VMCS_VPID_WIDTH) / MAX_VIRT_CPUS) +#define VPID_BITMAP_SIZE ((1u << VMCS_VPID_WIDTH) / XEN_LEGACY_MAX_VCPUS) void start_vmx(void) { @@ -1923,7 +1923,7 @@ static int vmx_alloc_vpid(struct domain } while ( test_and_set_bit(idx, vpid_bitmap) ); - d->arch.hvm_domain.vmx.vpid_base = idx * MAX_VIRT_CPUS; + d->arch.hvm_domain.vmx.vpid_base = idx * XEN_LEGACY_MAX_VCPUS; return 0; } @@ -1932,7 +1932,8 @@ static void vmx_free_vpid(struct domain if ( !cpu_has_vmx_vpid ) return; - clear_bit(d->arch.hvm_domain.vmx.vpid_base / MAX_VIRT_CPUS, vpid_bitmap); + clear_bit(d->arch.hvm_domain.vmx.vpid_base / XEN_LEGACY_MAX_VCPUS, + vpid_bitmap); } static void vmx_install_vlapic_mapping(struct vcpu *v) --- 2009-06-10.orig/xen/arch/x86/hvm/vpic.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/arch/x86/hvm/vpic.c 2009-06-10 11:09:09.000000000 +0200 @@ -110,7 +110,9 @@ static void vpic_update_int_output(struc if ( vpic->is_master ) { /* Master INT line is connected to VCPU0''s VLAPIC LVT0. */ - struct vcpu *v = vpic_domain(vpic)->vcpu[0]; + struct vcpu *v = vpic_domain(vpic)->vcpu ? 
+ vpic_domain(vpic)->vcpu[0] : NULL; + if ( (v != NULL) && vlapic_accept_pic_intr(v) ) vcpu_kick(v); } --- 2009-06-10.orig/xen/arch/x86/mm.c 2009-06-10 10:46:31.000000000 +0200 +++ 2009-06-10/xen/arch/x86/mm.c 2009-06-10 10:48:29.000000000 +0200 @@ -1336,7 +1336,7 @@ static int alloc_l3_table(struct page_in */ if ( (pfn >= 0x100000) && unlikely(!VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3)) && - d->vcpu[0] && d->vcpu[0]->is_initialised ) + d->vcpu && d->vcpu[0] && d->vcpu[0]->is_initialised ) { MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn); return -EINVAL; @@ -2575,7 +2575,7 @@ static inline int vcpumask_to_pcpumask( for ( vmask = 0, offs = 0; ; ++offs) { vcpu_bias = offs * (is_native ? BITS_PER_LONG : 32); - if ( vcpu_bias >= MAX_VIRT_CPUS ) + if ( vcpu_bias >= d->max_vcpus ) return 0; if ( unlikely(is_native ? @@ -2592,7 +2592,7 @@ static inline int vcpumask_to_pcpumask( vcpu_id = find_first_set_bit(vmask); vmask &= ~(1UL << vcpu_id); vcpu_id += vcpu_bias; - if ( (vcpu_id >= MAX_VIRT_CPUS) ) + if ( (vcpu_id >= d->max_vcpus) ) return 0; if ( ((v = d->vcpu[vcpu_id]) != NULL) ) cpus_or(*pmask, *pmask, v->vcpu_dirty_cpumask); --- 2009-06-10.orig/xen/arch/x86/mm/paging.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/arch/x86/mm/paging.c 2009-06-10 10:48:29.000000000 +0200 @@ -684,7 +684,7 @@ int paging_domctl(struct domain *d, xen_ return 0; } - if ( unlikely(d->vcpu[0] == NULL) ) + if ( unlikely(d->vcpu == NULL) || unlikely(d->vcpu[0] == NULL) ) { PAGING_ERROR("Paging op on a domain (%u) with no vcpus\n", d->domain_id); --- 2009-06-10.orig/xen/arch/x86/mm/shadow/common.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/arch/x86/mm/shadow/common.c 2009-06-10 10:48:29.000000000 +0200 @@ -1452,7 +1452,7 @@ static void shadow_blow_tables(struct do void shadow_blow_tables_per_domain(struct domain *d) { - if ( shadow_mode_enabled(d) && d->vcpu[0] != NULL ) { + if ( shadow_mode_enabled(d) && d->vcpu != NULL && d->vcpu[0] != NULL ) { 
shadow_lock(d); shadow_blow_tables(d); shadow_unlock(d); @@ -1470,7 +1470,7 @@ static void shadow_blow_all_tables(unsig rcu_read_lock(&domlist_read_lock); for_each_domain(d) { - if ( shadow_mode_enabled(d) && d->vcpu[0] != NULL ) + if ( shadow_mode_enabled(d) && d->vcpu != NULL && d->vcpu[0] != NULL ) { shadow_lock(d); shadow_blow_tables(d); --- 2009-06-10.orig/xen/arch/x86/nmi.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/arch/x86/nmi.c 2009-06-10 10:48:29.000000000 +0200 @@ -463,7 +463,8 @@ static void do_nmi_stats(unsigned char k for_each_cpu ( i ) printk("%3d\t%3d\n", i, nmi_count(i)); - if ( ((d = dom0) == NULL) || ((v = d->vcpu[0]) == NULL) ) + if ( ((d = dom0) == NULL) || (d->vcpu == NULL) || + ((v = d->vcpu[0]) == NULL) ) return; if ( v->nmi_pending || (v->trap_priority >= VCPU_TRAP_NMI) ) --- 2009-06-10.orig/xen/arch/x86/setup.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/arch/x86/setup.c 2009-06-10 10:48:29.000000000 +0200 @@ -234,11 +234,15 @@ static void __init init_idle_domain(void scheduler_init(); idle_domain = domain_create(IDLE_DOMAIN_ID, 0, 0); - if ( (idle_domain == NULL) || (alloc_vcpu(idle_domain, 0, 0) == NULL) ) + if ( idle_domain == NULL ) + BUG(); + idle_domain->vcpu = idle_vcpu; + idle_domain->max_vcpus = NR_CPUS; + if ( alloc_vcpu(idle_domain, 0, 0) == NULL ) BUG(); - set_current(idle_domain->vcpu[0]); - idle_vcpu[0] = this_cpu(curr_vcpu) = current; + set_current(idle_vcpu[0]); + this_cpu(curr_vcpu) = current; setup_idle_pagetable(); } @@ -998,7 +1002,7 @@ void __init __start_xen(unsigned long mb /* Create initial domain 0. 
*/ dom0 = domain_create(0, DOMCRF_s3_integrity, DOM0_SSIDREF); - if ( (dom0 == NULL) || (alloc_vcpu(dom0, 0, 0) == NULL) ) + if ( (dom0 == NULL) || (alloc_dom0_vcpu0() == NULL) ) panic("Error creating domain 0\n"); dom0->is_privileged = 1; --- 2009-06-10.orig/xen/arch/x86/traps.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/arch/x86/traps.c 2009-06-10 10:48:29.000000000 +0200 @@ -2839,7 +2839,7 @@ static void nmi_dom0_report(unsigned int { struct domain *d = dom0; - if ( (d == NULL) || (d->vcpu[0] == NULL) ) + if ( (d == NULL) || (d->vcpu == NULL) || (d->vcpu[0] == NULL) ) return; set_bit(reason_idx, nmi_reason(d)); @@ -3176,7 +3176,7 @@ int guest_has_trap_callback(struct domai struct trap_info *t; BUG_ON(d == NULL); - BUG_ON(vcpuid >= MAX_VIRT_CPUS); + BUG_ON(vcpuid >= d->max_vcpus); /* Sanity check - XXX should be more fine grained. */ BUG_ON(trap_nr > TRAP_syscall); @@ -3194,7 +3194,7 @@ int send_guest_trap(struct domain *d, ui struct softirq_trap *st; BUG_ON(d == NULL); - BUG_ON(vcpuid >= MAX_VIRT_CPUS); + BUG_ON(vcpuid >= d->max_vcpus); v = d->vcpu[vcpuid]; switch (trap_nr) { --- 2009-06-10.orig/xen/common/compat/domain.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/common/compat/domain.c 2009-06-10 15:03:23.000000000 +0200 @@ -24,7 +24,7 @@ int compat_vcpu_op(int cmd, int vcpuid, if ( (vcpuid < 0) || (vcpuid >= MAX_VIRT_CPUS) ) return -EINVAL; - if ( (v = d->vcpu[vcpuid]) == NULL ) + if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL ) return -ENOENT; switch ( cmd ) --- 2009-06-10.orig/xen/common/domain.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/common/domain.c 2009-06-10 15:04:10.000000000 +0200 @@ -134,7 +134,7 @@ struct vcpu *alloc_vcpu( { struct vcpu *v; - BUG_ON(d->vcpu[vcpu_id] != NULL); + BUG_ON((!is_idle_domain(d) || vcpu_id) && d->vcpu[vcpu_id]); if ( (v = alloc_vcpu_struct()) == NULL ) return NULL; @@ -153,7 +153,8 @@ struct vcpu *alloc_vcpu( v->runstate.state = RUNSTATE_offline; 
v->runstate.state_entry_time = NOW(); set_bit(_VPF_down, &v->pause_flags); - v->vcpu_info = (void *)&shared_info(d, vcpu_info[vcpu_id]); + if ( vcpu_id < XEN_LEGACY_MAX_VCPUS ) + v->vcpu_info = (void *)&shared_info(d, vcpu_info[vcpu_id]); } if ( sched_init_vcpu(v, cpu_id) != 0 ) @@ -181,22 +182,8 @@ struct vcpu *alloc_vcpu( struct vcpu *alloc_idle_vcpu(unsigned int cpu_id) { - struct domain *d; - struct vcpu *v; - unsigned int vcpu_id = cpu_id % MAX_VIRT_CPUS; - - if ( (v = idle_vcpu[cpu_id]) != NULL ) - return v; - - d = (vcpu_id == 0) ? - domain_create(IDLE_DOMAIN_ID, 0, 0) : - idle_vcpu[cpu_id - vcpu_id]->domain; - BUG_ON(d == NULL); - - v = alloc_vcpu(d, vcpu_id, cpu_id); - idle_vcpu[cpu_id] = v; - - return v; + return idle_vcpu[cpu_id] ?: alloc_vcpu(idle_vcpu[0]->domain, + cpu_id, cpu_id); } static unsigned int extra_dom0_irqs, extra_domU_irqs = 8; @@ -575,7 +562,7 @@ static void complete_domain_destroy(stru struct vcpu *v; int i; - for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- ) + for ( i = d->max_vcpus - 1; i >= 0; i-- ) { if ( (v = d->vcpu[i]) == NULL ) continue; @@ -594,7 +581,7 @@ static void complete_domain_destroy(stru /* Free page used by xen oprofile buffer. 
*/ free_xenoprof_pages(d); - for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- ) + for ( i = d->max_vcpus - 1; i >= 0; i-- ) if ( (v = d->vcpu[i]) != NULL ) free_vcpu_struct(v); @@ -742,12 +729,15 @@ long do_vcpu_op(int cmd, int vcpuid, XEN if ( (vcpuid < 0) || (vcpuid >= MAX_VIRT_CPUS) ) return -EINVAL; - if ( (v = d->vcpu[vcpuid]) == NULL ) + if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL ) return -ENOENT; switch ( cmd ) { case VCPUOP_initialise: + if ( !v->vcpu_info ) + return -EINVAL; + if ( (ctxt = xmalloc(struct vcpu_guest_context)) == NULL ) return -ENOMEM; --- 2009-06-10.orig/xen/common/domctl.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/common/domctl.c 2009-06-10 15:04:45.000000000 +0200 @@ -253,7 +253,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc ret = -EINVAL; if ( (d == current->domain) || /* no domain_pause() */ - (vcpu >= MAX_VIRT_CPUS) || ((v = d->vcpu[vcpu]) == NULL) ) + (vcpu >= d->max_vcpus) || ((v = d->vcpu[vcpu]) == NULL) ) goto svc_out; if ( guest_handle_is_null(op->u.vcpucontext.ctxt) ) @@ -433,7 +433,8 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc ret = -EINVAL; if ( (d == current->domain) || /* no domain_pause() */ - (max > MAX_VIRT_CPUS) ) + (max > MAX_VIRT_CPUS) || + (is_hvm_domain(d) && max > XEN_LEGACY_MAX_VCPUS) ) { rcu_unlock_domain(d); break; @@ -446,15 +447,40 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc break; } + /* Until Xenoprof can dynamically grow its vcpu-s array... */ + if ( d->xenoprof ) + { + rcu_unlock_domain(d); + ret = -EAGAIN; + break; + } + /* Needed, for example, to ensure writable p.t. state is synced. */ domain_pause(d); /* We cannot reduce maximum VCPUs. 
*/ ret = -EINVAL; - if ( (max != MAX_VIRT_CPUS) && (d->vcpu[max] != NULL) ) + if ( (max < d->max_vcpus) && (d->vcpu[max] != NULL) ) goto maxvcpu_out; ret = -ENOMEM; + if ( max > d->max_vcpus ) + { + struct vcpu **vcpus = xmalloc_array(struct vcpu *, max); + void *ptr; + + if ( !vcpus ) + goto maxvcpu_out; + memcpy(vcpus, d->vcpu, d->max_vcpus * sizeof(*vcpus)); + memset(vcpus + d->max_vcpus, 0, + (max - d->max_vcpus) * sizeof(*vcpus)); + + ptr = d->vcpu; + d->vcpu = vcpus; + wmb(); + d->max_vcpus = max; + xfree(ptr); + } for ( i = 0; i < max; i++ ) { if ( d->vcpu[i] != NULL ) @@ -505,7 +531,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc goto vcpuaffinity_out; ret = -EINVAL; - if ( op->u.vcpuaffinity.vcpu >= MAX_VIRT_CPUS ) + if ( op->u.vcpuaffinity.vcpu >= d->max_vcpus ) goto vcpuaffinity_out; ret = -ESRCH; @@ -599,7 +625,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc goto getvcpucontext_out; ret = -EINVAL; - if ( op->u.vcpucontext.vcpu >= MAX_VIRT_CPUS ) + if ( op->u.vcpucontext.vcpu >= d->max_vcpus ) goto getvcpucontext_out; ret = -ESRCH; @@ -661,7 +687,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc goto getvcpuinfo_out; ret = -EINVAL; - if ( op->u.getvcpuinfo.vcpu >= MAX_VIRT_CPUS ) + if ( op->u.getvcpuinfo.vcpu >= d->max_vcpus ) goto getvcpuinfo_out; ret = -ESRCH; --- 2009-06-10.orig/xen/common/event_channel.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/common/event_channel.c 2009-06-10 10:54:25.000000000 +0200 @@ -240,10 +240,13 @@ static long evtchn_bind_virq(evtchn_bind if ( virq_is_global(virq) && (vcpu != 0) ) return -EINVAL; - if ( (vcpu < 0) || (vcpu >= ARRAY_SIZE(d->vcpu)) || + if ( (vcpu < 0) || (vcpu >= d->max_vcpus) || ((v = d->vcpu[vcpu]) == NULL) ) return -ENOENT; + if ( unlikely(!v->vcpu_info) ) + return -EAGAIN; + spin_lock(&d->event_lock); if ( v->virq_to_evtchn[virq] != 0 ) @@ -273,10 +276,13 @@ static long evtchn_bind_ipi(evtchn_bind_ int port, vcpu = bind->vcpu; long rc = 0; - if ( (vcpu < 0) || (vcpu >= ARRAY_SIZE(d->vcpu)) || + if ( 
(vcpu < 0) || (vcpu >= d->max_vcpus) || (d->vcpu[vcpu] == NULL) ) return -ENOENT; + if ( unlikely(!d->vcpu[vcpu]->vcpu_info) ) + return -EAGAIN; + spin_lock(&d->event_lock); if ( (port = get_free_port(d)) < 0 ) @@ -555,13 +561,13 @@ static int evtchn_set_pending(struct vcp } /* Check if some VCPU might be polling for this event. */ - if ( likely(bitmap_empty(d->poll_mask, MAX_VIRT_CPUS)) ) + if ( likely(bitmap_empty(d->poll_mask, d->max_vcpus)) ) return 0; /* Wake any interested (or potentially interested) pollers. */ - for ( vcpuid = find_first_bit(d->poll_mask, MAX_VIRT_CPUS); - vcpuid < MAX_VIRT_CPUS; - vcpuid = find_next_bit(d->poll_mask, MAX_VIRT_CPUS, vcpuid+1) ) + for ( vcpuid = find_first_bit(d->poll_mask, d->max_vcpus); + vcpuid < d->max_vcpus; + vcpuid = find_next_bit(d->poll_mask, d->max_vcpus, vcpuid+1) ) { v = d->vcpu[vcpuid]; if ( ((v->poll_evtchn <= 0) || (v->poll_evtchn == port)) && @@ -608,7 +614,7 @@ void send_guest_global_virq(struct domai ASSERT(virq_is_global(virq)); - if ( unlikely(d == NULL) ) + if ( unlikely(d == NULL) || unlikely(d->vcpu == NULL) ) return; v = d->vcpu[0]; @@ -717,9 +723,12 @@ long evtchn_bind_vcpu(unsigned int port, struct evtchn *chn; long rc = 0; - if ( (vcpu_id >= ARRAY_SIZE(d->vcpu)) || (d->vcpu[vcpu_id] == NULL) ) + if ( (vcpu_id >= d->max_vcpus) || (d->vcpu[vcpu_id] == NULL) ) return -ENOENT; + if ( unlikely(!d->vcpu[vcpu_id]->vcpu_info) ) + return -EAGAIN; + spin_lock(&d->event_lock); if ( !port_is_valid(d, port) ) @@ -943,6 +952,9 @@ int alloc_unbound_xen_event_channel( struct domain *d = local_vcpu->domain; int port; + if ( unlikely(!local_vcpu->vcpu_info) ) + return -EAGAIN; + spin_lock(&d->event_lock); if ( (port = get_free_port(d)) < 0 ) @@ -1016,6 +1028,14 @@ int evtchn_init(struct domain *d) if ( get_free_port(d) != 0 ) return -EINVAL; evtchn_from_port(d, 0)->state = ECS_RESERVED; + +#if MAX_VIRT_CPUS > BITS_PER_LONG + d->poll_mask = xmalloc_array(unsigned long, BITS_TO_LONGS(MAX_VIRT_CPUS)); + if ( 
!d->poll_mask ) + return -ENOMEM; + bitmap_zero(d->poll_mask, MAX_VIRT_CPUS); +#endif + return 0; } @@ -1044,6 +1064,11 @@ void evtchn_destroy(struct domain *d) d->evtchn[i] = NULL; } spin_unlock(&d->event_lock); + +#if MAX_VIRT_CPUS > BITS_PER_LONG + xfree(d->poll_mask); + d->poll_mask = NULL; +#endif } static void domain_dump_evtchn_info(struct domain *d) --- 2009-06-10.orig/xen/common/keyhandler.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/common/keyhandler.c 2009-06-10 10:48:29.000000000 +0200 @@ -209,8 +209,8 @@ static void dump_domains(unsigned char k v->vcpu_id, v->processor, v->is_running ? ''T'':''F'', v->pause_flags, v->poll_evtchn, - vcpu_info(v, evtchn_upcall_pending), - vcpu_info(v, evtchn_upcall_mask)); + v->vcpu_info ? vcpu_info(v, evtchn_upcall_pending) : 0, + v->vcpu_info ? vcpu_info(v, evtchn_upcall_mask) : 1); cpuset_print(tmpstr, sizeof(tmpstr), v->vcpu_dirty_cpumask); printk("dirty_cpus=%s ", tmpstr); cpuset_print(tmpstr, sizeof(tmpstr), v->cpu_affinity); @@ -218,6 +218,8 @@ static void dump_domains(unsigned char k arch_dump_vcpu_info(v); periodic_timer_print(tmpstr, sizeof(tmpstr), v->periodic_period); printk(" %s\n", tmpstr); + if ( !v->vcpu_info ) + continue; printk(" Notifying guest (virq %d, port %d, stat %d/%d/%d)\n", VIRQ_DEBUG, v->virq_to_evtchn[VIRQ_DEBUG], test_bit(v->virq_to_evtchn[VIRQ_DEBUG], --- 2009-06-10.orig/xen/common/sched_sedf.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/common/sched_sedf.c 2009-06-10 10:48:29.000000000 +0200 @@ -1376,6 +1376,9 @@ static int sedf_adjust(struct domain *p, p->domain_id, op->u.sedf.period, op->u.sedf.slice, op->u.sedf.latency, (op->u.sedf.extratime)?"yes":"no"); + if ( !p->vcpu ) + return -EINVAL; + if ( op->cmd == XEN_DOMCTL_SCHEDOP_putinfo ) { /* Check for sane parameters. 
*/ --- 2009-06-10.orig/xen/common/xenoprof.c 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/common/xenoprof.c 2009-06-10 10:48:29.000000000 +0200 @@ -120,7 +120,7 @@ static void xenoprof_reset_buf(struct do return; } - for ( j = 0; j < MAX_VIRT_CPUS; j++ ) + for ( j = 0; j < d->max_vcpus; j++ ) { buf = d->xenoprof->vcpu[j].buffer; if ( buf != NULL ) @@ -201,6 +201,17 @@ static int alloc_xenoprof_struct( memset(d->xenoprof, 0, sizeof(*d->xenoprof)); + d->xenoprof->vcpu = xmalloc_array(struct xenoprof_vcpu, d->max_vcpus); + if ( d->xenoprof->vcpu == NULL ) + { + xfree(d->xenoprof); + d->xenoprof = NULL; + printk("alloc_xenoprof_struct(): vcpu array allocation failed\n"); + return -ENOMEM; + } + + memset(d->xenoprof->vcpu, 0, d->max_vcpus * sizeof(*d->xenoprof->vcpu)); + nvcpu = 0; for_each_vcpu ( d, v ) nvcpu++; --- 2009-06-10.orig/xen/include/asm-ia64/config.h 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/include/asm-ia64/config.h 2009-06-10 10:48:29.000000000 +0200 @@ -31,6 +31,7 @@ #else #define NR_CPUS 64 #endif +#define MAX_VIRT_CPUS XEN_LEGACY_MAX_VCPUS #define CONFIG_NUMA #define CONFIG_ACPI_NUMA #define NODES_SHIFT 8 /* linux/asm/numnodes.h */ --- 2009-06-10.orig/xen/include/asm-x86/config.h 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/include/asm-x86/config.h 2009-06-10 15:05:35.000000000 +0200 @@ -52,9 +52,13 @@ #define NR_CPUS 32 #endif -#if defined(__i386__) && (NR_CPUS > 32) +#ifdef __i386__ +#if NR_CPUS > 32 #error "Maximum of 32 physical processors supported by Xen on x86_32" #endif +/* Maximum number of virtual CPUs in multi-processor guests. */ +#define MAX_VIRT_CPUS XEN_LEGACY_MAX_VCPUS +#endif #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL # define supervisor_mode_kernel (1) @@ -203,7 +207,7 @@ extern unsigned int video_mode, video_fl /* Slot 260: per-domain mappings. 
*/ #define PERDOMAIN_VIRT_START (PML4_ADDR(260)) #define PERDOMAIN_VIRT_END (PERDOMAIN_VIRT_START + (PERDOMAIN_MBYTES<<20)) -#define PERDOMAIN_MBYTES ((unsigned long)GDT_LDT_MBYTES) +#define PERDOMAIN_MBYTES (PML4_ENTRY_BYTES >> (20 + PAGETABLE_ORDER)) /* Slot 261: machine-to-phys conversion table (16GB). */ #define RDWR_MPT_VIRT_START (PML4_ADDR(261)) #define RDWR_MPT_VIRT_END (RDWR_MPT_VIRT_START + (16UL<<30)) @@ -242,6 +246,8 @@ extern unsigned int video_mode, video_fl #define COMPAT_L2_PAGETABLE_XEN_SLOTS(d) \ (COMPAT_L2_PAGETABLE_LAST_XEN_SLOT - COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d) + 1) +#define COMPAT_LEGACY_MAX_VCPUS XEN_LEGACY_MAX_VCPUS + #endif #define PGT_base_page_table PGT_l4_page_table @@ -347,7 +353,12 @@ extern unsigned long xenheap_phys_end; /* GDT/LDT shadow mapping area. The first per-domain-mapping sub-area. */ #define GDT_LDT_VCPU_SHIFT 5 #define GDT_LDT_VCPU_VA_SHIFT (GDT_LDT_VCPU_SHIFT + PAGE_SHIFT) +#ifdef MAX_VIRT_CPUS #define GDT_LDT_MBYTES (MAX_VIRT_CPUS >> (20-GDT_LDT_VCPU_VA_SHIFT)) +#else +#define GDT_LDT_MBYTES PERDOMAIN_MBYTES +#define MAX_VIRT_CPUS (GDT_LDT_MBYTES << (20-GDT_LDT_VCPU_VA_SHIFT)) +#endif #define GDT_LDT_VIRT_START PERDOMAIN_VIRT_START #define GDT_LDT_VIRT_END (GDT_LDT_VIRT_START + (GDT_LDT_MBYTES << 20)) --- 2009-06-10.orig/xen/include/asm-x86/paging.h 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/include/asm-x86/paging.h 2009-06-10 11:14:06.000000000 +0200 @@ -332,7 +332,7 @@ static inline void paging_write_p2m_entr { struct vcpu *v = current; if ( v->domain != d ) - v = d->vcpu[0]; + v = d->vcpu ? 
d->vcpu[0] : NULL; if ( likely(v && paging_mode_enabled(d) && v->arch.paging.mode != NULL) ) { return v->arch.paging.mode->write_p2m_entry(v, gfn, p, table_mfn, --- 2009-06-10.orig/xen/include/asm-x86/shared.h 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/include/asm-x86/shared.h 2009-06-10 10:48:29.000000000 +0200 @@ -26,6 +26,8 @@ static inline void arch_set_##field(stru #define GET_SET_VCPU(type, field) \ static inline type arch_get_##field(const struct vcpu *v) \ { \ + if ( unlikely(!v->vcpu_info) ) \ + return 0; \ return !has_32bit_shinfo(v->domain) ? \ v->vcpu_info->native.arch.field : \ v->vcpu_info->compat.arch.field; \ @@ -57,7 +59,7 @@ static inline void arch_set_##field(stru #define GET_SET_VCPU(type, field) \ static inline type arch_get_##field(const struct vcpu *v) \ { \ - return v->vcpu_info->arch.field; \ + return v->vcpu_info ? v->vcpu_info->arch.field : 0; \ } \ static inline void arch_set_##field(struct vcpu *v, \ type val) \ --- 2009-06-10.orig/xen/include/public/arch-ia64.h 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/include/public/arch-ia64.h 2009-06-10 10:48:29.000000000 +0200 @@ -66,7 +66,7 @@ typedef unsigned long xen_pfn_t; /* Maximum number of virtual CPUs in multi-processor guests. */ /* WARNING: before changing this, check that shared_info fits on a page */ -#define MAX_VIRT_CPUS 64 +#define XEN_LEGACY_MAX_VCPUS 64 /* IO ports location for PV. */ #define IO_PORTS_PADDR 0x00000ffffc000000UL --- 2009-06-10.orig/xen/include/public/arch-x86/xen.h 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/include/public/arch-x86/xen.h 2009-06-10 10:48:29.000000000 +0200 @@ -73,8 +73,8 @@ typedef unsigned long xen_pfn_t; #define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096) #define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8) -/* Maximum number of virtual CPUs in multi-processor guests. */ -#define MAX_VIRT_CPUS 32 +/* Maximum number of virtual CPUs in legacy multi-processor guests. 
*/ +#define XEN_LEGACY_MAX_VCPUS 32 #ifndef __ASSEMBLY__ --- 2009-06-10.orig/xen/include/public/xen.h 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/include/public/xen.h 2009-06-10 10:48:29.000000000 +0200 @@ -458,7 +458,7 @@ typedef struct vcpu_info vcpu_info_t; * of this structure remaining constant. */ struct shared_info { - struct vcpu_info vcpu_info[MAX_VIRT_CPUS]; + struct vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS]; /* * A domain can create "event channels" on which it can send and receive --- 2009-06-10.orig/xen/include/xen/domain.h 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/include/xen/domain.h 2009-06-10 10:48:29.000000000 +0200 @@ -14,6 +14,7 @@ struct vcpu *alloc_vcpu( int boot_vcpu( struct domain *d, int vcpuid, vcpu_guest_context_u ctxt); struct vcpu *alloc_idle_vcpu(unsigned int cpu_id); +struct vcpu *alloc_dom0_vcpu0(void); void vcpu_reset(struct vcpu *v); struct xen_domctl_getdomaininfo; --- 2009-06-10.orig/xen/include/xen/numa.h 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/include/xen/numa.h 2009-06-10 10:48:29.000000000 +0200 @@ -15,6 +15,7 @@ #define vcpu_to_node(v) (cpu_to_node((v)->processor)) #define domain_to_node(d) \ - (((d)->vcpu[0] != NULL) ? vcpu_to_node((d)->vcpu[0]) : NUMA_NO_NODE) + (((d)->vcpu != NULL && (d)->vcpu[0] != NULL) \ + ? vcpu_to_node((d)->vcpu[0]) : NUMA_NO_NODE) #endif /* _XEN_NUMA_H */ --- 2009-06-10.orig/xen/include/xen/sched.h 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/include/xen/sched.h 2009-06-10 15:06:01.000000000 +0200 @@ -180,6 +180,8 @@ struct domain unsigned int max_pages; /* maximum value for tot_pages */ unsigned int xenheap_pages; /* # pages allocated from Xen heap */ + unsigned int max_vcpus; + /* Scheduling. */ void *sched_priv; /* scheduler-specific data */ @@ -226,7 +228,11 @@ struct domain bool_t is_pinned; /* Are any VCPUs polling event channels (SCHEDOP_poll)? 
*/ +#if MAX_VIRT_CPUS <= BITS_PER_LONG DECLARE_BITMAP(poll_mask, MAX_VIRT_CPUS); +#else + unsigned long *poll_mask; +#endif /* Guest has shut down (inc. reason code)? */ spinlock_t shutdown_lock; @@ -244,7 +250,7 @@ struct domain atomic_t refcnt; - struct vcpu *vcpu[MAX_VIRT_CPUS]; + struct vcpu **vcpu; /* Bitmask of CPUs which are holding onto this domain''s state. */ cpumask_t domain_dirty_cpumask; @@ -497,7 +503,7 @@ extern struct domain *domain_list; (_d) = rcu_dereference((_d)->next_in_list )) \ #define for_each_vcpu(_d,_v) \ - for ( (_v) = (_d)->vcpu[0]; \ + for ( (_v) = (_d)->vcpu ? (_d)->vcpu[0] : NULL; \ (_v) != NULL; \ (_v) = (_v)->next_in_list ) --- 2009-06-10.orig/xen/include/xen/shared.h 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/include/xen/shared.h 2009-06-10 10:48:29.000000000 +0200 @@ -21,8 +21,6 @@ typedef union { (*(!has_32bit_shinfo(d) ? \ (typeof(&(s)->compat.field))&(s)->native.field : \ (typeof(&(s)->compat.field))&(s)->compat.field)) -#define shared_info(d, field) \ - __shared_info(d, (d)->shared_info, field) typedef union { struct vcpu_info native; @@ -30,19 +28,22 @@ typedef union { } vcpu_info_t; /* As above, cast to compat field type. */ -#define vcpu_info(v, field) \ - (*(!has_32bit_shinfo((v)->domain) ? \ - (typeof(&(v)->vcpu_info->compat.field))&(v)->vcpu_info->native.field : \ - (typeof(&(v)->vcpu_info->compat.field))&(v)->vcpu_info->compat.field)) +#define __vcpu_info(v, i, field) \ + (*(!has_32bit_shinfo((v)->domain) ? 
\ + (typeof(&(i)->compat.field))&(i)->native.field : \ + (typeof(&(i)->compat.field))&(i)->compat.field)) #else typedef struct shared_info shared_info_t; -#define shared_info(d, field) ((d)->shared_info->field) +#define __shared_info(d, s, field) ((s)->field) typedef struct vcpu_info vcpu_info_t; -#define vcpu_info(v, field) ((v)->vcpu_info->field) +#define __vcpu_info(v, i, field) ((i)->field) #endif +#define shared_info(d, field) __shared_info(d, (d)->shared_info, field) +#define vcpu_info(v, field) __vcpu_info(v, (v)->vcpu_info, field) + #endif /* __XEN_SHARED_H__ */ --- 2009-06-10.orig/xen/include/xen/xenoprof.h 2009-06-10 16:19:50.000000000 +0200 +++ 2009-06-10/xen/include/xen/xenoprof.h 2009-06-10 10:48:29.000000000 +0200 @@ -50,7 +50,7 @@ struct xenoprof { #ifdef CONFIG_COMPAT int is_compat; #endif - struct xenoprof_vcpu vcpu [MAX_VIRT_CPUS]; + struct xenoprof_vcpu *vcpu; }; #ifndef CONFIG_COMPAT _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Jun-18 09:16 UTC
Re: [Xen-devel] [PATCH 3/3] x86_64: allow more vCPU-s per guest
On 10/06/2009 15:48, "Jan Beulich" <JBeulich@novell.com> wrote: > Since the shared info layout is fixed, guests are required to use > VCPUOP_register_vcpu_info prior to booting any vCPU beyond the > traditional limit of 32. Applied. Is the vcpu[] array re-allocation in XEN_DOMCTL_max_vcpus an example of over-optimistic-concurrency-control? It can't really be 100% safe without extra locking on all users of that array (not good), or using rcu (better), can it. The wmb() is a nice try. ;-) -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich
2009-Jun-18 09:27 UTC
Re: [Xen-devel] [PATCH 3/3] x86_64: allow more vCPU-s per guest
>>> Keir Fraser <keir.fraser@eu.citrix.com> 18.06.09 11:16 >>> >On 10/06/2009 15:48, "Jan Beulich" <JBeulich@novell.com> wrote: > >> Since the shared info layout is fixed, guests are required to use >> VCPUOP_register_vcpu_info prior to booting any vCPU beyond the >> traditional limit of 32. > >Applied. Is the vcpu[] array re-allocation in XEN_DOMCTL_max_vcpus an >example of over-optimistic-concurrency-control? It can't really be 100% safe >without extra locking on all users of that array (not good), or using rcu >(better), can it. The wmb() is a nice try. ;-) Agreed. Originally I intended to add at least a comment, but after realizing that the path currently is only reached during domain creation (and I believe there are issues elsewhere if one would really allow increasing the # of vCPU-s in a domain on the fly), I decided to leave it as is (and the wmb() can be considered sort of a comment to that effect). Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Jun-18 09:36 UTC
Re: [Xen-devel] [PATCH 3/3] x86_64: allow more vCPU-s per guest
On 18/06/2009 10:27, "Jan Beulich" <JBeulich@novell.com> wrote: >> Applied. Is the vcpu[] array re-allocation in XEN_DOMCTL_max_vcpus an >> example of over-optimistic-concurrency-control? It can't really be 100% safe >> without extra locking on all users of that array (not good), or using rcu >> (better), can it. The wmb() is a nice try. ;-) > > Agreed. Originally I intended to add at least a comment, but after realizing > that the path currently is only reached during domain creation (and I believe > there are issues elsewhere if one would really allow increasing the # of > vCPU-s in a domain on the fly), I decided to leave it as is (and the wmb() > can be considered sort of a comment to that effect). Then I think it would be better to make that domctl really singleshot (i.e., fail if d->max_vcpus is already non-zero), with a comment explaining why it is implemented this way. Buggily implementing an unused case can't be good. Can you generate a patch for this, please? Is it safe do you think to go from d->max_vcpus=0 to d->max_vcpus!=0 on a live domain? (i.e., the specific case we do use in your general observation that increasing d->max_vcpus may be dangerous)? -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Jun-18 09:39 UTC
Re: [Xen-devel] [PATCH 3/3] x86_64: allow more vCPU-s per guest
On 18/06/2009 10:36, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote: >> Agreed. Originally I intended to add at least a comment, but after realizing >> that the path currently is only reached during domain creation (and I believe >> there are issues elsewhere if one would really allow increasing the # of >> vCPU-s in a domain on the fly), I decided to leave it as is (and the wmb() >> can be considered sort of a comment to that effect). > > Then I think it would be better to make that domctl really singleshot (i.e., > fail if d->max_vcpus is already non-zero), with a comment explaining why it > is implemented this way. Buggily implementing an unused case can't be good. > Can you generate a patch for this, please? > > Is it safe do you think to go from d->max_vcpus=0 to d->max_vcpus!=0 on a > live domain? (i.e., the specific case we do use in your general observation > that increasing d->max_vcpus may be dangerous)? Or alternatively I would be happy to keep the full domctl functionality, but then we have to use rcu for freeing the old vcpu array, and we have to convince ourselves that arbitrarily increasing max_vcpus is safe (I'm not sure what problems you foresee there?). -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich
2009-Jun-18 10:13 UTC
Re: [Xen-devel] [PATCH 3/3] x86_64: allow more vCPU-s per guest
>>> Keir Fraser <keir.fraser@eu.citrix.com> 18.06.09 11:36 >>> >On 18/06/2009 10:27, "Jan Beulich" <JBeulich@novell.com> wrote: > >>> Applied. Is the vcpu[] array re-allocation in XEN_DOMCTL_max_vcpus an >>> example of over-optimistic-concurrency-control? It can't really be 100% safe >>> without extra locking on all users of that array (not good), or using rcu >>> (better), can it. The wmb() is a nice try. ;-) >> >> Agreed. Originally I intended to add at least a comment, but after realizing >> that the path currently is only reached during domain creation (and I believe >> there are issues elsewhere if one would really allow increasing the # of >> vCPU-s in a domain on the fly), I decided to leave it as is (and the wmb() >> can be considered sort of a comment to that effect). > >Then I think it would be better to make that domctl really singleshot (i.e., >fail if d->max_vcpus is already non-zero), with a comment explaining why it >is implemented this way. Buggily implementing an unused case can't be good. >Can you generate a patch for this, please? Sure. >Is it safe do you think to go from d->max_vcpus=0 to d->max_vcpus!=0 on a >live domain? (i.e., the specific case we do use in your general observation >that increasing d->max_vcpus may be dangerous)? Yes, this is safe imo. Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Jun-18 10:17 UTC
Re: [Xen-devel] [PATCH 3/3] x86_64: allow more vCPU-s per guest
On 18/06/2009 11:13, "Jan Beulich" <JBeulich@novell.com> wrote: >> Is it safe do you think to go from d->max_vcpus=0 to d->max_vcpus!=0 on a >> live domain? (i.e., the specific case we do use in your general observation >> that increasing d->max_vcpus may be dangerous)? > > Yes, this is safe imo. Can you give an example where increasing max_vcpus from X!=0 to Y>X might be dangerous? -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich
2009-Jun-18 10:22 UTC
Re: [Xen-devel] [PATCH 3/3] x86_64: allow more vCPU-s per guest
>>> Keir Fraser <keir.fraser@eu.citrix.com> 18.06.09 12:17 >>> >On 18/06/2009 11:13, "Jan Beulich" <JBeulich@novell.com> wrote: > >>> Is it safe do you think to go from d->max_vcpus=0 to d->max_vcpus!=0 on a >>> live domain? (i.e., the specific case we do use in your general observation >>> that increasing d->max_vcpus may be dangerous)? >> >> Yes, this is safe imo. > >Can you give an example where increasing max_vcpus from X!=0 to Y>X might be >dangerous? I didn't keep any record, so I'll have to see if I can spot the place(s) again (if any - I only vaguely remember possibly having noticed some problem in the past). Oh, maybe it was just that the Linux kernel can't deal with it. I'll look around in any case and let you know. Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Jun-18 10:40 UTC
Re: [Xen-devel] [PATCH 3/3] x86_64: allow more vCPU-s per guest
On 18/06/2009 11:22, "Jan Beulich" <JBeulich@novell.com> wrote: >>>> Is it safe do you think to go from d->max_vcpus=0 to d->max_vcpus!=0 on a >>>> live domain? (i.e, the specific case we do use in your general observation >>>> that increasing d->max_vcpus may be dangerous)? >>> >>> Yes, this is safe imo. >> >> Can you give an example where increasing max_vcpus from X!=0 to Y>X might be >> dangerous? > > I didn't keep any record, so I'll have to see if I can spot the place(s) again > (if any - I only vaguely remember possibly having noticed some problem in > the past). Oh, maybe it was just that the Linux kernel can't deal with it. > I'll > look around in any case and let you know. Fair enough. Anyhow, making the domctl singleshot for now is fine by me, since we have no usage scenario that requires greater flexibility than that. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich
2009-Jun-18 11:50 UTC
Re: [Xen-devel] [PATCH 3/3] x86_64: allow more vCPU-s per guest
>>> Keir Fraser <keir.fraser@eu.citrix.com> 18.06.09 11:39 >>> > Then I think it would be better to make that domctl really singleshot (i.e., > fail if d->max_vcpus is already non-zero), with a comment explaining why it > is implemented this way. Buggily implementing an unused case can't be good. > Can you generate a patch for this, please? Here we go: Signed-off-by: Jan Beulich <jbeulich@novell.com> --- 2009-06-10.orig/xen/common/domctl.c 2009-06-10 15:04:45.000000000 +0200 +++ 2009-06-10/xen/common/domctl.c 2009-06-18 13:48:30.000000000 +0200 @@ -463,6 +463,16 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc if ( (max < d->max_vcpus) && (d->vcpu[max] != NULL) ) goto maxvcpu_out; + /* + * For now don't allow increasing the vcpu count from a non-zero + * value: This code and all readers of d->vcpu would otherwise need + * to be converted to use RCU, but at present there's no tools side + * code path that would issue such a request. + */ + ret = -EBUSY; + if ( (d->max_vcpus > 0) && (max > d->max_vcpus) ) + goto maxvcpu_out; + ret = -ENOMEM; if ( max > d->max_vcpus ) { _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel