Index: 2006-10-04/xen/arch/x86/domain.c
===================================================================
--- 2006-10-04.orig/xen/arch/x86/domain.c      2006-10-04 15:18:36.000000000 +0200
+++ 2006-10-04/xen/arch/x86/domain.c   2006-10-04 15:18:51.000000000 +0200
@@ -114,6 +114,58 @@ void dump_pageframe_info(struct domain *
     }
 }
 
+#ifdef CONFIG_COMPAT
+int setup_arg_xlat_area(struct domain *d, unsigned int vcpu_id, l4_pgentry_t *l4tab)
+{
+    unsigned i;
+    struct page_info *pg;
+
+    if ( !d->arch.mm_arg_xlat_l3 )
+    {
+        pg = alloc_domheap_page(NULL);
+        if ( !pg )
+            return -ENOMEM;
+        d->arch.mm_arg_xlat_l3 = clear_page(page_to_virt(pg));
+    }
+
+    l4tab[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
+        l4e_from_paddr(__pa(d->arch.mm_arg_xlat_l3), __PAGE_HYPERVISOR);
+
+    for ( i = 0; i < COMPAT_ARG_XLAT_PAGES; ++i )
+    {
+        unsigned long va = COMPAT_ARG_XLAT_VIRT_START(vcpu_id) + i * PAGE_SIZE;
+        l2_pgentry_t *l2tab;
+        l1_pgentry_t *l1tab;
+
+        if ( !l3e_get_intpte(d->arch.mm_arg_xlat_l3[l3_table_offset(va)]) )
+        {
+            pg = alloc_domheap_page(NULL);
+            if ( !pg )
+                return -ENOMEM;
+            clear_page(page_to_virt(pg));
+            d->arch.mm_arg_xlat_l3[l3_table_offset(va)] = l3e_from_page(pg, PAGE_HYPERVISOR);
+        }
+        l2tab = l3e_to_l2e(d->arch.mm_arg_xlat_l3[l3_table_offset(va)]);
+        if ( !l2e_get_intpte(l2tab[l2_table_offset(va)]) )
+        {
+            pg = alloc_domheap_page(NULL);
+            if ( !pg )
+                return -ENOMEM;
+            clear_page(page_to_virt(pg));
+            l2tab[l2_table_offset(va)] = l2e_from_page(pg, PAGE_HYPERVISOR);
+        }
+        l1tab = l2e_to_l1e(l2tab[l2_table_offset(va)]);
+        BUG_ON(l1e_get_intpte(l1tab[l1_table_offset(va)]));
+        pg = alloc_domheap_page(NULL);
+        if ( !pg )
+            return -ENOMEM;
+        l1tab[l1_table_offset(va)] = l1e_from_page(pg, PAGE_HYPERVISOR);
+    }
+
+    return 0;
+}
+#endif
+
 struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id)
 {
     struct vcpu *v;
@@ -161,6 +213,13 @@ struct vcpu *alloc_vcpu_struct(struct do
             l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
         v->arch.guest_table = pagetable_from_page(pg);
         v->arch.guest_table_user = v->arch.guest_table;
+
+        if ( setup_arg_xlat_area(d, vcpu_id, l4tab) < 0 )
+        {
+            free_xenheap_page(l4tab);
+            xfree(v);
+            return NULL;
+        }
     }
 #endif
 
@@ -273,6 +332,46 @@ void arch_domain_destroy(struct domain *
     free_domheap_page(virt_to_page(d->arch.mm_perdomain_l3));
 #endif
 
+#ifdef CONFIG_COMPAT
+    if ( d->arch.mm_arg_xlat_l3 )
+    {
+        struct page_info *pg;
+        unsigned l3;
+
+        for ( l3 = 0; l3 < L3_PAGETABLE_ENTRIES; ++l3 )
+        {
+            if ( l3e_get_intpte(d->arch.mm_arg_xlat_l3[l3]) )
+            {
+                l2_pgentry_t *l2tab = l3e_to_l2e(d->arch.mm_arg_xlat_l3[l3]);
+                unsigned l2;
+
+                for ( l2 = 0; l2 < L2_PAGETABLE_ENTRIES; ++l2 )
+                {
+                    if ( l2e_get_intpte(l2tab[l2]) )
+                    {
+                        l1_pgentry_t *l1tab = l2e_to_l1e(l2tab[l2]);
+                        unsigned l1;
+
+                        for ( l1 = 0; l1 < L1_PAGETABLE_ENTRIES; ++l1 )
+                        {
+                            if ( l1e_get_intpte(l1tab[l1]) )
+                            {
+                                pg = l1e_get_page(l1tab[l1]);
+                                free_domheap_page(pg);
+                            }
+                        }
+                        pg = l2e_get_page(l2tab[l2]);
+                        free_domheap_page(pg);
+                    }
+                }
+                pg = l3e_get_page(d->arch.mm_arg_xlat_l3[l3]);
+                free_domheap_page(pg);
+            }
+        }
+        free_domheap_page(virt_to_page(d->arch.mm_arg_xlat_l3));
+    }
+#endif
+
     free_xenheap_page(d->shared_info);
 }
 
@@ -931,55 +1030,131 @@ unsigned long hypercall_create_continuat
         for ( i = 0; *p != '\0'; i++ )
             mcs->call.args[i] = next_arg(p, args);
+        if ( IS_COMPAT(current->domain) )
+        {
+            for ( ; i < 6; i++ )
+                mcs->call.args[i] = 0;
+        }
     }
     else
     {
         regs       = guest_cpu_user_regs();
-#if defined(__i386__)
         regs->eax  = op;
+        regs->eip -= 2;  /* re-execute 'syscall' / 'int 0x82' */
 
-        if ( supervisor_mode_kernel || hvm_guest(current) )
-            regs->eip &= ~31; /* re-execute entire hypercall entry stub */
+#if defined(__x86_64__)
+        if ( !IS_COMPAT(current->domain) )
+        {
+            for ( i = 0; *p != '\0'; i++ )
+            {
+                arg = next_arg(p, args);
+                switch ( i )
+                {
+                case 0: regs->rdi = arg; break;
+                case 1: regs->rsi = arg; break;
+                case 2: regs->rdx = arg; break;
+                case 3: regs->r10 = arg; break;
+                case 4: regs->r8  = arg; break;
+                case 5: regs->r9  = arg; break;
+                }
+            }
+        }
         else
-            regs->eip -= 2;   /* re-execute 'int 0x82' */
-
-        for ( i = 0; *p != '\0'; i++ )
+#endif
         {
-            arg = next_arg(p, args);
-            switch ( i )
+            if ( supervisor_mode_kernel || hvm_guest(current) )
+                regs->eip &= ~31; /* re-execute entire hypercall entry stub */
+
+            for ( i = 0; *p != '\0'; i++ )
             {
-            case 0: regs->ebx = arg; break;
-            case 1: regs->ecx = arg; break;
-            case 2: regs->edx = arg; break;
-            case 3: regs->esi = arg; break;
-            case 4: regs->edi = arg; break;
-            case 5: regs->ebp = arg; break;
+                arg = next_arg(p, args);
+                switch ( i )
+                {
+                case 0: regs->ebx = arg; break;
+                case 1: regs->ecx = arg; break;
+                case 2: regs->edx = arg; break;
+                case 3: regs->esi = arg; break;
+                case 4: regs->edi = arg; break;
+                case 5: regs->ebp = arg; break;
+                }
             }
         }
-#elif defined(__x86_64__)
-        regs->rax  = op;
-        regs->rip -= 2;  /* re-execute 'syscall' */
+    }
 
-        for ( i = 0; *p != '\0'; i++ )
+    va_end(args);
+
+    return op;
+}
+
+#ifdef CONFIG_COMPAT
+int hypercall_xlat_continuation(unsigned int mask, ...)
+{
+    int rc = 0;
+    struct mc_state *mcs = &this_cpu(mc_state);
+    struct cpu_user_regs *regs = guest_cpu_user_regs();
+    unsigned int i, cval = 0;
+    unsigned long nval = 0;
+    va_list args;
+
+    va_start(args, mask);
+
+    if ( test_bit(_MCSF_in_multicall, &mcs->flags) )
+    {
+        for ( i = 0; i < 6; ++i, mask >>= 1 )
         {
-            arg = next_arg(p, args);
+            if ( (mask & 1) )
+            {
+                nval = va_arg(args, unsigned long);
+                cval = va_arg(args, unsigned int);
+            }
+            if ( (mask & 1) && mcs->call.args[i] == nval )
+            {
+                ++rc;
+            }
+            else
+            {
+                cval = mcs->call.args[i];
+                BUG_ON(mcs->call.args[i] != cval);
+            }
+            mcs->compat_call.args[i] = cval;
+        }
+    }
+    else
+    {
+        for ( i = 0; i < 6; ++i, mask >>= 1 )
+        {
+            unsigned long *reg;
+
             switch ( i )
             {
-            case 0: regs->rdi = arg; break;
-            case 1: regs->rsi = arg; break;
-            case 2: regs->rdx = arg; break;
-            case 3: regs->r10 = arg; break;
-            case 4: regs->r8  = arg; break;
-            case 5: regs->r9  = arg; break;
+            case 0: reg = &regs->ebx; break;
+            case 1: reg = &regs->ecx; break;
+            case 2: reg = &regs->edx; break;
+            case 3: reg = &regs->esi; break;
+            case 4: reg = &regs->edi; break;
+            case 5: reg = &regs->ebp; break;
+            default: BUG(); reg = NULL; break;
+            }
+            if ( (mask & 1) )
+            {
+                nval = va_arg(args, unsigned long);
+                cval = va_arg(args, unsigned int);
+            }
+            if ( (mask & 1) && *reg == nval )
+            {
+                *reg = cval;
+                ++rc;
             }
+            else
+                BUG_ON(*reg != (unsigned int)*reg);
         }
-#endif
     }
 
     va_end(args);
 
-    return op;
+    return rc;
 }
+#endif
 
 static void relinquish_memory(struct domain *d, struct list_head *list)
 {
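To make the mask convention of hypercall_xlat_continuation() above concrete: bit i of the mask selects argument slot i, each set bit consumes a (native value, compat value) pair from the variadic list, and a slot is rewritten only when it still holds the native value. The following standalone sketch (illustrative code only, not part of the patch; all names are made up) models just that logic:

    #include <stdarg.h>
    #include <stdio.h>

    static int xlat_args_model(unsigned long args[6], unsigned int mask, ...)
    {
        va_list ap;
        unsigned int i;
        int replaced = 0;

        va_start(ap, mask);
        for ( i = 0; i < 6; ++i, mask >>= 1 )
        {
            if ( mask & 1 )
            {
                unsigned long nval = va_arg(ap, unsigned long);
                unsigned int cval = va_arg(ap, unsigned int);

                if ( args[i] == nval )  /* slot still holds the native value */
                {
                    args[i] = cval;
                    ++replaced;
                }
            }
        }
        va_end(ap);
        return replaced;
    }

    int main(void)
    {
        unsigned long args[6] = { 0x10, 0x800000000000UL, 0, 0, 0, 0 };

        /* mask 0x02: only slot 1 (the argument pointer) is translated */
        int n = xlat_args_model(args, 0x02, 0x800000000000UL, 0x80001000u);
        printf("%d slot(s) replaced; arg1 = %#lx\n", n, args[1]);
        return 0;
    }

This also explains why compat_memory_op() further down passes mask 0x02: only its second hypercall argument (the translated handle) needs swapping back to the guest's original compat handle when a continuation is created.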
Index: 2006-10-04/xen/arch/x86/domain_build.c
===================================================================
--- 2006-10-04.orig/xen/arch/x86/domain_build.c        2006-10-04 15:16:05.000000000 +0200
+++ 2006-10-04/xen/arch/x86/domain_build.c     2006-10-04 15:18:51.000000000 +0200
@@ -665,7 +665,11 @@ int construct_dom0(struct domain *d,
         l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
     v->arch.guest_table = pagetable_from_paddr(__pa(l4start));
     if ( IS_COMPAT(d) )
+    {
         v->arch.guest_table_user = v->arch.guest_table;
+        if ( setup_arg_xlat_area(d, 0, l4start) < 0 )
+            panic("Not enough RAM for domain 0 hypercall argument translation.\n");
+    }
 
     l4tab += l4_table_offset(dsi.v_start);
     mfn = alloc_spfn;
Index: 2006-10-04/xen/arch/x86/mm.c
===================================================================
--- 2006-10-04.orig/xen/arch/x86/mm.c  2006-10-04 15:18:45.000000000 +0200
+++ 2006-10-04/xen/arch/x86/mm.c       2006-10-04 15:18:51.000000000 +0200
@@ -1106,9 +1106,12 @@ static int alloc_l4_table(struct page_in
     pl4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
         l4e_from_pfn(pfn, __PAGE_HYPERVISOR);
     pl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
-        l4e_from_page(
-            virt_to_page(page_get_owner(page)->arch.mm_perdomain_l3),
-            __PAGE_HYPERVISOR);
+        l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3),
+                      __PAGE_HYPERVISOR);
+    if ( IS_COMPAT(d) )
+        pl4e[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
+            l4e_from_page(virt_to_page(d->arch.mm_arg_xlat_l3),
+                          __PAGE_HYPERVISOR);
 
     return 1;
 
@@ -2732,7 +2735,9 @@ int do_update_va_mapping(unsigned long v
             flush_tlb_mask(d->domain_dirty_cpumask);
             break;
         default:
-            if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
+            if ( unlikely(!IS_COMPAT(d) ?
+                          get_user(vmask, (unsigned long *)bmap_ptr) :
+                          get_user(vmask, (unsigned int *)bmap_ptr)) )
                 rc = -EFAULT;
             pmask = vcpumask_to_pcpumask(d, vmask);
             flush_tlb_mask(pmask);
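The width distinction added to do_update_va_mapping() matters because a 32-bit guest supplies a pointer to a 32-bit vCPU bitmap; fetching a full unsigned long would read four bytes beyond what the guest wrote. A minimal standalone illustration (memcpy stands in for get_user(); names are mine):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static unsigned long read_vcpumask(const void *bmap_ptr, int is_compat)
    {
        unsigned long vmask;

        if ( is_compat )
        {
            uint32_t v32;                        /* 32-bit guest bitmap */
            memcpy(&v32, bmap_ptr, sizeof(v32));
            vmask = v32;
        }
        else
            memcpy(&vmask, bmap_ptr, sizeof(vmask));
        return vmask;
    }

    int main(void)
    {
        unsigned char guest_buf[4] = { 0x0f, 0, 0, 0 }; /* vCPUs 0-3 */
        printf("%#lx\n", read_vcpumask(guest_buf, 1));
        return 0;
    }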
Index: 2006-10-04/xen/arch/x86/x86_64/Makefile
===================================================================
--- 2006-10-04.orig/xen/arch/x86/x86_64/Makefile       2006-10-04 15:06:22.000000000 +0200
+++ 2006-10-04/xen/arch/x86/x86_64/Makefile    2006-10-04 15:18:51.000000000 +0200
@@ -6,5 +6,6 @@ obj-y += traps.o
 ifeq ($(CONFIG_COMPAT),y)
 # extra dependencies
 entry.o: compat/entry.S
+mm.o:    compat/mm.c
 traps.o: compat/traps.c
 endif
Index: 2006-10-04/xen/arch/x86/x86_64/compat/entry.S
===================================================================
--- 2006-10-04.orig/xen/arch/x86/x86_64/compat/entry.S 2006-10-04 15:11:03.000000000 +0200
+++ 2006-10-04/xen/arch/x86/x86_64/compat/entry.S      2006-10-04 15:18:51.000000000 +0200
@@ -282,15 +282,11 @@ CFIX14:
 #define compat_mmu_update domain_crash_synchronous
 #define compat_set_gdt domain_crash_synchronous
 #define compat_platform_op domain_crash_synchronous
-#define compat_update_descriptor domain_crash_synchronous
-#define compat_memory_op domain_crash_synchronous
 #define compat_multicall domain_crash_synchronous
-#define compat_update_va_mapping domain_crash_synchronous
 #define compat_set_timer_op domain_crash_synchronous
 #define compat_event_channel_op_compat domain_crash_synchronous
 #define compat_physdev_op_compat domain_crash_synchronous
 #define compat_grant_table_op domain_crash_synchronous
-#define compat_update_va_mapping_otherdomain domain_crash_synchronous
 #define compat_vcpu_op domain_crash_synchronous
 #define compat_mmuext_op domain_crash_synchronous
 #define compat_acm_op domain_crash_synchronous
Index: 2006-10-04/xen/arch/x86/x86_64/compat/mm.c
===================================================================
--- /dev/null  1970-01-01 00:00:00.000000000 +0000
+++ 2006-10-04/xen/arch/x86/x86_64/compat/mm.c 2006-10-04 15:18:51.000000000 +0200
@@ -0,0 +1,128 @@
+#ifdef CONFIG_COMPAT
+
+#include <compat/memory.h>
+
+int compat_update_descriptor(u32 pa_lo, u32 pa_hi, u32 desc_lo, u32 desc_hi)
+{
+    return do_update_descriptor(pa_lo | ((u64)pa_hi << 32),
+                                desc_lo | ((u64)desc_hi << 32));
+}
+
+int compat_arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
+{
+    struct compat_machphys_mfn_list xmml;
+    l2_pgentry_t l2e;
+    unsigned long v;
+    compat_pfn_t mfn;
+    unsigned int i;
+    int rc = 0;
+
+    switch ( op )
+    {
+    case XENMEM_add_to_physmap:
+    {
+        struct compat_add_to_physmap cmp;
+        struct xen_add_to_physmap *nat = (void *)COMPAT_ARG_XLAT_VIRT_START(current->vcpu_id);
+
+        if ( copy_from_guest(&cmp, arg, 1) )
+            return -EFAULT;
+
+        XLAT_add_to_physmap(nat, &cmp);
+        rc = arch_memory_op(op, guest_handle_from_ptr(nat, void));
+
+        break;
+    }
+
+    case XENMEM_memory_map:
+    case XENMEM_machine_memory_map:
+    {
+        struct compat_memory_map cmp;
+        struct xen_memory_map *nat = (void *)COMPAT_ARG_XLAT_VIRT_START(current->vcpu_id);
+
+        if ( copy_from_guest(&cmp, arg, 1) )
+            return -EFAULT;
+#define XLAT_memory_map_HNDL_buffer(_d_, _s_) \
+        guest_from_compat_handle((_d_)->buffer, (_s_)->buffer)
+        XLAT_memory_map(nat, &cmp);
+#undef XLAT_memory_map_HNDL_buffer
+
+        rc = arch_memory_op(op, guest_handle_from_ptr(nat, void));
+        if ( rc < 0 )
+            break;
+
+#define XLAT_memory_map_HNDL_buffer(_d_, _s_) ((void)0)
+        XLAT_memory_map(&cmp, nat);
+#undef XLAT_memory_map_HNDL_buffer
+        if ( copy_to_guest(arg, &cmp, 1) )
+            rc = -EFAULT;
+
+        break;
+    }
+
+    case XENMEM_machphys_mapping:
+    {
+        static /*const*/ struct compat_machphys_mapping mapping = {
+            .v_start = MACH2PHYS_COMPAT_VIRT_START,
+            .v_end   = MACH2PHYS_COMPAT_VIRT_END,
+            .max_mfn = MACH2PHYS_COMPAT_NR_ENTRIES - 1
+        };
+
+        if ( copy_to_guest(arg, &mapping, 1) )
+            rc = -EFAULT;
+
+        break;
+    }
+
+    case XENMEM_machphys_mfn_list:
+        if ( copy_from_guest(&xmml, arg, 1) )
+            return -EFAULT;
+
+        for ( i = 0, v = RDWR_COMPAT_MPT_VIRT_START;
+              (i != xmml.max_extents) && (v != RDWR_COMPAT_MPT_VIRT_END);
+              i++, v += 1 << L2_PAGETABLE_SHIFT )
+        {
+            l2e = compat_idle_pg_table_l2[l2_table_offset(v)];
+            if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+                break;
+            mfn = l2e_get_pfn(l2e) + l1_table_offset(v);
+            if ( copy_to_compat_offset(xmml.extent_start, i, &mfn, 1) )
+                return -EFAULT;
+        }
+
+        xmml.nr_extents = i;
+        if ( copy_to_guest(arg, &xmml, 1) )
+            rc = -EFAULT;
+
+        break;
+
+    default:
+        rc = -ENOSYS;
+        break;
+    }
+
+    return rc;
+}
+
+int compat_update_va_mapping(unsigned int va, u32 lo, u32 hi,
+                             unsigned int flags)
+{
+    return do_update_va_mapping(va, lo | ((u64)hi << 32), flags);
+}
+
+int compat_update_va_mapping_otherdomain(unsigned long va, u32 lo, u32 hi,
+                                         unsigned long flags,
+                                         domid_t domid)
+{
+    return do_update_va_mapping_otherdomain(va, lo | ((u64)hi << 32), flags, domid);
+}
+#endif /* CONFIG_COMPAT */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
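All three wrappers in compat/mm.c rely on the same calling convention: a 32-bit guest passes each 64-bit hypercall argument as two 32-bit halves, and the wrapper recombines them before invoking the native handler. In isolation (illustrative, not hypervisor code):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t merge_halves(uint32_t lo, uint32_t hi)
    {
        return lo | ((uint64_t)hi << 32);  /* same expression as the wrappers */
    }

    int main(void)
    {
        /* e.g. a 64-bit descriptor address arriving split in two */
        printf("%#llx\n", (unsigned long long)merge_halves(0x23456000u, 0x1u));
        return 0;
    }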
Index: 2006-10-04/xen/arch/x86/x86_64/mm.c
===================================================================
--- 2006-10-04.orig/xen/arch/x86/x86_64/mm.c   2006-10-04 15:18:45.000000000 +0200
+++ 2006-10-04/xen/arch/x86/x86_64/mm.c        2006-10-04 15:18:51.000000000 +0200
@@ -28,6 +28,7 @@
 #include <asm/page.h>
 #include <asm/flushtlb.h>
 #include <asm/fixmap.h>
+#include <asm/hypercall.h>
 #include <asm/msr.h>
 #include <public/memory.h>
 
@@ -383,6 +384,8 @@ int check_descriptor(const struct domain
     return 0;
 }
 
+#include "compat/mm.c"
+
 /*
  * Local variables:
  * mode: C
Index: 2006-10-04/xen/common/compat/Makefile
===================================================================
--- 2006-10-04.orig/xen/common/compat/Makefile 2006-10-04 15:10:46.000000000 +0200
+++ 2006-10-04/xen/common/compat/Makefile      2006-10-04 15:18:51.000000000 +0200
@@ -1,4 +1,5 @@
 obj-y += kernel.o
+obj-y += memory.o
 obj-y += xlat.o
 
 # extra dependencies
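The #include "compat/mm.c" at the bottom of x86_64/mm.c (together with the new mm.o dependency in the Makefile) compiles the compat handlers into the same translation unit as the native code — presumably so they can reach static helpers there, and so builds without CONFIG_COMPAT drop them entirely. The pattern in miniature (hypothetical file contents, for illustration only):

    /* What would live in the included compat file: it uses a helper
     * that is static in the enclosing translation unit. */
    #include <stdio.h>

    static int helper(int x);              /* defined later in this TU */

    int compat_op(int x) { return helper(x) + 1; }

    /* What would live in the including native file: */
    static int helper(int x) { return x * 2; }

    int main(void)
    {
        printf("%d\n", compat_op(20));     /* prints 41 */
        return 0;
    }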
Index: 2006-10-04/xen/common/compat/memory.c
===================================================================
--- /dev/null  1970-01-01 00:00:00.000000000 +0000
+++ 2006-10-04/xen/common/compat/memory.c      2006-10-04 15:18:51.000000000 +0200
@@ -0,0 +1,358 @@
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/hypercall.h>
+#include <xen/guest_access.h>
+#include <xen/sched.h>
+#include <xen/event.h>
+#include <asm/current.h>
+#include <compat/memory.h>
+
+int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE(void) compat)
+{
+    int rc, split, op = cmd & MEMOP_CMD_MASK;
+    unsigned int start_extent = cmd >> MEMOP_EXTENT_SHIFT;
+
+    do
+    {
+        unsigned int i, end_extent = 0;
+        union {
+            XEN_GUEST_HANDLE(void) hnd;
+            struct xen_memory_reservation *rsrv;
+            struct xen_memory_exchange *xchg;
+            struct xen_translate_gpfn_list *xlat;
+        } nat;
+        union {
+            struct compat_memory_reservation rsrv;
+            struct compat_memory_exchange xchg;
+            struct compat_translate_gpfn_list xlat;
+        } cmp;
+
+        set_xen_guest_handle(nat.hnd, (void *)COMPAT_ARG_XLAT_VIRT_START(current->vcpu_id));
+        split = 0;
+        switch ( op )
+        {
+            xen_pfn_t *space;
+
+        case XENMEM_increase_reservation:
+        case XENMEM_decrease_reservation:
+        case XENMEM_populate_physmap:
+            if ( copy_from_guest(&cmp.rsrv, compat, 1) )
+                return start_extent;
+
+            /* Is size too large for us to encode a continuation? */
+            if ( cmp.rsrv.nr_extents > (UINT_MAX >> MEMOP_EXTENT_SHIFT) )
+                return start_extent;
+
+            if ( !compat_handle_is_null(cmp.rsrv.extent_start) &&
+                 !compat_handle_okay(cmp.rsrv.extent_start, cmp.rsrv.nr_extents) )
+                return start_extent;
+
+            end_extent = start_extent + (COMPAT_ARG_XLAT_SIZE - sizeof(*nat.rsrv)) /
+                                        sizeof(*space);
+            if ( end_extent > cmp.rsrv.nr_extents )
+                end_extent = cmp.rsrv.nr_extents;
+
+            space = (xen_pfn_t *)(nat.rsrv + 1);
+#define XLAT_memory_reservation_HNDL_extent_start(_d_, _s_) \
+            do \
+            { \
+                if ( !compat_handle_is_null((_s_)->extent_start) ) \
+                { \
+                    set_xen_guest_handle((_d_)->extent_start, space - start_extent); \
+                    if ( op != XENMEM_increase_reservation ) \
+                    { \
+                        for ( i = start_extent; i < end_extent; ++i ) \
+                        { \
+                            compat_pfn_t pfn; \
+                            if ( __copy_from_compat_offset(&pfn, (_s_)->extent_start, i, 1) ) \
+                            { \
+                                end_extent = i; \
+                                split = -1; \
+                                break; \
+                            } \
+                            *space++ = pfn; \
+                        } \
+                    } \
+                } \
+                else \
+                { \
+                    set_xen_guest_handle((_d_)->extent_start, NULL); \
+                    end_extent = cmp.rsrv.nr_extents; \
+                } \
+            } while (0)
+            XLAT_memory_reservation(nat.rsrv, &cmp.rsrv);
+#undef XLAT_memory_reservation_HNDL_extent_start
+
+            if ( end_extent < cmp.rsrv.nr_extents )
+            {
+                nat.rsrv->nr_extents = end_extent;
+                ++split;
+            }
+
+            break;
+
+        case XENMEM_exchange:
+        {
+            int order_delta;
+
+            if ( copy_from_guest(&cmp.xchg, compat, 1) )
+                return -EFAULT;
+
+            order_delta = cmp.xchg.out.extent_order - cmp.xchg.in.extent_order;
+            /* Various sanity checks. */
+            if ( (cmp.xchg.nr_exchanged > cmp.xchg.in.nr_extents) ||
+                 (order_delta > 0 && (cmp.xchg.nr_exchanged & ((1U << order_delta) - 1))) ||
+                 /* Sizes of input and output lists do not overflow an int? */
+                 ((~0U >> cmp.xchg.in.extent_order) < cmp.xchg.in.nr_extents) ||
+                 ((~0U >> cmp.xchg.out.extent_order) < cmp.xchg.out.nr_extents) ||
+                 /* Sizes of input and output lists match? */
+                 ((cmp.xchg.in.nr_extents << cmp.xchg.in.extent_order) !=
+                  (cmp.xchg.out.nr_extents << cmp.xchg.out.extent_order)) )
+                return -EINVAL;
+
+            start_extent = cmp.xchg.nr_exchanged;
+            end_extent = (COMPAT_ARG_XLAT_SIZE - sizeof(*nat.xchg)) /
+                         (((1U << __builtin_abs(order_delta)) + 1) *
+                          sizeof(*space));
+            if ( end_extent == 0 )
+            {
+                printk("Cannot translate compatibility mode XENMEM_exchange extents (%u,%u)\n",
+                       cmp.xchg.in.extent_order, cmp.xchg.out.extent_order);
+                return -E2BIG;
+            }
+            if ( order_delta > 0 )
+                end_extent <<= order_delta;
+            end_extent += start_extent;
+            if ( end_extent > cmp.xchg.in.nr_extents )
+                end_extent = cmp.xchg.in.nr_extents;
+
+            space = (xen_pfn_t *)(nat.xchg + 1);
+            /* Code below depends upon .in preceding .out. */
+            BUILD_BUG_ON(offsetof(xen_memory_exchange_t, in) > offsetof(xen_memory_exchange_t, out));
+#define XLAT_memory_reservation_HNDL_extent_start(_d_, _s_) \
+            do \
+            { \
+                set_xen_guest_handle((_d_)->extent_start, space - start_extent); \
+                for ( i = start_extent; i < end_extent; ++i ) \
+                { \
+                    compat_pfn_t pfn; \
+                    if ( __copy_from_compat_offset(&pfn, (_s_)->extent_start, i, 1) ) \
+                        return -EFAULT; \
+                    *space++ = pfn; \
+                } \
+                if ( order_delta > 0 ) \
+                { \
+                    start_extent >>= order_delta; \
+                    end_extent >>= order_delta; \
+                } \
+                else \
+                { \
+                    start_extent <<= -order_delta; \
+                    end_extent <<= -order_delta; \
+                } \
+                order_delta = -order_delta; \
+            } while (0)
+            XLAT_memory_exchange(nat.xchg, &cmp.xchg);
+#undef XLAT_memory_reservation_HNDL_extent_start
+
+            if ( end_extent < cmp.xchg.in.nr_extents )
+            {
+                nat.xchg->in.nr_extents = end_extent;
+                if ( order_delta >= 0 )
+                    nat.xchg->out.nr_extents = end_extent >> order_delta;
+                else
+                    nat.xchg->out.nr_extents = end_extent << -order_delta;
+                ++split;
+            }
+
+            break;
+        }
+
+        case XENMEM_current_reservation:
+        case XENMEM_maximum_reservation:
+        {
+#define xen_domid_t domid_t
+#define compat_domid_t domid_compat_t
+            CHECK_TYPE(domid);
+#undef compat_domid_t
+#undef xen_domid_t
+        }
+        case XENMEM_maximum_ram_page:
+            nat.hnd = compat;
+            break;
+
+        case XENMEM_translate_gpfn_list:
+            if ( copy_from_guest(&cmp.xlat, compat, 1) )
+                return -EFAULT;
+
+            /* Is size too large for us to encode a continuation? */
+            if ( cmp.xlat.nr_gpfns > (UINT_MAX >> MEMOP_EXTENT_SHIFT) )
+                return -EINVAL;
+
+            if ( !compat_handle_okay(cmp.xlat.gpfn_list, cmp.xlat.nr_gpfns) ||
+                 !compat_handle_okay(cmp.xlat.mfn_list, cmp.xlat.nr_gpfns) )
+                return -EFAULT;
+
+            end_extent = start_extent + (COMPAT_ARG_XLAT_SIZE - sizeof(*nat.xlat)) /
+                                        sizeof(*space);
+            if ( end_extent > cmp.xlat.nr_gpfns )
+                end_extent = cmp.xlat.nr_gpfns;
+
+            space = (xen_pfn_t *)(nat.xlat + 1);
+            /* Code below depends upon .gpfn_list preceding .mfn_list. */
+            BUILD_BUG_ON(offsetof(xen_translate_gpfn_list_t, gpfn_list) > offsetof(xen_translate_gpfn_list_t, mfn_list));
+#define XLAT_translate_gpfn_list_HNDL_gpfn_list(_d_, _s_) \
+            do \
+            { \
+                set_xen_guest_handle((_d_)->gpfn_list, space - start_extent); \
+                for ( i = start_extent; i < end_extent; ++i ) \
+                { \
+                    compat_pfn_t pfn; \
+                    if ( __copy_from_compat_offset(&pfn, (_s_)->gpfn_list, i, 1) ) \
+                        return -EFAULT; \
+                    *space++ = pfn; \
+                } \
+            } while (0)
+#define XLAT_translate_gpfn_list_HNDL_mfn_list(_d_, _s_) \
+            (_d_)->mfn_list = (_d_)->gpfn_list
+            XLAT_translate_gpfn_list(nat.xlat, &cmp.xlat);
+#undef XLAT_translate_gpfn_list_HNDL_mfn_list
+#undef XLAT_translate_gpfn_list_HNDL_gpfn_list
+
+            if ( end_extent < cmp.xlat.nr_gpfns )
+            {
+                nat.xlat->nr_gpfns = end_extent;
+                ++split;
+            }
+
+            break;
+
+        default:
+            return compat_arch_memory_op(cmd, compat);
+        }
+
+        rc = do_memory_op(cmd, nat.hnd);
+        if ( rc < 0 )
+            return rc;
+
+        if ( hypercall_xlat_continuation(0x02, nat.hnd, compat) )
+            split = -1;
+
+        switch ( op )
+        {
+        case XENMEM_increase_reservation:
+        case XENMEM_decrease_reservation:
+        case XENMEM_populate_physmap:
+            end_extent = split >= 0 ? rc : rc >> MEMOP_EXTENT_SHIFT;
+            if ( op != XENMEM_decrease_reservation &&
+                 !guest_handle_is_null(nat.rsrv->extent_start) )
+            {
+                for ( ; start_extent < end_extent; ++start_extent )
+                {
+                    compat_pfn_t pfn = nat.rsrv->extent_start.p[start_extent];
+
+                    BUG_ON(pfn != nat.rsrv->extent_start.p[start_extent]);
+                    if ( __copy_to_compat_offset(cmp.rsrv.extent_start, start_extent, &pfn, 1) )
+                    {
+                        if ( split >= 0 )
+                        {
+                            rc = start_extent;
+                            split = 0;
+                        }
+                        else
+                            /*
+                             * Short of being able to cancel the continuation,
+                             * force it to restart here; eventually we shall
+                             * get out of this state.
+                             */
+                            rc = (start_extent << MEMOP_EXTENT_SHIFT) | op;
+                        break;
+                    }
+                }
+            }
+            else
+                start_extent = end_extent;
+            break;
+
+        case XENMEM_exchange:
+        {
+            DEFINE_XEN_GUEST_HANDLE(compat_memory_exchange_t);
+            int order_delta;
+
+            BUG_ON(rc);
+            BUG_ON(end_extent < nat.xchg->nr_exchanged);
+            end_extent = nat.xchg->nr_exchanged;
+
+            order_delta = cmp.xchg.out.extent_order - cmp.xchg.in.extent_order;
+            if ( order_delta > 0 )
+            {
+                start_extent >>= order_delta;
+                BUG_ON(end_extent & ((1U << order_delta) - 1));
+                end_extent >>= order_delta;
+            }
+            else
+            {
+                start_extent <<= -order_delta;
+                end_extent <<= -order_delta;
+            }
+
+            for ( ; start_extent < end_extent; ++start_extent )
+            {
+                compat_pfn_t pfn = nat.xchg->out.extent_start.p[start_extent];
+
+                BUG_ON(pfn != nat.xchg->out.extent_start.p[start_extent]);
+                /* Note that we ignore errors accessing the output extent list. */
+                __copy_to_compat_offset(cmp.xchg.out.extent_start, start_extent, &pfn, 1);
+            }
+
+            cmp.xchg.nr_exchanged = nat.xchg->nr_exchanged;
+            if ( copy_field_to_guest(guest_handle_cast(compat, compat_memory_exchange_t),
+                                     &cmp.xchg, nr_exchanged) )
+            {
+                if ( split < 0 )
+                    /* Cannot cancel the continuation... */
+                    domain_crash_synchronous();
+                return -EFAULT;
+            }
+            break;
+        }
+
+        case XENMEM_maximum_ram_page:
+        case XENMEM_current_reservation:
+        case XENMEM_maximum_reservation:
+            break;
+
+        case XENMEM_translate_gpfn_list:
+            if ( split < 0 )
+                end_extent = rc >> MEMOP_EXTENT_SHIFT;
+            else
+                BUG_ON(rc);
+
+            for ( ; start_extent < end_extent; ++start_extent )
+            {
+                compat_pfn_t pfn = nat.xlat->mfn_list.p[start_extent];
+
+                BUG_ON(pfn != nat.xlat->mfn_list.p[start_extent]);
+                if ( __copy_to_compat_offset(cmp.xlat.mfn_list, start_extent, &pfn, 1) )
+                {
+                    if ( split < 0 )
+                        /* Cannot cancel the continuation... */
+                        domain_crash_synchronous();
+                    return -EFAULT;
+                }
+            }
+            break;
+
+        default:
+            domain_crash_synchronous();
+            break;
+        }
+
+        cmd = op | (start_extent << MEMOP_EXTENT_SHIFT);
+        if ( split > 0 && hypercall_preempt_check() )
+            return hypercall_create_continuation(
+                __HYPERVISOR_memory_op, "ih", cmd, compat);
+    } while ( split > 0 );
+
+    return rc;
+}
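Two bits of arithmetic in compat_memory_op() deserve a worked example. First, batch sizing: the translation area is a single page holding the native struct followed by xen_pfn_t slots, so each pass covers a few hundred extents. Standalone sketch (the 32-byte header size is an assumption for illustration, not taken from the headers):

    #include <stdio.h>

    int main(void)
    {
        unsigned int xlat_size = 4096;  /* COMPAT_ARG_XLAT_SIZE: one page */
        unsigned int hdr = 32;          /* sizeof(*nat.rsrv), assumed */
        unsigned int entry = 8;         /* sizeof(xen_pfn_t) */
        unsigned int start_extent = 0, nr_extents = 2000;
        unsigned int end_extent = start_extent + (xlat_size - hdr) / entry;

        if ( end_extent > nr_extents )
            end_extent = nr_extents;
        printf("first batch: extents [%u, %u)\n", start_extent, end_extent);
        return 0;
    }

Second, the XENMEM_exchange order_delta bookkeeping: exchanging 512 order-0 input extents for order-9 output extents gives order_delta = 9, and the sanity check requires (512 << 0) == (1 << 9); progress is tracked with the input and output indices shifted by order_delta relative to one another, which is why the translation macro shifts start_extent/end_extent and negates order_delta between the .in and .out passes.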
Index: 2006-10-04/xen/common/memory.c
===================================================================
--- 2006-10-04.orig/xen/common/memory.c        2006-08-21 18:02:24.000000000 +0200
+++ 2006-10-04/xen/common/memory.c     2006-10-04 15:18:51.000000000 +0200
@@ -17,18 +17,12 @@
 #include <xen/shadow.h>
 #include <xen/iocap.h>
 #include <xen/guest_access.h>
+#include <xen/hypercall.h>
 #include <xen/errno.h>
 #include <asm/current.h>
 #include <asm/hardirq.h>
 #include <public/memory.h>
 
-/*
- * To allow safe resume of do_memory_op() after preemption, we need to know
- * at what point in the page list to resume. For this purpose I steal the
- * high-order bits of the @cmd parameter, which are otherwise unused and zero.
- */
-#define START_EXTENT_SHIFT 4 /* cmd[:4] == start_extent */
-
 static long
 increase_reservation(
     struct domain *d,
@@ -236,7 +230,7 @@ translate_gpfn_list(
         return -EFAULT;
 
     /* Is size too large for us to encode a continuation? */
-    if ( op.nr_gpfns > (ULONG_MAX >> START_EXTENT_SHIFT) )
+    if ( op.nr_gpfns > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) )
         return -EINVAL;
 
     if ( !guest_handle_okay(op.gpfn_list, op.nr_gpfns) ||
@@ -517,20 +511,20 @@ long do_memory_op(unsigned long cmd, XEN
     struct xen_memory_reservation reservation;
     domid_t domid;
 
-    op = cmd & ((1 << START_EXTENT_SHIFT) - 1);
+    op = cmd & MEMOP_CMD_MASK;
 
     switch ( op )
     {
     case XENMEM_increase_reservation:
    case XENMEM_decrease_reservation:
     case XENMEM_populate_physmap:
-        start_extent = cmd >> START_EXTENT_SHIFT;
+        start_extent = cmd >> MEMOP_EXTENT_SHIFT;
 
         if ( copy_from_guest(&reservation, arg, 1) )
             return start_extent;
 
         /* Is size too large for us to encode a continuation? */
-        if ( reservation.nr_extents > (ULONG_MAX >> START_EXTENT_SHIFT) )
+        if ( reservation.nr_extents > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) )
             return start_extent;
 
         if ( unlikely(start_extent > reservation.nr_extents) )
@@ -594,7 +588,7 @@ long do_memory_op(unsigned long cmd, XEN
         if ( preempted )
             return hypercall_create_continuation(
                 __HYPERVISOR_memory_op, "lh",
-                op | (rc << START_EXTENT_SHIFT), arg);
+                op | (rc << MEMOP_EXTENT_SHIFT), arg);
 
         break;
 
@@ -626,14 +620,14 @@ long do_memory_op(unsigned long cmd, XEN
         break;
 
     case XENMEM_translate_gpfn_list:
-        progress = cmd >> START_EXTENT_SHIFT;
+        progress = cmd >> MEMOP_EXTENT_SHIFT;
         rc = translate_gpfn_list(
             guest_handle_cast(arg, xen_translate_gpfn_list_t),
             &progress);
         if ( rc == -EAGAIN )
             return hypercall_create_continuation(
                 __HYPERVISOR_memory_op, "lh",
-                op | (progress << START_EXTENT_SHIFT), arg);
+                op | (progress << MEMOP_EXTENT_SHIFT), arg);
         break;
 
     default:
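A round-trip of the continuation encoding that do_memory_op() and compat_memory_op() now share (standalone model of the MEMOP_* macros; the op value is purely illustrative):

    #include <stdio.h>

    #define MEMOP_EXTENT_SHIFT 4
    #define MEMOP_CMD_MASK     ((1 << MEMOP_EXTENT_SHIFT) - 1)

    int main(void)
    {
        unsigned long op = 6, start_extent = 508;
        unsigned long cmd = op | (start_extent << MEMOP_EXTENT_SHIFT);

        printf("cmd=%#lx -> op=%lu, resume at extent %lu\n",
               cmd, cmd & MEMOP_CMD_MASK, cmd >> MEMOP_EXTENT_SHIFT);
        return 0;
    }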
Index: 2006-10-04/xen/include/asm-x86/config.h
===================================================================
--- 2006-10-04.orig/xen/include/asm-x86/config.h       2006-10-04 15:16:05.000000000 +0200
+++ 2006-10-04/xen/include/asm-x86/config.h    2006-10-04 15:18:51.000000000 +0200
@@ -114,7 +114,7 @@ static inline void FORCE_CRASH(void)
 /*
  * Memory layout:
  *  0x0000000000000000 - 0x00007fffffffffff [128TB, 2^47 bytes, PML4:0-255]
- *    Guest-defined use.
+ *    Guest-defined use (see below for compatibility mode guests).
  *  0x0000800000000000 - 0xffff7fffffffffff [16EB]
  *    Inaccessible: current arch only supports 48-bit sign-extended VAs.
  *  0xffff800000000000 - 0xffff803fffffffff [256GB, 2^38 bytes, PML4:256]
@@ -147,6 +147,18 @@ static inline void FORCE_CRASH(void)
  *    Reserved for future use.
  *  0xffff880000000000 - 0xffffffffffffffff [120TB, PML4:272-511]
  *    Guest-defined use.
+ *
+ * Compatibility guest area layout:
+ *  0x0000000000000000 - 0x00000000f57fffff [3928MB, PML4:0]
+ *    Guest-defined use.
+ *  0x00000000f5800000 - 0x00000000ffffffff [168MB, PML4:0]
+ *    Read-only machine-to-phys translation table (GUEST ACCESSIBLE).
+ *  0x0000000100000000 - 0x0000007fffffffff [508GB, PML4:0]
+ *    Unused.
+ *  0x0000008000000000 - 0x000000ffffffffff [512GB, 2^39 bytes, PML4:1]
+ *    Hypercall argument translation area.
+ *  0x0000010000000000 - 0x00007fffffffffff [127TB, 2^46 bytes, PML4:2-255]
+ *    Reserved for future use.
  */
 
@@ -216,6 +228,14 @@ static inline void FORCE_CRASH(void)
 #define COMPAT_L2_PAGETABLE_XEN_SLOTS \
     (COMPAT_L2_PAGETABLE_LAST_XEN_SLOT - COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT + 1)
 
+#define COMPAT_ARG_XLAT_VIRT_BASE      (1UL << ROOT_PAGETABLE_SHIFT)
+#define COMPAT_ARG_XLAT_SHIFT          0
+#define COMPAT_ARG_XLAT_PAGES          (1U << COMPAT_ARG_XLAT_SHIFT)
+#define COMPAT_ARG_XLAT_SIZE           (COMPAT_ARG_XLAT_PAGES << PAGE_SHIFT)
+#define COMPAT_ARG_XLAT_VIRT_START(vcpu_id) \
+    (COMPAT_ARG_XLAT_VIRT_BASE + ((unsigned long)(vcpu_id) << \
+                                  (PAGE_SHIFT + COMPAT_ARG_XLAT_SHIFT + 1)))
+
 #define PGT_base_page_table PGT_l4_page_table
 
 #define __HYPERVISOR_CS64 0xe008
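Plugging numbers into the COMPAT_ARG_XLAT_* macros (assuming PAGE_SHIFT 12 and ROOT_PAGETABLE_SHIFT 39, which matches the 512GB PML4 slot 1 in the layout comment above): each vCPU's area starts at 0x8000000000 + vcpu_id * 0x2000, i.e. one mapped page per vCPU with an unmapped page between consecutive areas (the "+ 1" in the shift). Standalone check:

    #include <stdio.h>

    #define PAGE_SHIFT                12
    #define ROOT_PAGETABLE_SHIFT      39                       /* assumed */
    #define COMPAT_ARG_XLAT_VIRT_BASE (1UL << ROOT_PAGETABLE_SHIFT)
    #define COMPAT_ARG_XLAT_SHIFT     0
    #define COMPAT_ARG_XLAT_VIRT_START(vcpu_id) \
        (COMPAT_ARG_XLAT_VIRT_BASE + ((unsigned long)(vcpu_id) << \
                                      (PAGE_SHIFT + COMPAT_ARG_XLAT_SHIFT + 1)))

    int main(void)
    {
        unsigned int id;

        for ( id = 0; id < 3; ++id )
            printf("vcpu%u: %#lx\n", id, COMPAT_ARG_XLAT_VIRT_START(id));
        return 0;
    }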
Index: 2006-10-04/xen/include/asm-x86/domain.h
===================================================================
--- 2006-10-04.orig/xen/include/asm-x86/domain.h       2006-09-21 11:09:00.000000000 +0200
+++ 2006-10-04/xen/include/asm-x86/domain.h    2006-10-04 15:18:51.000000000 +0200
@@ -98,6 +98,10 @@ struct arch_domain
     struct mapcache mapcache;
 #endif
 
+#ifdef CONFIG_COMPAT
+    l3_pgentry_t *mm_arg_xlat_l3;
+#endif
+
     /* I/O-port admin-specified access capabilities. */
     struct rangeset *ioport_caps;
Index: 2006-10-04/xen/include/asm-x86/mm.h
===================================================================
--- 2006-10-04.orig/xen/include/asm-x86/mm.h   2006-10-04 15:18:45.000000000 +0200
+++ 2006-10-04/xen/include/asm-x86/mm.h        2006-10-04 15:18:51.000000000 +0200
@@ -394,8 +394,18 @@ int __sync_lazy_execstate(void);
 /* Arch-specific portion of memory_op hypercall. */
 long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
 long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
+#ifdef CONFIG_COMPAT
+int compat_arch_memory_op(int op, XEN_GUEST_HANDLE(void));
+int compat_subarch_memory_op(int op, XEN_GUEST_HANDLE(void));
+#endif
 
 int steal_page(
     struct domain *d, struct page_info *page, unsigned int memflags);
 
+#ifdef CONFIG_COMPAT
+int setup_arg_xlat_area(struct domain *, unsigned int vcpu_id, l4_pgentry_t *);
+#else
+# define setup_arg_xlat_area(dom, vcpu_id, l4tab) 0
+#endif
+
 #endif /* __ASM_X86_MM_H__ */
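One recurring idiom in the copy-back loops of compat_memory_op() is worth isolating: a native xen_pfn_t is narrowed into a compat_pfn_t, and a BUG_ON catches any value that does not survive the round trip. Standalone version (typedef widths assumed for illustration):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t xen_pfn_t;     /* native width, assumed */
    typedef uint32_t compat_pfn_t;  /* compat width, assumed */

    int main(void)
    {
        xen_pfn_t native = 0x12345;
        compat_pfn_t pfn = native;  /* narrowing assignment */

        assert(pfn == native);      /* mirrors BUG_ON(pfn != ...) */
        printf("%#x fits in 32 bits\n", pfn);
        return 0;
    }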
Index: 2006-10-04/xen/include/xen/hypercall.h
===================================================================
--- 2006-10-04.orig/xen/include/xen/hypercall.h        2006-08-28 08:32:38.000000000 +0200
+++ 2006-10-04/xen/include/xen/hypercall.h     2006-10-04 15:18:51.000000000 +0200
@@ -42,9 +42,17 @@ extern long
 do_platform_op(
     XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op);
 
+/*
+ * To allow safe resume of do_memory_op() after preemption, we need to know
+ * at what point in the page list to resume. For this purpose I steal the
+ * high-order bits of the @cmd parameter, which are otherwise unused and zero.
+ */
+#define MEMOP_EXTENT_SHIFT 4 /* cmd[:4] == start_extent */
+#define MEMOP_CMD_MASK     ((1 << MEMOP_EXTENT_SHIFT) - 1)
+
 extern long
 do_memory_op(
-    int cmd,
+    unsigned long cmd,
     XEN_GUEST_HANDLE(void) arg);
 
 extern long
@@ -102,4 +110,13 @@ do_hvm_op(
     unsigned long op,
     XEN_GUEST_HANDLE(void) arg);
 
+#ifdef CONFIG_COMPAT
+
+extern int
+compat_memory_op(
+    unsigned int cmd,
+    XEN_GUEST_HANDLE(void) arg);
+
+#endif
+
 #endif /* __XEN_HYPERCALL_H__ */
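Why the size checks differ between the native and compat paths: the restart point must fit in cmd above the low MEMOP_EXTENT_SHIFT bits, and do_memory_op() now takes an unsigned long cmd while compat_memory_op() packs everything into 32 bits. Standalone illustration:

    #include <limits.h>
    #include <stdio.h>

    #define MEMOP_EXTENT_SHIFT 4

    int main(void)
    {
        printf("native limit: %lu extents\n", ULONG_MAX >> MEMOP_EXTENT_SHIFT);
        printf("compat limit: %u extents\n",  UINT_MAX  >> MEMOP_EXTENT_SHIFT);
        return 0;
    }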
Index: 2006-10-04/xen/include/xen/compat.h
===================================================================
--- 2006-10-04.orig/xen/include/xen/compat.h   2006-10-04 15:11:04.000000000 +0200
+++ 2006-10-04/xen/include/xen/compat.h        2006-10-04 15:18:51.000000000 +0200
@@ -143,6 +143,8 @@
           &((k compat_ ## n *)0)->f1.f2.f3) * 2]
 
+int hypercall_xlat_continuation(unsigned int mask, ...);
+
 /* In-place translation functons: */
 struct start_info;
 void xlat_start_info(struct start_info *, enum XLAT_start_info_console);
Index: 2006-10-04/xen/include/xlat.lst
===================================================================
--- 2006-10-04.orig/xen/include/xlat.lst       2006-10-04 15:11:04.000000000 +0200
+++ 2006-10-04/xen/include/xlat.lst    2006-10-04 15:18:51.000000000 +0200
@@ -4,3 +4,8 @@
 ?      dom0_vga_console_info           xen.h
 !      start_info                      xen.h
 ?      vcpu_time_info                  xen.h
+!      add_to_physmap                  memory.h
+!      memory_exchange                 memory.h
+!      memory_map                      memory.h
+!      memory_reservation              memory.h
+!      translate_gpfn_list             memory.h
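Finally, on the new xlat.lst entries: "?" lines drive CHECK_* layout assertions (as with CHECK_TYPE(domid) above), while "!" lines drive generated XLAT_* copy-and-translate helpers, optionally customised per field via *_HNDL_* hooks such as XLAT_memory_map_HNDL_buffer. A rough, entirely hypothetical sketch of the kind of translator this machinery produces (xen_foo/compat_foo are invented):

    #include <stdint.h>
    #include <stdio.h>

    struct xen_foo    { uint64_t addr; uint32_t flags; };  /* invented */
    struct compat_foo { uint32_t addr; uint32_t flags; };  /* invented */

    /* shape of a generated translator: widen field by field */
    #define XLAT_foo(_d_, _s_) do {     \
        (_d_)->addr  = (_s_)->addr;     \
        (_d_)->flags = (_s_)->flags;    \
    } while (0)

    int main(void)
    {
        struct compat_foo c = { 0x1000, 7 };
        struct xen_foo n;

        XLAT_foo(&n, &c);
        printf("addr=%#llx flags=%u\n", (unsigned long long)n.addr, n.flags);
        return 0;
    }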