Mukesh Rathor
2013-Jan-12 02:07 UTC
[RFC PATCH 12/16]: PVH xen: return PVH features during creation, etc...
Debug.c: we don''t need to lock the page, as no path it''s called from requires that. traps.c: need to inject PF or other exception in PVH case. Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com> diff -r 33fc5356ad7c -r 31a145002453 xen/arch/x86/debug.c --- a/xen/arch/x86/debug.c Fri Jan 11 16:35:48 2013 -0800 +++ b/xen/arch/x86/debug.c Fri Jan 11 16:38:07 2013 -0800 @@ -59,7 +59,7 @@ dbg_hvm_va2mfn(dbgva_t vaddr, struct dom return INVALID_MFN; } - mfn = mfn_x(get_gfn(dp, *gfn, &gfntype)); + mfn = mfn_x(get_gfn_query_unlocked(dp, *gfn, &gfntype)); if ( p2m_is_readonly(gfntype) && toaddr ) { DBGP2("kdb:p2m_is_readonly: gfntype:%x\n", gfntype); @@ -153,7 +153,7 @@ dbg_rw_guest_mem(dbgva_t addr, dbgbyte_t pagecnt = min_t(long, PAGE_SIZE - (addr & ~PAGE_MASK), len); - mfn = (dp->is_hvm + mfn = (is_hvm_or_pvh_domain(dp) ? dbg_hvm_va2mfn(addr, dp, toaddr, &gfn) : dbg_pv_va2mfn(addr, dp, pgd3)); if ( mfn == INVALID_MFN ) @@ -173,8 +173,6 @@ dbg_rw_guest_mem(dbgva_t addr, dbgbyte_t } unmap_domain_page(va); - if ( gfn != INVALID_GFN ) - put_gfn(dp, gfn); addr += pagecnt; buf += pagecnt; diff -r 33fc5356ad7c -r 31a145002453 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Fri Jan 11 16:35:48 2013 -0800 +++ b/xen/arch/x86/traps.c Fri Jan 11 16:38:07 2013 -0800 @@ -470,6 +470,11 @@ static unsigned int check_guest_io_break unsigned int width, i, match = 0; unsigned long start; + if ( is_pvh_vcpu(v) ) { + /* for pvh, ctrlreg field is not implemented/used unless we need to */ + /* printk("PVH: fixme: (ctrlreg) check_guest_io_breakpoint\n"); */ + return 0; + } if ( !(v->arch.debugreg[5]) || !(v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_DE) ) return 0; @@ -910,6 +915,10 @@ int emulate_forced_invalid_op(struct cpu /* Check for forced emulation signature: ud2 ; .ascii "xen". */ if ( (rc = copy_from_user(sig, (char *)eip, sizeof(sig))) != 0 ) { + /* PVH: fixme: hmm... what do we do for PVH? */ + if ( is_pvh_vcpu(current) ) + return 0; + propagate_page_fault(eip + sizeof(sig) - rc, 0); return EXCRET_fault_fixed; } @@ -920,6 +929,10 @@ int emulate_forced_invalid_op(struct cpu /* We only emulate CPUID. */ if ( ( rc = copy_from_user(instr, (char *)eip, sizeof(instr))) != 0 ) { + /* PVH: fixme: hmm... what do we do for PVH? */ + if ( is_pvh_vcpu(current) ) + return 0; + propagate_page_fault(eip + sizeof(instr) - rc, 0); return EXCRET_fault_fixed; } @@ -1063,6 +1076,10 @@ void propagate_page_fault(unsigned long struct vcpu *v = current; struct trap_bounce *tb = &v->arch.pv_vcpu.trap_bounce; + /* PVH should not get here. ctrlreg is not implemented amongst other + * things */ + NO_PVH_ASSERT_VCPU(v); + v->arch.pv_vcpu.ctrlreg[2] = addr; arch_set_cr2(v, addr); @@ -1448,6 +1465,9 @@ static int read_descriptor(unsigned int { struct desc_struct desc; + if ( is_pvh_vcpu(v) ) + return hvm_pvh_read_descriptor(sel, v, regs, base, limit, ar); + if ( !vm86_mode(regs) ) { if ( sel < 4) @@ -1566,6 +1586,10 @@ static int guest_io_okay( int user_mode = !(v->arch.flags & TF_kernel_mode); #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v) + /* for PVH we check this in vmexit for EXIT_REASON_IO_INSTRUCTION */ + if (is_pvh_vcpu(v)) + return 1; + if ( !vm86_mode(regs) && (v->arch.pv_vcpu.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3)) ) return 1; @@ -1811,8 +1835,9 @@ static inline uint64_t guest_misc_enable _ptr = (unsigned int)_ptr; \ if ( (limit) < sizeof(_x) - 1 || (eip) > (limit) - (sizeof(_x) - 1) ) \ goto fail; \ - if ( (_rc = copy_from_user(&_x, (type *)_ptr, sizeof(_x))) != 0 ) \ + if ( (_rc = raw_copy_from_guest(&_x, (type *)_ptr, sizeof(_x))) != 0 ) \ { \ + /* PVH: fixme: probably return -EFAULT ??? */ \ propagate_page_fault(_ptr + sizeof(_x) - _rc, 0); \ goto skip; \ } \ @@ -2132,7 +2157,8 @@ int emulate_privileged_op(struct cpu_use case 0xfa: /* CLI */ case 0xfb: /* STI */ - if ( v->arch.pv_vcpu.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) ) + if ( !is_pvh_vcpu(v) && + v->arch.pv_vcpu.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) ) goto fail; /* * This is just too dangerous to allow, in my opinion. Consider if the @@ -3224,6 +3250,9 @@ void do_device_not_available(struct cpu_ BUG_ON(!guest_mode(regs)); + /* PVH should not get here. ctrlreg is not implemented */ + NO_PVH_ASSERT_VCPU(curr); + vcpu_restore_fpu_lazy(curr); if ( curr->arch.pv_vcpu.ctrlreg[0] & X86_CR0_TS ) diff -r 33fc5356ad7c -r 31a145002453 xen/arch/x86/x86_64/traps.c --- a/xen/arch/x86/x86_64/traps.c Fri Jan 11 16:35:48 2013 -0800 +++ b/xen/arch/x86/x86_64/traps.c Fri Jan 11 16:38:07 2013 -0800 @@ -152,7 +152,7 @@ void vcpu_show_registers(const struct vc unsigned long crs[8]; /* No need to handle HVM for now. */ - if ( is_hvm_vcpu(v) ) + if ( is_hvm_or_pvh_vcpu(v) ) return; crs[0] = v->arch.pv_vcpu.ctrlreg[0]; @@ -444,6 +444,8 @@ static long register_guest_callback(stru long ret = 0; struct vcpu *v = current; + NO_PVH_ASSERT_VCPU(v); + if ( !is_canonical_address(reg->address) ) return -EINVAL; @@ -624,7 +626,7 @@ static void hypercall_page_initialise_ri void hypercall_page_initialise(struct domain *d, void *hypercall_page) { memset(hypercall_page, 0xCC, PAGE_SIZE); - if ( is_hvm_domain(d) ) + if ( is_hvm_or_pvh_domain(d) ) hvm_hypercall_page_initialise(d, hypercall_page); else if ( !is_pv_32bit_domain(d) ) hypercall_page_initialise_ring3_kernel(hypercall_page); diff -r 33fc5356ad7c -r 31a145002453 xen/common/domain.c --- a/xen/common/domain.c Fri Jan 11 16:35:48 2013 -0800 +++ b/xen/common/domain.c Fri Jan 11 16:38:07 2013 -0800 @@ -232,6 +232,14 @@ struct domain *domain_create( if ( domcr_flags & DOMCRF_hvm ) d->is_hvm = 1; + else if ( domcr_flags & DOMCRF_pvh ) { + d->is_pvh = 1; + if ( !(domcr_flags & DOMCRF_hap) ) { + printk("PVH guest must have HAP on\n"); + goto fail; + } else + printk("Yay... PVH guest. domid:%d\n", domid); + } if ( domid == 0 ) { diff -r 33fc5356ad7c -r 31a145002453 xen/common/domctl.c --- a/xen/common/domctl.c Fri Jan 11 16:35:48 2013 -0800 +++ b/xen/common/domctl.c Fri Jan 11 16:38:07 2013 -0800 @@ -149,6 +149,8 @@ void getdomaininfo(struct domain *d, str if ( is_hvm_domain(d) ) info->flags |= XEN_DOMINF_hvm_guest; + else if ( is_pvh_domain(d) ) + info->flags |= XEN_DOMINF_pvh_guest; xsm_security_domaininfo(d, info); @@ -449,6 +451,8 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe domcr_flags = 0; if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_hvm_guest ) domcr_flags |= DOMCRF_hvm; + else if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_hap ) + domcr_flags |= DOMCRF_pvh; /* PV with HAP is a PVH guest */ if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_hap ) domcr_flags |= DOMCRF_hap; if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_s3_integrity ) diff -r 33fc5356ad7c -r 31a145002453 xen/common/kernel.c --- a/xen/common/kernel.c Fri Jan 11 16:35:48 2013 -0800 +++ b/xen/common/kernel.c Fri Jan 11 16:38:07 2013 -0800 @@ -289,7 +289,11 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL if ( current->domain == dom0 ) fi.submap |= 1U << XENFEAT_dom0; #ifdef CONFIG_X86 - if ( !is_hvm_vcpu(current) ) + if ( is_pvh_vcpu(current) ) + fi.submap |= (1U << XENFEAT_hvm_safe_pvclock) | + (1U << XENFEAT_supervisor_mode_kernel) | + (1U << XENFEAT_hvm_callback_vector); + else if ( !is_hvm_vcpu(current) ) fi.submap |= (1U << XENFEAT_mmu_pt_update_preserve_ad) | (1U << XENFEAT_highmem_assist) | (1U << XENFEAT_gnttab_map_avail_bits);
Jan Beulich
2013-Jan-14 12:20 UTC
Re: [RFC PATCH 12/16]: PVH xen: return PVH features during creation, etc...
>>> On 12.01.13 at 03:07, Mukesh Rathor <mukesh.rathor@oracle.com> wrote: > @@ -910,6 +915,10 @@ int emulate_forced_invalid_op(struct cpu > /* Check for forced emulation signature: ud2 ; .ascii "xen". */ > if ( (rc = copy_from_user(sig, (char *)eip, sizeof(sig))) != 0 ) > { > + /* PVH: fixme: hmm... what do we do for PVH? */ > + if ( is_pvh_vcpu(current) )The fixme and check ought to sit earlier - the copy_from_user() above isn''t valid there. And I don''t see how you would validly get here anyway - you don''t need to intercept GP faults to emulate guest CPUID invocations.> @@ -1566,6 +1586,10 @@ static int guest_io_okay( > int user_mode = !(v->arch.flags & TF_kernel_mode); > #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v) > > + /* for PVH we check this in vmexit for EXIT_REASON_IO_INSTRUCTION */ > + if (is_pvh_vcpu(v))The why would it get here at all?> @@ -1811,8 +1835,9 @@ static inline uint64_t guest_misc_enable > _ptr = (unsigned int)_ptr; \ > if ( (limit) < sizeof(_x) - 1 || (eip) > (limit) - (sizeof(_x) - 1) ) \ > goto fail; \ > - if ( (_rc = copy_from_user(&_x, (type *)_ptr, sizeof(_x))) != 0 ) \ > + if ( (_rc = raw_copy_from_guest(&_x, (type *)_ptr, sizeof(_x))) != 0 ) \So here you realized the need to change the call.> { \ > + /* PVH: fixme: probably return -EFAULT ??? */ \ > propagate_page_fault(_ptr + sizeof(_x) - _rc, 0); \I don''t think so - propagate_page_fault() should do the right thing in that case, if you can validly get here for a PVH guest.> @@ -2132,7 +2157,8 @@ int emulate_privileged_op(struct cpu_use > > case 0xfa: /* CLI */ > case 0xfb: /* STI */ > - if ( v->arch.pv_vcpu.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) ) > + if ( !is_pvh_vcpu(v) &&This ought to be impossible.> @@ -444,6 +444,8 @@ static long register_guest_callback(stru > long ret = 0; > struct vcpu *v = current; > > + NO_PVH_ASSERT_VCPU(v);Either the code is unreachable for a PVH guest (in which case the assert is likely superfluous, or you need to return an error here rather than asserting. Jan
Mukesh Rathor
2013-Jan-17 23:36 UTC
Re: [RFC PATCH 12/16]: PVH xen: return PVH features during creation, etc...
On Mon, 14 Jan 2013 12:20:12 +0000 "Jan Beulich" <JBeulich@suse.com> wrote:> >>> On 12.01.13 at 03:07, Mukesh Rathor <mukesh.rathor@oracle.com> > >>> wrote: > > @@ -910,6 +915,10 @@ int emulate_forced_invalid_op(struct cpu > > /* Check for forced emulation signature: ud2 ; .ascii "xen". */ > > if ( (rc = copy_from_user(sig, (char *)eip, sizeof(sig))) !> > 0 ) { > > + /* PVH: fixme: hmm... what do we do for PVH? */ > > + if ( is_pvh_vcpu(current) ) > > The fixme and check ought to sit earlier - the copy_from_user() > above isn''t valid there. And I don''t see how you would validly > get here anyway - you don''t need to intercept GP faults to > emulate guest CPUID invocations.Yup, I need raw_copy like later. I guess I went back and forth between supporting XEN_EMULATE_PREFIX or not, since a cpuid can be trapped via vmexit. But we need to support it from user apps, so I need to fix this to raw_copy.> I don''t think so - propagate_page_fault() should do the right thing > in that case, if you can validly get here for a PVH guest.Agree, I need to make propgate_page_fault() inject PF into the PVH guest. Working on it now.> > @@ -1566,6 +1586,10 @@ static int guest_io_okay( > > int user_mode = !(v->arch.flags & TF_kernel_mode); > > #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v) > > > > + /* for PVH we check this in vmexit for > > EXIT_REASON_IO_INSTRUCTION */ > > + if (is_pvh_vcpu(v)) > > The why would it get here at all?From, emulate_privileged_op(). I should change the comment to say we don''t need to check again, as we check at vmexit. We won''t get to emulate_privileged_op() if check fails. Easier to add that in guest_io_okay() than to change every place in emulate_privileged_op() where guest_io_okay() is called and not call it for PVH.> > @@ -2132,7 +2157,8 @@ int emulate_privileged_op(struct cpu_use > > > > case 0xfa: /* CLI */ > > case 0xfb: /* STI */ > > - if ( v->arch.pv_vcpu.iopl < (guest_kernel_mode(v, regs) ? > > 1 : 3) ) > > + if ( !is_pvh_vcpu(v) && > > This ought to be impossible.You mean call to emulate STI/CLI for PVH. Correct. I could just remove it. I went thru looking for places that were using pv_vcpu.iopl.> > @@ -444,6 +444,8 @@ static long register_guest_callback(stru > > long ret = 0; > > struct vcpu *v = current; > > > > + NO_PVH_ASSERT_VCPU(v); > > Either the code is unreachable for a PVH guest (in which case the > assert is likely superfluous, or you need to return an error here > rather than asserting.superfluous, hence it''s a debug assert to catch any places I might have missed. I plan to remove them later when PVH is stable. Hope it can stay for a little bit :). thanks, Mukesh
Mukesh Rathor
2013-Jan-18 02:29 UTC
Re: [RFC PATCH 12/16]: PVH xen: return PVH features during creation, etc...
On Thu, 17 Jan 2013 15:36:17 -0800 Mukesh Rathor <mukesh.rathor@oracle.com> wrote:> On Mon, 14 Jan 2013 12:20:12 +0000 > "Jan Beulich" <JBeulich@suse.com> wrote: > Agree, I need to make propgate_page_fault() inject PF into the PVH > guest. Working on it now.Done. No other callers of prop_page_fault for PVH. So are you OK with something like this: static noinline int vmxit_invalid_op(struct cpu_user_regs *regs) { ulong addr=0; if ( guest_kernel_mode(current, regs) || (addr = emulate_forced_invalid_op(regs)) == 0 ) { hvm_inject_hw_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE); return 0; } if (addr != EXCRET_fault_fixed) hvm_inject_page_fault(0, addr); return 0; } unsigned long emulate_forced_invalid_op(struct cpu_user_regs *regs) { char sig[5], instr[2]; unsigned long eip, rc, addr; eip = regs->eip; /* Check for forced emulation signature: ud2 ; .ascii "xen". */ if ( (rc = raw_copy_from_guest(sig, (char *)eip, sizeof(sig))) ! 0 ) { addr = eip + sizeof(sig) - rc; if ( is_pvh_vcpu(current) ) return addr; propagate_page_fault(addr, 0); return EXCRET_fault_fixed; } if ( memcmp(sig, "\xf\xbxen", sizeof(sig)) ) return 0; eip += sizeof(sig); /* We only emulate CPUID. */ if ( ( rc = raw_copy_from_guest(instr, (char *)eip, sizeof(instr))) != 0 ) { addr = eip + sizeof(instr) - rc; if ( is_pvh_vcpu(current) ) return addr; propagate_page_fault(addr, 0); return EXCRET_fault_fixed; } if ( memcmp(instr, "\xf\xa2", sizeof(instr)) ) return 0; eip += sizeof(instr); pv_cpuid(regs); if ( is_pvh_vcpu(current) ) regs->eip = eip; else instruction_done(regs, eip, 0); trace_trap_one_addr(TRC_PV_FORCED_INVALID_OP, regs->eip); return EXCRET_fault_fixed; Thanks, Mukesh
Jan Beulich
2013-Jan-18 09:23 UTC
Re: [RFC PATCH 12/16]: PVH xen: return PVH features during creation, etc...
>>> On 18.01.13 at 03:29, Mukesh Rathor <mukesh.rathor@oracle.com> wrote: > Done. No other callers of prop_page_fault for PVH. So are you OK with > something like this: > > static noinline int vmxit_invalid_op(struct cpu_user_regs *regs) > { > ulong addr=0; > > if ( guest_kernel_mode(current, regs) || > (addr = emulate_forced_invalid_op(regs)) == 0 )Actually, on a second thought that depends on whether you want to be able to build kernels that can run both PV and PVH. If so, you may need to emulate this even for the guest kernel.> { > hvm_inject_hw_exception(TRAP_invalid_op, > HVM_DELIVER_NO_ERROR_CODE); return 0; > } > > if (addr != EXCRET_fault_fixed) > hvm_inject_page_fault(0, addr); > return 0; > } > > > unsigned long emulate_forced_invalid_op(struct cpu_user_regs *regs) > { > char sig[5], instr[2]; > unsigned long eip, rc, addr; > > eip = regs->eip; > > /* Check for forced emulation signature: ud2 ; .ascii "xen". */ > if ( (rc = raw_copy_from_guest(sig, (char *)eip, sizeof(sig))) !> 0 ) { > addr = eip + sizeof(sig) - rc; > if ( is_pvh_vcpu(current) ) > return addr; > > propagate_page_fault(addr, 0); > return EXCRET_fault_fixed; > } > if ( memcmp(sig, "\xf\xbxen", sizeof(sig)) ) > return 0; > eip += sizeof(sig); > > /* We only emulate CPUID. */ > if ( ( rc = raw_copy_from_guest(instr, (char *)eip, > sizeof(instr))) != 0 ) { > addr = eip + sizeof(instr) - rc; > if ( is_pvh_vcpu(current) ) > return addr; > > propagate_page_fault(addr, 0); > return EXCRET_fault_fixed; > } > if ( memcmp(instr, "\xf\xa2", sizeof(instr)) ) > return 0; > eip += sizeof(instr); > > pv_cpuid(regs); >Looks okay up to here at a first glance.> if ( is_pvh_vcpu(current) ) > regs->eip = eip; > else > instruction_done(regs, eip, 0);Why can''t you use instruction_done() (or make it fit your needs, so that other code wanting to use it wouldn''t need similar special casing)? Jan> trace_trap_one_addr(TRC_PV_FORCED_INVALID_OP, regs->eip); > > return EXCRET_fault_fixed;
Mukesh Rathor
2013-Jan-18 20:41 UTC
Re: [RFC PATCH 12/16]: PVH xen: return PVH features during creation, etc...
On Fri, 18 Jan 2013 09:23:01 +0000 "Jan Beulich" <JBeulich@suse.com> wrote:> >>> On 18.01.13 at 03:29, Mukesh Rathor <mukesh.rathor@oracle.com> > >>> wrote: > > Done. No other callers of prop_page_fault for PVH. So are you OK > > with something like this: > > > > static noinline int vmxit_invalid_op(struct cpu_user_regs *regs) > > { > > ulong addr=0; > > > > if ( guest_kernel_mode(current, regs) || > > (addr = emulate_forced_invalid_op(regs)) == 0 ) > > Actually, on a second thought that depends on whether you want > to be able to build kernels that can run both PV and PVH. If so, > you may need to emulate this even for the guest kernel.Actually, I changed linux code so that PVH paths will only go thru native_cpuid(). So we can leave this as is to discourage future XEN_EMULATE_PREFIX. Sound good? In PV mode it will not be running in HVM container, hence not go thru this path.> > if ( is_pvh_vcpu(current) ) > > regs->eip = eip; > > else > > instruction_done(regs, eip, 0); > > Why can''t you use instruction_done() (or make it fit your needs, > so that other code wanting to use it wouldn''t need similar special > casing)?Sure, NP. Done. Thanks, Mukesh
Tim Deegan
2013-Jan-24 16:57 UTC
Re: [RFC PATCH 12/16]: PVH xen: return PVH features during creation, etc...
At 18:07 -0800 on 11 Jan (1357927656), Mukesh Rathor wrote:> Debug.c: we don''t need to lock the page, as no path it''s called from > requires that.I''m afraid it does need to lock it: it''s going to write to the page, so it can''t risk having the page freed underfoot. Tim.
Mukesh Rathor
2013-Jan-25 02:08 UTC
Re: [RFC PATCH 12/16]: PVH xen: return PVH features during creation, etc...
On Thu, 24 Jan 2013 16:57:19 +0000 Tim Deegan <tim@xen.org> wrote:> At 18:07 -0800 on 11 Jan (1357927656), Mukesh Rathor wrote: > > Debug.c: we don''t need to lock the page, as no path it''s called from > > requires that. > > I''m afraid it does need to lock it: it''s going to write to the page, > so it can''t risk having the page freed underfoot.Ok, will do it thanks.