Razvan Cojocaru
2013-Jan-14 14:32 UTC
[PATCH] mem_event: Allow emulating an instruction that caused a page fault
This patch makes it possible to emulate an instruction that triggered a page fault (received via the mem_event API). This is done by setting the MEM_EVENT_FLAG_EMULATE in mem_event_response_t.flags. The purpose of this is to be able to receive several distinct page fault mem_events for the same address, and choose which ones are allowed to go through from dom0 userspace. Signed-off-by: Razvan Cojocaru <rzvncj@gmail.com> diff -r 35a0556a7f76 -r a22fe4e2bc32 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Thu Jan 10 17:32:10 2013 +0000 +++ b/xen/arch/x86/hvm/hvm.c Mon Jan 14 16:31:29 2013 +0200 @@ -1293,7 +1293,8 @@ int hvm_hap_nested_page_fault(paddr_t gp unsigned long gla, bool_t access_r, bool_t access_w, - bool_t access_x) + bool_t access_x, + struct cpu_user_regs *regs) { unsigned long gfn = gpa >> PAGE_SHIFT; p2m_type_t p2mt; @@ -1386,7 +1387,7 @@ int hvm_hap_nested_page_fault(paddr_t gp if ( violation ) { if ( p2m_mem_access_check(gpa, gla_valid, gla, access_r, - access_w, access_x, &req_ptr) ) + access_w, access_x, &req_ptr, regs) ) { fall_through = 1; } else { diff -r 35a0556a7f76 -r a22fe4e2bc32 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Thu Jan 10 17:32:10 2013 +0000 +++ b/xen/arch/x86/hvm/svm/svm.c Mon Jan 14 16:31:29 2013 +0200 @@ -1292,7 +1292,8 @@ static void svm_do_nested_pgfault(struct ret = hvm_hap_nested_page_fault(gpa, 0, ~0ul, 1, /* All NPFs count as reads */ npfec & PFEC_write_access, - npfec & PFEC_insn_fetch); + npfec & PFEC_insn_fetch, + regs); if ( tb_init_done ) { diff -r 35a0556a7f76 -r a22fe4e2bc32 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Thu Jan 10 17:32:10 2013 +0000 +++ b/xen/arch/x86/hvm/vmx/vmx.c Mon Jan 14 16:31:29 2013 +0200 @@ -2042,7 +2042,7 @@ static void vmx_wbinvd_intercept(void) wbinvd(); } -static void ept_handle_violation(unsigned long qualification, paddr_t gpa) +static void ept_handle_violation(unsigned long qualification, paddr_t gpa, struct cpu_user_regs *regs) { unsigned long 
gla, gfn = gpa >> PAGE_SHIFT; mfn_t mfn; @@ -2071,7 +2071,8 @@ static void ept_handle_violation(unsigne ? __vmread(GUEST_LINEAR_ADDRESS) : ~0ull, qualification & EPT_READ_VIOLATION ? 1 : 0, qualification & EPT_WRITE_VIOLATION ? 1 : 0, - qualification & EPT_EXEC_VIOLATION ? 1 : 0) ) + qualification & EPT_EXEC_VIOLATION ? 1 : 0, + regs) ) return; /* Everything else is an error. */ @@ -2670,7 +2671,7 @@ void vmx_vmexit_handler(struct cpu_user_ { paddr_t gpa = __vmread(GUEST_PHYSICAL_ADDRESS); exit_qualification = __vmread(EXIT_QUALIFICATION); - ept_handle_violation(exit_qualification, gpa); + ept_handle_violation(exit_qualification, gpa, regs); break; } diff -r 35a0556a7f76 -r a22fe4e2bc32 xen/arch/x86/mm/p2m.c --- a/xen/arch/x86/mm/p2m.c Thu Jan 10 17:32:10 2013 +0000 +++ b/xen/arch/x86/mm/p2m.c Mon Jan 14 16:31:29 2013 +0200 @@ -1205,7 +1205,7 @@ void p2m_mem_paging_resume(struct domain bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla, bool_t access_r, bool_t access_w, bool_t access_x, - mem_event_request_t **req_ptr) + mem_event_request_t **req_ptr, struct cpu_user_regs *regs) { struct vcpu *v = current; unsigned long gfn = gpa >> PAGE_SHIFT; @@ -1258,6 +1258,17 @@ bool_t p2m_mem_access_check(paddr_t gpa, } } + if ( v->arch.hvm_vmx.mem_event_emulate ) + { + struct hvm_emulate_ctxt ctx[1] = {}; + + v->arch.hvm_vmx.mem_event_emulate = 0; + hvm_emulate_prepare(ctx, regs); + hvm_emulate_one(ctx); + + return 1; + } + *req_ptr = NULL; req = xzalloc(mem_event_request_t); if ( req ) @@ -1296,8 +1307,15 @@ void p2m_mem_access_resume(struct domain /* Pull all responses off the ring */ while( mem_event_get_response(d, &d->mem_event->access, &rsp) ) { + d->vcpu[rsp.vcpu_id]->arch.hvm_vmx.mem_event_emulate = 0; + if ( rsp.flags & MEM_EVENT_FLAG_DUMMY ) continue; + + /* Mark vcpu for skipping one instruction upon rescheduling */ + if ( rsp.flags & MEM_EVENT_FLAG_EMULATE ) + d->vcpu[rsp.vcpu_id]->arch.hvm_vmx.mem_event_emulate = 1; + /* Unpause domain 
*/ if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED ) vcpu_unpause(d->vcpu[rsp.vcpu_id]); diff -r 35a0556a7f76 -r a22fe4e2bc32 xen/arch/x86/x86_64/asm-offsets.c --- a/xen/arch/x86/x86_64/asm-offsets.c Thu Jan 10 17:32:10 2013 +0000 +++ b/xen/arch/x86/x86_64/asm-offsets.c Mon Jan 14 16:31:29 2013 +0200 @@ -113,6 +113,7 @@ void __dummy__(void) OFFSET(VCPU_vmx_emulate, struct vcpu, arch.hvm_vmx.vmx_emulate); OFFSET(VCPU_vm86_seg_mask, struct vcpu, arch.hvm_vmx.vm86_segment_mask); OFFSET(VCPU_hvm_guest_cr2, struct vcpu, arch.hvm_vcpu.guest_cr[2]); + OFFSET(VCPU_mem_event_emulate, struct vcpu, arch.hvm_vmx.mem_event_emulate); BLANK(); OFFSET(VCPU_nhvm_guestmode, struct vcpu, arch.hvm_vcpu.nvcpu.nv_guestmode); diff -r 35a0556a7f76 -r a22fe4e2bc32 xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Thu Jan 10 17:32:10 2013 +0000 +++ b/xen/include/asm-x86/hvm/hvm.h Mon Jan 14 16:31:29 2013 +0200 @@ -433,7 +433,8 @@ int hvm_hap_nested_page_fault(paddr_t gp bool_t gla_valid, unsigned long gla, bool_t access_r, bool_t access_w, - bool_t access_x); + bool_t access_x, + struct cpu_user_regs *regs); #define hvm_msr_tsc_aux(v) ({ \ struct domain *__d = (v)->domain; \ diff -r 35a0556a7f76 -r a22fe4e2bc32 xen/include/asm-x86/hvm/vmx/vmcs.h --- a/xen/include/asm-x86/hvm/vmx/vmcs.h Thu Jan 10 17:32:10 2013 +0000 +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h Mon Jan 14 16:31:29 2013 +0200 @@ -124,6 +124,8 @@ struct arch_vmx_struct { /* Remember EFLAGS while in virtual 8086 mode */ uint32_t vm86_saved_eflags; int hostenv_migrated; + /* Should we emulate the first instruction on VCPU resume after a mem_event? */ + uint8_t mem_event_emulate; }; int vmx_create_vmcs(struct vcpu *v); diff -r 35a0556a7f76 -r a22fe4e2bc32 xen/include/asm-x86/p2m.h --- a/xen/include/asm-x86/p2m.h Thu Jan 10 17:32:10 2013 +0000 +++ b/xen/include/asm-x86/p2m.h Mon Jan 14 16:31:29 2013 +0200 @@ -566,7 +566,7 @@ void p2m_mem_paging_resume(struct domain * locks -- caller must also xfree the request. 
*/ bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla, bool_t access_r, bool_t access_w, bool_t access_x, - mem_event_request_t **req_ptr); + mem_event_request_t **req_ptr, struct cpu_user_regs *regs); /* Resumes the running of the VCPU, restarting the last instruction */ void p2m_mem_access_resume(struct domain *d); diff -r 35a0556a7f76 -r a22fe4e2bc32 xen/include/public/mem_event.h --- a/xen/include/public/mem_event.h Thu Jan 10 17:32:10 2013 +0000 +++ b/xen/include/public/mem_event.h Mon Jan 14 16:31:29 2013 +0200 @@ -36,6 +36,7 @@ #define MEM_EVENT_FLAG_EVICT_FAIL (1 << 2) #define MEM_EVENT_FLAG_FOREIGN (1 << 3) #define MEM_EVENT_FLAG_DUMMY (1 << 4) +#define MEM_EVENT_FLAG_EMULATE (1 << 5) /* Reasons for the memory event request */ #define MEM_EVENT_REASON_UNKNOWN 0 /* typical reason */
Tim Deegan
2013-Jan-17 12:16 UTC
Re: [PATCH] mem_event: Allow emulating an instruction that caused a page fault
Hi, At 16:32 +0200 on 14 Jan (1358181126), Razvan Cojocaru wrote:> This patch makes it possible to emulate an instruction that triggered > a page fault (received via the mem_event API). This is done by setting > the MEM_EVENT_FLAG_EMULATE in mem_event_response_t.flags. The purpose > of this is to be able to receive several distinct page fault mem_events > for the same address, and choose which ones are allowed to go through > from dom0 userspace.I think there ought to be some other control to this: what if a single instruction accesses multiple pages, each of which would cause an access fault? You only get a notification of the first one, so short of emulating the instruction yourself in userspace I don't know how you can decide that it's safe. I've a few comments on implementation below:> diff -r 35a0556a7f76 -r a22fe4e2bc32 xen/arch/x86/mm/p2m.c > --- a/xen/arch/x86/mm/p2m.c Thu Jan 10 17:32:10 2013 +0000 > +++ b/xen/arch/x86/mm/p2m.c Mon Jan 14 16:31:29 2013 +0200 > @@ -1205,7 +1205,7 @@ void p2m_mem_paging_resume(struct domain > > bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla, > bool_t access_r, bool_t access_w, bool_t access_x, > - mem_event_request_t **req_ptr) > + mem_event_request_t **req_ptr, struct cpu_user_regs *regs) > { > struct vcpu *v = current; > unsigned long gfn = gpa >> PAGE_SHIFT; > @@ -1258,6 +1258,17 @@ bool_t p2m_mem_access_check(paddr_t gpa, > } > } > > + if ( v->arch.hvm_vmx.mem_event_emulate ) > + { > + struct hvm_emulate_ctxt ctx[1] = {}; > + > + v->arch.hvm_vmx.mem_event_emulate = 0; > + hvm_emulate_prepare(ctx, regs);This function always operates on the currently scheduled vcpu, so you don't need to pass a cpu-user-regs struct all the way down the stack -- you can just use guest_cpu_user_regs() here.> + hvm_emulate_one(ctx); > + > + return 1; > + } > + > *req_ptr = NULL; > req = xzalloc(mem_event_request_t); > if ( req )> diff -r 35a0556a7f76 -r a22fe4e2bc32 xen/arch/x86/x86_64/asm-offsets.c> --- 
a/xen/arch/x86/x86_64/asm-offsets.c Thu Jan 10 17:32:10 2013 +0000 > +++ b/xen/arch/x86/x86_64/asm-offsets.c Mon Jan 14 16:31:29 2013 +0200 > @@ -113,6 +113,7 @@ void __dummy__(void) > OFFSET(VCPU_vmx_emulate, struct vcpu, arch.hvm_vmx.vmx_emulate); > OFFSET(VCPU_vm86_seg_mask, struct vcpu, arch.hvm_vmx.vm86_segment_mask); > OFFSET(VCPU_hvm_guest_cr2, struct vcpu, arch.hvm_vcpu.guest_cr[2]); > + OFFSET(VCPU_mem_event_emulate, struct vcpu, arch.hvm_vmx.mem_event_emulate);I don't think this is necessary: you only need to add a field to this file if you'll be using it from assembly code.> BLANK(); > > OFFSET(VCPU_nhvm_guestmode, struct vcpu, arch.hvm_vcpu.nvcpu.nv_guestmode); > diff -r 35a0556a7f76 -r a22fe4e2bc32 xen/include/public/mem_event.h > --- a/xen/include/public/mem_event.h Thu Jan 10 17:32:10 2013 +0000 > +++ b/xen/include/public/mem_event.h Mon Jan 14 16:31:29 2013 +0200 > @@ -36,6 +36,7 @@ > #define MEM_EVENT_FLAG_EVICT_FAIL (1 << 2) > #define MEM_EVENT_FLAG_FOREIGN (1 << 3) > #define MEM_EVENT_FLAG_DUMMY (1 << 4) > +#define MEM_EVENT_FLAG_EMULATE (1 << 5)Please add a comment saying what this flag does. I know the rest of this code is poorly commented, but let's try to make things better as we go. :) Tim.
Razvan Cojocaru
2013-Jan-17 13:16 UTC
Re: [PATCH] mem_event: Allow emulating an instruction that caused a page fault
> Hi,Hello Tim, thank you for taking the time to review this.> I think there ought to be some other control to this: what if a single > instruction accesses multiple pages, each of which would cause an access > fault? You only get a notification of the first one, so short of > emulating the instruction yourself in userspace I don't know how you can > decide that it's safe.You're right, but I can't see how this case could be handled at all without lifting the restrictions, one page at a time. And that's precisely what this patch aims to make unnecessary. I can't see a way around it (not while the emulation support is limited to hvm_emulate_one() and hvm_emulate_one_nowrite()).> This function always operates on the currently scheduled vcpu, so you > don't need to pass a cpu-user-regs struct all the way down the stack -- > you can just use guest_cpu_user_regs() here.Of course. Thank you.> I don't think this is necessary: you only need to add a field to this > file if you'll be using it from assembly code.I thought I'd be polite and add it anyway, in case somebody will want to use it later. Was that pollution? I'll remove it.>> +#define MEM_EVENT_FLAG_EMULATE (1 << 5) > > Please add a comment saying what this flag does. I know the rest of > this code is poorly commented, but let's try to make things better as we > go. :)I will. Thank you, Razvan Cojocaru