Jiang, Yunhong
2009-May-31 11:12 UTC
[Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support updating the page table for a foreign domain
Currently MMU_PT_UPDATE_RESERVE_AD support only update page table for current domain. This patch add support for foreign domain also. Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> Update the page table update hypercall diff -r f6457425560b xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Wed May 27 03:22:43 2009 +0800 +++ b/xen/arch/x86/mm.c Wed May 27 03:40:38 2009 +0800 @@ -110,6 +110,7 @@ #include <asm/hypercall.h> #include <asm/shared.h> #include <public/memory.h> +#include <public/sched.h> #include <xsm/xsm.h> #include <xen/trace.h> @@ -2990,7 +2991,8 @@ int do_mmu_update( struct page_info *page; int rc = 0, okay = 1, i = 0; unsigned int cmd, done = 0; - struct domain *d = current->domain; + struct domain *d = current->domain, *pt_owner = NULL; + struct vcpu *v = current; struct domain_mmap_cache mapcache; if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) @@ -3051,10 +3053,35 @@ int do_mmu_update( gmfn = req.ptr >> PAGE_SHIFT; mfn = gmfn_to_mfn(d, gmfn); - if ( unlikely(!get_page_from_pagenr(mfn, d)) ) + if (!mfn_valid(mfn)) + mfn = gmfn_to_mfn(FOREIGNDOM, gmfn); + if (!mfn_valid(mfn)) { MEM_LOG("Could not get page for normal update"); break; + } + + pt_owner = page_get_owner_and_reference(mfn_to_page(mfn)); + + if ( pt_owner != d ) + { + if ( pt_owner == FOREIGNDOM ) + { + spin_lock(&FOREIGNDOM->shutdown_lock); + if ( !IS_PRIV(d) || + !FOREIGNDOM->is_shut_down || + (FOREIGNDOM->shutdown_code != SHUTDOWN_suspend) ) + { + spin_unlock(&FOREIGNDOM->shutdown_lock); + rc = -EPERM; + break; + } + v = FOREIGNDOM->vcpu[0]; + }else + { + rc = -EPERM; + break; + } } va = map_domain_page_with_cache(mfn, &mapcache); @@ -3070,24 +3097,21 @@ int do_mmu_update( { l1_pgentry_t l1e = l1e_from_intpte(req.val); okay = mod_l1_entry(va, l1e, mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, - current); + cmd == MMU_PT_UPDATE_PRESERVE_AD, v); } break; case PGT_l2_page_table: { l2_pgentry_t l2e = l2e_from_intpte(req.val); okay = mod_l2_entry(va, l2e, mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, - 
current); + cmd == MMU_PT_UPDATE_PRESERVE_AD, v); } break; case PGT_l3_page_table: { l3_pgentry_t l3e = l3e_from_intpte(req.val); rc = mod_l3_entry(va, l3e, mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, - current); + cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v); okay = !rc; } break; @@ -3096,8 +3120,7 @@ int do_mmu_update( { l4_pgentry_t l4e = l4e_from_intpte(req.val); rc = mod_l4_entry(va, l4e, mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, - current); + cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v); okay = !rc; } break; @@ -3105,7 +3128,7 @@ int do_mmu_update( case PGT_writable_page: perfc_incr(writable_mmu_updates); okay = paging_write_guest_entry( - current, va, req.val, _mfn(mfn)); + v, va, req.val, _mfn(mfn)); break; } page_unlock(page); @@ -3116,11 +3139,13 @@ int do_mmu_update( { perfc_incr(writable_mmu_updates); okay = paging_write_guest_entry( - current, va, req.val, _mfn(mfn)); + v, va, req.val, _mfn(mfn)); put_page_type(page); } unmap_domain_page_with_cache(va, &mapcache); + if (pt_owner != d) + spin_unlock(&FOREIGNDOM->shutdown_lock); put_page(page); break; diff -r f6457425560b xen/include/public/xen.h --- a/xen/include/public/xen.h Wed May 27 03:22:43 2009 +0800 +++ b/xen/include/public/xen.h Wed May 27 03:26:46 2009 +0800 @@ -170,6 +170,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); * table entry is valid/present, the mapped frame must belong to the FD, if * an FD has been specified. If attempting to map an I/O page then the * caller assumes the privilege of the FD. + * The page table entry normally belongs to the calling domain. Otherwise it + * should belong to the FD and the FD should be suspended already * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller. * FD == DOMID_XEN: Map restricted areas of Xen''s heap space. * ptr[:2] -- Machine address of the page-table entry to modify. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2009-Jun-01 06:24 UTC
RE: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support updating the page table for a foreign domain
I want to clarify that this patch need more discussion because there is no clear way to distinguish if the page table address (i.e. address passed in req[:2] ) is owned by the current domain or by the foreign domain. So I just try to check if mfn_valid(), but I suspect if this is the right method. Thanks Yunhong Jiang xen-devel-bounces@lists.xensource.com wrote:> foreign domain > > Currently MMU_PT_UPDATE_RESERVE_AD support only update page > table for current domain. This patch add support for foreign > domain also. > > Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> Update > the page table update hypercall > > diff -r f6457425560b xen/arch/x86/mm.c > --- a/xen/arch/x86/mm.c Wed May 27 03:22:43 2009 +0800 > +++ b/xen/arch/x86/mm.c Wed May 27 03:40:38 2009 +0800 @@ -110,6 > +110,7 @@ #include <asm/hypercall.h> > #include <asm/shared.h> > #include <public/memory.h> > +#include <public/sched.h> > #include <xsm/xsm.h> > #include <xen/trace.h> > > @@ -2990,7 +2991,8 @@ int do_mmu_update( > struct page_info *page; > int rc = 0, okay = 1, i = 0; > unsigned int cmd, done = 0; > - struct domain *d = current->domain; > + struct domain *d = current->domain, *pt_owner = NULL; > + struct vcpu *v = current; > struct domain_mmap_cache mapcache; > > if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) > @@ -3051,10 +3053,35 @@ int do_mmu_update( > gmfn = req.ptr >> PAGE_SHIFT; > mfn = gmfn_to_mfn(d, gmfn); > > - if ( unlikely(!get_page_from_pagenr(mfn, d)) ) > + if (!mfn_valid(mfn)) > + mfn = gmfn_to_mfn(FOREIGNDOM, gmfn); > + if (!mfn_valid(mfn)) > { > MEM_LOG("Could not get page for normal update"); > break; + } > + > + pt_owner > page_get_owner_and_reference(mfn_to_page(mfn)); + + if ( > pt_owner != d ) + { > + if ( pt_owner == FOREIGNDOM ) > + { > + spin_lock(&FOREIGNDOM->shutdown_lock); > + if ( !IS_PRIV(d) || > + !FOREIGNDOM->is_shut_down || > + (FOREIGNDOM->shutdown_code !> SHUTDOWN_suspend) ) + { > + spin_unlock(&FOREIGNDOM->shutdown_lock); > + rc = -EPERM; > + break; > 
+ } > + v = FOREIGNDOM->vcpu[0]; > + }else > + { > + rc = -EPERM; > + break; > + } > } > > va = map_domain_page_with_cache(mfn, &mapcache); > @@ -3070,24 +3097,21 @@ int do_mmu_update( > { > l1_pgentry_t l1e = l1e_from_intpte(req.val); > okay = mod_l1_entry(va, l1e, mfn, > - cmd => MMU_PT_UPDATE_PRESERVE_AD, > - current); > + cmd => MMU_PT_UPDATE_PRESERVE_AD, v); > } > break; > case PGT_l2_page_table: > { > l2_pgentry_t l2e = l2e_from_intpte(req.val); > okay = mod_l2_entry(va, l2e, mfn, > - cmd => MMU_PT_UPDATE_PRESERVE_AD, > - current); > + cmd => MMU_PT_UPDATE_PRESERVE_AD, v); > } > break; > case PGT_l3_page_table: > { > l3_pgentry_t l3e = l3e_from_intpte(req.val); > rc = mod_l3_entry(va, l3e, mfn, > - cmd => MMU_PT_UPDATE_PRESERVE_AD, 1, > - current); > + cmd => MMU_PT_UPDATE_PRESERVE_AD, 1, v); > okay = !rc; > } > break; > @@ -3096,8 +3120,7 @@ int do_mmu_update( > { > l4_pgentry_t l4e = l4e_from_intpte(req.val); > rc = mod_l4_entry(va, l4e, mfn, > - cmd => MMU_PT_UPDATE_PRESERVE_AD, 1, > - current); > + cmd => MMU_PT_UPDATE_PRESERVE_AD, 1, v); > okay = !rc; > } > break; > @@ -3105,7 +3128,7 @@ int do_mmu_update( > case PGT_writable_page: > perfc_incr(writable_mmu_updates); > okay = paging_write_guest_entry( > - current, va, req.val, _mfn(mfn)); > + v, va, req.val, _mfn(mfn)); > break; } > page_unlock(page); > @@ -3116,11 +3139,13 @@ int do_mmu_update( > { > perfc_incr(writable_mmu_updates); > okay = paging_write_guest_entry( > - current, va, req.val, _mfn(mfn)); > + v, va, req.val, _mfn(mfn)); > put_page_type(page); > } > > unmap_domain_page_with_cache(va, &mapcache); > + if (pt_owner != d) > + spin_unlock(&FOREIGNDOM->shutdown_lock); > put_page(page); break; > > diff -r f6457425560b xen/include/public/xen.h > --- a/xen/include/public/xen.h Wed May 27 03:22:43 2009 +0800 > +++ b/xen/include/public/xen.h Wed May 27 03:26:46 2009 +0800 > @@ -170,6 +170,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); > * table entry is valid/present, the mapped frame must belong > to 
the FD, if > * an FD has been specified. If attempting to map an I/O page then the > * caller assumes the privilege of the FD. > + * The page table entry normally belongs to the calling > domain. Otherwise it > + * should belong to the FD and the FD should be suspended already > * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv > level of the caller. > * FD == DOMID_XEN: Map restricted areas of Xen''s heap space. > * ptr[:2] -- Machine address of the page-table entry to modify._______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Jun-01 07:53 UTC
Re: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support updating the page table for a foreign domain
I''d pack an extra domid into the top 16 bits of the foreigndom parameter to mmu_update(). Bottom 16 bits remain foreign owner of data page. Upper 16 bits, if non-zero, are foreign owner of pt page (we could make this field +1 so that dom0 can be encoded as well). -- Keir On 01/06/2009 07:24, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:> I want to clarify that this patch need more discussion because there is no > clear way to distinguish if the page table address (i.e. address passed in > req[:2] ) is owned by the current domain or by the foreign domain. So I just > try to check if mfn_valid(), but I suspect if this is the right method. > > Thanks > Yunhong Jiang > > xen-devel-bounces@lists.xensource.com wrote: >> foreign domain >> >> Currently MMU_PT_UPDATE_RESERVE_AD support only update page >> table for current domain. This patch add support for foreign >> domain also. >> >> Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> Update >> the page table update hypercall >> >> diff -r f6457425560b xen/arch/x86/mm.c >> --- a/xen/arch/x86/mm.c Wed May 27 03:22:43 2009 +0800 >> +++ b/xen/arch/x86/mm.c Wed May 27 03:40:38 2009 +0800 @@ -110,6 >> +110,7 @@ #include <asm/hypercall.h> >> #include <asm/shared.h> >> #include <public/memory.h> >> +#include <public/sched.h> >> #include <xsm/xsm.h> >> #include <xen/trace.h> >> >> @@ -2990,7 +2991,8 @@ int do_mmu_update( >> struct page_info *page; >> int rc = 0, okay = 1, i = 0; >> unsigned int cmd, done = 0; >> - struct domain *d = current->domain; >> + struct domain *d = current->domain, *pt_owner = NULL; >> + struct vcpu *v = current; >> struct domain_mmap_cache mapcache; >> >> if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) >> @@ -3051,10 +3053,35 @@ int do_mmu_update( >> gmfn = req.ptr >> PAGE_SHIFT; >> mfn = gmfn_to_mfn(d, gmfn); >> >> - if ( unlikely(!get_page_from_pagenr(mfn, d)) ) >> + if (!mfn_valid(mfn)) >> + mfn = gmfn_to_mfn(FOREIGNDOM, gmfn); >> + if (!mfn_valid(mfn)) >> { >> MEM_LOG("Could not get page for 
normal update"); >> break; + } >> + >> + pt_owner >> page_get_owner_and_reference(mfn_to_page(mfn)); + + if ( >> pt_owner != d ) + { >> + if ( pt_owner == FOREIGNDOM ) >> + { >> + spin_lock(&FOREIGNDOM->shutdown_lock); >> + if ( !IS_PRIV(d) || >> + !FOREIGNDOM->is_shut_down || >> + (FOREIGNDOM->shutdown_code !>> SHUTDOWN_suspend) ) + { >> + spin_unlock(&FOREIGNDOM->shutdown_lock); >> + rc = -EPERM; >> + break; >> + } >> + v = FOREIGNDOM->vcpu[0]; >> + }else >> + { >> + rc = -EPERM; >> + break; >> + } >> } >> >> va = map_domain_page_with_cache(mfn, &mapcache); >> @@ -3070,24 +3097,21 @@ int do_mmu_update( >> { >> l1_pgentry_t l1e = l1e_from_intpte(req.val); >> okay = mod_l1_entry(va, l1e, mfn, >> - cmd =>> MMU_PT_UPDATE_PRESERVE_AD, >> - current); >> + cmd =>> MMU_PT_UPDATE_PRESERVE_AD, v); >> } >> break; >> case PGT_l2_page_table: >> { >> l2_pgentry_t l2e = l2e_from_intpte(req.val); >> okay = mod_l2_entry(va, l2e, mfn, >> - cmd =>> MMU_PT_UPDATE_PRESERVE_AD, >> - current); >> + cmd =>> MMU_PT_UPDATE_PRESERVE_AD, v); >> } >> break; >> case PGT_l3_page_table: >> { >> l3_pgentry_t l3e = l3e_from_intpte(req.val); >> rc = mod_l3_entry(va, l3e, mfn, >> - cmd =>> MMU_PT_UPDATE_PRESERVE_AD, 1, >> - current); >> + cmd =>> MMU_PT_UPDATE_PRESERVE_AD, 1, v); >> okay = !rc; >> } >> break; >> @@ -3096,8 +3120,7 @@ int do_mmu_update( >> { >> l4_pgentry_t l4e = l4e_from_intpte(req.val); >> rc = mod_l4_entry(va, l4e, mfn, >> - cmd =>> MMU_PT_UPDATE_PRESERVE_AD, 1, >> - current); >> + cmd =>> MMU_PT_UPDATE_PRESERVE_AD, 1, v); >> okay = !rc; >> } >> break; >> @@ -3105,7 +3128,7 @@ int do_mmu_update( >> case PGT_writable_page: >> perfc_incr(writable_mmu_updates); >> okay = paging_write_guest_entry( >> - current, va, req.val, _mfn(mfn)); >> + v, va, req.val, _mfn(mfn)); >> break; } >> page_unlock(page); >> @@ -3116,11 +3139,13 @@ int do_mmu_update( >> { >> perfc_incr(writable_mmu_updates); >> okay = paging_write_guest_entry( >> - current, va, req.val, _mfn(mfn)); >> + v, va, req.val, 
_mfn(mfn)); >> put_page_type(page); >> } >> >> unmap_domain_page_with_cache(va, &mapcache); >> + if (pt_owner != d) >> + spin_unlock(&FOREIGNDOM->shutdown_lock); >> put_page(page); break; >> >> diff -r f6457425560b xen/include/public/xen.h >> --- a/xen/include/public/xen.h Wed May 27 03:22:43 2009 +0800 >> +++ b/xen/include/public/xen.h Wed May 27 03:26:46 2009 +0800 >> @@ -170,6 +170,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); >> * table entry is valid/present, the mapped frame must belong >> to the FD, if >> * an FD has been specified. If attempting to map an I/O page then the >> * caller assumes the privilege of the FD. >> + * The page table entry normally belongs to the calling >> domain. Otherwise it >> + * should belong to the FD and the FD should be suspended already >> * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv >> level of the caller. >> * FD == DOMID_XEN: Map restricted areas of Xen''s heap space. >> * ptr[:2] -- Machine address of the page-table entry to modify._______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2009-Jun-01 08:40 UTC
RE: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support updating the page table for a foreign domain
Thanks for suggestion. I''m always nervous on API changes. I''m still considering if any other potential usage mode for patch 5/6l (i.e. change page table or exchange memory for other domain),, but frustratedly realize no other requirement. Thanks -- jyh xen-devel-bounces@lists.xensource.com wrote:> I''d pack an extra domid into the top 16 bits of the foreigndom > parameter to mmu_update(). Bottom 16 bits remain foreign owner of data > page. Upper 16 > bits, if non-zero, are foreign owner of pt page (we could make > this field +1 > so that dom0 can be encoded as well). > > -- Keir > > On 01/06/2009 07:24, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote: > >> I want to clarify that this patch need more discussion because there >> is no clear way to distinguish if the page table address (i.e. >> address passed in req[:2] ) is owned by the current domain or by the >> foreign domain. So I just try to check if mfn_valid(), but I suspect >> if this is the right method. >> >> Thanks >> Yunhong Jiang >> >> xen-devel-bounces@lists.xensource.com wrote: >>> foreign domain >>> >>> Currently MMU_PT_UPDATE_RESERVE_AD support only update page >>> table for current domain. This patch add support for foreign domain >>> also. 
>>> >>> Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> Update >>> the page table update hypercall >>> >>> diff -r f6457425560b xen/arch/x86/mm.c >>> --- a/xen/arch/x86/mm.c Wed May 27 03:22:43 2009 +0800 >>> +++ b/xen/arch/x86/mm.c Wed May 27 03:40:38 2009 +0800 @@ -110,6 >>> +110,7 @@ #include <asm/hypercall.h> >>> #include <asm/shared.h> >>> #include <public/memory.h> >>> +#include <public/sched.h> >>> #include <xsm/xsm.h> >>> #include <xen/trace.h> >>> >>> @@ -2990,7 +2991,8 @@ int do_mmu_update( >>> struct page_info *page; >>> int rc = 0, okay = 1, i = 0; >>> unsigned int cmd, done = 0; >>> - struct domain *d = current->domain; >>> + struct domain *d = current->domain, *pt_owner = NULL; >>> + struct vcpu *v = current; >>> struct domain_mmap_cache mapcache; >>> >>> if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) >>> @@ -3051,10 +3053,35 @@ int do_mmu_update( >>> gmfn = req.ptr >> PAGE_SHIFT; >>> mfn = gmfn_to_mfn(d, gmfn); >>> >>> - if ( unlikely(!get_page_from_pagenr(mfn, d)) ) >>> + if (!mfn_valid(mfn)) >>> + mfn = gmfn_to_mfn(FOREIGNDOM, gmfn); >>> + if (!mfn_valid(mfn)) >>> { >>> MEM_LOG("Could not get page for normal update"); >>> break; + } + >>> + pt_owner >>> page_get_owner_and_reference(mfn_to_page(mfn)); + + if ( >>> pt_owner != d ) + { >>> + if ( pt_owner == FOREIGNDOM ) >>> + { >>> + spin_lock(&FOREIGNDOM->shutdown_lock); >>> + if ( !IS_PRIV(d) || >>> + !FOREIGNDOM->is_shut_down || >>> + (FOREIGNDOM->shutdown_code !>>> SHUTDOWN_suspend) ) + { >>> + spin_unlock(&FOREIGNDOM->shutdown_lock); >>> + rc = -EPERM; >>> + break; >>> + } >>> + v = FOREIGNDOM->vcpu[0]; >>> + }else >>> + { >>> + rc = -EPERM; >>> + break; >>> + } >>> } >>> >>> va = map_domain_page_with_cache(mfn, &mapcache); >>> @@ -3070,24 +3097,21 @@ int do_mmu_update( >>> { >>> l1_pgentry_t l1e = l1e_from_intpte(req.val); >>> okay = mod_l1_entry(va, l1e, mfn, >>> - cmd =>>> MMU_PT_UPDATE_PRESERVE_AD, >>> - current); >>> + cmd =>>> MMU_PT_UPDATE_PRESERVE_AD, v); >>> } >>> break; >>> case 
PGT_l2_page_table: >>> { >>> l2_pgentry_t l2e = l2e_from_intpte(req.val); >>> okay = mod_l2_entry(va, l2e, mfn, >>> - cmd =>>> MMU_PT_UPDATE_PRESERVE_AD, >>> - current); >>> + cmd =>>> MMU_PT_UPDATE_PRESERVE_AD, v); >>> } >>> break; >>> case PGT_l3_page_table: >>> { >>> l3_pgentry_t l3e = l3e_from_intpte(req.val); >>> rc = mod_l3_entry(va, l3e, mfn, >>> - cmd =>>> MMU_PT_UPDATE_PRESERVE_AD, 1, >>> - current); >>> + cmd =>>> MMU_PT_UPDATE_PRESERVE_AD, 1, v); >>> okay = !rc; >>> } >>> break; >>> @@ -3096,8 +3120,7 @@ int do_mmu_update( >>> { >>> l4_pgentry_t l4e = l4e_from_intpte(req.val); >>> rc = mod_l4_entry(va, l4e, mfn, >>> - cmd =>>> MMU_PT_UPDATE_PRESERVE_AD, 1, >>> - current); >>> + cmd =>>> MMU_PT_UPDATE_PRESERVE_AD, 1, v); >>> okay = !rc; >>> } >>> break; >>> @@ -3105,7 +3128,7 @@ int do_mmu_update( >>> case PGT_writable_page: >>> perfc_incr(writable_mmu_updates); >>> okay = paging_write_guest_entry( >>> - current, va, req.val, _mfn(mfn)); >>> + v, va, req.val, _mfn(mfn)); >>> break; } page_unlock(page); >>> @@ -3116,11 +3139,13 @@ int do_mmu_update( >>> { >>> perfc_incr(writable_mmu_updates); >>> okay = paging_write_guest_entry( >>> - current, va, req.val, _mfn(mfn)); >>> + v, va, req.val, _mfn(mfn)); >>> put_page_type(page); >>> } >>> >>> unmap_domain_page_with_cache(va, &mapcache); >>> + if (pt_owner != d) >>> + spin_unlock(&FOREIGNDOM->shutdown_lock); >>> put_page(page); break; >>> >>> diff -r f6457425560b xen/include/public/xen.h >>> --- a/xen/include/public/xen.h Wed May 27 03:22:43 2009 +0800 >>> +++ b/xen/include/public/xen.h Wed May 27 03:26:46 2009 +0800 >>> @@ -170,6 +170,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); >>> * table entry is valid/present, the mapped frame must belong >>> to the FD, if >>> * an FD has been specified. If attempting to map an I/O page then >>> the >>> * caller assumes the privilege of the FD. >>> + * The page table entry normally belongs to the calling >>> domain. 
Otherwise it >>> + * should belong to the FD and the FD should be suspended already >>> * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv >>> level of the caller. >>> * FD == DOMID_XEN: Map restricted areas of Xen''s heap space. >>> * ptr[:2] -- Machine address of the page-table entry to modify. > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Jun-01 09:20 UTC
Re: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support updating the page table for a foreign domain
On 01/06/2009 09:40, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:> Thanks for suggestion. I''m always nervous on API changes. > > I''m still considering if any other potential usage mode for patch 5/6l (i.e. > change page table or exchange memory for other domain),, but frustratedly > realize no other requirement.The API for XENMEM_exchange already permits specifying a foreign domain, so you''re just filling in the missing implementation. By the way I did not check your implementation, but the major subtlety is that you must take care for domains which are dying (d->is_dying). Giving new pages to such a domain is generally a bug because you race domain_relinquish_resources() which is walking the domain''s page lists and freeing the pages. You therefore risk leaving a zombie domain which will not die (refcount never zero). See for example how that is handled with page_alloc.c for calls such as XENMEM_populate_physmap, and how it is handled specially by MMUEXT_PIN_L*_TABLE. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2009-Jun-01 09:35 UTC
RE: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support updating the page table for a foreign domain
Keir Fraser wrote:> On 01/06/2009 09:40, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote: > >> Thanks for suggestion. I''m always nervous on API changes. >> >> I''m still considering if any other potential usage mode for patch >> 5/6l (i.e. change page table or exchange memory for other domain),, >> but frustratedly realize no other requirement. > > The API for XENMEM_exchange already permits specifying a > foreign domain, so > you''re just filling in the missing implementation. By the way I did > not check your implementation, but the major subtlety is that you > must take care > for domains which are dying (d->is_dying). Giving new pages to > such a domain > is generally a bug because you race > domain_relinquish_resources() which is > walking the domain''s page lists and freeing the pages. You > therefore risk > leaving a zombie domain which will not die (refcount never zero). > > See for example how that is handled with page_alloc.c for calls such > as XENMEM_populate_physmap, and how it is handled specially by > MMUEXT_PIN_L*_TABLE. > > -- KeirI think I missed this logic, I just make sure the domain is suspended, but seems be wrong because a suspended domain can still be killed and relinquish resources. So I will add this logic and resend the patch. But I''m not sure if we need make sure the guest is suspended/paused during these functions, as exchange a page or pte in flight may cause trouble. Or we can leave it to user space tools, since this situation will not damage xen HV itself. Thanks Yunhong Jiang _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Jun-01 09:45 UTC
Re: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support updating the page table for a foreign domain
On 01/06/2009 10:35, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:> I think I missed this logic, I just make sure the domain is suspended, but > seems be wrong because a suspended domain can still be killed and relinquish > resources. So I will add this logic and resend the patch. > > But I''m not sure if we need make sure the guest is suspended/paused during > these functions, as exchange a page or pte in flight may cause trouble. Or we > can leave it to user space tools, since this situation will not damage xen HV > itself.The HV should not bother to do the suspend check. It is a higher-level problem which should be checked by the tools. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2009-Jun-30 07:55 UTC
[PATCH] Enhance memory exchange to support a foreign domain -- Was RE: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support updating the page table for a foreign domain
Keir, this is the patch based on the discussion before. Can you please have a look on it? I can''t triger the corner case of domain being dying, so I hope it can be achieved through code review. Thanks Yunhong Jiang Extend memory_exchange to support exchange memory for foreign domain. When domain is killed during the memory exchange, it will try to decrease domain''s page count that has been removed from page_list. Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> diff -r 02003bee3e80 xen/common/memory.c --- a/xen/common/memory.c Thu Jun 25 18:31:10 2009 +0100 +++ b/xen/common/memory.c Tue Jun 30 01:44:28 2009 +0800 @@ -265,16 +265,22 @@ static long memory_exchange(XEN_GUEST_HA out_chunk_order = exch.in.extent_order - exch.out.extent_order; } - /* - * Only support exchange on calling domain right now. Otherwise there are - * tricky corner cases to consider (e.g., dying domain). - */ - if ( unlikely(exch.in.domid != DOMID_SELF) ) - { - rc = IS_PRIV(current->domain) ? -EINVAL : -EPERM; - goto fail_early; - } - d = current->domain; + if ( likely(exch.in.domid == DOMID_SELF) ) + { + d = rcu_lock_current_domain(); + } + else + { + if ( (d = rcu_lock_domain_by_id(exch.in.domid)) == NULL ) + goto fail_early; + + if ( !IS_PRIV_FOR(current->domain, d) ) + { + rcu_unlock_domain(d); + rc = -EPERM; + goto fail_early; + } + } memflags |= MEMF_bits(domain_clamp_alloc_bitsize( d, @@ -292,11 +298,15 @@ static long memory_exchange(XEN_GUEST_HA if ( hypercall_preempt_check() ) { exch.nr_exchanged = i << in_chunk_order; + rcu_unlock_domain(d); if ( copy_field_to_guest(arg, &exch, nr_exchanged) ) return -EFAULT; return hypercall_create_continuation( __HYPERVISOR_memory_op, "lh", XENMEM_exchange, arg); } + + if (d->is_dying) + goto dying; /* Steal a chunk''s worth of input pages from the domain. 
*/ for ( j = 0; j < (1UL << in_chunk_order); j++ ) @@ -360,9 +370,24 @@ static long memory_exchange(XEN_GUEST_HA j = 0; while ( (page = page_list_remove_head(&out_chunk_list)) ) { - if ( assign_pages(d, page, exch.out.extent_order, - MEMF_no_refcount) ) + rc = (assign_pages(d, page, exch.out.extent_order, + MEMF_no_refcount)); + + if (rc == -EINVAL) BUG(); + /* Domain is being destroy */ + else if (rc == -ENODEV) + { + int dec_count; + dec_count = ( ( 1UL << exch.in.extent_order)* + (1UL << in_chunk_order) - + j * (1UL << exch.out.extent_order)); + d->tot_pages -= dec_count; + + if (dec_count && !d->tot_pages) + put_domain(d); + break; + } /* Note that we ignore errors accessing the output extent list. */ (void)__copy_from_guest_offset( @@ -378,15 +403,15 @@ static long memory_exchange(XEN_GUEST_HA (void)__copy_to_guest_offset( exch.out.extent_start, (i<<out_chunk_order)+j, &mfn, 1); } - j++; } - BUG_ON(j != (1UL << out_chunk_order)); + BUG_ON( !(d->is_dying) && (j != (1UL << out_chunk_order)) ); } exch.nr_exchanged = exch.in.nr_extents; if ( copy_field_to_guest(arg, &exch, nr_exchanged) ) rc = -EFAULT; + rcu_unlock_domain(d); return rc; /* @@ -398,7 +423,8 @@ static long memory_exchange(XEN_GUEST_HA while ( (page = page_list_remove_head(&in_chunk_list)) ) if ( assign_pages(d, page, 0, MEMF_no_refcount) ) BUG(); - + dying: + rcu_unlock_domain(d); /* Free any output pages we managed to allocate. 
*/ while ( (page = page_list_remove_head(&out_chunk_list)) ) free_domheap_pages(page, exch.out.extent_order); diff -r 02003bee3e80 xen/common/page_alloc.c --- a/xen/common/page_alloc.c Thu Jun 25 18:31:10 2009 +0100 +++ b/xen/common/page_alloc.c Sun Jun 28 23:39:28 2009 +0800 @@ -1120,6 +1120,7 @@ int assign_pages( unsigned int memflags) { unsigned long i; + int rc = -EINVAL; spin_lock(&d->page_alloc_lock); @@ -1127,6 +1128,7 @@ int assign_pages( { gdprintk(XENLOG_INFO, "Cannot assign page to domain%d -- dying.\n", d->domain_id); + rc = -ENODEV; goto fail; } @@ -1136,6 +1138,7 @@ int assign_pages( { gdprintk(XENLOG_INFO, "Over-allocation for domain %u: %u > %u\n", d->domain_id, d->tot_pages + (1 << order), d->max_pages); + rc = -EINVAL; goto fail; } @@ -1160,7 +1163,7 @@ int assign_pages( fail: spin_unlock(&d->page_alloc_lock); - return -1; + return rc; } Keir Fraser wrote:> On 01/06/2009 09:40, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote: > >> Thanks for suggestion. I''m always nervous on API changes. >> >> I''m still considering if any other potential usage mode for patch >> 5/6l (i.e. change page table or exchange memory for other domain),, >> but frustratedly realize no other requirement. > > The API for XENMEM_exchange already permits specifying a > foreign domain, so > you''re just filling in the missing implementation. By the way I did > not check your implementation, but the major subtlety is that you > must take care > for domains which are dying (d->is_dying). Giving new pages to > such a domain > is generally a bug because you race > domain_relinquish_resources() which is > walking the domain''s page lists and freeing the pages. You > therefore risk > leaving a zombie domain which will not die (refcount never zero). > > See for example how that is handled with page_alloc.c for calls such > as XENMEM_populate_physmap, and how it is handled specially by > MMUEXT_PIN_L*_TABLE. 
> > -- Keir_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2009-Jun-30 07:58 UTC
RE: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support updating the page table for a foreign domain
Keir, this is the updated version, please have a look on it. Thanks Yunhong Jiang Currently MMU_PT_UPDATE_RESERVE_AD support only update page table for current domain. This patch add support for foreign domain also. Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> diff -r 9a74617c4a28 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Tue Jun 30 01:44:30 2009 +0800 +++ b/xen/arch/x86/mm.c Tue Jun 30 01:51:15 2009 +0800 @@ -110,6 +110,7 @@ #include <asm/hypercall.h> #include <asm/shared.h> #include <public/memory.h> +#include <public/sched.h> #include <xsm/xsm.h> #include <xen/trace.h> @@ -2999,8 +3000,9 @@ int do_mmu_update( unsigned long gpfn, gmfn, mfn; struct page_info *page; int rc = 0, okay = 1, i = 0; - unsigned int cmd, done = 0; - struct domain *d = current->domain; + unsigned int cmd, done = 0, pt_dom; + struct domain *d = current->domain, *pt_owner = NULL; + struct vcpu *v = current; struct domain_mmap_cache mapcache; if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) @@ -3018,7 +3020,28 @@ int do_mmu_update( goto out; } - if ( !set_foreigndom(foreigndom) ) + pt_dom = foreigndom >> 16; + if (pt_dom) + { + /* the page table belongs to foreign domain */ + pt_owner = rcu_lock_domain_by_id(pt_dom - 1); + if (!pt_owner ) + { + rc = -EINVAL; + goto out; + } + if( !IS_PRIV_FOR(d, pt_owner) ) + { + rc = -ESRCH; + goto out; + } + if (pt_dom == d) + rcu_unlock_domain(pt_owner); + v = pt_owner->vcpu[0]; + } else + pt_owner = d; + + if ( !set_foreigndom(foreigndom & 0xFFFFU) ) { rc = -ESRCH; goto out; @@ -3059,9 +3082,9 @@ int do_mmu_update( req.ptr -= cmd; gmfn = req.ptr >> PAGE_SHIFT; - mfn = gmfn_to_mfn(d, gmfn); - - if ( unlikely(!get_page_from_pagenr(mfn, d)) ) + mfn = gmfn_to_mfn(pt_owner, gmfn); + + if ( unlikely(!get_page_from_pagenr(mfn, pt_owner)) ) { MEM_LOG("Could not get page for normal update"); break; @@ -3080,24 +3103,21 @@ int do_mmu_update( { l1_pgentry_t l1e = l1e_from_intpte(req.val); okay = mod_l1_entry(va, l1e, mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, 
- current); + cmd == MMU_PT_UPDATE_PRESERVE_AD, v); } break; case PGT_l2_page_table: { l2_pgentry_t l2e = l2e_from_intpte(req.val); okay = mod_l2_entry(va, l2e, mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, - current); + cmd == MMU_PT_UPDATE_PRESERVE_AD, v); } break; case PGT_l3_page_table: { l3_pgentry_t l3e = l3e_from_intpte(req.val); rc = mod_l3_entry(va, l3e, mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, - current); + cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v); okay = !rc; } break; @@ -3106,8 +3126,7 @@ int do_mmu_update( { l4_pgentry_t l4e = l4e_from_intpte(req.val); rc = mod_l4_entry(va, l4e, mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, - current); + cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v); okay = !rc; } break; @@ -3115,7 +3134,7 @@ int do_mmu_update( case PGT_writable_page: perfc_incr(writable_mmu_updates); okay = paging_write_guest_entry( - current, va, req.val, _mfn(mfn)); + v, va, req.val, _mfn(mfn)); break; } page_unlock(page); @@ -3126,7 +3145,7 @@ int do_mmu_update( { perfc_incr(writable_mmu_updates); okay = paging_write_guest_entry( - current, va, req.val, _mfn(mfn)); + v, va, req.val, _mfn(mfn)); put_page_type(page); } @@ -3191,6 +3210,9 @@ int do_mmu_update( perfc_add(num_page_updates, i); out: + if (pt_owner && (pt_owner != d)) + rcu_unlock_domain(pt_owner); + /* Add incremental work we have done to the @done output parameter. */ if ( unlikely(!guest_handle_is_null(pdone)) ) { diff -r 9a74617c4a28 xen/include/public/xen.h --- a/xen/include/public/xen.h Tue Jun 30 01:44:30 2009 +0800 +++ b/xen/include/public/xen.h Tue Jun 30 01:44:33 2009 +0800 @@ -166,10 +166,14 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); * ptr[1:0] specifies the appropriate MMU_* command. * * ptr[1:0] == MMU_NORMAL_PT_UPDATE: - * Updates an entry in a page table. If updating an L1 table, and the new + * Updates an entry in a page table. + * The page table can be owned by current domain, or a foreign domain. 
If + * the page table belongs to a foreign domain, it should be specified in + * upper 16 bits in FD + * If updating an L1 table, and the new * table entry is valid/present, the mapped frame must belong to the FD, if - * an FD has been specified. If attempting to map an I/O page then the - * caller assumes the privilege of the FD. + * an foreign domain specified in lower 16 btis in FD. If attempting to map + * an I/O page then the caller assumes the privilege of the FD. * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller. * FD == DOMID_XEN: Map restricted areas of Xen''s heap space. * ptr[:2] -- Machine address of the page-table entry to modify. Keir Fraser wrote:> On 01/06/2009 10:35, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote: > >> I think I missed this logic, I just make sure the domain is >> suspended, but seems be wrong because a suspended domain can still >> be killed and relinquish resources. So I will add this logic and >> resend the patch. >> >> But I''m not sure if we need make sure the guest is suspended/paused >> during these functions, as exchange a page or pte in flight may >> cause trouble. Or we can leave it to user space tools, since this >> situation will not damage xen HV itself. > > The HV should not bother to do the suspend check. It is a higher-level > problem which should be checked by the tools. > > -- Keir_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Jun-30 09:17 UTC
Re: [PATCH] Enhance memory exchange to support foreign domain -- Was RE: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support update page table for foreign domain
Perhaps try dropping calls to domain_kill() into memory_exchange()? It may not complete the destroy, but it will set ->is_dying and trigger your error paths. Then you can ''xm destroy'' from the tools afterwards to try to finish up the destroy, and see if you get left with a zombie or not. Looking at this patch I can immediately see that: A. Your extra check(s) of is_dying are pointless. Only the check in assign_pages() is critical. So just leave it to that. B. Your adjustment of tot_pages is confusing. I can''t convince myself the arithmetic is correct. Furthermore messing with tot_pages outside of the d->page_alloc_lock is not allowed. -- Keir On 30/06/2009 08:55, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:> Keir, this is the patch based on the discussion before. Can you please have a > look on it? I can''t triger the corner case of domain being dying, so I hope it > can be achieved through code review. > > Thanks > Yunhong Jiang > > Extend memory_exchange to support exchange memory for foreign domain. > When domain is killed during the memory exchange, it will try to decrease > domain''s page count that has been removed from page_list. > > Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> > > diff -r 02003bee3e80 xen/common/memory.c > --- a/xen/common/memory.c Thu Jun 25 18:31:10 2009 +0100 > +++ b/xen/common/memory.c Tue Jun 30 01:44:28 2009 +0800 > @@ -265,16 +265,22 @@ static long memory_exchange(XEN_GUEST_HA > out_chunk_order = exch.in.extent_order - exch.out.extent_order; > } > > - /* > - * Only support exchange on calling domain right now. Otherwise there are > - * tricky corner cases to consider (e.g., dying domain). > - */ > - if ( unlikely(exch.in.domid != DOMID_SELF) ) > - { > - rc = IS_PRIV(current->domain) ? 
-EINVAL : -EPERM; > - goto fail_early; > - } > - d = current->domain; > + if ( likely(exch.in.domid == DOMID_SELF) ) > + { > + d = rcu_lock_current_domain(); > + } > + else > + { > + if ( (d = rcu_lock_domain_by_id(exch.in.domid)) == NULL ) > + goto fail_early; > + > + if ( !IS_PRIV_FOR(current->domain, d) ) > + { > + rcu_unlock_domain(d); > + rc = -EPERM; > + goto fail_early; > + } > + } > > memflags |= MEMF_bits(domain_clamp_alloc_bitsize( > d, > @@ -292,11 +298,15 @@ static long memory_exchange(XEN_GUEST_HA > if ( hypercall_preempt_check() ) > { > exch.nr_exchanged = i << in_chunk_order; > + rcu_unlock_domain(d); > if ( copy_field_to_guest(arg, &exch, nr_exchanged) ) > return -EFAULT; > return hypercall_create_continuation( > __HYPERVISOR_memory_op, "lh", XENMEM_exchange, arg); > } > + > + if (d->is_dying) > + goto dying; > > /* Steal a chunk''s worth of input pages from the domain. */ > for ( j = 0; j < (1UL << in_chunk_order); j++ ) > @@ -360,9 +370,24 @@ static long memory_exchange(XEN_GUEST_HA > j = 0; > while ( (page = page_list_remove_head(&out_chunk_list)) ) > { > - if ( assign_pages(d, page, exch.out.extent_order, > - MEMF_no_refcount) ) > + rc = (assign_pages(d, page, exch.out.extent_order, > + MEMF_no_refcount)); > + > + if (rc == -EINVAL) > BUG(); > + /* Domain is being destroy */ > + else if (rc == -ENODEV) > + { > + int dec_count; > + dec_count = ( ( 1UL << exch.in.extent_order)* > + (1UL << in_chunk_order) - > + j * (1UL << exch.out.extent_order)); > + d->tot_pages -= dec_count; > + > + if (dec_count && !d->tot_pages) > + put_domain(d); > + break; > + } > > /* Note that we ignore errors accessing the output extent list. 
> */ > (void)__copy_from_guest_offset( > @@ -378,15 +403,15 @@ static long memory_exchange(XEN_GUEST_HA > (void)__copy_to_guest_offset( > exch.out.extent_start, (i<<out_chunk_order)+j, &mfn, 1); > } > - > j++; > } > - BUG_ON(j != (1UL << out_chunk_order)); > + BUG_ON( !(d->is_dying) && (j != (1UL << out_chunk_order)) ); > } > > exch.nr_exchanged = exch.in.nr_extents; > if ( copy_field_to_guest(arg, &exch, nr_exchanged) ) > rc = -EFAULT; > + rcu_unlock_domain(d); > return rc; > > /* > @@ -398,7 +423,8 @@ static long memory_exchange(XEN_GUEST_HA > while ( (page = page_list_remove_head(&in_chunk_list)) ) > if ( assign_pages(d, page, 0, MEMF_no_refcount) ) > BUG(); > - > + dying: > + rcu_unlock_domain(d); > /* Free any output pages we managed to allocate. */ > while ( (page = page_list_remove_head(&out_chunk_list)) ) > free_domheap_pages(page, exch.out.extent_order); > diff -r 02003bee3e80 xen/common/page_alloc.c > --- a/xen/common/page_alloc.c Thu Jun 25 18:31:10 2009 +0100 > +++ b/xen/common/page_alloc.c Sun Jun 28 23:39:28 2009 +0800 > @@ -1120,6 +1120,7 @@ int assign_pages( > unsigned int memflags) > { > unsigned long i; > + int rc = -EINVAL; > > spin_lock(&d->page_alloc_lock); > > @@ -1127,6 +1128,7 @@ int assign_pages( > { > gdprintk(XENLOG_INFO, "Cannot assign page to domain%d -- dying.\n", > d->domain_id); > + rc = -ENODEV; > goto fail; > } > > @@ -1136,6 +1138,7 @@ int assign_pages( > { > gdprintk(XENLOG_INFO, "Over-allocation for domain %u: %u > %u\n", > d->domain_id, d->tot_pages + (1 << order), d->max_pages); > + rc = -EINVAL; > goto fail; > } > > @@ -1160,7 +1163,7 @@ int assign_pages( > > fail: > spin_unlock(&d->page_alloc_lock); > - return -1; > + return rc; > } > > > > > Keir Fraser wrote: >> On 01/06/2009 09:40, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote: >> >>> Thanks for suggestion. I''m always nervous on API changes. >>> >>> I''m still considering if any other potential usage mode for patch >>> 5/6l (i.e. 
change page table or exchange memory for other domain),, >>> but frustratedly realize no other requirement. >> >> The API for XENMEM_exchange already permits specifying a >> foreign domain, so >> you''re just filling in the missing implementation. By the way I did >> not check your implementation, but the major subtlety is that you >> must take care >> for domains which are dying (d->is_dying). Giving new pages to >> such a domain >> is generally a bug because you race >> domain_relinquish_resources() which is >> walking the domain''s page lists and freeing the pages. You >> therefore risk >> leaving a zombie domain which will not die (refcount never zero). >> >> See for example how that is handled with page_alloc.c for calls such >> as XENMEM_populate_physmap, and how it is handled specially by >> MMUEXT_PIN_L*_TABLE. >> >> -- Keir_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2009-Jun-30 09:40 UTC
RE: [PATCH] Enhance memory exchange to support foreign domain -- Was RE: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support update page table for foreign domain
Keir Fraser wrote:> Perhaps try dropping calls to domain_kill() into > memory_exchange()? It may > not complete the destroy, but it will set ->is_dying and > trigger your error > paths. Then you can ''xm destroy'' from the tools afterwards to > try to finish > up the destroy, and see if you get left with a zombie or not.It will cover some case, but not all. But yees, I can try that.> > Looking at this patch I can immediately see that: > A. Your extra check(s) of is_dying are pointless. Only the check in > assign_pages() is critical. So just leave it to that.I add that to avoid meaningless loop if the domain is dying. But yes it is not important because of low probability.> B. Your adjustment of tot_pages is confusing. I can''t > convince myself the > arithmetic is correct. Furthermore messing with tot_pages > outside of the > d->page_alloc_lock is not allowed.The idea of the adjustment is: In each loop, we remove some pages (the in_chunk_list) without decrease the tot_pages. Then when we get domain is dying when assign pages (the out_chunk_list), we need decrease the count. For those page that has been assigned, it should be covered by domain_relinquish_resources(), so what we need decrease is: the number that has been removed - the number that has been assigned already. Yes, I need keep the page_alloc_lock for tot_pages.> > -- Keir > > On 30/06/2009 08:55, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote: > >> Keir, this is the patch based on the discussion before. Can you >> please have a look on it? I can''t triger the corner case of domain >> being dying, so I hope it can be achieved through code review. >> >> Thanks >> Yunhong Jiang >> >> Extend memory_exchange to support exchange memory for foreign domain. >> When domain is killed during the memory exchange, it will try to >> decrease domain''s page count that has been removed from page_list. 
>> >> Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> >> >> diff -r 02003bee3e80 xen/common/memory.c >> --- a/xen/common/memory.c Thu Jun 25 18:31:10 2009 +0100 >> +++ b/xen/common/memory.c Tue Jun 30 01:44:28 2009 +0800 >> @@ -265,16 +265,22 @@ static long memory_exchange(XEN_GUEST_HA >> out_chunk_order = exch.in.extent_order - >> exch.out.extent_order; } >> >> - /* >> - * Only support exchange on calling domain right now. Otherwise >> there are >> - * tricky corner cases to consider (e.g., dying domain). >> - */ >> - if ( unlikely(exch.in.domid != DOMID_SELF) ) >> - { >> - rc = IS_PRIV(current->domain) ? -EINVAL : -EPERM; >> - goto fail_early; >> - } >> - d = current->domain; >> + if ( likely(exch.in.domid == DOMID_SELF) ) >> + { >> + d = rcu_lock_current_domain(); >> + } >> + else >> + { >> + if ( (d = rcu_lock_domain_by_id(exch.in.domid)) == NULL ) + >> goto fail_early; + >> + if ( !IS_PRIV_FOR(current->domain, d) ) >> + { >> + rcu_unlock_domain(d); >> + rc = -EPERM; >> + goto fail_early; >> + } >> + } >> >> memflags |= MEMF_bits(domain_clamp_alloc_bitsize( d, >> @@ -292,11 +298,15 @@ static long memory_exchange(XEN_GUEST_HA >> if ( hypercall_preempt_check() ) >> { >> exch.nr_exchanged = i << in_chunk_order; >> + rcu_unlock_domain(d); >> if ( copy_field_to_guest(arg, &exch, nr_exchanged) ) >> return -EFAULT; >> return hypercall_create_continuation( >> __HYPERVISOR_memory_op, "lh", XENMEM_exchange, >> arg); } + >> + if (d->is_dying) >> + goto dying; >> >> /* Steal a chunk''s worth of input pages from the domain. 
*/ >> for ( j = 0; j < (1UL << in_chunk_order); j++ ) >> @@ -360,9 +370,24 @@ static long memory_exchange(XEN_GUEST_HA >> j = 0; while ( (page >> page_list_remove_head(&out_chunk_list)) ) { >> - if ( assign_pages(d, page, exch.out.extent_order, >> - MEMF_no_refcount) ) >> + rc = (assign_pages(d, page, exch.out.extent_order, >> + MEMF_no_refcount)); >> + >> + if (rc == -EINVAL) >> BUG(); >> + /* Domain is being destroy */ >> + else if (rc == -ENODEV) >> + { >> + int dec_count; >> + dec_count = ( ( 1UL << exch.in.extent_order)* >> + (1UL << in_chunk_order) - >> + j * (1UL << exch.out.extent_order)); >> + d->tot_pages -= dec_count; >> + >> + if (dec_count && !d->tot_pages) >> + put_domain(d); >> + break; >> + } >> >> /* Note that we ignore errors accessing the output >> extent list. */ (void)__copy_from_guest_offset( >> @@ -378,15 +403,15 @@ static long memory_exchange(XEN_GUEST_HA >> (void)__copy_to_guest_offset( >> exch.out.extent_start, > (i<<out_chunk_order)+j, &mfn, 1); >> } >> - >> j++; >> } >> - BUG_ON(j != (1UL << out_chunk_order)); >> + BUG_ON( !(d->is_dying) && (j != (1UL << out_chunk_order)) >> ); } >> >> exch.nr_exchanged = exch.in.nr_extents; >> if ( copy_field_to_guest(arg, &exch, nr_exchanged) ) >> rc = -EFAULT; + rcu_unlock_domain(d); >> return rc; >> >> /* >> @@ -398,7 +423,8 @@ static long memory_exchange(XEN_GUEST_HA >> while ( (page = page_list_remove_head(&in_chunk_list)) ) >> if ( assign_pages(d, page, 0, MEMF_no_refcount) ) >> BUG(); - >> + dying: >> + rcu_unlock_domain(d); >> /* Free any output pages we managed to allocate. 
*/ >> while ( (page = page_list_remove_head(&out_chunk_list)) ) >> free_domheap_pages(page, exch.out.extent_order); >> diff -r 02003bee3e80 xen/common/page_alloc.c >> --- a/xen/common/page_alloc.c Thu Jun 25 18:31:10 2009 +0100 >> +++ b/xen/common/page_alloc.c Sun Jun 28 23:39:28 2009 +0800 >> @@ -1120,6 +1120,7 @@ int assign_pages( >> unsigned int memflags) >> { >> unsigned long i; >> + int rc = -EINVAL; >> >> spin_lock(&d->page_alloc_lock); >> >> @@ -1127,6 +1128,7 @@ int assign_pages( >> { >> gdprintk(XENLOG_INFO, "Cannot assign page to domain%d -- >> dying.\n", d->domain_id); >> + rc = -ENODEV; >> goto fail; >> } >> >> @@ -1136,6 +1138,7 @@ int assign_pages( >> { >> gdprintk(XENLOG_INFO, "Over-allocation for domain %u: >> %u > %u\n", d->domain_id, d->tot_pages + (1 << >> order), d->max_pages); + rc = -EINVAL; goto >> fail; } >> >> @@ -1160,7 +1163,7 @@ int assign_pages( >> >> fail: >> spin_unlock(&d->page_alloc_lock); >> - return -1; >> + return rc; >> } >> >> >> >> >> Keir Fraser wrote: >>> On 01/06/2009 09:40, "Jiang, Yunhong" > <yunhong.jiang@intel.com> wrote: >>> >>>> Thanks for suggestion. I''m always nervous on API changes. >>>> >>>> I''m still considering if any other potential usage mode for patch >>>> 5/6l (i.e. change page table or exchange memory for other domain),, >>>> but frustratedly realize no other requirement. >>> >>> The API for XENMEM_exchange already permits specifying a >>> foreign domain, so >>> you''re just filling in the missing implementation. By the way I did >>> not check your implementation, but the major subtlety is that you >>> must take care for domains which are dying (d->is_dying). Giving >>> new pages to >>> such a domain >>> is generally a bug because you race >>> domain_relinquish_resources() which is >>> walking the domain''s page lists and freeing the pages. You >>> therefore risk leaving a zombie domain which will not die (refcount >>> never zero). 
>>> >>> See for example how that is handled with page_alloc.c for calls such >>> as XENMEM_populate_physmap, and how it is handled specially by >>> MMUEXT_PIN_L*_TABLE. >>> >>> -- Keir_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Jun-30 10:40 UTC
Re: [PATCH] Enhance memory exchange to support foreign domain -- Was RE: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support update page table for foreign domain
On 30/06/2009 10:40, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:>> B. Your adjustment of tot_pages is confusing. I can''t >> convince myself the >> arithmetic is correct. Furthermore messing with tot_pages >> outside of the >> d->page_alloc_lock is not allowed. > > The idea of the adjustment is: > > In each loop, we remove some pages (the in_chunk_list) without decrease the > tot_pages. Then when we get domain is dying when assign pages (the > out_chunk_list), we need decrease the count. For those page that has been > assigned, it should be covered by domain_relinquish_resources(), so what we > need decrease is: > the number that has been removed - the number that has been assigned already.There''s still quite a logical leap to the arithmetic in your code. Spell it out in a comment. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Jun-30 14:59 UTC
Re: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support update page table for foreign domain
This doesn''t even build. You compare a pointer with an integer. -- Keir On 30/06/2009 08:58, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:> Keir, this is the updated version, please have a look on it. > > Thanks > Yunhong Jiang > > Currently MMU_PT_UPDATE_RESERVE_AD support only update page table for current > domain. This patch add support for foreign domain also. > > Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> > > diff -r 9a74617c4a28 xen/arch/x86/mm.c > --- a/xen/arch/x86/mm.c Tue Jun 30 01:44:30 2009 +0800 > +++ b/xen/arch/x86/mm.c Tue Jun 30 01:51:15 2009 +0800 > @@ -110,6 +110,7 @@ > #include <asm/hypercall.h> > #include <asm/shared.h> > #include <public/memory.h> > +#include <public/sched.h> > #include <xsm/xsm.h> > #include <xen/trace.h> > > @@ -2999,8 +3000,9 @@ int do_mmu_update( > unsigned long gpfn, gmfn, mfn; > struct page_info *page; > int rc = 0, okay = 1, i = 0; > - unsigned int cmd, done = 0; > - struct domain *d = current->domain; > + unsigned int cmd, done = 0, pt_dom; > + struct domain *d = current->domain, *pt_owner = NULL; > + struct vcpu *v = current; > struct domain_mmap_cache mapcache; > > if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) > @@ -3018,7 +3020,28 @@ int do_mmu_update( > goto out; > } > > - if ( !set_foreigndom(foreigndom) ) > + pt_dom = foreigndom >> 16; > + if (pt_dom) > + { > + /* the page table belongs to foreign domain */ > + pt_owner = rcu_lock_domain_by_id(pt_dom - 1); > + if (!pt_owner ) > + { > + rc = -EINVAL; > + goto out; > + } > + if( !IS_PRIV_FOR(d, pt_owner) ) > + { > + rc = -ESRCH; > + goto out; > + } > + if (pt_dom == d) > + rcu_unlock_domain(pt_owner); > + v = pt_owner->vcpu[0]; > + } else > + pt_owner = d; > + > + if ( !set_foreigndom(foreigndom & 0xFFFFU) ) > { > rc = -ESRCH; > goto out; > @@ -3059,9 +3082,9 @@ int do_mmu_update( > > req.ptr -= cmd; > gmfn = req.ptr >> PAGE_SHIFT; > - mfn = gmfn_to_mfn(d, gmfn); > - > - if ( unlikely(!get_page_from_pagenr(mfn, d)) ) > + mfn = 
gmfn_to_mfn(pt_owner, gmfn); > + > + if ( unlikely(!get_page_from_pagenr(mfn, pt_owner)) ) > { > MEM_LOG("Could not get page for normal update"); > break; > @@ -3080,24 +3103,21 @@ int do_mmu_update( > { > l1_pgentry_t l1e = l1e_from_intpte(req.val); > okay = mod_l1_entry(va, l1e, mfn, > - cmd == MMU_PT_UPDATE_PRESERVE_AD, > - current); > + cmd == MMU_PT_UPDATE_PRESERVE_AD, v); > } > break; > case PGT_l2_page_table: > { > l2_pgentry_t l2e = l2e_from_intpte(req.val); > okay = mod_l2_entry(va, l2e, mfn, > - cmd == MMU_PT_UPDATE_PRESERVE_AD, > - current); > + cmd == MMU_PT_UPDATE_PRESERVE_AD, v); > } > break; > case PGT_l3_page_table: > { > l3_pgentry_t l3e = l3e_from_intpte(req.val); > rc = mod_l3_entry(va, l3e, mfn, > - cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, > - current); > + cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, > v); > okay = !rc; > } > break; > @@ -3106,8 +3126,7 @@ int do_mmu_update( > { > l4_pgentry_t l4e = l4e_from_intpte(req.val); > rc = mod_l4_entry(va, l4e, mfn, > - cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, > - current); > + cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, > v); > okay = !rc; > } > break; > @@ -3115,7 +3134,7 @@ int do_mmu_update( > case PGT_writable_page: > perfc_incr(writable_mmu_updates); > okay = paging_write_guest_entry( > - current, va, req.val, _mfn(mfn)); > + v, va, req.val, _mfn(mfn)); > break; > } > page_unlock(page); > @@ -3126,7 +3145,7 @@ int do_mmu_update( > { > perfc_incr(writable_mmu_updates); > okay = paging_write_guest_entry( > - current, va, req.val, _mfn(mfn)); > + v, va, req.val, _mfn(mfn)); > put_page_type(page); > } > > @@ -3191,6 +3210,9 @@ int do_mmu_update( > perfc_add(num_page_updates, i); > > out: > + if (pt_owner && (pt_owner != d)) > + rcu_unlock_domain(pt_owner); > + > /* Add incremental work we have done to the @done output parameter. 
*/ > if ( unlikely(!guest_handle_is_null(pdone)) ) > { > diff -r 9a74617c4a28 xen/include/public/xen.h > --- a/xen/include/public/xen.h Tue Jun 30 01:44:30 2009 +0800 > +++ b/xen/include/public/xen.h Tue Jun 30 01:44:33 2009 +0800 > @@ -166,10 +166,14 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); > * ptr[1:0] specifies the appropriate MMU_* command. > * > * ptr[1:0] == MMU_NORMAL_PT_UPDATE: > - * Updates an entry in a page table. If updating an L1 table, and the new > + * Updates an entry in a page table. > + * The page table can be owned by current domain, or a foreign domain. If > + * the page table belongs to a foreign domain, it should be specified in > + * upper 16 bits in FD > + * If updating an L1 table, and the new > * table entry is valid/present, the mapped frame must belong to the FD, if > - * an FD has been specified. If attempting to map an I/O page then the > - * caller assumes the privilege of the FD. > + * an foreign domain specified in lower 16 btis in FD. If attempting to map > + * an I/O page then the caller assumes the privilege of the FD. > * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the > caller. > * FD == DOMID_XEN: Map restricted areas of Xen''s heap space. > * ptr[:2] -- Machine address of the page-table entry to modify. > > > Keir Fraser wrote: >> On 01/06/2009 10:35, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote: >> >>> I think I missed this logic, I just make sure the domain is >>> suspended, but seems be wrong because a suspended domain can still >>> be killed and relinquish resources. So I will add this logic and >>> resend the patch. >>> >>> But I''m not sure if we need make sure the guest is suspended/paused >>> during these functions, as exchange a page or pte in flight may >>> cause trouble. Or we can leave it to user space tools, since this >>> situation will not damage xen HV itself. >> >> The HV should not bother to do the suspend check. It is a higher-level >> problem which should be checked by the tools. 
>> >> -- Keir_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2009-Jul-02 06:26 UTC
RE: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support update page table for foreign domain
Sorry I forgot the last qref before building. I found missing this rcu_unlock before sending, but wrongly changed as pt_dom == d. I changed that when found build error but forgot qref. Attached is the new one, please have a look. Thanks Yunhong Jiang Currently MMU_PT_UPDATE_RESERVE_AD support only update page table for current domain. This patch add support for foreign domain also. Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> diff -r 9a74617c4a28 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Tue Jun 30 01:44:30 2009 +0800 +++ b/xen/arch/x86/mm.c Tue Jun 30 19:02:38 2009 +0800 @@ -110,6 +110,7 @@ #include <asm/hypercall.h> #include <asm/shared.h> #include <public/memory.h> +#include <public/sched.h> #include <xsm/xsm.h> #include <xen/trace.h> @@ -2999,8 +3000,9 @@ int do_mmu_update( unsigned long gpfn, gmfn, mfn; struct page_info *page; int rc = 0, okay = 1, i = 0; - unsigned int cmd, done = 0; - struct domain *d = current->domain; + unsigned int cmd, done = 0, pt_dom; + struct domain *d = current->domain, *pt_owner = NULL; + struct vcpu *v = current; struct domain_mmap_cache mapcache; if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) @@ -3018,7 +3020,28 @@ int do_mmu_update( goto out; } - if ( !set_foreigndom(foreigndom) ) + pt_dom = foreigndom >> 16; + if (pt_dom) + { + /* the page table belongs to foreign domain */ + pt_owner = rcu_lock_domain_by_id(pt_dom - 1); + if (!pt_owner ) + { + rc = -EINVAL; + goto out; + } + if( !IS_PRIV_FOR(d, pt_owner) ) + { + rc = -ESRCH; + goto out; + } + if (pt_owner == d) + rcu_unlock_domain(pt_owner); + v = pt_owner->vcpu[0]; + } else + pt_owner = d; + + if ( !set_foreigndom(foreigndom & 0xFFFFU) ) { rc = -ESRCH; goto out; @@ -3059,9 +3082,9 @@ int do_mmu_update( req.ptr -= cmd; gmfn = req.ptr >> PAGE_SHIFT; - mfn = gmfn_to_mfn(d, gmfn); - - if ( unlikely(!get_page_from_pagenr(mfn, d)) ) + mfn = gmfn_to_mfn(pt_owner, gmfn); + + if ( unlikely(!get_page_from_pagenr(mfn, pt_owner)) ) { MEM_LOG("Could not get page for normal 
update"); break; @@ -3080,24 +3103,21 @@ int do_mmu_update( { l1_pgentry_t l1e = l1e_from_intpte(req.val); okay = mod_l1_entry(va, l1e, mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, - current); + cmd == MMU_PT_UPDATE_PRESERVE_AD, v); } break; case PGT_l2_page_table: { l2_pgentry_t l2e = l2e_from_intpte(req.val); okay = mod_l2_entry(va, l2e, mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, - current); + cmd == MMU_PT_UPDATE_PRESERVE_AD, v); } break; case PGT_l3_page_table: { l3_pgentry_t l3e = l3e_from_intpte(req.val); rc = mod_l3_entry(va, l3e, mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, - current); + cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v); okay = !rc; } break; @@ -3106,8 +3126,7 @@ int do_mmu_update( { l4_pgentry_t l4e = l4e_from_intpte(req.val); rc = mod_l4_entry(va, l4e, mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, - current); + cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v); okay = !rc; } break; @@ -3115,7 +3134,7 @@ int do_mmu_update( case PGT_writable_page: perfc_incr(writable_mmu_updates); okay = paging_write_guest_entry( - current, va, req.val, _mfn(mfn)); + v, va, req.val, _mfn(mfn)); break; } page_unlock(page); @@ -3126,7 +3145,7 @@ int do_mmu_update( { perfc_incr(writable_mmu_updates); okay = paging_write_guest_entry( - current, va, req.val, _mfn(mfn)); + v, va, req.val, _mfn(mfn)); put_page_type(page); } @@ -3191,6 +3210,9 @@ int do_mmu_update( perfc_add(num_page_updates, i); out: + if (pt_owner && (pt_owner != d)) + rcu_unlock_domain(pt_owner); + /* Add incremental work we have done to the @done output parameter. */ if ( unlikely(!guest_handle_is_null(pdone)) ) { diff -r 9a74617c4a28 xen/include/public/xen.h --- a/xen/include/public/xen.h Tue Jun 30 01:44:30 2009 +0800 +++ b/xen/include/public/xen.h Tue Jun 30 01:44:33 2009 +0800 @@ -166,10 +166,14 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); * ptr[1:0] specifies the appropriate MMU_* command. * * ptr[1:0] == MMU_NORMAL_PT_UPDATE: - * Updates an entry in a page table. 
If updating an L1 table, and the new + * Updates an entry in a page table. + * The page table can be owned by current domain, or a foreign domain. If + * the page table belongs to a foreign domain, it should be specified in + * upper 16 bits in FD + * If updating an L1 table, and the new * table entry is valid/present, the mapped frame must belong to the FD, if - * an FD has been specified. If attempting to map an I/O page then the - * caller assumes the privilege of the FD. + * an foreign domain specified in lower 16 btis in FD. If attempting to map + * an I/O page then the caller assumes the privilege of the FD. * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller. * FD == DOMID_XEN: Map restricted areas of Xen''s heap space. * ptr[:2] -- Machine address of the page-table entry to modify. Keir Fraser wrote:> This doesn''t even build. You compare a pointer with an integer. > > -- Keir > > On 30/06/2009 08:58, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote: > >> Keir, this is the updated version, please have a look on it. >> >> Thanks >> Yunhong Jiang >> >> Currently MMU_PT_UPDATE_RESERVE_AD support only update page table >> for current domain. This patch add support for foreign domain also. 
>> >> Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> >> >> diff -r 9a74617c4a28 xen/arch/x86/mm.c >> --- a/xen/arch/x86/mm.c Tue Jun 30 01:44:30 2009 +0800 >> +++ b/xen/arch/x86/mm.c Tue Jun 30 01:51:15 2009 +0800 @@ -110,6 >> +110,7 @@ #include <asm/hypercall.h> >> #include <asm/shared.h> >> #include <public/memory.h> >> +#include <public/sched.h> >> #include <xsm/xsm.h> >> #include <xen/trace.h> >> >> @@ -2999,8 +3000,9 @@ int do_mmu_update( >> unsigned long gpfn, gmfn, mfn; >> struct page_info *page; >> int rc = 0, okay = 1, i = 0; >> - unsigned int cmd, done = 0; >> - struct domain *d = current->domain; >> + unsigned int cmd, done = 0, pt_dom; >> + struct domain *d = current->domain, *pt_owner = NULL; >> + struct vcpu *v = current; >> struct domain_mmap_cache mapcache; >> >> if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) >> @@ -3018,7 +3020,28 @@ int do_mmu_update( >> goto out; >> } >> >> - if ( !set_foreigndom(foreigndom) ) >> + pt_dom = foreigndom >> 16; >> + if (pt_dom) >> + { >> + /* the page table belongs to foreign domain */ >> + pt_owner = rcu_lock_domain_by_id(pt_dom - 1); + if >> (!pt_owner ) + { >> + rc = -EINVAL; >> + goto out; >> + } >> + if( !IS_PRIV_FOR(d, pt_owner) ) >> + { >> + rc = -ESRCH; >> + goto out; >> + } >> + if (pt_dom == d) >> + rcu_unlock_domain(pt_owner); >> + v = pt_owner->vcpu[0]; >> + } else >> + pt_owner = d; >> + >> + if ( !set_foreigndom(foreigndom & 0xFFFFU) ) >> { >> rc = -ESRCH; >> goto out; >> @@ -3059,9 +3082,9 @@ int do_mmu_update( >> >> req.ptr -= cmd; >> gmfn = req.ptr >> PAGE_SHIFT; >> - mfn = gmfn_to_mfn(d, gmfn); >> - >> - if ( unlikely(!get_page_from_pagenr(mfn, d)) ) >> + mfn = gmfn_to_mfn(pt_owner, gmfn); >> + >> + if ( unlikely(!get_page_from_pagenr(mfn, pt_owner)) ) >> { MEM_LOG("Could not get page for normal update"); >> break; @@ -3080,24 +3103,21 @@ int do_mmu_update( >> { >> l1_pgentry_t l1e = l1e_from_intpte(req.val); >> okay = mod_l1_entry(va, l1e, mfn, >> - cmd =>> MMU_PT_UPDATE_PRESERVE_AD, >> - 
current); >> + cmd =>> MMU_PT_UPDATE_PRESERVE_AD, v); } >> break; case PGT_l2_page_table: >> { >> l2_pgentry_t l2e = l2e_from_intpte(req.val); >> okay = mod_l2_entry(va, l2e, mfn, >> - cmd =>> MMU_PT_UPDATE_PRESERVE_AD, >> - current); >> + cmd =>> MMU_PT_UPDATE_PRESERVE_AD, v); } >> break; case PGT_l3_page_table: >> { >> l3_pgentry_t l3e = l3e_from_intpte(req.val); >> rc = mod_l3_entry(va, l3e, mfn, >> - cmd =>> MMU_PT_UPDATE_PRESERVE_AD, 1, >> - current); >> + cmd =>> MMU_PT_UPDATE_PRESERVE_AD, 1, v); okay = !rc; >> } >> break; >> @@ -3106,8 +3126,7 @@ int do_mmu_update( >> { >> l4_pgentry_t l4e = l4e_from_intpte(req.val); >> rc = mod_l4_entry(va, l4e, mfn, >> - cmd =>> MMU_PT_UPDATE_PRESERVE_AD, 1, >> - current); >> + cmd =>> MMU_PT_UPDATE_PRESERVE_AD, 1, v); okay = !rc; >> } >> break; >> @@ -3115,7 +3134,7 @@ int do_mmu_update( >> case PGT_writable_page: >> perfc_incr(writable_mmu_updates); >> okay = paging_write_guest_entry( >> - current, va, req.val, _mfn(mfn)); >> + v, va, req.val, _mfn(mfn)); >> break; } >> page_unlock(page); >> @@ -3126,7 +3145,7 @@ int do_mmu_update( >> { >> perfc_incr(writable_mmu_updates); >> okay = paging_write_guest_entry( >> - current, va, req.val, _mfn(mfn)); >> + v, va, req.val, _mfn(mfn)); >> put_page_type(page); >> } >> >> @@ -3191,6 +3210,9 @@ int do_mmu_update( >> perfc_add(num_page_updates, i); >> >> out: >> + if (pt_owner && (pt_owner != d)) >> + rcu_unlock_domain(pt_owner); >> + >> /* Add incremental work we have done to the @done output >> parameter. */ if ( unlikely(!guest_handle_is_null(pdone)) ) >> { >> diff -r 9a74617c4a28 xen/include/public/xen.h >> --- a/xen/include/public/xen.h Tue Jun 30 01:44:30 2009 +0800 >> +++ b/xen/include/public/xen.h Tue Jun 30 01:44:33 2009 +0800 >> @@ -166,10 +166,14 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); >> * ptr[1:0] specifies the appropriate MMU_* command. * >> * ptr[1:0] == MMU_NORMAL_PT_UPDATE: >> - * Updates an entry in a page table. 
If updating an L1 table, and >> the new + * Updates an entry in a page table. >> + * The page table can be owned by current domain, or a foreign >> domain. If + * the page table belongs to a foreign domain, it should >> be specified in + * upper 16 bits in FD + * If updating an L1 table, >> and the new * table entry is valid/present, the mapped frame must >> belong to the FD, if >> - * an FD has been specified. If attempting to map an I/O page then >> the >> - * caller assumes the privilege of the FD. >> + * an foreign domain specified in lower 16 btis in FD. If >> attempting to map + * an I/O page then the caller assumes the >> privilege of the FD. >> * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of >> the caller. >> * FD == DOMID_XEN: Map restricted areas of Xen''s heap space. >> * ptr[:2] -- Machine address of the page-table entry to modify. >> >> >> Keir Fraser wrote: >>> On 01/06/2009 10:35, "Jiang, Yunhong" > <yunhong.jiang@intel.com> wrote: >>> >>>> I think I missed this logic, I just make sure the domain is >>>> suspended, but seems be wrong because a suspended domain can still >>>> be killed and relinquish resources. So I will add this logic and >>>> resend the patch. >>>> >>>> But I''m not sure if we need make sure the guest is suspended/paused >>>> during these functions, as exchange a page or pte in flight may >>>> cause trouble. Or we can leave it to user space tools, since this >>>> situation will not damage xen HV itself. >>> >>> The HV should not bother to do the suspend check. It is a >>> higher-level problem which should be checked by the tools. >>> >>> -- Keir_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2009-Jul-02 06:38 UTC
RE: [PATCH] Enhance memory exchange to support foreign domain -- Was RE: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support update page table for foreign domain
Attached is the updated patch. I tried to call domain_kill() during memory_exchange, and did hit the situation that assign_pages will fail because of the domain dying.
in_chunk_list is stolen without + * decreasing the tot_pages. If the domain is dying when + * assign pages, we need decrease the count. For those pages + * that has been assigned, it should be covered by + * domain_relinquish_resources(). + */ + dec_count = ( ( 1UL << exch.in.extent_order)* + (1UL << in_chunk_order) - + j * (1UL << exch.out.extent_order)); + + spin_lock(&d->page_alloc_lock); + d->tot_pages -= dec_count; + if (dec_count && !d->tot_pages) + drop_dom_ref = 1; + spin_unlock(&d->page_alloc_lock); + + if (drop_dom_ref) + put_domain(d); + + free_domheap_pages(page, exch.out.extent_order); + goto dying; + } /* Note that we ignore errors accessing the output extent list. */ (void)__copy_from_guest_offset( @@ -378,15 +415,15 @@ static long memory_exchange(XEN_GUEST_HA (void)__copy_to_guest_offset( exch.out.extent_start, (i<<out_chunk_order)+j, &mfn, 1); } - j++; } - BUG_ON(j != (1UL << out_chunk_order)); + BUG_ON( !(d->is_dying) && (j != (1UL << out_chunk_order)) ); } exch.nr_exchanged = exch.in.nr_extents; if ( copy_field_to_guest(arg, &exch, nr_exchanged) ) rc = -EFAULT; + rcu_unlock_domain(d); return rc; /* @@ -398,7 +435,8 @@ static long memory_exchange(XEN_GUEST_HA while ( (page = page_list_remove_head(&in_chunk_list)) ) if ( assign_pages(d, page, 0, MEMF_no_refcount) ) BUG(); - + dying: + rcu_unlock_domain(d); /* Free any output pages we managed to allocate. 
*/ while ( (page = page_list_remove_head(&out_chunk_list)) ) free_domheap_pages(page, exch.out.extent_order); diff -r 02003bee3e80 xen/common/page_alloc.c --- a/xen/common/page_alloc.c Thu Jun 25 18:31:10 2009 +0100 +++ b/xen/common/page_alloc.c Tue Jun 30 23:33:47 2009 +0800 @@ -1120,6 +1120,7 @@ int assign_pages( unsigned int memflags) { unsigned long i; + int rc = -EINVAL; spin_lock(&d->page_alloc_lock); @@ -1127,6 +1128,7 @@ int assign_pages( { gdprintk(XENLOG_INFO, "Cannot assign page to domain%d -- dying.\n", d->domain_id); + rc = -ENODEV; goto fail; } @@ -1136,6 +1138,7 @@ int assign_pages( { gdprintk(XENLOG_INFO, "Over-allocation for domain %u: %u > %u\n", d->domain_id, d->tot_pages + (1 << order), d->max_pages); + rc = -EINVAL; goto fail; } @@ -1160,7 +1163,7 @@ int assign_pages( fail: spin_unlock(&d->page_alloc_lock); - return -1; + return rc; } Keir Fraser wrote:> On 30/06/2009 10:40, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote: > >>> B. Your adjustment of tot_pages is confusing. I can''t convince >>> myself the arithmetic is correct. Furthermore messing with >>> tot_pages outside of the d->page_alloc_lock is not allowed. >> >> The idea of the adjustment is: >> >> In each loop, we remove some pages (the in_chunk_list) without >> decrease the tot_pages. Then when we get domain is dying when assign >> pages (the out_chunk_list), we need decrease the count. For those >> page that has been assigned, it should be covered by > domain_relinquish_resources(), so what we >> need decrease is: >> the number that has been removed - the number that has been assigned >> already. > > There''s still quite a logical leap to the arithmetic in your > code. Spell it > out in a comment. > > -- Keir_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel