Jiang, Yunhong
2009-May-31 11:12 UTC
[Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support update page table for foreign domain
Currently MMU_PT_UPDATE_RESERVE_AD supports updating the page table only for the current
domain. This patch adds support for foreign domains as well.
Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> Update the page
table update hypercall
diff -r f6457425560b xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Wed May 27 03:22:43 2009 +0800
+++ b/xen/arch/x86/mm.c Wed May 27 03:40:38 2009 +0800
@@ -110,6 +110,7 @@
#include <asm/hypercall.h>
#include <asm/shared.h>
#include <public/memory.h>
+#include <public/sched.h>
#include <xsm/xsm.h>
#include <xen/trace.h>
@@ -2990,7 +2991,8 @@ int do_mmu_update(
struct page_info *page;
int rc = 0, okay = 1, i = 0;
unsigned int cmd, done = 0;
- struct domain *d = current->domain;
+ struct domain *d = current->domain, *pt_owner = NULL;
+ struct vcpu *v = current;
struct domain_mmap_cache mapcache;
if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
@@ -3051,10 +3053,35 @@ int do_mmu_update(
gmfn = req.ptr >> PAGE_SHIFT;
mfn = gmfn_to_mfn(d, gmfn);
- if ( unlikely(!get_page_from_pagenr(mfn, d)) )
+ if (!mfn_valid(mfn))
+ mfn = gmfn_to_mfn(FOREIGNDOM, gmfn);
+ if (!mfn_valid(mfn))
{
MEM_LOG("Could not get page for normal update");
break;
+ }
+
+ pt_owner = page_get_owner_and_reference(mfn_to_page(mfn));
+
+ if ( pt_owner != d )
+ {
+ if ( pt_owner == FOREIGNDOM )
+ {
+ spin_lock(&FOREIGNDOM->shutdown_lock);
+ if ( !IS_PRIV(d) ||
+ !FOREIGNDOM->is_shut_down ||
+ (FOREIGNDOM->shutdown_code != SHUTDOWN_suspend) )
+ {
+ spin_unlock(&FOREIGNDOM->shutdown_lock);
+ rc = -EPERM;
+ break;
+ }
+ v = FOREIGNDOM->vcpu[0];
+ }else
+ {
+ rc = -EPERM;
+ break;
+ }
}
va = map_domain_page_with_cache(mfn, &mapcache);
@@ -3070,24 +3097,21 @@ int do_mmu_update(
{
l1_pgentry_t l1e = l1e_from_intpte(req.val);
okay = mod_l1_entry(va, l1e, mfn,
- cmd == MMU_PT_UPDATE_PRESERVE_AD,
- current);
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
}
break;
case PGT_l2_page_table:
{
l2_pgentry_t l2e = l2e_from_intpte(req.val);
okay = mod_l2_entry(va, l2e, mfn,
- cmd == MMU_PT_UPDATE_PRESERVE_AD,
- current);
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
}
break;
case PGT_l3_page_table:
{
l3_pgentry_t l3e = l3e_from_intpte(req.val);
rc = mod_l3_entry(va, l3e, mfn,
- cmd == MMU_PT_UPDATE_PRESERVE_AD, 1,
- current);
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v);
okay = !rc;
}
break;
@@ -3096,8 +3120,7 @@ int do_mmu_update(
{
l4_pgentry_t l4e = l4e_from_intpte(req.val);
rc = mod_l4_entry(va, l4e, mfn,
- cmd == MMU_PT_UPDATE_PRESERVE_AD, 1,
- current);
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v);
okay = !rc;
}
break;
@@ -3105,7 +3128,7 @@ int do_mmu_update(
case PGT_writable_page:
perfc_incr(writable_mmu_updates);
okay = paging_write_guest_entry(
- current, va, req.val, _mfn(mfn));
+ v, va, req.val, _mfn(mfn));
break;
}
page_unlock(page);
@@ -3116,11 +3139,13 @@ int do_mmu_update(
{
perfc_incr(writable_mmu_updates);
okay = paging_write_guest_entry(
- current, va, req.val, _mfn(mfn));
+ v, va, req.val, _mfn(mfn));
put_page_type(page);
}
unmap_domain_page_with_cache(va, &mapcache);
+ if (pt_owner != d)
+ spin_unlock(&FOREIGNDOM->shutdown_lock);
put_page(page);
break;
diff -r f6457425560b xen/include/public/xen.h
--- a/xen/include/public/xen.h Wed May 27 03:22:43 2009 +0800
+++ b/xen/include/public/xen.h Wed May 27 03:26:46 2009 +0800
@@ -170,6 +170,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
* table entry is valid/present, the mapped frame must belong to the FD, if
* an FD has been specified. If attempting to map an I/O page then the
* caller assumes the privilege of the FD.
+ * The page table entry normally belongs to the calling domain. Otherwise it
+ * should belong to the FD, and the FD should already be suspended.
* FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller.
FD == DOMID_XEN: Map restricted areas of Xen's heap space.
* ptr[:2] -- Machine address of the page-table entry to modify.
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Jiang, Yunhong
2009-Jun-01 06:24 UTC
RE: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support update page table for foreign domain
I want to clarify that this patch need more discussion because there is no clear way to distinguish if the page table address (i.e. address passed in req[:2] ) is owned by the current domain or by the foreign domain. So I just try to check if mfn_valid(), but I suspect if this is the right method. Thanks Yunhong Jiang xen-devel-bounces@lists.xensource.com wrote:> foreign domain > > Currently MMU_PT_UPDATE_RESERVE_AD support only update page > table for current domain. This patch add support for foreign > domain also. > > Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> Update > the page table update hypercall > > diff -r f6457425560b xen/arch/x86/mm.c > --- a/xen/arch/x86/mm.c Wed May 27 03:22:43 2009 +0800 > +++ b/xen/arch/x86/mm.c Wed May 27 03:40:38 2009 +0800 @@ -110,6 > +110,7 @@ #include <asm/hypercall.h> > #include <asm/shared.h> > #include <public/memory.h> > +#include <public/sched.h> > #include <xsm/xsm.h> > #include <xen/trace.h> > > @@ -2990,7 +2991,8 @@ int do_mmu_update( > struct page_info *page; > int rc = 0, okay = 1, i = 0; > unsigned int cmd, done = 0; > - struct domain *d = current->domain; > + struct domain *d = current->domain, *pt_owner = NULL; > + struct vcpu *v = current; > struct domain_mmap_cache mapcache; > > if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) > @@ -3051,10 +3053,35 @@ int do_mmu_update( > gmfn = req.ptr >> PAGE_SHIFT; > mfn = gmfn_to_mfn(d, gmfn); > > - if ( unlikely(!get_page_from_pagenr(mfn, d)) ) > + if (!mfn_valid(mfn)) > + mfn = gmfn_to_mfn(FOREIGNDOM, gmfn); > + if (!mfn_valid(mfn)) > { > MEM_LOG("Could not get page for normal update"); > break; + } > + > + pt_owner > page_get_owner_and_reference(mfn_to_page(mfn)); + + if ( > pt_owner != d ) + { > + if ( pt_owner == FOREIGNDOM ) > + { > + spin_lock(&FOREIGNDOM->shutdown_lock); > + if ( !IS_PRIV(d) || > + !FOREIGNDOM->is_shut_down || > + (FOREIGNDOM->shutdown_code !> SHUTDOWN_suspend) ) + { > + spin_unlock(&FOREIGNDOM->shutdown_lock); > + rc = -EPERM; > + break; > 
+ } > + v = FOREIGNDOM->vcpu[0]; > + }else > + { > + rc = -EPERM; > + break; > + } > } > > va = map_domain_page_with_cache(mfn, &mapcache); > @@ -3070,24 +3097,21 @@ int do_mmu_update( > { > l1_pgentry_t l1e = l1e_from_intpte(req.val); > okay = mod_l1_entry(va, l1e, mfn, > - cmd => MMU_PT_UPDATE_PRESERVE_AD, > - current); > + cmd => MMU_PT_UPDATE_PRESERVE_AD, v); > } > break; > case PGT_l2_page_table: > { > l2_pgentry_t l2e = l2e_from_intpte(req.val); > okay = mod_l2_entry(va, l2e, mfn, > - cmd => MMU_PT_UPDATE_PRESERVE_AD, > - current); > + cmd => MMU_PT_UPDATE_PRESERVE_AD, v); > } > break; > case PGT_l3_page_table: > { > l3_pgentry_t l3e = l3e_from_intpte(req.val); > rc = mod_l3_entry(va, l3e, mfn, > - cmd => MMU_PT_UPDATE_PRESERVE_AD, 1, > - current); > + cmd => MMU_PT_UPDATE_PRESERVE_AD, 1, v); > okay = !rc; > } > break; > @@ -3096,8 +3120,7 @@ int do_mmu_update( > { > l4_pgentry_t l4e = l4e_from_intpte(req.val); > rc = mod_l4_entry(va, l4e, mfn, > - cmd => MMU_PT_UPDATE_PRESERVE_AD, 1, > - current); > + cmd => MMU_PT_UPDATE_PRESERVE_AD, 1, v); > okay = !rc; > } > break; > @@ -3105,7 +3128,7 @@ int do_mmu_update( > case PGT_writable_page: > perfc_incr(writable_mmu_updates); > okay = paging_write_guest_entry( > - current, va, req.val, _mfn(mfn)); > + v, va, req.val, _mfn(mfn)); > break; } > page_unlock(page); > @@ -3116,11 +3139,13 @@ int do_mmu_update( > { > perfc_incr(writable_mmu_updates); > okay = paging_write_guest_entry( > - current, va, req.val, _mfn(mfn)); > + v, va, req.val, _mfn(mfn)); > put_page_type(page); > } > > unmap_domain_page_with_cache(va, &mapcache); > + if (pt_owner != d) > + spin_unlock(&FOREIGNDOM->shutdown_lock); > put_page(page); break; > > diff -r f6457425560b xen/include/public/xen.h > --- a/xen/include/public/xen.h Wed May 27 03:22:43 2009 +0800 > +++ b/xen/include/public/xen.h Wed May 27 03:26:46 2009 +0800 > @@ -170,6 +170,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); > * table entry is valid/present, the mapped frame must belong > to 
the FD, if > * an FD has been specified. If attempting to map an I/O page then the > * caller assumes the privilege of the FD. > + * The page table entry normally belongs to the calling > domain. Otherwise it > + * should belong to the FD and the FD should be suspended already > * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv > level of the caller. > * FD == DOMID_XEN: Map restricted areas of Xen''s heap space. > * ptr[:2] -- Machine address of the page-table entry to modify._______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Jun-01 07:53 UTC
Re: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support update page table for foreign domain
I''d pack an extra domid into the top 16 bits of the foreigndom parameter to mmu_update(). Bottom 16 bits remain foreign owner of data page. Upper 16 bits, if non-zero, are foreign owner of pt page (we could make this field +1 so that dom0 can be encoded as well). -- Keir On 01/06/2009 07:24, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:> I want to clarify that this patch need more discussion because there is no > clear way to distinguish if the page table address (i.e. address passed in > req[:2] ) is owned by the current domain or by the foreign domain. So I just > try to check if mfn_valid(), but I suspect if this is the right method. > > Thanks > Yunhong Jiang > > xen-devel-bounces@lists.xensource.com wrote: >> foreign domain >> >> Currently MMU_PT_UPDATE_RESERVE_AD support only update page >> table for current domain. This patch add support for foreign >> domain also. >> >> Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> Update >> the page table update hypercall >> >> diff -r f6457425560b xen/arch/x86/mm.c >> --- a/xen/arch/x86/mm.c Wed May 27 03:22:43 2009 +0800 >> +++ b/xen/arch/x86/mm.c Wed May 27 03:40:38 2009 +0800 @@ -110,6 >> +110,7 @@ #include <asm/hypercall.h> >> #include <asm/shared.h> >> #include <public/memory.h> >> +#include <public/sched.h> >> #include <xsm/xsm.h> >> #include <xen/trace.h> >> >> @@ -2990,7 +2991,8 @@ int do_mmu_update( >> struct page_info *page; >> int rc = 0, okay = 1, i = 0; >> unsigned int cmd, done = 0; >> - struct domain *d = current->domain; >> + struct domain *d = current->domain, *pt_owner = NULL; >> + struct vcpu *v = current; >> struct domain_mmap_cache mapcache; >> >> if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) >> @@ -3051,10 +3053,35 @@ int do_mmu_update( >> gmfn = req.ptr >> PAGE_SHIFT; >> mfn = gmfn_to_mfn(d, gmfn); >> >> - if ( unlikely(!get_page_from_pagenr(mfn, d)) ) >> + if (!mfn_valid(mfn)) >> + mfn = gmfn_to_mfn(FOREIGNDOM, gmfn); >> + if (!mfn_valid(mfn)) >> { >> MEM_LOG("Could not get page for 
normal update"); >> break; + } >> + >> + pt_owner >> page_get_owner_and_reference(mfn_to_page(mfn)); + + if ( >> pt_owner != d ) + { >> + if ( pt_owner == FOREIGNDOM ) >> + { >> + spin_lock(&FOREIGNDOM->shutdown_lock); >> + if ( !IS_PRIV(d) || >> + !FOREIGNDOM->is_shut_down || >> + (FOREIGNDOM->shutdown_code !>> SHUTDOWN_suspend) ) + { >> + spin_unlock(&FOREIGNDOM->shutdown_lock); >> + rc = -EPERM; >> + break; >> + } >> + v = FOREIGNDOM->vcpu[0]; >> + }else >> + { >> + rc = -EPERM; >> + break; >> + } >> } >> >> va = map_domain_page_with_cache(mfn, &mapcache); >> @@ -3070,24 +3097,21 @@ int do_mmu_update( >> { >> l1_pgentry_t l1e = l1e_from_intpte(req.val); >> okay = mod_l1_entry(va, l1e, mfn, >> - cmd =>> MMU_PT_UPDATE_PRESERVE_AD, >> - current); >> + cmd =>> MMU_PT_UPDATE_PRESERVE_AD, v); >> } >> break; >> case PGT_l2_page_table: >> { >> l2_pgentry_t l2e = l2e_from_intpte(req.val); >> okay = mod_l2_entry(va, l2e, mfn, >> - cmd =>> MMU_PT_UPDATE_PRESERVE_AD, >> - current); >> + cmd =>> MMU_PT_UPDATE_PRESERVE_AD, v); >> } >> break; >> case PGT_l3_page_table: >> { >> l3_pgentry_t l3e = l3e_from_intpte(req.val); >> rc = mod_l3_entry(va, l3e, mfn, >> - cmd =>> MMU_PT_UPDATE_PRESERVE_AD, 1, >> - current); >> + cmd =>> MMU_PT_UPDATE_PRESERVE_AD, 1, v); >> okay = !rc; >> } >> break; >> @@ -3096,8 +3120,7 @@ int do_mmu_update( >> { >> l4_pgentry_t l4e = l4e_from_intpte(req.val); >> rc = mod_l4_entry(va, l4e, mfn, >> - cmd =>> MMU_PT_UPDATE_PRESERVE_AD, 1, >> - current); >> + cmd =>> MMU_PT_UPDATE_PRESERVE_AD, 1, v); >> okay = !rc; >> } >> break; >> @@ -3105,7 +3128,7 @@ int do_mmu_update( >> case PGT_writable_page: >> perfc_incr(writable_mmu_updates); >> okay = paging_write_guest_entry( >> - current, va, req.val, _mfn(mfn)); >> + v, va, req.val, _mfn(mfn)); >> break; } >> page_unlock(page); >> @@ -3116,11 +3139,13 @@ int do_mmu_update( >> { >> perfc_incr(writable_mmu_updates); >> okay = paging_write_guest_entry( >> - current, va, req.val, _mfn(mfn)); >> + v, va, req.val, 
_mfn(mfn)); >> put_page_type(page); >> } >> >> unmap_domain_page_with_cache(va, &mapcache); >> + if (pt_owner != d) >> + spin_unlock(&FOREIGNDOM->shutdown_lock); >> put_page(page); break; >> >> diff -r f6457425560b xen/include/public/xen.h >> --- a/xen/include/public/xen.h Wed May 27 03:22:43 2009 +0800 >> +++ b/xen/include/public/xen.h Wed May 27 03:26:46 2009 +0800 >> @@ -170,6 +170,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); >> * table entry is valid/present, the mapped frame must belong >> to the FD, if >> * an FD has been specified. If attempting to map an I/O page then the >> * caller assumes the privilege of the FD. >> + * The page table entry normally belongs to the calling >> domain. Otherwise it >> + * should belong to the FD and the FD should be suspended already >> * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv >> level of the caller. >> * FD == DOMID_XEN: Map restricted areas of Xen''s heap space. >> * ptr[:2] -- Machine address of the page-table entry to modify._______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2009-Jun-01 08:40 UTC
RE: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support update page table for foreign domain
Thanks for suggestion. I''m always nervous on API changes. I''m still considering if any other potential usage mode for patch 5/6l (i.e. change page table or exchange memory for other domain),, but frustratedly realize no other requirement. Thanks -- jyh xen-devel-bounces@lists.xensource.com wrote:> I''d pack an extra domid into the top 16 bits of the foreigndom > parameter to mmu_update(). Bottom 16 bits remain foreign owner of data > page. Upper 16 > bits, if non-zero, are foreign owner of pt page (we could make > this field +1 > so that dom0 can be encoded as well). > > -- Keir > > On 01/06/2009 07:24, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote: > >> I want to clarify that this patch need more discussion because there >> is no clear way to distinguish if the page table address (i.e. >> address passed in req[:2] ) is owned by the current domain or by the >> foreign domain. So I just try to check if mfn_valid(), but I suspect >> if this is the right method. >> >> Thanks >> Yunhong Jiang >> >> xen-devel-bounces@lists.xensource.com wrote: >>> foreign domain >>> >>> Currently MMU_PT_UPDATE_RESERVE_AD support only update page >>> table for current domain. This patch add support for foreign domain >>> also. 
>>> >>> Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> Update >>> the page table update hypercall >>> >>> diff -r f6457425560b xen/arch/x86/mm.c >>> --- a/xen/arch/x86/mm.c Wed May 27 03:22:43 2009 +0800 >>> +++ b/xen/arch/x86/mm.c Wed May 27 03:40:38 2009 +0800 @@ -110,6 >>> +110,7 @@ #include <asm/hypercall.h> >>> #include <asm/shared.h> >>> #include <public/memory.h> >>> +#include <public/sched.h> >>> #include <xsm/xsm.h> >>> #include <xen/trace.h> >>> >>> @@ -2990,7 +2991,8 @@ int do_mmu_update( >>> struct page_info *page; >>> int rc = 0, okay = 1, i = 0; >>> unsigned int cmd, done = 0; >>> - struct domain *d = current->domain; >>> + struct domain *d = current->domain, *pt_owner = NULL; >>> + struct vcpu *v = current; >>> struct domain_mmap_cache mapcache; >>> >>> if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) >>> @@ -3051,10 +3053,35 @@ int do_mmu_update( >>> gmfn = req.ptr >> PAGE_SHIFT; >>> mfn = gmfn_to_mfn(d, gmfn); >>> >>> - if ( unlikely(!get_page_from_pagenr(mfn, d)) ) >>> + if (!mfn_valid(mfn)) >>> + mfn = gmfn_to_mfn(FOREIGNDOM, gmfn); >>> + if (!mfn_valid(mfn)) >>> { >>> MEM_LOG("Could not get page for normal update"); >>> break; + } + >>> + pt_owner >>> page_get_owner_and_reference(mfn_to_page(mfn)); + + if ( >>> pt_owner != d ) + { >>> + if ( pt_owner == FOREIGNDOM ) >>> + { >>> + spin_lock(&FOREIGNDOM->shutdown_lock); >>> + if ( !IS_PRIV(d) || >>> + !FOREIGNDOM->is_shut_down || >>> + (FOREIGNDOM->shutdown_code !>>> SHUTDOWN_suspend) ) + { >>> + spin_unlock(&FOREIGNDOM->shutdown_lock); >>> + rc = -EPERM; >>> + break; >>> + } >>> + v = FOREIGNDOM->vcpu[0]; >>> + }else >>> + { >>> + rc = -EPERM; >>> + break; >>> + } >>> } >>> >>> va = map_domain_page_with_cache(mfn, &mapcache); >>> @@ -3070,24 +3097,21 @@ int do_mmu_update( >>> { >>> l1_pgentry_t l1e = l1e_from_intpte(req.val); >>> okay = mod_l1_entry(va, l1e, mfn, >>> - cmd =>>> MMU_PT_UPDATE_PRESERVE_AD, >>> - current); >>> + cmd =>>> MMU_PT_UPDATE_PRESERVE_AD, v); >>> } >>> break; >>> case 
PGT_l2_page_table: >>> { >>> l2_pgentry_t l2e = l2e_from_intpte(req.val); >>> okay = mod_l2_entry(va, l2e, mfn, >>> - cmd =>>> MMU_PT_UPDATE_PRESERVE_AD, >>> - current); >>> + cmd =>>> MMU_PT_UPDATE_PRESERVE_AD, v); >>> } >>> break; >>> case PGT_l3_page_table: >>> { >>> l3_pgentry_t l3e = l3e_from_intpte(req.val); >>> rc = mod_l3_entry(va, l3e, mfn, >>> - cmd =>>> MMU_PT_UPDATE_PRESERVE_AD, 1, >>> - current); >>> + cmd =>>> MMU_PT_UPDATE_PRESERVE_AD, 1, v); >>> okay = !rc; >>> } >>> break; >>> @@ -3096,8 +3120,7 @@ int do_mmu_update( >>> { >>> l4_pgentry_t l4e = l4e_from_intpte(req.val); >>> rc = mod_l4_entry(va, l4e, mfn, >>> - cmd =>>> MMU_PT_UPDATE_PRESERVE_AD, 1, >>> - current); >>> + cmd =>>> MMU_PT_UPDATE_PRESERVE_AD, 1, v); >>> okay = !rc; >>> } >>> break; >>> @@ -3105,7 +3128,7 @@ int do_mmu_update( >>> case PGT_writable_page: >>> perfc_incr(writable_mmu_updates); >>> okay = paging_write_guest_entry( >>> - current, va, req.val, _mfn(mfn)); >>> + v, va, req.val, _mfn(mfn)); >>> break; } page_unlock(page); >>> @@ -3116,11 +3139,13 @@ int do_mmu_update( >>> { >>> perfc_incr(writable_mmu_updates); >>> okay = paging_write_guest_entry( >>> - current, va, req.val, _mfn(mfn)); >>> + v, va, req.val, _mfn(mfn)); >>> put_page_type(page); >>> } >>> >>> unmap_domain_page_with_cache(va, &mapcache); >>> + if (pt_owner != d) >>> + spin_unlock(&FOREIGNDOM->shutdown_lock); >>> put_page(page); break; >>> >>> diff -r f6457425560b xen/include/public/xen.h >>> --- a/xen/include/public/xen.h Wed May 27 03:22:43 2009 +0800 >>> +++ b/xen/include/public/xen.h Wed May 27 03:26:46 2009 +0800 >>> @@ -170,6 +170,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); >>> * table entry is valid/present, the mapped frame must belong >>> to the FD, if >>> * an FD has been specified. If attempting to map an I/O page then >>> the >>> * caller assumes the privilege of the FD. >>> + * The page table entry normally belongs to the calling >>> domain. 
Otherwise it >>> + * should belong to the FD and the FD should be suspended already >>> * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv >>> level of the caller. >>> * FD == DOMID_XEN: Map restricted areas of Xen''s heap space. >>> * ptr[:2] -- Machine address of the page-table entry to modify. > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Jun-01 09:20 UTC
Re: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support update page table for foreign domain
On 01/06/2009 09:40, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:> Thanks for suggestion. I''m always nervous on API changes. > > I''m still considering if any other potential usage mode for patch 5/6l (i.e. > change page table or exchange memory for other domain),, but frustratedly > realize no other requirement.The API for XENMEM_exchange already permits specifying a foreign domain, so you''re just filling in the missing implementation. By the way I did not check your implementation, but the major subtlety is that you must take care for domains which are dying (d->is_dying). Giving new pages to such a domain is generally a bug because you race domain_relinquish_resources() which is walking the domain''s page lists and freeing the pages. You therefore risk leaving a zombie domain which will not die (refcount never zero). See for example how that is handled with page_alloc.c for calls such as XENMEM_populate_physmap, and how it is handled specially by MMUEXT_PIN_L*_TABLE. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2009-Jun-01 09:35 UTC
RE: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support update page table for foreign domain
Keir Fraser wrote:> On 01/06/2009 09:40, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote: > >> Thanks for suggestion. I''m always nervous on API changes. >> >> I''m still considering if any other potential usage mode for patch >> 5/6l (i.e. change page table or exchange memory for other domain),, >> but frustratedly realize no other requirement. > > The API for XENMEM_exchange already permits specifying a > foreign domain, so > you''re just filling in the missing implementation. By the way I did > not check your implementation, but the major subtlety is that you > must take care > for domains which are dying (d->is_dying). Giving new pages to > such a domain > is generally a bug because you race > domain_relinquish_resources() which is > walking the domain''s page lists and freeing the pages. You > therefore risk > leaving a zombie domain which will not die (refcount never zero). > > See for example how that is handled with page_alloc.c for calls such > as XENMEM_populate_physmap, and how it is handled specially by > MMUEXT_PIN_L*_TABLE. > > -- KeirI think I missed this logic, I just make sure the domain is suspended, but seems be wrong because a suspended domain can still be killed and relinquish resources. So I will add this logic and resend the patch. But I''m not sure if we need make sure the guest is suspended/paused during these functions, as exchange a page or pte in flight may cause trouble. Or we can leave it to user space tools, since this situation will not damage xen HV itself. Thanks Yunhong Jiang _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Jun-01 09:45 UTC
Re: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support update page table for foreign domain
On 01/06/2009 10:35, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:> I think I missed this logic, I just make sure the domain is suspended, but > seems be wrong because a suspended domain can still be killed and relinquish > resources. So I will add this logic and resend the patch. > > But I''m not sure if we need make sure the guest is suspended/paused during > these functions, as exchange a page or pte in flight may cause trouble. Or we > can leave it to user space tools, since this situation will not damage xen HV > itself.The HV should not bother to do the suspend check. It is a higher-level problem which should be checked by the tools. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2009-Jun-30 07:55 UTC
[PATCH] Enhance memory exchange to support foreign domain -- Was RE: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support update page table for foreign domain
Keir, this is the patch based on the earlier discussion. Can you please have a
look at it? I can't trigger the corner case of the domain dying, so I
hope it can be verified through code review.
Thanks
Yunhong Jiang
Extend memory_exchange to support exchange memory for foreign domain.
When the domain is killed during the memory exchange, it will try to decrease
the domain's page count for pages that have been removed from the page_list.
Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
diff -r 02003bee3e80 xen/common/memory.c
--- a/xen/common/memory.c Thu Jun 25 18:31:10 2009 +0100
+++ b/xen/common/memory.c Tue Jun 30 01:44:28 2009 +0800
@@ -265,16 +265,22 @@ static long memory_exchange(XEN_GUEST_HA
out_chunk_order = exch.in.extent_order - exch.out.extent_order;
}
- /*
- * Only support exchange on calling domain right now. Otherwise there are
- * tricky corner cases to consider (e.g., dying domain).
- */
- if ( unlikely(exch.in.domid != DOMID_SELF) )
- {
- rc = IS_PRIV(current->domain) ? -EINVAL : -EPERM;
- goto fail_early;
- }
- d = current->domain;
+ if ( likely(exch.in.domid == DOMID_SELF) )
+ {
+ d = rcu_lock_current_domain();
+ }
+ else
+ {
+ if ( (d = rcu_lock_domain_by_id(exch.in.domid)) == NULL )
+ goto fail_early;
+
+ if ( !IS_PRIV_FOR(current->domain, d) )
+ {
+ rcu_unlock_domain(d);
+ rc = -EPERM;
+ goto fail_early;
+ }
+ }
memflags |= MEMF_bits(domain_clamp_alloc_bitsize(
d,
@@ -292,11 +298,15 @@ static long memory_exchange(XEN_GUEST_HA
if ( hypercall_preempt_check() )
{
exch.nr_exchanged = i << in_chunk_order;
+ rcu_unlock_domain(d);
if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
return -EFAULT;
return hypercall_create_continuation(
__HYPERVISOR_memory_op, "lh", XENMEM_exchange, arg);
}
+
+ if (d->is_dying)
+ goto dying;
/* Steal a chunk''s worth of input pages from the domain. */
for ( j = 0; j < (1UL << in_chunk_order); j++ )
@@ -360,9 +370,24 @@ static long memory_exchange(XEN_GUEST_HA
j = 0;
while ( (page = page_list_remove_head(&out_chunk_list)) )
{
- if ( assign_pages(d, page, exch.out.extent_order,
- MEMF_no_refcount) )
+ rc = (assign_pages(d, page, exch.out.extent_order,
+ MEMF_no_refcount));
+
+ if (rc == -EINVAL)
BUG();
+ /* Domain is being destroyed */
+ else if (rc == -ENODEV)
+ {
+ int dec_count;
+ dec_count = ( ( 1UL << exch.in.extent_order)*
+ (1UL << in_chunk_order) -
+ j * (1UL << exch.out.extent_order));
+ d->tot_pages -= dec_count;
+
+ if (dec_count && !d->tot_pages)
+ put_domain(d);
+ break;
+ }
/* Note that we ignore errors accessing the output extent list. */
(void)__copy_from_guest_offset(
@@ -378,15 +403,15 @@ static long memory_exchange(XEN_GUEST_HA
(void)__copy_to_guest_offset(
exch.out.extent_start, (i<<out_chunk_order)+j,
&mfn, 1);
}
-
j++;
}
- BUG_ON(j != (1UL << out_chunk_order));
+ BUG_ON( !(d->is_dying) && (j != (1UL <<
out_chunk_order)) );
}
exch.nr_exchanged = exch.in.nr_extents;
if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
rc = -EFAULT;
+ rcu_unlock_domain(d);
return rc;
/*
@@ -398,7 +423,8 @@ static long memory_exchange(XEN_GUEST_HA
while ( (page = page_list_remove_head(&in_chunk_list)) )
if ( assign_pages(d, page, 0, MEMF_no_refcount) )
BUG();
-
+ dying:
+ rcu_unlock_domain(d);
/* Free any output pages we managed to allocate. */
while ( (page = page_list_remove_head(&out_chunk_list)) )
free_domheap_pages(page, exch.out.extent_order);
diff -r 02003bee3e80 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c Thu Jun 25 18:31:10 2009 +0100
+++ b/xen/common/page_alloc.c Sun Jun 28 23:39:28 2009 +0800
@@ -1120,6 +1120,7 @@ int assign_pages(
unsigned int memflags)
{
unsigned long i;
+ int rc = -EINVAL;
spin_lock(&d->page_alloc_lock);
@@ -1127,6 +1128,7 @@ int assign_pages(
{
gdprintk(XENLOG_INFO, "Cannot assign page to domain%d --
dying.\n",
d->domain_id);
+ rc = -ENODEV;
goto fail;
}
@@ -1136,6 +1138,7 @@ int assign_pages(
{
gdprintk(XENLOG_INFO, "Over-allocation for domain %u: %u >
%u\n",
d->domain_id, d->tot_pages + (1 << order),
d->max_pages);
+ rc = -EINVAL;
goto fail;
}
@@ -1160,7 +1163,7 @@ int assign_pages(
fail:
spin_unlock(&d->page_alloc_lock);
- return -1;
+ return rc;
}
Keir Fraser wrote:> On 01/06/2009 09:40, "Jiang, Yunhong"
<yunhong.jiang@intel.com> wrote:
>
>> Thanks for suggestion. I''m always nervous on API changes.
>>
>> I''m still considering if any other potential usage mode for
patch
>> 5/6l (i.e. change page table or exchange memory for other domain),,
>> but frustratedly realize no other requirement.
>
> The API for XENMEM_exchange already permits specifying a
> foreign domain, so
> you''re just filling in the missing implementation. By the way I
did
> not check your implementation, but the major subtlety is that you
> must take care
> for domains which are dying (d->is_dying). Giving new pages to
> such a domain
> is generally a bug because you race
> domain_relinquish_resources() which is
> walking the domain''s page lists and freeing the pages. You
> therefore risk
> leaving a zombie domain which will not die (refcount never zero).
>
> See for example how that is handled with page_alloc.c for calls such
> as XENMEM_populate_physmap, and how it is handled specially by
> MMUEXT_PIN_L*_TABLE.
>
> -- Keir
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Jiang, Yunhong
2009-Jun-30 07:58 UTC
RE: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support update page table for foreign domain
Keir, this is the updated version; please have a look at it.
Thanks
Yunhong Jiang
Currently MMU_PT_UPDATE_RESERVE_AD supports updating the page table only for the current
domain. This patch adds support for foreign domains as well.
Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
diff -r 9a74617c4a28 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Tue Jun 30 01:44:30 2009 +0800
+++ b/xen/arch/x86/mm.c Tue Jun 30 01:51:15 2009 +0800
@@ -110,6 +110,7 @@
#include <asm/hypercall.h>
#include <asm/shared.h>
#include <public/memory.h>
+#include <public/sched.h>
#include <xsm/xsm.h>
#include <xen/trace.h>
@@ -2999,8 +3000,9 @@ int do_mmu_update(
unsigned long gpfn, gmfn, mfn;
struct page_info *page;
int rc = 0, okay = 1, i = 0;
- unsigned int cmd, done = 0;
- struct domain *d = current->domain;
+ unsigned int cmd, done = 0, pt_dom;
+ struct domain *d = current->domain, *pt_owner = NULL;
+ struct vcpu *v = current;
struct domain_mmap_cache mapcache;
if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
@@ -3018,7 +3020,28 @@ int do_mmu_update(
goto out;
}
- if ( !set_foreigndom(foreigndom) )
+ pt_dom = foreigndom >> 16;
+ if (pt_dom)
+ {
+ /* the page table belongs to foreign domain */
+ pt_owner = rcu_lock_domain_by_id(pt_dom - 1);
+ if (!pt_owner )
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+ if( !IS_PRIV_FOR(d, pt_owner) )
+ {
+ rc = -ESRCH;
+ goto out;
+ }
+ if (pt_dom == d)
+ rcu_unlock_domain(pt_owner);
+ v = pt_owner->vcpu[0];
+ } else
+ pt_owner = d;
+
+ if ( !set_foreigndom(foreigndom & 0xFFFFU) )
{
rc = -ESRCH;
goto out;
@@ -3059,9 +3082,9 @@ int do_mmu_update(
req.ptr -= cmd;
gmfn = req.ptr >> PAGE_SHIFT;
- mfn = gmfn_to_mfn(d, gmfn);
-
- if ( unlikely(!get_page_from_pagenr(mfn, d)) )
+ mfn = gmfn_to_mfn(pt_owner, gmfn);
+
+ if ( unlikely(!get_page_from_pagenr(mfn, pt_owner)) )
{
MEM_LOG("Could not get page for normal update");
break;
@@ -3080,24 +3103,21 @@ int do_mmu_update(
{
l1_pgentry_t l1e = l1e_from_intpte(req.val);
okay = mod_l1_entry(va, l1e, mfn,
- cmd == MMU_PT_UPDATE_PRESERVE_AD,
- current);
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
}
break;
case PGT_l2_page_table:
{
l2_pgentry_t l2e = l2e_from_intpte(req.val);
okay = mod_l2_entry(va, l2e, mfn,
- cmd == MMU_PT_UPDATE_PRESERVE_AD,
- current);
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
}
break;
case PGT_l3_page_table:
{
l3_pgentry_t l3e = l3e_from_intpte(req.val);
rc = mod_l3_entry(va, l3e, mfn,
- cmd == MMU_PT_UPDATE_PRESERVE_AD, 1,
- current);
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v);
okay = !rc;
}
break;
@@ -3106,8 +3126,7 @@ int do_mmu_update(
{
l4_pgentry_t l4e = l4e_from_intpte(req.val);
rc = mod_l4_entry(va, l4e, mfn,
- cmd == MMU_PT_UPDATE_PRESERVE_AD, 1,
- current);
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v);
okay = !rc;
}
break;
@@ -3115,7 +3134,7 @@ int do_mmu_update(
case PGT_writable_page:
perfc_incr(writable_mmu_updates);
okay = paging_write_guest_entry(
- current, va, req.val, _mfn(mfn));
+ v, va, req.val, _mfn(mfn));
break;
}
page_unlock(page);
@@ -3126,7 +3145,7 @@ int do_mmu_update(
{
perfc_incr(writable_mmu_updates);
okay = paging_write_guest_entry(
- current, va, req.val, _mfn(mfn));
+ v, va, req.val, _mfn(mfn));
put_page_type(page);
}
@@ -3191,6 +3210,9 @@ int do_mmu_update(
perfc_add(num_page_updates, i);
out:
+ if (pt_owner && (pt_owner != d))
+ rcu_unlock_domain(pt_owner);
+
/* Add incremental work we have done to the @done output parameter. */
if ( unlikely(!guest_handle_is_null(pdone)) )
{
diff -r 9a74617c4a28 xen/include/public/xen.h
--- a/xen/include/public/xen.h Tue Jun 30 01:44:30 2009 +0800
+++ b/xen/include/public/xen.h Tue Jun 30 01:44:33 2009 +0800
@@ -166,10 +166,14 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
* ptr[1:0] specifies the appropriate MMU_* command.
*
* ptr[1:0] == MMU_NORMAL_PT_UPDATE:
- * Updates an entry in a page table. If updating an L1 table, and the new
+ * Updates an entry in a page table.
+ * The page table can be owned by the current domain, or a foreign domain. If
+ * the page table belongs to a foreign domain, it should be specified in
+ * the upper 16 bits of FD.
+ * If updating an L1 table, and the new
* table entry is valid/present, the mapped frame must belong to the FD, if
- * an FD has been specified. If attempting to map an I/O page then the
- * caller assumes the privilege of the FD.
+ * a foreign domain is specified in the lower 16 bits of FD. If attempting
+ * to map an I/O page then the caller assumes the privilege of the FD.
* FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller.
* FD == DOMID_XEN: Map restricted areas of Xen''s heap space.
* ptr[:2] -- Machine address of the page-table entry to modify.
Keir Fraser wrote:> On 01/06/2009 10:35, "Jiang, Yunhong"
<yunhong.jiang@intel.com> wrote:
>
>> I think I missed this logic, I just make sure the domain is
>> suspended, but seems be wrong because a suspended domain can still
>> be killed and relinquish resources. So I will add this logic and
>> resend the patch.
>>
>> But I''m not sure if we need make sure the guest is
suspended/paused
>> during these functions, as exchange a page or pte in flight may
>> cause trouble. Or we can leave it to user space tools, since this
>> situation will not damage xen HV itself.
>
> The HV should not bother to do the suspend check. It is a higher-level
> problem which should be checked by the tools.
>
> -- Keir
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Keir Fraser
2009-Jun-30 09:17 UTC
Re: [PATCH] Enhance memory exchange to support foreign domain -- Was RE: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support update page table for foreign domain
Perhaps try dropping calls to domain_kill() into memory_exchange()? It may not complete the destroy, but it will set ->is_dying and trigger your error paths. Then you can ''xm destroy'' from the tools afterwards to try to finish up the destroy, and see if you get left with a zombie or not. Looking at this patch I can immediately see that: A. Your extra check(s) of is_dying are pointless. Only the check in assign_pages() is critical. So just leave it to that. B. Your adjustment of tot_pages is confusing. I can''t convince myself the arithmetic is correct. Furthermore messing with tot_pages outside of the d->page_alloc_lock is not allowed. -- Keir On 30/06/2009 08:55, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:> Keir, this is the patch based on the discussion before. Can you please have a > look on it? I can''t triger the corner case of domain being dying, so I hope it > can be achieved through code review. > > Thanks > Yunhong Jiang > > Extend memory_exchange to support exchange memory for foreign domain. > When domain is killed during the memory exchange, it will try to decrease > domain''s page count that has been removed from page_list. > > Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> > > diff -r 02003bee3e80 xen/common/memory.c > --- a/xen/common/memory.c Thu Jun 25 18:31:10 2009 +0100 > +++ b/xen/common/memory.c Tue Jun 30 01:44:28 2009 +0800 > @@ -265,16 +265,22 @@ static long memory_exchange(XEN_GUEST_HA > out_chunk_order = exch.in.extent_order - exch.out.extent_order; > } > > - /* > - * Only support exchange on calling domain right now. Otherwise there are > - * tricky corner cases to consider (e.g., dying domain). > - */ > - if ( unlikely(exch.in.domid != DOMID_SELF) ) > - { > - rc = IS_PRIV(current->domain) ? 
-EINVAL : -EPERM; > - goto fail_early; > - } > - d = current->domain; > + if ( likely(exch.in.domid == DOMID_SELF) ) > + { > + d = rcu_lock_current_domain(); > + } > + else > + { > + if ( (d = rcu_lock_domain_by_id(exch.in.domid)) == NULL ) > + goto fail_early; > + > + if ( !IS_PRIV_FOR(current->domain, d) ) > + { > + rcu_unlock_domain(d); > + rc = -EPERM; > + goto fail_early; > + } > + } > > memflags |= MEMF_bits(domain_clamp_alloc_bitsize( > d, > @@ -292,11 +298,15 @@ static long memory_exchange(XEN_GUEST_HA > if ( hypercall_preempt_check() ) > { > exch.nr_exchanged = i << in_chunk_order; > + rcu_unlock_domain(d); > if ( copy_field_to_guest(arg, &exch, nr_exchanged) ) > return -EFAULT; > return hypercall_create_continuation( > __HYPERVISOR_memory_op, "lh", XENMEM_exchange, arg); > } > + > + if (d->is_dying) > + goto dying; > > /* Steal a chunk''s worth of input pages from the domain. */ > for ( j = 0; j < (1UL << in_chunk_order); j++ ) > @@ -360,9 +370,24 @@ static long memory_exchange(XEN_GUEST_HA > j = 0; > while ( (page = page_list_remove_head(&out_chunk_list)) ) > { > - if ( assign_pages(d, page, exch.out.extent_order, > - MEMF_no_refcount) ) > + rc = (assign_pages(d, page, exch.out.extent_order, > + MEMF_no_refcount)); > + > + if (rc == -EINVAL) > BUG(); > + /* Domain is being destroy */ > + else if (rc == -ENODEV) > + { > + int dec_count; > + dec_count = ( ( 1UL << exch.in.extent_order)* > + (1UL << in_chunk_order) - > + j * (1UL << exch.out.extent_order)); > + d->tot_pages -= dec_count; > + > + if (dec_count && !d->tot_pages) > + put_domain(d); > + break; > + } > > /* Note that we ignore errors accessing the output extent list. 
> */ > (void)__copy_from_guest_offset( > @@ -378,15 +403,15 @@ static long memory_exchange(XEN_GUEST_HA > (void)__copy_to_guest_offset( > exch.out.extent_start, (i<<out_chunk_order)+j, &mfn, 1); > } > - > j++; > } > - BUG_ON(j != (1UL << out_chunk_order)); > + BUG_ON( !(d->is_dying) && (j != (1UL << out_chunk_order)) ); > } > > exch.nr_exchanged = exch.in.nr_extents; > if ( copy_field_to_guest(arg, &exch, nr_exchanged) ) > rc = -EFAULT; > + rcu_unlock_domain(d); > return rc; > > /* > @@ -398,7 +423,8 @@ static long memory_exchange(XEN_GUEST_HA > while ( (page = page_list_remove_head(&in_chunk_list)) ) > if ( assign_pages(d, page, 0, MEMF_no_refcount) ) > BUG(); > - > + dying: > + rcu_unlock_domain(d); > /* Free any output pages we managed to allocate. */ > while ( (page = page_list_remove_head(&out_chunk_list)) ) > free_domheap_pages(page, exch.out.extent_order); > diff -r 02003bee3e80 xen/common/page_alloc.c > --- a/xen/common/page_alloc.c Thu Jun 25 18:31:10 2009 +0100 > +++ b/xen/common/page_alloc.c Sun Jun 28 23:39:28 2009 +0800 > @@ -1120,6 +1120,7 @@ int assign_pages( > unsigned int memflags) > { > unsigned long i; > + int rc = -EINVAL; > > spin_lock(&d->page_alloc_lock); > > @@ -1127,6 +1128,7 @@ int assign_pages( > { > gdprintk(XENLOG_INFO, "Cannot assign page to domain%d -- dying.\n", > d->domain_id); > + rc = -ENODEV; > goto fail; > } > > @@ -1136,6 +1138,7 @@ int assign_pages( > { > gdprintk(XENLOG_INFO, "Over-allocation for domain %u: %u > %u\n", > d->domain_id, d->tot_pages + (1 << order), d->max_pages); > + rc = -EINVAL; > goto fail; > } > > @@ -1160,7 +1163,7 @@ int assign_pages( > > fail: > spin_unlock(&d->page_alloc_lock); > - return -1; > + return rc; > } > > > > > Keir Fraser wrote: >> On 01/06/2009 09:40, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote: >> >>> Thanks for suggestion. I''m always nervous on API changes. >>> >>> I''m still considering if any other potential usage mode for patch >>> 5/6l (i.e. 
change page table or exchange memory for other domain),, >>> but frustratedly realize no other requirement. >> >> The API for XENMEM_exchange already permits specifying a >> foreign domain, so >> you''re just filling in the missing implementation. By the way I did >> not check your implementation, but the major subtlety is that you >> must take care >> for domains which are dying (d->is_dying). Giving new pages to >> such a domain >> is generally a bug because you race >> domain_relinquish_resources() which is >> walking the domain''s page lists and freeing the pages. You >> therefore risk >> leaving a zombie domain which will not die (refcount never zero). >> >> See for example how that is handled with page_alloc.c for calls such >> as XENMEM_populate_physmap, and how it is handled specially by >> MMUEXT_PIN_L*_TABLE. >> >> -- Keir_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2009-Jun-30 09:40 UTC
RE: [PATCH] Enhance memory exchange to support foreign domain -- Was RE: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support update page table for foreign domain
Keir Fraser wrote:> Perhaps try dropping calls to domain_kill() into > memory_exchange()? It may > not complete the destroy, but it will set ->is_dying and > trigger your error > paths. Then you can ''xm destroy'' from the tools afterwards to > try to finish > up the destroy, and see if you get left with a zombie or not.It will cover some case, but not all. But yees, I can try that.> > Looking at this patch I can immediately see that: > A. Your extra check(s) of is_dying are pointless. Only the check in > assign_pages() is critical. So just leave it to that.I add that to avoid meaningless loop if the domain is dying. But yes it is not important because of low probability.> B. Your adjustment of tot_pages is confusing. I can''t > convince myself the > arithmetic is correct. Furthermore messing with tot_pages > outside of the > d->page_alloc_lock is not allowed.The idea of the adjustment is: In each loop, we remove some pages (the in_chunk_list) without decrease the tot_pages. Then when we get domain is dying when assign pages (the out_chunk_list), we need decrease the count. For those page that has been assigned, it should be covered by domain_relinquish_resources(), so what we need decrease is: the number that has been removed - the number that has been assigned already. Yes, I need keep the page_alloc_lock for tot_pages.> > -- Keir > > On 30/06/2009 08:55, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote: > >> Keir, this is the patch based on the discussion before. Can you >> please have a look on it? I can''t triger the corner case of domain >> being dying, so I hope it can be achieved through code review. >> >> Thanks >> Yunhong Jiang >> >> Extend memory_exchange to support exchange memory for foreign domain. >> When domain is killed during the memory exchange, it will try to >> decrease domain''s page count that has been removed from page_list. 
>> >> Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> >> >> diff -r 02003bee3e80 xen/common/memory.c >> --- a/xen/common/memory.c Thu Jun 25 18:31:10 2009 +0100 >> +++ b/xen/common/memory.c Tue Jun 30 01:44:28 2009 +0800 >> @@ -265,16 +265,22 @@ static long memory_exchange(XEN_GUEST_HA >> out_chunk_order = exch.in.extent_order - >> exch.out.extent_order; } >> >> - /* >> - * Only support exchange on calling domain right now. Otherwise >> there are >> - * tricky corner cases to consider (e.g., dying domain). >> - */ >> - if ( unlikely(exch.in.domid != DOMID_SELF) ) >> - { >> - rc = IS_PRIV(current->domain) ? -EINVAL : -EPERM; >> - goto fail_early; >> - } >> - d = current->domain; >> + if ( likely(exch.in.domid == DOMID_SELF) ) >> + { >> + d = rcu_lock_current_domain(); >> + } >> + else >> + { >> + if ( (d = rcu_lock_domain_by_id(exch.in.domid)) == NULL ) + >> goto fail_early; + >> + if ( !IS_PRIV_FOR(current->domain, d) ) >> + { >> + rcu_unlock_domain(d); >> + rc = -EPERM; >> + goto fail_early; >> + } >> + } >> >> memflags |= MEMF_bits(domain_clamp_alloc_bitsize( d, >> @@ -292,11 +298,15 @@ static long memory_exchange(XEN_GUEST_HA >> if ( hypercall_preempt_check() ) >> { >> exch.nr_exchanged = i << in_chunk_order; >> + rcu_unlock_domain(d); >> if ( copy_field_to_guest(arg, &exch, nr_exchanged) ) >> return -EFAULT; >> return hypercall_create_continuation( >> __HYPERVISOR_memory_op, "lh", XENMEM_exchange, >> arg); } + >> + if (d->is_dying) >> + goto dying; >> >> /* Steal a chunk''s worth of input pages from the domain. 
*/ >> for ( j = 0; j < (1UL << in_chunk_order); j++ ) >> @@ -360,9 +370,24 @@ static long memory_exchange(XEN_GUEST_HA >> j = 0; while ( (page >> page_list_remove_head(&out_chunk_list)) ) { >> - if ( assign_pages(d, page, exch.out.extent_order, >> - MEMF_no_refcount) ) >> + rc = (assign_pages(d, page, exch.out.extent_order, >> + MEMF_no_refcount)); >> + >> + if (rc == -EINVAL) >> BUG(); >> + /* Domain is being destroy */ >> + else if (rc == -ENODEV) >> + { >> + int dec_count; >> + dec_count = ( ( 1UL << exch.in.extent_order)* >> + (1UL << in_chunk_order) - >> + j * (1UL << exch.out.extent_order)); >> + d->tot_pages -= dec_count; >> + >> + if (dec_count && !d->tot_pages) >> + put_domain(d); >> + break; >> + } >> >> /* Note that we ignore errors accessing the output >> extent list. */ (void)__copy_from_guest_offset( >> @@ -378,15 +403,15 @@ static long memory_exchange(XEN_GUEST_HA >> (void)__copy_to_guest_offset( >> exch.out.extent_start, > (i<<out_chunk_order)+j, &mfn, 1); >> } >> - >> j++; >> } >> - BUG_ON(j != (1UL << out_chunk_order)); >> + BUG_ON( !(d->is_dying) && (j != (1UL << out_chunk_order)) >> ); } >> >> exch.nr_exchanged = exch.in.nr_extents; >> if ( copy_field_to_guest(arg, &exch, nr_exchanged) ) >> rc = -EFAULT; + rcu_unlock_domain(d); >> return rc; >> >> /* >> @@ -398,7 +423,8 @@ static long memory_exchange(XEN_GUEST_HA >> while ( (page = page_list_remove_head(&in_chunk_list)) ) >> if ( assign_pages(d, page, 0, MEMF_no_refcount) ) >> BUG(); - >> + dying: >> + rcu_unlock_domain(d); >> /* Free any output pages we managed to allocate. 
*/ >> while ( (page = page_list_remove_head(&out_chunk_list)) ) >> free_domheap_pages(page, exch.out.extent_order); >> diff -r 02003bee3e80 xen/common/page_alloc.c >> --- a/xen/common/page_alloc.c Thu Jun 25 18:31:10 2009 +0100 >> +++ b/xen/common/page_alloc.c Sun Jun 28 23:39:28 2009 +0800 >> @@ -1120,6 +1120,7 @@ int assign_pages( >> unsigned int memflags) >> { >> unsigned long i; >> + int rc = -EINVAL; >> >> spin_lock(&d->page_alloc_lock); >> >> @@ -1127,6 +1128,7 @@ int assign_pages( >> { >> gdprintk(XENLOG_INFO, "Cannot assign page to domain%d -- >> dying.\n", d->domain_id); >> + rc = -ENODEV; >> goto fail; >> } >> >> @@ -1136,6 +1138,7 @@ int assign_pages( >> { >> gdprintk(XENLOG_INFO, "Over-allocation for domain %u: >> %u > %u\n", d->domain_id, d->tot_pages + (1 << >> order), d->max_pages); + rc = -EINVAL; goto >> fail; } >> >> @@ -1160,7 +1163,7 @@ int assign_pages( >> >> fail: >> spin_unlock(&d->page_alloc_lock); >> - return -1; >> + return rc; >> } >> >> >> >> >> Keir Fraser wrote: >>> On 01/06/2009 09:40, "Jiang, Yunhong" > <yunhong.jiang@intel.com> wrote: >>> >>>> Thanks for suggestion. I''m always nervous on API changes. >>>> >>>> I''m still considering if any other potential usage mode for patch >>>> 5/6l (i.e. change page table or exchange memory for other domain),, >>>> but frustratedly realize no other requirement. >>> >>> The API for XENMEM_exchange already permits specifying a >>> foreign domain, so >>> you''re just filling in the missing implementation. By the way I did >>> not check your implementation, but the major subtlety is that you >>> must take care for domains which are dying (d->is_dying). Giving >>> new pages to >>> such a domain >>> is generally a bug because you race >>> domain_relinquish_resources() which is >>> walking the domain''s page lists and freeing the pages. You >>> therefore risk leaving a zombie domain which will not die (refcount >>> never zero). 
>>> >>> See for example how that is handled with page_alloc.c for calls such >>> as XENMEM_populate_physmap, and how it is handled specially by >>> MMUEXT_PIN_L*_TABLE. >>> >>> -- Keir_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Jun-30 10:40 UTC
Re: [PATCH] Enhance memory exchange to support foreign domain -- Was RE: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support update page table for foreign domain
On 30/06/2009 10:40, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:>> B. Your adjustment of tot_pages is confusing. I can''t >> convince myself the >> arithmetic is correct. Furthermore messing with tot_pages >> outside of the >> d->page_alloc_lock is not allowed. > > The idea of the adjustment is: > > In each loop, we remove some pages (the in_chunk_list) without decrease the > tot_pages. Then when we get domain is dying when assign pages (the > out_chunk_list), we need decrease the count. For those page that has been > assigned, it should be covered by domain_relinquish_resources(), so what we > need decrease is: > the number that has been removed - the number that has been assigned already.There''s still quite a logical leap to the arithmetic in your code. Spell it out in a comment. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Jun-30 14:59 UTC
Re: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support update page table for foreign domain
This doesn''t even build. You compare a pointer with an integer. -- Keir On 30/06/2009 08:58, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:> Keir, this is the updated version, please have a look on it. > > Thanks > Yunhong Jiang > > Currently MMU_PT_UPDATE_RESERVE_AD support only update page table for current > domain. This patch add support for foreign domain also. > > Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> > > diff -r 9a74617c4a28 xen/arch/x86/mm.c > --- a/xen/arch/x86/mm.c Tue Jun 30 01:44:30 2009 +0800 > +++ b/xen/arch/x86/mm.c Tue Jun 30 01:51:15 2009 +0800 > @@ -110,6 +110,7 @@ > #include <asm/hypercall.h> > #include <asm/shared.h> > #include <public/memory.h> > +#include <public/sched.h> > #include <xsm/xsm.h> > #include <xen/trace.h> > > @@ -2999,8 +3000,9 @@ int do_mmu_update( > unsigned long gpfn, gmfn, mfn; > struct page_info *page; > int rc = 0, okay = 1, i = 0; > - unsigned int cmd, done = 0; > - struct domain *d = current->domain; > + unsigned int cmd, done = 0, pt_dom; > + struct domain *d = current->domain, *pt_owner = NULL; > + struct vcpu *v = current; > struct domain_mmap_cache mapcache; > > if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) > @@ -3018,7 +3020,28 @@ int do_mmu_update( > goto out; > } > > - if ( !set_foreigndom(foreigndom) ) > + pt_dom = foreigndom >> 16; > + if (pt_dom) > + { > + /* the page table belongs to foreign domain */ > + pt_owner = rcu_lock_domain_by_id(pt_dom - 1); > + if (!pt_owner ) > + { > + rc = -EINVAL; > + goto out; > + } > + if( !IS_PRIV_FOR(d, pt_owner) ) > + { > + rc = -ESRCH; > + goto out; > + } > + if (pt_dom == d) > + rcu_unlock_domain(pt_owner); > + v = pt_owner->vcpu[0]; > + } else > + pt_owner = d; > + > + if ( !set_foreigndom(foreigndom & 0xFFFFU) ) > { > rc = -ESRCH; > goto out; > @@ -3059,9 +3082,9 @@ int do_mmu_update( > > req.ptr -= cmd; > gmfn = req.ptr >> PAGE_SHIFT; > - mfn = gmfn_to_mfn(d, gmfn); > - > - if ( unlikely(!get_page_from_pagenr(mfn, d)) ) > + mfn = 
gmfn_to_mfn(pt_owner, gmfn); > + > + if ( unlikely(!get_page_from_pagenr(mfn, pt_owner)) ) > { > MEM_LOG("Could not get page for normal update"); > break; > @@ -3080,24 +3103,21 @@ int do_mmu_update( > { > l1_pgentry_t l1e = l1e_from_intpte(req.val); > okay = mod_l1_entry(va, l1e, mfn, > - cmd == MMU_PT_UPDATE_PRESERVE_AD, > - current); > + cmd == MMU_PT_UPDATE_PRESERVE_AD, v); > } > break; > case PGT_l2_page_table: > { > l2_pgentry_t l2e = l2e_from_intpte(req.val); > okay = mod_l2_entry(va, l2e, mfn, > - cmd == MMU_PT_UPDATE_PRESERVE_AD, > - current); > + cmd == MMU_PT_UPDATE_PRESERVE_AD, v); > } > break; > case PGT_l3_page_table: > { > l3_pgentry_t l3e = l3e_from_intpte(req.val); > rc = mod_l3_entry(va, l3e, mfn, > - cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, > - current); > + cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, > v); > okay = !rc; > } > break; > @@ -3106,8 +3126,7 @@ int do_mmu_update( > { > l4_pgentry_t l4e = l4e_from_intpte(req.val); > rc = mod_l4_entry(va, l4e, mfn, > - cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, > - current); > + cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, > v); > okay = !rc; > } > break; > @@ -3115,7 +3134,7 @@ int do_mmu_update( > case PGT_writable_page: > perfc_incr(writable_mmu_updates); > okay = paging_write_guest_entry( > - current, va, req.val, _mfn(mfn)); > + v, va, req.val, _mfn(mfn)); > break; > } > page_unlock(page); > @@ -3126,7 +3145,7 @@ int do_mmu_update( > { > perfc_incr(writable_mmu_updates); > okay = paging_write_guest_entry( > - current, va, req.val, _mfn(mfn)); > + v, va, req.val, _mfn(mfn)); > put_page_type(page); > } > > @@ -3191,6 +3210,9 @@ int do_mmu_update( > perfc_add(num_page_updates, i); > > out: > + if (pt_owner && (pt_owner != d)) > + rcu_unlock_domain(pt_owner); > + > /* Add incremental work we have done to the @done output parameter. 
*/ > if ( unlikely(!guest_handle_is_null(pdone)) ) > { > diff -r 9a74617c4a28 xen/include/public/xen.h > --- a/xen/include/public/xen.h Tue Jun 30 01:44:30 2009 +0800 > +++ b/xen/include/public/xen.h Tue Jun 30 01:44:33 2009 +0800 > @@ -166,10 +166,14 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); > * ptr[1:0] specifies the appropriate MMU_* command. > * > * ptr[1:0] == MMU_NORMAL_PT_UPDATE: > - * Updates an entry in a page table. If updating an L1 table, and the new > + * Updates an entry in a page table. > + * The page table can be owned by current domain, or a foreign domain. If > + * the page table belongs to a foreign domain, it should be specified in > + * upper 16 bits in FD > + * If updating an L1 table, and the new > * table entry is valid/present, the mapped frame must belong to the FD, if > - * an FD has been specified. If attempting to map an I/O page then the > - * caller assumes the privilege of the FD. > + * an foreign domain specified in lower 16 btis in FD. If attempting to map > + * an I/O page then the caller assumes the privilege of the FD. > * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the > caller. > * FD == DOMID_XEN: Map restricted areas of Xen''s heap space. > * ptr[:2] -- Machine address of the page-table entry to modify. > > > Keir Fraser wrote: >> On 01/06/2009 10:35, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote: >> >>> I think I missed this logic, I just make sure the domain is >>> suspended, but seems be wrong because a suspended domain can still >>> be killed and relinquish resources. So I will add this logic and >>> resend the patch. >>> >>> But I''m not sure if we need make sure the guest is suspended/paused >>> during these functions, as exchange a page or pte in flight may >>> cause trouble. Or we can leave it to user space tools, since this >>> situation will not damage xen HV itself. >> >> The HV should not bother to do the suspend check. It is a higher-level >> problem which should be checked by the tools. 
>> >> -- Keir_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2009-Jul-02 06:26 UTC
RE: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support update page table for foreign domain
Sorry, I forgot the last qref before building. I found this missing rcu_unlock
before sending, but wrongly wrote the check as pt_dom == d. I changed that when
I found the build error, but forgot to qref.
Attached is the new one, please have a look.
Thanks
Yunhong Jiang
Currently MMU_PT_UPDATE_RESERVE_AD support only update page table for current
domain. This patch add support for foreign domain also.
Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
diff -r 9a74617c4a28 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Tue Jun 30 01:44:30 2009 +0800
+++ b/xen/arch/x86/mm.c Tue Jun 30 19:02:38 2009 +0800
@@ -110,6 +110,7 @@
#include <asm/hypercall.h>
#include <asm/shared.h>
#include <public/memory.h>
+#include <public/sched.h>
#include <xsm/xsm.h>
#include <xen/trace.h>
@@ -2999,8 +3000,9 @@ int do_mmu_update(
unsigned long gpfn, gmfn, mfn;
struct page_info *page;
int rc = 0, okay = 1, i = 0;
- unsigned int cmd, done = 0;
- struct domain *d = current->domain;
+ unsigned int cmd, done = 0, pt_dom;
+ struct domain *d = current->domain, *pt_owner = NULL;
+ struct vcpu *v = current;
struct domain_mmap_cache mapcache;
if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
@@ -3018,7 +3020,28 @@ int do_mmu_update(
goto out;
}
- if ( !set_foreigndom(foreigndom) )
+ pt_dom = foreigndom >> 16;
+ if (pt_dom)
+ {
+ /* the page table belongs to a foreign domain */
+ pt_owner = rcu_lock_domain_by_id(pt_dom - 1);
+ if (!pt_owner )
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+ if( !IS_PRIV_FOR(d, pt_owner) )
+ {
+ rc = -ESRCH;
+ goto out;
+ }
+ if (pt_owner == d)
+ rcu_unlock_domain(pt_owner);
+ v = pt_owner->vcpu[0];
+ } else
+ pt_owner = d;
+
+ if ( !set_foreigndom(foreigndom & 0xFFFFU) )
{
rc = -ESRCH;
goto out;
@@ -3059,9 +3082,9 @@ int do_mmu_update(
req.ptr -= cmd;
gmfn = req.ptr >> PAGE_SHIFT;
- mfn = gmfn_to_mfn(d, gmfn);
-
- if ( unlikely(!get_page_from_pagenr(mfn, d)) )
+ mfn = gmfn_to_mfn(pt_owner, gmfn);
+
+ if ( unlikely(!get_page_from_pagenr(mfn, pt_owner)) )
{
MEM_LOG("Could not get page for normal update");
break;
@@ -3080,24 +3103,21 @@ int do_mmu_update(
{
l1_pgentry_t l1e = l1e_from_intpte(req.val);
okay = mod_l1_entry(va, l1e, mfn,
- cmd == MMU_PT_UPDATE_PRESERVE_AD,
- current);
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
}
break;
case PGT_l2_page_table:
{
l2_pgentry_t l2e = l2e_from_intpte(req.val);
okay = mod_l2_entry(va, l2e, mfn,
- cmd == MMU_PT_UPDATE_PRESERVE_AD,
- current);
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
}
break;
case PGT_l3_page_table:
{
l3_pgentry_t l3e = l3e_from_intpte(req.val);
rc = mod_l3_entry(va, l3e, mfn,
- cmd == MMU_PT_UPDATE_PRESERVE_AD, 1,
- current);
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v);
okay = !rc;
}
break;
@@ -3106,8 +3126,7 @@ int do_mmu_update(
{
l4_pgentry_t l4e = l4e_from_intpte(req.val);
rc = mod_l4_entry(va, l4e, mfn,
- cmd == MMU_PT_UPDATE_PRESERVE_AD, 1,
- current);
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v);
okay = !rc;
}
break;
@@ -3115,7 +3134,7 @@ int do_mmu_update(
case PGT_writable_page:
perfc_incr(writable_mmu_updates);
okay = paging_write_guest_entry(
- current, va, req.val, _mfn(mfn));
+ v, va, req.val, _mfn(mfn));
break;
}
page_unlock(page);
@@ -3126,7 +3145,7 @@ int do_mmu_update(
{
perfc_incr(writable_mmu_updates);
okay = paging_write_guest_entry(
- current, va, req.val, _mfn(mfn));
+ v, va, req.val, _mfn(mfn));
put_page_type(page);
}
@@ -3191,6 +3210,9 @@ int do_mmu_update(
perfc_add(num_page_updates, i);
out:
+ if (pt_owner && (pt_owner != d))
+ rcu_unlock_domain(pt_owner);
+
/* Add incremental work we have done to the @done output parameter. */
if ( unlikely(!guest_handle_is_null(pdone)) )
{
diff -r 9a74617c4a28 xen/include/public/xen.h
--- a/xen/include/public/xen.h Tue Jun 30 01:44:30 2009 +0800
+++ b/xen/include/public/xen.h Tue Jun 30 01:44:33 2009 +0800
@@ -166,10 +166,14 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
* ptr[1:0] specifies the appropriate MMU_* command.
*
* ptr[1:0] == MMU_NORMAL_PT_UPDATE:
- * Updates an entry in a page table. If updating an L1 table, and the new
+ * Updates an entry in a page table.
+ * The page table can be owned by the current domain, or a foreign domain. If
+ * the page table belongs to a foreign domain, it should be specified in
+ * the upper 16 bits of FD.
+ * If updating an L1 table, and the new
* table entry is valid/present, the mapped frame must belong to the FD, if
- * an FD has been specified. If attempting to map an I/O page then the
- * caller assumes the privilege of the FD.
+ * a foreign domain is specified in the lower 16 bits of FD. If attempting
+ * to map an I/O page then the caller assumes the privilege of the FD.
* FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller.
* FD == DOMID_XEN: Map restricted areas of Xen''s heap space.
* ptr[:2] -- Machine address of the page-table entry to modify.
Keir Fraser wrote:> This doesn''t even build. You compare a pointer with an integer.
>
> -- Keir
>
> On 30/06/2009 08:58, "Jiang, Yunhong"
<yunhong.jiang@intel.com> wrote:
>
>> Keir, this is the updated version, please have a look on it.
>>
>> Thanks
>> Yunhong Jiang
>>
>> Currently MMU_PT_UPDATE_RESERVE_AD support only update page table
>> for current domain. This patch add support for foreign domain also.
>>
>> Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
>>
>> diff -r 9a74617c4a28 xen/arch/x86/mm.c
>> --- a/xen/arch/x86/mm.c Tue Jun 30 01:44:30 2009 +0800
>> +++ b/xen/arch/x86/mm.c Tue Jun 30 01:51:15 2009 +0800 @@ -110,6
>> +110,7 @@ #include <asm/hypercall.h>
>> #include <asm/shared.h>
>> #include <public/memory.h>
>> +#include <public/sched.h>
>> #include <xsm/xsm.h>
>> #include <xen/trace.h>
>>
>> @@ -2999,8 +3000,9 @@ int do_mmu_update(
>> unsigned long gpfn, gmfn, mfn;
>> struct page_info *page;
>> int rc = 0, okay = 1, i = 0;
>> - unsigned int cmd, done = 0;
>> - struct domain *d = current->domain;
>> + unsigned int cmd, done = 0, pt_dom;
>> + struct domain *d = current->domain, *pt_owner = NULL;
>> + struct vcpu *v = current;
>> struct domain_mmap_cache mapcache;
>>
>> if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
>> @@ -3018,7 +3020,28 @@ int do_mmu_update(
>> goto out;
>> }
>>
>> - if ( !set_foreigndom(foreigndom) )
>> + pt_dom = foreigndom >> 16;
>> + if (pt_dom)
>> + {
>> + /* the page table belongs to foreign domain */
>> + pt_owner = rcu_lock_domain_by_id(pt_dom - 1); + if
>> (!pt_owner ) + {
>> + rc = -EINVAL;
>> + goto out;
>> + }
>> + if( !IS_PRIV_FOR(d, pt_owner) )
>> + {
>> + rc = -ESRCH;
>> + goto out;
>> + }
>> + if (pt_dom == d)
>> + rcu_unlock_domain(pt_owner);
>> + v = pt_owner->vcpu[0];
>> + } else
>> + pt_owner = d;
>> +
>> + if ( !set_foreigndom(foreigndom & 0xFFFFU) )
>> {
>> rc = -ESRCH;
>> goto out;
>> @@ -3059,9 +3082,9 @@ int do_mmu_update(
>>
>> req.ptr -= cmd;
>> gmfn = req.ptr >> PAGE_SHIFT;
>> - mfn = gmfn_to_mfn(d, gmfn);
>> -
>> - if ( unlikely(!get_page_from_pagenr(mfn, d)) )
>> + mfn = gmfn_to_mfn(pt_owner, gmfn);
>> +
>> + if ( unlikely(!get_page_from_pagenr(mfn, pt_owner)) )
>> { MEM_LOG("Could not get page for normal
update");
>> break; @@ -3080,24 +3103,21 @@ int do_mmu_update(
>> {
>> l1_pgentry_t l1e = l1e_from_intpte(req.val);
>> okay = mod_l1_entry(va, l1e, mfn,
>> - cmd =>>
MMU_PT_UPDATE_PRESERVE_AD,
>> - current);
>> + cmd =>>
MMU_PT_UPDATE_PRESERVE_AD, v); }
>> break; case PGT_l2_page_table:
>> {
>> l2_pgentry_t l2e = l2e_from_intpte(req.val);
>> okay = mod_l2_entry(va, l2e, mfn,
>> - cmd =>>
MMU_PT_UPDATE_PRESERVE_AD,
>> - current);
>> + cmd =>>
MMU_PT_UPDATE_PRESERVE_AD, v); }
>> break; case PGT_l3_page_table:
>> {
>> l3_pgentry_t l3e = l3e_from_intpte(req.val);
>> rc = mod_l3_entry(va, l3e, mfn,
>> - cmd =>>
MMU_PT_UPDATE_PRESERVE_AD, 1,
>> - current);
>> + cmd =>>
MMU_PT_UPDATE_PRESERVE_AD, 1, v); okay = !rc;
>> }
>> break;
>> @@ -3106,8 +3126,7 @@ int do_mmu_update(
>> {
>> l4_pgentry_t l4e = l4e_from_intpte(req.val);
>> rc = mod_l4_entry(va, l4e, mfn,
>> - cmd =>>
MMU_PT_UPDATE_PRESERVE_AD, 1,
>> - current);
>> + cmd =>>
MMU_PT_UPDATE_PRESERVE_AD, 1, v); okay = !rc;
>> }
>> break;
>> @@ -3115,7 +3134,7 @@ int do_mmu_update(
>> case PGT_writable_page:
>> perfc_incr(writable_mmu_updates);
>> okay = paging_write_guest_entry(
>> - current, va, req.val, _mfn(mfn));
>> + v, va, req.val, _mfn(mfn));
>> break; }
>> page_unlock(page);
>> @@ -3126,7 +3145,7 @@ int do_mmu_update(
>> {
>> perfc_incr(writable_mmu_updates);
>> okay = paging_write_guest_entry(
>> - current, va, req.val, _mfn(mfn));
>> + v, va, req.val, _mfn(mfn));
>> put_page_type(page);
>> }
>>
>> @@ -3191,6 +3210,9 @@ int do_mmu_update(
>> perfc_add(num_page_updates, i);
>>
>> out:
>> + if (pt_owner && (pt_owner != d))
>> + rcu_unlock_domain(pt_owner);
>> +
>> /* Add incremental work we have done to the @done output
>> parameter. */ if ( unlikely(!guest_handle_is_null(pdone)) )
>> {
>> diff -r 9a74617c4a28 xen/include/public/xen.h
>> --- a/xen/include/public/xen.h Tue Jun 30 01:44:30 2009 +0800
>> +++ b/xen/include/public/xen.h Tue Jun 30 01:44:33 2009 +0800
>> @@ -166,10 +166,14 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
>> * ptr[1:0] specifies the appropriate MMU_* command. *
>> * ptr[1:0] == MMU_NORMAL_PT_UPDATE:
>> - * Updates an entry in a page table. If updating an L1 table, and
>> the new + * Updates an entry in a page table.
>> + * The page table can be owned by current domain, or a foreign
>> domain. If + * the page table belongs to a foreign domain, it should
>> be specified in + * upper 16 bits in FD. + * If updating an L1 table,
>> and the new * table entry is valid/present, the mapped frame must
>> belong to the FD, if
>> - * an FD has been specified. If attempting to map an I/O page then
>> the
>> - * caller assumes the privilege of the FD.
>> + * a foreign domain specified in lower 16 bits in FD. If
>> attempting to map + * an I/O page then the caller assumes the
>> privilege of the FD.
>> * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of
>> the caller.
>> * FD == DOMID_XEN: Map restricted areas of Xen''s heap space.
>> * ptr[:2] -- Machine address of the page-table entry to modify.
>>
>>
>> Keir Fraser wrote:
>>> On 01/06/2009 10:35, "Jiang, Yunhong"
> <yunhong.jiang@intel.com> wrote:
>>>
>>>> I think I missed this logic, I just make sure the domain is
>>>> suspended, but seems be wrong because a suspended domain can
still
>>>> be killed and relinquish resources. So I will add this logic
and
>>>> resend the patch.
>>>>
>>>> But I''m not sure if we need make sure the guest is
suspended/paused
>>>> during these functions, as exchange a page or pte in flight may
>>>> cause trouble. Or we can leave it to user space tools, since
this
>>>> situation will not damage xen HV itself.
>>>
>>> The HV should not bother to do the suspend check. It is a
>>> higher-level problem which should be checked by the tools.
>>>
>>> -- Keir
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Jiang, Yunhong
2009-Jul-02 06:38 UTC
RE: [PATCH] Enhance memory exchange to support foreign domain -- Was RE: [Xen-devel] [PATCH 6/6] Change MMU_PT_UPDATE_RESERVE_AD to support update page table for foreign domain
Attached is the updated patch. I tried to call domain_kill() during
memory_exchange, and do hit the situation that assign_pages will fail because of
domain dying. The domain can be killed cleanly still, no zombie domain left.
Changes from previous submission: a) lock protection for tot_pages, b) free the
page that has been moved out of the out_chunk_list.
Thanks
Yunhong Jiang
diff -r 02003bee3e80 xen/common/memory.c
--- a/xen/common/memory.c Thu Jun 25 18:31:10 2009 +0100
+++ b/xen/common/memory.c Wed Jul 01 19:55:23 2009 +0800
@@ -265,16 +265,22 @@ static long memory_exchange(XEN_GUEST_HA
out_chunk_order = exch.in.extent_order - exch.out.extent_order;
}
- /*
- * Only support exchange on calling domain right now. Otherwise there are
- * tricky corner cases to consider (e.g., dying domain).
- */
- if ( unlikely(exch.in.domid != DOMID_SELF) )
- {
- rc = IS_PRIV(current->domain) ? -EINVAL : -EPERM;
- goto fail_early;
- }
- d = current->domain;
+ if ( likely(exch.in.domid == DOMID_SELF) )
+ {
+ d = rcu_lock_current_domain();
+ }
+ else
+ {
+ if ( (d = rcu_lock_domain_by_id(exch.in.domid)) == NULL )
+ goto fail_early;
+
+ if ( !IS_PRIV_FOR(current->domain, d) )
+ {
+ rcu_unlock_domain(d);
+ rc = -EPERM;
+ goto fail_early;
+ }
+ }
memflags |= MEMF_bits(domain_clamp_alloc_bitsize(
d,
@@ -292,6 +298,7 @@ static long memory_exchange(XEN_GUEST_HA
if ( hypercall_preempt_check() )
{
exch.nr_exchanged = i << in_chunk_order;
+ rcu_unlock_domain(d);
if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
return -EFAULT;
return hypercall_create_continuation(
@@ -360,9 +367,39 @@ static long memory_exchange(XEN_GUEST_HA
j = 0;
while ( (page = page_list_remove_head(&out_chunk_list)) )
{
- if ( assign_pages(d, page, exch.out.extent_order,
- MEMF_no_refcount) )
+ rc = (assign_pages(d, page, exch.out.extent_order,
+ MEMF_no_refcount));
+
+ if (rc == -EINVAL)
BUG();
+ /* Domain is being destroyed */
+ else if (rc == -ENODEV)
+ {
+ int dec_count, drop_dom_ref = 0;
+
+ /*
+ * Pages in in_chunk_list are stolen without
+ * decreasing tot_pages. If the domain is dying when we
+ * assign pages, we need to decrease the count. For those pages
+ * that have already been assigned, they should be covered by
+ * domain_relinquish_resources().
+ */
+ dec_count = ( ( 1UL << exch.in.extent_order)*
+ (1UL << in_chunk_order) -
+ j * (1UL << exch.out.extent_order));
+
+ spin_lock(&d->page_alloc_lock);
+ d->tot_pages -= dec_count;
+ if (dec_count && !d->tot_pages)
+ drop_dom_ref = 1;
+ spin_unlock(&d->page_alloc_lock);
+
+ if (drop_dom_ref)
+ put_domain(d);
+
+ free_domheap_pages(page, exch.out.extent_order);
+ goto dying;
+ }
/* Note that we ignore errors accessing the output extent list. */
(void)__copy_from_guest_offset(
@@ -378,15 +415,15 @@ static long memory_exchange(XEN_GUEST_HA
(void)__copy_to_guest_offset(
exch.out.extent_start, (i<<out_chunk_order)+j,
&mfn, 1);
}
-
j++;
}
- BUG_ON(j != (1UL << out_chunk_order));
+ BUG_ON( !(d->is_dying) && (j != (1UL <<
out_chunk_order)) );
}
exch.nr_exchanged = exch.in.nr_extents;
if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
rc = -EFAULT;
+ rcu_unlock_domain(d);
return rc;
/*
@@ -398,7 +435,8 @@ static long memory_exchange(XEN_GUEST_HA
while ( (page = page_list_remove_head(&in_chunk_list)) )
if ( assign_pages(d, page, 0, MEMF_no_refcount) )
BUG();
-
+ dying:
+ rcu_unlock_domain(d);
/* Free any output pages we managed to allocate. */
while ( (page = page_list_remove_head(&out_chunk_list)) )
free_domheap_pages(page, exch.out.extent_order);
diff -r 02003bee3e80 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c Thu Jun 25 18:31:10 2009 +0100
+++ b/xen/common/page_alloc.c Tue Jun 30 23:33:47 2009 +0800
@@ -1120,6 +1120,7 @@ int assign_pages(
unsigned int memflags)
{
unsigned long i;
+ int rc = -EINVAL;
spin_lock(&d->page_alloc_lock);
@@ -1127,6 +1128,7 @@ int assign_pages(
{
gdprintk(XENLOG_INFO, "Cannot assign page to domain%d --
dying.\n",
d->domain_id);
+ rc = -ENODEV;
goto fail;
}
@@ -1136,6 +1138,7 @@ int assign_pages(
{
gdprintk(XENLOG_INFO, "Over-allocation for domain %u: %u >
%u\n",
d->domain_id, d->tot_pages + (1 << order),
d->max_pages);
+ rc = -EINVAL;
goto fail;
}
@@ -1160,7 +1163,7 @@ int assign_pages(
fail:
spin_unlock(&d->page_alloc_lock);
- return -1;
+ return rc;
}
Keir Fraser wrote:> On 30/06/2009 10:40, "Jiang, Yunhong"
<yunhong.jiang@intel.com> wrote:
>
>>> B. Your adjustment of tot_pages is confusing. I can''t
convince
>>> myself the arithmetic is correct. Furthermore messing with
>>> tot_pages outside of the d->page_alloc_lock is not allowed.
>>
>> The idea of the adjustment is:
>>
>> In each loop, we remove some pages (the in_chunk_list) without
>> decrease the tot_pages. Then when we get domain is dying when assign
>> pages (the out_chunk_list), we need decrease the count. For those
>> page that has been assigned, it should be covered by
> domain_relinquish_resources(), so what we
>> need decrease is:
>> the number that has been removed - the number that has been assigned
>> already.
>
> There''s still quite a logical leap to the arithmetic in your
> code. Spell it
> out in a comment.
>
> -- Keir
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel