Jan Beulich
2008-Nov-03 07:42 UTC
[Xen-devel] [PATCH] x86: simplify page reference handling for partially (in-)validated pages
As suggested by Keir, simplify general page reference management for preempted (partiall [in-]validated) pages: Reserve on reference that can be acquired without the risk of overflowing the reference count, thus allowing to have a simplified get_page() equivalent that cannot fail (but must be used with care). Doing this conversion pointed out a latent issue in the changes done previously in this area: The extra reference must be acquired before the ''normal'' reference gets dropped, so the patch fixes this at once in both the alloc_page_type() and free_page_type() paths (it''s really only the latter that failed to work with the change described above). Signed-off-by: Jan Beulich <jbeulich@novell.com> Index: 2008-10-27/xen/arch/x86/mm.c ==================================================================--- 2008-10-27.orig/xen/arch/x86/mm.c 2008-10-29 16:46:52.000000000 +0100 +++ 2008-10-27/xen/arch/x86/mm.c 2008-10-31 16:13:56.000000000 +0100 @@ -1856,7 +1856,8 @@ int get_page(struct page_info *page, str nx = x + 1; d = nd; if ( unlikely((x & PGC_count_mask) == 0) || /* Not allocated? */ - unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */ + /* Keep one spare reference to be acquired by get_page_light(). */ + unlikely(((nx + 1) & PGC_count_mask) <= 1) || /* Overflow? */ unlikely(d != _domain) ) /* Wrong owner? */ { if ( !_shadow_mode_refcounts(domain) && !domain->is_dying ) @@ -1878,6 +1879,28 @@ int get_page(struct page_info *page, str return 1; } +/* + * Special version of get_page() to be used exclusively when + * - a page is known to already have a non-zero reference count + * - the page does not need its owner to be checked + * - it will not be called more than once without dropping the thus + * acquired reference again. + * Due to get_page() reserving one reference, this call cannot fail. + */ +static void get_page_light(struct page_info *page) +{ + u32 x, nx, y = page->count_info; + + do { + x = y; + nx = x + 1; + BUG_ON(!(x & PGC_count_mask)); /* Not allocated? */ + BUG_ON(!(nx & PGC_count_mask)); /* Overflow? */ + y = cmpxchg(&page->count_info, x, nx); + } + while ( unlikely(y != x) ); +} + static int alloc_page_type(struct page_info *page, unsigned long type, int preemptible) @@ -1885,10 +1908,6 @@ static int alloc_page_type(struct page_i struct domain *owner = page_get_owner(page); int rc; - /* Obtain an extra reference to retain if we set PGT_partial. */ - if ( preemptible && !get_page(page, owner) ) - return -EINVAL; - /* A page table is dirtied when its type count becomes non-zero. */ if ( likely(owner != NULL) ) paging_mark_dirty(owner, page_to_mfn(page)); @@ -1922,14 +1941,10 @@ static int alloc_page_type(struct page_i wmb(); if ( rc == -EAGAIN ) { + get_page_light(page); page->u.inuse.type_info |= PGT_partial; - return -EAGAIN; } - - if ( preemptible ) - put_page(page); - - if ( rc == -EINTR ) + else if ( rc == -EINTR ) { ASSERT((page->u.inuse.type_info & (PGT_count_mask|PGT_validated|PGT_partial)) == 1); @@ -2037,8 +2052,8 @@ static int __put_page_type_final(struct page->u.inuse.type_info--; break; case -EINTR: - ASSERT(!(page->u.inuse.type_info & - (PGT_count_mask|PGT_validated|PGT_partial))); + ASSERT((page->u.inuse.type_info & + (PGT_count_mask|PGT_validated|PGT_partial)) == 1); if ( !(shadow_mode_enabled(page_get_owner(page)) && (page->count_info & PGC_page_table)) ) page->tlbflush_timestamp = tlbflush_current_time(); @@ -2047,17 +2062,13 @@ static int __put_page_type_final(struct break; case -EAGAIN: wmb(); + get_page_light(page); page->u.inuse.type_info |= PGT_partial; - /* Must skip put_page() below. */ - preemptible = 0; break; default: BUG(); } - if ( preemptible ) - put_page(page); - return rc; } @@ -2066,10 +2077,7 @@ static int __put_page_type(struct page_i int preemptible) { unsigned long nx, x, y = page->u.inuse.type_info; - - /* Obtain an extra reference to retain if we set PGT_partial. */ - if ( preemptible && !get_page(page, page_get_owner(page)) ) - return -EINVAL; + int rc = 0; for ( ; ; ) { @@ -2092,10 +2100,11 @@ static int __put_page_type(struct page_i if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ) continue; + /* We cleared the ''valid bit'' so we do the clean up. */ + rc = __put_page_type_final(page, x, preemptible); if ( x & PGT_partial ) put_page(page); - /* We cleared the ''valid bit'' so we do the clean up. */ - return __put_page_type_final(page, x, preemptible); + break; } /* @@ -2114,17 +2123,10 @@ static int __put_page_type(struct page_i break; if ( preemptible && hypercall_preempt_check() ) - { - if ( preemptible ) - put_page(page); return -EINTR; - } } - if ( preemptible ) - put_page(page); - - return 0; + return rc; } @@ -2132,6 +2134,7 @@ static int __get_page_type(struct page_i int preemptible) { unsigned long nx, x, y = page->u.inuse.type_info; + int rc = 0; ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2))); @@ -2227,11 +2230,7 @@ static int __get_page_type(struct page_i } if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) ) - { - if ( (x & PGT_partial) && !(nx & PGT_partial) ) - put_page(page); break; - } if ( preemptible && hypercall_preempt_check() ) return -EINTR; @@ -2258,10 +2257,13 @@ static int __get_page_type(struct page_i page->nr_validated_ptes = 0; page->partial_pte = 0; } - return alloc_page_type(page, type, preemptible); + rc = alloc_page_type(page, type, preemptible); } - return 0; + if ( (x & PGT_partial) && !(nx & PGT_partial) ) + put_page(page); + + return rc; } void put_page_type(struct page_info *page) _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2008-Nov-03 07:58 UTC
Re: [Xen-devel] [PATCH] x86: simplify page reference handling for partially (in-)validated pages
On 3/11/08 07:42, "Jan Beulich" <jbeulich@novell.com> wrote:> As suggested by Keir, simplify general page reference management for > preempted (partiall [in-]validated) pages: Reserve on reference that > can be acquired without the risk of overflowing the reference count, > thus allowing to have a simplified get_page() equivalent that cannot > fail (but must be used with care). > > Doing this conversion pointed out a latent issue in the changes done > previously in this area: The extra reference must be acquired before > the ''normal'' reference gets dropped, so the patch fixes this at once > in both the alloc_page_type() and free_page_type() paths (it''s really > only the latter that failed to work with the change described above).Yes, that makes the *_page_type() functions quite a bit clearer. Thanks! -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel