Jan Beulich
2008-May-20 14:42 UTC
[Xen-devel] [RFC] linux: add p[mug]d_clear_full() accessors
.. to avoid using hypercalls for clearing dead (un-pinned) page tables.
This eliminates well over a quarter of a million hypercalls during
kernel builds (up to about a million depending on the configuration).

This is against 2.6.25.4, it does not apply to the 2.6.18 tree (I'd
make this effort only if the patch is desired into that tree). The
primary goal of sending this is to find out whether the required change
to include/asm-generic/pgtable.h and mm/memory.c is considered
worthwhile (in which case I'd make an attempt at getting this accepted
upstream).

Signed-off-by: Jan Beulich <jbeulich@novell.com>

Index: head-2008-05-19/include/asm-generic/pgtable.h
===================================================================
--- head-2008-05-19.orig/include/asm-generic/pgtable.h	2008-05-19 10:48:15.000000000 +0200
+++ head-2008-05-19/include/asm-generic/pgtable.h	2008-05-19 10:57:25.000000000 +0200
@@ -133,6 +133,18 @@ static inline void ptep_set_wrprotect(st
 #define move_pte(pte, prot, old_addr, new_addr)	(pte)
 #endif
 
+#ifndef __HAVE_ARCH_PMD_CLEAR_FULL
+#define pmd_clear_full(mm, addr, pmd, full)	pmd_clear(pmd)
+#endif
+
+#ifndef __HAVE_ARCH_PUD_CLEAR_FULL
+#define pud_clear_full(mm, addr, pud, full)	pud_clear(pud)
+#endif
+
+#ifndef __HAVE_ARCH_PGD_CLEAR_FULL
+#define pgd_clear_full(mm, addr, pgd, full)	pgd_clear(pgd)
+#endif
+
 /*
  * When walking page tables, get the address of the next boundary,
  * or the end address of the range if that comes earlier. Although no
Index: head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable.h
===================================================================
--- head-2008-05-19.orig/include/asm-x86/mach-xen/asm/pgtable.h	2008-05-19 10:57:24.000000000 +0200
+++ head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable.h	2008-05-19 10:57:25.000000000 +0200
@@ -397,6 +397,22 @@ static inline pte_t ptep_get_and_clear(s
 
 pte_t xen_ptep_get_and_clear_full(struct vm_area_struct *, unsigned long, pte_t *, int);
 
+#define __HAVE_ARCH_PMD_CLEAR_FULL
+#define pmd_clear_full(mm, addr, pmd, full) \
+	(!mm_is_pinned(mm) ? __xen_pmd_clear(pmd) : xen_pmd_clear(pmd))
+
+#ifndef __PAGETABLE_PMD_FOLDED
+#define __HAVE_ARCH_PUD_CLEAR_FULL
+#define pud_clear_full(mm, addr, pud, full) \
+	(!mm_is_pinned(mm) ? __xen_pud_clear(pud) : xen_pud_clear(pud))
+#endif
+
+#ifndef __PAGETABLE_PUD_FOLDED
+#define __HAVE_ARCH_PGD_CLEAR_FULL
+#define pgd_clear_full(mm, addr, pgd, full) \
+	(!mm_is_pinned(mm) ? __xen_pgd_clear(pgd) : xen_pgd_clear(pgd))
+#endif
+
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
Index: head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable-2level.h
===================================================================
--- head-2008-05-19.orig/include/asm-x86/mach-xen/asm/pgtable-2level.h	2008-05-19 10:49:58.000000000 +0200
+++ head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable-2level.h	2008-05-19 10:57:25.000000000 +0200
@@ -35,6 +35,11 @@ static inline void xen_pmd_clear(pmd_t *
 
 #define __xen_pte_clear(ptep) xen_set_pte(ptep, __pte(0))
 
+static inline void __xen_pmd_clear(pmd_t *pmdp)
+{
+	*pmdp = __pmd(0);
+}
+
 #ifdef CONFIG_SMP
 static inline pte_t xen_ptep_get_and_clear(pte_t *xp, pte_t res)
 {
Index: head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable-3level.h
===================================================================
--- head-2008-05-19.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h	2008-05-19 10:49:58.000000000 +0200
+++ head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable-3level.h	2008-05-19 10:57:25.000000000 +0200
@@ -76,6 +76,11 @@ static inline void xen_pmd_clear(pmd_t *
 	xen_l2_entry_update(pmd, __pmd(0));
 }
 
+static inline void __xen_pmd_clear(pmd_t *pmd)
+{
+	*pmd = __pmd(0);
+}
+
 static inline void pud_clear(pud_t *pudp)
 {
 	pgdval_t pgd;
@@ -96,6 +101,11 @@ static inline void pud_clear(pud_t *pudp
 	xen_tlb_flush();
 }
 
+static inline void __xen_pud_clear(pud_t *pudp)
+{
+	*pudp = __pud(0);
+}
+
 #define pud_page(pud) \
 	((struct page *) __va(pud_val(pud) & PAGE_MASK))
 
Index: head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable_64.h
===================================================================
--- head-2008-05-19.orig/include/asm-x86/mach-xen/asm/pgtable_64.h	2008-05-19 10:57:24.000000000 +0200
+++ head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable_64.h	2008-05-19 10:57:25.000000000 +0200
@@ -109,6 +109,11 @@ static inline void xen_pmd_clear(pmd_t *
 	xen_set_pmd(pmd, xen_make_pmd(0));
 }
 
+static inline void __xen_pmd_clear(pmd_t *pmd)
+{
+	*pmd = xen_make_pmd(0);
+}
+
 static inline void xen_set_pud(pud_t *pudp, pud_t pud)
 {
 	xen_l3_entry_update(pudp, pud);
@@ -119,6 +124,11 @@ static inline void xen_pud_clear(pud_t *
 	xen_set_pud(pud, xen_make_pud(0));
 }
 
+static inline void __xen_pud_clear(pud_t *pud)
+{
+	*pud = xen_make_pud(0);
+}
+
 #define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
 
 static inline void xen_set_pgd(pgd_t *pgdp, pgd_t pgd)
@@ -132,6 +142,12 @@ static inline void xen_pgd_clear(pgd_t *
 	xen_set_pgd(__user_pgd(pgd), xen_make_pgd(0));
 }
 
+static inline void __xen_pgd_clear(pgd_t *pgd)
+{
+	*pgd = xen_make_pgd(0);
+	*__user_pgd(pgd) = xen_make_pgd(0);
+}
+
 #define pte_same(a, b)		((a).pte == (b).pte)
 
 #endif /* !__ASSEMBLY__ */
Index: head-2008-05-19/mm/memory.c
===================================================================
--- head-2008-05-19.orig/mm/memory.c	2008-05-19 10:49:46.000000000 +0200
+++ head-2008-05-19/mm/memory.c	2008-05-19 10:57:25.000000000 +0200
@@ -132,10 +132,11 @@ void pmd_clear_bad(pmd_t *pmd)
  * Note: this doesn't free the actual pages themselves. That
  * has been handled earlier when unmapping all the memory regions.
  */
-static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
+static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+			   unsigned long addr)
 {
 	pgtable_t token = pmd_pgtable(*pmd);
-	pmd_clear(pmd);
+	pmd_clear_full(tlb->mm, addr, pmd, tlb->fullmm);
 	pte_free_tlb(tlb, token);
 	tlb->mm->nr_ptes--;
 }
@@ -154,7 +155,7 @@ static inline void free_pmd_range(struct
 		next = pmd_addr_end(addr, end);
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		free_pte_range(tlb, pmd);
+		free_pte_range(tlb, pmd, addr);
 	} while (pmd++, addr = next, addr != end);
 
 	start &= PUD_MASK;
@@ -169,7 +170,7 @@ static inline void free_pmd_range(struct
 		return;
 
 	pmd = pmd_offset(pud, start);
-	pud_clear(pud);
+	pud_clear_full(tlb->mm, start, pud, tlb->fullmm);
 	pmd_free_tlb(tlb, pmd);
 }
 
@@ -202,7 +203,7 @@ static inline void free_pud_range(struct
 		return;
 
 	pud = pud_offset(pgd, start);
-	pgd_clear(pgd);
+	pgd_clear_full(tlb->mm, start, pud, tlb->fullmm);
 	pud_free_tlb(tlb, pud);
 }
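A conceptual sketch of what the new accessor boils down to (this is not
part of the patch; mm_is_pinned() is provided by the XenLinux
forward-port, not by stock 2.6.25; the pud/pgd variants are analogous):

/*
 * Conceptual sketch only - not part of the patch above.  This is what
 * pmd_clear_full() expands to, written as a function: for a pinned
 * (hypervisor-validated, read-only) page table the clear must go
 * through xen_pmd_clear() and hence a hypercall, while for a dead,
 * un-pinned page table a plain store is sufficient.  addr and full are
 * unused here; they are kept to match the generic signature.
 */
static inline void pmd_clear_full_sketch(struct mm_struct *mm,
					 unsigned long addr,
					 pmd_t *pmd, int full)
{
	if (mm_is_pinned(mm))
		xen_pmd_clear(pmd);	/* pinned: hypervisor must see the update */
	else
		__xen_pmd_clear(pmd);	/* un-pinned: direct write, no hypercall */
}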
Keir Fraser
2008-May-20 14:46 UTC
Re: [Xen-devel] [RFC] linux: add p[mug]d_clear_full() accessors
We'd certainly be happy to have it in the 2.6.18 tree. But of course that
is a dead-end with respect to upstream aspirations for this patch.

 -- Keir

On 20/5/08 15:42, "Jan Beulich" <jbeulich@novell.com> wrote:

> .. to avoid using hypercalls for clearing dead (un-pinned) page tables.
> This eliminates well over a quarter of a million hypercalls during
> kernel builds (up to about a million depending on the configuration).
>
> This is against 2.6.25.4, it does not apply to the 2.6.18 tree (I'd
> make this effort only if the patch is desired into that tree). The
> primary goal of sending this is to find out whether the required change
> to include/asm-generic/pgtable.h and mm/memory.c is considered
> worthwhile (in which case I'd make an attempt at getting this accepted
> upstream).
>
> [...]
Jeremy Fitzhardinge
2008-May-20 15:17 UTC
Re: [Xen-devel] [RFC] linux: add p[mug]d_clear_full() accessors
Jan Beulich wrote:
> .. to avoid using hypercalls for clearing dead (un-pinned) page tables.
> This eliminates well over a quarter of a million hypercalls during
> kernel builds (up to about a million depending on the configuration).

Does it have much effect on performance?

> This is against 2.6.25.4, it does not apply to the 2.6.18 tree (I'd
> make this effort only if the patch is desired into that tree). The
> primary goal of sending this is to find out whether the required change
> to include/asm-generic/pgtable.h and mm/memory.c is considered
> worthwhile (in which case I'd make an attempt at getting this accepted
> upstream).

Is this based on your forward-port of the Xen patches? Stock 2.6.25.4
doesn't have mm_is_pinned().

In fact, in pvops-Xen, I set PG_pinned on all pinned pagetable pages
(not just the top level), so you can tell whether you're updating a
pinned pgd/pud/pmd without needing an mm on hand. So I think this
optimisation can be implemented in current Linux entirely within the
Xen-specific code.

    J

> [...]
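A rough sketch of the per-page alternative Jeremy describes, assuming a
pvops-style PagePinned() test on the pagetable page itself (names here
are illustrative, not code taken from either tree):

/*
 * Rough sketch only: with PG_pinned set on every pinned pagetable page
 * (not just the top level), the decision can be made from the page
 * itself, so no mm (and no new generic pmd_clear_full() hook) is
 * needed on the tear-down path.
 */
static void xen_pmd_clear_sketch(pmd_t *pmdp)
{
	struct page *page = virt_to_page(pmdp);

	if (PagePinned(page))
		xen_pmd_clear(pmdp);	/* still pinned: update via hypercall */
	else
		*pmdp = __pmd(0);	/* un-pinned: plain store suffices */
}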
Jan Beulich
2008-May-20 15:54 UTC
Re: [Xen-devel] [RFC] linux: add p[mug]d_clear_full() accessors
>>> Jeremy Fitzhardinge <jeremy@goop.org> 20.05.08 17:17 >>>
> Is this based on your forward-port of the Xen patches? Stock 2.6.25.4
> doesn't have mm_is_pinned().

Yes.

> In fact, in pvops-Xen, I set PG_pinned on all pinned pagetable pages
> (not just the top level), so you can tell whether you're updating a
> pinned pgd/pud/pmd without needing an mm on hand. So I think this
> optimisation can be implemented in current Linux entirely within the
> Xen-specific code.

Ah, I didn't realize that so far. However, 64-bit doesn't use a PG_*
flag for identifying pinned mm-s, because of a conflict in the use of
PG_arch_1 in those older kernel versions. But as the conflict is gone
in recent kernels, I should indeed be able to synchronize 64-bit with
32-bit here, then follow your approach of marking all pinned page
tables, and finally get away without adding a new set of abstractions.

One question though - in our 2.6.23 merge (where pv-ops-Xen's
PG_pinned appeared as an alias of PG_owner_priv_1, and where
PG_arch_1 got assigned a meaning for native x86, so PG_pinned
for the traditional patches needed to change anyway) I intentionally
didn't follow pv-ops for our patches, since PG_pinned is among the
flags bad_page() checks for (in the XenSource tree, and I think this
should really also be done in upstream Linux), and hence re-using
the bit here would change behavior for other parts of the kernel.

Jan
Jeremy Fitzhardinge
2008-May-20 20:03 UTC
Re: [Xen-devel] [RFC] linux: add p[mug]d_clear_full() accessors
Jan Beulich wrote:
>> In fact, in pvops-Xen, I set PG_pinned on all pinned pagetable pages
>> (not just the top level), so you can tell whether you're updating a
>> pinned pgd/pud/pmd without needing an mm on hand. So I think this
>> optimisation can be implemented in current Linux entirely within the
>> Xen-specific code.
>
> Ah, I didn't realize that so far. However, 64-bit doesn't use a PG_*
> flag for identifying pinned mm-s, because of a conflict in the use of
> PG_arch_1 in those older kernel versions. But as the conflict is gone
> in recent kernels, I should indeed be able to synchronize 64-bit with
> 32-bit here, then follow your approach of marking all pinned page
> tables, and finally get away without adding a new set of abstractions.

Yes, 64-bit has masses of free page flags, so we could allocate new ones
if need be. But owner_priv_1 is definitely available for Xen's use, so
it may as well be common (indeed, I'd expect all the existing code to
just work on 64-bit at the moment, though perhaps it's missing some
4th-level pagetable stuff in places, and testing).

> One question though - in our 2.6.23 merge (where pv-ops-Xen's
> PG_pinned appeared as an alias of PG_owner_priv_1, and where
> PG_arch_1 got assigned a meaning for native x86, so PG_pinned
> for the traditional patches needed to change anyway) I intentionally
> didn't follow pv-ops for our patches, since PG_pinned is among the
> flags bad_page() checks for (in the XenSource tree, and I think this
> should really also be done in upstream Linux), and hence re-using
> the bit here would change behavior for other parts of the kernel.

I don't think so. So long as it's clear by the time you free the page,
it doesn't matter how it gets used in the meantime (after all, you
should never free a pinned pagetable page).

In general, almost all the page flags are available for our use in
pagetable pages. The pages are privately owned by Xen, and don't
participate in any of the mm/ code, so there's no conflict if we use
the bits for something else. For example, in my Xen save/restore
patches, I've overloaded PG_dirty to mean pinned-for-suspend, and
there's no conflict with any other users.

    J
Jan Beulich
2008-May-21 06:47 UTC
Re: [Xen-devel] [RFC] linux: add p[mug]d_clear_full() accessors
>> One question though - in our 2.6.23 merge (where pv-ops-Xen's
>> PG_pinned appeared as an alias of PG_owner_priv_1, and where
>> PG_arch_1 got assigned a meaning for native x86, so PG_pinned
>> for the traditional patches needed to change anyway) I intentionally
>> didn't follow pv-ops for our patches, since PG_pinned is among the
>> flags bad_page() checks for (in the XenSource tree, and I think this
>> should really also be done in upstream Linux), and hence re-using
>> the bit here would change behavior for other parts of the kernel.
>
> I don't think so. So long as it's clear by the time you free the page,
> it doesn't matter how it gets used in the meantime (after all, you
> should never free a pinned pagetable page).

But isn't that one of the purposes of those page flags checks - checking
whether e.g. a page may validly get freed (free_pages_check())? All of
bad_page(), free_pages_check(), and prep_new_page() currently add
PG_pinned into the set of flags cleared/checked, and I continue to think
that that is the right thing to do.

Jan
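For illustration, the kind of check being referred to (a heavily
simplified sketch, not the actual mm/page_alloc.c code, and with an
abbreviated flag list):

/*
 * Simplified sketch of the free_pages_check()/bad_page() style test
 * under discussion - not the real mm/page_alloc.c code.  The XenSource
 * tree adds PG_pinned to this mask, so freeing a still-pinned pagetable
 * page is reported as a bad page; aliasing PG_pinned to PG_owner_priv_1
 * would change what this check means for other users of that bit.
 */
#define FLAGS_CHECKED_AT_FREE		\
	(1 << PG_lru		|	\
	 1 << PG_locked		|	\
	 1 << PG_private	|	\
	 1 << PG_writeback	|	\
	 1 << PG_reserved	|	\
	 1 << PG_pinned)	/* the Xen-specific addition */

static inline void check_page_at_free(struct page *page)
{
	if (unlikely(page->flags & FLAGS_CHECKED_AT_FREE))
		bad_page(page);
}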
Jeremy Fitzhardinge
2008-May-21 09:02 UTC
Re: [Xen-devel] [RFC] linux: add p[mug]d_clear_full() accessors
Jan Beulich wrote:
>>> One question though - in our 2.6.23 merge (where pv-ops-Xen's
>>> PG_pinned appeared as an alias of PG_owner_priv_1, and where
>>> PG_arch_1 got assigned a meaning for native x86, so PG_pinned
>>> for the traditional patches needed to change anyway) I intentionally
>>> didn't follow pv-ops for our patches, since PG_pinned is among the
>>> flags bad_page() checks for (in the XenSource tree, and I think this
>>> should really also be done in upstream Linux), and hence re-using
>>> the bit here would change behavior for other parts of the kernel.
>>
>> I don't think so. So long as it's clear by the time you free the page,
>> it doesn't matter how it gets used in the meantime (after all, you
>> should never free a pinned pagetable page).
>
> But isn't that one of the purposes of those page flags checks - checking
> whether e.g. a page may validly get freed (free_pages_check())? All of
> bad_page(), free_pages_check(), and prep_new_page() currently add
> PG_pinned into the set of flags cleared/checked, and I continue to think
> that that is the right thing to do.

Well, yes, it would be helpful for free_pages_check() to test whether
PG_owner_priv_1 is set on a page you're attempting to free, since that
would definitely be a bug. But it's also a bug which would turn up
fairly quickly anyway, since Xen would complain about any subsequent
users of that page.

Regardless, it's not actually a bug that has turned up, since the
lifespan of pagetable pages is pretty well controlled, and there's only
a couple of places where they get allocated and freed. So, not a big
issue either way, I think.

    J