Jan Beulich
2007-Jan-10 09:57 UTC
[Xen-devel] [PATCH] linux/i386: allow CONFIG_HIGHPTE on i386
While, as discussed, the performance impact of this option is certainly higher than on native Linux, the option should not be entirely disallowed if people want to sacrifice performance for less lowmem pressure. The patch at once does away with the use of PageForeign on pte pages, to match x86-64 and to reduce the delta to native. Signed-off-by: Jan Beulich <jbeulich@novell.com> Index: sle10-sp1-2006-12-21/arch/i386/Kconfig ==================================================================--- sle10-sp1-2006-12-21.orig/arch/i386/Kconfig 2007-01-09 14:39:13.000000000 +0100 +++ sle10-sp1-2006-12-21/arch/i386/Kconfig 2007-01-09 11:47:18.000000000 +0100 @@ -594,7 +594,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID config HIGHPTE bool "Allocate 3rd-level pagetables from highmem" - depends on (HIGHMEM4G || HIGHMEM64G) && !X86_XEN + depends on HIGHMEM4G || HIGHMEM64G help The VM uses one page table entry for each page of physical memory. For systems with a lot of RAM, this can be wasteful of precious Index: sle10-sp1-2006-12-21/arch/i386/mm/highmem-xen.c ==================================================================--- sle10-sp1-2006-12-21.orig/arch/i386/mm/highmem-xen.c 2007-01-09 14:39:13.000000000 +0100 +++ sle10-sp1-2006-12-21/arch/i386/mm/highmem-xen.c 2007-01-09 12:45:43.000000000 +0100 @@ -129,5 +129,6 @@ struct page *kmap_atomic_to_page(void *p EXPORT_SYMBOL(kmap); EXPORT_SYMBOL(kunmap); EXPORT_SYMBOL(kmap_atomic); +EXPORT_SYMBOL(kmap_atomic_pte); EXPORT_SYMBOL(kunmap_atomic); EXPORT_SYMBOL(kmap_atomic_to_page); Index: sle10-sp1-2006-12-21/arch/i386/mm/pgtable-xen.c ==================================================================--- sle10-sp1-2006-12-21.orig/arch/i386/mm/pgtable-xen.c 2007-01-09 14:39:13.000000000 +0100 +++ sle10-sp1-2006-12-21/arch/i386/mm/pgtable-xen.c 2007-01-09 16:53:58.000000000 +0100 @@ -238,26 +238,37 @@ struct page *pte_alloc_one(struct mm_str #ifdef CONFIG_HIGHPTE pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); + if (pte && PageHighMem(pte)) { + struct mmuext_op op; + + kmap_flush_unused(); + op.cmd = MMUEXT_PIN_L1_TABLE; + op.arg1.mfn = pfn_to_mfn(page_to_pfn(pte)); + BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); + } #else pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); - if (pte) { - SetPageForeign(pte, pte_free); - set_page_count(pte, 1); - } #endif return pte; } void pte_free(struct page *pte) { - unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT); + unsigned long pfn = page_to_pfn(pte); - if (!pte_write(*virt_to_ptep(va))) - BUG_ON(HYPERVISOR_update_va_mapping( - va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0)); + if (!PageHighMem(pte)) { + unsigned long va = (unsigned long)__va(pfn << PAGE_SHIFT); - ClearPageForeign(pte); - set_page_count(pte, 1); + if (!pte_write(*virt_to_ptep(va))) + BUG_ON(HYPERVISOR_update_va_mapping( + va, pfn_pte(pfn, PAGE_KERNEL), 0)); + } else { + struct mmuext_op op; + + op.cmd = MMUEXT_UNPIN_TABLE; + op.arg1.mfn = pfn_to_mfn(pfn); + BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); + } __free_page(pte); } Index: sle10-sp1-2006-12-21/include/asm-i386/mach-xen/asm/pgalloc.h ==================================================================--- sle10-sp1-2006-12-21.orig/include/asm-i386/mach-xen/asm/pgalloc.h 2007-01-09 14:39:13.000000000 +0100 +++ sle10-sp1-2006-12-21/include/asm-i386/mach-xen/asm/pgalloc.h 2007-01-09 11:11:54.000000000 +0100 @@ -42,12 +42,12 @@ extern struct page *pte_alloc_one(struct static inline void pte_free_kernel(pte_t *pte) { free_page((unsigned long)pte); - make_page_writable(pte, XENFEAT_writable_page_tables); + make_lowmem_page_writable(pte, XENFEAT_writable_page_tables); } extern void pte_free(struct page *pte); -#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) +#define __pte_free_tlb(tlb,pte) pte_free(pte) #ifdef CONFIG_X86_PAE /* _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2007-Jan-10 10:57 UTC
Re: [Xen-devel] [PATCH] linux/i386: allow CONFIG_HIGHPTE on i386
On 10/1/07 09:57, "Jan Beulich" <jbeulich@novell.com> wrote:> The patch at once does away with the use of PageForeign on pte pages, to match > x86-64 and to reduce the delta to native.Is it actually safe to circumvent tlb_remove_page()? It does rather more than just free_page()! That''s why I used a ForeignPage hook when writing the i386 code. It''s certainly *not* safe to assume that whoever wrote that part of the x86/64 port understood all the ramifications of what they were doing. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich
2007-Jan-10 11:14 UTC
Re: [Xen-devel] [PATCH] linux/i386: allow CONFIG_HIGHPTE on i386
>>> Keir Fraser <keir@xensource.com> 10.01.07 11:57 >>> >On 10/1/07 09:57, "Jan Beulich" <jbeulich@novell.com> wrote: > >> The patch at once does away with the use of PageForeign on pte pages, to match >> x86-64 and to reduce the delta to native. > >Is it actually safe to circumvent tlb_remove_page()? It does rather more >than just free_page()! That''s why I used a ForeignPage hook when writing the >i386 code. It''s certainly *not* safe to assume that whoever wrote that part >of the x86/64 port understood all the ramifications of what they were doing.Not really, as I understand it - it either frees the page (when the TLB is in fast mode) or inserts the page into the gather list, bumping the contained pages count and freeing all of them and flushing the hardware TLBs if exceeding the threshold - the latter step is otherwise done from tlb_finish_mmu(). And - I asked about this inconsistency between i386 and x86-64 before submitting the patch, with no useful responses... Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2007-Jan-10 11:28 UTC
Re: [Xen-devel] [PATCH] linux/i386: allow CONFIG_HIGHPTE on i386
On 10/1/07 11:14, "Jan Beulich" <jbeulich@novell.com> wrote:>> Is it actually safe to circumvent tlb_remove_page()? It does rather more >> than just free_page()! That''s why I used a ForeignPage hook when writing the >> i386 code. It''s certainly *not* safe to assume that whoever wrote that part >> of the x86/64 port understood all the ramifications of what they were doing. > > Not really, as I understand it - it either frees the page (when the TLB is in > fast > mode) or inserts the page into the gather list, bumping the contained pages > count and freeing all of them and flushing the hardware TLBs if exceeding the > threshold - the latter step is otherwise done from tlb_finish_mmu().Presumably this gathering happens because it may not be safe to free the pages without having first flushed the TLBs? Freeing the page with no TLB flush management at all would be a bad idea in that case. And I said that sync''ing with i386 is a good general rule. I trust the code a lot more. :-) -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich
2007-Jan-10 11:39 UTC
Re: [Xen-devel] [PATCH] linux/i386: allow CONFIG_HIGHPTE on i386
>And I said that sync''ing with i386 is a good general rule. I trust the code >a lot more. :-)I certainly agree to this in general. And I can certainly re-do the patch syncing the other way around... Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich
2007-Jan-10 15:52 UTC
[Xen-devel] [PATCH] linux/x86-64: sync page table allocation with i386 (was: Re: [PATCH] linux/i386: allow CONFIG_HIGHPTE on i386)
>>> Keir Fraser <keir@xensource.com> 10.01.07 12:28 >>> >On 10/1/07 11:14, "Jan Beulich" <jbeulich@novell.com> wrote: > >>> Is it actually safe to circumvent tlb_remove_page()? It does rather more >>> than just free_page()! That''s why I used a ForeignPage hook when writing the >>> i386 code. It''s certainly *not* safe to assume that whoever wrote that part >>> of the x86/64 port understood all the ramifications of what they were doing. >> >> Not really, as I understand it - it either frees the page (when the TLB is in >> fast >> mode) or inserts the page into the gather list, bumping the contained pages >> count and freeing all of them and flushing the hardware TLBs if exceeding the >> threshold - the latter step is otherwise done from tlb_finish_mmu(). > >Presumably this gathering happens because it may not be safe to free the >pages without having first flushed the TLBs? Freeing the page with no TLB >flush management at all would be a bad idea in that case. > >And I said that sync''ing with i386 is a good general rule. I trust the code >a lot more. :-)Signed-off-by: Jan Beulich <jbeulich@novell.com> Index: sle10-sp1-2007-01-10/arch/x86_64/mm/pageattr-xen.c ==================================================================--- sle10-sp1-2007-01-10.orig/arch/x86_64/mm/pageattr-xen.c 2007-01-10 13:44:37.000000000 +0100 +++ sle10-sp1-2007-01-10/arch/x86_64/mm/pageattr-xen.c 2007-01-10 14:07:10.000000000 +0100 @@ -164,6 +164,18 @@ void _arch_exit_mmap(struct mm_struct *m mm_unpin(mm); } +struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) +{ + struct page *pte; + + pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); + if (pte) { + SetPageForeign(pte, pte_free); + set_page_count(pte, 1); + } + return pte; +} + void pte_free(struct page *pte) { unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT); @@ -171,6 +183,10 @@ void pte_free(struct page *pte) if (!pte_write(*virt_to_ptep(va))) BUG_ON(HYPERVISOR_update_va_mapping( va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0)); + + ClearPageForeign(pte); + set_page_count(pte, 1); + __free_page(pte); } #endif /* CONFIG_XEN */ Index: sle10-sp1-2007-01-10/include/asm-x86_64/mach-xen/asm/pgalloc.h ==================================================================--- sle10-sp1-2007-01-10.orig/include/asm-x86_64/mach-xen/asm/pgalloc.h 2007-01-10 10:09:53.000000000 +0100 +++ sle10-sp1-2007-01-10/include/asm-x86_64/mach-xen/asm/pgalloc.h 2007-01-10 14:33:04.000000000 +0100 @@ -64,42 +64,35 @@ static inline void pgd_populate(struct m } } -static inline void pmd_free(pmd_t *pmd) +extern struct page *pte_alloc_one(struct mm_struct *mm, unsigned long addr); +extern void pte_free(struct page *pte); + +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - pte_t *ptep = virt_to_ptep(pmd); + struct page *pg; - if (!pte_write(*ptep)) { - BUG_ON(HYPERVISOR_update_va_mapping( - (unsigned long)pmd, - pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT, PAGE_KERNEL), - 0)); - } - free_page((unsigned long)pmd); + pg = pte_alloc_one(mm, addr); + return pg ? page_address(pg) : NULL; } -static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) +static inline void pmd_free(pmd_t *pmd) { - pmd_t *pmd = (pmd_t *) get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); - return pmd; + BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); + pte_free(virt_to_page(pmd)); } static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - pud_t *pud = (pud_t *) get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); - return pud; + struct page *pg; + + pg = pte_alloc_one(mm, addr); + return pg ? page_address(pg) : NULL; } static inline void pud_free(pud_t *pud) { - pte_t *ptep = virt_to_ptep(pud); - - if (!pte_write(*ptep)) { - BUG_ON(HYPERVISOR_update_va_mapping( - (unsigned long)pud, - pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT, PAGE_KERNEL), - 0)); - } - free_page((unsigned long)pud); + BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); + pte_free(virt_to_page(pud)); } static inline pgd_t *pgd_alloc(struct mm_struct *mm) @@ -167,14 +160,6 @@ static inline pte_t *pte_alloc_one_kerne return pte; } -static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) -{ - struct page *pte; - - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); - return pte; -} - /* Should really implement gc for free page table pages. This could be done with a reference count in struct page. */ @@ -185,14 +170,8 @@ static inline void pte_free_kernel(pte_t free_page((unsigned long)pte); } -extern void pte_free(struct page *pte); - -//#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) -//#define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x)) -//#define __pud_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x)) - -#define __pte_free_tlb(tlb,x) pte_free((x)) -#define __pmd_free_tlb(tlb,x) pmd_free((x)) -#define __pud_free_tlb(tlb,x) pud_free((x)) +#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) +#define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x)) +#define __pud_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x)) #endif /* _X86_64_PGALLOC_H */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel