Mark McLoughlin
2008-Mar-28 16:38 UTC
[Xen-devel] [PATCH 0/3] xen: Fix oops when mapping/unmapping addr above 1Gb
Hi, In Fedora rawhide we're currently seeing oopses whenever a process dlopen()s a library which prelink() has previously relocated above 1Gb. Some more details at: https://bugzilla.redhat.com/436453 The issue seems to be that during xen_pgd_pin(), we only pin PTE pages, but xen's release_pmd() tries to unpin PMD pages too. The following series of patches is against the latest x86 tree. Cheers, Mark. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Mark McLoughlin
2008-Mar-28 16:38 UTC
[Xen-devel] [PATCH 1/3] xen: Refactor xen_{alloc, release}_{pte, pmd}()
Use the pt_level enum with generic alloc_ptpage() and release_ptpage() functions to make it a little more clear what''s going on in next commit. Signed-off-by: Mark McLoughlin <markmc@redhat.com> --- arch/x86/xen/enlighten.c | 27 ++++++++++++++++++++------- arch/x86/xen/mmu.c | 7 ------- arch/x86/xen/mmu.h | 7 +++++++ 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 1e5ea9e..a6a1b40 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -669,10 +669,10 @@ static void xen_release_pte_init(u32 pfn) make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } -static void pin_pagetable_pfn(unsigned level, unsigned long pfn) +static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) { struct mmuext_op op; - op.cmd = level; + op.cmd = cmd; op.arg1.mfn = pfn_to_mfn(pfn); if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) BUG(); @@ -689,7 +689,10 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level) if (!PageHighMem(page)) { make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); - pin_pagetable_pfn(level, pfn); + if (level == PT_PTE) + pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); + else if (level == PT_PMD) + pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE, pfn); } else /* make sure there are no stray mappings of this page */ @@ -699,16 +702,16 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level) static void xen_alloc_pte(struct mm_struct *mm, u32 pfn) { - xen_alloc_ptpage(mm, pfn, MMUEXT_PIN_L1_TABLE); + xen_alloc_ptpage(mm, pfn, PT_PTE); } static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn) { - xen_alloc_ptpage(mm, pfn, MMUEXT_PIN_L2_TABLE); + xen_alloc_ptpage(mm, pfn, PT_PMD); } /* This should never happen until we''re OK to use struct page */ -static void xen_release_pte(u32 pfn) +static void xen_release_ptpage(u32 pfn, unsigned level) { struct page *page = pfn_to_page(pfn); @@ -720,6 +723,16 @@ static void xen_release_pte(u32 pfn) } } +static 
void xen_release_pte(u32 pfn) +{ + xen_release_ptpage(pfn, PT_PTE); +} + +static void xen_release_pmd(u32 pfn) +{ + xen_release_ptpage(pfn, PT_PMD); +} + #ifdef CONFIG_HIGHPTE static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) { @@ -840,7 +853,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base) pv_mmu_ops.alloc_pte = xen_alloc_pte; pv_mmu_ops.alloc_pmd = xen_alloc_pmd; pv_mmu_ops.release_pte = xen_release_pte; - pv_mmu_ops.release_pmd = xen_release_pte; + pv_mmu_ops.release_pmd = xen_release_pmd; pv_mmu_ops.set_pte = xen_set_pte; setup_shared_info(); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f5bb0be..272635a 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -271,13 +271,6 @@ void xen_set_pte(pte_t *ptep, pte_t pte) } #endif /* CONFIG_X86_PAE */ -enum pt_level { - PT_PGD, - PT_PUD, - PT_PMD, - PT_PTE -}; - /* (Yet another) pagetable walker. This one is intended for pinning a pagetable. This means that it walks a pagetable and calls the diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index c9ff27f..b5e189b 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -3,6 +3,13 @@ #include <linux/linkage.h> #include <asm/page.h> +enum pt_level { + PT_PGD, + PT_PUD, + PT_PMD, + PT_PTE +}; + /* * Page-directory addresses above 4GB do not fit into architectural %cr3. * When accessing %cr3, or equivalent field in vcpu_guest_context, guests -- 1.5.4.1 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Mark McLoughlin
2008-Mar-28 16:38 UTC
[Xen-devel] [PATCH 2/3] xen: Do not pin/unpin PMD pages
During process startup, in xen_pgd_pin(), we pin PTE pages but not PMD pages. If we then later map and unmap using an otherwise unused PMD page, the unpin in release_pmd() fails because it wasn't previously pinned. i.e. with this simple test case: int fd = open("/dev/zero", O_RDONLY); munmap(mmap((void *)0x40000000, 0x1000, PROT_READ, MAP_PRIVATE, fd, 0), 0x1000); close(fd); we currently get: kernel BUG at arch/x86/xen/enlighten.c:678! ... EIP is at xen_release_pt+0x79/0xa9 ... Call Trace: [<c041da25>] ? __pmd_free_tlb+0x1a/0x75 [<c047a192>] ? free_pgd_range+0x1d2/0x2b5 [<c047a2f3>] ? free_pgtables+0x7e/0x93 [<c047b272>] ? unmap_region+0xb9/0xf5 [<c047c1bd>] ? do_munmap+0x193/0x1f5 [<c047c24f>] ? sys_munmap+0x30/0x3f [<c0408cce>] ? syscall_call+0x7/0xb ====================== and xen complains: (XEN) mm.c:2241:d4 Mfn 1cc37 not pinned Signed-off-by: Mark McLoughlin <markmc@redhat.com> --- arch/x86/xen/enlighten.c | 5 ++--- 1 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a6a1b40..0ce225f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -691,8 +691,6 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level) make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); if (level == PT_PTE) pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); - else if (level == PT_PMD) - pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE, pfn); } else /* make sure there are no stray mappings of this page */ @@ -717,7 +715,8 @@ static void xen_release_ptpage(u32 pfn, unsigned level) if (PagePinned(page)) { if (!PageHighMem(page)) { - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); + if (level == PT_PTE) + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } } -- 1.5.4.1 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Mark McLoughlin
2008-Mar-28 16:38 UTC
[Xen-devel] [PATCH 3/3] xen: Clear PG_pinned in release_{pte, pmd}()
We don't currently clear PG_pinned in release_pt(), probably because the page is to be freed anyway, but it jumps out as an obvious gap in the logic, so clear the flag for consistency's sake. Signed-off-by: Mark McLoughlin <markmc@redhat.com> --- arch/x86/xen/enlighten.c | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0ce225f..8c5ff24 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -719,6 +719,7 @@ static void xen_release_ptpage(u32 pfn, unsigned level) pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } + ClearPagePinned(page); } } -- 1.5.4.1 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jeremy Fitzhardinge
2008-Mar-28 18:02 UTC
[Xen-devel] Re: [PATCH 0/3] xen: Fix oops when mapping/unmapping addr above 1Gb
Mark McLoughlin wrote: > Hi, > In Fedora rawhide we're currently seeing oopses whenever > a process dlopen()s a library which prelink() has previously > relocated above 1Gb. Some more details at: > > https://bugzilla.redhat.com/436453 > > The issue seems to be that during xen_pgd_pin(), we only > pin PTE pages, but xen's release_pmd() tries to unpin PMD pages > too. > > The following series of patches is against the latest > x86 tree. > Hm, OK. I'm trying to work out what happened here. I guess it's a latent bug which is only visible now that it's possible to incrementally free pmds. A side note is that I'm planning on pinning pmds in the future, so that we can get away with replacing the full pgd page per mm with a pgd page per cpu. J _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Ingo Molnar
2008-Mar-31 13:14 UTC
[Xen-devel] Re: [PATCH 0/3] xen: Fix oops when mapping/unmapping addr above 1Gb
* Mark McLoughlin <markmc@redhat.com> wrote: > In Fedora rawhide we're currently seeing oopses whenever a > process dlopen()s a library which prelink() has previously relocated > above 1Gb. Some more details at: > > https://bugzilla.redhat.com/436453 > > The issue seems to be that during xen_pgd_pin(), we only pin > PTE pages, but xen's release_pmd() tries to unpin PMD pages too. > > The following series of patches is against the latest x86 > tree. thanks, i've picked them up to get them tested - but it would be nice to have an ack from Jeremy as well. This seems like v2.6.25 material as well, right? Ingo _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jeremy Fitzhardinge
2008-Mar-31 14:55 UTC
[Xen-devel] Re: [PATCH 0/3] xen: Fix oops when mapping/unmapping addr above 1Gb
Ingo Molnar wrote: > * Mark McLoughlin <markmc@redhat.com> wrote: > > >> In Fedora rawhide we're currently seeing oopses whenever a >> process dlopen()s a library which prelink() has previously relocated >> above 1Gb. Some more details at: >> >> https://bugzilla.redhat.com/436453 >> >> The issue seems to be that during xen_pgd_pin(), we only pin >> PTE pages, but xen's release_pmd() tries to unpin PMD pages too. >> >> The following series of patches is against the latest x86 >> tree. >> > > thanks, i've picked them up to get them tested - but it would be nice to > have an ack from Jeremy as well. This seems like v2.6.25 material as > well, right? Didn't I reply? Anyway, the patch looks good. It's definitely 2.6.25 material; it's fallout from the changes I did to dynamically allocate/deallocate pmds rather than always do them with the pgd. J _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Mark McLoughlin
2008-Apr-02 14:35 UTC
[Xen-devel] Re: [PATCH 0/3] xen: Fix oops when mapping/unmapping addr above 1Gb
On Mon, 2008-03-31 at 07:55 -0700, Jeremy Fitzhardinge wrote: > Ingo Molnar wrote: > > * Mark McLoughlin <markmc@redhat.com> wrote: > > > >> In Fedora rawhide we're currently seeing oopses whenever a > >> process dlopen()s a library which prelink() has previously relocated > >> above 1Gb. Some more details at: > >> > >> https://bugzilla.redhat.com/436453 > >> > >> The issue seems to be that during xen_pgd_pin(), we only pin > >> PTE pages, but xen's release_pmd() tries to unpin PMD pages too. > >> > >> The following series of patches is against the latest x86 > >> tree. > >> > > > > thanks, i've picked them up to get them tested - but it would be nice to > > have an ack from Jeremy as well. This seems like v2.6.25 material as > > well, right? > > Didn't I reply? Anyway, the patch looks good. It's definitely 2.6.25 > material; it's fallout from the changes I did to dynamically > allocate/deallocate pmds rather than always do them with the pgd. Okay, here are the patches rebased to 2.6.25-rc8. I've reproduced the issue with plain -rc8 and confirmed the patches fix it. Cheers, Mark. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Mark McLoughlin
2008-Apr-02 14:36 UTC
[Xen-devel] [PATCH 1/3] xen: Refactor xen_{alloc, release}_{pt, pd}()
Use the pt_level enum with generic alloc_ptpage() and release_ptpage() functions to make it a little more clear what''s going on in next commit. Signed-off-by: Mark McLoughlin <markmc@redhat.com> --- arch/x86/xen/enlighten.c | 27 ++++++++++++++++++++------- arch/x86/xen/mmu.c | 7 ------- arch/x86/xen/mmu.h | 7 +++++++ 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index de4e6f0..16e2f80 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -667,10 +667,10 @@ static void xen_release_pt_init(u32 pfn) make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } -static void pin_pagetable_pfn(unsigned level, unsigned long pfn) +static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) { struct mmuext_op op; - op.cmd = level; + op.cmd = cmd; op.arg1.mfn = pfn_to_mfn(pfn); if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) BUG(); @@ -687,7 +687,10 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level) if (!PageHighMem(page)) { make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); - pin_pagetable_pfn(level, pfn); + if (level == PT_PTE) + pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); + else if (level == PT_PMD) + pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE, pfn); } else /* make sure there are no stray mappings of this page */ @@ -697,16 +700,16 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level) static void xen_alloc_pt(struct mm_struct *mm, u32 pfn) { - xen_alloc_ptpage(mm, pfn, MMUEXT_PIN_L1_TABLE); + xen_alloc_ptpage(mm, pfn, PT_PTE); } static void xen_alloc_pd(struct mm_struct *mm, u32 pfn) { - xen_alloc_ptpage(mm, pfn, MMUEXT_PIN_L2_TABLE); + xen_alloc_ptpage(mm, pfn, PT_PMD); } /* This should never happen until we''re OK to use struct page */ -static void xen_release_pt(u32 pfn) +static void xen_release_ptpage(u32 pfn, unsigned level) { struct page *page = pfn_to_page(pfn); @@ -718,6 +721,16 @@ static void xen_release_pt(u32 pfn) } } +static void 
xen_release_pt(u32 pfn) +{ + xen_release_ptpage(pfn, PT_PTE); +} + +static void xen_release_pd(u32 pfn) +{ + xen_release_ptpage(pfn, PT_PMD); +} + #ifdef CONFIG_HIGHPTE static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) { @@ -838,7 +851,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base) pv_mmu_ops.alloc_pt = xen_alloc_pt; pv_mmu_ops.alloc_pd = xen_alloc_pd; pv_mmu_ops.release_pt = xen_release_pt; - pv_mmu_ops.release_pd = xen_release_pt; + pv_mmu_ops.release_pd = xen_release_pd; pv_mmu_ops.set_pte = xen_set_pte; setup_shared_info(); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 0144395..2a054ef 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -310,13 +310,6 @@ pgd_t xen_make_pgd(unsigned long pgd) } #endif /* CONFIG_X86_PAE */ -enum pt_level { - PT_PGD, - PT_PUD, - PT_PMD, - PT_PTE -}; - /* (Yet another) pagetable walker. This one is intended for pinning a pagetable. This means that it walks a pagetable and calls the diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index c9ff27f..b5e189b 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -3,6 +3,13 @@ #include <linux/linkage.h> #include <asm/page.h> +enum pt_level { + PT_PGD, + PT_PUD, + PT_PMD, + PT_PTE +}; + /* * Page-directory addresses above 4GB do not fit into architectural %cr3. * When accessing %cr3, or equivalent field in vcpu_guest_context, guests -- 1.5.4.5 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Mark McLoughlin
2008-Apr-02 14:36 UTC
[Xen-devel] [PATCH 2/3] xen: Do not pin/unpin PMD pages
During process startup, in xen_pgd_pin(), we pin PTE pages but not PMD pages. If we then later map and unmap using an otherwise unused PMD page, the unpin in release_pd() fails because it wasn't previously pinned. i.e. with this simple test case: int fd = open("/dev/zero", O_RDONLY); munmap(mmap((void *)0x40000000, 0x1000, PROT_READ, MAP_PRIVATE, fd, 0), 0x1000); close(fd); we currently get: kernel BUG at arch/x86/xen/enlighten.c:678! ... EIP is at xen_release_pt+0x79/0xa9 ... Call Trace: [<c041da25>] ? __pmd_free_tlb+0x1a/0x75 [<c047a192>] ? free_pgd_range+0x1d2/0x2b5 [<c047a2f3>] ? free_pgtables+0x7e/0x93 [<c047b272>] ? unmap_region+0xb9/0xf5 [<c047c1bd>] ? do_munmap+0x193/0x1f5 [<c047c24f>] ? sys_munmap+0x30/0x3f [<c0408cce>] ? syscall_call+0x7/0xb ====================== and xen complains: (XEN) mm.c:2241:d4 Mfn 1cc37 not pinned Further details at: https://bugzilla.redhat.com/436453 Signed-off-by: Mark McLoughlin <markmc@redhat.com> --- arch/x86/xen/enlighten.c | 5 ++--- 1 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 16e2f80..f16b056 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -689,8 +689,6 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level) make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); if (level == PT_PTE) pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); - else if (level == PT_PMD) - pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE, pfn); } else /* make sure there are no stray mappings of this page */ @@ -715,7 +713,8 @@ static void xen_release_ptpage(u32 pfn, unsigned level) if (PagePinned(page)) { if (!PageHighMem(page)) { - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); + if (level == PT_PTE) + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } } -- 1.5.4.5 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Mark McLoughlin
2008-Apr-02 14:36 UTC
[Xen-devel] [PATCH 3/3] xen: Clear PG_pinned in release_{pt,pd}()
We don't currently clear PG_pinned in release_pt(), probably because the page is to be freed anyway, but it jumps out as an obvious gap in the logic, so clear the flag for consistency's sake. Signed-off-by: Mark McLoughlin <markmc@redhat.com> --- arch/x86/xen/enlighten.c | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index f16b056..27ee26a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -717,6 +717,7 @@ static void xen_release_ptpage(u32 pfn, unsigned level) pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } + ClearPagePinned(page); } } -- 1.5.4.5 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel