Mark McLoughlin
2008-Mar-28 16:38 UTC
[Xen-devel] [PATCH 0/3] xen: Fix oops when mapping/unmapping addr above 1Gb
Hi,
In Fedora rawhide we're currently seeing oopses whenever
a process dlopen()s a library which prelink() has previously
relocated above 1Gb. Some more details at:
https://bugzilla.redhat.com/436453
The issue seems to be that during xen_pgd_pin(), we only
pin PTE pages, but xen's release_pmd() tries to unpin PMD pages
too.
The following series of patches is against the latest
x86 tree.
Cheers,
Mark.
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Mark McLoughlin
2008-Mar-28 16:38 UTC
[Xen-devel] [PATCH 1/3] xen: Refactor xen_{alloc, release}_{pte, pmd}()
Use the pt_level enum with generic alloc_ptpage()
and release_ptpage() functions to make it a little
clearer what's going on in the next commit.
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
arch/x86/xen/enlighten.c | 27 ++++++++++++++++++++-------
arch/x86/xen/mmu.c | 7 -------
arch/x86/xen/mmu.h | 7 +++++++
3 files changed, 27 insertions(+), 14 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 1e5ea9e..a6a1b40 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -669,10 +669,10 @@ static void xen_release_pte_init(u32 pfn)
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
-static void pin_pagetable_pfn(unsigned level, unsigned long pfn)
+static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
{
struct mmuext_op op;
- op.cmd = level;
+ op.cmd = cmd;
op.arg1.mfn = pfn_to_mfn(pfn);
if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
BUG();
@@ -689,7 +689,10 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn,
unsigned level)
if (!PageHighMem(page)) {
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
- pin_pagetable_pfn(level, pfn);
+ if (level == PT_PTE)
+ pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
+ else if (level == PT_PMD)
+ pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE, pfn);
} else
/* make sure there are no stray mappings of
this page */
@@ -699,16 +702,16 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32
pfn, unsigned level)
static void xen_alloc_pte(struct mm_struct *mm, u32 pfn)
{
- xen_alloc_ptpage(mm, pfn, MMUEXT_PIN_L1_TABLE);
+ xen_alloc_ptpage(mm, pfn, PT_PTE);
}
static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn)
{
- xen_alloc_ptpage(mm, pfn, MMUEXT_PIN_L2_TABLE);
+ xen_alloc_ptpage(mm, pfn, PT_PMD);
}
/* This should never happen until we're OK to use struct page */
-static void xen_release_pte(u32 pfn)
+static void xen_release_ptpage(u32 pfn, unsigned level)
{
struct page *page = pfn_to_page(pfn);
@@ -720,6 +723,16 @@ static void xen_release_pte(u32 pfn)
}
}
+static void xen_release_pte(u32 pfn)
+{
+ xen_release_ptpage(pfn, PT_PTE);
+}
+
+static void xen_release_pmd(u32 pfn)
+{
+ xen_release_ptpage(pfn, PT_PMD);
+}
+
#ifdef CONFIG_HIGHPTE
static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
{
@@ -840,7 +853,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
pv_mmu_ops.alloc_pte = xen_alloc_pte;
pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
pv_mmu_ops.release_pte = xen_release_pte;
- pv_mmu_ops.release_pmd = xen_release_pte;
+ pv_mmu_ops.release_pmd = xen_release_pmd;
pv_mmu_ops.set_pte = xen_set_pte;
setup_shared_info();
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index f5bb0be..272635a 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -271,13 +271,6 @@ void xen_set_pte(pte_t *ptep, pte_t pte)
}
#endif /* CONFIG_X86_PAE */
-enum pt_level {
- PT_PGD,
- PT_PUD,
- PT_PMD,
- PT_PTE
-};
-
/*
(Yet another) pagetable walker. This one is intended for pinning a
pagetable. This means that it walks a pagetable and calls the
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index c9ff27f..b5e189b 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -3,6 +3,13 @@
#include <linux/linkage.h>
#include <asm/page.h>
+enum pt_level {
+ PT_PGD,
+ PT_PUD,
+ PT_PMD,
+ PT_PTE
+};
+
/*
* Page-directory addresses above 4GB do not fit into architectural %cr3.
* When accessing %cr3, or equivalent field in vcpu_guest_context, guests
--
1.5.4.1
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Mark McLoughlin
2008-Mar-28 16:38 UTC
[Xen-devel] [PATCH 2/3] xen: Do not pin/unpin PMD pages
During process startup, in xen_pgd_pin(), we pin PTE pages
but not PMD pages. If we then later map and unmap using an
otherwise unused PMD page, the unpin in release_pmd() fails
because the page wasn't previously pinned.
i.e. with this simple test case:
int fd = open("/dev/zero", O_RDONLY);
munmap(mmap((void *)0x40000000, 0x1000, PROT_READ, MAP_PRIVATE, fd, 0),
0x1000);
close(fd);
we currently get:
kernel BUG at arch/x86/xen/enlighten.c:678!
...
EIP is at xen_release_pt+0x79/0xa9
...
Call Trace:
[<c041da25>] ? __pmd_free_tlb+0x1a/0x75
[<c047a192>] ? free_pgd_range+0x1d2/0x2b5
[<c047a2f3>] ? free_pgtables+0x7e/0x93
[<c047b272>] ? unmap_region+0xb9/0xf5
[<c047c1bd>] ? do_munmap+0x193/0x1f5
[<c047c24f>] ? sys_munmap+0x30/0x3f
[<c0408cce>] ? syscall_call+0x7/0xb
======================
and xen complains:
(XEN) mm.c:2241:d4 Mfn 1cc37 not pinned
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
arch/x86/xen/enlighten.c | 5 ++---
1 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index a6a1b40..0ce225f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -691,8 +691,6 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn,
unsigned level)
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
if (level == PT_PTE)
pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
- else if (level == PT_PMD)
- pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE, pfn);
} else
/* make sure there are no stray mappings of
this page */
@@ -717,7 +715,8 @@ static void xen_release_ptpage(u32 pfn, unsigned level)
if (PagePinned(page)) {
if (!PageHighMem(page)) {
- pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
+ if (level == PT_PTE)
+ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
}
--
1.5.4.1
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Mark McLoughlin
2008-Mar-28 16:38 UTC
[Xen-devel] [PATCH 3/3] xen: Clear PG_pinned in release_{pte, pmd}()
We don't currently clear PG_pinned in release_pt(), probably
because the page is to be freed anyway, but it jumps out as an
obvious gap in the logic, so clear the flag for consistency's sake.
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
arch/x86/xen/enlighten.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 0ce225f..8c5ff24 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -719,6 +719,7 @@ static void xen_release_ptpage(u32 pfn, unsigned level)
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
+ ClearPagePinned(page);
}
}
--
1.5.4.1
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Jeremy Fitzhardinge
2008-Mar-28 18:02 UTC
[Xen-devel] Re: [PATCH 0/3] xen: Fix oops when mapping/unmapping addr above 1Gb
Mark McLoughlin wrote:
> Hi,
> In Fedora rawhide we're currently seeing oopses whenever
> a processes dlopen()s a library which prelink() has previously
> relocated above 1Gb. Some more details at:
>
> https://bugzilla.redhat.com/436453
>
> The issue seems to be that during xen_pgd_pin(), we only
> pin PTE pages, but xen's release_pmd() tries to unpin PMD pages
> too.
>
> The following series of patches is against the latest
> x86 tree.
>
Hm, OK. I'm trying to work out what happened here. I guess it's a latent bug which is only visible now that it's possible to incrementally free pmds.
A side note is that I'm planning on pinning pmds in the future, so that we can get away with replacing the full pgd page per mm with a pgd page per cpu.
J
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Ingo Molnar
2008-Mar-31 13:14 UTC
[Xen-devel] Re: [PATCH 0/3] xen: Fix oops when mapping/unmapping addr above 1Gb
* Mark McLoughlin <markmc@redhat.com> wrote:
> In Fedora rawhide we're currently seeing oopses whenever a
> processes dlopen()s a library which prelink() has previously relocated
> above 1Gb. Some more details at:
>
> https://bugzilla.redhat.com/436453
>
> The issue seems to be that during xen_pgd_pin(), we only pin
> PTE pages, but xen's release_pmd() tries to unpin PMD pages too.
>
> The following series of patches is against the latest x86
> tree.
thanks, i've picked them up to get them tested - but it would be nice to have an ack from Jeremy as well. This seems like v2.6.25 material as well, right?
Ingo
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Jeremy Fitzhardinge
2008-Mar-31 14:55 UTC
[Xen-devel] Re: [PATCH 0/3] xen: Fix oops when mapping/unmapping addr above 1Gb
Ingo Molnar wrote:
> * Mark McLoughlin <markmc@redhat.com> wrote:
>
>
>> In Fedora rawhide we're currently seeing oopses whenever a
>> processes dlopen()s a library which prelink() has previously relocated
>> above 1Gb. Some more details at:
>>
>> https://bugzilla.redhat.com/436453
>>
>> The issue seems to be that during xen_pgd_pin(), we only pin
>> PTE pages, but xen's release_pmd() tries to unpin PMD pages too.
>>
>> The following series of patches is against the latest x86
>> tree.
>>
>
> thanks, i've picked them up to get them tested - but it would be nice to
> have an ack from Jeremy as well. This seems like v2.6.25 material as
> well, right?
Didn't I reply? Anyway, the patch looks good. It's definitely 2.6.25 material; it's fallout from the changes I did to dynamically allocate/deallocate pmds rather than always do them with the pgd.
J
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Mark McLoughlin
2008-Apr-02 14:35 UTC
[Xen-devel] Re: [PATCH 0/3] xen: Fix oops when mapping/unmapping addr above 1Gb
On Mon, 2008-03-31 at 07:55 -0700, Jeremy Fitzhardinge wrote:
> Ingo Molnar wrote:
> > * Mark McLoughlin <markmc@redhat.com> wrote:
> >
> >> In Fedora rawhide we're currently seeing oopses whenever a
> >> processes dlopen()s a library which prelink() has previously relocated
> >> above 1Gb. Some more details at:
> >>
> >> https://bugzilla.redhat.com/436453
> >>
> >> The issue seems to be that during xen_pgd_pin(), we only pin
> >> PTE pages, but xen's release_pmd() tries to unpin PMD pages too.
> >>
> >> The following series of patches is against the latest x86
> >> tree.
> >>
> >
> > thanks, i've picked them up to get them tested - but it would be nice to
> > have an ack from Jeremy as well. This seems like v2.6.25 material as
> > well, right?
>
> Didn't I reply? Anyway, the patch looks good. It's definitely 2.6.25
> material; its fallout from the changes I did to dynamically
> allocate/deallocate pmds rather than always do them with the pgd.
Okay, here are the patches rebased to 2.6.25-rc8. I've reproduced the issue with plain -rc8 and confirmed the patches fix it.
Cheers,
Mark.
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Mark McLoughlin
2008-Apr-02 14:36 UTC
[Xen-devel] [PATCH 1/3] xen: Refactor xen_{alloc, release}_{pt, pd}()
Use the pt_level enum with generic alloc_ptpage()
and release_ptpage() functions to make it a little
clearer what's going on in the next commit.
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
arch/x86/xen/enlighten.c | 27 ++++++++++++++++++++-------
arch/x86/xen/mmu.c | 7 -------
arch/x86/xen/mmu.h | 7 +++++++
3 files changed, 27 insertions(+), 14 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index de4e6f0..16e2f80 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -667,10 +667,10 @@ static void xen_release_pt_init(u32 pfn)
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
-static void pin_pagetable_pfn(unsigned level, unsigned long pfn)
+static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
{
struct mmuext_op op;
- op.cmd = level;
+ op.cmd = cmd;
op.arg1.mfn = pfn_to_mfn(pfn);
if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
BUG();
@@ -687,7 +687,10 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn,
unsigned level)
if (!PageHighMem(page)) {
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
- pin_pagetable_pfn(level, pfn);
+ if (level == PT_PTE)
+ pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
+ else if (level == PT_PMD)
+ pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE, pfn);
} else
/* make sure there are no stray mappings of
this page */
@@ -697,16 +700,16 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32
pfn, unsigned level)
static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
{
- xen_alloc_ptpage(mm, pfn, MMUEXT_PIN_L1_TABLE);
+ xen_alloc_ptpage(mm, pfn, PT_PTE);
}
static void xen_alloc_pd(struct mm_struct *mm, u32 pfn)
{
- xen_alloc_ptpage(mm, pfn, MMUEXT_PIN_L2_TABLE);
+ xen_alloc_ptpage(mm, pfn, PT_PMD);
}
/* This should never happen until we're OK to use struct page */
-static void xen_release_pt(u32 pfn)
+static void xen_release_ptpage(u32 pfn, unsigned level)
{
struct page *page = pfn_to_page(pfn);
@@ -718,6 +721,16 @@ static void xen_release_pt(u32 pfn)
}
}
+static void xen_release_pt(u32 pfn)
+{
+ xen_release_ptpage(pfn, PT_PTE);
+}
+
+static void xen_release_pd(u32 pfn)
+{
+ xen_release_ptpage(pfn, PT_PMD);
+}
+
#ifdef CONFIG_HIGHPTE
static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
{
@@ -838,7 +851,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
pv_mmu_ops.alloc_pt = xen_alloc_pt;
pv_mmu_ops.alloc_pd = xen_alloc_pd;
pv_mmu_ops.release_pt = xen_release_pt;
- pv_mmu_ops.release_pd = xen_release_pt;
+ pv_mmu_ops.release_pd = xen_release_pd;
pv_mmu_ops.set_pte = xen_set_pte;
setup_shared_info();
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 0144395..2a054ef 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -310,13 +310,6 @@ pgd_t xen_make_pgd(unsigned long pgd)
}
#endif /* CONFIG_X86_PAE */
-enum pt_level {
- PT_PGD,
- PT_PUD,
- PT_PMD,
- PT_PTE
-};
-
/*
(Yet another) pagetable walker. This one is intended for pinning a
pagetable. This means that it walks a pagetable and calls the
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index c9ff27f..b5e189b 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -3,6 +3,13 @@
#include <linux/linkage.h>
#include <asm/page.h>
+enum pt_level {
+ PT_PGD,
+ PT_PUD,
+ PT_PMD,
+ PT_PTE
+};
+
/*
* Page-directory addresses above 4GB do not fit into architectural %cr3.
* When accessing %cr3, or equivalent field in vcpu_guest_context, guests
--
1.5.4.5
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Mark McLoughlin
2008-Apr-02 14:36 UTC
[Xen-devel] [PATCH 2/3] xen: Do not pin/unpin PMD pages
During process startup, in xen_pgd_pin(), we pin PTE pages
but not PMD pages. If we then later map and unmap using an
otherwise unused PMD page, the unpin in release_pd() fails
because the page wasn't previously pinned.
i.e. with this simple test case:
int fd = open("/dev/zero", O_RDONLY);
munmap(mmap((void *)0x40000000, 0x1000, PROT_READ, MAP_PRIVATE, fd, 0),
0x1000);
close(fd);
we currently get:
kernel BUG at arch/x86/xen/enlighten.c:678!
...
EIP is at xen_release_pt+0x79/0xa9
...
Call Trace:
[<c041da25>] ? __pmd_free_tlb+0x1a/0x75
[<c047a192>] ? free_pgd_range+0x1d2/0x2b5
[<c047a2f3>] ? free_pgtables+0x7e/0x93
[<c047b272>] ? unmap_region+0xb9/0xf5
[<c047c1bd>] ? do_munmap+0x193/0x1f5
[<c047c24f>] ? sys_munmap+0x30/0x3f
[<c0408cce>] ? syscall_call+0x7/0xb
======================
and xen complains:
(XEN) mm.c:2241:d4 Mfn 1cc37 not pinned
Further details at:
https://bugzilla.redhat.com/436453
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
arch/x86/xen/enlighten.c | 5 ++---
1 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 16e2f80..f16b056 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -689,8 +689,6 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn,
unsigned level)
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
if (level == PT_PTE)
pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
- else if (level == PT_PMD)
- pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE, pfn);
} else
/* make sure there are no stray mappings of
this page */
@@ -715,7 +713,8 @@ static void xen_release_ptpage(u32 pfn, unsigned level)
if (PagePinned(page)) {
if (!PageHighMem(page)) {
- pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
+ if (level == PT_PTE)
+ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
}
--
1.5.4.5
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Mark McLoughlin
2008-Apr-02 14:36 UTC
[Xen-devel] [PATCH 3/3] xen: Clear PG_pinned in release_{pt,pd}()
We don't currently clear PG_pinned in release_pt(), probably
because the page is to be freed anyway, but it jumps out as an
obvious gap in the logic, so clear the flag for consistency's sake.
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
arch/x86/xen/enlighten.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index f16b056..27ee26a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -717,6 +717,7 @@ static void xen_release_ptpage(u32 pfn, unsigned level)
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
+ ClearPagePinned(page);
}
}
--
1.5.4.5
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel