In commit cd9151e2: xen/balloon: set a mapping for ballooned out pages we have the ballooned out page''s mapping set to a scratch page. When the page is ballooned in again its P2M entry can be the MFN of the scratch page, hitting the BUG_ONs in __set_phys_to_machine. Signed-off-by: Wei Liu <wei.liu2@citrix.com> Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com> --- arch/x86/xen/p2m.c | 14 ++++++++++++-- drivers/xen/balloon.c | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 74672ee..4c11f6a 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -793,17 +793,27 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, return pfn - pfn_s; } +DECLARE_PER_CPU(struct page *, balloon_scratch_page); /* Try to install p2m mapping; fail if intermediate bits missing */ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) { unsigned topidx, mididx, idx; + unsigned long balloon_scratch_pfn; + unsigned long balloon_scratch_mfn; + + balloon_scratch_pfn = page_to_pfn(__get_cpu_var(balloon_scratch_page)); + balloon_scratch_mfn = pfn_to_mfn(balloon_scratch_pfn); if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); + BUG_ON(pfn != mfn && + pfn != balloon_scratch_mfn && + mfn != INVALID_P2M_ENTRY && + mfn != balloon_scratch_mfn); return true; } if (unlikely(pfn >= MAX_P2M_PFN)) { - BUG_ON(mfn != INVALID_P2M_ENTRY); + BUG_ON(mfn != INVALID_P2M_ENTRY && + mfn != balloon_scratch_mfn); return true; } diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index a3dc75d..89e2f3b 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -92,7 +92,7 @@ EXPORT_SYMBOL_GPL(balloon_stats); /* We increase/decrease in batches which fit in a page */ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)]; -static DEFINE_PER_CPU(struct page *, balloon_scratch_page); +DEFINE_PER_CPU(struct page *, balloon_scratch_page); /* List of ballooned pages, threaded through the mem_map array. */ -- 1.7.10.4
On Thu, 2013-08-22 at 11:10 +0100, Wei Liu wrote:> In commit cd9151e2: xen/balloon: set a mapping for ballooned out pages > we have the ballooned out page''s mapping set to a scratch page. When the > page is ballooned in again its P2M entry can be the MFN of the scratch > page, hitting the BUG_ONs in __set_phys_to_machine. > > Signed-off-by: Wei Liu <wei.liu2@citrix.com> > Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com> > --- > arch/x86/xen/p2m.c | 14 ++++++++++++-- > drivers/xen/balloon.c | 2 +- > 2 files changed, 13 insertions(+), 3 deletions(-) > > diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c > index 74672ee..4c11f6a 100644 > --- a/arch/x86/xen/p2m.c > +++ b/arch/x86/xen/p2m.c > @@ -793,17 +793,27 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, > return pfn - pfn_s; > } > > +DECLARE_PER_CPU(struct page *, balloon_scratch_page);Should b in a header I think?> /* Try to install p2m mapping; fail if intermediate bits missing */ > bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) > { > unsigned topidx, mididx, idx; > + unsigned long balloon_scratch_pfn; > + unsigned long balloon_scratch_mfn; > + > + balloon_scratch_pfn = page_to_pfn(__get_cpu_var(balloon_scratch_page)); > + balloon_scratch_mfn = pfn_to_mfn(balloon_scratch_pfn); > > if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { > - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); > + BUG_ON(pfn != mfn && > + pfn != balloon_scratch_mfn && > + mfn != INVALID_P2M_ENTRY && > + mfn != balloon_scratch_mfn);Is there any guarantee that __set_phys_to_machine will always operate on a page which was ballooned out on this particular CPU?> return true; > } > if (unlikely(pfn >= MAX_P2M_PFN)) { > - BUG_ON(mfn != INVALID_P2M_ENTRY); > + BUG_ON(mfn != INVALID_P2M_ENTRY && > + mfn != balloon_scratch_mfn); > return true; > } > > diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c > index a3dc75d..89e2f3b 100644 > --- a/drivers/xen/balloon.c > +++ b/drivers/xen/balloon.c > @@ -92,7 +92,7 @@ EXPORT_SYMBOL_GPL(balloon_stats); > > /* We increase/decrease in batches which fit in a page */ > static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)]; > -static DEFINE_PER_CPU(struct page *, balloon_scratch_page); > +DEFINE_PER_CPU(struct page *, balloon_scratch_page); > > > /* List of ballooned pages, threaded through the mem_map array. */
On 22/08/13 11:10, Wei Liu wrote:> In commit cd9151e2: xen/balloon: set a mapping for ballooned out pages > we have the ballooned out page''s mapping set to a scratch page. When the > page is ballooned in again its P2M entry can be the MFN of the scratch > page, hitting the BUG_ONs in __set_phys_to_machine.Looking at the commit that introduced this bug I wonder if the the correct fix is to restore the original call of __set_phys_to_machine(pfn, INVALID_P2M_ENTRY) in decrease_reservation(). We only need a valid kernel mapping for the ballooned out page, the p2m should still be invalid for the ballooned out page, right?> --- a/arch/x86/xen/p2m.c > +++ b/arch/x86/xen/p2m.c > @@ -793,17 +793,27 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, > return pfn - pfn_s; > } > > +DECLARE_PER_CPU(struct page *, balloon_scratch_page); > /* Try to install p2m mapping; fail if intermediate bits missing */ > bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) > { > unsigned topidx, mididx, idx; > + unsigned long balloon_scratch_pfn; > + unsigned long balloon_scratch_mfn; > + > + balloon_scratch_pfn = page_to_pfn(__get_cpu_var(balloon_scratch_page)); > + balloon_scratch_mfn = pfn_to_mfn(balloon_scratch_pfn); > > if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { > - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); > + BUG_ON(pfn != mfn && > + pfn != balloon_scratch_mfn && > + mfn != INVALID_P2M_ENTRY && > + mfn != balloon_scratch_mfn); > return true; > } > if (unlikely(pfn >= MAX_P2M_PFN)) { > - BUG_ON(mfn != INVALID_P2M_ENTRY); > + BUG_ON(mfn != INVALID_P2M_ENTRY && > + mfn != balloon_scratch_mfn);This bit looks wrong/unnecessary. David
On Thu, Aug 22, 2013 at 01:08:29PM +0100, David Vrabel wrote:> On 22/08/13 11:10, Wei Liu wrote: > > In commit cd9151e2: xen/balloon: set a mapping for ballooned out pages > > we have the ballooned out page''s mapping set to a scratch page. When the > > page is ballooned in again its P2M entry can be the MFN of the scratch > > page, hitting the BUG_ONs in __set_phys_to_machine. > > Looking at the commit that introduced this bug I wonder if the the > correct fix is to restore the original call of > __set_phys_to_machine(pfn, INVALID_P2M_ENTRY) in decrease_reservation(). > > We only need a valid kernel mapping for the ballooned out page, the p2m > should still be invalid for the ballooned out page, right? >Indeed, this is a simple and straightforward fix. Patch on the way. Wei.> > --- a/arch/x86/xen/p2m.c > > +++ b/arch/x86/xen/p2m.c > > @@ -793,17 +793,27 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, > > return pfn - pfn_s; > > } > > > > +DECLARE_PER_CPU(struct page *, balloon_scratch_page); > > /* Try to install p2m mapping; fail if intermediate bits missing */ > > bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) > > { > > unsigned topidx, mididx, idx; > > + unsigned long balloon_scratch_pfn; > > + unsigned long balloon_scratch_mfn; > > + > > + balloon_scratch_pfn = page_to_pfn(__get_cpu_var(balloon_scratch_page)); > > + balloon_scratch_mfn = pfn_to_mfn(balloon_scratch_pfn); > > > > if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { > > - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); > > + BUG_ON(pfn != mfn && > > + pfn != balloon_scratch_mfn && > > + mfn != INVALID_P2M_ENTRY && > > + mfn != balloon_scratch_mfn); > > return true; > > } > > if (unlikely(pfn >= MAX_P2M_PFN)) { > > - BUG_ON(mfn != INVALID_P2M_ENTRY); > > + BUG_ON(mfn != INVALID_P2M_ENTRY && > > + mfn != balloon_scratch_mfn); > > This bit looks wrong/unnecessary. > > David