Jan Beulich
2007-Feb-16 15:48 UTC
[Xen-devel] [PATCH, correction] linux/x86: Adjust page table handling
Ensure that all and only those page table entries that have their present bit set undergo p2m/m2p translation in all relevant places. This should fix migration issues with _PAGE_PROTNONE pages which previously could retain MFNs in PTEs while having the present bit clear (and thus were not getting (un)canonicalized during save/ restore). In the original version, the pte_pfn() changes were inappropriately killing support for mapped foreign pages, and pte_mfn() wasn''t being adjusted. In the course of fixing this I found that it might be necessary to adjust/split the xxx_ma functions/macros so that xxx_ma() really means machine address format as output, whereas to-be-created ones would mean the raw value (e.g. the already existing __pte_val() macro on x86-64). Nevertheless, due to their present usage this doesn''t seem to be an immediate problem that needs to be addressed with this already largish patch. Many thanks to Keir Fraser for his analysis, suggestions, and corrections. Signed-off-by: Jan Beulich <jbeulich@novell.com> Index: sle10sp1-2007-02-16/include/asm-i386/mach-xen/asm/maddr.h ==================================================================--- sle10sp1-2007-02-16.orig/include/asm-i386/mach-xen/asm/maddr.h 2007-02-16 14:59:59.000000000 +0100 +++ sle10sp1-2007-02-16/include/asm-i386/mach-xen/asm/maddr.h 2007-02-13 13:41:37.000000000 +0100 @@ -21,6 +21,7 @@ typedef unsigned long maddr_t; #ifdef CONFIG_XEN extern unsigned long *phys_to_machine_mapping; +extern unsigned long max_mapnr; #undef machine_to_phys_mapping extern unsigned long *machine_to_phys_mapping; @@ -30,20 +31,20 @@ static inline unsigned long pfn_to_mfn(u { if (xen_feature(XENFEAT_auto_translated_physmap)) return pfn; - return phys_to_machine_mapping[(unsigned int)(pfn)] & - ~FOREIGN_FRAME_BIT; + BUG_ON(max_mapnr && pfn >= max_mapnr); + return phys_to_machine_mapping[pfn] & ~FOREIGN_FRAME_BIT; } static inline int phys_to_machine_mapping_valid(unsigned long pfn) { if (xen_feature(XENFEAT_auto_translated_physmap)) return 1; + BUG_ON(max_mapnr && pfn >= max_mapnr); return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); } static inline unsigned long mfn_to_pfn(unsigned long mfn) { - extern unsigned long max_mapnr; unsigned long pfn; if (xen_feature(XENFEAT_auto_translated_physmap)) @@ -92,7 +93,6 @@ static inline unsigned long mfn_to_pfn(u */ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) { - extern unsigned long max_mapnr; unsigned long pfn = mfn_to_pfn(mfn); if ((pfn < max_mapnr) && !xen_feature(XENFEAT_auto_translated_physmap) @@ -103,6 +103,7 @@ static inline unsigned long mfn_to_local static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn) { + BUG_ON(pfn >= max_mapnr); if (xen_feature(XENFEAT_auto_translated_physmap)) { BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); return; @@ -124,6 +125,20 @@ static inline paddr_t machine_to_phys(ma return phys; } +#ifdef CONFIG_X86_PAE +static inline paddr_t pte_phys_to_machine(paddr_t phys) +{ + /* + * In PAE mode, the NX bit needs to be dealt with in the value + * passed to pfn_to_mfn(). On x86_64, we need to mask it off, + * but for i386 the conversion to ulong for the argument will + * clip it off. + */ + maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT); + machine = (machine << PAGE_SHIFT) | (phys & ~PHYSICAL_PAGE_MASK); + return machine; +} + static inline paddr_t pte_machine_to_phys(maddr_t machine) { /* @@ -136,6 +151,7 @@ static inline paddr_t pte_machine_to_phy phys = (phys << PAGE_SHIFT) | (machine & ~PHYSICAL_PAGE_MASK); return phys; } +#endif #else /* !CONFIG_XEN */ @@ -146,7 +162,6 @@ static inline paddr_t pte_machine_to_phy #define phys_to_machine_mapping_valid(pfn) (1) #define phys_to_machine(phys) ((maddr_t)(phys)) #define machine_to_phys(mach) ((paddr_t)(mach)) -#define pte_machine_to_phys(mach) ((paddr_t)(mach)) #endif /* !CONFIG_XEN */ Index: sle10sp1-2007-02-16/include/asm-i386/mach-xen/asm/page.h ==================================================================--- sle10sp1-2007-02-16.orig/include/asm-i386/mach-xen/asm/page.h 2007-02-16 14:59:59.000000000 +0100 +++ sle10sp1-2007-02-16/include/asm-i386/mach-xen/asm/page.h 2007-02-13 12:27:08.000000000 +0100 @@ -29,6 +29,13 @@ #include <xen/interface/xen.h> #include <xen/features.h> +/* + * Need to repeat this here in order to not include pgtable.h (which in turn + * depends on definitions made here), but to be able to use the symbolic + * below. The preprocessor will warn if the two definitions aren''t identical. + */ +#define _PAGE_PRESENT 0x001 + #define arch_free_page(_page,_order) \ ({ int foreign = PageForeign(_page); \ if (foreign) \ @@ -81,40 +88,38 @@ typedef struct { unsigned long long pgpr #define pgprot_val(x) ((x).pgprot) #include <asm/maddr.h> #define __pte(x) ({ unsigned long long _x = (x); \ - if (_x & 1) _x = phys_to_machine(_x); \ + if (_x & _PAGE_PRESENT) _x = pte_phys_to_machine(_x); \ ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); }) #define __pgd(x) ({ unsigned long long _x = (x); \ - (((_x)&1) ? ((pgd_t) {phys_to_machine(_x)}) : ((pgd_t) {(_x)})); }) + (pgd_t) {((_x) & _PAGE_PRESENT) ? pte_phys_to_machine(_x) : (_x)}; }) #define __pmd(x) ({ unsigned long long _x = (x); \ - (((_x)&1) ? ((pmd_t) {phys_to_machine(_x)}) : ((pmd_t) {(_x)})); }) + (pmd_t) {((_x) & _PAGE_PRESENT) ? pte_phys_to_machine(_x) : (_x)}; }) +static inline unsigned long long pte_val_ma(pte_t x) +{ + return ((unsigned long long)x.pte_high << 32) | x.pte_low; +} static inline unsigned long long pte_val(pte_t x) { - unsigned long long ret; - - if (x.pte_low) { - ret = x.pte_low | (unsigned long long)x.pte_high << 32; - ret = pte_machine_to_phys(ret) | 1; - } else { - ret = 0; - } + unsigned long long ret = pte_val_ma(x); + if (x.pte_low & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); return ret; } static inline unsigned long long pmd_val(pmd_t x) { unsigned long long ret = x.pmd; - if (ret) ret = pte_machine_to_phys(ret) | 1; +#ifdef CONFIG_XEN_COMPAT_030002 + if (ret) ret = pte_machine_to_phys(ret) | _PAGE_PRESENT; +#else + if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); +#endif return ret; } static inline unsigned long long pgd_val(pgd_t x) { unsigned long long ret = x.pgd; - if (ret) ret = pte_machine_to_phys(ret) | 1; + if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); return ret; } -static inline unsigned long long pte_val_ma(pte_t x) -{ - return (unsigned long long)x.pte_high << 32 | x.pte_low; -} #define HPAGE_SHIFT 21 #else typedef struct { unsigned long pte_low; } pte_t; @@ -123,23 +128,23 @@ typedef struct { unsigned long pgprot; } #define pgprot_val(x) ((x).pgprot) #include <asm/maddr.h> #define boot_pte_t pte_t /* or would you rather have a typedef */ -#define pte_val(x) (((x).pte_low & 1) ? \ - pte_machine_to_phys((x).pte_low) : \ +#define pte_val(x) (((x).pte_low & _PAGE_PRESENT) ? \ + machine_to_phys((x).pte_low) : \ (x).pte_low) #define pte_val_ma(x) ((x).pte_low) #define __pte(x) ({ unsigned long _x = (x); \ - (((_x)&1) ? ((pte_t) {phys_to_machine(_x)}) : ((pte_t) {(_x)})); }) + (pte_t) {((_x) & _PAGE_PRESENT) ? phys_to_machine(_x) : (_x)}; }) #define __pgd(x) ({ unsigned long _x = (x); \ - (((_x)&1) ? ((pgd_t) {phys_to_machine(_x)}) : ((pgd_t) {(_x)})); }) + (pgd_t) {((_x) & _PAGE_PRESENT) ? phys_to_machine(_x) : (_x)}; }) static inline unsigned long pgd_val(pgd_t x) { unsigned long ret = x.pgd; - if (ret) ret = pte_machine_to_phys(ret) | 1; + if (ret & _PAGE_PRESENT) ret = machine_to_phys(ret); return ret; } #define HPAGE_SHIFT 22 #endif -#define PTE_MASK PAGE_MASK +#define PTE_MASK PHYSICAL_PAGE_MASK #ifdef CONFIG_HUGETLB_PAGE #define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) Index: sle10sp1-2007-02-16/include/asm-i386/mach-xen/asm/pgtable-2level.h ==================================================================--- sle10sp1-2007-02-16.orig/include/asm-i386/mach-xen/asm/pgtable-2level.h 2007-02-16 14:59:59.000000000 +0100 +++ sle10sp1-2007-02-16/include/asm-i386/mach-xen/asm/pgtable-2level.h 2007-02-16 14:54:06.000000000 +0100 @@ -38,8 +38,11 @@ #define ptep_get_and_clear(mm,addr,xp) __pte_ma(xchg(&(xp)->pte_low, 0)) #define pte_same(a, b) ((a).pte_low == (b).pte_low) -#define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT) -#define pte_pfn(_pte) mfn_to_local_pfn(pte_mfn(_pte)) +#define __pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT) +#define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \ + __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte))) +#define pte_pfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \ + mfn_to_local_pfn(__pte_mfn(_pte)) : __pte_mfn(_pte)) #define pte_page(_pte) pfn_to_page(pte_pfn(_pte)) Index: sle10sp1-2007-02-16/include/asm-i386/mach-xen/asm/pgtable-3level.h ==================================================================--- sle10sp1-2007-02-16.orig/include/asm-i386/mach-xen/asm/pgtable-3level.h 2007-02-16 14:59:59.000000000 +0100 +++ sle10sp1-2007-02-16/include/asm-i386/mach-xen/asm/pgtable-3level.h 2007-02-16 14:54:20.000000000 +0100 @@ -145,21 +145,24 @@ static inline int pte_none(pte_t pte) return !pte.pte_low && !pte.pte_high; } -#define pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) |\ - (((_pte).pte_high & 0xfff) << (32-PAGE_SHIFT))) -#define pte_pfn(_pte) mfn_to_local_pfn(pte_mfn(_pte)) +#define __pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \ + ((_pte).pte_high << (32-PAGE_SHIFT))) +#define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \ + __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte))) +#define pte_pfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \ + mfn_to_local_pfn(__pte_mfn(_pte)) : __pte_mfn(_pte)) extern unsigned long long __supported_pte_mask; static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { - return pfn_pte_ma(pfn_to_mfn(page_nr), pgprot); + return __pte((((unsigned long long)page_nr << PAGE_SHIFT) | + pgprot_val(pgprot)) & __supported_pte_mask); } static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - BUG(); panic("needs review"); - return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | \ + return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | pgprot_val(pgprot)) & __supported_pte_mask); } Index: sle10sp1-2007-02-16/include/asm-i386/mach-xen/asm/pgtable.h ==================================================================--- sle10sp1-2007-02-16.orig/include/asm-i386/mach-xen/asm/pgtable.h 2007-02-16 14:59:59.000000000 +0100 +++ sle10sp1-2007-02-16/include/asm-i386/mach-xen/asm/pgtable.h 2007-02-13 14:14:47.000000000 +0100 @@ -315,18 +315,19 @@ static inline void clone_pgd_range(pgd_t static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - pte.pte_low &= _PAGE_CHG_MASK; - pte.pte_low |= pgprot_val(newprot); -#ifdef CONFIG_X86_PAE /* - * Chop off the NX bit (if present), and add the NX portion of - * the newprot (if present): + * Since this might change the present bit (which controls whether + * a pte_t object has undergone p2m translation), we must use + * pte_val() on the input pte and __pte() for the return value. */ - pte.pte_high &= ~(1 << (_PAGE_BIT_NX - 32)); - pte.pte_high |= (pgprot_val(newprot) >> 32) & \ - (__supported_pte_mask >> 32); + paddr_t pteval = pte_val(pte); + + pteval &= _PAGE_CHG_MASK; + pteval |= pgprot_val(newprot); +#ifdef CONFIG_X86_PAE + pteval &= __supported_pte_mask; #endif - return pte; + return __pte(pteval); } #define pmd_large(pmd) \ Index: sle10sp1-2007-02-16/include/asm-x86_64/mach-xen/asm/maddr.h ==================================================================--- sle10sp1-2007-02-16.orig/include/asm-x86_64/mach-xen/asm/maddr.h 2007-02-16 14:59:59.000000000 +0100 +++ sle10sp1-2007-02-16/include/asm-x86_64/mach-xen/asm/maddr.h 2007-02-13 13:35:31.000000000 +0100 @@ -25,14 +25,15 @@ static inline unsigned long pfn_to_mfn(u { if (xen_feature(XENFEAT_auto_translated_physmap)) return pfn; - return phys_to_machine_mapping[(unsigned int)(pfn)] & - ~FOREIGN_FRAME_BIT; + BUG_ON(end_pfn && pfn >= end_pfn); + return phys_to_machine_mapping[pfn] & ~FOREIGN_FRAME_BIT; } static inline int phys_to_machine_mapping_valid(unsigned long pfn) { if (xen_feature(XENFEAT_auto_translated_physmap)) return 1; + BUG_ON(end_pfn && pfn >= end_pfn); return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); } @@ -96,6 +97,7 @@ static inline unsigned long mfn_to_local static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn) { + BUG_ON(pfn >= end_pfn); if (xen_feature(XENFEAT_auto_translated_physmap)) { BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); return; @@ -117,6 +119,14 @@ static inline paddr_t machine_to_phys(ma return phys; } +static inline paddr_t pte_phys_to_machine(paddr_t phys) +{ + maddr_t machine; + machine = pfn_to_mfn((phys & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT); + machine = (machine << PAGE_SHIFT) | (phys & ~PHYSICAL_PAGE_MASK); + return machine; +} + static inline paddr_t pte_machine_to_phys(maddr_t machine) { paddr_t phys; @@ -134,7 +144,6 @@ static inline paddr_t pte_machine_to_phy #define phys_to_machine_mapping_valid(pfn) (1) #define phys_to_machine(phys) ((maddr_t)(phys)) #define machine_to_phys(mach) ((paddr_t)(mach)) -#define pte_machine_to_phys(mach) ((paddr_t)(mach)) #endif /* !CONFIG_XEN */ Index: sle10sp1-2007-02-16/include/asm-x86_64/mach-xen/asm/page.h ==================================================================--- sle10sp1-2007-02-16.orig/include/asm-x86_64/mach-xen/asm/page.h 2007-02-16 14:59:59.000000000 +0100 +++ sle10sp1-2007-02-16/include/asm-x86_64/mach-xen/asm/page.h 2007-02-13 12:27:08.000000000 +0100 @@ -9,6 +9,13 @@ #endif #include <xen/interface/xen.h> +/* + * Need to repeat this here in order to not include pgtable.h (which in turn + * depends on definitions made here), but to be able to use the symbolic + * below. The preprocessor will warn if the two definitions aren''t identical. + */ +#define _PAGE_PRESENT 0x001 + #define arch_free_page(_page,_order) \ ({ int foreign = PageForeign(_page); \ if (foreign) \ @@ -95,28 +102,33 @@ typedef struct { unsigned long pgd; } pg typedef struct { unsigned long pgprot; } pgprot_t; -#define pte_val(x) (((x).pte & 1) ? pte_machine_to_phys((x).pte) : \ +#define pte_val(x) (((x).pte & _PAGE_PRESENT) ? \ + pte_machine_to_phys((x).pte) : \ (x).pte) #define pte_val_ma(x) ((x).pte) static inline unsigned long pmd_val(pmd_t x) { unsigned long ret = x.pmd; - if (ret) ret = pte_machine_to_phys(ret); +#ifdef CONFIG_XEN_COMPAT_030002 + if (ret) ret = pte_machine_to_phys(ret) | _PAGE_PRESENT; +#else + if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); +#endif return ret; } static inline unsigned long pud_val(pud_t x) { unsigned long ret = x.pud; - if (ret) ret = pte_machine_to_phys(ret); + if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); return ret; } static inline unsigned long pgd_val(pgd_t x) { unsigned long ret = x.pgd; - if (ret) ret = pte_machine_to_phys(ret); + if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); return ret; } @@ -124,25 +136,25 @@ static inline unsigned long pgd_val(pgd_ static inline pte_t __pte(unsigned long x) { - if (x & 1) x = phys_to_machine(x); + if (x & _PAGE_PRESENT) x = pte_phys_to_machine(x); return ((pte_t) { (x) }); } static inline pmd_t __pmd(unsigned long x) { - if ((x & 1)) x = phys_to_machine(x); + if (x & _PAGE_PRESENT) x = pte_phys_to_machine(x); return ((pmd_t) { (x) }); } static inline pud_t __pud(unsigned long x) { - if ((x & 1)) x = phys_to_machine(x); + if (x & _PAGE_PRESENT) x = pte_phys_to_machine(x); return ((pud_t) { (x) }); } static inline pgd_t __pgd(unsigned long x) { - if ((x & 1)) x = phys_to_machine(x); + if (x & _PAGE_PRESENT) x = pte_phys_to_machine(x); return ((pgd_t) { (x) }); } Index: sle10sp1-2007-02-16/include/asm-x86_64/mach-xen/asm/pgtable.h ==================================================================--- sle10sp1-2007-02-16.orig/include/asm-x86_64/mach-xen/asm/pgtable.h 2007-02-16 14:59:59.000000000 +0100 +++ sle10sp1-2007-02-16/include/asm-x86_64/mach-xen/asm/pgtable.h 2007-02-16 15:00:20.000000000 +0100 @@ -302,19 +302,20 @@ static inline unsigned long pud_bad(pud_ #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) -#define pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT) -#define pte_pfn(_pte) mfn_to_local_pfn(pte_mfn(_pte)) +#define __pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT) +#define pte_mfn(_pte) ((_pte).pte & _PAGE_PRESENT ? \ + __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte))) +#define pte_pfn(_pte) ((_pte).pte & _PAGE_PRESENT ? \ + mfn_to_local_pfn(__pte_mfn(_pte)) : __pte_mfn(_pte)) #define pte_page(x) pfn_to_page(pte_pfn(x)) static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { - pte_t pte; - - (pte).pte = (pfn_to_mfn(page_nr) << PAGE_SHIFT); - (pte).pte |= pgprot_val(pgprot); - (pte).pte &= __supported_pte_mask; - return pte; + unsigned long pte = page_nr << PAGE_SHIFT; + pte |= pgprot_val(pgprot); + pte &= __supported_pte_mask; + return __pte(pte); } /* @@ -446,18 +447,25 @@ static inline pud_t *pud_offset_k(pgd_t /* physical address -> PTE */ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { - pte_t pte; - (pte).pte = physpage | pgprot_val(pgprot); - return pte; + unsigned long pteval; + pteval = physpage | pgprot_val(pgprot); + return __pte(pteval); } /* Change flags of a PTE */ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - (pte).pte &= _PAGE_CHG_MASK; - (pte).pte |= pgprot_val(newprot); - (pte).pte &= __supported_pte_mask; - return pte; + /* + * Since this might change the present bit (which controls whether + * a pte_t object has undergone p2m translation), we must use + * pte_val() on the input pte and __pte() for the return value. + */ + unsigned long pteval = pte_val(pte); + + pteval &= _PAGE_CHG_MASK; + pteval |= pgprot_val(newprot); + pteval &= __supported_pte_mask; + return __pte(pteval); } #define pte_index(address) \ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2007-Feb-16 16:04 UTC
Re: [Xen-devel] [PATCH, correction] linux/x86: Adjust page table handling
On 16/2/07 15:48, "Jan Beulich" <jbeulich@novell.com> wrote:> In the original version, the pte_pfn() changes were inappropriately > killing support for mapped foreign pages, and pte_mfn() wasn''t being > adjusted.It''s kind of tempting to get rid of pte_mfn() isn''t it. :-) -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich
2007-Feb-16 16:14 UTC
Re: [Xen-devel] [PATCH, correction] linux/x86: Adjust page table handling
>>> Keir Fraser <keir@xensource.com> 16.02.07 17:04 >>> >On 16/2/07 15:48, "Jan Beulich" <jbeulich@novell.com> wrote: > >> In the original version, the pte_pfn() changes were inappropriately >> killing support for mapped foreign pages, and pte_mfn() wasn''t being >> adjusted. > >It''s kind of tempting to get rid of pte_mfn() isn''t it. :-)Yeah, I was considering this, but then didn''t (yet). Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel