Jan Beulich
2007-Feb-13 15:05 UTC
[Xen-devel] [PATCH] linux/x86: Adjust page table handling
Ensure that all and only those page table entries that have their present bit set undergo p2m/m2p translation in all relevant places. This should fix migration issues with _PAGE_PROTNONE pages which previously could retain MFNs in PTEs while having the present bit clear (and thus were not getting (un)canonicalized during save/restore). Many thanks to Keir Fraser, who set me straight on a number of aspects in this area while working out the way things should work. Signed-off-by: Jan Beulich <jbeulich@novell.com> Index: sle10sp1-2007-01-31/include/asm-i386/mach-xen/asm/maddr.h ==================================================================--- sle10sp1-2007-01-31.orig/include/asm-i386/mach-xen/asm/maddr.h 2007-02-13 12:27:05.000000000 +0100 +++ sle10sp1-2007-01-31/include/asm-i386/mach-xen/asm/maddr.h 2007-02-13 13:41:37.000000000 +0100 @@ -21,6 +21,7 @@ typedef unsigned long maddr_t; #ifdef CONFIG_XEN extern unsigned long *phys_to_machine_mapping; +extern unsigned long max_mapnr; #undef machine_to_phys_mapping extern unsigned long *machine_to_phys_mapping; @@ -30,20 +31,20 @@ static inline unsigned long pfn_to_mfn(u { if (xen_feature(XENFEAT_auto_translated_physmap)) return pfn; - return phys_to_machine_mapping[(unsigned int)(pfn)] & - ~FOREIGN_FRAME_BIT; + BUG_ON(max_mapnr && pfn >= max_mapnr); + return phys_to_machine_mapping[pfn] & ~FOREIGN_FRAME_BIT; } static inline int phys_to_machine_mapping_valid(unsigned long pfn) { if (xen_feature(XENFEAT_auto_translated_physmap)) return 1; + BUG_ON(max_mapnr && pfn >= max_mapnr); return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); } static inline unsigned long mfn_to_pfn(unsigned long mfn) { - extern unsigned long max_mapnr; unsigned long pfn; if (xen_feature(XENFEAT_auto_translated_physmap)) @@ -92,7 +93,6 @@ static inline unsigned long mfn_to_pfn(u */ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) { - extern unsigned long max_mapnr; unsigned long pfn = mfn_to_pfn(mfn); if ((pfn < max_mapnr) && 
!xen_feature(XENFEAT_auto_translated_physmap) @@ -103,6 +103,7 @@ static inline unsigned long mfn_to_local static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn) { + BUG_ON(pfn >= max_mapnr); if (xen_feature(XENFEAT_auto_translated_physmap)) { BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); return; @@ -124,6 +125,20 @@ static inline paddr_t machine_to_phys(ma return phys; } +#ifdef CONFIG_X86_PAE +static inline paddr_t pte_phys_to_machine(paddr_t phys) +{ + /* + * In PAE mode, the NX bit needs to be dealt with in the value + * passed to pfn_to_mfn(). On x86_64, we need to mask it off, + * but for i386 the conversion to ulong for the argument will + * clip it off. + */ + maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT); + machine = (machine << PAGE_SHIFT) | (phys & ~PHYSICAL_PAGE_MASK); + return machine; +} + static inline paddr_t pte_machine_to_phys(maddr_t machine) { /* @@ -136,6 +151,7 @@ static inline paddr_t pte_machine_to_phy phys = (phys << PAGE_SHIFT) | (machine & ~PHYSICAL_PAGE_MASK); return phys; } +#endif #else /* !CONFIG_XEN */ @@ -146,7 +162,6 @@ static inline paddr_t pte_machine_to_phy #define phys_to_machine_mapping_valid(pfn) (1) #define phys_to_machine(phys) ((maddr_t)(phys)) #define machine_to_phys(mach) ((paddr_t)(mach)) -#define pte_machine_to_phys(mach) ((paddr_t)(mach)) #endif /* !CONFIG_XEN */ Index: sle10sp1-2007-01-31/include/asm-i386/mach-xen/asm/page.h ==================================================================--- sle10sp1-2007-01-31.orig/include/asm-i386/mach-xen/asm/page.h 2007-02-13 12:27:05.000000000 +0100 +++ sle10sp1-2007-01-31/include/asm-i386/mach-xen/asm/page.h 2007-02-13 12:27:08.000000000 +0100 @@ -29,6 +29,13 @@ #include <xen/interface/xen.h> #include <xen/features.h> +/* + * Need to repeat this here in order to not include pgtable.h (which in turn + * depends on definitions made here), but to be able to use the symbolic + * below. The preprocessor will warn if the two definitions aren't identical. 
+ */ +#define _PAGE_PRESENT 0x001 + #define arch_free_page(_page,_order) \ ({ int foreign = PageForeign(_page); \ if (foreign) \ @@ -81,40 +88,38 @@ typedef struct { unsigned long long pgpr #define pgprot_val(x) ((x).pgprot) #include <asm/maddr.h> #define __pte(x) ({ unsigned long long _x = (x); \ - if (_x & 1) _x = phys_to_machine(_x); \ + if (_x & _PAGE_PRESENT) _x = pte_phys_to_machine(_x); \ ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); }) #define __pgd(x) ({ unsigned long long _x = (x); \ - (((_x)&1) ? ((pgd_t) {phys_to_machine(_x)}) : ((pgd_t) {(_x)})); }) + (pgd_t) {((_x) & _PAGE_PRESENT) ? pte_phys_to_machine(_x) : (_x)}; }) #define __pmd(x) ({ unsigned long long _x = (x); \ - (((_x)&1) ? ((pmd_t) {phys_to_machine(_x)}) : ((pmd_t) {(_x)})); }) + (pmd_t) {((_x) & _PAGE_PRESENT) ? pte_phys_to_machine(_x) : (_x)}; }) +static inline unsigned long long pte_val_ma(pte_t x) +{ + return ((unsigned long long)x.pte_high << 32) | x.pte_low; +} static inline unsigned long long pte_val(pte_t x) { - unsigned long long ret; - - if (x.pte_low) { - ret = x.pte_low | (unsigned long long)x.pte_high << 32; - ret = pte_machine_to_phys(ret) | 1; - } else { - ret = 0; - } + unsigned long long ret = pte_val_ma(x); + if (x.pte_low & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); return ret; } static inline unsigned long long pmd_val(pmd_t x) { unsigned long long ret = x.pmd; - if (ret) ret = pte_machine_to_phys(ret) | 1; +#ifdef CONFIG_XEN_COMPAT_030002 + if (ret) ret = pte_machine_to_phys(ret) | _PAGE_PRESENT; +#else + if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); +#endif return ret; } static inline unsigned long long pgd_val(pgd_t x) { unsigned long long ret = x.pgd; - if (ret) ret = pte_machine_to_phys(ret) | 1; + if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); return ret; } -static inline unsigned long long pte_val_ma(pte_t x) -{ - return (unsigned long long)x.pte_high << 32 | x.pte_low; -} #define HPAGE_SHIFT 21 #else typedef struct { unsigned 
long pte_low; } pte_t; @@ -123,23 +128,23 @@ typedef struct { unsigned long pgprot; } #define pgprot_val(x) ((x).pgprot) #include <asm/maddr.h> #define boot_pte_t pte_t /* or would you rather have a typedef */ -#define pte_val(x) (((x).pte_low & 1) ? \ - pte_machine_to_phys((x).pte_low) : \ +#define pte_val(x) (((x).pte_low & _PAGE_PRESENT) ? \ + machine_to_phys((x).pte_low) : \ (x).pte_low) #define pte_val_ma(x) ((x).pte_low) #define __pte(x) ({ unsigned long _x = (x); \ - (((_x)&1) ? ((pte_t) {phys_to_machine(_x)}) : ((pte_t) {(_x)})); }) + (pte_t) {((_x) & _PAGE_PRESENT) ? phys_to_machine(_x) : (_x)}; }) #define __pgd(x) ({ unsigned long _x = (x); \ - (((_x)&1) ? ((pgd_t) {phys_to_machine(_x)}) : ((pgd_t) {(_x)})); }) + (pgd_t) {((_x) & _PAGE_PRESENT) ? phys_to_machine(_x) : (_x)}; }) static inline unsigned long pgd_val(pgd_t x) { unsigned long ret = x.pgd; - if (ret) ret = pte_machine_to_phys(ret) | 1; + if (ret & _PAGE_PRESENT) ret = machine_to_phys(ret); return ret; } #define HPAGE_SHIFT 22 #endif -#define PTE_MASK PAGE_MASK +#define PTE_MASK PHYSICAL_PAGE_MASK #ifdef CONFIG_HUGETLB_PAGE #define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) Index: sle10sp1-2007-01-31/include/asm-i386/mach-xen/asm/pgtable-2level.h ==================================================================--- sle10sp1-2007-01-31.orig/include/asm-i386/mach-xen/asm/pgtable-2level.h 2007-02-13 12:27:05.000000000 +0100 +++ sle10sp1-2007-01-31/include/asm-i386/mach-xen/asm/pgtable-2level.h 2007-02-13 12:27:08.000000000 +0100 @@ -39,7 +39,7 @@ #define ptep_get_and_clear(mm,addr,xp) __pte_ma(xchg(&(xp)->pte_low, 0)) #define pte_same(a, b) ((a).pte_low == (b).pte_low) #define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT) -#define pte_pfn(_pte) mfn_to_local_pfn(pte_mfn(_pte)) +#define pte_pfn(_pte) (pte_val(_pte) >> PAGE_SHIFT) #define pte_page(_pte) pfn_to_page(pte_pfn(_pte)) Index: sle10sp1-2007-01-31/include/asm-i386/mach-xen/asm/pgtable-3level.h 
==================================================================--- sle10sp1-2007-01-31.orig/include/asm-i386/mach-xen/asm/pgtable-3level.h 2007-02-13 12:27:05.000000000 +0100 +++ sle10sp1-2007-01-31/include/asm-i386/mach-xen/asm/pgtable-3level.h 2007-02-13 14:14:45.000000000 +0100 @@ -146,20 +146,20 @@ static inline int pte_none(pte_t pte) } #define pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) |\ - (((_pte).pte_high & 0xfff) << (32-PAGE_SHIFT))) -#define pte_pfn(_pte) mfn_to_local_pfn(pte_mfn(_pte)) + ((_pte).pte_high << (32-PAGE_SHIFT))) +#define pte_pfn(_pte) ((pte_val(_pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) extern unsigned long long __supported_pte_mask; static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { - return pfn_pte_ma(pfn_to_mfn(page_nr), pgprot); + return __pte((((unsigned long long)page_nr << PAGE_SHIFT) | + pgprot_val(pgprot)) & __supported_pte_mask); } static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - BUG(); panic("needs review"); - return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | \ + return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | pgprot_val(pgprot)) & __supported_pte_mask); } Index: sle10sp1-2007-01-31/include/asm-i386/mach-xen/asm/pgtable.h ==================================================================--- sle10sp1-2007-01-31.orig/include/asm-i386/mach-xen/asm/pgtable.h 2007-02-13 12:27:05.000000000 +0100 +++ sle10sp1-2007-01-31/include/asm-i386/mach-xen/asm/pgtable.h 2007-02-13 14:14:47.000000000 +0100 @@ -315,18 +315,19 @@ static inline void clone_pgd_range(pgd_t static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - pte.pte_low &= _PAGE_CHG_MASK; - pte.pte_low |= pgprot_val(newprot); -#ifdef CONFIG_X86_PAE /* - * Chop off the NX bit (if present), and add the NX portion of - * the newprot (if present): + * Since this might change the present bit (which controls whether + * a pte_t object has undergone p2m translation), we must use + * pte_val() on the input pte and 
__pte() for the return value. */ - pte.pte_high &= ~(1 << (_PAGE_BIT_NX - 32)); - pte.pte_high |= (pgprot_val(newprot) >> 32) & \ - (__supported_pte_mask >> 32); + paddr_t pteval = pte_val(pte); + + pteval &= _PAGE_CHG_MASK; + pteval |= pgprot_val(newprot); +#ifdef CONFIG_X86_PAE + pteval &= __supported_pte_mask; #endif - return pte; + return __pte(pteval); } #define pmd_large(pmd) \ Index: sle10sp1-2007-01-31/include/asm-x86_64/mach-xen/asm/maddr.h ==================================================================--- sle10sp1-2007-01-31.orig/include/asm-x86_64/mach-xen/asm/maddr.h 2007-02-13 12:27:05.000000000 +0100 +++ sle10sp1-2007-01-31/include/asm-x86_64/mach-xen/asm/maddr.h 2007-02-13 13:35:31.000000000 +0100 @@ -25,14 +25,15 @@ static inline unsigned long pfn_to_mfn(u { if (xen_feature(XENFEAT_auto_translated_physmap)) return pfn; - return phys_to_machine_mapping[(unsigned int)(pfn)] & - ~FOREIGN_FRAME_BIT; + BUG_ON(end_pfn && pfn >= end_pfn); + return phys_to_machine_mapping[pfn] & ~FOREIGN_FRAME_BIT; } static inline int phys_to_machine_mapping_valid(unsigned long pfn) { if (xen_feature(XENFEAT_auto_translated_physmap)) return 1; + BUG_ON(end_pfn && pfn >= end_pfn); return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); } @@ -96,6 +97,7 @@ static inline unsigned long mfn_to_local static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn) { + BUG_ON(pfn >= end_pfn); if (xen_feature(XENFEAT_auto_translated_physmap)) { BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); return; @@ -117,6 +119,14 @@ static inline paddr_t machine_to_phys(ma return phys; } +static inline paddr_t pte_phys_to_machine(paddr_t phys) +{ + maddr_t machine; + machine = pfn_to_mfn((phys & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT); + machine = (machine << PAGE_SHIFT) | (phys & ~PHYSICAL_PAGE_MASK); + return machine; +} + static inline paddr_t pte_machine_to_phys(maddr_t machine) { paddr_t phys; @@ -134,7 +144,6 @@ static inline paddr_t pte_machine_to_phy #define 
phys_to_machine_mapping_valid(pfn) (1) #define phys_to_machine(phys) ((maddr_t)(phys)) #define machine_to_phys(mach) ((paddr_t)(mach)) -#define pte_machine_to_phys(mach) ((paddr_t)(mach)) #endif /* !CONFIG_XEN */ Index: sle10sp1-2007-01-31/include/asm-x86_64/mach-xen/asm/page.h ==================================================================--- sle10sp1-2007-01-31.orig/include/asm-x86_64/mach-xen/asm/page.h 2007-02-13 12:27:05.000000000 +0100 +++ sle10sp1-2007-01-31/include/asm-x86_64/mach-xen/asm/page.h 2007-02-13 12:27:08.000000000 +0100 @@ -9,6 +9,13 @@ #endif #include <xen/interface/xen.h> +/* + * Need to repeat this here in order to not include pgtable.h (which in turn + * depends on definitions made here), but to be able to use the symbolic + * below. The preprocessor will warn if the two definitions aren't identical. + */ +#define _PAGE_PRESENT 0x001 + #define arch_free_page(_page,_order) \ ({ int foreign = PageForeign(_page); \ if (foreign) \ @@ -95,28 +102,33 @@ typedef struct { unsigned long pgd; } pg typedef struct { unsigned long pgprot; } pgprot_t; -#define pte_val(x) (((x).pte & 1) ? pte_machine_to_phys((x).pte) : \ +#define pte_val(x) (((x).pte & _PAGE_PRESENT) ? 
\ + pte_machine_to_phys((x).pte) : \ (x).pte) #define pte_val_ma(x) ((x).pte) static inline unsigned long pmd_val(pmd_t x) { unsigned long ret = x.pmd; - if (ret) ret = pte_machine_to_phys(ret); +#ifdef CONFIG_XEN_COMPAT_030002 + if (ret) ret = pte_machine_to_phys(ret) | _PAGE_PRESENT; +#else + if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); +#endif return ret; } static inline unsigned long pud_val(pud_t x) { unsigned long ret = x.pud; - if (ret) ret = pte_machine_to_phys(ret); + if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); return ret; } static inline unsigned long pgd_val(pgd_t x) { unsigned long ret = x.pgd; - if (ret) ret = pte_machine_to_phys(ret); + if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); return ret; } @@ -124,25 +136,25 @@ static inline unsigned long pgd_val(pgd_ static inline pte_t __pte(unsigned long x) { - if (x & 1) x = phys_to_machine(x); + if (x & _PAGE_PRESENT) x = pte_phys_to_machine(x); return ((pte_t) { (x) }); } static inline pmd_t __pmd(unsigned long x) { - if ((x & 1)) x = phys_to_machine(x); + if (x & _PAGE_PRESENT) x = pte_phys_to_machine(x); return ((pmd_t) { (x) }); } static inline pud_t __pud(unsigned long x) { - if ((x & 1)) x = phys_to_machine(x); + if (x & _PAGE_PRESENT) x = pte_phys_to_machine(x); return ((pud_t) { (x) }); } static inline pgd_t __pgd(unsigned long x) { - if ((x & 1)) x = phys_to_machine(x); + if (x & _PAGE_PRESENT) x = pte_phys_to_machine(x); return ((pgd_t) { (x) }); } Index: sle10sp1-2007-01-31/include/asm-x86_64/mach-xen/asm/pgtable.h ==================================================================--- sle10sp1-2007-01-31.orig/include/asm-x86_64/mach-xen/asm/pgtable.h 2007-02-13 12:27:05.000000000 +0100 +++ sle10sp1-2007-01-31/include/asm-x86_64/mach-xen/asm/pgtable.h 2007-02-13 14:14:47.000000000 +0100 @@ -303,18 +303,16 @@ static inline unsigned long pud_bad(pud_ #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) #define pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT) 
-#define pte_pfn(_pte) mfn_to_local_pfn(pte_mfn(_pte)) +#define pte_pfn(_pte) ((pte_val(_pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) #define pte_page(x) pfn_to_page(pte_pfn(x)) static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { - pte_t pte; - - (pte).pte = (pfn_to_mfn(page_nr) << PAGE_SHIFT); - (pte).pte |= pgprot_val(pgprot); - (pte).pte &= __supported_pte_mask; - return pte; + unsigned long pte = page_nr << PAGE_SHIFT; + pte |= pgprot_val(pgprot); + pte &= __supported_pte_mask; + return __pte(pte); } /* @@ -446,18 +444,25 @@ static inline pud_t *pud_offset_k(pgd_t /* physical address -> PTE */ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { - pte_t pte; - (pte).pte = physpage | pgprot_val(pgprot); - return pte; + unsigned long pteval; + pteval = physpage | pgprot_val(pgprot); + return __pte(pteval); } /* Change flags of a PTE */ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - (pte).pte &= _PAGE_CHG_MASK; - (pte).pte |= pgprot_val(newprot); - (pte).pte &= __supported_pte_mask; - return pte; + /* + * Since this might change the present bit (which controls whether + * a pte_t object has undergone p2m translation), we must use + * pte_val() on the input pte and __pte() for the return value. + */ + unsigned long pteval = pte_val(pte); + + pteval &= _PAGE_CHG_MASK; + pteval |= pgprot_val(newprot); + pteval &= __supported_pte_mask; + return __pte(pteval); } #define pte_index(address) \ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel