The current version of superpage mapping takes a PGT_writable reference to every page in a superpage each time it is mapped. This is extremely slow, so slow that applications become unusable. My solution for this is to introduce a superpage table in the hypervisor, similar to the frametable structure for pages. Currently this table only has a type_info element. There are three types a superpage can have: SGT_mark, SGT_dynamic, or SGT_none. In normal operation, the first time a superpage is mapped, a PGT_writable reference is taken to each page in the superpage, and the superpage is set to type SGT_dynamic and the superpage typecount is incremented. On subsequent mappings and unmappings, only the superpage typecount changes. On the last unmap, the PGT_writable reference on each page is removed. The SGT_mark type is set and cleared through two new MMUEXT hypercalls, mark_super and unmark_super. When the mark_super hypercall is made, the superpage's type is set to SGT_mark and a PGT_writable reference is taken to its pages. On unmark, the type is cleared and the reference removed. If a superpage is already set to SGT_dynamic when mark_super is called, the type is changed to SGT_mark and no additional PGT_writable reference is taken. If there are still outstanding mappings of this superpage when unmark_super is called, the type is set to SGT_dynamic and the PGT_writable reference is not removed. Signed-off-by: Dave McCracken <dave.mccracken@oracle.com> -------- --- xen-staging/xen/include/asm-x86/mm.h 2010-05-18 09:45:53.000000000 -0500 +++ xen-staging-fs//xen/include/asm-x86/mm.h 2010-05-24 09:00:42.000000000 -0500 @@ -214,6 +214,21 @@ struct page_info #define PGC_count_width PG_shift(9) #define PGC_count_mask ((1UL<<PGC_count_width)-1) +struct spage_info +{ + unsigned long type_info; +}; + + /* The following page types are MUTUALLY EXCLUSIVE. 
*/ +#define SGT_none PG_mask(0, 2) /* superpage not in use */ +#define SGT_mark PG_mask(1, 2) /* Marked as a superpage */ +#define SGT_dynamic PG_mask(2, 2) /* has been dynamically mapped as a superpage */ +#define SGT_type_mask PG_mask(3, 2) /* Bits 30-31 or 62-63. */ + + /* Count of uses of this superpage as its current type. */ +#define SGT_count_width PG_shift(3) +#define SGT_count_mask ((1UL<<SGT_count_width)-1) + #if defined(__i386__) #define is_xen_heap_page(page) is_xen_heap_mfn(page_to_mfn(page)) #define is_xen_heap_mfn(mfn) ({ \ @@ -262,6 +277,7 @@ extern void share_xen_page_with_privileg struct page_info *page, int readonly); #define frame_table ((struct page_info *)FRAMETABLE_VIRT_START) +#define spage_table ((struct spage_info *)SPAGETABLE_VIRT_START) extern unsigned long max_page; extern unsigned long total_pages; void init_frametable(void); @@ -305,6 +321,8 @@ void cleanup_page_cacheattr(struct page_ int is_iomem_page(unsigned long mfn); +void clear_superpage_mark(struct page_info *page); + struct domain *page_get_owner_and_reference(struct page_info *page); void put_page(struct page_info *page); int get_page(struct page_info *page, struct domain *domain); @@ -370,7 +388,7 @@ pae_copy_root(struct vcpu *v, l3_pgentry int check_descriptor(const struct domain *, struct desc_struct *d); -extern int opt_allow_hugepage; +extern int opt_allow_superpage; extern int mem_hotplug; /****************************************************************************** --- xen-staging/xen/include/asm-x86/guest_pt.h 2010-05-18 09:45:53.000000000 -0500 +++ xen-staging-fs//xen/include/asm-x86/guest_pt.h 2010-05-24 09:00:42.000000000 -0500 @@ -187,7 +187,7 @@ guest_supports_superpages(struct vcpu *v * CR4.PSE is set or the guest is in PAE or long mode. * It's also used in the dummy PT for vcpus with CR4.PG cleared. */ return (!is_hvm_vcpu(v) - ? opt_allow_hugepage + ? 
opt_allow_superpage : (GUEST_PAGING_LEVELS != 2 || !hvm_paging_enabled(v) || (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PSE))); --- xen-staging/xen/include/asm-x86/x86_32/page.h 2010-05-18 09:45:54.000000000 -0500 +++ xen-staging-fs//xen/include/asm-x86/x86_32/page.h 2010-05-24 09:00:43.000000000 -0500 @@ -6,6 +6,7 @@ #define L2_PAGETABLE_SHIFT 21 #define L3_PAGETABLE_SHIFT 30 #define PAGE_SHIFT L1_PAGETABLE_SHIFT +#define SUPERPAGE_SHIFT L2_PAGETABLE_SHIFT #define ROOT_PAGETABLE_SHIFT L3_PAGETABLE_SHIFT #define PAGETABLE_ORDER 9 @@ -13,6 +14,7 @@ #define L2_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER) #define L3_PAGETABLE_ENTRIES 4 #define ROOT_PAGETABLE_ENTRIES L3_PAGETABLE_ENTRIES +#define SUPERPAGE_ORDER PAGETABLE_ORDER /* * Architecturally, physical addresses may be up to 52 bits. However, the @@ -53,6 +55,9 @@ #define virt_to_pdx(va) virt_to_mfn(va) #define pdx_to_virt(pdx) mfn_to_virt(pdx) +#define pfn_to_sdx(pfn) ((pfn)>>(SUPERPAGE_SHIFT-PAGE_SHIFT)) +#define sdx_to_pfn(sdx) ((sdx)<<(SUPERPAGE_SHIFT-PAGE_SHIFT)) + static inline unsigned long __virt_to_maddr(unsigned long va) { ASSERT(va >= DIRECTMAP_VIRT_START && va < DIRECTMAP_VIRT_END); --- xen-staging/xen/include/asm-x86/config.h 2010-05-18 09:45:53.000000000 -0500 +++ xen-staging-fs//xen/include/asm-x86/config.h 2010-05-24 09:00:43.000000000 -0500 @@ -225,6 +225,11 @@ extern unsigned int video_mode, video_fl /* Slot 261: xen text, static data and bss (1GB). */ #define XEN_VIRT_START (HIRO_COMPAT_MPT_VIRT_END) #define XEN_VIRT_END (XEN_VIRT_START + GB(1)) +/* Slot 261: superpage information array (20MB). */ +#define SPAGETABLE_VIRT_END FRAMETABLE_VIRT_START +#define SPAGETABLE_SIZE ((DIRECTMAP_SIZE >> SUPERPAGE_SHIFT) * \ + sizeof(struct spage_info)) +#define SPAGETABLE_VIRT_START (SPAGETABLE_VIRT_END - SPAGETABLE_SIZE) /* Slot 261: page-frame information array (40GB). 
*/ #define FRAMETABLE_VIRT_END DIRECTMAP_VIRT_START #define FRAMETABLE_SIZE ((DIRECTMAP_SIZE >> PAGE_SHIFT) * \ --- xen-staging/xen/include/asm-x86/page.h 2010-05-18 09:45:53.000000000 -0500 +++ xen-staging-fs//xen/include/asm-x86/page.h 2010-05-24 09:00:43.000000000 -0500 @@ -240,6 +240,14 @@ void copy_page_sse2(void *, const void * #define __pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT) #define __paddr_to_pfn(pa) ((unsigned long)((pa) >> PAGE_SHIFT)) +/* Convert between machine frame numbers and spage-info structures. */ +#define __mfn_to_spage(mfn) (spage_table + pfn_to_sdx(mfn)) +#define __spage_to_mfn(pg) sdx_to_pfn((unsigned long)((pg) - spage_table)) + +/* Convert between page-info structures and spage-info structures. */ +#define page_to_spage(page) (spage_table+(((page)-frame_table)>>(SUPERPAGE_SHIFT-PAGE_SHIFT))) +#define spage_to_page(spage) (frame_table+(((spage)-spage_table)<<(SUPERPAGE_SHIFT-PAGE_SHIFT))) + /* * We define non-underscored wrappers for above conversion functions. These are * overridden in various source files while underscored versions remain intact. 
@@ -251,6 +259,8 @@ void copy_page_sse2(void *, const void * #define maddr_to_virt(ma) __maddr_to_virt((unsigned long)(ma)) #define mfn_to_page(mfn) __mfn_to_page(mfn) #define page_to_mfn(pg) __page_to_mfn(pg) +#define mfn_to_spage(mfn) __mfn_to_spage(mfn) +#define spage_to_mfn(pg) __spage_to_mfn(pg) #define maddr_to_page(ma) __maddr_to_page(ma) #define page_to_maddr(pg) __page_to_maddr(pg) #define virt_to_page(va) __virt_to_page(va) --- xen-staging/xen/include/asm-x86/x86_64/page.h 2010-05-18 09:45:54.000000000 -0500 +++ xen-staging-fs//xen/include/asm-x86/x86_64/page.h 2010-05-24 09:00:43.000000000 -0500 @@ -7,6 +7,7 @@ #define L3_PAGETABLE_SHIFT 30 #define L4_PAGETABLE_SHIFT 39 #define PAGE_SHIFT L1_PAGETABLE_SHIFT +#define SUPERPAGE_SHIFT L2_PAGETABLE_SHIFT #define ROOT_PAGETABLE_SHIFT L4_PAGETABLE_SHIFT #define PAGETABLE_ORDER 9 @@ -15,6 +16,7 @@ #define L3_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER) #define L4_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER) #define ROOT_PAGETABLE_ENTRIES L4_PAGETABLE_ENTRIES +#define SUPERPAGE_ORDER PAGETABLE_ORDER #define __PAGE_OFFSET DIRECTMAP_VIRT_START #define __XEN_VIRT_START XEN_VIRT_START @@ -41,6 +43,8 @@ extern void pfn_pdx_hole_setup(unsigned #define page_to_pdx(pg) ((pg) - frame_table) #define pdx_to_page(pdx) (frame_table + (pdx)) +#define spage_to_pdx(spg) (((spg) - spage_table)<<(SUPERPAGE_SHIFT-PAGE_SHIFT)) /* spage_info pointer -> pdx of the first page frame it covers (index in spage_table scaled up by pages-per-superpage) */ +#define pdx_to_spage(pdx) (spage_table + ((pdx)>>(SUPERPAGE_SHIFT-PAGE_SHIFT))) /* pdx -> spage_info entry of the superpage containing it (pdx scaled down by pages-per-superpage) */ /* * Note: These are solely for the use by page_{get,set}_owner(), and * therefore don't need to handle the XEN_VIRT_{START,END} range. 
@@ -64,6 +68,16 @@ static inline unsigned long pdx_to_pfn(u ((pdx << pfn_pdx_hole_shift) & pfn_top_mask); } +static inline unsigned long pfn_to_sdx(unsigned long pfn) +{ + return pfn_to_pdx(pfn) >> (SUPERPAGE_SHIFT-PAGE_SHIFT); +} + +static inline unsigned long sdx_to_pfn(unsigned long sdx) +{ + return pdx_to_pfn(sdx << (SUPERPAGE_SHIFT-PAGE_SHIFT)); +} + static inline unsigned long __virt_to_maddr(unsigned long va) { ASSERT(va >= XEN_VIRT_START); --- xen-staging/xen/arch/x86/domain.c 2010-05-24 08:59:03.000000000 -0500 +++ xen-staging-fs//xen/arch/x86/domain.c 2010-05-24 09:00:43.000000000 -0500 @@ -1739,6 +1739,9 @@ static int relinquish_memory( BUG(); } + if (opt_allow_superpage) + clear_superpage_mark(page); + if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); --- xen-staging/xen/arch/x86/mm.c 2010-05-18 09:45:53.000000000 -0500 +++ xen-staging-fs//xen/arch/x86/mm.c 2010-05-24 09:00:43.000000000 -0500 @@ -151,8 +151,11 @@ unsigned long __read_mostly pdx_group_va #define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT) -int opt_allow_hugepage; -boolean_param("allowhugepage", opt_allow_hugepage); +int opt_allow_superpage; +boolean_param("allowsuperpage", opt_allow_superpage); + +static int get_superpage(struct spage_info *spage, struct domain *d); +static void put_superpage(struct spage_info *spage); #define l1_disallow_mask(d) \ ((d != dom_io) && \ @@ -171,6 +174,28 @@ l2_pgentry_t *compat_idle_pg_table_l2 #define l3_disallow_mask(d) L3_DISALLOW_MASK #endif +static void __init init_spagetable(void) +{ + unsigned long s, start = SPAGETABLE_VIRT_START; + unsigned long end = SPAGETABLE_VIRT_END; + unsigned long step, mfn; + unsigned int max_entries; + + step = 1UL << PAGETABLE_ORDER; + max_entries = (max_pdx + ((1UL<<SUPERPAGE_ORDER)-1)) >> SUPERPAGE_ORDER; + end = start + (((max_entries * sizeof(*spage_table)) + + ((1UL<<SUPERPAGE_SHIFT)-1)) & (~((1UL<<SUPERPAGE_SHIFT)-1))); + + for (s = start; s < end; s += step << PAGE_SHIFT) + { + mfn 
= alloc_boot_pages(step, step); + if ( !mfn ) + panic("Not enough memory for spage table"); + map_pages_to_xen(s, mfn, step, PAGE_HYPERVISOR); + } + memset((void *)start, 0, end - start); +} + static void __init init_frametable_chunk(void *start, void *end) { unsigned long s = (unsigned long)start; @@ -232,6 +257,8 @@ void __init init_frametable(void) (unsigned long)pdx_to_page(max_idx * PDX_GROUP_COUNT) - (unsigned long)pdx_to_page(max_pdx)); } + if (opt_allow_superpage) + init_spagetable(); } void __init arch_init_memory(void) @@ -652,19 +679,6 @@ static int get_page_and_type_from_pagenr return rc; } -static int get_data_page( - struct page_info *page, struct domain *d, int writeable) -{ - int rc; - - if ( writeable ) - rc = get_page_and_type(page, d, PGT_writable_page); - else - rc = get_page(page, d); - - return rc; -} - static void put_data_page( struct page_info *page, int writeable) { @@ -887,30 +901,21 @@ get_page_from_l2e( rc = get_page_and_type_from_pagenr(mfn, PGT_l1_page_table, d, 0, 0); if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) ) rc = 0; + + return rc; } - else if ( !opt_allow_hugepage || (mfn & (L1_PAGETABLE_ENTRIES-1)) ) + if ( !opt_allow_superpage ) { - rc = -EINVAL; + MEM_LOG("Attempt to map superpage without allowsuperpage flag in hypervisor"); + return -EINVAL; } - else + if ( mfn & (L1_PAGETABLE_ENTRIES-1) ) { - unsigned long m = mfn; - int writeable = !!(l2e_get_flags(l2e) & _PAGE_RW); - - do { - if ( !mfn_valid(m) || - !get_data_page(mfn_to_page(m), d, writeable) ) - { - while ( m-- > mfn ) - put_data_page(mfn_to_page(m), writeable); - return -EINVAL; - } - } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) ); - - rc = 1; + MEM_LOG("Unaligned superpage map attempt mfn %lx", mfn); + return -EINVAL; } + return get_superpage(mfn_to_spage(mfn), d); - return rc; } @@ -1101,13 +1106,7 @@ static int put_page_from_l2e(l2_pgentry_ if ( l2e_get_flags(l2e) & _PAGE_PSE ) { - unsigned long mfn = l2e_get_pfn(l2e), m = mfn; - int 
writeable = l2e_get_flags(l2e) & _PAGE_RW; - - ASSERT(!(mfn & (L1_PAGETABLE_ENTRIES-1))); - do { - put_data_page(mfn_to_page(m), writeable); - } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) ); + put_superpage(mfn_to_spage(l2e_get_pfn(l2e))); } else { @@ -2445,6 +2444,169 @@ int get_page_type_preemptible(struct pag return __get_page_type(page, type, 1); } +static int get_spage_pages(struct spage_info *spage, struct domain *d) +{ + struct page_info *page = spage_to_page(spage); + int i; + + for (i = 0; i < (1<<PAGETABLE_ORDER); i++, page++) + { + if (!get_page_and_type(page, d, PGT_writable_page)) + { + while (--i >= 0) + put_page_and_type(--page); + return 0; + } + } + return 1; +} + +static void put_spage_pages(struct spage_info *spage) +{ + struct page_info *page = spage_to_page(spage); + int i; + + for (i = 0; i < (1<<PAGETABLE_ORDER); i++, page++) + { + put_page_and_type(page); + } + return; +} + +static int mark_superpage(struct spage_info *spage, struct domain *d) +{ + unsigned long x, nx, y = spage->type_info; + int pages_done = 0; + + do { + x = y; + nx = x + 1; + if ((x & SGT_type_mask) == SGT_mark) + { + MEM_LOG("Duplicate superpage mark attempt mfn %lx", spage_to_mfn(spage)); + if (pages_done) + put_spage_pages(spage); + + return -EINVAL; + } + if ((x & SGT_type_mask) == SGT_dynamic) + { + if (pages_done) + { + put_spage_pages(spage); + pages_done = 0; + } + } + else if (!pages_done) + { + if (!get_spage_pages(spage, d)) + { + MEM_LOG("Superpage type conflict in mark attempt mfn %lx", + spage_to_mfn(spage)); + return -EINVAL; + } + pages_done = 1; + } + nx = (nx & ~SGT_type_mask) | SGT_mark; + + } while ((y = cmpxchg(&spage->type_info, x, nx)) != x); + return 0; +} + +static int unmark_superpage(struct spage_info *spage) +{ + unsigned long x, nx, y = spage->type_info; + unsigned long do_pages = 0; + + do { + x = y; + nx = x - 1; + if ((x & SGT_type_mask) != SGT_mark) + { + MEM_LOG("Attempt to unmark unmarked superpage mfn %lx", spage_to_mfn(spage)); + 
return -EINVAL; + } + if ((nx & SGT_count_mask) == 0) + { + nx = (nx & ~SGT_type_mask) | SGT_none; + do_pages = 1; + } + else + { + nx = (nx & ~SGT_type_mask) | SGT_dynamic; + } + } while ((y = cmpxchg(&spage->type_info, x, nx)) != x); + + if (do_pages) + put_spage_pages(spage); + + return 0; +} + +void clear_superpage_mark(struct page_info *page) +{ + struct spage_info *spage = page_to_spage(page); + + if ((spage->type_info & SGT_type_mask) == SGT_mark) + unmark_superpage(spage); + +} +static int get_superpage(struct spage_info *spage, struct domain *d) +{ + unsigned long x, nx, y = spage->type_info; + int pages_done = 0; + + do { + x = y; + nx = x + 1; + if ((x & SGT_type_mask) != SGT_none) + { + if (pages_done) + { + put_spage_pages(spage); + pages_done = 0; + } + } + else + { + if (!get_spage_pages(spage, d)) + { + MEM_LOG("Type conflict on superpage mapping mfn %lx", + spage_to_mfn(spage)); + return -EINVAL; + } + pages_done = 1; + nx = (nx & ~SGT_type_mask) | SGT_dynamic; + } + } while ((y = cmpxchg(&spage->type_info, x, nx)) != x); + return 0; +} + +static void put_superpage(struct spage_info *spage) +{ + unsigned long x, nx, y = spage->type_info; + unsigned long do_pages = 0; + + do { + x = y; + nx = x - 1; + if ((x & SGT_type_mask) == SGT_dynamic) + { + if ((nx & SGT_count_mask) == 0) + { + nx = (nx & ~SGT_type_mask) | SGT_none; + do_pages = 1; + } + } + + } while ((y = cmpxchg(&spage->type_info, x, nx)) != x); + + if (do_pages) + put_spage_pages(spage); + + return; +} + void cleanup_page_cacheattr(struct page_info *page) { uint32_t cacheattr @@ -3002,6 +3164,45 @@ int do_mmuext_op( break; } + case MMUEXT_MARK_SUPER: + { + unsigned long mfn; + struct spage_info *spage; + + mfn = op.arg1.mfn; + if (mfn & (L1_PAGETABLE_ENTRIES-1)) + { + MEM_LOG("Unaligned superpage reference mfn %lx", mfn); + okay = 0; + break; + } + + spage = mfn_to_spage(mfn); + if (mark_superpage(spage, d) < 0) + okay = 0; + + break; + } + + case MMUEXT_UNMARK_SUPER: + { + unsigned long 
mfn; + struct spage_info *spage; + + mfn = op.arg1.mfn; + if (mfn & (L1_PAGETABLE_ENTRIES-1)) + { + MEM_LOG("Unaligned superpage reference mfn %lx", mfn); + okay = 0; + break; + } + spage = mfn_to_spage(mfn); + if (unmark_superpage(spage) < 0) + okay = 0; + + break; + } + default: MEM_LOG("Invalid extended pt command 0x%x", op.cmd); rc = -ENOSYS; _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel