Jaeyong Yoo
2013-Nov-06 03:47 UTC
[PATCH v5 4/6] xen/arm: Implement virtual-linear page table for guest p2m mapping in live migration
Implement vlpt (virtual-linear page table) for fast accessing of 3rd PTE of guest p2m. When creating a mapping for vlpt, just copy the 1st level PTE of guest p2m to the xen''s 2nd level PTE. Then the mapping becomes the following: xen''s 1st PTE --> xen''s 2nd PTE (which is the same as 1st PTE of guest p2m) --> guest p2m''s 2nd PTE --> guest p2m''s 3rd PTE (the memory contents where the vlpt points) For more info about vlpt, see: http://www.technovelty.org/linux/virtual-linear-page-table.html This function is used in dirty-page tracing: when domU write-fault is trapped by xen, xen can immediately locate the 3rd PTE of guest p2m. The following link shows the performance comparison for handling a dirty-page between vlpt and typical page table walking. http://lists.xen.org/archives/html/xen-devel/2013-08/msg01503.html Changes from v4: 1. In the restoring vlpt, use __foo variant without barriers for write_pte and flush_xen_data_tlb_range_va. 2. Support two consecutive pages for guest''s first level page table. Signed-off-by: Jaeyong Yoo <jaeyong.yoo@samsung.com> --- xen/arch/arm/domain.c | 5 ++ xen/arch/arm/mm.c | 112 +++++++++++++++++++++++++++++++++++++++ xen/include/asm-arm/arm32/page.h | 41 ++++++++++---- xen/include/asm-arm/config.h | 5 ++ xen/include/asm-arm/domain.h | 7 +++ xen/include/asm-arm/mm.h | 15 ++++++ 6 files changed, 174 insertions(+), 11 deletions(-) diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c index cb0424d..c0b5dd8 100644 --- a/xen/arch/arm/domain.c +++ b/xen/arch/arm/domain.c @@ -509,6 +509,11 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags) /* Default the virtual ID to match the physical */ d->arch.vpidr = boot_cpu_data.midr.bits; + d->arch.dirty.second_lvl_start = 0; + d->arch.dirty.second_lvl_end = 0; + d->arch.dirty.second_lvl[0] = NULL; + d->arch.dirty.second_lvl[1] = NULL; + clear_page(d->shared_info); share_xen_page_with_guest( virt_to_page(d->shared_info), d, XENSHARE_writable); diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c index 3801f07..bf13993 100644 --- a/xen/arch/arm/mm.c +++ b/xen/arch/arm/mm.c @@ -1328,6 +1328,118 @@ void get_gma_start_end(struct domain *d, paddr_t *start, paddr_t *end) *end = GUEST_RAM_BASE + ((paddr_t) d->max_pages << PAGE_SHIFT); } +/* flush the vlpt area */ +void flush_vlpt(struct domain *d) +{ + int flush_size; + flush_size = (d->arch.dirty.second_lvl_end - + d->arch.dirty.second_lvl_start) << SECOND_SHIFT; + /* flushing the 3rd level mapping */ + flush_xen_data_tlb_range_va(d->arch.dirty.second_lvl_start << SECOND_SHIFT, + flush_size); +} + +/* restore the xen page table for vlpt mapping for domain d */ +void restore_vlpt(struct domain *d) +{ + int i; + dsb(); + for ( i = d->arch.dirty.second_lvl_start; + i < d->arch.dirty.second_lvl_end; + ++i ) + { + int k = i % LPAE_ENTRIES; + int l = i / LPAE_ENTRIES; + + if ( xen_second[i].bits != d->arch.dirty.second_lvl[l][k].bits ) + { + __write_pte(&xen_second[i], d->arch.dirty.second_lvl[l][k]); + __flush_xen_data_tlb_range_va(i << SECOND_SHIFT, + 1 << SECOND_SHIFT); + } + } + dsb(); + isb(); +} + +/* setting up the xen page table for vlpt mapping for domain d */ +int prepare_vlpt(struct domain *d) +{ + int xen_second_linear_base; + int gp2m_start_index, gp2m_end_index; + struct p2m_domain *p2m = &d->arch.p2m; + struct page_info *second_lvl_page; + paddr_t gma_start = 0; + paddr_t gma_end = 0; + lpae_t *first[2]; + int i; + uint64_t required, avail = VIRT_LIN_P2M_END - VIRT_LIN_P2M_START; + + get_gma_start_end(d, &gma_start, &gma_end); + required = (gma_end - gma_start) >> LPAE_SHIFT; + + + if ( required > avail ) + { + dprintk(XENLOG_ERR, "Available VLPT is small for domU guest" + "(avail: %llx, required: %llx)\n", + avail, required); + return -ENOMEM; + } + + xen_second_linear_base = second_linear_offset(VIRT_LIN_P2M_START); + + gp2m_start_index = gma_start >> FIRST_SHIFT; + gp2m_end_index = (gma_end >> FIRST_SHIFT) + 1; + + if ( xen_second_linear_base + gp2m_end_index >= LPAE_ENTRIES * 2 ) + { + dprintk(XENLOG_ERR, "xen second page is small for VLPT for domU"); + return -ENOMEM; + } + + second_lvl_page = alloc_domheap_pages(NULL, 1, 0); + if ( second_lvl_page == NULL ) + return -ENOMEM; + + /* First level p2m is 2 consecutive pages */ + d->arch.dirty.second_lvl[0] = map_domain_page_global( + page_to_mfn(second_lvl_page) ); + d->arch.dirty.second_lvl[1] = map_domain_page_global( + page_to_mfn(second_lvl_page+1) ); + + first[0] = __map_domain_page(p2m->first_level); + first[1] = __map_domain_page(p2m->first_level+1); + for ( i = gp2m_start_index; i < gp2m_end_index; ++i ) + { + int k = i % LPAE_ENTRIES; + int l = i / LPAE_ENTRIES; + int k2 = (xen_second_linear_base + i) % LPAE_ENTRIES; + int l2 = (xen_second_linear_base + i) / LPAE_ENTRIES; + + write_pte(&xen_second[xen_second_linear_base+i], first[l][k]); + + /* we copy the mapping into domain''s structure as a reference + * in case of the context switch (used in restore_vlpt) */ + d->arch.dirty.second_lvl[l2][k2] = first[l][k]; + } + unmap_domain_page(first[0]); + unmap_domain_page(first[1]); + + /* storing the start and end index */ + d->arch.dirty.second_lvl_start = xen_second_linear_base + gp2m_start_index; + d->arch.dirty.second_lvl_end = xen_second_linear_base + gp2m_end_index; + + flush_vlpt(d); + return 0; +} + +void cleanup_vlpt(struct domain *d) +{ + /* First level p2m is 2 consecutive pages */ + unmap_domain_page_global(d->arch.dirty.second_lvl[0]); + unmap_domain_page_global(d->arch.dirty.second_lvl[1]); +} /* * Local variables: * mode: C diff --git a/xen/include/asm-arm/arm32/page.h b/xen/include/asm-arm/arm32/page.h index cf12a89..0a4e115 100644 --- a/xen/include/asm-arm/arm32/page.h +++ b/xen/include/asm-arm/arm32/page.h @@ -5,20 +5,28 @@ /* Write a pagetable entry. * - * If the table entry is changing a text mapping, it is responsibility - * of the caller to issue an ISB after write_pte. + * All necessary barriers are responsibility of the caller */ -static inline void write_pte(lpae_t *p, lpae_t pte) +static inline void __write_pte(lpae_t *p, lpae_t pte) { asm volatile ( - /* Ensure any writes have completed with the old mappings. */ - "dsb;" /* Safely write the entry (STRD is atomic on CPUs that support LPAE) */ "strd %0, %H0, [%1];" - "dsb;" : : "r" (pte.bits), "r" (p) : "memory"); } +/* Write a pagetable entry with dsb barriers. + * + * If the table entry is changing a text mapping, it is responsibility + * of the caller to issue an ISB after write_pte. + */ +static inline void write_pte(lpae_t *p, lpae_t pte) +{ + dsb(); + __write_pte(p, pte); + dsb(); +} + /* Inline ASM to flush dcache on register R (may be an inline asm operand) */ #define __flush_xen_dcache_one(R) STORE_CP32(R, DCCMVAC) @@ -57,18 +65,28 @@ static inline void flush_xen_data_tlb(void) } /* - * Flush a range of VA''s hypervisor mappings from the data TLB. This is not - * sufficient when changing code mappings or for self modifying code. + * Flush a range of VA''s hypervisor mappings from the data TLB. + * All necessary barriers are responsibility of the caller */ -static inline void flush_xen_data_tlb_range_va(unsigned long va, unsigned long size) +static inline void __flush_xen_data_tlb_range_va(unsigned long va, unsigned long size) { unsigned long end = va + size; - dsb(); /* Ensure preceding are visible */ while ( va < end ) { asm volatile(STORE_CP32(0, TLBIMVAH) : : "r" (va) : "memory"); va += PAGE_SIZE; } +} + +/* + * Flush a range of VA''s hypervisor mappings from the data TLB with barriers. + * This barrier is not sufficient when changing code mappings or for self + * modifying code. + */ +static inline void flush_xen_data_tlb_range_va(unsigned long va, unsigned long size) +{ + dsb(); /* Ensure preceding are visible */ + __flush_xen_data_tlb_range_va(va, size); dsb(); /* Ensure completion of the TLB flush */ isb(); } diff --git a/xen/include/asm-arm/config.h b/xen/include/asm-arm/config.h index 5b7b1a8..15ad56d 100644 --- a/xen/include/asm-arm/config.h +++ b/xen/include/asm-arm/config.h @@ -87,6 +87,7 @@ * 0 - 8M <COMMON> * * 32M - 128M Frametable: 24 bytes per page for 16GB of RAM + * 128M - 256M Virtual-linear mapping to P2M table * 256M - 1G VMAP: ioremap and early_ioremap use this virtual address * space * @@ -124,7 +125,9 @@ #define CONFIG_SEPARATE_XENHEAP 1 #define FRAMETABLE_VIRT_START _AT(vaddr_t,0x02000000) +#define VIRT_LIN_P2M_START _AT(vaddr_t,0x08000000) #define VMAP_VIRT_START _AT(vaddr_t,0x10000000) +#define VIRT_LIN_P2M_END VMAP_VIRT_START #define XENHEAP_VIRT_START _AT(vaddr_t,0x40000000) #define XENHEAP_VIRT_END _AT(vaddr_t,0x7fffffff) #define DOMHEAP_VIRT_START _AT(vaddr_t,0x80000000) @@ -157,6 +160,8 @@ #define HYPERVISOR_VIRT_END DIRECTMAP_VIRT_END +/*TODO (ARM_64): define VIRT_LIN_P2M_START VIRT_LIN_P2M_END */ + #endif /* Fixmap slots */ diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h index 67bfbbc..4f366f1 100644 --- a/xen/include/asm-arm/domain.h +++ b/xen/include/asm-arm/domain.h @@ -112,6 +112,13 @@ struct arch_domain spinlock_t lock; } vuart; + /* dirty-page tracing */ + struct { + volatile int second_lvl_start; /* for context switch */ + volatile int second_lvl_end; + lpae_t *second_lvl[2]; /* copy of guest p2m''s first */ + } dirty; + } __cacheline_aligned; struct arch_vcpu diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h index c5cb3af..a74e135 100644 --- a/xen/include/asm-arm/mm.h +++ b/xen/include/asm-arm/mm.h @@ -342,6 +342,21 @@ static inline void put_page_and_type(struct page_info *page) } void get_gma_start_end(struct domain *d, paddr_t *start, paddr_t *end); +int prepare_vlpt(struct domain *d); +void cleanup_vlpt(struct domain *d); +void restore_vlpt(struct domain *d); + +/* calculate the xen''s virtual address for accessing the leaf PTE of + * a given address (GPA) */ +static inline lpae_t * get_vlpt_3lvl_pte(paddr_t addr) +{ + lpae_t *table = (lpae_t *)VIRT_LIN_P2M_START; + + /* Since we slotted the guest''s first p2m page table to xen''s + * second page table, one shift is enough for calculating the + * index of guest p2m table entry */ + return &table[addr >> PAGE_SHIFT]; +} #endif /* __ARCH_ARM_MM__ */ /* -- 1.8.1.2 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel