Jan Beulich
2007-Mar-01 16:27 UTC
[Xen-devel] [PATCH] linux: allow use of split page table locks
This changes netback to use struct page's mapping field instead of the index one, and switches the PageForeign logic the other way around (from the mapping field to the index one). That way, CONFIG_SPLIT_PTLOCK_CPUS doesn't have to be suppressed anymore for x86. After that adjustment, the sparse tree's mm/Kconfig should be identical to native and could hence be deleted. Signed-off-by: Jan Beulich <jbeulich@novell.com> Index: head-2007-02-27/drivers/xen/netback/netback.c ================================================================== --- head-2007-02-27.orig/drivers/xen/netback/netback.c 2007-02-27 16:31:12.000000000 +0100 +++ head-2007-02-27/drivers/xen/netback/netback.c 2007-03-01 15:23:00.000000000 +0100 @@ -40,6 +40,9 @@ /*#define NETBE_DEBUG_INTERRUPT*/ +/* extra field used in struct page */ +#define netif_page_index(pg) (*(long *)&(pg)->mapping) + struct netbk_rx_meta { skb_frag_t frag; int id; @@ -352,7 +355,7 @@ static u16 netbk_gop_frag(netif_t *netif copy_gop->flags = GNTCOPY_dest_gref; if (PageForeign(page)) { struct pending_tx_info *src_pend = - &pending_tx_info[page->index]; + &pending_tx_info[netif_page_index(page)]; copy_gop->source.domid = src_pend->netif->domid; copy_gop->source.u.ref = src_pend->req.gref; copy_gop->flags |= GNTCOPY_source_gref; @@ -1327,7 +1330,7 @@ static void netif_page_release(struct pa /* Ready for next use. 
*/ init_page_count(page); - netif_idx_release(page->index); + netif_idx_release(netif_page_index(page)); } irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs) @@ -1457,7 +1460,7 @@ static int __init netback_init(void) for (i = 0; i < MAX_PENDING_REQS; i++) { page = mmap_pages[i]; SetPageForeign(page, netif_page_release); - page->index = i; + netif_page_index(page) = i; } pending_cons = 0; Index: head-2007-02-27/include/linux/page-flags.h ================================================================== --- head-2007-02-27.orig/include/linux/page-flags.h 2007-02-27 16:26:22.000000000 +0100 +++ head-2007-02-27/include/linux/page-flags.h 2007-03-01 15:15:29.000000000 +0100 @@ -252,14 +252,14 @@ static inline void SetPageUptodate(struc #define PageForeign(page) test_bit(PG_foreign, &(page)->flags) #define SetPageForeign(page, dtor) do { \ set_bit(PG_foreign, &(page)->flags); \ - (page)->mapping = (void *)dtor; \ + (page)->index = (long)(dtor); \ } while (0) #define ClearPageForeign(page) do { \ clear_bit(PG_foreign, &(page)->flags); \ - (page)->mapping = NULL; \ + (page)->index = 0; \ } while (0) #define PageForeignDestructor(page) \ - ( (void (*) (struct page *)) (page)->mapping )(page) + ( (void (*) (struct page *)) (page)->index )(page) struct page; /* forward declaration */ Index: head-2007-02-27/mm/Kconfig ================================================================== --- head-2007-02-27.orig/mm/Kconfig 2007-02-27 16:23:14.000000000 +0100 +++ head-2007-02-27/mm/Kconfig 2007-03-01 15:16:15.000000000 +0100 @@ -127,14 +127,11 @@ config MEMORY_HOTPLUG_SPARSE # Default to 4 for wider testing, though 8 might be more appropriate. # ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock. # PA-RISC 7xxx's spinlock_t would enlarge struct page from 32 to 44 bytes. -# XEN on x86 architecture uses the mapping field on pagetable pages to store a -# pointer to the destructor. This conflicts with pte_lock_deinit(). 
# config SPLIT_PTLOCK_CPUS int default "4096" if ARM && !CPU_CACHE_VIPT default "4096" if PARISC && !PA20 - default "4096" if X86_XEN || X86_64_XEN default "4" # _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich
2007-Mar-26 14:59 UTC
[Xen-devel] [PATCH] linux: allow use of split page table locks
This fixes the race condition previously experienced between (un)pinning and vmscan. Signed-off-by: Jan Beulich <jbeulich@novell.com> Index: head-2007-03-19/arch/i386/mm/pgtable-xen.c ================================================================== --- head-2007-03-19.orig/arch/i386/mm/pgtable-xen.c 2007-03-21 10:19:07.000000000 +0100 +++ head-2007-03-19/arch/i386/mm/pgtable-xen.c 2007-03-21 11:51:37.000000000 +0100 @@ -574,6 +574,64 @@ void make_pages_writable(void *va, unsig } } +static void _pin_lock(struct mm_struct *mm, int lock) { + if (lock) + spin_lock(&mm->page_table_lock); +#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS + /* While mm->page_table_lock protects us against insertions and + * removals of higher level page table pages, it doesn't protect + * against updates of pte-s. Such updates, however, require the + * pte pages to be in a consistent state (unpinned+writable or + * pinned+readonly). The pinning and attribute changes, however, + * cannot be done atomically, which is why such updates must be + * prevented from happening concurrently. + * Note that no pte lock can ever elsewhere be acquired nesting + * with an already acquired one in the same mm, or with the mm's + * page_table_lock already acquired, as that would break in the + * non-split case (where all these are actually resolving to the + * one page_table_lock). Thus acquiring all of them here is not + * going to result in deadlocks, and the order of acquires + * doesn't matter. 
+ */ + { + pgd_t *pgd = mm->pgd; + unsigned g; + + for (g = 0; g < USER_PTRS_PER_PGD; g++, pgd++) { + pud_t *pud; + unsigned u; + + if (pgd_none(*pgd)) + continue; + pud = pud_offset(pgd, 0); + for (u = 0; u < PTRS_PER_PUD; u++, pud++) { + pmd_t *pmd; + unsigned m; + + if (pud_none(*pud)) + continue; + pmd = pmd_offset(pud, 0); + for (m = 0; m < PTRS_PER_PMD; m++, pmd++) { + spinlock_t *ptl; + + if (pmd_none(*pmd)) + continue; + ptl = pte_lockptr(0, pmd); + if (lock) + spin_lock(ptl); + else + spin_unlock(ptl); + } + } + } + } +#endif + if (!lock) + spin_unlock(&mm->page_table_lock); +} +#define pin_lock(mm) _pin_lock(mm, 1) +#define pin_unlock(mm) _pin_lock(mm, 0) + static inline void pgd_walk_set_prot(struct page *page, pgprot_t flags) { unsigned long pfn = page_to_pfn(page); @@ -656,18 +714,18 @@ void mm_pin(struct mm_struct *mm) { if (xen_feature(XENFEAT_writable_page_tables)) return; - spin_lock(&mm->page_table_lock); + pin_lock(mm); __pgd_pin(mm->pgd); - spin_unlock(&mm->page_table_lock); + pin_unlock(mm); } void mm_unpin(struct mm_struct *mm) { if (xen_feature(XENFEAT_writable_page_tables)) return; - spin_lock(&mm->page_table_lock); + pin_lock(mm); __pgd_unpin(mm->pgd); - spin_unlock(&mm->page_table_lock); + pin_unlock(mm); } void mm_pin_all(void) Index: head-2007-03-19/arch/x86_64/mm/pageattr-xen.c ================================================================== --- head-2007-03-19.orig/arch/x86_64/mm/pageattr-xen.c 2007-03-21 10:19:07.000000000 +0100 +++ head-2007-03-19/arch/x86_64/mm/pageattr-xen.c 2007-03-21 11:50:13.000000000 +0100 @@ -20,6 +20,64 @@ LIST_HEAD(mm_unpinned); DEFINE_SPINLOCK(mm_unpinned_lock); +static void _pin_lock(struct mm_struct *mm, int lock) { + if (lock) + spin_lock(&mm->page_table_lock); +#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS + /* While mm->page_table_lock protects us against insertions and + * removals of higher level page table pages, it doesn't protect + * against updates of pte-s. 
Such updates, however, require the + * pte pages to be in a consistent state (unpinned+writable or + * pinned+readonly). The pinning and attribute changes, however, + * cannot be done atomically, which is why such updates must be + * prevented from happening concurrently. + * Note that no pte lock can ever elsewhere be acquired nesting + * with an already acquired one in the same mm, or with the mm's + * page_table_lock already acquired, as that would break in the + * non-split case (where all these are actually resolving to the + * one page_table_lock). Thus acquiring all of them here is not + * going to result in deadlocks, and the order of acquires + * doesn't matter. + */ + { + pgd_t *pgd = mm->pgd; + unsigned g; + + for (g = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) { + pud_t *pud; + unsigned u; + + if (pgd_none(*pgd)) + continue; + pud = pud_offset(pgd, 0); + for (u = 0; u < PTRS_PER_PUD; u++, pud++) { + pmd_t *pmd; + unsigned m; + + if (pud_none(*pud)) + continue; + pmd = pmd_offset(pud, 0); + for (m = 0; m < PTRS_PER_PMD; m++, pmd++) { + spinlock_t *ptl; + + if (pmd_none(*pmd)) + continue; + ptl = pte_lockptr(0, pmd); + if (lock) + spin_lock(ptl); + else + spin_unlock(ptl); + } + } + } + } +#endif + if (!lock) + spin_unlock(&mm->page_table_lock); +} +#define pin_lock(mm) _pin_lock(mm, 1) +#define pin_unlock(mm) _pin_lock(mm, 0) + static inline void mm_walk_set_prot(void *pt, pgprot_t flags) { struct page *page = virt_to_page(pt); @@ -76,7 +134,7 @@ void mm_pin(struct mm_struct *mm) if (xen_feature(XENFEAT_writable_page_tables)) return; - spin_lock(&mm->page_table_lock); + pin_lock(mm); mm_walk(mm, PAGE_KERNEL_RO); if (HYPERVISOR_update_va_mapping( @@ -97,7 +155,7 @@ void mm_pin(struct mm_struct *mm) list_del(&mm->context.unpinned); spin_unlock(&mm_unpinned_lock); - spin_unlock(&mm->page_table_lock); + pin_unlock(mm); } void mm_unpin(struct mm_struct *mm) @@ -105,7 +163,7 @@ void mm_unpin(struct mm_struct *mm) if 
(xen_feature(XENFEAT_writable_page_tables)) return; - spin_lock(&mm->page_table_lock); + pin_lock(mm); xen_pgd_unpin(__pa(mm->pgd)); xen_pgd_unpin(__pa(__user_pgd(mm->pgd))); @@ -125,7 +183,7 @@ void mm_unpin(struct mm_struct *mm) list_add(&mm->context.unpinned, &mm_unpinned); spin_unlock(&mm_unpinned_lock); - spin_unlock(&mm->page_table_lock); + pin_unlock(mm); } void mm_pin_all(void) Index: head-2007-03-19/mm/Kconfig ================================================================== --- head-2007-03-19.orig/mm/Kconfig 2007-03-19 15:34:24.000000000 +0100 +++ head-2007-03-19/mm/Kconfig 2007-03-21 11:50:13.000000000 +0100 @@ -127,14 +127,11 @@ config MEMORY_HOTPLUG_SPARSE # Default to 4 for wider testing, though 8 might be more appropriate. # ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock. # PA-RISC 7xxx's spinlock_t would enlarge struct page from 32 to 44 bytes. -# XEN on x86 architecture uses the mapping field on pagetable pages to store a -# pointer to the destructor. This conflicts with pte_lock_deinit(). # config SPLIT_PTLOCK_CPUS int default "4096" if ARM && !CPU_CACHE_VIPT default "4096" if PARISC && !PA20 - default "4096" if X86_XEN || X86_64_XEN default "4" # _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel