Jiang, Yunhong
2009-Jun-28 09:27 UTC
[Xen-devel] [PATCH 5/6] Handle m2p table and frametable fault in page fault handler
After memory is added, the m2p table and frametable may be increased, and the idle_page_table is updated for them. These new mappings are propagated to other guests when the new m2p/frame table is accessed. The access can happen either in the HV or in a guest for the read-only mapping. The propagation is needed for a 32 bit Xen environment, or for a compatibility guest in a 64 bit Xen environment. Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> diff -r fbba70d76aec xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Sun Jun 28 02:34:55 2009 +0800 +++ b/xen/arch/x86/traps.c Sun Jun 28 02:34:57 2009 +0800 @@ -1213,6 +1213,9 @@ static int fixup_page_fault(unsigned lon (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) ) return handle_gdt_ldt_mapping_fault( addr - GDT_LDT_VIRT_START, regs); + if ( !(regs->error_code & PFEC_page_present) && + (pagefault_by_memadd(addr, regs)) ) + return handle_memadd_fault(addr, regs); return 0; } diff -r fbba70d76aec xen/arch/x86/x86_32/mm.c --- a/xen/arch/x86/x86_32/mm.c Sun Jun 28 02:34:55 2009 +0800 +++ b/xen/arch/x86/x86_32/mm.c Sun Jun 28 02:41:09 2009 +0800 @@ -383,6 +383,63 @@ int check_descriptor(const struct domain return 0; } + +int pagefault_by_memadd(unsigned long addr, struct cpu_user_regs *regs) +{ + if ( guest_mode(regs) ) + { + if ( ((addr >= RO_MPT_VIRT_START) && (addr < RO_MPT_VIRT_END)) ) + return 1; + } + else + { + if ( (addr >= RO_MPT_VIRT_START) && (addr < RDWR_MPT_VIRT_END) ) + return 1; + } + + return 0; +} + +int handle_memadd_fault(unsigned long addr, struct cpu_user_regs *regs) +{ + l3_pgentry_t *pl3e; + l3_pgentry_t l3e; + l2_pgentry_t *pl2e; + l2_pgentry_t l2e, idle_l2e; + unsigned long mfn, cr3; + + idle_l2e = idle_pg_table_l2[l2_linear_offset(addr)]; + if (!(l2e_get_flags(idle_l2e) & _PAGE_PRESENT)) + return 0; + + cr3 = read_cr3(); + mfn = cr3 >> PAGE_SHIFT; + + /* + * No need get page type here and validate checking for xen mapping + */ + pl3e = map_domain_page(mfn); + pl3e += (cr3 & 0xFE0UL) >> 3; + l3e = pl3e[3]; + + 
if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) + return 0; + + mfn = l3e_get_pfn(l3e); + pl2e = map_domain_page(mfn); + + l2e = pl2e[l2_table_offset(addr)]; + + if (l2e_get_flags(l2e) & _PAGE_PRESENT) + return 0; + + memcpy(&pl2e[l2_table_offset(addr)], + &idle_pg_table_l2[l2_linear_offset(addr)], + sizeof(l2_pgentry_t)); + + return EXCRET_fault_fixed; +} + /* * Local variables: * mode: C diff -r fbba70d76aec xen/arch/x86/x86_64/mm.c --- a/xen/arch/x86/x86_64/mm.c Sun Jun 28 02:34:55 2009 +0800 +++ b/xen/arch/x86/x86_64/mm.c Sun Jun 28 02:42:08 2009 +0800 @@ -643,6 +643,76 @@ unsigned int domain_clamp_alloc_bitsize( return min(d->arch.physaddr_bitsize, bits); } +int pagefault_by_memadd(unsigned long addr, struct cpu_user_regs *regs) +{ + struct domain *d = current->domain; +#ifdef CONFIG_COMPAT + if (guest_mode(regs) && + ((addr >= HYPERVISOR_COMPAT_VIRT_START(d)) && + (addr < MACH2PHYS_COMPAT_VIRT_END)) ) + return 1; +#endif + return 0; +} + +int handle_memadd_fault(unsigned long addr, struct cpu_user_regs *regs) +{ +#ifdef CONFIG_COMPAT + struct domain *d = current->domain; + l4_pgentry_t *pl4e; + l4_pgentry_t l4e; + l3_pgentry_t *pl3e; + l3_pgentry_t l3e; + l2_pgentry_t *pl2e; + l2_pgentry_t l2e, idle_l2e; + unsigned long mfn, idle_index; + + if (!is_pv_32on64_domain(d)) + return 0; + + mfn = (read_cr3()) >> PAGE_SHIFT; + + pl4e = map_domain_page(mfn); + + l4e = pl4e[0]; + + if (!(l4e_get_flags(l4e) & _PAGE_PRESENT)) + return 0; + + mfn = l4e_get_pfn(l4e); + /* We don''t need get page type here since it is current CR3 */ + pl3e = map_domain_page(mfn); + + l3e = pl3e[3]; + + if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) + return 0; + + mfn = l3e_get_pfn(l3e); + pl2e = map_domain_page(mfn); + + l2e = pl2e[l2_table_offset(addr)]; + + if (l2e_get_flags(l2e) & _PAGE_PRESENT) + return 0; + + idle_index = (l2_table_offset(addr) - + COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d))/ + sizeof(l2_pgentry_t); + idle_l2e = compat_idle_pg_table_l2[idle_index]; + if 
(!(l2e_get_flags(idle_l2e) & _PAGE_PRESENT)) + return 0; + memcpy(&pl2e[l2_table_offset(addr)], &compat_idle_pg_table_l2[l2_table_offset(addr)], + sizeof(l2_pgentry_t)); + + memcpy(&pl2e[l2_table_offset(addr)], + &compat_idle_pg_table_l2[idle_index], + sizeof(l2_pgentry_t)); + return EXCRET_fault_fixed; +#endif + return 0; +} + #include "compat/mm.c" /* diff -r fbba70d76aec xen/include/asm-x86/mm.h --- a/xen/include/asm-x86/mm.h Sun Jun 28 02:34:55 2009 +0800 +++ b/xen/include/asm-x86/mm.h Sun Jun 28 02:34:57 2009 +0800 @@ -518,6 +518,8 @@ int setup_m2p_table(unsigned long spfn, int setup_m2p_table(unsigned long spfn, unsigned long epfn, int hotplug); unsigned long domain_get_maximum_gpfn(struct domain *d); +int handle_memadd_fault(unsigned long addr, struct cpu_user_regs *regs); +int pagefault_by_memadd(unsigned long addr, struct cpu_user_regs *regs); extern struct domain *dom_xen, *dom_io; /* for vmcoreinfo */ #endif /* __ASM_X86_MM_H__ */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich
2009-Jun-29 09:32 UTC
[Xen-devel] Re: [PATCH 5/6] Handle m2p table and frametable fault in page fault handler
>>> "Jiang, Yunhong" <yunhong.jiang@intel.com> 28.06.09 11:27 >>> >... >+int handle_memadd_fault(unsigned long addr, struct cpu_user_regs *regs) >+{ >+ l3_pgentry_t *pl3e; >+ l3_pgentry_t l3e; >+ l2_pgentry_t *pl2e; >+ l2_pgentry_t l2e, idle_l2e; >+ unsigned long mfn, cr3; >+ >+ idle_l2e = idle_pg_table_l2[l2_linear_offset(addr)]; >+ if (!(l2e_get_flags(idle_l2e) & _PAGE_PRESENT)) >+ return 0; >+ >+ cr3 = read_cr3(); >+ mfn = cr3 >> PAGE_SHIFT; >+ >+ /* >+ * No need get page type here and validate checking for xen mapping >+ */ >+ pl3e = map_domain_page(mfn); >+ pl3e += (cr3 & 0xFE0UL) >> 3; >+ l3e = pl3e[3]; >+ >+ if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) >+ return 0; >+ >+ mfn = l3e_get_pfn(l3e); >+ pl2e = map_domain_page(mfn); >+ >+ l2e = pl2e[l2_table_offset(addr)]; >+ >+ if (l2e_get_flags(l2e) & _PAGE_PRESENT) >+ return 0;You''d also need to check that idle_page_table_l2''s entry has _PAGE_PRESENT set here, otherwise you''d risk live locking the domain on an out-of-current- bounds M2P access. Furthermore, I''m unsure forwarding the page fault in case you found the domain''s L2 entry to already have _PAGE_PRESENT set is a good way to handle things: A racing access on another vCPU of the same guest may just have managed to install that entry. Bottom line is, you probably need to exclusively check idle_page_table_l2 here.>+ >+ memcpy(&pl2e[l2_table_offset(addr)], >+ &idle_pg_table_l2[l2_linear_offset(addr)], >+ sizeof(l2_pgentry_t));Using memcpy for a single page table entry seems odd - why not use a direct assignment? However, perhaps using memcpy() here is the right thing - to avoid future faults, you could update the full M2P related part of the L2 directory in a single step. This ought to be safe, since the M2P table can only be extended, but never shrunk.>+ >+ return EXCRET_fault_fixed; >+}You''re leaking map_domain_page()-es here (and at the earlier return points). (All the comments likewise apply to the subsequent 64-bit variant.) 
Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2009-Jun-30 01:15 UTC
[Xen-devel] RE: [PATCH 5/6] Handle m2p table and frametable fault in page fault handler
Jan Beulich wrote:>>>> "Jiang, Yunhong" <yunhong.jiang@intel.com> 28.06.09 11:27 >>> >> ... >> +int handle_memadd_fault(unsigned long addr, struct cpu_user_regs >> *regs) +{ + l3_pgentry_t *pl3e; >> + l3_pgentry_t l3e; >> + l2_pgentry_t *pl2e; >> + l2_pgentry_t l2e, idle_l2e; >> + unsigned long mfn, cr3; >> + >> + idle_l2e = idle_pg_table_l2[l2_linear_offset(addr)]; >> + if (!(l2e_get_flags(idle_l2e) & _PAGE_PRESENT)) + return >> 0; + >> + cr3 = read_cr3(); >> + mfn = cr3 >> PAGE_SHIFT; >> + >> + /* >> + * No need get page type here and validate checking for xen >> mapping + */ + pl3e = map_domain_page(mfn); >> + pl3e += (cr3 & 0xFE0UL) >> 3; >> + l3e = pl3e[3]; >> + >> + if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) >> + return 0; >> + >> + mfn = l3e_get_pfn(l3e); >> + pl2e = map_domain_page(mfn); >> + >> + l2e = pl2e[l2_table_offset(addr)]; >> + >> + if (l2e_get_flags(l2e) & _PAGE_PRESENT) >> + return 0; > > You''d also need to check that idle_page_table_l2''s entry has > _PAGE_PRESENT set here, otherwise you''d risk live locking the domain > on an out-of-current- bounds M2P access.Seems I forgot check it in 32bit, (it is checked in 64 bit version). will add that back.> > Furthermore, I''m unsure forwarding the page fault in case you found > the domain''s L2 entry to already have _PAGE_PRESENT set is a good way > to handle things: A racing access on another vCPU of the same > guest may just > have managed to install that entry.Good catch. Considering these mapping should only be set by Xen HV, if the PAGE_PRESENT is set, we can assume it has been handled by other vCPU and return successfully, since only non_present fault will come here. (an ASSERT to check the content of the guest l2e is same as idel_page_table will be enough).> Bottom line is, you probably need to exclusively check > idle_page_table_l2 here. 
> >> + >> + memcpy(&pl2e[l2_table_offset(addr)], >> + &idle_pg_table_l2[l2_linear_offset(addr)], >> + sizeof(l2_pgentry_t)); > > Using memcpy for a single page table entry seems odd - why not > use a direct > assignment? However, perhaps using memcpy() here is the right > thing - to > avoid future faults, you could update the full M2P related > part of the L2 > directory in a single step. This ought to be safe, since the > M2P table can only > be extended, but never shrunk. > >> + >> + return EXCRET_fault_fixed; >> +} > > You''re leaking map_domain_page()-es here (and at the earlier > return points).:$ , Will add that back.> > (All the comments likewise apply to the subsequent 64-bit variant.) > > Jan_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel