Samuel Thibault
2008-Jan-18 16:05 UTC
[Xen-devel] [PATCH] minios: support COW for a zero page
minios: support COW for a zero page

This makes it possible to support sparse data.

Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>

diff -r 27ad7ed41be2 extras/mini-os/arch/x86/mm.c
--- a/extras/mini-os/arch/x86/mm.c	Fri Jan 18 15:55:13 2008 +0000
+++ b/extras/mini-os/arch/x86/mm.c	Fri Jan 18 16:02:32 2008 +0000
@@ -50,6 +50,7 @@
 #endif
 
 unsigned long *phys_to_machine_mapping;
+unsigned long mfn_zero;
 extern char stack[];
 extern void page_walk(unsigned long virt_addr);
 
@@ -492,10 +493,13 @@ static void clear_bootstrap(void)
 static void clear_bootstrap(void)
 {
     struct xen_memory_reservation reservation;
-    xen_pfn_t mfns[] = { virt_to_mfn(0), virt_to_mfn(&shared_info) };
+    xen_pfn_t mfns[] = { virt_to_mfn(&shared_info) };
     int n = sizeof(mfns)/sizeof(*mfns);
     pte_t nullpte = { };
 
+    /* Use page 0 as the CoW zero page */
+    memset(NULL, 0, PAGE_SIZE);
+    mfn_zero = pfn_to_mfn(0);
     if (HYPERVISOR_update_va_mapping(0, nullpte, UVMF_INVLPG))
         printk("Unable to unmap page 0\n");
 
diff -r 27ad7ed41be2 extras/mini-os/arch/x86/traps.c
--- a/extras/mini-os/arch/x86/traps.c	Fri Jan 18 15:55:13 2008 +0000
+++ b/extras/mini-os/arch/x86/traps.c	Fri Jan 18 16:02:32 2008 +0000
@@ -118,6 +118,46 @@ void page_walk(unsigned long virt_addres
 
 }
 
+static int handle_cow(unsigned long addr) {
+    pgentry_t *tab = (pgentry_t *)start_info.pt_base, page;
+    unsigned long new_page;
+    int rc;
+
+#if defined(__x86_64__)
+    page = tab[l4_table_offset(addr)];
+    if (!(page & _PAGE_PRESENT))
+        return 0;
+    tab = pte_to_virt(page);
+#endif
+#if defined(__x86_64__) || defined(CONFIG_X86_PAE)
+    page = tab[l3_table_offset(addr)];
+    if (!(page & _PAGE_PRESENT))
+        return 0;
+    tab = pte_to_virt(page);
+#endif
+    page = tab[l2_table_offset(addr)];
+    if (!(page & _PAGE_PRESENT))
+        return 0;
+    tab = pte_to_virt(page);
+
+    page = tab[l1_table_offset(addr)];
+    if (!(page & _PAGE_PRESENT))
+        return 0;
+    /* Only support CoW for the zero page.  */
+    if (PHYS_PFN(page) != mfn_zero)
+        return 0;
+
+    new_page = alloc_pages(0);
+    memset((void*) new_page, 0, PAGE_SIZE);
+
+    rc = HYPERVISOR_update_va_mapping(addr & PAGE_MASK, __pte(virt_to_mach(new_page) | L1_PROT), UVMF_INVLPG);
+    if (!rc)
+        return 1;
+
+    printk("Map zero page to %lx failed: %d.\n", addr, rc);
+    return 0;
+}
+
 #define read_cr2() \
         (HYPERVISOR_shared_info->vcpu_info[smp_processor_id()].arch.cr2)
 
@@ -126,6 +166,10 @@ void do_page_fault(struct pt_regs *regs,
 void do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
     unsigned long addr = read_cr2();
+
+    if ((error_code & TRAP_PF_WRITE) && handle_cow(addr))
+        return;
+
     /* If we are already handling a page fault, and got another one
        that means we faulted in pagetable walk. Continuing here would cause a
        recursive fault */
diff -r 27ad7ed41be2 extras/mini-os/include/ia64/arch_mm.h
--- a/extras/mini-os/include/ia64/arch_mm.h	Fri Jan 18 15:55:13 2008 +0000
+++ b/extras/mini-os/include/ia64/arch_mm.h	Fri Jan 18 16:02:32 2008 +0000
@@ -37,5 +37,7 @@
 #define STACK_SIZE (PAGE_SIZE * (1 << STACK_SIZE_PAGE_ORDER))
 
 #define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, 0, 0)
+/* TODO */
+#define map_zero(n, a) map_frames_ex(NULL, n, 0, 0, a, DOMID_SELF, 0, 0)
 
 #endif /* __ARCH_MM_H__ */
diff -r 27ad7ed41be2 extras/mini-os/include/types.h
--- a/extras/mini-os/include/types.h	Fri Jan 18 15:55:13 2008 +0000
+++ b/extras/mini-os/include/types.h	Fri Jan 18 16:02:32 2008 +0000
@@ -57,6 +57,13 @@ typedef struct { unsigned long pte; } pt
 typedef struct { unsigned long pte; } pte_t;
 #endif /* __i386__ || __x86_64__ */
 
+#if !defined(CONFIG_X86_PAE)
+#define __pte(x) ((pte_t) { (x) } )
+#else
+#define __pte(x) ({ unsigned long long _x = (x); \
+    ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); })
+#endif
+
 typedef u8 uint8_t;
 typedef s8 int8_t;
 typedef u16 uint16_t;
diff -r 27ad7ed41be2 extras/mini-os/include/x86/arch_mm.h
--- a/extras/mini-os/include/x86/arch_mm.h	Fri Jan 18 15:55:13 2008 +0000
+++ b/extras/mini-os/include/x86/arch_mm.h	Fri Jan 18 16:02:32 2008 +0000
@@ -144,12 +144,14 @@ typedef unsigned long pgentry_t;
 
 #if defined(__i386__)
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L1_PROT_RO (_PAGE_PRESENT|_PAGE_ACCESSED)
 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY |_PAGE_USER)
 #if defined(CONFIG_X86_PAE)
 #define L3_PROT (_PAGE_PRESENT)
 #endif /* CONFIG_X86_PAE */
 #elif defined(__x86_64__)
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
+#define L1_PROT_RO (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_USER)
 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
 #define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
 #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
@@ -190,6 +192,7 @@ typedef unsigned long maddr_t;
 
 extern unsigned long *phys_to_machine_mapping;
 extern char _text, _etext, _erodata, _edata, _end;
+extern unsigned long mfn_zero;
 #define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)])
 static __inline__ maddr_t phys_to_machine(paddr_t phys)
 {
@@ -224,5 +227,6 @@ static __inline__ paddr_t machine_to_phy
 #define pte_to_virt(_pte) to_virt(mfn_to_pfn(pte_to_mfn(_pte)) << PAGE_SHIFT)
 
 #define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, 0, L1_PROT)
+#define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, 0, L1_PROT_RO)
 
 #endif /* _ARCH_MM_H_ */
diff -r 27ad7ed41be2 extras/mini-os/include/x86/traps.h
--- a/extras/mini-os/include/x86/traps.h	Fri Jan 18 15:55:13 2008 +0000
+++ b/extras/mini-os/include/x86/traps.h	Fri Jan 18 16:02:32 2008 +0000
@@ -70,4 +70,8 @@ struct pt_regs {
 
 void dump_regs(struct pt_regs *regs);
 
+#define TRAP_PF_PROT  0x1
+#define TRAP_PF_WRITE 0x2
+#define TRAP_PF_USER  0x4
+
 #endif /* _TRAPS_H_ */

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel