A few documentation fixes, some code cleanups, and some extra bits to make perf reporting easier on the eyes. arch/x86/xen/enlighten.c | 41 ++++++++++++++++++++++++++++++--- arch/x86/xen/mmu.c | 25 ++++++++++++++------ arch/x86/xen/p2m.c | 19 ++++++++++----- arch/x86/xen/setup.c | 9 ------- arch/x86/xen/xen-head.S | 56 ++++++++++++++++++++++++++++++++++++++++++++- arch/x86/xen/xen-ops.h | 2 +- 6 files changed, 121 insertions(+), 31 deletions(-)
Konrad Rzeszutek Wilk
2012-Jul-23 18:28 UTC
[PATCH 1/5] xen/p2m: Fix the comment describing the P2M tree.
It mixed up the p2m_mid_missing with p2m_missing. Also remove some extra spaces. Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> --- arch/x86/xen/p2m.c | 14 +++++++------- 1 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 64effdc..e4adbfb 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -22,7 +22,7 @@ * * P2M_PER_PAGE depends on the architecture, as a mfn is always * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to - * 512 and 1024 entries respectively. + * 512 and 1024 entries respectively. * * In short, these structures contain the Machine Frame Number (MFN) of the PFN. * @@ -139,11 +139,11 @@ * / | ~0, ~0, .... | * | \---------------/ * | - * p2m_missing p2m_missing - * /------------------\ /------------\ - * | [p2m_mid_missing]+---->| ~0, ~0, ~0 | - * | [p2m_mid_missing]+---->| ..., ~0 | - * \------------------/ \------------/ + * p2m_mid_missing p2m_missing + * /-----------------\ /------------\ + * | [p2m_missing] +---->| ~0, ~0, ~0 | + * | [p2m_missing] +---->| ..., ~0 | + * \-----------------/ \------------/ * * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) */ @@ -423,7 +423,7 @@ static void free_p2m_page(void *p) free_page((unsigned long)p); } -/* +/* * Fully allocate the p2m structure for a given pfn. We need to check * that both the top and mid levels are allocated, and make sure the * parallel mfn tree is kept in sync. We may race with other cpus, so -- 1.7.7.6
Konrad Rzeszutek Wilk
2012-Jul-23 18:28 UTC
[PATCH 2/5] xen/mmu: The xen_setup_kernel_pagetable doesn't need to return anything.
We don''t need to return the new PGD - as we do not use it. Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> --- arch/x86/xen/enlighten.c | 5 +---- arch/x86/xen/mmu.c | 10 ++-------- arch/x86/xen/xen-ops.h | 2 +- 3 files changed, 4 insertions(+), 13 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index ed7d549..ae8a2ab 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1256,7 +1256,6 @@ asmlinkage void __init xen_start_kernel(void) { struct physdev_set_iopl set_iopl; int rc; - pgd_t *pgd; if (!xen_start_info) return; @@ -1348,8 +1347,6 @@ asmlinkage void __init xen_start_kernel(void) acpi_numa = -1; #endif - pgd = (pgd_t *)xen_start_info->pt_base; - /* Don''t do the full vcpu_info placement stuff until we have a possible map and a non-dummy shared_info. */ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; @@ -1358,7 +1355,7 @@ asmlinkage void __init xen_start_kernel(void) early_boot_irqs_disabled = true; xen_raw_console_write("mapping kernel into physical memory\n"); - pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); + xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages); /* Allocate and initialize top and mid mfn levels for p2m structure */ xen_build_mfn_list_list(); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3a73785..4ac21a4 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1719,8 +1719,7 @@ static void convert_pfn_mfn(void *v) * of the physical mapping once some sort of allocator has been set * up. 
*/ -pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) +void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pud_t *l3; pmd_t *l2; @@ -1781,8 +1780,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, memblock_reserve(__pa(xen_start_info->pt_base), xen_start_info->nr_pt_frames * PAGE_SIZE); - - return pgd; } #else /* !CONFIG_X86_64 */ static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); @@ -1825,8 +1822,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) pv_mmu_ops.write_cr3 = &xen_write_cr3; } -pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) +void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pmd_t *kernel_pmd; @@ -1858,8 +1854,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, memblock_reserve(__pa(xen_start_info->pt_base), xen_start_info->nr_pt_frames * PAGE_SIZE); - - return initial_page_table; } #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 202d4c1..2230f57 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -27,7 +27,7 @@ void xen_setup_mfn_list_list(void); void xen_setup_shared_info(void); void xen_build_mfn_list_list(void); void xen_setup_machphys_mapping(void); -pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); +void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); void xen_reserve_top(void); extern unsigned long xen_max_p2m_pfn; -- 1.7.7.6
Konrad Rzeszutek Wilk
2012-Jul-23 18:28 UTC
[PATCH 3/5] xen/mmu: Provide comments describing the _kva and _va aliasing issue
And also explain how the pagetables are being grafted on. Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> --- arch/x86/xen/mmu.c | 15 +++++++++++++++ 1 files changed, 15 insertions(+), 0 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 4ac21a4..b0b72e0 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1734,19 +1734,34 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) init_level4_pgt[0] = __pgd(0); /* Pre-constructed entries are in pfn, so convert to mfn */ + /* L4[272] -> level3_ident_pgt + * L4[511] -> level3_kernel_pgt */ convert_pfn_mfn(init_level4_pgt); + + /* L3_i[0] -> level2_ident_pgt */ convert_pfn_mfn(level3_ident_pgt); + /* L3_k[510] -> level2_kernel_pgt + * L3_i[511] -> level2_fixmap_pgt */ convert_pfn_mfn(level3_kernel_pgt); + /* We get [511][511] and have Xen''s version of level2_kernel_pgt */ l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); + /* Graft it onto L4[272][0]. Note that we creating an aliasing problem: + * Both L4[272][0] and L4[511][511] have entries that point to the same + * L2 (PMD) tables. Meaning that if you modify it in __va space + * it will be also modified in the __kva space! */ memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + /* Graft it onto L4[511][511] */ memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + /* Get [511][510] and graft that in level2_fixmap_pgt */ l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + /* Note that we don''t do anything with level1_fixmap_pgt which + * we don''t need. */ /* Set up identity map */ xen_map_identity_early(level2_ident_pgt, max_pfn); -- 1.7.7.6
Konrad Rzeszutek Wilk
2012-Jul-23 18:28 UTC
[PATCH 4/5] xen/x86: Use memblock_reserve for sensitive areas.
instead of a big memblock_reserve. This way we can be more selective in freeing regions (and it also makes it easier to understand where is what). Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> --- arch/x86/xen/enlighten.c | 36 ++++++++++++++++++++++++++++++++++++ arch/x86/xen/p2m.c | 5 +++++ arch/x86/xen/setup.c | 9 --------- 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index ae8a2ab..c986b7f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -998,7 +998,42 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) return ret; } +static void __init xen_reserve_mfn(unsigned long mfn) +{ + unsigned long pfn; + if (!mfn) + return; + pfn = mfn_to_pfn(mfn); + if (phys_to_machine_mapping_valid(pfn)) + memblock_reserve(PFN_PHYS(pfn), PAGE_SIZE); +} +static void __init xen_reserve_internals(void) +{ + unsigned long size; + + if (!xen_pv_domain()) + return; + + if (xen_feature(XENFEAT_auto_translated_physmap)) + return; + + /* ALIGN up to compensate for the p2m_page pointing to an array that + * can be partially filled (look in xen_build_dynamic_phys_to_machine). 
+ */ + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); + + memblock_reserve(__pa(xen_start_info->mfn_list), size); + + memblock_reserve(__pa(xen_start_info), PAGE_SIZE); + + xen_reserve_mfn(PFN_DOWN(xen_start_info->shared_info)); + xen_reserve_mfn(xen_start_info->store_mfn); + + if (!xen_initial_domain()) + xen_reserve_mfn(xen_start_info->console.domU.mfn); + /* The pagetables are reserved in mmu.c */ +} void xen_setup_shared_info(void) { if (!xen_feature(XENFEAT_auto_translated_physmap)) { @@ -1357,6 +1392,7 @@ asmlinkage void __init xen_start_kernel(void) xen_raw_console_write("mapping kernel into physical memory\n"); xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages); + xen_reserve_internals(); /* Allocate and initialize top and mid mfn levels for p2m structure */ xen_build_mfn_list_list(); diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index e4adbfb..4219f9a 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -388,6 +388,11 @@ void __init xen_build_dynamic_phys_to_machine(void) } m2p_override_init(); + + /* NOTE: We cannot call memblock_reserve here for the mfn_list as there + * isn''t enough pieces to make it work (for one - we are still using the + * Xen provided pagetable). 
So we do it a bit later: + * (xen_reserve_internals).*/ } unsigned long get_phys_to_machine(unsigned long pfn) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index a4790bf..9efca75 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -424,15 +424,6 @@ char * __init xen_memory_setup(void) e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, E820_RESERVED); - /* - * Reserve Xen bits: - * - mfn_list - * - xen_start_info - * See comment above "struct start_info" in <xen/interface/xen.h> - */ - memblock_reserve(__pa(xen_start_info->mfn_list), - xen_start_info->pt_base - xen_start_info->mfn_list); - sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); return "Xen"; -- 1.7.7.6
Konrad Rzeszutek Wilk
2012-Jul-23 18:28 UTC
[PATCH 5/5] xen/perf: Define .glob for the different hypercalls.
This allows us in perf to have this: 99.67% [kernel] [k] xen_hypercall_sched_op 0.11% [kernel] [k] xen_hypercall_xen_version instead of the boring ever-encompassing: 99.13% [kernel] [k] hypercall_page [v2: Use a macro to define the name and skip] [v3: Use balign per Jan's suggestion] Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> --- arch/x86/xen/xen-head.S | 56 +++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 54 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index aaa7291..7faed58 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -28,9 +28,61 @@ ENTRY(startup_xen) __FINIT .pushsection .text - .align PAGE_SIZE + .balign PAGE_SIZE ENTRY(hypercall_page) - .skip PAGE_SIZE +#define NEXT_HYPERCALL(x) \ + ENTRY(xen_hypercall_##x) \ + .skip 32 + +NEXT_HYPERCALL(set_trap_table) +NEXT_HYPERCALL(mmu_update) +NEXT_HYPERCALL(set_gdt) +NEXT_HYPERCALL(stack_switch) +NEXT_HYPERCALL(set_callbacks) +NEXT_HYPERCALL(fpu_taskswitch) +NEXT_HYPERCALL(sched_op_compat) +NEXT_HYPERCALL(platform_op) +NEXT_HYPERCALL(set_debugreg) +NEXT_HYPERCALL(get_debugreg) +NEXT_HYPERCALL(update_descriptor) +NEXT_HYPERCALL(ni) +NEXT_HYPERCALL(memory_op) +NEXT_HYPERCALL(multicall) +NEXT_HYPERCALL(update_va_mapping) +NEXT_HYPERCALL(set_timer_op) +NEXT_HYPERCALL(event_channel_op_compat) +NEXT_HYPERCALL(xen_version) +NEXT_HYPERCALL(console_io) +NEXT_HYPERCALL(physdev_op_compat) +NEXT_HYPERCALL(grant_table_op) +NEXT_HYPERCALL(vm_assist) +NEXT_HYPERCALL(update_va_mapping_otherdomain) +NEXT_HYPERCALL(iret) +NEXT_HYPERCALL(vcpu_op) +NEXT_HYPERCALL(set_segment_base) +NEXT_HYPERCALL(mmuext_op) +NEXT_HYPERCALL(xsm_op) +NEXT_HYPERCALL(nmi_op) +NEXT_HYPERCALL(sched_op) +NEXT_HYPERCALL(callback_op) +NEXT_HYPERCALL(xenoprof_op) +NEXT_HYPERCALL(event_channel_op) +NEXT_HYPERCALL(physdev_op) +NEXT_HYPERCALL(hvm_op) +NEXT_HYPERCALL(sysctl) +NEXT_HYPERCALL(domctl) +NEXT_HYPERCALL(kexec_op) +NEXT_HYPERCALL(tmem_op) /* 38 */ 
+ENTRY(xen_hypercall_rsvr) + .skip 320 +NEXT_HYPERCALL(mca) /* 48 */ +NEXT_HYPERCALL(arch_1) +NEXT_HYPERCALL(arch_2) +NEXT_HYPERCALL(arch_3) +NEXT_HYPERCALL(arch_4) +NEXT_HYPERCALL(arch_5) +NEXT_HYPERCALL(arch_6) + .balign PAGE_SIZE .popsection ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") -- 1.7.7.6
Konrad Rzeszutek Wilk
2012-Jul-24 13:25 UTC
Re: [Xen-devel] [PATCH 4/5] xen/x86: Use memblock_reserve for sensitive areas.
On Mon, Jul 23, 2012 at 02:28:22PM -0400, Konrad Rzeszutek Wilk wrote:
> instead of a big memblock_reserve. This way we can be more > selective in freeing regions (and it also makes it easier > to understand where is what). > > Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > --- > arch/x86/xen/enlighten.c | 36 ++++++++++++++++++++++++++++++++++++ > arch/x86/xen/p2m.c | 5 +++++ > arch/x86/xen/setup.c | 9 --------- > 3 files changed, 41 insertions(+), 9 deletions(-) > > diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c > index ae8a2ab..c986b7f 100644 > --- a/arch/x86/xen/enlighten.c > +++ b/arch/x86/xen/enlighten.c > @@ -998,7 +998,42 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) > > return ret; > } > +static void __init xen_reserve_mfn(unsigned long mfn) > +{ > + unsigned long pfn; > > + if (!mfn) > + return; > + pfn = mfn_to_pfn(mfn); > + if (phys_to_machine_mapping_valid(pfn)) > + memblock_reserve(PFN_PHYS(pfn), PAGE_SIZE); > +} > +static void __init xen_reserve_internals(void) > +{ > + unsigned long size; > + > + if (!xen_pv_domain()) > + return; > + > + if (xen_feature(XENFEAT_auto_translated_physmap)) > + return;

This needs to be redone. On PV guests that do autotranslation the other areas (xen_start_info for example) are not reserved - which is not good.

> + > + /* ALIGN up to compensate for the p2m_page pointing to an array that > + * can be partially filled (look in xen_build_dynamic_phys_to_machine). 
> + */ > + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); > + > + memblock_reserve(__pa(xen_start_info->mfn_list), size); > + > + memblock_reserve(__pa(xen_start_info), PAGE_SIZE); > + > + xen_reserve_mfn(PFN_DOWN(xen_start_info->shared_info)); > + xen_reserve_mfn(xen_start_info->store_mfn); > + > + if (!xen_initial_domain()) > + xen_reserve_mfn(xen_start_info->console.domU.mfn); > + /* The pagetables are reserved in mmu.c */ > +} > void xen_setup_shared_info(void) > { > if (!xen_feature(XENFEAT_auto_translated_physmap)) { > @@ -1357,6 +1392,7 @@ asmlinkage void __init xen_start_kernel(void) > xen_raw_console_write("mapping kernel into physical memory\n"); > xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages); > > + xen_reserve_internals(); > /* Allocate and initialize top and mid mfn levels for p2m structure */ > xen_build_mfn_list_list(); > > diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c > index e4adbfb..4219f9a 100644 > --- a/arch/x86/xen/p2m.c > +++ b/arch/x86/xen/p2m.c > @@ -388,6 +388,11 @@ void __init xen_build_dynamic_phys_to_machine(void) > } > > m2p_override_init(); > + > + /* NOTE: We cannot call memblock_reserve here for the mfn_list as there > + * isn''t enough pieces to make it work (for one - we are still using the > + * Xen provided pagetable). 
So we do it a bit later: > + * (xen_reserve_internals).*/ > } > > unsigned long get_phys_to_machine(unsigned long pfn) > diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c > index a4790bf..9efca75 100644 > --- a/arch/x86/xen/setup.c > +++ b/arch/x86/xen/setup.c > @@ -424,15 +424,6 @@ char * __init xen_memory_setup(void) > e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, > E820_RESERVED); > > - /* > - * Reserve Xen bits: > - * - mfn_list > - * - xen_start_info > - * See comment above "struct start_info" in <xen/interface/xen.h> > - */ > - memblock_reserve(__pa(xen_start_info->mfn_list), > - xen_start_info->pt_base - xen_start_info->mfn_list); > - > sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); > > return "Xen"; > -- > 1.7.7.6 > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel