Attilio Rao
2012-Aug-10 14:17 UTC
[PATCH 0/2] Document pagetable_reserve PVOP and enforce a better semantic
When looking to document the pagetable_reserve PVOP, I realized that it assumes start == pgt_buf_start. I think this is not semantically right (even if with the current code this should not be a problem in practice) and what we really want is to extend the logic in order to do the RO -> RW conversion also for the range [pgt_buf_start, start). This patch then implements this missing conversion, adding some smaller cleanups and finally provides documentation for the PVOP. Please look at 2/2 for more details on how the comment is structured. If we get this right we will have a reference to be used later on for other PVOPs. A preliminary version of this patch has already been reviewed by Stefano Stabellini. Attilio Rao (2): XEN, X86: Improve semantic support for pagetable_reserve PVOP Document the semantic of the pagetable_reserve PVOP arch/x86/include/asm/x86_init.h | 19 +++++++++++++++++-- arch/x86/mm/init.c | 4 ++++ arch/x86/xen/mmu.c | 22 ++++++++++++++++++++-- 3 files changed, 41 insertions(+), 4 deletions(-) -- 1.7.2.5
Attilio Rao
2012-Aug-10 14:17 UTC
[PATCH 1/2] XEN, X86: Improve semantic support for pagetable_reserve PVOP
- Allow xen_mapping_pagetable_reserve() to handle a start different from pgt_buf_start, but still bigger than it. - Add checks to xen_mapping_pagetable_reserve() and native_pagetable_reserve() for verifying start and end are contained in the range [pgt_buf_start, pgt_buf_top]. - In xen_mapping_pagetable_reserve(), change printk into pr_debug. - In xen_mapping_pagetable_reserve(), print out diagnostic only if there is an actual need to do that (or, in other words, if there are actually some pages going to switch from RO to RW). Signed-off-by: Attilio Rao <attilio.rao@citrix.com> --- arch/x86/mm/init.c | 4 ++++ arch/x86/xen/mmu.c | 22 ++++++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index e0e6990..c5849b6 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -92,6 +92,10 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en void __init native_pagetable_reserve(u64 start, u64 end) { + if (start < PFN_PHYS(pgt_buf_start) || end > PFN_PHYS(pgt_buf_top)) + panic("Invalid address range: [%llu - %llu] should be a subset of [%llu - %llu]\n", + start, end, PFN_PHYS(pgt_buf_start), + PFN_PHYS(pgt_buf_top)); memblock_reserve(start, end - start); } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index b65a761..8d943e0a 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1180,12 +1180,30 @@ static void __init xen_pagetable_setup_start(pgd_t *base) static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) { + u64 sh_start; + + sh_start = PFN_PHYS(pgt_buf_start); + + if (start < sh_start || end > PFN_PHYS(pgt_buf_top)) + panic("Invalid address range: [%llu - %llu] should be a subset of [%llu - %llu]\n", + start, end, sh_start, PFN_PHYS(pgt_buf_top)); + + /* set RW the initial range */ + if (start != sh_start) + pr_debug("xen: setting RW the range %llx - %llx\n", + sh_start, start); + while (sh_start < start) { + make_lowmem_page_readwrite(__va(sh_start)); + sh_start += PAGE_SIZE; + } + /* reserve the range used */ native_pagetable_reserve(start, end); /* set as RW the rest */ - printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end, - PFN_PHYS(pgt_buf_top)); + if (end != PFN_PHYS(pgt_buf_top)) + pr_debug("xen: setting RW the range %llx - %llx\n", + end, PFN_PHYS(pgt_buf_top)); while (end < PFN_PHYS(pgt_buf_top)) { make_lowmem_page_readwrite(__va(end)); end += PAGE_SIZE; -- 1.7.2.5
Attilio Rao
2012-Aug-10 14:17 UTC
[PATCH 2/2] Document the semantic of the pagetable_reserve PVOP
The information added on the hook is: - Native behaviour - Xen specific behaviour - Logic behind the Xen specific behaviour - PVOP semantic Signed-off-by: Attilio Rao <attilio.rao@citrix.com> --- arch/x86/include/asm/x86_init.h | 19 +++++++++++++++++-- 1 files changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 38155f6..b22093c 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -72,8 +72,23 @@ struct x86_init_oem { * struct x86_init_mapping - platform specific initial kernel pagetable setup * @pagetable_reserve: reserve a range of addresses for kernel pagetable usage * - * For more details on the purpose of this hook, look in - * init_memory_mapping and the commit that added it. + * It does reserve a range of pages, to be used as pagetable pages. + * The start and end parameters are expected to be contained in the + * [pgt_buf_start, pgt_buf_top] range. + * The native implementation reserves the pages via the memblock_reserve() + * interface. + * The Xen implementation, besides reserving the range via memblock_reserve(), + * also sets RW the remaining pages contained in the ranges + * [pgt_buf_start, start) and [end, pgt_buf_top). + * This is needed because the range [pgt_buf_start, pgt_buf_top] was + * previously mapped read-only by xen_set_pte_init: when running + * on Xen all the pagetable pages need to be mapped read-only in order to + * avoid protection faults from the hypervisor. However, once the correct + * amount of pages is reserved for the pagetables, all the others contained + * in the range must be set to RW so that they can be correctly recycled by + * the VM subsystem. + * This operation is meant to be performed only during init_memory_mapping(), + * just after space for the kernel direct mapping tables is found. */ struct x86_init_mapping { void (*pagetable_reserve)(u64 start, u64 end); -- 1.7.2.5
Konrad Rzeszutek Wilk
2012-Aug-13 20:42 UTC
Re: [PATCH 0/2] Document pagetable_reserve PVOP and enforce a better semantic
On Fri, Aug 10, 2012 at 03:17:05PM +0100, Attilio Rao wrote:> When looking for documenting the pagetable_reserve PVOP, I realized that it > assumes start == pgt_buf_start. I think this is not semantically right > (even if with the current code this should not be a problem in practice) and > what we really want is to extend the logic in order to do the RO -> RW > convertion also for the range [pgt_buf_start, start). > This patch then implements this missing conversion, adding some smaller > cleanups and finally provides documentation for the PVOP. > Please look at 2/2 for more details on how the comment is structured. > If we get this right we will have a reference to be used later on for others > PVOPs. > A preliminary version of this patch has been already reviewed by > Stefano Stabellini. > > Attilio Rao (2): > XEN, X86: Improve semantic support for pagetable_reserve PVOP > Document the semantic of the pagetable_reserve PVOPThe titles need a prefix, like ''x86'' or ''xen/x86''. s/PVOP/PVOPS/> > arch/x86/include/asm/x86_init.h | 19 +++++++++++++++++-- > arch/x86/mm/init.c | 4 ++++ > arch/x86/xen/mmu.c | 22 ++++++++++++++++++++-- > 3 files changed, 41 insertions(+), 4 deletions(-) > > -- > 1.7.2.5
Konrad Rzeszutek Wilk
2012-Aug-13 20:43 UTC
Re: [PATCH 1/2] XEN, X86: Improve semantic support for pagetable_reserve PVOP
On Fri, Aug 10, 2012 at 03:17:06PM +0100, Attilio Rao wrote:> - Allow xen_mapping_pagetable_reserve() to handle a start different from > pgt_buf_start, but still bigger than it. > - Add checks to xen_mapping_pagetable_reserve() and native_pagetable_reserve() > for verifying start and end are contained in the range > [pgt_buf_start, pgt_buf_top]. > - In xen_mapping_pagetable_reserve(), change printk into pr_debug. > - In xen_mapping_pagetable_reserve(), print out diagnostic only if there is > an actual need to do that (or, in other words, if there are actually some > pages going to switch from RO to RW). > > Signed-off-by: Attilio Rao <attilio.rao@citrix.com> > --- > arch/x86/mm/init.c | 4 ++++ > arch/x86/xen/mmu.c | 22 ++++++++++++++++++++-- > 2 files changed, 24 insertions(+), 2 deletions(-) > > diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c > index e0e6990..c5849b6 100644 > --- a/arch/x86/mm/init.c > +++ b/arch/x86/mm/init.c > @@ -92,6 +92,10 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en > > void __init native_pagetable_reserve(u64 start, u64 end) > { > + if (start < PFN_PHYS(pgt_buf_start) || end > PFN_PHYS(pgt_buf_top)) > + panic("Invalid address range: [%llu - %llu] should be a subset of [%llu - %llu]\n" > + start, end, PFN_PHYS(pgt_buf_start), > + PFN_PHYS(pgt_buf_top)); > memblock_reserve(start, end - start); > } > > diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c > index b65a761..8d943e0a 100644 > --- a/arch/x86/xen/mmu.c > +++ b/arch/x86/xen/mmu.c > @@ -1180,12 +1180,30 @@ static void __init xen_pagetable_setup_start(pgd_t *base) > > static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) > { > + u64 sh_start;sh... shared? shell? Can it be just ''begin'' or ''pgt_buf_start_phys'' ? Or ''start_phys'' ? Perhaps ''orig_start'' ? ''early_start''? 
''_start''?> + > + sh_start = PFN_PHYS(pgt_buf_start); > + > + if (start < sh_start || end > PFN_PHYS(pgt_buf_top)) > + panic("Invalid address range: [%llu - %llu] should be a subset of [%llu - %llu]\n" > + start, end, sh_start, PFN_PHYS(pgt_buf_top)); > + > + /* set RW the initial range */ > + if (start != sh_start) > + pr_debug("xen: setting RW the range %llx - %llx\n", > + sh_start, start); > + while (sh_start < start) { > + make_lowmem_page_readwrite(__va(sh_start)); > + sh_start += PAGE_SIZE; > + } > + > /* reserve the range used */ > native_pagetable_reserve(start, end); > > /* set as RW the rest */ > - printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end, > - PFN_PHYS(pgt_buf_top)); > + if (end != PFN_PHYS(pgt_buf_top)) > + pr_debug("xen: setting RW the range %llx - %llx\n", > + end, PFN_PHYS(pgt_buf_top)); > while (end < PFN_PHYS(pgt_buf_top)) { > make_lowmem_page_readwrite(__va(end)); > end += PAGE_SIZE; > -- > 1.7.2.5