Mukesh Rathor
2012-Aug-16 01:02 UTC
[RFC PATCH 3/8]: PVH: memory manager and paging related changes
---
 arch/x86/xen/mmu.c              |  179 ++++++++++++++++++++++++++++++++++++---
 arch/x86/xen/mmu.h              |    2 +
 include/xen/interface/memory.h  |   27 ++++++-
 include/xen/interface/physdev.h |   10 ++
 include/xen/xen-ops.h           |    7 ++
 5 files changed, 211 insertions(+), 14 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index b65a761..44a6477 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -330,6 +330,38 @@ static void xen_set_pte(pte_t *ptep, pte_t pteval)
 	__xen_set_pte(ptep, pteval);
 }

+/* This for PV guest in hvm container */
+void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn,
+			      int nr_mfns, int add_mapping)
+{
+	int rc;
+	struct physdev_map_iomem iomem;
+
+	iomem.first_gfn = pfn;
+	iomem.first_mfn = mfn;
+	iomem.nr_mfns = nr_mfns;
+	iomem.add_mapping = add_mapping;
+
+	rc = HYPERVISOR_physdev_op(PHYSDEVOP_pvh_map_iomem, &iomem);
+	BUG_ON(rc);
+}
+
+/* This for PV guest in hvm container.
+ * We need this because during boot early_ioremap path eventually calls
+ * set_pte that maps io space. Also, ACPI pages are not mapped into to the
+ * EPT during dom0 creation. The pages are mapped initially here from
+ * kernel_physical_mapping_init() then later the memtype is changed. */
+static void xen_dom0pvh_set_pte(pte_t *ptep, pte_t pteval)
+{
+	native_set_pte(ptep, pteval);
+}
+
+static void xen_dom0pvh_set_pte_at(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, pte_t pteval)
+{
+	native_set_pte(ptep, pteval);
+}
+
 static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 		    pte_t *ptep, pte_t pteval)
 {
@@ -1197,6 +1229,10 @@ static void xen_post_allocator_init(void);
 static void __init xen_pagetable_setup_done(pgd_t *base)
 {
 	xen_setup_shared_info();
+
+	if (xen_pvh_domain())
+		return;
+
 	xen_post_allocator_init();
 }

@@ -1652,6 +1688,10 @@ static void set_page_prot(void *addr, pgprot_t prot)
 	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
 	pte_t pte = pfn_pte(pfn, prot);

+	/* for PVH, page tables are native. */
+	if (xen_pvh_domain())
+		return;
+
 	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
 		BUG();
 }
@@ -1745,6 +1785,7 @@ static void convert_pfn_mfn(void *v)
  * but that's enough to get __va working.  We need to fill in the rest
  * of the physical mapping once some sort of allocator has been set
  * up.
+ * NOTE: for PVH, the page tables are native with HAP required.
  */
 pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
					   unsigned long max_pfn)
@@ -1761,10 +1802,12 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 	/* Zap identity mapping */
 	init_level4_pgt[0] = __pgd(0);

-	/* Pre-constructed entries are in pfn, so convert to mfn */
-	convert_pfn_mfn(init_level4_pgt);
-	convert_pfn_mfn(level3_ident_pgt);
-	convert_pfn_mfn(level3_kernel_pgt);
+	if (!xen_pvh_domain()) {
+		/* Pre-constructed entries are in pfn, so convert to mfn */
+		convert_pfn_mfn(init_level4_pgt);
+		convert_pfn_mfn(level3_ident_pgt);
+		convert_pfn_mfn(level3_kernel_pgt);
+	}

 	l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
 	l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
@@ -1787,12 +1830,14 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);

-	/* Pin down new L4 */
-	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
-			  PFN_DOWN(__pa_symbol(init_level4_pgt)));
+	if (!xen_pvh_domain()) {
+		/* Pin down new L4 */
+		pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
+				  PFN_DOWN(__pa_symbol(init_level4_pgt)));

-	/* Unpin Xen-provided one */
-	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+		/* Unpin Xen-provided one */
+		pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+	}

 	/* Switch over */
 	pgd = init_level4_pgt;
@@ -1802,9 +1847,13 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 	 * structure to attach it to, so make sure we just set kernel
 	 * pgd.
 	 */
-	xen_mc_batch();
-	__xen_write_cr3(true, __pa(pgd));
-	xen_mc_issue(PARAVIRT_LAZY_CPU);
+	if (xen_pvh_domain()) {
+		native_write_cr3(__pa(pgd));
+	} else {
+		xen_mc_batch();
+		__xen_write_cr3(true, __pa(pgd));
+		xen_mc_issue(PARAVIRT_LAZY_CPU);
+	}

 	memblock_reserve(__pa(xen_start_info->pt_base),
 			 xen_start_info->nr_pt_frames * PAGE_SIZE);
@@ -2067,9 +2116,21 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {

 void __init xen_init_mmu_ops(void)
 {
+	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
+
+	if (xen_pvh_domain()) {
+		pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
+
+		/* set_pte* for PCI devices to map iomem. */
+		if (xen_initial_domain()) {
+			pv_mmu_ops.set_pte = xen_dom0pvh_set_pte;
+			pv_mmu_ops.set_pte_at = xen_dom0pvh_set_pte_at;
+		}
+		return;
+	}
+
 	x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
 	x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
-	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
 	pv_mmu_ops = xen_mmu_ops;

 	memset(dummy_mapping, 0xff, PAGE_SIZE);
@@ -2305,6 +2366,93 @@ void __init xen_hvm_init_mmu_ops(void)
 }
 #endif

+/* Map foreign gmfn, fgmfn, to local pfn, lpfn. This for the user space
+ * creating new guest on PVH dom0 and needs to map domU pages. Called from
+ * exported function, so no need to export this.
+ */
+static int pvh_add_to_xen_p2m(unsigned long lpfn, unsigned long fgmfn,
+			      unsigned int domid)
+{
+	int rc;
+	struct xen_add_to_physmap pmb = {.foreign_domid = domid};
+
+	pmb.gpfn = lpfn;
+	pmb.idx = fgmfn;
+	pmb.space = XENMAPSPACE_gmfn_foreign;
+	rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &pmb);
+	if (rc) {
+		pr_warn("Failed to map pfn to mfn rc:%d pfn:%lx mfn:%lx\n",
+			rc, lpfn, fgmfn);
+		return 1;
+	}
+	return 0;
+}
+
+/* Unmap an entry from xen p2m table */
+int pvh_rem_xen_p2m(unsigned long spfn, int count)
+{
+	struct xen_remove_from_physmap xrp;
+	int i, rc;
+
+	for (i=0; i < count; i++) {
+		xrp.domid = DOMID_SELF;
+		xrp.gpfn = spfn+i;
+		rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp);
+		if (rc) {
+			pr_warn("Failed to unmap pfn:%lx rc:%d done:%d\n",
+				spfn+i, rc, i);
+			return 1;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pvh_rem_xen_p2m);
+
+struct pvh_remap_data {
+	unsigned long fgmfn;		/* foreign domain's gmfn */
+	pgprot_t prot;
+	domid_t  domid;
+	struct vm_area_struct *vma;
+};
+
+static int pvh_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
+			  void *data)
+{
+	struct pvh_remap_data *pvhp = data;
+	struct xen_pvh_sav_pfn_info *savp = pvhp->vma->vm_private_data;
+	unsigned long pfn = page_to_pfn(savp->sp_paga[savp->sp_next_todo++]);
+	pte_t pteval = pte_mkspecial(pfn_pte(pfn, pvhp->prot));
+
+	native_set_pte(ptep, pteval);
+	if (pvh_add_to_xen_p2m(pfn, pvhp->fgmfn, pvhp->domid))
+		return -EFAULT;
+
+	return 0;
+}
+
+/* The only caller at moment passes one gmfn at a time.
+ * PVH TBD/FIXME: expand this in future to honor batch requests.
+ */
+static int pvh_remap_gmfn_range(struct vm_area_struct *vma,
+				unsigned long addr, unsigned long mfn, int nr,
+				pgprot_t prot, unsigned domid)
+{
+	int err;
+	struct pvh_remap_data pvhdata;
+
+	if (nr > 1)
+		return -EINVAL;
+
+	pvhdata.fgmfn = mfn;
+	pvhdata.prot = prot;
+	pvhdata.domid = domid;
+	pvhdata.vma = vma;
+	err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
+				  pvh_map_pte_fn, &pvhdata);
+	flush_tlb_all();
+	return err;
+}
+
 #define REMAP_BATCH_SIZE 16

 struct remap_data {
@@ -2342,6 +2490,11 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
				(VM_PFNMAP | VM_RESERVED | VM_IO)));

+	if (xen_pvh_domain()) {
+		/* We need to update the local page tables and the xen HAP */
+		return pvh_remap_gmfn_range(vma, addr, mfn, nr, prot, domid);
+	}
+
 	rmd.mfn = mfn;
 	rmd.prot = prot;

diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 73809bb..6d0bb56 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -23,4 +23,6 @@ unsigned long xen_read_cr2_direct(void);

 extern void xen_init_mmu_ops(void);
 extern void xen_hvm_init_mmu_ops(void);
+extern void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn,
+				     int nr_mfns, int add_mapping);
 #endif	/* _XEN_MMU_H */
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
index eac3ce1..1b213b1 100644
--- a/include/xen/interface/memory.h
+++ b/include/xen/interface/memory.h
@@ -163,10 +163,19 @@ struct xen_add_to_physmap {
     /* Which domain to change the mapping for. */
     domid_t domid;

+    /* Number of pages to go through for gmfn_range */
+    uint16_t size;
+
     /* Source mapping space. */
 #define XENMAPSPACE_shared_info 0 /* shared info page */
 #define XENMAPSPACE_grant_table 1 /* grant table page */
-    unsigned int space;
+#define XENMAPSPACE_gmfn         2 /* GMFN */
+#define XENMAPSPACE_gmfn_range   3 /* GMFN range */
+#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another guest */
+    uint16_t space;
+    domid_t foreign_domid; /* IFF XENMAPSPACE_gmfn_foreign */
+
+#define XENMAPIDX_grant_table_status 0x80000000

     /* Index into source mapping space. */
     unsigned long idx;
@@ -234,4 +243,20 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
  * during a driver critical region.
  */
 extern spinlock_t xen_reservation_lock;
+
+/*
+ * Unmaps the page appearing at a particular GPFN from the specified guest's
+ * pseudophysical address space.
+ * arg == addr of xen_remove_from_physmap_t.
+ */
+#define XENMEM_remove_from_physmap    15
+struct xen_remove_from_physmap {
+    /* Which domain to change the mapping for. */
+    domid_t domid;
+
+    /* GPFN of the current mapping of the page. */
+    unsigned long gpfn;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap);
+
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h
index 9ce788d..80f792e 100644
--- a/include/xen/interface/physdev.h
+++ b/include/xen/interface/physdev.h
@@ -258,6 +258,16 @@ struct physdev_pci_device {
 	uint8_t devfn;
 };

+#define PHYSDEVOP_pvh_map_iomem    29
+struct physdev_map_iomem {
+	/* IN */
+	uint64_t first_gfn;
+	uint64_t first_mfn;
+	uint32_t nr_mfns;
+	uint32_t add_mapping;	/* 1 == add mapping; 0 == unmap */
+
+};
+
 /*
  * Notify that some PIRQ-bound event channels have been unmasked.
  * ** This command is obsolete since interface version 0x00030202 and is **
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 6a198e4..fa595e1 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -29,4 +29,11 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 			       unsigned long mfn, int nr,
 			       pgprot_t prot, unsigned domid);

+struct xen_pvh_sav_pfn_info {
+	struct page **sp_paga;	/* save pfn (info) page array */
+	int sp_num_pgs;
+	int sp_next_todo;
+};
+extern int pvh_rem_xen_p2m(unsigned long spfn, int count);
+
 #endif /* INCLUDE_XEN_OPS_H */
-- 
1.7.2.3
Stefano Stabellini
2012-Aug-16 14:10 UTC
Re: [RFC PATCH 3/8]: PVH: memory manager and paging related changes
On Thu, 16 Aug 2012, Mukesh Rathor wrote:
> [...]
>
>  void __init xen_init_mmu_ops(void)
>  {
> +	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
> +
> +	if (xen_pvh_domain()) {
> +		pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
> +
> +		/* set_pte* for PCI devices to map iomem. */
> +		if (xen_initial_domain()) {
> +			pv_mmu_ops.set_pte = xen_dom0pvh_set_pte;
> +			pv_mmu_ops.set_pte_at = xen_dom0pvh_set_pte_at;
> +		}
> +		return;
> +	}

Considering that the implementation of xen_dom0pvh_set_pte is
native_set_pte, can't we just leave it to the default, that is,
native_set_pte?
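For illustration, a minimal sketch (untested) of the simplification being
suggested; it assumes the stock pv_mmu_ops template already carries
native_set_pte/native_set_pte_at as its defaults, so only the hooks that
genuinely differ from native are overridden:

	void __init xen_init_mmu_ops(void)
	{
		x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;

		if (xen_pvh_domain()) {
			/* Page tables are native; only remote TLB flushes
			 * need Xen's help.  set_pte/set_pte_at keep the
			 * native_set_pte* defaults already present in
			 * pv_mmu_ops, so the identity wrappers go away. */
			pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
			return;
		}

		x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
		x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
		pv_mmu_ops = xen_mmu_ops;

		memset(dummy_mapping, 0xff, PAGE_SIZE);
	}

With this shape, xen_dom0pvh_set_pte, xen_dom0pvh_set_pte_at and the
dom0-only hook assignments disappear entirely.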
> [...]
>
> diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
> index eac3ce1..1b213b1 100644
> --- a/include/xen/interface/memory.h
> +++ b/include/xen/interface/memory.h
> @@ -163,10 +163,19 @@ struct xen_add_to_physmap {
>      /* Which domain to change the mapping for. */
>      domid_t domid;
>
> +    /* Number of pages to go through for gmfn_range */
> +    uint16_t size;
> +
>      /* Source mapping space. */
>  #define XENMAPSPACE_shared_info 0 /* shared info page */
>  #define XENMAPSPACE_grant_table 1 /* grant table page */
> -    unsigned int space;
> +#define XENMAPSPACE_gmfn         2 /* GMFN */
> +#define XENMAPSPACE_gmfn_range   3 /* GMFN range */
> +#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another guest */
> +    uint16_t space;
> +    domid_t foreign_domid; /* IFF XENMAPSPACE_gmfn_foreign */
> +
> +#define XENMAPIDX_grant_table_status 0x80000000

As you have seen, I have a very similar patch in my series.

> [...]
Ian Campbell
2012-Aug-16 14:15 UTC
Re: [RFC PATCH 3/8]: PVH: memory manager and paging related changes
On Thu, 2012-08-16 at 15:10 +0100, Stefano Stabellini wrote:
> On Thu, 16 Aug 2012, Mukesh Rathor wrote:
> [...]

Please can you trim your quotes? Getting a repeat of hundreds of lines
of quote for just a couple of lines of new content makes it quite hard
to spot the actual content.

Ian.
Ian Campbell
2012-Aug-17 09:26 UTC
Re: [RFC PATCH 3/8]: PVH: memory manager and paging related changes
On Thu, 2012-08-16 at 02:02 +0100, Mukesh Rathor wrote:
> ---
>  arch/x86/xen/mmu.c              |  179 ++++++++++++++++++++++++++++++++++++---
>  arch/x86/xen/mmu.h              |    2 +
>  include/xen/interface/memory.h  |   27 ++++++-
>  include/xen/interface/physdev.h |   10 ++
>  include/xen/xen-ops.h           |    7 ++
>  5 files changed, 211 insertions(+), 14 deletions(-)
>
> diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
> index b65a761..44a6477 100644
> --- a/arch/x86/xen/mmu.c
> +++ b/arch/x86/xen/mmu.c
> @@ -330,6 +330,38 @@ static void xen_set_pte(pte_t *ptep, pte_t pteval)
>  	__xen_set_pte(ptep, pteval);
>  }
>
> +/* This for PV guest in hvm container */

Shall I stop mentioning uses of "HVM" in the context of PVH? ;-)

> +void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn,
> +			      int nr_mfns, int add_mapping)
> +{
> +	int rc;
> +	struct physdev_map_iomem iomem;
> +
> +	iomem.first_gfn = pfn;
> +	iomem.first_mfn = mfn;
> +	iomem.nr_mfns = nr_mfns;
> +	iomem.add_mapping = add_mapping;
> +
> +	rc = HYPERVISOR_physdev_op(PHYSDEVOP_pvh_map_iomem, &iomem);
> +	BUG_ON(rc);

It's purely a matter of taste but we would tend to write

	if (HYPERVISOR_foo(...))
		BUG();
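For illustration, the hunk above rewritten in that style might look like
this (a sketch only; the designated-initializer form is a separate
tidy-up, not something the review asked for):

	void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn,
				      int nr_mfns, int add_mapping)
	{
		struct physdev_map_iomem iomem = {
			.first_gfn   = pfn,
			.first_mfn   = mfn,
			.nr_mfns     = nr_mfns,
			.add_mapping = add_mapping,
		};

		/* Mapping iomem must not fail; crash loudly if it does. */
		if (HYPERVISOR_physdev_op(PHYSDEVOP_pvh_map_iomem, &iomem))
			BUG();
	}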
> +}
> +
> +/* This for PV guest in hvm container.
> + * We need this because during boot early_ioremap path eventually calls
> + * set_pte that maps io space. Also, ACPI pages are not mapped into to the
> + * EPT during dom0 creation. The pages are mapped initially here from
> + * kernel_physical_mapping_init() then later the memtype is changed. */
> +static void xen_dom0pvh_set_pte(pte_t *ptep, pte_t pteval)
> +{
> +	native_set_pte(ptep, pteval);
> +}
> +
> +static void xen_dom0pvh_set_pte_at(struct mm_struct *mm, unsigned long addr,
> +				   pte_t *ptep, pte_t pteval)
> +{
> +	native_set_pte(ptep, pteval);
> +}

I think Stefano already asked why not just use native_* as the hook.

> +
>  static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
>  		    pte_t *ptep, pte_t pteval)
>  {
> @@ -1197,6 +1229,10 @@ static void xen_post_allocator_init(void);
>  static void __init xen_pagetable_setup_done(pgd_t *base)
>  {
>  	xen_setup_shared_info();
> +
> +	if (xen_pvh_domain())
> +		return;
> +
>  	xen_post_allocator_init();
>  }
>
> @@ -1652,6 +1688,10 @@ static void set_page_prot(void *addr, pgprot_t prot)
>  	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
>  	pte_t pte = pfn_pte(pfn, prot);
>
> +	/* for PVH, page tables are native. */
> +	if (xen_pvh_domain())
> +		return;
> +
>  	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
>  		BUG();
>  }
> @@ -1745,6 +1785,7 @@ static void convert_pfn_mfn(void *v)
>  * but that's enough to get __va working.  We need to fill in the rest
>  * of the physical mapping once some sort of allocator has been set
>  * up.
> + * NOTE: for PVH, the page tables are native with HAP required.

OOI does this mean shadow doesn't work?

> @@ -1761,10 +1802,12 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
>  	/* Zap identity mapping */
>  	init_level4_pgt[0] = __pgd(0);
>
> -	/* Pre-constructed entries are in pfn, so convert to mfn */
> -	convert_pfn_mfn(init_level4_pgt);
> -	convert_pfn_mfn(level3_ident_pgt);
> -	convert_pfn_mfn(level3_kernel_pgt);
> +	if (!xen_pvh_domain()) {

Does this test really mean if (XENFEAT_auto_translated_physmap)?
And maybe it fits more logically in convert_pfn_mfn itself?
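For illustration, a sketch of what folding the test into the helper could
look like; the loop body is the existing convert_pfn_mfn(), and the early
return assumes an auto-translated (PVH) guest always advertises
XENFEAT_auto_translated_physmap:

	static void convert_pfn_mfn(void *v)
	{
		pte_t *pte = v;
		int i;

		/* Auto-translated guests run on native page tables whose
		 * entries already hold pfns: nothing to convert. */
		if (xen_feature(XENFEAT_auto_translated_physmap))
			return;

		/* All levels are converted the same way, so just treat them
		   as ptes. */
		for (i = 0; i < PTRS_PER_PTE; i++)
			pte[i] = xen_make_pte(pte[i].pte);
	}

The callers could then drop their xen_pvh_domain() guards and call the
helper unconditionally.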
> +		/* Pre-constructed entries are in pfn, so convert to mfn */
> +		convert_pfn_mfn(init_level4_pgt);
> +		convert_pfn_mfn(level3_ident_pgt);
> +		convert_pfn_mfn(level3_kernel_pgt);
> +	}
>
>  	l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
>  	l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
> @@ -1787,12 +1830,14 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
>  	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
>  	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
>
> -	/* Pin down new L4 */
> -	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
> -			  PFN_DOWN(__pa_symbol(init_level4_pgt)));
> +	if (!xen_pvh_domain()) {

XENFEAT_writeable_page_tables? And inside pin_pagetable_pfn perhaps?

> +		/* Pin down new L4 */
> +		pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
> +				  PFN_DOWN(__pa_symbol(init_level4_pgt)));
>
> -	/* Unpin Xen-provided one */
> -	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
> +		/* Unpin Xen-provided one */
> +		pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
> +	}
>
>  	/* Switch over */
>  	pgd = init_level4_pgt;
> @@ -1802,9 +1847,13 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
>  	 * structure to attach it to, so make sure we just set kernel
>  	 * pgd.
>  	 */
> -	xen_mc_batch();
> -	__xen_write_cr3(true, __pa(pgd));
> -	xen_mc_issue(PARAVIRT_LAZY_CPU);

Not your issue but I wonder why we do this weird looking singleton batch?

> +	if (xen_pvh_domain()) {
> +		native_write_cr3(__pa(pgd));
> +	} else {
> +		xen_mc_batch();
> +		__xen_write_cr3(true, __pa(pgd));
> +		xen_mc_issue(PARAVIRT_LAZY_CPU);
> +	}
>
>  	memblock_reserve(__pa(xen_start_info->pt_base),
>  			 xen_start_info->nr_pt_frames * PAGE_SIZE);
> @@ -2067,9 +2116,21 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
>
>  void __init xen_init_mmu_ops(void)
>  {
> +	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;

I'd be tempted to leave this below, where it fits in alongside the
others, and duplicate it in the if as necessary.

Given that the two cases are basically non-overlapping perhaps you want
xen_init_(pv,pvh)_mmu_ops as separate hooks? Looking back the same
might apply to the pagetable_setup_done hook?

> +
> +	if (xen_pvh_domain()) {
> +		pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
> +
> +		/* set_pte* for PCI devices to map iomem. */
> +		if (xen_initial_domain()) {
> +			pv_mmu_ops.set_pte = xen_dom0pvh_set_pte;
> +			pv_mmu_ops.set_pte_at = xen_dom0pvh_set_pte_at;

Is this wrong for domU or have you just not tried/implemented
passthrough support yet?

> +		}
> +		return;
> +	}
> +
>  	x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
>  	x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
> -	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
>  	pv_mmu_ops = xen_mmu_ops;
>
>  	memset(dummy_mapping, 0xff, PAGE_SIZE);
> @@ -2305,6 +2366,93 @@ void __init xen_hvm_init_mmu_ops(void)
>  }
>  #endif
>
> +/* Map foreign gmfn, fgmfn, to local pfn, lpfn. This for the user space
> + * creating new guest on PVH dom0 and needs to map domU pages. Called from
> + * exported function, so no need to export this.
> + */
> +static int pvh_add_to_xen_p2m(unsigned long lpfn, unsigned long fgmfn,
> +			      unsigned int domid)
> +{
> +	int rc;
> +	struct xen_add_to_physmap pmb = {.foreign_domid = domid};

I'm not sure but I think CodingStyle would want spaces inside the {}s.

What is the b in pmb? Phys Map B??? (every other user of this
interface says xatp, FWIW)

> +
> +	pmb.gpfn = lpfn;
> +	pmb.idx = fgmfn;
> +	pmb.space = XENMAPSPACE_gmfn_foreign;
> +	rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &pmb);
> +	if (rc) {
> +		pr_warn("Failed to map pfn to mfn rc:%d pfn:%lx mfn:%lx\n",
> +			rc, lpfn, fgmfn);
> +		return 1;
> +	}
> +	return 0;
> +}
> +
> +/* Unmap an entry from xen p2m table */

Actually it is @count entries, starting at spfn. That seems obvious
enough from the prototype not to be worth saying though.

> +int pvh_rem_xen_p2m(unsigned long spfn, int count)
> +{
> +	struct xen_remove_from_physmap xrp;
> +	int i, rc;
> +
> +	for (i=0; i < count; i++) {
> +		xrp.domid = DOMID_SELF;
> +		xrp.gpfn = spfn+i;
> +		rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp);
> +		if (rc) {
> +			pr_warn("Failed to unmap pfn:%lx rc:%d done:%d\n",
> +				spfn+i, rc, i);
> +			return 1;
> +		}
> +	}
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(pvh_rem_xen_p2m);

What is the external/modular user of this?

I guess this is why you noted that pvh_add_to_xen_p2m didn't need
exporting, which struck me as unnecessary at the time.

pvh_add_to_xen_p2m seems to have an exported wrapper, why not rem
too? e.g. xen_unmap_domain_mfn_range?
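For illustration, a sketch of the kind of exported wrapper being
suggested here; the name and signature are hypothetical, and it simply
defers to the existing helper:

	/* Counterpart to xen_remap_domain_mfn_range() for the
	 * auto-translated case: tear the p2m entries back down.
	 * Note pvh_rem_xen_p2m() currently returns 1 on failure,
	 * not a -ve errno, so callers would treat this as boolean. */
	int xen_unmap_domain_mfn_range(unsigned long spfn, int count)
	{
		if (!xen_pvh_domain())
			return -EINVAL;

		return pvh_rem_xen_p2m(spfn, count);
	}
	EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);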
> +
> +struct pvh_remap_data {
> +	unsigned long fgmfn;		/* foreign domain's gmfn */
> +	pgprot_t prot;
> +	domid_t  domid;
> +	struct vm_area_struct *vma;
> +};
> +
> +static int pvh_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
> +			  void *data)
> +{
> +	struct pvh_remap_data *pvhp = data;
> +	struct xen_pvh_sav_pfn_info *savp = pvhp->vma->vm_private_data;
> +	unsigned long pfn = page_to_pfn(savp->sp_paga[savp->sp_next_todo++]);
> +	pte_t pteval = pte_mkspecial(pfn_pte(pfn, pvhp->prot));
> +
> +	native_set_pte(ptep, pteval);
> +	if (pvh_add_to_xen_p2m(pfn, pvhp->fgmfn, pvhp->domid))
> +		return -EFAULT;

Is there a little window here where we've set up the page table entry
but the p2m entry behind it is uninitialised?

I suppose even if an interrupt occurred we can rely on the virtual
address not having been "exposed" anywhere yet and therefore there is
no chance of anyone dereferencing it. But is there any harm in just
flipping the ordering here?

Why EFAULT? Seems a bit random; I think HYPERVISOR_memory_op returns a
-ve errno which you could propagate.

> diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
> index eac3ce1..1b213b1 100644
> --- a/include/xen/interface/memory.h
> +++ b/include/xen/interface/memory.h
> @@ -163,10 +163,19 @@ struct xen_add_to_physmap {
>      /* Which domain to change the mapping for. */
>      domid_t domid;
>
> +    /* Number of pages to go through for gmfn_range */
> +    uint16_t size;
> +
>      /* Source mapping space. */
>  #define XENMAPSPACE_shared_info 0 /* shared info page */
>  #define XENMAPSPACE_grant_table 1 /* grant table page */
> -    unsigned int space;
> +#define XENMAPSPACE_gmfn         2 /* GMFN */
> +#define XENMAPSPACE_gmfn_range   3 /* GMFN range */
> +#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another guest */
> +    uint16_t space;
> +    domid_t foreign_domid; /* IFF XENMAPSPACE_gmfn_foreign */
> +
> +#define XENMAPIDX_grant_table_status 0x80000000

Even if we don't get the full PVH implementation in the hypervisor
right away I think we should at least update the canonical interface
definition in the Xen tree (xen/include/public) first. (The same goes
for the other interface definitions here.)

Ian.

Mukesh Rathor
2012-Aug-17 23:40 UTC
Re: [RFC PATCH 3/8]: PVH: memory manager and paging related changes
On Thu, 16 Aug 2012 15:10:03 +0100
Stefano Stabellini <stefano.stabellini@eu.citrix.com> wrote:

> >  void __init xen_init_mmu_ops(void)
> >  {
> > +	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
> > +
> > +	if (xen_pvh_domain()) {
> > +		pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
> > +
> > +		/* set_pte* for PCI devices to map iomem. */
> > +		if (xen_initial_domain()) {
> > +			pv_mmu_ops.set_pte = xen_dom0pvh_set_pte;
> > +			pv_mmu_ops.set_pte_at = xen_dom0pvh_set_pte_at;
> > +		}
> > +		return;
> > +	}
>
> Considering that the implementation of xen_dom0pvh_set_pte is
> native_set_pte, can't we just leave it to the default, that is,
> native_set_pte?

We can. I had more code in the function that I removed when I mapped
the entire IO space up front. Then, I had a lot of debug code there.
I can change it to native now.
Mukesh Rathor
2012-Aug-17 23:49 UTC
Re: [RFC PATCH 3/8]: PVH: memory manager and paging related changes
On Fri, 17 Aug 2012 10:26:15 +0100
Ian Campbell <Ian.Campbell@citrix.com> wrote:

> On Thu, 2012-08-16 at 02:02 +0100, Mukesh Rathor wrote:
> > +
> >  	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
> >  		BUG();
> >  }
> > @@ -1745,6 +1785,7 @@ static void convert_pfn_mfn(void *v)
> >  * but that's enough to get __va working.  We need to fill in the rest
> >  * of the physical mapping once some sort of allocator has been set
> >  * up.
> > + * NOTE: for PVH, the page tables are native with HAP required.
>
> OOI does this mean shadow doesn't work?

Most likely not. Will need to explore that. Later.

> > +
> > +	if (xen_pvh_domain()) {
> > +		pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
> > +
> > +		/* set_pte* for PCI devices to map iomem. */
> > +		if (xen_initial_domain()) {
> > +			pv_mmu_ops.set_pte = xen_dom0pvh_set_pte;
> > +			pv_mmu_ops.set_pte_at = xen_dom0pvh_set_pte_at;
>
> Is this wrong for domU or have you just not tried/implemented
> passthrough support yet?

No, passthrough is phase II/III. Too big a project for one person to do
in a single shot as it is ;)..

> > + * exported function, so no need to export this.
> > + */
> > +static int pvh_add_to_xen_p2m(unsigned long lpfn, unsigned long fgmfn,
> > +			      unsigned int domid)
> > +{
> > +	int rc;
> > +	struct xen_add_to_physmap pmb = {.foreign_domid = domid};
>
> I'm not sure but I think CodingStyle would want spaces inside the {}s.
>
> What is the b in pmb? Phys Map B??? (every other user of this
> interface says xatp, FWIW)

Because originally it was a batch function but got changed after the
code review earlier. I will rename it.

> > +		rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp);
> > +		if (rc) {
> > +			pr_warn("Failed to unmap pfn:%lx rc:%d done:%d\n",
> > +				spfn+i, rc, i);
> > +			return 1;
> > +		}
> > +	}
> > +	return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(pvh_rem_xen_p2m);
>
> What is the external/modular user of this?

privcmd.

> I guess this is why you noted that pvh_add_to_xen_p2m didn't need
> exporting, which struck me as unnecessary at the time.
>
> pvh_add_to_xen_p2m seems to have an exported wrapper, why not rem
> too? e.g. xen_unmap_domain_mfn_range?

I believe unmapping happens thru native code. So, we just need to
remove the entries from the EPT. Ok, I mean, HAP.

> > +
> > +	native_set_pte(ptep, pteval);
> > +	if (pvh_add_to_xen_p2m(pfn, pvhp->fgmfn, pvhp->domid))
> > +		return -EFAULT;
>
> Is there a little window here where we've set up the page table entry
> but the p2m entry behind it is uninitialised?
>
> I suppose even if an interrupt occurred we can rely on the virtual
> address not having been "exposed" anywhere yet and therefore there is
> no chance of anyone dereferencing it. But is there any harm in just
> flipping the ordering here?

Should be ok flipping the order.
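For illustration, a sketch of the reordered callback; it also folds in
the errno propagation Ian raises next, so it assumes pvh_add_to_xen_p2m()
is changed to return the hypercall's negative errno rather than 1:

	static int pvh_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
				  void *data)
	{
		struct pvh_remap_data *pvhp = data;
		struct xen_pvh_sav_pfn_info *savp = pvhp->vma->vm_private_data;
		unsigned long pfn = page_to_pfn(savp->sp_paga[savp->sp_next_todo++]);
		pte_t pteval = pte_mkspecial(pfn_pte(pfn, pvhp->prot));
		int rc;

		/* Fill the p2m first so the pte never points at an
		 * uninitialised translation. */
		rc = pvh_add_to_xen_p2m(pfn, pvhp->fgmfn, pvhp->domid);
		if (rc)
			return rc;	/* propagate Xen's -ve errno */

		native_set_pte(ptep, pteval);
		return 0;
	}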
> Why EFAULT? Seems a bit random; I think HYPERVISOR_memory_op returns a
> -ve errno which you could propagate.

Ok, sounds good.

Ian Campbell
2012-Sep-14 16:20 UTC
Re: [RFC PATCH 3/8]: PVH: memory manager and paging related changes
On Thu, 2012-08-16 at 02:02 +0100, Mukesh Rathor wrote:
> +/* Map foreign gmfn, fgmfn, to local pfn, lpfn. This for the user space
> + * creating new guest on PVH dom0 and needs to map domU pages. Called from
> + * exported function, so no need to export this.
> + */
> +static int pvh_add_to_xen_p2m(unsigned long lpfn, unsigned long fgmfn,
> +			      unsigned int domid)
> +{
> +	int rc;
> +	struct xen_add_to_physmap pmb = {.foreign_domid = domid};
> +
> +	pmb.gpfn = lpfn;
> +	pmb.idx = fgmfn;
> +	pmb.space = XENMAPSPACE_gmfn_foreign;

You've not initialised pmb.domid here. I think you want/need to set it
to DOMID_SELF and have the h/v side behave accordingly.

> +	rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &pmb);
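For illustration, a sketch of an initializer covering all the fields at
once, including the missing domid; it also adopts the xatp name suggested
earlier in the thread:

	struct xen_add_to_physmap xatp = {
		.domid         = DOMID_SELF,	/* map into our own physmap */
		.foreign_domid = domid,		/* pages come from domid */
		.space         = XENMAPSPACE_gmfn_foreign,
		.idx           = fgmfn,
		.gpfn          = lpfn,
	};

The designated initializer zeroes the remaining fields (such as size),
which the non-range XENMAPSPACE_gmfn_foreign path should ignore anyway.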