--- drivers/xen/privcmd.c | 77 ++++++++++++++++++++++++++++++++++++++++++++---- 1 files changed, 70 insertions(+), 7 deletions(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index ccee0f1..195d89f 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -33,6 +33,7 @@ #include <xen/features.h> #include <xen/page.h> #include <xen/xen-ops.h> +#include <xen/balloon.h> #include "privcmd.h" @@ -178,7 +179,7 @@ static int mmap_mfn_range(void *data, void *state) msg->va & PAGE_MASK, msg->mfn, msg->npages, vma->vm_page_prot, - st->domain); + st->domain, NULL); if (rc < 0) return rc; @@ -199,6 +200,10 @@ static long privcmd_ioctl_mmap(void __user *udata) if (!xen_initial_domain()) return -EPERM; + /* PVH: TBD/FIXME. For now we only support privcmd_ioctl_mmap_batch */ + if (xen_pv_domain() && xen_feature(XENFEAT_auto_translated_physmap)) + return -ENOSYS; + if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) return -EFAULT; @@ -251,13 +256,18 @@ struct mmap_batch_state { xen_pfn_t __user *user; }; +/* PVH dom0 fyi: if domU being created is PV, then mfn is mfn(addr on bus). If + * it''s PVH then mfn is pfn (input to HAP). */ static int mmap_batch_fn(void *data, void *state) { xen_pfn_t *mfnp = data; struct mmap_batch_state *st = state; + struct vm_area_struct *vma = st->vma; + struct xen_pvh_pfn_info *pvhp = vma ? vma->vm_private_data : NULL; - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, - st->vma->vm_page_prot, st->domain) < 0) { + if (xen_remap_domain_mfn_range(vma, st->va & PAGE_MASK, *mfnp, 1, + vma->vm_page_prot, st->domain, + pvhp) < 0) { *mfnp |= 0xf0000000U; st->err++; } @@ -274,6 +284,40 @@ static int mmap_return_errors(void *data, void *state) return put_user(*mfnp, st->user++); } +/* Allocate pfns that are then mapped with gmfns from foreign domid. Update + * the vma with the page info to use later. + * Returns: 0 if success, otherwise -errno + */ +static int pvh_privcmd_resv_pfns(struct vm_area_struct *vma, int numpgs) +{ + int rc; + struct xen_pvh_pfn_info *pvhp; + + pvhp = kzalloc(sizeof(struct xen_pvh_pfn_info), GFP_KERNEL); + if (pvhp == NULL) + return -ENOMEM; + + pvhp->pi_paga = kcalloc(numpgs, sizeof(pvhp->pi_paga[0]), GFP_KERNEL); + if (pvhp->pi_paga == NULL) { + kfree(pvhp); + return -ENOMEM; + } + + rc = alloc_xenballooned_pages(numpgs, pvhp->pi_paga, 0); + if (rc != 0) { + pr_warn("%s Could not alloc %d pfns rc:%d\n", __FUNCTION__, + numpgs, rc); + kfree(pvhp->pi_paga); + kfree(pvhp); + return -ENOMEM; + } + pvhp->pi_num_pgs = numpgs; + BUG_ON(vma->vm_private_data != (void *)1); + vma->vm_private_data = pvhp; + + return 0; +} + static struct vm_operations_struct privcmd_vm_ops; static long privcmd_ioctl_mmap_batch(void __user *udata) @@ -315,6 +359,12 @@ static long privcmd_ioctl_mmap_batch(void __user *udata) goto out; } + if (xen_pv_domain() && xen_feature(XENFEAT_auto_translated_physmap)) { + if ((ret=pvh_privcmd_resv_pfns(vma, m.num))) { + up_write(&mm->mmap_sem); + goto out; + } + } state.domain = m.dom; state.vma = vma; state.va = m.addr; @@ -365,6 +415,22 @@ static long privcmd_ioctl(struct file *file, return ret; } +static void privcmd_close(struct vm_area_struct *vma) +{ + int count; + struct xen_pvh_pfn_info *pvhp = vma ? vma->vm_private_data : NULL; + + if (!xen_pv_domain() || !pvhp || + !xen_feature(XENFEAT_auto_translated_physmap)) + return; + + count = xen_unmap_domain_mfn_range(vma, pvhp); + while (count--) + free_xenballooned_pages(1, &pvhp->pi_paga[count]); + kfree(pvhp->pi_paga); + kfree(pvhp); +} + static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n", @@ -375,15 +441,12 @@ static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } static struct vm_operations_struct privcmd_vm_ops = { + .close = privcmd_close, .fault = privcmd_fault }; static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) { - /* Unsupported for auto-translate guests. */ - if (xen_feature(XENFEAT_auto_translated_physmap)) - return -ENOSYS; - /* DONTCOPY is essential for Xen because copy_page_range doesn''t know * how to recreate these mappings */ vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP; -- 1.7.2.3
On Fri, 21 Sep 2012, Mukesh Rathor wrote:> --- > drivers/xen/privcmd.c | 77 ++++++++++++++++++++++++++++++++++++++++++++---- > 1 files changed, 70 insertions(+), 7 deletions(-) > > diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c > index ccee0f1..195d89f 100644 > --- a/drivers/xen/privcmd.c > +++ b/drivers/xen/privcmd.c > @@ -33,6 +33,7 @@ > #include <xen/features.h> > #include <xen/page.h> > #include <xen/xen-ops.h> > +#include <xen/balloon.h> > > #include "privcmd.h" > > @@ -178,7 +179,7 @@ static int mmap_mfn_range(void *data, void *state) > msg->va & PAGE_MASK, > msg->mfn, msg->npages, > vma->vm_page_prot, > - st->domain); > + st->domain, NULL); > if (rc < 0) > return rc; > > @@ -199,6 +200,10 @@ static long privcmd_ioctl_mmap(void __user *udata) > if (!xen_initial_domain()) > return -EPERM; > > + /* PVH: TBD/FIXME. For now we only support privcmd_ioctl_mmap_batch */ > + if (xen_pv_domain() && xen_feature(XENFEAT_auto_translated_physmap)) > + return -ENOSYS; > + > if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) > return -EFAULT; > > @@ -251,13 +256,18 @@ struct mmap_batch_state { > xen_pfn_t __user *user; > }; > > +/* PVH dom0 fyi: if domU being created is PV, then mfn is mfn(addr on bus). If > + * it''s PVH then mfn is pfn (input to HAP). */ > static int mmap_batch_fn(void *data, void *state) > { > xen_pfn_t *mfnp = data; > struct mmap_batch_state *st = state; > + struct vm_area_struct *vma = st->vma; > + struct xen_pvh_pfn_info *pvhp = vma ? vma->vm_private_data : NULL; > > - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, > - st->vma->vm_page_prot, st->domain) < 0) { > + if (xen_remap_domain_mfn_range(vma, st->va & PAGE_MASK, *mfnp, 1, > + vma->vm_page_prot, st->domain, > + pvhp) < 0) { > *mfnp |= 0xf0000000U; > st->err++; > }I don''t like that a parameter like "xen_pvh_pfn_info" has been added to a generic arch-agnostic function like xen_remap_domain_mfn_range. If you need to pass more parameters to xen_remap_domain_mfn_range, it should be done in a cross-architecture way. In fact, keep in mind that privcmd.c compiles on ARM (and IA64?) as well. I think that in this particular case you are using pvh to actually specify auto_translate_physmap, am I correct? Maybe we just need to rename xen_pvh_pfn_info to xen_xlat_pfn_info.> @@ -274,6 +284,40 @@ static int mmap_return_errors(void *data, void *state) > return put_user(*mfnp, st->user++); > } > > +/* Allocate pfns that are then mapped with gmfns from foreign domid. Update > + * the vma with the page info to use later. > + * Returns: 0 if success, otherwise -errno > + */ > +static int pvh_privcmd_resv_pfns(struct vm_area_struct *vma, int numpgs) > +{ > + int rc; > + struct xen_pvh_pfn_info *pvhp; > + > + pvhp = kzalloc(sizeof(struct xen_pvh_pfn_info), GFP_KERNEL); > + if (pvhp == NULL) > + return -ENOMEM; > + > + pvhp->pi_paga = kcalloc(numpgs, sizeof(pvhp->pi_paga[0]), GFP_KERNEL); > + if (pvhp->pi_paga == NULL) { > + kfree(pvhp); > + return -ENOMEM; > + } > + > + rc = alloc_xenballooned_pages(numpgs, pvhp->pi_paga, 0); > + if (rc != 0) { > + pr_warn("%s Could not alloc %d pfns rc:%d\n", __FUNCTION__, > + numpgs, rc); > + kfree(pvhp->pi_paga); > + kfree(pvhp); > + return -ENOMEM; > + } > + pvhp->pi_num_pgs = numpgs; > + BUG_ON(vma->vm_private_data != (void *)1);what?> + vma->vm_private_data = pvhp; > > + return 0; > +} > + > static struct vm_operations_struct privcmd_vm_ops; > > static long privcmd_ioctl_mmap_batch(void __user *udata) > @@ -315,6 +359,12 @@ static long privcmd_ioctl_mmap_batch(void __user *udata) > goto out; > } > > + if (xen_pv_domain() && xen_feature(XENFEAT_auto_translated_physmap)) { > + if ((ret=pvh_privcmd_resv_pfns(vma, m.num))) {I would change this into: if ((ret = pvh_privcmd_resv_pfns(vma, m.num)) < 0) {> + up_write(&mm->mmap_sem); > + goto out; > + } > + } > state.domain = m.dom; > state.vma = vma; > state.va = m.addr; > @@ -365,6 +415,22 @@ static long privcmd_ioctl(struct file *file, > return ret; > } > > +static void privcmd_close(struct vm_area_struct *vma) > +{ > + int count; > + struct xen_pvh_pfn_info *pvhp = vma ? vma->vm_private_data : NULL; > + > + if (!xen_pv_domain() || !pvhp || > + !xen_feature(XENFEAT_auto_translated_physmap)) > + return; > + > + count = xen_unmap_domain_mfn_range(vma, pvhp); > + while (count--) > + free_xenballooned_pages(1, &pvhp->pi_paga[count]); > + kfree(pvhp->pi_paga); > + kfree(pvhp);So xen_remap_domain_mfn_range adds the mappings to the vma, while xen_unmap_domain_mfn_range does not remove them. I take that somehow this is done by the generic Linux code that calls into this function?
On Mon, 2012-09-24 at 15:24 +0100, Stefano Stabellini wrote:> On Fri, 21 Sep 2012, Mukesh Rathor wrote: > > --- > > drivers/xen/privcmd.c | 77 ++++++++++++++++++++++++++++++++++++++++++++---- > > 1 files changed, 70 insertions(+), 7 deletions(-) > > > > diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c > > index ccee0f1..195d89f 100644 > > --- a/drivers/xen/privcmd.c > > +++ b/drivers/xen/privcmd.c > > @@ -33,6 +33,7 @@ > > #include <xen/features.h> > > #include <xen/page.h> > > #include <xen/xen-ops.h> > > +#include <xen/balloon.h> > > > > #include "privcmd.h" > > > > @@ -178,7 +179,7 @@ static int mmap_mfn_range(void *data, void *state) > > msg->va & PAGE_MASK, > > msg->mfn, msg->npages, > > vma->vm_page_prot, > > - st->domain); > > + st->domain, NULL); > > if (rc < 0) > > return rc; > > > > @@ -199,6 +200,10 @@ static long privcmd_ioctl_mmap(void __user *udata) > > if (!xen_initial_domain()) > > return -EPERM; > > > > + /* PVH: TBD/FIXME. For now we only support privcmd_ioctl_mmap_batch */ > > + if (xen_pv_domain() && xen_feature(XENFEAT_auto_translated_physmap)) > > + return -ENOSYS; > > + > > if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) > > return -EFAULT; > > > > @@ -251,13 +256,18 @@ struct mmap_batch_state { > > xen_pfn_t __user *user; > > }; > > > > +/* PVH dom0 fyi: if domU being created is PV, then mfn is mfn(addr on bus). If > > + * it''s PVH then mfn is pfn (input to HAP). */ > > static int mmap_batch_fn(void *data, void *state) > > { > > xen_pfn_t *mfnp = data; > > struct mmap_batch_state *st = state; > > + struct vm_area_struct *vma = st->vma; > > + struct xen_pvh_pfn_info *pvhp = vma ? vma->vm_private_data : NULL; > > > > - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, > > - st->vma->vm_page_prot, st->domain) < 0) { > > + if (xen_remap_domain_mfn_range(vma, st->va & PAGE_MASK, *mfnp, 1, > > + vma->vm_page_prot, st->domain, > > + pvhp) < 0) { > > *mfnp |= 0xf0000000U; > > st->err++; > > } > > I don''t like that a parameter like "xen_pvh_pfn_info" has been added to > a generic arch-agnostic function like xen_remap_domain_mfn_range.This might have been my suggestion but actually I was thinking more along the lines of what you suggest :> If you need to pass more parameters to xen_remap_domain_mfn_range, it > should be done in a cross-architecture way. In fact, keep in mind that > privcmd.c compiles on ARM (and IA64?) as well. > > I think that in this particular case you are using pvh to actually > specify auto_translate_physmap, am I correct? > Maybe we just need to rename xen_pvh_pfn_info to xen_xlat_pfn_info.Or perhaps passing pvhp->pages as the new argument.> > @@ -315,6 +359,12 @@ static long privcmd_ioctl_mmap_batch(void __user *udata) > > goto out; > > } > > > > + if (xen_pv_domain() && xen_feature(XENFEAT_auto_translated_physmap)) { > > + if ((ret=pvh_privcmd_resv_pfns(vma, m.num))) { > > I would change this into: > > if ((ret = pvh_privcmd_resv_pfns(vma, m.num)) < 0) {I thought assignment in if statements was frowned upon by CodingStyle, although having looked the only bit which backs that up is "Kernel coding style is super simple. Avoid tricky expressions." which isn''t exactly explicit. Ian.
> pr_warn("%s Could not alloc %d pfns rc:%d\n", __FUNCTION__, > + numpgs, rc);IIRC __FUNCTION__ is a deprecated gcc''ism replaced by C99''s __func__. checkpatch.pl warns about this. Ian.
On Mon, 24 Sep 2012 15:24:16 +0100 Stefano Stabellini <stefano.stabellini@eu.citrix.com> wrote:> On Fri, 21 Sep 2012, Mukesh Rathor wrote: > > --- > > drivers/xen/privcmd.c | 77 > > ++++++++++++++++++++++++++++++++++++++++++++---- 1 files changed, > > 70 insertions(+), 7 deletions(-) > > > > diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c > > index ccee0f1..195d89f 100644 > > - st->vma->vm_page_prot, > > st->domain) < 0) { > > + if (xen_remap_domain_mfn_range(vma, st->va & PAGE_MASK, > > *mfnp, 1, > > + vma->vm_page_prot, > > st->domain, > > + pvhp) < 0) { > > *mfnp |= 0xf0000000U; > > st->err++; > > } > > I don''t like that a parameter like "xen_pvh_pfn_info" has been added > to a generic arch-agnostic function like xen_remap_domain_mfn_range. > > If you need to pass more parameters to xen_remap_domain_mfn_range, it > should be done in a cross-architecture way. In fact, keep in mind that > privcmd.c compiles on ARM (and IA64?) as well. > > I think that in this particular case you are using pvh to actually > specify auto_translate_physmap, am I correct? > Maybe we just need to rename xen_pvh_pfn_info to xen_xlat_pfn_info.Ok, I renamed it. And for the API, as I mentioned in prev email, I changed xen_remap_domain_mfn_range to have last parameter be called void *arch_specific_info.> > > @@ -274,6 +284,40 @@ static int mmap_return_errors(void *data, void > > + return -ENOMEM; > > + } > > + > > + rc = alloc_xenballooned_pages(numpgs, pvhp->pi_paga, 0); > > + if (rc != 0) { > > + pr_warn("%s Could not alloc %d pfns rc:%d\n", > > __FUNCTION__, > > + numpgs, rc); > > + kfree(pvhp->pi_paga); > > + kfree(pvhp); > > + return -ENOMEM; > > + } > > + pvhp->pi_num_pgs = numpgs; > > + BUG_ON(vma->vm_private_data != (void *)1); > > what?See privcmd_enforce_singleshot_mapping().> > > > + if (xen_pv_domain() && > > xen_feature(XENFEAT_auto_translated_physmap)) { > > + if ((ret=pvh_privcmd_resv_pfns(vma, m.num))) { > > I would change this into: > > if ((ret = pvh_privcmd_resv_pfns(vma, m.num)) < 0) { >OK.> > + count = xen_unmap_domain_mfn_range(vma, pvhp); > > + while (count--) > > + free_xenballooned_pages(1, &pvhp->pi_paga[count]); > > + kfree(pvhp->pi_paga); > > + kfree(pvhp); > > So xen_remap_domain_mfn_range adds the mappings to the vma, while > xen_unmap_domain_mfn_range does not remove them. I take that somehow > this is done by the generic Linux code that calls into this function?Correct. The kernel MMU seems to do all cleanup before calling privcmd_close.
On Tue, Sep 25, 2012 at 06:28:48PM -0700, Mukesh Rathor wrote:> On Mon, 24 Sep 2012 15:24:16 +0100 > Stefano Stabellini <stefano.stabellini@eu.citrix.com> wrote: > > > On Fri, 21 Sep 2012, Mukesh Rathor wrote: > > > --- > > > drivers/xen/privcmd.c | 77 > > > ++++++++++++++++++++++++++++++++++++++++++++---- 1 files changed, > > > 70 insertions(+), 7 deletions(-) > > > > > > diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c > > > index ccee0f1..195d89f 100644 > > > - st->vma->vm_page_prot, > > > st->domain) < 0) { > > > + if (xen_remap_domain_mfn_range(vma, st->va & PAGE_MASK, > > > *mfnp, 1, > > > + vma->vm_page_prot, > > > st->domain, > > > + pvhp) < 0) { > > > *mfnp |= 0xf0000000U; > > > st->err++; > > > } > > > > I don''t like that a parameter like "xen_pvh_pfn_info" has been added > > to a generic arch-agnostic function like xen_remap_domain_mfn_range. > > > > If you need to pass more parameters to xen_remap_domain_mfn_range, it > > should be done in a cross-architecture way. In fact, keep in mind that > > privcmd.c compiles on ARM (and IA64?) as well. > > > > I think that in this particular case you are using pvh to actually > > specify auto_translate_physmap, am I correct? > > Maybe we just need to rename xen_pvh_pfn_info to xen_xlat_pfn_info. > > Ok, I renamed it. And for the API, as I mentioned in prev email, > I changed xen_remap_domain_mfn_range to have last parameter be > called void *arch_specific_info. > > > > > > @@ -274,6 +284,40 @@ static int mmap_return_errors(void *data, void > > > + return -ENOMEM; > > > + } > > > + > > > + rc = alloc_xenballooned_pages(numpgs, pvhp->pi_paga, 0); > > > + if (rc != 0) { > > > + pr_warn("%s Could not alloc %d pfns rc:%d\n", > > > __FUNCTION__, > > > + numpgs, rc); > > > + kfree(pvhp->pi_paga); > > > + kfree(pvhp); > > > + return -ENOMEM; > > > + } > > > + pvhp->pi_num_pgs = numpgs; > > > + BUG_ON(vma->vm_private_data != (void *)1); > > > > what? >Make sure you have a comment explaining it in here. In 3 months neither you or me are going to recall why this is there. Can the 1 become a #define?> See privcmd_enforce_singleshot_mapping(). > > > > > > > + if (xen_pv_domain() && > > > xen_feature(XENFEAT_auto_translated_physmap)) { > > > + if ((ret=pvh_privcmd_resv_pfns(vma, m.num))) { > > > > I would change this into: > > > > if ((ret = pvh_privcmd_resv_pfns(vma, m.num)) < 0) { > > > > OK. > > > > > + count = xen_unmap_domain_mfn_range(vma, pvhp); > > > + while (count--) > > > + free_xenballooned_pages(1, &pvhp->pi_paga[count]); > > > + kfree(pvhp->pi_paga); > > > + kfree(pvhp); > > > > So xen_remap_domain_mfn_range adds the mappings to the vma, while > > xen_unmap_domain_mfn_range does not remove them. I take that somehow > > this is done by the generic Linux code that calls into this function? > > Correct. The kernel MMU seems to do all cleanup before calling > privcmd_close.
On Fri, 21 Sep 2012, Mukesh Rathor wrote:> > --- > drivers/xen/privcmd.c | 77 ++++++++++++++++++++++++++++++++++++++++++++---- > 1 files changed, 70 insertions(+), 7 deletions(-) > > diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c > index ccee0f1..195d89f 100644 > --- a/drivers/xen/privcmd.c > +++ b/drivers/xen/privcmd.c > @@ -33,6 +33,7 @@ > #include <xen/features.h> > #include <xen/page.h> > #include <xen/xen-ops.h> > +#include <xen/balloon.h> > > #include "privcmd.h" > > @@ -178,7 +179,7 @@ static int mmap_mfn_range(void *data, void *state) > msg->va & PAGE_MASK, > msg->mfn, msg->npages, > vma->vm_page_prot, > - st->domain); > + st->domain, NULL); > if (rc < 0) > return rc; > > @@ -199,6 +200,10 @@ static long privcmd_ioctl_mmap(void __user *udata) > if (!xen_initial_domain()) > return -EPERM; > > + /* PVH: TBD/FIXME. For now we only support privcmd_ioctl_mmap_batch */ > + if (xen_pv_domain() && xen_feature(XENFEAT_auto_translated_physmap)) > + return -ENOSYS; > + > if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) > return -EFAULT; > > @@ -251,13 +256,18 @@ struct mmap_batch_state { > xen_pfn_t __user *user; > }; > > +/* PVH dom0 fyi: if domU being created is PV, then mfn is mfn(addr on bus). If > + * it''s PVH then mfn is pfn (input to HAP). */ > static int mmap_batch_fn(void *data, void *state) > { > xen_pfn_t *mfnp = data; > struct mmap_batch_state *st = state; > + struct vm_area_struct *vma = st->vma; > + struct xen_pvh_pfn_info *pvhp = vma ? vma->vm_private_data : NULL; > > - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, > - st->vma->vm_page_prot, st->domain) < 0) { > + if (xen_remap_domain_mfn_range(vma, st->va & PAGE_MASK, *mfnp, 1, > + vma->vm_page_prot, st->domain, > + pvhp) < 0) { > *mfnp |= 0xf0000000U; > st->err++; > } > @@ -274,6 +284,40 @@ static int mmap_return_errors(void *data, void *state) > return put_user(*mfnp, st->user++); > } > > +/* Allocate pfns that are then mapped with gmfns from foreign domid. Update > + * the vma with the page info to use later. > + * Returns: 0 if success, otherwise -errno > + */ > +static int pvh_privcmd_resv_pfns(struct vm_area_struct *vma, int numpgs) > +{ > + int rc; > + struct xen_pvh_pfn_info *pvhp; > + > + pvhp = kzalloc(sizeof(struct xen_pvh_pfn_info), GFP_KERNEL); > + if (pvhp == NULL) > + return -ENOMEM; > + > + pvhp->pi_paga = kcalloc(numpgs, sizeof(pvhp->pi_paga[0]), GFP_KERNEL); > + if (pvhp->pi_paga == NULL) { > + kfree(pvhp); > + return -ENOMEM; > + } > + > + rc = alloc_xenballooned_pages(numpgs, pvhp->pi_paga, 0); > + if (rc != 0) { > + pr_warn("%s Could not alloc %d pfns rc:%d\n", __FUNCTION__, > + numpgs, rc); > + kfree(pvhp->pi_paga); > + kfree(pvhp); > + return -ENOMEM; > + } > + pvhp->pi_num_pgs = numpgs; > + BUG_ON(vma->vm_private_data != (void *)1); > + vma->vm_private_data = pvhp; > + > + return 0; > +} > + > static struct vm_operations_struct privcmd_vm_ops; > > static long privcmd_ioctl_mmap_batch(void __user *udata) > @@ -315,6 +359,12 @@ static long privcmd_ioctl_mmap_batch(void __user *udata) > goto out; > } > > + if (xen_pv_domain() && xen_feature(XENFEAT_auto_translated_physmap)) { > + if ((ret=pvh_privcmd_resv_pfns(vma, m.num))) {ret = pvh_privcmd_resv_pfns> + up_write(&mm->mmap_sem); > + goto out; > + } > + } > state.domain = m.dom; > state.vma = vma; > state.va = m.addr; > @@ -365,6 +415,22 @@ static long privcmd_ioctl(struct file *file, > return ret; > } > > +static void privcmd_close(struct vm_area_struct *vma) > +{ > + int count; > + struct xen_pvh_pfn_info *pvhp = vma ? vma->vm_private_data : NULL; > + > + if (!xen_pv_domain() || !pvhp || > + !xen_feature(XENFEAT_auto_translated_physmap)) > + return; > + > + count = xen_unmap_domain_mfn_range(vma, pvhp); > + while (count--) > + free_xenballooned_pages(1, &pvhp->pi_paga[count]); > + kfree(pvhp->pi_paga); > + kfree(pvhp); > +} > + > static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf) > { > printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n", > @@ -375,15 +441,12 @@ static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf) > } > > static struct vm_operations_struct privcmd_vm_ops = { > + .close = privcmd_close, > .fault = privcmd_fault > }; > > static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) > { > - /* Unsupported for auto-translate guests. */ > - if (xen_feature(XENFEAT_auto_translated_physmap)) > - return -ENOSYS; > - > /* DONTCOPY is essential for Xen because copy_page_range doesn''t know > * how to recreate these mappings */ > vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP; > -- > 1.7.2.3 >
On Fri, 2012-09-21 at 20:21 +0100, Mukesh Rathor wrote:> +static int pvh_privcmd_resv_pfns(struct vm_area_struct *vma, int numpgs)...> + pvhp->pi_num_pgs = numpgs; > + BUG_ON(vma->vm_private_data != (void *)1); > + vma->vm_private_data = pvhp;How does this interact with: static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) { return (xchg(&vma->vm_private_data, (void *)1) == NULL); } If someone tries to map a second time then won''t this correct the pvhp in vm_private_data by resetting it to 1? Then when the original mapping is torn down things all fall apart? Perhaps we need a cmpxchg here? Or to rework the callers a little bit perhaps. Ian.
On Wed, 3 Oct 2012 14:21:35 +0100 Ian Campbell <Ian.Campbell@citrix.com> wrote:> On Fri, 2012-09-21 at 20:21 +0100, Mukesh Rathor wrote: > > +static int pvh_privcmd_resv_pfns(struct vm_area_struct *vma, int > > numpgs) > ... > > + pvhp->pi_num_pgs = numpgs; > > + BUG_ON(vma->vm_private_data != (void *)1); > > + vma->vm_private_data = pvhp; > > How does this interact with: > > static int privcmd_enforce_singleshot_mapping(struct vm_area_struct > *vma) { > return (xchg(&vma->vm_private_data, (void *)1) == NULL); > } > > If someone tries to map a second time then won''t this correct the pvhp > in vm_private_data by resetting it to 1? Then when the original > mapping is torn down things all fall apart? > > Perhaps we need a cmpxchg here? Or to rework the callers a little bit > perhaps.Right, that''s why I had it originally checking for auto xlated and doing something different. I think that is better than to change this and change again. I''ll change it back to just putting the ptr here.
On Wed, 2012-10-03 at 23:31 +0100, Mukesh Rathor wrote:> On Wed, 3 Oct 2012 14:21:35 +0100 > Ian Campbell <Ian.Campbell@citrix.com> wrote: > > > On Fri, 2012-09-21 at 20:21 +0100, Mukesh Rathor wrote: > > > +static int pvh_privcmd_resv_pfns(struct vm_area_struct *vma, int > > > numpgs) > > ... > > > + pvhp->pi_num_pgs = numpgs; > > > + BUG_ON(vma->vm_private_data != (void *)1); > > > + vma->vm_private_data = pvhp; > > > > How does this interact with: > > > > static int privcmd_enforce_singleshot_mapping(struct vm_area_struct > > *vma) { > > return (xchg(&vma->vm_private_data, (void *)1) == NULL); > > } > > > > If someone tries to map a second time then won''t this correct the pvhp > > in vm_private_data by resetting it to 1? Then when the original > > mapping is torn down things all fall apart? > > > > Perhaps we need a cmpxchg here? Or to rework the callers a little bit > > perhaps. > > Right, that''s why I had it originally checking for auto xlated and > doing something different. I think that is better than to change this > and change again. I''ll change it back to just putting the ptr here.Won''t that break because on the second call you will pass in the freshly allocated pointer and overwrite the exiting (useful) one with it? I think at a minimum you need a cmpxchg or some higher level locking strategy. None of the existing VM_* flags look suitable for this interlock, but I wonder if the idea of forcing a singleton mapping is generic enough to be worth such a flag? There''s no lock in the struct vm_area_struct, unless one is available via vm_file or something? I''ve no idea what the locking hierarchy looks like around this stuff sadly. Perhaps the right answer is to allocate the struct in privcmd_mmap for all modes and include the mapped flag in that? Ian.
On Thu, 4 Oct 2012 09:50:42 +0100 Ian Campbell <Ian.Campbell@citrix.com> wrote:> On Wed, 2012-10-03 at 23:31 +0100, Mukesh Rathor wrote: > > On Wed, 3 Oct 2012 14:21:35 +0100 > > Ian Campbell <Ian.Campbell@citrix.com> wrote: > > > > > On Fri, 2012-09-21 at 20:21 +0100, Mukesh Rathor wrote: > > > > +static int pvh_privcmd_resv_pfns(struct vm_area_struct *vma, > > > > int numpgs) > > > ... > > > > + pvhp->pi_num_pgs = numpgs; > > > > + BUG_ON(vma->vm_private_data != (void *)1); > > > > + vma->vm_private_data = pvhp; > > > > > > How does this interact with: > > > > > > static int privcmd_enforce_singleshot_mapping(struct > > > vm_area_struct *vma) { > > > return (xchg(&vma->vm_private_data, (void *)1) == NULL); > > > } > > > > > > If someone tries to map a second time then won''t this correct the > > > pvhp in vm_private_data by resetting it to 1? Then when the > > > original mapping is torn down things all fall apart? > > > > > > Perhaps we need a cmpxchg here? Or to rework the callers a little > > > bit perhaps. > > > > Right, that''s why I had it originally checking for auto xlated and > > doing something different. I think that is better than to change > > this and change again. I''ll change it back to just putting the ptr > > here. > > Won''t that break because on the second call you will pass in the > freshly allocated pointer and overwrite the exiting (useful) one with > it?No, for xlate, I just check for NULL. I didn''t think it was big deal to special case xlate in this case. We got so many if xlate cases already thru the code. It leaves the semantics easy to understand: NULL == avail. 1 == locked PV. PTR == Locked PVH. I''ll add a comment this time :). thanks, Mukesh
On Thu, 2012-10-04 at 19:20 +0100, Mukesh Rathor wrote:> On Thu, 4 Oct 2012 09:50:42 +0100 > Ian Campbell <Ian.Campbell@citrix.com> wrote: > > > On Wed, 2012-10-03 at 23:31 +0100, Mukesh Rathor wrote: > > > On Wed, 3 Oct 2012 14:21:35 +0100 > > > Ian Campbell <Ian.Campbell@citrix.com> wrote: > > > > > > > On Fri, 2012-09-21 at 20:21 +0100, Mukesh Rathor wrote: > > > > > +static int pvh_privcmd_resv_pfns(struct vm_area_struct *vma, > > > > > int numpgs) > > > > ... > > > > > + pvhp->pi_num_pgs = numpgs; > > > > > + BUG_ON(vma->vm_private_data != (void *)1); > > > > > + vma->vm_private_data = pvhp; > > > > > > > > How does this interact with: > > > > > > > > static int privcmd_enforce_singleshot_mapping(struct > > > > vm_area_struct *vma) { > > > > return (xchg(&vma->vm_private_data, (void *)1) == NULL); > > > > } > > > > > > > > If someone tries to map a second time then won''t this correct the > > > > pvhp in vm_private_data by resetting it to 1? Then when the > > > > original mapping is torn down things all fall apart? > > > > > > > > Perhaps we need a cmpxchg here? Or to rework the callers a little > > > > bit perhaps. > > > > > > Right, that''s why I had it originally checking for auto xlated and > > > doing something different. I think that is better than to change > > > this and change again. I''ll change it back to just putting the ptr > > > here. > > > > Won''t that break because on the second call you will pass in the > > freshly allocated pointer and overwrite the exiting (useful) one with > > it? > > No, for xlate, I just check for NULL. I didn''t think it was big > deal to special case xlate in this case. We got so many if xlate > cases already thru the code. It leaves the semantics easy to > understand: NULL == avail. 1 == locked PV. PTR == Locked PVH. I''ll add > a comment this time :).The transition from NULL => Locked PVH still needs to be done atomically and without clobbering any existing non-NULL value, otherwise it doesn''t actually protect against multiple mappings like it is supposed to. Ian.
On Fri, 5 Oct 2012 10:21:18 +0100 Ian Campbell <Ian.Campbell@citrix.com> wrote:> On Thu, 2012-10-04 at 19:20 +0100, Mukesh Rathor wrote: > > On Thu, 4 Oct 2012 09:50:42 +0100 > > Ian Campbell <Ian.Campbell@citrix.com> wrote: > > > > > On Wed, 2012-10-03 at 23:31 +0100, Mukesh Rathor wrote: > > > > On Wed, 3 Oct 2012 14:21:35 +0100 > > > > Ian Campbell <Ian.Campbell@citrix.com> wrote:.....> > > > Right, that''s why I had it originally checking for auto xlated > > > > and doing something different. I think that is better than to > > > > change this and change again. I''ll change it back to just > > > > putting the ptr here. > > > > > > Won''t that break because on the second call you will pass in the > > > freshly allocated pointer and overwrite the exiting (useful) one > > > with it? > > > > No, for xlate, I just check for NULL. I didn''t think it was big > > deal to special case xlate in this case. We got so many if xlate > > cases already thru the code. It leaves the semantics easy to > > understand: NULL == avail. 1 == locked PV. PTR == Locked PVH. I''ll > > add a comment this time :). > > The transition from NULL => Locked PVH still needs to be done > atomically and without clobbering any existing non-NULL value, > otherwise it doesn''t actually protect against multiple mappings like > it is supposed to. > > Ian. >yes, of course :).
On Fri, 5 Oct 2012 10:21:18 +0100 Ian Campbell <Ian.Campbell@citrix.com> wrote:> On Thu, 2012-10-04 at 19:20 +0100, Mukesh Rathor wrote: > > On Thu, 4 Oct 2012 09:50:42 +0100 > > Ian Campbell <Ian.Campbell@citrix.com> wrote: > > > > > > > > Won''t that break because on the second call you will pass in the > > > freshly allocated pointer and overwrite the exiting (useful) one > > > with it? > > > > No, for xlate, I just check for NULL. I didn''t think it was big > > deal to special case xlate in this case. We got so many if xlate > > cases already thru the code. It leaves the semantics easy to > > understand: NULL == avail. 1 == locked PV. PTR == Locked PVH. I''ll > > add a comment this time :). > > The transition from NULL => Locked PVH still needs to be done > atomically and without clobbering any existing non-NULL value, > otherwise it doesn''t actually protect against multiple mappings like > it is supposed to.Ok, changed it to, and tested it: static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) { if (xen_feature(XENFEAT_auto_translated_physmap)) { int sz = sizeof(vma->vm_private_data); return (!__cmpxchg(&vma->vm_private_data, NULL, NULL, sz)); } return (xchg(&vma->vm_private_data, (void *)1) == NULL); } Then in pvh_privcmd_resv_pfns(): BUG_ON(vma->vm_private_data); vma->vm_private_data = pvhp; Mukesh
On Fri, 2012-10-05 at 22:22 +0100, Mukesh Rathor wrote:> On Fri, 5 Oct 2012 10:21:18 +0100 > Ian Campbell <Ian.Campbell@citrix.com> wrote: > > > On Thu, 2012-10-04 at 19:20 +0100, Mukesh Rathor wrote: > > > On Thu, 4 Oct 2012 09:50:42 +0100 > > > Ian Campbell <Ian.Campbell@citrix.com> wrote: > > > > > > > > > > > Won''t that break because on the second call you will pass in the > > > > freshly allocated pointer and overwrite the exiting (useful) one > > > > with it? > > > > > > No, for xlate, I just check for NULL. I didn''t think it was big > > > deal to special case xlate in this case. We got so many if xlate > > > cases already thru the code. It leaves the semantics easy to > > > understand: NULL == avail. 1 == locked PV. PTR == Locked PVH. I''ll > > > add a comment this time :). > > > > The transition from NULL => Locked PVH still needs to be done > > atomically and without clobbering any existing non-NULL value, > > otherwise it doesn''t actually protect against multiple mappings like > > it is supposed to.> Ok, changed it to, and tested it: > > static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) > { > if (xen_feature(XENFEAT_auto_translated_physmap)) { > int sz = sizeof(vma->vm_private_data); > return (!__cmpxchg(&vma->vm_private_data, NULL, NULL, sz));Passing NULL for both old and new values can''t be right, can it? Did you test with something which tries to map twice? Also using cmpxchg instead of __cmpxchg includes the sizeof bit for you automatically and IIRC Coding-Style doesn''t like () around return values. So, I think you want: return !cmpxchg(&vma->vm_private_data, NULL, 1); This will set vma->vm_private_data to 1 iff it is currently NULL and returns true success iff the old values was NULL (although you might want to double check my logic on the return value). As Konrad said though using a symbolic constant for the 1 would be a good idea. I''m not sure if the cmpxchg is so expensive to be worth special casing XENFEAT_auto_translated_physmap. It''d probably be fine to just unconditionally use cmpxchg even in the other case, I don''t think this this path is so hot that it would matter.> } > return (xchg(&vma->vm_private_data, (void *)1) == NULL); > } > > Then in pvh_privcmd_resv_pfns(): > > BUG_ON(vma->vm_private_data); > vma->vm_private_data = pvhp;With the above this becomes: BUG_ON(vma->vm_private_data != 1); vma->vm_private_data = pvhp; I previously thought this assignment was unsafe, but privcmd_enforce_singleshot_mapping is always called first and ensures that only one thread ever gets to this part and that v_p_d is always 1 if that happens, so I think it is likely be OK. A comment to that effect would be helpful. Ian.
On Mon, 8 Oct 2012 10:21:42 +0100 Ian Campbell <Ian.Campbell@citrix.com> wrote:> On Fri, 2012-10-05 at 22:22 +0100, Mukesh Rathor wrote: > > On Fri, 5 Oct 2012 10:21:18 +0100 > > Ian Campbell <Ian.Campbell@citrix.com> wrote: > > > > > On Thu, 2012-10-04 at 19:20 +0100, Mukesh Rathor wrote: > > > > On Thu, 4 Oct 2012 09:50:42 +0100 > > > > Ian Campbell <Ian.Campbell@citrix.com> wrote: > > > > > > > > > > > > > > Won''t that break because on the second call you will pass in > > > > > the freshly allocated pointer and overwrite the exiting > > > > > (useful) one with it? > > > > > > > > No, for xlate, I just check for NULL. I didn''t think it was big > > > > deal to special case xlate in this case. We got so many if > > > > xlate cases already thru the code. It leaves the semantics easy > > > > to understand: NULL == avail. 1 == locked PV. PTR == Locked > > > > PVH. I''ll add a comment this time :). > > > > > > The transition from NULL => Locked PVH still needs to be done > > > atomically and without clobbering any existing non-NULL value, > > > otherwise it doesn''t actually protect against multiple mappings > > > like it is supposed to. > > > Ok, changed it to, and tested it: > > > > static int privcmd_enforce_singleshot_mapping(struct vm_area_struct > > *vma) { > > if (xen_feature(XENFEAT_auto_translated_physmap)) { > > int sz = sizeof(vma->vm_private_data); > > return (!__cmpxchg(&vma->vm_private_data, NULL, > > NULL, sz)); > > Passing NULL for both old and new values can''t be right, can it? Did > you test with something which tries to map twice?well, if it''s already set to pointer, then __cmpxchg would leave the ptr alone and return it. The function would then return false which would fail the api. OTOH, if it''s NULL, it would continue and get set later.> Also using cmpxchg instead of __cmpxchg includes the sizeof bit for > you automatically and IIRC Coding-Style doesn''t like () around return > values. > > So, I think you want: > return !cmpxchg(&vma->vm_private_data, NULL, 1);This would work also, and is prob better than to special condition xlated. thanks Mukesh
On Mon, 2012-10-08 at 20:10 +0100, Mukesh Rathor wrote:> On Mon, 8 Oct 2012 10:21:42 +0100 > Ian Campbell <Ian.Campbell@citrix.com> wrote: > > > On Fri, 2012-10-05 at 22:22 +0100, Mukesh Rathor wrote: > > > On Fri, 5 Oct 2012 10:21:18 +0100 > > > Ian Campbell <Ian.Campbell@citrix.com> wrote: > > > > > > > On Thu, 2012-10-04 at 19:20 +0100, Mukesh Rathor wrote: > > > > > On Thu, 4 Oct 2012 09:50:42 +0100 > > > > > Ian Campbell <Ian.Campbell@citrix.com> wrote: > > > > > > > > > > > > > > > > > Won''t that break because on the second call you will pass in > > > > > > the freshly allocated pointer and overwrite the exiting > > > > > > (useful) one with it? > > > > > > > > > > No, for xlate, I just check for NULL. I didn''t think it was big > > > > > deal to special case xlate in this case. We got so many if > > > > > xlate cases already thru the code. It leaves the semantics easy > > > > > to understand: NULL == avail. 1 == locked PV. PTR == Locked > > > > > PVH. I''ll add a comment this time :). > > > > > > > > The transition from NULL => Locked PVH still needs to be done > > > > atomically and without clobbering any existing non-NULL value, > > > > otherwise it doesn''t actually protect against multiple mappings > > > > like it is supposed to. > > > > > Ok, changed it to, and tested it: > > > > > > static int privcmd_enforce_singleshot_mapping(struct vm_area_struct > > > *vma) { > > > if (xen_feature(XENFEAT_auto_translated_physmap)) { > > > int sz = sizeof(vma->vm_private_data); > > > return (!__cmpxchg(&vma->vm_private_data, NULL, > > > NULL, sz)); > > > > Passing NULL for both old and new values can''t be right, can it? Did > > you test with something which tries to map twice? > > well, if it''s already set to pointer, then __cmpxchg would leave the ptr > alone and return it. The function would then return false which would > fail the api. OTOH, if it''s NULL, it would continue and get set later.What happens if a second thread tries to create a mapping while the first is between the cmpxchg and the assignment? i.e. while the value is still NULL. Ian.
On Mon, 8 Oct 2012 20:26:45 +0100 Ian Campbell <Ian.Campbell@citrix.com> wrote:> On Mon, 2012-10-08 at 20:10 +0100, Mukesh Rathor wrote: > > On Mon, 8 Oct 2012 10:21:42 +0100 > > Ian Campbell <Ian.Campbell@citrix.com> wrote: > > > > > On Fri, 2012-10-05 at 22:22 +0100, Mukesh Rathor wrote: > > > > On Fri, 5 Oct 2012 10:21:18 +0100 > > > > Ian Campbell <Ian.Campbell@citrix.com> wrote: > > > > > > > > > On Thu, 2012-10-04 at 19:20 +0100, Mukesh Rathor wrote: > > > > > > On Thu, 4 Oct 2012 09:50:42 +0100 > > > > if (xen_feature(XENFEAT_auto_translated_physmap)) { > > > > int sz = sizeof(vma->vm_private_data); > > > > return (!__cmpxchg(&vma->vm_private_data, NULL, > > > > NULL, sz)); > > > > > > Passing NULL for both old and new values can''t be right, can it? > > > Did you test with something which tries to map twice? > > > > well, if it''s already set to pointer, then __cmpxchg would leave > > the ptr alone and return it. The function would then return false > > which would fail the api. OTOH, if it''s NULL, it would continue and > > get set later. > > What happens if a second thread tries to create a mapping while the > first is between the cmpxchg and the assignment? i.e. while the value > is still NULL.Ah, right, I forgot multi threaded...