David Vrabel
2012-Aug-29 13:15 UTC
[PATCHv2 0/2] xen/privcmd: support for paged-out frames
This series is a straight forward-port of some functionality from classic kernels to support Xen hosts that do paging of guests. This isn''t functionality the XenServer makes use of so I''ve not tested these with paging in use. Changes since v1: - Don''t change PRIVCMD_MMAPBATCH (except to #define a constant for the error). It''s broken and not really fixable sensibly and libxc will use V2 if it is available. - Return -ENOENT if all failures were -ENOENT. - Clear arg->err on success (libxc expected this). I think this should probably get a "Tested-by" Andres or someone else who uses paging before being applied. David
David Vrabel
2012-Aug-29 13:15 UTC
[PATCH 1/2] xen/mm: return more precise error from xen_remap_domain_range()
From: David Vrabel <david.vrabel@citrix.com> Callers of xen_remap_domain_range() need to know if the remap failed because frame is currently paged out. So they can retry the remap later on. Return -ENOENT in this case. This assumes that the error codes returned by Xen are a subset of those used by the kernel. It is unclear if this is defined as part of the hypercall ABI. Signed-off-by: David Vrabel <david.vrabel@citrix.com> --- arch/x86/xen/mmu.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index b65a761..fb187ea 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2355,8 +2355,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, if (err) goto out; - err = -EFAULT; - if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0) + err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid); + if (err < 0) goto out; nr -= batch; -- 1.7.2.5
David Vrabel
2012-Aug-29 13:15 UTC
[PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
From: David Vrabel <david.vrabel@citrix.com> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional field for reporting the error code for every frame that could not be mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH. Signed-off-by: David Vrabel <david.vrabel@citrix.com> --- drivers/xen/privcmd.c | 78 +++++++++++++++++++++++++++++++++++++----------- include/xen/privcmd.h | 21 ++++++++++++- 2 files changed, 80 insertions(+), 19 deletions(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index ccee0f1..ddd32cf 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -248,18 +248,23 @@ struct mmap_batch_state { struct vm_area_struct *vma; int err; - xen_pfn_t __user *user; + xen_pfn_t __user *user_mfn; + int __user *user_err; }; static int mmap_batch_fn(void *data, void *state) { xen_pfn_t *mfnp = data; + int *err = data; struct mmap_batch_state *st = state; + int ret; - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, - st->vma->vm_page_prot, st->domain) < 0) { - *mfnp |= 0xf0000000U; - st->err++; + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, + st->vma->vm_page_prot, st->domain); + if (ret < 0) { + *err = ret; + if (st->err == 0 || st->err == -ENOENT) + st->err = ret; } st->va += PAGE_SIZE; @@ -268,18 +273,30 @@ static int mmap_batch_fn(void *data, void *state) static int mmap_return_errors(void *data, void *state) { - xen_pfn_t *mfnp = data; + int *err = data; struct mmap_batch_state *st = state; + int ret; + + if (st->user_err) + return __put_user(*err, st->user_err++); + else { + xen_pfn_t mfn; - return put_user(*mfnp, st->user++); + ret = __get_user(mfn, st->user_mfn); + if (ret < 0) + return ret; + + mfn |= PRIVCMD_MMAPBATCH_MFN_ERROR; + return __put_user(mfn, st->user_mfn++); + } } static struct vm_operations_struct privcmd_vm_ops; -static long privcmd_ioctl_mmap_batch(void __user *udata) +static long privcmd_ioctl_mmap_batch(void __user *udata, int version) { int ret; - struct privcmd_mmapbatch m; + struct privcmd_mmapbatch_v2 m; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long nr_pages; @@ -289,15 +306,32 @@ static long privcmd_ioctl_mmap_batch(void __user *udata) if (!xen_initial_domain()) return -EPERM; - if (copy_from_user(&m, udata, sizeof(m))) - return -EFAULT; + switch (version) { + case 1: + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch))) + return -EFAULT; + /* Returns per-frame error in m.arr. */ + m.err = NULL; + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr))) + return -EFAULT; + break; + case 2: + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2))) + return -EFAULT; + /* Returns per-frame error code in m.err. */ + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err)))) + return -EFAULT; + break; + default: + return -EINVAL; + } nr_pages = m.num; if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) return -EINVAL; ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), - m.arr); + (xen_pfn_t *)m.arr); if (ret || list_empty(&pagelist)) goto out; @@ -325,12 +359,16 @@ static long privcmd_ioctl_mmap_batch(void __user *udata) up_write(&mm->mmap_sem); - if (state.err > 0) { - state.user = m.arr; + if (state.err) { + state.user_mfn = (xen_pfn_t *)m.arr; + state.user_err = m.err; ret = traverse_pages(m.num, sizeof(xen_pfn_t), - &pagelist, - mmap_return_errors, &state); - } + &pagelist, + mmap_return_errors, &state); + if (!ret) + ret = state.err; + } else if (m.err) + __clear_user(m.err, m.num * sizeof(*m.err)); out: free_page_list(&pagelist); @@ -354,7 +392,11 @@ static long privcmd_ioctl(struct file *file, break; case IOCTL_PRIVCMD_MMAPBATCH: - ret = privcmd_ioctl_mmap_batch(udata); + ret = privcmd_ioctl_mmap_batch(udata, 1); + break; + + case IOCTL_PRIVCMD_MMAPBATCH_V2: + ret = privcmd_ioctl_mmap_batch(udata, 2); break; default: diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h index 17857fb..37e5255 100644 --- a/include/xen/privcmd.h +++ b/include/xen/privcmd.h @@ -59,13 +59,30 @@ struct privcmd_mmapbatch { int num; /* number of pages to populate */ domid_t dom; /* target domain */ __u64 addr; /* virtual address */ - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */ + xen_pfn_t __user *arr; /* array of mfns - or''d with + PRIVCMD_MMAPBATCH_MFN_ERROR on err */ +}; + +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U + +struct privcmd_mmapbatch_v2 { + unsigned int num; /* number of pages to populate */ + domid_t dom; /* target domain */ + __u64 addr; /* virtual address */ + const xen_pfn_t __user *arr; /* array of mfns */ + int __user *err; /* array of error codes */ }; /* * @cmd: IOCTL_PRIVCMD_HYPERCALL * @arg: &privcmd_hypercall_t * Return: Value returned from execution of the specified hypercall. + * + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2 + * @arg: &struct privcmd_mmapbatch_v2 + * Return: 0 if all pages were mapped successfully. -ENOENT if all + * failed mappings returned -ENOENT, otherwise the error code of the + * first failed mapping. */ #define IOCTL_PRIVCMD_HYPERCALL \ _IOC(_IOC_NONE, ''P'', 0, sizeof(struct privcmd_hypercall)) @@ -73,5 +90,7 @@ struct privcmd_mmapbatch { _IOC(_IOC_NONE, ''P'', 2, sizeof(struct privcmd_mmap)) #define IOCTL_PRIVCMD_MMAPBATCH \ _IOC(_IOC_NONE, ''P'', 3, sizeof(struct privcmd_mmapbatch)) +#define IOCTL_PRIVCMD_MMAPBATCH_V2 \ + _IOC(_IOC_NONE, ''P'', 4, sizeof(struct privcmd_mmapbatch_v2)) #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ -- 1.7.2.5
Andres Lagar-Cavilla
2012-Aug-29 16:14 UTC
Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
On Aug 29, 2012, at 9:15 AM, David Vrabel wrote:> From: David Vrabel <david.vrabel@citrix.com> > > PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional > field for reporting the error code for every frame that could not be > mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH. > > Signed-off-by: David Vrabel <david.vrabel@citrix.com> > --- > drivers/xen/privcmd.c | 78 +++++++++++++++++++++++++++++++++++++----------- > include/xen/privcmd.h | 21 ++++++++++++- > 2 files changed, 80 insertions(+), 19 deletions(-) > > diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c > index ccee0f1..ddd32cf 100644 > --- a/drivers/xen/privcmd.c > +++ b/drivers/xen/privcmd.c > @@ -248,18 +248,23 @@ struct mmap_batch_state { > struct vm_area_struct *vma; > int err; > > - xen_pfn_t __user *user; > + xen_pfn_t __user *user_mfn; > + int __user *user_err; > }; > > static int mmap_batch_fn(void *data, void *state) > { > xen_pfn_t *mfnp = data; > + int *err = data;Am I missing something or is there an aliasing here? Both mfnp and err point to data?> struct mmap_batch_state *st = state; > + int ret; > > - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, > - st->vma->vm_page_prot, st->domain) < 0) { > - *mfnp |= 0xf0000000U; > - st->err++; > + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, > + st->vma->vm_page_prot, st->domain); > + if (ret < 0) { > + *err = ret; > + if (st->err == 0 || st->err == -ENOENT) > + st->err = ret;This will unset -ENOENT if a frame after an ENOENT error fails differently.> } > st->va += PAGE_SIZE; > > @@ -268,18 +273,30 @@ static int mmap_batch_fn(void *data, void *state) > > static int mmap_return_errors(void *data, void *state) > { > - xen_pfn_t *mfnp = data; > + int *err = data; > struct mmap_batch_state *st = state; > + int ret; > + > + if (st->user_err) > + return __put_user(*err, st->user_err++); > + else { > + xen_pfn_t mfn; > > - return put_user(*mfnp, st->user++); > + ret = __get_user(mfn, st->user_mfn); > + if (ret < 0) > + return ret; > + > + mfn |= PRIVCMD_MMAPBATCH_MFN_ERROR; > + return __put_user(mfn, st->user_mfn++); > + } > } > > static struct vm_operations_struct privcmd_vm_ops; > > -static long privcmd_ioctl_mmap_batch(void __user *udata) > +static long privcmd_ioctl_mmap_batch(void __user *udata, int version) > { > int ret; > - struct privcmd_mmapbatch m; > + struct privcmd_mmapbatch_v2 m; > struct mm_struct *mm = current->mm; > struct vm_area_struct *vma; > unsigned long nr_pages; > @@ -289,15 +306,32 @@ static long privcmd_ioctl_mmap_batch(void __user *udata) > if (!xen_initial_domain()) > return -EPERM; > > - if (copy_from_user(&m, udata, sizeof(m))) > - return -EFAULT; > + switch (version) { > + case 1: > + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch))) > + return -EFAULT; > + /* Returns per-frame error in m.arr. */ > + m.err = NULL; > + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr))) > + return -EFAULT; > + break; > + case 2: > + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2))) > + return -EFAULT; > + /* Returns per-frame error code in m.err. */ > + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err)))) > + return -EFAULT; > + break; > + default: > + return -EINVAL; > + } > > nr_pages = m.num; > if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) > return -EINVAL; > > ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), > - m.arr); > + (xen_pfn_t *)m.arr); > > if (ret || list_empty(&pagelist)) > goto out; > @@ -325,12 +359,16 @@ static long privcmd_ioctl_mmap_batch(void __user *udata) > > up_write(&mm->mmap_sem); > > - if (state.err > 0) { > - state.user = m.arr; > + if (state.err) { > + state.user_mfn = (xen_pfn_t *)m.arr; > + state.user_err = m.err; > ret = traverse_pages(m.num, sizeof(xen_pfn_t), > - &pagelist, > - mmap_return_errors, &state); > - } > + &pagelist, > + mmap_return_errors, &state);The callback now maps data to err (instead of mfnp … but I see no change to the gather_array other than a cast …am I missing something? Thanks Andres> + if (!ret) > + ret = state.err; > + } else if (m.err) > + __clear_user(m.err, m.num * sizeof(*m.err)); > > out: > free_page_list(&pagelist); > @@ -354,7 +392,11 @@ static long privcmd_ioctl(struct file *file, > break; > > case IOCTL_PRIVCMD_MMAPBATCH: > - ret = privcmd_ioctl_mmap_batch(udata); > + ret = privcmd_ioctl_mmap_batch(udata, 1); > + break; > + > + case IOCTL_PRIVCMD_MMAPBATCH_V2: > + ret = privcmd_ioctl_mmap_batch(udata, 2); > break; > > default: > diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h > index 17857fb..37e5255 100644 > --- a/include/xen/privcmd.h > +++ b/include/xen/privcmd.h > @@ -59,13 +59,30 @@ struct privcmd_mmapbatch { > int num; /* number of pages to populate */ > domid_t dom; /* target domain */ > __u64 addr; /* virtual address */ > - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */ > + xen_pfn_t __user *arr; /* array of mfns - or''d with > + PRIVCMD_MMAPBATCH_MFN_ERROR on err */ > +}; > + > +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U > + > +struct privcmd_mmapbatch_v2 { > + unsigned int num; /* number of pages to populate */ > + domid_t dom; /* target domain */ > + __u64 addr; /* virtual address */ > + const xen_pfn_t __user *arr; /* array of mfns */ > + int __user *err; /* array of error codes */ > }; > > /* > * @cmd: IOCTL_PRIVCMD_HYPERCALL > * @arg: &privcmd_hypercall_t > * Return: Value returned from execution of the specified hypercall. > + * > + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2 > + * @arg: &struct privcmd_mmapbatch_v2 > + * Return: 0 if all pages were mapped successfully. -ENOENT if all > + * failed mappings returned -ENOENT, otherwise the error code of the > + * first failed mapping. > */ > #define IOCTL_PRIVCMD_HYPERCALL \ > _IOC(_IOC_NONE, ''P'', 0, sizeof(struct privcmd_hypercall)) > @@ -73,5 +90,7 @@ struct privcmd_mmapbatch { > _IOC(_IOC_NONE, ''P'', 2, sizeof(struct privcmd_mmap)) > #define IOCTL_PRIVCMD_MMAPBATCH \ > _IOC(_IOC_NONE, ''P'', 3, sizeof(struct privcmd_mmapbatch)) > +#define IOCTL_PRIVCMD_MMAPBATCH_V2 \ > + _IOC(_IOC_NONE, ''P'', 4, sizeof(struct privcmd_mmapbatch_v2)) > > #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ > -- > 1.7.2.5 >
Ian Campbell
2012-Aug-29 16:23 UTC
Re: [PATCHv2 0/2] xen/privcmd: support for paged-out frames
On Wed, 2012-08-29 at 14:15 +0100, David Vrabel wrote:> This series is a straight forward-port of some functionality from > classic kernels to support Xen hosts that do paging of guests. > > This isn''t functionality the XenServer makes use of so I''ve not tested > these with paging in use. > > Changes since v1: > > - Don''t change PRIVCMD_MMAPBATCH (except to #define a constant for the > error). It''s broken and not really fixable sensibly and libxc will > use V2 if it is available. > - Return -ENOENT if all failures were -ENOENT.Is this behaviour a requirement from something? Usually hypercalls of this type return a global error only if something went wrong with the general mechanics of the hypercall (e.g. faults reading arguments etc) and leave reporting of the individual failures of subops to the op specific field, even if all the subops fail in the same way. Ian.> - Clear arg->err on success (libxc expected this). > > I think this should probably get a "Tested-by" Andres or someone else > who uses paging before being applied. > > David > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel
David Vrabel
2012-Aug-29 16:36 UTC
Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
On 29/08/12 17:14, Andres Lagar-Cavilla wrote:> > On Aug 29, 2012, at 9:15 AM, David Vrabel wrote: > >> From: David Vrabel <david.vrabel@citrix.com> >> >> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional >> field for reporting the error code for every frame that could not be >> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.[...]>> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c >> index ccee0f1..ddd32cf 100644 >> --- a/drivers/xen/privcmd.c >> +++ b/drivers/xen/privcmd.c >> @@ -248,18 +248,23 @@ struct mmap_batch_state { >> struct vm_area_struct *vma; >> int err; >> >> - xen_pfn_t __user *user; >> + xen_pfn_t __user *user_mfn; >> + int __user *user_err; >> }; >> >> static int mmap_batch_fn(void *data, void *state) >> { >> xen_pfn_t *mfnp = data; >> + int *err = data; > Am I missing something or is there an aliasing here? Both mfnp and err point to data?There is deliberate aliasing here. We use the mfn array to save the error codes for later processing.>> struct mmap_batch_state *st = state; >> + int ret; >> >> - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, >> - st->vma->vm_page_prot, st->domain) < 0) { >> - *mfnp |= 0xf0000000U; >> - st->err++; >> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, >> + st->vma->vm_page_prot, st->domain); >> + if (ret < 0) { >> + *err = ret; >> + if (st->err == 0 || st->err == -ENOENT) >> + st->err = ret; > This will unset -ENOENT if a frame after an ENOENT error fails differently.I thought that was what the original implementation did but it seems it does not.>> @@ -325,12 +359,16 @@ static long privcmd_ioctl_mmap_batch(void __user *udata) >> >> up_write(&mm->mmap_sem); >> >> - if (state.err > 0) { >> - state.user = m.arr; >> + if (state.err) { >> + state.user_mfn = (xen_pfn_t *)m.arr; >> + state.user_err = m.err; >> ret = traverse_pages(m.num, sizeof(xen_pfn_t), >> - &pagelist, >> - mmap_return_errors, &state); >> - } >> + &pagelist, >> + mmap_return_errors, &state);> The callback now maps data to err (instead of mfnp … but I see no > change to the gather_array other than a cast …am I missing something?The kernel mfn and the err array are aliased. I could have made gather_array() allow the kernel array to have larger elements than the user array but that looked like too much work. David
David Vrabel
2012-Aug-29 16:56 UTC
Re: [PATCHv2 0/2] xen/privcmd: support for paged-out frames
On 29/08/12 17:23, Ian Campbell wrote:> On Wed, 2012-08-29 at 14:15 +0100, David Vrabel wrote: >> This series is a straight forward-port of some functionality from >> classic kernels to support Xen hosts that do paging of guests. >> >> This isn''t functionality the XenServer makes use of so I''ve not tested >> these with paging in use. >> >> Changes since v1: >> >> - Don''t change PRIVCMD_MMAPBATCH (except to #define a constant for the >> error). It''s broken and not really fixable sensibly and libxc will >> use V2 if it is available. >> - Return -ENOENT if all failures were -ENOENT. > > Is this behaviour a requirement from something?It''s the behaviour libxc is expecting. It doesn''t retry unless errno =ENOENT.> Usually hypercalls of this type return a global error only if something > went wrong with the general mechanics of the hypercall (e.g. faults > reading arguments etc) and leave reporting of the individual failures of > subops to the op specific field, even if all the subops fail in the same > way.I didn''t design this interface... Feel free to propose (and implement) an alternate MMAPBATCH_V3 interface. David
Ian Campbell
2012-Aug-29 18:05 UTC
Re: [PATCHv2 0/2] xen/privcmd: support for paged-out frames
On Wed, 2012-08-29 at 17:56 +0100, David Vrabel wrote:> On 29/08/12 17:23, Ian Campbell wrote: > > On Wed, 2012-08-29 at 14:15 +0100, David Vrabel wrote: > >> This series is a straight forward-port of some functionality from > >> classic kernels to support Xen hosts that do paging of guests. > >> > >> This isn''t functionality the XenServer makes use of so I''ve not tested > >> these with paging in use. > >> > >> Changes since v1: > >> > >> - Don''t change PRIVCMD_MMAPBATCH (except to #define a constant for the > >> error). It''s broken and not really fixable sensibly and libxc will > >> use V2 if it is available. > >> - Return -ENOENT if all failures were -ENOENT. > > > > Is this behaviour a requirement from something? > > It''s the behaviour libxc is expecting. It doesn''t retry unless errno => ENOENT.Surely if that is the case you must return -ENOENT if *any* failure was -ENOENT? That seems to be what the linux-2.6.18-xen.hg implementation does.> > Usually hypercalls of this type return a global error only if something > > went wrong with the general mechanics of the hypercall (e.g. faults > > reading arguments etc) and leave reporting of the individual failures of > > subops to the op specific field, even if all the subops fail in the same > > way. > > I didn''t design this interface...The interface you described doesn''t make any sense so I was trying to suggest a way in which you may have misunderstood the interface you were trying to implement by pointing out the common pattern. It turns out this interface doesn''t follow that common pattern, but it''s still different to what you described AFAICT.> Feel free to propose (and implement) an alternate MMAPBATCH_V3 interface.No thanks ;-) Ian.
Andres Lagar-Cavilla
2012-Aug-29 18:10 UTC
Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
On Aug 29, 2012, at 12:36 PM, David Vrabel wrote:> On 29/08/12 17:14, Andres Lagar-Cavilla wrote: >> >> On Aug 29, 2012, at 9:15 AM, David Vrabel wrote: >> >>> From: David Vrabel <david.vrabel@citrix.com> >>> >>> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional >>> field for reporting the error code for every frame that could not be >>> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH. > [...] >>> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c >>> index ccee0f1..ddd32cf 100644 >>> --- a/drivers/xen/privcmd.c >>> +++ b/drivers/xen/privcmd.c >>> @@ -248,18 +248,23 @@ struct mmap_batch_state { >>> struct vm_area_struct *vma; >>> int err; >>> >>> - xen_pfn_t __user *user; >>> + xen_pfn_t __user *user_mfn; >>> + int __user *user_err; >>> }; >>> >>> static int mmap_batch_fn(void *data, void *state) >>> { >>> xen_pfn_t *mfnp = data; >>> + int *err = data; >> Am I missing something or is there an aliasing here? Both mfnp and err point to data? > > There is deliberate aliasing here. We use the mfn array to save the > error codes for later processing.May I suggest a comment to clarify this here? Are xen_pfn_t and int the same size in both bitnesses? The very fact that I raise the question is an argument against this black-magic aliasing. Imho. A explicit union for each slot in the *data, or passing both arrays to the callback looks better to me.> >>> struct mmap_batch_state *st = state; >>> + int ret; >>> >>> - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, >>> - st->vma->vm_page_prot, st->domain) < 0) { >>> - *mfnp |= 0xf0000000U; >>> - st->err++; >>> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, >>> + st->vma->vm_page_prot, st->domain); >>> + if (ret < 0) { >>> + *err = ret; >>> + if (st->err == 0 || st->err == -ENOENT) >>> + st->err = ret; >> This will unset -ENOENT if a frame after an ENOENT error fails differently. > > I thought that was what the original implementation did but it seems it > does notI think the best way to do this is: if ((ret == -ENOENT) && (st->err == 0)) st->err = -ENOENT; Then st->err is -ENOENT if at least there was one individual -ENOENT or zero otherwise. Which is the expectation of libxc (barring an EFAULT or some other higher-level whole-operation error). Andres> . > >>> @@ -325,12 +359,16 @@ static long privcmd_ioctl_mmap_batch(void __user *udata) >>> >>> up_write(&mm->mmap_sem); >>> >>> - if (state.err > 0) { >>> - state.user = m.arr; >>> + if (state.err) { >>> + state.user_mfn = (xen_pfn_t *)m.arr; >>> + state.user_err = m.err; >>> ret = traverse_pages(m.num, sizeof(xen_pfn_t), >>> - &pagelist, >>> - mmap_return_errors, &state); >>> - } >>> + &pagelist, >>> + mmap_return_errors, &state); > >> The callback now maps data to err (instead of mfnp … but I see no >> change to the gather_array other than a cast …am I missing something? > > The kernel mfn and the err array are aliased. > > I could have made gather_array() allow the kernel array to have larger > elements than the user array but that looked like too much work. > > David
David Vrabel
2012-Aug-30 10:09 UTC
Re: [PATCHv2 0/2] xen/privcmd: support for paged-out frames
On 29/08/12 19:05, Ian Campbell wrote:> On Wed, 2012-08-29 at 17:56 +0100, David Vrabel wrote: >> On 29/08/12 17:23, Ian Campbell wrote: >>> On Wed, 2012-08-29 at 14:15 +0100, David Vrabel wrote: >>>> This series is a straight forward-port of some functionality from >>>> classic kernels to support Xen hosts that do paging of guests. >>>> >>>> This isn''t functionality the XenServer makes use of so I''ve not tested >>>> these with paging in use. >>>> >>>> Changes since v1: >>>> >>>> - Don''t change PRIVCMD_MMAPBATCH (except to #define a constant for the >>>> error). It''s broken and not really fixable sensibly and libxc will >>>> use V2 if it is available. >>>> - Return -ENOENT if all failures were -ENOENT. >>> >>> Is this behaviour a requirement from something? >> >> It''s the behaviour libxc is expecting. It doesn''t retry unless errno =>> ENOENT. > > Surely if that is the case you must return -ENOENT if *any* failure was > -ENOENT? That seems to be what the linux-2.6.18-xen.hg implementation > does.Yes. libxc will subsequently fail the whole map call is any frame errors without ENOENT so if someone was to propose a V3 it may be useful to return a different error code for other errors.>>> Usually hypercalls of this type return a global error only if something >>> went wrong with the general mechanics of the hypercall (e.g. faults >>> reading arguments etc) and leave reporting of the individual failures of >>> subops to the op specific field, even if all the subops fail in the same >>> way. >> >> I didn''t design this interface... > > The interface you described doesn''t make any sense...I don''t entirely agree. There are three types of result: success, retryable errors, and fatal errors. It''s not nonsensical to return different error codes for these. Regardless, it''s not what the original implementation does so I''ll fix rework the patch. David