In this patch, we define PHYSDEVOP_map_iomem and add support for it. Also, XEN_DOMCTL_memory_mapping code is put into a function so it can be shared later for PVH. No change in XEN_DOMCTL_memory_mapping functionality. Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com> diff -r ede1afe68962 -r 93d95f6dd693 xen/arch/x86/domctl.c --- a/xen/arch/x86/domctl.c Fri Jan 11 16:20:38 2013 -0800 +++ b/xen/arch/x86/domctl.c Fri Jan 11 16:22:57 2013 -0800 @@ -46,6 +46,73 @@ static int gdbsx_guest_mem_io( return (iop->remain ? -EFAULT : 0); } +long domctl_memory_mapping(struct domain *d, unsigned long gfn, + unsigned long mfn, unsigned long nr_mfns, + int add_map) +{ + int i; + long ret = -EINVAL; + + if ( (mfn + nr_mfns - 1) < mfn || /* wrap? */ + ((mfn | (mfn + nr_mfns - 1)) >> (paddr_bits - PAGE_SHIFT)) || + (gfn + nr_mfns - 1) < gfn ) /* wrap? */ + return ret; + + ret = -EPERM; + if ( !IS_PRIV(current->domain) && + !iomem_access_permitted(current->domain, mfn, mfn + nr_mfns - 1) ) + return ret; + + ret = xsm_iomem_permission(d, mfn, mfn + nr_mfns - 1, add_map); + if ( ret ) + return ret; + + if ( add_map ) + { + printk(XENLOG_G_INFO + "memory_map:add: dom%d gfn=%lx mfn=%lx nr=%lx\n", + d->domain_id, gfn, mfn, nr_mfns); + + ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1); + if ( !ret && paging_mode_translate(d) ) + { + for ( i = 0; !ret && i < nr_mfns; i++ ) + if ( !set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i)) ) + ret = -EIO; + if ( ret ) + { + printk(XENLOG_G_WARNING + "memory_map:fail: dom%d gfn=%lx mfn=%lx\n", + d->domain_id, gfn + i, mfn + i); + while ( i-- ) + clear_mmio_p2m_entry(d, gfn + i); + if ( iomem_deny_access(d, mfn, mfn + nr_mfns - 1) && + IS_PRIV(current->domain) ) + printk(XENLOG_ERR + "memory_map: failed to deny dom%d access to [%lx,%lx]\n", + d->domain_id, mfn, mfn + nr_mfns - 1); + } + } + } else { + printk(XENLOG_G_INFO + "memory_map:remove: dom%d gfn=%lx mfn=%lx nr=%lx\n", + d->domain_id, gfn, mfn, nr_mfns); + + if ( paging_mode_translate(d) ) 
+ for ( i = 0; i < nr_mfns; i++ ) + add_map |= !clear_mmio_p2m_entry(d, gfn + i); + ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1); + if ( !ret && add_map ) + ret = -EIO; + if ( ret && IS_PRIV(current->domain) ) + printk(XENLOG_ERR + "memory_map: error %ld %s dom%d access to [%lx,%lx]\n", + ret, add_map ? "removing" : "denying", d->domain_id, + mfn, mfn + nr_mfns - 1); + } + return ret; +} + long arch_do_domctl( struct xen_domctl *domctl, XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) @@ -825,75 +892,13 @@ long arch_do_domctl( unsigned long mfn = domctl->u.memory_mapping.first_mfn; unsigned long nr_mfns = domctl->u.memory_mapping.nr_mfns; int add = domctl->u.memory_mapping.add_mapping; - unsigned long i; - ret = -EINVAL; - if ( (mfn + nr_mfns - 1) < mfn || /* wrap? */ - ((mfn | (mfn + nr_mfns - 1)) >> (paddr_bits - PAGE_SHIFT)) || - (gfn + nr_mfns - 1) < gfn ) /* wrap? */ - break; - - ret = -EPERM; - if ( !IS_PRIV(current->domain) && - !iomem_access_permitted(current->domain, mfn, mfn + nr_mfns - 1) ) - break; ret = -ESRCH; if ( unlikely((d = rcu_lock_domain_by_id(domctl->domain)) == NULL) ) break; - ret = xsm_iomem_permission(d, mfn, mfn + nr_mfns - 1, add); - if ( ret ) { - rcu_unlock_domain(d); - break; - } - - if ( add ) - { - printk(XENLOG_G_INFO - "memory_map:add: dom%d gfn=%lx mfn=%lx nr=%lx\n", - d->domain_id, gfn, mfn, nr_mfns); - - ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1); - if ( !ret && paging_mode_translate(d) ) - { - for ( i = 0; !ret && i < nr_mfns; i++ ) - if ( !set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i)) ) - ret = -EIO; - if ( ret ) - { - printk(XENLOG_G_WARNING - "memory_map:fail: dom%d gfn=%lx mfn=%lx\n", - d->domain_id, gfn + i, mfn + i); - while ( i-- ) - clear_mmio_p2m_entry(d, gfn + i); - if ( iomem_deny_access(d, mfn, mfn + nr_mfns - 1) && - IS_PRIV(current->domain) ) - printk(XENLOG_ERR - "memory_map: failed to deny dom%d access to [%lx,%lx]\n", - d->domain_id, mfn, mfn + nr_mfns - 1); - } - } - } - else - { - 
printk(XENLOG_G_INFO - "memory_map:remove: dom%d gfn=%lx mfn=%lx nr=%lx\n", - d->domain_id, gfn, mfn, nr_mfns); - - if ( paging_mode_translate(d) ) - for ( i = 0; i < nr_mfns; i++ ) - add |= !clear_mmio_p2m_entry(d, gfn + i); - ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1); - if ( !ret && add ) - ret = -EIO; - if ( ret && IS_PRIV(current->domain) ) - printk(XENLOG_ERR - "memory_map: error %ld %s dom%d access to [%lx,%lx]\n", - ret, add ? "removing" : "denying", d->domain_id, - mfn, mfn + nr_mfns - 1); - } - + ret = domctl_memory_mapping(d, gfn, mfn, nr_mfns, add); rcu_unlock_domain(d); } break; diff -r ede1afe68962 -r 93d95f6dd693 xen/arch/x86/physdev.c --- a/xen/arch/x86/physdev.c Fri Jan 11 16:20:38 2013 -0800 +++ b/xen/arch/x86/physdev.c Fri Jan 11 16:22:57 2013 -0800 @@ -732,6 +732,24 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H break; } + case PHYSDEVOP_map_iomem : { + + struct physdev_map_iomem iomem; + struct domain *d = current->domain; + + ret = -EPERM; + + d = rcu_lock_current_domain(); + + ret = -EFAULT; + if ( copy_from_guest(&iomem, arg, 1) != 0 ) + break; + + ret = domctl_memory_mapping(d, iomem.first_gfn, iomem.first_mfn, + iomem.nr_mfns, iomem.add_mapping); + break; + } + default: ret = -ENOSYS; break; diff -r ede1afe68962 -r 93d95f6dd693 xen/include/public/physdev.h --- a/xen/include/public/physdev.h Fri Jan 11 16:20:38 2013 -0800 +++ b/xen/include/public/physdev.h Fri Jan 11 16:22:57 2013 -0800 @@ -330,6 +330,19 @@ struct physdev_dbgp_op { typedef struct physdev_dbgp_op physdev_dbgp_op_t; DEFINE_XEN_GUEST_HANDLE(physdev_dbgp_op_t); + +#define PHYSDEVOP_map_iomem 30 +struct physdev_map_iomem { + /* IN */ + unsigned long first_gfn; + unsigned long first_mfn; + unsigned int nr_mfns; + unsigned int add_mapping; /* 1 == add mapping; 0 == unmap */ + +}; +typedef struct physdev_map_iomem physdev_map_iomem_t; +DEFINE_XEN_GUEST_HANDLE(physdev_map_iomem_t); + /* * Notify that some PIRQ-bound event channels have been unmasked. 
* ** This command is obsolete since interface version 0x00030202 and is ** diff -r ede1afe68962 -r 93d95f6dd693 xen/include/xen/domain.h --- a/xen/include/xen/domain.h Fri Jan 11 16:20:38 2013 -0800 +++ b/xen/include/xen/domain.h Fri Jan 11 16:22:57 2013 -0800 @@ -86,4 +86,7 @@ extern unsigned int xen_processor_pmbits extern bool_t opt_dom0_vcpus_pin; +extern long domctl_memory_mapping(struct domain *d, unsigned long gfn, + unsigned long mfn, unsigned long nr_mfns, int add_map); + #endif /* __XEN_DOMAIN_H__ */
>>> On 12.01.13 at 02:32, Mukesh Rathor <mukesh.rathor@oracle.com> wrote: > In this patch, we define PHYSDEVOP_map_iomem and add support for > it. Also, XEN_DOMCTL_memory_mapping code is put into a function so it > can be shared later for PVH. No change in XEN_DOMCTL_memory_mapping > functionality.Is that to say that a PVH guest will need to issue this for each and every MMIO range? Irrespective of being DomU or Dom0? I would have expected that this could be transparent... Jan> Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com> > > diff -r ede1afe68962 -r 93d95f6dd693 xen/arch/x86/domctl.c > --- a/xen/arch/x86/domctl.c Fri Jan 11 16:20:38 2013 -0800 > +++ b/xen/arch/x86/domctl.c Fri Jan 11 16:22:57 2013 -0800 > @@ -46,6 +46,73 @@ static int gdbsx_guest_mem_io( > return (iop->remain ? -EFAULT : 0); > } > > +long domctl_memory_mapping(struct domain *d, unsigned long gfn, > + unsigned long mfn, unsigned long nr_mfns, > + int add_map) > +{ > + int i; > + long ret = -EINVAL; > + > + if ( (mfn + nr_mfns - 1) < mfn || /* wrap? */ > + ((mfn | (mfn + nr_mfns - 1)) >> (paddr_bits - PAGE_SHIFT)) || > + (gfn + nr_mfns - 1) < gfn ) /* wrap? 
*/ > + return ret; > + > + ret = -EPERM; > + if ( !IS_PRIV(current->domain) && > + !iomem_access_permitted(current->domain, mfn, mfn + nr_mfns - 1) ) > + return ret; > + > + ret = xsm_iomem_permission(d, mfn, mfn + nr_mfns - 1, add_map); > + if ( ret ) > + return ret; > + > + if ( add_map ) > + { > + printk(XENLOG_G_INFO > + "memory_map:add: dom%d gfn=%lx mfn=%lx nr=%lx\n", > + d->domain_id, gfn, mfn, nr_mfns); > + > + ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1); > + if ( !ret && paging_mode_translate(d) ) > + { > + for ( i = 0; !ret && i < nr_mfns; i++ ) > + if ( !set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i)) ) > + ret = -EIO; > + if ( ret ) > + { > + printk(XENLOG_G_WARNING > + "memory_map:fail: dom%d gfn=%lx mfn=%lx\n", > + d->domain_id, gfn + i, mfn + i); > + while ( i-- ) > + clear_mmio_p2m_entry(d, gfn + i); > + if ( iomem_deny_access(d, mfn, mfn + nr_mfns - 1) && > + IS_PRIV(current->domain) ) > + printk(XENLOG_ERR > + "memory_map: failed to deny dom%d access to > [%lx,%lx]\n", > + d->domain_id, mfn, mfn + nr_mfns - 1); > + } > + } > + } else { > + printk(XENLOG_G_INFO > + "memory_map:remove: dom%d gfn=%lx mfn=%lx nr=%lx\n", > + d->domain_id, gfn, mfn, nr_mfns); > + > + if ( paging_mode_translate(d) ) > + for ( i = 0; i < nr_mfns; i++ ) > + add_map |= !clear_mmio_p2m_entry(d, gfn + i); > + ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1); > + if ( !ret && add_map ) > + ret = -EIO; > + if ( ret && IS_PRIV(current->domain) ) > + printk(XENLOG_ERR > + "memory_map: error %ld %s dom%d access to [%lx,%lx]\n", > + ret, add_map ? 
"removing" : "denying", d->domain_id, > + mfn, mfn + nr_mfns - 1); > + } > + return ret; > +} > + > long arch_do_domctl( > struct xen_domctl *domctl, > XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) > @@ -825,75 +892,13 @@ long arch_do_domctl( > unsigned long mfn = domctl->u.memory_mapping.first_mfn; > unsigned long nr_mfns = domctl->u.memory_mapping.nr_mfns; > int add = domctl->u.memory_mapping.add_mapping; > - unsigned long i; > > - ret = -EINVAL; > - if ( (mfn + nr_mfns - 1) < mfn || /* wrap? */ > - ((mfn | (mfn + nr_mfns - 1)) >> (paddr_bits - PAGE_SHIFT)) || > - (gfn + nr_mfns - 1) < gfn ) /* wrap? */ > - break; > - > - ret = -EPERM; > - if ( !IS_PRIV(current->domain) && > - !iomem_access_permitted(current->domain, mfn, mfn + nr_mfns - 1) ) > - break; > > ret = -ESRCH; > if ( unlikely((d = rcu_lock_domain_by_id(domctl->domain)) == NULL) ) > break; > > - ret = xsm_iomem_permission(d, mfn, mfn + nr_mfns - 1, add); > - if ( ret ) { > - rcu_unlock_domain(d); > - break; > - } > - > - if ( add ) > - { > - printk(XENLOG_G_INFO > - "memory_map:add: dom%d gfn=%lx mfn=%lx nr=%lx\n", > - d->domain_id, gfn, mfn, nr_mfns); > - > - ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1); > - if ( !ret && paging_mode_translate(d) ) > - { > - for ( i = 0; !ret && i < nr_mfns; i++ ) > - if ( !set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i)) ) > - ret = -EIO; > - if ( ret ) > - { > - printk(XENLOG_G_WARNING > - "memory_map:fail: dom%d gfn=%lx mfn=%lx\n", > - d->domain_id, gfn + i, mfn + i); > - while ( i-- ) > - clear_mmio_p2m_entry(d, gfn + i); > - if ( iomem_deny_access(d, mfn, mfn + nr_mfns - 1) && > - IS_PRIV(current->domain) ) > - printk(XENLOG_ERR > - "memory_map: failed to deny dom%d access to > [%lx,%lx]\n", > - d->domain_id, mfn, mfn + nr_mfns - 1); > - } > - } > - } > - else > - { > - printk(XENLOG_G_INFO > - "memory_map:remove: dom%d gfn=%lx mfn=%lx nr=%lx\n", > - d->domain_id, gfn, mfn, nr_mfns); > - > - if ( paging_mode_translate(d) ) > - for ( i = 0; i < nr_mfns; i++ ) 
> - add |= !clear_mmio_p2m_entry(d, gfn + i); > - ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1); > - if ( !ret && add ) > - ret = -EIO; > - if ( ret && IS_PRIV(current->domain) ) > - printk(XENLOG_ERR > - "memory_map: error %ld %s dom%d access to > [%lx,%lx]\n", > - ret, add ? "removing" : "denying", d->domain_id, > - mfn, mfn + nr_mfns - 1); > - } > - > + ret = domctl_memory_mapping(d, gfn, mfn, nr_mfns, add); > rcu_unlock_domain(d); > } > break; > diff -r ede1afe68962 -r 93d95f6dd693 xen/arch/x86/physdev.c > --- a/xen/arch/x86/physdev.c Fri Jan 11 16:20:38 2013 -0800 > +++ b/xen/arch/x86/physdev.c Fri Jan 11 16:22:57 2013 -0800 > @@ -732,6 +732,24 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H > break; > } > > + case PHYSDEVOP_map_iomem : { > + > + struct physdev_map_iomem iomem; > + struct domain *d = current->domain; > + > + ret = -EPERM; > + > + d = rcu_lock_current_domain(); > + > + ret = -EFAULT; > + if ( copy_from_guest(&iomem, arg, 1) != 0 ) > + break; > + > + ret = domctl_memory_mapping(d, iomem.first_gfn, iomem.first_mfn, > + iomem.nr_mfns, iomem.add_mapping); > + break; > + } > + > default: > ret = -ENOSYS; > break; > diff -r ede1afe68962 -r 93d95f6dd693 xen/include/public/physdev.h > --- a/xen/include/public/physdev.h Fri Jan 11 16:20:38 2013 -0800 > +++ b/xen/include/public/physdev.h Fri Jan 11 16:22:57 2013 -0800 > @@ -330,6 +330,19 @@ struct physdev_dbgp_op { > typedef struct physdev_dbgp_op physdev_dbgp_op_t; > DEFINE_XEN_GUEST_HANDLE(physdev_dbgp_op_t); > > + > +#define PHYSDEVOP_map_iomem 30 > +struct physdev_map_iomem { > + /* IN */ > + unsigned long first_gfn; > + unsigned long first_mfn; > + unsigned int nr_mfns; > + unsigned int add_mapping; /* 1 == add mapping; 0 == unmap */ > + > +}; > +typedef struct physdev_map_iomem physdev_map_iomem_t; > +DEFINE_XEN_GUEST_HANDLE(physdev_map_iomem_t); > + > /* > * Notify that some PIRQ-bound event channels have been unmasked. 
> * ** This command is obsolete since interface version 0x00030202 and is ** > diff -r ede1afe68962 -r 93d95f6dd693 xen/include/xen/domain.h > --- a/xen/include/xen/domain.h Fri Jan 11 16:20:38 2013 -0800 > +++ b/xen/include/xen/domain.h Fri Jan 11 16:22:57 2013 -0800 > @@ -86,4 +86,7 @@ extern unsigned int xen_processor_pmbits > > extern bool_t opt_dom0_vcpus_pin; > > +extern long domctl_memory_mapping(struct domain *d, unsigned long gfn, > + unsigned long mfn, unsigned long nr_mfns, int add_map); > + > #endif /* __XEN_DOMAIN_H__ */ > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel
Mukesh Rathor
2013-Jan-15 23:35 UTC
Re: [RFC PATCH 3/16]: PVH xen: Add PHYSDEVOP_map_iomem
On Mon, 14 Jan 2013 11:23:42 +0000 "Jan Beulich" <JBeulich@suse.com> wrote:> >>> On 12.01.13 at 02:32, Mukesh Rathor <mukesh.rathor@oracle.com> > >>> wrote: > > In this patch, we define PHYSDEVOP_map_iomem and add support for > > it. Also, XEN_DOMCTL_memory_mapping code is put into a function so > > it can be shared later for PVH. No change in > > XEN_DOMCTL_memory_mapping functionality. > > Is that to say that a PVH guest will need to issue this for each and > every MMIO range? Irrespective of being DomU or Dom0? I would > have expected that this could be transparent... >Hmm.. we discussed this at xen hackathon last year. The guest maps the entire range in one shot. Doing it this way keeps things flexible for future if EPT size gets to be a problem. Mukesh
>>> On 16.01.13 at 00:35, Mukesh Rathor <mukesh.rathor@oracle.com> wrote: > On Mon, 14 Jan 2013 11:23:42 +0000 "Jan Beulich" <JBeulich@suse.com> wrote: >> >>> On 12.01.13 at 02:32, Mukesh Rathor <mukesh.rathor@oracle.com> >> >>> wrote: >> > In this patch, we define PHYSDEVOP_map_iomem and add support for >> > it. Also, XEN_DOMCTL_memory_mapping code is put into a function so >> > it can be shared later for PVH. No change in >> > XEN_DOMCTL_memory_mapping functionality. >> >> Is that to say that a PVH guest will need to issue this for each and >> every MMIO range? Irrespective of being DomU or Dom0? I would >> have expected that this could be transparent... > > Hmm.. we discussed this at xen hackathon last year. The > guest maps the entire range in one shot. Doing it this way keeps > things flexible for future if EPT size gets to be a problem.But is this the only way to do this? I.e. is there no transparent alternative? I hadn't been on the hackathon, so I don't know what eventual alternative options got explored... Jan
Mukesh Rathor
2013-Jan-24 02:12 UTC
Re: [RFC PATCH 3/16]: PVH xen: Add PHYSDEVOP_map_iomem
On Wed, 16 Jan 2013 09:45:07 +0000 "Jan Beulich" <JBeulich@suse.com> wrote:> >>> On 16.01.13 at 00:35, Mukesh Rathor <mukesh.rathor@oracle.com> > >>> wrote: > > On Mon, 14 Jan 2013 11:23:42 +0000 "Jan Beulich" > > <JBeulich@suse.com> wrote: > >> >>> On 12.01.13 at 02:32, Mukesh Rathor <mukesh.rathor@oracle.com> > >> >>> wrote: > >> > In this patch, we define PHYSDEVOP_map_iomem and add support for > >> > it. Also, XEN_DOMCTL_memory_mapping code is put into a function > >> > so it can be shared later for PVH. No change in > >> > XEN_DOMCTL_memory_mapping functionality. > >> > >> Is that to say that a PVH guest will need to issue this for each > >> and every MMIO range? Irrespective of being DomU or Dom0? I would > >> have expected that this could be transparent... > > > > Hmm.. we discussed this at xen hackathon last year. The > > guest maps the entire range in one shot. Doing it this way keeps > > things flexible for future if EPT size gets to be a problem. > > But is this the only way to do this? I.e. is there no transparent > alternative?Like what? If you can explain a bit more, I can try to prototype it. Are you suggesting you don't want the guest be involved at all in the mmio mappings? BTW, we map the entire range at present walking the e820. thanks, Mukesh
>>> On 24.01.13 at 03:12, Mukesh Rathor <mukesh.rathor@oracle.com> wrote: > On Wed, 16 Jan 2013 09:45:07 +0000 > "Jan Beulich" <JBeulich@suse.com> wrote: > >> >>> On 16.01.13 at 00:35, Mukesh Rathor <mukesh.rathor@oracle.com> >> >>> wrote: >> > On Mon, 14 Jan 2013 11:23:42 +0000 "Jan Beulich" >> > <JBeulich@suse.com> wrote: >> >> >>> On 12.01.13 at 02:32, Mukesh Rathor <mukesh.rathor@oracle.com> >> >> >>> wrote: >> >> > In this patch, we define PHYSDEVOP_map_iomem and add support for >> >> > it. Also, XEN_DOMCTL_memory_mapping code is put into a function >> >> > so it can be shared later for PVH. No change in >> >> > XEN_DOMCTL_memory_mapping functionality. >> >> >> >> Is that to say that a PVH guest will need to issue this for each >> >> and every MMIO range? Irrespective of being DomU or Dom0? I would >> >> have expected that this could be transparent... >> > >> > Hmm.. we discussed this at xen hackathon last year. The >> > guest maps the entire range in one shot. Doing it this way keeps >> > things flexible for future if EPT size gets to be a problem. >> >> But is this the only way to do this? I.e. is there no transparent >> alternative? > > Like what? If you can explain a bit more, I can try to prototype it. > Are you suggesting you don''t want the guest be involved at all in the > mmio mappings? BTW, we map the entire range at present walking the > e820.If you map the entire range anyway, I see even less reason for Dom0 to do that - the hypervisor could launch Dom0 with all the ranges mapped then. Jan
At 17:32 -0800 on 11 Jan (1357925563), Mukesh Rathor wrote:> In this patch, we define PHYSDEVOP_map_iomem and add support for > it. Also, XEN_DOMCTL_memory_mapping code is put into a function so it > can be shared later for PVH. No change in XEN_DOMCTL_memory_mapping > functionality. > > Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com> > > diff -r ede1afe68962 -r 93d95f6dd693 xen/arch/x86/domctl.c > diff -r ede1afe68962 -r 93d95f6dd693 xen/include/public/physdev.h > --- a/xen/include/public/physdev.h Fri Jan 11 16:20:38 2013 -0800 > +++ b/xen/include/public/physdev.h Fri Jan 11 16:22:57 2013 -0800 > @@ -330,6 +330,19 @@ struct physdev_dbgp_op { > typedef struct physdev_dbgp_op physdev_dbgp_op_t; > DEFINE_XEN_GUEST_HANDLE(physdev_dbgp_op_t); > > + > +#define PHYSDEVOP_map_iomem 30 > +struct physdev_map_iomem { > + /* IN */ > + unsigned long first_gfn; > + unsigned long first_mfn; > + unsigned int nr_mfns; > + unsigned int add_mapping; /* 1 == add mapping; 0 == unmap */ > + > +}; > +typedef struct physdev_map_iomem physdev_map_iomem_t; > +DEFINE_XEN_GUEST_HANDLE(physdev_map_iomem_t); > +This needs documentation. Also, the arguemnts should be explicitly sized to avoid compat difficulties. Tim.
Mukesh Rathor
2013-Jan-25 01:03 UTC
Re: [RFC PATCH 3/16]: PVH xen: Add PHYSDEVOP_map_iomem
On Thu, 24 Jan 2013 15:06:29 +0000 Tim Deegan <tim@xen.org> wrote:> At 17:32 -0800 on 11 Jan (1357925563), Mukesh Rathor wrote: >DEFINE_XEN_GUEST_HANDLE(physdev_dbgp_op_t); > > > > + > > +#define PHYSDEVOP_map_iomem 30 > > +struct physdev_map_iomem { > > + /* IN */ > > + unsigned long first_gfn; > > + unsigned long first_mfn; > > + unsigned int nr_mfns; > > + unsigned int add_mapping; /* 1 == add mapping; 0 => > unmap */ + > > +}; > > +typedef struct physdev_map_iomem physdev_map_iomem_t; > > +DEFINE_XEN_GUEST_HANDLE(physdev_map_iomem_t); > > + > > This needs documentation. Also, the arguemnts should be explicitly > sized to avoid compat difficulties. > > Tim.Done: /* Map given gfns to mfns where mfns are part of IO space. */ #define PHYSDEVOP_map_iomem 30 struct physdev_map_iomem { /* IN */ uint64_t first_gfn; uint64_t first_mfn; uint32_t nr_mfns; uint32_t add_mapping; /* 1 == add mapping; 0 == unmap */ }; thanks, Mukesh
Mukesh Rathor
2013-Jan-25 01:53 UTC
Re: [RFC PATCH 3/16]: PVH xen: Add PHYSDEVOP_map_iomem
On Thu, 24 Jan 2013 09:23:33 +0000 "Jan Beulich" <JBeulich@suse.com> wrote:> >>> On 24.01.13 at 03:12, Mukesh Rathor <mukesh.rathor@oracle.com> > >>> wrote: > > On Wed, 16 Jan 2013 09:45:07 +0000 > > "Jan Beulich" <JBeulich@suse.com> wrote: > > > >> >>> On 16.01.13 at 00:35, Mukesh Rathor <mukesh.rathor@oracle.com> > >> >>> wrote: > >> > On Mon, 14 Jan 2013 11:23:42 +0000 "Jan Beulich" > >> that to say that a PVH guest will need to issue this for each > >> >> and every MMIO range? Irrespective of being DomU or Dom0? I > >> >> would have expected that this could be transparent... > >> > > >> > Hmm.. we discussed this at xen hackathon last year. The > >> > guest maps the entire range in one shot. Doing it this way keeps > >> > things flexible for future if EPT size gets to be a problem. > >> > >> But is this the only way to do this? I.e. is there no transparent > >> alternative? > > > > Like what? If you can explain a bit more, I can try to prototype it. > > Are you suggesting you don''t want the guest be involved at all in > > the mmio mappings? BTW, we map the entire range at present walking > > the e820. > > If you map the entire range anyway, I see even less reason for > Dom0 to do that - the hypervisor could launch Dom0 with all the > ranges mapped then.True. A brief background: initially I was only mapping selective ranges by hooking into pte updates by drivers, etc.. in linux. At that point ACPI table was giving a bit of problem (I later got that working), so after discussing with Ian and Stefano we decided to map the entire range initially, but later if EPT size gets to be an issue (specially on large NUMA) boxes, we could go back to selective mapping of ranges. Since this happens once during boot, I am ok either way. Staying with what I''ve keeps linux code clean and also provides flexibily for future in case. But if you feel strongly, I can special case dom0 in linux to assume xen has it all mapped, and generate a patch there. Please lmk. thanks, Mukesh
>>> On 25.01.13 at 02:53, Mukesh Rathor <mukesh.rathor@oracle.com> wrote: > Since this happens once during boot, I am ok either way. Staying with > what I've keeps linux code clean and also provides flexibily for future > in case. But if you feel strongly, I can special case dom0 in linux > to assume xen has it all mapped, and generate a patch there. Please lmk.Hmm, special casing Dom0 isn't what I'm after. I want this to be transparent to the kernel in all cases (keeps the Linux code even cleaner). Jan
Mukesh Rathor
2013-Jan-26 02:23 UTC
Re: [RFC PATCH 3/16]: PVH xen: Add PHYSDEVOP_map_iomem
On Fri, 25 Jan 2013 08:05:40 +0000 "Jan Beulich" <JBeulich@suse.com> wrote:> >>> On 25.01.13 at 02:53, Mukesh Rathor <mukesh.rathor@oracle.com> > >>> wrote: > > Since this happens once during boot, I am ok either way. Staying > > with what I've keeps linux code clean and also provides flexibily > > for future in case. But if you feel strongly, I can special case > > dom0 in linux to assume xen has it all mapped, and generate a patch > > there. Please lmk. > > Hmm, special casing Dom0 isn't what I'm after. I want this to be > transparent to the kernel in all cases (keeps the Linux code even > cleaner).Ok. I am looking into it. Stefano, Ian, any comments? You guys OK with that approach? I probably won't need PHYSDEVOP_map_iomem then. thanks, Mukesh
Mukesh Rathor
2013-Jan-26 03:04 UTC
Re: [RFC PATCH 3/16]: PVH xen: Add PHYSDEVOP_map_iomem
On Fri, 25 Jan 2013 18:23:49 -0800 Mukesh Rathor <mukesh.rathor@oracle.com> wrote:> On Fri, 25 Jan 2013 08:05:40 +0000 > "Jan Beulich" <JBeulich@suse.com> wrote: > > > >>> On 25.01.13 at 02:53, Mukesh Rathor <mukesh.rathor@oracle.com> > > >>> wrote: > > > Since this happens once during boot, I am ok either way. Staying > > > with what I've keeps linux code clean and also provides flexibily > > > for future in case. But if you feel strongly, I can special case > > > dom0 in linux to assume xen has it all mapped, and generate a > > > patch there. Please lmk. > > > > Hmm, special casing Dom0 isn't what I'm after. I want this to be > > transparent to the kernel in all cases (keeps the Linux code even > > cleaner). > > Ok. I am looking into it. Stefano, Ian, any comments? You guys OK with > that approach? I probably won't need PHYSDEVOP_map_iomem then.Forgot to cc stefano and Ian. Resending CCing them.
On Sat, 2013-01-26 at 02:23 +0000, Mukesh Rathor wrote:> On Fri, 25 Jan 2013 08:05:40 +0000 > "Jan Beulich" <JBeulich@suse.com> wrote: > > > >>> On 25.01.13 at 02:53, Mukesh Rathor <mukesh.rathor@oracle.com> > > >>> wrote: > > > Since this happens once during boot, I am ok either way. Staying > > > with what I''ve keeps linux code clean and also provides flexibily > > > for future in case. But if you feel strongly, I can special case > > > dom0 in linux to assume xen has it all mapped, and generate a patch > > > there. Please lmk. > > > > Hmm, special casing Dom0 isn''t what I''m after. I want this to be > > transparent to the kernel in all cases (keeps the Linux code even > > cleaner). > > Ok. I am looking into it."it" here is mapping the MMIO regions in the domain builder instead of have the kernel do it? Sounds good to me. Ian.
Stefano Stabellini
2013-Jan-28 15:13 UTC
Re: [RFC PATCH 3/16]: PVH xen: Add PHYSDEVOP_map_iomem
On Sat, 26 Jan 2013, Mukesh Rathor wrote:> On Fri, 25 Jan 2013 18:23:49 -0800 > Mukesh Rathor <mukesh.rathor@oracle.com> wrote: > > > On Fri, 25 Jan 2013 08:05:40 +0000 > > "Jan Beulich" <JBeulich@suse.com> wrote: > > > > > >>> On 25.01.13 at 02:53, Mukesh Rathor <mukesh.rathor@oracle.com> > > > >>> wrote: > > > > Since this happens once during boot, I am ok either way. Staying > > > > with what I''ve keeps linux code clean and also provides flexibily > > > > for future in case. But if you feel strongly, I can special case > > > > dom0 in linux to assume xen has it all mapped, and generate a > > > > patch there. Please lmk. > > > > > > Hmm, special casing Dom0 isn''t what I''m after. I want this to be > > > transparent to the kernel in all cases (keeps the Linux code even > > > cleaner). > > > > Ok. I am looking into it. Stefano, Ian, any comments? You guys OK with > > that approach? I probably won''t need PHYSDEVOP_map_iomem then. > > Forgot to cc stefano and Ian. Resending CCing them.Yeah, it looks like a reasonable approach.
Konrad Rzeszutek Wilk
2013-Jan-28 16:39 UTC
Re: [RFC PATCH 3/16]: PVH xen: Add PHYSDEVOP_map_iomem
On Thu, Jan 24, 2013 at 05:03:50PM -0800, Mukesh Rathor wrote:> On Thu, 24 Jan 2013 15:06:29 +0000 > Tim Deegan <tim@xen.org> wrote: > > > At 17:32 -0800 on 11 Jan (1357925563), Mukesh Rathor wrote: > >DEFINE_XEN_GUEST_HANDLE(physdev_dbgp_op_t); > > > > > > + > > > +#define PHYSDEVOP_map_iomem 30 > > > +struct physdev_map_iomem { > > > + /* IN */ > > > + unsigned long first_gfn; > > > + unsigned long first_mfn; > > > + unsigned int nr_mfns; > > > + unsigned int add_mapping; /* 1 == add mapping; 0 => > > unmap */ + > > > +}; > > > +typedef struct physdev_map_iomem physdev_map_iomem_t; > > > +DEFINE_XEN_GUEST_HANDLE(physdev_map_iomem_t); > > > + > > > > This needs documentation. Also, the arguemnts should be explicitly > > sized to avoid compat difficulties. > > > > Tim. > > Done: > > /* Map given gfns to mfns where mfns are part of IO space. */ > #define PHYSDEVOP_map_iomem 30 > struct physdev_map_iomem { > /* IN */ > uint64_t first_gfn; > uint64_t first_mfn; > uint32_t nr_mfns; > uint32_t add_mapping; /* 1 == add mapping; 0 == unmap */ > > };Which is BTW what the Linux tree already has. Perhaps the ''add_mapping'' should be just called ''flags'' and have two #defines? It is also has a bit of an issue when you use __packed__ - that is it will shrink from 32 bytes down to 24 bytes. Perhaps we should make this hypercall be: uint64_t first_gfn uint64_t first_mfn; uint32_t nr_mfns; uint32_t flags; uint64_t _pad; and then it is a nice 32 bytes long?> > > thanks, > Mukesh > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel >
Konrad Rzeszutek Wilk
2013-Jan-28 16:43 UTC
Re: [RFC PATCH 3/16]: PVH xen: Add PHYSDEVOP_map_iomem
On Mon, Jan 28, 2013 at 03:13:48PM +0000, Stefano Stabellini wrote:> On Sat, 26 Jan 2013, Mukesh Rathor wrote: > > On Fri, 25 Jan 2013 18:23:49 -0800 > > Mukesh Rathor <mukesh.rathor@oracle.com> wrote: > > > > > On Fri, 25 Jan 2013 08:05:40 +0000 > > > "Jan Beulich" <JBeulich@suse.com> wrote: > > > > > > > >>> On 25.01.13 at 02:53, Mukesh Rathor <mukesh.rathor@oracle.com> > > > > >>> wrote: > > > > > Since this happens once during boot, I am ok either way. Staying > > > > > with what I''ve keeps linux code clean and also provides flexibily > > > > > for future in case. But if you feel strongly, I can special case > > > > > dom0 in linux to assume xen has it all mapped, and generate a > > > > > patch there. Please lmk. > > > > > > > > Hmm, special casing Dom0 isn''t what I''m after. I want this to be > > > > transparent to the kernel in all cases (keeps the Linux code even > > > > cleaner). > > > > > > Ok. I am looking into it. Stefano, Ian, any comments? You guys OK with > > > that approach? I probably won''t need PHYSDEVOP_map_iomem then. > > > > Forgot to cc stefano and Ian. Resending CCing them. > > Yeah, it looks like a reasonable approach.That would mean that the PVH domU with PCI devices would have do this as well. Usually this is done in the toolstack - so would this mean that the PHYSDEVOP_map_iomem would be used there? Or would it be just part of the XEN_DOMCTL_iomem_permission?
>>> On 28.01.13 at 17:39, Konrad Rzeszutek Wilk <konrad@kernel.org> wrote: > On Thu, Jan 24, 2013 at 05:03:50PM -0800, Mukesh Rathor wrote: >> On Thu, 24 Jan 2013 15:06:29 +0000 >> Tim Deegan <tim@xen.org> wrote: >> >> > At 17:32 -0800 on 11 Jan (1357925563), Mukesh Rathor wrote: >> >DEFINE_XEN_GUEST_HANDLE(physdev_dbgp_op_t); >> > > >> > > + >> > > +#define PHYSDEVOP_map_iomem 30 >> > > +struct physdev_map_iomem { >> > > + /* IN */ >> > > + unsigned long first_gfn; >> > > + unsigned long first_mfn; >> > > + unsigned int nr_mfns; >> > > + unsigned int add_mapping; /* 1 == add mapping; 0 =>> > > unmap */ + >> > > +}; >> > > +typedef struct physdev_map_iomem physdev_map_iomem_t; >> > > +DEFINE_XEN_GUEST_HANDLE(physdev_map_iomem_t); >> > > + >> > >> > This needs documentation. Also, the arguemnts should be explicitly >> > sized to avoid compat difficulties. >> > >> > Tim. >> >> Done: >> >> /* Map given gfns to mfns where mfns are part of IO space. */ >> #define PHYSDEVOP_map_iomem 30 >> struct physdev_map_iomem { >> /* IN */ >> uint64_t first_gfn; >> uint64_t first_mfn; >> uint32_t nr_mfns; >> uint32_t add_mapping; /* 1 == add mapping; 0 == unmap */ >> >> }; > > Which is BTW what the Linux tree already has. > > Perhaps the ''add_mapping'' should be just called ''flags'' > and have two #defines? > > It is also has a bit of an issue when you use __packed__ - that is it > will shrink from 32 bytes down to 24 bytes.How that? Or really - how would it ever end up being 32 bytes?> Perhaps we should make this > hypercall be: > > uint64_t first_gfn > uint64_t first_mfn; > uint32_t nr_mfns; > uint32_t flags; > uint64_t _pad; > > and then it is a nice 32 bytes long?A trailing 64-bit field seems pretty pointless to me. Jan
>>> On 28.01.13 at 17:43, Konrad Rzeszutek Wilk <konrad@kernel.org> wrote:
> On Mon, Jan 28, 2013 at 03:13:48PM +0000, Stefano Stabellini wrote:
>> On Sat, 26 Jan 2013, Mukesh Rathor wrote:
>> > On Fri, 25 Jan 2013 18:23:49 -0800
>> > Mukesh Rathor <mukesh.rathor@oracle.com> wrote:
>> >
>> > > On Fri, 25 Jan 2013 08:05:40 +0000
>> > > "Jan Beulich" <JBeulich@suse.com> wrote:
>> > >
>> > > > >>> On 25.01.13 at 02:53, Mukesh Rathor <mukesh.rathor@oracle.com>
>> > > > >>> wrote:
>> > > > > Since this happens once during boot, I am ok either way. Staying
>> > > > > with what I've keeps linux code clean and also provides flexibility
>> > > > > for future in case. But if you feel strongly, I can special case
>> > > > > dom0 in linux to assume xen has it all mapped, and generate a
>> > > > > patch there. Please lmk.
>> > > >
>> > > > Hmm, special casing Dom0 isn't what I'm after. I want this to be
>> > > > transparent to the kernel in all cases (keeps the Linux code even
>> > > > cleaner).
>> > >
>> > > Ok. I am looking into it. Stefano, Ian, any comments? You guys OK with
>> > > that approach? I probably won't need PHYSDEVOP_map_iomem then.
>> >
>> > Forgot to cc stefano and Ian. Resending CCing them.
>>
>> Yeah, it looks like a reasonable approach.
>
> That would mean that the PVH domU with PCI devices would have to do this
> as well. Usually this is done in the toolstack - so would this mean that
> the PHYSDEVOP_map_iomem would be used there? Or would it be just part
> of the XEN_DOMCTL_iomem_permission?

I'd expect the latter - completely transparent to the guest.

Jan
On Mon, 2013-01-28 at 16:47 +0000, Jan Beulich wrote:
> >>> On 28.01.13 at 17:43, Konrad Rzeszutek Wilk <konrad@kernel.org> wrote:
> > On Mon, Jan 28, 2013 at 03:13:48PM +0000, Stefano Stabellini wrote:
> >> On Sat, 26 Jan 2013, Mukesh Rathor wrote:
> >> > On Fri, 25 Jan 2013 18:23:49 -0800
> >> > Mukesh Rathor <mukesh.rathor@oracle.com> wrote:
> >> >
> >> > > On Fri, 25 Jan 2013 08:05:40 +0000
> >> > > "Jan Beulich" <JBeulich@suse.com> wrote:
> >> > >
> >> > > > >>> On 25.01.13 at 02:53, Mukesh Rathor <mukesh.rathor@oracle.com>
> >> > > > >>> wrote:
> >> > > > > Since this happens once during boot, I am ok either way. Staying
> >> > > > > with what I've keeps linux code clean and also provides flexibility
> >> > > > > for future in case. But if you feel strongly, I can special case
> >> > > > > dom0 in linux to assume xen has it all mapped, and generate a
> >> > > > > patch there. Please lmk.
> >> > > >
> >> > > > Hmm, special casing Dom0 isn't what I'm after. I want this to be
> >> > > > transparent to the kernel in all cases (keeps the Linux code even
> >> > > > cleaner).
> >> > >
> >> > > Ok. I am looking into it. Stefano, Ian, any comments? You guys OK with
> >> > > that approach? I probably won't need PHYSDEVOP_map_iomem then.
> >> >
> >> > Forgot to cc stefano and Ian. Resending CCing them.
> >>
> >> Yeah, it looks like a reasonable approach.
> >
> > That would mean that the PVH domU with PCI devices would have to do this
> > as well. Usually this is done in the toolstack - so would this mean that
> > the PHYSDEVOP_map_iomem would be used there? Or would it be just part
> > of the XEN_DOMCTL_iomem_permission?
>
> I'd expect the latter - completely transparent to the guest.

That's what I would have said too.

Ian.
Konrad Rzeszutek Wilk
2013-Jan-30 21:24 UTC
Re: [RFC PATCH 3/16]: PVH xen: Add PHYSDEVOP_map_iomem
On Mon, Jan 28, 2013 at 04:47:01PM +0000, Jan Beulich wrote:
> >>> On 28.01.13 at 17:39, Konrad Rzeszutek Wilk <konrad@kernel.org> wrote:
> > On Thu, Jan 24, 2013 at 05:03:50PM -0800, Mukesh Rathor wrote:
> >> On Thu, 24 Jan 2013 15:06:29 +0000
> >> Tim Deegan <tim@xen.org> wrote:
> >>
> >> > At 17:32 -0800 on 11 Jan (1357925563), Mukesh Rathor wrote:
> >> > DEFINE_XEN_GUEST_HANDLE(physdev_dbgp_op_t);
> >> > >
> >> > > +
> >> > > +#define PHYSDEVOP_map_iomem 30
> >> > > +struct physdev_map_iomem {
> >> > > + /* IN */
> >> > > + unsigned long first_gfn;
> >> > > + unsigned long first_mfn;
> >> > > + unsigned int nr_mfns;
> >> > > + unsigned int add_mapping; /* 1 == add mapping; 0 ==
> >> > > unmap */ +
> >> > > +};
> >> > > +typedef struct physdev_map_iomem physdev_map_iomem_t;
> >> > > +DEFINE_XEN_GUEST_HANDLE(physdev_map_iomem_t);
> >> > > +
> >> >
> >> > This needs documentation. Also, the arguments should be explicitly
> >> > sized to avoid compat difficulties.
> >> >
> >> > Tim.
> >>
> >> Done:
> >>
> >> /* Map given gfns to mfns where mfns are part of IO space. */
> >> #define PHYSDEVOP_map_iomem 30
> >> struct physdev_map_iomem {
> >> /* IN */
> >> uint64_t first_gfn;
> >> uint64_t first_mfn;
> >> uint32_t nr_mfns;
> >> uint32_t add_mapping; /* 1 == add mapping; 0 == unmap */
> >>
> >> };
> >
> > Which is BTW what the Linux tree already has.
> >
> > Perhaps the 'add_mapping' should be just called 'flags'
> > and have two #defines?
> >
> > It also has a bit of an issue when you use __packed__ - that is it
> > will shrink from 32 bytes down to 24 bytes.
>
> How that? Or really - how would it ever end up being 32 bytes?

It won't. The structure will be aligned on the 8 bytes, so it will be 24 bytes. Thanks for noticing my mistake.