Elena Ufimtseva
2013-Sep-13 08:49 UTC
[PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
Defines XENMEM subop hypercall for PV vNUMA
enabled guests and provides vNUMA topology
information from per-domain vnuma topology build info.
Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
---
Changes since v1:
* changed type from int to uint and unsigned in vNUMA structures;
* removed unnecessary file vnuma.h as requested in review;
* added domain_vnuma_destroy;
* fixed usage of rcu_lock_domain_by_any_id;
* removed unnecessary guest_handle_cast calls;
* coding style fixes;
---
xen/common/domain.c | 25 +++++++++++++++-
xen/common/domctl.c | 68 ++++++++++++++++++++++++++++++++++++++++++-
xen/common/memory.c | 56 +++++++++++++++++++++++++++++++++++
xen/include/public/domctl.h | 15 +++++++++-
xen/include/public/memory.h | 9 +++++-
xen/include/xen/domain.h | 11 +++++++
xen/include/xen/sched.h | 1 +
xen/include/xen/vnuma.h | 27 +++++++++++++++++
8 files changed, 208 insertions(+), 4 deletions(-)
create mode 100644 xen/include/xen/vnuma.h
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 9390a22..bb414cf 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -227,6 +227,7 @@ struct domain *domain_create(
spin_lock_init(&d->node_affinity_lock);
d->node_affinity = NODE_MASK_ALL;
d->auto_node_affinity = 1;
+ d->vnuma.nr_vnodes = 0;
spin_lock_init(&d->shutdown_lock);
d->shutdown_code = -1;
@@ -530,8 +531,9 @@ int domain_kill(struct domain *d)
evtchn_destroy(d);
gnttab_release_mappings(d);
tmem_destroy(d->tmem);
- domain_set_outstanding_pages(d, 0);
d->tmem = NULL;
+ domain_set_outstanding_pages(d, 0);
+ domain_vnuma_destroy(&d->vnuma);
/* fallthrough */
case DOMDYING_dying:
rc = domain_relinquish_resources(d);
@@ -1279,6 +1281,27 @@ int continue_hypercall_on_cpu(
return 0;
}
+void domain_vnuma_destroy(struct domain_vnuma_info *v)
+{
+ if (v->vnuma_memblks) {
+ xfree(v->vnuma_memblks);
+ v->vnuma_memblks = NULL;
+ }
+ if (v->vcpu_to_vnode) {
+ xfree(v->vcpu_to_vnode);
+ v->vcpu_to_vnode = NULL;
+ }
+ if (v->vdistance) {
+ xfree(v->vdistance);
+ v->vdistance = NULL;
+ }
+ if (v->vnode_to_pnode) {
+ xfree(v->vnode_to_pnode);
+ v->vnode_to_pnode = NULL;
+ }
+ v->nr_vnodes = 0;
+}
+
/*
* Local variables:
* mode: C
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 9760d50..e5d05c7 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -29,6 +29,7 @@
#include <asm/page.h>
#include <public/domctl.h>
#include <xsm/xsm.h>
+#include <xen/vnuma.h>
static DEFINE_SPINLOCK(domctl_lock);
DEFINE_SPINLOCK(vcpu_alloc_lock);
@@ -862,7 +863,72 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
ret = set_global_virq_handler(d, virq);
}
break;
-
+ case XEN_DOMCTL_setvnumainfo:
+ {
+ unsigned int i, j, nr_vnodes, dist_size;
+ unsigned int dist, vmap, vntop;
+ vnuma_memblk_t vmemblk;
+
+ ret = -EINVAL;
+ dist = i = j = 0;
+ nr_vnodes = op->u.vnuma.nr_vnodes;
+ if (nr_vnodes <= 0 || nr_vnodes > NR_CPUS)
+ break;
+ d->vnuma.nr_vnodes = nr_vnodes;
+ dist_size = nr_vnodes * nr_vnodes;
+ if (
+ (d->vnuma.vdistance = xmalloc_bytes(
+ sizeof(*d->vnuma.vdistance) * dist_size)) == NULL ||
+ (d->vnuma.vnuma_memblks = xmalloc_bytes(
+ sizeof(*d->vnuma.vnuma_memblks) * nr_vnodes)) == NULL ||
+ (d->vnuma.vcpu_to_vnode = xmalloc_bytes(
+ sizeof(*d->vnuma.vcpu_to_vnode) * d->max_vcpus)) == NULL ||
+ (d->vnuma.vnode_to_pnode = xmalloc_bytes(
+ sizeof(*d->vnuma.vnode_to_pnode) * nr_vnodes)) == NULL )
+ goto err_dom;
+ for ( i = 0; i < nr_vnodes; i++ )
+ for ( j = 0; j < nr_vnodes; j++ )
+ {
+ if ( unlikely(__copy_from_guest_offset(&dist,
+ op->u.vnuma.vdistance,
+ __vnode_distance_offset(d, i, j), 1)) )
+ goto err_dom;
+ __vnode_distance_set(d, i, j, dist);
+ }
+ for ( i = 0; i < nr_vnodes; i++ )
+ {
+ if ( unlikely(__copy_from_guest_offset(&vmemblk,
+ op->u.vnuma.vnuma_memblks, i, 1)) )
+ goto err_dom;
+ d->vnuma.vnuma_memblks[i].start = vmemblk.start;
+ d->vnuma.vnuma_memblks[i].end = vmemblk.end;
+ }
+ for ( i = 0; i < d->max_vcpus; i++ )
+ {
+ if ( unlikely(__copy_from_guest_offset(&vmap,
+ op->u.vnuma.vcpu_to_vnode, i, 1)) )
+ goto err_dom;
+ d->vnuma.vcpu_to_vnode[i] = vmap;
+ }
+ if ( !guest_handle_is_null(op->u.vnuma.vnode_to_pnode) )
+ {
+ for ( i = 0; i < nr_vnodes; i++ )
+ {
+ if ( unlikely(__copy_from_guest_offset(&vntop,
+ op->u.vnuma.vnode_to_pnode, i, 1)) )
+ goto err_dom;
+ d->vnuma.vnode_to_pnode[i] = vntop;
+ }
+ }
+ else
+ for(i = 0; i < nr_vnodes; i++)
+ d->vnuma.vnode_to_pnode[i] = NUMA_NO_NODE;
+ ret = 0;
+ break;
+err_dom:
+ ret = -EINVAL;
+ }
+ break;
default:
ret = arch_do_domctl(op, d, u_domctl);
break;
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 50b740f..6dc2452 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -28,6 +28,7 @@
#include <public/memory.h>
#include <xsm/xsm.h>
#include <xen/trace.h>
+#include <xen/vnuma.h>
struct memop_args {
/* INPUT */
@@ -732,7 +733,62 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
rcu_unlock_domain(d);
break;
+ case XENMEM_get_vnuma_info:
+ {
+ int i, j;
+ struct vnuma_topology_info mtopology;
+ struct vnuma_topology_info touser_topo;
+ struct domain *d;
+ unsigned int max_pages, max_vcpus, nr_vnodes;
+ vnuma_memblk_t *vblks;
+ rc = -EINVAL;
+ if ( guest_handle_is_null(arg) )
+ return rc;
+ if( copy_from_guest(&mtopology, arg, 1) )
+ return -EINVAL;
+ if ( (d = rcu_lock_domain_by_any_id(mtopology.domid)) == NULL )
+ return -EINVAL;
+ touser_topo.nr_vnodes = d->vnuma.nr_vnodes;
+ max_pages = d->max_pages;
+ max_vcpus = d->max_vcpus;
+ rcu_unlock_domain(d);
+
+ nr_vnodes = touser_topo.nr_vnodes;
+ rc = copy_to_guest(arg, &touser_topo, 1);
+ if ( rc )
+ return -EFAULT;
+ if ( nr_vnodes == 0 || nr_vnodes > max_vcpus )
+ return -EFAULT;
+ vblks = xmalloc_array(struct vnuma_memblk, nr_vnodes);
+ if ( vblks == NULL )
+ return -EFAULT;
+ for ( i = 0; i < nr_vnodes; i++ )
+ {
+ if ( copy_to_guest_offset(mtopology.vnuma_memblks, i,
+ &d->vnuma.vnuma_memblks[i], 1) < 0 )
+ goto out;
+ }
+ for ( i = 0; i < touser_topo.nr_vnodes; i++ )
+ for ( j = 0; j < touser_topo.nr_vnodes; j++ )
+ {
+ if ( copy_to_guest_offset(mtopology.vdistance,
+ __vnode_distance_offset(d, i, j),
+ &__vnode_distance(d, i, j), 1) )
+ goto out;
+ }
+ for ( i = 0; i < d->max_vcpus ; i++ )
+ {
+ if ( copy_to_guest_offset(mtopology.vcpu_to_vnode, i,
+ &d->vnuma.vcpu_to_vnode[i], 1) )
+ goto out;
+ }
+ return rc;
+out:
+ if ( vblks ) xfree(vblks);
+ return rc;
+ break;
+ }
default:
rc = arch_memory_op(op, arg);
break;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 4c5b2bb..3574d0a 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -35,6 +35,7 @@
#include "xen.h"
#include "grant_table.h"
#include "hvm/save.h"
+#include "memory.h"
#define XEN_DOMCTL_INTERFACE_VERSION 0x00000009
@@ -852,6 +853,17 @@ struct xen_domctl_set_broken_page_p2m {
typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t);
+struct xen_domctl_vnuma {
+ uint16_t nr_vnodes;
+ XEN_GUEST_HANDLE_64(uint) vdistance;
+ XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
+ XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
+ XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
+};
+
+typedef struct xen_domctl_vnuma xen_domctl_vnuma_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t);
+
struct xen_domctl {
uint32_t cmd;
#define XEN_DOMCTL_createdomain 1
@@ -920,6 +932,7 @@ struct xen_domctl {
#define XEN_DOMCTL_set_broken_page_p2m 67
#define XEN_DOMCTL_setnodeaffinity 68
#define XEN_DOMCTL_getnodeaffinity 69
+#define XEN_DOMCTL_setvnumainfo 70
#define XEN_DOMCTL_gdbsx_guestmemio 1000
#define XEN_DOMCTL_gdbsx_pausevcpu 1001
#define XEN_DOMCTL_gdbsx_unpausevcpu 1002
@@ -979,6 +992,7 @@ struct xen_domctl {
struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
struct xen_domctl_gdbsx_domstatus gdbsx_domstatus;
+ struct xen_domctl_vnuma vnuma;
uint8_t pad[128];
} u;
};
@@ -986,7 +1000,6 @@ typedef struct xen_domctl xen_domctl_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_t);
#endif /* __XEN_PUBLIC_DOMCTL_H__ */
-
/*
* Local variables:
* mode: C
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 7a26dee..28f6aaf 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -453,12 +453,19 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
* Caller must be privileged or the hypercall fails.
*/
#define XENMEM_claim_pages 24
-
/*
* XENMEM_claim_pages flags - the are no flags at this time.
* The zero value is appropiate.
*/
+struct vnuma_memblk {
+ uint64_t start, end;
+};
+typedef struct vnuma_memblk vnuma_memblk_t;
+DEFINE_XEN_GUEST_HANDLE(vnuma_memblk_t);
+
+#define XENMEM_get_vnuma_info 25
+
#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
#endif /* __XEN_PUBLIC_MEMORY_H__ */
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index a057069..c9d53e3 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -4,6 +4,7 @@
#include <public/xen.h>
#include <asm/domain.h>
+#include <public/memory.h>
typedef union {
struct vcpu_guest_context *nat;
@@ -89,4 +90,14 @@ extern unsigned int xen_processor_pmbits;
extern bool_t opt_dom0_vcpus_pin;
+struct domain_vnuma_info {
+ uint16_t nr_vnodes;
+ uint *vdistance;
+ uint *vcpu_to_vnode;
+ uint *vnode_to_pnode;
+ vnuma_memblk_t *vnuma_memblks;
+};
+
+void domain_vnuma_destroy(struct domain_vnuma_info *v);
+
#endif /* __XEN_DOMAIN_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index ae6a3b8..cb023cf 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -377,6 +377,7 @@ struct domain
nodemask_t node_affinity;
unsigned int last_alloc_node;
spinlock_t node_affinity_lock;
+ struct domain_vnuma_info vnuma;
};
struct domain_setup_info
diff --git a/xen/include/xen/vnuma.h b/xen/include/xen/vnuma.h
new file mode 100644
index 0000000..0b41da0
--- /dev/null
+++ b/xen/include/xen/vnuma.h
@@ -0,0 +1,27 @@
+#ifndef _VNUMA_H
+#define _VNUMA_H
+#include <public/memory.h>
+
+/* DEFINE_XEN_GUEST_HANDLE(vnuma_memblk_t); */
+
+struct vnuma_topology_info {
+ domid_t domid;
+ uint16_t nr_vnodes;
+ uint32_t _pad;
+ XEN_GUEST_HANDLE_64(uint) vdistance;
+ XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
+ XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
+};
+typedef struct vnuma_topology_info vnuma_topology_info_t;
+DEFINE_XEN_GUEST_HANDLE(vnuma_topology_info_t);
+
+#define __vnode_distance_offset(_dom, _i, _j) \
+ ( ((_j) * ((_dom)->vnuma.nr_vnodes)) + (_i) )
+
+#define __vnode_distance(_dom, _i, _j) \
+ ( (_dom)->vnuma.vdistance[__vnode_distance_offset((_dom), (_i), (_j))] )
+
+#define __vnode_distance_set(_dom, _i, _j, _v) \
+ do { __vnode_distance((_dom), (_i), (_j)) = (_v); } while(0)
+
+#endif
--
1.7.10.4
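For context on how the new interface is meant to be consumed: a PV guest issues the XENMEM_get_vnuma_info subop with guest-allocated buffers described by the handles in struct vnuma_topology_info. The sketch below is illustrative only and is not part of this patch; the function name, the Linux-side mirror of vnuma_topology_info (added by a later patch in this series), and the buffer sizing are assumptions, while HYPERVISOR_memory_op() and set_xen_guest_handle() are the usual Linux guest helpers.

/*
 * Illustrative guest-side sketch (not part of this patch): query the
 * vNUMA topology at boot.  Assumes a Linux-side definition of struct
 * vnuma_topology_info matching the one introduced in xen/vnuma.h above.
 */
static int __init xen_query_vnuma(unsigned int nr_cpus, unsigned int max_nodes)
{
    struct vnuma_topology_info info = { .domid = DOMID_SELF };
    unsigned int *distance, *vcpu_to_vnode;
    struct vnuma_memblk *memblks;
    int rc = -ENOMEM;

    distance = kcalloc(max_nodes * max_nodes, sizeof(*distance), GFP_KERNEL);
    vcpu_to_vnode = kcalloc(nr_cpus, sizeof(*vcpu_to_vnode), GFP_KERNEL);
    memblks = kcalloc(max_nodes, sizeof(*memblks), GFP_KERNEL);
    if (!distance || !vcpu_to_vnode || !memblks)
        goto out;

    set_xen_guest_handle(info.vdistance, distance);
    set_xen_guest_handle(info.vcpu_to_vnode, vcpu_to_vnode);
    set_xen_guest_handle(info.vnuma_memblks, memblks);

    rc = HYPERVISOR_memory_op(XENMEM_get_vnuma_info, &info);
    /*
     * On success, info.nr_vnodes and the three arrays describe the
     * topology; a real caller would hand them to the NUMA setup code
     * before freeing.
     */
out:
    kfree(distance);
    kfree(vcpu_to_vnode);
    kfree(memblks);
    return rc;
}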
Andrew Cooper
2013-Sep-13 09:31 UTC
Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
On 13/09/2013 09:49, Elena Ufimtseva wrote:> Defines XENMEM subop hypercall for PV vNUMA > enabled guests and provides vNUMA topology > information from per-domain vnuma topology build info. > > Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com> > > --- > Changes since v1: > * changed type from int to uint and unsigned in vNUMA structures; > * removed unecessary file vnuma.h as requested in review > * added domain_vnuma_destroy; > * fixed usage of rcu_lock_domain_by_any_id; > * removed unecessary guest_handle_cast calls; > * coding style fixes; > --- > xen/common/domain.c | 25 +++++++++++++++- > xen/common/domctl.c | 68 ++++++++++++++++++++++++++++++++++++++++++- > xen/common/memory.c | 56 +++++++++++++++++++++++++++++++++++ > xen/include/public/domctl.h | 15 +++++++++- > xen/include/public/memory.h | 9 +++++- > xen/include/xen/domain.h | 11 +++++++ > xen/include/xen/sched.h | 1 + > xen/include/xen/vnuma.h | 27 +++++++++++++++++ > 8 files changed, 208 insertions(+), 4 deletions(-) > create mode 100644 xen/include/xen/vnuma.h > > diff --git a/xen/common/domain.c b/xen/common/domain.c > index 9390a22..bb414cf 100644 > --- a/xen/common/domain.c > +++ b/xen/common/domain.c > @@ -227,6 +227,7 @@ struct domain *domain_create( > spin_lock_init(&d->node_affinity_lock); > d->node_affinity = NODE_MASK_ALL; > d->auto_node_affinity = 1; > + d->vnuma.nr_vnodes = 0;alloc_domain_struct() clears d, so this assignment of 0 is not needed.> > spin_lock_init(&d->shutdown_lock); > d->shutdown_code = -1; > @@ -530,8 +531,9 @@ int domain_kill(struct domain *d) > evtchn_destroy(d); > gnttab_release_mappings(d); > tmem_destroy(d->tmem); > - domain_set_outstanding_pages(d, 0);Any particular reason for this function call to move? I cant see any reason.> d->tmem = NULL; > + domain_set_outstanding_pages(d, 0); > + domain_vnuma_destroy(&d->vnuma); > /* fallthrough */ > case DOMDYING_dying: > rc = domain_relinquish_resources(d); > @@ -1279,6 +1281,27 @@ int continue_hypercall_on_cpu( > return 0; > } > > +void domain_vnuma_destroy(struct domain_vnuma_info *v) > +{ > + if (v->vnuma_memblks) { > + xfree(v->vnuma_memblks); > + v->vnuma_memblks = NULL; > + }Coding style (see CODING_STYLE in the root of the tree) so this would become: if ( v->vnuma_memblks ) { xfree(v->vnuma_memblks); v->vnuma_memblks = NULL; } However, xfree() (like regular free()) is happy with NULL pointers, so you can drop the if altogether.> + if (v->vcpu_to_vnode) { > + xfree(v->vcpu_to_vnode); > + v->vcpu_to_vnode = NULL; > + } > + if (v->vdistance) { > + xfree(v->vdistance); > + v->vdistance = NULL; > + } > + if (v->vnode_to_pnode) { > + xfree(v->vnode_to_pnode); > + v->vnode_to_pnode = NULL; > + } > + v->nr_vnodes = 0; > +} > + > /* > * Local variables: > * mode: C > diff --git a/xen/common/domctl.c b/xen/common/domctl.c > index 9760d50..e5d05c7 100644 > --- a/xen/common/domctl.c > +++ b/xen/common/domctl.c > @@ -29,6 +29,7 @@ > #include <asm/page.h> > #include <public/domctl.h> > #include <xsm/xsm.h> > +#include <xen/vnuma.h> > > static DEFINE_SPINLOCK(domctl_lock); > DEFINE_SPINLOCK(vcpu_alloc_lock); > @@ -862,7 +863,72 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) > ret = set_global_virq_handler(d, virq); > } > break; > -Please leave this blank line in here.> + case XEN_DOMCTL_setvnumainfo: > + { > + unsigned int i, j, nr_vnodes, dist_size; > + unsigned int dist, vmap, vntop; > + vnuma_memblk_t vmemblk; > + > + ret = -EINVAL; > + dist = i = j = 0; > + nr_vnodes = op->u.vnuma.nr_vnodes; > + if (nr_vnodes <= 0 || nr_vnodes > 
NR_CPUS) > + break;nr_vnodes is unsigned. It cannot possibly be less than 0.> + d->vnuma.nr_vnodes = nr_vnodes; > + dist_size = nr_vnodes * nr_vnodes; > + if ( > + (d->vnuma.vdistance = xmalloc_bytes( > + sizeof(*d->vnuma.vdistance) * dist_size)) == NULL ||sizeof in an operator not a function. As you are not passing a type, the brackets are not required.> + (d->vnuma.vnuma_memblks = xmalloc_bytes( > + sizeof(*d->vnuma.vnuma_memblks) * nr_vnodes)) == NULL || > + (d->vnuma.vcpu_to_vnode = xmalloc_bytes( > + sizeof(*d->vnuma.vcpu_to_vnode) * d->max_vcpus)) == NULL || > + (d->vnuma.vnode_to_pnode = xmalloc_bytes( > + sizeof(*d->vnuma.vnode_to_pnode) * nr_vnodes)) == NULL ) > + goto err_dom; > + for ( i = 0; i < nr_vnodes; i++ ) > + for ( j = 0; j < nr_vnodes; j++ ) > + { > + if ( unlikely(__copy_from_guest_offset(&dist, > + op->u.vnuma.vdistance, > + __vnode_distance_offset(d, i, j), 1)) ) > + goto err_dom;This error logic is broken. A failure in __copy_from_guest_offset() should result in -EFAULT, but will result in -EINVAL after following err_dom;> + __vnode_distance_set(d, i, j, dist); > + } > + for ( i = 0; i < nr_vnodes; i++ ) > + { > + if ( unlikely(__copy_from_guest_offset(&vmemblk, > + op->u.vnuma.vnuma_memblks, i, 1)) ) > + goto err_dom; > + d->vnuma.vnuma_memblks[i].start = vmemblk.start; > + d->vnuma.vnuma_memblks[i].end = vmemblk.end; > + } > + for ( i = 0; i < d->max_vcpus; i++ ) > + { > + if ( unlikely(__copy_from_guest_offset(&vmap, > + op->u.vnuma.vcpu_to_vnode, i, 1)) ) > + goto err_dom; > + d->vnuma.vcpu_to_vnode[i] = vmap; > + } > + if ( !guest_handle_is_null(op->u.vnuma.vnode_to_pnode) ) > + { > + for ( i = 0; i < nr_vnodes; i++ ) > + { > + if ( unlikely(__copy_from_guest_offset(&vntop, > + op->u.vnuma.vnode_to_pnode, i, 1)) ) > + goto err_dom; > + d->vnuma.vnode_to_pnode[i] = vntop; > + } > + } > + else > + for(i = 0; i < nr_vnodes; i++) > + d->vnuma.vnode_to_pnode[i] = NUMA_NO_NODE; > + ret = 0; > + break; > +err_dom: > + ret = -EINVAL; > + } > + break;And an extra line in here please.> default: > ret = arch_do_domctl(op, d, u_domctl); > break; > diff --git a/xen/common/memory.c b/xen/common/memory.c > index 50b740f..6dc2452 100644 > --- a/xen/common/memory.c > +++ b/xen/common/memory.c > @@ -28,6 +28,7 @@ > #include <public/memory.h> > #include <xsm/xsm.h> > #include <xen/trace.h> > +#include <xen/vnuma.h> > > struct memop_args { > /* INPUT */ > @@ -732,7 +733,62 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) > rcu_unlock_domain(d); > > break; > + case XENMEM_get_vnuma_info: > + { > + int i, j; > + struct vnuma_topology_info mtopology; > + struct vnuma_topology_info touser_topo; > + struct domain *d; > + unsigned int max_pages, max_vcpus, nr_vnodes; > + vnuma_memblk_t *vblks; > > + rc = -EINVAL; > + if ( guest_handle_is_null(arg) ) > + return rc; > + if( copy_from_guest(&mtopology, arg, 1) ) > + return -EINVAL;-EFAULT;> + if ( (d = rcu_lock_domain_by_any_id(mtopology.domid)) == NULL ) > + return -EINVAL;-ESRCH ( I think? 
)> + touser_topo.nr_vnodes = d->vnuma.nr_vnodes; > + max_pages = d->max_pages; > + max_vcpus = d->max_vcpus; > + rcu_unlock_domain(d); > + > + nr_vnodes = touser_topo.nr_vnodes; > + rc = copy_to_guest(arg, &touser_topo, 1); > + if ( rc ) > + return -EFAULT; > + if ( nr_vnodes == 0 || nr_vnodes > max_vcpus ) > + return -EFAULT;-EINVAL;> + vblks = xmalloc_array(struct vnuma_memblk, nr_vnodes); > + if ( vblks == NULL ) > + return -EFAULT;-ENOMEM; Now, if you have an unconditional rc = -EFAULT then all your goto out logic will be correct> + for ( i = 0; i < nr_vnodes; i++ ) > + { > + if ( copy_to_guest_offset(mtopology.vnuma_memblks, i, > + &d->vnuma.vnuma_memblks[i], 1) < 0 ) > + goto out; > + } > + for ( i = 0; i < touser_topo.nr_vnodes; i++ ) > + for ( j = 0; j < touser_topo.nr_vnodes; j++ ) > + { > + if ( copy_to_guest_offset(mtopology.vdistance, > + __vnode_distance_offset(d, i, j), > + &__vnode_distance(d, i, j), 1) ) > + goto out; > + } > + for ( i = 0; i < d->max_vcpus ; i++ ) > + { > + if ( copy_to_guest_offset(mtopology.vcpu_to_vnode, i, > + &d->vnuma.vcpu_to_vnode[i], 1) ) > + goto out; > + } > + return rc;But this would need to turn into rc = 0; to avoid return -EFAULT in the success case, and unconditionally leaking vblks> +out: > + if ( vblks ) xfree(vblks); > + return rc; > + break; > + } > default: > rc = arch_memory_op(op, arg); > break; > diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h > index 4c5b2bb..3574d0a 100644 > --- a/xen/include/public/domctl.h > +++ b/xen/include/public/domctl.h > @@ -35,6 +35,7 @@ > #include "xen.h" > #include "grant_table.h" > #include "hvm/save.h" > +#include "memory.h" > > #define XEN_DOMCTL_INTERFACE_VERSION 0x00000009 > > @@ -852,6 +853,17 @@ struct xen_domctl_set_broken_page_p2m { > typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t; > DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t); > > +struct xen_domctl_vnuma { > + uint16_t nr_vnodes; > + XEN_GUEST_HANDLE_64(uint) vdistance; > + XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode; > + XEN_GUEST_HANDLE_64(uint) vnode_to_pnode; > + XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks; > +}; > + > +typedef struct xen_domctl_vnuma xen_domctl_vnuma_t; > +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t); > + > struct xen_domctl { > uint32_t cmd; > #define XEN_DOMCTL_createdomain 1 > @@ -920,6 +932,7 @@ struct xen_domctl { > #define XEN_DOMCTL_set_broken_page_p2m 67 > #define XEN_DOMCTL_setnodeaffinity 68 > #define XEN_DOMCTL_getnodeaffinity 69 > +#define XEN_DOMCTL_setvnumainfo 70 > #define XEN_DOMCTL_gdbsx_guestmemio 1000 > #define XEN_DOMCTL_gdbsx_pausevcpu 1001 > #define XEN_DOMCTL_gdbsx_unpausevcpu 1002 > @@ -979,6 +992,7 @@ struct xen_domctl { > struct xen_domctl_set_broken_page_p2m set_broken_page_p2m; > struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu; > struct xen_domctl_gdbsx_domstatus gdbsx_domstatus; > + struct xen_domctl_vnuma vnuma; > uint8_t pad[128]; > } u; > }; > @@ -986,7 +1000,6 @@ typedef struct xen_domctl xen_domctl_t; > DEFINE_XEN_GUEST_HANDLE(xen_domctl_t); > > #endif /* __XEN_PUBLIC_DOMCTL_H__ */ > -Spurious whitespace change. Please remove> /* > * Local variables: > * mode: C > diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h > index 7a26dee..28f6aaf 100644 > --- a/xen/include/public/memory.h > +++ b/xen/include/public/memory.h > @@ -453,12 +453,19 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t); > * Caller must be privileged or the hypercall fails. 
> */ > #define XENMEM_claim_pages 24 > -And here> /* > * XENMEM_claim_pages flags - the are no flags at this time. > * The zero value is appropiate. > */ > > +struct vnuma_memblk { > + uint64_t start, end; > +}; > +typedef struct vnuma_memblk vnuma_memblk_t; > +DEFINE_XEN_GUEST_HANDLE(vnuma_memblk_t); > + > +#define XENMEM_get_vnuma_info 25 > + > #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ > > #endif /* __XEN_PUBLIC_MEMORY_H__ */ > diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h > index a057069..c9d53e3 100644 > --- a/xen/include/xen/domain.h > +++ b/xen/include/xen/domain.h > @@ -4,6 +4,7 @@ > > #include <public/xen.h> > #include <asm/domain.h> > +#include <public/memory.h> > > typedef union { > struct vcpu_guest_context *nat; > @@ -89,4 +90,14 @@ extern unsigned int xen_processor_pmbits; > > extern bool_t opt_dom0_vcpus_pin; > > +struct domain_vnuma_info { > + uint16_t nr_vnodes; > + uint *vdistance; > + uint *vcpu_to_vnode; > + uint *vnode_to_pnode; > + vnuma_memblk_t *vnuma_memblks; > +}; > + > +void domain_vnuma_destroy(struct domain_vnuma_info *v); > + > #endif /* __XEN_DOMAIN_H__ */ > diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h > index ae6a3b8..cb023cf 100644 > --- a/xen/include/xen/sched.h > +++ b/xen/include/xen/sched.h > @@ -377,6 +377,7 @@ struct domain > nodemask_t node_affinity; > unsigned int last_alloc_node; > spinlock_t node_affinity_lock; > + struct domain_vnuma_info vnuma; > }; > > struct domain_setup_info > diff --git a/xen/include/xen/vnuma.h b/xen/include/xen/vnuma.h > new file mode 100644 > index 0000000..0b41da0 > --- /dev/null > +++ b/xen/include/xen/vnuma.h > @@ -0,0 +1,27 @@ > +#ifndef _VNUMA_H > +#define _VNUMA_H > +#include <public/memory.h> > + > +/* DEFINE_XEN_GUEST_HANDLE(vnuma_memblk_t); */ > + > +struct vnuma_topology_info { > + domid_t domid; > + uint16_t nr_vnodes; > + uint32_t _pad; > + XEN_GUEST_HANDLE_64(uint) vdistance; > + XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode; > + XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks; > +}; > +typedef struct vnuma_topology_info vnuma_topology_info_t; > +DEFINE_XEN_GUEST_HANDLE(vnuma_topology_info_t); > + > +#define __vnode_distance_offset(_dom, _i, _j) \ > + ( ((_j) * ((_dom)->vnuma.nr_vnodes)) + (_i) ) > + > +#define __vnode_distance(_dom, _i, _j) \ > + ( (_dom)->vnuma.vdistance[__vnode_distance_offset((_dom), (_i), (_j))] )You expand _dom here twice, which is bad practice for macros. I suggest static inline functions as a better alternative. ~Andrew> + > +#define __vnode_distance_set(_dom, _i, _j, _v) \ > + do { __vnode_distance((_dom), (_i), (_j)) = (_v); } while(0) > + > +#endif
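Andrew's last point about the macros can be made concrete: static inline helpers evaluate the domain pointer exactly once and get type checking for free. The sketch below only illustrates that suggestion, using the domain_vnuma_info layout and the same index convention as the macros in this patch; the helper names are made up here.

/* Possible static inline replacements for the __vnode_distance macros. */
static inline unsigned int vnode_distance_offset(const struct domain *d,
                                                 unsigned int i, unsigned int j)
{
    return j * d->vnuma.nr_vnodes + i;
}

static inline unsigned int vnode_distance(const struct domain *d,
                                          unsigned int i, unsigned int j)
{
    return d->vnuma.vdistance[vnode_distance_offset(d, i, j)];
}

static inline void vnode_distance_set(struct domain *d, unsigned int i,
                                      unsigned int j, unsigned int dist)
{
    d->vnuma.vdistance[vnode_distance_offset(d, i, j)] = dist;
}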
Jan Beulich
2013-Sep-13 11:00 UTC
Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
>>> On 13.09.13 at 10:49, Elena Ufimtseva <ufimtseva@gmail.com> wrote: > --- a/xen/common/domain.c > +++ b/xen/common/domain.c > @@ -227,6 +227,7 @@ struct domain *domain_create( > spin_lock_init(&d->node_affinity_lock); > d->node_affinity = NODE_MASK_ALL; > d->auto_node_affinity = 1; > + d->vnuma.nr_vnodes = 0;Pointless (struct domain gets zero-allocated)?> @@ -530,8 +531,9 @@ int domain_kill(struct domain *d) > evtchn_destroy(d); > gnttab_release_mappings(d); > tmem_destroy(d->tmem); > - domain_set_outstanding_pages(d, 0); > d->tmem = NULL; > + domain_set_outstanding_pages(d, 0);Why is this being moved?> +void domain_vnuma_destroy(struct domain_vnuma_info *v) > +{ > + if (v->vnuma_memblks) { > + xfree(v->vnuma_memblks); > + v->vnuma_memblks = NULL; > + } > + if (v->vcpu_to_vnode) { > + xfree(v->vcpu_to_vnode); > + v->vcpu_to_vnode = NULL; > + } > + if (v->vdistance) { > + xfree(v->vdistance); > + v->vdistance = NULL; > + } > + if (v->vnode_to_pnode) { > + xfree(v->vnode_to_pnode); > + v->vnode_to_pnode = NULL; > + } > + v->nr_vnodes = 0;All the if()s are pointless - xfree() deals with NULL input quite fine. And for the record, they also all don''t match our coding style.> @@ -862,7 +863,72 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) > ret = set_global_virq_handler(d, virq); > } > break; > -Please keep that newline, and insert another one after the final break of the code you add.> + case XEN_DOMCTL_setvnumainfo: > + { > + unsigned int i, j, nr_vnodes, dist_size; > + unsigned int dist, vmap, vntop; > + vnuma_memblk_t vmemblk; > + > + ret = -EINVAL; > + dist = i = j = 0; > + nr_vnodes = op->u.vnuma.nr_vnodes; > + if (nr_vnodes <= 0 || nr_vnodes > NR_CPUS) > + break; > + d->vnuma.nr_vnodes = nr_vnodes; > + dist_size = nr_vnodes * nr_vnodes; > + if ( > + (d->vnuma.vdistance = xmalloc_bytes( > + sizeof(*d->vnuma.vdistance) * dist_size)) == NULL || > + (d->vnuma.vnuma_memblks = xmalloc_bytes( > + sizeof(*d->vnuma.vnuma_memblks) * nr_vnodes)) == NULL || > + (d->vnuma.vcpu_to_vnode = xmalloc_bytes( > + sizeof(*d->vnuma.vcpu_to_vnode) * d->max_vcpus)) == NULL || > + (d->vnuma.vnode_to_pnode = xmalloc_bytes( > + sizeof(*d->vnuma.vnode_to_pnode) * nr_vnodes)) == NULL ) > + goto err_dom;Coding style. Also, you leak all memory that you managed to get hold of in the error case.> + for ( i = 0; i < nr_vnodes; i++ ) > + for ( j = 0; j < nr_vnodes; j++ ) > + { > + if ( unlikely(__copy_from_guest_offset(&dist,__copy_from_guest_offset() without prior guest_handle_okay()? 
And the error code should be -EFAULT in that case.> + op->u.vnuma.vdistance, > + __vnode_distance_offset(d, i, j), 1)) ) > + goto err_dom; > + __vnode_distance_set(d, i, j, dist); > + } > + for ( i = 0; i < nr_vnodes; i++ )If possible please fold this with the earlier loop.> + { > + if ( unlikely(__copy_from_guest_offset(&vmemblk, > + op->u.vnuma.vnuma_memblks, i, 1)) ) > + goto err_dom; > + d->vnuma.vnuma_memblks[i].start = vmemblk.start; > + d->vnuma.vnuma_memblks[i].end = vmemblk.end; > + } > + for ( i = 0; i < d->max_vcpus; i++ ) > + { > + if ( unlikely(__copy_from_guest_offset(&vmap, > + op->u.vnuma.vcpu_to_vnode, i, 1)) ) > + goto err_dom; > + d->vnuma.vcpu_to_vnode[i] = vmap;If the types are compatible, things like this don''t need doing in a loop - you could copy the whole array in one go.> + } > + if ( !guest_handle_is_null(op->u.vnuma.vnode_to_pnode) ) > + { > + for ( i = 0; i < nr_vnodes; i++ ) > + { > + if ( unlikely(__copy_from_guest_offset(&vntop, > + op->u.vnuma.vnode_to_pnode, i, 1)) ) > + goto err_dom; > + d->vnuma.vnode_to_pnode[i] = vntop; > + } > + } > + else > + for(i = 0; i < nr_vnodes; i++)Coding style.> + d->vnuma.vnode_to_pnode[i] = NUMA_NO_NODE;What''s the value of using this domctl this way?> + ret = 0; > + break; > +err_dom: > + ret = -EINVAL;"ret" should not be unconditionally set to a fixed value here, the more that you set it almost first thing in the case statement.> @@ -732,7 +733,62 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) > rcu_unlock_domain(d); > > break; > + case XENMEM_get_vnuma_info: > + { > + int i, j; > + struct vnuma_topology_info mtopology; > + struct vnuma_topology_info touser_topo; > + struct domain *d; > + unsigned int max_pages, max_vcpus, nr_vnodes; > + vnuma_memblk_t *vblks; > > + rc = -EINVAL; > + if ( guest_handle_is_null(arg) ) > + return rc; > + if( copy_from_guest(&mtopology, arg, 1) ) > + return -EINVAL;Consistency please, preferably such that you - as long as possible - always return -E rather than rc. 
Also the return value here again is -EFAULT.> + if ( (d = rcu_lock_domain_by_any_id(mtopology.domid)) == NULL ) > + return -EINVAL;And -ESRCH or some such here (please use existing code as reference).> + touser_topo.nr_vnodes = d->vnuma.nr_vnodes; > + max_pages = d->max_pages; > + max_vcpus = d->max_vcpus; > + rcu_unlock_domain(d); > + > + nr_vnodes = touser_topo.nr_vnodes; > + rc = copy_to_guest(arg, &touser_topo, 1); > + if ( rc ) > + return -EFAULT; > + if ( nr_vnodes == 0 || nr_vnodes > max_vcpus ) > + return -EFAULT;-EINVAL> + vblks = xmalloc_array(struct vnuma_memblk, nr_vnodes); > + if ( vblks == NULL ) > + return -EFAULT;-ENOMEM> + for ( i = 0; i < nr_vnodes; i++ ) > + { > + if ( copy_to_guest_offset(mtopology.vnuma_memblks, i, > + &d->vnuma.vnuma_memblks[i], 1) < 0 )Indentation.> + goto out; > + } > + for ( i = 0; i < touser_topo.nr_vnodes; i++ )Again, please fold this with the previous loop, and use (preferably) the simple variable as loop boundary (but in any case be consistent).> + for ( j = 0; j < touser_topo.nr_vnodes; j++ ) > + { > + if ( copy_to_guest_offset(mtopology.vdistance, > + __vnode_distance_offset(d, i, j), > + &__vnode_distance(d, i, j), 1) ) > + goto out; > + } > + for ( i = 0; i < d->max_vcpus ; i++ ) > + { > + if ( copy_to_guest_offset(mtopology.vcpu_to_vnode, i, > + &d->vnuma.vcpu_to_vnode[i], 1) ) > + goto out; > + } > + return rc; > +out: > + if ( vblks ) xfree(vblks);Coding style.> --- /dev/null > +++ b/xen/include/xen/vnuma.h > @@ -0,0 +1,27 @@ > +#ifndef _VNUMA_H > +#define _VNUMA_H > +#include <public/memory.h> > + > +/* DEFINE_XEN_GUEST_HANDLE(vnuma_memblk_t); */ > + > +struct vnuma_topology_info { > + domid_t domid; > + uint16_t nr_vnodes; > + uint32_t _pad; > + XEN_GUEST_HANDLE_64(uint) vdistance; > + XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode; > + XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks; > +}; > +typedef struct vnuma_topology_info vnuma_topology_info_t; > +DEFINE_XEN_GUEST_HANDLE(vnuma_topology_info_t);At least up to here this appears to belong into the public header. Jan> + > +#define __vnode_distance_offset(_dom, _i, _j) \ > + ( ((_j) * ((_dom)->vnuma.nr_vnodes)) + (_i) ) > + > +#define __vnode_distance(_dom, _i, _j) \ > + ( (_dom)->vnuma.vdistance[__vnode_distance_offset((_dom), (_i), > (_j))] ) > + > +#define __vnode_distance_set(_dom, _i, _j, _v) \ > + do { __vnode_distance((_dom), (_i), (_j)) = (_v); } while(0) > + > +#endif > -- > 1.7.10.4
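One of Jan's points — copying whole arrays instead of looping element by element — would look roughly like this, assuming the guest handle element type and the internal array type stay compatible (both unsigned int here); the error path shown is only a sketch:

    /* Bulk copy of the vcpu-to-vnode map instead of a per-element loop. */
    if ( copy_from_guest(d->vnuma.vcpu_to_vnode,
                         op->u.vnuma.vcpu_to_vnode, d->max_vcpus) )
    {
        ret = -EFAULT;          /* not -EINVAL, per the review comments */
        goto err_dom;           /* err_dom must also free the allocations */
    }

The same collapsing applies to vdistance (nr_vnodes * nr_vnodes elements) and vnode_to_pnode (nr_vnodes elements).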
Jan Beulich
2013-Sep-13 11:53 UTC
Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
>>> On 13.09.13 at 11:31, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 13/09/2013 09:49, Elena Ufimtseva wrote:
>> + if (
>> + (d->vnuma.vdistance = xmalloc_bytes(
>> + sizeof(*d->vnuma.vdistance) * dist_size)) == NULL ||
>
> sizeof in an operator not a function. As you are not passing a type,
> the brackets are not required.

Actually, I'd request parentheses to be added if I saw a use of sizeof()
without them. I think we have quite a few (hopefully a majority) of uses
similar to the one above in the tree.

Jan
George Dunlap
2013-Sep-16 15:46 UTC
Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
On Fri, Sep 13, 2013 at 9:49 AM, Elena Ufimtseva <ufimtseva@gmail.com> wrote:> +void domain_vnuma_destroy(struct domain_vnuma_info *v)As long as you''re respinning, I think I''d prefer this be named something different -- ''v'' is almost universally of type "struct vcpu" in the Xen code, so this code is a little confusing. I might use "vinfo", or even just take a domain pointer and dereference it yourself.> +{ > + if (v->vnuma_memblks) { > + xfree(v->vnuma_memblks); > + v->vnuma_memblks = NULL; > + } > + if (v->vcpu_to_vnode) { > + xfree(v->vcpu_to_vnode); > + v->vcpu_to_vnode = NULL; > + } > + if (v->vdistance) { > + xfree(v->vdistance); > + v->vdistance = NULL; > + } > + if (v->vnode_to_pnode) { > + xfree(v->vnode_to_pnode); > + v->vnode_to_pnode = NULL; > + } > + v->nr_vnodes = 0; > +} > + > /* > * Local variables: > * mode: C > diff --git a/xen/common/domctl.c b/xen/common/domctl.c > index 9760d50..e5d05c7 100644 > --- a/xen/common/domctl.c > +++ b/xen/common/domctl.c > @@ -29,6 +29,7 @@ > #include <asm/page.h> > #include <public/domctl.h> > #include <xsm/xsm.h> > +#include <xen/vnuma.h> > > static DEFINE_SPINLOCK(domctl_lock); > DEFINE_SPINLOCK(vcpu_alloc_lock); > @@ -862,7 +863,72 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) > ret = set_global_virq_handler(d, virq); > } > break; > - > + case XEN_DOMCTL_setvnumainfo: > + { > + unsigned int i, j, nr_vnodes, dist_size; > + unsigned int dist, vmap, vntop; > + vnuma_memblk_t vmemblk; > + > + ret = -EINVAL; > + dist = i = j = 0;I think the zeroing of these is unnecessary -- you zero them / initialize them before you use them below. You need to break this code into "paragraphs" of related changes, with a space in between. For example, maybe here...> + nr_vnodes = op->u.vnuma.nr_vnodes; > + if (nr_vnodes <= 0 || nr_vnodes > NR_CPUS) > + break;here...> + d->vnuma.nr_vnodes = nr_vnodes; > + dist_size = nr_vnodes * nr_vnodes;here...> + if ( > + (d->vnuma.vdistance = xmalloc_bytes( > + sizeof(*d->vnuma.vdistance) * dist_size)) == NULL || > + (d->vnuma.vnuma_memblks = xmalloc_bytes( > + sizeof(*d->vnuma.vnuma_memblks) * nr_vnodes)) == NULL || > + (d->vnuma.vcpu_to_vnode = xmalloc_bytes( > + sizeof(*d->vnuma.vcpu_to_vnode) * d->max_vcpus)) == NULL || > + (d->vnuma.vnode_to_pnode = xmalloc_bytes( > + sizeof(*d->vnuma.vnode_to_pnode) * nr_vnodes)) == NULL ) > + goto err_dom;here... Also, for an idiomatic way to do this kind of multiple allocation w/ error checking, take a look at xen/arch/x86/hvm/hvm.c:hvm_domain_initialise() (to pick a random function I''ve worked with recently). Look at the> + for ( i = 0; i < nr_vnodes; i++ ) > + for ( j = 0; j < nr_vnodes; j++ ) > + { > + if ( unlikely(__copy_from_guest_offset(&dist, > + op->u.vnuma.vdistance, > + __vnode_distance_offset(d, i, j), 1)) ) > + goto err_dom; > + __vnode_distance_set(d, i, j, dist); > + }here... 
Also, in each of these error paths, you leak the structures allocated above.> + for ( i = 0; i < nr_vnodes; i++ ) > + { > + if ( unlikely(__copy_from_guest_offset(&vmemblk, > + op->u.vnuma.vnuma_memblks, i, 1)) ) > + goto err_dom; > + d->vnuma.vnuma_memblks[i].start = vmemblk.start; > + d->vnuma.vnuma_memblks[i].end = vmemblk.end; > + }here...> + for ( i = 0; i < d->max_vcpus; i++ ) > + { > + if ( unlikely(__copy_from_guest_offset(&vmap, > + op->u.vnuma.vcpu_to_vnode, i, 1)) ) > + goto err_dom; > + d->vnuma.vcpu_to_vnode[i] = vmap; > + }here...> + if ( !guest_handle_is_null(op->u.vnuma.vnode_to_pnode) ) > + { > + for ( i = 0; i < nr_vnodes; i++ ) > + { > + if ( unlikely(__copy_from_guest_offset(&vntop, > + op->u.vnuma.vnode_to_pnode, i, 1)) ) > + goto err_dom; > + d->vnuma.vnode_to_pnode[i] = vntop; > + } > + } > + else > + for(i = 0; i < nr_vnodes; i++) > + d->vnuma.vnode_to_pnode[i] = NUMA_NO_NODE;here...> + ret = 0;and maybe here.> + break; > +err_dom: > + ret = -EINVAL; > + } > + break; > default: > ret = arch_do_domctl(op, d, u_domctl); > break; > diff --git a/xen/common/memory.c b/xen/common/memory.c > index 50b740f..6dc2452 100644 > --- a/xen/common/memory.c > +++ b/xen/common/memory.c > @@ -28,6 +28,7 @@ > #include <public/memory.h> > #include <xsm/xsm.h> > #include <xen/trace.h> > +#include <xen/vnuma.h> > > struct memop_args { > /* INPUT */ > @@ -732,7 +733,62 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) > rcu_unlock_domain(d); > > break; > + case XENMEM_get_vnuma_info: > + { > + int i, j; > + struct vnuma_topology_info mtopology; > + struct vnuma_topology_info touser_topo; > + struct domain *d; > + unsigned int max_pages, max_vcpus, nr_vnodes; > + vnuma_memblk_t *vblks; > > + rc = -EINVAL;here...> + if ( guest_handle_is_null(arg) ) > + return rc;here...> + if( copy_from_guest(&mtopology, arg, 1) ) > + return -EINVAL;here... (and so on) This should be -EFAULT> + if ( (d = rcu_lock_domain_by_any_id(mtopology.domid)) == NULL ) > + return -EINVAL; > + touser_topo.nr_vnodes = d->vnuma.nr_vnodes; > + max_pages = d->max_pages; > + max_vcpus = d->max_vcpus; > + rcu_unlock_domain(d); > + > + nr_vnodes = touser_topo.nr_vnodes; > + rc = copy_to_guest(arg, &touser_topo, 1); > + if ( rc ) > + return -EFAULT; > + if ( nr_vnodes == 0 || nr_vnodes > max_vcpus ) > + return -EFAULT; > + vblks = xmalloc_array(struct vnuma_memblk, nr_vnodes); > + if ( vblks == NULL ) > + return -EFAULT;Uum, what is this for? 
It doesn''t appear to be used -- if there''s a copy failure it''s freed again, if things succeed it''s leaked.> + for ( i = 0; i < nr_vnodes; i++ ) > + { > + if ( copy_to_guest_offset(mtopology.vnuma_memblks, i, > + &d->vnuma.vnuma_memblks[i], 1) < 0 ) > + goto out;4 spaces for indentation, please.> + } > + for ( i = 0; i < touser_topo.nr_vnodes; i++ ) > + for ( j = 0; j < touser_topo.nr_vnodes; j++ ) > + { > + if ( copy_to_guest_offset(mtopology.vdistance, > + __vnode_distance_offset(d, i, j), > + &__vnode_distance(d, i, j), 1) ) > + goto out; > + } > + for ( i = 0; i < d->max_vcpus ; i++ ) > + { > + if ( copy_to_guest_offset(mtopology.vcpu_to_vnode, i, > + &d->vnuma.vcpu_to_vnode[i], 1) ) > + goto out; > + } > + return rc; > +out: > + if ( vblks ) xfree(vblks); > + return rc; > + break; > + } > default: > rc = arch_memory_op(op, arg); > break; > diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h > index 4c5b2bb..3574d0a 100644 > --- a/xen/include/public/domctl.h > +++ b/xen/include/public/domctl.h > @@ -35,6 +35,7 @@ > #include "xen.h" > #include "grant_table.h" > #include "hvm/save.h" > +#include "memory.h" > > #define XEN_DOMCTL_INTERFACE_VERSION 0x00000009 > > @@ -852,6 +853,17 @@ struct xen_domctl_set_broken_page_p2m { > typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t; > DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t); > > +struct xen_domctl_vnuma { > + uint16_t nr_vnodes; > + XEN_GUEST_HANDLE_64(uint) vdistance; > + XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode; > + XEN_GUEST_HANDLE_64(uint) vnode_to_pnode; > + XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks; > +}; > + > +typedef struct xen_domctl_vnuma xen_domctl_vnuma_t; > +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t); > + > struct xen_domctl { > uint32_t cmd; > #define XEN_DOMCTL_createdomain 1 > @@ -920,6 +932,7 @@ struct xen_domctl { > #define XEN_DOMCTL_set_broken_page_p2m 67 > #define XEN_DOMCTL_setnodeaffinity 68 > #define XEN_DOMCTL_getnodeaffinity 69 > +#define XEN_DOMCTL_setvnumainfo 70 > #define XEN_DOMCTL_gdbsx_guestmemio 1000 > #define XEN_DOMCTL_gdbsx_pausevcpu 1001 > #define XEN_DOMCTL_gdbsx_unpausevcpu 1002 > @@ -979,6 +992,7 @@ struct xen_domctl { > struct xen_domctl_set_broken_page_p2m set_broken_page_p2m; > struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu; > struct xen_domctl_gdbsx_domstatus gdbsx_domstatus; > + struct xen_domctl_vnuma vnuma; > uint8_t pad[128]; > } u; > }; > @@ -986,7 +1000,6 @@ typedef struct xen_domctl xen_domctl_t; > DEFINE_XEN_GUEST_HANDLE(xen_domctl_t); > > #endif /* __XEN_PUBLIC_DOMCTL_H__ */ > - > /* > * Local variables: > * mode: C > diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h > index 7a26dee..28f6aaf 100644 > --- a/xen/include/public/memory.h > +++ b/xen/include/public/memory.h > @@ -453,12 +453,19 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t); > * Caller must be privileged or the hypercall fails. > */ > #define XENMEM_claim_pages 24 > - > /* > * XENMEM_claim_pages flags - the are no flags at this time. > * The zero value is appropiate. 
> */ > > +struct vnuma_memblk { > + uint64_t start, end; > +}; > +typedef struct vnuma_memblk vnuma_memblk_t; > +DEFINE_XEN_GUEST_HANDLE(vnuma_memblk_t); > + > +#define XENMEM_get_vnuma_info 25 > + > #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ > > #endif /* __XEN_PUBLIC_MEMORY_H__ */ > diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h > index a057069..c9d53e3 100644 > --- a/xen/include/xen/domain.h > +++ b/xen/include/xen/domain.h > @@ -4,6 +4,7 @@ > > #include <public/xen.h> > #include <asm/domain.h> > +#include <public/memory.h> > > typedef union { > struct vcpu_guest_context *nat; > @@ -89,4 +90,14 @@ extern unsigned int xen_processor_pmbits; > > extern bool_t opt_dom0_vcpus_pin; > > +struct domain_vnuma_info { > + uint16_t nr_vnodes; > + uint *vdistance; > + uint *vcpu_to_vnode; > + uint *vnode_to_pnode; > + vnuma_memblk_t *vnuma_memblks; > +};Are these the right sizes for things to allocate? Do we really need 32 bits to represent distance, vnode and pnode? Can we use u16 or u8? -George
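The allocation pattern George points at (xen/arch/x86/hvm/hvm.c:hvm_domain_initialise()) typically allocates everything up front and funnels all failures to a single label that releases whatever was obtained. Applied to this domctl it might look roughly like the sketch below; the label name and the reuse of domain_vnuma_destroy() for cleanup are illustrative, not a prescription:

    ret = -ENOMEM;
    d->vnuma.vdistance = xmalloc_array(unsigned int, dist_size);
    d->vnuma.vnuma_memblks = xmalloc_array(vnuma_memblk_t, nr_vnodes);
    d->vnuma.vcpu_to_vnode = xmalloc_array(unsigned int, d->max_vcpus);
    d->vnuma.vnode_to_pnode = xmalloc_array(unsigned int, nr_vnodes);
    if ( !d->vnuma.vdistance || !d->vnuma.vnuma_memblks ||
         !d->vnuma.vcpu_to_vnode || !d->vnuma.vnode_to_pnode )
        goto setvnuma_fail;

    /* ... guest copies here, setting ret = -EFAULT and jumping to
     * setvnuma_fail on any failure ... */

    ret = 0;
    break;

 setvnuma_fail:
    domain_vnuma_destroy(&d->vnuma);    /* frees whatever was allocated */
    break;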
Elena Ufimtseva
2013-Sep-17 06:44 UTC
Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
Hi guys Thank you for reviewing, I will clean these out. George, after talking to Dario, I think the max number of physical nodes will not exceed 256. Dario''s automatic NUMA placement work with this number and I think it can be easily u8. Unless anyone has other thoughts. Elena On Mon, Sep 16, 2013 at 11:46 AM, George Dunlap <George.Dunlap@eu.citrix.com> wrote:> On Fri, Sep 13, 2013 at 9:49 AM, Elena Ufimtseva <ufimtseva@gmail.com> wrote: >> +void domain_vnuma_destroy(struct domain_vnuma_info *v) > > As long as you''re respinning, I think I''d prefer this be named > something different -- ''v'' is almost universally of type "struct vcpu" > in the Xen code, so this code is a little confusing. I might use > "vinfo", or even just take a domain pointer and dereference it > yourself. > >> +{ >> + if (v->vnuma_memblks) { >> + xfree(v->vnuma_memblks); >> + v->vnuma_memblks = NULL; >> + } >> + if (v->vcpu_to_vnode) { >> + xfree(v->vcpu_to_vnode); >> + v->vcpu_to_vnode = NULL; >> + } >> + if (v->vdistance) { >> + xfree(v->vdistance); >> + v->vdistance = NULL; >> + } >> + if (v->vnode_to_pnode) { >> + xfree(v->vnode_to_pnode); >> + v->vnode_to_pnode = NULL; >> + } >> + v->nr_vnodes = 0; >> +} >> + >> /* >> * Local variables: >> * mode: C >> diff --git a/xen/common/domctl.c b/xen/common/domctl.c >> index 9760d50..e5d05c7 100644 >> --- a/xen/common/domctl.c >> +++ b/xen/common/domctl.c >> @@ -29,6 +29,7 @@ >> #include <asm/page.h> >> #include <public/domctl.h> >> #include <xsm/xsm.h> >> +#include <xen/vnuma.h> >> >> static DEFINE_SPINLOCK(domctl_lock); >> DEFINE_SPINLOCK(vcpu_alloc_lock); >> @@ -862,7 +863,72 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) >> ret = set_global_virq_handler(d, virq); >> } >> break; >> - >> + case XEN_DOMCTL_setvnumainfo: >> + { >> + unsigned int i, j, nr_vnodes, dist_size; >> + unsigned int dist, vmap, vntop; >> + vnuma_memblk_t vmemblk; >> + >> + ret = -EINVAL; >> + dist = i = j = 0; > > I think the zeroing of these is unnecessary -- you zero them / > initialize them before you use them below. > > You need to break this code into "paragraphs" of related changes, with > a space in between. For example, maybe here... > >> + nr_vnodes = op->u.vnuma.nr_vnodes; >> + if (nr_vnodes <= 0 || nr_vnodes > NR_CPUS) >> + break; > > here... > >> + d->vnuma.nr_vnodes = nr_vnodes; >> + dist_size = nr_vnodes * nr_vnodes; > > here... > >> + if ( >> + (d->vnuma.vdistance = xmalloc_bytes( >> + sizeof(*d->vnuma.vdistance) * dist_size)) == NULL || >> + (d->vnuma.vnuma_memblks = xmalloc_bytes( >> + sizeof(*d->vnuma.vnuma_memblks) * nr_vnodes)) == NULL || >> + (d->vnuma.vcpu_to_vnode = xmalloc_bytes( >> + sizeof(*d->vnuma.vcpu_to_vnode) * d->max_vcpus)) == NULL || >> + (d->vnuma.vnode_to_pnode = xmalloc_bytes( >> + sizeof(*d->vnuma.vnode_to_pnode) * nr_vnodes)) == NULL ) >> + goto err_dom; > > here... > > Also, for an idiomatic way to do this kind of multiple allocation w/ > error checking, take a look at > xen/arch/x86/hvm/hvm.c:hvm_domain_initialise() (to pick a random > function I''ve worked with recently). Look at the > >> + for ( i = 0; i < nr_vnodes; i++ ) >> + for ( j = 0; j < nr_vnodes; j++ ) >> + { >> + if ( unlikely(__copy_from_guest_offset(&dist, >> + op->u.vnuma.vdistance, >> + __vnode_distance_offset(d, i, j), 1)) ) >> + goto err_dom; >> + __vnode_distance_set(d, i, j, dist); >> + } > > here... > > Also, in each of these error paths, you leak the structures allocated above. 
> >> + for ( i = 0; i < nr_vnodes; i++ ) >> + { >> + if ( unlikely(__copy_from_guest_offset(&vmemblk, >> + op->u.vnuma.vnuma_memblks, i, 1)) ) >> + goto err_dom; >> + d->vnuma.vnuma_memblks[i].start = vmemblk.start; >> + d->vnuma.vnuma_memblks[i].end = vmemblk.end; >> + } > > here... > >> + for ( i = 0; i < d->max_vcpus; i++ ) >> + { >> + if ( unlikely(__copy_from_guest_offset(&vmap, >> + op->u.vnuma.vcpu_to_vnode, i, 1)) ) >> + goto err_dom; >> + d->vnuma.vcpu_to_vnode[i] = vmap; >> + } > > here... > >> + if ( !guest_handle_is_null(op->u.vnuma.vnode_to_pnode) ) >> + { >> + for ( i = 0; i < nr_vnodes; i++ ) >> + { >> + if ( unlikely(__copy_from_guest_offset(&vntop, >> + op->u.vnuma.vnode_to_pnode, i, 1)) ) >> + goto err_dom; >> + d->vnuma.vnode_to_pnode[i] = vntop; >> + } >> + } >> + else >> + for(i = 0; i < nr_vnodes; i++) >> + d->vnuma.vnode_to_pnode[i] = NUMA_NO_NODE; > > here... > >> + ret = 0; > > and maybe here. > >> + break; >> +err_dom: >> + ret = -EINVAL; >> + } >> + break; >> default: >> ret = arch_do_domctl(op, d, u_domctl); >> break; >> diff --git a/xen/common/memory.c b/xen/common/memory.c >> index 50b740f..6dc2452 100644 >> --- a/xen/common/memory.c >> +++ b/xen/common/memory.c >> @@ -28,6 +28,7 @@ >> #include <public/memory.h> >> #include <xsm/xsm.h> >> #include <xen/trace.h> >> +#include <xen/vnuma.h> >> >> struct memop_args { >> /* INPUT */ >> @@ -732,7 +733,62 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) >> rcu_unlock_domain(d); >> >> break; >> + case XENMEM_get_vnuma_info: >> + { >> + int i, j; >> + struct vnuma_topology_info mtopology; >> + struct vnuma_topology_info touser_topo; >> + struct domain *d; >> + unsigned int max_pages, max_vcpus, nr_vnodes; >> + vnuma_memblk_t *vblks; >> >> + rc = -EINVAL; > > here... > >> + if ( guest_handle_is_null(arg) ) >> + return rc; > > here... > >> + if( copy_from_guest(&mtopology, arg, 1) ) >> + return -EINVAL; > > here... (and so on) > > This should be -EFAULT > >> + if ( (d = rcu_lock_domain_by_any_id(mtopology.domid)) == NULL ) >> + return -EINVAL; >> + touser_topo.nr_vnodes = d->vnuma.nr_vnodes; >> + max_pages = d->max_pages; >> + max_vcpus = d->max_vcpus; >> + rcu_unlock_domain(d); >> + >> + nr_vnodes = touser_topo.nr_vnodes; >> + rc = copy_to_guest(arg, &touser_topo, 1); >> + if ( rc ) >> + return -EFAULT; >> + if ( nr_vnodes == 0 || nr_vnodes > max_vcpus ) >> + return -EFAULT; >> + vblks = xmalloc_array(struct vnuma_memblk, nr_vnodes); >> + if ( vblks == NULL ) >> + return -EFAULT; > > Uum, what is this for? It doesn''t appear to be used -- if there''s a > copy failure it''s freed again, if things succeed it''s leaked. > >> + for ( i = 0; i < nr_vnodes; i++ ) >> + { >> + if ( copy_to_guest_offset(mtopology.vnuma_memblks, i, >> + &d->vnuma.vnuma_memblks[i], 1) < 0 ) >> + goto out; > > 4 spaces for indentation, please. 
> >> + } >> + for ( i = 0; i < touser_topo.nr_vnodes; i++ ) >> + for ( j = 0; j < touser_topo.nr_vnodes; j++ ) >> + { >> + if ( copy_to_guest_offset(mtopology.vdistance, >> + __vnode_distance_offset(d, i, j), >> + &__vnode_distance(d, i, j), 1) ) >> + goto out; >> + } >> + for ( i = 0; i < d->max_vcpus ; i++ ) >> + { >> + if ( copy_to_guest_offset(mtopology.vcpu_to_vnode, i, >> + &d->vnuma.vcpu_to_vnode[i], 1) ) >> + goto out; >> + } >> + return rc; >> +out: >> + if ( vblks ) xfree(vblks); >> + return rc; >> + break; >> + } >> default: >> rc = arch_memory_op(op, arg); >> break; >> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h >> index 4c5b2bb..3574d0a 100644 >> --- a/xen/include/public/domctl.h >> +++ b/xen/include/public/domctl.h >> @@ -35,6 +35,7 @@ >> #include "xen.h" >> #include "grant_table.h" >> #include "hvm/save.h" >> +#include "memory.h" >> >> #define XEN_DOMCTL_INTERFACE_VERSION 0x00000009 >> >> @@ -852,6 +853,17 @@ struct xen_domctl_set_broken_page_p2m { >> typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t; >> DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t); >> >> +struct xen_domctl_vnuma { >> + uint16_t nr_vnodes; >> + XEN_GUEST_HANDLE_64(uint) vdistance; >> + XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode; >> + XEN_GUEST_HANDLE_64(uint) vnode_to_pnode; >> + XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks; >> +}; >> + >> +typedef struct xen_domctl_vnuma xen_domctl_vnuma_t; >> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t); >> + >> struct xen_domctl { >> uint32_t cmd; >> #define XEN_DOMCTL_createdomain 1 >> @@ -920,6 +932,7 @@ struct xen_domctl { >> #define XEN_DOMCTL_set_broken_page_p2m 67 >> #define XEN_DOMCTL_setnodeaffinity 68 >> #define XEN_DOMCTL_getnodeaffinity 69 >> +#define XEN_DOMCTL_setvnumainfo 70 >> #define XEN_DOMCTL_gdbsx_guestmemio 1000 >> #define XEN_DOMCTL_gdbsx_pausevcpu 1001 >> #define XEN_DOMCTL_gdbsx_unpausevcpu 1002 >> @@ -979,6 +992,7 @@ struct xen_domctl { >> struct xen_domctl_set_broken_page_p2m set_broken_page_p2m; >> struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu; >> struct xen_domctl_gdbsx_domstatus gdbsx_domstatus; >> + struct xen_domctl_vnuma vnuma; >> uint8_t pad[128]; >> } u; >> }; >> @@ -986,7 +1000,6 @@ typedef struct xen_domctl xen_domctl_t; >> DEFINE_XEN_GUEST_HANDLE(xen_domctl_t); >> >> #endif /* __XEN_PUBLIC_DOMCTL_H__ */ >> - >> /* >> * Local variables: >> * mode: C >> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h >> index 7a26dee..28f6aaf 100644 >> --- a/xen/include/public/memory.h >> +++ b/xen/include/public/memory.h >> @@ -453,12 +453,19 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t); >> * Caller must be privileged or the hypercall fails. >> */ >> #define XENMEM_claim_pages 24 >> - >> /* >> * XENMEM_claim_pages flags - the are no flags at this time. >> * The zero value is appropiate. 
>> */ >> >> +struct vnuma_memblk { >> + uint64_t start, end; >> +}; >> +typedef struct vnuma_memblk vnuma_memblk_t; >> +DEFINE_XEN_GUEST_HANDLE(vnuma_memblk_t); >> + >> +#define XENMEM_get_vnuma_info 25 >> + >> #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ >> >> #endif /* __XEN_PUBLIC_MEMORY_H__ */ >> diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h >> index a057069..c9d53e3 100644 >> --- a/xen/include/xen/domain.h >> +++ b/xen/include/xen/domain.h >> @@ -4,6 +4,7 @@ >> >> #include <public/xen.h> >> #include <asm/domain.h> >> +#include <public/memory.h> >> >> typedef union { >> struct vcpu_guest_context *nat; >> @@ -89,4 +90,14 @@ extern unsigned int xen_processor_pmbits; >> >> extern bool_t opt_dom0_vcpus_pin; >> >> +struct domain_vnuma_info { >> + uint16_t nr_vnodes; >> + uint *vdistance; >> + uint *vcpu_to_vnode; >> + uint *vnode_to_pnode; >> + vnuma_memblk_t *vnuma_memblks; >> +}; > > Are these the right sizes for things to allocate? Do we really need > 32 bits to represent distance, vnode and pnode? Can we use u16 or u8? > > -George-- Elena
Elena Ufimtseva
2013-Sep-17 06:59 UTC
Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
Looks like I have.. Linux type for number of nodes is int, so Id rather keep it this way. On Tue, Sep 17, 2013 at 2:44 AM, Elena Ufimtseva <ufimtseva@gmail.com> wrote:> Hi guys > Thank you for reviewing, I will clean these out. > > George, after talking to Dario, I think the max number of physical > nodes will not exceed 256. Dario''s automatic NUMA > placement work with this number and I think it can be easily u8. > Unless anyone has other thoughts. > > Elena > > On Mon, Sep 16, 2013 at 11:46 AM, George Dunlap > <George.Dunlap@eu.citrix.com> wrote: >> On Fri, Sep 13, 2013 at 9:49 AM, Elena Ufimtseva <ufimtseva@gmail.com> wrote: >>> +void domain_vnuma_destroy(struct domain_vnuma_info *v) >> >> As long as you''re respinning, I think I''d prefer this be named >> something different -- ''v'' is almost universally of type "struct vcpu" >> in the Xen code, so this code is a little confusing. I might use >> "vinfo", or even just take a domain pointer and dereference it >> yourself. >> >>> +{ >>> + if (v->vnuma_memblks) { >>> + xfree(v->vnuma_memblks); >>> + v->vnuma_memblks = NULL; >>> + } >>> + if (v->vcpu_to_vnode) { >>> + xfree(v->vcpu_to_vnode); >>> + v->vcpu_to_vnode = NULL; >>> + } >>> + if (v->vdistance) { >>> + xfree(v->vdistance); >>> + v->vdistance = NULL; >>> + } >>> + if (v->vnode_to_pnode) { >>> + xfree(v->vnode_to_pnode); >>> + v->vnode_to_pnode = NULL; >>> + } >>> + v->nr_vnodes = 0; >>> +} >>> + >>> /* >>> * Local variables: >>> * mode: C >>> diff --git a/xen/common/domctl.c b/xen/common/domctl.c >>> index 9760d50..e5d05c7 100644 >>> --- a/xen/common/domctl.c >>> +++ b/xen/common/domctl.c >>> @@ -29,6 +29,7 @@ >>> #include <asm/page.h> >>> #include <public/domctl.h> >>> #include <xsm/xsm.h> >>> +#include <xen/vnuma.h> >>> >>> static DEFINE_SPINLOCK(domctl_lock); >>> DEFINE_SPINLOCK(vcpu_alloc_lock); >>> @@ -862,7 +863,72 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) >>> ret = set_global_virq_handler(d, virq); >>> } >>> break; >>> - >>> + case XEN_DOMCTL_setvnumainfo: >>> + { >>> + unsigned int i, j, nr_vnodes, dist_size; >>> + unsigned int dist, vmap, vntop; >>> + vnuma_memblk_t vmemblk; >>> + >>> + ret = -EINVAL; >>> + dist = i = j = 0; >> >> I think the zeroing of these is unnecessary -- you zero them / >> initialize them before you use them below. >> >> You need to break this code into "paragraphs" of related changes, with >> a space in between. For example, maybe here... >> >>> + nr_vnodes = op->u.vnuma.nr_vnodes; >>> + if (nr_vnodes <= 0 || nr_vnodes > NR_CPUS) >>> + break; >> >> here... >> >>> + d->vnuma.nr_vnodes = nr_vnodes; >>> + dist_size = nr_vnodes * nr_vnodes; >> >> here... >> >>> + if ( >>> + (d->vnuma.vdistance = xmalloc_bytes( >>> + sizeof(*d->vnuma.vdistance) * dist_size)) == NULL || >>> + (d->vnuma.vnuma_memblks = xmalloc_bytes( >>> + sizeof(*d->vnuma.vnuma_memblks) * nr_vnodes)) == NULL || >>> + (d->vnuma.vcpu_to_vnode = xmalloc_bytes( >>> + sizeof(*d->vnuma.vcpu_to_vnode) * d->max_vcpus)) == NULL || >>> + (d->vnuma.vnode_to_pnode = xmalloc_bytes( >>> + sizeof(*d->vnuma.vnode_to_pnode) * nr_vnodes)) == NULL ) >>> + goto err_dom; >> >> here... >> >> Also, for an idiomatic way to do this kind of multiple allocation w/ >> error checking, take a look at >> xen/arch/x86/hvm/hvm.c:hvm_domain_initialise() (to pick a random >> function I''ve worked with recently). 
Look at the >> >>> + for ( i = 0; i < nr_vnodes; i++ ) >>> + for ( j = 0; j < nr_vnodes; j++ ) >>> + { >>> + if ( unlikely(__copy_from_guest_offset(&dist, >>> + op->u.vnuma.vdistance, >>> + __vnode_distance_offset(d, i, j), 1)) ) >>> + goto err_dom; >>> + __vnode_distance_set(d, i, j, dist); >>> + } >> >> here... >> >> Also, in each of these error paths, you leak the structures allocated above. >> >>> + for ( i = 0; i < nr_vnodes; i++ ) >>> + { >>> + if ( unlikely(__copy_from_guest_offset(&vmemblk, >>> + op->u.vnuma.vnuma_memblks, i, 1)) ) >>> + goto err_dom; >>> + d->vnuma.vnuma_memblks[i].start = vmemblk.start; >>> + d->vnuma.vnuma_memblks[i].end = vmemblk.end; >>> + } >> >> here... >> >>> + for ( i = 0; i < d->max_vcpus; i++ ) >>> + { >>> + if ( unlikely(__copy_from_guest_offset(&vmap, >>> + op->u.vnuma.vcpu_to_vnode, i, 1)) ) >>> + goto err_dom; >>> + d->vnuma.vcpu_to_vnode[i] = vmap; >>> + } >> >> here... >> >>> + if ( !guest_handle_is_null(op->u.vnuma.vnode_to_pnode) ) >>> + { >>> + for ( i = 0; i < nr_vnodes; i++ ) >>> + { >>> + if ( unlikely(__copy_from_guest_offset(&vntop, >>> + op->u.vnuma.vnode_to_pnode, i, 1)) ) >>> + goto err_dom; >>> + d->vnuma.vnode_to_pnode[i] = vntop; >>> + } >>> + } >>> + else >>> + for(i = 0; i < nr_vnodes; i++) >>> + d->vnuma.vnode_to_pnode[i] = NUMA_NO_NODE; >> >> here... >> >>> + ret = 0; >> >> and maybe here. >> >>> + break; >>> +err_dom: >>> + ret = -EINVAL; >>> + } >>> + break; >>> default: >>> ret = arch_do_domctl(op, d, u_domctl); >>> break; >>> diff --git a/xen/common/memory.c b/xen/common/memory.c >>> index 50b740f..6dc2452 100644 >>> --- a/xen/common/memory.c >>> +++ b/xen/common/memory.c >>> @@ -28,6 +28,7 @@ >>> #include <public/memory.h> >>> #include <xsm/xsm.h> >>> #include <xen/trace.h> >>> +#include <xen/vnuma.h> >>> >>> struct memop_args { >>> /* INPUT */ >>> @@ -732,7 +733,62 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) >>> rcu_unlock_domain(d); >>> >>> break; >>> + case XENMEM_get_vnuma_info: >>> + { >>> + int i, j; >>> + struct vnuma_topology_info mtopology; >>> + struct vnuma_topology_info touser_topo; >>> + struct domain *d; >>> + unsigned int max_pages, max_vcpus, nr_vnodes; >>> + vnuma_memblk_t *vblks; >>> >>> + rc = -EINVAL; >> >> here... >> >>> + if ( guest_handle_is_null(arg) ) >>> + return rc; >> >> here... >> >>> + if( copy_from_guest(&mtopology, arg, 1) ) >>> + return -EINVAL; >> >> here... (and so on) >> >> This should be -EFAULT >> >>> + if ( (d = rcu_lock_domain_by_any_id(mtopology.domid)) == NULL ) >>> + return -EINVAL; >>> + touser_topo.nr_vnodes = d->vnuma.nr_vnodes; >>> + max_pages = d->max_pages; >>> + max_vcpus = d->max_vcpus; >>> + rcu_unlock_domain(d); >>> + >>> + nr_vnodes = touser_topo.nr_vnodes; >>> + rc = copy_to_guest(arg, &touser_topo, 1); >>> + if ( rc ) >>> + return -EFAULT; >>> + if ( nr_vnodes == 0 || nr_vnodes > max_vcpus ) >>> + return -EFAULT; >>> + vblks = xmalloc_array(struct vnuma_memblk, nr_vnodes); >>> + if ( vblks == NULL ) >>> + return -EFAULT; >> >> Uum, what is this for? It doesn''t appear to be used -- if there''s a >> copy failure it''s freed again, if things succeed it''s leaked. >> >>> + for ( i = 0; i < nr_vnodes; i++ ) >>> + { >>> + if ( copy_to_guest_offset(mtopology.vnuma_memblks, i, >>> + &d->vnuma.vnuma_memblks[i], 1) < 0 ) >>> + goto out; >> >> 4 spaces for indentation, please. 
>>
>>> + }
>>> + for ( i = 0; i < touser_topo.nr_vnodes; i++ )
>>> + for ( j = 0; j < touser_topo.nr_vnodes; j++ )
>>> + {
>>> + if ( copy_to_guest_offset(mtopology.vdistance,
>>> + __vnode_distance_offset(d, i, j),
>>> + &__vnode_distance(d, i, j), 1) )
>>> + goto out;
>>> + }
>>> + for ( i = 0; i < d->max_vcpus ; i++ )
>>> + {
>>> + if ( copy_to_guest_offset(mtopology.vcpu_to_vnode, i,
>>> + &d->vnuma.vcpu_to_vnode[i], 1) )
>>> + goto out;
>>> + }
>>> + return rc;
>>> +out:
>>> + if ( vblks ) xfree(vblks);
>>> + return rc;
>>> + break;
>>> + }
>>> default:
>>> rc = arch_memory_op(op, arg);
>>> break;
>>> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
>>> index 4c5b2bb..3574d0a 100644
>>> --- a/xen/include/public/domctl.h
>>> +++ b/xen/include/public/domctl.h
>>> @@ -35,6 +35,7 @@
>>> #include "xen.h"
>>> #include "grant_table.h"
>>> #include "hvm/save.h"
>>> +#include "memory.h"
>>>
>>> #define XEN_DOMCTL_INTERFACE_VERSION 0x00000009
>>>
>>> @@ -852,6 +853,17 @@ struct xen_domctl_set_broken_page_p2m {
>>> typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t;
>>> DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t);
>>>
>>> +struct xen_domctl_vnuma {
>>> + uint16_t nr_vnodes;
>>> + XEN_GUEST_HANDLE_64(uint) vdistance;
>>> + XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
>>> + XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
>>> + XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
>>> +};
>>> +
>>> +typedef struct xen_domctl_vnuma xen_domctl_vnuma_t;
>>> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t);
>>> +
>>> struct xen_domctl {
>>> uint32_t cmd;
>>> #define XEN_DOMCTL_createdomain 1
>>> @@ -920,6 +932,7 @@ struct xen_domctl {
>>> #define XEN_DOMCTL_set_broken_page_p2m 67
>>> #define XEN_DOMCTL_setnodeaffinity 68
>>> #define XEN_DOMCTL_getnodeaffinity 69
>>> +#define XEN_DOMCTL_setvnumainfo 70
>>> #define XEN_DOMCTL_gdbsx_guestmemio 1000
>>> #define XEN_DOMCTL_gdbsx_pausevcpu 1001
>>> #define XEN_DOMCTL_gdbsx_unpausevcpu 1002
>>> @@ -979,6 +992,7 @@ struct xen_domctl {
>>> struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
>>> struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
>>> struct xen_domctl_gdbsx_domstatus gdbsx_domstatus;
>>> + struct xen_domctl_vnuma vnuma;
>>> uint8_t pad[128];
>>> } u;
>>> };
>>> @@ -986,7 +1000,6 @@ typedef struct xen_domctl xen_domctl_t;
>>> DEFINE_XEN_GUEST_HANDLE(xen_domctl_t);
>>>
>>> #endif /* __XEN_PUBLIC_DOMCTL_H__ */
>>> -
>>> /*
>>> * Local variables:
>>> * mode: C
>>> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
>>> index 7a26dee..28f6aaf 100644
>>> --- a/xen/include/public/memory.h
>>> +++ b/xen/include/public/memory.h
>>> @@ -453,12 +453,19 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
>>> * Caller must be privileged or the hypercall fails.
>>> */
>>> #define XENMEM_claim_pages 24
>>> -
>>> /*
>>> * XENMEM_claim_pages flags - the are no flags at this time.
>>> * The zero value is appropiate.
>>> */
>>>
>>> +struct vnuma_memblk {
>>> + uint64_t start, end;
>>> +};
>>> +typedef struct vnuma_memblk vnuma_memblk_t;
>>> +DEFINE_XEN_GUEST_HANDLE(vnuma_memblk_t);
>>> +
>>> +#define XENMEM_get_vnuma_info 25
>>> +
>>> #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
>>>
>>> #endif /* __XEN_PUBLIC_MEMORY_H__ */
>>> diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
>>> index a057069..c9d53e3 100644
>>> --- a/xen/include/xen/domain.h
>>> +++ b/xen/include/xen/domain.h
>>> @@ -4,6 +4,7 @@
>>>
>>> #include <public/xen.h>
>>> #include <asm/domain.h>
>>> +#include <public/memory.h>
>>>
>>> typedef union {
>>> struct vcpu_guest_context *nat;
>>> @@ -89,4 +90,14 @@ extern unsigned int xen_processor_pmbits;
>>>
>>> extern bool_t opt_dom0_vcpus_pin;
>>>
>>> +struct domain_vnuma_info {
>>> + uint16_t nr_vnodes;
>>> + uint *vdistance;
>>> + uint *vcpu_to_vnode;
>>> + uint *vnode_to_pnode;
>>> + vnuma_memblk_t *vnuma_memblks;
>>> +};
>>
>> Are these the right sizes for things to allocate? Do we really need
>> 32 bits to represent distance, vnode and pnode? Can we use u16 or u8?
>>
>> -George
>
>
>
> --
> Elena

--
Elena
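Pulling the XENMEM_get_vnuma_info comments together (return -EFAULT for copy failures, and drop the vblks scratch allocation, which is never actually used), the handler could be shaped roughly like this. This is a sketch only; unlike the posted code it also keeps the domain referenced while its vnuma arrays are copied out:

    case XENMEM_get_vnuma_info:
    {
        struct vnuma_topology_info topo;
        struct domain *d;
        unsigned int i;

        if ( guest_handle_is_null(arg) )
            return -EINVAL;
        if ( copy_from_guest(&topo, arg, 1) )
            return -EFAULT;                  /* bad guest pointer, not -EINVAL */
        if ( (d = rcu_lock_domain_by_any_id(topo.domid)) == NULL )
            return -EINVAL;

        /* ... validate d->vnuma.nr_vnodes and copy it back to the guest ... */

        rc = 0;
        for ( i = 0; i < d->vnuma.nr_vnodes && !rc; i++ )
            if ( copy_to_guest_offset(topo.vnuma_memblks, i,
                                      &d->vnuma.vnuma_memblks[i], 1) )
                rc = -EFAULT;                /* nothing to free on the way out */

        /* ... same pattern for vdistance and vcpu_to_vnode ... */

        rcu_unlock_domain(d);
        break;
    }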
Jan Beulich
2013-Sep-17 07:05 UTC
Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
>>> On 17.09.13 at 08:44, Elena Ufimtseva <ufimtseva@gmail.com> wrote:

Please don't top post.

> George, after talking to Dario, I think the max number of physical
> nodes will not exceed 256. Dario's automatic NUMA
> placement work with this number and I think it can be easily u8.
> Unless anyone has other thoughts.

With nr_vnodes being uint16_t, the vnode numbers should be
too. Limiting them to u8 would possibly be even better, but then
nr_vnodes would better be unsigned int (perhaps that was the
case from the beginning, regardless of the types used for the
arrays).

The pnode array surely can also be uint8_t for the time being,
considering that there are other places where node IDs are
limited to 8 bits.

And with struct acpi_table_slit having just 8-bit distances, there's
no apparent reason why the virtual distances can't be 8 bits too.

But - all this is only for the internal representations. Anything in
the public interface should be wide enough to allow future
extension.

Jan

>>> @@ -89,4 +90,14 @@ extern unsigned int xen_processor_pmbits;
>>>
>>> extern bool_t opt_dom0_vcpus_pin;
>>>
>>> +struct domain_vnuma_info {
>>> + uint16_t nr_vnodes;
>>> + uint *vdistance;
>>> + uint *vcpu_to_vnode;
>>> + uint *vnode_to_pnode;
>>> + vnuma_memblk_t *vnuma_memblks;
>>> +};
>>
>> Are these the right sizes for things to allocate? Do we really need
>> 32 bits to represent distance, vnode and pnode? Can we use u16 or u8?
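To make the suggestion concrete, the hypervisor-internal structure Jan describes might end up looking something like this; a sketch of the review feedback, not the code that was posted, and the public domctl/memory interface would keep its wider fields:

    /* Hypervisor-internal only -- the guest-visible structures stay wide. */
    struct domain_vnuma_info {
        unsigned int nr_vnodes;
        u8 *vdistance;        /* nr_vnodes * nr_vnodes entries, 8-bit like the SLIT */
        u8 *vcpu_to_vnode;    /* one virtual node ID per vcpu */
        u8 *vnode_to_pnode;   /* physical node IDs are 8-bit elsewhere in Xen */
        vnuma_memblk_t *vnuma_memblks;
    };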
Dario Faggioli
2013-Sep-17 07:11 UTC
Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
On mar, 2013-09-17 at 08:05 +0100, Jan Beulich wrote:
> >>> On 17.09.13 at 08:44, Elena Ufimtseva <ufimtseva@gmail.com> wrote:
>
> Please don't top post.
>
> > George, after talking to Dario, I think the max number of physical
> > nodes will not exceed 256. Dario's automatic NUMA
> > placement work with this number and I think it can be easily u8.
> > Unless anyone has other thoughts.
>
> With nr_vnodes being uint16_t, the vnode numbers should be
> too. Limiting them to u8 would possibly be even better, but then
> nr_vnodes would better be unsigned int (perhaps that was the
> case from the beginning, regardless of the types used for the
> arrays).
>
> The pnode array surely can also be uint8_t for the time being,
> considering that there are other places where node IDs are
> limited to 8 bits.
>
All agreed.

> And with struct acpi_table_slit having just 8-bit distances, there's
> no apparent reason why the virtual distances can't be 8 bits too.
>
> But - all this is only for the internal representations. Anything in
> the public interface should be wide enough to allow future
> extension.
>
And, in fact, 'node_to_node_distance' in xen/include/public/sysctl.h
(http://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=xen/include/public/sysctl.h)
is uint32.

Dario

--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
Elena Ufimtseva
2013-Sep-17 07:19 UTC
Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
On Tue, Sep 17, 2013 at 3:11 AM, Dario Faggioli
<dario.faggioli@citrix.com> wrote:
> On mar, 2013-09-17 at 08:05 +0100, Jan Beulich wrote:
>> >>> On 17.09.13 at 08:44, Elena Ufimtseva <ufimtseva@gmail.com> wrote:
>>
>> Please don't top post.
>>
>> > George, after talking to Dario, I think the max number of physical
>> > nodes will not exceed 256. Dario's automatic NUMA
>> > placement work with this number and I think it can be easily u8.
>> > Unless anyone has other thoughts.
>>
>> With nr_vnodes being uint16_t, the vnode numbers should be
>> too. Limiting them to u8 would possibly be even better, but then
>> nr_vnodes would better be unsigned int (perhaps that was the
>> case from the beginning, regardless of the types used for the
>> arrays).
>>
>> The pnode array surely can also be uint8_t for the time being,
>> considering that there are other places where node IDs are
>> limited to 8 bits.
>>
> All agreed.
>
>> And with struct acpi_table_slit having just 8-bit distances, there's
>> no apparent reason why the virtual distances can't be 8 bits too.
>>
>> But - all this is only for the internal representations. Anything in
>> the public interface should be wide enough to allow future
>> extension.
>>
> And, in fact, 'node_to_node_distance' in xen/include/public/sysctl.h
> (http://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=xen/include/public/sysctl.h)
> is uint32.
>

Linux has u8 for distance. Ok, thank you for pointing that out.

Elena

> Dario
>
> --
> <<This happens because I choose it to happen!>> (Raistlin Majere)
> -----------------------------------------------------------------
> Dario Faggioli, Ph.D, http://about.me/dario.faggioli
> Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
>

--
Elena
Dario Faggioli
2013-Sep-17 09:04 UTC
Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
On mar, 2013-09-17 at 03:19 -0400, Elena Ufimtseva wrote:
> On Tue, Sep 17, 2013 at 3:11 AM, Dario Faggioli
> <dario.faggioli@citrix.com> wrote:
> > On mar, 2013-09-17 at 08:05 +0100, Jan Beulich wrote:
> >> But - all this is only for the internal representations. Anything in
> >> the public interface should be wide enough to allow future
> >> extension.
> >>
> > And, in fact, 'node_to_node_distance' in xen/include/public/sysctl.h
> > (http://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=xen/include/public/sysctl.h)
> > is uint32.
> >
>
> Linux has u8 for distance. Ok, thank you for pointing that out.
>
EhEh... So, very hard to be consistent with every actor in the play! :-)

Anyway, I think the point here is, as Jan was saying, to distinguish
internal representation from interface. Within Xen, we should store
everything in the smallest and nicest possible way (which, BTW, is what
Linux does by using u8 for distances).

OTOH, when it comes to exported interfaces, we should be much more
cautious, since changing the way Xen stores distances internally is
trivial, changing the interface (either API or ABI) could be a
nightmare!

In this case, what we (Xen) tell Linux is the interface, it is up to him
(Linux) to convert that in a way that suits its own internals. In fact,
despite Linux being the only one OS using this interface for now, it's
not wise to design the interface itself specifically for Linux, since
many OSes may want to support it in the future.

Regards,
Dario

--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
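In code terms, the split described here is just a narrowing step at the boundary: the hypercall hands back wide values, and the guest converts them into whatever it stores internally. A hypothetical guest-side helper, with the name and clamping policy made up for the example:

    /* Interface value (uint, per the public header) -> internal 8-bit distance. */
    static u8 vnuma_distance_from_iface(unsigned int dist)
    {
        return dist > 0xff ? 0xff : (u8)dist;   /* clamp rather than truncate */
    }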
Li Yechen
2013-Oct-03 12:27 UTC
Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
Hi Elena,
Thank you for your great work first :)
In your patch, guest can use a hypercall name: "XENMEM_get_vnuma_info"
to get the structure below:
+struct vnuma_topology_info {
+ domid_t domid;
+ uint16_t nr_vnodes;
+ uint32_t _pad;
+ XEN_GUEST_HANDLE_64(uint) vdistance;
+ XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
+ XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
+};
I see that Xen saves a vnode_to_pnode[] array for the mapping.
However, this hypercall won't give this mapping to the guest VM.
Is that right?
On Tue, Sep 17, 2013 at 5:04 PM, Dario Faggioli
<dario.faggioli@citrix.com> wrote:
> On mar, 2013-09-17 at 03:19 -0400, Elena Ufimtseva wrote:
> > On Tue, Sep 17, 2013 at 3:11 AM, Dario Faggioli
> > <dario.faggioli@citrix.com> wrote:
> > > On mar, 2013-09-17 at 08:05 +0100, Jan Beulich wrote:
> > >> But - all this is only for the internal representations. Anything in
> > >> the public interface should be wide enough to allow future
> > >> extension.
> > >>
> > > And, in fact, 'node_to_node_distance' in xen/include/public/sysctl.h
> > > (http://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=xen/include/public/sysctl.h)
> > > is uint32.
> > >
> >
> > Linux has u8 for distance. Ok, thank you for pointing that out.
> >
> EhEh... So, very hard to be consistent with every actor in the
> play! :-)
>
> Anyway, I think the point here is, as Jan was saying, to distinguish
> internal representation from interface. Within Xen, we should store
> everything in the smallest and nicest possible way (which, BTW, is what
> Linux does by using u8 for distances).
>
> OTOH, when it comes to exported interfaces, we should be much more
> cautious, since changing the way Xen stores distances internally is
> trivial, changing the interface (either API or ABI) could be a
> nightmare!
>
> In this case, what we (Xen) tell Linux is the interface, it is up to him
> (Linux) to convert that in a way that suits its own internals. In fact,
> despite Linux being the only one OS using this interface for now,
> it's
> not wise to design the interface itself specifically for Linux, since
> many OSes may want to support it in the future.
>
> Regards,
> Dario
>
> --
> <<This happens because I choose it to happen!>> (Raistlin
> Majere)
> -----------------------------------------------------------------
> Dario Faggioli, Ph.D, http://about.me/dario.faggioli
> Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
>
>
--
Yechen Li
Team of System Virtualization and Cloud Computing
School of Electronic Engineering and Computer Science,
Peking University, China
Nothing is impossible because impossible itself says: " I'm
possible "
lccycc From PKU
Dario Faggioli
2013-Oct-03 15:17 UTC
Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
On gio, 2013-10-03 at 20:27 +0800, Li Yechen wrote:
> Hi Elena,
> Thank you for your great work first :)
>
> In your patch, guest can use a hypercall name: "XENMEM_get_vnuma_info"
> to get the structure below:
> +struct vnuma_topology_info {
> + domid_t domid;
> + uint16_t nr_vnodes;
> + uint32_t _pad;
> + XEN_GUEST_HANDLE_64(uint) vdistance;
> + XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
> + XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
> +};
>
>
> I see that Xen saves a vnode_to_pnode[] array for the mapping.
>
Yes it does. We spoke about it quite a bit, and Xen seems to be the best
place where to store such info, considering how easy it is to keep it
there, and that it may turn out useful in future (and ballooning is a
clear example of that).

> However, this hypercall won't give this mapping to the guest VM.
>
Not with this hypercall, no. Actually, did most of the people, when
reviewing your RFC patches, say that they prefer the mapping not
exposed at all to the guest?

Dario

--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
Elena Ufimtseva
2013-Oct-03 15:33 UTC
Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
Hello Li, Dario! :)

On Thu, Oct 3, 2013 at 11:17 AM, Dario Faggioli <dario.faggioli@citrix.com> wrote:
> On gio, 2013-10-03 at 20:27 +0800, Li Yechen wrote:
>> Hi Elena,
>> Thank you for your great work first :)
>>
>> In your patch, guest can use a hypercall name: "XENMEM_get_vnuma_info"
>> to get the structure below:
>> +struct vnuma_topology_info {
>> + domid_t domid;
>> + uint16_t nr_vnodes;
>> + uint32_t _pad;
>> + XEN_GUEST_HANDLE_64(uint) vdistance;
>> + XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
>> + XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
>> +};
>>
>>
>> I see that Xen saves a vnode_to_pnode[] array for the mapping.
>>
> Yes it does. We spoke about it quite a bit, and Xen seems to be the best
> place where to store such info, considering how easy it is to keep it
> there, and that it may turn out useful in future (and ballooning is a
> clear example of that).
>
>> However, this hypercall won't give this mapping to the guest VM.
>>
> Not with this hypercall, no. Actually, did most of the people, when
> reviewing your RFC patches, say that they prefer the mapping not
> exposed at all to the guest?

Yes Dario, correct. I think someone, possibly Konrad, was saying that
it would be an interesting possibility. But I don't think there was a
change in this.

So, for Li: I think I can provide a hypercall that will return the
mappings. Li, do you think it will work for you?

> Dario
>
> --
> <<This happens because I choose it to happen!>> (Raistlin Majere)
> -----------------------------------------------------------------
> Dario Faggioli, Ph.D, http://about.me/dario.faggioli
> Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
>

--
Elena
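Should such a hypercall materialise, the public structure would presumably just grow one more handle. This is purely a hypothetical sketch based on this exchange, not something present in the posted series:

    struct vnuma_topology_info {
        domid_t domid;
        uint16_t nr_vnodes;
        uint32_t _pad;
        XEN_GUEST_HANDLE_64(uint) vdistance;
        XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
        XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
        XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;  /* hypothetical addition */
    };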