This patch series concerns Xen. Another series will follow for QEMU. As we discussed on the Xen/QEMU mailing lists (http://marc.info/?l=qemu-devel&m=133042969527515), I have worked on supporting multiple QEMU instances for one domain. To allow multiple QEMUs, each QEMU must register all the IO ranges (MMIO and PIO) and PCI devices that it wants to handle. Each QEMU will handle a subset of the hardware. It will retrieve its configuration from XenStore. Both of these patch series (one for Xen, the other for QEMU) are not complete, and they break some parts of Xen ... The purpose of these series is to start a discussion on how to implement multiple ioreq servers in Xen and QEMU. Julien Grall (15): hvm: Modify interface to support multiple ioreq server hvm: Add functions to handle ioreq servers hvm-pci: Handle PCI config space in Xen hvm: Change initialization/destruction of an hvm hvm: Modify hvm_op hvm-io: IO refactoring with ioreq server hvm-io: send invalidate map cache to each registered servers hvm-io: Handle server in buffered IO xc: Add the hypercall for multiple servers xc: Add argument to allocate more special pages xc: Fix python build xl: Add interface to handle multiple device models xl-qmp: add device model id to qmp function xl-parsing: Parse the new option device_models xl: Launch and destroy all device models tools/libxc/xc_domain.c | 140 +++++++++ tools/libxc/xc_hvm_build.c | 57 +++-- tools/libxc/xenctrl.h | 13 + tools/libxc/xenguest.h | 6 +- tools/libxc/xg_private.c | 3 +- tools/libxl/Makefile | 2 +- tools/libxl/libxl.c | 8 +- tools/libxl/libxl.h | 4 +- tools/libxl/libxl_create.c | 30 ++- tools/libxl/libxl_dm.c | 225 ++++++++++++++- tools/libxl/libxl_dom.c | 6 +- tools/libxl/libxl_internal.h | 19 +- tools/libxl/libxl_qmp.c | 24 +- tools/libxl/libxl_types.idl | 11 + tools/libxl/libxlu_dm.c | 202 +++++++++++++ tools/libxl/libxlutil.h | 5 + tools/libxl/xl_cmdimpl.c | 28 ++- tools/python/xen/lowlevel/xc/xc.c | 3 +- xen/arch/x86/hvm/Makefile | 1 + xen/arch/x86/hvm/emulate.c | 56 ++++ 
xen/arch/x86/hvm/hvm.c | 558 +++++++++++++++++++++++++++++++------ xen/arch/x86/hvm/io.c | 100 +++++-- xen/arch/x86/hvm/pci_emul.c | 147 ++++++++++ xen/include/asm-x86/hvm/domain.h | 24 ++- xen/include/asm-x86/hvm/support.h | 26 ++- xen/include/asm-x86/hvm/vcpu.h | 4 +- xen/include/public/hvm/hvm_op.h | 49 ++++ xen/include/public/hvm/ioreq.h | 1 + xen/include/public/hvm/params.h | 6 +- xen/include/public/xen.h | 1 + xen/include/xen/hvm/pci_emul.h | 37 +++ 31 files changed, 1615 insertions(+), 181 deletions(-) create mode 100644 tools/libxl/libxlu_dm.c create mode 100644 xen/arch/x86/hvm/pci_emul.c create mode 100644 xen/include/xen/hvm/pci_emul.h -- Julien Grall
Julien Grall
2012-Mar-22 15:59 UTC
[XEN][RFC PATCH 01/15] hvm: Modify interface to support multiple ioreq server
Add a structure to describe an ioreq server. An ioreq server handles a range of IO (MMIO and/or PIO) and can emulate PCI devices. Each server has its own shared page to receive ioreqs, so we introduce two HVM params to set/get the first and the last shared page used for ioreqs. From its id, the server knows which page it must use. We also introduce a new ioreq type, IOREQ_TYPE_PCI_CONFIG, which makes it easy to forward PCI config space accesses. Signed-off-by: Julien Grall <julien.grall@citrix.com> --- xen/include/asm-x86/hvm/domain.h | 24 +++++++++++++++++- xen/include/asm-x86/hvm/vcpu.h | 4 ++- xen/include/public/hvm/hvm_op.h | 49 ++++++++++++++++++++++++++++++++++++++ xen/include/public/hvm/ioreq.h | 1 + xen/include/public/hvm/params.h | 6 ++++- xen/include/public/xen.h | 1 + xen/include/xen/hvm/pci_emul.h | 37 ++++++++++++++++++++++++++++ 7 files changed, 119 insertions(+), 3 deletions(-) create mode 100644 xen/include/xen/hvm/pci_emul.h diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h index 27b3de5..0cfdc69 100644 --- a/xen/include/asm-x86/hvm/domain.h +++ b/xen/include/asm-x86/hvm/domain.h @@ -28,6 +28,7 @@ #include <asm/hvm/vioapic.h> #include <asm/hvm/io.h> #include <xen/hvm/iommu.h> +#include <xen/hvm/pci_emul.h> #include <asm/hvm/viridian.h> #include <asm/hvm/vmx/vmcs.h> #include <asm/hvm/svm/vmcb.h> @@ -41,14 +42,35 @@ struct hvm_ioreq_page { void *va; }; +struct hvm_io_range { + uint64_t s, e; + struct hvm_io_range *next; +}; + +struct hvm_ioreq_server { + unsigned int id; + struct hvm_io_range *mmio_range_list; + struct hvm_io_range *portio_range_list; + struct hvm_ioreq_server *next; + struct hvm_ioreq_page ioreq; + struct hvm_ioreq_page buf_ioreq; + unsigned int buf_ioreq_evtchn; +}; + struct hvm_domain { + /* Use for the IO handles by Xen */ struct hvm_ioreq_page ioreq; - struct hvm_ioreq_page buf_ioreq; + struct hvm_ioreq_server *ioreq_server_list; + uint32_t nr_ioreq_server; + spinlock_t ioreq_server_lock; struct pl_time pl_time; 
struct hvm_io_handler *io_handler; + /* PCI Information */ + struct pci_root_emul pci_root; + /* Lock protects access to irq, vpic and vioapic. */ spinlock_t irq_lock; struct hvm_irq irq; diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h index 537da96..2774ced 100644 --- a/xen/include/asm-x86/hvm/vcpu.h +++ b/xen/include/asm-x86/hvm/vcpu.h @@ -125,7 +125,9 @@ struct hvm_vcpu { spinlock_t tm_lock; struct list_head tm_list; - int xen_port; + struct hvm_ioreq_page *ioreq; + /* PCI Information */ + uint32_t pci_cf8; bool_t flag_dr_dirty; bool_t debug_state_latch; diff --git a/xen/include/public/hvm/hvm_op.h b/xen/include/public/hvm/hvm_op.h index 6a78f75..1e0e27b 100644 --- a/xen/include/public/hvm/hvm_op.h +++ b/xen/include/public/hvm/hvm_op.h @@ -24,6 +24,8 @@ #include "../xen.h" #include "../trace.h" +#include "hvm_info_table.h" /* HVM_MAX_VCPUS */ + /* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. */ #define HVMOP_set_param 0 #define HVMOP_get_param 1 @@ -227,6 +229,53 @@ struct xen_hvm_inject_trap { typedef struct xen_hvm_inject_trap xen_hvm_inject_trap_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_trap_t); +#define HVMOP_register_ioreq_server 20 +struct xen_hvm_register_ioreq_server { + domid_t domid; /* IN - domain to be serviced */ + unsigned int id; /* OUT - handle for identifying this server */ +}; +typedef struct xen_hvm_register_ioreq_server xen_hvm_register_ioreq_server_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_register_ioreq_server_t); + +#define HVMOP_get_ioreq_server_buf_channel 21 +struct xen_hvm_get_ioreq_server_buf_channel { + domid_t domid; /* IN - domain to be serviced */ + servid_t id; /* IN - handle from HVMOP_register_ioreq_server */ + unsigned int channel; /* OUT - buf ioreq channel */ +}; +typedef struct xen_hvm_get_ioreq_server_buf_channel xen_hvm_get_ioreq_server_buf_channel_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_ioreq_server_buf_channel_t); + +#define HVMOP_map_io_range_to_ioreq_server 22 +struct 
xen_hvm_map_io_range_to_ioreq_server { + domid_t domid; /* IN - domain to be serviced */ + uint8_t is_mmio; /* IN - MMIO or port IO? */ + servid_t id; /* IN - handle from HVMOP_register_ioreq_server */ + uint64_aligned_t s, e; /* IN - inclusive start and end of range */ +}; +typedef struct xen_hvm_map_io_range_to_ioreq_server xen_hvm_map_io_range_to_ioreq_server_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_map_io_range_to_ioreq_server_t); + +#define HVMOP_unmap_io_range_from_ioreq_server 23 +struct xen_hvm_unmap_io_range_from_ioreq_server { + domid_t domid; /* IN - domain to be serviced */ + uint8_t is_mmio; /* IN - MMIO or port IO? */ + servid_t id; /* IN - handle from HVMOP_register_ioreq_server */ + uint64_aligned_t addr; /* IN - address inside the range to remove */ +}; +typedef struct xen_hvm_unmap_io_range_from_ioreq_server xen_hvm_unmap_io_range_from_ioreq_server_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_unmap_io_range_from_ioreq_server_t); + +#define HVMOP_register_pcidev 24 +struct xen_hvm_register_pcidev { + domid_t domid; /* IN - domain to be serviced */ + servid_t id; /* IN - handle from HVMOP_register_ioreq_server */ + uint16_t bdf; /* IN - pci */ +}; +typedef struct xen_hvm_register_pcidev xen_hvm_register_pcidev_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_register_pcidev_t); + + #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ #define HVMOP_get_mem_type 15 diff --git a/xen/include/public/hvm/ioreq.h b/xen/include/public/hvm/ioreq.h index 4022a1d..87aacd3 100644 --- a/xen/include/public/hvm/ioreq.h +++ b/xen/include/public/hvm/ioreq.h @@ -34,6 +34,7 @@ #define IOREQ_TYPE_PIO 0 /* pio */ #define IOREQ_TYPE_COPY 1 /* mmio ops */ +#define IOREQ_TYPE_PCI_CONFIG 2 /* pci config space ops */ #define IOREQ_TYPE_TIMEOFFSET 7 #define IOREQ_TYPE_INVALIDATE 8 /* mapcache */ diff --git a/xen/include/public/hvm/params.h b/xen/include/public/hvm/params.h index 55c1b57..309ac1b 100644 --- a/xen/include/public/hvm/params.h +++ b/xen/include/public/hvm/params.h @@ -147,6 +147,10 @@ 
#define HVM_PARAM_ACCESS_RING_PFN 28 #define HVM_PARAM_SHARING_RING_PFN 29 -#define HVM_NR_PARAMS 30 +/* Param for ioreq servers */ +#define HVM_PARAM_IO_PFN_FIRST 30 +#define HVM_PARAM_IO_PFN_LAST 31 + +#define HVM_NR_PARAMS 32 #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h index b2f6c50..26d0e9d 100644 --- a/xen/include/public/xen.h +++ b/xen/include/public/xen.h @@ -466,6 +466,7 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); #ifndef __ASSEMBLY__ typedef uint16_t domid_t; +typedef uint32_t servid_t; /* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */ #define DOMID_FIRST_RESERVED (0x7FF0U) diff --git a/xen/include/xen/hvm/pci_emul.h b/xen/include/xen/hvm/pci_emul.h new file mode 100644 index 0000000..4e4a2fd --- /dev/null +++ b/xen/include/xen/hvm/pci_emul.h @@ -0,0 +1,37 @@ +#ifndef PCI_EMUL_H_ +# define PCI_EMUL_H_ + +# include <xen/spinlock.h> +# include <xen/types.h> + +int hvm_init_pci_emul(struct domain *d); +void hvm_destroy_pci_emul(struct domain *d); +int hvm_register_pcidev(domid_t domid, servid_t id, u16 bdf); + +/* Size of the standard PCI config space */ +#define PCI_CONFIG_SPACE_SIZE 0x100 +#define PCI_CMP_BDF(Pci, Bdf) ((Pci)->bdf == PCI_MASK_BDF(Bdf)) +#define PCI_MASK_BDF(bdf) (((bdf) & 0x00ffff00) >> 8) + +struct pci_device_emul { + u16 bdf; + struct hvm_ioreq_server *server; + struct pci_device_emul *next; +}; + +struct pci_root_emul { + spinlock_t pci_lock; + struct pci_device_emul *pci; +}; + +#endif /* !PCI_EMUL_H_ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ -- Julien Grall
Julien Grall
2012-Mar-22 15:59 UTC
[XEN][RFC PATCH 02/15] hvm: Add functions to handle ioreq servers
This patch adds helper functions to: - create/destroy a server - map/unmap an IO range to a server Signed-off-by: Julien Grall <julien.grall@citrix.com> --- xen/arch/x86/hvm/hvm.c | 352 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 352 insertions(+), 0 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 9832daf..e7a8f18 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -331,6 +331,87 @@ void hvm_do_resume(struct vcpu *v) } } +static void hvm_init_ioreq_servers(struct domain *d) +{ + spin_lock_init(&d->arch.hvm_domain.ioreq_server_lock); + d->arch.hvm_domain.nr_ioreq_server = 0; +} + +static void hvm_destroy_ioreq_server(struct domain *d, + struct hvm_ioreq_server *s) +{ + struct hvm_io_range *x; + shared_iopage_t *p; + int i; + + while ( (x = s->mmio_range_list) != NULL ) + { + s->mmio_range_list = x->next; + xfree(x); + } + while ( (x = s->portio_range_list) != NULL ) + { + s->portio_range_list = x->next; + xfree(x); + } + + p = s->ioreq.va; + + for ( i = 0; i < MAX_HVM_VCPUS; i++ ) + { + if ( p->vcpu_ioreq[i].vp_eport ) + { + free_xen_event_channel(d->vcpu[i], p->vcpu_ioreq[i].vp_eport); + } + } + + free_xen_event_channel(d->vcpu[0], s->buf_ioreq_evtchn); + + xfree(s); +} + +static void hvm_destroy_ioreq_servers(struct domain *d) +{ + struct hvm_ioreq_server *s; + + spin_lock(&d->arch.hvm_domain.ioreq_server_lock); + + ASSERT(d->is_dying); + + while ( (s = d->arch.hvm_domain.ioreq_server_list) != NULL ) + { + d->arch.hvm_domain.ioreq_server_list = s->next; + hvm_destroy_ioreq_server(d, s); + } + + spin_unlock(&d->arch.hvm_domain.ioreq_server_lock); +} + +static int hvm_ioreq_servers_new_vcpu(struct vcpu *v) +{ + struct hvm_ioreq_server *s; + struct domain *d = v->domain; + shared_iopage_t *p; + int rc = 0; + + spin_lock(&d->arch.hvm_domain.ioreq_server_lock); + + for ( s = d->arch.hvm_domain.ioreq_server_list; s != NULL; s = s->next ) + { + p = s->ioreq.va; + ASSERT(p != NULL); + + rc = 
alloc_unbound_xen_event_channel(v, 0, NULL); + if ( rc < 0 ) + break; + p->vcpu_ioreq[v->vcpu_id].vp_eport = rc; + } + + spin_unlock(&d->arch.hvm_domain.ioreq_server_lock); + + return (rc < 0) ? rc : 0; +} + static void hvm_init_ioreq_page( struct domain *d, struct hvm_ioreq_page *iorp) { @@ -3658,6 +3739,277 @@ static int hvmop_flush_tlb_all(void) return 0; } +static int hvm_alloc_ioreq_server_page(struct domain *d, + struct hvm_ioreq_server *s, + struct hvm_ioreq_page *pfn, + int i) +{ + int rc = 0; + unsigned long gmfn; + + if (i < 0 || i > 1) + return -EINVAL; + + hvm_init_ioreq_page(d, pfn); + + gmfn = d->arch.hvm_domain.params[HVM_PARAM_IO_PFN_FIRST] + + (s->id - 1) * 2 + i + 1; + + if (gmfn > d->arch.hvm_domain.params[HVM_PARAM_IO_PFN_LAST]) + return -EINVAL; + + rc = hvm_set_ioreq_page(d, pfn, gmfn); + + if (!rc && pfn->va == NULL) + rc = -ENOMEM; + + return rc; +} + +static int hvmop_register_ioreq_server( + struct xen_hvm_register_ioreq_server *a) +{ + struct hvm_ioreq_server *s, **pp; + struct domain *d; + shared_iopage_t *p; + struct vcpu *v; + int i; + int rc = 0; + + if ( current->domain->domain_id != 0 ) + return -EINVAL; + + rc = rcu_lock_target_domain_by_id(a->domid, &d); + if ( rc != 0 ) + return rc; + + if ( !is_hvm_domain(d) ) + { + rcu_unlock_domain(d); + return -EINVAL; + } + + s = xmalloc(struct hvm_ioreq_server); + if ( s == NULL ) + { + rcu_unlock_domain(d); + return -ENOMEM; + } + memset(s, 0, sizeof(*s)); + + if ( d->is_dying) + { + rc = -EINVAL; + goto register_died; + } + + spin_lock(&d->arch.hvm_domain.ioreq_server_lock); + + s->id = d->arch.hvm_domain.nr_ioreq_server + 1; + + /* Initialize shared pages */ + if ( (rc = hvm_alloc_ioreq_server_page(d, s, &s->ioreq, 0)) ) + goto register_ioreq; + if ( (rc = hvm_alloc_ioreq_server_page(d, s, &s->buf_ioreq, 1)) ) + goto register_buf_ioreq; + + p = s->ioreq.va; + + for_each_vcpu ( d, v ) + { + rc = alloc_unbound_xen_event_channel(v, 0, NULL); + if ( rc < 0 ) + goto register_ports; + 
p->vcpu_ioreq[v->vcpu_id].vp_eport = rc; + } + + /* Allocate buffer event channel */ + rc = alloc_unbound_xen_event_channel(d->vcpu[0], 0, NULL); + + if (rc < 0) + goto register_ports; + s->buf_ioreq_evtchn = rc; + + pp = &d->arch.hvm_domain.ioreq_server_list; + while ( *pp != NULL ) + pp = &(*pp)->next; + *pp = s; + + d->arch.hvm_domain.nr_ioreq_server += 1; + a->id = s->id; + + spin_unlock(&d->arch.hvm_domain.ioreq_server_lock); + rcu_unlock_domain(d); + + goto register_done; + +register_ports: + p = s->ioreq.va; + for ( i = 0; i < MAX_HVM_VCPUS; i++ ) + { + if ( p->vcpu_ioreq[i].vp_eport ) + free_xen_event_channel(d->vcpu[i], p->vcpu_ioreq[i].vp_eport); + } + hvm_destroy_ioreq_page(d, &s->buf_ioreq); +register_buf_ioreq: + hvm_destroy_ioreq_page(d, &s->ioreq); +register_ioreq: + spin_unlock(&d->arch.hvm_domain.ioreq_server_lock); +register_died: + xfree(s); + rcu_unlock_domain(d); +register_done: + return 0; +} + +static int hvmop_get_ioreq_server_buf_channel( + struct xen_hvm_get_ioreq_server_buf_channel *a) +{ + struct domain *d; + struct hvm_ioreq_server *s; + int rc; + + rc = rcu_lock_target_domain_by_id(a->domid, &d); + + if ( rc != 0 ) + return rc; + + if ( !is_hvm_domain(d) ) + { + rcu_unlock_domain(d); + return -EINVAL; + } + + spin_lock(&d->arch.hvm_domain.ioreq_server_lock); + s = d->arch.hvm_domain.ioreq_server_list; + + while ( (s != NULL) && (s->id != a->id) ) + s = s->next; + + if ( s == NULL ) + { + spin_unlock(&d->arch.hvm_domain.ioreq_server_lock); + rcu_unlock_domain(d); + return -ENOENT; + } + + a->channel = s->buf_ioreq_evtchn; + + spin_unlock(&d->arch.hvm_domain.ioreq_server_lock); + rcu_unlock_domain(d); + + return 0; +} + +static int hvmop_map_io_range_to_ioreq_server( + struct xen_hvm_map_io_range_to_ioreq_server *a) +{ + struct hvm_ioreq_server *s; + struct hvm_io_range *x; + struct domain *d; + int rc; + + rc = rcu_lock_target_domain_by_id(a->domid, &d); + if ( rc != 0 ) + return rc; + + if ( !is_hvm_domain(d) ) + { + 
rcu_unlock_domain(d); + return -EINVAL; + } + + spin_lock(&d->arch.hvm_domain.ioreq_server_lock); + + x = xmalloc(struct hvm_io_range); + s = d->arch.hvm_domain.ioreq_server_list; + while ( (s != NULL) && (s->id != a->id) ) + s = s->next; + if ( (s == NULL) || (x == NULL) ) + { + xfree(x); + spin_unlock(&d->arch.hvm_domain.ioreq_server_lock); + rcu_unlock_domain(d); + return x ? -ENOENT : -ENOMEM; + } + + x->s = a->s; + x->e = a->e; + if ( a->is_mmio ) + { + x->next = s->mmio_range_list; + s->mmio_range_list = x; + } + else + { + x->next = s->portio_range_list; + s->portio_range_list = x; + } + + spin_unlock(&d->arch.hvm_domain.ioreq_server_lock); + rcu_unlock_domain(d); + return 0; +} + +static int hvmop_unmap_io_range_from_ioreq_server( + struct xen_hvm_unmap_io_range_from_ioreq_server *a) +{ + struct hvm_ioreq_server *s; + struct hvm_io_range *x, **xp; + struct domain *d; + int rc; + + rc = rcu_lock_target_domain_by_id(a->domid, &d); + if ( rc != 0 ) + return rc; + + if ( !is_hvm_domain(d) ) + { + rcu_unlock_domain(d); + return -EINVAL; + } + + spin_lock(&d->arch.hvm_domain.ioreq_server_lock); + + s = d->arch.hvm_domain.ioreq_server_list; + while ( (s != NULL) && (s->id != a->id) ) + s = s->next; + if ( (s == NULL) ) + { + spin_unlock(&d->arch.hvm_domain.ioreq_server_lock); + rcu_unlock_domain(d); + return -ENOENT; + } + + if ( a->is_mmio ) + { + x = s->mmio_range_list; + xp = &s->mmio_range_list; + } + else + { + x = s->portio_range_list; + xp = &s->portio_range_list; + } + while ( (x != NULL) && (a->addr < x->s || a->addr > x->e) ) + { + xp = &x->next; + x = x->next; + } + if ( (x != NULL) ) + { + *xp = x->next; + xfree(x); + rc = 0; + } + else + rc = -ENOENT; + + spin_unlock(&d->arch.hvm_domain.ioreq_server_lock); + rcu_unlock_domain(d); + return rc; +} + long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg) { -- Julien Grall
Julien Grall
2012-Mar-22 15:59 UTC
[XEN][RFC PATCH 03/15] hvm-pci: Handle PCI config space in Xen
Add a function to register a BDF with a server. To handle ports cf8 -> cff we add a handler with register_portio_handler. When Xen receives a PIO for cf8, it stores the value in the current vcpu until it receives a PIO for cfc -> cff. In that case, it checks whether the BDF is registered and forges the ioreq that will be forwarded to the server later. Signed-off-by: Julien Grall <julien.grall@citrix.com> --- xen/arch/x86/hvm/Makefile | 1 + xen/arch/x86/hvm/pci_emul.c | 147 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 148 insertions(+), 0 deletions(-) create mode 100644 xen/arch/x86/hvm/pci_emul.c diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile index eea5555..585e9c9 100644 --- a/xen/arch/x86/hvm/Makefile +++ b/xen/arch/x86/hvm/Makefile @@ -12,6 +12,7 @@ obj-y += irq.o obj-y += mtrr.o obj-y += nestedhvm.o obj-y += pmtimer.o +obj-y += pci_emul.o obj-y += quirks.o obj-y += rtc.o obj-y += save.o diff --git a/xen/arch/x86/hvm/pci_emul.c b/xen/arch/x86/hvm/pci_emul.c new file mode 100644 index 0000000..b390e73 --- /dev/null +++ b/xen/arch/x86/hvm/pci_emul.c @@ -0,0 +1,147 @@ +#include <asm/hvm/support.h> +#include <xen/hvm/pci_emul.h> +#include <xen/pci.h> +#include <xen/sched.h> +#include <xen/xmalloc.h> + +#define PCI_DEBUGSTR "%x:%x.%x" +#define PCI_DEBUG(bdf) ((bdf) >> 16) & 0xff, ((bdf) >> 11) & 0x1f, ((bdf) >> 8) & 0x7 + +static int handle_config_space(int dir, uint32_t port, uint32_t bytes, + uint32_t *val) +{ + uint32_t pci_cf8; + struct pci_device_emul *pci; + ioreq_t *p = get_ioreq(current); + int rc = X86EMUL_UNHANDLEABLE; + struct vcpu *v = current; + + spin_lock(&v->domain->arch.hvm_domain.pci_root.pci_lock); + + if (port == 0xcf8) + { + rc = X86EMUL_OKAY; + v->arch.hvm_vcpu.pci_cf8 = *val; + goto end_handle; + } + + pci_cf8 = v->arch.hvm_vcpu.pci_cf8; + + /* Retrieve PCI */ + pci = v->domain->arch.hvm_domain.pci_root.pci; + + while (pci && !PCI_CMP_BDF(pci, pci_cf8)) + pci = pci->next; + + /* We just fill the ioreq, hvm_send_assist_req 
will send the request */ + if (unlikely(pci == NULL)) + { + *val = ~0; + rc = X86EMUL_OKAY; + goto end_handle; + } + + p->type = IOREQ_TYPE_PCI_CONFIG; + p->addr = (pci_cf8 & ~3) + (p->addr & 3); + + set_ioreq(v, &pci->server->ioreq, p); + +end_handle: + spin_unlock(&v->domain->arch.hvm_domain.pci_root.pci_lock); + return rc; +} + +int hvm_register_pcidev(domid_t domid, unsigned int id, u16 bdf) +{ + struct domain *d; + struct hvm_ioreq_server *s; + struct pci_device_emul *x; + int rc = 0; + + rc = rcu_lock_target_domain_by_id(domid, &d); + + if (rc != 0) + return rc; + + if (!is_hvm_domain(d)) + { + rcu_unlock_domain(d); + return -EINVAL; + } + + /* Search server */ + spin_lock(&d->arch.hvm_domain.ioreq_server_lock); + s = d->arch.hvm_domain.ioreq_server_list; + while ((s != NULL) && (s->id != id)) + s = s->next; + + if (s == NULL) + { + dprintk(XENLOG_DEBUG, "Cannot find server\n"); + rc = -ENOENT; + goto create_end; + } + + spin_unlock(&d->arch.hvm_domain.ioreq_server_lock); + + spin_lock(&d->arch.hvm_domain.pci_root.pci_lock); + x = xmalloc(struct pci_device_emul); + + if (!x) + { + dprintk(XENLOG_DEBUG, "Cannot allocate pci\n"); + rc = -ENOMEM; + goto create_end; + } + + x->bdf = PCI_MASK_BDF(bdf); + x->server = s; + x->next = d->arch.hvm_domain.pci_root.pci; + d->arch.hvm_domain.pci_root.pci = x; + +create_end: + spin_unlock(&d->arch.hvm_domain.pci_root.pci_lock); + rcu_unlock_domain(d); + + return rc; +} + +int hvm_init_pci_emul(struct domain *d) +{ + struct pci_root_emul *root = &d->arch.hvm_domain.pci_root; + + spin_lock_init(&root->pci_lock); + + root->pci = NULL; + + /* Register the config space handler */ + register_portio_handler(d, 0xcf8, 8, handle_config_space); + + return 0; +} + +void hvm_destroy_pci_emul(struct domain *d) +{ + struct pci_root_emul *root = &d->arch.hvm_domain.pci_root; + struct pci_device_emul *p; + + spin_lock(&root->pci_lock); + + while ( (p = root->pci) != NULL ) + { + root->pci = p->next; + xfree(p); + } + + 
spin_unlock(&root->pci_lock); +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ -- Julien Grall
Julien Grall
2012-Mar-22 15:59 UTC
[XEN][RFC PATCH 04/15] hvm: Change initialization/destruction of an hvm
This patch modifies initialization and the destruction of an hvm Signed-off-by: Julien Grall <julien.grall@citrix.com> --- xen/arch/x86/hvm/hvm.c | 33 ++++++++++----------------------- 1 files changed, 10 insertions(+), 23 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index e7a8f18..1b38762 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -601,10 +601,13 @@ int hvm_domain_initialise(struct domain *d) rtc_init(d); hvm_init_ioreq_page(d, &d->arch.hvm_domain.ioreq); - hvm_init_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq); + hvm_init_ioreq_servers(d); register_portio_handler(d, 0xe9, 1, hvm_print_line); + if ( hvm_init_pci_emul(d) ) + goto fail2; + rc = hvm_funcs.domain_initialise(d); if ( rc != 0 ) goto fail2; @@ -626,8 +629,8 @@ int hvm_domain_initialise(struct domain *d) void hvm_domain_relinquish_resources(struct domain *d) { - hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq); - hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq); + hvm_destroy_ioreq_servers(d); + hvm_destroy_pci_emul(d); msixtbl_pt_cleanup(d); @@ -1085,27 +1088,11 @@ int hvm_vcpu_initialise(struct vcpu *v) && (rc = nestedhvm_vcpu_initialise(v)) < 0 ) goto fail3; - /* Create ioreq event channel. */ - rc = alloc_unbound_xen_event_channel(v, 0, NULL); - if ( rc < 0 ) - goto fail4; - - /* Register ioreq event channel. */ - v->arch.hvm_vcpu.xen_port = rc; - - if ( v->vcpu_id == 0 ) - { - /* Create bufioreq event channel. 
*/ - rc = alloc_unbound_xen_event_channel(v, 0, NULL); - if ( rc < 0 ) - goto fail2; - v->domain->arch.hvm_domain.params[HVM_PARAM_BUFIOREQ_EVTCHN] = rc; - } + rc = hvm_ioreq_servers_new_vcpu(v); + if ( rc != 0 ) + goto fail3; - spin_lock(&v->domain->arch.hvm_domain.ioreq.lock); - if ( v->domain->arch.hvm_domain.ioreq.va != NULL ) - get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port; - spin_unlock(&v->domain->arch.hvm_domain.ioreq.lock); + v->arch.hvm_vcpu.ioreq = &v->domain->arch.hvm_domain.ioreq; spin_lock_init(&v->arch.hvm_vcpu.tm_lock); INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list); -- Julien Grall
This patch removes the HVM params made useless by the structure modifications and binds the new hypercalls that handle ioreq servers and PCI. Signed-off-by: Julien Grall <julien.grall@citrix.com> --- xen/arch/x86/hvm/hvm.c | 127 ++++++++++++++++++++++++++++++------------------ 1 files changed, 80 insertions(+), 47 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 1b38762..3117ae1 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -4009,7 +4009,6 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg) case HVMOP_get_param: { struct xen_hvm_param a; - struct hvm_ioreq_page *iorp; struct domain *d; struct vcpu *v; @@ -4037,21 +4036,14 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg) switch ( a.index ) { - case HVM_PARAM_IOREQ_PFN: - iorp = &d->arch.hvm_domain.ioreq; - if ( (rc = hvm_set_ioreq_page(d, iorp, a.value)) != 0 ) - break; - spin_lock(&iorp->lock); - if ( iorp->va != NULL ) - /* Initialise evtchn port info if VCPUs already created. */ - for_each_vcpu ( d, v ) - get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port; - spin_unlock(&iorp->lock); - break; - case HVM_PARAM_BUFIOREQ_PFN: - iorp = &d->arch.hvm_domain.buf_ioreq; - rc = hvm_set_ioreq_page(d, iorp, a.value); + case HVM_PARAM_IO_PFN_FIRST: + rc = hvm_set_ioreq_page(d, &d->arch.hvm_domain.ioreq, a.value); + gdprintk(XENLOG_DEBUG, "Pfn first = 0x%lx\n", a.value); + gdprintk(XENLOG_DEBUG, "va = %p\n", d->arch.hvm_domain.ioreq.va); break; + case HVM_PARAM_IO_PFN_LAST: + if ( (d->arch.hvm_domain.params[HVM_PARAM_IO_PFN_LAST]) ) + rc = -EINVAL; case HVM_PARAM_CALLBACK_IRQ: hvm_set_callback_via(d, a.value); hvm_latch_shinfo_size(d); @@ -4096,38 +4088,6 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg) domctl_lock_release(); break; - case HVM_PARAM_DM_DOMAIN: - /* Not reflexive, as we must domain_pause(). 
*/ - rc = -EPERM; - if ( curr_d == d ) - break; - - if ( a.value == DOMID_SELF ) - a.value = curr_d->domain_id; - - rc = 0; - domain_pause(d); /* safe to change per-vcpu xen_port */ - iorp = &d->arch.hvm_domain.ioreq; - for_each_vcpu ( d, v ) - { - int old_port, new_port; - new_port = alloc_unbound_xen_event_channel( - v, a.value, NULL); - if ( new_port < 0 ) - { - rc = new_port; - break; - } - /* xchg() ensures that only we free_xen_event_channel() */ - old_port = xchg(&v->arch.hvm_vcpu.xen_port, new_port); - free_xen_event_channel(v, old_port); - spin_lock(&iorp->lock); - if ( iorp->va != NULL ) - get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port; - spin_unlock(&iorp->lock); - } - domain_unpause(d); - break; case HVM_PARAM_ACPI_S_STATE: /* Not reflexive, as we must domain_pause(). */ rc = -EPERM; @@ -4650,6 +4610,79 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg) break; } + case HVMOP_register_ioreq_server: + { + struct xen_hvm_register_ioreq_server a; + + if ( copy_from_guest(&a, arg, 1) ) + return -EFAULT; + + rc = hvmop_register_ioreq_server(&a); + if ( rc != 0 ) + return rc; + + rc = copy_to_guest(arg, &a, 1) ? -EFAULT : 0; + break; + } + + case HVMOP_get_ioreq_server_buf_channel: + { + struct xen_hvm_get_ioreq_server_buf_channel a; + + if ( copy_from_guest(&a, arg, 1) ) + return -EFAULT; + + rc = hvmop_get_ioreq_server_buf_channel(&a); + if ( rc != 0 ) + return rc; + + rc = copy_to_guest(arg, &a, 1) ? 
-EFAULT : 0; + + break; + } + + case HVMOP_map_io_range_to_ioreq_server: + { + struct xen_hvm_map_io_range_to_ioreq_server a; + + if ( copy_from_guest(&a, arg, 1) ) + return -EFAULT; + + rc = hvmop_map_io_range_to_ioreq_server(&a); + if ( rc != 0 ) + return rc; + + break; + } + + case HVMOP_unmap_io_range_from_ioreq_server: + { + struct xen_hvm_unmap_io_range_from_ioreq_server a; + + if ( copy_from_guest(&a, arg, 1) ) + return -EFAULT; + + rc = hvmop_unmap_io_range_from_ioreq_server(&a); + if ( rc != 0 ) + return rc; + + break; + } + + case HVMOP_register_pcidev: + { + struct xen_hvm_register_pcidev a; + + if ( copy_from_guest(&a, arg, 1) ) + return -EFAULT; + + rc = hvm_register_pcidev(a.domid, a.id, a.bdf); + if ( rc != 0 ) + return rc; + + break; + } + default: { gdprintk(XENLOG_DEBUG, "Bad HVM op %ld.\n", op); -- Julien Grall
Julien Grall
2012-Mar-22 15:59 UTC
[XEN][RFC PATCH 06/15] hvm-io: IO refactoring with ioreq server
This modifies several parts of the IO handling. Each vcpu now contains a pointer to the current IO shared page. A default shared page has been created for IO handled by Xen. Each time Xen receives an ioreq, it uses the default shared page and switches to the right shared page once it knows the server. Moreover, any IO that can be handled neither by Xen nor by a server is discarded directly by Xen. Signed-off-by: Julien Grall <julien.grall@citrix.com> --- xen/arch/x86/hvm/emulate.c | 56 +++++++++++++++++++++++++++++++++++++ xen/arch/x86/hvm/hvm.c | 5 ++- xen/include/asm-x86/hvm/support.h | 26 ++++++++++++++-- 3 files changed, 81 insertions(+), 6 deletions(-) diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c index 82efd1a..284c8b2 100644 --- a/xen/arch/x86/hvm/emulate.c +++ b/xen/arch/x86/hvm/emulate.c @@ -49,6 +49,55 @@ static void hvmtrace_io_assist(int is_mmio, ioreq_t *p) trace_var(event, 0/*!cycles*/, size, buffer); } +static int hvmemul_prepare_assist(ioreq_t *p) +{ + struct vcpu *v = current; + struct hvm_ioreq_server *s; + int i; + int sign; + uint32_t data = ~0; + + if ( p->type == IOREQ_TYPE_PCI_CONFIG ) + return X86EMUL_UNHANDLEABLE; + + spin_lock(&v->domain->arch.hvm_domain.ioreq_server_lock); + for ( s = v->domain->arch.hvm_domain.ioreq_server_list; s; s = s->next ) + { + struct hvm_io_range *x = (p->type == IOREQ_TYPE_COPY) + ? s->mmio_range_list : s->portio_range_list; + + for ( ; x; x = x->next ) + { + if ( (p->addr >= x->s) && (p->addr <= x->e) ) + goto done_server_scan; + } + } + + spin_unlock(&v->domain->arch.hvm_domain.ioreq_server_lock); + + sign = p->df ? 
-1 : 1; + + if ( p->dir != IOREQ_WRITE ) + { + if ( !p->data_is_ptr ) + p->data = ~0; + else + { + for ( i = 0; i < p->count; i++ ) + hvm_copy_to_guest_phys(p->data + sign * i * p->size, &data, + p->size); + } + } + + return X86EMUL_OKAY; + + done_server_scan: + set_ioreq(v, &s->ioreq, p); + spin_unlock(&v->domain->arch.hvm_domain.ioreq_server_lock); + + return X86EMUL_UNHANDLEABLE; +} + static int hvmemul_do_io( int is_mmio, paddr_t addr, unsigned long *reps, int size, paddr_t ram_gpa, int dir, int df, void *p_data) @@ -182,6 +231,10 @@ static int hvmemul_do_io( (p_data == NULL) ? HVMIO_dispatched : HVMIO_awaiting_completion; vio->io_size = size; + /* Use the default shared page */ + current->arch.hvm_vcpu.ioreq = &curr->domain->arch.hvm_domain.ioreq; + p = get_ioreq(current); + p->dir = dir; p->data_is_ptr = value_is_ptr; p->type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO; @@ -204,6 +257,9 @@ static int hvmemul_do_io( rc = hvm_portio_intercept(p); } + if ( rc == X86EMUL_UNHANDLEABLE ) + rc = hvmemul_prepare_assist(p); + switch ( rc ) { case X86EMUL_OKAY: diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 3117ae1..e8ea42e 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -1204,14 +1204,15 @@ bool_t hvm_send_assist_req(struct vcpu *v) return 0; } - prepare_wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port); + prepare_wait_on_xen_event_channel(p->vp_eport); /* * Following happens /after/ blocking and setting up ioreq contents. * prepare_wait_on_xen_event_channel() is an implicit barrier. 
*/ p->state = STATE_IOREQ_READY; - notify_via_xen_event_channel(v->domain, v->arch.hvm_vcpu.xen_port); + + notify_via_xen_event_channel(v->domain, p->vp_eport); return 1; } diff --git a/xen/include/asm-x86/hvm/support.h b/xen/include/asm-x86/hvm/support.h index f9b102f..44acd37 100644 --- a/xen/include/asm-x86/hvm/support.h +++ b/xen/include/asm-x86/hvm/support.h @@ -29,13 +29,31 @@ static inline ioreq_t *get_ioreq(struct vcpu *v) { - struct domain *d = v->domain; - shared_iopage_t *p = d->arch.hvm_domain.ioreq.va; - ASSERT((v == current) || spin_is_locked(&d->arch.hvm_domain.ioreq.lock)); - ASSERT(d->arch.hvm_domain.ioreq.va != NULL); + shared_iopage_t *p = v->arch.hvm_vcpu.ioreq->va; + ASSERT((v == current) || spin_is_locked(&v->arch.hvm_vcpu.ioreq->lock)); + ASSERT(v->arch.hvm_vcpu.ioreq->va != NULL); return &p->vcpu_ioreq[v->vcpu_id]; } +static inline void set_ioreq(struct vcpu *v, struct hvm_ioreq_page *page, + ioreq_t *p) +{ + ioreq_t *np; + + v->arch.hvm_vcpu.ioreq = page; + spin_lock(&v->arch.hvm_vcpu.ioreq->lock); + np = get_ioreq(v); + np->dir = p->dir; + np->data_is_ptr = p->data_is_ptr; + np->type = p->type; + np->size = p->size; + np->addr = p->addr; + np->count = p->count; + np->df = p->df; + np->data = p->data; + spin_unlock(&v->arch.hvm_vcpu.ioreq->lock); +} + #define HVM_DELIVER_NO_ERROR_CODE -1 #ifndef NDEBUG -- Julien Grall
Julien Grall
2012-Mar-22 15:59 UTC
[XEN][RFC PATCH 07/15] hvm-io: send invalidate map cache to each registered servers
When a mapcache invalidation occurs, Xen needs to send an IOREQ_TYPE_INVALIDATE to each server and wait until all IO is completed. We introduce a new function, hvm_wait_on_io, to wait until an IO is completed. Signed-off-by: Julien Grall <julien.grall@citrix.com> --- xen/arch/x86/hvm/hvm.c | 41 ++++++++++++++++++++++++++++++++--------- xen/arch/x86/hvm/io.c | 15 +++++++++++++-- 2 files changed, 45 insertions(+), 11 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index e8ea42e..f57e3aa 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -291,16 +291,9 @@ void hvm_migrate_pirqs(struct vcpu *v) spin_unlock(&d->event_lock); } -void hvm_do_resume(struct vcpu *v) +static void hvm_wait_on_io(struct vcpu *v, ioreq_t *p) { - ioreq_t *p; - - pt_restore_timer(v); - - check_wakeup_from_wait(); - /* NB. Optimised for common case (p->state == STATE_IOREQ_NONE). */ - p = get_ioreq(v); while ( p->state != STATE_IOREQ_NONE ) { switch ( p->state ) @@ -310,7 +303,7 @@ void hvm_do_resume(struct vcpu *v) break; case STATE_IOREQ_READY: /* IOREQ_{READY,INPROCESS} -> IORESP_READY */ case STATE_IOREQ_INPROCESS: - wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port, + wait_on_xen_event_channel(p->vp_eport, (p->state != STATE_IOREQ_READY) && (p->state != STATE_IOREQ_INPROCESS)); break; @@ -320,6 +313,36 @@ void hvm_do_resume(struct vcpu *v) return; /* bail */ } } +} + +void hvm_do_resume(struct vcpu *v) +{ + ioreq_t *p; + struct hvm_ioreq_server *s; + shared_iopage_t *page; + + pt_restore_timer(v); + + check_wakeup_from_wait(); + + p = get_ioreq(v); + + if ( p->type == IOREQ_TYPE_INVALIDATE ) + { + spin_lock(&v->domain->arch.hvm_domain.ioreq_server_lock); + /* Wait all servers */ + for ( s = v->domain->arch.hvm_domain.ioreq_server_list; s; s = s->next ) + { + page = s->ioreq.va; + ASSERT((v == current) || spin_is_locked(&s->ioreq.lock)); + ASSERT(s->ioreq.va != NULL); + v->arch.hvm_vcpu.ioreq = &s->ioreq; + hvm_wait_on_io(v, 
&page->vcpu_ioreq[v->vcpu_id]); + } + spin_unlock(&v->domain->arch.hvm_domain.ioreq_server_lock); + } + else + hvm_wait_on_io(v, p); /* Inject pending hw/sw trap */ if (v->arch.hvm_vcpu.inject_trap != -1) diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c index 41a2ede..cd89ff6 100644 --- a/xen/arch/x86/hvm/io.c +++ b/xen/arch/x86/hvm/io.c @@ -150,7 +150,8 @@ void send_timeoffset_req(unsigned long timeoff) void send_invalidate_req(void) { struct vcpu *v = current; - ioreq_t *p = get_ioreq(v); + ioreq_t p[1]; + struct hvm_ioreq_server *s; if ( p->state != STATE_IOREQ_NONE ) { @@ -164,8 +165,18 @@ void send_invalidate_req(void) p->size = 4; p->dir = IOREQ_WRITE; p->data = ~0UL; /* flush all */ + p->count = 0; + p->addr = 0; + + spin_lock(&v->domain->arch.hvm_domain.ioreq_server_lock); + for (s = v->domain->arch.hvm_domain.ioreq_server_list; s; s = s->next) + { + set_ioreq(v, &s->ioreq, p); + (void)hvm_send_assist_req(v); + } + spin_unlock(&v->domain->arch.hvm_domain.ioreq_server_lock); - (void)hvm_send_assist_req(v); + set_ioreq(v, &v->domain->arch.hvm_domain.ioreq, p); } int handle_mmio(void) -- Julien Grall
Julien Grall
2012-Mar-22 15:59 UTC
[XEN][RFC PATCH 08/15] hvm-io: Handle server in buffered IO
As for normal IO, Xen walks the ranges to find which server is able to handle the IO. There is a special case for IOREQ_TYPE_TIMEOFFSET: this IO must be sent to all servers. For that we have introduced a new function, hvm_buffered_io_send_to_server, which sends an IO to a specific server. Signed-off-by: Julien Grall <julien.grall@citrix.com> --- xen/arch/x86/hvm/io.c | 85 ++++++++++++++++++++++++++++++++++++------------- 1 files changed, 63 insertions(+), 22 deletions(-) diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c index cd89ff6..d9df913 100644 --- a/xen/arch/x86/hvm/io.c +++ b/xen/arch/x86/hvm/io.c @@ -46,28 +46,17 @@ #include <xen/iocap.h> #include <public/hvm/ioreq.h> -int hvm_buffered_io_send(ioreq_t *p) +static int hvm_buffered_io_send_to_server(ioreq_t *p, struct hvm_ioreq_server *s) { struct vcpu *v = current; - struct hvm_ioreq_page *iorp = &v->domain->arch.hvm_domain.buf_ioreq; - buffered_iopage_t *pg = iorp->va; + struct hvm_ioreq_page *iorp; + buffered_iopage_t *pg; buf_ioreq_t bp; /* Timeoffset sends 64b data, but no address. Use two consecutive slots. 
*/ int qw = 0; - /* Ensure buffered_iopage fits in a page */ - BUILD_BUG_ON(sizeof(buffered_iopage_t) > PAGE_SIZE); - - /* - * Return 0 for the cases we can''t deal with: - * - ''addr'' is only a 20-bit field, so we cannot address beyond 1MB - * - we cannot buffer accesses to guest memory buffers, as the guest - * may expect the memory buffer to be synchronously accessed - * - the count field is usually used with data_is_ptr and since we don''t - * support data_is_ptr we do not waste space for the count field either - */ - if ( (p->addr > 0xffffful) || p->data_is_ptr || (p->count != 1) ) - return 0; + iorp = &s->buf_ioreq; + pg = iorp->va; bp.type = p->type; bp.dir = p->dir; @@ -90,10 +79,10 @@ int hvm_buffered_io_send(ioreq_t *p) gdprintk(XENLOG_WARNING, "unexpected ioreq size: %u\n", p->size); return 0; } - + bp.data = p->data; bp.addr = p->addr; - + spin_lock(&iorp->lock); if ( (pg->write_pointer - pg->read_pointer) >@@ -103,10 +92,10 @@ int hvm_buffered_io_send(ioreq_t *p) spin_unlock(&iorp->lock); return 0; } - + memcpy(&pg->buf_ioreq[pg->write_pointer % IOREQ_BUFFER_SLOT_NUM], &bp, sizeof(bp)); - + if ( qw ) { bp.data = p->data >> 32; @@ -119,12 +108,64 @@ int hvm_buffered_io_send(ioreq_t *p) pg->write_pointer += qw ? 
2 : 1; notify_via_xen_event_channel(v->domain, - v->domain->arch.hvm_domain.params[HVM_PARAM_BUFIOREQ_EVTCHN]); + s->buf_ioreq_evtchn); spin_unlock(&iorp->lock); - + return 1; } +int hvm_buffered_io_send(ioreq_t *p) +{ + struct vcpu *v = current; + struct hvm_ioreq_server *s; + int rc = 1; + + /* Ensure buffered_iopage fits in a page */ + BUILD_BUG_ON(sizeof(buffered_iopage_t) > PAGE_SIZE); + + /* + * Return 0 for the cases we can''t deal with: + * - ''addr'' is only a 20-bit field, so we cannot address beyond 1MB + * - we cannot buffer accesses to guest memory buffers, as the guest + * may expect the memory buffer to be synchronously accessed + * - the count field is usually used with data_is_ptr and since we don''t + * support data_is_ptr we do not waste space for the count field either + */ + if ( (p->addr > 0xffffful) || p->data_is_ptr || (p->count != 1) ) + return 0; + + spin_lock(&v->domain->arch.hvm_domain.ioreq_server_lock); + if ( p->type == IOREQ_TYPE_TIMEOFFSET ) + { + /* Send TIME OFFSET to all servers */ + for ( s = v->domain->arch.hvm_domain.ioreq_server_list; s; s = s->next ) + rc = hvm_buffered_io_send_to_server(p, s) && rc; + } + else + { + for ( s = v->domain->arch.hvm_domain.ioreq_server_list; s; s = s->next ) + { + struct hvm_io_range *x = (p->type == IOREQ_TYPE_COPY) + ? s->mmio_range_list : s->portio_range_list; + for ( ; x; x = x->next ) + { + if ( (p->addr >= x->s) && (p->addr <= x->e) ) + { + rc = hvm_buffered_io_send_to_server(p, s); + spin_unlock(&v->domain->arch.hvm_domain.ioreq_server_lock); + + return rc; + } + } + } + rc = 0; + } + + spin_unlock(&v->domain->arch.hvm_domain.ioreq_server_lock); + + return rc; +} + void send_timeoffset_req(unsigned long timeoff) { ioreq_t p[1]; -- Julien Grall
Julien Grall
2012-Mar-22 15:59 UTC
[XEN][RFC PATCH 09/15] xc: Add the hypercall for multiple servers
This patch add 5 hypercalls to register server, io range and PCI. Signed-off-by: Julien Grall <julien.grall@citrix.com> --- tools/libxc/xc_domain.c | 140 +++++++++++++++++++++++++++++++++++++++++++++++ tools/libxc/xenctrl.h | 13 ++++ 2 files changed, 153 insertions(+), 0 deletions(-) diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c index d98e68b..8067397 100644 --- a/tools/libxc/xc_domain.c +++ b/tools/libxc/xc_domain.c @@ -1514,6 +1514,146 @@ int xc_domain_set_virq_handler(xc_interface *xch, uint32_t domid, int virq) return do_domctl(xch, &domctl); } +int xc_hvm_register_ioreq_server(xc_interface *xch, domid_t dom, servid_t *id) +{ + DECLARE_HYPERCALL; + DECLARE_HYPERCALL_BUFFER(xen_hvm_register_ioreq_server_t, arg); + int rc = -1; + + arg = xc_hypercall_buffer_alloc(xch, arg, sizeof (*arg)); + if (!arg) { + PERROR("Could not allocate memory for xc_hvm_register_ioreq_server hypercall"); + goto out; + } + + hypercall.op = __HYPERVISOR_hvm_op; + hypercall.arg[0] = HVMOP_register_ioreq_server; + hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg); + + arg->domid = dom; + rc = do_xen_hypercall(xch, &hypercall); + *id = arg->id; + + xc_hypercall_buffer_free(xch, arg); +out: + return rc; +} + +int xc_hvm_get_ioreq_server_buf_channel(xc_interface *xch, domid_t dom, servid_t id, + unsigned int *channel) +{ + DECLARE_HYPERCALL; + DECLARE_HYPERCALL_BUFFER(xen_hvm_get_ioreq_server_buf_channel_t, arg); + int rc = -1; + + arg = xc_hypercall_buffer_alloc(xch, arg, sizeof (*arg)); + if (!arg) { + PERROR("Could not allocate memory for xc_hvm_get_ioreq_servr_buf_channel"); + goto out; + } + + hypercall.op = __HYPERVISOR_hvm_op; + hypercall.arg[0] = HVMOP_get_ioreq_server_buf_channel; + hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg); + + arg->domid = dom; + arg->id = id; + rc = do_xen_hypercall(xch, &hypercall); + *channel = arg->channel; + + xc_hypercall_buffer_free(xch, arg); + +out: + return rc; +} + +int xc_hvm_map_io_range_to_ioreq_server(xc_interface *xch, domid_t 
dom, servid_t id, + char is_mmio, uint64_t start, uint64_t end) +{ + DECLARE_HYPERCALL; + DECLARE_HYPERCALL_BUFFER(xen_hvm_map_io_range_to_ioreq_server_t, arg); + int rc = -1; + + arg = xc_hypercall_buffer_alloc(xch, arg, sizeof (*arg)); + if (!arg) { + PERROR("Could not allocate memory for xc_hvm_map_io_range_to_ioreq_server hypercall"); + goto out; + } + + hypercall.op = __HYPERVISOR_hvm_op; + hypercall.arg[0] = HVMOP_map_io_range_to_ioreq_server; + hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg); + + arg->domid = dom; + arg->id = id; + arg->is_mmio = is_mmio; + arg->s = start; + arg->e = end; + + rc = do_xen_hypercall(xch, &hypercall); + + xc_hypercall_buffer_free(xch, arg); +out: + return rc; +} + +int xc_hvm_unmap_io_range_from_ioreq_server(xc_interface *xch, domid_t dom, servid_t id, + char is_mmio, uint64_t addr) +{ + DECLARE_HYPERCALL; + DECLARE_HYPERCALL_BUFFER(xen_hvm_unmap_io_range_from_ioreq_server_t, arg); + int rc = -1; + + arg = xc_hypercall_buffer_alloc(xch, arg, sizeof (*arg)); + if (!arg) { + PERROR("Could not allocate memory for xc_hvm_unmap_io_range_from_ioreq_server hypercall"); + goto out; + } + + hypercall.op = __HYPERVISOR_hvm_op; + hypercall.arg[0] = HVMOP_unmap_io_range_from_ioreq_server; + hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg); + + arg->domid = dom; + arg->id = id; + arg->is_mmio = is_mmio; + arg->addr = addr; + rc = do_xen_hypercall(xch, &hypercall); + + xc_hypercall_buffer_free(xch, arg); +out: + return rc; +} + +int xc_hvm_register_pcidev(xc_interface *xch, domid_t dom, servid_t id, + uint16_t bdf) +{ + DECLARE_HYPERCALL; + DECLARE_HYPERCALL_BUFFER(xen_hvm_register_pcidev_t, arg); + int rc = -1; + + arg = xc_hypercall_buffer_alloc(xch, arg, sizeof (*arg)); + if (!arg) + { + PERROR("Could not allocate memory for xc_hvm_create_pci hypercall"); + goto out; + } + + hypercall.op = __HYPERVISOR_hvm_op; + hypercall.arg[0] = HVMOP_register_pcidev; + hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg); + + arg->domid = dom; + arg->id = 
id; + arg->bdf = bdf; + rc = do_xen_hypercall(xch, &hypercall); + + xc_hypercall_buffer_free(xch, arg); +out: + return rc; +} + + /* * Local variables: * mode: C diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h index 812e723..bcbfee5 100644 --- a/tools/libxc/xenctrl.h +++ b/tools/libxc/xenctrl.h @@ -1648,6 +1648,19 @@ void xc_clear_last_error(xc_interface *xch); int xc_set_hvm_param(xc_interface *handle, domid_t dom, int param, unsigned long value); int xc_get_hvm_param(xc_interface *handle, domid_t dom, int param, unsigned long *value); +int xc_hvm_register_ioreq_server(xc_interface *xch, domid_t dom, unsigned int *id); +int xc_hvm_get_ioreq_server_buf_channel(xc_interface *xch, domid_t dom, servid_t id, + unsigned int *channel); +int xc_hvm_map_io_range_to_ioreq_server(xc_interface *xch, domid_t dom, unsigned int id, + char is_mmio, uint64_t start, uint64_t end); +int xc_hvm_unmap_io_range_from_ioreq_server(xc_interface *xch, domid_t dom, unsigned int id, + char is_mmio, uint64_t addr); +/* + * Register a PCI device + */ +int xc_hvm_register_pcidev(xc_interface *xch, domid_t dom, unsigned int id, + uint16_t bdf); + /* IA64 specific, nvram save */ int xc_ia64_save_to_nvram(xc_interface *xch, uint32_t dom); -- Julien Grall
Julien Grall
2012-Mar-22 15:59 UTC
[XEN][RFC PATCH 10/15] xc: Add argument to allocate more special pages
This patch makes it possible to allocate more special pages. Indeed, with multiple ioreq servers, we need 2 shared pages per server. xc_hvm_build now takes an argument which indicates the number of additional special pages we want to allocate. Signed-off-by: Julien Grall <julien.grall@citrix.com> --- tools/libxc/xc_hvm_build.c | 57 +++++++++++++++++++++++++------------------ tools/libxc/xenguest.h | 6 +++- tools/libxc/xg_private.c | 3 +- 3 files changed, 39 insertions(+), 27 deletions(-) diff --git a/tools/libxc/xc_hvm_build.c b/tools/libxc/xc_hvm_build.c index 696c012..62b4ff1 100644 --- a/tools/libxc/xc_hvm_build.c +++ b/tools/libxc/xc_hvm_build.c @@ -47,10 +47,11 @@ #define SPECIALPAGE_IDENT_PT 6 #define SPECIALPAGE_CONSOLE 7 #define NR_SPECIAL_PAGES 8 -#define special_pfn(x) (0xff000u - NR_SPECIAL_PAGES + (x)) +#define special_pfn(x, add) (0xff000u - (NR_SPECIAL_PAGES + (add)) + (x)) static void build_hvm_info(void *hvm_info_page, uint64_t mem_size, - uint64_t mmio_start, uint64_t mmio_size) + uint64_t mmio_start, uint64_t mmio_size, + uint32_t nr_special_pages) { struct hvm_info_table *hvm_info = (struct hvm_info_table *) (((unsigned char *)hvm_info_page) + HVM_INFO_OFFSET); @@ -78,7 +79,7 @@ static void build_hvm_info(void *hvm_info_page, uint64_t mem_size, /* Memory parameters. */ hvm_info->low_mem_pgend = lowmem_end >> PAGE_SHIFT; hvm_info->high_mem_pgend = highmem_end >> PAGE_SHIFT; - hvm_info->reserved_mem_pgstart = special_pfn(0); + hvm_info->reserved_mem_pgstart = special_pfn(0, nr_special_pages); /* Finish with the checksum. 
*/ for ( i = 0, sum = 0; i < hvm_info->length; i++ ) @@ -141,7 +142,8 @@ static int check_mmio_hole(uint64_t start, uint64_t memsize, static int setup_guest(xc_interface *xch, uint32_t dom, const struct xc_hvm_build_args *args, - char *image, unsigned long image_size) + char *image, unsigned long image_size, + uint32_t nr_special_pages) { xen_pfn_t *page_array = NULL; unsigned long i, nr_pages = args->mem_size >> PAGE_SHIFT; @@ -334,37 +336,42 @@ static int setup_guest(xc_interface *xch, xch, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, HVM_INFO_PFN)) == NULL ) goto error_out; - build_hvm_info(hvm_info_page, v_end, mmio_start, mmio_size); + build_hvm_info(hvm_info_page, v_end, mmio_start, mmio_size, nr_special_pages); munmap(hvm_info_page, PAGE_SIZE); /* Allocate and clear special pages. */ - for ( i = 0; i < NR_SPECIAL_PAGES; i++ ) + for ( i = 0; i < (NR_SPECIAL_PAGES + nr_special_pages); i++ ) { - xen_pfn_t pfn = special_pfn(i); + xen_pfn_t pfn = special_pfn(i, nr_special_pages); rc = xc_domain_populate_physmap_exact(xch, dom, 1, 0, 0, &pfn); if ( rc != 0 ) { PERROR("Could not allocate %d''th special page.", i); goto error_out; } - if ( xc_clear_domain_page(xch, dom, special_pfn(i)) ) + if ( xc_clear_domain_page(xch, dom, special_pfn(i, nr_special_pages)) ) goto error_out; } xc_set_hvm_param(xch, dom, HVM_PARAM_STORE_PFN, - special_pfn(SPECIALPAGE_XENSTORE)); + special_pfn(SPECIALPAGE_XENSTORE, nr_special_pages)); xc_set_hvm_param(xch, dom, HVM_PARAM_BUFIOREQ_PFN, - special_pfn(SPECIALPAGE_BUFIOREQ)); + special_pfn(SPECIALPAGE_BUFIOREQ, nr_special_pages)); xc_set_hvm_param(xch, dom, HVM_PARAM_IOREQ_PFN, - special_pfn(SPECIALPAGE_IOREQ)); + special_pfn(SPECIALPAGE_IOREQ, nr_special_pages)); xc_set_hvm_param(xch, dom, HVM_PARAM_CONSOLE_PFN, - special_pfn(SPECIALPAGE_CONSOLE)); + special_pfn(SPECIALPAGE_CONSOLE, nr_special_pages)); xc_set_hvm_param(xch, dom, HVM_PARAM_PAGING_RING_PFN, - special_pfn(SPECIALPAGE_PAGING)); + special_pfn(SPECIALPAGE_PAGING, 
nr_special_pages)); xc_set_hvm_param(xch, dom, HVM_PARAM_ACCESS_RING_PFN, - special_pfn(SPECIALPAGE_ACCESS)); + special_pfn(SPECIALPAGE_ACCESS, nr_special_pages)); xc_set_hvm_param(xch, dom, HVM_PARAM_SHARING_RING_PFN, - special_pfn(SPECIALPAGE_SHARING)); + special_pfn(SPECIALPAGE_SHARING, nr_special_pages)); + xc_set_hvm_param(xch, dom, HVM_PARAM_IO_PFN_FIRST, + special_pfn(NR_SPECIAL_PAGES, nr_special_pages)); + xc_set_hvm_param(xch, dom, HVM_PARAM_IO_PFN_LAST, + special_pfn(NR_SPECIAL_PAGES + nr_special_pages - 1, + nr_special_pages)); /* * Identity-map page table is required for running with CR0.PG=0 when @@ -372,14 +379,14 @@ static int setup_guest(xc_interface *xch, */ if ( (ident_pt = xc_map_foreign_range( xch, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, - special_pfn(SPECIALPAGE_IDENT_PT))) == NULL ) + special_pfn(SPECIALPAGE_IDENT_PT, nr_special_pages))) == NULL ) goto error_out; for ( i = 0; i < PAGE_SIZE / sizeof(*ident_pt); i++ ) ident_pt[i] = ((i << 22) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE); munmap(ident_pt, PAGE_SIZE); xc_set_hvm_param(xch, dom, HVM_PARAM_IDENT_PT, - special_pfn(SPECIALPAGE_IDENT_PT) << PAGE_SHIFT); + special_pfn(SPECIALPAGE_IDENT_PT, nr_special_pages) << PAGE_SHIFT); /* Insert JMP <rel32> instruction at address 0x0 to reach entry point. */ entry_eip = elf_uval(&elf, elf.ehdr, e_entry); @@ -406,7 +413,8 @@ static int setup_guest(xc_interface *xch, * Create a domain for a virtualized Linux, using files/filenames. 
*/ int xc_hvm_build(xc_interface *xch, uint32_t domid, - const struct xc_hvm_build_args *hvm_args) + const struct xc_hvm_build_args *hvm_args, + uint32_t nr_special_pages) { struct xc_hvm_build_args args = *hvm_args; void *image; @@ -432,7 +440,7 @@ int xc_hvm_build(xc_interface *xch, uint32_t domid, if ( image == NULL ) return -1; - sts = setup_guest(xch, domid, &args, image, image_size); + sts = setup_guest(xch, domid, &args, image, image_size, nr_special_pages); free(image); @@ -447,10 +455,11 @@ int xc_hvm_build(xc_interface *xch, uint32_t domid, * If target == memsize, pages are populated normally. */ int xc_hvm_build_target_mem(xc_interface *xch, - uint32_t domid, - int memsize, - int target, - const char *image_name) + uint32_t domid, + int memsize, + int target, + const char *image_name, + uint32_t nr_special_pages) { struct xc_hvm_build_args args = {}; @@ -458,7 +467,7 @@ int xc_hvm_build_target_mem(xc_interface *xch, args.mem_target = (uint64_t)target << 20; args.image_file_name = image_name; - return xc_hvm_build(xch, domid, &args); + return xc_hvm_build(xch, domid, &args, nr_special_pages); } /* diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h index 8d885d3..092ee24 100644 --- a/tools/libxc/xenguest.h +++ b/tools/libxc/xenguest.h @@ -188,13 +188,15 @@ struct xc_hvm_build_args { * are optional. 
*/ int xc_hvm_build(xc_interface *xch, uint32_t domid, - const struct xc_hvm_build_args *hvm_args); + const struct xc_hvm_build_args *hvm_args, + uint32_t nr_special_pages); int xc_hvm_build_target_mem(xc_interface *xch, uint32_t domid, int memsize, int target, - const char *image_name); + const char *image_name, + uint32_t nr_special_pages); int xc_suspend_evtchn_release(xc_interface *xch, xc_evtchn *xce, int domid, int suspend_evtchn); diff --git a/tools/libxc/xg_private.c b/tools/libxc/xg_private.c index 3864bc7..e74adce 100644 --- a/tools/libxc/xg_private.c +++ b/tools/libxc/xg_private.c @@ -192,7 +192,8 @@ unsigned long csum_page(void *page) __attribute__((weak)) int xc_hvm_build(xc_interface *xch, uint32_t domid, - const struct xc_hvm_build_args *hvm_args) + const struct xc_hvm_build_args *hvm_args, + uint32_t nr_servers) { errno = ENOSYS; return -1; -- Julien Grall
Quick fix for hvm_build in the Python bindings. Signed-off-by: Julien Grall <julien.grall@citrix.com> --- tools/python/xen/lowlevel/xc/xc.c | 3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c index 7c89756..eb004b6 100644 --- a/tools/python/xen/lowlevel/xc/xc.c +++ b/tools/python/xen/lowlevel/xc/xc.c @@ -984,8 +984,9 @@ static PyObject *pyxc_hvm_build(XcObject *self, if ( target == -1 ) target = memsize; + // Ugly fix : we must retrieve the number of servers if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize, - target, image) != 0 ) + target, image, 0) != 0 ) return pyxc_error_to_exception(self->xc_handle); #if !defined(__ia64__) -- Julien Grall
Julien Grall
2012-Mar-22 15:59 UTC
[XEN][RFC PATCH 12/15] xl: Add interface to handle multiple device models
This patch adds a structure which contains all the information about a device model. Signed-off-by: Julien Grall <julien.grall@citrix.com> --- tools/libxl/libxl.h | 4 ++-- tools/libxl/libxl_internal.h | 1 + tools/libxl/libxl_types.idl | 11 +++++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h index 6b69030..a347a34 100644 --- a/tools/libxl/libxl.h +++ b/tools/libxl/libxl.h @@ -357,9 +357,9 @@ typedef struct { typedef struct { libxl_domain_create_info c_info; libxl_domain_build_info b_info; - int num_disks, num_vifs, num_pcidevs, num_vfbs, num_vkbs; - + int num_dms; + libxl_dm *dms; libxl_device_disk *disks; libxl_device_nic *vifs; libxl_device_pci *pcidevs; diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index e0a1070..247bdb9 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -767,6 +767,7 @@ typedef struct { char *dom_path; /* from libxl_malloc, only for libxl_spawner_record_pid */ const char *pid_path; /* only for libxl_spawner_record_pid */ int domid; + uint32_t dmid; libxl__spawn_starting *for_spawn; } libxl__spawner_starting; diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl index 413a1a6..7e48817 100644 --- a/tools/libxl/libxl_types.idl +++ b/tools/libxl/libxl_types.idl @@ -37,6 +37,7 @@ libxl_domain_type = Enumeration("domain_type", [ libxl_device_model_version = Enumeration("device_model_version", [ (1, "QEMU_XEN_TRADITIONAL"), # Historical qemu-xen device model (qemu-dm) (2, "QEMU_XEN"), # Upstream based qemu-xen device model + (3, "MULTIPLE_QEMU_XEN"), # Handle multiple dm ]) libxl_console_type = Enumeration("console_type", [ @@ -224,6 +225,15 @@ libxl_domain_create_info = Struct("domain_create_info",[ MemKB = UInt(64, init_val = "LIBXL_MEMKB_DEFAULT") +libxl_dm = Struct("dm", [ + ("id", uint32), + ("name", string), + ("path", string), + ("pcis", libxl_string_list), + ("mmios", libxl_string_list), + ("pios", 
libxl_string_list), + ]) + # Instances of libxl_file_reference contained in this struct which # have been mapped (with libxl_file_reference_map) will be unmapped # by libxl_domain_build/restore. If either of these are never called @@ -289,6 +299,7 @@ libxl_domain_build_info = Struct("domain_build_info",[ ("usbdevice", string), ("soundhw", string), ("xen_platform_pci", libxl_defbool), + ("max_servers", integer), ])), ("pv", Struct(None, [("kernel", libxl_file_reference), ("slack_memkb", MemKB), -- Julien Grall
Julien Grall
2012-Mar-22 15:59 UTC
[XEN][RFC PATCH 13/15] xl-qmp: add device model id to qmp function
With the support of multiple device, the qmp library needs to know which device model is currently used. Signed-off-by: Julien Grall <julien.grall@citrix.com> --- tools/libxl/libxl_internal.h | 9 +++++---- tools/libxl/libxl_qmp.c | 24 +++++++++++++----------- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 247bdb9..52a2429 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -1022,7 +1022,8 @@ typedef struct libxl__qmp_handler libxl__qmp_handler; * Return an handler or NULL if there is an error */ _hidden libxl__qmp_handler *libxl__qmp_initialize(libxl__gc *gc, - uint32_t domid); + uint32_t domid, + uint32_t dmid); /* ask to QEMU the serial port information and store it in xenstore. */ _hidden int libxl__qmp_query_serial(libxl__qmp_handler *qmp); _hidden int libxl__qmp_pci_add(libxl__gc *gc, int d, libxl_device_pci *pcidev); @@ -1034,12 +1035,12 @@ _hidden int libxl__qmp_migrate(libxl__gc *gc, int domid, int fd); _hidden void libxl__qmp_close(libxl__qmp_handler *qmp); /* remove the socket file, if the file has already been removed, * nothing happen */ -_hidden void libxl__qmp_cleanup(libxl__gc *gc, uint32_t domid); +_hidden void libxl__qmp_cleanup(libxl__gc *gc, uint32_t domid, uint32_t deamonid); /* this helper calls qmp_initialize, query_serial and qmp_close */ _hidden int libxl__qmp_initializations(libxl__gc *gc, uint32_t domid, - const libxl_domain_config *guest_config); - + const libxl_domain_config *guest_config, + uint32_t dmid); /* from libxl_json */ #include <yajl/yajl_gen.h> diff --git a/tools/libxl/libxl_qmp.c b/tools/libxl/libxl_qmp.c index f5a3edc..89d3f48 100644 --- a/tools/libxl/libxl_qmp.c +++ b/tools/libxl/libxl_qmp.c @@ -624,7 +624,8 @@ static void qmp_free_handler(libxl__qmp_handler *qmp) * API */ -libxl__qmp_handler *libxl__qmp_initialize(libxl__gc *gc, uint32_t domid) +libxl__qmp_handler *libxl__qmp_initialize(libxl__gc *gc, uint32_t 
domid, + uint32_t dmid) { int ret = 0; libxl__qmp_handler *qmp = NULL; @@ -632,8 +633,8 @@ libxl__qmp_handler *libxl__qmp_initialize(libxl__gc *gc, uint32_t domid) qmp = qmp_init_handler(gc, domid); - qmp_socket = libxl__sprintf(gc, "%s/qmp-libxl-%d", - libxl_run_dir_path(), domid); + qmp_socket = libxl__sprintf(gc, "%s/qmp-libxl-%u-%u", + libxl_run_dir_path(), domid, dmid); if ((ret = qmp_open(qmp, qmp_socket, QMP_SOCKET_CONNECT_TIMEOUT)) < 0) { LIBXL__LOG_ERRNO(qmp->ctx, LIBXL__LOG_ERROR, "Connection error"); qmp_free_handler(qmp); @@ -665,13 +666,13 @@ void libxl__qmp_close(libxl__qmp_handler *qmp) qmp_free_handler(qmp); } -void libxl__qmp_cleanup(libxl__gc *gc, uint32_t domid) +void libxl__qmp_cleanup(libxl__gc *gc, uint32_t domid, uint32_t dmid) { libxl_ctx *ctx = libxl__gc_owner(gc); char *qmp_socket; - qmp_socket = libxl__sprintf(gc, "%s/qmp-libxl-%d", - libxl_run_dir_path(), domid); + qmp_socket = libxl__sprintf(gc, "%s/qmp-libxl-%u-%u", + libxl_run_dir_path(), domid, dmid); if (unlink(qmp_socket) == -1) { if (errno != ENOENT) { LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, @@ -744,7 +745,7 @@ int libxl__qmp_pci_add(libxl__gc *gc, int domid, libxl_device_pci *pcidev) char *hostaddr = NULL; int rc = 0; - qmp = libxl__qmp_initialize(gc, domid); + qmp = libxl__qmp_initialize(gc, domid, 0); if (!qmp) return -1; @@ -789,7 +790,7 @@ static int qmp_device_del(libxl__gc *gc, int domid, char *id) libxl_key_value_list args = NULL; int rc = 0; - qmp = libxl__qmp_initialize(gc, domid); + qmp = libxl__qmp_initialize(gc, domid, 0); if (!qmp) return ERROR_FAIL; @@ -850,7 +851,7 @@ int libxl__qmp_migrate(libxl__gc *gc, int domid, int fd) libxl_key_value_list args = NULL; int rc = 0; - qmp = libxl__qmp_initialize(gc, domid); + qmp = libxl__qmp_initialize(gc, domid, 0); if (!qmp) return ERROR_FAIL; @@ -904,13 +905,14 @@ static int qmp_change(libxl__gc *gc, libxl__qmp_handler *qmp, } int libxl__qmp_initializations(libxl__gc *gc, uint32_t domid, - const libxl_domain_config 
*guest_config) + const libxl_domain_config *guest_config, + uint32_t dmid) { const libxl_vnc_info *vnc = libxl__dm_vnc(guest_config); libxl__qmp_handler *qmp = NULL; int ret = 0; - qmp = libxl__qmp_initialize(gc, domid); + qmp = libxl__qmp_initialize(gc, domid, dmid); if (!qmp) return -1; ret = libxl__qmp_query_serial(qmp); -- Julien Grall
Julien Grall
2012-Mar-22 15:59 UTC
[XEN][RFC PATCH 14/15] xl-parsing: Parse the new option device_models
To support multiple ioreq servers, we add a new option "device_models". It's an array of device models; for each device model, we need to specify which PCI devices and IO ranges (MMIO, PIO) it is allowed to handle. For instance, if we want a QEMU which handles a specific PCI device: name=net, path=/path/to/qemu-wrapper, pci=00:4.0 Signed-off-by: Julien Grall <julien.grall@citrix.com> --- tools/libxl/Makefile | 2 +- tools/libxl/libxlu_dm.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++ tools/libxl/libxlutil.h | 5 + tools/libxl/xl_cmdimpl.c | 28 ++++++- 4 files changed, 235 insertions(+), 2 deletions(-) create mode 100644 tools/libxl/libxlu_dm.c diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile index e44fcfa..e35d382 100644 --- a/tools/libxl/Makefile +++ b/tools/libxl/Makefile @@ -57,7 +57,7 @@ $(LIBXL_OBJS): CFLAGS += $(CFLAGS_libxenctrl) $(CFLAGS_libxenguest) $(CFLAGS_lib AUTOINCS= libxlu_cfg_y.h libxlu_cfg_l.h _libxl_list.h AUTOSRCS= libxlu_cfg_y.c libxlu_cfg_l.c LIBXLU_OBJS = libxlu_cfg_y.o libxlu_cfg_l.o libxlu_cfg.o \ - libxlu_disk_l.o libxlu_disk.o + libxlu_disk_l.o libxlu_disk.o libxlu_dm.o $(LIBXLU_OBJS): CFLAGS += $(CFLAGS_libxenctrl) # For xentoollog.h CLIENTS = xl testidl diff --git a/tools/libxl/libxlu_dm.c b/tools/libxl/libxlu_dm.c new file mode 100644 index 0000000..928c7d2 --- /dev/null +++ b/tools/libxl/libxlu_dm.c @@ -0,0 +1,202 @@ +#include "libxl_osdeps.h" /* must come before any other headers */ +#include <stdlib.h> +#include "libxlu_internal.h" +#include "libxlu_cfg_i.h" + +static void split_string_into_string_list(const char *str, + const char *delim, + libxl_string_list *psl) +{ + char *s, *saveptr; + const char *p; + libxl_string_list sl; + + int i = 0, nr = 0; + + s = strdup(str); + if (s == NULL) { + fprintf(stderr, "xlu_dm: unable to allocate memory\n"); + exit(-1); + } + + /* Count number of entries */ + p = strtok_r(s, delim, &saveptr); + do { + nr++; + } while ((p = strtok_r(NULL, delim, &saveptr))); + + free(s); + + s = strdup(str); + + sl = 
malloc((nr+1) * sizeof (char *)); + if (sl == NULL) { + fprintf(stderr, "xlu_dm: unable to allocate memory\n"); + exit(-1); + } + + p = strtok_r(s, delim, &saveptr); + do { + assert(i < nr); + // Skip blank + while (*p == '' '') + p++; + sl[i] = strdup(p); + i++; + } while ((p = strtok_r(NULL, delim, &saveptr))); + sl[i] = NULL; + + *psl = sl; + + free(s); +} + +static int xlu_dm_check_pci(const char *pci) +{ + unsigned long bus; + unsigned long function; + unsigned long device; + char *buf; + + while (*pci == '' '') + pci++; + + bus = strtol(pci, &buf, 16); + if (pci == buf || *buf != '':'') + return 1; + + pci = buf + 1; + device = strtol(pci, &buf, 16); + if (pci == buf || *buf != ''.'') + return 1; + + pci = buf + 1; + function = strtol(pci, &buf, 16); + if (pci == buf) + return 1; + + pci = buf; + + while (*pci == '' '') + pci++; + + if (*pci != ''\0'') + return 1; + + buf[0] = ''\0''; + + if (bus > 0xff || device > 0x1f || function > 0x7 + || (bus == 0xff && device == 0x1f && function == 0x7)) + return 1; + + return 0; +} + +static int xlu_dm_check_range(const char *range) +{ + unsigned long begin; + unsigned long end; + char *buf; + + begin = strtol(range, &buf, 0); + if (buf == range) + return 0; + + if (*buf == ''-'') + { + range = buf + 1; + end = strtol(range, &buf, 0); + if (buf == range) + return 1; + } + else + end = begin; + + range = buf; + + while (*range == '' '') + range++; + + if (*range != ''\0'') + return 1; + + buf[0] = ''\0''; + + if (begin > end) + return 1; + + return 0; +} + +int xlu_dm_parse(XLU_Config *cfg, const char *spec, + libxl_dm *dm) +{ + char *buf = strdup(spec); + char *p, *p2; + int i = 0; + int rc = 0; + + p = strtok (buf, ","); + if (!p) + goto skip_dm; + do { + while (*p == '' '') + p++; + if ((p2 = strchr (p, ''='')) == NULL) + break; + *p2 = ''\0''; + if (!strcmp (p, "name")) + dm->name = strdup (p2 + 1); + else if (!strcmp (p, "path")) + dm->path = strdup (p2 + 1); + else if (!strcmp (p, "pci")) + { + 
split_string_into_string_list(p2 + 1, ";", &dm->pcis); + for (i = 0; dm->pcis[i]; i++) + { + if (xlu_dm_check_pci(dm->pcis[i])) + { + fprintf(stderr, "xlu_dm: invalid pci \"%s\"\n", + dm->pcis[i]); + rc = 1; + } + } + } + else if (!strcmp (p, "mmio")) + { + split_string_into_string_list(p2 + 1, ";", &dm->mmios); + for (i = 0; dm->mmios[i]; i++) + { + if (xlu_dm_check_range(dm->mmios[i])) + { + fprintf(stderr, "xlu_dm: invalid mmio range \"%s\"\n", + dm->mmios[i]); + rc = 1; + } + } + } + else if (!strcmp (p, "pio")) + { + split_string_into_string_list(p2 + 1, ";", &dm->pios); + for (i = 0; dm->pios[i]; i++) + { + if (xlu_dm_check_range(dm->pios[i])) + { + fprintf(stderr, "xlu_dm: invalid pio range \"%s\"\n", + dm->pios[i]); + rc = 1; + } + } + } + } while ((p = strtok (NULL, ",")) != NULL); + + if (!dm->name && dm->path) + { + fprintf (stderr, "xl: Unable to parse device_deamon\n"); + exit (-ERROR_FAIL); + } +skip_dm: + free(buf); + + return rc; +} diff --git a/tools/libxl/libxlutil.h b/tools/libxl/libxlutil.h index 620b9db..8eb7e16 100644 --- a/tools/libxl/libxlutil.h +++ b/tools/libxl/libxlutil.h @@ -88,6 +88,11 @@ int xlu_disk_parse(XLU_Config *cfg, int nspecs, const char *const *specs, * resulting disk struct is used with libxl. */ +/* + * Daemon specification parsing. 
+ */ +int xlu_dm_parse(XLU_Config *cfg, const char *spec, + libxl_dm *dm); #endif /* LIBXLUTIL_H */ diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c index 1d59b89..5473faf 100644 --- a/tools/libxl/xl_cmdimpl.c +++ b/tools/libxl/xl_cmdimpl.c @@ -515,7 +515,7 @@ static void parse_config_data(const char *configfile_filename_report, const char *buf; long l; XLU_Config *config; - XLU_ConfigList *cpus, *vbds, *nics, *pcis, *cvfbs, *cpuids; + XLU_ConfigList *cpus, *vbds, *nics, *pcis, *cvfbs, *cpuids, *dms; int pci_power_mgmt = 0; int pci_msitranslate = 1; int e; @@ -1119,6 +1119,9 @@ skip_vfb: } else if (!strcmp(buf, "qemu-xen")) { b_info->device_model_version = LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN; + } else if (!strcmp(buf, "multiple-qemu-xen") && c_info->type == LIBXL_DOMAIN_TYPE_HVM) { + b_info->device_model_version + = LIBXL_DEVICE_MODEL_VERSION_MULTIPLE_QEMU_XEN; } else { fprintf(stderr, "Unknown device_model_version \"%s\" specified\n", buf); @@ -1144,6 +1147,29 @@ skip_vfb: } } } + + if (b_info->device_model_version + == LIBXL_DEVICE_MODEL_VERSION_MULTIPLE_QEMU_XEN) { + if (!xlu_cfg_get_list (config, "device_models", &dms, 0, 0)) { + d_config->num_dms = 0; + d_config->dms = NULL; + while ((buf = xlu_cfg_get_listitem (dms, d_config->num_dms)) + != NULL) { + libxl_dm *dm; + + d_config->dms = (libxl_dm *) realloc (d_config->dms, sizeof (libxl_dm) * (d_config->num_dms + 1)); + dm = d_config->dms + d_config->num_dms; + libxl_dm_init (dm); + dm->id = d_config->num_dms + 1; + if (xlu_dm_parse(config, buf, dm)) + exit(-ERROR_FAIL); + + d_config->num_dms++; + } + b_info->u.hvm.max_servers = d_config->num_dms; + } + } + #define parse_extra_args(type) \ e = xlu_cfg_get_list_as_string_list(config, "device_model_args"#type, \ &b_info->extra##type, 0); \ -- Julien Grall
Julien Grall
2012-Mar-22 15:59 UTC
[XEN][RFC PATCH 15/15] xl: Launch and destroy all device models
This patch permits to launch and destroy all device models. For the moment it''s a fork of libxl__build_device* Signed-off-by: Julien Grall <julien.grall@citrix.com> --- tools/libxl/libxl.c | 8 +- tools/libxl/libxl_create.c | 30 +++++- tools/libxl/libxl_dm.c | 225 +++++++++++++++++++++++++++++++++++++++--- tools/libxl/libxl_dom.c | 6 +- tools/libxl/libxl_internal.h | 9 ++ 5 files changed, 256 insertions(+), 22 deletions(-) diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c index 5344366..b578ada 100644 --- a/tools/libxl/libxl.c +++ b/tools/libxl/libxl.c @@ -1047,7 +1047,8 @@ int libxl_domain_destroy(libxl_ctx *ctx, uint32_t domid) switch (libxl__domain_type(gc, domid)) { case LIBXL_DOMAIN_TYPE_HVM: - dm_present = 1; + pid = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "/local/domain/%d/image/device-model-pid", domid)); + dm_present = (pid != NULL); break; case LIBXL_DOMAIN_TYPE_PV: pid = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "/local/domain/%d/image/device-model-pid", domid)); @@ -1073,8 +1074,11 @@ int libxl_domain_destroy(libxl_ctx *ctx, uint32_t domid) if (libxl__destroy_device_model(gc, domid) < 0) LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "libxl__destroy_device_model failed for %d", domid); - libxl__qmp_cleanup(gc, domid); + libxl__qmp_cleanup(gc, domid, 0); } + + libxl__destroy_dms(gc, domid); + if (libxl__devices_destroy(gc, domid) < 0) LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "libxl__devices_destroy failed for %d", domid); diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c index 8417661..43a97f5 100644 --- a/tools/libxl/libxl_create.c +++ b/tools/libxl/libxl_create.c @@ -26,6 +26,10 @@ void libxl_domain_config_dispose(libxl_domain_config *d_config) { int i; + for (i=0; i<d_config->num_dms; i++) + libxl_dm_dispose(&d_config->dms[i]); + free(d_config->dms); + for (i=0; i<d_config->num_disks; i++) libxl_device_disk_dispose(&d_config->disks[i]); free(d_config->disks); @@ -80,6 +84,7 @@ int libxl__domain_build_info_setdefault(libxl__gc *gc, 
switch (b_info->device_model_version) { case 1: b_info->u.hvm.bios = LIBXL_BIOS_TYPE_ROMBIOS; break; case 2: b_info->u.hvm.bios = LIBXL_BIOS_TYPE_SEABIOS; break; + case 3: b_info->u.hvm.bios = LIBXL_BIOS_TYPE_SEABIOS; break; default:return ERROR_INVAL; } @@ -90,6 +95,7 @@ int libxl__domain_build_info_setdefault(libxl__gc *gc, return ERROR_INVAL; break; case 2: + case 3: if (b_info->u.hvm.bios == LIBXL_BIOS_TYPE_ROMBIOS) return ERROR_INVAL; break; @@ -621,12 +627,24 @@ static int do_domain_create(libxl__gc *gc, libxl_domain_config *d_config, libxl_device_vkb_add(ctx, domid, &vkb); libxl_device_vkb_dispose(&vkb); - ret = libxl__create_device_model(gc, domid, d_config, + if (d_config->b_info.device_model_version + == LIBXL_DEVICE_MODEL_VERSION_MULTIPLE_QEMU_XEN) { + ret = libxl__launch_dms(gc, domid, &state, d_config); + if (ret < 0) { + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, + "failed to launch device models: %d\n", ret); + goto error_out; + } + } + else + { + ret = libxl__create_device_model(gc, domid, ~0, d_config, &state, &dm_starting); - if (ret < 0) { - LIBXL__LOG(ctx, LIBXL__LOG_ERROR, - "failed to create device model: %d", ret); - goto error_out; + if (ret < 0) { + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, + "failed to create device model: %d", ret); + goto error_out; + } } break; } @@ -667,7 +685,7 @@ static int do_domain_create(libxl__gc *gc, libxl_domain_config *d_config, if (dm_starting) { if (d_config->b_info.device_model_version == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) { - libxl__qmp_initializations(gc, domid, d_config); + libxl__qmp_initializations(gc, domid, d_config, 0); } ret = libxl__confirm_device_model_startup(gc, &state, dm_starting); if (ret < 0) { diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c index 1261499..150b03a 100644 --- a/tools/libxl/libxl_dm.c +++ b/tools/libxl/libxl_dm.c @@ -308,6 +308,7 @@ static char *dm_spice_options(libxl__gc *gc, static char ** libxl__build_device_model_args_new(libxl__gc *gc, const char *dm, int guest_domid, + 
uint32_t dmid, const libxl_domain_config *guest_config, const libxl__domain_build_state *state) { @@ -324,6 +325,11 @@ static char ** libxl__build_device_model_args_new(libxl__gc *gc, flexarray_t *dm_args; int i; + if (dmid == ~0) + dmid = 0; + else + dmid = guest_config->dms[dmid].id; + dm_args = flexarray_make(16, 1); if (!dm_args) return NULL; @@ -332,11 +338,15 @@ static char ** libxl__build_device_model_args_new(libxl__gc *gc, "-xen-domid", libxl__sprintf(gc, "%d", guest_domid), NULL); + flexarray_append(dm_args, "-xen-dmid"); + flexarray_append(dm_args, + libxl__sprintf(gc, "%u", dmid)); + flexarray_append(dm_args, "-chardev"); flexarray_append(dm_args, libxl__sprintf(gc, "socket,id=libxl-cmd," - "path=%s/qmp-libxl-%d,server,nowait", - libxl_run_dir_path(), guest_domid)); + "path=%s/qmp-libxl-%u-%u,server,nowait", + libxl_run_dir_path(), guest_domid, dmid)); flexarray_append(dm_args, "-mon"); flexarray_append(dm_args, "chardev=libxl-cmd,mode=control"); @@ -455,6 +465,7 @@ static char ** libxl__build_device_model_args_new(libxl__gc *gc, } else { ifname = vifs[i].ifname; } + ifname = libxl__sprintf(gc, "%s.%u", ifname, dmid); flexarray_append(dm_args, "-device"); flexarray_append(dm_args, libxl__sprintf(gc, "%s,id=nic%d,netdev=net%d,mac=%s", @@ -573,6 +584,7 @@ static char ** libxl__build_device_model_args_new(libxl__gc *gc, static char ** libxl__build_device_model_args(libxl__gc *gc, const char *dm, int guest_domid, + uint32_t dmid, const libxl_domain_config *guest_config, const libxl__domain_build_state *state) { @@ -584,9 +596,10 @@ static char ** libxl__build_device_model_args(libxl__gc *gc, guest_domid, guest_config, state); case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN: + case LIBXL_DEVICE_MODEL_VERSION_MULTIPLE_QEMU_XEN: return libxl__build_device_model_args_new(gc, dm, - guest_domid, guest_config, - state); + guest_domid, dmid, + guest_config, state); default: LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "unknown device model version %d", 
guest_config->b_info.device_model_version); @@ -748,7 +761,7 @@ static int libxl__create_stubdom(libxl__gc *gc, if (ret) goto out; - args = libxl__build_device_model_args(gc, "stubdom-dm", guest_domid, + args = libxl__build_device_model_args(gc, "stubdom-dm", guest_domid, 0, guest_config, d_state); if (!args) { ret = ERROR_FAIL; @@ -882,6 +895,7 @@ out: int libxl__create_device_model(libxl__gc *gc, int domid, + uint32_t dmid, libxl_domain_config *guest_config, libxl__domain_build_state *state, libxl__spawner_starting **starting_r) @@ -899,13 +913,18 @@ int libxl__create_device_model(libxl__gc *gc, char *vm_path; char **pass_stuff; const char *dm; + int i = 0; if (libxl_defbool_val(b_info->device_model_stubdomain)) { rc = libxl__create_stubdom(gc, domid, guest_config, state, starting_r); goto out; } - dm = libxl__domain_device_model(gc, b_info); + if (dmid == ~0) + dm = libxl__domain_device_model(gc, b_info); + else + dm = guest_config->dms[dmid].path; + if (!dm) { rc = ERROR_FAIL; goto out; @@ -916,7 +935,7 @@ int libxl__create_device_model(libxl__gc *gc, rc = ERROR_FAIL; goto out; } - args = libxl__build_device_model_args(gc, dm, domid, guest_config, state); + args = libxl__build_device_model_args(gc, dm, domid, dmid, guest_config, state); if (!args) { rc = ERROR_FAIL; goto out; @@ -930,7 +949,11 @@ int libxl__create_device_model(libxl__gc *gc, free(path); } - path = libxl__sprintf(gc, "/local/domain/0/device-model/%d", domid); + if (dmid == ~0) + path = libxl__sprintf(gc, "/local/domain/0/device-model/%d", domid); + else + path = libxl__sprintf(gc, "/local/domain/0/dms/%u/%u", domid, + guest_config->dms[dmid].id); xs_mkdir(ctx->xsh, XBT_NULL, path); if (b_info->type == LIBXL_DOMAIN_TYPE_HVM && @@ -939,8 +962,57 @@ int libxl__create_device_model(libxl__gc *gc, libxl__xs_write(gc, XBT_NULL, libxl__sprintf(gc, "%s/disable_pf", path), "%d", !libxl_defbool_val(b_info->u.hvm.xen_platform_pci)); + if (dmid != ~0) { + path = libxl__sprintf(gc, "%s/image/dms/%u/pci", + 
libxl__xs_get_dompath(gc, domid), + guest_config->dms[dmid].id); + xs_mkdir(ctx->xsh, XBT_NULL, path); + + if (guest_config->dms[dmid].pcis) { + for (i = 0; guest_config->dms[dmid].pcis[i]; i++) { + path = xs_get_domain_path(ctx->xsh, domid); + path = libxl__sprintf(gc, "%s/image/dms/%u/pci/%u", + path, guest_config->dms[dmid].id, i); + libxl__xs_write(gc, XBT_NULL, path, + "%s", guest_config->dms[dmid].pcis[i]); + } + } + + path = libxl__sprintf(gc, "%s/image/dms/%u/mmio", + libxl__xs_get_dompath(gc, domid), + guest_config->dms[dmid].id); + xs_mkdir(ctx->xsh, XBT_NULL, path); + + if (guest_config->dms[dmid].mmios) { + for (i = 0; guest_config->dms[dmid].mmios[i]; i++) { + path = xs_get_domain_path(ctx->xsh, domid); + path = libxl__sprintf(gc, "%s/image/dms/%u/mmio/%u", + path, guest_config->dms[dmid].id, i); + libxl__xs_write(gc, XBT_NULL, path, + "%s", guest_config->dms[dmid].mmios[i]); + } + } + + path = libxl__sprintf(gc, "%s/image/dms/%u/pio", + libxl__xs_get_dompath(gc, domid), + guest_config->dms[dmid].id); + xs_mkdir(ctx->xsh, XBT_NULL, path); + + if (guest_config->dms[dmid].pios) { + for (i = 0; guest_config->dms[dmid].pios[i]; i++) { + path = xs_get_domain_path(ctx->xsh, domid); + path = libxl__sprintf(gc, "%s/image/dms/%u/pio/%u", + path, guest_config->dms[dmid].id, i); + libxl__xs_write(gc, XBT_NULL, path, + "%s", guest_config->dms[dmid].pios[i]); + } + } + } + libxl_create_logfile(ctx, - libxl__sprintf(gc, "qemu-dm-%s", c_info->name), + libxl__sprintf(gc, "qemu-%s-%s", + (dmid == ~0) ? 
"dm" : guest_config->dms[dmid].name, + c_info->name), &logfile); logfile_w = open(logfile, O_WRONLY|O_CREAT|O_APPEND, 0644); free(logfile); @@ -960,7 +1032,15 @@ int libxl__create_device_model(libxl__gc *gc, p->domid = domid; p->dom_path = libxl__xs_get_dompath(gc, domid); - p->pid_path = "image/device-model-pid"; + if (dmid == ~0) { + p->pid_path = "image/device-model-pid"; + p->dmid = 0; + } + else { + p->pid_path = libxl__sprintf(gc, "image/dms/%u-pid", guest_config->dms[dmid].id); + p->dmid = guest_config->dms[dmid].id; + } + if (!p->dom_path) { rc = ERROR_FAIL; goto out_close; @@ -985,7 +1065,8 @@ retry_transaction: } } - rc = libxl__spawn_spawn(gc, p->for_spawn, "device model", + path = (dmid == ~0) ? "device model" : guest_config->dms[dmid].name; + rc = libxl__spawn_spawn(gc, p->for_spawn, path, libxl_spawner_record_pid, p); if (rc < 0) goto out_close; @@ -1011,8 +1092,14 @@ int libxl__confirm_device_model_startup(libxl__gc *gc, { char *path; int domid = starting->domid; + uint32_t dmid = starting->dmid; int ret, ret2; - path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", domid); + + if (!dmid) + path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", domid); + else + path = libxl__sprintf(gc, "/local/domain/0/dms/%u/%u/state", domid, dmid); + ret = libxl__spawn_confirm_offspring_startup(gc, LIBXL_DEVICE_MODEL_START_TIMEOUT, "Device Model", path, "running", starting); @@ -1125,10 +1212,122 @@ int libxl__create_xenpv_qemu(libxl__gc *gc, uint32_t domid, libxl__domain_build_state *state, libxl__spawner_starting **starting_r) { - libxl__create_device_model(gc, domid, guest_config, state, starting_r); + libxl__create_device_model(gc, domid, ~0, guest_config, state, starting_r); return 0; } +int libxl__launch_dms(libxl__gc *gc, + libxl_domid domid, + libxl__domain_build_state *state, + libxl_domain_config *guest_config) +{ + char *path; + libxl_ctx *ctx = libxl__gc_owner(gc); + libxl_dm *dm = NULL; + int i; + libxl__spawner_starting 
*dm_starting = 0; + int ret = 0; + + path = libxl__sprintf(gc, "/local/domain/0/dms/%u", domid); + xs_mkdir(ctx->xsh, XBT_NULL, path); + path = xs_get_domain_path(ctx->xsh, domid); + xs_mkdir(ctx->xsh, XBT_NULL, libxl__sprintf(gc, "%s/image/dms", path)); + free(path); + + for (i = 0; i < guest_config->num_dms; i++) + { + dm = &guest_config->dms[i]; + ret = libxl__create_device_model(gc, domid, i, guest_config, + state, &dm_starting); + if (ret < 0) + fprintf(stderr, "Can''t launch dm %s\n", + guest_config->dms[i].name); + if (dm_starting) { + libxl__qmp_initializations(gc, domid, guest_config, dm->id); + ret = libxl__confirm_device_model_startup(gc, state, dm_starting); + if (ret < 0) { + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, + "dm %s did not start: %d", + guest_config->dms[i].name, + ret); + break; + } + } + } + + return ret; +} + +static int libxl_destroy_dm(libxl__gc *gc, + libxl_domid domid, + char *dmid) +{ + libxl_ctx *ctx = libxl__gc_owner(gc); + char *path; + char *pid; + int ret = 0; + + path = libxl__sprintf(gc, "/local/domain/%u/image/dms/%s-pid", + domid, dmid); + + + pid = libxl__xs_read(gc, XBT_NULL, path); + if (!pid) + return ERROR_FAIL; + + ret = kill(atoi(pid), SIGHUP); + + if (ret < 0 && errno == ESRCH) { + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Daemon %s already exited", dmid); + ret = 0; + } else if (ret == 0) { + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Daemon %s signaled", dmid); + ret = 0; + } else { + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "failed to kill Daemon %s [%d]", + dmid, atoi(pid)); + ret = ERROR_FAIL; + } + + if (!ret) + { + path = libxl__sprintf(gc, "/local/domain/0/dms/%u/%s", + domid, dmid); + + xs_rm(ctx->xsh, XBT_NULL, path); + } + + return 0; +} + +int libxl__destroy_dms(libxl__gc *gc, + libxl_domid domid) +{ + libxl_ctx *ctx = libxl__gc_owner(gc); + int ret = 0; + char **dir = NULL; + unsigned int n; + char *path; + unsigned int i = 0; + + path = libxl__sprintf(gc, "/local/domain/0/dms/%u", domid); + + dir = libxl__xs_directory(gc, XBT_NULL, 
path, &n); + if (dir) + { + for (i = 0; i < n; i++) + { + if (libxl_destroy_dm(gc, domid, dir[i])) + ret = ERROR_FAIL; + } + } + + if (!ret) + xs_rm(ctx->xsh, XBT_NULL, libxl__sprintf(gc, "/local/domain/0/dms/%u", domid)); + + return ret; +} + /* * Local variables: * mode: C diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c index 9b33267..a08b445 100644 --- a/tools/libxl/libxl_dom.c +++ b/tools/libxl/libxl_dom.c @@ -339,6 +339,9 @@ static const char *libxl__domain_firmware(libxl__gc *gc, case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN: firmware = "hvmloader"; break; + case LIBXL_DEVICE_MODEL_VERSION_MULTIPLE_QEMU_XEN: + firmware = "hvmloader"; + break; default: LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "invalid device model version %d", info->device_model_version); @@ -364,7 +367,8 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid, domid, (info->max_memkb - info->video_memkb) / 1024, (info->target_memkb - info->video_memkb) / 1024, - firmware); + firmware, + info->u.hvm.max_servers * 2 + 1); if (ret) { LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, ret, "hvm building failed"); goto out; diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 52a2429..d1dd083 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -893,11 +893,20 @@ _hidden int libxl__domain_build(libxl__gc *gc, uint32_t domid, libxl__domain_build_state *state); +/* for deamon create */ +_hidden int libxl__launch_dms(libxl__gc *gc, + libxl_domid domid, + libxl__domain_build_state *state, + libxl_domain_config *guest_config); +_hidden int libxl__destroy_dms(libxl__gc *gc, + libxl_domid domid); + /* for device model creation */ _hidden const char *libxl__domain_device_model(libxl__gc *gc, const libxl_domain_build_info *info); _hidden int libxl__create_device_model(libxl__gc *gc, int domid, + uint32_t dmid, libxl_domain_config *guest_config, libxl__domain_build_state *state, libxl__spawner_starting **starting_r); -- Julien Grall
Hi, At 15:59 +0000 on 22 Mar (1332431961), Julien Grall wrote: > Julien Grall (15): > xc: Add the hypercall for multiple servers > xc: Add argument to allocate more special pages > xc: Fix python build Shouldn't something here update xc_domain_save/xc_domain_restore? Cheers, Tim.
Jan Beulich
2012-Mar-23 08:18 UTC
Re: [XEN][RFC PATCH 01/15] hvm: Modify interface to support multiple ioreq server
>>> On 22.03.12 at 16:59, Julien Grall <julien.grall@citrix.com> wrote: > --- a/xen/include/public/hvm/hvm_op.h > +++ b/xen/include/public/hvm/hvm_op.h > @@ -24,6 +24,8 @@ > #include "../xen.h" > #include "../trace.h" > > +#include "hvm_info_table.h" /* HVM_MAX_VCPUS */ > + > /* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. > */ > #define HVMOP_set_param 0 > #define HVMOP_get_param 1 > @@ -227,6 +229,53 @@ struct xen_hvm_inject_trap { > typedef struct xen_hvm_inject_trap xen_hvm_inject_trap_t; > DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_trap_t); > > +#define HVMOP_register_ioreq_server 20 > +struct xen_hvm_register_ioreq_server { > + domid_t domid; /* IN - domain to be serviced */ > + unsigned int id; /* OUT - handle for identifying this server */ > +}; > +typedef struct xen_hvm_register_ioreq_server > xen_hvm_register_ioreq_server_t; > +DEFINE_XEN_GUEST_HANDLE(xen_hvm_register_ioreq_server_t); > + > +#define HVMOP_get_ioreq_server_buf_channel 21 > +struct xen_hvm_get_ioreq_server_buf_channel { > + domid_t domid; /* IN - domain to be serviced */ > + servid_t id; /* IN - handle from HVMOP_register_ioreq_server */ > + unsigned int channel; /* OUT - buf ioreq channel */ > +}; > +typedef struct xen_hvm_get_ioreq_server_buf_channel > xen_hvm_get_ioreq_server_buf_channel_t; > +DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_ioreq_server_buf_channel_t); > + > +#define HVMOP_map_io_range_to_ioreq_server 22 > +struct xen_hvm_map_io_range_to_ioreq_server { > + domid_t domid; /* IN - domain to be serviced */ > + uint8_t is_mmio; /* IN - MMIO or port IO? 
*/ > + servid_t id; /* IN - handle from HVMOP_register_ioreq_server > */ > + uint64_aligned_t s, e; /* IN - inclusive start and end of range */ > +}; > +typedef struct xen_hvm_map_io_range_to_ioreq_server > xen_hvm_map_io_range_to_ioreq_server_t; > +DEFINE_XEN_GUEST_HANDLE(xen_hvm_map_io_range_to_ioreq_server_t); > + > +#define HVMOP_unmap_io_range_from_ioreq_server 23 > +struct xen_hvm_unmap_io_range_from_ioreq_server { > + domid_t domid; /* IN - domain to be serviced */ > + uint8_t is_mmio; /* IN - MMIO or port IO? */ > + servid_t id; /* IN - handle from HVMOP_register_ioreq_server > */ > + uint64_aligned_t addr; /* IN - address inside the range to remove */ > +}; > +typedef struct xen_hvm_unmap_io_range_from_ioreq_server > xen_hvm_unmap_io_range_from_ioreq_server_t; > +DEFINE_XEN_GUEST_HANDLE(xen_hvm_unmap_io_range_from_ioreq_server_t); > + > +#define HVMOP_register_pcidev 24 > +struct xen_hvm_register_pcidev { > + domid_t domid; /* IN - domain to be serviced */ > + servid_t id; /* IN - handle from HVMOP_register_ioreq_server */ > + uint16_t bdf; /* IN - pci */ Can we please avoid the mistake of again not surfacing the PCI segment in interface definitions, even if it may be required to be zero for the immediate needs? Jan > +}; > +typedef struct xen_hvm_register_pcidev xen_hvm_register_pcidev_t; > +DEFINE_XEN_GUEST_HANDLE(xen_hvm_register_pcidev_t); > + > + > #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ > > #define HVMOP_get_mem_type 15
Jan Beulich
2012-Mar-23 08:29 UTC
Re: [XEN][RFC PATCH 03/15] hvm-pci: Handle PCI config space in Xen
>>> On 22.03.12 at 16:59, Julien Grall <julien.grall@citrix.com> wrote: > --- /dev/null > +++ b/xen/arch/x86/hvm/pci_emul.c > @@ -0,0 +1,147 @@ > +#include <asm/hvm/support.h> > +#include <xen/hvm/pci_emul.h> > +#include <xen/pci.h> > +#include <xen/sched.h> > +#include <xen/xmalloc.h> > + > +#define PCI_DEBUGSTR "%x:%x.%x" > +#define PCI_DEBUG(bdf) ((bdf) >> 16) & 0xff, ((bdf) >> 11) & 0x1f, ((bdf) >> 8) & > 0x7 > + > +static int handle_config_space(int dir, uint32_t port, uint32_t bytes, > + uint32_t *val) > +{ > + uint32_t pci_cf8; > + struct pci_device_emul *pci; > + ioreq_t *p = get_ioreq(current); > + int rc = X86EMUL_UNHANDLEABLE; > + struct vcpu *v = current; > + > + spin_lock(&v->domain->arch.hvm_domain.pci_root.pci_lock); > + > + if (port == 0xcf8) You need to be considerably more careful here: This code should handle only 32-bit wide aligned accesses, and you need to make sure that everything else still gets properly forwarded to qemu (so that e.g. port CF9 could still be properly emulated if desired). > + { > + rc = X86EMUL_OKAY; > + v->arch.hvm_vcpu.pci_cf8 = *val; > + goto end_handle; > + } > + > + pci_cf8 = v->arch.hvm_vcpu.pci_cf8; > + > + /* Retrieve PCI */ > + pci = v->domain->arch.hvm_domain.pci_root.pci; > + > + while (pci && !PCI_CMP_BDF(pci, pci_cf8)) > + pci = pci->next; Is there a reasonably low enforced boundary on the number of devices? Otherwise, a linear lookup would seem overly simple to me. Further, with how PCI_CMP_BDF() is defined, you're doing the wrong thing here anyway - bit 31 is required to be set for the port CFC access to be a config space one. Plus there's an AMD extension to this interface, so I think other than shifting out the low 8 bits and checking that the high bit is set, you shouldn't do any other masking here.
Jan > + > + /* We just fill the ioreq, hvm_send_assist_req will send the request */ > + if (unlikely(pci == NULL)) > + { > + *val = ~0; > + rc = X86EMUL_OKAY; > + goto end_handle; > + } > + > + p->type = IOREQ_TYPE_PCI_CONFIG; > + p->addr = (pci_cf8 & ~3) + (p->addr & 3); > + > + set_ioreq(v, &pci->server->ioreq, p); > + > +end_handle: > + spin_unlock(&v->domain->arch.hvm_domain.pci_root.pci_lock); > + return rc; > +}
Ian Campbell
2012-Mar-23 11:33 UTC
Re: [XEN][RFC PATCH 01/15] hvm: Modify interface to support multiple ioreq server
On Thu, 2012-03-22 at 15:59 +0000, Julien Grall wrote: > Add structure to handle ioreq server. It's server which can > handle a range of IO (MMIO and/or PIO) and emulate a PCI. > Each server as its own shared page to receive ioreq. So > we have introduced to HVM PARAM to set/get the first and > the last shared used for ioreq. > With it's id, the server knows which page it must use. So id is always the page offset with the range? Why not just call it iobuf_offset then? Is the additional layer of abstraction from calling it "id" useful if we are just going to peek around it? > We introduce a new kind a ioreq type IOREQ_TYPE_PCICONFIG > which permit to forward easily PCI config space access. > > Signed-off-by: Julien Grall <julien.grall@citrix.com> > --- > [...] > xen/include/public/hvm/hvm_op.h | 49 ++++++++++++++++++++++++++++++++++++++ > xen/include/public/hvm/ioreq.h | 1 + > xen/include/public/hvm/params.h | 6 ++++- > xen/include/public/xen.h | 1 + I only looked at the public interface changes so far. > diff --git a/xen/include/public/hvm/hvm_op.h b/xen/include/public/hvm/hvm_op.h > index 6a78f75..1e0e27b 100644 > --- a/xen/include/public/hvm/hvm_op.h > +++ b/xen/include/public/hvm/hvm_op.h > @@ -24,6 +24,8 @@ > #include "../xen.h" > #include "../trace.h" > > +#include "hvm_info_table.h" /* HVM_MAX_VCPUS */ You don't appear to use HVM_MAX_VCPUS anywhere in your additions? > /* Get/set subcommands: extra argument == pointer to xen_hvm_param struct.
*/ > #define HVMOP_set_param 0 > #define HVMOP_get_param 1 > @@ -227,6 +229,53 @@ struct xen_hvm_inject_trap { > typedef struct xen_hvm_inject_trap xen_hvm_inject_trap_t; > DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_trap_t); > > +#define HVMOP_register_ioreq_server 20 > +struct xen_hvm_register_ioreq_server { > + domid_t domid; /* IN - domain to be serviced */ > + unsigned int id; /* OUT - handle for identifying this server */elsewhere this is called servid_t?> +}; > +typedef struct xen_hvm_register_ioreq_server xen_hvm_register_ioreq_server_t; > +DEFINE_XEN_GUEST_HANDLE(xen_hvm_register_ioreq_server_t); > + > +#define HVMOP_get_ioreq_server_buf_channel 21 > +struct xen_hvm_get_ioreq_server_buf_channel { > + domid_t domid; /* IN - domain to be serviced */ > + servid_t id; /* IN - handle from HVMOP_register_ioreq_server */ > + unsigned int channel; /* OUT - buf ioreq channel */evtchn_port_t?> +}; > +typedef struct xen_hvm_get_ioreq_server_buf_channel xen_hvm_get_ioreq_server_buf_channel_t; > +DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_ioreq_server_buf_channel_t); > + > +#define HVMOP_map_io_range_to_ioreq_server 22 > +struct xen_hvm_map_io_range_to_ioreq_server { > + domid_t domid; /* IN - domain to be serviced */ > + uint8_t is_mmio; /* IN - MMIO or port IO? */ > + servid_t id; /* IN - handle from HVMOP_register_ioreq_server */ > + uint64_aligned_t s, e; /* IN - inclusive start and end of range */ > +}; > +typedef struct xen_hvm_map_io_range_to_ioreq_server xen_hvm_map_io_range_to_ioreq_server_t; > +DEFINE_XEN_GUEST_HANDLE(xen_hvm_map_io_range_to_ioreq_server_t); > + > +#define HVMOP_unmap_io_range_from_ioreq_server 23 > +struct xen_hvm_unmap_io_range_from_ioreq_server { > + domid_t domid; /* IN - domain to be serviced */ > + uint8_t is_mmio; /* IN - MMIO or port IO? 
*/ > + servid_t id; /* IN - handle from HVMOP_register_ioreq_server */ > + uint64_aligned_t addr; /* IN - address inside the range to remove */ > +}; > +typedef struct xen_hvm_unmap_io_range_from_ioreq_server xen_hvm_unmap_io_range_from_ioreq_server_t; > +DEFINE_XEN_GUEST_HANDLE(xen_hvm_unmap_io_range_from_ioreq_server_t); > + > +#define HVMOP_register_pcidev 24 > +struct xen_hvm_register_pcidev { > + domid_t domid; /* IN - domain to be serviced */ > + servid_t id; /* IN - handle from HVMOP_register_ioreq_server */ > + uint16_t bdf; /* IN - pci */ > +}; > +typedef struct xen_hvm_register_pcidev xen_hvm_register_pcidev_t; > +DEFINE_XEN_GUEST_HANDLE(xen_hvm_register_pcidev_t); > + > + > #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ > > #define HVMOP_get_mem_type 15 > diff --git a/xen/include/public/hvm/ioreq.h b/xen/include/public/hvm/ioreq.h > index 4022a1d..87aacd3 100644 > --- a/xen/include/public/hvm/ioreq.h > +++ b/xen/include/public/hvm/ioreq.h > @@ -34,6 +34,7 @@ > > #define IOREQ_TYPE_PIO 0 /* pio */ > #define IOREQ_TYPE_COPY 1 /* mmio ops */ > +#define IOREQ_TYPE_PCI_CONFIG 2 /* pci config space ops */ > #define IOREQ_TYPE_TIMEOFFSET 7 > #define IOREQ_TYPE_INVALIDATE 8 /* mapcache */ > > diff --git a/xen/include/public/hvm/params.h b/xen/include/public/hvm/params.h > index 55c1b57..309ac1b 100644 > --- a/xen/include/public/hvm/params.h > +++ b/xen/include/public/hvm/params.h > @@ -147,6 +147,10 @@ > #define HVM_PARAM_ACCESS_RING_PFN 28 > #define HVM_PARAM_SHARING_RING_PFN 29 > > -#define HVM_NR_PARAMS 30 > +/* Param for ioreq servers */ > +#define HVM_PARAM_IO_PFN_FIRST 30 > +#define HVM_PARAM_IO_PFN_LAST 31 > + > +#define HVM_NR_PARAMS 32 > > #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ > diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h > index b2f6c50..26d0e9d 100644 > --- a/xen/include/public/xen.h > +++ b/xen/include/public/xen.h > @@ -466,6 +466,7 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); > #ifndef __ASSEMBLY__ > > typedef uint16_t 
domid_t; > +typedef uint32_t servid_t;ioservid_t?> /* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */ > #define DOMID_FIRST_RESERVED (0x7FF0U)
Ian Campbell
2012-Mar-23 11:37 UTC
Re: [XEN][RFC PATCH 09/15] xc: Add the hypercall for multiple servers
On Thu, 2012-03-22 at 15:59 +0000, Julien Grall wrote:> This patch add 5 hypercalls to register server, io range and PCI. > > Signed-off-by: Julien Grall <julien.grall@citrix.com> > --- > tools/libxc/xc_domain.c | 140 +++++++++++++++++++++++++++++++++++++++++++++++ > tools/libxc/xenctrl.h | 13 ++++ > 2 files changed, 153 insertions(+), 0 deletions(-) > > diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c > index d98e68b..8067397 100644 > --- a/tools/libxc/xc_domain.c > +++ b/tools/libxc/xc_domain.c > @@ -1514,6 +1514,146 @@ int xc_domain_set_virq_handler(xc_interface *xch, uint32_t domid, int virq) > return do_domctl(xch, &domctl); > } > > +int xc_hvm_register_ioreq_server(xc_interface *xch, domid_t dom, servid_t *id) > +{ > + DECLARE_HYPERCALL; > + DECLARE_HYPERCALL_BUFFER(xen_hvm_register_ioreq_server_t, arg); > + int rc = -1; > + > + arg = xc_hypercall_buffer_alloc(xch, arg, sizeof (*arg)); > + if (!arg) {Xen Coding style calls for if ( !arg ) { here and elsewhere in this patch.> + PERROR("Could not allocate memory for xc_hvm_register_ioreq_server hypercall"); > + goto out; > + } > + > + hypercall.op = __HYPERVISOR_hvm_op; > + hypercall.arg[0] = HVMOP_register_ioreq_server; > + hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg); > + > + arg->domid = dom; > + rc = do_xen_hypercall(xch, &hypercall); > + *id = arg->id;You could just return this if it''s always +ve (vs -ve errors). 
Similarly in xc_hvm_get_ioreq_server_buf_channel> + > + xc_hypercall_buffer_free(xch, arg); > +out: > + return rc; > +} > + > +int xc_hvm_get_ioreq_server_buf_channel(xc_interface *xch, domid_t dom, servid_t id, > + unsigned int *channel)channel should be evtchn_port_t, or if you decide to return it instead evtchn_port_or_error_t.> +{ > + DECLARE_HYPERCALL; > + DECLARE_HYPERCALL_BUFFER(xen_hvm_get_ioreq_server_buf_channel_t, arg); > + int rc = -1; > + > + arg = xc_hypercall_buffer_alloc(xch, arg, sizeof (*arg)); > + if (!arg) { > + PERROR("Could not allocate memory for xc_hvm_get_ioreq_servr_buf_channel"); > + goto out; > + } > + > + hypercall.op = __HYPERVISOR_hvm_op; > + hypercall.arg[0] = HVMOP_get_ioreq_server_buf_channel; > + hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg); > + > + arg->domid = dom; > + arg->id = id; > + rc = do_xen_hypercall(xch, &hypercall); > + *channel = arg->channel; > + > + xc_hypercall_buffer_free(xch, arg); > + > +out: > + return rc; > +} > + > +int xc_hvm_map_io_range_to_ioreq_server(xc_interface *xch, domid_t dom, servid_t id, > + char is_mmio, uint64_t start, uint64_t end)not sure char here buys us anything, either bool or int would seem fine.> +{ > + DECLARE_HYPERCALL; > + DECLARE_HYPERCALL_BUFFER(xen_hvm_map_io_range_to_ioreq_server_t, arg); > + int rc = -1; > + > + arg = xc_hypercall_buffer_alloc(xch, arg, sizeof (*arg)); > + if (!arg) { > + PERROR("Could not allocate memory for xc_hvm_map_io_range_to_ioreq_server hypercall"); > + goto out; > + } > + > + hypercall.op = __HYPERVISOR_hvm_op; > + hypercall.arg[0] = HVMOP_map_io_range_to_ioreq_server; > + hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg); > + > + arg->domid = dom; > + arg->id = id; > + arg->is_mmio = is_mmio; > + arg->s = start; > + arg->e = end; > + > + rc = do_xen_hypercall(xch, &hypercall); > + > + xc_hypercall_buffer_free(xch, arg); > +out: > + return rc; > +} > + > +int xc_hvm_unmap_io_range_from_ioreq_server(xc_interface *xch, domid_t dom, servid_t id, > + char 
is_mmio, uint64_t addr) > +{ > + DECLARE_HYPERCALL; > + DECLARE_HYPERCALL_BUFFER(xen_hvm_unmap_io_range_from_ioreq_server_t, arg); > + int rc = -1; > + > + arg = xc_hypercall_buffer_alloc(xch, arg, sizeof (*arg)); > + if (!arg) { > + PERROR("Could not allocate memory for xc_hvm_unmap_io_range_from_ioreq_server hypercall"); > + goto out; > + } > + > + hypercall.op = __HYPERVISOR_hvm_op; > + hypercall.arg[0] = HVMOP_unmap_io_range_from_ioreq_server; > + hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg); > + > + arg->domid = dom; > + arg->id = id; > + arg->is_mmio = is_mmio; > + arg->addr = addr; > + rc = do_xen_hypercall(xch, &hypercall); > + > + xc_hypercall_buffer_free(xch, arg); > +out: > + return rc; > +} > + > +int xc_hvm_register_pcidev(xc_interface *xch, domid_t dom, servid_t id, > + uint16_t bdf) > +{ > + DECLARE_HYPERCALL; > + DECLARE_HYPERCALL_BUFFER(xen_hvm_register_pcidev_t, arg); > + int rc = -1; > + > + arg = xc_hypercall_buffer_alloc(xch, arg, sizeof (*arg)); > + if (!arg) > + { > + PERROR("Could not allocate memory for xc_hvm_create_pci hypercall"); > + goto out; > + } > + > + hypercall.op = __HYPERVISOR_hvm_op; > + hypercall.arg[0] = HVMOP_register_pcidev; > + hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg); > + > + arg->domid = dom; > + arg->id = id; > + arg->bdf = bdf; > + rc = do_xen_hypercall(xch, &hypercall); > + > + xc_hypercall_buffer_free(xch, arg); > +out: > + return rc; > +} > + > + > /* > * Local variables: > * mode: C > diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h > index 812e723..bcbfee5 100644 > --- a/tools/libxc/xenctrl.h > +++ b/tools/libxc/xenctrl.h > @@ -1648,6 +1648,19 @@ void xc_clear_last_error(xc_interface *xch); > int xc_set_hvm_param(xc_interface *handle, domid_t dom, int param, unsigned long value); > int xc_get_hvm_param(xc_interface *handle, domid_t dom, int param, unsigned long *value); > > +int xc_hvm_register_ioreq_server(xc_interface *xch, domid_t dom, unsigned int *id); > +int 
xc_hvm_get_ioreq_server_buf_channel(xc_interface *xch, domid_t dom, servid_t id, > + unsigned int *channel); > +int xc_hvm_map_io_range_to_ioreq_server(xc_interface *xch, domid_t dom, unsigned int id, > + char is_mmio, uint64_t start, uint64_t end); > +int xc_hvm_unmap_io_range_from_ioreq_server(xc_interface *xch, domid_t dom, unsigned int id, > + char is_mmio, uint64_t addr); > +/* > + * Register a PCI device > + */ > +int xc_hvm_register_pcidev(xc_interface *xch, domid_t dom, unsigned int id, > + uint16_t bdf); > + > /* IA64 specific, nvram save */ > int xc_ia64_save_to_nvram(xc_interface *xch, uint32_t dom); >
Ian Campbell
2012-Mar-23 11:39 UTC
Re: [XEN][RFC PATCH 10/15] xc: Add argument to allocate more special pages
On Thu, 2012-03-22 at 15:59 +0000, Julien Grall wrote:> This patchs permits to allocate more special pages. Indeed, for multiple > ioreq server, we need to have 2 shared pages by server. > > xc_hvm_build will take an argument which will indicate the number of > special pages we want to allocate.struct xc_hvm_build_args was just added to avoid exactly this proliferation of arguments, you should add this there.> > Signed-off-by: Julien Grall <julien.grall@citrix.com> > --- > tools/libxc/xc_hvm_build.c | 57 +++++++++++++++++++++++++------------------ > tools/libxc/xenguest.h | 6 +++- > tools/libxc/xg_private.c | 3 +- > 3 files changed, 39 insertions(+), 27 deletions(-) > > diff --git a/tools/libxc/xc_hvm_build.c b/tools/libxc/xc_hvm_build.c > index 696c012..62b4ff1 100644 > --- a/tools/libxc/xc_hvm_build.c > +++ b/tools/libxc/xc_hvm_build.c > @@ -47,10 +47,11 @@ > #define SPECIALPAGE_IDENT_PT 6 > #define SPECIALPAGE_CONSOLE 7 > #define NR_SPECIAL_PAGES 8 > -#define special_pfn(x) (0xff000u - NR_SPECIAL_PAGES + (x)) > +#define special_pfn(x, add) (0xff000u - (NR_SPECIAL_PAGES + (add)) + (x)) > > static void build_hvm_info(void *hvm_info_page, uint64_t mem_size, > - uint64_t mmio_start, uint64_t mmio_size) > + uint64_t mmio_start, uint64_t mmio_size, > + uint32_t nr_special_pages) > { > struct hvm_info_table *hvm_info = (struct hvm_info_table *) > (((unsigned char *)hvm_info_page) + HVM_INFO_OFFSET); > @@ -78,7 +79,7 @@ static void build_hvm_info(void *hvm_info_page, uint64_t mem_size, > /* Memory parameters. */ > hvm_info->low_mem_pgend = lowmem_end >> PAGE_SHIFT; > hvm_info->high_mem_pgend = highmem_end >> PAGE_SHIFT; > - hvm_info->reserved_mem_pgstart = special_pfn(0); > + hvm_info->reserved_mem_pgstart = special_pfn(0, nr_special_pages); > > /* Finish with the checksum. 
*/ > for ( i = 0, sum = 0; i < hvm_info->length; i++ ) > @@ -141,7 +142,8 @@ static int check_mmio_hole(uint64_t start, uint64_t memsize, > > static int setup_guest(xc_interface *xch, > uint32_t dom, const struct xc_hvm_build_args *args, > - char *image, unsigned long image_size) > + char *image, unsigned long image_size, > + uint32_t nr_special_pages) > { > xen_pfn_t *page_array = NULL; > unsigned long i, nr_pages = args->mem_size >> PAGE_SHIFT; > @@ -334,37 +336,42 @@ static int setup_guest(xc_interface *xch, > xch, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, > HVM_INFO_PFN)) == NULL ) > goto error_out; > - build_hvm_info(hvm_info_page, v_end, mmio_start, mmio_size); > + build_hvm_info(hvm_info_page, v_end, mmio_start, mmio_size, nr_special_pages); > munmap(hvm_info_page, PAGE_SIZE); > > /* Allocate and clear special pages. */ > - for ( i = 0; i < NR_SPECIAL_PAGES; i++ ) > + for ( i = 0; i < (NR_SPECIAL_PAGES + nr_special_pages); i++ ) > { > - xen_pfn_t pfn = special_pfn(i); > + xen_pfn_t pfn = special_pfn(i, nr_special_pages); > rc = xc_domain_populate_physmap_exact(xch, dom, 1, 0, 0, &pfn); > if ( rc != 0 ) > { > PERROR("Could not allocate %d''th special page.", i); > goto error_out; > } > - if ( xc_clear_domain_page(xch, dom, special_pfn(i)) ) > + if ( xc_clear_domain_page(xch, dom, special_pfn(i, nr_special_pages)) ) > goto error_out; > } > > xc_set_hvm_param(xch, dom, HVM_PARAM_STORE_PFN, > - special_pfn(SPECIALPAGE_XENSTORE)); > + special_pfn(SPECIALPAGE_XENSTORE, nr_special_pages)); > xc_set_hvm_param(xch, dom, HVM_PARAM_BUFIOREQ_PFN, > - special_pfn(SPECIALPAGE_BUFIOREQ)); > + special_pfn(SPECIALPAGE_BUFIOREQ, nr_special_pages)); > xc_set_hvm_param(xch, dom, HVM_PARAM_IOREQ_PFN, > - special_pfn(SPECIALPAGE_IOREQ)); > + special_pfn(SPECIALPAGE_IOREQ, nr_special_pages)); > xc_set_hvm_param(xch, dom, HVM_PARAM_CONSOLE_PFN, > - special_pfn(SPECIALPAGE_CONSOLE)); > + special_pfn(SPECIALPAGE_CONSOLE, nr_special_pages)); > xc_set_hvm_param(xch, dom, 
HVM_PARAM_PAGING_RING_PFN, > - special_pfn(SPECIALPAGE_PAGING)); > + special_pfn(SPECIALPAGE_PAGING, nr_special_pages)); > xc_set_hvm_param(xch, dom, HVM_PARAM_ACCESS_RING_PFN, > - special_pfn(SPECIALPAGE_ACCESS)); > + special_pfn(SPECIALPAGE_ACCESS, nr_special_pages)); > xc_set_hvm_param(xch, dom, HVM_PARAM_SHARING_RING_PFN, > - special_pfn(SPECIALPAGE_SHARING)); > + special_pfn(SPECIALPAGE_SHARING, nr_special_pages)); > + xc_set_hvm_param(xch, dom, HVM_PARAM_IO_PFN_FIRST, > + special_pfn(NR_SPECIAL_PAGES, nr_special_pages)); > + xc_set_hvm_param(xch, dom, HVM_PARAM_IO_PFN_LAST, > + special_pfn(NR_SPECIAL_PAGES + nr_special_pages - 1, > + nr_special_pages)); > > /* > * Identity-map page table is required for running with CR0.PG=0 when > @@ -372,14 +379,14 @@ static int setup_guest(xc_interface *xch, > */ > if ( (ident_pt = xc_map_foreign_range( > xch, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, > - special_pfn(SPECIALPAGE_IDENT_PT))) == NULL ) > + special_pfn(SPECIALPAGE_IDENT_PT, nr_special_pages))) == NULL ) > goto error_out; > for ( i = 0; i < PAGE_SIZE / sizeof(*ident_pt); i++ ) > ident_pt[i] = ((i << 22) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER | > _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE); > munmap(ident_pt, PAGE_SIZE); > xc_set_hvm_param(xch, dom, HVM_PARAM_IDENT_PT, > - special_pfn(SPECIALPAGE_IDENT_PT) << PAGE_SHIFT); > + special_pfn(SPECIALPAGE_IDENT_PT, nr_special_pages) << PAGE_SHIFT); > > /* Insert JMP <rel32> instruction at address 0x0 to reach entry point. */ > entry_eip = elf_uval(&elf, elf.ehdr, e_entry); > @@ -406,7 +413,8 @@ static int setup_guest(xc_interface *xch, > * Create a domain for a virtualized Linux, using files/filenames. 
> */ > int xc_hvm_build(xc_interface *xch, uint32_t domid, > - const struct xc_hvm_build_args *hvm_args) > + const struct xc_hvm_build_args *hvm_args, > + uint32_t nr_special_pages) > { > struct xc_hvm_build_args args = *hvm_args; > void *image; > @@ -432,7 +440,7 @@ int xc_hvm_build(xc_interface *xch, uint32_t domid, > if ( image == NULL ) > return -1; > > - sts = setup_guest(xch, domid, &args, image, image_size); > + sts = setup_guest(xch, domid, &args, image, image_size, nr_special_pages); > > free(image); > > @@ -447,10 +455,11 @@ int xc_hvm_build(xc_interface *xch, uint32_t domid, > * If target == memsize, pages are populated normally. > */ > int xc_hvm_build_target_mem(xc_interface *xch, > - uint32_t domid, > - int memsize, > - int target, > - const char *image_name) > + uint32_t domid, > + int memsize, > + int target, > + const char *image_name, > + uint32_t nr_special_pages) > { > struct xc_hvm_build_args args = {}; > > @@ -458,7 +467,7 @@ int xc_hvm_build_target_mem(xc_interface *xch, > args.mem_target = (uint64_t)target << 20; > args.image_file_name = image_name; > > - return xc_hvm_build(xch, domid, &args); > + return xc_hvm_build(xch, domid, &args, nr_special_pages); > } > > /* > diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h > index 8d885d3..092ee24 100644 > --- a/tools/libxc/xenguest.h > +++ b/tools/libxc/xenguest.h > @@ -188,13 +188,15 @@ struct xc_hvm_build_args { > * are optional. 
> */ > int xc_hvm_build(xc_interface *xch, uint32_t domid, > - const struct xc_hvm_build_args *hvm_args); > + const struct xc_hvm_build_args *hvm_args, > + uint32_t nr_special_pages); > > int xc_hvm_build_target_mem(xc_interface *xch, > uint32_t domid, > int memsize, > int target, > - const char *image_name); > + const char *image_name, > + uint32_t nr_special_pages); > > int xc_suspend_evtchn_release(xc_interface *xch, xc_evtchn *xce, int domid, int suspend_evtchn); > > diff --git a/tools/libxc/xg_private.c b/tools/libxc/xg_private.c > index 3864bc7..e74adce 100644 > --- a/tools/libxc/xg_private.c > +++ b/tools/libxc/xg_private.c > @@ -192,7 +192,8 @@ unsigned long csum_page(void *page) > __attribute__((weak)) > int xc_hvm_build(xc_interface *xch, > uint32_t domid, > - const struct xc_hvm_build_args *hvm_args) > + const struct xc_hvm_build_args *hvm_args, > + uint32_t nr_servers) > { > errno = ENOSYS; > return -1;
On Thu, 2012-03-22 at 15:59 +0000, Julien Grall wrote: > Quickly fix for hvm_build in python. If an earlier patch breaks the build then this needs to be part of that patch to allow bisection. > Signed-off-by: Julien Grall <julien.grall@citrix.com> > --- > tools/python/xen/lowlevel/xc/xc.c | 3 ++- > 1 files changed, 2 insertions(+), 1 deletions(-) > > diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c > index 7c89756..eb004b6 100644 > --- a/tools/python/xen/lowlevel/xc/xc.c > +++ b/tools/python/xen/lowlevel/xc/xc.c > @@ -984,8 +984,9 @@ static PyObject *pyxc_hvm_build(XcObject *self, > if ( target == -1 ) > target = memsize; > > + // Ugly fix : we must retrieve the number of servers > if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize, > - target, image) != 0 ) > + target, image, 0) != 0 ) > return pyxc_error_to_exception(self->xc_handle); > > #if !defined(__ia64__)
Ian Campbell
2012-Mar-23 11:47 UTC
Re: [XEN][RFC PATCH 12/15] xl: Add interface to handle multiple device models
On Thu, 2012-03-22 at 15:59 +0000, Julien Grall wrote:> This patch add a structure with contain all informations about > a device model. > > Signed-off-by: Julien Grall <julien.grall@citrix.com> > --- > tools/libxl/libxl.h | 4 ++-- > tools/libxl/libxl_internal.h | 1 + > tools/libxl/libxl_types.idl | 11 +++++++++++ > 3 files changed, 14 insertions(+), 2 deletions(-) > > diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h > index 6b69030..a347a34 100644 > --- a/tools/libxl/libxl.h > +++ b/tools/libxl/libxl.h > @@ -357,9 +357,9 @@ typedef struct { > typedef struct { > libxl_domain_create_info c_info; > libxl_domain_build_info b_info; > - > int num_disks, num_vifs, num_pcidevs, num_vfbs, num_vkbs; > - > + int num_dms; > + libxl_dm *dms; > libxl_device_disk *disks; > libxl_device_nic *vifs; > libxl_device_pci *pcidevs; > diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h > index e0a1070..247bdb9 100644 > --- a/tools/libxl/libxl_internal.h > +++ b/tools/libxl/libxl_internal.h > @@ -767,6 +767,7 @@ typedef struct { > char *dom_path; /* from libxl_malloc, only for libxl_spawner_record_pid */ > const char *pid_path; /* only for libxl_spawner_record_pid */ > int domid; > + uint32_t dmid; > libxl__spawn_starting *for_spawn; > } libxl__spawner_starting; > > diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl > index 413a1a6..7e48817 100644 > --- a/tools/libxl/libxl_types.idl > +++ b/tools/libxl/libxl_types.idl > @@ -37,6 +37,7 @@ libxl_domain_type = Enumeration("domain_type", [ > libxl_device_model_version = Enumeration("device_model_version", [ > (1, "QEMU_XEN_TRADITIONAL"), # Historical qemu-xen device model (qemu-dm) > (2, "QEMU_XEN"), # Upstream based qemu-xen device model > + (3, "MULTIPLE_QEMU_XEN"), # Handle multiple dmIsn''t this implicit in the provision or otherwise of num_dms?> ]) > > libxl_console_type = Enumeration("console_type", [ > @@ -224,6 +225,15 @@ libxl_domain_create_info = Struct("domain_create_info",[ > > MemKB 
= UInt(64, init_val = "LIBXL_MEMKB_DEFAULT") > > +libxl_dm = Struct("dm", [ > + ("id", uint32), > + ("name", string), > + ("path", string), > + ("pcis", libxl_string_list), > + ("mmios", libxl_string_list), > + ("pios", libxl_string_list), > + ])Why does the user of libxl need to know the id? can''t that be internal to the library? What are name and path? I guess path is something to do with xenstore but isn''t that also internal to the libxl<->dm interface not something the caller of libxl need be aware of? I''m not sure what syntax "pcis", "mmios" and "pios" are going to have but I expect that this would be better represent as actual datastructures rather than encoding it as a string. How are toolstack supposed to know the values for e.g. pcis? All in all this seems like a very raw/low-level interface. Can libxl not expose something a bit more meaningful to toolstack authors? For example if we consider emulated disk controllers then perhaps the options are * Handled by the "primary" dm * Handled by a single disaggregated dm * Handled by multiple disaggregated dm''s (one per disk controller) Similarly for other classes or emulated device. Or maybe this should be a flag on those actual devices (e.g. in libxl_device_FOO)?> + > # Instances of libxl_file_reference contained in this struct which > # have been mapped (with libxl_file_reference_map) will be unmapped > # by libxl_domain_build/restore. If either of these are never called > @@ -289,6 +299,7 @@ libxl_domain_build_info = Struct("domain_build_info",[ > ("usbdevice", string), > ("soundhw", string), > ("xen_platform_pci", libxl_defbool), > + ("max_servers", integer),As a toolstack author how do I decide what number to use here? Ian.
Julien Grall
2012-Mar-23 13:06 UTC
Re: [Xen-devel] [XEN][RFC PATCH 12/15] xl: Add interface to handle multiple device models
On 03/23/2012 11:47 AM, Ian Campbell wrote:> On Thu, 2012-03-22 at 15:59 +0000, Julien Grall wrote: > >> This patch add a structure with contain all informations about >> a device model. >> >> Signed-off-by: Julien Grall<julien.grall@citrix.com> >> --- >> tools/libxl/libxl.h | 4 ++-- >> tools/libxl/libxl_internal.h | 1 + >> tools/libxl/libxl_types.idl | 11 +++++++++++ >> 3 files changed, 14 insertions(+), 2 deletions(-) >> >> diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h >> index 6b69030..a347a34 100644 >> --- a/tools/libxl/libxl.h >> +++ b/tools/libxl/libxl.h >> @@ -357,9 +357,9 @@ typedef struct { >> typedef struct { >> libxl_domain_create_info c_info; >> libxl_domain_build_info b_info; >> - >> int num_disks, num_vifs, num_pcidevs, num_vfbs, num_vkbs; >> - >> + int num_dms; >> + libxl_dm *dms; >> libxl_device_disk *disks; >> libxl_device_nic *vifs; >> libxl_device_pci *pcidevs; >> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h >> index e0a1070..247bdb9 100644 >> --- a/tools/libxl/libxl_internal.h >> +++ b/tools/libxl/libxl_internal.h >> @@ -767,6 +767,7 @@ typedef struct { >> char *dom_path; /* from libxl_malloc, only for libxl_spawner_record_pid */ >> const char *pid_path; /* only for libxl_spawner_record_pid */ >> int domid; >> + uint32_t dmid; >> libxl__spawn_starting *for_spawn; >> } libxl__spawner_starting; >> >> diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl >> index 413a1a6..7e48817 100644 >> --- a/tools/libxl/libxl_types.idl >> +++ b/tools/libxl/libxl_types.idl >> @@ -37,6 +37,7 @@ libxl_domain_type = Enumeration("domain_type", [ >> libxl_device_model_version = Enumeration("device_model_version", [ >> (1, "QEMU_XEN_TRADITIONAL"), # Historical qemu-xen device model (qemu-dm) >> (2, "QEMU_XEN"), # Upstream based qemu-xen device model >> + (3, "MULTIPLE_QEMU_XEN"), # Handle multiple dm >> > Isn''t this implicit in the provision or otherwise of num_dms? 
>Sorry but I don''t understand the question.>> ]) >> >> libxl_console_type = Enumeration("console_type", [ >> @@ -224,6 +225,15 @@ libxl_domain_create_info = Struct("domain_create_info",[ >> >> MemKB = UInt(64, init_val = "LIBXL_MEMKB_DEFAULT") >> >> +libxl_dm = Struct("dm", [ >> + ("id", uint32), >> + ("name", string), >> + ("path", string), >> + ("pcis", libxl_string_list), >> + ("mmios", libxl_string_list), >> + ("pios", libxl_string_list), >> + ]) >> > Why does the user of libxl need to know the id? can''t that be internal > to the library? >Indeed, I will remove that.> What are name and path? I guess path is something to do with xenstore > but isn''t that also internal to the libxl<->dm interface not something > the caller of libxl need be aware of? >The path is the binary path and the name is just a string append to the log filename.> I''m not sure what syntax "pcis", "mmios" and "pios" are going to have > but I expect that this would be better represent as actual > datastructures rather than encoding it as a string. > > How are toolstack supposed to know the values for e.g. pcis? >For the moment pcis are describe as bdf for instance: 00:01.1 and mmios/pios as range (0x10-0x20 or just 0x19).> All in all this seems like a very raw/low-level interface. Can libxl not > expose something a bit more meaningful to toolstack authors? For example > if we consider emulated disk controllers then perhaps the options are > * Handled by the "primary" dm > * Handled by a single disaggregated dm > * Handled by multiple disaggregated dm''s (one per disk controller) > Similarly for other classes or emulated device. Or maybe this should be > a flag on those actual devices (e.g. in libxl_device_FOO)? >I have not really ideas about that. The first solution that comes in my mind is to bind each device with the device model name. 
But in this case, we need to modify QEMU to launch only a subset of hardware and find a way to say which QEMU will emulate the "basic" hardware (keyboard, mouse, piix, ...). Moreover, who will allocate the BDF? The toolstack? >> + >> # Instances of libxl_file_reference contained in this struct which >> # have been mapped (with libxl_file_reference_map) will be unmapped >> # by libxl_domain_build/restore. If either of these are never called >> @@ -289,6 +299,7 @@ libxl_domain_build_info = Struct("domain_build_info",[ >> ("usbdevice", string), >> ("soundhw", string), >> ("xen_platform_pci", libxl_defbool), >> + ("max_servers", integer), >> > As a toolstack author how do I decide what number to use here? > The max_servers variable is used to compute the number of additional special pages. So max_servers indicates the maximum number of servers that we want to spawn.
Julien Grall
2012-Mar-23 13:44 UTC
Re: [Xen-devel] [XEN][RFC PATCH 00/15] QEMU disaggregation
On 03/22/2012 04:59 PM, Tim Deegan wrote: > At 15:59 +0000 on 22 Mar (1332431961), Julien Grall wrote: > >> Julien Grall (15): >> xc: Add the hypercall for multiple servers >> xc: Add argument to allocate more special pages >> xc: Fix python build >> > Shouldn't something here update xc_domain_save/xc_domain_restore? > For the moment no, I will modify both functions in the next version of the series.
Ian Campbell
2012-Mar-23 13:55 UTC
Re: [XEN][RFC PATCH 12/15] xl: Add interface to handle multiple device models
On Fri, 2012-03-23 at 13:06 +0000, Julien Grall wrote:> >> diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl > >> index 413a1a6..7e48817 100644 > >> --- a/tools/libxl/libxl_types.idl > >> +++ b/tools/libxl/libxl_types.idl > >> @@ -37,6 +37,7 @@ libxl_domain_type = Enumeration("domain_type", [ > >> libxl_device_model_version = Enumeration("device_model_version", [ > >> (1, "QEMU_XEN_TRADITIONAL"), # Historical qemu-xen device model (qemu-dm) > >> (2, "QEMU_XEN"), # Upstream based qemu-xen device model > >> + (3, "MULTIPLE_QEMU_XEN"), # Handle multiple dm > >> > > Isn''t this implicit in the provision or otherwise of num_dms? > > > Sorry but I don''t understand the question.What I mean is can''t you tell that the user wants multiple qemus from the fact that domain_config.num_dms is non-zero (or >1, whichever it is)?> > What are name and path? I guess path is something to do with xenstore > > but isn''t that also internal to the libxl<->dm interface not something > > the caller of libxl need be aware of? > > > The path is the binary pathAre these supposed to supersede libxl_domain_build_info.device_model ?> and the name is just a string append to the log filename.This could be internal to libxl I expect.> > I''m not sure what syntax "pcis", "mmios" and "pios" are going to have > > but I expect that this would be better represent as actual > > datastructures rather than encoding it as a string. > > > > How are toolstack supposed to know the values for e.g. pcis? > > > For the moment pcis are describe as bdf for instance: 00:01.1 > and mmios/pios as range (0x10-0x20 or just 0x19).And how does the toolstack know what values to use? How does it decide that BDF 00:01.1 is something it wants to move into a different DM?> > All in all this seems like a very raw/low-level interface. Can libxl not > > expose something a bit more meaningful to toolstack authors? 
For example > > if we consider emulated disk controllers then perhaps the options are > > * Handled by the "primary" dm > > * Handled by a single disaggregated dm > > * Handled by multiple disaggregated dm''s (one per disk controller) > > Similarly for other classes or emulated device. Or maybe this should be > > a flag on those actual devices (e.g. in libxl_device_FOO)? > > > > I have not really ideas about that. The first solution that comes > in my mind is to bind each device with the device model name. > > But in this case, we need to modify QEMU to launch only a subset > of hardware and find a way to said which QEMU will emulate the > "basic" hardware (keyboard, mouse, piix, ...). > Moreover, who will allocate the bdf ? The toolstack ?I don''t know. It''s (currently) not the toolstack AFAIK, I guess it is either the lower level libraries (libxl, libxc) or the device model itself. I think this needs figuring out before we can decide what a sensible libxl interface for for describing which device is emulated by which dm looks like. What I want to avoid is the situation where there is something deep in the bowels of Xen which defines (or even hardcodes) the property that BDF X:Y.Z corresponds the some device or other and then require that the upper level tool stack (or, worse, the user) then has to encode that magic knowledge as a string in the configuration.> >> + > >> # Instances of libxl_file_reference contained in this struct which > >> # have been mapped (with libxl_file_reference_map) will be unmapped > >> # by libxl_domain_build/restore. If either of these are never called > >> @@ -289,6 +299,7 @@ libxl_domain_build_info = Struct("domain_build_info",[ > >> ("usbdevice", string), > >> ("soundhw", string), > >> ("xen_platform_pci", libxl_defbool), > >> + ("max_servers", integer), > >> > > As a toolstack author how do I decide what number to use here? > > > The max_servers variables is used to compute the additionnal special pages. 
> So the max_servers indicates the maximum servers that we want to spawn.We already know this from num_dms, don''t we? Ian.
Julien Grall
2012-Mar-26 12:20 UTC
Re: [XEN][RFC PATCH 03/15] hvm-pci: Handle PCI config space in Xen
On 03/23/2012 08:29 AM, Jan Beulich wrote: > Is there a reasonably low enforced boundary on the number > of devices? Otherwise, a linear lookup would seem overly > simple to me. > The maximum number of BDFs is 2^16 = 65536. Which kind of structure do you advise? Array? Hash table? > Further, with how PCI_CMP_BDF() is defined, you're doing the > wrong thing here anyway - bit 31 is required to be set for the > port CFC access to be a config space one. Plus there's an AMD > extension to this interface, so I think other than shifting out > the low 8 bits and checking that the high bit is set, you shouldn't > do any other masking here. > Actually, in the config address register bits 24-30 are reserved. So, do I need to mask them? Moreover, what is the AMD extension? > Jan > > >> + >> + /* We just fill the ioreq, hvm_send_assist_req will send the request */ >> + if (unlikely(pci == NULL)) >> + { >> + *val = ~0; >> + rc = X86EMUL_OKAY; >> + goto end_handle; >> + } >> + >> + p->type = IOREQ_TYPE_PCI_CONFIG; >> + p->addr = (pci_cf8& ~3) + (p->addr& 3); >> + >> + set_ioreq(v,&pci->server->ioreq, p); >> + >> +end_handle: >> + spin_unlock(&v->domain->arch.hvm_domain.pci_root.pci_lock); >> + return rc; >> +} >> > >
Julien Grall
2012-Mar-26 12:32 UTC
Re: [XEN][RFC PATCH 01/15] hvm: Modify interface to support multiple ioreq server
On 03/23/2012 08:18 AM, Jan Beulich wrote: >> +#define HVMOP_register_pcidev 24 >> +struct xen_hvm_register_pcidev { >> + domid_t domid; /* IN - domain to be serviced */ >> + servid_t id; /* IN - handle from HVMOP_register_ioreq_server */ >> + uint16_t bdf; /* IN - pci */ >> > Can we please avoid the mistake of again not surfacing the PCI > segment in interface definitions, even if it may be required to be > zero for the immediate needs? > What do you mean by surfacing the PCI segment? Do I need to add the PCI domain?
Jan Beulich
2012-Mar-26 12:52 UTC
Re: [XEN][RFC PATCH 03/15] hvm-pci: Handle PCI config space in Xen
>>> On 26.03.12 at 14:20, Julien Grall <julien.grall@citrix.com> wrote: > On 03/23/2012 08:29 AM, Jan Beulich wrote: >> Is there a reasonably low enforced boundary on the number >> of devices? Otherwise, a linear lookup would seem overly >> simple to me. >> > The maximum of bdf is 2^16 => 65536. > Which kind of structure do you advice ? Array ? Hash Table ?Radix tree, especially if you fold in the segment number.>> Further, with how PCI_CMP_BDF() is defined, you''re doing the >> wrong thing here anyway - bit 31 is required to be set for the >> port CFC access to be a config space one. Plus there''s an AMD >> extension to this interface, so I think other than shifting out >> the low 8 bits and checking that the high bit is set, you shouldn''t >> do any other masking here. >> > Actually in config address register the 24-30 bits are reserved. > So, do I need to mask it ?Not necessarily - I''d suggest considering the part of the address (which should generally result in a mismatch on any comparison. This so that this ...> Moreover what is the AMD extension ?... can work without additional code. For an implementation, please have a look at current Linux''es arch/x86/pci/direct.c - bits 24...27 are used for extended config space register accesses (which will be needed for advanced PCIe or PCI-X functionality, and which may be particularly important as long as we don''t emulate MMCFG - at least I don''t think we do). Jan
Jan Beulich
2012-Mar-26 12:53 UTC
Re: [XEN][RFC PATCH 01/15] hvm: Modify interface to support multiple ioreq server
>>> On 26.03.12 at 14:32, Julien Grall <julien.grall@citrix.com> wrote: > On 03/23/2012 08:18 AM, Jan Beulich wrote: >>> +#define HVMOP_register_pcidev 24 >>> +struct xen_hvm_register_pcidev { >>> + domid_t domid; /* IN - domain to be serviced */ >>> + servid_t id; /* IN - handle from HVMOP_register_ioreq_server */ >>> + uint16_t bdf; /* IN - pci */ >>> >> Can we please avoid the mistake of again not surfacing the PCI >> segment in interface definitions, even if it may be required to be >> zero for the immediate needs? >> > > What do you hear by surfacing the PCI segment ? > Do I need to add the PCI domain ?"domain" and "segment" appear to be used interchangeably. Jan
Ian Jackson
2012-Apr-02 17:11 UTC
Re: [Xen-devel] [XEN][RFC PATCH 14/15] xl-parsing: Parse the new option device_models
Julien Grall writes ("[Xen-devel] [XEN][RFC PATCH 14/15] xl-parsing: Parse the new option device_models"): > For the support of multiple ioreq server, we add a new option "device_models". > It's an array of device model, for each device model, we need to specify > which pci, IO range (MMIO, PIO) will be allow. I don't think this is really a suitable interface. The PCI space in the guest is controlled by the device model(s) and the user should surely specify which devices should be provided by which dms, in terms of devices not in terms of PCI space. Ian.
Ian Jackson
2012-Apr-02 17:12 UTC
Re: [Xen-devel] [XEN][RFC PATCH 01/15] hvm: Modify interface to support multiple ioreq server
Julien Grall writes ("[Xen-devel] [XEN][RFC PATCH 01/15] hvm: Modify interface to support multiple ioreq server"): > Add structure to handle ioreq server. It's server which can > handle a range of IO (MMIO and/or PIO) and emulate a PCI. > Each server as its own shared page to receive ioreq. So > we have introduced to HVM PARAM to set/get the first and > the last shared used for ioreq. With it's id, the server > knows which page it must use. This explanation, and full documentation of the new HVMOPs, should be included in the hvm_op.h header. Ian.
Stefano Stabellini
2012-Apr-03 10:05 UTC
Re: [Xen-devel] [XEN][RFC PATCH 14/15] xl-parsing: Parse the new option device_models
On Mon, 2 Apr 2012, Ian Jackson wrote: > Julien Grall writes ("[Xen-devel] [XEN][RFC PATCH 14/15] xl-parsing: Parse the new option device_models"): > > For the support of multiple ioreq server, we add a new option "device_models". > > It's an array of device model, for each device model, we need to specify > > which pci, IO range (MMIO, PIO) will be allow. > > I don't think this is really a suitable interface. The PCI space in > the guest is controlled by the device model(s) and the user should > surely specify which devices should be provided by which dms, in terms > of devices not in terms of PCI space. Julien added a name parameter to select the device, maybe we need something clearer? Specifying the PCI address is important, because we have to make sure the PCI addresses of the devices remain the same in a given VM across multiple boots. Thus we could make it optional from the user POV, but in that case we need a clear, well defined and stable algorithm in xl to figure out a PCI address from a given config file.
Ian Jackson
2012-Apr-03 13:31 UTC
Re: [Xen-devel] [XEN][RFC PATCH 14/15] xl-parsing: Parse the new option device_models
Stefano Stabellini writes ("Re: [Xen-devel] [XEN][RFC PATCH 14/15] xl-parsing: Parse the new option device_models"): > On Mon, 2 Apr 2012, Ian Jackson wrote: > > I don't think this is really a suitable interface. The PCI space in > > the guest is controlled by the device model(s) and the user should > > surely specify which devices should be provided by which dms, in terms > > of devices not in terms of PCI space. > > Julien added a name parameter to select the device, maybe we need > something clearer? > Specifying the PCI address is important, because we have to make sure > the PCI addresses of the devices remain the same in a given VM across > multiple boots. Are the PCI addresses not assigned in a deterministic fashion by code in qemu-dm, in this case in the qemu-dm which is emulating the pci bridge? If not then that needs to be fixed, surely? Ian.
Julien Grall
2012-Apr-03 13:54 UTC
Re: [XEN][RFC PATCH 14/15] xl-parsing: Parse the new option device_models
On 04/03/2012 02:31 PM, Ian Jackson wrote: > Stefano Stabellini writes ("Re: [Xen-devel] [XEN][RFC PATCH 14/15] xl-parsing: Parse the new option device_models"): > >> On Mon, 2 Apr 2012, Ian Jackson wrote: >> >>> I don't think this is really a suitable interface. The PCI space in >>> the guest is controlled by the device models(s) and the user should >>> surely specify which devices should be provided by which dms, in terms >>> of devices not in terms of PCI space. >>> >> >> Julien added a name parameter to select the device, maybe we need >> something clearer? >> Specifying the PCI address is important, because we have to make sure >> the PCI addresses of the devices remain the same in a given VM across >> multiple boots. >> > Are the PCI addresses not assigned in a deterministic fashion by code > in qemu-dm, in this case in the qemu-dm which is emulating the pci > bridge ? If not then that needs to be fixed, surely ? >Indeed but each QEMU emulate a subset of the hardware. So how QEMU can know the available PCI addresses ? I think that toolstack must allocate the BDF, otherwise we need to have communication between each qemu-dm.
Ian Jackson
2012-Apr-03 14:02 UTC
Re: [XEN][RFC PATCH 14/15] xl-parsing: Parse the new option device_models
Julien Grall writes ("Re: [Xen-devel] [XEN][RFC PATCH 14/15] xl-parsing: Parse the new option device_models"): > On 04/03/2012 02:31 PM, Ian Jackson wrote: > > Are the PCI addresses not assigned in a deterministic fashion by code > > in qemu-dm, in this case in the qemu-dm which is emulating the pci > > bridge ? If not then that needs to be fixed, surely ? > > Indeed but each QEMU emulate a subset of the hardware. > So how QEMU can know the available PCI addresses ? > I think that toolstack must allocate the BDF, otherwise we need to have > communication between each qemu-dm. Currently the bdfs are allocated by the single qemu-dm, right ? Why cannot that functionality stay there, with the pci bridge emulation ? Ian.
Stefano Stabellini
2012-Apr-03 14:16 UTC
Re: [Xen-devel] [XEN][RFC PATCH 14/15] xl-parsing: Parse the new option device_models
On Tue, 3 Apr 2012, Ian Jackson wrote: > Julien Grall writes ("Re: [Xen-devel] [XEN][RFC PATCH 14/15] xl-parsing: Parse the new option device_models"): > > On 04/03/2012 02:31 PM, Ian Jackson wrote: > > > Are the PCI addresses not assigned in a deterministic fashion by code > > > in qemu-dm, in this case in the qemu-dm which is emulating the pci > > > bridge ? If not then that needs to be fixed, surely ? > > > > Indeed but each QEMU emulate a subset of the hardware. > > So how QEMU can know the available PCI addresses ? > > I think that toolstack must allocate the BDF, otherwise we need to have > > communication between each qemu-dm. > > Currently the bdfs are allocated by the single qemu-dm, right ? Why > cannot that functionality stay there, with the pci bridge emulation ? Because the allocation of most BDFs in QEMU is done ad-hoc in a first come first served basis. If the first QEMU is not going to emulate these devices then it is not going to allocate the BDF for them either.
Ian Jackson
2012-Apr-03 14:23 UTC
Re: [Xen-devel] [XEN][RFC PATCH 14/15] xl-parsing: Parse the new option device_models
Stefano Stabellini writes ("Re: [Xen-devel] [XEN][RFC PATCH 14/15] xl-parsing: Parse the new option device_models"): > On Tue, 3 Apr 2012, Ian Jackson wrote: > > Currently the bdfs are allocated by the single qemu-dm, right ? Why > > cannot that functionality stay there, with the pci bridge emulation ? > > Because the allocation of most BDFs in QEMU is done ad-hoc in a first > come first served basis. If the first QEMU is not going to emulate these > devices then it is not going to allocate the BDF for them either. Do we want the bdf allocation to be stable when switching between the combined and disaggregated qemu-dms ? If we do then it still needs to be done by that qemu-dm using the same algorithm, presumably with additional ipc. If we don't then it would be ok for the bdfs to be allocated by new code in the toolstack somewhere, eg libxl, but it still shouldn't be up to the user to configure. Ian.
Julien Grall
2012-Apr-12 19:33 UTC
Re: [Xen-devel] [XEN][RFC PATCH 01/15] hvm: Modify interface to support multiple ioreq server
On 03/23/2012 11:33 AM, Ian Campbell wrote: > On Thu, 2012-03-22 at 15:59 +0000, Julien Grall wrote: > >> Add structure to handle ioreq server. It's server which can >> handle a range of IO (MMIO and/or PIO) and emulate a PCI. >> Each server as its own shared page to receive ioreq. So >> we have introduced to HVM PARAM to set/get the first and >> the last shared used for ioreq. >> With it's id, the server knows which page it must use. >> > So id is always the page offset with the range? Why not just call it > iobuf_offset then? Is the additional layer of abstraction from calling > it "id" useful if we are just going to peek around it? >Indeed, but this parameter is also used to register bdf/io range. So can we call it server_id or server_offset ? >> diff --git a/xen/include/public/hvm/hvm_op.h b/xen/include/public/hvm/hvm_op.h >> index 6a78f75..1e0e27b 100644 >> --- a/xen/include/public/hvm/hvm_op.h >> +++ b/xen/include/public/hvm/hvm_op.h >> @@ -24,6 +24,8 @@ >> #include "../xen.h" >> #include "../trace.h" >> >> +#include "hvm_info_table.h" /* HVM_MAX_VCPUS */ >> > You don't appear to use HVM_MAX_VCPUS anywhere in your additions? > >I use it in xen-all.c in QEMU. It permits to check that smp_cpus is lower than the maximum of vcpus. It avoids page overflow.
Julien Grall <julien.grall <at> citrix.com> writes: > This patch remove useless hvm_param due to structure modification > and bind the new hypercalls to handle ioreq servers and pci. > > + case HVM_PARAM_IO_PFN_LAST: > + if ( (d->arch.hvm_domain.params[HVM_PARAM_IO_PFN_LAST]) ) > + rc = -EINVAL; + break; > case HVM_PARAM_CALLBACK_IRQ: > hvm_set_callback_via(d, a.value); > hvm_latch_shinfo_size(d); Above has a missing break statement. christian