Hi Michael, hi Jason, This patches applies a few cleanups to the virtio PCI interrupt handling code, and then converts the virtio PCI code to use the automatic MSI-X vectors spreading, as well as using the information in virtio-blk and virtio-scsi to automatically align the blk-mq queues to the MSI-X vectors. Changes since V2: - remove a redundant callback check - calculate ->msix_vectors correctly - add a few Reviewed-by: tags for Jason Changes since V1: - dropped the patches already merged for 4.10-rc - new patch to remove struct virtio_pci_vq_info - improve probe error handling by unwinding step by step - new patch to convert virtio-scsi
Christoph Hellwig
2017-Feb-05 17:15 UTC
[PATCH 1/9] virtio_pci: remove struct virtio_pci_vq_info
We don't really need struct virtio_pci_vq_info, as most field in there are redundant: - the vq backpointer is not strictly neede to start with - the entry in the vqs list is not needed - the generic virtqueue already has list, we only need to check if it has a callback to get the same semantics - we can use a simple array to look up the MSI-X vec if needed. - That simple array now also duoble serves to replace the per_vq_vectors flag Signed-off-by: Christoph Hellwig <hch at lst.de> --- drivers/virtio/virtio_pci_common.c | 117 +++++++++++-------------------------- drivers/virtio/virtio_pci_common.h | 25 +------- drivers/virtio/virtio_pci_legacy.c | 6 +- drivers/virtio/virtio_pci_modern.c | 6 +- 4 files changed, 39 insertions(+), 115 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 186cbab327b8..a33767318cbf 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -62,16 +62,13 @@ static irqreturn_t vp_config_changed(int irq, void *opaque) static irqreturn_t vp_vring_interrupt(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; - struct virtio_pci_vq_info *info; irqreturn_t ret = IRQ_NONE; - unsigned long flags; + struct virtqueue *vq; - spin_lock_irqsave(&vp_dev->lock, flags); - list_for_each_entry(info, &vp_dev->virtqueues, node) { - if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) + list_for_each_entry(vq, &vp_dev->vdev.vqs, list) { + if (vq->callback && vring_interrupt(irq, vq) == IRQ_HANDLED) ret = IRQ_HANDLED; } - spin_unlock_irqrestore(&vp_dev->lock, flags); return ret; } @@ -167,55 +164,6 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, return err; } -static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index, - void (*callback)(struct virtqueue *vq), - const char *name, - u16 msix_vec) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL); - struct virtqueue *vq; - unsigned long flags; - - /* fill out our structure that represents an active queue */ - if (!info) - return ERR_PTR(-ENOMEM); - - vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, msix_vec); - if (IS_ERR(vq)) - goto out_info; - - info->vq = vq; - if (callback) { - spin_lock_irqsave(&vp_dev->lock, flags); - list_add(&info->node, &vp_dev->virtqueues); - spin_unlock_irqrestore(&vp_dev->lock, flags); - } else { - INIT_LIST_HEAD(&info->node); - } - - vp_dev->vqs[index] = info; - return vq; - -out_info: - kfree(info); - return vq; -} - -static void vp_del_vq(struct virtqueue *vq) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); - struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; - unsigned long flags; - - spin_lock_irqsave(&vp_dev->lock, flags); - list_del(&info->node); - spin_unlock_irqrestore(&vp_dev->lock, flags); - - vp_dev->del_vq(info); - kfree(info); -} - /* the config->del_vqs() implementation */ void vp_del_vqs(struct virtio_device *vdev) { @@ -224,16 +172,15 @@ void vp_del_vqs(struct virtio_device *vdev) int i; list_for_each_entry_safe(vq, n, &vdev->vqs, list) { - if (vp_dev->per_vq_vectors) { - int v = vp_dev->vqs[vq->index]->msix_vector; + if (vp_dev->msix_vector_map) { + int v = vp_dev->msix_vector_map[vq->index]; if (v != VIRTIO_MSI_NO_VECTOR) free_irq(pci_irq_vector(vp_dev->pci_dev, v), vq); } - vp_del_vq(vq); + vp_dev->del_vq(vq); } - vp_dev->per_vq_vectors = false; if (vp_dev->intx_enabled) { free_irq(vp_dev->pci_dev->irq, vp_dev); @@ -261,8 +208,8 @@ void vp_del_vqs(struct virtio_device *vdev) vp_dev->msix_names = NULL; kfree(vp_dev->msix_affinity_masks); vp_dev->msix_affinity_masks = NULL; - kfree(vp_dev->vqs); - vp_dev->vqs = NULL; + kfree(vp_dev->msix_vector_map); + vp_dev->msix_vector_map = NULL; } static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, @@ -275,10 +222,6 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, u16 msix_vec; int i, err, nvectors, allocated_vectors; - vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); - if (!vp_dev->vqs) - return -ENOMEM; - if (per_vq_vectors) { /* Best option: one for change interrupt, one per vq. */ nvectors = 1; @@ -294,7 +237,13 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, if (err) goto error_find; - vp_dev->per_vq_vectors = per_vq_vectors; + if (per_vq_vectors) { + vp_dev->msix_vector_map = kmalloc_array(nvqs, + sizeof(*vp_dev->msix_vector_map), GFP_KERNEL); + if (!vp_dev->msix_vector_map) + goto error_find; + } + allocated_vectors = vp_dev->msix_used_vectors; for (i = 0; i < nvqs; ++i) { if (!names[i]) { @@ -304,19 +253,25 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, if (!callbacks[i]) msix_vec = VIRTIO_MSI_NO_VECTOR; - else if (vp_dev->per_vq_vectors) + else if (per_vq_vectors) msix_vec = allocated_vectors++; else msix_vec = VP_MSIX_VQ_VECTOR; - vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], msix_vec); + vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i], + msix_vec); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto error_find; } - if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR) + if (!per_vq_vectors) continue; + if (msix_vec == VIRTIO_MSI_NO_VECTOR) { + vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; + continue; + } + /* allocate per-vq irq if available and necessary */ snprintf(vp_dev->msix_names[msix_vec], sizeof *vp_dev->msix_names, @@ -326,8 +281,12 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, vring_interrupt, 0, vp_dev->msix_names[msix_vec], vqs[i]); - if (err) + if (err) { + /* don't free this irq on error */ + vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; goto error_find; + } + vp_dev->msix_vector_map[i] = msix_vec; } return 0; @@ -343,23 +302,18 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i, err; - vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); - if (!vp_dev->vqs) - return -ENOMEM; - err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vp_dev); if (err) goto out_del_vqs; vp_dev->intx_enabled = 1; - vp_dev->per_vq_vectors = false; for (i = 0; i < nvqs; ++i) { if (!names[i]) { vqs[i] = NULL; continue; } - vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], + vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i], VIRTIO_MSI_NO_VECTOR); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); @@ -409,16 +363,15 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu) { struct virtio_device *vdev = vq->vdev; struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; - struct cpumask *mask; - unsigned int irq; if (!vq->callback) return -EINVAL; if (vp_dev->msix_enabled) { - mask = vp_dev->msix_affinity_masks[info->msix_vector]; - irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector); + int vec = vp_dev->msix_vector_map[vq->index]; + struct cpumask *mask = vp_dev->msix_affinity_masks[vec]; + unsigned int irq = pci_irq_vector(vp_dev->pci_dev, vec); + if (cpu == -1) irq_set_affinity_hint(irq, NULL); else { @@ -498,8 +451,6 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, vp_dev->vdev.dev.parent = &pci_dev->dev; vp_dev->vdev.dev.release = virtio_pci_release_dev; vp_dev->pci_dev = pci_dev; - INIT_LIST_HEAD(&vp_dev->virtqueues); - spin_lock_init(&vp_dev->lock); /* enable the device */ rc = pci_enable_device(pci_dev); diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index b2f666250ae0..2038887bdf23 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -31,17 +31,6 @@ #include <linux/highmem.h> #include <linux/spinlock.h> -struct virtio_pci_vq_info { - /* the actual virtqueue */ - struct virtqueue *vq; - - /* the list node for the virtqueues list */ - struct list_head node; - - /* MSI-X vector (or none) */ - unsigned msix_vector; -}; - /* Our device structure */ struct virtio_pci_device { struct virtio_device vdev; @@ -75,13 +64,6 @@ struct virtio_pci_device { /* the IO mapping for the PCI config space */ void __iomem *ioaddr; - /* a list of queues so we can dispatch IRQs */ - spinlock_t lock; - struct list_head virtqueues; - - /* array of all queues for house-keeping */ - struct virtio_pci_vq_info **vqs; - /* MSI-X support */ int msix_enabled; int intx_enabled; @@ -94,16 +76,15 @@ struct virtio_pci_device { /* Vectors allocated, excluding per-vq vectors if any */ unsigned msix_used_vectors; - /* Whether we have vector per vq */ - bool per_vq_vectors; + /* Map of per-VQ MSI-X vectors, may be NULL */ + unsigned *msix_vector_map; struct virtqueue *(*setup_vq)(struct virtio_pci_device *vp_dev, - struct virtio_pci_vq_info *info, unsigned idx, void (*callback)(struct virtqueue *vq), const char *name, u16 msix_vec); - void (*del_vq)(struct virtio_pci_vq_info *info); + void (*del_vq)(struct virtqueue *vq); u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector); }; diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 6d9e5173d5fa..47292dad0ff9 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -112,7 +112,6 @@ static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) } static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, - struct virtio_pci_vq_info *info, unsigned index, void (*callback)(struct virtqueue *vq), const char *name, @@ -130,8 +129,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN)) return ERR_PTR(-ENOENT); - info->msix_vector = msix_vec; - /* create the vring */ vq = vring_create_virtqueue(index, num, VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev, @@ -162,9 +159,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, return ERR_PTR(err); } -static void del_vq(struct virtio_pci_vq_info *info) +static void del_vq(struct virtqueue *vq) { - struct virtqueue *vq = info->vq; struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 4bf7ab375894..00e6fc1df407 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -293,7 +293,6 @@ static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) } static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, - struct virtio_pci_vq_info *info, unsigned index, void (*callback)(struct virtqueue *vq), const char *name, @@ -323,8 +322,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, /* get offset of notification word for this vq */ off = vp_ioread16(&cfg->queue_notify_off); - info->msix_vector = msix_vec; - /* create the vring */ vq = vring_create_virtqueue(index, num, SMP_CACHE_BYTES, &vp_dev->vdev, @@ -409,9 +406,8 @@ static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs, return 0; } -static void del_vq(struct virtio_pci_vq_info *info) +static void del_vq(struct virtqueue *vq) { - struct virtqueue *vq = info->vq; struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); vp_iowrite16(vq->index, &vp_dev->common->queue_select); -- 2.11.0
Christoph Hellwig
2017-Feb-05 17:15 UTC
[PATCH 2/9] virtio_pci: use shared interrupts for virtqueues
This lets IRQ layer handle dispatching IRQs to separate handlers for the case where we don't have per-VQ MSI-X vectors, and allows us to greatly simplify the code based on the assumption that we always have interrupt vector 0 (legacy INTx or config interrupt for MSI-X) available, and any other interrupt is request/freed throught the VQ, even if the actual interrupt line might be shared in some cases. This allows removing a great deal of variables keeping track of the interrupt state in struct virtio_pci_device, as we can now simply walk the list of VQs and deal with per-VQ interrupt handlers there, and only treat vector 0 special. Additionally clean up the VQ allocation code to properly unwind on error instead of having a single global cleanup label, which is error prone, and in this case also leads to more code. Signed-off-by: Christoph Hellwig <hch at lst.de> --- drivers/virtio/virtio_pci_common.c | 235 ++++++++++++++++--------------------- drivers/virtio/virtio_pci_common.h | 16 +-- 2 files changed, 106 insertions(+), 145 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index a33767318cbf..274dc1ff09c0 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -33,10 +33,8 @@ void vp_synchronize_vectors(struct virtio_device *vdev) struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i; - if (vp_dev->intx_enabled) - synchronize_irq(vp_dev->pci_dev->irq); - - for (i = 0; i < vp_dev->msix_vectors; ++i) + synchronize_irq(pci_irq_vector(vp_dev->pci_dev, 0)); + for (i = 1; i < vp_dev->msix_vectors; i++) synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i)); } @@ -99,77 +97,10 @@ static irqreturn_t vp_interrupt(int irq, void *opaque) return vp_vring_interrupt(irq, opaque); } -static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, - bool per_vq_vectors) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - const char *name = dev_name(&vp_dev->vdev.dev); - unsigned i, v; - int err = -ENOMEM; - - vp_dev->msix_vectors = nvectors; - - vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, - GFP_KERNEL); - if (!vp_dev->msix_names) - goto error; - vp_dev->msix_affinity_masks - = kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks, - GFP_KERNEL); - if (!vp_dev->msix_affinity_masks) - goto error; - for (i = 0; i < nvectors; ++i) - if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], - GFP_KERNEL)) - goto error; - - err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, - PCI_IRQ_MSIX); - if (err < 0) - goto error; - vp_dev->msix_enabled = 1; - - /* Set the vector used for configuration */ - v = vp_dev->msix_used_vectors; - snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, - "%s-config", name); - err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), - vp_config_changed, 0, vp_dev->msix_names[v], - vp_dev); - if (err) - goto error; - ++vp_dev->msix_used_vectors; - - v = vp_dev->config_vector(vp_dev, v); - /* Verify we had enough resources to assign the vector */ - if (v == VIRTIO_MSI_NO_VECTOR) { - err = -EBUSY; - goto error; - } - - if (!per_vq_vectors) { - /* Shared vector for all VQs */ - v = vp_dev->msix_used_vectors; - snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, - "%s-virtqueues", name); - err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), - vp_vring_interrupt, 0, vp_dev->msix_names[v], - vp_dev); - if (err) - goto error; - ++vp_dev->msix_used_vectors; - } - return 0; -error: - return err; -} - -/* the config->del_vqs() implementation */ -void vp_del_vqs(struct virtio_device *vdev) +static void vp_remove_vqs(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtqueue *vq, *n; - int i; list_for_each_entry_safe(vq, n, &vdev->vqs, list) { if (vp_dev->msix_vector_map) { @@ -181,35 +112,33 @@ void vp_del_vqs(struct virtio_device *vdev) } vp_dev->del_vq(vq); } +} - if (vp_dev->intx_enabled) { - free_irq(vp_dev->pci_dev->irq, vp_dev); - vp_dev->intx_enabled = 0; - } +/* the config->del_vqs() implementation */ +void vp_del_vqs(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + int i; - for (i = 0; i < vp_dev->msix_used_vectors; ++i) - free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev); + if (WARN_ON_ONCE(list_empty_careful(&vdev->vqs))) + return; - for (i = 0; i < vp_dev->msix_vectors; i++) - if (vp_dev->msix_affinity_masks[i]) - free_cpumask_var(vp_dev->msix_affinity_masks[i]); + vp_remove_vqs(vdev); if (vp_dev->msix_enabled) { + for (i = 0; i < vp_dev->msix_vectors; i++) + free_cpumask_var(vp_dev->msix_affinity_masks[i]); + /* Disable the vector used for configuration */ vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR); - pci_free_irq_vectors(vp_dev->pci_dev); - vp_dev->msix_enabled = 0; + kfree(vp_dev->msix_affinity_masks); + kfree(vp_dev->msix_names); + kfree(vp_dev->msix_vector_map); } - vp_dev->msix_vectors = 0; - vp_dev->msix_used_vectors = 0; - kfree(vp_dev->msix_names); - vp_dev->msix_names = NULL; - kfree(vp_dev->msix_affinity_masks); - vp_dev->msix_affinity_masks = NULL; - kfree(vp_dev->msix_vector_map); - vp_dev->msix_vector_map = NULL; + free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); + pci_free_irq_vectors(vp_dev->pci_dev); } static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, @@ -219,79 +148,122 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, bool per_vq_vectors) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); + const char *name = dev_name(&vp_dev->vdev.dev); + int i, err = -ENOMEM, allocated_vectors, nvectors; u16 msix_vec; - int i, err, nvectors, allocated_vectors; + + nvectors = 1; + for (i = 0; i < nvqs; i++) + if (callbacks[i]) + nvectors++; if (per_vq_vectors) { - /* Best option: one for change interrupt, one per vq. */ - nvectors = 1; - for (i = 0; i < nvqs; ++i) - if (callbacks[i]) - ++nvectors; + err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, + PCI_IRQ_MSIX); } else { - /* Second best: one for change, shared for all vqs. */ - nvectors = 2; + err = pci_alloc_irq_vectors(vp_dev->pci_dev, 2, 2, + PCI_IRQ_MSIX); } + if (err < 0) + return err; - err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors); + vp_dev->msix_vectors = nvectors; + vp_dev->msix_names = kmalloc_array(nvectors, + sizeof(*vp_dev->msix_names), GFP_KERNEL); + if (!vp_dev->msix_names) + goto out_free_irq_vectors; + + vp_dev->msix_affinity_masks = kcalloc(nvectors, + sizeof(*vp_dev->msix_affinity_masks), GFP_KERNEL); + if (!vp_dev->msix_affinity_masks) + goto out_free_msix_names; + + for (i = 0; i < nvectors; ++i) { + if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], + GFP_KERNEL)) + goto out_free_msix_affinity_masks; + } + + /* Set the vector used for configuration */ + snprintf(vp_dev->msix_names[0], sizeof(*vp_dev->msix_names), + "%s-config", name); + err = request_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_config_changed, + 0, vp_dev->msix_names[0], vp_dev); if (err) - goto error_find; + goto out_free_irq_vectors; - if (per_vq_vectors) { - vp_dev->msix_vector_map = kmalloc_array(nvqs, - sizeof(*vp_dev->msix_vector_map), GFP_KERNEL); - if (!vp_dev->msix_vector_map) - goto error_find; + /* Verify we had enough resources to assign the vector */ + if (vp_dev->config_vector(vp_dev, 0) == VIRTIO_MSI_NO_VECTOR) { + err = -EBUSY; + goto out_free_config_irq; } - allocated_vectors = vp_dev->msix_used_vectors; + vp_dev->msix_vector_map = kmalloc_array(nvqs, + sizeof(*vp_dev->msix_vector_map), GFP_KERNEL); + if (!vp_dev->msix_vector_map) + goto out_disable_config_irq; + + allocated_vectors = 1; /* vector 0 is the config interrupt */ for (i = 0; i < nvqs; ++i) { if (!names[i]) { vqs[i] = NULL; continue; } - if (!callbacks[i]) - msix_vec = VIRTIO_MSI_NO_VECTOR; - else if (per_vq_vectors) - msix_vec = allocated_vectors++; + if (callbacks[i]) + msix_vec = allocated_vectors; else - msix_vec = VP_MSIX_VQ_VECTOR; + msix_vec = VIRTIO_MSI_NO_VECTOR; + vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i], msix_vec); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); - goto error_find; + goto out_remove_vqs; } - if (!per_vq_vectors) - continue; - if (msix_vec == VIRTIO_MSI_NO_VECTOR) { vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; continue; } - /* allocate per-vq irq if available and necessary */ - snprintf(vp_dev->msix_names[msix_vec], - sizeof *vp_dev->msix_names, - "%s-%s", + snprintf(vp_dev->msix_names[i + 1], + sizeof(*vp_dev->msix_names), "%s-%s", dev_name(&vp_dev->vdev.dev), names[i]); err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), - vring_interrupt, 0, - vp_dev->msix_names[msix_vec], - vqs[i]); + vring_interrupt, IRQF_SHARED, + vp_dev->msix_names[i + 1], vqs[i]); if (err) { /* don't free this irq on error */ vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; - goto error_find; + goto out_remove_vqs; } vp_dev->msix_vector_map[i] = msix_vec; + + if (per_vq_vectors) + allocated_vectors++; } + + vp_dev->msix_enabled = 1; return 0; -error_find: - vp_del_vqs(vdev); +out_remove_vqs: + vp_remove_vqs(vdev); + kfree(vp_dev->msix_vector_map); +out_disable_config_irq: + vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR); +out_free_config_irq: + free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); +out_free_msix_affinity_masks: + for (i = 0; i < nvectors; i++) { + if (vp_dev->msix_affinity_masks[i]) + free_cpumask_var(vp_dev->msix_affinity_masks[i]); + } + kfree(vp_dev->msix_affinity_masks); +out_free_msix_names: + kfree(vp_dev->msix_names); +out_free_irq_vectors: + pci_free_irq_vectors(vp_dev->pci_dev); return err; } @@ -305,9 +277,8 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vp_dev); if (err) - goto out_del_vqs; + return err; - vp_dev->intx_enabled = 1; for (i = 0; i < nvqs; ++i) { if (!names[i]) { vqs[i] = NULL; @@ -317,13 +288,15 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, VIRTIO_MSI_NO_VECTOR); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); - goto out_del_vqs; + goto out_remove_vqs; } } return 0; -out_del_vqs: - vp_del_vqs(vdev); + +out_remove_vqs: + vp_remove_vqs(vdev); + free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); return err; } diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 2038887bdf23..85593867e712 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -66,16 +66,12 @@ struct virtio_pci_device { /* MSI-X support */ int msix_enabled; - int intx_enabled; cpumask_var_t *msix_affinity_masks; /* Name strings for interrupts. This size should be enough, * and I'm too lazy to allocate each name separately. */ char (*msix_names)[256]; - /* Number of available vectors */ - unsigned msix_vectors; - /* Vectors allocated, excluding per-vq vectors if any */ - unsigned msix_used_vectors; - + /* Total Number of MSI-X vectors (including per-VQ ones). */ + int msix_vectors; /* Map of per-VQ MSI-X vectors, may be NULL */ unsigned *msix_vector_map; @@ -89,14 +85,6 @@ struct virtio_pci_device { u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector); }; -/* Constants for MSI-X */ -/* Use first vector for configuration changes, second and the rest for - * virtqueues Thus, we need at least 2 vectors for MSI. */ -enum { - VP_MSIX_CONFIG_VECTOR = 0, - VP_MSIX_VQ_VECTOR = 1, -}; - /* Convert a generic virtio device to our structure */ static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) { -- 2.11.0
Christoph Hellwig
2017-Feb-05 17:15 UTC
[PATCH 3/9] virtio_pci: don't duplicate the msix_enable flag in struct pci_dev
Signed-off-by: Christoph Hellwig <hch at lst.de> Reviewed-by: Jason Wang <jasowang at redhat.com> --- drivers/virtio/virtio_pci_common.c | 5 ++--- drivers/virtio/virtio_pci_common.h | 2 -- drivers/virtio/virtio_pci_legacy.c | 2 +- drivers/virtio/virtio_pci_modern.c | 2 +- include/uapi/linux/virtio_pci.h | 2 +- 5 files changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 274dc1ff09c0..b83053082875 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -125,7 +125,7 @@ void vp_del_vqs(struct virtio_device *vdev) vp_remove_vqs(vdev); - if (vp_dev->msix_enabled) { + if (vp_dev->pci_dev->msix_enabled) { for (i = 0; i < vp_dev->msix_vectors; i++) free_cpumask_var(vp_dev->msix_affinity_masks[i]); @@ -244,7 +244,6 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, allocated_vectors++; } - vp_dev->msix_enabled = 1; return 0; out_remove_vqs: @@ -340,7 +339,7 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu) if (!vq->callback) return -EINVAL; - if (vp_dev->msix_enabled) { + if (vp_dev->pci_dev->msix_enabled) { int vec = vp_dev->msix_vector_map[vq->index]; struct cpumask *mask = vp_dev->msix_affinity_masks[vec]; unsigned int irq = pci_irq_vector(vp_dev->pci_dev, vec); diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 85593867e712..217ca876eed7 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -64,8 +64,6 @@ struct virtio_pci_device { /* the IO mapping for the PCI config space */ void __iomem *ioaddr; - /* MSI-X support */ - int msix_enabled; cpumask_var_t *msix_affinity_masks; /* Name strings for interrupts. This size should be enough, * and I'm too lazy to allocate each name separately. */ diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 47292dad0ff9..2ab6aee51bf6 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -165,7 +165,7 @@ static void del_vq(struct virtqueue *vq) iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); - if (vp_dev->msix_enabled) { + if (vp_dev->pci_dev->msix_enabled) { iowrite16(VIRTIO_MSI_NO_VECTOR, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); /* Flush the write out to device */ diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 00e6fc1df407..e5ce31091953 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -412,7 +412,7 @@ static void del_vq(struct virtqueue *vq) vp_iowrite16(vq->index, &vp_dev->common->queue_select); - if (vp_dev->msix_enabled) { + if (vp_dev->pci_dev->msix_enabled) { vp_iowrite16(VIRTIO_MSI_NO_VECTOR, &vp_dev->common->queue_msix_vector); /* Flush the write out to device */ diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index 90007a1abcab..15b4385a2be1 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -79,7 +79,7 @@ * configuration space */ #define VIRTIO_PCI_CONFIG_OFF(msix_enabled) ((msix_enabled) ? 24 : 20) /* Deprecated: please use VIRTIO_PCI_CONFIG_OFF instead */ -#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->msix_enabled) +#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->pci_dev->msix_enabled) /* Virtio ABI version, this must match exactly */ #define VIRTIO_PCI_ABI_VERSION 0 -- 2.11.0
Try to grab the MSI-X vectors early and fall back to the shared one before doing lots of allocations. Signed-off-by: Christoph Hellwig <hch at lst.de> Reviewed-by: Jason Wang <jasowang at redhat.com> --- drivers/virtio/virtio_pci_common.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index b83053082875..822f8e5dcee4 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -142,14 +142,13 @@ void vp_del_vqs(struct virtio_device *vdev) } static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[], - bool per_vq_vectors) + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[]) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(&vp_dev->vdev.dev); int i, err = -ENOMEM, allocated_vectors, nvectors; + bool shared = false; u16 msix_vec; nvectors = 1; @@ -157,12 +156,16 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, if (callbacks[i]) nvectors++; - if (per_vq_vectors) { - err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, - PCI_IRQ_MSIX); - } else { + /* Try one vector per queue first. */ + err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, + PCI_IRQ_MSIX); + if (err < 0) { + /* Fallback to one vector for config, one shared for queues. */ + shared = true; err = pci_alloc_irq_vectors(vp_dev->pci_dev, 2, 2, PCI_IRQ_MSIX); + if (err < 0) + return err; } if (err < 0) return err; @@ -190,7 +193,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, err = request_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_config_changed, 0, vp_dev->msix_names[0], vp_dev); if (err) - goto out_free_irq_vectors; + goto out_free_msix_affinity_masks; /* Verify we had enough resources to assign the vector */ if (vp_dev->config_vector(vp_dev, 0) == VIRTIO_MSI_NO_VECTOR) { @@ -240,7 +243,11 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, } vp_dev->msix_vector_map[i] = msix_vec; - if (per_vq_vectors) + /* + * Use a different vector for each queue if they are available, + * else share the same vector for all VQs. + */ + if (!shared) allocated_vectors++; } @@ -307,15 +314,9 @@ int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, { int err; - /* Try MSI-X with one vector per queue. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true); - if (!err) - return 0; - /* Fallback: MSI-X with one vector for config, one shared for queues. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false); + err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names); if (!err) return 0; - /* Finally fall back to regular interrupts. */ return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names); } -- 2.11.0
Christoph Hellwig
2017-Feb-05 17:15 UTC
[PATCH 5/9] virtio: allow drivers to request IRQ affinity when creating VQs
Add a struct irq_affinity pointer to the find_vqs methods, which if set is used to tell the PCI layer to create the MSI-X vectors for our I/O virtqueues with the proper affinity from the start. Compared to after the fact affinity hints this gives us an instantly working setup and allows to allocate the irq descritors node-local and avoid interconnect traffic. Last but not least this will allow blk-mq queues are created based on the interrupt affinity for storage drivers. Signed-off-by: Christoph Hellwig <hch at lst.de> Reviewed-by: Jason Wang <jasowang at redhat.com> --- drivers/block/virtio_blk.c | 3 ++- drivers/char/virtio_console.c | 2 +- drivers/crypto/virtio/virtio_crypto_core.c | 2 +- drivers/gpu/drm/virtio/virtgpu_kms.c | 2 +- drivers/misc/mic/vop/vop_main.c | 2 +- drivers/net/caif/caif_virtio.c | 3 ++- drivers/net/virtio_net.c | 2 +- drivers/remoteproc/remoteproc_virtio.c | 3 ++- drivers/rpmsg/virtio_rpmsg_bus.c | 2 +- drivers/s390/virtio/kvm_virtio.c | 3 ++- drivers/s390/virtio/virtio_ccw.c | 3 ++- drivers/scsi/virtio_scsi.c | 3 ++- drivers/virtio/virtio_balloon.c | 3 ++- drivers/virtio/virtio_input.c | 3 ++- drivers/virtio/virtio_mmio.c | 3 ++- drivers/virtio/virtio_pci_common.c | 19 ++++++++++++------- drivers/virtio/virtio_pci_common.h | 5 ++--- drivers/virtio/virtio_pci_modern.c | 7 +++---- include/linux/virtio_config.h | 9 +++++---- net/vmw_vsock/virtio_transport.c | 3 ++- 20 files changed, 48 insertions(+), 34 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 10332c24f961..c54118bdc67d 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -411,7 +411,8 @@ static int init_vq(struct virtio_blk *vblk) } /* Discover virtqueues and write information to configuration. */ - err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names); + err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, + NULL); if (err) goto out; diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 17857beb4892..6266c0568e1d 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1939,7 +1939,7 @@ static int init_vqs(struct ports_device *portdev) /* Find the queues. */ err = portdev->vdev->config->find_vqs(portdev->vdev, nr_queues, vqs, io_callbacks, - (const char **)io_names); + (const char **)io_names, NULL); if (err) goto free; diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index fe70ec823b27..0aa2f045543b 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -119,7 +119,7 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi) } ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks, - names); + names, NULL); if (ret) goto err_find; diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index 1235519853f4..e975fa5b0a32 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -172,7 +172,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags) vgdev->has_virgl_3d ? "enabled" : "not available"); ret = vgdev->vdev->config->find_vqs(vgdev->vdev, 2, vqs, - callbacks, names); + callbacks, names, NULL); if (ret) { DRM_ERROR("failed to find virt queues\n"); goto err_vqs; diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c index 1a2b67f3183d..c2e29d7f0de8 100644 --- a/drivers/misc/mic/vop/vop_main.c +++ b/drivers/misc/mic/vop/vop_main.c @@ -374,7 +374,7 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev, static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], struct irq_affinity *desc) { struct _vop_vdev *vdev = to_vopvdev(dev); struct vop_device *vpdev = vdev->vpdev; diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c index b306210b02b7..bc0eb47eccee 100644 --- a/drivers/net/caif/caif_virtio.c +++ b/drivers/net/caif/caif_virtio.c @@ -679,7 +679,8 @@ static int cfv_probe(struct virtio_device *vdev) goto err; /* Get the TX virtio ring. This is a "guest side vring". */ - err = vdev->config->find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names); + err = vdev->config->find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names, + NULL); if (err) goto err; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 765c2d6358da..9be74c2dfb22 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2003,7 +2003,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi) } ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks, - names); + names, NULL); if (ret) goto err_find; diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index 364411fb7734..0142cc3f0c91 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -137,7 +137,8 @@ static void rproc_virtio_del_vqs(struct virtio_device *vdev) static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], + struct irq_affinity *desc) { int i, ret; diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 3090b0d3072f..5e66e081027e 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -869,7 +869,7 @@ static int rpmsg_probe(struct virtio_device *vdev) init_waitqueue_head(&vrp->sendq); /* We expect two virtqueues, rx and tx (and in this order) */ - err = vdev->config->find_vqs(vdev, 2, vqs, vq_cbs, names); + err = vdev->config->find_vqs(vdev, 2, vqs, vq_cbs, names, NULL); if (err) goto free_vrp; diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c index 5e5c11f37b24..2ce0b3eb2efe 100644 --- a/drivers/s390/virtio/kvm_virtio.c +++ b/drivers/s390/virtio/kvm_virtio.c @@ -255,7 +255,8 @@ static void kvm_del_vqs(struct virtio_device *vdev) static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], + struct irq_affinity *desc) { struct kvm_device *kdev = to_kvmdev(vdev); int i; diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 070c4da95f48..304d3b3cbfd3 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -628,7 +628,8 @@ static int virtio_ccw_register_adapter_ind(struct virtio_ccw_device *vcdev, static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], + struct irq_affinity *desc) { struct virtio_ccw_device *vcdev = to_vc_device(vdev); unsigned long *indicatorp = NULL; diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index ec91bd07f00a..32a16293436b 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -932,7 +932,8 @@ static int virtscsi_init(struct virtio_device *vdev, } /* Discover virtqueues and write information to configuration. */ - err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names); + err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, + NULL); if (err) goto out; diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 181793f07852..36c9c8fcb7f8 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -413,7 +413,8 @@ static int init_vqs(struct virtio_balloon *vb) * optionally stat. */ nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; - err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names); + err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names, + NULL); if (err) return err; diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c index 350a2a5a49db..79f1293cda93 100644 --- a/drivers/virtio/virtio_input.c +++ b/drivers/virtio/virtio_input.c @@ -173,7 +173,8 @@ static int virtinput_init_vqs(struct virtio_input *vi) static const char * const names[] = { "events", "status" }; int err; - err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names); + err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names, + NULL); if (err) return err; vi->evt = vqs[0]; diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index c71fde5fe835..ec8ccb21e9b7 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -446,7 +446,8 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], + struct irq_affinity *desc) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); unsigned int irq = platform_get_irq(vm_dev->pdev, 0); diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 822f8e5dcee4..7902e920fc73 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -143,22 +143,28 @@ void vp_del_vqs(struct virtio_device *vdev) static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(&vp_dev->vdev.dev); int i, err = -ENOMEM, allocated_vectors, nvectors; + unsigned flags = PCI_IRQ_MSIX; bool shared = false; u16 msix_vec; + if (desc) { + flags |= PCI_IRQ_AFFINITY; + desc->pre_vectors++; /* virtio config vector */ + } + nvectors = 1; for (i = 0; i < nvqs; i++) if (callbacks[i]) nvectors++; /* Try one vector per queue first. */ - err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, - PCI_IRQ_MSIX); + err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors, + nvectors, flags, desc); if (err < 0) { /* Fallback to one vector for config, one shared for queues. */ shared = true; @@ -308,13 +314,12 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[]) + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], struct irq_affinity *desc) { int err; - err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names); + err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, desc); if (!err) return 0; return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names); diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 217ca876eed7..a6ad9ec6baef 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -97,9 +97,8 @@ bool vp_notify(struct virtqueue *vq); void vp_del_vqs(struct virtio_device *vdev); /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[]); + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], struct irq_affinity *desc); const char *vp_bus_name(struct virtio_device *vdev); /* Setup the affinity for a virtqueue: diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index e5ce31091953..a7a0981e441c 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -384,13 +384,12 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, } static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[]) + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtqueue *vq; - int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names); + int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, desc); if (rc) return rc; diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 26c155bb639b..2ebe506fe41a 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -7,6 +7,8 @@ #include <linux/virtio_byteorder.h> #include <uapi/linux/virtio_config.h> +struct irq_affinity; + /** * virtio_config_ops - operations for configuring a virtio device * @get: read the value of a configuration field @@ -68,9 +70,8 @@ struct virtio_config_ops { void (*set_status)(struct virtio_device *vdev, u8 status); void (*reset)(struct virtio_device *vdev); int (*find_vqs)(struct virtio_device *, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[]); + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], struct irq_affinity *desc); void (*del_vqs)(struct virtio_device *); u64 (*get_features)(struct virtio_device *vdev); int (*finalize_features)(struct virtio_device *vdev); @@ -169,7 +170,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, vq_callback_t *callbacks[] = { c }; const char *names[] = { n }; struct virtqueue *vq; - int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names); + int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL); if (err < 0) return ERR_PTR(err); return vq; diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 6788264acc63..9d24c0e958b1 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -532,7 +532,8 @@ static int virtio_vsock_probe(struct virtio_device *vdev) vsock->vdev = vdev; ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX, - vsock->vqs, callbacks, names); + vsock->vqs, callbacks, names, + NULL); if (ret < 0) goto out; -- 2.11.0
Christoph Hellwig
2017-Feb-05 17:15 UTC
[PATCH 6/9] virtio: provide a method to get the IRQ affinity mask for a virtqueue
This basically passed up the pci_irq_get_affinity information through virtio through an optional get_vq_affinity method. It is only implemented by the PCI backend for now, and only when we use per-virtqueue IRQs. Signed-off-by: Christoph Hellwig <hch at lst.de> Reviewed-by: Jason Wang <jasowang at redhat.com> --- drivers/virtio/virtio_pci_common.c | 11 +++++++++++ drivers/virtio/virtio_pci_common.h | 2 ++ drivers/virtio/virtio_pci_legacy.c | 1 + drivers/virtio/virtio_pci_modern.c | 2 ++ include/linux/virtio_config.h | 3 +++ 5 files changed, 19 insertions(+) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 7902e920fc73..df548a6fb844 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -361,6 +361,17 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu) return 0; } +const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + unsigned int *map = vp_dev->msix_vector_map; + + if (!map || map[index] == VIRTIO_MSI_NO_VECTOR) + return NULL; + + return pci_irq_get_affinity(vp_dev->pci_dev, map[index]); +} + #ifdef CONFIG_PM_SLEEP static int virtio_pci_freeze(struct device *dev) { diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index a6ad9ec6baef..ac8c9d788964 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -108,6 +108,8 @@ const char *vp_bus_name(struct virtio_device *vdev); */ int vp_set_vq_affinity(struct virtqueue *vq, int cpu); +const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index); + #if IS_ENABLED(CONFIG_VIRTIO_PCI_LEGACY) int virtio_pci_legacy_probe(struct virtio_pci_device *); void virtio_pci_legacy_remove(struct virtio_pci_device *); diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 2ab6aee51bf6..f7362c5fe18a 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -190,6 +190,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = { .finalize_features = vp_finalize_features, .bus_name = vp_bus_name, .set_vq_affinity = vp_set_vq_affinity, + .get_vq_affinity = vp_get_vq_affinity, }; /* the PCI probing function */ diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index a7a0981e441c..7bc3004b840e 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -437,6 +437,7 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = { .finalize_features = vp_finalize_features, .bus_name = vp_bus_name, .set_vq_affinity = vp_set_vq_affinity, + .get_vq_affinity = vp_get_vq_affinity, }; static const struct virtio_config_ops virtio_pci_config_ops = { @@ -452,6 +453,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = { .finalize_features = vp_finalize_features, .bus_name = vp_bus_name, .set_vq_affinity = vp_set_vq_affinity, + .get_vq_affinity = vp_get_vq_affinity, }; /** diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 2ebe506fe41a..8355bab175e1 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -58,6 +58,7 @@ struct irq_affinity; * This returns a pointer to the bus name a la pci_name from which * the caller can then copy. * @set_vq_affinity: set the affinity for a virtqueue. + * @get_vq_affinity: get the affinity for a virtqueue (optional). */ typedef void vq_callback_t(struct virtqueue *); struct virtio_config_ops { @@ -77,6 +78,8 @@ struct virtio_config_ops { int (*finalize_features)(struct virtio_device *vdev); const char *(*bus_name)(struct virtio_device *vdev); int (*set_vq_affinity)(struct virtqueue *vq, int cpu); + const struct cpumask *(*get_vq_affinity)(struct virtio_device *vdev, + int index); }; /* If driver didn't advertise the feature, it will never appear. */ -- 2.11.0
Christoph Hellwig
2017-Feb-05 17:15 UTC
[PATCH 7/9] blk-mq: provide a default queue mapping for virtio device
Similar to the PCI version, just calling into virtio instead. Signed-off-by: Christoph Hellwig <hch at lst.de> --- block/Kconfig | 5 ++++ block/Makefile | 1 + block/blk-mq-virtio.c | 54 +++++++++++++++++++++++++++++++++++++++++++ include/linux/blk-mq-virtio.h | 10 ++++++++ 4 files changed, 70 insertions(+) create mode 100644 block/blk-mq-virtio.c create mode 100644 include/linux/blk-mq-virtio.h diff --git a/block/Kconfig b/block/Kconfig index 8bf114a3858a..3523b4f0cd8b 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -165,4 +165,9 @@ config BLK_MQ_PCI depends on BLOCK && PCI default y +config BLK_MQ_VIRTIO + bool + depends on BLOCK && VIRTIO + default y + source block/Kconfig.iosched diff --git a/block/Makefile b/block/Makefile index a827f988c4e6..60691949d28d 100644 --- a/block/Makefile +++ b/block/Makefile @@ -23,5 +23,6 @@ obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o +obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o obj-$(CONFIG_BLK_WBT) += blk-wbt.o diff --git a/block/blk-mq-virtio.c b/block/blk-mq-virtio.c new file mode 100644 index 000000000000..c3afbca11299 --- /dev/null +++ b/block/blk-mq-virtio.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016 Christoph Hellwig. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#include <linux/device.h> +#include <linux/blk-mq.h> +#include <linux/blk-mq-virtio.h> +#include <linux/virtio_config.h> +#include <linux/module.h> +#include "blk-mq.h" + +/** + * blk_mq_virtio_map_queues - provide a default queue mapping for virtio device + * @set: tagset to provide the mapping for + * @vdev: virtio device associated with @set. + * @first_vec: first interrupt vectors to use for queues (usually 0) + * + * This function assumes the virtio device @vdev has at least as many available + * interrupt vetors as @set has queues. It will then queuery the vector + * corresponding to each queue for it's affinity mask and built queue mapping + * that maps a queue to the CPUs that have irq affinity for the corresponding + * vector. + */ +int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set, + struct virtio_device *vdev, int first_vec) +{ + const struct cpumask *mask; + unsigned int queue, cpu; + + if (!vdev->config->get_vq_affinity) + goto fallback; + + for (queue = 0; queue < set->nr_hw_queues; queue++) { + mask = vdev->config->get_vq_affinity(vdev, first_vec + queue); + if (!mask) + goto fallback; + + for_each_cpu(cpu, mask) + set->mq_map[cpu] = queue; + } + + return 0; +fallback: + return blk_mq_map_queues(set); +} +EXPORT_SYMBOL_GPL(blk_mq_virtio_map_queues); diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h new file mode 100644 index 000000000000..b1ef6e14744f --- /dev/null +++ b/include/linux/blk-mq-virtio.h @@ -0,0 +1,10 @@ +#ifndef _LINUX_BLK_MQ_VIRTIO_H +#define _LINUX_BLK_MQ_VIRTIO_H + +struct blk_mq_tag_set; +struct virtio_device; + +int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set, + struct virtio_device *vdev, int first_vec); + +#endif /* _LINUX_BLK_MQ_VIRTIO_H */ -- 2.11.0
Use automatic IRQ affinity assignment in the virtio layer if available, and build the blk-mq queues based on it. Signed-off-by: Christoph Hellwig <hch at lst.de> --- drivers/block/virtio_blk.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index c54118bdc67d..1028dfeb5a7f 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -5,6 +5,7 @@ #include <linux/hdreg.h> #include <linux/module.h> #include <linux/mutex.h> +#include <linux/interrupt.h> #include <linux/virtio.h> #include <linux/virtio_blk.h> #include <linux/scatterlist.h> @@ -12,6 +13,7 @@ #include <scsi/scsi_cmnd.h> #include <linux/idr.h> #include <linux/blk-mq.h> +#include <linux/blk-mq-virtio.h> #include <linux/numa.h> #define PART_BITS 4 @@ -385,6 +387,7 @@ static int init_vq(struct virtio_blk *vblk) struct virtqueue **vqs; unsigned short num_vqs; struct virtio_device *vdev = vblk->vdev; + struct irq_affinity desc = { 0, }; err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ, struct virtio_blk_config, num_queues, @@ -412,7 +415,7 @@ static int init_vq(struct virtio_blk *vblk) /* Discover virtqueues and write information to configuration. */ err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, - NULL); + &desc); if (err) goto out; @@ -543,10 +546,18 @@ static int virtblk_init_request(void *data, struct request *rq, return 0; } +static int virtblk_map_queues(struct blk_mq_tag_set *set) +{ + struct virtio_blk *vblk = set->driver_data; + + return blk_mq_virtio_map_queues(set, vblk->vdev, 0); +} + static struct blk_mq_ops virtio_mq_ops = { .queue_rq = virtio_queue_rq, .complete = virtblk_request_done, .init_request = virtblk_init_request, + .map_queues = virtblk_map_queues, }; static unsigned int virtblk_queue_depth; -- 2.11.0
Use automatic IRQ affinity assignment in the virtio layer if available, and build the blk-mq queues based on it. Signed-off-by: Christoph Hellwig <hch at lst.de> --- drivers/scsi/virtio_scsi.c | 126 +++++---------------------------------------- include/linux/cpuhotplug.h | 1 - 2 files changed, 12 insertions(+), 115 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 32a16293436b..5582c8baeabb 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -18,6 +18,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/mempool.h> +#include <linux/interrupt.h> #include <linux/virtio.h> #include <linux/virtio_ids.h> #include <linux/virtio_config.h> @@ -29,6 +30,7 @@ #include <scsi/scsi_cmnd.h> #include <scsi/scsi_tcq.h> #include <linux/seqlock.h> +#include <linux/blk-mq-virtio.h> #define VIRTIO_SCSI_MEMPOOL_SZ 64 #define VIRTIO_SCSI_EVENT_LEN 8 @@ -108,7 +110,6 @@ struct virtio_scsi { bool affinity_hint_set; struct hlist_node node; - struct hlist_node node_dead; /* Protected by event_vq lock */ bool stop_events; @@ -118,7 +119,6 @@ struct virtio_scsi { struct virtio_scsi_vq req_vqs[]; }; -static enum cpuhp_state virtioscsi_online; static struct kmem_cache *virtscsi_cmd_cache; static mempool_t *virtscsi_cmd_pool; @@ -757,6 +757,13 @@ static void virtscsi_target_destroy(struct scsi_target *starget) kfree(tgt); } +static int virtscsi_map_queues(struct Scsi_Host *shost) +{ + struct virtio_scsi *vscsi = shost_priv(shost); + + return blk_mq_virtio_map_queues(&shost->tag_set, vscsi->vdev, 2); +} + static struct scsi_host_template virtscsi_host_template_single = { .module = THIS_MODULE, .name = "Virtio SCSI HBA", @@ -792,6 +799,7 @@ static struct scsi_host_template virtscsi_host_template_multi = { .use_clustering = ENABLE_CLUSTERING, .target_alloc = virtscsi_target_alloc, .target_destroy = virtscsi_target_destroy, + .map_queues = virtscsi_map_queues, .track_queue_depth = 1, }; @@ -808,80 +816,6 @@ static struct scsi_host_template virtscsi_host_template_multi = { virtio_cwrite(vdev, struct virtio_scsi_config, fld, &__val); \ } while(0) -static void __virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) -{ - int i; - int cpu; - - /* In multiqueue mode, when the number of cpu is equal - * to the number of request queues, we let the qeueues - * to be private to one cpu by setting the affinity hint - * to eliminate the contention. - */ - if ((vscsi->num_queues == 1 || - vscsi->num_queues != num_online_cpus()) && affinity) { - if (vscsi->affinity_hint_set) - affinity = false; - else - return; - } - - if (affinity) { - i = 0; - for_each_online_cpu(cpu) { - virtqueue_set_affinity(vscsi->req_vqs[i].vq, cpu); - i++; - } - - vscsi->affinity_hint_set = true; - } else { - for (i = 0; i < vscsi->num_queues; i++) { - if (!vscsi->req_vqs[i].vq) - continue; - - virtqueue_set_affinity(vscsi->req_vqs[i].vq, -1); - } - - vscsi->affinity_hint_set = false; - } -} - -static void virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) -{ - get_online_cpus(); - __virtscsi_set_affinity(vscsi, affinity); - put_online_cpus(); -} - -static int virtscsi_cpu_online(unsigned int cpu, struct hlist_node *node) -{ - struct virtio_scsi *vscsi = hlist_entry_safe(node, struct virtio_scsi, - node); - __virtscsi_set_affinity(vscsi, true); - return 0; -} - -static int virtscsi_cpu_notif_add(struct virtio_scsi *vi) -{ - int ret; - - ret = cpuhp_state_add_instance(virtioscsi_online, &vi->node); - if (ret) - return ret; - - ret = cpuhp_state_add_instance(CPUHP_VIRT_SCSI_DEAD, &vi->node_dead); - if (ret) - cpuhp_state_remove_instance(virtioscsi_online, &vi->node); - return ret; -} - -static void virtscsi_cpu_notif_remove(struct virtio_scsi *vi) -{ - cpuhp_state_remove_instance_nocalls(virtioscsi_online, &vi->node); - cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_SCSI_DEAD, - &vi->node_dead); -} - static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, struct virtqueue *vq) { @@ -891,14 +825,8 @@ static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, static void virtscsi_remove_vqs(struct virtio_device *vdev) { - struct Scsi_Host *sh = virtio_scsi_host(vdev); - struct virtio_scsi *vscsi = shost_priv(sh); - - virtscsi_set_affinity(vscsi, false); - /* Stop all the virtqueues. */ vdev->config->reset(vdev); - vdev->config->del_vqs(vdev); } @@ -911,6 +839,7 @@ static int virtscsi_init(struct virtio_device *vdev, vq_callback_t **callbacks; const char **names; struct virtqueue **vqs; + struct irq_affinity desc = { .pre_vectors = 2 }; num_vqs = vscsi->num_queues + VIRTIO_SCSI_VQ_BASE; vqs = kmalloc(num_vqs * sizeof(struct virtqueue *), GFP_KERNEL); @@ -933,7 +862,7 @@ static int virtscsi_init(struct virtio_device *vdev, /* Discover virtqueues and write information to configuration. */ err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, - NULL); + &desc); if (err) goto out; @@ -999,10 +928,6 @@ static int virtscsi_probe(struct virtio_device *vdev) if (err) goto virtscsi_init_failed; - err = virtscsi_cpu_notif_add(vscsi); - if (err) - goto scsi_add_host_failed; - cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1; shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue); shost->max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0xFFFF; @@ -1057,9 +982,6 @@ static void virtscsi_remove(struct virtio_device *vdev) virtscsi_cancel_event_work(vscsi); scsi_remove_host(shost); - - virtscsi_cpu_notif_remove(vscsi); - virtscsi_remove_vqs(vdev); scsi_host_put(shost); } @@ -1067,10 +989,6 @@ static void virtscsi_remove(struct virtio_device *vdev) #ifdef CONFIG_PM_SLEEP static int virtscsi_freeze(struct virtio_device *vdev) { - struct Scsi_Host *sh = virtio_scsi_host(vdev); - struct virtio_scsi *vscsi = shost_priv(sh); - - virtscsi_cpu_notif_remove(vscsi); virtscsi_remove_vqs(vdev); return 0; } @@ -1085,11 +1003,6 @@ static int virtscsi_restore(struct virtio_device *vdev) if (err) return err; - err = virtscsi_cpu_notif_add(vscsi); - if (err) { - vdev->config->del_vqs(vdev); - return err; - } virtio_device_ready(vdev); if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) @@ -1144,16 +1057,6 @@ static int __init init(void) pr_err("mempool_create() for virtscsi_cmd_pool failed\n"); goto error; } - ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, - "scsi/virtio:online", - virtscsi_cpu_online, NULL); - if (ret < 0) - goto error; - virtioscsi_online = ret; - ret = cpuhp_setup_state_multi(CPUHP_VIRT_SCSI_DEAD, "scsi/virtio:dead", - NULL, virtscsi_cpu_online); - if (ret) - goto error; ret = register_virtio_driver(&virtio_scsi_driver); if (ret < 0) goto error; @@ -1169,17 +1072,12 @@ static int __init init(void) kmem_cache_destroy(virtscsi_cmd_cache); virtscsi_cmd_cache = NULL; } - if (virtioscsi_online) - cpuhp_remove_multi_state(virtioscsi_online); - cpuhp_remove_multi_state(CPUHP_VIRT_SCSI_DEAD); return ret; } static void __exit fini(void) { unregister_virtio_driver(&virtio_scsi_driver); - cpuhp_remove_multi_state(virtioscsi_online); - cpuhp_remove_multi_state(CPUHP_VIRT_SCSI_DEAD); mempool_destroy(virtscsi_cmd_pool); kmem_cache_destroy(virtscsi_cmd_cache); } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 921acaaa1601..01aea80a503e 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -26,7 +26,6 @@ enum cpuhp_state { CPUHP_ARM_OMAP_WAKE_DEAD, CPUHP_IRQ_POLL_DEAD, CPUHP_BLOCK_SOFTIRQ_DEAD, - CPUHP_VIRT_SCSI_DEAD, CPUHP_ACPI_CPUDRV_DEAD, CPUHP_S390_PFAULT_DEAD, CPUHP_BLK_MQ_DEAD, -- 2.11.0
On 2017?02?06? 01:15, Christoph Hellwig wrote:> We don't really need struct virtio_pci_vq_info, as most field in there > are redundant: > > - the vq backpointer is not strictly neede to start with > - the entry in the vqs list is not needed - the generic virtqueue already > has list, we only need to check if it has a callback to get the same > semantics > - we can use a simple array to look up the MSI-X vec if needed. > - That simple array now also duoble serves to replace the per_vq_vectors > flag > > Signed-off-by: Christoph Hellwig<hch at lst.de> > --- > drivers/virtio/virtio_pci_common.c | 117 +++++++++++-------------------------- > drivers/virtio/virtio_pci_common.h | 25 +------- > drivers/virtio/virtio_pci_legacy.c | 6 +- > drivers/virtio/virtio_pci_modern.c | 6 +- > 4 files changed, 39 insertions(+), 115 deletions(-) > > diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c > index 186cbab327b8..a33767318cbf 100644 > --- a/drivers/virtio/virtio_pci_common.c > +++ b/drivers/virtio/virtio_pci_common.c > @@ -62,16 +62,13 @@ static irqreturn_t vp_config_changed(int irq, void *opaque) > static irqreturn_t vp_vring_interrupt(int irq, void *opaque) > { > struct virtio_pci_device *vp_dev = opaque; > - struct virtio_pci_vq_info *info; > irqreturn_t ret = IRQ_NONE; > - unsigned long flags; > + struct virtqueue *vq; > > - spin_lock_irqsave(&vp_dev->lock, flags); > - list_for_each_entry(info, &vp_dev->virtqueues, node) { > - if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) > + list_for_each_entry(vq, &vp_dev->vdev.vqs, list) { > + if (vq->callback && vring_interrupt(irq, vq) == IRQ_HANDLED) > ret = IRQ_HANDLED; > }The check is still there. Thanks
Jason Wang
2017-Feb-07 07:17 UTC
[PATCH 2/9] virtio_pci: use shared interrupts for virtqueues
On 2017?02?06? 01:15, Christoph Hellwig wrote:> This lets IRQ layer handle dispatching IRQs to separate handlers for the > case where we don't have per-VQ MSI-X vectors, and allows us to greatly > simplify the code based on the assumption that we always have interrupt > vector 0 (legacy INTx or config interrupt for MSI-X) available, and > any other interrupt is request/freed throught the VQ, even if the > actual interrupt line might be shared in some cases. > > This allows removing a great deal of variables keeping track of the > interrupt state in struct virtio_pci_device, as we can now simply walk the > list of VQs and deal with per-VQ interrupt handlers there, and only treat > vector 0 special. > > Additionally clean up the VQ allocation code to properly unwind on error > instead of having a single global cleanup label, which is error prone, > and in this case also leads to more code. > > Signed-off-by: Christoph Hellwig <hch at lst.de>Reviewed-by: Jason Wang <jasowang at redhat.com>> --- > drivers/virtio/virtio_pci_common.c | 235 ++++++++++++++++--------------------- > drivers/virtio/virtio_pci_common.h | 16 +-- > 2 files changed, 106 insertions(+), 145 deletions(-) > > diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c > index a33767318cbf..274dc1ff09c0 100644 > --- a/drivers/virtio/virtio_pci_common.c > +++ b/drivers/virtio/virtio_pci_common.c > @@ -33,10 +33,8 @@ void vp_synchronize_vectors(struct virtio_device *vdev) > struct virtio_pci_device *vp_dev = to_vp_device(vdev); > int i; > > - if (vp_dev->intx_enabled) > - synchronize_irq(vp_dev->pci_dev->irq); > - > - for (i = 0; i < vp_dev->msix_vectors; ++i) > + synchronize_irq(pci_irq_vector(vp_dev->pci_dev, 0)); > + for (i = 1; i < vp_dev->msix_vectors; i++) > synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i)); > } > > @@ -99,77 +97,10 @@ static irqreturn_t vp_interrupt(int irq, void *opaque) > return vp_vring_interrupt(irq, opaque); > } > > -static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, > - bool per_vq_vectors) > -{ > - struct virtio_pci_device *vp_dev = to_vp_device(vdev); > - const char *name = dev_name(&vp_dev->vdev.dev); > - unsigned i, v; > - int err = -ENOMEM; > - > - vp_dev->msix_vectors = nvectors; > - > - vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, > - GFP_KERNEL); > - if (!vp_dev->msix_names) > - goto error; > - vp_dev->msix_affinity_masks > - = kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks, > - GFP_KERNEL); > - if (!vp_dev->msix_affinity_masks) > - goto error; > - for (i = 0; i < nvectors; ++i) > - if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], > - GFP_KERNEL)) > - goto error; > - > - err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, > - PCI_IRQ_MSIX); > - if (err < 0) > - goto error; > - vp_dev->msix_enabled = 1; > - > - /* Set the vector used for configuration */ > - v = vp_dev->msix_used_vectors; > - snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, > - "%s-config", name); > - err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), > - vp_config_changed, 0, vp_dev->msix_names[v], > - vp_dev); > - if (err) > - goto error; > - ++vp_dev->msix_used_vectors; > - > - v = vp_dev->config_vector(vp_dev, v); > - /* Verify we had enough resources to assign the vector */ > - if (v == VIRTIO_MSI_NO_VECTOR) { > - err = -EBUSY; > - goto error; > - } > - > - if (!per_vq_vectors) { > - /* Shared vector for all VQs */ > - v = vp_dev->msix_used_vectors; > - snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, > - "%s-virtqueues", name); > - err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), > - vp_vring_interrupt, 0, vp_dev->msix_names[v], > - vp_dev); > - if (err) > - goto error; > - ++vp_dev->msix_used_vectors; > - } > - return 0; > -error: > - return err; > -} > - > -/* the config->del_vqs() implementation */ > -void vp_del_vqs(struct virtio_device *vdev) > +static void vp_remove_vqs(struct virtio_device *vdev) > { > struct virtio_pci_device *vp_dev = to_vp_device(vdev); > struct virtqueue *vq, *n; > - int i; > > list_for_each_entry_safe(vq, n, &vdev->vqs, list) { > if (vp_dev->msix_vector_map) { > @@ -181,35 +112,33 @@ void vp_del_vqs(struct virtio_device *vdev) > } > vp_dev->del_vq(vq); > } > +} > > - if (vp_dev->intx_enabled) { > - free_irq(vp_dev->pci_dev->irq, vp_dev); > - vp_dev->intx_enabled = 0; > - } > +/* the config->del_vqs() implementation */ > +void vp_del_vqs(struct virtio_device *vdev) > +{ > + struct virtio_pci_device *vp_dev = to_vp_device(vdev); > + int i; > > - for (i = 0; i < vp_dev->msix_used_vectors; ++i) > - free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev); > + if (WARN_ON_ONCE(list_empty_careful(&vdev->vqs))) > + return; > > - for (i = 0; i < vp_dev->msix_vectors; i++) > - if (vp_dev->msix_affinity_masks[i]) > - free_cpumask_var(vp_dev->msix_affinity_masks[i]); > + vp_remove_vqs(vdev); > > if (vp_dev->msix_enabled) { > + for (i = 0; i < vp_dev->msix_vectors; i++) > + free_cpumask_var(vp_dev->msix_affinity_masks[i]); > + > /* Disable the vector used for configuration */ > vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR); > > - pci_free_irq_vectors(vp_dev->pci_dev); > - vp_dev->msix_enabled = 0; > + kfree(vp_dev->msix_affinity_masks); > + kfree(vp_dev->msix_names); > + kfree(vp_dev->msix_vector_map); > } > > - vp_dev->msix_vectors = 0; > - vp_dev->msix_used_vectors = 0; > - kfree(vp_dev->msix_names); > - vp_dev->msix_names = NULL; > - kfree(vp_dev->msix_affinity_masks); > - vp_dev->msix_affinity_masks = NULL; > - kfree(vp_dev->msix_vector_map); > - vp_dev->msix_vector_map = NULL; > + free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); > + pci_free_irq_vectors(vp_dev->pci_dev); > } > > static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, > @@ -219,79 +148,122 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, > bool per_vq_vectors) > { > struct virtio_pci_device *vp_dev = to_vp_device(vdev); > + const char *name = dev_name(&vp_dev->vdev.dev); > + int i, err = -ENOMEM, allocated_vectors, nvectors; > u16 msix_vec; > - int i, err, nvectors, allocated_vectors; > + > + nvectors = 1; > + for (i = 0; i < nvqs; i++) > + if (callbacks[i]) > + nvectors++; > > if (per_vq_vectors) { > - /* Best option: one for change interrupt, one per vq. */ > - nvectors = 1; > - for (i = 0; i < nvqs; ++i) > - if (callbacks[i]) > - ++nvectors; > + err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, > + PCI_IRQ_MSIX); > } else { > - /* Second best: one for change, shared for all vqs. */ > - nvectors = 2; > + err = pci_alloc_irq_vectors(vp_dev->pci_dev, 2, 2, > + PCI_IRQ_MSIX); > } > + if (err < 0) > + return err; > > - err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors); > + vp_dev->msix_vectors = nvectors; > + vp_dev->msix_names = kmalloc_array(nvectors, > + sizeof(*vp_dev->msix_names), GFP_KERNEL); > + if (!vp_dev->msix_names) > + goto out_free_irq_vectors; > + > + vp_dev->msix_affinity_masks = kcalloc(nvectors, > + sizeof(*vp_dev->msix_affinity_masks), GFP_KERNEL); > + if (!vp_dev->msix_affinity_masks) > + goto out_free_msix_names; > + > + for (i = 0; i < nvectors; ++i) { > + if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], > + GFP_KERNEL)) > + goto out_free_msix_affinity_masks; > + } > + > + /* Set the vector used for configuration */ > + snprintf(vp_dev->msix_names[0], sizeof(*vp_dev->msix_names), > + "%s-config", name); > + err = request_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_config_changed, > + 0, vp_dev->msix_names[0], vp_dev); > if (err) > - goto error_find; > + goto out_free_irq_vectors; > > - if (per_vq_vectors) { > - vp_dev->msix_vector_map = kmalloc_array(nvqs, > - sizeof(*vp_dev->msix_vector_map), GFP_KERNEL); > - if (!vp_dev->msix_vector_map) > - goto error_find; > + /* Verify we had enough resources to assign the vector */ > + if (vp_dev->config_vector(vp_dev, 0) == VIRTIO_MSI_NO_VECTOR) { > + err = -EBUSY; > + goto out_free_config_irq; > } > > - allocated_vectors = vp_dev->msix_used_vectors; > + vp_dev->msix_vector_map = kmalloc_array(nvqs, > + sizeof(*vp_dev->msix_vector_map), GFP_KERNEL); > + if (!vp_dev->msix_vector_map) > + goto out_disable_config_irq; > + > + allocated_vectors = 1; /* vector 0 is the config interrupt */ > for (i = 0; i < nvqs; ++i) { > if (!names[i]) { > vqs[i] = NULL; > continue; > } > > - if (!callbacks[i]) > - msix_vec = VIRTIO_MSI_NO_VECTOR; > - else if (per_vq_vectors) > - msix_vec = allocated_vectors++; > + if (callbacks[i]) > + msix_vec = allocated_vectors; > else > - msix_vec = VP_MSIX_VQ_VECTOR; > + msix_vec = VIRTIO_MSI_NO_VECTOR; > + > vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i], > msix_vec); > if (IS_ERR(vqs[i])) { > err = PTR_ERR(vqs[i]); > - goto error_find; > + goto out_remove_vqs; > } > > - if (!per_vq_vectors) > - continue; > - > if (msix_vec == VIRTIO_MSI_NO_VECTOR) { > vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; > continue; > } > > - /* allocate per-vq irq if available and necessary */ > - snprintf(vp_dev->msix_names[msix_vec], > - sizeof *vp_dev->msix_names, > - "%s-%s", > + snprintf(vp_dev->msix_names[i + 1], > + sizeof(*vp_dev->msix_names), "%s-%s", > dev_name(&vp_dev->vdev.dev), names[i]); > err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), > - vring_interrupt, 0, > - vp_dev->msix_names[msix_vec], > - vqs[i]); > + vring_interrupt, IRQF_SHARED, > + vp_dev->msix_names[i + 1], vqs[i]); > if (err) { > /* don't free this irq on error */ > vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; > - goto error_find; > + goto out_remove_vqs; > } > vp_dev->msix_vector_map[i] = msix_vec; > + > + if (per_vq_vectors) > + allocated_vectors++; > } > + > + vp_dev->msix_enabled = 1; > return 0; > > -error_find: > - vp_del_vqs(vdev); > +out_remove_vqs: > + vp_remove_vqs(vdev); > + kfree(vp_dev->msix_vector_map); > +out_disable_config_irq: > + vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR); > +out_free_config_irq: > + free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); > +out_free_msix_affinity_masks: > + for (i = 0; i < nvectors; i++) { > + if (vp_dev->msix_affinity_masks[i]) > + free_cpumask_var(vp_dev->msix_affinity_masks[i]); > + } > + kfree(vp_dev->msix_affinity_masks); > +out_free_msix_names: > + kfree(vp_dev->msix_names); > +out_free_irq_vectors: > + pci_free_irq_vectors(vp_dev->pci_dev); > return err; > } > > @@ -305,9 +277,8 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, > err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, > dev_name(&vdev->dev), vp_dev); > if (err) > - goto out_del_vqs; > + return err; > > - vp_dev->intx_enabled = 1; > for (i = 0; i < nvqs; ++i) { > if (!names[i]) { > vqs[i] = NULL; > @@ -317,13 +288,15 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, > VIRTIO_MSI_NO_VECTOR); > if (IS_ERR(vqs[i])) { > err = PTR_ERR(vqs[i]); > - goto out_del_vqs; > + goto out_remove_vqs; > } > } > > return 0; > -out_del_vqs: > - vp_del_vqs(vdev); > + > +out_remove_vqs: > + vp_remove_vqs(vdev); > + free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); > return err; > } > > diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h > index 2038887bdf23..85593867e712 100644 > --- a/drivers/virtio/virtio_pci_common.h > +++ b/drivers/virtio/virtio_pci_common.h > @@ -66,16 +66,12 @@ struct virtio_pci_device { > > /* MSI-X support */ > int msix_enabled; > - int intx_enabled; > cpumask_var_t *msix_affinity_masks; > /* Name strings for interrupts. This size should be enough, > * and I'm too lazy to allocate each name separately. */ > char (*msix_names)[256]; > - /* Number of available vectors */ > - unsigned msix_vectors; > - /* Vectors allocated, excluding per-vq vectors if any */ > - unsigned msix_used_vectors; > - > + /* Total Number of MSI-X vectors (including per-VQ ones). */ > + int msix_vectors; > /* Map of per-VQ MSI-X vectors, may be NULL */ > unsigned *msix_vector_map; > > @@ -89,14 +85,6 @@ struct virtio_pci_device { > u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector); > }; > > -/* Constants for MSI-X */ > -/* Use first vector for configuration changes, second and the rest for > - * virtqueues Thus, we need at least 2 vectors for MSI. */ > -enum { > - VP_MSIX_CONFIG_VECTOR = 0, > - VP_MSIX_VQ_VECTOR = 1, > -}; > - > /* Convert a generic virtio device to our structure */ > static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) > {
On Sun, Feb 05, 2017 at 06:15:17PM +0100, Christoph Hellwig wrote:> Hi Michael, hi Jason, > > This patches applies a few cleanups to the virtio PCI interrupt handling > code, and then converts the virtio PCI code to use the automatic MSI-X > vectors spreading, as well as using the information in virtio-blk > and virtio-scsi to automatically align the blk-mq queues to the MSI-X > vectors.Any chance to get this in for 4.11 after I got reviews from Jason for most of the patches?
On Thu, Feb 09, 2017 at 05:50:31AM -0800, Christoph Hellwig wrote:> On Sun, Feb 05, 2017 at 06:15:17PM +0100, Christoph Hellwig wrote: > > Hi Michael, hi Jason, > > > > This patches applies a few cleanups to the virtio PCI interrupt handling > > code, and then converts the virtio PCI code to use the automatic MSI-X > > vectors spreading, as well as using the information in virtio-blk > > and virtio-scsi to automatically align the blk-mq queues to the MSI-X > > vectors. > > Any chance to get this in for 4.11 after I got reviews from Jason > for most of the patches?Absolutely, I intend to merge it. -- MST
Possibly Parallel Threads
- automatic IRQ affinity for virtio V3
- automatic IRQ affinity for virtio V3
- 4.14: WARNING: CPU: 4 PID: 2895 at block/blk-mq.c:1144 with virtio-blk (also 4.12 stable)
- 4.14: WARNING: CPU: 4 PID: 2895 at block/blk-mq.c:1144 with virtio-blk (also 4.12 stable)
- automatic IRQ affinity for virtio V2