Andy Lutomirski
2014-Aug-26 21:16 UTC
[PATCH 0/3] virtio: Clean up scatterlists and use the DMA API
This fixes virtio on Xen guests as well as on any other platform on which physical addresses don't match bus addresses. This can be tested with: virtme-run --xen xen --kimg arch/x86/boot/bzImage --console using virtme from here: https://git.kernel.org/cgit/utils/kernel/virtme/virtme.git Without these patches, the guest hangs forever. With these patches, everything works. There are two outstanding issues. virtio_net warns if DMA debugging is on because it does DMA from the stack. (The warning is correct.) This also is likely to do something unpleasant to s390. (Maintainers are cc'd -- I don't know what to do about it.) Andy Lutomirski (3): virtio_ring: Remove sg_next indirection virtio_ring: Use DMA APIs virtio_pci: Use the DMA API for virtqueues drivers/virtio/Kconfig | 1 + drivers/virtio/virtio_pci.c | 25 ++++++-- drivers/virtio/virtio_ring.c | 150 ++++++++++++++++++++++++++++++------------- 3 files changed, 125 insertions(+), 51 deletions(-) -- 1.9.3
The only unusual thing about virtio's use of scatterlists is that two of the APIs accept scatterlists that might not be terminated. Using function pointers to handle this case is overkill; for_each_sg can do it. There's a small subtlely here: for_each_sg assumes that the provided count is correct, but, because of the way that virtio_ring handles multiple scatterlists at once, the count is only an upper bound if there is more than one scatterlist. This is easily solved by checking the sg pointer for NULL on each iteration. Signed-off-by: Andy Lutomirski <luto at amacapital.net> --- drivers/virtio/virtio_ring.c | 46 +++++++++++++++++--------------------------- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 4d08f45a9c29..d356a701c9c2 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -99,25 +99,9 @@ struct vring_virtqueue #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) -static inline struct scatterlist *sg_next_chained(struct scatterlist *sg, - unsigned int *count) -{ - return sg_next(sg); -} - -static inline struct scatterlist *sg_next_arr(struct scatterlist *sg, - unsigned int *count) -{ - if (--(*count) == 0) - return NULL; - return sg + 1; -} - /* Set up an indirect table of descriptors and add it to the queue. 
*/ static inline int vring_add_indirect(struct vring_virtqueue *vq, struct scatterlist *sgs[], - struct scatterlist *(*next) - (struct scatterlist *, unsigned int *), unsigned int total_sg, unsigned int total_out, unsigned int total_in, @@ -128,7 +112,7 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq, struct vring_desc *desc; unsigned head; struct scatterlist *sg; - int i, n; + int i, j, n; /* * We require lowmem mappings for the descriptors because @@ -144,7 +128,9 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq, /* Transfer entries from the sg lists into the indirect page */ i = 0; for (n = 0; n < out_sgs; n++) { - for (sg = sgs[n]; sg; sg = next(sg, &total_out)) { + for_each_sg(sgs[n], sg, total_out, j) { + if (!sg) + break; desc[i].flags = VRING_DESC_F_NEXT; desc[i].addr = sg_phys(sg); desc[i].len = sg->length; @@ -153,7 +139,9 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq, } } for (; n < (out_sgs + in_sgs); n++) { - for (sg = sgs[n]; sg; sg = next(sg, &total_in)) { + for_each_sg(sgs[n], sg, total_in, j) { + if (!sg) + break; desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; desc[i].addr = sg_phys(sg); desc[i].len = sg->length; @@ -186,8 +174,6 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq, static inline int virtqueue_add(struct virtqueue *_vq, struct scatterlist *sgs[], - struct scatterlist *(*next) - (struct scatterlist *, unsigned int *), unsigned int total_out, unsigned int total_in, unsigned int out_sgs, @@ -197,7 +183,7 @@ static inline int virtqueue_add(struct virtqueue *_vq, { struct vring_virtqueue *vq = to_vvq(_vq); struct scatterlist *sg; - unsigned int i, n, avail, uninitialized_var(prev), total_sg; + unsigned int i, j, n, avail, uninitialized_var(prev), total_sg; int head; START_USE(vq); @@ -227,7 +213,7 @@ static inline int virtqueue_add(struct virtqueue *_vq, /* If the host supports indirect descriptor tables, and we have multiple * buffers, then go indirect. 
FIXME: tune this threshold */ if (vq->indirect && total_sg > 1 && vq->vq.num_free) { - head = vring_add_indirect(vq, sgs, next, total_sg, total_out, + head = vring_add_indirect(vq, sgs, total_sg, total_out, total_in, out_sgs, in_sgs, gfp); if (likely(head >= 0)) @@ -254,7 +240,9 @@ static inline int virtqueue_add(struct virtqueue *_vq, head = i = vq->free_head; for (n = 0; n < out_sgs; n++) { - for (sg = sgs[n]; sg; sg = next(sg, &total_out)) { + for_each_sg(sgs[n], sg, total_out, j) { + if (!sg) + break; vq->vring.desc[i].flags = VRING_DESC_F_NEXT; vq->vring.desc[i].addr = sg_phys(sg); vq->vring.desc[i].len = sg->length; @@ -263,7 +251,9 @@ static inline int virtqueue_add(struct virtqueue *_vq, } } for (; n < (out_sgs + in_sgs); n++) { - for (sg = sgs[n]; sg; sg = next(sg, &total_in)) { + for_each_sg(sgs[n], sg, total_in, j) { + if (!sg) + break; vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; vq->vring.desc[i].addr = sg_phys(sg); vq->vring.desc[i].len = sg->length; @@ -337,7 +327,7 @@ int virtqueue_add_sgs(struct virtqueue *_vq, for (sg = sgs[i]; sg; sg = sg_next(sg)) total_in++; } - return virtqueue_add(_vq, sgs, sg_next_chained, + return virtqueue_add(_vq, sgs, total_out, total_in, out_sgs, in_sgs, data, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_sgs); @@ -360,7 +350,7 @@ int virtqueue_add_outbuf(struct virtqueue *vq, void *data, gfp_t gfp) { - return virtqueue_add(vq, &sg, sg_next_arr, num, 0, 1, 0, data, gfp); + return virtqueue_add(vq, &sg, num, 0, 1, 0, data, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); @@ -382,7 +372,7 @@ int virtqueue_add_inbuf(struct virtqueue *vq, void *data, gfp_t gfp) { - return virtqueue_add(vq, &sg, sg_next_arr, 0, num, 0, 1, data, gfp); + return virtqueue_add(vq, &sg, 0, num, 0, 1, data, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); -- 1.9.3
virtio_ring currently sends the device (usually a hypervisor) physical addresses of its I/O buffers. This is okay when DMA addresses and physical addresses are the same thing, but this isn't always the case. For example, this never works on Xen guests, and it is likely to fail if a physical "virtio" device ever ends up behind an IOMMU or swiotlb. The immediate use case for me is to enable virtio on Xen guests. For that to work, we need this fix as well as a corresponding fix to virtio_pci or to another driver. With this patch, virtfs survives kmemleak and CONFIG_DMA_API_DEBUG. virtio-net warns (correctly) about DMA from the stack in virtnet_set_rx_mode. This breaks s390's defconfig. The default configuration for s390 does virtio through a KVM-specific interface, but that configuration does not support DMA. I could modify this patch to stub out the DMA API calls if !CONFIG_HAS_DMA, but it seems to me that it would be much nicer to make s390 support DMA unconditionally. It's actually unclear to me whether this can be fixed without either s390 arch support or a special case for s390 in virtio_ring: on brief inspection of s390's DMA code, it seems to assume that everything goes through a PCI IOMMU, which is presumably not the case for virtio. 
Cc: Cornelia Huck <cornelia.huck at de.ibm.com> Signed-off-by: Andy Lutomirski <luto at amacapital.net> --- drivers/virtio/Kconfig | 1 + drivers/virtio/virtio_ring.c | 116 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 95 insertions(+), 22 deletions(-) diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index c6683f2e396c..b6f3acc61153 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -1,5 +1,6 @@ config VIRTIO tristate + depends on HAS_DMA ---help--- This option is selected by any driver which implements the virtio bus, such as CONFIG_VIRTIO_PCI, CONFIG_VIRTIO_MMIO, CONFIG_LGUEST, diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index d356a701c9c2..6a78e2fd8e63 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -24,6 +24,7 @@ #include <linux/module.h> #include <linux/hrtimer.h> #include <linux/kmemleak.h> +#include <linux/dma-mapping.h> #ifdef DEBUG /* For development, we want to crash whenever the ring is screwed. */ @@ -54,6 +55,12 @@ #define END_USE(vq) #endif +struct vring_desc_state +{ + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + struct vring_virtqueue { struct virtqueue vq; @@ -93,12 +100,45 @@ struct vring_virtqueue ktime_t last_add_time; #endif - /* Tokens for callbacks. */ - void *data[]; + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; }; #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) +/* Helper to map one sg entry, since we can't use dma_map_sg. 
*/ +static dma_addr_t dma_map_one_sg(struct vring_virtqueue *vq, + struct scatterlist *sg, + enum dma_data_direction direction) +{ + return dma_map_page(vq->vq.vdev->dev.parent, + sg_page(sg), sg->offset, sg->length, direction); +} + +static void unmap_one(struct vring_virtqueue *vq, struct vring_desc *desc) +{ + if (desc->flags & VRING_DESC_F_INDIRECT) { + dma_unmap_single(vq->vq.vdev->dev.parent, + desc->addr, desc->len, + (desc->flags & VRING_DESC_F_WRITE) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE); + } else { + dma_unmap_page(vq->vq.vdev->dev.parent, + desc->addr, desc->len, + (desc->flags & VRING_DESC_F_WRITE) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE); + } +} + +static void unmap_indirect(struct vring_virtqueue *vq, struct vring_desc *desc, + int total) +{ + int i; + + for (i = 0; i < total; i++) + unmap_one(vq, &desc[i]); +} + /* Set up an indirect table of descriptors and add it to the queue. */ static inline int vring_add_indirect(struct vring_virtqueue *vq, struct scatterlist *sgs[], @@ -132,7 +172,11 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq, if (!sg) break; desc[i].flags = VRING_DESC_F_NEXT; - desc[i].addr = sg_phys(sg); + desc[i].addr + dma_map_one_sg(vq, sg, DMA_TO_DEVICE); + if (dma_mapping_error(vq->vq.vdev->dev.parent, + desc[i].addr)) + goto unmap_free; desc[i].len = sg->length; desc[i].next = i+1; i++; @@ -143,7 +187,11 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq, if (!sg) break; desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; - desc[i].addr = sg_phys(sg); + desc[i].addr + dma_map_one_sg(vq, sg, DMA_FROM_DEVICE); + if (dma_mapping_error(vq->vq.vdev->dev.parent, + desc[i].addr)) + goto unmap_free; desc[i].len = sg->length; desc[i].next = i+1; i++; @@ -161,15 +209,27 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq, /* Use a single buffer which doesn't continue */ head = vq->free_head; vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT; - vq->vring.desc[head].addr = virt_to_phys(desc); - 
/* kmemleak gives a false positive, as it's hidden by virt_to_phys */ - kmemleak_ignore(desc); + vq->vring.desc[head].addr + dma_map_single(vq->vq.vdev->dev.parent, + desc, i * sizeof(struct vring_desc), + DMA_TO_DEVICE); + if (dma_mapping_error(vq->vq.vdev->dev.parent, + vq->vring.desc[head].addr)) + goto unmap_free; vq->vring.desc[head].len = i * sizeof(struct vring_desc); /* Update free pointer */ vq->free_head = vq->vring.desc[head].next; + /* Save the indirect block */ + vq->desc_state[head].indir_desc = desc; + return head; + +unmap_free: + unmap_indirect(vq, desc, i); + kfree(desc); + return -ENOMEM; } static inline int virtqueue_add(struct virtqueue *_vq, @@ -244,7 +304,8 @@ static inline int virtqueue_add(struct virtqueue *_vq, if (!sg) break; vq->vring.desc[i].flags = VRING_DESC_F_NEXT; - vq->vring.desc[i].addr = sg_phys(sg); + vq->vring.desc[i].addr + dma_map_one_sg(vq, sg, DMA_TO_DEVICE); vq->vring.desc[i].len = sg->length; prev = i; i = vq->vring.desc[i].next; @@ -255,7 +316,8 @@ static inline int virtqueue_add(struct virtqueue *_vq, if (!sg) break; vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; - vq->vring.desc[i].addr = sg_phys(sg); + vq->vring.desc[i].addr + dma_map_one_sg(vq, sg, DMA_FROM_DEVICE); vq->vring.desc[i].len = sg->length; prev = i; i = vq->vring.desc[i].next; @@ -269,7 +331,7 @@ static inline int virtqueue_add(struct virtqueue *_vq, add_head: /* Set token. */ - vq->data[head] = data; + vq->desc_state[head].data = data; /* Put entry in available array (but don't update avail->idx until they * do sync). */ @@ -470,22 +532,33 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head) unsigned int i; /* Clear data ptr. 
*/ - vq->data[head] = NULL; + vq->desc_state[head].data = NULL; /* Put back on free list: find end */ i = head; /* Free the indirect table */ - if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT) - kfree(phys_to_virt(vq->vring.desc[i].addr)); + if (vq->desc_state[head].indir_desc) { + u32 len = vq->vring.desc[i].len; + + BUG_ON(!(vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT)); + BUG_ON(len == 0 || len % sizeof(struct vring_desc)); + unmap_indirect(vq, vq->desc_state[head].indir_desc, + len / sizeof(struct vring_desc)); + kfree(vq->desc_state[head].indir_desc); + vq->desc_state[head].indir_desc = NULL; + } while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) { + unmap_one(vq, &vq->vring.desc[i]); i = vq->vring.desc[i].next; vq->vq.num_free++; } + unmap_one(vq, &vq->vring.desc[i]); vq->vring.desc[i].next = vq->free_head; vq->free_head = head; + /* Plus final descriptor */ vq->vq.num_free++; } @@ -542,13 +615,13 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) BAD_RING(vq, "id %u out of range\n", i); return NULL; } - if (unlikely(!vq->data[i])) { + if (unlikely(!vq->desc_state[i].data)) { BAD_RING(vq, "id %u is not a head!\n", i); return NULL; } /* detach_buf clears data, so grab it now. */ - ret = vq->data[i]; + ret = vq->desc_state[i].data; detach_buf(vq, i); vq->last_used_idx++; /* If we expect an interrupt for the next entry, tell host @@ -709,10 +782,10 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq) START_USE(vq); for (i = 0; i < vq->vring.num; i++) { - if (!vq->data[i]) + if (!vq->desc_state[i].data) continue; /* detach_buf clears data, so grab it now. 
*/ - buf = vq->data[i]; + buf = vq->desc_state[i].data; detach_buf(vq, i); vq->vring.avail->idx--; END_USE(vq); @@ -765,7 +838,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, return NULL; } - vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL); + vq = kmalloc(sizeof(*vq) + num * sizeof(struct vring_desc_state), + GFP_KERNEL); if (!vq) return NULL; @@ -795,11 +869,9 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, /* Put everything in free lists. */ vq->free_head = 0; - for (i = 0; i < num-1; i++) { + for (i = 0; i < num-1; i++) vq->vring.desc[i].next = i+1; - vq->data[i] = NULL; - } - vq->data[i] = NULL; + memset(vq->desc_state, 0, num * sizeof(struct vring_desc_state)); return &vq->vq; } -- 1.9.3
Andy Lutomirski
2014-Aug-26 21:17 UTC
[PATCH 3/3] virtio_pci: Use the DMA API for virtqueues
A virtqueue is a coherent DMA mapping. Use the DMA API for it. This fixes virtio_pci on Xen. Signed-off-by: Andy Lutomirski <luto at amacapital.net> --- drivers/virtio/virtio_pci.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 3d1463c6b120..19039c5bec24 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -80,8 +80,9 @@ struct virtio_pci_vq_info /* the number of entries in the queue */ int num; - /* the virtual address of the ring queue */ - void *queue; + /* the ring queue */ + void *queue; /* virtual address */ + dma_addr_t queue_dma_addr; /* bus address */ /* the list node for the virtqueues list */ struct list_head node; @@ -417,15 +418,16 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index, info->num = num; info->msix_vector = msix_vec; - size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN)); - info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO); + size = vring_size(num, VIRTIO_PCI_VRING_ALIGN); + info->queue = dma_zalloc_coherent(vdev->dev.parent, size, + &info->queue_dma_addr, GFP_KERNEL); if (info->queue == NULL) { err = -ENOMEM; goto out_info; } /* activate the queue */ - iowrite32(virt_to_phys(info->queue) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT, + iowrite32(info->queue_dma_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); /* create the vring */ @@ -462,7 +464,8 @@ out_assign: vring_del_virtqueue(vq); out_activate_queue: iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); - free_pages_exact(info->queue, size); + dma_free_coherent(vdev->dev.parent, size, + info->queue, info->queue_dma_addr); out_info: kfree(info); return ERR_PTR(err); @@ -493,7 +496,8 @@ static void vp_del_vq(struct virtqueue *vq) iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN)); - free_pages_exact(info->queue, size); + 
dma_free_coherent(vq->vdev->dev.parent, size, + info->queue, info->queue_dma_addr); kfree(info); } @@ -712,6 +716,13 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, if (err) goto out; + /* + * We support 64-bit DMA. If this fails (e.g. some bridge + * or PV code doesn't or DAC is disabled), then we're okay + * with 32-bit DMA. + */ + dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64)); + err = pci_request_regions(pci_dev, "virtio-pci"); if (err) goto out_enable_device; -- 1.9.3
Stefan Hajnoczi
2014-Aug-27 06:46 UTC
[PATCH 0/3] virtio: Clean up scatterlists and use the DMA API
On Tue, Aug 26, 2014 at 10:16 PM, Andy Lutomirski <luto at amacapital.net> wrote:> There are two outstanding issues. virtio_net warns if DMA debugging > is on because it does DMA from the stack. (The warning is correct.) > This also is likely to do something unpleasant to s390. > (Maintainers are cc'd -- I don't know what to do about it.)This changes the semantics of vring and breaks existing guests when bus address != physical address. Can you use a transport feature bit to indicate that bus addresses are used? That way both approaches can be supported. Please also update the virtio specification: https://tools.oasis-open.org/version-control/browse/wsvn/virtio/ Stefan
On 26/08/14 23:17, Andy Lutomirski wrote:> virtio_ring currently sends the device (usually a hypervisor) > physical addresses of its I/O buffers. This is okay when DMA > addresses and physical addresses are the same thing, but this isn't > always the case. For example, this never works on Xen guests, and > it is likely to fail if a physical "virtio" device ever ends up > behind an IOMMU or swiotlb. > > The immediate use case for me is to enable virtio on Xen guests. > For that to work, we need this fix as well as a corresponding > fix to virtio_pci or to another driver. > > With this patch, virtfs survives kmemleak and CONFIG_DMA_API_DEBUG. > virtio-net warns (correctly) about DMA from the stack in > virtnet_set_rx_mode. > > This breaks s390's defconfig. The default configuration for s390 > does virtio through a KVM-specific interface, but that configuration > does not support DMA. I could modify this patch to stub out the DMA > API calls if !CONFIG_HAS_DMA, but it seems to me that it would be > much nicer to make s390 support DMA unconditionally.s390 has no DMA per se. Newest systems have a PCI-like I/O attach in addition to the classic channel I/O, and for that we enable the DMA code just for that transport to be able to reuse some of the existing PCI drivers. (only some because, we differ in some aspects from how PCI looks like) But the architecture itself (and the virtio interface) does not provide the DMA interface as you know it: In essence this patch is a no-go for s390. I am also asking myself, if this makes virtio-ring more expensive? 
Christian> > It's actually unclear to me whether this can be fixed without either > s390 arch support or a special case for s390 in virtio_ring: on > brief inspection of s390's DMA code, it seems to assume that > everything goes through a PCI IOMMU, which is presumably not the > case for virtio.> > Cc: Cornelia Huck <cornelia.huck at de.ibm.com> > Signed-off-by: Andy Lutomirski <luto at amacapital.net> > --- > drivers/virtio/Kconfig | 1 + > drivers/virtio/virtio_ring.c | 116 +++++++++++++++++++++++++++++++++++-------- > 2 files changed, 95 insertions(+), 22 deletions(-) > > diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig > index c6683f2e396c..b6f3acc61153 100644 > --- a/drivers/virtio/Kconfig > +++ b/drivers/virtio/Kconfig > @@ -1,5 +1,6 @@ > config VIRTIO > tristate > + depends on HAS_DMA > ---help--- > This option is selected by any driver which implements the virtio > bus, such as CONFIG_VIRTIO_PCI, CONFIG_VIRTIO_MMIO, CONFIG_LGUEST, > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c > index d356a701c9c2..6a78e2fd8e63 100644 > --- a/drivers/virtio/virtio_ring.c > +++ b/drivers/virtio/virtio_ring.c > @@ -24,6 +24,7 @@ > #include <linux/module.h> > #include <linux/hrtimer.h> > #include <linux/kmemleak.h> > +#include <linux/dma-mapping.h> > > #ifdef DEBUG > /* For development, we want to crash whenever the ring is screwed. */ > @@ -54,6 +55,12 @@ > #define END_USE(vq) > #endif > > +struct vring_desc_state > +{ > + void *data; /* Data for callback. */ > + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ > +}; > + > struct vring_virtqueue > { > struct virtqueue vq; > @@ -93,12 +100,45 @@ struct vring_virtqueue > ktime_t last_add_time; > #endif > > - /* Tokens for callbacks. */ > - void *data[]; > + /* Per-descriptor state. */ > + struct vring_desc_state desc_state[]; > }; > > #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) > > +/* Helper to map one sg entry, since we can't use dma_map_sg. 
*/ > +static dma_addr_t dma_map_one_sg(struct vring_virtqueue *vq, > + struct scatterlist *sg, > + enum dma_data_direction direction) > +{ > + return dma_map_page(vq->vq.vdev->dev.parent, > + sg_page(sg), sg->offset, sg->length, direction); > +} > + > +static void unmap_one(struct vring_virtqueue *vq, struct vring_desc *desc) > +{ > + if (desc->flags & VRING_DESC_F_INDIRECT) { > + dma_unmap_single(vq->vq.vdev->dev.parent, > + desc->addr, desc->len, > + (desc->flags & VRING_DESC_F_WRITE) ? > + DMA_FROM_DEVICE : DMA_TO_DEVICE); > + } else { > + dma_unmap_page(vq->vq.vdev->dev.parent, > + desc->addr, desc->len, > + (desc->flags & VRING_DESC_F_WRITE) ? > + DMA_FROM_DEVICE : DMA_TO_DEVICE); > + } > +} > + > +static void unmap_indirect(struct vring_virtqueue *vq, struct vring_desc *desc, > + int total) > +{ > + int i; > + > + for (i = 0; i < total; i++) > + unmap_one(vq, &desc[i]); > +} > + > /* Set up an indirect table of descriptors and add it to the queue. */ > static inline int vring_add_indirect(struct vring_virtqueue *vq, > struct scatterlist *sgs[], > @@ -132,7 +172,11 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq, > if (!sg) > break; > desc[i].flags = VRING_DESC_F_NEXT; > - desc[i].addr = sg_phys(sg); > + desc[i].addr > + dma_map_one_sg(vq, sg, DMA_TO_DEVICE); > + if (dma_mapping_error(vq->vq.vdev->dev.parent, > + desc[i].addr)) > + goto unmap_free; > desc[i].len = sg->length; > desc[i].next = i+1; > i++; > @@ -143,7 +187,11 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq, > if (!sg) > break; > desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; > - desc[i].addr = sg_phys(sg); > + desc[i].addr > + dma_map_one_sg(vq, sg, DMA_FROM_DEVICE); > + if (dma_mapping_error(vq->vq.vdev->dev.parent, > + desc[i].addr)) > + goto unmap_free; > desc[i].len = sg->length; > desc[i].next = i+1; > i++; > @@ -161,15 +209,27 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq, > /* Use a single buffer which doesn't continue */ > 
head = vq->free_head; > vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT; > - vq->vring.desc[head].addr = virt_to_phys(desc); > - /* kmemleak gives a false positive, as it's hidden by virt_to_phys */ > - kmemleak_ignore(desc); > + vq->vring.desc[head].addr > + dma_map_single(vq->vq.vdev->dev.parent, > + desc, i * sizeof(struct vring_desc), > + DMA_TO_DEVICE); > + if (dma_mapping_error(vq->vq.vdev->dev.parent, > + vq->vring.desc[head].addr)) > + goto unmap_free; > vq->vring.desc[head].len = i * sizeof(struct vring_desc); > > /* Update free pointer */ > vq->free_head = vq->vring.desc[head].next; > > + /* Save the indirect block */ > + vq->desc_state[head].indir_desc = desc; > + > return head; > + > +unmap_free: > + unmap_indirect(vq, desc, i); > + kfree(desc); > + return -ENOMEM; > } > > static inline int virtqueue_add(struct virtqueue *_vq, > @@ -244,7 +304,8 @@ static inline int virtqueue_add(struct virtqueue *_vq, > if (!sg) > break; > vq->vring.desc[i].flags = VRING_DESC_F_NEXT; > - vq->vring.desc[i].addr = sg_phys(sg); > + vq->vring.desc[i].addr > + dma_map_one_sg(vq, sg, DMA_TO_DEVICE); > vq->vring.desc[i].len = sg->length; > prev = i; > i = vq->vring.desc[i].next; > @@ -255,7 +316,8 @@ static inline int virtqueue_add(struct virtqueue *_vq, > if (!sg) > break; > vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; > - vq->vring.desc[i].addr = sg_phys(sg); > + vq->vring.desc[i].addr > + dma_map_one_sg(vq, sg, DMA_FROM_DEVICE); > vq->vring.desc[i].len = sg->length; > prev = i; > i = vq->vring.desc[i].next; > @@ -269,7 +331,7 @@ static inline int virtqueue_add(struct virtqueue *_vq, > > add_head: > /* Set token. */ > - vq->data[head] = data; > + vq->desc_state[head].data = data; > > /* Put entry in available array (but don't update avail->idx until they > * do sync). */ > @@ -470,22 +532,33 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head) > unsigned int i; > > /* Clear data ptr. 
*/ > - vq->data[head] = NULL; > + vq->desc_state[head].data = NULL; > > /* Put back on free list: find end */ > i = head; > > /* Free the indirect table */ > - if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT) > - kfree(phys_to_virt(vq->vring.desc[i].addr)); > + if (vq->desc_state[head].indir_desc) { > + u32 len = vq->vring.desc[i].len; > + > + BUG_ON(!(vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT)); > + BUG_ON(len == 0 || len % sizeof(struct vring_desc)); > + unmap_indirect(vq, vq->desc_state[head].indir_desc, > + len / sizeof(struct vring_desc)); > + kfree(vq->desc_state[head].indir_desc); > + vq->desc_state[head].indir_desc = NULL; > + } > > while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) { > + unmap_one(vq, &vq->vring.desc[i]); > i = vq->vring.desc[i].next; > vq->vq.num_free++; > } > > + unmap_one(vq, &vq->vring.desc[i]); > vq->vring.desc[i].next = vq->free_head; > vq->free_head = head; > + > /* Plus final descriptor */ > vq->vq.num_free++; > } > @@ -542,13 +615,13 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) > BAD_RING(vq, "id %u out of range\n", i); > return NULL; > } > - if (unlikely(!vq->data[i])) { > + if (unlikely(!vq->desc_state[i].data)) { > BAD_RING(vq, "id %u is not a head!\n", i); > return NULL; > } > > /* detach_buf clears data, so grab it now. */ > - ret = vq->data[i]; > + ret = vq->desc_state[i].data; > detach_buf(vq, i); > vq->last_used_idx++; > /* If we expect an interrupt for the next entry, tell host > @@ -709,10 +782,10 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq) > START_USE(vq); > > for (i = 0; i < vq->vring.num; i++) { > - if (!vq->data[i]) > + if (!vq->desc_state[i].data) > continue; > /* detach_buf clears data, so grab it now. 
*/ > - buf = vq->data[i]; > + buf = vq->desc_state[i].data; > detach_buf(vq, i); > vq->vring.avail->idx--; > END_USE(vq); > @@ -765,7 +838,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, > return NULL; > } > > - vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL); > + vq = kmalloc(sizeof(*vq) + num * sizeof(struct vring_desc_state), > + GFP_KERNEL); > if (!vq) > return NULL; > > @@ -795,11 +869,9 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, > > /* Put everything in free lists. */ > vq->free_head = 0; > - for (i = 0; i < num-1; i++) { > + for (i = 0; i < num-1; i++) > vq->vring.desc[i].next = i+1; > - vq->data[i] = NULL; > - } > - vq->data[i] = NULL; > + memset(vq->desc_state, 0, num * sizeof(struct vring_desc_state)); > > return &vq->vq; > } >
Cornelia Huck
2014-Aug-27 07:54 UTC
[PATCH 0/3] virtio: Clean up scatterlists and use the DMA API
On Tue, 26 Aug 2014 14:16:59 -0700 Andy Lutomirski <luto at amacapital.net> wrote:> This fixes virtio on Xen guests as well as on any other platform on > which physical addresses don't match bus addresses. > > This can be tested with: > > virtme-run --xen xen --kimg arch/x86/boot/bzImage --console > > using virtme from here: > > https://git.kernel.org/cgit/utils/kernel/virtme/virtme.git > > Without these patches, the guest hangs forever. With these patches, > everything works. > > There are two outstanding issues. virtio_net warns if DMA debugging > is on because it does DMA from the stack. (The warning is correct.) > This also is likely to do something unpleasant to s390.s/is likely to do something unpleasant to/breaks/ Sorry, this just won't work for s390, as Christian already explained.> (Maintainers are cc'd -- I don't know what to do about it.) > > Andy Lutomirski (3): > virtio_ring: Remove sg_next indirection > virtio_ring: Use DMA APIs > virtio_pci: Use the DMA API for virtqueues > > drivers/virtio/Kconfig | 1 + > drivers/virtio/virtio_pci.c | 25 ++++++-- > drivers/virtio/virtio_ring.c | 150 ++++++++++++++++++++++++++++++------------- > 3 files changed, 125 insertions(+), 51 deletions(-) >Aren't you also changing some basic assumptions with the conversion to DMA apis? virtqueues currently aren't accessed via dma (or it wouldn't work on s390); if you want (say) virtio-pci to use dma, shouldn't that be something that is negotiated between guest and host?
Andy Lutomirski
2014-Aug-27 15:11 UTC
[PATCH 0/3] virtio: Clean up scatterlists and use the DMA API
On Aug 26, 2014 11:46 PM, "Stefan Hajnoczi" <stefanha at gmail.com> wrote:> > On Tue, Aug 26, 2014 at 10:16 PM, Andy Lutomirski <luto at amacapital.net> wrote: > > There are two outstanding issues. virtio_net warns if DMA debugging > > is on because it does DMA from the stack. (The warning is correct.) > > This also is likely to do something unpleasant to s390. > > (Maintainers are cc'd -- I don't know what to do about it.) > > This changes the semantics of vring and breaks existing guests when > bus address != physical address. > > Can you use a transport feature bit to indicate that bus addresses are > used? That way both approaches can be supported.I can try to support both styles of addressing, but I don't think that this can be negotiated between the device (i.e. host or physical virtio-speaking device) and the guest. In the Xen case that I care about (Linux on Xen on KVM), the host doesn't know about the translation at all -- Xen is an intermediate layer that only the guest is aware of. In this case, there are effectively two layers of virtualization, and only the inner one (Xen) knows about the translation despite the fact that the outer layer is the one providing the virtio device. I could change virtio_ring to use the DMA API only if requested by the lower driver (virtio_pci, etc) and to have only virtio_pci enable that feature. Will that work for all cases? On s390, this should work just like the current code. On x86, I think that if QEMU ever starts exposing an IOMMU attached to a virtio-pci device, then QEMU should expect that IOMMU to be used. If QEMU expects to see physical addresses, then it shouldn't advertise an IOMMU. Since QEMU doesn't currently support guest IOMMUs, this should be fine for everything that uses QEMU. At least x86's implementation of the DMA ops for devices that aren't behind an IOMMU should be very fast. 
Are there any other weird cases for which this might be a problem?> > Please also update the virtio specification: > https://tools.oasis-open.org/version-control/browse/wsvn/virtio/ >I'm not sure it will need an update. Perhaps a note in the PCI section indicating that, if the host expects the guest to program an IOMMU, then it should use the appropriate platform-specific mechanism to expose that IOMMU. --Andy
Konrad Rzeszutek Wilk
2014-Aug-27 17:27 UTC
[PATCH 0/3] virtio: Clean up scatterlists and use the DMA API
On Wed, Aug 27, 2014 at 07:46:46AM +0100, Stefan Hajnoczi wrote:> On Tue, Aug 26, 2014 at 10:16 PM, Andy Lutomirski <luto at amacapital.net> wrote: > > There are two outstanding issues. virtio_net warns if DMA debugging > > is on because it does DMA from the stack. (The warning is correct.) > > This also is likely to do something unpleasant to s390. > > (Maintainers are cc'd -- I don't know what to do about it.) > > This changes the semantics of vring and breaks existing guests when > bus address != physical address.Isn't that what this is supposed to fix? Right now the semantics of the vring is that bus address == physical address.> > Can you use a transport feature bit to indicate that bus addresses are > used? That way both approaches can be supported. > > Please also update the virtio specification: > https://tools.oasis-open.org/version-control/browse/wsvn/virtio/ > > Stefan
Konrad Rzeszutek Wilk
2014-Aug-27 17:32 UTC
[PATCH 3/3] virtio_pci: Use the DMA API for virtqueues
On Tue, Aug 26, 2014 at 02:17:02PM -0700, Andy Lutomirski wrote:> A virtqueue is a coherent DMA mapping. Use the DMA API for it. > This fixes virtio_pci on Xen. > > Signed-off-by: Andy Lutomirski <luto at amacapital.net> > --- > drivers/virtio/virtio_pci.c | 25 ++++++++++++++++++------- > 1 file changed, 18 insertions(+), 7 deletions(-) > > diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c > index 3d1463c6b120..19039c5bec24 100644 > --- a/drivers/virtio/virtio_pci.c > +++ b/drivers/virtio/virtio_pci.c > @@ -80,8 +80,9 @@ struct virtio_pci_vq_info > /* the number of entries in the queue */ > int num; > > - /* the virtual address of the ring queue */ > - void *queue; > + /* the ring queue */ > + void *queue; /* virtual address */ > + dma_addr_t queue_dma_addr; /* bus address */ > > /* the list node for the virtqueues list */ > struct list_head node; > @@ -417,15 +418,16 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index, > info->num = num; > info->msix_vector = msix_vec; > > - size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN)); > - info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO); > + size = vring_size(num, VIRTIO_PCI_VRING_ALIGN); > + info->queue = dma_zalloc_coherent(vdev->dev.parent, size, > + &info->queue_dma_addr, GFP_KERNEL); > if (info->queue == NULL) { > err = -ENOMEM; > goto out_info; > } > > /* activate the queue */ > - iowrite32(virt_to_phys(info->queue) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT, > + iowrite32(info->queue_dma_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT, > vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); > > /* create the vring */ > @@ -462,7 +464,8 @@ out_assign: > vring_del_virtqueue(vq); > out_activate_queue: > iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); > - free_pages_exact(info->queue, size); > + dma_free_coherent(vdev->dev.parent, size, > + info->queue, info->queue_dma_addr); > out_info: > kfree(info); > return ERR_PTR(err); > @@ -493,7 +496,8 @@ static void vp_del_vq(struct virtqueue 
*vq) > iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); > > size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN)); > - free_pages_exact(info->queue, size); > + dma_free_coherent(vq->vdev->dev.parent, size, > + info->queue, info->queue_dma_addr); > kfree(info); > } > > @@ -712,6 +716,13 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, > if (err) > goto out; > > + /* > + * We support 64-bit DMA. If this fails (e.g. some bridge > + * or PV code doesn't or DAC is disabled), then we're okay > + * with 32-bit DMA.<scratches his head> I am having a hard time parsing that. Could you expand a bit the faulting use-case please?> + */ > + dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64));The usual process is: ret = dma_set_mask_and_coherent(..) if (ret) ret = dma_set_mask_and_coherent(.., DMA_BIT_MASK(32)) if (ret) pr_warn("We are truly screwed. Good luck!\n");> + > err = pci_request_regions(pci_dev, "virtio-pci"); > if (err) > goto out_enable_device; > -- > 1.9.3 >
Andy Lutomirski <luto at amacapital.net> writes:> The only unusual thing about virtio's use of scatterlists is that > two of the APIs accept scatterlists that might not be terminated. > Using function pointers to handle this case is overkill; for_each_sg > can do it. > > There's a small subtlely here: for_each_sg assumes that the provided > count is correct, but, because of the way that virtio_ring handles > multiple scatterlists at once, the count is only an upper bound if > there is more than one scatterlist. This is easily solved by > checking the sg pointer for NULL on each iteration.Hi Andy, (Sorry for the delayed response; I was still catching up from my week at KS.) Unfortunately the reason we dance through so many hoops here is that it has a measurable performance impact :( Those indirect calls get inlined. There's only one place which actually uses a weirdly-formed sg now, and that's virtio_net. It's pretty trivial to fix. However, vring_bench drops 15% when we do this. There's a larger question as to how much difference that makes in Real Life, of course. I'll measure that today. Here are my two patches, back-to-back (it came out of an earlier concern about reducing stack usage, hence the stack measurements). Cheers, Rusty. Subject: virtio_net: pass well-formed sg to virtqueue_add_inbuf() This is the only place which doesn't hand virtqueue_add_inbuf or virtqueue_add_outbuf a well-formed, well-terminated sg. Fix it, so we can make virtio_add_* simpler. 
Signed-off-by: Rusty Russell <rusty at rustcorp.com.au> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 8a852b5f215f..63299b04cdf2 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -590,6 +590,8 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp) offset = sizeof(struct padded_vnet_hdr); sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset); + sg_mark_end(&rq->sg[MAX_SKB_FRAGS + 2 - 1]); + /* chain first in list head */ first->private = (unsigned long)list; err = virtqueue_add_inbuf(rq->vq, rq->sg, MAX_SKB_FRAGS + 2, Subject: virtio_ring: assume sgs are always well-formed. We used to have several callers which just used arrays. They're gone, so we can use sg_next() everywhere, simplifying the code. Before: gcc 4.8.2: virtio_blk: stack used = 392 gcc 4.6.4: virtio_blk: stack used = 528 After: gcc 4.8.2: virtio_blk: stack used = 392 gcc 4.6.4: virtio_blk: stack used = 480 vring_bench before: 936153354-967745359(9.44739e+08+/-6.1e+06)ns vring_bench after: 1061485790-1104800648(1.08254e+09+/-6.6e+06)ns Signed-off-by: Rusty Russell <rusty at rustcorp.com.au> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 8e531578065f..a6cdb52ae9b2 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -107,28 +107,10 @@ struct vring_virtqueue #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) -static inline struct scatterlist *sg_next_chained(struct scatterlist *sg, - unsigned int *count) -{ - return sg_next(sg); -} - -static inline struct scatterlist *sg_next_arr(struct scatterlist *sg, - unsigned int *count) -{ - if (--(*count) == 0) - return NULL; - return sg + 1; -} - /* Set up an indirect table of descriptors and add it to the queue. 
*/ static inline int vring_add_indirect(struct vring_virtqueue *vq, struct scatterlist *sgs[], - struct scatterlist *(*next) - (struct scatterlist *, unsigned int *), unsigned int total_sg, - unsigned int total_out, - unsigned int total_in, unsigned int out_sgs, unsigned int in_sgs, gfp_t gfp) @@ -155,7 +137,7 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq, /* Transfer entries from the sg lists into the indirect page */ i = 0; for (n = 0; n < out_sgs; n++) { - for (sg = sgs[n]; sg; sg = next(sg, &total_out)) { + for (sg = sgs[n]; sg; sg = sg_next(sg)) { desc[i].flags = VRING_DESC_F_NEXT; desc[i].addr = sg_phys(sg); desc[i].len = sg->length; @@ -164,7 +146,7 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq, } } for (; n < (out_sgs + in_sgs); n++) { - for (sg = sgs[n]; sg; sg = next(sg, &total_in)) { + for (sg = sgs[n]; sg; sg = sg_next(sg)) { desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; desc[i].addr = sg_phys(sg); desc[i].len = sg->length; @@ -197,10 +179,7 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq, static inline int virtqueue_add(struct virtqueue *_vq, struct scatterlist *sgs[], - struct scatterlist *(*next) - (struct scatterlist *, unsigned int *), - unsigned int total_out, - unsigned int total_in, + unsigned int total_sg, unsigned int out_sgs, unsigned int in_sgs, void *data, @@ -208,7 +187,7 @@ static inline int virtqueue_add(struct virtqueue *_vq, { struct vring_virtqueue *vq = to_vvq(_vq); struct scatterlist *sg; - unsigned int i, n, avail, uninitialized_var(prev), total_sg; + unsigned int i, n, avail, uninitialized_var(prev); int head; START_USE(vq); @@ -233,13 +212,10 @@ static inline int virtqueue_add(struct virtqueue *_vq, } #endif - total_sg = total_in + total_out; - /* If the host supports indirect descriptor tables, and we have multiple * buffers, then go indirect. 
FIXME: tune this threshold */ if (vq->indirect && total_sg > 1 && vq->vq.num_free) { - head = vring_add_indirect(vq, sgs, next, total_sg, total_out, - total_in, + head = vring_add_indirect(vq, sgs, total_sg, out_sgs, in_sgs, gfp); if (likely(head >= 0)) goto add_head; @@ -265,7 +241,7 @@ static inline int virtqueue_add(struct virtqueue *_vq, head = i = vq->free_head; for (n = 0; n < out_sgs; n++) { - for (sg = sgs[n]; sg; sg = next(sg, &total_out)) { + for (sg = sgs[n]; sg; sg = sg_next(sg)) { vq->vring.desc[i].flags = VRING_DESC_F_NEXT; vq->vring.desc[i].addr = sg_phys(sg); vq->vring.desc[i].len = sg->length; @@ -274,7 +250,7 @@ static inline int virtqueue_add(struct virtqueue *_vq, } } for (; n < (out_sgs + in_sgs); n++) { - for (sg = sgs[n]; sg; sg = next(sg, &total_in)) { + for (sg = sgs[n]; sg; sg = sg_next(sg)) { vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; vq->vring.desc[i].addr = sg_phys(sg); vq->vring.desc[i].len = sg->length; @@ -335,29 +311,23 @@ int virtqueue_add_sgs(struct virtqueue *_vq, void *data, gfp_t gfp) { - unsigned int i, total_out, total_in; + unsigned int i, total_sg = 0; /* Count them first. */ - for (i = total_out = total_in = 0; i < out_sgs; i++) { - struct scatterlist *sg; - for (sg = sgs[i]; sg; sg = sg_next(sg)) - total_out++; - } - for (; i < out_sgs + in_sgs; i++) { + for (i = 0; i < out_sgs + in_sgs; i++) { struct scatterlist *sg; for (sg = sgs[i]; sg; sg = sg_next(sg)) - total_in++; + total_sg++; } - return virtqueue_add(_vq, sgs, sg_next_chained, - total_out, total_in, out_sgs, in_sgs, data, gfp); + return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, data, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_sgs); /** * virtqueue_add_outbuf - expose output buffers to other end * @vq: the struct virtqueue we're talking about. - * @sgs: array of scatterlists (need not be terminated!) - * @num: the number of scatterlists readable by other side + * @sg: scatterlist (must be well-formed and terminated!) 
+ * @num: the number of entries in @sg readable by other side * @data: the token identifying the buffer. * @gfp: how to do memory allocations (if necessary). * @@ -367,19 +337,19 @@ EXPORT_SYMBOL_GPL(virtqueue_add_sgs); * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). */ int virtqueue_add_outbuf(struct virtqueue *vq, - struct scatterlist sg[], unsigned int num, + struct scatterlist *sg, unsigned int num, void *data, gfp_t gfp) { - return virtqueue_add(vq, &sg, sg_next_arr, num, 0, 1, 0, data, gfp); + return virtqueue_add(vq, &sg, num, 1, 0, data, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); /** * virtqueue_add_inbuf - expose input buffers to other end * @vq: the struct virtqueue we're talking about. - * @sgs: array of scatterlists (need not be terminated!) - * @num: the number of scatterlists writable by other side + * @sg: scatterlist (must be well-formed and terminated!) + * @num: the number of entries in @sg writable by other side * @data: the token identifying the buffer. * @gfp: how to do memory allocations (if necessary). * @@ -389,11 +359,11 @@ EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). */ int virtqueue_add_inbuf(struct virtqueue *vq, - struct scatterlist sg[], unsigned int num, + struct scatterlist *sg, unsigned int num, void *data, gfp_t gfp) { - return virtqueue_add(vq, &sg, sg_next_arr, 0, num, 0, 1, data, gfp); + return virtqueue_add(vq, &sg, num, 0, 1, data, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);