jiangyiwen
2018-Dec-12 09:31 UTC
[PATCH v2 3/5] VSOCK: support receive mergeable rx buffer in guest
Guest receive mergeable rx buffer, it can merge scatter rx buffer into a big buffer and then copy to user space. In addition, it also use iovec to replace buf in struct virtio_vsock_pkt, keep tx and rx consistency. The only difference is now tx still uses a segment of continuous physical memory to implement. Signed-off-by: Yiwen Jiang <jiangyiwen at huawei.com> --- drivers/vhost/vsock.c | 31 +++++++--- include/linux/virtio_vsock.h | 6 +- net/vmw_vsock/virtio_transport.c | 105 ++++++++++++++++++++++++++++---- net/vmw_vsock/virtio_transport_common.c | 59 ++++++++++++++---- 4 files changed, 166 insertions(+), 35 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index dc52b0f..c7ab0dd 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -179,6 +179,8 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, size_t nbytes; size_t len; s16 headcount; + size_t remain_len; + int i; spin_lock_bh(&vsock->send_pkt_list_lock); if (list_empty(&vsock->send_pkt_list)) { @@ -221,11 +223,19 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, break; } - nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter); - if (nbytes != pkt->len) { - virtio_transport_free_pkt(pkt); - vq_err(vq, "Faulted on copying pkt buf\n"); - break; + remain_len = pkt->len; + for (i = 0; i < pkt->nr_vecs; i++) { + int tmp_len; + + tmp_len = min(remain_len, pkt->vec[i].iov_len); + nbytes = copy_to_iter(pkt->vec[i].iov_base, tmp_len, &iov_iter); + if (nbytes != tmp_len) { + virtio_transport_free_pkt(pkt); + vq_err(vq, "Faulted on copying pkt buf\n"); + break; + } + + remain_len -= tmp_len; } vhost_add_used_n(vq, vq->heads, headcount); @@ -341,6 +351,7 @@ static void vhost_transport_send_pkt_work(struct vhost_work *work) struct iov_iter iov_iter; size_t nbytes; size_t len; + void *buf; if (in != 0) { vq_err(vq, "Expected 0 input buffers, got %u\n", in); @@ -375,13 +386,17 @@ static void vhost_transport_send_pkt_work(struct vhost_work *work) return NULL; } - pkt->buf = 
kmalloc(pkt->len, GFP_KERNEL); - if (!pkt->buf) { + buf = kmalloc(pkt->len, GFP_KERNEL); + if (!buf) { kfree(pkt); return NULL; } - nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter); + pkt->vec[0].iov_base = buf; + pkt->vec[0].iov_len = pkt->len; + pkt->nr_vecs = 1; + + nbytes = copy_from_iter(buf, pkt->len, &iov_iter); if (nbytes != pkt->len) { vq_err(vq, "Expected %u byte payload, got %zu bytes\n", pkt->len, nbytes); diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index da9e1fe..734eeed 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -13,6 +13,8 @@ #define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) #define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL #define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) +/* virtio_vsock_pkt + max_pkt_len(default MAX_PKT_BUF_SIZE) */ +#define VIRTIO_VSOCK_MAX_VEC_NUM ((VIRTIO_VSOCK_MAX_PKT_BUF_SIZE / PAGE_SIZE) + 1) /* Virtio-vsock feature */ #define VIRTIO_VSOCK_F_MRG_RXBUF 0 /* Host can merge receive buffers. 
*/ @@ -55,10 +57,12 @@ struct virtio_vsock_pkt { struct list_head list; /* socket refcnt not held, only use for cancellation */ struct vsock_sock *vsk; - void *buf; + struct kvec vec[VIRTIO_VSOCK_MAX_VEC_NUM]; + int nr_vecs; u32 len; u32 off; bool reply; + bool mergeable; }; struct virtio_vsock_pkt_info { diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index c4a465c..148b58a 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -155,8 +155,10 @@ static int virtio_transport_send_pkt_loopback(struct virtio_vsock *vsock, sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); sgs[out_sg++] = &hdr; - if (pkt->buf) { - sg_init_one(&buf, pkt->buf, pkt->len); + if (pkt->len) { + /* Currently only support a segment of memory in tx */ + BUG_ON(pkt->vec[0].iov_len != pkt->len); + sg_init_one(&buf, pkt->vec[0].iov_base, pkt->vec[0].iov_len); sgs[out_sg++] = &buf; } @@ -304,23 +306,28 @@ static int fill_old_rx_buff(struct virtqueue *vq) struct virtio_vsock_pkt *pkt; struct scatterlist hdr, buf, *sgs[2]; int ret; + void *pkt_buf; pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); if (!pkt) return -ENOMEM; - pkt->buf = kmalloc(buf_len, GFP_KERNEL); - if (!pkt->buf) { + pkt_buf = kmalloc(buf_len, GFP_KERNEL); + if (!pkt_buf) { virtio_transport_free_pkt(pkt); return -ENOMEM; } + pkt->vec[0].iov_base = pkt_buf; + pkt->vec[0].iov_len = buf_len; + pkt->nr_vecs = 1; + pkt->len = buf_len; sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); sgs[0] = &hdr; - sg_init_one(&buf, pkt->buf, buf_len); + sg_init_one(&buf, pkt->vec[0].iov_base, buf_len); sgs[1] = &buf; ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL); if (ret) @@ -388,11 +395,78 @@ static bool virtio_transport_more_replies(struct virtio_vsock *vsock) return val < virtqueue_get_vring_size(vq); } +static struct virtio_vsock_pkt *receive_mergeable(struct virtqueue *vq, + struct virtio_vsock *vsock, unsigned int *total_len) +{ + struct virtio_vsock_pkt *pkt; + u16 num_buf; + void 
*buf; + unsigned int len; + size_t vsock_hlen = sizeof(struct virtio_vsock_pkt); + + buf = virtqueue_get_buf(vq, &len); + if (!buf) + return NULL; + + *total_len = len; + vsock->rx_buf_nr--; + + if (unlikely(len < vsock_hlen)) { + put_page(virt_to_head_page(buf)); + return NULL; + } + + pkt = buf; + num_buf = le16_to_cpu(pkt->mrg_rxbuf_hdr.num_buffers); + if (!num_buf || num_buf > VIRTIO_VSOCK_MAX_VEC_NUM) { + put_page(virt_to_head_page(buf)); + return NULL; + } + + /* Initialize pkt residual structure */ + memset(&pkt->work, 0, vsock_hlen - sizeof(struct virtio_vsock_hdr) - + sizeof(struct virtio_vsock_mrg_rxbuf_hdr)); + + pkt->mergeable = true; + pkt->len = le32_to_cpu(pkt->hdr.len); + if (!pkt->len) + return pkt; + + len -= vsock_hlen; + if (len) { + pkt->vec[pkt->nr_vecs].iov_base = buf + vsock_hlen; + pkt->vec[pkt->nr_vecs].iov_len = len; + /* Shared page with pkt, so get page in advance */ + get_page(virt_to_head_page(buf)); + pkt->nr_vecs++; + } + + while (--num_buf) { + buf = virtqueue_get_buf(vq, &len); + if (!buf) + goto err; + + *total_len += len; + vsock->rx_buf_nr--; + + pkt->vec[pkt->nr_vecs].iov_base = buf; + pkt->vec[pkt->nr_vecs].iov_len = len; + pkt->nr_vecs++; + } + + return pkt; +err: + virtio_transport_free_pkt(pkt); + return NULL; +} + static void virtio_transport_rx_work(struct work_struct *work) { struct virtio_vsock *vsock container_of(work, struct virtio_vsock, rx_work); struct virtqueue *vq; + size_t vsock_hlen = vsock->mergeable ? 
sizeof(struct virtio_vsock_pkt) : + sizeof(struct virtio_vsock_hdr); vq = vsock->vqs[VSOCK_VQ_RX]; @@ -412,21 +486,26 @@ static void virtio_transport_rx_work(struct work_struct *work) goto out; } - pkt = virtqueue_get_buf(vq, &len); - if (!pkt) { - break; - } + if (likely(vsock->mergeable)) { + pkt = receive_mergeable(vq, vsock, &len); + if (!pkt) + break; + } else { + pkt = virtqueue_get_buf(vq, &len); + if (!pkt) + break; - vsock->rx_buf_nr--; + vsock->rx_buf_nr--; + } /* Drop short/long packets */ - if (unlikely(len < sizeof(pkt->hdr) || - len > sizeof(pkt->hdr) + pkt->len)) { + if (unlikely(len < vsock_hlen || + len > vsock_hlen + pkt->len)) { virtio_transport_free_pkt(pkt); continue; } - pkt->len = len - sizeof(pkt->hdr); + pkt->len = len - vsock_hlen; virtio_transport_deliver_tap_pkt(pkt); virtio_transport_recv_pkt(pkt); } diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 3ae3a33..123a8b6 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -44,6 +44,7 @@ static const struct virtio_transport *virtio_transport_get_ops(void) { struct virtio_vsock_pkt *pkt; int err; + void *buf = NULL; pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); if (!pkt) @@ -62,12 +63,16 @@ static const struct virtio_transport *virtio_transport_get_ops(void) pkt->vsk = info->vsk; if (info->msg && len > 0) { - pkt->buf = kmalloc(len, GFP_KERNEL); - if (!pkt->buf) + buf = kmalloc(len, GFP_KERNEL); + if (!buf) goto out_pkt; - err = memcpy_from_msg(pkt->buf, info->msg, len); + err = memcpy_from_msg(buf, info->msg, len); if (err) goto out; + + pkt->vec[0].iov_base = buf; + pkt->vec[0].iov_len = len; + pkt->nr_vecs = 1; } trace_virtio_transport_alloc_pkt(src_cid, src_port, @@ -80,7 +85,7 @@ static const struct virtio_transport *virtio_transport_get_ops(void) return pkt; out: - kfree(pkt->buf); + kfree(buf); out_pkt: kfree(pkt); return NULL; @@ -92,6 +97,7 @@ static struct sk_buff 
*virtio_transport_build_skb(void *opaque) struct virtio_vsock_pkt *pkt = opaque; struct af_vsockmon_hdr *hdr; struct sk_buff *skb; + int i; skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + pkt->len, GFP_ATOMIC); @@ -134,7 +140,8 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr)); if (pkt->len) { - skb_put_data(skb, pkt->buf, pkt->len); + for (i = 0; i < pkt->nr_vecs; i++) + skb_put_data(skb, pkt->vec[i].iov_base, pkt->vec[i].iov_len); } return skb; @@ -260,6 +267,9 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk, spin_lock_bh(&vvs->rx_lock); while (total < len && !list_empty(&vvs->rx_queue)) { + size_t copy_bytes, last_vec_total = 0, vec_off; + int i; + pkt = list_first_entry(&vvs->rx_queue, struct virtio_vsock_pkt, list); @@ -272,14 +282,28 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk, */ spin_unlock_bh(&vvs->rx_lock); - err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes); - if (err) - goto out; + for (i = 0; i < pkt->nr_vecs; i++) { + if (pkt->off > last_vec_total + pkt->vec[i].iov_len) { + last_vec_total += pkt->vec[i].iov_len; + continue; + } + + vec_off = pkt->off - last_vec_total; + copy_bytes = min(pkt->vec[i].iov_len - vec_off, bytes); + err = memcpy_to_msg(msg, pkt->vec[i].iov_base + vec_off, + copy_bytes); + if (err) + goto out; + + bytes -= copy_bytes; + pkt->off += copy_bytes; + total += copy_bytes; + last_vec_total += pkt->vec[i].iov_len; + if (!bytes) + break; + } spin_lock_bh(&vvs->rx_lock); - - total += bytes; - pkt->off += bytes; if (pkt->off == pkt->len) { virtio_transport_dec_rx_pkt(vvs, pkt); list_del(&pkt->list); @@ -1050,8 +1074,17 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt) { - kfree(pkt->buf); - kfree(pkt); + int i; + + if (pkt->mergeable) { + for (i = 0; i < pkt->nr_vecs; i++) + put_page(virt_to_head_page(pkt->vec[i].iov_base)); + 
put_page(virt_to_head_page((void *)pkt)); + } else { + for (i = 0; i < pkt->nr_vecs; i++) + kfree(pkt->vec[i].iov_base); + kfree(pkt); + } } EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); -- 1.8.3.1
Michael S. Tsirkin
2018-Dec-12 15:31 UTC
[PATCH v2 3/5] VSOCK: support receive mergeable rx buffer in guest
On Wed, Dec 12, 2018 at 05:31:39PM +0800, jiangyiwen wrote:> Guest receive mergeable rx buffer, it can merge > scatter rx buffer into a big buffer and then copy > to user space. > > In addition, it also use iovec to replace buf in struct > virtio_vsock_pkt, keep tx and rx consistency. The only > difference is now tx still uses a segment of continuous > physical memory to implement. > > Signed-off-by: Yiwen Jiang <jiangyiwen at huawei.com> > --- > drivers/vhost/vsock.c | 31 +++++++--- > include/linux/virtio_vsock.h | 6 +- > net/vmw_vsock/virtio_transport.c | 105 ++++++++++++++++++++++++++++---- > net/vmw_vsock/virtio_transport_common.c | 59 ++++++++++++++---- > 4 files changed, 166 insertions(+), 35 deletions(-)This was supposed to be a guest patch, why is vhost changed here?> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c > index dc52b0f..c7ab0dd 100644 > --- a/drivers/vhost/vsock.c > +++ b/drivers/vhost/vsock.c > @@ -179,6 +179,8 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, > size_t nbytes; > size_t len; > s16 headcount; > + size_t remain_len; > + int i; > > spin_lock_bh(&vsock->send_pkt_list_lock); > if (list_empty(&vsock->send_pkt_list)) { > @@ -221,11 +223,19 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, > break; > } > > - nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter); > - if (nbytes != pkt->len) { > - virtio_transport_free_pkt(pkt); > - vq_err(vq, "Faulted on copying pkt buf\n"); > - break; > + remain_len = pkt->len; > + for (i = 0; i < pkt->nr_vecs; i++) { > + int tmp_len; > + > + tmp_len = min(remain_len, pkt->vec[i].iov_len); > + nbytes = copy_to_iter(pkt->vec[i].iov_base, tmp_len, &iov_iter); > + if (nbytes != tmp_len) { > + virtio_transport_free_pkt(pkt); > + vq_err(vq, "Faulted on copying pkt buf\n"); > + break; > + } > + > + remain_len -= tmp_len; > } > > vhost_add_used_n(vq, vq->heads, headcount); > @@ -341,6 +351,7 @@ static void vhost_transport_send_pkt_work(struct vhost_work *work) > struct iov_iter iov_iter; > 
size_t nbytes; > size_t len; > + void *buf; > > if (in != 0) { > vq_err(vq, "Expected 0 input buffers, got %u\n", in); > @@ -375,13 +386,17 @@ static void vhost_transport_send_pkt_work(struct vhost_work *work) > return NULL; > } > > - pkt->buf = kmalloc(pkt->len, GFP_KERNEL); > - if (!pkt->buf) { > + buf = kmalloc(pkt->len, GFP_KERNEL); > + if (!buf) { > kfree(pkt); > return NULL; > } > > - nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter); > + pkt->vec[0].iov_base = buf; > + pkt->vec[0].iov_len = pkt->len; > + pkt->nr_vecs = 1; > + > + nbytes = copy_from_iter(buf, pkt->len, &iov_iter); > if (nbytes != pkt->len) { > vq_err(vq, "Expected %u byte payload, got %zu bytes\n", > pkt->len, nbytes); > diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h > index da9e1fe..734eeed 100644 > --- a/include/linux/virtio_vsock.h > +++ b/include/linux/virtio_vsock.h > @@ -13,6 +13,8 @@ > #define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) > #define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL > #define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) > +/* virtio_vsock_pkt + max_pkt_len(default MAX_PKT_BUF_SIZE) */ > +#define VIRTIO_VSOCK_MAX_VEC_NUM ((VIRTIO_VSOCK_MAX_PKT_BUF_SIZE / PAGE_SIZE) + 1) > > /* Virtio-vsock feature */ > #define VIRTIO_VSOCK_F_MRG_RXBUF 0 /* Host can merge receive buffers. 
*/ > @@ -55,10 +57,12 @@ struct virtio_vsock_pkt { > struct list_head list; > /* socket refcnt not held, only use for cancellation */ > struct vsock_sock *vsk; > - void *buf; > + struct kvec vec[VIRTIO_VSOCK_MAX_VEC_NUM]; > + int nr_vecs; > u32 len; > u32 off; > bool reply; > + bool mergeable; > }; > > struct virtio_vsock_pkt_info { > diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c > index c4a465c..148b58a 100644 > --- a/net/vmw_vsock/virtio_transport.c > +++ b/net/vmw_vsock/virtio_transport.c > @@ -155,8 +155,10 @@ static int virtio_transport_send_pkt_loopback(struct virtio_vsock *vsock, > > sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); > sgs[out_sg++] = &hdr; > - if (pkt->buf) { > - sg_init_one(&buf, pkt->buf, pkt->len); > + if (pkt->len) { > + /* Currently only support a segment of memory in tx */ > + BUG_ON(pkt->vec[0].iov_len != pkt->len); > + sg_init_one(&buf, pkt->vec[0].iov_base, pkt->vec[0].iov_len); > sgs[out_sg++] = &buf; > } > > @@ -304,23 +306,28 @@ static int fill_old_rx_buff(struct virtqueue *vq) > struct virtio_vsock_pkt *pkt; > struct scatterlist hdr, buf, *sgs[2]; > int ret; > + void *pkt_buf; > > pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); > if (!pkt) > return -ENOMEM; > > - pkt->buf = kmalloc(buf_len, GFP_KERNEL); > - if (!pkt->buf) { > + pkt_buf = kmalloc(buf_len, GFP_KERNEL); > + if (!pkt_buf) { > virtio_transport_free_pkt(pkt); > return -ENOMEM; > } > > + pkt->vec[0].iov_base = pkt_buf; > + pkt->vec[0].iov_len = buf_len; > + pkt->nr_vecs = 1; > + > pkt->len = buf_len; > > sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); > sgs[0] = &hdr; > > - sg_init_one(&buf, pkt->buf, buf_len); > + sg_init_one(&buf, pkt->vec[0].iov_base, buf_len); > sgs[1] = &buf; > ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL); > if (ret) > @@ -388,11 +395,78 @@ static bool virtio_transport_more_replies(struct virtio_vsock *vsock) > return val < virtqueue_get_vring_size(vq); > } > > +static struct virtio_vsock_pkt 
*receive_mergeable(struct virtqueue *vq, > + struct virtio_vsock *vsock, unsigned int *total_len) > +{ > + struct virtio_vsock_pkt *pkt; > + u16 num_buf; > + void *buf; > + unsigned int len; > + size_t vsock_hlen = sizeof(struct virtio_vsock_pkt); > + > + buf = virtqueue_get_buf(vq, &len); > + if (!buf) > + return NULL; > + > + *total_len = len; > + vsock->rx_buf_nr--; > + > + if (unlikely(len < vsock_hlen)) { > + put_page(virt_to_head_page(buf)); > + return NULL; > + } > + > + pkt = buf; > + num_buf = le16_to_cpu(pkt->mrg_rxbuf_hdr.num_buffers); > + if (!num_buf || num_buf > VIRTIO_VSOCK_MAX_VEC_NUM) { > + put_page(virt_to_head_page(buf)); > + return NULL; > + }So everything just stops going, and host and user don't even know what the reason is. And not only that - the next packet will be corrupted because we skipped the first one.> + > + /* Initialize pkt residual structure */ > + memset(&pkt->work, 0, vsock_hlen - sizeof(struct virtio_vsock_hdr) - > + sizeof(struct virtio_vsock_mrg_rxbuf_hdr)); > + > + pkt->mergeable = true; > + pkt->len = le32_to_cpu(pkt->hdr.len); > + if (!pkt->len) > + return pkt; > + > + len -= vsock_hlen; > + if (len) { > + pkt->vec[pkt->nr_vecs].iov_base = buf + vsock_hlen; > + pkt->vec[pkt->nr_vecs].iov_len = len; > + /* Shared page with pkt, so get page in advance */ > + get_page(virt_to_head_page(buf)); > + pkt->nr_vecs++; > + } > + > + while (--num_buf) { > + buf = virtqueue_get_buf(vq, &len); > + if (!buf) > + goto err; > + > + *total_len += len; > + vsock->rx_buf_nr--; > + > + pkt->vec[pkt->nr_vecs].iov_base = buf; > + pkt->vec[pkt->nr_vecs].iov_len = len; > + pkt->nr_vecs++; > + } > + > + return pkt; > +err: > + virtio_transport_free_pkt(pkt); > + return NULL; > +} > + > static void virtio_transport_rx_work(struct work_struct *work) > { > struct virtio_vsock *vsock > container_of(work, struct virtio_vsock, rx_work); > struct virtqueue *vq; > + size_t vsock_hlen = vsock->mergeable ? 
sizeof(struct virtio_vsock_pkt) : > + sizeof(struct virtio_vsock_hdr); > > vq = vsock->vqs[VSOCK_VQ_RX]; > > @@ -412,21 +486,26 @@ static void virtio_transport_rx_work(struct work_struct *work) > goto out; > } > > - pkt = virtqueue_get_buf(vq, &len); > - if (!pkt) { > - break; > - } > + if (likely(vsock->mergeable)) { > + pkt = receive_mergeable(vq, vsock, &len); > + if (!pkt) > + break; > + } else { > + pkt = virtqueue_get_buf(vq, &len); > + if (!pkt) > + break; >So looking at it, this seems to be the main source of the gain. But why does this require host/guest changes? The way I see it: - get a buffer and create an skb - get the next one, check header matches, if yes tack it on the skb as a fragment. If not then don't, deliver previous one and queue the new one.> - vsock->rx_buf_nr--; > + vsock->rx_buf_nr--; > + } > > /* Drop short/long packets */ > - if (unlikely(len < sizeof(pkt->hdr) || > - len > sizeof(pkt->hdr) + pkt->len)) { > + if (unlikely(len < vsock_hlen || > + len > vsock_hlen + pkt->len)) { > virtio_transport_free_pkt(pkt); > continue; > } > > - pkt->len = len - sizeof(pkt->hdr); > + pkt->len = len - vsock_hlen; > virtio_transport_deliver_tap_pkt(pkt); > virtio_transport_recv_pkt(pkt); > } > diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c > index 3ae3a33..123a8b6 100644 > --- a/net/vmw_vsock/virtio_transport_common.c > +++ b/net/vmw_vsock/virtio_transport_common.c > @@ -44,6 +44,7 @@ static const struct virtio_transport *virtio_transport_get_ops(void) > { > struct virtio_vsock_pkt *pkt; > int err; > + void *buf = NULL; > > pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); > if (!pkt) > @@ -62,12 +63,16 @@ static const struct virtio_transport *virtio_transport_get_ops(void) > pkt->vsk = info->vsk; > > if (info->msg && len > 0) { > - pkt->buf = kmalloc(len, GFP_KERNEL); > - if (!pkt->buf) > + buf = kmalloc(len, GFP_KERNEL); > + if (!buf) > goto out_pkt; > - err = memcpy_from_msg(pkt->buf, info->msg, len); > + err = 
memcpy_from_msg(buf, info->msg, len); > if (err) > goto out; > + > + pkt->vec[0].iov_base = buf; > + pkt->vec[0].iov_len = len; > + pkt->nr_vecs = 1; > } > > trace_virtio_transport_alloc_pkt(src_cid, src_port, > @@ -80,7 +85,7 @@ static const struct virtio_transport *virtio_transport_get_ops(void) > return pkt; > > out: > - kfree(pkt->buf); > + kfree(buf); > out_pkt: > kfree(pkt); > return NULL; > @@ -92,6 +97,7 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) > struct virtio_vsock_pkt *pkt = opaque; > struct af_vsockmon_hdr *hdr; > struct sk_buff *skb; > + int i; > > skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + pkt->len, > GFP_ATOMIC); > @@ -134,7 +140,8 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) > skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr)); > > if (pkt->len) { > - skb_put_data(skb, pkt->buf, pkt->len); > + for (i = 0; i < pkt->nr_vecs; i++) > + skb_put_data(skb, pkt->vec[i].iov_base, pkt->vec[i].iov_len); > } > > return skb; > @@ -260,6 +267,9 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk, > > spin_lock_bh(&vvs->rx_lock); > while (total < len && !list_empty(&vvs->rx_queue)) { > + size_t copy_bytes, last_vec_total = 0, vec_off; > + int i; > + > pkt = list_first_entry(&vvs->rx_queue, > struct virtio_vsock_pkt, list); > > @@ -272,14 +282,28 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk, > */ > spin_unlock_bh(&vvs->rx_lock); > > - err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes); > - if (err) > - goto out; > + for (i = 0; i < pkt->nr_vecs; i++) { > + if (pkt->off > last_vec_total + pkt->vec[i].iov_len) { > + last_vec_total += pkt->vec[i].iov_len; > + continue; > + } > + > + vec_off = pkt->off - last_vec_total; > + copy_bytes = min(pkt->vec[i].iov_len - vec_off, bytes); > + err = memcpy_to_msg(msg, pkt->vec[i].iov_base + vec_off, > + copy_bytes); > + if (err) > + goto out; > + > + bytes -= copy_bytes; > + pkt->off += copy_bytes; > + total += copy_bytes; > + 
last_vec_total += pkt->vec[i].iov_len; > + if (!bytes) > + break; > + } > > spin_lock_bh(&vvs->rx_lock); > - > - total += bytes; > - pkt->off += bytes; > if (pkt->off == pkt->len) { > virtio_transport_dec_rx_pkt(vvs, pkt); > list_del(&pkt->list); > @@ -1050,8 +1074,17 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) > > void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt) > { > - kfree(pkt->buf); > - kfree(pkt); > + int i; > + > + if (pkt->mergeable) { > + for (i = 0; i < pkt->nr_vecs; i++) > + put_page(virt_to_head_page(pkt->vec[i].iov_base)); > + put_page(virt_to_head_page((void *)pkt)); > + } else { > + for (i = 0; i < pkt->nr_vecs; i++) > + kfree(pkt->vec[i].iov_base); > + kfree(pkt); > + } > } > EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); > > -- > 1.8.3.1 >
jiangyiwen
2018-Dec-13 02:38 UTC
[PATCH v2 3/5] VSOCK: support receive mergeable rx buffer in guest
Hi Michael, On 2018/12/12 23:31, Michael S. Tsirkin wrote:> On Wed, Dec 12, 2018 at 05:31:39PM +0800, jiangyiwen wrote: >> Guest receive mergeable rx buffer, it can merge >> scatter rx buffer into a big buffer and then copy >> to user space. >> >> In addition, it also use iovec to replace buf in struct >> virtio_vsock_pkt, keep tx and rx consistency. The only >> difference is now tx still uses a segment of continuous >> physical memory to implement. >> >> Signed-off-by: Yiwen Jiang <jiangyiwen at huawei.com> >> --- >> drivers/vhost/vsock.c | 31 +++++++--- >> include/linux/virtio_vsock.h | 6 +- >> net/vmw_vsock/virtio_transport.c | 105 ++++++++++++++++++++++++++++---- >> net/vmw_vsock/virtio_transport_common.c | 59 ++++++++++++++---- >> 4 files changed, 166 insertions(+), 35 deletions(-) > > > This was supposed to be a guest patch, why is vhost changed here? >In the mergeable rx buffer case, big packets need to be scattered into several buffers, so I added a kvec array to struct virtio_vsock_pkt. At the same time, in order to keep tx and rx consistent, I used kvec to replace the buf member. Because vhost uses pkt->buf, this patch has to change vhost as well.>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c >> index dc52b0f..c7ab0dd 100644 >> --- a/drivers/vhost/vsock.c >> +++ b/drivers/vhost/vsock.c >> @@ -179,6 +179,8 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, >> size_t nbytes; >> size_t len; >> s16 headcount; >> + size_t remain_len; >> + int i; >> >> spin_lock_bh(&vsock->send_pkt_list_lock); >> if (list_empty(&vsock->send_pkt_list)) { >> @@ -221,11 +223,19 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, >> break; >> } >> >> - nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter); >> - if (nbytes != pkt->len) { >> - virtio_transport_free_pkt(pkt); >> - vq_err(vq, "Faulted on copying pkt buf\n"); >> - break; >> + remain_len = pkt->len; >> + for (i = 0; i < pkt->nr_vecs; i++) { >> + int tmp_len; >> + >> + tmp_len = min(remain_len, 
pkt->vec[i].iov_len); >> + nbytes = copy_to_iter(pkt->vec[i].iov_base, tmp_len, &iov_iter); >> + if (nbytes != tmp_len) { >> + virtio_transport_free_pkt(pkt); >> + vq_err(vq, "Faulted on copying pkt buf\n"); >> + break; >> + } >> + >> + remain_len -= tmp_len; >> } >> >> vhost_add_used_n(vq, vq->heads, headcount); >> @@ -341,6 +351,7 @@ static void vhost_transport_send_pkt_work(struct vhost_work *work) >> struct iov_iter iov_iter; >> size_t nbytes; >> size_t len; >> + void *buf; >> >> if (in != 0) { >> vq_err(vq, "Expected 0 input buffers, got %u\n", in); >> @@ -375,13 +386,17 @@ static void vhost_transport_send_pkt_work(struct vhost_work *work) >> return NULL; >> } >> >> - pkt->buf = kmalloc(pkt->len, GFP_KERNEL); >> - if (!pkt->buf) { >> + buf = kmalloc(pkt->len, GFP_KERNEL); >> + if (!buf) { >> kfree(pkt); >> return NULL; >> } >> >> - nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter); >> + pkt->vec[0].iov_base = buf; >> + pkt->vec[0].iov_len = pkt->len; >> + pkt->nr_vecs = 1; >> + >> + nbytes = copy_from_iter(buf, pkt->len, &iov_iter); >> if (nbytes != pkt->len) { >> vq_err(vq, "Expected %u byte payload, got %zu bytes\n", >> pkt->len, nbytes); >> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h >> index da9e1fe..734eeed 100644 >> --- a/include/linux/virtio_vsock.h >> +++ b/include/linux/virtio_vsock.h >> @@ -13,6 +13,8 @@ >> #define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) >> #define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL >> #define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) >> +/* virtio_vsock_pkt + max_pkt_len(default MAX_PKT_BUF_SIZE) */ >> +#define VIRTIO_VSOCK_MAX_VEC_NUM ((VIRTIO_VSOCK_MAX_PKT_BUF_SIZE / PAGE_SIZE) + 1) >> >> /* Virtio-vsock feature */ >> #define VIRTIO_VSOCK_F_MRG_RXBUF 0 /* Host can merge receive buffers. 
*/ >> @@ -55,10 +57,12 @@ struct virtio_vsock_pkt { >> struct list_head list; >> /* socket refcnt not held, only use for cancellation */ >> struct vsock_sock *vsk; >> - void *buf; >> + struct kvec vec[VIRTIO_VSOCK_MAX_VEC_NUM]; >> + int nr_vecs; >> u32 len; >> u32 off; >> bool reply; >> + bool mergeable; >> }; >> >> struct virtio_vsock_pkt_info { >> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c >> index c4a465c..148b58a 100644 >> --- a/net/vmw_vsock/virtio_transport.c >> +++ b/net/vmw_vsock/virtio_transport.c >> @@ -155,8 +155,10 @@ static int virtio_transport_send_pkt_loopback(struct virtio_vsock *vsock, >> >> sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); >> sgs[out_sg++] = &hdr; >> - if (pkt->buf) { >> - sg_init_one(&buf, pkt->buf, pkt->len); >> + if (pkt->len) { >> + /* Currently only support a segment of memory in tx */ >> + BUG_ON(pkt->vec[0].iov_len != pkt->len); >> + sg_init_one(&buf, pkt->vec[0].iov_base, pkt->vec[0].iov_len); >> sgs[out_sg++] = &buf; >> } >> >> @@ -304,23 +306,28 @@ static int fill_old_rx_buff(struct virtqueue *vq) >> struct virtio_vsock_pkt *pkt; >> struct scatterlist hdr, buf, *sgs[2]; >> int ret; >> + void *pkt_buf; >> >> pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); >> if (!pkt) >> return -ENOMEM; >> >> - pkt->buf = kmalloc(buf_len, GFP_KERNEL); >> - if (!pkt->buf) { >> + pkt_buf = kmalloc(buf_len, GFP_KERNEL); >> + if (!pkt_buf) { >> virtio_transport_free_pkt(pkt); >> return -ENOMEM; >> } >> >> + pkt->vec[0].iov_base = pkt_buf; >> + pkt->vec[0].iov_len = buf_len; >> + pkt->nr_vecs = 1; >> + >> pkt->len = buf_len; >> >> sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); >> sgs[0] = &hdr; >> >> - sg_init_one(&buf, pkt->buf, buf_len); >> + sg_init_one(&buf, pkt->vec[0].iov_base, buf_len); >> sgs[1] = &buf; >> ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL); >> if (ret) >> @@ -388,11 +395,78 @@ static bool virtio_transport_more_replies(struct virtio_vsock *vsock) >> return val < 
virtqueue_get_vring_size(vq); >> } >> >> +static struct virtio_vsock_pkt *receive_mergeable(struct virtqueue *vq, >> + struct virtio_vsock *vsock, unsigned int *total_len) >> +{ >> + struct virtio_vsock_pkt *pkt; >> + u16 num_buf; >> + void *buf; >> + unsigned int len; >> + size_t vsock_hlen = sizeof(struct virtio_vsock_pkt); >> + >> + buf = virtqueue_get_buf(vq, &len); >> + if (!buf) >> + return NULL; >> + >> + *total_len = len; >> + vsock->rx_buf_nr--; >> + >> + if (unlikely(len < vsock_hlen)) { >> + put_page(virt_to_head_page(buf)); >> + return NULL; >> + } >> + >> + pkt = buf; >> + num_buf = le16_to_cpu(pkt->mrg_rxbuf_hdr.num_buffers); >> + if (!num_buf || num_buf > VIRTIO_VSOCK_MAX_VEC_NUM) { >> + put_page(virt_to_head_page(buf)); >> + return NULL; >> + } > > So everything just stops going, and host and user don't even > know what the reason is. And not only that - the next > packet will be corrupted because we skipped the first one. > >I understand that this case will not be encountered unless the code has a *BUG*, e.g. the Host sends some problematic packets (shorter/longer than expected). 
In this case, I think we should ignore/drop these packets.> >> + >> + /* Initialize pkt residual structure */ >> + memset(&pkt->work, 0, vsock_hlen - sizeof(struct virtio_vsock_hdr) - >> + sizeof(struct virtio_vsock_mrg_rxbuf_hdr)); >> + >> + pkt->mergeable = true; >> + pkt->len = le32_to_cpu(pkt->hdr.len); >> + if (!pkt->len) >> + return pkt; >> + >> + len -= vsock_hlen; >> + if (len) { >> + pkt->vec[pkt->nr_vecs].iov_base = buf + vsock_hlen; >> + pkt->vec[pkt->nr_vecs].iov_len = len; >> + /* Shared page with pkt, so get page in advance */ >> + get_page(virt_to_head_page(buf)); >> + pkt->nr_vecs++; >> + } >> + >> + while (--num_buf) { >> + buf = virtqueue_get_buf(vq, &len); >> + if (!buf) >> + goto err; >> + >> + *total_len += len; >> + vsock->rx_buf_nr--; >> + >> + pkt->vec[pkt->nr_vecs].iov_base = buf; >> + pkt->vec[pkt->nr_vecs].iov_len = len; >> + pkt->nr_vecs++; >> + } >> + >> + return pkt; >> +err: >> + virtio_transport_free_pkt(pkt); >> + return NULL; >> +} >> + >> static void virtio_transport_rx_work(struct work_struct *work) >> { >> struct virtio_vsock *vsock >> container_of(work, struct virtio_vsock, rx_work); >> struct virtqueue *vq; >> + size_t vsock_hlen = vsock->mergeable ? sizeof(struct virtio_vsock_pkt) : >> + sizeof(struct virtio_vsock_hdr); >> >> vq = vsock->vqs[VSOCK_VQ_RX]; >> >> @@ -412,21 +486,26 @@ static void virtio_transport_rx_work(struct work_struct *work) >> goto out; >> } >> >> - pkt = virtqueue_get_buf(vq, &len); >> - if (!pkt) { >> - break; >> - } >> + if (likely(vsock->mergeable)) { >> + pkt = receive_mergeable(vq, vsock, &len); >> + if (!pkt) >> + break; >> + } else { >> + pkt = virtqueue_get_buf(vq, &len); >> + if (!pkt) >> + break; >> > > So looking at it, this seems to be the main source of the gain. > But why does this require host/guest changes? > > > The way I see it: > - get a buffer and create an skb > - get the next one, check header matches, if yes > tack it on the skb as a fragment. 
If not then > don't, deliver previous one and queue the new one. > >The reason for the vhost change is explained above. I would like to use kvec instead of buf; after all, buf can only describe a single contiguous region of physical memory. Thanks, Yiwen.> >> - vsock->rx_buf_nr--; >> + vsock->rx_buf_nr--; >> + } >> >> /* Drop short/long packets */ >> - if (unlikely(len < sizeof(pkt->hdr) || >> - len > sizeof(pkt->hdr) + pkt->len)) { >> + if (unlikely(len < vsock_hlen || >> + len > vsock_hlen + pkt->len)) { >> virtio_transport_free_pkt(pkt); >> continue; >> } >> >> - pkt->len = len - sizeof(pkt->hdr); >> + pkt->len = len - vsock_hlen; >> virtio_transport_deliver_tap_pkt(pkt); >> virtio_transport_recv_pkt(pkt); >> } >> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c >> index 3ae3a33..123a8b6 100644 >> --- a/net/vmw_vsock/virtio_transport_common.c >> +++ b/net/vmw_vsock/virtio_transport_common.c >> @@ -44,6 +44,7 @@ static const struct virtio_transport *virtio_transport_get_ops(void) >> { >> struct virtio_vsock_pkt *pkt; >> int err; >> + void *buf = NULL; >> >> pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); >> if (!pkt) >> @@ -62,12 +63,16 @@ static const struct virtio_transport *virtio_transport_get_ops(void) >> pkt->vsk = info->vsk; >> >> if (info->msg && len > 0) { >> - pkt->buf = kmalloc(len, GFP_KERNEL); >> - if (!pkt->buf) >> + buf = kmalloc(len, GFP_KERNEL); >> + if (!buf) >> goto out_pkt; >> - err = memcpy_from_msg(pkt->buf, info->msg, len); >> + err = memcpy_from_msg(buf, info->msg, len); >> if (err) >> goto out; >> + >> + pkt->vec[0].iov_base = buf; >> + pkt->vec[0].iov_len = len; >> + pkt->nr_vecs = 1; >> } >> >> trace_virtio_transport_alloc_pkt(src_cid, src_port, >> @@ -80,7 +85,7 @@ static const struct virtio_transport *virtio_transport_get_ops(void) >> return pkt; >> >> out: >> - kfree(pkt->buf); >> + kfree(buf); >> out_pkt: >> kfree(pkt); >> return NULL; >> @@ -92,6 +97,7 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) >> struct 
virtio_vsock_pkt *pkt = opaque; >> struct af_vsockmon_hdr *hdr; >> struct sk_buff *skb; >> + int i; >> >> skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + pkt->len, >> GFP_ATOMIC); >> @@ -134,7 +140,8 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) >> skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr)); >> >> if (pkt->len) { >> - skb_put_data(skb, pkt->buf, pkt->len); >> + for (i = 0; i < pkt->nr_vecs; i++) >> + skb_put_data(skb, pkt->vec[i].iov_base, pkt->vec[i].iov_len); >> } >> >> return skb; >> @@ -260,6 +267,9 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk, >> >> spin_lock_bh(&vvs->rx_lock); >> while (total < len && !list_empty(&vvs->rx_queue)) { >> + size_t copy_bytes, last_vec_total = 0, vec_off; >> + int i; >> + >> pkt = list_first_entry(&vvs->rx_queue, >> struct virtio_vsock_pkt, list); >> >> @@ -272,14 +282,28 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk, >> */ >> spin_unlock_bh(&vvs->rx_lock); >> >> - err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes); >> - if (err) >> - goto out; >> + for (i = 0; i < pkt->nr_vecs; i++) { >> + if (pkt->off > last_vec_total + pkt->vec[i].iov_len) { >> + last_vec_total += pkt->vec[i].iov_len; >> + continue; >> + } >> + >> + vec_off = pkt->off - last_vec_total; >> + copy_bytes = min(pkt->vec[i].iov_len - vec_off, bytes); >> + err = memcpy_to_msg(msg, pkt->vec[i].iov_base + vec_off, >> + copy_bytes); >> + if (err) >> + goto out; >> + >> + bytes -= copy_bytes; >> + pkt->off += copy_bytes; >> + total += copy_bytes; >> + last_vec_total += pkt->vec[i].iov_len; >> + if (!bytes) >> + break; >> + } >> >> spin_lock_bh(&vvs->rx_lock); >> - >> - total += bytes; >> - pkt->off += bytes; >> if (pkt->off == pkt->len) { >> virtio_transport_dec_rx_pkt(vvs, pkt); >> list_del(&pkt->list); >> @@ -1050,8 +1074,17 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) >> >> void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt) >> { >> - kfree(pkt->buf); >> - 
kfree(pkt); >> + int i; >> + >> + if (pkt->mergeable) { >> + for (i = 0; i < pkt->nr_vecs; i++) >> + put_page(virt_to_head_page(pkt->vec[i].iov_base)); >> + put_page(virt_to_head_page((void *)pkt)); >> + } else { >> + for (i = 0; i < pkt->nr_vecs; i++) >> + kfree(pkt->vec[i].iov_base); >> + kfree(pkt); >> + } >> } >> EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); >> >> -- >> 1.8.3.1 >> > > . >
Stefan Hajnoczi
2018-Dec-13 16:20 UTC
[PATCH v2 3/5] VSOCK: support receive mergeable rx buffer in guest
On Wed, Dec 12, 2018 at 05:31:39PM +0800, jiangyiwen wrote:
> +static struct virtio_vsock_pkt *receive_mergeable(struct virtqueue *vq, > + struct virtio_vsock *vsock, unsigned int *total_len) > +{ > + struct virtio_vsock_pkt *pkt; > + u16 num_buf; > + void *buf; > + unsigned int len; > + size_t vsock_hlen = sizeof(struct virtio_vsock_pkt); > + > + buf = virtqueue_get_buf(vq, &len); > + if (!buf) > + return NULL; > + > + *total_len = len; > + vsock->rx_buf_nr--; > + > + if (unlikely(len < vsock_hlen)) { > + put_page(virt_to_head_page(buf)); > + return NULL; > + } > + > + pkt = buf; > + num_buf = le16_to_cpu(pkt->mrg_rxbuf_hdr.num_buffers); > + if (!num_buf || num_buf > VIRTIO_VSOCK_MAX_VEC_NUM) { > + put_page(virt_to_head_page(buf)); > + return NULL; > + } > + > + /* Initialize pkt residual structure */ > + memset(&pkt->work, 0, vsock_hlen - sizeof(struct virtio_vsock_hdr) - > + sizeof(struct virtio_vsock_mrg_rxbuf_hdr));

struct virtio_vsock_pkt is an internal driver state structure. It must be able to change without breaking the host<->guest device interface. Exposing struct virtio_vsock_pkt across the device interface makes this impossible. One way to solve this is by placing a header length field at the beginning of the mergeable buffer. That way the device knows at which offset the payload should be placed and the driver can continue to use the allocation scheme you've chosen (where a single page contains the virtio_vsock_pkt and payload). Another issue is that virtio requires exclusive read OR write descriptors. You cannot have a descriptor that is both read and write. So there's not a huge advantage to combining the driver state struct and payload, except maybe the driver can save a memory allocation.

-------------- next part -------------- A non-text attachment was scrubbed... 
Name: signature.asc Type: application/pgp-signature Size: 455 bytes Desc: not available URL: <http://lists.linuxfoundation.org/pipermail/virtualization/attachments/20181213/837b0845/attachment.sig>
jiangyiwen
2018-Dec-14 08:41 UTC
[PATCH v2 3/5] VSOCK: support receive mergeable rx buffer in guest
On 2018/12/14 0:20, Stefan Hajnoczi wrote:
> On Wed, Dec 12, 2018 at 05:31:39PM +0800, jiangyiwen wrote: >> +static struct virtio_vsock_pkt *receive_mergeable(struct virtqueue *vq, >> + struct virtio_vsock *vsock, unsigned int *total_len) >> +{ >> + struct virtio_vsock_pkt *pkt; >> + u16 num_buf; >> + void *buf; >> + unsigned int len; >> + size_t vsock_hlen = sizeof(struct virtio_vsock_pkt); >> + >> + buf = virtqueue_get_buf(vq, &len); >> + if (!buf) >> + return NULL; >> + >> + *total_len = len; >> + vsock->rx_buf_nr--; >> + >> + if (unlikely(len < vsock_hlen)) { >> + put_page(virt_to_head_page(buf)); >> + return NULL; >> + } >> + >> + pkt = buf; >> + num_buf = le16_to_cpu(pkt->mrg_rxbuf_hdr.num_buffers); >> + if (!num_buf || num_buf > VIRTIO_VSOCK_MAX_VEC_NUM) { >> + put_page(virt_to_head_page(buf)); >> + return NULL; >> + } >> + >> + /* Initialize pkt residual structure */ >> + memset(&pkt->work, 0, vsock_hlen - sizeof(struct virtio_vsock_hdr) - >> + sizeof(struct virtio_vsock_mrg_rxbuf_hdr)); > > struct virtio_vsock_pkt is an internal driver state structure. It must > be able to change without breaking the host<->guest device interface. > Exposing struct virtio_vsock_pkt across the device interface makes this > impossible. > > One way to solve this is by placing a header length field at the > beginning of the mergeable buffer. That way the device knows at which > offset the payload should be placed and the driver can continue to use > the allocation scheme you've chosen (where a single page contains the > virtio_vsock_pkt and payload). > > Another issue is that virtio requires exclusive read OR write > descriptors. You cannot have a descriptor that is both read and write. > So there's not a huge advantage to combining the driver state struct and > payload, except maybe the driver can save a memory allocation. >

Right, I will separate pkt and payload in different pages, as you mentioned in another patch. Thanks, Yiwen.
Reasonably Related Threads
- [PATCH v2 3/5] VSOCK: support receive mergeable rx buffer in guest
- [PATCH v2 3/5] VSOCK: support receive mergeable rx buffer in guest
- [PATCH v2 3/5] VSOCK: support receive mergeable rx buffer in guest
- [PATCH v2 3/5] VSOCK: support receive mergeable rx buffer in guest
- [PATCH 3/5] VSOCK: support receive mergeable rx buffer in guest