David Riley
2019-Sep-05  22:00 UTC
[PATCH v2] drm/virtio: Use vmalloc for command buffer allocations.
Userspace requested command buffer allocations could be too large
to make as a contiguous allocation.  Use vmalloc if necessary to
satisfy those allocations.
Signed-off-by: David Riley <davidriley at chromium.org>
---
 drivers/gpu/drm/virtio/virtgpu_ioctl.c |   4 +-
 drivers/gpu/drm/virtio/virtgpu_vq.c    | 114 ++++++++++++++++++++-----
 2 files changed, 96 insertions(+), 22 deletions(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index ac60be9b5c19..a8732a8af766 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -195,7 +195,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device
*dev, void *data,
 	if (ret)
 		goto out_free;
 
-	buf = memdup_user(u64_to_user_ptr(exbuf->command), exbuf->size);
+	buf = vmemdup_user(u64_to_user_ptr(exbuf->command), exbuf->size);
 	if (IS_ERR(buf)) {
 		ret = PTR_ERR(buf);
 		goto out_unresv;
@@ -230,7 +230,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device
*dev, void *data,
 	return 0;
 
 out_memdup:
-	kfree(buf);
+	kvfree(buf);
 out_unresv:
 	ttm_eu_backoff_reservation(&ticket, &validate_list);
 out_free:
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c
b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 981ee16e3ee9..3ec89ae8478c 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -154,7 +154,7 @@ static void free_vbuf(struct virtio_gpu_device *vgdev,
 {
 	if (vbuf->resp_size > MAX_INLINE_RESP_SIZE)
 		kfree(vbuf->resp_buf);
-	kfree(vbuf->data_buf);
+	kvfree(vbuf->data_buf);
 	kmem_cache_free(vgdev->vbufs, vbuf);
 }
 
@@ -251,13 +251,70 @@ void virtio_gpu_dequeue_cursor_func(struct work_struct
*work)
 	wake_up(&vgdev->cursorq.ack_queue);
 }
 
+/* How many bytes left in this page. */
+static unsigned int rest_of_page(void *data)
+{
+	return PAGE_SIZE - offset_in_page(data);
+}
+
+/* Create sg_table from a vmalloc'd buffer. */
+static struct sg_table *vmalloc_to_sgt(char *data, uint32_t size, int *sg_ents)
+{
+	int nents, ret, s, i;
+	struct sg_table *sgt;
+	struct scatterlist *sg;
+	struct page *pg;
+
+	*sg_ents = 0;
+
+	sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
+	if (!sgt)
+		return NULL;
+
+	nents = DIV_ROUND_UP(size, PAGE_SIZE) + 1;
+	ret = sg_alloc_table(sgt, nents, GFP_KERNEL);
+	if (ret) {
+		kfree(sgt);
+		return NULL;
+	}
+
+	for_each_sg(sgt->sgl, sg, nents, i) {
+		pg = vmalloc_to_page(data);
+		if (!pg) {
+			sg_free_table(sgt);
+			kfree(sgt);
+			return NULL;
+		}
+
+		s = rest_of_page(data);
+		if (s > size)
+			s = size;
+
+		sg_set_page(sg, pg, s, offset_in_page(data));
+
+		size -= s;
+		data += s;
+		*sg_ents += 1;
+
+		if (size) {
+			sg_unmark_end(sg);
+		} else {
+			sg_mark_end(sg);
+			break;
+		}
+	}
+
+	return sgt;
+}
+
 static int virtio_gpu_queue_ctrl_buffer_locked(struct virtio_gpu_device *vgdev,
-					       struct virtio_gpu_vbuffer *vbuf)
+					       struct virtio_gpu_vbuffer *vbuf,
+					       struct scatterlist *vout)
 		__releases(&vgdev->ctrlq.qlock)
 		__acquires(&vgdev->ctrlq.qlock)
 {
 	struct virtqueue *vq = vgdev->ctrlq.vq;
-	struct scatterlist *sgs[3], vcmd, vout, vresp;
+	struct scatterlist *sgs[3], vcmd, vresp;
 	int outcnt = 0, incnt = 0;
 	int ret;
 
@@ -268,9 +325,8 @@ static int virtio_gpu_queue_ctrl_buffer_locked(struct
virtio_gpu_device *vgdev,
 	sgs[outcnt + incnt] = &vcmd;
 	outcnt++;
 
-	if (vbuf->data_size) {
-		sg_init_one(&vout, vbuf->data_buf, vbuf->data_size);
-		sgs[outcnt + incnt] = &vout;
+	if (vout) {
+		sgs[outcnt + incnt] = vout;
 		outcnt++;
 	}
 
@@ -299,24 +355,30 @@ static int virtio_gpu_queue_ctrl_buffer_locked(struct
virtio_gpu_device *vgdev,
 	return ret;
 }
 
-static int virtio_gpu_queue_ctrl_buffer(struct virtio_gpu_device *vgdev,
-					struct virtio_gpu_vbuffer *vbuf)
-{
-	int rc;
-
-	spin_lock(&vgdev->ctrlq.qlock);
-	rc = virtio_gpu_queue_ctrl_buffer_locked(vgdev, vbuf);
-	spin_unlock(&vgdev->ctrlq.qlock);
-	return rc;
-}
-
 static int virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
 					       struct virtio_gpu_vbuffer *vbuf,
 					       struct virtio_gpu_ctrl_hdr *hdr,
 					       struct virtio_gpu_fence *fence)
 {
 	struct virtqueue *vq = vgdev->ctrlq.vq;
+	struct scatterlist *vout = NULL, sg;
+	struct sg_table *sgt = NULL;
 	int rc;
+	int outcnt = 0;
+
+	if (vbuf->data_size) {
+		if (is_vmalloc_addr(vbuf->data_buf)) {
+			sgt = vmalloc_to_sgt(vbuf->data_buf, vbuf->data_size,
+					     &outcnt);
+			if (!sgt)
+				return -ENOMEM;
+			vout = sgt->sgl;
+		} else {
+			sg_init_one(&sg, vbuf->data_buf, vbuf->data_size);
+			vout = &sg;
+			outcnt = 1;
+		}
+	}
 
 again:
 	spin_lock(&vgdev->ctrlq.qlock);
@@ -329,19 +391,31 @@ static int virtio_gpu_queue_fenced_ctrl_buffer(struct
virtio_gpu_device *vgdev,
 	 * to wait for free space, which can result in fence ids being
 	 * submitted out-of-order.
 	 */
-	if (vq->num_free < 3) {
+	if (vq->num_free < 2 + outcnt) {
 		spin_unlock(&vgdev->ctrlq.qlock);
 		wait_event(vgdev->ctrlq.ack_queue, vq->num_free >= 3);
 		goto again;
 	}
 
-	if (fence)
+	if (hdr && fence)
 		virtio_gpu_fence_emit(vgdev, hdr, fence);
-	rc = virtio_gpu_queue_ctrl_buffer_locked(vgdev, vbuf);
+	rc = virtio_gpu_queue_ctrl_buffer_locked(vgdev, vbuf, vout);
 	spin_unlock(&vgdev->ctrlq.qlock);
+
+	if (sgt) {
+		sg_free_table(sgt);
+		kfree(sgt);
+	}
+
 	return rc;
 }
 
+static int virtio_gpu_queue_ctrl_buffer(struct virtio_gpu_device *vgdev,
+					struct virtio_gpu_vbuffer *vbuf)
+{
+	return virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, NULL, NULL);
+}
+
 static int virtio_gpu_queue_cursor(struct virtio_gpu_device *vgdev,
 				   struct virtio_gpu_vbuffer *vbuf)
 {
-- 
2.23.0.187.g17f5b7556c-goog
Gerd Hoffmann
2019-Sep-06  05:18 UTC
[PATCH v2] drm/virtio: Use vmalloc for command buffer allocations.
> +/* How many bytes left in this page. */ > +static unsigned int rest_of_page(void *data) > +{ > + return PAGE_SIZE - offset_in_page(data); > +}Not needed.> +/* Create sg_table from a vmalloc'd buffer. */ > +static struct sg_table *vmalloc_to_sgt(char *data, uint32_t size, int *sg_ents) > +{ > + int nents, ret, s, i; > + struct sg_table *sgt; > + struct scatterlist *sg; > + struct page *pg; > + > + *sg_ents = 0; > + > + sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); > + if (!sgt) > + return NULL; > + > + nents = DIV_ROUND_UP(size, PAGE_SIZE) + 1;Why +1?> + ret = sg_alloc_table(sgt, nents, GFP_KERNEL); > + if (ret) { > + kfree(sgt); > + return NULL; > + } > + > + for_each_sg(sgt->sgl, sg, nents, i) { > + pg = vmalloc_to_page(data); > + if (!pg) { > + sg_free_table(sgt); > + kfree(sgt); > + return NULL; > + } > + > + s = rest_of_page(data); > + if (s > size) > + s = size;vmalloc memory is page aligned, so: s = min(PAGE_SIZE, size);> + sg_set_page(sg, pg, s, offset_in_page(data));Offset is always zero.> + > + size -= s; > + data += s; > + *sg_ents += 1;sg_ents isn't used anywhere.> + > + if (size) { > + sg_unmark_end(sg); > + } else { > + sg_mark_end(sg); > + break; > + }That looks a bit strange. I guess you need only one of the two because the other is the default?> static int virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev, > struct virtio_gpu_vbuffer *vbuf, > struct virtio_gpu_ctrl_hdr *hdr, > struct virtio_gpu_fence *fence) > { > struct virtqueue *vq = vgdev->ctrlq.vq; > + struct scatterlist *vout = NULL, sg; > + struct sg_table *sgt = NULL; > int rc; > + int outcnt = 0; > + > + if (vbuf->data_size) { > + if (is_vmalloc_addr(vbuf->data_buf)) { > + sgt = vmalloc_to_sgt(vbuf->data_buf, vbuf->data_size, > + &outcnt); > + if (!sgt) > + return -ENOMEM; > + vout = sgt->sgl; > + } else { > + sg_init_one(&sg, vbuf->data_buf, vbuf->data_size); > + vout = &sg; > + outcnt = 1;outcnt must be set in both cases.> +static int virtio_gpu_queue_ctrl_buffer(struct virtio_gpu_device *vgdev, > + struct virtio_gpu_vbuffer *vbuf) > +{ > + return virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, NULL, NULL); > +}Changing virtio_gpu_queue_ctrl_buffer to call virtio_gpu_queue_fenced_ctrl_buffer should be done in a separate patch. cheers, Gerd
David Riley
2019-Sep-09  17:12 UTC
[PATCH v2] drm/virtio: Use vmalloc for command buffer allocations.
On Thu, Sep 5, 2019 at 10:18 PM Gerd Hoffmann <kraxel at redhat.com> wrote:> > > +/* How many bytes left in this page. */ > > +static unsigned int rest_of_page(void *data) > > +{ > > + return PAGE_SIZE - offset_in_page(data); > > +} > > Not needed. > > > +/* Create sg_table from a vmalloc'd buffer. */ > > +static struct sg_table *vmalloc_to_sgt(char *data, uint32_t size, int *sg_ents) > > +{ > > + int nents, ret, s, i; > > + struct sg_table *sgt; > > + struct scatterlist *sg; > > + struct page *pg; > > + > > + *sg_ents = 0; > > + > > + sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); > > + if (!sgt) > > + return NULL; > > + > > + nents = DIV_ROUND_UP(size, PAGE_SIZE) + 1; > > Why +1?This is part of handling offsets within the vmalloc buffer and to maintain parity with the !is_vmalloc_addr/existing case (sg_init_one handles offsets within pages internally). I had left it in because this is being used for all sg/descriptor generation and I wasn't sure if someone in the future might do something like: buf = vmemdup_user() offset = find_interesting(buf) queue(buf + offset) To respond specifically to your question, if we handle offsets, a vmalloc_to_sgt(size = PAGE_SIZE + 2) could end up with 3 sg_ents with the +1 being to account for that extra page. I'll just remove all support for offsets in v3 of the patch and comment that functionality will be different based on where the buffer was originally allocated from.> > > + ret = sg_alloc_table(sgt, nents, GFP_KERNEL); > > + if (ret) { > > + kfree(sgt); > > + return NULL; > > + } > > + > > + for_each_sg(sgt->sgl, sg, nents, i) { > > + pg = vmalloc_to_page(data); > > + if (!pg) { > > + sg_free_table(sgt); > > + kfree(sgt); > > + return NULL; > > + } > > + > > + s = rest_of_page(data); > > + if (s > size) > > + s = size; > > vmalloc memory is page aligned, so:As per above, will remove with v3.> > s = min(PAGE_SIZE, size); > > > + sg_set_page(sg, pg, s, offset_in_page(data)); > > Offset is always zero.As per above, will remove with v3.> > > + > > + size -= s; > > + data += s; > > + *sg_ents += 1; > > sg_ents isn't used anywhere.It's used for outcnt.> > > + > > + if (size) { > > + sg_unmark_end(sg); > > + } else { > > + sg_mark_end(sg); > > + break; > > + } > > That looks a bit strange. I guess you need only one of the two because > the other is the default?I was being overly paranoid and not wanting to make assumptions about the initial state of the table. I'll simplify.> > > static int virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev, > > struct virtio_gpu_vbuffer *vbuf, > > struct virtio_gpu_ctrl_hdr *hdr, > > struct virtio_gpu_fence *fence) > > { > > struct virtqueue *vq = vgdev->ctrlq.vq; > > + struct scatterlist *vout = NULL, sg; > > + struct sg_table *sgt = NULL; > > int rc; > > + int outcnt = 0; > > + > > + if (vbuf->data_size) { > > + if (is_vmalloc_addr(vbuf->data_buf)) { > > + sgt = vmalloc_to_sgt(vbuf->data_buf, vbuf->data_size, > > + &outcnt); > > + if (!sgt) > > + return -ENOMEM; > > + vout = sgt->sgl; > > + } else { > > + sg_init_one(&sg, vbuf->data_buf, vbuf->data_size); > > + vout = &sg; > > + outcnt = 1; > > outcnt must be set in both cases.outcnt is set by vmalloc_to_sgt.> > > +static int virtio_gpu_queue_ctrl_buffer(struct virtio_gpu_device *vgdev, > > + struct virtio_gpu_vbuffer *vbuf) > > +{ > > + return virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, NULL, NULL); > > +} > > Changing virtio_gpu_queue_ctrl_buffer to call > virtio_gpu_queue_fenced_ctrl_buffer should be done in a separate patch.Will do. Thanks, David
Possibly Parallel Threads
- [PATCH v2] drm/virtio: Use vmalloc for command buffer allocations.
- [PATCH v2] drm/virtio: Use vmalloc for command buffer allocations.
- [PATCH v3 2/2] drm/virtio: Use vmalloc for command buffer allocations.
- [PATCH v4 2/2] drm/virtio: Use vmalloc for command buffer allocations.
- [PATCH 2/3] drm/virtio: return virtio_gpu_queue errors