Hi all: The code used to busy poll for cvq command which turns out to have several side effects: 1) infinite poll for buggy devices 2) bad interaction with scheduler So this series tries to use sleep + timeout instead of busy polling. Please review. Thanks Changes since RFC: - switch to use BAD_RING in virtio_break_device() - check virtqueue_is_broken() after being woken up - use more_used() instead of virtqueue_get_buf() to allow caller to get buffers afterwards - break the virtio-net device when timeout - get buffer manually since the virtio core check more_used() instead Jason Wang (4): virtio-net: convert rx mode setting to use workqueue virtio_ring: switch to use BAD_RING() virtio_ring: introduce a per virtqueue waitqueue virtio-net: sleep instead of busy waiting for cvq command drivers/net/virtio_net.c | 90 +++++++++++++++++++++++++++++++----- drivers/virtio/virtio_ring.c | 37 +++++++++++++-- include/linux/virtio.h | 3 ++ 3 files changed, 115 insertions(+), 15 deletions(-) -- 2.25.1
Jason Wang
2022-Dec-26 07:49 UTC
[PATCH 1/4] virtio-net: convert rx mode setting to use workqueue
This patch convert rx mode setting to be done in a workqueue, this is a must for allow to sleep when waiting for the cvq command to response since current code is executed under addr spin lock. Signed-off-by: Jason Wang <jasowang at redhat.com> --- drivers/net/virtio_net.c | 66 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 86e52454b5b5..efd9dd55828b 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -260,6 +260,15 @@ struct virtnet_info { /* Work struct for config space updates */ struct work_struct config_work; + /* Work struct for config rx mode */ + struct work_struct rx_mode_work; + + /* Is rx_mode_work enabled? */ + bool rx_mode_work_enabled; + + /* The lock to synchronize the access to refill_enabled */ + spinlock_t rx_mode_lock; + /* Does the affinity hint is set for virtqueues? */ bool affinity_hint_set; @@ -383,6 +392,22 @@ static void disable_delayed_refill(struct virtnet_info *vi) spin_unlock_bh(&vi->refill_lock); } +static void enable_rx_mode_work(struct virtnet_info *vi) +{ + spin_lock_bh(&vi->rx_mode_lock); + vi->rx_mode_work_enabled = true; + spin_unlock_bh(&vi->rx_mode_lock); +} + +static void disable_rx_mode_work(struct virtnet_info *vi) +{ + spin_lock_bh(&vi->rx_mode_lock); + vi->rx_mode_work_enabled = false; + spin_unlock_bh(&vi->rx_mode_lock); + + flush_work(&vi->rx_mode_work); +} + static void virtqueue_napi_schedule(struct napi_struct *napi, struct virtqueue *vq) { @@ -1974,6 +1999,8 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, /* Caller should know better */ BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); + ASSERT_RTNL(); + vi->ctrl->status = ~0; vi->ctrl->hdr.class = class; vi->ctrl->hdr.cmd = cmd; @@ -2160,9 +2187,11 @@ static int virtnet_close(struct net_device *dev) return 0; } -static void virtnet_set_rx_mode(struct net_device *dev) +static void virtnet_rx_mode_work(struct work_struct *work) { - struct virtnet_info *vi = netdev_priv(dev); + struct virtnet_info *vi + container_of(work, struct virtnet_info, rx_mode_work); + struct net_device *dev = vi->dev; struct scatterlist sg[2]; struct virtio_net_ctrl_mac *mac_data; struct netdev_hw_addr *ha; @@ -2175,8 +2204,12 @@ static void virtnet_set_rx_mode(struct net_device *dev) if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) return; + rtnl_lock(); + + netif_addr_lock_bh(dev); vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0); vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0); + netif_addr_unlock_bh(dev); sg_init_one(sg, &vi->ctrl->promisc, sizeof(vi->ctrl->promisc)); @@ -2192,14 +2225,19 @@ static void virtnet_set_rx_mode(struct net_device *dev) dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", vi->ctrl->allmulti ? "en" : "dis"); + netif_addr_lock_bh(dev); + uc_count = netdev_uc_count(dev); mc_count = netdev_mc_count(dev); /* MAC filter - use one buffer for both lists */ buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + (2 * sizeof(mac_data->entries)), GFP_ATOMIC); mac_data = buf; - if (!buf) + if (!buf) { + netif_addr_unlock_bh(dev); + rtnl_unlock(); return; + } sg_init_table(sg, 2); @@ -2220,6 +2258,8 @@ static void virtnet_set_rx_mode(struct net_device *dev) netdev_for_each_mc_addr(ha, dev) memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); + netif_addr_unlock_bh(dev); + sg_set_buf(&sg[1], mac_data, sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); @@ -2227,9 +2267,21 @@ static void virtnet_set_rx_mode(struct net_device *dev) VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); + rtnl_unlock(); + kfree(buf); } +static void virtnet_set_rx_mode(struct net_device *dev) +{ + struct virtnet_info *vi = netdev_priv(dev); + + spin_lock(&vi->rx_mode_lock); + if (vi->rx_mode_work_enabled) + schedule_work(&vi->rx_mode_work); + spin_unlock(&vi->rx_mode_lock); +} + static int virtnet_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { @@ -3000,6 +3052,7 @@ static void virtnet_freeze_down(struct virtio_device *vdev) /* Make sure no work handler is accessing the device */ flush_work(&vi->config_work); + disable_rx_mode_work(vi); netif_tx_lock_bh(vi->dev); netif_device_detach(vi->dev); @@ -3022,6 +3075,8 @@ static int virtnet_restore_up(struct virtio_device *vdev) virtio_device_ready(vdev); enable_delayed_refill(vi); + enable_rx_mode_work(vi); + virtnet_set_rx_mode(vi->dev); if (netif_running(vi->dev)) { err = virtnet_open(vi->dev); @@ -3799,7 +3854,9 @@ static int virtnet_probe(struct virtio_device *vdev) vdev->priv = vi; INIT_WORK(&vi->config_work, virtnet_config_changed_work); + INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work); spin_lock_init(&vi->refill_lock); + spin_lock_init(&vi->rx_mode_lock); if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) vi->mergeable_rx_bufs = true; @@ -3905,6 +3962,8 @@ static int virtnet_probe(struct virtio_device *vdev) if (vi->has_rss || vi->has_rss_hash_report) virtnet_init_default_rss(vi); + enable_rx_mode_work(vi); + /* serialize netdev register + virtio_device_ready() with ndo_open() */ rtnl_lock(); @@ -3984,6 +4043,7 @@ static void virtnet_remove(struct virtio_device *vdev) /* Make sure no work handler is accessing the device. */ flush_work(&vi->config_work); + disable_rx_mode_work(vi); unregister_netdev(vi->dev); -- 2.25.1
Switch to reuse BAD_RING() to allow common logic to be implemented in BAD_RING(). Signed-off-by: Jason Wang <jasowang at redhat.com> --- Changes since V1: - switch to use BAD_RING in virtio_break_device() --- drivers/virtio/virtio_ring.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 2e7689bb933b..5cfb2fa8abee 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -58,7 +58,8 @@ do { \ dev_err(&_vq->vq.vdev->dev, \ "%s:"fmt, (_vq)->vq.name, ##args); \ - (_vq)->broken = true; \ + /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \ + WRITE_ONCE((_vq)->broken, true); \ } while (0) #define START_USE(vq) #define END_USE(vq) @@ -2237,7 +2238,7 @@ bool virtqueue_notify(struct virtqueue *_vq) /* Prod other side to tell it about changes. */ if (!vq->notify(_vq)) { - vq->broken = true; + BAD_RING(vq, "vq %d is broken\n", vq->vq.index); return false; } return true; @@ -2786,8 +2787,7 @@ void virtio_break_device(struct virtio_device *dev) list_for_each_entry(_vq, &dev->vqs, list) { struct vring_virtqueue *vq = to_vvq(_vq); - /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ - WRITE_ONCE(vq->broken, true); + BAD_RING(vq, "Device break vq %d", _vq->index); } spin_unlock(&dev->vqs_list_lock); } -- 2.25.1
Jason Wang
2022-Dec-26 07:49 UTC
[PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
This patch introduces a per virtqueue waitqueue to allow driver to sleep and wait for more used. Two new helpers are introduced to allow driver to sleep and wake up. Signed-off-by: Jason Wang <jasowang at redhat.com> --- Changes since V1: - check virtqueue_is_broken() as well - use more_used() instead of virtqueue_get_buf() to allow caller to get buffers afterwards --- drivers/virtio/virtio_ring.c | 29 +++++++++++++++++++++++++++++ include/linux/virtio.h | 3 +++ 2 files changed, 32 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 5cfb2fa8abee..9c83eb945493 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -13,6 +13,7 @@ #include <linux/dma-mapping.h> #include <linux/kmsan.h> #include <linux/spinlock.h> +#include <linux/wait.h> #include <xen/xen.h> #ifdef DEBUG @@ -60,6 +61,7 @@ "%s:"fmt, (_vq)->vq.name, ##args); \ /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \ WRITE_ONCE((_vq)->broken, true); \ + wake_up_interruptible(&(_vq)->wq); \ } while (0) #define START_USE(vq) #define END_USE(vq) @@ -203,6 +205,9 @@ struct vring_virtqueue { /* DMA, allocation, and size information */ bool we_own_ring; + /* Wait for buffer to be used */ + wait_queue_head_t wq; + #ifdef DEBUG /* They're supposed to lock for us. */ unsigned int in_use; @@ -2024,6 +2029,8 @@ static struct virtqueue *vring_create_virtqueue_packed( if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) vq->weak_barriers = false; + init_waitqueue_head(&vq->wq); + err = vring_alloc_state_extra_packed(&vring_packed); if (err) goto err_state_extra; @@ -2517,6 +2524,8 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index, if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) vq->weak_barriers = false; + init_waitqueue_head(&vq->wq); + err = vring_alloc_state_extra_split(vring_split); if (err) { kfree(vq); @@ -2654,6 +2663,8 @@ static void vring_free(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); + wake_up_interruptible(&vq->wq); + if (vq->we_own_ring) { if (vq->packed_ring) { vring_free_queue(vq->vq.vdev, @@ -2863,4 +2874,22 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq) } EXPORT_SYMBOL_GPL(virtqueue_get_vring); +int virtqueue_wait_for_used(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + /* TODO: Tweak the timeout. */ + return wait_event_interruptible_timeout(vq->wq, + virtqueue_is_broken(_vq) || more_used(vq), HZ); +} +EXPORT_SYMBOL_GPL(virtqueue_wait_for_used); + +void virtqueue_wake_up(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + wake_up_interruptible(&vq->wq); +} +EXPORT_SYMBOL_GPL(virtqueue_wake_up); + MODULE_LICENSE("GPL"); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index dcab9c7e8784..2eb62c774895 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -72,6 +72,9 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len, void **ctx); +int virtqueue_wait_for_used(struct virtqueue *vq); +void virtqueue_wake_up(struct virtqueue *vq); + void virtqueue_disable_cb(struct virtqueue *vq); bool virtqueue_enable_cb(struct virtqueue *vq); -- 2.25.1
Jason Wang
2022-Dec-26 07:49 UTC
[PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
We used to busy waiting on the cvq command this tends to be problematic since: 1) CPU could wait for ever on a buggy/malicous device 2) There's no wait to terminate the process that triggers the cvq command So this patch switch to use virtqueue_wait_for_used() to sleep with a timeout (1s) instead of busy polling for the cvq command forever. This gives the scheduler a breath and can let the process can respond to asignal. If the device doesn't respond in the timeout, break the device. Signed-off-by: Jason Wang <jasowang at redhat.com> --- Changes since V1: - break the device when timeout - get buffer manually since the virtio core check more_used() instead --- drivers/net/virtio_net.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index efd9dd55828b..6a2ea64cfcb5 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi) vi->rx_mode_work_enabled = false; spin_unlock_bh(&vi->rx_mode_lock); + virtqueue_wake_up(vi->cvq); flush_work(&vi->rx_mode_work); } @@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, return !oom; } +static void virtnet_cvq_done(struct virtqueue *cvq) +{ + virtqueue_wake_up(cvq); +} + static void skb_recv_done(struct virtqueue *rvq) { struct virtnet_info *vi = rvq->vdev->priv; @@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi, return err; } +static int virtnet_close(struct net_device *dev); + /* * Send command via the control virtqueue and check status. Commands * supported by the hypervisor, as indicated by feature bits, should @@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, if (unlikely(!virtqueue_kick(vi->cvq))) return vi->ctrl->status == VIRTIO_NET_OK; - /* Spin for a response, the kick causes an ioport write, trapping - * into the hypervisor, so the request should be handled immediately. - */ - while (!virtqueue_get_buf(vi->cvq, &tmp) && - !virtqueue_is_broken(vi->cvq)) - cpu_relax(); + if (virtqueue_wait_for_used(vi->cvq)) { + virtqueue_get_buf(vi->cvq, &tmp); + return vi->ctrl->status == VIRTIO_NET_OK; + } - return vi->ctrl->status == VIRTIO_NET_OK; + netdev_err(vi->dev, "CVQ command timeout, break the virtio device."); + virtio_break_device(vi->vdev); + return VIRTIO_NET_ERR; } static int virtnet_set_mac_address(struct net_device *dev, void *p) @@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi) /* Parameters for control virtqueue, if any */ if (vi->has_cvq) { - callbacks[total_vqs - 1] = NULL; + callbacks[total_vqs - 1] = virtnet_cvq_done; names[total_vqs - 1] = "control"; } -- 2.25.1
Maybe Matching Threads
- [PATCH V3 net-next 0/2] virtio-net: don't busy poll for cvq command
- [PATCH net-next v4 0/2] virtio-net: don't busy poll for cvq command
- [PATCH net-next V2 0/2] virtio-net: don't busy poll for cvq command
- [PATCH V3 net-next 1/2] virtio-net: convert rx mode setting to use workqueue
- [PATCH net-next V2 1/2] virtio-net: convert rx mode setting to use workqueue