Michael S. Tsirkin
2021-Apr-13 05:47 UTC
[PATCH RFC v2 0/4] virtio net: spurious interrupt related fixes
With the implementation of napi-tx in virtio driver, we clean tx descriptors from rx napi handler, for the purpose of reducing tx complete interrupts. But this introduces a race where tx complete interrupt has been raised, but the handler finds there is no work to do because we have done the work in the previous rx interrupt handler. A similar issue exists with polling from start_xmit, it is however less common because of the delayed cb optimization of the split ring - but will likely affect the packed ring once that is more common. In particular, this was reported to lead to the following warning msg: [ 3588.010778] irq 38: nobody cared (try booting with the "irqpoll" option) [ 3588.017938] CPU: 4 PID: 0 Comm: swapper/4 Not tainted 5.3.0-19-generic #20~18.04.2-Ubuntu [ 3588.017940] Call Trace: [ 3588.017942] <IRQ> [ 3588.017951] dump_stack+0x63/0x85 [ 3588.017953] __report_bad_irq+0x35/0xc0 [ 3588.017955] note_interrupt+0x24b/0x2a0 [ 3588.017956] handle_irq_event_percpu+0x54/0x80 [ 3588.017957] handle_irq_event+0x3b/0x60 [ 3588.017958] handle_edge_irq+0x83/0x1a0 [ 3588.017961] handle_irq+0x20/0x30 [ 3588.017964] do_IRQ+0x50/0xe0 [ 3588.017966] common_interrupt+0xf/0xf [ 3588.017966] </IRQ> [ 3588.017989] handlers: [ 3588.020374] [<000000001b9f1da8>] vring_interrupt [ 3588.025099] Disabling IRQ #38 This patchset attempts to fix this by cleaning up a bunch of races related to the handling of sq callbacks (aka tx interrupts). Very lightly tested, sending out for help with testing, early feedback and flames. Thanks! Michael S. Tsirkin (4): virtio: fix up virtio_disable_cb virtio_net: disable cb aggressively virtio_net: move tx vq operation under tx queue lock virtio_net: move txq wakeups under tx q lock drivers/net/virtio_net.c | 35 +++++++++++++++++++++++++++++------ drivers/virtio/virtio_ring.c | 26 +++++++++++++++++++++++++- 2 files changed, 54 insertions(+), 7 deletions(-) -- MST
Michael S. Tsirkin
2021-Apr-13 05:47 UTC
[PATCH RFC v2 1/4] virtio: fix up virtio_disable_cb
virtio_disable_cb is currently a nop for split ring with event index. This is because it used to be always called from a callback when we know device won't trigger more events until we update the index. However, now that we run with interrupts enabled a lot we also poll without a callback so that is different: disabling callbacks will help reduce the number of spurious interrupts. Further, if using event index with a packed ring, and if being called from a callback, we actually do disable interrupts which is unnecessary. Fix both issues by tracking whenever we get a callback. If that is the case disabling interrupts with event index can be a nop. If not the case disable interrupts. Note: with a split ring there's no explicit "no interrupts" value. For now we write a fixed value so our chance of triggering an interupt is 1/ring size. It's probably better to write something related to the last used index there to reduce the chance even further. For now I'm keeping it simple. Signed-off-by: Michael S. Tsirkin <mst at redhat.com> --- drivers/virtio/virtio_ring.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 71e16b53e9c1..88f0b16b11b8 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -113,6 +113,9 @@ struct vring_virtqueue { /* Last used index we've seen. */ u16 last_used_idx; + /* Hint for event idx: already triggered no need to disable. */ + bool event_triggered; + union { /* Available for split ring */ struct { @@ -739,7 +742,10 @@ static void virtqueue_disable_cb_split(struct virtqueue *_vq) if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; - if (!vq->event) + if (vq->event) + /* TODO: this is a hack. Figure out a cleaner value to write. */ + vring_used_event(&vq->split.vring) = 0x0; + else vq->split.vring.avail->flags cpu_to_virtio16(_vq->vdev, vq->split.avail_flags_shadow); @@ -1605,6 +1611,7 @@ static struct virtqueue *vring_create_virtqueue_packed( vq->weak_barriers = weak_barriers; vq->broken = false; vq->last_used_idx = 0; + vq->event_triggered = false; vq->num_added = 0; vq->packed_ring = true; vq->use_dma_api = vring_use_dma_api(vdev); @@ -1919,6 +1926,12 @@ void virtqueue_disable_cb(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); + /* If device triggered an event already it won't trigger one again: + * no need to disable. + */ + if (vq->event_triggered) + return; + if (vq->packed_ring) virtqueue_disable_cb_packed(_vq); else @@ -1942,6 +1955,9 @@ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); + if (vq->event_triggered) + vq->event_triggered = false; + return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : virtqueue_enable_cb_prepare_split(_vq); } @@ -2005,6 +2021,9 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); + if (vq->event_triggered) + vq->event_triggered = false; + return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) : virtqueue_enable_cb_delayed_split(_vq); } @@ -2044,6 +2063,10 @@ irqreturn_t vring_interrupt(int irq, void *_vq) if (unlikely(vq->broken)) return IRQ_HANDLED; + /* Just a hint for performance: so it's ok that this can be racy! */ + if (vq->event) + vq->event_triggered = true; + pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); if (vq->vq.callback) vq->vq.callback(&vq->vq); @@ -2083,6 +2106,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->weak_barriers = weak_barriers; vq->broken = false; vq->last_used_idx = 0; + vq->event_triggered = false; vq->num_added = 0; vq->use_dma_api = vring_use_dma_api(vdev); #ifdef DEBUG -- MST
Michael S. Tsirkin
2021-Apr-13 05:47 UTC
[PATCH RFC v2 2/4] virtio_net: disable cb aggressively
There are currently two cases where we poll TX vq not in response to a callback: start xmit and rx napi. We currently do this with callbacks enabled which can cause extra interrupts from the card. Used not to be a big issue as we run with interrupts disabled but that is no longer the case, and in some cases the rate of spurious interrupts is so high linux detects this and actually kills the interrupt. Fix up by disabling the callbacks before polling the tx vq. Signed-off-by: Michael S. Tsirkin <mst at redhat.com> --- drivers/net/virtio_net.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 82e520d2cb12..16d5abed582c 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1429,6 +1429,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq) return; if (__netif_tx_trylock(txq)) { + virtqueue_disable_cb(sq->vq); free_old_xmit_skbs(sq, true); __netif_tx_unlock(txq); } @@ -1582,6 +1583,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) bool use_napi = sq->napi.weight; /* Free up any pending old buffers before queueing new ones. */ + virtqueue_disable_cb(sq->vq); free_old_xmit_skbs(sq, false); if (use_napi && kick) -- MST
Michael S. Tsirkin
2021-Apr-13 05:47 UTC
[PATCH RFC v2 3/4] virtio_net: move tx vq operation under tx queue lock
It's unsafe to operate a vq from multiple threads. Unfortunately this is exactly what we do when invoking clean tx poll from rx napi. As a fix move everything that deals with the vq to under tx lock. Signed-off-by: Michael S. Tsirkin <mst at redhat.com> --- drivers/net/virtio_net.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 16d5abed582c..460ccdbb840e 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1505,6 +1505,8 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) struct virtnet_info *vi = sq->vq->vdev->priv; unsigned int index = vq2txq(sq->vq); struct netdev_queue *txq; + int opaque; + bool done; if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { /* We don't need to enable cb for XDP */ @@ -1514,10 +1516,28 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) txq = netdev_get_tx_queue(vi->dev, index); __netif_tx_lock(txq, raw_smp_processor_id()); + virtqueue_disable_cb(sq->vq); free_old_xmit_skbs(sq, true); + + opaque = virtqueue_enable_cb_prepare(sq->vq); + + done = napi_complete_done(napi, 0); + + if (!done) + virtqueue_disable_cb(sq->vq); + __netif_tx_unlock(txq); - virtqueue_napi_complete(napi, sq->vq, 0); + if (done) { + if (unlikely(virtqueue_poll(sq->vq, opaque))) { + if (napi_schedule_prep(napi)) { + __netif_tx_lock(txq, raw_smp_processor_id()); + virtqueue_disable_cb(sq->vq); + __netif_tx_unlock(txq); + __napi_schedule(napi); + } + } + } if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) netif_tx_wake_queue(txq); -- MST
Michael S. Tsirkin
2021-Apr-13 05:47 UTC
[PATCH RFC v2 4/4] virtio_net: move txq wakeups under tx q lock
We currently check num_free outside tx q lock which is unsafe: new packets can arrive meanwhile and there won't be space in the queue. Thus a spurious queue wakeup causing overhead and even packet drops. Move the check under the lock to fix that. Signed-off-by: Michael S. Tsirkin <mst at redhat.com> --- drivers/net/virtio_net.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 460ccdbb840e..febaf55ec1f6 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1431,11 +1431,12 @@ static void virtnet_poll_cleantx(struct receive_queue *rq) if (__netif_tx_trylock(txq)) { virtqueue_disable_cb(sq->vq); free_old_xmit_skbs(sq, true); + + if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) + netif_tx_wake_queue(txq); + __netif_tx_unlock(txq); } - - if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) - netif_tx_wake_queue(txq); } static int virtnet_poll(struct napi_struct *napi, int budget) @@ -1519,6 +1520,9 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) virtqueue_disable_cb(sq->vq); free_old_xmit_skbs(sq, true); + if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) + netif_tx_wake_queue(txq); + opaque = virtqueue_enable_cb_prepare(sq->vq); done = napi_complete_done(napi, 0); @@ -1539,9 +1543,6 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) } } - if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) - netif_tx_wake_queue(txq); - return 0; } -- MST