On Mon, Apr 12, 2021 at 06:03:50PM -0400, Michael S. Tsirkin
wrote:> I was working on the spurios interrupt problem and
> I noticed something weird.
>
> static int virtnet_poll_tx(struct napi_struct *napi, int budget)
> {
> struct send_queue *sq = container_of(napi, struct send_queue,
napi);
> struct virtnet_info *vi = sq->vq->vdev->priv;
> unsigned int index = vq2txq(sq->vq);
> struct netdev_queue *txq;
>
> if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
> /* We don't need to enable cb for XDP */
> napi_complete_done(napi, 0);
> return 0;
> }
>
> txq = netdev_get_tx_queue(vi->dev, index);
> __netif_tx_lock(txq, raw_smp_processor_id());
> free_old_xmit_skbs(sq, true);
> __netif_tx_unlock(txq);
>
> virtqueue_napi_complete(napi, sq->vq, 0);
>
> if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
> netif_tx_wake_queue(txq);
>
> return 0;
> }
>
> So virtqueue_napi_complete is called when txq is not locked,
> thinkably start_xmit can happen right?
>
> Now virtqueue_napi_complete
>
> static void virtqueue_napi_complete(struct napi_struct *napi,
> struct virtqueue *vq, int processed)
> {
> int opaque;
>
> opaque = virtqueue_enable_cb_prepare(vq);
> if (napi_complete_done(napi, processed)) {
> if (unlikely(virtqueue_poll(vq, opaque)))
> virtqueue_napi_schedule(napi, vq);
> } else {
> virtqueue_disable_cb(vq);
> }
> }
>
>
> So it is calling virtqueue_enable_cb_prepare but tx queue
> could be running and can process things in parallel ...
> What gives? I suspect this corrupts the ring, and explains
> why we are seeing weird hangs with vhost packed ring ...
>
> Jason?
>
>
> --
> MST
and wouldn't the following untested patch make sense then?
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 82e520d2cb12..c23341b18eb5 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1504,6 +1505,8 @@ static int virtnet_poll_tx(struct napi_struct *napi, int
budget)
struct virtnet_info *vi = sq->vq->vdev->priv;
unsigned int index = vq2txq(sq->vq);
struct netdev_queue *txq;
+ int opaque;
+ bool done;
if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
/* We don't need to enable cb for XDP */
@@ -1514,9 +1517,26 @@ static int virtnet_poll_tx(struct napi_struct *napi, int
budget)
txq = netdev_get_tx_queue(vi->dev, index);
__netif_tx_lock(txq, raw_smp_processor_id());
free_old_xmit_skbs(sq, true);
+
+ opaque = virtqueue_enable_cb_prepare(sq->vq);
+
+ done = napi_complete_done(napi, 0);
+
+ if (!done)
+ virtqueue_disable_cb(sq->vq);
+
__netif_tx_unlock(txq);
- virtqueue_napi_complete(napi, sq->vq, 0);
+ if (done) {
+ if (unlikely(virtqueue_poll(vq, opaque))) {
+ if (napi_schedule_prep(napi)) {
+ __netif_tx_lock(txq, raw_smp_processor_id());
+ virtqueue_disable_cb(sq->vq);
+ __netif_tx_unlock(txq);
+ __napi_schedule(napi);
+ }
+ }
+ }
if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
netif_tx_wake_queue(txq);