jcfaracco at gmail.com
2019-Oct-06 18:45 UTC
[PATCH RFC net-next 0/2] drivers: net: virtio_net: Implement
From: Julio Faracco <jcfaracco at gmail.com> Driver virtio_net is not handling error events for TX provided by dev_watchdog. This event is reached when transmission queue is having problems to transmit packets. To enable it, driver should have .ndo_tx_timeout implemented. This serie has two commits: In the past, we implemented a function to recover driver state when this kind of event happens, but the structure was to complex for virtio_net that moment. Alternativelly, this skeleton should be enough for now. For further details, see thread: https://lkml.org/lkml/2015/6/23/691 Patch 1/2: Add statistic field for TX timeout events. Patch 2/2: Implement a skeleton function to debug TX timeout events. Julio Faracco (2): drivers: net: virtio_net: Add tx_timeout stats field drivers: net: virtio_net: Add tx_timeout function drivers/net/virtio_net.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) -- 2.21.0
jcfaracco at gmail.com
2019-Oct-06 18:45 UTC
[PATCH RFC net-next 1/2] drivers: net: virtio_net: Add tx_timeout stats field
From: Julio Faracco <jcfaracco at gmail.com> For debug purpose of TX timeout events, a tx_timeout entry was added to monitor this special case: when dev_watchdog identifies a tx_timeout and throw an exception. We can both consider this event as an error, but driver should report as a tx_timeout statistic. Signed-off-by: Julio Faracco <jcfaracco at gmail.com> Signed-off-by: Daiane Mendes <dnmendes76 at gmail.com> Cc: Jason Wang <jasowang at redhat.com> --- drivers/net/virtio_net.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 4f3de0ac8b0b..27f9b212c9f5 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -75,6 +75,7 @@ struct virtnet_sq_stats { u64 xdp_tx; u64 xdp_tx_drops; u64 kicks; + u64 tx_timeouts; }; struct virtnet_rq_stats { @@ -98,6 +99,7 @@ static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = { { "xdp_tx", VIRTNET_SQ_STAT(xdp_tx) }, { "xdp_tx_drops", VIRTNET_SQ_STAT(xdp_tx_drops) }, { "kicks", VIRTNET_SQ_STAT(kicks) }, + { "tx_timeouts", VIRTNET_SQ_STAT(tx_timeouts) }, }; static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { @@ -1721,7 +1723,7 @@ static void virtnet_stats(struct net_device *dev, int i; for (i = 0; i < vi->max_queue_pairs; i++) { - u64 tpackets, tbytes, rpackets, rbytes, rdrops; + u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; struct receive_queue *rq = &vi->rq[i]; struct send_queue *sq = &vi->sq[i]; @@ -1729,6 +1731,7 @@ static void virtnet_stats(struct net_device *dev, start = u64_stats_fetch_begin_irq(&sq->stats.syncp); tpackets = sq->stats.packets; tbytes = sq->stats.bytes; + terrors = sq->stats.tx_timeouts; } while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start)); do { @@ -1743,6 +1746,7 @@ static void virtnet_stats(struct net_device *dev, tot->rx_bytes += rbytes; tot->tx_bytes += tbytes; tot->rx_dropped += rdrops; + tot->tx_errors += terrors; } tot->tx_dropped = dev->stats.tx_dropped; -- 2.21.0
jcfaracco at gmail.com
2019-Oct-06 18:45 UTC
[PATCH RFC net-next 2/2] drivers: net: virtio_net: Add tx_timeout function
From: Julio Faracco <jcfaracco at gmail.com> To enable dev_watchdog, virtio_net should have a tx_timeout defined (.ndo_tx_timeout). This is only a skeleton to throw a warn message. It notifies the event in some specific queue of device. This function still counts tx_timeout statistic and consider this event as an error (one error per queue), reporting it. Signed-off-by: Julio Faracco <jcfaracco at gmail.com> Signed-off-by: Daiane Mendes <dnmendes76 at gmail.com> Cc: Jason Wang <jasowang at redhat.com> --- drivers/net/virtio_net.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 27f9b212c9f5..4b703b4b9441 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2585,6 +2585,29 @@ static int virtnet_set_features(struct net_device *dev, return 0; } +static void virtnet_tx_timeout(struct net_device *dev) +{ + struct virtnet_info *vi = netdev_priv(dev); + u32 i; + + /* find the stopped queue the same way dev_watchdog() does */ + for (i = 0; i < vi->curr_queue_pairs; i++) { + struct send_queue *sq = &vi->sq[i]; + + if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i))) + continue; + + u64_stats_update_begin(&sq->stats.syncp); + sq->stats.tx_timeouts++; + u64_stats_update_end(&sq->stats.syncp); + + netdev_warn(dev, "TX timeout on send queue: %d, sq: %s, vq: %d, name: %s\n", + i, sq->name, sq->vq->index, sq->vq->name); + + dev->stats.tx_errors++; + } +} + static const struct net_device_ops virtnet_netdev = { .ndo_open = virtnet_open, .ndo_stop = virtnet_close, @@ -2600,6 +2623,7 @@ static const struct net_device_ops virtnet_netdev = { .ndo_features_check = passthru_features_check, .ndo_get_phys_port_name = virtnet_get_phys_port_name, .ndo_set_features = virtnet_set_features, + .ndo_tx_timeout = virtnet_tx_timeout, }; static void virtnet_config_changed_work(struct work_struct *work) @@ -3018,6 +3042,9 @@ static int virtnet_probe(struct virtio_device *vdev) dev->netdev_ops = &virtnet_netdev; dev->features = NETIF_F_HIGHDMA; + /* Set up dev_watchdog cycle. */ + dev->watchdog_timeo = 5 * HZ; + dev->ethtool_ops = &virtnet_ethtool_ops; SET_NETDEV_DEV(dev, &vdev->dev); -- 2.21.0
Michael S. Tsirkin
2019-Oct-07 07:43 UTC
[PATCH RFC net-next 0/2] drivers: net: virtio_net: Implement
On Sun, Oct 06, 2019 at 03:45:13PM -0300, jcfaracco at gmail.com wrote:> From: Julio Faracco <jcfaracco at gmail.com> > > Driver virtio_net is not handling error events for TX provided by > dev_watchdog. This event is reached when transmission queue is having > problems to transmit packets. To enable it, driver should have > .ndo_tx_timeout implemented. This serie has two commits: > > In the past, we implemented a function to recover driver state when this > kind of event happens, but the structure was to complex for virtio_net > that moment.It's more that it was missing a bunch of locks.> Alternativelly, this skeleton should be enough for now. > > For further details, see thread: > https://lkml.org/lkml/2015/6/23/691 > > Patch 1/2: > Add statistic field for TX timeout events. > > Patch 2/2: > Implement a skeleton function to debug TX timeout events. > > Julio Faracco (2): > drivers: net: virtio_net: Add tx_timeout stats field > drivers: net: virtio_net: Add tx_timeout function > > drivers/net/virtio_net.c | 33 ++++++++++++++++++++++++++++++++- > 1 file changed, 32 insertions(+), 1 deletion(-) > > -- > 2.21.0
Michael S. Tsirkin
2019-Oct-07 07:51 UTC
[PATCH RFC net-next 2/2] drivers: net: virtio_net: Add tx_timeout function
On Sun, Oct 06, 2019 at 03:45:15PM -0300, jcfaracco at gmail.com wrote:> From: Julio Faracco <jcfaracco at gmail.com> > > To enable dev_watchdog, virtio_net should have a tx_timeout defined > (.ndo_tx_timeout). This is only a skeleton to throw a warn message. It > notifies the event in some specific queue of device. This function > still counts tx_timeout statistic and consider this event as an error > (one error per queue), reporting it. > > Signed-off-by: Julio Faracco <jcfaracco at gmail.com> > Signed-off-by: Daiane Mendes <dnmendes76 at gmail.com> > Cc: Jason Wang <jasowang at redhat.com> > --- > drivers/net/virtio_net.c | 27 +++++++++++++++++++++++++++ > 1 file changed, 27 insertions(+) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index 27f9b212c9f5..4b703b4b9441 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -2585,6 +2585,29 @@ static int virtnet_set_features(struct net_device *dev, > return 0; > } > > +static void virtnet_tx_timeout(struct net_device *dev) > +{ > + struct virtnet_info *vi = netdev_priv(dev); > + u32 i; > + > + /* find the stopped queue the same way dev_watchdog() does */not really - the watchdog actually looks at trans_start.> + for (i = 0; i < vi->curr_queue_pairs; i++) { > + struct send_queue *sq = &vi->sq[i]; > + > + if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i))) > + continue; > + > + u64_stats_update_begin(&sq->stats.syncp); > + sq->stats.tx_timeouts++; > + u64_stats_update_end(&sq->stats.syncp); > + > + netdev_warn(dev, "TX timeout on send queue: %d, sq: %s, vq: %d, name: %s\n", > + i, sq->name, sq->vq->index, sq->vq->name);this seems to assume any running queue is timed out. doesn't look right. also - there's already a warning in this case in the core. do we need another one?> + dev->stats.tx_errors++;> + } > +} > + > static const struct net_device_ops virtnet_netdev = { > .ndo_open = virtnet_open, > .ndo_stop = virtnet_close, > @@ -2600,6 +2623,7 @@ static const struct net_device_ops virtnet_netdev = { > .ndo_features_check = passthru_features_check, > .ndo_get_phys_port_name = virtnet_get_phys_port_name, > .ndo_set_features = virtnet_set_features, > + .ndo_tx_timeout = virtnet_tx_timeout, > }; > > static void virtnet_config_changed_work(struct work_struct *work) > @@ -3018,6 +3042,9 @@ static int virtnet_probe(struct virtio_device *vdev) > dev->netdev_ops = &virtnet_netdev; > dev->features = NETIF_F_HIGHDMA; > > + /* Set up dev_watchdog cycle. */ > + dev->watchdog_timeo = 5 * HZ; > +Seems to be still broken with napi_tx = false.> dev->ethtool_ops = &virtnet_ethtool_ops; > SET_NETDEV_DEV(dev, &vdev->dev); > > -- > 2.21.0
Julio Faracco
2019-Oct-07 13:58 UTC
[PATCH RFC net-next 0/2] drivers: net: virtio_net: Implement
Em seg, 7 de out de 2019 ?s 04:43, Michael S. Tsirkin <mst at redhat.com> escreveu:> > On Sun, Oct 06, 2019 at 03:45:13PM -0300, jcfaracco at gmail.com wrote: > > From: Julio Faracco <jcfaracco at gmail.com> > > > > Driver virtio_net is not handling error events for TX provided by > > dev_watchdog. This event is reached when transmission queue is having > > problems to transmit packets. To enable it, driver should have > > .ndo_tx_timeout implemented. This serie has two commits: > > > > In the past, we implemented a function to recover driver state when this > > kind of event happens, but the structure was to complex for virtio_net > > that moment. > > It's more that it was missing a bunch of locks.Actually, we submitted this patch as a RFC to understand the community perspective about this missing feature: Complexity versus performance versus solution.> > > Alternativelly, this skeleton should be enough for now. > > > > For further details, see thread: > > https://lkml.org/lkml/2015/6/23/691 > > > > Patch 1/2: > > Add statistic field for TX timeout events. > > > > Patch 2/2: > > Implement a skeleton function to debug TX timeout events. > > > > Julio Faracco (2): > > drivers: net: virtio_net: Add tx_timeout stats field > > drivers: net: virtio_net: Add tx_timeout function > > > > drivers/net/virtio_net.c | 33 ++++++++++++++++++++++++++++++++- > > 1 file changed, 32 insertions(+), 1 deletion(-) > > > > -- > > 2.21.0
Julio Faracco
2019-Oct-07 14:55 UTC
[PATCH RFC net-next 1/2] drivers: net: virtio_net: Add tx_timeout stats field
Em seg, 7 de out de 2019 ?s 11:15, Julian Wiedmann <jwi at linux.ibm.com> escreveu:> > On 06.10.19 20:45, jcfaracco at gmail.com wrote: > > From: Julio Faracco <jcfaracco at gmail.com> > > > > For debug purpose of TX timeout events, a tx_timeout entry was added to > > monitor this special case: when dev_watchdog identifies a tx_timeout and > > throw an exception. We can both consider this event as an error, but > > driver should report as a tx_timeout statistic. > > > > Hi Julio, > dev_watchdog() updates txq->trans_timeout, why isn't that sufficient?Hi Julian, Good catch! This case (this patch) it would be useful only for ethtool stats. This is not so relevant as the method implementation itself. But, on the other hand, I think it should be relevant if we split into tx_timeout per queue. Anyway, suggestions are welcome.> > > > Signed-off-by: Julio Faracco <jcfaracco at gmail.com> > > Signed-off-by: Daiane Mendes <dnmendes76 at gmail.com> > > Cc: Jason Wang <jasowang at redhat.com> > > --- > > drivers/net/virtio_net.c | 6 +++++- > > 1 file changed, 5 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > > index 4f3de0ac8b0b..27f9b212c9f5 100644 > > --- a/drivers/net/virtio_net.c > > +++ b/drivers/net/virtio_net.c > > @@ -75,6 +75,7 @@ struct virtnet_sq_stats { > > u64 xdp_tx; > > u64 xdp_tx_drops; > > u64 kicks; > > + u64 tx_timeouts; > > }; > > > > struct virtnet_rq_stats { > > @@ -98,6 +99,7 @@ static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = { > > { "xdp_tx", VIRTNET_SQ_STAT(xdp_tx) }, > > { "xdp_tx_drops", VIRTNET_SQ_STAT(xdp_tx_drops) }, > > { "kicks", VIRTNET_SQ_STAT(kicks) }, > > + { "tx_timeouts", VIRTNET_SQ_STAT(tx_timeouts) }, > > }; > > > > static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { > > @@ -1721,7 +1723,7 @@ static void virtnet_stats(struct net_device *dev, > > int i; > > > > for (i = 0; i < vi->max_queue_pairs; i++) { > > - u64 tpackets, tbytes, rpackets, rbytes, rdrops; > > + u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; > > struct receive_queue *rq = &vi->rq[i]; > > struct send_queue *sq = &vi->sq[i]; > > > > @@ -1729,6 +1731,7 @@ static void virtnet_stats(struct net_device *dev, > > start = u64_stats_fetch_begin_irq(&sq->stats.syncp); > > tpackets = sq->stats.packets; > > tbytes = sq->stats.bytes; > > + terrors = sq->stats.tx_timeouts; > > } while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start)); > > > > do { > > @@ -1743,6 +1746,7 @@ static void virtnet_stats(struct net_device *dev, > > tot->rx_bytes += rbytes; > > tot->tx_bytes += tbytes; > > tot->rx_dropped += rdrops; > > + tot->tx_errors += terrors; > > } > > > > tot->tx_dropped = dev->stats.tx_dropped; > > >
Jason Wang
2019-Oct-12 13:01 UTC
[PATCH RFC net-next 2/2] drivers: net: virtio_net: Add tx_timeout function
On 2019/10/7 ??2:45, jcfaracco at gmail.com wrote:> From: Julio Faracco <jcfaracco at gmail.com> > > To enable dev_watchdog, virtio_net should have a tx_timeout defined > (.ndo_tx_timeout). This is only a skeleton to throw a warn message. It > notifies the event in some specific queue of device. This function > still counts tx_timeout statistic and consider this event as an error > (one error per queue), reporting it. > > Signed-off-by: Julio Faracco <jcfaracco at gmail.com> > Signed-off-by: Daiane Mendes <dnmendes76 at gmail.com> > Cc: Jason Wang <jasowang at redhat.com> > --- > drivers/net/virtio_net.c | 27 +++++++++++++++++++++++++++ > 1 file changed, 27 insertions(+) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index 27f9b212c9f5..4b703b4b9441 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -2585,6 +2585,29 @@ static int virtnet_set_features(struct net_device *dev, > return 0; > } > > +static void virtnet_tx_timeout(struct net_device *dev) > +{ > + struct virtnet_info *vi = netdev_priv(dev); > + u32 i; > + > + /* find the stopped queue the same way dev_watchdog() does */ > + for (i = 0; i < vi->curr_queue_pairs; i++) { > + struct send_queue *sq = &vi->sq[i]; > + > + if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i))) > + continue; > + > + u64_stats_update_begin(&sq->stats.syncp); > + sq->stats.tx_timeouts++; > + u64_stats_update_end(&sq->stats.syncp); > + > + netdev_warn(dev, "TX timeout on send queue: %d, sq: %s, vq: %d, name: %s\n", > + i, sq->name, sq->vq->index, sq->vq->name);If this is just a warn for a specific queue, maybe it's better to do it in the dev_watchdog()? Or we may want more information like avail,used idx etc. And usually there will be a reset, any reason for not doing this? Thanks> + > + dev->stats.tx_errors++; > + } > +} > + > static const struct net_device_ops virtnet_netdev = { > .ndo_open = virtnet_open, > .ndo_stop = virtnet_close, > @@ -2600,6 +2623,7 @@ static const struct net_device_ops virtnet_netdev = { > .ndo_features_check = passthru_features_check, > .ndo_get_phys_port_name = virtnet_get_phys_port_name, > .ndo_set_features = virtnet_set_features, > + .ndo_tx_timeout = virtnet_tx_timeout, > }; > > static void virtnet_config_changed_work(struct work_struct *work) > @@ -3018,6 +3042,9 @@ static int virtnet_probe(struct virtio_device *vdev) > dev->netdev_ops = &virtnet_netdev; > dev->features = NETIF_F_HIGHDMA; > > + /* Set up dev_watchdog cycle. */ > + dev->watchdog_timeo = 5 * HZ; > + > dev->ethtool_ops = &virtnet_ethtool_ops; > SET_NETDEV_DEV(dev, &vdev->dev); >
Possibly Parallel Threads
- [PATCH RFC net-next 0/2] drivers: net: virtio_net: Implement
- [PATCH net-next v2] drivers: net: virtio_net: Implement a dev_watchdog handler
- [PATCH net-next v2] drivers: net: virtio_net: Implement a dev_watchdog handler
- [PATCH RFC net-next 2/2] drivers: net: virtio_net: Add tx_timeout function
- [PATCH RFC net-next 2/2] drivers: net: virtio_net: Add tx_timeout function