Jason Wang
2018-Sep-13 05:35 UTC
[PATCH net-next V2] virtio_net: ethtool tx napi configuration
Implement ethtool .set_coalesce (-C) and .get_coalesce (-c) handlers. Interrupt moderation is currently not supported, so these accept and display the default settings of 0 usec and 1 frame. Toggle tx napi through a bit in tx-frames. So as to not interfere with possible future interrupt moderation, value 1 means tx napi while value 0 means not. To properly synchronize with the data path, tx napi is disabled and tx lock is held when changing the value of napi weight. And two more places that can access tx napi weight: - speculative tx polling in rx napi, we can leave it as is since it not a must for correctness. - skb_xmit_done(), one more check of napi weight is added before trying to enable tx to avoid tx to be disabled forever if napi is disabled after skb_xmit_done() but before the napi Link: https://patchwork.ozlabs.org/patch/948149/ Suggested-by: Jason Wang <jasowang at redhat.com> Signed-off-by: Willem de Bruijn <willemb at google.com> Signed-off-by: Jason Wang <jasowang at redhat.com> --- Changes from V1: - try to synchronize with datapath to allow changing mode when interface is up. 
- use tx-frames 0 as to disable tx napi while tx-frames 1 to enable tx napi --- drivers/net/virtio_net.c | 64 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 765920905226..6e70864f5899 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -66,6 +66,8 @@ DECLARE_EWMA(pkt_len, 0, 64) #define VIRTNET_DRIVER_VERSION "1.0.0" +static const u32 ethtool_coalesce_napi_mask = (1UL << 10); + static const unsigned long guest_offloads[] = { VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, @@ -1444,7 +1446,10 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) virtqueue_napi_complete(napi, sq->vq, 0); - if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) + /* Check napi.weight to avoid tx stall since it could be set + * to zero by ethtool after skb_xmit_done(). + */ + if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS || !sq->napi.weight) netif_tx_wake_queue(txq); return 0; @@ -2181,6 +2186,61 @@ static int virtnet_get_link_ksettings(struct net_device *dev, return 0; } +static int virtnet_set_coalesce(struct net_device *dev, + struct ethtool_coalesce *ec) +{ + struct ethtool_coalesce ec_default = { + .cmd = ETHTOOL_SCOALESCE, + .rx_max_coalesced_frames = 1, + }; + struct virtnet_info *vi = netdev_priv(dev); + int i, napi_weight; + + if (ec->tx_max_coalesced_frames > 1) + return -EINVAL; + + ec_default.tx_max_coalesced_frames = ec->tx_max_coalesced_frames; + napi_weight = ec->tx_max_coalesced_frames ? 
NAPI_POLL_WEIGHT : 0; + + /* disallow changes to fields not explicitly tested above */ + if (memcmp(ec, &ec_default, sizeof(ec_default))) + return -EINVAL; + + if (napi_weight ^ vi->sq[0].napi.weight) { + for (i = 0; i < vi->max_queue_pairs; i++) { + struct netdev_queue *txq = + netdev_get_tx_queue(vi->dev, i); + + virtnet_napi_tx_disable(&vi->sq[i].napi); + __netif_tx_lock_bh(txq); + vi->sq[i].napi.weight = napi_weight; + __netif_tx_unlock_bh(txq); + virtnet_napi_tx_enable(vi, vi->sq[i].vq, + &vi->sq[i].napi); + } + } + + return 0; +} + +static int virtnet_get_coalesce(struct net_device *dev, + struct ethtool_coalesce *ec) +{ + struct ethtool_coalesce ec_default = { + .cmd = ETHTOOL_GCOALESCE, + .rx_max_coalesced_frames = 1, + .tx_max_coalesced_frames = 0, + }; + struct virtnet_info *vi = netdev_priv(dev); + + memcpy(ec, &ec_default, sizeof(ec_default)); + + if (vi->sq[0].napi.weight) + ec->tx_max_coalesced_frames = 1; + + return 0; +} + static void virtnet_init_settings(struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); @@ -2219,6 +2279,8 @@ static const struct ethtool_ops virtnet_ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, .get_link_ksettings = virtnet_get_link_ksettings, .set_link_ksettings = virtnet_set_link_ksettings, + .set_coalesce = virtnet_set_coalesce, + .get_coalesce = virtnet_get_coalesce, }; static void virtnet_freeze_down(struct virtio_device *vdev) -- 2.17.1
Jason Wang
2018-Sep-13 09:07 UTC
[PATCH net-next V2] virtio_net: ethtool tx napi configuration
On 2018年09月13日 13:35, Jason Wang wrote: > Implement ethtool .set_coalesce (-C) and .get_coalesce (-c) handlers. > Interrupt moderation is currently not supported, so these accept and > display the default settings of 0 usec and 1 frame. > > Toggle tx napi through a bit in tx-frames. So as to not interfere > with possible future interrupt moderation, value 1 means tx napi while > value 0 means not. > > To properly synchronize with the data path, tx napi is disabled and > tx lock is held when changing the value of napi weight. And two more > places that can access tx napi weight: > > - speculative tx polling in rx napi, we can leave it as is since it > not a must for correctness. > - skb_xmit_done(), one more check of napi weight is added before > trying to enable tx to avoid tx to be disabled forever if napi is > disabled after skb_xmit_done() but before the napi > > Link: https://patchwork.ozlabs.org/patch/948149/ > Suggested-by: Jason Wang <jasowang at redhat.com> > Signed-off-by: Willem de Bruijn <willemb at google.com> > Signed-off-by: Jason Wang <jasowang at redhat.com> > --- > Changes from V1: > - try to synchronize with datapath to allow changing mode when > interface is up. 
> - use tx-frames 0 as to disable tx napi while tx-frames 1 to enable tx napi > --- > drivers/net/virtio_net.c | 64 +++++++++++++++++++++++++++++++++++++++- > 1 file changed, 63 insertions(+), 1 deletion(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index 765920905226..6e70864f5899 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -66,6 +66,8 @@ DECLARE_EWMA(pkt_len, 0, 64) > > #define VIRTNET_DRIVER_VERSION "1.0.0" > > +static const u32 ethtool_coalesce_napi_mask = (1UL << 10); > + > static const unsigned long guest_offloads[] = { > VIRTIO_NET_F_GUEST_TSO4, > VIRTIO_NET_F_GUEST_TSO6, > @@ -1444,7 +1446,10 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) > > virtqueue_napi_complete(napi, sq->vq, 0); > > - if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) > + /* Check napi.weight to avoid tx stall since it could be set > + * to zero by ethtool after skb_xmit_done(). > + */ > + if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS || !sq->napi.weight) > netif_tx_wake_queue(txq); > > return 0; > @@ -2181,6 +2186,61 @@ static int virtnet_get_link_ksettings(struct net_device *dev, > return 0; > } > > +static int virtnet_set_coalesce(struct net_device *dev, > + struct ethtool_coalesce *ec) > +{ > + struct ethtool_coalesce ec_default = { > + .cmd = ETHTOOL_SCOALESCE, > + .rx_max_coalesced_frames = 1, > + }; > + struct virtnet_info *vi = netdev_priv(dev); > + int i, napi_weight; > + > + if (ec->tx_max_coalesced_frames > 1) > + return -EINVAL; > + > + ec_default.tx_max_coalesced_frames = ec->tx_max_coalesced_frames; > + napi_weight = ec->tx_max_coalesced_frames ? 
NAPI_POLL_WEIGHT : 0; > + > + /* disallow changes to fields not explicitly tested above */ > + if (memcmp(ec, &ec_default, sizeof(ec_default))) > + return -EINVAL; > + > + if (napi_weight ^ vi->sq[0].napi.weight) { > + for (i = 0; i < vi->max_queue_pairs; i++) { > + struct netdev_queue *txq = > + netdev_get_tx_queue(vi->dev, i); > + > + virtnet_napi_tx_disable(&vi->sq[i].napi); > + __netif_tx_lock_bh(txq);

Need to check netif_running() before disabling napi. Otherwise we may have an infinite loop here. The discussion is still ongoing; if we decide to go with set_coalesce, I will post a V3.

Thanks

> + vi->sq[i].napi.weight = napi_weight; > + __netif_tx_unlock_bh(txq); > + virtnet_napi_tx_enable(vi, vi->sq[i].vq, > + &vi->sq[i].napi); > + } > + } > + > + return 0; > +} > + > +static int virtnet_get_coalesce(struct net_device *dev, > + struct ethtool_coalesce *ec) > +{ > + struct ethtool_coalesce ec_default = { > + .cmd = ETHTOOL_GCOALESCE, > + .rx_max_coalesced_frames = 1, > + .tx_max_coalesced_frames = 0, > + }; > + struct virtnet_info *vi = netdev_priv(dev); > + > + memcpy(ec, &ec_default, sizeof(ec_default)); > + > + if (vi->sq[0].napi.weight) > + ec->tx_max_coalesced_frames = 1; > + > + return 0; > +} > + > static void virtnet_init_settings(struct net_device *dev) > { > struct virtnet_info *vi = netdev_priv(dev); > @@ -2219,6 +2279,8 @@ static const struct ethtool_ops virtnet_ethtool_ops = { > .get_ts_info = ethtool_op_get_ts_info, > .get_link_ksettings = virtnet_get_link_ksettings, > .set_link_ksettings = virtnet_set_link_ksettings, > + .set_coalesce = virtnet_set_coalesce, > + .get_coalesce = virtnet_get_coalesce, > }; > > static void virtnet_freeze_down(struct virtio_device *vdev)
Willem de Bruijn
2018-Sep-13 15:20 UTC
[PATCH net-next V2] virtio_net: ethtool tx napi configuration
On Thu, Sep 13, 2018 at 1:40 AM Jason Wang <jasowang at redhat.com> wrote:> > Implement ethtool .set_coalesce (-C) and .get_coalesce (-c) handlers. > Interrupt moderation is currently not supported, so these accept and > display the default settings of 0 usec and 1 frame. > > Toggle tx napi through a bit in tx-frames. So as to not interfere > with possible future interrupt moderation, value 1 means tx napi while > value 0 means not. > > To properly synchronize with the data path, tx napi is disabled and > tx lock is held when changing the value of napi weight. And two more > places that can access tx napi weight: > > - speculative tx polling in rx napi, we can leave it as is since it > not a must for correctness. > - skb_xmit_done(), one more check of napi weight is added before > trying to enable tx to avoid tx to be disabled forever if napi is > disabled after skb_xmit_done() but before the napi > > Link: https://patchwork.ozlabs.org/patch/948149/ > Suggested-by: Jason Wang <jasowang at redhat.com> > Signed-off-by: Willem de Bruijn <willemb at google.com> > Signed-off-by: Jason Wang <jasowang at redhat.com> > --- > Changes from V1: > - try to synchronize with datapath to allow changing mode when > interface is up. 
> - use tx-frames 0 as to disable tx napi while tx-frames 1 to enable tx napi > --- > drivers/net/virtio_net.c | 64 +++++++++++++++++++++++++++++++++++++++- > 1 file changed, 63 insertions(+), 1 deletion(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index 765920905226..6e70864f5899 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -66,6 +66,8 @@ DECLARE_EWMA(pkt_len, 0, 64) > > #define VIRTNET_DRIVER_VERSION "1.0.0" > > +static const u32 ethtool_coalesce_napi_mask = (1UL << 10); > +This is no longer needed> static const unsigned long guest_offloads[] = { > VIRTIO_NET_F_GUEST_TSO4, > VIRTIO_NET_F_GUEST_TSO6, > @@ -1444,7 +1446,10 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) > > virtqueue_napi_complete(napi, sq->vq, 0); > > - if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) > + /* Check napi.weight to avoid tx stall since it could be set > + * to zero by ethtool after skb_xmit_done(). > + */ > + if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS || !sq->napi.weight) > netif_tx_wake_queue(txq);I see. This assumes that the napi handler will always be called on conversion from napi to no-napi mode. That is safe to assume because if it isn't called (and will not call netif_tx_wake_queue) that implies that napi was not scheduled, and thus the tx interrupt was not suppressed and thus there was no tx completion work to be scheduled?> > return 0; > @@ -2181,6 +2186,61 @@ static int virtnet_get_link_ksettings(struct net_device *dev, > return 0; > } > > +static int virtnet_set_coalesce(struct net_device *dev, > + struct ethtool_coalesce *ec) > +{ > + struct ethtool_coalesce ec_default = { > + .cmd = ETHTOOL_SCOALESCE, > + .rx_max_coalesced_frames = 1, > + }; > + struct virtnet_info *vi = netdev_priv(dev); > + int i, napi_weight; > + > + if (ec->tx_max_coalesced_frames > 1) > + return -EINVAL; > + > + ec_default.tx_max_coalesced_frames = ec->tx_max_coalesced_frames; > + napi_weight = ec->tx_max_coalesced_frames ? 
NAPI_POLL_WEIGHT : 0; > + > + /* disallow changes to fields not explicitly tested above */ > + if (memcmp(ec, &ec_default, sizeof(ec_default))) > + return -EINVAL; > + > + if (napi_weight ^ vi->sq[0].napi.weight) { > + for (i = 0; i < vi->max_queue_pairs; i++) { > + struct netdev_queue *txq > + netdev_get_tx_queue(vi->dev, i); > + > + virtnet_napi_tx_disable(&vi->sq[i].napi); > + __netif_tx_lock_bh(txq); > + vi->sq[i].napi.weight = napi_weight; > + __netif_tx_unlock_bh(txq); > + virtnet_napi_tx_enable(vi, vi->sq[i].vq, > + &vi->sq[i].napi); > + } > + } > + > + return 0; > +} > + > +static int virtnet_get_coalesce(struct net_device *dev, > + struct ethtool_coalesce *ec) > +{ > + struct ethtool_coalesce ec_default = { > + .cmd = ETHTOOL_GCOALESCE, > + .rx_max_coalesced_frames = 1, > + .tx_max_coalesced_frames = 0,no need to explicitly initialize to 0 (unless you did this for documentation purposes, which is fine).> + }; > + struct virtnet_info *vi = netdev_priv(dev); > + > + memcpy(ec, &ec_default, sizeof(ec_default)); > + > + if (vi->sq[0].napi.weight) > + ec->tx_max_coalesced_frames = 1; > + > + return 0; > +} > + > static void virtnet_init_settings(struct net_device *dev) > { > struct virtnet_info *vi = netdev_priv(dev); > @@ -2219,6 +2279,8 @@ static const struct ethtool_ops virtnet_ethtool_ops = { > .get_ts_info = ethtool_op_get_ts_info, > .get_link_ksettings = virtnet_get_link_ksettings, > .set_link_ksettings = virtnet_set_link_ksettings, > + .set_coalesce = virtnet_set_coalesce, > + .get_coalesce = virtnet_get_coalesce, > }; > > static void virtnet_freeze_down(struct virtio_device *vdev) > -- > 2.17.1 >
David Miller
2018-Sep-13 22:52 UTC
[PATCH net-next V2] virtio_net: ethtool tx napi configuration
From: Jason Wang <jasowang at redhat.com>
Date: Thu, 13 Sep 2018 13:35:45 +0800

> Toggle tx napi through a bit in tx-frames.

This is not what the code implements as the interface any more.

Please fix the commit message to match the code.

Thanks.
Jason Wang
2018-Sep-14 03:29 UTC
[PATCH net-next V2] virtio_net: ethtool tx napi configuration
On 2018年09月14日 06:52, David Miller wrote:
> From: Jason Wang <jasowang at redhat.com>
> Date: Thu, 13 Sep 2018 13:35:45 +0800
>
>> Toggle tx napi through a bit in tx-frames.
> This is not what the code implements as the interface any more.
>
> Please fix the commit message to match the code.
>
> Thanks.

Will fix this.

Thanks
Jason Wang
2018-Sep-14 08:24 UTC
[PATCH net-next V2] virtio_net: ethtool tx napi configuration
On 2018?09?13? 23:20, Willem de Bruijn wrote:> On Thu, Sep 13, 2018 at 1:40 AM Jason Wang <jasowang at redhat.com> wrote: >> Implement ethtool .set_coalesce (-C) and .get_coalesce (-c) handlers. >> Interrupt moderation is currently not supported, so these accept and >> display the default settings of 0 usec and 1 frame. >> >> Toggle tx napi through a bit in tx-frames. So as to not interfere >> with possible future interrupt moderation, value 1 means tx napi while >> value 0 means not. >> >> To properly synchronize with the data path, tx napi is disabled and >> tx lock is held when changing the value of napi weight. And two more >> places that can access tx napi weight: >> >> - speculative tx polling in rx napi, we can leave it as is since it >> not a must for correctness. >> - skb_xmit_done(), one more check of napi weight is added before >> trying to enable tx to avoid tx to be disabled forever if napi is >> disabled after skb_xmit_done() but before the napi >> >> Link: https://patchwork.ozlabs.org/patch/948149/ >> Suggested-by: Jason Wang <jasowang at redhat.com> >> Signed-off-by: Willem de Bruijn <willemb at google.com> >> Signed-off-by: Jason Wang <jasowang at redhat.com> >> --- >> Changes from V1: >> - try to synchronize with datapath to allow changing mode when >> interface is up. 
>> - use tx-frames 0 as to disable tx napi while tx-frames 1 to enable tx napi >> --- >> drivers/net/virtio_net.c | 64 +++++++++++++++++++++++++++++++++++++++- >> 1 file changed, 63 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c >> index 765920905226..6e70864f5899 100644 >> --- a/drivers/net/virtio_net.c >> +++ b/drivers/net/virtio_net.c >> @@ -66,6 +66,8 @@ DECLARE_EWMA(pkt_len, 0, 64) >> >> #define VIRTNET_DRIVER_VERSION "1.0.0" >> >> +static const u32 ethtool_coalesce_napi_mask = (1UL << 10); >> + > This is no longer neededYes, will remove this.> >> static const unsigned long guest_offloads[] = { >> VIRTIO_NET_F_GUEST_TSO4, >> VIRTIO_NET_F_GUEST_TSO6, >> @@ -1444,7 +1446,10 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) >> >> virtqueue_napi_complete(napi, sq->vq, 0); >> >> - if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) >> + /* Check napi.weight to avoid tx stall since it could be set >> + * to zero by ethtool after skb_xmit_done(). >> + */ >> + if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS || !sq->napi.weight) >> netif_tx_wake_queue(txq); > I see. This assumes that the napi handler will always be called on > conversion from napi to no-napi mode. > > That is safe to assume because if it isn't called (and will not call > netif_tx_wake_queue) that implies that napi was not scheduled, and > thus the tx interrupt was not suppressed and thus there was no tx > completion work to be scheduled?If it isn't called it means skb_xmit_done() wakeup tx directly instead of schedule tx. This could be a little bit early since there may be still lots of pending tx packets. But it doesn't harm, start_xmit() can handle this by re enable a delayed tx interrupt and disable TX. 
But there's a bug, look like I need remove the check of (!sq->napi.weight) in the beginning of the function.> >> return 0; >> @@ -2181,6 +2186,61 @@ static int virtnet_get_link_ksettings(struct net_device *dev, >> return 0; >> } >> >> +static int virtnet_set_coalesce(struct net_device *dev, >> + struct ethtool_coalesce *ec) >> +{ >> + struct ethtool_coalesce ec_default = { >> + .cmd = ETHTOOL_SCOALESCE, >> + .rx_max_coalesced_frames = 1, >> + }; >> + struct virtnet_info *vi = netdev_priv(dev); >> + int i, napi_weight; >> + >> + if (ec->tx_max_coalesced_frames > 1) >> + return -EINVAL; >> + >> + ec_default.tx_max_coalesced_frames = ec->tx_max_coalesced_frames; >> + napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; >> + >> + /* disallow changes to fields not explicitly tested above */ >> + if (memcmp(ec, &ec_default, sizeof(ec_default))) >> + return -EINVAL; >> + >> + if (napi_weight ^ vi->sq[0].napi.weight) { >> + for (i = 0; i < vi->max_queue_pairs; i++) { >> + struct netdev_queue *txq >> + netdev_get_tx_queue(vi->dev, i); >> + >> + virtnet_napi_tx_disable(&vi->sq[i].napi); >> + __netif_tx_lock_bh(txq); >> + vi->sq[i].napi.weight = napi_weight; >> + __netif_tx_unlock_bh(txq); >> + virtnet_napi_tx_enable(vi, vi->sq[i].vq, >> + &vi->sq[i].napi); >> + } >> + } >> + >> + return 0; >> +} >> + >> +static int virtnet_get_coalesce(struct net_device *dev, >> + struct ethtool_coalesce *ec) >> +{ >> + struct ethtool_coalesce ec_default = { >> + .cmd = ETHTOOL_GCOALESCE, >> + .rx_max_coalesced_frames = 1, >> + .tx_max_coalesced_frames = 0, > no need to explicitly initialize to 0 (unless you did this for > documentation purposes, which is fine).Yes. 
Thanks>> + }; >> + struct virtnet_info *vi = netdev_priv(dev); >> + >> + memcpy(ec, &ec_default, sizeof(ec_default)); >> + >> + if (vi->sq[0].napi.weight) >> + ec->tx_max_coalesced_frames = 1; >> + >> + return 0; >> +} >> + >> static void virtnet_init_settings(struct net_device *dev) >> { >> struct virtnet_info *vi = netdev_priv(dev); >> @@ -2219,6 +2279,8 @@ static const struct ethtool_ops virtnet_ethtool_ops = { >> .get_ts_info = ethtool_op_get_ts_info, >> .get_link_ksettings = virtnet_get_link_ksettings, >> .set_link_ksettings = virtnet_set_link_ksettings, >> + .set_coalesce = virtnet_set_coalesce, >> + .get_coalesce = virtnet_get_coalesce, >> }; >> >> static void virtnet_freeze_down(struct virtio_device *vdev) >> -- >> 2.17.1 >>
Reasonably Related Threads
- [PATCH net-next V2] virtio_net: ethtool tx napi configuration
- [PATCH RFC v5 net-next 4/6] virtio-net: add basic interrupt coalescing support
- [PATCH RFC v5 net-next 4/6] virtio-net: add basic interrupt coalescing support
- [PATCH net-next V3] virtio_net: ethtool tx napi configuration
- [PATCH net-next V3] virtio_net: ethtool tx napi configuration