Michael S. Tsirkin
2020-Dec-28 16:28 UTC
[PATCH rfc 1/3] virtio-net: support transmit hash report
On Mon, Dec 28, 2020 at 11:22:31AM -0500, Willem de Bruijn wrote:> From: Willem de Bruijn <willemb at google.com> > > Virtio-net supports sharing the flow hash from host to guest on rx. > Do the same on transmit, to allow the host to infer connection state > for more robust routing and telemetry. > > Linux derives ipv6 flowlabel and ECMP multipath from sk->sk_txhash, > and updates these fields on error with sk_rethink_txhash. This feature > allows the host to make similar decisions. > > Besides the raw hash, optionally also convey connection state for > this hash. Specifically, the hash rotates on transmit timeout. To > avoid having to keep a stateful table in the host to detect flow > changes, explicitly notify when a hash changed due to timeout.I don't actually see code using VIRTIO_NET_HASH_STATE_TIMEOUT_BIT in this series. Want to split out that part to a separate patch?> Signed-off-by: Willem de Bruijn <willemb at google.com> > --- > drivers/net/virtio_net.c | 24 +++++++++++++++++++++--- > include/uapi/linux/virtio_net.h | 10 +++++++++- > 2 files changed, 30 insertions(+), 4 deletions(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index 21b71148c532..b917b7333928 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -201,6 +201,9 @@ struct virtnet_info { > /* Host will merge rx buffers for big packets (shake it! shake it!) */ > bool mergeable_rx_bufs; > > + /* Guest will pass tx path info to the host */ > + bool has_tx_hash; > + > /* Has control virtqueue */ > bool has_cvq; > > @@ -394,9 +397,9 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, > > hdr_len = vi->hdr_len; > if (vi->mergeable_rx_bufs) > - hdr_padded_len = sizeof(*hdr); > + hdr_padded_len = max_t(unsigned int, hdr_len, sizeof(*hdr)); > else > - hdr_padded_len = sizeof(struct padded_vnet_hdr); > + hdr_padded_len = ALIGN(hdr_len, 16); > > /* hdr_valid means no XDP, so we can copy the vnet header */ > if (hdr_valid) > @@ -1534,6 +1537,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) > struct virtio_net_hdr_mrg_rxbuf *hdr; > const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; > struct virtnet_info *vi = sq->vq->vdev->priv; > + struct virtio_net_hdr_v1_hash *ht; > int num_sg; > unsigned hdr_len = vi->hdr_len; > bool can_push; > @@ -1558,6 +1562,14 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) > if (vi->mergeable_rx_bufs) > hdr->num_buffers = 0; > > + ht = (void *)hdr; > + if (vi->has_tx_hash) { > + ht->hash_value = cpu_to_virtio32(vi->vdev, skb->hash); > + ht->hash_report = skb->l4_hash ? VIRTIO_NET_HASH_REPORT_L4 : > + VIRTIO_NET_HASH_REPORT_OTHER; > + ht->hash_state = VIRTIO_NET_HASH_STATE_DEFAULT; > + } > + > sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); > if (can_push) { > __skb_push(skb, hdr_len); > @@ -3054,6 +3066,11 @@ static int virtnet_probe(struct virtio_device *vdev) > else > vi->hdr_len = sizeof(struct virtio_net_hdr); > > + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_TX_HASH)) { > + vi->has_tx_hash = true; > + vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); > + } > + > if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || > virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) > vi->any_header_sg = true; > @@ -3243,7 +3260,8 @@ static struct virtio_device_id id_table[] = { > VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ > VIRTIO_NET_F_CTRL_MAC_ADDR, \ > VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ > - VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY > + VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ > + VIRTIO_NET_F_TX_HASH > > static unsigned int features[] = { > VIRTNET_FEATURES, > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h > index 3f55a4215f11..f6881b5b77ee 100644 > --- a/include/uapi/linux/virtio_net.h > +++ b/include/uapi/linux/virtio_net.h > @@ -57,6 +57,7 @@ > * Steering */ > #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ > > +#define VIRTIO_NET_F_TX_HASH 56 /* Guest sends hash report */Guest -> Driver> #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ > #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ > #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ > @@ -170,8 +171,15 @@ struct virtio_net_hdr_v1_hash { > #define VIRTIO_NET_HASH_REPORT_IPv6_EX 7 > #define VIRTIO_NET_HASH_REPORT_TCPv6_EX 8 > #define VIRTIO_NET_HASH_REPORT_UDPv6_EX 9 > +#define VIRTIO_NET_HASH_REPORT_L4 10 > +#define VIRTIO_NET_HASH_REPORT_OTHER 11 > __le16 hash_report; > - __le16 padding; > + union { > + __le16 padding; > +#define VIRTIO_NET_HASH_STATE_DEFAULT 0 > +#define VIRTIO_NET_HASH_STATE_TIMEOUT_BIT 0x1 > + __le16 hash_state; > + }; > }; > > #ifndef VIRTIO_NET_NO_LEGACY > -- > 2.29.2.729.g45daf8777d-goog
Willem de Bruijn
2020-Dec-28 16:47 UTC
[PATCH rfc 1/3] virtio-net: support transmit hash report
On Mon, Dec 28, 2020 at 11:28 AM Michael S. Tsirkin <mst at redhat.com> wrote:> > On Mon, Dec 28, 2020 at 11:22:31AM -0500, Willem de Bruijn wrote: > > From: Willem de Bruijn <willemb at google.com> > > > > Virtio-net supports sharing the flow hash from host to guest on rx. > > Do the same on transmit, to allow the host to infer connection state > > for more robust routing and telemetry. > > > > Linux derives ipv6 flowlabel and ECMP multipath from sk->sk_txhash, > > and updates these fields on error with sk_rethink_txhash. This feature > > allows the host to make similar decisions. > > > > Besides the raw hash, optionally also convey connection state for > > this hash. Specifically, the hash rotates on transmit timeout. To > > avoid having to keep a stateful table in the host to detect flow > > changes, explicitly notify when a hash changed due to timeout. > > I don't actually see code using VIRTIO_NET_HASH_STATE_TIMEOUT_BIT > in this series. Want to split out that part to a separate patch?Will do. I wanted to make it clear that these bits must be reserved (i.e., zero) until a later patch specifies them. The timeout notification feature requires additional plumbing between the TCP protocol stack and device driver, probably an skb bit. I'd like to leave that as follow-up for now. Thanks for the fast feedback!