Fredrik Markstrom
2017-May-11 13:46 UTC
[Bridge] [PATCH v2 1/2] net: Added mtu parameter to dev_forward_skb calls
From: Fredrik Markström <fredrik.markstrom at gmail.com> is_skb_forwardable() currently checks if the packet size is <= mtu of the receiving interface. This is not consistent with most of the hardware ethernet drivers that happily receive packets larger than MTU. This patch adds a parameter to dev_forward_skb and is_skb_forwardable so that the caller can override this packet size limit. Signed-off-by: Fredrik Markstrom <fredrik.markstrom at gmail.com> --- drivers/net/ipvlan/ipvlan_core.c | 7 ++++--- drivers/net/macvlan.c | 4 ++-- drivers/net/veth.c | 2 +- include/linux/netdevice.h | 10 +++++----- net/bridge/br_forward.c | 4 ++-- net/core/dev.c | 17 +++++++++++------ net/core/filter.c | 4 ++-- net/l2tp/l2tp_eth.c | 2 +- 8 files changed, 28 insertions(+), 22 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 1f3295e274d0..dbbe48ade204 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -234,7 +234,8 @@ void ipvlan_process_multicast(struct work_struct *work) nskb->pkt_type = pkt_type; nskb->dev = ipvlan->dev; if (tx_pkt) - ret = dev_forward_skb(ipvlan->dev, nskb); + ret = dev_forward_skb(ipvlan->dev, + nskb, 0); else ret = netif_rx(nskb); } @@ -301,7 +302,7 @@ static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb, if (local) { skb->pkt_type = PACKET_HOST; - if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS) + if (dev_forward_skb(ipvlan->dev, skb, 0) == NET_RX_SUCCESS) success = true; } else { ret = RX_HANDLER_ANOTHER; @@ -547,7 +548,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) * the skb for the main-dev. At the RX side we just return * RX_PASS for it to be processed further on the stack. 
*/ - return dev_forward_skb(ipvlan->phy_dev, skb); + return dev_forward_skb(ipvlan->phy_dev, skb, 0); } else if (is_multicast_ether_addr(eth->h_dest)) { ipvlan_skb_crossing_ns(skb, NULL); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 9261722960a7..4db2876c1e44 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -202,7 +202,7 @@ static int macvlan_broadcast_one(struct sk_buff *skb, struct net_device *dev = vlan->dev; if (local) - return __dev_forward_skb(dev, skb); + return __dev_forward_skb(dev, skb, 0); skb->dev = dev; if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) @@ -495,7 +495,7 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) dest = macvlan_hash_lookup(port, eth->h_dest); if (dest && dest->mode == MACVLAN_MODE_BRIDGE) { /* send to lowerdev first for its network taps */ - dev_forward_skb(vlan->lowerdev, skb); + dev_forward_skb(vlan->lowerdev, skb, 0); return NET_XMIT_SUCCESS; } diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 8c39d6d690e5..561da3a63b8a 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -116,7 +116,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) goto drop; } - if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) { + if (likely(dev_forward_skb(rcv, skb, 0) == NET_RX_SUCCESS)) { struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats); u64_stats_update_begin(&stats->syncp); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 97456b2539e4..f207b083ffec 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3282,16 +3282,16 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); -int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); -int dev_forward_skb(struct 
net_device *dev, struct sk_buff *skb); +int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb, int mtu); +int dev_forward_skb(struct net_device *dev, struct sk_buff *skb, int mtu); bool is_skb_forwardable(const struct net_device *dev, - const struct sk_buff *skb); + const struct sk_buff *skb, int mtu); static __always_inline int ____dev_forward_skb(struct net_device *dev, - struct sk_buff *skb) + struct sk_buff *skb, int mtu) { if (skb_orphan_frags(skb, GFP_ATOMIC) || - unlikely(!is_skb_forwardable(dev, skb))) { + unlikely(!is_skb_forwardable(dev, skb, mtu))) { atomic_long_inc(&dev->rx_dropped); kfree_skb(skb); return NET_RX_DROP; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 902af6ba481c..15ab57da5ef1 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -35,7 +35,7 @@ static inline int should_deliver(const struct net_bridge_port *p, int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { - if (!is_skb_forwardable(skb->dev, skb)) + if (!is_skb_forwardable(skb->dev, skb, 0)) goto drop; skb_push(skb, ETH_HLEN); @@ -96,7 +96,7 @@ static void __br_forward(const struct net_bridge_port *to, net = dev_net(indev); } else { if (unlikely(netpoll_tx_running(to->br->dev))) { - if (!is_skb_forwardable(skb->dev, skb)) { + if (!is_skb_forwardable(skb->dev, skb, 0)) { kfree_skb(skb); } else { skb_push(skb, ETH_HLEN); diff --git a/net/core/dev.c b/net/core/dev.c index 533a6d6f6092..f7c53d7c8e26 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1767,14 +1767,18 @@ static inline void net_timestamp_set(struct sk_buff *skb) __net_timestamp(SKB); \ } \ -bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb) +bool is_skb_forwardable(const struct net_device *dev, + const struct sk_buff *skb, int mtu) { unsigned int len; if (!(dev->flags & IFF_UP)) return false; - len = dev->mtu + dev->hard_header_len + VLAN_HLEN; + if (mtu == 0) + mtu = dev->mtu; + + len = mtu + 
dev->hard_header_len + VLAN_HLEN; if (skb->len <= len) return true; @@ -1788,9 +1792,9 @@ bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb) } EXPORT_SYMBOL_GPL(is_skb_forwardable); -int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) +int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb, int mtu) { - int ret = ____dev_forward_skb(dev, skb); + int ret = ____dev_forward_skb(dev, skb, mtu); if (likely(!ret)) { skb->protocol = eth_type_trans(skb, dev); @@ -1806,6 +1810,7 @@ EXPORT_SYMBOL_GPL(__dev_forward_skb); * * @dev: destination network device * @skb: buffer to forward + * @mtu: Maximum size to forward. If 0 dev->mtu is used. * * return values: * NET_RX_SUCCESS (no congestion) @@ -1819,9 +1824,9 @@ EXPORT_SYMBOL_GPL(__dev_forward_skb); * we have to clear all information in the skb that could * impact namespace isolation. */ -int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) +int dev_forward_skb(struct net_device *dev, struct sk_buff *skb, int mtu) { - return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb); + return __dev_forward_skb(dev, skb, mtu) ?: netif_rx_internal(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); diff --git a/net/core/filter.c b/net/core/filter.c index ebaeaf2e46e8..3f3eb26e7ea1 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1632,13 +1632,13 @@ static const struct bpf_func_proto bpf_csum_update_proto = { static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb) { - return dev_forward_skb(dev, skb); + return dev_forward_skb(dev, skb, 0); } static inline int __bpf_rx_skb_no_mac(struct net_device *dev, struct sk_buff *skb) { - int ret = ____dev_forward_skb(dev, skb); + int ret = ____dev_forward_skb(dev, skb, 0); if (likely(!ret)) { skb->dev = dev; diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 6fd41d7afe1e..1258555b6578 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -164,7 +164,7 @@ static void 
l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, skb_dst_drop(skb); nf_reset(skb); - if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) { + if (dev_forward_skb(dev, skb, 0) == NET_RX_SUCCESS) { atomic_long_inc(&priv->rx_packets); atomic_long_add(data_len, &priv->rx_bytes); } else { -- 2.11.0
Stephen Hemminger
2017-May-11 16:01 UTC
[Bridge] [PATCH v2 1/2] net: Added mtu parameter to dev_forward_skb calls
On Thu, 11 May 2017 15:46:27 +0200 Fredrik Markstrom <fredrik.markstrom at gmail.com> wrote:> From: Fredrik Markström <fredrik.markstrom at gmail.com> > > is_skb_forwardable() currently checks if the packet size is <= mtu of > the receiving interface. This is not consistent with most of the hardware > ethernet drivers that happily receive packets larger than MTU. Wrong. Hardware interfaces are free to drop any packet greater than MTU (actually MTU + VLAN). The actual limit is a function of the hardware. Some hardware can only limit by power of 2; some can only limit frames larger than 1500; some have no limiting at all. Any application should: * not expect packets larger than MTU to be received * not send packets larger than MTU * check actual receive size. IP protocols will do truncation of padded packets