Daniel Borkmann
2020-Jul-31 21:12 UTC
[Bridge] [RFC PATCH bpf-next 2/3] bpf: Add helper to do forwarding lookups in kernel FDB table
On 7/31/20 6:44 AM, Yoshiki Komachi wrote:> This patch adds a new bpf helper to access FDB in the kernel tables > from XDP programs. The helper enables us to find the destination port > of master bridge in XDP layer with high speed. If an entry in the > tables is successfully found, egress device index will be returned. > > In cases of failure, packets will be dropped or forwarded to upper > networking stack in the kernel by XDP programs. Multicast and broadcast > packets are currently not supported. Thus, these will need to be > passed to upper layer on the basis of XDP_PASS action. > > The API uses destination MAC and VLAN ID as keys, so XDP programs > need to extract these from forwarded packets. > > Signed-off-by: Yoshiki Komachi <komachi.yoshiki at gmail.com>Few initial comments below:> --- > include/uapi/linux/bpf.h | 28 +++++++++++++++++++++ > net/core/filter.c | 45 ++++++++++++++++++++++++++++++++++ > scripts/bpf_helpers_doc.py | 1 + > tools/include/uapi/linux/bpf.h | 28 +++++++++++++++++++++ > 4 files changed, 102 insertions(+) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 54d0c886e3ba..f2e729dd1721 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -2149,6 +2149,22 @@ union bpf_attr { > * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the > * packet is not forwarded or needs assist from full stack > * > + * long bpf_fdb_lookup(void *ctx, struct bpf_fdb_lookup *params, int plen, u32 flags) > + * Description > + * Do FDB lookup in kernel tables using parameters in *params*. > + * If lookup is successful (ie., FDB lookup finds a destination entry), > + * ifindex is set to the egress device index from the FDB lookup. > + * Both multicast and broadcast packets are currently unsupported > + * in XDP layer. > + * > + * *plen* argument is the size of the passed **struct bpf_fdb_lookup**. > + * *ctx* is only **struct xdp_md** for XDP programs. > + * > + * Return > + * * < 0 if any input argument is invalid > + * * 0 on success (destination port is found) > + * * > 0 on failure (there is no entry) > + * > * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) > * Description > * Add an entry to, or update a sockhash *map* referencing sockets. > @@ -3449,6 +3465,7 @@ union bpf_attr { > FN(get_stack), \ > FN(skb_load_bytes_relative), \ > FN(fib_lookup), \ > + FN(fdb_lookup), \This breaks UAPI. Needs to be added to the very end of the list.> FN(sock_hash_update), \ > FN(msg_redirect_hash), \ > FN(sk_redirect_hash), \ > @@ -4328,6 +4345,17 @@ struct bpf_fib_lookup { > __u8 dmac[6]; /* ETH_ALEN */ > }; > > +enum { > + BPF_FDB_LKUP_RET_SUCCESS, /* lookup successful */ > + BPF_FDB_LKUP_RET_NOENT, /* entry is not found */ > +}; > + > +struct bpf_fdb_lookup { > + unsigned char addr[6]; /* ETH_ALEN */ > + __u16 vlan_id; > + __u32 ifindex; > +}; > + > enum bpf_task_fd_type { > BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ > BPF_FD_TYPE_TRACEPOINT, /* tp name */ > diff --git a/net/core/filter.c b/net/core/filter.c > index 654c346b7d91..68800d1b8cd5 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -45,6 +45,7 @@ > #include <linux/filter.h> > #include <linux/ratelimit.h> > #include <linux/seccomp.h> > +#include <linux/if_bridge.h> > #include <linux/if_vlan.h> > #include <linux/bpf.h> > #include <linux/btf.h> > @@ -5084,6 +5085,46 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = { > .arg4_type = ARG_ANYTHING, > }; > > +#if IS_ENABLED(CONFIG_BRIDGE) > +BPF_CALL_4(bpf_xdp_fdb_lookup, struct xdp_buff *, ctx, > + struct bpf_fdb_lookup *, params, int, plen, u32, flags) > +{ > + struct net_device *src, *dst; > + struct net *net; > + > + if (plen < sizeof(*params)) > + return -EINVAL;Given flags are not used, this needs to reject anything invalid otherwise you're not able to extend it in future.> + net = dev_net(ctx->rxq->dev); > + > + if (is_multicast_ether_addr(params->addr) || > + is_broadcast_ether_addr(params->addr)) > + return BPF_FDB_LKUP_RET_NOENT; > + > + src = dev_get_by_index_rcu(net, params->ifindex); > + if (unlikely(!src)) > + return -ENODEV; > + > + dst = br_fdb_find_port_xdp(src, params->addr, params->vlan_id); > + if (dst) { > + params->ifindex = dst->ifindex; > + return BPF_FDB_LKUP_RET_SUCCESS; > + }Currently the helper description says nothing that this is /only/ limited to bridges. I think it would be better to also name the helper bpf_br_fdb_lookup() as well if so to avoid any confusion.> + return BPF_FDB_LKUP_RET_NOENT; > +} > + > +static const struct bpf_func_proto bpf_xdp_fdb_lookup_proto = { > + .func = bpf_xdp_fdb_lookup, > + .gpl_only = true, > + .ret_type = RET_INTEGER, > + .arg1_type = ARG_PTR_TO_CTX, > + .arg2_type = ARG_PTR_TO_MEM, > + .arg3_type = ARG_CONST_SIZE, > + .arg4_type = ARG_ANYTHING, > +}; > +#endifThis should also have a tc pendant (similar as done in routing lookup helper) in case native XDP is not available. This will be useful for those that have the same code compilable for both tc/XDP.> #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) > static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) > { > @@ -6477,6 +6518,10 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > return &bpf_xdp_adjust_tail_proto; > case BPF_FUNC_fib_lookup: > return &bpf_xdp_fib_lookup_proto; > +#if IS_ENABLED(CONFIG_BRIDGE) > + case BPF_FUNC_fdb_lookup: > + return &bpf_xdp_fdb_lookup_proto; > +#endif > #ifdef CONFIG_INET > case BPF_FUNC_sk_lookup_udp: > return &bpf_xdp_sk_lookup_udp_proto;
Yoshiki Komachi
2020-Aug-05 04:45 UTC
[Bridge] [RFC PATCH bpf-next 2/3] bpf: Add helper to do forwarding lookups in kernel FDB table
> 2020/08/01 6:12?Daniel Borkmann <daniel at iogearbox.net>????: > > On 7/31/20 6:44 AM, Yoshiki Komachi wrote: >> This patch adds a new bpf helper to access FDB in the kernel tables >> from XDP programs. The helper enables us to find the destination port >> of master bridge in XDP layer with high speed. If an entry in the >> tables is successfully found, egress device index will be returned. >> In cases of failure, packets will be dropped or forwarded to upper >> networking stack in the kernel by XDP programs. Multicast and broadcast >> packets are currently not supported. Thus, these will need to be >> passed to upper layer on the basis of XDP_PASS action. >> The API uses destination MAC and VLAN ID as keys, so XDP programs >> need to extract these from forwarded packets. >> Signed-off-by: Yoshiki Komachi <komachi.yoshiki at gmail.com> > > Few initial comments below: > >> --- >> include/uapi/linux/bpf.h | 28 +++++++++++++++++++++ >> net/core/filter.c | 45 ++++++++++++++++++++++++++++++++++ >> scripts/bpf_helpers_doc.py | 1 + >> tools/include/uapi/linux/bpf.h | 28 +++++++++++++++++++++ >> 4 files changed, 102 insertions(+) >> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h >> index 54d0c886e3ba..f2e729dd1721 100644 >> --- a/include/uapi/linux/bpf.h >> +++ b/include/uapi/linux/bpf.h >> @@ -2149,6 +2149,22 @@ union bpf_attr { >> * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the >> * packet is not forwarded or needs assist from full stack >> * >> + * long bpf_fdb_lookup(void *ctx, struct bpf_fdb_lookup *params, int plen, u32 flags) >> + * Description >> + * Do FDB lookup in kernel tables using parameters in *params*. >> + * If lookup is successful (ie., FDB lookup finds a destination entry), >> + * ifindex is set to the egress device index from the FDB lookup. >> + * Both multicast and broadcast packets are currently unsupported >> + * in XDP layer. >> + * >> + * *plen* argument is the size of the passed **struct bpf_fdb_lookup**. >> + * *ctx* is only **struct xdp_md** for XDP programs. >> + * >> + * Return >> + * * < 0 if any input argument is invalid >> + * * 0 on success (destination port is found) >> + * * > 0 on failure (there is no entry) >> + * >> * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) >> * Description >> * Add an entry to, or update a sockhash *map* referencing sockets. >> @@ -3449,6 +3465,7 @@ union bpf_attr { >> FN(get_stack), \ >> FN(skb_load_bytes_relative), \ >> FN(fib_lookup), \ >> + FN(fdb_lookup), \ > > This breaks UAPI. Needs to be added to the very end of the list.I figured it out and will move it to the very end of the list. Thanks!>> FN(sock_hash_update), \ >> FN(msg_redirect_hash), \ >> FN(sk_redirect_hash), \ >> @@ -4328,6 +4345,17 @@ struct bpf_fib_lookup { >> __u8 dmac[6]; /* ETH_ALEN */ >> }; >> +enum { >> + BPF_FDB_LKUP_RET_SUCCESS, /* lookup successful */ >> + BPF_FDB_LKUP_RET_NOENT, /* entry is not found */ >> +}; >> + >> +struct bpf_fdb_lookup { >> + unsigned char addr[6]; /* ETH_ALEN */ >> + __u16 vlan_id; >> + __u32 ifindex; >> +}; >> + >> enum bpf_task_fd_type { >> BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ >> BPF_FD_TYPE_TRACEPOINT, /* tp name */ >> diff --git a/net/core/filter.c b/net/core/filter.c >> index 654c346b7d91..68800d1b8cd5 100644 >> --- a/net/core/filter.c >> +++ b/net/core/filter.c >> @@ -45,6 +45,7 @@ >> #include <linux/filter.h> >> #include <linux/ratelimit.h> >> #include <linux/seccomp.h> >> +#include <linux/if_bridge.h> >> #include <linux/if_vlan.h> >> #include <linux/bpf.h> >> #include <linux/btf.h> >> @@ -5084,6 +5085,46 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = { >> .arg4_type = ARG_ANYTHING, >> }; >> +#if IS_ENABLED(CONFIG_BRIDGE) >> +BPF_CALL_4(bpf_xdp_fdb_lookup, struct xdp_buff *, ctx, >> + struct bpf_fdb_lookup *, params, int, plen, u32, flags) >> +{ >> + struct net_device *src, *dst; >> + struct net *net; >> + >> + if (plen < sizeof(*params)) >> + return -EINVAL; > > Given flags are not used, this needs to reject anything invalid otherwise > you're not able to extend it in future.I added this flags based on the bpf_fib_lookup() helper, but these are not used at this point. I will make sure whether this flags are required or not.>> + net = dev_net(ctx->rxq->dev); >> + >> + if (is_multicast_ether_addr(params->addr) || >> + is_broadcast_ether_addr(params->addr)) >> + return BPF_FDB_LKUP_RET_NOENT; >> + >> + src = dev_get_by_index_rcu(net, params->ifindex); >> + if (unlikely(!src)) >> + return -ENODEV; >> + >> + dst = br_fdb_find_port_xdp(src, params->addr, params->vlan_id); >> + if (dst) { >> + params->ifindex = dst->ifindex; >> + return BPF_FDB_LKUP_RET_SUCCESS; >> + } > > Currently the helper description says nothing that this is /only/ limited to > bridges. I think it would be better to also name the helper bpf_br_fdb_lookup() > as well if so to avoid any confusion.For now, the helper enables only bridges to access FDB table in the kernel as you understand, so I will try to rename the helper in the next version.>> + return BPF_FDB_LKUP_RET_NOENT; >> +} >> + >> +static const struct bpf_func_proto bpf_xdp_fdb_lookup_proto = { >> + .func = bpf_xdp_fdb_lookup, >> + .gpl_only = true, >> + .ret_type = RET_INTEGER, >> + .arg1_type = ARG_PTR_TO_CTX, >> + .arg2_type = ARG_PTR_TO_MEM, >> + .arg3_type = ARG_CONST_SIZE, >> + .arg4_type = ARG_ANYTHING, >> +}; >> +#endif > > This should also have a tc pendant (similar as done in routing lookup helper) > in case native XDP is not available. This will be useful for those that have > the same code compilable for both tc/XDP.Thanks, I agree with your idea. On the basis of the bpf_fib_lookup() helper, I will try to add the feature so that the helper can also be used in the TC hook. Best regards,>> #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) >> static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) >> { >> @@ -6477,6 +6518,10 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) >> return &bpf_xdp_adjust_tail_proto; >> case BPF_FUNC_fib_lookup: >> return &bpf_xdp_fib_lookup_proto; >> +#if IS_ENABLED(CONFIG_BRIDGE) >> + case BPF_FUNC_fdb_lookup: >> + return &bpf_xdp_fdb_lookup_proto; >> +#endif >> #ifdef CONFIG_INET >> case BPF_FUNC_sk_lookup_udp: >> return &bpf_xdp_sk_lookup_udp_proto;? Yoshiki Komachi komachi.yoshiki at gmail.com