frr/zebra/zebra_l2.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

214 lines
5.7 KiB
C
Raw Permalink Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Zebra Layer-2 interface Data structures and definitions
* Copyright (C) 2016, 2017 Cumulus Networks, Inc.
*/
#ifndef _ZEBRA_L2_H
#define _ZEBRA_L2_H
#include <zebra.h>
#include "if.h"
#include "vlan.h"
#include "vxlan.h"
#include "zebra/zebra_vrf.h"
#ifdef __cplusplus
extern "C" {
#endif
#define ZEBRA_BRIDGE_NO_ACTION (0)
#define ZEBRA_BRIDGE_MASTER_MAC_CHANGE (1 << 1)
#define ZEBRA_BRIDGE_MASTER_UP (1 << 2)
/* zebra L2 interface information - bridge slave (linkage to bridge) */
struct zebra_l2info_brslave {
ifindex_t bridge_ifindex; /* Bridge Master */
struct interface *br_if; /* Pointer to master */
ns_id_t ns_id; /* network namespace where bridge is */
};
zebra: uplink tracking and startup delay for EVPN-MH Local ethernet segments are held in a protodown or error-disabled state if access to the VxLAN overlay is not ready - 1. When FRR comes up the local-ESs/access-port are kept protodown for the startup-delay duration. During this time the underlay and EVPN routes via it are expected to converge. 2. When all the uplinks/core-links attached to the underlay go down the access-ports are similarly protodowned. The ES-bond protodown state is propagated to each ES-bond member and programmed in the dataplane/kernel (per-bond-member). Configuring uplinks - vtysh -c "conf t" vtysh -c "interface swp4" vtysh -c "evpn mh uplink" Configuring startup delay - vtysh -c "conf t" vtysh -c "evpn mh startup-delay 100" >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> EVPN protodown display - ======================== root@torm-11:mgmt:~# vtysh -c "show evpn" L2 VNIs: 10 L3 VNIs: 3 Advertise gateway mac-ip: No Advertise svi mac-ip: No Duplicate address detection: Disable Detection max-moves 5, time 180 EVPN MH: mac-holdtime: 60s, neigh-holdtime: 60s startup-delay: 180s, start-delay-timer: 00:01:14 <<<<<<<<<<<< uplink-cfg-cnt: 4, uplink-active-cnt: 4 protodown: startup-delay <<<<<<<<<<<<<<<<<<<<<<< >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ES-bond protodown display - =========================== root@torm-11:mgmt:~# vtysh -c "show interface hostbond1" Interface hostbond1 is up, line protocol is down Link ups: 0 last: (never) Link downs: 1 last: 2020/04/26 20:38:03.53 PTM status: disabled vrf: default OS Description: Local Node/s torm-11 and Ports swp5 <==> Remote Node/s hostd-11 and Ports swp1 index 58 metric 0 mtu 9152 speed 4294967295 flags: <UP,BROADCAST,MULTICAST> Type: Ethernet HWaddr: 00:02:00:00:00:35 Interface Type bond Master interface: bridge EVPN-MH: ES id 1 ES sysmac 00:00:00:00:01:11 protodown: off rc: startup-delay <<<<<<<<<<<<<<<<< >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ES-bond member protodown display - ================================== root@torm-11:mgmt:~# vtysh -c "show interface swp5" Interface swp5 is up, line protocol is down Link ups: 0 last: (never) Link downs: 3 last: 2020/04/26 20:38:03.52 PTM status: disabled vrf: default index 7 metric 0 mtu 9152 speed 10000 flags: <UP,BROADCAST,MULTICAST> Type: Ethernet HWaddr: 00:02:00:00:00:35 Interface Type Other Master interface: hostbond1 protodown: on rc: startup-delay <<<<<<<<<<<<<<<< root@torm-11:mgmt:~# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Signed-off-by: Anuradha Karuppiah <anuradhak@cumulusnetworks.com>
2020-05-09 04:11:13 +02:00
struct zebra_l2info_bond {
struct list *mbr_zifs; /* slaves using this bond as a master */
};
struct zebra_l2_bridge_vlan {
vlanid_t vid;
struct zebra_evpn_access_bd *access_bd;
};
struct zebra_l2_bridge_if_ctx {
/* input */
struct zebra_if *zif;
int (*func)(struct zebra_if *zif, struct zebra_l2_bridge_vlan *vlan,
void *arg);
/* input-output */
void *arg;
};
struct zebra_l2_bridge_if {
uint8_t vlan_aware;
struct zebra_if *br_zif;
struct hash *vlan_table;
};
/* zebra L2 interface information - bridge interface */
struct zebra_l2info_bridge {
struct zebra_l2_bridge_if bridge;
};
/* zebra L2 interface information - VLAN interface */
struct zebra_l2info_vlan {
vlanid_t vid; /* VLAN id */
};
/* zebra L2 interface information - GRE interface */
struct zebra_l2info_gre {
struct in_addr vtep_ip; /* IFLA_GRE_LOCAL */
struct in_addr vtep_ip_remote; /* IFLA_GRE_REMOTE */
uint32_t ikey;
uint32_t okey;
ifindex_t ifindex_link; /* Interface index of interface
* linked with GRE
*/
ns_id_t link_nsid;
};
struct zebra_vxlan_vni {
vni_t vni; /* VNI */
vlanid_t access_vlan; /* Access VLAN - for VLAN-aware bridge. */
struct in_addr mcast_grp;
uint16_t flags;
};
struct zebra_vxlan_vni_array {
uint16_t count;
struct zebra_vxlan_vni vnis[0];
};
enum {
ZEBRA_VXLAN_IF_VNI = 0, /* per vni vxlan if */
ZEBRA_VXLAN_IF_SVD /* single vxlan device */
};
struct zebra_vxlan_if_vlan_ctx {
vlanid_t vid;
struct zebra_vxlan_vni *vni;
};
struct zebra_vxlan_if_update_ctx {
uint16_t chgflags;
struct in_addr old_vtep_ip;
struct zebra_vxlan_vni old_vni;
struct hash *old_vni_table;
};
struct zebra_vxlan_if_ctx {
/* input */
struct zebra_if *zif;
int (*func)(struct zebra_if *zif, struct zebra_vxlan_vni *vni,
void *arg);
/* input-output */
void *arg;
};
struct zebra_vxlan_vni_info {
int iftype;
union {
struct zebra_vxlan_vni vni; /* per vni vxlan device vni info */
struct hash
*vni_table; /* table of vni's assocated with this if */
};
};
/* zebra L2 interface information - VXLAN interface */
struct zebra_l2info_vxlan {
struct zebra_vxlan_vni_info vni_info;
struct in_addr vtep_ip; /* Local tunnel IP */
ifindex_t ifindex_link; /* Interface index of interface
* linked with VXLAN
*/
ns_id_t link_nsid;
};
struct zebra_l2info_bondslave {
ifindex_t bond_ifindex; /* Bridge Master */
struct interface *bond_if; /* Pointer to master */
};
union zebra_l2if_info {
struct zebra_l2info_bridge br;
struct zebra_l2info_vlan vl;
struct zebra_l2info_vxlan vxl;
struct zebra_l2info_gre gre;
};
zebra: vlan to dplane Offload from main Trigger: Zebra core seen when we convert l2vni to l3vni and back BackTrace: /usr/lib/x86_64-linux-gnu/frr/libfrr.so.0(_zlog_assert_failed+0xe9) [0x7f4af96989d9] /usr/lib/frr/zebra(zebra_vxlan_if_vni_up+0x250) [0x5561022ae030] /usr/lib/frr/zebra(netlink_vlan_change+0x2f4) [0x5561021fd354] /usr/lib/frr/zebra(netlink_parse_info+0xff) [0x55610220d37f] /usr/lib/frr/zebra(+0xc264a) [0x55610220d64a] /usr/lib/x86_64-linux-gnu/frr/libfrr.so.0(thread_call+0x7d) [0x7f4af967e96d] /usr/lib/x86_64-linux-gnu/frr/libfrr.so.0(frr_run+0xe8) [0x7f4af9637588] /usr/lib/frr/zebra(main+0x402) [0x5561021f4d32] /lib/x86_64-linux-gnu/libc.so.6(+0x2724a) [0x7f4af932624a] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0x85) [0x7f4af9326305] /usr/lib/frr/zebra(_start+0x21) [0x5561021f72f1] Root Cause: In working case, - We get a RTM_NEWLINK whose ctx is enqueued by zebra dplane and dequeued by zebra main and processed i.e. (102000 is deleted from vxlan99) before we handle RTM_NEWVLAN. - So in handling of NEWVLAN (vxlan99) we bail out since find with vlan id 703 does not exist. root@leaf2:mgmt:/var/log/frr# cat ~/raja_logs/working/nocras.log | grep "RTM_NEWLINK\|QUEUED\|vxlan99\|in thread" 2024/07/18 23:09:33.741105 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=616, seq=0, pid=0 2024/07/18 23:09:33.744061 ZEBRA: [K8FXY-V65ZJ] Intf dplane ctx 0x7f2244000cf0, op INTF_INSTALL, ifindex (65), result QUEUED 2024/07/18 23:09:33.767240 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=508, seq=0, pid=0 2024/07/18 23:09:33.767380 ZEBRA: [K8FXY-V65ZJ] Intf dplane ctx 0x7f2244000cf0, op INTF_INSTALL, ifindex (73), result QUEUED 2024/07/18 23:09:33.767389 ZEBRA: [NVFT0-HS1EX] INTF_INSTALL for vxlan99(73) 2024/07/18 23:09:33.767404 ZEBRA: [TQR2A-H2RFY] Vlan-Vni(1186:1186-6000002:6000002) update for VxLAN IF vxlan99(73) 2024/07/18 23:09:33.767422 ZEBRA: [TP4VP-XZ627] Del L2-VNI 102000 intf vxlan99(73) 2024/07/18 23:09:33.767858 ZEBRA: [QYXB9-6RNNK] RTM_NEWVLAN bridge IF vxlan99 NS 0 2024/07/18 23:09:33.767866 ZEBRA: [KKZGZ-8PCDW] Cannot find VNI for VID (703) IF vxlan99 for vlan state update >>>>BAIL OUT In failure case, - The NEWVLAN is received first even before processing RTM_NEWLINK. - Since the vxlan id 102000 is not removed from the vxlan99, the find with vlan id 703 returns the 102000 one and we invoke zebra_vxlan_if_vni_up where the interfaces don't match and assert. root@leaf2:mgmt:/var/log/frr# cat ~/raja_logs/noworking/crash.log | grep "RTM_NEWLINK\|QUEUED\|vxlan99\|in thread" 2024/07/18 22:26:43.829370 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=616, seq=0, pid=0 2024/07/18 22:26:43.829646 ZEBRA: [K8FXY-V65ZJ] Intf dplane ctx 0x7fe07c026d30, op INTF_INSTALL, ifindex (65), result QUEUED 2024/07/18 22:26:43.853930 ZEBRA: [QYXB9-6RNNK] RTM_NEWVLAN bridge IF vxlan99 NS 0 2024/07/18 22:26:43.853949 ZEBRA: [K61WJ-XQQ3X] Intf vxlan99(73) L2-VNI 102000 is UP >>> VLAN PROCESSED BEFORE INTF EVENT 2024/07/18 22:26:43.853951 ZEBRA: [SPV50-BX2RP] RAJA zevpn_vxlanif vxlan48 and ifp vxlan99 2024/07/18 22:26:43.854005 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=508, seq=0, pid=0 2024/07/18 22:26:43.854241 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=516, seq=0, pid=0 2024/07/18 22:26:43.854251 ZEBRA: [KMXEB-K771Y] netlink_parse_info: netlink-dp-in (NS 0) type RTM_NEWLINK(16), len=544, seq=0, pid=0 ZEBRA: in thread kernel_read scheduled from zebra/kernel_netlink.c:505 kernel_read() Fix: Similar to #13396, where link change handling was offloaded to dplane, same is being done for vlan events. Note: Prior to this change, zebra main thread was interested in the RTNLGRP_BRVLAN. So all the kernel events pertaining to vlan was handled by zebra main. With this change change as well the handling of vlan events is still with Zebra main. However we offload it via Dplane thread. Ticket :#3878175 Signed-off-by: Rajasekar Raja <rajasekarr@nvidia.com>
2024-08-12 20:51:06 +02:00
struct zebra_vxlan_vlan {
uint8_t state;
uint32_t vrange;
vlanid_t vid;
};
struct zebra_vxlan_vlan_array {
uint16_t count;
struct zebra_vxlan_vlan vlans[0];
};
/* NOTE: These macros are to be invoked only in the "correct" context.
* IOW, the macro VNI_FROM_ZEBRA_IF() will assume the interface is
* of type ZEBRA_IF_VXLAN.
*/
#define VNI_INFO_FROM_ZEBRA_IF(zif) (&((zif)->l2info.vxl.vni_info))
#define IS_ZEBRA_VXLAN_IF_SVD(zif) \
((zif)->l2info.vxl.vni_info.iftype == ZEBRA_VXLAN_IF_SVD)
#define IS_ZEBRA_VXLAN_IF_VNI(zif) \
((zif)->l2info.vxl.vni_info.iftype == ZEBRA_VXLAN_IF_VNI)
#define VLAN_ID_FROM_ZEBRA_IF(zif) (zif)->l2info.vl.vid
#define BRIDGE_FROM_ZEBRA_IF(zif) (&((zif)->l2info.br.bridge))
#define IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(zif) \
((zif)->l2info.br.bridge.vlan_aware == 1)
extern void zebra_l2_map_slave_to_bridge(struct zebra_l2info_brslave *br_slave,
struct zebra_ns *zns);
extern void
zebra_l2_unmap_slave_from_bridge(struct zebra_l2info_brslave *br_slave);
extern void
zebra_l2_bridge_add_update(struct interface *ifp,
const struct zebra_l2info_bridge *bridge_info);
extern void zebra_l2_bridge_del(struct interface *ifp);
extern void zebra_l2_vlanif_update(struct interface *ifp,
const struct zebra_l2info_vlan *vlan_info);
extern void zebra_l2_greif_add_update(struct interface *ifp,
const struct zebra_l2info_gre *vxlan_info,
int add);
extern void
zebra_l2_vxlanif_add_update(struct interface *ifp,
const struct zebra_l2info_vxlan *vxlan_info,
int add);
extern void zebra_l2_vxlanif_update_access_vlan(struct interface *ifp,
vlanid_t access_vlan);
extern void zebra_l2_greif_del(struct interface *ifp);
extern void zebra_l2_vxlanif_del(struct interface *ifp);
extern void zebra_l2if_update_bridge_slave(struct interface *ifp,
ifindex_t bridge_ifindex,
ns_id_t ns_id, uint8_t chgflags);
extern void zebra_l2if_update_bond_slave(struct interface *ifp,
zebra: support for lacp bypass with EVPN MH Feature overview: ================= A 802.3ad bond can be setup to allow lacp-bypass. This is done to enable servers to pxe boot without a LACP license i.e. allows the bond to go oper up (with a single link) without LACP converging. If an ES-bond is oper-up in an "LACP-bypass" state MH treats it as a non-ES bond. This involves the following special handling - 1. If the bond is in a bypass-state the associated ES is placed in a bypass state. 2. If an ES is in a bypass state - a. DF election is disabled (i.e. assumed DF) b. SPH filter is not installed. 3. MACs learnt via the host bond are advertised with a zero ESI. When the ES moves out of "bypass" the MACs are moved from a zero-ESI to the correct non-zero id. This is treated as a local station move. Implementation: =============== When (a) an ES is detached from a hostbond or (b) an ES-bond goes into LACP bypass zebra deletes all the local macs (with that ES as destination) in the kernel and its local db. BGP re-sends any imported MAC-IP routes that may exist with this ES destination as remote routes i.e. zebra can end up programming a MAC that was perviously local as remote pointing to a VTEP-ECMP group. When an ES is attached to a hostbond or an ES-bond goes LACP-up (out of bypss) zebra again deletes all the local macs in the kernel and its local db. At this point BGP resends any imported MAC-IP routes that may exist with this ES destination as sync routes i.e. zebra can end up programming a MAC that was perviously remote as local pointing to an access port. Signed-off-by: Anuradha Karuppiah <anuradhak@cumulusnetworks.com>
2020-08-05 16:13:55 +02:00
ifindex_t bond_ifindex, bool bypass);
extern void zebra_vlan_bitmap_compute(struct interface *ifp,
uint32_t vid_start, uint16_t vid_end);
extern void zebra_vlan_mbr_re_eval(struct interface *ifp,
bitfield_t vlan_bitmap);
zebra: uplink tracking and startup delay for EVPN-MH Local ethernet segments are held in a protodown or error-disabled state if access to the VxLAN overlay is not ready - 1. When FRR comes up the local-ESs/access-port are kept protodown for the startup-delay duration. During this time the underlay and EVPN routes via it are expected to converge. 2. When all the uplinks/core-links attached to the underlay go down the access-ports are similarly protodowned. The ES-bond protodown state is propagated to each ES-bond member and programmed in the dataplane/kernel (per-bond-member). Configuring uplinks - vtysh -c "conf t" vtysh -c "interface swp4" vtysh -c "evpn mh uplink" Configuring startup delay - vtysh -c "conf t" vtysh -c "evpn mh startup-delay 100" >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> EVPN protodown display - ======================== root@torm-11:mgmt:~# vtysh -c "show evpn" L2 VNIs: 10 L3 VNIs: 3 Advertise gateway mac-ip: No Advertise svi mac-ip: No Duplicate address detection: Disable Detection max-moves 5, time 180 EVPN MH: mac-holdtime: 60s, neigh-holdtime: 60s startup-delay: 180s, start-delay-timer: 00:01:14 <<<<<<<<<<<< uplink-cfg-cnt: 4, uplink-active-cnt: 4 protodown: startup-delay <<<<<<<<<<<<<<<<<<<<<<< >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ES-bond protodown display - =========================== root@torm-11:mgmt:~# vtysh -c "show interface hostbond1" Interface hostbond1 is up, line protocol is down Link ups: 0 last: (never) Link downs: 1 last: 2020/04/26 20:38:03.53 PTM status: disabled vrf: default OS Description: Local Node/s torm-11 and Ports swp5 <==> Remote Node/s hostd-11 and Ports swp1 index 58 metric 0 mtu 9152 speed 4294967295 flags: <UP,BROADCAST,MULTICAST> Type: Ethernet HWaddr: 00:02:00:00:00:35 Interface Type bond Master interface: bridge EVPN-MH: ES id 1 ES sysmac 00:00:00:00:01:11 protodown: off rc: startup-delay <<<<<<<<<<<<<<<<< >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ES-bond member protodown display - ================================== root@torm-11:mgmt:~# vtysh -c "show interface swp5" Interface swp5 is up, line protocol is down Link ups: 0 last: (never) Link downs: 3 last: 2020/04/26 20:38:03.52 PTM status: disabled vrf: default index 7 metric 0 mtu 9152 speed 10000 flags: <UP,BROADCAST,MULTICAST> Type: Ethernet HWaddr: 00:02:00:00:00:35 Interface Type Other Master interface: hostbond1 protodown: on rc: startup-delay <<<<<<<<<<<<<<<< root@torm-11:mgmt:~# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Signed-off-by: Anuradha Karuppiah <anuradhak@cumulusnetworks.com>
2020-05-09 04:11:13 +02:00
extern void zebra_l2if_update_bond(struct interface *ifp, bool add);
extern void zebra_l2if_update_bridge(struct interface *ifp, uint8_t chgflags);
#ifdef __cplusplus
}
#endif
#endif /* _ZEBRA_L2_H */