frr/bgpd/bgp_nht.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1616 lines
45 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/* BGP Nexthop tracking
* Copyright (C) 2013 Cumulus Networks, Inc.
*/
#include <zebra.h>
#include "command.h"
#include "frrevent.h"
#include "prefix.h"
#include "zclient.h"
#include "stream.h"
#include "network.h"
#include "log.h"
#include "memory.h"
#include "nexthop.h"
*: add VRF ID in the API message header The API messages are used by zebra to exchange the interfaces, addresses, routes and router-id information with its clients. To distinguish which VRF the information belongs to, a new field "VRF ID" is added in the message header. And hence the message version is increased to 3. * The new field "VRF ID" in the message header: Length (2 bytes) Marker (1 byte) Version (1 byte) VRF ID (2 bytes, newly added) Command (2 bytes) - Client side: - zclient_create_header() adds the VRF ID in the message header. - zclient_read() extracts and validates the VRF ID from the header, and passes the VRF ID to the callback functions registered to the API messages. - All relative functions are appended with a new parameter "vrf_id", including all the callback functions. - "vrf_id" is also added to "struct zapi_ipv4" and "struct zapi_ipv6". Clients need to correctly set the VRF ID when using the API functions zapi_ipv4_route() and zapi_ipv6_route(). - Till now all messages sent from a client have the default VRF ID "0" in the header. - The HELLO message is special, which is used as the heart-beat of a client, and has no relation with VRF. The VRF ID in the HELLO message header will always be 0 and ignored by zebra. - Zebra side: - zserv_create_header() adds the VRF ID in the message header. - zebra_client_read() extracts and validates the VRF ID from the header, and passes the VRF ID to the functions which process the received messages. - All relative functions are appended with a new parameter "vrf_id". * Suppress the messages in a VRF which a client does not care: Some clients may not care about the information in the VRF X, and zebra should not send the messages in the VRF X to those clients. Extra flags are used to indicate which VRF is registered by a client, and a new message ZEBRA_VRF_UNREGISTER is introduced to let a client can unregister a VRF when it does not need any information in that VRF. A client sends any message other than ZEBRA_VRF_UNREGISTER in a VRF will automatically register to that VRF. - lib/vrf: A new utility "VRF bit-map" is provided to manage the flags for VRFs, one bit per VRF ID. - Use vrf_bitmap_init()/vrf_bitmap_free() to initialize/free a bit-map; - Use vrf_bitmap_set()/vrf_bitmap_unset() to set/unset a flag in the given bit-map, corresponding to the given VRF ID; - Use vrf_bitmap_check() to test whether the flag, in the given bit-map and for the given VRF ID, is set. - Client side: - In "struct zclient", the following flags are changed from "u_char" to "vrf_bitmap_t": redist[ZEBRA_ROUTE_MAX] default_information These flags are extended for each VRF, and controlled by the clients themselves (or with the help of zclient_redistribute() and zclient_redistribute_default()). - Zebra side: - In "struct zserv", the following flags are changed from "u_char" to "vrf_bitmap_t": redist[ZEBRA_ROUTE_MAX] redist_default ifinfo ridinfo These flags are extended for each VRF, as the VRF registration flags. They are maintained on receiving a ZEBRA_XXX_ADD or ZEBRA_XXX_DELETE message. When sending an interface/address/route/router-id message in a VRF to a client, if the corresponding VRF registration flag is not set, this message will not be dropped by zebra. - A new function zread_vrf_unregister() is introduced to process the new command ZEBRA_VRF_UNREGISTER. All the VRF registration flags are cleared for the requested VRF. Those clients, who support only the default VRF, will never receive a message in a non-default VRF, thanks to the filter in zebra. * New callback for the event of successful connection to zebra: - zclient_start() is splitted, keeping only the code of connecting to zebra. - Now zclient_init()=>zclient_connect()=>zclient_start() operations are purely dealing with the connection to zbera. - Once zebra is successfully connected, at the end of zclient_start(), a new callback is used to inform the client about connection. - Till now, in the callback of connect-to-zebra event, all clients send messages to zebra to request the router-id/interface/routes information in the default VRF. Of corse in future the client can do anything it wants in this callback. For example, it may send requests for both default VRF and some non-default VRFs. Signed-off-by: Feng Lu <lu.feng@6wind.com> Reviewed-by: Alain Ritoux <alain.ritoux@6wind.com> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com> Acked-by: Donald Sharp <sharpd@cumulusnetworks.com> Conflicts: lib/zclient.h lib/zebra.h zebra/zserv.c zebra/zserv.h Conflicts: bgpd/bgp_nexthop.c bgpd/bgp_nht.c bgpd/bgp_zebra.c isisd/isis_zebra.c lib/zclient.c lib/zclient.h lib/zebra.h nhrpd/nhrp_interface.c nhrpd/nhrp_route.c nhrpd/nhrpd.h ospf6d/ospf6_zebra.c ospf6d/ospf6_zebra.h ospfd/ospf_vty.c ospfd/ospf_zebra.c pimd/pim_zebra.c pimd/pim_zlookup.c ripd/rip_zebra.c ripngd/ripng_zebra.c zebra/redistribute.c zebra/rt_netlink.c zebra/zebra_rnh.c zebra/zebra_rnh.h zebra/zserv.c zebra/zserv.h
2014-10-16 03:52:36 +02:00
#include "vrf.h"
#include "filter.h"
#include "nexthop_group.h"
#include "bgpd/bgpd.h"
#include "bgpd/bgp_table.h"
#include "bgpd/bgp_route.h"
#include "bgpd/bgp_attr.h"
#include "bgpd/bgp_nexthop.h"
#include "bgpd/bgp_debug.h"
#include "bgpd/bgp_errors.h"
#include "bgpd/bgp_nht.h"
#include "bgpd/bgp_fsm.h"
#include "bgpd/bgp_zebra.h"
#include "bgpd/bgp_flowspec_util.h"
#include "bgpd/bgp_evpn.h"
#include "bgpd/bgp_rd.h"
bgpd: add support for l3vpn per-nexthop label This commit introduces a new method to associate a label to prefixes to export to a VPNv4 backbone. All the methods to associate a label to a BGP update is documented in rfc4364, chapter 4.3.2. Initially, the "single label for an entire VRF" method was available. This commit adds "single label for each attachment circuit" method. The change impacts the control-plane, because each BGP update is checked to know if the nexthop has reachability in the VRF or not. If this is the case, then a unique label for a given destination IP in the VRF will be picked up. This label will be reused for an other BGP update that will have the same nexthop IP address. The change impacts the data-plane, because the MPLs pop mechanism applied to incoming labelled packets changes: the MPLS label is popped, and the packet is directly sent to the connected nexthop described in the previous outgoing BGP VPN update. By default per-vrf mode is done, but the user may choose the per-nexthop mode, by using the vty command from the previous commit. In the latter case, a per-vrf label will however be allocated to handle networks that are not directly connected. This is the case for local traffic for instance. The change also include the following: - ECMP case In case a route is learnt in a given VRF, and is resolved via an ECMP nexthop. This implies that when exporting the route as a BGP update, if label allocation per nexthop is used, then two possible MPLS values could be picked up, which is not possible with the current implementation. Actually, the NLRI for VPNv4 stores one prefix, and one single label value, not two. Today, RFC8277 with multiple label capability is not yet available. To avoid this corner case, when a route is resolved via more than one nexthop, the label allocation per nexthop will not apply, and the default per-vrf label will be chosen. Let us imagine BGP redistributes a static route using the `172.31.0.20` nexthop. The nexthop resolution will find two different nexthops fo a unique BGP update. > r1# show running-config > [..] > vrf vrf1 > ip route 172.31.0.30/32 172.31.0.20 > r1# show bgp vrf vrf1 nexthop > [..] > 172.31.0.20 valid [IGP metric 0], #paths 1 > gate 192.0.2.11 > gate 192.0.2.12 > Last update: Mon Jan 16 09:27:09 2023 > Paths: > 1/1 172.31.0.30/32 VRF vrf1 flags 0x20018 To avoid this situation, BGP updates that resolve over multiple nexthops are using the unique per-vrf label. - recursive route case Prefixes that need a recursive route to be resolved can also be eligible for mpls allocation per nexthop. In that case, the nexthop will be the recursive nexthop calculated. To achieve this, all nexthop types in bnc contexts are valid, except for the blackhole nexthops. - network declared prefixes Nexthop tracking is used to look for the reachability of the prefixes. When the the 'no bgp network import-check' command is used, network declared prefixes are maintained active, even if there is no active nexthop. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2023-02-28 14:25:02 +01:00
#include "bgpd/bgp_mplsvpn.h"
#include "bgpd/bgp_ecommunity.h"
extern struct zclient *zclient;
static void register_zebra_rnh(struct bgp_nexthop_cache *bnc);
static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc);
static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p);
static void bgp_nht_ifp_initial(struct event *thread);
static int bgp_isvalid_nexthop(struct bgp_nexthop_cache *bnc)
{
return (bgp_zebra_num_connects() == 0
|| (bnc && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)
&& bnc->nexthop_num > 0));
}
static int bgp_isvalid_nexthop_for_ebgp(struct bgp_nexthop_cache *bnc,
struct bgp_path_info *path)
{
struct interface *ifp = NULL;
struct nexthop *nexthop;
struct bgp_interface *iifp;
struct peer *peer;
if (!path->extra || !path->extra->vrfleak ||
!path->extra->vrfleak->peer_orig)
return false;
peer = path->extra->vrfleak->peer_orig;
/* only connected ebgp peers are valid */
if (peer->sort != BGP_PEER_EBGP || peer->ttl != BGP_DEFAULT_TTL ||
CHECK_FLAG(peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK) ||
CHECK_FLAG(peer->bgp->flags, BGP_FLAG_DISABLE_NH_CONNECTED_CHK))
return false;
for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
if (nexthop->type == NEXTHOP_TYPE_IFINDEX ||
nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX ||
nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
ifp = if_lookup_by_index(bnc->ifindex_ipv6_ll
? bnc->ifindex_ipv6_ll
: nexthop->ifindex,
bnc->bgp->vrf_id);
}
if (!ifp)
continue;
iifp = ifp->info;
if (CHECK_FLAG(iifp->flags, BGP_INTERFACE_MPLS_BGP_FORWARDING))
return true;
}
return false;
}
bgpd: add resolution for l3vpn traffic over gre interfaces When a route imported from l3vpn is analysed, the nexthop from default VRF is looked up against a valid MPLS path. Generally, this is done on backbones with a MPLS signalisation transport layer like LDP. Generally, the BGP connection is multiple hops away. That scenario is already working. There is case where it is possible to run L3VPN over GRE interfaces, and where there is no LSP path over that GRE interface: GRE is just here to tunnel MPLS traffic. On that case, the nexthop given in the path does not have MPLS path, but should be authorized to convey MPLS traffic provided that the user permits it via a configuration command. That commit introduces a new command that can be activated in route-map: > set l3vpn next-hop encapsulation gre That command authorizes the nexthop tracking engine to accept paths that o have a GRE interface as output, independently of the presence of an LSP path or not. A configuration example is given below. When bgp incoming vpnv4 updates are received, the nexthop of NLRI is 192.168.0.2. Based on nexthop tracking service from zebra, BGP knows that the output interface to reach 192.168.0.2 is r1-gre0. Because that interface is not MPLS based, but is a GRE tunnel, then the update will be using that nexthop to be installed. interface r1-gre0 ip address 192.168.0.1/24 exit router bgp 65500 bgp router-id 1.1.1.1 neighbor 192.168.0.2 remote-as 65500 ! address-family ipv4 unicast no neighbor 192.168.0.2 activate exit-address-family ! address-family ipv4 vpn neighbor 192.168.0.2 activate neighbor 192.168.0.2 route-map rmap in exit-address-family exit ! router bgp 65500 vrf vrf1 bgp router-id 1.1.1.1 no bgp network import-check ! address-family ipv4 unicast network 10.201.0.0/24 redistribute connected label vpn export 101 rd vpn export 444:1 rt vpn both 52:100 export vpn import vpn exit-address-family exit ! route-map rmap permit 1 set l3vpn next-hop encapsulation gre exit Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2021-09-20 11:50:52 +02:00
static int bgp_isvalid_nexthop_for_mplsovergre(struct bgp_nexthop_cache *bnc,
struct bgp_path_info *path)
{
struct interface *ifp = NULL;
struct nexthop *nexthop;
for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
ifp = if_lookup_by_index(bnc->ifindex_ipv6_ll
? bnc->ifindex_ipv6_ll
: nexthop->ifindex,
bnc->bgp->vrf_id);
bgpd: add resolution for l3vpn traffic over gre interfaces When a route imported from l3vpn is analysed, the nexthop from default VRF is looked up against a valid MPLS path. Generally, this is done on backbones with a MPLS signalisation transport layer like LDP. Generally, the BGP connection is multiple hops away. That scenario is already working. There is case where it is possible to run L3VPN over GRE interfaces, and where there is no LSP path over that GRE interface: GRE is just here to tunnel MPLS traffic. On that case, the nexthop given in the path does not have MPLS path, but should be authorized to convey MPLS traffic provided that the user permits it via a configuration command. That commit introduces a new command that can be activated in route-map: > set l3vpn next-hop encapsulation gre That command authorizes the nexthop tracking engine to accept paths that o have a GRE interface as output, independently of the presence of an LSP path or not. A configuration example is given below. When bgp incoming vpnv4 updates are received, the nexthop of NLRI is 192.168.0.2. Based on nexthop tracking service from zebra, BGP knows that the output interface to reach 192.168.0.2 is r1-gre0. Because that interface is not MPLS based, but is a GRE tunnel, then the update will be using that nexthop to be installed. interface r1-gre0 ip address 192.168.0.1/24 exit router bgp 65500 bgp router-id 1.1.1.1 neighbor 192.168.0.2 remote-as 65500 ! address-family ipv4 unicast no neighbor 192.168.0.2 activate exit-address-family ! address-family ipv4 vpn neighbor 192.168.0.2 activate neighbor 192.168.0.2 route-map rmap in exit-address-family exit ! router bgp 65500 vrf vrf1 bgp router-id 1.1.1.1 no bgp network import-check ! address-family ipv4 unicast network 10.201.0.0/24 redistribute connected label vpn export 101 rd vpn export 444:1 rt vpn both 52:100 export vpn import vpn exit-address-family exit ! route-map rmap permit 1 set l3vpn next-hop encapsulation gre exit Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2021-09-20 11:50:52 +02:00
if (ifp && (ifp->ll_type == ZEBRA_LLT_IPGRE ||
ifp->ll_type == ZEBRA_LLT_IP6GRE))
break;
}
}
if (!ifp)
return false;
if (CHECK_FLAG(path->attr->rmap_change_flags,
BATTR_RMAP_L3VPN_ACCEPT_GRE))
return true;
return false;
}
static int bgp_isvalid_nexthop_for_mpls(struct bgp_nexthop_cache *bnc,
struct bgp_path_info *path)
{
return (bnc && (bnc->nexthop_num > 0 &&
(CHECK_FLAG(path->flags, BGP_PATH_ACCEPT_OWN) ||
CHECK_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID) ||
bgp_isvalid_nexthop_for_ebgp(bnc, path) ||
bgp_isvalid_nexthop_for_mplsovergre(bnc, path))));
}
static bool bgp_isvalid_nexthop_for_l3vpn(struct bgp_nexthop_cache *bnc,
struct bgp_path_info *path)
{
if (bgp_zebra_num_connects() == 0)
return 1;
if (path->attr->srv6_l3vpn || path->attr->srv6_vpn) {
/* In the case of SRv6-VPN, we need to track the reachability to the
* SID (in other words, IPv6 address). We check that the SID is
* available in the BGP update; then if it is available, we check
* for the nexthop reachability.
*/
if (bnc && (bnc->nexthop_num > 0 && bgp_isvalid_nexthop(bnc)))
return 1;
return 0;
}
/*
* In the case of MPLS-VPN, the label is learned from LDP or other
* protocols, and nexthop tracking is enabled for the label.
* The value is recorded as BGP_NEXTHOP_LABELED_VALID.
bgpd: add resolution for l3vpn traffic over gre interfaces When a route imported from l3vpn is analysed, the nexthop from default VRF is looked up against a valid MPLS path. Generally, this is done on backbones with a MPLS signalisation transport layer like LDP. Generally, the BGP connection is multiple hops away. That scenario is already working. There is case where it is possible to run L3VPN over GRE interfaces, and where there is no LSP path over that GRE interface: GRE is just here to tunnel MPLS traffic. On that case, the nexthop given in the path does not have MPLS path, but should be authorized to convey MPLS traffic provided that the user permits it via a configuration command. That commit introduces a new command that can be activated in route-map: > set l3vpn next-hop encapsulation gre That command authorizes the nexthop tracking engine to accept paths that o have a GRE interface as output, independently of the presence of an LSP path or not. A configuration example is given below. When bgp incoming vpnv4 updates are received, the nexthop of NLRI is 192.168.0.2. Based on nexthop tracking service from zebra, BGP knows that the output interface to reach 192.168.0.2 is r1-gre0. Because that interface is not MPLS based, but is a GRE tunnel, then the update will be using that nexthop to be installed. interface r1-gre0 ip address 192.168.0.1/24 exit router bgp 65500 bgp router-id 1.1.1.1 neighbor 192.168.0.2 remote-as 65500 ! address-family ipv4 unicast no neighbor 192.168.0.2 activate exit-address-family ! address-family ipv4 vpn neighbor 192.168.0.2 activate neighbor 192.168.0.2 route-map rmap in exit-address-family exit ! router bgp 65500 vrf vrf1 bgp router-id 1.1.1.1 no bgp network import-check ! address-family ipv4 unicast network 10.201.0.0/24 redistribute connected label vpn export 101 rd vpn export 444:1 rt vpn both 52:100 export vpn import vpn exit-address-family exit ! route-map rmap permit 1 set l3vpn next-hop encapsulation gre exit Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2021-09-20 11:50:52 +02:00
* - Otherwise check for mpls-gre acceptance
*/
return bgp_isvalid_nexthop_for_mpls(bnc, path);
}
static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc)
{
if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info) {
if (BGP_DEBUG(nht, NHT))
zlog_debug("%s: freeing bnc %pFX(%d)(%u)(%s)", __func__,
&bnc->prefix, bnc->ifindex_ipv6_ll,
bnc->srte_color, bnc->bgp->name_pretty);
/* only unregister if this is the last nh for this prefix*/
if (!bnc_existing_for_prefix(bnc))
unregister_zebra_rnh(bnc);
bnc_free(bnc);
}
}
void bgp_unlink_nexthop(struct bgp_path_info *path)
{
struct bgp_nexthop_cache *bnc = path->nexthop;
bgpd: add support for l3vpn per-nexthop label This commit introduces a new method to associate a label to prefixes to export to a VPNv4 backbone. All the methods to associate a label to a BGP update is documented in rfc4364, chapter 4.3.2. Initially, the "single label for an entire VRF" method was available. This commit adds "single label for each attachment circuit" method. The change impacts the control-plane, because each BGP update is checked to know if the nexthop has reachability in the VRF or not. If this is the case, then a unique label for a given destination IP in the VRF will be picked up. This label will be reused for an other BGP update that will have the same nexthop IP address. The change impacts the data-plane, because the MPLs pop mechanism applied to incoming labelled packets changes: the MPLS label is popped, and the packet is directly sent to the connected nexthop described in the previous outgoing BGP VPN update. By default per-vrf mode is done, but the user may choose the per-nexthop mode, by using the vty command from the previous commit. In the latter case, a per-vrf label will however be allocated to handle networks that are not directly connected. This is the case for local traffic for instance. The change also include the following: - ECMP case In case a route is learnt in a given VRF, and is resolved via an ECMP nexthop. This implies that when exporting the route as a BGP update, if label allocation per nexthop is used, then two possible MPLS values could be picked up, which is not possible with the current implementation. Actually, the NLRI for VPNv4 stores one prefix, and one single label value, not two. Today, RFC8277 with multiple label capability is not yet available. To avoid this corner case, when a route is resolved via more than one nexthop, the label allocation per nexthop will not apply, and the default per-vrf label will be chosen. Let us imagine BGP redistributes a static route using the `172.31.0.20` nexthop. The nexthop resolution will find two different nexthops fo a unique BGP update. > r1# show running-config > [..] > vrf vrf1 > ip route 172.31.0.30/32 172.31.0.20 > r1# show bgp vrf vrf1 nexthop > [..] > 172.31.0.20 valid [IGP metric 0], #paths 1 > gate 192.0.2.11 > gate 192.0.2.12 > Last update: Mon Jan 16 09:27:09 2023 > Paths: > 1/1 172.31.0.30/32 VRF vrf1 flags 0x20018 To avoid this situation, BGP updates that resolve over multiple nexthops are using the unique per-vrf label. - recursive route case Prefixes that need a recursive route to be resolved can also be eligible for mpls allocation per nexthop. In that case, the nexthop will be the recursive nexthop calculated. To achieve this, all nexthop types in bnc contexts are valid, except for the blackhole nexthops. - network declared prefixes Nexthop tracking is used to look for the reachability of the prefixes. When the the 'no bgp network import-check' command is used, network declared prefixes are maintained active, even if there is no active nexthop. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2023-02-28 14:25:02 +01:00
bgp_mplsvpn_path_nh_label_unlink(path);
bgp_mplsvpn_path_nh_label_bind_unlink(path);
bgpd: add support for l3vpn per-nexthop label This commit introduces a new method to associate a label to prefixes to export to a VPNv4 backbone. All the methods to associate a label to a BGP update is documented in rfc4364, chapter 4.3.2. Initially, the "single label for an entire VRF" method was available. This commit adds "single label for each attachment circuit" method. The change impacts the control-plane, because each BGP update is checked to know if the nexthop has reachability in the VRF or not. If this is the case, then a unique label for a given destination IP in the VRF will be picked up. This label will be reused for an other BGP update that will have the same nexthop IP address. The change impacts the data-plane, because the MPLs pop mechanism applied to incoming labelled packets changes: the MPLS label is popped, and the packet is directly sent to the connected nexthop described in the previous outgoing BGP VPN update. By default per-vrf mode is done, but the user may choose the per-nexthop mode, by using the vty command from the previous commit. In the latter case, a per-vrf label will however be allocated to handle networks that are not directly connected. This is the case for local traffic for instance. The change also include the following: - ECMP case In case a route is learnt in a given VRF, and is resolved via an ECMP nexthop. This implies that when exporting the route as a BGP update, if label allocation per nexthop is used, then two possible MPLS values could be picked up, which is not possible with the current implementation. Actually, the NLRI for VPNv4 stores one prefix, and one single label value, not two. Today, RFC8277 with multiple label capability is not yet available. To avoid this corner case, when a route is resolved via more than one nexthop, the label allocation per nexthop will not apply, and the default per-vrf label will be chosen. Let us imagine BGP redistributes a static route using the `172.31.0.20` nexthop. The nexthop resolution will find two different nexthops fo a unique BGP update. > r1# show running-config > [..] > vrf vrf1 > ip route 172.31.0.30/32 172.31.0.20 > r1# show bgp vrf vrf1 nexthop > [..] > 172.31.0.20 valid [IGP metric 0], #paths 1 > gate 192.0.2.11 > gate 192.0.2.12 > Last update: Mon Jan 16 09:27:09 2023 > Paths: > 1/1 172.31.0.30/32 VRF vrf1 flags 0x20018 To avoid this situation, BGP updates that resolve over multiple nexthops are using the unique per-vrf label. - recursive route case Prefixes that need a recursive route to be resolved can also be eligible for mpls allocation per nexthop. In that case, the nexthop will be the recursive nexthop calculated. To achieve this, all nexthop types in bnc contexts are valid, except for the blackhole nexthops. - network declared prefixes Nexthop tracking is used to look for the reachability of the prefixes. When the the 'no bgp network import-check' command is used, network declared prefixes are maintained active, even if there is no active nexthop. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2023-02-28 14:25:02 +01:00
if (!bnc)
return;
path_nh_map(path, NULL, false);
bgp_unlink_nexthop_check(bnc);
}
void bgp_replace_nexthop_by_peer(struct peer *from, struct peer *to)
{
struct prefix pp;
struct prefix pt;
struct bgp_nexthop_cache *bncp, *bnct;
afi_t afi;
ifindex_t ifindex = 0;
if (!sockunion2hostprefix(&from->connection->su, &pp))
return;
/*
* Gather the ifindex for if up/down events to be
* tagged into this fun
*/
if (from->conf_if &&
IN6_IS_ADDR_LINKLOCAL(&from->connection->su.sin6.sin6_addr))
ifindex = from->connection->su.sin6.sin6_scope_id;
afi = family2afi(pp.family);
bncp = bnc_find(&from->bgp->nexthop_cache_table[afi], &pp, 0, ifindex);
if (!sockunion2hostprefix(&to->connection->su, &pt))
return;
/*
* Gather the ifindex for if up/down events to be
* tagged into this fun
*/
ifindex = 0;
if (to->conf_if &&
IN6_IS_ADDR_LINKLOCAL(&to->connection->su.sin6.sin6_addr))
ifindex = to->connection->su.sin6.sin6_scope_id;
bnct = bnc_find(&to->bgp->nexthop_cache_table[afi], &pt, 0, ifindex);
if (bnct != bncp)
return;
if (bnct)
bnct->nht_info = to;
}
/*
* Returns the bnc whose bnc->nht_info matches the LL peer by
* looping through the IPv6 nexthop table
*/
static struct bgp_nexthop_cache *
bgp_find_ipv6_nexthop_matching_peer(struct peer *peer)
{
struct bgp_nexthop_cache *bnc;
frr_each (bgp_nexthop_cache, &peer->bgp->nexthop_cache_table[AFI_IP6],
bnc) {
if (bnc->nht_info == peer) {
if (BGP_DEBUG(nht, NHT)) {
zlog_debug(
"Found bnc: %pFX(%u)(%u)(%p) for peer: %s(%s) %p",
&bnc->prefix, bnc->ifindex_ipv6_ll,
bnc->srte_color, bnc, peer->host,
peer->bgp->name_pretty, peer);
}
return bnc;
}
}
if (BGP_DEBUG(nht, NHT))
zlog_debug(
"Could not find bnc for peer %s(%s) %p in v6 nexthop table",
peer->host, peer->bgp->name_pretty, peer);
return NULL;
}
void bgp_unlink_nexthop_by_peer(struct peer *peer)
{
struct prefix p;
struct bgp_nexthop_cache *bnc;
afi_t afi = family2afi(peer->connection->su.sa.sa_family);
ifindex_t ifindex = 0;
if (!sockunion2hostprefix(&peer->connection->su, &p)) {
/*
* In scenarios where unnumbered BGP session is brought
* down by shutting down the interface before unconfiguring
* the BGP neighbor, neighbor information in peer->su.sa
* will be cleared when the interface is shutdown. So
* during the deletion of unnumbered bgp peer, above check
* will return true. Therefore, in this case,BGP needs to
* find the bnc whose bnc->nht_info matches the
* peer being deleted and free it.
*/
bnc = bgp_find_ipv6_nexthop_matching_peer(peer);
} else {
/*
* Gather the ifindex for if up/down events to be
* tagged into this fun
*/
if (afi == AFI_IP6 &&
IN6_IS_ADDR_LINKLOCAL(&peer->connection->su.sin6.sin6_addr))
ifindex = peer->connection->su.sin6.sin6_scope_id;
bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p, 0,
ifindex);
}
if (!bnc)
return;
/* cleanup the peer reference */
bnc->nht_info = NULL;
bgp_unlink_nexthop_check(bnc);
}
/*
* A route and its nexthop might belong to different VRFs. Therefore,
* we need both the bgp_route and bgp_nexthop pointers.
*/
int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop,
afi_t afi, safi_t safi, struct bgp_path_info *pi,
struct peer *peer, int connected,
const struct prefix *orig_prefix)
{
struct bgp_nexthop_cache_head *tree = NULL;
struct bgp_nexthop_cache *bnc;
struct bgp_path_info *bpi_ultimate;
struct prefix p;
uint32_t srte_color = 0;
int is_bgp_static_route = 0;
ifindex_t ifindex = 0;
if (pi) {
is_bgp_static_route = ((pi->type == ZEBRA_ROUTE_BGP)
&& (pi->sub_type == BGP_ROUTE_STATIC))
? 1
: 0;
/* Since Extended Next-hop Encoding (RFC5549) support, we want
to derive
address-family from the next-hop. */
if (!is_bgp_static_route)
afi = BGP_ATTR_MP_NEXTHOP_LEN_IP6(pi->attr) ? AFI_IP6
: AFI_IP;
/* Validation for the ipv4 mapped ipv6 nexthop. */
if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
afi = AFI_IP;
}
/* This will return true if the global IPv6 NH is a link local
* addr */
if (make_prefix(afi, pi, &p) < 0)
return 1;
/*
* If it's a V6 nexthop, path is learnt from a v6 LL peer,
* and if the NH prefix matches peer's LL address then
* set the ifindex to peer's interface index so that
* correct nexthop can be found in nexthop tree.
*
* NH could be set to different v6 LL address (compared to
* peer's LL) using route-map. In such a scenario, do not set
* the ifindex.
*/
if (afi == AFI_IP6 &&
IN6_IS_ADDR_LINKLOCAL(
&pi->peer->connection->su.sin6.sin6_addr) &&
IPV6_ADDR_SAME(&pi->peer->connection->su.sin6.sin6_addr,
&p.u.prefix6))
ifindex = pi->peer->connection->su.sin6.sin6_scope_id;
if (!is_bgp_static_route && orig_prefix
&& prefix_same(&p, orig_prefix)) {
if (BGP_DEBUG(nht, NHT)) {
zlog_debug(
"%s(%pFX): prefix loops through itself",
__func__, &p);
}
return 0;
}
srte_color = bgp_attr_get_color(pi->attr);
} else if (peer) {
/*
* Gather the ifindex for if up/down events to be
* tagged into this fun
*/
if (afi == AFI_IP6 && peer->conf_if &&
IN6_IS_ADDR_LINKLOCAL(&peer->connection->su.sin6.sin6_addr)) {
ifindex = peer->connection->su.sin6.sin6_scope_id;
if (ifindex == 0) {
if (BGP_DEBUG(nht, NHT)) {
zlog_debug(
"%s: Unable to locate ifindex, waiting till we have one",
peer->conf_if);
}
return 0;
}
}
if (!sockunion2hostprefix(&peer->connection->su, &p)) {
if (BGP_DEBUG(nht, NHT)) {
zlog_debug(
"%s: Attempting to register with unknown AFI %d (not %d or %d)",
__func__, afi, AFI_IP, AFI_IP6);
}
return 0;
}
} else
return 0;
if (is_bgp_static_route)
tree = &bgp_nexthop->import_check_table[afi];
else
tree = &bgp_nexthop->nexthop_cache_table[afi];
bnc = bnc_find(tree, &p, srte_color, ifindex);
if (!bnc) {
bnc = bnc_new(tree, &p, srte_color, ifindex);
bnc->afi = afi;
bnc->bgp = bgp_nexthop;
if (BGP_DEBUG(nht, NHT))
zlog_debug("Allocated bnc %pFX(%d)(%u)(%s) peer %p",
&bnc->prefix, bnc->ifindex_ipv6_ll,
bnc->srte_color, bnc->bgp->name_pretty,
peer);
} else {
if (BGP_DEBUG(nht, NHT))
zlog_debug("Found existing bnc %pFX(%d)(%s) flags 0x%x ifindex %d #paths %d peer %p, resolved prefix %pFX",
&bnc->prefix, bnc->ifindex_ipv6_ll,
bnc->bgp->name_pretty, bnc->flags,
bnc->ifindex_ipv6_ll, bnc->path_count,
bnc->nht_info, &bnc->resolved_prefix);
}
bgpd: EVPN route type-5 to type-2 recursive resolution using gateway IP When EVPN prefix route with a gateway IP overlay index is imported into the IP vrf at the ingress PE, BGP nexthop of this route is set to the gateway IP. For this vrf route to be valid, following conditions must be met. - Gateway IP nexthop of this route should be L3 reachable, i.e., this route should be resolved in RIB. - A remote MAC/IP route should be present for the gateway IP address in the EVI(L2VPN table). To check for the first condition, gateway IP is registered with nht (nexthop tracking) to receive the reachability notifications for this IP from zebra RIB. If the gateway IP is reachable, zebra sends the reachability information (i.e., nexthop interface) for the gateway IP. This nexthop interface should be the SVI interface. Now, to find out type-2 route corresponding to the gateway IP, we need to fetch the VNI for the above SVI. To do this VNI lookup effitiently, define a hashtable of struct bgpevpn with svi_ifindex as key. struct hash *vni_svi_hash; An EVI instance is added to vni_svi_hash if its svi_ifindex is nonzero. Using this hash, we obtain struct bgpevpn corresponding to the gateway IP. For gateway IP overlay index recursive lookup, once we find the correct EVI, we have to lookup its route table for a MAC/IP prefix. As we have to iterate the entire route table for every lookup, this lookup is expensive. We can optimize this lookup by adding all the remote IP addresses in a hash table. Following hash table is defined for this purpose in struct bgpevpn Struct hash *remote_ip_hash; When a MAC/IP route is installed in the EVI table, it is also added to remote_ip_hash. It is possible to have multiple MAC/IP routes with the same IP address because of host move scenarios. Thus, for every address addr in remote_ip_hash, we maintain list of all the MAC/IP routes having addr as their IP address. Following structure defines an address in remote_ip_hash. struct evpn_remote_ip { struct ipaddr addr; struct list *macip_path_list; }; A Boolean field is added to struct bgp_nexthop_cache to indicate that the nexthop is EVPN gateway IP overlay index. bool is_evpn_gwip_nexthop; A flag BGP_NEXTHOP_EVPN_INCOMPLETE is added to struct bgp_nexthop_cache. This flag is set when the gateway IP is L3 reachable but not yet resolved by a MAC/IP route. Following table explains the combination of L3 and L2 reachability w.r.t. BGP_NEXTHOP_VALID and BGP_NEXTHOP_EVPN_INCOMPLETE flags * | MACIP resolved | MACIP unresolved *----------------|----------------|------------------ * L3 reachable | VALID = 1 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 1 * ---------------|----------------|-------------------- * L3 unreachable | VALID = 0 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 0 Procedure that we use to check if the gateway IP is resolvable by a MAC/IP route: - Find the EVI/L2VRF that belongs to the nexthop SVI using vni_svi_hash. - Check if the gateway IP is present in remote_ip_hash in this EVI. When the gateway IP is L3 reachable and it is also resolved by a MAC/IP route, unset BGP_NEXTHOP_EVPN_INCOMPLETE flag and set BGP_NEXTHOP_VALID flag. Signed-off-by: Ameya Dharkar <adharkar@vmware.com>
2021-01-11 12:51:56 +01:00
if (pi && is_route_parent_evpn(pi))
bnc->is_evpn_gwip_nexthop = true;
if (is_bgp_static_route && !CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE)) {
SET_FLAG(bnc->flags, BGP_STATIC_ROUTE);
/* If we're toggling the type, re-register */
if ((CHECK_FLAG(bgp_route->flags, BGP_FLAG_IMPORT_CHECK))
&& !CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH)) {
SET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
} else if ((!CHECK_FLAG(bgp_route->flags,
BGP_FLAG_IMPORT_CHECK))
&& CHECK_FLAG(bnc->flags,
BGP_STATIC_ROUTE_EXACT_MATCH)) {
UNSET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
}
}
/* When nexthop is already known, but now requires 'connected'
* resolution,
* re-register it. The reverse scenario where the nexthop currently
* requires
* 'connected' resolution does not need a re-register (i.e., we treat
* 'connected-required' as an override) except in the scenario where
* this
* is actually a case of tracking a peer for connectivity (e.g., after
* disable connected-check).
* NOTE: We don't track the number of paths separately for 'connected-
* required' vs 'connected-not-required' as this change is not a common
* scenario.
*/
else if (connected && !CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
SET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
} else if (peer && !connected
&& CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
}
if (peer && (bnc->ifindex_ipv6_ll != ifindex)) {
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
bnc->ifindex_ipv6_ll = ifindex;
}
if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW) {
SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
} else if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED)
&& !is_default_host_route(&bnc->prefix))
register_zebra_rnh(bnc);
if (pi && pi->nexthop != bnc) {
/* Unlink from existing nexthop cache, if any. This will also
* free
* the nexthop cache entry, if appropriate.
*/
bgp_unlink_nexthop(pi);
/* updates NHT pi list reference */
path_nh_map(pi, bnc, true);
bpi_ultimate = bgp_get_imported_bpi_ultimate(pi);
if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
(bgp_path_info_extra_get(bpi_ultimate))->igpmetric =
bnc->metric;
else if (bpi_ultimate->extra)
bpi_ultimate->extra->igpmetric = 0;
bgpd: Validate imported routes next-hop that is in a default VRF Without this patch: ``` r1# sh ip bgp vrf CUSTOMER-A BGP table version is 1, local router ID is 20.20.20.0, vrf id 4 Default local pref 100, local AS 65000 Status codes: s suppressed, d damped, h history, u unsorted, * valid, > best, = multipath, i internal, r RIB-failure, S Stale, R Removed Nexthop codes: @NNN nexthop's vrf id, < announce-nh-self Origin codes: i - IGP, e - EGP, ? - incomplete RPKI validation codes: V valid, I invalid, N Not found Network Next Hop Metric LocPrf Weight Path 192.168.2.0/24 192.168.179.5@0< 0 0 479 ? Displayed 1 routes and 1 total paths r1# ``` This is because the route is imported, next-hop is in a default VRF, and we should evaluate an ultimate path info, not the current path info. After: ``` r1# sh ip bgp vrf CUSTOMER-A BGP table version is 1, local router ID is 20.20.20.0, vrf id 4 Default local pref 100, local AS 65000 Status codes: s suppressed, d damped, h history, u unsorted, * valid, > best, = multipath, i internal, r RIB-failure, S Stale, R Removed Nexthop codes: @NNN nexthop's vrf id, < announce-nh-self Origin codes: i - IGP, e - EGP, ? - incomplete RPKI validation codes: V valid, I invalid, N Not found Network Next Hop Metric LocPrf Weight Path *> 192.168.2.0/24 192.168.179.5@0< 0 0 479 ? Displayed 1 routes and 1 total paths r1# ``` In both cases next-hop in cache table is valid: ``` r1# sh ip bgp nexthop Current BGP nexthop cache: 192.168.179.5 valid [IGP metric 0], #paths 2, peer 192.168.179.5 Resolved prefix 192.168.179.0/24 if r1-eth0 Last update: Thu Sep 5 11:24:37 2024 r1# ``` Signed-off-by: Donatas Abraitis <donatas@opensourcerouting.org>
2024-09-05 10:24:53 +02:00
SET_FLAG(bnc->flags, BGP_NEXTHOP_ULTIMATE);
} else if (peer) {
/*
* Let's not accidentally save the peer data for a peer
* we are going to throw away in a second or so.
* When we come back around we'll fix up this
* data properly in replace_nexthop_by_peer
*/
if (CHECK_FLAG(peer->flags, PEER_FLAG_CONFIG_NODE))
bnc->nht_info = (void *)peer; /* NHT peer reference */
}
/*
* We are cheating here. Views have no associated underlying
* ability to detect nexthops. So when we have a view
* just tell everyone the nexthop is valid
*/
if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW)
return 1;
bgpd: Validate imported routes next-hop that is in a default VRF Without this patch: ``` r1# sh ip bgp vrf CUSTOMER-A BGP table version is 1, local router ID is 20.20.20.0, vrf id 4 Default local pref 100, local AS 65000 Status codes: s suppressed, d damped, h history, u unsorted, * valid, > best, = multipath, i internal, r RIB-failure, S Stale, R Removed Nexthop codes: @NNN nexthop's vrf id, < announce-nh-self Origin codes: i - IGP, e - EGP, ? - incomplete RPKI validation codes: V valid, I invalid, N Not found Network Next Hop Metric LocPrf Weight Path 192.168.2.0/24 192.168.179.5@0< 0 0 479 ? Displayed 1 routes and 1 total paths r1# ``` This is because the route is imported, next-hop is in a default VRF, and we should evaluate an ultimate path info, not the current path info. After: ``` r1# sh ip bgp vrf CUSTOMER-A BGP table version is 1, local router ID is 20.20.20.0, vrf id 4 Default local pref 100, local AS 65000 Status codes: s suppressed, d damped, h history, u unsorted, * valid, > best, = multipath, i internal, r RIB-failure, S Stale, R Removed Nexthop codes: @NNN nexthop's vrf id, < announce-nh-self Origin codes: i - IGP, e - EGP, ? - incomplete RPKI validation codes: V valid, I invalid, N Not found Network Next Hop Metric LocPrf Weight Path *> 192.168.2.0/24 192.168.179.5@0< 0 0 479 ? Displayed 1 routes and 1 total paths r1# ``` In both cases next-hop in cache table is valid: ``` r1# sh ip bgp nexthop Current BGP nexthop cache: 192.168.179.5 valid [IGP metric 0], #paths 2, peer 192.168.179.5 Resolved prefix 192.168.179.0/24 if r1-eth0 Last update: Thu Sep 5 11:24:37 2024 r1# ``` Signed-off-by: Donatas Abraitis <donatas@opensourcerouting.org>
2024-09-05 10:24:53 +02:00
else if (safi == SAFI_UNICAST && pi &&
pi->sub_type == BGP_ROUTE_IMPORTED &&
CHECK_FLAG(bnc->flags, BGP_NEXTHOP_ULTIMATE))
return bgp_isvalid_nexthop(bnc);
bgpd: add resolution for l3vpn traffic over gre interfaces When a route imported from l3vpn is analysed, the nexthop from default VRF is looked up against a valid MPLS path. Generally, this is done on backbones with a MPLS signalisation transport layer like LDP. Generally, the BGP connection is multiple hops away. That scenario is already working. There is case where it is possible to run L3VPN over GRE interfaces, and where there is no LSP path over that GRE interface: GRE is just here to tunnel MPLS traffic. On that case, the nexthop given in the path does not have MPLS path, but should be authorized to convey MPLS traffic provided that the user permits it via a configuration command. That commit introduces a new command that can be activated in route-map: > set l3vpn next-hop encapsulation gre That command authorizes the nexthop tracking engine to accept paths that o have a GRE interface as output, independently of the presence of an LSP path or not. A configuration example is given below. When bgp incoming vpnv4 updates are received, the nexthop of NLRI is 192.168.0.2. Based on nexthop tracking service from zebra, BGP knows that the output interface to reach 192.168.0.2 is r1-gre0. Because that interface is not MPLS based, but is a GRE tunnel, then the update will be using that nexthop to be installed. interface r1-gre0 ip address 192.168.0.1/24 exit router bgp 65500 bgp router-id 1.1.1.1 neighbor 192.168.0.2 remote-as 65500 ! address-family ipv4 unicast no neighbor 192.168.0.2 activate exit-address-family ! address-family ipv4 vpn neighbor 192.168.0.2 activate neighbor 192.168.0.2 route-map rmap in exit-address-family exit ! router bgp 65500 vrf vrf1 bgp router-id 1.1.1.1 no bgp network import-check ! address-family ipv4 unicast network 10.201.0.0/24 redistribute connected label vpn export 101 rd vpn export 444:1 rt vpn both 52:100 export vpn import vpn exit-address-family exit ! route-map rmap permit 1 set l3vpn next-hop encapsulation gre exit Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2021-09-20 11:50:52 +02:00
else if (safi == SAFI_UNICAST && pi &&
pi->sub_type == BGP_ROUTE_IMPORTED &&
BGP_PATH_INFO_NUM_LABELS(pi) && !bnc->is_evpn_gwip_nexthop)
return bgp_isvalid_nexthop_for_l3vpn(bnc, pi);
else if (safi == SAFI_MPLS_VPN && pi &&
pi->sub_type != BGP_ROUTE_IMPORTED)
/* avoid not redistributing mpls vpn routes */
return 1;
bgpd: add resolution for l3vpn traffic over gre interfaces When a route imported from l3vpn is analysed, the nexthop from default VRF is looked up against a valid MPLS path. Generally, this is done on backbones with a MPLS signalisation transport layer like LDP. Generally, the BGP connection is multiple hops away. That scenario is already working. There is case where it is possible to run L3VPN over GRE interfaces, and where there is no LSP path over that GRE interface: GRE is just here to tunnel MPLS traffic. On that case, the nexthop given in the path does not have MPLS path, but should be authorized to convey MPLS traffic provided that the user permits it via a configuration command. That commit introduces a new command that can be activated in route-map: > set l3vpn next-hop encapsulation gre That command authorizes the nexthop tracking engine to accept paths that o have a GRE interface as output, independently of the presence of an LSP path or not. A configuration example is given below. When bgp incoming vpnv4 updates are received, the nexthop of NLRI is 192.168.0.2. Based on nexthop tracking service from zebra, BGP knows that the output interface to reach 192.168.0.2 is r1-gre0. Because that interface is not MPLS based, but is a GRE tunnel, then the update will be using that nexthop to be installed. interface r1-gre0 ip address 192.168.0.1/24 exit router bgp 65500 bgp router-id 1.1.1.1 neighbor 192.168.0.2 remote-as 65500 ! address-family ipv4 unicast no neighbor 192.168.0.2 activate exit-address-family ! address-family ipv4 vpn neighbor 192.168.0.2 activate neighbor 192.168.0.2 route-map rmap in exit-address-family exit ! router bgp 65500 vrf vrf1 bgp router-id 1.1.1.1 no bgp network import-check ! address-family ipv4 unicast network 10.201.0.0/24 redistribute connected label vpn export 101 rd vpn export 444:1 rt vpn both 52:100 export vpn import vpn exit-address-family exit ! route-map rmap permit 1 set l3vpn next-hop encapsulation gre exit Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2021-09-20 11:50:52 +02:00
else
/* mpls-vpn routes with BGP_ROUTE_IMPORTED subtype */
return (bgp_isvalid_nexthop(bnc));
}
void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer)
{
struct bgp_nexthop_cache *bnc;
struct prefix p;
ifindex_t ifindex = 0;
if (!peer)
return;
/*
* In case the below check evaluates true and if
* the bnc has not been freed at this point, then
* we might have to do something similar to what's
* done in bgp_unlink_nexthop_by_peer(). Since
* bgp_unlink_nexthop_by_peer() loops through the
* nodes of V6 nexthop cache to find the bnc, it is
* currently not being called here.
*/
if (!sockunion2hostprefix(&peer->connection->su, &p))
return;
/*
* Gather the ifindex for if up/down events to be
* tagged into this fun
*/
if (afi == AFI_IP6 &&
IN6_IS_ADDR_LINKLOCAL(&peer->connection->su.sin6.sin6_addr))
ifindex = peer->connection->su.sin6.sin6_scope_id;
bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)],
&p, 0, ifindex);
if (!bnc) {
if (BGP_DEBUG(nht, NHT))
zlog_debug(
"Cannot find connected NHT node for peer %s(%s)",
peer->host, peer->bgp->name_pretty);
return;
}
if (bnc->nht_info != peer) {
if (BGP_DEBUG(nht, NHT))
zlog_debug(
"Connected NHT %p node for peer %s(%s) points to %p",
bnc, peer->host, bnc->bgp->name_pretty,
bnc->nht_info);
return;
}
bnc->nht_info = NULL;
if (LIST_EMPTY(&(bnc->paths))) {
if (BGP_DEBUG(nht, NHT))
zlog_debug(
"Freeing connected NHT node %p for peer %s(%s)",
bnc, peer->host, bnc->bgp->name_pretty);
unregister_zebra_rnh(bnc);
bnc_free(bnc);
}
}
static void bgp_process_nexthop_update(struct bgp_nexthop_cache *bnc,
bgpd: Fix import check removal Fix: 06e4e90132ad23815c6f288dd7e6be334f5ab233 Modified BGP to pay more attention the prefix returned from zebra to ensure that a LPM wasn't accidently causing BGP import checks to think it had a match when it did not. This unfortunately removed the check to handle the route removal. This sequence of config and events would leave BGP in a bad state: ip route 100.100.100.0/24 Null0 router bgp 32932 bgp network import-check address-family ipv4 uni network 100.100.100.0/24 Then if you removed the static route the import check would still think the route existed: donatas-pc(config)# ip route 100.100.100.0/24 Null0 donatas-pc(config)# do sh ip bgp import-check-table Current BGP import check cache: 100.100.100.0 valid [IGP metric 0], #paths 1 blackhole Last update: Sat Apr 23 22:51:34 2022 donatas-pc(config)# do sh ip nht 100.100.100.0 resolved via static is directly connected, Null0 Client list: bgp(fd 17) donatas-pc(config)# do sh ip bgp neighbors 192.168.10.123 advertised-routes | include 100.100.100.0 *> 100.100.100.0/24 0.0.0.0 0 32768 i donatas-pc(config)# no ip route 100.100.100.0/24 Null0 donatas-pc(config)# do sh ip nht 100.100.100.0 resolved via kernel via 192.168.10.1, enp3s0 Client list: bgp(fd 17) donatas-pc(config)# do sh ip bgp import-check-table Current BGP import check cache: 100.100.100.0 valid [IGP metric 0], #paths 1 blackhole Last update: Sat Apr 23 22:51:34 2022 donatas-pc(config)# do sh ip bgp neighbors 192.168.10.123 advertised-routes | include 100.100.100.0 *> 100.100.100.0/24 0.0.0.0 0 32768 i donatas-pc(config)# Fix this by moving the code to handle the prefix check to the evaluation function and mark the bnc as not matching and actually evaluate the bnc. Signed-off-by: Donald Sharp <sharpd@nvidia.com>
2022-04-24 22:52:46 +02:00
struct zapi_route *nhr,
bool import_check)
{
struct nexthop *nexthop;
struct nexthop *oldnh;
struct nexthop *nhlist_head = NULL;
struct nexthop *nhlist_tail = NULL;
int i;
bgpd: EVPN route type-5 to type-2 recursive resolution using gateway IP When EVPN prefix route with a gateway IP overlay index is imported into the IP vrf at the ingress PE, BGP nexthop of this route is set to the gateway IP. For this vrf route to be valid, following conditions must be met. - Gateway IP nexthop of this route should be L3 reachable, i.e., this route should be resolved in RIB. - A remote MAC/IP route should be present for the gateway IP address in the EVI(L2VPN table). To check for the first condition, gateway IP is registered with nht (nexthop tracking) to receive the reachability notifications for this IP from zebra RIB. If the gateway IP is reachable, zebra sends the reachability information (i.e., nexthop interface) for the gateway IP. This nexthop interface should be the SVI interface. Now, to find out type-2 route corresponding to the gateway IP, we need to fetch the VNI for the above SVI. To do this VNI lookup effitiently, define a hashtable of struct bgpevpn with svi_ifindex as key. struct hash *vni_svi_hash; An EVI instance is added to vni_svi_hash if its svi_ifindex is nonzero. Using this hash, we obtain struct bgpevpn corresponding to the gateway IP. For gateway IP overlay index recursive lookup, once we find the correct EVI, we have to lookup its route table for a MAC/IP prefix. As we have to iterate the entire route table for every lookup, this lookup is expensive. We can optimize this lookup by adding all the remote IP addresses in a hash table. Following hash table is defined for this purpose in struct bgpevpn Struct hash *remote_ip_hash; When a MAC/IP route is installed in the EVI table, it is also added to remote_ip_hash. It is possible to have multiple MAC/IP routes with the same IP address because of host move scenarios. Thus, for every address addr in remote_ip_hash, we maintain list of all the MAC/IP routes having addr as their IP address. Following structure defines an address in remote_ip_hash. struct evpn_remote_ip { struct ipaddr addr; struct list *macip_path_list; }; A Boolean field is added to struct bgp_nexthop_cache to indicate that the nexthop is EVPN gateway IP overlay index. bool is_evpn_gwip_nexthop; A flag BGP_NEXTHOP_EVPN_INCOMPLETE is added to struct bgp_nexthop_cache. This flag is set when the gateway IP is L3 reachable but not yet resolved by a MAC/IP route. Following table explains the combination of L3 and L2 reachability w.r.t. BGP_NEXTHOP_VALID and BGP_NEXTHOP_EVPN_INCOMPLETE flags * | MACIP resolved | MACIP unresolved *----------------|----------------|------------------ * L3 reachable | VALID = 1 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 1 * ---------------|----------------|-------------------- * L3 unreachable | VALID = 0 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 0 Procedure that we use to check if the gateway IP is resolvable by a MAC/IP route: - Find the EVI/L2VRF that belongs to the nexthop SVI using vni_svi_hash. - Check if the gateway IP is present in remote_ip_hash in this EVI. When the gateway IP is L3 reachable and it is also resolved by a MAC/IP route, unset BGP_NEXTHOP_EVPN_INCOMPLETE flag and set BGP_NEXTHOP_VALID flag. Signed-off-by: Ameya Dharkar <adharkar@vmware.com>
2021-01-11 12:51:56 +01:00
bool evpn_resolved = false;
bnc->last_update = monotime(NULL);
bnc->change_flags = 0;
/* debug print the input */
if (BGP_DEBUG(nht, NHT)) {
char bnc_buf[BNC_FLAG_DUMP_SIZE];
zlog_debug(
"%s(%u): Rcvd NH update %pFX(%u)(%u) - metric %d/%d #nhops %d/%d flags %s",
bnc->bgp->name_pretty, bnc->bgp->vrf_id, &nhr->prefix,
bnc->ifindex_ipv6_ll, bnc->srte_color, nhr->metric,
bnc->metric, nhr->nexthop_num, bnc->nexthop_num,
bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
sizeof(bnc_buf)));
}
if (nhr->metric != bnc->metric)
SET_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED);
if (nhr->nexthop_num != bnc->nexthop_num)
SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
bgpd: Fix import check removal Fix: 06e4e90132ad23815c6f288dd7e6be334f5ab233 Modified BGP to pay more attention the prefix returned from zebra to ensure that a LPM wasn't accidently causing BGP import checks to think it had a match when it did not. This unfortunately removed the check to handle the route removal. This sequence of config and events would leave BGP in a bad state: ip route 100.100.100.0/24 Null0 router bgp 32932 bgp network import-check address-family ipv4 uni network 100.100.100.0/24 Then if you removed the static route the import check would still think the route existed: donatas-pc(config)# ip route 100.100.100.0/24 Null0 donatas-pc(config)# do sh ip bgp import-check-table Current BGP import check cache: 100.100.100.0 valid [IGP metric 0], #paths 1 blackhole Last update: Sat Apr 23 22:51:34 2022 donatas-pc(config)# do sh ip nht 100.100.100.0 resolved via static is directly connected, Null0 Client list: bgp(fd 17) donatas-pc(config)# do sh ip bgp neighbors 192.168.10.123 advertised-routes | include 100.100.100.0 *> 100.100.100.0/24 0.0.0.0 0 32768 i donatas-pc(config)# no ip route 100.100.100.0/24 Null0 donatas-pc(config)# do sh ip nht 100.100.100.0 resolved via kernel via 192.168.10.1, enp3s0 Client list: bgp(fd 17) donatas-pc(config)# do sh ip bgp import-check-table Current BGP import check cache: 100.100.100.0 valid [IGP metric 0], #paths 1 blackhole Last update: Sat Apr 23 22:51:34 2022 donatas-pc(config)# do sh ip bgp neighbors 192.168.10.123 advertised-routes | include 100.100.100.0 *> 100.100.100.0/24 0.0.0.0 0 32768 i donatas-pc(config)# Fix this by moving the code to handle the prefix check to the evaluation function and mark the bnc as not matching and actually evaluate the bnc. Signed-off-by: Donald Sharp <sharpd@nvidia.com>
2022-04-24 22:52:46 +02:00
if (import_check && (nhr->type == ZEBRA_ROUTE_BGP ||
!prefix_same(&bnc->prefix, &nhr->prefix))) {
SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID);
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
bnc_nexthop_free(bnc);
bnc->nexthop = NULL;
if (BGP_DEBUG(nht, NHT))
zlog_debug(
"%s: Import Check does not resolve to the same prefix for %pFX received %pFX or matching route is BGP",
__func__, &bnc->prefix, &nhr->prefix);
} else if (nhr->nexthop_num) {
struct peer *peer = bnc->nht_info;
prefix_copy(&bnc->resolved_prefix, &nhr->prefix);
/* notify bgp fsm if nbr ip goes from invalid->valid */
if (!bnc->nexthop_num)
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
bgpd: EVPN route type-5 to type-2 recursive resolution using gateway IP When EVPN prefix route with a gateway IP overlay index is imported into the IP vrf at the ingress PE, BGP nexthop of this route is set to the gateway IP. For this vrf route to be valid, following conditions must be met. - Gateway IP nexthop of this route should be L3 reachable, i.e., this route should be resolved in RIB. - A remote MAC/IP route should be present for the gateway IP address in the EVI(L2VPN table). To check for the first condition, gateway IP is registered with nht (nexthop tracking) to receive the reachability notifications for this IP from zebra RIB. If the gateway IP is reachable, zebra sends the reachability information (i.e., nexthop interface) for the gateway IP. This nexthop interface should be the SVI interface. Now, to find out type-2 route corresponding to the gateway IP, we need to fetch the VNI for the above SVI. To do this VNI lookup effitiently, define a hashtable of struct bgpevpn with svi_ifindex as key. struct hash *vni_svi_hash; An EVI instance is added to vni_svi_hash if its svi_ifindex is nonzero. Using this hash, we obtain struct bgpevpn corresponding to the gateway IP. For gateway IP overlay index recursive lookup, once we find the correct EVI, we have to lookup its route table for a MAC/IP prefix. As we have to iterate the entire route table for every lookup, this lookup is expensive. We can optimize this lookup by adding all the remote IP addresses in a hash table. Following hash table is defined for this purpose in struct bgpevpn Struct hash *remote_ip_hash; When a MAC/IP route is installed in the EVI table, it is also added to remote_ip_hash. It is possible to have multiple MAC/IP routes with the same IP address because of host move scenarios. Thus, for every address addr in remote_ip_hash, we maintain list of all the MAC/IP routes having addr as their IP address. Following structure defines an address in remote_ip_hash. struct evpn_remote_ip { struct ipaddr addr; struct list *macip_path_list; }; A Boolean field is added to struct bgp_nexthop_cache to indicate that the nexthop is EVPN gateway IP overlay index. bool is_evpn_gwip_nexthop; A flag BGP_NEXTHOP_EVPN_INCOMPLETE is added to struct bgp_nexthop_cache. This flag is set when the gateway IP is L3 reachable but not yet resolved by a MAC/IP route. Following table explains the combination of L3 and L2 reachability w.r.t. BGP_NEXTHOP_VALID and BGP_NEXTHOP_EVPN_INCOMPLETE flags * | MACIP resolved | MACIP unresolved *----------------|----------------|------------------ * L3 reachable | VALID = 1 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 1 * ---------------|----------------|-------------------- * L3 unreachable | VALID = 0 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 0 Procedure that we use to check if the gateway IP is resolvable by a MAC/IP route: - Find the EVI/L2VRF that belongs to the nexthop SVI using vni_svi_hash. - Check if the gateway IP is present in remote_ip_hash in this EVI. When the gateway IP is L3 reachable and it is also resolved by a MAC/IP route, unset BGP_NEXTHOP_EVPN_INCOMPLETE flag and set BGP_NEXTHOP_VALID flag. Signed-off-by: Ameya Dharkar <adharkar@vmware.com>
2021-01-11 12:51:56 +01:00
if (!bnc->is_evpn_gwip_nexthop)
SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
bnc->metric = nhr->metric;
bnc->nexthop_num = nhr->nexthop_num;
UNSET_FLAG(bnc->flags,
BGP_NEXTHOP_LABELED_VALID); /* check below */
for (i = 0; i < nhr->nexthop_num; i++) {
int num_labels = 0;
nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]);
/*
* Turn on RA for the v6 nexthops
* we receive from bgp. This is to allow us
* to work with v4 routing over v6 nexthops
*/
if (peer && !peer->ifp
&& CHECK_FLAG(peer->flags,
PEER_FLAG_CAPABILITY_ENHE)
&& nhr->prefix.family == AF_INET6
&& nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
struct interface *ifp;
ifp = if_lookup_by_index(nexthop->ifindex,
nexthop->vrf_id);
if (ifp)
zclient_send_interface_radv_req(
zclient, nexthop->vrf_id, ifp,
true,
BGP_UNNUM_DEFAULT_RA_INTERVAL);
}
/* There is at least one label-switched path */
if (nexthop->nh_label &&
nexthop->nh_label->num_labels) {
SET_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID);
num_labels = nexthop->nh_label->num_labels;
}
if (BGP_DEBUG(nht, NHT)) {
char buf[NEXTHOP_STRLEN];
zlog_debug(
" nhop via %s (%d labels)",
nexthop2str(nexthop, buf, sizeof(buf)),
num_labels);
}
if (nhlist_tail) {
nhlist_tail->next = nexthop;
nhlist_tail = nexthop;
} else {
nhlist_tail = nexthop;
nhlist_head = nexthop;
}
/* No need to evaluate the nexthop if we have already
* determined
* that there has been a change.
*/
if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED))
continue;
for (oldnh = bnc->nexthop; oldnh; oldnh = oldnh->next)
if (nexthop_same(oldnh, nexthop))
break;
if (!oldnh)
SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
}
bnc_nexthop_free(bnc);
bnc->nexthop = nhlist_head;
bgpd: EVPN route type-5 to type-2 recursive resolution using gateway IP When EVPN prefix route with a gateway IP overlay index is imported into the IP vrf at the ingress PE, BGP nexthop of this route is set to the gateway IP. For this vrf route to be valid, following conditions must be met. - Gateway IP nexthop of this route should be L3 reachable, i.e., this route should be resolved in RIB. - A remote MAC/IP route should be present for the gateway IP address in the EVI(L2VPN table). To check for the first condition, gateway IP is registered with nht (nexthop tracking) to receive the reachability notifications for this IP from zebra RIB. If the gateway IP is reachable, zebra sends the reachability information (i.e., nexthop interface) for the gateway IP. This nexthop interface should be the SVI interface. Now, to find out type-2 route corresponding to the gateway IP, we need to fetch the VNI for the above SVI. To do this VNI lookup effitiently, define a hashtable of struct bgpevpn with svi_ifindex as key. struct hash *vni_svi_hash; An EVI instance is added to vni_svi_hash if its svi_ifindex is nonzero. Using this hash, we obtain struct bgpevpn corresponding to the gateway IP. For gateway IP overlay index recursive lookup, once we find the correct EVI, we have to lookup its route table for a MAC/IP prefix. As we have to iterate the entire route table for every lookup, this lookup is expensive. We can optimize this lookup by adding all the remote IP addresses in a hash table. Following hash table is defined for this purpose in struct bgpevpn Struct hash *remote_ip_hash; When a MAC/IP route is installed in the EVI table, it is also added to remote_ip_hash. It is possible to have multiple MAC/IP routes with the same IP address because of host move scenarios. Thus, for every address addr in remote_ip_hash, we maintain list of all the MAC/IP routes having addr as their IP address. Following structure defines an address in remote_ip_hash. struct evpn_remote_ip { struct ipaddr addr; struct list *macip_path_list; }; A Boolean field is added to struct bgp_nexthop_cache to indicate that the nexthop is EVPN gateway IP overlay index. bool is_evpn_gwip_nexthop; A flag BGP_NEXTHOP_EVPN_INCOMPLETE is added to struct bgp_nexthop_cache. This flag is set when the gateway IP is L3 reachable but not yet resolved by a MAC/IP route. Following table explains the combination of L3 and L2 reachability w.r.t. BGP_NEXTHOP_VALID and BGP_NEXTHOP_EVPN_INCOMPLETE flags * | MACIP resolved | MACIP unresolved *----------------|----------------|------------------ * L3 reachable | VALID = 1 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 1 * ---------------|----------------|-------------------- * L3 unreachable | VALID = 0 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 0 Procedure that we use to check if the gateway IP is resolvable by a MAC/IP route: - Find the EVI/L2VRF that belongs to the nexthop SVI using vni_svi_hash. - Check if the gateway IP is present in remote_ip_hash in this EVI. When the gateway IP is L3 reachable and it is also resolved by a MAC/IP route, unset BGP_NEXTHOP_EVPN_INCOMPLETE flag and set BGP_NEXTHOP_VALID flag. Signed-off-by: Ameya Dharkar <adharkar@vmware.com>
2021-01-11 12:51:56 +01:00
/*
* Gateway IP nexthop is L3 reachable. Mark it as
* BGP_NEXTHOP_VALID only if it is recursively resolved with a
* remote EVPN RT-2.
* Else, mark it as BGP_NEXTHOP_EVPN_INCOMPLETE.
* When its mapping with EVPN RT-2 is established, unset
* BGP_NEXTHOP_EVPN_INCOMPLETE and set BGP_NEXTHOP_VALID.
*/
if (bnc->is_evpn_gwip_nexthop) {
evpn_resolved = bgp_evpn_is_gateway_ip_resolved(bnc);
if (BGP_DEBUG(nht, NHT))
bgpd: EVPN route type-5 to type-2 recursive resolution using gateway IP When EVPN prefix route with a gateway IP overlay index is imported into the IP vrf at the ingress PE, BGP nexthop of this route is set to the gateway IP. For this vrf route to be valid, following conditions must be met. - Gateway IP nexthop of this route should be L3 reachable, i.e., this route should be resolved in RIB. - A remote MAC/IP route should be present for the gateway IP address in the EVI(L2VPN table). To check for the first condition, gateway IP is registered with nht (nexthop tracking) to receive the reachability notifications for this IP from zebra RIB. If the gateway IP is reachable, zebra sends the reachability information (i.e., nexthop interface) for the gateway IP. This nexthop interface should be the SVI interface. Now, to find out type-2 route corresponding to the gateway IP, we need to fetch the VNI for the above SVI. To do this VNI lookup effitiently, define a hashtable of struct bgpevpn with svi_ifindex as key. struct hash *vni_svi_hash; An EVI instance is added to vni_svi_hash if its svi_ifindex is nonzero. Using this hash, we obtain struct bgpevpn corresponding to the gateway IP. For gateway IP overlay index recursive lookup, once we find the correct EVI, we have to lookup its route table for a MAC/IP prefix. As we have to iterate the entire route table for every lookup, this lookup is expensive. We can optimize this lookup by adding all the remote IP addresses in a hash table. Following hash table is defined for this purpose in struct bgpevpn Struct hash *remote_ip_hash; When a MAC/IP route is installed in the EVI table, it is also added to remote_ip_hash. It is possible to have multiple MAC/IP routes with the same IP address because of host move scenarios. Thus, for every address addr in remote_ip_hash, we maintain list of all the MAC/IP routes having addr as their IP address. Following structure defines an address in remote_ip_hash. struct evpn_remote_ip { struct ipaddr addr; struct list *macip_path_list; }; A Boolean field is added to struct bgp_nexthop_cache to indicate that the nexthop is EVPN gateway IP overlay index. bool is_evpn_gwip_nexthop; A flag BGP_NEXTHOP_EVPN_INCOMPLETE is added to struct bgp_nexthop_cache. This flag is set when the gateway IP is L3 reachable but not yet resolved by a MAC/IP route. Following table explains the combination of L3 and L2 reachability w.r.t. BGP_NEXTHOP_VALID and BGP_NEXTHOP_EVPN_INCOMPLETE flags * | MACIP resolved | MACIP unresolved *----------------|----------------|------------------ * L3 reachable | VALID = 1 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 1 * ---------------|----------------|-------------------- * L3 unreachable | VALID = 0 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 0 Procedure that we use to check if the gateway IP is resolvable by a MAC/IP route: - Find the EVI/L2VRF that belongs to the nexthop SVI using vni_svi_hash. - Check if the gateway IP is present in remote_ip_hash in this EVI. When the gateway IP is L3 reachable and it is also resolved by a MAC/IP route, unset BGP_NEXTHOP_EVPN_INCOMPLETE flag and set BGP_NEXTHOP_VALID flag. Signed-off-by: Ameya Dharkar <adharkar@vmware.com>
2021-01-11 12:51:56 +01:00
zlog_debug(
"EVPN gateway IP %pFX recursive MAC/IP lookup %s",
&bnc->prefix,
bgpd: EVPN route type-5 to type-2 recursive resolution using gateway IP When EVPN prefix route with a gateway IP overlay index is imported into the IP vrf at the ingress PE, BGP nexthop of this route is set to the gateway IP. For this vrf route to be valid, following conditions must be met. - Gateway IP nexthop of this route should be L3 reachable, i.e., this route should be resolved in RIB. - A remote MAC/IP route should be present for the gateway IP address in the EVI(L2VPN table). To check for the first condition, gateway IP is registered with nht (nexthop tracking) to receive the reachability notifications for this IP from zebra RIB. If the gateway IP is reachable, zebra sends the reachability information (i.e., nexthop interface) for the gateway IP. This nexthop interface should be the SVI interface. Now, to find out type-2 route corresponding to the gateway IP, we need to fetch the VNI for the above SVI. To do this VNI lookup effitiently, define a hashtable of struct bgpevpn with svi_ifindex as key. struct hash *vni_svi_hash; An EVI instance is added to vni_svi_hash if its svi_ifindex is nonzero. Using this hash, we obtain struct bgpevpn corresponding to the gateway IP. For gateway IP overlay index recursive lookup, once we find the correct EVI, we have to lookup its route table for a MAC/IP prefix. As we have to iterate the entire route table for every lookup, this lookup is expensive. We can optimize this lookup by adding all the remote IP addresses in a hash table. Following hash table is defined for this purpose in struct bgpevpn Struct hash *remote_ip_hash; When a MAC/IP route is installed in the EVI table, it is also added to remote_ip_hash. It is possible to have multiple MAC/IP routes with the same IP address because of host move scenarios. Thus, for every address addr in remote_ip_hash, we maintain list of all the MAC/IP routes having addr as their IP address. Following structure defines an address in remote_ip_hash. struct evpn_remote_ip { struct ipaddr addr; struct list *macip_path_list; }; A Boolean field is added to struct bgp_nexthop_cache to indicate that the nexthop is EVPN gateway IP overlay index. bool is_evpn_gwip_nexthop; A flag BGP_NEXTHOP_EVPN_INCOMPLETE is added to struct bgp_nexthop_cache. This flag is set when the gateway IP is L3 reachable but not yet resolved by a MAC/IP route. Following table explains the combination of L3 and L2 reachability w.r.t. BGP_NEXTHOP_VALID and BGP_NEXTHOP_EVPN_INCOMPLETE flags * | MACIP resolved | MACIP unresolved *----------------|----------------|------------------ * L3 reachable | VALID = 1 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 1 * ---------------|----------------|-------------------- * L3 unreachable | VALID = 0 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 0 Procedure that we use to check if the gateway IP is resolvable by a MAC/IP route: - Find the EVI/L2VRF that belongs to the nexthop SVI using vni_svi_hash. - Check if the gateway IP is present in remote_ip_hash in this EVI. When the gateway IP is L3 reachable and it is also resolved by a MAC/IP route, unset BGP_NEXTHOP_EVPN_INCOMPLETE flag and set BGP_NEXTHOP_VALID flag. Signed-off-by: Ameya Dharkar <adharkar@vmware.com>
2021-01-11 12:51:56 +01:00
(evpn_resolved ? "successful"
: "failed"));
if (evpn_resolved) {
SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
UNSET_FLAG(bnc->flags,
BGP_NEXTHOP_EVPN_INCOMPLETE);
SET_FLAG(bnc->change_flags,
BGP_NEXTHOP_MACIP_CHANGED);
bgpd: EVPN route type-5 to type-2 recursive resolution using gateway IP When EVPN prefix route with a gateway IP overlay index is imported into the IP vrf at the ingress PE, BGP nexthop of this route is set to the gateway IP. For this vrf route to be valid, following conditions must be met. - Gateway IP nexthop of this route should be L3 reachable, i.e., this route should be resolved in RIB. - A remote MAC/IP route should be present for the gateway IP address in the EVI(L2VPN table). To check for the first condition, gateway IP is registered with nht (nexthop tracking) to receive the reachability notifications for this IP from zebra RIB. If the gateway IP is reachable, zebra sends the reachability information (i.e., nexthop interface) for the gateway IP. This nexthop interface should be the SVI interface. Now, to find out type-2 route corresponding to the gateway IP, we need to fetch the VNI for the above SVI. To do this VNI lookup effitiently, define a hashtable of struct bgpevpn with svi_ifindex as key. struct hash *vni_svi_hash; An EVI instance is added to vni_svi_hash if its svi_ifindex is nonzero. Using this hash, we obtain struct bgpevpn corresponding to the gateway IP. For gateway IP overlay index recursive lookup, once we find the correct EVI, we have to lookup its route table for a MAC/IP prefix. As we have to iterate the entire route table for every lookup, this lookup is expensive. We can optimize this lookup by adding all the remote IP addresses in a hash table. Following hash table is defined for this purpose in struct bgpevpn Struct hash *remote_ip_hash; When a MAC/IP route is installed in the EVI table, it is also added to remote_ip_hash. It is possible to have multiple MAC/IP routes with the same IP address because of host move scenarios. Thus, for every address addr in remote_ip_hash, we maintain list of all the MAC/IP routes having addr as their IP address. Following structure defines an address in remote_ip_hash. struct evpn_remote_ip { struct ipaddr addr; struct list *macip_path_list; }; A Boolean field is added to struct bgp_nexthop_cache to indicate that the nexthop is EVPN gateway IP overlay index. bool is_evpn_gwip_nexthop; A flag BGP_NEXTHOP_EVPN_INCOMPLETE is added to struct bgp_nexthop_cache. This flag is set when the gateway IP is L3 reachable but not yet resolved by a MAC/IP route. Following table explains the combination of L3 and L2 reachability w.r.t. BGP_NEXTHOP_VALID and BGP_NEXTHOP_EVPN_INCOMPLETE flags * | MACIP resolved | MACIP unresolved *----------------|----------------|------------------ * L3 reachable | VALID = 1 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 1 * ---------------|----------------|-------------------- * L3 unreachable | VALID = 0 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 0 Procedure that we use to check if the gateway IP is resolvable by a MAC/IP route: - Find the EVI/L2VRF that belongs to the nexthop SVI using vni_svi_hash. - Check if the gateway IP is present in remote_ip_hash in this EVI. When the gateway IP is L3 reachable and it is also resolved by a MAC/IP route, unset BGP_NEXTHOP_EVPN_INCOMPLETE flag and set BGP_NEXTHOP_VALID flag. Signed-off-by: Ameya Dharkar <adharkar@vmware.com>
2021-01-11 12:51:56 +01:00
} else {
SET_FLAG(bnc->flags,
BGP_NEXTHOP_EVPN_INCOMPLETE);
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
bgpd: EVPN route type-5 to type-2 recursive resolution using gateway IP When EVPN prefix route with a gateway IP overlay index is imported into the IP vrf at the ingress PE, BGP nexthop of this route is set to the gateway IP. For this vrf route to be valid, following conditions must be met. - Gateway IP nexthop of this route should be L3 reachable, i.e., this route should be resolved in RIB. - A remote MAC/IP route should be present for the gateway IP address in the EVI(L2VPN table). To check for the first condition, gateway IP is registered with nht (nexthop tracking) to receive the reachability notifications for this IP from zebra RIB. If the gateway IP is reachable, zebra sends the reachability information (i.e., nexthop interface) for the gateway IP. This nexthop interface should be the SVI interface. Now, to find out type-2 route corresponding to the gateway IP, we need to fetch the VNI for the above SVI. To do this VNI lookup effitiently, define a hashtable of struct bgpevpn with svi_ifindex as key. struct hash *vni_svi_hash; An EVI instance is added to vni_svi_hash if its svi_ifindex is nonzero. Using this hash, we obtain struct bgpevpn corresponding to the gateway IP. For gateway IP overlay index recursive lookup, once we find the correct EVI, we have to lookup its route table for a MAC/IP prefix. As we have to iterate the entire route table for every lookup, this lookup is expensive. We can optimize this lookup by adding all the remote IP addresses in a hash table. Following hash table is defined for this purpose in struct bgpevpn Struct hash *remote_ip_hash; When a MAC/IP route is installed in the EVI table, it is also added to remote_ip_hash. It is possible to have multiple MAC/IP routes with the same IP address because of host move scenarios. Thus, for every address addr in remote_ip_hash, we maintain list of all the MAC/IP routes having addr as their IP address. Following structure defines an address in remote_ip_hash. struct evpn_remote_ip { struct ipaddr addr; struct list *macip_path_list; }; A Boolean field is added to struct bgp_nexthop_cache to indicate that the nexthop is EVPN gateway IP overlay index. bool is_evpn_gwip_nexthop; A flag BGP_NEXTHOP_EVPN_INCOMPLETE is added to struct bgp_nexthop_cache. This flag is set when the gateway IP is L3 reachable but not yet resolved by a MAC/IP route. Following table explains the combination of L3 and L2 reachability w.r.t. BGP_NEXTHOP_VALID and BGP_NEXTHOP_EVPN_INCOMPLETE flags * | MACIP resolved | MACIP unresolved *----------------|----------------|------------------ * L3 reachable | VALID = 1 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 1 * ---------------|----------------|-------------------- * L3 unreachable | VALID = 0 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 0 Procedure that we use to check if the gateway IP is resolvable by a MAC/IP route: - Find the EVI/L2VRF that belongs to the nexthop SVI using vni_svi_hash. - Check if the gateway IP is present in remote_ip_hash in this EVI. When the gateway IP is L3 reachable and it is also resolved by a MAC/IP route, unset BGP_NEXTHOP_EVPN_INCOMPLETE flag and set BGP_NEXTHOP_VALID flag. Signed-off-by: Ameya Dharkar <adharkar@vmware.com>
2021-01-11 12:51:56 +01:00
}
}
} else {
memset(&bnc->resolved_prefix, 0, sizeof(bnc->resolved_prefix));
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID);
bnc->nexthop_num = nhr->nexthop_num;
/* notify bgp fsm if nbr ip goes from valid->invalid */
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
bnc_nexthop_free(bnc);
bnc->nexthop = NULL;
}
evaluate_paths(bnc);
}
static void bgp_nht_ifp_table_handle(struct bgp *bgp,
struct bgp_nexthop_cache_head *table,
struct interface *ifp, bool up)
{
struct bgp_nexthop_cache *bnc;
struct nexthop *nhop;
uint8_t other_nh_count;
bool nhop_ll_found = false;
bool nhop_found = false;
if (ifp->ifindex == IFINDEX_INTERNAL) {
zlog_warn("%s: The interface %s ignored", __func__, ifp->name);
return;
}
frr_each (bgp_nexthop_cache, table, bnc) {
other_nh_count = 0;
nhop_ll_found = bnc->ifindex_ipv6_ll == ifp->ifindex;
for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
if (nhop->ifindex == bnc->ifindex_ipv6_ll)
continue;
if (nhop->ifindex != ifp->ifindex) {
other_nh_count++;
continue;
}
if (nhop->vrf_id != ifp->vrf->vrf_id) {
other_nh_count++;
continue;
}
nhop_found = true;
}
if (!nhop_found && !nhop_ll_found)
/* The event interface does not match the nexthop cache
* entry */
continue;
if (!up && other_nh_count > 0)
/* Down event ignored in case of multiple next-hop
* interfaces. The other might interfaces might be still
* up. The cases where all interfaces are down or a bnc
* is invalid are processed by a separate zebra rnh
* messages.
*/
continue;
if (!nhop_ll_found) {
evaluate_paths(bnc);
continue;
}
bnc->last_update = monotime(NULL);
bnc->change_flags = 0;
/*
* For interface based routes ( ala the v6 LL routes
* that this was written for ) the metric received
* for the connected route is 0 not 1.
*/
bnc->metric = 0;
if (up) {
SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
/* change nexthop number only for ll */
bnc->nexthop_num = 1;
} else {
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
bnc->nexthop_num = 0;
}
evaluate_paths(bnc);
}
}
static void bgp_nht_ifp_handle(struct interface *ifp, bool up)
{
struct bgp *bgp;
bgp = ifp->vrf->info;
if (!bgp)
return;
bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP], ifp,
up);
bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP], ifp,
up);
bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP6], ifp,
up);
bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP6], ifp,
up);
}
void bgp_nht_ifp_up(struct interface *ifp)
{
bgp_nht_ifp_handle(ifp, true);
}
void bgp_nht_ifp_down(struct interface *ifp)
{
bgp_nht_ifp_handle(ifp, false);
}
static void bgp_nht_ifp_initial(struct event *thread)
{
ifindex_t ifindex = EVENT_VAL(thread);
struct bgp *bgp = EVENT_ARG(thread);
struct interface *ifp = if_lookup_by_index(ifindex, bgp->vrf_id);
if (!ifp)
return;
if (BGP_DEBUG(nht, NHT))
zlog_debug(
"Handle NHT initial update for Intf %s(%d) status %s",
ifp->name, ifp->ifindex, if_is_up(ifp) ? "up" : "down");
if (if_is_up(ifp))
bgp_nht_ifp_up(ifp);
else
bgp_nht_ifp_down(ifp);
}
/*
* So the bnc code has the ability to handle interface up/down
* events to properly handle v6 LL peering.
* What is happening here:
* The event system for peering expects the nht code to
* report on the tracking events after we move to active
* So let's give the system a chance to report on that event
* in a manner that is expected.
*/
void bgp_nht_interface_events(struct peer *peer)
{
struct bgp *bgp = peer->bgp;
struct bgp_nexthop_cache_head *table;
struct bgp_nexthop_cache *bnc;
struct prefix p;
ifindex_t ifindex = 0;
if (!IN6_IS_ADDR_LINKLOCAL(&peer->connection->su.sin6.sin6_addr))
return;
if (!sockunion2hostprefix(&peer->connection->su, &p))
return;
/*
* Gather the ifindex for if up/down events to be
* tagged into this fun
*/
if (peer->conf_if &&
IN6_IS_ADDR_LINKLOCAL(&peer->connection->su.sin6.sin6_addr))
ifindex = peer->connection->su.sin6.sin6_scope_id;
table = &bgp->nexthop_cache_table[AFI_IP6];
bnc = bnc_find(table, &p, 0, ifindex);
if (!bnc)
return;
if (bnc->ifindex_ipv6_ll)
event_add_event(bm->master, bgp_nht_ifp_initial, bnc->bgp,
bnc->ifindex_ipv6_ll, NULL);
}
void bgp_nexthop_update(struct vrf *vrf, struct prefix *match,
struct zapi_route *nhr)
{
struct bgp_nexthop_cache_head *tree = NULL;
struct bgp_nexthop_cache *bnc_nhc, *bnc_import;
struct bgp *bgp, *bgp_default;
struct bgp_path_info *pi;
struct bgp_dest *dest;
safi_t safi;
afi_t afi;
if (!vrf->info) {
flog_err(EC_BGP_NH_UPD,
"parse nexthop update: instance not found for vrf_id %u",
vrf->vrf_id);
return;
}
bgp = (struct bgp *)vrf->info;
afi = family2afi(match->family);
tree = &bgp->nexthop_cache_table[afi];
bnc_nhc = bnc_find(tree, match, nhr->srte_color, 0);
if (bnc_nhc)
bgp_process_nexthop_update(bnc_nhc, nhr, false);
else if (BGP_DEBUG(nht, NHT))
zlog_debug("parse nexthop update %pFX(%u)(%s): bnc info not found for nexthop cache",
&nhr->prefix, nhr->srte_color,
bgp->name_pretty);
tree = &bgp->import_check_table[afi];
bnc_import = bnc_find(tree, match, nhr->srte_color, 0);
if (bnc_import) {
bgp_process_nexthop_update(bnc_import, nhr, true);
bgp_default = bgp_get_default();
safi = nhr->safi;
if (bgp != bgp_default && bgp->rib[afi][safi]) {
dest = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi,
match, NULL);
for (pi = bgp_dest_get_bgp_path_info(dest); pi;
pi = pi->next)
if (pi->peer == bgp->peer_self &&
pi->type == ZEBRA_ROUTE_BGP &&
pi->sub_type == BGP_ROUTE_STATIC)
vpn_leak_from_vrf_update(bgp_default,
bgp, pi);
}
} else if (BGP_DEBUG(nht, NHT))
zlog_debug("parse nexthop update %pFX(%u)(%s): bnc info not found for import check",
&nhr->prefix, nhr->srte_color,
bgp->name_pretty);
/*
* HACK: if any BGP route is dependant on an SR-policy that doesn't
* exist, zebra will never send NH updates relative to that policy. In
* that case, whenever we receive an update about a colorless NH, update
* the corresponding colorful NHs that share the same endpoint but that
* are inactive. This ugly hack should work around the problem at the
* cost of a performance pernalty. Long term, what should be done is to
* make zebra's RNH subsystem aware of SR-TE colors (like bgpd is),
* which should provide a better infrastructure to solve this issue in
* a more efficient and elegant way.
*/
if (nhr->srte_color == 0) {
struct bgp_nexthop_cache *bnc_iter;
frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
bnc_iter) {
if (!prefix_same(match, &bnc_iter->prefix) ||
bnc_iter->srte_color == 0)
continue;
bgp_process_nexthop_update(bnc_iter, nhr, false);
}
}
}
/*
* Cleanup nexthop registration and status information for BGP nexthops
* pertaining to this VRF. This is invoked upon VRF deletion.
*/
void bgp_cleanup_nexthops(struct bgp *bgp)
{
for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
struct bgp_nexthop_cache *bnc;
frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
bnc) {
/* Clear relevant flags. */
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
bgpd: EVPN route type-5 to type-2 recursive resolution using gateway IP When EVPN prefix route with a gateway IP overlay index is imported into the IP vrf at the ingress PE, BGP nexthop of this route is set to the gateway IP. For this vrf route to be valid, following conditions must be met. - Gateway IP nexthop of this route should be L3 reachable, i.e., this route should be resolved in RIB. - A remote MAC/IP route should be present for the gateway IP address in the EVI(L2VPN table). To check for the first condition, gateway IP is registered with nht (nexthop tracking) to receive the reachability notifications for this IP from zebra RIB. If the gateway IP is reachable, zebra sends the reachability information (i.e., nexthop interface) for the gateway IP. This nexthop interface should be the SVI interface. Now, to find out type-2 route corresponding to the gateway IP, we need to fetch the VNI for the above SVI. To do this VNI lookup effitiently, define a hashtable of struct bgpevpn with svi_ifindex as key. struct hash *vni_svi_hash; An EVI instance is added to vni_svi_hash if its svi_ifindex is nonzero. Using this hash, we obtain struct bgpevpn corresponding to the gateway IP. For gateway IP overlay index recursive lookup, once we find the correct EVI, we have to lookup its route table for a MAC/IP prefix. As we have to iterate the entire route table for every lookup, this lookup is expensive. We can optimize this lookup by adding all the remote IP addresses in a hash table. Following hash table is defined for this purpose in struct bgpevpn Struct hash *remote_ip_hash; When a MAC/IP route is installed in the EVI table, it is also added to remote_ip_hash. It is possible to have multiple MAC/IP routes with the same IP address because of host move scenarios. Thus, for every address addr in remote_ip_hash, we maintain list of all the MAC/IP routes having addr as their IP address. Following structure defines an address in remote_ip_hash. struct evpn_remote_ip { struct ipaddr addr; struct list *macip_path_list; }; A Boolean field is added to struct bgp_nexthop_cache to indicate that the nexthop is EVPN gateway IP overlay index. bool is_evpn_gwip_nexthop; A flag BGP_NEXTHOP_EVPN_INCOMPLETE is added to struct bgp_nexthop_cache. This flag is set when the gateway IP is L3 reachable but not yet resolved by a MAC/IP route. Following table explains the combination of L3 and L2 reachability w.r.t. BGP_NEXTHOP_VALID and BGP_NEXTHOP_EVPN_INCOMPLETE flags * | MACIP resolved | MACIP unresolved *----------------|----------------|------------------ * L3 reachable | VALID = 1 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 1 * ---------------|----------------|-------------------- * L3 unreachable | VALID = 0 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 0 Procedure that we use to check if the gateway IP is resolvable by a MAC/IP route: - Find the EVI/L2VRF that belongs to the nexthop SVI using vni_svi_hash. - Check if the gateway IP is present in remote_ip_hash in this EVI. When the gateway IP is L3 reachable and it is also resolved by a MAC/IP route, unset BGP_NEXTHOP_EVPN_INCOMPLETE flag and set BGP_NEXTHOP_VALID flag. Signed-off-by: Ameya Dharkar <adharkar@vmware.com>
2021-01-11 12:51:56 +01:00
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
}
}
}
/**
* make_prefix - make a prefix structure from the path (essentially
* path's node.
*/
static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p)
{
int is_bgp_static = ((pi->type == ZEBRA_ROUTE_BGP)
&& (pi->sub_type == BGP_ROUTE_STATIC))
? 1
: 0;
struct bgp_dest *net = pi->net;
const struct prefix *p_orig = bgp_dest_get_prefix(net);
struct in_addr ipv4;
if (p_orig->family == AF_FLOWSPEC) {
if (!pi->peer)
return -1;
return bgp_flowspec_get_first_nh(pi->peer->bgp,
pi, p, afi);
}
memset(p, 0, sizeof(struct prefix));
switch (afi) {
case AFI_IP:
p->family = AF_INET;
if (is_bgp_static) {
p->u.prefix4 = p_orig->u.prefix4;
p->prefixlen = p_orig->prefixlen;
} else {
if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
ipv4_mapped_ipv6_to_ipv4(
&pi->attr->mp_nexthop_global, &ipv4);
p->u.prefix4 = ipv4;
p->prefixlen = IPV4_MAX_BITLEN;
} else {
if (p_orig->family == AF_EVPN)
p->u.prefix4 =
pi->attr->mp_nexthop_global_in;
else
p->u.prefix4 = pi->attr->nexthop;
p->prefixlen = IPV4_MAX_BITLEN;
}
}
break;
case AFI_IP6:
p->family = AF_INET6;
if (pi->attr->srv6_l3vpn) {
IPV6_ADDR_COPY(&(p->u.prefix6),
&(pi->attr->srv6_l3vpn->sid));
p->prefixlen = IPV6_MAX_BITLEN;
} else if (is_bgp_static) {
p->u.prefix6 = p_orig->u.prefix6;
p->prefixlen = p_orig->prefixlen;
} else {
/* If we receive MP_REACH nexthop with ::(LL)
* or LL(LL), use LL address as nexthop cache.
*/
if (pi->attr &&
pi->attr->mp_nexthop_len ==
BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL &&
(IN6_IS_ADDR_UNSPECIFIED(
&pi->attr->mp_nexthop_global) ||
IN6_IS_ADDR_LINKLOCAL(&pi->attr->mp_nexthop_global)))
p->u.prefix6 = pi->attr->mp_nexthop_local;
/* If we receive MR_REACH with (GA)::(LL)
* then check for route-map to choose GA or LL
*/
else if (pi->attr &&
pi->attr->mp_nexthop_len ==
BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL) {
if (CHECK_FLAG(pi->attr->nh_flags,
BGP_ATTR_NH_MP_PREFER_GLOBAL))
p->u.prefix6 =
pi->attr->mp_nexthop_global;
else
p->u.prefix6 =
pi->attr->mp_nexthop_local;
} else
p->u.prefix6 = pi->attr->mp_nexthop_global;
p->prefixlen = IPV6_MAX_BITLEN;
}
break;
default:
if (BGP_DEBUG(nht, NHT)) {
zlog_debug(
"%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
__func__, afi, AFI_IP, AFI_IP6);
}
break;
}
return 0;
}
/**
* sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
* command to Zebra.
* ARGUMENTS:
* struct bgp_nexthop_cache *bnc -- the nexthop structure.
* int command -- command to send to zebra
* RETURNS:
* void.
*/
static void sendmsg_zebra_rnh(struct bgp_nexthop_cache *bnc, int command)
{
bool exact_match = false;
2021-09-24 21:51:18 +02:00
bool resolve_via_default = false;
int ret;
if (!zclient)
return;
/* Don't try to register if Zebra doesn't know of this instance. */
if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc->bgp)) {
if (BGP_DEBUG(zebra, ZEBRA))
zlog_debug(
"%s: No zebra instance to talk to, not installing NHT entry",
__func__);
return;
}
if (!bgp_zebra_num_connects()) {
if (BGP_DEBUG(zebra, ZEBRA))
zlog_debug(
"%s: We have not connected yet, cannot send nexthops",
__func__);
}
2021-09-24 21:51:18 +02:00
if (command == ZEBRA_NEXTHOP_REGISTER) {
if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED))
exact_match = true;
if (CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH))
resolve_via_default = true;
}
if (BGP_DEBUG(zebra, ZEBRA))
zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__,
zserv_command_string(command), &bnc->prefix,
bnc->bgp->name_pretty);
ret = zclient_send_rnh(zclient, command, &bnc->prefix, SAFI_UNICAST,
exact_match, resolve_via_default,
bnc->bgp->vrf_id);
if (ret == ZCLIENT_SEND_FAILURE) {
flog_warn(EC_BGP_ZEBRA_SEND,
"sendmsg_nexthop: zclient_send_message() failed");
return;
}
if (command == ZEBRA_NEXTHOP_REGISTER)
SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
else if (command == ZEBRA_NEXTHOP_UNREGISTER)
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
return;
}
/**
* register_zebra_rnh - register a NH/route with Zebra for notification
* when the route or the route to the nexthop changes.
* ARGUMENTS:
* struct bgp_nexthop_cache *bnc
* RETURNS:
* void.
*/
static void register_zebra_rnh(struct bgp_nexthop_cache *bnc)
{
/* Check if we have already registered */
if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
return;
if (bnc->ifindex_ipv6_ll) {
SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
return;
}
sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_REGISTER);
}
/**
* unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
* ARGUMENTS:
* struct bgp_nexthop_cache *bnc
* RETURNS:
* void.
*/
static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc)
{
struct bgp_nexthop_cache *import;
struct bgp_nexthop_cache *nexthop;
struct bgp *bgp = bnc->bgp;
/* Check if we have already registered */
if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
return;
if (bnc->ifindex_ipv6_ll) {
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
return;
}
import = bnc_find(&bgp->import_check_table[bnc->afi], &bnc->prefix, 0,
0);
nexthop = bnc_find(&bgp->nexthop_cache_table[bnc->afi], &bnc->prefix, 0,
0);
/*
* If this entry has both a import and a nexthop entry
* then let's not send the unregister quite as of yet
* wait until we only have 1 left
*/
if (import && nexthop)
return;
sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_UNREGISTER);
}
/**
* evaluate_paths - Evaluate the paths/nets associated with a nexthop.
* ARGUMENTS:
* struct bgp_nexthop_cache *bnc -- the nexthop structure.
* RETURNS:
* void.
*/
bgpd: EVPN route type-5 to type-2 recursive resolution using gateway IP When EVPN prefix route with a gateway IP overlay index is imported into the IP vrf at the ingress PE, BGP nexthop of this route is set to the gateway IP. For this vrf route to be valid, following conditions must be met. - Gateway IP nexthop of this route should be L3 reachable, i.e., this route should be resolved in RIB. - A remote MAC/IP route should be present for the gateway IP address in the EVI(L2VPN table). To check for the first condition, gateway IP is registered with nht (nexthop tracking) to receive the reachability notifications for this IP from zebra RIB. If the gateway IP is reachable, zebra sends the reachability information (i.e., nexthop interface) for the gateway IP. This nexthop interface should be the SVI interface. Now, to find out type-2 route corresponding to the gateway IP, we need to fetch the VNI for the above SVI. To do this VNI lookup effitiently, define a hashtable of struct bgpevpn with svi_ifindex as key. struct hash *vni_svi_hash; An EVI instance is added to vni_svi_hash if its svi_ifindex is nonzero. Using this hash, we obtain struct bgpevpn corresponding to the gateway IP. For gateway IP overlay index recursive lookup, once we find the correct EVI, we have to lookup its route table for a MAC/IP prefix. As we have to iterate the entire route table for every lookup, this lookup is expensive. We can optimize this lookup by adding all the remote IP addresses in a hash table. Following hash table is defined for this purpose in struct bgpevpn Struct hash *remote_ip_hash; When a MAC/IP route is installed in the EVI table, it is also added to remote_ip_hash. It is possible to have multiple MAC/IP routes with the same IP address because of host move scenarios. Thus, for every address addr in remote_ip_hash, we maintain list of all the MAC/IP routes having addr as their IP address. Following structure defines an address in remote_ip_hash. struct evpn_remote_ip { struct ipaddr addr; struct list *macip_path_list; }; A Boolean field is added to struct bgp_nexthop_cache to indicate that the nexthop is EVPN gateway IP overlay index. bool is_evpn_gwip_nexthop; A flag BGP_NEXTHOP_EVPN_INCOMPLETE is added to struct bgp_nexthop_cache. This flag is set when the gateway IP is L3 reachable but not yet resolved by a MAC/IP route. Following table explains the combination of L3 and L2 reachability w.r.t. BGP_NEXTHOP_VALID and BGP_NEXTHOP_EVPN_INCOMPLETE flags * | MACIP resolved | MACIP unresolved *----------------|----------------|------------------ * L3 reachable | VALID = 1 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 1 * ---------------|----------------|-------------------- * L3 unreachable | VALID = 0 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 0 Procedure that we use to check if the gateway IP is resolvable by a MAC/IP route: - Find the EVI/L2VRF that belongs to the nexthop SVI using vni_svi_hash. - Check if the gateway IP is present in remote_ip_hash in this EVI. When the gateway IP is L3 reachable and it is also resolved by a MAC/IP route, unset BGP_NEXTHOP_EVPN_INCOMPLETE flag and set BGP_NEXTHOP_VALID flag. Signed-off-by: Ameya Dharkar <adharkar@vmware.com>
2021-01-11 12:51:56 +01:00
void evaluate_paths(struct bgp_nexthop_cache *bnc)
{
struct bgp_dest *dest;
struct bgp_path_info *path;
struct bgp_path_info *bpi_ultimate;
int afi;
struct peer *peer = (struct peer *)bnc->nht_info;
struct bgp_table *table;
safi_t safi;
struct bgp *bgp_path;
const struct prefix *p;
if (BGP_DEBUG(nht, NHT)) {
char bnc_buf[BNC_FLAG_DUMP_SIZE];
char chg_buf[BNC_FLAG_DUMP_SIZE];
zlog_debug(
"NH update for %pFX(%d)(%u)(%s) - flags %s chgflags %s- evaluate paths",
&bnc->prefix, bnc->ifindex_ipv6_ll, bnc->srte_color,
bnc->bgp->name_pretty,
bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
sizeof(bnc_buf)),
bgp_nexthop_dump_bnc_change_flags(bnc, chg_buf,
sizeof(bnc_buf)));
}
LIST_FOREACH (path, &(bnc->paths), nh_thread) {
bgpd: export redistributed routes with label allocation per nexthop The label allocation per nexthop mode requires to use a nexthop tracking context. For redistributed routes, a nexthop tracking context is created, and the resolution helps to know the real nexthop ip address used. The below configuration example has been used: > vrf vrf1 > ip route 172.31.0.14/32 192.0.2.14 > ip route 172.31.0.15/32 192.0.2.12 > ip route 172.31.0.30/32 192.0.2.30 > exit > router bgp 65500 vrf vrf1 > address-family ipv4 unicast > redistribute static > label vpn export per-nexthop > [..] The static routes are correctly imported in the BGP IPv4 RIB. Contrary to label allocation per vrf mode, some nexthop tracking are created/or reused: > # show bgp vrf vrf1 nexthop > 192.0.2.12 valid [IGP metric 0], #paths 3, peer 192.0.2.12 > if r1-eth1 > Last update: Fri Jan 13 15:49:42 2023 > 192.0.2.14 valid [IGP metric 0], #paths 1 > if r1-eth1 > Last update: Fri Jan 13 15:49:42 2023 > 192.0.2.30 valid [IGP metric 0], #paths 1 > if r1-eth1 > Last update: Fri Jan 13 15:49:51 2023 > [..] This results in having a BGP VPN route for each of the static routes: > # show bgp ipv4 vpn > [..] > Route Distinguisher: 444:1 > *> 172.31.0.14/32 192.0.2.14@9< 0 32768 ? > *> 172.31.0.15/32 192.0.2.12@9< 0 32768 ? > *> 172.31.0.30/32 192.0.2.30@9< 0 32768 ? > [..] Without that patch, only the redistributed routes that rely on a pre-existing nexthop tracking context could be exported. Also, a command in the code about redistributed routes is modified accordingly, to explain that redistribute routes may be submitted to nexthop tracking in the case label allocation per next-hop is used. note: VNC routes have been removed from the redistribution, because of a test failure in the bgp_l3vpn_to_bgp_direct test. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2023-03-03 21:33:34 +01:00
if (path->type == ZEBRA_ROUTE_BGP &&
(path->sub_type == BGP_ROUTE_NORMAL ||
path->sub_type == BGP_ROUTE_STATIC ||
path->sub_type == BGP_ROUTE_IMPORTED))
/* evaluate the path */
;
else if (path->sub_type == BGP_ROUTE_REDISTRIBUTE) {
/* evaluate the path for redistributed routes
* except those from VNC
*/
if ((path->type == ZEBRA_ROUTE_VNC) ||
(path->type == ZEBRA_ROUTE_VNC_DIRECT))
continue;
} else
/* don't evaluate the path */
continue;
dest = path->net;
assert(dest && bgp_dest_table(dest));
p = bgp_dest_get_prefix(dest);
afi = family2afi(p->family);
table = bgp_dest_table(dest);
safi = table->safi;
/*
* handle routes from other VRFs (they can have a
* nexthop in THIS VRF). bgp_path is the bgp instance
* that owns the route referencing this nexthop.
*/
bgp_path = table->bgp;
/*
* Path becomes valid/invalid depending on whether the nexthop
* reachable/unreachable.
*
* In case of unicast routes that were imported from vpn
* and that have labels, they are valid only if there are
* nexthops with labels
*
* If the nexthop is EVPN gateway-IP,
* do not check for a valid label.
*/
bool bnc_is_valid_nexthop = false;
bool path_valid = false;
struct bgp_route_evpn *bre =
bgp_attr_get_evpn_overlay(path->attr);
if (safi == SAFI_UNICAST &&
path->sub_type == BGP_ROUTE_IMPORTED &&
BGP_PATH_INFO_NUM_LABELS(path) &&
!(bre && bre->type == OVERLAY_INDEX_GATEWAY_IP)) {
bnc_is_valid_nexthop =
bgp_isvalid_nexthop_for_l3vpn(bnc, path)
? true
: false;
} else if (safi == SAFI_MPLS_VPN &&
path->sub_type != BGP_ROUTE_IMPORTED) {
/* avoid not redistributing mpls vpn routes */
bnc_is_valid_nexthop = true;
} else {
/* mpls-vpn routes with BGP_ROUTE_IMPORTED subtype */
bgpd: Force self-next-hop check in next-hop update. Problem Description: ===================== +--+ +--+ |R1|-(192.201.202.1)----iBGP----(192.201.202.2)-|R2| +--+ +--+ Routes on R2: ============= S>* 202.202.202.202/32 [1/0] via 192.201.78.1, ens256, 00:40:48 Where, the next-hop network, 192.201.78.0/24, is a directly connected network address. C>* 192.201.78.0/24 is directly connected, ens256, 00:40:48 Configurations on R1: ===================== ! router bgp 201 bgp router-id 192.168.0.1 neighbor 192.201.202.2 remote-as 201 ! Configurations on R2: ===================== ! ip route 202.202.202.202/32 192.201.78.1 ! router bgp 201 bgp router-id 192.168.0.2 neighbor 192.201.202.1 remote-as 201 ! address-family ipv4 unicast redistribute static exit-address-family ! Step-1: ======= R1 receives the route 202.202.202.202/32 from R2. R1 installs the route in its BGP RIB. Step-2: ======= On R1, a connected interface address is added. The address is the same as the next-hop of the BGP route received from R2 (192.201.78.1). Point of Failure: ================= R1 resolves the BGP route even though the route's next-hop is its own connected address. Even though this appears to be a misconfiguration it would still be better to safeguard the code against it. Fix: ==== When BGP receives a connected route from Zebra, it processes the routes for the next-hop update. While doing so, BGP must ignore routes whose next-hop address matches the address of the connected route for which Zebra sent the next-hop update message. Signed-off-by: NaveenThanikachalam <nthanikachal@vmware.com>
2020-04-09 09:27:54 +02:00
if (bgp_update_martian_nexthop(
bnc->bgp, afi, safi, path->type,
path->sub_type, path->attr, dest)) {
bgpd: Force self-next-hop check in next-hop update. Problem Description: ===================== +--+ +--+ |R1|-(192.201.202.1)----iBGP----(192.201.202.2)-|R2| +--+ +--+ Routes on R2: ============= S>* 202.202.202.202/32 [1/0] via 192.201.78.1, ens256, 00:40:48 Where, the next-hop network, 192.201.78.0/24, is a directly connected network address. C>* 192.201.78.0/24 is directly connected, ens256, 00:40:48 Configurations on R1: ===================== ! router bgp 201 bgp router-id 192.168.0.1 neighbor 192.201.202.2 remote-as 201 ! Configurations on R2: ===================== ! ip route 202.202.202.202/32 192.201.78.1 ! router bgp 201 bgp router-id 192.168.0.2 neighbor 192.201.202.1 remote-as 201 ! address-family ipv4 unicast redistribute static exit-address-family ! Step-1: ======= R1 receives the route 202.202.202.202/32 from R2. R1 installs the route in its BGP RIB. Step-2: ======= On R1, a connected interface address is added. The address is the same as the next-hop of the BGP route received from R2 (192.201.78.1). Point of Failure: ================= R1 resolves the BGP route even though the route's next-hop is its own connected address. Even though this appears to be a misconfiguration it would still be better to safeguard the code against it. Fix: ==== When BGP receives a connected route from Zebra, it processes the routes for the next-hop update. While doing so, BGP must ignore routes whose next-hop address matches the address of the connected route for which Zebra sent the next-hop update message. Signed-off-by: NaveenThanikachalam <nthanikachal@vmware.com>
2020-04-09 09:27:54 +02:00
if (BGP_DEBUG(nht, NHT))
zlog_debug(
"%s: prefix %pBD (vrf %s), ignoring path due to martian or self-next-hop",
__func__, dest, bgp_path->name);
bgpd: Force self-next-hop check in next-hop update. Problem Description: ===================== +--+ +--+ |R1|-(192.201.202.1)----iBGP----(192.201.202.2)-|R2| +--+ +--+ Routes on R2: ============= S>* 202.202.202.202/32 [1/0] via 192.201.78.1, ens256, 00:40:48 Where, the next-hop network, 192.201.78.0/24, is a directly connected network address. C>* 192.201.78.0/24 is directly connected, ens256, 00:40:48 Configurations on R1: ===================== ! router bgp 201 bgp router-id 192.168.0.1 neighbor 192.201.202.2 remote-as 201 ! Configurations on R2: ===================== ! ip route 202.202.202.202/32 192.201.78.1 ! router bgp 201 bgp router-id 192.168.0.2 neighbor 192.201.202.1 remote-as 201 ! address-family ipv4 unicast redistribute static exit-address-family ! Step-1: ======= R1 receives the route 202.202.202.202/32 from R2. R1 installs the route in its BGP RIB. Step-2: ======= On R1, a connected interface address is added. The address is the same as the next-hop of the BGP route received from R2 (192.201.78.1). Point of Failure: ================= R1 resolves the BGP route even though the route's next-hop is its own connected address. Even though this appears to be a misconfiguration it would still be better to safeguard the code against it. Fix: ==== When BGP receives a connected route from Zebra, it processes the routes for the next-hop update. While doing so, BGP must ignore routes whose next-hop address matches the address of the connected route for which Zebra sent the next-hop update message. Signed-off-by: NaveenThanikachalam <nthanikachal@vmware.com>
2020-04-09 09:27:54 +02:00
} else
bnc_is_valid_nexthop =
bgp_isvalid_nexthop(bnc) ? true : false;
}
if (BGP_DEBUG(nht, NHT)) {
bgpd: support for as notation format for route distinguisher RD may be built based on an AS number. Like for the AS, the RD may use the AS notation. The two below examples can illustrate: RD 1.1:20 stands for an AS4B:NN RD with AS4B=65536 in dot format. RD 0.1:20 stands for an AS2B:NNNN RD with AS2B=0.1 in dot+ format. This commit adds the asnotation mode to prefix_rd2str() API so as to pick up the relevant display. Two new printfrr extensions are available to display the RD with the two above display methods. - The pRDD extension stands for dot asnotation format - The pRDE extension stands for dot+ asnotation format. - The pRD extension has been renamed to pRDP extension The code is changed each time '%pRD' printf extension is called. Possibly, the asnotation may change the output, then a macro defines the asnotation mode to use. A side effect of forging the mode to use is that the string could not be concatenated with other strings in vty_out and snprintfrr. Those functions have been called multiple times. When zlog_debug needs to display the RD with some other string, the prefix_rd2str() old API is used instead of the printf extension. Some code has been kept untouched: - code related to running-config. Actually, wherever an RD is displayed, its configured name should be dumped. - bgp rfapi code - bgp evpn multihoming code (partially done), since the logic is missing to get the asnotation of 'struct bgp_evpn_es'. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2022-11-22 09:57:10 +01:00
if (dest->pdest) {
char rd_buf[RD_ADDRSTRLEN];
prefix_rd2str(
(struct prefix_rd *)bgp_dest_get_prefix(
dest->pdest),
bgpd: support for as notation format for route distinguisher RD may be built based on an AS number. Like for the AS, the RD may use the AS notation. The two below examples can illustrate: RD 1.1:20 stands for an AS4B:NN RD with AS4B=65536 in dot format. RD 0.1:20 stands for an AS2B:NNNN RD with AS2B=0.1 in dot+ format. This commit adds the asnotation mode to prefix_rd2str() API so as to pick up the relevant display. Two new printfrr extensions are available to display the RD with the two above display methods. - The pRDD extension stands for dot asnotation format - The pRDE extension stands for dot+ asnotation format. - The pRD extension has been renamed to pRDP extension The code is changed each time '%pRD' printf extension is called. Possibly, the asnotation may change the output, then a macro defines the asnotation mode to use. A side effect of forging the mode to use is that the string could not be concatenated with other strings in vty_out and snprintfrr. Those functions have been called multiple times. When zlog_debug needs to display the RD with some other string, the prefix_rd2str() old API is used instead of the printf extension. Some code has been kept untouched: - code related to running-config. Actually, wherever an RD is displayed, its configured name should be dumped. - bgp rfapi code - bgp evpn multihoming code (partially done), since the logic is missing to get the asnotation of 'struct bgp_evpn_es'. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2022-11-22 09:57:10 +01:00
rd_buf, sizeof(rd_buf),
bgp_get_asnotation(bnc->bgp));
zlog_debug(
"... eval path %d/%d %pBD RD %s %s flags 0x%x",
afi, safi, dest, rd_buf,
bgp_path->name_pretty, path->flags);
bgpd: support for as notation format for route distinguisher RD may be built based on an AS number. Like for the AS, the RD may use the AS notation. The two below examples can illustrate: RD 1.1:20 stands for an AS4B:NN RD with AS4B=65536 in dot format. RD 0.1:20 stands for an AS2B:NNNN RD with AS2B=0.1 in dot+ format. This commit adds the asnotation mode to prefix_rd2str() API so as to pick up the relevant display. Two new printfrr extensions are available to display the RD with the two above display methods. - The pRDD extension stands for dot asnotation format - The pRDE extension stands for dot+ asnotation format. - The pRD extension has been renamed to pRDP extension The code is changed each time '%pRD' printf extension is called. Possibly, the asnotation may change the output, then a macro defines the asnotation mode to use. A side effect of forging the mode to use is that the string could not be concatenated with other strings in vty_out and snprintfrr. Those functions have been called multiple times. When zlog_debug needs to display the RD with some other string, the prefix_rd2str() old API is used instead of the printf extension. Some code has been kept untouched: - code related to running-config. Actually, wherever an RD is displayed, its configured name should be dumped. - bgp rfapi code - bgp evpn multihoming code (partially done), since the logic is missing to get the asnotation of 'struct bgp_evpn_es'. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2022-11-22 09:57:10 +01:00
} else
zlog_debug(
"... eval path %d/%d %pBD %s flags 0x%x",
afi, safi, dest, bgp_path->name_pretty,
path->flags);
}
/* Skip paths marked for removal or as history. */
if (CHECK_FLAG(path->flags, BGP_PATH_REMOVED)
|| CHECK_FLAG(path->flags, BGP_PATH_HISTORY))
continue;
/* Copy the metric to the path. Will be used for bestpath
* computation */
bpi_ultimate = bgp_get_imported_bpi_ultimate(path);
if (bgp_isvalid_nexthop(bnc) && bnc->metric)
(bgp_path_info_extra_get(bpi_ultimate))->igpmetric =
bnc->metric;
else if (bpi_ultimate->extra)
bpi_ultimate->extra->igpmetric = 0;
if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED) ||
CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED) ||
bgp_attr_get_color(path->attr))
SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED);
path_valid = CHECK_FLAG(path->flags, BGP_PATH_VALID);
bgpd: add support for l3vpn per-nexthop label This commit introduces a new method to associate a label to prefixes to export to a VPNv4 backbone. All the methods to associate a label to a BGP update is documented in rfc4364, chapter 4.3.2. Initially, the "single label for an entire VRF" method was available. This commit adds "single label for each attachment circuit" method. The change impacts the control-plane, because each BGP update is checked to know if the nexthop has reachability in the VRF or not. If this is the case, then a unique label for a given destination IP in the VRF will be picked up. This label will be reused for an other BGP update that will have the same nexthop IP address. The change impacts the data-plane, because the MPLs pop mechanism applied to incoming labelled packets changes: the MPLS label is popped, and the packet is directly sent to the connected nexthop described in the previous outgoing BGP VPN update. By default per-vrf mode is done, but the user may choose the per-nexthop mode, by using the vty command from the previous commit. In the latter case, a per-vrf label will however be allocated to handle networks that are not directly connected. This is the case for local traffic for instance. The change also include the following: - ECMP case In case a route is learnt in a given VRF, and is resolved via an ECMP nexthop. This implies that when exporting the route as a BGP update, if label allocation per nexthop is used, then two possible MPLS values could be picked up, which is not possible with the current implementation. Actually, the NLRI for VPNv4 stores one prefix, and one single label value, not two. Today, RFC8277 with multiple label capability is not yet available. To avoid this corner case, when a route is resolved via more than one nexthop, the label allocation per nexthop will not apply, and the default per-vrf label will be chosen. Let us imagine BGP redistributes a static route using the `172.31.0.20` nexthop. The nexthop resolution will find two different nexthops fo a unique BGP update. > r1# show running-config > [..] > vrf vrf1 > ip route 172.31.0.30/32 172.31.0.20 > r1# show bgp vrf vrf1 nexthop > [..] > 172.31.0.20 valid [IGP metric 0], #paths 1 > gate 192.0.2.11 > gate 192.0.2.12 > Last update: Mon Jan 16 09:27:09 2023 > Paths: > 1/1 172.31.0.30/32 VRF vrf1 flags 0x20018 To avoid this situation, BGP updates that resolve over multiple nexthops are using the unique per-vrf label. - recursive route case Prefixes that need a recursive route to be resolved can also be eligible for mpls allocation per nexthop. In that case, the nexthop will be the recursive nexthop calculated. To achieve this, all nexthop types in bnc contexts are valid, except for the blackhole nexthops. - network declared prefixes Nexthop tracking is used to look for the reachability of the prefixes. When the the 'no bgp network import-check' command is used, network declared prefixes are maintained active, even if there is no active nexthop. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2023-02-28 14:25:02 +01:00
if (path->type == ZEBRA_ROUTE_BGP &&
path->sub_type == BGP_ROUTE_STATIC &&
!CHECK_FLAG(bgp_path->flags, BGP_FLAG_IMPORT_CHECK))
/* static routes with 'no bgp network import-check' are
* always valid. if nht is called with static routes,
* the vpn exportation needs to be triggered
*/
vpn_leak_from_vrf_update(bgp_get_default(), bgp_path,
path);
bgpd: export redistributed routes with label allocation per nexthop The label allocation per nexthop mode requires to use a nexthop tracking context. For redistributed routes, a nexthop tracking context is created, and the resolution helps to know the real nexthop ip address used. The below configuration example has been used: > vrf vrf1 > ip route 172.31.0.14/32 192.0.2.14 > ip route 172.31.0.15/32 192.0.2.12 > ip route 172.31.0.30/32 192.0.2.30 > exit > router bgp 65500 vrf vrf1 > address-family ipv4 unicast > redistribute static > label vpn export per-nexthop > [..] The static routes are correctly imported in the BGP IPv4 RIB. Contrary to label allocation per vrf mode, some nexthop tracking are created/or reused: > # show bgp vrf vrf1 nexthop > 192.0.2.12 valid [IGP metric 0], #paths 3, peer 192.0.2.12 > if r1-eth1 > Last update: Fri Jan 13 15:49:42 2023 > 192.0.2.14 valid [IGP metric 0], #paths 1 > if r1-eth1 > Last update: Fri Jan 13 15:49:42 2023 > 192.0.2.30 valid [IGP metric 0], #paths 1 > if r1-eth1 > Last update: Fri Jan 13 15:49:51 2023 > [..] This results in having a BGP VPN route for each of the static routes: > # show bgp ipv4 vpn > [..] > Route Distinguisher: 444:1 > *> 172.31.0.14/32 192.0.2.14@9< 0 32768 ? > *> 172.31.0.15/32 192.0.2.12@9< 0 32768 ? > *> 172.31.0.30/32 192.0.2.30@9< 0 32768 ? > [..] Without that patch, only the redistributed routes that rely on a pre-existing nexthop tracking context could be exported. Also, a command in the code about redistributed routes is modified accordingly, to explain that redistribute routes may be submitted to nexthop tracking in the case label allocation per next-hop is used. note: VNC routes have been removed from the redistribution, because of a test failure in the bgp_l3vpn_to_bgp_direct test. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2023-03-03 21:33:34 +01:00
else if (path->sub_type == BGP_ROUTE_REDISTRIBUTE &&
safi == SAFI_UNICAST &&
(bgp_path->inst_type == BGP_INSTANCE_TYPE_VRF ||
bgp_path->inst_type == BGP_INSTANCE_TYPE_DEFAULT))
/* redistribute routes are always valid
* if nht is called with redistribute routes, the vpn
* exportation needs to be triggered
*/
vpn_leak_from_vrf_update(bgp_get_default(), bgp_path,
path);
bgpd: add support for l3vpn per-nexthop label This commit introduces a new method to associate a label to prefixes to export to a VPNv4 backbone. All the methods to associate a label to a BGP update is documented in rfc4364, chapter 4.3.2. Initially, the "single label for an entire VRF" method was available. This commit adds "single label for each attachment circuit" method. The change impacts the control-plane, because each BGP update is checked to know if the nexthop has reachability in the VRF or not. If this is the case, then a unique label for a given destination IP in the VRF will be picked up. This label will be reused for an other BGP update that will have the same nexthop IP address. The change impacts the data-plane, because the MPLs pop mechanism applied to incoming labelled packets changes: the MPLS label is popped, and the packet is directly sent to the connected nexthop described in the previous outgoing BGP VPN update. By default per-vrf mode is done, but the user may choose the per-nexthop mode, by using the vty command from the previous commit. In the latter case, a per-vrf label will however be allocated to handle networks that are not directly connected. This is the case for local traffic for instance. The change also include the following: - ECMP case In case a route is learnt in a given VRF, and is resolved via an ECMP nexthop. This implies that when exporting the route as a BGP update, if label allocation per nexthop is used, then two possible MPLS values could be picked up, which is not possible with the current implementation. Actually, the NLRI for VPNv4 stores one prefix, and one single label value, not two. Today, RFC8277 with multiple label capability is not yet available. To avoid this corner case, when a route is resolved via more than one nexthop, the label allocation per nexthop will not apply, and the default per-vrf label will be chosen. Let us imagine BGP redistributes a static route using the `172.31.0.20` nexthop. The nexthop resolution will find two different nexthops fo a unique BGP update. > r1# show running-config > [..] > vrf vrf1 > ip route 172.31.0.30/32 172.31.0.20 > r1# show bgp vrf vrf1 nexthop > [..] > 172.31.0.20 valid [IGP metric 0], #paths 1 > gate 192.0.2.11 > gate 192.0.2.12 > Last update: Mon Jan 16 09:27:09 2023 > Paths: > 1/1 172.31.0.30/32 VRF vrf1 flags 0x20018 To avoid this situation, BGP updates that resolve over multiple nexthops are using the unique per-vrf label. - recursive route case Prefixes that need a recursive route to be resolved can also be eligible for mpls allocation per nexthop. In that case, the nexthop will be the recursive nexthop calculated. To achieve this, all nexthop types in bnc contexts are valid, except for the blackhole nexthops. - network declared prefixes Nexthop tracking is used to look for the reachability of the prefixes. When the the 'no bgp network import-check' command is used, network declared prefixes are maintained active, even if there is no active nexthop. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2023-02-28 14:25:02 +01:00
else if (path_valid != bnc_is_valid_nexthop) {
if (path_valid) {
/* No longer valid, clear flag; also for EVPN
* routes, unimport from VRFs if needed.
*/
bgp_aggregate_decrement(bgp_path, p, path, afi,
safi);
bgp_path_info_unset_flag(dest, path,
BGP_PATH_VALID);
if (safi == SAFI_EVPN &&
bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
bgp_evpn_unimport_route(bgp_path,
afi, safi, bgp_dest_get_prefix(dest), path);
bgpd: add support for l3vpn per-nexthop label This commit introduces a new method to associate a label to prefixes to export to a VPNv4 backbone. All the methods to associate a label to a BGP update is documented in rfc4364, chapter 4.3.2. Initially, the "single label for an entire VRF" method was available. This commit adds "single label for each attachment circuit" method. The change impacts the control-plane, because each BGP update is checked to know if the nexthop has reachability in the VRF or not. If this is the case, then a unique label for a given destination IP in the VRF will be picked up. This label will be reused for an other BGP update that will have the same nexthop IP address. The change impacts the data-plane, because the MPLs pop mechanism applied to incoming labelled packets changes: the MPLS label is popped, and the packet is directly sent to the connected nexthop described in the previous outgoing BGP VPN update. By default per-vrf mode is done, but the user may choose the per-nexthop mode, by using the vty command from the previous commit. In the latter case, a per-vrf label will however be allocated to handle networks that are not directly connected. This is the case for local traffic for instance. The change also include the following: - ECMP case In case a route is learnt in a given VRF, and is resolved via an ECMP nexthop. This implies that when exporting the route as a BGP update, if label allocation per nexthop is used, then two possible MPLS values could be picked up, which is not possible with the current implementation. Actually, the NLRI for VPNv4 stores one prefix, and one single label value, not two. Today, RFC8277 with multiple label capability is not yet available. To avoid this corner case, when a route is resolved via more than one nexthop, the label allocation per nexthop will not apply, and the default per-vrf label will be chosen. Let us imagine BGP redistributes a static route using the `172.31.0.20` nexthop. The nexthop resolution will find two different nexthops fo a unique BGP update. > r1# show running-config > [..] > vrf vrf1 > ip route 172.31.0.30/32 172.31.0.20 > r1# show bgp vrf vrf1 nexthop > [..] > 172.31.0.20 valid [IGP metric 0], #paths 1 > gate 192.0.2.11 > gate 192.0.2.12 > Last update: Mon Jan 16 09:27:09 2023 > Paths: > 1/1 172.31.0.30/32 VRF vrf1 flags 0x20018 To avoid this situation, BGP updates that resolve over multiple nexthops are using the unique per-vrf label. - recursive route case Prefixes that need a recursive route to be resolved can also be eligible for mpls allocation per nexthop. In that case, the nexthop will be the recursive nexthop calculated. To achieve this, all nexthop types in bnc contexts are valid, except for the blackhole nexthops. - network declared prefixes Nexthop tracking is used to look for the reachability of the prefixes. When the the 'no bgp network import-check' command is used, network declared prefixes are maintained active, even if there is no active nexthop. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2023-02-28 14:25:02 +01:00
if (safi == SAFI_UNICAST &&
(bgp_path->inst_type !=
BGP_INSTANCE_TYPE_VIEW))
vpn_leak_from_vrf_withdraw(
bgp_get_default(), bgp_path,
path);
} else {
/* Path becomes valid, set flag; also for EVPN
* routes, import from VRFs if needed.
*/
bgp_path_info_set_flag(dest, path,
BGP_PATH_VALID);
bgp_aggregate_increment(bgp_path, p, path, afi,
safi);
if (safi == SAFI_EVPN &&
bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
bgp_evpn_import_route(bgp_path,
afi, safi, bgp_dest_get_prefix(dest), path);
bgpd: add support for l3vpn per-nexthop label This commit introduces a new method to associate a label to prefixes to export to a VPNv4 backbone. All the methods to associate a label to a BGP update is documented in rfc4364, chapter 4.3.2. Initially, the "single label for an entire VRF" method was available. This commit adds "single label for each attachment circuit" method. The change impacts the control-plane, because each BGP update is checked to know if the nexthop has reachability in the VRF or not. If this is the case, then a unique label for a given destination IP in the VRF will be picked up. This label will be reused for an other BGP update that will have the same nexthop IP address. The change impacts the data-plane, because the MPLs pop mechanism applied to incoming labelled packets changes: the MPLS label is popped, and the packet is directly sent to the connected nexthop described in the previous outgoing BGP VPN update. By default per-vrf mode is done, but the user may choose the per-nexthop mode, by using the vty command from the previous commit. In the latter case, a per-vrf label will however be allocated to handle networks that are not directly connected. This is the case for local traffic for instance. The change also include the following: - ECMP case In case a route is learnt in a given VRF, and is resolved via an ECMP nexthop. This implies that when exporting the route as a BGP update, if label allocation per nexthop is used, then two possible MPLS values could be picked up, which is not possible with the current implementation. Actually, the NLRI for VPNv4 stores one prefix, and one single label value, not two. Today, RFC8277 with multiple label capability is not yet available. To avoid this corner case, when a route is resolved via more than one nexthop, the label allocation per nexthop will not apply, and the default per-vrf label will be chosen. Let us imagine BGP redistributes a static route using the `172.31.0.20` nexthop. The nexthop resolution will find two different nexthops fo a unique BGP update. > r1# show running-config > [..] > vrf vrf1 > ip route 172.31.0.30/32 172.31.0.20 > r1# show bgp vrf vrf1 nexthop > [..] > 172.31.0.20 valid [IGP metric 0], #paths 1 > gate 192.0.2.11 > gate 192.0.2.12 > Last update: Mon Jan 16 09:27:09 2023 > Paths: > 1/1 172.31.0.30/32 VRF vrf1 flags 0x20018 To avoid this situation, BGP updates that resolve over multiple nexthops are using the unique per-vrf label. - recursive route case Prefixes that need a recursive route to be resolved can also be eligible for mpls allocation per nexthop. In that case, the nexthop will be the recursive nexthop calculated. To achieve this, all nexthop types in bnc contexts are valid, except for the blackhole nexthops. - network declared prefixes Nexthop tracking is used to look for the reachability of the prefixes. When the the 'no bgp network import-check' command is used, network declared prefixes are maintained active, even if there is no active nexthop. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2023-02-28 14:25:02 +01:00
if (safi == SAFI_UNICAST &&
(bgp_path->inst_type !=
BGP_INSTANCE_TYPE_VIEW))
vpn_leak_from_vrf_update(
bgp_get_default(), bgp_path,
path);
}
}
bgp_process(bgp_path, dest, path, afi, safi);
}
if (peer) {
int valid_nexthops = bgp_isvalid_nexthop(bnc);
bgpd: Blackhole nexthops are not reachable When bgp registers for a nexthop that is not reachable due to the nexthop pointing to a blackhole, bgp is never going to be able to reach it when attempting to open a connection. Broken behavior: <show bgp nexthop> 192.168.161.204 valid [IGP metric 0], #paths 0, peer 192.168.161.204 blackhole Last update: Thu Feb 11 09:46:10 2021 eva# show bgp ipv4 uni summ fail BGP router identifier 10.10.3.11, local AS number 3235 vrf-id 0 BGP table version 40 RIB entries 78, using 14 KiB of memory Peers 2, using 54 KiB of memory Neighbor EstdCnt DropCnt ResetTime Reason 192.168.161.204 0 0 never Waiting for peer OPEN The log file fills up with this type of message: 2021-02-09T18:53:11.653433+00:00 nq-sjc6c-cor-01 bgpd[6548]: can't connect to 24.51.27.241 fd 26 : Invalid argument 2021-02-09T18:53:21.654005+00:00 nq-sjc6c-cor-01 bgpd[6548]: can't connect to 24.51.27.241 fd 26 : Invalid argument 2021-02-09T18:53:31.654381+00:00 nq-sjc6c-cor-01 bgpd[6548]: can't connect to 24.51.27.241 fd 26 : Invalid argument 2021-02-09T18:53:41.654729+00:00 nq-sjc6c-cor-01 bgpd[6548]: can't connect to 24.51.27.241 fd 26 : Invalid argument 2021-02-09T18:53:51.655147+00:00 nq-sjc6c-cor-01 bgpd[6548]: can't connect to 24.51.27.241 fd 26 : Invalid argument As that the connect to a blackhole is correctly rejected by the kernel Fixed behavior: eva# show bgp ipv4 uni summ BGP router identifier 10.10.3.11, local AS number 3235 vrf-id 0 BGP table version 40 RIB entries 78, using 14 KiB of memory Peers 2, using 54 KiB of memory Neighbor V AS MsgRcvd MsgSent TblVer InQ OutQ Up/Down State/PfxRcd PfxSnt Desc annie(192.168.161.2) 4 64539 126264 39 0 0 0 00:01:36 38 40 N/A 192.168.161.178 4 0 0 0 0 0 0 never Active 0 N/A Total number of neighbors 2 eva# show bgp ipv4 uni summ fail BGP router identifier 10.10.3.11, local AS number 3235 vrf-id 0 BGP table version 40 RIB entries 78, using 14 KiB of memory Peers 2, using 54 KiB of memory Neighbor EstdCnt DropCnt ResetTime Reason 192.168.161.178 0 0 never Waiting for NHT Total number of neighbors 2 eva# show bgp nexthop Current BGP nexthop cache: 192.168.161.2 valid [IGP metric 0], #paths 38, peer 192.168.161.2 if enp39s0 Last update: Thu Feb 11 09:52:05 2021 192.168.161.131 valid [IGP metric 0], #paths 0, peer 192.168.161.131 if enp39s0 Last update: Thu Feb 11 09:52:05 2021 192.168.161.178 invalid, #paths 0, peer 192.168.161.178 Must be Connected Last update: Thu Feb 11 09:53:37 2021 eva# Signed-off-by: Donald Sharp <sharpd@nvidia.com>
2021-02-11 15:54:34 +01:00
if (valid_nexthops) {
/*
* Peering cannot occur across a blackhole nexthop
*/
if (bnc->nexthop_num == 1 && bnc->nexthop
bgpd: Blackhole nexthops are not reachable When bgp registers for a nexthop that is not reachable due to the nexthop pointing to a blackhole, bgp is never going to be able to reach it when attempting to open a connection. Broken behavior: <show bgp nexthop> 192.168.161.204 valid [IGP metric 0], #paths 0, peer 192.168.161.204 blackhole Last update: Thu Feb 11 09:46:10 2021 eva# show bgp ipv4 uni summ fail BGP router identifier 10.10.3.11, local AS number 3235 vrf-id 0 BGP table version 40 RIB entries 78, using 14 KiB of memory Peers 2, using 54 KiB of memory Neighbor EstdCnt DropCnt ResetTime Reason 192.168.161.204 0 0 never Waiting for peer OPEN The log file fills up with this type of message: 2021-02-09T18:53:11.653433+00:00 nq-sjc6c-cor-01 bgpd[6548]: can't connect to 24.51.27.241 fd 26 : Invalid argument 2021-02-09T18:53:21.654005+00:00 nq-sjc6c-cor-01 bgpd[6548]: can't connect to 24.51.27.241 fd 26 : Invalid argument 2021-02-09T18:53:31.654381+00:00 nq-sjc6c-cor-01 bgpd[6548]: can't connect to 24.51.27.241 fd 26 : Invalid argument 2021-02-09T18:53:41.654729+00:00 nq-sjc6c-cor-01 bgpd[6548]: can't connect to 24.51.27.241 fd 26 : Invalid argument 2021-02-09T18:53:51.655147+00:00 nq-sjc6c-cor-01 bgpd[6548]: can't connect to 24.51.27.241 fd 26 : Invalid argument As that the connect to a blackhole is correctly rejected by the kernel Fixed behavior: eva# show bgp ipv4 uni summ BGP router identifier 10.10.3.11, local AS number 3235 vrf-id 0 BGP table version 40 RIB entries 78, using 14 KiB of memory Peers 2, using 54 KiB of memory Neighbor V AS MsgRcvd MsgSent TblVer InQ OutQ Up/Down State/PfxRcd PfxSnt Desc annie(192.168.161.2) 4 64539 126264 39 0 0 0 00:01:36 38 40 N/A 192.168.161.178 4 0 0 0 0 0 0 never Active 0 N/A Total number of neighbors 2 eva# show bgp ipv4 uni summ fail BGP router identifier 10.10.3.11, local AS number 3235 vrf-id 0 BGP table version 40 RIB entries 78, using 14 KiB of memory Peers 2, using 54 KiB of memory Neighbor EstdCnt DropCnt ResetTime Reason 192.168.161.178 0 0 never Waiting for NHT Total number of neighbors 2 eva# show bgp nexthop Current BGP nexthop cache: 192.168.161.2 valid [IGP metric 0], #paths 38, peer 192.168.161.2 if enp39s0 Last update: Thu Feb 11 09:52:05 2021 192.168.161.131 valid [IGP metric 0], #paths 0, peer 192.168.161.131 if enp39s0 Last update: Thu Feb 11 09:52:05 2021 192.168.161.178 invalid, #paths 0, peer 192.168.161.178 Must be Connected Last update: Thu Feb 11 09:53:37 2021 eva# Signed-off-by: Donald Sharp <sharpd@nvidia.com>
2021-02-11 15:54:34 +01:00
&& bnc->nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
peer->last_reset = PEER_DOWN_WAITING_NHT;
valid_nexthops = 0;
} else
peer->last_reset = PEER_DOWN_WAITING_OPEN;
} else
peer->last_reset = PEER_DOWN_WAITING_NHT;
if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED)) {
if (BGP_DEBUG(nht, NHT))
zlog_debug(
"%s: Updating peer (%s(%s)) status with NHT nexthops %d",
__func__, peer->host,
peer->bgp->name_pretty,
!!valid_nexthops);
bgp_fsm_nht_update(peer->connection, peer,
!!valid_nexthops);
SET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
}
}
RESET_FLAG(bnc->change_flags);
}
/**
* path_nh_map - make or break path-to-nexthop association.
* ARGUMENTS:
* path - pointer to the path structure
* bnc - pointer to the nexthop structure
* make - if set, make the association. if unset, just break the existing
* association.
*/
void path_nh_map(struct bgp_path_info *path, struct bgp_nexthop_cache *bnc,
bool make)
{
if (path->nexthop) {
LIST_REMOVE(path, nh_thread);
path->nexthop->path_count--;
path->nexthop = NULL;
}
if (make) {
LIST_INSERT_HEAD(&(bnc->paths), path, nh_thread);
path->nexthop = bnc;
path->nexthop->path_count++;
}
}
/*
* This function is called to register nexthops to zebra
* as that we may have tried to install the nexthops
* before we actually have a zebra connection
*/
void bgp_nht_register_nexthops(struct bgp *bgp)
{
for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
struct bgp_nexthop_cache *bnc;
frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
bnc) {
register_zebra_rnh(bnc);
}
}
}
void bgp_nht_reg_enhe_cap_intfs(struct peer *peer)
{
struct bgp *bgp;
struct bgp_nexthop_cache *bnc;
struct nexthop *nhop;
struct interface *ifp;
struct prefix p;
ifindex_t ifindex = 0;
if (peer->ifp)
return;
bgp = peer->bgp;
if (!sockunion2hostprefix(&peer->connection->su, &p)) {
zlog_warn("%s: Unable to convert sockunion to prefix for %s",
__func__, peer->host);
return;
}
if (p.family != AF_INET6)
return;
/*
* Gather the ifindex for if up/down events to be
* tagged into this fun
*/
if (peer->conf_if &&
IN6_IS_ADDR_LINKLOCAL(&peer->connection->su.sin6.sin6_addr))
ifindex = peer->connection->su.sin6.sin6_scope_id;
bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
if (!bnc)
return;
if (peer != bnc->nht_info)
return;
for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
if (!ifp)
continue;
zclient_send_interface_radv_req(zclient,
nhop->vrf_id,
ifp, true,
BGP_UNNUM_DEFAULT_RA_INTERVAL);
}
}
void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer)
{
struct bgp *bgp;
struct bgp_nexthop_cache *bnc;
struct nexthop *nhop;
struct interface *ifp;
struct prefix p;
ifindex_t ifindex = 0;
if (peer->ifp)
return;
bgp = peer->bgp;
if (!sockunion2hostprefix(&peer->connection->su, &p)) {
zlog_warn("%s: Unable to convert sockunion to prefix for %s",
__func__, peer->host);
return;
}
if (p.family != AF_INET6)
return;
/*
* Gather the ifindex for if up/down events to be
* tagged into this fun
*/
if (peer->conf_if &&
IN6_IS_ADDR_LINKLOCAL(&peer->connection->su.sin6.sin6_addr))
ifindex = peer->connection->su.sin6.sin6_scope_id;
bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
if (!bnc)
return;
if (peer != bnc->nht_info)
return;
for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
if (!ifp)
continue;
zclient_send_interface_radv_req(zclient, nhop->vrf_id, ifp, 0,
0);
}
}