frr/bgpd/bgp_nexthop.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1402 lines
35 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
2002-12-13 21:15:29 +01:00
/* BGP nexthop scan
* Copyright (C) 2000 Kunihiro Ishiguro
*/
2002-12-13 21:15:29 +01:00
#include <zebra.h>
#include "command.h"
#include "frrevent.h"
2002-12-13 21:15:29 +01:00
#include "prefix.h"
#include "lib/json.h"
2002-12-13 21:15:29 +01:00
#include "zclient.h"
#include "stream.h"
#include "network.h"
#include "log.h"
#include "memory.h"
#include "hash.h"
#include "jhash.h"
#include "nexthop.h"
#include "queue.h"
#include "filter.h"
#include "printfrr.h"
2002-12-13 21:15:29 +01:00
#include "bgpd/bgpd.h"
#include "bgpd/bgp_route.h"
#include "bgpd/bgp_attr.h"
#include "bgpd/bgp_nexthop.h"
#include "bgpd/bgp_nht.h"
2002-12-13 21:15:29 +01:00
#include "bgpd/bgp_debug.h"
#include "bgpd/bgp_damp.h"
#include "bgpd/bgp_fsm.h"
#include "bgpd/bgp_vty.h"
#include "bgpd/bgp_rd.h"
bgpd: add support for l3vpn per-nexthop label This commit introduces a new method to associate a label to prefixes to export to a VPNv4 backbone. All the methods to associate a label to a BGP update is documented in rfc4364, chapter 4.3.2. Initially, the "single label for an entire VRF" method was available. This commit adds "single label for each attachment circuit" method. The change impacts the control-plane, because each BGP update is checked to know if the nexthop has reachability in the VRF or not. If this is the case, then a unique label for a given destination IP in the VRF will be picked up. This label will be reused for an other BGP update that will have the same nexthop IP address. The change impacts the data-plane, because the MPLs pop mechanism applied to incoming labelled packets changes: the MPLS label is popped, and the packet is directly sent to the connected nexthop described in the previous outgoing BGP VPN update. By default per-vrf mode is done, but the user may choose the per-nexthop mode, by using the vty command from the previous commit. In the latter case, a per-vrf label will however be allocated to handle networks that are not directly connected. This is the case for local traffic for instance. The change also include the following: - ECMP case In case a route is learnt in a given VRF, and is resolved via an ECMP nexthop. This implies that when exporting the route as a BGP update, if label allocation per nexthop is used, then two possible MPLS values could be picked up, which is not possible with the current implementation. Actually, the NLRI for VPNv4 stores one prefix, and one single label value, not two. Today, RFC8277 with multiple label capability is not yet available. To avoid this corner case, when a route is resolved via more than one nexthop, the label allocation per nexthop will not apply, and the default per-vrf label will be chosen. Let us imagine BGP redistributes a static route using the `172.31.0.20` nexthop. The nexthop resolution will find two different nexthops fo a unique BGP update. > r1# show running-config > [..] > vrf vrf1 > ip route 172.31.0.30/32 172.31.0.20 > r1# show bgp vrf vrf1 nexthop > [..] > 172.31.0.20 valid [IGP metric 0], #paths 1 > gate 192.0.2.11 > gate 192.0.2.12 > Last update: Mon Jan 16 09:27:09 2023 > Paths: > 1/1 172.31.0.30/32 VRF vrf1 flags 0x20018 To avoid this situation, BGP updates that resolve over multiple nexthops are using the unique per-vrf label. - recursive route case Prefixes that need a recursive route to be resolved can also be eligible for mpls allocation per nexthop. In that case, the nexthop will be the recursive nexthop calculated. To achieve this, all nexthop types in bnc contexts are valid, except for the blackhole nexthops. - network declared prefixes Nexthop tracking is used to look for the reachability of the prefixes. When the the 'no bgp network import-check' command is used, network declared prefixes are maintained active, even if there is no active nexthop. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2023-02-28 14:25:02 +01:00
#include "bgpd/bgp_mplsvpn.h"
#include "bgpd/bgp_bfd.h"
2002-12-13 21:15:29 +01:00
DEFINE_MTYPE_STATIC(BGPD, MARTIAN_STRING, "BGP Martian Addr Intf String");
int bgp_nexthop_cache_compare(const struct bgp_nexthop_cache *a,
const struct bgp_nexthop_cache *b)
{
if (a->srte_color < b->srte_color)
return -1;
if (a->srte_color > b->srte_color)
return 1;
if (a->ifindex_ipv6_ll < b->ifindex_ipv6_ll)
return -1;
if (a->ifindex_ipv6_ll > b->ifindex_ipv6_ll)
return 1;
return prefix_cmp(&a->prefix, &b->prefix);
}
2002-12-13 21:15:29 +01:00
void bnc_nexthop_free(struct bgp_nexthop_cache *bnc)
{
nexthops_free(bnc->nexthop);
2002-12-13 21:15:29 +01:00
}
struct bgp_nexthop_cache *bnc_new(struct bgp_nexthop_cache_head *tree,
struct prefix *prefix, uint32_t srte_color,
ifindex_t ifindex)
2002-12-13 21:15:29 +01:00
{
struct bgp_nexthop_cache *bnc;
bnc = XCALLOC(MTYPE_BGP_NEXTHOP_CACHE,
sizeof(struct bgp_nexthop_cache));
bnc->prefix = *prefix;
bnc->ifindex_ipv6_ll = ifindex;
bnc->srte_color = srte_color;
bnc->tree = tree;
LIST_INIT(&(bnc->paths));
bgp_nexthop_cache_add(tree, bnc);
return bnc;
2002-12-13 21:15:29 +01:00
}
bool bnc_existing_for_prefix(struct bgp_nexthop_cache *bnc)
{
struct bgp_nexthop_cache *bnc_tmp;
frr_each (bgp_nexthop_cache, bnc->tree, bnc_tmp) {
if (bnc_tmp == bnc)
continue;
if (prefix_cmp(&bnc->prefix, &bnc_tmp->prefix) == 0)
return true;
}
return false;
}
2002-12-13 21:15:29 +01:00
void bnc_free(struct bgp_nexthop_cache *bnc)
{
bnc_nexthop_free(bnc);
bgp_nexthop_cache_del(bnc->tree, bnc);
2002-12-13 21:15:29 +01:00
XFREE(MTYPE_BGP_NEXTHOP_CACHE, bnc);
}
struct bgp_nexthop_cache *bnc_find(struct bgp_nexthop_cache_head *tree,
struct prefix *prefix, uint32_t srte_color,
ifindex_t ifindex)
{
struct bgp_nexthop_cache bnc = {};
if (!tree)
return NULL;
bnc.prefix = *prefix;
bnc.srte_color = srte_color;
bnc.ifindex_ipv6_ll = ifindex;
return bgp_nexthop_cache_find(tree, &bnc);
}
2002-12-13 21:15:29 +01:00
/* Reset and free all BGP nexthop cache. */
static void bgp_nexthop_cache_reset(struct bgp_nexthop_cache_head *tree)
2002-12-13 21:15:29 +01:00
{
struct bgp_nexthop_cache *bnc;
while (bgp_nexthop_cache_count(tree) > 0) {
bnc = bgp_nexthop_cache_first(tree);
while (!LIST_EMPTY(&(bnc->paths))) {
struct bgp_path_info *path = LIST_FIRST(&(bnc->paths));
bgpd: add support for l3vpn per-nexthop label This commit introduces a new method to associate a label to prefixes to export to a VPNv4 backbone. All the methods to associate a label to a BGP update is documented in rfc4364, chapter 4.3.2. Initially, the "single label for an entire VRF" method was available. This commit adds "single label for each attachment circuit" method. The change impacts the control-plane, because each BGP update is checked to know if the nexthop has reachability in the VRF or not. If this is the case, then a unique label for a given destination IP in the VRF will be picked up. This label will be reused for an other BGP update that will have the same nexthop IP address. The change impacts the data-plane, because the MPLs pop mechanism applied to incoming labelled packets changes: the MPLS label is popped, and the packet is directly sent to the connected nexthop described in the previous outgoing BGP VPN update. By default per-vrf mode is done, but the user may choose the per-nexthop mode, by using the vty command from the previous commit. In the latter case, a per-vrf label will however be allocated to handle networks that are not directly connected. This is the case for local traffic for instance. The change also include the following: - ECMP case In case a route is learnt in a given VRF, and is resolved via an ECMP nexthop. This implies that when exporting the route as a BGP update, if label allocation per nexthop is used, then two possible MPLS values could be picked up, which is not possible with the current implementation. Actually, the NLRI for VPNv4 stores one prefix, and one single label value, not two. Today, RFC8277 with multiple label capability is not yet available. To avoid this corner case, when a route is resolved via more than one nexthop, the label allocation per nexthop will not apply, and the default per-vrf label will be chosen. Let us imagine BGP redistributes a static route using the `172.31.0.20` nexthop. The nexthop resolution will find two different nexthops fo a unique BGP update. > r1# show running-config > [..] > vrf vrf1 > ip route 172.31.0.30/32 172.31.0.20 > r1# show bgp vrf vrf1 nexthop > [..] > 172.31.0.20 valid [IGP metric 0], #paths 1 > gate 192.0.2.11 > gate 192.0.2.12 > Last update: Mon Jan 16 09:27:09 2023 > Paths: > 1/1 172.31.0.30/32 VRF vrf1 flags 0x20018 To avoid this situation, BGP updates that resolve over multiple nexthops are using the unique per-vrf label. - recursive route case Prefixes that need a recursive route to be resolved can also be eligible for mpls allocation per nexthop. In that case, the nexthop will be the recursive nexthop calculated. To achieve this, all nexthop types in bnc contexts are valid, except for the blackhole nexthops. - network declared prefixes Nexthop tracking is used to look for the reachability of the prefixes. When the the 'no bgp network import-check' command is used, network declared prefixes are maintained active, even if there is no active nexthop. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2023-02-28 14:25:02 +01:00
bgp_mplsvpn_path_nh_label_unlink(path);
bgp_mplsvpn_path_nh_label_bind_unlink(path);
bgpd: add support for l3vpn per-nexthop label This commit introduces a new method to associate a label to prefixes to export to a VPNv4 backbone. All the methods to associate a label to a BGP update is documented in rfc4364, chapter 4.3.2. Initially, the "single label for an entire VRF" method was available. This commit adds "single label for each attachment circuit" method. The change impacts the control-plane, because each BGP update is checked to know if the nexthop has reachability in the VRF or not. If this is the case, then a unique label for a given destination IP in the VRF will be picked up. This label will be reused for an other BGP update that will have the same nexthop IP address. The change impacts the data-plane, because the MPLs pop mechanism applied to incoming labelled packets changes: the MPLS label is popped, and the packet is directly sent to the connected nexthop described in the previous outgoing BGP VPN update. By default per-vrf mode is done, but the user may choose the per-nexthop mode, by using the vty command from the previous commit. In the latter case, a per-vrf label will however be allocated to handle networks that are not directly connected. This is the case for local traffic for instance. The change also include the following: - ECMP case In case a route is learnt in a given VRF, and is resolved via an ECMP nexthop. This implies that when exporting the route as a BGP update, if label allocation per nexthop is used, then two possible MPLS values could be picked up, which is not possible with the current implementation. Actually, the NLRI for VPNv4 stores one prefix, and one single label value, not two. Today, RFC8277 with multiple label capability is not yet available. To avoid this corner case, when a route is resolved via more than one nexthop, the label allocation per nexthop will not apply, and the default per-vrf label will be chosen. Let us imagine BGP redistributes a static route using the `172.31.0.20` nexthop. The nexthop resolution will find two different nexthops fo a unique BGP update. > r1# show running-config > [..] > vrf vrf1 > ip route 172.31.0.30/32 172.31.0.20 > r1# show bgp vrf vrf1 nexthop > [..] > 172.31.0.20 valid [IGP metric 0], #paths 1 > gate 192.0.2.11 > gate 192.0.2.12 > Last update: Mon Jan 16 09:27:09 2023 > Paths: > 1/1 172.31.0.30/32 VRF vrf1 flags 0x20018 To avoid this situation, BGP updates that resolve over multiple nexthops are using the unique per-vrf label. - recursive route case Prefixes that need a recursive route to be resolved can also be eligible for mpls allocation per nexthop. In that case, the nexthop will be the recursive nexthop calculated. To achieve this, all nexthop types in bnc contexts are valid, except for the blackhole nexthops. - network declared prefixes Nexthop tracking is used to look for the reachability of the prefixes. When the the 'no bgp network import-check' command is used, network declared prefixes are maintained active, even if there is no active nexthop. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2023-02-28 14:25:02 +01:00
path_nh_map(path, bnc, false);
2002-12-13 21:15:29 +01:00
}
bnc_free(bnc);
}
2002-12-13 21:15:29 +01:00
}
bgpd: Ignore EVPN routes from CLAG peer when VNI comes up There are two parts to this commit: 1. create a database of self tunnel-ip for used in martian nexthop check In a CLAG setup, the tunnel-ip (VNI UP) notification comes before the clag-anycast-ip comes up in the system. This was causing our self next hop check to fail and we were instaling routes with martian nexthop in zebra. We need to keep this info in a seperate database for all local tunnel-ip. This database will be used in parallel with the self next hop database to martian nexthop checks. 2. When a local VNI comes up, update the tunnel-ip database and filter routes in the RD table if necessary In case of EVPN we might receive routes from clag peer before the clag-anycast ip and VNI is up on the system. We will store the routes in the RD table for later processing. When VNI comes UP, we loop thorugh all the routes and install them in zebra if required. However, we were missing the martian nexthop check in this code path. From now onwards, when a VNI comes UP, we will first update the tunnel-ip database We then loop through all the routes in RD table and apply martian next hop filter if required. Things not covered in this commit but are required: This processing is needed in general when an address becomes a connected address. We need to loop through all the routes in BGP and apply martian nexthop filter if necessary. This will be taken care in a seperate bug Ticket:CM-17271/CM-16911 Reviewed By: ccr-6542 Testing Done: Manual Signed-off-by: Mitesh Kanjariya <mitesh@cumulusnetworks.com>
2017-08-17 08:19:58 +02:00
static void *bgp_tip_hash_alloc(void *p)
{
const struct in_addr *val = (const struct in_addr *)p;
struct tip_addr *addr;
bgpd: Ignore EVPN routes from CLAG peer when VNI comes up There are two parts to this commit: 1. create a database of self tunnel-ip for used in martian nexthop check In a CLAG setup, the tunnel-ip (VNI UP) notification comes before the clag-anycast-ip comes up in the system. This was causing our self next hop check to fail and we were instaling routes with martian nexthop in zebra. We need to keep this info in a seperate database for all local tunnel-ip. This database will be used in parallel with the self next hop database to martian nexthop checks. 2. When a local VNI comes up, update the tunnel-ip database and filter routes in the RD table if necessary In case of EVPN we might receive routes from clag peer before the clag-anycast ip and VNI is up on the system. We will store the routes in the RD table for later processing. When VNI comes UP, we loop thorugh all the routes and install them in zebra if required. However, we were missing the martian nexthop check in this code path. From now onwards, when a VNI comes UP, we will first update the tunnel-ip database We then loop through all the routes in RD table and apply martian next hop filter if required. Things not covered in this commit but are required: This processing is needed in general when an address becomes a connected address. We need to loop through all the routes in BGP and apply martian nexthop filter if necessary. This will be taken care in a seperate bug Ticket:CM-17271/CM-16911 Reviewed By: ccr-6542 Testing Done: Manual Signed-off-by: Mitesh Kanjariya <mitesh@cumulusnetworks.com>
2017-08-17 08:19:58 +02:00
addr = XMALLOC(MTYPE_TIP_ADDR, sizeof(struct tip_addr));
addr->refcnt = 0;
addr->addr.s_addr = val->s_addr;
return addr;
}
static void bgp_tip_hash_free(void *addr)
{
XFREE(MTYPE_TIP_ADDR, addr);
}
static unsigned int bgp_tip_hash_key_make(const void *p)
bgpd: Ignore EVPN routes from CLAG peer when VNI comes up There are two parts to this commit: 1. create a database of self tunnel-ip for used in martian nexthop check In a CLAG setup, the tunnel-ip (VNI UP) notification comes before the clag-anycast-ip comes up in the system. This was causing our self next hop check to fail and we were instaling routes with martian nexthop in zebra. We need to keep this info in a seperate database for all local tunnel-ip. This database will be used in parallel with the self next hop database to martian nexthop checks. 2. When a local VNI comes up, update the tunnel-ip database and filter routes in the RD table if necessary In case of EVPN we might receive routes from clag peer before the clag-anycast ip and VNI is up on the system. We will store the routes in the RD table for later processing. When VNI comes UP, we loop thorugh all the routes and install them in zebra if required. However, we were missing the martian nexthop check in this code path. From now onwards, when a VNI comes UP, we will first update the tunnel-ip database We then loop through all the routes in RD table and apply martian next hop filter if required. Things not covered in this commit but are required: This processing is needed in general when an address becomes a connected address. We need to loop through all the routes in BGP and apply martian nexthop filter if necessary. This will be taken care in a seperate bug Ticket:CM-17271/CM-16911 Reviewed By: ccr-6542 Testing Done: Manual Signed-off-by: Mitesh Kanjariya <mitesh@cumulusnetworks.com>
2017-08-17 08:19:58 +02:00
{
const struct tip_addr *addr = p;
bgpd: Ignore EVPN routes from CLAG peer when VNI comes up There are two parts to this commit: 1. create a database of self tunnel-ip for used in martian nexthop check In a CLAG setup, the tunnel-ip (VNI UP) notification comes before the clag-anycast-ip comes up in the system. This was causing our self next hop check to fail and we were instaling routes with martian nexthop in zebra. We need to keep this info in a seperate database for all local tunnel-ip. This database will be used in parallel with the self next hop database to martian nexthop checks. 2. When a local VNI comes up, update the tunnel-ip database and filter routes in the RD table if necessary In case of EVPN we might receive routes from clag peer before the clag-anycast ip and VNI is up on the system. We will store the routes in the RD table for later processing. When VNI comes UP, we loop thorugh all the routes and install them in zebra if required. However, we were missing the martian nexthop check in this code path. From now onwards, when a VNI comes UP, we will first update the tunnel-ip database We then loop through all the routes in RD table and apply martian next hop filter if required. Things not covered in this commit but are required: This processing is needed in general when an address becomes a connected address. We need to loop through all the routes in BGP and apply martian nexthop filter if necessary. This will be taken care in a seperate bug Ticket:CM-17271/CM-16911 Reviewed By: ccr-6542 Testing Done: Manual Signed-off-by: Mitesh Kanjariya <mitesh@cumulusnetworks.com>
2017-08-17 08:19:58 +02:00
return jhash_1word(addr->addr.s_addr, 0);
}
static bool bgp_tip_hash_cmp(const void *p1, const void *p2)
bgpd: Ignore EVPN routes from CLAG peer when VNI comes up There are two parts to this commit: 1. create a database of self tunnel-ip for used in martian nexthop check In a CLAG setup, the tunnel-ip (VNI UP) notification comes before the clag-anycast-ip comes up in the system. This was causing our self next hop check to fail and we were instaling routes with martian nexthop in zebra. We need to keep this info in a seperate database for all local tunnel-ip. This database will be used in parallel with the self next hop database to martian nexthop checks. 2. When a local VNI comes up, update the tunnel-ip database and filter routes in the RD table if necessary In case of EVPN we might receive routes from clag peer before the clag-anycast ip and VNI is up on the system. We will store the routes in the RD table for later processing. When VNI comes UP, we loop thorugh all the routes and install them in zebra if required. However, we were missing the martian nexthop check in this code path. From now onwards, when a VNI comes UP, we will first update the tunnel-ip database We then loop through all the routes in RD table and apply martian next hop filter if required. Things not covered in this commit but are required: This processing is needed in general when an address becomes a connected address. We need to loop through all the routes in BGP and apply martian nexthop filter if necessary. This will be taken care in a seperate bug Ticket:CM-17271/CM-16911 Reviewed By: ccr-6542 Testing Done: Manual Signed-off-by: Mitesh Kanjariya <mitesh@cumulusnetworks.com>
2017-08-17 08:19:58 +02:00
{
const struct tip_addr *addr1 = p1;
const struct tip_addr *addr2 = p2;
bgpd: Ignore EVPN routes from CLAG peer when VNI comes up There are two parts to this commit: 1. create a database of self tunnel-ip for used in martian nexthop check In a CLAG setup, the tunnel-ip (VNI UP) notification comes before the clag-anycast-ip comes up in the system. This was causing our self next hop check to fail and we were instaling routes with martian nexthop in zebra. We need to keep this info in a seperate database for all local tunnel-ip. This database will be used in parallel with the self next hop database to martian nexthop checks. 2. When a local VNI comes up, update the tunnel-ip database and filter routes in the RD table if necessary In case of EVPN we might receive routes from clag peer before the clag-anycast ip and VNI is up on the system. We will store the routes in the RD table for later processing. When VNI comes UP, we loop thorugh all the routes and install them in zebra if required. However, we were missing the martian nexthop check in this code path. From now onwards, when a VNI comes UP, we will first update the tunnel-ip database We then loop through all the routes in RD table and apply martian next hop filter if required. Things not covered in this commit but are required: This processing is needed in general when an address becomes a connected address. We need to loop through all the routes in BGP and apply martian nexthop filter if necessary. This will be taken care in a seperate bug Ticket:CM-17271/CM-16911 Reviewed By: ccr-6542 Testing Done: Manual Signed-off-by: Mitesh Kanjariya <mitesh@cumulusnetworks.com>
2017-08-17 08:19:58 +02:00
return addr1->addr.s_addr == addr2->addr.s_addr;
}
void bgp_tip_hash_init(struct bgp *bgp)
{
bgp->tip_hash = hash_create(bgp_tip_hash_key_make, bgp_tip_hash_cmp,
"BGP TIP hash");
bgpd: Ignore EVPN routes from CLAG peer when VNI comes up There are two parts to this commit: 1. create a database of self tunnel-ip for used in martian nexthop check In a CLAG setup, the tunnel-ip (VNI UP) notification comes before the clag-anycast-ip comes up in the system. This was causing our self next hop check to fail and we were instaling routes with martian nexthop in zebra. We need to keep this info in a seperate database for all local tunnel-ip. This database will be used in parallel with the self next hop database to martian nexthop checks. 2. When a local VNI comes up, update the tunnel-ip database and filter routes in the RD table if necessary In case of EVPN we might receive routes from clag peer before the clag-anycast ip and VNI is up on the system. We will store the routes in the RD table for later processing. When VNI comes UP, we loop thorugh all the routes and install them in zebra if required. However, we were missing the martian nexthop check in this code path. From now onwards, when a VNI comes UP, we will first update the tunnel-ip database We then loop through all the routes in RD table and apply martian next hop filter if required. Things not covered in this commit but are required: This processing is needed in general when an address becomes a connected address. We need to loop through all the routes in BGP and apply martian nexthop filter if necessary. This will be taken care in a seperate bug Ticket:CM-17271/CM-16911 Reviewed By: ccr-6542 Testing Done: Manual Signed-off-by: Mitesh Kanjariya <mitesh@cumulusnetworks.com>
2017-08-17 08:19:58 +02:00
}
void bgp_tip_hash_destroy(struct bgp *bgp)
{
hash_clean_and_free(&bgp->tip_hash, bgp_tip_hash_free);
bgpd: Ignore EVPN routes from CLAG peer when VNI comes up There are two parts to this commit: 1. create a database of self tunnel-ip for used in martian nexthop check In a CLAG setup, the tunnel-ip (VNI UP) notification comes before the clag-anycast-ip comes up in the system. This was causing our self next hop check to fail and we were instaling routes with martian nexthop in zebra. We need to keep this info in a seperate database for all local tunnel-ip. This database will be used in parallel with the self next hop database to martian nexthop checks. 2. When a local VNI comes up, update the tunnel-ip database and filter routes in the RD table if necessary In case of EVPN we might receive routes from clag peer before the clag-anycast ip and VNI is up on the system. We will store the routes in the RD table for later processing. When VNI comes UP, we loop thorugh all the routes and install them in zebra if required. However, we were missing the martian nexthop check in this code path. From now onwards, when a VNI comes UP, we will first update the tunnel-ip database We then loop through all the routes in RD table and apply martian next hop filter if required. Things not covered in this commit but are required: This processing is needed in general when an address becomes a connected address. We need to loop through all the routes in BGP and apply martian nexthop filter if necessary. This will be taken care in a seperate bug Ticket:CM-17271/CM-16911 Reviewed By: ccr-6542 Testing Done: Manual Signed-off-by: Mitesh Kanjariya <mitesh@cumulusnetworks.com>
2017-08-17 08:19:58 +02:00
}
/* Add/Update Tunnel-IP entry of bgp martian next-hop table.
*
* Returns true only if we add a _new_ TIP so the caller knows that an
* actionable change has occurred. If we find an existing TIP then we
* only need to update the refcnt, since the collection of known TIPs
* has not changed.
*/
bool bgp_tip_add(struct bgp *bgp, struct in_addr *tip)
bgpd: Ignore EVPN routes from CLAG peer when VNI comes up There are two parts to this commit: 1. create a database of self tunnel-ip for used in martian nexthop check In a CLAG setup, the tunnel-ip (VNI UP) notification comes before the clag-anycast-ip comes up in the system. This was causing our self next hop check to fail and we were instaling routes with martian nexthop in zebra. We need to keep this info in a seperate database for all local tunnel-ip. This database will be used in parallel with the self next hop database to martian nexthop checks. 2. When a local VNI comes up, update the tunnel-ip database and filter routes in the RD table if necessary In case of EVPN we might receive routes from clag peer before the clag-anycast ip and VNI is up on the system. We will store the routes in the RD table for later processing. When VNI comes UP, we loop thorugh all the routes and install them in zebra if required. However, we were missing the martian nexthop check in this code path. From now onwards, when a VNI comes UP, we will first update the tunnel-ip database We then loop through all the routes in RD table and apply martian next hop filter if required. Things not covered in this commit but are required: This processing is needed in general when an address becomes a connected address. We need to loop through all the routes in BGP and apply martian nexthop filter if necessary. This will be taken care in a seperate bug Ticket:CM-17271/CM-16911 Reviewed By: ccr-6542 Testing Done: Manual Signed-off-by: Mitesh Kanjariya <mitesh@cumulusnetworks.com>
2017-08-17 08:19:58 +02:00
{
struct tip_addr tmp;
struct tip_addr *addr;
bool tip_added = false;
bgpd: Ignore EVPN routes from CLAG peer when VNI comes up There are two parts to this commit: 1. create a database of self tunnel-ip for used in martian nexthop check In a CLAG setup, the tunnel-ip (VNI UP) notification comes before the clag-anycast-ip comes up in the system. This was causing our self next hop check to fail and we were instaling routes with martian nexthop in zebra. We need to keep this info in a seperate database for all local tunnel-ip. This database will be used in parallel with the self next hop database to martian nexthop checks. 2. When a local VNI comes up, update the tunnel-ip database and filter routes in the RD table if necessary In case of EVPN we might receive routes from clag peer before the clag-anycast ip and VNI is up on the system. We will store the routes in the RD table for later processing. When VNI comes UP, we loop thorugh all the routes and install them in zebra if required. However, we were missing the martian nexthop check in this code path. From now onwards, when a VNI comes UP, we will first update the tunnel-ip database We then loop through all the routes in RD table and apply martian next hop filter if required. Things not covered in this commit but are required: This processing is needed in general when an address becomes a connected address. We need to loop through all the routes in BGP and apply martian nexthop filter if necessary. This will be taken care in a seperate bug Ticket:CM-17271/CM-16911 Reviewed By: ccr-6542 Testing Done: Manual Signed-off-by: Mitesh Kanjariya <mitesh@cumulusnetworks.com>
2017-08-17 08:19:58 +02:00
tmp.addr = *tip;
addr = hash_lookup(bgp->tip_hash, &tmp);
if (!addr) {
addr = hash_get(bgp->tip_hash, &tmp, bgp_tip_hash_alloc);
tip_added = true;
}
bgpd: Ignore EVPN routes from CLAG peer when VNI comes up There are two parts to this commit: 1. create a database of self tunnel-ip for used in martian nexthop check In a CLAG setup, the tunnel-ip (VNI UP) notification comes before the clag-anycast-ip comes up in the system. This was causing our self next hop check to fail and we were instaling routes with martian nexthop in zebra. We need to keep this info in a seperate database for all local tunnel-ip. This database will be used in parallel with the self next hop database to martian nexthop checks. 2. When a local VNI comes up, update the tunnel-ip database and filter routes in the RD table if necessary In case of EVPN we might receive routes from clag peer before the clag-anycast ip and VNI is up on the system. We will store the routes in the RD table for later processing. When VNI comes UP, we loop thorugh all the routes and install them in zebra if required. However, we were missing the martian nexthop check in this code path. From now onwards, when a VNI comes UP, we will first update the tunnel-ip database We then loop through all the routes in RD table and apply martian next hop filter if required. Things not covered in this commit but are required: This processing is needed in general when an address becomes a connected address. We need to loop through all the routes in BGP and apply martian nexthop filter if necessary. This will be taken care in a seperate bug Ticket:CM-17271/CM-16911 Reviewed By: ccr-6542 Testing Done: Manual Signed-off-by: Mitesh Kanjariya <mitesh@cumulusnetworks.com>
2017-08-17 08:19:58 +02:00
addr->refcnt++;
return tip_added;
bgpd: Ignore EVPN routes from CLAG peer when VNI comes up There are two parts to this commit: 1. create a database of self tunnel-ip for used in martian nexthop check In a CLAG setup, the tunnel-ip (VNI UP) notification comes before the clag-anycast-ip comes up in the system. This was causing our self next hop check to fail and we were instaling routes with martian nexthop in zebra. We need to keep this info in a seperate database for all local tunnel-ip. This database will be used in parallel with the self next hop database to martian nexthop checks. 2. When a local VNI comes up, update the tunnel-ip database and filter routes in the RD table if necessary In case of EVPN we might receive routes from clag peer before the clag-anycast ip and VNI is up on the system. We will store the routes in the RD table for later processing. When VNI comes UP, we loop thorugh all the routes and install them in zebra if required. However, we were missing the martian nexthop check in this code path. From now onwards, when a VNI comes UP, we will first update the tunnel-ip database We then loop through all the routes in RD table and apply martian next hop filter if required. Things not covered in this commit but are required: This processing is needed in general when an address becomes a connected address. We need to loop through all the routes in BGP and apply martian nexthop filter if necessary. This will be taken care in a seperate bug Ticket:CM-17271/CM-16911 Reviewed By: ccr-6542 Testing Done: Manual Signed-off-by: Mitesh Kanjariya <mitesh@cumulusnetworks.com>
2017-08-17 08:19:58 +02:00
}
void bgp_tip_del(struct bgp *bgp, struct in_addr *tip)
{
struct tip_addr tmp;
struct tip_addr *addr;
bgpd: Ignore EVPN routes from CLAG peer when VNI comes up There are two parts to this commit: 1. create a database of self tunnel-ip for used in martian nexthop check In a CLAG setup, the tunnel-ip (VNI UP) notification comes before the clag-anycast-ip comes up in the system. This was causing our self next hop check to fail and we were instaling routes with martian nexthop in zebra. We need to keep this info in a seperate database for all local tunnel-ip. This database will be used in parallel with the self next hop database to martian nexthop checks. 2. When a local VNI comes up, update the tunnel-ip database and filter routes in the RD table if necessary In case of EVPN we might receive routes from clag peer before the clag-anycast ip and VNI is up on the system. We will store the routes in the RD table for later processing. When VNI comes UP, we loop thorugh all the routes and install them in zebra if required. However, we were missing the martian nexthop check in this code path. From now onwards, when a VNI comes UP, we will first update the tunnel-ip database We then loop through all the routes in RD table and apply martian next hop filter if required. Things not covered in this commit but are required: This processing is needed in general when an address becomes a connected address. We need to loop through all the routes in BGP and apply martian nexthop filter if necessary. This will be taken care in a seperate bug Ticket:CM-17271/CM-16911 Reviewed By: ccr-6542 Testing Done: Manual Signed-off-by: Mitesh Kanjariya <mitesh@cumulusnetworks.com>
2017-08-17 08:19:58 +02:00
tmp.addr = *tip;
addr = hash_lookup(bgp->tip_hash, &tmp);
/* may have been deleted earlier by bgp_interface_down() */
if (addr == NULL)
return;
addr->refcnt--;
if (addr->refcnt == 0) {
hash_release(bgp->tip_hash, addr);
XFREE(MTYPE_TIP_ADDR, addr);
}
}
/* BGP own address structure */
struct bgp_addr {
struct prefix p;
struct list *ifp_name_list;
};
static void show_address_entry(struct hash_bucket *bucket, void *args)
{
struct vty *vty = (struct vty *)args;
struct bgp_addr *addr = (struct bgp_addr *)bucket->data;
char *name;
struct listnode *node;
char str[INET6_ADDRSTRLEN] = {0};
vty_out(vty, "addr: %s, count: %d : ",
inet_ntop(addr->p.family, &(addr->p.u.prefix),
str, INET6_ADDRSTRLEN),
addr->ifp_name_list->count);
for (ALL_LIST_ELEMENTS_RO(addr->ifp_name_list, node, name)) {
vty_out(vty, " %s,", name);
}
vty_out(vty, "\n");
}
void bgp_nexthop_show_address_hash(struct vty *vty, struct bgp *bgp)
{
hash_iterate(bgp->address_hash,
(void (*)(struct hash_bucket *, void *))show_address_entry,
vty);
}
static void bgp_address_hash_string_del(void *val)
{
char *data = val;
XFREE(MTYPE_MARTIAN_STRING, data);
}
static void *bgp_address_hash_alloc(void *p)
{
struct bgp_addr *copy_addr = p;
struct bgp_addr *addr = NULL;
addr = XMALLOC(MTYPE_BGP_ADDR, sizeof(struct bgp_addr));
prefix_copy(&addr->p, &copy_addr->p);
addr->ifp_name_list = list_new();
addr->ifp_name_list->del = bgp_address_hash_string_del;
return addr;
}
static void bgp_address_hash_free(void *data)
{
struct bgp_addr *addr = data;
list_delete(&addr->ifp_name_list);
XFREE(MTYPE_BGP_ADDR, addr);
}
static unsigned int bgp_address_hash_key_make(const void *p)
{
const struct bgp_addr *addr = p;
return prefix_hash_key(&addr->p);
}
static bool bgp_address_hash_cmp(const void *p1, const void *p2)
{
const struct bgp_addr *addr1 = p1;
const struct bgp_addr *addr2 = p2;
return prefix_same(&addr1->p, &addr2->p);
}
void bgp_address_init(struct bgp *bgp)
{
bgp->address_hash =
hash_create(bgp_address_hash_key_make, bgp_address_hash_cmp,
"BGP Connected Address Hash");
}
void bgp_address_destroy(struct bgp *bgp)
{
hash_clean_and_free(&bgp->address_hash, bgp_address_hash_free);
}
static void bgp_address_add(struct bgp *bgp, struct connected *ifc,
struct prefix *p)
{
struct bgp_addr tmp;
struct bgp_addr *addr;
struct listnode *node;
char *name;
tmp.p = *p;
if (tmp.p.family == AF_INET)
tmp.p.prefixlen = IPV4_MAX_BITLEN;
else if (tmp.p.family == AF_INET6)
tmp.p.prefixlen = IPV6_MAX_BITLEN;
addr = hash_get(bgp->address_hash, &tmp, bgp_address_hash_alloc);
for (ALL_LIST_ELEMENTS_RO(addr->ifp_name_list, node, name)) {
if (strcmp(ifc->ifp->name, name) == 0)
break;
}
if (!node) {
name = XSTRDUP(MTYPE_MARTIAN_STRING, ifc->ifp->name);
listnode_add(addr->ifp_name_list, name);
}
}
static void bgp_address_del(struct bgp *bgp, struct connected *ifc,
struct prefix *p)
{
struct bgp_addr tmp;
struct bgp_addr *addr;
struct listnode *node;
char *name;
tmp.p = *p;
if (tmp.p.family == AF_INET)
tmp.p.prefixlen = IPV4_MAX_BITLEN;
else if (tmp.p.family == AF_INET6)
tmp.p.prefixlen = IPV6_MAX_BITLEN;
addr = hash_lookup(bgp->address_hash, &tmp);
/* may have been deleted earlier by bgp_interface_down() */
if (addr == NULL)
return;
for (ALL_LIST_ELEMENTS_RO(addr->ifp_name_list, node, name)) {
if (strcmp(ifc->ifp->name, name) == 0)
break;
}
if (node) {
list_delete_node(addr->ifp_name_list, node);
XFREE(MTYPE_MARTIAN_STRING, name);
}
if (addr->ifp_name_list->count == 0) {
hash_release(bgp->address_hash, addr);
list_delete(&addr->ifp_name_list);
XFREE(MTYPE_BGP_ADDR, addr);
}
}
struct bgp_connected_ref {
2002-12-13 21:15:29 +01:00
unsigned int refcnt;
};
void bgp_connected_add(struct bgp *bgp, struct connected *ifc)
2002-12-13 21:15:29 +01:00
{
struct prefix p;
struct prefix *addr;
struct bgp_dest *dest;
struct bgp_connected_ref *bc;
struct listnode *node, *nnode;
struct peer *peer;
struct peer_connection *connection;
2002-12-13 21:15:29 +01:00
addr = ifc->address;
p = *(CONNECTED_PREFIX(ifc));
2002-12-13 21:15:29 +01:00
if (addr->family == AF_INET) {
apply_mask_ipv4((struct prefix_ipv4 *)&p);
2002-12-13 21:15:29 +01:00
if (prefix_ipv4_any((struct prefix_ipv4 *)&p))
return;
bgp_address_add(bgp, ifc, addr);
dest = bgp_node_get(bgp->connected_table[AFI_IP], &p);
bc = bgp_dest_get_bgp_connected_ref_info(dest);
if (bc)
2002-12-13 21:15:29 +01:00
bc->refcnt++;
else {
bc = XCALLOC(MTYPE_BGP_CONN,
sizeof(struct bgp_connected_ref));
2002-12-13 21:15:29 +01:00
bc->refcnt = 1;
bgp_dest_set_bgp_connected_ref_info(dest, bc);
2002-12-13 21:15:29 +01:00
}
[PtP over ethernet] New peer flag allows much more addressing flexibility 2006-12-12 Andrew J. Schorr <ajschorr@alumni.princeton.edu> * if.h: (struct connected) Add new ZEBRA_IFA_PEER flag indicating whether a peer address has been configured. Comment now shows the new interpretation of the destination addr: if ZEBRA_IFA_PEER is set, then it must contain the destination address, otherwise it may contain the broadcast address or be NULL. (CONNECTED_DEST_HOST,CONNECTED_POINTOPOINT_HOST) Remove obsolete macros that were specific to IPv4 and not fully general. (CONNECTED_PEER) New macro to check ZEBRA_IFA_PEER flag. (CONNECTED_PREFIX) New macro giving the prefix to insert into the RIB: if CONNECTED_PEER, then use the destination (peer) address, else use the address field. (CONNECTED_ID) New macro to come up with an identifying address for the struct connected. * if.c: (if_lookup_address, connected_lookup_address) Streamline logic with new CONNECTED_PREFIX macro. * prefix.h: (PREFIX_COPY_IPV4, PREFIX_COPY_IPV6) New macros for better performance than the general prefix_copy function. * zclient.c: (zebra_interface_address_read) For non-null destination addresses, set prefixlen to equal the address prefixlen. This is needed to get the new CONNECTED_PREFIX macro to work properly. * connected.c: (connected_up_ipv4, connected_down_ipv4, connected_up_ipv6, connected_down_ipv6) Simplify logic using the new CONNECTED_PREFIX macro. (connected_add_ipv4) Set prefixlen in destination addresses (required by the CONNECTED_PREFIX macro). Use CONNECTED_PEER macro instead of testing for IFF_POINTOPOINT. Delete invalid warning message. Warn about cases where the ZEBRA_IFA_PEER is set but no destination address has been supplied (and turn off the flag). (connected_add_ipv6) Add new flags argument so callers may set the ZEBRA_IFA_PEER flag. If peer/broadcast address satisfies IN6_IS_ADDR_UNSPECIFIED, then reject it with a warning. Set prefixlen in destination address so CONNECTED_PREFIX will work. * connected.h: (connected_add_ipv6) Add new flags argument so callers may set the ZEBRA_IFA_PEER flag. * interface.c: (connected_dump_vty) Use CONNECTED_PEER macro to decide whether the destination address is a peer or broadcast address (instead of checking IFF_BROADCAST and IFF_POINTOPOINT). * if_ioctl.c: (if_getaddrs) Instead of setting a peer address only when the IFF_POINTOPOINT is set, we now accept a peer address whenever it is available and not the same as the local address. Otherwise (no peer address assigned), we check for a broadcast address (regardless of the IFF_BROADCAST flag). And must now pass a flags value of ZEBRA_IFA_PEER to connected_add_ipv4 when a peer address is assigned. The same new logic is used with the IPv6 code as well (and we pass the new flags argument to connected_add_ipv6). (if_get_addr) Do not bother to check IFF_POINTOPOINT: just issue the SIOCGIFDSTADDR ioctl and see if we get back a peer address not matching the local address (and set the ZEBRA_IFA_PEER in that case). If there's no peer address, try to grab SIOCGIFBRDADDR regardless of whether IFF_BROADCAST is set. * if_ioctl_solaris.c: (if_get_addr) Just try the SIOCGLIFDSTADDR ioctl without bothering to check the IFF_POINTOPOINT flag. And if no peer address was found, just try the SIOCGLIFBRDADDR ioctl without checking the IFF_BROADCAST flag. Call connected_add_ipv4 and connected_add_ipv6 with appropriate flags. * if_proc.c: (ifaddr_proc_ipv6) Must pass new flags argument to connected_add_ipv6. * kernel_socket.c: (ifam_read) Must pass new flags argument to connected_add_ipv6. * rt_netlink.c: (netlink_interface_addr) Copy logic from iproute2 to determine local and possible peer address (so there's no longer a test for IFF_POINTOPOINT). Set ZEBRA_IFA_PEER flag appropriately. Pass new flags argument to connected_add_ipv6. (netlink_address) Test !CONNECTED_PEER instead of if_is_broadcast to determine whether the connected destination address is a broadcast address. * bgp_nexthop.c: (bgp_connected_add, bgp_connected_delete) Simplify logic by using new CONNECTED_PREFIX macro. * ospf_interface.c: (ospf_if_is_configured, ospf_if_lookup_by_prefix, ospf_if_lookup_recv_if) Simplify logic using new CONNECTED_PREFIX macro. * ospf_lsa.c: (lsa_link_ptop_set) Using the new CONNECTED_PREFIX macro, both options collapse into the same code. * ospf_snmp.c: (ospf_snmp_if_update) Simplify logic using new CONNECTED_ID macro. (ospf_snmp_is_if_have_addr) Simplify logic using new CONNECTED_PREFIX macro. * ospf_vty.c: (show_ip_ospf_interface_sub) Use new CONNECTED_PEER macro instead of testing the IFF_POINTOPOINT flag. * ospfd.c: (ospf_network_match_iface) Use new CONNECTED_PEER macro instead of testing with if_is_pointopoint. And add commented-out code to implement alternative (in my opinion) more elegant behavior that has no special-case treatment for PtP addresses. (ospf_network_run) Use new CONNECTED_ID macro to simplify logic. * rip_interface.c: (rip_interface_multicast_set) Use new CONNECTED_ID macro to simplify logic. (rip_request_interface_send) Fix minor bug: ipv4_broadcast_addr does not give a useful result if prefixlen is 32 (we require a peer address in such cases). * ripd.c: (rip_update_interface) Fix same bug as above.
2006-12-12 20:18:21 +01:00
} else if (addr->family == AF_INET6) {
2002-12-13 21:15:29 +01:00
apply_mask_ipv6((struct prefix_ipv6 *)&p);
2002-12-13 21:15:29 +01:00
if (IN6_IS_ADDR_UNSPECIFIED(&p.u.prefix6))
return;
2002-12-13 21:15:29 +01:00
if (IN6_IS_ADDR_LINKLOCAL(&p.u.prefix6))
return;
bgp_address_add(bgp, ifc, addr);
dest = bgp_node_get(bgp->connected_table[AFI_IP6], &p);
bc = bgp_dest_get_bgp_connected_ref_info(dest);
if (bc)
2002-12-13 21:15:29 +01:00
bc->refcnt++;
else {
bc = XCALLOC(MTYPE_BGP_CONN,
sizeof(struct bgp_connected_ref));
2002-12-13 21:15:29 +01:00
bc->refcnt = 1;
bgp_dest_set_bgp_connected_ref_info(dest, bc);
}
2002-12-13 21:15:29 +01:00
}
/*
* Iterate over all the peers and attempt to set the bfd session
* data and if it's a bgp unnumbered get her flowing if necessary
*/
for (ALL_LIST_ELEMENTS(bgp->peer, node, nnode, peer)) {
bgp_peer_bfd_update_source(peer);
if (peer->conf_if && (strcmp(peer->conf_if, ifc->ifp->name) == 0) &&
!peer_established(peer->connection) &&
!CHECK_FLAG(peer->flags, PEER_FLAG_IFPEER_V6ONLY)) {
connection = peer->connection;
if (peer_active(connection) == BGP_PEER_ACTIVE)
BGP_EVENT_ADD(connection, BGP_Stop);
BGP_EVENT_ADD(connection, BGP_Start);
}
}
2002-12-13 21:15:29 +01:00
}
void bgp_connected_delete(struct bgp *bgp, struct connected *ifc)
2002-12-13 21:15:29 +01:00
{
struct prefix p;
struct prefix *addr;
struct bgp_dest *dest = NULL;
struct bgp_connected_ref *bc;
2002-12-13 21:15:29 +01:00
addr = ifc->address;
p = *(CONNECTED_PREFIX(ifc));
apply_mask(&p);
2002-12-13 21:15:29 +01:00
if (addr->family == AF_INET) {
if (prefix_ipv4_any((struct prefix_ipv4 *)&p))
return;
bgp_address_del(bgp, ifc, addr);
dest = bgp_node_lookup(bgp->connected_table[AFI_IP], &p);
2002-12-13 21:15:29 +01:00
} else if (addr->family == AF_INET6) {
if (IN6_IS_ADDR_UNSPECIFIED(&p.u.prefix6))
return;
2002-12-13 21:15:29 +01:00
if (IN6_IS_ADDR_LINKLOCAL(&p.u.prefix6))
return;
bgp_address_del(bgp, ifc, addr);
dest = bgp_node_lookup(bgp->connected_table[AFI_IP6], &p);
}
if (!dest)
return;
bc = bgp_dest_get_bgp_connected_ref_info(dest);
bc->refcnt--;
if (bc->refcnt == 0) {
XFREE(MTYPE_BGP_CONN, bc);
bgp_dest_set_bgp_connected_ref_info(dest, NULL);
2002-12-13 21:15:29 +01:00
}
dest = bgp_dest_unlock_node(dest);
assert(dest);
bgp_dest_unlock_node(dest);
2002-12-13 21:15:29 +01:00
}
static void bgp_connected_cleanup(struct route_table *table,
struct route_node *rn)
{
struct bgp_connected_ref *bc;
struct bgp_dest *bn = bgp_dest_from_rnode(rn);
bc = bgp_dest_get_bgp_connected_ref_info(bn);
if (!bc)
return;
XFREE(MTYPE_BGP_CONN, bc);
bgp_dest_set_bgp_connected_ref_info(bn, NULL);
}
bool bgp_nexthop_self(struct bgp *bgp, afi_t afi, uint8_t type,
uint8_t sub_type, struct attr *attr,
struct bgp_dest *dest)
2002-12-13 21:15:29 +01:00
{
uint8_t new_afi = afi == AFI_IP ? AF_INET : AF_INET6;
struct bgp_addr tmp_addr = {{0}}, *addr = NULL;
struct tip_addr tmp_tip, *tip = NULL;
const struct prefix *p = bgp_dest_get_prefix(dest);
bool is_bgp_static_route =
((type == ZEBRA_ROUTE_BGP) && (sub_type == BGP_ROUTE_STATIC))
? true
: false;
if (!is_bgp_static_route)
new_afi = BGP_ATTR_NEXTHOP_AFI_IP6(attr) ? AF_INET6 : AF_INET;
tmp_addr.p.family = new_afi;
switch (new_afi) {
case AF_INET:
if (is_bgp_static_route) {
tmp_addr.p.u.prefix4 = p->u.prefix4;
tmp_addr.p.prefixlen = p->prefixlen;
} else {
/* Here we need to find out which nexthop to be used*/
if (CHECK_FLAG(attr->flag, ATTR_FLAG_BIT(BGP_ATTR_NEXT_HOP))) {
tmp_addr.p.u.prefix4 = attr->nexthop;
tmp_addr.p.prefixlen = IPV4_MAX_BITLEN;
} else if ((attr->mp_nexthop_len) &&
((attr->mp_nexthop_len == BGP_ATTR_NHLEN_IPV4) ||
(attr->mp_nexthop_len == BGP_ATTR_NHLEN_VPNV4))) {
tmp_addr.p.u.prefix4 =
attr->mp_nexthop_global_in;
tmp_addr.p.prefixlen = IPV4_MAX_BITLEN;
} else
return false;
}
break;
case AF_INET6:
if (is_bgp_static_route) {
tmp_addr.p.u.prefix6 = p->u.prefix6;
tmp_addr.p.prefixlen = p->prefixlen;
} else {
tmp_addr.p.u.prefix6 = attr->mp_nexthop_global;
tmp_addr.p.prefixlen = IPV6_MAX_BITLEN;
}
break;
default:
break;
}
addr = hash_lookup(bgp->address_hash, &tmp_addr);
if (addr)
return true;
2002-12-13 21:15:29 +01:00
if (new_afi == AF_INET && hashcount(bgp->tip_hash)) {
memset(&tmp_tip, 0, sizeof(tmp_tip));
tmp_tip.addr = attr->nexthop;
if (CHECK_FLAG(attr->flag, ATTR_FLAG_BIT(BGP_ATTR_NEXT_HOP))) {
tmp_tip.addr = attr->nexthop;
} else if ((attr->mp_nexthop_len) &&
((attr->mp_nexthop_len == BGP_ATTR_NHLEN_IPV4) ||
(attr->mp_nexthop_len == BGP_ATTR_NHLEN_VPNV4))) {
tmp_tip.addr = attr->mp_nexthop_global_in;
}
tip = hash_lookup(bgp->tip_hash, &tmp_tip);
if (tip)
return true;
}
bgpd: Ignore EVPN routes from CLAG peer when VNI comes up There are two parts to this commit: 1. create a database of self tunnel-ip for used in martian nexthop check In a CLAG setup, the tunnel-ip (VNI UP) notification comes before the clag-anycast-ip comes up in the system. This was causing our self next hop check to fail and we were instaling routes with martian nexthop in zebra. We need to keep this info in a seperate database for all local tunnel-ip. This database will be used in parallel with the self next hop database to martian nexthop checks. 2. When a local VNI comes up, update the tunnel-ip database and filter routes in the RD table if necessary In case of EVPN we might receive routes from clag peer before the clag-anycast ip and VNI is up on the system. We will store the routes in the RD table for later processing. When VNI comes UP, we loop thorugh all the routes and install them in zebra if required. However, we were missing the martian nexthop check in this code path. From now onwards, when a VNI comes UP, we will first update the tunnel-ip database We then loop through all the routes in RD table and apply martian next hop filter if required. Things not covered in this commit but are required: This processing is needed in general when an address becomes a connected address. We need to loop through all the routes in BGP and apply martian nexthop filter if necessary. This will be taken care in a seperate bug Ticket:CM-17271/CM-16911 Reviewed By: ccr-6542 Testing Done: Manual Signed-off-by: Mitesh Kanjariya <mitesh@cumulusnetworks.com>
2017-08-17 08:19:58 +02:00
return false;
2002-12-13 21:15:29 +01:00
}
bool bgp_multiaccess_check_v4(struct in_addr nexthop, struct peer *peer)
2002-12-13 21:15:29 +01:00
{
struct bgp_dest *dest1;
struct bgp_dest *dest2;
struct prefix p;
2002-12-13 21:15:29 +01:00
int ret;
p.family = AF_INET;
p.prefixlen = IPV4_MAX_BITLEN;
p.u.prefix4 = nexthop;
dest1 = bgp_node_match(peer->bgp->connected_table[AFI_IP], &p);
if (!dest1)
return false;
p.family = AF_INET;
p.prefixlen = IPV4_MAX_BITLEN;
p.u.prefix4 = peer->connection->su.sin.sin_addr;
dest2 = bgp_node_match(peer->bgp->connected_table[AFI_IP], &p);
if (!dest2) {
bgp_dest_unlock_node(dest1);
return false;
2002-12-13 21:15:29 +01:00
}
ret = (dest1 == dest2);
2002-12-13 21:15:29 +01:00
bgp_dest_unlock_node(dest1);
bgp_dest_unlock_node(dest2);
2002-12-13 21:15:29 +01:00
return ret;
2002-12-13 21:15:29 +01:00
}
bool bgp_multiaccess_check_v6(struct in6_addr nexthop, struct peer *peer)
{
struct bgp_dest *dest1;
struct bgp_dest *dest2;
struct prefix p;
int ret;
p.family = AF_INET6;
p.prefixlen = IPV6_MAX_BITLEN;
p.u.prefix6 = nexthop;
dest1 = bgp_node_match(peer->bgp->connected_table[AFI_IP6], &p);
if (!dest1)
return false;
p.family = AF_INET6;
p.prefixlen = IPV6_MAX_BITLEN;
p.u.prefix6 = peer->connection->su.sin6.sin6_addr;
dest2 = bgp_node_match(peer->bgp->connected_table[AFI_IP6], &p);
if (!dest2) {
bgp_dest_unlock_node(dest1);
return false;
}
ret = (dest1 == dest2);
bgp_dest_unlock_node(dest1);
bgp_dest_unlock_node(dest2);
return ret;
}
bool bgp_subgrp_multiaccess_check_v6(struct in6_addr nexthop,
struct update_subgroup *subgrp,
struct peer *exclude)
{
struct bgp_dest *dest1 = NULL, *dest2 = NULL;
struct peer_af *paf = NULL;
struct prefix p = {0}, np = {0};
struct bgp *bgp = NULL;
np.family = AF_INET6;
np.prefixlen = IPV6_MAX_BITLEN;
np.u.prefix6 = nexthop;
p.family = AF_INET;
p.prefixlen = IPV6_MAX_BITLEN;
bgp = SUBGRP_INST(subgrp);
dest1 = bgp_node_match(bgp->connected_table[AFI_IP6], &np);
if (!dest1)
return false;
SUBGRP_FOREACH_PEER (subgrp, paf) {
/* Skip peer we're told to exclude - e.g., source of route. */
if (paf->peer == exclude)
continue;
p.u.prefix6 = paf->peer->connection->su.sin6.sin6_addr;
dest2 = bgp_node_match(bgp->connected_table[AFI_IP6], &p);
if (dest1 == dest2) {
bgp_dest_unlock_node(dest1);
bgp_dest_unlock_node(dest2);
return true;
}
if (dest2)
bgp_dest_unlock_node(dest2);
}
bgp_dest_unlock_node(dest1);
return false;
}
bool bgp_subgrp_multiaccess_check_v4(struct in_addr nexthop,
struct update_subgroup *subgrp,
struct peer *exclude)
{
struct bgp_dest *dest1, *dest2;
struct peer_af *paf;
struct prefix p, np;
struct bgp *bgp;
np.family = AF_INET;
np.prefixlen = IPV4_MAX_BITLEN;
np.u.prefix4 = nexthop;
p.family = AF_INET;
p.prefixlen = IPV4_MAX_BITLEN;
bgp = SUBGRP_INST(subgrp);
dest1 = bgp_node_match(bgp->connected_table[AFI_IP], &np);
if (!dest1)
return false;
SUBGRP_FOREACH_PEER (subgrp, paf) {
/* Skip peer we're told to exclude - e.g., source of route. */
if (paf->peer == exclude)
continue;
p.u.prefix4 = paf->peer->connection->su.sin.sin_addr;
dest2 = bgp_node_match(bgp->connected_table[AFI_IP], &p);
if (dest1 == dest2) {
bgp_dest_unlock_node(dest1);
bgp_dest_unlock_node(dest2);
return true;
}
if (dest2)
bgp_dest_unlock_node(dest2);
}
bgp_dest_unlock_node(dest1);
return false;
}
static void bgp_show_bgp_path_info_flags(uint32_t flags, json_object *json)
{
json_object *json_flags = NULL;
if (!json)
return;
json_flags = json_object_new_object();
json_object_boolean_add(json_flags, "igpChanged",
CHECK_FLAG(flags, BGP_PATH_IGP_CHANGED));
json_object_boolean_add(json_flags, "damped",
CHECK_FLAG(flags, BGP_PATH_DAMPED));
json_object_boolean_add(json_flags, "history",
CHECK_FLAG(flags, BGP_PATH_HISTORY));
json_object_boolean_add(json_flags, "bestpath",
CHECK_FLAG(flags, BGP_PATH_SELECTED));
json_object_boolean_add(json_flags, "valid",
CHECK_FLAG(flags, BGP_PATH_VALID));
json_object_boolean_add(json_flags, "attrChanged",
CHECK_FLAG(flags, BGP_PATH_ATTR_CHANGED));
json_object_boolean_add(json_flags, "deterministicMedCheck",
CHECK_FLAG(flags, BGP_PATH_DMED_CHECK));
json_object_boolean_add(json_flags, "deterministicMedSelected",
CHECK_FLAG(flags, BGP_PATH_DMED_SELECTED));
json_object_boolean_add(json_flags, "stale",
CHECK_FLAG(flags, BGP_PATH_STALE));
json_object_boolean_add(json_flags, "removed",
CHECK_FLAG(flags, BGP_PATH_REMOVED));
json_object_boolean_add(json_flags, "counted",
CHECK_FLAG(flags, BGP_PATH_COUNTED));
json_object_boolean_add(json_flags, "multipath",
CHECK_FLAG(flags, BGP_PATH_MULTIPATH));
json_object_boolean_add(json_flags, "multipathChanged",
CHECK_FLAG(flags, BGP_PATH_MULTIPATH_CHG));
json_object_boolean_add(json_flags, "ribAttributeChanged",
CHECK_FLAG(flags, BGP_PATH_RIB_ATTR_CHG));
json_object_boolean_add(json_flags, "nexthopSelf",
CHECK_FLAG(flags, BGP_PATH_ANNC_NH_SELF));
json_object_boolean_add(json_flags, "linkBandwidthChanged",
CHECK_FLAG(flags, BGP_PATH_LINK_BW_CHG));
json_object_boolean_add(json_flags, "acceptOwn",
CHECK_FLAG(flags, BGP_PATH_ACCEPT_OWN));
json_object_object_add(json, "flags", json_flags);
}
static void bgp_show_nexthop_paths(struct vty *vty, struct bgp *bgp,
struct bgp_nexthop_cache *bnc,
json_object *json)
{
struct bgp_dest *dest;
struct bgp_path_info *path;
afi_t afi;
safi_t safi;
struct bgp_table *table;
struct bgp *bgp_path;
json_object *paths = NULL;
json_object *json_path = NULL;
if (json)
paths = json_object_new_array();
else
vty_out(vty, " Paths:\n");
LIST_FOREACH (path, &(bnc->paths), nh_thread) {
dest = path->net;
assert(dest && bgp_dest_table(dest));
afi = family2afi(bgp_dest_get_prefix(dest)->family);
table = bgp_dest_table(dest);
safi = table->safi;
bgp_path = table->bgp;
bgpd: support for as notation format for route distinguisher RD may be built based on an AS number. Like for the AS, the RD may use the AS notation. The two below examples can illustrate: RD 1.1:20 stands for an AS4B:NN RD with AS4B=65536 in dot format. RD 0.1:20 stands for an AS2B:NNNN RD with AS2B=0.1 in dot+ format. This commit adds the asnotation mode to prefix_rd2str() API so as to pick up the relevant display. Two new printfrr extensions are available to display the RD with the two above display methods. - The pRDD extension stands for dot asnotation format - The pRDE extension stands for dot+ asnotation format. - The pRD extension has been renamed to pRDP extension The code is changed each time '%pRD' printf extension is called. Possibly, the asnotation may change the output, then a macro defines the asnotation mode to use. A side effect of forging the mode to use is that the string could not be concatenated with other strings in vty_out and snprintfrr. Those functions have been called multiple times. When zlog_debug needs to display the RD with some other string, the prefix_rd2str() old API is used instead of the printf extension. Some code has been kept untouched: - code related to running-config. Actually, wherever an RD is displayed, its configured name should be dumped. - bgp rfapi code - bgp evpn multihoming code (partially done), since the logic is missing to get the asnotation of 'struct bgp_evpn_es'. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2022-11-22 09:57:10 +01:00
if (json) {
json_path = json_object_new_object();
json_object_string_add(json_path, "afi", afi2str(afi));
json_object_string_add(json_path, "safi",
safi2str(safi));
json_object_string_addf(json_path, "prefix", "%pBD",
dest);
if (dest->pdest)
json_object_string_addf(
bgpd: support for as notation format for route distinguisher RD may be built based on an AS number. Like for the AS, the RD may use the AS notation. The two below examples can illustrate: RD 1.1:20 stands for an AS4B:NN RD with AS4B=65536 in dot format. RD 0.1:20 stands for an AS2B:NNNN RD with AS2B=0.1 in dot+ format. This commit adds the asnotation mode to prefix_rd2str() API so as to pick up the relevant display. Two new printfrr extensions are available to display the RD with the two above display methods. - The pRDD extension stands for dot asnotation format - The pRDE extension stands for dot+ asnotation format. - The pRD extension has been renamed to pRDP extension The code is changed each time '%pRD' printf extension is called. Possibly, the asnotation may change the output, then a macro defines the asnotation mode to use. A side effect of forging the mode to use is that the string could not be concatenated with other strings in vty_out and snprintfrr. Those functions have been called multiple times. When zlog_debug needs to display the RD with some other string, the prefix_rd2str() old API is used instead of the printf extension. Some code has been kept untouched: - code related to running-config. Actually, wherever an RD is displayed, its configured name should be dumped. - bgp rfapi code - bgp evpn multihoming code (partially done), since the logic is missing to get the asnotation of 'struct bgp_evpn_es'. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2022-11-22 09:57:10 +01:00
json_path, "rd",
BGP_RD_AS_FORMAT(bgp->asnotation),
(struct prefix_rd *)bgp_dest_get_prefix(
dest->pdest));
json_object_string_add(
json_path, "vrf",
vrf_id_to_name(bgp_path->vrf_id));
bgp_show_bgp_path_info_flags(path->flags, json_path);
json_object_array_add(paths, json_path);
continue;
}
bgpd: support for as notation format for route distinguisher RD may be built based on an AS number. Like for the AS, the RD may use the AS notation. The two below examples can illustrate: RD 1.1:20 stands for an AS4B:NN RD with AS4B=65536 in dot format. RD 0.1:20 stands for an AS2B:NNNN RD with AS2B=0.1 in dot+ format. This commit adds the asnotation mode to prefix_rd2str() API so as to pick up the relevant display. Two new printfrr extensions are available to display the RD with the two above display methods. - The pRDD extension stands for dot asnotation format - The pRDE extension stands for dot+ asnotation format. - The pRD extension has been renamed to pRDP extension The code is changed each time '%pRD' printf extension is called. Possibly, the asnotation may change the output, then a macro defines the asnotation mode to use. A side effect of forging the mode to use is that the string could not be concatenated with other strings in vty_out and snprintfrr. Those functions have been called multiple times. When zlog_debug needs to display the RD with some other string, the prefix_rd2str() old API is used instead of the printf extension. Some code has been kept untouched: - code related to running-config. Actually, wherever an RD is displayed, its configured name should be dumped. - bgp rfapi code - bgp evpn multihoming code (partially done), since the logic is missing to get the asnotation of 'struct bgp_evpn_es'. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2022-11-22 09:57:10 +01:00
if (dest->pdest) {
vty_out(vty, " %d/%d %pBD RD ", afi, safi, dest);
vty_out(vty, BGP_RD_AS_FORMAT(bgp->asnotation),
(struct prefix_rd *)bgp_dest_get_prefix(
bgpd: support for as notation format for route distinguisher RD may be built based on an AS number. Like for the AS, the RD may use the AS notation. The two below examples can illustrate: RD 1.1:20 stands for an AS4B:NN RD with AS4B=65536 in dot format. RD 0.1:20 stands for an AS2B:NNNN RD with AS2B=0.1 in dot+ format. This commit adds the asnotation mode to prefix_rd2str() API so as to pick up the relevant display. Two new printfrr extensions are available to display the RD with the two above display methods. - The pRDD extension stands for dot asnotation format - The pRDE extension stands for dot+ asnotation format. - The pRD extension has been renamed to pRDP extension The code is changed each time '%pRD' printf extension is called. Possibly, the asnotation may change the output, then a macro defines the asnotation mode to use. A side effect of forging the mode to use is that the string could not be concatenated with other strings in vty_out and snprintfrr. Those functions have been called multiple times. When zlog_debug needs to display the RD with some other string, the prefix_rd2str() old API is used instead of the printf extension. Some code has been kept untouched: - code related to running-config. Actually, wherever an RD is displayed, its configured name should be dumped. - bgp rfapi code - bgp evpn multihoming code (partially done), since the logic is missing to get the asnotation of 'struct bgp_evpn_es'. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2022-11-22 09:57:10 +01:00
dest->pdest));
vty_out(vty, " %s flags 0x%x\n", bgp_path->name_pretty,
path->flags);
} else
vty_out(vty, " %d/%d %pBD %s flags 0x%x\n",
afi, safi, dest, bgp_path->name_pretty, path->flags);
}
if (json)
json_object_object_add(json, "paths", paths);
}
static void bgp_show_nexthops_detail(struct vty *vty, struct bgp *bgp,
struct bgp_nexthop_cache *bnc,
json_object *json)
{
struct nexthop *nexthop;
json_object *json_gates = NULL;
json_object *json_gate = NULL;
if (json)
json_gates = json_object_new_array();
for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
if (json) {
json_gate = json_object_new_object();
switch (nexthop->type) {
case NEXTHOP_TYPE_IPV6:
json_object_string_addf(json_gate, "ip", "%pI6",
&nexthop->gate.ipv6);
break;
case NEXTHOP_TYPE_IPV6_IFINDEX:
json_object_string_addf(json_gate, "ip", "%pI6",
&nexthop->gate.ipv6);
json_object_string_add(
json_gate, "interfaceName",
ifindex2ifname(
bnc->ifindex_ipv6_ll
? bnc->ifindex_ipv6_ll
: nexthop->ifindex,
bgp->vrf_id));
json_object_int_add(json_gate, "ifindex",
bnc->ifindex_ipv6_ll ? bnc->ifindex_ipv6_ll
: nexthop->ifindex);
break;
case NEXTHOP_TYPE_IPV4:
json_object_string_addf(json_gate, "ip", "%pI4",
&nexthop->gate.ipv4);
break;
case NEXTHOP_TYPE_IFINDEX:
json_object_string_add(
json_gate, "interfaceName",
ifindex2ifname(
bnc->ifindex_ipv6_ll
? bnc->ifindex_ipv6_ll
: nexthop->ifindex,
bgp->vrf_id));
json_object_int_add(json_gate, "ifindex",
bnc->ifindex_ipv6_ll ? bnc->ifindex_ipv6_ll
: nexthop->ifindex);
break;
case NEXTHOP_TYPE_IPV4_IFINDEX:
json_object_string_addf(json_gate, "ip", "%pI4",
&nexthop->gate.ipv4);
json_object_string_add(
json_gate, "interfaceName",
ifindex2ifname(
bnc->ifindex_ipv6_ll
? bnc->ifindex_ipv6_ll
: nexthop->ifindex,
bgp->vrf_id));
json_object_int_add(json_gate, "ifindex",
bnc->ifindex_ipv6_ll ? bnc->ifindex_ipv6_ll
: nexthop->ifindex);
break;
case NEXTHOP_TYPE_BLACKHOLE:
json_object_boolean_true_add(json_gate,
"unreachable");
switch (nexthop->bh_type) {
case BLACKHOLE_REJECT:
json_object_boolean_true_add(json_gate,
"reject");
break;
case BLACKHOLE_ADMINPROHIB:
json_object_boolean_true_add(
json_gate, "adminProhibited");
break;
case BLACKHOLE_NULL:
json_object_boolean_true_add(
json_gate, "blackhole");
break;
case BLACKHOLE_UNSPEC:
break;
}
break;
default:
break;
}
json_object_array_add(json_gates, json_gate);
continue;
}
switch (nexthop->type) {
case NEXTHOP_TYPE_IPV6:
case NEXTHOP_TYPE_IPV6_IFINDEX:
vty_out(vty, " gate %pI6", &nexthop->gate.ipv6);
if (nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX &&
bnc->ifindex_ipv6_ll)
vty_out(vty, ", if %s, ifindex %d\n",
ifindex2ifname(bnc->ifindex_ipv6_ll, bgp->vrf_id),
bnc->ifindex_ipv6_ll);
else if (nexthop->ifindex)
vty_out(vty, ", if %s, ifindex %d\n",
ifindex2ifname(nexthop->ifindex, bgp->vrf_id),
nexthop->ifindex);
else
vty_out(vty, "\n");
break;
case NEXTHOP_TYPE_IPV4:
case NEXTHOP_TYPE_IPV4_IFINDEX:
vty_out(vty, " gate %pI4", &nexthop->gate.ipv4);
if (nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX &&
bnc->ifindex_ipv6_ll)
vty_out(vty, ", if %s, ifindex %d\n",
ifindex2ifname(bnc->ifindex_ipv6_ll, bgp->vrf_id),
bnc->ifindex_ipv6_ll);
else if (nexthop->ifindex)
vty_out(vty, ", if %s, ifindex %d\n",
ifindex2ifname(nexthop->ifindex, bgp->vrf_id),
nexthop->ifindex);
else
vty_out(vty, "\n");
break;
case NEXTHOP_TYPE_IFINDEX:
vty_out(vty, " if %s, ifindex %d\n",
ifindex2ifname(bnc->ifindex_ipv6_ll ? bnc->ifindex_ipv6_ll
: nexthop->ifindex,
bgp->vrf_id),
bnc->ifindex_ipv6_ll ? bnc->ifindex_ipv6_ll : nexthop->ifindex);
break;
case NEXTHOP_TYPE_BLACKHOLE:
vty_out(vty, " blackhole\n");
break;
default:
vty_out(vty, " invalid nexthop type %u\n",
nexthop->type);
}
}
if (json)
json_object_object_add(json, "nexthops", json_gates);
}
static void bgp_show_nexthop(struct vty *vty, struct bgp *bgp, struct bgp_nexthop_cache *bnc,
bool detail, bool uj)
{
char buf[PREFIX2STR_BUFFER];
time_t tbuf;
char timebuf[32];
struct peer *peer;
json_object *json_last_update = NULL;
json_object *json_nexthop = NULL;
peer = (struct peer *)bnc->nht_info;
if (uj)
json_nexthop = json_object_new_object();
if (bnc->srte_color) {
if (uj)
json_object_int_add(json_nexthop, "srteColor",
bnc->srte_color);
else
vty_out(vty, " SR-TE color %u -", bnc->srte_color);
}
inet_ntop(bnc->prefix.family, &bnc->prefix.u.prefix, buf, sizeof(buf));
if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)) {
if (uj) {
json_object_boolean_true_add(json_nexthop, "valid");
json_object_boolean_true_add(json_nexthop, "complete");
json_object_int_add(json_nexthop, "igpMetric",
bnc->metric);
json_object_int_add(json_nexthop, "pathCount",
bnc->path_count);
if (peer)
json_object_string_add(json_nexthop, "peer",
peer->host);
if (bnc->is_evpn_gwip_nexthop)
json_object_boolean_true_add(json_nexthop,
"isEvpnGatewayIp");
json_object_string_addf(json_nexthop, "resolvedPrefix", "%pFX",
&bnc->resolved_prefix);
} else {
vty_out(vty, " %s valid [IGP metric %d], #paths %d",
buf, bnc->metric, bnc->path_count);
if (peer)
vty_out(vty, ", peer %s", peer->host);
if (bnc->is_evpn_gwip_nexthop)
vty_out(vty, " EVPN Gateway IP");
vty_out(vty, "\n Resolved prefix %pFX",
&bnc->resolved_prefix);
vty_out(vty, "\n");
}
bgp_show_nexthops_detail(vty, bgp, bnc, json_nexthop);
bgpd: EVPN route type-5 to type-2 recursive resolution using gateway IP When EVPN prefix route with a gateway IP overlay index is imported into the IP vrf at the ingress PE, BGP nexthop of this route is set to the gateway IP. For this vrf route to be valid, following conditions must be met. - Gateway IP nexthop of this route should be L3 reachable, i.e., this route should be resolved in RIB. - A remote MAC/IP route should be present for the gateway IP address in the EVI(L2VPN table). To check for the first condition, gateway IP is registered with nht (nexthop tracking) to receive the reachability notifications for this IP from zebra RIB. If the gateway IP is reachable, zebra sends the reachability information (i.e., nexthop interface) for the gateway IP. This nexthop interface should be the SVI interface. Now, to find out type-2 route corresponding to the gateway IP, we need to fetch the VNI for the above SVI. To do this VNI lookup effitiently, define a hashtable of struct bgpevpn with svi_ifindex as key. struct hash *vni_svi_hash; An EVI instance is added to vni_svi_hash if its svi_ifindex is nonzero. Using this hash, we obtain struct bgpevpn corresponding to the gateway IP. For gateway IP overlay index recursive lookup, once we find the correct EVI, we have to lookup its route table for a MAC/IP prefix. As we have to iterate the entire route table for every lookup, this lookup is expensive. We can optimize this lookup by adding all the remote IP addresses in a hash table. Following hash table is defined for this purpose in struct bgpevpn Struct hash *remote_ip_hash; When a MAC/IP route is installed in the EVI table, it is also added to remote_ip_hash. It is possible to have multiple MAC/IP routes with the same IP address because of host move scenarios. Thus, for every address addr in remote_ip_hash, we maintain list of all the MAC/IP routes having addr as their IP address. Following structure defines an address in remote_ip_hash. struct evpn_remote_ip { struct ipaddr addr; struct list *macip_path_list; }; A Boolean field is added to struct bgp_nexthop_cache to indicate that the nexthop is EVPN gateway IP overlay index. bool is_evpn_gwip_nexthop; A flag BGP_NEXTHOP_EVPN_INCOMPLETE is added to struct bgp_nexthop_cache. This flag is set when the gateway IP is L3 reachable but not yet resolved by a MAC/IP route. Following table explains the combination of L3 and L2 reachability w.r.t. BGP_NEXTHOP_VALID and BGP_NEXTHOP_EVPN_INCOMPLETE flags * | MACIP resolved | MACIP unresolved *----------------|----------------|------------------ * L3 reachable | VALID = 1 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 1 * ---------------|----------------|-------------------- * L3 unreachable | VALID = 0 | VALID = 0 * | INCOMPLETE = 0 | INCOMPLETE = 0 Procedure that we use to check if the gateway IP is resolvable by a MAC/IP route: - Find the EVI/L2VRF that belongs to the nexthop SVI using vni_svi_hash. - Check if the gateway IP is present in remote_ip_hash in this EVI. When the gateway IP is L3 reachable and it is also resolved by a MAC/IP route, unset BGP_NEXTHOP_EVPN_INCOMPLETE flag and set BGP_NEXTHOP_VALID flag. Signed-off-by: Ameya Dharkar <adharkar@vmware.com>
2021-01-11 12:51:56 +01:00
} else if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE)) {
if (uj) {
json_object_boolean_true_add(json_nexthop, "valid");
json_object_boolean_false_add(json_nexthop, "complete");
json_object_int_add(json_nexthop, "igpMetric",
bnc->metric);
json_object_int_add(json_nexthop, "pathCount",
bnc->path_count);
if (bnc->is_evpn_gwip_nexthop)
json_object_boolean_true_add(json_nexthop,
"isEvpnGatewayIp");
} else {
vty_out(vty,
" %s overlay index unresolved [IGP metric %d], #paths %d",
buf, bnc->metric, bnc->path_count);
if (bnc->is_evpn_gwip_nexthop)
vty_out(vty, " EVPN Gateway IP");
vty_out(vty, "\n");
}
bgp_show_nexthops_detail(vty, bgp, bnc, json_nexthop);
} else {
if (uj) {
json_object_boolean_false_add(json_nexthop, "valid");
json_object_boolean_false_add(json_nexthop, "complete");
json_object_int_add(json_nexthop, "pathCount",
bnc->path_count);
if (peer)
json_object_string_add(json_nexthop, "peer",
peer->host);
if (bnc->is_evpn_gwip_nexthop)
json_object_boolean_true_add(json_nexthop,
"isEvpnGatewayIp");
if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED))
json_object_boolean_false_add(json_nexthop,
"isConnected");
if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
json_object_boolean_false_add(json_nexthop,
"isRegistered");
} else {
vty_out(vty, " %s invalid, #paths %d", buf,
bnc->path_count);
if (peer)
vty_out(vty, ", peer %s", peer->host);
if (bnc->is_evpn_gwip_nexthop)
vty_out(vty, " EVPN Gateway IP");
vty_out(vty, "\n");
if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED))
vty_out(vty, " Must be Connected\n");
if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
vty_out(vty, " Is not Registered\n");
}
}
tbuf = time(NULL) - (monotime(NULL) - bnc->last_update);
if (uj) {
if (detail) {
json_last_update = json_object_new_object();
json_object_int_add(json_last_update, "epoch", tbuf);
json_object_string_add(json_last_update, "string",
time_to_string_json(bnc->last_update, timebuf));
json_object_object_add(json_nexthop, "lastUpdate",
json_last_update);
} else {
json_object_int_add(json_nexthop, "lastUpdate", tbuf);
}
} else {
vty_out(vty, " Last update: %s", time_to_string(bnc->last_update, timebuf));
}
/* show paths dependent on nexthop, if needed. */
if (detail)
bgp_show_nexthop_paths(vty, bgp, bnc, json_nexthop);
if (uj) {
vty_out(vty, "\"%s\":", buf);
vty_json_no_pretty(vty, json_nexthop);
}
}
static void bgp_show_nexthops(struct vty *vty, struct bgp *bgp, bool import_table, bool uj,
afi_t afi, bool detail)
{
struct bgp_nexthop_cache *bnc;
struct bgp_nexthop_cache_head(*tree)[AFI_MAX];
bool found = false;
bool firstafi = true;
bool firstnh = true;
if (!uj) {
if (import_table)
vty_out(vty, "Current BGP import check cache:\n");
else
vty_out(vty, "Current BGP nexthop cache:\n");
}
if (import_table)
tree = &bgp->import_check_table;
else
tree = &bgp->nexthop_cache_table;
if (afi == AFI_IP || afi == AFI_IP6) {
if (uj)
vty_out(vty, "%s:{", (afi == AFI_IP) ? "\"ipv4\"" : "\"ipv6\"");
frr_each (bgp_nexthop_cache, &(*tree)[afi], bnc) {
if (uj)
vty_out(vty, "%s", firstnh ? "" : ",");
bgp_show_nexthop(vty, bgp, bnc, detail, uj);
found = true;
firstnh = false;
}
if (found && uj)
vty_out(vty, "}");
return;
}
for (afi = AFI_IP; afi < AFI_MAX; afi++) {
if (afi != AFI_IP && afi != AFI_IP6)
continue;
if (uj)
vty_out(vty, "%s%s:{", firstafi ? "" : ",",
(afi == AFI_IP) ? "\"ipv4\"" : "\"ipv6\"");
firstafi = false;
firstnh = true;
frr_each (bgp_nexthop_cache, &(*tree)[afi], bnc) {
if (uj)
vty_out(vty, "%s", firstnh ? "" : ",");
bgp_show_nexthop(vty, bgp, bnc, detail, uj);
firstnh = false;
}
if (uj)
vty_out(vty, "}");
}
}
static int show_ip_bgp_nexthop_table(struct vty *vty, const char *name, const char *nhopip_str,
bool import_table, bool uj, afi_t afi, bool detail)
{
struct bgp *bgp;
if (name && !strmatch(name, VRF_DEFAULT_NAME))
bgp = bgp_lookup_by_name(name);
else
bgp = bgp_get_default();
if (!bgp) {
if (!uj)
vty_out(vty, "%% No such BGP instance exist\n");
return CMD_WARNING;
}
if (nhopip_str) {
struct prefix nhop;
struct bgp_nexthop_cache_head (*tree)[AFI_MAX];
struct bgp_nexthop_cache *bnc;
bool found = false;
if (!str2prefix(nhopip_str, &nhop)) {
if (!uj)
vty_out(vty, "nexthop address is malformed\n");
return CMD_WARNING;
}
tree = import_table ? &bgp->import_check_table
: &bgp->nexthop_cache_table;
if (uj)
vty_out(vty, "%s:{",
(family2afi(nhop.family) == AFI_IP) ? "\"ipv4\"" : "\"ipv6\"");
frr_each (bgp_nexthop_cache, &(*tree)[family2afi(nhop.family)],
bnc) {
if (prefix_cmp(&bnc->prefix, &nhop))
continue;
bgp_show_nexthop(vty, bgp, bnc, true, uj);
found = true;
}
if (!found && !uj)
vty_out(vty, "nexthop %s does not have entry\n",
nhopip_str);
if (uj)
vty_out(vty, "}");
} else
bgp_show_nexthops(vty, bgp, import_table, uj, afi, detail);
return CMD_SUCCESS;
}
static void bgp_show_all_instances_nexthops_vty(struct vty *vty, bool uj, afi_t afi, bool detail)
{
struct listnode *node, *nnode;
struct bgp *bgp;
const char *inst_name;
bool firstinst = true;
for (ALL_LIST_ELEMENTS(bm->bgp, node, nnode, bgp)) {
inst_name = (bgp->inst_type == BGP_INSTANCE_TYPE_DEFAULT)
? VRF_DEFAULT_NAME
: bgp->name;
if (uj)
vty_out(vty, "%s\"%s\":{", firstinst ? "" : ",", inst_name);
else
vty_out(vty, "\nInstance %s:\n", inst_name);
bgp_show_nexthops(vty, bgp, false, uj, afi, detail);
firstinst = false;
if (uj)
vty_out(vty, "}");
}
}
#include "bgpd/bgp_nexthop_clippy.c"
DEFPY (show_ip_bgp_nexthop,
show_ip_bgp_nexthop_cmd,
"show [ip] bgp [<view|vrf> VIEWVRFNAME$vrf] nexthop [<A.B.C.D|X:X::X:X>$nhop] [<ipv4$afi [A.B.C.D$nhop]|ipv6$afi [X:X::X:X$nhop]>] [detail$detail] [json$uj]",
SHOW_STR
IP_STR
BGP_STR
BGP_INSTANCE_HELP_STR
"BGP nexthop table\n"
"IPv4 nexthop address\n"
"IPv6 nexthop address\n"
"BGP nexthop IPv4 table\n"
"IPv4 nexthop address\n"
"BGP nexthop IPv6 table\n"
"IPv6 nexthop address\n"
"Show detailed information\n"
JSON_STR)
{
int rc = 0;
afi_t afiz = AFI_UNSPEC;
if (uj)
vty_out(vty, "{\n");
if (afi)
afiz = bgp_vty_afi_from_str(afi);
rc = show_ip_bgp_nexthop_table(vty, vrf, nhop_str, false, uj, afiz, detail);
if (uj)
vty_out(vty, "}\n");
return rc;
}
DEFPY (show_ip_bgp_import_check,
show_ip_bgp_import_check_cmd,
"show [ip] bgp [<view|vrf> VIEWVRFNAME$vrf] import-check-table [detail$detail] [json$uj]",
SHOW_STR
IP_STR
BGP_STR
BGP_INSTANCE_HELP_STR
"BGP import check table\n"
"Show detailed information\n"
JSON_STR)
{
int rc = 0;
if (uj)
vty_out(vty, "{\n");
rc = show_ip_bgp_nexthop_table(vty, vrf, NULL, true, uj, AFI_UNSPEC, detail);
if (uj)
vty_out(vty, "}\n");
return rc;
}
DEFPY (show_ip_bgp_instance_all_nexthop,
show_ip_bgp_instance_all_nexthop_cmd,
"show [ip] bgp <view|vrf> all nexthop [<ipv4|ipv6>$afi] [detail$detail] [json$uj]",
SHOW_STR
IP_STR
BGP_STR
BGP_INSTANCE_ALL_HELP_STR
"BGP nexthop table\n"
"BGP IPv4 nexthop table\n"
"BGP IPv6 nexthop table\n"
"Show detailed information\n"
JSON_STR)
{
afi_t afiz = AFI_UNSPEC;
if (uj)
vty_out(vty, "{");
if (afi)
afiz = bgp_vty_afi_from_str(afi);
bgp_show_all_instances_nexthops_vty(vty, uj, afiz, detail);
if (uj)
vty_out(vty, "}");
return CMD_SUCCESS;
}
void bgp_scan_init(struct bgp *bgp)
2002-12-13 21:15:29 +01:00
{
afi_t afi;
for (afi = AFI_IP; afi < AFI_MAX; afi++) {
bgp_nexthop_cache_init(&bgp->nexthop_cache_table[afi]);
bgp_nexthop_cache_init(&bgp->import_check_table[afi]);
bgp->connected_table[afi] = bgp_table_init(bgp, afi,
SAFI_UNICAST);
}
}
void bgp_scan_vty_init(void)
{
install_element(VIEW_NODE, &show_ip_bgp_nexthop_cmd);
install_element(VIEW_NODE, &show_ip_bgp_import_check_cmd);
install_element(VIEW_NODE, &show_ip_bgp_instance_all_nexthop_cmd);
2002-12-13 21:15:29 +01:00
}
[bgpd] Stability fixes including bugs 397, 492 I've spent the last several weeks working on stability fixes to bgpd. These patches fix all of the numerous crashes, assertion failures, memory leaks and memory stomping I could find. Valgrind was used extensively. Added new function bgp_exit() to help catch problems. If "debug bgp" is configured and bgpd exits with status of 0, statistics on remaining lib/memory.c allocations are printed to stderr. It is my hope that other developers will use this to stay on top of memory issues. Example questionable exit: bgpd: memstats: Current memory utilization in module LIB: bgpd: memstats: Link List : 6 bgpd: memstats: Link Node : 5 bgpd: memstats: Hash : 8 bgpd: memstats: Hash Bucket : 2 bgpd: memstats: Hash Index : 8 bgpd: memstats: Work queue : 3 bgpd: memstats: Work queue item : 2 bgpd: memstats: Work queue name string : 3 bgpd: memstats: Current memory utilization in module BGP: bgpd: memstats: BGP instance : 1 bgpd: memstats: BGP peer : 1 bgpd: memstats: BGP peer hostname : 1 bgpd: memstats: BGP attribute : 1 bgpd: memstats: BGP extra attributes : 1 bgpd: memstats: BGP aspath : 1 bgpd: memstats: BGP aspath str : 1 bgpd: memstats: BGP table : 24 bgpd: memstats: BGP node : 1 bgpd: memstats: BGP route : 1 bgpd: memstats: BGP synchronise : 8 bgpd: memstats: BGP Process queue : 1 bgpd: memstats: BGP node clear queue : 1 bgpd: memstats: NOTE: If configuration exists, utilization may be expected. Example clean exit: bgpd: memstats: No remaining tracked memory utilization. This patch fixes bug #397: "Invalid free in bgp_announce_check()". This patch fixes bug #492: "SIGBUS in bgpd/bgp_route.c: bgp_clear_route_node()". My apologies for not separating out these changes into individual patches. The complexity of doing so boggled what is left of my brain. I hope this is all still useful to the community. This code has been production tested, in non-route-server-client mode, on a linux 32-bit box and a 64-bit box. Release/reset functions, used by bgp_exit(), added to: bgpd/bgp_attr.c,h bgpd/bgp_community.c,h bgpd/bgp_dump.c,h bgpd/bgp_ecommunity.c,h bgpd/bgp_filter.c,h bgpd/bgp_nexthop.c,h bgpd/bgp_route.c,h lib/routemap.c,h File by file analysis: * bgpd/bgp_aspath.c: Prevent re-use of ashash after it is released. * bgpd/bgp_attr.c: #if removed uncalled cluster_dup(). * bgpd/bgp_clist.c,h: Allow community_list_terminate() to be called from bgp_exit(). * bgpd/bgp_filter.c: Fix aslist->name use without allocation check, and also fix memory leak. * bgpd/bgp_main.c: Created bgp_exit() exit routine. This function frees allocations made as part of bgpd initialization and, to some extent, configuration. If "debug bgp" is configured, memory stats are printed as described above. * bgpd/bgp_nexthop.c: zclient_new() already allocates stream for ibuf/obuf, so bgp_scan_init() shouldn't do it too. Also, made it so zlookup is global so bgp_exit() can use it. * bgpd/bgp_packet.c: bgp_capability_msg_parse() call to bgp_clear_route() adjusted to use new BGP_CLEAR_ROUTE_NORMAL flag. * bgpd/bgp_route.h: Correct reference counter "lock" to be signed. bgp_clear_route() now accepts a bgp_clear_route_type of either BGP_CLEAR_ROUTE_NORMAL or BGP_CLEAR_ROUTE_MY_RSCLIENT. * bgpd/bgp_route.c: - bgp_process_rsclient(): attr was being zero'ed and then bgp_attr_extra_free() was being called with it, even though it was never filled with valid data. - bgp_process_rsclient(): Make sure rsclient->group is not NULL before use. - bgp_processq_del(): Add call to bgp_table_unlock(). - bgp_process(): Add call to bgp_table_lock(). - bgp_update_rsclient(): memset clearing of new_attr not needed since declarationw with "= { 0 }" does it. memset was already commented out. - bgp_update_rsclient(): Fix screwed up misleading indentation. - bgp_withdraw_rsclient(): Fix screwed up misleading indentation. - bgp_clear_route_node(): Support BGP_CLEAR_ROUTE_MY_RSCLIENT. - bgp_clear_node_queue_del(): Add call to bgp_table_unlock() and also free struct bgp_clear_node_queue used for work item. - bgp_clear_node_complete(): Do peer_unlock() after BGP_EVENT_ADD() in case peer is released by peer_unlock() call. - bgp_clear_route_table(): Support BGP_CLEAR_ROUTE_MY_RSCLIENT. Use struct bgp_clear_node_queue to supply data to worker. Add call to bgp_table_lock(). - bgp_clear_route(): Add support for BGP_CLEAR_ROUTE_NORMAL or BGP_CLEAR_ROUTE_MY_RSCLIENT. - bgp_clear_route_all(): Use BGP_CLEAR_ROUTE_NORMAL. Bug 397 fixes: - bgp_default_originate() - bgp_announce_table() * bgpd/bgp_table.h: - struct bgp_table: Added reference count. Changed type of owner to be "struct peer *" rather than "void *". - struct bgp_node: Correct reference counter "lock" to be signed. * bgpd/bgp_table.c: - Added bgp_table reference counting. - bgp_table_free(): Fixed cleanup code. Call peer_unlock() on owner if set. - bgp_unlock_node(): Added assertion. - bgp_node_get(): Added call to bgp_lock_node() to code path that it was missing from. * bgpd/bgp_vty.c: - peer_rsclient_set_vty(): Call peer_lock() as part of peer assignment to owner. Handle failure gracefully. - peer_rsclient_unset_vty(): Add call to bgp_clear_route() with BGP_CLEAR_ROUTE_MY_RSCLIENT purpose. * bgpd/bgp_zebra.c: Made it so zclient is global so bgp_exit() can use it. * bgpd/bgpd.c: - peer_lock(): Allow to be called when status is "Deleted". - peer_deactivate(): Supply BGP_CLEAR_ROUTE_NORMAL purpose to bgp_clear_route() call. - peer_delete(): Common variable listnode pn. Fix bug in which rsclient was only dealt with if not part of a peer group. Call bgp_clear_route() for rsclient, if appropriate, and do so with BGP_CLEAR_ROUTE_MY_RSCLIENT purpose. - peer_group_get(): Use XSTRDUP() instead of strdup() for conf->host. - peer_group_bind(): Call bgp_clear_route() for rsclient, and do so with BGP_CLEAR_ROUTE_MY_RSCLIENT purpose. - bgp_create(): Use XSTRDUP() instead of strdup() for peer_self->host. - bgp_delete(): Delete peers before groups, rather than after. And then rather than deleting rsclients, verify that there are none at this point. - bgp_unlock(): Add assertion. - bgp_free(): Call bgp_table_finish() rather than doing XFREE() itself. * lib/command.c,h: Compiler warning fixes. Add cmd_terminate(). Fixed massive leak in install_element() in which cmd_make_descvec() was being called more than once for the same cmd->strvec/string/doc. * lib/log.c: Make closezlog() check fp before calling fclose(). * lib/memory.c: Catch when alloc count goes negative by using signed counts. Correct #endif comment. Add log_memstats_stderr(). * lib/memory.h: Add log_memstats_stderr(). * lib/thread.c: thread->funcname was being accessed in thread_call() after it had been freed. Rearranged things so that thread_call() frees funcname. Also made it so thread_master_free() cleans up cpu_record. * lib/vty.c,h: Use global command_cr. Add vty_terminate(). * lib/zclient.c,h: Re-enable zclient_free().
2009-07-18 07:44:03 +02:00
void bgp_scan_finish(struct bgp *bgp)
[bgpd] Stability fixes including bugs 397, 492 I've spent the last several weeks working on stability fixes to bgpd. These patches fix all of the numerous crashes, assertion failures, memory leaks and memory stomping I could find. Valgrind was used extensively. Added new function bgp_exit() to help catch problems. If "debug bgp" is configured and bgpd exits with status of 0, statistics on remaining lib/memory.c allocations are printed to stderr. It is my hope that other developers will use this to stay on top of memory issues. Example questionable exit: bgpd: memstats: Current memory utilization in module LIB: bgpd: memstats: Link List : 6 bgpd: memstats: Link Node : 5 bgpd: memstats: Hash : 8 bgpd: memstats: Hash Bucket : 2 bgpd: memstats: Hash Index : 8 bgpd: memstats: Work queue : 3 bgpd: memstats: Work queue item : 2 bgpd: memstats: Work queue name string : 3 bgpd: memstats: Current memory utilization in module BGP: bgpd: memstats: BGP instance : 1 bgpd: memstats: BGP peer : 1 bgpd: memstats: BGP peer hostname : 1 bgpd: memstats: BGP attribute : 1 bgpd: memstats: BGP extra attributes : 1 bgpd: memstats: BGP aspath : 1 bgpd: memstats: BGP aspath str : 1 bgpd: memstats: BGP table : 24 bgpd: memstats: BGP node : 1 bgpd: memstats: BGP route : 1 bgpd: memstats: BGP synchronise : 8 bgpd: memstats: BGP Process queue : 1 bgpd: memstats: BGP node clear queue : 1 bgpd: memstats: NOTE: If configuration exists, utilization may be expected. Example clean exit: bgpd: memstats: No remaining tracked memory utilization. This patch fixes bug #397: "Invalid free in bgp_announce_check()". This patch fixes bug #492: "SIGBUS in bgpd/bgp_route.c: bgp_clear_route_node()". My apologies for not separating out these changes into individual patches. The complexity of doing so boggled what is left of my brain. I hope this is all still useful to the community. This code has been production tested, in non-route-server-client mode, on a linux 32-bit box and a 64-bit box. Release/reset functions, used by bgp_exit(), added to: bgpd/bgp_attr.c,h bgpd/bgp_community.c,h bgpd/bgp_dump.c,h bgpd/bgp_ecommunity.c,h bgpd/bgp_filter.c,h bgpd/bgp_nexthop.c,h bgpd/bgp_route.c,h lib/routemap.c,h File by file analysis: * bgpd/bgp_aspath.c: Prevent re-use of ashash after it is released. * bgpd/bgp_attr.c: #if removed uncalled cluster_dup(). * bgpd/bgp_clist.c,h: Allow community_list_terminate() to be called from bgp_exit(). * bgpd/bgp_filter.c: Fix aslist->name use without allocation check, and also fix memory leak. * bgpd/bgp_main.c: Created bgp_exit() exit routine. This function frees allocations made as part of bgpd initialization and, to some extent, configuration. If "debug bgp" is configured, memory stats are printed as described above. * bgpd/bgp_nexthop.c: zclient_new() already allocates stream for ibuf/obuf, so bgp_scan_init() shouldn't do it too. Also, made it so zlookup is global so bgp_exit() can use it. * bgpd/bgp_packet.c: bgp_capability_msg_parse() call to bgp_clear_route() adjusted to use new BGP_CLEAR_ROUTE_NORMAL flag. * bgpd/bgp_route.h: Correct reference counter "lock" to be signed. bgp_clear_route() now accepts a bgp_clear_route_type of either BGP_CLEAR_ROUTE_NORMAL or BGP_CLEAR_ROUTE_MY_RSCLIENT. * bgpd/bgp_route.c: - bgp_process_rsclient(): attr was being zero'ed and then bgp_attr_extra_free() was being called with it, even though it was never filled with valid data. - bgp_process_rsclient(): Make sure rsclient->group is not NULL before use. - bgp_processq_del(): Add call to bgp_table_unlock(). - bgp_process(): Add call to bgp_table_lock(). - bgp_update_rsclient(): memset clearing of new_attr not needed since declarationw with "= { 0 }" does it. memset was already commented out. - bgp_update_rsclient(): Fix screwed up misleading indentation. - bgp_withdraw_rsclient(): Fix screwed up misleading indentation. - bgp_clear_route_node(): Support BGP_CLEAR_ROUTE_MY_RSCLIENT. - bgp_clear_node_queue_del(): Add call to bgp_table_unlock() and also free struct bgp_clear_node_queue used for work item. - bgp_clear_node_complete(): Do peer_unlock() after BGP_EVENT_ADD() in case peer is released by peer_unlock() call. - bgp_clear_route_table(): Support BGP_CLEAR_ROUTE_MY_RSCLIENT. Use struct bgp_clear_node_queue to supply data to worker. Add call to bgp_table_lock(). - bgp_clear_route(): Add support for BGP_CLEAR_ROUTE_NORMAL or BGP_CLEAR_ROUTE_MY_RSCLIENT. - bgp_clear_route_all(): Use BGP_CLEAR_ROUTE_NORMAL. Bug 397 fixes: - bgp_default_originate() - bgp_announce_table() * bgpd/bgp_table.h: - struct bgp_table: Added reference count. Changed type of owner to be "struct peer *" rather than "void *". - struct bgp_node: Correct reference counter "lock" to be signed. * bgpd/bgp_table.c: - Added bgp_table reference counting. - bgp_table_free(): Fixed cleanup code. Call peer_unlock() on owner if set. - bgp_unlock_node(): Added assertion. - bgp_node_get(): Added call to bgp_lock_node() to code path that it was missing from. * bgpd/bgp_vty.c: - peer_rsclient_set_vty(): Call peer_lock() as part of peer assignment to owner. Handle failure gracefully. - peer_rsclient_unset_vty(): Add call to bgp_clear_route() with BGP_CLEAR_ROUTE_MY_RSCLIENT purpose. * bgpd/bgp_zebra.c: Made it so zclient is global so bgp_exit() can use it. * bgpd/bgpd.c: - peer_lock(): Allow to be called when status is "Deleted". - peer_deactivate(): Supply BGP_CLEAR_ROUTE_NORMAL purpose to bgp_clear_route() call. - peer_delete(): Common variable listnode pn. Fix bug in which rsclient was only dealt with if not part of a peer group. Call bgp_clear_route() for rsclient, if appropriate, and do so with BGP_CLEAR_ROUTE_MY_RSCLIENT purpose. - peer_group_get(): Use XSTRDUP() instead of strdup() for conf->host. - peer_group_bind(): Call bgp_clear_route() for rsclient, and do so with BGP_CLEAR_ROUTE_MY_RSCLIENT purpose. - bgp_create(): Use XSTRDUP() instead of strdup() for peer_self->host. - bgp_delete(): Delete peers before groups, rather than after. And then rather than deleting rsclients, verify that there are none at this point. - bgp_unlock(): Add assertion. - bgp_free(): Call bgp_table_finish() rather than doing XFREE() itself. * lib/command.c,h: Compiler warning fixes. Add cmd_terminate(). Fixed massive leak in install_element() in which cmd_make_descvec() was being called more than once for the same cmd->strvec/string/doc. * lib/log.c: Make closezlog() check fp before calling fclose(). * lib/memory.c: Catch when alloc count goes negative by using signed counts. Correct #endif comment. Add log_memstats_stderr(). * lib/memory.h: Add log_memstats_stderr(). * lib/thread.c: thread->funcname was being accessed in thread_call() after it had been freed. Rearranged things so that thread_call() frees funcname. Also made it so thread_master_free() cleans up cpu_record. * lib/vty.c,h: Use global command_cr. Add vty_terminate(). * lib/zclient.c,h: Re-enable zclient_free().
2009-07-18 07:44:03 +02:00
{
afi_t afi;
for (afi = AFI_IP; afi < AFI_MAX; afi++) {
/* Only the current one needs to be reset. */
bgp_nexthop_cache_reset(&bgp->nexthop_cache_table[afi]);
bgp_nexthop_cache_reset(&bgp->import_check_table[afi]);
[bgpd] Stability fixes including bugs 397, 492 I've spent the last several weeks working on stability fixes to bgpd. These patches fix all of the numerous crashes, assertion failures, memory leaks and memory stomping I could find. Valgrind was used extensively. Added new function bgp_exit() to help catch problems. If "debug bgp" is configured and bgpd exits with status of 0, statistics on remaining lib/memory.c allocations are printed to stderr. It is my hope that other developers will use this to stay on top of memory issues. Example questionable exit: bgpd: memstats: Current memory utilization in module LIB: bgpd: memstats: Link List : 6 bgpd: memstats: Link Node : 5 bgpd: memstats: Hash : 8 bgpd: memstats: Hash Bucket : 2 bgpd: memstats: Hash Index : 8 bgpd: memstats: Work queue : 3 bgpd: memstats: Work queue item : 2 bgpd: memstats: Work queue name string : 3 bgpd: memstats: Current memory utilization in module BGP: bgpd: memstats: BGP instance : 1 bgpd: memstats: BGP peer : 1 bgpd: memstats: BGP peer hostname : 1 bgpd: memstats: BGP attribute : 1 bgpd: memstats: BGP extra attributes : 1 bgpd: memstats: BGP aspath : 1 bgpd: memstats: BGP aspath str : 1 bgpd: memstats: BGP table : 24 bgpd: memstats: BGP node : 1 bgpd: memstats: BGP route : 1 bgpd: memstats: BGP synchronise : 8 bgpd: memstats: BGP Process queue : 1 bgpd: memstats: BGP node clear queue : 1 bgpd: memstats: NOTE: If configuration exists, utilization may be expected. Example clean exit: bgpd: memstats: No remaining tracked memory utilization. This patch fixes bug #397: "Invalid free in bgp_announce_check()". This patch fixes bug #492: "SIGBUS in bgpd/bgp_route.c: bgp_clear_route_node()". My apologies for not separating out these changes into individual patches. The complexity of doing so boggled what is left of my brain. I hope this is all still useful to the community. This code has been production tested, in non-route-server-client mode, on a linux 32-bit box and a 64-bit box. Release/reset functions, used by bgp_exit(), added to: bgpd/bgp_attr.c,h bgpd/bgp_community.c,h bgpd/bgp_dump.c,h bgpd/bgp_ecommunity.c,h bgpd/bgp_filter.c,h bgpd/bgp_nexthop.c,h bgpd/bgp_route.c,h lib/routemap.c,h File by file analysis: * bgpd/bgp_aspath.c: Prevent re-use of ashash after it is released. * bgpd/bgp_attr.c: #if removed uncalled cluster_dup(). * bgpd/bgp_clist.c,h: Allow community_list_terminate() to be called from bgp_exit(). * bgpd/bgp_filter.c: Fix aslist->name use without allocation check, and also fix memory leak. * bgpd/bgp_main.c: Created bgp_exit() exit routine. This function frees allocations made as part of bgpd initialization and, to some extent, configuration. If "debug bgp" is configured, memory stats are printed as described above. * bgpd/bgp_nexthop.c: zclient_new() already allocates stream for ibuf/obuf, so bgp_scan_init() shouldn't do it too. Also, made it so zlookup is global so bgp_exit() can use it. * bgpd/bgp_packet.c: bgp_capability_msg_parse() call to bgp_clear_route() adjusted to use new BGP_CLEAR_ROUTE_NORMAL flag. * bgpd/bgp_route.h: Correct reference counter "lock" to be signed. bgp_clear_route() now accepts a bgp_clear_route_type of either BGP_CLEAR_ROUTE_NORMAL or BGP_CLEAR_ROUTE_MY_RSCLIENT. * bgpd/bgp_route.c: - bgp_process_rsclient(): attr was being zero'ed and then bgp_attr_extra_free() was being called with it, even though it was never filled with valid data. - bgp_process_rsclient(): Make sure rsclient->group is not NULL before use. - bgp_processq_del(): Add call to bgp_table_unlock(). - bgp_process(): Add call to bgp_table_lock(). - bgp_update_rsclient(): memset clearing of new_attr not needed since declarationw with "= { 0 }" does it. memset was already commented out. - bgp_update_rsclient(): Fix screwed up misleading indentation. - bgp_withdraw_rsclient(): Fix screwed up misleading indentation. - bgp_clear_route_node(): Support BGP_CLEAR_ROUTE_MY_RSCLIENT. - bgp_clear_node_queue_del(): Add call to bgp_table_unlock() and also free struct bgp_clear_node_queue used for work item. - bgp_clear_node_complete(): Do peer_unlock() after BGP_EVENT_ADD() in case peer is released by peer_unlock() call. - bgp_clear_route_table(): Support BGP_CLEAR_ROUTE_MY_RSCLIENT. Use struct bgp_clear_node_queue to supply data to worker. Add call to bgp_table_lock(). - bgp_clear_route(): Add support for BGP_CLEAR_ROUTE_NORMAL or BGP_CLEAR_ROUTE_MY_RSCLIENT. - bgp_clear_route_all(): Use BGP_CLEAR_ROUTE_NORMAL. Bug 397 fixes: - bgp_default_originate() - bgp_announce_table() * bgpd/bgp_table.h: - struct bgp_table: Added reference count. Changed type of owner to be "struct peer *" rather than "void *". - struct bgp_node: Correct reference counter "lock" to be signed. * bgpd/bgp_table.c: - Added bgp_table reference counting. - bgp_table_free(): Fixed cleanup code. Call peer_unlock() on owner if set. - bgp_unlock_node(): Added assertion. - bgp_node_get(): Added call to bgp_lock_node() to code path that it was missing from. * bgpd/bgp_vty.c: - peer_rsclient_set_vty(): Call peer_lock() as part of peer assignment to owner. Handle failure gracefully. - peer_rsclient_unset_vty(): Add call to bgp_clear_route() with BGP_CLEAR_ROUTE_MY_RSCLIENT purpose. * bgpd/bgp_zebra.c: Made it so zclient is global so bgp_exit() can use it. * bgpd/bgpd.c: - peer_lock(): Allow to be called when status is "Deleted". - peer_deactivate(): Supply BGP_CLEAR_ROUTE_NORMAL purpose to bgp_clear_route() call. - peer_delete(): Common variable listnode pn. Fix bug in which rsclient was only dealt with if not part of a peer group. Call bgp_clear_route() for rsclient, if appropriate, and do so with BGP_CLEAR_ROUTE_MY_RSCLIENT purpose. - peer_group_get(): Use XSTRDUP() instead of strdup() for conf->host. - peer_group_bind(): Call bgp_clear_route() for rsclient, and do so with BGP_CLEAR_ROUTE_MY_RSCLIENT purpose. - bgp_create(): Use XSTRDUP() instead of strdup() for peer_self->host. - bgp_delete(): Delete peers before groups, rather than after. And then rather than deleting rsclients, verify that there are none at this point. - bgp_unlock(): Add assertion. - bgp_free(): Call bgp_table_finish() rather than doing XFREE() itself. * lib/command.c,h: Compiler warning fixes. Add cmd_terminate(). Fixed massive leak in install_element() in which cmd_make_descvec() was being called more than once for the same cmd->strvec/string/doc. * lib/log.c: Make closezlog() check fp before calling fclose(). * lib/memory.c: Catch when alloc count goes negative by using signed counts. Correct #endif comment. Add log_memstats_stderr(). * lib/memory.h: Add log_memstats_stderr(). * lib/thread.c: thread->funcname was being accessed in thread_call() after it had been freed. Rearranged things so that thread_call() frees funcname. Also made it so thread_master_free() cleans up cpu_record. * lib/vty.c,h: Use global command_cr. Add vty_terminate(). * lib/zclient.c,h: Re-enable zclient_free().
2009-07-18 07:44:03 +02:00
bgp->connected_table[afi]->route_table->cleanup =
bgp_connected_cleanup;
bgp_table_unlock(bgp->connected_table[afi]);
bgp->connected_table[afi] = NULL;
}
[bgpd] Stability fixes including bugs 397, 492 I've spent the last several weeks working on stability fixes to bgpd. These patches fix all of the numerous crashes, assertion failures, memory leaks and memory stomping I could find. Valgrind was used extensively. Added new function bgp_exit() to help catch problems. If "debug bgp" is configured and bgpd exits with status of 0, statistics on remaining lib/memory.c allocations are printed to stderr. It is my hope that other developers will use this to stay on top of memory issues. Example questionable exit: bgpd: memstats: Current memory utilization in module LIB: bgpd: memstats: Link List : 6 bgpd: memstats: Link Node : 5 bgpd: memstats: Hash : 8 bgpd: memstats: Hash Bucket : 2 bgpd: memstats: Hash Index : 8 bgpd: memstats: Work queue : 3 bgpd: memstats: Work queue item : 2 bgpd: memstats: Work queue name string : 3 bgpd: memstats: Current memory utilization in module BGP: bgpd: memstats: BGP instance : 1 bgpd: memstats: BGP peer : 1 bgpd: memstats: BGP peer hostname : 1 bgpd: memstats: BGP attribute : 1 bgpd: memstats: BGP extra attributes : 1 bgpd: memstats: BGP aspath : 1 bgpd: memstats: BGP aspath str : 1 bgpd: memstats: BGP table : 24 bgpd: memstats: BGP node : 1 bgpd: memstats: BGP route : 1 bgpd: memstats: BGP synchronise : 8 bgpd: memstats: BGP Process queue : 1 bgpd: memstats: BGP node clear queue : 1 bgpd: memstats: NOTE: If configuration exists, utilization may be expected. Example clean exit: bgpd: memstats: No remaining tracked memory utilization. This patch fixes bug #397: "Invalid free in bgp_announce_check()". This patch fixes bug #492: "SIGBUS in bgpd/bgp_route.c: bgp_clear_route_node()". My apologies for not separating out these changes into individual patches. The complexity of doing so boggled what is left of my brain. I hope this is all still useful to the community. This code has been production tested, in non-route-server-client mode, on a linux 32-bit box and a 64-bit box. Release/reset functions, used by bgp_exit(), added to: bgpd/bgp_attr.c,h bgpd/bgp_community.c,h bgpd/bgp_dump.c,h bgpd/bgp_ecommunity.c,h bgpd/bgp_filter.c,h bgpd/bgp_nexthop.c,h bgpd/bgp_route.c,h lib/routemap.c,h File by file analysis: * bgpd/bgp_aspath.c: Prevent re-use of ashash after it is released. * bgpd/bgp_attr.c: #if removed uncalled cluster_dup(). * bgpd/bgp_clist.c,h: Allow community_list_terminate() to be called from bgp_exit(). * bgpd/bgp_filter.c: Fix aslist->name use without allocation check, and also fix memory leak. * bgpd/bgp_main.c: Created bgp_exit() exit routine. This function frees allocations made as part of bgpd initialization and, to some extent, configuration. If "debug bgp" is configured, memory stats are printed as described above. * bgpd/bgp_nexthop.c: zclient_new() already allocates stream for ibuf/obuf, so bgp_scan_init() shouldn't do it too. Also, made it so zlookup is global so bgp_exit() can use it. * bgpd/bgp_packet.c: bgp_capability_msg_parse() call to bgp_clear_route() adjusted to use new BGP_CLEAR_ROUTE_NORMAL flag. * bgpd/bgp_route.h: Correct reference counter "lock" to be signed. bgp_clear_route() now accepts a bgp_clear_route_type of either BGP_CLEAR_ROUTE_NORMAL or BGP_CLEAR_ROUTE_MY_RSCLIENT. * bgpd/bgp_route.c: - bgp_process_rsclient(): attr was being zero'ed and then bgp_attr_extra_free() was being called with it, even though it was never filled with valid data. - bgp_process_rsclient(): Make sure rsclient->group is not NULL before use. - bgp_processq_del(): Add call to bgp_table_unlock(). - bgp_process(): Add call to bgp_table_lock(). - bgp_update_rsclient(): memset clearing of new_attr not needed since declarationw with "= { 0 }" does it. memset was already commented out. - bgp_update_rsclient(): Fix screwed up misleading indentation. - bgp_withdraw_rsclient(): Fix screwed up misleading indentation. - bgp_clear_route_node(): Support BGP_CLEAR_ROUTE_MY_RSCLIENT. - bgp_clear_node_queue_del(): Add call to bgp_table_unlock() and also free struct bgp_clear_node_queue used for work item. - bgp_clear_node_complete(): Do peer_unlock() after BGP_EVENT_ADD() in case peer is released by peer_unlock() call. - bgp_clear_route_table(): Support BGP_CLEAR_ROUTE_MY_RSCLIENT. Use struct bgp_clear_node_queue to supply data to worker. Add call to bgp_table_lock(). - bgp_clear_route(): Add support for BGP_CLEAR_ROUTE_NORMAL or BGP_CLEAR_ROUTE_MY_RSCLIENT. - bgp_clear_route_all(): Use BGP_CLEAR_ROUTE_NORMAL. Bug 397 fixes: - bgp_default_originate() - bgp_announce_table() * bgpd/bgp_table.h: - struct bgp_table: Added reference count. Changed type of owner to be "struct peer *" rather than "void *". - struct bgp_node: Correct reference counter "lock" to be signed. * bgpd/bgp_table.c: - Added bgp_table reference counting. - bgp_table_free(): Fixed cleanup code. Call peer_unlock() on owner if set. - bgp_unlock_node(): Added assertion. - bgp_node_get(): Added call to bgp_lock_node() to code path that it was missing from. * bgpd/bgp_vty.c: - peer_rsclient_set_vty(): Call peer_lock() as part of peer assignment to owner. Handle failure gracefully. - peer_rsclient_unset_vty(): Add call to bgp_clear_route() with BGP_CLEAR_ROUTE_MY_RSCLIENT purpose. * bgpd/bgp_zebra.c: Made it so zclient is global so bgp_exit() can use it. * bgpd/bgpd.c: - peer_lock(): Allow to be called when status is "Deleted". - peer_deactivate(): Supply BGP_CLEAR_ROUTE_NORMAL purpose to bgp_clear_route() call. - peer_delete(): Common variable listnode pn. Fix bug in which rsclient was only dealt with if not part of a peer group. Call bgp_clear_route() for rsclient, if appropriate, and do so with BGP_CLEAR_ROUTE_MY_RSCLIENT purpose. - peer_group_get(): Use XSTRDUP() instead of strdup() for conf->host. - peer_group_bind(): Call bgp_clear_route() for rsclient, and do so with BGP_CLEAR_ROUTE_MY_RSCLIENT purpose. - bgp_create(): Use XSTRDUP() instead of strdup() for peer_self->host. - bgp_delete(): Delete peers before groups, rather than after. And then rather than deleting rsclients, verify that there are none at this point. - bgp_unlock(): Add assertion. - bgp_free(): Call bgp_table_finish() rather than doing XFREE() itself. * lib/command.c,h: Compiler warning fixes. Add cmd_terminate(). Fixed massive leak in install_element() in which cmd_make_descvec() was being called more than once for the same cmd->strvec/string/doc. * lib/log.c: Make closezlog() check fp before calling fclose(). * lib/memory.c: Catch when alloc count goes negative by using signed counts. Correct #endif comment. Add log_memstats_stderr(). * lib/memory.h: Add log_memstats_stderr(). * lib/thread.c: thread->funcname was being accessed in thread_call() after it had been freed. Rearranged things so that thread_call() frees funcname. Also made it so thread_master_free() cleans up cpu_record. * lib/vty.c,h: Use global command_cr. Add vty_terminate(). * lib/zclient.c,h: Re-enable zclient_free().
2009-07-18 07:44:03 +02:00
}
char *bgp_nexthop_dump_bnc_flags(struct bgp_nexthop_cache *bnc, char *buf,
size_t len)
{
if (bnc->flags == 0) {
snprintfrr(buf, len, "None ");
return buf;
}
snprintfrr(buf, len, "%s%s%s%s%s%s%s",
CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) ? "Valid " : "",
CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED) ? "Reg " : "",
CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED) ? "Conn " : "",
CHECK_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED) ? "Notify "
: "",
CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE) ? "Static " : "",
CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH)
? "Static Exact "
: "",
CHECK_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID)
? "Label Valid "
: "");
return buf;
}
char *bgp_nexthop_dump_bnc_change_flags(struct bgp_nexthop_cache *bnc,
char *buf, size_t len)
{
if (bnc->flags == 0) {
snprintfrr(buf, len, "None ");
return buf;
}
snprintfrr(buf, len, "%s%s",
CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED) ? "Changed " : "",
CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED) ? "Metric " : "");
return buf;
}