bgpd: extend the NHT code to understand SR-TE colors

Extend the NHT code so that only the BGP routes that depend on a given
SR-policy are re-evaluated whenever that policy is updated in zebra.

Signed-off-by: Renato Westphal <renato@opensourcerouting.org>
This commit is contained in:
Renato Westphal 2020-08-26 14:39:33 -03:00 committed by GalaxyGorilla
parent ef3e0d0476
commit 545aeef1d1
3 changed files with 121 additions and 70 deletions

View file

@ -50,6 +50,11 @@ DEFINE_MTYPE_STATIC(BGPD, MARTIAN_STRING, "BGP Martian Address Intf String");
int bgp_nexthop_cache_compare(const struct bgp_nexthop_cache *a, int bgp_nexthop_cache_compare(const struct bgp_nexthop_cache *a,
const struct bgp_nexthop_cache *b) const struct bgp_nexthop_cache *b)
{ {
if (a->srte_color < b->srte_color)
return -1;
if (a->srte_color > b->srte_color)
return 1;
return prefix_cmp(&a->prefix, &b->prefix); return prefix_cmp(&a->prefix, &b->prefix);
} }
@ -64,13 +69,14 @@ void bnc_nexthop_free(struct bgp_nexthop_cache *bnc)
} }
struct bgp_nexthop_cache *bnc_new(struct bgp_nexthop_cache_head *tree, struct bgp_nexthop_cache *bnc_new(struct bgp_nexthop_cache_head *tree,
struct prefix *prefix) struct prefix *prefix, uint32_t srte_color)
{ {
struct bgp_nexthop_cache *bnc; struct bgp_nexthop_cache *bnc;
bnc = XCALLOC(MTYPE_BGP_NEXTHOP_CACHE, bnc = XCALLOC(MTYPE_BGP_NEXTHOP_CACHE,
sizeof(struct bgp_nexthop_cache)); sizeof(struct bgp_nexthop_cache));
bnc->prefix = *prefix; bnc->prefix = *prefix;
bnc->srte_color = srte_color;
bnc->tree = tree; bnc->tree = tree;
LIST_INIT(&(bnc->paths)); LIST_INIT(&(bnc->paths));
bgp_nexthop_cache_add(tree, bnc); bgp_nexthop_cache_add(tree, bnc);
@ -86,7 +92,7 @@ void bnc_free(struct bgp_nexthop_cache *bnc)
} }
struct bgp_nexthop_cache *bnc_find(struct bgp_nexthop_cache_head *tree, struct bgp_nexthop_cache *bnc_find(struct bgp_nexthop_cache_head *tree,
struct prefix *prefix) struct prefix *prefix, uint32_t srte_color)
{ {
struct bgp_nexthop_cache bnc = {}; struct bgp_nexthop_cache bnc = {};
@ -94,6 +100,7 @@ struct bgp_nexthop_cache *bnc_find(struct bgp_nexthop_cache_head *tree,
return NULL; return NULL;
bnc.prefix = *prefix; bnc.prefix = *prefix;
bnc.srte_color = srte_color;
return bgp_nexthop_cache_find(tree, &bnc); return bgp_nexthop_cache_find(tree, &bnc);
} }
@ -799,6 +806,8 @@ static void bgp_show_nexthop(struct vty *vty, struct bgp *bgp,
peer = (struct peer *)bnc->nht_info; peer = (struct peer *)bnc->nht_info;
if (bnc->srte_color)
vty_out(vty, " SR-TE color %u -", bnc->srte_color);
if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)) { if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)) {
vty_out(vty, " %s valid [IGP metric %d], #paths %d", vty_out(vty, " %s valid [IGP metric %d], #paths %d",
inet_ntop(bnc->prefix.family, &bnc->prefix.u.prefix, inet_ntop(bnc->prefix.family, &bnc->prefix.u.prefix,

View file

@ -70,6 +70,7 @@ struct bgp_nexthop_cache {
/* Back pointer to the cache tree this entry belongs to. */ /* Back pointer to the cache tree this entry belongs to. */
struct bgp_nexthop_cache_head *tree; struct bgp_nexthop_cache_head *tree;
uint32_t srte_color;
struct prefix prefix; struct prefix prefix;
void *nht_info; /* In BGP, peer session */ void *nht_info; /* In BGP, peer session */
LIST_HEAD(path_list, bgp_path_info) paths; LIST_HEAD(path_list, bgp_path_info) paths;
@ -115,10 +116,12 @@ extern bool bgp_nexthop_self(struct bgp *bgp, afi_t afi, uint8_t type,
uint8_t sub_type, struct attr *attr, uint8_t sub_type, struct attr *attr,
struct bgp_dest *dest); struct bgp_dest *dest);
extern struct bgp_nexthop_cache *bnc_new(struct bgp_nexthop_cache_head *tree, extern struct bgp_nexthop_cache *bnc_new(struct bgp_nexthop_cache_head *tree,
struct prefix *prefix); struct prefix *prefix,
uint32_t srte_color);
extern void bnc_free(struct bgp_nexthop_cache *bnc); extern void bnc_free(struct bgp_nexthop_cache *bnc);
extern struct bgp_nexthop_cache *bnc_find(struct bgp_nexthop_cache_head *tree, extern struct bgp_nexthop_cache *bnc_find(struct bgp_nexthop_cache_head *tree,
struct prefix *prefix); struct prefix *prefix,
uint32_t srte_color);
extern void bnc_nexthop_free(struct bgp_nexthop_cache *bnc); extern void bnc_nexthop_free(struct bgp_nexthop_cache *bnc);
extern const char *bnc_str(struct bgp_nexthop_cache *bnc, char *buf, int size); extern const char *bnc_str(struct bgp_nexthop_cache *bnc, char *buf, int size);
extern void bgp_scan_init(struct bgp *bgp); extern void bgp_scan_init(struct bgp *bgp);

View file

@ -72,9 +72,9 @@ static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc)
if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info) { if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info) {
if (BGP_DEBUG(nht, NHT)) { if (BGP_DEBUG(nht, NHT)) {
char buf[PREFIX2STR_BUFFER]; char buf[PREFIX2STR_BUFFER];
zlog_debug("bgp_unlink_nexthop: freeing bnc %s(%s)", zlog_debug("bgp_unlink_nexthop: freeing bnc %s(%u)(%s)",
bnc_str(bnc, buf, PREFIX2STR_BUFFER), bnc_str(bnc, buf, PREFIX2STR_BUFFER),
bnc->bgp->name_pretty); bnc->srte_color, bnc->bgp->name_pretty);
} }
unregister_zebra_rnh(bnc, unregister_zebra_rnh(bnc,
CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE)); CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE));
@ -103,7 +103,7 @@ void bgp_unlink_nexthop_by_peer(struct peer *peer)
if (!sockunion2hostprefix(&peer->su, &p)) if (!sockunion2hostprefix(&peer->su, &p))
return; return;
bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p); bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p, 0);
if (!bnc) if (!bnc)
return; return;
@ -124,6 +124,7 @@ int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop,
struct bgp_nexthop_cache_head *tree = NULL; struct bgp_nexthop_cache_head *tree = NULL;
struct bgp_nexthop_cache *bnc; struct bgp_nexthop_cache *bnc;
struct prefix p; struct prefix p;
uint32_t srte_color = 0;
int is_bgp_static_route = 0; int is_bgp_static_route = 0;
if (pi) { if (pi) {
@ -148,6 +149,8 @@ int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop,
* addr */ * addr */
if (make_prefix(afi, pi, &p) < 0) if (make_prefix(afi, pi, &p) < 0)
return 1; return 1;
srte_color = pi->attr->srte_color;
} else if (peer) { } else if (peer) {
if (!sockunion2hostprefix(&peer->su, &p)) { if (!sockunion2hostprefix(&peer->su, &p)) {
if (BGP_DEBUG(nht, NHT)) { if (BGP_DEBUG(nht, NHT)) {
@ -165,16 +168,17 @@ int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop,
else else
tree = &bgp_nexthop->nexthop_cache_table[afi]; tree = &bgp_nexthop->nexthop_cache_table[afi];
bnc = bnc_find(tree, &p); bnc = bnc_find(tree, &p, srte_color);
if (!bnc) { if (!bnc) {
bnc = bnc_new(tree, &p); bnc = bnc_new(tree, &p, srte_color);
bnc->bgp = bgp_nexthop; bnc->bgp = bgp_nexthop;
if (BGP_DEBUG(nht, NHT)) { if (BGP_DEBUG(nht, NHT)) {
char buf[PREFIX2STR_BUFFER]; char buf[PREFIX2STR_BUFFER];
zlog_debug("Allocated bnc %s(%s) peer %p", zlog_debug("Allocated bnc %s(%u)(%s) peer %p",
bnc_str(bnc, buf, PREFIX2STR_BUFFER), bnc_str(bnc, buf, PREFIX2STR_BUFFER),
bnc->bgp->name_pretty, peer); bnc->srte_color, bnc->bgp->name_pretty,
peer);
} }
} }
@ -266,7 +270,7 @@ void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer)
return; return;
bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)], bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)],
&p); &p, 0);
if (!bnc) { if (!bnc) {
if (BGP_DEBUG(nht, NHT)) if (BGP_DEBUG(nht, NHT))
zlog_debug( zlog_debug(
@ -296,51 +300,14 @@ void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer)
} }
} }
void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id) static void bgp_process_nexthop_update(struct bgp_nexthop_cache *bnc,
struct zapi_route *nhr)
{ {
struct bgp_nexthop_cache_head *tree = NULL;
struct bgp_nexthop_cache *bnc;
struct nexthop *nexthop; struct nexthop *nexthop;
struct nexthop *oldnh; struct nexthop *oldnh;
struct nexthop *nhlist_head = NULL; struct nexthop *nhlist_head = NULL;
struct nexthop *nhlist_tail = NULL; struct nexthop *nhlist_tail = NULL;
int i; int i;
struct bgp *bgp;
struct zapi_route nhr;
bgp = bgp_lookup_by_vrf_id(vrf_id);
if (!bgp) {
flog_err(
EC_BGP_NH_UPD,
"parse nexthop update: instance not found for vrf_id %u",
vrf_id);
return;
}
if (!zapi_nexthop_update_decode(zclient->ibuf, &nhr)) {
if (BGP_DEBUG(nht, NHT))
zlog_debug("%s[%s]: Failure to decode nexthop update",
__func__, bgp->name_pretty);
return;
}
if (command == ZEBRA_NEXTHOP_UPDATE)
tree = &bgp->nexthop_cache_table[family2afi(nhr.prefix.family)];
else if (command == ZEBRA_IMPORT_CHECK_UPDATE)
tree = &bgp->import_check_table[family2afi(nhr.prefix.family)];
bnc = bnc_find(tree, &nhr.prefix);
if (!bnc) {
if (BGP_DEBUG(nht, NHT)) {
char buf[PREFIX2STR_BUFFER];
prefix2str(&nhr.prefix, buf, sizeof(buf));
zlog_debug(
"parse nexthop update(%s(%s)): bnc info not found",
buf, bgp->name_pretty);
}
return;
}
bnc->last_update = bgp_clock(); bnc->last_update = bgp_clock();
bnc->change_flags = 0; bnc->change_flags = 0;
@ -348,21 +315,21 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id)
/* debug print the input */ /* debug print the input */
if (BGP_DEBUG(nht, NHT)) { if (BGP_DEBUG(nht, NHT)) {
char buf[PREFIX2STR_BUFFER]; char buf[PREFIX2STR_BUFFER];
prefix2str(&nhr.prefix, buf, sizeof(buf)); prefix2str(&nhr->prefix, buf, sizeof(buf));
zlog_debug( zlog_debug(
"%s(%u): Rcvd NH update %s - metric %d/%d #nhops %d/%d flags 0x%x", "%s(%u): Rcvd NH update %s(%u) - metric %d/%d #nhops %d/%d flags 0x%x",
bnc->bgp->name_pretty, vrf_id, buf, nhr.metric, bnc->bgp->name_pretty, bnc->bgp->vrf_id, buf,
bnc->metric, nhr.nexthop_num, bnc->nexthop_num, bnc->srte_color, nhr->metric, bnc->metric,
bnc->flags); nhr->nexthop_num, bnc->nexthop_num, bnc->flags);
} }
if (nhr.metric != bnc->metric) if (nhr->metric != bnc->metric)
bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED; bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED;
if (nhr.nexthop_num != bnc->nexthop_num) if (nhr->nexthop_num != bnc->nexthop_num)
bnc->change_flags |= BGP_NEXTHOP_CHANGED; bnc->change_flags |= BGP_NEXTHOP_CHANGED;
if (nhr.nexthop_num) { if (nhr->nexthop_num) {
struct peer *peer = bnc->nht_info; struct peer *peer = bnc->nht_info;
/* notify bgp fsm if nbr ip goes from invalid->valid */ /* notify bgp fsm if nbr ip goes from invalid->valid */
@ -370,15 +337,15 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id)
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED); UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
bnc->flags |= BGP_NEXTHOP_VALID; bnc->flags |= BGP_NEXTHOP_VALID;
bnc->metric = nhr.metric; bnc->metric = nhr->metric;
bnc->nexthop_num = nhr.nexthop_num; bnc->nexthop_num = nhr->nexthop_num;
bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID; /* check below */ bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID; /* check below */
for (i = 0; i < nhr.nexthop_num; i++) { for (i = 0; i < nhr->nexthop_num; i++) {
int num_labels = 0; int num_labels = 0;
nexthop = nexthop_from_zapi_nexthop(&nhr.nexthops[i]); nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]);
/* /*
* Turn on RA for the v6 nexthops * Turn on RA for the v6 nexthops
@ -388,7 +355,7 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id)
if (peer && !peer->ifp if (peer && !peer->ifp
&& CHECK_FLAG(peer->flags, && CHECK_FLAG(peer->flags,
PEER_FLAG_CAPABILITY_ENHE) PEER_FLAG_CAPABILITY_ENHE)
&& nhr.prefix.family == AF_INET6 && nhr->prefix.family == AF_INET6
&& nexthop->type != NEXTHOP_TYPE_BLACKHOLE) { && nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
struct interface *ifp; struct interface *ifp;
@ -442,7 +409,7 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id)
bnc->nexthop = nhlist_head; bnc->nexthop = nhlist_head;
} else { } else {
bnc->flags &= ~BGP_NEXTHOP_VALID; bnc->flags &= ~BGP_NEXTHOP_VALID;
bnc->nexthop_num = nhr.nexthop_num; bnc->nexthop_num = nhr->nexthop_num;
/* notify bgp fsm if nbr ip goes from valid->invalid */ /* notify bgp fsm if nbr ip goes from valid->invalid */
UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED); UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
@ -454,6 +421,77 @@ void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id)
evaluate_paths(bnc); evaluate_paths(bnc);
} }
/*
 * Handle a nexthop tracking message received from zebra.
 *
 * The update is decoded from zclient->ibuf, the matching nexthop cache
 * entry is looked up by (prefix, SR-TE color) and then handed over to
 * bgp_process_nexthop_update().
 *
 * command: ZEBRA_NEXTHOP_UPDATE selects the nexthop cache table,
 *          ZEBRA_IMPORT_CHECK_UPDATE selects the import check table.
 * vrf_id:  VRF identifier used to find the BGP instance.
 *
 * Nothing is returned; failures (unknown instance, undecodable message,
 * no matching cache entry) are logged and the message is dropped.
 */
void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id)
{
	struct bgp_nexthop_cache_head *tree = NULL;
	struct bgp_nexthop_cache *bnc;
	struct bgp *bgp;
	struct zapi_route nhr;
	afi_t afi;

	bgp = bgp_lookup_by_vrf_id(vrf_id);
	if (!bgp) {
		flog_err(
			EC_BGP_NH_UPD,
			"parse nexthop update: instance not found for vrf_id %u",
			vrf_id);
		return;
	}

	if (!zapi_nexthop_update_decode(zclient->ibuf, &nhr)) {
		if (BGP_DEBUG(nht, NHT))
			zlog_debug("%s[%s]: Failure to decode nexthop update",
				   __func__, bgp->name_pretty);
		return;
	}

	afi = family2afi(nhr.prefix.family);

	/*
	 * For any other command value the tree pointer stays NULL.
	 * NOTE(review): this relies on bnc_find() reporting "not found" for
	 * a NULL tree - confirm against its implementation.
	 */
	if (command == ZEBRA_NEXTHOP_UPDATE)
		tree = &bgp->nexthop_cache_table[afi];
	else if (command == ZEBRA_IMPORT_CHECK_UPDATE)
		tree = &bgp->import_check_table[afi];

	bnc = bnc_find(tree, &nhr.prefix, nhr.srte_color);
	if (!bnc) {
		if (BGP_DEBUG(nht, NHT)) {
			char buf[PREFIX2STR_BUFFER];

			prefix2str(&nhr.prefix, buf, sizeof(buf));
			zlog_debug(
				"parse nexthop update(%s(%u)(%s)): bnc info not found",
				buf, nhr.srte_color, bgp->name_pretty);
		}
		return;
	}

	bgp_process_nexthop_update(bnc, &nhr);

	/*
	 * HACK: if any BGP route is dependent on an SR-policy that doesn't
	 * exist, zebra will never send NH updates relative to that policy. In
	 * that case, whenever we receive an update about a colorless NH, update
	 * the corresponding colorful NHs that share the same endpoint but that
	 * are inactive. This ugly hack should work around the problem at the
	 * cost of a performance penalty. Long term, what should be done is to
	 * make zebra's RNH subsystem aware of SR-TE colors (like bgpd is),
	 * which should provide a better infrastructure to solve this issue in
	 * a more efficient and elegant way.
	 *
	 * This only applies to nexthop updates: an import check update must
	 * not be propagated into the nexthop cache table, otherwise an
	 * import-check result for a prefix that happens to equal a colored
	 * nexthop would overwrite that nexthop's tracking state.
	 */
	if (command == ZEBRA_NEXTHOP_UPDATE && nhr.srte_color == 0) {
		struct bgp_nexthop_cache *bnc_iter;

		frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
			  bnc_iter) {
			/*
			 * Skip entries for another endpoint, the colorless
			 * entry itself, and colored entries that are already
			 * valid.
			 */
			if (!prefix_same(&bnc->prefix, &bnc_iter->prefix)
			    || bnc_iter->srte_color == 0
			    || CHECK_FLAG(bnc_iter->flags, BGP_NEXTHOP_VALID))
				continue;

			bgp_process_nexthop_update(bnc_iter, &nhr);
		}
	}
}
/* /*
* Cleanup nexthop registration and status information for BGP nexthops * Cleanup nexthop registration and status information for BGP nexthops
* pertaining to this VRF. This is invoked upon VRF deletion. * pertaining to this VRF. This is invoked upon VRF deletion.
@ -667,8 +705,8 @@ static void evaluate_paths(struct bgp_nexthop_cache *bnc)
char buf[PREFIX2STR_BUFFER]; char buf[PREFIX2STR_BUFFER];
bnc_str(bnc, buf, PREFIX2STR_BUFFER); bnc_str(bnc, buf, PREFIX2STR_BUFFER);
zlog_debug( zlog_debug(
"NH update for %s %s flags 0x%x chgflags 0x%x - evaluate paths", "NH update for %s(%u)(%s) - flags 0x%x chgflags 0x%x - evaluate paths",
buf, bnc->bgp->name_pretty, bnc->flags, buf, bnc->srte_color, bnc->bgp->name_pretty, bnc->flags,
bnc->change_flags); bnc->change_flags);
} }
@ -756,7 +794,8 @@ static void evaluate_paths(struct bgp_nexthop_cache *bnc)
path->extra->igpmetric = 0; path->extra->igpmetric = 0;
if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED) if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED)
|| CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED)) || CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED)
|| path->attr->srte_color != 0)
SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED); SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED);
path_valid = !!CHECK_FLAG(path->flags, BGP_PATH_VALID); path_valid = !!CHECK_FLAG(path->flags, BGP_PATH_VALID);
@ -874,7 +913,7 @@ void bgp_nht_reg_enhe_cap_intfs(struct peer *peer)
if (p.family != AF_INET6) if (p.family != AF_INET6)
return; return;
bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p); bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0);
if (!bnc) if (!bnc)
return; return;
@ -916,7 +955,7 @@ void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer)
if (p.family != AF_INET6) if (p.family != AF_INET6)
return; return;
bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p); bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0);
if (!bnc) if (!bnc)
return; return;