// SPDX-License-Identifier: GPL-2.0-or-later
/* Zebra Nexthop Group Code.
 * Copyright (C) 2019 Cumulus Networks, Inc.
 *                    Donald Sharp
 *                    Stephen Worley
 */
#include <zebra.h>

#include "lib/nexthop.h"
#include "lib/nexthop_group_private.h"
#include "lib/routemap.h"
#include "lib/mpls.h"
#include "lib/jhash.h"
#include "lib/debug.h"
#include "lib/lib_errors.h"

#include "zebra/connected.h"
#include "zebra/debug.h"
#include "zebra/zebra_router.h"
#include "zebra/zebra_nhg_private.h"
#include "zebra/zebra_rnh.h"
#include "zebra/zebra_routemap.h"
#include "zebra/zebra_srte.h"
#include "zebra/zserv.h"
#include "zebra/rt.h"
#include "zebra_errors.h"
#include "zebra_dplane.h"
#include "zebra/interface.h"
#include "zebra/zapi_msg.h"
#include "zebra/rib.h"
#include "zebra/zebra_vxlan.h"

DEFINE_MTYPE_STATIC(ZEBRA, NHG, "Nexthop Group Entry");
DEFINE_MTYPE_STATIC(ZEBRA, NHG_CONNECTED, "Nexthop Group Connected");
DEFINE_MTYPE_STATIC(ZEBRA, NHG_CTX, "Nexthop Group Context");

/* Map backup nexthop indices between two nhes */
struct backup_nh_map_s {
	int map_count;

	struct {
		uint8_t orig_idx;
		uint8_t new_idx;
	} map[MULTIPATH_NUM];
};

/* id counter to keep in sync with kernel */
uint32_t id_counter;

/* Controlled through ui */
static bool g_nexthops_enabled = true;
static bool proto_nexthops_only;
static bool use_recursive_backups = true;

static struct nhg_hash_entry *depends_find(const struct nexthop *nh, afi_t afi,
					   int type, bool from_dplane);
static void depends_add(struct nhg_connected_tree_head *head,
			struct nhg_hash_entry *depend);
static struct nhg_hash_entry *
depends_find_add(struct nhg_connected_tree_head *head, struct nexthop *nh,
		 afi_t afi, int type, bool from_dplane);
static struct nhg_hash_entry *
depends_find_id_add(struct nhg_connected_tree_head *head, uint32_t id);
static void depends_decrement_free(struct nhg_connected_tree_head *head);

static struct nhg_backup_info *
nhg_backup_copy(const struct nhg_backup_info *orig);

/* Helper function for getting the next allocatable ID */
static uint32_t nhg_get_next_id(void)
{
	while (1) {
		id_counter++;

		if (id_counter == ZEBRA_NHG_PROTO_LOWER) {
			id_counter = 0;
			continue;
		}

		if (!zebra_nhg_lookup_id(id_counter))
			break;
	}

	return id_counter;
}

static void nhg_connected_free(struct nhg_connected *dep)
{
	XFREE(MTYPE_NHG_CONNECTED, dep);
}

static struct nhg_connected *nhg_connected_new(struct nhg_hash_entry *nhe)
{
	struct nhg_connected *new = NULL;

	new = XCALLOC(MTYPE_NHG_CONNECTED, sizeof(struct nhg_connected));
	new->nhe = nhe;

	return new;
}

void nhg_connected_tree_free(struct nhg_connected_tree_head *head)
{
	struct nhg_connected *rb_node_dep = NULL;

	if (!nhg_connected_tree_is_empty(head)) {
		frr_each_safe(nhg_connected_tree, head, rb_node_dep) {
			nhg_connected_tree_del(head, rb_node_dep);
			nhg_connected_free(rb_node_dep);
		}
	}
}

bool nhg_connected_tree_is_empty(const struct nhg_connected_tree_head *head)
{
	return nhg_connected_tree_count(head) ? false : true;
}

struct nhg_connected *
nhg_connected_tree_root(struct nhg_connected_tree_head *head)
{
	return nhg_connected_tree_first(head);
}

struct nhg_hash_entry *
nhg_connected_tree_del_nhe(struct nhg_connected_tree_head *head,
			   struct nhg_hash_entry *depend)
{
	struct nhg_connected lookup = {};
	struct nhg_connected *remove = NULL;
	struct nhg_hash_entry *removed_nhe;

	lookup.nhe = depend;

	/* Lookup to find the element, then remove it */
	remove = nhg_connected_tree_find(head, &lookup);
	if (remove)
		/* Re-returning here just in case this API changes..
		 * the _del list api's are a bit undefined at the moment.
		 *
		 * So hopefully returning here will make it fail if the api
		 * changes to something different than currently expected.
		 */
		remove = nhg_connected_tree_del(head, remove);

	/* If the entry was successfully removed, free the 'connected' struct */
	if (remove) {
		removed_nhe = remove->nhe;
		nhg_connected_free(remove);
		return removed_nhe;
	}

	return NULL;
}

/* Assuming UNIQUE RB tree. If this changes, assumptions here about
 * insertion need to change.
 */
struct nhg_hash_entry *
nhg_connected_tree_add_nhe(struct nhg_connected_tree_head *head,
			   struct nhg_hash_entry *depend)
{
	struct nhg_connected *new = NULL;

	new = nhg_connected_new(depend);

	/* On success, NULL will be returned from the
	 * RB code.
	 */
	if (new && (nhg_connected_tree_add(head, new) == NULL))
		return NULL;

	/* If it wasn't successful, it must be a duplicate. We enforce the
	 * unique property for the `nhg_connected` tree.
	 */
	nhg_connected_free(new);

	return depend;
}
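
/* Decrement the ref count of every nhe in the connected tree */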
static void
nhg_connected_tree_decrement_ref(struct nhg_connected_tree_head *head)
{
	struct nhg_connected *rb_node_dep = NULL;

	frr_each_safe(nhg_connected_tree, head, rb_node_dep) {
		zebra_nhg_decrement_ref(rb_node_dep->nhe);
	}
}

static void
nhg_connected_tree_increment_ref(struct nhg_connected_tree_head *head)
{
	struct nhg_connected *rb_node_dep = NULL;

	frr_each(nhg_connected_tree, head, rb_node_dep) {
		zebra_nhg_increment_ref(rb_node_dep->nhe);
	}
}

struct nhg_hash_entry *zebra_nhg_resolve(struct nhg_hash_entry *nhe)
{
	if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_RECURSIVE)
	    && !zebra_nhg_depends_is_empty(nhe)) {
		nhe = nhg_connected_tree_root(&nhe->nhg_depends)->nhe;
		return zebra_nhg_resolve(nhe);
	}

	return nhe;
}

unsigned int zebra_nhg_depends_count(const struct nhg_hash_entry *nhe)
{
	return nhg_connected_tree_count(&nhe->nhg_depends);
}

bool zebra_nhg_depends_is_empty(const struct nhg_hash_entry *nhe)
{
	return nhg_connected_tree_is_empty(&nhe->nhg_depends);
}

static void zebra_nhg_depends_del(struct nhg_hash_entry *from,
				  struct nhg_hash_entry *depend)
{
	nhg_connected_tree_del_nhe(&from->nhg_depends, depend);
}

static void zebra_nhg_depends_init(struct nhg_hash_entry *nhe)
{
	nhg_connected_tree_init(&nhe->nhg_depends);
}

unsigned int zebra_nhg_dependents_count(const struct nhg_hash_entry *nhe)
{
	return nhg_connected_tree_count(&nhe->nhg_dependents);
}

bool zebra_nhg_dependents_is_empty(const struct nhg_hash_entry *nhe)
{
	return nhg_connected_tree_is_empty(&nhe->nhg_dependents);
}

static void zebra_nhg_dependents_del(struct nhg_hash_entry *from,
				     struct nhg_hash_entry *dependent)
{
	nhg_connected_tree_del_nhe(&from->nhg_dependents, dependent);
}

static void zebra_nhg_dependents_add(struct nhg_hash_entry *to,
				     struct nhg_hash_entry *dependent)
{
	nhg_connected_tree_add_nhe(&to->nhg_dependents, dependent);
}

static void zebra_nhg_dependents_init(struct nhg_hash_entry *nhe)
{
	nhg_connected_tree_init(&nhe->nhg_dependents);
}

/* Release this nhe from anything depending on it */
static void zebra_nhg_dependents_release(struct nhg_hash_entry *nhe)
{
	struct nhg_connected *rb_node_dep = NULL;

	frr_each_safe(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep) {
		zebra_nhg_depends_del(rb_node_dep->nhe, nhe);
		/* recheck validity of the dependent */
		zebra_nhg_check_valid(rb_node_dep->nhe);
	}
}

/* Release this nhe from anything that it depends on */
static void zebra_nhg_depends_release(struct nhg_hash_entry *nhe)
{
	if (!zebra_nhg_depends_is_empty(nhe)) {
		struct nhg_connected *rb_node_dep = NULL;

		frr_each_safe(nhg_connected_tree, &nhe->nhg_depends,
			      rb_node_dep) {
			zebra_nhg_dependents_del(rb_node_dep->nhe, nhe);
		}
	}
}

struct nhg_hash_entry *zebra_nhg_lookup_id(uint32_t id)
{
	struct nhg_hash_entry lookup = {};

	lookup.id = id;
	return hash_lookup(zrouter.nhgs_id, &lookup);
}
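
/* Insert an nhe into the ID-indexed hash table; fails if an entry with the
 * same ID is already present.
 */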
static int zebra_nhg_insert_id(struct nhg_hash_entry *nhe)
{
	if (hash_lookup(zrouter.nhgs_id, nhe)) {
		flog_err(
			EC_ZEBRA_NHG_TABLE_INSERT_FAILED,
			"Failed inserting NHG %pNG into the ID hash table, entry already exists",
			nhe);
		return -1;
	}

	(void)hash_get(zrouter.nhgs_id, nhe, hash_alloc_intern);

	return 0;
}

static void zebra_nhg_set_if(struct nhg_hash_entry *nhe, struct interface *ifp)
{
	struct zebra_if *zif = (struct zebra_if *)ifp->info;

	nhe->ifp = ifp;
	nhg_connected_tree_add_nhe(&zif->nhg_dependents, nhe);
}

static void
zebra_nhg_connect_depends(struct nhg_hash_entry *nhe,
			  struct nhg_connected_tree_head *nhg_depends)
{
	struct nhg_connected *rb_node_dep = NULL;

	/* This has been allocated higher above in the stack. Could probably
	 * re-allocate and free the old stuff but just using the same memory
	 * for now. Otherwise, there might be a time trade-off for repeated
	 * alloc/frees at startup.
	 */
	nhe->nhg_depends = *nhg_depends;

	/* Attach backpointer to anything that it depends on */
	zebra_nhg_dependents_init(nhe);
	if (!zebra_nhg_depends_is_empty(nhe)) {
		frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
			if (IS_ZEBRA_DEBUG_NHG_DETAIL)
				zlog_debug("%s: nhe %p (%pNG), dep %p (%pNG)",
					   __func__, nhe, nhe, rb_node_dep->nhe,
					   rb_node_dep->nhe);

			zebra_nhg_dependents_add(rb_node_dep->nhe, nhe);
		}
	}
}

/* Init an nhe, for use in a hash lookup for example */
void zebra_nhe_init(struct nhg_hash_entry *nhe, afi_t afi,
		    const struct nexthop *nh)
{
	memset(nhe, 0, sizeof(struct nhg_hash_entry));
	nhe->vrf_id = VRF_DEFAULT;
	nhe->type = ZEBRA_ROUTE_NHG;
	nhe->afi = AFI_UNSPEC;

	/* There are some special rules that apply to groups representing
	 * a single nexthop.
	 */
	if (nh && (nh->next == NULL)) {
		switch (nh->type) {
		/*
		 * This switch case handles setting the afi differently
		 * for ipv4/v6 routes. Ifindex nexthop
		 * objects cannot be ambiguous, they must be Address
		 * Family specific as the kernel relies on these
		 * for some reason. Blackholes can be v6 because the
		 * v4 kernel infrastructure allows the usage of v6
		 * blackholes in this case. If we get here, we will
		 * either use the AF of the route, or the one we got
		 * passed from here from the kernel.
		 */
		case NEXTHOP_TYPE_IFINDEX:
			nhe->afi = afi;
			break;
		case NEXTHOP_TYPE_BLACKHOLE:
			nhe->afi = AFI_IP6;
			break;
		case NEXTHOP_TYPE_IPV4_IFINDEX:
		case NEXTHOP_TYPE_IPV4:
			nhe->afi = AFI_IP;
			break;
		case NEXTHOP_TYPE_IPV6_IFINDEX:
		case NEXTHOP_TYPE_IPV6:
			nhe->afi = AFI_IP6;
			break;
		}
	}
}

struct nhg_hash_entry *zebra_nhg_alloc(void)
{
	struct nhg_hash_entry *nhe;

	nhe = XCALLOC(MTYPE_NHG, sizeof(struct nhg_hash_entry));

	return nhe;
}

/*
 * Allocate new nhe and make shallow copy of 'orig'; no
 * recursive info is copied.
 */
struct nhg_hash_entry *zebra_nhe_copy(const struct nhg_hash_entry *orig,
				      uint32_t id)
{
	struct nhg_hash_entry *nhe;

	nhe = zebra_nhg_alloc();

	nhe->id = id;

	nexthop_group_copy(&(nhe->nhg), &(orig->nhg));

	nhe->vrf_id = orig->vrf_id;
	nhe->afi = orig->afi;
	nhe->type = orig->type ? orig->type : ZEBRA_ROUTE_NHG;
	nhe->refcnt = 0;
	nhe->dplane_ref = zebra_router_get_next_sequence();

	/* Copy backup info also, if present */
	if (orig->backup_info)
		nhe->backup_info = nhg_backup_copy(orig->backup_info);

	/*
	 * This is a special case, Zebra needs to track
	 * whether or not this flag was set on an initial
	 * unresolved NHG
	 */
	if (CHECK_FLAG(orig->flags, NEXTHOP_GROUP_INITIAL_DELAY_INSTALL))
		SET_FLAG(nhe->flags, NEXTHOP_GROUP_INITIAL_DELAY_INSTALL);

	return nhe;
}

/* Allocation via hash handler */
static void *zebra_nhg_hash_alloc(void *arg)
{
	struct nhg_hash_entry *nhe = NULL;
	struct nhg_hash_entry *copy = arg;

	nhe = zebra_nhe_copy(copy, copy->id);

	/* Mark duplicate nexthops in a group at creation time. */
	nexthop_group_mark_duplicates(&(nhe->nhg));

	/*
	 * Add the ifp now if it's not a group or recursive and has ifindex.
	 *
	 * A proto-owned ID is always a group.
	 */
	if (!PROTO_OWNED(nhe) && nhe->nhg.nexthop && !nhe->nhg.nexthop->next
	    && !nhe->nhg.nexthop->resolved && nhe->nhg.nexthop->ifindex) {
		struct interface *ifp = NULL;

		ifp = if_lookup_by_index(nhe->nhg.nexthop->ifindex,
					 nhe->nhg.nexthop->vrf_id);
		if (ifp)
			zebra_nhg_set_if(nhe, ifp);
		else {
			if (IS_ZEBRA_DEBUG_NHG)
				zlog_debug(
					"Failed to lookup an interface with ifindex=%d in vrf=%u for NHE %pNG",
					nhe->nhg.nexthop->ifindex,
					nhe->nhg.nexthop->vrf_id, nhe);
		}
	}

	return nhe;
}
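
/* Hash key for the NHG hash table: mix the nexthop group hash (and the
 * backup group hash, if any) with the entry's type, vrf and afi.
 */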
uint32_t zebra_nhg_hash_key(const void *arg)
{
	const struct nhg_hash_entry *nhe = arg;
	uint32_t key = 0x5a351234;
	uint32_t primary = 0;
	uint32_t backup = 0;

	primary = nexthop_group_hash(&(nhe->nhg));
	if (nhe->backup_info)
		backup = nexthop_group_hash(&(nhe->backup_info->nhe->nhg));

	key = jhash_3words(primary, backup, nhe->type, key);

	key = jhash_2words(nhe->vrf_id, nhe->afi, key);

	return key;
}
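
/* Hash key for the ID-indexed NHG table: the NHG ID itself */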
uint32_t zebra_nhg_id_key(const void *arg)
{
	const struct nhg_hash_entry *nhe = arg;

	return nhe->id;
}

/* Helper with common nhg/nhe nexthop comparison logic */
static bool nhg_compare_nexthops(const struct nexthop *nh1,
				 const struct nexthop *nh2)
{
	assert(nh1 != NULL && nh2 != NULL);

	/*
	 * We have to check the active flag of each individual one,
	 * not just the overall active_num. This solves the special case
	 * issue of a route with a nexthop group with one nexthop
	 * resolving to itself and thus marking it inactive. If we
	 * have two different routes each wanting to mark a different
	 * nexthop inactive, they need to hash to two different groups.
	 *
	 * If we just hashed on num_active, they would hash the same
	 * which is incorrect.
	 *
	 * ex)
	 *      1.1.1.0/24
	 *           -> 1.1.1.1 dummy1 (inactive)
	 *           -> 1.1.2.1 dummy2
	 *
	 *      1.1.2.0/24
	 *           -> 1.1.1.1 dummy1
	 *           -> 1.1.2.1 dummy2 (inactive)
	 *
	 * Without checking each individual one, they would hash to
	 * the same group and both have 1.1.1.1 dummy1 marked inactive.
	 *
	 */
	if (CHECK_FLAG(nh1->flags, NEXTHOP_FLAG_ACTIVE)
	    != CHECK_FLAG(nh2->flags, NEXTHOP_FLAG_ACTIVE))
		return false;

	if (!nexthop_same(nh1, nh2))
		return false;

	return true;
}

bool zebra_nhg_hash_equal(const void *arg1, const void *arg2)
{
	const struct nhg_hash_entry *nhe1 = arg1;
	const struct nhg_hash_entry *nhe2 = arg2;
	struct nexthop *nexthop1;
	struct nexthop *nexthop2;

	/* If both NHG's have id's then we can just know that
	 * they are either identical or not. This comparison
	 * is only ever used for hash equality. NHE's id
	 * is sufficient to distinguish them. This is especially
	 * true if NHG's are owned by an upper level protocol.
	 */
	if (nhe1->id && nhe2->id) {
		if (nhe1->id == nhe2->id)
			return true;

		return false;
	}

	if (nhe1->type != nhe2->type)
		return false;

	if (nhe1->vrf_id != nhe2->vrf_id)
		return false;

	if (nhe1->afi != nhe2->afi)
		return false;

	if (nhe1->nhg.nhgr.buckets != nhe2->nhg.nhgr.buckets)
		return false;

	if (nhe1->nhg.nhgr.idle_timer != nhe2->nhg.nhgr.idle_timer)
		return false;

	if (nhe1->nhg.nhgr.unbalanced_timer != nhe2->nhg.nhgr.unbalanced_timer)
		return false;

	/* Nexthops should be in-order, so we simply compare them in-place */
	for (nexthop1 = nhe1->nhg.nexthop, nexthop2 = nhe2->nhg.nexthop;
	     nexthop1 && nexthop2;
	     nexthop1 = nexthop1->next, nexthop2 = nexthop2->next) {
		if (!nhg_compare_nexthops(nexthop1, nexthop2))
			return false;
	}

	/* Check for unequal list lengths */
	if (nexthop1 || nexthop2)
		return false;

	/* If there's no backup info, comparison is done. */
	if ((nhe1->backup_info == NULL) && (nhe2->backup_info == NULL))
		return true;

	/* Compare backup info also - test the easy things first */
	if (nhe1->backup_info && (nhe2->backup_info == NULL))
		return false;
	if (nhe2->backup_info && (nhe1->backup_info == NULL))
		return false;

	/* Compare number of backups before actually comparing any */
	for (nexthop1 = nhe1->backup_info->nhe->nhg.nexthop,
	     nexthop2 = nhe2->backup_info->nhe->nhg.nexthop;
	     nexthop1 && nexthop2;
	     nexthop1 = nexthop1->next, nexthop2 = nexthop2->next) {
		;
	}

	/* Did we find the end of one list before the other? */
	if (nexthop1 || nexthop2)
		return false;

	/* Have to compare the backup nexthops */
	for (nexthop1 = nhe1->backup_info->nhe->nhg.nexthop,
	     nexthop2 = nhe2->backup_info->nhe->nhg.nexthop;
	     nexthop1 && nexthop2;
	     nexthop1 = nexthop1->next, nexthop2 = nexthop2->next) {
		if (!nhg_compare_nexthops(nexthop1, nexthop2))
			return false;
	}

	return true;
}
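
/* Hash equality for the ID-indexed NHG table: compare IDs only */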
bool zebra_nhg_hash_id_equal(const void *arg1, const void *arg2)
{
	const struct nhg_hash_entry *nhe1 = arg1;
	const struct nhg_hash_entry *nhe2 = arg2;

	return nhe1->id == nhe2->id;
}

static int zebra_nhg_process_grp(struct nexthop_group *nhg, struct nhg_connected_tree_head *depends,
				 struct nh_grp *grp, uint16_t count,
				 struct nhg_resilience *resilience)
{
	nhg_connected_tree_init(depends);

	for (int i = 0; i < count; i++) {
		struct nhg_hash_entry *depend = NULL;
		/* We do not care about nexthop_grp.weight at
		 * this time. But we should figure out
		 * how to adapt this to our code in
		 * the future.
		 */
		depend = depends_find_id_add(depends, grp[i].id);

		if (!depend) {
			flog_err(
				EC_ZEBRA_NHG_SYNC,
				"Received Nexthop Group from the kernel with a dependent Nexthop ID (%u) which we do not have in our table",
				grp[i].id);
			return -1;
		}

		/*
		 * If this is a nexthop with its own group
		 * dependencies, add them as well. Not sure it's
		 * even possible to have a group within a group
		 * in the kernel.
		 */

		copy_nexthops(&nhg->nexthop, depend->nhg.nexthop, NULL);
	}

	if (resilience)
		nhg->nhgr = *resilience;

	return 0;
}

static void handle_recursive_depend(struct nhg_connected_tree_head *nhg_depends,
				    struct nexthop *nh, afi_t afi, int type)
{
	struct nhg_hash_entry *depend = NULL;
	struct nexthop_group resolved_ng = {};

	resolved_ng.nexthop = nh;

	if (IS_ZEBRA_DEBUG_NHG_DETAIL)
		zlog_debug("%s: head %p, nh %pNHv",
			   __func__, nhg_depends, nh);

	depend = zebra_nhg_rib_find(0, &resolved_ng, afi, type);

	if (IS_ZEBRA_DEBUG_NHG_DETAIL)
		zlog_debug("%s: nh %pNHv => %p (%u)",
			   __func__, nh, depend,
			   depend ? depend->id : 0);

	if (depend)
		depends_add(nhg_depends, depend);
}

/*
 * Lookup an nhe in the global hash, using data from another nhe. If 'lookup'
 * has an id value, that's used. Create a new global/shared nhe if not found.
 */
static bool zebra_nhe_find(struct nhg_hash_entry **nhe, /* return value */
			   struct nhg_hash_entry *lookup,
			   struct nhg_connected_tree_head *nhg_depends,
			   afi_t afi, bool from_dplane)
{
	bool created = false;
	bool recursive = false;
	struct nhg_hash_entry *newnhe, *backup_nhe;
	struct nexthop *nh = NULL;

	if (lookup->id)
		(*nhe) = zebra_nhg_lookup_id(lookup->id);
	else
		(*nhe) = hash_lookup(zrouter.nhgs, lookup);

	if (IS_ZEBRA_DEBUG_NHG_DETAIL)
		zlog_debug("%s: id %u, lookup %p, vrf %d, type %d, depends %p%s => Found %p(%pNG)",
			   __func__, lookup->id, lookup, lookup->vrf_id,
			   lookup->type, nhg_depends,
			   (from_dplane ? " (from dplane)" : ""), *nhe, *nhe);

	/* If we found an existing object, we're done */
	if (*nhe)
		goto done;

	/* We're going to create/insert a new nhe:
	 * assign the next global id value if necessary.
	 */
	if (lookup->id == 0)
		lookup->id = nhg_get_next_id();

	if (!from_dplane && lookup->id < ZEBRA_NHG_PROTO_LOWER) {
		/*
		 * This is a zebra hashed/owned NHG.
		 *
		 * It goes in HASH and ID table.
		 */
		newnhe = hash_get(zrouter.nhgs, lookup, zebra_nhg_hash_alloc);
		zebra_nhg_insert_id(newnhe);
	} else {
		/*
		 * This is an upper-proto owned NHG, or one we read in from the
		 * dataplane, and should not be hashed to.
		 *
		 * It goes in ID table.
		 */
		newnhe =
			hash_get(zrouter.nhgs_id, lookup, zebra_nhg_hash_alloc);
	}

	created = true;

	/* Mail back the new object */
	*nhe = newnhe;

	if (IS_ZEBRA_DEBUG_NHG_DETAIL)
		zlog_debug("%s: => created %p (%pNG)", __func__, newnhe,
			   newnhe);

	/* Only hash/lookup the depends if the first lookup
	 * fails to find something. This should hopefully save a
	 * lot of cycles for larger ecmp sizes.
	 */
	if (nhg_depends) {
		/* If you don't want to hash on each nexthop in the
		 * nexthop group struct you can pass the depends
		 * directly. Kernel-side we do this since it just looks
		 * them up via IDs.
		 */
		zebra_nhg_connect_depends(newnhe, nhg_depends);
		goto done;
	}

	/* Prepare dependency relationships if this is not a
	 * singleton nexthop. There are two cases: a single
	 * recursive nexthop, where we need a relationship to the
	 * resolving nexthop; or a group of nexthops, where we need
	 * relationships with the corresponding singletons.
	 */
	zebra_nhg_depends_init(newnhe);

	nh = newnhe->nhg.nexthop;

	if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE))
		SET_FLAG(newnhe->flags, NEXTHOP_GROUP_VALID);

	if (nh->next == NULL && newnhe->id < ZEBRA_NHG_PROTO_LOWER) {
		if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE)) {
			/* Single recursive nexthop */
			handle_recursive_depend(&newnhe->nhg_depends,
						nh->resolved, afi,
						newnhe->type);
			recursive = true;
		}
	} else {
		/* Proto-owned are groups by default */
		/* List of nexthops */
		for (nh = newnhe->nhg.nexthop; nh; nh = nh->next) {
			if (IS_ZEBRA_DEBUG_NHG_DETAIL)
				zlog_debug("%s: depends NH %pNHv %s",
					   __func__, nh,
					   CHECK_FLAG(nh->flags,
						      NEXTHOP_FLAG_RECURSIVE) ?
					   "(R)" : "");

			depends_find_add(&newnhe->nhg_depends, nh, afi,
					 newnhe->type, from_dplane);
		}
	}

	if (recursive)
		SET_FLAG(newnhe->flags, NEXTHOP_GROUP_RECURSIVE);

	/* Attach dependent backpointers to singletons */
	zebra_nhg_connect_depends(newnhe, &newnhe->nhg_depends);

	/**
	 * Backup Nexthops
	 */
	if (zebra_nhg_get_backup_nhg(newnhe) == NULL ||
	    zebra_nhg_get_backup_nhg(newnhe)->nexthop == NULL)
		goto done;

	/* If there are backup nexthops, add them to the backup
	 * depends tree. The rules here are a little different.
	 */
	recursive = false;
	backup_nhe = newnhe->backup_info->nhe;

	nh = backup_nhe->nhg.nexthop;

	/* Singleton recursive NH */
	if (nh->next == NULL &&
	    CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE)) {
		if (IS_ZEBRA_DEBUG_NHG_DETAIL)
			zlog_debug("%s: backup depend NH %pNHv (R)",
				   __func__, nh);

		/* Single recursive nexthop */
		handle_recursive_depend(&backup_nhe->nhg_depends, nh->resolved,
					afi, backup_nhe->type);
		recursive = true;
	} else {
		/* One or more backup NHs */
		for (; nh; nh = nh->next) {
			if (IS_ZEBRA_DEBUG_NHG_DETAIL)
				zlog_debug("%s: backup depend NH %pNHv %s",
					   __func__, nh,
					   CHECK_FLAG(nh->flags,
						      NEXTHOP_FLAG_RECURSIVE) ?
					   "(R)" : "");

			depends_find_add(&backup_nhe->nhg_depends, nh, afi,
					 backup_nhe->type, from_dplane);
		}
	}

	if (recursive)
		SET_FLAG(backup_nhe->flags, NEXTHOP_GROUP_RECURSIVE);

done:
	/* Reset time since last update */
	(*nhe)->uptime = monotime(NULL);

	return created;
}

/*
 * Lookup or create an nhe, based on an nhg or an nhe id.
 */
static bool zebra_nhg_find(struct nhg_hash_entry **nhe, uint32_t id,
			   struct nexthop_group *nhg,
			   struct nhg_connected_tree_head *nhg_depends,
			   vrf_id_t vrf_id, afi_t afi, int type,
			   bool from_dplane)
{
	struct nhg_hash_entry lookup = {};
	bool created = false;

	if (IS_ZEBRA_DEBUG_NHG_DETAIL)
		zlog_debug("%s: id %u, nhg %p, vrf %d, type %d, depends %p",
			   __func__, id, nhg, vrf_id, type,
			   nhg_depends);

	/* Use a temporary nhe and call into the superset/common code */
	lookup.id = id;
	lookup.type = type ? type : ZEBRA_ROUTE_NHG;
	lookup.nhg = *nhg;

	lookup.vrf_id = vrf_id;
	if (nhg_depends || lookup.nhg.nexthop->next) {
		/* Groups can have all vrfs and AF's in them */
		lookup.afi = AFI_UNSPEC;
	} else {
		switch (lookup.nhg.nexthop->type) {
		case (NEXTHOP_TYPE_IFINDEX):
		case (NEXTHOP_TYPE_BLACKHOLE):
			/*
			 * This switch case handles setting the afi different
			 * for ipv4/v6 routes. Ifindex/blackhole nexthop
			 * objects cannot be ambiguous, they must be Address
			 * Family specific. If we get here, we will either use
			 * the AF of the route, or the one we got passed from
			 * here from the kernel.
			 */
			lookup.afi = afi;
			break;
		case (NEXTHOP_TYPE_IPV4_IFINDEX):
		case (NEXTHOP_TYPE_IPV4):
			lookup.afi = AFI_IP;
			break;
		case (NEXTHOP_TYPE_IPV6_IFINDEX):
		case (NEXTHOP_TYPE_IPV6):
			lookup.afi = AFI_IP6;
			break;
		}
	}

	created = zebra_nhe_find(nhe, &lookup, nhg_depends, afi, from_dplane);

	return created;
}

/* Find/create a single nexthop */
static struct nhg_hash_entry *zebra_nhg_find_nexthop(uint32_t id,
						     struct nexthop *nh,
						     afi_t afi, int type,
						     bool from_dplane)
{
	struct nhg_hash_entry *nhe = NULL;
	struct nexthop_group nhg = {};
	vrf_id_t vrf_id = !vrf_is_backend_netns() ? VRF_DEFAULT : nh->vrf_id;

	nexthop_group_add_sorted(&nhg, nh);

	zebra_nhg_find(&nhe, id, &nhg, NULL, vrf_id, afi, type, from_dplane);

	if (IS_ZEBRA_DEBUG_NHG_DETAIL)
		zlog_debug("%s: nh %pNHv => %p (%pNG)", __func__, nh, nhe, nhe);

	return nhe;
}
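
/* Simple accessors for struct nhg_ctx fields */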
static uint32_t nhg_ctx_get_id(const struct nhg_ctx *ctx)
{
	return ctx->id;
}

static void nhg_ctx_set_status(struct nhg_ctx *ctx, enum nhg_ctx_status status)
{
	ctx->status = status;
}

static enum nhg_ctx_status nhg_ctx_get_status(const struct nhg_ctx *ctx)
{
	return ctx->status;
}

static void nhg_ctx_set_op(struct nhg_ctx *ctx, enum nhg_ctx_op_e op)
{
	ctx->op = op;
}

static enum nhg_ctx_op_e nhg_ctx_get_op(const struct nhg_ctx *ctx)
{
	return ctx->op;
}

static vrf_id_t nhg_ctx_get_vrf_id(const struct nhg_ctx *ctx)
{
	return ctx->vrf_id;
}

static int nhg_ctx_get_type(const struct nhg_ctx *ctx)
{
	return ctx->type;
}

static int nhg_ctx_get_afi(const struct nhg_ctx *ctx)
{
	return ctx->afi;
}

static struct nexthop *nhg_ctx_get_nh(struct nhg_ctx *ctx)
{
	return &ctx->u.nh;
}

static uint16_t nhg_ctx_get_count(const struct nhg_ctx *ctx)
{
	return ctx->count;
}

static struct nh_grp *nhg_ctx_get_grp(struct nhg_ctx *ctx)
{
	return ctx->u.grp;
}

static struct nhg_resilience *nhg_ctx_get_resilience(struct nhg_ctx *ctx)
{
	return &ctx->resilience;
}

static struct nhg_ctx *nhg_ctx_new(void)
{
	struct nhg_ctx *new;

	new = XCALLOC(MTYPE_NHG_CTX, sizeof(struct nhg_ctx));

	return new;
}

void nhg_ctx_free(struct nhg_ctx **ctx)
{
	struct nexthop *nh;

	if (ctx == NULL)
		return;

	assert((*ctx) != NULL);

	if (nhg_ctx_get_count(*ctx))
		goto done;

	nh = nhg_ctx_get_nh(*ctx);

	nexthop_del_labels(nh);
	nexthop_del_srv6_seg6local(nh);
	nexthop_del_srv6_seg6(nh);

done:
	XFREE(MTYPE_NHG_CTX, *ctx);
}

static struct nhg_ctx *nhg_ctx_init(uint32_t id, struct nexthop *nh, struct nh_grp *grp,
				    vrf_id_t vrf_id, afi_t afi, int type, uint16_t count,
				    struct nhg_resilience *resilience)
{
	struct nhg_ctx *ctx = NULL;

	ctx = nhg_ctx_new();

	ctx->id = id;
	ctx->vrf_id = vrf_id;
	ctx->afi = afi;
	ctx->type = type;
	ctx->count = count;

	if (resilience)
		ctx->resilience = *resilience;

	if (count)
		/* Copy over the array */
		memcpy(&ctx->u.grp, grp, count * sizeof(struct nh_grp));
	else if (nh)
		ctx->u.nh = *nh;

	return ctx;
}
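
/* Set an nhe's validity and propagate the change to its dependents */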
static void zebra_nhg_set_valid(struct nhg_hash_entry *nhe, bool valid)
{
	struct nhg_connected *rb_node_dep;
	bool dependent_valid = valid;

	if (valid)
		SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID);
	else {
		UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID);

		/* If we're in shutdown, this interface event needs to clean
		 * up installed NHGs, so don't clear that flag directly.
		 */
		if (!zebra_router_in_shutdown())
			UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
	}

	/* Update validity of nexthops depending on it */
	frr_each (nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep) {
		dependent_valid = valid;
		if (!valid) {
			/*
			 * Grab the first nexthop from the depending nexthop group
			 * then let's find the nexthop in that group that matches
			 * my individual nexthop and mark it as no longer ACTIVE
			 */
			struct nexthop *nexthop = rb_node_dep->nhe->nhg.nexthop;

			while (nexthop) {
				if (nexthop_same(nexthop, nhe->nhg.nexthop)) {
					/* Invalid Nexthop */
					UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
				} else {
					/*
					 * If other nexthops in the nexthop
					 * group are valid then we can continue
					 * to use this nexthop group as valid
					 */
					if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
						dependent_valid = true;
				}
				nexthop = nexthop->next;
			}
		}
		zebra_nhg_set_valid(rb_node_dep->nhe, dependent_valid);
	}
}

void zebra_nhg_check_valid(struct nhg_hash_entry *nhe)
{
	struct nhg_connected *rb_node_dep = NULL;
	bool valid = false;

	/*
	 * If I have other nhe's depending on me, or I have nothing
	 * I am depending on then this is a
	 * singleton nhe so set this nexthops flag as appropriate.
	 */
	if (nhg_connected_tree_count(&nhe->nhg_depends) ||
	    nhg_connected_tree_count(&nhe->nhg_dependents) == 0) {
		UNSET_FLAG(nhe->nhg.nexthop->flags, NEXTHOP_FLAG_FIB);
|
zebra: Properly note that a nhg's nexthop has gone down
Current code when a link is set down is to just mark the
nexthop group as not properly setup. Leaving situations
where when an interface goes down and show output is
entered we see incorrect state. This is true for anything
that would be checking those flags at that point in time.
Modify the interface down nexthop group code to notice the
nexthops appropriately ( and I mean set the appropriate flags )
and to allow a `show ip route` command to actually display
what is going on with the nexthops.
eva# show ip route 1.0.0.0
Routing entry for 1.0.0.0/32
Known via "sharp", distance 150, metric 0, best
Last update 00:00:06 ago
* 192.168.44.33, via dummy1, weight 1
* 192.168.45.33, via dummy2, weight 1
sharpd@eva:~/frr1$ sudo ip link set dummy2 down
eva# show ip route 1.0.0.0
Routing entry for 1.0.0.0/32
Known via "sharp", distance 150, metric 0, best
Last update 00:00:12 ago
* 192.168.44.33, via dummy1, weight 1
192.168.45.33, via dummy2 inactive, weight 1
Notice now that the 1.0.0.0/32 route now correctly
displays the route for the nexthop group entry.
Signed-off-by: Donald Sharp <sharpd@nvidia.com>
2024-02-07 20:56:15 +01:00
|
|
|
UNSET_FLAG(nhe->nhg.nexthop->flags, NEXTHOP_FLAG_ACTIVE);
|
zebra: Properly note that a nhg's nexthop has gone down
Current code when a link is set down is to just mark the
nexthop group as not properly setup. Leaving situations
where when an interface goes down and show output is
entered we see incorrect state. This is true for anything
that would be checking those flags at that point in time.
Modify the interface down nexthop group code to notice the
nexthops appropriately ( and I mean set the appropriate flags )
and to allow a `show ip route` command to actually display
what is going on with the nexthops.
eva# show ip route 1.0.0.0
Routing entry for 1.0.0.0/32
Known via "sharp", distance 150, metric 0, best
Last update 00:00:06 ago
* 192.168.44.33, via dummy1, weight 1
* 192.168.45.33, via dummy2, weight 1
sharpd@eva:~/frr1$ sudo ip link set dummy2 down
eva# show ip route 1.0.0.0
Routing entry for 1.0.0.0/32
Known via "sharp", distance 150, metric 0, best
Last update 00:00:12 ago
* 192.168.44.33, via dummy1, weight 1
192.168.45.33, via dummy2 inactive, weight 1
Notice now that the 1.0.0.0/32 route now correctly
displays the route for the nexthop group entry.
Signed-off-by: Donald Sharp <sharpd@nvidia.com>
2024-02-07 20:56:15 +01:00
|
|
|
}
|
zebra: Properly note that a nhg's nexthop has gone down
Current code when a link is set down is to just mark the
nexthop group as not properly setup. Leaving situations
where when an interface goes down and show output is
entered we see incorrect state. This is true for anything
that would be checking those flags at that point in time.
Modify the interface down nexthop group code to notice the
nexthops appropriately ( and I mean set the appropriate flags )
and to allow a `show ip route` command to actually display
what is going on with the nexthops.
eva# show ip route 1.0.0.0
Routing entry for 1.0.0.0/32
Known via "sharp", distance 150, metric 0, best
Last update 00:00:06 ago
* 192.168.44.33, via dummy1, weight 1
* 192.168.45.33, via dummy2, weight 1
sharpd@eva:~/frr1$ sudo ip link set dummy2 down
eva# show ip route 1.0.0.0
Routing entry for 1.0.0.0/32
Known via "sharp", distance 150, metric 0, best
Last update 00:00:12 ago
* 192.168.44.33, via dummy1, weight 1
192.168.45.33, via dummy2 inactive, weight 1
Notice now that the 1.0.0.0/32 route now correctly
displays the route for the nexthop group entry.
Signed-off-by: Donald Sharp <sharpd@nvidia.com>
2024-02-07 20:56:15 +01:00
|
|
|
|
2019-10-23 22:49:07 +02:00
|
|
|
/* If anthing else in the group is valid, the group is valid */
|
|
|
|
frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
|
|
|
|
if (CHECK_FLAG(rb_node_dep->nhe->flags, NEXTHOP_GROUP_VALID)) {
|
|
|
|
valid = true;
|
2024-01-21 00:50:49 +01:00
|
|
|
break;
|
2019-10-23 22:49:07 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-02-07 20:28:37 +01:00
|
|
|
zebra_nhg_set_valid(nhe, valid);
|
2019-10-23 22:49:07 +02:00
|
|
|
}
|
|
|
|
|
2020-05-10 22:36:49 +02:00
|
|
|
static void zebra_nhg_release_all_deps(struct nhg_hash_entry *nhe)
{
    /* Remove it from any lists it may be on */
    zebra_nhg_depends_release(nhe);
    zebra_nhg_dependents_release(nhe);

    if (nhe->ifp) {
        struct zebra_if *zif = nhe->ifp->info;

        nhg_connected_tree_del_nhe(&zif->nhg_dependents, nhe);
    }
}

static void zebra_nhg_release(struct nhg_hash_entry *nhe)
{
    if (IS_ZEBRA_DEBUG_NHG_DETAIL)
        zlog_debug("%s: nhe %p (%pNG)", __func__, nhe, nhe);

    zebra_nhg_release_all_deps(nhe);

    /*
     * If it's not zebra owned, we didn't store it here and have to be
     * sure we don't clear one that's actually being used.
     */
    if (nhe->id < ZEBRA_NHG_PROTO_LOWER)
        hash_release(zrouter.nhgs, nhe);

    hash_release(zrouter.nhgs_id, nhe);
}

static void zebra_nhg_handle_uninstall(struct nhg_hash_entry *nhe)
{
    zebra_nhg_release(nhe);
    zebra_nhg_free(nhe);
}

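/*
 * Called once an nhe has been installed or marked valid: walk the entries
 * that depend on this one, mark them valid, and, when 'install' is set,
 * push those dependent NHGs down to the kernel as well.
 */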
static void zebra_nhg_handle_install(struct nhg_hash_entry *nhe, bool install)
{
    /* Update validity of groups depending on it */
    struct nhg_connected *rb_node_dep;

    frr_each_safe (nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep) {
        zebra_nhg_set_valid(rb_node_dep->nhe, true);
        /* install dependent NHG into kernel */
        if (install) {
            if (IS_ZEBRA_DEBUG_NHG_DETAIL)
                zlog_debug("%s nh id %u (flags 0x%x) associated dependent NHG %pNG install",
                           __func__, nhe->id, nhe->flags, rb_node_dep->nhe);
            zebra_nhg_install_kernel(rb_node_dep->nhe, ZEBRA_ROUTE_MAX);
        }
    }
}

/*
 * The kernel/other program has changed the state of a nexthop object we are
 * using.
 */
static void zebra_nhg_handle_kernel_state_change(struct nhg_hash_entry *nhe,
                                                 bool is_delete)
{
    if (nhe->refcnt) {
        flog_err(EC_ZEBRA_NHG_SYNC,
                 "Kernel %s a nexthop group with ID (%pNG) that we are still using for a route, sending it back down",
                 (is_delete ? "deleted" : "updated"), nhe);

        UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
        zebra_nhg_install_kernel(nhe, ZEBRA_ROUTE_MAX);
    } else
        zebra_nhg_handle_uninstall(nhe);
}

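/*
 * Process a new nexthop/NHG learned from the kernel via an nhg_ctx.  If
 * the ID is already present in our table this is treated as a kernel-side
 * update; otherwise a new hash entry is created, either from the group
 * membership carried in the ctx or from the single nexthop it contains.
 */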
static int nhg_ctx_process_new(struct nhg_ctx *ctx)
{
    struct nexthop_group *nhg = NULL;
    struct nhg_connected_tree_head nhg_depends = {};
    struct nhg_hash_entry *lookup = NULL;
    struct nhg_hash_entry *nhe = NULL;

    uint32_t id = nhg_ctx_get_id(ctx);
    uint16_t count = nhg_ctx_get_count(ctx);
    vrf_id_t vrf_id = nhg_ctx_get_vrf_id(ctx);
    int type = nhg_ctx_get_type(ctx);
    afi_t afi = nhg_ctx_get_afi(ctx);

    lookup = zebra_nhg_lookup_id(id);

    if (IS_ZEBRA_DEBUG_NHG_DETAIL)
        zlog_debug("%s: id %u, count %d, lookup => %p",
                   __func__, id, count, lookup);

    if (lookup) {
        /* This is already present in our table, hence an update
         * that we did not initiate.
         */
        zebra_nhg_handle_kernel_state_change(lookup, false);
        return 0;
    }

    if (nhg_ctx_get_count(ctx)) {
        nhg = nexthop_group_new();
        if (zebra_nhg_process_grp(nhg, &nhg_depends,
                                  nhg_ctx_get_grp(ctx), count,
                                  nhg_ctx_get_resilience(ctx))) {
            depends_decrement_free(&nhg_depends);
            nexthop_group_delete(&nhg);
            return -ENOENT;
        }

        if (!zebra_nhg_find(&nhe, id, nhg, &nhg_depends, vrf_id, afi,
                            type, true))
            depends_decrement_free(&nhg_depends);

        /* These got copied over in zebra_nhg_alloc() */
        nexthop_group_delete(&nhg);
    } else
        nhe = zebra_nhg_find_nexthop(id, nhg_ctx_get_nh(ctx), afi, type,
                                     true);

    if (!nhe) {
        flog_err(EC_ZEBRA_TABLE_LOOKUP_FAILED,
                 "Zebra failed to find or create a nexthop hash entry for ID (%u)",
                 id);
        return -1;
    }

    if (IS_ZEBRA_DEBUG_NHG_DETAIL)
        zlog_debug("%s: nhe %p (%pNG) is new", __func__, nhe, nhe);

    /*
     * If daemon nhg from the kernel, add a refcnt here to indicate the
     * daemon owns it.
     */
    if (PROTO_OWNED(nhe))
        zebra_nhg_increment_ref(nhe);

    SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID);
    SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);

    return 0;
}

static int nhg_ctx_process_del(struct nhg_ctx *ctx)
{
    struct nhg_hash_entry *nhe = NULL;
    uint32_t id = nhg_ctx_get_id(ctx);

    nhe = zebra_nhg_lookup_id(id);

    if (!nhe) {
        flog_warn(EC_ZEBRA_BAD_NHG_MESSAGE,
                  "Kernel delete message received for nexthop group ID (%u) that we do not have in our ID table",
                  id);
        return -1;
    }

    zebra_nhg_handle_kernel_state_change(nhe, true);

    return 0;
}

static void nhg_ctx_fini(struct nhg_ctx **ctx)
{
    /*
     * Just freeing for now, maybe do something more in the future
     * based on flag.
     */
    nhg_ctx_free(ctx);
}

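/*
 * Hand an nhg_ctx off for deferred processing via rib_queue_nhg_ctx_add();
 * nhg_ctx_process() runs later when the queued work is drained.
 */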
static int queue_add(struct nhg_ctx *ctx)
{
    /* If it's queued or already processed do nothing */
    if (nhg_ctx_get_status(ctx) == NHG_CTX_QUEUED)
        return 0;

    if (rib_queue_nhg_ctx_add(ctx)) {
        nhg_ctx_set_status(ctx, NHG_CTX_FAILURE);
        return -1;
    }

    nhg_ctx_set_status(ctx, NHG_CTX_QUEUED);

    return 0;
}

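/*
 * Work through a queued nhg_ctx: dispatch on the requested operation,
 * record the resulting status on the ctx and release it, unless it was
 * re-queued once to wait for its depends to be processed.
 */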
int nhg_ctx_process(struct nhg_ctx *ctx)
{
    int ret = 0;

    switch (nhg_ctx_get_op(ctx)) {
    case NHG_CTX_OP_NEW:
        ret = nhg_ctx_process_new(ctx);
        if (nhg_ctx_get_count(ctx) && ret == -ENOENT
            && nhg_ctx_get_status(ctx) != NHG_CTX_REQUEUED) {
            /**
             * We have entered a situation where we are
             * processing a group from the kernel
             * that has a contained nexthop which
             * we have not yet processed.
             *
             * Re-enqueue this ctx to be handled exactly one
             * more time (indicated by the flag).
             *
             * By the time we get back to it, we
             * should have processed its depends.
             */
            nhg_ctx_set_status(ctx, NHG_CTX_NONE);
            if (queue_add(ctx) == 0) {
                nhg_ctx_set_status(ctx, NHG_CTX_REQUEUED);
                return 0;
            }
        }
        break;
    case NHG_CTX_OP_DEL:
        ret = nhg_ctx_process_del(ctx);
        break;
    case NHG_CTX_OP_NONE:
        break;
    }

    nhg_ctx_set_status(ctx, (ret ? NHG_CTX_FAILURE : NHG_CTX_SUCCESS));

    nhg_ctx_fini(&ctx);

    return ret;
}

/* Kernel-side, you either get a single new nexthop or an array of IDs */
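/*
 * 'grp'/'count' describe a group of already-known nexthop IDs; 'nh' is
 * used when the kernel gave us a single nexthop instead.  During startup
 * the context is processed synchronously so routes referencing the nhe_id
 * can resolve immediately; otherwise it is queued.
 */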
int zebra_nhg_kernel_find(uint32_t id, struct nexthop *nh, struct nh_grp *grp, uint16_t count,
                          vrf_id_t vrf_id, afi_t afi, int type, int startup,
                          struct nhg_resilience *nhgr)
{
    struct nhg_ctx *ctx = NULL;

    if (IS_ZEBRA_DEBUG_NHG_DETAIL)
        zlog_debug("%s: nh %pNHv, id %u, count %d",
                   __func__, nh, id, (int)count);

    if (id > id_counter && id < ZEBRA_NHG_PROTO_LOWER)
        /* Increase our counter so we don't try to create
         * an ID that already exists
         */
        id_counter = id;

    ctx = nhg_ctx_init(id, nh, grp, vrf_id, afi, type, count, nhgr);
    nhg_ctx_set_op(ctx, NHG_CTX_OP_NEW);

    /* Under startup conditions, we need to handle them immediately
     * like we do for routes. Otherwise, we are going to get a route
     * with a nhe_id that we have not handled.
     */
    if (startup)
        return nhg_ctx_process(ctx);

    if (queue_add(ctx)) {
        nhg_ctx_fini(&ctx);
        return -1;
    }

    return 0;
}

/* Kernel-side, received delete message */
int zebra_nhg_kernel_del(uint32_t id, vrf_id_t vrf_id)
{
    struct nhg_ctx *ctx = NULL;

    ctx = nhg_ctx_init(id, NULL, NULL, vrf_id, 0, 0, 0, NULL);

    nhg_ctx_set_op(ctx, NHG_CTX_OP_DEL);

    if (queue_add(ctx)) {
        nhg_ctx_fini(&ctx);
        return -1;
    }

    return 0;
}

/* Some dependency helper functions */
static struct nhg_hash_entry *depends_find_recursive(const struct nexthop *nh,
                                                     afi_t afi, int type)
{
    struct nhg_hash_entry *nhe;
    struct nexthop *lookup = NULL;

    lookup = nexthop_dup(nh, NULL);

    nhe = zebra_nhg_find_nexthop(0, lookup, afi, type, false);

    nexthops_free(lookup);

    return nhe;
}

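/*
 * Singleton nexthop entries are deliberately created with a weight of 1:
 * a lone nexthop cannot carry a meaningful weight, and normalizing it
 * here lets differently-weighted groups share the same singleton depends
 * instead of multiplying near-identical singletons.
 */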
static struct nhg_hash_entry *depends_find_singleton(const struct nexthop *nh,
                                                     afi_t afi, int type,
                                                     bool from_dplane)
{
    struct nhg_hash_entry *nhe;
    struct nexthop lookup = {};

    /* Capture a snapshot of this single nh; it might be part of a list,
     * so we need to make a standalone copy.
     */
    nexthop_copy_no_recurse(&lookup, nh, NULL);

    /*
     * So this is to intentionally cause the singleton nexthop
     * to be created with a weight of 1.
     */
    lookup.weight = 1;
    nhe = zebra_nhg_find_nexthop(0, &lookup, afi, type, from_dplane);

    /* The copy may have allocated labels; free them if necessary. */
    nexthop_del_labels(&lookup);
    nexthop_del_srv6_seg6local(&lookup);
    nexthop_del_srv6_seg6(&lookup);

    if (IS_ZEBRA_DEBUG_NHG_DETAIL)
        zlog_debug("%s: nh %pNHv => %p (%pNG)", __func__, nh, nhe, nhe);

    return nhe;
}

static struct nhg_hash_entry *depends_find(const struct nexthop *nh, afi_t afi,
                                           int type, bool from_dplane)
{
    struct nhg_hash_entry *nhe = NULL;

    if (!nh)
        goto done;

    /* We are separating these functions out to increase handling speed
     * in the non-recursive case (by not alloc/freeing)
     */
    if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE))
        nhe = depends_find_recursive(nh, afi, type);
    else
        nhe = depends_find_singleton(nh, afi, type, from_dplane);

    if (IS_ZEBRA_DEBUG_NHG_DETAIL) {
        zlog_debug("%s: nh %pNHv %s => %p (%pNG)", __func__, nh,
                   CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE) ? "(R)" : "",
                   nhe, nhe);
    }

done:
    return nhe;
}

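/*
 * Attach 'depend' to a connected tree of depends, taking a reference on
 * it only when it was not already present in the tree.
 */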
static void depends_add(struct nhg_connected_tree_head *head,
                        struct nhg_hash_entry *depend)
{
    if (IS_ZEBRA_DEBUG_NHG_DETAIL)
        zlog_debug("%s: head %p nh %pNHv",
                   __func__, head, depend->nhg.nexthop);

    /* If NULL is returned, it was successfully added and
     * needs to have its refcnt incremented.
     *
     * Else the NHE is already present in the tree and doesn't
     * need to increment the refcnt.
     */
    if (nhg_connected_tree_add_nhe(head, depend) == NULL)
        zebra_nhg_increment_ref(depend);
}

static struct nhg_hash_entry *
depends_find_add(struct nhg_connected_tree_head *head, struct nexthop *nh,
                 afi_t afi, int type, bool from_dplane)
{
    struct nhg_hash_entry *depend = NULL;

    depend = depends_find(nh, afi, type, from_dplane);

    if (IS_ZEBRA_DEBUG_NHG_DETAIL)
        zlog_debug("%s: nh %pNHv => %p", __func__, nh, depend);

    if (depend)
        depends_add(head, depend);

    return depend;
}

static struct nhg_hash_entry *
depends_find_id_add(struct nhg_connected_tree_head *head, uint32_t id)
{
    struct nhg_hash_entry *depend = NULL;

    depend = zebra_nhg_lookup_id(id);

    if (depend)
        depends_add(head, depend);

    return depend;
}

static void depends_decrement_free(struct nhg_connected_tree_head *head)
{
    nhg_connected_tree_decrement_ref(head);
    nhg_connected_tree_free(head);
}

/* Find an nhe based on a list of nexthops */
struct nhg_hash_entry *zebra_nhg_rib_find(uint32_t id,
                                          struct nexthop_group *nhg,
                                          afi_t rt_afi, int type)
{
    struct nhg_hash_entry *nhe = NULL;
    vrf_id_t vrf_id;

    /*
     * CLANG SA is complaining that nexthop may be NULL
     * Make it happy but this is ridonc
     */
    assert(nhg->nexthop);
    vrf_id = !vrf_is_backend_netns() ? VRF_DEFAULT : nhg->nexthop->vrf_id;

    zebra_nhg_find(&nhe, id, nhg, NULL, vrf_id, rt_afi, type, false);

    if (IS_ZEBRA_DEBUG_NHG_DETAIL)
        zlog_debug("%s: => nhe %p (%pNG)", __func__, nhe, nhe);

    return nhe;
}

/* Find an nhe based on a route's nhe */
struct nhg_hash_entry *
zebra_nhg_rib_find_nhe(struct nhg_hash_entry *rt_nhe, afi_t rt_afi)
{
    struct nhg_hash_entry *nhe = NULL;

    if (!rt_nhe) {
        flog_err(EC_ZEBRA_TABLE_LOOKUP_FAILED,
                 "No nhg_hash_entry passed to %s", __func__);
        return NULL;
    }

    if (!rt_nhe->nhg.nexthop) {
        flog_err(EC_ZEBRA_TABLE_LOOKUP_FAILED,
                 "No nexthop passed to %s", __func__);
        return NULL;
    }

    zebra_nhe_find(&nhe, rt_nhe, NULL, rt_afi, false);

    if (IS_ZEBRA_DEBUG_NHG_DETAIL)
        zlog_debug("%s: rt_nhe %p(%pNG) => nhe %p(%pNG)", __func__,
                   rt_nhe, rt_nhe, nhe, nhe);

    return nhe;
}

/*
 * Allocate backup nexthop info object. Typically these are embedded in
 * nhg_hash_entry objects.
 */
struct nhg_backup_info *zebra_nhg_backup_alloc(void)
{
    struct nhg_backup_info *p;

    p = XCALLOC(MTYPE_NHG, sizeof(struct nhg_backup_info));

    p->nhe = zebra_nhg_alloc();

    /* Identify the embedded group used to hold the list of backups */
    SET_FLAG(p->nhe->flags, NEXTHOP_GROUP_BACKUP);

    return p;
}

/*
 * Free backup nexthop info object, deal with any embedded allocations
 */
void zebra_nhg_backup_free(struct nhg_backup_info **p)
{
    if (p && *p) {
        if ((*p)->nhe)
            zebra_nhg_free((*p)->nhe);

        XFREE(MTYPE_NHG, (*p));
    }
}

/* Accessor for backup nexthop group */
struct nexthop_group *zebra_nhg_get_backup_nhg(struct nhg_hash_entry *nhe)
{
    struct nexthop_group *p = NULL;

    if (nhe) {
        if (nhe->backup_info && nhe->backup_info->nhe)
            p = &(nhe->backup_info->nhe->nhg);
    }

    return p;
}

/*
 * Helper to return a copy of a backup_info - note that this is a shallow
 * copy, meant to be used when creating a new nhe from info passed in with
 * a route e.g.
 */
static struct nhg_backup_info *
nhg_backup_copy(const struct nhg_backup_info *orig)
{
    struct nhg_backup_info *b;

    b = zebra_nhg_backup_alloc();

    /* Copy list of nexthops */
    nexthop_group_copy(&(b->nhe->nhg), &(orig->nhe->nhg));

    return b;
}

static void zebra_nhg_free_members(struct nhg_hash_entry *nhe)
{
    nexthops_free(nhe->nhg.nexthop);

    zebra_nhg_backup_free(&nhe->backup_info);

    /* Decrement to remove connection ref */
    nhg_connected_tree_decrement_ref(&nhe->nhg_depends);
    nhg_connected_tree_free(&nhe->nhg_depends);
    nhg_connected_tree_free(&nhe->nhg_dependents);
}

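/*
 * Fully free an nhe that is no longer referenced: stop any keep-around
 * timer, release its nexthops, backup info and connected trees, then
 * free the entry itself.
 */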
void zebra_nhg_free(struct nhg_hash_entry *nhe)
{
    if (IS_ZEBRA_DEBUG_NHG_DETAIL) {
        /* Group or singleton? */
        if (nhe->nhg.nexthop && nhe->nhg.nexthop->next)
            zlog_debug("%s: nhe %p (%pNG), refcnt %d", __func__,
                       nhe, nhe, nhe->refcnt);
        else
            zlog_debug("%s: nhe %p (%pNG), refcnt %d, NH %pNHv",
                       __func__, nhe, nhe, nhe->refcnt,
                       nhe->nhg.nexthop);
    }

    EVENT_OFF(nhe->timer);

    zebra_nhg_free_members(nhe);

    XFREE(MTYPE_NHG, nhe);
}

/*
 * Let's just drop the memory associated with each item
 */
void zebra_nhg_hash_free(void *p)
{
    struct nhg_hash_entry *nhe = p;

    if (IS_ZEBRA_DEBUG_NHG_DETAIL) {
        /* Group or singleton? */
        if (nhe->nhg.nexthop && nhe->nhg.nexthop->next)
            zlog_debug("%s: nhe %p (%u), refcnt %d", __func__, nhe,
                       nhe->id, nhe->refcnt);
        else
            zlog_debug("%s: nhe %p (%pNG), refcnt %d, NH %pNHv",
                       __func__, nhe, nhe, nhe->refcnt,
                       nhe->nhg.nexthop);
    }

    EVENT_OFF(nhe->timer);

    nexthops_free(nhe->nhg.nexthop);

    XFREE(MTYPE_NHG, nhe);
}

/*
 * On cleanup there are nexthop groups that have not
 * been resolved at all (a nhe->id of 0). As such
 * zebra needs to clean up the memory associated with
 * those entries.
 */
void zebra_nhg_hash_free_zero_id(struct hash_bucket *b, void *arg)
{
    struct nhg_hash_entry *nhe = b->data;
    struct nhg_connected *dep;

    while ((dep = nhg_connected_tree_pop(&nhe->nhg_depends))) {
        if (dep->nhe->id == 0)
            zebra_nhg_hash_free(dep->nhe);

        nhg_connected_free(dep);
    }

    while ((dep = nhg_connected_tree_pop(&nhe->nhg_dependents)))
        nhg_connected_free(dep);

    if (nhe->backup_info && nhe->backup_info->nhe->id == 0) {
        while ((dep = nhg_connected_tree_pop(
                    &nhe->backup_info->nhe->nhg_depends)))
            nhg_connected_free(dep);

        zebra_nhg_hash_free(nhe->backup_info->nhe);

        XFREE(MTYPE_NHG, nhe->backup_info);
    }
}

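/*
 * Keep-around expiry: when the last reference to an installed NHG goes
 * away, the entry is parked with an artificial refcnt of 1 and this
 * timer drops that final reference once zrouter.nhg_keep has elapsed.
 */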
static void zebra_nhg_timer(struct event *thread)
{
    struct nhg_hash_entry *nhe = EVENT_ARG(thread);

    if (IS_ZEBRA_DEBUG_NHG_DETAIL)
        zlog_debug("Nexthop Timer for nhe: %pNG", nhe);

    if (nhe->refcnt == 1)
        zebra_nhg_decrement_ref(nhe);
}

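/*
 * Reference counting for hash entries.  Dropping the last reference on
 * an installed entry arms the keep-around timer rather than tearing the
 * entry down immediately; otherwise the change is propagated to the
 * entry's depends and, once unreferenced, a zebra-created entry is
 * uninstalled from the kernel.
 */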
void zebra_nhg_decrement_ref(struct nhg_hash_entry *nhe)
{
    if (IS_ZEBRA_DEBUG_NHG_DETAIL)
        zlog_debug("%s: nhe %p (%pNG) %d => %d", __func__, nhe, nhe,
                   nhe->refcnt, nhe->refcnt - 1);

    nhe->refcnt--;

    if (!zebra_router_in_shutdown() && nhe->refcnt <= 0 &&
        CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED) &&
        !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_KEEP_AROUND)) {
        nhe->refcnt = 1;
        SET_FLAG(nhe->flags, NEXTHOP_GROUP_KEEP_AROUND);
        event_add_timer(zrouter.master, zebra_nhg_timer, nhe,
                        zrouter.nhg_keep, &nhe->timer);
        return;
    }

    if (!zebra_nhg_depends_is_empty(nhe))
        nhg_connected_tree_decrement_ref(&nhe->nhg_depends);

    if (ZEBRA_NHG_CREATED(nhe) && nhe->refcnt <= 0)
        zebra_nhg_uninstall_kernel(nhe);
}

void zebra_nhg_increment_ref(struct nhg_hash_entry *nhe)
{
    if (IS_ZEBRA_DEBUG_NHG_DETAIL)
        zlog_debug("%s: nhe %p (%pNG) %d => %d", __func__, nhe, nhe,
                   nhe->refcnt, nhe->refcnt + 1);

    nhe->refcnt++;

    if (event_is_scheduled(nhe->timer)) {
        EVENT_OFF(nhe->timer);
        nhe->refcnt--;
        UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_KEEP_AROUND);
    }

    if (!zebra_nhg_depends_is_empty(nhe))
        nhg_connected_tree_increment_ref(&nhe->nhg_depends);
}

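/*
 * Build the resolved nexthop that hangs off a recursive nexthop: copy
 * gateway/interface details from the nexthop we resolved through, stack
 * MPLS labels (SR-TE policy segments or the resolving route's labels,
 * then the parent's own labels), carry over any SRv6 state and link the
 * result onto nexthop->resolved.
 */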
static struct nexthop *nexthop_set_resolved(afi_t afi,
                                            const struct nexthop *newhop,
                                            struct nexthop *nexthop,
                                            struct zebra_sr_policy *policy)
{
    struct nexthop *resolved_hop;
    uint8_t num_labels = 0;
    mpls_label_t labels[MPLS_MAX_LABELS];
    enum lsp_types_t label_type = ZEBRA_LSP_NONE;
    int i = 0;

    resolved_hop = nexthop_new();
    SET_FLAG(resolved_hop->flags, NEXTHOP_FLAG_ACTIVE);

    resolved_hop->vrf_id = nexthop->vrf_id;

    /* Using weighted ECMP, we should respect the weight and use
     * the same value for non-recursive next-hop.
     */
    resolved_hop->weight = nexthop->weight;

    switch (newhop->type) {
    case NEXTHOP_TYPE_IPV4:
    case NEXTHOP_TYPE_IPV4_IFINDEX:
        /* If the resolving route specifies a gateway, use it */
        resolved_hop->type = newhop->type;
        resolved_hop->gate.ipv4 = newhop->gate.ipv4;

        if (newhop->ifindex) {
            resolved_hop->type = NEXTHOP_TYPE_IPV4_IFINDEX;
            resolved_hop->ifindex = newhop->ifindex;
        }
        break;
    case NEXTHOP_TYPE_IPV6:
    case NEXTHOP_TYPE_IPV6_IFINDEX:
        resolved_hop->type = newhop->type;
        resolved_hop->gate.ipv6 = newhop->gate.ipv6;

        if (newhop->ifindex) {
            resolved_hop->type = NEXTHOP_TYPE_IPV6_IFINDEX;
            resolved_hop->ifindex = newhop->ifindex;
        }
        break;
    case NEXTHOP_TYPE_IFINDEX:
        /* If the resolving route is an interface route,
         * it means the gateway we are looking up is connected
         * to that interface. (The actual network is _not_ onlink).
         * Therefore, the resolved route should have the original
         * gateway as nexthop as it is directly connected.
         *
         * On Linux, we have to set the onlink netlink flag because
         * otherwise, the kernel won't accept the route.
         */
        resolved_hop->flags |= NEXTHOP_FLAG_ONLINK;
        if (afi == AFI_IP) {
            resolved_hop->type = NEXTHOP_TYPE_IPV4_IFINDEX;
            resolved_hop->gate.ipv4 = nexthop->gate.ipv4;
        } else if (afi == AFI_IP6) {
            resolved_hop->type = NEXTHOP_TYPE_IPV6_IFINDEX;
            resolved_hop->gate.ipv6 = nexthop->gate.ipv6;
        }
        resolved_hop->ifindex = newhop->ifindex;
        break;
    case NEXTHOP_TYPE_BLACKHOLE:
        resolved_hop->type = NEXTHOP_TYPE_BLACKHOLE;
        resolved_hop->bh_type = newhop->bh_type;
        break;
    }

    if (CHECK_FLAG(newhop->flags, NEXTHOP_FLAG_ONLINK))
        SET_FLAG(resolved_hop->flags, NEXTHOP_FLAG_ONLINK);

    /* Copy labels of the resolved route and the parent resolving to it */
    if (policy) {
        int label_num = 0;

        /*
         * Don't push the first SID if the corresponding action in the
         * LFIB is POP.
         */
        if (!newhop->nh_label || !newhop->nh_label->num_labels
            || newhop->nh_label->label[0] == MPLS_LABEL_IMPLICIT_NULL)
            label_num = 1;

        for (; label_num < policy->segment_list.label_num; label_num++)
            labels[num_labels++] = policy->segment_list.labels[label_num];
        label_type = policy->segment_list.type;
    } else if (newhop->nh_label) {
        for (i = 0; i < newhop->nh_label->num_labels; i++) {
            /* Be a bit picky about overrunning the local array */
            if (num_labels >= MPLS_MAX_LABELS) {
                if (IS_ZEBRA_DEBUG_NHG || IS_ZEBRA_DEBUG_RIB)
                    zlog_debug("%s: too many labels in newhop %pNHv",
                               __func__, newhop);
                break;
            }
            labels[num_labels++] = newhop->nh_label->label[i];
        }
        /* Use the "outer" type */
        label_type = newhop->nh_label_type;
    }

    if (nexthop->nh_label) {
        for (i = 0; i < nexthop->nh_label->num_labels; i++) {
            /* Be a bit picky about overrunning the local array */
            if (num_labels >= MPLS_MAX_LABELS) {
                if (IS_ZEBRA_DEBUG_NHG || IS_ZEBRA_DEBUG_RIB)
                    zlog_debug("%s: too many labels in nexthop %pNHv",
                               __func__, nexthop);
                break;
            }
            labels[num_labels++] = nexthop->nh_label->label[i];
        }

        /* If the parent has labels, use its type if
         * we don't already have one.
         */
        if (label_type == ZEBRA_LSP_NONE)
            label_type = nexthop->nh_label_type;
    }

    if (num_labels)
        nexthop_add_labels(resolved_hop, label_type, num_labels, labels);

    if (nexthop->nh_srv6) {
        if (nexthop->nh_srv6->seg6local_action !=
            ZEBRA_SEG6_LOCAL_ACTION_UNSPEC)
            nexthop_add_srv6_seg6local(resolved_hop,
                                       nexthop->nh_srv6->seg6local_action,
                                       &nexthop->nh_srv6->seg6local_ctx);
        if (nexthop->nh_srv6->seg6_segs)
            nexthop_add_srv6_seg6(resolved_hop,
                                  &nexthop->nh_srv6->seg6_segs->seg[0],
                                  nexthop->nh_srv6->seg6_segs->num_segs);
    }

    resolved_hop->rparent = nexthop;
    _nexthop_add(&nexthop->resolved, resolved_hop);

    return resolved_hop;
}

/* Checks if nexthop we are trying to resolve to is valid */
static bool nexthop_valid_resolve(const struct nexthop *nexthop,
                                  const struct nexthop *resolved)
{
    /* Can't resolve to a recursive nexthop */
    if (CHECK_FLAG(resolved->flags, NEXTHOP_FLAG_RECURSIVE))
        return false;

    /* Must be ACTIVE */
    if (!CHECK_FLAG(resolved->flags, NEXTHOP_FLAG_ACTIVE))
        return false;

zebra: disallow resolution to duplicate nexthops
Disallow the resolution to nexthops that are marked duplicate.
When we are resolving to an ecmp group, it's possible this
group has duplicates.
I found this when I hit a bug where we can have groups resolving
to each other and cause the resolved->next->next pointer to increase
exponentially. Sufficiently large ecmp and zebra will grind to a hault.
Like so:
```
D> 4.4.4.14/32 [150/0] via 1.1.1.1 (recursive), weight 1, 00:00:02
* via 1.1.1.1, dummy1 onlink, weight 1, 00:00:02
via 4.4.4.1 (recursive), weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 4.4.4.2 (recursive), weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 4.4.4.3 (recursive), weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 4.4.4.4 (recursive), weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 4.4.4.5 (recursive), weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 4.4.4.6 (recursive), weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 4.4.4.7 (recursive), weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 4.4.4.8 (recursive), weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 4.4.4.9 (recursive), weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 4.4.4.10 (recursive), weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 4.4.4.11 (recursive), weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 4.4.4.12 (recursive), weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 4.4.4.13 (recursive), weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 4.4.4.15 (recursive), weight 1, 00:00:02
via 1.1.1.1, dummy1 onlink, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1 onlink, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 4.4.4.16 (recursive), weight 1, 00:00:02
via 1.1.1.1, dummy1 onlink, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
via 1.1.1.1, dummy1, weight 1, 00:00:02
D> 4.4.4.15/32 [150/0] via 1.1.1.1 (recursive), weight 1, 00:00:09
* via 1.1.1.1, dummy1 onlink, weight 1, 00:00:09
via 4.4.4.1 (recursive), weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 4.4.4.2 (recursive), weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 4.4.4.3 (recursive), weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 4.4.4.4 (recursive), weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 4.4.4.5 (recursive), weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 4.4.4.6 (recursive), weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 4.4.4.7 (recursive), weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 4.4.4.8 (recursive), weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 4.4.4.9 (recursive), weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 4.4.4.10 (recursive), weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 4.4.4.11 (recursive), weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 4.4.4.12 (recursive), weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 4.4.4.13 (recursive), weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 4.4.4.14 (recursive), weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 4.4.4.16 (recursive), weight 1, 00:00:09
via 1.1.1.1, dummy1 onlink, weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
via 1.1.1.1, dummy1, weight 1, 00:00:09
D> 4.4.4.16/32 [150/0] via 1.1.1.1 (recursive), weight 1, 00:00:19
* via 1.1.1.1, dummy1 onlink, weight 1, 00:00:19
via 4.4.4.1 (recursive), weight 1, 00:00:19
via 1.1.1.1, dummy1, weight 1, 00:00:19
via 4.4.4.2 (recursive), weight 1, 00:00:19
...............
................
and on...
```
You can repro the above via:
```
kernel routes:
1.1.1.1 dev dummy1 scope link
4.4.4.0/24 via 1.1.1.1 dev dummy1
==============================
config:
nexthop-group doof
nexthop 1.1.1.1
nexthop 4.4.4.1
nexthop 4.4.4.10
nexthop 4.4.4.11
nexthop 4.4.4.12
nexthop 4.4.4.13
nexthop 4.4.4.14
nexthop 4.4.4.15
nexthop 4.4.4.16
nexthop 4.4.4.2
nexthop 4.4.4.3
nexthop 4.4.4.4
nexthop 4.4.4.5
nexthop 4.4.4.6
nexthop 4.4.4.7
nexthop 4.4.4.8
nexthop 4.4.4.9
!
===========================
Then use sharpd to install 4.4.4.16 -> 4.4.4.1 pointing to that nexthop
group in descending order.
```
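As a concrete illustration (the exact sharpd syntax is an assumption; check
`sharp install routes` in your build), the sixteen /32 routes can be pushed
through that group with something like:
```
sharp install routes 4.4.4.1 nexthop-group doof 16
```
Installing them individually from 4.4.4.16 down to 4.4.4.1 matches the
descending order described above.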
With these changes, the growing ECMP above is prevented by disallowing
duplicates in the resolution decision. These nexthops are not
installed anyway, so there is no reason to resolve through them.
Signed-off-by: Stephen Worley <sworley@nvidia.com>
2021-01-27 22:20:22 +01:00
|
|
|
/* Must not be duplicate */
|
|
|
|
if (CHECK_FLAG(resolved->flags, NEXTHOP_FLAG_DUPLICATE))
|
|
|
|
return false;
|
|
|
|
|
2019-09-09 23:20:17 +02:00
|
|
|
switch (nexthop->type) {
|
|
|
|
case NEXTHOP_TYPE_IPV4_IFINDEX:
|
|
|
|
case NEXTHOP_TYPE_IPV6_IFINDEX:
|
|
|
|
/* If the nexthop we are resolving to does not match the
|
|
|
|
* ifindex for the nexthop the route wanted, it's not valid.
|
|
|
|
*/
|
|
|
|
if (nexthop->ifindex != resolved->ifindex)
|
|
|
|
return false;
|
|
|
|
break;
|
|
|
|
case NEXTHOP_TYPE_IPV4:
|
|
|
|
case NEXTHOP_TYPE_IPV6:
|
|
|
|
case NEXTHOP_TYPE_IFINDEX:
|
|
|
|
case NEXTHOP_TYPE_BLACKHOLE:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2019-05-13 21:46:05 +02:00
|
|
|
/*
|
2021-04-05 23:16:38 +02:00
|
|
|
* Downstream VNI and Single VXLAN device check.
|
|
|
|
*
|
|
|
|
* If it has nexthop VNI labels at this point it must be D-VNI allocated
|
|
|
|
* and all the nexthops have to be on an SVD.
|
|
|
|
*
|
|
|
|
* If SVD is not available, mark as inactive.
|
|
|
|
*/
|
|
|
|
static bool nexthop_set_evpn_dvni_svd(vrf_id_t re_vrf_id,
|
|
|
|
struct nexthop *nexthop)
|
|
|
|
{
|
|
|
|
if (!is_vrf_l3vni_svd_backed(re_vrf_id)) {
|
|
|
|
if (IS_ZEBRA_DEBUG_NHG_DETAIL) {
|
|
|
|
struct vrf *vrf = vrf_lookup_by_id(re_vrf_id);
|
|
|
|
|
|
|
|
zlog_debug(
|
|
|
|
"nexthop %pNHv D-VNI but route's vrf %s(%u) doesn't use SVD",
|
|
|
|
nexthop, VRF_LOGNAME(vrf), re_vrf_id);
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
nexthop->ifindex = get_l3vni_vxlan_ifindex(re_vrf_id);
|
|
|
|
nexthop->vrf_id = 0;
|
|
|
|
|
|
|
|
if (IS_ZEBRA_DEBUG_NHG_DETAIL)
|
|
|
|
zlog_debug("nexthop %pNHv using SVD", nexthop);
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Given a nexthop we need to properly recursively resolve
|
|
|
|
* the route. As such, do a table lookup to find and match
|
|
|
|
* if at all possible. Set the nexthop->ifindex and resolved_id
|
|
|
|
* as appropriate
|
2021-02-22 21:09:07 +01:00
|
|
|
*/
|
|
|
|
static int resolve_backup_nexthops(const struct nexthop *nexthop,
|
|
|
|
const struct nhg_hash_entry *nhe,
|
|
|
|
struct nexthop *resolved,
|
|
|
|
struct nhg_hash_entry *resolve_nhe,
|
|
|
|
struct backup_nh_map_s *map)
|
|
|
|
{
|
|
|
|
int i, j, idx;
|
|
|
|
const struct nexthop *bnh;
|
|
|
|
struct nexthop *nh, *newnh;
|
2021-04-01 17:56:30 +02:00
|
|
|
mpls_label_t labels[MPLS_MAX_LABELS];
|
|
|
|
uint8_t num_labels;
|
2021-02-22 21:09:07 +01:00
|
|
|
|
|
|
|
assert(nexthop->backup_num <= NEXTHOP_MAX_BACKUPS);
|
|
|
|
|
|
|
|
/* Locate backups from the original nexthop's backup index and nhe */
|
|
|
|
for (i = 0; i < nexthop->backup_num; i++) {
|
|
|
|
idx = nexthop->backup_idx[i];
|
|
|
|
|
|
|
|
/* Do we already know about this particular backup? */
|
|
|
|
for (j = 0; j < map->map_count; j++) {
|
|
|
|
if (map->map[j].orig_idx == idx)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (j < map->map_count) {
|
|
|
|
resolved->backup_idx[resolved->backup_num] =
|
|
|
|
map->map[j].new_idx;
|
|
|
|
resolved->backup_num++;
|
|
|
|
|
2021-03-24 20:01:50 +01:00
|
|
|
SET_FLAG(resolved->flags, NEXTHOP_FLAG_HAS_BACKUP);
|
|
|
|
|
2021-02-22 21:09:07 +01:00
|
|
|
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
|
|
|
|
zlog_debug("%s: found map idx orig %d, new %d",
|
|
|
|
__func__, map->map[j].orig_idx,
|
|
|
|
map->map[j].new_idx);
|
|
|
|
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We can't handle any new map entries at this point. */
|
|
|
|
if (map->map_count == MULTIPATH_NUM)
|
|
|
|
break;
|
|
|
|
|
|
|
|
/* Need to create/copy a new backup */
|
|
|
|
bnh = nhe->backup_info->nhe->nhg.nexthop;
|
|
|
|
for (j = 0; j < idx; j++) {
|
|
|
|
if (bnh == NULL)
|
|
|
|
break;
|
|
|
|
bnh = bnh->next;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Whoops - bad index in the nexthop? */
|
|
|
|
if (bnh == NULL)
|
|
|
|
continue;
|
|
|
|
|
2021-03-24 20:01:50 +01:00
|
|
|
if (resolve_nhe->backup_info == NULL)
|
|
|
|
resolve_nhe->backup_info = zebra_nhg_backup_alloc();
|
|
|
|
|
2021-02-22 21:09:07 +01:00
|
|
|
/* Update backup info in the resolving nexthop and its nhe */
|
|
|
|
newnh = nexthop_dup_no_recurse(bnh, NULL);
|
|
|
|
|
2021-04-01 17:56:30 +02:00
|
|
|
/* We may need some special handling for mpls labels: the new
|
|
|
|
* backup needs to carry the recursive nexthop's labels,
|
|
|
|
* if any: they may be vrf labels e.g.
|
|
|
|
* The original/inner labels are in the stack of 'resolve_nhe',
|
|
|
|
* if that is longer than the stack in 'nexthop'.
|
|
|
|
*/
|
|
|
|
if (newnh->nh_label && resolved->nh_label &&
|
|
|
|
nexthop->nh_label) {
|
|
|
|
if (resolved->nh_label->num_labels >
|
|
|
|
nexthop->nh_label->num_labels) {
|
|
|
|
/* Prepare new label stack */
|
|
|
|
num_labels = 0;
|
|
|
|
for (j = 0; j < newnh->nh_label->num_labels;
|
|
|
|
j++) {
|
|
|
|
labels[j] = newnh->nh_label->label[j];
|
|
|
|
num_labels++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Include inner labels */
|
|
|
|
for (j = nexthop->nh_label->num_labels;
|
|
|
|
j < resolved->nh_label->num_labels;
|
|
|
|
j++) {
|
|
|
|
labels[num_labels] =
|
|
|
|
resolved->nh_label->label[j];
|
|
|
|
num_labels++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Replace existing label stack in the backup */
|
|
|
|
nexthop_del_labels(newnh);
|
|
|
|
nexthop_add_labels(newnh, bnh->nh_label_type,
|
|
|
|
num_labels, labels);
|
|
|
|
}
|
|
|
|
}
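/* Worked example (hypothetical label values): if the recursive nexthop
 * 'nexthop' carries [100], the resolving stack in 'resolved' is
 * [100/200/300], and the original backup 'bnh' carries [400], the new
 * backup stack becomes [400/200/300]: the backup's own labels first,
 * followed by the resolving route's inner labels.
 */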
|
|
|
|
|
2021-02-22 21:09:07 +01:00
|
|
|
/* Need to compute the new backup index in the new
|
|
|
|
* backup list, and add to map struct.
|
|
|
|
*/
|
|
|
|
j = 0;
|
|
|
|
nh = resolve_nhe->backup_info->nhe->nhg.nexthop;
|
|
|
|
if (nh) {
|
|
|
|
while (nh->next) {
|
|
|
|
nh = nh->next;
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
|
|
|
|
nh->next = newnh;
|
2021-03-24 20:01:50 +01:00
|
|
|
j++;
|
2021-02-22 21:09:07 +01:00
|
|
|
|
|
|
|
} else /* First one */
|
|
|
|
resolve_nhe->backup_info->nhe->nhg.nexthop = newnh;
|
|
|
|
|
|
|
|
/* Capture index */
|
|
|
|
resolved->backup_idx[resolved->backup_num] = j;
|
|
|
|
resolved->backup_num++;
|
|
|
|
|
2021-03-24 20:01:50 +01:00
|
|
|
SET_FLAG(resolved->flags, NEXTHOP_FLAG_HAS_BACKUP);
|
|
|
|
|
2021-02-22 21:09:07 +01:00
|
|
|
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
|
|
|
|
zlog_debug("%s: added idx orig %d, new %d",
|
|
|
|
__func__, idx, j);
|
|
|
|
|
|
|
|
/* Update map/cache */
|
|
|
|
map->map[map->map_count].orig_idx = idx;
|
|
|
|
map->map[map->map_count].new_idx = j;
|
|
|
|
map->map_count++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2022-03-29 16:55:34 +02:00
|
|
|
/*
|
|
|
|
* So this nexthop resolution has decided that a connected route
|
|
|
|
* is the correct choice. At this point in time if FRR has multiple
|
|
|
|
* connected routes that all point to the same prefix, one will be
|
|
|
|
* selected, *but* the particular interface may not be the one
|
|
|
|
* that the nexthop points at. Let's look at all the available
|
|
|
|
* connected routes on this node, and if any of them matches
|
|
|
|
* the route's nexthop's ifindex, that is good enough for a match
|
|
|
|
*
|
|
|
|
* This code depends on the fact that a nexthop->ifindex is 0
|
|
|
|
* if it is not known; if this assumption changes, yummy!
|
|
|
|
* Additionally an ifindex of 0 means figure it out for us.
|
|
|
|
*/
|
|
|
|
static struct route_entry *
|
|
|
|
zebra_nhg_connected_ifindex(struct route_node *rn, struct route_entry *match,
|
|
|
|
int32_t curr_ifindex)
|
|
|
|
{
|
|
|
|
struct nexthop *newhop = match->nhe->nhg.nexthop;
|
|
|
|
struct route_entry *re;
|
|
|
|
|
|
|
|
assert(newhop); /* What a kick in the patooey */
|
|
|
|
|
|
|
|
if (curr_ifindex == 0)
|
|
|
|
return match;
|
|
|
|
|
|
|
|
if (curr_ifindex == newhop->ifindex)
|
|
|
|
return match;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* At this point we know that this route is matching a connected
|
|
|
|
* but there are possibly a bunch of connected routes that are
|
|
|
|
* alive that should be considered as well. So let's iterate over
|
|
|
|
* all the re's and see if they are connected as well and maybe one
|
|
|
|
* of those ifindexes match as well.
|
|
|
|
*/
|
|
|
|
RNODE_FOREACH_RE (rn, re) {
|
*: Introduce Local Host Routes to FRR
Create Local routes in FRR:
S 0.0.0.0/0 [1/0] via 192.168.119.1, enp39s0, weight 1, 00:03:46
K>* 0.0.0.0/0 [0/100] via 192.168.119.1, enp39s0, 00:03:51
O 192.168.119.0/24 [110/100] is directly connected, enp39s0, weight 1, 00:03:46
C>* 192.168.119.0/24 is directly connected, enp39s0, 00:03:51
L>* 192.168.119.224/32 is directly connected, enp39s0, 00:03:51
O 192.168.119.229/32 [110/100] via 0.0.0.0, enp39s0 inactive, weight 1, 00:03:46
C>* 192.168.119.229/32 is directly connected, enp39s0, 00:03:46
Create ability to redistribute local routes.
Modify tests to support this change.
Signed-off-by: Donald Sharp <sharpd@nvidia.com>
2023-01-05 00:32:43 +01:00
|
|
|
if (re->type != ZEBRA_ROUTE_CONNECT &&
|
|
|
|
re->type != ZEBRA_ROUTE_LOCAL)
|
2022-03-29 16:55:34 +02:00
|
|
|
continue;
|
|
|
|
|
|
|
|
if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* zebra has a connected route that is not removed
|
|
|
|
* let's test if it is good
|
|
|
|
*/
|
|
|
|
newhop = re->nhe->nhg.nexthop;
|
|
|
|
assert(newhop);
|
|
|
|
if (curr_ifindex == newhop->ifindex)
|
|
|
|
return re;
|
|
|
|
}
|
|
|
|
|
|
|
|
return match;
|
|
|
|
}
|
|
|
|
|
2021-02-22 21:09:07 +01:00
|
|
|
/*
|
|
|
|
* Given a nexthop we need to properly recursively resolve,
|
|
|
|
* do a table lookup to find and match if at all possible.
|
|
|
|
* Set the nexthop->ifindex and resolution info as appropriate.
|
2019-05-13 21:46:05 +02:00
|
|
|
*/
|
2021-02-22 21:09:07 +01:00
|
|
|
static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe,
|
2021-01-29 21:43:52 +01:00
|
|
|
const struct prefix *top, int type, uint32_t flags,
|
2022-01-31 01:44:35 +01:00
|
|
|
uint32_t *pmtu, vrf_id_t vrf_id)
|
2019-05-13 21:46:05 +02:00
|
|
|
{
|
|
|
|
struct prefix p;
|
|
|
|
struct route_table *table;
|
|
|
|
struct route_node *rn;
|
|
|
|
struct route_entry *match = NULL;
|
|
|
|
int resolved;
|
2021-08-20 15:08:25 +02:00
|
|
|
struct zebra_nhlfe *nhlfe;
|
2019-05-13 21:46:05 +02:00
|
|
|
struct nexthop *newhop;
|
|
|
|
struct interface *ifp;
|
|
|
|
rib_dest_t *dest;
|
2019-08-28 16:01:38 +02:00
|
|
|
struct zebra_vrf *zvrf;
|
2020-07-20 13:43:54 +02:00
|
|
|
struct in_addr local_ipv4;
|
|
|
|
struct in_addr *ipv4;
|
2021-02-22 21:09:07 +01:00
|
|
|
afi_t afi = AFI_IP;
|
2019-05-13 21:46:05 +02:00
|
|
|
|
2021-01-29 21:43:52 +01:00
|
|
|
/* Reset some nexthop attributes that we'll recompute if necessary */
|
2019-05-13 21:46:05 +02:00
|
|
|
if ((nexthop->type == NEXTHOP_TYPE_IPV4)
|
2021-01-29 21:43:52 +01:00
|
|
|
|| (nexthop->type == NEXTHOP_TYPE_IPV6))
|
2019-05-13 21:46:05 +02:00
|
|
|
nexthop->ifindex = 0;
|
|
|
|
|
|
|
|
UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE);
|
|
|
|
nexthops_free(nexthop->resolved);
|
|
|
|
nexthop->resolved = NULL;
|
2020-03-10 15:50:40 +01:00
|
|
|
|
2019-05-13 21:46:05 +02:00
|
|
|
/*
|
2021-02-22 21:09:07 +01:00
|
|
|
* Set afi based on nexthop type.
|
2021-01-29 21:43:52 +01:00
|
|
|
* Some nexthop types get special handling, possibly skipping
|
|
|
|
* the normal processing.
|
2019-05-13 21:46:05 +02:00
|
|
|
*/
|
2021-01-29 21:43:52 +01:00
|
|
|
switch (nexthop->type) {
|
|
|
|
case NEXTHOP_TYPE_IFINDEX:
|
2021-02-22 21:09:07 +01:00
|
|
|
|
2021-01-29 21:43:52 +01:00
|
|
|
ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id);
|
2023-06-19 03:10:57 +02:00
|
|
|
/* If the interface exists and it's operative, it's active */
|
zebra: Allow kernel routes to stick around better on interface state changes
Currently kernel routes on system bring-up would be `auto-accepted`;
then, if an interface went down, all kernel and system routes would
be re-evaluated. There exist situations where a kernel route can
exist but the interface itself is not yet in a state that is
ready to create a connected route. As such, when any interface
goes down in the system, all kernel/system routes are re-evaluated,
and since that interface's connected route is not in the table yet,
the route matches against a default route (or not at all) and
is dropped.
Modify the code such that kernel or system routes just look for the
interface being in a good state (up or operative) and accept it.
Broken code:
eva# show ip route
Codes: K - kernel route, C - connected, S - static, R - RIP,
O - OSPF, I - IS-IS, B - BGP, E - EIGRP, N - NHRP,
T - Table, v - VNC, V - VNC-Direct, A - Babel, D - SHARP,
F - PBR, f - OpenFabric,
> - selected route, * - FIB route, q - queued, r - rejected, b - backup
t - trapped, o - offload failure
K>* 0.0.0.0/0 [0/100] via 192.168.119.1, enp39s0, 00:05:08
K>* 1.2.3.5/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.6/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.7/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.8/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.9/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.10/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.11/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.12/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.13/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.14/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.16/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.17/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
C>* 4.5.6.99/32 is directly connected, dummy9, 00:05:08
K>* 4.9.10.11/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 10.11.12.13/32 [0/0] via 192.168.119.1, enp39s0, 00:05:08
C>* 192.168.10.0/24 is directly connected, dummy99, 00:05:08
C>* 192.168.119.0/24 is directly connected, enp39s0, 00:05:08
<shutdown a non-related interface>
eva# show ip route
Codes: K - kernel route, C - connected, S - static, R - RIP,
O - OSPF, I - IS-IS, B - BGP, E - EIGRP, N - NHRP,
T - Table, v - VNC, V - VNC-Direct, A - Babel, D - SHARP,
F - PBR, f - OpenFabric,
> - selected route, * - FIB route, q - queued, r - rejected, b - backup
t - trapped, o - offload failure
K>* 0.0.0.0/0 [0/100] via 192.168.119.1, enp39s0, 00:05:28
C>* 4.5.6.99/32 is directly connected, dummy9, 00:05:28
K>* 10.11.12.13/32 [0/0] via 192.168.119.1, enp39s0, 00:05:28
C>* 192.168.10.0/24 is directly connected, dummy99, 00:05:28
C>* 192.168.119.0/24 is directly connected, enp39s0, 00:05:28
Working code:
eva# show ip route
Codes: K - kernel route, C - connected, S - static, R - RIP,
O - OSPF, I - IS-IS, B - BGP, E - EIGRP, N - NHRP,
T - Table, v - VNC, V - VNC-Direct, A - Babel, D - SHARP,
F - PBR, f - OpenFabric,
> - selected route, * - FIB route, q - queued, r - rejected, b - backup
t - trapped, o - offload failure
K>* 0.0.0.0/0 [0/100] via 192.168.119.1, enp39s0, 00:00:04
K>* 1.2.3.5/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.6/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.7/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.8/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.9/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.10/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.11/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.12/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.13/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.14/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.16/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.17/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
C>* 4.5.6.99/32 is directly connected, dummy9, 00:00:04
K>* 4.9.10.11/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 10.11.12.13/32 [0/0] via 192.168.119.1, enp39s0, 00:00:04
C>* 192.168.10.0/24 is directly connected, dummy99, 00:00:04
C>* 192.168.119.0/24 is directly connected, enp39s0, 00:00:04
<shutdown a non-related interface>
eva# show ip route
Codes: K - kernel route, C - connected, S - static, R - RIP,
O - OSPF, I - IS-IS, B - BGP, E - EIGRP, N - NHRP,
T - Table, v - VNC, V - VNC-Direct, A - Babel, D - SHARP,
F - PBR, f - OpenFabric,
> - selected route, * - FIB route, q - queued, r - rejected, b - backup
t - trapped, o - offload failure
K>* 0.0.0.0/0 [0/100] via 192.168.119.1, enp39s0, 00:00:15
K>* 1.2.3.5/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.6/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.7/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.8/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.9/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.10/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.11/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.12/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.13/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.14/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.16/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.17/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
C>* 4.5.6.99/32 is directly connected, dummy9, 00:00:15
K>* 4.9.10.11/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 10.11.12.13/32 [0/0] via 192.168.119.1, enp39s0, 00:00:15
C>* 192.168.10.0/24 is directly connected, dummy99, 00:00:15
C>* 192.168.119.0/24 is directly connected, enp39s0, 00:00:15
eva#
Signed-off-by: Donald Sharp <sharpd@nvidia.com>
2022-06-23 18:22:30 +02:00
|
|
|
if (ifp && (if_is_operative(ifp)))
|
2021-01-29 21:43:52 +01:00
|
|
|
return 1;
|
|
|
|
else
|
|
|
|
return 0;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case NEXTHOP_TYPE_IPV6_IFINDEX:
|
2021-02-22 21:09:07 +01:00
|
|
|
afi = AFI_IP6;
|
|
|
|
|
2021-01-29 21:43:52 +01:00
|
|
|
if (IN6_IS_ADDR_LINKLOCAL(&nexthop->gate.ipv6)) {
|
|
|
|
ifp = if_lookup_by_index(nexthop->ifindex,
|
|
|
|
nexthop->vrf_id);
|
|
|
|
if (ifp && if_is_operative(ifp))
|
|
|
|
return 1;
|
|
|
|
else
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case NEXTHOP_TYPE_IPV4:
|
|
|
|
case NEXTHOP_TYPE_IPV4_IFINDEX:
|
2021-02-22 21:09:07 +01:00
|
|
|
afi = AFI_IP;
|
|
|
|
break;
|
2021-01-29 21:43:52 +01:00
|
|
|
case NEXTHOP_TYPE_IPV6:
|
2021-02-22 21:09:07 +01:00
|
|
|
afi = AFI_IP6;
|
2021-01-29 21:43:52 +01:00
|
|
|
break;
|
2021-02-22 21:09:07 +01:00
|
|
|
|
|
|
|
case NEXTHOP_TYPE_BLACKHOLE:
|
|
|
|
return 1;
|
2021-01-29 21:43:52 +01:00
|
|
|
}
|
|
|
|
|
2019-05-13 21:46:05 +02:00
|
|
|
/*
|
2020-05-16 01:22:01 +02:00
|
|
|
* If the nexthop has been marked as 'onlink' we just need to make
|
|
|
|
* sure the nexthop's interface is known and is operational.
|
2019-05-13 21:46:05 +02:00
|
|
|
*/
|
|
|
|
if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK)) {
|
2021-04-05 23:16:38 +02:00
|
|
|
/* DVNI/SVD Checks for EVPN routes */
|
|
|
|
if (nexthop->nh_label &&
|
|
|
|
nexthop->nh_label_type == ZEBRA_LSP_EVPN &&
|
|
|
|
!nexthop_set_evpn_dvni_svd(vrf_id, nexthop))
|
|
|
|
return 0;
|
|
|
|
|
2019-05-13 21:46:05 +02:00
|
|
|
ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id);
|
|
|
|
if (!ifp) {
|
2020-09-29 13:54:35 +02:00
|
|
|
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
|
2020-05-16 01:22:01 +02:00
|
|
|
zlog_debug("nexthop %pNHv marked onlink but nhif %u doesn't exist",
|
|
|
|
nexthop, nexthop->ifindex);
|
2019-05-13 21:46:05 +02:00
|
|
|
return 0;
|
|
|
|
}
|
2020-05-16 01:22:01 +02:00
|
|
|
if (!if_is_operative(ifp)) {
|
2020-09-29 13:54:35 +02:00
|
|
|
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
|
2020-05-16 01:22:01 +02:00
|
|
|
zlog_debug("nexthop %pNHv marked onlink but nhif %s is not operational",
|
|
|
|
nexthop, ifp->name);
|
2019-05-15 18:59:37 +02:00
|
|
|
return 0;
|
2019-05-13 21:46:05 +02:00
|
|
|
}
|
2020-05-16 01:22:01 +02:00
|
|
|
return 1;
|
2019-05-13 21:46:05 +02:00
|
|
|
}
|
|
|
|
|
2020-07-29 17:48:57 +02:00
|
|
|
/* Validation for ipv4 mapped ipv6 nexthop. */
|
|
|
|
if (IS_MAPPED_IPV6(&nexthop->gate.ipv6)) {
|
|
|
|
afi = AFI_IP;
|
2020-07-20 13:43:54 +02:00
|
|
|
ipv4 = &local_ipv4;
|
|
|
|
ipv4_mapped_ipv6_to_ipv4(&nexthop->gate.ipv6, ipv4);
|
|
|
|
} else {
|
|
|
|
ipv4 = &nexthop->gate.ipv4;
|
|
|
|
}
|
|
|
|
|
2021-01-29 21:43:52 +01:00
|
|
|
/* Processing for nexthops with SR 'color' attribute, using
|
|
|
|
* the corresponding SR policy object.
|
|
|
|
*/
|
2020-07-20 13:43:54 +02:00
|
|
|
if (nexthop->srte_color) {
|
|
|
|
struct ipaddr endpoint = {0};
|
|
|
|
struct zebra_sr_policy *policy;
|
|
|
|
|
|
|
|
switch (afi) {
|
|
|
|
case AFI_IP:
|
|
|
|
endpoint.ipa_type = IPADDR_V4;
|
|
|
|
endpoint.ipaddr_v4 = *ipv4;
|
|
|
|
break;
|
|
|
|
case AFI_IP6:
|
|
|
|
endpoint.ipa_type = IPADDR_V6;
|
|
|
|
endpoint.ipaddr_v6 = nexthop->gate.ipv6;
|
|
|
|
break;
|
2023-01-30 16:05:58 +01:00
|
|
|
case AFI_UNSPEC:
|
|
|
|
case AFI_L2VPN:
|
|
|
|
case AFI_MAX:
|
2020-07-20 13:43:54 +02:00
|
|
|
flog_err(EC_LIB_DEVELOPMENT,
|
|
|
|
"%s: unknown address-family: %u", __func__,
|
|
|
|
afi);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
policy = zebra_sr_policy_find(nexthop->srte_color, &endpoint);
|
|
|
|
if (policy && policy->status == ZEBRA_SR_POLICY_UP) {
|
|
|
|
resolved = 0;
|
|
|
|
frr_each_safe (nhlfe_list, &policy->lsp->nhlfe_list,
|
|
|
|
nhlfe) {
|
|
|
|
if (!CHECK_FLAG(nhlfe->flags,
|
|
|
|
NHLFE_FLAG_SELECTED)
|
|
|
|
|| CHECK_FLAG(nhlfe->flags,
|
|
|
|
NHLFE_FLAG_DELETED))
|
|
|
|
continue;
|
|
|
|
SET_FLAG(nexthop->flags,
|
|
|
|
NEXTHOP_FLAG_RECURSIVE);
|
|
|
|
nexthop_set_resolved(afi, nhlfe->nexthop,
|
|
|
|
nexthop, policy);
|
|
|
|
resolved = 1;
|
|
|
|
}
|
|
|
|
if (resolved)
|
|
|
|
return 1;
|
|
|
|
}
|
2020-07-29 17:48:57 +02:00
|
|
|
}
|
|
|
|
|
2019-05-13 21:46:05 +02:00
|
|
|
/* Make lookup prefix. */
|
|
|
|
memset(&p, 0, sizeof(struct prefix));
|
|
|
|
switch (afi) {
|
|
|
|
case AFI_IP:
|
|
|
|
p.family = AF_INET;
|
2021-07-01 16:42:03 +02:00
|
|
|
p.prefixlen = IPV4_MAX_BITLEN;
|
2020-07-20 13:43:54 +02:00
|
|
|
p.u.prefix4 = *ipv4;
|
2019-05-13 21:46:05 +02:00
|
|
|
break;
|
|
|
|
case AFI_IP6:
|
|
|
|
p.family = AF_INET6;
|
2021-07-01 16:39:04 +02:00
|
|
|
p.prefixlen = IPV6_MAX_BITLEN;
|
2019-05-13 21:46:05 +02:00
|
|
|
p.u.prefix6 = nexthop->gate.ipv6;
|
|
|
|
break;
|
2023-01-30 16:05:58 +01:00
|
|
|
case AFI_UNSPEC:
|
|
|
|
case AFI_L2VPN:
|
|
|
|
case AFI_MAX:
|
2019-05-13 21:46:05 +02:00
|
|
|
assert(afi != AFI_IP && afi != AFI_IP6);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* Lookup table. */
|
|
|
|
table = zebra_vrf_table(afi, SAFI_UNICAST, nexthop->vrf_id);
|
2019-08-28 16:01:38 +02:00
|
|
|
/* get zvrf */
|
|
|
|
zvrf = zebra_vrf_lookup_by_id(nexthop->vrf_id);
|
|
|
|
if (!table || !zvrf) {
|
2019-05-13 21:46:05 +02:00
|
|
|
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
|
2020-03-24 17:38:20 +01:00
|
|
|
zlog_debug(" %s: Table not found", __func__);
|
2019-05-13 21:46:05 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
rn = route_node_match(table, (struct prefix *)&p);
|
|
|
|
while (rn) {
|
|
|
|
route_unlock_node(rn);
|
|
|
|
|
|
|
|
/* Lookup should halt if we've matched against ourselves ('top',
|
|
|
|
* if specified) - i.e., we cannot have a nexthop NH1 is
|
|
|
|
* resolved by a route NH1. The exception is if the route is a
|
|
|
|
* host route.
|
|
|
|
*/
|
2021-01-29 21:43:52 +01:00
|
|
|
if (prefix_same(&rn->p, top))
|
2021-07-01 17:05:11 +02:00
|
|
|
if (((afi == AFI_IP)
|
|
|
|
&& (rn->p.prefixlen != IPV4_MAX_BITLEN))
|
2021-07-01 16:53:21 +02:00
|
|
|
|| ((afi == AFI_IP6)
|
|
|
|
&& (rn->p.prefixlen != IPV6_MAX_BITLEN))) {
|
2019-05-13 21:46:05 +02:00
|
|
|
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
|
|
|
|
zlog_debug(
|
2020-03-24 17:38:20 +01:00
|
|
|
" %s: Matched against ourself and prefix length is not max bit length",
|
2020-03-06 15:23:22 +01:00
|
|
|
__func__);
|
zebra: Allow longer prefix matches for nexthops
Zebra currently does a shortest prefix match for
resolving nexthops for a prefix. This is typically
an ok thing to do but fails in several specific scenarios.
If a nexthop matches to a route that is not usable, nexthop
resolution just gives up and refuses to use that particular
route. For example if zebra currently has a covering prefix
say a 10.0.0.0/8. And about the same time it receives a
10.1.0.0/16 ( a more specific than the /8 ) and another
route A, whose nexthop is 10.1.1.1. Imagine the 10.1.0.0/16
is processed enough to know we want to install it and the
prefix is sent to the dataplane for installation( it is queued )
and then route A is processed, nexthop resolution will fail
and the route A will be left in limbo as uninstallable.
Let's modify the nexthop resolution code in zebra such that
if a nexthop's most specific match is unusable, continue looking
up the table till we get to the 0.0.0.0/0 route (if it's even
installed). If we find a usable route for the nexthop, accept
it and use it.
The bgp_default_originate topology test is frequently failing
with this exact problem:
B>* 0.0.0.0/0 [200/0] via 192.168.1.1, r2-r1-eth0, weight 1, 00:00:21
B 1.0.1.17/32 [200/0] via 192.168.0.1 inactive, weight 1, 00:00:21
B>* 1.0.2.17/32 [200/0] via 192.168.1.1, r2-r1-eth0, weight 1, 00:00:21
C>* 1.0.3.17/32 is directly connected, lo, 00:02:00
B>* 1.0.5.17/32 [20/0] via 192.168.2.2, r2-r3-eth1, weight 1, 00:00:32
B>* 192.168.0.0/24 [200/0] via 192.168.1.1, r2-r1-eth0, weight 1, 00:00:21
B 192.168.1.0/24 [200/0] via 192.168.1.1 inactive, weight 1, 00:00:21
C>* 192.168.1.0/24 is directly connected, r2-r1-eth0, 00:02:00
C>* 192.168.2.0/24 is directly connected, r2-r3-eth1, 00:02:00
B>* 192.168.3.0/24 [20/0] via 192.168.2.2, r2-r3-eth1, weight 1, 00:00:32
B 198.51.1.1/32 [200/0] via 192.168.0.1 inactive, weight 1, 00:00:21
B>* 198.51.1.2/32 [20/0] via 192.168.2.2, r2-r3-eth1, weight 1, 00:00:32
Notice that the 1.0.1.17/32 route is inactive but the nexthop
192.168.0.1 is covered by both the 192.168.0.0/24 prefix( shortest match )
*and* the 0.0.0.0/0 route ( longest match ). When looking at the logs
the 1.0.1.17/32 route was not being installed because the matching
route was not in a usable state, which is because the 192.168.0.0/24
route was in the process of being installed.
Signed-off-by: Donald Sharp <sharpd@nvidia.com>
2023-10-19 22:38:12 +02:00
|
|
|
goto continue_up_tree;
|
2019-05-13 21:46:05 +02:00
|
|
|
}
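/* 'continue_up_tree' (defined later in this function) is assumed to
 * step 'rn' up to its parent node so that a less-specific covering
 * route can be tried instead of giving up on the nexthop.
 */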
|
|
|
|
|
|
|
|
/* Pick up selected route. */
|
|
|
|
/* However, do not resolve over default route unless explicitly
|
2019-05-15 18:59:37 +02:00
|
|
|
* allowed.
|
|
|
|
*/
|
2019-05-13 21:46:05 +02:00
|
|
|
if (is_default_prefix(&rn->p)
|
2019-08-28 16:01:38 +02:00
|
|
|
&& !rnh_resolve_via_default(zvrf, p.family)) {
|
2019-05-13 21:46:05 +02:00
|
|
|
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
|
2023-07-17 16:00:32 +02:00
|
|
|
zlog_debug(" :%s: %pFX Resolved against default route",
|
|
|
|
__func__, &p);
|
2019-05-13 21:46:05 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
dest = rib_dest_from_rnode(rn);
|
2023-07-17 16:00:32 +02:00
|
|
|
if (dest && dest->selected_fib &&
|
|
|
|
(!CHECK_FLAG(dest->selected_fib->status,
|
|
|
|
ROUTE_ENTRY_REMOVED) ||
|
|
|
|
CHECK_FLAG(dest->selected_fib->status,
|
|
|
|
ROUTE_ENTRY_ROUTE_REPLACING)) &&
|
|
|
|
dest->selected_fib->type != ZEBRA_ROUTE_TABLE)
|
2019-05-13 21:46:05 +02:00
|
|
|
match = dest->selected_fib;
|
|
|
|
|
|
|
|
/* If there is no selected route or matched route is EGP, go up
|
2019-05-15 18:59:37 +02:00
|
|
|
* tree.
|
|
|
|
*/
|
2019-05-13 21:46:05 +02:00
|
|
|
|
2023-06-28 14:11:41 +02:00
|
|
|
/* If the candidate match's type is considered "connected",
|
|
|
|
* we consider it first.
|
|
|
|
*/
|
2023-10-19 22:38:12 +02:00
|
|
|
if (match && (RIB_CONNECTED_ROUTE(match) ||
|
|
|
|
(RIB_SYSTEM_ROUTE(match) && RSYSTEM_ROUTE(type)))) {
|
2022-03-29 16:55:34 +02:00
|
|
|
match = zebra_nhg_connected_ifindex(rn, match,
|
|
|
|
nexthop->ifindex);
|
|
|
|
|
2020-02-25 14:29:46 +01:00
|
|
|
newhop = match->nhe->nhg.nexthop;
|
zebra: fix nhg out of sync between zebra and kernel
PR#13413 introduces a reinstall mechanism, but there is a problem with the route
leak scenario.
With route leak configuration: ( `x1` and `x2` are binded to `vrf1` )
```
vrf vrf2
ip route 75.75.75.75/32 77.75.1.75 nexthop-vrf vrf1
ip route 75.75.75.75/32 77.75.2.75 nexthop-vrf vrf1
exit-vrf
```
At first, everything is ok. But after `x1` is set down and up (the interval
between the down and up operations should be less than 180 seconds),
`x1` is lost from the nexthop group:
```
anlan# ip nexthop
id 121 group 122/123 proto zebra
id 122 via 77.75.1.75 dev x1 scope link proto zebra
id 123 via 77.75.2.75 dev x2 scope link proto zebra
anlan# ip route show table 2
75.75.75.75 nhid 121 proto 196 metric 20
nexthop via 77.75.1.75 dev x1 weight 1
nexthop via 77.75.2.75 dev x2 weight 1
anlan# ip link set dev x1 down
anlan# ip link set dev x1 up
anlan# ip route show table 2 <- Wrong, one nexthop lost from group
75.75.75.75 nhid 121 via 77.75.2.75 dev x2 proto 196 metric 20
anlan# ip nexthop
id 121 group 123 proto zebra
id 122 via 77.75.1.75 dev x1 scope link proto zebra
id 123 via 77.75.2.75 dev x2 scope link proto zebra
anlan# show ip route vrf vrf2 <- Still ok
VRF vrf2:
S>* 75.75.75.75/32 [1/0] via 77.75.1.75, x1 (vrf vrf1), weight 1, 00:00:05
* via 77.75.2.75, x2 (vrf vrf1), weight 1, 00:00:05
```
From the impact on kernel:
The `nh->type` of `id 122` is *always* `NEXTHOP_TYPE_IPV4` in the route leak
case. Then, `nexthop_is_ifindex_type()` introduced by commit `5bb877` always
returns `false`, so its dependents can't be reinstalled. After `x1` is down,
there is only `id 123` in the group of `id 121`. So, finally, `id 121` remains
unchanged after `x1` is up, i.e., `id 122` is not added back to the group even
though it is reinstalled itself.
From the impact on zebra:
The `show ip route vrf vrf2` is still ok because the `id`s are reused/reinstalled
successfully within 180 seconds after `x1` is down and up. The group of `id 121`
is with old `NEXTHOP_GROUP_INSTALLED` flag, and it is still the group of `id 122`
and `id 123` as before.
In this way, kernel and zebra have become out of sync.
The `nh->type` of `id 122` should be adjusted to `NEXTHOP_TYPE_IPV4_IFINDEX`
after the nexthop is resolved. This commit does that so the reinstall
mechanism works.
Signed-off-by: anlan_cs <anlan_cs@tom.com>
2023-07-24 08:40:22 +02:00
|
|
|
if (nexthop->type == NEXTHOP_TYPE_IPV4) {
|
2022-03-29 16:55:34 +02:00
|
|
|
nexthop->ifindex = newhop->ifindex;
|
2023-07-24 08:40:22 +02:00
|
|
|
nexthop->type = NEXTHOP_TYPE_IPV4_IFINDEX;
|
|
|
|
} else if (nexthop->type == NEXTHOP_TYPE_IPV6) {
|
|
|
|
nexthop->ifindex = newhop->ifindex;
|
|
|
|
nexthop->type = NEXTHOP_TYPE_IPV6_IFINDEX;
|
|
|
|
} else if (nexthop->ifindex != newhop->ifindex) {
|
2022-03-29 16:55:34 +02:00
|
|
|
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
|
|
|
|
zlog_debug(
|
|
|
|
"%s: %pNHv given ifindex does not match nexthops ifindex found: %pNHv",
|
|
|
|
__func__, nexthop, newhop);
|
2023-10-19 22:38:12 +02:00
|
|
|
goto continue_up_tree;
|
2019-05-13 21:46:05 +02:00
|
|
|
}
|
2020-03-10 15:50:40 +01:00
|
|
|
|
2023-06-28 14:11:41 +02:00
|
|
|
/* NHRP special case: need to indicate onlink */
|
|
|
|
if (match->type == ZEBRA_ROUTE_NHRP)
|
|
|
|
SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK);
|
|
|
|
|
2020-03-10 15:50:40 +01:00
|
|
|
if (IS_ZEBRA_DEBUG_NHG_DETAIL)
|
2022-06-14 21:02:27 +02:00
|
|
|
zlog_debug(
|
|
|
|
"%s: CONNECT match %p (%pNG), newhop %pNHv",
|
|
|
|
__func__, match, match->nhe, newhop);
|
2020-03-10 15:50:40 +01:00
|
|
|
|
2019-05-13 21:46:05 +02:00
|
|
|
return 1;
|
2023-10-19 22:38:12 +02:00
|
|
|
} else if (match && CHECK_FLAG(flags, ZEBRA_FLAG_ALLOW_RECURSION)) {
|
2020-05-08 22:36:26 +02:00
|
|
|
struct nexthop_group *nhg;
|
2021-02-22 21:09:07 +01:00
|
|
|
struct nexthop *resolver;
|
|
|
|
struct backup_nh_map_s map = {};
|
2020-05-08 22:36:26 +02:00
|
|
|
|
2019-05-13 21:46:05 +02:00
|
|
|
resolved = 0;
|
2020-05-08 22:36:26 +02:00
|
|
|
|
zebra: Fix handling of recursive routes when processing closely in time
When zebra receives routes from upper level protocols it decodes the
zapi message and places the routes on the metaQ for processing. Suppose
we have a route A that is already installed by some routing protocol.
And there is a route B that has a nexthop that will be recursively
resolved through A. Imagine if a route replace operation for A is
going to happen from an upper level protocol at about the same time
the route B is going to be installed into zebra. If these routes
are received, and decoded, at about the same time there exists a
chance that the metaQ will contain both of them at the same time.
If the order of installation is [ B, A ]. B will be resolved
correctly through A and installed, A will be processed and
re-installed into the FIB. If the nexthops have changed for
A then the owner of B should be notified about the change( and B
can do the correct action here and decide to withdraw or re-install ).
Now imagine if the order of routes received for processing on the
metaQ is [ A, B ]. A will be received, processed and sent to the
dataplane for reinstall. B will then be pulled off the metaQ and
fail the install since A is in a `not Installed` state.
Let's loosen the restriction in nexthop resolution for B such
that if the route we are dependent on is a route replace operation
allow the resolution to succeed. This requires zebra to track a new
route state( ROUTE_ENTRY_ROUTE_REPLACING ) that can be looked at
during nexthop resolution. I believe this is ok because A is
a route replace operation, which could result in this:
-route install failed, in which case B should be nht'ing and
will receive the nht failure and the upper level protocol should
remove B.
-route install succeeded, no nexthop changes. In this case
allowing the resolution for B is ok, NHT will not notify the upper
level protocol so no action is needed.
-route install succeeded, nexthops changes. In this case
allowing the resolution for B is ok, NHT will notify the upper
level protocol and it can decide to reinstall B or not based
upon it's own algorithm.
This set of events was found by the bgp_distance_change topotest(s).
Effectively the tests were looking for the bug ( A, B order in the metaQ )
as the `correct` state. When under very heavy load, the A, B ordering
caused A to just be installed and fully resolved in the dataplane before
B is gotten to( which is entirely possible ).
Signed-off-by: Donald Sharp <sharpd@nvidia.com>
2022-10-21 13:20:44 +02:00
|
|
|
/*
|
|
|
|
* Only useful if installed or being Route Replacing
|
|
|
|
* Why Being Route Replaced as well?
|
|
|
|
* Imagine a route A and route B (that depends on A)
|
|
|
|
* for recursive resolution and A already exists in the
|
|
|
|
* zebra rib. If zebra receives the routes
|
|
|
|
* for resolution at approximately the same time in the [
|
|
|
|
* B, A ] order on the workQ. If this happens then
|
|
|
|
* normal route resolution will happen and B will be
|
|
|
|
* resolved successfully and then A will be resolved
|
|
|
|
* successfully. Now imagine the reversed order [A, B].
|
|
|
|
* A will be resolved and then scheduled for install
|
|
|
|
* (Thus not having the ROUTE_ENTRY_INSTALLED flag ). B
|
|
|
|
* will then get resolved and fail to be installed
|
|
|
|
* because of the original test below. Let's `loosen` this
|
|
|
|
* up a tiny bit and allow the
|
|
|
|
* ROUTE_ENTRY_ROUTE_REPLACING flag ( that is set when a
|
|
|
|
* Route Replace operation is being initiated on A now )
|
|
|
|
* to now satisfy this situation. This will allow
|
|
|
|
* either order in the workQ to work properly.
|
|
|
|
*/
|
|
|
|
if (!CHECK_FLAG(match->status, ROUTE_ENTRY_INSTALLED) &&
|
|
|
|
!CHECK_FLAG(match->status,
|
|
|
|
ROUTE_ENTRY_ROUTE_REPLACING)) {
|
2020-09-29 13:54:35 +02:00
|
|
|
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
|
2022-06-14 21:02:27 +02:00
|
|
|
zlog_debug(
|
2022-10-21 13:20:44 +02:00
|
|
|
"%s: match %p (%pNG) not installed or being Route Replaced",
|
2022-06-14 21:02:27 +02:00
|
|
|
__func__, match, match->nhe);
|
2020-05-26 23:35:20 +02:00
|
|
|
|
2023-10-19 22:38:12 +02:00
|
|
|
if (CHECK_FLAG(match->status,
|
|
|
|
ROUTE_ENTRY_QUEUED))
|
|
|
|
goto continue_up_tree;
|
|
|
|
|
2020-05-26 23:35:20 +02:00
|
|
|
goto done_with_match;
|
|
|
|
}
|
|
|
|
|
2020-07-17 19:10:29 +02:00
|
|
|
/* Examine installed nexthops; note that there
|
|
|
|
* may not be any installed primary nexthops if
|
|
|
|
* only backups are installed.
|
|
|
|
*/
|
|
|
|
nhg = rib_get_fib_nhg(match);
|
2020-05-08 22:36:26 +02:00
|
|
|
for (ALL_NEXTHOPS_PTR(nhg, newhop)) {
|
2019-09-09 23:20:17 +02:00
|
|
|
if (!nexthop_valid_resolve(nexthop, newhop))
|
2019-05-13 21:46:05 +02:00
|
|
|
continue;
|
|
|
|
|
2020-03-10 15:50:40 +01:00
|
|
|
if (IS_ZEBRA_DEBUG_NHG_DETAIL)
|
2022-06-14 21:02:27 +02:00
|
|
|
zlog_debug(
|
|
|
|
"%s: RECURSIVE match %p (%pNG), newhop %pNHv",
|
|
|
|
__func__, match, match->nhe,
|
|
|
|
newhop);
|
2020-03-10 15:50:40 +01:00
|
|
|
|
2019-05-13 21:46:05 +02:00
|
|
|
SET_FLAG(nexthop->flags,
|
|
|
|
NEXTHOP_FLAG_RECURSIVE);
|
2021-02-22 21:09:07 +01:00
|
|
|
resolver = nexthop_set_resolved(afi, newhop,
|
|
|
|
nexthop, NULL);
|
2019-05-13 21:46:05 +02:00
|
|
|
resolved = 1;
|
2021-02-22 21:09:07 +01:00
|
|
|
|
|
|
|
/* If there are backup nexthops, capture
|
|
|
|
* that info with the resolving nexthop.
|
|
|
|
*/
|
|
|
|
if (resolver && newhop->backup_num > 0) {
|
|
|
|
resolve_backup_nexthops(newhop,
|
|
|
|
match->nhe,
|
|
|
|
resolver, nhe,
|
|
|
|
&map);
|
|
|
|
}
|
2019-05-13 21:46:05 +02:00
|
|
|
}
|
2020-04-13 16:25:48 +02:00
|
|
|
|
2020-05-27 18:52:07 +02:00
|
|
|
/* Examine installed backup nexthops, if any. There
|
|
|
|
* are only installed backups *if* there is a
|
2021-02-22 21:06:28 +01:00
|
|
|
* dedicated fib list. The UI can also control use
|
|
|
|
* of backups for resolution.
|
2020-05-27 18:52:07 +02:00
|
|
|
*/
|
|
|
|
nhg = rib_get_fib_backup_nhg(match);
|
2021-02-22 21:06:28 +01:00
|
|
|
if (!use_recursive_backups ||
|
|
|
|
nhg == NULL || nhg->nexthop == NULL)
|
2020-05-08 22:36:26 +02:00
|
|
|
goto done_with_match;
|
|
|
|
|
|
|
|
for (ALL_NEXTHOPS_PTR(nhg, newhop)) {
|
|
|
|
if (!nexthop_valid_resolve(nexthop, newhop))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (IS_ZEBRA_DEBUG_NHG_DETAIL)
|
2022-06-14 21:02:27 +02:00
|
|
|
zlog_debug(
|
|
|
|
"%s: RECURSIVE match backup %p (%pNG), newhop %pNHv",
|
|
|
|
__func__, match, match->nhe,
|
|
|
|
newhop);
|
2020-05-08 22:36:26 +02:00
|
|
|
|
|
|
|
SET_FLAG(nexthop->flags,
|
|
|
|
NEXTHOP_FLAG_RECURSIVE);
|
2020-07-20 13:43:54 +02:00
|
|
|
nexthop_set_resolved(afi, newhop, nexthop,
|
|
|
|
NULL);
|
2020-05-08 22:36:26 +02:00
|
|
|
resolved = 1;
|
|
|
|
}
|
2021-01-29 21:43:52 +01:00
|
|
|
|
2020-05-08 22:36:26 +02:00
|
|
|
done_with_match:
|
2021-01-29 21:43:52 +01:00
|
|
|
/* Capture resolving mtu */
|
|
|
|
if (resolved) {
|
|
|
|
if (pmtu)
|
|
|
|
*pmtu = match->mtu;
|
|
|
|
|
2023-10-19 22:38:12 +02:00
|
|
|
} else {
|
|
|
|
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
|
2019-05-13 21:46:05 +02:00
|
|
|
zlog_debug(
|
2023-10-19 22:38:12 +02:00
|
|
|
" %s: Recursion failed to find while looking at %pRN",
|
|
|
|
__func__, rn);
|
|
|
|
goto continue_up_tree;
|
2019-05-13 21:46:05 +02:00
|
|
|
}
|
2023-10-19 22:38:12 +02:00
|
|
|
|
|
|
|
return 1;
|
|
|
|
} else if (IS_ZEBRA_DEBUG_RIB_DETAILED) {
|
|
|
|
zlog_debug(
|
|
|
|
" %s: Route Type %s has not turned on recursion %pRN failed to match",
|
|
|
|
__func__, zebra_route_string(type), rn);
|
|
|
|
if (type == ZEBRA_ROUTE_BGP
|
|
|
|
&& !CHECK_FLAG(flags, ZEBRA_FLAG_IBGP))
|
|
|
|
zlog_debug(
|
|
|
|
" EBGP: see \"disable-ebgp-connected-route-check\" or \"disable-connected-check\"");
|
2019-05-13 21:46:05 +02:00
|
|
|
}
|
2023-10-19 22:38:12 +02:00
|
|
|
|
|
|
|
continue_up_tree:
|
|
|
|
/*
|
|
|
|
* If there is no selected route, or the matched route is EGP, go up the
|
|
|
|
* tree.
|
|
|
|
*/
|
|
|
|
do {
|
|
|
|
rn = rn->parent;
|
|
|
|
} while (rn && rn->info == NULL);
|
|
|
|
if (rn)
|
|
|
|
route_lock_node(rn);
|
2019-05-13 21:46:05 +02:00
|
|
|
}
|
2023-10-19 22:38:12 +02:00
|
|
|
|
2019-05-13 21:46:05 +02:00
|
|
|
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
|
2020-03-24 17:38:20 +01:00
|
|
|
zlog_debug(" %s: Nexthop did not lookup in table",
|
|
|
|
__func__);
|
2019-05-13 21:46:05 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* This function verifies reachability of one given nexthop, which can be
|
|
|
|
* numbered or unnumbered, IPv4 or IPv6. The result is unconditionally stored
|
|
|
|
* in the nexthop->flags field. The nexthop->ifindex will be updated
|
2021-02-22 21:09:07 +01:00
|
|
|
* appropriately as well.
|
|
|
|
*
|
|
|
|
* An existing route map can turn an otherwise active nexthop inactive,
|
|
|
|
* but not vice versa.
|
2019-05-13 21:46:05 +02:00
|
|
|
*
|
|
|
|
* The return value is the final value of 'ACTIVE' flag.
|
|
|
|
*/
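/* Hedged usage sketch (illustrative, not part of the original code): a
 * caller walking a nexthop list typically relies on the invariant that
 *
 *     if (nexthop_active_check(rn, re, nexthop, nhe))
 *             counter++;        // ACTIVE flag is set on the nexthop
 *     else
 *             ;                 // ACTIVE flag is guaranteed clear
 *
 * i.e. the return value always mirrors the final NEXTHOP_FLAG_ACTIVE
 * state left in nexthop->flags.
 */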
|
|
|
|
static unsigned nexthop_active_check(struct route_node *rn,
|
|
|
|
struct route_entry *re,
|
2021-02-22 21:09:07 +01:00
|
|
|
struct nexthop *nexthop,
|
|
|
|
struct nhg_hash_entry *nhe)
|
2019-05-13 21:46:05 +02:00
|
|
|
{
|
lib: Introducing a 3rd state for route-map match cmd: RMAP_NOOP
Introducing a 3rd state for route_map_apply library function: RMAP_NOOP
Traditionally route map MATCH rule apis were designed to return
a binary response, consisting of either RMAP_MATCH or RMAP_NOMATCH.
(Route-map SET rule apis return RMAP_OKAY or RMAP_ERROR).
Depending on this response, the following statemachine decided the
course of action:
State1:
If match cmd returns RMAP_MATCH then, keep existing behaviour.
If routemap type is PERMIT, execute set cmds or call cmds if applicable,
otherwise PERMIT!
Else If routemap type is DENY, we DENYMATCH right away
State2:
If match cmd returns RMAP_NOMATCH, continue on to next route-map. If there
are no other rules or if all the rules return RMAP_NOMATCH, return DENYMATCH
We require a 3rd state because of the following situation:
The issue - what if, the rule api needs to abort or ignore a rule?:
"match evpn vni xx" route-map filter can be applied to incoming routes
regardless of whether the tunnel type is vxlan or mpls.
This rule should be N/A for mpls based evpn route, but applicable to only
vxlan based evpn route.
Also, this rule should be applicable for routes with VNI label only, and
not for routes without labels. For example, type 3 and type 4 EVPN routes
do not have labels, so, this match cmd should let them through.
Today, the filter produces either a match or nomatch response regardless of
whether it is mpls/vxlan, resulting in either permitting or denying the
route.. So an mpls evpn route may get filtered out incorrectly.
Eg: "route-map RM1 permit 10 ; match evpn vni 20" or
"route-map RM2 deny 20 ; match vni 20"
With the introduction of the 3rd state, we can abort this rule check safely.
How? The rules api can now return RMAP_NOOP to indicate
that it encountered an invalid check, and needs to abort just that rule,
but continue with other rules.
As a result we have a 3rd state:
State3:
If match cmd returned RMAP_NOOP
Then, proceed to other route-map, otherwise if there are no more
rules or if all the rules return RMAP_NOOP, then, return RMAP_PERMITMATCH.
Signed-off-by: Lakshman Krishnamoorthy <lkrishnamoor@vmware.com>
2019-06-19 23:04:36 +02:00
|
|
|
route_map_result_t ret = RMAP_PERMITMATCH;
|
2021-01-11 19:53:42 +01:00
|
|
|
afi_t family;
|
2019-05-13 21:46:05 +02:00
|
|
|
const struct prefix *p, *src_p;
|
|
|
|
struct zebra_vrf *zvrf;
|
2021-01-29 21:43:52 +01:00
|
|
|
uint32_t mtu = 0;
|
2022-01-31 01:44:35 +01:00
|
|
|
vrf_id_t vrf_id;
|
2019-05-13 21:46:05 +02:00
|
|
|
|
|
|
|
srcdest_rnode_prefixes(rn, &p, &src_p);
|
|
|
|
|
|
|
|
if (rn->p.family == AF_INET)
|
|
|
|
family = AFI_IP;
|
|
|
|
else if (rn->p.family == AF_INET6)
|
|
|
|
family = AFI_IP6;
|
|
|
|
else
|
2022-02-07 19:22:41 +01:00
|
|
|
family = AFI_UNSPEC;
|
2021-01-21 16:12:05 +01:00
|
|
|
|
2021-01-29 21:43:52 +01:00
|
|
|
if (IS_ZEBRA_DEBUG_NHG_DETAIL)
|
|
|
|
zlog_debug("%s: re %p, nexthop %pNHv", __func__, re, nexthop);
|
|
|
|
|
zebra: fix wrong nexthop check for kernel routes
When changing one interface's vrf, the kernel routes are wrongly kept
in old vrf. Finally, the forwarding table in that old vrf can't forward
traffic correctly for those residual entries.
Follow these steps to make this problem happen:
( Firstly, "x1" interface of default vrf is with address of "6.6.6.6/24". )
```
anlan# ip route add 4.4.4.0/24 via 6.6.6.8 dev x1
anlan# ip link add vrf1 type vrf table 1
anlan# ip link set vrf1 up
anlan# ip link set x1 master vrf1
```
Then check `show ip route`, the route of "4.4.4.0/24" is still selected
in default vrf.
If the interface goes down, the kernel routes will be reevaluated. Those
kernel routes with active interface of nexthop can be kept no change, it
is a fast path. Otherwise, it enters into slow path to do careful examination
on this nexthop.
After the interface's vrf had been changed into new vrf, the down message of
this interface came. It means the interface is not in old vrf although it
still exists during that checking, so the kernel routes should be dropped
after this nexthop matching against a default route in slow path. But, in
current code they are wrongly kept in fast path for not checking vrf.
So, modified the checking active nexthop with vrf comparision for the interface
during reevaluation.
Signed-off-by: anlan_cs <vic.lan@pica8.com>
2023-06-19 04:21:28 +02:00
|
|
|
vrf_id = zvrf_id(rib_dest_vrf(rib_dest_from_rnode(rn)));
|
|
|
|
|
2021-01-29 21:43:52 +01:00
|
|
|
/*
|
zebra: Allow kernel routes to stick around better on interface state changes
Currently kernel routes on system bring up would be `auto-accepted`,
then if an interface went down all kernel and system routes would
be re-evaluated. There exists situations where a kernel route can
exist but the interface itself is not exactly in a state that is
ready to create a connected route yet. As such when any interface
goes down in the system all kernel/system routes would be re-evaluated
and then since that interfaces connected route is not in the table yet
the route is matching against a default route( or not at all ) and
is being dropped.
Modify the code such that kernel or system routes just look for interface
being in a good state (up or operative) and accept it.
Broken code:
eva# show ip route
Codes: K - kernel route, C - connected, S - static, R - RIP,
O - OSPF, I - IS-IS, B - BGP, E - EIGRP, N - NHRP,
T - Table, v - VNC, V - VNC-Direct, A - Babel, D - SHARP,
F - PBR, f - OpenFabric,
> - selected route, * - FIB route, q - queued, r - rejected, b - backup
t - trapped, o - offload failure
K>* 0.0.0.0/0 [0/100] via 192.168.119.1, enp39s0, 00:05:08
K>* 1.2.3.5/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.6/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.7/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.8/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.9/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.10/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.11/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.12/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.13/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.14/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.16/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 1.2.3.17/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
C>* 4.5.6.99/32 is directly connected, dummy9, 00:05:08
K>* 4.9.10.11/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:05:08
K>* 10.11.12.13/32 [0/0] via 192.168.119.1, enp39s0, 00:05:08
C>* 192.168.10.0/24 is directly connected, dummy99, 00:05:08
C>* 192.168.119.0/24 is directly connected, enp39s0, 00:05:08
<shutdown a non-related interface>
eva# show ip route
Codes: K - kernel route, C - connected, S - static, R - RIP,
O - OSPF, I - IS-IS, B - BGP, E - EIGRP, N - NHRP,
T - Table, v - VNC, V - VNC-Direct, A - Babel, D - SHARP,
F - PBR, f - OpenFabric,
> - selected route, * - FIB route, q - queued, r - rejected, b - backup
t - trapped, o - offload failure
K>* 0.0.0.0/0 [0/100] via 192.168.119.1, enp39s0, 00:05:28
C>* 4.5.6.99/32 is directly connected, dummy9, 00:05:28
K>* 10.11.12.13/32 [0/0] via 192.168.119.1, enp39s0, 00:05:28
C>* 192.168.10.0/24 is directly connected, dummy99, 00:05:28
C>* 192.168.119.0/24 is directly connected, enp39s0, 00:05:28
Working code:
eva# show ip route
Codes: K - kernel route, C - connected, S - static, R - RIP,
O - OSPF, I - IS-IS, B - BGP, E - EIGRP, N - NHRP,
T - Table, v - VNC, V - VNC-Direct, A - Babel, D - SHARP,
F - PBR, f - OpenFabric,
> - selected route, * - FIB route, q - queued, r - rejected, b - backup
t - trapped, o - offload failure
K>* 0.0.0.0/0 [0/100] via 192.168.119.1, enp39s0, 00:00:04
K>* 1.2.3.5/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.6/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.7/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.8/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.9/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.10/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.11/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.12/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.13/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.14/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.16/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 1.2.3.17/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
C>* 4.5.6.99/32 is directly connected, dummy9, 00:00:04
K>* 4.9.10.11/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:04
K>* 10.11.12.13/32 [0/0] via 192.168.119.1, enp39s0, 00:00:04
C>* 192.168.10.0/24 is directly connected, dummy99, 00:00:04
C>* 192.168.119.0/24 is directly connected, enp39s0, 00:00:04
<shutdown a non-related interface>
eva# show ip route
Codes: K - kernel route, C - connected, S - static, R - RIP,
O - OSPF, I - IS-IS, B - BGP, E - EIGRP, N - NHRP,
T - Table, v - VNC, V - VNC-Direct, A - Babel, D - SHARP,
F - PBR, f - OpenFabric,
> - selected route, * - FIB route, q - queued, r - rejected, b - backup
t - trapped, o - offload failure
K>* 0.0.0.0/0 [0/100] via 192.168.119.1, enp39s0, 00:00:15
K>* 1.2.3.5/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.6/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.7/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.8/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.9/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.10/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.11/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.12/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.13/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.14/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.16/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 1.2.3.17/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
C>* 4.5.6.99/32 is directly connected, dummy9, 00:00:15
K>* 4.9.10.11/32 [0/0] via 172.22.0.44, br-23e378ed7fd2 linkdown, 00:00:15
K>* 10.11.12.13/32 [0/0] via 192.168.119.1, enp39s0, 00:00:15
C>* 192.168.10.0/24 is directly connected, dummy99, 00:00:15
C>* 192.168.119.0/24 is directly connected, enp39s0, 00:00:15
eva#
Signed-off-by: Donald Sharp <sharpd@nvidia.com>
2022-06-23 18:22:30 +02:00
|
|
|
* If this is a kernel route, then if the interface is *up* then
|
2021-01-29 21:43:52 +01:00
|
|
|
* by golly gee whiz it's a good route.
|
|
|
|
*/
|
2022-06-23 18:22:30 +02:00
|
|
|
if (re->type == ZEBRA_ROUTE_KERNEL || re->type == ZEBRA_ROUTE_SYSTEM) {
|
|
|
|
struct interface *ifp;
|
|
|
|
|
|
|
|
ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id);
|
2021-01-29 21:43:52 +01:00
|
|
|
|
2024-11-30 14:11:04 +01:00
|
|
|
if (ifp && ifp->vrf->vrf_id == vrf_id && if_is_up(ifp) && if_is_operative(ifp)) {
|
2022-06-23 18:22:30 +02:00
|
|
|
SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
|
|
|
|
goto skip_check;
|
|
|
|
}
|
|
|
|
}
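/* Hedged example (drawn from the commit rationale above): a kernel route
 * whose interface is operational but whose connected prefix has not yet
 * been re-installed after a flap is accepted here directly, instead of
 * falling through to nexthop_active() where it might only match a default
 * route and be dropped.
 */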
|
2022-01-31 01:44:35 +01:00
|
|
|
|
2019-05-13 21:46:05 +02:00
|
|
|
switch (nexthop->type) {
|
|
|
|
case NEXTHOP_TYPE_IFINDEX:
|
2022-01-31 01:44:35 +01:00
|
|
|
if (nexthop_active(nexthop, nhe, &rn->p, re->type, re->flags,
|
|
|
|
&mtu, vrf_id))
|
2019-05-13 21:46:05 +02:00
|
|
|
SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
|
|
|
|
else
|
|
|
|
UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
|
|
|
|
break;
|
|
|
|
case NEXTHOP_TYPE_IPV4:
|
|
|
|
case NEXTHOP_TYPE_IPV4_IFINDEX:
|
|
|
|
family = AFI_IP;
|
2022-01-31 01:44:35 +01:00
|
|
|
if (nexthop_active(nexthop, nhe, &rn->p, re->type, re->flags,
|
|
|
|
&mtu, vrf_id))
|
2019-05-13 21:46:05 +02:00
|
|
|
SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
|
|
|
|
else
|
|
|
|
UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
|
|
|
|
break;
|
|
|
|
case NEXTHOP_TYPE_IPV6:
|
|
|
|
case NEXTHOP_TYPE_IPV6_IFINDEX:
|
|
|
|
/* RFC 5549, v4 prefix with v6 NH */
|
|
|
|
if (rn->p.family != AF_INET)
|
|
|
|
family = AFI_IP6;
|
2021-01-29 21:43:52 +01:00
|
|
|
|
2022-01-31 01:44:35 +01:00
|
|
|
if (nexthop_active(nexthop, nhe, &rn->p, re->type, re->flags,
|
|
|
|
&mtu, vrf_id))
|
2021-01-29 21:43:52 +01:00
|
|
|
SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
|
|
|
|
else
|
|
|
|
UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
|
2019-05-13 21:46:05 +02:00
|
|
|
break;
|
|
|
|
case NEXTHOP_TYPE_BLACKHOLE:
|
|
|
|
SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
2020-03-10 15:50:40 +01:00
|
|
|
|
2021-01-29 21:43:52 +01:00
|
|
|
skip_check:
|
|
|
|
|
2019-05-13 21:46:05 +02:00
|
|
|
if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) {
|
|
|
|
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
|
2020-03-10 15:50:40 +01:00
|
|
|
zlog_debug(" %s: Unable to find active nexthop",
|
|
|
|
__func__);
|
2019-05-13 21:46:05 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-01-29 21:43:52 +01:00
|
|
|
/* Capture recursive nexthop mtu.
|
|
|
|
* TODO -- the code used to just reset the re's value to zero
|
|
|
|
* for each nexthop, and then jam any resolving route's mtu value in,
|
|
|
|
* whether or not that was zero, or lt/gt any existing value? The
|
|
|
|
* way this is used appears to be as a floor value, so let's try
|
|
|
|
* using it that way here.
|
|
|
|
*/
|
|
|
|
if (mtu > 0) {
|
|
|
|
if (re->nexthop_mtu == 0 || re->nexthop_mtu > mtu)
|
|
|
|
re->nexthop_mtu = mtu;
|
|
|
|
}
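/* Worked example of the floor behaviour above (illustrative numbers): if
 * this nexthop resolved through a route with mtu 1400 while re->nexthop_mtu
 * is still 0 (or currently 1500), re->nexthop_mtu becomes 1400; a later
 * nexthop resolving through an mtu-9000 route leaves that 1400 floor
 * untouched.
 */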
|
|
|
|
|
2019-05-13 21:46:05 +02:00
|
|
|
/* XXX: What exactly do those checks do? Do we support
|
|
|
|
* e.g. IPv4 routes with IPv6 nexthops or vice versa?
|
|
|
|
*/
|
|
|
|
if (RIB_SYSTEM_ROUTE(re) || (family == AFI_IP && p->family != AF_INET)
|
|
|
|
|| (family == AFI_IP6 && p->family != AF_INET6))
|
|
|
|
return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
|
|
|
|
|
|
|
|
/* The original code didn't determine the family correctly
|
|
|
|
* e.g. for NEXTHOP_TYPE_IFINDEX. Retrieve the correct afi
|
|
|
|
* from the rib_table_info in those cases.
|
|
|
|
* Possibly it may be better to use only the rib_table_info
|
|
|
|
* in every case.
|
|
|
|
*/
|
2021-01-29 21:43:52 +01:00
|
|
|
if (family == AFI_UNSPEC) {
|
2020-05-07 14:59:27 +02:00
|
|
|
struct rib_table_info *info;
|
2019-05-13 21:46:05 +02:00
|
|
|
|
|
|
|
info = srcdest_rnode_table_info(rn);
|
|
|
|
family = info->afi;
|
|
|
|
}
|
|
|
|
|
|
|
|
memset(&nexthop->rmap_src.ipv6, 0, sizeof(union g_addr));
|
|
|
|
|
2022-02-28 19:08:01 +01:00
|
|
|
zvrf = zebra_vrf_lookup_by_id(re->vrf_id);
|
2019-05-13 21:46:05 +02:00
|
|
|
if (!zvrf) {
|
|
|
|
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
|
2020-03-24 17:38:20 +01:00
|
|
|
zlog_debug(" %s: zvrf is NULL", __func__);
|
2019-05-13 21:46:05 +02:00
|
|
|
return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* It'll get set if required inside */
|
2023-08-11 17:21:03 +02:00
|
|
|
ret = zebra_route_map_check(family, re, p, nexthop, zvrf);
|
2019-05-13 21:46:05 +02:00
|
|
|
if (ret == RMAP_DENYMATCH) {
|
|
|
|
if (IS_ZEBRA_DEBUG_RIB) {
|
|
|
|
zlog_debug(
|
2022-10-19 18:44:55 +02:00
|
|
|
"%u:%pRN: Filtering out with NH %pNHv due to route map",
|
|
|
|
re->vrf_id, rn, nexthop);
|
2019-05-13 21:46:05 +02:00
|
|
|
}
|
|
|
|
UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
|
|
|
|
}
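/* Hedged example (assuming the usual "ip protocol <proto> route-map ..."
 * hook in zebra): a policy such as
 *
 *     route-map FILTER deny 10
 *      match interface eth0
 *
 * attached to this route's protocol would land here with RMAP_DENYMATCH
 * and clear NEXTHOP_FLAG_ACTIVE even though resolution itself succeeded.
 */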
|
|
|
|
return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
|
|
|
|
}
|
|
|
|
|
2020-01-30 22:43:09 +01:00
|
|
|
/* Helper function called after resolution to walk nhg rb trees
|
|
|
|
* and toggle the NEXTHOP_GROUP_VALID flag if the nexthop
|
|
|
|
* is active on singleton NHEs.
|
|
|
|
*/
|
|
|
|
static bool zebra_nhg_set_valid_if_active(struct nhg_hash_entry *nhe)
|
|
|
|
{
|
|
|
|
struct nhg_connected *rb_node_dep = NULL;
|
|
|
|
bool valid = false;
|
|
|
|
|
|
|
|
if (!zebra_nhg_depends_is_empty(nhe)) {
|
|
|
|
/* Is at least one depend valid? */
|
|
|
|
frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
|
|
|
|
if (zebra_nhg_set_valid_if_active(rb_node_dep->nhe))
|
|
|
|
valid = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* should be fully resolved singleton at this point */
|
|
|
|
if (CHECK_FLAG(nhe->nhg.nexthop->flags, NEXTHOP_FLAG_ACTIVE))
|
|
|
|
valid = true;
|
|
|
|
|
|
|
|
done:
|
|
|
|
if (valid)
|
|
|
|
SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID);
|
|
|
|
|
|
|
|
return valid;
|
|
|
|
}
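/* Hedged illustration of the recursion above (names are shorthand, not
 * code): for a group NHE G with depends {A, B} where only A's singleton
 * nexthop carries NEXTHOP_FLAG_ACTIVE, the walk marks A VALID, leaves B
 * unflagged, and then marks G VALID because at least one depend was valid.
 */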
|
|
|
|
|
2021-04-05 23:16:38 +02:00
|
|
|
/* Checks if the first nexthop is EVPN. If not, early return.
|
|
|
|
*
|
|
|
|
* This is used to determine if there is a mismatch between l3VNI
|
|
|
|
* of the route's vrf and the VNI labels of the nexthops in use.
|
|
|
|
*
|
|
|
|
* If there is a mismatch, we keep the labels as these MUST be DVNI nexthops.
|
|
|
|
*
|
|
|
|
* If there is no mismatch, we remove the labels and handle the routes as
|
|
|
|
* we have traditionally with evpn.
|
|
|
|
*/
|
|
|
|
static bool nexthop_list_set_evpn_dvni(struct route_entry *re,
|
|
|
|
struct nexthop_group *nhg)
|
|
|
|
{
|
|
|
|
struct nexthop *nexthop;
|
|
|
|
vni_t re_vrf_vni;
|
|
|
|
vni_t nh_vni;
|
|
|
|
bool use_dvni = false;
|
|
|
|
|
|
|
|
nexthop = nhg->nexthop;
|
|
|
|
|
|
|
|
if (!nexthop->nh_label || nexthop->nh_label_type != ZEBRA_LSP_EVPN)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
re_vrf_vni = get_l3vni_vni(re->vrf_id);
|
|
|
|
|
|
|
|
for (; nexthop; nexthop = nexthop->next) {
|
2022-11-16 19:15:32 +01:00
|
|
|
if (!nexthop->nh_label ||
|
|
|
|
nexthop->nh_label_type != ZEBRA_LSP_EVPN)
|
|
|
|
continue;
|
|
|
|
|
2021-04-05 23:16:38 +02:00
|
|
|
nh_vni = label2vni(&nexthop->nh_label->label[0]);
|
|
|
|
|
|
|
|
if (nh_vni != re_vrf_vni)
|
|
|
|
use_dvni = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Using traditional way, no VNI encap - remove labels */
|
|
|
|
if (!use_dvni) {
|
|
|
|
for (nexthop = nhg->nexthop; nexthop; nexthop = nexthop->next)
|
|
|
|
nexthop_del_labels(nexthop);
|
|
|
|
}
|
|
|
|
|
|
|
|
return use_dvni;
|
|
|
|
}
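/* Hedged example of the D-VNI decision above (illustrative values): with
 * the route's vrf mapped to L3VNI 4001, a group whose EVPN labels all
 * decode via label2vni() to 4001 gets the traditional handling (labels
 * stripped), while a single nexthop decoding to, say, 4002 flips use_dvni
 * and every EVPN label in the group is preserved for encapsulation.
 */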
|
|
|
|
|
2019-05-13 21:46:05 +02:00
|
|
|
/*
|
2021-02-22 21:09:07 +01:00
|
|
|
* Process a list of nexthops, given an nhe, determining
|
2020-03-10 15:50:40 +01:00
|
|
|
* whether each one is ACTIVE/installable at this time.
|
2019-05-13 21:46:05 +02:00
|
|
|
*/
|
2020-03-10 15:50:40 +01:00
|
|
|
static uint32_t nexthop_list_active_update(struct route_node *rn,
|
|
|
|
struct route_entry *re,
|
2021-02-22 21:09:07 +01:00
|
|
|
struct nhg_hash_entry *nhe,
|
|
|
|
bool is_backup)
|
2019-05-13 21:46:05 +02:00
|
|
|
{
|
|
|
|
union g_addr prev_src;
|
|
|
|
unsigned int prev_active, new_active;
|
|
|
|
ifindex_t prev_index;
|
2020-03-10 15:50:40 +01:00
|
|
|
uint32_t counter = 0;
|
2020-05-08 22:36:26 +02:00
|
|
|
struct nexthop *nexthop;
|
2021-02-22 21:09:07 +01:00
|
|
|
struct nexthop_group *nhg = &nhe->nhg;
|
2021-04-05 23:16:38 +02:00
|
|
|
bool vni_removed = false;
|
2020-05-08 22:36:26 +02:00
|
|
|
|
|
|
|
nexthop = nhg->nexthop;
|
2019-05-15 00:03:29 +02:00
|
|
|
|
2021-01-29 21:43:52 +01:00
|
|
|
/* Init recursive nh mtu */
|
|
|
|
re->nexthop_mtu = 0;
|
|
|
|
|
2021-04-05 23:16:38 +02:00
|
|
|
/* Handler for dvni evpn nexthops. Has to be done at nhg level */
|
|
|
|
vni_removed = !nexthop_list_set_evpn_dvni(re, nhg);
|
|
|
|
|
2020-03-10 15:50:40 +01:00
|
|
|
/* Process nexthops one-by-one */
|
|
|
|
for ( ; nexthop; nexthop = nexthop->next) {
|
2019-05-13 21:46:05 +02:00
|
|
|
|
|
|
|
/* No protocol daemon provides src and so we're skipping
|
2020-03-10 15:50:40 +01:00
|
|
|
* tracking it
|
|
|
|
*/
|
2019-05-13 21:46:05 +02:00
|
|
|
prev_src = nexthop->rmap_src;
|
|
|
|
prev_active = CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
|
|
|
|
prev_index = nexthop->ifindex;
|
2021-02-22 21:09:07 +01:00
|
|
|
|
|
|
|
/* Include the containing nhe for primary nexthops: if there's
|
|
|
|
* recursive resolution, we capture the backup info also.
|
|
|
|
*/
|
|
|
|
new_active =
|
|
|
|
nexthop_active_check(rn, re, nexthop,
|
|
|
|
(is_backup ? NULL : nhe));
|
|
|
|
|
2019-05-13 21:46:05 +02:00
|
|
|
/*
|
|
|
|
* We need to respect the multipath_num here
|
|
|
|
* since what we can install from
|
2020-03-10 15:50:40 +01:00
|
|
|
* a multipath perspective should not be a data plane
|
2019-05-13 21:46:05 +02:00
|
|
|
* decision point.
|
|
|
|
*/
|
2020-03-10 15:50:40 +01:00
|
|
|
if (new_active && counter >= zrouter.multipath_num) {
|
2019-11-21 21:53:59 +01:00
|
|
|
struct nexthop *nh;
|
|
|
|
|
|
|
|
/* Set it and its resolved nexthop as inactive. */
|
|
|
|
for (nh = nexthop; nh; nh = nh->resolved)
|
|
|
|
UNSET_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE);
|
|
|
|
|
2019-05-13 21:46:05 +02:00
|
|
|
new_active = 0;
|
|
|
|
}
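/* Hedged example of the clamp above (illustrative numbers): with
 * zrouter.multipath_num == 2 and four resolvable nexthops, the third and
 * fourth are forced inactive here (along with their resolved chains), so
 * at most two paths are ever counted as installable for this entry.
 */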
|
2019-02-15 17:39:12 +01:00
|
|
|
|
2019-07-02 07:37:17 +02:00
|
|
|
if (new_active)
|
2020-03-10 15:50:40 +01:00
|
|
|
counter++;
|
2019-02-15 17:39:12 +01:00
|
|
|
|
2021-02-22 21:09:07 +01:00
|
|
|
/* Check for changes to the nexthop - set ROUTE_ENTRY_CHANGED */
|
2022-12-10 00:51:22 +01:00
|
|
|
if (prev_active != new_active ||
|
|
|
|
prev_index != nexthop->ifindex ||
|
|
|
|
((nexthop->type >= NEXTHOP_TYPE_IFINDEX &&
|
|
|
|
nexthop->type < NEXTHOP_TYPE_IPV6) &&
|
|
|
|
prev_src.ipv4.s_addr != nexthop->rmap_src.ipv4.s_addr) ||
|
|
|
|
((nexthop->type >= NEXTHOP_TYPE_IPV6 &&
|
|
|
|
nexthop->type < NEXTHOP_TYPE_BLACKHOLE) &&
|
|
|
|
!(IPV6_ADDR_SAME(&prev_src.ipv6,
|
|
|
|
&nexthop->rmap_src.ipv6))) ||
|
|
|
|
CHECK_FLAG(re->status, ROUTE_ENTRY_LABELS_CHANGED) ||
|
|
|
|
vni_removed)
|
2019-05-13 21:46:05 +02:00
|
|
|
SET_FLAG(re->status, ROUTE_ENTRY_CHANGED);
|
|
|
|
}
|
|
|
|
|
2020-03-10 15:50:40 +01:00
|
|
|
return counter;
|
|
|
|
}
|
|
|
|
|
2020-05-10 23:34:27 +02:00
|
|
|
|
|
|
|
static uint32_t proto_nhg_nexthop_active_update(struct nexthop_group *nhg)
|
|
|
|
{
|
|
|
|
struct nexthop *nh;
|
|
|
|
uint32_t curr_active = 0;
|
|
|
|
|
|
|
|
/* Assume all active for now */
|
|
|
|
|
|
|
|
for (nh = nhg->nexthop; nh; nh = nh->next) {
|
|
|
|
SET_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE);
|
|
|
|
curr_active++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return curr_active;
|
|
|
|
}
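/* Hedged sketch of why the short-circuit is safe (assumption, not stated
 * in the original comments): a PROTO_OWNED nhe arrives fully formed from
 * an upper-level daemon, so zebra simply marks every nexthop ACTIVE and
 * reports the count, e.g. a four-nexthop protocol group always yields 4.
 */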
|
|
|
|
|
zebra: Attempt to reuse NHG after interface up and route reinstall
The previous commit modified zebra to reinstall the singleton
nexthops for a nexthop group when a interface event comes up.
Now let's modify zebra to attempt to reuse the nexthop group
when this happens and the upper level protocol resends the
route down with that. Only match if the protocol is the same
as well as the instance and the nexthop groups would match.
Here is the new behavior:
eva(config)# do show ip route 9.9.9.9/32
Routing entry for 9.9.9.9/32
Known via "static", distance 1, metric 0, best
Last update 00:00:08 ago
* 192.168.99.33, via dummy1, weight 1
* 192.168.100.33, via dummy2, weight 1
* 192.168.101.33, via dummy3, weight 1
* 192.168.102.33, via dummy4, weight 1
eva(config)# do show ip route nexthop-group 9.9.9.9/32
% Unknown command: do show ip route nexthop-group 9.9.9.9/32
eva(config)# do show ip route 9.9.9.9/32 nexthop-group
Routing entry for 9.9.9.9/32
Known via "static", distance 1, metric 0, best
Last update 00:00:54 ago
Nexthop Group ID: 57
* 192.168.99.33, via dummy1, weight 1
* 192.168.100.33, via dummy2, weight 1
* 192.168.101.33, via dummy3, weight 1
* 192.168.102.33, via dummy4, weight 1
eva(config)# exit
eva# conf
eva(config)# int dummy3
eva(config-if)# shut
eva(config-if)# no shut
eva(config-if)# do show ip route 9.9.9.9/32 nexthop-group
Routing entry for 9.9.9.9/32
Known via "static", distance 1, metric 0, best
Last update 00:00:08 ago
Nexthop Group ID: 57
* 192.168.99.33, via dummy1, weight 1
* 192.168.100.33, via dummy2, weight 1
* 192.168.101.33, via dummy3, weight 1
* 192.168.102.33, via dummy4, weight 1
eva(config-if)# exit
eva(config)# exit
eva# exit
sharpd@eva ~/frr1 (master) [255]> ip nexthop show id 57
id 57 group 37/43/50/58 proto zebra
sharpd@eva ~/frr1 (master)> ip route show 9.9.9.9/32
9.9.9.9 nhid 57 proto 196 metric 20
nexthop via 192.168.99.33 dev dummy1 weight 1
nexthop via 192.168.100.33 dev dummy2 weight 1
nexthop via 192.168.101.33 dev dummy3 weight 1
nexthop via 192.168.102.33 dev dummy4 weight 1
sharpd@eva ~/frr1 (master)>
Notice that we now no longer are creating a bunch of new
nexthop groups.
Signed-off-by: Donald Sharp <sharpd@nvidia.com>
2024-09-11 20:24:27 +02:00
|
|
|
/*
|
|
|
|
* This function takes the start of two comparable nexthop lists from two different
|
|
|
|
* nexthop groups and walks them to see if they can be considered the same
|
|
|
|
* or not. This is being used to determine if zebra should reuse a nhg
|
|
|
|
* from the old_re to the new_re, when an interface goes down and the
|
|
|
|
* new nhg sent down from the upper level protocol would resolve to it
|
|
|
|
*/
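/* Hedged walk-through (illustrative addresses): comparing a new list
 * {10.0.0.1, 10.0.0.2} against an old list {10.0.0.1 (inactive),
 * 10.0.0.1, 10.0.0.2} skips the inactive old entry, matches the remaining
 * pairs (including their resolved chains), and reports "same"; a leftover
 * new nexthop, or a leftover *active* old nexthop, makes the lists differ.
 */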
|
|
|
|
static bool zebra_nhg_nexthop_compare(const struct nexthop *nhop,
|
|
|
|
const struct nexthop *old_nhop,
|
|
|
|
const struct route_node *rn)
|
|
|
|
{
|
|
|
|
bool same = true;
|
|
|
|
|
|
|
|
while (nhop && old_nhop) {
|
|
|
|
if (IS_ZEBRA_DEBUG_NHG_DETAIL)
|
|
|
|
zlog_debug("%s: %pRN Comparing %pNHvv(%u) to old: %pNHvv(%u)",
|
|
|
|
__func__, rn, nhop, nhop->flags, old_nhop,
|
|
|
|
old_nhop->flags);
|
|
|
|
if (!CHECK_FLAG(old_nhop->flags, NEXTHOP_FLAG_ACTIVE)) {
|
|
|
|
if (IS_ZEBRA_DEBUG_NHG_DETAIL)
|
|
|
|
zlog_debug("%s: %pRN Old is not active going to the next one",
|
|
|
|
__func__, rn);
|
|
|
|
old_nhop = old_nhop->next;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (nexthop_same(nhop, old_nhop)) {
|
|
|
|
struct nexthop *new_recursive, *old_recursive;
|
|
|
|
|
|
|
|
if (IS_ZEBRA_DEBUG_NHG_DETAIL)
|
|
|
|
zlog_debug("%s: %pRN New and old are same, continuing search",
|
|
|
|
__func__, rn);
|
|
|
|
|
|
|
|
new_recursive = nhop->resolved;
|
|
|
|
old_recursive = old_nhop->resolved;
|
|
|
|
|
|
|
|
while (new_recursive && old_recursive) {
|
|
|
|
if (!nexthop_same(new_recursive, old_recursive)) {
|
|
|
|
same = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
new_recursive = new_recursive->next;
|
|
|
|
old_recursive = old_recursive->next;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (new_recursive)
|
|
|
|
same = false;
|
|
|
|
else if (old_recursive) {
|
|
|
|
while (old_recursive) {
|
|
|
|
if (CHECK_FLAG(old_recursive->flags,
|
|
|
|
NEXTHOP_FLAG_ACTIVE))
|
|
|
|
break;
|
|
|
|
old_recursive = old_recursive->next;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (old_recursive)
|
|
|
|
same = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!same)
|
|
|
|
break;
|
|
|
|
|
|
|
|
nhop = nhop->next;
|
|
|
|
old_nhop = old_nhop->next;
|
|
|
|
continue;
|
|
|
|
} else {
|
|
|
|
if (IS_ZEBRA_DEBUG_NHG_DETAIL)
|
|
|
|
zlog_debug("%s:%pRN They are not the same, stopping using new nexthop entry",
|
|
|
|
__func__, rn);
|
|
|
|
same = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (nhop)
|
|
|
|
same = false;
|
|
|
|
else if (old_nhop) {
|
|
|
|
while (old_nhop) {
|
|
|
|
if (CHECK_FLAG(old_nhop->flags, NEXTHOP_FLAG_ACTIVE))
|
|
|
|
break;
|
|
|
|
old_nhop = old_nhop->next;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (old_nhop)
|
|
|
|
same = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
return same;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct nhg_hash_entry *zebra_nhg_rib_compare_old_nhe(
|
|
|
|
const struct route_node *rn, const struct route_entry *re,
|
|
|
|
struct nhg_hash_entry *new_nhe, struct nhg_hash_entry *old_nhe)
|
|
|
|
{
|
|
|
|
struct nexthop *nhop, *old_nhop;
|
|
|
|
bool same = true;
|
|
|
|
struct vrf *vrf = vrf_lookup_by_id(re->vrf_id);
|
|
|
|
|
|
|
|
if (IS_ZEBRA_DEBUG_NHG_DETAIL) {
|
|
|
|
char straddr[PREFIX_STRLEN];
|
|
|
|
|
|
|
|
prefix2str(&rn->p, straddr, sizeof(straddr));
|
|
|
|
zlog_debug("%s: %pRN new id: %u old id: %u", __func__, rn,
|
|
|
|
new_nhe->id, old_nhe->id);
|
|
|
|
zlog_debug("%s: %pRN NEW", __func__, rn);
|
|
|
|
for (ALL_NEXTHOPS(new_nhe->nhg, nhop))
|
|
|
|
route_entry_dump_nh(re, straddr, vrf, nhop);
|
|
|
|
|
|
|
|
zlog_debug("%s: %pRN OLD", __func__, rn);
|
|
|
|
for (ALL_NEXTHOPS(old_nhe->nhg, nhop))
|
|
|
|
route_entry_dump_nh(re, straddr, vrf, nhop);
|
|
|
|
}
|
|
|
|
|
|
|
|
nhop = new_nhe->nhg.nexthop;
|
|
|
|
old_nhop = old_nhe->nhg.nexthop;
|
|
|
|
|
|
|
|
same = zebra_nhg_nexthop_compare(nhop, old_nhop, rn);
|
|
|
|
|
|
|
|
if (same) {
|
|
|
|
struct nexthop_group *bnhg, *old_bnhg;
|
|
|
|
|
|
|
|
bnhg = zebra_nhg_get_backup_nhg(new_nhe);
|
|
|
|
old_bnhg = zebra_nhg_get_backup_nhg(old_nhe);
|
|
|
|
|
|
|
|
if (bnhg || old_bnhg) {
|
|
|
|
if (bnhg && !old_bnhg)
|
|
|
|
same = false;
|
|
|
|
else if (!bnhg && old_bnhg)
|
|
|
|
same = false;
|
|
|
|
else
|
|
|
|
same = zebra_nhg_nexthop_compare(bnhg->nexthop,
|
|
|
|
old_bnhg->nexthop,
|
|
|
|
rn);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (IS_ZEBRA_DEBUG_NHG_DETAIL)
|
|
|
|
zlog_debug("%s:%pRN They are %sthe same, using the %s nhg entry",
|
|
|
|
__func__, rn, same ? "" : "not ",
|
|
|
|
same ? "old" : "new");
|
|
|
|
|
|
|
|
if (same)
|
|
|
|
return old_nhe;
|
|
|
|
else
|
|
|
|
return new_nhe;
|
|
|
|
}
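/* Hedged usage sketch: the caller hands this the freshly computed nhe and
 * the nhe from old_re; when both primary and backup lists line up, the
 * old entry is returned so it (and, presumably, its already-programmed
 * NHG id) is reused, e.g. after an interface bounce with an otherwise
 * unchanged static route, mirroring the behaviour in the commit notes
 * above.
 */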
|
|
|
|
|
2020-03-10 15:50:40 +01:00
|
|
|
/*
|
|
|
|
* Iterate over all nexthops of the given RIB entry and refresh their
|
|
|
|
* ACTIVE flag. If any nexthop is found to toggle the ACTIVE flag,
|
|
|
|
* the whole re structure is flagged with ROUTE_ENTRY_CHANGED.
|
|
|
|
*
|
|
|
|
* Return value is the new number of active nexthops.
|
|
|
|
*/
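/* Hedged illustration (not from the original comment): for a three-nexthop
 * re where one nexthop newly fails resolution, this returns 2 and
 * ROUTE_ENTRY_CHANGED is left set on re->status, which is what later
 * triggers building or looking up a replacement nhe below.
 */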
|
zebra: Attempt to reuse NHG after interface up and route reinstall
The previous commit modified zebra to reinstall the singleton
nexthops for a nexthop group when a interface event comes up.
Now let's modify zebra to attempt to reuse the nexthop group
when this happens and the upper level protocol resends the
route down with that. Only match if the protocol is the same
as well as the instance and the nexthop groups would match.
Here is the new behavior:
eva(config)# do show ip route 9.9.9.9/32
Routing entry for 9.9.9.9/32
Known via "static", distance 1, metric 0, best
Last update 00:00:08 ago
* 192.168.99.33, via dummy1, weight 1
* 192.168.100.33, via dummy2, weight 1
* 192.168.101.33, via dummy3, weight 1
* 192.168.102.33, via dummy4, weight 1
eva(config)# do show ip route nexthop-group 9.9.9.9/32
% Unknown command: do show ip route nexthop-group 9.9.9.9/32
eva(config)# do show ip route 9.9.9.9/32 nexthop-group
Routing entry for 9.9.9.9/32
Known via "static", distance 1, metric 0, best
Last update 00:00:54 ago
Nexthop Group ID: 57
* 192.168.99.33, via dummy1, weight 1
* 192.168.100.33, via dummy2, weight 1
* 192.168.101.33, via dummy3, weight 1
* 192.168.102.33, via dummy4, weight 1
eva(config)# exit
eva# conf
eva(config)# int dummy3
eva(config-if)# shut
eva(config-if)# no shut
eva(config-if)# do show ip route 9.9.9.9/32 nexthop-group
Routing entry for 9.9.9.9/32
Known via "static", distance 1, metric 0, best
Last update 00:00:08 ago
Nexthop Group ID: 57
* 192.168.99.33, via dummy1, weight 1
* 192.168.100.33, via dummy2, weight 1
* 192.168.101.33, via dummy3, weight 1
* 192.168.102.33, via dummy4, weight 1
eva(config-if)# exit
eva(config)# exit
eva# exit
sharpd@eva ~/frr1 (master) [255]> ip nexthop show id 57
id 57 group 37/43/50/58 proto zebra
sharpd@eva ~/frr1 (master)> ip route show 9.9.9.9/32
9.9.9.9 nhid 57 proto 196 metric 20
nexthop via 192.168.99.33 dev dummy1 weight 1
nexthop via 192.168.100.33 dev dummy2 weight 1
nexthop via 192.168.101.33 dev dummy3 weight 1
nexthop via 192.168.102.33 dev dummy4 weight 1
sharpd@eva ~/frr1 (master)>
Notice that we no longer create a bunch of new
nexthop groups.
Signed-off-by: Donald Sharp <sharpd@nvidia.com>
2024-09-11 20:24:27 +02:00
|
|
|
int nexthop_active_update(struct route_node *rn, struct route_entry *re,
|
|
|
|
struct route_entry *old_re)
|
2020-03-10 15:50:40 +01:00
|
|
|
{
|
2025-01-09 18:34:50 +01:00
|
|
|
struct nhg_hash_entry *curr_nhe, *remove;
|
2020-03-10 15:50:40 +01:00
|
|
|
uint32_t curr_active = 0, backup_active = 0;
|
|
|
|
|
2021-04-22 23:16:57 +02:00
|
|
|
if (PROTO_OWNED(re->nhe))
|
2020-05-10 23:34:27 +02:00
|
|
|
return proto_nhg_nexthop_active_update(&re->nhe->nhg);
|
|
|
|
|
2020-03-10 15:50:40 +01:00
|
|
|
afi_t rt_afi = family2afi(rn->p.family);
|
|
|
|
|
|
|
|
UNSET_FLAG(re->status, ROUTE_ENTRY_CHANGED);
|
|
|
|
|
|
|
|
/* Make a local copy of the existing nhe, so we don't work on/modify
|
|
|
|
* the shared nhe.
|
|
|
|
*/
|
2020-05-05 23:03:33 +02:00
|
|
|
curr_nhe = zebra_nhe_copy(re->nhe, re->nhe->id);
|
2020-03-10 15:50:40 +01:00
|
|
|
|
|
|
|
if (IS_ZEBRA_DEBUG_NHG_DETAIL)
|
2022-06-14 21:02:27 +02:00
|
|
|
zlog_debug("%s: re %p nhe %p (%pNG), curr_nhe %p", __func__, re,
|
|
|
|
re->nhe, re->nhe, curr_nhe);
|
2020-03-10 15:50:40 +01:00
|
|
|
|
|
|
|
/* Clear the existing id, if any: this will avoid any confusion
|
|
|
|
* if the id exists, and will also force the creation
|
|
|
|
* of a new nhe reflecting the changes we may make in this local copy.
|
|
|
|
*/
|
|
|
|
curr_nhe->id = 0;
|
|
|
|
|
|
|
|
/* Process nexthops */
|
2021-02-22 21:09:07 +01:00
|
|
|
curr_active = nexthop_list_active_update(rn, re, curr_nhe, false);
|
2020-03-10 15:50:40 +01:00
|
|
|
|
|
|
|
if (IS_ZEBRA_DEBUG_NHG_DETAIL)
|
|
|
|
zlog_debug("%s: re %p curr_active %u", __func__, re,
|
|
|
|
curr_active);
|
|
|
|
|
|
|
|
/* If there are no backup nexthops, we are done */
|
|
|
|
if (zebra_nhg_get_backup_nhg(curr_nhe) == NULL)
|
|
|
|
goto backups_done;
|
|
|
|
|
|
|
|
backup_active = nexthop_list_active_update(
|
2021-02-22 21:09:07 +01:00
|
|
|
rn, re, curr_nhe->backup_info->nhe, true /*is_backup*/);
|
2020-03-10 15:50:40 +01:00
|
|
|
|
|
|
|
if (IS_ZEBRA_DEBUG_NHG_DETAIL)
|
|
|
|
zlog_debug("%s: re %p backup_active %u", __func__, re,
|
|
|
|
backup_active);
|
|
|
|
|
|
|
|
backups_done:
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Ref or create an nhe that matches the current state of the
|
|
|
|
* nexthop(s).
|
|
|
|
*/
|
2019-07-03 18:09:20 +02:00
|
|
|
if (CHECK_FLAG(re->status, ROUTE_ENTRY_CHANGED)) {
|
2019-05-15 00:27:40 +02:00
|
|
|
struct nhg_hash_entry *new_nhe = NULL;
|
|
|
|
|
2020-03-10 15:50:40 +01:00
|
|
|
new_nhe = zebra_nhg_rib_find_nhe(curr_nhe, rt_afi);
|
|
|
|
|
2025-01-09 18:34:50 +01:00
|
|
|
remove = new_nhe;
|
|
|
|
|
2024-09-11 20:24:27 +02:00
|
|
|
if (old_re && old_re->type == re->type &&
|
|
|
|
old_re->instance == re->instance)
|
|
|
|
new_nhe = zebra_nhg_rib_compare_old_nhe(rn, re, new_nhe,
|
|
|
|
old_re->nhe);
|
|
|
|
|
2020-03-10 15:50:40 +01:00
|
|
|
if (IS_ZEBRA_DEBUG_NHG_DETAIL)
|
2025-01-09 18:34:50 +01:00
|
|
|
zlog_debug("%s: re %p CHANGED: nhe %p (%pNG) => new_nhe %p (%pNG) rib_find_nhe returned %p (%pNG) refcnt: %d",
|
|
|
|
__func__, re, re->nhe, re->nhe, new_nhe, new_nhe, remove, remove,
|
|
|
|
remove ? remove->refcnt : 0);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* if the result from zebra_nhg_rib_find_nhe is being
|
|
|
|
* dropped and it was generated in that function
|
|
|
|
* (refcnt of 0) then we know we can clean it up
|
|
|
|
*/
|
|
|
|
if (remove && remove != new_nhe && remove != re->nhe && remove->refcnt == 0)
|
|
|
|
zebra_nhg_handle_uninstall(remove);
|
2019-05-15 00:27:40 +02:00
|
|
|
|
2019-11-21 21:05:52 +01:00
|
|
|
route_entry_update_nhe(re, new_nhe);
|
2019-05-15 00:03:29 +02:00
|
|
|
}
|
|
|
|
|
2020-03-10 15:50:40 +01:00
|
|
|
|
2020-01-30 22:43:09 +01:00
|
|
|
/* Walk the NHE depends tree and toggle NEXTHOP_GROUP_VALID
|
|
|
|
* flag where appropriate.
|
|
|
|
*/
|
2020-01-30 18:34:35 +01:00
|
|
|
if (curr_active)
|
2020-01-30 22:43:09 +01:00
|
|
|
zebra_nhg_set_valid_if_active(re->nhe);
|
2019-05-15 00:27:40 +02:00
|
|
|
|
|
|
|
/*
|
2020-03-10 15:50:40 +01:00
|
|
|
* Do not need the old / copied nhe anymore since it
|
|
|
|
* was either copied over into a new nhe or not
|
2019-05-15 00:27:40 +02:00
|
|
|
* used at all.
|
|
|
|
*/
|
2020-03-10 15:50:40 +01:00
|
|
|
zebra_nhg_free(curr_nhe);
|
2019-02-15 17:39:12 +01:00
|
|
|
return curr_active;
|
2019-05-13 21:46:05 +02:00
|
|
|
}
|
2019-03-06 20:58:57 +01:00
|
|
|
|
2020-01-28 01:36:01 +01:00
|
|
|
/* Recursively construct a grp array of fully resolved IDs.
|
|
|
|
*
|
|
|
|
* This function allows us to account for groups within groups,
|
|
|
|
* by converting them into a flat array of IDs.
|
|
|
|
*
|
|
|
|
* nh_grp is modified at every level of recursion to append
|
|
|
|
* to it the next unique, fully resolved ID from the entire tree.
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* Note:
|
|
|
|
* I'm pretty sure we only allow ONE level of group within group currently.
|
|
|
|
* But making this recursive just in case that ever changes.
|
|
|
|
*/
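/*
 * Hedged illustration (the IDs here are made up): for a group of groups
 *
 *   nhe 100 depends on { nhe 10, nhe 20 }
 *   nhe 20  depends on { nhe 21, nhe 22 }
 *
 * the recursion is expected to flatten this into
 *
 *   grp[] = { {id 10, weight}, {id 21, weight}, {id 22, weight} }
 *
 * skipping duplicates and anything not valid/installed/queued.
 */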
|
2024-10-07 18:40:46 +02:00
|
|
|
static uint16_t zebra_nhg_nhe2grp_internal(struct nh_grp *grp, uint16_t curr_index,
|
|
|
|
struct nhg_hash_entry *nhe,
|
|
|
|
struct nhg_hash_entry *original, int max_num)
|
2019-05-15 00:27:40 +02:00
|
|
|
{
|
|
|
|
struct nhg_connected *rb_node_dep = NULL;
|
|
|
|
struct nhg_hash_entry *depend = NULL;
|
zebra: Create Singleton nhg's without weights
Currently, when FRR has two nexthop groups:
A
nexthop 1 weight 5
nexthop 2 weight 6
nexthop 3 weight 7
B
nexthop 1 weight 3
nexthop 2 weight 4
nexthop 3 weight 5
We end up with 5 singleton nexthops and two groups:
ID: 181818168 (sharp)
RefCnt: 1
Uptime: 00:04:52
VRF: default
Valid, Installed
Depends: (69) (70) (71)
via 192.168.119.1, enp13s0 (vrf default), weight 182
via 192.168.119.2, enp13s0 (vrf default), weight 218
via 192.168.119.3, enp13s0 (vrf default), weight 255
ID: 181818169 (sharp)
RefCnt: 1
Uptime: 00:02:08
VRF: default
Valid, Installed
Depends: (71) (127) (128)
via 192.168.119.1, enp13s0 (vrf default), weight 127
via 192.168.119.2, enp13s0 (vrf default), weight 170
via 192.168.119.3, enp13s0 (vrf default), weight 255
id 69 via 192.168.119.1 dev enp13s0 scope link proto 194
id 70 via 192.168.119.2 dev enp13s0 scope link proto 194
id 71 via 192.168.119.3 dev enp13s0 scope link proto 194
id 127 via 192.168.119.1 dev enp13s0 scope link proto 194
id 128 via 192.168.119.2 dev enp13s0 scope link proto 194
id 181818168 group 69,182/70,218/71,255 proto 194
id 181818169 group 71,255/127,127/128,170 proto 194
This is not a desirable state to be in. If you have a
link flapping in the network and weights are changing
rapidly you end up with a large number of singleton
nexthops that are being used by the nexthop groups.
This fills up asic space and clutters the table.
Additionally singleton nexthops cannot have any weight
and the fact that you attempt to create a singleton
nexthop with different weights means nothing to the
linux kernel (or any asic dplane). Let's modify
the code to always create the singleton nexthops
without a weight and then just create the
NHG's that use the singletons with the appropriate
weight.
ID: 181818168 (sharp)
RefCnt: 1
Uptime: 00:00:32
VRF: default
Valid, Installed
Depends: (22) (24) (28)
via 192.168.119.1, enp13s0 (vrf default), weight 182
via 192.168.119.2, enp13s0 (vrf default), weight 218
via 192.168.119.3, enp13s0 (vrf default), weight 255
ID: 181818169 (sharp)
RefCnt: 1
Uptime: 00:00:14
VRF: default
Valid, Installed
Depends: (22) (24) (28)
via 192.168.119.1, enp13s0 (vrf default), weight 153
via 192.168.119.2, enp13s0 (vrf default), weight 204
via 192.168.119.3, enp13s0 (vrf default), weight 255
id 22 via 192.168.119.1 dev enp13s0 scope link proto 194
id 24 via 192.168.119.2 dev enp13s0 scope link proto 194
id 28 via 192.168.119.3 dev enp13s0 scope link proto 194
id 181818168 group 22,182/24,218/28,255 proto 194
id 181818169 group 22,153/24,204/28,255 proto 194
Signed-off-by: Donald Sharp <sharpd@nvidia.com>
2024-08-20 16:53:34 +02:00
|
|
|
struct nexthop *nexthop;
|
2024-10-07 18:40:46 +02:00
|
|
|
uint16_t i = curr_index;
|
2019-05-15 00:27:40 +02:00
|
|
|
|
2019-09-03 22:12:06 +02:00
|
|
|
frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
|
2019-07-25 19:27:59 +02:00
|
|
|
bool duplicate = false;
|
|
|
|
|
2020-01-28 01:36:01 +01:00
|
|
|
if (i >= max_num)
|
|
|
|
goto done;
|
|
|
|
|
2019-05-15 00:27:40 +02:00
|
|
|
depend = rb_node_dep->nhe;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If it's recursive, use its resolved nhe in the group
|
|
|
|
*/
|
|
|
|
if (CHECK_FLAG(depend->flags, NEXTHOP_GROUP_RECURSIVE)) {
|
|
|
|
depend = zebra_nhg_resolve(depend);
|
|
|
|
if (!depend) {
|
|
|
|
flog_err(
|
|
|
|
EC_ZEBRA_NHG_FIB_UPDATE,
|
2022-06-14 21:02:27 +02:00
|
|
|
"Failed to recursively resolve Nexthop Hash Entry in the group id=%pNG",
|
|
|
|
nhe);
|
2019-05-15 00:27:40 +02:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-01-28 01:36:01 +01:00
|
|
|
if (!zebra_nhg_depends_is_empty(depend)) {
|
|
|
|
/* This is a group within a group */
|
2024-08-20 16:53:34 +02:00
|
|
|
i = zebra_nhg_nhe2grp_internal(grp, i, depend, nhe,
|
|
|
|
max_num);
|
2020-01-28 01:36:01 +01:00
|
|
|
} else {
|
2024-08-20 16:53:34 +02:00
|
|
|
bool found;
|
|
|
|
|
2020-01-30 22:43:09 +01:00
|
|
|
if (!CHECK_FLAG(depend->flags, NEXTHOP_GROUP_VALID)) {
|
|
|
|
if (IS_ZEBRA_DEBUG_RIB_DETAILED
|
|
|
|
|| IS_ZEBRA_DEBUG_NHG)
|
|
|
|
zlog_debug(
|
|
|
|
"%s: Nexthop ID (%u) not valid, not appending to dataplane install group",
|
|
|
|
__func__, depend->id);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-01-28 20:33:10 +01:00
|
|
|
/* If the nexthop is not installed/queued for install, don't
|
|
|
|
* put it in the ID array.
|
|
|
|
*/
|
|
|
|
if (!(CHECK_FLAG(depend->flags, NEXTHOP_GROUP_INSTALLED)
|
|
|
|
|| CHECK_FLAG(depend->flags,
|
|
|
|
NEXTHOP_GROUP_QUEUED))) {
|
|
|
|
if (IS_ZEBRA_DEBUG_RIB_DETAILED
|
|
|
|
|| IS_ZEBRA_DEBUG_NHG)
|
|
|
|
zlog_debug(
|
|
|
|
"%s: Nexthop ID (%u) not installed or queued for install, not appending to dataplane install group",
|
|
|
|
__func__, depend->id);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-01-28 21:20:18 +01:00
|
|
|
/* Check for duplicate IDs, ignore if found. */
|
2020-01-28 01:36:01 +01:00
|
|
|
for (int j = 0; j < i; j++) {
|
2020-03-26 15:57:45 +01:00
|
|
|
if (depend->id == grp[j].id) {
|
2020-01-28 01:36:01 +01:00
|
|
|
duplicate = true;
|
2020-03-26 15:57:45 +01:00
|
|
|
break;
|
|
|
|
}
|
2020-01-28 01:36:01 +01:00
|
|
|
}
|
2019-07-25 19:27:59 +02:00
|
|
|
|
2020-01-28 21:20:18 +01:00
|
|
|
if (duplicate) {
|
|
|
|
if (IS_ZEBRA_DEBUG_RIB_DETAILED
|
|
|
|
|| IS_ZEBRA_DEBUG_NHG)
|
|
|
|
zlog_debug(
|
|
|
|
"%s: Nexthop ID (%u) is duplicate, not appending to dataplane install group",
|
|
|
|
__func__, depend->id);
|
|
|
|
continue;
|
2020-01-28 01:36:01 +01:00
|
|
|
}
|
2020-01-28 21:20:18 +01:00
|
|
|
|
2024-08-20 16:53:34 +02:00
|
|
|
/*
|
|
|
|
* So we need to create the nexthop group with
|
|
|
|
* the appropriate weights. The nexthops' weights
|
|
|
|
* are stored in the fully resolved nexthops for
|
|
|
|
* the nhg so we need to find the appropriate
|
|
|
|
* nexthop associated with this and set the weight
|
|
|
|
* appropriately
|
|
|
|
*/
|
|
|
|
found = false;
|
|
|
|
for (ALL_NEXTHOPS_PTR(&original->nhg, nexthop)) {
|
|
|
|
if (CHECK_FLAG(nexthop->flags,
|
|
|
|
NEXTHOP_FLAG_RECURSIVE))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (nexthop_cmp_no_weight(depend->nhg.nexthop,
|
|
|
|
nexthop) != 0)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
found = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!found) {
|
|
|
|
if (IS_ZEBRA_DEBUG_RIB_DETAILED ||
|
|
|
|
IS_ZEBRA_DEBUG_NHG)
|
|
|
|
zlog_debug("%s: Nexthop ID (%u) unable to find nexthop in Nexthop Gropu Entry, something is terribly wrong",
|
|
|
|
__func__, depend->id);
|
|
|
|
continue;
|
|
|
|
}
|
2020-01-28 21:20:18 +01:00
|
|
|
grp[i].id = depend->id;
|
2024-08-20 16:53:34 +02:00
|
|
|
grp[i].weight = nexthop->weight;
|
2020-01-28 21:20:18 +01:00
|
|
|
i++;
|
2019-07-25 19:27:59 +02:00
|
|
|
}
|
2019-05-15 00:27:40 +02:00
|
|
|
}
|
2019-07-25 19:27:59 +02:00
|
|
|
|
2020-03-13 21:52:53 +01:00
|
|
|
if (nhe->backup_info == NULL || nhe->backup_info->nhe == NULL)
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
/* TODO -- For now, we are not trying to use or install any
|
|
|
|
* backup info in this nexthop-id path: we aren't prepared
|
|
|
|
* to use the backups here yet. We're just debugging what we find.
|
|
|
|
*/
|
|
|
|
if (IS_ZEBRA_DEBUG_NHG_DETAIL)
|
|
|
|
zlog_debug("%s: skipping backup nhe", __func__);
|
|
|
|
|
2019-07-25 19:27:59 +02:00
|
|
|
done:
|
2019-05-15 00:27:40 +02:00
|
|
|
return i;
|
|
|
|
}
|
|
|
|
|
2020-01-28 01:36:01 +01:00
|
|
|
/* Convert a nhe into a group array */
|
2024-10-07 18:40:46 +02:00
|
|
|
uint16_t zebra_nhg_nhe2grp(struct nh_grp *grp, struct nhg_hash_entry *nhe, int max_num)
|
2020-01-28 01:36:01 +01:00
|
|
|
{
|
|
|
|
/* Call into the recursive function */
|
2024-08-20 16:53:34 +02:00
|
|
|
return zebra_nhg_nhe2grp_internal(grp, 0, nhe, nhe, max_num);
|
2020-01-28 01:36:01 +01:00
|
|
|
}
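/*
 * Hedged usage sketch (assumed caller shape, not code from this file):
 * a dataplane encoder would size the array to the multipath limit and
 * only encode the entries actually filled in:
 *
 *   struct nh_grp grp[MULTIPATH_NUM];
 *   uint16_t num = zebra_nhg_nhe2grp(grp, nhe, MULTIPATH_NUM);
 *
 *   for (uint16_t i = 0; i < num; i++)
 *       encode_group_member(grp[i].id, grp[i].weight); // hypothetical helper
 */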
|
|
|
|
|
2024-08-29 17:29:55 +02:00
|
|
|
void zebra_nhg_install_kernel(struct nhg_hash_entry *nhe, uint8_t type)
|
2019-03-06 20:58:57 +01:00
|
|
|
{
|
2019-07-25 17:45:19 +02:00
|
|
|
struct nhg_connected *rb_node_dep = NULL;
|
|
|
|
|
|
|
|
/* Resolve it first */
|
|
|
|
nhe = zebra_nhg_resolve(nhe);
|
|
|
|
|
2023-04-19 20:35:25 +02:00
|
|
|
if (zebra_nhg_set_valid_if_active(nhe)) {
|
|
|
|
if (IS_ZEBRA_DEBUG_NHG_DETAIL)
|
|
|
|
zlog_debug("%s: valid flag set for nh %pNG", __func__,
|
|
|
|
nhe);
|
|
|
|
}
|
|
|
|
|
2024-08-29 17:29:55 +02:00
|
|
|
if ((type != ZEBRA_ROUTE_CONNECT && type != ZEBRA_ROUTE_LOCAL &&
|
|
|
|
type != ZEBRA_ROUTE_KERNEL) &&
|
|
|
|
CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INITIAL_DELAY_INSTALL)) {
|
|
|
|
UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INITIAL_DELAY_INSTALL);
|
|
|
|
UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
|
|
|
|
}
|
|
|
|
|
2019-07-25 17:45:19 +02:00
|
|
|
/* Make sure all depends are installed/queued */
|
2019-09-03 22:12:06 +02:00
|
|
|
frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
|
2024-08-29 17:29:55 +02:00
|
|
|
zebra_nhg_install_kernel(rb_node_dep->nhe, type);
|
2019-07-25 17:45:19 +02:00
|
|
|
}
|
|
|
|
|
2024-02-08 18:32:26 +01:00
|
|
|
if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_VALID) &&
|
|
|
|
(!CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED) ||
|
|
|
|
CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_REINSTALL)) &&
|
|
|
|
!CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED)) {
|
2019-10-04 22:12:19 +02:00
|
|
|
/* Change its type to us since we are installing it */
|
2020-05-05 21:57:35 +02:00
|
|
|
if (!ZEBRA_NHG_CREATED(nhe))
|
|
|
|
nhe->type = ZEBRA_ROUTE_NHG;
|
2019-10-04 22:12:19 +02:00
|
|
|
|
2024-02-08 02:32:24 +01:00
|
|
|
enum zebra_dplane_result ret = dplane_nexthop_add(nhe);
|
2019-05-15 18:59:37 +02:00
|
|
|
|
2019-03-08 00:11:57 +01:00
|
|
|
switch (ret) {
|
|
|
|
case ZEBRA_DPLANE_REQUEST_QUEUED:
|
|
|
|
SET_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED);
|
|
|
|
break;
|
|
|
|
case ZEBRA_DPLANE_REQUEST_FAILURE:
|
|
|
|
flog_err(
|
|
|
|
EC_ZEBRA_DP_INSTALL_FAIL,
|
2022-06-14 21:02:27 +02:00
|
|
|
"Failed to install Nexthop ID (%pNG) into the kernel",
|
|
|
|
nhe);
|
2019-03-08 00:11:57 +01:00
|
|
|
break;
|
|
|
|
case ZEBRA_DPLANE_REQUEST_SUCCESS:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
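/*
 * Hedged usage note (assumption about callers, not code from this file):
 * route installation paths are expected to pass the owning route's type,
 * e.g. zebra_nhg_install_kernel(re->nhe, re->type), so the initial-delay
 * check above can tell connected/local/kernel routes apart; the proto
 * NHG path later in this file passes ZEBRA_ROUTE_MAX since no route type
 * applies there.
 */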
|
|
|
|
|
|
|
|
void zebra_nhg_uninstall_kernel(struct nhg_hash_entry *nhe)
|
|
|
|
{
|
|
|
|
if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED)) {
|
|
|
|
int ret = dplane_nexthop_delete(nhe);
|
2019-05-15 18:59:37 +02:00
|
|
|
|
2019-03-08 00:11:57 +01:00
|
|
|
switch (ret) {
|
|
|
|
case ZEBRA_DPLANE_REQUEST_QUEUED:
|
|
|
|
SET_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED);
|
|
|
|
break;
|
|
|
|
case ZEBRA_DPLANE_REQUEST_FAILURE:
|
|
|
|
flog_err(
|
|
|
|
EC_ZEBRA_DP_DELETE_FAIL,
|
2022-06-14 21:02:27 +02:00
|
|
|
"Failed to uninstall Nexthop ID (%pNG) from the kernel",
|
|
|
|
nhe);
|
2019-03-08 00:11:57 +01:00
|
|
|
break;
|
|
|
|
case ZEBRA_DPLANE_REQUEST_SUCCESS:
|
|
|
|
UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
|
|
|
|
break;
|
|
|
|
}
|
2019-10-04 22:48:20 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
zebra_nhg_handle_uninstall(nhe);
|
2019-03-08 00:11:57 +01:00
|
|
|
}
|
|
|
|
|
2019-03-08 00:15:30 +01:00
|
|
|
void zebra_nhg_dplane_result(struct zebra_dplane_ctx *ctx)
|
|
|
|
{
|
|
|
|
enum dplane_op_e op;
|
|
|
|
enum zebra_dplane_result status;
|
|
|
|
uint32_t id = 0;
|
|
|
|
struct nhg_hash_entry *nhe = NULL;
|
|
|
|
|
|
|
|
op = dplane_ctx_get_op(ctx);
|
|
|
|
status = dplane_ctx_get_status(ctx);
|
|
|
|
|
2019-05-14 02:10:34 +02:00
|
|
|
id = dplane_ctx_get_nhe_id(ctx);
|
2019-05-15 00:03:29 +02:00
|
|
|
|
2020-03-10 15:50:40 +01:00
|
|
|
if (IS_ZEBRA_DEBUG_DPLANE_DETAIL || IS_ZEBRA_DEBUG_NHG_DETAIL)
|
2019-10-04 22:48:20 +02:00
|
|
|
zlog_debug(
|
|
|
|
"Nexthop dplane ctx %p, op %s, nexthop ID (%u), result %s",
|
|
|
|
ctx, dplane_op2str(op), id, dplane_res2str(status));
|
2019-03-08 00:15:30 +01:00
|
|
|
|
2023-11-17 14:40:58 +01:00
|
|
|
if (op == DPLANE_OP_NH_DELETE) {
|
2019-10-04 22:48:20 +02:00
|
|
|
if (status != ZEBRA_DPLANE_REQUEST_SUCCESS)
|
|
|
|
flog_err(
|
|
|
|
EC_ZEBRA_DP_DELETE_FAIL,
|
|
|
|
"Failed to uninstall Nexthop ID (%u) from the kernel",
|
|
|
|
id);
|
2021-01-21 16:12:05 +01:00
|
|
|
|
2019-10-04 22:48:20 +02:00
|
|
|
/* We already free'd the data, nothing to do */
|
2023-11-17 14:40:58 +01:00
|
|
|
} else if (op == DPLANE_OP_NH_INSTALL || op == DPLANE_OP_NH_UPDATE) {
|
2019-10-04 22:48:20 +02:00
|
|
|
nhe = zebra_nhg_lookup_id(id);
|
|
|
|
|
|
|
|
if (!nhe) {
|
2020-12-01 18:04:30 +01:00
|
|
|
if (IS_ZEBRA_DEBUG_NHG)
|
2023-11-17 14:40:58 +01:00
|
|
|
zlog_debug("%s operation performed on Nexthop ID (%u) in the kernel, that we no longer have in our table",
|
|
|
|
dplane_op2str(op), id);
|
2020-12-01 18:04:30 +01:00
|
|
|
|
2023-11-17 14:40:58 +01:00
|
|
|
return;
|
2019-03-08 00:15:30 +01:00
|
|
|
}
|
2019-10-04 22:48:20 +02:00
|
|
|
|
|
|
|
UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED);
|
2024-02-08 18:32:26 +01:00
|
|
|
UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_REINSTALL);
|
2024-02-08 17:30:30 +01:00
|
|
|
switch (status) {
|
|
|
|
case ZEBRA_DPLANE_REQUEST_SUCCESS:
|
2019-10-04 22:48:20 +02:00
|
|
|
SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
|
2023-04-29 04:09:55 +02:00
|
|
|
zebra_nhg_handle_install(nhe, true);
|
2021-01-21 16:12:05 +01:00
|
|
|
|
|
|
|
/* If daemon nhg, send it an update */
|
2021-04-22 23:16:57 +02:00
|
|
|
if (PROTO_OWNED(nhe))
|
2021-01-21 16:12:05 +01:00
|
|
|
zsend_nhg_notify(nhe->type, nhe->zapi_instance,
|
|
|
|
nhe->zapi_session, nhe->id,
|
|
|
|
ZAPI_NHG_INSTALLED);
|
2024-02-08 17:30:30 +01:00
|
|
|
break;
|
|
|
|
case ZEBRA_DPLANE_REQUEST_FAILURE:
|
|
|
|
UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
|
2021-01-21 16:12:05 +01:00
|
|
|
/* If daemon nhg, send it an update */
|
2021-04-22 23:16:57 +02:00
|
|
|
if (PROTO_OWNED(nhe))
|
2021-01-21 16:12:05 +01:00
|
|
|
zsend_nhg_notify(nhe->type, nhe->zapi_instance,
|
|
|
|
nhe->zapi_session, nhe->id,
|
|
|
|
ZAPI_NHG_FAIL_INSTALL);
|
|
|
|
|
2022-03-28 13:35:53 +02:00
|
|
|
if (!(zebra_nhg_proto_nexthops_only() &&
|
|
|
|
!PROTO_OWNED(nhe)))
|
|
|
|
flog_err(
|
|
|
|
EC_ZEBRA_DP_INSTALL_FAIL,
|
2022-06-14 21:02:27 +02:00
|
|
|
"Failed to install Nexthop (%pNG) into the kernel",
|
|
|
|
nhe);
|
2024-02-08 17:30:30 +01:00
|
|
|
break;
|
|
|
|
case ZEBRA_DPLANE_REQUEST_QUEUED:
|
|
|
|
flog_err(EC_ZEBRA_DP_INVALID_RC,
|
|
|
|
"Dplane returned an invalid result code for a result from the dplane for %pNG into the kernel",
|
|
|
|
nhe);
|
|
|
|
break;
|
2021-01-21 16:12:05 +01:00
|
|
|
}
|
2019-10-04 22:48:20 +02:00
|
|
|
}
|
2019-03-06 20:58:57 +01:00
|
|
|
}
|
|
|
|
|
2021-12-01 22:28:42 +01:00
|
|
|
static int zebra_nhg_sweep_entry(struct hash_bucket *bucket, void *arg)
|
2019-08-01 20:07:04 +02:00
|
|
|
{
|
|
|
|
struct nhg_hash_entry *nhe = NULL;
|
|
|
|
|
|
|
|
nhe = (struct nhg_hash_entry *)bucket->data;
|
|
|
|
|
2021-04-22 23:21:12 +02:00
|
|
|
/*
|
|
|
|
* same logic as with routes.
|
|
|
|
*
|
|
|
|
* If older than startup time, we know we read them in from the
|
|
|
|
* kernel and have not gotten an update for them since startup
|
|
|
|
* from an upper level proto.
|
|
|
|
*/
|
|
|
|
if (zrouter.startup_time < nhe->uptime)
|
2021-12-01 22:28:42 +01:00
|
|
|
return HASHWALK_CONTINUE;
|
2021-04-22 23:21:12 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If it's proto-owned and not being used by a route, remove it since
|
|
|
|
* we haven't gotten an update about it from the proto since startup.
|
|
|
|
* This means that either the config for it was removed or the daemon
|
|
|
|
* didn't get started. This handles graceful restart & retain scenario.
|
|
|
|
*/
|
|
|
|
if (PROTO_OWNED(nhe) && nhe->refcnt == 1) {
|
|
|
|
zebra_nhg_decrement_ref(nhe);
|
2021-12-01 22:28:42 +01:00
|
|
|
return HASHWALK_ABORT;
|
2021-04-22 23:21:12 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If it's being ref'd by routes, just let it be uninstalled via a route
|
|
|
|
* removal.
|
|
|
|
*/
|
2021-12-01 22:28:42 +01:00
|
|
|
if (ZEBRA_NHG_CREATED(nhe) && nhe->refcnt <= 0) {
|
2019-08-01 20:07:04 +02:00
|
|
|
zebra_nhg_uninstall_kernel(nhe);
|
2021-12-01 22:28:42 +01:00
|
|
|
return HASHWALK_ABORT;
|
|
|
|
}
|
|
|
|
|
|
|
|
return HASHWALK_CONTINUE;
|
2019-08-01 20:07:04 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void zebra_nhg_sweep_table(struct hash *hash)
|
|
|
|
{
|
2021-12-01 22:28:42 +01:00
|
|
|
uint32_t count;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Yes this is extremely odd. Effectively nhg's have
|
|
|
|
* other nexthop groups that depend on them and when you
|
|
|
|
* remove them, you can have other entries blown up.
|
|
|
|
* Our hash code does not work with deleting multiple
|
|
|
|
* entries at a time and will possibly cause crashes.
|
|
|
|
* So what to do? Whenever zebra_nhg_sweep_entry
|
|
|
|
* deletes an entry it will return HASHWALK_ABORT,
|
|
|
|
* because that deletion might have triggered more.
|
|
|
|
* Then we can just keep sweeping this table
|
|
|
|
* until nothing more is found to do.
|
|
|
|
*/
|
|
|
|
do {
|
|
|
|
count = hashcount(hash);
|
|
|
|
hash_walk(hash, zebra_nhg_sweep_entry, NULL);
|
|
|
|
} while (count != hashcount(hash));
|
2019-08-01 20:07:04 +02:00
|
|
|
}
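/*
 * Hedged usage sketch (assumption about the caller, not shown in this
 * file): the sweep is expected to run once the startup/graceful-restart
 * window has passed, over the ID-keyed hash:
 *
 *   zebra_nhg_sweep_table(zrouter.nhgs_id);
 */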
|
2020-01-28 17:00:42 +01:00
|
|
|
|
2020-10-22 14:02:33 +02:00
|
|
|
static void zebra_nhg_mark_keep_entry(struct hash_bucket *bucket, void *arg)
|
|
|
|
{
|
|
|
|
struct nhg_hash_entry *nhe = bucket->data;
|
|
|
|
|
|
|
|
UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* When we are shutting down and we have retain mode enabled
|
|
|
|
* in zebra the process is to mark each vrf that its
|
|
|
|
* routes should not be deleted. The problem with that
|
|
|
|
* is that shutdown actually frees up memory which
|
|
|
|
* causes the nexthop group's ref counts to go to zero.
|
|
|
|
* We need a way to subtly tell the system to not remove
|
|
|
|
* the nexthop groups from the kernel at the same time.
|
|
|
|
* The easiest fix looks like we should simply not mark
|
|
|
|
* the nhg's as installed any more and when the ref count
|
|
|
|
* goes to zero we'll attempt to delete and do nothing
|
|
|
|
*/
|
|
|
|
void zebra_nhg_mark_keep(void)
|
|
|
|
{
|
|
|
|
hash_iterate(zrouter.nhgs_id, zebra_nhg_mark_keep_entry, NULL);
|
|
|
|
}
|
|
|
|
|
2020-01-28 17:00:42 +01:00
|
|
|
/* Global control to disable use of kernel nexthops, if available. We can't
|
|
|
|
* force the kernel to support nexthop ids, of course, but we can disable
|
|
|
|
* zebra's use of them, for testing e.g. By default, if the kernel supports
|
|
|
|
* nexthop ids, zebra uses them.
|
|
|
|
*/
|
|
|
|
void zebra_nhg_enable_kernel_nexthops(bool set)
|
|
|
|
{
|
|
|
|
g_nexthops_enabled = set;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool zebra_nhg_kernel_nexthops_enabled(void)
|
|
|
|
{
|
|
|
|
return g_nexthops_enabled;
|
|
|
|
}
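/*
 * Hedged note (assumption, not shown in this file): these two helpers
 * are expected to be driven from configuration, e.g. a
 * "[no] zebra nexthop kernel enable" style command, and consulted by
 * the dataplane before encoding nexthop IDs toward the kernel.
 */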
|
2020-05-05 21:57:35 +02:00
|
|
|
|
2021-02-22 21:06:28 +01:00
|
|
|
/* Global control for use of activated backups for recursive resolution. */
|
|
|
|
void zebra_nhg_set_recursive_use_backups(bool set)
|
|
|
|
{
|
|
|
|
use_recursive_backups = set;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool zebra_nhg_recursive_use_backups(void)
|
|
|
|
{
|
|
|
|
return use_recursive_backups;
|
|
|
|
}
|
|
|
|
|
2020-05-13 21:50:14 +02:00
|
|
|
/*
|
|
|
|
* Global control to only use kernel nexthops for protocol created NHGs.
|
|
|
|
* There are some use cases where you may not want zebra to implicitly
|
|
|
|
* create kernel nexthops for all routes and only create them for NHGs
|
|
|
|
* passed down by upper level protos.
|
|
|
|
*
|
|
|
|
* Default is off.
|
|
|
|
*/
|
|
|
|
void zebra_nhg_set_proto_nexthops_only(bool set)
|
|
|
|
{
|
|
|
|
proto_nexthops_only = set;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool zebra_nhg_proto_nexthops_only(void)
|
|
|
|
{
|
|
|
|
return proto_nexthops_only;
|
|
|
|
}
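/*
 * Hedged note (assumption, not shown in this file): the setter above is
 * expected to be toggled from configuration, e.g. a
 * "zebra nexthop proto only" style command; when enabled, only NHGs
 * supplied by upper level protocols get kernel nexthop IDs.
 */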
|
|
|
|
|
2020-05-05 21:57:35 +02:00
|
|
|
/* Add NHE from upper level proto */
|
|
|
|
struct nhg_hash_entry *zebra_nhg_proto_add(uint32_t id, int type,
|
2021-01-21 16:12:05 +01:00
|
|
|
uint16_t instance, uint32_t session,
|
2020-05-05 21:57:35 +02:00
|
|
|
struct nexthop_group *nhg, afi_t afi)
|
|
|
|
{
|
|
|
|
struct nhg_hash_entry lookup;
|
2020-05-10 22:36:49 +02:00
|
|
|
struct nhg_hash_entry *new, *old;
|
2020-05-05 21:57:35 +02:00
|
|
|
struct nhg_connected *rb_node_dep = NULL;
|
2020-05-20 21:47:12 +02:00
|
|
|
struct nexthop *newhop;
|
|
|
|
bool replace = false;
|
zebra: Fix zebra crash when replacing NHE during shutdown
During replace of a NHE from upper proto in zebra_nhg_proto_add(),
- rib_handle_nhg_replace() is invoked with old NHE where we walk all
RNs/REs & replace the re->nhe whose address points to old NHE.
- In this walk, if prev re->nhe refcnt is decremented to 0, we free up
the memory which the old NHE is pointing to.
Later in zebra_nhg_proto_add(), we end up accessing this freed memory
and crash.
Logs:
1380766 2023/08/16 22:34:11.994671 ZEBRA: [WDEB1-93HCZ] zebra_nhg_decrement_ref: nhe 0x56091d890840 (70312519[2756/2762/2810]) 2 => 1
1380773 2023/08/16 22:34:11.994678 ZEBRA: [WDEB1-93HCZ] zebra_nhg_decrement_ref: nhe 0x56091d890840 (70312519[2756/2762/2810]) 1 => 0
1380777 2023/08/16 22:34:11.994844 ZEBRA: [JE46R-G2NEE] zebra_nhg_release: nhe 0x56091d890840 (70312519[2756/2762/2810])
1380778 2023/08/16 22:34:11.994849 ZEBRA: [SCDBM-4H062] zebra_nhg_free: nhe 0x56091d890840 (70312519[2756/2762/2810]), refcnt 0
1380782 2023/08/16 22:34:11.995000 ZEBRA: [SCDBM-4H062] zebra_nhg_free: nhe 0x56091d890840 (0[]), refcnt 0
1380783 2023/08/16 22:34:11.995011 ZEBRA: lib/memory.c:84: mt_count_free(): assertion (mt->n_alloc) failed
Backtrace:
0 0x00007f833f5f48eb in raise () from /lib/x86_64-linux-gnu/libc.so.6
1 0x00007f833f5df535 in abort () from /lib/x86_64-linux-gnu/libc.so.6
2 0x00007f833f636648 in ?? () from /lib/x86_64-linux-gnu/libc.so.6
3 0x00007f833f63cd6a in ?? () from /lib/x86_64-linux-gnu/libc.so.6
4 0x00007f833f63cfb4 in ?? () from /lib/x86_64-linux-gnu/libc.so.6
5 0x00007f833f63fbc8 in ?? () from /lib/x86_64-linux-gnu/libc.so.6
6 0x00007f833f64172a in malloc () from /lib/x86_64-linux-gnu/libc.so.6
7 0x00007f833f6c3fd2 in backtrace_symbols () from /lib/x86_64-linux-gnu/libc.so.6
8 0x00007f833f9013fc in zlog_backtrace_sigsafe (priority=priority@entry=2, program_counter=program_counter@entry=0x7f833f5f48eb <raise+267>) at lib/log.c:222
9 0x00007f833f901593 in zlog_signal (signo=signo@entry=6, action=action@entry=0x7f833f988ee8 "aborting...", siginfo_v=siginfo_v@entry=0x7ffee1ce4a30,
program_counter=program_counter@entry=0x7f833f5f48eb <raise+267>) at lib/log.c:154
10 0x00007f833f92dbd1 in core_handler (signo=6, siginfo=0x7ffee1ce4a30, context=<optimized out>) at lib/sigevent.c:254
11 <signal handler called>
12 0x00007f833f5f48eb in raise () from /lib/x86_64-linux-gnu/libc.so.6
13 0x00007f833f5df535 in abort () from /lib/x86_64-linux-gnu/libc.so.6
14 0x00007f833f958f96 in _zlog_assert_failed (xref=xref@entry=0x7f833f9e4080 <_xref.10705>, extra=extra@entry=0x0) at lib/zlog.c:680
15 0x00007f833f905400 in mt_count_free (mt=0x7f833fa02800 <MTYPE_NH_LABEL>, ptr=0x51) at lib/memory.c:84
16 mt_count_free (ptr=0x51, mt=0x7f833fa02800 <MTYPE_NH_LABEL>) at lib/memory.c:80
17 qfree (mt=0x7f833fa02800 <MTYPE_NH_LABEL>, ptr=0x51) at lib/memory.c:140
18 0x00007f833f90799c in nexthop_del_labels (nexthop=nexthop@entry=0x56091d776640) at lib/nexthop.c:563
19 0x00007f833f907b91 in nexthop_free (nexthop=0x56091d776640) at lib/nexthop.c:393
20 0x00007f833f907be8 in nexthops_free (nexthop=<optimized out>) at lib/nexthop.c:408
21 0x000056091c21aa76 in zebra_nhg_free_members (nhe=0x56091d890840) at zebra/zebra_nhg.c:1628
22 zebra_nhg_free (nhe=0x56091d890840) at zebra/zebra_nhg.c:1628
23 0x000056091c21bab2 in zebra_nhg_proto_add (id=<optimized out>, type=9, instance=<optimized out>, session=0, nhg=nhg@entry=0x56091d7da028, afi=afi@entry=AFI_UNSPEC)
at zebra/zebra_nhg.c:3532
24 0x000056091c22bc4e in process_subq_nhg (lnode=0x56091d88c540) at zebra/zebra_rib.c:2689
25 process_subq (qindex=META_QUEUE_NHG, subq=0x56091d24cea0) at zebra/zebra_rib.c:3290
26 meta_queue_process (dummy=<optimized out>, data=0x56091d24d4c0) at zebra/zebra_rib.c:3343
27 0x00007f833f9492c8 in work_queue_run (thread=0x7ffee1ce55a0) at lib/workqueue.c:285
28 0x00007f833f93f60d in thread_call (thread=thread@entry=0x7ffee1ce55a0) at lib/thread.c:2008
29 0x00007f833f8f9888 in frr_run (master=0x56091d068660) at lib/libfrr.c:1223
30 0x000056091c1b8366 in main (argc=12, argv=0x7ffee1ce5988) at zebra/main.c:551
Issue: 3492162
Ticket# 3492162
Signed-off-by: Chirag Shah <chirag@nvidia.com>
Signed-off-by: Rajasekar Raja <rajasekarr@nvidia.com>
2023-08-17 09:47:05 +02:00
|
|
|
int ret = 0;
|
2020-05-20 21:47:12 +02:00
|
|
|
|
|
|
|
if (!nhg->nexthop) {
|
|
|
|
if (IS_ZEBRA_DEBUG_NHG)
|
|
|
|
zlog_debug("%s: id %u, no nexthops passed to add",
|
|
|
|
__func__, id);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Set nexthop list as active, since they won't go through rib
|
|
|
|
* processing.
|
|
|
|
*
|
|
|
|
* Assuming valid/onlink for now.
|
|
|
|
*
|
|
|
|
* Once resolution is figured out, we won't need this!
|
|
|
|
*/
|
2020-06-11 19:46:48 +02:00
|
|
|
for (ALL_NEXTHOPS_PTR(nhg, newhop)) {
|
2020-09-25 19:48:21 +02:00
|
|
|
if (CHECK_FLAG(newhop->flags, NEXTHOP_FLAG_HAS_BACKUP)) {
|
|
|
|
if (IS_ZEBRA_DEBUG_NHG)
|
|
|
|
zlog_debug(
|
|
|
|
"%s: id %u, backup nexthops not supported",
|
|
|
|
__func__, id);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2020-08-03 20:34:52 +02:00
|
|
|
if (newhop->type == NEXTHOP_TYPE_BLACKHOLE) {
|
|
|
|
if (IS_ZEBRA_DEBUG_NHG)
|
|
|
|
zlog_debug(
|
|
|
|
"%s: id %u, blackhole nexthop not supported",
|
|
|
|
__func__, id);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (newhop->type == NEXTHOP_TYPE_IFINDEX) {
|
|
|
|
if (IS_ZEBRA_DEBUG_NHG)
|
|
|
|
zlog_debug(
|
|
|
|
"%s: id %u, nexthop without gateway not supported",
|
|
|
|
__func__, id);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2020-06-11 19:46:48 +02:00
|
|
|
if (!newhop->ifindex) {
|
|
|
|
if (IS_ZEBRA_DEBUG_NHG)
|
|
|
|
zlog_debug(
|
2020-08-03 20:34:52 +02:00
|
|
|
"%s: id %u, nexthop without ifindex is not supported",
|
2020-06-11 19:46:48 +02:00
|
|
|
__func__, id);
|
|
|
|
return NULL;
|
|
|
|
}
|
2020-05-20 21:47:12 +02:00
|
|
|
SET_FLAG(newhop->flags, NEXTHOP_FLAG_ACTIVE);
|
2020-06-11 19:46:48 +02:00
|
|
|
}
|
2020-05-05 21:57:35 +02:00
|
|
|
|
|
|
|
zebra_nhe_init(&lookup, afi, nhg->nexthop);
|
|
|
|
lookup.nhg.nexthop = nhg->nexthop;
|
2022-10-24 15:49:40 +02:00
|
|
|
lookup.nhg.nhgr = nhg->nhgr;
|
2020-05-05 21:57:35 +02:00
|
|
|
lookup.id = id;
|
|
|
|
lookup.type = type;
|
|
|
|
|
2020-05-10 22:36:49 +02:00
|
|
|
old = zebra_nhg_lookup_id(id);
|
|
|
|
|
|
|
|
if (old) {
|
|
|
|
/*
|
|
|
|
* This is a replace, just release NHE from ID for now. The
|
2020-10-23 00:09:44 +02:00
|
|
|
* depends/dependents may still be used in the replacement so
|
|
|
|
* we don't touch them other than to remove their refs to their
|
|
|
|
* old parent.
|
2020-05-10 22:36:49 +02:00
|
|
|
*/
|
2020-05-20 21:47:12 +02:00
|
|
|
replace = true;
|
2020-05-10 22:36:49 +02:00
|
|
|
hash_release(zrouter.nhgs_id, old);
|
2020-10-23 00:09:44 +02:00
|
|
|
|
|
|
|
/* Free all the things */
|
|
|
|
zebra_nhg_release_all_deps(old);
|
2020-05-10 22:36:49 +02:00
|
|
|
}
|
|
|
|
|
2020-05-05 21:57:35 +02:00
|
|
|
new = zebra_nhg_rib_find_nhe(&lookup, afi);
|
|
|
|
|
2020-05-20 21:47:12 +02:00
|
|
|
zebra_nhg_increment_ref(new);
|
|
|
|
|
2021-01-21 16:12:05 +01:00
|
|
|
/* Capture zapi client info */
|
|
|
|
new->zapi_instance = instance;
|
|
|
|
new->zapi_session = session;
|
|
|
|
|
2020-05-20 21:47:12 +02:00
|
|
|
zebra_nhg_set_valid_if_active(new);
|
|
|
|
|
2024-08-29 17:29:55 +02:00
|
|
|
zebra_nhg_install_kernel(new, ZEBRA_ROUTE_MAX);
|
2020-05-20 21:47:12 +02:00
|
|
|
|
2020-05-10 22:36:49 +02:00
|
|
|
if (old) {
|
2020-07-22 19:45:47 +02:00
|
|
|
/*
|
|
|
|
* Check to handle receiving a DEL while routes are still in use, then
|
|
|
|
* a replace.
|
|
|
|
*
|
|
|
|
* In this case we would have decremented the refcnt already
|
|
|
|
* but set the FLAG here. Go ahead and increment once to fix
|
|
|
|
* the misordering we have been sent.
|
|
|
|
*/
|
|
|
|
if (CHECK_FLAG(old->flags, NEXTHOP_GROUP_PROTO_RELEASED))
|
|
|
|
zebra_nhg_increment_ref(old);
|
|
|
|
|
2023-08-17 09:47:05 +02:00
|
|
|
ret = rib_handle_nhg_replace(old, new);
|
|
|
|
if (ret)
|
|
|
|
/*
|
|
|
|
* if ret > 0, some previous re->nhe has freed the
|
|
|
|
* address to which old_entry is pointing. Hence mark
|
|
|
|
* the old NHE as NULL
|
|
|
|
*/
|
|
|
|
old = NULL;
|
|
|
|
else {
|
|
|
|
/* We have to decrement its singletons
|
|
|
|
* because some might not exist in NEW.
|
|
|
|
*/
|
|
|
|
if (!zebra_nhg_depends_is_empty(old)) {
|
|
|
|
frr_each (nhg_connected_tree, &old->nhg_depends,
|
|
|
|
rb_node_dep)
|
|
|
|
zebra_nhg_decrement_ref(
|
|
|
|
rb_node_dep->nhe);
|
|
|
|
}
|
2020-05-10 22:36:49 +02:00
|
|
|
|
			/* Don't call the dec API; we don't want to uninstall the ID */
			old->refcnt = 0;
			EVENT_OFF(old->timer);
			zebra_nhg_free(old);
			old = NULL;
		}
	}
	if (IS_ZEBRA_DEBUG_NHG_DETAIL)
		zlog_debug("%s: %s nhe %p (%u), vrf %d, type %s", __func__,
			   (replace ? "replaced" : "added"), new, new->id,
			   new->vrf_id, zebra_route_string(new->type));

	return new;
}
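For context, a hedged sketch of how an upper-level protocol's NHG add reaches this function: per the backtrace quoted above, the queued zapi request ends up calling zebra_nhg_proto_add() with the client's id/type/instance/session and the decoded nexthop group. The wrapper name and parameter types below are assumptions, not code from this file.

/* Hypothetical caller sketch; only zebra_nhg_proto_add() is real here */
static void example_proto_nhg_add(uint32_t id, int proto,
				  unsigned short instance, uint32_t session,
				  struct nexthop_group *nhg)
{
	struct nhg_hash_entry *nhe;

	/* Returns the added/replacement nhe, or NULL on failure */
	nhe = zebra_nhg_proto_add(id, proto, instance, session, nhg,
				  AFI_UNSPEC);
	if (!nhe)
		zlog_debug("%s: could not add/replace nhg %u", __func__, id);
}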
/* Delete NHE from upper level proto, caller must decrement ref */
struct nhg_hash_entry *zebra_nhg_proto_del(uint32_t id, int type)
{
	struct nhg_hash_entry *nhe;

	nhe = zebra_nhg_lookup_id(id);

	if (!nhe) {
		if (IS_ZEBRA_DEBUG_NHG)
			zlog_debug("%s: id %u, lookup failed", __func__, id);

		return NULL;
	}

	if (type != nhe->type) {
		if (IS_ZEBRA_DEBUG_NHG)
			zlog_debug(
				"%s: id %u, type %s mismatch, sent by %s, ignoring",
				__func__, id, zebra_route_string(nhe->type),
				zebra_route_string(type));
		return NULL;
	}

	if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_PROTO_RELEASED)) {
		if (IS_ZEBRA_DEBUG_NHG)
			zlog_debug("%s: id %u, already released", __func__, id);

		return NULL;
	}

	SET_FLAG(nhe->flags, NEXTHOP_GROUP_PROTO_RELEASED);

	if (nhe->refcnt > 1) {
		if (IS_ZEBRA_DEBUG_NHG)
			zlog_debug(
				"%s: %pNG, still being used by routes refcnt %u",
				__func__, nhe, nhe->refcnt);
		return nhe;
	}

	if (IS_ZEBRA_DEBUG_NHG_DETAIL)
		zlog_debug("%s: deleted nhe %p (%pNG), vrf %d, type %s",
			   __func__, nhe, nhe, nhe->vrf_id,
			   zebra_route_string(nhe->type));

	return nhe;
}
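A hedged sketch of the calling convention stated in the comment above ("caller must decrement ref"): once zebra_nhg_proto_del() hands the entry back, the zapi delete path is expected to drop the protocol's reference itself (the commit message further down notes this decrement happens in zread_nhg_del()). The wrapper name below is an assumption.

/* Hypothetical caller sketch; proto_del/decrement_ref are the real APIs */
static void example_proto_nhg_del(uint32_t id, int proto)
{
	struct nhg_hash_entry *nhe;

	nhe = zebra_nhg_proto_del(id, proto);
	if (nhe)
		/* Caller-owned decrement; may uninstall and free the NHG */
		zebra_nhg_decrement_ref(nhe);
}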
struct nhg_score_proto_iter {
	int type;
	struct list *found;
};

static void zebra_nhg_score_proto_entry(struct hash_bucket *bucket, void *arg)
{
	struct nhg_hash_entry *nhe;
	struct nhg_score_proto_iter *iter;

	nhe = (struct nhg_hash_entry *)bucket->data;
	iter = arg;

	/* Needs to match type and outside zebra ID space */
	if (nhe->type == iter->type && PROTO_OWNED(nhe)) {
		if (IS_ZEBRA_DEBUG_NHG_DETAIL)
			zlog_debug(
				"%s: found nhe %p (%pNG), vrf %d, type %s after client disconnect",
				__func__, nhe, nhe, nhe->vrf_id,
				zebra_route_string(nhe->type));

		/* Add to removal list */
		listnode_add(iter->found, nhe);
	}
}
/* Remove specific by proto NHGs */
unsigned long zebra_nhg_score_proto(int type)
{
	struct nhg_hash_entry *nhe;
	struct nhg_score_proto_iter iter = {};
	struct listnode *ln;
	unsigned long count;

	iter.type = type;
	iter.found = list_new();

	/* Find matching entries to remove */
	hash_iterate(zrouter.nhgs_id, zebra_nhg_score_proto_entry, &iter);

	/* Now remove them */
	for (ALL_LIST_ELEMENTS_RO(iter.found, ln, nhe)) {
		/*
		 * This should be the last ref if we remove client routes too,
		 * and thus should remove and free them.
		 */
zebra: fix nhe refcnt when frr service goes down
When frr.service is going down (restart or stop),
a zebra core dump can be seen.
Sequence of events leading to the crash:
Increments of nhe refcnt:
- Upper level creates a new nhe (say NHE1) -> nhe->refcnt = 1
- Two REs (say RE1 & RE2) associate with NHE1 -> nhe->refcnt = 3
Decrements of nhe refcnt:
- BGP sends a zapi msg to zebra to delete the NHG -> nhe->refcnt = 2
- RE1 is queued for delete in META-Q
- As zebra is dissociating from its clients, zebra_nhg_score_proto() is
invoked -> nhe->refcnt = 1
- RE2 is no longer associated with NHE1 -> nhe->refcnt = 0 and
hence the NHE IS FREED
- Now RE1 is dequeued from META-Q to process the re delete. At
this point re->nhe points to freed memory. CRASH CRASH!!!!
Fix:
- When we iterate zebra_nhg_score_proto_entry() to delete the upper
proto specific nhe’s, we need to skip the additional nhe->refcnt
decrement in case nhe->flags has NEXTHOP_GROUP_PROTO_RELEASED set.
Backtrace-1
0x00007fa8449ce8eb in raise () from /lib/x86_64-linux-gnu/libc.so.6
0x00007fa8449b9535 in abort () from /lib/x86_64-linux-gnu/libc.so.6
0x00007fa844d32f86 in _zlog_assert_failed (xref=xref@entry=0x55fa37871040 <_xref.28142>, extra=extra@entry=0x0) at lib/zlog.c:680
0x000055fa3778f770 in rib_re_nhg_free (re=0x55fa39e33770) at zebra/zebra_rib.c:2578
rib_unlink (rn=0x55fa39e27a60, re=0x55fa39e33770) at zebra/zebra_rib.c:3930
0x000055fa3778ff18 in rib_process (rn=0x55fa39e27a60) at zebra/zebra_rib.c:1439
0x000055fa37790b1c in process_subq_route (qindex=8 '\b', lnode=0x55fa39e1c1b0) at zebra/zebra_rib.c:2549
process_subq (qindex=META_QUEUE_BGP, subq=0x55fa3999c580) at zebra/zebra_rib.c:3107
meta_queue_process (dummy=<optimized out>, data=0x55fa3999c480) at zebra/zebra_rib.c:3146
0x00007fa844d232b8 in work_queue_run (thread=0x7ffffbdf6cb0) at lib/workqueue.c:285
0x00007fa844d195fd in thread_call (thread=thread@entry=0x7ffffbdf6cb0) at lib/thread.c:2008
0x00007fa844cd3888 in frr_run (master=0x55fa397b7630) at lib/libfrr.c:1223
0x000055fa3771e294 in main (argc=12, argv=0x7ffffbdf7098) at zebra/main.c:526
Backtrace-2
0x00007f125af3f535 in abort () from /lib/x86_64-linux-gnu/libc.so.6
0x00007f125b2b8f96 in _zlog_assert_failed (xref=xref@entry=0x7f125b344260 <_xref.18768>, extra=extra@entry=0x0) at lib/zlog.c:680
0x00007f125b268190 in nexthop_copy_no_recurse (copy=copy@entry=0x5606dd726f10, nexthop=nexthop@entry=0x7f125b0d7f90, rparent=<optimized out>) at lib/nexthop.c:806
0x00007f125b2681b2 in nexthop_copy (copy=0x5606dd726f10, nexthop=0x7f125b0d7f90, rparent=<optimized out>) at lib/nexthop.c:836
0x00007f125b268249 in nexthop_dup (nexthop=nexthop@entry=0x7f125b0d7f90, rparent=rparent@entry=0x0) at lib/nexthop.c:860
0x00007f125b26b67b in copy_nexthops (tnh=tnh@entry=0x5606dd9ec748, nh=<optimized out>, rparent=rparent@entry=0x0) at lib/nexthop_group.c:457
0x00007f125b26b6ba in nexthop_group_copy (to=to@entry=0x5606dd9ec748, from=from@entry=0x5606dd9ee9f8) at lib/nexthop_group.c:291
0x00005606db6ec678 in zebra_nhe_copy (orig=0x5606dd9ee9d0, id=id@entry=0) at zebra/zebra_nhg.c:431
0x00005606db6ddc63 in mpls_ftn_uninstall_all (zvrf=zvrf@entry=0x5606dd6e7cd0, afi=afi@entry=2, lsp_type=ZEBRA_LSP_NONE) at zebra/zebra_mpls.c:3410
0x00005606db6de108 in zebra_mpls_cleanup_zclient_labels (client=0x5606dd8e03b0) at ./zebra/zebra_mpls.h:471
0x00005606db73e575 in hook_call_zserv_client_close (client=0x5606dd8e03b0) at zebra/zserv.c:566
zserv_client_free (client=0x5606dd8e03b0) at zebra/zserv.c:585
zserv_close_client (client=0x5606dd8e03b0) at zebra/zserv.c:706
0x00007f125b29f60d in thread_call (thread=thread@entry=0x7ffc2a740290) at lib/thread.c:2008
0x00007f125b259888 in frr_run (master=0x5606dd3b7630) at lib/libfrr.c:1223
0x00005606db68d298 in main (argc=12, argv=0x7ffc2a740678) at zebra/main.c:534
Issue: 3492031
Ticket# 3492031
Signed-off-by: Rajasekar Raja <rajasekarr@nvidia.com>
		if (!CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_PROTO_RELEASED))
			zebra_nhg_decrement_ref(nhe);
		else {
			/* protocol sends explicit delete of nhg, the
			 * nhe->refcount is decremented in zread_nhg_del()
			 */
			if (IS_ZEBRA_DEBUG_RIB_DETAILED)
				zlog_debug(
					"%s: nhe %u (%p) refcount %u already decremented in zread_nhg_del",
					__func__, nhe->id, nhe, nhe->refcnt);
		}
	}

	count = iter.found->count;
	list_delete(&iter.found);

	return count;
}
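A hedged sketch of where this sweep fits: per the commit message above, zebra_nhg_score_proto() is invoked while zebra is dissociating from its clients, i.e. from a client-close path, with the disconnecting client's protocol type. The handler name below is an assumption, not code from this file.

/* Hypothetical client-close sketch; zebra_nhg_score_proto() is the real API */
static void example_client_close_nhg_sweep(int proto)
{
	unsigned long removed;

	removed = zebra_nhg_score_proto(proto);

	if (IS_ZEBRA_DEBUG_NHG)
		zlog_debug("Removed %lu proto-owned NHGs after client disconnect",
			   removed);
}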
printfrr_ext_autoreg_p("NG", printfrr_nhghe);
static ssize_t printfrr_nhghe(struct fbuf *buf, struct printfrr_eargs *ea,
			      const void *ptr)
{
	const struct nhg_hash_entry *nhe = ptr;
	const struct nhg_connected *dep;
	ssize_t ret = 0;

	if (!nhe)
		return bputs(buf, "[NULL]");

	ret += bprintfrr(buf, "%u[", nhe->id);
	if (nhe->ifp)
		ret += printfrr_nhs(buf, nhe->nhg.nexthop);
	else {
		int count = zebra_nhg_depends_count(nhe);

		frr_each (nhg_connected_tree_const, &nhe->nhg_depends, dep) {
			ret += bprintfrr(buf, "%u", dep->nhe->id);
			if (count > 1)
				ret += bputs(buf, "/");
			count--;
		}
	}

	ret += bputs(buf, "]");
	return ret;
}
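A hedged usage sketch for the "%pNG" printfrr extension registered above: it renders an nhg_hash_entry as "<id>[...]", printing the nexthop itself for interface-bound singletons or the dependent NHG ids joined by '/' otherwise. The function name and sample output below are illustrative only.

/* Hypothetical debug sketch; the sample output is made up */
static void example_log_nhe(const struct nhg_hash_entry *nhe)
{
	/* Prints e.g. "resolving via nhe 73[68/69]" for a group NHE */
	zlog_debug("resolving via nhe %pNG", nhe);
}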
/*
 * On interface add, the nexthops that resolve to this interface need a
 * re-install. There are two scenarios in which the nexthop group update
 * would otherwise be skipped:
 * 1. When an upper level protocol sends a removal of an NHG, a timer keeps
 * the NHG around for 180 seconds. If, within that interval, a route with the
 * same set of nexthops is installed again, the same NHG is reused; but since
 * the NHG is not reinstalled on interface address add, the dataplane/kernel
 * is not aware of it.
 * 2. A quick port flap causes the interface add and delete to be processed
 * in the same queue, one after another, and zebra believes there is no
 * change in the NHG. The re-install here makes sure the nexthop group gets
 * updated in the dataplane/kernel.
 */
void zebra_interface_nhg_reinstall(struct interface *ifp)
{
	struct nhg_connected *rb_node_dep = NULL;
	struct zebra_if *zif = ifp->info;
	struct nexthop *nh;

	if (IS_ZEBRA_DEBUG_NHG_DETAIL)
		zlog_debug(
			"%s: Installing interface %s associated NHGs into kernel",
			__func__, ifp->name);

	frr_each (nhg_connected_tree, &zif->nhg_dependents, rb_node_dep) {
		nh = rb_node_dep->nhe->nhg.nexthop;
		if (zebra_nhg_set_valid_if_active(rb_node_dep->nhe)) {
			if (IS_ZEBRA_DEBUG_NHG_DETAIL)
				zlog_debug(
					"%s: Setting the valid flag for nhe %pNG, interface: %s",
					__func__, rb_node_dep->nhe, ifp->name);
		}

		/* Check for singleton NHG associated to interface */
		if (!nexthop_is_blackhole(nh) &&
		    zebra_nhg_depends_is_empty(rb_node_dep->nhe)) {
			struct nhg_connected *rb_node_dependent;

			if (IS_ZEBRA_DEBUG_NHG)
				zlog_debug(
					"%s install nhe %pNG nh type %u flags 0x%x",
					__func__, rb_node_dep->nhe, nh->type,
					rb_node_dep->nhe->flags);
			zebra_nhg_install_kernel(rb_node_dep->nhe,
						 ZEBRA_ROUTE_MAX);

			/* Don't need to modify dependents if installed */
			if (CHECK_FLAG(rb_node_dep->nhe->flags,
				       NEXTHOP_GROUP_INSTALLED))
				continue;

			/* Mark the dependent uninstalled; when the
			 * interface-associated singleton is installed,
			 * install the dependent.
			 */
			frr_each_safe (nhg_connected_tree,
				       &rb_node_dep->nhe->nhg_dependents,
				       rb_node_dependent) {
zebra: Reinstall nexthop when interface comes back up
If an interface down event caused a nexthop group to remove
one of the entries in the kernel, have it be reinstalled
when the interface comes back up, and mark the nexthop as
usable.
new behavior:
eva# show nexthop-group rib 181818168
ID: 181818168 (sharp)
RefCnt: 1
Uptime: 00:00:23
VRF: default(bad-value)
Valid, Installed
Depends: (35) (38) (44) (51)
via 192.168.99.33, dummy1 (vrf default), weight 1
via 192.168.100.33, dummy2 (vrf default), weight 1
via 192.168.101.33, dummy3 (vrf default), weight 1
via 192.168.102.33, dummy4 (vrf default), weight 1
eva# conf
eva(config)# int dummy3
eva(config-if)# shut
eva(config-if)# do show nexthop-group rib 181818168
ID: 181818168 (sharp)
RefCnt: 1
Uptime: 00:00:44
VRF: default(bad-value)
Depends: (35) (38) (44) (51)
via 192.168.99.33, dummy1 (vrf default), weight 1
via 192.168.100.33, dummy2 (vrf default), weight 1
via 192.168.101.33, dummy3 (vrf default) inactive, weight 1
via 192.168.102.33, dummy4 (vrf default), weight 1
eva(config-if)# no shut
eva(config-if)# do show nexthop-group rib 181818168
ID: 181818168 (sharp)
RefCnt: 1
Uptime: 00:00:53
VRF: default(bad-value)
Valid, Installed
Depends: (35) (38) (44) (51)
via 192.168.99.33, dummy1 (vrf default), weight 1
via 192.168.100.33, dummy2 (vrf default), weight 1
via 192.168.101.33, dummy3 (vrf default), weight 1
via 192.168.102.33, dummy4 (vrf default), weight 1
eva(config-if)# exit
eva(config)# exit
eva# exit
sharpd@eva ~/frr1 (master) [255]> ip nexthop show id 181818168
id 181818168 group 35/38/44/51 proto 194
sharpd@eva ~/frr1 (master)>
Signed-off-by: Donald Sharp <sharpd@nvidia.com>
				struct nexthop *nhop_dependent =
					rb_node_dependent->nhe->nhg.nexthop;

				while (nhop_dependent &&
				       !nexthop_same(nhop_dependent, nh))
					nhop_dependent = nhop_dependent->next;

				if (nhop_dependent)
					SET_FLAG(nhop_dependent->flags,
						 NEXTHOP_FLAG_ACTIVE);

				if (IS_ZEBRA_DEBUG_NHG)
					zlog_debug("%s dependent nhe %pNG Setting Reinstall flag",
						   __func__,
						   rb_node_dependent->nhe);
				SET_FLAG(rb_node_dependent->nhe->flags,
					 NEXTHOP_GROUP_REINSTALL);
			}
		}
	}
}
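A hedged sketch of the expected call site: per the block comment above, this reinstall walk is meant to run when an interface comes (back) up, so an interface-up handler would simply invoke it once the interface is operative again. The caller name below is an assumption, not code from this file.

/* Hypothetical interface-up sketch; only zebra_interface_nhg_reinstall() is real */
static void example_interface_up(struct interface *ifp)
{
	if (if_is_operative(ifp))
		zebra_interface_nhg_reinstall(ifp);
}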