frr/zebra/zebra_ns.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

449 lines
9.8 KiB
C
Raw Permalink Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/* zebra NS Routines
* Copyright (C) 2016 Cumulus Networks, Inc.
* Donald Sharp
* Copyright (C) 2017/2018 6WIND
*/
#include "zebra.h"
#include "lib/ns.h"
#include "lib/vrf.h"
#include "lib/prefix.h"
#include "lib/memory.h"
#include "zebra_ns.h"
#include "zebra_vrf.h"
#include "rt.h"
#include "zebra_vxlan.h"
#include "debug.h"
#include "zebra_netns_notify.h"
#include "zebra_netns_id.h"
#include "zebra_pbr.h"
#include "zebra_tc.h"
#include "rib.h"
#include "table_manager.h"
zebra: default ns->info should be default zebra_ns We were not connecting the default zebra_ns to the default ns->info at namespace initialization in zebra. Thus, when we tried to use the `ns_walk_func()` it would ignore the default zebra_ns since there is no pointer to it from the ns struct. Fix this by connecting them in `zebra_ns_init()` and, if the default ns is not found, exit with failure since this is not recoverable. This was found during a crash where we fail to cancel the kernel_read thread at termination (via the `ns_walk_func()`) and then we get a netlink notification trying to use the zns struct that has already been freed. ``` (gdb) bt \#0 0x00007fc1134dc7bb in raise () from /lib/x86_64-linux-gnu/libc.so.6 \#1 0x00007fc1134c7535 in abort () from /lib/x86_64-linux-gnu/libc.so.6 \#2 0x00007fc113996f8f in core_handler (signo=11, siginfo=0x7ffe5429d070, context=<optimized out>) at lib/sigevent.c:254 \#3 <signal handler called> \#4 0x0000561880e15449 in if_lookup_by_index_per_ns (ns=0x0, ifindex=174) at zebra/interface.c:269 \#5 0x0000561880e1642c in if_up (ifp=ifp@entry=0x561883076c50) at zebra/interface.c:1043 \#6 0x0000561880e10723 in netlink_link_change (h=0x7ffe5429d8f0, ns_id=<optimized out>, startup=<optimized out>) at zebra/if_netlink.c:1384 \#7 0x0000561880e17e68 in netlink_parse_info (filter=filter@entry=0x561880e17680 <netlink_information_fetch>, nl=nl@entry=0x561882497238, zns=zns@entry=0x7ffe542a5940, count=count@entry=5, startup=startup@entry=0) at zebra/kernel_netlink.c:932 \#8 0x0000561880e186a5 in kernel_read (thread=<optimized out>) at zebra/kernel_netlink.c:406 \#9 0x00007fc1139a4416 in thread_call (thread=thread@entry=0x7ffe542a5b70) at lib/thread.c:1599 \#10 0x00007fc113974ef8 in frr_run (master=0x5618823c9510) at lib/libfrr.c:1024 \#11 0x0000561880e0b916 in main (argc=8, argv=0x7ffe542a5f78) at zebra/main.c:483 ``` Signed-off-by: Stephen Worley <sworley@cumulusnetworks.com>
2019-11-14 19:43:11 +01:00
#include "zebra_errors.h"
#include "zebra_dplane.h"
extern struct zebra_privs_t zserv_privs;
DEFINE_MTYPE_STATIC(ZEBRA, ZEBRA_NS, "Zebra Name Space");
DEFINE_MTYPE_STATIC(ZEBRA, ZNS_IFP, "Zebra NS Ifp");
static int ifp_tree_cmp(const struct ifp_tree_link *a, const struct ifp_tree_link *b);
DECLARE_RBTREE_UNIQ(ifp_tree, struct ifp_tree_link, link, ifp_tree_cmp);
static struct zebra_ns *dzns;
static int ifp_tree_cmp(const struct ifp_tree_link *a, const struct ifp_tree_link *b)
{
return (a->ifindex - b->ifindex);
}
/*
* Link an ifp into its parent NS
*/
void zebra_ns_link_ifp(struct zebra_ns *zns, struct interface *ifp)
{
struct zebra_if *zif;
struct ifp_tree_link *link, tlink = {};
zif = ifp->info;
assert(zif != NULL);
if (zif->ns_tree_link) {
assert(zif->ns_tree_link->zns == zns);
assert(zif->ns_tree_link->ifp == ifp);
return;
}
/* Lookup first - already linked? */
tlink.ifindex = ifp->ifindex;
link = ifp_tree_find(&zns->ifp_tree, &tlink);
if (link) {
assert(link->ifp == ifp);
return;
}
/* Allocate new linkage struct and add */
link = XCALLOC(MTYPE_ZNS_IFP, sizeof(struct ifp_tree_link));
link->ifp = ifp;
link->ifindex = ifp->ifindex;
link->zns = zns;
ifp_tree_add(&zns->ifp_tree, link);
zif->ns_tree_link = link;
}
/*
* Unlink an ifp from its parent NS (probably because the ifp is being deleted)
*/
void zebra_ns_unlink_ifp(struct interface *ifp)
{
struct zebra_if *zif;
struct ifp_tree_link *link;
struct zebra_ns *zns;
zif = ifp->info;
if (zif && zif->ns_tree_link) {
link = zif->ns_tree_link;
zns = link->zns;
ifp_tree_del(&zns->ifp_tree, link);
zif->ns_tree_link = NULL;
XFREE(MTYPE_ZNS_IFP, link);
}
}
/*
* ifp lookup apis
*/
struct interface *zebra_ns_lookup_ifp(struct zebra_ns *zns, uint32_t ifindex)
{
struct interface *ifp = NULL;
struct ifp_tree_link *link, tlink = {};
/* Init temp struct for lookup */
tlink.ifindex = ifindex;
link = ifp_tree_find(&zns->ifp_tree, &tlink);
if (link)
ifp = link->ifp;
return ifp;
}
static int lookup_ifp_name_cb(struct interface *ifp, void *arg);
struct ifp_name_ctx {
const char *ifname;
struct interface *ifp;
};
struct interface *zebra_ns_lookup_ifp_name(struct zebra_ns *zns, const char *ifname)
{
struct ifp_name_ctx ctx = {};
/* Hand context struct into walker function for use in its callback */
ctx.ifname = ifname;
zebra_ns_ifp_walk(zns, lookup_ifp_name_cb, &ctx);
return ctx.ifp;
}
static int lookup_ifp_name_cb(struct interface *ifp, void *arg)
{
struct ifp_name_ctx *pctx = arg;
if (strcmp(ifp->name, pctx->ifname) == 0) {
pctx->ifp = ifp;
return NS_WALK_STOP;
}
return NS_WALK_CONTINUE;
}
/* Iterate collection of ifps, calling application's callback. Callback uses
* return semantics from lib/ns.h: return NS_WALK_STOP to stop the iteration.
* Caller's 'arg' is included in each callback.
*/
int zebra_ns_ifp_walk(struct zebra_ns *zns,
int (*func)(struct interface *ifp, void *arg), void *arg)
{
struct ifp_tree_link *link;
int ret = NS_WALK_CONTINUE;
frr_each (ifp_tree, &zns->ifp_tree, link) {
ret = (func)(link->ifp, arg);
if (ret == NS_WALK_STOP)
break;
}
if (ret == NS_WALK_STOP)
return NS_WALK_STOP;
else
return NS_WALK_CONTINUE;
}
/*
* Walk all NSes, and all ifps for each NS.
*/
struct ns_ifp_walk_ctx {
int (*func)(struct interface *ifp, void *arg);
void *arg;
int ret;
};
static int ns_ifp_walker(struct ns *ns, void *in_param, void **unused);
void zebra_ns_ifp_walk_all(int (*func)(struct interface *ifp, void *arg), void *arg)
{
struct ns_ifp_walk_ctx ctx = {};
ctx.func = func;
ctx.arg = arg;
ns_walk_func(ns_ifp_walker, &ctx, NULL);
}
static int ns_ifp_walker(struct ns *ns, void *in_param, void **unused)
{
struct zebra_ns *zns;
struct ns_ifp_walk_ctx *ctx = in_param;
int ret = NS_WALK_CONTINUE;
zns = ns->info;
if (zns == NULL)
goto done;
ret = zebra_ns_ifp_walk(zns, ctx->func, ctx->arg);
done:
return ret;
}
static int zebra_ns_disable_internal(struct zebra_ns *zns, bool complete);
struct zebra_ns *zebra_ns_lookup(ns_id_t ns_id)
{
if (ns_id == NS_DEFAULT)
return dzns;
struct zebra_ns *info = (struct zebra_ns *)ns_info_lookup(ns_id);
return (info == NULL) ? dzns : info;
}
static int zebra_ns_new(struct ns *ns)
{
struct zebra_ns *zns;
zebra: default ns->info should be default zebra_ns We were not connecting the default zebra_ns to the default ns->info at namespace initialization in zebra. Thus, when we tried to use the `ns_walk_func()` it would ignore the default zebra_ns since there is no pointer to it from the ns struct. Fix this by connecting them in `zebra_ns_init()` and, if the default ns is not found, exit with failure since this is not recoverable. This was found during a crash where we fail to cancel the kernel_read thread at termination (via the `ns_walk_func()`) and then we get a netlink notification trying to use the zns struct that has already been freed. ``` (gdb) bt \#0 0x00007fc1134dc7bb in raise () from /lib/x86_64-linux-gnu/libc.so.6 \#1 0x00007fc1134c7535 in abort () from /lib/x86_64-linux-gnu/libc.so.6 \#2 0x00007fc113996f8f in core_handler (signo=11, siginfo=0x7ffe5429d070, context=<optimized out>) at lib/sigevent.c:254 \#3 <signal handler called> \#4 0x0000561880e15449 in if_lookup_by_index_per_ns (ns=0x0, ifindex=174) at zebra/interface.c:269 \#5 0x0000561880e1642c in if_up (ifp=ifp@entry=0x561883076c50) at zebra/interface.c:1043 \#6 0x0000561880e10723 in netlink_link_change (h=0x7ffe5429d8f0, ns_id=<optimized out>, startup=<optimized out>) at zebra/if_netlink.c:1384 \#7 0x0000561880e17e68 in netlink_parse_info (filter=filter@entry=0x561880e17680 <netlink_information_fetch>, nl=nl@entry=0x561882497238, zns=zns@entry=0x7ffe542a5940, count=count@entry=5, startup=startup@entry=0) at zebra/kernel_netlink.c:932 \#8 0x0000561880e186a5 in kernel_read (thread=<optimized out>) at zebra/kernel_netlink.c:406 \#9 0x00007fc1139a4416 in thread_call (thread=thread@entry=0x7ffe542a5b70) at lib/thread.c:1599 \#10 0x00007fc113974ef8 in frr_run (master=0x5618823c9510) at lib/libfrr.c:1024 \#11 0x0000561880e0b916 in main (argc=8, argv=0x7ffe542a5f78) at zebra/main.c:483 ``` Signed-off-by: Stephen Worley <sworley@cumulusnetworks.com>
2019-11-14 19:43:11 +01:00
if (!ns)
return -1;
if (IS_ZEBRA_DEBUG_EVENT)
zlog_info("ZNS %s with id %u (created)", ns->name, ns->ns_id);
zns = XCALLOC(MTYPE_ZEBRA_NS, sizeof(struct zebra_ns));
ns->info = zns;
zns->ns = ns;
zns->ns_id = ns->ns_id;
/* Do any needed per-NS data structure allocation. */
ifp_tree_init(&zns->ifp_tree);
return 0;
}
static int zebra_ns_delete(struct ns *ns)
{
struct zebra_ns *zns = (struct zebra_ns *)ns->info;
struct zebra_if *zif;
struct ifp_tree_link *link;
if (IS_ZEBRA_DEBUG_EVENT)
zlog_info("ZNS %s with id %u (deleted)", ns->name, ns->ns_id);
if (!zns)
return 0;
/* Clean up ifp tree */
while ((link = ifp_tree_pop(&zns->ifp_tree)) != NULL) {
zif = link->ifp->info;
zif->ns_tree_link = NULL;
XFREE(MTYPE_ZNS_IFP, link);
}
XFREE(MTYPE_ZEBRA_NS, ns->info);
return 0;
}
static int zebra_ns_enabled(struct ns *ns)
{
struct zebra_ns *zns = ns->info;
if (IS_ZEBRA_DEBUG_EVENT)
zlog_info("ZNS %s with id %u (enabled)", ns->name, ns->ns_id);
if (!zns)
return 0;
return zebra_ns_enable(ns->ns_id, (void **)&zns);
}
int zebra_ns_disabled(struct ns *ns)
{
struct zebra_ns *zns = ns->info;
if (IS_ZEBRA_DEBUG_EVENT)
zlog_info("ZNS %s with id %u (disabled)", ns->name, ns->ns_id);
if (!zns)
return 0;
return zebra_ns_disable_internal(zns, true);
}
void zebra_ns_startup_continue(struct zebra_dplane_ctx *ctx)
{
struct zebra_ns *zns = zebra_ns_lookup(dplane_ctx_get_ns_id(ctx));
enum zebra_dplane_startup_notifications spot;
if (!zns) {
zlog_err("%s: No Namespace associated with %u", __func__,
dplane_ctx_get_ns_id(ctx));
return;
}
spot = dplane_ctx_get_startup_spot(ctx);
switch (spot) {
case ZEBRA_DPLANE_INTERFACES_READ:
interface_list_tunneldump(zns);
break;
case ZEBRA_DPLANE_TUNNELS_READ:
interface_list_second(zns);
break;
case ZEBRA_DPLANE_ADDRESSES_READ:
route_read(zns);
vlan_read(zns);
kernel_read_pbr_rules(zns);
kernel_read_tc_qdisc(zns);
/*
* At this point FRR has requested and read a bunch
* of data from the dplane about initial state of
* the system. Zebra now needs to initialize
* the gr subsystem ( or the route sweeping
* subsystem ) to allow that to properly work.
* This must be done *immediately* after the
* load of all data from the underlying dplane.
*/
zebra_main_router_started();
break;
}
}
*: Handle VRF configuration when VRF gets inactivated and activated A VRF is active only when the corresponding VRF device is present in the kernel. However, when the kernel VRF device is removed, the VRF container in FRR should go away only if there is no user configuration for it. Otherwise, when the VRF device is created again so that the VRF becomes active, FRR cannot take the correct actions. Example configuration for the VRF includes static routes and EVPN L3 VNI. Note that a VRF is currently considered to be "configured" as soon as the operator has issued the "vrf <name>" command in FRR. Such a configured VRF is not deleted upon VRF device removal, it is only made inactive. A VRF that is "configured" can be deleted only upon operator action and only if the VRF has been deactivated i.e., the VRF device removed from the kernel. This is an existing restriction. To implement this change, the VRF disable and delete actions have been modified. Signed-off-by: Vivek Venkatraman <vivek@cumulusnetworks.com> Reviewed-by: Donald Sharp <sharpd@cumulusnetworks.com> Reviewed-by: Mitesh Kanjariya <mkanjariya@cumulusnetworks.com> Reviewed-by: Don Slice <dslice@cumulusnetworks.com> Ticket: CM-18553, CM-18918, CM-10139 Reviewed By: CCR-7022 Testing Done: 1. vrf and pim-vrf automation tests 2. Multiple VRF delete and readd (ifdown, ifup-with-depends) 3. FRR stop, start, restart 4. Networking restart 5. Configuration delete and readd Some of the above tests run in different sequences (manually).
2017-12-02 02:36:37 +01:00
/* Do global enable actions - open sockets, read kernel config etc. */
int zebra_ns_enable(ns_id_t ns_id, void **info)
{
struct zebra_ns *zns = (struct zebra_ns *)(*info);
zns->ns_id = ns_id;
kernel_init(zns);
zebra_dplane_ns_enable(zns, true);
interface_list(zns);
return 0;
}
/* Common handler for ns disable - this can be called during ns config,
* or during zebra shutdown.
*/
static int zebra_ns_disable_internal(struct zebra_ns *zns, bool complete)
{
zebra_dplane_ns_enable(zns, false /*Disable*/);
kernel_terminate(zns, complete);
zns->ns_id = NS_DEFAULT;
return 0;
}
/* During zebra shutdown, do partial cleanup while the async dataplane
* is still running.
*/
int zebra_ns_early_shutdown(struct ns *ns,
void *param_in __attribute__((unused)),
void **param_out __attribute__((unused)))
{
struct zebra_ns *zns = ns->info;
if (zns == NULL)
return 0;
zebra_ns_disable_internal(zns, false);
return NS_WALK_CONTINUE;
}
zebra: fix heap-use-after free on ns shutdown The following ASAN issue has been observed: > ERROR: AddressSanitizer: heap-use-after-free on address 0x6160000acba4 at pc 0x55910c5694d0 bp 0x7ffe3a8ac850 sp 0x7ffe3a8ac840 > READ of size 4 at 0x6160000acba4 thread T0 > #0 0x55910c5694cf in ctx_info_from_zns zebra/zebra_dplane.c:3315 > #1 0x55910c569696 in dplane_ctx_ns_init zebra/zebra_dplane.c:3331 > #2 0x55910c56bf61 in dplane_ctx_nexthop_init zebra/zebra_dplane.c:3680 > #3 0x55910c5711ca in dplane_nexthop_update_internal zebra/zebra_dplane.c:4490 > #4 0x55910c571c5c in dplane_nexthop_delete zebra/zebra_dplane.c:4717 > #5 0x55910c61e90e in zebra_nhg_uninstall_kernel zebra/zebra_nhg.c:3413 > #6 0x55910c615d8a in zebra_nhg_decrement_ref zebra/zebra_nhg.c:1919 > #7 0x55910c6404db in route_entry_update_nhe zebra/zebra_rib.c:454 > #8 0x55910c64c904 in rib_re_nhg_free zebra/zebra_rib.c:2822 > #9 0x55910c655be2 in rib_unlink zebra/zebra_rib.c:4212 > #10 0x55910c6430f9 in zebra_rtable_node_cleanup zebra/zebra_rib.c:968 > #11 0x7f26f275b8a9 in route_node_free lib/table.c:75 > #12 0x7f26f275bae4 in route_table_free lib/table.c:111 > #13 0x7f26f275b749 in route_table_finish lib/table.c:46 > #14 0x55910c65db17 in zebra_router_free_table zebra/zebra_router.c:191 > #15 0x55910c65dfb5 in zebra_router_terminate zebra/zebra_router.c:244 > #16 0x55910c4f40db in zebra_finalize zebra/main.c:249 > #17 0x7f26f2777108 in event_call lib/event.c:2011 > #18 0x7f26f264180e in frr_run lib/libfrr.c:1212 > #19 0x55910c4f49cb in main zebra/main.c:531 > #20 0x7f26f2029d8f in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58 > #21 0x7f26f2029e3f in __libc_start_main_impl ../csu/libc-start.c:392 > #22 0x55910c4b0114 in _start (/usr/lib/frr/zebra+0x1ae114) It happens with FRR using the kernel. During shutdown, the namespace identifier is attempted to be obtained by zebra, in an attempt to prepare zebra dataplane nexthop messages. Fix this by accessing the ns structure. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2024-10-07 15:11:43 +02:00
/* During zebra shutdown, do kernel cleanup
* netlink sockets, ..
*/
int zebra_ns_kernel_shutdown(struct ns *ns, void *param_in __attribute__((unused)),
void **param_out __attribute__((unused)))
{
struct zebra_ns *zns = ns->info;
if (zns == NULL)
return NS_WALK_CONTINUE;
kernel_terminate(zns, true);
return NS_WALK_CONTINUE;
}
/* During zebra shutdown, do final cleanup
* after all dataplane work is complete.
*/
int zebra_ns_final_shutdown(struct ns *ns,
void *param_in __attribute__((unused)),
void **param_out __attribute__((unused)))
{
struct zebra_ns *zns = ns->info;
if (zns == NULL)
zebra: fix heap-use-after free on ns shutdown The following ASAN issue has been observed: > ERROR: AddressSanitizer: heap-use-after-free on address 0x6160000acba4 at pc 0x55910c5694d0 bp 0x7ffe3a8ac850 sp 0x7ffe3a8ac840 > READ of size 4 at 0x6160000acba4 thread T0 > #0 0x55910c5694cf in ctx_info_from_zns zebra/zebra_dplane.c:3315 > #1 0x55910c569696 in dplane_ctx_ns_init zebra/zebra_dplane.c:3331 > #2 0x55910c56bf61 in dplane_ctx_nexthop_init zebra/zebra_dplane.c:3680 > #3 0x55910c5711ca in dplane_nexthop_update_internal zebra/zebra_dplane.c:4490 > #4 0x55910c571c5c in dplane_nexthop_delete zebra/zebra_dplane.c:4717 > #5 0x55910c61e90e in zebra_nhg_uninstall_kernel zebra/zebra_nhg.c:3413 > #6 0x55910c615d8a in zebra_nhg_decrement_ref zebra/zebra_nhg.c:1919 > #7 0x55910c6404db in route_entry_update_nhe zebra/zebra_rib.c:454 > #8 0x55910c64c904 in rib_re_nhg_free zebra/zebra_rib.c:2822 > #9 0x55910c655be2 in rib_unlink zebra/zebra_rib.c:4212 > #10 0x55910c6430f9 in zebra_rtable_node_cleanup zebra/zebra_rib.c:968 > #11 0x7f26f275b8a9 in route_node_free lib/table.c:75 > #12 0x7f26f275bae4 in route_table_free lib/table.c:111 > #13 0x7f26f275b749 in route_table_finish lib/table.c:46 > #14 0x55910c65db17 in zebra_router_free_table zebra/zebra_router.c:191 > #15 0x55910c65dfb5 in zebra_router_terminate zebra/zebra_router.c:244 > #16 0x55910c4f40db in zebra_finalize zebra/main.c:249 > #17 0x7f26f2777108 in event_call lib/event.c:2011 > #18 0x7f26f264180e in frr_run lib/libfrr.c:1212 > #19 0x55910c4f49cb in main zebra/main.c:531 > #20 0x7f26f2029d8f in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58 > #21 0x7f26f2029e3f in __libc_start_main_impl ../csu/libc-start.c:392 > #22 0x55910c4b0114 in _start (/usr/lib/frr/zebra+0x1ae114) It happens with FRR using the kernel. During shutdown, the namespace identifier is attempted to be obtained by zebra, in an attempt to prepare zebra dataplane nexthop messages. Fix this by accessing the ns structure. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2024-10-07 15:11:43 +02:00
return NS_WALK_CONTINUE;
zebra_ns_delete(ns);
return NS_WALK_CONTINUE;
}
*: rework renaming the default VRF Currently, it is possible to rename the default VRF either by passing `-o` option to zebra or by creating a file in `/var/run/netns` and binding it to `/proc/self/ns/net`. In both cases, only zebra knows about the rename and other daemons learn about it only after they connect to zebra. This is a problem, because daemons may read their config before they connect to zebra. To handle this rename after the config is read, we have some special code in every single daemon, which is not very bad but not desirable in my opinion. But things are getting worse when we need to handle this in northbound layer as we have to manually rewrite the config nodes. This approach is already hacky, but still works as every daemon handles its own NB structures. But it is completely incompatible with the central management daemon architecture we are aiming for, as mgmtd doesn't even have a connection with zebra to learn from it. And it shouldn't have it, because operational state changes should never affect configuration. To solve the problem and simplify the code, I propose to expand the `-o` option to all daemons. By using the startup option, we let daemons know about the rename before they read their configs so we don't need any special code to deal with it. There's an easy way to pass the option to all daemons by using `frr_global_options` variable. Unfortunately, the second way of renaming by creating a file in `/var/run/netns` is incompatible with the new mgmtd architecture. Theoretically, we could force daemons to read their configs only after they connect to zebra, but it means adding even more code to handle a very specific use-case. And anyway this won't work for mgmtd as it doesn't have a connection with zebra. So I had to remove this option. Signed-off-by: Igor Ryzhov <iryzhov@nfware.com>
2021-12-03 23:22:55 +01:00
int zebra_ns_init(void)
{
zebra: default ns->info should be default zebra_ns We were not connecting the default zebra_ns to the default ns->info at namespace initialization in zebra. Thus, when we tried to use the `ns_walk_func()` it would ignore the default zebra_ns since there is no pointer to it from the ns struct. Fix this by connecting them in `zebra_ns_init()` and, if the default ns is not found, exit with failure since this is not recoverable. This was found during a crash where we fail to cancel the kernel_read thread at termination (via the `ns_walk_func()`) and then we get a netlink notification trying to use the zns struct that has already been freed. ``` (gdb) bt \#0 0x00007fc1134dc7bb in raise () from /lib/x86_64-linux-gnu/libc.so.6 \#1 0x00007fc1134c7535 in abort () from /lib/x86_64-linux-gnu/libc.so.6 \#2 0x00007fc113996f8f in core_handler (signo=11, siginfo=0x7ffe5429d070, context=<optimized out>) at lib/sigevent.c:254 \#3 <signal handler called> \#4 0x0000561880e15449 in if_lookup_by_index_per_ns (ns=0x0, ifindex=174) at zebra/interface.c:269 \#5 0x0000561880e1642c in if_up (ifp=ifp@entry=0x561883076c50) at zebra/interface.c:1043 \#6 0x0000561880e10723 in netlink_link_change (h=0x7ffe5429d8f0, ns_id=<optimized out>, startup=<optimized out>) at zebra/if_netlink.c:1384 \#7 0x0000561880e17e68 in netlink_parse_info (filter=filter@entry=0x561880e17680 <netlink_information_fetch>, nl=nl@entry=0x561882497238, zns=zns@entry=0x7ffe542a5940, count=count@entry=5, startup=startup@entry=0) at zebra/kernel_netlink.c:932 \#8 0x0000561880e186a5 in kernel_read (thread=<optimized out>) at zebra/kernel_netlink.c:406 \#9 0x00007fc1139a4416 in thread_call (thread=thread@entry=0x7ffe542a5b70) at lib/thread.c:1599 \#10 0x00007fc113974ef8 in frr_run (master=0x5618823c9510) at lib/libfrr.c:1024 \#11 0x0000561880e0b916 in main (argc=8, argv=0x7ffe542a5f78) at zebra/main.c:483 ``` Signed-off-by: Stephen Worley <sworley@cumulusnetworks.com>
2019-11-14 19:43:11 +01:00
struct ns *default_ns;
ns_id_t ns_id;
ns_id_t ns_id_external;
struct ns *ns;
frr_with_privs(&zserv_privs) {
ns_id = zebra_ns_id_get_default();
}
ns_id_external = ns_map_nsid_with_external(ns_id, true);
ns_init_management(ns_id_external, ns_id);
ns = ns_get_default();
if (ns)
ns->relative_default_ns = ns_id;
default_ns = ns_lookup(NS_DEFAULT);
zebra: default ns->info should be default zebra_ns We were not connecting the default zebra_ns to the default ns->info at namespace initialization in zebra. Thus, when we tried to use the `ns_walk_func()` it would ignore the default zebra_ns since there is no pointer to it from the ns struct. Fix this by connecting them in `zebra_ns_init()` and, if the default ns is not found, exit with failure since this is not recoverable. This was found during a crash where we fail to cancel the kernel_read thread at termination (via the `ns_walk_func()`) and then we get a netlink notification trying to use the zns struct that has already been freed. ``` (gdb) bt \#0 0x00007fc1134dc7bb in raise () from /lib/x86_64-linux-gnu/libc.so.6 \#1 0x00007fc1134c7535 in abort () from /lib/x86_64-linux-gnu/libc.so.6 \#2 0x00007fc113996f8f in core_handler (signo=11, siginfo=0x7ffe5429d070, context=<optimized out>) at lib/sigevent.c:254 \#3 <signal handler called> \#4 0x0000561880e15449 in if_lookup_by_index_per_ns (ns=0x0, ifindex=174) at zebra/interface.c:269 \#5 0x0000561880e1642c in if_up (ifp=ifp@entry=0x561883076c50) at zebra/interface.c:1043 \#6 0x0000561880e10723 in netlink_link_change (h=0x7ffe5429d8f0, ns_id=<optimized out>, startup=<optimized out>) at zebra/if_netlink.c:1384 \#7 0x0000561880e17e68 in netlink_parse_info (filter=filter@entry=0x561880e17680 <netlink_information_fetch>, nl=nl@entry=0x561882497238, zns=zns@entry=0x7ffe542a5940, count=count@entry=5, startup=startup@entry=0) at zebra/kernel_netlink.c:932 \#8 0x0000561880e186a5 in kernel_read (thread=<optimized out>) at zebra/kernel_netlink.c:406 \#9 0x00007fc1139a4416 in thread_call (thread=thread@entry=0x7ffe542a5b70) at lib/thread.c:1599 \#10 0x00007fc113974ef8 in frr_run (master=0x5618823c9510) at lib/libfrr.c:1024 \#11 0x0000561880e0b916 in main (argc=8, argv=0x7ffe542a5f78) at zebra/main.c:483 ``` Signed-off-by: Stephen Worley <sworley@cumulusnetworks.com>
2019-11-14 19:43:11 +01:00
if (!default_ns) {
flog_err(EC_ZEBRA_NS_NO_DEFAULT,
"%s: failed to find default ns", __func__);
exit(EXIT_FAILURE); /* This is non-recoverable */
}
*: Handle VRF configuration when VRF gets inactivated and activated A VRF is active only when the corresponding VRF device is present in the kernel. However, when the kernel VRF device is removed, the VRF container in FRR should go away only if there is no user configuration for it. Otherwise, when the VRF device is created again so that the VRF becomes active, FRR cannot take the correct actions. Example configuration for the VRF includes static routes and EVPN L3 VNI. Note that a VRF is currently considered to be "configured" as soon as the operator has issued the "vrf <name>" command in FRR. Such a configured VRF is not deleted upon VRF device removal, it is only made inactive. A VRF that is "configured" can be deleted only upon operator action and only if the VRF has been deactivated i.e., the VRF device removed from the kernel. This is an existing restriction. To implement this change, the VRF disable and delete actions have been modified. Signed-off-by: Vivek Venkatraman <vivek@cumulusnetworks.com> Reviewed-by: Donald Sharp <sharpd@cumulusnetworks.com> Reviewed-by: Mitesh Kanjariya <mkanjariya@cumulusnetworks.com> Reviewed-by: Don Slice <dslice@cumulusnetworks.com> Ticket: CM-18553, CM-18918, CM-10139 Reviewed By: CCR-7022 Testing Done: 1. vrf and pim-vrf automation tests 2. Multiple VRF delete and readd (ifdown, ifup-with-depends) 3. FRR stop, start, restart 4. Networking restart 5. Configuration delete and readd Some of the above tests run in different sequences (manually).
2017-12-02 02:36:37 +01:00
/* Do any needed per-NS data structure allocation. */
zebra: default ns->info should be default zebra_ns We were not connecting the default zebra_ns to the default ns->info at namespace initialization in zebra. Thus, when we tried to use the `ns_walk_func()` it would ignore the default zebra_ns since there is no pointer to it from the ns struct. Fix this by connecting them in `zebra_ns_init()` and, if the default ns is not found, exit with failure since this is not recoverable. This was found during a crash where we fail to cancel the kernel_read thread at termination (via the `ns_walk_func()`) and then we get a netlink notification trying to use the zns struct that has already been freed. ``` (gdb) bt \#0 0x00007fc1134dc7bb in raise () from /lib/x86_64-linux-gnu/libc.so.6 \#1 0x00007fc1134c7535 in abort () from /lib/x86_64-linux-gnu/libc.so.6 \#2 0x00007fc113996f8f in core_handler (signo=11, siginfo=0x7ffe5429d070, context=<optimized out>) at lib/sigevent.c:254 \#3 <signal handler called> \#4 0x0000561880e15449 in if_lookup_by_index_per_ns (ns=0x0, ifindex=174) at zebra/interface.c:269 \#5 0x0000561880e1642c in if_up (ifp=ifp@entry=0x561883076c50) at zebra/interface.c:1043 \#6 0x0000561880e10723 in netlink_link_change (h=0x7ffe5429d8f0, ns_id=<optimized out>, startup=<optimized out>) at zebra/if_netlink.c:1384 \#7 0x0000561880e17e68 in netlink_parse_info (filter=filter@entry=0x561880e17680 <netlink_information_fetch>, nl=nl@entry=0x561882497238, zns=zns@entry=0x7ffe542a5940, count=count@entry=5, startup=startup@entry=0) at zebra/kernel_netlink.c:932 \#8 0x0000561880e186a5 in kernel_read (thread=<optimized out>) at zebra/kernel_netlink.c:406 \#9 0x00007fc1139a4416 in thread_call (thread=thread@entry=0x7ffe542a5b70) at lib/thread.c:1599 \#10 0x00007fc113974ef8 in frr_run (master=0x5618823c9510) at lib/libfrr.c:1024 \#11 0x0000561880e0b916 in main (argc=8, argv=0x7ffe542a5f78) at zebra/main.c:483 ``` Signed-off-by: Stephen Worley <sworley@cumulusnetworks.com>
2019-11-14 19:43:11 +01:00
zebra_ns_new(default_ns);
dzns = default_ns->info;
*: Handle VRF configuration when VRF gets inactivated and activated A VRF is active only when the corresponding VRF device is present in the kernel. However, when the kernel VRF device is removed, the VRF container in FRR should go away only if there is no user configuration for it. Otherwise, when the VRF device is created again so that the VRF becomes active, FRR cannot take the correct actions. Example configuration for the VRF includes static routes and EVPN L3 VNI. Note that a VRF is currently considered to be "configured" as soon as the operator has issued the "vrf <name>" command in FRR. Such a configured VRF is not deleted upon VRF device removal, it is only made inactive. A VRF that is "configured" can be deleted only upon operator action and only if the VRF has been deactivated i.e., the VRF device removed from the kernel. This is an existing restriction. To implement this change, the VRF disable and delete actions have been modified. Signed-off-by: Vivek Venkatraman <vivek@cumulusnetworks.com> Reviewed-by: Donald Sharp <sharpd@cumulusnetworks.com> Reviewed-by: Mitesh Kanjariya <mkanjariya@cumulusnetworks.com> Reviewed-by: Don Slice <dslice@cumulusnetworks.com> Ticket: CM-18553, CM-18918, CM-10139 Reviewed By: CCR-7022 Testing Done: 1. vrf and pim-vrf automation tests 2. Multiple VRF delete and readd (ifdown, ifup-with-depends) 3. FRR stop, start, restart 4. Networking restart 5. Configuration delete and readd Some of the above tests run in different sequences (manually).
2017-12-02 02:36:37 +01:00
/* Register zebra VRF callbacks, create and activate default VRF. */
zebra_vrf_init();
*: Handle VRF configuration when VRF gets inactivated and activated A VRF is active only when the corresponding VRF device is present in the kernel. However, when the kernel VRF device is removed, the VRF container in FRR should go away only if there is no user configuration for it. Otherwise, when the VRF device is created again so that the VRF becomes active, FRR cannot take the correct actions. Example configuration for the VRF includes static routes and EVPN L3 VNI. Note that a VRF is currently considered to be "configured" as soon as the operator has issued the "vrf <name>" command in FRR. Such a configured VRF is not deleted upon VRF device removal, it is only made inactive. A VRF that is "configured" can be deleted only upon operator action and only if the VRF has been deactivated i.e., the VRF device removed from the kernel. This is an existing restriction. To implement this change, the VRF disable and delete actions have been modified. Signed-off-by: Vivek Venkatraman <vivek@cumulusnetworks.com> Reviewed-by: Donald Sharp <sharpd@cumulusnetworks.com> Reviewed-by: Mitesh Kanjariya <mkanjariya@cumulusnetworks.com> Reviewed-by: Don Slice <dslice@cumulusnetworks.com> Ticket: CM-18553, CM-18918, CM-10139 Reviewed By: CCR-7022 Testing Done: 1. vrf and pim-vrf automation tests 2. Multiple VRF delete and readd (ifdown, ifup-with-depends) 3. FRR stop, start, restart 4. Networking restart 5. Configuration delete and readd Some of the above tests run in different sequences (manually).
2017-12-02 02:36:37 +01:00
/* Default NS is activated */
zebra_ns_enable(ns_id_external, (void **)&dzns);
if (vrf_is_backend_netns()) {
ns_add_hook(NS_NEW_HOOK, zebra_ns_new);
ns_add_hook(NS_ENABLE_HOOK, zebra_ns_enabled);
ns_add_hook(NS_DISABLE_HOOK, zebra_ns_disabled);
ns_add_hook(NS_DELETE_HOOK, zebra_ns_delete);
zebra_ns_notify_parse();
zebra_ns_notify_init();
}
return 0;
}