frr/zebra/zebra_ns.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

255 lines
5.6 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/* zebra NS Routines
* Copyright (C) 2016 Cumulus Networks, Inc.
* Donald Sharp
* Copyright (C) 2017/2018 6WIND
*/
#include "zebra.h"
#include "lib/ns.h"
#include "lib/vrf.h"
#include "lib/prefix.h"
#include "lib/memory.h"
#include "zebra_ns.h"
#include "zebra_vrf.h"
#include "rt.h"
#include "zebra_vxlan.h"
#include "debug.h"
#include "zebra_netns_notify.h"
#include "zebra_netns_id.h"
#include "zebra_pbr.h"
#include "zebra_tc.h"
#include "rib.h"
#include "table_manager.h"
zebra: default ns->info should be default zebra_ns We were not connecting the default zebra_ns to the default ns->info at namespace initialization in zebra. Thus, when we tried to use the `ns_walk_func()` it would ignore the default zebra_ns since there is no pointer to it from the ns struct. Fix this by connecting them in `zebra_ns_init()` and, if the default ns is not found, exit with failure since this is not recoverable. This was found during a crash where we fail to cancel the kernel_read thread at termination (via the `ns_walk_func()`) and then we get a netlink notification trying to use the zns struct that has already been freed. ``` (gdb) bt \#0 0x00007fc1134dc7bb in raise () from /lib/x86_64-linux-gnu/libc.so.6 \#1 0x00007fc1134c7535 in abort () from /lib/x86_64-linux-gnu/libc.so.6 \#2 0x00007fc113996f8f in core_handler (signo=11, siginfo=0x7ffe5429d070, context=<optimized out>) at lib/sigevent.c:254 \#3 <signal handler called> \#4 0x0000561880e15449 in if_lookup_by_index_per_ns (ns=0x0, ifindex=174) at zebra/interface.c:269 \#5 0x0000561880e1642c in if_up (ifp=ifp@entry=0x561883076c50) at zebra/interface.c:1043 \#6 0x0000561880e10723 in netlink_link_change (h=0x7ffe5429d8f0, ns_id=<optimized out>, startup=<optimized out>) at zebra/if_netlink.c:1384 \#7 0x0000561880e17e68 in netlink_parse_info (filter=filter@entry=0x561880e17680 <netlink_information_fetch>, nl=nl@entry=0x561882497238, zns=zns@entry=0x7ffe542a5940, count=count@entry=5, startup=startup@entry=0) at zebra/kernel_netlink.c:932 \#8 0x0000561880e186a5 in kernel_read (thread=<optimized out>) at zebra/kernel_netlink.c:406 \#9 0x00007fc1139a4416 in thread_call (thread=thread@entry=0x7ffe542a5b70) at lib/thread.c:1599 \#10 0x00007fc113974ef8 in frr_run (master=0x5618823c9510) at lib/libfrr.c:1024 \#11 0x0000561880e0b916 in main (argc=8, argv=0x7ffe542a5f78) at zebra/main.c:483 ``` Signed-off-by: Stephen Worley <sworley@cumulusnetworks.com>
2019-11-14 19:43:11 +01:00
#include "zebra_errors.h"
#include "zebra_dplane.h"
extern struct zebra_privs_t zserv_privs;
DEFINE_MTYPE_STATIC(ZEBRA, ZEBRA_NS, "Zebra Name Space");
static struct zebra_ns *dzns;
static int zebra_ns_disable_internal(struct zebra_ns *zns, bool complete);
struct zebra_ns *zebra_ns_lookup(ns_id_t ns_id)
{
if (ns_id == NS_DEFAULT)
return dzns;
struct zebra_ns *info = (struct zebra_ns *)ns_info_lookup(ns_id);
return (info == NULL) ? dzns : info;
}
static int zebra_ns_new(struct ns *ns)
{
struct zebra_ns *zns;
zebra: default ns->info should be default zebra_ns We were not connecting the default zebra_ns to the default ns->info at namespace initialization in zebra. Thus, when we tried to use the `ns_walk_func()` it would ignore the default zebra_ns since there is no pointer to it from the ns struct. Fix this by connecting them in `zebra_ns_init()` and, if the default ns is not found, exit with failure since this is not recoverable. This was found during a crash where we fail to cancel the kernel_read thread at termination (via the `ns_walk_func()`) and then we get a netlink notification trying to use the zns struct that has already been freed. ``` (gdb) bt \#0 0x00007fc1134dc7bb in raise () from /lib/x86_64-linux-gnu/libc.so.6 \#1 0x00007fc1134c7535 in abort () from /lib/x86_64-linux-gnu/libc.so.6 \#2 0x00007fc113996f8f in core_handler (signo=11, siginfo=0x7ffe5429d070, context=<optimized out>) at lib/sigevent.c:254 \#3 <signal handler called> \#4 0x0000561880e15449 in if_lookup_by_index_per_ns (ns=0x0, ifindex=174) at zebra/interface.c:269 \#5 0x0000561880e1642c in if_up (ifp=ifp@entry=0x561883076c50) at zebra/interface.c:1043 \#6 0x0000561880e10723 in netlink_link_change (h=0x7ffe5429d8f0, ns_id=<optimized out>, startup=<optimized out>) at zebra/if_netlink.c:1384 \#7 0x0000561880e17e68 in netlink_parse_info (filter=filter@entry=0x561880e17680 <netlink_information_fetch>, nl=nl@entry=0x561882497238, zns=zns@entry=0x7ffe542a5940, count=count@entry=5, startup=startup@entry=0) at zebra/kernel_netlink.c:932 \#8 0x0000561880e186a5 in kernel_read (thread=<optimized out>) at zebra/kernel_netlink.c:406 \#9 0x00007fc1139a4416 in thread_call (thread=thread@entry=0x7ffe542a5b70) at lib/thread.c:1599 \#10 0x00007fc113974ef8 in frr_run (master=0x5618823c9510) at lib/libfrr.c:1024 \#11 0x0000561880e0b916 in main (argc=8, argv=0x7ffe542a5f78) at zebra/main.c:483 ``` Signed-off-by: Stephen Worley <sworley@cumulusnetworks.com>
2019-11-14 19:43:11 +01:00
if (!ns)
return -1;
if (IS_ZEBRA_DEBUG_EVENT)
zlog_info("ZNS %s with id %u (created)", ns->name, ns->ns_id);
zns = XCALLOC(MTYPE_ZEBRA_NS, sizeof(struct zebra_ns));
ns->info = zns;
zns->ns = ns;
zns->ns_id = ns->ns_id;
/* Do any needed per-NS data structure allocation. */
zns->if_table = route_table_init();
return 0;
}
static int zebra_ns_delete(struct ns *ns)
{
struct zebra_ns *zns = (struct zebra_ns *)ns->info;
if (IS_ZEBRA_DEBUG_EVENT)
zlog_info("ZNS %s with id %u (deleted)", ns->name, ns->ns_id);
if (!zns)
return 0;
XFREE(MTYPE_ZEBRA_NS, ns->info);
return 0;
}
static int zebra_ns_enabled(struct ns *ns)
{
struct zebra_ns *zns = ns->info;
if (IS_ZEBRA_DEBUG_EVENT)
zlog_info("ZNS %s with id %u (enabled)", ns->name, ns->ns_id);
if (!zns)
return 0;
return zebra_ns_enable(ns->ns_id, (void **)&zns);
}
int zebra_ns_disabled(struct ns *ns)
{
struct zebra_ns *zns = ns->info;
if (IS_ZEBRA_DEBUG_EVENT)
zlog_info("ZNS %s with id %u (disabled)", ns->name, ns->ns_id);
if (!zns)
return 0;
return zebra_ns_disable_internal(zns, true);
}
void zebra_ns_startup_continue(struct zebra_dplane_ctx *ctx)
{
struct zebra_ns *zns = zebra_ns_lookup(dplane_ctx_get_ns_id(ctx));
enum zebra_dplane_startup_notifications spot;
if (!zns) {
zlog_err("%s: No Namespace associated with %u", __func__,
dplane_ctx_get_ns_id(ctx));
return;
}
spot = dplane_ctx_get_startup_spot(ctx);
switch (spot) {
case ZEBRA_DPLANE_INTERFACES_READ:
interface_list_tunneldump(zns);
break;
case ZEBRA_DPLANE_TUNNELS_READ:
interface_list_second(zns);
break;
case ZEBRA_DPLANE_ADDRESSES_READ:
route_read(zns);
vlan_read(zns);
kernel_read_pbr_rules(zns);
kernel_read_tc_qdisc(zns);
break;
}
}
*: Handle VRF configuration when VRF gets inactivated and activated A VRF is active only when the corresponding VRF device is present in the kernel. However, when the kernel VRF device is removed, the VRF container in FRR should go away only if there is no user configuration for it. Otherwise, when the VRF device is created again so that the VRF becomes active, FRR cannot take the correct actions. Example configuration for the VRF includes static routes and EVPN L3 VNI. Note that a VRF is currently considered to be "configured" as soon as the operator has issued the "vrf <name>" command in FRR. Such a configured VRF is not deleted upon VRF device removal, it is only made inactive. A VRF that is "configured" can be deleted only upon operator action and only if the VRF has been deactivated i.e., the VRF device removed from the kernel. This is an existing restriction. To implement this change, the VRF disable and delete actions have been modified. Signed-off-by: Vivek Venkatraman <vivek@cumulusnetworks.com> Reviewed-by: Donald Sharp <sharpd@cumulusnetworks.com> Reviewed-by: Mitesh Kanjariya <mkanjariya@cumulusnetworks.com> Reviewed-by: Don Slice <dslice@cumulusnetworks.com> Ticket: CM-18553, CM-18918, CM-10139 Reviewed By: CCR-7022 Testing Done: 1. vrf and pim-vrf automation tests 2. Multiple VRF delete and readd (ifdown, ifup-with-depends) 3. FRR stop, start, restart 4. Networking restart 5. Configuration delete and readd Some of the above tests run in different sequences (manually).
2017-12-02 02:36:37 +01:00
/* Do global enable actions - open sockets, read kernel config etc. */
int zebra_ns_enable(ns_id_t ns_id, void **info)
{
struct zebra_ns *zns = (struct zebra_ns *)(*info);
zns->ns_id = ns_id;
kernel_init(zns);
zebra_dplane_ns_enable(zns, true);
interface_list(zns);
return 0;
}
/* Common handler for ns disable - this can be called during ns config,
* or during zebra shutdown.
*/
static int zebra_ns_disable_internal(struct zebra_ns *zns, bool complete)
{
zebra: Cleanup use after free in shutdown On shutdown a use after free was being seen of a route table. Basically the pointer was kept around and resent for cleanup. Probably something needs to be unwound to make this better in the future. Just cleaning up the use after free. ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929-================================================================= ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929:==911929==ERROR: AddressSanitizer: heap-use-after-free on address 0x606000127a00 at pc 0x7fb9ad546f5b bp 0x7ffc3cff0330 sp 0x7ffc3 cff0328 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929-READ of size 8 at 0x606000127a00 thread T0 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #0 0x7fb9ad546f5a in route_table_free /home/sharpd/frr8/lib/table.c:103:13 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #1 0x7fb9ad546f04 in route_table_finish /home/sharpd/frr8/lib/table.c:61:2 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #2 0x6b94ba in zebra_ns_disable_internal /home/sharpd/frr8/zebra/zebra_ns.c:141:2 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #3 0x6b9158 in zebra_ns_disabled /home/sharpd/frr8/zebra/zebra_ns.c:116:9 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #4 0x7fb9ad43f0f5 in ns_disable_internal /home/sharpd/frr8/lib/netns_linux.c:273:4 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #5 0x7fb9ad43e634 in ns_disable /home/sharpd/frr8/lib/netns_linux.c:368:2 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #6 0x7fb9ad43e251 in ns_delete /home/sharpd/frr8/lib/netns_linux.c:330:2 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #7 0x7fb9ad43fbb3 in ns_terminate /home/sharpd/frr8/lib/netns_linux.c:524:3 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #8 0x54f8de in zebra_finalize /home/sharpd/frr8/zebra/main.c:232:2 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #9 0x7fb9ad5655e6 in thread_call /home/sharpd/frr8/lib/thread.c:2006:2 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #10 0x7fb9ad3d3343 in frr_run /home/sharpd/frr8/lib/libfrr.c:1198:3 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #11 0x550b48 in main /home/sharpd/frr8/zebra/main.c:476:2 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #12 0x7fb9acd30d09 in __libc_start_main csu/../csu/libc-start.c:308:16 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #13 0x443549 in _start (/usr/lib/frr/zebra+0x443549) ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929-0x606000127a00 is located 0 bytes inside of 56-byte region [0x606000127a00,0x606000127a38) ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929-freed by thread T0 here: ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #0 0x4bd33d in free (/usr/lib/frr/zebra+0x4bd33d) ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #1 0x7fb9ad42cc80 in qfree /home/sharpd/frr8/lib/memory.c:141:2 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #2 0x7fb9ad547305 in route_table_free /home/sharpd/frr8/lib/table.c:141:2 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #3 0x7fb9ad546f04 in route_table_finish /home/sharpd/frr8/lib/table.c:61:2 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #4 0x6b94ba in zebra_ns_disable_internal /home/sharpd/frr8/zebra/zebra_ns.c:141:2 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #5 0x6b9692 in zebra_ns_early_shutdown /home/sharpd/frr8/zebra/zebra_ns.c:164:2 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #6 0x7fb9ad43f228 in ns_walk_func /home/sharpd/frr8/lib/netns_linux.c:386:9 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #7 0x55014f in sigint /home/sharpd/frr8/zebra/main.c:194:2 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #8 0x7fb9ad50db99 in frr_sigevent_process /home/sharpd/frr8/lib/sigevent.c:130:6 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #9 0x7fb9ad560d07 in thread_fetch /home/sharpd/frr8/lib/thread.c:1775:4 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #10 0x7fb9ad3d332d in frr_run /home/sharpd/frr8/lib/libfrr.c:1197:9 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #11 0x550b48 in main /home/sharpd/frr8/zebra/main.c:476:2 -- ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- #7 0x7fb9acd30d09 in __libc_start_main csu/../csu/libc-start.c:308:16 ./bfd_vrf_topo1.test_bfd_vrf_topo1/r2.zebra.asan.911929- Signed-off-by: Donald Sharp <sharpd@nvidia.com>
2022-12-03 13:14:54 +01:00
if (zns->if_table)
route_table_finish(zns->if_table);
zns->if_table = NULL;
zebra_dplane_ns_enable(zns, false /*Disable*/);
kernel_terminate(zns, complete);
zns->ns_id = NS_DEFAULT;
return 0;
}
/* During zebra shutdown, do partial cleanup while the async dataplane
* is still running.
*/
int zebra_ns_early_shutdown(struct ns *ns,
void *param_in __attribute__((unused)),
void **param_out __attribute__((unused)))
{
struct zebra_ns *zns = ns->info;
if (zns == NULL)
return 0;
zebra_ns_disable_internal(zns, false);
return NS_WALK_CONTINUE;
}
zebra: fix heap-use-after free on ns shutdown The following ASAN issue has been observed: > ERROR: AddressSanitizer: heap-use-after-free on address 0x6160000acba4 at pc 0x55910c5694d0 bp 0x7ffe3a8ac850 sp 0x7ffe3a8ac840 > READ of size 4 at 0x6160000acba4 thread T0 > #0 0x55910c5694cf in ctx_info_from_zns zebra/zebra_dplane.c:3315 > #1 0x55910c569696 in dplane_ctx_ns_init zebra/zebra_dplane.c:3331 > #2 0x55910c56bf61 in dplane_ctx_nexthop_init zebra/zebra_dplane.c:3680 > #3 0x55910c5711ca in dplane_nexthop_update_internal zebra/zebra_dplane.c:4490 > #4 0x55910c571c5c in dplane_nexthop_delete zebra/zebra_dplane.c:4717 > #5 0x55910c61e90e in zebra_nhg_uninstall_kernel zebra/zebra_nhg.c:3413 > #6 0x55910c615d8a in zebra_nhg_decrement_ref zebra/zebra_nhg.c:1919 > #7 0x55910c6404db in route_entry_update_nhe zebra/zebra_rib.c:454 > #8 0x55910c64c904 in rib_re_nhg_free zebra/zebra_rib.c:2822 > #9 0x55910c655be2 in rib_unlink zebra/zebra_rib.c:4212 > #10 0x55910c6430f9 in zebra_rtable_node_cleanup zebra/zebra_rib.c:968 > #11 0x7f26f275b8a9 in route_node_free lib/table.c:75 > #12 0x7f26f275bae4 in route_table_free lib/table.c:111 > #13 0x7f26f275b749 in route_table_finish lib/table.c:46 > #14 0x55910c65db17 in zebra_router_free_table zebra/zebra_router.c:191 > #15 0x55910c65dfb5 in zebra_router_terminate zebra/zebra_router.c:244 > #16 0x55910c4f40db in zebra_finalize zebra/main.c:249 > #17 0x7f26f2777108 in event_call lib/event.c:2011 > #18 0x7f26f264180e in frr_run lib/libfrr.c:1212 > #19 0x55910c4f49cb in main zebra/main.c:531 > #20 0x7f26f2029d8f in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58 > #21 0x7f26f2029e3f in __libc_start_main_impl ../csu/libc-start.c:392 > #22 0x55910c4b0114 in _start (/usr/lib/frr/zebra+0x1ae114) It happens with FRR using the kernel. During shutdown, the namespace identifier is attempted to be obtained by zebra, in an attempt to prepare zebra dataplane nexthop messages. Fix this by accessing the ns structure. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2024-10-07 15:11:43 +02:00
/* During zebra shutdown, do kernel cleanup
* netlink sockets, ..
*/
int zebra_ns_kernel_shutdown(struct ns *ns, void *param_in __attribute__((unused)),
void **param_out __attribute__((unused)))
{
struct zebra_ns *zns = ns->info;
if (zns == NULL)
return NS_WALK_CONTINUE;
kernel_terminate(zns, true);
return NS_WALK_CONTINUE;
}
/* During zebra shutdown, do final cleanup
* after all dataplane work is complete.
*/
int zebra_ns_final_shutdown(struct ns *ns,
void *param_in __attribute__((unused)),
void **param_out __attribute__((unused)))
{
struct zebra_ns *zns = ns->info;
if (zns == NULL)
zebra: fix heap-use-after free on ns shutdown The following ASAN issue has been observed: > ERROR: AddressSanitizer: heap-use-after-free on address 0x6160000acba4 at pc 0x55910c5694d0 bp 0x7ffe3a8ac850 sp 0x7ffe3a8ac840 > READ of size 4 at 0x6160000acba4 thread T0 > #0 0x55910c5694cf in ctx_info_from_zns zebra/zebra_dplane.c:3315 > #1 0x55910c569696 in dplane_ctx_ns_init zebra/zebra_dplane.c:3331 > #2 0x55910c56bf61 in dplane_ctx_nexthop_init zebra/zebra_dplane.c:3680 > #3 0x55910c5711ca in dplane_nexthop_update_internal zebra/zebra_dplane.c:4490 > #4 0x55910c571c5c in dplane_nexthop_delete zebra/zebra_dplane.c:4717 > #5 0x55910c61e90e in zebra_nhg_uninstall_kernel zebra/zebra_nhg.c:3413 > #6 0x55910c615d8a in zebra_nhg_decrement_ref zebra/zebra_nhg.c:1919 > #7 0x55910c6404db in route_entry_update_nhe zebra/zebra_rib.c:454 > #8 0x55910c64c904 in rib_re_nhg_free zebra/zebra_rib.c:2822 > #9 0x55910c655be2 in rib_unlink zebra/zebra_rib.c:4212 > #10 0x55910c6430f9 in zebra_rtable_node_cleanup zebra/zebra_rib.c:968 > #11 0x7f26f275b8a9 in route_node_free lib/table.c:75 > #12 0x7f26f275bae4 in route_table_free lib/table.c:111 > #13 0x7f26f275b749 in route_table_finish lib/table.c:46 > #14 0x55910c65db17 in zebra_router_free_table zebra/zebra_router.c:191 > #15 0x55910c65dfb5 in zebra_router_terminate zebra/zebra_router.c:244 > #16 0x55910c4f40db in zebra_finalize zebra/main.c:249 > #17 0x7f26f2777108 in event_call lib/event.c:2011 > #18 0x7f26f264180e in frr_run lib/libfrr.c:1212 > #19 0x55910c4f49cb in main zebra/main.c:531 > #20 0x7f26f2029d8f in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58 > #21 0x7f26f2029e3f in __libc_start_main_impl ../csu/libc-start.c:392 > #22 0x55910c4b0114 in _start (/usr/lib/frr/zebra+0x1ae114) It happens with FRR using the kernel. During shutdown, the namespace identifier is attempted to be obtained by zebra, in an attempt to prepare zebra dataplane nexthop messages. Fix this by accessing the ns structure. Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
2024-10-07 15:11:43 +02:00
return NS_WALK_CONTINUE;
zebra_ns_delete(ns);
return NS_WALK_CONTINUE;
}
*: rework renaming the default VRF Currently, it is possible to rename the default VRF either by passing `-o` option to zebra or by creating a file in `/var/run/netns` and binding it to `/proc/self/ns/net`. In both cases, only zebra knows about the rename and other daemons learn about it only after they connect to zebra. This is a problem, because daemons may read their config before they connect to zebra. To handle this rename after the config is read, we have some special code in every single daemon, which is not very bad but not desirable in my opinion. But things are getting worse when we need to handle this in northbound layer as we have to manually rewrite the config nodes. This approach is already hacky, but still works as every daemon handles its own NB structures. But it is completely incompatible with the central management daemon architecture we are aiming for, as mgmtd doesn't even have a connection with zebra to learn from it. And it shouldn't have it, because operational state changes should never affect configuration. To solve the problem and simplify the code, I propose to expand the `-o` option to all daemons. By using the startup option, we let daemons know about the rename before they read their configs so we don't need any special code to deal with it. There's an easy way to pass the option to all daemons by using `frr_global_options` variable. Unfortunately, the second way of renaming by creating a file in `/var/run/netns` is incompatible with the new mgmtd architecture. Theoretically, we could force daemons to read their configs only after they connect to zebra, but it means adding even more code to handle a very specific use-case. And anyway this won't work for mgmtd as it doesn't have a connection with zebra. So I had to remove this option. Signed-off-by: Igor Ryzhov <iryzhov@nfware.com>
2021-12-03 23:22:55 +01:00
int zebra_ns_init(void)
{
zebra: default ns->info should be default zebra_ns We were not connecting the default zebra_ns to the default ns->info at namespace initialization in zebra. Thus, when we tried to use the `ns_walk_func()` it would ignore the default zebra_ns since there is no pointer to it from the ns struct. Fix this by connecting them in `zebra_ns_init()` and, if the default ns is not found, exit with failure since this is not recoverable. This was found during a crash where we fail to cancel the kernel_read thread at termination (via the `ns_walk_func()`) and then we get a netlink notification trying to use the zns struct that has already been freed. ``` (gdb) bt \#0 0x00007fc1134dc7bb in raise () from /lib/x86_64-linux-gnu/libc.so.6 \#1 0x00007fc1134c7535 in abort () from /lib/x86_64-linux-gnu/libc.so.6 \#2 0x00007fc113996f8f in core_handler (signo=11, siginfo=0x7ffe5429d070, context=<optimized out>) at lib/sigevent.c:254 \#3 <signal handler called> \#4 0x0000561880e15449 in if_lookup_by_index_per_ns (ns=0x0, ifindex=174) at zebra/interface.c:269 \#5 0x0000561880e1642c in if_up (ifp=ifp@entry=0x561883076c50) at zebra/interface.c:1043 \#6 0x0000561880e10723 in netlink_link_change (h=0x7ffe5429d8f0, ns_id=<optimized out>, startup=<optimized out>) at zebra/if_netlink.c:1384 \#7 0x0000561880e17e68 in netlink_parse_info (filter=filter@entry=0x561880e17680 <netlink_information_fetch>, nl=nl@entry=0x561882497238, zns=zns@entry=0x7ffe542a5940, count=count@entry=5, startup=startup@entry=0) at zebra/kernel_netlink.c:932 \#8 0x0000561880e186a5 in kernel_read (thread=<optimized out>) at zebra/kernel_netlink.c:406 \#9 0x00007fc1139a4416 in thread_call (thread=thread@entry=0x7ffe542a5b70) at lib/thread.c:1599 \#10 0x00007fc113974ef8 in frr_run (master=0x5618823c9510) at lib/libfrr.c:1024 \#11 0x0000561880e0b916 in main (argc=8, argv=0x7ffe542a5f78) at zebra/main.c:483 ``` Signed-off-by: Stephen Worley <sworley@cumulusnetworks.com>
2019-11-14 19:43:11 +01:00
struct ns *default_ns;
ns_id_t ns_id;
ns_id_t ns_id_external;
struct ns *ns;
frr_with_privs(&zserv_privs) {
ns_id = zebra_ns_id_get_default();
}
ns_id_external = ns_map_nsid_with_external(ns_id, true);
ns_init_management(ns_id_external, ns_id);
ns = ns_get_default();
if (ns)
ns->relative_default_ns = ns_id;
default_ns = ns_lookup(NS_DEFAULT);
zebra: default ns->info should be default zebra_ns We were not connecting the default zebra_ns to the default ns->info at namespace initialization in zebra. Thus, when we tried to use the `ns_walk_func()` it would ignore the default zebra_ns since there is no pointer to it from the ns struct. Fix this by connecting them in `zebra_ns_init()` and, if the default ns is not found, exit with failure since this is not recoverable. This was found during a crash where we fail to cancel the kernel_read thread at termination (via the `ns_walk_func()`) and then we get a netlink notification trying to use the zns struct that has already been freed. ``` (gdb) bt \#0 0x00007fc1134dc7bb in raise () from /lib/x86_64-linux-gnu/libc.so.6 \#1 0x00007fc1134c7535 in abort () from /lib/x86_64-linux-gnu/libc.so.6 \#2 0x00007fc113996f8f in core_handler (signo=11, siginfo=0x7ffe5429d070, context=<optimized out>) at lib/sigevent.c:254 \#3 <signal handler called> \#4 0x0000561880e15449 in if_lookup_by_index_per_ns (ns=0x0, ifindex=174) at zebra/interface.c:269 \#5 0x0000561880e1642c in if_up (ifp=ifp@entry=0x561883076c50) at zebra/interface.c:1043 \#6 0x0000561880e10723 in netlink_link_change (h=0x7ffe5429d8f0, ns_id=<optimized out>, startup=<optimized out>) at zebra/if_netlink.c:1384 \#7 0x0000561880e17e68 in netlink_parse_info (filter=filter@entry=0x561880e17680 <netlink_information_fetch>, nl=nl@entry=0x561882497238, zns=zns@entry=0x7ffe542a5940, count=count@entry=5, startup=startup@entry=0) at zebra/kernel_netlink.c:932 \#8 0x0000561880e186a5 in kernel_read (thread=<optimized out>) at zebra/kernel_netlink.c:406 \#9 0x00007fc1139a4416 in thread_call (thread=thread@entry=0x7ffe542a5b70) at lib/thread.c:1599 \#10 0x00007fc113974ef8 in frr_run (master=0x5618823c9510) at lib/libfrr.c:1024 \#11 0x0000561880e0b916 in main (argc=8, argv=0x7ffe542a5f78) at zebra/main.c:483 ``` Signed-off-by: Stephen Worley <sworley@cumulusnetworks.com>
2019-11-14 19:43:11 +01:00
if (!default_ns) {
flog_err(EC_ZEBRA_NS_NO_DEFAULT,
"%s: failed to find default ns", __func__);
exit(EXIT_FAILURE); /* This is non-recoverable */
}
*: Handle VRF configuration when VRF gets inactivated and activated A VRF is active only when the corresponding VRF device is present in the kernel. However, when the kernel VRF device is removed, the VRF container in FRR should go away only if there is no user configuration for it. Otherwise, when the VRF device is created again so that the VRF becomes active, FRR cannot take the correct actions. Example configuration for the VRF includes static routes and EVPN L3 VNI. Note that a VRF is currently considered to be "configured" as soon as the operator has issued the "vrf <name>" command in FRR. Such a configured VRF is not deleted upon VRF device removal, it is only made inactive. A VRF that is "configured" can be deleted only upon operator action and only if the VRF has been deactivated i.e., the VRF device removed from the kernel. This is an existing restriction. To implement this change, the VRF disable and delete actions have been modified. Signed-off-by: Vivek Venkatraman <vivek@cumulusnetworks.com> Reviewed-by: Donald Sharp <sharpd@cumulusnetworks.com> Reviewed-by: Mitesh Kanjariya <mkanjariya@cumulusnetworks.com> Reviewed-by: Don Slice <dslice@cumulusnetworks.com> Ticket: CM-18553, CM-18918, CM-10139 Reviewed By: CCR-7022 Testing Done: 1. vrf and pim-vrf automation tests 2. Multiple VRF delete and readd (ifdown, ifup-with-depends) 3. FRR stop, start, restart 4. Networking restart 5. Configuration delete and readd Some of the above tests run in different sequences (manually).
2017-12-02 02:36:37 +01:00
/* Do any needed per-NS data structure allocation. */
zebra: default ns->info should be default zebra_ns We were not connecting the default zebra_ns to the default ns->info at namespace initialization in zebra. Thus, when we tried to use the `ns_walk_func()` it would ignore the default zebra_ns since there is no pointer to it from the ns struct. Fix this by connecting them in `zebra_ns_init()` and, if the default ns is not found, exit with failure since this is not recoverable. This was found during a crash where we fail to cancel the kernel_read thread at termination (via the `ns_walk_func()`) and then we get a netlink notification trying to use the zns struct that has already been freed. ``` (gdb) bt \#0 0x00007fc1134dc7bb in raise () from /lib/x86_64-linux-gnu/libc.so.6 \#1 0x00007fc1134c7535 in abort () from /lib/x86_64-linux-gnu/libc.so.6 \#2 0x00007fc113996f8f in core_handler (signo=11, siginfo=0x7ffe5429d070, context=<optimized out>) at lib/sigevent.c:254 \#3 <signal handler called> \#4 0x0000561880e15449 in if_lookup_by_index_per_ns (ns=0x0, ifindex=174) at zebra/interface.c:269 \#5 0x0000561880e1642c in if_up (ifp=ifp@entry=0x561883076c50) at zebra/interface.c:1043 \#6 0x0000561880e10723 in netlink_link_change (h=0x7ffe5429d8f0, ns_id=<optimized out>, startup=<optimized out>) at zebra/if_netlink.c:1384 \#7 0x0000561880e17e68 in netlink_parse_info (filter=filter@entry=0x561880e17680 <netlink_information_fetch>, nl=nl@entry=0x561882497238, zns=zns@entry=0x7ffe542a5940, count=count@entry=5, startup=startup@entry=0) at zebra/kernel_netlink.c:932 \#8 0x0000561880e186a5 in kernel_read (thread=<optimized out>) at zebra/kernel_netlink.c:406 \#9 0x00007fc1139a4416 in thread_call (thread=thread@entry=0x7ffe542a5b70) at lib/thread.c:1599 \#10 0x00007fc113974ef8 in frr_run (master=0x5618823c9510) at lib/libfrr.c:1024 \#11 0x0000561880e0b916 in main (argc=8, argv=0x7ffe542a5f78) at zebra/main.c:483 ``` Signed-off-by: Stephen Worley <sworley@cumulusnetworks.com>
2019-11-14 19:43:11 +01:00
zebra_ns_new(default_ns);
dzns = default_ns->info;
*: Handle VRF configuration when VRF gets inactivated and activated A VRF is active only when the corresponding VRF device is present in the kernel. However, when the kernel VRF device is removed, the VRF container in FRR should go away only if there is no user configuration for it. Otherwise, when the VRF device is created again so that the VRF becomes active, FRR cannot take the correct actions. Example configuration for the VRF includes static routes and EVPN L3 VNI. Note that a VRF is currently considered to be "configured" as soon as the operator has issued the "vrf <name>" command in FRR. Such a configured VRF is not deleted upon VRF device removal, it is only made inactive. A VRF that is "configured" can be deleted only upon operator action and only if the VRF has been deactivated i.e., the VRF device removed from the kernel. This is an existing restriction. To implement this change, the VRF disable and delete actions have been modified. Signed-off-by: Vivek Venkatraman <vivek@cumulusnetworks.com> Reviewed-by: Donald Sharp <sharpd@cumulusnetworks.com> Reviewed-by: Mitesh Kanjariya <mkanjariya@cumulusnetworks.com> Reviewed-by: Don Slice <dslice@cumulusnetworks.com> Ticket: CM-18553, CM-18918, CM-10139 Reviewed By: CCR-7022 Testing Done: 1. vrf and pim-vrf automation tests 2. Multiple VRF delete and readd (ifdown, ifup-with-depends) 3. FRR stop, start, restart 4. Networking restart 5. Configuration delete and readd Some of the above tests run in different sequences (manually).
2017-12-02 02:36:37 +01:00
/* Register zebra VRF callbacks, create and activate default VRF. */
zebra_vrf_init();
*: Handle VRF configuration when VRF gets inactivated and activated A VRF is active only when the corresponding VRF device is present in the kernel. However, when the kernel VRF device is removed, the VRF container in FRR should go away only if there is no user configuration for it. Otherwise, when the VRF device is created again so that the VRF becomes active, FRR cannot take the correct actions. Example configuration for the VRF includes static routes and EVPN L3 VNI. Note that a VRF is currently considered to be "configured" as soon as the operator has issued the "vrf <name>" command in FRR. Such a configured VRF is not deleted upon VRF device removal, it is only made inactive. A VRF that is "configured" can be deleted only upon operator action and only if the VRF has been deactivated i.e., the VRF device removed from the kernel. This is an existing restriction. To implement this change, the VRF disable and delete actions have been modified. Signed-off-by: Vivek Venkatraman <vivek@cumulusnetworks.com> Reviewed-by: Donald Sharp <sharpd@cumulusnetworks.com> Reviewed-by: Mitesh Kanjariya <mkanjariya@cumulusnetworks.com> Reviewed-by: Don Slice <dslice@cumulusnetworks.com> Ticket: CM-18553, CM-18918, CM-10139 Reviewed By: CCR-7022 Testing Done: 1. vrf and pim-vrf automation tests 2. Multiple VRF delete and readd (ifdown, ifup-with-depends) 3. FRR stop, start, restart 4. Networking restart 5. Configuration delete and readd Some of the above tests run in different sequences (manually).
2017-12-02 02:36:37 +01:00
/* Default NS is activated */
zebra_ns_enable(ns_id_external, (void **)&dzns);
if (vrf_is_backend_netns()) {
ns_add_hook(NS_NEW_HOOK, zebra_ns_new);
ns_add_hook(NS_ENABLE_HOOK, zebra_ns_enabled);
ns_add_hook(NS_DISABLE_HOOK, zebra_ns_disabled);
ns_add_hook(NS_DELETE_HOOK, zebra_ns_delete);
zebra_ns_notify_parse();
zebra_ns_notify_init();
}
return 0;
}