diff --git a/tests/topotests/high_ecmp/test_high_ecmp.py b/tests/topotests/high_ecmp/test_high_ecmp.py index d28a1ee069..daaf5d4d27 100644 --- a/tests/topotests/high_ecmp/test_high_ecmp.py +++ b/tests/topotests/high_ecmp/test_high_ecmp.py @@ -43,7 +43,6 @@ from lib.topolog import logger def build_topo(tgen): - tgen.add_router("r1") tgen.add_router("r2") diff --git a/zebra/interface.h b/zebra/interface.h index 2c7a079bf4..995dffdc32 100644 --- a/zebra/interface.h +++ b/zebra/interface.h @@ -99,6 +99,9 @@ struct zebra_if { /* back pointer to the interface */ struct interface *ifp; + /* Event timer to batch ICMPv6 join requests */ + struct event *icmpv6_join_timer; + enum zebra_if_flags flags; /* Shutdown configuration. */ diff --git a/zebra/rtadv.c b/zebra/rtadv.c index 8f6713517d..014021dba6 100644 --- a/zebra/rtadv.c +++ b/zebra/rtadv.c @@ -21,6 +21,8 @@ #include "vrf.h" #include "ns.h" #include "lib_errors.h" +#include "wheel.h" +#include "network.h" #include "zebra/interface.h" #include "zebra/rtadv.h" @@ -36,6 +38,19 @@ extern struct zebra_privs_t zserv_privs; static uint32_t interfaces_configured_for_ra_from_bgp; #define RTADV_ADATA_SIZE 1024 +#define PROC_IGMP6 "/proc/net/igmp6" + +/* 32 hex chars + * say for 2001:db8:85a3::8a2e:370:7334 + * hex string is 20010db885a3000000008a2e03707334, + * which is 32 chars long +*/ +#define MAX_V6ADDR_LEN 32 + +#define MAX_INTERFACE_NAME_LEN 25 + +#define MAX_CHARS_PER_LINE 1024 + #if defined(HAVE_RTADV) #include "zebra/rtadv_clippy.c" @@ -58,6 +73,12 @@ DEFINE_MTYPE_STATIC(ZEBRA, ADV_IF, "Advertised Interface"); #define ALLNODE "ff02::1" #define ALLROUTER "ff02::2" +static bool is_interface_in_group(const char *ifname_in, const char *mcast_addr_in); + +#ifdef __linux__ +static bool v6_addr_hex_str_to_in6_addr(const char *hex_str, struct in6_addr *addr); +#endif + /* adv list node */ struct adv_if { char name[IFNAMSIZ]; @@ -462,6 +483,60 @@ no_more_opts: zif->ra_sent++; } +static void start_icmpv6_join_timer(struct event *thread) +{ + struct interface *ifp = EVENT_ARG(thread); + struct zebra_if *zif = ifp->info; + struct zebra_vrf *zvrf = rtadv_interface_get_zvrf(ifp); + + if (if_join_all_router(zvrf->rtadv.sock, ifp)) { + /*Wait random amount of time between 1 ms to ICMPV6_JOIN_TIMER_EXP_MS ms*/ + int random_ms = (frr_weak_random() % ICMPV6_JOIN_TIMER_EXP_MS) + 1; + event_add_timer_msec(zrouter.master, start_icmpv6_join_timer, ifp, random_ms, + &zif->icmpv6_join_timer); + } + + if (IS_ZEBRA_DEBUG_EVENT) + zlog_debug("Processing ICMPv6 join on interface %s(%s:%u)", ifp->name, + ifp->vrf->name, ifp->ifindex); +} + +void process_rtadv(void *arg) +{ + struct interface *ifp = arg; + struct zebra_if *zif = ifp->info; + struct zebra_vrf *zvrf = rtadv_interface_get_zvrf(ifp); + + if (zif->rtadv.inFastRexmit && zif->rtadv.UseFastRexmit) { + if (--zif->rtadv.NumFastReXmitsRemain <= 0) + zif->rtadv.inFastRexmit = 0; + + if (IS_ZEBRA_DEBUG_SEND) + zlog_debug("Doing fast RA Rexmit on interface %s(%s:%u)", ifp->name, + ifp->vrf->name, ifp->ifindex); + + rtadv_send_packet(zvrf->rtadv.sock, ifp, RA_ENABLE); + } else { + zif->rtadv.AdvIntervalTimer -= RTADV_TIMER_WHEEL_PERIOD_MS; + /* Wait atleast AdvIntervalTimer time before sending next RA + * AdvIntervalTimer can go negative, when ra_wheel timer expiry + * interval is not a multiple of AdvIntervalTimer. Say ra_wheel + * expiry time is 10 ms and, AdvIntervalTimer == 1005 ms. Allowing + * AdvIntervalTimer to go negative and checking, gurantees that + * we have waited Wait atleast AdvIntervalTimer, so RA can be + * sent now. + */ + if (zif->rtadv.AdvIntervalTimer <= 0) { + zif->rtadv.AdvIntervalTimer = zif->rtadv.MaxRtrAdvInterval; + if (IS_ZEBRA_DEBUG_SEND) + zlog_debug("Doing regular RA Rexmit on interface %s(%s:%u)", + ifp->name, ifp->vrf->name, ifp->ifindex); + + rtadv_send_packet(zvrf->rtadv.sock, ifp, RA_ENABLE); + } + } +} + static void rtadv_timer(struct event *thread) { struct zebra_vrf *zvrf = EVENT_ARG(thread); @@ -1261,7 +1336,13 @@ static void rtadv_start_interface_events(struct zebra_vrf *zvrf, if (adv_if != NULL) return; /* Already added */ - if_join_all_router(zvrf->rtadv.sock, zif->ifp); + if (if_join_all_router(zvrf->rtadv.sock, zif->ifp)) { + /*Failed to join on 1st attempt, wait random amount of time between 1 ms + to ICMPV6_JOIN_TIMER_EXP_MS ms*/ + int random_ms = (frr_weak_random() % ICMPV6_JOIN_TIMER_EXP_MS) + 1; + event_add_timer_msec(zrouter.master, start_icmpv6_join_timer, zif->ifp, random_ms, + &zif->icmpv6_join_timer); + } if (adv_if_list_count(&zvrf->rtadv.adv_if) == 1) rtadv_event(zvrf, RTADV_START, 0); @@ -1281,6 +1362,8 @@ void ipv6_nd_suppress_ra_set(struct interface *ifp, if (status == RA_SUPPRESS) { /* RA is currently enabled */ if (zif->rtadv.AdvSendAdvertisements) { + /* Try to delete from the ra wheel */ + wheel_remove_item(zrouter.ra_wheel, ifp); rtadv_send_packet(zvrf->rtadv.sock, ifp, RA_SUPPRESS); zif->rtadv.AdvSendAdvertisements = 0; zif->rtadv.AdvIntervalTimer = 0; @@ -1311,6 +1394,7 @@ void ipv6_nd_suppress_ra_set(struct interface *ifp, RTADV_NUM_FAST_REXMITS; } + wheel_add_item(zrouter.ra_wheel, ifp); rtadv_start_interface_events(zvrf, zif); } } @@ -1438,6 +1522,12 @@ void rtadv_stop_ra(struct interface *ifp) zif = ifp->info; zvrf = rtadv_interface_get_zvrf(ifp); + /*Try to delete from ra wheels */ + wheel_remove_item(zrouter.ra_wheel, ifp); + + /*Turn off event for ICMPv6 join*/ + EVENT_OFF(zif->icmpv6_join_timer); + if (zif->rtadv.AdvSendAdvertisements) rtadv_send_packet(zvrf->rtadv.sock, ifp, RA_SUPPRESS); } @@ -1730,8 +1820,7 @@ static void rtadv_event(struct zebra_vrf *zvrf, enum rtadv_event event, int val) case RTADV_START: event_add_read(zrouter.master, rtadv_read, zvrf, rtadv->sock, &rtadv->ra_read); - event_add_event(zrouter.master, rtadv_timer, zvrf, 0, - &rtadv->ra_timer); + break; case RTADV_STOP: EVENT_OFF(rtadv->ra_timer); @@ -1862,24 +1951,114 @@ void rtadv_cmd_init(void) install_element(VIEW_NODE, &show_ipv6_nd_ra_if_cmd); } +#ifdef __linux__ +static bool v6_addr_hex_str_to_in6_addr(const char *hex_str, struct in6_addr *addr) +{ + size_t str_len = strlen(hex_str); + + if (str_len != MAX_V6ADDR_LEN) { + flog_err_sys(EC_LIB_SYSTEM_CALL, "Invalid V6 addr hex len %zu", str_len); + return false; + } + + for (int i = 0; i < 16; i++) { + char byte_str[3] = { hex_str[i * 2], hex_str[i * 2 + 1], '\0' }; + addr->s6_addr[i] = (uint8_t)strtol(byte_str, NULL, 16); + } + + return true; +} +#endif + +/* Checks if an interface is part of a multicast group, no null check for input strings */ +static bool is_interface_in_group(const char *ifname_in, const char *mcast_addr_in) +{ +#ifdef __linux__ + char line[MAX_CHARS_PER_LINE]; + char ifname_found[MAX_INTERFACE_NAME_LEN]; + char mcast_addr_found_hex_str[MAX_V6ADDR_LEN + 5]; + struct in6_addr mcast_addr_in_bin; + struct in6_addr mcast_addr_found_bin; + int if_index = -1; + int ifname_in_len = 0; + int ifname_found_len = 0; + + FILE *fp = fopen(PROC_IGMP6, "r"); + + if (!fp) { + flog_err_sys(EC_LIB_SYSTEM_CALL, "Failed to open %s", PROC_IGMP6); + return false; + } + + /* Convert input IPv6 address to binary */ + if (inet_pton(AF_INET6, mcast_addr_in, &mcast_addr_in_bin) != 1) { + flog_err_sys(EC_LIB_SYSTEM_CALL, "Invalid IPv6 address format %s", mcast_addr_in); + fclose(fp); + return false; + } + + /* Convert binary to hex format */ + while (fgets(line, sizeof(line), fp)) { + sscanf(line, "%d %s %s", &if_index, ifname_found, mcast_addr_found_hex_str); + + ifname_in_len = strlen(ifname_in); + ifname_found_len = strlen(ifname_found); + if (ifname_in_len != ifname_found_len) + continue; + + /* Locate 'x' if "0x" is present or not, if present go past that */ + const char *clean_mcast_addr_hex_str = strchr(mcast_addr_found_hex_str, 'x'); + if (clean_mcast_addr_hex_str) { + clean_mcast_addr_hex_str++; + } else { + clean_mcast_addr_hex_str = mcast_addr_found_hex_str; + } + + if (!v6_addr_hex_str_to_in6_addr(clean_mcast_addr_hex_str, &mcast_addr_found_bin)) + continue; + + if ((!strncmp(ifname_in, ifname_found, ifname_in_len)) && + (!IPV6_ADDR_CMP(&mcast_addr_in_bin, &mcast_addr_found_bin))) { + fclose(fp); + /* Already joined */ + return true; + } + } + + fclose(fp); + +#endif + + /* Not joined */ + return false; +} + static int if_join_all_router(int sock, struct interface *ifp) { int ret; struct ipv6_mreq mreq; + if (is_interface_in_group(ifp->name, ALLROUTER)) + /* Interface is already part of the group, so return sucess */ + return 0; + memset(&mreq, 0, sizeof(mreq)); inet_pton(AF_INET6, ALLROUTER, &mreq.ipv6mr_multiaddr); mreq.ipv6mr_interface = ifp->ifindex; ret = setsockopt(sock, IPPROTO_IPV6, IPV6_JOIN_GROUP, (char *)&mreq, sizeof(mreq)); - if (ret < 0) + + if (ret < 0) { flog_err_sys(EC_LIB_SOCKET, "%s(%u): Failed to join group, socket %u error %s", ifp->name, ifp->ifindex, sock, safe_strerror(errno)); + return ret; + } + if (IS_ZEBRA_DEBUG_EVENT) zlog_debug( "%s(%s:%u): Join All-Routers multicast group, socket %u", diff --git a/zebra/rtadv.h b/zebra/rtadv.h index 0983ea578f..73d737ce41 100644 --- a/zebra/rtadv.h +++ b/zebra/rtadv.h @@ -460,6 +460,7 @@ extern void zebra_interface_radv_enable(ZAPI_HANDLER_ARGS); extern uint32_t rtadv_get_interfaces_configured_from_bgp(void); extern bool rtadv_compiled_in(void); extern void rtadv_init(void); +extern void process_rtadv(void *arg); #ifdef __cplusplus } diff --git a/zebra/zebra_router.c b/zebra/zebra_router.c index ae2910af41..15b7e317c9 100644 --- a/zebra/zebra_router.c +++ b/zebra/zebra_router.c @@ -17,6 +17,7 @@ #include "zebra/zebra_tc.h" #include "debug.h" #include "zebra_script.h" +#include "wheel.h" DEFINE_MTYPE_STATIC(ZEBRA, RIB_TABLE_INFO, "RIB table info"); DEFINE_MTYPE_STATIC(ZEBRA, ZEBRA_RT_TABLE, "Zebra VRF table"); @@ -220,10 +221,22 @@ uint32_t zebra_router_get_next_sequence(void) memory_order_relaxed); } +static inline unsigned int interface_hash_key(const void *arg) +{ + const struct interface *ifp = arg; + + return ifp->ifindex; +} + void zebra_router_terminate(void) { struct zebra_router_table *zrt, *tmp; + if (zrouter.ra_wheel) { + wheel_delete(zrouter.ra_wheel); + zrouter.ra_wheel = NULL; + } + EVENT_OFF(zrouter.t_rib_sweep); RB_FOREACH_SAFE (zrt, zebra_router_table_head, &zrouter.tables, tmp) @@ -278,6 +291,11 @@ void zebra_router_init(bool asic_offload, bool notify_on_ack, zrouter.nhg_keep = ZEBRA_DEFAULT_NHG_KEEP_TIMER; + /*Init V6 RA batching stuffs*/ + zrouter.ra_wheel = wheel_init(zrouter.master, RTADV_TIMER_WHEEL_PERIOD_MS, + RTADV_TIMER_WHEEL_SLOTS_NO, interface_hash_key, process_rtadv, + NULL); + zebra_vxlan_init(); zebra_mlag_init(); zebra_neigh_init(); diff --git a/zebra/zebra_router.h b/zebra/zebra_router.h index 28c4cf0790..d357994ec2 100644 --- a/zebra/zebra_router.h +++ b/zebra/zebra_router.h @@ -112,12 +112,19 @@ struct zebra_mlag_info { struct event *t_write; }; +#define RTADV_TIMER_WHEEL_PERIOD_MS 1000 +#define RTADV_TIMER_WHEEL_SLOTS_NO 100 +#define ICMPV6_JOIN_TIMER_EXP_MS 100 + struct zebra_router { atomic_bool in_shutdown; /* Thread master */ struct event_loop *master; + /* Wheel to process V6 RA update */ + struct timer_wheel *ra_wheel; + /* Lists of clients who have connected to us */ struct list *client_list;