From c50ca33acf29bb269e8f26be19a8ccd06ab41d3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Mon, 2 Nov 2015 16:50:07 +0200 Subject: [PATCH] zebra: implement per-route mtu handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit allows overriding the MTU using netlink attributes on a per-route basis. This is useful for routing protocols that can advertise prefix-specific MTUs between routers (e.g. NHRP). Signed-off-by: Timo Teräs (cherry picked from commit b11f3b54c842117e22e2f5cf1561ea34eee8dfcc) --- lib/zclient.c | 8 ++++++ lib/zclient.h | 5 ++++ zebra/connected.c | 6 ++--- zebra/kernel_socket.c | 6 +++-- zebra/redistribute.c | 3 ++- zebra/rib.h | 9 +++++-- zebra/rt_netlink.c | 57 ++++++++++++++++++++++++++++++++++++++----- zebra/zebra_rib.c | 14 ++++++++--- zebra/zebra_vty.c | 2 ++ zebra/zserv.c | 17 +++++++++++++ 10 files changed, 110 insertions(+), 17 deletions(-) diff --git a/lib/zclient.c b/lib/zclient.c index a28db083cb..0259af5971 100644 --- a/lib/zclient.c +++ b/lib/zclient.c @@ -709,6 +709,8 @@ zclient_connect (struct thread *t) * * If ZAPI_MESSAGE_TAG is set, the tag value is written as a 2 byte value * + * If ZAPI_MESSAGE_MTU is set, the mtu value is written as a 4 byte value + * * XXX: No attention paid to alignment. */ int @@ -769,6 +771,8 @@ zapi_ipv4_route (u_char cmd, struct zclient *zclient, struct prefix_ipv4 *p, stream_putl (s, api->metric); if (CHECK_FLAG (api->message, ZAPI_MESSAGE_TAG)) stream_putw (s, api->tag); + if (CHECK_FLAG (api->message, ZAPI_MESSAGE_MTU)) + stream_putl (s, api->mtu); /* Put length at the first point of the stream. 
*/ stream_putw_at (s, 0, stream_get_endp (s)); @@ -834,6 +838,8 @@ zapi_ipv4_route_ipv6_nexthop (u_char cmd, struct zclient *zclient, stream_putl (s, api->metric); if (CHECK_FLAG (api->message, ZAPI_MESSAGE_TAG)) stream_putw (s, api->tag); + if (CHECK_FLAG (api->message, ZAPI_MESSAGE_MTU)) + stream_putl (s, api->mtu); /* Put length at the first point of the stream. */ stream_putw_at (s, 0, stream_get_endp (s)); @@ -898,6 +904,8 @@ zapi_ipv6_route (u_char cmd, struct zclient *zclient, struct prefix_ipv6 *p, stream_putl (s, api->metric); if (CHECK_FLAG (api->message, ZAPI_MESSAGE_TAG)) stream_putw (s, api->tag); + if (CHECK_FLAG (api->message, ZAPI_MESSAGE_MTU)) + stream_putl (s, api->mtu); /* Put length at the first point of the stream. */ stream_putw_at (s, 0, stream_get_endp (s)); diff --git a/lib/zclient.h b/lib/zclient.h index 8926bbbe63..c42d8c9aa3 100644 --- a/lib/zclient.h +++ b/lib/zclient.h @@ -116,6 +116,7 @@ struct zclient #define ZAPI_MESSAGE_DISTANCE 0x04 #define ZAPI_MESSAGE_METRIC 0x08 #define ZAPI_MESSAGE_TAG 0x10 +#define ZAPI_MESSAGE_MTU 0x20 /* Zserv protocol message header */ struct zserv_header @@ -154,6 +155,8 @@ struct zapi_ipv4 u_short tag; + u_int32_t mtu; + vrf_id_t vrf_id; }; @@ -237,6 +240,8 @@ struct zapi_ipv6 u_short tag; + u_int32_t mtu; + vrf_id_t vrf_id; }; diff --git a/zebra/connected.c b/zebra/connected.c index 42b5a8ab9c..3077272221 100644 --- a/zebra/connected.c +++ b/zebra/connected.c @@ -201,10 +201,10 @@ connected_up_ipv4 (struct interface *ifp, struct connected *ifc) return; rib_add_ipv4 (ZEBRA_ROUTE_CONNECT, 0, 0, &p, NULL, NULL, ifp->ifindex, - ifp->vrf_id, RT_TABLE_MAIN, ifp->metric, 0, SAFI_UNICAST); + ifp->vrf_id, RT_TABLE_MAIN, ifp->metric, 0, 0, SAFI_UNICAST); rib_add_ipv4 (ZEBRA_ROUTE_CONNECT, 0, 0, &p, NULL, NULL, ifp->ifindex, - ifp->vrf_id, RT_TABLE_MAIN, ifp->metric, 0, SAFI_MULTICAST); + ifp->vrf_id, RT_TABLE_MAIN, ifp->metric, 0, 0, SAFI_MULTICAST); if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug ("%u: IF %s IPv4 
address add/up, scheduling RIB processing", @@ -379,7 +379,7 @@ connected_up_ipv6 (struct interface *ifp, struct connected *ifc) #endif rib_add_ipv6 (ZEBRA_ROUTE_CONNECT, 0, 0, &p, NULL, ifp->ifindex, ifp->vrf_id, - RT_TABLE_MAIN, ifp->metric, 0, SAFI_UNICAST); + RT_TABLE_MAIN, ifp->metric, 0, 0, SAFI_UNICAST); if (IS_ZEBRA_DEBUG_RIB_DETAILED) zlog_debug ("%u: IF %s IPv6 address down, scheduling RIB processing", diff --git a/zebra/kernel_socket.c b/zebra/kernel_socket.c index 638062d5d6..c5763dd46f 100644 --- a/zebra/kernel_socket.c +++ b/zebra/kernel_socket.c @@ -974,7 +974,8 @@ rtm_read (struct rt_msghdr *rtm) || rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE) rib_add_ipv4 (ZEBRA_ROUTE_KERNEL, 0, zebra_flags, - &p, &gate.sin.sin_addr, NULL, 0, VRF_DEFAULT, 0, 0, 0, SAFI_UNICAST); + &p, &gate.sin.sin_addr, NULL, 0, VRF_DEFAULT, + 0, 0, 0, 0, SAFI_UNICAST); else rib_delete_ipv4 (ZEBRA_ROUTE_KERNEL, 0, zebra_flags, &p, &gate.sin.sin_addr, 0, VRF_DEFAULT, 0, SAFI_UNICAST); @@ -1016,7 +1017,8 @@ rtm_read (struct rt_msghdr *rtm) || rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE) rib_add_ipv6 (ZEBRA_ROUTE_KERNEL, 0, zebra_flags, - &p, &gate.sin6.sin6_addr, ifindex, VRF_DEFAULT, 0, 0, 0, SAFI_UNICAST); + &p, &gate.sin6.sin6_addr, ifindex, VRF_DEFAULT, + 0, 0, 0, 0, SAFI_UNICAST); else rib_delete_ipv6 (ZEBRA_ROUTE_KERNEL, 0, zebra_flags, &p, &gate.sin6.sin6_addr, ifindex, VRF_DEFAULT, 0, SAFI_UNICAST); diff --git a/zebra/redistribute.c b/zebra/redistribute.c index 4238d8329e..892bc2af2b 100644 --- a/zebra/redistribute.c +++ b/zebra/redistribute.c @@ -578,7 +578,7 @@ zebra_add_import_table_entry (struct route_node *rn, struct rib *rib, const char rib_add_ipv4(ZEBRA_ROUTE_TABLE, rib->table, 0, &p4, gate, &nhop->src.ipv4, nhop->ifindex, rib->vrf_id, zebrad.rtm_table_default, - rib->metric, + rib->metric, rib->mtu, zebra_import_table_distance[AFI_IP][rib->table], SAFI_UNICAST); } @@ -589,6 +589,7 @@ zebra_add_import_table_entry (struct route_node *rn, struct 
rib *rib, const char newrib->distance = zebra_import_table_distance[AFI_IP][rib->table]; newrib->flags = rib->flags; newrib->metric = rib->metric; + newrib->mtu = rib->mtu; newrib->table = zebrad.rtm_table_default; newrib->nexthop_num = 0; newrib->uptime = time(NULL); diff --git a/zebra/rib.h b/zebra/rib.h index a2109d8c17..095b9c6f8f 100644 --- a/zebra/rib.h +++ b/zebra/rib.h @@ -64,6 +64,10 @@ struct rib /* Metric */ u_int32_t metric; + /* MTU */ + u_int32_t mtu; + u_int32_t nexthop_mtu; + /* Distance. */ u_char distance; @@ -376,7 +380,7 @@ extern int zebra_check_addr (struct prefix *p); extern int rib_add_ipv4 (int type, u_short instance, int flags, struct prefix_ipv4 *p, struct in_addr *gate, struct in_addr *src, ifindex_t ifindex, vrf_id_t vrf_id, u_int32_t table_id, - u_int32_t, u_char, safi_t); + u_int32_t, u_int32_t, u_char, safi_t); extern int rib_add_ipv4_multipath (struct prefix_ipv4 *, struct rib *, safi_t); @@ -417,7 +421,8 @@ static_delete_ipv4 (safi_t safi, struct prefix *p, struct in_addr *gate, ifindex extern int rib_add_ipv6 (int type, u_short instance, int flags, struct prefix_ipv6 *p, struct in6_addr *gate, ifindex_t ifindex, vrf_id_t vrf_id, - u_int32_t table_id, u_int32_t metric, u_char distance, safi_t safi); + u_int32_t table_id, u_int32_t metric, u_int32_t mtu, + u_char distance, safi_t safi); extern int rib_delete_ipv6 (int type, u_short instance, int flags, struct prefix_ipv6 *p, diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c index d93a530317..49dcd1e6de 100644 --- a/zebra/rt_netlink.c +++ b/zebra/rt_netlink.c @@ -866,6 +866,7 @@ netlink_routing_table (struct sockaddr_nl *snl, struct nlmsghdr *h, int index; int table; int metric; + u_int32_t mtu = 0; void *dest; void *gate; @@ -937,6 +938,18 @@ netlink_routing_table (struct sockaddr_nl *snl, struct nlmsghdr *h, if (tb[RTA_PRIORITY]) metric = *(int *) RTA_DATA(tb[RTA_PRIORITY]); + if (tb[RTA_METRICS]) + { + struct rtattr *mxrta[RTAX_MAX+1]; + + memset (mxrta, 0, sizeof mxrta); + 
netlink_parse_rtattr (mxrta, RTAX_MAX, RTA_DATA(tb[RTA_METRICS]), + RTA_PAYLOAD(tb[RTA_METRICS])); + + if (mxrta[RTAX_MTU]) + mtu = *(u_int32_t *) RTA_DATA(mxrta[RTAX_MTU]); + } + if (rtm->rtm_family == AF_INET) { struct prefix_ipv4 p; @@ -946,7 +959,7 @@ netlink_routing_table (struct sockaddr_nl *snl, struct nlmsghdr *h, if (!tb[RTA_MULTIPATH]) rib_add_ipv4 (ZEBRA_ROUTE_KERNEL, 0, flags, &p, gate, src, index, - vrf_id, table, metric, 0, SAFI_UNICAST); + vrf_id, table, metric, mtu, 0, SAFI_UNICAST); else { /* This is a multipath route */ @@ -962,6 +975,7 @@ netlink_routing_table (struct sockaddr_nl *snl, struct nlmsghdr *h, rib->distance = 0; rib->flags = flags; rib->metric = metric; + rib->mtu = mtu; rib->vrf_id = vrf_id; rib->table = table; rib->nexthop_num = 0; @@ -1014,7 +1028,7 @@ netlink_routing_table (struct sockaddr_nl *snl, struct nlmsghdr *h, p.prefixlen = rtm->rtm_dst_len; rib_add_ipv6 (ZEBRA_ROUTE_KERNEL, 0, flags, &p, gate, index, vrf_id, - table, metric, 0, SAFI_UNICAST); + table, metric, mtu, 0, SAFI_UNICAST); } #endif /* HAVE_IPV6 */ @@ -1051,6 +1065,7 @@ netlink_route_change (struct sockaddr_nl *snl, struct nlmsghdr *h, int index; int table; int metric; + u_int32_t mtu = 0; void *dest; void *gate; @@ -1142,8 +1157,23 @@ netlink_route_change (struct sockaddr_nl *snl, struct nlmsghdr *h, if (tb[RTA_PREFSRC]) src = RTA_DATA (tb[RTA_PREFSRC]); - if (h->nlmsg_type == RTM_NEWROUTE && tb[RTA_PRIORITY]) - metric = *(int *) RTA_DATA(tb[RTA_PRIORITY]); + if (h->nlmsg_type == RTM_NEWROUTE) + { + if (tb[RTA_PRIORITY]) + metric = *(int *) RTA_DATA(tb[RTA_PRIORITY]); + + if (tb[RTA_METRICS]) + { + struct rtattr *mxrta[RTAX_MAX+1]; + + memset (mxrta, 0, sizeof mxrta); + netlink_parse_rtattr (mxrta, RTAX_MAX, RTA_DATA(tb[RTA_METRICS]), + RTA_PAYLOAD(tb[RTA_METRICS])); + + if (mxrta[RTAX_MTU]) + mtu = *(u_int32_t *) RTA_DATA(mxrta[RTAX_MTU]); + } + } if (rtm->rtm_family == AF_INET) { @@ -1164,7 +1194,7 @@ netlink_route_change (struct sockaddr_nl *snl, struct 
nlmsghdr *h, { if (!tb[RTA_MULTIPATH]) rib_add_ipv4 (ZEBRA_ROUTE_KERNEL, 0, 0, &p, gate, src, index, vrf_id, - table, metric, 0, SAFI_UNICAST); + table, metric, mtu, 0, SAFI_UNICAST); else { /* This is a multipath route */ @@ -1180,6 +1210,7 @@ netlink_route_change (struct sockaddr_nl *snl, struct nlmsghdr *h, rib->distance = 0; rib->flags = 0; rib->metric = metric; + rib->mtu = mtu; rib->vrf_id = vrf_id; rib->table = table; rib->nexthop_num = 0; @@ -1248,7 +1279,7 @@ netlink_route_change (struct sockaddr_nl *snl, struct nlmsghdr *h, if (h->nlmsg_type == RTM_NEWROUTE) rib_add_ipv6 (ZEBRA_ROUTE_KERNEL, 0, 0, &p, gate, index, vrf_id, - table, metric, 0, SAFI_UNICAST); + table, metric, mtu, 0, SAFI_UNICAST); else rib_delete_ipv6 (ZEBRA_ROUTE_KERNEL, 0, zebra_flags, &p, gate, index, vrf_id, table, SAFI_UNICAST); @@ -2079,6 +2110,20 @@ netlink_route_multipath (int cmd, struct prefix *p, struct rib *rib, addattr32(&req.n, sizeof req, RTA_TABLE, rib->table); } + if (rib->mtu || rib->nexthop_mtu) + { + char buf[NL_PKT_BUF_SIZE]; + struct rtattr *rta = (void *) buf; + u_int32_t mtu = rib->mtu; + if (!mtu || (rib->nexthop_mtu && rib->nexthop_mtu < mtu)) + mtu = rib->nexthop_mtu; + rta->rta_type = RTA_METRICS; + rta->rta_len = RTA_LENGTH(0); + rta_addattr_l (rta, NL_PKT_BUF_SIZE, RTAX_MTU, &mtu, sizeof mtu); + addattr_l (&req.n, NL_PKT_BUF_SIZE, RTA_METRICS, RTA_DATA (rta), + RTA_PAYLOAD (rta)); + } + if (discard) { if (cmd == RTM_NEWROUTE) diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index 0dc8086906..195d3633e1 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -352,6 +352,7 @@ nexthop_active_ipv4 (struct rib *rib, struct nexthop *nexthop, int set, zebra_deregister_rnh_static_nexthops(rib->vrf_id, nexthop->resolved, top); nexthops_free(nexthop->resolved); nexthop->resolved = NULL; + rib->nexthop_mtu = 0; } /* Skip nexthops that have been filtered out due to route-map */ @@ -545,6 +546,8 @@ nexthop_active_ipv4 (struct rib *rib, struct nexthop *nexthop, int set, 
} resolved = 1; } + if (resolved && set) + rib->nexthop_mtu = match->mtu; return resolved; } else @@ -2414,7 +2417,7 @@ int rib_add_ipv4 (int type, u_short instance, int flags, struct prefix_ipv4 *p, struct in_addr *gate, struct in_addr *src, ifindex_t ifindex, vrf_id_t vrf_id, u_int32_t table_id, - u_int32_t metric, u_char distance, safi_t safi) + u_int32_t metric, u_int32_t mtu, u_char distance, safi_t safi) { struct rib *rib; struct rib *same = NULL; @@ -2481,6 +2484,7 @@ rib_add_ipv4 (int type, u_short instance, int flags, struct prefix_ipv4 *p, rib->distance = distance; rib->flags = flags; rib->metric = metric; + rib->mtu = mtu; rib->table = table_id; rib->vrf_id = vrf_id; rib->nexthop_num = 0; @@ -2554,9 +2558,10 @@ void _rib_dump (const char * func, ); zlog_debug ( - "%s: metric == %u, distance == %u, flags == %u, status == %u", + "%s: metric == %u, mtu == %u, distance == %u, flags == %u, status == %u", func, rib->metric, + rib->mtu, rib->distance, rib->flags, rib->status @@ -2992,6 +2997,7 @@ static_install_route (afi_t afi, safi_t safi, struct prefix *p, struct static_ro rib->instance = 0; rib->distance = si->distance; rib->metric = 0; + rib->mtu = 0; rib->vrf_id = si->vrf_id; rib->table = si->vrf_id ? 
(zebra_vrf_lookup(si->vrf_id))->table_id : zebrad.rtm_table_default; rib->nexthop_num = 0; @@ -3342,7 +3348,8 @@ static_delete_ipv4 (safi_t safi, struct prefix *p, struct in_addr *gate, ifindex int rib_add_ipv6 (int type, u_short instance, int flags, struct prefix_ipv6 *p, struct in6_addr *gate, ifindex_t ifindex, vrf_id_t vrf_id, - u_int32_t table_id, u_int32_t metric, u_char distance, safi_t safi) + u_int32_t table_id, u_int32_t metric, u_int32_t mtu, + u_char distance, safi_t safi) { struct rib *rib; struct rib *same = NULL; @@ -3401,6 +3408,7 @@ rib_add_ipv6 (int type, u_short instance, int flags, struct prefix_ipv6 *p, rib->distance = distance; rib->flags = flags; rib->metric = metric; + rib->mtu = mtu; rib->table = table_id; rib->vrf_id = vrf_id; rib->nexthop_num = 0; diff --git a/zebra/zebra_vty.c b/zebra/zebra_vty.c index e9090cbed2..ff2f7c7bc4 100644 --- a/zebra/zebra_vty.c +++ b/zebra/zebra_vty.c @@ -1929,6 +1929,8 @@ vty_show_ip_route_detail (struct vty *vty, struct route_node *rn, int mcast) vty_out (vty, ", distance %u, metric %u", rib->distance, rib->metric); if (rib->tag) vty_out (vty, ", tag %d", rib->tag); + if (rib->mtu) + vty_out (vty, ", mtu %u", rib->mtu); if (rib->vrf_id != VRF_DEFAULT) { zvrf = vrf_info_lookup(rib->vrf_id); diff --git a/zebra/zserv.c b/zebra/zserv.c index a1fe9bc188..84881e14a4 100644 --- a/zebra/zserv.c +++ b/zebra/zserv.c @@ -695,6 +695,8 @@ zsend_redistribute_route (int cmd, struct zserv *client, struct prefix *p, SET_FLAG(zapi_flags, ZAPI_MESSAGE_TAG); stream_putw(s, rib->tag); } + SET_FLAG (zapi_flags, ZAPI_MESSAGE_MTU); + stream_putl (s, rib->mtu); } /* write real message flags value */ @@ -1264,6 +1266,11 @@ zread_ipv4_add (struct zserv *client, u_short length, struct zebra_vrf *zvrf) else rib->tag = 0; + if (CHECK_FLAG (message, ZAPI_MESSAGE_MTU)) + rib->mtu = stream_getl (s); + else + rib->mtu = 0; + /* Table */ rib->table = zvrf->table_id; @@ -1518,6 +1525,11 @@ zread_ipv4_route_ipv6_nexthop_add (struct zserv 
*client, u_short length, struct else rib->tag = 0; + if (CHECK_FLAG (message, ZAPI_MESSAGE_MTU)) + rib->mtu = stream_getl (s); + else + rib->mtu = 0; + /* Table */ rib->table = zvrf->table_id; @@ -1636,6 +1648,11 @@ zread_ipv6_add (struct zserv *client, u_short length, struct zebra_vrf *zvrf) else rib->tag = 0; + if (CHECK_FLAG (message, ZAPI_MESSAGE_MTU)) + rib->mtu = stream_getl (s); + else + rib->mtu = 0; + /* VRF ID */ rib->vrf_id = zvrf->vrf_id; rib->table = zvrf->table_id;