vrrpd: add initial macvlan support

* Search for macvlan interfaces with the appropriate name and MAC
  address when starting up a new VRRP instance
* Split VRRP socket into two; one for Tx, one for Rx
* Bind Tx socket to the macvlan subinterface so our VRRP advertisements
  go out with the correct MAC address
* Send ARP requests from this macvlan subinterface
* Improve error messaging

Signed-off-by: Quentin Young <qlyoung@cumulusnetworks.com>
This commit is contained in:
Quentin Young 2019-01-07 19:02:53 +00:00
parent bb54fa3a00
commit dad18a2fd7
6 changed files with 244 additions and 36 deletions

View file

@ -389,6 +389,34 @@ struct interface *if_lookup_prefix(struct prefix *prefix, vrf_id_t vrf_id)
return NULL; return NULL;
} }
/*
 * Collect every interface in a VRF whose hardware address equals the one
 * given.
 *
 * hw_addr
 *    Hardware address to match
 *
 * addrsz
 *    Length of hw_addr, in bytes
 *
 * result
 *    Out parameter. On return it points at an array of matching interfaces
 *    allocated with MTYPE_TMP, or is NULL when nothing matched. The caller
 *    owns the array and releases it with XFREE(MTYPE_TMP, ...).
 *
 * vrf_id
 *    VRF whose interfaces are searched
 *
 * Returns:
 *    Number of entries stored in *result (0 if none)
 */
size_t if_lookup_by_hwaddr(const uint8_t *hw_addr, size_t addrsz,
			   struct interface ***result, vrf_id_t vrf_id)
{
	struct vrf *vrf = vrf_lookup_by_id(vrf_id);
	struct list *rs = list_new();
	struct interface *ifp;
	size_t count;

	/*
	 * Always give the caller a defined pointer; callers free *result
	 * unconditionally, so leaving it untouched on "no match" would hand
	 * an indeterminate pointer to the allocator.
	 */
	*result = NULL;

	FOR_ALL_INTERFACES (vrf, ifp) {
		if (ifp->hw_addr_len == (int)addrsz
		    && !memcmp(hw_addr, ifp->hw_addr, addrsz))
			listnode_add(rs, ifp);
	}

	count = rs->count;

	if (count) {
		*result = XCALLOC(MTYPE_TMP,
				  sizeof(struct interface *) * count);
		list_to_array(rs, (void **)*result, count);
	}

	/* Only the temporary list goes away; the interfaces are not owned */
	list_delete(&rs);

	return count;
}
/* Get interface by name if given name interface doesn't exist create /* Get interface by name if given name interface doesn't exist create
one. */ one. */
struct interface *if_get_by_name(const char *name, vrf_id_t vrf_id) struct interface *if_get_by_name(const char *name, vrf_id_t vrf_id)

View file

@ -482,6 +482,8 @@ extern struct connected *if_lookup_address(void *matchaddr, int family,
vrf_id_t vrf_id); vrf_id_t vrf_id);
extern struct interface *if_lookup_prefix(struct prefix *prefix, extern struct interface *if_lookup_prefix(struct prefix *prefix,
vrf_id_t vrf_id); vrf_id_t vrf_id);
size_t if_lookup_by_hwaddr(const uint8_t *hw_addr, size_t addrsz,
struct interface ***result, vrf_id_t vrf_id);
/* These 3 functions are to be used when the ifname argument is terminated /* These 3 functions are to be used when the ifname argument is terminated
by a '\0' character: */ by a '\0' character: */

View file

@ -27,6 +27,7 @@
#include "lib/network.h" #include "lib/network.h"
#include "lib/prefix.h" #include "lib/prefix.h"
#include "lib/sockopt.h" #include "lib/sockopt.h"
#include "lib/sockunion.h"
#include "lib/vrf.h" #include "lib/vrf.h"
#include "vrrp.h" #include "vrrp.h"
@ -93,6 +94,13 @@ static void vrrp_recalculate_timers(struct vrrp_router *r)
/* /*
* Determines if a VRRP router is the owner of the specified address. * Determines if a VRRP router is the owner of the specified address.
* *
* The determining factor for whether an interface is the address owner is
* simply whether the address is assigned to the VRRP subinterface by someone
* other than vrrpd.
*
* This function should always return the correct answer regardless of
* master/backup status.
*
* vr * vr
* Virtual Router * Virtual Router
* *
@ -104,20 +112,30 @@ static bool vrrp_is_owner(struct vrrp_vrouter *vr, struct ipaddr *addr)
struct prefix *p; struct prefix *p;
struct prefix_ipv4 p4; struct prefix_ipv4 p4;
struct prefix_ipv6 p6; struct prefix_ipv6 p6;
struct vrrp_router *r;
if (IS_IPADDR_V4(addr)) { if (IS_IPADDR_V4(addr)) {
p4.family = AF_INET; p4.family = AF_INET;
p4.prefixlen = IPV4_MAX_BITLEN; p4.prefixlen = IPV4_MAX_BITLEN;
p4.prefix = addr->ipaddr_v4; p4.prefix = addr->ipaddr_v4;
p = (struct prefix *)&p4; p = (struct prefix *)&p4;
r = vr->v4;
} else { } else {
p6.family = AF_INET6; p6.family = AF_INET6;
p6.prefixlen = IPV6_MAX_BITLEN; p6.prefixlen = IPV6_MAX_BITLEN;
memcpy(&p6.prefix, &addr->ipaddr_v6, sizeof(struct in6_addr)); memcpy(&p6.prefix, &addr->ipaddr_v6, sizeof(struct in6_addr));
p = (struct prefix *)&p6; p = (struct prefix *)&p6;
r = vr->v6;
} }
return !!connected_lookup_prefix_exact(vr->ifp, p); bool have_addr = !!connected_lookup_prefix_exact(r->mvl_ifp, p);
/* did we assign it? */
/* FIXME: this check is wrong, we need a flag to set when we install
* addresses on an interface when assuming master status; then
* ownership status is determined by (have_addr && !flag) in master
* state */
return have_addr;
} }
/* Configuration controllers ----------------------------------------------- */ /* Configuration controllers ----------------------------------------------- */
@ -176,20 +194,70 @@ static struct vrrp_router *vrrp_router_create(struct vrrp_vrouter *vr,
struct vrrp_router *r = XCALLOC(MTYPE_TMP, sizeof(struct vrrp_router)); struct vrrp_router *r = XCALLOC(MTYPE_TMP, sizeof(struct vrrp_router));
r->family = family; r->family = family;
r->sock = -1; r->sock_rx = -1;
r->sock_tx = -1;
r->vr = vr; r->vr = vr;
r->addrs = list_new(); r->addrs = list_new();
r->priority = vr->priority; r->priority = vr->priority;
r->fsm.state = VRRP_STATE_INITIALIZE; r->fsm.state = VRRP_STATE_INITIALIZE;
vrrp_mac_set(&r->vmac, family == AF_INET6, vr->vrid); vrrp_mac_set(&r->vmac, family == AF_INET6, vr->vrid);
/* Search for existing interface with computed MAC address */
struct interface **ifps;
size_t ifps_cnt = if_lookup_by_hwaddr(
r->vmac.octet, sizeof(r->vmac.octet), &ifps, VRF_DEFAULT);
/*
* Filter to only those interfaces whose names begin with VRRP
* interface name. E.g. if this VRRP instance was configured on eth0,
* then we filter the list to only keep interfaces matching ^eth0.*
*
* If there are still multiple interfaces we just select the first one,
* as it should be functionally identical to the others.
*/
unsigned int candidates = 0;
struct interface *selection = NULL;
for (unsigned int i = 0; i < ifps_cnt; i++) {
zlog_info("Found VRRP interface %s", ifps[i]->name);
if (strncmp(ifps[i]->name, r->vr->ifp->name,
strlen(r->vr->ifp->name)))
ifps[i] = NULL;
else {
selection = selection ? selection : ifps[i];
candidates++;
}
}
XFREE(MTYPE_TMP, ifps);
char ethstr[ETHER_ADDR_STRLEN];
prefix_mac2str(&r->vmac, ethstr, sizeof(ethstr));
assert(!!selection == !!candidates);
if (candidates == 0)
zlog_warn(VRRP_LOGPFX VRRP_LOGPFX_VRID
"No interface found w/ MAC %s; using default",
r->vr->vrid, ethstr);
else if (candidates > 1)
zlog_warn(VRRP_LOGPFX VRRP_LOGPFX_VRID
"Multiple VRRP interfaces found; using %s",
r->vr->vrid, selection->name);
else
zlog_info(VRRP_LOGPFX VRRP_LOGPFX_VRID "Selected %s",
r->vr->vrid, selection->name);
r->mvl_ifp = selection ? selection : r->vr->ifp;
return r; return r;
} }
static void vrrp_router_destroy(struct vrrp_router *r) static void vrrp_router_destroy(struct vrrp_router *r)
{ {
if (r->sock >= 0) if (r->sock_rx >= 0)
close(r->sock); close(r->sock_rx);
if (r->sock_tx >= 0)
close(r->sock_tx);
/* FIXME: also delete list elements */ /* FIXME: also delete list elements */
list_delete(&r->addrs); list_delete(&r->addrs);
XFREE(MTYPE_TMP, r); XFREE(MTYPE_TMP, r);
@ -263,7 +331,7 @@ static void vrrp_send_advertisement(struct vrrp_router *r)
r->family == AF_INET ? VRRP_MCASTV4_GROUP_STR : VRRP_MCASTV6_GROUP_STR; r->family == AF_INET ? VRRP_MCASTV4_GROUP_STR : VRRP_MCASTV6_GROUP_STR;
str2sockunion(group, &dest); str2sockunion(group, &dest);
ssize_t sent = sendto(r->sock, pkt, (size_t)pktlen, 0, &dest.sa, ssize_t sent = sendto(r->sock_tx, pkt, (size_t)pktlen, 0, &dest.sa,
sockunion_sizeof(&dest)); sockunion_sizeof(&dest));
XFREE(MTYPE_TMP, pkt); XFREE(MTYPE_TMP, pkt);
@ -308,7 +376,7 @@ static int vrrp_read(struct thread *thread)
m.msg_control = control; m.msg_control = control;
m.msg_controllen = sizeof(control); m.msg_controllen = sizeof(control);
nbytes = recvmsg(r->sock, &m, MSG_DONTWAIT); nbytes = recvmsg(r->sock_rx, &m, MSG_DONTWAIT);
if ((nbytes < 0 && ERRNO_IO_RETRY(errno))) { if ((nbytes < 0 && ERRNO_IO_RETRY(errno))) {
resched = true; resched = true;
@ -342,38 +410,123 @@ done:
memset(r->ibuf, 0x00, sizeof(r->ibuf)); memset(r->ibuf, 0x00, sizeof(r->ibuf));
if (resched) if (resched)
thread_add_read(master, vrrp_read, r, r->sock, &r->t_read); thread_add_read(master, vrrp_read, r, r->sock_rx, &r->t_read);
return 0; return 0;
} }
/* /*
* Create Virtual Router listen socket and join it to the VRRP multicast group. * Finds the first connected address of the appropriate family on a VRRP
* router's interface and binds the Tx socket of the VRRP router to that
* address.
*
* r
* VRRP router to operate on
*
* Returns:
* 0 on success
* -1 on failure
*/
static int vrrp_bind_to_primary_connected(struct vrrp_router *r)
{
	char ipstr[INET6_ADDRSTRLEN] = "";
	struct listnode *ln;
	struct connected *c;
	struct connected *src = NULL;

	/*
	 * Pick the first connected address of our family. The match is kept
	 * in a separate variable so the "not found" test does not depend on
	 * what the list macro leaves in the cursor after the loop.
	 */
	for (ALL_LIST_ELEMENTS_RO(r->mvl_ifp->connected, ln, c)) {
		if (c->address->family == r->family) {
			src = c;
			break;
		}
	}

	if (src == NULL) {
		zlog_err(VRRP_LOGPFX VRRP_LOGPFX_VRID
			 "Failed to find %s address to bind on %s",
			 r->vr->vrid, family2str(r->family), r->mvl_ifp->name);
		return -1;
	}

	/*
	 * Build only the sockaddr that matches our family, and remember its
	 * real size: bind() must be told the length of the concrete address
	 * structure, and sockaddr_in6 is larger than the generic sockaddr.
	 */
	struct sockaddr_in sa4 = {
		.sin_family = AF_INET,
	};
	struct sockaddr_in6 sa6 = {
		.sin6_family = AF_INET6,
	};
	struct sockaddr *sa;
	socklen_t salen;

	if (r->family == AF_INET) {
		sa4.sin_addr = src->address->u.prefix4;
		sa = (struct sockaddr *)&sa4;
		salen = sizeof(sa4);
	} else {
		sa6.sin6_addr = src->address->u.prefix6;
		/* Link-local addresses are ambiguous without an interface */
		if (IN6_IS_ADDR_LINKLOCAL(&sa6.sin6_addr))
			sa6.sin6_scope_id = (uint32_t)r->mvl_ifp->ifindex;
		sa = (struct sockaddr *)&sa6;
		salen = sizeof(sa6);
	}

	/*
	 * Render the address before calling bind() so nothing can disturb
	 * errno between the failing call and the error message.
	 */
	inet_ntop(r->family, (const void *)&src->address->u.prefix, ipstr,
		  sizeof(ipstr));

	sockopt_reuseaddr(r->sock_tx);

	if (bind(r->sock_tx, sa, salen) < 0) {
		zlog_err(
			VRRP_LOGPFX VRRP_LOGPFX_VRID
			"Failed to bind Tx socket to primary IP address %s: %s",
			r->vr->vrid, ipstr, safe_strerror(errno));
		return -1;
	}

	zlog_info(VRRP_LOGPFX VRRP_LOGPFX_VRID
		  "Bound Tx socket to primary IP address %s",
		  r->vr->vrid, ipstr);

	return 0;
}
/*
* Creates and configures VRRP router sockets.
*
* This function:
* - Creates two sockets, one for Tx, one for Rx
* - Joins the Rx socket to the appropriate VRRP multicast group
* - Sets the Tx socket to set the TTL (v4) or Hop Limit (v6) field to 255 for
* all transmitted IPvX packets
* - Requests the kernel to deliver IPv6 header values needed to validate VRRP
* packets
* - FIXME: Binds the Tx socket to the first address on the macvlan
* subinterface.
*
* If any of the above fail, the sockets are closed. The only exception is if
* the TTL / Hop Limit settings fail; these are logged, but configuration
* proceeds.
* *
* The first connected address on the Virtual Router's interface is used as the * The first connected address on the Virtual Router's interface is used as the
* interface address. * interface address.
* *
* r * r
* VRRP Router for which to create listen socket * VRRP Router for which to create listen socket
*
* Returns:
* 0 on success
* -1 on failure
*/ */
static int vrrp_socket(struct vrrp_router *r) static int vrrp_socket(struct vrrp_router *r)
{ {
int ret; int ret;
bool failed = false; bool failed = false;
struct connected *c;
frr_elevate_privs(&vrrp_privs) { frr_elevate_privs(&vrrp_privs)
r->sock = socket(r->family, SOCK_RAW, IPPROTO_VRRP); {
r->sock_rx = socket(r->family, SOCK_RAW, IPPROTO_VRRP);
r->sock_tx = socket(r->family, SOCK_RAW, IPPROTO_VRRP);
} }
if (r->sock < 0) { if (r->sock_rx < 0 || r->sock_tx < 0) {
const char *rxtx = r->sock_rx < 0 ? "Rx" : "Tx";
zlog_warn(VRRP_LOGPFX VRRP_LOGPFX_VRID zlog_warn(VRRP_LOGPFX VRRP_LOGPFX_VRID
"Can't create %s VRRP socket", "Can't create %s VRRP %s socket",
r->vr->vrid, r->family == AF_INET ? "v4" : "v6"); r->vr->vrid, family2str(r->family), rxtx);
failed = true; failed = true;
goto done; goto done;
} }
/* Configure sockets */
if (!listcount(r->vr->ifp->connected)) { if (!listcount(r->vr->ifp->connected)) {
zlog_warn( zlog_warn(
VRRP_LOGPFX VRRP_LOGPFX_VRID VRRP_LOGPFX VRRP_LOGPFX_VRID
@ -384,8 +537,9 @@ static int vrrp_socket(struct vrrp_router *r)
} }
if (r->family == AF_INET) { if (r->family == AF_INET) {
/* Set Tx socket to always Tx with TTL set to 255 */
int ttl = 255; int ttl = 255;
ret = setsockopt(r->sock, IPPROTO_IP, IP_MULTICAST_TTL, &ttl, ret = setsockopt(r->sock_tx, IPPROTO_IP, IP_MULTICAST_TTL, &ttl,
sizeof(ttl)); sizeof(ttl));
if (ret < 0) { if (ret < 0) {
zlog_warn( zlog_warn(
@ -394,22 +548,22 @@ static int vrrp_socket(struct vrrp_router *r)
r->vr->vrid); r->vr->vrid);
} }
c = listhead(r->vr->ifp->connected)->data; /* Join Rx socket to VRRP IPv4 multicast group */
struct connected *c = listhead(r->vr->ifp->connected)->data;
struct in_addr v4 = c->address->u.prefix4; struct in_addr v4 = c->address->u.prefix4;
ret = setsockopt_ipv4_multicast(r->sock_rx, IP_ADD_MEMBERSHIP,
/* Join VRRP IPv4 multicast group */ v4, htonl(VRRP_MCASTV4_GROUP),
ret = setsockopt_ipv4_multicast(r->sock, IP_ADD_MEMBERSHIP, v4,
htonl(VRRP_MCASTV4_GROUP),
r->vr->ifp->ifindex); r->vr->ifp->ifindex);
} else if (r->family == AF_INET6) { } else if (r->family == AF_INET6) {
ret = setsockopt_ipv6_multicast_hops(r->sock, 255); /* Always transmit IPv6 packets with hop limit set to 255 */
ret = setsockopt_ipv6_multicast_hops(r->sock_tx, 255);
if (ret < 0) { if (ret < 0) {
zlog_warn( zlog_warn(
VRRP_LOGPFX VRRP_LOGPFX_VRID VRRP_LOGPFX VRRP_LOGPFX_VRID
"Failed to set outgoing multicast hop count to 255; RFC 5798 compliant implementations will drop our packets", "Failed to set outgoing multicast hop count to 255; RFC 5798 compliant implementations will drop our packets",
r->vr->vrid); r->vr->vrid);
} }
ret = setsockopt_ipv6_hoplimit(r->sock, 1); ret = setsockopt_ipv6_hoplimit(r->sock_rx, 1);
if (ret < 0) { if (ret < 0) {
zlog_warn(VRRP_LOGPFX VRRP_LOGPFX_VRID zlog_warn(VRRP_LOGPFX VRRP_LOGPFX_VRID
"Failed to request IPv6 Hop Limit delivery", "Failed to request IPv6 Hop Limit delivery",
@ -420,10 +574,11 @@ static int vrrp_socket(struct vrrp_router *r)
/* Join VRRP IPv6 multicast group */ /* Join VRRP IPv6 multicast group */
struct ipv6_mreq mreq; struct ipv6_mreq mreq;
inet_pton(AF_INET6, VRRP_MCASTV6_GROUP_STR, &mreq.ipv6mr_multiaddr); inet_pton(AF_INET6, VRRP_MCASTV6_GROUP_STR,
&mreq.ipv6mr_multiaddr);
mreq.ipv6mr_interface = r->vr->ifp->ifindex; mreq.ipv6mr_interface = r->vr->ifp->ifindex;
ret = setsockopt(r->sock, IPPROTO_IPV6, IPV6_JOIN_GROUP, &mreq, ret = setsockopt(r->sock_rx, IPPROTO_IPV6, IPV6_JOIN_GROUP,
sizeof(mreq)); &mreq, sizeof(mreq));
} }
if (ret < 0) { if (ret < 0) {
@ -431,15 +586,29 @@ static int vrrp_socket(struct vrrp_router *r)
"Failed to join VRRP %s multicast group", "Failed to join VRRP %s multicast group",
r->vr->vrid, family2str(r->family)); r->vr->vrid, family2str(r->family));
failed = true; failed = true;
goto done;
} else {
zlog_info(VRRP_LOGPFX VRRP_LOGPFX_VRID
"Joined %s VRRP multicast group",
r->vr->vrid, family2str(r->family));
} }
/* Bind Tx socket to link-local address */
if (vrrp_bind_to_primary_connected(r) < 0) {
failed = true;
goto done;
}
done: done:
ret = 0; ret = 0;
if (failed) { if (failed) {
zlog_warn(VRRP_LOGPFX VRRP_LOGPFX_VRID zlog_warn(VRRP_LOGPFX VRRP_LOGPFX_VRID
"Failed to initialize VRRP %s router", "Failed to initialize VRRP %s router",
r->vr->vrid, family2str(r->family)); r->vr->vrid, family2str(r->family));
if (r->sock >= 0) if (r->sock_rx >= 0)
close(r->sock); close(r->sock_rx);
if (r->sock_tx >= 0)
close(r->sock_tx);
ret = -1; ret = -1;
} }
@ -584,14 +753,14 @@ static int vrrp_startup(struct vrrp_router *r)
vrrp_garp_init(); vrrp_garp_init();
/* Create socket */ /* Create socket */
if (r->sock < 0) { if (r->sock_rx < 0 || r->sock_tx < 0) {
int ret = vrrp_socket(r); int ret = vrrp_socket(r);
if (ret < 0 || r->sock < 0) if (ret < 0 || r->sock_tx < 0 || r->sock_rx < 0)
return ret; return ret;
} }
/* Schedule listener */ /* Schedule listener */
thread_add_read(master, vrrp_read, r, r->sock, &r->t_read); thread_add_read(master, vrrp_read, r, r->sock_rx, &r->t_read);
/* Configure effective priority */ /* Configure effective priority */
struct ipaddr *primary = (struct ipaddr *)listhead(r->addrs)->data; struct ipaddr *primary = (struct ipaddr *)listhead(r->addrs)->data;

View file

@ -64,8 +64,13 @@ struct vrrp_router {
*/ */
bool is_active; bool is_active;
/* Socket */ /* Rx socket: Rx from parent of mvl_ifp */
int sock; int sock_rx;
/* Tx socket; Tx from mvl_ifp */
int sock_tx;
/* macvlan interface */
struct interface *mvl_ifp;
/* Socket read buffer */ /* Socket read buffer */
uint8_t ibuf[IP_MAXPACKET]; uint8_t ibuf[IP_MAXPACKET];

View file

@ -114,7 +114,7 @@ static ssize_t vrrp_build_garp(uint8_t *buf, struct interface *ifp,
void vrrp_garp_send(struct vrrp_router *r, struct in_addr *v4) void vrrp_garp_send(struct vrrp_router *r, struct in_addr *v4)
{ {
struct interface *ifp = r->vr->ifp; struct interface *ifp = r->mvl_ifp;
uint8_t garpbuf[GARP_BUFFER_SIZE]; uint8_t garpbuf[GARP_BUFFER_SIZE];
ssize_t garpbuf_len; ssize_t garpbuf_len;
ssize_t sent_len; ssize_t sent_len;
@ -149,7 +149,7 @@ void vrrp_garp_send_all(struct vrrp_router *r)
{ {
assert(r->family == AF_INET); assert(r->family == AF_INET);
struct interface *ifp = r->vr->ifp; struct interface *ifp = r->mvl_ifp;
/* If the interface doesn't support ARP, don't try sending */ /* If the interface doesn't support ARP, don't try sending */
if (ifp->flags & IFF_NOARP) { if (ifp->flags & IFF_NOARP) {

View file

@ -149,7 +149,9 @@ ssize_t vrrp_parse_datagram(int family, struct msghdr *m, size_t read,
ntohs(ip->ip_len), read); ntohs(ip->ip_len), read);
/* TTL check */ /* TTL check */
VRRP_PKT_VCHECK(ip->ip_ttl == 255, "IPv4 TTL is not 255"); VRRP_PKT_VCHECK(ip->ip_ttl == 255,
"IPv4 TTL is %" PRIu8 "; should be 255",
ip->ip_ttl);
*pkt = (struct vrrp_pkt *)(buf + (ip->ip_hl << 2)); *pkt = (struct vrrp_pkt *)(buf + (ip->ip_hl << 2));
pktsize = read - (ip->ip_hl << 2); pktsize = read - (ip->ip_hl << 2);
@ -167,7 +169,9 @@ ssize_t vrrp_parse_datagram(int family, struct msghdr *m, size_t read,
VRRP_PKT_VCHECK(!!c, "IPv6 Hop Limit not received"); VRRP_PKT_VCHECK(!!c, "IPv6 Hop Limit not received");
uint8_t *hoplimit = CMSG_DATA(c); uint8_t *hoplimit = CMSG_DATA(c);
VRRP_PKT_VCHECK(*hoplimit == 255, "IPv6 Hop Limit is not 255"); VRRP_PKT_VCHECK(*hoplimit == 255,
"IPv6 Hop Limit is %" PRIu8 "; should be 255",
*hoplimit);
*pkt = (struct vrrp_pkt *)buf; *pkt = (struct vrrp_pkt *)buf;
pktsize = read; pktsize = read;