frr/bgpd/bgp_packet.c

2064 lines
56 KiB
C
Raw Normal View History

2002-12-13 21:15:29 +01:00
/* BGP packet management routine.
* Contains utility functions for constructing and consuming BGP messages.
* Copyright (C) 2017 Cumulus Networks
* Copyright (C) 1999 Kunihiro Ishiguro
*
* This file is part of GNU Zebra.
*
* GNU Zebra is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2, or (at your option) any
* later version.
*
* GNU Zebra is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; see the file COPYING; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
2002-12-13 21:15:29 +01:00
#include <zebra.h>
#include <sys/time.h>
2002-12-13 21:15:29 +01:00
#include "thread.h"
#include "stream.h"
#include "network.h"
#include "prefix.h"
#include "command.h"
#include "log.h"
#include "memory.h"
#include "sockunion.h" /* for inet_ntop () */
#include "sockopt.h"
2002-12-13 21:15:29 +01:00
#include "linklist.h"
#include "plist.h"
#include "queue.h"
#include "filter.h"
2002-12-13 21:15:29 +01:00
#include "bgpd/bgpd.h"
#include "bgpd/bgp_table.h"
#include "bgpd/bgp_dump.h"
#include "bgpd/bgp_attr.h"
#include "bgpd/bgp_debug.h"
#include "bgpd/bgp_fsm.h"
#include "bgpd/bgp_route.h"
#include "bgpd/bgp_packet.h"
#include "bgpd/bgp_open.h"
#include "bgpd/bgp_aspath.h"
#include "bgpd/bgp_community.h"
#include "bgpd/bgp_ecommunity.h"
#include "bgpd/bgp_lcommunity.h"
2002-12-13 21:15:29 +01:00
#include "bgpd/bgp_network.h"
#include "bgpd/bgp_mplsvpn.h"
#include "bgpd/bgp_evpn.h"
2002-12-13 21:15:29 +01:00
#include "bgpd/bgp_advertise.h"
#include "bgpd/bgp_vty.h"
#include "bgpd/bgp_updgrp.h"
#include "bgpd/bgp_label.h"
#include "bgpd/bgp_io.h"
2002-12-13 21:15:29 +01:00
/*
 * Write the fixed BGP message header into a stream.
 *
 * Emits BGP_MARKER_SIZE bytes of 0xff, a zeroed 16-bit length placeholder
 * (the real length is patched in afterwards, see bgp_packet_set_size())
 * and finally the one-byte message type.
 *
 * @param s     stream the header is written to
 * @param type  BGP message type code
 * @return the stream's end position once the header has been written
 */
int bgp_packet_set_marker(struct stream *s, u_char type)
{
	int remaining = BGP_MARKER_SIZE;

	/* Marker field: every byte is all-ones. */
	while (remaining-- > 0)
		stream_putc(s, 0xff);

	/* Total length is not known yet; reserve the field with a zero. */
	stream_putw(s, 0);

	/* Message type. */
	stream_putc(s, type);

	return stream_get_endp(s);
}
/* Set BGP packet header size entry. If size is zero then use current
stream size. */
int bgp_packet_set_size(struct stream *s)
2002-12-13 21:15:29 +01:00
{
int cp;
2002-12-13 21:15:29 +01:00
/* Preserve current pointer. */
cp = stream_get_endp(s);
stream_putw_at(s, BGP_MARKER_SIZE, cp);
2002-12-13 21:15:29 +01:00
return cp;
2002-12-13 21:15:29 +01:00
}
/*
 * Queue a packet on the peer's output FIFO (peer->obuf).
 *
 * peer->io_mtx is held for the duration of the push.
 *
 * NOTE(review): an earlier comment said the packet goes to the "beginning"
 * of the queue; the actual position is whatever stream_fifo_push() does --
 * confirm against the stream library.
 *
 * @param peer  peer whose output queue receives the packet
 * @param s     packet to queue
 */
static void bgp_packet_add(struct peer *peer, struct stream *s)
{
	pthread_mutex_lock(&peer->io_mtx);
	{
		stream_fifo_push(peer->obuf, s);
	}
	pthread_mutex_unlock(&peer->io_mtx);
}
/*
 * Build an End-of-RIB marker (an UPDATE carrying no routes) for one
 * address family.
 *
 * For AFI_IP / SAFI_UNICAST both the withdrawn-routes length and the path
 * attribute length are zero.  For every other AFI/SAFI the attribute
 * section is six bytes long and holds a single MP_UNREACH_NLRI attribute
 * that contains nothing but the (IANA-encoded) AFI and SAFI.
 *
 * @param peer  peer the marker is built for (used for logging only)
 * @param afi   internal Address Family Identifier
 * @param safi  internal Subsequent Address Family Identifier
 * @return a newly allocated stream holding the message, or NULL when
 *         DISABLE_BGP_ANNOUNCE is set
 */
static struct stream *bgp_update_packet_eor(struct peer *peer, afi_t afi,
					    safi_t safi)
{
	struct stream *eor;
	iana_afi_t pkt_afi;
	iana_safi_t pkt_safi;

	if (DISABLE_BGP_ANNOUNCE)
		return NULL;

	if (bgp_debug_neighbor_events(peer))
		zlog_debug("send End-of-RIB for %s to %s",
			   afi_safi_print(afi, safi), peer->host);

	eor = stream_new(BGP_MAX_PACKET_SIZE);

	/* UPDATE header. */
	bgp_packet_set_marker(eor, BGP_MSG_UPDATE);

	/* Unfeasible Routes Length: nothing is withdrawn. */
	stream_putw(eor, 0);

	if (afi != AFI_IP || safi != SAFI_UNICAST) {
		/* Translate AFI/SAFI to their on-the-wire values. */
		bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);

		/* Total Path Attribute Length: flags + type + len + 3. */
		stream_putw(eor, 6);
		stream_putc(eor, BGP_ATTR_FLAG_OPTIONAL);
		stream_putc(eor, BGP_ATTR_MP_UNREACH_NLRI);
		stream_putc(eor, 3);
		stream_putw(eor, pkt_afi);
		stream_putc(eor, pkt_safi);
	} else {
		/* Total Path Attribute Length: no attributes at all. */
		stream_putw(eor, 0);
	}

	bgp_packet_set_size(eor);

	return eor;
}
/*
* Enqueue onto the peer's output buffer any packets which are pending for the
* update group it is a member of.
*
* XXX: Severely needs performance work.
*/
int bgp_generate_updgrp_packets(struct thread *thread)
2002-12-13 21:15:29 +01:00
{
struct peer *peer = THREAD_ARG(thread);
struct stream *s;
struct peer_af *paf;
struct bpacket *next_pkt;
afi_t afi;
safi_t safi;
/*
* The code beyond this part deals with update packets, proceed only
* if peer is Established and updates are not on hold (as part of
* update-delay post processing).
*/
if (peer->status != Established)
return 0;
if (peer->bgp && peer->bgp->main_peers_update_hold)
return 0;
do {
s = NULL;
for (afi = AFI_IP; afi < AFI_MAX; afi++)
for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) {
paf = peer_af_find(peer, afi, safi);
if (!paf || !PAF_SUBGRP(paf))
continue;
next_pkt = paf->next_pkt_to_send;
/* Try to generate a packet for the peer if we
* are at the end of
* the list. Always try to push out WITHDRAWs
* first. */
if (!next_pkt || !next_pkt->buffer) {
next_pkt = subgroup_withdraw_packet(
PAF_SUBGRP(paf));
if (!next_pkt || !next_pkt->buffer)
subgroup_update_packet(
PAF_SUBGRP(paf));
next_pkt = paf->next_pkt_to_send;
}
/* If we still don't have a packet to send to
* the peer, then
* try to find out out if we have to send eor or
* if not, skip to
* the next AFI, SAFI.
* Don't send the EOR prematurely... if the
* subgroup's coalesce
* timer is running, the adjacency-out structure
* is not created
* yet.
*/
if (!next_pkt || !next_pkt->buffer) {
if (CHECK_FLAG(peer->cap,
PEER_CAP_RESTART_RCV)) {
if (!(PAF_SUBGRP(paf))
->t_coalesce
&& peer->afc_nego[afi][safi]
&& peer->synctime
&& !CHECK_FLAG(
peer->af_sflags
[afi]
[safi],
PEER_STATUS_EOR_SEND)) {
SET_FLAG(
peer->af_sflags
[afi]
[safi],
PEER_STATUS_EOR_SEND);
if ((s = bgp_update_packet_eor(
peer, afi,
safi))) {
bgp_packet_add(
peer,
s);
bgp_writes_on(
peer);
}
}
}
continue;
}
2002-12-13 21:15:29 +01:00
/* Found a packet template to send, overwrite
* packet with appropriate
* attributes from peer and advance peer */
s = bpacket_reformat_for_peer(next_pkt, paf);
bgp_packet_add(peer, s);
bgp_writes_on(peer);
bpacket_queue_advance_peer(paf);
}
} while (s);
return 0;
}
/*
* Creates a BGP Keepalive packet and appends it to the peer's output queue.
*/
void bgp_keepalive_send(struct peer *peer)
2002-12-13 21:15:29 +01:00
{
struct stream *s;
s = stream_new(BGP_MAX_PACKET_SIZE);
2002-12-13 21:15:29 +01:00
/* Make keepalive packet. */
bgp_packet_set_marker(s, BGP_MSG_KEEPALIVE);
2002-12-13 21:15:29 +01:00
/* Set packet size. */
(void)bgp_packet_set_size(s);
2002-12-13 21:15:29 +01:00
/* Dump packet if debug option is set. */
/* bgp_packet_dump (s); */
2002-12-13 21:15:29 +01:00
if (bgp_debug_keepalive(peer))
zlog_debug("%s sending KEEPALIVE", peer->host);
2002-12-13 21:15:29 +01:00
/* Add packet to the peer. */
bgp_packet_add(peer, s);
bgp_writes_on(peer);
2002-12-13 21:15:29 +01:00
}
/*
* Creates a BGP Open packet and appends it to the peer's output queue.
* Sets capabilities as necessary.
*/
void bgp_open_send(struct peer *peer)
2002-12-13 21:15:29 +01:00
{
struct stream *s;
u_int16_t send_holdtime;
as_t local_as;
2002-12-13 21:15:29 +01:00
if (PEER_OR_GROUP_TIMER_SET(peer))
send_holdtime = peer->holdtime;
else
send_holdtime = peer->bgp->default_holdtime;
2002-12-13 21:15:29 +01:00
/* local-as Change */
if (peer->change_local_as)
local_as = peer->change_local_as;
else
local_as = peer->local_as;
2002-12-13 21:15:29 +01:00
s = stream_new(BGP_MAX_PACKET_SIZE);
2002-12-13 21:15:29 +01:00
/* Make open packet. */
bgp_packet_set_marker(s, BGP_MSG_OPEN);
2002-12-13 21:15:29 +01:00
/* Set open packet values. */
stream_putc(s, BGP_VERSION_4); /* BGP version */
stream_putw(s,
(local_as <= BGP_AS_MAX) ? (u_int16_t)local_as
: BGP_AS_TRANS);
stream_putw(s, send_holdtime); /* Hold Time */
stream_put_in_addr(s, &peer->local_id); /* BGP Identifier */
2002-12-13 21:15:29 +01:00
/* Set capability code. */
bgp_open_capability(s, peer);
2002-12-13 21:15:29 +01:00
/* Set BGP packet length. */
(void)bgp_packet_set_size(s);
2002-12-13 21:15:29 +01:00
if (bgp_debug_neighbor_events(peer))
zlog_debug(
"%s sending OPEN, version %d, my as %u, holdtime %d, id %s",
peer->host, BGP_VERSION_4, local_as, send_holdtime,
inet_ntoa(peer->local_id));
2002-12-13 21:15:29 +01:00
/* Dump packet if debug option is set. */
/* bgp_packet_dump (s); */
2002-12-13 21:15:29 +01:00
/* Add packet to the peer. */
bgp_packet_add(peer, s);
bgp_writes_on(peer);
}
/*
 * Write the packet at the head of the peer's output queue -- which must be
 * a NOTIFICATION -- straight to the peer's socket.
 *
 * This is only for sending a NOTIFICATION message to a neighbor.  The
 * packet is popped from peer->obuf under peer->io_mtx, written with a
 * single write() call and then released.  A short write is tolerated: at
 * this point only a clean shutdown matters.
 *
 * On a successful write the notify counter is bumped, the start timer is
 * doubled (capped at 120 seconds) and a BGP_Stop event is scheduled.  If
 * write() returns <= 0 a TCP_fatal_error event is scheduled instead.
 *
 * @param peer  peer whose queued NOTIFICATION is written
 * @return always 0
 */
static int bgp_write_notify(struct peer *peer)
{
	int ret, val;
	u_char type;
	struct stream *s;

	/*
	 * Only the queue access needs the lock.  The mutex is released
	 * before looking at the result so that no return path can leave it
	 * held.
	 */
	pthread_mutex_lock(&peer->io_mtx);
	{
		s = stream_fifo_pop(peer->obuf);
	}
	pthread_mutex_unlock(&peer->io_mtx);

	/* There should be at least one packet. */
	if (!s)
		return 0;

	assert(stream_get_endp(s) >= BGP_HEADER_SIZE);

	/* Stop collecting data within the socket */
	sockopt_cork(peer->fd, 0);

	/* socket is in nonblocking mode, if we can't deliver the NOTIFY, well,
	 * we only care about getting a clean shutdown at this point. */
	ret = write(peer->fd, STREAM_DATA(s), stream_get_endp(s));

	/* only connection reset/close gets counted as TCP_fatal_error, failure
	 * to write the entire NOTIFY doesn't get different FSM treatment */
	if (ret <= 0) {
		/* The popped stream belongs to us; do not leak it. */
		stream_free(s);
		BGP_EVENT_ADD(peer, TCP_fatal_error);
		return 0;
	}

	/* Disable Nagle, make NOTIFY packet go out right away */
	val = 1;
	(void)setsockopt(peer->fd, IPPROTO_TCP, TCP_NODELAY, (char *)&val,
			 sizeof(val));

	/* Retrieve BGP packet type; it sits behind marker and length. */
	stream_set_getp(s, BGP_MARKER_SIZE + 2);
	type = stream_getc(s);

	/* Type should be notify. */
	assert(type == BGP_MSG_NOTIFY);

	/* Nothing reads the packet past this point; release it. */
	stream_free(s);

	peer->notify_out++;

	/* Double start timer. */
	peer->v_start *= 2;

	/* Overflow check. */
	if (peer->v_start >= (60 * 2))
		peer->v_start = (60 * 2);

	/* Handle Graceful Restart case where the state changes to
	   Connect instead of Idle */
	BGP_EVENT_ADD(peer, BGP_Stop);

	return 0;
}
/*
 * Build a BGP NOTIFICATION and send it to the peer immediately.
 *
 * Everything still waiting in the peer's output queue is discarded, the
 * NOTIFICATION is queued in its place and bgp_write_notify() is called
 * directly to write it to the socket.  Along the way the notify code and
 * subcode are stored in peer->notify, the message is logged through
 * bgp_notify_print() and peer->last_reset is set to the matching reason.
 *
 * @param peer      peer to notify
 * @param code      BGP error code
 * @param sub_code  BGP error subcode
 * @param data      data portion, may be NULL
 * @param datalen   length of the data portion in bytes
 */
void bgp_notify_send_with_data(struct peer *peer, u_char code, u_char sub_code,
			       u_char *data, size_t datalen)
{
	struct stream *s;
	int length;

	/* Allocate new stream. */
	s = stream_new(BGP_MAX_PACKET_SIZE);

	/* Make notify packet. */
	bgp_packet_set_marker(s, BGP_MSG_NOTIFY);

	/* Set notify packet values. */
	stream_putc(s, code);	  /* BGP notify code */
	stream_putc(s, sub_code); /* BGP notify sub_code */

	/* If notify data is present. */
	if (data)
		stream_write(s, data, datalen);

	/* Set BGP packet length. */
	length = bgp_packet_set_size(s);

	/* wipe output buffer */
	pthread_mutex_lock(&peer->io_mtx);
	{
		stream_fifo_clean(peer->obuf);
	}
	pthread_mutex_unlock(&peer->io_mtx);

	/* For debug */
	{
		struct bgp_notify bgp_notify;
		int i;

		bgp_notify.code = code;
		bgp_notify.subcode = sub_code;
		bgp_notify.data = NULL;
		bgp_notify.length = length - BGP_MSG_NOTIFY_MIN_SIZE;
		bgp_notify.raw_data = data;

		peer->notify.code = bgp_notify.code;
		peer->notify.subcode = bgp_notify.subcode;

		if (bgp_notify.length) {
			/*
			 * Hex dump "xx xx xx": two digits per byte, one
			 * separator between bytes and a terminating NUL add
			 * up to exactly three bytes per data byte.
			 */
			size_t bufsz = (size_t)bgp_notify.length * 3;
			size_t off = 0;

			bgp_notify.data = XMALLOC(MTYPE_TMP, bufsz);
			bgp_notify.data[0] = '\0';

			for (i = 0; i < bgp_notify.length; i++) {
				/* Append in place; never write past bufsz. */
				int n = snprintf(bgp_notify.data + off,
						 bufsz - off, "%s%02x",
						 i ? " " : "", data[i]);

				if (n < 0 || (size_t)n >= bufsz - off)
					break;
				off += (size_t)n;
			}
		}

		bgp_notify_print(peer, &bgp_notify, "sending");

		if (bgp_notify.data) {
			XFREE(MTYPE_TMP, bgp_notify.data);
			bgp_notify.data = NULL;
			bgp_notify.length = 0;
		}
	}

	/* peer reset cause */
	if (code == BGP_NOTIFY_CEASE) {
		if (sub_code == BGP_NOTIFY_CEASE_ADMIN_RESET)
			peer->last_reset = PEER_DOWN_USER_RESET;
		else if (sub_code == BGP_NOTIFY_CEASE_ADMIN_SHUTDOWN)
			peer->last_reset = PEER_DOWN_USER_SHUTDOWN;
		else
			peer->last_reset = PEER_DOWN_NOTIFY_SEND;
	} else
		peer->last_reset = PEER_DOWN_NOTIFY_SEND;

	/* Add packet to peer's output queue and push it out right away. */
	bgp_packet_add(peer, s);
	bgp_write_notify(peer);
}
/*
 * Send a BGP NOTIFICATION that carries no data portion.
 *
 * Shorthand for bgp_notify_send_with_data() with a NULL data pointer and
 * a data length of zero.
 *
 * @param peer      peer to notify
 * @param code      BGP error code
 * @param sub_code  BGP error subcode
 */
void bgp_notify_send(struct peer *peer, u_char code, u_char sub_code)
{
	bgp_notify_send_with_data(peer, code, sub_code, NULL, 0);
}
/*
 * Build a BGP ROUTE-REFRESH and queue it on the peer's output queue.
 *
 * The message type is BGP_MSG_ROUTE_REFRESH_NEW when the peer has the
 * PEER_CAP_REFRESH_NEW_RCV capability flag and BGP_MSG_ROUTE_REFRESH_OLD
 * otherwise.  For a prefix ORF type an ORF section is appended when either
 * removal was requested or an inbound prefix-list is configured for the
 * address family.  Nothing is sent when DISABLE_BGP_ANNOUNCE is set.
 *
 * @param peer             peer the request is sent to
 * @param afi              Address Family Identifier
 * @param safi             Subsequent Address Family Identifier
 * @param orf_type         Outbound Route Filtering type
 * @param when_to_refresh  whether to refresh immediately or defer
 * @param remove           whether to remove ORF for the given AFI/SAFI
 */
void bgp_route_refresh_send(struct peer *peer, afi_t afi, safi_t safi,
			    u_char orf_type, u_char when_to_refresh, int remove)
{
	struct stream *s;
	struct bgp_filter *filter;
	int orf_refresh = 0;
	int prefix_orf;
	u_char msg_type;
	iana_afi_t pkt_afi;
	iana_safi_t pkt_safi;

	if (DISABLE_BGP_ANNOUNCE)
		return;

	filter = &peer->filter[afi][safi];

	/* Convert AFI, SAFI to values for packet. */
	bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);

	s = stream_new(BGP_MAX_PACKET_SIZE);

	/* Header; the type code depends on what the peer advertised. */
	msg_type = CHECK_FLAG(peer->cap, PEER_CAP_REFRESH_NEW_RCV)
			   ? BGP_MSG_ROUTE_REFRESH_NEW
			   : BGP_MSG_ROUTE_REFRESH_OLD;
	bgp_packet_set_marker(s, msg_type);

	/* Encode Route Refresh message: AFI, reserved byte, SAFI. */
	stream_putw(s, pkt_afi);
	stream_putc(s, 0);
	stream_putc(s, pkt_safi);

	prefix_orf = (orf_type == ORF_TYPE_PREFIX
		      || orf_type == ORF_TYPE_PREFIX_OLD);

	if (prefix_orf && (remove || filter->plist[FILTER_IN].plist)) {
		const char *when = (when_to_refresh == REFRESH_DEFER)
					   ? "defer"
					   : "immediate";
		unsigned long orfp;
		u_int16_t orf_len;

		orf_refresh = 1;
		stream_putc(s, when_to_refresh);
		stream_putc(s, orf_type);

		/* Remember where the ORF length goes and reserve it. */
		orfp = stream_get_endp(s);
		stream_putw(s, 0);

		if (remove) {
			UNSET_FLAG(peer->af_sflags[afi][safi],
				   PEER_STATUS_ORF_PREFIX_SEND);
			stream_putc(s, ORF_COMMON_PART_REMOVE_ALL);
			if (bgp_debug_neighbor_events(peer))
				zlog_debug(
					"%s sending REFRESH_REQ to remove ORF(%d) (%s) for afi/safi: %d/%d",
					peer->host, orf_type, when, pkt_afi,
					pkt_safi);
		} else {
			SET_FLAG(peer->af_sflags[afi][safi],
				 PEER_STATUS_ORF_PREFIX_SEND);
			prefix_bgp_orf_entry(s, filter->plist[FILTER_IN].plist,
					     ORF_COMMON_PART_ADD,
					     ORF_COMMON_PART_PERMIT,
					     ORF_COMMON_PART_DENY);
			if (bgp_debug_neighbor_events(peer))
				zlog_debug(
					"%s sending REFRESH_REQ with pfxlist ORF(%d) (%s) for afi/safi: %d/%d",
					peer->host, orf_type, when, pkt_afi,
					pkt_safi);
		}

		/* Total ORF Entry Len: what follows the 2-byte length. */
		orf_len = stream_get_endp(s) - orfp - 2;
		stream_putw_at(s, orfp, orf_len);
	}

	/* Set packet size. */
	(void)bgp_packet_set_size(s);

	/* A plain refresh has not been logged yet. */
	if (bgp_debug_neighbor_events(peer) && !orf_refresh)
		zlog_debug("%s sending REFRESH_REQ for afi/safi: %d/%d",
			   peer->host, pkt_afi, pkt_safi);

	/* Add packet to the peer. */
	bgp_packet_add(peer, s);
	bgp_writes_on(peer);
}
/*
 * Build a BGP CAPABILITY message and queue it on the peer's output queue.
 *
 * Only CAPABILITY_CODE_MP produces a payload (action, capability code,
 * capability length, AFI, a reserved zero byte, SAFI).  For any other
 * capability code a header-only message is queued.
 *
 * @param peer             peer the message is sent to
 * @param afi              Address Family Identifier
 * @param safi             Subsequent Address Family Identifier
 * @param capability_code  BGP Capability Code
 * @param action           set or remove the capability
 */
void bgp_capability_send(struct peer *peer, afi_t afi, safi_t safi,
			 int capability_code, int action)
{
	struct stream *cap;
	iana_afi_t pkt_afi;
	iana_safi_t pkt_safi;

	/* Convert AFI, SAFI to values for packet. */
	bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);

	cap = stream_new(BGP_MAX_PACKET_SIZE);

	/* Message header. */
	bgp_packet_set_marker(cap, BGP_MSG_CAPABILITY);

	/* Encode MP_EXT capability. */
	if (capability_code == CAPABILITY_CODE_MP) {
		const char *what = (action == CAPABILITY_ACTION_SET)
					   ? "Advertising"
					   : "Removing";

		stream_putc(cap, action);
		stream_putc(cap, CAPABILITY_CODE_MP);
		stream_putc(cap, CAPABILITY_CODE_MP_LEN);
		stream_putw(cap, pkt_afi);
		stream_putc(cap, 0);
		stream_putc(cap, pkt_safi);

		if (bgp_debug_neighbor_events(peer))
			zlog_debug(
				"%s sending CAPABILITY has %s MP_EXT CAP for afi/safi: %d/%d",
				peer->host, what, pkt_afi, pkt_safi);
	}

	/* Set packet size. */
	(void)bgp_packet_set_size(cap);

	/* Add packet to the peer. */
	bgp_packet_add(peer, cap);
	bgp_writes_on(peer);
}
2002-12-13 21:15:29 +01:00
/*
 * RFC1771 6.8 Connection collision detection.
 *
 * Upon receipt of an OPEN the connection it arrived on ("new") is compared
 * with the other connection to the same neighbor, new->doppelganger:
 *
 *  - no doppelganger, or one that is in none of the states handled below:
 *    no collision, nothing is sent;
 *  - doppelganger Established or Clearing: the new connection is refused
 *    (a peer GR is handled by closing the existing connection upon receipt
 *    of a new one);
 *  - doppelganger OpenConfirm or OpenSent: the BGP Identifiers decide.  If
 *    the local identifier is lower than the remote one the connection
 *    initiated by the remote system is kept, otherwise the locally
 *    initiated one.  PEER_STATUS_ACCEPT_PEER on the doppelganger tells
 *    which of the two connections is the remotely initiated one.
 *
 * The connection that loses receives a CEASE / Collision Resolution
 * NOTIFICATION.
 *
 * @param new        connection the OPEN was received on
 * @param remote_id  BGP Identifier taken from that OPEN
 * @return 0 if there is no collision, -1 if "new" was closed, 1 if the
 *         doppelganger was closed
 */
static int bgp_collision_detect(struct peer *new, struct in_addr remote_id)
{
	struct peer *existing = new->doppelganger;
	struct peer *loser;
	int local_id_lower;
	int existing_accepted;

	if (existing == NULL)
		return 0;

	/* Do not accept the new connection in Established or Clearing
	 * states. */
	if (existing->status == Established || existing->status == Clearing) {
		bgp_notify_send(new, BGP_NOTIFY_CEASE,
				BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
		return -1;
	}

	if (existing->status != OpenConfirm && existing->status != OpenSent)
		return 0;

	/* 1. Compare the local BGP Identifier with the remote one (as
	 *    specified in the OPEN message). */
	local_id_lower = ntohl(existing->local_id.s_addr)
			 < ntohl(remote_id.s_addr);
	existing_accepted =
		CHECK_FLAG(existing->sflags, PEER_STATUS_ACCEPT_PEER) != 0;

	/*
	 * 2. Local identifier lower: the remotely initiated connection wins,
	 *    so the existing one goes unless it is itself the accepted one.
	 * 3. Otherwise the locally initiated connection wins, so the
	 *    existing one goes only if it is the accepted one.
	 * Both rules together: the existing connection loses exactly when
	 * the two conditions differ.
	 */
	loser = (local_id_lower != existing_accepted) ? existing : new;

	bgp_notify_send(loser, BGP_NOTIFY_CEASE,
			BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);

	return (loser == existing) ? 1 : -1;
}
2002-12-13 21:15:29 +01:00
static int bgp_open_receive(struct peer *peer, bgp_size_t size)
{
int ret;
u_char version;
u_char optlen;
u_int16_t holdtime;
u_int16_t send_holdtime;
as_t remote_as;
as_t as4 = 0;
struct in_addr remote_id;
int mp_capability;
u_int8_t notify_data_remote_as[2];
u_int8_t notify_data_remote_as4[4];
u_int8_t notify_data_remote_id[4];
u_int16_t *holdtime_ptr;
/* Parse open packet. */
version = stream_getc(peer->curr);
memcpy(notify_data_remote_as, stream_pnt(peer->curr), 2);
remote_as = stream_getw(peer->curr);
holdtime_ptr = (u_int16_t *)stream_pnt(peer->curr);
holdtime = stream_getw(peer->curr);
memcpy(notify_data_remote_id, stream_pnt(peer->curr), 4);
remote_id.s_addr = stream_get_ipv4(peer->curr);
/* Receive OPEN message log */
if (bgp_debug_neighbor_events(peer))
zlog_debug(
"%s rcv OPEN, version %d, remote-as (in open) %u,"
" holdtime %d, id %s",
peer->host, version, remote_as, holdtime,
inet_ntoa(remote_id));
/* BEGIN to read the capability here, but dont do it yet */
mp_capability = 0;
optlen = stream_getc(peer->curr);
if (optlen != 0) {
/* If not enough bytes, it is an error. */
if (STREAM_READABLE(peer->curr) < optlen) {
bgp_notify_send(peer, BGP_NOTIFY_OPEN_ERR,
BGP_NOTIFY_OPEN_MALFORMED_ATTR);
return -1;
}
2002-12-13 21:15:29 +01:00
/* We need the as4 capability value *right now* because
* if it is there, we have not got the remote_as yet, and
* without
* that we do not know which peer is connecting to us now.
*/
as4 = peek_for_as4_capability(peer, optlen);
memcpy(notify_data_remote_as4, &as4, 4);
}
2002-12-13 21:15:29 +01:00
/* Just in case we have a silly peer who sends AS4 capability set to 0
*/
if (CHECK_FLAG(peer->cap, PEER_CAP_AS4_RCV) && !as4) {
zlog_err("%s bad OPEN, got AS4 capability, but AS4 set to 0",
peer->host);
bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
BGP_NOTIFY_OPEN_BAD_PEER_AS,
notify_data_remote_as4, 4);
return -1;
}
2002-12-13 21:15:29 +01:00
if (remote_as == BGP_AS_TRANS) {
/* Take the AS4 from the capability. We must have received the
* capability now! Otherwise we have a asn16 peer who uses
* BGP_AS_TRANS, for some unknown reason.
*/
if (as4 == BGP_AS_TRANS) {
zlog_err(
"%s [AS4] NEW speaker using AS_TRANS for AS4, not allowed",
peer->host);
bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
BGP_NOTIFY_OPEN_BAD_PEER_AS,
notify_data_remote_as4, 4);
return -1;
}
2002-12-13 21:15:29 +01:00
if (!as4 && BGP_DEBUG(as4, AS4))
zlog_debug(
"%s [AS4] OPEN remote_as is AS_TRANS, but no AS4."
" Odd, but proceeding.",
peer->host);
else if (as4 < BGP_AS_MAX && BGP_DEBUG(as4, AS4))
zlog_debug(
"%s [AS4] OPEN remote_as is AS_TRANS, but AS4 (%u) fits "
"in 2-bytes, very odd peer.",
peer->host, as4);
if (as4)
remote_as = as4;
} else {
/* We may have a partner with AS4 who has an asno < BGP_AS_MAX
*/
/* If we have got the capability, peer->as4cap must match
* remote_as */
if (CHECK_FLAG(peer->cap, PEER_CAP_AS4_RCV)
&& as4 != remote_as) {
/* raise error, log this, close session */
zlog_err(
"%s bad OPEN, got AS4 capability, but remote_as %u"
" mismatch with 16bit 'myasn' %u in open",
peer->host, as4, remote_as);
bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
BGP_NOTIFY_OPEN_BAD_PEER_AS,
notify_data_remote_as4, 4);
return -1;
}
}
/* remote router-id check. */
if (remote_id.s_addr == 0 || IPV4_CLASS_DE(ntohl(remote_id.s_addr))
|| ntohl(peer->local_id.s_addr) == ntohl(remote_id.s_addr)) {
if (bgp_debug_neighbor_events(peer))
zlog_debug("%s bad OPEN, wrong router identifier %s",
peer->host, inet_ntoa(remote_id));
bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
BGP_NOTIFY_OPEN_BAD_BGP_IDENT,
notify_data_remote_id, 4);
return -1;
}
/* Set remote router-id */
peer->remote_id = remote_id;
/* Peer BGP version check. */
if (version != BGP_VERSION_4) {
u_int16_t maxver = htons(BGP_VERSION_4);
/* XXX this reply may not be correct if version < 4 XXX */
if (bgp_debug_neighbor_events(peer))
zlog_debug(
"%s bad protocol version, remote requested %d, local request %d",
peer->host, version, BGP_VERSION_4);
/* Data must be in network byte order here */
bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
BGP_NOTIFY_OPEN_UNSUP_VERSION,
(u_int8_t *)&maxver, 2);
return -1;
}
/* Check neighbor as number. */
if (peer->as_type == AS_UNSPECIFIED) {
if (bgp_debug_neighbor_events(peer))
zlog_debug(
"%s bad OPEN, remote AS is unspecified currently",
peer->host);
bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
BGP_NOTIFY_OPEN_BAD_PEER_AS,
notify_data_remote_as, 2);
return -1;
} else if (peer->as_type == AS_INTERNAL) {
if (remote_as != peer->bgp->as) {
if (bgp_debug_neighbor_events(peer))
zlog_debug(
"%s bad OPEN, remote AS is %u, internal specified",
peer->host, remote_as);
bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
BGP_NOTIFY_OPEN_BAD_PEER_AS,
notify_data_remote_as, 2);
return -1;
}
peer->as = peer->local_as;
} else if (peer->as_type == AS_EXTERNAL) {
if (remote_as == peer->bgp->as) {
if (bgp_debug_neighbor_events(peer))
zlog_debug(
"%s bad OPEN, remote AS is %u, external specified",
peer->host, remote_as);
bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
BGP_NOTIFY_OPEN_BAD_PEER_AS,
notify_data_remote_as, 2);
return -1;
}
peer->as = remote_as;
} else if ((peer->as_type == AS_SPECIFIED) && (remote_as != peer->as)) {
if (bgp_debug_neighbor_events(peer))
zlog_debug("%s bad OPEN, remote AS is %u, expected %u",
peer->host, remote_as, peer->as);
bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
BGP_NOTIFY_OPEN_BAD_PEER_AS,
notify_data_remote_as, 2);
return -1;
}
2002-12-13 21:15:29 +01:00
/* From the rfc: Upon receipt of an OPEN message, a BGP speaker MUST
calculate the value of the Hold Timer by using the smaller of its
configured Hold Time and the Hold Time received in the OPEN message.
The Hold Time MUST be either zero or at least three seconds. An
implementation may reject connections on the basis of the Hold Time.
[bgpd] Merge AS4 support 2007-10-14 Paul Jakma <paul.jakma@sun.com> * NEWS: Note that MRT dumps are now version 2 * (general) Merge in Juergen Kammer's AS4 patch. 2007-09-27 Paul Jakma <paul.jakma@sun.com> * bgp_aspath.c: (assegment_normalise) remove duplicates from from sets. (aspath_reconcile_as4) disregard a broken part of the RFC around error handling in path reconciliation. * aspath_test.c: Test dupe-weeding from sets. Test that reconciliation merges AS_PATH and AS4_PATH where former is shorter than latter. 2007-09-26 Paul Jakma <paul.jakma@sun.com> * aspath_test.c: Test AS4_PATH reconcilation where length of AS_PATH and AS4_PATH is same. 2007-09-25 Paul Jakma <paul.jakma@sun.com> * bgp_open.c: (peek_for_as4_capability) Fix to work. * bgp_packet.c: (bgp_open_receive) Fix sanity check of as4. * tests/bgp_capability_test.c: (general) Extend tests to validate peek_for_as4_capability. Add test of full OPEN Option block, with multiple capabilities, both as a series of Option, and a single option. Add some crap to beginning of stream, to prevent code depending on getp == 0. 2007-09-18 Paul Jakma <paul.jakma@sun.com> * bgp_open.c: (bgp_capability_as4) debug printf inline with others. (peek_for_as4_capability) There's no need to signal failure, as failure is better dealt with through full capability parser - just return the AS4, simpler. * bgp_packet.c: (bgp_open_receive) Update to match peek_for_as4_capability change. Allow use of BGP_AS_TRANS by 2b speakers. Use NOTIFY_OPEN_ERR rather than CEASE for OPEN parsing errors. (bgp_capability_msg_parse) missing argument to debug print (bgp_capability_receive) missing return values. * tests/bgp_capability_test.c: (parse_test) update for changes to peek_for_as4_capability 2007-07-25 Paul Jakma <paul.jakma@sun.com> * Remove 2-byte size macros, just make existing macros take argument to indicate which size to use. Adjust all users - typically they want '1'. 
* bgp_aspath.c: (aspath_has_as4) New, return 1 if there are any as4's in a path. (aspath_put) Return the number of bytes actually written, to fix the bug Juergen noted: Splitting of segments will change the number of bytes written from that already written to the AS_PATH header. (aspath_snmp_pathseg) Pass 2-byte flag to aspath_put. SNMP is still defined as 2b. (aspath_aggregate) fix latent bug. (aspath_reconcile_as4) AS_PATH+NEW_AS_PATH reconciliation function. (aspath_key_make) Hash the AS_PATH string, rather than just taking the addition of assegment ASes as the hash value, hopefully sligthly more collision resistant. (bgp_attr_munge_as4_attrs) Collide the NEW_ attributes together with the OLD 2-byte forms, code Juergen had in bgp_attr_parse but re-organised a bit. (bgp_attr_parse) Bunch of code from Juergen moves to previous function. (bgp_packet_attribute) Compact significantly by just /always/ using extended-length attr header. Fix bug Juergen noted, by using aspath_put's (new) returned size value for the attr header rather than the (guesstimate) of aspath_size() - the two could differ when aspath_put had to split large segments, unlikely this bug was ever hit in the 'wild'. (bgp_dump_routes_attr) Always use extended-len and use aspath_put return for header length. Output 4b ASN for AS_PATH and AGGREGATOR. * bgp_ecommunity.c: (ecommunity_{hash_make,cmp}) fix hash callback declarations to match prototypes. (ecommunity_gettoken) Updated for ECOMMUNITY_ENCODE_AS4, complete rewrite of Juergen's changes (no asdot support) * bgp_open.c: (bgp_capability_as4) New, does what it says on the tin. (peek_for_as4_capability) Rewritten to use streams and bgp_capability_as4. * bgp_packet.c: (bgp_open_send) minor edit checked (in the abstract at least) with Juergen. Changes are to be more accepting, e.g, allow AS_TRANS on a 2-byte session. * (general) Update all commands to use CMD_AS_RANGE. * bgp_vty.c: (bgp_clear) Fix return vals to use CMD_.. 
Remove stuff replicated by VTY_GET_LONG (bgp_clear_vty) Return bgp_clear directly to vty. * tests/aspath_test.c: Exercise 32bit parsing. Test reconcile function. * tests/ecommunity_test.c: New, test AS4 ecommunity changes, positive test only at this time, error cases not tested yet. 2007-07-25 Juergen Kammer <j.kammer@eurodata.de> * (general) AS4 support. * bgpd.h: as_t changes to 4-bytes. * bgp_aspath.h: Add BGP_AS4_MAX and BGP_AS_TRANS defines. * bgp_aspath.c: AS_VALUE_SIZE becomes 4-byte, AS16_VALUE_SIZE added for 2-byte. Add AS16 versions of length calc macros. (aspath_count_numas) New, count number of ASes. (aspath_has_as4) New, return 1 if there are any as4's in a path. (assegments_parse) Interpret assegment as 4 or 2 byte, according to how the caller instructs us, with a new argument. (aspath_parse) Add use32bit argument to pass to assegments_parse. Adjust all its callers to pass 1, unless otherwise noted. (assegment_data_put) Adjust to be able to write 2 or 4 byte AS, according to new use32bit argument. (aspath_put) Adjust to write 2 or 4. (aspath_gettoken) Use a long for passed in asno. * bgp_attr.c: (attr_str) Add BGP_ATTR_AS4_PATH and BGP_ATTR_AS4_AGGREGATOR. (bgp_attr_aspath) Call aspath_parse with right 2/4 arg, as determined by received-capability flag. (bgp_attr_aspath_check) New, code previously in attr_aspath but moved to new func so it can be run after NEW_AS_PATH reconciliation. (bgp_attr_as4_path) New, handle NEW_AS_PATH. (bgp_attr_aggregator) Adjust to cope with 2/4 byte ASes. (bgp_attr_as4_aggregator) New, read NEW_AGGREGATOR. (bgp_attr_parse) Add handoffs to previous parsers for the two new AS4 NEW_ attributes. Various checks added for NEW/OLD reconciliation. (bgp_packet_attribute) Support 2/4 for AS_PATH and AGGREGATOR, detect when NEW_ attrs need to be sent. * bgp_debug.{c,h}: Add 'debug bgp as4'. * bgp_dump.c: MRTv2 support, unconditionally enabled, which supports AS4. Based on patches from Erik (RIPE?). 
* bgp_ecommunity.c: (ecommunity_ecom2str) ECOMMUNITY_ENCODE_AS4 support. * bgp_open.c: (peek_for_as4_capability) New, peek for AS4 capability prior to full capability parsing, so we know which ASN to use for struct peer lookup. (bgp_open_capability) Always send AS4 capability. * bgp_packet.c: (bgp_open_send) AS4 handling for AS field (bgp_open_receive) Peek for AS4 capability first, and figure out which AS to believe. * bgp_vty.c: (bgp_show_peer) Print AS4 cap * tests/aspath_test.c: Support asn32 changes, call aspath_parse with 16 bit. * vtysh/extract.pl: AS4 compatibility for router bgp ASNUMBER * vtysh/extract.pl.in: AS4 compatibility for router bgp ASNUMBER * vtysh/vtysh.c: AS4 compatibility for router bgp ASNUMBER
2007-10-15 00:32:21 +02:00
*/
if (holdtime < 3 && holdtime != 0) {
bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
BGP_NOTIFY_OPEN_UNACEP_HOLDTIME,
(u_char *)holdtime_ptr, 2);
return -1;
[bgpd] Merge AS4 support 2007-10-14 Paul Jakma <paul.jakma@sun.com> * NEWS: Note that MRT dumps are now version 2 * (general) Merge in Juergen Kammer's AS4 patch. 2007-09-27 Paul Jakma <paul.jakma@sun.com> * bgp_aspath.c: (assegment_normalise) remove duplicates from from sets. (aspath_reconcile_as4) disregard a broken part of the RFC around error handling in path reconciliation. * aspath_test.c: Test dupe-weeding from sets. Test that reconciliation merges AS_PATH and AS4_PATH where former is shorter than latter. 2007-09-26 Paul Jakma <paul.jakma@sun.com> * aspath_test.c: Test AS4_PATH reconcilation where length of AS_PATH and AS4_PATH is same. 2007-09-25 Paul Jakma <paul.jakma@sun.com> * bgp_open.c: (peek_for_as4_capability) Fix to work. * bgp_packet.c: (bgp_open_receive) Fix sanity check of as4. * tests/bgp_capability_test.c: (general) Extend tests to validate peek_for_as4_capability. Add test of full OPEN Option block, with multiple capabilities, both as a series of Option, and a single option. Add some crap to beginning of stream, to prevent code depending on getp == 0. 2007-09-18 Paul Jakma <paul.jakma@sun.com> * bgp_open.c: (bgp_capability_as4) debug printf inline with others. (peek_for_as4_capability) There's no need to signal failure, as failure is better dealt with through full capability parser - just return the AS4, simpler. * bgp_packet.c: (bgp_open_receive) Update to match peek_for_as4_capability change. Allow use of BGP_AS_TRANS by 2b speakers. Use NOTIFY_OPEN_ERR rather than CEASE for OPEN parsing errors. (bgp_capability_msg_parse) missing argument to debug print (bgp_capability_receive) missing return values. * tests/bgp_capability_test.c: (parse_test) update for changes to peek_for_as4_capability 2007-07-25 Paul Jakma <paul.jakma@sun.com> * Remove 2-byte size macros, just make existing macros take argument to indicate which size to use. Adjust all users - typically they want '1'. 
* bgp_aspath.c: (aspath_has_as4) New, return 1 if there are any as4's in a path. (aspath_put) Return the number of bytes actually written, to fix the bug Juergen noted: Splitting of segments will change the number of bytes written from that already written to the AS_PATH header. (aspath_snmp_pathseg) Pass 2-byte flag to aspath_put. SNMP is still defined as 2b. (aspath_aggregate) fix latent bug. (aspath_reconcile_as4) AS_PATH+NEW_AS_PATH reconciliation function. (aspath_key_make) Hash the AS_PATH string, rather than just taking the addition of assegment ASes as the hash value, hopefully sligthly more collision resistant. (bgp_attr_munge_as4_attrs) Collide the NEW_ attributes together with the OLD 2-byte forms, code Juergen had in bgp_attr_parse but re-organised a bit. (bgp_attr_parse) Bunch of code from Juergen moves to previous function. (bgp_packet_attribute) Compact significantly by just /always/ using extended-length attr header. Fix bug Juergen noted, by using aspath_put's (new) returned size value for the attr header rather than the (guesstimate) of aspath_size() - the two could differ when aspath_put had to split large segments, unlikely this bug was ever hit in the 'wild'. (bgp_dump_routes_attr) Always use extended-len and use aspath_put return for header length. Output 4b ASN for AS_PATH and AGGREGATOR. * bgp_ecommunity.c: (ecommunity_{hash_make,cmp}) fix hash callback declarations to match prototypes. (ecommunity_gettoken) Updated for ECOMMUNITY_ENCODE_AS4, complete rewrite of Juergen's changes (no asdot support) * bgp_open.c: (bgp_capability_as4) New, does what it says on the tin. (peek_for_as4_capability) Rewritten to use streams and bgp_capability_as4. * bgp_packet.c: (bgp_open_send) minor edit checked (in the abstract at least) with Juergen. Changes are to be more accepting, e.g, allow AS_TRANS on a 2-byte session. * (general) Update all commands to use CMD_AS_RANGE. * bgp_vty.c: (bgp_clear) Fix return vals to use CMD_.. 
Remove stuff replicated by VTY_GET_LONG (bgp_clear_vty) Return bgp_clear directly to vty. * tests/aspath_test.c: Exercise 32bit parsing. Test reconcile function. * tests/ecommunity_test.c: New, test AS4 ecommunity changes, positive test only at this time, error cases not tested yet. 2007-07-25 Juergen Kammer <j.kammer@eurodata.de> * (general) AS4 support. * bgpd.h: as_t changes to 4-bytes. * bgp_aspath.h: Add BGP_AS4_MAX and BGP_AS_TRANS defines. * bgp_aspath.c: AS_VALUE_SIZE becomes 4-byte, AS16_VALUE_SIZE added for 2-byte. Add AS16 versions of length calc macros. (aspath_count_numas) New, count number of ASes. (aspath_has_as4) New, return 1 if there are any as4's in a path. (assegments_parse) Interpret assegment as 4 or 2 byte, according to how the caller instructs us, with a new argument. (aspath_parse) Add use32bit argument to pass to assegments_parse. Adjust all its callers to pass 1, unless otherwise noted. (assegment_data_put) Adjust to be able to write 2 or 4 byte AS, according to new use32bit argument. (aspath_put) Adjust to write 2 or 4. (aspath_gettoken) Use a long for passed in asno. * bgp_attr.c: (attr_str) Add BGP_ATTR_AS4_PATH and BGP_ATTR_AS4_AGGREGATOR. (bgp_attr_aspath) Call aspath_parse with right 2/4 arg, as determined by received-capability flag. (bgp_attr_aspath_check) New, code previously in attr_aspath but moved to new func so it can be run after NEW_AS_PATH reconciliation. (bgp_attr_as4_path) New, handle NEW_AS_PATH. (bgp_attr_aggregator) Adjust to cope with 2/4 byte ASes. (bgp_attr_as4_aggregator) New, read NEW_AGGREGATOR. (bgp_attr_parse) Add handoffs to previous parsers for the two new AS4 NEW_ attributes. Various checks added for NEW/OLD reconciliation. (bgp_packet_attribute) Support 2/4 for AS_PATH and AGGREGATOR, detect when NEW_ attrs need to be sent. * bgp_debug.{c,h}: Add 'debug bgp as4'. * bgp_dump.c: MRTv2 support, unconditionally enabled, which supports AS4. Based on patches from Erik (RIPE?). 
* bgp_ecommunity.c: (ecommunity_ecom2str) ECOMMUNITY_ENCODE_AS4 support. * bgp_open.c: (peek_for_as4_capability) New, peek for AS4 capability prior to full capability parsing, so we know which ASN to use for struct peer lookup. (bgp_open_capability) Always send AS4 capability. * bgp_packet.c: (bgp_open_send) AS4 handling for AS field (bgp_open_receive) Peek for AS4 capability first, and figure out which AS to believe. * bgp_vty.c: (bgp_show_peer) Print AS4 cap * tests/aspath_test.c: Support asn32 changes, call aspath_parse with 16 bit. * vtysh/extract.pl: AS4 compatibility for router bgp ASNUMBER * vtysh/extract.pl.in: AS4 compatibility for router bgp ASNUMBER * vtysh/vtysh.c: AS4 compatibility for router bgp ASNUMBER
2007-10-15 00:32:21 +02:00
}
/* From the rfc: A reasonable maximum time between KEEPALIVE messages
would be one third of the Hold Time interval. KEEPALIVE messages
MUST NOT be sent more frequently than one per second. An
implementation MAY adjust the rate at which it sends KEEPALIVE
messages as a function of the Hold Time interval. */
if (PEER_OR_GROUP_TIMER_SET(peer))
send_holdtime = peer->holdtime;
else
send_holdtime = peer->bgp->default_holdtime;
if (holdtime < send_holdtime)
peer->v_holdtime = holdtime;
else
peer->v_holdtime = send_holdtime;
if ((PEER_OR_GROUP_TIMER_SET(peer))
&& (peer->keepalive < peer->v_holdtime / 3))
peer->v_keepalive = peer->keepalive;
else
peer->v_keepalive = peer->v_holdtime / 3;
/* Open option part parse. */
if (optlen != 0) {
if ((ret = bgp_open_option_parse(peer, optlen, &mp_capability))
< 0)
return ret;
} else {
if (bgp_debug_neighbor_events(peer))
zlog_debug("%s rcvd OPEN w/ OPTION parameter len: 0",
peer->host);
}
/*
* Assume that the peer supports the locally configured set of
* AFI/SAFIs if the peer did not send us any Mulitiprotocol
* capabilities, or if 'override-capability' is configured.
*/
if (!mp_capability
|| CHECK_FLAG(peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY)) {
peer->afc_nego[AFI_IP][SAFI_UNICAST] =
peer->afc[AFI_IP][SAFI_UNICAST];
peer->afc_nego[AFI_IP][SAFI_MULTICAST] =
peer->afc[AFI_IP][SAFI_MULTICAST];
peer->afc_nego[AFI_IP][SAFI_LABELED_UNICAST] =
peer->afc[AFI_IP][SAFI_LABELED_UNICAST];
peer->afc_nego[AFI_IP6][SAFI_UNICAST] =
peer->afc[AFI_IP6][SAFI_UNICAST];
peer->afc_nego[AFI_IP6][SAFI_MULTICAST] =
peer->afc[AFI_IP6][SAFI_MULTICAST];
peer->afc_nego[AFI_IP6][SAFI_LABELED_UNICAST] =
peer->afc[AFI_IP6][SAFI_LABELED_UNICAST];
peer->afc_nego[AFI_L2VPN][SAFI_EVPN] =
peer->afc[AFI_L2VPN][SAFI_EVPN];
}
/* When collision is detected and this peer is closed. Retrun
immidiately. */
ret = bgp_collision_detect(peer, remote_id);
if (ret < 0)
return ret;
/* Get sockname. */
if ((ret = bgp_getsockname(peer)) < 0) {
zlog_err("%s: bgp_getsockname() failed for peer: %s",
__FUNCTION__, peer->host);
return (ret);
}
/* Verify valid local address present based on negotiated
* address-families. */
if (peer->afc_nego[AFI_IP][SAFI_UNICAST]
|| peer->afc_nego[AFI_IP][SAFI_LABELED_UNICAST]
|| peer->afc_nego[AFI_IP][SAFI_MULTICAST]
|| peer->afc_nego[AFI_IP][SAFI_MPLS_VPN]
|| peer->afc_nego[AFI_IP][SAFI_ENCAP]) {
if (!peer->nexthop.v4.s_addr) {
#if defined(HAVE_CUMULUS)
zlog_err(
"%s: No local IPv4 addr resetting connection, fd %d",
peer->host, peer->fd);
bgp_notify_send(peer, BGP_NOTIFY_CEASE,
BGP_NOTIFY_SUBCODE_UNSPECIFIC);
return -1;
#endif
}
}
if (peer->afc_nego[AFI_IP6][SAFI_UNICAST]
|| peer->afc_nego[AFI_IP6][SAFI_LABELED_UNICAST]
|| peer->afc_nego[AFI_IP6][SAFI_MULTICAST]
|| peer->afc_nego[AFI_IP6][SAFI_MPLS_VPN]
|| peer->afc_nego[AFI_IP6][SAFI_ENCAP]) {
if (IN6_IS_ADDR_UNSPECIFIED(&peer->nexthop.v6_global)) {
#if defined(HAVE_CUMULUS)
zlog_err(
"%s: No local IPv6 addr resetting connection, fd %d",
peer->host, peer->fd);
bgp_notify_send(peer, BGP_NOTIFY_CEASE,
BGP_NOTIFY_SUBCODE_UNSPECIFIC);
return -1;
#endif
}
}
peer->rtt = sockopt_tcp_rtt(peer->fd);
if ((ret = bgp_event_update(peer, Receive_OPEN_message)) < 0) {
zlog_err("%s: BGP event update failed for peer: %s",
__FUNCTION__, peer->host);
/* DD: bgp send notify and reset state */
return (ret);
}
return 0;
2002-12-13 21:15:29 +01:00
}
/* Called when there is a change in the EOR(implicit or explicit) status of a
peer.
bgpd: bgpd-update-delay.patch COMMAND: 'update-delay <max-delay in seconds> [<establish-wait in seconds>]' DESCRIPTION: This feature is used to enable read-only mode on BGP process restart or when BGP process is cleared using 'clear ip bgp *'. When applicable, read-only mode would begin as soon as the first peer reaches Established state and a timer for <max-delay> seconds is started. During this mode BGP doesn't run any best-path or generate any updates to its peers. This mode continues until: 1. All the configured peers, except the shutdown peers, have sent explicit EOR (End-Of-RIB) or an implicit-EOR. The first keep-alive after BGP has reached Established is considered an implicit-EOR. If the <establish-wait> optional value is given, then BGP will wait for peers to reach establish from the begining of the update-delay till the establish-wait period is over, i.e. the minimum set of established peers for which EOR is expected would be peers established during the establish-wait window, not necessarily all the configured neighbors. 2. max-delay period is over. On hitting any of the above two conditions, BGP resumes the decision process and generates updates to its peers. Default <max-delay> is 0, i.e. the feature is off by default. This feature can be useful in reducing CPU/network used as BGP restarts/clears. Particularly useful in the topologies where BGP learns a prefix from many peers. Intermediate bestpaths are possible for the same prefix as peers get established and start receiving updates at different times. This feature should offer a value-add if the network has a high number of such prefixes. IMPLEMENTATION OBJECTIVES: Given this is an optional feature, minimized the code-churn. Used existing constructs wherever possible (existing queue-plug/unplug were used to achieve delay and resume of best-paths/update-generation). As a result, no new data-structure(s) had to be defined and allocated. 
When the feature is disabled, the new node is not exercised for the most part. Signed-off-by: Vipin Kumar <vipin@cumulusnetworks.com> Reviewed-by: Pradosh Mohapatra <pmohapat@cumulusnetworks.com> Dinesh Dutt <ddutt@cumulusnetworks.com>
2015-05-20 02:40:33 +02:00
Ends the update-delay if all expected peers are done with EORs. */
void bgp_check_update_delay(struct bgp *bgp)
bgpd: bgpd-update-delay.patch COMMAND: 'update-delay <max-delay in seconds> [<establish-wait in seconds>]' DESCRIPTION: This feature is used to enable read-only mode on BGP process restart or when BGP process is cleared using 'clear ip bgp *'. When applicable, read-only mode would begin as soon as the first peer reaches Established state and a timer for <max-delay> seconds is started. During this mode BGP doesn't run any best-path or generate any updates to its peers. This mode continues until: 1. All the configured peers, except the shutdown peers, have sent explicit EOR (End-Of-RIB) or an implicit-EOR. The first keep-alive after BGP has reached Established is considered an implicit-EOR. If the <establish-wait> optional value is given, then BGP will wait for peers to reach establish from the begining of the update-delay till the establish-wait period is over, i.e. the minimum set of established peers for which EOR is expected would be peers established during the establish-wait window, not necessarily all the configured neighbors. 2. max-delay period is over. On hitting any of the above two conditions, BGP resumes the decision process and generates updates to its peers. Default <max-delay> is 0, i.e. the feature is off by default. This feature can be useful in reducing CPU/network used as BGP restarts/clears. Particularly useful in the topologies where BGP learns a prefix from many peers. Intermediate bestpaths are possible for the same prefix as peers get established and start receiving updates at different times. This feature should offer a value-add if the network has a high number of such prefixes. IMPLEMENTATION OBJECTIVES: Given this is an optional feature, minimized the code-churn. Used existing constructs wherever possible (existing queue-plug/unplug were used to achieve delay and resume of best-paths/update-generation). As a result, no new data-structure(s) had to be defined and allocated. 
When the feature is disabled, the new node is not exercised for the most part. Signed-off-by: Vipin Kumar <vipin@cumulusnetworks.com> Reviewed-by: Pradosh Mohapatra <pmohapat@cumulusnetworks.com> Dinesh Dutt <ddutt@cumulusnetworks.com>
2015-05-20 02:40:33 +02:00
{
struct listnode *node, *nnode;
struct peer *peer = NULL;
if (bgp_debug_neighbor_events(peer))
zlog_debug("Checking update delay, T: %d R: %d I:%d E: %d",
bgp->established, bgp->restarted_peers,
bgp->implicit_eors, bgp->explicit_eors);
if (bgp->established
<= bgp->restarted_peers + bgp->implicit_eors + bgp->explicit_eors) {
/* This is an extra sanity check to make sure we wait for all
the
eligible configured peers. This check is performed if
establish wait
timer is on, or establish wait option is not given with the
update-delay command */
if (bgp->t_establish_wait
|| (bgp->v_establish_wait == bgp->v_update_delay))
for (ALL_LIST_ELEMENTS(bgp->peer, node, nnode, peer)) {
if (CHECK_FLAG(peer->flags,
PEER_FLAG_CONFIG_NODE)
&& !CHECK_FLAG(peer->flags,
PEER_FLAG_SHUTDOWN)
&& !peer->update_delay_over) {
if (bgp_debug_neighbor_events(peer))
zlog_debug(
" Peer %s pending, continuing read-only mode",
peer->host);
return;
}
}
zlog_info(
"Update delay ended, restarted: %d, EORs implicit: %d, explicit: %d",
bgp->restarted_peers, bgp->implicit_eors,
bgp->explicit_eors);
bgp_update_delay_end(bgp);
}
bgpd: bgpd-update-delay.patch COMMAND: 'update-delay <max-delay in seconds> [<establish-wait in seconds>]' DESCRIPTION: This feature is used to enable read-only mode on BGP process restart or when BGP process is cleared using 'clear ip bgp *'. When applicable, read-only mode would begin as soon as the first peer reaches Established state and a timer for <max-delay> seconds is started. During this mode BGP doesn't run any best-path or generate any updates to its peers. This mode continues until: 1. All the configured peers, except the shutdown peers, have sent explicit EOR (End-Of-RIB) or an implicit-EOR. The first keep-alive after BGP has reached Established is considered an implicit-EOR. If the <establish-wait> optional value is given, then BGP will wait for peers to reach establish from the begining of the update-delay till the establish-wait period is over, i.e. the minimum set of established peers for which EOR is expected would be peers established during the establish-wait window, not necessarily all the configured neighbors. 2. max-delay period is over. On hitting any of the above two conditions, BGP resumes the decision process and generates updates to its peers. Default <max-delay> is 0, i.e. the feature is off by default. This feature can be useful in reducing CPU/network used as BGP restarts/clears. Particularly useful in the topologies where BGP learns a prefix from many peers. Intermediate bestpaths are possible for the same prefix as peers get established and start receiving updates at different times. This feature should offer a value-add if the network has a high number of such prefixes. IMPLEMENTATION OBJECTIVES: Given this is an optional feature, minimized the code-churn. Used existing constructs wherever possible (existing queue-plug/unplug were used to achieve delay and resume of best-paths/update-generation). As a result, no new data-structure(s) had to be defined and allocated. 
When the feature is disabled, the new node is not exercised for the most part. Signed-off-by: Vipin Kumar <vipin@cumulusnetworks.com> Reviewed-by: Pradosh Mohapatra <pmohapat@cumulusnetworks.com> Dinesh Dutt <ddutt@cumulusnetworks.com>
2015-05-20 02:40:33 +02:00
}
/* Called if peer is known to have restarted. The restart-state bit in
Graceful-Restart capability is used for that */
void bgp_update_restarted_peers(struct peer *peer)
bgpd: bgpd-update-delay.patch COMMAND: 'update-delay <max-delay in seconds> [<establish-wait in seconds>]' DESCRIPTION: This feature is used to enable read-only mode on BGP process restart or when BGP process is cleared using 'clear ip bgp *'. When applicable, read-only mode would begin as soon as the first peer reaches Established state and a timer for <max-delay> seconds is started. During this mode BGP doesn't run any best-path or generate any updates to its peers. This mode continues until: 1. All the configured peers, except the shutdown peers, have sent explicit EOR (End-Of-RIB) or an implicit-EOR. The first keep-alive after BGP has reached Established is considered an implicit-EOR. If the <establish-wait> optional value is given, then BGP will wait for peers to reach establish from the begining of the update-delay till the establish-wait period is over, i.e. the minimum set of established peers for which EOR is expected would be peers established during the establish-wait window, not necessarily all the configured neighbors. 2. max-delay period is over. On hitting any of the above two conditions, BGP resumes the decision process and generates updates to its peers. Default <max-delay> is 0, i.e. the feature is off by default. This feature can be useful in reducing CPU/network used as BGP restarts/clears. Particularly useful in the topologies where BGP learns a prefix from many peers. Intermediate bestpaths are possible for the same prefix as peers get established and start receiving updates at different times. This feature should offer a value-add if the network has a high number of such prefixes. IMPLEMENTATION OBJECTIVES: Given this is an optional feature, minimized the code-churn. Used existing constructs wherever possible (existing queue-plug/unplug were used to achieve delay and resume of best-paths/update-generation). As a result, no new data-structure(s) had to be defined and allocated. 
When the feature is disabled, the new node is not exercised for the most part. Signed-off-by: Vipin Kumar <vipin@cumulusnetworks.com> Reviewed-by: Pradosh Mohapatra <pmohapat@cumulusnetworks.com> Dinesh Dutt <ddutt@cumulusnetworks.com>
2015-05-20 02:40:33 +02:00
{
if (!bgp_update_delay_active(peer->bgp))
return; /* BGP update delay has ended */
if (peer->update_delay_over)
return; /* This peer has already been considered */
if (bgp_debug_neighbor_events(peer))
zlog_debug("Peer %s: Checking restarted", peer->host);
if (peer->status == Established) {
peer->update_delay_over = 1;
peer->bgp->restarted_peers++;
bgp_check_update_delay(peer->bgp);
}
bgpd: bgpd-update-delay.patch COMMAND: 'update-delay <max-delay in seconds> [<establish-wait in seconds>]' DESCRIPTION: This feature is used to enable read-only mode on BGP process restart or when BGP process is cleared using 'clear ip bgp *'. When applicable, read-only mode would begin as soon as the first peer reaches Established state and a timer for <max-delay> seconds is started. During this mode BGP doesn't run any best-path or generate any updates to its peers. This mode continues until: 1. All the configured peers, except the shutdown peers, have sent explicit EOR (End-Of-RIB) or an implicit-EOR. The first keep-alive after BGP has reached Established is considered an implicit-EOR. If the <establish-wait> optional value is given, then BGP will wait for peers to reach establish from the begining of the update-delay till the establish-wait period is over, i.e. the minimum set of established peers for which EOR is expected would be peers established during the establish-wait window, not necessarily all the configured neighbors. 2. max-delay period is over. On hitting any of the above two conditions, BGP resumes the decision process and generates updates to its peers. Default <max-delay> is 0, i.e. the feature is off by default. This feature can be useful in reducing CPU/network used as BGP restarts/clears. Particularly useful in the topologies where BGP learns a prefix from many peers. Intermediate bestpaths are possible for the same prefix as peers get established and start receiving updates at different times. This feature should offer a value-add if the network has a high number of such prefixes. IMPLEMENTATION OBJECTIVES: Given this is an optional feature, minimized the code-churn. Used existing constructs wherever possible (existing queue-plug/unplug were used to achieve delay and resume of best-paths/update-generation). As a result, no new data-structure(s) had to be defined and allocated. 
When the feature is disabled, the new node is not exercised for the most part. Signed-off-by: Vipin Kumar <vipin@cumulusnetworks.com> Reviewed-by: Pradosh Mohapatra <pmohapat@cumulusnetworks.com> Dinesh Dutt <ddutt@cumulusnetworks.com>
2015-05-20 02:40:33 +02:00
}
/* Called as peer receives a keep-alive. Determines if this occurence can be
taken as an implicit EOR for this peer.
NOTE: The very first keep-alive after the Established state of a peer is
considered implicit EOR for the update-delay purposes */
void bgp_update_implicit_eors(struct peer *peer)
bgpd: bgpd-update-delay.patch COMMAND: 'update-delay <max-delay in seconds> [<establish-wait in seconds>]' DESCRIPTION: This feature is used to enable read-only mode on BGP process restart or when BGP process is cleared using 'clear ip bgp *'. When applicable, read-only mode would begin as soon as the first peer reaches Established state and a timer for <max-delay> seconds is started. During this mode BGP doesn't run any best-path or generate any updates to its peers. This mode continues until: 1. All the configured peers, except the shutdown peers, have sent explicit EOR (End-Of-RIB) or an implicit-EOR. The first keep-alive after BGP has reached Established is considered an implicit-EOR. If the <establish-wait> optional value is given, then BGP will wait for peers to reach establish from the begining of the update-delay till the establish-wait period is over, i.e. the minimum set of established peers for which EOR is expected would be peers established during the establish-wait window, not necessarily all the configured neighbors. 2. max-delay period is over. On hitting any of the above two conditions, BGP resumes the decision process and generates updates to its peers. Default <max-delay> is 0, i.e. the feature is off by default. This feature can be useful in reducing CPU/network used as BGP restarts/clears. Particularly useful in the topologies where BGP learns a prefix from many peers. Intermediate bestpaths are possible for the same prefix as peers get established and start receiving updates at different times. This feature should offer a value-add if the network has a high number of such prefixes. IMPLEMENTATION OBJECTIVES: Given this is an optional feature, minimized the code-churn. Used existing constructs wherever possible (existing queue-plug/unplug were used to achieve delay and resume of best-paths/update-generation). As a result, no new data-structure(s) had to be defined and allocated. 
When the feature is disabled, the new node is not exercised for the most part. Signed-off-by: Vipin Kumar <vipin@cumulusnetworks.com> Reviewed-by: Pradosh Mohapatra <pmohapat@cumulusnetworks.com> Dinesh Dutt <ddutt@cumulusnetworks.com>
2015-05-20 02:40:33 +02:00
{
if (!bgp_update_delay_active(peer->bgp))
return; /* BGP update delay has ended */
if (peer->update_delay_over)
return; /* This peer has already been considered */
if (bgp_debug_neighbor_events(peer))
zlog_debug("Peer %s: Checking implicit EORs", peer->host);
if (peer->status == Established) {
peer->update_delay_over = 1;
peer->bgp->implicit_eors++;
bgp_check_update_delay(peer->bgp);
}
bgpd: bgpd-update-delay.patch COMMAND: 'update-delay <max-delay in seconds> [<establish-wait in seconds>]' DESCRIPTION: This feature is used to enable read-only mode on BGP process restart or when BGP process is cleared using 'clear ip bgp *'. When applicable, read-only mode would begin as soon as the first peer reaches Established state and a timer for <max-delay> seconds is started. During this mode BGP doesn't run any best-path or generate any updates to its peers. This mode continues until: 1. All the configured peers, except the shutdown peers, have sent explicit EOR (End-Of-RIB) or an implicit-EOR. The first keep-alive after BGP has reached Established is considered an implicit-EOR. If the <establish-wait> optional value is given, then BGP will wait for peers to reach establish from the begining of the update-delay till the establish-wait period is over, i.e. the minimum set of established peers for which EOR is expected would be peers established during the establish-wait window, not necessarily all the configured neighbors. 2. max-delay period is over. On hitting any of the above two conditions, BGP resumes the decision process and generates updates to its peers. Default <max-delay> is 0, i.e. the feature is off by default. This feature can be useful in reducing CPU/network used as BGP restarts/clears. Particularly useful in the topologies where BGP learns a prefix from many peers. Intermediate bestpaths are possible for the same prefix as peers get established and start receiving updates at different times. This feature should offer a value-add if the network has a high number of such prefixes. IMPLEMENTATION OBJECTIVES: Given this is an optional feature, minimized the code-churn. Used existing constructs wherever possible (existing queue-plug/unplug were used to achieve delay and resume of best-paths/update-generation). As a result, no new data-structure(s) had to be defined and allocated. 
When the feature is disabled, the new node is not exercised for the most part. Signed-off-by: Vipin Kumar <vipin@cumulusnetworks.com> Reviewed-by: Pradosh Mohapatra <pmohapat@cumulusnetworks.com> Dinesh Dutt <ddutt@cumulusnetworks.com>
2015-05-20 02:40:33 +02:00
}
/* Should be called only when there is a change in the EOR_RECEIVED status
for any afi/safi on a peer */
static void bgp_update_explicit_eors(struct peer *peer)
bgpd: bgpd-update-delay.patch COMMAND: 'update-delay <max-delay in seconds> [<establish-wait in seconds>]' DESCRIPTION: This feature is used to enable read-only mode on BGP process restart or when BGP process is cleared using 'clear ip bgp *'. When applicable, read-only mode would begin as soon as the first peer reaches Established state and a timer for <max-delay> seconds is started. During this mode BGP doesn't run any best-path or generate any updates to its peers. This mode continues until: 1. All the configured peers, except the shutdown peers, have sent explicit EOR (End-Of-RIB) or an implicit-EOR. The first keep-alive after BGP has reached Established is considered an implicit-EOR. If the <establish-wait> optional value is given, then BGP will wait for peers to reach establish from the begining of the update-delay till the establish-wait period is over, i.e. the minimum set of established peers for which EOR is expected would be peers established during the establish-wait window, not necessarily all the configured neighbors. 2. max-delay period is over. On hitting any of the above two conditions, BGP resumes the decision process and generates updates to its peers. Default <max-delay> is 0, i.e. the feature is off by default. This feature can be useful in reducing CPU/network used as BGP restarts/clears. Particularly useful in the topologies where BGP learns a prefix from many peers. Intermediate bestpaths are possible for the same prefix as peers get established and start receiving updates at different times. This feature should offer a value-add if the network has a high number of such prefixes. IMPLEMENTATION OBJECTIVES: Given this is an optional feature, minimized the code-churn. Used existing constructs wherever possible (existing queue-plug/unplug were used to achieve delay and resume of best-paths/update-generation). As a result, no new data-structure(s) had to be defined and allocated. 
When the feature is disabled, the new node is not exercised for the most part. Signed-off-by: Vipin Kumar <vipin@cumulusnetworks.com> Reviewed-by: Pradosh Mohapatra <pmohapat@cumulusnetworks.com> Dinesh Dutt <ddutt@cumulusnetworks.com>
2015-05-20 02:40:33 +02:00
{
afi_t afi;
safi_t safi;
if (!bgp_update_delay_active(peer->bgp))
return; /* BGP update delay has ended */
if (peer->update_delay_over)
return; /* This peer has already been considered */
if (bgp_debug_neighbor_events(peer))
zlog_debug("Peer %s: Checking explicit EORs", peer->host);
FOREACH_AFI_SAFI (afi, safi) {
if (peer->afc_nego[afi][safi]
&& !CHECK_FLAG(peer->af_sflags[afi][safi],
PEER_STATUS_EOR_RECEIVED)) {
if (bgp_debug_neighbor_events(peer))
zlog_debug(
" afi %d safi %d didnt receive EOR",
afi, safi);
return;
}
}
peer->update_delay_over = 1;
peer->bgp->explicit_eors++;
bgp_check_update_delay(peer->bgp);
bgpd: bgpd-update-delay.patch COMMAND: 'update-delay <max-delay in seconds> [<establish-wait in seconds>]' DESCRIPTION: This feature is used to enable read-only mode on BGP process restart or when BGP process is cleared using 'clear ip bgp *'. When applicable, read-only mode would begin as soon as the first peer reaches Established state and a timer for <max-delay> seconds is started. During this mode BGP doesn't run any best-path or generate any updates to its peers. This mode continues until: 1. All the configured peers, except the shutdown peers, have sent explicit EOR (End-Of-RIB) or an implicit-EOR. The first keep-alive after BGP has reached Established is considered an implicit-EOR. If the <establish-wait> optional value is given, then BGP will wait for peers to reach establish from the begining of the update-delay till the establish-wait period is over, i.e. the minimum set of established peers for which EOR is expected would be peers established during the establish-wait window, not necessarily all the configured neighbors. 2. max-delay period is over. On hitting any of the above two conditions, BGP resumes the decision process and generates updates to its peers. Default <max-delay> is 0, i.e. the feature is off by default. This feature can be useful in reducing CPU/network used as BGP restarts/clears. Particularly useful in the topologies where BGP learns a prefix from many peers. Intermediate bestpaths are possible for the same prefix as peers get established and start receiving updates at different times. This feature should offer a value-add if the network has a high number of such prefixes. IMPLEMENTATION OBJECTIVES: Given this is an optional feature, minimized the code-churn. Used existing constructs wherever possible (existing queue-plug/unplug were used to achieve delay and resume of best-paths/update-generation). As a result, no new data-structure(s) had to be defined and allocated. 
When the feature is disabled, the new node is not exercised for the most part. Signed-off-by: Vipin Kumar <vipin@cumulusnetworks.com> Reviewed-by: Pradosh Mohapatra <pmohapat@cumulusnetworks.com> Dinesh Dutt <ddutt@cumulusnetworks.com>
2015-05-20 02:40:33 +02:00
}
/* Frontend for NLRI parsing, to fan-out to AFI/SAFI specific parsers
* mp_withdraw, if set, is used to nullify attr structure on most of the calling
* safi function
* and for evpn, passed as parameter
*/
int bgp_nlri_parse(struct peer *peer, struct attr *attr,
struct bgp_nlri *packet, int mp_withdraw)
bgpd: Regularise bgp_update_receive, add missing notifies and checks * bgp_packet.c: (bgp_update_receive) Lots of repeated code, doing same thing for each AFI/SAFI. Except when it doesn't, e.g. the IPv4/VPN case was missing the EoR bgp_clear_stale_route call - the only action really needed for EoR. Make this function a lot more regular, using common, AFI/SAFI independent blocks so far as possible. Replace the 4 separate bgp_nlris with an array, indexed by an enum. The distinct blocks that handle calling bgp_nlri_parse for each different AFI/SAFI can now be replaced with a loop. Transmogrify the nlri SAFI from the SAFI_MPLS_LABELED_VPN code-point used on the wire, to the SAFI_MPLS_VPN safi_t enum we use internally as early as possible. The existing code was not necessarily sending a NOTIFY for NLRI parsing errors, if they arose via bgp_nlri_sanity_check. Send the correct NOTIFY - INVAL_NETWORK for the classic NLRIs and OPT_ATTR_ERR for the MP ones. EoR can now be handled in one block. The existing code seemed broken for EoR recognition in a number of ways: 1. A v4/unicast EoR should be an empty UPDATE. However, it seemed to be treating an UPDATE with attributes, inc. MP REACH/UNREACH, but no classic NLRIs, as a v4/uni EoR. 2. For other AFI/SAFIs, it was treating UPDATEs with no classic withraw and with a zero-length MP withdraw as EoRs. However, that would mean an UPDATE packet _with_ update NLRIs and a 0-len MP withdraw could be classed as an EoR. This seems to be loose coding leading to ambiguous protocol situations and likely incorrect behaviour, rather than simply being liberal. Be more strict about checking that an UPDATE really is an EoR and definitely is not trying to update any NLRIs. This same loose EoR parsing was noted by Chris Hall previously on list. (bgp_nlri_parse) Front end NLRI parse function, to fan-out to the correct parser for the AFI/SAFI. 
* bgp_route.c: (bgp_nlri_sanity_check) We try convert NLRI safi to internal code-point ASAP, adjust switch for that. Leave the wire code point in for defensive coding. (bgp_nlri_parse) rename to bgp_nlri_parse_ip. * tests/bgp_mp_attr_test.c: Can just use bgp_nlri_parse frontend.
2016-02-04 14:27:04 +01:00
{
switch (packet->safi) {
case SAFI_UNICAST:
case SAFI_MULTICAST:
return bgp_nlri_parse_ip(peer, mp_withdraw ? NULL : attr,
packet);
case SAFI_LABELED_UNICAST:
return bgp_nlri_parse_label(peer, mp_withdraw ? NULL : attr,
packet);
case SAFI_MPLS_VPN:
return bgp_nlri_parse_vpn(peer, mp_withdraw ? NULL : attr,
packet);
case SAFI_EVPN:
return bgp_nlri_parse_evpn(peer, attr, packet, mp_withdraw);
default:
return -1;
}
bgpd: Regularise bgp_update_receive, add missing notifies and checks * bgp_packet.c: (bgp_update_receive) Lots of repeated code, doing same thing for each AFI/SAFI. Except when it doesn't, e.g. the IPv4/VPN case was missing the EoR bgp_clear_stale_route call - the only action really needed for EoR. Make this function a lot more regular, using common, AFI/SAFI independent blocks so far as possible. Replace the 4 separate bgp_nlris with an array, indexed by an enum. The distinct blocks that handle calling bgp_nlri_parse for each different AFI/SAFI can now be replaced with a loop. Transmogrify the nlri SAFI from the SAFI_MPLS_LABELED_VPN code-point used on the wire, to the SAFI_MPLS_VPN safi_t enum we use internally as early as possible. The existing code was not necessarily sending a NOTIFY for NLRI parsing errors, if they arose via bgp_nlri_sanity_check. Send the correct NOTIFY - INVAL_NETWORK for the classic NLRIs and OPT_ATTR_ERR for the MP ones. EoR can now be handled in one block. The existing code seemed broken for EoR recognition in a number of ways: 1. A v4/unicast EoR should be an empty UPDATE. However, it seemed to be treating an UPDATE with attributes, inc. MP REACH/UNREACH, but no classic NLRIs, as a v4/uni EoR. 2. For other AFI/SAFIs, it was treating UPDATEs with no classic withraw and with a zero-length MP withdraw as EoRs. However, that would mean an UPDATE packet _with_ update NLRIs and a 0-len MP withdraw could be classed as an EoR. This seems to be loose coding leading to ambiguous protocol situations and likely incorrect behaviour, rather than simply being liberal. Be more strict about checking that an UPDATE really is an EoR and definitely is not trying to update any NLRIs. This same loose EoR parsing was noted by Chris Hall previously on list. (bgp_nlri_parse) Front end NLRI parse function, to fan-out to the correct parser for the AFI/SAFI. 
* bgp_route.c: (bgp_nlri_sanity_check) We try convert NLRI safi to internal code-point ASAP, adjust switch for that. Leave the wire code point in for defensive coding. (bgp_nlri_parse) rename to bgp_nlri_parse_ip. * tests/bgp_mp_attr_test.c: Can just use bgp_nlri_parse frontend.
2016-02-04 14:27:04 +01:00
}
2002-12-13 21:15:29 +01:00
/* Parse BGP Update packet and make attribute object. */
static int bgp_update_receive(struct peer *peer, bgp_size_t size)
2002-12-13 21:15:29 +01:00
{
int ret, nlri_ret;
u_char *end;
struct stream *s;
struct attr attr;
bgp_size_t attribute_len;
bgp_size_t update_len;
bgp_size_t withdraw_len;
enum NLRI_TYPES {
NLRI_UPDATE,
NLRI_WITHDRAW,
NLRI_MP_UPDATE,
NLRI_MP_WITHDRAW,
NLRI_TYPE_MAX
};
struct bgp_nlri nlris[NLRI_TYPE_MAX];
/* Status must be Established. */
if (peer->status != Established) {
zlog_err("%s [FSM] Update packet received under status %s",
peer->host,
lookup_msg(bgp_status_msg, peer->status, NULL));
bgp_notify_send(peer, BGP_NOTIFY_FSM_ERR, 0);
return -1;
}
/* Set initial values. */
memset(&attr, 0, sizeof(struct attr));
attr.label_index = BGP_INVALID_LABEL_INDEX;
attr.label = MPLS_INVALID_LABEL;
memset(&nlris, 0, sizeof(nlris));
memset(peer->rcvd_attr_str, 0, BUFSIZ);
peer->rcvd_attr_printed = 0;
s = peer->curr;
end = stream_pnt(s) + size;
/* RFC1771 6.3 If the Unfeasible Routes Length or Total Attribute
Length is too large (i.e., if Unfeasible Routes Length + Total
Attribute Length + 23 exceeds the message Length), then the Error
Subcode is set to Malformed Attribute List. */
if (stream_pnt(s) + 2 > end) {
zlog_err(
"%s [Error] Update packet error"
" (packet length is short for unfeasible length)",
peer->host);
bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
BGP_NOTIFY_UPDATE_MAL_ATTR);
return -1;
}
/* Unfeasible Route Length. */
withdraw_len = stream_getw(s);
/* Unfeasible Route Length check. */
if (stream_pnt(s) + withdraw_len > end) {
zlog_err(
"%s [Error] Update packet error"
" (packet unfeasible length overflow %d)",
peer->host, withdraw_len);
bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
BGP_NOTIFY_UPDATE_MAL_ATTR);
return -1;
}
/* Unfeasible Route packet format check. */
if (withdraw_len > 0) {
nlris[NLRI_WITHDRAW].afi = AFI_IP;
nlris[NLRI_WITHDRAW].safi = SAFI_UNICAST;
nlris[NLRI_WITHDRAW].nlri = stream_pnt(s);
nlris[NLRI_WITHDRAW].length = withdraw_len;
stream_forward_getp(s, withdraw_len);
}
/* Attribute total length check. */
if (stream_pnt(s) + 2 > end) {
zlog_warn(
"%s [Error] Packet Error"
" (update packet is short for attribute length)",
peer->host);
bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
BGP_NOTIFY_UPDATE_MAL_ATTR);
return -1;
}
/* Fetch attribute total length. */
attribute_len = stream_getw(s);
/* Attribute length check. */
if (stream_pnt(s) + attribute_len > end) {
zlog_warn(
"%s [Error] Packet Error"
" (update packet attribute length overflow %d)",
peer->host, attribute_len);
bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
BGP_NOTIFY_UPDATE_MAL_ATTR);
return -1;
}
/* Certain attribute parsing errors should not be considered bad enough
* to reset the session for, most particularly any partial/optional
* attributes that have 'tunneled' over speakers that don't understand
* them. Instead we withdraw only the prefix concerned.
*
* Complicates the flow a little though..
*/
bgp_attr_parse_ret_t attr_parse_ret = BGP_ATTR_PARSE_PROCEED;
/* This define morphs the update case into a withdraw when lower levels
* have signalled an error condition where this is best.
*/
bgpd: Implement revised error handling for partial optional/trans. attributes * BGP error handling generally boils down to "reset session". This was fine when all BGP speakers pretty much understood all BGP messages. However the increasing deployment of new attribute types has shown this approach to cause problems, in particular where a new attribute type is "tunneled" over some speakers which do not understand it, and then arrives at a speaker which does but considers it malformed (e.g. corruption along the way, or because of early implementation bugs/interop issues). To mitigate this drafts before the IDR (likely to be adopted) propose to treat errors in partial (i.e. not understood by neighbour), optional transitive attributes, when received from eBGP peers, as withdrawing only the NLRIs in the affected UPDATE, rather than causing the entire session to be reset. See: http://tools.ietf.org/html/draft-scudder-idr-optional-transitive * bgp_aspath.c: (assegments_parse) Replace the "NULL means valid, 0-length OR an error" return value with an error code - instead taking pointer to result structure as arg. (aspath_parse) adjust to suit previous change, but here NULL really does mean error in the external interface. * bgp_attr.h (bgp_attr_parse) use an explictly typed and enumerated value to indicate return result. (bgp_attr_unintern_sub) cleans up just the members of an attr, but not the attr itself, for benefit of those who use a stack-local attr. * bgp_attr.c: (bgp_attr_unintern_sub) split out from bgp_attr_unintern (bgp_attr_unintern) as previous. (bgp_attr_malformed) helper function to centralise decisions on how to handle errors in attributes. (bgp_attr_{aspathlimit,origin,etc..}) Use bgp_attr_malformed. (bgp_attr_aspathlimit) Subcode for error specifc to this attr should be BGP_NOTIFY_UPDATE_OPT_ATTR_ERR. (bgp_attr_as4_path) be more rigorous about checks, ala bgp_attr_as_path. 
(bgp_attr_parse) Adjust to deal with the additional error level that bgp_attr_ parsers can raise, and also similarly return appropriate error back up to (bgp_update_receive). Try to avoid leaking as4_path. * bgp_packet.c: (bgp_update_receive) Adjust to deal with BGP_ATTR_PARSE_WITHDRAW error level from bgp_attr_parse, which should lead to a withdraw, by making the attribute parameter in call to (bgp_nlri_parse) conditional on the error, so the update case morphs also into a withdraw. Use bgp_attr_unintern_sub from above, instead of doing this itself. Fix error case returns which were not calling bgp_attr_unintern_sub and probably leaking memory. * tests/aspath_test.c: Fix to work for null return with bad segments
2010-11-23 17:35:42 +01:00
#define NLRI_ATTR_ARG (attr_parse_ret != BGP_ATTR_PARSE_WITHDRAW ? &attr : NULL)
2002-12-13 21:15:29 +01:00
/* Parse attribute when it exists. */
if (attribute_len) {
attr_parse_ret = bgp_attr_parse(peer, &attr, attribute_len,
&nlris[NLRI_MP_UPDATE],
&nlris[NLRI_MP_WITHDRAW]);
if (attr_parse_ret == BGP_ATTR_PARSE_ERROR) {
bgp_attr_unintern_sub(&attr);
return -1;
}
}
/* Logging the attribute. */
if (attr_parse_ret == BGP_ATTR_PARSE_WITHDRAW
|| BGP_DEBUG(update, UPDATE_IN)
|| BGP_DEBUG(update, UPDATE_PREFIX)) {
ret = bgp_dump_attr(&attr, peer->rcvd_attr_str, BUFSIZ);
if (attr_parse_ret == BGP_ATTR_PARSE_WITHDRAW)
zlog_err(
"%s rcvd UPDATE with errors in attr(s)!! Withdrawing route.",
peer->host);
if (ret && bgp_debug_update(peer, NULL, NULL, 1)) {
zlog_debug("%s rcvd UPDATE w/ attr: %s", peer->host,
peer->rcvd_attr_str);
peer->rcvd_attr_printed = 1;
}
}
/* Network Layer Reachability Information. */
update_len = end - stream_pnt(s);
if (update_len) {
/* Set NLRI portion to structure. */
nlris[NLRI_UPDATE].afi = AFI_IP;
nlris[NLRI_UPDATE].safi = SAFI_UNICAST;
nlris[NLRI_UPDATE].nlri = stream_pnt(s);
nlris[NLRI_UPDATE].length = update_len;
stream_forward_getp(s, update_len);
}
if (BGP_DEBUG(update, UPDATE_IN))
zlog_debug("%s rcvd UPDATE wlen %d attrlen %d alen %d",
peer->host, withdraw_len, attribute_len, update_len);
/* Parse any given NLRIs */
for (int i = NLRI_UPDATE; i < NLRI_TYPE_MAX; i++) {
if (!nlris[i].nlri)
continue;
/* NLRI is processed iff the peer if configured for the specific
* afi/safi */
if (!peer->afc[nlris[i].afi][nlris[i].safi]) {
zlog_info(
"%s [Info] UPDATE for non-enabled AFI/SAFI %u/%u",
peer->host, nlris[i].afi, nlris[i].safi);
continue;
}
/* EoR handled later */
if (nlris[i].length == 0)
continue;
switch (i) {
case NLRI_UPDATE:
case NLRI_MP_UPDATE:
nlri_ret = bgp_nlri_parse(peer, NLRI_ATTR_ARG,
&nlris[i], 0);
break;
case NLRI_WITHDRAW:
case NLRI_MP_WITHDRAW:
nlri_ret = bgp_nlri_parse(peer, &attr, &nlris[i], 1);
break;
default:
nlri_ret = -1;
}
if (nlri_ret < 0) {
zlog_err("%s [Error] Error parsing NLRI", peer->host);
if (peer->status == Established)
bgp_notify_send(
peer, BGP_NOTIFY_UPDATE_ERR,
i <= NLRI_WITHDRAW
? BGP_NOTIFY_UPDATE_INVAL_NETWORK
: BGP_NOTIFY_UPDATE_OPT_ATTR_ERR);
bgp_attr_unintern_sub(&attr);
return -1;
}
}
/* EoR checks
*
* Non-MP IPv4/Unicast EoR is a completely empty UPDATE
* and MP EoR should have only an empty MP_UNREACH
*/
if ((!update_len && !withdraw_len &&
nlris[NLRI_MP_UPDATE].length == 0) ||
(attr_parse_ret == BGP_ATTR_PARSE_EOR)) {
afi_t afi = 0;
safi_t safi;
/* Non-MP IPv4/Unicast is a completely emtpy UPDATE - already
* checked
* update and withdraw NLRI lengths are 0.
*/
if (!attribute_len) {
afi = AFI_IP;
safi = SAFI_UNICAST;
} else if (attr.flag & ATTR_FLAG_BIT(BGP_ATTR_MP_UNREACH_NLRI)
&& nlris[NLRI_MP_WITHDRAW].length == 0) {
afi = nlris[NLRI_MP_WITHDRAW].afi;
safi = nlris[NLRI_MP_WITHDRAW].safi;
} else if (attr_parse_ret == BGP_ATTR_PARSE_EOR) {
afi = nlris[NLRI_MP_UPDATE].afi;
safi = nlris[NLRI_MP_UPDATE].safi;
}
if (afi && peer->afc[afi][safi]) {
/* End-of-RIB received */
if (!CHECK_FLAG(peer->af_sflags[afi][safi],
PEER_STATUS_EOR_RECEIVED)) {
SET_FLAG(peer->af_sflags[afi][safi],
PEER_STATUS_EOR_RECEIVED);
bgp_update_explicit_eors(peer);
}
/* NSF delete stale route */
if (peer->nsf[afi][safi])
bgp_clear_stale_route(peer, afi, safi);
if (bgp_debug_neighbor_events(peer)) {
zlog_debug("rcvd End-of-RIB for %s from %s",
afi_safi_print(afi, safi),
peer->host);
}
}
}
/* Everything is done. We unintern temporary structures which
interned in bgp_attr_parse(). */
bgp_attr_unintern_sub(&attr);
/* If peering is stopped due to some reason, do not generate BGP
event. */
if (peer->status != Established)
return 0;
/* Increment packet counter. */
peer->update_in++;
peer->update_time = bgp_clock();
/* Rearm holdtime timer */
BGP_TIMER_OFF(peer->t_holdtime);
bgp_timer_set(peer);
return 0;
2002-12-13 21:15:29 +01:00
}
/*
 * Handle a received NOTIFICATION message held in peer->curr.
 *
 * peer - peer the message came from
 * size - number of message bytes after the BGP header; the first two are
 *        the error code and subcode, the rest is the data field
 *
 * The code, subcode and a raw copy of the data are kept in peer->notify
 * (any previously stored data is released first). A hex rendering of the
 * data is built only for bgp_notify_print() and freed again afterwards.
 * Finally the Receive_NOTIFICATION_message FSM event is queued.
 */
static void bgp_notify_receive(struct peer *peer, bgp_size_t size)
{
	struct bgp_notify bgp_notify;
	int i;

	/* Start from a fully defined struct: it is handed to
	 * bgp_notify_print() even when the message carries no data, in
	 * which case data/raw_data are never assigned below.
	 */
	memset(&bgp_notify, 0, sizeof(bgp_notify));

	if (peer->notify.data) {
		XFREE(MTYPE_TMP, peer->notify.data);
		peer->notify.data = NULL;
		peer->notify.length = 0;
	}

	bgp_notify.code = stream_getc(peer->curr);
	bgp_notify.subcode = stream_getc(peer->curr);
	bgp_notify.length = size - 2;
	bgp_notify.data = NULL;

	/* Preserve notify code and sub code. */
	peer->notify.code = bgp_notify.code;
	peer->notify.subcode = bgp_notify.subcode;

	/* For further diagnostic record returned Data. */
	if (bgp_notify.length) {
		peer->notify.length = bgp_notify.length;
		peer->notify.data = XMALLOC(MTYPE_TMP, bgp_notify.length);
		memcpy(peer->notify.data, stream_pnt(peer->curr),
		       bgp_notify.length);
	}

	/* For debug: render the data as "xx xx xx". Each octet needs at
	 * most three characters (separator + two hex digits) and the first
	 * one has no separator, so length * 3 bytes also leaves room for
	 * the terminating NUL.
	 */
	if (bgp_notify.length) {
		size_t hex_size = (size_t)bgp_notify.length * 3;
		size_t off = 0;

		bgp_notify.data = XMALLOC(MTYPE_TMP, hex_size);
		bgp_notify.data[0] = '\0';

		for (i = 0; i < bgp_notify.length; i++) {
			/* Always consume the octet from the stream. */
			u_char octet = stream_getc(peer->curr);
			int n;

			if (off >= hex_size)
				continue;

			n = snprintf(bgp_notify.data + off, hex_size - off,
				     "%s%02x", i ? " " : "", octet);
			if (n > 0 && (size_t)n < hex_size - off)
				off += (size_t)n;
			else
				off = hex_size; /* no room left; stop writing */
		}

		bgp_notify.raw_data = (u_char *)peer->notify.data;
	}

	bgp_notify_print(peer, &bgp_notify, "received");

	if (bgp_notify.data) {
		XFREE(MTYPE_TMP, bgp_notify.data);
		bgp_notify.data = NULL;
		bgp_notify.length = 0;
	}

	/* peer count update */
	peer->notify_in++;

	peer->last_reset = PEER_DOWN_NOTIFY_RECEIVED;

	/* We have to check for Notify with Unsupported Optional Parameter.
	 * In that case we fall back to open without the capability option.
	 * But this is done in bgp_stop. We just mark it here to avoid
	 * changing the fsm tables.
	 */
	if (bgp_notify.code == BGP_NOTIFY_OPEN_ERR
	    && bgp_notify.subcode == BGP_NOTIFY_OPEN_UNSUP_PARAM)
		UNSET_FLAG(peer->sflags, PEER_STATUS_CAPABILITY_OPEN);

	BGP_EVENT_ADD(peer, Receive_NOTIFICATION_message);
}
/*
 * Handle a received KEEPALIVE: optionally log it, then queue the
 * Receive_KEEPALIVE_message FSM event for the peer. 'size' is not used.
 */
static void bgp_keepalive_receive(struct peer *peer, bgp_size_t size)
{
	if (bgp_debug_keepalive(peer)) {
		zlog_debug("%s KEEPALIVE rcvd", peer->host);
	}

	BGP_EVENT_ADD(peer, Receive_KEEPALIVE_message);
}
/*
 * Handle a received ROUTE-REFRESH message held in peer->curr, including an
 * optional ORF (Outbound Route Filter) payload.
 *
 * peer - peer the message came from
 * size - number of message bytes after the BGP header
 *
 * A NOTIFICATION is sent and the message dropped when route refresh was
 * not advertised, when the peer is not Established, or when an ORF payload
 * is shorter than 5 bytes. Messages for an AFI/SAFI that cannot be mapped
 * to internal values are ignored.
 *
 * Prefix-list ORF entries are stored under a prefix-list named
 * "<host>.<afi>.<safi>"; a malformed entry removes the whole list. When
 * the ORF asks for a deferred refresh nothing is announced; otherwise the
 * routes for the AFI/SAFI are re-announced to the peer.
 */
static void bgp_route_refresh_receive(struct peer *peer, bgp_size_t size)
{
	iana_afi_t pkt_afi;
	afi_t afi;
	iana_safi_t pkt_safi;
	safi_t safi;
	struct stream *s;
	struct peer_af *paf;
	struct update_group *updgrp;
	struct peer *updgrp_peer;

	/* If peer does not have the capability, send notification. */
	if (!CHECK_FLAG(peer->cap, PEER_CAP_REFRESH_ADV)) {
		zlog_err("%s [Error] BGP route refresh is not enabled",
			 peer->host);
		bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
				BGP_NOTIFY_HEADER_BAD_MESTYPE);
		return;
	}

	/* Status must be Established. */
	if (peer->status != Established) {
		zlog_err(
			"%s [Error] Route refresh packet received under status %s",
			peer->host,
			lookup_msg(bgp_status_msg, peer->status, NULL));
		bgp_notify_send(peer, BGP_NOTIFY_FSM_ERR, 0);
		return;
	}

	s = peer->curr;

	/* Parse packet: AFI (2 bytes), one byte that is skipped, SAFI. */
	pkt_afi = stream_getw(s);
	(void)stream_getc(s);
	pkt_safi = stream_getc(s);

	if (bgp_debug_update(peer, NULL, NULL, 0))
		zlog_debug("%s rcvd REFRESH_REQ for afi/safi: %d/%d",
			   peer->host, pkt_afi, pkt_safi);

	/* Convert AFI, SAFI to internal values and check. */
	if (bgp_map_afi_safi_iana2int(pkt_afi, pkt_safi, &afi, &safi)) {
		zlog_info(
			"%s REFRESH_REQ for unrecognized afi/safi: %d/%d - ignored",
			peer->host, pkt_afi, pkt_safi);
		return;
	}

	/* Anything beyond the minimum message is an ORF payload. */
	if (size != BGP_MSG_ROUTE_REFRESH_MIN_SIZE - BGP_HEADER_SIZE) {
		u_char *end;
		u_char when_to_refresh;
		u_char orf_type;
		u_int16_t orf_len;

		if (size - (BGP_MSG_ROUTE_REFRESH_MIN_SIZE - BGP_HEADER_SIZE)
		    < 5) {
			zlog_info("%s ORF route refresh length error",
				  peer->host);
			bgp_notify_send(peer, BGP_NOTIFY_CEASE, 0);
			return;
		}

		when_to_refresh = stream_getc(s);
		/* 5 = AFI + skipped byte + SAFI + when-to-refresh, all
		 * already consumed from the stream.
		 */
		end = stream_pnt(s) + (size - 5);

		/* Each ORF starts with a type byte and a 2-byte length. */
		while ((stream_pnt(s) + 2) < end) {
			orf_type = stream_getc(s);
			orf_len = stream_getw(s);

			/* orf_len in bounds? */
			if ((stream_pnt(s) + orf_len) > end)
				break; /* XXX: Notify instead?? */

			if (orf_type == ORF_TYPE_PREFIX
			    || orf_type == ORF_TYPE_PREFIX_OLD) {
				uint8_t *p_pnt = stream_pnt(s);
				uint8_t *p_end = stream_pnt(s) + orf_len;
				struct orf_prefix orfp;
				u_char common = 0;
				u_int32_t seq;
				int psize;
				char name[BUFSIZ];
				int ret = CMD_SUCCESS;

				if (bgp_debug_neighbor_events(peer)) {
					zlog_debug(
						"%s rcvd Prefixlist ORF(%d) length %d",
						peer->host, orf_type, orf_len);
				}

				/* we're going to read at least 1 byte of
				 * common ORF header, and 7 bytes of ORF
				 * Address-filter entry from the stream
				 */
				if (orf_len < 7)
					break;

				/* ORF prefix-list name; bounded so that an
				 * unexpectedly long host string is truncated
				 * instead of overrunning the buffer.
				 */
				snprintf(name, sizeof(name), "%s.%d.%d",
					 peer->host, afi, safi);

				while (p_pnt < p_end) {
					/* If the ORF entry is malformed, want
					 * to read as much of it as possible
					 * without going beyond the bounds of
					 * the entry, to maximise debug
					 * information.
					 */
					int ok;

					memset(&orfp, 0,
					       sizeof(struct orf_prefix));
					common = *p_pnt++;
					/* after ++: p_pnt <= p_end */
					if (common
					    & ORF_COMMON_PART_REMOVE_ALL) {
						if (bgp_debug_neighbor_events(
							    peer))
							zlog_debug(
								"%s rcvd Remove-All pfxlist ORF request",
								peer->host);
						prefix_bgp_orf_remove_all(afi,
									  name);
						break;
					}

					/* Sequence number (4 bytes). */
					ok = ((u_int32_t)(p_end - p_pnt)
					      >= sizeof(u_int32_t));
					if (ok) {
						memcpy(&seq, p_pnt,
						       sizeof(u_int32_t));
						p_pnt += sizeof(u_int32_t);
						orfp.seq = ntohl(seq);
					} else
						p_pnt = p_end;

					/* ge/le values are checked in
					 * prefix_bgp_orf_set()
					 */
					if ((ok = (p_pnt < p_end)))
						orfp.ge = *p_pnt++;
					if ((ok = (p_pnt < p_end)))
						orfp.le = *p_pnt++;
					if ((ok = (p_pnt < p_end)))
						orfp.p.prefixlen = *p_pnt++;

					/* afi checked already */
					orfp.p.family = afi2family(afi);

					/* 0 if not ok */
					psize = PSIZE(orfp.p.prefixlen);

					/* valid for family ? */
					if (psize > prefix_blen(&orfp.p)) {
						ok = 0;
						psize = prefix_blen(&orfp.p);
					}

					/* valid for packet ? */
					if (psize > (p_end - p_pnt)) {
						ok = 0;
						psize = p_end - p_pnt;
					}

					if (psize > 0)
						memcpy(&orfp.p.u.prefix, p_pnt,
						       psize);
					p_pnt += psize;

					if (bgp_debug_neighbor_events(peer)) {
						char buf[INET6_BUFSIZ];

						zlog_debug(
							"%s rcvd %s %s seq %u %s/%d ge %d le %d%s",
							peer->host,
							(common & ORF_COMMON_PART_REMOVE
								 ? "Remove"
								 : "Add"),
							(common & ORF_COMMON_PART_DENY
								 ? "deny"
								 : "permit"),
							orfp.seq,
							inet_ntop(
								orfp.p.family,
								&orfp.p.u.prefix,
								buf,
								INET6_BUFSIZ),
							orfp.p.prefixlen,
							orfp.ge, orfp.le,
							ok ? "" : " MALFORMED");
					}

					if (ok)
						ret = prefix_bgp_orf_set(
							name, afi, &orfp,
							(common & ORF_COMMON_PART_DENY
								 ? 0
								 : 1),
							(common & ORF_COMMON_PART_REMOVE
								 ? 0
								 : 1));

					/* A malformed entry, or one that
					 * could not be applied, discards the
					 * whole list.
					 */
					if (!ok || ret != CMD_SUCCESS) {
						zlog_info(
							"%s Received misformatted prefixlist ORF."
							" Remove All pfxlist",
							peer->host);
						prefix_bgp_orf_remove_all(afi,
									  name);
						break;
					}
				}

				peer->orf_plist[afi][safi] =
					prefix_bgp_orf_lookup(afi, name);
			}
			stream_forward_getp(s, orf_len);
		}

		if (bgp_debug_neighbor_events(peer))
			zlog_debug("%s rcvd Refresh %s ORF request", peer->host,
				   when_to_refresh == REFRESH_DEFER
					   ? "Defer"
					   : "Immediate");
		if (when_to_refresh == REFRESH_DEFER)
			return;
	}

	/* First update is deferred until ORF or ROUTE-REFRESH is received */
	if (CHECK_FLAG(peer->af_sflags[afi][safi],
		       PEER_STATUS_ORF_WAIT_REFRESH))
		UNSET_FLAG(peer->af_sflags[afi][safi],
			   PEER_STATUS_ORF_WAIT_REFRESH);

	paf = peer_af_find(peer, afi, safi);
	if (paf && paf->subgroup) {
		/* Mirror the ORF prefix-list onto the update-group's peer. */
		if (peer->orf_plist[afi][safi]) {
			updgrp = PAF_UPDGRP(paf);
			updgrp_peer = UPDGRP_PEER(updgrp);
			updgrp_peer->orf_plist[afi][safi] =
				peer->orf_plist[afi][safi];
		}

		/* If the peer is configured for default-originate clear the
		 * SUBGRP_STATUS_DEFAULT_ORIGINATE flag so that we will
		 * re-advertise the default
		 */
		if (CHECK_FLAG(paf->subgroup->sflags,
			       SUBGRP_STATUS_DEFAULT_ORIGINATE))
			UNSET_FLAG(paf->subgroup->sflags,
				   SUBGRP_STATUS_DEFAULT_ORIGINATE);
	}

	/* Perform route refreshment to the peer */
	bgp_announce_route(peer, afi, safi);
}
/*
 * Parse the body of a dynamic CAPABILITY message.
 *
 * peer   - peer the message came from
 * pnt    - start of the capability list
 * length - number of bytes in the list
 *
 * Each entry is an action byte followed by a capability header (code and
 * length) and 'length' bytes of payload. Only the multiprotocol (MP)
 * capability is acted upon; other codes are logged and skipped.
 *
 * Returns 0 when the whole list was walked, or -1 after sending a CEASE
 * NOTIFICATION for a truncated entry, an unknown action value, or an MP
 * capability whose payload is too short to hold an AFI/SAFI.
 */
static int bgp_capability_msg_parse(struct peer *peer, u_char *pnt,
				    bgp_size_t length)
{
	u_char *end;
	u_char *data;
	struct capability_mp_data mpc;
	struct capability_header *hdr;
	u_char action;
	iana_afi_t pkt_afi;
	afi_t afi;
	iana_safi_t pkt_safi;
	safi_t safi;

	end = pnt + length;

	while (pnt < end) {
		/* We need at least action, capability code and capability
		 * length.
		 */
		if (pnt + 3 > end) {
			zlog_info("%s Capability length error", peer->host);
			bgp_notify_send(peer, BGP_NOTIFY_CEASE, 0);
			return -1;
		}
		action = *pnt;
		hdr = (struct capability_header *)(pnt + 1);

		/* Action value check. */
		if (action != CAPABILITY_ACTION_SET
		    && action != CAPABILITY_ACTION_UNSET) {
			zlog_info("%s Capability Action Value error %d",
				  peer->host, action);
			bgp_notify_send(peer, BGP_NOTIFY_CEASE, 0);
			return -1;
		}

		if (bgp_debug_neighbor_events(peer))
			zlog_debug(
				"%s CAPABILITY has action: %d, code: %u, length %u",
				peer->host, action, hdr->code, hdr->length);

		/* Capability length check. */
		if ((pnt + hdr->length + 3) > end) {
			zlog_info("%s Capability length error", peer->host);
			bgp_notify_send(peer, BGP_NOTIFY_CEASE, 0);
			return -1;
		}

		/* Remember where the payload starts and step over this
		 * entry right away, so every 'continue' below moves on to
		 * the next capability.
		 */
		data = pnt + 3;
		pnt += hdr->length + 3;

		if (hdr->code != CAPABILITY_CODE_MP) {
			zlog_warn(
				"%s unrecognized capability code: %d - ignored",
				peer->host, hdr->code);
			continue;
		}

		/* The AFI/SAFI payload is only copied once the entry is
		 * known to be long enough; otherwise the copy would read
		 * past this capability (and possibly past the message).
		 */
		if (hdr->length < sizeof(struct capability_mp_data)) {
			zlog_info("%s Capability length error", peer->host);
			bgp_notify_send(peer, BGP_NOTIFY_CEASE, 0);
			return -1;
		}
		memcpy(&mpc, data, sizeof(struct capability_mp_data));

		pkt_afi = ntohs(mpc.afi);
		pkt_safi = mpc.safi;

		/* Ignore capability when override-capability is set. */
		if (CHECK_FLAG(peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY))
			continue;

		/* Convert AFI, SAFI to internal values. */
		if (bgp_map_afi_safi_iana2int(pkt_afi, pkt_safi, &afi,
					      &safi)) {
			if (bgp_debug_neighbor_events(peer))
				zlog_debug(
					"%s Dynamic Capability MP_EXT afi/safi invalid "
					"(%u/%u)",
					peer->host, pkt_afi, pkt_safi);
			continue;
		}

		/* Address family check. */
		if (bgp_debug_neighbor_events(peer))
			zlog_debug(
				"%s CAPABILITY has %s MP_EXT CAP for afi/safi: %u/%u",
				peer->host,
				action == CAPABILITY_ACTION_SET
					? "Advertising"
					: "Removing",
				pkt_afi, pkt_safi);

		if (action == CAPABILITY_ACTION_SET) {
			peer->afc_recv[afi][safi] = 1;
			if (peer->afc[afi][safi]) {
				peer->afc_nego[afi][safi] = 1;
				bgp_announce_route(peer, afi, safi);
			}
		} else {
			peer->afc_recv[afi][safi] = 0;
			peer->afc_nego[afi][safi] = 0;

			/* Drop the family's routes while something is still
			 * negotiated; otherwise stop the session.
			 */
			if (peer_active_nego(peer))
				bgp_clear_route(peer, afi, safi);
			else
				BGP_EVENT_ADD(peer, BGP_Stop);
		}
	}
	return 0;
}
/*
 * Entry point for a received dynamic CAPABILITY message.
 *
 * Rejects the message with a NOTIFICATION (and returns -1) when dynamic
 * capability was not advertised or when the peer is not Established;
 * otherwise returns the result of parsing the 'size' bytes at the current
 * read position of peer->curr.
 *
 * This is exported for unit-test purposes.
 */
int bgp_capability_receive(struct peer *peer, bgp_size_t size)
{
	if (bgp_debug_neighbor_events(peer)) {
		zlog_debug("%s rcv CAPABILITY", peer->host);
	}

	/* The capability must have been advertised for this to be legal. */
	if (!CHECK_FLAG(peer->cap, PEER_CAP_DYNAMIC_ADV)) {
		zlog_err("%s [Error] BGP dynamic capability is not enabled",
			 peer->host);
		bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
				BGP_NOTIFY_HEADER_BAD_MESTYPE);
		return -1;
	}

	/* Only valid once the session is up. */
	if (peer->status != Established) {
		zlog_err(
			"%s [Error] Dynamic capability packet received under status %s",
			peer->host,
			lookup_msg(bgp_status_msg, peer->status, NULL));
		bgp_notify_send(peer, BGP_NOTIFY_FSM_ERR, 0);
		return -1;
	}

	/* Hand the message body over to the parser. */
	return bgp_capability_msg_parse(peer, stream_pnt(peer->curr), size);
}
/* Starting point of packet process function. */
int bgp_process_packet(struct thread *thread)
2002-12-13 21:15:29 +01:00
{
/* Yes first of all get peer pointer. */
struct peer *peer;
peer = THREAD_ARG(thread);
2002-12-13 21:15:29 +01:00
/* Guard against scheduled events that occur after peer deletion. */
if (peer->status == Deleted || peer->status == Clearing)
return 0;
2002-12-13 21:15:29 +01:00
int processed = 0;
while (processed < 5 && peer->ibuf->count > 0) {
u_char type = 0;
bgp_size_t size;
char notify_data_length[2];
u_int32_t notify_out;
/* Note notify_out so we can check later to see if we sent
* another one */
notify_out = peer->notify_out;
pthread_mutex_lock(&peer->io_mtx);
{
peer->curr = stream_fifo_pop(peer->ibuf);
}
pthread_mutex_unlock(&peer->io_mtx);
if (peer->curr == NULL) // no packets to process, hmm...
return 0;
bgp_size_t actual_size = stream_get_endp(peer->curr);
/* skip the marker and copy the packet length */
stream_forward_getp(peer->curr, BGP_MARKER_SIZE);
memcpy(notify_data_length, stream_pnt(peer->curr), 2);
/* read in the packet length and type */
size = stream_getw(peer->curr);
type = stream_getc(peer->curr);
/* BGP packet dump function. */
bgp_dump_packet(peer, type, peer->curr);
/* adjust size to exclude the marker + length + type */
size -= BGP_HEADER_SIZE;
/* Read rest of the packet and call each sort of packet routine
*/
switch (type) {
case BGP_MSG_OPEN:
peer->open_in++;
bgp_open_receive(peer,
size); /* XXX return value ignored! */
break;
case BGP_MSG_UPDATE:
peer->readtime = monotime(NULL);
bgp_update_receive(peer, size);
break;
case BGP_MSG_NOTIFY:
bgp_notify_receive(peer, size);
break;
case BGP_MSG_KEEPALIVE:
peer->readtime = monotime(NULL);
bgp_keepalive_receive(peer, size);
break;
case BGP_MSG_ROUTE_REFRESH_NEW:
case BGP_MSG_ROUTE_REFRESH_OLD:
peer->refresh_in++;
bgp_route_refresh_receive(peer, size);
break;
case BGP_MSG_CAPABILITY:
peer->dynamic_cap_in++;
bgp_capability_receive(peer, size);
break;
}
/* If reading this packet caused us to send a NOTIFICATION then
* store a copy
* of the packet for troubleshooting purposes
*/
if (notify_out < peer->notify_out) {
memcpy(peer->last_reset_cause, peer->curr->data,
actual_size);
peer->last_reset_cause_size = actual_size;
}
/* Delete packet and carry on. */
if (peer->curr) {
stream_free(peer->curr);
peer->curr = NULL;
processed++;
}
}
if (peer->ibuf->count > 0) { // more work to do, come back later
thread_add_background(bm->master, bgp_process_packet, peer, 0,
&peer->t_process_packet);
2002-12-13 21:15:29 +01:00
}
return 0;
2002-12-13 21:15:29 +01:00
}