bgpd: implement SendHoldTimer

As described by
https://www.ietf.org/archive/id/draft-spaghetti-idr-bgp-sendholdtimer-04.html

Since this replicates the HoldTime check on the receiver that is already
part of the protocol, I do not believe it necessary to wait for IETF
progress on this draft.  It's just replicating an existing element of
the protocol at the other side of the session.

Signed-off-by: David Lamparter <equinox@opensourcerouting.org>
This commit is contained in:
David Lamparter 2021-04-22 11:04:52 +02:00
parent 18028bdb9b
commit bd9fb6f368
5 changed files with 63 additions and 3 deletions

View file

@ -181,6 +181,12 @@ static struct log_ref ferr_bgp_warn[] = {
.description = "BGP is in the process of building NLRI information for a peer and has discovered an inconsistent internal state",
.suggestion = "Gather log files and open an Issue, restart FRR",
},
{
.code = EC_BGP_SENDQ_STUCK_WARN,
.title = "BGP has been unable to send anything to a peer for an extended time",
.description = "The BGP peer does not seem to be receiving or processing any data received from us, causing updates to be delayed.",
.suggestion = "Check connectivity to the peer and that it is not overloaded",
},
{
.code = END_FERR,
}
@ -480,6 +486,12 @@ static struct log_ref ferr_bgp_err[] = {
.description = "BGP when using a v6 peer requires a v6 LL address to be configured on the outgoing interface as per RFC 4291 section 2.1",
.suggestion = "Add a v6 LL address to the outgoing interfaces as per RFC",
},
{
.code = EC_BGP_SENDQ_STUCK_PROPER,
.title = "BGP is shutting down a peer due to being unable to send anything for an extended time",
.description = "No BGP updates were successfully sent to the peer for more than twice the holdtime.",
.suggestion = "Check connectivity to the peer and that it is not overloaded",
},
{
.code = END_FERR,
}

View file

@ -102,6 +102,8 @@ enum bgp_log_refs {
EC_BGP_INVALID_BGP_INSTANCE,
EC_BGP_INVALID_ROUTE,
EC_BGP_NO_LL_ADDRESS_AVAILABLE,
EC_BGP_SENDQ_STUCK_WARN,
EC_BGP_SENDQ_STUCK_PROPER,
};
extern void bgp_error_init(void);

View file

@ -298,6 +298,7 @@ static uint16_t bgp_write(struct peer *peer)
unsigned int iovsz;
unsigned int strmsz;
unsigned int total_written;
time_t now;
wpkt_quanta_old = atomic_load_explicit(&peer->bgp->wpkt_quanta,
memory_order_relaxed);
@ -430,19 +431,22 @@ static uint16_t bgp_write(struct peer *peer)
}
done : {
now = bgp_clock();
/*
* Update last_update if UPDATEs were written.
* Note: that these are only updated at end,
* not per message (i.e., per loop)
*/
if (uo)
atomic_store_explicit(&peer->last_update, bgp_clock(),
atomic_store_explicit(&peer->last_update, now,
memory_order_relaxed);
/* If we TXed any flavor of packet */
if (update_last_write)
atomic_store_explicit(&peer->last_write, bgp_clock(),
if (update_last_write) {
atomic_store_explicit(&peer->last_write, now,
memory_order_relaxed);
peer->last_sendq_ok = now;
}
}
return status;

View file

@ -122,8 +122,45 @@ void bgp_packet_set_size(struct stream *s)
*/
static void bgp_packet_add(struct peer *peer, struct stream *s)
{
intmax_t delta;
uint32_t holdtime;
frr_with_mutex(&peer->io_mtx) {
/* if the queue is empty, reset the "last OK" timestamp to
* now, otherwise if we write another packet immediately
* after it'll get confused
*/
if (!stream_fifo_count_safe(peer->obuf))
peer->last_sendq_ok = bgp_clock();
stream_fifo_push(peer->obuf, s);
delta = bgp_clock() - peer->last_sendq_ok;
holdtime = atomic_load_explicit(&peer->holdtime,
memory_order_relaxed);
/* Note that when we're here, we're adding some packet to the
* OutQ. That includes keepalives when there is nothing to
* do, so there's a guarantee we pass by here once in a while.
*
* That implies there is no need to go set up another separate
* timer that ticks down SendHoldTime, as we'll be here sooner
* or later anyway and will see the checks below failing.
*/
if (delta > 2 * (intmax_t)holdtime) {
flog_err(
EC_BGP_SENDQ_STUCK_PROPER,
"%s has not made any SendQ progress for 2 holdtimes, terminating session",
peer->host);
BGP_EVENT_ADD(peer, TCP_fatal_error);
} else if (delta > (intmax_t)holdtime &&
bgp_clock() - peer->last_sendq_warn > 5) {
flog_warn(
EC_BGP_SENDQ_STUCK_WARN,
"%s has not made any SendQ progress for 1 holdtime, peer overloaded?",
peer->host);
peer->last_sendq_warn = bgp_clock();
}
}
}

View file

@ -1533,6 +1533,11 @@ struct peer {
/* timestamp when the last msg was written */
_Atomic time_t last_update;
/* only updated under io_mtx.
* last_sendq_warn is only for ratelimiting log warning messages.
*/
time_t last_sendq_ok, last_sendq_warn;
/* Notify data. */
struct bgp_notify notify;