forked from Mirror/frr
bgpd: implement SendHoldTimer
As described by https://www.ietf.org/archive/id/draft-spaghetti-idr-bgp-sendholdtimer-04.html Since this replicates the HoldTime check on the receiver that is already part of the protocol, I do not believe it necessary to wait for IETF progress on this draft. It's just replicating an existing element of the protocol at the other side of the session. Signed-off-by: David Lamparter <equinox@opensourcerouting.org>
This commit is contained in:
parent
18028bdb9b
commit
bd9fb6f368
|
@ -181,6 +181,12 @@ static struct log_ref ferr_bgp_warn[] = {
|
|||
.description = "BGP is in the process of building NLRI information for a peer and has discovered an inconsistent internal state",
|
||||
.suggestion = "Gather log files and open an Issue, restart FRR",
|
||||
},
|
||||
{
|
||||
.code = EC_BGP_SENDQ_STUCK_WARN,
|
||||
.title = "BGP has been unable to send anything to a peer for an extended time",
|
||||
.description = "The BGP peer does not seem to be receiving or processing any data received from us, causing updates to be delayed.",
|
||||
.suggestion = "Check connectivity to the peer and that it is not overloaded",
|
||||
},
|
||||
{
|
||||
.code = END_FERR,
|
||||
}
|
||||
|
@ -480,6 +486,12 @@ static struct log_ref ferr_bgp_err[] = {
|
|||
.description = "BGP when using a v6 peer requires a v6 LL address to be configured on the outgoing interface as per RFC 4291 section 2.1",
|
||||
.suggestion = "Add a v6 LL address to the outgoing interfaces as per RFC",
|
||||
},
|
||||
{
|
||||
.code = EC_BGP_SENDQ_STUCK_PROPER,
|
||||
.title = "BGP is shutting down a peer due to being unable to send anything for an extended time",
|
||||
.description = "No BGP updates were successfully sent to the peer for more than twice the holdtime.",
|
||||
.suggestion = "Check connectivity to the peer and that it is not overloaded",
|
||||
},
|
||||
{
|
||||
.code = END_FERR,
|
||||
}
|
||||
|
|
|
@ -102,6 +102,8 @@ enum bgp_log_refs {
|
|||
EC_BGP_INVALID_BGP_INSTANCE,
|
||||
EC_BGP_INVALID_ROUTE,
|
||||
EC_BGP_NO_LL_ADDRESS_AVAILABLE,
|
||||
EC_BGP_SENDQ_STUCK_WARN,
|
||||
EC_BGP_SENDQ_STUCK_PROPER,
|
||||
};
|
||||
|
||||
extern void bgp_error_init(void);
|
||||
|
|
|
@ -298,6 +298,7 @@ static uint16_t bgp_write(struct peer *peer)
|
|||
unsigned int iovsz;
|
||||
unsigned int strmsz;
|
||||
unsigned int total_written;
|
||||
time_t now;
|
||||
|
||||
wpkt_quanta_old = atomic_load_explicit(&peer->bgp->wpkt_quanta,
|
||||
memory_order_relaxed);
|
||||
|
@ -430,19 +431,22 @@ static uint16_t bgp_write(struct peer *peer)
|
|||
}
|
||||
|
||||
done : {
|
||||
now = bgp_clock();
|
||||
/*
|
||||
* Update last_update if UPDATEs were written.
|
||||
* Note: that these are only updated at end,
|
||||
* not per message (i.e., per loop)
|
||||
*/
|
||||
if (uo)
|
||||
atomic_store_explicit(&peer->last_update, bgp_clock(),
|
||||
atomic_store_explicit(&peer->last_update, now,
|
||||
memory_order_relaxed);
|
||||
|
||||
/* If we TXed any flavor of packet */
|
||||
if (update_last_write)
|
||||
atomic_store_explicit(&peer->last_write, bgp_clock(),
|
||||
if (update_last_write) {
|
||||
atomic_store_explicit(&peer->last_write, now,
|
||||
memory_order_relaxed);
|
||||
peer->last_sendq_ok = now;
|
||||
}
|
||||
}
|
||||
|
||||
return status;
|
||||
|
|
|
@ -122,8 +122,45 @@ void bgp_packet_set_size(struct stream *s)
|
|||
*/
|
||||
static void bgp_packet_add(struct peer *peer, struct stream *s)
|
||||
{
|
||||
intmax_t delta;
|
||||
uint32_t holdtime;
|
||||
|
||||
frr_with_mutex(&peer->io_mtx) {
|
||||
/* if the queue is empty, reset the "last OK" timestamp to
|
||||
* now, otherwise if we write another packet immediately
|
||||
* after it'll get confused
|
||||
*/
|
||||
if (!stream_fifo_count_safe(peer->obuf))
|
||||
peer->last_sendq_ok = bgp_clock();
|
||||
|
||||
stream_fifo_push(peer->obuf, s);
|
||||
|
||||
delta = bgp_clock() - peer->last_sendq_ok;
|
||||
holdtime = atomic_load_explicit(&peer->holdtime,
|
||||
memory_order_relaxed);
|
||||
|
||||
/* Note that when we're here, we're adding some packet to the
|
||||
* OutQ. That includes keepalives when there is nothing to
|
||||
* do, so there's a guarantee we pass by here once in a while.
|
||||
*
|
||||
* That implies there is no need to go set up another separate
|
||||
* timer that ticks down SendHoldTime, as we'll be here sooner
|
||||
* or later anyway and will see the checks below failing.
|
||||
*/
|
||||
if (delta > 2 * (intmax_t)holdtime) {
|
||||
flog_err(
|
||||
EC_BGP_SENDQ_STUCK_PROPER,
|
||||
"%s has not made any SendQ progress for 2 holdtimes, terminating session",
|
||||
peer->host);
|
||||
BGP_EVENT_ADD(peer, TCP_fatal_error);
|
||||
} else if (delta > (intmax_t)holdtime &&
|
||||
bgp_clock() - peer->last_sendq_warn > 5) {
|
||||
flog_warn(
|
||||
EC_BGP_SENDQ_STUCK_WARN,
|
||||
"%s has not made any SendQ progress for 1 holdtime, peer overloaded?",
|
||||
peer->host);
|
||||
peer->last_sendq_warn = bgp_clock();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1533,6 +1533,11 @@ struct peer {
|
|||
/* timestamp when the last msg was written */
|
||||
_Atomic time_t last_update;
|
||||
|
||||
/* only updated under io_mtx.
|
||||
* last_sendq_warn is only for ratelimiting log warning messages.
|
||||
*/
|
||||
time_t last_sendq_ok, last_sendq_warn;
|
||||
|
||||
/* Notify data. */
|
||||
struct bgp_notify notify;
|
||||
|
||||
|
|
Loading…
Reference in a new issue