forked from Mirror/frr
bgpd: implement SendHoldTimer
As described by https://www.ietf.org/archive/id/draft-spaghetti-idr-bgp-sendholdtimer-04.html Since this replicates the HoldTime check on the receiver that is already part of the protocol, I do not believe it necessary to wait for IETF progress on this draft. It's just replicating an existing element of the protocol at the other side of the session. Signed-off-by: David Lamparter <equinox@opensourcerouting.org>
This commit is contained in:
parent
18028bdb9b
commit
bd9fb6f368
|
@ -181,6 +181,12 @@ static struct log_ref ferr_bgp_warn[] = {
|
||||||
.description = "BGP is in the process of building NLRI information for a peer and has discovered an inconsistent internal state",
|
.description = "BGP is in the process of building NLRI information for a peer and has discovered an inconsistent internal state",
|
||||||
.suggestion = "Gather log files and open an Issue, restart FRR",
|
.suggestion = "Gather log files and open an Issue, restart FRR",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
.code = EC_BGP_SENDQ_STUCK_WARN,
|
||||||
|
.title = "BGP has been unable to send anything to a peer for an extended time",
|
||||||
|
.description = "The BGP peer does not seem to be receiving or processing any data received from us, causing updates to be delayed.",
|
||||||
|
.suggestion = "Check connectivity to the peer and that it is not overloaded",
|
||||||
|
},
|
||||||
{
|
{
|
||||||
.code = END_FERR,
|
.code = END_FERR,
|
||||||
}
|
}
|
||||||
|
@ -480,6 +486,12 @@ static struct log_ref ferr_bgp_err[] = {
|
||||||
.description = "BGP when using a v6 peer requires a v6 LL address to be configured on the outgoing interface as per RFC 4291 section 2.1",
|
.description = "BGP when using a v6 peer requires a v6 LL address to be configured on the outgoing interface as per RFC 4291 section 2.1",
|
||||||
.suggestion = "Add a v6 LL address to the outgoing interfaces as per RFC",
|
.suggestion = "Add a v6 LL address to the outgoing interfaces as per RFC",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
.code = EC_BGP_SENDQ_STUCK_PROPER,
|
||||||
|
.title = "BGP is shutting down a peer due to being unable to send anything for an extended time",
|
||||||
|
.description = "No BGP updates were successfully sent to the peer for more than twice the holdtime.",
|
||||||
|
.suggestion = "Check connectivity to the peer and that it is not overloaded",
|
||||||
|
},
|
||||||
{
|
{
|
||||||
.code = END_FERR,
|
.code = END_FERR,
|
||||||
}
|
}
|
||||||
|
|
|
@ -102,6 +102,8 @@ enum bgp_log_refs {
|
||||||
EC_BGP_INVALID_BGP_INSTANCE,
|
EC_BGP_INVALID_BGP_INSTANCE,
|
||||||
EC_BGP_INVALID_ROUTE,
|
EC_BGP_INVALID_ROUTE,
|
||||||
EC_BGP_NO_LL_ADDRESS_AVAILABLE,
|
EC_BGP_NO_LL_ADDRESS_AVAILABLE,
|
||||||
|
EC_BGP_SENDQ_STUCK_WARN,
|
||||||
|
EC_BGP_SENDQ_STUCK_PROPER,
|
||||||
};
|
};
|
||||||
|
|
||||||
extern void bgp_error_init(void);
|
extern void bgp_error_init(void);
|
||||||
|
|
|
@ -298,6 +298,7 @@ static uint16_t bgp_write(struct peer *peer)
|
||||||
unsigned int iovsz;
|
unsigned int iovsz;
|
||||||
unsigned int strmsz;
|
unsigned int strmsz;
|
||||||
unsigned int total_written;
|
unsigned int total_written;
|
||||||
|
time_t now;
|
||||||
|
|
||||||
wpkt_quanta_old = atomic_load_explicit(&peer->bgp->wpkt_quanta,
|
wpkt_quanta_old = atomic_load_explicit(&peer->bgp->wpkt_quanta,
|
||||||
memory_order_relaxed);
|
memory_order_relaxed);
|
||||||
|
@ -430,19 +431,22 @@ static uint16_t bgp_write(struct peer *peer)
|
||||||
}
|
}
|
||||||
|
|
||||||
done : {
|
done : {
|
||||||
|
now = bgp_clock();
|
||||||
/*
|
/*
|
||||||
* Update last_update if UPDATEs were written.
|
* Update last_update if UPDATEs were written.
|
||||||
* Note: that these are only updated at end,
|
* Note: that these are only updated at end,
|
||||||
* not per message (i.e., per loop)
|
* not per message (i.e., per loop)
|
||||||
*/
|
*/
|
||||||
if (uo)
|
if (uo)
|
||||||
atomic_store_explicit(&peer->last_update, bgp_clock(),
|
atomic_store_explicit(&peer->last_update, now,
|
||||||
memory_order_relaxed);
|
memory_order_relaxed);
|
||||||
|
|
||||||
/* If we TXed any flavor of packet */
|
/* If we TXed any flavor of packet */
|
||||||
if (update_last_write)
|
if (update_last_write) {
|
||||||
atomic_store_explicit(&peer->last_write, bgp_clock(),
|
atomic_store_explicit(&peer->last_write, now,
|
||||||
memory_order_relaxed);
|
memory_order_relaxed);
|
||||||
|
peer->last_sendq_ok = now;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
|
|
|
@ -122,8 +122,45 @@ void bgp_packet_set_size(struct stream *s)
|
||||||
*/
|
*/
|
||||||
static void bgp_packet_add(struct peer *peer, struct stream *s)
|
static void bgp_packet_add(struct peer *peer, struct stream *s)
|
||||||
{
|
{
|
||||||
|
intmax_t delta;
|
||||||
|
uint32_t holdtime;
|
||||||
|
|
||||||
frr_with_mutex(&peer->io_mtx) {
|
frr_with_mutex(&peer->io_mtx) {
|
||||||
|
/* if the queue is empty, reset the "last OK" timestamp to
|
||||||
|
* now, otherwise if we write another packet immediately
|
||||||
|
* after it'll get confused
|
||||||
|
*/
|
||||||
|
if (!stream_fifo_count_safe(peer->obuf))
|
||||||
|
peer->last_sendq_ok = bgp_clock();
|
||||||
|
|
||||||
stream_fifo_push(peer->obuf, s);
|
stream_fifo_push(peer->obuf, s);
|
||||||
|
|
||||||
|
delta = bgp_clock() - peer->last_sendq_ok;
|
||||||
|
holdtime = atomic_load_explicit(&peer->holdtime,
|
||||||
|
memory_order_relaxed);
|
||||||
|
|
||||||
|
/* Note that when we're here, we're adding some packet to the
|
||||||
|
* OutQ. That includes keepalives when there is nothing to
|
||||||
|
* do, so there's a guarantee we pass by here once in a while.
|
||||||
|
*
|
||||||
|
* That implies there is no need to go set up another separate
|
||||||
|
* timer that ticks down SendHoldTime, as we'll be here sooner
|
||||||
|
* or later anyway and will see the checks below failing.
|
||||||
|
*/
|
||||||
|
if (delta > 2 * (intmax_t)holdtime) {
|
||||||
|
flog_err(
|
||||||
|
EC_BGP_SENDQ_STUCK_PROPER,
|
||||||
|
"%s has not made any SendQ progress for 2 holdtimes, terminating session",
|
||||||
|
peer->host);
|
||||||
|
BGP_EVENT_ADD(peer, TCP_fatal_error);
|
||||||
|
} else if (delta > (intmax_t)holdtime &&
|
||||||
|
bgp_clock() - peer->last_sendq_warn > 5) {
|
||||||
|
flog_warn(
|
||||||
|
EC_BGP_SENDQ_STUCK_WARN,
|
||||||
|
"%s has not made any SendQ progress for 1 holdtime, peer overloaded?",
|
||||||
|
peer->host);
|
||||||
|
peer->last_sendq_warn = bgp_clock();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1533,6 +1533,11 @@ struct peer {
|
||||||
/* timestamp when the last msg was written */
|
/* timestamp when the last msg was written */
|
||||||
_Atomic time_t last_update;
|
_Atomic time_t last_update;
|
||||||
|
|
||||||
|
/* only updated under io_mtx.
|
||||||
|
* last_sendq_warn is only for ratelimiting log warning messages.
|
||||||
|
*/
|
||||||
|
time_t last_sendq_ok, last_sendq_warn;
|
||||||
|
|
||||||
/* Notify data. */
|
/* Notify data. */
|
||||||
struct bgp_notify notify;
|
struct bgp_notify notify;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue