Merge pull request #18450 from donaldsharp/bgp_packet_reads

Bgp packet reads conversion to a FIFO
Russ White 2025-04-01 10:12:37 -04:00 committed by GitHub
commit c312917988
19 changed files with 210 additions and 59 deletions
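
In broad strokes, the change replaces the per-connection t_process_packet event with a single FIFO of connections owned by bgp_master: producers (the I/O pthread, the FSM, connection transfer) push a connection onto bm->connection_fifo under bm->peer_connection_mtx and kick one shared event, while bgp_process_packet() now runs with a NULL argument and pops connections off that FIFO on the main pthread. A condensed sketch of the handshake, assembled from the hunks below (illustrative only, not the literal code):

/* Producer side: queue the connection and schedule the shared event. */
frr_with_mutex (&bm->peer_connection_mtx) {
	if (!peer_connection_fifo_member(&bm->connection_fifo, connection))
		peer_connection_fifo_add_tail(&bm->connection_fifo, connection);
}
event_add_event(bm->master, bgp_process_packet, NULL, 0, &bm->e_process_packet);

/* Consumer side, inside bgp_process_packet() on the main pthread. */
frr_with_mutex (&bm->peer_connection_mtx)
	connection = peer_connection_fifo_pop(&bm->connection_fifo);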


@ -184,7 +184,11 @@ static struct peer *peer_xfer_conn(struct peer *from_peer)
EVENT_OFF(keeper->t_delayopen);
EVENT_OFF(keeper->t_connect_check_r);
EVENT_OFF(keeper->t_connect_check_w);
EVENT_OFF(keeper->t_process_packet);
frr_with_mutex (&bm->peer_connection_mtx) {
if (peer_connection_fifo_member(&bm->connection_fifo, keeper))
peer_connection_fifo_del(&bm->connection_fifo, keeper);
}
/*
* At this point in time, it is possible that there are packets pending
@ -305,8 +309,13 @@ static struct peer *peer_xfer_conn(struct peer *from_peer)
bgp_reads_on(keeper);
bgp_writes_on(keeper);
event_add_event(bm->master, bgp_process_packet, keeper, 0,
&keeper->t_process_packet);
frr_with_mutex (&bm->peer_connection_mtx) {
if (!peer_connection_fifo_member(&bm->connection_fifo, keeper)) {
peer_connection_fifo_add_tail(&bm->connection_fifo, keeper);
}
}
event_add_event(bm->master, bgp_process_packet, NULL, 0, &bm->e_process_packet);
return (peer);
}


@ -99,7 +99,11 @@ void bgp_reads_off(struct peer_connection *connection)
assert(fpt->running);
event_cancel_async(fpt->master, &connection->t_read, NULL);
EVENT_OFF(connection->t_process_packet);
frr_with_mutex (&bm->peer_connection_mtx) {
if (peer_connection_fifo_member(&bm->connection_fifo, connection))
peer_connection_fifo_del(&bm->connection_fifo, connection);
}
UNSET_FLAG(connection->thread_flags, PEER_THREAD_READS_ON);
}
@ -292,9 +296,13 @@ done:
event_add_read(fpt->master, bgp_process_reads, connection,
connection->fd, &connection->t_read);
if (added_pkt)
event_add_event(bm->master, bgp_process_packet, connection, 0,
&connection->t_process_packet);
if (added_pkt) {
frr_with_mutex (&bm->peer_connection_mtx) {
if (!peer_connection_fifo_member(&bm->connection_fifo, connection))
peer_connection_fifo_add_tail(&bm->connection_fifo, connection);
}
event_add_event(bm->master, bgp_process_packet, NULL, 0, &bm->e_process_packet);
}
}
/*


@ -10,6 +10,7 @@
#define BGP_WRITE_PACKET_MAX 64U
#define BGP_READ_PACKET_MAX 10U
#define BGP_PACKET_PROCESS_LIMIT 100
#include "bgpd/bgpd.h"
#include "frr_pthread.h"


@ -161,6 +161,14 @@ __attribute__((__noreturn__)) void sigint(void)
bgp_exit(0);
/*
* This is being done after bgp_exit because items may be removed
* from the connection_fifo
*/
peer_connection_fifo_fini(&bm->connection_fifo);
EVENT_OFF(bm->e_process_packet);
pthread_mutex_destroy(&bm->peer_connection_mtx);
exit(0);
}


@ -3974,6 +3974,18 @@ int bgp_capability_receive(struct peer_connection *connection,
* would not, making event flow difficult to understand. Please think twice
* before hacking this.
*
* Packet processing is now driven by a FIFO of connections that need to be
* handled. A single run of this loop processes at most BGP_PACKET_PROCESS_LIMIT
* (100) packets in total, and each individual connection is limited to the
* read quanta configured in bgp_vty.c. If a connection still has work to do,
* it is placed back at the tail of the queue. Note that event_should_yield()
* is also consulted to decide whether processing should stop so that other
* work can run; that check was added after withdrawals started being processed
* at scale and this function was observed holding the CPU for 40+ seconds.
* On a very fast test machine, roughly 2-3 update packets are processed before
* a yield is due; for withdrawals it is about 1 packet.
*
* Thread type: EVENT_EVENT
* @param thread
* @return 0
@ -3986,30 +3998,54 @@ void bgp_process_packet(struct event *thread)
uint32_t rpkt_quanta_old; // how many packets to read
int fsm_update_result; // return code of bgp_event_update()
int mprc; // message processing return code
uint32_t processed = 0, curr_connection_processed = 0;
bool more_work = false;
size_t count;
uint32_t total_packets_to_process, total_processed = 0;
connection = EVENT_ARG(thread);
frr_with_mutex (&bm->peer_connection_mtx)
connection = peer_connection_fifo_pop(&bm->connection_fifo);
if (!connection)
goto done;
total_packets_to_process = BGP_PACKET_PROCESS_LIMIT;
peer = connection->peer;
rpkt_quanta_old = atomic_load_explicit(&peer->bgp->rpkt_quanta,
memory_order_relaxed);
fsm_update_result = 0;
/* Guard against scheduled events that occur after peer deletion. */
if (connection->status == Deleted || connection->status == Clearing)
return;
while ((processed < total_packets_to_process) && connection) {
total_processed++;
/* Guard against scheduled events that occur after peer deletion. */
if (connection->status == Deleted || connection->status == Clearing) {
frr_with_mutex (&bm->peer_connection_mtx)
connection = peer_connection_fifo_pop(&bm->connection_fifo);
unsigned int processed = 0;
if (connection)
peer = connection->peer;
continue;
}
while (processed < rpkt_quanta_old) {
uint8_t type = 0;
bgp_size_t size;
char notify_data_length[2];
frr_with_mutex (&connection->io_mtx) {
frr_with_mutex (&connection->io_mtx)
peer->curr = stream_fifo_pop(connection->ibuf);
}
if (peer->curr == NULL) // no packets to process, hmm...
return;
if (peer->curr == NULL) {
frr_with_mutex (&bm->peer_connection_mtx)
connection = peer_connection_fifo_pop(&bm->connection_fifo);
if (connection)
peer = connection->peer;
continue;
}
/* skip the marker and copy the packet length */
stream_forward_getp(peer->curr, BGP_MARKER_SIZE);
@ -4113,32 +4149,81 @@ void bgp_process_packet(struct event *thread)
stream_free(peer->curr);
peer->curr = NULL;
processed++;
curr_connection_processed++;
/* Update FSM */
if (mprc != BGP_PACKET_NOOP)
fsm_update_result = bgp_event_update(connection, mprc);
else
continue;
/*
* If peer was deleted, do not process any more packets. This
* is usually due to executing BGP_Stop or a stub deletion.
*/
if (fsm_update_result == FSM_PEER_TRANSFERRED
|| fsm_update_result == FSM_PEER_STOPPED)
break;
}
if (fsm_update_result == FSM_PEER_TRANSFERRED ||
fsm_update_result == FSM_PEER_STOPPED) {
frr_with_mutex (&bm->peer_connection_mtx)
connection = peer_connection_fifo_pop(&bm->connection_fifo);
if (connection)
peer = connection->peer;
continue;
}
bool yield = event_should_yield(thread);
if (curr_connection_processed >= rpkt_quanta_old || yield) {
curr_connection_processed = 0;
frr_with_mutex (&bm->peer_connection_mtx) {
if (!peer_connection_fifo_member(&bm->connection_fifo, connection))
peer_connection_fifo_add_tail(&bm->connection_fifo,
connection);
if (!yield)
connection = peer_connection_fifo_pop(&bm->connection_fifo);
else
connection = NULL;
}
if (connection)
peer = connection->peer;
continue;
}
if (fsm_update_result != FSM_PEER_TRANSFERRED
&& fsm_update_result != FSM_PEER_STOPPED) {
frr_with_mutex (&connection->io_mtx) {
// more work to do, come back later
if (connection->ibuf->count > 0)
event_add_event(bm->master, bgp_process_packet,
connection, 0,
&connection->t_process_packet);
more_work = true;
else
more_work = false;
}
if (!more_work) {
frr_with_mutex (&bm->peer_connection_mtx)
connection = peer_connection_fifo_pop(&bm->connection_fifo);
if (connection)
peer = connection->peer;
}
}
if (connection) {
frr_with_mutex (&connection->io_mtx) {
if (connection->ibuf->count > 0)
more_work = true;
else
more_work = false;
}
frr_with_mutex (&bm->peer_connection_mtx) {
if (more_work &&
!peer_connection_fifo_member(&bm->connection_fifo, connection))
peer_connection_fifo_add_tail(&bm->connection_fifo, connection);
}
}
done:
frr_with_mutex (&bm->peer_connection_mtx)
count = peer_connection_fifo_count(&bm->connection_fifo);
if (count)
event_add_event(bm->master, bgp_process_packet, NULL, 0, &bm->e_process_packet);
}
/* Send EOR when routes are processed by selection deferral timer */
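
Because the new bgp_process_packet() is spread across several hunks above, here is a condensed sketch of its scheduling discipline (structure only; packet parsing, FSM updates and error handling are elided):

void bgp_process_packet(struct event *thread)
{
	struct peer_connection *connection;
	uint32_t rpkt_quanta_old; /* per-connection read quanta from bgp_vty.c */
	uint32_t processed = 0, curr_connection_processed = 0;
	size_t count;

	frr_with_mutex (&bm->peer_connection_mtx)
		connection = peer_connection_fifo_pop(&bm->connection_fifo);

	while (processed < BGP_PACKET_PROCESS_LIMIT && connection) {
		/* ... pop one packet from connection->ibuf and dispatch it ... */
		processed++;
		curr_connection_processed++;

		if (curr_connection_processed >= rpkt_quanta_old || event_should_yield(thread)) {
			/* Quanta exhausted (or the event has run long): requeue this
			 * connection at the tail; on a yield, stop entirely. */
		}
		/* ... when this connection's ibuf is drained, pop the next one ... */
	}

	/* If connections are still queued, reschedule the shared event. */
	frr_with_mutex (&bm->peer_connection_mtx)
		count = peer_connection_fifo_count(&bm->connection_fifo);
	if (count)
		event_add_event(bm->master, bgp_process_packet, NULL, 0, &bm->e_process_packet);
}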


@ -4201,12 +4201,30 @@ static wq_item_status meta_queue_process(struct work_queue *dummy, void *data)
{
struct meta_queue *mq = data;
uint32_t i;
uint32_t peers_on_fifo;
static uint32_t total_runs = 0;
total_runs++;
frr_with_mutex (&bm->peer_connection_mtx)
peers_on_fifo = peer_connection_fifo_count(&bm->connection_fifo);
/*
* If the number of peers on the FIFO is greater than 10, yield this run of
* the MetaQ so that packet processing can make progress against the incoming
* packets. The MetaQ should still get a chance to run occasionally, though,
* so let it process work on every 10th attempt.
*/
if (peers_on_fifo > 10 && total_runs % 10 != 0)
return WQ_QUEUE_BLOCKED;
for (i = 0; i < MQ_SIZE; i++)
if (process_subq(mq->subq[i], i)) {
mq->size--;
break;
}
return mq->size ? WQ_REQUEUE : WQ_SUCCESS;
}
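
The effect of the gate above is a duty cycle for the MetaQ while BGP is busy with packet input. A small, self-contained illustration of the predicate (hypothetical standalone program, not FRR code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same test as meta_queue_process(): skip this run unless it is every
 * 10th attempt, whenever more than 10 peers are backlogged on the FIFO. */
static bool metaq_should_block(uint32_t peers_on_fifo, uint32_t total_runs)
{
	return peers_on_fifo > 10 && total_runs % 10 != 0;
}

int main(void)
{
	/* With 30 peers backlogged, only runs 10, 20, ... do work, so the
	 * MetaQ runs at roughly a 10% duty cycle until the FIFO drains. */
	for (uint32_t run = 1; run <= 20; run++)
		printf("run %u: %s\n", run, metaq_should_block(30, run) ? "blocked" : "process");
	return 0;
}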


@ -8683,6 +8683,10 @@ void bgp_master_init(struct event_loop *master, const int buffer_size,
bm = &bgp_master;
/* Initialize the peer connection FIFO list */
peer_connection_fifo_init(&bm->connection_fifo);
pthread_mutex_init(&bm->peer_connection_mtx, NULL);
zebra_announce_init(&bm->zebra_announce_head);
zebra_l2_vni_init(&bm->zebra_l2_vni_head);
zebra_l3_vni_init(&bm->zebra_l3_vni_head);


@ -107,6 +107,9 @@ enum bgp_af_index {
extern struct frr_pthread *bgp_pth_io;
extern struct frr_pthread *bgp_pth_ka;
/* FIFO list for peer connections */
PREDECL_LIST(peer_connection_fifo);
/* BGP master for system wide configurations and variables. */
struct bgp_master {
/* BGP instance list. */
@ -121,6 +124,11 @@ struct bgp_master {
/* BGP port number. */
uint16_t port;
/* FIFO list head for peer connections */
struct peer_connection_fifo_head connection_fifo;
struct event *e_process_packet;
pthread_mutex_t peer_connection_mtx;
/* Listener addresses */
struct list *addresses;
@ -1378,7 +1386,6 @@ struct peer_connection {
struct event *t_pmax_restart;
struct event *t_routeadv;
struct event *t_process_packet;
struct event *t_stop_with_notify;
@ -1394,7 +1401,14 @@ struct peer_connection {
union sockunion *su_local; /* Sockunion of local address. */
union sockunion *su_remote; /* Sockunion of remote address. */
/* For FIFO list */
struct peer_connection_fifo_item fifo_item;
};
/* Declare the FIFO list implementation */
DECLARE_LIST(peer_connection_fifo, struct peer_connection, fifo_item);
const char *bgp_peer_get_connection_direction(struct peer_connection *connection);
extern struct peer_connection *bgp_peer_connection_new(struct peer *peer);
extern void bgp_peer_connection_free(struct peer_connection **connection);
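
PREDECL_LIST()/DECLARE_LIST() are FRR's typesafe container macros (lib/typesafe.h); given the embedded fifo_item member they generate the peer_connection_fifo_* helpers used throughout this change. As used in these hunks (a summary for orientation; the exact generated signatures live in lib/typesafe.h):

peer_connection_fifo_init(&bm->connection_fifo);                   /* bgp_master_init() */
peer_connection_fifo_add_tail(&bm->connection_fifo, connection);   /* producers: I/O thread, FSM */
if (peer_connection_fifo_member(&bm->connection_fifo, connection)) /* membership test before add/del */
	peer_connection_fifo_del(&bm->connection_fifo, connection);
connection = peer_connection_fifo_pop(&bm->connection_fifo);       /* consumer: bgp_process_packet() */
count = peer_connection_fifo_count(&bm->connection_fifo);          /* reschedule check, MetaQ gate */
peer_connection_fifo_fini(&bm->connection_fifo);                   /* sigint() teardown */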


@ -2577,3 +2577,8 @@ interface r1-eth514
ipv6 address 2001:db8:3:5::1/64
no shut
!
router bgp 1001
timers bgp 5 60
no bgp ebgp-requires-policy
read-quanta 1
!


@ -1,7 +1,4 @@
router bgp 1001
timers bgp 5 20
no bgp ebgp-requires-policy
read-quanta 1
neighbor 10.1.1.2 remote-as external
neighbor 10.1.2.2 remote-as external
neighbor 10.1.3.2 remote-as external


@ -1,7 +1,4 @@
router bgp 1001
timers bgp 5 20
no bgp ebgp-requires-policy
read-quanta 1
neighbor 2001:db8:1:1::2 remote-as external
neighbor 2001:db8:1:2::2 remote-as external
neighbor 2001:db8:1:3::2 remote-as external


@ -1,7 +1,4 @@
router bgp 1001
timers bgp 5 20
no bgp ebgp-requires-policy
read-quanta 1
neighbor r1-eth0 interface remote-as external
neighbor r1-eth1 interface remote-as external
neighbor r1-eth2 interface remote-as external


@ -2577,3 +2577,8 @@ interface r2-eth514
ipv6 address 2001:db8:3:5::2/64
no shutdown
!
router bgp 1002
timers bgp 5 60
no bgp ebgp-requires-policy
read-quanta 1
!


@ -1,7 +1,4 @@
router bgp 1002
timers bgp 5 20
no bgp ebgp-requires-policy
read-quanta 1
neighbor 10.1.1.1 remote-as external
neighbor 10.1.2.1 remote-as external
neighbor 10.1.3.1 remote-as external


@ -1,7 +1,4 @@
router bgp 1002
timers bgp 5 20
no bgp ebgp-requires-policy
read-quanta 1
neighbor 2001:db8:1:1::1 remote-as external
neighbor 2001:db8:1:2::1 remote-as external
neighbor 2001:db8:1:3::1 remote-as external


@ -1,7 +1,4 @@
router bgp 1002
timers bgp 5 20
no bgp ebgp-requires-policy
read-quanta 1
neighbor r2-eth0 interface remote-as external
neighbor r2-eth1 interface remote-as external
neighbor r2-eth2 interface remote-as external


@ -462,6 +462,8 @@ extern void meta_queue_free(struct meta_queue *mq, struct zebra_vrf *zvrf);
extern int zebra_rib_labeled_unicast(struct route_entry *re);
extern struct route_table *rib_table_ipv6;
extern uint32_t zebra_rib_meta_queue_size(void);
extern void rib_unlink(struct route_node *rn, struct route_entry *re);
extern int rib_gc_dest(struct route_node *rn);
extern struct route_table *rib_tables_iter_next(rib_tables_iter_t *iter);


@ -3302,8 +3302,8 @@ static int rib_meta_queue_add(struct meta_queue *mq, void *data)
mq->size++;
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
rnode_debug(rn, re->vrf_id, "queued rn %p into sub-queue %s",
(void *)rn, subqueue2str(qindex));
rnode_debug(rn, re->vrf_id, "queued rn %p into sub-queue %s mq size %u", (void *)rn,
subqueue2str(qindex), zrouter.mq->size);
return 0;
}
@ -3335,8 +3335,8 @@ static int rib_meta_queue_nhg_ctx_add(struct meta_queue *mq, void *data)
mq->size++;
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
zlog_debug("NHG Context id=%u queued into sub-queue %s",
ctx->id, subqueue2str(qindex));
zlog_debug("NHG Context id=%u queued into sub-queue %s mq size %u", ctx->id,
subqueue2str(qindex), zrouter.mq->size);
return 0;
}
@ -3363,8 +3363,8 @@ static int rib_meta_queue_nhg_process(struct meta_queue *mq, void *data,
mq->size++;
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
zlog_debug("NHG id=%u queued into sub-queue %s", nhe->id,
subqueue2str(qindex));
zlog_debug("NHG id=%u queued into sub-queue %s mq size %u", nhe->id,
subqueue2str(qindex), zrouter.mq->size);
return 0;
}
@ -3410,6 +3410,11 @@ static int mq_add_handler(void *data,
return mq_add_func(zrouter.mq, data);
}
uint32_t zebra_rib_meta_queue_size(void)
{
return zrouter.mq->size;
}
void mpls_ftn_uninstall(struct zebra_vrf *zvrf, enum lsp_types_t type,
struct prefix *prefix, uint8_t route_type,
uint8_t route_instance)
@ -4226,7 +4231,7 @@ static int rib_meta_queue_gr_run_add(struct meta_queue *mq, void *data)
mq->size++;
if (IS_ZEBRA_DEBUG_RIB_DETAILED)
zlog_debug("Graceful Run adding");
zlog_debug("Graceful Run adding mq size %u", zrouter.mq->size);
return 0;
}
@ -4241,10 +4246,9 @@ static int rib_meta_queue_early_route_add(struct meta_queue *mq, void *data)
if (IS_ZEBRA_DEBUG_RIB_DETAILED) {
struct vrf *vrf = vrf_lookup_by_id(ere->re->vrf_id);
zlog_debug("Route %pFX(%s) (%s) queued for processing into sub-queue %s",
&ere->p, VRF_LOGNAME(vrf),
ere->deletion ? "delete" : "add",
subqueue2str(META_QUEUE_EARLY_ROUTE));
zlog_debug("Route %pFX(%s) (%s) queued for processing into sub-queue %s mq size %u",
&ere->p, VRF_LOGNAME(vrf), ere->deletion ? "delete" : "add",
subqueue2str(META_QUEUE_EARLY_ROUTE), zrouter.mq->size);
}
return 0;


@ -530,6 +530,12 @@ static void zserv_process_messages(struct event *thread)
struct stream_fifo *cache = stream_fifo_new();
uint32_t p2p = zrouter.packets_to_process;
bool need_resched = false;
uint32_t meta_queue_size = zebra_rib_meta_queue_size();
if (meta_queue_size < p2p)
p2p = p2p - meta_queue_size;
else
p2p = 0;
frr_with_mutex (&client->ibuf_mtx) {
uint32_t i;
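
The zserv_process_messages() hunk above adds back-pressure from the RIB: the per-wakeup budget p2p is reduced by however many items are already on the meta queue, clamping at zero so zebra stops pulling in new client messages while the queue is deep. A minimal sketch of the clamp (hypothetical helper, for illustration only):

/* Hypothetical helper mirroring the clamp above; never returns a negative budget. */
static uint32_t zserv_packet_budget(uint32_t packets_to_process, uint32_t meta_queue_size)
{
	if (meta_queue_size < packets_to_process)
		return packets_to_process - meta_queue_size;
	return 0;
}

/* e.g. a budget of 100 with 40 queued meta-queue items leaves 60; with 150 queued, 0. */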