FRRouting/frr: Merge pull request #18450 from donaldsharp/bgp_packet_reads

BGP packet reads conversion to a FIFO

This commit is contained in: commit c312917988
@@ -184,7 +184,11 @@ static struct peer *peer_xfer_conn(struct peer *from_peer)
 	EVENT_OFF(keeper->t_delayopen);
 	EVENT_OFF(keeper->t_connect_check_r);
 	EVENT_OFF(keeper->t_connect_check_w);
 	EVENT_OFF(keeper->t_process_packet);
 
+	frr_with_mutex (&bm->peer_connection_mtx) {
+		if (peer_connection_fifo_member(&bm->connection_fifo, keeper))
+			peer_connection_fifo_del(&bm->connection_fifo, keeper);
+	}
+
 	/*
 	 * At this point in time, it is possible that there are packets pending
@@ -305,8 +309,13 @@ static struct peer *peer_xfer_conn(struct peer *from_peer)
 
 	bgp_reads_on(keeper);
 	bgp_writes_on(keeper);
-	event_add_event(bm->master, bgp_process_packet, keeper, 0,
-			&keeper->t_process_packet);
+
+	frr_with_mutex (&bm->peer_connection_mtx) {
+		if (!peer_connection_fifo_member(&bm->connection_fifo, keeper)) {
+			peer_connection_fifo_add_tail(&bm->connection_fifo, keeper);
+		}
+	}
+	event_add_event(bm->master, bgp_process_packet, NULL, 0, &bm->e_process_packet);
 
 	return (peer);
 }
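Note how the transfer path changes: the old code armed the per-connection t_process_packet event directly on the surviving connection, while the new code enqueues the connection on the shared FIFO (guarded by a membership check so it is never queued twice) and schedules the single global bm->e_process_packet event with a NULL argument. A minimal, self-contained sketch of that enqueue-if-absent pattern, with a hand-rolled intrusive queue and an on_fifo flag standing in for FRR's generated list functions (all names here are hypothetical, not the PR's API):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the peer_connection FIFO: a tiny intrusive queue. */
struct conn {
	int fd;
	struct conn *fifo_next; /* embedded link, like fifo_item below */
	bool on_fifo;           /* cheap duplicate-enqueue guard */
};

static struct conn *fifo_head, *fifo_tail;
static pthread_mutex_t fifo_mtx = PTHREAD_MUTEX_INITIALIZER;

/* Enqueue-if-absent: a connection is queued at most once no matter
 * how many read events fire before it is serviced. */
static void conn_fifo_add_tail_once(struct conn *c)
{
	pthread_mutex_lock(&fifo_mtx);
	if (!c->on_fifo) {
		c->on_fifo = true;
		c->fifo_next = NULL;
		if (fifo_tail)
			fifo_tail->fifo_next = c;
		else
			fifo_head = c;
		fifo_tail = c;
	}
	pthread_mutex_unlock(&fifo_mtx);
	/* In bgpd this is where the single global event
	 * (bm->e_process_packet, NULL arg) gets scheduled. */
}

static struct conn *conn_fifo_pop(void)
{
	pthread_mutex_lock(&fifo_mtx);
	struct conn *c = fifo_head;
	if (c) {
		fifo_head = c->fifo_next;
		if (!fifo_head)
			fifo_tail = NULL;
		c->on_fifo = false;
	}
	pthread_mutex_unlock(&fifo_mtx);
	return c;
}

int main(void)
{
	struct conn a = { .fd = 3 }, b = { .fd = 4 };

	conn_fifo_add_tail_once(&a);
	conn_fifo_add_tail_once(&a); /* duplicate is ignored */
	conn_fifo_add_tail_once(&b);

	for (struct conn *c; (c = conn_fifo_pop());)
		printf("servicing fd %d\n", c->fd); /* prints 3, then 4 */
	return 0;
}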
@@ -99,7 +99,11 @@ void bgp_reads_off(struct peer_connection *connection)
 	assert(fpt->running);
 
 	event_cancel_async(fpt->master, &connection->t_read, NULL);
 	EVENT_OFF(connection->t_process_packet);
 
+	frr_with_mutex (&bm->peer_connection_mtx) {
+		if (peer_connection_fifo_member(&bm->connection_fifo, connection))
+			peer_connection_fifo_del(&bm->connection_fifo, connection);
+	}
+
 	UNSET_FLAG(connection->thread_flags, PEER_THREAD_READS_ON);
 }
@@ -292,9 +296,13 @@ done:
 
 	event_add_read(fpt->master, bgp_process_reads, connection,
 		       connection->fd, &connection->t_read);
-	if (added_pkt)
-		event_add_event(bm->master, bgp_process_packet, connection, 0,
-				&connection->t_process_packet);
+	if (added_pkt) {
+		frr_with_mutex (&bm->peer_connection_mtx) {
+			if (!peer_connection_fifo_member(&bm->connection_fifo, connection))
+				peer_connection_fifo_add_tail(&bm->connection_fifo, connection);
+		}
+		event_add_event(bm->master, bgp_process_packet, NULL, 0, &bm->e_process_packet);
+	}
 }
 
 /*
@@ -10,6 +10,7 @@
 
 #define BGP_WRITE_PACKET_MAX 64U
 #define BGP_READ_PACKET_MAX 10U
+#define BGP_PACKET_PROCESS_LIMIT 100
 
 #include "bgpd/bgpd.h"
 #include "frr_pthread.h"
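Two budgets are now in play: the new BGP_PACKET_PROCESS_LIMIT caps how many packets one bgp_process_packet() run may handle across all connections, while the per-connection read quanta (rpkt_quanta, which I believe defaults to BGP_READ_PACKET_MAX; treat that as an assumption) caps each connection's share. A tiny arithmetic check of how the two budgets interact:

#include <stdio.h>

#define BGP_READ_PACKET_MAX 10U
#define BGP_PACKET_PROCESS_LIMIT 100

int main(void)
{
	/* Assumed default: rpkt_quanta seeded from BGP_READ_PACKET_MAX. */
	unsigned per_conn = BGP_READ_PACKET_MAX;
	unsigned conns = BGP_PACKET_PROCESS_LIMIT / per_conn;

	/* One event run drains at most 100 packets, i.e. 10 connections
	 * each consuming their full quanta of 10. */
	printf("at most %u packets per run: %u connections x %u packets\n",
	       BGP_PACKET_PROCESS_LIMIT, conns, per_conn);
	return 0;
}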
@@ -161,6 +161,14 @@ __attribute__((__noreturn__)) void sigint(void)
 
 	bgp_exit(0);
 
+	/*
+	 * This is being done after bgp_exit because items may be removed
+	 * from the connection_fifo
+	 */
+	peer_connection_fifo_fini(&bm->connection_fifo);
+	EVENT_OFF(bm->e_process_packet);
+	pthread_mutex_destroy(&bm->peer_connection_mtx);
+
 	exit(0);
 }
@@ -3974,6 +3974,18 @@ int bgp_capability_receive(struct peer_connection *connection,
  * would not, making event flow difficult to understand. Please think twice
  * before hacking this.
  *
+ * packet_processing is now a FIFO of connections that need to be handled.
+ * This loop has a maximum run of 100 (BGP_PACKET_PROCESS_LIMIT) packets,
+ * but each individual connection can only handle the quanta value as
+ * specified in bgp_vty.c. If the connection still has work to do, place it
+ * back on the back of the queue for more work. Do note that event_should_yield
+ * is also being called to figure out if processing should stop and work
+ * picked up after other items can run. This was added *After* withdrawals
+ * started being processed at scale and this function was taking cpu for 40+ seconds.
+ * On my machine we are getting 2-3 packets before a yield should happen in the
+ * update case. Withdrawal is 1 packet being processed (note this is a very very
+ * fast computer) before other items should be run.
+ *
  * Thread type: EVENT_EVENT
  * @param thread
  * @return 0
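The loop this comment describes can be modeled in a few lines: pop a connection off the FIFO, spend at most its quanta on it, requeue it at the tail if packets remain, and stop at the global limit. A single-threaded toy model (no mutexes, no real packets; all names are hypothetical, not the PR's code):

#include <stdio.h>

enum { PROCESS_LIMIT = 100, QUANTA = 10, NCONN = 4 };

static int pending[NCONN] = { 25, 3, 12, 40 }; /* queued pkts per conn */
static int fifo[64], head, tail;

static void push(int c) { fifo[tail++ % 64] = c; }
static int pop(void) { return head == tail ? -1 : fifo[head++ % 64]; }

int main(void)
{
	for (int c = 0; c < NCONN; c++)
		push(c);

	int processed = 0, visits = 0;
	while (processed < PROCESS_LIMIT) {
		int c = pop();
		if (c < 0)
			break; /* FIFO empty: all caught up */

		/* Per-connection budget, like rpkt_quanta. */
		int batch = 0;
		while (pending[c] && batch < QUANTA && processed < PROCESS_LIMIT) {
			pending[c]--;
			batch++;
			processed++;
		}
		if (pending[c])
			push(c); /* more work: back of the queue */
		visits++;
	}
	/* 80 packets drained in 10 connection visits for this input */
	printf("%d packets in %d connection visits\n", processed, visits);
	return 0;
}

The fairness property is the point: a connection with 40 queued packets cannot monopolize the run; it gets 10, goes to the tail, and everyone else gets a turn.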
@@ -3986,30 +3998,54 @@ void bgp_process_packet(struct event *thread)
 	uint32_t rpkt_quanta_old; // how many packets to read
 	int fsm_update_result;    // return code of bgp_event_update()
 	int mprc;                 // message processing return code
+	uint32_t processed = 0, curr_connection_processed = 0;
+	bool more_work = false;
+	size_t count;
+	uint32_t total_packets_to_process, total_processed = 0;
 
-	connection = EVENT_ARG(thread);
+	frr_with_mutex (&bm->peer_connection_mtx)
+		connection = peer_connection_fifo_pop(&bm->connection_fifo);
+
+	if (!connection)
+		goto done;
+
+	total_packets_to_process = BGP_PACKET_PROCESS_LIMIT;
 	peer = connection->peer;
 	rpkt_quanta_old = atomic_load_explicit(&peer->bgp->rpkt_quanta,
 					       memory_order_relaxed);
 
 	fsm_update_result = 0;
 
-	/* Guard against scheduled events that occur after peer deletion. */
-	if (connection->status == Deleted || connection->status == Clearing)
-		return;
-
-	unsigned int processed = 0;
-
-	while (processed < rpkt_quanta_old) {
+	while ((processed < total_packets_to_process) && connection) {
+		total_processed++;
+		/* Guard against scheduled events that occur after peer deletion. */
+		if (connection->status == Deleted || connection->status == Clearing) {
+			frr_with_mutex (&bm->peer_connection_mtx)
+				connection = peer_connection_fifo_pop(&bm->connection_fifo);
+
+			if (connection)
+				peer = connection->peer;
+
+			continue;
+		}
+
 		uint8_t type = 0;
 		bgp_size_t size;
 		char notify_data_length[2];
 
-		frr_with_mutex (&connection->io_mtx) {
+		frr_with_mutex (&connection->io_mtx)
 			peer->curr = stream_fifo_pop(connection->ibuf);
-		}
 
-		if (peer->curr == NULL) // no packets to process, hmm...
-			return;
+		if (peer->curr == NULL) {
+			frr_with_mutex (&bm->peer_connection_mtx)
+				connection = peer_connection_fifo_pop(&bm->connection_fifo);
+
+			if (connection)
+				peer = connection->peer;
+
+			continue;
+		}
 
 		/* skip the marker and copy the packet length */
 		stream_forward_getp(peer->curr, BGP_MARKER_SIZE);
@@ -4113,32 +4149,81 @@ void bgp_process_packet(struct event *thread)
 		stream_free(peer->curr);
 		peer->curr = NULL;
 		processed++;
+		curr_connection_processed++;
 
 		/* Update FSM */
 		if (mprc != BGP_PACKET_NOOP)
 			fsm_update_result = bgp_event_update(connection, mprc);
 		else
 			continue;
 
 		/*
 		 * If peer was deleted, do not process any more packets. This
 		 * is usually due to executing BGP_Stop or a stub deletion.
 		 */
-		if (fsm_update_result == FSM_PEER_TRANSFERRED
-		    || fsm_update_result == FSM_PEER_STOPPED)
-			break;
-	}
+		if (fsm_update_result == FSM_PEER_TRANSFERRED ||
+		    fsm_update_result == FSM_PEER_STOPPED) {
+			frr_with_mutex (&bm->peer_connection_mtx)
+				connection = peer_connection_fifo_pop(&bm->connection_fifo);
+
+			if (connection)
+				peer = connection->peer;
+
+			continue;
+		}
+
+		bool yield = event_should_yield(thread);
+		if (curr_connection_processed >= rpkt_quanta_old || yield) {
+			curr_connection_processed = 0;
+			frr_with_mutex (&bm->peer_connection_mtx) {
+				if (!peer_connection_fifo_member(&bm->connection_fifo, connection))
+					peer_connection_fifo_add_tail(&bm->connection_fifo,
+								      connection);
+				if (!yield)
+					connection = peer_connection_fifo_pop(&bm->connection_fifo);
+				else
+					connection = NULL;
+			}
+			if (connection)
+				peer = connection->peer;
+
+			continue;
+		}
 
-	if (fsm_update_result != FSM_PEER_TRANSFERRED
-	    && fsm_update_result != FSM_PEER_STOPPED) {
 		frr_with_mutex (&connection->io_mtx) {
-			// more work to do, come back later
 			if (connection->ibuf->count > 0)
-				event_add_event(bm->master, bgp_process_packet,
-						connection, 0,
-						&connection->t_process_packet);
+				more_work = true;
+			else
+				more_work = false;
 		}
+
+		if (!more_work) {
+			frr_with_mutex (&bm->peer_connection_mtx)
+				connection = peer_connection_fifo_pop(&bm->connection_fifo);
+
+			if (connection)
+				peer = connection->peer;
+		}
 	}
+
+	if (connection) {
+		frr_with_mutex (&connection->io_mtx) {
+			if (connection->ibuf->count > 0)
+				more_work = true;
+			else
+				more_work = false;
+		}
+		frr_with_mutex (&bm->peer_connection_mtx) {
+			if (more_work &&
+			    !peer_connection_fifo_member(&bm->connection_fifo, connection))
+				peer_connection_fifo_add_tail(&bm->connection_fifo, connection);
+		}
+	}
+
+done:
+	frr_with_mutex (&bm->peer_connection_mtx)
+		count = peer_connection_fifo_count(&bm->connection_fifo);
+
+	if (count)
+		event_add_event(bm->master, bgp_process_packet, NULL, 0, &bm->e_process_packet);
 }
 
 /* Send EOR when routes are processed by selection deferral timer */
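Besides the packet-count budgets, the loop leans on event_should_yield() to bound wall-clock time: when the yield check fires, the current connection goes back on the FIFO and the run ends early. A rough standalone model of such a check, assuming a 10ms time slice similar to the event library's default (the constant and names here are assumptions, not FRR's API):

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static struct timespec start;

static long elapsed_us(void)
{
	struct timespec now;
	clock_gettime(CLOCK_MONOTONIC, &now);
	return (now.tv_sec - start.tv_sec) * 1000000L +
	       (now.tv_nsec - start.tv_nsec) / 1000L;
}

/* Yield once this event has held the thread longer than its slice. */
static bool should_yield(long slice_us)
{
	return elapsed_us() > slice_us;
}

int main(void)
{
	clock_gettime(CLOCK_MONOTONIC, &start);

	long work = 0;
	/* Process "packets" until the slice expires; real code would
	 * requeue the connection and reschedule the event here. */
	while (!should_yield(10000))
		work++;

	printf("did %ld units before yielding\n", work);
	return 0;
}

This is why the comment's numbers matter: with large UPDATE or withdraw packets, only 1-3 packets fit in a slice, so the time check, not the packet counter, is what actually returns control to the event loop under load.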
@@ -4201,12 +4201,30 @@ static wq_item_status meta_queue_process(struct work_queue *dummy, void *data)
 {
 	struct meta_queue *mq = data;
 	uint32_t i;
+	uint32_t peers_on_fifo;
+	static uint32_t total_runs = 0;
+
+	total_runs++;
+
+	frr_with_mutex (&bm->peer_connection_mtx)
+		peers_on_fifo = peer_connection_fifo_count(&bm->connection_fifo);
+
+	/*
+	 * If the number of peers on the fifo is greater than 10
+	 * let's yield this run of the MetaQ to allow the packet processing to make
+	 * progress against the incoming packets. But we should also
+	 * attempt to allow this to run occasionally. Let's run
+	 * something every 10 attempts to process the work queue.
+	 */
+	if (peers_on_fifo > 10 && total_runs % 10 != 0)
+		return WQ_QUEUE_BLOCKED;
 
 	for (i = 0; i < MQ_SIZE; i++)
 		if (process_subq(mq->subq[i], i)) {
 			mq->size--;
 			break;
 		}
 
 	return mq->size ? WQ_REQUEUE : WQ_SUCCESS;
 }
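The gating logic above is worth isolating: MetaQ work is blocked while more than 10 peers sit on the packet FIFO, but every 10th attempt runs anyway, so route processing can be slowed yet never starved outright. A small standalone model of that modulo gate (function names hypothetical):

#include <stdbool.h>
#include <stdio.h>

/* Block the work queue while the packet FIFO is busy, but let every
 * 10th attempt through (WQ_QUEUE_BLOCKED is returned otherwise). */
static bool metaq_should_run(unsigned peers_on_fifo, unsigned total_runs)
{
	if (peers_on_fifo > 10 && total_runs % 10 != 0)
		return false;
	return true;
}

int main(void)
{
	unsigned ran = 0;

	for (unsigned run = 1; run <= 30; run++)
		if (metaq_should_run(50, run)) /* heavy packet load */
			ran++;

	printf("%u of 30 attempts ran under load\n", ran); /* prints 3 */
	return 0;
}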
@@ -8683,6 +8683,10 @@ void bgp_master_init(struct event_loop *master, const int buffer_size,
 
 	bm = &bgp_master;
 
+	/* Initialize the peer connection FIFO list */
+	peer_connection_fifo_init(&bm->connection_fifo);
+	pthread_mutex_init(&bm->peer_connection_mtx, NULL);
+
 	zebra_announce_init(&bm->zebra_announce_head);
 	zebra_l2_vni_init(&bm->zebra_l2_vni_head);
 	zebra_l3_vni_init(&bm->zebra_l3_vni_head);
bgpd/bgpd.h (16 changed lines)
@@ -107,6 +107,9 @@ enum bgp_af_index {
 extern struct frr_pthread *bgp_pth_io;
 extern struct frr_pthread *bgp_pth_ka;
 
+/* FIFO list for peer connections */
+PREDECL_LIST(peer_connection_fifo);
+
 /* BGP master for system wide configurations and variables. */
 struct bgp_master {
 	/* BGP instance list. */
@@ -121,6 +124,11 @@ struct bgp_master {
 	/* BGP port number. */
 	uint16_t port;
 
+	/* FIFO list head for peer connections */
+	struct peer_connection_fifo_head connection_fifo;
+	struct event *e_process_packet;
+	pthread_mutex_t peer_connection_mtx;
+
 	/* Listener addresses */
 	struct list *addresses;
@@ -1378,7 +1386,6 @@ struct peer_connection {
 	struct event *t_pmax_restart;
 
 	struct event *t_routeadv;
-	struct event *t_process_packet;
 
 	struct event *t_stop_with_notify;
@@ -1394,7 +1401,14 @@ struct peer_connection {
 
 	union sockunion *su_local;  /* Sockunion of local address. */
 	union sockunion *su_remote; /* Sockunion of remote address. */
+
+	/* For FIFO list */
+	struct peer_connection_fifo_item fifo_item;
 };
 
+/* Declare the FIFO list implementation */
+DECLARE_LIST(peer_connection_fifo, struct peer_connection, fifo_item);
+
 const char *bgp_peer_get_connection_direction(struct peer_connection *connection);
 extern struct peer_connection *bgp_peer_connection_new(struct peer *peer);
 extern void bgp_peer_connection_free(struct peer_connection **connection);
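The fifo_item member plus the PREDECL_LIST/DECLARE_LIST pair is FRR's intrusive typesafe-list pattern: the link lives inside struct peer_connection itself, so enqueueing a connection allocates nothing and removal needs no search for a wrapper node. A minimal illustration of the embedded-link idea (hand-rolled with container_of; this is not FRR's actual macro expansion):

#include <stdio.h>
#include <stddef.h>

/* The element embeds its own link, like fifo_item above. */
struct item { struct item *next; };

struct peer_connection {
	int fd;
	struct item fifo_item; /* embedded link, no separate allocation */
};

/* Recover the containing struct from a pointer to the embedded link. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct peer_connection pc = { .fd = 7 };
	struct item *it = &pc.fifo_item;

	struct peer_connection *back =
		container_of(it, struct peer_connection, fifo_item);
	printf("fd=%d, recovered without any allocation\n", back->fd);
	return 0;
}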
@@ -2577,3 +2577,8 @@ interface r1-eth514
  ipv6 address 2001:db8:3:5::1/64
  no shut
 !
+router bgp 1001
+ timers bgp 5 60
+ no bgp ebgp-requires-policy
+ read-quanta 1
+!
@@ -1,7 +1,4 @@
 router bgp 1001
- timers bgp 5 20
- no bgp ebgp-requires-policy
- read-quanta 1
 neighbor 10.1.1.2 remote-as external
 neighbor 10.1.2.2 remote-as external
 neighbor 10.1.3.2 remote-as external
@@ -1,7 +1,4 @@
 router bgp 1001
- timers bgp 5 20
- no bgp ebgp-requires-policy
- read-quanta 1
 neighbor 2001:db8:1:1::2 remote-as external
 neighbor 2001:db8:1:2::2 remote-as external
 neighbor 2001:db8:1:3::2 remote-as external
@@ -1,7 +1,4 @@
 router bgp 1001
- timers bgp 5 20
- no bgp ebgp-requires-policy
- read-quanta 1
 neighbor r1-eth0 interface remote-as external
 neighbor r1-eth1 interface remote-as external
 neighbor r1-eth2 interface remote-as external
@@ -2577,3 +2577,8 @@ interface r2-eth514
  ipv6 address 2001:db8:3:5::2/64
  no shutdown
 !
+router bgp 1002
+ timers bgp 5 60
+ no bgp ebgp-requires-policy
+ read-quanta 1
+!
@@ -1,7 +1,4 @@
 router bgp 1002
- timers bgp 5 20
- no bgp ebgp-requires-policy
- read-quanta 1
 neighbor 10.1.1.1 remote-as external
 neighbor 10.1.2.1 remote-as external
 neighbor 10.1.3.1 remote-as external
@@ -1,7 +1,4 @@
 router bgp 1002
- timers bgp 5 20
- no bgp ebgp-requires-policy
- read-quanta 1
 neighbor 2001:db8:1:1::1 remote-as external
 neighbor 2001:db8:1:2::1 remote-as external
 neighbor 2001:db8:1:3::1 remote-as external
@@ -1,7 +1,4 @@
 router bgp 1002
- timers bgp 5 20
- no bgp ebgp-requires-policy
- read-quanta 1
 neighbor r2-eth0 interface remote-as external
 neighbor r2-eth1 interface remote-as external
 neighbor r2-eth2 interface remote-as external
@@ -462,6 +462,8 @@ extern void meta_queue_free(struct meta_queue *mq, struct zebra_vrf *zvrf);
 extern int zebra_rib_labeled_unicast(struct route_entry *re);
 extern struct route_table *rib_table_ipv6;
 
+extern uint32_t zebra_rib_meta_queue_size(void);
+
 extern void rib_unlink(struct route_node *rn, struct route_entry *re);
 extern int rib_gc_dest(struct route_node *rn);
 extern struct route_table *rib_tables_iter_next(rib_tables_iter_t *iter);
@@ -3302,8 +3302,8 @@ static int rib_meta_queue_add(struct meta_queue *mq, void *data)
 	mq->size++;
 
 	if (IS_ZEBRA_DEBUG_RIB_DETAILED)
-		rnode_debug(rn, re->vrf_id, "queued rn %p into sub-queue %s",
-			    (void *)rn, subqueue2str(qindex));
+		rnode_debug(rn, re->vrf_id, "queued rn %p into sub-queue %s mq size %u", (void *)rn,
+			    subqueue2str(qindex), zrouter.mq->size);
 
 	return 0;
 }
@@ -3335,8 +3335,8 @@ static int rib_meta_queue_nhg_ctx_add(struct meta_queue *mq, void *data)
 	mq->size++;
 
 	if (IS_ZEBRA_DEBUG_RIB_DETAILED)
-		zlog_debug("NHG Context id=%u queued into sub-queue %s",
-			   ctx->id, subqueue2str(qindex));
+		zlog_debug("NHG Context id=%u queued into sub-queue %s mq size %u", ctx->id,
+			   subqueue2str(qindex), zrouter.mq->size);
 
 	return 0;
 }
@@ -3363,8 +3363,8 @@ static int rib_meta_queue_nhg_process(struct meta_queue *mq, void *data,
 	mq->size++;
 
 	if (IS_ZEBRA_DEBUG_RIB_DETAILED)
-		zlog_debug("NHG id=%u queued into sub-queue %s", nhe->id,
-			   subqueue2str(qindex));
+		zlog_debug("NHG id=%u queued into sub-queue %s mq size %u", nhe->id,
+			   subqueue2str(qindex), zrouter.mq->size);
 
 	return 0;
 }
@@ -3410,6 +3410,11 @@ static int mq_add_handler(void *data,
 	return mq_add_func(zrouter.mq, data);
 }
 
+uint32_t zebra_rib_meta_queue_size(void)
+{
+	return zrouter.mq->size;
+}
+
 void mpls_ftn_uninstall(struct zebra_vrf *zvrf, enum lsp_types_t type,
 			struct prefix *prefix, uint8_t route_type,
 			uint8_t route_instance)
@@ -4226,7 +4231,7 @@ static int rib_meta_queue_gr_run_add(struct meta_queue *mq, void *data)
 	mq->size++;
 
 	if (IS_ZEBRA_DEBUG_RIB_DETAILED)
-		zlog_debug("Graceful Run adding");
+		zlog_debug("Graceful Run adding mq size %u", zrouter.mq->size);
 
 	return 0;
 }
@@ -4241,10 +4246,9 @@ static int rib_meta_queue_early_route_add(struct meta_queue *mq, void *data)
 	if (IS_ZEBRA_DEBUG_RIB_DETAILED) {
 		struct vrf *vrf = vrf_lookup_by_id(ere->re->vrf_id);
 
-		zlog_debug("Route %pFX(%s) (%s) queued for processing into sub-queue %s",
-			   &ere->p, VRF_LOGNAME(vrf),
-			   ere->deletion ? "delete" : "add",
-			   subqueue2str(META_QUEUE_EARLY_ROUTE));
+		zlog_debug("Route %pFX(%s) (%s) queued for processing into sub-queue %s mq size %u",
+			   &ere->p, VRF_LOGNAME(vrf), ere->deletion ? "delete" : "add",
+			   subqueue2str(META_QUEUE_EARLY_ROUTE), zrouter.mq->size);
 	}
 
 	return 0;
@@ -530,6 +530,12 @@ static void zserv_process_messages(struct event *thread)
 	struct stream_fifo *cache = stream_fifo_new();
 	uint32_t p2p = zrouter.packets_to_process;
 	bool need_resched = false;
+	uint32_t meta_queue_size = zebra_rib_meta_queue_size();
+
+	if (meta_queue_size < p2p)
+		p2p = p2p - meta_queue_size;
+	else
+		p2p = 0;
 
 	frr_with_mutex (&client->ibuf_mtx) {
 		uint32_t i;
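The effect of this last hunk is backpressure on the zebra side: the deeper the rib meta queue, the fewer zapi messages zebra reads in this pass, down to zero when the queue is saturated. The subtraction in isolation, with packets_to_process assumed to default to 1000 (an assumption; check zebra's actual default):

#include <stdio.h>

/* Read fewer messages when the meta queue is already deep, so zebra
 * drains existing work before taking on more. */
static unsigned packets_to_read(unsigned p2p, unsigned meta_queue_size)
{
	if (meta_queue_size < p2p)
		return p2p - meta_queue_size;
	return 0;
}

int main(void)
{
	unsigned p2p = 1000; /* assumed zrouter.packets_to_process default */

	printf("%u\n", packets_to_read(p2p, 0));    /* idle: 1000 */
	printf("%u\n", packets_to_read(p2p, 750));  /* busy: 250 */
	printf("%u\n", packets_to_read(p2p, 5000)); /* saturated: 0 */
	return 0;
}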