forked from Mirror/frr
lib: make cputime checks runtime options (v2)
...really no reason to force this into a compile time decision. The only point is avoiding the getrusage() syscall, which can easily be a runtime decision. [v2: also split cputime & walltime limits] Signed-off-by: David Lamparter <equinox@diac24.net>
This commit is contained in:
parent
94a64dab86
commit
45f0118832
11
configure.ac
11
configure.ac
|
@ -783,7 +783,6 @@ fi
|
|||
|
||||
if test "$enable_datacenter" = "yes" ; then
|
||||
AC_DEFINE([HAVE_DATACENTER], [1], [Compile extensions for a DataCenter])
|
||||
AC_MSG_WARN([The --enable-datacenter compile time option is deprecated. Please modify the init script to pass -F datacenter to the daemons instead.])
|
||||
DFLT_NAME="datacenter"
|
||||
else
|
||||
DFLT_NAME="traditional"
|
||||
|
@ -2695,6 +2694,16 @@ vici socket path : ${vici_socket}
|
|||
The above user and group must have read/write access to the state file
|
||||
directory and to the config files in the config file directory."
|
||||
|
||||
if test -n "$enable_datacenter"; then
|
||||
AC_MSG_WARN([The --enable-datacenter compile time option is deprecated. Please modify the init script to pass -F datacenter to the daemons instead.])
|
||||
fi
|
||||
if test -n "$enable_time_check"; then
|
||||
AC_MSG_WARN([The --enable-time-check compile time option is deprecated. Please use the service cputime-stats configuration option instead.])
|
||||
fi
|
||||
if test -n "$enable_cpu_time"; then
|
||||
AC_MSG_WARN([The --enable-cpu-time compile time option is deprecated. Please use the service cputime-warning NNN configuration option instead.])
|
||||
fi
|
||||
|
||||
if test "$enable_doc" != "no" -a "$frr_py_mod_sphinx" = "false"; then
|
||||
AC_MSG_WARN([sphinx is missing but required to build documentation])
|
||||
fi
|
||||
|
|
|
@ -434,6 +434,36 @@ static int config_write_host(struct vty *vty)
|
|||
}
|
||||
log_config_write(vty);
|
||||
|
||||
/* print disable always, but enable only if default is flipped
|
||||
* => prep for future removal of compile-time knob
|
||||
*/
|
||||
if (!cputime_enabled)
|
||||
vty_out(vty, "no service cputime-stats\n");
|
||||
#ifdef EXCLUDE_CPU_TIME
|
||||
else
|
||||
vty_out(vty, "service cputime-stats\n");
|
||||
#endif
|
||||
|
||||
if (!cputime_threshold)
|
||||
vty_out(vty, "no service cputime-warning\n");
|
||||
#if defined(CONSUMED_TIME_CHECK) && CONSUMED_TIME_CHECK != 5000000
|
||||
else /* again, always print non-default */
|
||||
#else
|
||||
else if (cputime_threshold != 5000000)
|
||||
#endif
|
||||
vty_out(vty, "service cputime-warning %lu\n",
|
||||
cputime_threshold);
|
||||
|
||||
if (!walltime_threshold)
|
||||
vty_out(vty, "no service walltime-warning\n");
|
||||
#if defined(CONSUMED_TIME_CHECK) && CONSUMED_TIME_CHECK != 5000000
|
||||
else /* again, always print non-default */
|
||||
#else
|
||||
else if (walltime_threshold != 5000000)
|
||||
#endif
|
||||
vty_out(vty, "service walltime-warning %lu\n",
|
||||
walltime_threshold);
|
||||
|
||||
if (host.advanced)
|
||||
vty_out(vty, "service advanced-vty\n");
|
||||
|
||||
|
|
|
@ -168,6 +168,7 @@ clippy_scan += \
|
|||
lib/northbound_cli.c \
|
||||
lib/plist.c \
|
||||
lib/routemap_cli.c \
|
||||
lib/thread.c \
|
||||
lib/vty.c \
|
||||
# end
|
||||
|
||||
|
|
170
lib/thread.c
170
lib/thread.c
|
@ -90,7 +90,22 @@ static struct list *masters;
|
|||
|
||||
static void thread_free(struct thread_master *master, struct thread *thread);
|
||||
|
||||
#ifndef EXCLUDE_CPU_TIME
|
||||
#define EXCLUDE_CPU_TIME 0
|
||||
#endif
|
||||
#ifndef CONSUMED_TIME_CHECK
|
||||
#define CONSUMED_TIME_CHECK 0
|
||||
#endif
|
||||
|
||||
bool cputime_enabled = !EXCLUDE_CPU_TIME;
|
||||
unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
|
||||
unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
|
||||
|
||||
/* CLI start ---------------------------------------------------------------- */
|
||||
#ifndef VTYSH_EXTRACT_PL
|
||||
#include "lib/thread_clippy.c"
|
||||
#endif
|
||||
|
||||
static unsigned int cpu_record_hash_key(const struct cpu_thread_history *a)
|
||||
{
|
||||
int size = sizeof(a->func);
|
||||
|
@ -120,7 +135,6 @@ static void cpu_record_hash_free(void *a)
|
|||
XFREE(MTYPE_THREAD_STATS, hist);
|
||||
}
|
||||
|
||||
#ifndef EXCLUDE_CPU_TIME
|
||||
static void vty_out_cpu_thread_history(struct vty *vty,
|
||||
struct cpu_thread_history *a)
|
||||
{
|
||||
|
@ -187,6 +201,14 @@ static void cpu_record_print(struct vty *vty, uint8_t filter)
|
|||
struct thread_master *m;
|
||||
struct listnode *ln;
|
||||
|
||||
if (!cputime_enabled)
|
||||
vty_out(vty,
|
||||
"\n"
|
||||
"Collecting CPU time statistics is currently disabled. Following statistics\n"
|
||||
"will be zero or may display data from when collection was enabled. Use the\n"
|
||||
" \"service cputime-stats\" command to start collecting data.\n"
|
||||
"\nCounters and wallclock times are always maintained and should be accurate.\n");
|
||||
|
||||
memset(&tmp, 0, sizeof(tmp));
|
||||
tmp.funcname = "TOTAL";
|
||||
tmp.types = filter;
|
||||
|
@ -236,7 +258,6 @@ static void cpu_record_print(struct vty *vty, uint8_t filter)
|
|||
if (tmp.total_calls > 0)
|
||||
vty_out_cpu_thread_history(vty, &tmp);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
|
||||
{
|
||||
|
@ -306,7 +327,6 @@ static uint8_t parse_filter(const char *filterstr)
|
|||
return filter;
|
||||
}
|
||||
|
||||
#ifndef EXCLUDE_CPU_TIME
|
||||
DEFUN_NOSH (show_thread_cpu,
|
||||
show_thread_cpu_cmd,
|
||||
"show thread cpu [FILTER]",
|
||||
|
@ -331,7 +351,61 @@ DEFUN_NOSH (show_thread_cpu,
|
|||
cpu_record_print(vty, filter);
|
||||
return CMD_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
DEFPY (service_cputime_stats,
|
||||
service_cputime_stats_cmd,
|
||||
"[no] service cputime-stats",
|
||||
NO_STR
|
||||
"Set up miscellaneous service\n"
|
||||
"Collect CPU usage statistics\n")
|
||||
{
|
||||
cputime_enabled = !no;
|
||||
return CMD_SUCCESS;
|
||||
}
|
||||
|
||||
DEFPY (service_cputime_warning,
|
||||
service_cputime_warning_cmd,
|
||||
"[no] service cputime-warning (1-4294967295)",
|
||||
NO_STR
|
||||
"Set up miscellaneous service\n"
|
||||
"Warn for tasks exceeding CPU usage threshold\n"
|
||||
"Warning threshold in milliseconds\n")
|
||||
{
|
||||
if (no)
|
||||
cputime_threshold = 0;
|
||||
else
|
||||
cputime_threshold = cputime_warning * 1000;
|
||||
return CMD_SUCCESS;
|
||||
}
|
||||
|
||||
ALIAS (service_cputime_warning,
|
||||
no_service_cputime_warning_cmd,
|
||||
"no service cputime-warning",
|
||||
NO_STR
|
||||
"Set up miscellaneous service\n"
|
||||
"Warn for tasks exceeding CPU usage threshold\n")
|
||||
|
||||
DEFPY (service_walltime_warning,
|
||||
service_walltime_warning_cmd,
|
||||
"[no] service walltime-warning (1-4294967295)",
|
||||
NO_STR
|
||||
"Set up miscellaneous service\n"
|
||||
"Warn for tasks exceeding total wallclock threshold\n"
|
||||
"Warning threshold in milliseconds\n")
|
||||
{
|
||||
if (no)
|
||||
walltime_threshold = 0;
|
||||
else
|
||||
walltime_threshold = walltime_warning * 1000;
|
||||
return CMD_SUCCESS;
|
||||
}
|
||||
|
||||
ALIAS (service_walltime_warning,
|
||||
no_service_walltime_warning_cmd,
|
||||
"no service walltime-warning",
|
||||
NO_STR
|
||||
"Set up miscellaneous service\n"
|
||||
"Warn for tasks exceeding total wallclock threshold\n")
|
||||
|
||||
static void show_thread_poll_helper(struct vty *vty, struct thread_master *m)
|
||||
{
|
||||
|
@ -421,11 +495,15 @@ DEFUN (clear_thread_cpu,
|
|||
|
||||
void thread_cmd_init(void)
|
||||
{
|
||||
#ifndef EXCLUDE_CPU_TIME
|
||||
install_element(VIEW_NODE, &show_thread_cpu_cmd);
|
||||
#endif
|
||||
install_element(VIEW_NODE, &show_thread_poll_cmd);
|
||||
install_element(ENABLE_NODE, &clear_thread_cpu_cmd);
|
||||
|
||||
install_element(CONFIG_NODE, &service_cputime_stats_cmd);
|
||||
install_element(CONFIG_NODE, &service_cputime_warning_cmd);
|
||||
install_element(CONFIG_NODE, &no_service_cputime_warning_cmd);
|
||||
install_element(CONFIG_NODE, &service_walltime_warning_cmd);
|
||||
install_element(CONFIG_NODE, &no_service_walltime_warning_cmd);
|
||||
}
|
||||
/* CLI end ------------------------------------------------------------------ */
|
||||
|
||||
|
@ -1784,9 +1862,8 @@ void thread_getrusage(RUSAGE_T *r)
|
|||
#define FRR_RUSAGE RUSAGE_SELF
|
||||
#endif
|
||||
monotime(&r->real);
|
||||
#ifndef EXCLUDE_CPU_TIME
|
||||
getrusage(FRR_RUSAGE, &(r->cpu));
|
||||
#endif
|
||||
if (cputime_enabled)
|
||||
getrusage(FRR_RUSAGE, &(r->cpu));
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1802,13 +1879,14 @@ void thread_getrusage(RUSAGE_T *r)
|
|||
*/
|
||||
void thread_call(struct thread *thread)
|
||||
{
|
||||
#ifndef EXCLUDE_CPU_TIME
|
||||
_Atomic unsigned long realtime, cputime;
|
||||
unsigned long exp;
|
||||
unsigned long helper;
|
||||
#endif
|
||||
RUSAGE_T before, after;
|
||||
|
||||
/* if the thread being called is the CLI, it may change cputime_enabled
|
||||
* ("service cputime-stats" command), which can result in nonsensical
|
||||
* and very confusing warnings
|
||||
*/
|
||||
bool cputime_enabled_here = cputime_enabled;
|
||||
|
||||
if (thread->master->ready_run_loop)
|
||||
before = thread->master->last_getrusage;
|
||||
else
|
||||
|
@ -1828,43 +1906,45 @@ void thread_call(struct thread *thread)
|
|||
GETRUSAGE(&after);
|
||||
thread->master->last_getrusage = after;
|
||||
|
||||
#ifndef EXCLUDE_CPU_TIME
|
||||
realtime = thread_consumed_time(&after, &before, &helper);
|
||||
cputime = helper;
|
||||
unsigned long walltime, cputime;
|
||||
unsigned long exp;
|
||||
|
||||
/* update realtime */
|
||||
atomic_fetch_add_explicit(&thread->hist->real.total, realtime,
|
||||
walltime = thread_consumed_time(&after, &before, &cputime);
|
||||
|
||||
/* update walltime */
|
||||
atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
|
||||
memory_order_seq_cst);
|
||||
exp = atomic_load_explicit(&thread->hist->real.max,
|
||||
memory_order_seq_cst);
|
||||
while (exp < realtime
|
||||
while (exp < walltime
|
||||
&& !atomic_compare_exchange_weak_explicit(
|
||||
&thread->hist->real.max, &exp, realtime,
|
||||
memory_order_seq_cst, memory_order_seq_cst))
|
||||
&thread->hist->real.max, &exp, walltime,
|
||||
memory_order_seq_cst, memory_order_seq_cst))
|
||||
;
|
||||
|
||||
/* update cputime */
|
||||
atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
|
||||
memory_order_seq_cst);
|
||||
exp = atomic_load_explicit(&thread->hist->cpu.max,
|
||||
memory_order_seq_cst);
|
||||
while (exp < cputime
|
||||
&& !atomic_compare_exchange_weak_explicit(
|
||||
&thread->hist->cpu.max, &exp, cputime,
|
||||
memory_order_seq_cst, memory_order_seq_cst))
|
||||
;
|
||||
if (cputime_enabled_here && cputime_enabled) {
|
||||
/* update cputime */
|
||||
atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
|
||||
memory_order_seq_cst);
|
||||
exp = atomic_load_explicit(&thread->hist->cpu.max,
|
||||
memory_order_seq_cst);
|
||||
while (exp < cputime
|
||||
&& !atomic_compare_exchange_weak_explicit(
|
||||
&thread->hist->cpu.max, &exp, cputime,
|
||||
memory_order_seq_cst, memory_order_seq_cst))
|
||||
;
|
||||
}
|
||||
|
||||
atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
|
||||
memory_order_seq_cst);
|
||||
atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
|
||||
memory_order_seq_cst);
|
||||
|
||||
#ifdef CONSUMED_TIME_CHECK
|
||||
if (cputime > CONSUMED_TIME_CHECK) {
|
||||
if (cputime_enabled_here && cputime_enabled && cputime_threshold
|
||||
&& cputime > cputime_threshold) {
|
||||
/*
|
||||
* We have a CPU Hog on our hands. The time FRR
|
||||
* has spent doing actual work ( not sleeping )
|
||||
* is greater than 5 seconds.
|
||||
* We have a CPU Hog on our hands. The time FRR has spent
|
||||
* doing actual work (not sleeping) is greater than 5 seconds.
|
||||
* Whinge about it now, so we're aware this is yet another task
|
||||
* to fix.
|
||||
*/
|
||||
|
@ -1874,13 +1954,13 @@ void thread_call(struct thread *thread)
|
|||
EC_LIB_SLOW_THREAD_CPU,
|
||||
"CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
|
||||
thread->xref->funcname, (unsigned long)thread->func,
|
||||
realtime / 1000, cputime / 1000);
|
||||
} else if (realtime > CONSUMED_TIME_CHECK) {
|
||||
walltime / 1000, cputime / 1000);
|
||||
|
||||
} else if (walltime_threshold && walltime > walltime_threshold) {
|
||||
/*
|
||||
* The runtime for a task is greater than 5 seconds, but
|
||||
* the cpu time is under 5 seconds. Let's whine
|
||||
* about this because this could imply some sort of
|
||||
* scheduling issue.
|
||||
* The runtime for a task is greater than 5 seconds, but the
|
||||
* cpu time is under 5 seconds. Let's whine about this because
|
||||
* this could imply some sort of scheduling issue.
|
||||
*/
|
||||
atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
|
||||
1, memory_order_seq_cst);
|
||||
|
@ -1888,10 +1968,8 @@ void thread_call(struct thread *thread)
|
|||
EC_LIB_SLOW_THREAD_WALL,
|
||||
"STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
|
||||
thread->xref->funcname, (unsigned long)thread->func,
|
||||
realtime / 1000, cputime / 1000);
|
||||
walltime / 1000, cputime / 1000);
|
||||
}
|
||||
#endif /* CONSUMED_TIME_CHECK */
|
||||
#endif /* Exclude CPU Time */
|
||||
}
|
||||
|
||||
/* Execute thread */
|
||||
|
|
|
@ -33,6 +33,13 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern bool cputime_enabled;
|
||||
extern unsigned long cputime_threshold;
|
||||
/* capturing wallclock time is always enabled since it is fast (reading
|
||||
* hardware TSC w/o syscalls)
|
||||
*/
|
||||
extern unsigned long walltime_threshold;
|
||||
|
||||
struct rusage_t {
|
||||
struct rusage cpu;
|
||||
struct timeval real;
|
||||
|
|
54
lib/vty.c
54
lib/vty.c
|
@ -502,37 +502,37 @@ static int vty_command(struct vty *vty, char *buf)
|
|||
zlog_notice("%s%s", prompt_str, buf);
|
||||
}
|
||||
|
||||
#ifdef CONSUMED_TIME_CHECK
|
||||
{
|
||||
RUSAGE_T before;
|
||||
RUSAGE_T after;
|
||||
unsigned long realtime, cputime;
|
||||
RUSAGE_T before;
|
||||
RUSAGE_T after;
|
||||
unsigned long walltime, cputime;
|
||||
|
||||
GETRUSAGE(&before);
|
||||
#endif /* CONSUMED_TIME_CHECK */
|
||||
/* cmd_execute() may change cputime_enabled if we're executing the
|
||||
* "service cputime-stats" command, which can result in nonsensical
|
||||
* and very confusing warnings
|
||||
*/
|
||||
bool cputime_enabled_here = cputime_enabled;
|
||||
|
||||
ret = cmd_execute(vty, buf, NULL, 0);
|
||||
GETRUSAGE(&before);
|
||||
|
||||
/* Get the name of the protocol if any */
|
||||
protocolname = frr_protoname;
|
||||
ret = cmd_execute(vty, buf, NULL, 0);
|
||||
|
||||
#ifdef CONSUMED_TIME_CHECK
|
||||
GETRUSAGE(&after);
|
||||
realtime = thread_consumed_time(&after, &before, &cputime);
|
||||
if (cputime > CONSUMED_TIME_CHECK) {
|
||||
/* Warn about CPU hog that must be fixed. */
|
||||
flog_warn(
|
||||
EC_LIB_SLOW_THREAD_CPU,
|
||||
"CPU HOG: command took %lums (cpu time %lums): %s",
|
||||
realtime / 1000, cputime / 1000, buf);
|
||||
} else if (realtime > CONSUMED_TIME_CHECK) {
|
||||
flog_warn(
|
||||
EC_LIB_SLOW_THREAD_WALL,
|
||||
"STARVATION: command took %lums (cpu time %lums): %s",
|
||||
realtime / 1000, cputime / 1000, buf);
|
||||
}
|
||||
}
|
||||
#endif /* CONSUMED_TIME_CHECK */
|
||||
GETRUSAGE(&after);
|
||||
|
||||
walltime = thread_consumed_time(&after, &before, &cputime);
|
||||
|
||||
if (cputime_enabled_here && cputime_enabled && cputime_threshold
|
||||
&& cputime > cputime_threshold)
|
||||
/* Warn about CPU hog that must be fixed. */
|
||||
flog_warn(EC_LIB_SLOW_THREAD_CPU,
|
||||
"CPU HOG: command took %lums (cpu time %lums): %s",
|
||||
walltime / 1000, cputime / 1000, buf);
|
||||
else if (walltime_threshold && walltime > walltime_threshold)
|
||||
flog_warn(EC_LIB_SLOW_THREAD_WALL,
|
||||
"STARVATION: command took %lums (cpu time %lums): %s",
|
||||
walltime / 1000, cputime / 1000, buf);
|
||||
|
||||
/* Get the name of the protocol if any */
|
||||
protocolname = frr_protoname;
|
||||
|
||||
if (ret != CMD_SUCCESS)
|
||||
switch (ret) {
|
||||
|
|
|
@ -2851,7 +2851,6 @@ DEFUN (vtysh_show_poll,
|
|||
return show_per_daemon(vty, argv, argc, "Thread statistics for %s:\n");
|
||||
}
|
||||
|
||||
#ifndef EXCLUDE_CPU_TIME
|
||||
DEFUN (vtysh_show_thread,
|
||||
vtysh_show_thread_cmd,
|
||||
"show thread cpu [FILTER]",
|
||||
|
@ -2862,7 +2861,6 @@ DEFUN (vtysh_show_thread,
|
|||
{
|
||||
return show_per_daemon(vty, argv, argc, "Thread statistics for %s:\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
DEFUN (vtysh_show_work_queues,
|
||||
vtysh_show_work_queues_cmd,
|
||||
|
@ -4567,9 +4565,7 @@ void vtysh_init_vty(void)
|
|||
install_element(VIEW_NODE, &vtysh_show_modules_cmd);
|
||||
install_element(VIEW_NODE, &vtysh_show_work_queues_cmd);
|
||||
install_element(VIEW_NODE, &vtysh_show_work_queues_daemon_cmd);
|
||||
#ifndef EXCLUDE_CPU_TIME
|
||||
install_element(VIEW_NODE, &vtysh_show_thread_cmd);
|
||||
#endif
|
||||
install_element(VIEW_NODE, &vtysh_show_poll_cmd);
|
||||
|
||||
/* Logging */
|
||||
|
|
Loading…
Reference in a new issue