Testing with a fixed number of loops per-thread only works if the
workload is distributed perfectly across CPUs. For instance, if a lock
is held in the workload (e.g. internally by open() and close()), those
may cause starvation of some threads, and therefore cause the benchmark
to be wrong because it will wait for the slowest thread to complete its
loops.
It is also not well suited to testing overcommit of threads versus CPUs.
Change the test to report the number of loops performed in a given wall
time, and use this to report the average and std.dev. of tracing
overhead per event on each active CPU.
Change the benchmark workload to be only CPU-bound and not generate
system calls to minimize the inherent non-scalability of the workload
(e.g. locks held within the kernel).
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Change-Id: I5245f36831875bd9f87854618a4ed0cb31e56a4d
./test_benchmark
You can specify the number of iterations, duration and threads by setting
-environment variables ITERS, NR_EVENTS, NR_CPUS respectively:
+environment variables ITERS, DURATION, NR_THREADS respectively:
- ITERS=10 NR_EVENTS=10000 NR_CPUS=4 ./test_benchmark
+ ITERS=10 DURATION=20 NR_THREADS=4 ./test_benchmark
+
+NR_CPUS can also be configured; by default it is detected from the output
+of lscpu.
* LTTng Userspace Tracer (UST) - benchmark tool
*
* Copyright 2010 - Douglas Santos <douglas.santos@polymtl.ca>
* LTTng Userspace Tracer (UST) - benchmark tool
*
* Copyright 2010 - Douglas Santos <douglas.santos@polymtl.ca>
+ * Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
#include "ust_tests_benchmark.h"
#endif
#include "ust_tests_benchmark.h"
#endif
-static int nr_cpus;
-static unsigned long nr_events;
+#define printf_verbose(fmt, args...) \
+ do { \
+ if (verbose_mode) \
+ printf(fmt, ## args); \
+ } while (0)
+
+static int verbose_mode;
+
+struct thread_counter {
+ unsigned long long nr_loops;
+};
+
+static int nr_threads;
+static unsigned long duration;
+
+static volatile int test_go, test_stop;
void *function(void *arg)
{
void *function(void *arg)
{
+ unsigned long long nr_loops = 0;
+ struct thread_counter *thread_counter = arg;
- for (i = 0; i < nr_events; i++) {
+ while (!test_go)
+ cmm_barrier();
+
+ for (;;) {
+ nr_loops++;
+ if (test_stop)
+ break;
+ thread_counter->nr_loops = nr_loops;
return NULL;
}
void usage(char **argv) {
return NULL;
}
void usage(char **argv) {
- printf("Usage: %s nr_cpus nr_events\n", argv[0]);
+ printf("Usage: %s nr_threads duration(s) <OPTIONS>\n", argv[0]);
+ printf("OPTIONS:\n");
+ printf(" [-v] (verbose output)\n");
+ printf("\n");
int main(int argc, char **argv)
{
int main(int argc, char **argv)
{
+ unsigned long long total_loops = 0;
+ unsigned long i_thr;
- nr_cpus = atoi(argv[1]);
- printf("using %d processor(s)\n", nr_cpus);
+ nr_threads = atoi(argv[1]);
+ duration = atol(argv[2]);
+
+ for (i = 3; i < argc; i++) {
+ if (argv[i][0] != '-')
+ continue;
+ switch (argv[i][1]) {
+ case 'v':
+ verbose_mode = 1;
+ break;
+ }
+ }
+
+ printf_verbose("using %d thread(s)\n", nr_threads);
+ printf_verbose("for a duration of %lds\n", duration);
- nr_events = atol(argv[2]);
- printf("using %ld events per cpu\n", nr_events);
+ pthread_t thread[nr_threads];
+ struct thread_counter thread_counter[nr_threads];
- pthread_t thread[nr_cpus];
- for (i = 0; i < nr_cpus; i++) {
- if (pthread_create(&thread[i], NULL, function, NULL)) {
+ for (i = 0; i < nr_threads; i++) {
+ thread_counter[i].nr_loops = 0;
+ if (pthread_create(&thread[i], NULL, function, &thread_counter[i])) {
fprintf(stderr, "thread create %d failed\n", i);
exit(1);
}
}
fprintf(stderr, "thread create %d failed\n", i);
exit(1);
}
}
- for (i = 0; i < nr_cpus; i++) {
+ test_go = 1;
+
+ for (i_thr = 0; i_thr < duration; i_thr++) {
+ sleep(1);
+ if (verbose_mode) {
+ fwrite(".", sizeof(char), 1, stdout);
+ fflush(stdout);
+ }
+ }
+ printf_verbose("\n");
+
+ test_stop = 1;
+
+ for (i = 0; i < nr_threads; i++) {
if (pthread_join(thread[i], &retval)) {
fprintf(stderr, "thread join %d failed\n", i);
exit(1);
}
if (pthread_join(thread[i], &retval)) {
fprintf(stderr, "thread join %d failed\n", i);
exit(1);
}
+ total_loops += thread_counter[i].nr_loops;
+ printf("Number of loops: %llu\n", total_loops);
os.system(cmd)
t2 = time.time()
os.system(cmd)
t2 = time.time()
+ print("Wall time: " + str(t2-t1))
if __name__ == "__main__":
main()
if __name__ == "__main__":
main()
plan_tests 1
: ${ITERS:=10}
plan_tests 1
: ${ITERS:=10}
-: ${NR_EVENTS:=7000000}
-: ${NR_CPUS:=1}
+: ${DURATION:=2}
+: ${NR_THREADS:=1}
+: ${NR_CPUS:=$(lscpu | grep "^CPU(s)" | sed 's/^.*:[ \t]*//g')}
: ${TIME:="./$CURDIR/ptime"}
: ${TIME:="./$CURDIR/ptime"}
-: ${PROG_NOTRACING:="./$CURDIR/bench1 $NR_CPUS $NR_EVENTS"}
-: ${PROG_TRACING:="./$CURDIR/bench2 $NR_CPUS $NR_EVENTS"}
+: ${PROG_NOTRACING:="./$CURDIR/bench1 $NR_THREADS $DURATION"}
+: ${PROG_TRACING:="./$CURDIR/bench2 $NR_THREADS $DURATION"}
function signal_cleanup ()
{
killall lttng-sessiond
function signal_cleanup ()
{
killall lttng-sessiond
}
trap signal_cleanup SIGTERM SIGINT
}
trap signal_cleanup SIGTERM SIGINT
-CMD_NOTRACING="$TIME '$PROG_NOTRACING >/dev/null 2>&1'"
-CMD_TRACING="$TIME '$PROG_TRACING >/dev/null 2>&1'"
+CMD_NOTRACING="$TIME '$PROG_NOTRACING'"
+CMD_TRACING="$TIME '$PROG_TRACING'"
+
+NR_ACTIVE_CPUS=$(( $NR_CPUS > $NR_THREADS ? $NR_THREADS : $NR_CPUS ))
for i in $(seq $ITERS); do
for i in $(seq $ITERS); do
- time_notrace[i]=$(sh -c "$CMD_NOTRACING")
+ res=$(sh -c "$CMD_NOTRACING")
+ loops_notrace[$i]=$(echo "${res}" | grep "^Number of loops:" | sed 's/^.*: //g')
+ time_notrace[$i]=$(echo "${res}" | grep "^Wall time:" | sed 's/^.*: //g')
lttng-sessiond -d --no-kernel
lttng -q create --snapshot
lttng -q enable-event -u -a
lttng -q start
for i in $(seq $ITERS); do
lttng-sessiond -d --no-kernel
lttng -q create --snapshot
lttng -q enable-event -u -a
lttng -q start
for i in $(seq $ITERS); do
- time_trace[i]=$(sh -c "$CMD_TRACING")
+ res=$(sh -c "$CMD_TRACING")
+ loops_trace[$i]=$(echo "${res}" | grep "^Number of loops:" | sed 's/^.*: //g')
+ time_trace[$i]=$(echo "${res}" | grep "^Wall time:" | sed 's/^.*: //g')
+# Multiply the wall time by the number of active CPUs to get the
+# overhead of events on each active cpu.
+
avg_delta=0
for i in $(seq $ITERS); do
avg_delta=0
for i in $(seq $ITERS); do
- delta[$i]=$(echo "( ((${time_trace[$i]}) - (${time_notrace[$i]})) / $NR_EVENTS)" | bc -l)
+ delta[$i]=$(echo "((${time_trace[$i]} * ${NR_ACTIVE_CPUS} / ${loops_trace[$i]}) - (${time_notrace[$i]} * ${NR_ACTIVE_CPUS} / ${loops_notrace[$i]}))" | bc -l)
avg_delta=$(echo "(${avg_delta} + ${delta[$i]})" | bc -l)
done
avg_delta=$(echo "(${avg_delta} / $ITERS)" | bc -l)
avg_delta=$(echo "(${avg_delta} + ${delta[$i]})" | bc -l)
done
avg_delta=$(echo "(${avg_delta} / $ITERS)" | bc -l)
NS_PER_EVENT=${NS_PER_EVENT%%.*}
STD_DEV_NS_PER_EVENT=$(echo "($std_dev * 1000000000)" | bc -l)
NS_PER_EVENT=${NS_PER_EVENT%%.*}
STD_DEV_NS_PER_EVENT=$(echo "($std_dev * 1000000000)" | bc -l)
STD_DEV_NS_PER_EVENT=${STD_DEV_NS_PER_EVENT%%.*}
STD_DEV_NS_PER_EVENT=${STD_DEV_NS_PER_EVENT%%.*}
-diag "Average tracing overhead per event is ${NS_PER_EVENT}ns, std.dev.: ${STD_DEV_NS_PER_EVENT}ns"
+diag "Average tracing overhead per event is ${NS_PER_EVENT}ns, std.dev.: ${STD_DEV_NS_PER_EVENT}ns { NR_THREADS=${NR_THREADS}, NR_ACTIVE_CPUS=${NR_ACTIVE_CPUS} }"