Testing with a fixed number of loops per-thread only works if the
workload is distributed perfectly across CPUs. For instance, if a lock
is held in the workload (e.g. internally by open() and close()), those
may cause starvation of some threads, and therefore cause the benchmark
to be wrong because it will wait for the slowest thread to complete its
loops.
It is also not well suited to testing overcommit of threads versus CPUs.
Change the test to report the number of loops performed in a given wall
time, and use this to report the average and std.dev. of tracing
overhead per event on each active CPU.
Change the benchmark workload to be only CPU-bound and not generate
system calls to minimize the inherent non-scalability of the workload
(e.g. locks held within the kernel).
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Change-Id: I5245f36831875bd9f87854618a4ed0cb31e56a4d
./test_benchmark
You can specify the number of iterations, duration and threads by setting
-environment variables ITERS, NR_EVENTS, NR_CPUS respectively:
+environment variables ITERS, DURATION, NR_THREADS respectively:
- ITERS=10 NR_EVENTS=10000 NR_CPUS=4 ./test_benchmark
+ ITERS=10 DURATION=20 NR_THREADS=4 ./test_benchmark
+
+NR_CPUS can also be configured; by default it is detected from the output
+of lscpu.
* LTTng Userspace Tracer (UST) - benchmark tool
*
* Copyright 2010 - Douglas Santos <douglas.santos@polymtl.ca>
* LTTng Userspace Tracer (UST) - benchmark tool
*
* Copyright 2010 - Douglas Santos <douglas.santos@polymtl.ca>
+ * Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
#include "ust_tests_benchmark.h"
#endif
#include "ust_tests_benchmark.h"
#endif
-static int nr_cpus;
-static unsigned long nr_events;
+#define printf_verbose(fmt, args...) \
+ do { \
+ if (verbose_mode) \
+ printf(fmt, ## args); \
+ } while (0)
+
+static int verbose_mode;
+
+struct thread_counter {
+ unsigned long long nr_loops;
+};
+
+static int nr_threads;
+static unsigned long duration;
+
+static volatile int test_go, test_stop;
void *function(void *arg)
{
void *function(void *arg)
{
+ unsigned long long nr_loops = 0;
+ struct thread_counter *thread_counter = arg;
- for (i = 0; i < nr_events; i++) {
+ while (!test_go)
+ cmm_barrier();
+
+ for (;;) {
+ nr_loops++;
+ if (test_stop)
+ break;
+ thread_counter->nr_loops = nr_loops;
return NULL;
}
void usage(char **argv) {
return NULL;
}
void usage(char **argv) {
- printf("Usage: %s nr_cpus nr_events\n", argv[0]);
+ printf("Usage: %s nr_threads duration(s) <OPTIONS>\n", argv[0]);
+ printf("OPTIONS:\n");
+ printf(" [-v] (verbose output)\n");
+ printf("\n");
int main(int argc, char **argv)
{
int main(int argc, char **argv)
{
+ unsigned long long total_loops = 0;
+ unsigned long i_thr;
- nr_cpus = atoi(argv[1]);
- printf("using %d processor(s)\n", nr_cpus);
+ nr_threads = atoi(argv[1]);
+ duration = atol(argv[2]);
+
+ for (i = 3; i < argc; i++) {
+ if (argv[i][0] != '-')
+ continue;
+ switch (argv[i][1]) {
+ case 'v':
+ verbose_mode = 1;
+ break;
+ }
+ }
+
+ printf_verbose("using %d thread(s)\n", nr_threads);
+ printf_verbose("for a duration of %lds\n", duration);
- nr_events = atol(argv[2]);
- printf("using %ld events per cpu\n", nr_events);
+ pthread_t thread[nr_threads];
+ struct thread_counter thread_counter[nr_threads];
- pthread_t thread[nr_cpus];
- for (i = 0; i < nr_cpus; i++) {
- if (pthread_create(&thread[i], NULL, function, NULL)) {
+ for (i = 0; i < nr_threads; i++) {
+ thread_counter[i].nr_loops = 0;
+ if (pthread_create(&thread[i], NULL, function, &thread_counter[i])) {
fprintf(stderr, "thread create %d failed\n", i);
exit(1);
}
}
fprintf(stderr, "thread create %d failed\n", i);
exit(1);
}
}
- for (i = 0; i < nr_cpus; i++) {
+ test_go = 1;
+
+ for (i_thr = 0; i_thr < duration; i_thr++) {
+ sleep(1);
+ if (verbose_mode) {
+ fwrite(".", sizeof(char), 1, stdout);
+ fflush(stdout);
+ }
+ }
+ printf_verbose("\n");
+
+ test_stop = 1;
+
+ for (i = 0; i < nr_threads; i++) {
if (pthread_join(thread[i], &retval)) {
fprintf(stderr, "thread join %d failed\n", i);
exit(1);
}
if (pthread_join(thread[i], &retval)) {
fprintf(stderr, "thread join %d failed\n", i);
exit(1);
}
+ total_loops += thread_counter[i].nr_loops;
+ printf("Number of loops: %llu\n", total_loops);
os.system(cmd)
t2 = time.time()
os.system(cmd)
t2 = time.time()
+ print("Wall time: " + str(t2-t1))
if __name__ == "__main__":
main()
if __name__ == "__main__":
main()
plan_tests 1
: ${ITERS:=10}
plan_tests 1
: ${ITERS:=10}
-: ${NR_EVENTS:=7000000}
-: ${NR_CPUS:=1}
+: ${DURATION:=2}
+: ${NR_THREADS:=1}
+: ${NR_CPUS:=$(lscpu | grep "^CPU(s)" | sed 's/^.*:[ \t]*//g')}
: ${TIME:="./$CURDIR/ptime"}
: ${TIME:="./$CURDIR/ptime"}
-: ${PROG_NOTRACING:="./$CURDIR/bench1 $NR_CPUS $NR_EVENTS"}
-: ${PROG_TRACING:="./$CURDIR/bench2 $NR_CPUS $NR_EVENTS"}
+: ${PROG_NOTRACING:="./$CURDIR/bench1 $NR_THREADS $DURATION"}
+: ${PROG_TRACING:="./$CURDIR/bench2 $NR_THREADS $DURATION"}
function signal_cleanup ()
{
killall lttng-sessiond
function signal_cleanup ()
{
killall lttng-sessiond
}
trap signal_cleanup SIGTERM SIGINT
}
trap signal_cleanup SIGTERM SIGINT
-CMD_NOTRACING="$TIME '$PROG_NOTRACING >/dev/null 2>&1'"
-CMD_TRACING="$TIME '$PROG_TRACING >/dev/null 2>&1'"
+CMD_NOTRACING="$TIME '$PROG_NOTRACING'"
+CMD_TRACING="$TIME '$PROG_TRACING'"
+
+NR_ACTIVE_CPUS=$(( $NR_CPUS > $NR_THREADS ? $NR_THREADS : $NR_CPUS ))
for i in $(seq $ITERS); do
for i in $(seq $ITERS); do
- time_notrace[i]=$(sh -c "$CMD_NOTRACING")
+ res=$(sh -c "$CMD_NOTRACING")
+ loops_notrace[$i]=$(echo "${res}" | grep "^Number of loops:" | sed 's/^.*: //g')
+ time_notrace[$i]=$(echo "${res}" | grep "^Wall time:" | sed 's/^.*: //g')
lttng-sessiond -d --no-kernel
lttng -q create --snapshot
lttng -q enable-event -u -a
lttng -q start
for i in $(seq $ITERS); do
lttng-sessiond -d --no-kernel
lttng -q create --snapshot
lttng -q enable-event -u -a
lttng -q start
for i in $(seq $ITERS); do
- time_trace[i]=$(sh -c "$CMD_TRACING")
+ res=$(sh -c "$CMD_TRACING")
+ loops_trace[$i]=$(echo "${res}" | grep "^Number of loops:" | sed 's/^.*: //g')
+ time_trace[$i]=$(echo "${res}" | grep "^Wall time:" | sed 's/^.*: //g')
+# Multiply the wall time by the number of active CPUs to get the
+# overhead of events on each active cpu.
+
avg_delta=0
for i in $(seq $ITERS); do
avg_delta=0
for i in $(seq $ITERS); do
- delta[$i]=$(echo "( ((${time_trace[$i]}) - (${time_notrace[$i]})) / $NR_EVENTS)" | bc -l)
+ delta[$i]=$(echo "((${time_trace[$i]} * ${NR_ACTIVE_CPUS} / ${loops_trace[$i]}) - (${time_notrace[$i]} * ${NR_ACTIVE_CPUS} / ${loops_notrace[$i]}))" | bc -l)
avg_delta=$(echo "(${avg_delta} + ${delta[$i]})" | bc -l)
done
avg_delta=$(echo "(${avg_delta} / $ITERS)" | bc -l)
avg_delta=$(echo "(${avg_delta} + ${delta[$i]})" | bc -l)
done
avg_delta=$(echo "(${avg_delta} / $ITERS)" | bc -l)
NS_PER_EVENT=${NS_PER_EVENT%%.*}
STD_DEV_NS_PER_EVENT=$(echo "($std_dev * 1000000000)" | bc -l)
NS_PER_EVENT=${NS_PER_EVENT%%.*}
STD_DEV_NS_PER_EVENT=$(echo "($std_dev * 1000000000)" | bc -l)
STD_DEV_NS_PER_EVENT=${STD_DEV_NS_PER_EVENT%%.*}
STD_DEV_NS_PER_EVENT=${STD_DEV_NS_PER_EVENT%%.*}
-diag "Average tracing overhead per event is ${NS_PER_EVENT}ns, std.dev.: ${STD_DEV_NS_PER_EVENT}ns"
+diag "Average tracing overhead per event is ${NS_PER_EVENT}ns, std.dev.: ${STD_DEV_NS_PER_EVENT}ns { NR_THREADS=${NR_THREADS}, NR_ACTIVE_CPUS=${NR_ACTIVE_CPUS} }"