From 6f97f9c2bbc74605e3de2b05333ecf25fa52d6fc Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 4 Nov 2016 07:42:03 -0600 Subject: [PATCH] Implement LTTNG_UST_BLOCKING_RETRY_TIMEOUT MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add LTTNG_UST_BLOCKING_RETRY_TIMEOUT environment variable: LTTNG_UST_BLOCKING_RETRY_TIMEOUT Maximum duration (milliseconds) to retry event tracing when there’s no space left for the event record in the sub-buffer. 0 (default) Never block the application. Positive value Block the application for the specified number of milliseconds. If there’s no space left after this duration, discard the event record. Negative value Block the application until there’s space left for the event record. This option can be useful in workloads generating very large trace data throughput, where blocking the application is an acceptable trade-off to prevent discarding event records. Warning Setting this environment variable to a non-zero value may significantly affect application timings. Signed-off-by: Mathieu Desnoyers CC: "Carlos O'Donell" --- configure.ac | 2 ++ doc/man/Makefile.am | 4 ++- doc/man/lttng-ust.3.txt | 42 +++++++++++++++++++++++++ liblttng-ust/lttng-ust-comm.c | 29 ++++++++++++++++- libringbuffer/Makefile.am | 2 +- libringbuffer/{tlsfixup.h => rb-init.h} | 11 ++++--- libringbuffer/ring_buffer_frontend.c | 32 ++++++++++++++++++- 7 files changed, 113 insertions(+), 9 deletions(-) rename libringbuffer/{tlsfixup.h => rb-init.h} (71%) diff --git a/configure.ac b/configure.ac index 1add5d63..450b43b2 100644 --- a/configure.ac +++ b/configure.ac @@ -435,6 +435,8 @@ AC_DEFUN([_AC_DEFINE_AND_SUBST], [ ]) _AC_DEFINE_AND_SUBST([LTTNG_UST_DEFAULT_CONSTRUCTOR_TIMEOUT_MS], [3000]) +# By default, do not retry on buffer full condition. 
+_AC_DEFINE_AND_SUBST([LTTNG_UST_DEFAULT_BLOCKING_RETRY_TIMEOUT_MS], [0]) AC_CONFIG_FILES([ Makefile diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am index 22aed154..449377ff 100644 --- a/doc/man/Makefile.am +++ b/doc/man/Makefile.am @@ -56,7 +56,9 @@ if HAVE_ASCIIDOC_XMLTO # Tools to execute: ADOC = $(ASCIIDOC) -f $(ASCIIDOC_CONF) -d manpage \ -a lttng_version="$(PACKAGE_VERSION)" \ - -a lttng_ust_register_timeout="@LTTNG_UST_DEFAULT_CONSTRUCTOR_TIMEOUT_MS@" + -a lttng_ust_register_timeout="@LTTNG_UST_DEFAULT_CONSTRUCTOR_TIMEOUT_MS@" \ + -a lttng_ust_blocking_retry_timeout="@LTTNG_UST_DEFAULT_BLOCKING_RETRY_TIMEOUT_MS@" + ADOC_DOCBOOK = $(ADOC) -b docbook XTO = $(XMLTO) -m $(firstword $(XSL_SRC_FILES)) man diff --git a/doc/man/lttng-ust.3.txt b/doc/man/lttng-ust.3.txt index 7a23943d..43bc2265 100644 --- a/doc/man/lttng-ust.3.txt +++ b/doc/man/lttng-ust.3.txt @@ -1098,6 +1098,30 @@ LTTng session and consumer daemons (part of the LTTng-tools project) are located in a specific directory under `$LTTNG_HOME` (or `$HOME` if `$LTTNG_HOME` is not set). +`LTTNG_UST_BLOCKING_RETRY_TIMEOUT`:: + Maximum duration (milliseconds) to retry event tracing when + there's no space left for the event record in the sub-buffer. ++ +-- +`0` (default):: + Never block the application. + +Positive value:: + Block the application for the specified number of milliseconds. If + there's no space left after this duration, discard the event + record. + +Negative value:: + Block the application until there's space left for the event record. +-- ++ +This option can be useful in workloads generating very large trace data +throughput, where blocking the application is an acceptable trade-off to +prevent discarding event records. ++ +WARNING: Setting this environment variable to a non-zero value may +significantly affect application timings. + `LTTNG_UST_CLOCK_PLUGIN`:: Path to the shared object which acts as the clock override plugin. 
An example of such a plugin can be found in the LTTng-UST @@ -1123,6 +1147,24 @@ with time constraints on the process startup time. + Default: {lttng_ust_register_timeout}. +`LTTNG_UST_BLOCKING_RETRY_TIMEOUT`:: + Maximum time during which event tracing retry is attempted on buffer + full condition (milliseconds). Setting this environment variable to a non-zero + value effectively blocks the application on buffer full condition. + Setting this environment variable to non-zero values may + significantly affect application timings. Setting this to a negative + value may block the application indefinitely if there is no consumer + emptying the ring buffer. The delay between retry attempts is the + minimum between the specified timeout value and 100ms. This option + can be useful in workloads generating very large trace data + throughput, where blocking the application is an acceptable + trade-off to not discard events. _Use with caution_. ++ +The value `0` means _do not retry_. The value `-1` means _retry forever_. +Value > `0` means a maximum timeout of the given value. ++ +Default: {lttng_ust_blocking_retry_timeout}. 
+ `LTTNG_UST_WITHOUT_BADDR_STATEDUMP`:: Prevents `liblttng-ust` from performing a base address state dump (see the <> section above) if diff --git a/liblttng-ust/lttng-ust-comm.c b/liblttng-ust/lttng-ust-comm.c index 7cd6a227..651d2aaa 100644 --- a/liblttng-ust/lttng-ust-comm.c +++ b/liblttng-ust/lttng-ust-comm.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -52,7 +53,7 @@ #include "tracepoint-internal.h" #include "lttng-tracer-core.h" #include "compat.h" -#include "../libringbuffer/tlsfixup.h" +#include "../libringbuffer/rb-init.h" #include "lttng-ust-statedump.h" #include "clock.h" #include "../libringbuffer/getcpu.h" @@ -533,6 +534,30 @@ int get_constructor_timeout(struct timespec *constructor_timeout) return 1; } +static +void get_blocking_retry_timeout(void) +{ + const char *str_blocking_retry_timeout = + lttng_secure_getenv("LTTNG_UST_BLOCKING_RETRY_TIMEOUT"); + + if (str_blocking_retry_timeout) { + long timeout = strtol(str_blocking_retry_timeout, NULL, 10); + + if (timeout < 0) + timeout = -1; + if (timeout > INT_MAX) { + WARN("Saturating %s value from %ld to %d\n", + "LTTNG_UST_BLOCKING_RETRY_TIMEOUT", + timeout, INT_MAX); + timeout = INT_MAX; + } + DBG("%s environment variable value is %ld", + "LTTNG_UST_BLOCKING_RETRY_TIMEOUT", + timeout); + lttng_ust_ringbuffer_set_retry_timeout(timeout); + } +} + static int register_to_sessiond(int socket, enum ustctl_socket_type type) { @@ -1672,6 +1697,8 @@ void __attribute__((constructor)) lttng_ust_init(void) timeout_mode = get_constructor_timeout(&constructor_timeout); + get_blocking_retry_timeout(); + ret = sem_init(&constructor_wait, 0, 0); if (ret) { PERROR("sem_init"); diff --git a/libringbuffer/Makefile.am b/libringbuffer/Makefile.am index 271c8bee..33db1653 100644 --- a/libringbuffer/Makefile.am +++ b/libringbuffer/Makefile.am @@ -11,7 +11,7 @@ libringbuffer_la_SOURCES = \ api.h \ backend.h backend_internal.h backend_types.h \ frontend_api.h frontend.h 
frontend_internal.h frontend_types.h \ - nohz.h vatomic.h tlsfixup.h + nohz.h vatomic.h rb-init.h libringbuffer_la_LIBADD = \ -lpthread \ diff --git a/libringbuffer/tlsfixup.h b/libringbuffer/rb-init.h similarity index 71% rename from libringbuffer/tlsfixup.h rename to libringbuffer/rb-init.h index 125742ef..eba087fa 100644 --- a/libringbuffer/tlsfixup.h +++ b/libringbuffer/rb-init.h @@ -1,10 +1,10 @@ -#ifndef _LTTNG_UST_LIB_RINGBUFFER_TLS_FIXUP_H -#define _LTTNG_UST_LIB_RINGBUFFER_TLS_FIXUP_H +#ifndef _LTTNG_UST_LIB_RINGBUFFER_RB_INIT_H +#define _LTTNG_UST_LIB_RINGBUFFER_RB_INIT_H /* - * libringbuffer/tlsfixup.h + * libringbuffer/rb-init.h * - * Copyright (C) 2012 Mathieu Desnoyers + * Copyright (C) 2012-2016 Mathieu Desnoyers * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -22,5 +22,6 @@ */ void lttng_fixup_ringbuffer_tls(void); +void lttng_ust_ringbuffer_set_retry_timeout(int timeout); -#endif /* _LTTNG_UST_LIB_RINGBUFFER_TLS_FIXUP_H */ +#endif /* _LTTNG_UST_LIB_RINGBUFFER_RB_INIT_H */ diff --git a/libringbuffer/ring_buffer_frontend.c b/libringbuffer/ring_buffer_frontend.c index be20d69b..768e2a12 100644 --- a/libringbuffer/ring_buffer_frontend.c +++ b/libringbuffer/ring_buffer_frontend.c @@ -72,7 +72,7 @@ #include "backend.h" #include "frontend.h" #include "shm.h" -#include "tlsfixup.h" +#include "rb-init.h" #include "../liblttng-ust/compat.h" /* For ENODATA */ /* Print DBG() messages about events lost only every 1048576 hits */ @@ -84,6 +84,7 @@ #define CLOCKID CLOCK_MONOTONIC #define LTTNG_UST_RING_BUFFER_GET_RETRY 10 #define LTTNG_UST_RING_BUFFER_RETRY_DELAY_MS 10 +#define RETRY_DELAY_MS 100 /* 100 ms. */ /* * Non-static to ensure the compiler does not optimize away the xor. 
@@ -149,6 +150,14 @@ static struct timer_signal_data timer_signal = { .lock = PTHREAD_MUTEX_INITIALIZER, }; +int lttng_ust_blocking_retry_timeout = + CONFIG_LTTNG_UST_DEFAULT_BLOCKING_RETRY_TIMEOUT_MS; + +void lttng_ust_ringbuffer_set_retry_timeout(int timeout) +{ + lttng_ust_blocking_retry_timeout = timeout; +} + /** * lib_ring_buffer_reset - Reset ring buffer to initial values. * @buf: Ring buffer. @@ -1985,6 +1994,23 @@ void lib_ring_buffer_switch_slow(struct lttng_ust_lib_ring_buffer *buf, enum swi lib_ring_buffer_switch_old_end(buf, chan, &offsets, tsc, handle); } +static +bool handle_blocking_retry(int *timeout_left_ms) +{ + int timeout = *timeout_left_ms, delay; + + if (caa_likely(!timeout)) + return false; /* Do not retry, discard event. */ + if (timeout < 0) /* Wait forever. */ + delay = RETRY_DELAY_MS; + else + delay = min_t(int, timeout, RETRY_DELAY_MS); + (void) poll(NULL, 0, delay); + if (timeout > 0) + *timeout_left_ms -= delay; + return true; /* Retry. */ +} + /* * Returns : * 0 if ok @@ -2001,6 +2027,7 @@ int lib_ring_buffer_try_reserve_slow(struct lttng_ust_lib_ring_buffer *buf, const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; struct lttng_ust_shm_handle *handle = ctx->handle; unsigned long reserve_commit_diff, offset_cmp; + int timeout_left_ms = lttng_ust_blocking_retry_timeout; retry: offsets->begin = offset_cmp = v_read(config, &buf->offset); @@ -2083,6 +2110,9 @@ retry: >= chan->backend.buf_size)) { unsigned long nr_lost; + if (handle_blocking_retry(&timeout_left_ms)) + goto retry; + /* * We do not overwrite non consumed buffers * and we are full : record is lost. -- 2.34.1