2 * Copyright (C) 2017 Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
4 * SPDX-License-Identifier: GPL-2.0-only
#include <errno.h>
#include <limits.h>
#include <netinet/tcp.h>
#include <stdlib.h>
#include <sys/socket.h>

#include <common/compat/getenv.hpp>
#include <common/time.hpp>
#include <common/defaults.hpp>
#include <common/ini-config/ini-config.hpp>

#include "tcp_keep_alive.hpp"
/* Bounds, in seconds, applied to user-provided values on Solaris. */
#define SOLARIS_IDLE_TIME_MIN_S 10
#define SOLARIS_IDLE_TIME_MAX_S 864000 /* 10 days */
#define SOLARIS_ABORT_THRESHOLD_MIN_S 1
#define SOLARIS_ABORT_THRESHOLD_MAX_S 480 /* 8 minutes */

/*
 * Per-platform definitions of TCP socket options.
 *
 * Every COMPAT_* name is defined on every platform so that the code using
 * them always compiles; an option that the platform lacks is defined to 0.
 */
#if defined (__linux__)

#define COMPAT_TCP_LEVEL SOL_TCP
#define COMPAT_TCP_ABORT_THRESHOLD 0 /* Does not exist on linux. */
#define COMPAT_TCP_KEEPIDLE TCP_KEEPIDLE
#define COMPAT_TCP_KEEPINTVL TCP_KEEPINTVL
#define COMPAT_TCP_KEEPCNT TCP_KEEPCNT

#elif defined (__sun__) /* ! defined (__linux__) */

#define COMPAT_TCP_LEVEL IPPROTO_TCP

#ifdef TCP_KEEPALIVE_THRESHOLD
#define COMPAT_TCP_KEEPIDLE TCP_KEEPALIVE_THRESHOLD
#else /* ! defined (TCP_KEEPALIVE_THRESHOLD) */
#define COMPAT_TCP_KEEPIDLE 0
#endif /* TCP_KEEPALIVE_THRESHOLD */

#ifdef TCP_KEEPALIVE_ABORT_THRESHOLD
#define COMPAT_TCP_ABORT_THRESHOLD TCP_KEEPALIVE_ABORT_THRESHOLD
#else /* ! defined (TCP_KEEPALIVE_ABORT_THRESHOLD) */
#define COMPAT_TCP_ABORT_THRESHOLD 0
#endif /* TCP_KEEPALIVE_ABORT_THRESHOLD */

#define COMPAT_TCP_KEEPINTVL 0 /* Does not exist on Solaris. */
#define COMPAT_TCP_KEEPCNT 0 /* Does not exist on Solaris. */

#else /* ! defined (__linux__) && ! defined (__sun__) */

#define COMPAT_TCP_LEVEL 0
#define COMPAT_TCP_ABORT_THRESHOLD 0
#define COMPAT_TCP_KEEPIDLE 0
#define COMPAT_TCP_KEEPINTVL 0
#define COMPAT_TCP_KEEPCNT 0

#endif /* ! defined (__linux__) && ! defined (__sun__) */
/*
 * Describes which TCP keep-alive knobs can be set per socket on the
 * platform this program was built for.
 */
struct tcp_keep_alive_support {
	/* TCP keep-alive is supported by this platform. */
	bool supported;
	/* Overriding idle-time per socket is supported by this platform. */
	bool idle_time_supported;
	/*
	 * Overriding probe interval per socket is supported by this
	 * platform.
	 */
	bool probe_interval_supported;
	/*
	 * Configuring max probe count per socket is supported by this
	 * platform.
	 */
	bool max_probe_count_supported;
	/*
	 * Overriding the abort threshold on a per-socket basis is supported
	 * by this platform.
	 */
	bool abort_threshold_supported;
};
/*
 * User-requested TCP keep-alive settings, one member per environment
 * variable. For the integer members, -1 is the "not set" value used by the
 * initializer and by the configuration code in this file.
 */
struct tcp_keep_alive_config {
	/* Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV environment variable. */
	bool enabled;
	/*
	 * Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV environment
	 * variable.
	 */
	int idle_time;
	/*
	 * Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV
	 * environment variable.
	 */
	int probe_interval;
	/*
	 * Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV
	 * environment variable.
	 */
	int max_probe_count;
	/*
	 * Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV
	 * environment variable.
	 */
	int abort_threshold;
};
109 struct tcp_keep_alive_config the_config
= {.enabled
= false,
111 .probe_interval
= -1,
112 .max_probe_count
= -1,
113 .abort_threshold
= -1};
115 struct tcp_keep_alive_support the_support
= {.supported
= false,
116 .idle_time_supported
= false,
117 .probe_interval_supported
= false,
118 .max_probe_count_supported
= false,
119 .abort_threshold_supported
= false};
123 * Common parser for string to positive int conversion where the value must be
124 * in range [-1, INT_MAX].
126 * Returns -2 on invalid value.
129 int get_env_int(const char *env_var
,
137 tmp
= strtol(value
, &endptr
, 0);
139 ERR("%s cannot be parsed.", env_var
);
140 PERROR("errno for previous parsing failure");
145 if (endptr
== value
|| *endptr
!= '\0') {
146 ERR("%s is not a valid number", env_var
);
152 ERR("%s must be greater or equal to -1", env_var
);
157 ERR("%s is too big. Maximum value is %d", env_var
, INT_MAX
);
/*
 * Per-platform implementation of tcp_keep_alive_idle_time_modifier.
 *
 * Takes an idle time in seconds and returns the value to hand to the
 * platform's idle-time socket option.
 *
 * Returns -2 on invalid value.
 */
#ifdef __sun__

static
int convert_idle_time(int value)
{
	unsigned int tmp_ms;

	if (value == -1 || value == 0) {
		/* Use system defaults */
		return value;
	}

	if (value < 0) {
		ERR("Invalid tcp keep-alive idle time (%i)", value);
		return -2;
	}

	/*
	 * Additional constraints for Solaris 11.
	 * Minimum 10s, maximum 10 days. Defined by
	 * https://docs.oracle.com/cd/E23824_01/html/821-1475/tcp-7p.html#REFMAN7tcp-7p
	 */
	if ((value < SOLARIS_IDLE_TIME_MIN_S ||
			value > SOLARIS_IDLE_TIME_MAX_S)) {
		ERR("%s must be comprised between %d and %d inclusively on Solaris",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
				SOLARIS_IDLE_TIME_MIN_S,
				SOLARIS_IDLE_TIME_MAX_S);
		return -2;
	}

	/* On Solaris idle time is given in milliseconds. */
	tmp_ms = ((unsigned int) value) * MSEC_PER_SEC;
	if ((value != 0 && (tmp_ms / ((unsigned int) value)) != MSEC_PER_SEC)
			|| tmp_ms > INT_MAX) {
		/* Overflow. */
		const int max_value = INT_MAX / MSEC_PER_SEC;

		ERR("%s is too big: maximum supported value is %d",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
				max_value);
		return -2;
	}

	/* tmp_ms is >= 0 and <= INT_MAX. Cast is safe. */
	return (int) tmp_ms;
}

#else /* ! defined(__sun__) */

static
int convert_idle_time(int value)
{
	/* No conversion is applied outside of Solaris. */
	return value;
}

#endif /* ! defined(__sun__) */
236 /* Per-platform support of tcp_keep_alive functionality. */
237 #if defined (__linux__)
240 void tcp_keep_alive_init_support(struct tcp_keep_alive_support
*support
)
242 support
->supported
= true;
243 support
->idle_time_supported
= true;
244 support
->probe_interval_supported
= true;
245 support
->max_probe_count_supported
= true;
246 /* Solaris specific */
247 support
->abort_threshold_supported
= false;
250 #elif defined(__sun__) /* ! defined (__linux__) */
253 void tcp_keep_alive_init_support(struct tcp_keep_alive_support
*support
)
255 support
->supported
= true;
256 #ifdef TCP_KEEPALIVE_THRESHOLD
257 support
->idle_time_supported
= true;
259 support
->idle_time_supported
= false;;
260 #endif /* TCP_KEEPALIVE_THRESHOLD */
263 * Solaris does not support either tcp_keepalive_probes or
264 * tcp_keepalive_intvl.
265 * Inferring a value for TCP_KEEP_ALIVE_ABORT_THRESHOLD using
266 * (tcp_keepalive_probes * tcp_keepalive_intvl) could yield a good
267 * alternative, but Solaris does not detail the algorithm used (such as
268 * constant time retry like Linux).
270 * Ignore those settings on Solaris 11. We prefer exposing an
271 * environment variable only used on Solaris for the abort threshold.
273 support
->probe_interval_supported
= false;
274 support
->max_probe_count_supported
= false;
275 #ifdef TCP_KEEPALIVE_ABORT_THRESHOLD
276 support
->abort_threshold_supported
= true;
278 support
->abort_threshold_supported
= false;
279 #endif /* TCP_KEEPALIVE_THRESHOLD */
282 #else /* ! defined(__sun__) && ! defined(__linux__) */
284 /* Assume nothing is supported on other platforms. */
286 void tcp_keep_alive_init_support(struct tcp_keep_alive_support
*support
)
288 support
->supported
= false;
289 support
->idle_time_supported
= false;
290 support
->probe_interval_supported
= false;
291 support
->max_probe_count_supported
= false;
292 support
->abort_threshold_supported
= false;
295 #endif /* ! defined(__sun__) && ! defined(__linux__) */
/*
 * Solaris specific modifier for abort threshold.
 *
 * Takes an abort threshold in seconds and returns the value to hand to the
 * platform's abort-threshold socket option.
 *
 * Return -2 on error.
 */
#ifdef __sun__

static
int convert_abort_threshold(int value)
{
	unsigned int tmp_ms;

	if (value == -1) {
		/* Use system defaults */
		return value;
	}

	if (value < 0) {
		ERR("Invalid tcp keep-alive abort threshold (%i)", value);
		return -2;
	}

	/*
	 * Additional constraints for Solaris 11.
	 *
	 * Between 0 and 8 minutes.
	 * https://docs.oracle.com/cd/E19120-01/open.solaris/819-2724/fsvdh/index.html
	 *
	 * Restrict from 1 second to 8 minutes since the 0 value goes against
	 * the purpose of dead peers detection by never timing out when probing.
	 * It does NOT mean that the connection times out immediately.
	 */
	if ((value < SOLARIS_ABORT_THRESHOLD_MIN_S || value > SOLARIS_ABORT_THRESHOLD_MAX_S)) {
		ERR("%s must be comprised between %d and %d inclusively on Solaris",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
				SOLARIS_ABORT_THRESHOLD_MIN_S,
				SOLARIS_ABORT_THRESHOLD_MAX_S);
		return -2;
	}

	/* Abort threshold is given in milliseconds. */
	tmp_ms = ((unsigned int) value) * MSEC_PER_SEC;
	if ((value != 0 && (tmp_ms / ((unsigned int) value)) != MSEC_PER_SEC)
			|| tmp_ms > INT_MAX) {
		/* Overflow. */
		const int max_value = INT_MAX / MSEC_PER_SEC;

		ERR("%s is too big: maximum supported value is %d",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
				max_value);
		return -2;
	}

	/* tmp_ms is >= 0 and <= INT_MAX. Cast is safe. */
	return (int) tmp_ms;
}

#else /* ! defined (__sun__) */

static
int convert_abort_threshold(int value)
{
	/* No conversion is applied outside of Solaris. */
	return value;
}

#endif /* defined (__sun__) */
371 * Retrieve settings from environment variables and warn for settings not
372 * supported by the platform.
375 int tcp_keep_alive_init_config(struct tcp_keep_alive_support
*support
,
376 struct tcp_keep_alive_config
*config
)
381 value
= lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV
);
382 if (!support
->supported
) {
384 WARN("Using per-socket TCP keep-alive mechanism is not supported by this platform. Ignoring the %s environment variable.",
385 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV
);
387 config
->enabled
= false;
389 ret
= config_parse_value(value
);
390 if (ret
< 0 || ret
> 1) {
391 ERR("Invalid value for %s", DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV
);
395 config
->enabled
= ret
;
397 DBG("TCP keep-alive mechanism %s", config
->enabled
? "enabled": "disabled");
399 /* Get value for tcp_keepalive_time in seconds. */
400 value
= lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV
);
401 if (!support
->idle_time_supported
&& value
) {
402 WARN("Overriding the TCP keep-alive idle time threshold per-socket is not supported by this platform. Ignoring the %s environment variable.",
403 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV
);
404 config
->idle_time
= -1;
406 int idle_time_platform
;
407 int idle_time_seconds
;
409 idle_time_seconds
= get_env_int(
410 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV
,
412 if (idle_time_seconds
< -1) {
417 idle_time_platform
= convert_idle_time(idle_time_seconds
);
418 if (idle_time_platform
< -1) {
423 config
->idle_time
= idle_time_platform
;
424 DBG("Overriding %s to %d",
425 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV
,
429 /* Get value for tcp_keepalive_intvl in seconds. */
430 value
= lttng_secure_getenv(
431 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV
);
432 if (!support
->probe_interval_supported
&& value
) {
433 WARN("Overriding the TCP keep-alive probe interval time per-socket is not supported by this platform. Ignoring the %s environment variable.",
434 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV
);
435 config
->probe_interval
= -1;
439 probe_interval
= get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV
,
441 if (probe_interval
< -1) {
446 config
->probe_interval
= probe_interval
;
447 DBG("Overriding %s to %d",
448 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV
,
449 config
->probe_interval
);
452 /* Get value for tcp_keepalive_probes. */
453 value
= lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV
);
454 if (!support
->max_probe_count_supported
&& value
) {
455 WARN("Overriding the TCP keep-alive maximum probe count per-socket is not supported by this platform. Ignoring the %s environment variable.",
456 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV
);
457 config
->max_probe_count
= -1;
461 max_probe_count
= get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV
,
463 if (max_probe_count
< -1) {
468 config
->max_probe_count
= max_probe_count
;
469 DBG("Overriding %s to %d",
470 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV
,
471 config
->max_probe_count
);
474 /* Get value for tcp_keepalive_abort_interval. */
475 value
= lttng_secure_getenv(
476 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV
);
477 if (!support
->abort_threshold_supported
&& value
) {
478 WARN("Overriding the TCP keep-alive abort threshold per-socket is not supported by this platform. Ignoring the %s environment variable.",
479 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV
);
480 config
->abort_threshold
= -1;
482 int abort_threshold_platform
;
483 int abort_threshold_seconds
;
485 abort_threshold_seconds
= get_env_int(
486 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV
,
488 if (abort_threshold_seconds
< -1) {
493 abort_threshold_platform
= convert_abort_threshold(
494 abort_threshold_seconds
);
495 if (abort_threshold_platform
< -1) {
500 config
->abort_threshold
= abort_threshold_platform
;
501 DBG("Overriding %s to %d",
502 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV
,
503 config
->abort_threshold
);
512 /* Initialize the TCP keep-alive configuration. */
513 __attribute__((constructor
)) static
514 void tcp_keep_alive_init(void)
516 tcp_keep_alive_init_support(&the_support
);
517 (void) tcp_keep_alive_init_config(&the_support
, &the_config
);
521 * Set the socket options regarding TCP keep-alive.
523 int socket_apply_keep_alive_config(int socket_fd
)
529 if (!the_support
.supported
|| !the_config
.enabled
) {
534 DBG("TCP keep-alive enabled for socket %d", socket_fd
);
535 ret
= setsockopt(socket_fd
, SOL_SOCKET
, SO_KEEPALIVE
, &val
,
538 PERROR("setsockopt so_keepalive");
542 /* TCP keep-alive idle time */
543 if (the_support
.idle_time_supported
&& the_config
.idle_time
> 0) {
544 DBG("TCP keep-alive keep idle: %d enabled for socket %d",
545 the_config
.idle_time
, socket_fd
);
546 ret
= setsockopt(socket_fd
, COMPAT_TCP_LEVEL
,
547 COMPAT_TCP_KEEPIDLE
, &the_config
.idle_time
,
548 sizeof(the_config
.idle_time
));
550 PERROR("setsockopt TCP_KEEPIDLE");
554 /* TCP keep-alive probe interval */
555 if (the_support
.probe_interval_supported
&&
556 the_config
.probe_interval
> 0) {
557 DBG("TCP keep-alive probe_interval: %d enabled for socket %d",
558 the_config
.probe_interval
, socket_fd
);
559 ret
= setsockopt(socket_fd
, COMPAT_TCP_LEVEL
,
560 COMPAT_TCP_KEEPINTVL
,
561 &the_config
.probe_interval
,
562 sizeof(the_config
.probe_interval
));
564 PERROR("setsockopt TCP_KEEPINTVL");
569 /* TCP keep-alive max probe count */
570 if (the_support
.max_probe_count_supported
&&
571 the_config
.max_probe_count
> 0) {
572 DBG("TCP keep-alive max_probe: %d enabled for socket %d",
573 the_config
.max_probe_count
, socket_fd
);
574 ret
= setsockopt(socket_fd
, COMPAT_TCP_LEVEL
,
575 COMPAT_TCP_KEEPCNT
, &the_config
.max_probe_count
,
576 sizeof(the_config
.max_probe_count
));
578 PERROR("setsockopt TCP_KEEPCNT");
583 /* TCP keep-alive abort threshold */
584 if (the_support
.abort_threshold_supported
&&
585 the_config
.abort_threshold
> 0) {
586 DBG("TCP keep-alive abort threshold: %d enabled for socket %d",
587 the_config
.abort_threshold
, socket_fd
);
588 ret
= setsockopt(socket_fd
, COMPAT_TCP_LEVEL
,
589 COMPAT_TCP_ABORT_THRESHOLD
,
590 &the_config
.abort_threshold
,
591 sizeof(the_config
.max_probe_count
));
593 PERROR("setsockopt TCP_KEEPALIVE_ABORT_THRESHOLD");