common: split ini-config in its own convenience library
[lttng-tools.git] / src / bin / lttng-relayd / tcp_keep_alive.cpp
CommitLineData
/*
 * Copyright (C) 2017 Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
 *
 * SPDX-License-Identifier: GPL-2.0-only
 *
 */
7
8#include <sys/types.h>
9#include <netinet/tcp.h>
10#include <stdbool.h>
11#include <sys/socket.h>
12#include <limits.h>
13
14#include <common/compat/getenv.h>
15#include <common/time.h>
16#include <common/defaults.h>
3299fd31 17#include <common/ini-config/ini-config.h>
f056029c
JR
18
19#include "tcp_keep_alive.h"
20
/*
 * Bounds, in seconds, enforced on Solaris for the keep-alive idle time and
 * the keep-alive abort threshold.
 */
#define SOLARIS_IDLE_TIME_MIN_S 10
#define SOLARIS_IDLE_TIME_MAX_S 864000 /* 10 days */
#define SOLARIS_ABORT_THRESHOLD_MIN_S 1
#define SOLARIS_ABORT_THRESHOLD_MAX_S 480 /* 8 minutes */

/*
 * Per-platform definitions of TCP socket options.
 *
 * Each COMPAT_* macro maps to the native option of the platform. An option
 * that does not exist on a platform is defined to 0 so that the code using
 * it still compiles; the matching `*_supported` flag of
 * struct tcp_keep_alive_support is then false and the option is never
 * passed to setsockopt().
 */
#if defined (__linux__)

#define COMPAT_TCP_LEVEL SOL_TCP
#define COMPAT_TCP_ABORT_THRESHOLD 0 /* Does not exist on linux. */
#define COMPAT_TCP_KEEPIDLE TCP_KEEPIDLE
#define COMPAT_TCP_KEEPINTVL TCP_KEEPINTVL
#define COMPAT_TCP_KEEPCNT TCP_KEEPCNT

#elif defined (__sun__) /* ! defined (__linux__) */

#define COMPAT_TCP_LEVEL IPPROTO_TCP

/* The Solaris options are only used when the system headers define them. */
#ifdef TCP_KEEPALIVE_THRESHOLD
#define COMPAT_TCP_KEEPIDLE TCP_KEEPALIVE_THRESHOLD
#else /* ! defined (TCP_KEEPALIVE_THRESHOLD) */
#define COMPAT_TCP_KEEPIDLE 0
#endif /* TCP_KEEPALIVE_THRESHOLD */

#ifdef TCP_KEEPALIVE_ABORT_THRESHOLD
#define COMPAT_TCP_ABORT_THRESHOLD TCP_KEEPALIVE_ABORT_THRESHOLD
#else /* ! defined (TCP_KEEPALIVE_ABORT_THRESHOLD) */
#define COMPAT_TCP_ABORT_THRESHOLD 0
#endif /* TCP_KEEPALIVE_ABORT_THRESHOLD */

#define COMPAT_TCP_KEEPINTVL 0 /* Does not exist on Solaris. */
#define COMPAT_TCP_KEEPCNT 0 /* Does not exist on Solaris. */

#else /* ! defined (__linux__) && ! defined (__sun__) */

/* No per-socket keep-alive option is used on other platforms. */
#define COMPAT_TCP_LEVEL 0
#define COMPAT_TCP_ABORT_THRESHOLD 0
#define COMPAT_TCP_KEEPIDLE 0
#define COMPAT_TCP_KEEPINTVL 0
#define COMPAT_TCP_KEEPCNT 0

#endif /* ! defined (__linux__) && ! defined (__sun__) */
63
/*
 * Keep-alive capabilities of the platform this file is built for. Filled by
 * tcp_keep_alive_init_support().
 */
struct tcp_keep_alive_support {
	/* TCP keep-alive is supported by this platform. */
	bool supported;
	/* Overriding idle-time per socket is supported by this platform. */
	bool idle_time_supported;
	/*
	 * Overriding probe interval per socket is supported by this
	 * platform.
	 */
	bool probe_interval_supported;
	/*
	 * Configuring max probe count per socket is supported by this
	 * platform.
	 */
	bool max_probe_count_supported;
	/*
	 * Overriding the abort threshold on a per-socket basis is supported
	 * by this platform (Solaris-specific option).
	 */
	bool abort_threshold_supported;
};
82
/*
 * Keep-alive settings gathered from the environment by
 * tcp_keep_alive_init_config().
 *
 * For the numeric fields, only values greater than 0 are applied to
 * sockets by socket_apply_keep_alive_config(); -1 (the initial value) and 0
 * leave the system default in place.
 */
struct tcp_keep_alive_config {
	/* Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV environment variable. */
	bool enabled;
	/*
	 * Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV environment
	 * variable.
	 *
	 * Stored in the unit expected by the platform, i.e. after conversion
	 * by convert_idle_time() (milliseconds on Solaris).
	 */
	int idle_time;
	/*
	 * Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV
	 * environment variable.
	 */
	int probe_interval;
	/*
	 * Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV
	 * environment variable.
	 */
	int max_probe_count;
	/*
	 * Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV
	 * environment variable.
	 *
	 * Stored in the unit expected by the platform, i.e. after conversion
	 * by convert_abort_threshold() (milliseconds on Solaris).
	 */
	int abort_threshold;
};
107
3d16a92d
SM
/*
 * Keep-alive configuration used by socket_apply_keep_alive_config().
 * Starts disabled with every numeric setting at -1; updated once by
 * tcp_keep_alive_init().
 */
static struct tcp_keep_alive_config the_config = {.enabled = false,
		.idle_time = -1,
		.probe_interval = -1,
		.max_probe_count = -1,
		.abort_threshold = -1};

/*
 * Keep-alive capabilities of the platform. Starts with nothing supported;
 * filled once by tcp_keep_alive_init().
 */
static struct tcp_keep_alive_support the_support = {.supported = false,
		.idle_time_supported = false,
		.probe_interval_supported = false,
		.max_probe_count_supported = false,
		.abort_threshold_supported = false};
f056029c
JR
119
120/*
121 * Common parser for string to positive int conversion where the value must be
122 * in range [-1, INT_MAX].
123 *
124 * Returns -2 on invalid value.
125 */
126static
127int get_env_int(const char *env_var,
128 const char *value)
129{
130 int ret;
131 long tmp;
132 char *endptr = NULL;
133
134 errno = 0;
135 tmp = strtol(value, &endptr, 0);
136 if (errno != 0) {
137 ERR("%s cannot be parsed.", env_var);
138 PERROR("errno for previous parsing failure");
139 ret = -2;
140 goto end;
141 }
142
143 if (endptr == value || *endptr != '\0') {
144 ERR("%s is not a valid number", env_var);
145 ret = -1;
146 goto end;
147 }
148
149 if (tmp < -1) {
150 ERR("%s must be greater or equal to -1", env_var);
151 ret = -2;
152 goto end;
153 }
154 if (tmp > INT_MAX){
155 ERR("%s is too big. Maximum value is %d", env_var, INT_MAX);
156 ret = -2;
157 goto end;
158 }
159
160 ret = (int) tmp;
161end:
162 return ret;
163}
164
/*
 * Translate a keep-alive idle time given in seconds into the value expected
 * by the per-socket idle time option of the platform.
 *
 * Returns the converted value, or -2 on invalid value.
 */
#ifdef __sun__

static
int convert_idle_time(int value)
{
	unsigned int idle_ms;
	bool wrapped;

	/* -1 and 0 are passed through untouched: use system defaults. */
	if (value == -1 || value == 0) {
		return value;
	}

	if (value < 0) {
		ERR("Invalid tcp keep-alive idle time (%i)", value);
		return -2;
	}

	/*
	 * Additional constraints for Solaris 11.
	 * Minimum 10s, maximum 10 days. Defined by
	 * https://docs.oracle.com/cd/E23824_01/html/821-1475/tcp-7p.html#REFMAN7tcp-7p
	 */
	if (value < SOLARIS_IDLE_TIME_MIN_S ||
			value > SOLARIS_IDLE_TIME_MAX_S) {
		ERR("%s must be comprised between %d and %d inclusively on Solaris",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
				SOLARIS_IDLE_TIME_MIN_S,
				SOLARIS_IDLE_TIME_MAX_S);
		return -2;
	}

	/* On Solaris idle time is given in milliseconds. */
	idle_ms = ((unsigned int) value) * MSEC_PER_SEC;

	/* Dividing back must yield the multiplier, otherwise it wrapped. */
	wrapped = value != 0 &&
			(idle_ms / ((unsigned int) value)) != MSEC_PER_SEC;
	if (wrapped || idle_ms > INT_MAX) {
		const int max_value = INT_MAX / MSEC_PER_SEC;

		ERR("%s is too big: maximum supported value is %d",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
				max_value);
		return -2;
	}

	/* idle_ms is >= 0 and <= INT_MAX. Cast is safe. */
	return (int) idle_ms;
}

#else /* ! defined(__sun__) */

/* No conversion is applied on other platforms. */
static
int convert_idle_time(int value)
{
	return value;
}

#endif /* ! defined(__sun__) */
233
234/* Per-platform support of tcp_keep_alive functionality. */
235#if defined (__linux__)
236
237static
238void tcp_keep_alive_init_support(struct tcp_keep_alive_support *support)
239{
240 support->supported = true;
241 support->idle_time_supported = true;
242 support->probe_interval_supported = true;
243 support->max_probe_count_supported = true;
244 /* Solaris specific */
245 support->abort_threshold_supported = false;
246}
247
248#elif defined(__sun__) /* ! defined (__linux__) */
249
250static
251void tcp_keep_alive_init_support(struct tcp_keep_alive_support *support)
252{
253 support->supported = true;
254#ifdef TCP_KEEPALIVE_THRESHOLD
255 support->idle_time_supported = true;
256#else
257 support->idle_time_supported = false;;
258#endif /* TCP_KEEPALIVE_THRESHOLD */
259
260 /*
261 * Solaris does not support either tcp_keepalive_probes or
262 * tcp_keepalive_intvl.
263 * Inferring a value for TCP_KEEP_ALIVE_ABORT_THRESHOLD using
264 * (tcp_keepalive_probes * tcp_keepalive_intvl) could yield a good
265 * alternative, but Solaris does not detail the algorithm used (such as
266 * constant time retry like Linux).
267 *
268 * Ignore those settings on Solaris 11. We prefer exposing an
269 * environment variable only used on Solaris for the abort threshold.
270 */
271 support->probe_interval_supported = false;
272 support->max_probe_count_supported = false;
273#ifdef TCP_KEEPALIVE_ABORT_THRESHOLD
274 support->abort_threshold_supported = true;
275#else
276 support->abort_threshold_supported = false;
277#endif /* TCP_KEEPALIVE_THRESHOLD */
278}
279
280#else /* ! defined(__sun__) && ! defined(__linux__) */
281
282/* Assume nothing is supported on other platforms. */
283static
284void tcp_keep_alive_init_support(struct tcp_keep_alive_support *support)
285{
286 support->supported = false;
287 support->idle_time_supported = false;
288 support->probe_interval_supported = false;
289 support->max_probe_count_supported = false;
290 support->abort_threshold_supported = false;
291}
292
293#endif /* ! defined(__sun__) && ! defined(__linux__) */
294
#ifdef __sun__

/*
 * Solaris specific modifier for abort threshold: translate a value given in
 * seconds into milliseconds.
 * Return -2 on error.
 */
static
int convert_abort_threshold(int value)
{
	unsigned int threshold_ms;
	bool wrapped;

	/* -1 is passed through untouched: use system defaults. */
	if (value == -1) {
		return value;
	}

	if (value < 0) {
		ERR("Invalid tcp keep-alive abort threshold (%i)", value);
		return -2;
	}

	/*
	 * Additional constraints for Solaris 11.
	 *
	 * Between 0 and 8 minutes.
	 * https://docs.oracle.com/cd/E19120-01/open.solaris/819-2724/fsvdh/index.html
	 *
	 * Restrict from 1 second to 8 minutes since the 0 value goes against
	 * the purpose of dead peers detection by never timing out when probing.
	 * It does NOT mean that the connection times out immediately.
	 */
	if (value < SOLARIS_ABORT_THRESHOLD_MIN_S ||
			value > SOLARIS_ABORT_THRESHOLD_MAX_S) {
		ERR("%s must be comprised between %d and %d inclusively on Solaris",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
				SOLARIS_ABORT_THRESHOLD_MIN_S,
				SOLARIS_ABORT_THRESHOLD_MAX_S);
		return -2;
	}

	/* Abort threshold is given in milliseconds. */
	threshold_ms = ((unsigned int) value) * MSEC_PER_SEC;

	/* Dividing back must yield the multiplier, otherwise it wrapped. */
	wrapped = value != 0 &&
			(threshold_ms / ((unsigned int) value)) != MSEC_PER_SEC;
	if (wrapped || threshold_ms > INT_MAX) {
		const int max_value = INT_MAX / MSEC_PER_SEC;

		ERR("%s is too big: maximum supported value is %d",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
				max_value);
		return -2;
	}

	/* threshold_ms is >= 0 and <= INT_MAX. Cast is safe. */
	return (int) threshold_ms;
}

#else

/* No conversion is applied on other platforms. */
static
int convert_abort_threshold(int value)
{
	return value;
}

#endif /* defined (__sun__) */
367
368/*
369 * Retrieve settings from environment variables and warn for settings not
370 * supported by the platform.
371 */
372static
373int tcp_keep_alive_init_config(struct tcp_keep_alive_support *support,
374 struct tcp_keep_alive_config *config)
375{
376 int ret;
377 const char *value;
378
379 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV);
380 if (!support->supported) {
381 if (value) {
382 WARN("Using per-socket TCP keep-alive mechanism is not supported by this platform. Ignoring the %s environment variable.",
383 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV);
384 }
385 config->enabled = false;
386 } else if (value) {
387 ret = config_parse_value(value);
388 if (ret < 0 || ret > 1) {
389 ERR("Invalid value for %s", DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV);
390 ret = 1;
391 goto error;
392 }
393 config->enabled = ret;
394 }
395 DBG("TCP keep-alive mechanism %s", config->enabled ? "enabled": "disabled");
396
397 /* Get value for tcp_keepalive_time in seconds. */
398 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV);
399 if (!support->idle_time_supported && value) {
400 WARN("Overriding the TCP keep-alive idle time threshold per-socket is not supported by this platform. Ignoring the %s environment variable.",
401 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV);
402 config->idle_time = -1;
403 } else if (value) {
404 int idle_time_platform;
405 int idle_time_seconds;
406
407 idle_time_seconds = get_env_int(
408 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
409 value);
410 if (idle_time_seconds < -1) {
411 ret = 1;
412 goto error;
413 }
414
415 idle_time_platform = convert_idle_time(idle_time_seconds);
416 if (idle_time_platform < -1) {
417 ret = 1;
418 goto error;
419 }
420
421 config->idle_time = idle_time_platform;
422 DBG("Overriding %s to %d",
423 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
424 idle_time_seconds);
425 }
426
427 /* Get value for tcp_keepalive_intvl in seconds. */
428 value = lttng_secure_getenv(
429 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV);
430 if (!support->probe_interval_supported && value) {
431 WARN("Overriding the TCP keep-alive probe interval time per-socket is not supported by this platform. Ignoring the %s environment variable.",
432 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV);
433 config->probe_interval = -1;
434 } else if (value) {
435 int probe_interval;
436
437 probe_interval = get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV,
438 value);
439 if (probe_interval < -1) {
440 ret = 1;
441 goto error;
442 }
443
444 config->probe_interval = probe_interval;
445 DBG("Overriding %s to %d",
446 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV,
447 config->probe_interval);
448 }
449
450 /* Get value for tcp_keepalive_probes. */
451 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV);
452 if (!support->max_probe_count_supported && value) {
453 WARN("Overriding the TCP keep-alive maximum probe count per-socket is not supported by this platform. Ignoring the %s environment variable.",
454 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV);
455 config->max_probe_count = -1;
456 } else if (value) {
457 int max_probe_count;
458
459 max_probe_count = get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV,
460 value);
461 if (max_probe_count < -1) {
462 ret = 1;
463 goto error;
464 }
465
466 config->max_probe_count = max_probe_count;
467 DBG("Overriding %s to %d",
468 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV,
469 config->max_probe_count);
470 }
471
472 /* Get value for tcp_keepalive_abort_interval. */
473 value = lttng_secure_getenv(
474 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV);
475 if (!support->abort_threshold_supported && value) {
476 WARN("Overriding the TCP keep-alive abort threshold per-socket is not supported by this platform. Ignoring the %s environment variable.",
477 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV);
478 config->abort_threshold = -1;
479 } else if (value) {
480 int abort_threshold_platform;
481 int abort_threshold_seconds;
482
483 abort_threshold_seconds = get_env_int(
484 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV,
485 value);
486 if (abort_threshold_seconds < -1) {
487 ret = 1;
488 goto error;
489 }
490
491 abort_threshold_platform = convert_abort_threshold(
492 abort_threshold_seconds);
493 if (abort_threshold_platform < -1) {
494 ret = 1;
495 goto error;
496 }
497
498 config->abort_threshold = abort_threshold_platform;
499 DBG("Overriding %s to %d",
500 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
501 config->abort_threshold);
502 }
503
504 ret = 0;
505
506error:
507 return ret;
508}
509
510/* Initialize the TCP keep-alive configuration. */
511__attribute__((constructor)) static
e4265826 512void tcp_keep_alive_init(void)
f056029c 513{
3d16a92d
SM
514 tcp_keep_alive_init_support(&the_support);
515 (void) tcp_keep_alive_init_config(&the_support, &the_config);
f056029c
JR
516}
517
518/*
519 * Set the socket options regarding TCP keep-alive.
520 */
f056029c
JR
521int socket_apply_keep_alive_config(int socket_fd)
522{
523 int ret;
524 int val = 1;
525
526 /* TCP keep-alive */
3d16a92d 527 if (!the_support.supported || !the_config.enabled) {
f056029c
JR
528 ret = 0;
529 goto end;
530 }
531
cbfbb78f 532 DBG("TCP keep-alive enabled for socket %d", socket_fd);
627e6953 533 ret = setsockopt(socket_fd, SOL_SOCKET, SO_KEEPALIVE, &val,
f056029c
JR
534 sizeof(val));
535 if (ret < 0) {
536 PERROR("setsockopt so_keepalive");
537 goto end;
538 }
539
540 /* TCP keep-alive idle time */
3d16a92d
SM
541 if (the_support.idle_time_supported && the_config.idle_time > 0) {
542 DBG("TCP keep-alive keep idle: %d enabled for socket %d",
543 the_config.idle_time, socket_fd);
544 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL,
545 COMPAT_TCP_KEEPIDLE, &the_config.idle_time,
546 sizeof(the_config.idle_time));
f056029c
JR
547 if (ret < 0) {
548 PERROR("setsockopt TCP_KEEPIDLE");
549 goto end;
550 }
551 }
552 /* TCP keep-alive probe interval */
3d16a92d
SM
553 if (the_support.probe_interval_supported &&
554 the_config.probe_interval > 0) {
555 DBG("TCP keep-alive probe_interval: %d enabled for socket %d",
556 the_config.probe_interval, socket_fd);
557 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL,
558 COMPAT_TCP_KEEPINTVL,
559 &the_config.probe_interval,
560 sizeof(the_config.probe_interval));
f056029c
JR
561 if (ret < 0) {
562 PERROR("setsockopt TCP_KEEPINTVL");
563 goto end;
564 }
565 }
566
567 /* TCP keep-alive max probe count */
3d16a92d
SM
568 if (the_support.max_probe_count_supported &&
569 the_config.max_probe_count > 0) {
570 DBG("TCP keep-alive max_probe: %d enabled for socket %d",
571 the_config.max_probe_count, socket_fd);
572 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL,
573 COMPAT_TCP_KEEPCNT, &the_config.max_probe_count,
574 sizeof(the_config.max_probe_count));
f056029c
JR
575 if (ret < 0) {
576 PERROR("setsockopt TCP_KEEPCNT");
577 goto end;
578 }
579 }
580
581 /* TCP keep-alive abort threshold */
3d16a92d
SM
582 if (the_support.abort_threshold_supported &&
583 the_config.abort_threshold > 0) {
584 DBG("TCP keep-alive abort threshold: %d enabled for socket %d",
585 the_config.abort_threshold, socket_fd);
586 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL,
587 COMPAT_TCP_ABORT_THRESHOLD,
588 &the_config.abort_threshold,
589 sizeof(the_config.max_probe_count));
f056029c
JR
590 if (ret < 0) {
591 PERROR("setsockopt TCP_KEEPALIVE_ABORT_THRESHOLD");
592 goto end;
593 }
594 }
595end:
596 return ret;
597}
This page took 0.0658879999999999 seconds and 4 git commands to generate.