Fix: sessiond: ODR violation results in memory corruption
[lttng-tools.git] / src / bin / lttng-relayd / tcp_keep_alive.cpp
... / ...
CommitLineData
1/*
2 * Copyright (C) 2017 Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
3 *
4 * SPDX-License-Identifier: GPL-2.0-only
5 *
6 */
7
8#include <sys/types.h>
9#include <netinet/tcp.h>
10#include <stdbool.h>
11#include <sys/socket.h>
12#include <limits.h>
13
14#include <common/compat/getenv.hpp>
15#include <common/time.hpp>
16#include <common/defaults.hpp>
17#include <common/ini-config/ini-config.hpp>
18
19#include "tcp_keep_alive.hpp"
20
/* Bounds, in seconds, accepted for the keep-alive settings on Solaris. */
#define SOLARIS_IDLE_TIME_MIN_S 10
#define SOLARIS_IDLE_TIME_MAX_S 864000 /* 10 days */
#define SOLARIS_ABORT_THRESHOLD_MIN_S 1
#define SOLARIS_ABORT_THRESHOLD_MAX_S 480 /* 8 minutes */

/*
 * Per-platform definitions of TCP socket options, grouped by option.
 * An option which does not exist on a platform is defined to 0.
 */

/* Level passed to setsockopt() for the options below. */
#if defined(__linux__)
#define COMPAT_TCP_LEVEL SOL_TCP
#elif defined(__sun__)
#define COMPAT_TCP_LEVEL IPPROTO_TCP
#else
#define COMPAT_TCP_LEVEL 0
#endif

/* Keep-alive idle time option. */
#if defined(__linux__)
#define COMPAT_TCP_KEEPIDLE TCP_KEEPIDLE
#elif defined(__sun__) && defined(TCP_KEEPALIVE_THRESHOLD)
#define COMPAT_TCP_KEEPIDLE TCP_KEEPALIVE_THRESHOLD
#else
#define COMPAT_TCP_KEEPIDLE 0
#endif

/* Probe interval and maximum probe count options: they only exist on Linux. */
#if defined(__linux__)
#define COMPAT_TCP_KEEPINTVL TCP_KEEPINTVL
#define COMPAT_TCP_KEEPCNT TCP_KEEPCNT
#else
#define COMPAT_TCP_KEEPINTVL 0
#define COMPAT_TCP_KEEPCNT 0
#endif

/* Abort threshold option: it does not exist on Linux. */
#if !defined(__linux__) && defined(__sun__) && defined(TCP_KEEPALIVE_ABORT_THRESHOLD)
#define COMPAT_TCP_ABORT_THRESHOLD TCP_KEEPALIVE_ABORT_THRESHOLD
#else
#define COMPAT_TCP_ABORT_THRESHOLD 0
#endif
63
namespace {
/* Describes which per-socket TCP keep-alive features the platform offers. */
struct tcp_keep_alive_support {
	/* TCP keep-alive is supported by this platform. */
	bool supported;
	/* Overriding idle-time per socket is supported by this platform. */
	bool idle_time_supported;
	/* Overriding probe interval per socket is supported by this platform. */
	bool probe_interval_supported;
	/* Configuring max probe count per socket is supported by this platform. */
	bool max_probe_count_supported;
	/* Configuring the abort threshold per socket is supported by this platform. */
	bool abort_threshold_supported;
};

/*
 * Keep-alive settings read from the environment. The integer fields are
 * initialized to -1 below; socket_apply_keep_alive_config() only applies
 * values greater than 0.
 */
struct tcp_keep_alive_config {
	/* Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV environment variable. */
	bool enabled;
	/* Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV environment variable. */
	int idle_time;
	/* Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV environment variable. */
	int probe_interval;
	/* Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV environment variable. */
	int max_probe_count;
	/* Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV environment variable. */
	int abort_threshold;
};

/* Active configuration: keep-alive disabled, no value overridden. */
tcp_keep_alive_config the_config = {
	.enabled = false,
	.idle_time = -1,
	.probe_interval = -1,
	.max_probe_count = -1,
	.abort_threshold = -1,
};

/* Platform capabilities: nothing is supported until initialization runs. */
tcp_keep_alive_support the_support = {
	.supported = false,
	.idle_time_supported = false,
	.probe_interval_supported = false,
	.max_probe_count_supported = false,
	.abort_threshold_supported = false,
};
} /* namespace */
121
122/*
123 * Common parser for string to positive int conversion where the value must be
124 * in range [-1, INT_MAX].
125 *
126 * Returns -2 on invalid value.
127 */
128static
129int get_env_int(const char *env_var,
130 const char *value)
131{
132 int ret;
133 long tmp;
134 char *endptr = NULL;
135
136 errno = 0;
137 tmp = strtol(value, &endptr, 0);
138 if (errno != 0) {
139 ERR("%s cannot be parsed.", env_var);
140 PERROR("errno for previous parsing failure");
141 ret = -2;
142 goto end;
143 }
144
145 if (endptr == value || *endptr != '\0') {
146 ERR("%s is not a valid number", env_var);
147 ret = -1;
148 goto end;
149 }
150
151 if (tmp < -1) {
152 ERR("%s must be greater or equal to -1", env_var);
153 ret = -2;
154 goto end;
155 }
156 if (tmp > INT_MAX){
157 ERR("%s is too big. Maximum value is %d", env_var, INT_MAX);
158 ret = -2;
159 goto end;
160 }
161
162 ret = (int) tmp;
163end:
164 return ret;
165}
166
/*
 * Per-platform conversion of the TCP keep-alive idle time.
 *
 * value: idle time in seconds, as read from the environment.
 *
 * Returns the value to hand to the platform's socket option, or -2 on
 * invalid value.
 */
#ifdef __sun__

static
int convert_idle_time(int value)
{
	/* Largest number of seconds whose conversion to milliseconds fits an int. */
	const int max_value = INT_MAX / MSEC_PER_SEC;

	if (value == -1 || value == 0) {
		/* Use system defaults */
		return value;
	}

	if (value < 0) {
		ERR("Invalid tcp keep-alive idle time (%i)", value);
		return -2;
	}

	/*
	 * Additional constraints for Solaris 11.
	 * Minimum 10s, maximum 10 days. Defined by
	 * https://docs.oracle.com/cd/E23824_01/html/821-1475/tcp-7p.html#REFMAN7tcp-7p
	 */
	if (value < SOLARIS_IDLE_TIME_MIN_S ||
			value > SOLARIS_IDLE_TIME_MAX_S) {
		ERR("%s must be comprised between %d and %d inclusively on Solaris",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
				SOLARIS_IDLE_TIME_MIN_S,
				SOLARIS_IDLE_TIME_MAX_S);
		return -2;
	}

	/*
	 * On Solaris idle time is given in milliseconds. Reject, before
	 * multiplying, any value whose conversion would not fit in an int.
	 */
	if (value > max_value) {
		ERR("%s is too big: maximum supported value is %d",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
				max_value);
		return -2;
	}

	/* 0 < value <= max_value: the product fits in an int. */
	return (int) (value * MSEC_PER_SEC);
}

#else /* ! defined(__sun__) */

/* Other platforms take the idle time as-is: no conversion, no validation. */
static
int convert_idle_time(int value)
{
	return value;
}

#endif /* ! defined(__sun__) */
235
236/* Per-platform support of tcp_keep_alive functionality. */
237#if defined (__linux__)
238
239static
240void tcp_keep_alive_init_support(struct tcp_keep_alive_support *support)
241{
242 support->supported = true;
243 support->idle_time_supported = true;
244 support->probe_interval_supported = true;
245 support->max_probe_count_supported = true;
246 /* Solaris specific */
247 support->abort_threshold_supported = false;
248}
249
250#elif defined(__sun__) /* ! defined (__linux__) */
251
252static
253void tcp_keep_alive_init_support(struct tcp_keep_alive_support *support)
254{
255 support->supported = true;
256#ifdef TCP_KEEPALIVE_THRESHOLD
257 support->idle_time_supported = true;
258#else
259 support->idle_time_supported = false;;
260#endif /* TCP_KEEPALIVE_THRESHOLD */
261
262 /*
263 * Solaris does not support either tcp_keepalive_probes or
264 * tcp_keepalive_intvl.
265 * Inferring a value for TCP_KEEP_ALIVE_ABORT_THRESHOLD using
266 * (tcp_keepalive_probes * tcp_keepalive_intvl) could yield a good
267 * alternative, but Solaris does not detail the algorithm used (such as
268 * constant time retry like Linux).
269 *
270 * Ignore those settings on Solaris 11. We prefer exposing an
271 * environment variable only used on Solaris for the abort threshold.
272 */
273 support->probe_interval_supported = false;
274 support->max_probe_count_supported = false;
275#ifdef TCP_KEEPALIVE_ABORT_THRESHOLD
276 support->abort_threshold_supported = true;
277#else
278 support->abort_threshold_supported = false;
279#endif /* TCP_KEEPALIVE_THRESHOLD */
280}
281
282#else /* ! defined(__sun__) && ! defined(__linux__) */
283
284/* Assume nothing is supported on other platforms. */
285static
286void tcp_keep_alive_init_support(struct tcp_keep_alive_support *support)
287{
288 support->supported = false;
289 support->idle_time_supported = false;
290 support->probe_interval_supported = false;
291 support->max_probe_count_supported = false;
292 support->abort_threshold_supported = false;
293}
294
295#endif /* ! defined(__sun__) && ! defined(__linux__) */
296
#ifdef __sun__

/*
 * Solaris specific modifier for abort threshold.
 *
 * value: abort threshold in seconds, as read from the environment.
 *
 * Returns -1 unchanged (use system defaults), the threshold converted to
 * milliseconds otherwise. Return -2 on error.
 */
static
int convert_abort_threshold(int value)
{
	/* Largest number of seconds whose conversion to milliseconds fits an int. */
	const int max_value = INT_MAX / MSEC_PER_SEC;

	if (value == -1) {
		/* Use system defaults */
		return value;
	}

	if (value < 0) {
		ERR("Invalid tcp keep-alive abort threshold (%i)", value);
		return -2;
	}

	/*
	 * Additional constraints for Solaris 11.
	 *
	 * Between 0 and 8 minutes.
	 * https://docs.oracle.com/cd/E19120-01/open.solaris/819-2724/fsvdh/index.html
	 *
	 * Restrict from 1 second to 8 minutes since the 0 value goes against
	 * the purpose of dead peers detection by never timing out when probing.
	 * It does NOT mean that the connection times out immediately.
	 */
	if (value < SOLARIS_ABORT_THRESHOLD_MIN_S ||
			value > SOLARIS_ABORT_THRESHOLD_MAX_S) {
		ERR("%s must be comprised between %d and %d inclusively on Solaris",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
				SOLARIS_ABORT_THRESHOLD_MIN_S,
				SOLARIS_ABORT_THRESHOLD_MAX_S);
		return -2;
	}

	/*
	 * Abort threshold is given in milliseconds. Reject, before
	 * multiplying, any value whose conversion would not fit in an int.
	 */
	if (value > max_value) {
		ERR("%s is too big: maximum supported value is %d",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
				max_value);
		return -2;
	}

	/* 0 < value <= max_value: the product fits in an int. */
	return (int) (value * MSEC_PER_SEC);
}

#else

/* Other platforms: the value is passed through, no conversion or validation. */
static
int convert_abort_threshold(int value)
{
	return value;
}

#endif /* defined (__sun__) */
369
370/*
371 * Retrieve settings from environment variables and warn for settings not
372 * supported by the platform.
373 */
374static
375int tcp_keep_alive_init_config(struct tcp_keep_alive_support *support,
376 struct tcp_keep_alive_config *config)
377{
378 int ret;
379 const char *value;
380
381 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV);
382 if (!support->supported) {
383 if (value) {
384 WARN("Using per-socket TCP keep-alive mechanism is not supported by this platform. Ignoring the %s environment variable.",
385 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV);
386 }
387 config->enabled = false;
388 } else if (value) {
389 ret = config_parse_value(value);
390 if (ret < 0 || ret > 1) {
391 ERR("Invalid value for %s", DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV);
392 ret = 1;
393 goto error;
394 }
395 config->enabled = ret;
396 }
397 DBG("TCP keep-alive mechanism %s", config->enabled ? "enabled": "disabled");
398
399 /* Get value for tcp_keepalive_time in seconds. */
400 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV);
401 if (!support->idle_time_supported && value) {
402 WARN("Overriding the TCP keep-alive idle time threshold per-socket is not supported by this platform. Ignoring the %s environment variable.",
403 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV);
404 config->idle_time = -1;
405 } else if (value) {
406 int idle_time_platform;
407 int idle_time_seconds;
408
409 idle_time_seconds = get_env_int(
410 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
411 value);
412 if (idle_time_seconds < -1) {
413 ret = 1;
414 goto error;
415 }
416
417 idle_time_platform = convert_idle_time(idle_time_seconds);
418 if (idle_time_platform < -1) {
419 ret = 1;
420 goto error;
421 }
422
423 config->idle_time = idle_time_platform;
424 DBG("Overriding %s to %d",
425 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
426 idle_time_seconds);
427 }
428
429 /* Get value for tcp_keepalive_intvl in seconds. */
430 value = lttng_secure_getenv(
431 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV);
432 if (!support->probe_interval_supported && value) {
433 WARN("Overriding the TCP keep-alive probe interval time per-socket is not supported by this platform. Ignoring the %s environment variable.",
434 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV);
435 config->probe_interval = -1;
436 } else if (value) {
437 int probe_interval;
438
439 probe_interval = get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV,
440 value);
441 if (probe_interval < -1) {
442 ret = 1;
443 goto error;
444 }
445
446 config->probe_interval = probe_interval;
447 DBG("Overriding %s to %d",
448 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV,
449 config->probe_interval);
450 }
451
452 /* Get value for tcp_keepalive_probes. */
453 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV);
454 if (!support->max_probe_count_supported && value) {
455 WARN("Overriding the TCP keep-alive maximum probe count per-socket is not supported by this platform. Ignoring the %s environment variable.",
456 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV);
457 config->max_probe_count = -1;
458 } else if (value) {
459 int max_probe_count;
460
461 max_probe_count = get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV,
462 value);
463 if (max_probe_count < -1) {
464 ret = 1;
465 goto error;
466 }
467
468 config->max_probe_count = max_probe_count;
469 DBG("Overriding %s to %d",
470 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV,
471 config->max_probe_count);
472 }
473
474 /* Get value for tcp_keepalive_abort_interval. */
475 value = lttng_secure_getenv(
476 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV);
477 if (!support->abort_threshold_supported && value) {
478 WARN("Overriding the TCP keep-alive abort threshold per-socket is not supported by this platform. Ignoring the %s environment variable.",
479 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV);
480 config->abort_threshold = -1;
481 } else if (value) {
482 int abort_threshold_platform;
483 int abort_threshold_seconds;
484
485 abort_threshold_seconds = get_env_int(
486 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV,
487 value);
488 if (abort_threshold_seconds < -1) {
489 ret = 1;
490 goto error;
491 }
492
493 abort_threshold_platform = convert_abort_threshold(
494 abort_threshold_seconds);
495 if (abort_threshold_platform < -1) {
496 ret = 1;
497 goto error;
498 }
499
500 config->abort_threshold = abort_threshold_platform;
501 DBG("Overriding %s to %d",
502 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
503 config->abort_threshold);
504 }
505
506 ret = 0;
507
508error:
509 return ret;
510}
511
512/* Initialize the TCP keep-alive configuration. */
513__attribute__((constructor)) static
514void tcp_keep_alive_init(void)
515{
516 tcp_keep_alive_init_support(&the_support);
517 (void) tcp_keep_alive_init_config(&the_support, &the_config);
518}
519
520/*
521 * Set the socket options regarding TCP keep-alive.
522 */
523int socket_apply_keep_alive_config(int socket_fd)
524{
525 int ret;
526 int val = 1;
527
528 /* TCP keep-alive */
529 if (!the_support.supported || !the_config.enabled) {
530 ret = 0;
531 goto end;
532 }
533
534 DBG("TCP keep-alive enabled for socket %d", socket_fd);
535 ret = setsockopt(socket_fd, SOL_SOCKET, SO_KEEPALIVE, &val,
536 sizeof(val));
537 if (ret < 0) {
538 PERROR("setsockopt so_keepalive");
539 goto end;
540 }
541
542 /* TCP keep-alive idle time */
543 if (the_support.idle_time_supported && the_config.idle_time > 0) {
544 DBG("TCP keep-alive keep idle: %d enabled for socket %d",
545 the_config.idle_time, socket_fd);
546 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL,
547 COMPAT_TCP_KEEPIDLE, &the_config.idle_time,
548 sizeof(the_config.idle_time));
549 if (ret < 0) {
550 PERROR("setsockopt TCP_KEEPIDLE");
551 goto end;
552 }
553 }
554 /* TCP keep-alive probe interval */
555 if (the_support.probe_interval_supported &&
556 the_config.probe_interval > 0) {
557 DBG("TCP keep-alive probe_interval: %d enabled for socket %d",
558 the_config.probe_interval, socket_fd);
559 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL,
560 COMPAT_TCP_KEEPINTVL,
561 &the_config.probe_interval,
562 sizeof(the_config.probe_interval));
563 if (ret < 0) {
564 PERROR("setsockopt TCP_KEEPINTVL");
565 goto end;
566 }
567 }
568
569 /* TCP keep-alive max probe count */
570 if (the_support.max_probe_count_supported &&
571 the_config.max_probe_count > 0) {
572 DBG("TCP keep-alive max_probe: %d enabled for socket %d",
573 the_config.max_probe_count, socket_fd);
574 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL,
575 COMPAT_TCP_KEEPCNT, &the_config.max_probe_count,
576 sizeof(the_config.max_probe_count));
577 if (ret < 0) {
578 PERROR("setsockopt TCP_KEEPCNT");
579 goto end;
580 }
581 }
582
583 /* TCP keep-alive abort threshold */
584 if (the_support.abort_threshold_supported &&
585 the_config.abort_threshold > 0) {
586 DBG("TCP keep-alive abort threshold: %d enabled for socket %d",
587 the_config.abort_threshold, socket_fd);
588 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL,
589 COMPAT_TCP_ABORT_THRESHOLD,
590 &the_config.abort_threshold,
591 sizeof(the_config.max_probe_count));
592 if (ret < 0) {
593 PERROR("setsockopt TCP_KEEPALIVE_ABORT_THRESHOLD");
594 goto end;
595 }
596 }
597end:
598 return ret;
599}
This page took 0.024429 seconds and 4 git commands to generate.