Tests: Add test to check shared-memory FD leaks after relayd dies
[lttng-tools.git] / src / bin / lttng-relayd / tcp_keep_alive.cpp
1 /*
2 * Copyright (C) 2017 Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
3 *
4 * SPDX-License-Identifier: GPL-2.0-only
5 *
6 */
7
8 #include <sys/types.h>
9 #include <netinet/tcp.h>
10 #include <stdbool.h>
11 #include <sys/socket.h>
12 #include <limits.h>
13
14 #include <common/compat/getenv.hpp>
15 #include <common/time.hpp>
16 #include <common/defaults.hpp>
17 #include <common/ini-config/ini-config.hpp>
18
19 #include "tcp_keep_alive.hpp"
20
/*
 * Bounds, in seconds, enforced on Solaris for the keep-alive idle time and
 * for the keep-alive abort threshold.
 */
#define SOLARIS_IDLE_TIME_MIN_S 10
#define SOLARIS_IDLE_TIME_MAX_S 864000 /* 10 days */
#define SOLARIS_ABORT_THRESHOLD_MIN_S 1
#define SOLARIS_ABORT_THRESHOLD_MAX_S 480 /* 8 minutes */

/*
 * Per-platform names of the TCP keep-alive socket options and of the level
 * they are set at. An option that has no equivalent on a platform is
 * defined to 0.
 */
#if defined(__linux__)

#define COMPAT_TCP_LEVEL SOL_TCP
#define COMPAT_TCP_KEEPIDLE TCP_KEEPIDLE
#define COMPAT_TCP_KEEPINTVL TCP_KEEPINTVL
#define COMPAT_TCP_KEEPCNT TCP_KEEPCNT
#define COMPAT_TCP_ABORT_THRESHOLD 0 /* Linux has no such option. */

#elif defined(__sun__)

#define COMPAT_TCP_LEVEL IPPROTO_TCP

/* Idle time: only available when the system headers expose the option. */
#ifndef TCP_KEEPALIVE_THRESHOLD
#define COMPAT_TCP_KEEPIDLE 0
#else
#define COMPAT_TCP_KEEPIDLE TCP_KEEPALIVE_THRESHOLD
#endif /* ! defined (TCP_KEEPALIVE_THRESHOLD) */

/* Abort threshold: only available when the system headers expose it. */
#ifndef TCP_KEEPALIVE_ABORT_THRESHOLD
#define COMPAT_TCP_ABORT_THRESHOLD 0
#else
#define COMPAT_TCP_ABORT_THRESHOLD TCP_KEEPALIVE_ABORT_THRESHOLD
#endif /* ! defined (TCP_KEEPALIVE_ABORT_THRESHOLD) */

#define COMPAT_TCP_KEEPINTVL 0 /* Solaris has no such option. */
#define COMPAT_TCP_KEEPCNT 0 /* Solaris has no such option. */

#else /* Neither Linux nor Solaris. */

#define COMPAT_TCP_LEVEL 0
#define COMPAT_TCP_KEEPIDLE 0
#define COMPAT_TCP_KEEPINTVL 0
#define COMPAT_TCP_KEEPCNT 0
#define COMPAT_TCP_ABORT_THRESHOLD 0

#endif /* Platform selection. */
63
namespace {
/* Which parts of the TCP keep-alive mechanism the platform offers. */
struct tcp_keep_alive_support {
	/* Whether TCP keep-alive can be used at all on this platform. */
	bool supported;
	/* Whether the idle time can be overridden per socket. */
	bool idle_time_supported;
	/* Whether the probe interval can be overridden per socket. */
	bool probe_interval_supported;
	/* Whether the maximum probe count can be configured per socket. */
	bool max_probe_count_supported;
	/* Whether the abort threshold can be overridden per socket. */
	bool abort_threshold_supported;
};

/* TCP keep-alive settings, each one backed by an environment variable. */
struct tcp_keep_alive_config {
	/* Backed by LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV. */
	bool enabled;
	/* Backed by LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV. */
	int idle_time;
	/* Backed by LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV. */
	int probe_interval;
	/* Backed by LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV. */
	int max_probe_count;
	/* Backed by LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV. */
	int abort_threshold;
};

/* Keep-alive starts disabled, with every numeric setting left at -1. */
tcp_keep_alive_config the_config = {
	.enabled = false,
	.idle_time = -1,
	.probe_interval = -1,
	.max_probe_count = -1,
	.abort_threshold = -1,
};

/* Nothing is marked as supported until the platform has been probed. */
tcp_keep_alive_support the_support = {
	.supported = false,
	.idle_time_supported = false,
	.probe_interval_supported = false,
	.max_probe_count_supported = false,
	.abort_threshold_supported = false,
};
} /* namespace */
121
122 /*
123 * Common parser for string to positive int conversion where the value must be
124 * in range [-1, INT_MAX].
125 *
126 * Returns -2 on invalid value.
127 */
128 static
129 int get_env_int(const char *env_var,
130 const char *value)
131 {
132 int ret;
133 long tmp;
134 char *endptr = NULL;
135
136 errno = 0;
137 tmp = strtol(value, &endptr, 0);
138 if (errno != 0) {
139 ERR("%s cannot be parsed.", env_var);
140 PERROR("errno for previous parsing failure");
141 ret = -2;
142 goto end;
143 }
144
145 if (endptr == value || *endptr != '\0') {
146 ERR("%s is not a valid number", env_var);
147 ret = -1;
148 goto end;
149 }
150
151 if (tmp < -1) {
152 ERR("%s must be greater or equal to -1", env_var);
153 ret = -2;
154 goto end;
155 }
156 if (tmp > INT_MAX){
157 ERR("%s is too big. Maximum value is %d", env_var, INT_MAX);
158 ret = -2;
159 goto end;
160 }
161
162 ret = (int) tmp;
163 end:
164 return ret;
165 }
166
/*
 * Per-platform conversion of the TCP keep-alive idle time, received in
 * seconds, into the value stored in the configuration.
 * Returns -2 on invalid value.
 */
#ifdef __sun__

static
int convert_idle_time(int value)
{
	unsigned int idle_ms;

	/* -1 and 0 are handed back untouched: use system defaults. */
	if (value == -1 || value == 0) {
		return value;
	}

	if (value < 0) {
		ERR("Invalid tcp keep-alive idle time (%i)", value);
		return -2;
	}

	/*
	 * Solaris 11 only accepts an idle time from 10 seconds up to 10 days.
	 * See
	 * https://docs.oracle.com/cd/E23824_01/html/821-1475/tcp-7p.html#REFMAN7tcp-7p
	 */
	if (!(value >= SOLARIS_IDLE_TIME_MIN_S &&
			value <= SOLARIS_IDLE_TIME_MAX_S)) {
		ERR("%s must be comprised between %d and %d inclusively on Solaris",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
				SOLARIS_IDLE_TIME_MIN_S,
				SOLARIS_IDLE_TIME_MAX_S);
		return -2;
	}

	/* Solaris expects the idle time in milliseconds. */
	idle_ms = ((unsigned int) value) * MSEC_PER_SEC;
	if (idle_ms > INT_MAX ||
			(value != 0 && (idle_ms / ((unsigned int) value)) != MSEC_PER_SEC)) {
		/* The conversion to milliseconds overflowed. */
		const int max_value = INT_MAX / MSEC_PER_SEC;

		ERR("%s is too big: maximum supported value is %d",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
				max_value);
		return -2;
	}

	/* idle_ms is within [0, INT_MAX]: the cast cannot truncate. */
	return (int) idle_ms;
}

#else /* ! defined(__sun__) */

static
int convert_idle_time(int value)
{
	/* The value is used as received on this platform. */
	return value;
}

#endif /* ! defined(__sun__) */
235
236 /* Per-platform support of tcp_keep_alive functionality. */
237 #if defined (__linux__)
238
239 static
240 void tcp_keep_alive_init_support(struct tcp_keep_alive_support *support)
241 {
242 support->supported = true;
243 support->idle_time_supported = true;
244 support->probe_interval_supported = true;
245 support->max_probe_count_supported = true;
246 /* Solaris specific */
247 support->abort_threshold_supported = false;
248 }
249
250 #elif defined(__sun__) /* ! defined (__linux__) */
251
252 static
253 void tcp_keep_alive_init_support(struct tcp_keep_alive_support *support)
254 {
255 support->supported = true;
256 #ifdef TCP_KEEPALIVE_THRESHOLD
257 support->idle_time_supported = true;
258 #else
259 support->idle_time_supported = false;;
260 #endif /* TCP_KEEPALIVE_THRESHOLD */
261
262 /*
263 * Solaris does not support either tcp_keepalive_probes or
264 * tcp_keepalive_intvl.
265 * Inferring a value for TCP_KEEP_ALIVE_ABORT_THRESHOLD using
266 * (tcp_keepalive_probes * tcp_keepalive_intvl) could yield a good
267 * alternative, but Solaris does not detail the algorithm used (such as
268 * constant time retry like Linux).
269 *
270 * Ignore those settings on Solaris 11. We prefer exposing an
271 * environment variable only used on Solaris for the abort threshold.
272 */
273 support->probe_interval_supported = false;
274 support->max_probe_count_supported = false;
275 #ifdef TCP_KEEPALIVE_ABORT_THRESHOLD
276 support->abort_threshold_supported = true;
277 #else
278 support->abort_threshold_supported = false;
279 #endif /* TCP_KEEPALIVE_THRESHOLD */
280 }
281
282 #else /* ! defined(__sun__) && ! defined(__linux__) */
283
284 /* Assume nothing is supported on other platforms. */
285 static
286 void tcp_keep_alive_init_support(struct tcp_keep_alive_support *support)
287 {
288 support->supported = false;
289 support->idle_time_supported = false;
290 support->probe_interval_supported = false;
291 support->max_probe_count_supported = false;
292 support->abort_threshold_supported = false;
293 }
294
295 #endif /* ! defined(__sun__) && ! defined(__linux__) */
296
#ifdef __sun__

/*
 * Solaris specific conversion of the abort threshold, received in seconds,
 * into the value stored in the configuration.
 * Return -2 on error.
 */
static
int convert_abort_threshold(int value)
{
	unsigned int threshold_ms;

	/* -1 is handed back untouched: use system defaults. */
	if (value == -1) {
		return value;
	}

	if (value < 0) {
		ERR("Invalid tcp keep-alive abort threshold (%i)", value);
		return -2;
	}

	/*
	 * Solaris 11 accepts an abort threshold between 0 and 8 minutes, see
	 * https://docs.oracle.com/cd/E19120-01/open.solaris/819-2724/fsvdh/index.html
	 *
	 * The accepted range starts at 1 second since the 0 value goes
	 * against the purpose of dead peers detection by never timing out
	 * when probing. It does NOT mean that the connection times out
	 * immediately.
	 */
	if (!(value >= SOLARIS_ABORT_THRESHOLD_MIN_S &&
			value <= SOLARIS_ABORT_THRESHOLD_MAX_S)) {
		ERR("%s must be comprised between %d and %d inclusively on Solaris",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
				SOLARIS_ABORT_THRESHOLD_MIN_S,
				SOLARIS_ABORT_THRESHOLD_MAX_S);
		return -2;
	}

	/* Solaris expects the abort threshold in milliseconds. */
	threshold_ms = ((unsigned int) value) * MSEC_PER_SEC;
	if (threshold_ms > INT_MAX ||
			(value != 0 && (threshold_ms / ((unsigned int) value)) != MSEC_PER_SEC)) {
		/* The conversion to milliseconds overflowed. */
		const int max_value = INT_MAX / MSEC_PER_SEC;

		ERR("%s is too big: maximum supported value is %d",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
				max_value);
		return -2;
	}

	/* threshold_ms is within [0, INT_MAX]: the cast cannot truncate. */
	return (int) threshold_ms;
}

#else

/* The value is used as received on platforms other than Solaris. */
static
int convert_abort_threshold(int value)
{
	return value;
}

#endif /* defined (__sun__) */
369
370 /*
371 * Retrieve settings from environment variables and warn for settings not
372 * supported by the platform.
373 */
374 static
375 int tcp_keep_alive_init_config(struct tcp_keep_alive_support *support,
376 struct tcp_keep_alive_config *config)
377 {
378 int ret;
379 const char *value;
380
381 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV);
382 if (!support->supported) {
383 if (value) {
384 WARN("Using per-socket TCP keep-alive mechanism is not supported by this platform. Ignoring the %s environment variable.",
385 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV);
386 }
387 config->enabled = false;
388 } else if (value) {
389 ret = config_parse_value(value);
390 if (ret < 0 || ret > 1) {
391 ERR("Invalid value for %s", DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV);
392 ret = 1;
393 goto error;
394 }
395 config->enabled = ret;
396 }
397 DBG("TCP keep-alive mechanism %s", config->enabled ? "enabled": "disabled");
398
399 /* Get value for tcp_keepalive_time in seconds. */
400 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV);
401 if (!support->idle_time_supported && value) {
402 WARN("Overriding the TCP keep-alive idle time threshold per-socket is not supported by this platform. Ignoring the %s environment variable.",
403 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV);
404 config->idle_time = -1;
405 } else if (value) {
406 int idle_time_platform;
407 int idle_time_seconds;
408
409 idle_time_seconds = get_env_int(
410 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
411 value);
412 if (idle_time_seconds < -1) {
413 ret = 1;
414 goto error;
415 }
416
417 idle_time_platform = convert_idle_time(idle_time_seconds);
418 if (idle_time_platform < -1) {
419 ret = 1;
420 goto error;
421 }
422
423 config->idle_time = idle_time_platform;
424 DBG("Overriding %s to %d",
425 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
426 idle_time_seconds);
427 }
428
429 /* Get value for tcp_keepalive_intvl in seconds. */
430 value = lttng_secure_getenv(
431 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV);
432 if (!support->probe_interval_supported && value) {
433 WARN("Overriding the TCP keep-alive probe interval time per-socket is not supported by this platform. Ignoring the %s environment variable.",
434 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV);
435 config->probe_interval = -1;
436 } else if (value) {
437 int probe_interval;
438
439 probe_interval = get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV,
440 value);
441 if (probe_interval < -1) {
442 ret = 1;
443 goto error;
444 }
445
446 config->probe_interval = probe_interval;
447 DBG("Overriding %s to %d",
448 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV,
449 config->probe_interval);
450 }
451
452 /* Get value for tcp_keepalive_probes. */
453 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV);
454 if (!support->max_probe_count_supported && value) {
455 WARN("Overriding the TCP keep-alive maximum probe count per-socket is not supported by this platform. Ignoring the %s environment variable.",
456 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV);
457 config->max_probe_count = -1;
458 } else if (value) {
459 int max_probe_count;
460
461 max_probe_count = get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV,
462 value);
463 if (max_probe_count < -1) {
464 ret = 1;
465 goto error;
466 }
467
468 config->max_probe_count = max_probe_count;
469 DBG("Overriding %s to %d",
470 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV,
471 config->max_probe_count);
472 }
473
474 /* Get value for tcp_keepalive_abort_interval. */
475 value = lttng_secure_getenv(
476 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV);
477 if (!support->abort_threshold_supported && value) {
478 WARN("Overriding the TCP keep-alive abort threshold per-socket is not supported by this platform. Ignoring the %s environment variable.",
479 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV);
480 config->abort_threshold = -1;
481 } else if (value) {
482 int abort_threshold_platform;
483 int abort_threshold_seconds;
484
485 abort_threshold_seconds = get_env_int(
486 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV,
487 value);
488 if (abort_threshold_seconds < -1) {
489 ret = 1;
490 goto error;
491 }
492
493 abort_threshold_platform = convert_abort_threshold(
494 abort_threshold_seconds);
495 if (abort_threshold_platform < -1) {
496 ret = 1;
497 goto error;
498 }
499
500 config->abort_threshold = abort_threshold_platform;
501 DBG("Overriding %s to %d",
502 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
503 config->abort_threshold);
504 }
505
506 ret = 0;
507
508 error:
509 return ret;
510 }
511
512 /* Initialize the TCP keep-alive configuration. */
513 __attribute__((constructor)) static
514 void tcp_keep_alive_init(void)
515 {
516 tcp_keep_alive_init_support(&the_support);
517 (void) tcp_keep_alive_init_config(&the_support, &the_config);
518 }
519
520 /*
521 * Set the socket options regarding TCP keep-alive.
522 */
523 int socket_apply_keep_alive_config(int socket_fd)
524 {
525 int ret;
526 int val = 1;
527
528 /* TCP keep-alive */
529 if (!the_support.supported || !the_config.enabled) {
530 ret = 0;
531 goto end;
532 }
533
534 DBG("TCP keep-alive enabled for socket %d", socket_fd);
535 ret = setsockopt(socket_fd, SOL_SOCKET, SO_KEEPALIVE, &val,
536 sizeof(val));
537 if (ret < 0) {
538 PERROR("setsockopt so_keepalive");
539 goto end;
540 }
541
542 /* TCP keep-alive idle time */
543 if (the_support.idle_time_supported && the_config.idle_time > 0) {
544 DBG("TCP keep-alive keep idle: %d enabled for socket %d",
545 the_config.idle_time, socket_fd);
546 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL,
547 COMPAT_TCP_KEEPIDLE, &the_config.idle_time,
548 sizeof(the_config.idle_time));
549 if (ret < 0) {
550 PERROR("setsockopt TCP_KEEPIDLE");
551 goto end;
552 }
553 }
554 /* TCP keep-alive probe interval */
555 if (the_support.probe_interval_supported &&
556 the_config.probe_interval > 0) {
557 DBG("TCP keep-alive probe_interval: %d enabled for socket %d",
558 the_config.probe_interval, socket_fd);
559 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL,
560 COMPAT_TCP_KEEPINTVL,
561 &the_config.probe_interval,
562 sizeof(the_config.probe_interval));
563 if (ret < 0) {
564 PERROR("setsockopt TCP_KEEPINTVL");
565 goto end;
566 }
567 }
568
569 /* TCP keep-alive max probe count */
570 if (the_support.max_probe_count_supported &&
571 the_config.max_probe_count > 0) {
572 DBG("TCP keep-alive max_probe: %d enabled for socket %d",
573 the_config.max_probe_count, socket_fd);
574 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL,
575 COMPAT_TCP_KEEPCNT, &the_config.max_probe_count,
576 sizeof(the_config.max_probe_count));
577 if (ret < 0) {
578 PERROR("setsockopt TCP_KEEPCNT");
579 goto end;
580 }
581 }
582
583 /* TCP keep-alive abort threshold */
584 if (the_support.abort_threshold_supported &&
585 the_config.abort_threshold > 0) {
586 DBG("TCP keep-alive abort threshold: %d enabled for socket %d",
587 the_config.abort_threshold, socket_fd);
588 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL,
589 COMPAT_TCP_ABORT_THRESHOLD,
590 &the_config.abort_threshold,
591 sizeof(the_config.max_probe_count));
592 if (ret < 0) {
593 PERROR("setsockopt TCP_KEEPALIVE_ABORT_THRESHOLD");
594 goto end;
595 }
596 }
597 end:
598 return ret;
599 }
This page took 0.040305 seconds and 4 git commands to generate.