94bf4ab9332742f42ea3c8e8872f6ea545f253f9
[lttng-tools.git] / src / bin / lttng-relayd / tcp_keep_alive.cpp
1 /*
2 * Copyright (C) 2017 Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
3 *
4 * SPDX-License-Identifier: GPL-2.0-only
5 *
6 */
7
8 #include <sys/types.h>
9 #include <netinet/tcp.h>
10 #include <stdbool.h>
11 #include <sys/socket.h>
12 #include <limits.h>
13
14 #include <common/compat/getenv.h>
15 #include <common/time.h>
16 #include <common/defaults.h>
17 #include <common/config/session-config.h>
18
19 #include "tcp_keep_alive.h"
20
/*
 * Bounds, in seconds, accepted on Solaris for the keep-alive idle time and
 * for the keep-alive abort threshold.
 */
#define SOLARIS_IDLE_TIME_MIN_S		10
#define SOLARIS_IDLE_TIME_MAX_S		(10 * 24 * 60 * 60) /* 10 days */
#define SOLARIS_ABORT_THRESHOLD_MIN_S	1
#define SOLARIS_ABORT_THRESHOLD_MAX_S	(8 * 60) /* 8 minutes */
25
/*
 * Per-platform mapping of the TCP keep-alive socket options.
 *
 * An option defined to 0 has no equivalent on the platform being built for.
 */
#if defined(__linux__)

#define COMPAT_TCP_LEVEL		SOL_TCP
#define COMPAT_TCP_KEEPIDLE		TCP_KEEPIDLE
#define COMPAT_TCP_KEEPINTVL		TCP_KEEPINTVL
#define COMPAT_TCP_KEEPCNT		TCP_KEEPCNT
#define COMPAT_TCP_ABORT_THRESHOLD	0 /* Does not exist on Linux. */

#elif defined(__sun__) /* ! defined(__linux__) */

#define COMPAT_TCP_LEVEL		IPPROTO_TCP

/* The idle time option is only mapped when the system headers provide it. */
#if defined(TCP_KEEPALIVE_THRESHOLD)
#define COMPAT_TCP_KEEPIDLE		TCP_KEEPALIVE_THRESHOLD
#else /* ! defined(TCP_KEEPALIVE_THRESHOLD) */
#define COMPAT_TCP_KEEPIDLE		0
#endif /* TCP_KEEPALIVE_THRESHOLD */

#define COMPAT_TCP_KEEPINTVL		0 /* Does not exist on Solaris. */
#define COMPAT_TCP_KEEPCNT		0 /* Does not exist on Solaris. */

/* Same for the abort threshold option. */
#if defined(TCP_KEEPALIVE_ABORT_THRESHOLD)
#define COMPAT_TCP_ABORT_THRESHOLD	TCP_KEEPALIVE_ABORT_THRESHOLD
#else /* ! defined(TCP_KEEPALIVE_ABORT_THRESHOLD) */
#define COMPAT_TCP_ABORT_THRESHOLD	0
#endif /* TCP_KEEPALIVE_ABORT_THRESHOLD */

#else /* ! defined(__linux__) && ! defined(__sun__) */

#define COMPAT_TCP_LEVEL		0
#define COMPAT_TCP_KEEPIDLE		0
#define COMPAT_TCP_KEEPINTVL		0
#define COMPAT_TCP_KEEPCNT		0
#define COMPAT_TCP_ABORT_THRESHOLD	0

#endif /* ! defined(__linux__) && ! defined(__sun__) */
63
/*
 * TCP keep-alive capabilities of the platform this file is built for.
 * Filled in by tcp_keep_alive_init_support().
 */
struct tcp_keep_alive_support {
	/* Per-socket TCP keep-alive is usable on this platform. */
	bool supported;
	/* The idle time can be overridden on a per-socket basis. */
	bool idle_time_supported;
	/* The probe interval can be overridden on a per-socket basis. */
	bool probe_interval_supported;
	/* The maximum probe count can be configured on a per-socket basis. */
	bool max_probe_count_supported;
	/* The abort threshold can be overridden on a per-socket basis. */
	bool abort_threshold_supported;
};
82
/*
 * TCP keep-alive settings gathered from the environment.
 *
 * Each member mirrors one environment variable; the integer members hold
 * the value handed to setsockopt() for the matching option.
 */
struct tcp_keep_alive_config {
	/* LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV */
	bool enabled;
	/* LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV */
	int idle_time;
	/* LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV */
	int probe_interval;
	/* LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV */
	int max_probe_count;
	/* LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV */
	int abort_threshold;
};
107
108 static struct tcp_keep_alive_config the_config = {.enabled = false,
109 .idle_time = -1,
110 .probe_interval = -1,
111 .max_probe_count = -1,
112 .abort_threshold = -1};
113
114 static struct tcp_keep_alive_support the_support = {.supported = false,
115 .idle_time_supported = false,
116 .probe_interval_supported = false,
117 .max_probe_count_supported = false,
118 .abort_threshold_supported = false};
119
120 /*
121 * Common parser for string to positive int conversion where the value must be
122 * in range [-1, INT_MAX].
123 *
124 * Returns -2 on invalid value.
125 */
126 static
127 int get_env_int(const char *env_var,
128 const char *value)
129 {
130 int ret;
131 long tmp;
132 char *endptr = NULL;
133
134 errno = 0;
135 tmp = strtol(value, &endptr, 0);
136 if (errno != 0) {
137 ERR("%s cannot be parsed.", env_var);
138 PERROR("errno for previous parsing failure");
139 ret = -2;
140 goto end;
141 }
142
143 if (endptr == value || *endptr != '\0') {
144 ERR("%s is not a valid number", env_var);
145 ret = -1;
146 goto end;
147 }
148
149 if (tmp < -1) {
150 ERR("%s must be greater or equal to -1", env_var);
151 ret = -2;
152 goto end;
153 }
154 if (tmp > INT_MAX){
155 ERR("%s is too big. Maximum value is %d", env_var, INT_MAX);
156 ret = -2;
157 goto end;
158 }
159
160 ret = (int) tmp;
161 end:
162 return ret;
163 }
164
/*
 * Convert a keep-alive idle time given in seconds to the value expected by
 * the platform's socket option.
 *
 * Returns the converted value, or -2 on invalid value.
 */
#ifdef __sun__

static
int convert_idle_time(int value)
{
	unsigned int idle_ms;

	/* -1 and 0 are handed back untouched: use system defaults. */
	if (value == -1 || value == 0) {
		return value;
	}

	if (value < 0) {
		ERR("Invalid tcp keep-alive idle time (%i)", value);
		return -2;
	}

	/*
	 * Additional constraints for Solaris 11.
	 * Minimum 10s, maximum 10 days. Defined by
	 * https://docs.oracle.com/cd/E23824_01/html/821-1475/tcp-7p.html#REFMAN7tcp-7p
	 */
	if (value < SOLARIS_IDLE_TIME_MIN_S ||
			value > SOLARIS_IDLE_TIME_MAX_S) {
		ERR("%s must be comprised between %d and %d inclusively on Solaris",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
				SOLARIS_IDLE_TIME_MIN_S,
				SOLARIS_IDLE_TIME_MAX_S);
		return -2;
	}

	/* On Solaris idle time is given in milliseconds. */
	idle_ms = ((unsigned int) value) * MSEC_PER_SEC;
	if ((value != 0 && (idle_ms / ((unsigned int) value)) != MSEC_PER_SEC)
			|| idle_ms > INT_MAX) {
		/* The multiplication wrapped or no longer fits in an int. */
		const int max_value = INT_MAX / MSEC_PER_SEC;

		ERR("%s is too big: maximum supported value is %d",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
				max_value);
		return -2;
	}

	/* idle_ms is >= 0 and <= INT_MAX. Cast is safe. */
	return (int) idle_ms;
}

#else /* ! defined(__sun__) */

/* Other platforms take the idle time as provided: nothing to convert. */
static
int convert_idle_time(int value)
{
	return value;
}

#endif /* ! defined(__sun__) */
233
234 /* Per-platform support of tcp_keep_alive functionality. */
235 #if defined (__linux__)
236
237 static
238 void tcp_keep_alive_init_support(struct tcp_keep_alive_support *support)
239 {
240 support->supported = true;
241 support->idle_time_supported = true;
242 support->probe_interval_supported = true;
243 support->max_probe_count_supported = true;
244 /* Solaris specific */
245 support->abort_threshold_supported = false;
246 }
247
248 #elif defined(__sun__) /* ! defined (__linux__) */
249
250 static
251 void tcp_keep_alive_init_support(struct tcp_keep_alive_support *support)
252 {
253 support->supported = true;
254 #ifdef TCP_KEEPALIVE_THRESHOLD
255 support->idle_time_supported = true;
256 #else
257 support->idle_time_supported = false;;
258 #endif /* TCP_KEEPALIVE_THRESHOLD */
259
260 /*
261 * Solaris does not support either tcp_keepalive_probes or
262 * tcp_keepalive_intvl.
263 * Inferring a value for TCP_KEEP_ALIVE_ABORT_THRESHOLD using
264 * (tcp_keepalive_probes * tcp_keepalive_intvl) could yield a good
265 * alternative, but Solaris does not detail the algorithm used (such as
266 * constant time retry like Linux).
267 *
268 * Ignore those settings on Solaris 11. We prefer exposing an
269 * environment variable only used on Solaris for the abort threshold.
270 */
271 support->probe_interval_supported = false;
272 support->max_probe_count_supported = false;
273 #ifdef TCP_KEEPALIVE_ABORT_THRESHOLD
274 support->abort_threshold_supported = true;
275 #else
276 support->abort_threshold_supported = false;
277 #endif /* TCP_KEEPALIVE_THRESHOLD */
278 }
279
280 #else /* ! defined(__sun__) && ! defined(__linux__) */
281
282 /* Assume nothing is supported on other platforms. */
283 static
284 void tcp_keep_alive_init_support(struct tcp_keep_alive_support *support)
285 {
286 support->supported = false;
287 support->idle_time_supported = false;
288 support->probe_interval_supported = false;
289 support->max_probe_count_supported = false;
290 support->abort_threshold_supported = false;
291 }
292
293 #endif /* ! defined(__sun__) && ! defined(__linux__) */
294
#ifdef __sun__

/*
 * Solaris specific conversion of the abort threshold from seconds to
 * milliseconds.
 * Returns the converted value, or -2 on error.
 */
static
int convert_abort_threshold(int value)
{
	unsigned int threshold_ms;

	/* -1 is handed back untouched: use system defaults. */
	if (value == -1) {
		return value;
	}

	if (value < 0) {
		ERR("Invalid tcp keep-alive abort threshold (%i)", value);
		return -2;
	}

	/*
	 * Additional constraints for Solaris 11.
	 *
	 * Between 0 and 8 minutes.
	 * https://docs.oracle.com/cd/E19120-01/open.solaris/819-2724/fsvdh/index.html
	 *
	 * Restrict from 1 second to 8 minutes since the 0 value goes against
	 * the purpose of dead peers detection by never timing out when probing.
	 * It does NOT mean that the connection times out immediately.
	 */
	if (value < SOLARIS_ABORT_THRESHOLD_MIN_S ||
			value > SOLARIS_ABORT_THRESHOLD_MAX_S) {
		ERR("%s must be comprised between %d and %d inclusively on Solaris",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
				SOLARIS_ABORT_THRESHOLD_MIN_S,
				SOLARIS_ABORT_THRESHOLD_MAX_S);
		return -2;
	}

	/* Abort threshold is given in milliseconds. */
	threshold_ms = ((unsigned int) value) * MSEC_PER_SEC;
	if ((value != 0 && (threshold_ms / ((unsigned int) value)) != MSEC_PER_SEC)
			|| threshold_ms > INT_MAX) {
		/* The multiplication wrapped or no longer fits in an int. */
		const int max_value = INT_MAX / MSEC_PER_SEC;

		ERR("%s is too big: maximum supported value is %d",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
				max_value);
		return -2;
	}

	/* threshold_ms is >= 0 and <= INT_MAX. Cast is safe. */
	return (int) threshold_ms;
}

#else /* ! defined(__sun__) */

/* Other platforms: the value is returned as provided. */
static
int convert_abort_threshold(int value)
{
	return value;
}

#endif /* defined (__sun__) */
367
368 /*
369 * Retrieve settings from environment variables and warn for settings not
370 * supported by the platform.
371 */
372 static
373 int tcp_keep_alive_init_config(struct tcp_keep_alive_support *support,
374 struct tcp_keep_alive_config *config)
375 {
376 int ret;
377 const char *value;
378
379 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV);
380 if (!support->supported) {
381 if (value) {
382 WARN("Using per-socket TCP keep-alive mechanism is not supported by this platform. Ignoring the %s environment variable.",
383 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV);
384 }
385 config->enabled = false;
386 } else if (value) {
387 ret = config_parse_value(value);
388 if (ret < 0 || ret > 1) {
389 ERR("Invalid value for %s", DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV);
390 ret = 1;
391 goto error;
392 }
393 config->enabled = ret;
394 }
395 DBG("TCP keep-alive mechanism %s", config->enabled ? "enabled": "disabled");
396
397 /* Get value for tcp_keepalive_time in seconds. */
398 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV);
399 if (!support->idle_time_supported && value) {
400 WARN("Overriding the TCP keep-alive idle time threshold per-socket is not supported by this platform. Ignoring the %s environment variable.",
401 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV);
402 config->idle_time = -1;
403 } else if (value) {
404 int idle_time_platform;
405 int idle_time_seconds;
406
407 idle_time_seconds = get_env_int(
408 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
409 value);
410 if (idle_time_seconds < -1) {
411 ret = 1;
412 goto error;
413 }
414
415 idle_time_platform = convert_idle_time(idle_time_seconds);
416 if (idle_time_platform < -1) {
417 ret = 1;
418 goto error;
419 }
420
421 config->idle_time = idle_time_platform;
422 DBG("Overriding %s to %d",
423 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
424 idle_time_seconds);
425 }
426
427 /* Get value for tcp_keepalive_intvl in seconds. */
428 value = lttng_secure_getenv(
429 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV);
430 if (!support->probe_interval_supported && value) {
431 WARN("Overriding the TCP keep-alive probe interval time per-socket is not supported by this platform. Ignoring the %s environment variable.",
432 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV);
433 config->probe_interval = -1;
434 } else if (value) {
435 int probe_interval;
436
437 probe_interval = get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV,
438 value);
439 if (probe_interval < -1) {
440 ret = 1;
441 goto error;
442 }
443
444 config->probe_interval = probe_interval;
445 DBG("Overriding %s to %d",
446 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV,
447 config->probe_interval);
448 }
449
450 /* Get value for tcp_keepalive_probes. */
451 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV);
452 if (!support->max_probe_count_supported && value) {
453 WARN("Overriding the TCP keep-alive maximum probe count per-socket is not supported by this platform. Ignoring the %s environment variable.",
454 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV);
455 config->max_probe_count = -1;
456 } else if (value) {
457 int max_probe_count;
458
459 max_probe_count = get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV,
460 value);
461 if (max_probe_count < -1) {
462 ret = 1;
463 goto error;
464 }
465
466 config->max_probe_count = max_probe_count;
467 DBG("Overriding %s to %d",
468 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV,
469 config->max_probe_count);
470 }
471
472 /* Get value for tcp_keepalive_abort_interval. */
473 value = lttng_secure_getenv(
474 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV);
475 if (!support->abort_threshold_supported && value) {
476 WARN("Overriding the TCP keep-alive abort threshold per-socket is not supported by this platform. Ignoring the %s environment variable.",
477 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV);
478 config->abort_threshold = -1;
479 } else if (value) {
480 int abort_threshold_platform;
481 int abort_threshold_seconds;
482
483 abort_threshold_seconds = get_env_int(
484 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV,
485 value);
486 if (abort_threshold_seconds < -1) {
487 ret = 1;
488 goto error;
489 }
490
491 abort_threshold_platform = convert_abort_threshold(
492 abort_threshold_seconds);
493 if (abort_threshold_platform < -1) {
494 ret = 1;
495 goto error;
496 }
497
498 config->abort_threshold = abort_threshold_platform;
499 DBG("Overriding %s to %d",
500 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
501 config->abort_threshold);
502 }
503
504 ret = 0;
505
506 error:
507 return ret;
508 }
509
510 /* Initialize the TCP keep-alive configuration. */
511 __attribute__((constructor)) static
512 void tcp_keep_alive_init(void)
513 {
514 tcp_keep_alive_init_support(&the_support);
515 (void) tcp_keep_alive_init_config(&the_support, &the_config);
516 }
517
518 /*
519 * Set the socket options regarding TCP keep-alive.
520 */
521 int socket_apply_keep_alive_config(int socket_fd)
522 {
523 int ret;
524 int val = 1;
525
526 /* TCP keep-alive */
527 if (!the_support.supported || !the_config.enabled) {
528 ret = 0;
529 goto end;
530 }
531
532 DBG("TCP keep-alive enabled for socket %d", socket_fd);
533 ret = setsockopt(socket_fd, SOL_SOCKET, SO_KEEPALIVE, &val,
534 sizeof(val));
535 if (ret < 0) {
536 PERROR("setsockopt so_keepalive");
537 goto end;
538 }
539
540 /* TCP keep-alive idle time */
541 if (the_support.idle_time_supported && the_config.idle_time > 0) {
542 DBG("TCP keep-alive keep idle: %d enabled for socket %d",
543 the_config.idle_time, socket_fd);
544 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL,
545 COMPAT_TCP_KEEPIDLE, &the_config.idle_time,
546 sizeof(the_config.idle_time));
547 if (ret < 0) {
548 PERROR("setsockopt TCP_KEEPIDLE");
549 goto end;
550 }
551 }
552 /* TCP keep-alive probe interval */
553 if (the_support.probe_interval_supported &&
554 the_config.probe_interval > 0) {
555 DBG("TCP keep-alive probe_interval: %d enabled for socket %d",
556 the_config.probe_interval, socket_fd);
557 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL,
558 COMPAT_TCP_KEEPINTVL,
559 &the_config.probe_interval,
560 sizeof(the_config.probe_interval));
561 if (ret < 0) {
562 PERROR("setsockopt TCP_KEEPINTVL");
563 goto end;
564 }
565 }
566
567 /* TCP keep-alive max probe count */
568 if (the_support.max_probe_count_supported &&
569 the_config.max_probe_count > 0) {
570 DBG("TCP keep-alive max_probe: %d enabled for socket %d",
571 the_config.max_probe_count, socket_fd);
572 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL,
573 COMPAT_TCP_KEEPCNT, &the_config.max_probe_count,
574 sizeof(the_config.max_probe_count));
575 if (ret < 0) {
576 PERROR("setsockopt TCP_KEEPCNT");
577 goto end;
578 }
579 }
580
581 /* TCP keep-alive abort threshold */
582 if (the_support.abort_threshold_supported &&
583 the_config.abort_threshold > 0) {
584 DBG("TCP keep-alive abort threshold: %d enabled for socket %d",
585 the_config.abort_threshold, socket_fd);
586 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL,
587 COMPAT_TCP_ABORT_THRESHOLD,
588 &the_config.abort_threshold,
589 sizeof(the_config.max_probe_count));
590 if (ret < 0) {
591 PERROR("setsockopt TCP_KEEPALIVE_ABORT_THRESHOLD");
592 goto end;
593 }
594 }
595 end:
596 return ret;
597 }
This page took 0.039255 seconds and 3 git commands to generate.