Run clang-format on the whole tree
[lttng-tools.git] / src / bin / lttng-relayd / tcp_keep_alive.cpp
1 /*
2 * Copyright (C) 2017 Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
3 *
4 * SPDX-License-Identifier: GPL-2.0-only
5 *
6 */
7
8 #include "tcp_keep_alive.hpp"
9
10 #include <common/compat/getenv.hpp>
11 #include <common/defaults.hpp>
12 #include <common/ini-config/ini-config.hpp>
13 #include <common/time.hpp>
14
15 #include <limits.h>
16 #include <netinet/tcp.h>
17 #include <stdbool.h>
18 #include <sys/socket.h>
19 #include <sys/types.h>
20
/*
 * Bounds, in seconds, applied to the Solaris-specific settings before they
 * are converted to milliseconds (see convert_idle_time() and
 * convert_abort_threshold()).
 */
#define SOLARIS_IDLE_TIME_MIN_S 10
#define SOLARIS_IDLE_TIME_MAX_S 864000 /* 10 days */
#define SOLARIS_ABORT_THRESHOLD_MIN_S 1
#define SOLARIS_ABORT_THRESHOLD_MAX_S 480 /* 8 minutes */

/*
 * Per-platform definitions of TCP socket options.
 *
 * Every COMPAT_* name is defined on every platform so that
 * socket_apply_keep_alive_config() compiles unconditionally. An option that
 * does not exist on a platform is defined to the placeholder 0; the matching
 * flag of struct tcp_keep_alive_support is false in that case, which keeps
 * the placeholder from ever being passed to setsockopt().
 */
#if defined(__linux__)

#define COMPAT_TCP_LEVEL SOL_TCP
#define COMPAT_TCP_ABORT_THRESHOLD 0 /* Does not exist on linux. */
#define COMPAT_TCP_KEEPIDLE TCP_KEEPIDLE
#define COMPAT_TCP_KEEPINTVL TCP_KEEPINTVL
#define COMPAT_TCP_KEEPCNT TCP_KEEPCNT

#elif defined(__sun__) /* ! defined (__linux__) */

#define COMPAT_TCP_LEVEL IPPROTO_TCP

/* Idle time before the first probe; only if the system headers provide it. */
#ifdef TCP_KEEPALIVE_THRESHOLD
#define COMPAT_TCP_KEEPIDLE TCP_KEEPALIVE_THRESHOLD
#else /* ! defined (TCP_KEEPALIVE_THRESHOLD) */
#define COMPAT_TCP_KEEPIDLE 0
#endif /* TCP_KEEPALIVE_THRESHOLD */

/* Abort threshold; only if the system headers provide it. */
#ifdef TCP_KEEPALIVE_ABORT_THRESHOLD
#define COMPAT_TCP_ABORT_THRESHOLD TCP_KEEPALIVE_ABORT_THRESHOLD
#else /* ! defined (TCP_KEEPALIVE_ABORT_THRESHOLD) */
#define COMPAT_TCP_ABORT_THRESHOLD 0
#endif /* TCP_KEEPALIVE_ABORT_THRESHOLD */

#define COMPAT_TCP_KEEPINTVL 0 /* Does not exist on Solaris. */
#define COMPAT_TCP_KEEPCNT 0 /* Does not exist on Solaris. */

#else /* ! defined (__linux__) && ! defined (__sun__) */

/* Unknown platform: nothing is supported, every option is a placeholder. */
#define COMPAT_TCP_LEVEL 0
#define COMPAT_TCP_ABORT_THRESHOLD 0
#define COMPAT_TCP_KEEPIDLE 0
#define COMPAT_TCP_KEEPINTVL 0
#define COMPAT_TCP_KEEPCNT 0

#endif /* ! defined (__linux__) && ! defined (__sun__) */
63
namespace {
/*
 * Capabilities of the current platform regarding per-socket TCP keep-alive
 * tuning. Filled once by tcp_keep_alive_init_support().
 */
struct tcp_keep_alive_support {
	/* TCP keep-alive is supported by this platform. */
	bool supported;
	/* Overriding idle-time per socket is supported by this platform. */
	bool idle_time_supported;
	/*
	 * Overriding probe interval per socket is supported by this
	 * platform.
	 */
	bool probe_interval_supported;
	/*
	 * Configuring max probe count per socket is supported by this
	 * platform.
	 */
	bool max_probe_count_supported;
	/*
	 * Overriding the abort threshold on a per-socket basis is supported
	 * by this platform.
	 */
	bool abort_threshold_supported;
};

/*
 * User-requested TCP keep-alive settings, filled from the environment by
 * tcp_keep_alive_init_config().
 *
 * For every integer field, -1 means that the setting was not overridden;
 * socket_apply_keep_alive_config() only applies values that are > 0.
 */
struct tcp_keep_alive_config {
	/* Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV environment variable. */
	bool enabled;
	/*
	 * Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV environment
	 * variable.
	 *
	 * Stored as returned by convert_idle_time(), i.e. in milliseconds on
	 * Solaris and unchanged (seconds) elsewhere.
	 */
	int idle_time;
	/*
	 * Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV
	 * environment variable.
	 */
	int probe_interval;
	/*
	 * Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV
	 * environment variable.
	 */
	int max_probe_count;
	/*
	 * Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV
	 * environment variable.
	 *
	 * Stored as returned by convert_abort_threshold(), i.e. in
	 * milliseconds on Solaris and unchanged elsewhere.
	 */
	int abort_threshold;
};

/* Defaults: keep-alive disabled, no setting overridden. */
struct tcp_keep_alive_config the_config = { .enabled = false,
					    .idle_time = -1,
					    .probe_interval = -1,
					    .max_probe_count = -1,
					    .abort_threshold = -1 };

/* Defaults: nothing supported until tcp_keep_alive_init_support() runs. */
struct tcp_keep_alive_support the_support = { .supported = false,
					      .idle_time_supported = false,
					      .probe_interval_supported = false,
					      .max_probe_count_supported = false,
					      .abort_threshold_supported = false };
} /* namespace */
121
122 /*
123 * Common parser for string to positive int conversion where the value must be
124 * in range [-1, INT_MAX].
125 *
126 * Returns -2 on invalid value.
127 */
128 static int get_env_int(const char *env_var, const char *value)
129 {
130 int ret;
131 long tmp;
132 char *endptr = NULL;
133
134 errno = 0;
135 tmp = strtol(value, &endptr, 0);
136 if (errno != 0) {
137 ERR("%s cannot be parsed.", env_var);
138 PERROR("errno for previous parsing failure");
139 ret = -2;
140 goto end;
141 }
142
143 if (endptr == value || *endptr != '\0') {
144 ERR("%s is not a valid number", env_var);
145 ret = -1;
146 goto end;
147 }
148
149 if (tmp < -1) {
150 ERR("%s must be greater or equal to -1", env_var);
151 ret = -2;
152 goto end;
153 }
154 if (tmp > INT_MAX) {
155 ERR("%s is too big. Maximum value is %d", env_var, INT_MAX);
156 ret = -2;
157 goto end;
158 }
159
160 ret = (int) tmp;
161 end:
162 return ret;
163 }
164
/*
 * Per-platform translation of the user-provided idle time (in seconds) into
 * the value handed to the socket option.
 * Returns -2 on invalid value.
 */
#ifdef __sun__

static int convert_idle_time(int value)
{
	unsigned int idle_time_ms;

	/* -1 and 0 are passed through untouched: use system defaults. */
	if (value == -1 || value == 0) {
		return value;
	}

	if (value < 0) {
		ERR("Invalid tcp keep-alive idle time (%i)", value);
		return -2;
	}

	/*
	 * Solaris 11 only accepts an idle time of at least 10 seconds and at
	 * most 10 days. See
	 * https://docs.oracle.com/cd/E23824_01/html/821-1475/tcp-7p.html#REFMAN7tcp-7p
	 */
	if (!(value >= SOLARIS_IDLE_TIME_MIN_S && value <= SOLARIS_IDLE_TIME_MAX_S)) {
		ERR("%s must be comprised between %d and %d inclusively on Solaris",
		    DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
		    SOLARIS_IDLE_TIME_MIN_S,
		    SOLARIS_IDLE_TIME_MAX_S);
		return -2;
	}

	/* The Solaris socket option expects milliseconds. */
	idle_time_ms = ((unsigned int) value) * MSEC_PER_SEC;
	if ((value != 0 && (idle_time_ms / ((unsigned int) value)) != MSEC_PER_SEC) ||
	    idle_time_ms > INT_MAX) {
		/* The multiplication wrapped or does not fit in an int. */
		const int max_value = INT_MAX / MSEC_PER_SEC;

		ERR("%s is too big: maximum supported value is %d",
		    DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
		    max_value);
		return -2;
	}

	/* idle_time_ms fits in [0, INT_MAX] at this point. */
	return (int) idle_time_ms;
}

#else /* ! defined(__sun__) */

static int convert_idle_time(int value)
{
	/* No conversion is performed on other platforms. */
	return value;
}

#endif /* ! defined(__sun__) */
229
230 /* Per-platform support of tcp_keep_alive functionality. */
231 #if defined(__linux__)
232
233 static void tcp_keep_alive_init_support(struct tcp_keep_alive_support *support)
234 {
235 support->supported = true;
236 support->idle_time_supported = true;
237 support->probe_interval_supported = true;
238 support->max_probe_count_supported = true;
239 /* Solaris specific */
240 support->abort_threshold_supported = false;
241 }
242
243 #elif defined(__sun__) /* ! defined (__linux__) */
244
245 static void tcp_keep_alive_init_support(struct tcp_keep_alive_support *support)
246 {
247 support->supported = true;
248 #ifdef TCP_KEEPALIVE_THRESHOLD
249 support->idle_time_supported = true;
250 #else
251 support->idle_time_supported = false;
252 ;
253 #endif /* TCP_KEEPALIVE_THRESHOLD */
254
255 /*
256 * Solaris does not support either tcp_keepalive_probes or
257 * tcp_keepalive_intvl.
258 * Inferring a value for TCP_KEEP_ALIVE_ABORT_THRESHOLD using
259 * (tcp_keepalive_probes * tcp_keepalive_intvl) could yield a good
260 * alternative, but Solaris does not detail the algorithm used (such as
261 * constant time retry like Linux).
262 *
263 * Ignore those settings on Solaris 11. We prefer exposing an
264 * environment variable only used on Solaris for the abort threshold.
265 */
266 support->probe_interval_supported = false;
267 support->max_probe_count_supported = false;
268 #ifdef TCP_KEEPALIVE_ABORT_THRESHOLD
269 support->abort_threshold_supported = true;
270 #else
271 support->abort_threshold_supported = false;
272 #endif /* TCP_KEEPALIVE_THRESHOLD */
273 }
274
275 #else /* ! defined(__sun__) && ! defined(__linux__) */
276
277 /* Assume nothing is supported on other platforms. */
278 static void tcp_keep_alive_init_support(struct tcp_keep_alive_support *support)
279 {
280 support->supported = false;
281 support->idle_time_supported = false;
282 support->probe_interval_supported = false;
283 support->max_probe_count_supported = false;
284 support->abort_threshold_supported = false;
285 }
286
287 #endif /* ! defined(__sun__) && ! defined(__linux__) */
288
#ifdef __sun__

/*
 * Solaris specific translation of the abort threshold, from the seconds
 * provided by the user to the value handed to the socket option.
 * Return -2 on error.
 */
static int convert_abort_threshold(int value)
{
	unsigned int threshold_ms;

	/* -1 is passed through untouched: use system defaults. */
	if (value == -1) {
		return value;
	}

	if (value < 0) {
		ERR("Invalid tcp keep-alive abort threshold (%i)", value);
		return -2;
	}

	/*
	 * Solaris 11 accepts an abort threshold between 0 and 8 minutes.
	 * https://docs.oracle.com/cd/E19120-01/open.solaris/819-2724/fsvdh/index.html
	 *
	 * The accepted range is narrowed to [1 second, 8 minutes]: a value of
	 * 0 does NOT mean that the connection times out immediately, it means
	 * that probing never times out, which defeats the purpose of dead
	 * peer detection.
	 */
	if (!(value >= SOLARIS_ABORT_THRESHOLD_MIN_S && value <= SOLARIS_ABORT_THRESHOLD_MAX_S)) {
		ERR("%s must be comprised between %d and %d inclusively on Solaris",
		    DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
		    SOLARIS_ABORT_THRESHOLD_MIN_S,
		    SOLARIS_ABORT_THRESHOLD_MAX_S);
		return -2;
	}

	/* The socket option expects milliseconds. */
	threshold_ms = ((unsigned int) value) * MSEC_PER_SEC;
	if ((value != 0 && (threshold_ms / ((unsigned int) value)) != MSEC_PER_SEC) ||
	    threshold_ms > INT_MAX) {
		/* The multiplication wrapped or does not fit in an int. */
		const int max_value = INT_MAX / MSEC_PER_SEC;

		ERR("%s is too big: maximum supported value is %d",
		    DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
		    max_value);
		return -2;
	}

	/* threshold_ms fits in [0, INT_MAX] at this point. */
	return (int) threshold_ms;
}

#else

/* No conversion is performed on other platforms. */
static int convert_abort_threshold(int value)
{
	return value;
}

#endif /* defined (__sun__) */
358
359 /*
360 * Retrieve settings from environment variables and warn for settings not
361 * supported by the platform.
362 */
363 static int tcp_keep_alive_init_config(struct tcp_keep_alive_support *support,
364 struct tcp_keep_alive_config *config)
365 {
366 int ret;
367 const char *value;
368
369 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV);
370 if (!support->supported) {
371 if (value) {
372 WARN("Using per-socket TCP keep-alive mechanism is not supported by this platform. Ignoring the %s environment variable.",
373 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV);
374 }
375 config->enabled = false;
376 } else if (value) {
377 ret = config_parse_value(value);
378 if (ret < 0 || ret > 1) {
379 ERR("Invalid value for %s", DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV);
380 ret = 1;
381 goto error;
382 }
383 config->enabled = ret;
384 }
385 DBG("TCP keep-alive mechanism %s", config->enabled ? "enabled" : "disabled");
386
387 /* Get value for tcp_keepalive_time in seconds. */
388 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV);
389 if (!support->idle_time_supported && value) {
390 WARN("Overriding the TCP keep-alive idle time threshold per-socket is not supported by this platform. Ignoring the %s environment variable.",
391 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV);
392 config->idle_time = -1;
393 } else if (value) {
394 int idle_time_platform;
395 int idle_time_seconds;
396
397 idle_time_seconds =
398 get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV, value);
399 if (idle_time_seconds < -1) {
400 ret = 1;
401 goto error;
402 }
403
404 idle_time_platform = convert_idle_time(idle_time_seconds);
405 if (idle_time_platform < -1) {
406 ret = 1;
407 goto error;
408 }
409
410 config->idle_time = idle_time_platform;
411 DBG("Overriding %s to %d",
412 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
413 idle_time_seconds);
414 }
415
416 /* Get value for tcp_keepalive_intvl in seconds. */
417 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV);
418 if (!support->probe_interval_supported && value) {
419 WARN("Overriding the TCP keep-alive probe interval time per-socket is not supported by this platform. Ignoring the %s environment variable.",
420 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV);
421 config->probe_interval = -1;
422 } else if (value) {
423 int probe_interval;
424
425 probe_interval =
426 get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV, value);
427 if (probe_interval < -1) {
428 ret = 1;
429 goto error;
430 }
431
432 config->probe_interval = probe_interval;
433 DBG("Overriding %s to %d",
434 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV,
435 config->probe_interval);
436 }
437
438 /* Get value for tcp_keepalive_probes. */
439 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV);
440 if (!support->max_probe_count_supported && value) {
441 WARN("Overriding the TCP keep-alive maximum probe count per-socket is not supported by this platform. Ignoring the %s environment variable.",
442 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV);
443 config->max_probe_count = -1;
444 } else if (value) {
445 int max_probe_count;
446
447 max_probe_count =
448 get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV, value);
449 if (max_probe_count < -1) {
450 ret = 1;
451 goto error;
452 }
453
454 config->max_probe_count = max_probe_count;
455 DBG("Overriding %s to %d",
456 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV,
457 config->max_probe_count);
458 }
459
460 /* Get value for tcp_keepalive_abort_interval. */
461 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV);
462 if (!support->abort_threshold_supported && value) {
463 WARN("Overriding the TCP keep-alive abort threshold per-socket is not supported by this platform. Ignoring the %s environment variable.",
464 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV);
465 config->abort_threshold = -1;
466 } else if (value) {
467 int abort_threshold_platform;
468 int abort_threshold_seconds;
469
470 abort_threshold_seconds =
471 get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV, value);
472 if (abort_threshold_seconds < -1) {
473 ret = 1;
474 goto error;
475 }
476
477 abort_threshold_platform = convert_abort_threshold(abort_threshold_seconds);
478 if (abort_threshold_platform < -1) {
479 ret = 1;
480 goto error;
481 }
482
483 config->abort_threshold = abort_threshold_platform;
484 DBG("Overriding %s to %d",
485 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
486 config->abort_threshold);
487 }
488
489 ret = 0;
490
491 error:
492 return ret;
493 }
494
/*
 * Initialize the TCP keep-alive configuration.
 *
 * Marked with the constructor attribute, so no explicit call appears in this
 * file. Fills the file-local the_support and the_config objects that
 * socket_apply_keep_alive_config() later reads.
 */
__attribute__((constructor)) static void tcp_keep_alive_init(void)
{
	/* Platform support must be known before the environment is parsed. */
	tcp_keep_alive_init_support(&the_support);
	/*
	 * The result is deliberately discarded: on an invalid environment
	 * variable, the_config keeps whatever was parsed before the failure.
	 */
	(void) tcp_keep_alive_init_config(&the_support, &the_config);
}
501
502 /*
503 * Set the socket options regarding TCP keep-alive.
504 */
505 int socket_apply_keep_alive_config(int socket_fd)
506 {
507 int ret;
508 int val = 1;
509
510 /* TCP keep-alive */
511 if (!the_support.supported || !the_config.enabled) {
512 ret = 0;
513 goto end;
514 }
515
516 DBG("TCP keep-alive enabled for socket %d", socket_fd);
517 ret = setsockopt(socket_fd, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val));
518 if (ret < 0) {
519 PERROR("setsockopt so_keepalive");
520 goto end;
521 }
522
523 /* TCP keep-alive idle time */
524 if (the_support.idle_time_supported && the_config.idle_time > 0) {
525 DBG("TCP keep-alive keep idle: %d enabled for socket %d",
526 the_config.idle_time,
527 socket_fd);
528 ret = setsockopt(socket_fd,
529 COMPAT_TCP_LEVEL,
530 COMPAT_TCP_KEEPIDLE,
531 &the_config.idle_time,
532 sizeof(the_config.idle_time));
533 if (ret < 0) {
534 PERROR("setsockopt TCP_KEEPIDLE");
535 goto end;
536 }
537 }
538 /* TCP keep-alive probe interval */
539 if (the_support.probe_interval_supported && the_config.probe_interval > 0) {
540 DBG("TCP keep-alive probe_interval: %d enabled for socket %d",
541 the_config.probe_interval,
542 socket_fd);
543 ret = setsockopt(socket_fd,
544 COMPAT_TCP_LEVEL,
545 COMPAT_TCP_KEEPINTVL,
546 &the_config.probe_interval,
547 sizeof(the_config.probe_interval));
548 if (ret < 0) {
549 PERROR("setsockopt TCP_KEEPINTVL");
550 goto end;
551 }
552 }
553
554 /* TCP keep-alive max probe count */
555 if (the_support.max_probe_count_supported && the_config.max_probe_count > 0) {
556 DBG("TCP keep-alive max_probe: %d enabled for socket %d",
557 the_config.max_probe_count,
558 socket_fd);
559 ret = setsockopt(socket_fd,
560 COMPAT_TCP_LEVEL,
561 COMPAT_TCP_KEEPCNT,
562 &the_config.max_probe_count,
563 sizeof(the_config.max_probe_count));
564 if (ret < 0) {
565 PERROR("setsockopt TCP_KEEPCNT");
566 goto end;
567 }
568 }
569
570 /* TCP keep-alive abort threshold */
571 if (the_support.abort_threshold_supported && the_config.abort_threshold > 0) {
572 DBG("TCP keep-alive abort threshold: %d enabled for socket %d",
573 the_config.abort_threshold,
574 socket_fd);
575 ret = setsockopt(socket_fd,
576 COMPAT_TCP_LEVEL,
577 COMPAT_TCP_ABORT_THRESHOLD,
578 &the_config.abort_threshold,
579 sizeof(the_config.max_probe_count));
580 if (ret < 0) {
581 PERROR("setsockopt TCP_KEEPALIVE_ABORT_THRESHOLD");
582 goto end;
583 }
584 }
585 end:
586 return ret;
587 }
This page took 0.04147 seconds and 5 git commands to generate.