Move to kernel style SPDX license identifiers
[lttng-tools.git] / src / bin / lttng-relayd / tcp_keep_alive.c
1 /*
2 * Copyright (C) 2017 Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
3 *
4 * SPDX-License-Identifier: GPL-2.0-only
5 *
6 */
7
8 #include <sys/types.h>
9 #include <netinet/tcp.h>
10 #include <stdbool.h>
11 #include <sys/socket.h>
12 #include <limits.h>
13
14 #include <common/compat/getenv.h>
15 #include <common/time.h>
16 #include <common/defaults.h>
17 #include <common/config/session-config.h>
18
19 #include "tcp_keep_alive.h"
20
/* Range accepted for the keep-alive idle time on Solaris, in seconds. */
#define SOLARIS_IDLE_TIME_MIN_S		10
#define SOLARIS_IDLE_TIME_MAX_S		(10 * 24 * 60 * 60)	/* 10 days */
/* Range accepted for the keep-alive abort threshold on Solaris, in seconds. */
#define SOLARIS_ABORT_THRESHOLD_MIN_S	1
#define SOLARIS_ABORT_THRESHOLD_MAX_S	(8 * 60)		/* 8 minutes */
25
/*
 * Map the TCP keep-alive socket options of each platform onto a common set of
 * COMPAT_TCP_* names. An option that a platform does not provide is defined
 * to 0.
 */
#if defined(__linux__)

#define COMPAT_TCP_LEVEL		SOL_TCP
#define COMPAT_TCP_KEEPIDLE		TCP_KEEPIDLE
#define COMPAT_TCP_KEEPINTVL		TCP_KEEPINTVL
#define COMPAT_TCP_KEEPCNT		TCP_KEEPCNT
/* The abort threshold does not exist on Linux. */
#define COMPAT_TCP_ABORT_THRESHOLD	0

#elif defined(__sun__) /* ! defined(__linux__) */

#define COMPAT_TCP_LEVEL		IPPROTO_TCP

/* The probe interval and probe count do not exist on Solaris. */
#define COMPAT_TCP_KEEPINTVL		0
#define COMPAT_TCP_KEEPCNT		0

/* Idle time: only when the system headers expose the option. */
#ifdef TCP_KEEPALIVE_THRESHOLD
#define COMPAT_TCP_KEEPIDLE		TCP_KEEPALIVE_THRESHOLD
#else /* ! defined(TCP_KEEPALIVE_THRESHOLD) */
#define COMPAT_TCP_KEEPIDLE		0
#endif /* TCP_KEEPALIVE_THRESHOLD */

/* Abort threshold: only when the system headers expose the option. */
#ifdef TCP_KEEPALIVE_ABORT_THRESHOLD
#define COMPAT_TCP_ABORT_THRESHOLD	TCP_KEEPALIVE_ABORT_THRESHOLD
#else /* ! defined(TCP_KEEPALIVE_ABORT_THRESHOLD) */
#define COMPAT_TCP_ABORT_THRESHOLD	0
#endif /* TCP_KEEPALIVE_ABORT_THRESHOLD */

#else /* ! defined(__linux__) && ! defined(__sun__) */

/* Unknown platform: no option is available. */
#define COMPAT_TCP_LEVEL		0
#define COMPAT_TCP_KEEPIDLE		0
#define COMPAT_TCP_KEEPINTVL		0
#define COMPAT_TCP_KEEPCNT		0
#define COMPAT_TCP_ABORT_THRESHOLD	0

#endif /* ! defined(__linux__) && ! defined(__sun__) */
63
/* What the running platform allows regarding per-socket TCP keep-alive. */
struct tcp_keep_alive_support {
	/* The platform supports TCP keep-alive. */
	bool supported;
	/* The idle time can be overridden on a per-socket basis. */
	bool idle_time_supported;
	/* The probe interval can be overridden on a per-socket basis. */
	bool probe_interval_supported;
	/* The maximum probe count can be configured on a per-socket basis. */
	bool max_probe_count_supported;
	/* The abort threshold can be overridden on a per-socket basis. */
	bool abort_threshold_supported;
};
82
/* TCP keep-alive settings, one field per environment variable. */
struct tcp_keep_alive_config {
	/* LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV environment variable. */
	bool enabled;
	/* LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV environment variable. */
	int idle_time;
	/* LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV environment variable. */
	int probe_interval;
	/* LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV environment variable. */
	int max_probe_count;
	/* LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV environment variable. */
	int abort_threshold;
};
107
108 static struct tcp_keep_alive_config config = {
109 .enabled = false,
110 .idle_time = -1,
111 .probe_interval = -1,
112 .max_probe_count = -1,
113 .abort_threshold = -1
114 };
115
116 static struct tcp_keep_alive_support support = {
117 .supported = false,
118 .idle_time_supported = false,
119 .probe_interval_supported = false,
120 .max_probe_count_supported = false,
121 .abort_threshold_supported = false
122 };
123
/*
 * Common parser for string to int conversion where the value must be in range
 * [-1, INT_MAX].
 *
 * env_var is only used to name the variable in error messages; value is the
 * string to parse. The numeric base is detected by strtol() (base 0).
 *
 * Returns the parsed value on success.
 * Returns -2 on invalid value. -1 is never used to report an error since it is
 * a legitimate value that callers accept.
 */
static
int get_env_int(const char *env_var,
		const char *value)
{
	int ret;
	long tmp;
	char *endptr = NULL;

	errno = 0;
	tmp = strtol(value, &endptr, 0);
	if (errno != 0) {
		ERR("%s cannot be parsed.", env_var);
		PERROR("errno for previous parsing failure");
		ret = -2;
		goto end;
	}

	/* Reject empty input and trailing garbage. */
	if (endptr == value || *endptr != '\0') {
		ERR("%s is not a valid number", env_var);
		ret = -2;
		goto end;
	}

	if (tmp < -1) {
		ERR("%s must be greater or equal to -1", env_var);
		ret = -2;
		goto end;
	}
	if (tmp > INT_MAX) {
		ERR("%s is too big. Maximum value is %d", env_var, INT_MAX);
		ret = -2;
		goto end;
	}

	/* tmp is within [-1, INT_MAX]: the cast cannot truncate. */
	ret = (int) tmp;
end:
	return ret;
}
168
/*
 * Convert an idle time given in seconds to the value handed to the platform's
 * idle time socket option.
 *
 * Returns the converted value, or -2 on invalid value.
 */
#ifdef __sun__

static
int convert_idle_time(int value)
{
	unsigned int idle_time_ms;

	/* -1 and 0 are handed back untouched: use system defaults. */
	if (value == 0 || value == -1) {
		return value;
	}

	if (value < 0) {
		ERR("Invalid tcp keep-alive idle time (%i)", value);
		return -2;
	}

	/*
	 * Solaris 11 only accepts an idle time between 10 seconds and
	 * 10 days. See
	 * https://docs.oracle.com/cd/E23824_01/html/821-1475/tcp-7p.html#REFMAN7tcp-7p
	 */
	if (value < SOLARIS_IDLE_TIME_MIN_S ||
			value > SOLARIS_IDLE_TIME_MAX_S) {
		ERR("%s must be comprised between %d and %d inclusively on Solaris",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
				SOLARIS_IDLE_TIME_MIN_S,
				SOLARIS_IDLE_TIME_MAX_S);
		return -2;
	}

	/* Solaris expects the idle time in milliseconds. */
	idle_time_ms = ((unsigned int) value) * MSEC_PER_SEC;
	if ((value != 0 && (idle_time_ms / ((unsigned int) value)) != MSEC_PER_SEC)
			|| idle_time_ms > INT_MAX) {
		/* The multiplication overflowed. */
		const int max_value = INT_MAX / MSEC_PER_SEC;

		ERR("%s is too big: maximum supported value is %d",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
				max_value);
		return -2;
	}

	/* idle_time_ms fits in [0, INT_MAX]: the cast is safe. */
	return (int) idle_time_ms;
}

#else /* ! defined(__sun__) */

/* Other platforms take the value as is. */
static
int convert_idle_time(int value)
{
	return value;
}

#endif /* ! defined(__sun__) */
237
238 /* Per-platform support of tcp_keep_alive functionality. */
239 #if defined (__linux__)
240
241 static
242 void tcp_keep_alive_init_support(struct tcp_keep_alive_support *support)
243 {
244 support->supported = true;
245 support->idle_time_supported = true;
246 support->probe_interval_supported = true;
247 support->max_probe_count_supported = true;
248 /* Solaris specific */
249 support->abort_threshold_supported = false;
250 }
251
252 #elif defined(__sun__) /* ! defined (__linux__) */
253
254 static
255 void tcp_keep_alive_init_support(struct tcp_keep_alive_support *support)
256 {
257 support->supported = true;
258 #ifdef TCP_KEEPALIVE_THRESHOLD
259 support->idle_time_supported = true;
260 #else
261 support->idle_time_supported = false;;
262 #endif /* TCP_KEEPALIVE_THRESHOLD */
263
264 /*
265 * Solaris does not support either tcp_keepalive_probes or
266 * tcp_keepalive_intvl.
267 * Inferring a value for TCP_KEEP_ALIVE_ABORT_THRESHOLD using
268 * (tcp_keepalive_probes * tcp_keepalive_intvl) could yield a good
269 * alternative, but Solaris does not detail the algorithm used (such as
270 * constant time retry like Linux).
271 *
272 * Ignore those settings on Solaris 11. We prefer exposing an
273 * environment variable only used on Solaris for the abort threshold.
274 */
275 support->probe_interval_supported = false;
276 support->max_probe_count_supported = false;
277 #ifdef TCP_KEEPALIVE_ABORT_THRESHOLD
278 support->abort_threshold_supported = true;
279 #else
280 support->abort_threshold_supported = false;
281 #endif /* TCP_KEEPALIVE_THRESHOLD */
282 }
283
284 #else /* ! defined(__sun__) && ! defined(__linux__) */
285
286 /* Assume nothing is supported on other platforms. */
287 static
288 void tcp_keep_alive_init_support(struct tcp_keep_alive_support *support)
289 {
290 support->supported = false;
291 support->idle_time_supported = false;
292 support->probe_interval_supported = false;
293 support->max_probe_count_supported = false;
294 support->abort_threshold_supported = false;
295 }
296
297 #endif /* ! defined(__sun__) && ! defined(__linux__) */
298
#ifdef __sun__

/*
 * Convert an abort threshold given in seconds to the value handed to the
 * Solaris abort threshold socket option.
 *
 * Returns the converted value, or -2 on error.
 */
static
int convert_abort_threshold(int value)
{
	unsigned int threshold_ms;

	/* -1 is handed back untouched: use system defaults. */
	if (value == -1) {
		return value;
	}

	if (value < 0) {
		ERR("Invalid tcp keep-alive abort threshold (%i)", value);
		return -2;
	}

	/*
	 * Solaris 11 accepts a threshold between 0 and 8 minutes.
	 * https://docs.oracle.com/cd/E19120-01/open.solaris/819-2724/fsvdh/index.html
	 *
	 * The accepted range is narrowed to 1 second - 8 minutes since the 0
	 * value goes against the purpose of dead peers detection by never
	 * timing out when probing. It does NOT mean that the connection times
	 * out immediately.
	 */
	if (value < SOLARIS_ABORT_THRESHOLD_MIN_S ||
			value > SOLARIS_ABORT_THRESHOLD_MAX_S) {
		ERR("%s must be comprised between %d and %d inclusively on Solaris",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
				SOLARIS_ABORT_THRESHOLD_MIN_S,
				SOLARIS_ABORT_THRESHOLD_MAX_S);
		return -2;
	}

	/* Solaris expects the abort threshold in milliseconds. */
	threshold_ms = ((unsigned int) value) * MSEC_PER_SEC;
	if ((value != 0 && (threshold_ms / ((unsigned int) value)) != MSEC_PER_SEC)
			|| threshold_ms > INT_MAX) {
		/* The multiplication overflowed. */
		const int max_value = INT_MAX / MSEC_PER_SEC;

		ERR("%s is too big: maximum supported value is %d",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
				max_value);
		return -2;
	}

	/* threshold_ms fits in [0, INT_MAX]: the cast is safe. */
	return (int) threshold_ms;
}

#else /* ! defined(__sun__) */

/* Other platforms take the value as is. */
static
int convert_abort_threshold(int value)
{
	return value;
}

#endif /* defined (__sun__) */
371
372 /*
373 * Retrieve settings from environment variables and warn for settings not
374 * supported by the platform.
375 */
376 static
377 int tcp_keep_alive_init_config(struct tcp_keep_alive_support *support,
378 struct tcp_keep_alive_config *config)
379 {
380 int ret;
381 const char *value;
382
383 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV);
384 if (!support->supported) {
385 if (value) {
386 WARN("Using per-socket TCP keep-alive mechanism is not supported by this platform. Ignoring the %s environment variable.",
387 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV);
388 }
389 config->enabled = false;
390 } else if (value) {
391 ret = config_parse_value(value);
392 if (ret < 0 || ret > 1) {
393 ERR("Invalid value for %s", DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV);
394 ret = 1;
395 goto error;
396 }
397 config->enabled = ret;
398 }
399 DBG("TCP keep-alive mechanism %s", config->enabled ? "enabled": "disabled");
400
401 /* Get value for tcp_keepalive_time in seconds. */
402 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV);
403 if (!support->idle_time_supported && value) {
404 WARN("Overriding the TCP keep-alive idle time threshold per-socket is not supported by this platform. Ignoring the %s environment variable.",
405 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV);
406 config->idle_time = -1;
407 } else if (value) {
408 int idle_time_platform;
409 int idle_time_seconds;
410
411 idle_time_seconds = get_env_int(
412 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
413 value);
414 if (idle_time_seconds < -1) {
415 ret = 1;
416 goto error;
417 }
418
419 idle_time_platform = convert_idle_time(idle_time_seconds);
420 if (idle_time_platform < -1) {
421 ret = 1;
422 goto error;
423 }
424
425 config->idle_time = idle_time_platform;
426 DBG("Overriding %s to %d",
427 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
428 idle_time_seconds);
429 }
430
431 /* Get value for tcp_keepalive_intvl in seconds. */
432 value = lttng_secure_getenv(
433 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV);
434 if (!support->probe_interval_supported && value) {
435 WARN("Overriding the TCP keep-alive probe interval time per-socket is not supported by this platform. Ignoring the %s environment variable.",
436 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV);
437 config->probe_interval = -1;
438 } else if (value) {
439 int probe_interval;
440
441 probe_interval = get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV,
442 value);
443 if (probe_interval < -1) {
444 ret = 1;
445 goto error;
446 }
447
448 config->probe_interval = probe_interval;
449 DBG("Overriding %s to %d",
450 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV,
451 config->probe_interval);
452 }
453
454 /* Get value for tcp_keepalive_probes. */
455 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV);
456 if (!support->max_probe_count_supported && value) {
457 WARN("Overriding the TCP keep-alive maximum probe count per-socket is not supported by this platform. Ignoring the %s environment variable.",
458 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV);
459 config->max_probe_count = -1;
460 } else if (value) {
461 int max_probe_count;
462
463 max_probe_count = get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV,
464 value);
465 if (max_probe_count < -1) {
466 ret = 1;
467 goto error;
468 }
469
470 config->max_probe_count = max_probe_count;
471 DBG("Overriding %s to %d",
472 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV,
473 config->max_probe_count);
474 }
475
476 /* Get value for tcp_keepalive_abort_interval. */
477 value = lttng_secure_getenv(
478 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV);
479 if (!support->abort_threshold_supported && value) {
480 WARN("Overriding the TCP keep-alive abort threshold per-socket is not supported by this platform. Ignoring the %s environment variable.",
481 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV);
482 config->abort_threshold = -1;
483 } else if (value) {
484 int abort_threshold_platform;
485 int abort_threshold_seconds;
486
487 abort_threshold_seconds = get_env_int(
488 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV,
489 value);
490 if (abort_threshold_seconds < -1) {
491 ret = 1;
492 goto error;
493 }
494
495 abort_threshold_platform = convert_abort_threshold(
496 abort_threshold_seconds);
497 if (abort_threshold_platform < -1) {
498 ret = 1;
499 goto error;
500 }
501
502 config->abort_threshold = abort_threshold_platform;
503 DBG("Overriding %s to %d",
504 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
505 config->abort_threshold);
506 }
507
508 ret = 0;
509
510 error:
511 return ret;
512 }
513
514 /* Initialize the TCP keep-alive configuration. */
515 __attribute__((constructor)) static
516 int tcp_keep_alive_init(void)
517 {
518 tcp_keep_alive_init_support(&support);
519 return tcp_keep_alive_init_config(&support, &config);
520 }
521
522 /*
523 * Set the socket options regarding TCP keep-alive.
524 */
525 LTTNG_HIDDEN
526 int socket_apply_keep_alive_config(int socket_fd)
527 {
528 int ret;
529 int val = 1;
530
531 /* TCP keep-alive */
532 if (!support.supported || !config.enabled ) {
533 ret = 0;
534 goto end;
535 }
536
537 DBG("TCP keep-alive enabled for socket %d", socket_fd);
538 ret = setsockopt(socket_fd, SOL_SOCKET, SO_KEEPALIVE, &val,
539 sizeof(val));
540 if (ret < 0) {
541 PERROR("setsockopt so_keepalive");
542 goto end;
543 }
544
545 /* TCP keep-alive idle time */
546 if (support.idle_time_supported && config.idle_time > 0) {
547 DBG("TCP keep-alive keep idle: %d enabled for socket %d", config.idle_time, socket_fd);
548 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL, COMPAT_TCP_KEEPIDLE, &config.idle_time,
549 sizeof(config.idle_time));
550 if (ret < 0) {
551 PERROR("setsockopt TCP_KEEPIDLE");
552 goto end;
553 }
554 }
555 /* TCP keep-alive probe interval */
556 if (support.probe_interval_supported && config.probe_interval > 0) {
557 DBG("TCP keep-alive probe_interval: %d enabled for socket %d", config.probe_interval, socket_fd);
558 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL, COMPAT_TCP_KEEPINTVL, &config.probe_interval,
559 sizeof(config.probe_interval));
560 if (ret < 0) {
561 PERROR("setsockopt TCP_KEEPINTVL");
562 goto end;
563 }
564 }
565
566 /* TCP keep-alive max probe count */
567 if (support.max_probe_count_supported && config.max_probe_count > 0) {
568 DBG("TCP keep-alive max_probe: %d enabled for socket %d", config.max_probe_count, socket_fd);
569 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL, COMPAT_TCP_KEEPCNT, &config.max_probe_count,
570 sizeof(config.max_probe_count));
571 if (ret < 0) {
572 PERROR("setsockopt TCP_KEEPCNT");
573 goto end;
574 }
575 }
576
577 /* TCP keep-alive abort threshold */
578 if (support.abort_threshold_supported && config.abort_threshold > 0) {
579 DBG("TCP keep-alive abort threshold: %d enabled for socket %d", config.abort_threshold, socket_fd);
580 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL, COMPAT_TCP_ABORT_THRESHOLD, &config.abort_threshold,
581 sizeof(config.max_probe_count));
582 if (ret < 0) {
583 PERROR("setsockopt TCP_KEEPALIVE_ABORT_THRESHOLD");
584 goto end;
585 }
586 }
587 end:
588 return ret;
589 }
This page took 0.067336 seconds and 5 git commands to generate.