0cadbad74e6c4309eee2c2d1c0d54ed2c02ea1a1
[lttng-tools.git] / src / bin / lttng-relayd / tcp_keep_alive.c
1 /*
2 * Copyright (C) 2017 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2 only,
6 * as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16 */
17
18 #include <sys/types.h>
19 #include <netinet/tcp.h>
20 #include <stdbool.h>
21 #include <sys/socket.h>
22 #include <limits.h>
23
24 #include <common/compat/getenv.h>
25 #include <common/time.h>
26 #include <common/defaults.h>
27 #include <common/config/session-config.h>
28
29 #include "tcp_keep_alive.h"
30
/*
 * Bounds, expressed in seconds, enforced on Solaris for the keep-alive idle
 * time and for the keep-alive abort threshold.
 */
#define SOLARIS_IDLE_TIME_MIN_S		10
#define SOLARIS_IDLE_TIME_MAX_S		(10 * 24 * 60 * 60)	/* 10 days */
#define SOLARIS_ABORT_THRESHOLD_MIN_S	1
#define SOLARIS_ABORT_THRESHOLD_MAX_S	(8 * 60)		/* 8 minutes */
35
/*
 * Per-platform mapping of the TCP keep-alive socket options.
 *
 * An option that has no equivalent on the target platform is defined to 0.
 */
#if defined(__linux__)

#define COMPAT_TCP_LEVEL		SOL_TCP
#define COMPAT_TCP_KEEPIDLE		TCP_KEEPIDLE
#define COMPAT_TCP_KEEPINTVL		TCP_KEEPINTVL
#define COMPAT_TCP_KEEPCNT		TCP_KEEPCNT
#define COMPAT_TCP_ABORT_THRESHOLD	0	/* Does not exist on Linux. */

#elif defined(__sun__) /* ! defined(__linux__) */

#define COMPAT_TCP_LEVEL		IPPROTO_TCP
#define COMPAT_TCP_KEEPINTVL		0	/* Does not exist on Solaris. */
#define COMPAT_TCP_KEEPCNT		0	/* Does not exist on Solaris. */

/* The idle time option is only mapped when the system headers provide it. */
#ifndef TCP_KEEPALIVE_THRESHOLD
#define COMPAT_TCP_KEEPIDLE		0
#else /* defined(TCP_KEEPALIVE_THRESHOLD) */
#define COMPAT_TCP_KEEPIDLE		TCP_KEEPALIVE_THRESHOLD
#endif /* TCP_KEEPALIVE_THRESHOLD */

/* Same for the abort threshold option. */
#ifndef TCP_KEEPALIVE_ABORT_THRESHOLD
#define COMPAT_TCP_ABORT_THRESHOLD	0
#else /* defined(TCP_KEEPALIVE_ABORT_THRESHOLD) */
#define COMPAT_TCP_ABORT_THRESHOLD	TCP_KEEPALIVE_ABORT_THRESHOLD
#endif /* TCP_KEEPALIVE_ABORT_THRESHOLD */

#else /* ! defined(__linux__) && ! defined(__sun__) */

#define COMPAT_TCP_LEVEL		0
#define COMPAT_TCP_KEEPIDLE		0
#define COMPAT_TCP_KEEPINTVL		0
#define COMPAT_TCP_KEEPCNT		0
#define COMPAT_TCP_ABORT_THRESHOLD	0

#endif /* ! defined(__linux__) && ! defined(__sun__) */
73
/* TCP keep-alive capabilities of the platform, one flag per tunable. */
struct tcp_keep_alive_support {
	/* Per-socket TCP keep-alive is available on this platform. */
	bool supported;
	/* The idle time can be overridden on a per-socket basis. */
	bool idle_time_supported;
	/* The probe interval can be overridden on a per-socket basis. */
	bool probe_interval_supported;
	/* The maximum probe count can be set on a per-socket basis. */
	bool max_probe_count_supported;
	/* The abort threshold can be overridden on a per-socket basis. */
	bool abort_threshold_supported;
};
92
/*
 * TCP keep-alive settings, as read from the environment.
 *
 * For the integer fields, only strictly positive values end up being applied
 * to a socket.
 */
struct tcp_keep_alive_config {
	/* Set from DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV. */
	bool enabled;
	/*
	 * Set from DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV, after
	 * conversion to the unit expected by the platform.
	 */
	int idle_time;
	/* Set from DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV. */
	int probe_interval;
	/* Set from DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV. */
	int max_probe_count;
	/*
	 * Set from DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
	 * after conversion to the unit expected by the platform.
	 */
	int abort_threshold;
};
117
118 static struct tcp_keep_alive_config config = {
119 .enabled = false,
120 .idle_time = -1,
121 .probe_interval = -1,
122 .max_probe_count = -1,
123 .abort_threshold = -1
124 };
125
126 static struct tcp_keep_alive_support support = {
127 .supported = false,
128 .idle_time_supported = false,
129 .probe_interval_supported = false,
130 .max_probe_count_supported = false,
131 .abort_threshold_supported = false
132 };
133
/*
 * Common parser for the integer environment variables of this file. The
 * value must be in range [-1, INT_MAX].
 *
 * env_var: name of the environment variable; only used in error messages.
 * value: string to parse. The numeric base is auto-detected by strtol()
 *        (base 0) and the whole string must be consumed by the conversion.
 *
 * Returns the parsed value on success, -2 on invalid value (conversion
 * error, empty string, trailing characters or value out of range).
 */
static
int get_env_int(const char *env_var,
		const char *value)
{
	int ret;
	long tmp;
	char *endptr = NULL;

	/* strtol() only reports errors through errno; clear it beforehand. */
	errno = 0;
	tmp = strtol(value, &endptr, 0);
	if (errno != 0) {
		ERR("%s cannot be parsed.", env_var);
		PERROR("errno for previous parsing failure");
		ret = -2;
		goto end;
	}

	if (endptr == value || *endptr != '\0') {
		ERR("%s is not a valid number", env_var);
		/*
		 * -1 is a legal value which callers accept, so it must not be
		 * used to report a malformed string: return -2 like every
		 * other failure path.
		 */
		ret = -2;
		goto end;
	}

	if (tmp < -1) {
		ERR("%s must be greater or equal to -1", env_var);
		ret = -2;
		goto end;
	}
	if (tmp > INT_MAX) {
		ERR("%s is too big. Maximum value is %d", env_var, INT_MAX);
		ret = -2;
		goto end;
	}

	/* tmp is within [-1, INT_MAX]: the narrowing cast is safe. */
	ret = (int) tmp;
end:
	return ret;
}
178
/*
 * Per-platform conversion of the keep-alive idle time, given in seconds, to
 * the value expected by the platform's socket option.
 * Returns -2 on invalid value.
 */
183 #ifdef __sun__
184
185 static
186 int convert_idle_time(int value)
187 {
188 int ret;
189 unsigned int tmp_ms;
190
191 if (value == -1 || value == 0) {
192 /* Use system defaults */
193 ret = value;
194 goto end;
195 }
196
197 if (value < 0) {
198 ERR("Invalid tcp keep-alive idle time (%i)", value);
199 ret = -2;
200 goto end;
201 }
202
203 /*
204 * Additional constraints for Solaris 11.
205 * Minimum 10s, maximum 10 days. Defined by
206 * https://docs.oracle.com/cd/E23824_01/html/821-1475/tcp-7p.html#REFMAN7tcp-7p
207 */
208 if ((value < SOLARIS_IDLE_TIME_MIN_S ||
209 value > SOLARIS_IDLE_TIME_MAX_S)) {
210 ERR("%s must be comprised between %d and %d inclusively on Solaris",
211 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
212 SOLARIS_IDLE_TIME_MIN_S,
213 SOLARIS_IDLE_TIME_MAX_S);
214 ret = -2;
215 goto end;
216 }
217
218 /* On Solaris idle time is given in milliseconds. */
219 tmp_ms = ((unsigned int) value) * MSEC_PER_SEC;
220 if ((value != 0 && (tmp_ms / ((unsigned int) value)) != MSEC_PER_SEC)
221 || tmp_ms > INT_MAX) {
222 /* Overflow. */
223 const int max_value = INT_MAX / MSEC_PER_SEC;
224
225 ERR("%s is too big: maximum supported value is %d",
226 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
227 max_value);
228 ret = -2;
229 goto end;
230 }
231
232 /* tmp_ms is >= 0 and <= INT_MAX. Cast is safe. */
233 ret = (int) tmp_ms;
234 end:
235 return ret;
236 }
237
238 #else /* ! defined(__sun__) */
239
/*
 * Platforms other than Solaris: the idle time needs no conversion, the
 * value is returned as is.
 */
static int convert_idle_time(int value)
{
	return value;
}
245
246 #endif /* ! defined(__sun__) */
247
248 /* Per-platform support of tcp_keep_alive functionality. */
249 #if defined (__linux__)
250
251 static
252 void tcp_keep_alive_init_support(struct tcp_keep_alive_support *support)
253 {
254 support->supported = true;
255 support->idle_time_supported = true;
256 support->probe_interval_supported = true;
257 support->max_probe_count_supported = true;
258 /* Solaris specific */
259 support->abort_threshold_supported = false;
260 }
261
262 #elif defined(__sun__) /* ! defined (__linux__) */
263
264 static
265 void tcp_keep_alive_init_support(struct tcp_keep_alive_support *support)
266 {
267 support->supported = true;
268 #ifdef TCP_KEEPALIVE_THRESHOLD
269 support->idle_time_supported = true;
270 #else
271 support->idle_time_supported = false;;
272 #endif /* TCP_KEEPALIVE_THRESHOLD */
273
274 /*
275 * Solaris does not support either tcp_keepalive_probes or
276 * tcp_keepalive_intvl.
277 * Inferring a value for TCP_KEEP_ALIVE_ABORT_THRESHOLD using
278 * (tcp_keepalive_probes * tcp_keepalive_intvl) could yield a good
279 * alternative, but Solaris does not detail the algorithm used (such as
280 * constant time retry like Linux).
281 *
282 * Ignore those settings on Solaris 11. We prefer exposing an
283 * environment variable only used on Solaris for the abort threshold.
284 */
285 support->probe_interval_supported = false;
286 support->max_probe_count_supported = false;
287 #ifdef TCP_KEEPALIVE_ABORT_THRESHOLD
288 support->abort_threshold_supported = true;
289 #else
290 support->abort_threshold_supported = false;
291 #endif /* TCP_KEEPALIVE_THRESHOLD */
292 }
293
294 #else /* ! defined(__sun__) && ! defined(__linux__) */
295
296 /* Assume nothing is supported on other platforms. */
297 static
298 void tcp_keep_alive_init_support(struct tcp_keep_alive_support *support)
299 {
300 support->supported = false;
301 support->idle_time_supported = false;
302 support->probe_interval_supported = false;
303 support->max_probe_count_supported = false;
304 support->abort_threshold_supported = false;
305 }
306
307 #endif /* ! defined(__sun__) && ! defined(__linux__) */
308
309 #ifdef __sun__
310
311 /*
312 * Solaris specific modifier for abort threshold.
313 * Return -2 on error.
314 */
315 static
316 int convert_abort_threshold(int value)
317 {
318 int ret;
319 unsigned int tmp_ms;
320
321 if (value == -1) {
322 /* Use system defaults */
323 ret = value;
324 goto end;
325 }
326
327 if (value < 0) {
328 ERR("Invalid tcp keep-alive abort threshold (%i)", value);
329 ret = -2;
330 goto end;
331 }
332
333 /*
334 * Additional constraints for Solaris 11.
335 *
336 * Between 0 and 8 minutes.
337 * https://docs.oracle.com/cd/E19120-01/open.solaris/819-2724/fsvdh/index.html
338 *
339 * Restrict from 1 seconds to 8 minutes sice the 0 value goes against
340 * the purpose of dead peers detection by never timing out when probing.
341 * It does NOT mean that the connection times out immediately.
342 */
343 if ((value < SOLARIS_ABORT_THRESHOLD_MIN_S || value > SOLARIS_ABORT_THRESHOLD_MAX_S)) {
344 ERR("%s must be comprised between %d and %d inclusively on Solaris",
345 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
346 SOLARIS_ABORT_THRESHOLD_MIN_S,
347 SOLARIS_ABORT_THRESHOLD_MAX_S);
348 ret = -2;
349 goto end;
350 }
351
352 /* Abort threshold is given in milliseconds. */
353 tmp_ms = ((unsigned int) value) * MSEC_PER_SEC;
354 if ((value != 0 && (tmp_ms / ((unsigned int) value)) != MSEC_PER_SEC)
355 || tmp_ms > INT_MAX) {
356 /* Overflow */
357 const int max_value = INT_MAX / MSEC_PER_SEC;
358
359 ERR("%s is too big: maximum supported value is %d",
360 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
361 max_value);
362 ret = -2;
363 goto end;
364 }
365
366 /* tmp_ms is >= 0 and <= INT_MAX. Cast is safe. */
367 ret = (int) tmp_ms;
368 end:
369 return ret;
370 }
371
372 #else
373
/*
 * Platforms other than Solaris: the abort threshold needs no conversion,
 * the value is returned as is.
 */
static int convert_abort_threshold(int value)
{
	return value;
}
379
380 #endif /* defined (__sun__) */
381
382 /*
383 * Retrieve settings from environment variables and warn for settings not
384 * supported by the platform.
385 */
386 static
387 int tcp_keep_alive_init_config(struct tcp_keep_alive_support *support,
388 struct tcp_keep_alive_config *config)
389 {
390 int ret;
391 const char *value;
392
393 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV);
394 if (!support->supported) {
395 if (value) {
396 WARN("Using per-socket TCP keep-alive mechanism is not supported by this platform. Ignoring the %s environment variable.",
397 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV);
398 }
399 config->enabled = false;
400 } else if (value) {
401 ret = config_parse_value(value);
402 if (ret < 0 || ret > 1) {
403 ERR("Invalid value for %s", DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV);
404 ret = 1;
405 goto error;
406 }
407 config->enabled = ret;
408 }
409 DBG("TCP keep-alive mechanism %s", config->enabled ? "enabled": "disabled");
410
411 /* Get value for tcp_keepalive_time in seconds. */
412 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV);
413 if (!support->idle_time_supported && value) {
414 WARN("Overriding the TCP keep-alive idle time threshold per-socket is not supported by this platform. Ignoring the %s environment variable.",
415 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV);
416 config->idle_time = -1;
417 } else if (value) {
418 int idle_time_platform;
419 int idle_time_seconds;
420
421 idle_time_seconds = get_env_int(
422 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
423 value);
424 if (idle_time_seconds < -1) {
425 ret = 1;
426 goto error;
427 }
428
429 idle_time_platform = convert_idle_time(idle_time_seconds);
430 if (idle_time_platform < -1) {
431 ret = 1;
432 goto error;
433 }
434
435 config->idle_time = idle_time_platform;
436 DBG("Overriding %s to %d",
437 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
438 idle_time_seconds);
439 }
440
441 /* Get value for tcp_keepalive_intvl in seconds. */
442 value = lttng_secure_getenv(
443 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV);
444 if (!support->probe_interval_supported && value) {
445 WARN("Overriding the TCP keep-alive probe interval time per-socket is not supported by this platform. Ignoring the %s environment variable.",
446 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV);
447 config->probe_interval = -1;
448 } else if (value) {
449 int probe_interval;
450
451 probe_interval = get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV,
452 value);
453 if (probe_interval < -1) {
454 ret = 1;
455 goto error;
456 }
457
458 config->probe_interval = probe_interval;
459 DBG("Overriding %s to %d",
460 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV,
461 config->probe_interval);
462 }
463
464 /* Get value for tcp_keepalive_probes. */
465 value = lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV);
466 if (!support->max_probe_count_supported && value) {
467 WARN("Overriding the TCP keep-alive maximum probe count per-socket is not supported by this platform. Ignoring the %s environment variable.",
468 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV);
469 config->max_probe_count = -1;
470 } else if (value) {
471 int max_probe_count;
472
473 max_probe_count = get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV,
474 value);
475 if (max_probe_count < -1) {
476 ret = 1;
477 goto error;
478 }
479
480 config->max_probe_count = max_probe_count;
481 DBG("Overriding %s to %d",
482 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV,
483 config->max_probe_count);
484 }
485
486 /* Get value for tcp_keepalive_abort_interval. */
487 value = lttng_secure_getenv(
488 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV);
489 if (!support->abort_threshold_supported && value) {
490 WARN("Overriding the TCP keep-alive abort threshold per-socket is not supported by this platform. Ignoring the %s environment variable.",
491 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV);
492 config->abort_threshold = -1;
493 } else if (value) {
494 int abort_threshold_platform;
495 int abort_threshold_seconds;
496
497 abort_threshold_seconds = get_env_int(
498 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV,
499 value);
500 if (abort_threshold_seconds < -1) {
501 ret = 1;
502 goto error;
503 }
504
505 abort_threshold_platform = convert_abort_threshold(
506 abort_threshold_seconds);
507 if (abort_threshold_platform < -1) {
508 ret = 1;
509 goto error;
510 }
511
512 config->abort_threshold = abort_threshold_platform;
513 DBG("Overriding %s to %d",
514 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
515 config->abort_threshold);
516 }
517
518 ret = 0;
519
520 error:
521 return ret;
522 }
523
524 /* Initialize the TCP keep-alive configuration. */
525 __attribute__((constructor)) static
526 int tcp_keep_alive_init(void)
527 {
528 tcp_keep_alive_init_support(&support);
529 return tcp_keep_alive_init_config(&support, &config);
530 }
531
532 /*
533 * Set the socket options regarding TCP keep-alive.
534 */
535 LTTNG_HIDDEN
536 int socket_apply_keep_alive_config(int socket_fd)
537 {
538 int ret;
539 int val = 1;
540
541 /* TCP keep-alive */
542 if (!support.supported || !config.enabled ) {
543 ret = 0;
544 goto end;
545 }
546
547 DBG("TCP keep-alive enabled for socket %d", socket_fd);
548 ret = setsockopt(socket_fd, SOL_SOCKET, SO_KEEPALIVE, &val,
549 sizeof(val));
550 if (ret < 0) {
551 PERROR("setsockopt so_keepalive");
552 goto end;
553 }
554
555 /* TCP keep-alive idle time */
556 if (support.idle_time_supported && config.idle_time > 0) {
557 DBG("TCP keep-alive keep idle: %d enabled for socket %d", config.idle_time, socket_fd);
558 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL, COMPAT_TCP_KEEPIDLE, &config.idle_time,
559 sizeof(config.idle_time));
560 if (ret < 0) {
561 PERROR("setsockopt TCP_KEEPIDLE");
562 goto end;
563 }
564 }
565 /* TCP keep-alive probe interval */
566 if (support.probe_interval_supported && config.probe_interval > 0) {
567 DBG("TCP keep-alive probe_interval: %d enabled for socket %d", config.probe_interval, socket_fd);
568 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL, COMPAT_TCP_KEEPINTVL, &config.probe_interval,
569 sizeof(config.probe_interval));
570 if (ret < 0) {
571 PERROR("setsockopt TCP_KEEPINTVL");
572 goto end;
573 }
574 }
575
576 /* TCP keep-alive max probe count */
577 if (support.max_probe_count_supported && config.max_probe_count > 0) {
578 DBG("TCP keep-alive max_probe: %d enabled for socket %d", config.max_probe_count, socket_fd);
579 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL, COMPAT_TCP_KEEPCNT, &config.max_probe_count,
580 sizeof(config.max_probe_count));
581 if (ret < 0) {
582 PERROR("setsockopt TCP_KEEPCNT");
583 goto end;
584 }
585 }
586
587 /* TCP keep-alive abort threshold */
588 if (support.abort_threshold_supported && config.abort_threshold > 0) {
589 DBG("TCP keep-alive abort threshold: %d enabled for socket %d", config.abort_threshold, socket_fd);
590 ret = setsockopt(socket_fd, COMPAT_TCP_LEVEL, COMPAT_TCP_ABORT_THRESHOLD, &config.abort_threshold,
591 sizeof(config.max_probe_count));
592 if (ret < 0) {
593 PERROR("setsockopt TCP_KEEPALIVE_ABORT_THRESHOLD");
594 goto end;
595 }
596 }
597 end:
598 return ret;
599 }
This page took 0.040536 seconds and 4 git commands to generate.