Fix: Handle SIGBUS in sessiond and consumerd
[lttng-tools.git] / src / bin / lttng-consumerd / lttng-consumerd.c
1 /*
2 * Copyright (C) 2011 Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Copyright (C) 2011 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 *
5 * SPDX-License-Identifier: GPL-2.0-only
6 *
7 */
8
9 #define _LGPL_SOURCE
10 #include <fcntl.h>
11 #include <getopt.h>
12 #include <grp.h>
13 #include <limits.h>
14 #include <pthread.h>
15 #include <signal.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <sys/ipc.h>
20 #include <sys/resource.h>
21 #include <sys/shm.h>
22 #include <sys/socket.h>
23 #include <sys/stat.h>
24 #include <sys/types.h>
25 #include <urcu/list.h>
26 #include <poll.h>
27 #include <unistd.h>
28 #include <sys/mman.h>
29 #include <assert.h>
30 #include <urcu/compiler.h>
31 #include <ulimit.h>
32
33 #include <common/defaults.h>
34 #include <common/common.h>
35 #include <common/consumer/consumer.h>
36 #include <common/consumer/consumer-timer.h>
37 #include <common/compat/poll.h>
38 #include <common/compat/getenv.h>
39 #include <common/sessiond-comm/sessiond-comm.h>
40 #include <common/utils.h>
41
42 #include "lttng-consumerd.h"
43 #include "health-consumerd.h"
44
45 /* threads (channel handling, poll, metadata, sessiond) */
46
47 static pthread_t channel_thread, data_thread, metadata_thread,
48 sessiond_thread, metadata_timer_thread, health_thread;
49 static bool metadata_timer_thread_online;
50
51 /* to count the number of times the user pressed ctrl+c */
52 static int sigintcount = 0;
53
54 /* Argument variables */
55 int lttng_opt_quiet; /* not static in error.h */
56 int lttng_opt_verbose; /* not static in error.h */
57 int lttng_opt_mi; /* not static in error.h */
58
59 static int opt_daemon;
60 static const char *progname;
61 static char command_sock_path[PATH_MAX]; /* Global command socket path */
62 static char error_sock_path[PATH_MAX]; /* Global error path */
63 static enum lttng_consumer_type opt_type = LTTNG_CONSUMER_KERNEL;
64
65 /* the liblttngconsumerd context */
66 static struct lttng_consumer_local_data *ctx;
67
68 /* Consumerd health monitoring */
69 struct health_app *health_consumerd;
70
71 const char *tracing_group_name = DEFAULT_TRACING_GROUP;
72
73 int lttng_consumer_ready = NR_LTTNG_CONSUMER_READY;
74
75 enum lttng_consumer_type lttng_consumer_get_type(void)
76 {
77 if (!ctx) {
78 return LTTNG_CONSUMER_UNKNOWN;
79 }
80 return ctx->type;
81 }
82
83 /*
84 * Signal handler for the daemon
85 */
86 static void sighandler(int sig, siginfo_t *siginfo, void *arg)
87 {
88 if (sig == SIGINT && sigintcount++ == 0) {
89 DBG("ignoring first SIGINT");
90 return;
91 }
92
93 if (sig == SIGBUS) {
94 int write_ret;
95 const char msg[] = "Received SIGBUS, aborting program.\n";
96
97 lttng_consumer_sigbus_handle(siginfo->si_addr);
98 /*
99 * If ustctl did not catch this signal (triggering a
100 * siglongjmp), abort the program. Otherwise, the execution
101 * will resume from the ust-ctl call which caused this error.
102 *
103 * The return value is ignored since the program aborts anyhow.
104 */
105 write_ret = write(STDERR_FILENO, msg, sizeof(msg));
106 (void) write_ret;
107 abort();
108 }
109
110 if (ctx) {
111 lttng_consumer_should_exit(ctx);
112 }
113 }
114
115 /*
116 * Setup signal handler for :
117 * SIGINT, SIGTERM, SIGPIPE, SIGBUS
118 */
119 static int set_signal_handler(void)
120 {
121 int ret = 0;
122 struct sigaction sa;
123 sigset_t sigset;
124
125 if ((ret = sigemptyset(&sigset)) < 0) {
126 PERROR("sigemptyset");
127 return ret;
128 }
129
130 sa.sa_mask = sigset;
131 sa.sa_flags = SA_SIGINFO;
132
133 sa.sa_sigaction = sighandler;
134 if ((ret = sigaction(SIGTERM, &sa, NULL)) < 0) {
135 PERROR("sigaction");
136 return ret;
137 }
138
139 if ((ret = sigaction(SIGINT, &sa, NULL)) < 0) {
140 PERROR("sigaction");
141 return ret;
142 }
143
144 if ((ret = sigaction(SIGBUS, &sa, NULL)) < 0) {
145 PERROR("sigaction");
146 return ret;
147 }
148
149 sa.sa_flags = 0;
150 sa.sa_handler = SIG_IGN;
151 if ((ret = sigaction(SIGPIPE, &sa, NULL)) < 0) {
152 PERROR("sigaction");
153 return ret;
154 }
155
156 return ret;
157 }
158
159 /*
160 * Usage function on stream file.
161 */
162 static void usage(FILE *fp)
163 {
164 fprintf(fp, "Usage: %s OPTIONS\n\nOptions:\n", progname);
165 fprintf(fp, " -h, --help "
166 "Display this usage.\n");
167 fprintf(fp, " -c, --consumerd-cmd-sock PATH "
168 "Specify path for the command socket\n");
169 fprintf(fp, " -e, --consumerd-err-sock PATH "
170 "Specify path for the error socket\n");
171 fprintf(fp, " -d, --daemonize "
172 "Start as a daemon.\n");
173 fprintf(fp, " -q, --quiet "
174 "No output at all.\n");
175 fprintf(fp, " -v, --verbose "
176 "Verbose mode. Activate DBG() macro.\n");
177 fprintf(fp, " -V, --version "
178 "Show version number.\n");
179 fprintf(fp, " -g, --group NAME "
180 "Specify the tracing group name. (default: tracing)\n");
181 fprintf(fp, " -k, --kernel "
182 "Consumer kernel buffers (default).\n");
183 fprintf(fp, " -u, --ust "
184 "Consumer UST buffers.%s\n",
185 #ifdef HAVE_LIBLTTNG_UST_CTL
186 ""
187 #else
188 " (support not compiled in)"
189 #endif
190 );
191 }
192
193 /*
194 * daemon argument parsing
195 */
196 static int parse_args(int argc, char **argv)
197 {
198 int c, ret = 0;
199
200 static struct option long_options[] = {
201 { "consumerd-cmd-sock", 1, 0, 'c' },
202 { "consumerd-err-sock", 1, 0, 'e' },
203 { "daemonize", 0, 0, 'd' },
204 { "group", 1, 0, 'g' },
205 { "help", 0, 0, 'h' },
206 { "quiet", 0, 0, 'q' },
207 { "verbose", 0, 0, 'v' },
208 { "version", 0, 0, 'V' },
209 { "kernel", 0, 0, 'k' },
210 #ifdef HAVE_LIBLTTNG_UST_CTL
211 { "ust", 0, 0, 'u' },
212 #endif
213 { NULL, 0, 0, 0 }
214 };
215
216 while (1) {
217 int option_index = 0;
218 c = getopt_long(argc, argv, "dhqvVku" "c:e:g:",
219 long_options, &option_index);
220 if (c == -1) {
221 break;
222 }
223
224 switch (c) {
225 case 0:
226 fprintf(stderr, "option %s",
227 long_options[option_index].name);
228 if (optarg) {
229 fprintf(stderr, " with arg %s\n", optarg);
230 ret = -1;
231 goto end;
232 }
233 break;
234 case 'c':
235 if (lttng_is_setuid_setgid()) {
236 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
237 "-c, --consumerd-cmd-sock");
238 } else {
239 snprintf(command_sock_path, PATH_MAX, "%s", optarg);
240 }
241 break;
242 case 'e':
243 if (lttng_is_setuid_setgid()) {
244 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
245 "-e, --consumerd-err-sock");
246 } else {
247 snprintf(error_sock_path, PATH_MAX, "%s", optarg);
248 }
249 break;
250 case 'd':
251 opt_daemon = 1;
252 break;
253 case 'g':
254 if (lttng_is_setuid_setgid()) {
255 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
256 "-g, --group");
257 } else {
258 tracing_group_name = optarg;
259 }
260 break;
261 case 'h':
262 usage(stdout);
263 exit(EXIT_SUCCESS);
264 case 'q':
265 lttng_opt_quiet = 1;
266 break;
267 case 'v':
268 lttng_opt_verbose = 3;
269 break;
270 case 'V':
271 fprintf(stdout, "%s\n", VERSION);
272 exit(EXIT_SUCCESS);
273 case 'k':
274 opt_type = LTTNG_CONSUMER_KERNEL;
275 break;
276 #ifdef HAVE_LIBLTTNG_UST_CTL
277 case 'u':
278 # if (CAA_BITS_PER_LONG == 64)
279 opt_type = LTTNG_CONSUMER64_UST;
280 # elif (CAA_BITS_PER_LONG == 32)
281 opt_type = LTTNG_CONSUMER32_UST;
282 # else
283 # error "Unknown bitness"
284 # endif
285 break;
286 #endif
287 default:
288 usage(stderr);
289 ret = -1;
290 goto end;
291 }
292 }
293 end:
294 return ret;
295 }
296
/*
 * Raise the open files limit (soft and hard) to 65535. This daemon can
 * open a large number of file descriptors in order to consume multiple
 * kernel traces. A failure is reported but is not fatal.
 */
static void set_ulimit(void)
{
	/* The kernel does not allow an infinite limit for open files. */
	struct rlimit lim = {
		.rlim_cur = 65535,
		.rlim_max = 65535,
	};

	if (setrlimit(RLIMIT_NOFILE, &lim) < 0) {
		PERROR("failed to set open files limit");
	}
}
315
316 /*
317 * main
318 */
319 int main(int argc, char **argv)
320 {
321 int ret = 0, retval = 0;
322 void *status;
323 struct lttng_consumer_local_data *tmp_ctx;
324
325 rcu_register_thread();
326
327 if (run_as_create_worker(argv[0], NULL, NULL) < 0) {
328 goto exit_set_signal_handler;
329 }
330
331 if (set_signal_handler()) {
332 retval = -1;
333 goto exit_set_signal_handler;
334 }
335
336 /* Parse arguments */
337 progname = argv[0];
338 if (parse_args(argc, argv)) {
339 retval = -1;
340 goto exit_options;
341 }
342
343 /* Daemonize */
344 if (opt_daemon) {
345 int i;
346
347 /*
348 * fork
349 * child: setsid, close FD 0, 1, 2, chdir /
350 * parent: exit (if fork is successful)
351 */
352 ret = daemon(0, 0);
353 if (ret < 0) {
354 PERROR("daemon");
355 retval = -1;
356 goto exit_options;
357 }
358 /*
359 * We are in the child. Make sure all other file
360 * descriptors are closed, in case we are called with
361 * more opened file descriptors than the standard ones.
362 */
363 for (i = 3; i < sysconf(_SC_OPEN_MAX); i++) {
364 (void) close(i);
365 }
366 }
367
368 /*
369 * Starting from here, we can create threads. This needs to be after
370 * lttng_daemonize due to RCU.
371 */
372
373 health_consumerd = health_app_create(NR_HEALTH_CONSUMERD_TYPES);
374 if (!health_consumerd) {
375 retval = -1;
376 goto exit_health_consumerd_cleanup;
377 }
378
379 if (*command_sock_path == '\0') {
380 switch (opt_type) {
381 case LTTNG_CONSUMER_KERNEL:
382 ret = snprintf(command_sock_path, PATH_MAX,
383 DEFAULT_KCONSUMERD_CMD_SOCK_PATH,
384 DEFAULT_LTTNG_RUNDIR);
385 if (ret < 0) {
386 retval = -1;
387 goto exit_init_data;
388 }
389 break;
390 case LTTNG_CONSUMER64_UST:
391 ret = snprintf(command_sock_path, PATH_MAX,
392 DEFAULT_USTCONSUMERD64_CMD_SOCK_PATH,
393 DEFAULT_LTTNG_RUNDIR);
394 if (ret < 0) {
395 retval = -1;
396 goto exit_init_data;
397 }
398 break;
399 case LTTNG_CONSUMER32_UST:
400 ret = snprintf(command_sock_path, PATH_MAX,
401 DEFAULT_USTCONSUMERD32_CMD_SOCK_PATH,
402 DEFAULT_LTTNG_RUNDIR);
403 if (ret < 0) {
404 retval = -1;
405 goto exit_init_data;
406 }
407 break;
408 default:
409 ERR("Unknown consumerd type");
410 retval = -1;
411 goto exit_init_data;
412 }
413 }
414
415 /* Init */
416 if (lttng_consumer_init()) {
417 retval = -1;
418 goto exit_init_data;
419 }
420
421 /* Initialize communication library */
422 lttcomm_init();
423 /* Initialize TCP timeout values */
424 lttcomm_inet_init();
425
426 if (!getuid()) {
427 /* Set limit for open files */
428 set_ulimit();
429 }
430
431 /* create the consumer instance with and assign the callbacks */
432 ctx = lttng_consumer_create(opt_type, lttng_consumer_read_subbuffer,
433 NULL, lttng_consumer_on_recv_stream, NULL);
434 if (!ctx) {
435 retval = -1;
436 goto exit_init_data;
437 }
438
439 lttng_consumer_set_command_sock_path(ctx, command_sock_path);
440 if (*error_sock_path == '\0') {
441 switch (opt_type) {
442 case LTTNG_CONSUMER_KERNEL:
443 ret = snprintf(error_sock_path, PATH_MAX,
444 DEFAULT_KCONSUMERD_ERR_SOCK_PATH,
445 DEFAULT_LTTNG_RUNDIR);
446 if (ret < 0) {
447 retval = -1;
448 goto exit_init_data;
449 }
450 break;
451 case LTTNG_CONSUMER64_UST:
452 ret = snprintf(error_sock_path, PATH_MAX,
453 DEFAULT_USTCONSUMERD64_ERR_SOCK_PATH,
454 DEFAULT_LTTNG_RUNDIR);
455 if (ret < 0) {
456 retval = -1;
457 goto exit_init_data;
458 }
459 break;
460 case LTTNG_CONSUMER32_UST:
461 ret = snprintf(error_sock_path, PATH_MAX,
462 DEFAULT_USTCONSUMERD32_ERR_SOCK_PATH,
463 DEFAULT_LTTNG_RUNDIR);
464 if (ret < 0) {
465 retval = -1;
466 goto exit_init_data;
467 }
468 break;
469 default:
470 ERR("Unknown consumerd type");
471 retval = -1;
472 goto exit_init_data;
473 }
474 }
475
476 /* Connect to the socket created by lttng-sessiond to report errors */
477 DBG("Connecting to error socket %s", error_sock_path);
478 ret = lttcomm_connect_unix_sock(error_sock_path);
479 /*
480 * Not a fatal error, but all communication with lttng-sessiond will
481 * fail.
482 */
483 if (ret < 0) {
484 WARN("Cannot connect to error socket (is lttng-sessiond started?)");
485 }
486 lttng_consumer_set_error_sock(ctx, ret);
487
488 /*
489 * Block RT signals used for UST periodical metadata flush and the live
490 * timer in main, and create a dedicated thread to handle these signals.
491 */
492 if (consumer_signal_init()) {
493 retval = -1;
494 goto exit_init_data;
495 }
496
497 ctx->type = opt_type;
498
499 if (utils_create_pipe(health_quit_pipe)) {
500 retval = -1;
501 goto exit_health_pipe;
502 }
503
504 /* Create thread to manage the client socket */
505 ret = pthread_create(&health_thread, default_pthread_attr(),
506 thread_manage_health, (void *) NULL);
507 if (ret) {
508 errno = ret;
509 PERROR("pthread_create health");
510 retval = -1;
511 goto exit_health_thread;
512 }
513
514 /*
515 * Wait for health thread to be initialized before letting the
516 * sessiond thread reply to the sessiond that we are ready.
517 */
518 while (uatomic_read(&lttng_consumer_ready)) {
519 usleep(100000);
520 }
521 cmm_smp_mb(); /* Read ready before following operations */
522
523 /*
524 * Create the thread to manage the UST metadata periodic timer and
525 * live timer.
526 */
527 ret = pthread_create(&metadata_timer_thread, NULL,
528 consumer_timer_thread, (void *) ctx);
529 if (ret) {
530 errno = ret;
531 PERROR("pthread_create");
532 retval = -1;
533 goto exit_metadata_timer_thread;
534 }
535 metadata_timer_thread_online = true;
536
537 /* Create thread to manage channels */
538 ret = pthread_create(&channel_thread, default_pthread_attr(),
539 consumer_thread_channel_poll,
540 (void *) ctx);
541 if (ret) {
542 errno = ret;
543 PERROR("pthread_create");
544 retval = -1;
545 goto exit_channel_thread;
546 }
547
548 /* Create thread to manage the polling/writing of trace metadata */
549 ret = pthread_create(&metadata_thread, default_pthread_attr(),
550 consumer_thread_metadata_poll,
551 (void *) ctx);
552 if (ret) {
553 errno = ret;
554 PERROR("pthread_create");
555 retval = -1;
556 goto exit_metadata_thread;
557 }
558
559 /* Create thread to manage the polling/writing of trace data */
560 ret = pthread_create(&data_thread, default_pthread_attr(),
561 consumer_thread_data_poll, (void *) ctx);
562 if (ret) {
563 errno = ret;
564 PERROR("pthread_create");
565 retval = -1;
566 goto exit_data_thread;
567 }
568
569 /* Create the thread to manage the reception of fds */
570 ret = pthread_create(&sessiond_thread, default_pthread_attr(),
571 consumer_thread_sessiond_poll,
572 (void *) ctx);
573 if (ret) {
574 errno = ret;
575 PERROR("pthread_create");
576 retval = -1;
577 goto exit_sessiond_thread;
578 }
579
580
581 /*
582 * This is where we start awaiting program completion (e.g. through
583 * signal that asks threads to teardown.
584 */
585
586 ret = pthread_join(sessiond_thread, &status);
587 if (ret) {
588 errno = ret;
589 PERROR("pthread_join sessiond_thread");
590 retval = -1;
591 }
592 exit_sessiond_thread:
593
594 ret = pthread_join(data_thread, &status);
595 if (ret) {
596 errno = ret;
597 PERROR("pthread_join data_thread");
598 retval = -1;
599 }
600 exit_data_thread:
601
602 ret = pthread_join(metadata_thread, &status);
603 if (ret) {
604 errno = ret;
605 PERROR("pthread_join metadata_thread");
606 retval = -1;
607 }
608 exit_metadata_thread:
609
610 ret = pthread_join(channel_thread, &status);
611 if (ret) {
612 errno = ret;
613 PERROR("pthread_join channel_thread");
614 retval = -1;
615 }
616 exit_channel_thread:
617
618 exit_metadata_timer_thread:
619
620 ret = pthread_join(health_thread, &status);
621 if (ret) {
622 errno = ret;
623 PERROR("pthread_join health_thread");
624 retval = -1;
625 }
626 exit_health_thread:
627
628 utils_close_pipe(health_quit_pipe);
629 exit_health_pipe:
630
631 exit_init_data:
632 /*
633 * Wait for all pending call_rcu work to complete before tearing
634 * down data structures. call_rcu worker may be trying to
635 * perform lookups in those structures.
636 */
637 rcu_barrier();
638 lttng_consumer_cleanup();
639 /*
640 * Tearing down the metadata timer thread in a
641 * non-fully-symmetric fashion compared to its creation in case
642 * lttng_consumer_cleanup() ends up tearing down timers (which
643 * requires the timer thread to be alive).
644 */
645 if (metadata_timer_thread_online) {
646 /*
647 * Ensure the metadata timer thread exits only after all other
648 * threads are gone, because it is required to perform timer
649 * teardown synchronization.
650 */
651 kill(getpid(), LTTNG_CONSUMER_SIG_EXIT);
652 ret = pthread_join(metadata_timer_thread, &status);
653 if (ret) {
654 errno = ret;
655 PERROR("pthread_join metadata_timer_thread");
656 retval = -1;
657 }
658 ret = consumer_timer_thread_get_channel_monitor_pipe();
659 if (ret >= 0) {
660 ret = close(ret);
661 if (ret) {
662 PERROR("close channel monitor pipe");
663 }
664 }
665 metadata_timer_thread_online = false;
666 }
667 tmp_ctx = ctx;
668 ctx = NULL;
669 cmm_barrier(); /* Clear ctx for signal handler. */
670 lttng_consumer_destroy(tmp_ctx);
671
672 if (health_consumerd) {
673 health_app_destroy(health_consumerd);
674 }
675 /* Ensure all prior call_rcu are done. */
676 rcu_barrier();
677
678 run_as_destroy_worker();
679
680 exit_health_consumerd_cleanup:
681 exit_options:
682 exit_set_signal_handler:
683
684 rcu_unregister_thread();
685
686 if (!retval) {
687 exit(EXIT_SUCCESS);
688 } else {
689 exit(EXIT_FAILURE);
690 }
691 }
This page took 0.043783 seconds and 5 git commands to generate.