Fix: close channel monitor pipe after killing the metadata_timer_thread
[lttng-tools.git] / src / bin / lttng-consumerd / lttng-consumerd.c
1 /*
2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2 only,
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 */
18
19 #define _LGPL_SOURCE
20 #include <fcntl.h>
21 #include <getopt.h>
22 #include <grp.h>
23 #include <limits.h>
24 #include <pthread.h>
25 #include <signal.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <sys/ipc.h>
30 #include <sys/resource.h>
31 #include <sys/shm.h>
32 #include <sys/socket.h>
33 #include <sys/stat.h>
34 #include <sys/types.h>
35 #include <urcu/list.h>
36 #include <poll.h>
37 #include <unistd.h>
38 #include <sys/mman.h>
39 #include <assert.h>
40 #include <urcu/compiler.h>
41 #include <ulimit.h>
42
43 #include <common/defaults.h>
44 #include <common/common.h>
45 #include <common/consumer/consumer.h>
46 #include <common/consumer/consumer-timer.h>
47 #include <common/compat/poll.h>
48 #include <common/compat/getenv.h>
49 #include <common/sessiond-comm/sessiond-comm.h>
50 #include <common/utils.h>
51
52 #include "lttng-consumerd.h"
53 #include "health-consumerd.h"
54
55 /* threads (channel handling, poll, metadata, sessiond) */
56
57 static pthread_t channel_thread, data_thread, metadata_thread,
58 sessiond_thread, metadata_timer_thread, health_thread;
59 static bool metadata_timer_thread_online;
60
61 /* to count the number of times the user pressed ctrl+c */
62 static int sigintcount = 0;
63
64 /* Argument variables */
65 int lttng_opt_quiet; /* not static in error.h */
66 int lttng_opt_verbose; /* not static in error.h */
67 int lttng_opt_mi; /* not static in error.h */
68
69 static int opt_daemon;
70 static const char *progname;
71 static char command_sock_path[PATH_MAX]; /* Global command socket path */
72 static char error_sock_path[PATH_MAX]; /* Global error path */
73 static enum lttng_consumer_type opt_type = LTTNG_CONSUMER_KERNEL;
74
75 /* the liblttngconsumerd context */
76 static struct lttng_consumer_local_data *ctx;
77
78 /* Consumerd health monitoring */
79 struct health_app *health_consumerd;
80
81 const char *tracing_group_name = DEFAULT_TRACING_GROUP;
82
83 int lttng_consumer_ready = NR_LTTNG_CONSUMER_READY;
84
85 enum lttng_consumer_type lttng_consumer_get_type(void)
86 {
87 if (!ctx) {
88 return LTTNG_CONSUMER_UNKNOWN;
89 }
90 return ctx->type;
91 }
92
93 /*
94 * Signal handler for the daemon
95 */
96 static void sighandler(int sig)
97 {
98 if (sig == SIGINT && sigintcount++ == 0) {
99 DBG("ignoring first SIGINT");
100 return;
101 }
102
103 if (ctx) {
104 lttng_consumer_should_exit(ctx);
105 }
106 }
107
108 /*
109 * Setup signal handler for :
110 * SIGINT, SIGTERM, SIGPIPE
111 */
112 static int set_signal_handler(void)
113 {
114 int ret = 0;
115 struct sigaction sa;
116 sigset_t sigset;
117
118 if ((ret = sigemptyset(&sigset)) < 0) {
119 PERROR("sigemptyset");
120 return ret;
121 }
122
123 sa.sa_mask = sigset;
124 sa.sa_flags = 0;
125
126 sa.sa_handler = sighandler;
127 if ((ret = sigaction(SIGTERM, &sa, NULL)) < 0) {
128 PERROR("sigaction");
129 return ret;
130 }
131
132 if ((ret = sigaction(SIGINT, &sa, NULL)) < 0) {
133 PERROR("sigaction");
134 return ret;
135 }
136
137 sa.sa_handler = SIG_IGN;
138 if ((ret = sigaction(SIGPIPE, &sa, NULL)) < 0) {
139 PERROR("sigaction");
140 return ret;
141 }
142
143 return ret;
144 }
145
146 /*
147 * Usage function on stream file.
148 */
149 static void usage(FILE *fp)
150 {
151 fprintf(fp, "Usage: %s OPTIONS\n\nOptions:\n", progname);
152 fprintf(fp, " -h, --help "
153 "Display this usage.\n");
154 fprintf(fp, " -c, --consumerd-cmd-sock PATH "
155 "Specify path for the command socket\n");
156 fprintf(fp, " -e, --consumerd-err-sock PATH "
157 "Specify path for the error socket\n");
158 fprintf(fp, " -d, --daemonize "
159 "Start as a daemon.\n");
160 fprintf(fp, " -q, --quiet "
161 "No output at all.\n");
162 fprintf(fp, " -v, --verbose "
163 "Verbose mode. Activate DBG() macro.\n");
164 fprintf(fp, " -V, --version "
165 "Show version number.\n");
166 fprintf(fp, " -g, --group NAME "
167 "Specify the tracing group name. (default: tracing)\n");
168 fprintf(fp, " -k, --kernel "
169 "Consumer kernel buffers (default).\n");
170 fprintf(fp, " -u, --ust "
171 "Consumer UST buffers.%s\n",
172 #ifdef HAVE_LIBLTTNG_UST_CTL
173 ""
174 #else
175 " (support not compiled in)"
176 #endif
177 );
178 }
179
180 /*
181 * daemon argument parsing
182 */
183 static int parse_args(int argc, char **argv)
184 {
185 int c, ret = 0;
186
187 static struct option long_options[] = {
188 { "consumerd-cmd-sock", 1, 0, 'c' },
189 { "consumerd-err-sock", 1, 0, 'e' },
190 { "daemonize", 0, 0, 'd' },
191 { "group", 1, 0, 'g' },
192 { "help", 0, 0, 'h' },
193 { "quiet", 0, 0, 'q' },
194 { "verbose", 0, 0, 'v' },
195 { "version", 0, 0, 'V' },
196 { "kernel", 0, 0, 'k' },
197 #ifdef HAVE_LIBLTTNG_UST_CTL
198 { "ust", 0, 0, 'u' },
199 #endif
200 { NULL, 0, 0, 0 }
201 };
202
203 while (1) {
204 int option_index = 0;
205 c = getopt_long(argc, argv, "dhqvVku" "c:e:g:",
206 long_options, &option_index);
207 if (c == -1) {
208 break;
209 }
210
211 switch (c) {
212 case 0:
213 fprintf(stderr, "option %s",
214 long_options[option_index].name);
215 if (optarg) {
216 fprintf(stderr, " with arg %s\n", optarg);
217 ret = -1;
218 goto end;
219 }
220 break;
221 case 'c':
222 if (lttng_is_setuid_setgid()) {
223 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
224 "-c, --consumerd-cmd-sock");
225 } else {
226 snprintf(command_sock_path, PATH_MAX, "%s", optarg);
227 }
228 break;
229 case 'e':
230 if (lttng_is_setuid_setgid()) {
231 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
232 "-e, --consumerd-err-sock");
233 } else {
234 snprintf(error_sock_path, PATH_MAX, "%s", optarg);
235 }
236 break;
237 case 'd':
238 opt_daemon = 1;
239 break;
240 case 'g':
241 if (lttng_is_setuid_setgid()) {
242 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
243 "-g, --group");
244 } else {
245 tracing_group_name = optarg;
246 }
247 break;
248 case 'h':
249 usage(stdout);
250 exit(EXIT_SUCCESS);
251 case 'q':
252 lttng_opt_quiet = 1;
253 break;
254 case 'v':
255 lttng_opt_verbose = 1;
256 break;
257 case 'V':
258 fprintf(stdout, "%s\n", VERSION);
259 exit(EXIT_SUCCESS);
260 case 'k':
261 opt_type = LTTNG_CONSUMER_KERNEL;
262 break;
263 #ifdef HAVE_LIBLTTNG_UST_CTL
264 case 'u':
265 # if (CAA_BITS_PER_LONG == 64)
266 opt_type = LTTNG_CONSUMER64_UST;
267 # elif (CAA_BITS_PER_LONG == 32)
268 opt_type = LTTNG_CONSUMER32_UST;
269 # else
270 # error "Unknown bitness"
271 # endif
272 break;
273 #endif
274 default:
275 usage(stderr);
276 ret = -1;
277 goto end;
278 }
279 }
280 end:
281 return ret;
282 }
283
/*
 * Raise the open files limit. This daemon can open a large number of file
 * descriptors in order to consume multiple kernel traces.
 *
 * A failure is only logged.
 */
static void set_ulimit(void)
{
	/* The kernel does not allow an infinite limit for open files. */
	struct rlimit nofile_limit = {
		.rlim_cur = 65535,
		.rlim_max = 65535,
	};

	if (setrlimit(RLIMIT_NOFILE, &nofile_limit) < 0) {
		PERROR("failed to set open files limit");
	}
}
302
303 /*
304 * main
305 */
306 int main(int argc, char **argv)
307 {
308 int ret = 0, retval = 0;
309 void *status;
310 struct lttng_consumer_local_data *tmp_ctx;
311
312 rcu_register_thread();
313
314 if (set_signal_handler()) {
315 retval = -1;
316 goto exit_set_signal_handler;
317 }
318
319 /* Parse arguments */
320 progname = argv[0];
321 if (parse_args(argc, argv)) {
322 retval = -1;
323 goto exit_options;
324 }
325
326 /* Daemonize */
327 if (opt_daemon) {
328 int i;
329
330 /*
331 * fork
332 * child: setsid, close FD 0, 1, 2, chdir /
333 * parent: exit (if fork is successful)
334 */
335 ret = daemon(0, 0);
336 if (ret < 0) {
337 PERROR("daemon");
338 retval = -1;
339 goto exit_options;
340 }
341 /*
342 * We are in the child. Make sure all other file
343 * descriptors are closed, in case we are called with
344 * more opened file descriptors than the standard ones.
345 */
346 for (i = 3; i < sysconf(_SC_OPEN_MAX); i++) {
347 (void) close(i);
348 }
349 }
350
351 /*
352 * Starting from here, we can create threads. This needs to be after
353 * lttng_daemonize due to RCU.
354 */
355
356 health_consumerd = health_app_create(NR_HEALTH_CONSUMERD_TYPES);
357 if (!health_consumerd) {
358 retval = -1;
359 goto exit_health_consumerd_cleanup;
360 }
361
362 if (*command_sock_path == '\0') {
363 switch (opt_type) {
364 case LTTNG_CONSUMER_KERNEL:
365 ret = snprintf(command_sock_path, PATH_MAX,
366 DEFAULT_KCONSUMERD_CMD_SOCK_PATH,
367 DEFAULT_LTTNG_RUNDIR);
368 if (ret < 0) {
369 retval = -1;
370 goto exit_init_data;
371 }
372 break;
373 case LTTNG_CONSUMER64_UST:
374 ret = snprintf(command_sock_path, PATH_MAX,
375 DEFAULT_USTCONSUMERD64_CMD_SOCK_PATH,
376 DEFAULT_LTTNG_RUNDIR);
377 if (ret < 0) {
378 retval = -1;
379 goto exit_init_data;
380 }
381 break;
382 case LTTNG_CONSUMER32_UST:
383 ret = snprintf(command_sock_path, PATH_MAX,
384 DEFAULT_USTCONSUMERD32_CMD_SOCK_PATH,
385 DEFAULT_LTTNG_RUNDIR);
386 if (ret < 0) {
387 retval = -1;
388 goto exit_init_data;
389 }
390 break;
391 default:
392 ERR("Unknown consumerd type");
393 retval = -1;
394 goto exit_init_data;
395 }
396 }
397
398 /* Init */
399 if (lttng_consumer_init()) {
400 retval = -1;
401 goto exit_init_data;
402 }
403
404 /* Initialize communication library */
405 lttcomm_init();
406 /* Initialize TCP timeout values */
407 lttcomm_inet_init();
408
409 if (!getuid()) {
410 /* Set limit for open files */
411 set_ulimit();
412 }
413
414 if (run_as_create_worker(argv[0]) < 0) {
415 goto exit_init_data;
416 }
417
418 /* create the consumer instance with and assign the callbacks */
419 ctx = lttng_consumer_create(opt_type, lttng_consumer_read_subbuffer,
420 NULL, lttng_consumer_on_recv_stream, NULL);
421 if (!ctx) {
422 retval = -1;
423 goto exit_init_data;
424 }
425
426 lttng_consumer_set_command_sock_path(ctx, command_sock_path);
427 if (*error_sock_path == '\0') {
428 switch (opt_type) {
429 case LTTNG_CONSUMER_KERNEL:
430 ret = snprintf(error_sock_path, PATH_MAX,
431 DEFAULT_KCONSUMERD_ERR_SOCK_PATH,
432 DEFAULT_LTTNG_RUNDIR);
433 if (ret < 0) {
434 retval = -1;
435 goto exit_init_data;
436 }
437 break;
438 case LTTNG_CONSUMER64_UST:
439 ret = snprintf(error_sock_path, PATH_MAX,
440 DEFAULT_USTCONSUMERD64_ERR_SOCK_PATH,
441 DEFAULT_LTTNG_RUNDIR);
442 if (ret < 0) {
443 retval = -1;
444 goto exit_init_data;
445 }
446 break;
447 case LTTNG_CONSUMER32_UST:
448 ret = snprintf(error_sock_path, PATH_MAX,
449 DEFAULT_USTCONSUMERD32_ERR_SOCK_PATH,
450 DEFAULT_LTTNG_RUNDIR);
451 if (ret < 0) {
452 retval = -1;
453 goto exit_init_data;
454 }
455 break;
456 default:
457 ERR("Unknown consumerd type");
458 retval = -1;
459 goto exit_init_data;
460 }
461 }
462
463 /* Connect to the socket created by lttng-sessiond to report errors */
464 DBG("Connecting to error socket %s", error_sock_path);
465 ret = lttcomm_connect_unix_sock(error_sock_path);
466 /*
467 * Not a fatal error, but all communication with lttng-sessiond will
468 * fail.
469 */
470 if (ret < 0) {
471 WARN("Cannot connect to error socket (is lttng-sessiond started?)");
472 }
473 lttng_consumer_set_error_sock(ctx, ret);
474
475 /*
476 * Block RT signals used for UST periodical metadata flush and the live
477 * timer in main, and create a dedicated thread to handle these signals.
478 */
479 if (consumer_signal_init()) {
480 retval = -1;
481 goto exit_init_data;
482 }
483
484 ctx->type = opt_type;
485
486 if (utils_create_pipe(health_quit_pipe)) {
487 retval = -1;
488 goto exit_health_pipe;
489 }
490
491 /* Create thread to manage the client socket */
492 ret = pthread_create(&health_thread, default_pthread_attr(),
493 thread_manage_health, (void *) NULL);
494 if (ret) {
495 errno = ret;
496 PERROR("pthread_create health");
497 retval = -1;
498 goto exit_health_thread;
499 }
500
501 /*
502 * Wait for health thread to be initialized before letting the
503 * sessiond thread reply to the sessiond that we are ready.
504 */
505 while (uatomic_read(&lttng_consumer_ready)) {
506 usleep(100000);
507 }
508 cmm_smp_mb(); /* Read ready before following operations */
509
510 /*
511 * Create the thread to manage the UST metadata periodic timer and
512 * live timer.
513 */
514 ret = pthread_create(&metadata_timer_thread, NULL,
515 consumer_timer_thread, (void *) ctx);
516 if (ret) {
517 errno = ret;
518 PERROR("pthread_create");
519 retval = -1;
520 goto exit_metadata_timer_thread;
521 }
522 metadata_timer_thread_online = true;
523
524 /* Create thread to manage channels */
525 ret = pthread_create(&channel_thread, default_pthread_attr(),
526 consumer_thread_channel_poll,
527 (void *) ctx);
528 if (ret) {
529 errno = ret;
530 PERROR("pthread_create");
531 retval = -1;
532 goto exit_channel_thread;
533 }
534
535 /* Create thread to manage the polling/writing of trace metadata */
536 ret = pthread_create(&metadata_thread, default_pthread_attr(),
537 consumer_thread_metadata_poll,
538 (void *) ctx);
539 if (ret) {
540 errno = ret;
541 PERROR("pthread_create");
542 retval = -1;
543 goto exit_metadata_thread;
544 }
545
546 /* Create thread to manage the polling/writing of trace data */
547 ret = pthread_create(&data_thread, default_pthread_attr(),
548 consumer_thread_data_poll, (void *) ctx);
549 if (ret) {
550 errno = ret;
551 PERROR("pthread_create");
552 retval = -1;
553 goto exit_data_thread;
554 }
555
556 /* Create the thread to manage the reception of fds */
557 ret = pthread_create(&sessiond_thread, default_pthread_attr(),
558 consumer_thread_sessiond_poll,
559 (void *) ctx);
560 if (ret) {
561 errno = ret;
562 PERROR("pthread_create");
563 retval = -1;
564 goto exit_sessiond_thread;
565 }
566
567
568 /*
569 * This is where we start awaiting program completion (e.g. through
570 * signal that asks threads to teardown.
571 */
572
573 ret = pthread_join(sessiond_thread, &status);
574 if (ret) {
575 errno = ret;
576 PERROR("pthread_join sessiond_thread");
577 retval = -1;
578 }
579 exit_sessiond_thread:
580
581 ret = pthread_join(data_thread, &status);
582 if (ret) {
583 errno = ret;
584 PERROR("pthread_join data_thread");
585 retval = -1;
586 }
587 exit_data_thread:
588
589 ret = pthread_join(metadata_thread, &status);
590 if (ret) {
591 errno = ret;
592 PERROR("pthread_join metadata_thread");
593 retval = -1;
594 }
595 exit_metadata_thread:
596
597 ret = pthread_join(channel_thread, &status);
598 if (ret) {
599 errno = ret;
600 PERROR("pthread_join channel_thread");
601 retval = -1;
602 }
603 exit_channel_thread:
604
605 exit_metadata_timer_thread:
606
607 ret = pthread_join(health_thread, &status);
608 if (ret) {
609 errno = ret;
610 PERROR("pthread_join health_thread");
611 retval = -1;
612 }
613 exit_health_thread:
614
615 utils_close_pipe(health_quit_pipe);
616 exit_health_pipe:
617
618 exit_init_data:
619 /*
620 * Wait for all pending call_rcu work to complete before tearing
621 * down data structures. call_rcu worker may be trying to
622 * perform lookups in those structures.
623 */
624 rcu_barrier();
625 lttng_consumer_cleanup();
626 /*
627 * Tearing down the metadata timer thread in a
628 * non-fully-symmetric fashion compared to its creation in case
629 * lttng_consumer_cleanup() ends up tearing down timers (which
630 * requires the timer thread to be alive).
631 */
632 if (metadata_timer_thread_online) {
633 /*
634 * Ensure the metadata timer thread exits only after all other
635 * threads are gone, because it is required to perform timer
636 * teardown synchronization.
637 */
638 kill(getpid(), LTTNG_CONSUMER_SIG_EXIT);
639 ret = pthread_join(metadata_timer_thread, &status);
640 if (ret) {
641 errno = ret;
642 PERROR("pthread_join metadata_timer_thread");
643 retval = -1;
644 }
645 ret = consumer_timer_thread_get_channel_monitor_pipe();
646 if (ret >= 0) {
647 ret = close(ret);
648 if (ret) {
649 PERROR("close channel monitor pipe");
650 }
651 }
652 metadata_timer_thread_online = false;
653 }
654 tmp_ctx = ctx;
655 ctx = NULL;
656 cmm_barrier(); /* Clear ctx for signal handler. */
657 lttng_consumer_destroy(tmp_ctx);
658
659 if (health_consumerd) {
660 health_app_destroy(health_consumerd);
661 }
662 /* Ensure all prior call_rcu are done. */
663 rcu_barrier();
664
665 run_as_destroy_worker();
666
667 exit_health_consumerd_cleanup:
668 exit_options:
669 exit_set_signal_handler:
670
671 rcu_unregister_thread();
672
673 if (!retval) {
674 exit(EXIT_SUCCESS);
675 } else {
676 exit(EXIT_FAILURE);
677 }
678 }
This page took 0.068811 seconds and 4 git commands to generate.