/*
 * Copyright (C) 2011 - David Goulet <david.goulet@polymtl.ca>
 *                      Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *               2013 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#define _GNU_SOURCE
#define _LGPL_SOURCE
#include <getopt.h>
#include <grp.h>
#include <limits.h>
#include <paths.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <urcu/uatomic.h>
#include <unistd.h>
#include <config.h>

#include <common/common.h>
#include <common/compat/socket.h>
#include <common/defaults.h>
#include <common/kernel-consumer/kernel-consumer.h>
#include <common/futex.h>
#include <common/relayd/relayd.h>
#include <common/utils.h>
#include <common/daemonize.h>
#include <common/config/config.h>

#include "lttng-sessiond.h"
#include "buffer-registry.h"
#include "channel.h"
#include "cmd.h"
#include "consumer.h"
#include "context.h"
#include "event.h"
#include "kernel.h"
#include "kernel-consumer.h"
#include "modprobe.h"
#include "shm.h"
#include "ust-ctl.h"
#include "ust-consumer.h"
#include "utils.h"
#include "fd-limit.h"
#include "health-sessiond.h"
#include "testpoint.h"
#include "ust-thread.h"
#include "agent-thread.h"
#include "save.h"
#include "load-session-thread.h"
#include "syscall.h"

#define CONSUMERD_FILE "lttng-consumerd"

const char *progname;
static const char *tracing_group_name = DEFAULT_TRACING_GROUP;
static int tracing_group_name_override;
static char *opt_pidfile;
static int opt_sig_parent;
static int opt_verbose_consumer;
static int opt_daemon, opt_background;
static int opt_no_kernel;
static char *opt_load_session_path;
static pid_t ppid; /* Parent PID for --sig-parent option */
static pid_t child_ppid; /* Internal parent PID used with daemonize. */
static char *rundir;
static int lockfile_fd = -1;

/* Set to 1 when a SIGUSR1 signal is received. */
static int recv_child_signal;

/*
 * Consumer daemon specific control data. Every value not initialized here is
 * set to 0 by the static definition.
 */
static struct consumer_data kconsumer_data = {
	.type = LTTNG_CONSUMER_KERNEL,
	.err_unix_sock_path = DEFAULT_KCONSUMERD_ERR_SOCK_PATH,
	.cmd_unix_sock_path = DEFAULT_KCONSUMERD_CMD_SOCK_PATH,
	.err_sock = -1,
	.cmd_sock = -1,
	.pid_mutex = PTHREAD_MUTEX_INITIALIZER,
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.cond = PTHREAD_COND_INITIALIZER,
	.cond_mutex = PTHREAD_MUTEX_INITIALIZER,
};
static struct consumer_data ustconsumer64_data = {
	.type = LTTNG_CONSUMER64_UST,
	.err_unix_sock_path = DEFAULT_USTCONSUMERD64_ERR_SOCK_PATH,
	.cmd_unix_sock_path = DEFAULT_USTCONSUMERD64_CMD_SOCK_PATH,
	.err_sock = -1,
	.cmd_sock = -1,
	.pid_mutex = PTHREAD_MUTEX_INITIALIZER,
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.cond = PTHREAD_COND_INITIALIZER,
	.cond_mutex = PTHREAD_MUTEX_INITIALIZER,
};
static struct consumer_data ustconsumer32_data = {
	.type = LTTNG_CONSUMER32_UST,
	.err_unix_sock_path = DEFAULT_USTCONSUMERD32_ERR_SOCK_PATH,
	.cmd_unix_sock_path = DEFAULT_USTCONSUMERD32_CMD_SOCK_PATH,
	.err_sock = -1,
	.cmd_sock = -1,
	.pid_mutex = PTHREAD_MUTEX_INITIALIZER,
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.cond = PTHREAD_COND_INITIALIZER,
	.cond_mutex = PTHREAD_MUTEX_INITIALIZER,
};

/* Command line options */
static const struct option long_options[] = {
	{ "client-sock", 1, 0, 'c' },
	{ "apps-sock", 1, 0, 'a' },
	{ "kconsumerd-cmd-sock", 1, 0, 'C' },
	{ "kconsumerd-err-sock", 1, 0, 'E' },
	{ "ustconsumerd32-cmd-sock", 1, 0, 'G' },
	{ "ustconsumerd32-err-sock", 1, 0, 'H' },
	{ "ustconsumerd64-cmd-sock", 1, 0, 'D' },
	{ "ustconsumerd64-err-sock", 1, 0, 'F' },
	{ "consumerd32-path", 1, 0, 'u' },
	{ "consumerd32-libdir", 1, 0, 'U' },
	{ "consumerd64-path", 1, 0, 't' },
	{ "consumerd64-libdir", 1, 0, 'T' },
	{ "daemonize", 0, 0, 'd' },
	{ "background", 0, 0, 'b' },
	{ "sig-parent", 0, 0, 'S' },
	{ "help", 0, 0, 'h' },
	{ "group", 1, 0, 'g' },
	{ "version", 0, 0, 'V' },
	{ "quiet", 0, 0, 'q' },
	{ "verbose", 0, 0, 'v' },
	{ "verbose-consumer", 0, 0, 'Z' },
	{ "no-kernel", 0, 0, 'N' },
	{ "pidfile", 1, 0, 'p' },
	{ "agent-tcp-port", 1, 0, 'J' },
	{ "config", 1, 0, 'f' },
	{ "load", 1, 0, 'l' },
	{ "kmod-probes", 1, 0, 'P' },
	{ "extra-kmod-probes", 1, 0, 'e' },
	{ NULL, 0, 0, 0 }
};

/* Command line options to ignore from configuration file */
static const char *config_ignore_options[] = { "help", "version", "config" };

/* Shared between threads */
static int dispatch_thread_exit;

/* Global application Unix socket path */
static char apps_unix_sock_path[PATH_MAX];
/* Global client Unix socket path */
static char client_unix_sock_path[PATH_MAX];
/* Global wait shm path for UST */
static char wait_shm_path[PATH_MAX];
/* Global health check unix path */
static char health_unix_sock_path[PATH_MAX];

/* Sockets and FDs */
static int client_sock = -1;
static int apps_sock = -1;
int kernel_tracer_fd = -1;
static int kernel_poll_pipe[2] = { -1, -1 };

/*
 * Quit pipe for all threads. This permits a single cancellation point
 * for all threads when receiving an event on the pipe.
 */
static int thread_quit_pipe[2] = { -1, -1 };
static int ht_cleanup_quit_pipe[2] = { -1, -1 };

/*
 * This pipe is used to inform the thread managing application communication
 * that a command is queued and ready to be processed.
 */
static int apps_cmd_pipe[2] = { -1, -1 };

int apps_cmd_notify_pipe[2] = { -1, -1 };

/* Pthread, Mutexes and Semaphores */
static pthread_t apps_thread;
static pthread_t apps_notify_thread;
static pthread_t reg_apps_thread;
static pthread_t client_thread;
static pthread_t kernel_thread;
static pthread_t dispatch_thread;
static pthread_t health_thread;
static pthread_t ht_cleanup_thread;
static pthread_t agent_reg_thread;
static pthread_t load_session_thread;

/*
 * UST registration command queue. This queue is tied to a futex and uses an
 * N-wakers / 1-waiter scheme implemented and detailed in futex.c/.h.
 *
 * The thread_registration_apps and thread_dispatch_ust_registration threads
 * use this queue along with the wait/wake scheme. Further down the line,
 * thread_manage_apps receives the new application sockets and monitors them
 * for any I/O error or clean close that triggers an unregistration of the
 * application.
 */
static struct ust_cmd_queue ust_cmd_queue;
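
/*
 * Illustrative sketch (not part of the original file): the N-wakers /
 * 1-waiter scheme described above, using the helpers that appear later in
 * this file. A registration thread (waker) enqueues a command and wakes the
 * futex; the dispatch thread (waiter) prepares the futex, drains the queue,
 * then blocks until the next wake:
 *
 *	waker:
 *		cds_wfcq_enqueue(&ust_cmd_queue.head, &ust_cmd_queue.tail,
 *				&ust_cmd->node);
 *		futex_nto1_wake(&ust_cmd_queue.futex);
 *
 *	waiter:
 *		for (;;) {
 *			futex_nto1_prepare(&ust_cmd_queue.futex);
 *			while ((node = cds_wfcq_dequeue_blocking(
 *					&ust_cmd_queue.head,
 *					&ust_cmd_queue.tail)) != NULL) {
 *				// handle the dequeued ust_command
 *			}
 *			futex_nto1_wait(&ust_cmd_queue.futex);
 *		}
 */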

/*
 * Pointer initialized before thread creation.
 *
 * This points to the tracing session list containing the session count and a
 * mutex lock. The lock MUST be taken if you iterate over the list. The lock
 * MUST NOT be taken if you call a public function in session.c.
 *
 * The lock is nested inside the structure: session_list_ptr->lock. Please use
 * session_lock_list and session_unlock_list for lock acquisition.
 */
static struct ltt_session_list *session_list_ptr;

int ust_consumerd64_fd = -1;
int ust_consumerd32_fd = -1;

static const char *consumerd32_bin = CONFIG_CONSUMERD32_BIN;
static const char *consumerd64_bin = CONFIG_CONSUMERD64_BIN;
static const char *consumerd32_libdir = CONFIG_CONSUMERD32_LIBDIR;
static const char *consumerd64_libdir = CONFIG_CONSUMERD64_LIBDIR;
static int consumerd32_bin_override;
static int consumerd64_bin_override;
static int consumerd32_libdir_override;
static int consumerd64_libdir_override;

static const char *module_proc_lttng = "/proc/lttng";

/*
 * Consumer daemon state which is changed when spawning it, killing it or in
 * case of a fatal error.
 */
enum consumerd_state {
	CONSUMER_STARTED = 1,
	CONSUMER_STOPPED = 2,
	CONSUMER_ERROR   = 3,
};

/*
 * This consumer daemon state is used to validate if a client command will be
 * able to reach the consumer. If not, the client is informed. For instance,
 * doing a "lttng start" when the consumer state is set to ERROR will return an
 * error to the client.
 *
 * The following example shows a possible race condition of this scheme:
 *
 * consumer thread error happens
 *                                    client cmd arrives
 *                                    client cmd checks state -> still OK
 * consumer thread exit, sets error
 *                                    client cmd try to talk to consumer
 *                                    ...
 *
 * However, since the consumer is a different daemon, we have no way of making
 * sure the command will reach it safely even with this state flag. This is why
 * we consider that up to the state validation during command processing, the
 * command is safe. After that, we cannot guarantee the correctness of the
 * client request vis-a-vis the consumer.
 */
static enum consumerd_state ust_consumerd_state;
static enum consumerd_state kernel_consumerd_state;
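
/*
 * Illustrative sketch (an assumption, not code from this file): a client
 * command handler would validate the state with a plain atomic read before
 * attempting to reach the consumer, e.g.:
 *
 *	if (uatomic_read(&kernel_consumerd_state) == CONSUMER_ERROR) {
 *		return some_error_code;	// hypothetical error code
 *	}
 *
 * As the comment above explains, past this check delivery to the consumerd
 * is best effort only.
 */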

/*
 * Socket timeout for receiving and sending in seconds.
 */
static int app_socket_timeout;

/* Set in main() with the current page size. */
long page_size;

/* Application health monitoring */
struct health_app *health_sessiond;

/* Agent TCP port for registration. Used by the agent thread. */
unsigned int agent_tcp_port = DEFAULT_AGENT_TCP_PORT;

/* Am I root or not. */
int is_root; /* Set to 1 if the daemon is running as root */

const char * const config_section_name = "sessiond";

/* Load session thread information to operate. */
struct load_session_thread_data *load_info;

/*
 * Whether sessiond is ready for commands/health check requests.
 * NR_LTTNG_SESSIOND_READY must match the number of calls to
 * sessiond_notify_ready().
 */
#define NR_LTTNG_SESSIOND_READY 3
int lttng_sessiond_ready = NR_LTTNG_SESSIOND_READY;

/* Notify parents that we are ready for cmd and health check */
LTTNG_HIDDEN
void sessiond_notify_ready(void)
{
	if (uatomic_sub_return(&lttng_sessiond_ready, 1) == 0) {
		/*
		 * Notify parent pid that we are ready to accept command
		 * for client side. This ppid is the one from the
		 * external process that spawned us.
		 */
		if (opt_sig_parent) {
			kill(ppid, SIGUSR1);
		}

		/*
		 * Notify the parent of the fork() process that we are
		 * ready.
		 */
		if (opt_daemon || opt_background) {
			kill(child_ppid, SIGUSR1);
		}
	}
}
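
/*
 * Usage sketch (an assumption): each of the NR_LTTNG_SESSIOND_READY
 * readiness points calls sessiond_notify_ready() exactly once when its
 * setup is complete, e.g. right before entering its service loop:
 *
 *	static void *thread_example(void *data)	// hypothetical thread
 *	{
 *		// ... create sockets, register health ...
 *		sessiond_notify_ready();
 *		for (;;) {
 *			// ... serve requests ...
 *		}
 *	}
 *
 * Only the call that brings lttng_sessiond_ready down to zero signals the
 * parent process(es).
 */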

static
void setup_consumerd_path(void)
{
	const char *bin, *libdir;

	/*
	 * Allow INSTALL_BIN_PATH to be used as a target path for the
	 * native architecture size consumer if CONFIG_CONSUMER*_PATH
	 * has not been defined.
	 */
#if (CAA_BITS_PER_LONG == 32)
	if (!consumerd32_bin[0]) {
		consumerd32_bin = INSTALL_BIN_PATH "/" CONSUMERD_FILE;
	}
	if (!consumerd32_libdir[0]) {
		consumerd32_libdir = INSTALL_LIB_PATH;
	}
#elif (CAA_BITS_PER_LONG == 64)
	if (!consumerd64_bin[0]) {
		consumerd64_bin = INSTALL_BIN_PATH "/" CONSUMERD_FILE;
	}
	if (!consumerd64_libdir[0]) {
		consumerd64_libdir = INSTALL_LIB_PATH;
	}
#else
#error "Unknown bitness"
#endif

	/*
	 * runtime env. var. overrides the build default.
	 */
	bin = getenv("LTTNG_CONSUMERD32_BIN");
	if (bin) {
		consumerd32_bin = bin;
	}
	bin = getenv("LTTNG_CONSUMERD64_BIN");
	if (bin) {
		consumerd64_bin = bin;
	}
	libdir = getenv("LTTNG_CONSUMERD32_LIBDIR");
	if (libdir) {
		consumerd32_libdir = libdir;
	}
	libdir = getenv("LTTNG_CONSUMERD64_LIBDIR");
	if (libdir) {
		consumerd64_libdir = libdir;
	}
}

static
int __sessiond_set_thread_pollset(struct lttng_poll_event *events, size_t size,
		int *a_pipe)
{
	int ret;

	assert(events);

	ret = lttng_poll_create(events, size, LTTNG_CLOEXEC);
	if (ret < 0) {
		goto error;
	}

	/* Add quit pipe */
	ret = lttng_poll_add(events, a_pipe[0], LPOLLIN | LPOLLERR);
	if (ret < 0) {
		goto error;
	}

	return 0;

error:
	return ret;
}

/*
 * Create a poll set with O_CLOEXEC and add the thread quit pipe to the set.
 */
int sessiond_set_thread_pollset(struct lttng_poll_event *events, size_t size)
{
	return __sessiond_set_thread_pollset(events, size, thread_quit_pipe);
}

/*
 * Create a poll set with O_CLOEXEC and add the thread quit pipe to the set.
 */
int sessiond_set_ht_cleanup_thread_pollset(struct lttng_poll_event *events,
		size_t size)
{
	return __sessiond_set_thread_pollset(events, size,
			ht_cleanup_quit_pipe);
}

static
int __sessiond_check_thread_quit_pipe(int fd, uint32_t events, int a_pipe)
{
	if (fd == a_pipe && (events & LPOLLIN)) {
		return 1;
	}
	return 0;
}

/*
 * Check if the thread quit pipe was triggered.
 *
 * Return 1 if it was triggered, else 0.
 */
int sessiond_check_thread_quit_pipe(int fd, uint32_t events)
{
	return __sessiond_check_thread_quit_pipe(fd, events,
			thread_quit_pipe[0]);
}

/*
 * Check if the ht_cleanup thread quit pipe was triggered.
 *
 * Return 1 if it was triggered, else 0.
 */
int sessiond_check_ht_cleanup_quit(int fd, uint32_t events)
{
	return __sessiond_check_thread_quit_pipe(fd, events,
			ht_cleanup_quit_pipe[0]);
}

/*
 * Init thread quit pipe.
 *
 * Return -1 on error or 0 if all pipes are created.
 */
static int __init_thread_quit_pipe(int *a_pipe)
{
	int ret, i;

	ret = pipe(a_pipe);
	if (ret < 0) {
		PERROR("thread quit pipe");
		goto error;
	}

	for (i = 0; i < 2; i++) {
		ret = fcntl(a_pipe[i], F_SETFD, FD_CLOEXEC);
		if (ret < 0) {
			PERROR("fcntl");
			goto error;
		}
	}

error:
	return ret;
}

static int init_thread_quit_pipe(void)
{
	return __init_thread_quit_pipe(thread_quit_pipe);
}

static int init_ht_cleanup_quit_pipe(void)
{
	return __init_thread_quit_pipe(ht_cleanup_quit_pipe);
}
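
/*
 * Design note (a sketch, assuming Linux >= 2.6.27 where pipe2() exists):
 * the pipe() + fcntl(FD_CLOEXEC) sequence above leaves a short window in
 * which a concurrent fork()/exec() could inherit the descriptors. pipe2()
 * closes that window atomically:
 *
 *	static int init_pipe_cloexec(int *a_pipe)	// hypothetical helper
 *	{
 *		int ret;
 *
 *		ret = pipe2(a_pipe, O_CLOEXEC);
 *		if (ret < 0) {
 *			PERROR("pipe2");
 *		}
 *		return ret;
 *	}
 *
 * The portable fcntl() loop is kept here since pipe2() is Linux-specific.
 */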

/*
 * Stop all threads by closing the thread quit pipe.
 */
static void stop_threads(void)
{
	int ret;

	/* Stopping all threads */
	DBG("Terminating all threads");
	ret = notify_thread_pipe(thread_quit_pipe[1]);
	if (ret < 0) {
		ERR("write error on thread quit pipe");
	}

	/* Dispatch thread */
	CMM_STORE_SHARED(dispatch_thread_exit, 1);
	futex_nto1_wake(&ust_cmd_queue.futex);
}

/*
 * Close every consumer sockets.
 */
static void close_consumer_sockets(void)
{
	int ret;

	if (kconsumer_data.err_sock >= 0) {
		ret = close(kconsumer_data.err_sock);
		if (ret < 0) {
			PERROR("kernel consumer err_sock close");
		}
	}
	if (ustconsumer32_data.err_sock >= 0) {
		ret = close(ustconsumer32_data.err_sock);
		if (ret < 0) {
			PERROR("UST consumerd32 err_sock close");
		}
	}
	if (ustconsumer64_data.err_sock >= 0) {
		ret = close(ustconsumer64_data.err_sock);
		if (ret < 0) {
			PERROR("UST consumerd64 err_sock close");
		}
	}
	if (kconsumer_data.cmd_sock >= 0) {
		ret = close(kconsumer_data.cmd_sock);
		if (ret < 0) {
			PERROR("kernel consumer cmd_sock close");
		}
	}
	if (ustconsumer32_data.cmd_sock >= 0) {
		ret = close(ustconsumer32_data.cmd_sock);
		if (ret < 0) {
			PERROR("UST consumerd32 cmd_sock close");
		}
	}
	if (ustconsumer64_data.cmd_sock >= 0) {
		ret = close(ustconsumer64_data.cmd_sock);
		if (ret < 0) {
			PERROR("UST consumerd64 cmd_sock close");
		}
	}
}

/*
 * Generate the full lock file path using the rundir.
 *
 * Return the snprintf() return value; thus a negative value is an error.
 */
static int generate_lock_file_path(char *path, size_t len)
{
	int ret;

	assert(path);
	assert(rundir);

	/* Build lockfile path from rundir. */
	ret = snprintf(path, len, "%s/" DEFAULT_LTTNG_SESSIOND_LOCKFILE, rundir);
	if (ret < 0) {
		PERROR("snprintf lockfile path");
	}

	return ret;
}

/*
 * Cleanup the session daemon's data structures.
 */
static void sessiond_cleanup(void)
{
	int ret;
	struct ltt_session *sess, *stmp;
	char path[PATH_MAX];

	DBG("Cleanup sessiond");

	/*
	 * Close the thread quit pipe. It has already done its job,
	 * since we are now called.
	 */
	utils_close_pipe(thread_quit_pipe);

	/*
	 * If opt_pidfile is undefined, the default file will be wiped when
	 * removing the rundir.
	 */
	if (opt_pidfile) {
		ret = remove(opt_pidfile);
		if (ret < 0) {
			PERROR("remove pidfile %s", opt_pidfile);
		}
	}

	DBG("Removing sessiond and consumerd content of directory %s", rundir);

	/* sessiond */
	snprintf(path, PATH_MAX,
		"%s/%s",
		rundir, DEFAULT_LTTNG_SESSIOND_PIDFILE);
	DBG("Removing %s", path);
	(void) unlink(path);

	snprintf(path, PATH_MAX, "%s/%s", rundir,
			DEFAULT_LTTNG_SESSIOND_AGENTPORT_FILE);
	DBG("Removing %s", path);
	(void) unlink(path);

	/* kconsumerd */
	snprintf(path, PATH_MAX,
		DEFAULT_KCONSUMERD_ERR_SOCK_PATH,
		rundir);
	DBG("Removing %s", path);
	(void) unlink(path);

	snprintf(path, PATH_MAX,
		DEFAULT_KCONSUMERD_PATH,
		rundir);
	DBG("Removing directory %s", path);
	(void) rmdir(path);

	/* ust consumerd 32 */
	snprintf(path, PATH_MAX,
		DEFAULT_USTCONSUMERD32_ERR_SOCK_PATH,
		rundir);
	DBG("Removing %s", path);
	(void) unlink(path);

	snprintf(path, PATH_MAX,
		DEFAULT_USTCONSUMERD32_PATH,
		rundir);
	DBG("Removing directory %s", path);
	(void) rmdir(path);

	/* ust consumerd 64 */
	snprintf(path, PATH_MAX,
		DEFAULT_USTCONSUMERD64_ERR_SOCK_PATH,
		rundir);
	DBG("Removing %s", path);
	(void) unlink(path);

	snprintf(path, PATH_MAX,
		DEFAULT_USTCONSUMERD64_PATH,
		rundir);
	DBG("Removing directory %s", path);
	(void) rmdir(path);

	DBG("Cleaning up all sessions");

	/* Destroy session list mutex */
	if (session_list_ptr != NULL) {
		pthread_mutex_destroy(&session_list_ptr->lock);

		/* Cleanup ALL sessions */
		cds_list_for_each_entry_safe(sess, stmp,
				&session_list_ptr->head, list) {
			cmd_destroy_session(sess, kernel_poll_pipe[1]);
		}
	}

	DBG("Closing all UST sockets");
	ust_app_clean_list();
	buffer_reg_destroy_registries();

	if (is_root && !opt_no_kernel) {
		DBG2("Closing kernel fd");
		if (kernel_tracer_fd >= 0) {
			ret = close(kernel_tracer_fd);
			if (ret) {
				PERROR("close");
			}
		}
		DBG("Unloading kernel modules");
		modprobe_remove_lttng_all();
		free(syscall_table);
	}

	close_consumer_sockets();

	if (load_info) {
		load_session_destroy_data(load_info);
		free(load_info);
	}

	/*
	 * Cleanup lock file by deleting it and finally closing it, which will
	 * release the file system lock.
	 */
	if (lockfile_fd >= 0) {
		char lockfile_path[PATH_MAX];

		ret = generate_lock_file_path(lockfile_path,
				sizeof(lockfile_path));
		if (ret > 0) {
			ret = remove(lockfile_path);
			if (ret < 0) {
				PERROR("remove lock file");
			}
			ret = close(lockfile_fd);
			if (ret < 0) {
				PERROR("close lock file");
			}
		}
	}

	/*
	 * We do NOT rmdir rundir because there are other processes
	 * using it, for instance lttng-relayd, which can start in
	 * parallel with this teardown.
	 */

	free(rundir);
}

/*
 * Cleanup the daemon's option data structures.
 */
static void sessiond_cleanup_options(void)
{
	DBG("Cleaning up options");

	/*
	 * If an override option is set, the pointer points to non-const
	 * memory, so it is safe to free even though the variable is
	 * declared const.
	 */
	if (tracing_group_name_override) {
		free((void *) tracing_group_name);
	}
	if (consumerd32_bin_override) {
		free((void *) consumerd32_bin);
	}
	if (consumerd64_bin_override) {
		free((void *) consumerd64_bin);
	}
	if (consumerd32_libdir_override) {
		free((void *) consumerd32_libdir);
	}
	if (consumerd64_libdir_override) {
		free((void *) consumerd64_libdir);
	}

	free(opt_pidfile);
	free(opt_load_session_path);
	free(kmod_probes_list);
	free(kmod_extra_probes_list);

	/* <fun> */
	DBG("%c[%d;%dm*** assert failed :-) *** ==> %c[%dm%c[%d;%dm"
			"Matthew, BEET driven development works!%c[%dm",
			27, 1, 31, 27, 0, 27, 1, 33, 27, 0);
	/* </fun> */
}

/*
 * Send data on a unix socket using the liblttsessiondcomm API.
 *
 * Return lttcomm error code.
 */
static int send_unix_sock(int sock, void *buf, size_t len)
{
	/* Check valid length */
	if (len == 0) {
		return -1;
	}

	return lttcomm_send_unix_sock(sock, buf, len);
}

/*
 * Free memory of a command context structure.
 */
static void clean_command_ctx(struct command_ctx **cmd_ctx)
{
	DBG("Clean command context structure");
	if (*cmd_ctx) {
		if ((*cmd_ctx)->llm) {
			free((*cmd_ctx)->llm);
		}
		if ((*cmd_ctx)->lsm) {
			free((*cmd_ctx)->lsm);
		}
		free(*cmd_ctx);
		*cmd_ctx = NULL;
	}
}

/*
 * Notify UST applications using the shm mmap futex.
 */
static int notify_ust_apps(int active)
{
	char *wait_shm_mmap;

	DBG("Notifying applications of session daemon state: %d", active);

	/* See shm.c for this call implying mmap, shm and futex calls */
	wait_shm_mmap = shm_ust_get_mmap(wait_shm_path, is_root);
	if (wait_shm_mmap == NULL) {
		goto error;
	}

	/* Wake waiting process */
	futex_wait_update((int32_t *) wait_shm_mmap, active);

	/* Apps notified successfully */
	return 0;

error:
	return -1;
}

/*
 * Setup the outgoing data buffer for the response (llm) by allocating the
 * right amount of memory and copying the original information from the lsm
 * structure.
 *
 * Return the payload size on success, else a negative error code.
 */
static int setup_lttng_msg(struct command_ctx *cmd_ctx, size_t size)
{
	int ret, buf_size;

	buf_size = size;

	cmd_ctx->llm = zmalloc(sizeof(struct lttcomm_lttng_msg) + buf_size);
	if (cmd_ctx->llm == NULL) {
		PERROR("zmalloc");
		ret = -ENOMEM;
		goto error;
	}

	/* Copy common data */
	cmd_ctx->llm->cmd_type = cmd_ctx->lsm->cmd_type;
	cmd_ctx->llm->pid = cmd_ctx->lsm->domain.attr.pid;

	cmd_ctx->llm->data_size = size;
	cmd_ctx->lttng_msg_size = sizeof(struct lttcomm_lttng_msg) + buf_size;

	return buf_size;

error:
	return ret;
}
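
/*
 * Usage sketch (an assumption): a command handler returning 'size' payload
 * bytes would allocate the response with setup_lttng_msg() and copy its
 * data right after the header:
 *
 *	ret = setup_lttng_msg(cmd_ctx, sizeof(payload));	// hypothetical payload
 *	if (ret < 0) {
 *		goto error;
 *	}
 *	memcpy(((char *) cmd_ctx->llm) + sizeof(struct lttcomm_lttng_msg),
 *			&payload, sizeof(payload));
 */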

/*
 * Update the kernel poll set of all channel fds available over all tracing
 * sessions. Add the wakeup pipe at the end of the set.
 */
static int update_kernel_poll(struct lttng_poll_event *events)
{
	int ret;
	struct ltt_session *session;
	struct ltt_kernel_channel *channel;

	DBG("Updating kernel poll set");

	session_lock_list();
	cds_list_for_each_entry(session, &session_list_ptr->head, list) {
		session_lock(session);
		if (session->kernel_session == NULL) {
			session_unlock(session);
			continue;
		}

		cds_list_for_each_entry(channel,
				&session->kernel_session->channel_list.head, list) {
			/* Add channel fd to the kernel poll set */
			ret = lttng_poll_add(events, channel->fd, LPOLLIN | LPOLLRDNORM);
			if (ret < 0) {
				session_unlock(session);
				goto error;
			}
			DBG("Channel fd %d added to kernel set", channel->fd);
		}
		session_unlock(session);
	}
	session_unlock_list();

	return 0;

error:
	session_unlock_list();
	return -1;
}

/*
 * Find the channel fd from 'fd' over all tracing sessions. When found, check
 * for new channel streams and send those stream fds to the kernel consumer.
 *
 * Useful for CPU hotplug feature.
 */
static int update_kernel_stream(struct consumer_data *consumer_data, int fd)
{
	int ret = 0;
	struct ltt_session *session;
	struct ltt_kernel_session *ksess;
	struct ltt_kernel_channel *channel;

	DBG("Updating kernel streams for channel fd %d", fd);

	session_lock_list();
	cds_list_for_each_entry(session, &session_list_ptr->head, list) {
		session_lock(session);
		if (session->kernel_session == NULL) {
			session_unlock(session);
			continue;
		}
		ksess = session->kernel_session;

		cds_list_for_each_entry(channel,
				&ksess->channel_list.head, list) {
			struct lttng_ht_iter iter;
			struct consumer_socket *socket;

			if (channel->fd != fd) {
				continue;
			}
			DBG("Channel found, updating kernel streams");
			ret = kernel_open_channel_stream(channel);
			if (ret < 0) {
				goto error;
			}
			/* Update the stream global counter */
			ksess->stream_count_global += ret;

			/*
			 * Have we already sent fds to the consumer? If yes, it
			 * means that tracing is started so it is safe to send
			 * our updated stream fds.
			 */
			if (ksess->consumer_fds_sent != 1
					|| ksess->consumer == NULL) {
				ret = -1;
				goto error;
			}

			rcu_read_lock();
			cds_lfht_for_each_entry(ksess->consumer->socks->ht,
					&iter.iter, socket, node.node) {
				pthread_mutex_lock(socket->lock);
				ret = kernel_consumer_send_channel_stream(socket,
						channel, ksess,
						session->output_traces ? 1 : 0);
				pthread_mutex_unlock(socket->lock);
				if (ret < 0) {
					rcu_read_unlock();
					goto error;
				}
			}
			rcu_read_unlock();
		}
		session_unlock(session);
	}
	session_unlock_list();
	return ret;

error:
	session_unlock(session);
	session_unlock_list();
	return ret;
}

/*
 * For each tracing session, update newly registered apps. The session list
 * lock MUST be acquired before calling this.
 */
static void update_ust_app(int app_sock)
{
	struct ltt_session *sess, *stmp;

	/* Consumer is in an ERROR state. Stop any application update. */
	if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) {
		/* Stop the update process since the consumer is dead. */
		return;
	}

	/* For all tracing session(s) */
	cds_list_for_each_entry_safe(sess, stmp, &session_list_ptr->head, list) {
		session_lock(sess);
		if (sess->ust_session) {
			ust_app_global_update(sess->ust_session, app_sock);
		}
		session_unlock(sess);
	}
}

/*
 * This thread manages events coming from the kernel.
 *
 * Features supported in this thread:
 *    -) CPU Hotplug
 */
static void *thread_manage_kernel(void *data)
{
	int ret, i, pollfd, update_poll_flag = 1, err = -1;
	uint32_t revents, nb_fd;
	char tmp;
	struct lttng_poll_event events;

	DBG("[thread] Thread manage kernel started");

	health_register(health_sessiond, HEALTH_SESSIOND_TYPE_KERNEL);

	/*
	 * The first step of the while loop cleans this structure, which can
	 * free non-NULL pointers, so initialize it before the loop.
	 */
	lttng_poll_init(&events);

	if (testpoint(sessiond_thread_manage_kernel)) {
		goto error_testpoint;
	}

	health_code_update();

	if (testpoint(sessiond_thread_manage_kernel_before_loop)) {
		goto error_testpoint;
	}

	while (1) {
		health_code_update();

		if (update_poll_flag == 1) {
			/* Clean events object. We are about to populate it again. */
			lttng_poll_clean(&events);

			ret = sessiond_set_thread_pollset(&events, 2);
			if (ret < 0) {
				goto error_poll_create;
			}

			ret = lttng_poll_add(&events, kernel_poll_pipe[0], LPOLLIN);
			if (ret < 0) {
				goto error;
			}

			/* This will add the available kernel channel if any. */
			ret = update_kernel_poll(&events);
			if (ret < 0) {
				goto error;
			}
			update_poll_flag = 0;
		}

		DBG("Thread kernel polling on %d fds", LTTNG_POLL_GETNB(&events));

		/* Poll infinite value of time */
	restart:
		health_poll_entry();
		ret = lttng_poll_wait(&events, -1);
		health_poll_exit();
		if (ret < 0) {
			/*
			 * Restart interrupted system call.
			 */
			if (errno == EINTR) {
				goto restart;
			}
			goto error;
		} else if (ret == 0) {
			/* Should not happen since timeout is infinite */
			ERR("Return value of poll is 0 with an infinite timeout.\n"
				"This should not have happened! Continuing...");
			continue;
		}

		nb_fd = ret;

		for (i = 0; i < nb_fd; i++) {
			/* Fetch once the poll data */
			revents = LTTNG_POLL_GETEV(&events, i);
			pollfd = LTTNG_POLL_GETFD(&events, i);

			health_code_update();

			if (!revents) {
				/* No activity for this FD (poll implementation). */
				continue;
			}

			/* Thread quit pipe has been closed. Killing thread. */
			ret = sessiond_check_thread_quit_pipe(pollfd, revents);
			if (ret) {
				err = 0;
				goto exit;
			}

			/* Check for data on kernel pipe */
			if (pollfd == kernel_poll_pipe[0] && (revents & LPOLLIN)) {
				(void) lttng_read(kernel_poll_pipe[0],
						&tmp, 1);
				/*
				 * The return value is useless here; if this pipe sees any
				 * action, an update is required anyway.
				 */
				update_poll_flag = 1;
				continue;
			} else {
				/*
				 * New CPU detected by the kernel. Adding kernel stream to
				 * kernel session and updating the kernel consumer
				 */
				if (revents & LPOLLIN) {
					ret = update_kernel_stream(&kconsumer_data, pollfd);
					if (ret < 0) {
						continue;
					}
					break;
					/*
					 * TODO: We might want to handle the LPOLLERR | LPOLLHUP
					 * and unregister kernel stream at this point.
					 */
				}
			}
		}
	}

exit:
error:
	lttng_poll_clean(&events);
error_poll_create:
error_testpoint:
	utils_close_pipe(kernel_poll_pipe);
	kernel_poll_pipe[0] = kernel_poll_pipe[1] = -1;
	if (err) {
		health_error();
		ERR("Health error occurred in %s", __func__);
		WARN("Kernel thread died unexpectedly. "
				"Kernel tracing can continue but CPU hotplug is disabled.");
	}
	health_unregister(health_sessiond);
	DBG("Kernel thread dying");
	return NULL;
}

/*
 * Signal the pthread condition of the consumer data to wake any thread
 * waiting on a consumer state change.
 */
static void signal_consumer_condition(struct consumer_data *data, int state)
{
	pthread_mutex_lock(&data->cond_mutex);

	/*
	 * The state is set before signaling. It can be any value; it's the
	 * waiter's job to correctly interpret this condition variable
	 * associated with the consumer pthread_cond.
	 *
	 * A value of 0 means that the corresponding thread of the consumer data
	 * was not started. 1 indicates that the thread has started and is ready
	 * for action. A negative value means that there was an error during the
	 * thread bootstrap.
	 */
	data->consumer_thread_is_ready = state;
	(void) pthread_cond_signal(&data->cond);

	pthread_mutex_unlock(&data->cond_mutex);
}
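
/*
 * Sketch of the matching waiter side (an assumption; the real wait, with a
 * timeout, lives in the consumer spawning code outside this excerpt). The
 * waiter holds cond_mutex and re-checks consumer_thread_is_ready to cope
 * with spurious wakeups:
 *
 *	pthread_mutex_lock(&data->cond_mutex);
 *	while (data->consumer_thread_is_ready == 0) {
 *		pthread_cond_wait(&data->cond, &data->cond_mutex);
 *	}
 *	pthread_mutex_unlock(&data->cond_mutex);
 *	if (data->consumer_thread_is_ready < 0) {
 *		// consumer thread bootstrap error
 *	}
 */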

/*
 * This thread manages the consumer errors sent back to the session daemon.
 */
static void *thread_manage_consumer(void *data)
{
	int sock = -1, i, ret, pollfd, err = -1, should_quit = 0;
	uint32_t revents, nb_fd;
	enum lttcomm_return_code code;
	struct lttng_poll_event events;
	struct consumer_data *consumer_data = data;

	DBG("[thread] Manage consumer started");

	health_register(health_sessiond, HEALTH_SESSIOND_TYPE_CONSUMER);

	health_code_update();

	/*
	 * Pass 3 as size here for the thread quit pipe, consumerd_err_sock and the
	 * metadata_sock. Nothing more will be added to this poll set.
	 */
	ret = sessiond_set_thread_pollset(&events, 3);
	if (ret < 0) {
		goto error_poll;
	}

	/*
	 * The error socket here is already in a listening state which was done
	 * just before spawning this thread to avoid a race between the consumer
	 * daemon exec trying to connect and the listen() call.
	 */
	ret = lttng_poll_add(&events, consumer_data->err_sock, LPOLLIN | LPOLLRDHUP);
	if (ret < 0) {
		goto error;
	}

	health_code_update();

	/* Infinite blocking call, waiting for transmission */
restart:
	health_poll_entry();

	if (testpoint(sessiond_thread_manage_consumer)) {
		goto error;
	}

	ret = lttng_poll_wait(&events, -1);
	health_poll_exit();
	if (ret < 0) {
		/*
		 * Restart interrupted system call.
		 */
		if (errno == EINTR) {
			goto restart;
		}
		goto error;
	}

	nb_fd = ret;

	for (i = 0; i < nb_fd; i++) {
		/* Fetch once the poll data */
		revents = LTTNG_POLL_GETEV(&events, i);
		pollfd = LTTNG_POLL_GETFD(&events, i);

		health_code_update();

		if (!revents) {
			/* No activity for this FD (poll implementation). */
			continue;
		}

		/* Thread quit pipe has been closed. Killing thread. */
		ret = sessiond_check_thread_quit_pipe(pollfd, revents);
		if (ret) {
			err = 0;
			goto exit;
		}

		/* Event on the registration socket */
		if (pollfd == consumer_data->err_sock) {
			if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
				ERR("consumer err socket poll error");
				goto error;
			}
		}
	}

	sock = lttcomm_accept_unix_sock(consumer_data->err_sock);
	if (sock < 0) {
		goto error;
	}

	/*
	 * Set the CLOEXEC flag. Return code is useless because either way, the
	 * show must go on.
	 */
	(void) utils_set_fd_cloexec(sock);

	health_code_update();

	DBG2("Receiving code from consumer err_sock");

	/* Getting status code from kconsumerd */
	ret = lttcomm_recv_unix_sock(sock, &code,
			sizeof(enum lttcomm_return_code));
	if (ret <= 0) {
		goto error;
	}

	health_code_update();
	if (code == LTTCOMM_CONSUMERD_COMMAND_SOCK_READY) {
		/* Connect both sockets, command and metadata. */
		consumer_data->cmd_sock =
			lttcomm_connect_unix_sock(consumer_data->cmd_unix_sock_path);
		consumer_data->metadata_fd =
			lttcomm_connect_unix_sock(consumer_data->cmd_unix_sock_path);
		if (consumer_data->cmd_sock < 0
				|| consumer_data->metadata_fd < 0) {
			PERROR("consumer connect cmd socket");
			/* On error, signal condition and quit. */
			signal_consumer_condition(consumer_data, -1);
			goto error;
		}
		consumer_data->metadata_sock.fd_ptr = &consumer_data->metadata_fd;
		/* Create metadata socket lock. */
		consumer_data->metadata_sock.lock = zmalloc(sizeof(pthread_mutex_t));
		if (consumer_data->metadata_sock.lock == NULL) {
			PERROR("zmalloc pthread mutex");
			ret = -1;
			goto error;
		}
		pthread_mutex_init(consumer_data->metadata_sock.lock, NULL);

		signal_consumer_condition(consumer_data, 1);
		DBG("Consumer command socket ready (fd: %d)", consumer_data->cmd_sock);
		DBG("Consumer metadata socket ready (fd: %d)",
				consumer_data->metadata_fd);
	} else {
		ERR("consumer error when waiting for SOCK_READY : %s",
				lttcomm_get_readable_code(-code));
		goto error;
	}

	/* Remove the consumerd error sock since we've established a connection */
	ret = lttng_poll_del(&events, consumer_data->err_sock);
	if (ret < 0) {
		goto error;
	}

	/* Add new accepted error socket. */
	ret = lttng_poll_add(&events, sock, LPOLLIN | LPOLLRDHUP);
	if (ret < 0) {
		goto error;
	}

	/* Add metadata socket that is successfully connected. */
	ret = lttng_poll_add(&events, consumer_data->metadata_fd,
			LPOLLIN | LPOLLRDHUP);
	if (ret < 0) {
		goto error;
	}

	health_code_update();

	/* Infinite blocking call, waiting for transmission */
restart_poll:
	while (1) {
		health_code_update();

		/* Exit the thread because the thread quit pipe has been triggered. */
		if (should_quit) {
			/* Not a health error. */
			err = 0;
			goto exit;
		}

		health_poll_entry();
		ret = lttng_poll_wait(&events, -1);
		health_poll_exit();
		if (ret < 0) {
			/*
			 * Restart interrupted system call.
			 */
			if (errno == EINTR) {
				goto restart_poll;
			}
			goto error;
		}

		nb_fd = ret;

		for (i = 0; i < nb_fd; i++) {
			/* Fetch once the poll data */
			revents = LTTNG_POLL_GETEV(&events, i);
			pollfd = LTTNG_POLL_GETFD(&events, i);

			health_code_update();

			if (!revents) {
				/* No activity for this FD (poll implementation). */
				continue;
			}

			/*
			 * Thread quit pipe has been triggered, flag that we should stop
			 * but continue the current loop to handle potential data from
			 * consumer.
			 */
			should_quit = sessiond_check_thread_quit_pipe(pollfd, revents);

			if (pollfd == sock) {
				/* Event on the consumerd socket */
				if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
					ERR("consumer err socket second poll error");
					goto error;
				}
				health_code_update();
				/* Wait for any kconsumerd error */
				ret = lttcomm_recv_unix_sock(sock, &code,
						sizeof(enum lttcomm_return_code));
				if (ret <= 0) {
					ERR("consumer closed the command socket");
					goto error;
				}

				ERR("consumer return code : %s",
						lttcomm_get_readable_code(-code));

				goto exit;
			} else if (pollfd == consumer_data->metadata_fd) {
				/* UST metadata requests */
				ret = ust_consumer_metadata_request(
						&consumer_data->metadata_sock);
				if (ret < 0) {
					ERR("Handling metadata request");
					goto error;
				}
			}
			/* No need for an else branch all FDs are tested prior. */
		}
		health_code_update();
	}

exit:
error:
	/*
	 * We lock here because we are about to close the sockets and some other
	 * thread might be using them, so we take exclusive access, which will
	 * abort all other consumer commands issued by other threads.
	 */
	pthread_mutex_lock(&consumer_data->lock);

	/* Immediately set the consumerd state to stopped */
	if (consumer_data->type == LTTNG_CONSUMER_KERNEL) {
		uatomic_set(&kernel_consumerd_state, CONSUMER_ERROR);
	} else if (consumer_data->type == LTTNG_CONSUMER64_UST ||
			consumer_data->type == LTTNG_CONSUMER32_UST) {
		uatomic_set(&ust_consumerd_state, CONSUMER_ERROR);
	} else {
		/* Code flow error... */
		assert(0);
	}

	if (consumer_data->err_sock >= 0) {
		ret = close(consumer_data->err_sock);
		if (ret) {
			PERROR("close");
		}
		consumer_data->err_sock = -1;
	}
	if (consumer_data->cmd_sock >= 0) {
		ret = close(consumer_data->cmd_sock);
		if (ret) {
			PERROR("close");
		}
		consumer_data->cmd_sock = -1;
	}
	if (consumer_data->metadata_sock.fd_ptr &&
			*consumer_data->metadata_sock.fd_ptr >= 0) {
		ret = close(*consumer_data->metadata_sock.fd_ptr);
		if (ret) {
			PERROR("close");
		}
	}
	if (sock >= 0) {
		ret = close(sock);
		if (ret) {
			PERROR("close");
		}
	}

	unlink(consumer_data->err_unix_sock_path);
	unlink(consumer_data->cmd_unix_sock_path);
	consumer_data->pid = 0;
	pthread_mutex_unlock(&consumer_data->lock);

	/* Cleanup metadata socket mutex. */
	if (consumer_data->metadata_sock.lock) {
		pthread_mutex_destroy(consumer_data->metadata_sock.lock);
		free(consumer_data->metadata_sock.lock);
	}
	lttng_poll_clean(&events);
error_poll:
	if (err) {
		health_error();
		ERR("Health error occurred in %s", __func__);
	}
	health_unregister(health_sessiond);
	DBG("consumer thread cleanup completed");

	return NULL;
}

/*
 * This thread manages application communication.
 */
static void *thread_manage_apps(void *data)
{
	int i, ret, pollfd, err = -1;
	ssize_t size_ret;
	uint32_t revents, nb_fd;
	struct lttng_poll_event events;

	DBG("[thread] Manage application started");

	rcu_register_thread();
	rcu_thread_online();

	health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_MANAGE);

	if (testpoint(sessiond_thread_manage_apps)) {
		goto error_testpoint;
	}

	health_code_update();

	ret = sessiond_set_thread_pollset(&events, 2);
	if (ret < 0) {
		goto error_poll_create;
	}

	ret = lttng_poll_add(&events, apps_cmd_pipe[0], LPOLLIN | LPOLLRDHUP);
	if (ret < 0) {
		goto error;
	}

	if (testpoint(sessiond_thread_manage_apps_before_loop)) {
		goto error;
	}

	health_code_update();

	while (1) {
		DBG("Apps thread polling on %d fds", LTTNG_POLL_GETNB(&events));

		/* Infinite blocking call, waiting for transmission */
	restart:
		health_poll_entry();
		ret = lttng_poll_wait(&events, -1);
		health_poll_exit();
		if (ret < 0) {
			/*
			 * Restart interrupted system call.
			 */
			if (errno == EINTR) {
				goto restart;
			}
			goto error;
		}

		nb_fd = ret;

		for (i = 0; i < nb_fd; i++) {
			/* Fetch once the poll data */
			revents = LTTNG_POLL_GETEV(&events, i);
			pollfd = LTTNG_POLL_GETFD(&events, i);

			health_code_update();

			if (!revents) {
				/* No activity for this FD (poll implementation). */
				continue;
			}

			/* Thread quit pipe has been closed. Killing thread. */
			ret = sessiond_check_thread_quit_pipe(pollfd, revents);
			if (ret) {
				err = 0;
				goto exit;
			}

			/* Inspect the apps cmd pipe */
			if (pollfd == apps_cmd_pipe[0]) {
				if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
					ERR("Apps command pipe error");
					goto error;
				} else if (revents & LPOLLIN) {
					int sock;

					/* Empty pipe */
					size_ret = lttng_read(apps_cmd_pipe[0], &sock, sizeof(sock));
					if (size_ret < sizeof(sock)) {
						PERROR("read apps cmd pipe");
						goto error;
					}

					health_code_update();

					/*
					 * We only monitor the error events of the socket. This
					 * thread does not handle any incoming data from UST
					 * (POLLIN).
					 */
					ret = lttng_poll_add(&events, sock,
							LPOLLERR | LPOLLHUP | LPOLLRDHUP);
					if (ret < 0) {
						goto error;
					}

					DBG("Apps with sock %d added to poll set", sock);
				}
			} else {
				/*
				 * At this point, we know that a registered application
				 * triggered the event at poll_wait.
				 */
				if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
					/* Removing from the poll set */
					ret = lttng_poll_del(&events, pollfd);
					if (ret < 0) {
						goto error;
					}

					/* Socket closed on remote end. */
					ust_app_unregister(pollfd);
				}
			}

			health_code_update();
		}
	}

exit:
error:
	lttng_poll_clean(&events);
error_poll_create:
error_testpoint:
	utils_close_pipe(apps_cmd_pipe);
	apps_cmd_pipe[0] = apps_cmd_pipe[1] = -1;

	/*
	 * We don't clean the UST app hash table here since already registered
	 * applications can still be controlled so let them be until the session
	 * daemon dies or the applications stop.
	 */

	if (err) {
		health_error();
		ERR("Health error occurred in %s", __func__);
	}
	health_unregister(health_sessiond);
	DBG("Application communication apps thread cleanup complete");
	rcu_thread_offline();
	rcu_unregister_thread();
	return NULL;
}

/*
 * Send a socket to a thread. This is called from the dispatch UST
 * registration thread once all sockets are set for the application.
 *
 * The sock value can be invalid, we don't really care, the thread will handle
 * it and make the necessary cleanup if so.
 *
 * On success, return 0, else return a negative value which is the errno of
 * the failed write().
 */
static int send_socket_to_thread(int fd, int sock)
{
	ssize_t ret;

	/*
	 * It's possible that the FD is concurrently set to -1 just before this
	 * function is called, when the thread is shutting down.
	 */
	if (fd < 0) {
		ret = -EBADF;
		goto error;
	}

	ret = lttng_write(fd, &sock, sizeof(sock));
	if (ret < sizeof(sock)) {
		PERROR("write apps pipe %d", fd);
		if (ret < 0) {
			ret = -errno;
		}
		goto error;
	}

	/* All good. Don't send back the write positive ret value. */
	ret = 0;
error:
	return (int) ret;
}
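
/*
 * Usage sketch (mirrors the dispatch thread below): the return value is 0
 * or a negative errno, so callers simply test for failure:
 *
 *	ret = send_socket_to_thread(apps_cmd_pipe[1], app->sock);
 *	if (ret < 0) {
 *		// no manage-apps thread; stop UST tracing for this app
 *	}
 */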

/*
 * Sanitize the wait queue of the dispatch registration thread, that is,
 * remove invalid nodes from it. This avoids memory leaks when the UST
 * notify socket is never received.
 */
static void sanitize_wait_queue(struct ust_reg_wait_queue *wait_queue)
{
	int ret, nb_fd = 0, i;
	unsigned int fd_added = 0;
	struct lttng_poll_event events;
	struct ust_reg_wait_node *wait_node = NULL, *tmp_wait_node;

	assert(wait_queue);

	lttng_poll_init(&events);

	/* Just skip everything for an empty queue. */
	if (!wait_queue->count) {
		goto end;
	}

	ret = lttng_poll_create(&events, wait_queue->count, LTTNG_CLOEXEC);
	if (ret < 0) {
		goto error_create;
	}

	cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
			&wait_queue->head, head) {
		assert(wait_node->app);
		ret = lttng_poll_add(&events, wait_node->app->sock,
				LPOLLHUP | LPOLLERR);
		if (ret < 0) {
			goto error;
		}

		fd_added = 1;
	}

	if (!fd_added) {
		goto end;
	}

	/*
	 * Poll but don't block so we can quickly identify the faulty events and
	 * clean them afterwards from the wait queue.
	 */
	ret = lttng_poll_wait(&events, 0);
	if (ret < 0) {
		goto error;
	}
	nb_fd = ret;

	for (i = 0; i < nb_fd; i++) {
		/* Get faulty FD. */
		uint32_t revents = LTTNG_POLL_GETEV(&events, i);
		int pollfd = LTTNG_POLL_GETFD(&events, i);

		if (!revents) {
			/* No activity for this FD (poll implementation). */
			continue;
		}

		cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
				&wait_queue->head, head) {
			if (pollfd == wait_node->app->sock &&
					(revents & (LPOLLHUP | LPOLLERR))) {
				cds_list_del(&wait_node->head);
				wait_queue->count--;
				ust_app_destroy(wait_node->app);
				free(wait_node);
				break;
			}
		}
	}

	if (nb_fd > 0) {
		DBG("Wait queue sanitized, %d nodes were cleaned up", nb_fd);
	}

end:
	lttng_poll_clean(&events);
	return;

error:
	lttng_poll_clean(&events);
error_create:
	ERR("Unable to sanitize wait queue");
	return;
}

/*
 * Dispatch request from the registration threads to the application
 * communication thread.
 */
static void *thread_dispatch_ust_registration(void *data)
{
	int ret, err = -1;
	struct cds_wfcq_node *node;
	struct ust_command *ust_cmd = NULL;
	struct ust_reg_wait_node *wait_node = NULL, *tmp_wait_node;
	struct ust_reg_wait_queue wait_queue = {
		.count = 0,
	};

	health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_REG_DISPATCH);

	if (testpoint(sessiond_thread_app_reg_dispatch)) {
		goto error_testpoint;
	}

	health_code_update();

	CDS_INIT_LIST_HEAD(&wait_queue.head);

	DBG("[thread] Dispatch UST command started");

	while (!CMM_LOAD_SHARED(dispatch_thread_exit)) {
		health_code_update();

		/* Atomically prepare the queue futex */
		futex_nto1_prepare(&ust_cmd_queue.futex);

		do {
			struct ust_app *app = NULL;
			ust_cmd = NULL;

			/*
			 * Make sure we don't have node(s) that have hung up before receiving
			 * the notify socket. This is to clean the list in order to avoid
			 * memory leaks from notify socket that are never seen.
			 */
			sanitize_wait_queue(&wait_queue);

			health_code_update();
			/* Dequeue command for registration */
			node = cds_wfcq_dequeue_blocking(&ust_cmd_queue.head, &ust_cmd_queue.tail);
			if (node == NULL) {
				DBG("Woken up but nothing in the UST command queue");
				/* Continue thread execution */
				break;
			}

			ust_cmd = caa_container_of(node, struct ust_command, node);

			DBG("Dispatching UST registration pid:%d ppid:%d uid:%d"
					" gid:%d sock:%d name:%s (version %d.%d)",
					ust_cmd->reg_msg.pid, ust_cmd->reg_msg.ppid,
					ust_cmd->reg_msg.uid, ust_cmd->reg_msg.gid,
					ust_cmd->sock, ust_cmd->reg_msg.name,
					ust_cmd->reg_msg.major, ust_cmd->reg_msg.minor);

			if (ust_cmd->reg_msg.type == USTCTL_SOCKET_CMD) {
				wait_node = zmalloc(sizeof(*wait_node));
				if (!wait_node) {
					PERROR("zmalloc wait_node dispatch");
					ret = close(ust_cmd->sock);
					if (ret < 0) {
						PERROR("close ust sock dispatch %d", ust_cmd->sock);
					}
					lttng_fd_put(LTTNG_FD_APPS, 1);
					free(ust_cmd);
					goto error;
				}
				CDS_INIT_LIST_HEAD(&wait_node->head);

				/* Create application object if socket is CMD. */
				wait_node->app = ust_app_create(&ust_cmd->reg_msg,
						ust_cmd->sock);
				if (!wait_node->app) {
					ret = close(ust_cmd->sock);
					if (ret < 0) {
						PERROR("close ust sock dispatch %d", ust_cmd->sock);
					}
					lttng_fd_put(LTTNG_FD_APPS, 1);
					free(wait_node);
					free(ust_cmd);
					continue;
				}
				/*
				 * Add application to the wait queue so we can set the notify
				 * socket before putting this object in the global ht.
				 */
				cds_list_add(&wait_node->head, &wait_queue.head);
				wait_queue.count++;

				free(ust_cmd);
				/*
				 * We have to continue here since we don't have the notify
				 * socket and the application MUST be added to the hash table
				 * only at that moment.
				 */
				continue;
			} else {
				/*
				 * Look for the application in the local wait queue and set the
				 * notify socket if found.
				 */
				cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
						&wait_queue.head, head) {
					health_code_update();
					if (wait_node->app->pid == ust_cmd->reg_msg.pid) {
						wait_node->app->notify_sock = ust_cmd->sock;
						cds_list_del(&wait_node->head);
						wait_queue.count--;
						app = wait_node->app;
						free(wait_node);
						DBG3("UST app notify socket %d is set", ust_cmd->sock);
						break;
					}
				}

				/*
				 * With no application at this stage the received socket is
				 * basically useless so close it before we free the cmd data
				 * structure for good.
				 */
				if (!app) {
					ret = close(ust_cmd->sock);
					if (ret < 0) {
						PERROR("close ust sock dispatch %d", ust_cmd->sock);
					}
					lttng_fd_put(LTTNG_FD_APPS, 1);
				}
				free(ust_cmd);
			}

			if (app) {
				/*
				 * @session_lock_list
				 *
				 * Lock the global session list so from the register up to the
				 * registration done message, no thread can see the application
				 * and change its state.
				 */
				session_lock_list();
				rcu_read_lock();

				/*
				 * Add application to the global hash table. This needs to be
				 * done before the update to the UST registry can locate the
				 * application.
				 */
				ust_app_add(app);

				/* Set app version. This call will print an error if needed. */
				(void) ust_app_version(app);

				/* Send notify socket through the notify pipe. */
				ret = send_socket_to_thread(apps_cmd_notify_pipe[1],
						app->notify_sock);
				if (ret < 0) {
					rcu_read_unlock();
					session_unlock_list();
					/*
					 * No notify thread, stop the UST tracing. However, this
					 * is not an internal error of this thread, so set the
					 * health error code to a normal exit.
					 */
					err = 0;
					goto error;
				}

				/*
				 * Update newly registered application with the tracing
				 * registry info already enabled information.
				 */
				update_ust_app(app->sock);

				/*
				 * Don't care about return value. Let the manage apps threads
				 * handle app unregistration upon socket close.
				 */
				(void) ust_app_register_done(app->sock);

				/*
				 * Even if the application socket has been closed, send the app
				 * to the thread and unregistration will take place at that
				 * place.
				 */
				ret = send_socket_to_thread(apps_cmd_pipe[1], app->sock);
				if (ret < 0) {
					rcu_read_unlock();
					session_unlock_list();
					/*
					 * No apps thread, stop the UST tracing. However, this
					 * is not an internal error of this thread, so set the
					 * health error code to a normal exit.
					 */
					err = 0;
					goto error;
				}

				rcu_read_unlock();
				session_unlock_list();
			}
		} while (node != NULL);

		health_poll_entry();
		/* Futex wait on queue. Blocking call on futex() */
		futex_nto1_wait(&ust_cmd_queue.futex);
		health_poll_exit();
	}
	/* Normal exit, no error */
	err = 0;

error:
	/* Clean up wait queue. */
	cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
			&wait_queue.head, head) {
		cds_list_del(&wait_node->head);
		wait_queue.count--;
		free(wait_node);
	}

error_testpoint:
	DBG("Dispatch thread dying");
	if (err) {
		health_error();
		ERR("Health error occurred in %s", __func__);
	}
	health_unregister(health_sessiond);
	return NULL;
}

/*
 * This thread manages application registration.
 */
2009 static void *thread_registration_apps(void *data)
2010 {
2011 int sock = -1, i, ret, pollfd, err = -1;
2012 uint32_t revents, nb_fd;
2013 struct lttng_poll_event events;
2014 /*
2015 * Get allocated in this thread, enqueued to a global queue, dequeued and
2016 * freed in the manage apps thread.
2017 */
2018 struct ust_command *ust_cmd = NULL;
2019
2020 DBG("[thread] Manage application registration started");
2021
2022 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_REG);
2023
2024 if (testpoint(sessiond_thread_registration_apps)) {
2025 goto error_testpoint;
2026 }
2027
2028 ret = lttcomm_listen_unix_sock(apps_sock);
2029 if (ret < 0) {
2030 goto error_listen;
2031 }
2032
2033 /*
2034 * Pass 2 as size here for the thread quit pipe and apps socket. Nothing
2035 * more will be added to this poll set.
2036 */
2037 ret = sessiond_set_thread_pollset(&events, 2);
2038 if (ret < 0) {
2039 goto error_create_poll;
2040 }
2041
2042 /* Add the application registration socket */
2043 ret = lttng_poll_add(&events, apps_sock, LPOLLIN | LPOLLRDHUP);
2044 if (ret < 0) {
2045 goto error_poll_add;
2046 }
2047
2048 /* Notify all applications to register */
2049 ret = notify_ust_apps(1);
2050 if (ret < 0) {
2051 ERR("Failed to notify applications or create the wait shared memory.\n"
2052 "Execution continues but there might be problem for already\n"
2053 "running applications that wishes to register.");
2054 }
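	/*
	 * For context (assumed behaviour of notify_ust_apps(), hedged):
	 * passing 1 flips the "wait" shared-memory futex that instrumented
	 * applications block on, telling them the session daemon is ready to
	 * accept registrations; the matching notify_ust_apps(0) in the error
	 * path below signals that the registration thread is gone.
	 */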
2055
2056 while (1) {
2057 DBG("Accepting application registration");
2058
2059		/* Infinite blocking call, waiting for transmission */
2060 restart:
2061 health_poll_entry();
2062 ret = lttng_poll_wait(&events, -1);
2063 health_poll_exit();
2064 if (ret < 0) {
2065 /*
2066 * Restart interrupted system call.
2067 */
2068 if (errno == EINTR) {
2069 goto restart;
2070 }
2071 goto error;
2072 }
2073
2074 nb_fd = ret;
2075
2076 for (i = 0; i < nb_fd; i++) {
2077 health_code_update();
2078
2079 /* Fetch once the poll data */
2080 revents = LTTNG_POLL_GETEV(&events, i);
2081 pollfd = LTTNG_POLL_GETFD(&events, i);
2082
2083 if (!revents) {
2084 /* No activity for this FD (poll implementation). */
2085 continue;
2086 }
2087
2088 /* Thread quit pipe has been closed. Killing thread. */
2089 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
2090 if (ret) {
2091 err = 0;
2092 goto exit;
2093 }
2094
2095 /* Event on the registration socket */
2096 if (pollfd == apps_sock) {
2097 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
2098 ERR("Register apps socket poll error");
2099 goto error;
2100 } else if (revents & LPOLLIN) {
2101 sock = lttcomm_accept_unix_sock(apps_sock);
2102 if (sock < 0) {
2103 goto error;
2104 }
2105
2106 /*
2107					 * Set socket timeout for both receiving and sending.
2108 * app_socket_timeout is in seconds, whereas
2109 * lttcomm_setsockopt_rcv_timeout and
2110 * lttcomm_setsockopt_snd_timeout expect msec as
2111 * parameter.
2112 */
2113 (void) lttcomm_setsockopt_rcv_timeout(sock,
2114 app_socket_timeout * 1000);
2115 (void) lttcomm_setsockopt_snd_timeout(sock,
2116 app_socket_timeout * 1000);
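				/*
				 * For example, with an app_socket_timeout of
				 * 5 seconds, both setsockopt wrappers above
				 * receive 5000 (milliseconds).
				 */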
2117
2118 /*
2119 * Set the CLOEXEC flag. Return code is useless because
2120 * either way, the show must go on.
2121 */
2122 (void) utils_set_fd_cloexec(sock);
2123
2124 /* Create UST registration command for enqueuing */
2125 ust_cmd = zmalloc(sizeof(struct ust_command));
2126 if (ust_cmd == NULL) {
2127 PERROR("ust command zmalloc");
2128 goto error;
2129 }
2130
2131 /*
2132 * Using message-based transmissions to ensure we don't
2133 * have to deal with partially received messages.
2134 */
2135 ret = lttng_fd_get(LTTNG_FD_APPS, 1);
2136 if (ret < 0) {
2137 ERR("Exhausted file descriptors allowed for applications.");
2138 free(ust_cmd);
2139 ret = close(sock);
2140 if (ret) {
2141 PERROR("close");
2142 }
2143 sock = -1;
2144 continue;
2145 }
2146
2147 health_code_update();
2148 ret = ust_app_recv_registration(sock, &ust_cmd->reg_msg);
2149 if (ret < 0) {
2150 free(ust_cmd);
2151 /* Close socket of the application. */
2152 ret = close(sock);
2153 if (ret) {
2154 PERROR("close");
2155 }
2156 lttng_fd_put(LTTNG_FD_APPS, 1);
2157 sock = -1;
2158 continue;
2159 }
2160 health_code_update();
2161
2162 ust_cmd->sock = sock;
2163 sock = -1;
2164
2165 DBG("UST registration received with pid:%d ppid:%d uid:%d"
2166 " gid:%d sock:%d name:%s (version %d.%d)",
2167 ust_cmd->reg_msg.pid, ust_cmd->reg_msg.ppid,
2168 ust_cmd->reg_msg.uid, ust_cmd->reg_msg.gid,
2169 ust_cmd->sock, ust_cmd->reg_msg.name,
2170 ust_cmd->reg_msg.major, ust_cmd->reg_msg.minor);
2171
2172 /*
2173				 * Lock-free enqueue of the registration request. The red pill
2174				 * has been taken! This app will be part of the *system*.
2175 */
2176 cds_wfcq_enqueue(&ust_cmd_queue.head, &ust_cmd_queue.tail, &ust_cmd->node);
2177
2178 /*
2179 * Wake the registration queue futex. Implicit memory
2180 * barrier with the exchange in cds_wfcq_enqueue.
2181 */
2182 futex_nto1_wake(&ust_cmd_queue.futex);
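				/*
				 * The matching waiter is the dispatch thread,
				 * which blocks in
				 * futex_nto1_wait(&ust_cmd_queue.futex) and,
				 * once woken, drains ust_cmd_queue. Thanks to
				 * the barrier noted above, it is guaranteed to
				 * observe a fully initialized ust_cmd.
				 */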
2183 }
2184 }
2185 }
2186 }
2187
2188 exit:
2189 error:
2190 /* Notify that the registration thread is gone */
2191 notify_ust_apps(0);
2192
2193 if (apps_sock >= 0) {
2194 ret = close(apps_sock);
2195 if (ret) {
2196 PERROR("close");
2197 }
2198 }
2199 if (sock >= 0) {
2200 ret = close(sock);
2201 if (ret) {
2202 PERROR("close");
2203 }
2204 lttng_fd_put(LTTNG_FD_APPS, 1);
2205 }
2206 unlink(apps_unix_sock_path);
2207
2208 error_poll_add:
2209 lttng_poll_clean(&events);
2210 error_listen:
2211 error_create_poll:
2212 error_testpoint:
2213 DBG("UST Registration thread cleanup complete");
2214 if (err) {
2215 health_error();
2216 ERR("Health error occurred in %s", __func__);
2217 }
2218 health_unregister(health_sessiond);
2219
2220 return NULL;
2221 }
2222
2223 /*
2224  * Start the thread_manage_consumer. This must be done after an
2225  * lttng-consumerd exec or it will fail.
2226 */
2227 static int spawn_consumer_thread(struct consumer_data *consumer_data)
2228 {
2229 int ret, clock_ret;
2230 struct timespec timeout;
2231
2232 /* Make sure we set the readiness flag to 0 because we are NOT ready */
2233 consumer_data->consumer_thread_is_ready = 0;
2234
2235 /* Setup pthread condition */
2236 ret = pthread_condattr_init(&consumer_data->condattr);
2237 if (ret) {
2238 errno = ret;
2239 PERROR("pthread_condattr_init consumer data");
2240 goto error;
2241 }
2242
2243 /*
2244 * Set the monotonic clock in order to make sure we DO NOT jump in time
2245 * between the clock_gettime() call and the timedwait call. See bug #324
2246	 * for more details and how we noticed it.
2247 */
2248 ret = pthread_condattr_setclock(&consumer_data->condattr, CLOCK_MONOTONIC);
2249 if (ret) {
2250 errno = ret;
2251 PERROR("pthread_condattr_setclock consumer data");
2252 goto error;
2253 }
2254
2255 ret = pthread_cond_init(&consumer_data->cond, &consumer_data->condattr);
2256 if (ret) {
2257 errno = ret;
2258 PERROR("pthread_cond_init consumer data");
2259 goto error;
2260 }
2261
2262 ret = pthread_create(&consumer_data->thread, NULL, thread_manage_consumer,
2263 consumer_data);
2264 if (ret) {
2265 errno = ret;
2266 PERROR("pthread_create consumer");
2267 ret = -1;
2268 goto error;
2269 }
2270
2271 /* We are about to wait on a pthread condition */
2272 pthread_mutex_lock(&consumer_data->cond_mutex);
2273
2274	/* Get time for the pthread_cond_timedwait absolute timeout */
2275 clock_ret = clock_gettime(CLOCK_MONOTONIC, &timeout);
2276 /*
2277 * Set the timeout for the condition timed wait even if the clock gettime
2278	 * call fails, since we might loop on that call and we want to avoid
2279	 * incrementing the timeout too many times.
2280 */
2281 timeout.tv_sec += DEFAULT_SEM_WAIT_TIMEOUT;
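	/*
	 * Sketch of the absolute-deadline pattern used here; CLOCK_MONOTONIC
	 * must match the clock set on the condattr above:
	 *
	 *     struct timespec deadline;
	 *     clock_gettime(CLOCK_MONOTONIC, &deadline);
	 *     deadline.tv_sec += DEFAULT_SEM_WAIT_TIMEOUT;
	 *     pthread_cond_timedwait(&cond, &mutex, &deadline);
	 */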
2282
2283 /*
2284	 * The following loop COULD be skipped in some conditions, which is why
2285	 * ret is set to 0: it guarantees that at least one round of the loop
2286	 * runs.
2287 */
2288 ret = 0;
2289
2290 /*
2291	 * Loop until the condition is signaled or a timeout is reached. Note
2292	 * that the pthread_cond_timedwait(P) man page specifies that EINTR can
2293	 * NOT be returned, but pthread_cond(3), from the glibc documentation,
2294	 * says it is possible. This loop does not take any chances and works
2295	 * with both.
2296 */
2297 while (!consumer_data->consumer_thread_is_ready && ret != ETIMEDOUT) {
2298 if (clock_ret < 0) {
2299 PERROR("clock_gettime spawn consumer");
2300 /* Infinite wait for the consumerd thread to be ready */
2301 ret = pthread_cond_wait(&consumer_data->cond,
2302 &consumer_data->cond_mutex);
2303 } else {
2304 ret = pthread_cond_timedwait(&consumer_data->cond,
2305 &consumer_data->cond_mutex, &timeout);
2306 }
2307 }
2308
2309 /* Release the pthread condition */
2310 pthread_mutex_unlock(&consumer_data->cond_mutex);
2311
2312 if (ret != 0) {
2313 errno = ret;
2314 if (ret == ETIMEDOUT) {
2315 int pth_ret;
2316
2317 /*
2318 * Call has timed out so we kill the kconsumerd_thread and return
2319 * an error.
2320 */
2321 ERR("Condition timed out. The consumer thread was never ready."
2322 " Killing it");
2323 pth_ret = pthread_cancel(consumer_data->thread);
2324 if (pth_ret < 0) {
2325 PERROR("pthread_cancel consumer thread");
2326 }
2327 } else {
2328 PERROR("pthread_cond_wait failed consumer thread");
2329 }
2330 /* Caller is expecting a negative value on failure. */
2331 ret = -1;
2332 goto error;
2333 }
2334
2335 pthread_mutex_lock(&consumer_data->pid_mutex);
2336 if (consumer_data->pid == 0) {
2337 ERR("Consumerd did not start");
2338 pthread_mutex_unlock(&consumer_data->pid_mutex);
2339 goto error;
2340 }
2341 pthread_mutex_unlock(&consumer_data->pid_mutex);
2342
2343 return 0;
2344
2345 error:
2346 return ret;
2347 }
2348
2349 /*
2350 * Join consumer thread
2351 */
2352 static int join_consumer_thread(struct consumer_data *consumer_data)
2353 {
2354 void *status;
2355
2356 /* Consumer pid must be a real one. */
2357 if (consumer_data->pid > 0) {
2358 int ret;
2359 ret = kill(consumer_data->pid, SIGTERM);
2360 if (ret) {
2361 PERROR("Error killing consumer daemon");
2362 return ret;
2363 }
2364 return pthread_join(consumer_data->thread, &status);
2365 } else {
2366 return 0;
2367 }
2368 }
2369
2370 /*
2371 * Fork and exec a consumer daemon (consumerd).
2372 *
2373  * Return the child pid on success, or a negative value on error.
2374 */
2375 static pid_t spawn_consumerd(struct consumer_data *consumer_data)
2376 {
2377 int ret;
2378 pid_t pid;
2379 const char *consumer_to_use;
2380 const char *verbosity;
2381 struct stat st;
2382
2383 DBG("Spawning consumerd");
2384
2385 pid = fork();
2386 if (pid == 0) {
2387 /*
2388 * Exec consumerd.
2389 */
2390 if (opt_verbose_consumer) {
2391 verbosity = "--verbose";
2392 } else if (lttng_opt_quiet) {
2393 verbosity = "--quiet";
2394 } else {
2395 verbosity = "";
2396 }
2397
2398 switch (consumer_data->type) {
2399 case LTTNG_CONSUMER_KERNEL:
2400 /*
2401 * Find out which consumerd to execute. We will first try the
2402 * 64-bit path, then the sessiond's installation directory, and
2403			 * finally fall back on the 32-bit one.
2404 */
2405 DBG3("Looking for a kernel consumer at these locations:");
2406 DBG3(" 1) %s", consumerd64_bin);
2407 DBG3(" 2) %s/%s", INSTALL_BIN_PATH, CONSUMERD_FILE);
2408 DBG3(" 3) %s", consumerd32_bin);
2409 if (stat(consumerd64_bin, &st) == 0) {
2410 DBG3("Found location #1");
2411 consumer_to_use = consumerd64_bin;
2412 } else if (stat(INSTALL_BIN_PATH "/" CONSUMERD_FILE, &st) == 0) {
2413 DBG3("Found location #2");
2414 consumer_to_use = INSTALL_BIN_PATH "/" CONSUMERD_FILE;
2415 } else if (stat(consumerd32_bin, &st) == 0) {
2416 DBG3("Found location #3");
2417 consumer_to_use = consumerd32_bin;
2418 } else {
2419 DBG("Could not find any valid consumerd executable");
2420 ret = -EINVAL;
2421 break;
2422 }
2423 DBG("Using kernel consumer at: %s", consumer_to_use);
2424 ret = execl(consumer_to_use,
2425 "lttng-consumerd", verbosity, "-k",
2426 "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
2427 "--consumerd-err-sock", consumer_data->err_unix_sock_path,
2428 "--group", tracing_group_name,
2429 NULL);
2430 break;
2431 case LTTNG_CONSUMER64_UST:
2432 {
2433 char *tmpnew = NULL;
2434
2435 if (consumerd64_libdir[0] != '\0') {
2436 char *tmp;
2437 size_t tmplen;
2438
2439 tmp = getenv("LD_LIBRARY_PATH");
2440 if (!tmp) {
2441 tmp = "";
2442 }
2443 tmplen = strlen("LD_LIBRARY_PATH=")
2444 + strlen(consumerd64_libdir) + 1 /* : */ + strlen(tmp);
2445 tmpnew = zmalloc(tmplen + 1 /* \0 */);
2446 if (!tmpnew) {
2447 ret = -ENOMEM;
2448 goto error;
2449 }
2450 strcpy(tmpnew, "LD_LIBRARY_PATH=");
2451 strcat(tmpnew, consumerd64_libdir);
2452 if (tmp[0] != '\0') {
2453 strcat(tmpnew, ":");
2454 strcat(tmpnew, tmp);
2455 }
2456 ret = putenv(tmpnew);
2457 if (ret) {
2458 ret = -errno;
2459 free(tmpnew);
2460 goto error;
2461 }
2462 }
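		/*
		 * Worked example (hypothetical paths): with consumerd64_libdir
		 * set to "/usr/lib64/lttng" and an inherited LD_LIBRARY_PATH of
		 * "/opt/lib", putenv() above receives the string
		 * "LD_LIBRARY_PATH=/usr/lib64/lttng:/opt/lib", so the 64-bit
		 * consumerd resolves its libraries from its own libdir first.
		 */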
2463 DBG("Using 64-bit UST consumer at: %s", consumerd64_bin);
2464 ret = execl(consumerd64_bin, "lttng-consumerd", verbosity, "-u",
2465 "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
2466 "--consumerd-err-sock", consumer_data->err_unix_sock_path,
2467 "--group", tracing_group_name,
2468 NULL);
2469 if (consumerd64_libdir[0] != '\0') {
2470 free(tmpnew);
2471 }
2472 break;
2473 }
2474 case LTTNG_CONSUMER32_UST:
2475 {
2476 char *tmpnew = NULL;
2477
2478 if (consumerd32_libdir[0] != '\0') {
2479 char *tmp;
2480 size_t tmplen;
2481
2482 tmp = getenv("LD_LIBRARY_PATH");
2483 if (!tmp) {
2484 tmp = "";
2485 }
2486 tmplen = strlen("LD_LIBRARY_PATH=")
2487 + strlen(consumerd32_libdir) + 1 /* : */ + strlen(tmp);
2488 tmpnew = zmalloc(tmplen + 1 /* \0 */);
2489 if (!tmpnew) {
2490 ret = -ENOMEM;
2491 goto error;
2492 }
2493 strcpy(tmpnew, "LD_LIBRARY_PATH=");
2494 strcat(tmpnew, consumerd32_libdir);
2495 if (tmp[0] != '\0') {
2496 strcat(tmpnew, ":");
2497 strcat(tmpnew, tmp);
2498 }
2499 ret = putenv(tmpnew);
2500 if (ret) {
2501 ret = -errno;
2502 free(tmpnew);
2503 goto error;
2504 }
2505 }
2506 DBG("Using 32-bit UST consumer at: %s", consumerd32_bin);
2507 ret = execl(consumerd32_bin, "lttng-consumerd", verbosity, "-u",
2508 "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
2509 "--consumerd-err-sock", consumer_data->err_unix_sock_path,
2510 "--group", tracing_group_name,
2511 NULL);
2512 if (consumerd32_libdir[0] != '\0') {
2513 free(tmpnew);
2514 }
2515 break;
2516 }
2517 default:
2518 PERROR("unknown consumer type");
2519 exit(EXIT_FAILURE);
2520 }
2521 if (errno != 0) {
2522 PERROR("Consumer execl()");
2523 }
2524 /* Reaching this point, we got a failure on our execl(). */
2525 exit(EXIT_FAILURE);
2526 } else if (pid > 0) {
2527 ret = pid;
2528 } else {
2529 PERROR("start consumer fork");
2530 ret = -errno;
2531 }
2532 error:
2533 return ret;
2534 }
2535
2536 /*
2537 * Spawn the consumerd daemon and session daemon thread.
2538 */
2539 static int start_consumerd(struct consumer_data *consumer_data)
2540 {
2541 int ret;
2542
2543 /*
2544 * Set the listen() state on the socket since there is a possible race
2545	 * between the exec() of the consumer daemon and this call if placed in the
2546 * consumer thread. See bug #366 for more details.
2547 */
2548 ret = lttcomm_listen_unix_sock(consumer_data->err_sock);
2549 if (ret < 0) {
2550 goto error;
2551 }
2552
2553 pthread_mutex_lock(&consumer_data->pid_mutex);
2554 if (consumer_data->pid != 0) {
2555 pthread_mutex_unlock(&consumer_data->pid_mutex);
2556 goto end;
2557 }
2558
2559 ret = spawn_consumerd(consumer_data);
2560 if (ret < 0) {
2561 ERR("Spawning consumerd failed");
2562 pthread_mutex_unlock(&consumer_data->pid_mutex);
2563 goto error;
2564 }
2565
2566 /* Setting up the consumer_data pid */
2567 consumer_data->pid = ret;
2568 DBG2("Consumer pid %d", consumer_data->pid);
2569 pthread_mutex_unlock(&consumer_data->pid_mutex);
2570
2571 DBG2("Spawning consumer control thread");
2572 ret = spawn_consumer_thread(consumer_data);
2573 if (ret < 0) {
2574 ERR("Fatal error spawning consumer control thread");
2575 goto error;
2576 }
2577
2578 end:
2579 return 0;
2580
2581 error:
2582 /* Cleanup already created sockets on error. */
2583 if (consumer_data->err_sock >= 0) {
2584 int err;
2585
2586 err = close(consumer_data->err_sock);
2587 if (err < 0) {
2588 PERROR("close consumer data error socket");
2589 }
2590 }
2591 return ret;
2592 }
2593
2594 /*
2595 * Setup necessary data for kernel tracer action.
2596 */
2597 static int init_kernel_tracer(void)
2598 {
2599 int ret;
2600
2601 /* Modprobe lttng kernel modules */
2602 ret = modprobe_lttng_control();
2603 if (ret < 0) {
2604 goto error;
2605 }
2606
2607	/* Open the LTTng proc interface */
2608 kernel_tracer_fd = open(module_proc_lttng, O_RDWR);
2609 if (kernel_tracer_fd < 0) {
2610 DBG("Failed to open %s", module_proc_lttng);
2611 ret = -1;
2612 goto error_open;
2613 }
2614
2615 /* Validate kernel version */
2616 ret = kernel_validate_version(kernel_tracer_fd);
2617 if (ret < 0) {
2618 goto error_version;
2619 }
2620
2621 ret = modprobe_lttng_data();
2622 if (ret < 0) {
2623 goto error_modules;
2624 }
2625
2626 DBG("Kernel tracer fd %d", kernel_tracer_fd);
2627 return 0;
2628
2629 error_version:
2630 modprobe_remove_lttng_control();
2631 ret = close(kernel_tracer_fd);
2632 if (ret) {
2633 PERROR("close");
2634 }
2635 kernel_tracer_fd = -1;
2636 return LTTNG_ERR_KERN_VERSION;
2637
2638 error_modules:
2639 ret = close(kernel_tracer_fd);
2640 if (ret) {
2641 PERROR("close");
2642 }
2643
2644 error_open:
2645 modprobe_remove_lttng_control();
2646
2647 error:
2648 WARN("No kernel tracer available");
2649 kernel_tracer_fd = -1;
2650 if (!is_root) {
2651 return LTTNG_ERR_NEED_ROOT_SESSIOND;
2652 } else {
2653 return LTTNG_ERR_KERN_NA;
2654 }
2655 }
2656
2657
2658 /*
2659 * Copy consumer output from the tracing session to the domain session. The
2660  * function also applies the right modification on a per-domain basis
2661  * for the trace files' destination directory.
2662 *
2663 * Should *NOT* be called with RCU read-side lock held.
2664 */
2665 static int copy_session_consumer(int domain, struct ltt_session *session)
2666 {
2667 int ret;
2668 const char *dir_name;
2669 struct consumer_output *consumer;
2670
2671 assert(session);
2672 assert(session->consumer);
2673
2674 switch (domain) {
2675 case LTTNG_DOMAIN_KERNEL:
2676 DBG3("Copying tracing session consumer output in kernel session");
2677 /*
2678 * XXX: We should audit the session creation and what this function
2679 * does "extra" in order to avoid a destroy since this function is used
2680 * in the domain session creation (kernel and ust) only. Same for UST
2681 * domain.
2682 */
2683 if (session->kernel_session->consumer) {
2684 consumer_destroy_output(session->kernel_session->consumer);
2685 }
2686 session->kernel_session->consumer =
2687 consumer_copy_output(session->consumer);
2688 /* Ease our life a bit for the next part */
2689 consumer = session->kernel_session->consumer;
2690 dir_name = DEFAULT_KERNEL_TRACE_DIR;
2691 break;
2692 case LTTNG_DOMAIN_JUL:
2693 case LTTNG_DOMAIN_LOG4J:
2694 case LTTNG_DOMAIN_PYTHON:
2695 case LTTNG_DOMAIN_UST:
2696 DBG3("Copying tracing session consumer output in UST session");
2697 if (session->ust_session->consumer) {
2698 consumer_destroy_output(session->ust_session->consumer);
2699 }
2700 session->ust_session->consumer =
2701 consumer_copy_output(session->consumer);
2702 /* Ease our life a bit for the next part */
2703 consumer = session->ust_session->consumer;
2704 dir_name = DEFAULT_UST_TRACE_DIR;
2705 break;
2706 default:
2707 ret = LTTNG_ERR_UNKNOWN_DOMAIN;
2708 goto error;
2709 }
2710
2711 /* Append correct directory to subdir */
2712 strncat(consumer->subdir, dir_name,
2713 sizeof(consumer->subdir) - strlen(consumer->subdir) - 1);
2714 DBG3("Copy session consumer subdir %s", consumer->subdir);
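	/*
	 * Note on the strncat() bound above: passing
	 * sizeof(subdir) - strlen(subdir) - 1 reserves one byte for the
	 * terminating NUL, so an oversized dir_name is truncated instead of
	 * overflowing the buffer.
	 */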
2715
2716 ret = LTTNG_OK;
2717
2718 error:
2719 return ret;
2720 }
2721
2722 /*
2723  * Create a UST session and add it to the session UST list.
2724 *
2725 * Should *NOT* be called with RCU read-side lock held.
2726 */
2727 static int create_ust_session(struct ltt_session *session,
2728 struct lttng_domain *domain)
2729 {
2730 int ret;
2731 struct ltt_ust_session *lus = NULL;
2732
2733 assert(session);
2734 assert(domain);
2735 assert(session->consumer);
2736
2737 switch (domain->type) {
2738 case LTTNG_DOMAIN_JUL:
2739 case LTTNG_DOMAIN_LOG4J:
2740 case LTTNG_DOMAIN_PYTHON:
2741 case LTTNG_DOMAIN_UST:
2742 break;
2743 default:
2744 ERR("Unknown UST domain on create session %d", domain->type);
2745 ret = LTTNG_ERR_UNKNOWN_DOMAIN;
2746 goto error;
2747 }
2748
2749 DBG("Creating UST session");
2750
2751 lus = trace_ust_create_session(session->id);
2752 if (lus == NULL) {
2753 ret = LTTNG_ERR_UST_SESS_FAIL;
2754 goto error;
2755 }
2756
2757 lus->uid = session->uid;
2758 lus->gid = session->gid;
2759 lus->output_traces = session->output_traces;
2760 lus->snapshot_mode = session->snapshot_mode;
2761 lus->live_timer_interval = session->live_timer;
2762 session->ust_session = lus;
2763
2764 /* Copy session output to the newly created UST session */
2765 ret = copy_session_consumer(domain->type, session);
2766 if (ret != LTTNG_OK) {
2767 goto error;
2768 }
2769
2770 return LTTNG_OK;
2771
2772 error:
2773 free(lus);
2774 session->ust_session = NULL;
2775 return ret;
2776 }
2777
2778 /*
2779 * Create a kernel tracer session then create the default channel.
2780 */
2781 static int create_kernel_session(struct ltt_session *session)
2782 {
2783 int ret;
2784
2785 DBG("Creating kernel session");
2786
2787 ret = kernel_create_session(session, kernel_tracer_fd);
2788 if (ret < 0) {
2789 ret = LTTNG_ERR_KERN_SESS_FAIL;
2790 goto error;
2791 }
2792
2793 /* Code flow safety */
2794 assert(session->kernel_session);
2795
2796 /* Copy session output to the newly created Kernel session */
2797 ret = copy_session_consumer(LTTNG_DOMAIN_KERNEL, session);
2798 if (ret != LTTNG_OK) {
2799 goto error;
2800 }
2801
2802 /* Create directory(ies) on local filesystem. */
2803 if (session->kernel_session->consumer->type == CONSUMER_DST_LOCAL &&
2804 strlen(session->kernel_session->consumer->dst.trace_path) > 0) {
2805 ret = run_as_mkdir_recursive(
2806 session->kernel_session->consumer->dst.trace_path,
2807 S_IRWXU | S_IRWXG, session->uid, session->gid);
2808 if (ret < 0) {
2809 if (ret != -EEXIST) {
2810 ERR("Trace directory creation error");
2811 goto error;
2812 }
2813 }
2814 }
2815
2816 session->kernel_session->uid = session->uid;
2817 session->kernel_session->gid = session->gid;
2818 session->kernel_session->output_traces = session->output_traces;
2819 session->kernel_session->snapshot_mode = session->snapshot_mode;
2820
2821 return LTTNG_OK;
2822
2823 error:
2824 trace_kernel_destroy_session(session->kernel_session);
2825 session->kernel_session = NULL;
2826 return ret;
2827 }
2828
2829 /*
2830  * Count the number of sessions accessible by the given uid/gid.
2831 */
2832 static unsigned int lttng_sessions_count(uid_t uid, gid_t gid)
2833 {
2834 unsigned int i = 0;
2835 struct ltt_session *session;
2836
2837 DBG("Counting number of available session for UID %d GID %d",
2838 uid, gid);
2839 cds_list_for_each_entry(session, &session_list_ptr->head, list) {
2840 /*
2841		 * Only count the sessions the user can control.
2842 */
2843 if (!session_access_ok(session, uid, gid)) {
2844 continue;
2845 }
2846 i++;
2847 }
2848 return i;
2849 }
2850
2851 /*
2852 * Process the command requested by the lttng client within the command
2853  * context structure. This function makes sure that the return structure (llm)
2854 * is set and ready for transmission before returning.
2855 *
2856 * Return any error encountered or 0 for success.
2857 *
2858 * "sock" is only used for special-case var. len data.
2859 *
2860 * Should *NOT* be called with RCU read-side lock held.
2861 */
2862 static int process_client_msg(struct command_ctx *cmd_ctx, int sock,
2863 int *sock_error)
2864 {
2865 int ret = LTTNG_OK;
2866 int need_tracing_session = 1;
2867 int need_domain;
2868
2869 DBG("Processing client command %d", cmd_ctx->lsm->cmd_type);
2870
2871 *sock_error = 0;
2872
2873 switch (cmd_ctx->lsm->cmd_type) {
2874 case LTTNG_CREATE_SESSION:
2875 case LTTNG_CREATE_SESSION_SNAPSHOT:
2876 case LTTNG_CREATE_SESSION_LIVE:
2877 case LTTNG_DESTROY_SESSION:
2878 case LTTNG_LIST_SESSIONS:
2879 case LTTNG_LIST_DOMAINS:
2880 case LTTNG_START_TRACE:
2881 case LTTNG_STOP_TRACE:
2882 case LTTNG_DATA_PENDING:
2883 case LTTNG_SNAPSHOT_ADD_OUTPUT:
2884 case LTTNG_SNAPSHOT_DEL_OUTPUT:
2885 case LTTNG_SNAPSHOT_LIST_OUTPUT:
2886 case LTTNG_SNAPSHOT_RECORD:
2887 case LTTNG_SAVE_SESSION:
2888 need_domain = 0;
2889 break;
2890 default:
2891 need_domain = 1;
2892 }
2893
2894 if (opt_no_kernel && need_domain
2895 && cmd_ctx->lsm->domain.type == LTTNG_DOMAIN_KERNEL) {
2896 if (!is_root) {
2897 ret = LTTNG_ERR_NEED_ROOT_SESSIOND;
2898 } else {
2899 ret = LTTNG_ERR_KERN_NA;
2900 }
2901 goto error;
2902 }
2903
2904 /* Deny register consumer if we already have a spawned consumer. */
2905 if (cmd_ctx->lsm->cmd_type == LTTNG_REGISTER_CONSUMER) {
2906 pthread_mutex_lock(&kconsumer_data.pid_mutex);
2907 if (kconsumer_data.pid > 0) {
2908 ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
2909 pthread_mutex_unlock(&kconsumer_data.pid_mutex);
2910 goto error;
2911 }
2912 pthread_mutex_unlock(&kconsumer_data.pid_mutex);
2913 }
2914
2915 /*
2916	 * Check for commands that don't need to allocate a returned payload.
2917	 * We do this here so we don't have to make the call for no payload at
2918	 * each command.
2919 */
2920	switch (cmd_ctx->lsm->cmd_type) {
2921 case LTTNG_LIST_SESSIONS:
2922 case LTTNG_LIST_TRACEPOINTS:
2923 case LTTNG_LIST_TRACEPOINT_FIELDS:
2924 case LTTNG_LIST_DOMAINS:
2925 case LTTNG_LIST_CHANNELS:
2926 case LTTNG_LIST_EVENTS:
2927 case LTTNG_LIST_SYSCALLS:
2928 break;
2929 default:
2930 /* Setup lttng message with no payload */
2931 ret = setup_lttng_msg(cmd_ctx, 0);
2932 if (ret < 0) {
2933 /* This label does not try to unlock the session */
2934 goto init_setup_error;
2935 }
2936 }
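	/*
	 * The LIST_* commands excluded above return a variable-size payload
	 * and call setup_lttng_msg() themselves once the exact size is known;
	 * every other command gets an empty (header-only) reply here.
	 */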
2937
2938 /* Commands that DO NOT need a session. */
2939 switch (cmd_ctx->lsm->cmd_type) {
2940 case LTTNG_CREATE_SESSION:
2941 case LTTNG_CREATE_SESSION_SNAPSHOT:
2942 case LTTNG_CREATE_SESSION_LIVE:
2943 case LTTNG_CALIBRATE:
2944 case LTTNG_LIST_SESSIONS:
2945 case LTTNG_LIST_TRACEPOINTS:
2946 case LTTNG_LIST_SYSCALLS:
2947 case LTTNG_LIST_TRACEPOINT_FIELDS:
2948 case LTTNG_SAVE_SESSION:
2949 need_tracing_session = 0;
2950 break;
2951 default:
2952 DBG("Getting session %s by name", cmd_ctx->lsm->session.name);
2953 /*
2954 * We keep the session list lock across _all_ commands
2955 * for now, because the per-session lock does not
2956 * handle teardown properly.
2957 */
2958 session_lock_list();
2959 cmd_ctx->session = session_find_by_name(cmd_ctx->lsm->session.name);
2960 if (cmd_ctx->session == NULL) {
2961 ret = LTTNG_ERR_SESS_NOT_FOUND;
2962 goto error;
2963 } else {
2964 /* Acquire lock for the session */
2965 session_lock(cmd_ctx->session);
2966 }
2967 break;
2968 }
2969
2970 /*
2971 * Commands that need a valid session but should NOT create one if none
2972	 * exists. Instead of creating one and destroying it when the command is
2973	 * handled, check for that right here so we save a round trip through a
2974	 * useless code path.
2975 */
2976 switch (cmd_ctx->lsm->cmd_type) {
2977 case LTTNG_DISABLE_CHANNEL:
2978 case LTTNG_DISABLE_EVENT:
2979 switch (cmd_ctx->lsm->domain.type) {
2980 case LTTNG_DOMAIN_KERNEL:
2981 if (!cmd_ctx->session->kernel_session) {
2982 ret = LTTNG_ERR_NO_CHANNEL;
2983 goto error;
2984 }
2985 break;
2986 case LTTNG_DOMAIN_JUL:
2987 case LTTNG_DOMAIN_LOG4J:
2988 case LTTNG_DOMAIN_PYTHON:
2989 case LTTNG_DOMAIN_UST:
2990 if (!cmd_ctx->session->ust_session) {
2991 ret = LTTNG_ERR_NO_CHANNEL;
2992 goto error;
2993 }
2994 break;
2995 default:
2996 ret = LTTNG_ERR_UNKNOWN_DOMAIN;
2997 goto error;
2998 }
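		/* Fall through to the default case below. */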
2999 default:
3000 break;
3001 }
3002
3003 if (!need_domain) {
3004 goto skip_domain;
3005 }
3006
3007 /*
3008 * Check domain type for specific "pre-action".
3009 */
3010 switch (cmd_ctx->lsm->domain.type) {
3011 case LTTNG_DOMAIN_KERNEL:
3012 if (!is_root) {
3013 ret = LTTNG_ERR_NEED_ROOT_SESSIOND;
3014 goto error;
3015 }
3016
3017 /* Kernel tracer check */
3018 if (kernel_tracer_fd == -1) {
3019 /* Basically, load kernel tracer modules */
3020 ret = init_kernel_tracer();
3021 if (ret != 0) {
3022 goto error;
3023 }
3024 }
3025
3026 /* Consumer is in an ERROR state. Report back to client */
3027 if (uatomic_read(&kernel_consumerd_state) == CONSUMER_ERROR) {
3028 ret = LTTNG_ERR_NO_KERNCONSUMERD;
3029 goto error;
3030 }
3031
3032 /* Need a session for kernel command */
3033 if (need_tracing_session) {
3034 if (cmd_ctx->session->kernel_session == NULL) {
3035 ret = create_kernel_session(cmd_ctx->session);
3036 if (ret < 0) {
3037 ret = LTTNG_ERR_KERN_SESS_FAIL;
3038 goto error;
3039 }
3040 }
3041
3042 /* Start the kernel consumer daemon */
3043 pthread_mutex_lock(&kconsumer_data.pid_mutex);
3044 if (kconsumer_data.pid == 0 &&
3045 cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER) {
3046 pthread_mutex_unlock(&kconsumer_data.pid_mutex);
3047 ret = start_consumerd(&kconsumer_data);
3048 if (ret < 0) {
3049 ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
3050 goto error;
3051 }
3052 uatomic_set(&kernel_consumerd_state, CONSUMER_STARTED);
3053 } else {
3054 pthread_mutex_unlock(&kconsumer_data.pid_mutex);
3055 }
3056
3057 /*
3058 * The consumer was just spawned so we need to add the socket to
3059			 * the consumer output of the session, if it exists.
3060 */
3061 ret = consumer_create_socket(&kconsumer_data,
3062 cmd_ctx->session->kernel_session->consumer);
3063 if (ret < 0) {
3064 goto error;
3065 }
3066 }
3067
3068 break;
3069 case LTTNG_DOMAIN_JUL:
3070 case LTTNG_DOMAIN_LOG4J:
3071 case LTTNG_DOMAIN_PYTHON:
3072 case LTTNG_DOMAIN_UST:
3073 {
3074 if (!ust_app_supported()) {
3075 ret = LTTNG_ERR_NO_UST;
3076 goto error;
3077 }
3078 /* Consumer is in an ERROR state. Report back to client */
3079 if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) {
3080 ret = LTTNG_ERR_NO_USTCONSUMERD;
3081 goto error;
3082 }
3083
3084 if (need_tracing_session) {
3085 /* Create UST session if none exist. */
3086 if (cmd_ctx->session->ust_session == NULL) {
3087 ret = create_ust_session(cmd_ctx->session,
3088 &cmd_ctx->lsm->domain);
3089 if (ret != LTTNG_OK) {
3090 goto error;
3091 }
3092 }
3093
3094 /* Start the UST consumer daemons */
3095 /* 64-bit */
3096 pthread_mutex_lock(&ustconsumer64_data.pid_mutex);
3097 if (consumerd64_bin[0] != '\0' &&
3098 ustconsumer64_data.pid == 0 &&
3099 cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER) {
3100 pthread_mutex_unlock(&ustconsumer64_data.pid_mutex);
3101 ret = start_consumerd(&ustconsumer64_data);
3102 if (ret < 0) {
3103 ret = LTTNG_ERR_UST_CONSUMER64_FAIL;
3104 uatomic_set(&ust_consumerd64_fd, -EINVAL);
3105 goto error;
3106 }
3107
3108 uatomic_set(&ust_consumerd64_fd, ustconsumer64_data.cmd_sock);
3109 uatomic_set(&ust_consumerd_state, CONSUMER_STARTED);
3110 } else {
3111 pthread_mutex_unlock(&ustconsumer64_data.pid_mutex);
3112 }
3113
3114 /*
3115			 * Set up the socket for the 64-bit consumer. No need for atomic access
3116 * since it was set above and can ONLY be set in this thread.
3117 */
3118 ret = consumer_create_socket(&ustconsumer64_data,
3119 cmd_ctx->session->ust_session->consumer);
3120 if (ret < 0) {
3121 goto error;
3122 }
3123
3124 /* 32-bit */
3125 pthread_mutex_lock(&ustconsumer32_data.pid_mutex);
3126 if (consumerd32_bin[0] != '\0' &&
3127 ustconsumer32_data.pid == 0 &&
3128 cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER) {
3129 pthread_mutex_unlock(&ustconsumer32_data.pid_mutex);
3130 ret = start_consumerd(&ustconsumer32_data);
3131 if (ret < 0) {
3132 ret = LTTNG_ERR_UST_CONSUMER32_FAIL;
3133 uatomic_set(&ust_consumerd32_fd, -EINVAL);
3134 goto error;
3135 }
3136
3137 uatomic_set(&ust_consumerd32_fd, ustconsumer32_data.cmd_sock);
3138 uatomic_set(&ust_consumerd_state, CONSUMER_STARTED);
3139 } else {
3140 pthread_mutex_unlock(&ustconsumer32_data.pid_mutex);
3141 }
3142
3143 /*
3144			 * Set up the socket for the 32-bit consumer. No need for atomic access
3145 * since it was set above and can ONLY be set in this thread.
3146 */
3147 ret = consumer_create_socket(&ustconsumer32_data,
3148 cmd_ctx->session->ust_session->consumer);
3149 if (ret < 0) {
3150 goto error;
3151 }
3152 }
3153 break;
3154 }
3155 default:
3156 break;
3157 }
3158 skip_domain:
3159
3160 /* Validate consumer daemon state when start/stop trace command */
3161 if (cmd_ctx->lsm->cmd_type == LTTNG_START_TRACE ||
3162 cmd_ctx->lsm->cmd_type == LTTNG_STOP_TRACE) {
3163 switch (cmd_ctx->lsm->domain.type) {
3164 case LTTNG_DOMAIN_JUL:
3165 case LTTNG_DOMAIN_LOG4J:
3166 case LTTNG_DOMAIN_PYTHON:
3167 case LTTNG_DOMAIN_UST:
3168 if (uatomic_read(&ust_consumerd_state) != CONSUMER_STARTED) {
3169 ret = LTTNG_ERR_NO_USTCONSUMERD;
3170 goto error;
3171 }
3172 break;
3173 case LTTNG_DOMAIN_KERNEL:
3174 if (uatomic_read(&kernel_consumerd_state) != CONSUMER_STARTED) {
3175 ret = LTTNG_ERR_NO_KERNCONSUMERD;
3176 goto error;
3177 }
3178 break;
3179 }
3180 }
3181
3182 /*
3183	 * Check that the UID or GID matches that of the tracing session.
3184 * The root user can interact with all sessions.
3185 */
3186 if (need_tracing_session) {
3187 if (!session_access_ok(cmd_ctx->session,
3188 LTTNG_SOCK_GET_UID_CRED(&cmd_ctx->creds),
3189 LTTNG_SOCK_GET_GID_CRED(&cmd_ctx->creds))) {
3190 ret = LTTNG_ERR_EPERM;
3191 goto error;
3192 }
3193 }
3194
3195 /*
3196 * Send relayd information to consumer as soon as we have a domain and a
3197 * session defined.
3198 */
3199 if (cmd_ctx->session && need_domain) {
3200 /*
3201 * Setup relayd if not done yet. If the relayd information was already
3202 * sent to the consumer, this call will gracefully return.
3203 */
3204 ret = cmd_setup_relayd(cmd_ctx->session);
3205 if (ret != LTTNG_OK) {
3206 goto error;
3207 }
3208 }
3209
3210 /* Process by command type */
3211 switch (cmd_ctx->lsm->cmd_type) {
3212 case LTTNG_ADD_CONTEXT:
3213 {
3214 ret = cmd_add_context(cmd_ctx->session, cmd_ctx->lsm->domain.type,
3215 cmd_ctx->lsm->u.context.channel_name,
3216 &cmd_ctx->lsm->u.context.ctx, kernel_poll_pipe[1]);
3217 break;
3218 }
3219 case LTTNG_DISABLE_CHANNEL:
3220 {
3221 ret = cmd_disable_channel(cmd_ctx->session, cmd_ctx->lsm->domain.type,
3222 cmd_ctx->lsm->u.disable.channel_name);
3223 break;
3224 }
3225 case LTTNG_DISABLE_EVENT:
3226 {
3227 /* FIXME: passing packed structure to non-packed pointer */
3228 /* TODO: handle filter */
3229 ret = cmd_disable_event(cmd_ctx->session, cmd_ctx->lsm->domain.type,
3230 cmd_ctx->lsm->u.disable.channel_name,
3231 &cmd_ctx->lsm->u.disable.event);
3232 break;
3233 }
3234 case LTTNG_ENABLE_CHANNEL:
3235 {
3236 ret = cmd_enable_channel(cmd_ctx->session, &cmd_ctx->lsm->domain,
3237 &cmd_ctx->lsm->u.channel.chan, kernel_poll_pipe[1]);
3238 break;
3239 }
3240 case LTTNG_ENABLE_EVENT:
3241 {
3242 struct lttng_event_exclusion *exclusion = NULL;
3243 struct lttng_filter_bytecode *bytecode = NULL;
3244 char *filter_expression = NULL;
3245
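		/*
		 * The enable-event command may be followed by up to three
		 * variable length payloads on the same socket, received in this
		 * order: exclusion names, filter expression, then filter
		 * bytecode. Each one is announced by a count/length field in
		 * the received lsm.
		 */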
3246		/* Receive the event exclusion list from the client, if any. */
3247 if (cmd_ctx->lsm->u.enable.exclusion_count > 0) {
3248 size_t count = cmd_ctx->lsm->u.enable.exclusion_count;
3249
3250 exclusion = zmalloc(sizeof(struct lttng_event_exclusion) +
3251 (count * LTTNG_SYMBOL_NAME_LEN));
3252 if (!exclusion) {
3253 ret = LTTNG_ERR_EXCLUSION_NOMEM;
3254 goto error;
3255 }
3256
3257 DBG("Receiving var len exclusion event list from client ...");
3258 exclusion->count = count;
3259 ret = lttcomm_recv_unix_sock(sock, exclusion->names,
3260 count * LTTNG_SYMBOL_NAME_LEN);
3261 if (ret <= 0) {
3262 DBG("Nothing recv() from client var len data... continuing");
3263 *sock_error = 1;
3264 free(exclusion);
3265 ret = LTTNG_ERR_EXCLUSION_INVAL;
3266 goto error;
3267 }
3268 }
3269
3270 /* Get filter expression from client. */
3271 if (cmd_ctx->lsm->u.enable.expression_len > 0) {
3272 size_t expression_len =
3273 cmd_ctx->lsm->u.enable.expression_len;
3274
3275 if (expression_len > LTTNG_FILTER_MAX_LEN) {
3276 ret = LTTNG_ERR_FILTER_INVAL;
3277 free(exclusion);
3278 goto error;
3279 }
3280
3281 filter_expression = zmalloc(expression_len);
3282 if (!filter_expression) {
3283 free(exclusion);
3284 ret = LTTNG_ERR_FILTER_NOMEM;
3285 goto error;
3286 }
3287
3288 /* Receive var. len. data */
3289 DBG("Receiving var len filter's expression from client ...");
3290 ret = lttcomm_recv_unix_sock(sock, filter_expression,
3291 expression_len);
3292 if (ret <= 0) {
3293 DBG("Nothing recv() from client car len data... continuing");
3294 *sock_error = 1;
3295 free(filter_expression);
3296 free(exclusion);
3297 ret = LTTNG_ERR_FILTER_INVAL;
3298 goto error;
3299 }
3300 }
3301
3302 /* Handle filter and get bytecode from client. */
3303 if (cmd_ctx->lsm->u.enable.bytecode_len > 0) {
3304 size_t bytecode_len = cmd_ctx->lsm->u.enable.bytecode_len;
3305
3306 if (bytecode_len > LTTNG_FILTER_MAX_LEN) {
3307 ret = LTTNG_ERR_FILTER_INVAL;
3308 free(filter_expression);
3309 free(exclusion);
3310 goto error;
3311 }
3312
3313 bytecode = zmalloc(bytecode_len);
3314 if (!bytecode) {
3315 free(filter_expression);
3316 free(exclusion);
3317 ret = LTTNG_ERR_FILTER_NOMEM;
3318 goto error;
3319 }
3320
3321 /* Receive var. len. data */
3322 DBG("Receiving var len filter's bytecode from client ...");
3323 ret = lttcomm_recv_unix_sock(sock, bytecode, bytecode_len);
3324 if (ret <= 0) {
3325 DBG("Nothing recv() from client car len data... continuing");
3326 *sock_error = 1;
3327 free(filter_expression);
3328 free(bytecode);
3329 free(exclusion);
3330 ret = LTTNG_ERR_FILTER_INVAL;
3331 goto error;
3332 }
3333
3334 if ((bytecode->len + sizeof(*bytecode)) != bytecode_len) {
3335 free(filter_expression);
3336 free(bytecode);
3337 free(exclusion);
3338 ret = LTTNG_ERR_FILTER_INVAL;
3339 goto error;
3340 }
3341 }
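		/*
		 * The check above cross-validates the wire format: the total
		 * number of bytes announced by the client (bytecode_len) must
		 * equal the bytecode header (sizeof(*bytecode)) plus the
		 * payload length the header itself declares (bytecode->len),
		 * rejecting truncated or padded bytecode.
		 */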
3342
3343 ret = cmd_enable_event(cmd_ctx->session, &cmd_ctx->lsm->domain,
3344 cmd_ctx->lsm->u.enable.channel_name,
3345 &cmd_ctx->lsm->u.enable.event,
3346 filter_expression, bytecode, exclusion,
3347 kernel_poll_pipe[1]);
3348 break;
3349 }
3350 case LTTNG_LIST_TRACEPOINTS:
3351 {
3352 struct lttng_event *events;
3353 ssize_t nb_events;
3354
3355 session_lock_list();
3356 nb_events = cmd_list_tracepoints(cmd_ctx->lsm->domain.type, &events);
3357 session_unlock_list();
3358 if (nb_events < 0) {
3359 /* Return value is a negative lttng_error_code. */
3360 ret = -nb_events;
3361 goto error;
3362 }
3363
3364 /*
3365 * Setup lttng message with payload size set to the event list size in
3366 * bytes and then copy list into the llm payload.
3367 */
3368 ret = setup_lttng_msg(cmd_ctx, sizeof(struct lttng_event) * nb_events);
3369 if (ret < 0) {
3370 free(events);
3371 goto setup_error;
3372 }
3373
3374 /* Copy event list into message payload */
3375 memcpy(cmd_ctx->llm->payload, events,
3376 sizeof(struct lttng_event) * nb_events);
3377
3378 free(events);
3379
3380 ret = LTTNG_OK;
3381 break;
3382 }
3383 case LTTNG_LIST_TRACEPOINT_FIELDS:
3384 {
3385 struct lttng_event_field *fields;
3386 ssize_t nb_fields;
3387
3388 session_lock_list();
3389 nb_fields = cmd_list_tracepoint_fields(cmd_ctx->lsm->domain.type,
3390 &fields);
3391 session_unlock_list();
3392 if (nb_fields < 0) {
3393 /* Return value is a negative lttng_error_code. */
3394 ret = -nb_fields;
3395 goto error;
3396 }
3397
3398 /*
3399 * Setup lttng message with payload size set to the event list size in
3400 * bytes and then copy list into the llm payload.
3401 */
3402 ret = setup_lttng_msg(cmd_ctx,
3403 sizeof(struct lttng_event_field) * nb_fields);
3404 if (ret < 0) {
3405 free(fields);
3406 goto setup_error;
3407 }
3408
3409 /* Copy event list into message payload */
3410 memcpy(cmd_ctx->llm->payload, fields,
3411 sizeof(struct lttng_event_field) * nb_fields);
3412
3413 free(fields);
3414
3415 ret = LTTNG_OK;
3416 break;
3417 }
3418 case LTTNG_LIST_SYSCALLS:
3419 {
3420 struct lttng_event *events;
3421 ssize_t nb_events;
3422
3423 nb_events = cmd_list_syscalls(&events);
3424 if (nb_events < 0) {
3425 /* Return value is a negative lttng_error_code. */
3426 ret = -nb_events;
3427 goto error;
3428 }
3429
3430 /*
3431 * Setup lttng message with payload size set to the event list size in
3432 * bytes and then copy list into the llm payload.
3433 */
3434 ret = setup_lttng_msg(cmd_ctx, sizeof(struct lttng_event) * nb_events);
3435 if (ret < 0) {
3436 free(events);
3437 goto setup_error;
3438 }
3439
3440 /* Copy event list into message payload */
3441 memcpy(cmd_ctx->llm->payload, events,
3442 sizeof(struct lttng_event) * nb_events);
3443
3444 free(events);
3445
3446 ret = LTTNG_OK;
3447 break;
3448 }
3449 case LTTNG_SET_CONSUMER_URI:
3450 {
3451 size_t nb_uri, len;
3452 struct lttng_uri *uris;
3453
3454 nb_uri = cmd_ctx->lsm->u.uri.size;
3455 len = nb_uri * sizeof(struct lttng_uri);
3456
3457 if (nb_uri == 0) {
3458 ret = LTTNG_ERR_INVALID;
3459 goto error;
3460 }
3461
3462 uris = zmalloc(len);
3463 if (uris == NULL) {
3464 ret = LTTNG_ERR_FATAL;
3465 goto error;
3466 }
3467
3468 /* Receive variable len data */
3469 DBG("Receiving %zu URI(s) from client ...", nb_uri);
3470 ret = lttcomm_recv_unix_sock(sock, uris, len);
3471 if (ret <= 0) {
3472 DBG("No URIs received from client... continuing");
3473 *sock_error = 1;
3474 ret = LTTNG_ERR_SESSION_FAIL;
3475 free(uris);
3476 goto error;
3477 }
3478
3479 ret = cmd_set_consumer_uri(cmd_ctx->session, nb_uri, uris);
3480 free(uris);
3481 if (ret != LTTNG_OK) {
3482 goto error;
3483 }
3484 
3486 break;
3487 }
3488 case LTTNG_START_TRACE:
3489 {
3490 ret = cmd_start_trace(cmd_ctx->session);
3491 break;
3492 }
3493 case LTTNG_STOP_TRACE:
3494 {
3495 ret = cmd_stop_trace(cmd_ctx->session);
3496 break;
3497 }
3498 case LTTNG_CREATE_SESSION:
3499 {
3500 size_t nb_uri, len;
3501 struct lttng_uri *uris = NULL;
3502
3503 nb_uri = cmd_ctx->lsm->u.uri.size;
3504 len = nb_uri * sizeof(struct lttng_uri);
3505
3506 if (nb_uri > 0) {
3507 uris = zmalloc(len);
3508 if (uris == NULL) {
3509 ret = LTTNG_ERR_FATAL;
3510 goto error;
3511 }
3512
3513 /* Receive variable len data */
3514 DBG("Waiting for %zu URIs from client ...", nb_uri);
3515 ret = lttcomm_recv_unix_sock(sock, uris, len);
3516 if (ret <= 0) {
3517 DBG("No URIs received from client... continuing");
3518 *sock_error = 1;
3519 ret = LTTNG_ERR_SESSION_FAIL;
3520 free(uris);
3521 goto error;
3522 }
3523
3524 if (nb_uri == 1 && uris[0].dtype != LTTNG_DST_PATH) {
3525 DBG("Creating session with ONE network URI is a bad call");
3526 ret = LTTNG_ERR_SESSION_FAIL;
3527 free(uris);
3528 goto error;
3529 }
3530 }
3531
3532 ret = cmd_create_session_uri(cmd_ctx->lsm->session.name, uris, nb_uri,
3533 &cmd_ctx->creds, 0);
3534
3535 free(uris);
3536
3537 break;
3538 }
3539 case LTTNG_DESTROY_SESSION:
3540 {
3541 ret = cmd_destroy_session(cmd_ctx->session, kernel_poll_pipe[1]);
3542
3543 /* Set session to NULL so we do not unlock it after free. */
3544 cmd_ctx->session = NULL;
3545 break;
3546 }
3547 case LTTNG_LIST_DOMAINS:
3548 {
3549 ssize_t nb_dom;
3550 struct lttng_domain *domains = NULL;
3551
3552 nb_dom = cmd_list_domains(cmd_ctx->session, &domains);
3553 if (nb_dom < 0) {
3554 /* Return value is a negative lttng_error_code. */
3555 ret = -nb_dom;
3556 goto error;
3557 }
3558
3559 ret = setup_lttng_msg(cmd_ctx, nb_dom * sizeof(struct lttng_domain));
3560 if (ret < 0) {
3561 free(domains);
3562 goto setup_error;
3563 }
3564
3565 /* Copy event list into message payload */
3566 memcpy(cmd_ctx->llm->payload, domains,
3567 nb_dom * sizeof(struct lttng_domain));
3568
3569 free(domains);
3570
3571 ret = LTTNG_OK;
3572 break;
3573 }
3574 case LTTNG_LIST_CHANNELS:
3575 {
3576 int nb_chan;
3577 struct lttng_channel *channels = NULL;
3578
3579 nb_chan = cmd_list_channels(cmd_ctx->lsm->domain.type,
3580 cmd_ctx->session, &channels);
3581 if (nb_chan < 0) {
3582 /* Return value is a negative lttng_error_code. */
3583 ret = -nb_chan;
3584 goto error;
3585 }
3586
3587 ret = setup_lttng_msg(cmd_ctx, nb_chan * sizeof(struct lttng_channel));
3588 if (ret < 0) {
3589 free(channels);
3590 goto setup_error;
3591 }
3592
3593 /* Copy event list into message payload */
3594 memcpy(cmd_ctx->llm->payload, channels,
3595 nb_chan * sizeof(struct lttng_channel));
3596
3597 free(channels);
3598
3599 ret = LTTNG_OK;
3600 break;
3601 }
3602 case LTTNG_LIST_EVENTS:
3603 {
3604 ssize_t nb_event;
3605 struct lttng_event *events = NULL;
3606
3607 nb_event = cmd_list_events(cmd_ctx->lsm->domain.type, cmd_ctx->session,
3608 cmd_ctx->lsm->u.list.channel_name, &events);
3609 if (nb_event < 0) {
3610 /* Return value is a negative lttng_error_code. */
3611 ret = -nb_event;
3612 goto error;
3613 }
3614
3615 ret = setup_lttng_msg(cmd_ctx, nb_event * sizeof(struct lttng_event));
3616 if (ret < 0) {
3617 free(events);
3618 goto setup_error;
3619 }
3620
3621 /* Copy event list into message payload */
3622 memcpy(cmd_ctx->llm->payload, events,
3623 nb_event * sizeof(struct lttng_event));
3624
3625 free(events);
3626
3627 ret = LTTNG_OK;
3628 break;
3629 }
3630 case LTTNG_LIST_SESSIONS:
3631 {
3632 unsigned int nr_sessions;
3633
3634 session_lock_list();
3635 nr_sessions = lttng_sessions_count(
3636 LTTNG_SOCK_GET_UID_CRED(&cmd_ctx->creds),
3637 LTTNG_SOCK_GET_GID_CRED(&cmd_ctx->creds));
3638
3639 ret = setup_lttng_msg(cmd_ctx, sizeof(struct lttng_session) * nr_sessions);
3640 if (ret < 0) {
3641 session_unlock_list();
3642 goto setup_error;
3643 }
3644
3645		/* Fill the session array */
3646 cmd_list_lttng_sessions((struct lttng_session *)(cmd_ctx->llm->payload),
3647 LTTNG_SOCK_GET_UID_CRED(&cmd_ctx->creds),
3648 LTTNG_SOCK_GET_GID_CRED(&cmd_ctx->creds));
3649
3650 session_unlock_list();
3651
3652 ret = LTTNG_OK;
3653 break;
3654 }
3655 case LTTNG_CALIBRATE:
3656 {
3657 ret = cmd_calibrate(cmd_ctx->lsm->domain.type,
3658 &cmd_ctx->lsm->u.calibrate);
3659 break;
3660 }
3661 case LTTNG_REGISTER_CONSUMER:
3662 {
3663 struct consumer_data *cdata;
3664
3665 switch (cmd_ctx->lsm->domain.type) {
3666 case LTTNG_DOMAIN_KERNEL:
3667 cdata = &kconsumer_data;
3668 break;
3669 default:
3670 ret = LTTNG_ERR_UND;
3671 goto error;
3672 }
3673
3674 ret = cmd_register_consumer(cmd_ctx->session, cmd_ctx->lsm->domain.type,
3675 cmd_ctx->lsm->u.reg.path, cdata);
3676 break;
3677 }
3678 case LTTNG_DATA_PENDING:
3679 {
3680 ret = cmd_data_pending(cmd_ctx->session);
3681 break;
3682 }
3683 case LTTNG_SNAPSHOT_ADD_OUTPUT:
3684 {
3685 struct lttcomm_lttng_output_id reply;
3686
3687 ret = cmd_snapshot_add_output(cmd_ctx->session,
3688 &cmd_ctx->lsm->u.snapshot_output.output, &reply.id);
3689 if (ret != LTTNG_OK) {
3690 goto error;
3691 }
3692
3693 ret = setup_lttng_msg(cmd_ctx, sizeof(reply));
3694 if (ret < 0) {
3695 goto setup_error;
3696 }
3697
3698 /* Copy output list into message payload */
3699 memcpy(cmd_ctx->llm->payload, &reply, sizeof(reply));
3700 ret = LTTNG_OK;
3701 break;
3702 }
3703 case LTTNG_SNAPSHOT_DEL_OUTPUT:
3704 {
3705 ret = cmd_snapshot_del_output(cmd_ctx->session,
3706 &cmd_ctx->lsm->u.snapshot_output.output);
3707 break;
3708 }
3709 case LTTNG_SNAPSHOT_LIST_OUTPUT:
3710 {
3711 ssize_t nb_output;
3712 struct lttng_snapshot_output *outputs = NULL;
3713
3714 nb_output = cmd_snapshot_list_outputs(cmd_ctx->session, &outputs);
3715 if (nb_output < 0) {
3716 ret = -nb_output;
3717 goto error;
3718 }
3719
3720 ret = setup_lttng_msg(cmd_ctx,
3721 nb_output * sizeof(struct lttng_snapshot_output));
3722 if (ret < 0) {
3723 free(outputs);
3724 goto setup_error;
3725 }
3726
3727 if (outputs) {
3728 /* Copy output list into message payload */
3729 memcpy(cmd_ctx->llm->payload, outputs,
3730 nb_output * sizeof(struct lttng_snapshot_output));
3731 free(outputs);
3732 }
3733
3734 ret = LTTNG_OK;
3735 break;
3736 }
3737 case LTTNG_SNAPSHOT_RECORD:
3738 {
3739 ret = cmd_snapshot_record(cmd_ctx->session,
3740 &cmd_ctx->lsm->u.snapshot_record.output,
3741 cmd_ctx->lsm->u.snapshot_record.wait);
3742 break;
3743 }
3744 case LTTNG_CREATE_SESSION_SNAPSHOT:
3745 {
3746 size_t nb_uri, len;
3747 struct lttng_uri *uris = NULL;
3748
3749 nb_uri = cmd_ctx->lsm->u.uri.size;
3750 len = nb_uri * sizeof(struct lttng_uri);
3751
3752 if (nb_uri > 0) {
3753 uris = zmalloc(len);
3754 if (uris == NULL) {
3755 ret = LTTNG_ERR_FATAL;
3756 goto error;
3757 }
3758
3759 /* Receive variable len data */
3760 DBG("Waiting for %zu URIs from client ...", nb_uri);
3761 ret = lttcomm_recv_unix_sock(sock, uris, len);
3762 if (ret <= 0) {
3763 DBG("No URIs received from client... continuing");
3764 *sock_error = 1;
3765 ret = LTTNG_ERR_SESSION_FAIL;
3766 free(uris);
3767 goto error;
3768 }
3769
3770 if (nb_uri == 1 && uris[0].dtype != LTTNG_DST_PATH) {
3771 DBG("Creating session with ONE network URI is a bad call");
3772 ret = LTTNG_ERR_SESSION_FAIL;
3773 free(uris);
3774 goto error;
3775 }
3776 }
3777
3778 ret = cmd_create_session_snapshot(cmd_ctx->lsm->session.name, uris,
3779 nb_uri, &cmd_ctx->creds);
3780 free(uris);
3781 break;
3782 }
3783 case LTTNG_CREATE_SESSION_LIVE:
3784 {
3785 size_t nb_uri, len;
3786 struct lttng_uri *uris = NULL;
3787
3788 nb_uri = cmd_ctx->lsm->u.uri.size;
3789 len = nb_uri * sizeof(struct lttng_uri);
3790
3791 if (nb_uri > 0) {
3792 uris = zmalloc(len);
3793 if (uris == NULL) {
3794 ret = LTTNG_ERR_FATAL;
3795 goto error;
3796 }
3797
3798 /* Receive variable len data */
3799 DBG("Waiting for %zu URIs from client ...", nb_uri);
3800 ret = lttcomm_recv_unix_sock(sock, uris, len);
3801 if (ret <= 0) {
3802 DBG("No URIs received from client... continuing");
3803 *sock_error = 1;
3804 ret = LTTNG_ERR_SESSION_FAIL;
3805 free(uris);
3806 goto error;
3807 }
3808
3809 if (nb_uri == 1 && uris[0].dtype != LTTNG_DST_PATH) {
3810 DBG("Creating session with ONE network URI is a bad call");
3811 ret = LTTNG_ERR_SESSION_FAIL;
3812 free(uris);
3813 goto error;
3814 }
3815 }
3816
3817 ret = cmd_create_session_uri(cmd_ctx->lsm->session.name, uris,
3818 nb_uri, &cmd_ctx->creds, cmd_ctx->lsm->u.session_live.timer_interval);
3819 free(uris);
3820 break;
3821 }
3822 case LTTNG_SAVE_SESSION:
3823 {
3824 ret = cmd_save_sessions(&cmd_ctx->lsm->u.save_session.attr,
3825 &cmd_ctx->creds);
3826 break;
3827 }
3828 default:
3829 ret = LTTNG_ERR_UND;
3830 break;
3831 }
3832
3833 error:
3834 if (cmd_ctx->llm == NULL) {
3835 DBG("Missing llm structure. Allocating one.");
3836 if (setup_lttng_msg(cmd_ctx, 0) < 0) {
3837 goto setup_error;
3838 }
3839 }
3840 /* Set return code */
3841 cmd_ctx->llm->ret_code = ret;
3842 setup_error:
3843 if (cmd_ctx->session) {
3844 session_unlock(cmd_ctx->session);
3845 }
3846 if (need_tracing_session) {
3847 session_unlock_list();
3848 }
3849 init_setup_error:
3850 return ret;
3851 }
3852
3853 /*
3854 * Thread managing health check socket.
3855 */
3856 static void *thread_manage_health(void *data)
3857 {
3858 int sock = -1, new_sock = -1, ret, i, pollfd, err = -1;
3859 uint32_t revents, nb_fd;
3860 struct lttng_poll_event events;
3861 struct health_comm_msg msg;
3862 struct health_comm_reply reply;
3863
3864 DBG("[thread] Manage health check started");
3865
3866 rcu_register_thread();
3867
3868 /* We might hit an error path before this is created. */
3869 lttng_poll_init(&events);
3870
3871 /* Create unix socket */
3872 sock = lttcomm_create_unix_sock(health_unix_sock_path);
3873 if (sock < 0) {
3874 ERR("Unable to create health check Unix socket");
3875 ret = -1;
3876 goto error;
3877 }
3878
3879 if (is_root) {
3880 /* lttng health client socket path permissions */
3881 ret = chown(health_unix_sock_path, 0,
3882 utils_get_group_id(tracing_group_name));
3883 if (ret < 0) {
3884 ERR("Unable to set group on %s", health_unix_sock_path);
3885 PERROR("chown");
3886 ret = -1;
3887 goto error;
3888 }
3889
3890 ret = chmod(health_unix_sock_path,
3891 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
3892 if (ret < 0) {
3893 ERR("Unable to set permissions on %s", health_unix_sock_path);
3894 PERROR("chmod");
3895 ret = -1;
3896 goto error;
3897 }
3898 }
3899
3900 /*
3901 * Set the CLOEXEC flag. Return code is useless because either way, the
3902 * show must go on.
3903 */
3904 (void) utils_set_fd_cloexec(sock);
3905
3906 ret = lttcomm_listen_unix_sock(sock);
3907 if (ret < 0) {
3908 goto error;
3909 }
3910
3911 /*
3912	 * Pass 2 as size here for the thread quit pipe and the health socket. Nothing
3913 * more will be added to this poll set.
3914 */
3915 ret = sessiond_set_thread_pollset(&events, 2);
3916 if (ret < 0) {
3917 goto error;
3918 }
3919
3920	/* Add the health check socket */
3921 ret = lttng_poll_add(&events, sock, LPOLLIN | LPOLLPRI);
3922 if (ret < 0) {
3923 goto error;
3924 }
3925
3926 sessiond_notify_ready();
3927
3928 while (1) {
3929 DBG("Health check ready");
3930
3931		/* Infinite blocking call, waiting for transmission */
3932 restart:
3933 ret = lttng_poll_wait(&events, -1);
3934 if (ret < 0) {
3935 /*
3936 * Restart interrupted system call.
3937 */
3938 if (errno == EINTR) {
3939 goto restart;
3940 }
3941 goto error;
3942 }
3943
3944 nb_fd = ret;
3945
3946 for (i = 0; i < nb_fd; i++) {
3947 /* Fetch once the poll data */
3948 revents = LTTNG_POLL_GETEV(&events, i);
3949 pollfd = LTTNG_POLL_GETFD(&events, i);
3950
3951 if (!revents) {
3952 /* No activity for this FD (poll implementation). */
3953 continue;
3954 }
3955
3956 /* Thread quit pipe has been closed. Killing thread. */
3957 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
3958 if (ret) {
3959 err = 0;
3960 goto exit;
3961 }
3962
3963 /* Event on the registration socket */
3964 if (pollfd == sock) {
3965 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
3966 ERR("Health socket poll error");
3967 goto error;
3968 }
3969 }
3970 }
3971
3972 new_sock = lttcomm_accept_unix_sock(sock);
3973 if (new_sock < 0) {
3974 goto error;
3975 }
3976
3977 /*
3978 * Set the CLOEXEC flag. Return code is useless because either way, the
3979 * show must go on.
3980 */
3981 (void) utils_set_fd_cloexec(new_sock);
3982
3983 DBG("Receiving data from client for health...");
3984 ret = lttcomm_recv_unix_sock(new_sock, (void *)&msg, sizeof(msg));
3985 if (ret <= 0) {
3986 DBG("Nothing recv() from client... continuing");
3987 ret = close(new_sock);
3988 if (ret) {
3989 PERROR("close");
3990 }
3991 new_sock = -1;
3992 continue;
3993 }
3994
3995 rcu_thread_online();
3996
3997 memset(&reply, 0, sizeof(reply));
3998 for (i = 0; i < NR_HEALTH_SESSIOND_TYPES; i++) {
3999 /*
4000 * health_check_state returns 0 if health is
4001 * bad.
4002 */
4003 if (!health_check_state(health_sessiond, i)) {
4004 reply.ret_code |= 1ULL << i;
4005 }
4006 }
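		/*
		 * The reply is a bitmask: bit i is set when health check i
		 * failed. For example (illustrative), a ret_code of 0x4 reports
		 * that the thread of type 2 in the HEALTH_SESSIOND_TYPE
		 * enumeration is unhealthy, while 0 means every thread passed.
		 */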
4007
4008 DBG2("Health check return value %" PRIx64, reply.ret_code);
4009
4010 ret = send_unix_sock(new_sock, (void *) &reply, sizeof(reply));
4011 if (ret < 0) {
4012 ERR("Failed to send health data back to client");
4013 }
4014
4015 /* End of transmission */
4016 ret = close(new_sock);
4017 if (ret) {
4018 PERROR("close");
4019 }
4020 new_sock = -1;
4021 }
4022
4023 exit:
4024 error:
4025 if (err) {
4026 ERR("Health error occurred in %s", __func__);
4027 }
4028 DBG("Health check thread dying");
4029 unlink(health_unix_sock_path);
4030 if (sock >= 0) {
4031 ret = close(sock);
4032 if (ret) {
4033 PERROR("close");
4034 }
4035 }
4036
4037 lttng_poll_clean(&events);
4038
4039 rcu_unregister_thread();
4040 return NULL;
4041 }
4042
4043 /*
4044  * This thread manages all client requests using the unix client socket for
4045 * communication.
4046 */
4047 static void *thread_manage_clients(void *data)
4048 {
4049 int sock = -1, ret, i, pollfd, err = -1;
4050 int sock_error;
4051 uint32_t revents, nb_fd;
4052 struct command_ctx *cmd_ctx = NULL;
4053 struct lttng_poll_event events;
4054
4055 DBG("[thread] Manage client started");
4056
4057 rcu_register_thread();
4058
4059 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_CMD);
4060
4061 health_code_update();
4062
4063 ret = lttcomm_listen_unix_sock(client_sock);
4064 if (ret < 0) {
4065 goto error_listen;
4066 }
4067
4068 /*
4069 * Pass 2 as size here for the thread quit pipe and client_sock. Nothing
4070 * more will be added to this poll set.
4071 */
4072 ret = sessiond_set_thread_pollset(&events, 2);
4073 if (ret < 0) {
4074 goto error_create_poll;
4075 }
4076
4077	/* Add the client socket */
4078 ret = lttng_poll_add(&events, client_sock, LPOLLIN | LPOLLPRI);
4079 if (ret < 0) {
4080 goto error;
4081 }
4082
4083 sessiond_notify_ready();
4084 ret = sem_post(&load_info->message_thread_ready);
4085 if (ret) {
4086 PERROR("sem_post message_thread_ready");
4087 goto error;
4088 }
4089
4090 /* This testpoint is after we signal readiness to the parent. */
4091 if (testpoint(sessiond_thread_manage_clients)) {
4092 goto error;
4093 }
4094
4095 if (testpoint(sessiond_thread_manage_clients_before_loop)) {
4096 goto error;
4097 }
4098
4099 health_code_update();
4100
4101 while (1) {
4102 DBG("Accepting client command ...");
4103
4104		/* Infinite blocking call, waiting for transmission */
4105 restart:
4106 health_poll_entry();
4107 ret = lttng_poll_wait(&events, -1);
4108 health_poll_exit();
4109 if (ret < 0) {
4110 /*
4111 * Restart interrupted system call.
4112 */
4113 if (errno == EINTR) {
4114 goto restart;
4115 }
4116 goto error;
4117 }
4118
4119 nb_fd = ret;
4120
4121 for (i = 0; i < nb_fd; i++) {
4122 /* Fetch once the poll data */
4123 revents = LTTNG_POLL_GETEV(&events, i);
4124 pollfd = LTTNG_POLL_GETFD(&events, i);
4125
4126 health_code_update();
4127
4128 if (!revents) {
4129 /* No activity for this FD (poll implementation). */
4130 continue;
4131 }
4132
4133 /* Thread quit pipe has been closed. Killing thread. */
4134 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
4135 if (ret) {
4136 err = 0;
4137 goto exit;
4138 }
4139
4140 /* Event on the registration socket */
4141 if (pollfd == client_sock) {
4142 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
4143 ERR("Client socket poll error");
4144 goto error;
4145 }
4146 }
4147 }
4148
4149 DBG("Wait for client response");
4150
4151 health_code_update();
4152
4153 sock = lttcomm_accept_unix_sock(client_sock);
4154 if (sock < 0) {
4155 goto error;
4156 }
4157
4158 /*
4159 * Set the CLOEXEC flag. Return code is useless because either way, the
4160 * show must go on.
4161 */
4162 (void) utils_set_fd_cloexec(sock);
4163
4164 /* Set socket option for credentials retrieval */
4165 ret = lttcomm_setsockopt_creds_unix_sock(sock);
4166 if (ret < 0) {
4167 goto error;
4168 }
4169
4170 /* Allocate context command to process the client request */
4171 cmd_ctx = zmalloc(sizeof(struct command_ctx));
4172 if (cmd_ctx == NULL) {
4173 PERROR("zmalloc cmd_ctx");
4174 goto error;
4175 }
4176
4177 /* Allocate data buffer for reception */
4178 cmd_ctx->lsm = zmalloc(sizeof(struct lttcomm_session_msg));
4179 if (cmd_ctx->lsm == NULL) {
4180 PERROR("zmalloc cmd_ctx->lsm");
4181 goto error;
4182 }
4183
4184 cmd_ctx->llm = NULL;
4185 cmd_ctx->session = NULL;
4186
4187 health_code_update();
4188
4189 /*
4190 * Data is received from the lttng client. The struct
4191 * lttcomm_session_msg (lsm) contains the command and data request of
4192 * the client.
4193 */
4194 DBG("Receiving data from client ...");
4195 ret = lttcomm_recv_creds_unix_sock(sock, cmd_ctx->lsm,
4196 sizeof(struct lttcomm_session_msg), &cmd_ctx->creds);
4197 if (ret <= 0) {
4198 DBG("Nothing recv() from client... continuing");
4199 ret = close(sock);
4200 if (ret) {
4201 PERROR("close");
4202 }
4203 sock = -1;
4204 clean_command_ctx(&cmd_ctx);
4205 continue;
4206 }
4207
4208 health_code_update();
4209
4210 // TODO: Validate cmd_ctx, including sanity checks, for
4211 // security purposes.
4212
4213 rcu_thread_online();
4214 /*
4215 * This function dispatches the work to the kernel or userspace tracer
4216 * libs and fills the lttcomm_lttng_msg data structure with all the
4217 * information needed by the client. The command context struct contains
4218 * everything this function may need.
4219 */
4220 ret = process_client_msg(cmd_ctx, sock, &sock_error);
4221 rcu_thread_offline();
4222 if (ret < 0) {
4223 ret = close(sock);
4224 if (ret) {
4225 PERROR("close");
4226 }
4227 sock = -1;
4228 /*
4229 * TODO: Inform client somehow of the fatal error. At
4230 * this point, ret < 0 means that a zmalloc failed
4231 * (ENOMEM). The error is detected, but we keep
4232 * accepting commands, unless a socket error has
4233 * been detected.
4234 */
4235 clean_command_ctx(&cmd_ctx);
4236 continue;
4237 }
4238
4239 health_code_update();
4240
4241 DBG("Sending response (size: %d, retcode: %s)",
4242 cmd_ctx->lttng_msg_size,
4243 lttng_strerror(-cmd_ctx->llm->ret_code));
4244 ret = send_unix_sock(sock, cmd_ctx->llm, cmd_ctx->lttng_msg_size);
4245 if (ret < 0) {
4246 ERR("Failed to send data back to client");
4247 }
4248
4249 /* End of transmission */
4250 ret = close(sock);
4251 if (ret) {
4252 PERROR("close");
4253 }
4254 sock = -1;
4255
4256 clean_command_ctx(&cmd_ctx);
4257
4258 health_code_update();
4259 }
4260
4261 exit:
4262 error:
4263 if (sock >= 0) {
4264 ret = close(sock);
4265 if (ret) {
4266 PERROR("close");
4267 }
4268 }
4269
4270 lttng_poll_clean(&events);
4271 clean_command_ctx(&cmd_ctx);
4272
4273 error_listen:
4274 error_create_poll:
4275 unlink(client_unix_sock_path);
4276 if (client_sock >= 0) {
4277 ret = close(client_sock);
4278 if (ret) {
4279 PERROR("close");
4280 }
4281 }
4282
4283 if (err) {
4284 health_error();
4285 ERR("Health error occurred in %s", __func__);
4286 }
4287
4288 health_unregister(health_sessiond);
4289
4290 DBG("Client thread dying");
4291
4292 rcu_unregister_thread();
4293
4294 /*
4295 * Since we are creating the consumer threads, we own them, so we need
4296 * to join them before our thread exits.
4297 */
4298 ret = join_consumer_thread(&kconsumer_data);
4299 if (ret) {
4300 errno = ret;
4301 PERROR("join_consumer");
4302 }
4303
4304 ret = join_consumer_thread(&ustconsumer32_data);
4305 if (ret) {
4306 errno = ret;
4307 PERROR("join_consumer ust32");
4308 }
4309
4310 ret = join_consumer_thread(&ustconsumer64_data);
4311 if (ret) {
4312 errno = ret;
4313 PERROR("join_consumer ust64");
4314 }
4315 return NULL;
4316 }
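
/*
 * The credential handling above (lttcomm_setsockopt_creds_unix_sock() and
 * lttcomm_recv_creds_unix_sock()) builds on the SO_PASSCRED / SCM_CREDENTIALS
 * mechanism of Linux unix sockets. As a minimal sketch only -- the real
 * helpers live in common/ and may differ -- receiving a payload along with
 * the peer's credentials could look like this, assuming SO_PASSCRED was
 * already enabled on the socket:
 */
static ssize_t example_recv_with_creds(int fd, void *buf, size_t len,
        struct ucred *creds)
{
        struct msghdr msg;
        struct iovec iov;
        struct cmsghdr *cmsg;
        /* The union guarantees proper alignment of the control buffer. */
        union {
                char buf[CMSG_SPACE(sizeof(struct ucred))];
                struct cmsghdr align;
        } u;
        ssize_t ret;

        memset(&msg, 0, sizeof(msg));
        iov.iov_base = buf;
        iov.iov_len = len;
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = u.buf;
        msg.msg_controllen = sizeof(u.buf);

        ret = recvmsg(fd, &msg, 0);
        if (ret < 0) {
                return ret;
        }

        /* Walk the ancillary data looking for the peer credentials. */
        for (cmsg = CMSG_FIRSTHDR(&msg); cmsg;
                        cmsg = CMSG_NXTHDR(&msg, cmsg)) {
                if (cmsg->cmsg_level == SOL_SOCKET &&
                                cmsg->cmsg_type == SCM_CREDENTIALS) {
                        memcpy(creds, CMSG_DATA(cmsg), sizeof(*creds));
                        break;
                }
        }
        return ret;
}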
4317
4318
4319 /*
4320 * usage function on stderr
4321 */
4322 static void usage(void)
4323 {
4324 fprintf(stderr, "Usage: %s OPTIONS\n\nOptions:\n", progname);
4325 fprintf(stderr, " -h, --help Display this usage.\n");
4326 fprintf(stderr, " -c, --client-sock PATH Specify path for the client unix socket\n");
4327 fprintf(stderr, " -a, --apps-sock PATH Specify path for apps unix socket\n");
4328 fprintf(stderr, " --kconsumerd-err-sock PATH Specify path for the kernel consumer error socket\n");
4329 fprintf(stderr, " --kconsumerd-cmd-sock PATH Specify path for the kernel consumer command socket\n");
4330 fprintf(stderr, " --ustconsumerd32-err-sock PATH Specify path for the 32-bit UST consumer error socket\n");
4331 fprintf(stderr, " --ustconsumerd64-err-sock PATH Specify path for the 64-bit UST consumer error socket\n");
4332 fprintf(stderr, " --ustconsumerd32-cmd-sock PATH Specify path for the 32-bit UST consumer command socket\n");
4333 fprintf(stderr, " --ustconsumerd64-cmd-sock PATH Specify path for the 64-bit UST consumer command socket\n");
4334 fprintf(stderr, " --consumerd32-path PATH Specify path for the 32-bit UST consumer daemon binary\n");
4335 fprintf(stderr, " --consumerd32-libdir PATH Specify path for the 32-bit UST consumer daemon libraries\n");
4336 fprintf(stderr, " --consumerd64-path PATH Specify path for the 64-bit UST consumer daemon binary\n");
4337 fprintf(stderr, " --consumerd64-libdir PATH Specify path for the 64-bit UST consumer daemon libraries\n");
4338 fprintf(stderr, " -d, --daemonize Start as a daemon.\n");
4339 fprintf(stderr, " -b, --background Start as a daemon, keeping console open.\n");
4340 fprintf(stderr, " -g, --group NAME Specify the tracing group name. (default: tracing)\n");
4341 fprintf(stderr, " -V, --version Show version number.\n");
4342 fprintf(stderr, " -S, --sig-parent Send SIGUSR1 to parent pid to notify readiness.\n");
4343 fprintf(stderr, " -q, --quiet No output at all.\n");
4344 fprintf(stderr, " -v, --verbose Verbose mode. Activate DBG() macro.\n");
4345 fprintf(stderr, " -p, --pidfile FILE Write the PID to FILE, overriding the default path.\n");
4346 fprintf(stderr, " --verbose-consumer Verbose mode for consumer. Activate DBG() macro.\n");
4347 fprintf(stderr, " --no-kernel Disable kernel tracer\n");
4348 fprintf(stderr, " --agent-tcp-port Agent registration TCP port\n");
4349 fprintf(stderr, " -f, --config PATH Load daemon configuration file\n");
4350 fprintf(stderr, " -l, --load PATH Load session configuration\n");
4351 fprintf(stderr, " --kmod-probes Specify kernel module probes to load\n");
4352 fprintf(stderr, " --extra-kmod-probes Specify extra kernel module probes to load\n");
4353 }
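
/*
 * Typical invocations, for reference; paths are illustrative and all of
 * these map onto the option handling in set_option() below:
 *
 *   lttng-sessiond --daemonize
 *   lttng-sessiond --background --verbose --verbose-consumer
 *   lttng-sessiond --sig-parent --pidfile /tmp/sessiond.pid
 *   lttng-sessiond --no-kernel --load /home/user/.lttng/sessions
 */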
4354
4355 /*
4356 * Take an option from the getopt output and set it in the right variable to be
4357 * used later.
4358 *
4359 * Return 0 on success else a negative value.
4360 */
4361 static int set_option(int opt, const char *arg, const char *optname)
4362 {
4363 int ret = 0;
4364
4365 if (arg && arg[0] == '\0') {
4366 /*
4367 * This only happens if the value is read from daemon config
4368 * file. This means the option requires an argument and the
4369 * configuration file contains a line such as:
4370 * my_option =
4371 */
4372 ret = -EINVAL;
4373 goto end;
4374 }
4375
4376 switch (opt) {
4377 case 0:
4378 fprintf(stderr, "option %s", optname);
4379 if (arg) {
4380 fprintf(stderr, " with arg %s\n", arg);
4381 }
4382 break;
4383 case 'c':
4384 snprintf(client_unix_sock_path, PATH_MAX, "%s", arg);
4385 break;
4386 case 'a':
4387 snprintf(apps_unix_sock_path, PATH_MAX, "%s", arg);
4388 break;
4389 case 'd':
4390 opt_daemon = 1;
4391 break;
4392 case 'b':
4393 opt_background = 1;
4394 break;
4395 case 'g':
4396 /*
4397 * If the override flag is set, the pointer actually refers to
4398 * non-const memory, so it is safe to free it even though the
4399 * variable type is declared const.
4400 */
4401 if (tracing_group_name_override) {
4402 free((void *) tracing_group_name);
4403 }
4404 tracing_group_name = strdup(arg);
4405 if (!tracing_group_name) {
4406 PERROR("strdup");
4407 ret = -ENOMEM;
4408 }
4409 tracing_group_name_override = 1;
4410 break;
4411 case 'h':
4412 usage();
4413 exit(EXIT_SUCCESS);
4414 case 'V':
4415 fprintf(stdout, "%s\n", VERSION);
4416 exit(EXIT_SUCCESS);
4417 case 'S':
4418 opt_sig_parent = 1;
4419 break;
4420 case 'E':
4421 snprintf(kconsumer_data.err_unix_sock_path, PATH_MAX, "%s", arg);
4422 break;
4423 case 'C':
4424 snprintf(kconsumer_data.cmd_unix_sock_path, PATH_MAX, "%s", arg);
4425 break;
4426 case 'F':
4427 snprintf(ustconsumer64_data.err_unix_sock_path, PATH_MAX, "%s", arg);
4428 break;
4429 case 'D':
4430 snprintf(ustconsumer64_data.cmd_unix_sock_path, PATH_MAX, "%s", arg);
4431 break;
4432 case 'H':
4433 snprintf(ustconsumer32_data.err_unix_sock_path, PATH_MAX, "%s", arg);
4434 break;
4435 case 'G':
4436 snprintf(ustconsumer32_data.cmd_unix_sock_path, PATH_MAX, "%s", arg);
4437 break;
4438 case 'N':
4439 opt_no_kernel = 1;
4440 break;
4441 case 'q':
4442 lttng_opt_quiet = 1;
4443 break;
4444 case 'v':
4445 /* Verbose level can increase using multiple -v */
4446 if (arg) {
4447 /* Value obtained from config file */
4448 lttng_opt_verbose = config_parse_value(arg);
4449 } else {
4450 /* -v used on command line */
4451 lttng_opt_verbose++;
4452 }
4453 /* Clamp value to [0, 3] */
4454 lttng_opt_verbose = lttng_opt_verbose < 0 ? 0 :
4455 (lttng_opt_verbose <= 3 ? lttng_opt_verbose : 3);
4456 break;
4457 case 'Z':
4458 if (arg) {
4459 opt_verbose_consumer = config_parse_value(arg);
4460 } else {
4461 opt_verbose_consumer += 1;
4462 }
4463 break;
4464 case 'u':
4465 if (consumerd32_bin_override) {
4466 free((void *) consumerd32_bin);
4467 }
4468 consumerd32_bin = strdup(arg);
4469 if (!consumerd32_bin) {
4470 PERROR("strdup");
4471 ret = -ENOMEM;
4472 }
4473 consumerd32_bin_override = 1;
4474 break;
4475 case 'U':
4476 if (consumerd32_libdir_override) {
4477 free((void *) consumerd32_libdir);
4478 }
4479 consumerd32_libdir = strdup(arg);
4480 if (!consumerd32_libdir) {
4481 PERROR("strdup");
4482 ret = -ENOMEM;
4483 }
4484 consumerd32_libdir_override = 1;
4485 break;
4486 case 't':
4487 if (consumerd64_bin_override) {
4488 free((void *) consumerd64_bin);
4489 }
4490 consumerd64_bin = strdup(arg);
4491 if (!consumerd64_bin) {
4492 PERROR("strdup");
4493 ret = -ENOMEM;
4494 }
4495 consumerd64_bin_override = 1;
4496 break;
4497 case 'T':
4498 if (consumerd64_libdir_override) {
4499 free((void *) consumerd64_libdir);
4500 }
4501 consumerd64_libdir = strdup(arg);
4502 if (!consumerd64_libdir) {
4503 PERROR("strdup");
4504 ret = -ENOMEM;
4505 }
4506 consumerd64_libdir_override = 1;
4507 break;
4508 case 'p':
4509 free(opt_pidfile);
4510 opt_pidfile = strdup(arg);
4511 if (!opt_pidfile) {
4512 PERROR("strdup");
4513 ret = -ENOMEM;
4514 }
4515 break;
4516 case 'J': /* Agent TCP port. */
4517 {
4518 unsigned long v;
4519
4520 errno = 0;
4521 v = strtoul(arg, NULL, 0);
4522 if (errno != 0 || !isdigit((unsigned char) arg[0])) {
4523 ERR("Wrong value in --agent-tcp-port parameter: %s", arg);
4524 return -1;
4525 }
4526 if (v == 0 || v > 65535) {
4527 ERR("Invalid port value in --agent-tcp-port parameter: %s", arg);
4528 return -1;
4529 }
4530 agent_tcp_port = (uint32_t) v;
4531 DBG3("Agent TCP port set to non default: %u", agent_tcp_port);
4532 break;
4533 }
4534 case 'l':
4535 free(opt_load_session_path);
4536 opt_load_session_path = strdup(arg);
4537 if (!opt_load_session_path) {
4538 PERROR("strdup");
4539 ret = -ENOMEM;
4540 }
4541 break;
4542 case 'P': /* probe modules list */
4543 free(kmod_probes_list);
4544 kmod_probes_list = strdup(arg);
4545 if (!kmod_probes_list) {
4546 PERROR("strdup");
4547 ret = -ENOMEM;
4548 }
4549 break;
4550 case 'e':
4551 free(kmod_extra_probes_list);
4552 kmod_extra_probes_list = strdup(arg);
4553 if (!kmod_extra_probes_list) {
4554 PERROR("strdup");
4555 ret = -ENOMEM;
4556 }
4557 break;
4558 case 'f':
4559 /* This is handled in set_options() thus silent break. */
4560 break;
4561 default:
4562 /* Unknown option or other error.
4563 * Error is printed by getopt, just return */
4564 ret = -1;
4565 }
4566
4567 end:
4568 if (ret == -EINVAL) {
4569 const char *opt_name = "unknown";
4570 int i;
4571
4572 for (i = 0; i < (sizeof(long_options) / sizeof(struct option)) - 1;
4573 i++) {
4574 if (opt == long_options[i].val) {
4575 opt_name = long_options[i].name;
4576 break;
4577 }
4578 }
4579
4580 WARN("Invalid argument provided for option \"%s\", using default value.",
4581 opt_name);
4582 }
4583
4584 return ret;
4585 }
4586
4587 /*
4588 * config_entry_handler_cb used to handle options read from a config file.
4589 * See config_entry_handler_cb comment in common/config/config.h for the
4590 * return value conventions.
4591 */
4592 static int config_entry_handler(const struct config_entry *entry, void *unused)
4593 {
4594 int ret = 0, i;
4595
4596 if (!entry || !entry->name || !entry->value) {
4597 ret = -EINVAL;
4598 goto end;
4599 }
4600
4601 /* Check if the option is to be ignored */
4602 for (i = 0; i < sizeof(config_ignore_options) / sizeof(char *); i++) {
4603 if (!strcmp(entry->name, config_ignore_options[i])) {
4604 goto end;
4605 }
4606 }
4607
4608 for (i = 0; i < (sizeof(long_options) / sizeof(struct option)) - 1;
4609 i++) {
4610
4611 /* Ignore if not fully matched. */
4612 if (strcmp(entry->name, long_options[i].name)) {
4613 continue;
4614 }
4615
4616 /*
4617 * If the option takes no argument on the command line, we have to
4618 * check if the value is "true". We support non-zero numeric values,
4619 * true, on and yes.
4620 */
4621 if (!long_options[i].has_arg) {
4622 ret = config_parse_value(entry->value);
4623 if (ret <= 0) {
4624 if (ret) {
4625 WARN("Invalid configuration value \"%s\" for option %s",
4626 entry->value, entry->name);
4627 }
4628 /* False, skip boolean config option. */
4629 goto end;
4630 }
4631 }
4632
4633 ret = set_option(long_options[i].val, entry->value, entry->name);
4634 goto end;
4635 }
4636
4637 WARN("Unrecognized option \"%s\" in daemon configuration file.", entry->name);
4638
4639 end:
4640 return ret;
4641 }
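
/*
 * The boolean handling above relies on config_parse_value() from
 * common/config. As a rough, stand-alone sketch of the semantics described
 * in the comment (non-zero numbers, "true", "on" and "yes" are truthy),
 * assuming strcasecmp() from <strings.h> is available; the real helper may
 * differ in detail:
 */
static int example_parse_config_value(const char *value)
{
        char *endptr;
        long v;

        if (!value) {
                return -1;
        }
        if (!strcasecmp(value, "true") || !strcasecmp(value, "on") ||
                        !strcasecmp(value, "yes")) {
                return 1;
        }
        if (!strcasecmp(value, "false") || !strcasecmp(value, "off") ||
                        !strcasecmp(value, "no")) {
                return 0;
        }

        /* Otherwise, interpret the value as a number. */
        errno = 0;
        v = strtol(value, &endptr, 10);
        if (errno != 0 || *endptr != '\0') {
                return -1;
        }
        return (int) v;
}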
4642
4643 /*
4644 * daemon configuration loading and argument parsing
4645 */
4646 static int set_options(int argc, char **argv)
4647 {
4648 int ret = 0, c = 0, option_index = 0;
4649 int orig_optopt = optopt, orig_optind = optind;
4650 char *optstring;
4651 const char *config_path = NULL;
4652
4653 optstring = utils_generate_optstring(long_options,
4654 sizeof(long_options) / sizeof(struct option));
4655 if (!optstring) {
4656 ret = -ENOMEM;
4657 goto end;
4658 }
4659
4660 /* Check for the --config option */
4661 while ((c = getopt_long(argc, argv, optstring, long_options,
4662 &option_index)) != -1) {
4663 if (c == '?') {
4664 ret = -EINVAL;
4665 goto end;
4666 } else if (c != 'f') {
4667 /* if not equal to --config option. */
4668 continue;
4669 }
4670
4671 config_path = utils_expand_path(optarg);
4672 if (!config_path) {
4673 ERR("Failed to resolve path: %s", optarg);
4674 }
4675 }
4676
4677 ret = config_get_section_entries(config_path, config_section_name,
4678 config_entry_handler, NULL);
4679 if (ret) {
4680 if (ret > 0) {
4681 ERR("Invalid configuration option at line %i", ret);
4682 ret = -1;
4683 }
4684 goto end;
4685 }
4686
4687 /* Reset getopt's global state */
4688 optopt = orig_optopt;
4689 optind = orig_optind;
4690 while (1) {
4691 c = getopt_long(argc, argv, optstring, long_options, &option_index);
4692 if (c == -1) {
4693 break;
4694 }
4695
4696 ret = set_option(c, optarg, long_options[option_index].name);
4697 if (ret < 0) {
4698 break;
4699 }
4700 }
4701
4702 end:
4703 free(optstring);
4704 return ret;
4705 }
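
/*
 * For reference, the configuration file walked by
 * config_get_section_entries() above is an INI-style file whose entries are
 * fed one by one to config_entry_handler(). A plausible example; the exact
 * section name comes from config_section_name and the option names follow
 * long_options:
 *
 *   [sessiond]
 *   daemonize = yes
 *   verbose = 3
 *   no-kernel = true
 *   pidfile = /var/run/lttng/custom.pid
 */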
4706
4707 /*
4708 * Creates the two sockets needed by the daemon.
4709 * apps_sock - The communication socket for all UST apps.
4710 * client_sock - The communication socket for the cli tool (lttng).
4711 */
4712 static int init_daemon_socket(void)
4713 {
4714 int ret = 0;
4715 mode_t old_umask;
4716
4717 old_umask = umask(0);
4718
4719 /* Create client tool unix socket */
4720 client_sock = lttcomm_create_unix_sock(client_unix_sock_path);
4721 if (client_sock < 0) {
4722 ERR("Create unix sock failed: %s", client_unix_sock_path);
4723 ret = -1;
4724 goto end;
4725 }
4726
4727 /* Set the cloexec flag */
4728 ret = utils_set_fd_cloexec(client_sock);
4729 if (ret < 0) {
4730 ERR("Unable to set the CLOEXEC flag on the client Unix socket (fd: %d). "
4731 "Continuing, but note that the consumer daemon will inherit a "
4732 "reference to this socket across exec()", client_sock);
4733 }
4734
4735 /* File permission MUST be 660 */
4736 ret = chmod(client_unix_sock_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
4737 if (ret < 0) {
4738 ERR("Set file permissions failed: %s", client_unix_sock_path);
4739 PERROR("chmod");
4740 goto end;
4741 }
4742
4743 /* Create the application unix socket */
4744 apps_sock = lttcomm_create_unix_sock(apps_unix_sock_path);
4745 if (apps_sock < 0) {
4746 ERR("Create unix sock failed: %s", apps_unix_sock_path);
4747 ret = -1;
4748 goto end;
4749 }
4750
4751 /* Set the cloexec flag */
4752 ret = utils_set_fd_cloexec(apps_sock);
4753 if (ret < 0) {
4754 ERR("Unable to set the CLOEXEC flag on the app Unix socket (fd: %d). "
4755 "Continuing, but note that the consumer daemon will inherit a "
4756 "reference to this socket across exec()", apps_sock);
4757 }
4758
4759 /* File permission MUST be 666 */
4760 ret = chmod(apps_unix_sock_path,
4761 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
4762 if (ret < 0) {
4763 ERR("Set file permissions failed: %s", apps_unix_sock_path);
4764 PERROR("chmod");
4765 goto end;
4766 }
4767
4768 DBG3("Session daemon client socket %d and application socket %d created",
4769 client_sock, apps_sock);
4770
4771 end:
4772 umask(old_umask);
4773 return ret;
4774 }
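
/*
 * lttcomm_create_unix_sock() is provided by the communication library in
 * common/. As a minimal sketch of the usual pattern (assuming the
 * conventional socket()/bind() sequence, <sys/un.h> available, and that
 * stale socket files may be unlinked), it boils down to:
 */
static int example_create_unix_sock(const char *path)
{
        struct sockaddr_un addr;
        int fd, ret;

        fd = socket(PF_UNIX, SOCK_STREAM, 0);
        if (fd < 0) {
                PERROR("socket");
                return -1;
        }

        memset(&addr, 0, sizeof(addr));
        addr.sun_family = AF_UNIX;
        strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);

        /* Remove any stale socket file before binding. */
        (void) unlink(path);

        ret = bind(fd, (struct sockaddr *) &addr, sizeof(addr));
        if (ret < 0) {
                PERROR("bind");
                (void) close(fd);
                return -1;
        }
        return fd;
}

/*
 * Note that listening is a separate step in this file; see the
 * lttcomm_listen_unix_sock() call at the top of thread_manage_clients().
 */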
4775
4776 /*
4777 * Check whether the global socket is available and a daemon is answering on
4778 * the other side. If so, an error is returned.
4779 */
4780 static int check_existing_daemon(void)
4781 {
4782 /* Is there anybody out there ? */
4783 if (lttng_session_daemon_alive()) {
4784 return -EEXIST;
4785 }
4786
4787 return 0;
4788 }
4789
4790 /*
4791 * Set the tracing group gid onto the client socket.
4792 *
4793 * Race window between mkdir and chown is OK because we are going from more
4794 * permissive (root.root) to less permissive (root.tracing).
4795 */
4796 static int set_permissions(char *rundir)
4797 {
4798 int ret;
4799 gid_t gid;
4800
4801 gid = utils_get_group_id(tracing_group_name);
4802
4803 /* Set lttng run dir */
4804 ret = chown(rundir, 0, gid);
4805 if (ret < 0) {
4806 ERR("Unable to set group on %s", rundir);
4807 PERROR("chown");
4808 }
4809
4810 /*
4811 * Ensure all applications and tracing group can search the run
4812 * dir. Allow everyone to read the directory, since it does not
4813 * buy us anything to hide its content.
4814 */
4815 ret = chmod(rundir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
4816 if (ret < 0) {
4817 ERR("Unable to set permissions on %s", rundir);
4818 PERROR("chmod");
4819 }
4820
4821 /* lttng client socket path */
4822 ret = chown(client_unix_sock_path, 0, gid);
4823 if (ret < 0) {
4824 ERR("Unable to set group on %s", client_unix_sock_path);
4825 PERROR("chown");
4826 }
4827
4828 /* kconsumer error socket path */
4829 ret = chown(kconsumer_data.err_unix_sock_path, 0, 0);
4830 if (ret < 0) {
4831 ERR("Unable to set group on %s", kconsumer_data.err_unix_sock_path);
4832 PERROR("chown");
4833 }
4834
4835 /* 64-bit ustconsumer error socket path */
4836 ret = chown(ustconsumer64_data.err_unix_sock_path, 0, 0);
4837 if (ret < 0) {
4838 ERR("Unable to set group on %s", ustconsumer64_data.err_unix_sock_path);
4839 PERROR("chown");
4840 }
4841
4842 /* 32-bit ustconsumer compat32 error socket path */
4843 ret = chown(ustconsumer32_data.err_unix_sock_path, 0, 0);
4844 if (ret < 0) {
4845 ERR("Unable to set group on %s", ustconsumer32_data.err_unix_sock_path);
4846 PERROR("chown");
4847 }
4848
4849 DBG("All permissions are set");
4850
4851 return ret;
4852 }
4853
4854 /*
4855 * Create the lttng run directory needed for all global sockets and pipes.
4856 */
4857 static int create_lttng_rundir(const char *rundir)
4858 {
4859 int ret;
4860
4861 DBG3("Creating LTTng run directory: %s", rundir);
4862
4863 ret = mkdir(rundir, S_IRWXU);
4864 if (ret < 0) {
4865 if (errno != EEXIST) {
4866 ERR("Unable to create %s", rundir);
4867 goto error;
4868 } else {
4869 ret = 0;
4870 }
4871 }
4872
4873 error:
4874 return ret;
4875 }
4876
4877 /*
4878 * Set up the sockets and directory needed for communication between a
4879 * consumer daemon and the session daemon.
4880 */
4881 static int set_consumer_sockets(struct consumer_data *consumer_data,
4882 const char *rundir)
4883 {
4884 int ret;
4885 char path[PATH_MAX];
4886
4887 switch (consumer_data->type) {
4888 case LTTNG_CONSUMER_KERNEL:
4889 snprintf(path, PATH_MAX, DEFAULT_KCONSUMERD_PATH, rundir);
4890 break;
4891 case LTTNG_CONSUMER64_UST:
4892 snprintf(path, PATH_MAX, DEFAULT_USTCONSUMERD64_PATH, rundir);
4893 break;
4894 case LTTNG_CONSUMER32_UST:
4895 snprintf(path, PATH_MAX, DEFAULT_USTCONSUMERD32_PATH, rundir);
4896 break;
4897 default:
4898 ERR("Consumer type unknown");
4899 ret = -EINVAL;
4900 goto error;
4901 }
4902
4903 DBG2("Creating consumer directory: %s", path);
4904
4905 ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP);
4906 if (ret < 0) {
4907 if (errno != EEXIST) {
4908 PERROR("mkdir");
4909 ERR("Failed to create %s", path);
4910 goto error;
4911 }
4912 ret = 0; /* Directory already exists, which is fine. */
4913 }
4914 if (is_root) {
4915 ret = chown(path, 0, utils_get_group_id(tracing_group_name));
4916 if (ret < 0) {
4917 ERR("Unable to set group on %s", path);
4918 PERROR("chown");
4919 goto error;
4920 }
4921 }
4922
4923 /* Create the consumer daemon error unix socket */
4924 consumer_data->err_sock =
4925 lttcomm_create_unix_sock(consumer_data->err_unix_sock_path);
4926 if (consumer_data->err_sock < 0) {
4927 ERR("Create unix sock failed: %s", consumer_data->err_unix_sock_path);
4928 ret = -1;
4929 goto error;
4930 }
4931
4932 /*
4933 * Set the CLOEXEC flag. Return code is useless because either way, the
4934 * show must go on.
4935 */
4936 ret = utils_set_fd_cloexec(consumer_data->err_sock);
4937 if (ret < 0) {
4938 PERROR("utils_set_fd_cloexec");
4939 /* continue anyway */
4940 }
4941
4942 /* File permission MUST be 660 */
4943 ret = chmod(consumer_data->err_unix_sock_path,
4944 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
4945 if (ret < 0) {
4946 ERR("Set file permissions failed: %s", consumer_data->err_unix_sock_path);
4947 PERROR("chmod");
4948 goto error;
4949 }
4950
4951 error:
4952 return ret;
4953 }
4954
4955 /*
4956 * Signal handler for the daemon
4957 *
4958 * Simply stop all worker threads, letting main() return gracefully after
4959 * joining all threads and calling cleanup().
4960 */
4961 static void sighandler(int sig)
4962 {
4963 switch (sig) {
4964 case SIGPIPE:
4965 DBG("SIGPIPE caught");
4966 return;
4967 case SIGINT:
4968 DBG("SIGINT caught");
4969 stop_threads();
4970 break;
4971 case SIGTERM:
4972 DBG("SIGTERM caught");
4973 stop_threads();
4974 break;
4975 case SIGUSR1:
4976 CMM_STORE_SHARED(recv_child_signal, 1);
4977 break;
4978 default:
4979 break;
4980 }
4981 }
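
/*
 * Note that the SIGUSR1 case only stores a flag: lttng_daemonize() watches
 * recv_child_signal to learn when the child is ready, so the handler stays
 * minimal and async-signal-safe for that path.
 */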
4982
4983 /*
4984 * Setup signal handlers for:
4985 * SIGINT, SIGTERM, SIGPIPE and SIGUSR1
4986 */
4987 static int set_signal_handler(void)
4988 {
4989 int ret = 0;
4990 struct sigaction sa;
4991 sigset_t sigset;
4992
4993 if ((ret = sigemptyset(&sigset)) < 0) {
4994 PERROR("sigemptyset");
4995 return ret;
4996 }
4997
4998 sa.sa_handler = sighandler;
4999 sa.sa_mask = sigset;
5000 sa.sa_flags = 0;
5001 if ((ret = sigaction(SIGTERM, &sa, NULL)) < 0) {
5002 PERROR("sigaction");
5003 return ret;
5004 }
5005
5006 if ((ret = sigaction(SIGINT, &sa, NULL)) < 0) {
5007 PERROR("sigaction");
5008 return ret;
5009 }
5010
5011 if ((ret = sigaction(SIGPIPE, &sa, NULL)) < 0) {
5012 PERROR("sigaction");
5013 return ret;
5014 }
5015
5016 if ((ret = sigaction(SIGUSR1, &sa, NULL)) < 0) {
5017 PERROR("sigaction");
5018 return ret;
5019 }
5020
5021 DBG("Signal handler set for SIGTERM, SIGUSR1, SIGPIPE and SIGINT");
5022
5023 return ret;
5024 }
5025
5026 /*
5027 * Raise the open files limit. This daemon can open a large number of
5028 * file descriptors in order to consume multiple kernel traces.
5029 */
5030 static void set_ulimit(void)
5031 {
5032 int ret;
5033 struct rlimit lim;
5034
5035 /* The kernel does not allow an infinite limit for open files */
5036 lim.rlim_cur = 65535;
5037 lim.rlim_max = 65535;
5038
5039 ret = setrlimit(RLIMIT_NOFILE, &lim);
5040 if (ret < 0) {
5041 PERROR("failed to set open files limit");
5042 }
5043 }
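
/*
 * setrlimit() can only raise the soft limit up to the hard limit unless the
 * process has the required privileges (root or CAP_SYS_RESOURCE). A short
 * sketch of reading the limit back to confirm what was actually applied:
 */
static void example_report_nofile_limit(void)
{
        struct rlimit lim;

        if (getrlimit(RLIMIT_NOFILE, &lim) == 0) {
                DBG("Open files limit: soft %lu, hard %lu",
                                (unsigned long) lim.rlim_cur,
                                (unsigned long) lim.rlim_max);
        }
}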
5044
5045 /*
5046 * Write pidfile using the rundir and opt_pidfile.
5047 */
5048 static int write_pidfile(void)
5049 {
5050 int ret;
5051 char pidfile_path[PATH_MAX];
5052
5053 assert(rundir);
5054
5055 if (opt_pidfile) {
5056 strncpy(pidfile_path, opt_pidfile, sizeof(pidfile_path) - 1);
pidfile_path[sizeof(pidfile_path) - 1] = '\0';
5057 } else {
5058 /* Build pidfile path from rundir and the default pidfile name. */
5059 ret = snprintf(pidfile_path, sizeof(pidfile_path), "%s/"
5060 DEFAULT_LTTNG_SESSIOND_PIDFILE, rundir);
5061 if (ret < 0) {
5062 PERROR("snprintf pidfile path");
5063 goto error;
5064 }
5065 }
5066
5067 /*
5068 * Create pid file in rundir.
5069 */
5070 ret = utils_create_pid_file(getpid(), pidfile_path);
5071 error:
5072 return ret;
5073 }
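
/*
 * utils_create_pid_file() is shared with write_agent_port() below: it simply
 * formats a number into a file. A minimal sketch of that idea (the real
 * helper in common/utils.c may differ in permissions and error reporting):
 */
static int example_write_number_file(int value, const char *path)
{
        FILE *fp;
        int ret;

        fp = fopen(path, "w");
        if (!fp) {
                PERROR("fopen");
                return -1;
        }
        ret = fprintf(fp, "%d\n", value);
        if (fclose(fp) != 0 || ret < 0) {
                return -1;
        }
        return 0;
}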
5074
5075 /*
5076 * Create lockfile using the rundir and return its fd.
5077 */
5078 static int create_lockfile(void)
5079 {
5080 int ret;
5081 char lockfile_path[PATH_MAX];
5082
5083 ret = generate_lock_file_path(lockfile_path, sizeof(lockfile_path));
5084 if (ret < 0) {
5085 goto error;
5086 }
5087
5088 ret = utils_create_lock_file(lockfile_path);
5089 error:
5090 return ret;
5091 }
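
/*
 * The lock file is what guarantees a single session daemon per rundir. The
 * usual pattern, sketched here assuming a POSIX fcntl() write lock and
 * <fcntl.h> (the real utils_create_lock_file() may use a different
 * primitive), is a non-blocking exclusive lock that the kernel drops
 * automatically if the process dies, which is why lockfile_fd is kept open
 * for the daemon's whole lifetime:
 */
static int example_take_lock_file(const char *path)
{
        struct flock lock;
        int fd, ret;

        fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
        if (fd < 0) {
                PERROR("open lock file");
                return -1;
        }

        memset(&lock, 0, sizeof(lock));
        lock.l_type = F_WRLCK;
        lock.l_whence = SEEK_SET;
        /* l_start == 0 and l_len == 0 lock the whole file. */
        ret = fcntl(fd, F_SETLK, &lock);
        if (ret < 0) {
                /* Most likely held by another running daemon. */
                (void) close(fd);
                return -1;
        }
        return fd;
}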
5092
5093 /*
5094 * Write agent TCP port using the rundir.
5095 */
5096 static int write_agent_port(void)
5097 {
5098 int ret;
5099 char path[PATH_MAX];
5100
5101 assert(rundir);
5102
5103 ret = snprintf(path, sizeof(path), "%s/"
5104 DEFAULT_LTTNG_SESSIOND_AGENTPORT_FILE, rundir);
5105 if (ret < 0) {
5106 PERROR("snprintf agent port path");
5107 goto error;
5108 }
5109
5110 /*
5111 * Create TCP agent port file in rundir.
5112 */
5113 ret = utils_create_pid_file(agent_tcp_port, path);
5114
5115 error:
5116 return ret;
5117 }
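
/*
 * Persisting the port with the same helper as the pidfile lets agents
 * discover where to register: an agent can read this file from the rundir
 * and connect to the advertised TCP port (see agent-thread.c for the
 * accepting side).
 */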
5118
5119 /*
5120 * main
5121 */
5122 int main(int argc, char **argv)
5123 {
5124 int ret = 0, retval = 0;
5125 void *status;
5126 const char *home_path, *env_app_timeout;
5127
5128 init_kernel_workarounds();
5129
5130 rcu_register_thread();
5131
5132 if (set_signal_handler()) {
5133 retval = -1;
5134 goto exit_set_signal_handler;
5135 }
5136
5137 setup_consumerd_path();
5138
5139 page_size = sysconf(_SC_PAGESIZE);
5140 if (page_size < 0) {
5141 PERROR("sysconf _SC_PAGESIZE");
5142 page_size = LONG_MAX;
5143 WARN("Fallback page size to %ld", page_size);
5144 }
5145
5146 /*
5147 * Parse arguments and load the daemon configuration file.
5148 *
5149 * We have an exit_options exit path to free memory reserved by
5150 * set_options. This is needed because the rest of sessiond_cleanup()
5151 * depends on ht_cleanup_thread, which depends on lttng_daemonize, which
5152 * depends on set_options.
5153 */
5154 progname = argv[0];
5155 if (set_options(argc, argv)) {
5156 retval = -1;
5157 goto exit_options;
5158 }
5159
5160 /* Daemonize */
5161 if (opt_daemon || opt_background) {
5162 int i;
5163
5164 ret = lttng_daemonize(&child_ppid, &recv_child_signal,
5165 !opt_background);
5166 if (ret < 0) {
5167 retval = -1;
5168 goto exit_options;
5169 }
5170
5171 /*
5172 * We are in the child. Make sure all other file descriptors are
5173 * closed, in case we are called with more open file
5174 * descriptors than the standard ones.
5175 */
5176 for (i = 3; i < sysconf(_SC_OPEN_MAX); i++) {
5177 (void) close(i);
5178 }
5179 }
5180
5181 /*
5182 * Starting from here, we can create threads. This needs to be after
5183 * lttng_daemonize due to RCU.
5184 */
5185
5186 /*
5187 * Initialize the health check subsystem. This call should set the
5188 * appropriate time values.
5189 */
5190 health_sessiond = health_app_create(NR_HEALTH_SESSIOND_TYPES);
5191 if (!health_sessiond) {
5192 PERROR("health_app_create error");
5193 retval = -1;
5194 goto exit_health_sessiond_cleanup;
5195 }
5196
5197 if (init_ht_cleanup_quit_pipe()) {
5198 retval = -1;
5199 goto exit_ht_cleanup_quit_pipe;
5200 }
5201
5202 /* Setup the thread ht_cleanup communication pipe. */
5203 if (utils_create_pipe_cloexec(ht_cleanup_pipe)) {
5204 retval = -1;
5205 goto exit_ht_cleanup_pipe;
5206 }
5207
5208 /* Create thread to clean up RCU hash tables */
5209 ret = pthread_create(&ht_cleanup_thread, NULL,
5210 thread_ht_cleanup, (void *) NULL);
5211 if (ret) {
5212 errno = ret;
5213 PERROR("pthread_create ht_cleanup");
5214 retval = -1;
5215 goto exit_ht_cleanup;
5216 }
5217
5218 /* Create thread quit pipe */
5219 if (init_thread_quit_pipe()) {
5220 retval = -1;
5221 goto exit_init_data;
5222 }
5223
5224 /* Check if daemon is UID = 0 */
5225 is_root = !getuid();
5226
5227 if (is_root) {
5228 rundir = strdup(DEFAULT_LTTNG_RUNDIR);
5229 if (!rundir) {
5230 retval = -1;
5231 goto exit_init_data;
5232 }
5233
5234 /* Create global run dir with root access */
5235 if (create_lttng_rundir(rundir)) {
5236 retval = -1;
5237 goto exit_init_data;
5238 }
5239
5240 if (strlen(apps_unix_sock_path) == 0) {
5241 ret = snprintf(apps_unix_sock_path, PATH_MAX,
5242 DEFAULT_GLOBAL_APPS_UNIX_SOCK);
5243 if (ret < 0) {
5244 retval = -1;
5245 goto exit_init_data;
5246 }
5247 }
5248
5249 if (strlen(client_unix_sock_path) == 0) {
5250 ret = snprintf(client_unix_sock_path, PATH_MAX,
5251 DEFAULT_GLOBAL_CLIENT_UNIX_SOCK);
5252 if (ret < 0) {
5253 retval = -1;
5254 goto exit_init_data;
5255 }
5256 }
5257
5258 /* Set global SHM for ust */
5259 if (strlen(wait_shm_path) == 0) {
5260 ret = snprintf(wait_shm_path, PATH_MAX,
5261 DEFAULT_GLOBAL_APPS_WAIT_SHM_PATH);
5262 if (ret < 0) {
5263 retval = -1;
5264 goto exit_init_data;
5265 }
5266 }
5267
5268 if (strlen(health_unix_sock_path) == 0) {
5269 ret = snprintf(health_unix_sock_path,
5270 sizeof(health_unix_sock_path),
5271 DEFAULT_GLOBAL_HEALTH_UNIX_SOCK);
5272 if (ret < 0) {
5273 retval = -1;
5274 goto exit_init_data;
5275 }
5276 }
5277
5278 /* Setup kernel consumerd path */
5279 ret = snprintf(kconsumer_data.err_unix_sock_path, PATH_MAX,
5280 DEFAULT_KCONSUMERD_ERR_SOCK_PATH, rundir);
5281 if (ret < 0) {
5282 retval = -1;
5283 goto exit_init_data;
5284 }
5285 ret = snprintf(kconsumer_data.cmd_unix_sock_path, PATH_MAX,
5286 DEFAULT_KCONSUMERD_CMD_SOCK_PATH, rundir);
5287 if (ret < 0) {
5288 retval = -1;
5289 goto exit_init_data;
5290 }
5291
5292 DBG2("Kernel consumer err path: %s",
5293 kconsumer_data.err_unix_sock_path);
5294 DBG2("Kernel consumer cmd path: %s",
5295 kconsumer_data.cmd_unix_sock_path);
5296 } else {
5297 home_path = utils_get_home_dir();
5298 if (home_path == NULL) {
5299 /* TODO: Add --socket PATH option */
5300 ERR("Can't get HOME directory for socket creation.");
5301 retval = -1;
5302 goto exit_init_data;
5303 }
5304
5305 /*
5306 * Create rundir from home path. This will create something like
5307 * $HOME/.lttng
5308 */
5309 ret = asprintf(&rundir, DEFAULT_LTTNG_HOME_RUNDIR, home_path);
5310 if (ret < 0) {
5311 retval = -1;
5312 goto exit_init_data;
5313 }
5314
5315 if (create_lttng_rundir(rundir)) {
5316 retval = -1;
5317 goto exit_init_data;
5318 }
5319
5320 if (strlen(apps_unix_sock_path) == 0) {
5321 ret = snprintf(apps_unix_sock_path, PATH_MAX,
5322 DEFAULT_HOME_APPS_UNIX_SOCK,
5323 home_path);
5324 if (ret < 0) {
5325 retval = -1;
5326 goto exit_init_data;
5327 }
5328 }
5329
5330 /* Set the cli tool unix socket path */
5331 if (strlen(client_unix_sock_path) == 0) {
5332 ret = snprintf(client_unix_sock_path, PATH_MAX,
5333 DEFAULT_HOME_CLIENT_UNIX_SOCK,
5334 home_path);
5335 if (ret < 0) {
5336 retval = -1;
5337 goto exit_init_data;
5338 }
5339 }
5340
5341 /* Set global SHM for ust */
5342 if (strlen(wait_shm_path) == 0) {
5343 ret = snprintf(wait_shm_path, PATH_MAX,
5344 DEFAULT_HOME_APPS_WAIT_SHM_PATH,
5345 getuid());
5346 if (ret < 0) {
5347 retval = -1;
5348 goto exit_init_data;
5349 }
5350 }
5351
5352 /* Set health check Unix path */
5353 if (strlen(health_unix_sock_path) == 0) {
5354 ret = snprintf(health_unix_sock_path,
5355 sizeof(health_unix_sock_path),
5356 DEFAULT_HOME_HEALTH_UNIX_SOCK,
5357 home_path);
5358 if (ret < 0) {
5359 retval = -1;
5360 goto exit_init_data;
5361 }
5362 }
5363 }
5364
5365 lockfile_fd = create_lockfile();
5366 if (lockfile_fd < 0) {
5367 retval = -1;
5368 goto exit_init_data;
5369 }
5370
5371 /* Set consumer initial state */
5372 kernel_consumerd_state = CONSUMER_STOPPED;
5373 ust_consumerd_state = CONSUMER_STOPPED;
5374
5375 DBG("Client socket path %s", client_unix_sock_path);
5376 DBG("Application socket path %s", apps_unix_sock_path);
5377 DBG("Application wait path %s", wait_shm_path);
5378 DBG("LTTng run directory path: %s", rundir);
5379
5380 /* 32 bits consumerd path setup */
5381 ret = snprintf(ustconsumer32_data.err_unix_sock_path, PATH_MAX,
5382 DEFAULT_USTCONSUMERD32_ERR_SOCK_PATH, rundir);
5383 if (ret < 0) {
5384 PERROR("snprintf 32-bit consumer error socket path");
5385 retval = -1;
5386 goto exit_init_data;
5387 }
5388 ret = snprintf(ustconsumer32_data.cmd_unix_sock_path, PATH_MAX,
5389 DEFAULT_USTCONSUMERD32_CMD_SOCK_PATH, rundir);
5390 if (ret < 0) {
5391 PERROR("snprintf 32-bit consumer command socket path");
5392 retval = -1;
5393 goto exit_init_data;
5394 }
5395
5396 DBG2("UST consumer 32 bits err path: %s",
5397 ustconsumer32_data.err_unix_sock_path);
5398 DBG2("UST consumer 32 bits cmd path: %s",
5399 ustconsumer32_data.cmd_unix_sock_path);
5400
5401 /* 64 bits consumerd path setup */
5402 ret = snprintf(ustconsumer64_data.err_unix_sock_path, PATH_MAX,
5403 DEFAULT_USTCONSUMERD64_ERR_SOCK_PATH, rundir);
5404 if (ret < 0) {
5405 PERROR("snprintf 64-bit consumer error socket path");
5406 retval = -1;
5407 goto exit_init_data;
5408 }
5409 ret = snprintf(ustconsumer64_data.cmd_unix_sock_path, PATH_MAX,
5410 DEFAULT_USTCONSUMERD64_CMD_SOCK_PATH, rundir);
5411 if (ret < 0) {
5412 PERROR("snprintf 64-bit consumer command socket path");
5413 retval = -1;
5414 goto exit_init_data;
5415 }
5416
5417 DBG2("UST consumer 64 bits err path: %s",
5418 ustconsumer64_data.err_unix_sock_path);
5419 DBG2("UST consumer 64 bits cmd path: %s",
5420 ustconsumer64_data.cmd_unix_sock_path);
5421
5422 /*
5423 * See if a daemon already exists.
5424 */
5425 if (check_existing_daemon()) {
5426 ERR("A session daemon is already running.\n");
5427 /*
5428 * We do not goto exit because we must not call cleanup()
5429 * while another daemon is already running.
5430 */
5431 retval = -1;
5432 goto exit_init_data;
5433 }
5434
5435 /*
5436 * Init UST app hash table. Allocate the hash table before this point
5437 * since cleanup() can get called any time after it.
5438 */
5439 if (ust_app_ht_alloc()) {
5440 retval = -1;
5441 goto exit_init_data;
5442 }
5443
5444 /* Initialize agent domain subsystem. */
5445 if (agent_setup()) {
5446 /* ENOMEM at this point. */
5447 retval = -1;
5448 goto exit_init_data;
5449 }
5450
5451 /*
5452 * These actions must be executed as root. We do that *after* setting up
5453 * the sockets path because we MUST make the check for another daemon using
5454 * those paths *before* trying to set the kernel consumer sockets and init
5455 * kernel tracer.
5456 */
5457 if (is_root) {
5458 if (set_consumer_sockets(&kconsumer_data, rundir)) {
5459 retval = -1;
5460 goto exit_init_data;
5461 }
5462
5463 /* Setup kernel tracer */
5464 if (!opt_no_kernel) {
5465 init_kernel_tracer();
5466 if (kernel_tracer_fd >= 0) {
5467 ret = syscall_init_table();
5468 if (ret < 0) {
5469 ERR("Unable to populate syscall table. "
5470 "Syscall tracing won't work "
5471 "for this session daemon.");
5472 }
5473 }
5474 }
5475
5476 /* Set ulimit for open files */
5477 set_ulimit();
5478 }
5479 /* init lttng_fd tracking must be done after set_ulimit. */
5480 lttng_fd_init();
5481
5482 if (set_consumer_sockets(&ustconsumer64_data, rundir)) {
5483 retval = -1;
5484 goto exit_init_data;
5485 }
5486
5487 if (set_consumer_sockets(&ustconsumer32_data, rundir)) {
5488 retval = -1;
5489 goto exit_init_data;
5490 }
5491
5492 /* Setup the needed unix socket */
5493 if (init_daemon_socket()) {
5494 retval = -1;
5495 goto exit_init_data;
5496 }
5497
5498 /* Set credentials to socket */
5499 if (is_root && set_permissions(rundir)) {
5500 retval = -1;
5501 goto exit_init_data;
5502 }
5503
5504 /* Get parent pid if -S, --sig-parent is specified. */
5505 if (opt_sig_parent) {
5506 ppid = getppid();
5507 }
5508
5509 /* Setup the kernel pipe for waking up the kernel thread */
5510 if (is_root && !opt_no_kernel) {
5511 if (utils_create_pipe_cloexec(kernel_poll_pipe)) {
5512 retval = -1;
5513 goto exit_init_data;
5514 }
5515 }
5516
5517 /* Setup the thread apps communication pipe. */
5518 if (utils_create_pipe_cloexec(apps_cmd_pipe)) {
5519 retval = -1;
5520 goto exit_init_data;
5521 }
5522
5523 /* Setup the thread apps notify communication pipe. */
5524 if (utils_create_pipe_cloexec(apps_cmd_notify_pipe)) {
5525 retval = -1;
5526 goto exit_init_data;
5527 }
5528
5529 /* Initialize global buffer per UID and PID registry. */
5530 buffer_reg_init_uid_registry();
5531 buffer_reg_init_pid_registry();
5532
5533 /* Init UST command queue. */
5534 cds_wfcq_init(&ust_cmd_queue.head, &ust_cmd_queue.tail);
5535
5536 /*
5537 * Get session list pointer. This pointer MUST NOT be free'd. This list
5538 * is statically declared in session.c
5539 */
5540 session_list_ptr = session_get_list();
5541
5542 /* Set up max poll set size */
5543 lttng_poll_set_max_size();
5544
5545 cmd_init();
5546
5547 /* Check for the application socket timeout env variable. */
5548 env_app_timeout = getenv(DEFAULT_APP_SOCKET_TIMEOUT_ENV);
5549 if (env_app_timeout) {
5550 app_socket_timeout = atoi(env_app_timeout);
5551 } else {
5552 app_socket_timeout = DEFAULT_APP_SOCKET_RW_TIMEOUT;
5553 }
5554
5555 ret = write_pidfile();
5556 if (ret) {
5557 ERR("Error in write_pidfile");
5558 retval = -1;
5559 goto exit_init_data;
5560 }
5561 ret = write_agent_port();
5562 if (ret) {
5563 ERR("Error in write_agent_port");
5564 retval = -1;
5565 goto exit_init_data;
5566 }
5567
5568 /* Initialize communication library */
5569 lttcomm_init();
5570 /* Initialize TCP timeout values */
5571 lttcomm_inet_init();
5572
5573 if (load_session_init_data(&load_info) < 0) {
5574 retval = -1;
5575 goto exit_init_data;
5576 }
5577 load_info->path = opt_load_session_path;
5578
5579 /* Create health-check thread */
5580 ret = pthread_create(&health_thread, NULL,
5581 thread_manage_health, (void *) NULL);
5582 if (ret) {
5583 errno = ret;
5584 PERROR("pthread_create health");
5585 retval = -1;
5586 goto exit_health;
5587 }
5588
5589 /* Create thread to manage the client socket */
5590 ret = pthread_create(&client_thread, NULL,
5591 thread_manage_clients, (void *) NULL);
5592 if (ret) {
5593 errno = ret;
5594 PERROR("pthread_create clients");
5595 retval = -1;
5596 goto exit_client;
5597 }
5598
5599 /* Create thread to dispatch registration */
5600 ret = pthread_create(&dispatch_thread, NULL,
5601 thread_dispatch_ust_registration, (void *) NULL);
5602 if (ret) {
5603 errno = ret;
5604 PERROR("pthread_create dispatch");
5605 retval = -1;
5606 goto exit_dispatch;
5607 }
5608
5609 /* Create thread to manage application registration. */
5610 ret = pthread_create(&reg_apps_thread, NULL,
5611 thread_registration_apps, (void *) NULL);
5612 if (ret) {
5613 errno = ret;
5614 PERROR("pthread_create registration");
5615 retval = -1;
5616 goto exit_reg_apps;
5617 }
5618
5619 /* Create thread to manage application socket */
5620 ret = pthread_create(&apps_thread, NULL,
5621 thread_manage_apps, (void *) NULL);
5622 if (ret) {
5623 errno = ret;
5624 PERROR("pthread_create apps");
5625 retval = -1;
5626 goto exit_apps;
5627 }
5628
5629 /* Create thread to manage application notify socket */
5630 ret = pthread_create(&apps_notify_thread, NULL,
5631 ust_thread_manage_notify, (void *) NULL);
5632 if (ret) {
5633 errno = ret;
5634 PERROR("pthread_create notify");
5635 retval = -1;
5636 goto exit_apps_notify;
5637 }
5638
5639 /* Create agent registration thread. */
5640 ret = pthread_create(&agent_reg_thread, NULL,
5641 agent_thread_manage_registration, (void *) NULL);
5642 if (ret) {
5643 errno = ret;
5644 PERROR("pthread_create agent");
5645 retval = -1;
5646 goto exit_agent_reg;
5647 }
5648
5649 /* Don't start this thread if kernel tracing is not requested or we are not root. */
5650 if (is_root && !opt_no_kernel) {
5651 /* Create kernel thread to manage kernel event */
5652 ret = pthread_create(&kernel_thread, NULL,
5653 thread_manage_kernel, (void *) NULL);
5654 if (ret) {
5655 errno = ret;
5656 PERROR("pthread_create kernel");
5657 retval = -1;
5658 goto exit_kernel;
5659 }
5660 }
5661
5662 /* Create session loading thread. */
5663 ret = pthread_create(&load_session_thread, NULL, thread_load_session,
5664 load_info);
5665 if (ret) {
5666 errno = ret;
5667 PERROR("pthread_create load_session_thread");
5668 retval = -1;
5669 goto exit_load_session;
5670 }
5671
5672 /*
5673 * This is where we start awaiting program completion (e.g. through a
5674 * signal that asks threads to tear down).
5675 */
5676
5677 ret = pthread_join(load_session_thread, &status);
5678 if (ret) {
5679 errno = ret;
5680 PERROR("pthread_join load_session_thread");
5681 retval = -1;
5682 }
5683 exit_load_session:
5684
5685 if (is_root && !opt_no_kernel) {
5686 ret = pthread_join(kernel_thread, &status);
5687 if (ret) {
5688 errno = ret;
5689 PERROR("pthread_join");
5690 retval = -1;
5691 }
5692 }
5693 exit_kernel:
5694
5695 ret = pthread_join(agent_reg_thread, &status);
5696 if (ret) {
5697 errno = ret;
5698 PERROR("pthread_join agent");
5699 retval = -1;
5700 }
5701 exit_agent_reg:
5702
5703 ret = pthread_join(apps_notify_thread, &status);
5704 if (ret) {
5705 errno = ret;
5706 PERROR("pthread_join apps notify");
5707 retval = -1;
5708 }
5709 exit_apps_notify:
5710
5711 ret = pthread_join(apps_thread, &status);
5712 if (ret) {
5713 errno = ret;
5714 PERROR("pthread_join apps");
5715 retval = -1;
5716 }
5717 exit_apps:
5718
5719 ret = pthread_join(reg_apps_thread, &status);
5720 if (ret) {
5721 errno = ret;
5722 PERROR("pthread_join");
5723 retval = -1;
5724 }
5725 exit_reg_apps:
5726
5727 ret = pthread_join(dispatch_thread, &status);
5728 if (ret) {
5729 errno = ret;
5730 PERROR("pthread_join");
5731 retval = -1;
5732 }
5733 exit_dispatch:
5734
5735 ret = pthread_join(client_thread, &status);
5736 if (ret) {
5737 errno = ret;
5738 PERROR("pthread_join");
5739 retval = -1;
5740 }
5741 exit_client:
5742
5743 ret = pthread_join(health_thread, &status);
5744 if (ret) {
5745 errno = ret;
5746 PERROR("pthread_join health thread");
5747 retval = -1;
5748 }
5749 exit_health:
5750
5751 exit_init_data:
5752 /*
5753 * sessiond_cleanup() is called when no other thread is running, except
5754 * the ht_cleanup thread, which is needed to destroy the hash tables.
5755 */
5756 rcu_thread_online();
5757 sessiond_cleanup();
5758 rcu_thread_offline();
5759 rcu_unregister_thread();
5760
5761 ret = notify_thread_pipe(ht_cleanup_quit_pipe[1]);
5762 if (ret < 0) {
5763 ERR("write error on ht_cleanup quit pipe");
5764 retval = -1;
5765 }
5766
5767 ret = pthread_join(ht_cleanup_thread, &status);
5768 if (ret) {
5769 errno = ret;
5770 PERROR("pthread_join ht cleanup thread");
5771 retval = -1;
5772 }
5773 exit_ht_cleanup:
5774
5775 utils_close_pipe(ht_cleanup_pipe);
5776 exit_ht_cleanup_pipe:
5777
5778 /*
5779 * Close the ht_cleanup quit pipe.
5780 */
5781 utils_close_pipe(ht_cleanup_quit_pipe);
5782 exit_ht_cleanup_quit_pipe:
5783
5784 health_app_destroy(health_sessiond);
5785 exit_health_sessiond_cleanup:
5786
5787 exit_options:
5788 sessiond_cleanup_options();
5789
5790 exit_set_signal_handler:
5791 if (!retval) {
5792 exit(EXIT_SUCCESS);
5793 } else {
5794 exit(EXIT_FAILURE);
5795 }
5796 }