Fix: take session list lock when listing tp
src/bin/lttng-sessiond/main.c
1 /*
2 * Copyright (C) 2011 - David Goulet <david.goulet@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2 only,
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 */
18
19 #define _GNU_SOURCE
20 #include <getopt.h>
21 #include <grp.h>
22 #include <limits.h>
23 #include <paths.h>
24 #include <pthread.h>
25 #include <signal.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <inttypes.h>
30 #include <sys/mman.h>
31 #include <sys/mount.h>
32 #include <sys/resource.h>
33 #include <sys/socket.h>
34 #include <sys/stat.h>
35 #include <sys/types.h>
36 #include <sys/wait.h>
37 #include <urcu/uatomic.h>
38 #include <unistd.h>
39 #include <config.h>
40
41 #include <common/common.h>
42 #include <common/compat/socket.h>
43 #include <common/defaults.h>
44 #include <common/kernel-consumer/kernel-consumer.h>
45 #include <common/futex.h>
46 #include <common/relayd/relayd.h>
47 #include <common/utils.h>
48 #include <common/daemonize.h>
49
50 #include "lttng-sessiond.h"
51 #include "buffer-registry.h"
52 #include "channel.h"
53 #include "cmd.h"
54 #include "consumer.h"
55 #include "context.h"
56 #include "event.h"
57 #include "kernel.h"
58 #include "kernel-consumer.h"
59 #include "modprobe.h"
60 #include "shm.h"
61 #include "ust-ctl.h"
62 #include "ust-consumer.h"
63 #include "utils.h"
64 #include "fd-limit.h"
65 #include "health-sessiond.h"
66 #include "testpoint.h"
67 #include "ust-thread.h"
68 #include "jul-thread.h"
69
70 #define CONSUMERD_FILE "lttng-consumerd"
71
72 const char *progname;
73 static const char *tracing_group_name = DEFAULT_TRACING_GROUP;
74 static const char *opt_pidfile;
75 static int opt_sig_parent;
76 static int opt_verbose_consumer;
77 static int opt_daemon, opt_background;
78 static int opt_no_kernel;
79 static pid_t ppid; /* Parent PID for --sig-parent option */
80 static pid_t child_ppid; /* Internal parent PID use with daemonize. */
81 static char *rundir;
82
83 /* Set to 1 when a SIGUSR1 signal is received. */
84 static int recv_child_signal;
85
86 /*
87 * Consumer daemon specific control data. Every value not initialized here is
88 * set to 0 by the static definition.
89 */
90 static struct consumer_data kconsumer_data = {
91 .type = LTTNG_CONSUMER_KERNEL,
92 .err_unix_sock_path = DEFAULT_KCONSUMERD_ERR_SOCK_PATH,
93 .cmd_unix_sock_path = DEFAULT_KCONSUMERD_CMD_SOCK_PATH,
94 .err_sock = -1,
95 .cmd_sock = -1,
96 .pid_mutex = PTHREAD_MUTEX_INITIALIZER,
97 .lock = PTHREAD_MUTEX_INITIALIZER,
98 .cond = PTHREAD_COND_INITIALIZER,
99 .cond_mutex = PTHREAD_MUTEX_INITIALIZER,
100 };
101 static struct consumer_data ustconsumer64_data = {
102 .type = LTTNG_CONSUMER64_UST,
103 .err_unix_sock_path = DEFAULT_USTCONSUMERD64_ERR_SOCK_PATH,
104 .cmd_unix_sock_path = DEFAULT_USTCONSUMERD64_CMD_SOCK_PATH,
105 .err_sock = -1,
106 .cmd_sock = -1,
107 .pid_mutex = PTHREAD_MUTEX_INITIALIZER,
108 .lock = PTHREAD_MUTEX_INITIALIZER,
109 .cond = PTHREAD_COND_INITIALIZER,
110 .cond_mutex = PTHREAD_MUTEX_INITIALIZER,
111 };
112 static struct consumer_data ustconsumer32_data = {
113 .type = LTTNG_CONSUMER32_UST,
114 .err_unix_sock_path = DEFAULT_USTCONSUMERD32_ERR_SOCK_PATH,
115 .cmd_unix_sock_path = DEFAULT_USTCONSUMERD32_CMD_SOCK_PATH,
116 .err_sock = -1,
117 .cmd_sock = -1,
118 .pid_mutex = PTHREAD_MUTEX_INITIALIZER,
119 .lock = PTHREAD_MUTEX_INITIALIZER,
120 .cond = PTHREAD_COND_INITIALIZER,
121 .cond_mutex = PTHREAD_MUTEX_INITIALIZER,
122 };
123
124 /* Shared between threads */
125 static int dispatch_thread_exit;
126
127 /* Global application Unix socket path */
128 static char apps_unix_sock_path[PATH_MAX];
129 /* Global client Unix socket path */
130 static char client_unix_sock_path[PATH_MAX];
131 /* Global wait shm path for UST */
132 static char wait_shm_path[PATH_MAX];
133 /* Global health check unix path */
134 static char health_unix_sock_path[PATH_MAX];
135
136 /* Sockets and FDs */
137 static int client_sock = -1;
138 static int apps_sock = -1;
139 int kernel_tracer_fd = -1;
140 static int kernel_poll_pipe[2] = { -1, -1 };
141
142 /*
143 * Quit pipe for all threads. This permits a single cancellation point
144 * for all threads when receiving an event on the pipe.
145 */
146 static int thread_quit_pipe[2] = { -1, -1 };
147
148 /*
149 * This pipe is used to inform the thread managing application communication
150 * that a command is queued and ready to be processed.
151 */
152 static int apps_cmd_pipe[2] = { -1, -1 };
153
154 int apps_cmd_notify_pipe[2] = { -1, -1 };
155
156 /* Pthread, Mutexes and Semaphores */
157 static pthread_t apps_thread;
158 static pthread_t apps_notify_thread;
159 static pthread_t reg_apps_thread;
160 static pthread_t client_thread;
161 static pthread_t kernel_thread;
162 static pthread_t dispatch_thread;
163 static pthread_t health_thread;
164 static pthread_t ht_cleanup_thread;
165 static pthread_t jul_reg_thread;
166
167 /*
168 * UST registration command queue. This queue is tied to a futex and uses an
169 * N-wakers / 1-waiter scheme implemented and detailed in futex.c/.h
170 *
171 * The thread_manage_apps and thread_dispatch_ust_registration interact with
172 * this queue and the wait/wake scheme.
173 */
174 static struct ust_cmd_queue ust_cmd_queue;
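
/*
 * Illustrative sketch, not part of the original file: how the N-wakers /
 * 1-waiter scheme above is used, distilled from thread_registration_apps()
 * (waker) and thread_dispatch_ust_registration() (waiter) further below.
 */
#if 0	/* example only */
/* Waker side: lock-free enqueue, then wake the futex. */
cds_wfq_enqueue(&ust_cmd_queue.queue, &ust_cmd->node);
futex_nto1_wake(&ust_cmd_queue.futex);

/* Waiter side: prepare the futex, drain the queue, then block. */
struct cds_wfq_node *node;

futex_nto1_prepare(&ust_cmd_queue.futex);
do {
	node = cds_wfq_dequeue_blocking(&ust_cmd_queue.queue);
	/* ... dispatch the dequeued ust_command, if any ... */
} while (node != NULL);
futex_nto1_wait(&ust_cmd_queue.futex);
#endif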
175
176 /*
177 * Pointer initialized before thread creation.
178 *
179 * This points to the tracing session list containing the session count and a
180 * mutex lock. The lock MUST be taken if you iterate over the list. The lock
181 * MUST NOT be taken if you call a public function in session.c.
182 *
183 * The lock is nested inside the structure: session_list_ptr->lock. Please use
184 * session_lock_list and session_unlock_list for lock acquisition.
185 */
186 static struct ltt_session_list *session_list_ptr;
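
/*
 * Illustrative sketch, not part of the original file: the locking protocol
 * described above, as practiced by update_kernel_poll() and
 * update_kernel_stream() below. The list lock protects the list itself;
 * each session is additionally locked while its contents are inspected.
 */
#if 0	/* example only */
struct ltt_session *session;

session_lock_list();
cds_list_for_each_entry(session, &session_list_ptr->head, list) {
	session_lock(session);
	/* ... read or update the session ... */
	session_unlock(session);
}
session_unlock_list();
#endif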
187
188 int ust_consumerd64_fd = -1;
189 int ust_consumerd32_fd = -1;
190
191 static const char *consumerd32_bin = CONFIG_CONSUMERD32_BIN;
192 static const char *consumerd64_bin = CONFIG_CONSUMERD64_BIN;
193 static const char *consumerd32_libdir = CONFIG_CONSUMERD32_LIBDIR;
194 static const char *consumerd64_libdir = CONFIG_CONSUMERD64_LIBDIR;
195
196 static const char *module_proc_lttng = "/proc/lttng";
197
198 /*
199 * Consumer daemon state which is changed when spawning it, killing it or in
200 * case of a fatal error.
201 */
202 enum consumerd_state {
203 CONSUMER_STARTED = 1,
204 CONSUMER_STOPPED = 2,
205 CONSUMER_ERROR = 3,
206 };
207
208 /*
209 * This consumer daemon state is used to validate if a client command will be
210 * able to reach the consumer. If not, the client is informed. For instance,
211 * doing a "lttng start" when the consumer state is set to ERROR will return an
212 * error to the client.
213 *
214 * The following example shows a possible race condition of this scheme:
215 *
216 * consumer thread error happens
217 * client cmd arrives
218 * client cmd checks state -> still OK
219 * consumer thread exit, sets error
220 * client cmd try to talk to consumer
221 * ...
222 *
223 * However, since the consumer is a different daemon, we have no way of making
224 * sure the command will reach it safely even with this state flag. This is why
225 * we consider that up to the state validation during command processing, the
226 * command is safe. After that, we cannot guarantee the correctness of the
227 * client request vis-a-vis the consumer.
228 */
229 static enum consumerd_state ust_consumerd_state;
230 static enum consumerd_state kernel_consumerd_state;
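
/*
 * Illustrative sketch, not part of the original file: the validation
 * described above as it would appear on a client command path (compare
 * update_ust_app() below). Past this check, delivery to the consumer is
 * best-effort only.
 */
#if 0	/* example only */
if (uatomic_read(&kernel_consumerd_state) == CONSUMER_ERROR) {
	/* Inform the client instead of trying to reach the dead consumer. */
	return -1;	/* or an appropriate LTTNG_ERR_* code */
}
/* ... state looked fine; send the command, which may still fail ... */
#endif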
231
232 /*
233 * Socket timeout for receiving and sending in seconds.
234 */
235 static int app_socket_timeout;
236
237 /* Set in main() with the current page size. */
238 long page_size;
239
240 /* Application health monitoring */
241 struct health_app *health_sessiond;
242
243 /* JUL TCP port for registration. Used by the JUL thread. */
244 unsigned int jul_tcp_port = DEFAULT_JUL_TCP_PORT;
245
246 /* Am I root or not. */
247 int is_root; /* Set to 1 if the daemon is running as root */
248
249 /*
250 * Whether sessiond is ready for commands/health check requests.
251 * NR_LTTNG_SESSIOND_READY must match the number of calls to
252 * lttng_sessiond_notify_ready().
253 */
254 #define NR_LTTNG_SESSIOND_READY 2
255 int lttng_sessiond_ready = NR_LTTNG_SESSIOND_READY;
256
257 /* Notify parents that we are ready for cmd and health check */
258 static
259 void lttng_sessiond_notify_ready(void)
260 {
261 if (uatomic_sub_return(&lttng_sessiond_ready, 1) == 0) {
262 /*
263 * Notify parent pid that we are ready to accept command
264 * for client side. This ppid is the one from the
265 * external process that spawned us.
266 */
267 if (opt_sig_parent) {
268 kill(ppid, SIGUSR1);
269 }
270
271 /*
272 * Notify the parent of the fork() process that we are
273 * ready.
274 */
275 if (opt_daemon || opt_background) {
276 kill(child_ppid, SIGUSR1);
277 }
278 }
279 }
280
281 static
282 void setup_consumerd_path(void)
283 {
284 const char *bin, *libdir;
285
286 /*
287 * Allow INSTALL_BIN_PATH to be used as a target path for the
288 * native architecture size consumer if CONFIG_CONSUMERD*_BIN/LIBDIR
289 * have not been defined.
290 */
291 #if (CAA_BITS_PER_LONG == 32)
292 if (!consumerd32_bin[0]) {
293 consumerd32_bin = INSTALL_BIN_PATH "/" CONSUMERD_FILE;
294 }
295 if (!consumerd32_libdir[0]) {
296 consumerd32_libdir = INSTALL_LIB_PATH;
297 }
298 #elif (CAA_BITS_PER_LONG == 64)
299 if (!consumerd64_bin[0]) {
300 consumerd64_bin = INSTALL_BIN_PATH "/" CONSUMERD_FILE;
301 }
302 if (!consumerd64_libdir[0]) {
303 consumerd64_libdir = INSTALL_LIB_PATH;
304 }
305 #else
306 #error "Unknown bitness"
307 #endif
308
309 /*
310 * Runtime environment variables override the build defaults.
311 */
312 bin = getenv("LTTNG_CONSUMERD32_BIN");
313 if (bin) {
314 consumerd32_bin = bin;
315 }
316 bin = getenv("LTTNG_CONSUMERD64_BIN");
317 if (bin) {
318 consumerd64_bin = bin;
319 }
320 libdir = getenv("LTTNG_CONSUMERD32_LIBDIR");
321 if (libdir) {
322 consumerd32_libdir = libdir;
323 }
324 libdir = getenv("LTTNG_CONSUMERD64_LIBDIR");
325 if (libdir) {
326 consumerd64_libdir = libdir;
327 }
328 }
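
/*
 * Usage note, not part of the original file: the environment overrides
 * above make it possible to point the daemon at out-of-tree consumers
 * without rebuilding, e.g. (hypothetical paths):
 *
 *   LTTNG_CONSUMERD64_BIN=/opt/lttng/libexec/lttng-consumerd \
 *   LTTNG_CONSUMERD64_LIBDIR=/opt/lttng/lib \
 *       lttng-sessiond
 */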
329
330 /*
331 * Create a poll set with O_CLOEXEC and add the thread quit pipe to the set.
332 */
333 int sessiond_set_thread_pollset(struct lttng_poll_event *events, size_t size)
334 {
335 int ret;
336
337 assert(events);
338
339 ret = lttng_poll_create(events, size, LTTNG_CLOEXEC);
340 if (ret < 0) {
341 goto error;
342 }
343
344 /* Add quit pipe */
345 ret = lttng_poll_add(events, thread_quit_pipe[0], LPOLLIN | LPOLLERR);
346 if (ret < 0) {
347 goto error;
348 }
349
350 return 0;
351
352 error:
353 return ret;
354 }
355
356 /*
357 * Check if the thread quit pipe was triggered.
358 *
359 * Return 1 if it was triggered, else 0.
360 */
361 int sessiond_check_thread_quit_pipe(int fd, uint32_t events)
362 {
363 if (fd == thread_quit_pipe[0] && (events & LPOLLIN)) {
364 return 1;
365 }
366
367 return 0;
368 }
369
370 /*
371 * Init thread quit pipe.
372 *
373 * Return -1 on error or 0 if all pipes are created.
374 */
375 static int init_thread_quit_pipe(void)
376 {
377 int ret, i;
378
379 ret = pipe(thread_quit_pipe);
380 if (ret < 0) {
381 PERROR("thread quit pipe");
382 goto error;
383 }
384
385 for (i = 0; i < 2; i++) {
386 ret = fcntl(thread_quit_pipe[i], F_SETFD, FD_CLOEXEC);
387 if (ret < 0) {
388 PERROR("fcntl");
389 goto error;
390 }
391 }
392
393 error:
394 return ret;
395 }
396
397 /*
398 * Stop all threads by closing the thread quit pipe.
399 */
400 static void stop_threads(void)
401 {
402 int ret;
403
404 /* Stopping all threads */
405 DBG("Terminating all threads");
406 ret = notify_thread_pipe(thread_quit_pipe[1]);
407 if (ret < 0) {
408 ERR("write error on thread quit pipe");
409 }
410
411 /* Dispatch thread */
412 CMM_STORE_SHARED(dispatch_thread_exit, 1);
413 futex_nto1_wake(&ust_cmd_queue.futex);
414 }
415
416 /*
417 * Close every consumer socket.
418 */
419 static void close_consumer_sockets(void)
420 {
421 int ret;
422
423 if (kconsumer_data.err_sock >= 0) {
424 ret = close(kconsumer_data.err_sock);
425 if (ret < 0) {
426 PERROR("kernel consumer err_sock close");
427 }
428 }
429 if (ustconsumer32_data.err_sock >= 0) {
430 ret = close(ustconsumer32_data.err_sock);
431 if (ret < 0) {
432 PERROR("UST consumerd32 err_sock close");
433 }
434 }
435 if (ustconsumer64_data.err_sock >= 0) {
436 ret = close(ustconsumer64_data.err_sock);
437 if (ret < 0) {
438 PERROR("UST consumerd64 err_sock close");
439 }
440 }
441 if (kconsumer_data.cmd_sock >= 0) {
442 ret = close(kconsumer_data.cmd_sock);
443 if (ret < 0) {
444 PERROR("kernel consumer cmd_sock close");
445 }
446 }
447 if (ustconsumer32_data.cmd_sock >= 0) {
448 ret = close(ustconsumer32_data.cmd_sock);
449 if (ret < 0) {
450 PERROR("UST consumerd32 cmd_sock close");
451 }
452 }
453 if (ustconsumer64_data.cmd_sock >= 0) {
454 ret = close(ustconsumer64_data.cmd_sock);
455 if (ret < 0) {
456 PERROR("UST consumerd64 cmd_sock close");
457 }
458 }
459 }
460
461 /*
462 * Clean up the daemon.
463 */
464 static void cleanup(void)
465 {
466 int ret;
467 struct ltt_session *sess, *stmp;
468 char path[PATH_MAX];
469
470 DBG("Cleaning up");
471
472 /*
473 * Close the thread quit pipe. It has already done its job,
474 * since we are now called.
475 */
476 utils_close_pipe(thread_quit_pipe);
477
478 /*
479 * If opt_pidfile is undefined, the default file will be wiped when
480 * removing the rundir.
481 */
482 if (opt_pidfile) {
483 ret = remove(opt_pidfile);
484 if (ret < 0) {
485 PERROR("remove pidfile %s", opt_pidfile);
486 }
487 }
488
489 DBG("Removing sessiond and consumerd content of directory %s", rundir);
490
491 /* sessiond */
492 snprintf(path, PATH_MAX,
493 "%s/%s",
494 rundir, DEFAULT_LTTNG_SESSIOND_PIDFILE);
495 DBG("Removing %s", path);
496 (void) unlink(path);
497
498 snprintf(path, PATH_MAX, "%s/%s", rundir,
499 DEFAULT_LTTNG_SESSIOND_JULPORT_FILE);
500 DBG("Removing %s", path);
501 (void) unlink(path);
502
503 /* kconsumerd */
504 snprintf(path, PATH_MAX,
505 DEFAULT_KCONSUMERD_ERR_SOCK_PATH,
506 rundir);
507 DBG("Removing %s", path);
508 (void) unlink(path);
509
510 snprintf(path, PATH_MAX,
511 DEFAULT_KCONSUMERD_PATH,
512 rundir);
513 DBG("Removing directory %s", path);
514 (void) rmdir(path);
515
516 /* ust consumerd 32 */
517 snprintf(path, PATH_MAX,
518 DEFAULT_USTCONSUMERD32_ERR_SOCK_PATH,
519 rundir);
520 DBG("Removing %s", path);
521 (void) unlink(path);
522
523 snprintf(path, PATH_MAX,
524 DEFAULT_USTCONSUMERD32_PATH,
525 rundir);
526 DBG("Removing directory %s", path);
527 (void) rmdir(path);
528
529 /* ust consumerd 64 */
530 snprintf(path, PATH_MAX,
531 DEFAULT_USTCONSUMERD64_ERR_SOCK_PATH,
532 rundir);
533 DBG("Removing %s", path);
534 (void) unlink(path);
535
536 snprintf(path, PATH_MAX,
537 DEFAULT_USTCONSUMERD64_PATH,
538 rundir);
539 DBG("Removing directory %s", path);
540 (void) rmdir(path);
541
542 /*
543 * We do NOT rmdir rundir because there are other processes
544 * using it, for instance lttng-relayd, which can start in
545 * parallel with this teardown.
546 */
547
548 free(rundir);
549
550 DBG("Cleaning up all sessions");
551
552 /* Destroy session list mutex */
553 if (session_list_ptr != NULL) {
554 pthread_mutex_destroy(&session_list_ptr->lock);
555
556 /* Clean up ALL sessions */
557 cds_list_for_each_entry_safe(sess, stmp,
558 &session_list_ptr->head, list) {
559 cmd_destroy_session(sess, kernel_poll_pipe[1]);
560 }
561 }
562
563 DBG("Closing all UST sockets");
564 ust_app_clean_list();
565 buffer_reg_destroy_registries();
566
567 if (is_root && !opt_no_kernel) {
568 DBG2("Closing kernel fd");
569 if (kernel_tracer_fd >= 0) {
570 ret = close(kernel_tracer_fd);
571 if (ret) {
572 PERROR("close");
573 }
574 }
575 DBG("Unloading kernel modules");
576 modprobe_remove_lttng_all();
577 }
578
579 close_consumer_sockets();
580
581 /* <fun> */
582 DBG("%c[%d;%dm*** assert failed :-) *** ==> %c[%dm%c[%d;%dm"
583 "Matthew, BEET driven development works!%c[%dm",
584 27, 1, 31, 27, 0, 27, 1, 33, 27, 0);
585 /* </fun> */
586 }
587
588 /*
589 * Send data on a unix socket using the liblttsessiondcomm API.
590 *
591 * Return lttcomm error code.
592 */
593 static int send_unix_sock(int sock, void *buf, size_t len)
594 {
595 /* Check valid length */
596 if (len == 0) {
597 return -1;
598 }
599
600 return lttcomm_send_unix_sock(sock, buf, len);
601 }
602
603 /*
604 * Free memory of a command context structure.
605 */
606 static void clean_command_ctx(struct command_ctx **cmd_ctx)
607 {
608 DBG("Clean command context structure");
609 if (*cmd_ctx) {
610 if ((*cmd_ctx)->llm) {
611 free((*cmd_ctx)->llm);
612 }
613 if ((*cmd_ctx)->lsm) {
614 free((*cmd_ctx)->lsm);
615 }
616 free(*cmd_ctx);
617 *cmd_ctx = NULL;
618 }
619 }
620
621 /*
622 * Notify UST applications using the shm mmap futex.
623 */
624 static int notify_ust_apps(int active)
625 {
626 char *wait_shm_mmap;
627
628 DBG("Notifying applications of session daemon state: %d", active);
629
630 /* See shm.c for this call implying mmap, shm and futex calls */
631 wait_shm_mmap = shm_ust_get_mmap(wait_shm_path, is_root);
632 if (wait_shm_mmap == NULL) {
633 goto error;
634 }
635
636 /* Wake waiting process */
637 futex_wait_update((int32_t *) wait_shm_mmap, active);
638
639 /* Apps notified successfully */
640 return 0;
641
642 error:
643 return -1;
644 }
645
646 /*
647 * Set up the outgoing data buffer for the response (llm) by allocating the
648 * right amount of memory and copying the original information from the lsm
649 * structure.
650 *
651 * Return the size of the allocated payload buffer or a negative value on error.
652 */
653 static int setup_lttng_msg(struct command_ctx *cmd_ctx, size_t size)
654 {
655 int ret, buf_size;
656
657 buf_size = size;
658
659 cmd_ctx->llm = zmalloc(sizeof(struct lttcomm_lttng_msg) + buf_size);
660 if (cmd_ctx->llm == NULL) {
661 PERROR("zmalloc");
662 ret = -ENOMEM;
663 goto error;
664 }
665
666 /* Copy common data */
667 cmd_ctx->llm->cmd_type = cmd_ctx->lsm->cmd_type;
668 cmd_ctx->llm->pid = cmd_ctx->lsm->domain.attr.pid;
669
670 cmd_ctx->llm->data_size = size;
671 cmd_ctx->lttng_msg_size = sizeof(struct lttcomm_lttng_msg) + buf_size;
672
673 return buf_size;
674
675 error:
676 return ret;
677 }
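
/*
 * Illustrative sketch, not part of the original file: a typical caller
 * allocates the reply with setup_lttng_msg() and copies its payload right
 * after the lttcomm_lttng_msg header. The payload layout is command
 * specific; nb_sessions and sessions_payload are hypothetical here.
 */
#if 0	/* example only */
ret = setup_lttng_msg(cmd_ctx, sizeof(struct lttng_session) * nb_sessions);
if (ret < 0) {
	goto setup_error;
}
memcpy((char *) cmd_ctx->llm + sizeof(struct lttcomm_lttng_msg),
		sessions_payload, ret);
#endif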
678
679 /*
680 * Update the kernel poll set with every channel fd available over all tracing
681 * sessions. Add the wakeup pipe at the end of the set.
682 */
683 static int update_kernel_poll(struct lttng_poll_event *events)
684 {
685 int ret;
686 struct ltt_session *session;
687 struct ltt_kernel_channel *channel;
688
689 DBG("Updating kernel poll set");
690
691 session_lock_list();
692 cds_list_for_each_entry(session, &session_list_ptr->head, list) {
693 session_lock(session);
694 if (session->kernel_session == NULL) {
695 session_unlock(session);
696 continue;
697 }
698
699 cds_list_for_each_entry(channel,
700 &session->kernel_session->channel_list.head, list) {
701 /* Add channel fd to the kernel poll set */
702 ret = lttng_poll_add(events, channel->fd, LPOLLIN | LPOLLRDNORM);
703 if (ret < 0) {
704 session_unlock(session);
705 goto error;
706 }
707 DBG("Channel fd %d added to kernel set", channel->fd);
708 }
709 session_unlock(session);
710 }
711 session_unlock_list();
712
713 return 0;
714
715 error:
716 session_unlock_list();
717 return -1;
718 }
719
720 /*
721 * Find the channel fd matching 'fd' over all tracing sessions. When found, check
722 * for new channel streams and send those stream fds to the kernel consumer.
723 *
724 * Useful for CPU hotplug feature.
725 */
726 static int update_kernel_stream(struct consumer_data *consumer_data, int fd)
727 {
728 int ret = 0;
729 struct ltt_session *session;
730 struct ltt_kernel_session *ksess;
731 struct ltt_kernel_channel *channel;
732
733 DBG("Updating kernel streams for channel fd %d", fd);
734
735 session_lock_list();
736 cds_list_for_each_entry(session, &session_list_ptr->head, list) {
737 session_lock(session);
738 if (session->kernel_session == NULL) {
739 session_unlock(session);
740 continue;
741 }
742 ksess = session->kernel_session;
743
744 cds_list_for_each_entry(channel, &ksess->channel_list.head, list) {
745 if (channel->fd == fd) {
746 DBG("Channel found, updating kernel streams");
747 ret = kernel_open_channel_stream(channel);
748 if (ret < 0) {
749 goto error;
750 }
751 /* Update the stream global counter */
752 ksess->stream_count_global += ret;
753
754 /*
755 * Have we already sent fds to the consumer? If yes, it means
756 * that tracing is started so it is safe to send our updated
757 * stream fds.
758 */
759 if (ksess->consumer_fds_sent == 1 && ksess->consumer != NULL) {
760 struct lttng_ht_iter iter;
761 struct consumer_socket *socket;
762
763 rcu_read_lock();
764 cds_lfht_for_each_entry(ksess->consumer->socks->ht,
765 &iter.iter, socket, node.node) {
766 pthread_mutex_lock(socket->lock);
767 ret = kernel_consumer_send_channel_stream(socket,
768 channel, ksess,
769 session->output_traces ? 1 : 0);
770 pthread_mutex_unlock(socket->lock);
771 if (ret < 0) {
772 rcu_read_unlock();
773 goto error;
774 }
775 }
776 rcu_read_unlock();
777 }
778 goto error;
779 }
780 }
781 session_unlock(session);
782 }
783 session_unlock_list();
784 return ret;
785
786 error:
787 session_unlock(session);
788 session_unlock_list();
789 return ret;
790 }
791
792 /*
793 * For each tracing session, update newly registered apps. The session list
794 * lock MUST be acquired before calling this.
795 */
796 static void update_ust_app(int app_sock)
797 {
798 struct ltt_session *sess, *stmp;
799
800 /* Consumer is in an ERROR state. Stop any application update. */
801 if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) {
802 /* Stop the update process since the consumer is dead. */
803 return;
804 }
805
806 /* For all tracing session(s) */
807 cds_list_for_each_entry_safe(sess, stmp, &session_list_ptr->head, list) {
808 session_lock(sess);
809 if (sess->ust_session) {
810 ust_app_global_update(sess->ust_session, app_sock);
811 }
812 session_unlock(sess);
813 }
814 }
815
816 /*
817 * This thread manages events coming from the kernel.
818 *
819 * Features supported in this thread:
820 * -) CPU Hotplug
821 */
822 static void *thread_manage_kernel(void *data)
823 {
824 int ret, i, pollfd, update_poll_flag = 1, err = -1;
825 uint32_t revents, nb_fd;
826 char tmp;
827 struct lttng_poll_event events;
828
829 DBG("[thread] Thread manage kernel started");
830
831 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_KERNEL);
832
833 /*
834 * The first step of the while loop cleans this structure, which could free
835 * non-NULL pointers, so initialize it before the loop.
836 */
837 lttng_poll_init(&events);
838
839 if (testpoint(sessiond_thread_manage_kernel)) {
840 goto error_testpoint;
841 }
842
843 health_code_update();
844
845 if (testpoint(sessiond_thread_manage_kernel_before_loop)) {
846 goto error_testpoint;
847 }
848
849 while (1) {
850 health_code_update();
851
852 if (update_poll_flag == 1) {
853 /* Clean events object. We are about to populate it again. */
854 lttng_poll_clean(&events);
855
856 ret = sessiond_set_thread_pollset(&events, 2);
857 if (ret < 0) {
858 goto error_poll_create;
859 }
860
861 ret = lttng_poll_add(&events, kernel_poll_pipe[0], LPOLLIN);
862 if (ret < 0) {
863 goto error;
864 }
865
866 /* This will add the available kernel channel if any. */
867 ret = update_kernel_poll(&events);
868 if (ret < 0) {
869 goto error;
870 }
871 update_poll_flag = 0;
872 }
873
874 DBG("Thread kernel polling on %d fds", LTTNG_POLL_GETNB(&events));
875
876 /* Poll with an infinite timeout */
877 restart:
878 health_poll_entry();
879 ret = lttng_poll_wait(&events, -1);
880 health_poll_exit();
881 if (ret < 0) {
882 /*
883 * Restart interrupted system call.
884 */
885 if (errno == EINTR) {
886 goto restart;
887 }
888 goto error;
889 } else if (ret == 0) {
890 /* Should not happen since timeout is infinite */
891 ERR("Return value of poll is 0 with an infinite timeout.\n"
892 "This should not have happened! Continuing...");
893 continue;
894 }
895
896 nb_fd = ret;
897
898 for (i = 0; i < nb_fd; i++) {
899 /* Fetch once the poll data */
900 revents = LTTNG_POLL_GETEV(&events, i);
901 pollfd = LTTNG_POLL_GETFD(&events, i);
902
903 health_code_update();
904
905 /* Thread quit pipe has been closed. Killing thread. */
906 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
907 if (ret) {
908 err = 0;
909 goto exit;
910 }
911
912 /* Check for data on kernel pipe */
913 if (pollfd == kernel_poll_pipe[0] && (revents & LPOLLIN)) {
914 (void) lttng_read(kernel_poll_pipe[0],
915 &tmp, 1);
916 /*
917 * The return value is useless here; if this pipe sees any action, an
918 * update is required anyway.
919 */
920 update_poll_flag = 1;
921 continue;
922 } else {
923 /*
924 * New CPU detected by the kernel. Adding kernel stream to
925 * kernel session and updating the kernel consumer
926 */
927 if (revents & LPOLLIN) {
928 ret = update_kernel_stream(&kconsumer_data, pollfd);
929 if (ret < 0) {
930 continue;
931 }
932 break;
933 /*
934 * TODO: We might want to handle the LPOLLERR | LPOLLHUP
935 * and unregister kernel stream at this point.
936 */
937 }
938 }
939 }
940 }
941
942 exit:
943 error:
944 lttng_poll_clean(&events);
945 error_poll_create:
946 error_testpoint:
947 utils_close_pipe(kernel_poll_pipe);
948 kernel_poll_pipe[0] = kernel_poll_pipe[1] = -1;
949 if (err) {
950 health_error();
951 ERR("Health error occurred in %s", __func__);
952 WARN("Kernel thread died unexpectedly. "
953 "Kernel tracing can continue but CPU hotplug is disabled.");
954 }
955 health_unregister(health_sessiond);
956 DBG("Kernel thread dying");
957 return NULL;
958 }
959
960 /*
961 * Signal the consumer data's pthread condition to wake up the waiting thread.
962 */
963 static void signal_consumer_condition(struct consumer_data *data, int state)
964 {
965 pthread_mutex_lock(&data->cond_mutex);
966
967 /*
968 * The state is set before signaling. It can be any value; it's the waiter's
969 * job to correctly interpret the condition variable associated with the
970 * consumer pthread_cond.
971 *
972 * A value of 0 means that the corresponding thread of the consumer data
973 * was not started. 1 indicates that the thread has started and is ready
974 * for action. A negative value means that there was an error during the
975 * thread bootstrap.
976 */
977 data->consumer_thread_is_ready = state;
978 (void) pthread_cond_signal(&data->cond);
979
980 pthread_mutex_unlock(&data->cond_mutex);
981 }
982
983 /*
984 * This thread manages consumer errors sent back to the session daemon.
985 */
986 static void *thread_manage_consumer(void *data)
987 {
988 int sock = -1, i, ret, pollfd, err = -1;
989 uint32_t revents, nb_fd;
990 enum lttcomm_return_code code;
991 struct lttng_poll_event events;
992 struct consumer_data *consumer_data = data;
993
994 DBG("[thread] Manage consumer started");
995
996 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_CONSUMER);
997
998 health_code_update();
999
1000 /*
1001 * Pass 3 as size here for the thread quit pipe, consumerd_err_sock and the
1002 * metadata_sock. Nothing more will be added to this poll set.
1003 */
1004 ret = sessiond_set_thread_pollset(&events, 3);
1005 if (ret < 0) {
1006 goto error_poll;
1007 }
1008
1009 /*
1010 * The error socket here is already in a listening state which was done
1011 * just before spawning this thread to avoid a race between the consumer
1012 * daemon exec trying to connect and the listen() call.
1013 */
1014 ret = lttng_poll_add(&events, consumer_data->err_sock, LPOLLIN | LPOLLRDHUP);
1015 if (ret < 0) {
1016 goto error;
1017 }
1018
1019 health_code_update();
1020
1021 /* Infinite blocking call, waiting for transmission */
1022 restart:
1023 health_poll_entry();
1024
1025 if (testpoint(sessiond_thread_manage_consumer)) {
1026 goto error;
1027 }
1028
1029 ret = lttng_poll_wait(&events, -1);
1030 health_poll_exit();
1031 if (ret < 0) {
1032 /*
1033 * Restart interrupted system call.
1034 */
1035 if (errno == EINTR) {
1036 goto restart;
1037 }
1038 goto error;
1039 }
1040
1041 nb_fd = ret;
1042
1043 for (i = 0; i < nb_fd; i++) {
1044 /* Fetch once the poll data */
1045 revents = LTTNG_POLL_GETEV(&events, i);
1046 pollfd = LTTNG_POLL_GETFD(&events, i);
1047
1048 health_code_update();
1049
1050 /* Thread quit pipe has been closed. Killing thread. */
1051 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
1052 if (ret) {
1053 err = 0;
1054 goto exit;
1055 }
1056
1057 /* Event on the registration socket */
1058 if (pollfd == consumer_data->err_sock) {
1059 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1060 ERR("consumer err socket poll error");
1061 goto error;
1062 }
1063 }
1064 }
1065
1066 sock = lttcomm_accept_unix_sock(consumer_data->err_sock);
1067 if (sock < 0) {
1068 goto error;
1069 }
1070
1071 /*
1072 * Set the CLOEXEC flag. Return code is useless because either way, the
1073 * show must go on.
1074 */
1075 (void) utils_set_fd_cloexec(sock);
1076
1077 health_code_update();
1078
1079 DBG2("Receiving code from consumer err_sock");
1080
1081 /* Getting status code from kconsumerd */
1082 ret = lttcomm_recv_unix_sock(sock, &code,
1083 sizeof(enum lttcomm_return_code));
1084 if (ret <= 0) {
1085 goto error;
1086 }
1087
1088 health_code_update();
1089 if (code == LTTCOMM_CONSUMERD_COMMAND_SOCK_READY) {
1090 /* Connect both socket, command and metadata. */
1091 consumer_data->cmd_sock =
1092 lttcomm_connect_unix_sock(consumer_data->cmd_unix_sock_path);
1093 consumer_data->metadata_fd =
1094 lttcomm_connect_unix_sock(consumer_data->cmd_unix_sock_path);
1095 if (consumer_data->cmd_sock < 0
1096 || consumer_data->metadata_fd < 0) {
1097 PERROR("consumer connect cmd socket");
1098 /* On error, signal condition and quit. */
1099 signal_consumer_condition(consumer_data, -1);
1100 goto error;
1101 }
1102 consumer_data->metadata_sock.fd_ptr = &consumer_data->metadata_fd;
1103 /* Create metadata socket lock. */
1104 consumer_data->metadata_sock.lock = zmalloc(sizeof(pthread_mutex_t));
1105 if (consumer_data->metadata_sock.lock == NULL) {
1106 PERROR("zmalloc pthread mutex");
1107 ret = -1;
1108 goto error;
1109 }
1110 pthread_mutex_init(consumer_data->metadata_sock.lock, NULL);
1111
1112 signal_consumer_condition(consumer_data, 1);
1113 DBG("Consumer command socket ready (fd: %d", consumer_data->cmd_sock);
1114 DBG("Consumer metadata socket ready (fd: %d)",
1115 consumer_data->metadata_fd);
1116 } else {
1117 ERR("consumer error when waiting for SOCK_READY : %s",
1118 lttcomm_get_readable_code(-code));
1119 goto error;
1120 }
1121
1122 /* Remove the consumerd error sock since we've established a connection */
1123 ret = lttng_poll_del(&events, consumer_data->err_sock);
1124 if (ret < 0) {
1125 goto error;
1126 }
1127
1128 /* Add new accepted error socket. */
1129 ret = lttng_poll_add(&events, sock, LPOLLIN | LPOLLRDHUP);
1130 if (ret < 0) {
1131 goto error;
1132 }
1133
1134 /* Add metadata socket that is successfully connected. */
1135 ret = lttng_poll_add(&events, consumer_data->metadata_fd,
1136 LPOLLIN | LPOLLRDHUP);
1137 if (ret < 0) {
1138 goto error;
1139 }
1140
1141 health_code_update();
1142
1143 /* Infinite blocking call, waiting for transmission */
1144 restart_poll:
1145 while (1) {
1146 health_poll_entry();
1147 ret = lttng_poll_wait(&events, -1);
1148 health_poll_exit();
1149 if (ret < 0) {
1150 /*
1151 * Restart interrupted system call.
1152 */
1153 if (errno == EINTR) {
1154 goto restart_poll;
1155 }
1156 goto error;
1157 }
1158
1159 nb_fd = ret;
1160
1161 for (i = 0; i < nb_fd; i++) {
1162 /* Fetch once the poll data */
1163 revents = LTTNG_POLL_GETEV(&events, i);
1164 pollfd = LTTNG_POLL_GETFD(&events, i);
1165
1166 health_code_update();
1167
1168 /* Thread quit pipe has been closed. Killing thread. */
1169 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
1170 if (ret) {
1171 err = 0;
1172 goto exit;
1173 }
1174
1175 if (pollfd == sock) {
1176 /* Event on the consumerd socket */
1177 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1178 ERR("consumer err socket second poll error");
1179 goto error;
1180 }
1181 health_code_update();
1182 /* Wait for any kconsumerd error */
1183 ret = lttcomm_recv_unix_sock(sock, &code,
1184 sizeof(enum lttcomm_return_code));
1185 if (ret <= 0) {
1186 ERR("consumer closed the command socket");
1187 goto error;
1188 }
1189
1190 ERR("consumer return code : %s",
1191 lttcomm_get_readable_code(-code));
1192
1193 goto exit;
1194 } else if (pollfd == consumer_data->metadata_fd) {
1195 /* UST metadata requests */
1196 ret = ust_consumer_metadata_request(
1197 &consumer_data->metadata_sock);
1198 if (ret < 0) {
1199 ERR("Handling metadata request");
1200 goto error;
1201 }
1202 break;
1203 } else {
1204 ERR("Unknown pollfd");
1205 goto error;
1206 }
1207 }
1208 health_code_update();
1209 }
1210
1211 exit:
1212 error:
1213 /*
1214 * We lock here because we are about to close the sockets and some other
1215 * thread might be using them, so take exclusive access, which will abort all
1216 * other consumer commands issued by other threads.
1217 */
1218 pthread_mutex_lock(&consumer_data->lock);
1219
1220 /* Immediately set the consumerd state to stopped */
1221 if (consumer_data->type == LTTNG_CONSUMER_KERNEL) {
1222 uatomic_set(&kernel_consumerd_state, CONSUMER_ERROR);
1223 } else if (consumer_data->type == LTTNG_CONSUMER64_UST ||
1224 consumer_data->type == LTTNG_CONSUMER32_UST) {
1225 uatomic_set(&ust_consumerd_state, CONSUMER_ERROR);
1226 } else {
1227 /* Code flow error... */
1228 assert(0);
1229 }
1230
1231 if (consumer_data->err_sock >= 0) {
1232 ret = close(consumer_data->err_sock);
1233 if (ret) {
1234 PERROR("close");
1235 }
1236 consumer_data->err_sock = -1;
1237 }
1238 if (consumer_data->cmd_sock >= 0) {
1239 ret = close(consumer_data->cmd_sock);
1240 if (ret) {
1241 PERROR("close");
1242 }
1243 consumer_data->cmd_sock = -1;
1244 }
1245 if (consumer_data->metadata_sock.fd_ptr &&
1246 *consumer_data->metadata_sock.fd_ptr >= 0) {
1247 ret = close(*consumer_data->metadata_sock.fd_ptr);
1248 if (ret) {
1249 PERROR("close");
1250 }
1251 }
1252 if (sock >= 0) {
1253 ret = close(sock);
1254 if (ret) {
1255 PERROR("close");
1256 }
1257 }
1258
1259 unlink(consumer_data->err_unix_sock_path);
1260 unlink(consumer_data->cmd_unix_sock_path);
1261 consumer_data->pid = 0;
1262 pthread_mutex_unlock(&consumer_data->lock);
1263
1264 /* Cleanup metadata socket mutex. */
1265 if (consumer_data->metadata_sock.lock) {
1266 pthread_mutex_destroy(consumer_data->metadata_sock.lock);
1267 free(consumer_data->metadata_sock.lock);
1268 }
1269 lttng_poll_clean(&events);
1270 error_poll:
1271 if (err) {
1272 health_error();
1273 ERR("Health error occurred in %s", __func__);
1274 }
1275 health_unregister(health_sessiond);
1276 DBG("consumer thread cleanup completed");
1277
1278 return NULL;
1279 }
1280
1281 /*
1282 * This thread manages application communication.
1283 */
1284 static void *thread_manage_apps(void *data)
1285 {
1286 int i, ret, pollfd, err = -1;
1287 ssize_t size_ret;
1288 uint32_t revents, nb_fd;
1289 struct lttng_poll_event events;
1290
1291 DBG("[thread] Manage application started");
1292
1293 rcu_register_thread();
1294 rcu_thread_online();
1295
1296 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_MANAGE);
1297
1298 if (testpoint(sessiond_thread_manage_apps)) {
1299 goto error_testpoint;
1300 }
1301
1302 health_code_update();
1303
1304 ret = sessiond_set_thread_pollset(&events, 2);
1305 if (ret < 0) {
1306 goto error_poll_create;
1307 }
1308
1309 ret = lttng_poll_add(&events, apps_cmd_pipe[0], LPOLLIN | LPOLLRDHUP);
1310 if (ret < 0) {
1311 goto error;
1312 }
1313
1314 if (testpoint(sessiond_thread_manage_apps_before_loop)) {
1315 goto error;
1316 }
1317
1318 health_code_update();
1319
1320 while (1) {
1321 DBG("Apps thread polling on %d fds", LTTNG_POLL_GETNB(&events));
1322
1323 /* Infinite blocking call, waiting for transmission */
1324 restart:
1325 health_poll_entry();
1326 ret = lttng_poll_wait(&events, -1);
1327 health_poll_exit();
1328 if (ret < 0) {
1329 /*
1330 * Restart interrupted system call.
1331 */
1332 if (errno == EINTR) {
1333 goto restart;
1334 }
1335 goto error;
1336 }
1337
1338 nb_fd = ret;
1339
1340 for (i = 0; i < nb_fd; i++) {
1341 /* Fetch once the poll data */
1342 revents = LTTNG_POLL_GETEV(&events, i);
1343 pollfd = LTTNG_POLL_GETFD(&events, i);
1344
1345 health_code_update();
1346
1347 /* Thread quit pipe has been closed. Killing thread. */
1348 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
1349 if (ret) {
1350 err = 0;
1351 goto exit;
1352 }
1353
1354 /* Inspect the apps cmd pipe */
1355 if (pollfd == apps_cmd_pipe[0]) {
1356 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1357 ERR("Apps command pipe error");
1358 goto error;
1359 } else if (revents & LPOLLIN) {
1360 int sock;
1361
1362 /* Empty pipe */
1363 size_ret = lttng_read(apps_cmd_pipe[0], &sock, sizeof(sock));
1364 if (size_ret < (ssize_t) sizeof(sock)) {
1365 PERROR("read apps cmd pipe");
1366 goto error;
1367 }
1368
1369 health_code_update();
1370
1371 /*
1372 * We only monitor the error events of the socket. This
1373 * thread does not handle any incoming data from UST
1374 * (POLLIN).
1375 */
1376 ret = lttng_poll_add(&events, sock,
1377 LPOLLERR | LPOLLHUP | LPOLLRDHUP);
1378 if (ret < 0) {
1379 goto error;
1380 }
1381
1382 DBG("Apps with sock %d added to poll set", sock);
1383 }
1384 } else {
1385 /*
1386 * At this point, we know that a registered application made
1387 * the event at poll_wait.
1388 */
1389 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1390 /* Removing from the poll set */
1391 ret = lttng_poll_del(&events, pollfd);
1392 if (ret < 0) {
1393 goto error;
1394 }
1395
1396 /* Socket closed on remote end. */
1397 ust_app_unregister(pollfd);
1398 }
1399 }
1400
1401 health_code_update();
1402 }
1403 }
1404
1405 exit:
1406 error:
1407 lttng_poll_clean(&events);
1408 error_poll_create:
1409 error_testpoint:
1410 utils_close_pipe(apps_cmd_pipe);
1411 apps_cmd_pipe[0] = apps_cmd_pipe[1] = -1;
1412
1413 /*
1414 * We don't clean the UST app hash table here since already registered
1415 * applications can still be controlled so let them be until the session
1416 * daemon dies or the applications stop.
1417 */
1418
1419 if (err) {
1420 health_error();
1421 ERR("Health error occurred in %s", __func__);
1422 }
1423 health_unregister(health_sessiond);
1424 DBG("Application communication apps thread cleanup complete");
1425 rcu_thread_offline();
1426 rcu_unregister_thread();
1427 return NULL;
1428 }
1429
1430 /*
1431 * Send a socket to a thread. This is called from the dispatch UST registration
1432 * thread once all sockets are set for the application.
1433 *
1434 * The sock value can be invalid, we don't really care, the thread will handle
1435 * it and make the necessary cleanup if so.
1436 *
1437 * On success, return 0; on failure, return a negative value, the errno of the
1438 * failed write().
1439 */
1440 static int send_socket_to_thread(int fd, int sock)
1441 {
1442 ssize_t ret;
1443
1444 /*
1445 * It's possible that the FD is concurrently set to the invalid value -1 just
1446 * before calling this function, as part of the thread's shutdown.
1447 */
1448 if (fd < 0) {
1449 ret = -EBADF;
1450 goto error;
1451 }
1452
1453 ret = lttng_write(fd, &sock, sizeof(sock));
1454 if (ret < (ssize_t) sizeof(sock)) {
1455 PERROR("write apps pipe %d", fd);
1456 if (ret < 0) {
1457 ret = -errno;
1458 }
1459 goto error;
1460 }
1461
1462 /* All good. Don't send back the write positive ret value. */
1463 ret = 0;
1464 error:
1465 return (int) ret;
1466 }
1467
1468 /*
1469 * Sanitize the wait queue of the dispatch registration thread, i.e. remove
1470 * invalid nodes from it. This avoids memory leaks in case the UST notify
1471 * socket is never received.
1472 */
1473 static void sanitize_wait_queue(struct ust_reg_wait_queue *wait_queue)
1474 {
1475 int ret, nb_fd = 0, i;
1476 unsigned int fd_added = 0;
1477 struct lttng_poll_event events;
1478 struct ust_reg_wait_node *wait_node = NULL, *tmp_wait_node;
1479
1480 assert(wait_queue);
1481
1482 lttng_poll_init(&events);
1483
1484 /* Just skip everything for an empty queue. */
1485 if (!wait_queue->count) {
1486 goto end;
1487 }
1488
1489 ret = lttng_poll_create(&events, wait_queue->count, LTTNG_CLOEXEC);
1490 if (ret < 0) {
1491 goto error_create;
1492 }
1493
1494 cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
1495 &wait_queue->head, head) {
1496 assert(wait_node->app);
1497 ret = lttng_poll_add(&events, wait_node->app->sock,
1498 LPOLLHUP | LPOLLERR);
1499 if (ret < 0) {
1500 goto error;
1501 }
1502
1503 fd_added = 1;
1504 }
1505
1506 if (!fd_added) {
1507 goto end;
1508 }
1509
1510 /*
1511 * Poll but don't block so we can quickly identify the faulty events and
1512 * clean them afterwards from the wait queue.
1513 */
1514 ret = lttng_poll_wait(&events, 0);
1515 if (ret < 0) {
1516 goto error;
1517 }
1518 nb_fd = ret;
1519
1520 for (i = 0; i < nb_fd; i++) {
1521 /* Get faulty FD. */
1522 uint32_t revents = LTTNG_POLL_GETEV(&events, i);
1523 int pollfd = LTTNG_POLL_GETFD(&events, i);
1524
1525 cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
1526 &wait_queue->head, head) {
1527 if (pollfd == wait_node->app->sock &&
1528 (revents & (LPOLLHUP | LPOLLERR))) {
1529 cds_list_del(&wait_node->head);
1530 wait_queue->count--;
1531 ust_app_destroy(wait_node->app);
1532 free(wait_node);
1533 break;
1534 }
1535 }
1536 }
1537
1538 if (nb_fd > 0) {
1539 DBG("Wait queue sanitized, %d node were cleaned up", nb_fd);
1540 }
1541
1542 end:
1543 lttng_poll_clean(&events);
1544 return;
1545
1546 error:
1547 lttng_poll_clean(&events);
1548 error_create:
1549 ERR("Unable to sanitize wait queue");
1550 return;
1551 }
1552
1553 /*
1554 * Dispatch request from the registration threads to the application
1555 * communication thread.
1556 */
1557 static void *thread_dispatch_ust_registration(void *data)
1558 {
1559 int ret, err = -1;
1560 struct cds_wfq_node *node;
1561 struct ust_command *ust_cmd = NULL;
1562 struct ust_reg_wait_node *wait_node = NULL, *tmp_wait_node;
1563 struct ust_reg_wait_queue wait_queue = {
1564 .count = 0,
1565 };
1566
1567 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_REG_DISPATCH);
1568
1569 if (testpoint(sessiond_thread_app_reg_dispatch)) {
1570 goto error_testpoint;
1571 }
1572
1573 health_code_update();
1574
1575 CDS_INIT_LIST_HEAD(&wait_queue.head);
1576
1577 DBG("[thread] Dispatch UST command started");
1578
1579 while (!CMM_LOAD_SHARED(dispatch_thread_exit)) {
1580 health_code_update();
1581
1582 /* Atomically prepare the queue futex */
1583 futex_nto1_prepare(&ust_cmd_queue.futex);
1584
1585 do {
1586 struct ust_app *app = NULL;
1587 ust_cmd = NULL;
1588
1589 /*
1590 * Make sure we don't have node(s) that have hung up before receiving
1591 * the notify socket. This cleans the list in order to avoid memory
1592 * leaks from notify sockets that are never seen.
1593 */
1594 sanitize_wait_queue(&wait_queue);
1595
1596 health_code_update();
1597 /* Dequeue command for registration */
1598 node = cds_wfq_dequeue_blocking(&ust_cmd_queue.queue);
1599 if (node == NULL) {
1600 DBG("Woken up but nothing in the UST command queue");
1601 /* Continue thread execution */
1602 break;
1603 }
1604
1605 ust_cmd = caa_container_of(node, struct ust_command, node);
1606
1607 DBG("Dispatching UST registration pid:%d ppid:%d uid:%d"
1608 " gid:%d sock:%d name:%s (version %d.%d)",
1609 ust_cmd->reg_msg.pid, ust_cmd->reg_msg.ppid,
1610 ust_cmd->reg_msg.uid, ust_cmd->reg_msg.gid,
1611 ust_cmd->sock, ust_cmd->reg_msg.name,
1612 ust_cmd->reg_msg.major, ust_cmd->reg_msg.minor);
1613
1614 if (ust_cmd->reg_msg.type == USTCTL_SOCKET_CMD) {
1615 wait_node = zmalloc(sizeof(*wait_node));
1616 if (!wait_node) {
1617 PERROR("zmalloc wait_node dispatch");
1618 ret = close(ust_cmd->sock);
1619 if (ret < 0) {
1620 PERROR("close ust sock dispatch %d", ust_cmd->sock);
1621 }
1622 lttng_fd_put(LTTNG_FD_APPS, 1);
1623 free(ust_cmd);
1624 goto error;
1625 }
1626 CDS_INIT_LIST_HEAD(&wait_node->head);
1627
1628 /* Create application object if socket is CMD. */
1629 wait_node->app = ust_app_create(&ust_cmd->reg_msg,
1630 ust_cmd->sock);
1631 if (!wait_node->app) {
1632 ret = close(ust_cmd->sock);
1633 if (ret < 0) {
1634 PERROR("close ust sock dispatch %d", ust_cmd->sock);
1635 }
1636 lttng_fd_put(LTTNG_FD_APPS, 1);
1637 free(wait_node);
1638 free(ust_cmd);
1639 continue;
1640 }
1641 /*
1642 * Add application to the wait queue so we can set the notify
1643 * socket before putting this object in the global ht.
1644 */
1645 cds_list_add(&wait_node->head, &wait_queue.head);
1646 wait_queue.count++;
1647
1648 free(ust_cmd);
1649 /*
1650 * We have to continue here since we don't have the notify
1651 * socket and the application MUST be added to the hash table
1652 * only at that moment.
1653 */
1654 continue;
1655 } else {
1656 /*
1657 * Look for the application in the local wait queue and set the
1658 * notify socket if found.
1659 */
1660 cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
1661 &wait_queue.head, head) {
1662 health_code_update();
1663 if (wait_node->app->pid == ust_cmd->reg_msg.pid) {
1664 wait_node->app->notify_sock = ust_cmd->sock;
1665 cds_list_del(&wait_node->head);
1666 wait_queue.count--;
1667 app = wait_node->app;
1668 free(wait_node);
1669 DBG3("UST app notify socket %d is set", ust_cmd->sock);
1670 break;
1671 }
1672 }
1673
1674 /*
1675 * With no application at this stage the received socket is
1676 * basically useless so close it before we free the cmd data
1677 * structure for good.
1678 */
1679 if (!app) {
1680 ret = close(ust_cmd->sock);
1681 if (ret < 0) {
1682 PERROR("close ust sock dispatch %d", ust_cmd->sock);
1683 }
1684 lttng_fd_put(LTTNG_FD_APPS, 1);
1685 }
1686 free(ust_cmd);
1687 }
1688
1689 if (app) {
1690 /*
1691 * @session_lock_list
1692 *
1693 * Lock the global session list so from the register up to the
1694 * registration done message, no thread can see the application
1695 * and change its state.
1696 */
1697 session_lock_list();
1698 rcu_read_lock();
1699
1700 /*
1701 * Add application to the global hash table. This needs to be
1702 * done before the update to the UST registry can locate the
1703 * application.
1704 */
1705 ust_app_add(app);
1706
1707 /* Set app version. This call will print an error if needed. */
1708 (void) ust_app_version(app);
1709
1710 /* Send notify socket through the notify pipe. */
1711 ret = send_socket_to_thread(apps_cmd_notify_pipe[1],
1712 app->notify_sock);
1713 if (ret < 0) {
1714 rcu_read_unlock();
1715 session_unlock_list();
1716 /*
1717 * No notify thread, stop the UST tracing. However, this is
1718 * not an internal error of this thread; thus the health
1719 * error code is set to a normal exit.
1720 */
1721 err = 0;
1722 goto error;
1723 }
1724
1725 /*
1726 * Update newly registered application with the tracing
1727 * registry info already enabled information.
1728 */
1729 update_ust_app(app->sock);
1730
1731 /*
1732 * Don't care about return value. Let the manage apps threads
1733 * handle app unregistration upon socket close.
1734 */
1735 (void) ust_app_register_done(app->sock);
1736
1737 /*
1738 * Even if the application socket has been closed, send the app
1739 * to the thread and unregistration will take place at that
1740 * place.
1741 */
1742 ret = send_socket_to_thread(apps_cmd_pipe[1], app->sock);
1743 if (ret < 0) {
1744 rcu_read_unlock();
1745 session_unlock_list();
1746 /*
1747 * No apps. thread, stop the UST tracing. However, this is
1748 * not an internal error of this thread; thus the health
1749 * error code is set to a normal exit.
1750 */
1751 err = 0;
1752 goto error;
1753 }
1754
1755 rcu_read_unlock();
1756 session_unlock_list();
1757 }
1758 } while (node != NULL);
1759
1760 health_poll_entry();
1761 /* Futex wait on queue. Blocking call on futex() */
1762 futex_nto1_wait(&ust_cmd_queue.futex);
1763 health_poll_exit();
1764 }
1765 /* Normal exit, no error */
1766 err = 0;
1767
1768 error:
1769 /* Clean up wait queue. */
1770 cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
1771 &wait_queue.head, head) {
1772 cds_list_del(&wait_node->head);
1773 wait_queue.count--;
1774 free(wait_node);
1775 }
1776
1777 error_testpoint:
1778 DBG("Dispatch thread dying");
1779 if (err) {
1780 health_error();
1781 ERR("Health error occurred in %s", __func__);
1782 }
1783 health_unregister(health_sessiond);
1784 return NULL;
1785 }
1786
1787 /*
1788 * This thread manages application registration.
1789 */
1790 static void *thread_registration_apps(void *data)
1791 {
1792 int sock = -1, i, ret, pollfd, err = -1;
1793 uint32_t revents, nb_fd;
1794 struct lttng_poll_event events;
1795 /*
1796 * Gets allocated in this thread, enqueued to a global queue, dequeued and
1797 * freed in the manage apps thread.
1798 */
1799 struct ust_command *ust_cmd = NULL;
1800
1801 DBG("[thread] Manage application registration started");
1802
1803 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_REG);
1804
1805 if (testpoint(sessiond_thread_registration_apps)) {
1806 goto error_testpoint;
1807 }
1808
1809 ret = lttcomm_listen_unix_sock(apps_sock);
1810 if (ret < 0) {
1811 goto error_listen;
1812 }
1813
1814 /*
1815 * Pass 2 as size here for the thread quit pipe and apps socket. Nothing
1816 * more will be added to this poll set.
1817 */
1818 ret = sessiond_set_thread_pollset(&events, 2);
1819 if (ret < 0) {
1820 goto error_create_poll;
1821 }
1822
1823 /* Add the application registration socket */
1824 ret = lttng_poll_add(&events, apps_sock, LPOLLIN | LPOLLRDHUP);
1825 if (ret < 0) {
1826 goto error_poll_add;
1827 }
1828
1829 /* Notify all applications to register */
1830 ret = notify_ust_apps(1);
1831 if (ret < 0) {
1832 ERR("Failed to notify applications or create the wait shared memory.\n"
1833 "Execution continues but there might be problem for already\n"
1834 "running applications that wishes to register.");
1835 }
1836
1837 while (1) {
1838 DBG("Accepting application registration");
1839
1840 /* Infinite blocking call, waiting for transmission */
1841 restart:
1842 health_poll_entry();
1843 ret = lttng_poll_wait(&events, -1);
1844 health_poll_exit();
1845 if (ret < 0) {
1846 /*
1847 * Restart interrupted system call.
1848 */
1849 if (errno == EINTR) {
1850 goto restart;
1851 }
1852 goto error;
1853 }
1854
1855 nb_fd = ret;
1856
1857 for (i = 0; i < nb_fd; i++) {
1858 health_code_update();
1859
1860 /* Fetch once the poll data */
1861 revents = LTTNG_POLL_GETEV(&events, i);
1862 pollfd = LTTNG_POLL_GETFD(&events, i);
1863
1864 /* Thread quit pipe has been closed. Killing thread. */
1865 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
1866 if (ret) {
1867 err = 0;
1868 goto exit;
1869 }
1870
1871 /* Event on the registration socket */
1872 if (pollfd == apps_sock) {
1873 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1874 ERR("Register apps socket poll error");
1875 goto error;
1876 } else if (revents & LPOLLIN) {
1877 sock = lttcomm_accept_unix_sock(apps_sock);
1878 if (sock < 0) {
1879 goto error;
1880 }
1881
1882 /*
1883 * Set socket timeout for both receiving and sending.
1884 * app_socket_timeout is in seconds, whereas
1885 * lttcomm_setsockopt_rcv_timeout and
1886 * lttcomm_setsockopt_snd_timeout expect msec as
1887 * parameter.
1888 */
1889 (void) lttcomm_setsockopt_rcv_timeout(sock,
1890 app_socket_timeout * 1000);
1891 (void) lttcomm_setsockopt_snd_timeout(sock,
1892 app_socket_timeout * 1000);
1893
1894 /*
1895 * Set the CLOEXEC flag. Return code is useless because
1896 * either way, the show must go on.
1897 */
1898 (void) utils_set_fd_cloexec(sock);
1899
1900 /* Create UST registration command for enqueuing */
1901 ust_cmd = zmalloc(sizeof(struct ust_command));
1902 if (ust_cmd == NULL) {
1903 PERROR("ust command zmalloc");
1904 goto error;
1905 }
1906
1907 /*
1908 * Using message-based transmissions to ensure we don't
1909 * have to deal with partially received messages.
1910 */
1911 ret = lttng_fd_get(LTTNG_FD_APPS, 1);
1912 if (ret < 0) {
1913 ERR("Exhausted file descriptors allowed for applications.");
1914 free(ust_cmd);
1915 ret = close(sock);
1916 if (ret) {
1917 PERROR("close");
1918 }
1919 sock = -1;
1920 continue;
1921 }
1922
1923 health_code_update();
1924 ret = ust_app_recv_registration(sock, &ust_cmd->reg_msg);
1925 if (ret < 0) {
1926 free(ust_cmd);
1927 /* Close socket of the application. */
1928 ret = close(sock);
1929 if (ret) {
1930 PERROR("close");
1931 }
1932 lttng_fd_put(LTTNG_FD_APPS, 1);
1933 sock = -1;
1934 continue;
1935 }
1936 health_code_update();
1937
1938 ust_cmd->sock = sock;
1939 sock = -1;
1940
1941 DBG("UST registration received with pid:%d ppid:%d uid:%d"
1942 " gid:%d sock:%d name:%s (version %d.%d)",
1943 ust_cmd->reg_msg.pid, ust_cmd->reg_msg.ppid,
1944 ust_cmd->reg_msg.uid, ust_cmd->reg_msg.gid,
1945 ust_cmd->sock, ust_cmd->reg_msg.name,
1946 ust_cmd->reg_msg.major, ust_cmd->reg_msg.minor);
1947
1948 /*
1949 * Lock-free enqueue of the registration request. The red pill
1950 * has been taken! This app will be part of the *system*.
1951 */
1952 cds_wfq_enqueue(&ust_cmd_queue.queue, &ust_cmd->node);
1953
1954 /*
1955 * Wake the registration queue futex. Implicit memory
1956 * barrier with the exchange in cds_wfq_enqueue.
1957 */
1958 futex_nto1_wake(&ust_cmd_queue.futex);
1959 }
1960 }
1961 }
1962 }
1963
1964 exit:
1965 error:
1966 /* Notify that the registration thread is gone */
1967 notify_ust_apps(0);
1968
1969 if (apps_sock >= 0) {
1970 ret = close(apps_sock);
1971 if (ret) {
1972 PERROR("close");
1973 }
1974 }
1975 if (sock >= 0) {
1976 ret = close(sock);
1977 if (ret) {
1978 PERROR("close");
1979 }
1980 lttng_fd_put(LTTNG_FD_APPS, 1);
1981 }
1982 unlink(apps_unix_sock_path);
1983
1984 error_poll_add:
1985 lttng_poll_clean(&events);
1986 error_listen:
1987 error_create_poll:
1988 error_testpoint:
1989 DBG("UST Registration thread cleanup complete");
1990 if (err) {
1991 health_error();
1992 ERR("Health error occurred in %s", __func__);
1993 }
1994 health_unregister(health_sessiond);
1995
1996 return NULL;
1997 }
1998
1999 /*
2000 * Start the thread_manage_consumer. This must be done after an lttng-consumerd
2001 * exec or it will fail.
2002 */
2003 static int spawn_consumer_thread(struct consumer_data *consumer_data)
2004 {
2005 int ret, clock_ret;
2006 struct timespec timeout;
2007
2008 /* Make sure we set the readiness flag to 0 because we are NOT ready */
2009 consumer_data->consumer_thread_is_ready = 0;
2010
2011 /* Setup pthread condition */
2012 ret = pthread_condattr_init(&consumer_data->condattr);
2013 if (ret != 0) {
2014 errno = ret;
2015 PERROR("pthread_condattr_init consumer data");
2016 goto error;
2017 }
2018
2019 /*
2020 * Set the monotonic clock in order to make sure we DO NOT jump in time
2021 * between the clock_gettime() call and the timedwait call. See bug #324
2022 * for more details and how we noticed it.
2023 */
2024 ret = pthread_condattr_setclock(&consumer_data->condattr, CLOCK_MONOTONIC);
2025 if (ret != 0) {
2026 errno = ret;
2027 PERROR("pthread_condattr_setclock consumer data");
2028 goto error;
2029 }
2030
2031 ret = pthread_cond_init(&consumer_data->cond, &consumer_data->condattr);
2032 if (ret != 0) {
2033 errno = ret;
2034 PERROR("pthread_cond_init consumer data");
2035 goto error;
2036 }
2037
2038 ret = pthread_create(&consumer_data->thread, NULL, thread_manage_consumer,
2039 consumer_data);
2040 if (ret != 0) {
2041 PERROR("pthread_create consumer");
2042 ret = -1;
2043 goto error;
2044 }
2045
2046 /* We are about to wait on a pthread condition */
2047 pthread_mutex_lock(&consumer_data->cond_mutex);
2048
2049 /* Get time for the pthread_cond_timedwait absolute timeout */
2050 clock_ret = clock_gettime(CLOCK_MONOTONIC, &timeout);
2051 /*
2052 * Set the timeout for the condition timed wait even if the clock gettime
2053 * call fails since we might loop on that call and we want to avoid
2054 * incrementing the timeout too many times.
2055 */
2056 timeout.tv_sec += DEFAULT_SEM_WAIT_TIMEOUT;
2057
2058 /*
2059 * The following loop COULD be skipped in some conditions so this is why we
2060 * set ret to 0 in order to make sure at least one round of the loop is
2061 * done.
2062 */
2063 ret = 0;
2064
2065 /*
2066 * Loop until the condition is reached or when a timeout is reached. Note
2067 * that the pthread_cond_timedwait(P) man page specifies that EINTR can NOT
2068 * be returned but the pthread_cond(3), from the glibc-doc, says that it is
2069 * possible. This loop does not take any chances and works with both of
2070 * them.
2071 */
2072 while (!consumer_data->consumer_thread_is_ready && ret != ETIMEDOUT) {
2073 if (clock_ret < 0) {
2074 PERROR("clock_gettime spawn consumer");
2075 /* Infinite wait for the consumerd thread to be ready */
2076 ret = pthread_cond_wait(&consumer_data->cond,
2077 &consumer_data->cond_mutex);
2078 } else {
2079 ret = pthread_cond_timedwait(&consumer_data->cond,
2080 &consumer_data->cond_mutex, &timeout);
2081 }
2082 }
2083
2084 /* Release the pthread condition */
2085 pthread_mutex_unlock(&consumer_data->cond_mutex);
2086
2087 if (ret != 0) {
2088 errno = ret;
2089 if (ret == ETIMEDOUT) {
2090 int pth_ret;
2091
2092 /*
2093 * Call has timed out so we kill the consumer thread and return
2094 * an error.
2095 */
2096 ERR("Condition timed out. The consumer thread was never ready."
2097 " Killing it");
2098 pth_ret = pthread_cancel(consumer_data->thread);
2099 if (pth_ret != 0) {
2100 PERROR("pthread_cancel consumer thread");
2101 }
2102 } else {
2103 PERROR("pthread_cond_wait failed consumer thread");
2104 }
2105 /* Caller is expecting a negative value on failure. */
2106 ret = -1;
2107 goto error;
2108 }
2109
2110 pthread_mutex_lock(&consumer_data->pid_mutex);
2111 if (consumer_data->pid == 0) {
2112 ERR("Consumerd did not start");
2113 pthread_mutex_unlock(&consumer_data->pid_mutex);
2114 goto error;
2115 }
2116 pthread_mutex_unlock(&consumer_data->pid_mutex);
2117
2118 return 0;
2119
2120 error:
2121 return ret;
2122 }
2123
2124 /*
2125 * Join consumer thread
2126 */
2127 static int join_consumer_thread(struct consumer_data *consumer_data)
2128 {
2129 void *status;
2130
2131 /* Consumer pid must be a real one. */
2132 if (consumer_data->pid > 0) {
2133 int ret;
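/* Ask the consumer daemon to terminate, then reap its managing thread. */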
2134 ret = kill(consumer_data->pid, SIGTERM);
2135 if (ret) {
2136 ERR("Error killing consumer daemon");
2137 return ret;
2138 }
2139 return pthread_join(consumer_data->thread, &status);
2140 } else {
2141 return 0;
2142 }
2143 }
2144
2145 /*
2146 * Fork and exec a consumer daemon (consumerd).
2147 *
2148 * Return the consumerd pid on success, else a negative error value.
2149 */
2150 static pid_t spawn_consumerd(struct consumer_data *consumer_data)
2151 {
2152 int ret;
2153 pid_t pid;
2154 const char *consumer_to_use;
2155 const char *verbosity;
2156 struct stat st;
2157
2158 DBG("Spawning consumerd");
2159
2160 pid = fork();
2161 if (pid == 0) {
2162 /*
2163 * Exec consumerd.
2164 */
2165 if (opt_verbose_consumer) {
2166 verbosity = "--verbose";
2167 } else {
2168 verbosity = "--quiet";
2169 }
2170 switch (consumer_data->type) {
2171 case LTTNG_CONSUMER_KERNEL:
2172 /*
2173 * Find out which consumerd to execute. We will first try the
2174 * 64-bit path, then the sessiond's installation directory, and
2175 * fall back on the 32-bit one.
2176 */
2177 DBG3("Looking for a kernel consumer at these locations:");
2178 DBG3(" 1) %s", consumerd64_bin);
2179 DBG3(" 2) %s/%s", INSTALL_BIN_PATH, CONSUMERD_FILE);
2180 DBG3(" 3) %s", consumerd32_bin);
2181 if (stat(consumerd64_bin, &st) == 0) {
2182 DBG3("Found location #1");
2183 consumer_to_use = consumerd64_bin;
2184 } else if (stat(INSTALL_BIN_PATH "/" CONSUMERD_FILE, &st) == 0) {
2185 DBG3("Found location #2");
2186 consumer_to_use = INSTALL_BIN_PATH "/" CONSUMERD_FILE;
2187 } else if (stat(consumerd32_bin, &st) == 0) {
2188 DBG3("Found location #3");
2189 consumer_to_use = consumerd32_bin;
2190 } else {
2191 DBG("Could not find any valid consumerd executable");
2192 ret = -EINVAL;
2193 break;
2194 }
2195 DBG("Using kernel consumer at: %s", consumer_to_use);
2196 ret = execl(consumer_to_use,
2197 "lttng-consumerd", verbosity, "-k",
2198 "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
2199 "--consumerd-err-sock", consumer_data->err_unix_sock_path,
2200 "--group", tracing_group_name,
2201 NULL);
2202 break;
2203 case LTTNG_CONSUMER64_UST:
2204 {
2205 char *tmpnew = NULL;
2206
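/*
 * Prepend the 64-bit consumer libdir to LD_LIBRARY_PATH so that the
 * exec'd consumerd resolves its matching libraries first.
 */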
2207 if (consumerd64_libdir[0] != '\0') {
2208 char *tmp;
2209 size_t tmplen;
2210
2211 tmp = getenv("LD_LIBRARY_PATH");
2212 if (!tmp) {
2213 tmp = "";
2214 }
2215 tmplen = strlen("LD_LIBRARY_PATH=")
2216 + strlen(consumerd64_libdir) + 1 /* : */ + strlen(tmp);
2217 tmpnew = zmalloc(tmplen + 1 /* \0 */);
2218 if (!tmpnew) {
2219 ret = -ENOMEM;
2220 goto error;
2221 }
2222 strcpy(tmpnew, "LD_LIBRARY_PATH=");
2223 strcat(tmpnew, consumerd64_libdir);
2224 if (tmp[0] != '\0') {
2225 strcat(tmpnew, ":");
2226 strcat(tmpnew, tmp);
2227 }
2228 ret = putenv(tmpnew);
2229 if (ret) {
2230 ret = -errno;
2231 free(tmpnew);
2232 goto error;
2233 }
2234 }
2235 DBG("Using 64-bit UST consumer at: %s", consumerd64_bin);
2236 ret = execl(consumerd64_bin, "lttng-consumerd", verbosity, "-u",
2237 "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
2238 "--consumerd-err-sock", consumer_data->err_unix_sock_path,
2239 "--group", tracing_group_name,
2240 NULL);
2241 if (consumerd64_libdir[0] != '\0') {
2242 free(tmpnew);
2243 }
2244 break;
2245 }
2246 case LTTNG_CONSUMER32_UST:
2247 {
2248 char *tmpnew = NULL;
2249
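/*
 * Same dance as for the 64-bit consumer: prepend the 32-bit libdir to
 * LD_LIBRARY_PATH before the exec.
 */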
2250 if (consumerd32_libdir[0] != '\0') {
2251 char *tmp;
2252 size_t tmplen;
2253
2254 tmp = getenv("LD_LIBRARY_PATH");
2255 if (!tmp) {
2256 tmp = "";
2257 }
2258 tmplen = strlen("LD_LIBRARY_PATH=")
2259 + strlen(consumerd32_libdir) + 1 /* : */ + strlen(tmp);
2260 tmpnew = zmalloc(tmplen + 1 /* \0 */);
2261 if (!tmpnew) {
2262 ret = -ENOMEM;
2263 goto error;
2264 }
2265 strcpy(tmpnew, "LD_LIBRARY_PATH=");
2266 strcat(tmpnew, consumerd32_libdir);
2267 if (tmp[0] != '\0') {
2268 strcat(tmpnew, ":");
2269 strcat(tmpnew, tmp);
2270 }
2271 ret = putenv(tmpnew);
2272 if (ret) {
2273 ret = -errno;
2274 free(tmpnew);
2275 goto error;
2276 }
2277 }
2278 DBG("Using 32-bit UST consumer at: %s", consumerd32_bin);
2279 ret = execl(consumerd32_bin, "lttng-consumerd", verbosity, "-u",
2280 "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
2281 "--consumerd-err-sock", consumer_data->err_unix_sock_path,
2282 "--group", tracing_group_name,
2283 NULL);
2284 if (consumerd32_libdir[0] != '\0') {
2285 free(tmpnew);
2286 }
2287 break;
2288 }
2289 default:
2290 PERROR("unknown consumer type");
2291 exit(EXIT_FAILURE);
2292 }
2293 if (errno != 0) {
2294 PERROR("Consumer execl()");
2295 }
2296 /* Reaching this point, we got a failure on our execl(). */
2297 exit(EXIT_FAILURE);
2298 } else if (pid > 0) {
2299 ret = pid;
2300 } else {
2301 PERROR("start consumer fork");
2302 ret = -errno;
2303 }
2304 error:
2305 return ret;
2306 }
2307
2308 /*
2309 * Spawn the consumerd daemon and session daemon thread.
2310 */
2311 static int start_consumerd(struct consumer_data *consumer_data)
2312 {
2313 int ret;
2314
2315 /*
2316 * Set the listen() state on the socket since there is a possible race
2317 * between the exec() of the consumer daemon and this call if placed in the
2318 * consumer thread. See bug #366 for more details.
2319 */
2320 ret = lttcomm_listen_unix_sock(consumer_data->err_sock);
2321 if (ret < 0) {
2322 goto error;
2323 }
2324
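/* A non-zero pid means a consumerd was already spawned; nothing to do. */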
2325 pthread_mutex_lock(&consumer_data->pid_mutex);
2326 if (consumer_data->pid != 0) {
2327 pthread_mutex_unlock(&consumer_data->pid_mutex);
2328 goto end;
2329 }
2330
2331 ret = spawn_consumerd(consumer_data);
2332 if (ret < 0) {
2333 ERR("Spawning consumerd failed");
2334 pthread_mutex_unlock(&consumer_data->pid_mutex);
2335 goto error;
2336 }
2337
2338 /* Setting up the consumer_data pid */
2339 consumer_data->pid = ret;
2340 DBG2("Consumer pid %d", consumer_data->pid);
2341 pthread_mutex_unlock(&consumer_data->pid_mutex);
2342
2343 DBG2("Spawning consumer control thread");
2344 ret = spawn_consumer_thread(consumer_data);
2345 if (ret < 0) {
2346 ERR("Fatal error spawning consumer control thread");
2347 goto error;
2348 }
2349
2350 end:
2351 return 0;
2352
2353 error:
2354 /* Cleanup already created sockets on error. */
2355 if (consumer_data->err_sock >= 0) {
2356 int err;
2357
2358 err = close(consumer_data->err_sock);
2359 if (err < 0) {
2360 PERROR("close consumer data error socket");
2361 }
2362 }
2363 return ret;
2364 }
2365
2366 /*
2367 * Setup necessary data for kernel tracer action.
2368 */
2369 static int init_kernel_tracer(void)
2370 {
2371 int ret;
2372
2373 /* Modprobe lttng kernel modules */
2374 ret = modprobe_lttng_control();
2375 if (ret < 0) {
2376 goto error;
2377 }
2378
2379 /* Open debugfs lttng */
2380 kernel_tracer_fd = open(module_proc_lttng, O_RDWR);
2381 if (kernel_tracer_fd < 0) {
2382 DBG("Failed to open %s", module_proc_lttng);
2383 ret = -1;
2384 goto error_open;
2385 }
2386
2387 /* Validate kernel version */
2388 ret = kernel_validate_version(kernel_tracer_fd);
2389 if (ret < 0) {
2390 goto error_version;
2391 }
2392
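/* Load the LTTng data (probe) modules now that the control interface is up. */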
2393 ret = modprobe_lttng_data();
2394 if (ret < 0) {
2395 goto error_modules;
2396 }
2397
2398 DBG("Kernel tracer fd %d", kernel_tracer_fd);
2399 return 0;
2400
2401 error_version:
2402 modprobe_remove_lttng_control();
2403 ret = close(kernel_tracer_fd);
2404 if (ret) {
2405 PERROR("close");
2406 }
2407 kernel_tracer_fd = -1;
2408 return LTTNG_ERR_KERN_VERSION;
2409
2410 error_modules:
2411 ret = close(kernel_tracer_fd);
2412 if (ret) {
2413 PERROR("close");
2414 }
2415
2416 error_open:
2417 modprobe_remove_lttng_control();
2418
2419 error:
2420 WARN("No kernel tracer available");
2421 kernel_tracer_fd = -1;
2422 if (!is_root) {
2423 return LTTNG_ERR_NEED_ROOT_SESSIOND;
2424 } else {
2425 return LTTNG_ERR_KERN_NA;
2426 }
2427 }
2428
2429
2430 /*
2431 * Copy consumer output from the tracing session to the domain session. The
2432 * function also applies the right modification on a per domain basis for the
2433 * trace files destination directory.
2434 *
2435 * Should *NOT* be called with RCU read-side lock held.
2436 */
2437 static int copy_session_consumer(int domain, struct ltt_session *session)
2438 {
2439 int ret;
2440 const char *dir_name;
2441 struct consumer_output *consumer;
2442
2443 assert(session);
2444 assert(session->consumer);
2445
2446 switch (domain) {
2447 case LTTNG_DOMAIN_KERNEL:
2448 DBG3("Copying tracing session consumer output in kernel session");
2449 /*
2450 * XXX: We should audit the session creation and what this function
2451 * does "extra" in order to avoid a destroy since this function is used
2452 * in the domain session creation (kernel and ust) only. Same for UST
2453 * domain.
2454 */
2455 if (session->kernel_session->consumer) {
2456 consumer_destroy_output(session->kernel_session->consumer);
2457 }
2458 session->kernel_session->consumer =
2459 consumer_copy_output(session->consumer);
2460 /* Ease our life a bit for the next part */
2461 consumer = session->kernel_session->consumer;
2462 dir_name = DEFAULT_KERNEL_TRACE_DIR;
2463 break;
2464 case LTTNG_DOMAIN_JUL:
2465 case LTTNG_DOMAIN_UST:
2466 DBG3("Copying tracing session consumer output in UST session");
2467 if (session->ust_session->consumer) {
2468 consumer_destroy_output(session->ust_session->consumer);
2469 }
2470 session->ust_session->consumer =
2471 consumer_copy_output(session->consumer);
2472 /* Ease our life a bit for the next part */
2473 consumer = session->ust_session->consumer;
2474 dir_name = DEFAULT_UST_TRACE_DIR;
2475 break;
2476 default:
2477 ret = LTTNG_ERR_UNKNOWN_DOMAIN;
2478 goto error;
2479 }
2480
2481 /* Append correct directory to subdir */
2482 strncat(consumer->subdir, dir_name,
2483 sizeof(consumer->subdir) - strlen(consumer->subdir) - 1);
2484 DBG3("Copy session consumer subdir %s", consumer->subdir);
2485
2486 ret = LTTNG_OK;
2487
2488 error:
2489 return ret;
2490 }
2491
2492 /*
2493 * Create an UST session and add it to the session ust list.
2494 *
2495 * Should *NOT* be called with RCU read-side lock held.
2496 */
2497 static int create_ust_session(struct ltt_session *session,
2498 struct lttng_domain *domain)
2499 {
2500 int ret;
2501 struct ltt_ust_session *lus = NULL;
2502
2503 assert(session);
2504 assert(domain);
2505 assert(session->consumer);
2506
2507 switch (domain->type) {
2508 case LTTNG_DOMAIN_JUL:
2509 case LTTNG_DOMAIN_UST:
2510 break;
2511 default:
2512 ERR("Unknown UST domain on create session %d", domain->type);
2513 ret = LTTNG_ERR_UNKNOWN_DOMAIN;
2514 goto error;
2515 }
2516
2517 DBG("Creating UST session");
2518
2519 lus = trace_ust_create_session(session->id);
2520 if (lus == NULL) {
2521 ret = LTTNG_ERR_UST_SESS_FAIL;
2522 goto error;
2523 }
2524
2525 lus->uid = session->uid;
2526 lus->gid = session->gid;
2527 lus->output_traces = session->output_traces;
2528 lus->snapshot_mode = session->snapshot_mode;
2529 lus->live_timer_interval = session->live_timer;
2530 session->ust_session = lus;
2531
2532 /* Copy session output to the newly created UST session */
2533 ret = copy_session_consumer(domain->type, session);
2534 if (ret != LTTNG_OK) {
2535 goto error;
2536 }
2537
2538 return LTTNG_OK;
2539
2540 error:
2541 free(lus);
2542 session->ust_session = NULL;
2543 return ret;
2544 }
2545
2546 /*
2547 * Create a kernel tracer session then create the default channel.
2548 */
2549 static int create_kernel_session(struct ltt_session *session)
2550 {
2551 int ret;
2552
2553 DBG("Creating kernel session");
2554
2555 ret = kernel_create_session(session, kernel_tracer_fd);
2556 if (ret < 0) {
2557 ret = LTTNG_ERR_KERN_SESS_FAIL;
2558 goto error;
2559 }
2560
2561 /* Code flow safety */
2562 assert(session->kernel_session);
2563
2564 /* Copy session output to the newly created Kernel session */
2565 ret = copy_session_consumer(LTTNG_DOMAIN_KERNEL, session);
2566 if (ret != LTTNG_OK) {
2567 goto error;
2568 }
2569
2570 /* Create directory(ies) on local filesystem. */
2571 if (session->kernel_session->consumer->type == CONSUMER_DST_LOCAL &&
2572 strlen(session->kernel_session->consumer->dst.trace_path) > 0) {
2573 ret = run_as_mkdir_recursive(
2574 session->kernel_session->consumer->dst.trace_path,
2575 S_IRWXU | S_IRWXG, session->uid, session->gid);
2576 if (ret < 0) {
2577 if (ret != -EEXIST) {
2578 ERR("Trace directory creation error");
2579 goto error;
2580 }
2581 }
2582 }
2583
2584 session->kernel_session->uid = session->uid;
2585 session->kernel_session->gid = session->gid;
2586 session->kernel_session->output_traces = session->output_traces;
2587 session->kernel_session->snapshot_mode = session->snapshot_mode;
2588
2589 return LTTNG_OK;
2590
2591 error:
2592 trace_kernel_destroy_session(session->kernel_session);
2593 session->kernel_session = NULL;
2594 return ret;
2595 }
2596
2597 /*
2598 * Count the number of sessions permitted for the given uid/gid.
2599 */
2600 static unsigned int lttng_sessions_count(uid_t uid, gid_t gid)
2601 {
2602 unsigned int i = 0;
2603 struct ltt_session *session;
2604
2605 DBG("Counting number of available session for UID %d GID %d",
2606 uid, gid);
2607 cds_list_for_each_entry(session, &session_list_ptr->head, list) {
2608 /*
2609 * Only list the sessions the user can control.
2610 */
2611 if (!session_access_ok(session, uid, gid)) {
2612 continue;
2613 }
2614 i++;
2615 }
2616 return i;
2617 }
2618
2619 /*
2620 * Process the command requested by the lttng client within the command
2621 * context structure. This function makes sure that the return structure (llm)
2622 * is set and ready for transmission before returning.
2623 *
2624 * Return any error encountered or 0 for success.
2625 *
2626 * "sock" is only used for special-case var. len data.
2627 *
2628 * Should *NOT* be called with RCU read-side lock held.
2629 */
2630 static int process_client_msg(struct command_ctx *cmd_ctx, int sock,
2631 int *sock_error)
2632 {
2633 int ret = LTTNG_OK;
2634 int need_tracing_session = 1;
2635 int need_domain;
2636
2637 DBG("Processing client command %d", cmd_ctx->lsm->cmd_type);
2638
2639 *sock_error = 0;
2640
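/* These commands can be issued without the client specifying a domain. */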
2641 switch (cmd_ctx->lsm->cmd_type) {
2642 case LTTNG_CREATE_SESSION:
2643 case LTTNG_CREATE_SESSION_SNAPSHOT:
2644 case LTTNG_CREATE_SESSION_LIVE:
2645 case LTTNG_DESTROY_SESSION:
2646 case LTTNG_LIST_SESSIONS:
2647 case LTTNG_LIST_DOMAINS:
2648 case LTTNG_START_TRACE:
2649 case LTTNG_STOP_TRACE:
2650 case LTTNG_DATA_PENDING:
2651 case LTTNG_SNAPSHOT_ADD_OUTPUT:
2652 case LTTNG_SNAPSHOT_DEL_OUTPUT:
2653 case LTTNG_SNAPSHOT_LIST_OUTPUT:
2654 case LTTNG_SNAPSHOT_RECORD:
2655 need_domain = 0;
2656 break;
2657 default:
2658 need_domain = 1;
2659 }
2660
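/* Kernel domain commands are rejected outright when --no-kernel was given. */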
2661 if (opt_no_kernel && need_domain
2662 && cmd_ctx->lsm->domain.type == LTTNG_DOMAIN_KERNEL) {
2663 if (!is_root) {
2664 ret = LTTNG_ERR_NEED_ROOT_SESSIOND;
2665 } else {
2666 ret = LTTNG_ERR_KERN_NA;
2667 }
2668 goto error;
2669 }
2670
2671 /* Deny register consumer if we already have a spawned consumer. */
2672 if (cmd_ctx->lsm->cmd_type == LTTNG_REGISTER_CONSUMER) {
2673 pthread_mutex_lock(&kconsumer_data.pid_mutex);
2674 if (kconsumer_data.pid > 0) {
2675 ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
2676 pthread_mutex_unlock(&kconsumer_data.pid_mutex);
2677 goto error;
2678 }
2679 pthread_mutex_unlock(&kconsumer_data.pid_mutex);
2680 }
2681
2682 /*
2683 * Check for commands that don't need to allocate a returned payload. We do
2684 * this here so we don't have to make the call for no payload at each
2685 * command.
2686 */
2687 switch (cmd_ctx->lsm->cmd_type) {
2688 case LTTNG_LIST_SESSIONS:
2689 case LTTNG_LIST_TRACEPOINTS:
2690 case LTTNG_LIST_TRACEPOINT_FIELDS:
2691 case LTTNG_LIST_DOMAINS:
2692 case LTTNG_LIST_CHANNELS:
2693 case LTTNG_LIST_EVENTS:
2694 break;
2695 default:
2696 /* Setup lttng message with no payload */
2697 ret = setup_lttng_msg(cmd_ctx, 0);
2698 if (ret < 0) {
2699 /* This label does not try to unlock the session */
2700 goto init_setup_error;
2701 }
2702 }
2703
2704 /* Commands that DO NOT need a session. */
2705 switch (cmd_ctx->lsm->cmd_type) {
2706 case LTTNG_CREATE_SESSION:
2707 case LTTNG_CREATE_SESSION_SNAPSHOT:
2708 case LTTNG_CREATE_SESSION_LIVE:
2709 case LTTNG_CALIBRATE:
2710 case LTTNG_LIST_SESSIONS:
2711 case LTTNG_LIST_TRACEPOINTS:
2712 case LTTNG_LIST_TRACEPOINT_FIELDS:
2713 need_tracing_session = 0;
2714 break;
2715 default:
2716 DBG("Getting session %s by name", cmd_ctx->lsm->session.name);
2717 /*
2718 * We keep the session list lock across _all_ commands
2719 * for now, because the per-session lock does not
2720 * handle teardown properly.
2721 */
2722 session_lock_list();
2723 cmd_ctx->session = session_find_by_name(cmd_ctx->lsm->session.name);
2724 if (cmd_ctx->session == NULL) {
2725 ret = LTTNG_ERR_SESS_NOT_FOUND;
2726 goto error;
2727 } else {
2728 /* Acquire lock for the session */
2729 session_lock(cmd_ctx->session);
2730 }
2731 break;
2732 }
2733
2734 if (!need_domain) {
2735 goto skip_domain;
2736 }
2737
2738 /*
2739 * Check domain type for specific "pre-action".
2740 */
2741 switch (cmd_ctx->lsm->domain.type) {
2742 case LTTNG_DOMAIN_KERNEL:
2743 if (!is_root) {
2744 ret = LTTNG_ERR_NEED_ROOT_SESSIOND;
2745 goto error;
2746 }
2747
2748 /* Kernel tracer check */
2749 if (kernel_tracer_fd == -1) {
2750 /* Basically, load kernel tracer modules */
2751 ret = init_kernel_tracer();
2752 if (ret != 0) {
2753 goto error;
2754 }
2755 }
2756
2757 /* Consumer is in an ERROR state. Report back to client */
2758 if (uatomic_read(&kernel_consumerd_state) == CONSUMER_ERROR) {
2759 ret = LTTNG_ERR_NO_KERNCONSUMERD;
2760 goto error;
2761 }
2762
2763 /* Need a session for kernel command */
2764 if (need_tracing_session) {
2765 if (cmd_ctx->session->kernel_session == NULL) {
2766 ret = create_kernel_session(cmd_ctx->session);
2767 if (ret < 0) {
2768 ret = LTTNG_ERR_KERN_SESS_FAIL;
2769 goto error;
2770 }
2771 }
2772
2773 /* Start the kernel consumer daemon */
2774 pthread_mutex_lock(&kconsumer_data.pid_mutex);
2775 if (kconsumer_data.pid == 0 &&
2776 cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER) {
2777 pthread_mutex_unlock(&kconsumer_data.pid_mutex);
2778 ret = start_consumerd(&kconsumer_data);
2779 if (ret < 0) {
2780 ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
2781 goto error;
2782 }
2783 uatomic_set(&kernel_consumerd_state, CONSUMER_STARTED);
2784 } else {
2785 pthread_mutex_unlock(&kconsumer_data.pid_mutex);
2786 }
2787
2788 /*
2789 * The consumer was just spawned so we need to add the socket to
2790 * the consumer output of the session if it exists.
2791 */
2792 ret = consumer_create_socket(&kconsumer_data,
2793 cmd_ctx->session->kernel_session->consumer);
2794 if (ret < 0) {
2795 goto error;
2796 }
2797 }
2798
2799 break;
2800 case LTTNG_DOMAIN_JUL:
2801 case LTTNG_DOMAIN_UST:
2802 {
2803 if (!ust_app_supported()) {
2804 ret = LTTNG_ERR_NO_UST;
2805 goto error;
2806 }
2807 /* Consumer is in an ERROR state. Report back to client */
2808 if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) {
2809 ret = LTTNG_ERR_NO_USTCONSUMERD;
2810 goto error;
2811 }
2812
2813 if (need_tracing_session) {
2814 /* Create UST session if none exists. */
2815 if (cmd_ctx->session->ust_session == NULL) {
2816 ret = create_ust_session(cmd_ctx->session,
2817 &cmd_ctx->lsm->domain);
2818 if (ret != LTTNG_OK) {
2819 goto error;
2820 }
2821 }
2822
2823 /* Start the UST consumer daemons */
2824 /* 64-bit */
2825 pthread_mutex_lock(&ustconsumer64_data.pid_mutex);
2826 if (consumerd64_bin[0] != '\0' &&
2827 ustconsumer64_data.pid == 0 &&
2828 cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER) {
2829 pthread_mutex_unlock(&ustconsumer64_data.pid_mutex);
2830 ret = start_consumerd(&ustconsumer64_data);
2831 if (ret < 0) {
2832 ret = LTTNG_ERR_UST_CONSUMER64_FAIL;
2833 uatomic_set(&ust_consumerd64_fd, -EINVAL);
2834 goto error;
2835 }
2836
2837 uatomic_set(&ust_consumerd64_fd, ustconsumer64_data.cmd_sock);
2838 uatomic_set(&ust_consumerd_state, CONSUMER_STARTED);
2839 } else {
2840 pthread_mutex_unlock(&ustconsumer64_data.pid_mutex);
2841 }
2842
2843 /*
2844 * Setup socket for consumer 64 bit. No need for atomic access
2845 * since it was set above and can ONLY be set in this thread.
2846 */
2847 ret = consumer_create_socket(&ustconsumer64_data,
2848 cmd_ctx->session->ust_session->consumer);
2849 if (ret < 0) {
2850 goto error;
2851 }
2852
2853 /* 32-bit */
pthread_mutex_lock(&ustconsumer32_data.pid_mutex);
2854 if (consumerd32_bin[0] != '\0' &&
2855 ustconsumer32_data.pid == 0 &&
2856 cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER) {
2857 pthread_mutex_unlock(&ustconsumer32_data.pid_mutex);
2858 ret = start_consumerd(&ustconsumer32_data);
2859 if (ret < 0) {
2860 ret = LTTNG_ERR_UST_CONSUMER32_FAIL;
2861 uatomic_set(&ust_consumerd32_fd, -EINVAL);
2862 goto error;
2863 }
2864
2865 uatomic_set(&ust_consumerd32_fd, ustconsumer32_data.cmd_sock);
2866 uatomic_set(&ust_consumerd_state, CONSUMER_STARTED);
2867 } else {
2868 pthread_mutex_unlock(&ustconsumer32_data.pid_mutex);
2869 }
2870
2871 /*
2872 * Setup socket for consumer 32 bit. No need for atomic access
2873 * since it was set above and can ONLY be set in this thread.
2874 */
2875 ret = consumer_create_socket(&ustconsumer32_data,
2876 cmd_ctx->session->ust_session->consumer);
2877 if (ret < 0) {
2878 goto error;
2879 }
2880 }
2881 break;
2882 }
2883 default:
2884 break;
2885 }
2886 skip_domain:
2887
2888 /* Validate consumer daemon state when start/stop trace command */
2889 if (cmd_ctx->lsm->cmd_type == LTTNG_START_TRACE ||
2890 cmd_ctx->lsm->cmd_type == LTTNG_STOP_TRACE) {
2891 switch (cmd_ctx->lsm->domain.type) {
2892 case LTTNG_DOMAIN_JUL:
2893 case LTTNG_DOMAIN_UST:
2894 if (uatomic_read(&ust_consumerd_state) != CONSUMER_STARTED) {
2895 ret = LTTNG_ERR_NO_USTCONSUMERD;
2896 goto error;
2897 }
2898 break;
2899 case LTTNG_DOMAIN_KERNEL:
2900 if (uatomic_read(&kernel_consumerd_state) != CONSUMER_STARTED) {
2901 ret = LTTNG_ERR_NO_KERNCONSUMERD;
2902 goto error;
2903 }
2904 break;
2905 }
2906 }
2907
2908 /*
2909 * Check that the UID or GID match that of the tracing session.
2910 * The root user can interact with all sessions.
2911 */
2912 if (need_tracing_session) {
2913 if (!session_access_ok(cmd_ctx->session,
2914 LTTNG_SOCK_GET_UID_CRED(&cmd_ctx->creds),
2915 LTTNG_SOCK_GET_GID_CRED(&cmd_ctx->creds))) {
2916 ret = LTTNG_ERR_EPERM;
2917 goto error;
2918 }
2919 }
2920
2921 /*
2922 * Send relayd information to consumer as soon as we have a domain and a
2923 * session defined.
2924 */
2925 if (cmd_ctx->session && need_domain) {
2926 /*
2927 * Setup relayd if not done yet. If the relayd information was already
2928 * sent to the consumer, this call will gracefully return.
2929 */
2930 ret = cmd_setup_relayd(cmd_ctx->session);
2931 if (ret != LTTNG_OK) {
2932 goto error;
2933 }
2934 }
2935
2936 /* Process by command type */
2937 switch (cmd_ctx->lsm->cmd_type) {
2938 case LTTNG_ADD_CONTEXT:
2939 {
2940 ret = cmd_add_context(cmd_ctx->session, cmd_ctx->lsm->domain.type,
2941 cmd_ctx->lsm->u.context.channel_name,
2942 &cmd_ctx->lsm->u.context.ctx, kernel_poll_pipe[1]);
2943 break;
2944 }
2945 case LTTNG_DISABLE_CHANNEL:
2946 {
2947 ret = cmd_disable_channel(cmd_ctx->session, cmd_ctx->lsm->domain.type,
2948 cmd_ctx->lsm->u.disable.channel_name);
2949 break;
2950 }
2951 case LTTNG_DISABLE_EVENT:
2952 {
2953 ret = cmd_disable_event(cmd_ctx->session, cmd_ctx->lsm->domain.type,
2954 cmd_ctx->lsm->u.disable.channel_name,
2955 cmd_ctx->lsm->u.disable.name);
2956 break;
2957 }
2958 case LTTNG_DISABLE_ALL_EVENT:
2959 {
2960 DBG("Disabling all events");
2961
2962 ret = cmd_disable_event_all(cmd_ctx->session, cmd_ctx->lsm->domain.type,
2963 cmd_ctx->lsm->u.disable.channel_name);
2964 break;
2965 }
2966 case LTTNG_ENABLE_CHANNEL:
2967 {
2968 ret = cmd_enable_channel(cmd_ctx->session, &cmd_ctx->lsm->domain,
2969 &cmd_ctx->lsm->u.channel.chan, kernel_poll_pipe[1]);
2970 break;
2971 }
2972 case LTTNG_ENABLE_EVENT:
2973 {
2974 struct lttng_event_exclusion *exclusion = NULL;
2975 struct lttng_filter_bytecode *bytecode = NULL;
2976
2977 /* Handle exclusion events and receive it from the client. */
2978 if (cmd_ctx->lsm->u.enable.exclusion_count > 0) {
2979 size_t count = cmd_ctx->lsm->u.enable.exclusion_count;
2980
2981 exclusion = zmalloc(sizeof(struct lttng_event_exclusion) +
2982 (count * LTTNG_SYMBOL_NAME_LEN));
2983 if (!exclusion) {
2984 ret = LTTNG_ERR_EXCLUSION_NOMEM;
2985 goto error;
2986 }
2987
2988 DBG("Receiving var len exclusion event list from client ...");
2989 exclusion->count = count;
2990 ret = lttcomm_recv_unix_sock(sock, exclusion->names,
2991 count * LTTNG_SYMBOL_NAME_LEN);
2992 if (ret <= 0) {
2993 DBG("Nothing recv() from client var len data... continuing");
2994 *sock_error = 1;
2995 free(exclusion);
2996 ret = LTTNG_ERR_EXCLUSION_INVAL;
2997 goto error;
2998 }
2999 }
3000
3001 /* Handle filter and get bytecode from client. */
3002 if (cmd_ctx->lsm->u.enable.bytecode_len > 0) {
3003 size_t bytecode_len = cmd_ctx->lsm->u.enable.bytecode_len;
3004
3005 if (bytecode_len > LTTNG_FILTER_MAX_LEN) {
3006 ret = LTTNG_ERR_FILTER_INVAL;
3007 free(exclusion);
3008 goto error;
3009 }
3010
3011 bytecode = zmalloc(bytecode_len);
3012 if (!bytecode) {
3013 free(exclusion);
3014 ret = LTTNG_ERR_FILTER_NOMEM;
3015 goto error;
3016 }
3017
3018 /* Receive var. len. data */
3019 DBG("Receiving var len filter's bytecode from client ...");
3020 ret = lttcomm_recv_unix_sock(sock, bytecode, bytecode_len);
3021 if (ret <= 0) {
3022 DBG("Nothing recv() from client car len data... continuing");
3023 *sock_error = 1;
3024 free(bytecode);
3025 free(exclusion);
3026 ret = LTTNG_ERR_FILTER_INVAL;
3027 goto error;
3028 }
3029
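/* The length embedded in the bytecode must match the announced size. */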
3030 if ((bytecode->len + sizeof(*bytecode)) != bytecode_len) {
3031 free(bytecode);
3032 free(exclusion);
3033 ret = LTTNG_ERR_FILTER_INVAL;
3034 goto error;
3035 }
3036 }
3037
3038 ret = cmd_enable_event(cmd_ctx->session, &cmd_ctx->lsm->domain,
3039 cmd_ctx->lsm->u.enable.channel_name,
3040 &cmd_ctx->lsm->u.enable.event, bytecode, exclusion,
3041 kernel_poll_pipe[1]);
3042 break;
3043 }
3044 case LTTNG_ENABLE_ALL_EVENT:
3045 {
3046 DBG("Enabling all events");
3047
3048 ret = cmd_enable_event_all(cmd_ctx->session, &cmd_ctx->lsm->domain,
3049 cmd_ctx->lsm->u.enable.channel_name,
3050 cmd_ctx->lsm->u.enable.event.type, NULL, kernel_poll_pipe[1]);
3051 break;
3052 }
3053 case LTTNG_LIST_TRACEPOINTS:
3054 {
3055 struct lttng_event *events;
3056 ssize_t nb_events;
3057
3058 session_lock_list();
3059 nb_events = cmd_list_tracepoints(cmd_ctx->lsm->domain.type, &events);
3060 session_unlock_list();
3061 if (nb_events < 0) {
3062 /* Return value is a negative lttng_error_code. */
3063 ret = -nb_events;
3064 goto error;
3065 }
3066
3067 /*
3068 * Setup lttng message with payload size set to the event list size in
3069 * bytes and then copy list into the llm payload.
3070 */
3071 ret = setup_lttng_msg(cmd_ctx, sizeof(struct lttng_event) * nb_events);
3072 if (ret < 0) {
3073 free(events);
3074 goto setup_error;
3075 }
3076
3077 /* Copy event list into message payload */
3078 memcpy(cmd_ctx->llm->payload, events,
3079 sizeof(struct lttng_event) * nb_events);
3080
3081 free(events);
3082
3083 ret = LTTNG_OK;
3084 break;
3085 }
3086 case LTTNG_LIST_TRACEPOINT_FIELDS:
3087 {
3088 struct lttng_event_field *fields;
3089 ssize_t nb_fields;
3090
3091 session_lock_list();
3092 nb_fields = cmd_list_tracepoint_fields(cmd_ctx->lsm->domain.type,
3093 &fields);
3094 session_unlock_list();
3095 if (nb_fields < 0) {
3096 /* Return value is a negative lttng_error_code. */
3097 ret = -nb_fields;
3098 goto error;
3099 }
3100
3101 /*
3102 * Setup lttng message with payload size set to the event list size in
3103 * bytes and then copy list into the llm payload.
3104 */
3105 ret = setup_lttng_msg(cmd_ctx,
3106 sizeof(struct lttng_event_field) * nb_fields);
3107 if (ret < 0) {
3108 free(fields);
3109 goto setup_error;
3110 }
3111
3112 /* Copy event list into message payload */
3113 memcpy(cmd_ctx->llm->payload, fields,
3114 sizeof(struct lttng_event_field) * nb_fields);
3115
3116 free(fields);
3117
3118 ret = LTTNG_OK;
3119 break;
3120 }
3121 case LTTNG_SET_CONSUMER_URI:
3122 {
3123 size_t nb_uri, len;
3124 struct lttng_uri *uris;
3125
3126 nb_uri = cmd_ctx->lsm->u.uri.size;
3127 len = nb_uri * sizeof(struct lttng_uri);
3128
3129 if (nb_uri == 0) {
3130 ret = LTTNG_ERR_INVALID;
3131 goto error;
3132 }
3133
3134 uris = zmalloc(len);
3135 if (uris == NULL) {
3136 ret = LTTNG_ERR_FATAL;
3137 goto error;
3138 }
3139
3140 /* Receive variable len data */
3141 DBG("Receiving %zu URI(s) from client ...", nb_uri);
3142 ret = lttcomm_recv_unix_sock(sock, uris, len);
3143 if (ret <= 0) {
3144 DBG("No URIs received from client... continuing");
3145 *sock_error = 1;
3146 ret = LTTNG_ERR_SESSION_FAIL;
3147 free(uris);
3148 goto error;
3149 }
3150
3151 ret = cmd_set_consumer_uri(cmd_ctx->lsm->domain.type, cmd_ctx->session,
3152 nb_uri, uris);
3153 if (ret != LTTNG_OK) {
3154 free(uris);
3155 goto error;
3156 }
3157
3158 /*
3159 * XXX: 0 means that this URI should be applied on the session. Should
3160 * be a DOMAIN enum.
3161 */
3162 if (cmd_ctx->lsm->domain.type == 0) {
3163 /* Add the URI for the UST session if a consumer is present. */
3164 if (cmd_ctx->session->ust_session &&
3165 cmd_ctx->session->ust_session->consumer) {
3166 ret = cmd_set_consumer_uri(LTTNG_DOMAIN_UST, cmd_ctx->session,
3167 nb_uri, uris);
3168 } else if (cmd_ctx->session->kernel_session &&
3169 cmd_ctx->session->kernel_session->consumer) {
3170 ret = cmd_set_consumer_uri(LTTNG_DOMAIN_KERNEL,
3171 cmd_ctx->session, nb_uri, uris);
3172 }
3173 }
3174
3175 free(uris);
3176
3177 break;
3178 }
3179 case LTTNG_START_TRACE:
3180 {
3181 ret = cmd_start_trace(cmd_ctx->session);
3182 break;
3183 }
3184 case LTTNG_STOP_TRACE:
3185 {
3186 ret = cmd_stop_trace(cmd_ctx->session);
3187 break;
3188 }
3189 case LTTNG_CREATE_SESSION:
3190 {
3191 size_t nb_uri, len;
3192 struct lttng_uri *uris = NULL;
3193
3194 nb_uri = cmd_ctx->lsm->u.uri.size;
3195 len = nb_uri * sizeof(struct lttng_uri);
3196
3197 if (nb_uri > 0) {
3198 uris = zmalloc(len);
3199 if (uris == NULL) {
3200 ret = LTTNG_ERR_FATAL;
3201 goto error;
3202 }
3203
3204 /* Receive variable len data */
3205 DBG("Waiting for %zu URIs from client ...", nb_uri);
3206 ret = lttcomm_recv_unix_sock(sock, uris, len);
3207 if (ret <= 0) {
3208 DBG("No URIs received from client... continuing");
3209 *sock_error = 1;
3210 ret = LTTNG_ERR_SESSION_FAIL;
3211 free(uris);
3212 goto error;
3213 }
3214
3215 if (nb_uri == 1 && uris[0].dtype != LTTNG_DST_PATH) {
3216 DBG("Creating session with ONE network URI is a bad call");
3217 ret = LTTNG_ERR_SESSION_FAIL;
3218 free(uris);
3219 goto error;
3220 }
3221 }
3222
3223 ret = cmd_create_session_uri(cmd_ctx->lsm->session.name, uris, nb_uri,
3224 &cmd_ctx->creds, 0);
3225
3226 free(uris);
3227
3228 break;
3229 }
3230 case LTTNG_DESTROY_SESSION:
3231 {
3232 ret = cmd_destroy_session(cmd_ctx->session, kernel_poll_pipe[1]);
3233
3234 /* Set session to NULL so we do not unlock it after free. */
3235 cmd_ctx->session = NULL;
3236 break;
3237 }
3238 case LTTNG_LIST_DOMAINS:
3239 {
3240 ssize_t nb_dom;
3241 struct lttng_domain *domains;
3242
3243 nb_dom = cmd_list_domains(cmd_ctx->session, &domains);
3244 if (nb_dom < 0) {
3245 /* Return value is a negative lttng_error_code. */
3246 ret = -nb_dom;
3247 goto error;
3248 }
3249
3250 ret = setup_lttng_msg(cmd_ctx, nb_dom * sizeof(struct lttng_domain));
3251 if (ret < 0) {
3252 free(domains);
3253 goto setup_error;
3254 }
3255
3256 /* Copy event list into message payload */
3257 memcpy(cmd_ctx->llm->payload, domains,
3258 nb_dom * sizeof(struct lttng_domain));
3259
3260 free(domains);
3261
3262 ret = LTTNG_OK;
3263 break;
3264 }
3265 case LTTNG_LIST_CHANNELS:
3266 {
3267 int nb_chan;
3268 struct lttng_channel *channels;
3269
3270 nb_chan = cmd_list_channels(cmd_ctx->lsm->domain.type,
3271 cmd_ctx->session, &channels);
3272 if (nb_chan < 0) {
3273 /* Return value is a negative lttng_error_code. */
3274 ret = -nb_chan;
3275 goto error;
3276 }
3277
3278 ret = setup_lttng_msg(cmd_ctx, nb_chan * sizeof(struct lttng_channel));
3279 if (ret < 0) {
3280 free(channels);
3281 goto setup_error;
3282 }
3283
3284 /* Copy event list into message payload */
3285 memcpy(cmd_ctx->llm->payload, channels,
3286 nb_chan * sizeof(struct lttng_channel));
3287
3288 free(channels);
3289
3290 ret = LTTNG_OK;
3291 break;
3292 }
3293 case LTTNG_LIST_EVENTS:
3294 {
3295 ssize_t nb_event;
3296 struct lttng_event *events = NULL;
3297
3298 nb_event = cmd_list_events(cmd_ctx->lsm->domain.type, cmd_ctx->session,
3299 cmd_ctx->lsm->u.list.channel_name, &events);
3300 if (nb_event < 0) {
3301 /* Return value is a negative lttng_error_code. */
3302 ret = -nb_event;
3303 goto error;
3304 }
3305
3306 ret = setup_lttng_msg(cmd_ctx, nb_event * sizeof(struct lttng_event));
3307 if (ret < 0) {
3308 free(events);
3309 goto setup_error;
3310 }
3311
3312 /* Copy event list into message payload */
3313 memcpy(cmd_ctx->llm->payload, events,
3314 nb_event * sizeof(struct lttng_event));
3315
3316 free(events);
3317
3318 ret = LTTNG_OK;
3319 break;
3320 }
3321 case LTTNG_LIST_SESSIONS:
3322 {
3323 unsigned int nr_sessions;
3324
3325 session_lock_list();
3326 nr_sessions = lttng_sessions_count(
3327 LTTNG_SOCK_GET_UID_CRED(&cmd_ctx->creds),
3328 LTTNG_SOCK_GET_GID_CRED(&cmd_ctx->creds));
3329
3330 ret = setup_lttng_msg(cmd_ctx, sizeof(struct lttng_session) * nr_sessions);
3331 if (ret < 0) {
3332 session_unlock_list();
3333 goto setup_error;
3334 }
3335
3336 /* Fill the session array */
3337 cmd_list_lttng_sessions((struct lttng_session *)(cmd_ctx->llm->payload),
3338 LTTNG_SOCK_GET_UID_CRED(&cmd_ctx->creds),
3339 LTTNG_SOCK_GET_GID_CRED(&cmd_ctx->creds));
3340
3341 session_unlock_list();
3342
3343 ret = LTTNG_OK;
3344 break;
3345 }
3346 case LTTNG_CALIBRATE:
3347 {
3348 ret = cmd_calibrate(cmd_ctx->lsm->domain.type,
3349 &cmd_ctx->lsm->u.calibrate);
3350 break;
3351 }
3352 case LTTNG_REGISTER_CONSUMER:
3353 {
3354 struct consumer_data *cdata;
3355
3356 switch (cmd_ctx->lsm->domain.type) {
3357 case LTTNG_DOMAIN_KERNEL:
3358 cdata = &kconsumer_data;
3359 break;
3360 default:
3361 ret = LTTNG_ERR_UND;
3362 goto error;
3363 }
3364
3365 ret = cmd_register_consumer(cmd_ctx->session, cmd_ctx->lsm->domain.type,
3366 cmd_ctx->lsm->u.reg.path, cdata);
3367 break;
3368 }
3369 case LTTNG_DATA_PENDING:
3370 {
3371 ret = cmd_data_pending(cmd_ctx->session);
3372 break;
3373 }
3374 case LTTNG_SNAPSHOT_ADD_OUTPUT:
3375 {
3376 struct lttcomm_lttng_output_id reply;
3377
3378 ret = cmd_snapshot_add_output(cmd_ctx->session,
3379 &cmd_ctx->lsm->u.snapshot_output.output, &reply.id);
3380 if (ret != LTTNG_OK) {
3381 goto error;
3382 }
3383
3384 ret = setup_lttng_msg(cmd_ctx, sizeof(reply));
3385 if (ret < 0) {
3386 goto setup_error;
3387 }
3388
3389 /* Copy output list into message payload */
3390 memcpy(cmd_ctx->llm->payload, &reply, sizeof(reply));
3391 ret = LTTNG_OK;
3392 break;
3393 }
3394 case LTTNG_SNAPSHOT_DEL_OUTPUT:
3395 {
3396 ret = cmd_snapshot_del_output(cmd_ctx->session,
3397 &cmd_ctx->lsm->u.snapshot_output.output);
3398 break;
3399 }
3400 case LTTNG_SNAPSHOT_LIST_OUTPUT:
3401 {
3402 ssize_t nb_output;
3403 struct lttng_snapshot_output *outputs = NULL;
3404
3405 nb_output = cmd_snapshot_list_outputs(cmd_ctx->session, &outputs);
3406 if (nb_output < 0) {
3407 ret = -nb_output;
3408 goto error;
3409 }
3410
3411 ret = setup_lttng_msg(cmd_ctx,
3412 nb_output * sizeof(struct lttng_snapshot_output));
3413 if (ret < 0) {
3414 free(outputs);
3415 goto setup_error;
3416 }
3417
3418 if (outputs) {
3419 /* Copy output list into message payload */
3420 memcpy(cmd_ctx->llm->payload, outputs,
3421 nb_output * sizeof(struct lttng_snapshot_output));
3422 free(outputs);
3423 }
3424
3425 ret = LTTNG_OK;
3426 break;
3427 }
3428 case LTTNG_SNAPSHOT_RECORD:
3429 {
3430 ret = cmd_snapshot_record(cmd_ctx->session,
3431 &cmd_ctx->lsm->u.snapshot_record.output,
3432 cmd_ctx->lsm->u.snapshot_record.wait);
3433 break;
3434 }
3435 case LTTNG_CREATE_SESSION_SNAPSHOT:
3436 {
3437 size_t nb_uri, len;
3438 struct lttng_uri *uris = NULL;
3439
3440 nb_uri = cmd_ctx->lsm->u.uri.size;
3441 len = nb_uri * sizeof(struct lttng_uri);
3442
3443 if (nb_uri > 0) {
3444 uris = zmalloc(len);
3445 if (uris == NULL) {
3446 ret = LTTNG_ERR_FATAL;
3447 goto error;
3448 }
3449
3450 /* Receive variable len data */
3451 DBG("Waiting for %zu URIs from client ...", nb_uri);
3452 ret = lttcomm_recv_unix_sock(sock, uris, len);
3453 if (ret <= 0) {
3454 DBG("No URIs received from client... continuing");
3455 *sock_error = 1;
3456 ret = LTTNG_ERR_SESSION_FAIL;
3457 free(uris);
3458 goto error;
3459 }
3460
3461 if (nb_uri == 1 && uris[0].dtype != LTTNG_DST_PATH) {
3462 DBG("Creating session with ONE network URI is a bad call");
3463 ret = LTTNG_ERR_SESSION_FAIL;
3464 free(uris);
3465 goto error;
3466 }
3467 }
3468
3469 ret = cmd_create_session_snapshot(cmd_ctx->lsm->session.name, uris,
3470 nb_uri, &cmd_ctx->creds);
3471 free(uris);
3472 break;
3473 }
3474 case LTTNG_CREATE_SESSION_LIVE:
3475 {
3476 size_t nb_uri, len;
3477 struct lttng_uri *uris = NULL;
3478
3479 nb_uri = cmd_ctx->lsm->u.uri.size;
3480 len = nb_uri * sizeof(struct lttng_uri);
3481
3482 if (nb_uri > 0) {
3483 uris = zmalloc(len);
3484 if (uris == NULL) {
3485 ret = LTTNG_ERR_FATAL;
3486 goto error;
3487 }
3488
3489 /* Receive variable len data */
3490 DBG("Waiting for %zu URIs from client ...", nb_uri);
3491 ret = lttcomm_recv_unix_sock(sock, uris, len);
3492 if (ret <= 0) {
3493 DBG("No URIs received from client... continuing");
3494 *sock_error = 1;
3495 ret = LTTNG_ERR_SESSION_FAIL;
3496 free(uris);
3497 goto error;
3498 }
3499
3500 if (nb_uri == 1 && uris[0].dtype != LTTNG_DST_PATH) {
3501 DBG("Creating session with ONE network URI is a bad call");
3502 ret = LTTNG_ERR_SESSION_FAIL;
3503 free(uris);
3504 goto error;
3505 }
3506 }
3507
3508 ret = cmd_create_session_uri(cmd_ctx->lsm->session.name, uris,
3509 nb_uri, &cmd_ctx->creds, cmd_ctx->lsm->u.session_live.timer_interval);
3510 free(uris);
3511 break;
3512 }
3513 default:
3514 ret = LTTNG_ERR_UND;
3515 break;
3516 }
3517
3518 error:
3519 if (cmd_ctx->llm == NULL) {
3520 DBG("Missing llm structure. Allocating one.");
3521 if (setup_lttng_msg(cmd_ctx, 0) < 0) {
3522 goto setup_error;
3523 }
3524 }
3525 /* Set return code */
3526 cmd_ctx->llm->ret_code = ret;
3527 setup_error:
3528 if (cmd_ctx->session) {
3529 session_unlock(cmd_ctx->session);
3530 }
3531 if (need_tracing_session) {
3532 session_unlock_list();
3533 }
3534 init_setup_error:
3535 return ret;
3536 }
3537
3538 /*
3539 * Thread managing health check socket.
3540 */
3541 static void *thread_manage_health(void *data)
3542 {
3543 int sock = -1, new_sock = -1, ret, i, pollfd, err = -1;
3544 uint32_t revents, nb_fd;
3545 struct lttng_poll_event events;
3546 struct health_comm_msg msg;
3547 struct health_comm_reply reply;
3548
3549 DBG("[thread] Manage health check started");
3550
3551 rcu_register_thread();
3552
3553 /* We might hit an error path before this is created. */
3554 lttng_poll_init(&events);
3555
3556 /* Create unix socket */
3557 sock = lttcomm_create_unix_sock(health_unix_sock_path);
3558 if (sock < 0) {
3559 ERR("Unable to create health check Unix socket");
3560 ret = -1;
3561 goto error;
3562 }
3563
3564 if (is_root) {
3565 /* lttng health client socket path permissions */
3566 ret = chown(health_unix_sock_path, 0,
3567 utils_get_group_id(tracing_group_name));
3568 if (ret < 0) {
3569 ERR("Unable to set group on %s", health_unix_sock_path);
3570 PERROR("chown");
3571 ret = -1;
3572 goto error;
3573 }
3574
3575 ret = chmod(health_unix_sock_path,
3576 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
3577 if (ret < 0) {
3578 ERR("Unable to set permissions on %s", health_unix_sock_path);
3579 PERROR("chmod");
3580 ret = -1;
3581 goto error;
3582 }
3583 }
3584
3585 /*
3586 * Set the CLOEXEC flag. Return code is useless because either way, the
3587 * show must go on.
3588 */
3589 (void) utils_set_fd_cloexec(sock);
3590
3591 ret = lttcomm_listen_unix_sock(sock);
3592 if (ret < 0) {
3593 goto error;
3594 }
3595
3596 /*
3597 * Pass 2 as size here for the thread quit pipe and the health socket. Nothing
3598 * more will be added to this poll set.
3599 */
3600 ret = sessiond_set_thread_pollset(&events, 2);
3601 if (ret < 0) {
3602 goto error;
3603 }
3604
3605 /* Add the health check socket */
3606 ret = lttng_poll_add(&events, sock, LPOLLIN | LPOLLPRI);
3607 if (ret < 0) {
3608 goto error;
3609 }
3610
3611 lttng_sessiond_notify_ready();
3612
3613 while (1) {
3614 DBG("Health check ready");
3615
3616 /* Infinite blocking call, waiting for transmission */
3617 restart:
3618 ret = lttng_poll_wait(&events, -1);
3619 if (ret < 0) {
3620 /*
3621 * Restart interrupted system call.
3622 */
3623 if (errno == EINTR) {
3624 goto restart;
3625 }
3626 goto error;
3627 }
3628
3629 nb_fd = ret;
3630
3631 for (i = 0; i < nb_fd; i++) {
3632 /* Fetch once the poll data */
3633 revents = LTTNG_POLL_GETEV(&events, i);
3634 pollfd = LTTNG_POLL_GETFD(&events, i);
3635
3636 /* Thread quit pipe has been closed. Killing thread. */
3637 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
3638 if (ret) {
3639 err = 0;
3640 goto exit;
3641 }
3642
3643 /* Event on the health check socket */
3644 if (pollfd == sock) {
3645 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
3646 ERR("Health socket poll error");
3647 goto error;
3648 }
3649 }
3650 }
3651
3652 new_sock = lttcomm_accept_unix_sock(sock);
3653 if (new_sock < 0) {
3654 goto error;
3655 }
3656
3657 /*
3658 * Set the CLOEXEC flag. Return code is useless because either way, the
3659 * show must go on.
3660 */
3661 (void) utils_set_fd_cloexec(new_sock);
3662
3663 DBG("Receiving data from client for health...");
3664 ret = lttcomm_recv_unix_sock(new_sock, (void *)&msg, sizeof(msg));
3665 if (ret <= 0) {
3666 DBG("Nothing recv() from client... continuing");
3667 ret = close(new_sock);
3668 if (ret) {
3669 PERROR("close");
3670 }
3671 new_sock = -1;
3672 continue;
3673 }
3674
3675 rcu_thread_online();
3676
3677 memset(&reply, 0, sizeof(reply));
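/* Each component in a bad state sets its bit in the reply bitmask. */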
3678 for (i = 0; i < NR_HEALTH_SESSIOND_TYPES; i++) {
3679 /*
3680 * health_check_state returns 0 if health is
3681 * bad.
3682 */
3683 if (!health_check_state(health_sessiond, i)) {
3684 reply.ret_code |= 1ULL << i;
3685 }
3686 }
3687
3688 DBG2("Health check return value %" PRIx64, reply.ret_code);
3689
3690 ret = send_unix_sock(new_sock, (void *) &reply, sizeof(reply));
3691 if (ret < 0) {
3692 ERR("Failed to send health data back to client");
3693 }
3694
3695 /* End of transmission */
3696 ret = close(new_sock);
3697 if (ret) {
3698 PERROR("close");
3699 }
3700 new_sock = -1;
3701 }
3702
3703 exit:
3704 error:
3705 if (err) {
3706 ERR("Health error occurred in %s", __func__);
3707 }
3708 DBG("Health check thread dying");
3709 unlink(health_unix_sock_path);
3710 if (sock >= 0) {
3711 ret = close(sock);
3712 if (ret) {
3713 PERROR("close");
3714 }
3715 }
3716
3717 lttng_poll_clean(&events);
3718
3719 rcu_unregister_thread();
3720 return NULL;
3721 }
3722
3723 /*
3724 * This thread manages all client requests using the unix client socket
3725 * for communication.
3726 */
3727 static void *thread_manage_clients(void *data)
3728 {
3729 int sock = -1, ret, i, pollfd, err = -1;
3730 int sock_error;
3731 uint32_t revents, nb_fd;
3732 struct command_ctx *cmd_ctx = NULL;
3733 struct lttng_poll_event events;
3734
3735 DBG("[thread] Manage client started");
3736
3737 rcu_register_thread();
3738
3739 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_CMD);
3740
3741 health_code_update();
3742
3743 ret = lttcomm_listen_unix_sock(client_sock);
3744 if (ret < 0) {
3745 goto error_listen;
3746 }
3747
3748 /*
3749 * Pass 2 as size here for the thread quit pipe and client_sock. Nothing
3750 * more will be added to this poll set.
3751 */
3752 ret = sessiond_set_thread_pollset(&events, 2);
3753 if (ret < 0) {
3754 goto error_create_poll;
3755 }
3756
3757 /* Add the client socket */
3758 ret = lttng_poll_add(&events, client_sock, LPOLLIN | LPOLLPRI);
3759 if (ret < 0) {
3760 goto error;
3761 }
3762
3763 lttng_sessiond_notify_ready();
3764
3765 /* This testpoint is after we signal readiness to the parent. */
3766 if (testpoint(sessiond_thread_manage_clients)) {
3767 goto error;
3768 }
3769
3770 if (testpoint(sessiond_thread_manage_clients_before_loop)) {
3771 goto error;
3772 }
3773
3774 health_code_update();
3775
3776 while (1) {
3777 DBG("Accepting client command ...");
3778
3779 /* Infinite blocking call, waiting for transmission */
3780 restart:
3781 health_poll_entry();
3782 ret = lttng_poll_wait(&events, -1);
3783 health_poll_exit();
3784 if (ret < 0) {
3785 /*
3786 * Restart interrupted system call.
3787 */
3788 if (errno == EINTR) {
3789 goto restart;
3790 }
3791 goto error;
3792 }
3793
3794 nb_fd = ret;
3795
3796 for (i = 0; i < nb_fd; i++) {
3797 /* Fetch once the poll data */
3798 revents = LTTNG_POLL_GETEV(&events, i);
3799 pollfd = LTTNG_POLL_GETFD(&events, i);
3800
3801 health_code_update();
3802
3803 /* Thread quit pipe has been closed. Killing thread. */
3804 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
3805 if (ret) {
3806 err = 0;
3807 goto exit;
3808 }
3809
3810 /* Event on the client socket */
3811 if (pollfd == client_sock) {
3812 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
3813 ERR("Client socket poll error");
3814 goto error;
3815 }
3816 }
3817 }
3818
3819 DBG("Wait for client response");
3820
3821 health_code_update();
3822
3823 sock = lttcomm_accept_unix_sock(client_sock);
3824 if (sock < 0) {
3825 goto error;
3826 }
3827
3828 /*
3829 * Set the CLOEXEC flag. Return code is useless because either way, the
3830 * show must go on.
3831 */
3832 (void) utils_set_fd_cloexec(sock);
3833
3834 /* Set socket option for credentials retrieval */
3835 ret = lttcomm_setsockopt_creds_unix_sock(sock);
3836 if (ret < 0) {
3837 goto error;
3838 }
3839
3840 /* Allocate context command to process the client request */
3841 cmd_ctx = zmalloc(sizeof(struct command_ctx));
3842 if (cmd_ctx == NULL) {
3843 PERROR("zmalloc cmd_ctx");
3844 goto error;
3845 }
3846
3847 /* Allocate data buffer for reception */
3848 cmd_ctx->lsm = zmalloc(sizeof(struct lttcomm_session_msg));
3849 if (cmd_ctx->lsm == NULL) {
3850 PERROR("zmalloc cmd_ctx->lsm");
3851 goto error;
3852 }
3853
3854 cmd_ctx->llm = NULL;
3855 cmd_ctx->session = NULL;
3856
3857 health_code_update();
3858
3859 /*
3860 * Data is received from the lttng client. The struct
3861 * lttcomm_session_msg (lsm) contains the command and data request of
3862 * the client.
3863 */
3864 DBG("Receiving data from client ...");
3865 ret = lttcomm_recv_creds_unix_sock(sock, cmd_ctx->lsm,
3866 sizeof(struct lttcomm_session_msg), &cmd_ctx->creds);
3867 if (ret <= 0) {
3868 DBG("Nothing recv() from client... continuing");
3869 ret = close(sock);
3870 if (ret) {
3871 PERROR("close");
3872 }
3873 sock = -1;
3874 clean_command_ctx(&cmd_ctx);
3875 continue;
3876 }
3877
3878 health_code_update();
3879
3880 // TODO: Validate cmd_ctx including sanity check for
3881 // security purposes.
3882
3883 rcu_thread_online();
3884 /*
3885 * This function dispatches the work to the kernel or userspace tracer
3886 * libs and fills the lttcomm_lttng_msg data structure with all the
3887 * information needed by the client. The command context struct contains
3888 * everything this function may need.
3889 */
3890 ret = process_client_msg(cmd_ctx, sock, &sock_error);
3891 rcu_thread_offline();
3892 if (ret < 0) {
3893 ret = close(sock);
3894 if (ret) {
3895 PERROR("close");
3896 }
3897 sock = -1;
3898 /*
3899 * TODO: Inform client somehow of the fatal error. At
3900 * this point, ret < 0 means that a zmalloc failed
3901 * (ENOMEM). Error detected but still accept
3902 * command, unless a socket error has been
3903 * detected.
3904 */
3905 clean_command_ctx(&cmd_ctx);
3906 continue;
3907 }
3908
3909 health_code_update();
3910
3911 DBG("Sending response (size: %d, retcode: %s)",
3912 cmd_ctx->lttng_msg_size,
3913 lttng_strerror(-cmd_ctx->llm->ret_code));
3914 ret = send_unix_sock(sock, cmd_ctx->llm, cmd_ctx->lttng_msg_size);
3915 if (ret < 0) {
3916 ERR("Failed to send data back to client");
3917 }
3918
3919 /* End of transmission */
3920 ret = close(sock);
3921 if (ret) {
3922 PERROR("close");
3923 }
3924 sock = -1;
3925
3926 clean_command_ctx(&cmd_ctx);
3927
3928 health_code_update();
3929 }
3930
3931 exit:
3932 error:
3933 if (sock >= 0) {
3934 ret = close(sock);
3935 if (ret) {
3936 PERROR("close");
3937 }
3938 }
3939
3940 lttng_poll_clean(&events);
3941 clean_command_ctx(&cmd_ctx);
3942
3943 error_listen:
3944 error_create_poll:
3945 unlink(client_unix_sock_path);
3946 if (client_sock >= 0) {
3947 ret = close(client_sock);
3948 if (ret) {
3949 PERROR("close");
3950 }
3951 }
3952
3953 if (err) {
3954 health_error();
3955 ERR("Health error occurred in %s", __func__);
3956 }
3957
3958 health_unregister(health_sessiond);
3959
3960 DBG("Client thread dying");
3961
3962 rcu_unregister_thread();
3963 return NULL;
3964 }
3965
3966
3967 /*
3968 * usage function on stderr
3969 */
3970 static void usage(void)
3971 {
3972 fprintf(stderr, "Usage: %s OPTIONS\n\nOptions:\n", progname);
3973 fprintf(stderr, " -h, --help Display this usage.\n");
3974 fprintf(stderr, " -c, --client-sock PATH Specify path for the client unix socket\n");
3975 fprintf(stderr, " -a, --apps-sock PATH Specify path for apps unix socket\n");
3976 fprintf(stderr, " --kconsumerd-err-sock PATH Specify path for the kernel consumer error socket\n");
3977 fprintf(stderr, " --kconsumerd-cmd-sock PATH Specify path for the kernel consumer command socket\n");
3978 fprintf(stderr, " --ustconsumerd32-err-sock PATH Specify path for the 32-bit UST consumer error socket\n");
3979 fprintf(stderr, " --ustconsumerd64-err-sock PATH Specify path for the 64-bit UST consumer error socket\n");
3980 fprintf(stderr, " --ustconsumerd32-cmd-sock PATH Specify path for the 32-bit UST consumer command socket\n");
3981 fprintf(stderr, " --ustconsumerd64-cmd-sock PATH Specify path for the 64-bit UST consumer command socket\n");
3982 fprintf(stderr, " --consumerd32-path PATH Specify path for the 32-bit UST consumer daemon binary\n");
3983 fprintf(stderr, " --consumerd32-libdir PATH Specify path for the 32-bit UST consumer daemon libraries\n");
3984 fprintf(stderr, " --consumerd64-path PATH Specify path for the 64-bit UST consumer daemon binary\n");
3985 fprintf(stderr, " --consumerd64-libdir PATH Specify path for the 64-bit UST consumer daemon libraries\n");
3986 fprintf(stderr, " -d, --daemonize Start as a daemon.\n");
3987 fprintf(stderr, " -b, --background Start as a daemon, keeping console open.\n");
3988 fprintf(stderr, " -g, --group NAME Specify the tracing group name. (default: tracing)\n");
3989 fprintf(stderr, " -V, --version Show version number.\n");
3990 fprintf(stderr, " -S, --sig-parent Send SIGUSR1 to parent pid to notify readiness.\n");
3991 fprintf(stderr, " -q, --quiet No output at all.\n");
3992 fprintf(stderr, " -v, --verbose Verbose mode. Activate DBG() macro.\n");
3993 fprintf(stderr, " -p, --pidfile FILE Write a pid to FILE, overriding the default value.\n");
3994 fprintf(stderr, " --verbose-consumer Verbose mode for consumer. Activate DBG() macro.\n");
3995 fprintf(stderr, " --no-kernel Disable kernel tracer\n");
3996 fprintf(stderr, " --jul-tcp-port JUL application registration TCP port\n");
3997 }
3998
3999 /*
4000 * daemon argument parsing
4001 */
4002 static int parse_args(int argc, char **argv)
4003 {
4004 int c;
4005
4006 static struct option long_options[] = {
4007 { "client-sock", 1, 0, 'c' },
4008 { "apps-sock", 1, 0, 'a' },
4009 { "kconsumerd-cmd-sock", 1, 0, 'C' },
4010 { "kconsumerd-err-sock", 1, 0, 'E' },
4011 { "ustconsumerd32-cmd-sock", 1, 0, 'G' },
4012 { "ustconsumerd32-err-sock", 1, 0, 'H' },
4013 { "ustconsumerd64-cmd-sock", 1, 0, 'D' },
4014 { "ustconsumerd64-err-sock", 1, 0, 'F' },
4015 { "consumerd32-path", 1, 0, 'u' },
4016 { "consumerd32-libdir", 1, 0, 'U' },
4017 { "consumerd64-path", 1, 0, 't' },
4018 { "consumerd64-libdir", 1, 0, 'T' },
4019 { "daemonize", 0, 0, 'd' },
4020 { "sig-parent", 0, 0, 'S' },
4021 { "help", 0, 0, 'h' },
4022 { "group", 1, 0, 'g' },
4023 { "version", 0, 0, 'V' },
4024 { "quiet", 0, 0, 'q' },
4025 { "verbose", 0, 0, 'v' },
4026 { "verbose-consumer", 0, 0, 'Z' },
4027 { "no-kernel", 0, 0, 'N' },
4028 { "pidfile", 1, 0, 'p' },
4029 { "jul-tcp-port", 1, 0, 'J' },
4030 { "background", 0, 0, 'b' },
4031 { NULL, 0, 0, 0 }
4032 };
4033
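/* Iterate until getopt_long() reports the end of the options (-1). */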
4034 while (1) {
4035 int option_index = 0;
4036 c = getopt_long(argc, argv, "dhqvVSN" "a:c:g:s:C:E:D:F:Z:u:t:p:J:b",
4037 long_options, &option_index);
4038 if (c == -1) {
4039 break;
4040 }
4041
4042 switch (c) {
4043 case 0:
4044 fprintf(stderr, "option %s", long_options[option_index].name);
4045 if (optarg) {
4046 fprintf(stderr, " with arg %s\n", optarg);
4047 }
4048 break;
4049 case 'c':
4050 snprintf(client_unix_sock_path, PATH_MAX, "%s", optarg);
4051 break;
4052 case 'a':
4053 snprintf(apps_unix_sock_path, PATH_MAX, "%s", optarg);
4054 break;
4055 case 'd':
4056 opt_daemon = 1;
4057 break;
4058 case 'b':
4059 opt_background = 1;
4060 break;
4061 case 'g':
4062 tracing_group_name = optarg;
4063 break;
4064 case 'h':
4065 usage();
4066 exit(EXIT_FAILURE);
4067 case 'V':
4068 fprintf(stdout, "%s\n", VERSION);
4069 exit(EXIT_SUCCESS);
4070 case 'S':
4071 opt_sig_parent = 1;
4072 break;
4073 case 'E':
4074 snprintf(kconsumer_data.err_unix_sock_path, PATH_MAX, "%s", optarg);
4075 break;
4076 case 'C':
4077 snprintf(kconsumer_data.cmd_unix_sock_path, PATH_MAX, "%s", optarg);
4078 break;
4079 case 'F':
4080 snprintf(ustconsumer64_data.err_unix_sock_path, PATH_MAX, "%s", optarg);
4081 break;
4082 case 'D':
4083 snprintf(ustconsumer64_data.cmd_unix_sock_path, PATH_MAX, "%s", optarg);
4084 break;
4085 case 'H':
4086 snprintf(ustconsumer32_data.err_unix_sock_path, PATH_MAX, "%s", optarg);
4087 break;
4088 case 'G':
4089 snprintf(ustconsumer32_data.cmd_unix_sock_path, PATH_MAX, "%s", optarg);
4090 break;
4091 case 'N':
4092 opt_no_kernel = 1;
4093 break;
4094 case 'q':
4095 lttng_opt_quiet = 1;
4096 break;
4097 case 'v':
4098 /* Verbose level can increase using multiple -v */
4099 lttng_opt_verbose += 1;
4100 break;
4101 case 'Z':
4102 opt_verbose_consumer += 1;
4103 break;
4104 case 'u':
4105 consumerd32_bin = optarg;
4106 break;
4107 case 'U':
4108 consumerd32_libdir = optarg;
4109 break;
4110 case 't':
4111 consumerd64_bin = optarg;
4112 break;
4113 case 'T':
4114 consumerd64_libdir = optarg;
4115 break;
4116 case 'p':
4117 opt_pidfile = optarg;
4118 break;
4119 case 'J': /* JUL TCP port. */
4120 {
4121 unsigned long v;
4122
4123 errno = 0;
4124 v = strtoul(optarg, NULL, 0);
4125 if (errno != 0 || !isdigit(optarg[0])) {
4126 ERR("Wrong value in --jul-tcp-port parameter: %s", optarg);
4127 return -1;
4128 }
4129 if (v == 0 || v >= 65535) {
4130 ERR("Port overflow in --jul-tcp-port parameter: %s", optarg);
4131 return -1;
4132 }
4133 jul_tcp_port = (uint32_t) v;
4134 DBG3("JUL TCP port set to non default: %u", jul_tcp_port);
4135 break;
4136 }
4137 default:
4138 /* Unknown option or other error.
4139 * Error is printed by getopt, just return */
4140 return -1;
4141 }
4142 }
4143
4144 return 0;
4145 }
4146
/*
 * Create the two sockets needed by the daemon.
 * apps_sock - The communication socket for all UST applications.
 * client_sock - The communication socket for the CLI tool (lttng).
 */
4152 static int init_daemon_socket(void)
4153 {
4154 int ret = 0;
4155 mode_t old_umask;
4156
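	/*
	 * Clear the umask so that the chmod() calls below fully control the
	 * permission bits of the sockets; the previous mask is restored at the
	 * end of this function.
	 */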
4157 old_umask = umask(0);
4158
4159 /* Create client tool unix socket */
4160 client_sock = lttcomm_create_unix_sock(client_unix_sock_path);
4161 if (client_sock < 0) {
4162 ERR("Create unix sock failed: %s", client_unix_sock_path);
4163 ret = -1;
4164 goto end;
4165 }
4166
4167 /* Set the cloexec flag */
4168 ret = utils_set_fd_cloexec(client_sock);
4169 if (ret < 0) {
4170 ERR("Unable to set CLOEXEC flag to the client Unix socket (fd: %d). "
4171 "Continuing but note that the consumer daemon will have a "
4172 "reference to this socket on exec()", client_sock);
4173 }
4174
4175 /* File permission MUST be 660 */
4176 ret = chmod(client_unix_sock_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
4177 if (ret < 0) {
4178 ERR("Set file permissions failed: %s", client_unix_sock_path);
4179 PERROR("chmod");
4180 goto end;
4181 }
4182
4183 /* Create the application unix socket */
4184 apps_sock = lttcomm_create_unix_sock(apps_unix_sock_path);
4185 if (apps_sock < 0) {
4186 ERR("Create unix sock failed: %s", apps_unix_sock_path);
4187 ret = -1;
4188 goto end;
4189 }
4190
4191 /* Set the cloexec flag */
4192 ret = utils_set_fd_cloexec(apps_sock);
4193 if (ret < 0) {
4194 ERR("Unable to set CLOEXEC flag to the app Unix socket (fd: %d). "
4195 "Continuing but note that the consumer daemon will have a "
4196 "reference to this socket on exec()", apps_sock);
4197 }
4198
4199 /* File permission MUST be 666 */
4200 ret = chmod(apps_unix_sock_path,
4201 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
4202 if (ret < 0) {
4203 ERR("Set file permissions failed: %s", apps_unix_sock_path);
4204 PERROR("chmod");
4205 goto end;
4206 }
4207
4208 DBG3("Session daemon client socket %d and application socket %d created",
4209 client_sock, apps_sock);
4210
4211 end:
4212 umask(old_umask);
4213 return ret;
4214 }
4215
4216 /*
 * Check if the global socket is available and if a daemon is answering on the
 * other side. If so, an error is returned.
4219 */
4220 static int check_existing_daemon(void)
4221 {
	/* Is there anybody out there? */
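	/*
	 * lttng_session_daemon_alive() presumably probes the client socket set
	 * up earlier; any answer means another session daemon owns it.
	 */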
4223 if (lttng_session_daemon_alive()) {
4224 return -EEXIST;
4225 }
4226
4227 return 0;
4228 }
4229
4230 /*
 * Set the tracing group gid onto the run directory and client socket.
4232 *
4233 * Race window between mkdir and chown is OK because we are going from more
4234 * permissive (root.root) to less permissive (root.tracing).
4235 */
4236 static int set_permissions(char *rundir)
4237 {
4238 int ret;
4239 gid_t gid;
4240
4241 gid = utils_get_group_id(tracing_group_name);
4242
4243 /* Set lttng run dir */
4244 ret = chown(rundir, 0, gid);
4245 if (ret < 0) {
4246 ERR("Unable to set group on %s", rundir);
4247 PERROR("chown");
4248 }
4249
4250 /*
4251 * Ensure all applications and tracing group can search the run
4252 * dir. Allow everyone to read the directory, since it does not
4253 * buy us anything to hide its content.
4254 */
4255 ret = chmod(rundir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
4256 if (ret < 0) {
4257 ERR("Unable to set permissions on %s", rundir);
4258 PERROR("chmod");
4259 }
4260
4261 /* lttng client socket path */
4262 ret = chown(client_unix_sock_path, 0, gid);
4263 if (ret < 0) {
4264 ERR("Unable to set group on %s", client_unix_sock_path);
4265 PERROR("chown");
4266 }
4267
4268 /* kconsumer error socket path */
4269 ret = chown(kconsumer_data.err_unix_sock_path, 0, 0);
4270 if (ret < 0) {
4271 ERR("Unable to set group on %s", kconsumer_data.err_unix_sock_path);
4272 PERROR("chown");
4273 }
4274
4275 /* 64-bit ustconsumer error socket path */
4276 ret = chown(ustconsumer64_data.err_unix_sock_path, 0, 0);
4277 if (ret < 0) {
4278 ERR("Unable to set group on %s", ustconsumer64_data.err_unix_sock_path);
4279 PERROR("chown");
4280 }
4281
4282 /* 32-bit ustconsumer compat32 error socket path */
4283 ret = chown(ustconsumer32_data.err_unix_sock_path, 0, 0);
4284 if (ret < 0) {
4285 ERR("Unable to set group on %s", ustconsumer32_data.err_unix_sock_path);
4286 PERROR("chown");
4287 }
4288
4289 DBG("All permissions are set");
4290
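	/*
	 * Note: only the status of the last chown() above is returned; earlier
	 * failures are logged but not propagated.
	 */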
4291 return ret;
4292 }
4293
4294 /*
 * Create the lttng run directory needed for all global sockets and pipes.
4296 */
4297 static int create_lttng_rundir(const char *rundir)
4298 {
4299 int ret;
4300
4301 DBG3("Creating LTTng run directory: %s", rundir);
4302
4303 ret = mkdir(rundir, S_IRWXU);
4304 if (ret < 0) {
4305 if (errno != EEXIST) {
4306 ERR("Unable to create %s", rundir);
4307 goto error;
4308 } else {
4309 ret = 0;
4310 }
4311 }
4312
4313 error:
4314 return ret;
4315 }
4316
4317 /*
 * Setup the sockets and directory needed by a consumer daemon (kernel or UST,
 * 32/64-bit) to communicate with the session daemon.
4320 */
4321 static int set_consumer_sockets(struct consumer_data *consumer_data,
4322 const char *rundir)
4323 {
4324 int ret;
4325 char path[PATH_MAX];
4326
4327 switch (consumer_data->type) {
4328 case LTTNG_CONSUMER_KERNEL:
4329 snprintf(path, PATH_MAX, DEFAULT_KCONSUMERD_PATH, rundir);
4330 break;
4331 case LTTNG_CONSUMER64_UST:
4332 snprintf(path, PATH_MAX, DEFAULT_USTCONSUMERD64_PATH, rundir);
4333 break;
4334 case LTTNG_CONSUMER32_UST:
4335 snprintf(path, PATH_MAX, DEFAULT_USTCONSUMERD32_PATH, rundir);
4336 break;
4337 default:
4338 ERR("Consumer type unknown");
4339 ret = -EINVAL;
4340 goto error;
4341 }
4342
4343 DBG2("Creating consumer directory: %s", path);
4344
	ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP);
	if (ret < 0) {
		if (errno != EEXIST) {
			PERROR("mkdir");
			ERR("Failed to create %s", path);
			goto error;
		}
		/* Directory already exists: not an error. */
		ret = 0;
	}
4354 if (is_root) {
4355 ret = chown(path, 0, utils_get_group_id(tracing_group_name));
4356 if (ret < 0) {
4357 ERR("Unable to set group on %s", path);
4358 PERROR("chown");
4359 goto error;
4360 }
4361 }
4362
	/* Create the consumer daemon error unix socket */
4364 consumer_data->err_sock =
4365 lttcomm_create_unix_sock(consumer_data->err_unix_sock_path);
4366 if (consumer_data->err_sock < 0) {
4367 ERR("Create unix sock failed: %s", consumer_data->err_unix_sock_path);
4368 ret = -1;
4369 goto error;
4370 }
4371
4372 /*
4373 * Set the CLOEXEC flag. Return code is useless because either way, the
4374 * show must go on.
4375 */
4376 ret = utils_set_fd_cloexec(consumer_data->err_sock);
4377 if (ret < 0) {
4378 PERROR("utils_set_fd_cloexec");
4379 /* continue anyway */
4380 }
4381
4382 /* File permission MUST be 660 */
4383 ret = chmod(consumer_data->err_unix_sock_path,
4384 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
4385 if (ret < 0) {
4386 ERR("Set file permissions failed: %s", consumer_data->err_unix_sock_path);
4387 PERROR("chmod");
4388 goto error;
4389 }
4390
4391 error:
4392 return ret;
4393 }
4394
4395 /*
4396 * Signal handler for the daemon
4397 *
 * Simply stop all worker threads, letting main() return gracefully after
 * joining all threads and calling cleanup().
4400 */
4401 static void sighandler(int sig)
4402 {
4403 switch (sig) {
4404 case SIGPIPE:
4405 DBG("SIGPIPE caught");
4406 return;
4407 case SIGINT:
4408 DBG("SIGINT caught");
4409 stop_threads();
4410 break;
4411 case SIGTERM:
4412 DBG("SIGTERM caught");
4413 stop_threads();
4414 break;
4415 case SIGUSR1:
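		/*
		 * SIGUSR1 is sent by the daemonized child to tell the parent it
		 * is ready; lttng_daemonize() waits on recv_child_signal.
		 */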
4416 CMM_STORE_SHARED(recv_child_signal, 1);
4417 break;
4418 default:
4419 break;
4420 }
4421 }
4422
4423 /*
 * Setup the signal handler for:
 * SIGINT, SIGTERM, SIGPIPE and SIGUSR1
4426 */
4427 static int set_signal_handler(void)
4428 {
4429 int ret = 0;
4430 struct sigaction sa;
4431 sigset_t sigset;
4432
4433 if ((ret = sigemptyset(&sigset)) < 0) {
4434 PERROR("sigemptyset");
4435 return ret;
4436 }
4437
4438 sa.sa_handler = sighandler;
4439 sa.sa_mask = sigset;
4440 sa.sa_flags = 0;
4441 if ((ret = sigaction(SIGTERM, &sa, NULL)) < 0) {
4442 PERROR("sigaction");
4443 return ret;
4444 }
4445
4446 if ((ret = sigaction(SIGINT, &sa, NULL)) < 0) {
4447 PERROR("sigaction");
4448 return ret;
4449 }
4450
4451 if ((ret = sigaction(SIGPIPE, &sa, NULL)) < 0) {
4452 PERROR("sigaction");
4453 return ret;
4454 }
4455
4456 if ((ret = sigaction(SIGUSR1, &sa, NULL)) < 0) {
4457 PERROR("sigaction");
4458 return ret;
4459 }
4460
4461 DBG("Signal handler set for SIGTERM, SIGUSR1, SIGPIPE and SIGINT");
4462
4463 return ret;
4464 }
4465
4466 /*
 * Raise the open files limit. This daemon can open a large number of file
 * descriptors in order to consume multiple kernel traces.
4469 */
4470 static void set_ulimit(void)
4471 {
4472 int ret;
4473 struct rlimit lim;
4474
	/* The kernel does not allow an infinite limit for open files */
4476 lim.rlim_cur = 65535;
4477 lim.rlim_max = 65535;
4478
4479 ret = setrlimit(RLIMIT_NOFILE, &lim);
4480 if (ret < 0) {
4481 PERROR("failed to set open files limit");
4482 }
4483 }
4484
4485 /*
4486 * Write pidfile using the rundir and opt_pidfile.
4487 */
4488 static void write_pidfile(void)
4489 {
4490 int ret;
4491 char pidfile_path[PATH_MAX];
4492
4493 assert(rundir);
4494
4495 if (opt_pidfile) {
		strncpy(pidfile_path, opt_pidfile, sizeof(pidfile_path));
		/* strncpy() does not always NUL-terminate; enforce it. */
		pidfile_path[sizeof(pidfile_path) - 1] = '\0';
4497 } else {
		/* Build pidfile path from rundir and the default pidfile name. */
4499 ret = snprintf(pidfile_path, sizeof(pidfile_path), "%s/"
4500 DEFAULT_LTTNG_SESSIOND_PIDFILE, rundir);
4501 if (ret < 0) {
4502 PERROR("snprintf pidfile path");
4503 goto error;
4504 }
4505 }
4506
4507 /*
	 * Create pid file in rundir. Return value is of no importance. The
	 * execution will continue even if we cannot write the file.
4510 */
4511 (void) utils_create_pid_file(getpid(), pidfile_path);
4512
4513 error:
4514 return;
4515 }
4516
4517 /*
4518 * Write JUL TCP port using the rundir.
4519 */
4520 static void write_julport(void)
4521 {
4522 int ret;
4523 char path[PATH_MAX];
4524
4525 assert(rundir);
4526
4527 ret = snprintf(path, sizeof(path), "%s/"
4528 DEFAULT_LTTNG_SESSIOND_JULPORT_FILE, rundir);
4529 if (ret < 0) {
4530 PERROR("snprintf julport path");
4531 goto error;
4532 }
4533
4534 /*
	 * Create the JUL TCP port file in rundir. Return value is of no
	 * importance. The execution will continue even if we cannot write the
	 * file.
4538 */
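	/*
	 * utils_create_pid_file() is reused here on the assumption that it
	 * simply writes the given integer to the file, which works for a TCP
	 * port just as well as for a PID.
	 */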
4539 (void) utils_create_pid_file(jul_tcp_port, path);
4540
4541 error:
4542 return;
4543 }
4544
4545 /*
4546 * main
4547 */
4548 int main(int argc, char **argv)
4549 {
4550 int ret = 0;
4551 void *status;
4552 const char *home_path, *env_app_timeout;
4553
4554 init_kernel_workarounds();
4555
4556 rcu_register_thread();
4557
4558 if ((ret = set_signal_handler()) < 0) {
4559 goto error;
4560 }
4561
4562 setup_consumerd_path();
4563
4564 page_size = sysconf(_SC_PAGESIZE);
4565 if (page_size < 0) {
4566 PERROR("sysconf _SC_PAGESIZE");
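		/*
		 * Presumably safer to over-estimate the page size than to
		 * under-estimate it when falling back.
		 */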
4567 page_size = LONG_MAX;
4568 WARN("Fallback page size to %ld", page_size);
4569 }
4570
4571 /* Parse arguments */
4572 progname = argv[0];
4573 if ((ret = parse_args(argc, argv)) < 0) {
4574 goto error;
4575 }
4576
4577 /* Daemonize */
4578 if (opt_daemon || opt_background) {
4579 int i;
4580
4581 ret = lttng_daemonize(&child_ppid, &recv_child_signal,
4582 !opt_background);
4583 if (ret < 0) {
4584 goto error;
4585 }
4586
4587 /*
4588 * We are in the child. Make sure all other file descriptors are
4589 * closed, in case we are called with more opened file descriptors than
4590 * the standard ones.
4591 */
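		/* fds 0-2 are assumed to have been redirected by lttng_daemonize(). */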
4592 for (i = 3; i < sysconf(_SC_OPEN_MAX); i++) {
4593 (void) close(i);
4594 }
4595 }
4596
4597 /* Create thread quit pipe */
4598 if ((ret = init_thread_quit_pipe()) < 0) {
4599 goto error;
4600 }
4601
4602 /* Check if daemon is UID = 0 */
4603 is_root = !getuid();
4604
4605 if (is_root) {
4606 rundir = strdup(DEFAULT_LTTNG_RUNDIR);
4607
4608 /* Create global run dir with root access */
4609 ret = create_lttng_rundir(rundir);
4610 if (ret < 0) {
4611 goto error;
4612 }
4613
4614 if (strlen(apps_unix_sock_path) == 0) {
4615 snprintf(apps_unix_sock_path, PATH_MAX,
4616 DEFAULT_GLOBAL_APPS_UNIX_SOCK);
4617 }
4618
4619 if (strlen(client_unix_sock_path) == 0) {
4620 snprintf(client_unix_sock_path, PATH_MAX,
4621 DEFAULT_GLOBAL_CLIENT_UNIX_SOCK);
4622 }
4623
4624 /* Set global SHM for ust */
4625 if (strlen(wait_shm_path) == 0) {
4626 snprintf(wait_shm_path, PATH_MAX,
4627 DEFAULT_GLOBAL_APPS_WAIT_SHM_PATH);
4628 }
4629
4630 if (strlen(health_unix_sock_path) == 0) {
4631 snprintf(health_unix_sock_path, sizeof(health_unix_sock_path),
4632 DEFAULT_GLOBAL_HEALTH_UNIX_SOCK);
4633 }
4634
4635 /* Setup kernel consumerd path */
4636 snprintf(kconsumer_data.err_unix_sock_path, PATH_MAX,
4637 DEFAULT_KCONSUMERD_ERR_SOCK_PATH, rundir);
4638 snprintf(kconsumer_data.cmd_unix_sock_path, PATH_MAX,
4639 DEFAULT_KCONSUMERD_CMD_SOCK_PATH, rundir);
4640
4641 DBG2("Kernel consumer err path: %s",
4642 kconsumer_data.err_unix_sock_path);
4643 DBG2("Kernel consumer cmd path: %s",
4644 kconsumer_data.cmd_unix_sock_path);
4645 } else {
4646 home_path = utils_get_home_dir();
4647 if (home_path == NULL) {
4648 /* TODO: Add --socket PATH option */
4649 ERR("Can't get HOME directory for sockets creation.");
4650 ret = -EPERM;
4651 goto error;
4652 }
4653
4654 /*
4655 * Create rundir from home path. This will create something like
4656 * $HOME/.lttng
4657 */
4658 ret = asprintf(&rundir, DEFAULT_LTTNG_HOME_RUNDIR, home_path);
4659 if (ret < 0) {
4660 ret = -ENOMEM;
4661 goto error;
4662 }
4663
4664 ret = create_lttng_rundir(rundir);
4665 if (ret < 0) {
4666 goto error;
4667 }
4668
4669 if (strlen(apps_unix_sock_path) == 0) {
4670 snprintf(apps_unix_sock_path, PATH_MAX,
4671 DEFAULT_HOME_APPS_UNIX_SOCK, home_path);
4672 }
4673
4674 /* Set the cli tool unix socket path */
4675 if (strlen(client_unix_sock_path) == 0) {
4676 snprintf(client_unix_sock_path, PATH_MAX,
4677 DEFAULT_HOME_CLIENT_UNIX_SOCK, home_path);
4678 }
4679
4680 /* Set global SHM for ust */
4681 if (strlen(wait_shm_path) == 0) {
4682 snprintf(wait_shm_path, PATH_MAX,
4683 DEFAULT_HOME_APPS_WAIT_SHM_PATH, getuid());
4684 }
4685
4686 /* Set health check Unix path */
4687 if (strlen(health_unix_sock_path) == 0) {
4688 snprintf(health_unix_sock_path, sizeof(health_unix_sock_path),
4689 DEFAULT_HOME_HEALTH_UNIX_SOCK, home_path);
4690 }
4691 }
4692
4693 /* Set consumer initial state */
4694 kernel_consumerd_state = CONSUMER_STOPPED;
4695 ust_consumerd_state = CONSUMER_STOPPED;
4696
4697 DBG("Client socket path %s", client_unix_sock_path);
4698 DBG("Application socket path %s", apps_unix_sock_path);
4699 DBG("Application wait path %s", wait_shm_path);
4700 DBG("LTTng run directory path: %s", rundir);
4701
4702 /* 32 bits consumerd path setup */
4703 snprintf(ustconsumer32_data.err_unix_sock_path, PATH_MAX,
4704 DEFAULT_USTCONSUMERD32_ERR_SOCK_PATH, rundir);
4705 snprintf(ustconsumer32_data.cmd_unix_sock_path, PATH_MAX,
4706 DEFAULT_USTCONSUMERD32_CMD_SOCK_PATH, rundir);
4707
4708 DBG2("UST consumer 32 bits err path: %s",
4709 ustconsumer32_data.err_unix_sock_path);
4710 DBG2("UST consumer 32 bits cmd path: %s",
4711 ustconsumer32_data.cmd_unix_sock_path);
4712
4713 /* 64 bits consumerd path setup */
4714 snprintf(ustconsumer64_data.err_unix_sock_path, PATH_MAX,
4715 DEFAULT_USTCONSUMERD64_ERR_SOCK_PATH, rundir);
4716 snprintf(ustconsumer64_data.cmd_unix_sock_path, PATH_MAX,
4717 DEFAULT_USTCONSUMERD64_CMD_SOCK_PATH, rundir);
4718
4719 DBG2("UST consumer 64 bits err path: %s",
4720 ustconsumer64_data.err_unix_sock_path);
4721 DBG2("UST consumer 64 bits cmd path: %s",
4722 ustconsumer64_data.cmd_unix_sock_path);
4723
4724 /*
 * See if a daemon already exists.
 */
	if ((ret = check_existing_daemon()) < 0) {
		ERR("A session daemon is already running.");
		/*
		 * We do not goto exit because we must not call cleanup(); that
		 * would tear down the files used by the already-running daemon.
		 */
4733 goto error;
4734 }
4735
4736 /*
	 * Init UST app hash table. The table must be allocated before this
	 * point since cleanup() can be called any time afterwards.
4739 */
4740 ust_app_ht_alloc();
4741
4742 /* Initialize JUL domain subsystem. */
4743 if ((ret = jul_init()) < 0) {
4744 /* ENOMEM at this point. */
4745 goto error;
4746 }
4747
4748 /* After this point, we can safely call cleanup() with "goto exit" */
4749
4750 /*
4751 * These actions must be executed as root. We do that *after* setting up
4752 * the sockets path because we MUST make the check for another daemon using
4753 * those paths *before* trying to set the kernel consumer sockets and init
4754 * kernel tracer.
4755 */
4756 if (is_root) {
4757 ret = set_consumer_sockets(&kconsumer_data, rundir);
4758 if (ret < 0) {
4759 goto exit;
4760 }
4761
4762 /* Setup kernel tracer */
4763 if (!opt_no_kernel) {
4764 init_kernel_tracer();
4765 }
4766
4767 /* Set ulimit for open files */
4768 set_ulimit();
4769 }
4770 /* init lttng_fd tracking must be done after set_ulimit. */
4771 lttng_fd_init();
4772
4773 ret = set_consumer_sockets(&ustconsumer64_data, rundir);
4774 if (ret < 0) {
4775 goto exit;
4776 }
4777
4778 ret = set_consumer_sockets(&ustconsumer32_data, rundir);
4779 if (ret < 0) {
4780 goto exit;
4781 }
4782
4783 /* Setup the needed unix socket */
4784 if ((ret = init_daemon_socket()) < 0) {
4785 goto exit;
4786 }
4787
4788 /* Set credentials to socket */
4789 if (is_root && ((ret = set_permissions(rundir)) < 0)) {
4790 goto exit;
4791 }
4792
4793 /* Get parent pid if -S, --sig-parent is specified. */
4794 if (opt_sig_parent) {
4795 ppid = getppid();
4796 }
4797
4798 /* Setup the kernel pipe for waking up the kernel thread */
4799 if (is_root && !opt_no_kernel) {
4800 if ((ret = utils_create_pipe_cloexec(kernel_poll_pipe)) < 0) {
4801 goto exit;
4802 }
4803 }
4804
4805 /* Setup the thread ht_cleanup communication pipe. */
4806 if (utils_create_pipe_cloexec(ht_cleanup_pipe) < 0) {
4807 goto exit;
4808 }
4809
4810 /* Setup the thread apps communication pipe. */
4811 if ((ret = utils_create_pipe_cloexec(apps_cmd_pipe)) < 0) {
4812 goto exit;
4813 }
4814
4815 /* Setup the thread apps notify communication pipe. */
4816 if (utils_create_pipe_cloexec(apps_cmd_notify_pipe) < 0) {
4817 goto exit;
4818 }
4819
4820 /* Initialize global buffer per UID and PID registry. */
4821 buffer_reg_init_uid_registry();
4822 buffer_reg_init_pid_registry();
4823
4824 /* Init UST command queue. */
4825 cds_wfq_init(&ust_cmd_queue.queue);
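	/*
	 * This wait-free queue is filled by the registration thread and
	 * drained by the dispatch thread (see the pthread_create calls below).
	 */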
4826
4827 /*
	 * Get session list pointer. This pointer MUST NOT be free()'d. The list
	 * is statically declared in session.c.
4830 */
4831 session_list_ptr = session_get_list();
4832
4833 /* Set up max poll set size */
4834 lttng_poll_set_max_size();
4835
4836 cmd_init();
4837
4838 /* Check for the application socket timeout env variable. */
4839 env_app_timeout = getenv(DEFAULT_APP_SOCKET_TIMEOUT_ENV);
4840 if (env_app_timeout) {
4841 app_socket_timeout = atoi(env_app_timeout);
4842 } else {
4843 app_socket_timeout = DEFAULT_APP_SOCKET_RW_TIMEOUT;
4844 }
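	/*
	 * Note: atoi() performs no validation; a malformed value silently
	 * becomes 0 here.
	 */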
4845
4846 write_pidfile();
4847 write_julport();
4848
4849 /* Initialize communication library */
4850 lttcomm_init();
4851 /* This is to get the TCP timeout value. */
4852 lttcomm_inet_init();
4853
4854 /*
4855 * Initialize the health check subsystem. This call should set the
4856 * appropriate time values.
4857 */
4858 health_sessiond = health_app_create(NR_HEALTH_SESSIOND_TYPES);
4859 if (!health_sessiond) {
4860 PERROR("health_app_create error");
4861 goto exit_health_sessiond_cleanup;
4862 }
4863
4864 /* Create thread to clean up RCU hash tables */
4865 ret = pthread_create(&ht_cleanup_thread, NULL,
4866 thread_ht_cleanup, (void *) NULL);
4867 if (ret != 0) {
4868 PERROR("pthread_create ht_cleanup");
4869 goto exit_ht_cleanup;
4870 }
4871
4872 /* Create health-check thread */
4873 ret = pthread_create(&health_thread, NULL,
4874 thread_manage_health, (void *) NULL);
4875 if (ret != 0) {
4876 PERROR("pthread_create health");
4877 goto exit_health;
4878 }
4879
4880 /* Create thread to manage the client socket */
4881 ret = pthread_create(&client_thread, NULL,
4882 thread_manage_clients, (void *) NULL);
4883 if (ret != 0) {
4884 PERROR("pthread_create clients");
4885 goto exit_client;
4886 }
4887
4888 /* Create thread to dispatch registration */
4889 ret = pthread_create(&dispatch_thread, NULL,
4890 thread_dispatch_ust_registration, (void *) NULL);
4891 if (ret != 0) {
4892 PERROR("pthread_create dispatch");
4893 goto exit_dispatch;
4894 }
4895
4896 /* Create thread to manage application registration. */
4897 ret = pthread_create(&reg_apps_thread, NULL,
4898 thread_registration_apps, (void *) NULL);
4899 if (ret != 0) {
4900 PERROR("pthread_create registration");
4901 goto exit_reg_apps;
4902 }
4903
4904 /* Create thread to manage application socket */
4905 ret = pthread_create(&apps_thread, NULL,
4906 thread_manage_apps, (void *) NULL);
4907 if (ret != 0) {
4908 PERROR("pthread_create apps");
4909 goto exit_apps;
4910 }
4911
4912 /* Create thread to manage application notify socket */
4913 ret = pthread_create(&apps_notify_thread, NULL,
4914 ust_thread_manage_notify, (void *) NULL);
4915 if (ret != 0) {
		PERROR("pthread_create apps notify");
4917 goto exit_apps_notify;
4918 }
4919
4920 /* Create JUL registration thread. */
4921 ret = pthread_create(&jul_reg_thread, NULL,
4922 jul_thread_manage_registration, (void *) NULL);
4923 if (ret != 0) {
		PERROR("pthread_create JUL registration");
4925 goto exit_jul_reg;
4926 }
4927
	/*
	 * Don't start this thread if kernel tracing is not requested or if we
	 * are not root.
	 */
4929 if (is_root && !opt_no_kernel) {
4930 /* Create kernel thread to manage kernel event */
4931 ret = pthread_create(&kernel_thread, NULL,
4932 thread_manage_kernel, (void *) NULL);
4933 if (ret != 0) {
4934 PERROR("pthread_create kernel");
4935 goto exit_kernel;
4936 }
4937
4938 ret = pthread_join(kernel_thread, &status);
4939 if (ret != 0) {
			PERROR("pthread_join kernel");
4941 goto error; /* join error, exit without cleanup */
4942 }
4943 }
4944
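	/*
	 * The labels below unwind in reverse creation order: a failed
	 * pthread_create() above jumps past the join of the thread that was
	 * never started, so only started threads are joined.
	 */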
4945 exit_kernel:
4946 ret = pthread_join(jul_reg_thread, &status);
4947 if (ret != 0) {
4948 PERROR("pthread_join JUL");
4949 goto error; /* join error, exit without cleanup */
4950 }
4951
4952 exit_jul_reg:
4953 ret = pthread_join(apps_notify_thread, &status);
4954 if (ret != 0) {
4955 PERROR("pthread_join apps notify");
4956 goto error; /* join error, exit without cleanup */
4957 }
4958
4959 exit_apps_notify:
4960 ret = pthread_join(apps_thread, &status);
4961 if (ret != 0) {
4962 PERROR("pthread_join apps");
4963 goto error; /* join error, exit without cleanup */
4964 }
4965
exit_apps:
4968 ret = pthread_join(reg_apps_thread, &status);
4969 if (ret != 0) {
		PERROR("pthread_join registration");
4971 goto error; /* join error, exit without cleanup */
4972 }
4973
4974 exit_reg_apps:
4975 ret = pthread_join(dispatch_thread, &status);
4976 if (ret != 0) {
		PERROR("pthread_join dispatch");
4978 goto error; /* join error, exit without cleanup */
4979 }
4980
4981 exit_dispatch:
4982 ret = pthread_join(client_thread, &status);
4983 if (ret != 0) {
		PERROR("pthread_join clients");
4985 goto error; /* join error, exit without cleanup */
4986 }
4987
4988 ret = join_consumer_thread(&kconsumer_data);
4989 if (ret != 0) {
4990 PERROR("join_consumer");
4991 goto error; /* join error, exit without cleanup */
4992 }
4993
4994 ret = join_consumer_thread(&ustconsumer32_data);
4995 if (ret != 0) {
4996 PERROR("join_consumer ust32");
4997 goto error; /* join error, exit without cleanup */
4998 }
4999
5000 ret = join_consumer_thread(&ustconsumer64_data);
5001 if (ret != 0) {
5002 PERROR("join_consumer ust64");
5003 goto error; /* join error, exit without cleanup */
5004 }
5005
5006 exit_client:
5007 ret = pthread_join(health_thread, &status);
5008 if (ret != 0) {
5009 PERROR("pthread_join health thread");
5010 goto error; /* join error, exit without cleanup */
5011 }
5012
5013 exit_health:
5014 ret = pthread_join(ht_cleanup_thread, &status);
5015 if (ret != 0) {
5016 PERROR("pthread_join ht cleanup thread");
5017 goto error; /* join error, exit without cleanup */
5018 }
5019 exit_ht_cleanup:
5020 health_app_destroy(health_sessiond);
5021 exit_health_sessiond_cleanup:
5022 exit:
5023 /*
5024 * cleanup() is called when no other thread is running.
5025 */
5026 rcu_thread_online();
5027 cleanup();
5028 rcu_thread_offline();
5029 rcu_unregister_thread();
5030 if (!ret) {
5031 exit(EXIT_SUCCESS);
5032 }
5033 error:
5034 exit(EXIT_FAILURE);
5035 }