Clean-up: move global sessiond symbols out of main.o
[lttng-tools.git] / src / bin / lttng-sessiond / main.c
1 /*
2 * Copyright (C) 2011 - David Goulet <david.goulet@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * 2013 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define _LGPL_SOURCE
21 #include <getopt.h>
22 #include <grp.h>
23 #include <limits.h>
24 #include <paths.h>
25 #include <pthread.h>
26 #include <signal.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <inttypes.h>
31 #include <sys/mman.h>
32 #include <sys/mount.h>
33 #include <sys/resource.h>
34 #include <sys/socket.h>
35 #include <sys/stat.h>
36 #include <sys/types.h>
37 #include <sys/wait.h>
38 #include <urcu/uatomic.h>
39 #include <unistd.h>
40 #include <ctype.h>
41
42 #include <common/common.h>
43 #include <common/compat/socket.h>
44 #include <common/compat/getenv.h>
45 #include <common/defaults.h>
46 #include <common/kernel-consumer/kernel-consumer.h>
47 #include <common/futex.h>
48 #include <common/relayd/relayd.h>
49 #include <common/utils.h>
50 #include <common/daemonize.h>
51 #include <common/config/session-config.h>
52 #include <common/dynamic-buffer.h>
53 #include <lttng/userspace-probe-internal.h>
54 #include <lttng/event-internal.h>
55
56 #include "lttng-sessiond.h"
57 #include "buffer-registry.h"
58 #include "channel.h"
59 #include "cmd.h"
60 #include "consumer.h"
61 #include "context.h"
62 #include "event.h"
63 #include "kernel.h"
64 #include "kernel-consumer.h"
65 #include "modprobe.h"
66 #include "shm.h"
67 #include "ust-ctl.h"
68 #include "ust-consumer.h"
69 #include "utils.h"
70 #include "fd-limit.h"
71 #include "health-sessiond.h"
72 #include "testpoint.h"
73 #include "ust-thread.h"
74 #include "agent-thread.h"
75 #include "save.h"
76 #include "load-session-thread.h"
77 #include "notification-thread.h"
78 #include "notification-thread-commands.h"
79 #include "rotation-thread.h"
80 #include "lttng-syscall.h"
81 #include "agent.h"
82 #include "ht-cleanup.h"
83 #include "sessiond-config.h"
84 #include "timer.h"
85
86 static const char *help_msg =
87 #ifdef LTTNG_EMBED_HELP
88 #include <lttng-sessiond.8.h>
89 #else
90 NULL
91 #endif
92 ;
93
94 const char *progname;
95 static int lockfile_fd = -1;
96
97 /* Set to 1 when a SIGUSR1 signal is received. */
98 static int recv_child_signal;
99
100 static struct lttng_kernel_tracer_version kernel_tracer_version;
101 static struct lttng_kernel_tracer_abi_version kernel_tracer_abi_version;
102
103 /*
104 * Consumer daemon specific control data. Every value not initialized here is
105 * set to 0 by the static definition.
106 */
107 static struct consumer_data kconsumer_data = {
108 .type = LTTNG_CONSUMER_KERNEL,
109 .err_sock = -1,
110 .cmd_sock = -1,
111 .channel_monitor_pipe = -1,
112 .pid_mutex = PTHREAD_MUTEX_INITIALIZER,
113 .lock = PTHREAD_MUTEX_INITIALIZER,
114 .cond = PTHREAD_COND_INITIALIZER,
115 .cond_mutex = PTHREAD_MUTEX_INITIALIZER,
116 };
117 static struct consumer_data ustconsumer64_data = {
118 .type = LTTNG_CONSUMER64_UST,
119 .err_sock = -1,
120 .cmd_sock = -1,
121 .channel_monitor_pipe = -1,
122 .pid_mutex = PTHREAD_MUTEX_INITIALIZER,
123 .lock = PTHREAD_MUTEX_INITIALIZER,
124 .cond = PTHREAD_COND_INITIALIZER,
125 .cond_mutex = PTHREAD_MUTEX_INITIALIZER,
126 };
127 static struct consumer_data ustconsumer32_data = {
128 .type = LTTNG_CONSUMER32_UST,
129 .err_sock = -1,
130 .cmd_sock = -1,
131 .channel_monitor_pipe = -1,
132 .pid_mutex = PTHREAD_MUTEX_INITIALIZER,
133 .lock = PTHREAD_MUTEX_INITIALIZER,
134 .cond = PTHREAD_COND_INITIALIZER,
135 .cond_mutex = PTHREAD_MUTEX_INITIALIZER,
136 };
137
138 /* Command line options */
139 static const struct option long_options[] = {
140 { "client-sock", required_argument, 0, 'c' },
141 { "apps-sock", required_argument, 0, 'a' },
142 { "kconsumerd-cmd-sock", required_argument, 0, '\0' },
143 { "kconsumerd-err-sock", required_argument, 0, '\0' },
144 { "ustconsumerd32-cmd-sock", required_argument, 0, '\0' },
145 { "ustconsumerd32-err-sock", required_argument, 0, '\0' },
146 { "ustconsumerd64-cmd-sock", required_argument, 0, '\0' },
147 { "ustconsumerd64-err-sock", required_argument, 0, '\0' },
148 { "consumerd32-path", required_argument, 0, '\0' },
149 { "consumerd32-libdir", required_argument, 0, '\0' },
150 { "consumerd64-path", required_argument, 0, '\0' },
151 { "consumerd64-libdir", required_argument, 0, '\0' },
152 { "daemonize", no_argument, 0, 'd' },
153 { "background", no_argument, 0, 'b' },
154 { "sig-parent", no_argument, 0, 'S' },
155 { "help", no_argument, 0, 'h' },
156 { "group", required_argument, 0, 'g' },
157 { "version", no_argument, 0, 'V' },
158 { "quiet", no_argument, 0, 'q' },
159 { "verbose", no_argument, 0, 'v' },
160 { "verbose-consumer", no_argument, 0, '\0' },
161 { "no-kernel", no_argument, 0, '\0' },
162 { "pidfile", required_argument, 0, 'p' },
163 { "agent-tcp-port", required_argument, 0, '\0' },
164 { "config", required_argument, 0, 'f' },
165 { "load", required_argument, 0, 'l' },
166 { "kmod-probes", required_argument, 0, '\0' },
167 { "extra-kmod-probes", required_argument, 0, '\0' },
168 { NULL, 0, 0, 0 }
169 };
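
/*
 * Illustrative invocation combining some of the options above. The group
 * name and load path below are hypothetical examples, not defaults:
 *
 *   lttng-sessiond --daemonize --group=tracing \
 *           --load=/path/to/session.lttng --verbose
 */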
170
171 /* Command line options to ignore from configuration file */
172 static const char *config_ignore_options[] = { "help", "version", "config" };
173
174 /* Shared between threads */
175 static int dispatch_thread_exit;
176
177 /* Sockets and FDs */
178 static int client_sock = -1;
179 static int apps_sock = -1;
180 static int kernel_poll_pipe[2] = { -1, -1 };
181
182 /*
183 * This pipe is used to inform the thread managing application communication
184 * that a command is queued and ready to be processed.
185 */
186 static int apps_cmd_pipe[2] = { -1, -1 };
187
188 /* Pthread, Mutexes and Semaphores */
189 static pthread_t apps_thread;
190 static pthread_t apps_notify_thread;
191 static pthread_t reg_apps_thread;
192 static pthread_t client_thread;
193 static pthread_t kernel_thread;
194 static pthread_t dispatch_thread;
195 static pthread_t health_thread;
196 static pthread_t ht_cleanup_thread;
197 static pthread_t agent_reg_thread;
198 static pthread_t load_session_thread;
199 static pthread_t notification_thread;
200 static pthread_t rotation_thread;
201 static pthread_t timer_thread;
202
203 /*
204 * UST registration command queue. This queue is paired with a futex and uses
205 * an N-wakers / 1-waiter scheme, implemented and detailed in futex.c/.h.
206 *
207 * The thread_registration_apps and thread_dispatch_ust_registration threads
208 * use this queue along with the wait/wake scheme. Down the line, the
209 * thread_manage_apps thread receives the new application sockets and monitors
210 * them for any I/O error or clean close that triggers an unregistration.
211 */
212 static struct ust_cmd_queue ust_cmd_queue;
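
/*
 * Illustrative (not compiled) sketch of the wait/wake protocol around this
 * queue, mirroring the real code in thread_registration_apps (waker) and
 * thread_dispatch_ust_registration (waiter):
 *
 *   Waker:
 *       cds_wfcq_enqueue(&ust_cmd_queue.head, &ust_cmd_queue.tail,
 *               &ust_cmd->node);
 *       futex_nto1_wake(&ust_cmd_queue.futex);
 *
 *   Waiter:
 *       for (;;) {
 *               futex_nto1_prepare(&ust_cmd_queue.futex);
 *               while ((node = cds_wfcq_dequeue_blocking(&ust_cmd_queue.head,
 *                               &ust_cmd_queue.tail)) != NULL) {
 *                       ... dispatch the command ...
 *               }
 *               futex_nto1_wait(&ust_cmd_queue.futex);
 *       }
 */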
213
214 /*
215 * Pointer initialized before thread creation.
216 *
217 * This points to the tracing session list containing the session count and a
218 * mutex lock. The lock MUST be taken if you iterate over the list. The lock
219 * MUST NOT be taken if you call a public function in session.c.
220 *
221 * The lock is nested inside the structure: session_list_ptr->lock. Please use
222 * session_lock_list and session_unlock_list for lock acquisition.
223 */
224 static struct ltt_session_list *session_list_ptr;
225
226 static const char *module_proc_lttng = "/proc/lttng";
227
228 /*
229 * Consumer daemon state which is changed when spawning it, killing it or in
230 * case of a fatal error.
231 */
232 enum consumerd_state {
233 CONSUMER_STARTED = 1,
234 CONSUMER_STOPPED = 2,
235 CONSUMER_ERROR = 3,
236 };
237
238 /*
239 * This consumer daemon state is used to validate if a client command will be
240 * able to reach the consumer. If not, the client is informed. For instance,
241 * doing a "lttng start" when the consumer state is set to ERROR will return an
242 * error to the client.
243 *
244 * The following example shows a possible race condition of this scheme:
245 *
246 * consumer thread error happens
247 * client cmd arrives
248 * client cmd checks state -> still OK
249 * consumer thread exit, sets error
250 * client cmd try to talk to consumer
251 * ...
252 *
253 * However, since the consumer is a different daemon, we have no way of making
254 * sure the command will reach it safely even with this state flag. This is why
255 * we consider that up to the state validation during command processing, the
256 * command is safe. After that, we cannot guarantee the correctness of the
257 * client request vis-a-vis the consumer.
258 */
259 static enum consumerd_state ust_consumerd_state;
260 static enum consumerd_state kernel_consumerd_state;
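
/*
 * Illustrative (not compiled) sketch of the validation described above, as a
 * client command path would perform it; readers use uatomic_read() as done
 * in update_ust_app():
 *
 *   if (uatomic_read(&kernel_consumerd_state) == CONSUMER_ERROR) {
 *           // Inform the client; the command will not reach the consumer.
 *           return -1;
 *   }
 */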
261
262 /* Load session thread information to operate. */
263 static struct load_session_thread_data *load_info;
264
265 /*
266 * Section name to look for in the daemon configuration file.
267 */
268 static const char * const config_section_name = "sessiond";
269
270 /* Am I root or not? Set to 1 if the daemon is running as root. */
271 static int is_root;
272
273 /* Rotation thread handle. */
274 static struct rotation_thread_handle *rotation_thread_handle;
275
276 /*
277 * Stop all threads by closing the thread quit pipe.
278 */
279 static void stop_threads(void)
280 {
281 int ret;
282
283 /* Stopping all threads */
284 DBG("Terminating all threads");
285 ret = sessiond_notify_quit_pipe();
286 if (ret < 0) {
287 ERR("write error on thread quit pipe");
288 }
289
290 /* Dispatch thread */
291 CMM_STORE_SHARED(dispatch_thread_exit, 1);
292 futex_nto1_wake(&ust_cmd_queue.futex);
293 }
294
295 /*
296 * Close all consumer sockets.
297 */
298 static void close_consumer_sockets(void)
299 {
300 int ret;
301
302 if (kconsumer_data.err_sock >= 0) {
303 ret = close(kconsumer_data.err_sock);
304 if (ret < 0) {
305 PERROR("kernel consumer err_sock close");
306 }
307 }
308 if (ustconsumer32_data.err_sock >= 0) {
309 ret = close(ustconsumer32_data.err_sock);
310 if (ret < 0) {
311 PERROR("UST consumerd32 err_sock close");
312 }
313 }
314 if (ustconsumer64_data.err_sock >= 0) {
315 ret = close(ustconsumer64_data.err_sock);
316 if (ret < 0) {
317 PERROR("UST consumerd64 err_sock close");
318 }
319 }
320 if (kconsumer_data.cmd_sock >= 0) {
321 ret = close(kconsumer_data.cmd_sock);
322 if (ret < 0) {
323 PERROR("kernel consumer cmd_sock close");
324 }
325 }
326 if (ustconsumer32_data.cmd_sock >= 0) {
327 ret = close(ustconsumer32_data.cmd_sock);
328 if (ret < 0) {
329 PERROR("UST consumerd32 cmd_sock close");
330 }
331 }
332 if (ustconsumer64_data.cmd_sock >= 0) {
333 ret = close(ustconsumer64_data.cmd_sock);
334 if (ret < 0) {
335 PERROR("UST consumerd64 cmd_sock close");
336 }
337 }
338 if (kconsumer_data.channel_monitor_pipe >= 0) {
339 ret = close(kconsumer_data.channel_monitor_pipe);
340 if (ret < 0) {
341 PERROR("kernel consumer channel monitor pipe close");
342 }
343 }
344 if (ustconsumer32_data.channel_monitor_pipe >= 0) {
345 ret = close(ustconsumer32_data.channel_monitor_pipe);
346 if (ret < 0) {
347 PERROR("UST consumerd32 channel monitor pipe close");
348 }
349 }
350 if (ustconsumer64_data.channel_monitor_pipe >= 0) {
351 ret = close(ustconsumer64_data.channel_monitor_pipe);
352 if (ret < 0) {
353 PERROR("UST consumerd64 channel monitor pipe close");
354 }
355 }
356 }
357
358 /*
359 * Wait on consumer process termination.
360 *
361 * Need to be called with the consumer data lock held or from a context
362 * ensuring no concurrent access to data (e.g: cleanup).
363 */
364 static void wait_consumer(struct consumer_data *consumer_data)
365 {
366 pid_t ret;
367 int status;
368
369 if (consumer_data->pid <= 0) {
370 return;
371 }
372
373 DBG("Waiting for complete teardown of consumerd (PID: %d)",
374 consumer_data->pid);
375 ret = waitpid(consumer_data->pid, &status, 0);
376 if (ret == -1) {
377 PERROR("consumerd waitpid pid: %d", consumer_data->pid);
378 } else if (!WIFEXITED(status)) {
379 ERR("consumerd termination with error: %d",
380 WEXITSTATUS(status));
381 }
382 consumer_data->pid = 0;
383 }
384
385 /*
386 * Cleanup the session daemon's data structures.
387 */
388 static void sessiond_cleanup(void)
389 {
390 int ret;
391 struct ltt_session *sess, *stmp;
392
393 DBG("Cleanup sessiond");
394
395 /*
396 * Close the thread quit pipe. It has already done its job,
397 * since we are now called.
398 */
399 sessiond_close_quit_pipe();
400
401 ret = remove(config.pid_file_path.value);
402 if (ret < 0) {
403 PERROR("remove pidfile %s", config.pid_file_path.value);
404 }
405
406 DBG("Removing sessiond and consumerd content of directory %s",
407 config.rundir.value);
408
409 /* sessiond */
410 DBG("Removing %s", config.pid_file_path.value);
411 (void) unlink(config.pid_file_path.value);
412
413 DBG("Removing %s", config.agent_port_file_path.value);
414 (void) unlink(config.agent_port_file_path.value);
415
416 /* kconsumerd */
417 DBG("Removing %s", kconsumer_data.err_unix_sock_path);
418 (void) unlink(kconsumer_data.err_unix_sock_path);
419
420 DBG("Removing directory %s", config.kconsumerd_path.value);
421 (void) rmdir(config.kconsumerd_path.value);
422
423 /* ust consumerd 32 */
424 DBG("Removing %s", config.consumerd32_err_unix_sock_path.value);
425 (void) unlink(config.consumerd32_err_unix_sock_path.value);
426
427 DBG("Removing directory %s", config.consumerd32_path.value);
428 (void) rmdir(config.consumerd32_path.value);
429
430 /* ust consumerd 64 */
431 DBG("Removing %s", config.consumerd64_err_unix_sock_path.value);
432 (void) unlink(config.consumerd64_err_unix_sock_path.value);
433
434 DBG("Removing directory %s", config.consumerd64_path.value);
435 (void) rmdir(config.consumerd64_path.value);
436
437 DBG("Cleaning up all sessions");
438
439 /* Destroy session list mutex */
440 if (session_list_ptr != NULL) {
441 pthread_mutex_destroy(&session_list_ptr->lock);
442
443 /* Clean up ALL sessions. */
444 cds_list_for_each_entry_safe(sess, stmp,
445 &session_list_ptr->head, list) {
446 cmd_destroy_session(sess, kernel_poll_pipe[1],
447 notification_thread_handle);
448 }
449 }
450
451 wait_consumer(&kconsumer_data);
452 wait_consumer(&ustconsumer64_data);
453 wait_consumer(&ustconsumer32_data);
454
455 DBG("Cleaning up all agent apps");
456 agent_app_ht_clean();
457
458 DBG("Closing all UST sockets");
459 ust_app_clean_list();
460 buffer_reg_destroy_registries();
461
462 if (is_root && !config.no_kernel) {
463 DBG2("Closing kernel fd");
464 if (kernel_tracer_fd >= 0) {
465 ret = close(kernel_tracer_fd);
466 if (ret) {
467 PERROR("close");
468 }
469 }
470 DBG("Unloading kernel modules");
471 modprobe_remove_lttng_all();
472 free(syscall_table);
473 }
474
475 close_consumer_sockets();
476
477 if (load_info) {
478 load_session_destroy_data(load_info);
479 free(load_info);
480 }
481
482 /*
483 * We do NOT rmdir rundir because there are other processes
484 * using it, for instance lttng-relayd, which can start in
485 * parallel with this teardown.
486 */
487 }
488
489 /*
490 * Cleanup the daemon's option data structures.
491 */
492 static void sessiond_cleanup_options(void)
493 {
494 DBG("Cleaning up options");
495
496 sessiond_config_fini(&config);
497
498 run_as_destroy_worker();
499 }
500
501 /*
502 * Send data on a unix socket using the liblttsessiondcomm API.
503 *
504 * Return lttcomm error code.
505 */
506 static int send_unix_sock(int sock, void *buf, size_t len)
507 {
508 /* Check valid length */
509 if (len == 0) {
510 return -1;
511 }
512
513 return lttcomm_send_unix_sock(sock, buf, len);
514 }
515
516 /*
517 * Free memory of a command context structure.
518 */
519 static void clean_command_ctx(struct command_ctx **cmd_ctx)
520 {
521 DBG("Clean command context structure");
522 if (*cmd_ctx) {
523 if ((*cmd_ctx)->llm) {
524 free((*cmd_ctx)->llm);
525 }
526 if ((*cmd_ctx)->lsm) {
527 free((*cmd_ctx)->lsm);
528 }
529 free(*cmd_ctx);
530 *cmd_ctx = NULL;
531 }
532 }
533
534 /*
535 * Notify UST applications using the shm mmap futex.
536 */
537 static int notify_ust_apps(int active)
538 {
539 char *wait_shm_mmap;
540
541 DBG("Notifying applications of session daemon state: %d", active);
542
543 /* See shm.c for this call; it implies mmap, shm and futex calls. */
544 wait_shm_mmap = shm_ust_get_mmap(config.wait_shm_path.value, is_root);
545 if (wait_shm_mmap == NULL) {
546 goto error;
547 }
548
549 /* Wake waiting process */
550 futex_wait_update((int32_t *) wait_shm_mmap, active);
551
552 /* Apps notified successfully */
553 return 0;
554
555 error:
556 return -1;
557 }
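
/*
 * Usage note: the registration thread calls notify_ust_apps(1) once it is
 * ready to accept applications, and notify_ust_apps(0) on its teardown (see
 * thread_registration_apps below).
 */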
558
559 /*
560 * Setup the outgoing data buffer for the response (llm) by allocating the
561 * right amount of memory and copying the original information from the lsm
562 * structure.
563 *
564 * Return 0 on success, negative value on error.
565 */
566 static int setup_lttng_msg(struct command_ctx *cmd_ctx,
567 const void *payload_buf, size_t payload_len,
568 const void *cmd_header_buf, size_t cmd_header_len)
569 {
570 int ret = 0;
571 const size_t header_len = sizeof(struct lttcomm_lttng_msg);
572 const size_t cmd_header_offset = header_len;
573 const size_t payload_offset = cmd_header_offset + cmd_header_len;
574 const size_t total_msg_size = header_len + cmd_header_len + payload_len;
575
576 cmd_ctx->llm = zmalloc(total_msg_size);
577
578 if (cmd_ctx->llm == NULL) {
579 PERROR("zmalloc");
580 ret = -ENOMEM;
581 goto end;
582 }
583
584 /* Copy common data */
585 cmd_ctx->llm->cmd_type = cmd_ctx->lsm->cmd_type;
586 cmd_ctx->llm->pid = cmd_ctx->lsm->domain.attr.pid;
587 cmd_ctx->llm->cmd_header_size = cmd_header_len;
588 cmd_ctx->llm->data_size = payload_len;
589 cmd_ctx->lttng_msg_size = total_msg_size;
590
591 /* Copy command header */
592 if (cmd_header_len) {
593 memcpy(((uint8_t *) cmd_ctx->llm) + cmd_header_offset, cmd_header_buf,
594 cmd_header_len);
595 }
596
597 /* Copy payload */
598 if (payload_len) {
599 memcpy(((uint8_t *) cmd_ctx->llm) + payload_offset, payload_buf,
600 payload_len);
601 }
602
603 end:
604 return ret;
605 }
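
/*
 * Resulting message layout, as built by setup_lttng_msg() above:
 *
 *   +--------------------------+--------------------+------------------+
 *   | struct lttcomm_lttng_msg | command header     | payload          |
 *   | (header_len bytes)       | (cmd_header_len)   | (payload_len)    |
 *   +--------------------------+--------------------+------------------+
 *   ^ cmd_ctx->llm             ^ cmd_header_offset  ^ payload_offset
 */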
606
607 /*
608 * Version of setup_lttng_msg() without command header.
609 */
610 static int setup_lttng_msg_no_cmd_header(struct command_ctx *cmd_ctx,
611 void *payload_buf, size_t payload_len)
612 {
613 return setup_lttng_msg(cmd_ctx, payload_buf, payload_len, NULL, 0);
614 }
615 /*
616 * Update the kernel poll set with all the channel fds available across all
617 * tracing sessions. Add the wakeup pipe at the end of the set.
618 */
619 static int update_kernel_poll(struct lttng_poll_event *events)
620 {
621 int ret;
622 struct ltt_session *session;
623 struct ltt_kernel_channel *channel;
624
625 DBG("Updating kernel poll set");
626
627 session_lock_list();
628 cds_list_for_each_entry(session, &session_list_ptr->head, list) {
629 session_lock(session);
630 if (session->kernel_session == NULL) {
631 session_unlock(session);
632 continue;
633 }
634
635 cds_list_for_each_entry(channel,
636 &session->kernel_session->channel_list.head, list) {
637 /* Add channel fd to the kernel poll set */
638 ret = lttng_poll_add(events, channel->fd, LPOLLIN | LPOLLRDNORM);
639 if (ret < 0) {
640 session_unlock(session);
641 goto error;
642 }
643 DBG("Channel fd %d added to kernel set", channel->fd);
644 }
645 session_unlock(session);
646 }
647 session_unlock_list();
648
649 return 0;
650
651 error:
652 session_unlock_list();
653 return -1;
654 }
655
656 /*
657 * Find the channel fd matching 'fd' across all tracing sessions. When found,
658 * check for new channel streams and send those stream fds to the kernel consumer.
659 *
660 * Useful for the CPU hotplug feature.
661 */
662 static int update_kernel_stream(int fd)
663 {
664 int ret = 0;
665 struct ltt_session *session;
666 struct ltt_kernel_session *ksess;
667 struct ltt_kernel_channel *channel;
668
669 DBG("Updating kernel streams for channel fd %d", fd);
670
671 session_lock_list();
672 cds_list_for_each_entry(session, &session_list_ptr->head, list) {
673 session_lock(session);
674 if (session->kernel_session == NULL) {
675 session_unlock(session);
676 continue;
677 }
678 ksess = session->kernel_session;
679
680 cds_list_for_each_entry(channel,
681 &ksess->channel_list.head, list) {
682 struct lttng_ht_iter iter;
683 struct consumer_socket *socket;
684
685 if (channel->fd != fd) {
686 continue;
687 }
688 DBG("Channel found, updating kernel streams");
689 ret = kernel_open_channel_stream(channel);
690 if (ret < 0) {
691 goto error;
692 }
693 /* Update the stream global counter */
694 ksess->stream_count_global += ret;
695
696 /*
697 * Have we already sent fds to the consumer? If yes, it
698 * means that tracing is started so it is safe to send
699 * our updated stream fds.
700 */
701 if (ksess->consumer_fds_sent != 1
702 || ksess->consumer == NULL) {
703 ret = -1;
704 goto error;
705 }
706
707 rcu_read_lock();
708 cds_lfht_for_each_entry(ksess->consumer->socks->ht,
709 &iter.iter, socket, node.node) {
710 pthread_mutex_lock(socket->lock);
711 ret = kernel_consumer_send_channel_streams(socket,
712 channel, ksess,
713 session->output_traces ? 1 : 0);
714 pthread_mutex_unlock(socket->lock);
715 if (ret < 0) {
716 rcu_read_unlock();
717 goto error;
718 }
719 }
720 rcu_read_unlock();
721 }
722 session_unlock(session);
723 }
724 session_unlock_list();
725 return ret;
726
727 error:
728 session_unlock(session);
729 session_unlock_list();
730 return ret;
731 }
732
733 /*
734 * For each tracing session, update newly registered apps. The session list
735 * lock MUST be acquired before calling this.
736 */
737 static void update_ust_app(int app_sock)
738 {
739 struct ltt_session *sess, *stmp;
740
741 /* Consumer is in an ERROR state. Stop any application update. */
742 if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) {
743 /* Stop the update process since the consumer is dead. */
744 return;
745 }
746
747 /* For all tracing session(s) */
748 cds_list_for_each_entry_safe(sess, stmp, &session_list_ptr->head, list) {
749 struct ust_app *app;
750
751 session_lock(sess);
752 if (!sess->ust_session) {
753 goto unlock_session;
754 }
755
756 rcu_read_lock();
757 assert(app_sock >= 0);
758 app = ust_app_find_by_sock(app_sock);
759 if (app == NULL) {
760 /*
761 * The application may have unregistered
762 * already, so this is possible; simply stop
763 * the update.
764 */
765 DBG3("UST app update failed to find app sock %d",
766 app_sock);
767 goto unlock_rcu;
768 }
769 ust_app_global_update(sess->ust_session, app);
770 unlock_rcu:
771 rcu_read_unlock();
772 unlock_session:
773 session_unlock(sess);
774 }
775 }
776
777 /*
778 * This thread manages events coming from the kernel.
779 *
780 * Features supported in this thread:
781 * -) CPU Hotplug
782 */
783 static void *thread_manage_kernel(void *data)
784 {
785 int ret, i, pollfd, update_poll_flag = 1, err = -1;
786 uint32_t revents, nb_fd;
787 char tmp;
788 struct lttng_poll_event events;
789
790 DBG("[thread] Thread manage kernel started");
791
792 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_KERNEL);
793
794 /*
795 * The first step of the while loop cleans this structure, which could free
796 * non-NULL pointers, so initialize it before entering the loop.
797 */
798 lttng_poll_init(&events);
799
800 if (testpoint(sessiond_thread_manage_kernel)) {
801 goto error_testpoint;
802 }
803
804 health_code_update();
805
806 if (testpoint(sessiond_thread_manage_kernel_before_loop)) {
807 goto error_testpoint;
808 }
809
810 while (1) {
811 health_code_update();
812
813 if (update_poll_flag == 1) {
814 /* Clean events object. We are about to populate it again. */
815 lttng_poll_clean(&events);
816
817 ret = sessiond_set_thread_pollset(&events, 2);
818 if (ret < 0) {
819 goto error_poll_create;
820 }
821
822 ret = lttng_poll_add(&events, kernel_poll_pipe[0], LPOLLIN);
823 if (ret < 0) {
824 goto error;
825 }
826
827 /* This will add the available kernel channel if any. */
828 ret = update_kernel_poll(&events);
829 if (ret < 0) {
830 goto error;
831 }
832 update_poll_flag = 0;
833 }
834
835 DBG("Thread kernel polling");
836
837 /* Poll with an infinite timeout. */
838 restart:
839 health_poll_entry();
840 ret = lttng_poll_wait(&events, -1);
841 DBG("Thread kernel return from poll on %d fds",
842 LTTNG_POLL_GETNB(&events));
843 health_poll_exit();
844 if (ret < 0) {
845 /*
846 * Restart interrupted system call.
847 */
848 if (errno == EINTR) {
849 goto restart;
850 }
851 goto error;
852 } else if (ret == 0) {
853 /* Should not happen since timeout is infinite */
854 ERR("Return value of poll is 0 with an infinite timeout.\n"
855 "This should not have happened! Continuing...");
856 continue;
857 }
858
859 nb_fd = ret;
860
861 for (i = 0; i < nb_fd; i++) {
862 /* Fetch once the poll data */
863 revents = LTTNG_POLL_GETEV(&events, i);
864 pollfd = LTTNG_POLL_GETFD(&events, i);
865
866 health_code_update();
867
868 if (!revents) {
869 /* No activity for this FD (poll implementation). */
870 continue;
871 }
872
873 /* Thread quit pipe has been closed. Killing thread. */
874 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
875 if (ret) {
876 err = 0;
877 goto exit;
878 }
879
880 /* Check for data on kernel pipe */
881 if (revents & LPOLLIN) {
882 if (pollfd == kernel_poll_pipe[0]) {
883 (void) lttng_read(kernel_poll_pipe[0],
884 &tmp, 1);
885 /*
886 * The return value is useless here; any activity on
887 * this pipe means an update is required anyway.
888 */
889 update_poll_flag = 1;
890 continue;
891 } else {
892 /*
893 * New CPU detected by the kernel. Adding kernel stream to
894 * kernel session and updating the kernel consumer
895 */
896 ret = update_kernel_stream(pollfd);
897 if (ret < 0) {
898 continue;
899 }
900 break;
901 }
902 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
903 update_poll_flag = 1;
904 continue;
905 } else {
906 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
907 goto error;
908 }
909 }
910 }
911
912 exit:
913 error:
914 lttng_poll_clean(&events);
915 error_poll_create:
916 error_testpoint:
917 utils_close_pipe(kernel_poll_pipe);
918 kernel_poll_pipe[0] = kernel_poll_pipe[1] = -1;
919 if (err) {
920 health_error();
921 ERR("Health error occurred in %s", __func__);
922 WARN("Kernel thread died unexpectedly. "
923 "Kernel tracing can continue but CPU hotplug is disabled.");
924 }
925 health_unregister(health_sessiond);
926 DBG("Kernel thread dying");
927 return NULL;
928 }
929
930 /*
931 * Signal the consumer data's pthread condition, reporting the thread state.
932 */
933 static void signal_consumer_condition(struct consumer_data *data, int state)
934 {
935 pthread_mutex_lock(&data->cond_mutex);
936
937 /*
938 * The state is set before signaling. It can be any value, it's the waiter
939 * job to correctly interpret this condition variable associated to the
940 * consumer pthread_cond.
941 *
942 * A value of 0 means that the corresponding thread of the consumer data
943 * was not started. 1 indicates that the thread has started and is ready
944 * for action. A negative value means that there was an error during the
945 * thread bootstrap.
946 */
947 data->consumer_thread_is_ready = state;
948 (void) pthread_cond_signal(&data->cond);
949
950 pthread_mutex_unlock(&data->cond_mutex);
951 }
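
/*
 * Illustrative (not compiled, simplified) sketch of the matching wait side,
 * as performed in spawn_consumer_thread() further below; error and timeout
 * handling omitted:
 *
 *   pthread_mutex_lock(&consumer_data->cond_mutex);
 *   while (consumer_data->consumer_thread_is_ready == 0) {
 *           pthread_cond_timedwait(&consumer_data->cond,
 *                   &consumer_data->cond_mutex, &timeout);
 *   }
 *   pthread_mutex_unlock(&consumer_data->cond_mutex);
 *   // consumer_thread_is_ready is now 1 (ready) or negative (error).
 */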
952
953 /*
954 * This thread manages consumer errors sent back to the session daemon.
955 */
956 static void *thread_manage_consumer(void *data)
957 {
958 int sock = -1, i, ret, pollfd, err = -1, should_quit = 0;
959 uint32_t revents, nb_fd;
960 enum lttcomm_return_code code;
961 struct lttng_poll_event events;
962 struct consumer_data *consumer_data = data;
963 struct consumer_socket *cmd_socket_wrapper = NULL;
964
965 DBG("[thread] Manage consumer started");
966
967 rcu_register_thread();
968 rcu_thread_online();
969
970 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_CONSUMER);
971
972 health_code_update();
973
974 /*
975 * Pass 3 as size here for the thread quit pipe, consumerd_err_sock and the
976 * metadata_sock. Nothing more will be added to this poll set.
977 */
978 ret = sessiond_set_thread_pollset(&events, 3);
979 if (ret < 0) {
980 goto error_poll;
981 }
982
983 /*
984 * The error socket here is already in a listening state which was done
985 * just before spawning this thread to avoid a race between the consumer
986 * daemon exec trying to connect and the listen() call.
987 */
988 ret = lttng_poll_add(&events, consumer_data->err_sock, LPOLLIN | LPOLLRDHUP);
989 if (ret < 0) {
990 goto error;
991 }
992
993 health_code_update();
994
995 /* Infinite blocking call, waiting for transmission */
996 restart:
997 health_poll_entry();
998
999 if (testpoint(sessiond_thread_manage_consumer)) {
1000 goto error;
1001 }
1002
1003 ret = lttng_poll_wait(&events, -1);
1004 health_poll_exit();
1005 if (ret < 0) {
1006 /*
1007 * Restart interrupted system call.
1008 */
1009 if (errno == EINTR) {
1010 goto restart;
1011 }
1012 goto error;
1013 }
1014
1015 nb_fd = ret;
1016
1017 for (i = 0; i < nb_fd; i++) {
1018 /* Fetch once the poll data */
1019 revents = LTTNG_POLL_GETEV(&events, i);
1020 pollfd = LTTNG_POLL_GETFD(&events, i);
1021
1022 health_code_update();
1023
1024 if (!revents) {
1025 /* No activity for this FD (poll implementation). */
1026 continue;
1027 }
1028
1029 /* Thread quit pipe has been closed. Killing thread. */
1030 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
1031 if (ret) {
1032 err = 0;
1033 goto exit;
1034 }
1035
1036 /* Event on the registration socket */
1037 if (pollfd == consumer_data->err_sock) {
1038 if (revents & LPOLLIN) {
1039 continue;
1040 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1041 ERR("consumer err socket poll error");
1042 goto error;
1043 } else {
1044 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
1045 goto error;
1046 }
1047 }
1048 }
1049
1050 sock = lttcomm_accept_unix_sock(consumer_data->err_sock);
1051 if (sock < 0) {
1052 goto error;
1053 }
1054
1055 /*
1056 * Set the CLOEXEC flag. Return code is useless because either way, the
1057 * show must go on.
1058 */
1059 (void) utils_set_fd_cloexec(sock);
1060
1061 health_code_update();
1062
1063 DBG2("Receiving code from consumer err_sock");
1064
1065 /* Get the status code from the consumerd */
1066 ret = lttcomm_recv_unix_sock(sock, &code,
1067 sizeof(enum lttcomm_return_code));
1068 if (ret <= 0) {
1069 goto error;
1070 }
1071
1072 health_code_update();
1073 if (code != LTTCOMM_CONSUMERD_COMMAND_SOCK_READY) {
1074 ERR("consumer error when waiting for SOCK_READY : %s",
1075 lttcomm_get_readable_code(-code));
1076 goto error;
1077 }
1078
1079 /* Connect both command and metadata sockets. */
1080 consumer_data->cmd_sock =
1081 lttcomm_connect_unix_sock(
1082 consumer_data->cmd_unix_sock_path);
1083 consumer_data->metadata_fd =
1084 lttcomm_connect_unix_sock(
1085 consumer_data->cmd_unix_sock_path);
1086 if (consumer_data->cmd_sock < 0 || consumer_data->metadata_fd < 0) {
1087 PERROR("consumer connect cmd socket");
1088 /* On error, signal condition and quit. */
1089 signal_consumer_condition(consumer_data, -1);
1090 goto error;
1091 }
1092
1093 consumer_data->metadata_sock.fd_ptr = &consumer_data->metadata_fd;
1094
1095 /* Create metadata socket lock. */
1096 consumer_data->metadata_sock.lock = zmalloc(sizeof(pthread_mutex_t));
1097 if (consumer_data->metadata_sock.lock == NULL) {
1098 PERROR("zmalloc pthread mutex");
1099 goto error;
1100 }
1101 pthread_mutex_init(consumer_data->metadata_sock.lock, NULL);
1102
1103 DBG("Consumer command socket ready (fd: %d", consumer_data->cmd_sock);
1104 DBG("Consumer metadata socket ready (fd: %d)",
1105 consumer_data->metadata_fd);
1106
1107 /*
1108 * Remove the consumerd error sock since we've established a connection.
1109 */
1110 ret = lttng_poll_del(&events, consumer_data->err_sock);
1111 if (ret < 0) {
1112 goto error;
1113 }
1114
1115 /* Add new accepted error socket. */
1116 ret = lttng_poll_add(&events, sock, LPOLLIN | LPOLLRDHUP);
1117 if (ret < 0) {
1118 goto error;
1119 }
1120
1121 /* Add metadata socket that is successfully connected. */
1122 ret = lttng_poll_add(&events, consumer_data->metadata_fd,
1123 LPOLLIN | LPOLLRDHUP);
1124 if (ret < 0) {
1125 goto error;
1126 }
1127
1128 health_code_update();
1129
1130 /*
1131 * Transfer the write-end of the channel monitoring and rotate pipe
1132 * to the consumer by issuing a SET_CHANNEL_MONITOR_PIPE command.
1133 */
1134 cmd_socket_wrapper = consumer_allocate_socket(&consumer_data->cmd_sock);
1135 if (!cmd_socket_wrapper) {
1136 goto error;
1137 }
1138 cmd_socket_wrapper->lock = &consumer_data->lock;
1139
1140 ret = consumer_send_channel_monitor_pipe(cmd_socket_wrapper,
1141 consumer_data->channel_monitor_pipe);
1142 if (ret) {
1143 goto error;
1144 }
1145
1146 /* Discard the socket wrapper as it is no longer needed. */
1147 consumer_destroy_socket(cmd_socket_wrapper);
1148 cmd_socket_wrapper = NULL;
1149
1150 /* The thread is completely initialized, signal that it is ready. */
1151 signal_consumer_condition(consumer_data, 1);
1152
1153 /* Infinite blocking call, waiting for transmission */
1154 restart_poll:
1155 while (1) {
1156 health_code_update();
1157
1158 /* Exit the thread because the thread quit pipe has been triggered. */
1159 if (should_quit) {
1160 /* Not a health error. */
1161 err = 0;
1162 goto exit;
1163 }
1164
1165 health_poll_entry();
1166 ret = lttng_poll_wait(&events, -1);
1167 health_poll_exit();
1168 if (ret < 0) {
1169 /*
1170 * Restart interrupted system call.
1171 */
1172 if (errno == EINTR) {
1173 goto restart_poll;
1174 }
1175 goto error;
1176 }
1177
1178 nb_fd = ret;
1179
1180 for (i = 0; i < nb_fd; i++) {
1181 /* Fetch once the poll data */
1182 revents = LTTNG_POLL_GETEV(&events, i);
1183 pollfd = LTTNG_POLL_GETFD(&events, i);
1184
1185 health_code_update();
1186
1187 if (!revents) {
1188 /* No activity for this FD (poll implementation). */
1189 continue;
1190 }
1191
1192 /*
1193 * Thread quit pipe has been triggered, flag that we should stop
1194 * but continue the current loop to handle potential data from
1195 * consumer.
1196 */
1197 should_quit = sessiond_check_thread_quit_pipe(pollfd, revents);
1198
1199 if (pollfd == sock) {
1200 /* Event on the consumerd socket */
1201 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)
1202 && !(revents & LPOLLIN)) {
1203 ERR("consumer err socket second poll error");
1204 goto error;
1205 }
1206 health_code_update();
1207 /* Wait for any consumerd error */
1208 ret = lttcomm_recv_unix_sock(sock, &code,
1209 sizeof(enum lttcomm_return_code));
1210 if (ret <= 0) {
1211 ERR("consumer closed the command socket");
1212 goto error;
1213 }
1214
1215 ERR("consumer return code : %s",
1216 lttcomm_get_readable_code(-code));
1217
1218 goto exit;
1219 } else if (pollfd == consumer_data->metadata_fd) {
1220 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)
1221 && !(revents & LPOLLIN)) {
1222 ERR("consumer err metadata socket second poll error");
1223 goto error;
1224 }
1225 /* UST metadata requests */
1226 ret = ust_consumer_metadata_request(
1227 &consumer_data->metadata_sock);
1228 if (ret < 0) {
1229 ERR("Handling metadata request");
1230 goto error;
1231 }
1232 }
1233 /* No need for an else branch; all FDs were tested prior. */
1234 }
1235 health_code_update();
1236 }
1237
1238 exit:
1239 error:
1240 /*
1241 * We lock here because we are about to close the sockets and some other
1242 * thread might be using them, so take exclusive access, which will abort
1243 * all other consumer commands issued by other threads.
1244 */
1245 pthread_mutex_lock(&consumer_data->lock);
1246
1247 /* Immediately set the consumerd state to stopped */
1248 if (consumer_data->type == LTTNG_CONSUMER_KERNEL) {
1249 uatomic_set(&kernel_consumerd_state, CONSUMER_ERROR);
1250 } else if (consumer_data->type == LTTNG_CONSUMER64_UST ||
1251 consumer_data->type == LTTNG_CONSUMER32_UST) {
1252 uatomic_set(&ust_consumerd_state, CONSUMER_ERROR);
1253 } else {
1254 /* Code flow error... */
1255 assert(0);
1256 }
1257
1258 if (consumer_data->err_sock >= 0) {
1259 ret = close(consumer_data->err_sock);
1260 if (ret) {
1261 PERROR("close");
1262 }
1263 consumer_data->err_sock = -1;
1264 }
1265 if (consumer_data->cmd_sock >= 0) {
1266 ret = close(consumer_data->cmd_sock);
1267 if (ret) {
1268 PERROR("close");
1269 }
1270 consumer_data->cmd_sock = -1;
1271 }
1272 if (consumer_data->metadata_sock.fd_ptr &&
1273 *consumer_data->metadata_sock.fd_ptr >= 0) {
1274 ret = close(*consumer_data->metadata_sock.fd_ptr);
1275 if (ret) {
1276 PERROR("close");
1277 }
1278 }
1279 if (sock >= 0) {
1280 ret = close(sock);
1281 if (ret) {
1282 PERROR("close");
1283 }
1284 }
1285
1286 unlink(consumer_data->err_unix_sock_path);
1287 unlink(consumer_data->cmd_unix_sock_path);
1288 pthread_mutex_unlock(&consumer_data->lock);
1289
1290 /* Cleanup metadata socket mutex. */
1291 if (consumer_data->metadata_sock.lock) {
1292 pthread_mutex_destroy(consumer_data->metadata_sock.lock);
1293 free(consumer_data->metadata_sock.lock);
1294 }
1295 lttng_poll_clean(&events);
1296
1297 if (cmd_socket_wrapper) {
1298 consumer_destroy_socket(cmd_socket_wrapper);
1299 }
1300 error_poll:
1301 if (err) {
1302 health_error();
1303 ERR("Health error occurred in %s", __func__);
1304 }
1305 health_unregister(health_sessiond);
1306 DBG("consumer thread cleanup completed");
1307
1308 rcu_thread_offline();
1309 rcu_unregister_thread();
1310
1311 return NULL;
1312 }
1313
1314 /*
1315 * This thread receives application command sockets (FDs) on the
1316 * apps_cmd_pipe and waits (polls) on them until they are closed
1317 * or an error occurs.
1318 *
1319 * At that point, it flushes the data (tracing and metadata) associated
1320 * with this application and tears down ust app sessions and other
1321 * associated data structures through ust_app_unregister().
1322 *
1323 * Note that this thread never sends commands to the applications
1324 * through the command sockets; it merely listens for hang-ups
1325 * and errors on those sockets and cleans up as they occur.
1326 */
1327 static void *thread_manage_apps(void *data)
1328 {
1329 int i, ret, pollfd, err = -1;
1330 ssize_t size_ret;
1331 uint32_t revents, nb_fd;
1332 struct lttng_poll_event events;
1333
1334 DBG("[thread] Manage application started");
1335
1336 rcu_register_thread();
1337 rcu_thread_online();
1338
1339 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_MANAGE);
1340
1341 if (testpoint(sessiond_thread_manage_apps)) {
1342 goto error_testpoint;
1343 }
1344
1345 health_code_update();
1346
1347 ret = sessiond_set_thread_pollset(&events, 2);
1348 if (ret < 0) {
1349 goto error_poll_create;
1350 }
1351
1352 ret = lttng_poll_add(&events, apps_cmd_pipe[0], LPOLLIN | LPOLLRDHUP);
1353 if (ret < 0) {
1354 goto error;
1355 }
1356
1357 if (testpoint(sessiond_thread_manage_apps_before_loop)) {
1358 goto error;
1359 }
1360
1361 health_code_update();
1362
1363 while (1) {
1364 DBG("Apps thread polling");
1365
1366 /* Infinite blocking call, waiting for transmission */
1367 restart:
1368 health_poll_entry();
1369 ret = lttng_poll_wait(&events, -1);
1370 DBG("Apps thread return from poll on %d fds",
1371 LTTNG_POLL_GETNB(&events));
1372 health_poll_exit();
1373 if (ret < 0) {
1374 /*
1375 * Restart interrupted system call.
1376 */
1377 if (errno == EINTR) {
1378 goto restart;
1379 }
1380 goto error;
1381 }
1382
1383 nb_fd = ret;
1384
1385 for (i = 0; i < nb_fd; i++) {
1386 /* Fetch once the poll data */
1387 revents = LTTNG_POLL_GETEV(&events, i);
1388 pollfd = LTTNG_POLL_GETFD(&events, i);
1389
1390 health_code_update();
1391
1392 if (!revents) {
1393 /* No activity for this FD (poll implementation). */
1394 continue;
1395 }
1396
1397 /* Thread quit pipe has been closed. Killing thread. */
1398 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
1399 if (ret) {
1400 err = 0;
1401 goto exit;
1402 }
1403
1404 /* Inspect the apps cmd pipe */
1405 if (pollfd == apps_cmd_pipe[0]) {
1406 if (revents & LPOLLIN) {
1407 int sock;
1408
1409 /* Empty pipe */
1410 size_ret = lttng_read(apps_cmd_pipe[0], &sock, sizeof(sock));
1411 if (size_ret < (ssize_t) sizeof(sock)) {
1412 PERROR("read apps cmd pipe");
1413 goto error;
1414 }
1415
1416 health_code_update();
1417
1418 /*
1419 * Since this is a command socket (write then read),
1420 * we only monitor the error events of the socket.
1421 */
1422 ret = lttng_poll_add(&events, sock,
1423 LPOLLERR | LPOLLHUP | LPOLLRDHUP);
1424 if (ret < 0) {
1425 goto error;
1426 }
1427
1428 DBG("Apps with sock %d added to poll set", sock);
1429 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1430 ERR("Apps command pipe error");
1431 goto error;
1432 } else {
1433 ERR("Unknown poll events %u for sock %d", revents, pollfd);
1434 goto error;
1435 }
1436 } else {
1437 /*
1438 * At this point, we know that a registered application
1439 * triggered the event returned by poll_wait.
1440 */
1441 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1442 /* Removing from the poll set */
1443 ret = lttng_poll_del(&events, pollfd);
1444 if (ret < 0) {
1445 goto error;
1446 }
1447
1448 /* Socket closed on remote end. */
1449 ust_app_unregister(pollfd);
1450 } else {
1451 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
1452 goto error;
1453 }
1454 }
1455
1456 health_code_update();
1457 }
1458 }
1459
1460 exit:
1461 error:
1462 lttng_poll_clean(&events);
1463 error_poll_create:
1464 error_testpoint:
1465 utils_close_pipe(apps_cmd_pipe);
1466 apps_cmd_pipe[0] = apps_cmd_pipe[1] = -1;
1467
1468 /*
1469 * We don't clean the UST app hash table here since already registered
1470 * applications can still be controlled so let them be until the session
1471 * daemon dies or the applications stop.
1472 */
1473
1474 if (err) {
1475 health_error();
1476 ERR("Health error occurred in %s", __func__);
1477 }
1478 health_unregister(health_sessiond);
1479 DBG("Application communication apps thread cleanup complete");
1480 rcu_thread_offline();
1481 rcu_unregister_thread();
1482 return NULL;
1483 }
1484
1485 /*
1486 * Send a socket to a thread. This is called from the dispatch UST registration
1487 * thread once all sockets are set for the application.
1488 *
1489 * The sock value can be invalid, we don't really care, the thread will handle
1490 * it and make the necessary cleanup if so.
1491 *
1492 * On success, return 0; on error, return a negative value corresponding to
1493 * the errno of the failed write().
1494 */
1495 static int send_socket_to_thread(int fd, int sock)
1496 {
1497 ssize_t ret;
1498
1499 /*
1500 * It's possible that the FD was concurrently set to the invalid value -1
1501 * just before this call, which indicates a shutdown state of the thread.
1502 */
1503 if (fd < 0) {
1504 ret = -EBADF;
1505 goto error;
1506 }
1507
1508 ret = lttng_write(fd, &sock, sizeof(sock));
1509 if (ret < (ssize_t) sizeof(sock)) {
1510 PERROR("write apps pipe %d", fd);
1511 if (ret < 0) {
1512 ret = -errno;
1513 }
1514 goto error;
1515 }
1516
1517 /* All good. Don't send back the write positive ret value. */
1518 ret = 0;
1519 error:
1520 return (int) ret;
1521 }
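
/*
 * Usage in this file: the dispatch thread forwards each application's command
 * socket to the manage-apps thread, e.g.:
 *
 *   ret = send_socket_to_thread(apps_cmd_pipe[1], app->sock);
 */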
1522
1523 /*
1524 * Sanitize the wait queue of the dispatch registration thread by removing
1525 * its invalid nodes. This avoids memory leaks in the case where the UST
1526 * notify socket is never received.
1527 */
1528 static void sanitize_wait_queue(struct ust_reg_wait_queue *wait_queue)
1529 {
1530 int ret, nb_fd = 0, i;
1531 unsigned int fd_added = 0;
1532 struct lttng_poll_event events;
1533 struct ust_reg_wait_node *wait_node = NULL, *tmp_wait_node;
1534
1535 assert(wait_queue);
1536
1537 lttng_poll_init(&events);
1538
1539 /* Just skip everything for an empty queue. */
1540 if (!wait_queue->count) {
1541 goto end;
1542 }
1543
1544 ret = lttng_poll_create(&events, wait_queue->count, LTTNG_CLOEXEC);
1545 if (ret < 0) {
1546 goto error_create;
1547 }
1548
1549 cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
1550 &wait_queue->head, head) {
1551 assert(wait_node->app);
1552 ret = lttng_poll_add(&events, wait_node->app->sock,
1553 LPOLLHUP | LPOLLERR);
1554 if (ret < 0) {
1555 goto error;
1556 }
1557
1558 fd_added = 1;
1559 }
1560
1561 if (!fd_added) {
1562 goto end;
1563 }
1564
1565 /*
1566 * Poll but don't block so we can quickly identify the faulty events and
1567 * clean them afterwards from the wait queue.
1568 */
1569 ret = lttng_poll_wait(&events, 0);
1570 if (ret < 0) {
1571 goto error;
1572 }
1573 nb_fd = ret;
1574
1575 for (i = 0; i < nb_fd; i++) {
1576 /* Get faulty FD. */
1577 uint32_t revents = LTTNG_POLL_GETEV(&events, i);
1578 int pollfd = LTTNG_POLL_GETFD(&events, i);
1579
1580 if (!revents) {
1581 /* No activity for this FD (poll implementation). */
1582 continue;
1583 }
1584
1585 cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
1586 &wait_queue->head, head) {
1587 if (pollfd == wait_node->app->sock &&
1588 (revents & (LPOLLHUP | LPOLLERR))) {
1589 cds_list_del(&wait_node->head);
1590 wait_queue->count--;
1591 ust_app_destroy(wait_node->app);
1592 free(wait_node);
1593 /*
1594 * Silence warning of use-after-free in
1595 * cds_list_for_each_entry_safe which uses
1596 * __typeof__(*wait_node).
1597 */
1598 wait_node = NULL;
1599 break;
1600 } else {
1601 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
1602 goto error;
1603 }
1604 }
1605 }
1606
1607 if (nb_fd > 0) {
1608 DBG("Wait queue sanitized, %d node were cleaned up", nb_fd);
1609 }
1610
1611 end:
1612 lttng_poll_clean(&events);
1613 return;
1614
1615 error:
1616 lttng_poll_clean(&events);
1617 error_create:
1618 ERR("Unable to sanitize wait queue");
1619 return;
1620 }
1621
1622 /*
1623 * Dispatch request from the registration threads to the application
1624 * communication thread.
1625 */
1626 static void *thread_dispatch_ust_registration(void *data)
1627 {
1628 int ret, err = -1;
1629 struct cds_wfcq_node *node;
1630 struct ust_command *ust_cmd = NULL;
1631 struct ust_reg_wait_node *wait_node = NULL, *tmp_wait_node;
1632 struct ust_reg_wait_queue wait_queue = {
1633 .count = 0,
1634 };
1635
1636 rcu_register_thread();
1637
1638 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_REG_DISPATCH);
1639
1640 if (testpoint(sessiond_thread_app_reg_dispatch)) {
1641 goto error_testpoint;
1642 }
1643
1644 health_code_update();
1645
1646 CDS_INIT_LIST_HEAD(&wait_queue.head);
1647
1648 DBG("[thread] Dispatch UST command started");
1649
1650 for (;;) {
1651 health_code_update();
1652
1653 /* Atomically prepare the queue futex */
1654 futex_nto1_prepare(&ust_cmd_queue.futex);
1655
1656 if (CMM_LOAD_SHARED(dispatch_thread_exit)) {
1657 break;
1658 }
1659
1660 do {
1661 struct ust_app *app = NULL;
1662 ust_cmd = NULL;
1663
1664 /*
1665 * Make sure we don't have node(s) that have hung up before receiving
1666 * the notify socket. This is to clean the list in order to avoid
1667 * memory leaks from notify sockets that are never seen.
1668 */
1669 sanitize_wait_queue(&wait_queue);
1670
1671 health_code_update();
1672 /* Dequeue command for registration */
1673 node = cds_wfcq_dequeue_blocking(&ust_cmd_queue.head, &ust_cmd_queue.tail);
1674 if (node == NULL) {
1675 DBG("Woken up but nothing in the UST command queue");
1676 /* Continue thread execution */
1677 break;
1678 }
1679
1680 ust_cmd = caa_container_of(node, struct ust_command, node);
1681
1682 DBG("Dispatching UST registration pid:%d ppid:%d uid:%d"
1683 " gid:%d sock:%d name:%s (version %d.%d)",
1684 ust_cmd->reg_msg.pid, ust_cmd->reg_msg.ppid,
1685 ust_cmd->reg_msg.uid, ust_cmd->reg_msg.gid,
1686 ust_cmd->sock, ust_cmd->reg_msg.name,
1687 ust_cmd->reg_msg.major, ust_cmd->reg_msg.minor);
1688
1689 if (ust_cmd->reg_msg.type == USTCTL_SOCKET_CMD) {
1690 wait_node = zmalloc(sizeof(*wait_node));
1691 if (!wait_node) {
1692 PERROR("zmalloc wait_node dispatch");
1693 ret = close(ust_cmd->sock);
1694 if (ret < 0) {
1695 PERROR("close ust sock dispatch %d", ust_cmd->sock);
1696 }
1697 lttng_fd_put(LTTNG_FD_APPS, 1);
1698 free(ust_cmd);
1699 goto error;
1700 }
1701 CDS_INIT_LIST_HEAD(&wait_node->head);
1702
1703 /* Create application object if socket is CMD. */
1704 wait_node->app = ust_app_create(&ust_cmd->reg_msg,
1705 ust_cmd->sock);
1706 if (!wait_node->app) {
1707 ret = close(ust_cmd->sock);
1708 if (ret < 0) {
1709 PERROR("close ust sock dispatch %d", ust_cmd->sock);
1710 }
1711 lttng_fd_put(LTTNG_FD_APPS, 1);
1712 free(wait_node);
1713 free(ust_cmd);
1714 continue;
1715 }
1716 /*
1717 * Add application to the wait queue so we can set the notify
1718 * socket before putting this object in the global ht.
1719 */
1720 cds_list_add(&wait_node->head, &wait_queue.head);
1721 wait_queue.count++;
1722
1723 free(ust_cmd);
1724 /*
1725 * We have to continue here since we don't have the notify
1726 * socket and the application MUST be added to the hash table
1727 * only at that moment.
1728 */
1729 continue;
1730 } else {
1731 /*
1732 * Look for the application in the local wait queue and set the
1733 * notify socket if found.
1734 */
1735 cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
1736 &wait_queue.head, head) {
1737 health_code_update();
1738 if (wait_node->app->pid == ust_cmd->reg_msg.pid) {
1739 wait_node->app->notify_sock = ust_cmd->sock;
1740 cds_list_del(&wait_node->head);
1741 wait_queue.count--;
1742 app = wait_node->app;
1743 free(wait_node);
1744 DBG3("UST app notify socket %d is set", ust_cmd->sock);
1745 break;
1746 }
1747 }
1748
1749 /*
1750 * With no application at this stage the received socket is
1751 * basically useless so close it before we free the cmd data
1752 * structure for good.
1753 */
1754 if (!app) {
1755 ret = close(ust_cmd->sock);
1756 if (ret < 0) {
1757 PERROR("close ust sock dispatch %d", ust_cmd->sock);
1758 }
1759 lttng_fd_put(LTTNG_FD_APPS, 1);
1760 }
1761 free(ust_cmd);
1762 }
1763
1764 if (app) {
1765 /*
1766 * @session_lock_list
1767 *
1768 * Lock the global session list so from the register up to the
1769 * registration done message, no thread can see the application
1770 * and change its state.
1771 */
1772 session_lock_list();
1773 rcu_read_lock();
1774
1775 /*
1776 * Add application to the global hash table. This needs to be
1777 * done before the update to the UST registry can locate the
1778 * application.
1779 */
1780 ust_app_add(app);
1781
1782 /* Set app version. This call will print an error if needed. */
1783 (void) ust_app_version(app);
1784
1785 /* Send notify socket through the notify pipe. */
1786 ret = send_socket_to_thread(apps_cmd_notify_pipe[1],
1787 app->notify_sock);
1788 if (ret < 0) {
1789 rcu_read_unlock();
1790 session_unlock_list();
1791 /*
1792 * No notify thread, stop the UST tracing. However, this is
1793 * not an internal error of this thread, thus setting
1794 * the health error code to a normal exit.
1795 */
1796 err = 0;
1797 goto error;
1798 }
1799
1800 /*
1801 * Update newly registered application with the tracing
1802 * registry info already enabled information.
1803 */
1804 update_ust_app(app->sock);
1805
1806 /*
1807 * Don't care about return value. Let the manage apps threads
1808 * handle app unregistration upon socket close.
1809 */
1810 (void) ust_app_register_done(app);
1811
1812 /*
1813 * Even if the application socket has been closed, send the app
1814 * to the thread; unregistration will take place
1815 * there.
1816 */
1817 ret = send_socket_to_thread(apps_cmd_pipe[1], app->sock);
1818 if (ret < 0) {
1819 rcu_read_unlock();
1820 session_unlock_list();
1821 /*
1822 * No apps thread, stop the UST tracing. However, this is
1823 * not an internal error of this thread, thus setting
1824 * the health error code to a normal exit.
1825 */
1826 err = 0;
1827 goto error;
1828 }
1829
1830 rcu_read_unlock();
1831 session_unlock_list();
1832 }
1833 } while (node != NULL);
1834
1835 health_poll_entry();
1836 /* Futex wait on queue. Blocking call on futex() */
1837 futex_nto1_wait(&ust_cmd_queue.futex);
1838 health_poll_exit();
1839 }
1840 /* Normal exit, no error */
1841 err = 0;
1842
1843 error:
1844 /* Clean up wait queue. */
1845 cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
1846 &wait_queue.head, head) {
1847 cds_list_del(&wait_node->head);
1848 wait_queue.count--;
1849 free(wait_node);
1850 }
1851
1852 /* Empty command queue. */
1853 for (;;) {
1854 /* Dequeue command for registration */
1855 node = cds_wfcq_dequeue_blocking(&ust_cmd_queue.head, &ust_cmd_queue.tail);
1856 if (node == NULL) {
1857 break;
1858 }
1859 ust_cmd = caa_container_of(node, struct ust_command, node);
1860 ret = close(ust_cmd->sock);
1861 if (ret < 0) {
1862 PERROR("close ust sock exit dispatch %d", ust_cmd->sock);
1863 }
1864 lttng_fd_put(LTTNG_FD_APPS, 1);
1865 free(ust_cmd);
1866 }
1867
1868 error_testpoint:
1869 DBG("Dispatch thread dying");
1870 if (err) {
1871 health_error();
1872 ERR("Health error occurred in %s", __func__);
1873 }
1874 health_unregister(health_sessiond);
1875 rcu_unregister_thread();
1876 return NULL;
1877 }
1878
1879 /*
1880 * This thread manages application registration.
1881 */
1882 static void *thread_registration_apps(void *data)
1883 {
1884 int sock = -1, i, ret, pollfd, err = -1;
1885 uint32_t revents, nb_fd;
1886 struct lttng_poll_event events;
1887 /*
1888 * Commands are allocated in this thread, enqueued to a global queue,
1889 * then dequeued and freed in the manage apps thread.
1890 */
1891 struct ust_command *ust_cmd = NULL;
1892
1893 DBG("[thread] Manage application registration started");
1894
1895 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_REG);
1896
1897 if (testpoint(sessiond_thread_registration_apps)) {
1898 goto error_testpoint;
1899 }
1900
1901 ret = lttcomm_listen_unix_sock(apps_sock);
1902 if (ret < 0) {
1903 goto error_listen;
1904 }
1905
1906 /*
1907 * Pass 2 as size here for the thread quit pipe and apps socket. Nothing
1908 * more will be added to this poll set.
1909 */
1910 ret = sessiond_set_thread_pollset(&events, 2);
1911 if (ret < 0) {
1912 goto error_create_poll;
1913 }
1914
1915 /* Add the application registration socket */
1916 ret = lttng_poll_add(&events, apps_sock, LPOLLIN | LPOLLRDHUP);
1917 if (ret < 0) {
1918 goto error_poll_add;
1919 }
1920
1921 /* Notify all applications to register */
1922 ret = notify_ust_apps(1);
1923 if (ret < 0) {
1924 ERR("Failed to notify applications or create the wait shared memory.\n"
1925 "Execution continues but there might be problem for already\n"
1926 "running applications that wishes to register.");
1927 }
1928
1929 while (1) {
1930 DBG("Accepting application registration");
1931
1932 /* Infinite blocking call, waiting for transmission */
1933 restart:
1934 health_poll_entry();
1935 ret = lttng_poll_wait(&events, -1);
1936 health_poll_exit();
1937 if (ret < 0) {
1938 /*
1939 * Restart interrupted system call.
1940 */
1941 if (errno == EINTR) {
1942 goto restart;
1943 }
1944 goto error;
1945 }
1946
1947 nb_fd = ret;
1948
1949 for (i = 0; i < nb_fd; i++) {
1950 health_code_update();
1951
1952 /* Fetch once the poll data */
1953 revents = LTTNG_POLL_GETEV(&events, i);
1954 pollfd = LTTNG_POLL_GETFD(&events, i);
1955
1956 if (!revents) {
1957 /* No activity for this FD (poll implementation). */
1958 continue;
1959 }
1960
1961 /* Thread quit pipe has been closed. Killing thread. */
1962 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
1963 if (ret) {
1964 err = 0;
1965 goto exit;
1966 }
1967
1968 /* Event on the registration socket */
1969 if (pollfd == apps_sock) {
1970 if (revents & LPOLLIN) {
1971 sock = lttcomm_accept_unix_sock(apps_sock);
1972 if (sock < 0) {
1973 goto error;
1974 }
1975
1976 /*
1977 * Set socket timeouts for both receiving and sending.
1978 * app_socket_timeout is in seconds, whereas
1979 * lttcomm_setsockopt_rcv_timeout and
1980 * lttcomm_setsockopt_snd_timeout expect msec as
1981 * parameter.
1982 */
1983 if (config.app_socket_timeout >= 0) {
1984 (void) lttcomm_setsockopt_rcv_timeout(sock,
1985 config.app_socket_timeout * 1000);
1986 (void) lttcomm_setsockopt_snd_timeout(sock,
1987 config.app_socket_timeout * 1000);
1988 }
1989
1990 /*
1991 * Set the CLOEXEC flag. Return code is useless because
1992 * either way, the show must go on.
1993 */
1994 (void) utils_set_fd_cloexec(sock);
1995
1996 /* Create UST registration command for enqueuing */
1997 ust_cmd = zmalloc(sizeof(struct ust_command));
1998 if (ust_cmd == NULL) {
1999 PERROR("ust command zmalloc");
2000 ret = close(sock);
2001 if (ret) {
2002 PERROR("close");
2003 }
2004 goto error;
2005 }
2006
2007 /*
2008 * Using message-based transmissions to ensure we don't
2009 * have to deal with partially received messages.
2010 */
2011 ret = lttng_fd_get(LTTNG_FD_APPS, 1);
2012 if (ret < 0) {
2013 ERR("Exhausted file descriptors allowed for applications.");
2014 free(ust_cmd);
2015 ret = close(sock);
2016 if (ret) {
2017 PERROR("close");
2018 }
2019 sock = -1;
2020 continue;
2021 }
2022
2023 health_code_update();
2024 ret = ust_app_recv_registration(sock, &ust_cmd->reg_msg);
2025 if (ret < 0) {
2026 free(ust_cmd);
2027 /* Close socket of the application. */
2028 ret = close(sock);
2029 if (ret) {
2030 PERROR("close");
2031 }
2032 lttng_fd_put(LTTNG_FD_APPS, 1);
2033 sock = -1;
2034 continue;
2035 }
2036 health_code_update();
2037
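/*
 * Ownership of the socket is transferred to the command; reset the
 * local variable so the cleanup path does not close it a second time.
 */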
2038 ust_cmd->sock = sock;
2039 sock = -1;
2040
2041 DBG("UST registration received with pid:%d ppid:%d uid:%d"
2042 " gid:%d sock:%d name:%s (version %d.%d)",
2043 ust_cmd->reg_msg.pid, ust_cmd->reg_msg.ppid,
2044 ust_cmd->reg_msg.uid, ust_cmd->reg_msg.gid,
2045 ust_cmd->sock, ust_cmd->reg_msg.name,
2046 ust_cmd->reg_msg.major, ust_cmd->reg_msg.minor);
2047
2048 /*
2049 * Lock-free enqueue of the registration request. The red pill
2050 * has been taken! This app will be part of the *system*.
2051 */
2052 cds_wfcq_enqueue(&ust_cmd_queue.head, &ust_cmd_queue.tail, &ust_cmd->node);
2053
2054 /*
2055 * Wake the registration queue futex. Implicit memory
2056 * barrier with the exchange in cds_wfcq_enqueue.
2057 */
2058 futex_nto1_wake(&ust_cmd_queue.futex);
2059 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
2060 ERR("Register apps socket poll error");
2061 goto error;
2062 } else {
2063 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
2064 goto error;
2065 }
2066 }
2067 }
2068 }
2069
2070 exit:
2071 error:
2072 /* Notify that the registration thread is gone */
2073 notify_ust_apps(0);
2074
2075 if (apps_sock >= 0) {
2076 ret = close(apps_sock);
2077 if (ret) {
2078 PERROR("close");
2079 }
2080 }
2081 if (sock >= 0) {
2082 ret = close(sock);
2083 if (ret) {
2084 PERROR("close");
2085 }
2086 lttng_fd_put(LTTNG_FD_APPS, 1);
2087 }
2088 unlink(config.apps_unix_sock_path.value);
2089
2090 error_poll_add:
2091 lttng_poll_clean(&events);
2092 error_listen:
2093 error_create_poll:
2094 error_testpoint:
2095 DBG("UST Registration thread cleanup complete");
2096 if (err) {
2097 health_error();
2098 ERR("Health error occurred in %s", __func__);
2099 }
2100 health_unregister(health_sessiond);
2101
2102 return NULL;
2103 }
2104
2105 /*
2106 * Start the thread_manage_consumer. This must be done after an lttng-consumerd
2107 * exec, or it will fail.
2108 */
2109 static int spawn_consumer_thread(struct consumer_data *consumer_data)
2110 {
2111 int ret, clock_ret;
2112 struct timespec timeout;
2113
2114 /*
2115 * Make sure we set the readiness flag to 0 because we are NOT ready.
2116 * This access to consumer_thread_is_ready does not need to be
2117 * protected by consumer_data.cond_mutex (yet) since the consumer
2118 * management thread has not been started at this point.
2119 */
2120 consumer_data->consumer_thread_is_ready = 0;
2121
2122 /* Setup pthread condition */
2123 ret = pthread_condattr_init(&consumer_data->condattr);
2124 if (ret) {
2125 errno = ret;
2126 PERROR("pthread_condattr_init consumer data");
2127 goto error;
2128 }
2129
2130 /*
2131 * Set the monotonic clock in order to make sure we DO NOT jump in time
2132 * between the clock_gettime() call and the timedwait call. See bug #324
2133 * for more details and how we noticed it.
2134 */
2135 ret = pthread_condattr_setclock(&consumer_data->condattr, CLOCK_MONOTONIC);
2136 if (ret) {
2137 errno = ret;
2138 PERROR("pthread_condattr_setclock consumer data");
2139 goto error;
2140 }
2141
2142 ret = pthread_cond_init(&consumer_data->cond, &consumer_data->condattr);
2143 if (ret) {
2144 errno = ret;
2145 PERROR("pthread_cond_init consumer data");
2146 goto error;
2147 }
2148
2149 ret = pthread_create(&consumer_data->thread, default_pthread_attr(),
2150 thread_manage_consumer, consumer_data);
2151 if (ret) {
2152 errno = ret;
2153 PERROR("pthread_create consumer");
2154 ret = -1;
2155 goto error;
2156 }
2157
2158 /* We are about to wait on a pthread condition */
2159 pthread_mutex_lock(&consumer_data->cond_mutex);
2160
2161 /* Get the current time for the condition wait's absolute timeout */
2162 clock_ret = lttng_clock_gettime(CLOCK_MONOTONIC, &timeout);
2163 /*
2164 * Set the timeout for the condition timed wait even if the clock_gettime()
2165 * call fails, since we might loop on that call and we want to avoid
2166 * incrementing the timeout too many times.
2167 */
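/*
 * Note that pthread_cond_timedwait() expects an *absolute* expiry time
 * on the clock selected above: "now" plus DEFAULT_SEM_WAIT_TIMEOUT
 * seconds.
 */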
2168 timeout.tv_sec += DEFAULT_SEM_WAIT_TIMEOUT;
2169
2170 /*
2171 * The following loop COULD be skipped in some conditions, which is
2172 * why we set ret to 0: to make sure at least one round of the loop
2173 * is done.
2174 */
2175 ret = 0;
2176
2177 /*
2178 * Loop until the condition is signalled or a timeout is reached. Note
2179 * that the pthread_cond_timedwait(3P) man page specifies that EINTR can
2180 * NOT be returned, but pthread_cond(3), from the glibc documentation,
2181 * says that it is possible. This loop does not take any chances and
2182 * works with both behaviours.
2183 */
2184 while (!consumer_data->consumer_thread_is_ready && ret != ETIMEDOUT) {
2185 if (clock_ret < 0) {
2186 PERROR("clock_gettime spawn consumer");
2187 /* Infinite wait for the consumerd thread to be ready */
2188 ret = pthread_cond_wait(&consumer_data->cond,
2189 &consumer_data->cond_mutex);
2190 } else {
2191 ret = pthread_cond_timedwait(&consumer_data->cond,
2192 &consumer_data->cond_mutex, &timeout);
2193 }
2194 }
2195
2196 /* Release the pthread condition */
2197 pthread_mutex_unlock(&consumer_data->cond_mutex);
2198
2199 if (ret != 0) {
2200 errno = ret;
2201 if (ret == ETIMEDOUT) {
2202 int pth_ret;
2203
2204 /*
2205 * The call has timed out, so we kill the consumer thread and return
2206 * an error.
2207 */
2208 ERR("Condition timed out. The consumer thread was never ready."
2209 " Killing it");
2210 pth_ret = pthread_cancel(consumer_data->thread);
2211 if (pth_ret < 0) {
2212 PERROR("pthread_cancel consumer thread");
2213 }
2214 } else {
2215 PERROR("pthread_cond_wait failed consumer thread");
2216 }
2217 /* Caller is expecting a negative value on failure. */
2218 ret = -1;
2219 goto error;
2220 }
2221
2222 pthread_mutex_lock(&consumer_data->pid_mutex);
2223 if (consumer_data->pid == 0) {
2224 ERR("Consumerd did not start");
2225 pthread_mutex_unlock(&consumer_data->pid_mutex);
2226 goto error;
2227 }
2228 pthread_mutex_unlock(&consumer_data->pid_mutex);
2229
2230 return 0;
2231
2232 error:
2233 return ret;
2234 }
2235
2236 /*
2237 * Terminate the consumer daemon and join its management thread.
2238 */
2239 static int join_consumer_thread(struct consumer_data *consumer_data)
2240 {
2241 void *status;
2242
2243 /* Consumer pid must be a real one. */
2244 if (consumer_data->pid > 0) {
2245 int ret;
2246 ret = kill(consumer_data->pid, SIGTERM);
2247 if (ret) {
2248 PERROR("Error killing consumer daemon");
2249 return ret;
2250 }
2251 return pthread_join(consumer_data->thread, &status);
2252 } else {
2253 return 0;
2254 }
2255 }
2256
2257 /*
2258 * Fork and exec a consumer daemon (consumerd).
2259 *
2260 * Return the child pid on success, else a negative error code.
2261 */
2262 static pid_t spawn_consumerd(struct consumer_data *consumer_data)
2263 {
2264 int ret;
2265 pid_t pid;
2266 const char *consumer_to_use;
2267 const char *verbosity;
2268 struct stat st;
2269
2270 DBG("Spawning consumerd");
2271
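/*
 * On success, the child execs the selected lttng-consumerd binary and
 * never returns; the parent returns the child's pid to the caller.
 */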
2272 pid = fork();
2273 if (pid == 0) {
2274 /*
2275 * Exec consumerd.
2276 */
2277 if (config.verbose_consumer) {
2278 verbosity = "--verbose";
2279 } else if (lttng_opt_quiet) {
2280 verbosity = "--quiet";
2281 } else {
2282 verbosity = "";
2283 }
2284
2285 switch (consumer_data->type) {
2286 case LTTNG_CONSUMER_KERNEL:
2287 /*
2288 * Find out which consumerd to execute. We will first try the
2289 * 64-bit path, then the sessiond's installation directory, and
2290 * fall back on the 32-bit one.
2291 */
2292 DBG3("Looking for a kernel consumer at these locations:");
2293 DBG3(" 1) %s", config.consumerd64_bin_path.value ? : "NULL");
2294 DBG3(" 2) %s/%s", INSTALL_BIN_PATH, DEFAULT_CONSUMERD_FILE);
2295 DBG3(" 3) %s", config.consumerd32_bin_path.value ? : "NULL");
2296 if (stat(config.consumerd64_bin_path.value, &st) == 0) {
2297 DBG3("Found location #1");
2298 consumer_to_use = config.consumerd64_bin_path.value;
2299 } else if (stat(INSTALL_BIN_PATH "/" DEFAULT_CONSUMERD_FILE, &st) == 0) {
2300 DBG3("Found location #2");
2301 consumer_to_use = INSTALL_BIN_PATH "/" DEFAULT_CONSUMERD_FILE;
2302 } else if (config.consumerd32_bin_path.value &&
2303 stat(config.consumerd32_bin_path.value, &st) == 0) {
2304 DBG3("Found location #3");
2305 consumer_to_use = config.consumerd32_bin_path.value;
2306 } else {
2307 DBG("Could not find any valid consumerd executable");
2308 ret = -EINVAL;
2309 goto error;
2310 }
2311 DBG("Using kernel consumer at: %s", consumer_to_use);
2312 (void) execl(consumer_to_use,
2313 "lttng-consumerd", verbosity, "-k",
2314 "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
2315 "--consumerd-err-sock", consumer_data->err_unix_sock_path,
2316 "--group", config.tracing_group_name.value,
2317 NULL);
2318 break;
2319 case LTTNG_CONSUMER64_UST:
2320 {
2321 if (config.consumerd64_lib_dir.value) {
2322 char *tmp;
2323 size_t tmplen;
2324 char *tmpnew;
2325
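/*
 * Prepend the configured 64-bit consumerd library directory to
 * LD_LIBRARY_PATH so the dynamic linker resolves the spawned
 * consumerd's libraries from there first.
 */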
2326 tmp = lttng_secure_getenv("LD_LIBRARY_PATH");
2327 if (!tmp) {
2328 tmp = "";
2329 }
2330 tmplen = strlen(config.consumerd64_lib_dir.value) + 1 /* : */ + strlen(tmp);
2331 tmpnew = zmalloc(tmplen + 1 /* \0 */);
2332 if (!tmpnew) {
2333 ret = -ENOMEM;
2334 goto error;
2335 }
2336 strcat(tmpnew, config.consumerd64_lib_dir.value);
2337 if (tmp[0] != '\0') {
2338 strcat(tmpnew, ":");
2339 strcat(tmpnew, tmp);
2340 }
2341 ret = setenv("LD_LIBRARY_PATH", tmpnew, 1);
2342 free(tmpnew);
2343 if (ret) {
2344 ret = -errno;
2345 goto error;
2346 }
2347 }
2348 DBG("Using 64-bit UST consumer at: %s", config.consumerd64_bin_path.value);
2349 (void) execl(config.consumerd64_bin_path.value, "lttng-consumerd", verbosity, "-u",
2350 "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
2351 "--consumerd-err-sock", consumer_data->err_unix_sock_path,
2352 "--group", config.tracing_group_name.value,
2353 NULL);
2354 break;
2355 }
2356 case LTTNG_CONSUMER32_UST:
2357 {
2358 if (config.consumerd32_lib_dir.value) {
2359 char *tmp;
2360 size_t tmplen;
2361 char *tmpnew;
2362
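/*
 * Same dance as for the 64-bit consumer: prepend the 32-bit library
 * directory to LD_LIBRARY_PATH before the exec.
 */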
2363 tmp = lttng_secure_getenv("LD_LIBRARY_PATH");
2364 if (!tmp) {
2365 tmp = "";
2366 }
2367 tmplen = strlen(config.consumerd32_lib_dir.value) + 1 /* : */ + strlen(tmp);
2368 tmpnew = zmalloc(tmplen + 1 /* \0 */);
2369 if (!tmpnew) {
2370 ret = -ENOMEM;
2371 goto error;
2372 }
2373 strcat(tmpnew, config.consumerd32_lib_dir.value);
2374 if (tmp[0] != '\0') {
2375 strcat(tmpnew, ":");
2376 strcat(tmpnew, tmp);
2377 }
2378 ret = setenv("LD_LIBRARY_PATH", tmpnew, 1);
2379 free(tmpnew);
2380 if (ret) {
2381 ret = -errno;
2382 goto error;
2383 }
2384 }
2385 DBG("Using 32-bit UST consumer at: %s", config.consumerd32_bin_path.value);
2386 (void) execl(config.consumerd32_bin_path.value, "lttng-consumerd", verbosity, "-u",
2387 "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
2388 "--consumerd-err-sock", consumer_data->err_unix_sock_path,
2389 "--group", config.tracing_group_name.value,
2390 NULL);
2391 break;
2392 }
2393 default:
2394 ERR("unknown consumer type");
2395 errno = 0;
2396 }
2397 if (errno != 0) {
2398 PERROR("Consumer execl()");
2399 }
2400 /* Reaching this point means our execl() failed. */
2401 exit(EXIT_FAILURE);
2402 } else if (pid > 0) {
2403 ret = pid;
2404 } else {
2405 PERROR("start consumer fork");
2406 ret = -errno;
2407 }
2408 error:
2409 return ret;
2410 }
2411
2412 /*
2413 * Spawn the consumerd daemon and the consumer management thread.
2414 */
2415 static int start_consumerd(struct consumer_data *consumer_data)
2416 {
2417 int ret;
2418
2419 /*
2420 * Set the listen() state on the socket since there is a possible race
2421 * between the exec() of the consumer daemon and this call if placed in the
2422 * consumer thread. See bug #366 for more details.
2423 */
2424 ret = lttcomm_listen_unix_sock(consumer_data->err_sock);
2425 if (ret < 0) {
2426 goto error;
2427 }
2428
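/*
 * At most one consumerd of each type is spawned; if a pid is already
 * recorded, another caller won the race and there is nothing to do.
 */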
2429 pthread_mutex_lock(&consumer_data->pid_mutex);
2430 if (consumer_data->pid != 0) {
2431 pthread_mutex_unlock(&consumer_data->pid_mutex);
2432 goto end;
2433 }
2434
2435 ret = spawn_consumerd(consumer_data);
2436 if (ret < 0) {
2437 ERR("Spawning consumerd failed");
2438 pthread_mutex_unlock(&consumer_data->pid_mutex);
2439 goto error;
2440 }
2441
2442 /* Setting up the consumer_data pid */
2443 consumer_data->pid = ret;
2444 DBG2("Consumer pid %d", consumer_data->pid);
2445 pthread_mutex_unlock(&consumer_data->pid_mutex);
2446
2447 DBG2("Spawning consumer control thread");
2448 ret = spawn_consumer_thread(consumer_data);
2449 if (ret < 0) {
2450 ERR("Fatal error spawning consumer control thread");
2451 goto error;
2452 }
2453
2454 end:
2455 return 0;
2456
2457 error:
2458 /* Cleanup already created sockets on error. */
2459 if (consumer_data->err_sock >= 0) {
2460 int err;
2461
2462 err = close(consumer_data->err_sock);
2463 if (err < 0) {
2464 PERROR("close consumer data error socket");
2465 }
2466 }
2467 return ret;
2468 }
2469
2470 /*
2471 * Setup necessary data for kernel tracer action.
2472 */
2473 static int init_kernel_tracer(void)
2474 {
2475 int ret;
2476
2477 /* Modprobe lttng kernel modules */
2478 ret = modprobe_lttng_control();
2479 if (ret < 0) {
2480 goto error;
2481 }
2482
2483 /* Open the lttng tracer control file */
2484 kernel_tracer_fd = open(module_proc_lttng, O_RDWR);
2485 if (kernel_tracer_fd < 0) {
2486 DBG("Failed to open %s", module_proc_lttng);
2487 goto error_open;
2488 }
2489
2490 /* Validate kernel version */
2491 ret = kernel_validate_version(kernel_tracer_fd, &kernel_tracer_version,
2492 &kernel_tracer_abi_version);
2493 if (ret < 0) {
2494 goto error_version;
2495 }
2496
2497 ret = modprobe_lttng_data();
2498 if (ret < 0) {
2499 goto error_modules;
2500 }
2501
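/*
 * Per the checks below: a negative return means error, 0 means the
 * feature is unsupported and 1 means it is supported.
 */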
2502 ret = kernel_supports_ring_buffer_snapshot_sample_positions(
2503 kernel_tracer_fd);
2504 if (ret < 0) {
2505 goto error_modules;
2506 }
2507
2508 if (ret < 1) {
2509 WARN("Kernel tracer does not support buffer monitoring. "
2510 "The monitoring timer of channels in the kernel domain "
2511 "will be set to 0 (disabled).");
2512 }
2513
2514 DBG("Kernel tracer fd %d", kernel_tracer_fd);
2515 return 0;
2516
2517 error_version:
2518 modprobe_remove_lttng_control();
2519 ret = close(kernel_tracer_fd);
2520 if (ret) {
2521 PERROR("close");
2522 }
2523 kernel_tracer_fd = -1;
2524 return LTTNG_ERR_KERN_VERSION;
2525
2526 error_modules:
2527 ret = close(kernel_tracer_fd);
2528 if (ret) {
2529 PERROR("close");
2530 }
2531
2532 error_open:
2533 modprobe_remove_lttng_control();
2534
2535 error:
2536 WARN("No kernel tracer available");
2537 kernel_tracer_fd = -1;
2538 if (!is_root) {
2539 return LTTNG_ERR_NEED_ROOT_SESSIOND;
2540 } else {
2541 return LTTNG_ERR_KERN_NA;
2542 }
2543 }
2544
2545
2546 /*
2547 * Copy consumer output from the tracing session to the domain session. The
2548 * function also applies the right modifications, on a per-domain basis, to
2549 * the trace files' destination directory.
2550 *
2551 * Should *NOT* be called with RCU read-side lock held.
2552 */
2553 static int copy_session_consumer(int domain, struct ltt_session *session)
2554 {
2555 int ret;
2556 const char *dir_name;
2557 struct consumer_output *consumer;
2558
2559 assert(session);
2560 assert(session->consumer);
2561
2562 switch (domain) {
2563 case LTTNG_DOMAIN_KERNEL:
2564 DBG3("Copying tracing session consumer output in kernel session");
2565 /*
2566 * XXX: We should audit the session creation and what this function
2567 * does "extra" in order to avoid a destroy since this function is used
2568 * in the domain session creation (kernel and ust) only. Same for UST
2569 * domain.
2570 */
2571 if (session->kernel_session->consumer) {
2572 consumer_output_put(session->kernel_session->consumer);
2573 }
2574 session->kernel_session->consumer =
2575 consumer_copy_output(session->consumer);
2576 /* Ease our life a bit for the next part */
2577 consumer = session->kernel_session->consumer;
2578 dir_name = DEFAULT_KERNEL_TRACE_DIR;
2579 break;
2580 case LTTNG_DOMAIN_JUL:
2581 case LTTNG_DOMAIN_LOG4J:
2582 case LTTNG_DOMAIN_PYTHON:
2583 case LTTNG_DOMAIN_UST:
2584 DBG3("Copying tracing session consumer output in UST session");
2585 if (session->ust_session->consumer) {
2586 consumer_output_put(session->ust_session->consumer);
2587 }
2588 session->ust_session->consumer =
2589 consumer_copy_output(session->consumer);
2590 /* Ease our life a bit for the next part */
2591 consumer = session->ust_session->consumer;
2592 dir_name = DEFAULT_UST_TRACE_DIR;
2593 break;
2594 default:
2595 ret = LTTNG_ERR_UNKNOWN_DOMAIN;
2596 goto error;
2597 }
2598
2599 /* Append correct directory to subdir */
2600 strncat(consumer->subdir, dir_name,
2601 sizeof(consumer->subdir) - strlen(consumer->subdir) - 1);
2602 DBG3("Copy session consumer subdir %s", consumer->subdir);
2603
2604 ret = LTTNG_OK;
2605
2606 error:
2607 return ret;
2608 }
2609
2610 /*
2611 * Create a UST session and attach it to the given session.
2612 *
2613 * Should *NOT* be called with RCU read-side lock held.
2614 */
2615 static int create_ust_session(struct ltt_session *session,
2616 struct lttng_domain *domain)
2617 {
2618 int ret;
2619 struct ltt_ust_session *lus = NULL;
2620
2621 assert(session);
2622 assert(domain);
2623 assert(session->consumer);
2624
2625 switch (domain->type) {
2626 case LTTNG_DOMAIN_JUL:
2627 case LTTNG_DOMAIN_LOG4J:
2628 case LTTNG_DOMAIN_PYTHON:
2629 case LTTNG_DOMAIN_UST:
2630 break;
2631 default:
2632 ERR("Unknown UST domain on create session %d", domain->type);
2633 ret = LTTNG_ERR_UNKNOWN_DOMAIN;
2634 goto error;
2635 }
2636
2637 DBG("Creating UST session");
2638
2639 lus = trace_ust_create_session(session->id);
2640 if (lus == NULL) {
2641 ret = LTTNG_ERR_UST_SESS_FAIL;
2642 goto error;
2643 }
2644
2645 lus->uid = session->uid;
2646 lus->gid = session->gid;
2647 lus->output_traces = session->output_traces;
2648 lus->snapshot_mode = session->snapshot_mode;
2649 lus->live_timer_interval = session->live_timer;
2650 session->ust_session = lus;
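/*
 * When a shared memory path is set on the session, derive the UST
 * root shm path and its "/ust" sub-path from it.
 */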
2651 if (session->shm_path[0]) {
2652 strncpy(lus->root_shm_path, session->shm_path,
2653 sizeof(lus->root_shm_path));
2654 lus->root_shm_path[sizeof(lus->root_shm_path) - 1] = '\0';
2655 strncpy(lus->shm_path, session->shm_path,
2656 sizeof(lus->shm_path));
2657 lus->shm_path[sizeof(lus->shm_path) - 1] = '\0';
2658 strncat(lus->shm_path, "/ust",
2659 sizeof(lus->shm_path) - strlen(lus->shm_path) - 1);
2660 }
2661 /* Copy session output to the newly created UST session */
2662 ret = copy_session_consumer(domain->type, session);
2663 if (ret != LTTNG_OK) {
2664 goto error;
2665 }
2666
2667 return LTTNG_OK;
2668
2669 error:
2670 free(lus);
2671 session->ust_session = NULL;
2672 return ret;
2673 }
2674
2675 /*
2676 * Create a kernel tracer session then create the default channel.
2677 */
2678 static int create_kernel_session(struct ltt_session *session)
2679 {
2680 int ret;
2681
2682 DBG("Creating kernel session");
2683
2684 ret = kernel_create_session(session, kernel_tracer_fd);
2685 if (ret < 0) {
2686 ret = LTTNG_ERR_KERN_SESS_FAIL;
2687 goto error;
2688 }
2689
2690 /* Code flow safety */
2691 assert(session->kernel_session);
2692
2693 /* Copy session output to the newly created Kernel session */
2694 ret = copy_session_consumer(LTTNG_DOMAIN_KERNEL, session);
2695 if (ret != LTTNG_OK) {
2696 goto error;
2697 }
2698
2699 session->kernel_session->uid = session->uid;
2700 session->kernel_session->gid = session->gid;
2701 session->kernel_session->output_traces = session->output_traces;
2702 session->kernel_session->snapshot_mode = session->snapshot_mode;
2703
2704 return LTTNG_OK;
2705
2706 error:
2707 trace_kernel_destroy_session(session->kernel_session);
2708 session->kernel_session = NULL;
2709 return ret;
2710 }
2711
2712 /*
2713 * Count the number of sessions accessible by the given uid/gid.
2714 */
2715 static unsigned int lttng_sessions_count(uid_t uid, gid_t gid)
2716 {
2717 unsigned int i = 0;
2718 struct ltt_session *session;
2719
2720 DBG("Counting number of available session for UID %d GID %d",
2721 uid, gid);
2722 cds_list_for_each_entry(session, &session_list_ptr->head, list) {
2723 /*
2724 * Only list the sessions the user can control.
2725 */
2726 if (!session_access_ok(session, uid, gid)) {
2727 continue;
2728 }
2729 i++;
2730 }
2731 return i;
2732 }
2733
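/*
 * Receive a serialized userspace probe location and the target binary's
 * file descriptor from the client, then attach the reconstructed probe
 * location to the event.
 */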
2734 static int receive_userspace_probe(struct command_ctx *cmd_ctx, int sock,
2735 int *sock_error, struct lttng_event *event)
2736 {
2737 int fd, ret;
2738 struct lttng_userspace_probe_location *probe_location;
2739 const struct lttng_userspace_probe_location_lookup_method *lookup = NULL;
2740 struct lttng_dynamic_buffer probe_location_buffer;
2741 struct lttng_buffer_view buffer_view;
2742
2743 /*
2744 * Create a buffer to store the serialized version of the probe
2745 * location.
2746 */
2747 lttng_dynamic_buffer_init(&probe_location_buffer);
2748 ret = lttng_dynamic_buffer_set_size(&probe_location_buffer,
2749 cmd_ctx->lsm->u.enable.userspace_probe_location_len);
2750 if (ret) {
2751 ret = LTTNG_ERR_NOMEM;
2752 goto error;
2753 }
2754
2755 /*
2756 * Receive the probe location.
2757 */
2758 ret = lttcomm_recv_unix_sock(sock, probe_location_buffer.data,
2759 probe_location_buffer.size);
2760 if (ret <= 0) {
2761 DBG("Nothing recv() from client var len data... continuing");
2762 *sock_error = 1;
2763 lttng_dynamic_buffer_reset(&probe_location_buffer);
2764 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
2765 goto error;
2766 }
2767
2768 buffer_view = lttng_buffer_view_from_dynamic_buffer(
2769 &probe_location_buffer, 0, probe_location_buffer.size);
2770
2771 /*
2772 * Extract the probe location from the serialized version.
2773 */
2774 ret = lttng_userspace_probe_location_create_from_buffer(
2775 &buffer_view, &probe_location);
2776 if (ret < 0) {
2777 WARN("Failed to create a userspace probe location from the received buffer");
2778 lttng_dynamic_buffer_reset(&probe_location_buffer);
2779 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
2780 goto error;
2781 }
2782
2783 /*
2784 * Receive the file descriptor to the target binary from the client.
2785 */
2786 DBG("Receiving userspace probe target FD from client ...");
2787 ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
2788 if (ret <= 0) {
2789 DBG("Nothing recv() from client userspace probe fd... continuing");
2790 *sock_error = 1;
2791 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
2792 goto error;
2793 }
2794
2795 /*
2796 * Set the file descriptor received from the client through the unix
2797 * socket in the probe location.
2798 */
2799 lookup = lttng_userspace_probe_location_get_lookup_method(probe_location);
2800 if (!lookup) {
2801 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
2802 goto error;
2803 }
2804
2805 /*
2806 * From the kernel tracer's perspective, all userspace probe event types
2807 * are the same: a file and an offset.
2808 */
2809 switch (lttng_userspace_probe_location_lookup_method_get_type(lookup)) {
2810 case LTTNG_USERSPACE_PROBE_LOCATION_LOOKUP_METHOD_TYPE_FUNCTION_ELF:
2811 ret = lttng_userspace_probe_location_function_set_binary_fd(
2812 probe_location, fd);
2813 break;
2814 case LTTNG_USERSPACE_PROBE_LOCATION_LOOKUP_METHOD_TYPE_TRACEPOINT_SDT:
2815 ret = lttng_userspace_probe_location_tracepoint_set_binary_fd(
2816 probe_location, fd);
2817 break;
2818 default:
2819 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
2820 goto error;
2821 }
2822
2823 if (ret) {
2824 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
2825 goto error;
2826 }
2827
2828 /* Attach the probe location to the event. */
2829 ret = lttng_event_set_userspace_probe_location(event, probe_location);
2830 if (ret) {
2831 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
2832 goto error;
2833 }
2834
2835 lttng_dynamic_buffer_reset(&probe_location_buffer);
2836 error:
2837 return ret;
2838 }
2839
2840 /*
2841 * Check if the current kernel tracer supports the session rotation feature.
2842 * Return 1 if it does, 0 otherwise.
2843 */
2844 static int check_rotate_compatible(void)
2845 {
2846 int ret = 1;
2847
2848 if (kernel_tracer_version.major != 2 || kernel_tracer_version.minor < 11) {
2849 DBG("Kernel tracer version is not compatible with the rotation feature");
2850 ret = 0;
2851 }
2852
2853 return ret;
2854 }
2855
2856 /*
2857 * Process the command requested by the lttng client within the command
2858 * context structure. This function makes sure that the return structure (llm)
2859 * is set and ready for transmission before returning.
2860 *
2861 * Return any error encountered or 0 for success.
2862 *
2863 * "sock" is only used for special-case var. len data.
2864 *
2865 * Should *NOT* be called with RCU read-side lock held.
2866 */
2867 static int process_client_msg(struct command_ctx *cmd_ctx, int sock,
2868 int *sock_error)
2869 {
2870 int ret = LTTNG_OK;
2871 int need_tracing_session = 1;
2872 int need_domain;
2873
2874 DBG("Processing client command %d", cmd_ctx->lsm->cmd_type);
2875
2876 assert(!rcu_read_ongoing());
2877
2878 *sock_error = 0;
2879
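/*
 * Sort out which commands operate without a domain; every other command
 * requires cmd_ctx->lsm->domain to be set.
 */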
2880 switch (cmd_ctx->lsm->cmd_type) {
2881 case LTTNG_CREATE_SESSION:
2882 case LTTNG_CREATE_SESSION_SNAPSHOT:
2883 case LTTNG_CREATE_SESSION_LIVE:
2884 case LTTNG_DESTROY_SESSION:
2885 case LTTNG_LIST_SESSIONS:
2886 case LTTNG_LIST_DOMAINS:
2887 case LTTNG_START_TRACE:
2888 case LTTNG_STOP_TRACE:
2889 case LTTNG_DATA_PENDING:
2890 case LTTNG_SNAPSHOT_ADD_OUTPUT:
2891 case LTTNG_SNAPSHOT_DEL_OUTPUT:
2892 case LTTNG_SNAPSHOT_LIST_OUTPUT:
2893 case LTTNG_SNAPSHOT_RECORD:
2894 case LTTNG_SAVE_SESSION:
2895 case LTTNG_SET_SESSION_SHM_PATH:
2896 case LTTNG_REGENERATE_METADATA:
2897 case LTTNG_REGENERATE_STATEDUMP:
2898 case LTTNG_REGISTER_TRIGGER:
2899 case LTTNG_UNREGISTER_TRIGGER:
2900 case LTTNG_ROTATE_SESSION:
2901 case LTTNG_ROTATION_GET_INFO:
2902 case LTTNG_ROTATION_SET_SCHEDULE:
2903 case LTTNG_SESSION_LIST_ROTATION_SCHEDULES:
2904 need_domain = 0;
2905 break;
2906 default:
2907 need_domain = 1;
2908 }
2909
2910 if (config.no_kernel && need_domain
2911 && cmd_ctx->lsm->domain.type == LTTNG_DOMAIN_KERNEL) {
2912 if (!is_root) {
2913 ret = LTTNG_ERR_NEED_ROOT_SESSIOND;
2914 } else {
2915 ret = LTTNG_ERR_KERN_NA;
2916 }
2917 goto error;
2918 }
2919
2920 /* Deny register consumer if we already have a spawned consumer. */
2921 if (cmd_ctx->lsm->cmd_type == LTTNG_REGISTER_CONSUMER) {
2922 pthread_mutex_lock(&kconsumer_data.pid_mutex);
2923 if (kconsumer_data.pid > 0) {
2924 ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
2925 pthread_mutex_unlock(&kconsumer_data.pid_mutex);
2926 goto error;
2927 }
2928 pthread_mutex_unlock(&kconsumer_data.pid_mutex);
2929 }
2930
2931 /*
2932 * Check for commands that don't need to allocate a returned payload. We
2933 * do this here so we don't have to make that call separately for each
2934 * command with no payload.
2935 */
2936 switch (cmd_ctx->lsm->cmd_type) {
2937 case LTTNG_LIST_SESSIONS:
2938 case LTTNG_LIST_TRACEPOINTS:
2939 case LTTNG_LIST_TRACEPOINT_FIELDS:
2940 case LTTNG_LIST_DOMAINS:
2941 case LTTNG_LIST_CHANNELS:
2942 case LTTNG_LIST_EVENTS:
2943 case LTTNG_LIST_SYSCALLS:
2944 case LTTNG_LIST_TRACKER_PIDS:
2945 case LTTNG_DATA_PENDING:
2946 case LTTNG_ROTATE_SESSION:
2947 case LTTNG_ROTATION_GET_INFO:
2948 case LTTNG_SESSION_LIST_ROTATION_SCHEDULES:
2949 break;
2950 default:
2951 /* Setup lttng message with no payload */
2952 ret = setup_lttng_msg_no_cmd_header(cmd_ctx, NULL, 0);
2953 if (ret < 0) {
2954 /* This label does not try to unlock the session */
2955 goto init_setup_error;
2956 }
2957 }
2958
2959 /* Commands that DO NOT need a session. */
2960 switch (cmd_ctx->lsm->cmd_type) {
2961 case LTTNG_CREATE_SESSION:
2962 case LTTNG_CREATE_SESSION_SNAPSHOT:
2963 case LTTNG_CREATE_SESSION_LIVE:
2964 case LTTNG_LIST_SESSIONS:
2965 case LTTNG_LIST_TRACEPOINTS:
2966 case LTTNG_LIST_SYSCALLS:
2967 case LTTNG_LIST_TRACEPOINT_FIELDS:
2968 case LTTNG_SAVE_SESSION:
2969 case LTTNG_REGISTER_TRIGGER:
2970 case LTTNG_UNREGISTER_TRIGGER:
2971 need_tracing_session = 0;
2972 break;
2973 default:
2974 DBG("Getting session %s by name", cmd_ctx->lsm->session.name);
2975 /*
2976 * We keep the session list lock across _all_ commands
2977 * for now, because the per-session lock does not
2978 * handle teardown properly.
2979 */
2980 session_lock_list();
2981 cmd_ctx->session = session_find_by_name(cmd_ctx->lsm->session.name);
2982 if (cmd_ctx->session == NULL) {
2983 ret = LTTNG_ERR_SESS_NOT_FOUND;
2984 goto error;
2985 } else {
2986 /* Acquire lock for the session */
2987 session_lock(cmd_ctx->session);
2988 }
2989 break;
2990 }
2991
2992 /*
2993 * Commands that need a valid session but should NOT create one if none
2994 * exists. Instead of creating one and destroying it when the command is
2995 * handled, process that right before so we save some round trip in useless
2996 * code path.
2997 */
2998 switch (cmd_ctx->lsm->cmd_type) {
2999 case LTTNG_DISABLE_CHANNEL:
3000 case LTTNG_DISABLE_EVENT:
3001 switch (cmd_ctx->lsm->domain.type) {
3002 case LTTNG_DOMAIN_KERNEL:
3003 if (!cmd_ctx->session->kernel_session) {
3004 ret = LTTNG_ERR_NO_CHANNEL;
3005 goto error;
3006 }
3007 break;
3008 case LTTNG_DOMAIN_JUL:
3009 case LTTNG_DOMAIN_LOG4J:
3010 case LTTNG_DOMAIN_PYTHON:
3011 case LTTNG_DOMAIN_UST:
3012 if (!cmd_ctx->session->ust_session) {
3013 ret = LTTNG_ERR_NO_CHANNEL;
3014 goto error;
3015 }
3016 break;
3017 default:
3018 ret = LTTNG_ERR_UNKNOWN_DOMAIN;
3019 goto error;
3020 }
3021 default:
3022 break;
3023 }
3024
3025 if (!need_domain) {
3026 goto skip_domain;
3027 }
3028
3029 /*
3030 * Check domain type for specific "pre-action".
3031 */
3032 switch (cmd_ctx->lsm->domain.type) {
3033 case LTTNG_DOMAIN_KERNEL:
3034 if (!is_root) {
3035 ret = LTTNG_ERR_NEED_ROOT_SESSIOND;
3036 goto error;
3037 }
3038
3039 /* Kernel tracer check */
3040 if (kernel_tracer_fd == -1) {
3041 /* Basically, load kernel tracer modules */
3042 ret = init_kernel_tracer();
3043 if (ret != 0) {
3044 goto error;
3045 }
3046 }
3047
3048 /* Consumer is in an ERROR state. Report back to client */
3049 if (uatomic_read(&kernel_consumerd_state) == CONSUMER_ERROR) {
3050 ret = LTTNG_ERR_NO_KERNCONSUMERD;
3051 goto error;
3052 }
3053
3054 /* Need a session for kernel command */
3055 if (need_tracing_session) {
3056 if (cmd_ctx->session->kernel_session == NULL) {
3057 ret = create_kernel_session(cmd_ctx->session);
3058 if (ret < 0) {
3059 ret = LTTNG_ERR_KERN_SESS_FAIL;
3060 goto error;
3061 }
3062 }
3063
3064 /* Start the kernel consumer daemon */
3065 pthread_mutex_lock(&kconsumer_data.pid_mutex);
3066 if (kconsumer_data.pid == 0 &&
3067 cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER) {
3068 pthread_mutex_unlock(&kconsumer_data.pid_mutex);
3069 ret = start_consumerd(&kconsumer_data);
3070 if (ret < 0) {
3071 ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
3072 goto error;
3073 }
3074 uatomic_set(&kernel_consumerd_state, CONSUMER_STARTED);
3075 } else {
3076 pthread_mutex_unlock(&kconsumer_data.pid_mutex);
3077 }
3078
3079 /*
3080 * The consumer was just spawned so we need to add the socket to
3081 * the consumer output of the session, if it exists.
3082 */
3083 ret = consumer_create_socket(&kconsumer_data,
3084 cmd_ctx->session->kernel_session->consumer);
3085 if (ret < 0) {
3086 goto error;
3087 }
3088 }
3089
3090 break;
3091 case LTTNG_DOMAIN_JUL:
3092 case LTTNG_DOMAIN_LOG4J:
3093 case LTTNG_DOMAIN_PYTHON:
3094 case LTTNG_DOMAIN_UST:
3095 {
3096 if (!ust_app_supported()) {
3097 ret = LTTNG_ERR_NO_UST;
3098 goto error;
3099 }
3100 /* Consumer is in an ERROR state. Report back to client */
3101 if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) {
3102 ret = LTTNG_ERR_NO_USTCONSUMERD;
3103 goto error;
3104 }
3105
3106 if (need_tracing_session) {
3107 /* Create a UST session if none exists. */
3108 if (cmd_ctx->session->ust_session == NULL) {
3109 ret = create_ust_session(cmd_ctx->session,
3110 &cmd_ctx->lsm->domain);
3111 if (ret != LTTNG_OK) {
3112 goto error;
3113 }
3114 }
3115
3116 /* Start the UST consumer daemons */
3117 /* 64-bit */
3118 pthread_mutex_lock(&ustconsumer64_data.pid_mutex);
3119 if (config.consumerd64_bin_path.value &&
3120 ustconsumer64_data.pid == 0 &&
3121 cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER) {
3122 pthread_mutex_unlock(&ustconsumer64_data.pid_mutex);
3123 ret = start_consumerd(&ustconsumer64_data);
3124 if (ret < 0) {
3125 ret = LTTNG_ERR_UST_CONSUMER64_FAIL;
3126 uatomic_set(&ust_consumerd64_fd, -EINVAL);
3127 goto error;
3128 }
3129
3130 uatomic_set(&ust_consumerd64_fd, ustconsumer64_data.cmd_sock);
3131 uatomic_set(&ust_consumerd_state, CONSUMER_STARTED);
3132 } else {
3133 pthread_mutex_unlock(&ustconsumer64_data.pid_mutex);
3134 }
3135
3136 /*
3137 * Set up the socket for the 64-bit consumer. No need for atomic access
3138 * since it was set above and can ONLY be set in this thread.
3139 */
3140 ret = consumer_create_socket(&ustconsumer64_data,
3141 cmd_ctx->session->ust_session->consumer);
3142 if (ret < 0) {
3143 goto error;
3144 }
3145
3146 /* 32-bit */
3147 pthread_mutex_lock(&ustconsumer32_data.pid_mutex);
3148 if (config.consumerd32_bin_path.value &&
3149 ustconsumer32_data.pid == 0 &&
3150 cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER) {
3151 pthread_mutex_unlock(&ustconsumer32_data.pid_mutex);
3152 ret = start_consumerd(&ustconsumer32_data);
3153 if (ret < 0) {
3154 ret = LTTNG_ERR_UST_CONSUMER32_FAIL;
3155 uatomic_set(&ust_consumerd32_fd, -EINVAL);
3156 goto error;
3157 }
3158
3159 uatomic_set(&ust_consumerd32_fd, ustconsumer32_data.cmd_sock);
3160 uatomic_set(&ust_consumerd_state, CONSUMER_STARTED);
3161 } else {
3162 pthread_mutex_unlock(&ustconsumer32_data.pid_mutex);
3163 }
3164
3165 /*
3166 * Set up the socket for the 32-bit consumer. No need for atomic access
3167 * since it was set above and can ONLY be set in this thread.
3168 */
3169 ret = consumer_create_socket(&ustconsumer32_data,
3170 cmd_ctx->session->ust_session->consumer);
3171 if (ret < 0) {
3172 goto error;
3173 }
3174 }
3175 break;
3176 }
3177 default:
3178 break;
3179 }
3180 skip_domain:
3181
3182 /* Validate the consumer daemon state on start/stop trace commands */
3183 if (cmd_ctx->lsm->cmd_type == LTTNG_START_TRACE ||
3184 cmd_ctx->lsm->cmd_type == LTTNG_STOP_TRACE) {
3185 switch (cmd_ctx->lsm->domain.type) {
3186 case LTTNG_DOMAIN_NONE:
3187 break;
3188 case LTTNG_DOMAIN_JUL:
3189 case LTTNG_DOMAIN_LOG4J:
3190 case LTTNG_DOMAIN_PYTHON:
3191 case LTTNG_DOMAIN_UST:
3192 if (uatomic_read(&ust_consumerd_state) != CONSUMER_STARTED) {
3193 ret = LTTNG_ERR_NO_USTCONSUMERD;
3194 goto error;
3195 }
3196 break;
3197 case LTTNG_DOMAIN_KERNEL:
3198 if (uatomic_read(&kernel_consumerd_state) != CONSUMER_STARTED) {
3199 ret = LTTNG_ERR_NO_KERNCONSUMERD;
3200 goto error;
3201 }
3202 break;
3203 default:
3204 ret = LTTNG_ERR_UNKNOWN_DOMAIN;
3205 goto error;
3206 }
3207 }
3208
3209 /*
3210 * Check that the UID or GID matches that of the tracing session.
3211 * The root user can interact with all sessions.
3212 */
3213 if (need_tracing_session) {
3214 if (!session_access_ok(cmd_ctx->session,
3215 LTTNG_SOCK_GET_UID_CRED(&cmd_ctx->creds),
3216 LTTNG_SOCK_GET_GID_CRED(&cmd_ctx->creds))) {
3217 ret = LTTNG_ERR_EPERM;
3218 goto error;