7a0cdb81c83ac0c451341aa41ed29c57d4d3cb77
[lttng-tools.git] / src / bin / lttng-sessiond / main.c
1 /*
2 * Copyright (C) 2011 - David Goulet <david.goulet@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * 2013 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define _LGPL_SOURCE
21 #include <getopt.h>
22 #include <grp.h>
23 #include <limits.h>
24 #include <paths.h>
25 #include <pthread.h>
26 #include <signal.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <inttypes.h>
31 #include <sys/mman.h>
32 #include <sys/mount.h>
33 #include <sys/resource.h>
34 #include <sys/socket.h>
35 #include <sys/stat.h>
36 #include <sys/types.h>
37 #include <sys/wait.h>
38 #include <urcu/uatomic.h>
39 #include <unistd.h>
40 #include <ctype.h>
41
42 #include <common/common.h>
43 #include <common/compat/socket.h>
44 #include <common/compat/getenv.h>
45 #include <common/defaults.h>
46 #include <common/kernel-consumer/kernel-consumer.h>
47 #include <common/futex.h>
48 #include <common/relayd/relayd.h>
49 #include <common/utils.h>
50 #include <common/daemonize.h>
51 #include <common/config/session-config.h>
52 #include <common/dynamic-buffer.h>
53 #include <lttng/userspace-probe-internal.h>
54 #include <lttng/event-internal.h>
55
56 #include "lttng-sessiond.h"
57 #include "buffer-registry.h"
58 #include "channel.h"
59 #include "cmd.h"
60 #include "consumer.h"
61 #include "context.h"
62 #include "event.h"
63 #include "kernel.h"
64 #include "kernel-consumer.h"
65 #include "modprobe.h"
66 #include "shm.h"
67 #include "ust-ctl.h"
68 #include "ust-consumer.h"
69 #include "utils.h"
70 #include "fd-limit.h"
71 #include "health-sessiond.h"
72 #include "testpoint.h"
73 #include "ust-thread.h"
74 #include "agent-thread.h"
75 #include "save.h"
76 #include "load-session-thread.h"
77 #include "notification-thread.h"
78 #include "notification-thread-commands.h"
79 #include "rotation-thread.h"
80 #include "lttng-syscall.h"
81 #include "agent.h"
82 #include "ht-cleanup.h"
83 #include "sessiond-config.h"
84 #include "timer.h"
85
/*
 * Help text: the contents of the generated lttng-sessiond.8.h header when
 * LTTNG_EMBED_HELP is defined at build time, NULL otherwise.
 */
static const char *help_msg =
#ifndef LTTNG_EMBED_HELP
NULL
#else
#include <lttng-sessiond.8.h>
#endif
;

const char *progname;

/* File descriptor of the lock file; -1 while no lock file is held open. */
static int lockfile_fd = -1;

/* Set to 1 when a SIGUSR1 signal is received. */
static int recv_child_signal;
99
100 static struct lttng_kernel_tracer_version kernel_tracer_version;
101 static struct lttng_kernel_tracer_abi_version kernel_tracer_abi_version;
102
103 /*
104 * Consumer daemon specific control data. Every value not initialized here is
105 * set to 0 by the static definition.
106 */
107 static struct consumer_data kconsumer_data = {
108 .type = LTTNG_CONSUMER_KERNEL,
109 .err_sock = -1,
110 .cmd_sock = -1,
111 .channel_monitor_pipe = -1,
112 .pid_mutex = PTHREAD_MUTEX_INITIALIZER,
113 .lock = PTHREAD_MUTEX_INITIALIZER,
114 .cond = PTHREAD_COND_INITIALIZER,
115 .cond_mutex = PTHREAD_MUTEX_INITIALIZER,
116 };
117 static struct consumer_data ustconsumer64_data = {
118 .type = LTTNG_CONSUMER64_UST,
119 .err_sock = -1,
120 .cmd_sock = -1,
121 .channel_monitor_pipe = -1,
122 .pid_mutex = PTHREAD_MUTEX_INITIALIZER,
123 .lock = PTHREAD_MUTEX_INITIALIZER,
124 .cond = PTHREAD_COND_INITIALIZER,
125 .cond_mutex = PTHREAD_MUTEX_INITIALIZER,
126 };
127 static struct consumer_data ustconsumer32_data = {
128 .type = LTTNG_CONSUMER32_UST,
129 .err_sock = -1,
130 .cmd_sock = -1,
131 .channel_monitor_pipe = -1,
132 .pid_mutex = PTHREAD_MUTEX_INITIALIZER,
133 .lock = PTHREAD_MUTEX_INITIALIZER,
134 .cond = PTHREAD_COND_INITIALIZER,
135 .cond_mutex = PTHREAD_MUTEX_INITIALIZER,
136 };
137
/*
 * Command line options.
 *
 * Entries whose last field is 0 have no short-option equivalent. The table
 * is terminated by an all-zero entry.
 */
static const struct option long_options[] = {
	{ "client-sock", required_argument, NULL, 'c' },
	{ "apps-sock", required_argument, NULL, 'a' },
	{ "kconsumerd-cmd-sock", required_argument, NULL, 0 },
	{ "kconsumerd-err-sock", required_argument, NULL, 0 },
	{ "ustconsumerd32-cmd-sock", required_argument, NULL, 0 },
	{ "ustconsumerd32-err-sock", required_argument, NULL, 0 },
	{ "ustconsumerd64-cmd-sock", required_argument, NULL, 0 },
	{ "ustconsumerd64-err-sock", required_argument, NULL, 0 },
	{ "consumerd32-path", required_argument, NULL, 0 },
	{ "consumerd32-libdir", required_argument, NULL, 0 },
	{ "consumerd64-path", required_argument, NULL, 0 },
	{ "consumerd64-libdir", required_argument, NULL, 0 },
	{ "daemonize", no_argument, NULL, 'd' },
	{ "background", no_argument, NULL, 'b' },
	{ "sig-parent", no_argument, NULL, 'S' },
	{ "help", no_argument, NULL, 'h' },
	{ "group", required_argument, NULL, 'g' },
	{ "version", no_argument, NULL, 'V' },
	{ "quiet", no_argument, NULL, 'q' },
	{ "verbose", no_argument, NULL, 'v' },
	{ "verbose-consumer", no_argument, NULL, 0 },
	{ "no-kernel", no_argument, NULL, 0 },
	{ "pidfile", required_argument, NULL, 'p' },
	{ "agent-tcp-port", required_argument, NULL, 0 },
	{ "config", required_argument, NULL, 'f' },
	{ "load", required_argument, NULL, 'l' },
	{ "kmod-probes", required_argument, NULL, 0 },
	{ "extra-kmod-probes", required_argument, NULL, 0 },
	{ NULL, 0, NULL, 0 }
};

/* Command line options to ignore from configuration file */
static const char *config_ignore_options[] = {
	"help",
	"version",
	"config",
};
173
/*
 * Shared between threads: stored as 1 by stop_threads() right before it wakes
 * the UST command queue futex.
 */
static int dispatch_thread_exit;

/* Sockets and FDs; -1 means "not open". */
static int client_sock = -1;
static int apps_sock = -1;
static int kernel_poll_pipe[2] = { -1, -1 };

/*
 * Pipe telling the thread that manages application communication that a
 * command has been queued and is ready to be processed.
 */
static int apps_cmd_pipe[2] = { -1, -1 };

/* Thread handles, one per daemon thread. */
static pthread_t apps_thread;
static pthread_t apps_notify_thread;
static pthread_t reg_apps_thread;
static pthread_t client_thread;
static pthread_t kernel_thread;
static pthread_t dispatch_thread;
static pthread_t health_thread;
static pthread_t ht_cleanup_thread;
static pthread_t agent_reg_thread;
static pthread_t load_session_thread;
static pthread_t notification_thread;
static pthread_t rotation_thread;
static pthread_t timer_thread;
202
203 /*
204 * UST registration command queue. This queue is tied with a futex and uses a N
205 * wakers / 1 waiter implemented and detailed in futex.c/.h
206 *
207 * The thread_registration_apps and thread_dispatch_ust_registration uses this
208 * queue along with the wait/wake scheme. The thread_manage_apps receives down
209 * the line new application socket and monitors it for any I/O error or clean
210 * close that triggers an unregistration of the application.
211 */
212 static struct ust_cmd_queue ust_cmd_queue;
213
214 /*
215 * Pointer initialized before thread creation.
216 *
217 * This points to the tracing session list containing the session count and a
218 * mutex lock. The lock MUST be taken if you iterate over the list. The lock
219 * MUST NOT be taken if you call a public function in session.c.
220 *
221 * The lock is nested inside the structure: session_list_ptr->lock. Please use
222 * session_lock_list and session_unlock_list for lock acquisition.
223 */
224 static struct ltt_session_list *session_list_ptr;
225
226 static const char *module_proc_lttng = "/proc/lttng";
227
/*
 * State of a consumer daemon. It changes when the daemon is spawned, when it
 * is killed, and when a fatal error occurs.
 */
enum consumerd_state {
	CONSUMER_STARTED = 1,
	CONSUMER_STOPPED,
	CONSUMER_ERROR,
};

/*
 * The consumer daemon state is used to check whether a client command will be
 * able to reach the consumer; if it cannot, the client is told so. For
 * example, a "lttng start" issued while the state is ERROR returns an error to
 * the client.
 *
 * This scheme is racy, as the following sequence shows:
 *
 * consumer thread error happens
 * client cmd arrives
 * client cmd checks state -> still OK
 * consumer thread exit, sets error
 * client cmd try to talk to consumer
 * ...
 *
 * Since the consumer is a separate daemon, nothing can guarantee that a
 * command reaches it safely, even with this flag. A command is therefore
 * considered safe up to the state validation done while processing it; past
 * that point the correctness of the client request with respect to the
 * consumer cannot be guaranteed.
 */
static enum consumerd_state ust_consumerd_state;
static enum consumerd_state kernel_consumerd_state;

/* Data the load session thread operates on. */
static struct load_session_thread_data *load_info;

/* Name of the section to look for in the daemon configuration file. */
static const char * const config_section_name = "sessiond";

/* Set to 1 if the daemon is running as root. */
static int is_root;

/* Rotation thread handle. */
static struct rotation_thread_handle *rotation_thread_handle;
275
276 /*
277 * Stop all threads by closing the thread quit pipe.
278 */
279 static void stop_threads(void)
280 {
281 int ret;
282
283 /* Stopping all threads */
284 DBG("Terminating all threads");
285 ret = sessiond_notify_quit_pipe();
286 if (ret < 0) {
287 ERR("write error on thread quit pipe");
288 }
289
290 /* Dispatch thread */
291 CMM_STORE_SHARED(dispatch_thread_exit, 1);
292 futex_nto1_wake(&ust_cmd_queue.futex);
293 }
294
295 /*
296 * Close every consumer sockets.
297 */
298 static void close_consumer_sockets(void)
299 {
300 int ret;
301
302 if (kconsumer_data.err_sock >= 0) {
303 ret = close(kconsumer_data.err_sock);
304 if (ret < 0) {
305 PERROR("kernel consumer err_sock close");
306 }
307 }
308 if (ustconsumer32_data.err_sock >= 0) {
309 ret = close(ustconsumer32_data.err_sock);
310 if (ret < 0) {
311 PERROR("UST consumerd32 err_sock close");
312 }
313 }
314 if (ustconsumer64_data.err_sock >= 0) {
315 ret = close(ustconsumer64_data.err_sock);
316 if (ret < 0) {
317 PERROR("UST consumerd64 err_sock close");
318 }
319 }
320 if (kconsumer_data.cmd_sock >= 0) {
321 ret = close(kconsumer_data.cmd_sock);
322 if (ret < 0) {
323 PERROR("kernel consumer cmd_sock close");
324 }
325 }
326 if (ustconsumer32_data.cmd_sock >= 0) {
327 ret = close(ustconsumer32_data.cmd_sock);
328 if (ret < 0) {
329 PERROR("UST consumerd32 cmd_sock close");
330 }
331 }
332 if (ustconsumer64_data.cmd_sock >= 0) {
333 ret = close(ustconsumer64_data.cmd_sock);
334 if (ret < 0) {
335 PERROR("UST consumerd64 cmd_sock close");
336 }
337 }
338 if (kconsumer_data.channel_monitor_pipe >= 0) {
339 ret = close(kconsumer_data.channel_monitor_pipe);
340 if (ret < 0) {
341 PERROR("kernel consumer channel monitor pipe close");
342 }
343 }
344 if (ustconsumer32_data.channel_monitor_pipe >= 0) {
345 ret = close(ustconsumer32_data.channel_monitor_pipe);
346 if (ret < 0) {
347 PERROR("UST consumerd32 channel monitor pipe close");
348 }
349 }
350 if (ustconsumer64_data.channel_monitor_pipe >= 0) {
351 ret = close(ustconsumer64_data.channel_monitor_pipe);
352 if (ret < 0) {
353 PERROR("UST consumerd64 channel monitor pipe close");
354 }
355 }
356 }
357
358 /*
359 * Wait on consumer process termination.
360 *
361 * Need to be called with the consumer data lock held or from a context
362 * ensuring no concurrent access to data (e.g: cleanup).
363 */
364 static void wait_consumer(struct consumer_data *consumer_data)
365 {
366 pid_t ret;
367 int status;
368
369 if (consumer_data->pid <= 0) {
370 return;
371 }
372
373 DBG("Waiting for complete teardown of consumerd (PID: %d)",
374 consumer_data->pid);
375 ret = waitpid(consumer_data->pid, &status, 0);
376 if (ret == -1) {
377 PERROR("consumerd waitpid pid: %d", consumer_data->pid)
378 } else if (!WIFEXITED(status)) {
379 ERR("consumerd termination with error: %d",
380 WEXITSTATUS(ret));
381 }
382 consumer_data->pid = 0;
383 }
384
385 /*
386 * Cleanup the session daemon's data structures.
387 */
388 static void sessiond_cleanup(void)
389 {
390 int ret;
391 struct ltt_session *sess, *stmp;
392
393 DBG("Cleanup sessiond");
394
395 /*
396 * Close the thread quit pipe. It has already done its job,
397 * since we are now called.
398 */
399 sessiond_close_quit_pipe();
400
401 ret = remove(config.pid_file_path.value);
402 if (ret < 0) {
403 PERROR("remove pidfile %s", config.pid_file_path.value);
404 }
405
406 DBG("Removing sessiond and consumerd content of directory %s",
407 config.rundir.value);
408
409 /* sessiond */
410 DBG("Removing %s", config.pid_file_path.value);
411 (void) unlink(config.pid_file_path.value);
412
413 DBG("Removing %s", config.agent_port_file_path.value);
414 (void) unlink(config.agent_port_file_path.value);
415
416 /* kconsumerd */
417 DBG("Removing %s", kconsumer_data.err_unix_sock_path);
418 (void) unlink(kconsumer_data.err_unix_sock_path);
419
420 DBG("Removing directory %s", config.kconsumerd_path.value);
421 (void) rmdir(config.kconsumerd_path.value);
422
423 /* ust consumerd 32 */
424 DBG("Removing %s", config.consumerd32_err_unix_sock_path.value);
425 (void) unlink(config.consumerd32_err_unix_sock_path.value);
426
427 DBG("Removing directory %s", config.consumerd32_path.value);
428 (void) rmdir(config.consumerd32_path.value);
429
430 /* ust consumerd 64 */
431 DBG("Removing %s", config.consumerd64_err_unix_sock_path.value);
432 (void) unlink(config.consumerd64_err_unix_sock_path.value);
433
434 DBG("Removing directory %s", config.consumerd64_path.value);
435 (void) rmdir(config.consumerd64_path.value);
436
437 DBG("Cleaning up all sessions");
438
439 /* Destroy session list mutex */
440 if (session_list_ptr != NULL) {
441 pthread_mutex_destroy(&session_list_ptr->lock);
442
443 /* Cleanup ALL session */
444 cds_list_for_each_entry_safe(sess, stmp,
445 &session_list_ptr->head, list) {
446 cmd_destroy_session(sess, kernel_poll_pipe[1],
447 notification_thread_handle);
448 }
449 }
450
451 wait_consumer(&kconsumer_data);
452 wait_consumer(&ustconsumer64_data);
453 wait_consumer(&ustconsumer32_data);
454
455 DBG("Cleaning up all agent apps");
456 agent_app_ht_clean();
457
458 DBG("Closing all UST sockets");
459 ust_app_clean_list();
460 buffer_reg_destroy_registries();
461
462 if (is_root && !config.no_kernel) {
463 DBG2("Closing kernel fd");
464 if (kernel_tracer_fd >= 0) {
465 ret = close(kernel_tracer_fd);
466 if (ret) {
467 PERROR("close");
468 }
469 }
470 DBG("Unloading kernel modules");
471 modprobe_remove_lttng_all();
472 free(syscall_table);
473 }
474
475 close_consumer_sockets();
476
477 if (load_info) {
478 load_session_destroy_data(load_info);
479 free(load_info);
480 }
481
482 /*
483 * We do NOT rmdir rundir because there are other processes
484 * using it, for instance lttng-relayd, which can start in
485 * parallel with this teardown.
486 */
487 }
488
489 /*
490 * Cleanup the daemon's option data structures.
491 */
492 static void sessiond_cleanup_options(void)
493 {
494 DBG("Cleaning up options");
495
496 sessiond_config_fini(&config);
497
498 run_as_destroy_worker();
499 }
500
/*
 * Send len bytes from buf on the unix socket sock using the
 * liblttsessiondcomm API.
 *
 * A zero length is rejected with -1 without touching the socket; otherwise
 * the value returned by lttcomm_send_unix_sock() is passed through.
 */
static int send_unix_sock(int sock, void *buf, size_t len)
{
	return len != 0 ? lttcomm_send_unix_sock(sock, buf, len) : -1;
}
515
516 /*
517 * Free memory of a command context structure.
518 */
519 static void clean_command_ctx(struct command_ctx **cmd_ctx)
520 {
521 DBG("Clean command context structure");
522 if (*cmd_ctx) {
523 if ((*cmd_ctx)->llm) {
524 free((*cmd_ctx)->llm);
525 }
526 if ((*cmd_ctx)->lsm) {
527 free((*cmd_ctx)->lsm);
528 }
529 free(*cmd_ctx);
530 *cmd_ctx = NULL;
531 }
532 }
533
534 /*
535 * Notify UST applications using the shm mmap futex.
536 */
537 static int notify_ust_apps(int active)
538 {
539 char *wait_shm_mmap;
540
541 DBG("Notifying applications of session daemon state: %d", active);
542
543 /* See shm.c for this call implying mmap, shm and futex calls */
544 wait_shm_mmap = shm_ust_get_mmap(config.wait_shm_path.value, is_root);
545 if (wait_shm_mmap == NULL) {
546 goto error;
547 }
548
549 /* Wake waiting process */
550 futex_wait_update((int32_t *) wait_shm_mmap, active);
551
552 /* Apps notified successfully */
553 return 0;
554
555 error:
556 return -1;
557 }
558
559 /*
560 * Setup the outgoing data buffer for the response (llm) by allocating the
561 * right amount of memory and copying the original information from the lsm
562 * structure.
563 *
564 * Return 0 on success, negative value on error.
565 */
566 static int setup_lttng_msg(struct command_ctx *cmd_ctx,
567 const void *payload_buf, size_t payload_len,
568 const void *cmd_header_buf, size_t cmd_header_len)
569 {
570 int ret = 0;
571 const size_t header_len = sizeof(struct lttcomm_lttng_msg);
572 const size_t cmd_header_offset = header_len;
573 const size_t payload_offset = cmd_header_offset + cmd_header_len;
574 const size_t total_msg_size = header_len + cmd_header_len + payload_len;
575
576 cmd_ctx->llm = zmalloc(total_msg_size);
577
578 if (cmd_ctx->llm == NULL) {
579 PERROR("zmalloc");
580 ret = -ENOMEM;
581 goto end;
582 }
583
584 /* Copy common data */
585 cmd_ctx->llm->cmd_type = cmd_ctx->lsm->cmd_type;
586 cmd_ctx->llm->pid = cmd_ctx->lsm->domain.attr.pid;
587 cmd_ctx->llm->cmd_header_size = cmd_header_len;
588 cmd_ctx->llm->data_size = payload_len;
589 cmd_ctx->lttng_msg_size = total_msg_size;
590
591 /* Copy command header */
592 if (cmd_header_len) {
593 memcpy(((uint8_t *) cmd_ctx->llm) + cmd_header_offset, cmd_header_buf,
594 cmd_header_len);
595 }
596
597 /* Copy payload */
598 if (payload_len) {
599 memcpy(((uint8_t *) cmd_ctx->llm) + payload_offset, payload_buf,
600 payload_len);
601 }
602
603 end:
604 return ret;
605 }
606
/*
 * Convenience wrapper around setup_lttng_msg() for responses that carry a
 * payload but no command header.
 */
static int setup_lttng_msg_no_cmd_header(struct command_ctx *cmd_ctx,
		void *payload_buf, size_t payload_len)
{
	const void *no_header = NULL;

	return setup_lttng_msg(cmd_ctx, payload_buf, payload_len,
			no_header, 0);
}

615 /*
616 * Update the kernel poll set of all channel fd available over all tracing
617 * session. Add the wakeup pipe at the end of the set.
618 */
619 static int update_kernel_poll(struct lttng_poll_event *events)
620 {
621 int ret;
622 struct ltt_session *session;
623 struct ltt_kernel_channel *channel;
624
625 DBG("Updating kernel poll set");
626
627 session_lock_list();
628 cds_list_for_each_entry(session, &session_list_ptr->head, list) {
629 session_lock(session);
630 if (session->kernel_session == NULL) {
631 session_unlock(session);
632 continue;
633 }
634
635 cds_list_for_each_entry(channel,
636 &session->kernel_session->channel_list.head, list) {
637 /* Add channel fd to the kernel poll set */
638 ret = lttng_poll_add(events, channel->fd, LPOLLIN | LPOLLRDNORM);
639 if (ret < 0) {
640 session_unlock(session);
641 goto error;
642 }
643 DBG("Channel fd %d added to kernel set", channel->fd);
644 }
645 session_unlock(session);
646 }
647 session_unlock_list();
648
649 return 0;
650
651 error:
652 session_unlock_list();
653 return -1;
654 }
655
656 /*
657 * Find the channel fd from 'fd' over all tracing session. When found, check
658 * for new channel stream and send those stream fds to the kernel consumer.
659 *
660 * Useful for CPU hotplug feature.
661 */
662 static int update_kernel_stream(int fd)
663 {
664 int ret = 0;
665 struct ltt_session *session;
666 struct ltt_kernel_session *ksess;
667 struct ltt_kernel_channel *channel;
668
669 DBG("Updating kernel streams for channel fd %d", fd);
670
671 session_lock_list();
672 cds_list_for_each_entry(session, &session_list_ptr->head, list) {
673 session_lock(session);
674 if (session->kernel_session == NULL) {
675 session_unlock(session);
676 continue;
677 }
678 ksess = session->kernel_session;
679
680 cds_list_for_each_entry(channel,
681 &ksess->channel_list.head, list) {
682 struct lttng_ht_iter iter;
683 struct consumer_socket *socket;
684
685 if (channel->fd != fd) {
686 continue;
687 }
688 DBG("Channel found, updating kernel streams");
689 ret = kernel_open_channel_stream(channel);
690 if (ret < 0) {
691 goto error;
692 }
693 /* Update the stream global counter */
694 ksess->stream_count_global += ret;
695
696 /*
697 * Have we already sent fds to the consumer? If yes, it
698 * means that tracing is started so it is safe to send
699 * our updated stream fds.
700 */
701 if (ksess->consumer_fds_sent != 1
702 || ksess->consumer == NULL) {
703 ret = -1;
704 goto error;
705 }
706
707 rcu_read_lock();
708 cds_lfht_for_each_entry(ksess->consumer->socks->ht,
709 &iter.iter, socket, node.node) {
710 pthread_mutex_lock(socket->lock);
711 ret = kernel_consumer_send_channel_streams(socket,
712 channel, ksess,
713 session->output_traces ? 1 : 0);
714 pthread_mutex_unlock(socket->lock);
715 if (ret < 0) {
716 rcu_read_unlock();
717 goto error;
718 }
719 }
720 rcu_read_unlock();
721 }
722 session_unlock(session);
723 }
724 session_unlock_list();
725 return ret;
726
727 error:
728 session_unlock(session);
729 session_unlock_list();
730 return ret;
731 }
732
733 /*
734 * For each tracing session, update newly registered apps. The session list
735 * lock MUST be acquired before calling this.
736 */
737 static void update_ust_app(int app_sock)
738 {
739 struct ltt_session *sess, *stmp;
740
741 /* Consumer is in an ERROR state. Stop any application update. */
742 if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) {
743 /* Stop the update process since the consumer is dead. */
744 return;
745 }
746
747 /* For all tracing session(s) */
748 cds_list_for_each_entry_safe(sess, stmp, &session_list_ptr->head, list) {
749 struct ust_app *app;
750
751 session_lock(sess);
752 if (!sess->ust_session) {
753 goto unlock_session;
754 }
755
756 rcu_read_lock();
757 assert(app_sock >= 0);
758 app = ust_app_find_by_sock(app_sock);
759 if (app == NULL) {
760 /*
761 * Application can be unregistered before so
762 * this is possible hence simply stopping the
763 * update.
764 */
765 DBG3("UST app update failed to find app sock %d",
766 app_sock);
767 goto unlock_rcu;
768 }
769 ust_app_global_update(sess->ust_session, app);
770 unlock_rcu:
771 rcu_read_unlock();
772 unlock_session:
773 session_unlock(sess);
774 }
775 }
776
777 /*
778 * This thread manage event coming from the kernel.
779 *
780 * Features supported in this thread:
781 * -) CPU Hotplug
782 */
783 static void *thread_manage_kernel(void *data)
784 {
785 int ret, i, pollfd, update_poll_flag = 1, err = -1;
786 uint32_t revents, nb_fd;
787 char tmp;
788 struct lttng_poll_event events;
789
790 DBG("[thread] Thread manage kernel started");
791
792 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_KERNEL);
793
794 /*
795 * This first step of the while is to clean this structure which could free
796 * non NULL pointers so initialize it before the loop.
797 */
798 lttng_poll_init(&events);
799
800 if (testpoint(sessiond_thread_manage_kernel)) {
801 goto error_testpoint;
802 }
803
804 health_code_update();
805
806 if (testpoint(sessiond_thread_manage_kernel_before_loop)) {
807 goto error_testpoint;
808 }
809
810 while (1) {
811 health_code_update();
812
813 if (update_poll_flag == 1) {
814 /* Clean events object. We are about to populate it again. */
815 lttng_poll_clean(&events);
816
817 ret = sessiond_set_thread_pollset(&events, 2);
818 if (ret < 0) {
819 goto error_poll_create;
820 }
821
822 ret = lttng_poll_add(&events, kernel_poll_pipe[0], LPOLLIN);
823 if (ret < 0) {
824 goto error;
825 }
826
827 /* This will add the available kernel channel if any. */
828 ret = update_kernel_poll(&events);
829 if (ret < 0) {
830 goto error;
831 }
832 update_poll_flag = 0;
833 }
834
835 DBG("Thread kernel polling");
836
837 /* Poll infinite value of time */
838 restart:
839 health_poll_entry();
840 ret = lttng_poll_wait(&events, -1);
841 DBG("Thread kernel return from poll on %d fds",
842 LTTNG_POLL_GETNB(&events));
843 health_poll_exit();
844 if (ret < 0) {
845 /*
846 * Restart interrupted system call.
847 */
848 if (errno == EINTR) {
849 goto restart;
850 }
851 goto error;
852 } else if (ret == 0) {
853 /* Should not happen since timeout is infinite */
854 ERR("Return value of poll is 0 with an infinite timeout.\n"
855 "This should not have happened! Continuing...");
856 continue;
857 }
858
859 nb_fd = ret;
860
861 for (i = 0; i < nb_fd; i++) {
862 /* Fetch once the poll data */
863 revents = LTTNG_POLL_GETEV(&events, i);
864 pollfd = LTTNG_POLL_GETFD(&events, i);
865
866 health_code_update();
867
868 if (!revents) {
869 /* No activity for this FD (poll implementation). */
870 continue;
871 }
872
873 /* Thread quit pipe has been closed. Killing thread. */
874 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
875 if (ret) {
876 err = 0;
877 goto exit;
878 }
879
880 /* Check for data on kernel pipe */
881 if (revents & LPOLLIN) {
882 if (pollfd == kernel_poll_pipe[0]) {
883 (void) lttng_read(kernel_poll_pipe[0],
884 &tmp, 1);
885 /*
886 * Ret value is useless here, if this pipe gets any actions an
887 * update is required anyway.
888 */
889 update_poll_flag = 1;
890 continue;
891 } else {
892 /*
893 * New CPU detected by the kernel. Adding kernel stream to
894 * kernel session and updating the kernel consumer
895 */
896 ret = update_kernel_stream(pollfd);
897 if (ret < 0) {
898 continue;
899 }
900 break;
901 }
902 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
903 update_poll_flag = 1;
904 continue;
905 } else {
906 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
907 goto error;
908 }
909 }
910 }
911
912 exit:
913 error:
914 lttng_poll_clean(&events);
915 error_poll_create:
916 error_testpoint:
917 utils_close_pipe(kernel_poll_pipe);
918 kernel_poll_pipe[0] = kernel_poll_pipe[1] = -1;
919 if (err) {
920 health_error();
921 ERR("Health error occurred in %s", __func__);
922 WARN("Kernel thread died unexpectedly. "
923 "Kernel tracing can continue but CPU hotplug is disabled.");
924 }
925 health_unregister(health_sessiond);
926 DBG("Kernel thread dying");
927 return NULL;
928 }
929
930 /*
931 * Signal pthread condition of the consumer data that the thread.
932 */
933 static void signal_consumer_condition(struct consumer_data *data, int state)
934 {
935 pthread_mutex_lock(&data->cond_mutex);
936
937 /*
938 * The state is set before signaling. It can be any value, it's the waiter
939 * job to correctly interpret this condition variable associated to the
940 * consumer pthread_cond.
941 *
942 * A value of 0 means that the corresponding thread of the consumer data
943 * was not started. 1 indicates that the thread has started and is ready
944 * for action. A negative value means that there was an error during the
945 * thread bootstrap.
946 */
947 data->consumer_thread_is_ready = state;
948 (void) pthread_cond_signal(&data->cond);
949
950 pthread_mutex_unlock(&data->cond_mutex);
951 }
952
953 /*
954 * This thread manage the consumer error sent back to the session daemon.
955 */
956 static void *thread_manage_consumer(void *data)
957 {
958 int sock = -1, i, ret, pollfd, err = -1, should_quit = 0;
959 uint32_t revents, nb_fd;
960 enum lttcomm_return_code code;
961 struct lttng_poll_event events;
962 struct consumer_data *consumer_data = data;
963 struct consumer_socket *cmd_socket_wrapper = NULL;
964
965 DBG("[thread] Manage consumer started");
966
967 rcu_register_thread();
968 rcu_thread_online();
969
970 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_CONSUMER);
971
972 health_code_update();
973
974 /*
975 * Pass 3 as size here for the thread quit pipe, consumerd_err_sock and the
976 * metadata_sock. Nothing more will be added to this poll set.
977 */
978 ret = sessiond_set_thread_pollset(&events, 3);
979 if (ret < 0) {
980 goto error_poll;
981 }
982
983 /*
984 * The error socket here is already in a listening state which was done
985 * just before spawning this thread to avoid a race between the consumer
986 * daemon exec trying to connect and the listen() call.
987 */
988 ret = lttng_poll_add(&events, consumer_data->err_sock, LPOLLIN | LPOLLRDHUP);
989 if (ret < 0) {
990 goto error;
991 }
992
993 health_code_update();
994
995 /* Infinite blocking call, waiting for transmission */
996 restart:
997 health_poll_entry();
998
999 if (testpoint(sessiond_thread_manage_consumer)) {
1000 goto error;
1001 }
1002
1003 ret = lttng_poll_wait(&events, -1);
1004 health_poll_exit();
1005 if (ret < 0) {
1006 /*
1007 * Restart interrupted system call.
1008 */
1009 if (errno == EINTR) {
1010 goto restart;
1011 }
1012 goto error;
1013 }
1014
1015 nb_fd = ret;
1016
1017 for (i = 0; i < nb_fd; i++) {
1018 /* Fetch once the poll data */
1019 revents = LTTNG_POLL_GETEV(&events, i);
1020 pollfd = LTTNG_POLL_GETFD(&events, i);
1021
1022 health_code_update();
1023
1024 if (!revents) {
1025 /* No activity for this FD (poll implementation). */
1026 continue;
1027 }
1028
1029 /* Thread quit pipe has been closed. Killing thread. */
1030 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
1031 if (ret) {
1032 err = 0;
1033 goto exit;
1034 }
1035
1036 /* Event on the registration socket */
1037 if (pollfd == consumer_data->err_sock) {
1038 if (revents & LPOLLIN) {
1039 continue;
1040 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1041 ERR("consumer err socket poll error");
1042 goto error;
1043 } else {
1044 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
1045 goto error;
1046 }
1047 }
1048 }
1049
1050 sock = lttcomm_accept_unix_sock(consumer_data->err_sock);
1051 if (sock < 0) {
1052 goto error;
1053 }
1054
1055 /*
1056 * Set the CLOEXEC flag. Return code is useless because either way, the
1057 * show must go on.
1058 */
1059 (void) utils_set_fd_cloexec(sock);
1060
1061 health_code_update();
1062
1063 DBG2("Receiving code from consumer err_sock");
1064
1065 /* Getting status code from kconsumerd */
1066 ret = lttcomm_recv_unix_sock(sock, &code,
1067 sizeof(enum lttcomm_return_code));
1068 if (ret <= 0) {
1069 goto error;
1070 }
1071
1072 health_code_update();
1073 if (code != LTTCOMM_CONSUMERD_COMMAND_SOCK_READY) {
1074 ERR("consumer error when waiting for SOCK_READY : %s",
1075 lttcomm_get_readable_code(-code));
1076 goto error;
1077 }
1078
1079 /* Connect both command and metadata sockets. */
1080 consumer_data->cmd_sock =
1081 lttcomm_connect_unix_sock(
1082 consumer_data->cmd_unix_sock_path);
1083 consumer_data->metadata_fd =
1084 lttcomm_connect_unix_sock(
1085 consumer_data->cmd_unix_sock_path);
1086 if (consumer_data->cmd_sock < 0 || consumer_data->metadata_fd < 0) {
1087 PERROR("consumer connect cmd socket");
1088 /* On error, signal condition and quit. */
1089 signal_consumer_condition(consumer_data, -1);
1090 goto error;
1091 }
1092
1093 consumer_data->metadata_sock.fd_ptr = &consumer_data->metadata_fd;
1094
1095 /* Create metadata socket lock. */
1096 consumer_data->metadata_sock.lock = zmalloc(sizeof(pthread_mutex_t));
1097 if (consumer_data->metadata_sock.lock == NULL) {
1098 PERROR("zmalloc pthread mutex");
1099 goto error;
1100 }
1101 pthread_mutex_init(consumer_data->metadata_sock.lock, NULL);
1102
1103 DBG("Consumer command socket ready (fd: %d", consumer_data->cmd_sock);
1104 DBG("Consumer metadata socket ready (fd: %d)",
1105 consumer_data->metadata_fd);
1106
1107 /*
1108 * Remove the consumerd error sock since we've established a connection.
1109 */
1110 ret = lttng_poll_del(&events, consumer_data->err_sock);
1111 if (ret < 0) {
1112 goto error;
1113 }
1114
1115 /* Add new accepted error socket. */
1116 ret = lttng_poll_add(&events, sock, LPOLLIN | LPOLLRDHUP);
1117 if (ret < 0) {
1118 goto error;
1119 }
1120
1121 /* Add metadata socket that is successfully connected. */
1122 ret = lttng_poll_add(&events, consumer_data->metadata_fd,
1123 LPOLLIN | LPOLLRDHUP);
1124 if (ret < 0) {
1125 goto error;
1126 }
1127
1128 health_code_update();
1129
1130 /*
1131 * Transfer the write-end of the channel monitoring and rotate pipe
1132 * to the consumer by issuing a SET_CHANNEL_MONITOR_PIPE command.
1133 */
1134 cmd_socket_wrapper = consumer_allocate_socket(&consumer_data->cmd_sock);
1135 if (!cmd_socket_wrapper) {
1136 goto error;
1137 }
1138 cmd_socket_wrapper->lock = &consumer_data->lock;
1139
1140 ret = consumer_send_channel_monitor_pipe(cmd_socket_wrapper,
1141 consumer_data->channel_monitor_pipe);
1142 if (ret) {
1143 goto error;
1144 }
1145
1146 /* Discard the socket wrapper as it is no longer needed. */
1147 consumer_destroy_socket(cmd_socket_wrapper);
1148 cmd_socket_wrapper = NULL;
1149
1150 /* The thread is completely initialized, signal that it is ready. */
1151 signal_consumer_condition(consumer_data, 1);
1152
1153 /* Infinite blocking call, waiting for transmission */
1154 restart_poll:
1155 while (1) {
1156 health_code_update();
1157
1158 /* Exit the thread because the thread quit pipe has been triggered. */
1159 if (should_quit) {
1160 /* Not a health error. */
1161 err = 0;
1162 goto exit;
1163 }
1164
1165 health_poll_entry();
1166 ret = lttng_poll_wait(&events, -1);
1167 health_poll_exit();
1168 if (ret < 0) {
1169 /*
1170 * Restart interrupted system call.
1171 */
1172 if (errno == EINTR) {
1173 goto restart_poll;
1174 }
1175 goto error;
1176 }
1177
1178 nb_fd = ret;
1179
1180 for (i = 0; i < nb_fd; i++) {
1181 /* Fetch once the poll data */
1182 revents = LTTNG_POLL_GETEV(&events, i);
1183 pollfd = LTTNG_POLL_GETFD(&events, i);
1184
1185 health_code_update();
1186
1187 if (!revents) {
1188 /* No activity for this FD (poll implementation). */
1189 continue;
1190 }
1191
1192 /*
1193 * Thread quit pipe has been triggered, flag that we should stop
1194 * but continue the current loop to handle potential data from
1195 * consumer.
1196 */
1197 should_quit = sessiond_check_thread_quit_pipe(pollfd, revents);
1198
1199 if (pollfd == sock) {
1200 /* Event on the consumerd socket */
1201 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)
1202 && !(revents & LPOLLIN)) {
1203 ERR("consumer err socket second poll error");
1204 goto error;
1205 }
1206 health_code_update();
1207 /* Wait for any kconsumerd error */
1208 ret = lttcomm_recv_unix_sock(sock, &code,
1209 sizeof(enum lttcomm_return_code));
1210 if (ret <= 0) {
1211 ERR("consumer closed the command socket");
1212 goto error;
1213 }
1214
1215 ERR("consumer return code : %s",
1216 lttcomm_get_readable_code(-code));
1217
1218 goto exit;
1219 } else if (pollfd == consumer_data->metadata_fd) {
1220 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)
1221 && !(revents & LPOLLIN)) {
1222 ERR("consumer err metadata socket second poll error");
1223 goto error;
1224 }
1225 /* UST metadata requests */
1226 ret = ust_consumer_metadata_request(
1227 &consumer_data->metadata_sock);
1228 if (ret < 0) {
1229 ERR("Handling metadata request");
1230 goto error;
1231 }
1232 }
1233 /* No need for an else branch all FDs are tested prior. */
1234 }
1235 health_code_update();
1236 }
1237
1238 exit:
1239 error:
1240 /*
1241 * We lock here because we are about to close the sockets and some other
1242 * thread might be using them so get exclusive access which will abort all
1243 * other consumer command by other threads.
1244 */
1245 pthread_mutex_lock(&consumer_data->lock);
1246
1247 /* Immediately set the consumerd state to stopped */
1248 if (consumer_data->type == LTTNG_CONSUMER_KERNEL) {
1249 uatomic_set(&kernel_consumerd_state, CONSUMER_ERROR);
1250 } else if (consumer_data->type == LTTNG_CONSUMER64_UST ||
1251 consumer_data->type == LTTNG_CONSUMER32_UST) {
1252 uatomic_set(&ust_consumerd_state, CONSUMER_ERROR);
1253 } else {
1254 /* Code flow error... */
1255 assert(0);
1256 }
1257
1258 if (consumer_data->err_sock >= 0) {
1259 ret = close(consumer_data->err_sock);
1260 if (ret) {
1261 PERROR("close");
1262 }
1263 consumer_data->err_sock = -1;
1264 }
1265 if (consumer_data->cmd_sock >= 0) {
1266 ret = close(consumer_data->cmd_sock);
1267 if (ret) {
1268 PERROR("close");
1269 }
1270 consumer_data->cmd_sock = -1;
1271 }
1272 if (consumer_data->metadata_sock.fd_ptr &&
1273 *consumer_data->metadata_sock.fd_ptr >= 0) {
1274 ret = close(*consumer_data->metadata_sock.fd_ptr);
1275 if (ret) {
1276 PERROR("close");
1277 }
1278 }
1279 if (sock >= 0) {
1280 ret = close(sock);
1281 if (ret) {
1282 PERROR("close");
1283 }
1284 }
1285
1286 unlink(consumer_data->err_unix_sock_path);
1287 unlink(consumer_data->cmd_unix_sock_path);
1288 pthread_mutex_unlock(&consumer_data->lock);
1289
1290 /* Cleanup metadata socket mutex. */
1291 if (consumer_data->metadata_sock.lock) {
1292 pthread_mutex_destroy(consumer_data->metadata_sock.lock);
1293 free(consumer_data->metadata_sock.lock);
1294 }
1295 lttng_poll_clean(&events);
1296
1297 if (cmd_socket_wrapper) {
1298 consumer_destroy_socket(cmd_socket_wrapper);
1299 }
1300 error_poll:
1301 if (err) {
1302 health_error();
1303 ERR("Health error occurred in %s", __func__);
1304 }
1305 health_unregister(health_sessiond);
1306 DBG("consumer thread cleanup completed");
1307
1308 rcu_thread_offline();
1309 rcu_unregister_thread();
1310
1311 return NULL;
1312 }
1313
1314 /*
1315 * This thread receives application command sockets (FDs) on the
1316 * apps_cmd_pipe and waits (polls) on them until they are closed
1317 * or an error occurs.
1318 *
1319 * At that point, it flushes the data (tracing and metadata) associated
1320 * with this application and tears down ust app sessions and other
1321 * associated data structures through ust_app_unregister().
1322 *
1323 * Note that this thread never sends commands to the applications
1324 * through the command sockets; it merely listens for hang-ups
1325 * and errors on those sockets and cleans-up as they occur.
1326 */
1327 static void *thread_manage_apps(void *data)
1328 {
1329 int i, ret, pollfd, err = -1;
1330 ssize_t size_ret;
1331 uint32_t revents, nb_fd;
1332 struct lttng_poll_event events;
1333
1334 DBG("[thread] Manage application started");
1335
1336 rcu_register_thread();
1337 rcu_thread_online();
1338
1339 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_MANAGE);
1340
1341 if (testpoint(sessiond_thread_manage_apps)) {
1342 goto error_testpoint;
1343 }
1344
1345 health_code_update();
1346
1347 ret = sessiond_set_thread_pollset(&events, 2);
1348 if (ret < 0) {
1349 goto error_poll_create;
1350 }
1351
1352 ret = lttng_poll_add(&events, apps_cmd_pipe[0], LPOLLIN | LPOLLRDHUP);
1353 if (ret < 0) {
1354 goto error;
1355 }
1356
1357 if (testpoint(sessiond_thread_manage_apps_before_loop)) {
1358 goto error;
1359 }
1360
1361 health_code_update();
1362
1363 while (1) {
1364 DBG("Apps thread polling");
1365
1366 /* Inifinite blocking call, waiting for transmission */
1367 restart:
1368 health_poll_entry();
1369 ret = lttng_poll_wait(&events, -1);
1370 DBG("Apps thread return from poll on %d fds",
1371 LTTNG_POLL_GETNB(&events));
1372 health_poll_exit();
1373 if (ret < 0) {
1374 /*
1375 * Restart interrupted system call.
1376 */
1377 if (errno == EINTR) {
1378 goto restart;
1379 }
1380 goto error;
1381 }
1382
1383 nb_fd = ret;
1384
1385 for (i = 0; i < nb_fd; i++) {
1386 /* Fetch once the poll data */
1387 revents = LTTNG_POLL_GETEV(&events, i);
1388 pollfd = LTTNG_POLL_GETFD(&events, i);
1389
1390 health_code_update();
1391
1392 if (!revents) {
1393 /* No activity for this FD (poll implementation). */
1394 continue;
1395 }
1396
1397 /* Thread quit pipe has been closed. Killing thread. */
1398 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
1399 if (ret) {
1400 err = 0;
1401 goto exit;
1402 }
1403
1404 /* Inspect the apps cmd pipe */
1405 if (pollfd == apps_cmd_pipe[0]) {
1406 if (revents & LPOLLIN) {
1407 int sock;
1408
1409 /* Empty pipe */
1410 size_ret = lttng_read(apps_cmd_pipe[0], &sock, sizeof(sock));
1411 if (size_ret < sizeof(sock)) {
1412 PERROR("read apps cmd pipe");
1413 goto error;
1414 }
1415
1416 health_code_update();
1417
1418 /*
1419 * Since this is a command socket (write then read),
1420 * we only monitor the error events of the socket.
1421 */
1422 ret = lttng_poll_add(&events, sock,
1423 LPOLLERR | LPOLLHUP | LPOLLRDHUP);
1424 if (ret < 0) {
1425 goto error;
1426 }
1427
1428 DBG("Apps with sock %d added to poll set", sock);
1429 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1430 ERR("Apps command pipe error");
1431 goto error;
1432 } else {
1433 ERR("Unknown poll events %u for sock %d", revents, pollfd);
1434 goto error;
1435 }
1436 } else {
1437 /*
1438 * At this point, we know that a registered application made
1439 * the event at poll_wait.
1440 */
1441 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1442 /* Removing from the poll set */
1443 ret = lttng_poll_del(&events, pollfd);
1444 if (ret < 0) {
1445 goto error;
1446 }
1447
1448 /* Socket closed on remote end. */
1449 ust_app_unregister(pollfd);
1450 } else {
1451 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
1452 goto error;
1453 }
1454 }
1455
1456 health_code_update();
1457 }
1458 }
1459
1460 exit:
1461 error:
1462 lttng_poll_clean(&events);
1463 error_poll_create:
1464 error_testpoint:
1465 utils_close_pipe(apps_cmd_pipe);
1466 apps_cmd_pipe[0] = apps_cmd_pipe[1] = -1;
1467
1468 /*
1469 * We don't clean the UST app hash table here since already registered
1470 * applications can still be controlled so let them be until the session
1471 * daemon dies or the applications stop.
1472 */
1473
1474 if (err) {
1475 health_error();
1476 ERR("Health error occurred in %s", __func__);
1477 }
1478 health_unregister(health_sessiond);
1479 DBG("Application communication apps thread cleanup complete");
1480 rcu_thread_offline();
1481 rcu_unregister_thread();
1482 return NULL;
1483 }
1484
/*
 * Send a socket to a thread. This is called from the dispatch UST registration
 * thread once all sockets are set for the application.
 *
 * fd is the write end of the pipe read by the target thread and sock is the
 * socket value written to it.
 *
 * The sock value can be invalid, we don't really care, the thread will handle
 * it and make the necessary cleanup if so.
 *
 * On success, return 0. On failure, return a negative errno value: -EBADF if
 * fd is negative, the errno reported for the failed write(), or -EIO when
 * the value could only be partially written.
 */
static int send_socket_to_thread(int fd, int sock)
{
	ssize_t ret;

	/*
	 * It's possible that the FD is set as invalid with -1 concurrently just
	 * before calling this function being a shutdown state of the thread.
	 */
	if (fd < 0) {
		ret = -EBADF;
		goto error;
	}

	ret = lttng_write(fd, &sock, sizeof(sock));
	/*
	 * Check the sign before comparing against sizeof(): a negative ssize_t
	 * is converted to a huge unsigned value by that comparison, which
	 * would make a failed write look like a complete one.
	 */
	if (ret < 0 || (size_t) ret < sizeof(sock)) {
		/* Capture errno before logging, in case logging modifies it. */
		const int write_errno = errno;

		PERROR("write apps pipe %d", fd);
		if (ret < 0 && write_errno != 0) {
			ret = -write_errno;
		} else {
			/* Short write: never report it as a success. */
			ret = -EIO;
		}
		goto error;
	}

	/* All good. Don't send back the write positive ret value. */
	ret = 0;
error:
	return (int) ret;
}
1522
1523 /*
1524 * Sanitize the wait queue of the dispatch registration thread meaning removing
1525 * invalid nodes from it. This is to avoid memory leaks for the case the UST
1526 * notify socket is never received.
1527 */
1528 static void sanitize_wait_queue(struct ust_reg_wait_queue *wait_queue)
1529 {
1530 int ret, nb_fd = 0, i;
1531 unsigned int fd_added = 0;
1532 struct lttng_poll_event events;
1533 struct ust_reg_wait_node *wait_node = NULL, *tmp_wait_node;
1534
1535 assert(wait_queue);
1536
1537 lttng_poll_init(&events);
1538
1539 /* Just skip everything for an empty queue. */
1540 if (!wait_queue->count) {
1541 goto end;
1542 }
1543
1544 ret = lttng_poll_create(&events, wait_queue->count, LTTNG_CLOEXEC);
1545 if (ret < 0) {
1546 goto error_create;
1547 }
1548
1549 cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
1550 &wait_queue->head, head) {
1551 assert(wait_node->app);
1552 ret = lttng_poll_add(&events, wait_node->app->sock,
1553 LPOLLHUP | LPOLLERR);
1554 if (ret < 0) {
1555 goto error;
1556 }
1557
1558 fd_added = 1;
1559 }
1560
1561 if (!fd_added) {
1562 goto end;
1563 }
1564
1565 /*
1566 * Poll but don't block so we can quickly identify the faulty events and
1567 * clean them afterwards from the wait queue.
1568 */
1569 ret = lttng_poll_wait(&events, 0);
1570 if (ret < 0) {
1571 goto error;
1572 }
1573 nb_fd = ret;
1574
1575 for (i = 0; i < nb_fd; i++) {
1576 /* Get faulty FD. */
1577 uint32_t revents = LTTNG_POLL_GETEV(&events, i);
1578 int pollfd = LTTNG_POLL_GETFD(&events, i);
1579
1580 if (!revents) {
1581 /* No activity for this FD (poll implementation). */
1582 continue;
1583 }
1584
1585 cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
1586 &wait_queue->head, head) {
1587 if (pollfd == wait_node->app->sock &&
1588 (revents & (LPOLLHUP | LPOLLERR))) {
1589 cds_list_del(&wait_node->head);
1590 wait_queue->count--;
1591 ust_app_destroy(wait_node->app);
1592 free(wait_node);
1593 /*
1594 * Silence warning of use-after-free in
1595 * cds_list_for_each_entry_safe which uses
1596 * __typeof__(*wait_node).
1597 */
1598 wait_node = NULL;
1599 break;
1600 } else {
1601 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
1602 goto error;
1603 }
1604 }
1605 }
1606
1607 if (nb_fd > 0) {
1608 DBG("Wait queue sanitized, %d node were cleaned up", nb_fd);
1609 }
1610
1611 end:
1612 lttng_poll_clean(&events);
1613 return;
1614
1615 error:
1616 lttng_poll_clean(&events);
1617 error_create:
1618 ERR("Unable to sanitize wait queue");
1619 return;
1620 }
1621
1622 /*
1623 * Dispatch request from the registration threads to the application
1624 * communication thread.
1625 */
1626 static void *thread_dispatch_ust_registration(void *data)
1627 {
1628 int ret, err = -1;
1629 struct cds_wfcq_node *node;
1630 struct ust_command *ust_cmd = NULL;
1631 struct ust_reg_wait_node *wait_node = NULL, *tmp_wait_node;
1632 struct ust_reg_wait_queue wait_queue = {
1633 .count = 0,
1634 };
1635
1636 rcu_register_thread();
1637
1638 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_REG_DISPATCH);
1639
1640 if (testpoint(sessiond_thread_app_reg_dispatch)) {
1641 goto error_testpoint;
1642 }
1643
1644 health_code_update();
1645
1646 CDS_INIT_LIST_HEAD(&wait_queue.head);
1647
1648 DBG("[thread] Dispatch UST command started");
1649
1650 for (;;) {
1651 health_code_update();
1652
1653 /* Atomically prepare the queue futex */
1654 futex_nto1_prepare(&ust_cmd_queue.futex);
1655
1656 if (CMM_LOAD_SHARED(dispatch_thread_exit)) {
1657 break;
1658 }
1659
1660 do {
1661 struct ust_app *app = NULL;
1662 ust_cmd = NULL;
1663
1664 /*
1665 * Make sure we don't have node(s) that have hung up before receiving
1666 * the notify socket. This is to clean the list in order to avoid
1667 * memory leaks from notify socket that are never seen.
1668 */
1669 sanitize_wait_queue(&wait_queue);
1670
1671 health_code_update();
1672 /* Dequeue command for registration */
1673 node = cds_wfcq_dequeue_blocking(&ust_cmd_queue.head, &ust_cmd_queue.tail);
1674 if (node == NULL) {
1675 DBG("Woken up but nothing in the UST command queue");
1676 /* Continue thread execution */
1677 break;
1678 }
1679
1680 ust_cmd = caa_container_of(node, struct ust_command, node);
1681
1682 DBG("Dispatching UST registration pid:%d ppid:%d uid:%d"
1683 " gid:%d sock:%d name:%s (version %d.%d)",
1684 ust_cmd->reg_msg.pid, ust_cmd->reg_msg.ppid,
1685 ust_cmd->reg_msg.uid, ust_cmd->reg_msg.gid,
1686 ust_cmd->sock, ust_cmd->reg_msg.name,
1687 ust_cmd->reg_msg.major, ust_cmd->reg_msg.minor);
1688
1689 if (ust_cmd->reg_msg.type == USTCTL_SOCKET_CMD) {
1690 wait_node = zmalloc(sizeof(*wait_node));
1691 if (!wait_node) {
1692 PERROR("zmalloc wait_node dispatch");
1693 ret = close(ust_cmd->sock);
1694 if (ret < 0) {
1695 PERROR("close ust sock dispatch %d", ust_cmd->sock);
1696 }
1697 lttng_fd_put(LTTNG_FD_APPS, 1);
1698 free(ust_cmd);
1699 goto error;
1700 }
1701 CDS_INIT_LIST_HEAD(&wait_node->head);
1702
1703 /* Create application object if socket is CMD. */
1704 wait_node->app = ust_app_create(&ust_cmd->reg_msg,
1705 ust_cmd->sock);
1706 if (!wait_node->app) {
1707 ret = close(ust_cmd->sock);
1708 if (ret < 0) {
1709 PERROR("close ust sock dispatch %d", ust_cmd->sock);
1710 }
1711 lttng_fd_put(LTTNG_FD_APPS, 1);
1712 free(wait_node);
1713 free(ust_cmd);
1714 continue;
1715 }
1716 /*
1717 * Add application to the wait queue so we can set the notify
1718 * socket before putting this object in the global ht.
1719 */
1720 cds_list_add(&wait_node->head, &wait_queue.head);
1721 wait_queue.count++;
1722
1723 free(ust_cmd);
1724 /*
1725 * We have to continue here since we don't have the notify
1726 * socket and the application MUST be added to the hash table
1727 * only at that moment.
1728 */
1729 continue;
1730 } else {
1731 /*
1732 * Look for the application in the local wait queue and set the
1733 * notify socket if found.
1734 */
1735 cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
1736 &wait_queue.head, head) {
1737 health_code_update();
1738 if (wait_node->app->pid == ust_cmd->reg_msg.pid) {
1739 wait_node->app->notify_sock = ust_cmd->sock;
1740 cds_list_del(&wait_node->head);
1741 wait_queue.count--;
1742 app = wait_node->app;
1743 free(wait_node);
1744 DBG3("UST app notify socket %d is set", ust_cmd->sock);
1745 break;
1746 }
1747 }
1748
1749 /*
1750 * With no application at this stage the received socket is
1751 * basically useless so close it before we free the cmd data
1752 * structure for good.
1753 */
1754 if (!app) {
1755 ret = close(ust_cmd->sock);
1756 if (ret < 0) {
1757 PERROR("close ust sock dispatch %d", ust_cmd->sock);
1758 }
1759 lttng_fd_put(LTTNG_FD_APPS, 1);
1760 }
1761 free(ust_cmd);
1762 }
1763
1764 if (app) {
1765 /*
1766 * @session_lock_list
1767 *
1768 * Lock the global session list so from the register up to the
1769 * registration done message, no thread can see the application
1770 * and change its state.
1771 */
1772 session_lock_list();
1773 rcu_read_lock();
1774
1775 /*
1776 * Add application to the global hash table. This needs to be
1777 * done before the update to the UST registry can locate the
1778 * application.
1779 */
1780 ust_app_add(app);
1781
1782 /* Set app version. This call will print an error if needed. */
1783 (void) ust_app_version(app);
1784
1785 /* Send notify socket through the notify pipe. */
1786 ret = send_socket_to_thread(apps_cmd_notify_pipe[1],
1787 app->notify_sock);
1788 if (ret < 0) {
1789 rcu_read_unlock();
1790 session_unlock_list();
1791 /*
1792 * No notify thread, stop the UST tracing. However, this is
1793 * not an internal error of the this thread thus setting
1794 * the health error code to a normal exit.
1795 */
1796 err = 0;
1797 goto error;
1798 }
1799
1800 /*
1801 * Update newly registered application with the tracing
1802 * registry info already enabled information.
1803 */
1804 update_ust_app(app->sock);
1805
1806 /*
1807 * Don't care about return value. Let the manage apps threads
1808 * handle app unregistration upon socket close.
1809 */
1810 (void) ust_app_register_done(app);
1811
1812 /*
1813 * Even if the application socket has been closed, send the app
1814 * to the thread and unregistration will take place at that
1815 * place.
1816 */
1817 ret = send_socket_to_thread(apps_cmd_pipe[1], app->sock);
1818 if (ret < 0) {
1819 rcu_read_unlock();
1820 session_unlock_list();
1821 /*
1822 * No apps. thread, stop the UST tracing. However, this is
1823 * not an internal error of the this thread thus setting
1824 * the health error code to a normal exit.
1825 */
1826 err = 0;
1827 goto error;
1828 }
1829
1830 rcu_read_unlock();
1831 session_unlock_list();
1832 }
1833 } while (node != NULL);
1834
1835 health_poll_entry();
1836 /* Futex wait on queue. Blocking call on futex() */
1837 futex_nto1_wait(&ust_cmd_queue.futex);
1838 health_poll_exit();
1839 }
1840 /* Normal exit, no error */
1841 err = 0;
1842
1843 error:
1844 /* Clean up wait queue. */
1845 cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
1846 &wait_queue.head, head) {
1847 cds_list_del(&wait_node->head);
1848 wait_queue.count--;
1849 free(wait_node);
1850 }
1851
1852 /* Empty command queue. */
1853 for (;;) {
1854 /* Dequeue command for registration */
1855 node = cds_wfcq_dequeue_blocking(&ust_cmd_queue.head, &ust_cmd_queue.tail);
1856 if (node == NULL) {
1857 break;
1858 }
1859 ust_cmd = caa_container_of(node, struct ust_command, node);
1860 ret = close(ust_cmd->sock);
1861 if (ret < 0) {
1862 PERROR("close ust sock exit dispatch %d", ust_cmd->sock);
1863 }
1864 lttng_fd_put(LTTNG_FD_APPS, 1);
1865 free(ust_cmd);
1866 }
1867
1868 error_testpoint:
1869 DBG("Dispatch thread dying");
1870 if (err) {
1871 health_error();
1872 ERR("Health error occurred in %s", __func__);
1873 }
1874 health_unregister(health_sessiond);
1875 rcu_unregister_thread();
1876 return NULL;
1877 }
1878
1879 /*
1880 * This thread manage application registration.
1881 */
1882 static void *thread_registration_apps(void *data)
1883 {
1884 int sock = -1, i, ret, pollfd, err = -1;
1885 uint32_t revents, nb_fd;
1886 struct lttng_poll_event events;
1887 /*
1888 * Get allocated in this thread, enqueued to a global queue, dequeued and
1889 * freed in the manage apps thread.
1890 */
1891 struct ust_command *ust_cmd = NULL;
1892
1893 DBG("[thread] Manage application registration started");
1894
1895 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_REG);
1896
1897 if (testpoint(sessiond_thread_registration_apps)) {
1898 goto error_testpoint;
1899 }
1900
1901 ret = lttcomm_listen_unix_sock(apps_sock);
1902 if (ret < 0) {
1903 goto error_listen;
1904 }
1905
1906 /*
1907 * Pass 2 as size here for the thread quit pipe and apps socket. Nothing
1908 * more will be added to this poll set.
1909 */
1910 ret = sessiond_set_thread_pollset(&events, 2);
1911 if (ret < 0) {
1912 goto error_create_poll;
1913 }
1914
1915 /* Add the application registration socket */
1916 ret = lttng_poll_add(&events, apps_sock, LPOLLIN | LPOLLRDHUP);
1917 if (ret < 0) {
1918 goto error_poll_add;
1919 }
1920
1921 /* Notify all applications to register */
1922 ret = notify_ust_apps(1);
1923 if (ret < 0) {
1924 ERR("Failed to notify applications or create the wait shared memory.\n"
1925 "Execution continues but there might be problem for already\n"
1926 "running applications that wishes to register.");
1927 }
1928
1929 while (1) {
1930 DBG("Accepting application registration");
1931
1932 /* Inifinite blocking call, waiting for transmission */
1933 restart:
1934 health_poll_entry();
1935 ret = lttng_poll_wait(&events, -1);
1936 health_poll_exit();
1937 if (ret < 0) {
1938 /*
1939 * Restart interrupted system call.
1940 */
1941 if (errno == EINTR) {
1942 goto restart;
1943 }
1944 goto error;
1945 }
1946
1947 nb_fd = ret;
1948
1949 for (i = 0; i < nb_fd; i++) {
1950 health_code_update();
1951
1952 /* Fetch once the poll data */
1953 revents = LTTNG_POLL_GETEV(&events, i);
1954 pollfd = LTTNG_POLL_GETFD(&events, i);
1955
1956 if (!revents) {
1957 /* No activity for this FD (poll implementation). */
1958 continue;
1959 }
1960
1961 /* Thread quit pipe has been closed. Killing thread. */
1962 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
1963 if (ret) {
1964 err = 0;
1965 goto exit;
1966 }
1967
1968 /* Event on the registration socket */
1969 if (pollfd == apps_sock) {
1970 if (revents & LPOLLIN) {
1971 sock = lttcomm_accept_unix_sock(apps_sock);
1972 if (sock < 0) {
1973 goto error;
1974 }
1975
1976 /*
1977 * Set socket timeout for both receiving and ending.
1978 * app_socket_timeout is in seconds, whereas
1979 * lttcomm_setsockopt_rcv_timeout and
1980 * lttcomm_setsockopt_snd_timeout expect msec as
1981 * parameter.
1982 */
1983 if (config.app_socket_timeout >= 0) {
1984 (void) lttcomm_setsockopt_rcv_timeout(sock,
1985 config.app_socket_timeout * 1000);
1986 (void) lttcomm_setsockopt_snd_timeout(sock,
1987 config.app_socket_timeout * 1000);
1988 }
1989
1990 /*
1991 * Set the CLOEXEC flag. Return code is useless because
1992 * either way, the show must go on.
1993 */
1994 (void) utils_set_fd_cloexec(sock);
1995
1996 /* Create UST registration command for enqueuing */
1997 ust_cmd = zmalloc(sizeof(struct ust_command));
1998 if (ust_cmd == NULL) {
1999 PERROR("ust command zmalloc");
2000 ret = close(sock);
2001 if (ret) {
2002 PERROR("close");
2003 }
2004 goto error;
2005 }
2006
2007 /*
2008 * Using message-based transmissions to ensure we don't
2009 * have to deal with partially received messages.
2010 */
2011 ret = lttng_fd_get(LTTNG_FD_APPS, 1);
2012 if (ret < 0) {
2013 ERR("Exhausted file descriptors allowed for applications.");
2014 free(ust_cmd);
2015 ret = close(sock);
2016 if (ret) {
2017 PERROR("close");
2018 }
2019 sock = -1;
2020 continue;
2021 }
2022
2023 health_code_update();
2024 ret = ust_app_recv_registration(sock, &ust_cmd->reg_msg);
2025 if (ret < 0) {
2026 free(ust_cmd);
2027 /* Close socket of the application. */
2028 ret = close(sock);
2029 if (ret) {
2030 PERROR("close");
2031 }
2032 lttng_fd_put(LTTNG_FD_APPS, 1);
2033 sock = -1;
2034 continue;
2035 }
2036 health_code_update();
2037
2038 ust_cmd->sock = sock;
2039 sock = -1;
2040
2041 DBG("UST registration received with pid:%d ppid:%d uid:%d"
2042 " gid:%d sock:%d name:%s (version %d.%d)",
2043 ust_cmd->reg_msg.pid, ust_cmd->reg_msg.ppid,
2044 ust_cmd->reg_msg.uid, ust_cmd->reg_msg.gid,
2045 ust_cmd->sock, ust_cmd->reg_msg.name,
2046 ust_cmd->reg_msg.major, ust_cmd->reg_msg.minor);
2047
2048 /*
2049 * Lock free enqueue the registration request. The red pill
2050 * has been taken! This apps will be part of the *system*.
2051 */
2052 cds_wfcq_enqueue(&ust_cmd_queue.head, &ust_cmd_queue.tail, &ust_cmd->node);
2053
2054 /*
2055 * Wake the registration queue futex. Implicit memory
2056 * barrier with the exchange in cds_wfcq_enqueue.
2057 */
2058 futex_nto1_wake(&ust_cmd_queue.futex);
2059 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
2060 ERR("Register apps socket poll error");
2061 goto error;
2062 } else {
2063 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
2064 goto error;
2065 }
2066 }
2067 }
2068 }
2069
2070 exit:
2071 error:
2072 /* Notify that the registration thread is gone */
2073 notify_ust_apps(0);
2074
2075 if (apps_sock >= 0) {
2076 ret = close(apps_sock);
2077 if (ret) {
2078 PERROR("close");
2079 }
2080 }
2081 if (sock >= 0) {
2082 ret = close(sock);
2083 if (ret) {
2084 PERROR("close");
2085 }
2086 lttng_fd_put(LTTNG_FD_APPS, 1);
2087 }
2088 unlink(config.apps_unix_sock_path.value);
2089
2090 error_poll_add:
2091 lttng_poll_clean(&events);
2092 error_listen:
2093 error_create_poll:
2094 error_testpoint:
2095 DBG("UST Registration thread cleanup complete");
2096 if (err) {
2097 health_error();
2098 ERR("Health error occurred in %s", __func__);
2099 }
2100 health_unregister(health_sessiond);
2101
2102 return NULL;
2103 }
2104
2105 /*
2106 * Start the thread_manage_consumer. This must be done after a lttng-consumerd
2107 * exec or it will fails.
2108 */
2109 static int spawn_consumer_thread(struct consumer_data *consumer_data)
2110 {
2111 int ret, clock_ret;
2112 struct timespec timeout;
2113
2114 /*
2115 * Make sure we set the readiness flag to 0 because we are NOT ready.
2116 * This access to consumer_thread_is_ready does not need to be
2117 * protected by consumer_data.cond_mutex (yet) since the consumer
2118 * management thread has not been started at this point.
2119 */
2120 consumer_data->consumer_thread_is_ready = 0;
2121
2122 /* Setup pthread condition */
2123 ret = pthread_condattr_init(&consumer_data->condattr);
2124 if (ret) {
2125 errno = ret;
2126 PERROR("pthread_condattr_init consumer data");
2127 goto error;
2128 }
2129
2130 /*
2131 * Set the monotonic clock in order to make sure we DO NOT jump in time
2132 * between the clock_gettime() call and the timedwait call. See bug #324
2133 * for a more details and how we noticed it.
2134 */
2135 ret = pthread_condattr_setclock(&consumer_data->condattr, CLOCK_MONOTONIC);
2136 if (ret) {
2137 errno = ret;
2138 PERROR("pthread_condattr_setclock consumer data");
2139 goto error;
2140 }
2141
2142 ret = pthread_cond_init(&consumer_data->cond, &consumer_data->condattr);
2143 if (ret) {
2144 errno = ret;
2145 PERROR("pthread_cond_init consumer data");
2146 goto error;
2147 }
2148
2149 ret = pthread_create(&consumer_data->thread, default_pthread_attr(),
2150 thread_manage_consumer, consumer_data);
2151 if (ret) {
2152 errno = ret;
2153 PERROR("pthread_create consumer");
2154 ret = -1;
2155 goto error;
2156 }
2157
2158 /* We are about to wait on a pthread condition */
2159 pthread_mutex_lock(&consumer_data->cond_mutex);
2160
2161 /* Get time for sem_timedwait absolute timeout */
2162 clock_ret = lttng_clock_gettime(CLOCK_MONOTONIC, &timeout);
2163 /*
2164 * Set the timeout for the condition timed wait even if the clock gettime
2165 * call fails since we might loop on that call and we want to avoid to
2166 * increment the timeout too many times.
2167 */
2168 timeout.tv_sec += DEFAULT_SEM_WAIT_TIMEOUT;
2169
2170 /*
2171 * The following loop COULD be skipped in some conditions so this is why we
2172 * set ret to 0 in order to make sure at least one round of the loop is
2173 * done.
2174 */
2175 ret = 0;
2176
2177 /*
2178 * Loop until the condition is reached or when a timeout is reached. Note
2179 * that the pthread_cond_timedwait(P) man page specifies that EINTR can NOT
2180 * be returned but the pthread_cond(3), from the glibc-doc, says that it is
2181 * possible. This loop does not take any chances and works with both of
2182 * them.
2183 */
2184 while (!consumer_data->consumer_thread_is_ready && ret != ETIMEDOUT) {
2185 if (clock_ret < 0) {
2186 PERROR("clock_gettime spawn consumer");
2187 /* Infinite wait for the consumerd thread to be ready */
2188 ret = pthread_cond_wait(&consumer_data->cond,
2189 &consumer_data->cond_mutex);
2190 } else {
2191 ret = pthread_cond_timedwait(&consumer_data->cond,
2192 &consumer_data->cond_mutex, &timeout);
2193 }
2194 }
2195
2196 /* Release the pthread condition */
2197 pthread_mutex_unlock(&consumer_data->cond_mutex);
2198
2199 if (ret != 0) {
2200 errno = ret;
2201 if (ret == ETIMEDOUT) {
2202 int pth_ret;
2203
2204 /*
2205 * Call has timed out so we kill the kconsumerd_thread and return
2206 * an error.
2207 */
2208 ERR("Condition timed out. The consumer thread was never ready."
2209 " Killing it");
2210 pth_ret = pthread_cancel(consumer_data->thread);
2211 if (pth_ret < 0) {
2212 PERROR("pthread_cancel consumer thread");
2213 }
2214 } else {
2215 PERROR("pthread_cond_wait failed consumer thread");
2216 }
2217 /* Caller is expecting a negative value on failure. */
2218 ret = -1;
2219 goto error;
2220 }
2221
2222 pthread_mutex_lock(&consumer_data->pid_mutex);
2223 if (consumer_data->pid == 0) {
2224 ERR("Consumerd did not start");
2225 pthread_mutex_unlock(&consumer_data->pid_mutex);
2226 goto error;
2227 }
2228 pthread_mutex_unlock(&consumer_data->pid_mutex);
2229
2230 return 0;
2231
2232 error:
2233 return ret;
2234 }
2235
2236 /*
2237 * Join consumer thread
2238 */
2239 static int join_consumer_thread(struct consumer_data *consumer_data)
2240 {
2241 void *status;
2242
2243 /* Consumer pid must be a real one. */
2244 if (consumer_data->pid > 0) {
2245 int ret;
2246 ret = kill(consumer_data->pid, SIGTERM);
2247 if (ret) {
2248 PERROR("Error killing consumer daemon");
2249 return ret;
2250 }
2251 return pthread_join(consumer_data->thread, &status);
2252 } else {
2253 return 0;
2254 }
2255 }
2256
2257 /*
2258 * Fork and exec a consumer daemon (consumerd).
2259 *
2260 * Return pid if successful else -1.
2261 */
2262 static pid_t spawn_consumerd(struct consumer_data *consumer_data)
2263 {
2264 int ret;
2265 pid_t pid;
2266 const char *consumer_to_use;
2267 const char *verbosity;
2268 struct stat st;
2269
2270 DBG("Spawning consumerd");
2271
2272 pid = fork();
2273 if (pid == 0) {
2274 /*
2275 * Exec consumerd.
2276 */
2277 if (config.verbose_consumer) {
2278 verbosity = "--verbose";
2279 } else if (lttng_opt_quiet) {
2280 verbosity = "--quiet";
2281 } else {
2282 verbosity = "";
2283 }
2284
2285 switch (consumer_data->type) {
2286 case LTTNG_CONSUMER_KERNEL:
2287 /*
2288 * Find out which consumerd to execute. We will first try the
2289 * 64-bit path, then the sessiond's installation directory, and
2290 * fallback on the 32-bit one,
2291 */
2292 DBG3("Looking for a kernel consumer at these locations:");
2293 DBG3(" 1) %s", config.consumerd64_bin_path.value ? : "NULL");
2294 DBG3(" 2) %s/%s", INSTALL_BIN_PATH, DEFAULT_CONSUMERD_FILE);
2295 DBG3(" 3) %s", config.consumerd32_bin_path.value ? : "NULL");
2296 if (stat(config.consumerd64_bin_path.value, &st) == 0) {
2297 DBG3("Found location #1");
2298 consumer_to_use = config.consumerd64_bin_path.value;
2299 } else if (stat(INSTALL_BIN_PATH "/" DEFAULT_CONSUMERD_FILE, &st) == 0) {
2300 DBG3("Found location #2");
2301 consumer_to_use = INSTALL_BIN_PATH "/" DEFAULT_CONSUMERD_FILE;
2302 } else if (config.consumerd32_bin_path.value &&
2303 stat(config.consumerd32_bin_path.value, &st) == 0) {
2304 DBG3("Found location #3");
2305 consumer_to_use = config.consumerd32_bin_path.value;
2306 } else {
2307 DBG("Could not find any valid consumerd executable");
2308 ret = -EINVAL;
2309 goto error;
2310 }
2311 DBG("Using kernel consumer at: %s", consumer_to_use);
2312 (void) execl(consumer_to_use,
2313 "lttng-consumerd", verbosity, "-k",
2314 "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
2315 "--consumerd-err-sock", consumer_data->err_unix_sock_path,
2316 "--group", config.tracing_group_name.value,
2317 NULL);
2318 break;
2319 case LTTNG_CONSUMER64_UST:
2320 {
2321 if (config.consumerd64_lib_dir.value) {
2322 char *tmp;
2323 size_t tmplen;
2324 char *tmpnew;
2325
2326 tmp = lttng_secure_getenv("LD_LIBRARY_PATH");
2327 if (!tmp) {
2328 tmp = "";
2329 }
2330 tmplen = strlen(config.consumerd64_lib_dir.value) + 1 /* : */ + strlen(tmp);
2331 tmpnew = zmalloc(tmplen + 1 /* \0 */);
2332 if (!tmpnew) {
2333 ret = -ENOMEM;
2334 goto error;
2335 }
2336 strcat(tmpnew, config.consumerd64_lib_dir.value);
2337 if (tmp[0] != '\0') {
2338 strcat(tmpnew, ":");
2339 strcat(tmpnew, tmp);
2340 }
2341 ret = setenv("LD_LIBRARY_PATH", tmpnew, 1);
2342 free(tmpnew);
2343 if (ret) {
2344 ret = -errno;
2345 goto error;
2346 }
2347 }
2348 DBG("Using 64-bit UST consumer at: %s", config.consumerd64_bin_path.value);
2349 (void) execl(config.consumerd64_bin_path.value, "lttng-consumerd", verbosity, "-u",
2350 "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
2351 "--consumerd-err-sock", consumer_data->err_unix_sock_path,
2352 "--group", config.tracing_group_name.value,
2353 NULL);
2354 break;
2355 }
2356 case LTTNG_CONSUMER32_UST:
2357 {
2358 if (config.consumerd32_lib_dir.value) {
2359 char *tmp;
2360 size_t tmplen;
2361 char *tmpnew;
2362
2363 tmp = lttng_secure_getenv("LD_LIBRARY_PATH");
2364 if (!tmp) {
2365 tmp = "";
2366 }
2367 tmplen = strlen(config.consumerd32_lib_dir.value) + 1 /* : */ + strlen(tmp);
2368 tmpnew = zmalloc(tmplen + 1 /* \0 */);
2369 if (!tmpnew) {
2370 ret = -ENOMEM;
2371 goto error;
2372 }
2373 strcat(tmpnew, config.consumerd32_lib_dir.value);
2374 if (tmp[0] != '\0') {
2375 strcat(tmpnew, ":");
2376 strcat(tmpnew, tmp);
2377 }
2378 ret = setenv("LD_LIBRARY_PATH", tmpnew, 1);
2379 free(tmpnew);
2380 if (ret) {
2381 ret = -errno;
2382 goto error;
2383 }
2384 }
2385 DBG("Using 32-bit UST consumer at: %s", config.consumerd32_bin_path.value);
2386 (void) execl(config.consumerd32_bin_path.value, "lttng-consumerd", verbosity, "-u",
2387 "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
2388 "--consumerd-err-sock", consumer_data->err_unix_sock_path,
2389 "--group", config.tracing_group_name.value,
2390 NULL);
2391 break;
2392 }
2393 default:
2394 ERR("unknown consumer type");
2395 errno = 0;
2396 }
2397 if (errno != 0) {
2398 PERROR("Consumer execl()");
2399 }
2400 /* Reaching this point, we got a failure on our execl(). */
2401 exit(EXIT_FAILURE);
2402 } else if (pid > 0) {
2403 ret = pid;
2404 } else {
2405 PERROR("start consumer fork");
2406 ret = -errno;
2407 }
2408 error:
2409 return ret;
2410 }
2411
2412 /*
2413 * Spawn the consumerd daemon and session daemon thread.
2414 */
2415 static int start_consumerd(struct consumer_data *consumer_data)
2416 {
2417 int ret;
2418
2419 /*
2420 * Set the listen() state on the socket since there is a possible race
2421 * between the exec() of the consumer daemon and this call if place in the
2422 * consumer thread. See bug #366 for more details.
2423 */
2424 ret = lttcomm_listen_unix_sock(consumer_data->err_sock);
2425 if (ret < 0) {
2426 goto error;
2427 }
2428
2429 pthread_mutex_lock(&consumer_data->pid_mutex);
2430 if (consumer_data->pid != 0) {
2431 pthread_mutex_unlock(&consumer_data->pid_mutex);
2432 goto end;
2433 }
2434
2435 ret = spawn_consumerd(consumer_data);
2436 if (ret < 0) {
2437 ERR("Spawning consumerd failed");
2438 pthread_mutex_unlock(&consumer_data->pid_mutex);
2439 goto error;
2440 }
2441
2442 /* Setting up the consumer_data pid */
2443 consumer_data->pid = ret;
2444 DBG2("Consumer pid %d", consumer_data->pid);
2445 pthread_mutex_unlock(&consumer_data->pid_mutex);
2446
2447 DBG2("Spawning consumer control thread");
2448 ret = spawn_consumer_thread(consumer_data);
2449 if (ret < 0) {
2450 ERR("Fatal error spawning consumer control thread");
2451 goto error;
2452 }
2453
2454 end:
2455 return 0;
2456
2457 error:
2458 /* Cleanup already created sockets on error. */
2459 if (consumer_data->err_sock >= 0) {
2460 int err;
2461
2462 err = close(consumer_data->err_sock);
2463 if (err < 0) {
2464 PERROR("close consumer data error socket");
2465 }
2466 }
2467 return ret;
2468 }
2469
2470 /*
2471 * Setup necessary data for kernel tracer action.
2472 */
2473 static int init_kernel_tracer(void)
2474 {
2475 int ret;
2476
2477 /* Modprobe lttng kernel modules */
2478 ret = modprobe_lttng_control();
2479 if (ret < 0) {
2480 goto error;
2481 }
2482
2483 /* Open debugfs lttng */
2484 kernel_tracer_fd = open(module_proc_lttng, O_RDWR);
2485 if (kernel_tracer_fd < 0) {
2486 DBG("Failed to open %s", module_proc_lttng);
2487 goto error_open;
2488 }
2489
2490 /* Validate kernel version */
2491 ret = kernel_validate_version(kernel_tracer_fd, &kernel_tracer_version,
2492 &kernel_tracer_abi_version);
2493 if (ret < 0) {
2494 goto error_version;
2495 }
2496
2497 ret = modprobe_lttng_data();
2498 if (ret < 0) {
2499 goto error_modules;
2500 }
2501
2502 ret = kernel_supports_ring_buffer_snapshot_sample_positions(
2503 kernel_tracer_fd);
2504 if (ret < 0) {
2505 goto error_modules;
2506 }
2507
2508 if (ret < 1) {
2509 WARN("Kernel tracer does not support buffer monitoring. "
2510 "The monitoring timer of channels in the kernel domain "
2511 "will be set to 0 (disabled).");
2512 }
2513
2514 DBG("Kernel tracer fd %d", kernel_tracer_fd);
2515 return 0;
2516
2517 error_version:
2518 modprobe_remove_lttng_control();
2519 ret = close(kernel_tracer_fd);
2520 if (ret) {
2521 PERROR("close");
2522 }
2523 kernel_tracer_fd = -1;
2524 return LTTNG_ERR_KERN_VERSION;
2525
2526 error_modules:
2527 ret = close(kernel_tracer_fd);
2528 if (ret) {
2529 PERROR("close");
2530 }
2531
2532 error_open:
2533 modprobe_remove_lttng_control();
2534
2535 error:
2536 WARN("No kernel tracer available");
2537 kernel_tracer_fd = -1;
2538 if (!is_root) {
2539 return LTTNG_ERR_NEED_ROOT_SESSIOND;
2540 } else {
2541 return LTTNG_ERR_KERN_NA;
2542 }
2543 }
2544
2545
2546 /*
2547 * Copy consumer output from the tracing session to the domain session. The
2548 * function also applies the right modification on a per domain basis for the
2549 * trace files destination directory.
2550 *
2551 * Should *NOT* be called with RCU read-side lock held.
2552 */
2553 static int copy_session_consumer(int domain, struct ltt_session *session)
2554 {
2555 int ret;
2556 const char *dir_name;
2557 struct consumer_output *consumer;
2558
2559 assert(session);
2560 assert(session->consumer);
2561
2562 switch (domain) {
2563 case LTTNG_DOMAIN_KERNEL:
2564 DBG3("Copying tracing session consumer output in kernel session");
2565 /*
2566 * XXX: We should audit the session creation and what this function
2567 * does "extra" in order to avoid a destroy since this function is used
2568 * in the domain session creation (kernel and ust) only. Same for UST
2569 * domain.
2570 */
2571 if (session->kernel_session->consumer) {
2572 consumer_output_put(session->kernel_session->consumer);
2573 }
2574 session->kernel_session->consumer =
2575 consumer_copy_output(session->consumer);
2576 /* Ease our life a bit for the next part */
2577 consumer = session->kernel_session->consumer;
2578 dir_name = DEFAULT_KERNEL_TRACE_DIR;
2579 break;
2580 case LTTNG_DOMAIN_JUL:
2581 case LTTNG_DOMAIN_LOG4J:
2582 case LTTNG_DOMAIN_PYTHON:
2583 case LTTNG_DOMAIN_UST:
2584 DBG3("Copying tracing session consumer output in UST session");
2585 if (session->ust_session->consumer) {
2586 consumer_output_put(session->ust_session->consumer);
2587 }
2588 session->ust_session->consumer =
2589 consumer_copy_output(session->consumer);
2590 /* Ease our life a bit for the next part */
2591 consumer = session->ust_session->consumer;
2592 dir_name = DEFAULT_UST_TRACE_DIR;
2593 break;
2594 default:
2595 ret = LTTNG_ERR_UNKNOWN_DOMAIN;
2596 goto error;
2597 }
2598
2599 /* Append correct directory to subdir */
2600 strncat(consumer->subdir, dir_name,
2601 sizeof(consumer->subdir) - strlen(consumer->subdir) - 1);
2602 DBG3("Copy session consumer subdir %s", consumer->subdir);
2603
2604 ret = LTTNG_OK;
2605
2606 error:
2607 return ret;
2608 }
2609
2610 /*
2611 * Create an UST session and add it to the session ust list.
2612 *
2613 * Should *NOT* be called with RCU read-side lock held.
2614 */
2615 static int create_ust_session(struct ltt_session *session,
2616 struct lttng_domain *domain)
2617 {
2618 int ret;
2619 struct ltt_ust_session *lus = NULL;
2620
2621 assert(session);
2622 assert(domain);
2623 assert(session->consumer);
2624
2625 switch (domain->type) {
2626 case LTTNG_DOMAIN_JUL:
2627 case LTTNG_DOMAIN_LOG4J:
2628 case LTTNG_DOMAIN_PYTHON:
2629 case LTTNG_DOMAIN_UST:
2630 break;
2631 default:
2632 ERR("Unknown UST domain on create session %d", domain->type);
2633 ret = LTTNG_ERR_UNKNOWN_DOMAIN;
2634 goto error;
2635 }
2636
2637 DBG("Creating UST session");
2638
2639 lus = trace_ust_create_session(session->id);
2640 if (lus == NULL) {
2641 ret = LTTNG_ERR_UST_SESS_FAIL;
2642 goto error;
2643 }
2644
2645 lus->uid = session->uid;
2646 lus->gid = session->gid;
2647 lus->output_traces = session->output_traces;
2648 lus->snapshot_mode = session->snapshot_mode;
2649 lus->live_timer_interval = session->live_timer;
2650 session->ust_session = lus;
2651 if (session->shm_path[0]) {
2652 strncpy(lus->root_shm_path, session->shm_path,
2653 sizeof(lus->root_shm_path));
2654 lus->root_shm_path[sizeof(lus->root_shm_path) - 1] = '\0';
2655 strncpy(lus->shm_path, session->shm_path,
2656 sizeof(lus->shm_path));
2657 lus->shm_path[sizeof(lus->shm_path) - 1] = '\0';
2658 strncat(lus->shm_path, "/ust",
2659 sizeof(lus->shm_path) - strlen(lus->shm_path) - 1);
2660 }
2661 /* Copy session output to the newly created UST session */
2662 ret = copy_session_consumer(domain->type, session);
2663 if (ret != LTTNG_OK) {
2664 goto error;
2665 }
2666
2667 return LTTNG_OK;
2668
2669 error:
2670 free(lus);
2671 session->ust_session = NULL;
2672 return ret;
2673 }
2674
2675 /*
2676 * Create a kernel tracer session then create the default channel.
2677 */
2678 static int create_kernel_session(struct ltt_session *session)
2679 {
2680 int ret;
2681
2682 DBG("Creating kernel session");
2683
2684 ret = kernel_create_session(session, kernel_tracer_fd);
2685 if (ret < 0) {
2686 ret = LTTNG_ERR_KERN_SESS_FAIL;
2687 goto error;
2688 }
2689
2690 /* Code flow safety */
2691 assert(session->kernel_session);
2692
2693 /* Copy session output to the newly created Kernel session */
2694 ret = copy_session_consumer(LTTNG_DOMAIN_KERNEL, session);
2695 if (ret != LTTNG_OK) {
2696 goto error;
2697 }
2698
2699 session->kernel_session->uid = session->uid;
2700 session->kernel_session->gid = session->gid;
2701 session->kernel_session->output_traces = session->output_traces;
2702 session->kernel_session->snapshot_mode = session->snapshot_mode;
2703
2704 return LTTNG_OK;
2705
2706 error:
2707 trace_kernel_destroy_session(session->kernel_session);
2708 session->kernel_session = NULL;
2709 return ret;
2710 }
2711
2712 /*
2713 * Count number of session permitted by uid/gid.
2714 */
2715 static unsigned int lttng_sessions_count(uid_t uid, gid_t gid)
2716 {
2717 unsigned int i = 0;
2718 struct ltt_session *session;
2719
2720 DBG("Counting number of available session for UID %d GID %d",
2721 uid, gid);
2722 cds_list_for_each_entry(session, &session_list_ptr->head, list) {
2723 /*
2724 * Only list the sessions the user can control.
2725 */
2726 if (!session_access_ok(session, uid, gid)) {
2727 continue;
2728 }
2729 i++;
2730 }
2731 return i;
2732 }
2733
2734 static int receive_userspace_probe(struct command_ctx *cmd_ctx, int sock,
2735 int *sock_error, struct lttng_event *event)
2736 {
2737 int fd, ret;
2738 struct lttng_userspace_probe_location *probe_location;
2739 const struct lttng_userspace_probe_location_lookup_method *lookup = NULL;
2740 struct lttng_dynamic_buffer probe_location_buffer;
2741 struct lttng_buffer_view buffer_view;
2742
2743 /*
2744 * Create a buffer to store the serialized version of the probe
2745 * location.
2746 */
2747 lttng_dynamic_buffer_init(&probe_location_buffer);
2748 ret = lttng_dynamic_buffer_set_size(&probe_location_buffer,
2749 cmd_ctx->lsm->u.enable.userspace_probe_location_len);
2750 if (ret) {
2751 ret = LTTNG_ERR_NOMEM;
2752 goto error;
2753 }
2754
2755 /*
2756 * Receive the probe location.
2757 */
2758 ret = lttcomm_recv_unix_sock(sock, probe_location_buffer.data,
2759 probe_location_buffer.size);
2760 if (ret <= 0) {
2761 DBG("Nothing recv() from client var len data... continuing");
2762 *sock_error = 1;
2763 lttng_dynamic_buffer_reset(&probe_location_buffer);
2764 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
2765 goto error;
2766 }
2767
2768 buffer_view = lttng_buffer_view_from_dynamic_buffer(
2769 &probe_location_buffer, 0, probe_location_buffer.size);
2770
2771 /*
2772 * Extract the probe location from the serialized version.
2773 */
2774 ret = lttng_userspace_probe_location_create_from_buffer(
2775 &buffer_view, &probe_location);
2776 if (ret < 0) {
2777 WARN("Failed to create a userspace probe location from the received buffer");
2778 lttng_dynamic_buffer_reset( &probe_location_buffer);
2779 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
2780 goto error;
2781 }
2782
2783 /*
2784 * Receive the file descriptor to the target binary from the client.
2785 */
2786 DBG("Receiving userspace probe target FD from client ...");
2787 ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
2788 if (ret <= 0) {
2789 DBG("Nothing recv() from client userspace probe fd... continuing");
2790 *sock_error = 1;
2791 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
2792 goto error;
2793 }
2794
2795 /*
2796 * Set the file descriptor received from the client through the unix
2797 * socket in the probe location.
2798 */
2799 lookup = lttng_userspace_probe_location_get_lookup_method(probe_location);
2800 if (!lookup) {
2801 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
2802 goto error;
2803 }
2804
2805 /*
2806 * From the kernel tracer's perspective, all userspace probe event types
2807 * are all the same: a file and an offset.
2808 */
2809 switch (lttng_userspace_probe_location_lookup_method_get_type(lookup)) {
2810 case LTTNG_USERSPACE_PROBE_LOCATION_LOOKUP_METHOD_TYPE_FUNCTION_ELF:
2811 ret = lttng_userspace_probe_location_function_set_binary_fd(
2812 probe_location, fd);
2813 break;
2814 case LTTNG_USERSPACE_PROBE_LOCATION_LOOKUP_METHOD_TYPE_TRACEPOINT_SDT:
2815 ret = lttng_userspace_probe_location_tracepoint_set_binary_fd(
2816 probe_location, fd);
2817 break;
2818 default:
2819 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
2820 goto error;
2821 }
2822
2823 if (ret) {
2824 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
2825 goto error;
2826 }
2827
2828 /* Attach the probe location to the event. */
2829 ret = lttng_event_set_userspace_probe_location(event, probe_location);
2830 if (ret) {
2831 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
2832 goto error;
2833 }
2834
2835 lttng_dynamic_buffer_reset(&probe_location_buffer);
2836 error:
2837 return ret;
2838 }
2839
2840 /*
2841 * Check if the current kernel tracer supports the session rotation feature.
2842 * Return 1 if it does, 0 otherwise.
2843 */
2844 static int check_rotate_compatible(void)
2845 {
2846 int ret = 1;
2847
2848 if (kernel_tracer_version.major != 2 || kernel_tracer_version.minor < 11) {
2849 DBG("Kernel tracer version is not compatible with the rotation feature");
2850 ret = 0;
2851 }
2852
2853 return ret;
2854 }
2855
2856 /*
2857 * Process the command requested by the lttng client within the command
2858 * context structure. This function make sure that the return structure (llm)
2859 * is set and ready for transmission before returning.
2860 *
2861 * Return any error encountered or 0 for success.
2862 *
2863 * "sock" is only used for special-case var. len data.
2864 *
2865 * Should *NOT* be called with RCU read-side lock held.
2866 */
2867 static int process_client_msg(struct command_ctx *cmd_ctx, int sock,
2868 int *sock_error)
2869 {
2870 int ret = LTTNG_OK;
2871 int need_tracing_session = 1;
2872 int need_domain;
2873
2874 DBG("Processing client command %d", cmd_ctx->lsm->cmd_type);
2875
2876 assert(!rcu_read_ongoing());
2877
2878 *sock_error = 0;
2879
2880 switch (cmd_ctx->lsm->cmd_type) {
2881 case LTTNG_CREATE_SESSION:
2882 case LTTNG_CREATE_SESSION_SNAPSHOT:
2883 case LTTNG_CREATE_SESSION_LIVE:
2884 case LTTNG_DESTROY_SESSION:
2885 case LTTNG_LIST_SESSIONS:
2886 case LTTNG_LIST_DOMAINS:
2887 case LTTNG_START_TRACE:
2888 case LTTNG_STOP_TRACE:
2889 case LTTNG_DATA_PENDING:
2890 case LTTNG_SNAPSHOT_ADD_OUTPUT:
2891 case LTTNG_SNAPSHOT_DEL_OUTPUT:
2892 case LTTNG_SNAPSHOT_LIST_OUTPUT:
2893 case LTTNG_SNAPSHOT_RECORD:
2894 case LTTNG_SAVE_SESSION:
2895 case LTTNG_SET_SESSION_SHM_PATH:
2896 case LTTNG_REGENERATE_METADATA:
2897 case LTTNG_REGENERATE_STATEDUMP:
2898 case LTTNG_REGISTER_TRIGGER:
2899 case LTTNG_UNREGISTER_TRIGGER:
2900 case LTTNG_ROTATE_SESSION:
2901 case LTTNG_ROTATION_GET_INFO:
2902 case LTTNG_ROTATION_SET_SCHEDULE:
2903 case LTTNG_SESSION_LIST_ROTATION_SCHEDULES:
2904 need_domain = 0;
2905 break;
2906 default:
2907 need_domain = 1;
2908 }
2909
2910 if (config.no_kernel && need_domain
2911 && cmd_ctx->lsm->domain.type == LTTNG_DOMAIN_KERNEL) {
2912 if (!is_root) {
2913 ret = LTTNG_ERR_NEED_ROOT_SESSIOND;
2914 } else {
2915 ret = LTTNG_ERR_KERN_NA;
2916 }
2917 goto error;
2918 }
2919
2920 /* Deny register consumer if we already have a spawned consumer. */
2921 if (cmd_ctx->lsm->cmd_type == LTTNG_REGISTER_CONSUMER) {
2922 pthread_mutex_lock(&kconsumer_data.pid_mutex);
2923 if (kconsumer_data.pid > 0) {
2924 ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
2925 pthread_mutex_unlock(&kconsumer_data.pid_mutex);
2926 goto error;
2927 }
2928 pthread_mutex_unlock(&kconsumer_data.pid_mutex);
2929 }
2930
2931 /*
2932 * Check for command that don't needs to allocate a returned payload. We do
2933 * this here so we don't have to make the call for no payload at each
2934 * command.
2935 */
2936 switch(cmd_ctx->lsm->cmd_type) {
2937 case LTTNG_LIST_SESSIONS:
2938 case LTTNG_LIST_TRACEPOINTS:
2939 case LTTNG_LIST_TRACEPOINT_FIELDS:
2940 case LTTNG_LIST_DOMAINS:
2941 case LTTNG_LIST_CHANNELS:
2942 case LTTNG_LIST_EVENTS:
2943 case LTTNG_LIST_SYSCALLS:
2944 case LTTNG_LIST_TRACKER_PIDS:
2945 case LTTNG_DATA_PENDING:
2946 case LTTNG_ROTATE_SESSION:
2947 case LTTNG_ROTATION_GET_INFO:
2948 case LTTNG_SESSION_LIST_ROTATION_SCHEDULES:
2949 break;
2950 default:
2951 /* Setup lttng message with no payload */
2952 ret = setup_lttng_msg_no_cmd_header(cmd_ctx, NULL, 0);
2953 if (ret < 0) {
2954 /* This label does not try to unlock the session */
2955 goto init_setup_error;
2956 }
2957 }
2958
2959 /* Commands that DO NOT need a session. */
2960 switch (cmd_ctx->lsm->cmd_type) {
2961 case LTTNG_CREATE_SESSION:
2962 case LTTNG_CREATE_SESSION_SNAPSHOT:
2963 case LTTNG_CREATE_SESSION_LIVE:
2964 case LTTNG_LIST_SESSIONS:
2965 case LTTNG_LIST_TRACEPOINTS:
2966 case LTTNG_LIST_SYSCALLS:
2967 case LTTNG_LIST_TRACEPOINT_FIELDS:
2968 case LTTNG_SAVE_SESSION:
2969 case LTTNG_REGISTER_TRIGGER:
2970 case LTTNG_UNREGISTER_TRIGGER:
2971 need_tracing_session = 0;
2972 break;
2973 default:
2974 DBG("Getting session %s by name", cmd_ctx->lsm->session.name);
2975 /*
2976 * We keep the session list lock across _all_ commands
2977 * for now, because the per-session lock does not
2978 * handle teardown properly.
2979 */
2980 session_lock_list();
2981 cmd_ctx->session = session_find_by_name(cmd_ctx->lsm->session.name);
2982 if (cmd_ctx->session == NULL) {
2983 ret = LTTNG_ERR_SESS_NOT_FOUND;
2984 goto error;
2985 } else {
2986 /* Acquire lock for the session */
2987 session_lock(cmd_ctx->session);
2988 }
2989 break;
2990 }
2991
2992 /*
2993 * Commands that need a valid session but should NOT create one if none
2994 * exists. Instead of creating one and destroying it when the command is
2995 * handled, process that right before so we save some round trip in useless
2996 * code path.
2997 */
2998 switch (cmd_ctx->lsm->cmd_type) {
2999 case LTTNG_DISABLE_CHANNEL:
3000 case LTTNG_DISABLE_EVENT:
3001 switch (cmd_ctx->lsm->domain.type) {
3002 case LTTNG_DOMAIN_KERNEL:
3003 if (!cmd_ctx->session->kernel_session) {
3004 ret = LTTNG_ERR_NO_CHANNEL;
3005 goto error;
3006 }
3007 break;
3008 case LTTNG_DOMAIN_JUL:
3009 case LTTNG_DOMAIN_LOG4J:
3010 case LTTNG_DOMAIN_PYTHON:
3011 case LTTNG_DOMAIN_UST:
3012 if (!cmd_ctx->session->ust_session) {
3013 ret = LTTNG_ERR_NO_CHANNEL;
3014 goto error;
3015 }
3016 break;
3017 default:
3018 ret = LTTNG_ERR_UNKNOWN_DOMAIN;
3019 goto error;
3020 }
3021 default:
3022 break;
3023 }
3024
3025 if (!need_domain) {
3026 goto skip_domain;
3027 }
3028
3029 /*
3030 * Check domain type for specific "pre-action".
3031 */
3032 switch (cmd_ctx->lsm->domain.type) {
3033 case LTTNG_DOMAIN_KERNEL:
3034 if (!is_root) {
3035 ret = LTTNG_ERR_NEED_ROOT_SESSIOND;
3036 goto error;
3037 }
3038
3039 /* Kernel tracer check */
3040 if (kernel_tracer_fd == -1) {
3041 /* Basically, load kernel tracer modules */
3042 ret = init_kernel_tracer();
3043 if (ret != 0) {
3044 goto error;
3045 }
3046 }
3047
3048 /* Consumer is in an ERROR state. Report back to client */
3049 if (uatomic_read(&kernel_consumerd_state) == CONSUMER_ERROR) {
3050 ret = LTTNG_ERR_NO_KERNCONSUMERD;
3051 goto error;
3052 }
3053
3054 /* Need a session for kernel command */
3055 if (need_tracing_session) {
3056 if (cmd_ctx->session->kernel_session == NULL) {
3057 ret = create_kernel_session(cmd_ctx->session);
3058 if (ret < 0) {
3059 ret = LTTNG_ERR_KERN_SESS_FAIL;
3060 goto error;
3061 }
3062 }
3063
3064 /* Start the kernel consumer daemon */
3065 pthread_mutex_lock(&kconsumer_data.pid_mutex);
3066 if (kconsumer_data.pid == 0 &&
3067 cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER) {
3068 pthread_mutex_unlock(&kconsumer_data.pid_mutex);
3069 ret = start_consumerd(&kconsumer_data);
3070 if (ret < 0) {
3071 ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
3072 goto error;
3073 }
3074 uatomic_set(&kernel_consumerd_state, CONSUMER_STARTED);
3075 } else {
3076 pthread_mutex_unlock(&kconsumer_data.pid_mutex);
3077 }
3078
3079 /*
3080 * The consumer was just spawned so we need to add the socket to
3081 * the consumer output of the session if exist.
3082 */
3083 ret = consumer_create_socket(&kconsumer_data,
3084 cmd_ctx->session->kernel_session->consumer);
3085 if (ret < 0) {
3086 goto error;
3087 }
3088 }
3089
3090 break;
3091 case LTTNG_DOMAIN_JUL:
3092 case LTTNG_DOMAIN_LOG4J:
3093 case LTTNG_DOMAIN_PYTHON:
3094 case LTTNG_DOMAIN_UST:
3095 {
3096 if (!ust_app_supported()) {
3097 ret = LTTNG_ERR_NO_UST;
3098 goto error;
3099 }
3100 /* Consumer is in an ERROR state. Report back to client */
3101 if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) {
3102 ret = LTTNG_ERR_NO_USTCONSUMERD;
3103 goto error;
3104 }
3105
3106 if (need_tracing_session) {
3107 /* Create UST session if none exist. */
3108 if (cmd_ctx->session->ust_session == NULL) {
3109 ret = create_ust_session(cmd_ctx->session,
3110 &cmd_ctx->lsm->domain);
3111 if (ret != LTTNG_OK) {
3112 goto error;
3113 }
3114 }
3115
3116 /* Start the UST consumer daemons */
3117 /* 64-bit */
3118 pthread_mutex_lock(&ustconsumer64_data.pid_mutex);
3119 if (config.consumerd64_bin_path.value &&
3120 ustconsumer64_data.pid == 0 &&
3121 cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER) {
3122 pthread_mutex_unlock(&ustconsumer64_data.pid_mutex);
3123 ret = start_consumerd(&ustconsumer64_data);
3124 if (ret < 0) {
3125 ret = LTTNG_ERR_UST_CONSUMER64_FAIL;
3126 uatomic_set(&ust_consumerd64_fd, -EINVAL);
3127 goto error;
3128 }
3129
3130 uatomic_set(&ust_consumerd64_fd, ustconsumer64_data.cmd_sock);
3131 uatomic_set(&ust_consumerd_state, CONSUMER_STARTED);
3132 } else {
3133 pthread_mutex_unlock(&ustconsumer64_data.pid_mutex);
3134 }
3135
3136 /*
3137 * Setup socket for consumer 64 bit. No need for atomic access
3138 * since it was set above and can ONLY be set in this thread.
3139 */
3140 ret = consumer_create_socket(&ustconsumer64_data,
3141 cmd_ctx->session->ust_session->consumer);
3142 if (ret < 0) {
3143 goto error;
3144 }
3145
3146 /* 32-bit */
3147 pthread_mutex_lock(&ustconsumer32_data.pid_mutex);
3148 if (config.consumerd32_bin_path.value &&
3149 ustconsumer32_data.pid == 0 &&
3150 cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER) {
3151 pthread_mutex_unlock(&ustconsumer32_data.pid_mutex);
3152 ret = start_consumerd(&ustconsumer32_data);
3153 if (ret < 0) {
3154 ret = LTTNG_ERR_UST_CONSUMER32_FAIL;
3155 uatomic_set(&ust_consumerd32_fd, -EINVAL);
3156 goto error;
3157 }
3158
3159 uatomic_set(&ust_consumerd32_fd, ustconsumer32_data.cmd_sock);
3160 uatomic_set(&ust_consumerd_state, CONSUMER_STARTED);
3161 } else {
3162 pthread_mutex_unlock(&ustconsumer32_data.pid_mutex);
3163 }
3164
3165 /*
3166 * Setup socket for consumer 32 bit. No need for atomic access
3167 * since it was set above and can ONLY be set in this thread.
3168 */
3169 ret = consumer_create_socket(&ustconsumer32_data,
3170 cmd_ctx->session->ust_session->consumer);
3171 if (ret < 0) {
3172 goto error;
3173 }
3174 }
3175 break;
3176 }
3177 default:
3178 break;
3179 }
3180 skip_domain:
3181
3182 /* Validate consumer daemon state when start/stop trace command */
3183 if (cmd_ctx->lsm->cmd_type == LTTNG_START_TRACE ||
3184 cmd_ctx->lsm->cmd_type == LTTNG_STOP_TRACE) {
3185 switch (cmd_ctx->lsm->domain.type) {
3186 case LTTNG_DOMAIN_NONE:
3187 break;
3188 case LTTNG_DOMAIN_JUL:
3189 case LTTNG_DOMAIN_LOG4J:
3190 case LTTNG_DOMAIN_PYTHON:
3191 case LTTNG_DOMAIN_UST:
3192 if (uatomic_read(&ust_consumerd_state) != CONSUMER_STARTED) {
3193 ret = LTTNG_ERR_NO_USTCONSUMERD;
3194 goto error;
3195 }
3196 break;
3197 case LTTNG_DOMAIN_KERNEL:
3198 if (uatomic_read(&kernel_consumerd_state) != CONSUMER_STARTED) {
3199 ret = LTTNG_ERR_NO_KERNCONSUMERD;
3200 goto error;
3201 }
3202 break;
3203 default:
3204 ret = LTTNG_ERR_UNKNOWN_DOMAIN;
3205 goto error;
3206 }
3207 }
3208
3209 /*
3210 * Check that the UID or GID match that of the tracing session.
3211 * The root user can interact with all sessions.
3212 */
3213 if (need_tracing_session) {
3214 if (!session_access_ok(cmd_ctx->session,
3215 LTTNG_SOCK_GET_UID_CRED(&cmd_ctx->creds),
3216 LTTNG_SOCK_GET_GID_CRED(&cmd_ctx->creds))) {
3217 ret = LTTNG_ERR_EPERM;
3218 goto error;
3219 }
3220 }
3221
3222 /*
3223 * Send relayd information to consumer as soon as we have a domain and a
3224 * session defined.
3225 */
3226 if (cmd_ctx->session && need_domain) {
3227 /*
3228 * Setup relayd if not done yet. If the relayd information was already
3229 * sent to the consumer, this call will gracefully return.
3230 */
3231 ret = cmd_setup_relayd(cmd_ctx->session);
3232 if (ret != LTTNG_OK) {
3233 goto error;
3234 }
3235 }
3236
3237 /* Process by command type */
3238 switch (cmd_ctx->lsm->cmd_type) {
3239 case LTTNG_ADD_CONTEXT:
3240 {
3241 /*
3242 * An LTTNG_ADD_CONTEXT command might have a supplementary
3243 * payload if the context being added is an application context.
3244 */
3245 if (cmd_ctx->lsm->u.context.ctx.ctx ==
3246 LTTNG_EVENT_CONTEXT_APP_CONTEXT) {
3247 char *provider_name = NULL, *context_name = NULL;
3248 size_t provider_name_len =
3249 cmd_ctx->lsm->u.context.provider_name_len;
3250 size_t context_name_len =
3251 cmd_ctx->lsm->u.context.context_name_len;
3252
3253 if (provider_name_len == 0 || context_name_len == 0) {
3254 /*
3255 * Application provider and context names MUST
3256 * be provided.
3257 */
3258 ret = -LTTNG_ERR_INVALID;
3259 goto error;
3260 }
3261
3262 provider_name = zmalloc(provider_name_len + 1);
3263 if (!provider_name) {
3264 ret = -LTTNG_ERR_NOMEM;
3265 goto error;
3266 }
3267 cmd_ctx->lsm->u.context.ctx.u.app_ctx.provider_name =
3268 provider_name;
3269
3270 context_name = zmalloc(context_name_len + 1);
3271 if (!context_name) {
3272 ret = -LTTNG_ERR_NOMEM;
3273 goto error_add_context;
3274 }
3275 cmd_ctx->lsm->u.context.ctx.u.app_ctx.ctx_name =
3276 context_name;
3277
3278 ret = lttcomm_recv_unix_sock(sock, provider_name,
3279 provider_name_len);
3280 if (ret < 0) {
3281 goto error_add_context;
3282 }
3283
3284 ret = lttcomm_recv_unix_sock(sock, context_name,
3285 context_name_len);
3286 if (ret < 0) {
3287 goto error_add_context;
3288 }
3289 }
3290
3291 /*
3292 * cmd_add_context assumes ownership of the provider and context
3293 * names.
3294 */
3295 ret = cmd_add_context(cmd_ctx->session,
3296 cmd_ctx->lsm->domain.type,
3297 cmd_ctx->lsm->u.context.channel_name,
3298 &cmd_ctx->lsm->u.context.ctx,
3299 kernel_poll_pipe[1]);
3300
3301 cmd_ctx->lsm->u.context.ctx.u.app_ctx.provider_name = NULL;
3302 cmd_ctx->lsm->u.context.ctx.u.app_ctx.ctx_name = NULL;
3303 error_add_context:
3304 free(cmd_ctx->lsm->u.context.ctx.u.app_ctx.provider_name);
3305 free(cmd_ctx->lsm->u.context.ctx.u.app_ctx.ctx_name);
3306 if (ret < 0) {
3307 goto error;
3308 }
3309 break;
3310 }
3311 case LTTNG_DISABLE_CHANNEL:
3312 {
3313 ret = cmd_disable_channel(cmd_ctx->session, cmd_ctx->lsm->domain.type,
3314 cmd_ctx->lsm->u.disable.channel_name);
3315 break;
3316 }
3317 case LTTNG_DISABLE_EVENT:
3318 {
3319
3320 /*
3321 * FIXME: handle filter; for now we just receive the filter's
3322 * bytecode along with the filter expression which are sent by
3323 * liblttng-ctl and discard them.
3324 *
3325 * This fixes an issue where the client may block while sending
3326 * the filter payload and encounter an error because the session
3327 * daemon closes the socket without ever handling this data.
3328 */
3329 size_t count = cmd_ctx->lsm->u.disable.expression_len +
3330 cmd_ctx->lsm->u.disable.bytecode_len;
3331
3332 if (count) {
3333 char data[LTTNG_FILTER_MAX_LEN];
3334
3335 DBG("Discarding disable event command payload of size %zu", count);
3336 while (count) {
3337 ret = lttcomm_recv_unix_sock(sock, data,
3338 count > sizeof(data) ? sizeof(data) : count);
3339 if (ret < 0) {
3340 goto error;
3341 }
3342
3343 count -= (size_t) ret;
3344 }
3345 }
3346 /* FIXME: passing packed structure to non-packed pointer */
3347 ret = cmd_disable_event(cmd_ctx->session, cmd_ctx->lsm->domain.type,
3348 cmd_ctx->lsm->u.disable.channel_name,
3349 &cmd_ctx->lsm->u.disable.event);
3350 break;
3351 }
3352 case LTTNG_ENABLE_CHANNEL:
3353 {
3354 cmd_ctx->lsm->u.channel.chan.attr.extended.ptr =
3355 (struct lttng_channel_extended *) &cmd_ctx->lsm->u.channel.extended;
3356 ret = cmd_enable_channel(cmd_ctx->session, &cmd_ctx->lsm->domain,
3357 &cmd_ctx->lsm->u.channel.chan,
3358 kernel_poll_pipe[1]);
3359 break;
3360 }
3361 case LTTNG_TRACK_PID:
3362 {
3363 ret = cmd_track_pid(cmd_ctx->session,
3364 cmd_ctx->lsm->domain.type,
3365 cmd_ctx->lsm->u.pid_tracker.pid);
3366 break;
3367 }
3368 case LTTNG_UNTRACK_PID:
3369 {
3370 ret = cmd_untrack_pid(cmd_ctx->session,
3371 cmd_ctx->lsm->domain.type,
3372 cmd_ctx->lsm->u.pid_tracker.pid);
3373 break;
3374 }
3375 case LTTNG_ENABLE_EVENT:
3376 {
3377 struct lttng_event *ev = NULL;
3378 struct lttng_event_exclusion *exclusion = NULL;
3379 struct lttng_filter_bytecode *bytecode = NULL;
3380 char *filter_expression = NULL;
3381
3382 /* Handle exclusion events and receive it from the client. */
3383 if (cmd_ctx->lsm->u.enable.exclusion_count > 0) {
3384 size_t count = cmd_ctx->lsm->u.enable.exclusion_count;
3385
3386 exclusion = zmalloc(sizeof(struct lttng_event_exclusion) +
3387 (count * LTTNG_SYMBOL_NAME_LEN));
3388 if (!exclusion) {
3389 ret = LTTNG_ERR_EXCLUSION_NOMEM;
3390 goto error;
3391 }
3392
3393 DBG("Receiving var len exclusion event list from client ...");
3394 exclusion->count = count;
3395 ret = lttcomm_recv_unix_sock(sock, exclusion->names,
3396 count * LTTNG_SYMBOL_NAME_LEN);
3397 if (ret <= 0) {
3398 DBG("Nothing recv() from client var len data... continuing");
3399 *sock_error = 1;
3400 free(exclusion);
3401 ret = LTTNG_ERR_EXCLUSION_INVAL;
3402 goto error;
3403 }
3404 }
3405
3406 /* Get filter expression from client. */
3407 if (cmd_ctx->lsm->u.enable.expression_len > 0) {
3408 size_t expression_len =
3409 cmd_ctx->lsm->u.enable.expression_len;
3410
3411 if (expression_len > LTTNG_FILTER_MAX_LEN) {
3412 ret = LTTNG_ERR_FILTER_INVAL;
3413 free(exclusion);
3414 goto error;
3415 }
3416
3417 filter_expression = zmalloc(expression_len);
3418 if (!filter_expression) {
3419 free(exclusion);
3420 ret = LTTNG_ERR_FILTER_NOMEM;
3421 goto error;
3422 }
3423
3424 /* Receive var. len. data */
3425 DBG("Receiving var len filter's expression from client ...");
3426 ret = lttcomm_recv_unix_sock(sock, filter_expression,
3427 expression_len);
3428 if (ret <= 0) {
3429 DBG("Nothing recv() from client var len data... continuing");
3430 *sock_error = 1;
3431 free(filter_expression);
3432 free(exclusion);
3433 ret = LTTNG_ERR_FILTER_INVAL;
3434 goto error;
3435 }
3436 }
3437
3438 /* Handle filter and get bytecode from client. */
3439 if (cmd_ctx->lsm->u.enable.bytecode_len > 0) {
3440 size_t bytecode_len = cmd_ctx->lsm->u.enable.bytecode_len;
3441
3442 if (bytecode_len > LTTNG_FILTER_MAX_LEN) {
3443 ret = LTTNG_ERR_FILTER_INVAL;
3444 free(filter_expression);
3445 free(exclusion);
3446 goto error;
3447 }
3448
3449 bytecode = zmalloc(bytecode_len);
3450 if (!bytecode) {
3451 free(filter_expression);
3452 free(exclusion);
3453 ret = LTTNG_ERR_FILTER_NOMEM;
3454 goto error;
3455 }
3456
3457 /* Receive var. len. data */
3458 DBG("Receiving var len filter's bytecode from client ...");
3459 ret = lttcomm_recv_unix_sock(sock, bytecode, bytecode_len);
3460 if (ret <= 0) {
3461 DBG("Nothing recv() from client var len data... continuing");
3462 *sock_error = 1;
3463 free(filter_expression);
3464 free(bytecode);
3465 free(exclusion);
3466 ret = LTTNG_ERR_FILTER_INVAL;
3467 goto error;
3468 }
3469
3470 if ((bytecode->len + sizeof(*bytecode)) != bytecode_len) {
3471 free(filter_expression);
3472 free(bytecode);
3473 free(exclusion);
3474 ret = LTTNG_ERR_FILTER_INVAL;
3475 goto error;
3476 }
3477 }
3478
3479 ev = lttng_event_copy(&cmd_ctx->lsm->u.enable.event);
3480 if (!ev) {
3481 DBG("Failed to copy event: %s",
3482 cmd_ctx->lsm->u.enable.event.name);
3483 free(filter_expression);
3484 free(bytecode);
3485 free(exclusion);
3486 ret = LTTNG_ERR_NOMEM;
3487 goto error;
3488 }
3489
3490
3491 if (cmd_ctx->lsm->u.enable.userspace_probe_location_len > 0) {
3492 /* Expect a userspace probe description. */
3493 ret = receive_userspace_probe(cmd_ctx, sock, sock_error, ev);
3494 if (ret) {
3495 free(filter_expression);
3496 free(bytecode);
3497 free(exclusion);
3498 lttng_event_destroy(ev);
3499 goto error;
3500 }
3501 }
3502
3503 ret = cmd_enable_event(cmd_ctx->session, &cmd_ctx->lsm->domain,
3504 cmd_ctx->lsm->u.enable.channel_name,
3505 ev,
3506 filter_expression, bytecode, exclusion,
3507 kernel_poll_pipe[1]);
3508 lttng_event_destroy(ev);
3509 break;
3510 }
3511 case LTTNG_LIST_TRACEPOINTS:
3512 {
3513 struct lttng_event *events;
3514 ssize_t nb_events;
3515
3516 session_lock_list();
3517 nb_events = cmd_list_tracepoints(cmd_ctx->lsm->domain.type, &events);
3518 session_unlock_list();
3519 if (nb_events < 0) {
3520 /* Return value is a negative lttng_error_code. */
3521 ret = -nb_events;
3522 goto error;
3523 }
3524
3525 /*
3526 * Setup lttng message with payload size set to the event list size in
3527 * bytes and then copy list into the llm payload.
3528 */
3529 ret = setup_lttng_msg_no_cmd_header(cmd_ctx, events,
3530 sizeof(struct lttng_event) * nb_events);
3531 free(events);
3532
3533 if (ret < 0) {
3534 goto setup_error;
3535 }
3536
3537 ret = LTTNG_OK;
3538 break;
3539 }
3540 case LTTNG_LIST_TRACEPOINT_FIELDS:
3541 {
3542 struct lttng_event_field *fields;
3543 ssize_t nb_fields;
3544
3545 session_lock_list();
3546 nb_fields = cmd_list_tracepoint_fields(cmd_ctx->lsm->domain.type,
3547 &fields);
3548 session_unlock_list();
3549 if (nb_fields < 0) {
3550 /* Return value is a negative lttng_error_code. */
3551 ret = -nb_fields;
3552 goto error;
3553 }
3554
3555 /*
3556 * Setup lttng message with payload size set to the event list size in
3557 * bytes and then copy list into the llm payload.
3558 */
3559 ret = setup_lttng_msg_no_cmd_header(cmd_ctx, fields,
3560 sizeof(struct lttng_event_field) * nb_fields);
3561 free(fields);
3562
3563 if (ret < 0) {
3564 goto setup_error;
3565 }
3566
3567 ret = LTTNG_OK;
3568 break;
3569 }
3570 case LTTNG_LIST_SYSCALLS:
3571 {
3572 struct lttng_event *events;
3573 ssize_t nb_events;
3574
3575 nb_events = cmd_list_syscalls(&events);
3576 if (nb_events < 0) {
3577 /* Return value is a negative lttng_error_code. */
3578 ret = -nb_events;
3579 goto error;
3580 }
3581
3582 /*
3583 * Setup lttng message with payload size set to the event list size in
3584 * bytes and then copy list into the llm payload.
3585 */
3586 ret = setup_lttng_msg_no_cmd_header(cmd_ctx, events,
3587 sizeof(struct lttng_event) * nb_events);
3588 free(events);
3589
3590 if (ret < 0) {
3591 goto setup_error;
3592 }
3593
3594 ret = LTTNG_OK;
3595 break;
3596 }
3597 case LTTNG_LIST_TRACKER_PIDS:
3598 {
3599 int32_t *pids = NULL;
3600 ssize_t nr_pids;
3601
3602 nr_pids = cmd_list_tracker_pids(cmd_ctx->session,
3603 cmd_ctx->lsm->domain.type, &pids);
3604 if (nr_pids < 0) {
3605 /* Return value is a negative lttng_error_code. */
3606 ret = -nr_pids;
3607 goto error;
3608 }
3609
3610 /*
3611 * Setup lttng message with payload size set to the event list size in
3612 * bytes and then copy list into the llm payload.
3613 */
3614 ret = setup_lttng_msg_no_cmd_header(cmd_ctx, pids,
3615 sizeof(int32_t) * nr_pids);
3616 free(pids);
3617
3618 if (ret < 0) {
3619 goto setup_error;
3620 }
3621
3622 ret = LTTNG_OK;
3623 break;
3624 }
3625 case LTTNG_SET_CONSUMER_URI:
3626 {
3627 size_t nb_uri, len;
3628 struct lttng_uri *uris;
3629
3630 nb_uri = cmd_ctx->lsm->u.uri.size;
3631 len = nb_uri * sizeof(struct lttng_uri);
3632
3633 if (nb_uri == 0) {
3634 ret = LTTNG_ERR_INVALID;
3635 goto error;
3636 }
3637
3638 uris = zmalloc(len);
3639 if (uris == NULL) {
3640 ret = LTTNG_ERR_FATAL;
3641 goto error;
3642 }
3643
3644 /* Receive variable len data */
3645 DBG("Receiving %zu URI(s) from client ...", nb_uri);
3646 ret = lttcomm_recv_unix_sock(sock, uris, len);
3647 if (ret <= 0) {
3648 DBG("No URIs received from client... continuing");
3649 *sock_error = 1;
3650 ret = LTTNG_ERR_SESSION_FAIL;
3651 free(uris);
3652 goto error;
3653 }
3654
3655 ret = cmd_set_consumer_uri(cmd_ctx->session, nb_uri, uris);
3656 free(uris);
3657 if (ret != LTTNG_OK) {
3658 goto error;
3659 }
3660
3661
3662 break;
3663 }
3664 case LTTNG_START_TRACE:
3665 {
3666 /*
3667 * On the first start, if we have a kernel session and we have
3668 * enabled time or size-based rotations, we have to make sure
3669 * the kernel tracer supports it.
3670 */
3671 if (!cmd_ctx->session->has_been_started && \
3672 cmd_ctx->session->kernel_session && \
3673 (cmd_ctx->session->rotate_timer_period || \
3674 cmd_ctx->session->rotate_size) && \
3675 !check_rotate_compatible()) {
3676 DBG("Kernel tracer version is not compatible with the rotation feature");
3677 ret = LTTNG_ERR_ROTATION_WRONG_VERSION;
3678 goto error;
3679 }
3680 ret = cmd_start_trace(cmd_ctx->session);
3681 break;
3682 }
3683 case LTTNG_STOP_TRACE:
3684 {
3685 ret = cmd_stop_trace(cmd_ctx->session);
3686 break;
3687 }
3688 case LTTNG_CREATE_SESSION:
3689 {
3690 size_t nb_uri, len;
3691 struct lttng_uri *uris = NULL;
3692
3693 nb_uri = cmd_ctx->lsm->u.uri.size;
3694 len = nb_uri * sizeof(struct lttng_uri);
3695
3696 if (nb_uri > 0) {
3697 uris = zmalloc(len);
3698 if (uris == NULL) {
3699 ret = LTTNG_ERR_FATAL;
3700 goto error;
3701 }
3702
3703 /* Receive variable len data */
3704 DBG("Waiting for %zu URIs from client ...", nb_uri);
3705 ret = lttcomm_recv_unix_sock(sock, uris, len);
3706 if (ret <= 0) {
3707 DBG("No URIs received from client... continuing");
3708 *sock_error = 1;
3709 ret = LTTNG_ERR_SESSION_FAIL;
3710 free(uris);
3711 goto error;
3712 }
3713
3714 if (nb_uri == 1 && uris[0].dtype != LTTNG_DST_PATH) {
3715 DBG("Creating session with ONE network URI is a bad call");
3716 ret = LTTNG_ERR_SESSION_FAIL;
3717 free(uris);
3718 goto error;
3719 }
3720 }
3721
3722 ret = cmd_create_session_uri(cmd_ctx->lsm->session.name, uris, nb_uri,
3723 &cmd_ctx->creds, 0);
3724
3725 free(uris);
3726
3727 break;
3728 }
3729 case LTTNG_DESTROY_SESSION:
3730 {
3731 ret = cmd_destroy_session(cmd_ctx->session, kernel_poll_pipe[1],
3732 notification_thread_handle);
3733
3734 /* Set session to NULL so we do not unlock it after free. */
3735 cmd_ctx->session = NULL;
3736 break;
3737 }
3738 case LTTNG_LIST_DOMAINS:
3739 {
3740 ssize_t nb_dom;
3741 struct lttng_domain *domains = NULL;
3742
3743 nb_dom = cmd_list_domains(cmd_ctx->session, &domains);
3744 if (nb_dom < 0) {
3745 /* Return value is a negative lttng_error_code. */
3746 ret = -nb_dom;
3747 goto error;
3748 }
3749
3750 ret = setup_lttng_msg_no_cmd_header(cmd_ctx, domains,
3751 nb_dom * sizeof(struct lttng_domain));
3752 free(domains);
3753
3754 if (ret < 0) {
3755 goto setup_error;
3756 }
3757
3758 ret = LTTNG_OK;
3759 break;
3760 }
3761 case LTTNG_LIST_CHANNELS:
3762 {
3763 ssize_t payload_size;
3764 struct lttng_channel *channels = NULL;
3765
3766 payload_size = cmd_list_channels(cmd_ctx->lsm->domain.type,
3767 cmd_ctx->session, &channels);
3768 if (payload_size < 0) {
3769 /* Return value is a negative lttng_error_code. */
3770 ret = -payload_size;
3771 goto error;
3772 }
3773
3774 ret = setup_lttng_msg_no_cmd_header(cmd_ctx, channels,
3775 payload_size);
3776 free(channels);
3777
3778 if (ret < 0) {
3779 goto setup_error;
3780 }
3781
3782 ret = LTTNG_OK;
3783 break;
3784 }
3785 case LTTNG_LIST_EVENTS:
3786 {
3787 ssize_t nb_event;
3788 struct lttng_event *events = NULL;
3789 struct lttcomm_event_command_header cmd_header;
3790 size_t total_size;
3791
3792 memset(&cmd_header, 0, sizeof(cmd_header));
3793 /* Extended infos are included at the end of events */
3794 nb_event = cmd_list_events(cmd_ctx->lsm->domain.type,
3795 cmd_ctx->session, cmd_ctx->lsm->u.list.channel_name,
3796 &events, &total_size);
3797
3798 if (nb_event < 0) {
3799 /* Return value is a negative lttng_error_code. */
3800 ret = -nb_event;
3801 goto error;
3802 }
3803
3804 cmd_header.nb_events = nb_event;
3805 ret = setup_lttng_msg(cmd_ctx, events, total_size,
3806 &cmd_header, sizeof(cmd_header));
3807 free(events);
3808
3809 if (ret < 0) {
3810 goto setup_error;
3811 }
3812
3813 ret = LTTNG_OK;
3814 break;
3815 }
3816 case LTTNG_LIST_SESSIONS:
3817 {
3818 unsigned int nr_sessions;
3819 void *sessions_payload;
3820 size_t payload_len;
3821
3822 session_lock_list();
3823 nr_sessions = lttng_sessions_count(
3824 LTTNG_SOCK_GET_UID_CRED(&cmd_ctx->creds),
3825 LTTNG_SOCK_GET_GID_CRED(&cmd_ctx->creds));
3826 payload_len = sizeof(struct lttng_session) * nr_sessions;
3827 sessions_payload = zmalloc(payload_len);
3828
3829 if (!sessions_payload) {
3830 session_unlock_list();
3831 ret = -ENOMEM;
3832 goto setup_error;
3833 }
3834
3835 cmd_list_lttng_sessions(sessions_payload,
3836 LTTNG_SOCK_GET_UID_CRED(&cmd_ctx->creds),
3837 LTTNG_SOCK_GET_GID_CRED(&cmd_ctx->creds));
3838 session_unlock_list();
3839
3840 ret = setup_lttng_msg_no_cmd_header(cmd_ctx, sessions_payload,
3841 payload_len);
3842 free(sessions_payload);
3843
3844 if (ret < 0) {
3845 goto setup_error;
3846 }
3847
3848 ret = LTTNG_OK;
3849 break;
3850 }
3851 case LTTNG_REGISTER_CONSUMER:
3852 {
3853 struct consumer_data *cdata;
3854
3855 switch (cmd_ctx->lsm->domain.type) {
3856 case LTTNG_DOMAIN_KERNEL:
3857 cdata = &kconsumer_data;
3858 break;
3859 default:
3860 ret = LTTNG_ERR_UND;
3861 goto error;
3862 }
3863
3864 ret = cmd_register_consumer(cmd_ctx->session, cmd_ctx->lsm->domain.type,
3865 cmd_ctx->lsm->u.reg.path, cdata);
3866 break;
3867 }
3868 case LTTNG_DATA_PENDING:
3869 {
3870 int pending_ret;
3871 uint8_t pending_ret_byte;
3872
3873 pending_ret = cmd_data_pending(cmd_ctx->session);
3874
3875 /*
3876 * FIXME
3877 *
3878 * This function may returns 0 or 1 to indicate whether or not
3879 * there is data pending. In case of error, it should return an
3880 * LTTNG_ERR code. However, some code paths may still return
3881 * a nondescript error code, which we handle by returning an
3882 * "unknown" error.
3883 */
3884 if (pending_ret == 0 || pending_ret == 1) {
3885 /*
3886 * ret will be set to LTTNG_OK at the end of
3887 * this function.
3888 */
3889 } else if (pending_ret < 0) {
3890 ret = LTTNG_ERR_UNK;
3891 goto setup_error;
3892 } else {
3893 ret = pending_ret;
3894 goto setup_error;
3895 }
3896
3897 pending_ret_byte = (uint8_t) pending_ret;
3898
3899 /* 1 byte to return whether or not data is pending */
3900 ret = setup_lttng_msg_no_cmd_header(cmd_ctx,
3901 &pending_ret_byte, 1);
3902
3903 if (ret < 0) {
3904 goto setup_error;
3905 }
3906
3907 ret = LTTNG_OK;
3908 break;
3909 }
3910 case LTTNG_SNAPSHOT_ADD_OUTPUT:
3911 {
3912 struct lttcomm_lttng_output_id reply;
3913
3914 ret = cmd_snapshot_add_output(cmd_ctx->session,
3915 &cmd_ctx->lsm->u.snapshot_output.output, &reply.id);
3916 if (ret != LTTNG_OK) {
3917 goto error;
3918 }
3919
3920 ret = setup_lttng_msg_no_cmd_header(cmd_ctx, &reply,
3921 sizeof(reply));
3922 if (ret < 0) {
3923 goto setup_error;
3924 }
3925
3926 /* Copy output list into message payload */
3927 ret = LTTNG_OK;
3928 break;
3929 }
3930 case LTTNG_SNAPSHOT_DEL_OUTPUT:
3931 {
3932 ret = cmd_snapshot_del_output(cmd_ctx->session,
3933 &cmd_ctx->lsm->u.snapshot_output.output);
3934 break;
3935 }
3936 case LTTNG_SNAPSHOT_LIST_OUTPUT:
3937 {
3938 ssize_t nb_output;
3939 struct lttng_snapshot_output *outputs = NULL;
3940
3941 nb_output = cmd_snapshot_list_outputs(cmd_ctx->session, &outputs);
3942 if (nb_output < 0) {
3943 ret = -nb_output;
3944 goto error;
3945 }
3946
3947 assert((nb_output > 0 && outputs) || nb_output == 0);
3948 ret = setup_lttng_msg_no_cmd_header(cmd_ctx, outputs,
3949 nb_output * sizeof(struct lttng_snapshot_output));
3950 free(outputs);
3951
3952 if (ret < 0) {
3953 goto setup_error;
3954 }
3955
3956 ret = LTTNG_OK;
3957 break;
3958 }
3959 case LTTNG_SNAPSHOT_RECORD:
3960 {
3961 ret = cmd_snapshot_record(cmd_ctx->session,
3962 &cmd_ctx->lsm->u.snapshot_record.output,
3963 cmd_ctx->lsm->u.snapshot_record.wait);
3964 break;
3965 }
3966 case LTTNG_CREATE_SESSION_SNAPSHOT:
3967 {
3968 size_t nb_uri, len;
3969 struct lttng_uri *uris = NULL;
3970
3971 nb_uri = cmd_ctx->lsm->u.uri.size;
3972 len = nb_uri * sizeof(struct lttng_uri);
3973
3974 if (nb_uri > 0) {
3975 uris = zmalloc(len);
3976 if (uris == NULL) {
3977 ret = LTTNG_ERR_FATAL;
3978 goto error;
3979 }
3980
3981 /* Receive variable len data */
3982 DBG("Waiting for %zu URIs from client ...", nb_uri);
3983 ret = lttcomm_recv_unix_sock(sock, uris, len);
3984 if (ret <= 0) {
3985 DBG("No URIs received from client... continuing");
3986 *sock_error = 1;
3987 ret = LTTNG_ERR_SESSION_FAIL;
3988 free(uris);
3989 goto error;
3990 }
3991
3992 if (nb_uri == 1 && uris[0].dtype != LTTNG_DST_PATH) {
3993 DBG("Creating session with ONE network URI is a bad call");
3994 ret = LTTNG_ERR_SESSION_FAIL;
3995 free(uris);
3996 goto error;
3997 }
3998 }
3999
4000 ret = cmd_create_session_snapshot(cmd_ctx->lsm->session.name, uris,
4001 nb_uri, &cmd_ctx->creds);
4002 free(uris);
4003 break;
4004 }
4005 case LTTNG_CREATE_SESSION_LIVE:
4006 {
4007 size_t nb_uri, len;
4008 struct lttng_uri *uris = NULL;
4009
4010 nb_uri = cmd_ctx->lsm->u.uri.size;
4011 len = nb_uri * sizeof(struct lttng_uri);
4012
4013 if (nb_uri > 0) {
4014 uris = zmalloc(len);
4015 if (uris == NULL) {
4016 ret = LTTNG_ERR_FATAL;
4017 goto error;
4018 }
4019
4020 /* Receive variable len data */
4021 DBG("Waiting for %zu URIs from client ...", nb_uri);
4022 ret = lttcomm_recv_unix_sock(sock, uris, len);
4023 if (ret <= 0) {
4024 DBG("No URIs received from client... continuing");
4025 *sock_error = 1;
4026 ret = LTTNG_ERR_SESSION_FAIL;
4027 free(uris);
4028 goto error;
4029 }
4030
4031 if (nb_uri == 1 && uris[0].dtype != LTTNG_DST_PATH) {
4032 DBG("Creating session with ONE network URI is a bad call");
4033 ret = LTTNG_ERR_SESSION_FAIL;
4034 free(uris);
4035 goto error;
4036 }
4037 }
4038
4039 ret = cmd_create_session_uri(cmd_ctx->lsm->session.name, uris,
4040 nb_uri, &cmd_ctx->creds, cmd_ctx->lsm->u.session_live.timer_interval);
4041 free(uris);
4042 break;
4043 }
4044 case LTTNG_SAVE_SESSION:
4045 {
4046 ret = cmd_save_sessions(&cmd_ctx->lsm->u.save_session.attr,
4047 &cmd_ctx->creds);
4048 break;
4049 }
4050 case LTTNG_SET_SESSION_SHM_PATH:
4051 {
4052 ret = cmd_set_session_shm_path(cmd_ctx->session,
4053 cmd_ctx->lsm->u.set_shm_path.shm_path);
4054 break;
4055 }
4056 case LTTNG_REGENERATE_METADATA:
4057 {
4058 ret = cmd_regenerate_metadata(cmd_ctx->session);
4059 break;
4060 }
4061 case LTTNG_REGENERATE_STATEDUMP:
4062 {
4063 ret = cmd_regenerate_statedump(cmd_ctx->session);
4064 break;
4065 }
4066 case LTTNG_REGISTER_TRIGGER:
4067 {
4068 ret = cmd_register_trigger(cmd_ctx, sock,
4069 notification_thread_handle);
4070 break;
4071 }
4072 case LTTNG_UNREGISTER_TRIGGER:
4073 {
4074 ret = cmd_unregister_trigger(cmd_ctx, sock,
4075 notification_thread_handle);
4076 break;
4077 }
4078 case LTTNG_ROTATE_SESSION:
4079 {
4080 struct lttng_rotate_session_return rotate_return;
4081
4082 DBG("Client rotate session \"%s\"", cmd_ctx->session->name);
4083
4084 memset(&rotate_return, 0, sizeof(rotate_return));
4085 if (cmd_ctx->session->kernel_session && !check_rotate_compatible()) {
4086 DBG("Kernel tracer version is not compatible with the rotation feature");
4087 ret = LTTNG_ERR_ROTATION_WRONG_VERSION;
4088 goto error;
4089 }
4090
4091 ret = cmd_rotate_session(cmd_ctx->session, &rotate_return);
4092 if (ret < 0) {
4093 ret = -ret;
4094 goto error;
4095 }
4096
4097 ret = setup_lttng_msg_no_cmd_header(cmd_ctx, &rotate_return,
4098 sizeof(rotate_return));
4099 if (ret < 0) {
4100 ret = -ret;
4101 goto error;
4102 }
4103
4104 ret = LTTNG_OK;
4105 break;
4106 }
4107 case LTTNG_ROTATION_GET_INFO:
4108 {
4109 struct lttng_rotation_get_info_return get_info_return;
4110
4111 memset(&get_info_return, 0, sizeof(get_info_return));
4112 ret = cmd_rotate_get_info(cmd_ctx->session, &get_info_return,
4113 cmd_ctx->lsm->u.get_rotation_info.rotation_id);
4114 if (ret < 0) {
4115 ret = -ret;
4116 goto error;
4117 }
4118
4119 ret = setup_lttng_msg_no_cmd_header(cmd_ctx, &get_info_return,
4120 sizeof(get_info_return));
4121 if (ret < 0) {
4122 ret = -ret;
4123 goto error;
4124 }
4125
4126 ret = LTTNG_OK;
4127 break;
4128 }
4129 case LTTNG_ROTATION_SET_SCHEDULE:
4130 {
4131 bool set_schedule;
4132 enum lttng_rotation_schedule_type schedule_type;
4133 uint64_t value;
4134
4135 if (cmd_ctx->session->kernel_session && !check_rotate_compatible()) {
4136 DBG("Kernel tracer version does not support session rotations");
4137 ret = LTTNG_ERR_ROTATION_WRONG_VERSION;
4138 goto error;
4139 }
4140
4141 set_schedule = cmd_ctx->lsm->u.rotation_set_schedule.set == 1;
4142 schedule_type = (enum lttng_rotation_schedule_type) cmd_ctx->lsm->u.rotation_set_schedule.type;
4143 value = cmd_ctx->lsm->u.rotation_set_schedule.value;
4144
4145 ret = cmd_rotation_set_schedule(cmd_ctx->session,
4146 set_schedule,
4147 schedule_type,
4148 value,
4149 notification_thread_handle);
4150 if (ret != LTTNG_OK) {
4151 goto error;
4152 }
4153
4154 break;
4155 }
4156 case LTTNG_SESSION_LIST_ROTATION_SCHEDULES:
4157 {
4158 struct lttng_session_list_schedules_return schedules = {
4159 .periodic.set = !!cmd_ctx->session->rotate_timer_period,
4160 .periodic.value = cmd_ctx->session->rotate_timer_period,
4161 .size.set = !!cmd_ctx->session->rotate_size,
4162 .size.value = cmd_ctx->session->rotate_size,
4163 };
4164
4165 ret = setup_lttng_msg_no_cmd_header(cmd_ctx, &schedules,
4166 sizeof(schedules));
4167 if (ret < 0) {
4168 ret = -ret;
4169 goto error;
4170 }
4171
4172 ret = LTTNG_OK;
4173 break;
4174 }
4175 default:
4176 ret = LTTNG_ERR_UND;
4177 break;
4178 }
4179
4180 error:
4181 if (cmd_ctx->llm == NULL) {
4182 DBG("Missing llm structure. Allocating one.");
4183 if (setup_lttng_msg_no_cmd_header(cmd_ctx, NULL, 0) < 0) {
4184 goto setup_error;
4185 }
4186 }
4187 /* Set return code */
4188 cmd_ctx->llm->ret_code = ret;
4189 setup_error:
4190 if (cmd_ctx->session) {
4191 session_unlock(cmd_ctx->session);
4192 }
4193 if (need_tracing_session) {
4194 session_unlock_list();
4195 }
4196 init_setup_error:
4197 assert(!rcu_read_ongoing());
4198 return ret;
4199 }
4200
4201 /*
4202 * Thread managing health check socket.
4203 */
4204 static void *thread_manage_health(void *data)
4205 {
4206 int sock = -1, new_sock = -1, ret, i, pollfd, err = -1;
4207 uint32_t revents, nb_fd;
4208 struct lttng_poll_event events;
4209 struct health_comm_msg msg;
4210 struct health_comm_reply reply;
4211
4212 DBG("[thread] Manage health check started");
4213
4214 rcu_register_thread();
4215
4216 /* We might hit an error path before this is created. */
4217 lttng_poll_init(&events);
4218
4219 /* Create unix socket */
4220 sock = lttcomm_create_unix_sock(config.health_unix_sock_path.value);
4221 if (sock < 0) {
4222 ERR("Unable to create health check Unix socket");
4223 goto error;
4224 }
4225
4226 if (is_root) {
4227 /* lttng health client socket path permissions */
4228 ret = chown(config.health_unix_sock_path.value, 0,
4229 utils_get_group_id(config.tracing_group_name.value));
4230 if (ret < 0) {
4231 ERR("Unable to set group on %s", config.health_unix_sock_path.value);
4232 PERROR("chown");
4233 goto error;
4234 }
4235
4236 ret = chmod(config.health_unix_sock_path.value,
4237 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
4238 if (ret < 0) {
4239 ERR("Unable to set permissions on %s", config.health_unix_sock_path.value);
4240 PERROR("chmod");
4241 goto error;
4242 }
4243 }
4244
4245 /*
4246 * Set the CLOEXEC flag. Return code is useless because either way, the
4247 * show must go on.
4248 */
4249 (void) utils_set_fd_cloexec(sock);
4250
4251 ret = lttcomm_listen_unix_sock(sock);
4252 if (ret < 0) {
4253 goto error;
4254 }
4255
4256 /*
4257 * Pass 2 as size here for the thread quit pipe and client_sock. Nothing
4258 * more will be added to this poll set.
4259 */
4260 ret = sessiond_set_thread_pollset(&events, 2);
4261 if (ret < 0) {
4262 goto error;
4263 }
4264
4265 /* Add the application registration socket */
4266 ret = lttng_poll_add(&events, sock, LPOLLIN | LPOLLPRI);
4267 if (ret < 0) {
4268 goto error;
4269 }
4270
4271 sessiond_notify_ready();
4272
4273 while (1) {
4274 DBG("Health check ready");
4275
4276 /* Inifinite blocking call, waiting for transmission */
4277 restart:
4278 ret = lttng_poll_wait(&events, -1);
4279 if (ret < 0) {
4280 /*
4281 * Restart interrupted system call.
4282 */
4283 if (errno == EINTR) {
4284 goto restart;
4285 }
4286 goto error;
4287 }
4288
4289 nb_fd = ret;
4290
4291 for (i = 0; i < nb_fd; i++) {
4292 /* Fetch once the poll data */
4293 revents = LTTNG_POLL_GETEV(&events, i);
4294 pollfd = LTTNG_POLL_GETFD(&events, i);
4295
4296 if (!revents) {
4297 /* No activity for this FD (poll implementation). */
4298 continue;
4299 }
4300
4301 /* Thread quit pipe has been closed. Killing thread. */
4302 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
4303 if (ret) {
4304 err = 0;
4305 goto exit;
4306 }
4307
4308 /* Event on the registration socket */
4309 if (pollfd == sock) {
4310 if (revents & LPOLLIN) {
4311 continue;
4312 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
4313 ERR("Health socket poll error");
4314 goto error;
4315 } else {
4316 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
4317 goto error;
4318 }
4319 }
4320 }
4321
4322 new_sock = lttcomm_accept_unix_sock(sock);
4323 if (new_sock < 0) {
4324 goto error;
4325 }
4326
4327 /*
4328 * Set the CLOEXEC flag. Return code is useless because either way, the
4329 * show must go on.
4330 */
4331 (void) utils_set_fd_cloexec(new_sock);
4332
4333 DBG("Receiving data from client for health...");
4334 ret = lttcomm_recv_unix_sock(new_sock, (void *)&msg, sizeof(msg));
4335 if (ret <= 0) {
4336 DBG("Nothing recv() from client... continuing");
4337 ret = close(new_sock);
4338 if (ret) {
4339 PERROR("close");
4340 }
4341 continue;
4342 }
4343
4344 rcu_thread_online();
4345
4346 memset(&reply, 0, sizeof(reply));
4347 for (i = 0; i < NR_HEALTH_SESSIOND_TYPES; i++) {
4348 /*
4349 * health_check_state returns 0 if health is
4350 * bad.
4351 */
4352 if (!health_check_state(health_sessiond, i)) {
4353 reply.ret_code |= 1ULL << i;
4354 }
4355 }
4356
4357 DBG2("Health check return value %" PRIx64, reply.ret_code);
4358
4359 ret = send_unix_sock(new_sock, (void *) &reply, sizeof(reply));
4360 if (ret < 0) {
4361 ERR("Failed to send health data back to client");
4362 }
4363
4364 /* End of transmission */
4365 ret = close(new_sock);
4366 if (ret) {
4367 PERROR("close");
4368 }
4369 }
4370
4371 exit:
4372 error:
4373 if (err) {
4374 ERR("Health error occurred in %s", __func__);
4375 }
4376 DBG("Health check thread dying");
4377 unlink(config.health_unix_sock_path.value);
4378 if (sock >= 0) {
4379 ret = close(sock);
4380 if (ret) {
4381 PERROR("close");
4382 }
4383 }
4384
4385 lttng_poll_clean(&events);
4386 stop_threads();
4387 rcu_unregister_thread();
4388 return NULL;
4389 }
4390
/*
 * This thread manages all client requests, using the unix client socket for
 * communication.
 */
4395 static void *thread_manage_clients(void *data)
4396 {
4397 int sock = -1, ret, i, pollfd, err = -1;
4398 int sock_error;
4399 uint32_t revents, nb_fd;
4400 struct command_ctx *cmd_ctx = NULL;
4401 struct lttng_poll_event events;
4402
4403 DBG("[thread] Manage client started");
4404
4405 rcu_register_thread();
4406
4407 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_CMD);
4408
4409 health_code_update();
4410
4411 ret = lttcomm_listen_unix_sock(client_sock);
4412 if (ret < 0) {
4413 goto error_listen;
4414 }
4415
4416 /*
4417 * Pass 2 as size here for the thread quit pipe and client_sock. Nothing
4418 * more will be added to this poll set.
4419 */
4420 ret = sessiond_set_thread_pollset(&events, 2);
4421 if (ret < 0) {
4422 goto error_create_poll;
4423 }
4424
4425 /* Add the application registration socket */
4426 ret = lttng_poll_add(&events, client_sock, LPOLLIN | LPOLLPRI);
4427 if (ret < 0) {
4428 goto error;
4429 }
4430
4431 ret = sem_post(&load_info->message_thread_ready);
4432 if (ret) {
4433 PERROR("sem_post message_thread_ready");
4434 goto error;
4435 }
4436
4437 /*
4438 * Wait until all support threads are initialized before accepting
4439 * commands.
4440 */
4441 while (uatomic_read(&lttng_sessiond_ready) != 0) {
4442 /*
4443 * If a support thread failed to launch, it may signal that
4444 * we must exit and the sessiond would never be marked as
4445 * "ready".
4446 *
4447 * The timeout is set to 1ms, which serves as a way to
4448 * pace down this check.
4449 */
4450 ret = sessiond_wait_for_quit_pipe(1000);
4451 if (ret > 0) {
4452 goto exit;
4453 }
4454 }
4455 /*
4456 * This barrier is paired with the one in sessiond_notify_ready() to
4457 * ensure that loads accessing data initialized by the other threads,
4458 * on which this thread was waiting, are not performed before this point.
4459 *
4460 * Note that this could be a 'read' memory barrier, but a full barrier
4461 * is used in case the code changes. The performance implications of
4462 * this choice are minimal since this is a slow path.
4463 */
4464 cmm_smp_mb();
4465
4466 /* This testpoint is after we signal readiness to the parent. */
4467 if (testpoint(sessiond_thread_manage_clients)) {
4468 goto error;
4469 }
4470
4471 if (testpoint(sessiond_thread_manage_clients_before_loop)) {
4472 goto error;
4473 }
4474
4475 health_code_update();
4476
4477 while (1) {
4478 const struct cmd_completion_handler *cmd_completion_handler;
4479
4480 DBG("Accepting client command ...");
4481
4482 /* Inifinite blocking call, waiting for transmission */
4483 restart:
4484 health_poll_entry();
4485 ret = lttng_poll_wait(&events, -1);
4486 health_poll_exit();
4487 if (ret < 0) {
4488 /*
4489 * Restart interrupted system call.
4490 */
4491 if (errno == EINTR) {
4492 goto restart;
4493 }
4494 goto error;
4495 }
4496
4497 nb_fd = ret;
4498
4499 for (i = 0; i < nb_fd; i++) {
4500 /* Fetch once the poll data */
4501 revents = LTTNG_POLL_GETEV(&events, i);
4502 pollfd = LTTNG_POLL_GETFD(&events, i);
4503
4504 health_code_update();
4505
4506 if (!revents) {
4507 /* No activity for this FD (poll implementation). */
4508 continue;
4509 }
4510
4511 /* Thread quit pipe has been closed. Killing thread. */
4512 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
4513 if (ret) {
4514 err = 0;
4515 goto exit;
4516 }
4517
4518 /* Event on the registration socket */
4519 if (pollfd == client_sock) {
4520 if (revents & LPOLLIN) {
4521 continue;
4522 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
4523 ERR("Client socket poll error");
4524 goto error;
4525 } else {
4526 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
4527 goto error;
4528 }
4529 }
4530 }
4531
4532 DBG("Wait for client response");
4533
4534 health_code_update();
4535
4536 sock = lttcomm_accept_unix_sock(client_sock);
4537 if (sock < 0) {
4538 goto error;
4539 }
4540
4541 /*
4542 * Set the CLOEXEC flag. Return code is useless because either way, the
4543 * show must go on.
4544 */
4545 (void) utils_set_fd_cloexec(sock);
4546
4547 /* Set socket option for credentials retrieval */
4548 ret = lttcomm_setsockopt_creds_unix_sock(sock);
4549 if (ret < 0) {
4550 goto error;
4551 }
4552
4553 /* Allocate context command to process the client request */
4554 cmd_ctx = zmalloc(sizeof(struct command_ctx));
4555 if (cmd_ctx == NULL) {
4556 PERROR("zmalloc cmd_ctx");
4557 goto error;
4558 }
4559
4560 /* Allocate data buffer for reception */
4561 cmd_ctx->lsm = zmalloc(sizeof(struct lttcomm_session_msg));
4562 if (cmd_ctx->lsm == NULL) {
4563 PERROR("zmalloc cmd_ctx->lsm");
4564 goto error;
4565 }
4566
4567 cmd_ctx->llm = NULL;
4568 cmd_ctx->session = NULL;
4569
4570 health_code_update();
4571
4572 /*
4573 * Data is received from the lttng client. The struct
4574 * lttcomm_session_msg (lsm) contains the command and data request of
4575 * the client.
4576 */
4577 DBG("Receiving data from client ...");
4578 ret = lttcomm_recv_creds_unix_sock(sock, cmd_ctx->lsm,
4579 sizeof(struct lttcomm_session_msg), &cmd_ctx->creds);
4580 if (ret <= 0) {
4581 DBG("Nothing recv() from client... continuing");
4582 ret = close(sock);
4583 if (ret) {
4584 PERROR("close");
4585 }
4586 sock = -1;
4587 clean_command_ctx(&cmd_ctx);
4588 continue;
4589 }
4590
4591 health_code_update();
4592
4593 // TODO: Validate cmd_ctx including sanity check for
4594 // security purpose.
4595
4596 rcu_thread_online();
4597 /*
4598 * This function dispatch the work to the kernel or userspace tracer
4599 * libs and fill the lttcomm_lttng_msg data structure of all the needed
4600 * informations for the client. The command context struct contains
4601 * everything this function may needs.
4602 */
4603 ret = process_client_msg(cmd_ctx, sock, &sock_error);
4604 rcu_thread_offline();
4605 if (ret < 0) {
4606 ret = close(sock);
4607 if (ret) {
4608 PERROR("close");
4609 }
4610 sock = -1;
4611 /*
4612 * TODO: Inform client somehow of the fatal error. At
4613 * this point, ret < 0 means that a zmalloc failed
4614 * (ENOMEM). Error detected but still accept
4615 * command, unless a socket error has been
4616 * detected.
4617 */
4618 clean_command_ctx(&cmd_ctx);
4619 continue;
4620 }
4621
4622 cmd_completion_handler = cmd_pop_completion_handler();
4623 if (cmd_completion_handler) {
4624 enum lttng_error_code completion_code;
4625
4626 completion_code = cmd_completion_handler->run(
4627 cmd_completion_handler->data);
4628 if (completion_code != LTTNG_OK) {
4629 clean_command_ctx(&cmd_ctx);
4630 continue;
4631 }
4632 }
4633
4634 health_code_update();
4635
4636 DBG("Sending response (size: %d, retcode: %s (%d))",
4637 cmd_ctx->lttng_msg_size,
4638 lttng_strerror(-cmd_ctx->llm->ret_code),
4639 cmd_ctx->llm->ret_code);
4640 ret = send_unix_sock(sock, cmd_ctx->llm, cmd_ctx->lttng_msg_size);
4641 if (ret < 0) {
4642 ERR("Failed to send data back to client");
4643 }
4644
4645 /* End of transmission */
4646 ret = close(sock);
4647 if (ret) {
4648 PERROR("close");
4649 }
4650 sock = -1;
4651
4652 clean_command_ctx(&cmd_ctx);
4653
4654 health_code_update();
4655 }
4656
4657 exit:
4658 error:
4659 if (sock >= 0) {
4660 ret = close(sock);
4661 if (ret) {
4662 PERROR("close");
4663 }
4664 }
4665
4666 lttng_poll_clean(&events);
4667 clean_command_ctx(&cmd_ctx);
4668
4669 error_listen:
4670 error_create_poll:
4671 unlink(config.client_unix_sock_path.value);
4672 if (client_sock >= 0) {
4673 ret = close(client_sock);
4674 if (ret) {
4675 PERROR("close");
4676 }
4677 }
4678
4679 if (err) {
4680 health_error();
4681 ERR("Health error occurred in %s", __func__);
4682 }
4683
4684 health_unregister(health_sessiond);
4685
4686 DBG("Client thread dying");
4687
4688 rcu_unregister_thread();
4689
4690 /*
4691 * Since we are creating the consumer threads, we own them, so we need
4692 * to join them before our thread exits.
4693 */
4694 ret = join_consumer_thread(&kconsumer_data);
4695 if (ret) {
4696 errno = ret;
4697 PERROR("join_consumer");
4698 }
4699
4700 ret = join_consumer_thread(&ustconsumer32_data);
4701 if (ret) {
4702 errno = ret;
4703 PERROR("join_consumer ust32");
4704 }
4705
4706 ret = join_consumer_thread(&ustconsumer64_data);
4707 if (ret) {
4708 errno = ret;
4709 PERROR("join_consumer ust64");
4710 }
4711 return NULL;
4712 }
4713
/*
 * Compare two possibly-NULL strings.
 *
 * Return 1 when both pointers are non-NULL and the strings are equal,
 * 0 otherwise (including when either pointer is NULL).
 */
static int string_match(const char *str1, const char *str2)
{
	if (!str1 || !str2) {
		return 0;
	}

	return strcmp(str1, str2) == 0;
}
4718
4719 /*
4720 * Take an option from the getopt output and set it in the right variable to be
4721 * used later.
4722 *
4723 * Return 0 on success else a negative value.
4724 */
4725 static int set_option(int opt, const char *arg, const char *optname)
4726 {
4727 int ret = 0;
4728
4729 if (string_match(optname, "client-sock") || opt == 'c') {
4730 if (!arg || *arg == '\0') {
4731 ret = -EINVAL;
4732 goto end;
4733 }
4734 if (lttng_is_setuid_setgid()) {
4735 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
4736 "-c, --client-sock");
4737 } else {
4738 config_string_set(&config.client_unix_sock_path,
4739 strdup(arg));
4740 if (!config.client_unix_sock_path.value) {
4741 ret = -ENOMEM;
4742 PERROR("strdup");
4743 }
4744 }
4745 } else if (string_match(optname, "apps-sock") || opt == 'a') {
4746 if (!arg || *arg == '\0') {
4747 ret = -EINVAL;
4748 goto end;
4749 }
4750 if (lttng_is_setuid_setgid()) {
4751 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
4752 "-a, --apps-sock");
4753 } else {
4754 config_string_set(&config.apps_unix_sock_path,
4755 strdup(arg));
4756 if (!config.apps_unix_sock_path.value) {
4757 ret = -ENOMEM;
4758 PERROR("strdup");
4759 }
4760 }
4761 } else if (string_match(optname, "daemonize") || opt == 'd') {
4762 config.daemonize = true;
4763 } else if (string_match(optname, "background") || opt == 'b') {
4764 config.background = true;
4765 } else if (string_match(optname, "group") || opt == 'g') {
4766 if (!arg || *arg == '\0') {
4767 ret = -EINVAL;
4768 goto end;
4769 }
4770 if (lttng_is_setuid_setgid()) {
4771 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
4772 "-g, --group");
4773 } else {
4774 config_string_set(&config.tracing_group_name,
4775 strdup(arg));
4776 if (!config.tracing_group_name.value) {
4777 ret = -ENOMEM;
4778 PERROR("strdup");
4779 }
4780 }
4781 } else if (string_match(optname, "help") || opt == 'h') {
4782 ret = utils_show_help(8, "lttng-sessiond", help_msg);
4783 if (ret) {
4784 ERR("Cannot show --help for `lttng-sessiond`");
4785 perror("exec");
4786 }
4787 exit(ret ? EXIT_FAILURE : EXIT_SUCCESS);
4788 } else if (string_match(optname, "version") || opt == 'V') {
4789 fprintf(stdout, "%s\n", VERSION);
4790 exit(EXIT_SUCCESS);
4791 } else if (string_match(optname, "sig-parent") || opt == 'S') {
4792 config.sig_parent = true;
4793 } else if (string_match(optname, "kconsumerd-err-sock")) {
4794 if (!arg || *arg == '\0') {
4795 ret = -EINVAL;
4796 goto end;
4797 }
4798 if (lttng_is_setuid_setgid()) {
4799 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
4800 "--kconsumerd-err-sock");
4801 } else {
4802 config_string_set(&config.kconsumerd_err_unix_sock_path,
4803 strdup(arg));
4804 if (!config.kconsumerd_err_unix_sock_path.value) {
4805 ret = -ENOMEM;
4806 PERROR("strdup");
4807 }
4808 }
4809 } else if (string_match(optname, "kconsumerd-cmd-sock")) {
4810 if (!arg || *arg == '\0') {
4811 ret = -EINVAL;
4812 goto end;
4813 }
4814 if (lttng_is_setuid_setgid()) {
4815 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
4816 "--kconsumerd-cmd-sock");
4817 } else {
4818 config_string_set(&config.kconsumerd_cmd_unix_sock_path,
4819 strdup(arg));
4820 if (!config.kconsumerd_cmd_unix_sock_path.value) {
4821 ret = -ENOMEM;
4822 PERROR("strdup");
4823 }
4824 }
4825 } else if (string_match(optname, "ustconsumerd64-err-sock")) {
4826 if (!arg || *arg == '\0') {
4827 ret = -EINVAL;
4828 goto end;
4829 }
4830 if (lttng_is_setuid_setgid()) {
4831 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
4832 "--ustconsumerd64-err-sock");
4833 } else {
4834 config_string_set(&config.consumerd64_err_unix_sock_path,
4835 strdup(arg));
4836 if (!config.consumerd64_err_unix_sock_path.value) {
4837 ret = -ENOMEM;
4838 PERROR("strdup");
4839 }
4840 }
4841 } else if (string_match(optname, "ustconsumerd64-cmd-sock")) {
4842 if (!arg || *arg == '\0') {
4843 ret = -EINVAL;
4844 goto end;
4845 }
4846 if (lttng_is_setuid_setgid()) {
4847 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
4848 "--ustconsumerd64-cmd-sock");
4849 } else {
4850 config_string_set(&config.consumerd64_cmd_unix_sock_path,
4851 strdup(arg));
4852 if (!config.consumerd64_cmd_unix_sock_path.value) {
4853 ret = -ENOMEM;
4854 PERROR("strdup");
4855 }
4856 }
4857 } else if (string_match(optname, "ustconsumerd32-err-sock")) {
4858 if (!arg || *arg == '\0') {
4859 ret = -EINVAL;
4860 goto end;
4861 }
4862 if (lttng_is_setuid_setgid()) {
4863 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
4864 "--ustconsumerd32-err-sock");
4865 } else {
4866 config_string_set(&config.consumerd32_err_unix_sock_path,
4867 strdup(arg));
4868 if (!config.consumerd32_err_unix_sock_path.value) {
4869 ret = -ENOMEM;
4870 PERROR("strdup");
4871 }
4872 }
4873 } else if (string_match(optname, "ustconsumerd32-cmd-sock")) {
4874 if (!arg || *arg == '\0') {
4875 ret = -EINVAL;
4876 goto end;
4877 }
4878 if (lttng_is_setuid_setgid()) {
4879 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
4880 "--ustconsumerd32-cmd-sock");
4881 } else {
4882 config_string_set(&config.consumerd32_cmd_unix_sock_path,
4883 strdup(arg));
4884 if (!config.consumerd32_cmd_unix_sock_path.value) {
4885 ret = -ENOMEM;
4886 PERROR("strdup");
4887 }
4888 }
4889 } else if (string_match(optname, "no-kernel")) {
4890 config.no_kernel = true;
4891 } else if (string_match(optname, "quiet") || opt == 'q') {
4892 config.quiet = true;
4893 } else if (string_match(optname, "verbose") || opt == 'v') {
4894 /* Verbose level can increase using multiple -v */
4895 if (arg) {
4896 /* Value obtained from config file */
4897 config.verbose = config_parse_value(arg);
4898 } else {
4899 /* -v used on command line */
4900 config.verbose++;
4901 }
4902 /* Clamp value to [0, 3] */
4903 config.verbose = config.verbose < 0 ? 0 :
4904 (config.verbose <= 3 ? config.verbose : 3);
4905 } else if (string_match(optname, "verbose-consumer")) {
4906 if (arg) {
4907 config.verbose_consumer = config_parse_value(arg);
4908 } else {
4909 config.verbose_consumer++;
4910 }
4911 } else if (string_match(optname, "consumerd32-path")) {
4912 if (!arg || *arg == '\0') {
4913 ret = -EINVAL;
4914 goto end;
4915 }
4916 if (lttng_is_setuid_setgid()) {
4917 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
4918 "--consumerd32-path");
4919 } else {
4920 config_string_set(&config.consumerd32_bin_path,
4921 strdup(arg));
4922 if (!config.consumerd32_bin_path.value) {
4923 PERROR("strdup");
4924 ret = -ENOMEM;
4925 }
4926 }
4927 } else if (string_match(optname, "consumerd32-libdir")) {
4928 if (!arg || *arg == '\0') {
4929 ret = -EINVAL;
4930 goto end;
4931 }
4932 if (lttng_is_setuid_setgid()) {
4933 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
4934 "--consumerd32-libdir");
4935 } else {
4936 config_string_set(&config.consumerd32_lib_dir,
4937 strdup(arg));
4938 if (!config.consumerd32_lib_dir.value) {
4939 PERROR("strdup");
4940 ret = -ENOMEM;
4941 }
4942 }
4943 } else if (string_match(optname, "consumerd64-path")) {
4944 if (!arg || *arg == '\0') {
4945 ret = -EINVAL;
4946 goto end;
4947 }
4948 if (lttng_is_setuid_setgid()) {
4949 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
4950 "--consumerd64-path");
4951 } else {
4952 config_string_set(&config.consumerd64_bin_path,
4953 strdup(arg));
4954 if (!config.consumerd64_bin_path.value) {
4955 PERROR("strdup");
4956 ret = -ENOMEM;
4957 }
4958 }
4959 } else if (string_match(optname, "consumerd64-libdir")) {
4960 if (!arg || *arg == '\0') {
4961 ret = -EINVAL;
4962 goto end;
4963 }
4964 if (lttng_is_setuid_setgid()) {
4965 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
4966 "--consumerd64-libdir");
4967 } else {
4968 config_string_set(&config.consumerd64_lib_dir,
4969 strdup(arg));
4970 if (!config.consumerd64_lib_dir.value) {
4971 PERROR("strdup");
4972 ret = -ENOMEM;
4973 }
4974 }
4975 } else if (string_match(optname, "pidfile") || opt == 'p') {
4976 if (!arg || *arg == '\0') {
4977 ret = -EINVAL;
4978 goto end;
4979 }
4980 if (lttng_is_setuid_setgid()) {
4981 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
4982 "-p, --pidfile");
4983 } else {
4984 config_string_set(&config.pid_file_path, strdup(arg));
4985 if (!config.pid_file_path.value) {
4986 PERROR("strdup");
4987 ret = -ENOMEM;
4988 }
4989 }
4990 } else if (string_match(optname, "agent-tcp-port")) {
4991 if (!arg || *arg == '\0') {
4992 ret = -EINVAL;
4993 goto end;
4994 }
4995 if (lttng_is_setuid_setgid()) {
4996 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
4997 "--agent-tcp-port");
4998 } else {
4999 unsigned long v;
5000
5001 errno = 0;
5002 v = strtoul(arg, NULL, 0);
5003 if (errno != 0 || !isdigit(arg[0])) {
5004 ERR("Wrong value in --agent-tcp-port parameter: %s", arg);
5005 return -1;
5006 }
5007 if (v == 0 || v >= 65535) {
5008 ERR("Port overflow in --agent-tcp-port parameter: %s", arg);
5009 return -1;
5010 }
5011 config.agent_tcp_port.begin = config.agent_tcp_port.end = (int) v;
5012 DBG3("Agent TCP port set to non default: %i", (int) v);
5013 }
5014 } else if (string_match(optname, "load") || opt == 'l') {
5015 if (!arg || *arg == '\0') {
5016 ret = -EINVAL;
5017 goto end;
5018 }
5019 if (lttng_is_setuid_setgid()) {
5020 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
5021 "-l, --load");
5022 } else {
5023 config_string_set(&config.load_session_path, strdup(arg));
5024 if (!config.load_session_path.value) {
5025 PERROR("strdup");
5026 ret = -ENOMEM;
5027 }
5028 }
5029 } else if (string_match(optname, "kmod-probes")) {
5030 if (!arg || *arg == '\0') {
5031 ret = -EINVAL;
5032 goto end;
5033 }
5034 if (lttng_is_setuid_setgid()) {
5035 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
5036 "--kmod-probes");
5037 } else {
5038 config_string_set(&config.kmod_probes_list, strdup(arg));
5039 if (!config.kmod_probes_list.value) {
5040 PERROR("strdup");
5041 ret = -ENOMEM;
5042 }
5043 }
5044 } else if (string_match(optname, "extra-kmod-probes")) {
5045 if (!arg || *arg == '\0') {
5046 ret = -EINVAL;
5047 goto end;
5048 }
5049 if (lttng_is_setuid_setgid()) {
5050 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
5051 "--extra-kmod-probes");
5052 } else {
5053 config_string_set(&config.kmod_extra_probes_list,
5054 strdup(arg));
5055 if (!config.kmod_extra_probes_list.value) {
5056 PERROR("strdup");
5057 ret = -ENOMEM;
5058 }
5059 }
5060 } else if (string_match(optname, "config") || opt == 'f') {
5061 /* This is handled in set_options() thus silent skip. */
5062 goto end;
5063 } else {
5064 /* Unknown option or other error.
5065 * Error is printed by getopt, just return */
5066 ret = -1;
5067 }
5068
5069 end:
5070 if (ret == -EINVAL) {
5071 const char *opt_name = "unknown";
5072 int i;
5073
5074 for (i = 0; i < sizeof(long_options) / sizeof(struct option);
5075 i++) {
5076 if (opt == long_options[i].val) {
5077 opt_name = long_options[i].name;
5078 break;
5079 }
5080 }
5081
5082 WARN("Invalid argument provided for option \"%s\", using default value.",
5083 opt_name);
5084 }
5085
5086 return ret;
5087 }
5088
5089 /*
5090 * config_entry_handler_cb used to handle options read from a config file.
5091 * See config_entry_handler_cb comment in common/config/session-config.h for the
5092 * return value conventions.
5093 */
5094 static int config_entry_handler(const struct config_entry *entry, void *unused)
5095 {
5096 int ret = 0, i;
5097
5098 if (!entry || !entry->name || !entry->value) {
5099 ret = -EINVAL;
5100 goto end;
5101 }
5102
5103 /* Check if the option is to be ignored */
5104 for (i = 0; i < sizeof(config_ignore_options) / sizeof(char *); i++) {
5105 if (!strcmp(entry->name, config_ignore_options[i])) {
5106 goto end;
5107 }
5108 }
5109
5110 for (i = 0; i < (sizeof(long_options) / sizeof(struct option)) - 1;
5111 i++) {
5112
5113 /* Ignore if not fully matched. */
5114 if (strcmp(entry->name, long_options[i].name)) {
5115 continue;
5116 }
5117
5118 /*
5119 * If the option takes no argument on the command line, we have to
5120 * check if the value is "true". We support non-zero numeric values,
5121 * true, on and yes.
5122 */
5123 if (!long_options[i].has_arg) {
5124 ret = config_parse_value(entry->value);
5125 if (ret <= 0) {
5126 if (ret) {
5127 WARN("Invalid configuration value \"%s\" for option %s",
5128 entry->value, entry->name);
5129 }
5130 /* False, skip boolean config option. */
5131 goto end;
5132 }
5133 }
5134
5135 ret = set_option(long_options[i].val, entry->value, entry->name);
5136 goto end;
5137 }
5138
5139 WARN("Unrecognized option \"%s\" in daemon configuration file.", entry->name);
5140
5141 end:
5142 return ret;
5143 }
5144
5145 /*
5146 * daemon configuration loading and argument parsing
5147 */
5148 static int set_options(int argc, char **argv)
5149 {
5150 int ret = 0, c = 0, option_index = 0;
5151 int orig_optopt = optopt, orig_optind = optind;
5152 char *optstring;
5153 const char *config_path = NULL;
5154
5155 optstring = utils_generate_optstring(long_options,
5156 sizeof(long_options) / sizeof(struct option));
5157 if (!optstring) {
5158 ret = -ENOMEM;
5159 goto end;
5160 }
5161
5162 /* Check for the --config option */
5163 while ((c = getopt_long(argc, argv, optstring, long_options,
5164 &option_index)) != -1) {
5165 if (c == '?') {
5166 ret = -EINVAL;
5167 goto end;
5168 } else if (c != 'f') {
5169 /* if not equal to --config option. */
5170 continue;
5171 }
5172
5173 if (lttng_is_setuid_setgid()) {
5174 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
5175 "-f, --config");
5176 } else {
5177 config_path = utils_expand_path(optarg);
5178 if (!config_path) {
5179 ERR("Failed to resolve path: %s", optarg);
5180 }
5181 }
5182 }
5183
5184 ret = config_get_section_entries(config_path, config_section_name,
5185 config_entry_handler, NULL);
5186 if (ret) {
5187 if (ret > 0) {
5188 ERR("Invalid configuration option at line %i", ret);
5189 ret = -1;
5190 }
5191 goto end;
5192 }
5193
5194 /* Reset getopt's global state */
5195 optopt = orig_optopt;
5196 optind = orig_optind;
5197 while (1) {
5198 option_index = -1;
5199 /*
5200 * getopt_long() will not set option_index if it encounters a
5201 * short option.
5202 */
5203 c = getopt_long(argc, argv, optstring, long_options,
5204 &option_index);
5205 if (c == -1) {
5206 break;
5207 }
5208
5209 /*
5210 * Pass NULL as the long option name if popt left the index
5211 * unset.
5212 */
5213 ret = set_option(c, optarg,
5214 option_index < 0 ? NULL :
5215 long_options[option_index].name);
5216 if (ret < 0) {
5217 break;
5218 }
5219 }
5220
5221 end:
5222 free(optstring);
5223 return ret;
5224 }
5225
5226 /*
5227 * Creates the two needed socket by the daemon.
5228 * apps_sock - The communication socket for all UST apps.
5229 * client_sock - The communication of the cli tool (lttng).
5230 */
5231 static int init_daemon_socket(void)
5232 {
5233 int ret = 0;
5234 mode_t old_umask;
5235
5236 old_umask = umask(0);
5237
5238 /* Create client tool unix socket */
5239 client_sock = lttcomm_create_unix_sock(config.client_unix_sock_path.value);
5240 if (client_sock < 0) {
5241 ERR("Create unix sock failed: %s", config.client_unix_sock_path.value);
5242 ret = -1;
5243 goto end;
5244 }
5245
5246 /* Set the cloexec flag */
5247 ret = utils_set_fd_cloexec(client_sock);
5248 if (ret < 0) {
5249 ERR("Unable to set CLOEXEC flag to the client Unix socket (fd: %d). "
5250 "Continuing but note that the consumer daemon will have a "
5251 "reference to this socket on exec()", client_sock);
5252 }
5253
5254 /* File permission MUST be 660 */
5255 ret = chmod(config.client_unix_sock_path.value, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
5256 if (ret < 0) {
5257 ERR("Set file permissions failed: %s", config.client_unix_sock_path.value);
5258 PERROR("chmod");
5259 goto end;
5260 }
5261
5262 /* Create the application unix socket */
5263 apps_sock = lttcomm_create_unix_sock(config.apps_unix_sock_path.value);
5264 if (apps_sock < 0) {
5265 ERR("Create unix sock failed: %s", config.apps_unix_sock_path.value);
5266 ret = -1;
5267 goto end;
5268 }
5269
5270 /* Set the cloexec flag */
5271 ret = utils_set_fd_cloexec(apps_sock);
5272 if (ret < 0) {
5273 ERR("Unable to set CLOEXEC flag to the app Unix socket (fd: %d). "
5274 "Continuing but note that the consumer daemon will have a "
5275 "reference to this socket on exec()", apps_sock);
5276 }
5277
5278 /* File permission MUST be 666 */
5279 ret = chmod(config.apps_unix_sock_path.value,
5280 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
5281 if (ret < 0) {
5282 ERR("Set file permissions failed: %s", config.apps_unix_sock_path.value);
5283 PERROR("chmod");
5284 goto end;
5285 }
5286
5287 DBG3("Session daemon client socket %d and application socket %d created",
5288 client_sock, apps_sock);
5289
5290 end:
5291 umask(old_umask);
5292 return ret;
5293 }
5294
5295 /*
5296 * Create lockfile using the rundir and return its fd.
5297 */
5298 static int create_lockfile(void)
5299 {
5300 return utils_create_lock_file(config.lock_file_path.value);
5301 }
5302
5303 /*
5304 * Check if the global socket is available, and if a daemon is answering at the
5305 * other side. If yes, error is returned.
5306 *
5307 * Also attempts to create and hold the lock file.
5308 */
5309 static int check_existing_daemon(void)
5310 {
5311 int ret = 0;
5312
5313 /* Is there anybody out there ? */
5314 if (lttng_session_daemon_alive()) {
5315 ret = -EEXIST;
5316 goto end;
5317 }
5318
5319 lockfile_fd = create_lockfile();
5320 if (lockfile_fd < 0) {
5321 ret = -EEXIST;
5322 goto end;
5323 }
5324 end:
5325 return ret;
5326 }
5327
5328 static void sessiond_cleanup_lock_file(void)
5329 {
5330 int ret;
5331
5332 /*
5333 * Cleanup lock file by deleting it and finaly closing it which will
5334 * release the file system lock.
5335 */
5336 if (lockfile_fd >= 0) {
5337 ret = remove(config.lock_file_path.value);
5338 if (ret < 0) {
5339 PERROR("remove lock file");
5340 }
5341 ret = close(lockfile_fd);
5342 if (ret < 0) {
5343 PERROR("close lock file");
5344 }
5345 }
5346 }
5347
5348 /*
5349 * Set the tracing group gid onto the client socket.
5350 *
5351 * Race window between mkdir and chown is OK because we are going from more
5352 * permissive (root.root) to less permissive (root.tracing).
5353 */
5354 static int set_permissions(char *rundir)
5355 {
5356 int ret;
5357 gid_t gid;
5358
5359 gid = utils_get_group_id(config.tracing_group_name.value);
5360
5361 /* Set lttng run dir */
5362 ret = chown(rundir, 0, gid);
5363 if (ret < 0) {
5364 ERR("Unable to set group on %s", rundir);
5365 PERROR("chown");
5366 }
5367
5368 /*
5369 * Ensure all applications and tracing group can search the run
5370 * dir. Allow everyone to read the directory, since it does not
5371 * buy us anything to hide its content.
5372 */
5373 ret = chmod(rundir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
5374 if (ret < 0) {
5375 ERR("Unable to set permissions on %s", rundir);
5376 PERROR("chmod");
5377 }
5378
5379 /* lttng client socket path */
5380 ret = chown(config.client_unix_sock_path.value, 0, gid);
5381 if (ret < 0) {
5382 ERR("Unable to set group on %s", config.client_unix_sock_path.value);
5383 PERROR("chown");
5384 }
5385
5386 /* kconsumer error socket path */
5387 ret = chown(kconsumer_data.err_unix_sock_path, 0, 0);
5388 if (ret < 0) {
5389 ERR("Unable to set group on %s", kconsumer_data.err_unix_sock_path);
5390 PERROR("chown");
5391 }
5392
5393 /* 64-bit ustconsumer error socket path */
5394 ret = chown(ustconsumer64_data.err_unix_sock_path, 0, 0);
5395 if (ret < 0) {
5396 ERR("Unable to set group on %s", ustconsumer64_data.err_unix_sock_path);
5397 PERROR("chown");
5398 }
5399
5400 /* 32-bit ustconsumer compat32 error socket path */
5401 ret = chown(ustconsumer32_data.err_unix_sock_path, 0, 0);
5402 if (ret < 0) {
5403 ERR("Unable to set group on %s", ustconsumer32_data.err_unix_sock_path);
5404 PERROR("chown");
5405 }
5406
5407 DBG("All permissions are set");
5408
5409 return ret;
5410 }
5411
5412 /*
5413 * Create the lttng run directory needed for all global sockets and pipe.
5414 */
5415 static int create_lttng_rundir(void)
5416 {
5417 int ret;
5418
5419 DBG3("Creating LTTng run directory: %s", config.rundir.value);
5420
5421 ret = mkdir(config.rundir.value, S_IRWXU);
5422 if (ret < 0) {
5423 if (errno != EEXIST) {
5424 ERR("Unable to create %s", config.rundir.value);
5425 goto error;
5426 } else {
5427 ret = 0;
5428 }
5429 }
5430
5431 error:
5432 return ret;
5433 }
5434
5435 /*
5436 * Setup sockets and directory needed by the consumerds' communication with the
5437 * session daemon.
5438 */
5439 static int set_consumer_sockets(struct consumer_data *consumer_data)
5440 {
5441 int ret;
5442 char *path = NULL;
5443
5444 switch (consumer_data->type) {
5445 case LTTNG_CONSUMER_KERNEL:
5446 path = config.kconsumerd_path.value;
5447 break;
5448 case LTTNG_CONSUMER64_UST:
5449 path = config.consumerd64_path.value;
5450 break;
5451 case LTTNG_CONSUMER32_UST:
5452 path = config.consumerd32_path.value;
5453 break;
5454 default:
5455 ERR("Consumer type unknown");
5456 ret = -EINVAL;
5457 goto error;
5458 }
5459 assert(path);
5460
5461 DBG2("Creating consumer directory: %s", path);
5462
5463 ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP);
5464 if (ret < 0 && errno != EEXIST) {
5465 PERROR("mkdir");
5466 ERR("Failed to create %s", path);
5467 goto error;
5468 }
5469 if (is_root) {
5470 ret = chown(path, 0, utils_get_group_id(config.tracing_group_name.value));
5471 if (ret < 0) {
5472 ERR("Unable to set group on %s", path);
5473 PERROR("chown");
5474 goto error;
5475 }
5476 }
5477
5478 /* Create the consumerd error unix socket */
5479 consumer_data->err_sock =
5480 lttcomm_create_unix_sock(consumer_data->err_unix_sock_path);
5481 if (consumer_data->err_sock < 0) {
5482 ERR("Create unix sock failed: %s", consumer_data->err_unix_sock_path);
5483 ret = -1;
5484 goto error;
5485 }
5486
5487 /*
5488 * Set the CLOEXEC flag. Return code is useless because either way, the
5489 * show must go on.
5490 */
5491 ret = utils_set_fd_cloexec(consumer_data->err_sock);
5492 if (ret < 0) {
5493 PERROR("utils_set_fd_cloexec");
5494 /* continue anyway */
5495 }
5496
5497 /* File permission MUST be 660 */
5498 ret = chmod(consumer_data->err_unix_sock_path,
5499 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
5500 if (ret < 0) {
5501 ERR("Set file permissions failed: %s", consumer_data->err_unix_sock_path);
5502 PERROR("chmod");
5503 goto error;
5504 }
5505
5506 error:
5507 return ret;
5508 }
5509
5510 /*
5511 * Signal handler for the daemon
5512 *
5513 * Simply stop all worker threads, leaving main() return gracefully after
5514 * joining all threads and calling cleanup().
5515 */
5516 static void sighandler(int sig)
5517 {
5518 switch (sig) {
5519 case SIGINT:
5520 DBG("SIGINT caught");
5521 stop_threads();
5522 break;
5523 case SIGTERM:
5524 DBG("SIGTERM caught");
5525 stop_threads();
5526 break;
5527 case SIGUSR1:
5528 CMM_STORE_SHARED(recv_child_signal, 1);
5529 break;
5530 default:
5531 break;
5532 }
5533 }
5534
5535 /*
5536 * Setup signal handler for :
5537 * SIGINT, SIGTERM, SIGPIPE
5538 */
5539 static int set_signal_handler(void)
5540 {
5541 int ret = 0;
5542 struct sigaction sa;
5543 sigset_t sigset;
5544
5545 if ((ret = sigemptyset(&sigset)) < 0) {
5546 PERROR("sigemptyset");
5547 return ret;
5548 }
5549
5550 sa.sa_mask = sigset;
5551 sa.sa_flags = 0;
5552
5553 sa.sa_handler = sighandler;
5554 if ((ret = sigaction(SIGTERM, &sa, NULL)) < 0) {
5555 PERROR("sigaction");
5556 return ret;
5557 }
5558
5559 if ((ret = sigaction(SIGINT, &sa, NULL)) < 0) {
5560 PERROR("sigaction");
5561 return ret;
5562 }
5563
5564 if ((ret = sigaction(SIGUSR1, &sa, NULL)) < 0) {
5565 PERROR("sigaction");
5566 return ret;
5567 }
5568
5569 sa.sa_handler = SIG_IGN;
5570 if ((ret = sigaction(SIGPIPE, &sa, NULL)) < 0) {
5571 PERROR("sigaction");
5572 return ret;
5573 }
5574
5575 DBG("Signal handler set for SIGTERM, SIGUSR1, SIGPIPE and SIGINT");
5576
5577 return ret;
5578 }
5579
/*
 * Raise the open files limit. This daemon can open a large number of file
 * descriptors in order to consume multiple kernel traces.
 *
 * Both the soft and hard limits are set to 65535, since the kernel does not
 * allow an infinite limit for open files. A failure is only logged.
 */
static void set_ulimit(void)
{
	struct rlimit nofile_limit;

	nofile_limit.rlim_cur = 65535;
	nofile_limit.rlim_max = 65535;

	if (setrlimit(RLIMIT_NOFILE, &nofile_limit) < 0) {
		PERROR("failed to set open files limit");
	}
}
5598
5599 static int write_pidfile(void)
5600 {
5601 return utils_create_pid_file(getpid(), config.pid_file_path.value);
5602 }
5603
5604 static int set_clock_plugin_env(void)
5605 {
5606 int ret = 0;
5607 char *env_value = NULL;
5608
5609 if (!config.lttng_ust_clock_plugin.value) {
5610 goto end;
5611 }
5612
5613 ret = asprintf(&env_value, "LTTNG_UST_CLOCK_PLUGIN=%s",
5614 config.lttng_ust_clock_plugin.value);
5615 if (ret < 0) {
5616 PERROR("asprintf");
5617 goto end;
5618 }
5619
5620 ret = putenv(env_value);
5621 if (ret) {
5622 free(env_value);
5623 PERROR("putenv of LTTNG_UST_CLOCK_PLUGIN");
5624 goto end;
5625 }
5626
5627 DBG("Updated LTTNG_UST_CLOCK_PLUGIN environment variable to \"%s\"",
5628 config.lttng_ust_clock_plugin.value);
5629 end:
5630 return ret;
5631 }
5632
5633 /*
5634 * main
5635 */
5636 int main(int argc, char **argv)
5637 {
5638 int ret = 0, retval = 0;
5639 void *status;
5640 const char *env_app_timeout;
5641 struct lttng_pipe *ust32_channel_monitor_pipe = NULL,
5642 *ust64_channel_monitor_pipe = NULL,
5643 *kernel_channel_monitor_pipe = NULL;
5644 bool notification_thread_launched = false;
5645 bool rotation_thread_launched = false;
5646 bool timer_thread_launched = false;
5647 struct timer_thread_parameters timer_thread_ctx;
5648 /* Queue of rotation jobs populated by the sessiond-timer. */
5649 struct rotation_thread_timer_queue *rotation_timer_queue = NULL;
5650 sem_t notification_thread_ready;
5651
5652 init_kernel_workarounds();
5653
5654 rcu_register_thread();
5655
5656 if (set_signal_handler()) {
5657 retval = -1;
5658 goto exit_set_signal_handler;
5659 }
5660
5661 if (timer_signal_init()) {
5662 retval = -1;
5663 goto exit_set_signal_handler;
5664 }
5665
5666 page_size = sysconf(_SC_PAGESIZE);
5667 if (page_size < 0) {
5668 PERROR("sysconf _SC_PAGESIZE");
5669 page_size = LONG_MAX;
5670 WARN("Fallback page size to %ld", page_size);
5671 }
5672
5673 ret = sessiond_config_init(&config);
5674 if (ret) {
5675 retval = -1;
5676 goto exit_set_signal_handler;
5677 }
5678
5679 /*
5680 * Init config from environment variables.
5681 * Command line option override env configuration per-doc. Do env first.
5682 */
5683 sessiond_config_apply_env_config(&config);
5684
5685 /*
5686 * Parse arguments and load the daemon configuration file.
5687 *
5688 * We have an exit_options exit path to free memory reserved by
5689 * set_options. This is needed because the rest of sessiond_cleanup()
5690 * depends on ht_cleanup_thread, which depends on lttng_daemonize, which
5691 * depends on set_options.
5692 */
5693 progname = argv[0];
5694 if (set_options(argc, argv)) {
5695 retval = -1;
5696 goto exit_options;
5697 }
5698
5699 /*
5700 * Resolve all paths received as arguments, configuration option, or
5701 * through environment variable as absolute paths. This is necessary
5702 * since daemonizing causes the sessiond's current working directory
5703 * to '/'.
5704 */
5705 ret = sessiond_config_resolve_paths(&config);
5706 if (ret) {
5707 goto exit_options;
5708 }
5709
5710 /* Apply config. */
5711 lttng_opt_verbose = config.verbose;
5712 lttng_opt_quiet = config.quiet;
5713 kconsumer_data.err_unix_sock_path =
5714 config.kconsumerd_err_unix_sock_path.value;
5715 kconsumer_data.cmd_unix_sock_path =
5716 config.kconsumerd_cmd_unix_sock_path.value;
5717 ustconsumer32_data.err_unix_sock_path =
5718 config.consumerd32_err_unix_sock_path.value;
5719 ustconsumer32_data.cmd_unix_sock_path =
5720 config.consumerd32_cmd_unix_sock_path.value;
5721 ustconsumer64_data.err_unix_sock_path =
5722 config.consumerd64_err_unix_sock_path.value;
5723 ustconsumer64_data.cmd_unix_sock_path =
5724 config.consumerd64_cmd_unix_sock_path.value;
5725 set_clock_plugin_env();
5726
5727 sessiond_config_log(&config);
5728
5729 if (create_lttng_rundir()) {
5730 retval = -1;
5731 goto exit_options;
5732 }
5733
5734 /* Abort launch if a session daemon is already running. */
5735 if (check_existing_daemon()) {
5736 ERR("A session daemon is already running.");
5737 retval = -1;
5738 goto exit_options;
5739 }
5740
5741 /* Daemonize */
5742 if (config.daemonize || config.background) {
5743 int i;
5744
5745 ret = lttng_daemonize(&child_ppid, &recv_child_signal,
5746 !config.background);
5747 if (ret < 0) {
5748 retval = -1;
5749 goto exit_options;
5750 }
5751
5752 /*
5753 * We are in the child. Make sure all other file descriptors are
5754 * closed, in case we are called with more opened file
5755 * descriptors than the standard ones and the lock file.
5756 */
5757 for (i = 3; i < sysconf(_SC_OPEN_MAX); i++) {
5758 if (i == lockfile_fd) {
5759 continue;
5760 }
5761 (void) close(i);
5762 }
5763 }
5764
5765 if (run_as_create_worker(argv[0]) < 0) {
5766 goto exit_create_run_as_worker_cleanup;
5767 }
5768
5769 /*
5770 * Starting from here, we can create threads. This needs to be after
5771 * lttng_daemonize due to RCU.
5772 */
5773
5774 /*
5775 * Initialize the health check subsystem. This call should set the
5776 * appropriate time values.
5777 */
5778 health_sessiond = health_app_create(NR_HEALTH_SESSIOND_TYPES);
5779 if (!health_sessiond) {
5780 PERROR("health_app_create error");
5781 retval = -1;
5782 goto exit_health_sessiond_cleanup;
5783 }
5784
5785 /* Create thread to clean up RCU hash tables */
5786 if (init_ht_cleanup_thread(&ht_cleanup_thread)) {
5787 retval = -1;
5788 goto exit_ht_cleanup;
5789 }
5790
5791 /* Create thread quit pipe */
5792 if (sessiond_init_thread_quit_pipe()) {
5793 retval = -1;
5794 goto exit_init_data;
5795 }
5796
5797 /* Check if daemon is UID = 0 */
5798 is_root = !getuid();
5799 if (is_root) {
5800 /* Create global run dir with root access */
5801
5802 kernel_channel_monitor_pipe = lttng_pipe_open(0);
5803 if (!kernel_channel_monitor_pipe) {
5804 ERR("Failed to create kernel consumer channel monitor pipe");
5805 retval = -1;
5806 goto exit_init_data;
5807 }
5808 kconsumer_data.channel_monitor_pipe =
5809 lttng_pipe_release_writefd(
5810 kernel_channel_monitor_pipe);
5811 if (kconsumer_data.channel_monitor_pipe < 0) {
5812 retval = -1;
5813 goto exit_init_data;
5814 }
5815 }
5816
5817 /* Set consumer initial state */
5818 kernel_consumerd_state = CONSUMER_STOPPED;
5819 ust_consumerd_state = CONSUMER_STOPPED;
5820
5821 ust32_channel_monitor_pipe = lttng_pipe_open(0);
5822 if (!ust32_channel_monitor_pipe) {
5823 ERR("Failed to create 32-bit user space consumer channel monitor pipe");
5824 retval = -1;
5825 goto exit_init_data;
5826 }
5827 ustconsumer32_data.channel_monitor_pipe = lttng_pipe_release_writefd(
5828 ust32_channel_monitor_pipe);
5829 if (ustconsumer32_data.channel_monitor_pipe < 0) {
5830 retval = -1;
5831 goto exit_init_data;
5832 }
5833
5834 /*
5835 * The rotation_thread_timer_queue structure is shared between the
5836 * sessiond timer thread and the rotation thread. The main thread keeps
5837 * its ownership and destroys it when both threads have been joined.
5838 */
5839 rotation_timer_queue = rotation_thread_timer_queue_create();
5840 if (!rotation_timer_queue) {
5841 retval = -1;
5842 goto exit_init_data;
5843 }
5844 timer_thread_ctx.rotation_thread_job_queue = rotation_timer_queue;
5845
5846 ust64_channel_monitor_pipe = lttng_pipe_open(0);
5847 if (!ust64_channel_monitor_pipe) {
5848 ERR("Failed to create 64-bit user space consumer channel monitor pipe");
5849 retval = -1;
5850 goto exit_init_data;
5851 }
5852 ustconsumer64_data.channel_monitor_pipe = lttng_pipe_release_writefd(
5853 ust64_channel_monitor_pipe);
5854 if (ustconsumer64_data.channel_monitor_pipe < 0) {
5855 retval = -1;
5856 goto exit_init_data;
5857 }
5858
5859 /*
5860 * Init UST app hash table. Alloc hash table before this point since
5861 * cleanup() can get called after that point.
5862 */
5863 if (ust_app_ht_alloc()) {
5864 ERR("Failed to allocate UST app hash table");
5865 retval = -1;
5866 goto exit_init_data;
5867 }
5868
5869 /*
5870 * Initialize agent app hash table. We allocate the hash table here
5871 * since cleanup() can get called after this point.
5872 */
5873 if (agent_app_ht_alloc()) {
5874 ERR("Failed to allocate Agent app hash table");
5875 retval = -1;
5876 goto exit_init_data;
5877 }
5878
5879 /*
5880 * These actions must be executed as root. We do that *after* setting up
5881 * the sockets path because we MUST make the check for another daemon using
5882 * those paths *before* trying to set the kernel consumer sockets and init
5883 * kernel tracer.
5884 */
5885 if (is_root) {
5886 if (set_consumer_sockets(&kconsumer_data)) {
5887 retval = -1;
5888 goto exit_init_data;
5889 }
5890
5891 /* Setup kernel tracer */
5892 if (!config.no_kernel) {
5893 init_kernel_tracer();
5894 if (kernel_tracer_fd >= 0) {
5895 ret = syscall_init_table();
5896 if (ret < 0) {
5897 ERR("Unable to populate syscall table. "
5898 "Syscall tracing won't work "
5899 "for this session daemon.");
5900 }
5901 }
5902 }
5903
5904 /* Set ulimit for open files */
5905 set_ulimit();
5906 }
5907 /* init lttng_fd tracking must be done after set_ulimit. */
5908 lttng_fd_init();
5909
5910 if (set_consumer_sockets(&ustconsumer64_data)) {
5911 retval = -1;
5912 goto exit_init_data;
5913 }
5914
5915 if (set_consumer_sockets(&ustconsumer32_data)) {
5916 retval = -1;
5917 goto exit_init_data;
5918 }
5919
5920 /* Setup the needed unix socket */
5921 if (init_daemon_socket()) {
5922 retval = -1;
5923 goto exit_init_data;
5924 }
5925
5926 /* Set credentials to socket */
5927 if (is_root && set_permissions(config.rundir.value)) {
5928 retval = -1;
5929 goto exit_init_data;
5930 }
5931
5932 /* Get parent pid if -S, --sig-parent is specified. */
5933 if (config.sig_parent) {
5934 ppid = getppid();
5935 }
5936
5937 /* Setup the kernel pipe for waking up the kernel thread */
5938 if (is_root && !config.no_kernel) {
5939 if (utils_create_pipe_cloexec(kernel_poll_pipe)) {
5940 retval = -1;
5941 goto exit_init_data;
5942 }
5943 }
5944
5945 /* Setup the thread apps communication pipe. */
5946 if (utils_create_pipe_cloexec(apps_cmd_pipe)) {
5947 retval = -1;
5948 goto exit_init_data;
5949 }
5950
5951 /* Setup the thread apps notify communication pipe. */
5952 if (utils_create_pipe_cloexec(apps_cmd_notify_pipe)) {
5953 retval = -1;
5954 goto exit_init_data;
5955 }
5956
5957 /* Initialize global buffer per UID and PID registry. */
5958 buffer_reg_init_uid_registry();
5959 buffer_reg_init_pid_registry();
5960
5961 /* Init UST command queue. */
5962 cds_wfcq_init(&ust_cmd_queue.head, &ust_cmd_queue.tail);
5963
5964 /*
5965 * Get session list pointer. This pointer MUST NOT be free'd. This list
5966 * is statically declared in session.c
5967 */
5968 session_list_ptr = session_get_list();
5969
5970 cmd_init();
5971
5972 /* Check for the application socket timeout env variable. */
5973 env_app_timeout = getenv(DEFAULT_APP_SOCKET_TIMEOUT_ENV);
5974 if (env_app_timeout) {
5975 config.app_socket_timeout = atoi(env_app_timeout);
5976 } else {
5977 config.app_socket_timeout = DEFAULT_APP_SOCKET_RW_TIMEOUT;
5978 }
5979
5980 ret = write_pidfile();
5981 if (ret) {
5982 ERR("Error in write_pidfile");
5983 retval = -1;
5984 goto exit_init_data;
5985 }
5986
5987 /* Initialize communication library */
5988 lttcomm_init();
5989 /* Initialize TCP timeout values */
5990 lttcomm_inet_init();
5991
5992 if (load_session_init_data(&load_info) < 0) {
5993 retval = -1;
5994 goto exit_init_data;
5995 }
5996 load_info->path = config.load_session_path.value;
5997
5998 /* Create health-check thread. */
5999 ret = pthread_create(&health_thread, default_pthread_attr(),
6000 thread_manage_health, (void *) NULL);
6001 if (ret) {
6002 errno = ret;
6003 PERROR("pthread_create health");
6004 retval = -1;
6005 goto exit_health;
6006 }
6007
6008 /*
6009 * The rotation thread needs the notification thread to be ready before
6010 * creating the rotate_notification_channel, so we use this semaphore as
6011 * a rendez-vous point.
6012 */
6013 sem_init(&notification_thread_ready, 0, 0);
6014
6015 /* notification_thread_data acquires the pipes' read side. */
6016 notification_thread_handle = notification_thread_handle_create(
6017 ust32_channel_monitor_pipe,
6018 ust64_channel_monitor_pipe,
6019 kernel_channel_monitor_pipe,
6020 &notification_thread_ready);
6021 if (!notification_thread_handle) {
6022 retval = -1;
6023 ERR("Failed to create notification thread shared data");
6024 stop_threads();
6025 goto exit_notification;
6026 }
6027
6028 /* Create notification thread. */
6029 ret = pthread_create(&notification_thread, default_pthread_attr(),
6030 thread_notification, notification_thread_handle);
6031 if (ret) {
6032 errno = ret;
6033 PERROR("pthread_create notification");
6034 retval = -1;
6035 stop_threads();
6036 goto exit_notification;
6037 }
6038 notification_thread_launched = true;
6039
6040 /* Create timer thread. */
6041 ret = pthread_create(&timer_thread, default_pthread_attr(),
6042 timer_thread_func, &timer_thread_ctx);
6043 if (ret) {
6044 errno = ret;
6045 PERROR("pthread_create timer");
6046 retval = -1;
6047 stop_threads();
6048 goto exit_notification;
6049 }
6050 timer_thread_launched = true;
6051
6052 /* rotation_thread_data acquires the pipes' read side. */
6053 rotation_thread_handle = rotation_thread_handle_create(
6054 rotation_timer_queue,
6055 notification_thread_handle,
6056 &notification_thread_ready);
6057 if (!rotation_thread_handle) {
6058 retval = -1;
6059 ERR("Failed to create rotation thread shared data");
6060 stop_threads();
6061 goto exit_rotation;
6062 }
6063
6064 /* Create rotation thread. */
6065 ret = pthread_create(&rotation_thread, default_pthread_attr(),
6066 thread_rotation, rotation_thread_handle);
6067 if (ret) {
6068 errno = ret;
6069 PERROR("pthread_create rotation");
6070 retval = -1;
6071 stop_threads();
6072 goto exit_rotation;
6073 }
6074 rotation_thread_launched = true;
6075
6076 /* Create thread to manage the client socket */
6077 ret = pthread_create(&client_thread, default_pthread_attr(),
6078 thread_manage_clients, (void *) NULL);
6079 if (ret) {
6080 errno = ret;
6081 PERROR("pthread_create clients");
6082 retval = -1;
6083 stop_threads();
6084 goto exit_client;
6085 }
6086
6087 /* Create thread to dispatch registration */
6088 ret = pthread_create(&dispatch_thread, default_pthread_attr(),
6089 thread_dispatch_ust_registration, (void *) NULL);
6090 if (ret) {
6091 errno = ret;
6092 PERROR("pthread_create dispatch");
6093 retval = -1;
6094 stop_threads();
6095 goto exit_dispatch;
6096 }
6097
6098 /* Create thread to manage application registration. */
6099 ret = pthread_create(&reg_apps_thread, default_pthread_attr(),
6100 thread_registration_apps, (void *) NULL);
6101 if (ret) {
6102 errno = ret;
6103 PERROR("pthread_create registration");
6104 retval = -1;
6105 stop_threads();
6106 goto exit_reg_apps;
6107 }
6108
6109 /* Create thread to manage application socket */
6110 ret = pthread_create(&apps_thread, default_pthread_attr(),
6111 thread_manage_apps, (void *) NULL);
6112 if (ret) {
6113 errno = ret;
6114 PERROR("pthread_create apps");
6115 retval = -1;
6116 stop_threads();
6117 goto exit_apps;
6118 }
6119
6120 /* Create thread to manage application notify socket */
6121 ret = pthread_create(&apps_notify_thread, default_pthread_attr(),
6122 ust_thread_manage_notify, (void *) NULL);
6123 if (ret) {
6124 errno = ret;
6125 PERROR("pthread_create notify");
6126 retval = -1;
6127 stop_threads();
6128 goto exit_apps_notify;
6129 }
6130
6131 /* Create agent registration thread. */
6132 ret = pthread_create(&agent_reg_thread, default_pthread_attr(),
6133 agent_thread_manage_registration, (void *) NULL);
6134 if (ret) {
6135 errno = ret;
6136 PERROR("pthread_create agent");
6137 retval = -1;
6138 stop_threads();
6139 goto exit_agent_reg;
6140 }
6141
6142 /* Only start this thread when running as root with kernel tracing enabled */
6143 if (is_root && !config.no_kernel) {
6144 /* Create kernel thread to manage kernel event */
6145 ret = pthread_create(&kernel_thread, default_pthread_attr(),
6146 thread_manage_kernel, (void *) NULL);
6147 if (ret) {
6148 errno = ret;
6149 PERROR("pthread_create kernel");
6150 retval = -1;
6151 stop_threads();
6152 goto exit_kernel;
6153 }
6154 }
6155
6156 /* Create session loading thread. */
6157 ret = pthread_create(&load_session_thread, default_pthread_attr(),
6158 thread_load_session, load_info);
6159 if (ret) {
6160 errno = ret;
6161 PERROR("pthread_create load_session_thread");
6162 retval = -1;
6163 stop_threads();
6164 goto exit_load_session;
6165 }
6166
6167 /*
6168 * This is where we start awaiting program completion (e.g. through
6169 * signal that asks threads to teardown).
6170 */
6171
6172 ret = pthread_join(load_session_thread, &status);
6173 if (ret) {
6174 errno = ret;
6175 PERROR("pthread_join load_session_thread");
6176 retval = -1;
6177 }
6178 exit_load_session:
6179
6180 if (is_root && !config.no_kernel) {
6181 ret = pthread_join(kernel_thread, &status);
6182 if (ret) {
6183 errno = ret;
6184 PERROR("pthread_join");
6185 retval = -1;
6186 }
6187 }
6188 exit_kernel:
6189
6190 ret = pthread_join(agent_reg_thread, &status);
6191 if (ret) {
6192 errno = ret;
6193 PERROR("pthread_join agent");
6194 retval = -1;
6195 }
6196 exit_agent_reg:
6197
6198 ret = pthread_join(apps_notify_thread, &status);
6199 if (ret) {
6200 errno = ret;
6201 PERROR("pthread_join apps notify");
6202 retval = -1;
6203 }
6204 exit_apps_notify:
6205
6206 ret = pthread_join(apps_thread, &status);
6207 if (ret) {
6208 errno = ret;
6209 PERROR("pthread_join apps");
6210 retval = -1;
6211 }
6212 exit_apps:
6213
6214 ret = pthread_join(reg_apps_thread, &status);
6215 if (ret) {
6216 errno = ret;
6217 PERROR("pthread_join");
6218 retval = -1;
6219 }
6220 exit_reg_apps:
6221
6222 /*
6223 * Join dispatch thread after joining reg_apps_thread to ensure
6224 * we don't leak applications in the queue.
6225 */
6226 ret = pthread_join(dispatch_thread, &status);
6227 if (ret) {
6228 errno = ret;
6229 PERROR("pthread_join");
6230 retval = -1;
6231 }
6232 exit_dispatch:
6233
6234 ret = pthread_join(client_thread, &status);
6235 if (ret) {
6236 errno = ret;
6237 PERROR("pthread_join");
6238 retval = -1;
6239 }
6240
6241 exit_client:
6242 exit_rotation:
6243 exit_notification:
6244 sem_destroy(&notification_thread_ready);
6245 ret = pthread_join(health_thread, &status);
6246 if (ret) {
6247 errno = ret;
6248 PERROR("pthread_join health thread");
6249 retval = -1;
6250 }
6251
6252 exit_health:
6253 exit_init_data:
6254 /*
6255 * Wait for all pending call_rcu work to complete before tearing
6256 * down data structures. call_rcu worker may be trying to
6257 * perform lookups in those structures.
6258 */
6259 rcu_barrier();
6260 /*
6261 * sessiond_cleanup() is called when no other thread is running, except
6262 * the ht_cleanup thread, which is needed to destroy the hash tables.
6263 */
6264 rcu_thread_online();
6265 sessiond_cleanup();
6266
6267 /*
6268 * Ensure all prior call_rcu are done. call_rcu callbacks may push
6269 * hash tables to the ht_cleanup thread. Therefore, we ensure that
6270 * the queue is empty before shutting down the clean-up thread.
6271 */
6272 rcu_barrier();
6273
6274 /*
6275 * The teardown of the notification system is performed after the
6276 * session daemon's teardown in order to allow it to be notified
6277 * of the active session and channels at the moment of the teardown.
6278 */
6279 if (notification_thread_handle) {
6280 if (notification_thread_launched) {
6281 notification_thread_command_quit(
6282 notification_thread_handle);
6283 ret = pthread_join(notification_thread, &status);
6284 if (ret) {
6285 errno = ret;
6286 PERROR("pthread_join notification thread");
6287 retval = -1;
6288 }
6289 }
6290 notification_thread_handle_destroy(notification_thread_handle);
6291 }
6292
6293 if (rotation_thread_handle) {
6294 if (rotation_thread_launched) {
6295 ret = pthread_join(rotation_thread, &status);
6296 if (ret) {
6297 errno = ret;
6298 PERROR("pthread_join rotation thread");
6299 retval = -1;
6300 }
6301 }
6302 rotation_thread_handle_destroy(rotation_thread_handle);
6303 }
6304
6305 if (timer_thread_launched) {
6306 timer_exit();
6307 ret = pthread_join(timer_thread, &status);
6308 if (ret) {
6309 errno = ret;
6310 PERROR("pthread_join timer thread");
6311 retval = -1;
6312 }
6313 }
6314
6315 /*
6316 * After the rotation and timer thread have quit, we can safely destroy
6317 * the rotation_timer_queue.
6318 */
6319 rotation_thread_timer_queue_destroy(rotation_timer_queue);
6320
6321 rcu_thread_offline();
6322 rcu_unregister_thread();
6323
6324 ret = fini_ht_cleanup_thread(&ht_cleanup_thread);
6325 if (ret) {
6326 retval = -1;
6327 }
6328 lttng_pipe_destroy(ust32_channel_monitor_pipe);
6329 lttng_pipe_destroy(ust64_channel_monitor_pipe);
6330 lttng_pipe_destroy(kernel_channel_monitor_pipe);
6331 exit_ht_cleanup:
6332
6333 health_app_destroy(health_sessiond);
6334 exit_health_sessiond_cleanup:
6335 exit_create_run_as_worker_cleanup:
6336
6337 exit_options:
6338 sessiond_cleanup_lock_file();
6339 sessiond_cleanup_options();
6340
6341 exit_set_signal_handler:
6342 if (!retval) {
6343 exit(EXIT_SUCCESS);
6344 } else {
6345 exit(EXIT_FAILURE);
6346 }
6347 }
This page took 0.231189 seconds and 3 git commands to generate.