Launch the client management thread using lttng_thread
lttng-tools.git: src/bin/lttng-sessiond/main.c
1 /*
2 * Copyright (C) 2011 - David Goulet <david.goulet@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * 2013 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define _LGPL_SOURCE
21 #include <getopt.h>
22 #include <grp.h>
23 #include <limits.h>
24 #include <paths.h>
25 #include <pthread.h>
26 #include <signal.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <inttypes.h>
31 #include <sys/mman.h>
32 #include <sys/mount.h>
33 #include <sys/resource.h>
34 #include <sys/socket.h>
35 #include <sys/stat.h>
36 #include <sys/types.h>
37 #include <sys/wait.h>
38 #include <urcu/uatomic.h>
39 #include <unistd.h>
40 #include <ctype.h>
41
42 #include <common/common.h>
43 #include <common/compat/socket.h>
44 #include <common/compat/getenv.h>
45 #include <common/defaults.h>
46 #include <common/kernel-consumer/kernel-consumer.h>
47 #include <common/futex.h>
48 #include <common/relayd/relayd.h>
49 #include <common/utils.h>
50 #include <common/daemonize.h>
51 #include <common/config/session-config.h>
52 #include <common/dynamic-buffer.h>
53 #include <lttng/event-internal.h>
54
55 #include "lttng-sessiond.h"
56 #include "buffer-registry.h"
57 #include "channel.h"
58 #include "cmd.h"
59 #include "consumer.h"
60 #include "context.h"
61 #include "event.h"
62 #include "kernel.h"
63 #include "kernel-consumer.h"
64 #include "modprobe.h"
65 #include "shm.h"
66 #include "ust-ctl.h"
67 #include "ust-consumer.h"
68 #include "utils.h"
69 #include "fd-limit.h"
70 #include "health-sessiond.h"
71 #include "testpoint.h"
72 #include "ust-thread.h"
73 #include "agent-thread.h"
74 #include "save.h"
75 #include "load-session-thread.h"
76 #include "notification-thread.h"
77 #include "notification-thread-commands.h"
78 #include "rotation-thread.h"
79 #include "lttng-syscall.h"
80 #include "agent.h"
81 #include "ht-cleanup.h"
82 #include "sessiond-config.h"
83 #include "timer.h"
84 #include "thread.h"
85 #include "client.h"
86
87 static const char *help_msg =
88 #ifdef LTTNG_EMBED_HELP
89 #include <lttng-sessiond.8.h>
90 #else
91 NULL
92 #endif
93 ;
94
95 const char *progname;
96 static int lockfile_fd = -1;
97
98 /* Set to 1 when a SIGUSR1 signal is received. */
99 static int recv_child_signal;
100
101 /* Command line options */
102 static const struct option long_options[] = {
103 { "client-sock", required_argument, 0, 'c' },
104 { "apps-sock", required_argument, 0, 'a' },
105 { "kconsumerd-cmd-sock", required_argument, 0, '\0' },
106 { "kconsumerd-err-sock", required_argument, 0, '\0' },
107 { "ustconsumerd32-cmd-sock", required_argument, 0, '\0' },
108 { "ustconsumerd32-err-sock", required_argument, 0, '\0' },
109 { "ustconsumerd64-cmd-sock", required_argument, 0, '\0' },
110 { "ustconsumerd64-err-sock", required_argument, 0, '\0' },
111 { "consumerd32-path", required_argument, 0, '\0' },
112 { "consumerd32-libdir", required_argument, 0, '\0' },
113 { "consumerd64-path", required_argument, 0, '\0' },
114 { "consumerd64-libdir", required_argument, 0, '\0' },
115 { "daemonize", no_argument, 0, 'd' },
116 { "background", no_argument, 0, 'b' },
117 { "sig-parent", no_argument, 0, 'S' },
118 { "help", no_argument, 0, 'h' },
119 { "group", required_argument, 0, 'g' },
120 { "version", no_argument, 0, 'V' },
121 { "quiet", no_argument, 0, 'q' },
122 { "verbose", no_argument, 0, 'v' },
123 { "verbose-consumer", no_argument, 0, '\0' },
124 { "no-kernel", no_argument, 0, '\0' },
125 { "pidfile", required_argument, 0, 'p' },
126 { "agent-tcp-port", required_argument, 0, '\0' },
127 { "config", required_argument, 0, 'f' },
128 { "load", required_argument, 0, 'l' },
129 { "kmod-probes", required_argument, 0, '\0' },
130 { "extra-kmod-probes", required_argument, 0, '\0' },
131 { NULL, 0, 0, 0 }
132 };
133
134 /* Command line options to ignore from configuration file */
135 static const char *config_ignore_options[] = { "help", "version", "config" };
136
137 /* Shared between threads */
138 static int dispatch_thread_exit;
139
140 static int apps_sock = -1;
141
142 /*
143 * This pipe is used to inform the thread managing application communication
144 * that a command is queued and ready to be processed.
145 */
146 static int apps_cmd_pipe[2] = { -1, -1 };
147
148 /* Pthread, Mutexes and Semaphores */
149 static pthread_t apps_thread;
150 static pthread_t apps_notify_thread;
151 static pthread_t reg_apps_thread;
152 static pthread_t kernel_thread;
153 static pthread_t dispatch_thread;
154 static pthread_t agent_reg_thread;
155 static pthread_t load_session_thread;
156
157 /*
158 * UST registration command queue. This queue is tied to a futex and uses an
159 * N wakers / 1 waiter scheme implemented and detailed in futex.c/.h.
160 *
161 * The thread_registration_apps and thread_dispatch_ust_registration threads
162 * use this queue along with the wait/wake scheme. Down the line,
163 * thread_manage_apps receives the new application sockets and monitors them
164 * for any I/O error or clean close that triggers an unregistration of the application.
165 */
166 static struct ust_cmd_queue ust_cmd_queue;
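/*
 * A minimal sketch of the N wakers / 1 waiter scheme described above, built
 * from the same queue and futex helpers used by thread_registration_apps and
 * thread_dispatch_ust_registration further down in this file. The example_*
 * function names are illustrative only.
 */
static void example_waker(struct ust_command *ust_cmd)
{
        /* Publish the node, then wake the single waiter. */
        cds_wfcq_enqueue(&ust_cmd_queue.head, &ust_cmd_queue.tail,
                        &ust_cmd->node);
        futex_nto1_wake(&ust_cmd_queue.futex);
}

static void example_waiter(void)
{
        struct cds_wfcq_node *node;

        for (;;) {
                /* Arm the futex before draining to avoid missing a wake-up. */
                futex_nto1_prepare(&ust_cmd_queue.futex);
                while ((node = cds_wfcq_dequeue_blocking(&ust_cmd_queue.head,
                                &ust_cmd_queue.tail)) != NULL) {
                        /* Process caa_container_of(node, struct ust_command, node). */
                }
                /* Sleep until a waker posts new work. */
                futex_nto1_wait(&ust_cmd_queue.futex);
        }
}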
167
168 static const char *module_proc_lttng = "/proc/lttng";
169
170 /* Load session thread information to operate. */
171 static struct load_session_thread_data *load_info;
172
173 /*
174 * Section name to look for in the daemon configuration file.
175 */
176 static const char * const config_section_name = "sessiond";
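/*
 * For illustration, assuming the INI-style format handled by
 * common/config/session-config.c, a daemon configuration file could contain
 * a section such as:
 *
 *   [sessiond]
 *   daemonize=yes
 *
 * The accepted keys mirror the long command-line options listed above, minus
 * the entries in config_ignore_options.
 */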
177
178 /* Am I root or not. Set to 1 if the daemon is running as root */
179 static int is_root;
180
181 /*
182 * Stop all threads by closing the thread quit pipe.
183 */
184 static void stop_threads(void)
185 {
186 int ret;
187
188 /* Stopping all threads */
189 DBG("Terminating all threads");
190 ret = sessiond_notify_quit_pipe();
191 if (ret < 0) {
192 ERR("write error on thread quit pipe");
193 }
194
195 /* Dispatch thread */
196 CMM_STORE_SHARED(dispatch_thread_exit, 1);
197 futex_nto1_wake(&ust_cmd_queue.futex);
198 }
199
200 /*
201 * Close every consumer socket.
202 */
203 static void close_consumer_sockets(void)
204 {
205 int ret;
206
207 if (kconsumer_data.err_sock >= 0) {
208 ret = close(kconsumer_data.err_sock);
209 if (ret < 0) {
210 PERROR("kernel consumer err_sock close");
211 }
212 }
213 if (ustconsumer32_data.err_sock >= 0) {
214 ret = close(ustconsumer32_data.err_sock);
215 if (ret < 0) {
216 PERROR("UST consumerd32 err_sock close");
217 }
218 }
219 if (ustconsumer64_data.err_sock >= 0) {
220 ret = close(ustconsumer64_data.err_sock);
221 if (ret < 0) {
222 PERROR("UST consumerd64 err_sock close");
223 }
224 }
225 if (kconsumer_data.cmd_sock >= 0) {
226 ret = close(kconsumer_data.cmd_sock);
227 if (ret < 0) {
228 PERROR("kernel consumer cmd_sock close");
229 }
230 }
231 if (ustconsumer32_data.cmd_sock >= 0) {
232 ret = close(ustconsumer32_data.cmd_sock);
233 if (ret < 0) {
234 PERROR("UST consumerd32 cmd_sock close");
235 }
236 }
237 if (ustconsumer64_data.cmd_sock >= 0) {
238 ret = close(ustconsumer64_data.cmd_sock);
239 if (ret < 0) {
240 PERROR("UST consumerd64 cmd_sock close");
241 }
242 }
243 if (kconsumer_data.channel_monitor_pipe >= 0) {
244 ret = close(kconsumer_data.channel_monitor_pipe);
245 if (ret < 0) {
246 PERROR("kernel consumer channel monitor pipe close");
247 }
248 }
249 if (ustconsumer32_data.channel_monitor_pipe >= 0) {
250 ret = close(ustconsumer32_data.channel_monitor_pipe);
251 if (ret < 0) {
252 PERROR("UST consumerd32 channel monitor pipe close");
253 }
254 }
255 if (ustconsumer64_data.channel_monitor_pipe >= 0) {
256 ret = close(ustconsumer64_data.channel_monitor_pipe);
257 if (ret < 0) {
258 PERROR("UST consumerd64 channel monitor pipe close");
259 }
260 }
261 }
262
263 /*
264 * Wait on consumer process termination.
265 *
266 * Need to be called with the consumer data lock held or from a context
267 * ensuring no concurrent access to data (e.g: cleanup).
268 */
269 static void wait_consumer(struct consumer_data *consumer_data)
270 {
271 pid_t ret;
272 int status;
273
274 if (consumer_data->pid <= 0) {
275 return;
276 }
277
278 DBG("Waiting for complete teardown of consumerd (PID: %d)",
279 consumer_data->pid);
280 ret = waitpid(consumer_data->pid, &status, 0);
281 if (ret == -1) {
282 PERROR("consumerd waitpid pid: %d", consumer_data->pid);
283 } else if (!WIFEXITED(status)) {
284 ERR("consumerd termination with error: %d",
285 WEXITSTATUS(status));
286 }
287 consumer_data->pid = 0;
288 }
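/*
 * For reference, the status word filled in by waitpid() above is decoded with
 * the WIFEXITED()/WEXITSTATUS() family of macros; a minimal sketch of the
 * usual decoding (example_report_child_status is illustrative only):
 */
static void example_report_child_status(pid_t pid, int status)
{
        if (WIFEXITED(status)) {
                DBG("Child %d exited with status %d", (int) pid,
                                WEXITSTATUS(status));
        } else if (WIFSIGNALED(status)) {
                ERR("Child %d was killed by signal %d", (int) pid,
                                WTERMSIG(status));
        }
}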
289
290 /*
291 * Cleanup the session daemon's data structures.
292 */
293 static void sessiond_cleanup(void)
294 {
295 int ret;
296 struct ltt_session_list *session_list = session_get_list();
297
298 DBG("Cleanup sessiond");
299
300 /*
301 * Close the thread quit pipe. It has already done its job,
302 * since we are now called.
303 */
304 sessiond_close_quit_pipe();
305
306 ret = remove(config.pid_file_path.value);
307 if (ret < 0) {
308 PERROR("remove pidfile %s", config.pid_file_path.value);
309 }
310
311 DBG("Removing sessiond and consumerd content of directory %s",
312 config.rundir.value);
313
314 /* sessiond */
315 DBG("Removing %s", config.pid_file_path.value);
316 (void) unlink(config.pid_file_path.value);
317
318 DBG("Removing %s", config.agent_port_file_path.value);
319 (void) unlink(config.agent_port_file_path.value);
320
321 /* kconsumerd */
322 DBG("Removing %s", kconsumer_data.err_unix_sock_path);
323 (void) unlink(kconsumer_data.err_unix_sock_path);
324
325 DBG("Removing directory %s", config.kconsumerd_path.value);
326 (void) rmdir(config.kconsumerd_path.value);
327
328 /* ust consumerd 32 */
329 DBG("Removing %s", config.consumerd32_err_unix_sock_path.value);
330 (void) unlink(config.consumerd32_err_unix_sock_path.value);
331
332 DBG("Removing directory %s", config.consumerd32_path.value);
333 (void) rmdir(config.consumerd32_path.value);
334
335 /* ust consumerd 64 */
336 DBG("Removing %s", config.consumerd64_err_unix_sock_path.value);
337 (void) unlink(config.consumerd64_err_unix_sock_path.value);
338
339 DBG("Removing directory %s", config.consumerd64_path.value);
340 (void) rmdir(config.consumerd64_path.value);
341
342 pthread_mutex_destroy(&session_list->lock);
343
344 wait_consumer(&kconsumer_data);
345 wait_consumer(&ustconsumer64_data);
346 wait_consumer(&ustconsumer32_data);
347
348 DBG("Cleaning up all agent apps");
349 agent_app_ht_clean();
350
351 DBG("Closing all UST sockets");
352 ust_app_clean_list();
353 buffer_reg_destroy_registries();
354
355 if (is_root && !config.no_kernel) {
356 DBG2("Closing kernel fd");
357 if (kernel_tracer_fd >= 0) {
358 ret = close(kernel_tracer_fd);
359 if (ret) {
360 PERROR("close");
361 }
362 }
363 DBG("Unloading kernel modules");
364 modprobe_remove_lttng_all();
365 free(syscall_table);
366 }
367
368 close_consumer_sockets();
369
370 if (load_info) {
371 load_session_destroy_data(load_info);
372 free(load_info);
373 }
374
375 /*
376 * We do NOT rmdir rundir because there are other processes
377 * using it, for instance lttng-relayd, which can start in
378 * parallel with this teardown.
379 */
380 }
381
382 /*
383 * Cleanup the daemon's option data structures.
384 */
385 static void sessiond_cleanup_options(void)
386 {
387 DBG("Cleaning up options");
388
389 sessiond_config_fini(&config);
390
391 run_as_destroy_worker();
392 }
393
394 /*
395 * Notify UST applications using the shm mmap futex.
396 */
397 static int notify_ust_apps(int active)
398 {
399 char *wait_shm_mmap;
400
401 DBG("Notifying applications of session daemon state: %d", active);
402
403 /* See shm.c for this call; it involves mmap, shm and futex calls. */
404 wait_shm_mmap = shm_ust_get_mmap(config.wait_shm_path.value, is_root);
405 if (wait_shm_mmap == NULL) {
406 goto error;
407 }
408
409 /* Wake waiting process */
410 futex_wait_update((int32_t *) wait_shm_mmap, active);
411
412 /* Apps notified successfully */
413 return 0;
414
415 error:
416 return -1;
417 }
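/*
 * A rough sketch of what the wake-up above boils down to at the futex level,
 * assuming futex_wait_update() publishes the new state and then wakes every
 * waiter blocked on the shared-memory word (see futex.c for the actual
 * implementation). The includes below would normally live at the top of the
 * file; example_wake_wait_shm is illustrative only.
 */
#include <linux/futex.h>
#include <sys/syscall.h>

static void example_wake_wait_shm(int32_t *wait_shm_word, int32_t state)
{
        /* Publish the session daemon state... */
        uatomic_set(wait_shm_word, state);
        /* ...then wake all applications sleeping on this word. */
        (void) syscall(SYS_futex, wait_shm_word, FUTEX_WAKE, INT_MAX,
                        NULL, NULL, 0);
}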
418
419 /*
420 * Update the kernel poll set with all the channel fds available across all
421 * tracing sessions. Add the wakeup pipe at the end of the set.
422 */
423 static int update_kernel_poll(struct lttng_poll_event *events)
424 {
425 int ret;
426 struct ltt_kernel_channel *channel;
427 struct ltt_session *session;
428 const struct ltt_session_list *session_list = session_get_list();
429
430 DBG("Updating kernel poll set");
431
432 session_lock_list();
433 cds_list_for_each_entry(session, &session_list->head, list) {
434 if (!session_get(session)) {
435 continue;
436 }
437 session_lock(session);
438 if (session->kernel_session == NULL) {
439 session_unlock(session);
440 session_put(session);
441 continue;
442 }
443
444 cds_list_for_each_entry(channel,
445 &session->kernel_session->channel_list.head, list) {
446 /* Add channel fd to the kernel poll set */
447 ret = lttng_poll_add(events, channel->fd, LPOLLIN | LPOLLRDNORM);
448 if (ret < 0) {
449 session_unlock(session);
450 session_put(session);
451 goto error;
452 }
453 DBG("Channel fd %d added to kernel set", channel->fd);
454 }
455 session_unlock(session);
456 }
457 session_unlock_list();
458
459 return 0;
460
461 error:
462 session_unlock_list();
463 return -1;
464 }
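/*
 * The session list walk above follows the reference-counting and locking
 * discipline used throughout this file; a minimal sketch of that pattern in
 * isolation (example_visit_sessions is illustrative only):
 */
static void example_visit_sessions(void)
{
        struct ltt_session *session;
        const struct ltt_session_list *session_list = session_get_list();

        session_lock_list();
        cds_list_for_each_entry(session, &session_list->head, list) {
                if (!session_get(session)) {
                        /* Session is being destroyed; skip it. */
                        continue;
                }
                session_lock(session);
                /* Work on the session while holding its lock. */
                session_unlock(session);
                session_put(session);
        }
        session_unlock_list();
}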
465
466 /*
467 * Find the channel matching 'fd' across all tracing sessions. When found, check
468 * for new channel streams and send those stream fds to the kernel consumer.
469 *
470 * Useful for the CPU hotplug feature.
471 */
472 static int update_kernel_stream(int fd)
473 {
474 int ret = 0;
475 struct ltt_session *session;
476 struct ltt_kernel_session *ksess;
477 struct ltt_kernel_channel *channel;
478 const struct ltt_session_list *session_list = session_get_list();
479
480 DBG("Updating kernel streams for channel fd %d", fd);
481
482 session_lock_list();
483 cds_list_for_each_entry(session, &session_list->head, list) {
484 if (!session_get(session)) {
485 continue;
486 }
487 session_lock(session);
488 if (session->kernel_session == NULL) {
489 session_unlock(session);
490 session_put(session);
491 continue;
492 }
493 ksess = session->kernel_session;
494
495 cds_list_for_each_entry(channel,
496 &ksess->channel_list.head, list) {
497 struct lttng_ht_iter iter;
498 struct consumer_socket *socket;
499
500 if (channel->fd != fd) {
501 continue;
502 }
503 DBG("Channel found, updating kernel streams");
504 ret = kernel_open_channel_stream(channel);
505 if (ret < 0) {
506 goto error;
507 }
508 /* Update the stream global counter */
509 ksess->stream_count_global += ret;
510
511 /*
512 * Have we already sent fds to the consumer? If yes, it
513 * means that tracing is started so it is safe to send
514 * our updated stream fds.
515 */
516 if (ksess->consumer_fds_sent != 1
517 || ksess->consumer == NULL) {
518 ret = -1;
519 goto error;
520 }
521
522 rcu_read_lock();
523 cds_lfht_for_each_entry(ksess->consumer->socks->ht,
524 &iter.iter, socket, node.node) {
525 pthread_mutex_lock(socket->lock);
526 ret = kernel_consumer_send_channel_streams(socket,
527 channel, ksess,
528 session->output_traces ? 1 : 0);
529 pthread_mutex_unlock(socket->lock);
530 if (ret < 0) {
531 rcu_read_unlock();
532 goto error;
533 }
534 }
535 rcu_read_unlock();
536 }
537 session_unlock(session);
538 session_put(session);
539 }
540 session_unlock_list();
541 return ret;
542
543 error:
544 session_unlock(session);
545 session_put(session);
546 session_unlock_list();
547 return ret;
548 }
549
550 /*
551 * For each tracing session, update newly registered apps. The session list
552 * lock MUST be acquired before calling this.
553 */
554 static void update_ust_app(int app_sock)
555 {
556 struct ltt_session *sess, *stmp;
557 const struct ltt_session_list *session_list = session_get_list();
558
559 /* Consumer is in an ERROR state. Stop any application update. */
560 if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) {
561 /* Stop the update process since the consumer is dead. */
562 return;
563 }
564
565 /* For all tracing session(s) */
566 cds_list_for_each_entry_safe(sess, stmp, &session_list->head, list) {
567 struct ust_app *app;
568
569 if (!session_get(sess)) {
570 continue;
571 }
572 session_lock(sess);
573 if (!sess->ust_session) {
574 goto unlock_session;
575 }
576
577 rcu_read_lock();
578 assert(app_sock >= 0);
579 app = ust_app_find_by_sock(app_sock);
580 if (app == NULL) {
581 /*
582 * The application can already have unregistered,
583 * so this is possible; simply stop the
584 * update.
585 */
586 DBG3("UST app update failed to find app sock %d",
587 app_sock);
588 goto unlock_rcu;
589 }
590 ust_app_global_update(sess->ust_session, app);
591 unlock_rcu:
592 rcu_read_unlock();
593 unlock_session:
594 session_unlock(sess);
595 session_put(sess);
596 }
597 }
598
599 /*
600 * This thread manages events coming from the kernel.
601 *
602 * Features supported in this thread:
603 * -) CPU Hotplug
604 */
605 static void *thread_manage_kernel(void *data)
606 {
607 int ret, i, pollfd, update_poll_flag = 1, err = -1;
608 uint32_t revents, nb_fd;
609 char tmp;
610 struct lttng_poll_event events;
611
612 DBG("[thread] Thread manage kernel started");
613
614 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_KERNEL);
615
616 /*
617 * The first step of the while loop cleans this structure, which could free
618 * non-NULL pointers, so initialize it before the loop.
619 */
620 lttng_poll_init(&events);
621
622 if (testpoint(sessiond_thread_manage_kernel)) {
623 goto error_testpoint;
624 }
625
626 health_code_update();
627
628 if (testpoint(sessiond_thread_manage_kernel_before_loop)) {
629 goto error_testpoint;
630 }
631
632 while (1) {
633 health_code_update();
634
635 if (update_poll_flag == 1) {
636 /* Clean events object. We are about to populate it again. */
637 lttng_poll_clean(&events);
638
639 ret = sessiond_set_thread_pollset(&events, 2);
640 if (ret < 0) {
641 goto error_poll_create;
642 }
643
644 ret = lttng_poll_add(&events, kernel_poll_pipe[0], LPOLLIN);
645 if (ret < 0) {
646 goto error;
647 }
648
649 /* This will add the available kernel channel if any. */
650 ret = update_kernel_poll(&events);
651 if (ret < 0) {
652 goto error;
653 }
654 update_poll_flag = 0;
655 }
656
657 DBG("Thread kernel polling");
658
659 /* Poll with an infinite timeout */
660 restart:
661 health_poll_entry();
662 ret = lttng_poll_wait(&events, -1);
663 DBG("Thread kernel return from poll on %d fds",
664 LTTNG_POLL_GETNB(&events));
665 health_poll_exit();
666 if (ret < 0) {
667 /*
668 * Restart interrupted system call.
669 */
670 if (errno == EINTR) {
671 goto restart;
672 }
673 goto error;
674 } else if (ret == 0) {
675 /* Should not happen since timeout is infinite */
676 ERR("Return value of poll is 0 with an infinite timeout.\n"
677 "This should not have happened! Continuing...");
678 continue;
679 }
680
681 nb_fd = ret;
682
683 for (i = 0; i < nb_fd; i++) {
684 /* Fetch once the poll data */
685 revents = LTTNG_POLL_GETEV(&events, i);
686 pollfd = LTTNG_POLL_GETFD(&events, i);
687
688 health_code_update();
689
690 if (!revents) {
691 /* No activity for this FD (poll implementation). */
692 continue;
693 }
694
695 /* Thread quit pipe has been closed. Killing thread. */
696 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
697 if (ret) {
698 err = 0;
699 goto exit;
700 }
701
702 /* Check for data on kernel pipe */
703 if (revents & LPOLLIN) {
704 if (pollfd == kernel_poll_pipe[0]) {
705 (void) lttng_read(kernel_poll_pipe[0],
706 &tmp, 1);
707 /*
708 * The return value is useless here; if this pipe sees any activity, an
709 * update is required anyway.
710 */
711 update_poll_flag = 1;
712 continue;
713 } else {
714 /*
715 * New CPU detected by the kernel. Adding kernel stream to
716 * kernel session and updating the kernel consumer
717 */
718 ret = update_kernel_stream(pollfd);
719 if (ret < 0) {
720 continue;
721 }
722 break;
723 }
724 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
725 update_poll_flag = 1;
726 continue;
727 } else {
728 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
729 goto error;
730 }
731 }
732 }
733
734 exit:
735 error:
736 lttng_poll_clean(&events);
737 error_poll_create:
738 error_testpoint:
739 utils_close_pipe(kernel_poll_pipe);
740 kernel_poll_pipe[0] = kernel_poll_pipe[1] = -1;
741 if (err) {
742 health_error();
743 ERR("Health error occurred in %s", __func__);
744 WARN("Kernel thread died unexpectedly. "
745 "Kernel tracing can continue but CPU hotplug is disabled.");
746 }
747 health_unregister(health_sessiond);
748 DBG("Kernel thread dying");
749 return NULL;
750 }
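/*
 * The thread above follows the poll-loop skeleton shared by most sessiond
 * threads: build a poll set that includes the thread quit pipe, wait with an
 * infinite timeout, restart on EINTR and bail out when the quit pipe fires.
 * A condensed sketch of that skeleton, assuming the thread has already
 * registered itself with health_register() (example_poll_loop is
 * illustrative only):
 */
static void example_poll_loop(void)
{
        int ret, i;
        struct lttng_poll_event events;

        lttng_poll_init(&events);
        if (sessiond_set_thread_pollset(&events, 2) < 0) {
                goto end;
        }
        /* Add the FDs this thread cares about here with lttng_poll_add(). */

        for (;;) {
restart:
                health_poll_entry();
                ret = lttng_poll_wait(&events, -1);
                health_poll_exit();
                if (ret < 0) {
                        if (errno == EINTR) {
                                goto restart;
                        }
                        goto end;
                }

                for (i = 0; i < ret; i++) {
                        uint32_t revents = LTTNG_POLL_GETEV(&events, i);
                        int pollfd = LTTNG_POLL_GETFD(&events, i);

                        if (!revents) {
                                /* No activity for this FD (poll implementation). */
                                continue;
                        }
                        if (sessiond_check_thread_quit_pipe(pollfd, revents)) {
                                goto end;
                        }
                        /* Handle this FD's events here. */
                }
        }
end:
        lttng_poll_clean(&events);
}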
751
752 /*
753 * Signal the consumer data's pthread condition to report the thread's state.
754 */
755 static void signal_consumer_condition(struct consumer_data *data, int state)
756 {
757 pthread_mutex_lock(&data->cond_mutex);
758
759 /*
760 * The state is set before signaling. It can be any value; it is the waiter's
761 * job to correctly interpret the value of this condition variable, which is
762 * associated with the consumer pthread_cond.
763 *
764 * A value of 0 means that the corresponding thread of the consumer data
765 * was not started. 1 indicates that the thread has started and is ready
766 * for action. A negative value means that there was an error during the
767 * thread bootstrap.
768 */
769 data->consumer_thread_is_ready = state;
770 (void) pthread_cond_signal(&data->cond);
771
772 pthread_mutex_unlock(&data->cond_mutex);
773 }
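/*
 * A minimal sketch of the matching wait side of this condition, assuming the
 * code that spawns the consumer thread blocks on data->cond under
 * data->cond_mutex until a state is reported (the actual caller may also
 * apply a timeout). example_wait_consumer_ready is illustrative only.
 */
static int example_wait_consumer_ready(struct consumer_data *data)
{
        int ret;

        pthread_mutex_lock(&data->cond_mutex);
        /* 0 means the consumer management thread has not reported yet. */
        while (data->consumer_thread_is_ready == 0) {
                pthread_cond_wait(&data->cond, &data->cond_mutex);
        }
        /* 1 means ready; a negative value means a bootstrap error. */
        ret = data->consumer_thread_is_ready == 1 ? 0 : -1;
        pthread_mutex_unlock(&data->cond_mutex);

        return ret;
}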
774
775 /*
776 * This thread manages the consumer errors sent back to the session daemon.
777 */
778 void *thread_manage_consumer(void *data)
779 {
780 int sock = -1, i, ret, pollfd, err = -1, should_quit = 0;
781 uint32_t revents, nb_fd;
782 enum lttcomm_return_code code;
783 struct lttng_poll_event events;
784 struct consumer_data *consumer_data = data;
785 struct consumer_socket *cmd_socket_wrapper = NULL;
786
787 DBG("[thread] Manage consumer started");
788
789 rcu_register_thread();
790 rcu_thread_online();
791
792 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_CONSUMER);
793
794 health_code_update();
795
796 /*
797 * Pass 3 as size here for the thread quit pipe, consumerd_err_sock and the
798 * metadata_sock. Nothing more will be added to this poll set.
799 */
800 ret = sessiond_set_thread_pollset(&events, 3);
801 if (ret < 0) {
802 goto error_poll;
803 }
804
805 /*
806 * The error socket here is already in a listening state which was done
807 * just before spawning this thread to avoid a race between the consumer
808 * daemon exec trying to connect and the listen() call.
809 */
810 ret = lttng_poll_add(&events, consumer_data->err_sock, LPOLLIN | LPOLLRDHUP);
811 if (ret < 0) {
812 goto error;
813 }
814
815 health_code_update();
816
817 /* Infinite blocking call, waiting for transmission */
818 restart:
819 health_poll_entry();
820
821 if (testpoint(sessiond_thread_manage_consumer)) {
822 goto error;
823 }
824
825 ret = lttng_poll_wait(&events, -1);
826 health_poll_exit();
827 if (ret < 0) {
828 /*
829 * Restart interrupted system call.
830 */
831 if (errno == EINTR) {
832 goto restart;
833 }
834 goto error;
835 }
836
837 nb_fd = ret;
838
839 for (i = 0; i < nb_fd; i++) {
840 /* Fetch once the poll data */
841 revents = LTTNG_POLL_GETEV(&events, i);
842 pollfd = LTTNG_POLL_GETFD(&events, i);
843
844 health_code_update();
845
846 if (!revents) {
847 /* No activity for this FD (poll implementation). */
848 continue;
849 }
850
851 /* Thread quit pipe has been closed. Killing thread. */
852 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
853 if (ret) {
854 err = 0;
855 goto exit;
856 }
857
858 /* Event on the registration socket */
859 if (pollfd == consumer_data->err_sock) {
860 if (revents & LPOLLIN) {
861 continue;
862 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
863 ERR("consumer err socket poll error");
864 goto error;
865 } else {
866 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
867 goto error;
868 }
869 }
870 }
871
872 sock = lttcomm_accept_unix_sock(consumer_data->err_sock);
873 if (sock < 0) {
874 goto error;
875 }
876
877 /*
878 * Set the CLOEXEC flag. Return code is useless because either way, the
879 * show must go on.
880 */
881 (void) utils_set_fd_cloexec(sock);
882
883 health_code_update();
884
885 DBG2("Receiving code from consumer err_sock");
886
887 /* Getting status code from kconsumerd */
888 ret = lttcomm_recv_unix_sock(sock, &code,
889 sizeof(enum lttcomm_return_code));
890 if (ret <= 0) {
891 goto error;
892 }
893
894 health_code_update();
895 if (code != LTTCOMM_CONSUMERD_COMMAND_SOCK_READY) {
896 ERR("consumer error when waiting for SOCK_READY : %s",
897 lttcomm_get_readable_code(-code));
898 goto error;
899 }
900
901 /* Connect both command and metadata sockets. */
902 consumer_data->cmd_sock =
903 lttcomm_connect_unix_sock(
904 consumer_data->cmd_unix_sock_path);
905 consumer_data->metadata_fd =
906 lttcomm_connect_unix_sock(
907 consumer_data->cmd_unix_sock_path);
908 if (consumer_data->cmd_sock < 0 || consumer_data->metadata_fd < 0) {
909 PERROR("consumer connect cmd socket");
910 /* On error, signal condition and quit. */
911 signal_consumer_condition(consumer_data, -1);
912 goto error;
913 }
914
915 consumer_data->metadata_sock.fd_ptr = &consumer_data->metadata_fd;
916
917 /* Create metadata socket lock. */
918 consumer_data->metadata_sock.lock = zmalloc(sizeof(pthread_mutex_t));
919 if (consumer_data->metadata_sock.lock == NULL) {
920 PERROR("zmalloc pthread mutex");
921 goto error;
922 }
923 pthread_mutex_init(consumer_data->metadata_sock.lock, NULL);
924
925 DBG("Consumer command socket ready (fd: %d", consumer_data->cmd_sock);
926 DBG("Consumer metadata socket ready (fd: %d)",
927 consumer_data->metadata_fd);
928
929 /*
930 * Remove the consumerd error sock since we've established a connection.
931 */
932 ret = lttng_poll_del(&events, consumer_data->err_sock);
933 if (ret < 0) {
934 goto error;
935 }
936
937 /* Add new accepted error socket. */
938 ret = lttng_poll_add(&events, sock, LPOLLIN | LPOLLRDHUP);
939 if (ret < 0) {
940 goto error;
941 }
942
943 /* Add metadata socket that is successfully connected. */
944 ret = lttng_poll_add(&events, consumer_data->metadata_fd,
945 LPOLLIN | LPOLLRDHUP);
946 if (ret < 0) {
947 goto error;
948 }
949
950 health_code_update();
951
952 /*
953 * Transfer the write-end of the channel monitoring and rotate pipe
954 * to the consumer by issuing a SET_CHANNEL_MONITOR_PIPE command.
955 */
956 cmd_socket_wrapper = consumer_allocate_socket(&consumer_data->cmd_sock);
957 if (!cmd_socket_wrapper) {
958 goto error;
959 }
960 cmd_socket_wrapper->lock = &consumer_data->lock;
961
962 ret = consumer_send_channel_monitor_pipe(cmd_socket_wrapper,
963 consumer_data->channel_monitor_pipe);
964 if (ret) {
965 goto error;
966 }
967
968 /* Discard the socket wrapper as it is no longer needed. */
969 consumer_destroy_socket(cmd_socket_wrapper);
970 cmd_socket_wrapper = NULL;
971
972 /* The thread is completely initialized, signal that it is ready. */
973 signal_consumer_condition(consumer_data, 1);
974
975 /* Infinite blocking call, waiting for transmission */
976 restart_poll:
977 while (1) {
978 health_code_update();
979
980 /* Exit the thread because the thread quit pipe has been triggered. */
981 if (should_quit) {
982 /* Not a health error. */
983 err = 0;
984 goto exit;
985 }
986
987 health_poll_entry();
988 ret = lttng_poll_wait(&events, -1);
989 health_poll_exit();
990 if (ret < 0) {
991 /*
992 * Restart interrupted system call.
993 */
994 if (errno == EINTR) {
995 goto restart_poll;
996 }
997 goto error;
998 }
999
1000 nb_fd = ret;
1001
1002 for (i = 0; i < nb_fd; i++) {
1003 /* Fetch once the poll data */
1004 revents = LTTNG_POLL_GETEV(&events, i);
1005 pollfd = LTTNG_POLL_GETFD(&events, i);
1006
1007 health_code_update();
1008
1009 if (!revents) {
1010 /* No activity for this FD (poll implementation). */
1011 continue;
1012 }
1013
1014 /*
1015 * Thread quit pipe has been triggered, flag that we should stop
1016 * but continue the current loop to handle potential data from
1017 * consumer.
1018 */
1019 should_quit = sessiond_check_thread_quit_pipe(pollfd, revents);
1020
1021 if (pollfd == sock) {
1022 /* Event on the consumerd socket */
1023 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)
1024 && !(revents & LPOLLIN)) {
1025 ERR("consumer err socket second poll error");
1026 goto error;
1027 }
1028 health_code_update();
1029 /* Wait for any kconsumerd error */
1030 ret = lttcomm_recv_unix_sock(sock, &code,
1031 sizeof(enum lttcomm_return_code));
1032 if (ret <= 0) {
1033 ERR("consumer closed the command socket");
1034 goto error;
1035 }
1036
1037 ERR("consumer return code : %s",
1038 lttcomm_get_readable_code(-code));
1039
1040 goto exit;
1041 } else if (pollfd == consumer_data->metadata_fd) {
1042 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)
1043 && !(revents & LPOLLIN)) {
1044 ERR("consumer err metadata socket second poll error");
1045 goto error;
1046 }
1047 /* UST metadata requests */
1048 ret = ust_consumer_metadata_request(
1049 &consumer_data->metadata_sock);
1050 if (ret < 0) {
1051 ERR("Handling metadata request");
1052 goto error;
1053 }
1054 }
1055 /* No need for an else branch all FDs are tested prior. */
1056 }
1057 health_code_update();
1058 }
1059
1060 exit:
1061 error:
1062 /*
1063 * We lock here because we are about to close the sockets and some other
1064 * thread might be using them, so we take exclusive access, which aborts all
1065 * other consumer commands issued by other threads.
1066 */
1067 pthread_mutex_lock(&consumer_data->lock);
1068
1069 /* Immediately set the consumerd state to stopped */
1070 if (consumer_data->type == LTTNG_CONSUMER_KERNEL) {
1071 uatomic_set(&kernel_consumerd_state, CONSUMER_ERROR);
1072 } else if (consumer_data->type == LTTNG_CONSUMER64_UST ||
1073 consumer_data->type == LTTNG_CONSUMER32_UST) {
1074 uatomic_set(&ust_consumerd_state, CONSUMER_ERROR);
1075 } else {
1076 /* Code flow error... */
1077 assert(0);
1078 }
1079
1080 if (consumer_data->err_sock >= 0) {
1081 ret = close(consumer_data->err_sock);
1082 if (ret) {
1083 PERROR("close");
1084 }
1085 consumer_data->err_sock = -1;
1086 }
1087 if (consumer_data->cmd_sock >= 0) {
1088 ret = close(consumer_data->cmd_sock);
1089 if (ret) {
1090 PERROR("close");
1091 }
1092 consumer_data->cmd_sock = -1;
1093 }
1094 if (consumer_data->metadata_sock.fd_ptr &&
1095 *consumer_data->metadata_sock.fd_ptr >= 0) {
1096 ret = close(*consumer_data->metadata_sock.fd_ptr);
1097 if (ret) {
1098 PERROR("close");
1099 }
1100 }
1101 if (sock >= 0) {
1102 ret = close(sock);
1103 if (ret) {
1104 PERROR("close");
1105 }
1106 }
1107
1108 unlink(consumer_data->err_unix_sock_path);
1109 unlink(consumer_data->cmd_unix_sock_path);
1110 pthread_mutex_unlock(&consumer_data->lock);
1111
1112 /* Cleanup metadata socket mutex. */
1113 if (consumer_data->metadata_sock.lock) {
1114 pthread_mutex_destroy(consumer_data->metadata_sock.lock);
1115 free(consumer_data->metadata_sock.lock);
1116 }
1117 lttng_poll_clean(&events);
1118
1119 if (cmd_socket_wrapper) {
1120 consumer_destroy_socket(cmd_socket_wrapper);
1121 }
1122 error_poll:
1123 if (err) {
1124 health_error();
1125 ERR("Health error occurred in %s", __func__);
1126 }
1127 health_unregister(health_sessiond);
1128 DBG("consumer thread cleanup completed");
1129
1130 rcu_thread_offline();
1131 rcu_unregister_thread();
1132
1133 return NULL;
1134 }
1135
1136 /*
1137 * This thread receives application command sockets (FDs) on the
1138 * apps_cmd_pipe and waits (polls) on them until they are closed
1139 * or an error occurs.
1140 *
1141 * At that point, it flushes the data (tracing and metadata) associated
1142 * with this application and tears down ust app sessions and other
1143 * associated data structures through ust_app_unregister().
1144 *
1145 * Note that this thread never sends commands to the applications
1146 * through the command sockets; it merely listens for hang-ups
1147 * and errors on those sockets and cleans-up as they occur.
1148 */
1149 static void *thread_manage_apps(void *data)
1150 {
1151 int i, ret, pollfd, err = -1;
1152 ssize_t size_ret;
1153 uint32_t revents, nb_fd;
1154 struct lttng_poll_event events;
1155
1156 DBG("[thread] Manage application started");
1157
1158 rcu_register_thread();
1159 rcu_thread_online();
1160
1161 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_MANAGE);
1162
1163 if (testpoint(sessiond_thread_manage_apps)) {
1164 goto error_testpoint;
1165 }
1166
1167 health_code_update();
1168
1169 ret = sessiond_set_thread_pollset(&events, 2);
1170 if (ret < 0) {
1171 goto error_poll_create;
1172 }
1173
1174 ret = lttng_poll_add(&events, apps_cmd_pipe[0], LPOLLIN | LPOLLRDHUP);
1175 if (ret < 0) {
1176 goto error;
1177 }
1178
1179 if (testpoint(sessiond_thread_manage_apps_before_loop)) {
1180 goto error;
1181 }
1182
1183 health_code_update();
1184
1185 while (1) {
1186 DBG("Apps thread polling");
1187
1188 /* Infinite blocking call, waiting for transmission */
1189 restart:
1190 health_poll_entry();
1191 ret = lttng_poll_wait(&events, -1);
1192 DBG("Apps thread return from poll on %d fds",
1193 LTTNG_POLL_GETNB(&events));
1194 health_poll_exit();
1195 if (ret < 0) {
1196 /*
1197 * Restart interrupted system call.
1198 */
1199 if (errno == EINTR) {
1200 goto restart;
1201 }
1202 goto error;
1203 }
1204
1205 nb_fd = ret;
1206
1207 for (i = 0; i < nb_fd; i++) {
1208 /* Fetch once the poll data */
1209 revents = LTTNG_POLL_GETEV(&events, i);
1210 pollfd = LTTNG_POLL_GETFD(&events, i);
1211
1212 health_code_update();
1213
1214 if (!revents) {
1215 /* No activity for this FD (poll implementation). */
1216 continue;
1217 }
1218
1219 /* Thread quit pipe has been closed. Killing thread. */
1220 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
1221 if (ret) {
1222 err = 0;
1223 goto exit;
1224 }
1225
1226 /* Inspect the apps cmd pipe */
1227 if (pollfd == apps_cmd_pipe[0]) {
1228 if (revents & LPOLLIN) {
1229 int sock;
1230
1231 /* Empty pipe */
1232 size_ret = lttng_read(apps_cmd_pipe[0], &sock, sizeof(sock));
1233 if (size_ret < sizeof(sock)) {
1234 PERROR("read apps cmd pipe");
1235 goto error;
1236 }
1237
1238 health_code_update();
1239
1240 /*
1241 * Since this is a command socket (write then read),
1242 * we only monitor the error events of the socket.
1243 */
1244 ret = lttng_poll_add(&events, sock,
1245 LPOLLERR | LPOLLHUP | LPOLLRDHUP);
1246 if (ret < 0) {
1247 goto error;
1248 }
1249
1250 DBG("Apps with sock %d added to poll set", sock);
1251 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1252 ERR("Apps command pipe error");
1253 goto error;
1254 } else {
1255 ERR("Unknown poll events %u for sock %d", revents, pollfd);
1256 goto error;
1257 }
1258 } else {
1259 /*
1260 * At this point, we know that a registered application made
1261 * the event at poll_wait.
1262 */
1263 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1264 /* Removing from the poll set */
1265 ret = lttng_poll_del(&events, pollfd);
1266 if (ret < 0) {
1267 goto error;
1268 }
1269
1270 /* Socket closed on remote end. */
1271 ust_app_unregister(pollfd);
1272 } else {
1273 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
1274 goto error;
1275 }
1276 }
1277
1278 health_code_update();
1279 }
1280 }
1281
1282 exit:
1283 error:
1284 lttng_poll_clean(&events);
1285 error_poll_create:
1286 error_testpoint:
1287 utils_close_pipe(apps_cmd_pipe);
1288 apps_cmd_pipe[0] = apps_cmd_pipe[1] = -1;
1289
1290 /*
1291 * We don't clean the UST app hash table here since already registered
1292 * applications can still be controlled so let them be until the session
1293 * daemon dies or the applications stop.
1294 */
1295
1296 if (err) {
1297 health_error();
1298 ERR("Health error occurred in %s", __func__);
1299 }
1300 health_unregister(health_sessiond);
1301 DBG("Application communication apps thread cleanup complete");
1302 rcu_thread_offline();
1303 rcu_unregister_thread();
1304 return NULL;
1305 }
1306
1307 /*
1308 * Send a socket to a thread. This is called from the dispatch UST registration
1309 * thread once all sockets are set for the application.
1310 *
1311 * The sock value can be invalid, we don't really care, the thread will handle
1312 * it and make the necessary cleanup if so.
1313 *
1314 * On success, return 0; otherwise, return a negative value corresponding to
1315 * the errno of the failed write().
1316 */
1317 static int send_socket_to_thread(int fd, int sock)
1318 {
1319 ssize_t ret;
1320
1321 /*
1322 * It's possible that the FD is set as invalid with -1 concurrently just
1323 * before calling this function, which indicates a shutdown state of the thread.
1324 */
1325 if (fd < 0) {
1326 ret = -EBADF;
1327 goto error;
1328 }
1329
1330 ret = lttng_write(fd, &sock, sizeof(sock));
1331 if (ret < sizeof(sock)) {
1332 PERROR("write apps pipe %d", fd);
1333 if (ret < 0) {
1334 ret = -errno;
1335 }
1336 goto error;
1337 }
1338
1339 /* All good. Don't send back the write positive ret value. */
1340 ret = 0;
1341 error:
1342 return (int) ret;
1343 }
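/*
 * Note that this passes the socket's file descriptor number over a pipe
 * between two threads of the same process; since both threads share the file
 * descriptor table, no SCM_RIGHTS-style transfer is needed. A minimal sketch
 * of the receiving end, mirroring what thread_manage_apps() does above
 * (example_receive_socket is illustrative only):
 */
static int example_receive_socket(int pipe_read_fd)
{
        int sock;
        ssize_t size_ret;

        /* The writer sent sizeof(int) bytes; read exactly that much back. */
        size_ret = lttng_read(pipe_read_fd, &sock, sizeof(sock));
        if (size_ret < (ssize_t) sizeof(sock)) {
                PERROR("read socket from pipe");
                return -1;
        }

        return sock;
}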
1344
1345 /*
1346 * Sanitize the wait queue of the dispatch registration thread, i.e. remove
1347 * invalid nodes from it. This avoids memory leaks in case the UST
1348 * notify socket is never received.
1349 */
1350 static void sanitize_wait_queue(struct ust_reg_wait_queue *wait_queue)
1351 {
1352 int ret, nb_fd = 0, i;
1353 unsigned int fd_added = 0;
1354 struct lttng_poll_event events;
1355 struct ust_reg_wait_node *wait_node = NULL, *tmp_wait_node;
1356
1357 assert(wait_queue);
1358
1359 lttng_poll_init(&events);
1360
1361 /* Just skip everything for an empty queue. */
1362 if (!wait_queue->count) {
1363 goto end;
1364 }
1365
1366 ret = lttng_poll_create(&events, wait_queue->count, LTTNG_CLOEXEC);
1367 if (ret < 0) {
1368 goto error_create;
1369 }
1370
1371 cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
1372 &wait_queue->head, head) {
1373 assert(wait_node->app);
1374 ret = lttng_poll_add(&events, wait_node->app->sock,
1375 LPOLLHUP | LPOLLERR);
1376 if (ret < 0) {
1377 goto error;
1378 }
1379
1380 fd_added = 1;
1381 }
1382
1383 if (!fd_added) {
1384 goto end;
1385 }
1386
1387 /*
1388 * Poll but don't block so we can quickly identify the faulty events and
1389 * clean them afterwards from the wait queue.
1390 */
1391 ret = lttng_poll_wait(&events, 0);
1392 if (ret < 0) {
1393 goto error;
1394 }
1395 nb_fd = ret;
1396
1397 for (i = 0; i < nb_fd; i++) {
1398 /* Get faulty FD. */
1399 uint32_t revents = LTTNG_POLL_GETEV(&events, i);
1400 int pollfd = LTTNG_POLL_GETFD(&events, i);
1401
1402 if (!revents) {
1403 /* No activity for this FD (poll implementation). */
1404 continue;
1405 }
1406
1407 cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
1408 &wait_queue->head, head) {
1409 if (pollfd == wait_node->app->sock &&
1410 (revents & (LPOLLHUP | LPOLLERR))) {
1411 cds_list_del(&wait_node->head);
1412 wait_queue->count--;
1413 ust_app_destroy(wait_node->app);
1414 free(wait_node);
1415 /*
1416 * Silence warning of use-after-free in
1417 * cds_list_for_each_entry_safe which uses
1418 * __typeof__(*wait_node).
1419 */
1420 wait_node = NULL;
1421 break;
1422 } else {
1423 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
1424 goto error;
1425 }
1426 }
1427 }
1428
1429 if (nb_fd > 0) {
1430 DBG("Wait queue sanitized, %d node were cleaned up", nb_fd);
1431 }
1432
1433 end:
1434 lttng_poll_clean(&events);
1435 return;
1436
1437 error:
1438 lttng_poll_clean(&events);
1439 error_create:
1440 ERR("Unable to sanitize wait queue");
1441 return;
1442 }
1443
1444 /*
1445 * Dispatch request from the registration threads to the application
1446 * communication thread.
1447 */
1448 static void *thread_dispatch_ust_registration(void *data)
1449 {
1450 int ret, err = -1;
1451 struct cds_wfcq_node *node;
1452 struct ust_command *ust_cmd = NULL;
1453 struct ust_reg_wait_node *wait_node = NULL, *tmp_wait_node;
1454 struct ust_reg_wait_queue wait_queue = {
1455 .count = 0,
1456 };
1457
1458 rcu_register_thread();
1459
1460 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_REG_DISPATCH);
1461
1462 if (testpoint(sessiond_thread_app_reg_dispatch)) {
1463 goto error_testpoint;
1464 }
1465
1466 health_code_update();
1467
1468 CDS_INIT_LIST_HEAD(&wait_queue.head);
1469
1470 DBG("[thread] Dispatch UST command started");
1471
1472 for (;;) {
1473 health_code_update();
1474
1475 /* Atomically prepare the queue futex */
1476 futex_nto1_prepare(&ust_cmd_queue.futex);
1477
1478 if (CMM_LOAD_SHARED(dispatch_thread_exit)) {
1479 break;
1480 }
1481
1482 do {
1483 struct ust_app *app = NULL;
1484 ust_cmd = NULL;
1485
1486 /*
1487 * Make sure we don't have node(s) that have hung up before receiving
1488 * the notify socket. This cleans the list in order to avoid
1489 * memory leaks from notify sockets that are never received.
1490 */
1491 sanitize_wait_queue(&wait_queue);
1492
1493 health_code_update();
1494 /* Dequeue command for registration */
1495 node = cds_wfcq_dequeue_blocking(&ust_cmd_queue.head, &ust_cmd_queue.tail);
1496 if (node == NULL) {
1497 DBG("Woken up but nothing in the UST command queue");
1498 /* Continue thread execution */
1499 break;
1500 }
1501
1502 ust_cmd = caa_container_of(node, struct ust_command, node);
1503
1504 DBG("Dispatching UST registration pid:%d ppid:%d uid:%d"
1505 " gid:%d sock:%d name:%s (version %d.%d)",
1506 ust_cmd->reg_msg.pid, ust_cmd->reg_msg.ppid,
1507 ust_cmd->reg_msg.uid, ust_cmd->reg_msg.gid,
1508 ust_cmd->sock, ust_cmd->reg_msg.name,
1509 ust_cmd->reg_msg.major, ust_cmd->reg_msg.minor);
1510
1511 if (ust_cmd->reg_msg.type == USTCTL_SOCKET_CMD) {
1512 wait_node = zmalloc(sizeof(*wait_node));
1513 if (!wait_node) {
1514 PERROR("zmalloc wait_node dispatch");
1515 ret = close(ust_cmd->sock);
1516 if (ret < 0) {
1517 PERROR("close ust sock dispatch %d", ust_cmd->sock);
1518 }
1519 lttng_fd_put(LTTNG_FD_APPS, 1);
1520 free(ust_cmd);
1521 goto error;
1522 }
1523 CDS_INIT_LIST_HEAD(&wait_node->head);
1524
1525 /* Create application object if socket is CMD. */
1526 wait_node->app = ust_app_create(&ust_cmd->reg_msg,
1527 ust_cmd->sock);
1528 if (!wait_node->app) {
1529 ret = close(ust_cmd->sock);
1530 if (ret < 0) {
1531 PERROR("close ust sock dispatch %d", ust_cmd->sock);
1532 }
1533 lttng_fd_put(LTTNG_FD_APPS, 1);
1534 free(wait_node);
1535 free(ust_cmd);
1536 continue;
1537 }
1538 /*
1539 * Add application to the wait queue so we can set the notify
1540 * socket before putting this object in the global ht.
1541 */
1542 cds_list_add(&wait_node->head, &wait_queue.head);
1543 wait_queue.count++;
1544
1545 free(ust_cmd);
1546 /*
1547 * We have to continue here since we don't have the notify
1548 * socket and the application MUST be added to the hash table
1549 * only at that moment.
1550 */
1551 continue;
1552 } else {
1553 /*
1554 * Look for the application in the local wait queue and set the
1555 * notify socket if found.
1556 */
1557 cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
1558 &wait_queue.head, head) {
1559 health_code_update();
1560 if (wait_node->app->pid == ust_cmd->reg_msg.pid) {
1561 wait_node->app->notify_sock = ust_cmd->sock;
1562 cds_list_del(&wait_node->head);
1563 wait_queue.count--;
1564 app = wait_node->app;
1565 free(wait_node);
1566 DBG3("UST app notify socket %d is set", ust_cmd->sock);
1567 break;
1568 }
1569 }
1570
1571 /*
1572 * With no application at this stage the received socket is
1573 * basically useless so close it before we free the cmd data
1574 * structure for good.
1575 */
1576 if (!app) {
1577 ret = close(ust_cmd->sock);
1578 if (ret < 0) {
1579 PERROR("close ust sock dispatch %d", ust_cmd->sock);
1580 }
1581 lttng_fd_put(LTTNG_FD_APPS, 1);
1582 }
1583 free(ust_cmd);
1584 }
1585
1586 if (app) {
1587 /*
1588 * @session_lock_list
1589 *
1590 * Lock the global session list so from the register up to the
1591 * registration done message, no thread can see the application
1592 * and change its state.
1593 */
1594 session_lock_list();
1595 rcu_read_lock();
1596
1597 /*
1598 * Add application to the global hash table. This needs to be
1599 * done before the update to the UST registry can locate the
1600 * application.
1601 */
1602 ust_app_add(app);
1603
1604 /* Set app version. This call will print an error if needed. */
1605 (void) ust_app_version(app);
1606
1607 /* Send notify socket through the notify pipe. */
1608 ret = send_socket_to_thread(apps_cmd_notify_pipe[1],
1609 app->notify_sock);
1610 if (ret < 0) {
1611 rcu_read_unlock();
1612 session_unlock_list();
1613 /*
1614 * No notify thread, stop the UST tracing. However, this is
1615 * not an internal error of this thread, thus set
1616 * the health error code to a normal exit.
1617 */
1618 err = 0;
1619 goto error;
1620 }
1621
1622 /*
1623 * Update the newly registered application with the tracing
1624 * registry information that is already enabled.
1625 */
1626 update_ust_app(app->sock);
1627
1628 /*
1629 * Don't care about return value. Let the manage apps threads
1630 * handle app unregistration upon socket close.
1631 */
1632 (void) ust_app_register_done(app);
1633
1634 /*
1635 * Even if the application socket has been closed, send the app
1636 * to the thread; unregistration will take
1637 * place there.
1638 */
1639 ret = send_socket_to_thread(apps_cmd_pipe[1], app->sock);
1640 if (ret < 0) {
1641 rcu_read_unlock();
1642 session_unlock_list();
1643 /*
1644 * No apps thread, stop the UST tracing. However, this is
1645 * not an internal error of this thread, thus set
1646 * the health error code to a normal exit.
1647 */
1648 err = 0;
1649 goto error;
1650 }
1651
1652 rcu_read_unlock();
1653 session_unlock_list();
1654 }
1655 } while (node != NULL);
1656
1657 health_poll_entry();
1658 /* Futex wait on queue. Blocking call on futex() */
1659 futex_nto1_wait(&ust_cmd_queue.futex);
1660 health_poll_exit();
1661 }
1662 /* Normal exit, no error */
1663 err = 0;
1664
1665 error:
1666 /* Clean up wait queue. */
1667 cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
1668 &wait_queue.head, head) {
1669 cds_list_del(&wait_node->head);
1670 wait_queue.count--;
1671 free(wait_node);
1672 }
1673
1674 /* Empty command queue. */
1675 for (;;) {
1676 /* Dequeue command for registration */
1677 node = cds_wfcq_dequeue_blocking(&ust_cmd_queue.head, &ust_cmd_queue.tail);
1678 if (node == NULL) {
1679 break;
1680 }
1681 ust_cmd = caa_container_of(node, struct ust_command, node);
1682 ret = close(ust_cmd->sock);
1683 if (ret < 0) {
1684 PERROR("close ust sock exit dispatch %d", ust_cmd->sock);
1685 }
1686 lttng_fd_put(LTTNG_FD_APPS, 1);
1687 free(ust_cmd);
1688 }
1689
1690 error_testpoint:
1691 DBG("Dispatch thread dying");
1692 if (err) {
1693 health_error();
1694 ERR("Health error occurred in %s", __func__);
1695 }
1696 health_unregister(health_sessiond);
1697 rcu_unregister_thread();
1698 return NULL;
1699 }
1700
1701 /*
1702 * This thread manages application registration.
1703 */
1704 static void *thread_registration_apps(void *data)
1705 {
1706 int sock = -1, i, ret, pollfd, err = -1;
1707 uint32_t revents, nb_fd;
1708 struct lttng_poll_event events;
1709 /*
1710 * Allocated in this thread, enqueued to a global queue, then dequeued and
1711 * freed in the manage apps thread.
1712 */
1713 struct ust_command *ust_cmd = NULL;
1714
1715 DBG("[thread] Manage application registration started");
1716
1717 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_REG);
1718
1719 if (testpoint(sessiond_thread_registration_apps)) {
1720 goto error_testpoint;
1721 }
1722
1723 ret = lttcomm_listen_unix_sock(apps_sock);
1724 if (ret < 0) {
1725 goto error_listen;
1726 }
1727
1728 /*
1729 * Pass 2 as size here for the thread quit pipe and apps socket. Nothing
1730 * more will be added to this poll set.
1731 */
1732 ret = sessiond_set_thread_pollset(&events, 2);
1733 if (ret < 0) {
1734 goto error_create_poll;
1735 }
1736
1737 /* Add the application registration socket */
1738 ret = lttng_poll_add(&events, apps_sock, LPOLLIN | LPOLLRDHUP);
1739 if (ret < 0) {
1740 goto error_poll_add;
1741 }
1742
1743 /* Notify all applications to register */
1744 ret = notify_ust_apps(1);
1745 if (ret < 0) {
1746 ERR("Failed to notify applications or create the wait shared memory.\n"
1747 "Execution continues but there might be problem for already\n"
1748 "running applications that wishes to register.");
1749 }
1750
1751 while (1) {
1752 DBG("Accepting application registration");
1753
1754 /* Infinite blocking call, waiting for transmission */
1755 restart:
1756 health_poll_entry();
1757 ret = lttng_poll_wait(&events, -1);
1758 health_poll_exit();
1759 if (ret < 0) {
1760 /*
1761 * Restart interrupted system call.
1762 */
1763 if (errno == EINTR) {
1764 goto restart;
1765 }
1766 goto error;
1767 }
1768
1769 nb_fd = ret;
1770
1771 for (i = 0; i < nb_fd; i++) {
1772 health_code_update();
1773
1774 /* Fetch once the poll data */
1775 revents = LTTNG_POLL_GETEV(&events, i);
1776 pollfd = LTTNG_POLL_GETFD(&events, i);
1777
1778 if (!revents) {
1779 /* No activity for this FD (poll implementation). */
1780 continue;
1781 }
1782
1783 /* Thread quit pipe has been closed. Killing thread. */
1784 ret = sessiond_check_thread_quit_pipe(pollfd, revents);
1785 if (ret) {
1786 err = 0;
1787 goto exit;
1788 }
1789
1790 /* Event on the registration socket */
1791 if (pollfd == apps_sock) {
1792 if (revents & LPOLLIN) {
1793 sock = lttcomm_accept_unix_sock(apps_sock);
1794 if (sock < 0) {
1795 goto error;
1796 }
1797
1798 /*
1799 * Set socket timeout for both receiving and sending.
1800 * app_socket_timeout is in seconds, whereas
1801 * lttcomm_setsockopt_rcv_timeout and
1802 * lttcomm_setsockopt_snd_timeout expect msec as
1803 * parameter.
1804 */
1805 if (config.app_socket_timeout >= 0) {
1806 (void) lttcomm_setsockopt_rcv_timeout(sock,
1807 config.app_socket_timeout * 1000);
1808 (void) lttcomm_setsockopt_snd_timeout(sock,
1809 config.app_socket_timeout * 1000);
1810 }
1811
1812 /*
1813 * Set the CLOEXEC flag. Return code is useless because
1814 * either way, the show must go on.
1815 */
1816 (void) utils_set_fd_cloexec(sock);
1817
1818 /* Create UST registration command for enqueuing */
1819 ust_cmd = zmalloc(sizeof(struct ust_command));
1820 if (ust_cmd == NULL) {
1821 PERROR("ust command zmalloc");
1822 ret = close(sock);
1823 if (ret) {
1824 PERROR("close");
1825 }
1826 goto error;
1827 }
1828
1829 /*
1830 * Using message-based transmissions to ensure we don't
1831 * have to deal with partially received messages.
1832 */
1833 ret = lttng_fd_get(LTTNG_FD_APPS, 1);
1834 if (ret < 0) {
1835 ERR("Exhausted file descriptors allowed for applications.");
1836 free(ust_cmd);
1837 ret = close(sock);
1838 if (ret) {
1839 PERROR("close");
1840 }
1841 sock = -1;
1842 continue;
1843 }
1844
1845 health_code_update();
1846 ret = ust_app_recv_registration(sock, &ust_cmd->reg_msg);
1847 if (ret < 0) {
1848 free(ust_cmd);
1849 /* Close socket of the application. */
1850 ret = close(sock);
1851 if (ret) {
1852 PERROR("close");
1853 }
1854 lttng_fd_put(LTTNG_FD_APPS, 1);
1855 sock = -1;
1856 continue;
1857 }
1858 health_code_update();
1859
1860 ust_cmd->sock = sock;
1861 sock = -1;
1862
1863 DBG("UST registration received with pid:%d ppid:%d uid:%d"
1864 " gid:%d sock:%d name:%s (version %d.%d)",
1865 ust_cmd->reg_msg.pid, ust_cmd->reg_msg.ppid,
1866 ust_cmd->reg_msg.uid, ust_cmd->reg_msg.gid,
1867 ust_cmd->sock, ust_cmd->reg_msg.name,
1868 ust_cmd->reg_msg.major, ust_cmd->reg_msg.minor);
1869
1870 /*
1871 * Lock-free enqueue of the registration request. The red pill
1872 * has been taken! This app will be part of the *system*.
1873 */
1874 cds_wfcq_enqueue(&ust_cmd_queue.head, &ust_cmd_queue.tail, &ust_cmd->node);
1875
1876 /*
1877 * Wake the registration queue futex. Implicit memory
1878 * barrier with the exchange in cds_wfcq_enqueue.
1879 */
1880 futex_nto1_wake(&ust_cmd_queue.futex);
1881 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1882 ERR("Register apps socket poll error");
1883 goto error;
1884 } else {
1885 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
1886 goto error;
1887 }
1888 }
1889 }
1890 }
1891
1892 exit:
1893 error:
1894 /* Notify that the registration thread is gone */
1895 notify_ust_apps(0);
1896
1897 if (apps_sock >= 0) {
1898 ret = close(apps_sock);
1899 if (ret) {
1900 PERROR("close");
1901 }
1902 }
1903 if (sock >= 0) {
1904 ret = close(sock);
1905 if (ret) {
1906 PERROR("close");
1907 }
1908 lttng_fd_put(LTTNG_FD_APPS, 1);
1909 }
1910 unlink(config.apps_unix_sock_path.value);
1911
1912 error_poll_add:
1913 lttng_poll_clean(&events);
1914 error_listen:
1915 error_create_poll:
1916 error_testpoint:
1917 DBG("UST Registration thread cleanup complete");
1918 if (err) {
1919 health_error();
1920 ERR("Health error occurred in %s", __func__);
1921 }
1922 health_unregister(health_sessiond);
1923
1924 return NULL;
1925 }
1926
1927 /*
1928 * Set up the necessary data for the kernel tracer.
1929 */
1930 static int init_kernel_tracer(void)
1931 {
1932 int ret;
1933
1934 /* Modprobe lttng kernel modules */
1935 ret = modprobe_lttng_control();
1936 if (ret < 0) {
1937 goto error;
1938 }
1939
1940 /* Open the LTTng kernel tracer proc interface */
1941 kernel_tracer_fd = open(module_proc_lttng, O_RDWR);
1942 if (kernel_tracer_fd < 0) {
1943 DBG("Failed to open %s", module_proc_lttng);
1944 goto error_open;
1945 }
1946
1947 /* Validate kernel version */
1948 ret = kernel_validate_version(kernel_tracer_fd, &kernel_tracer_version,
1949 &kernel_tracer_abi_version);
1950 if (ret < 0) {
1951 goto error_version;
1952 }
1953
1954 ret = modprobe_lttng_data();
1955 if (ret < 0) {
1956 goto error_modules;
1957 }
1958
1959 ret = kernel_supports_ring_buffer_snapshot_sample_positions(
1960 kernel_tracer_fd);
1961 if (ret < 0) {
1962 goto error_modules;
1963 }
1964
1965 if (ret < 1) {
1966 WARN("Kernel tracer does not support buffer monitoring. "
1967 "The monitoring timer of channels in the kernel domain "
1968 "will be set to 0 (disabled).");
1969 }
1970
1971 DBG("Kernel tracer fd %d", kernel_tracer_fd);
1972 return 0;
1973
1974 error_version:
1975 modprobe_remove_lttng_control();
1976 ret = close(kernel_tracer_fd);
1977 if (ret) {
1978 PERROR("close");
1979 }
1980 kernel_tracer_fd = -1;
1981 return LTTNG_ERR_KERN_VERSION;
1982
1983 error_modules:
1984 ret = close(kernel_tracer_fd);
1985 if (ret) {
1986 PERROR("close");
1987 }
1988
1989 error_open:
1990 modprobe_remove_lttng_control();
1991
1992 error:
1993 WARN("No kernel tracer available");
1994 kernel_tracer_fd = -1;
1995 if (!is_root) {
1996 return LTTNG_ERR_NEED_ROOT_SESSIOND;
1997 } else {
1998 return LTTNG_ERR_KERN_NA;
1999 }
2000 }
2001
2002 static int string_match(const char *str1, const char *str2)
2003 {
2004 return (str1 && str2) && !strcmp(str1, str2);
2005 }
2006
2007 /*
2008 * Take an option from the getopt output and set it in the right variable to be
2009 * used later.
2010 *
2011 * Return 0 on success else a negative value.
2012 */
2013 static int set_option(int opt, const char *arg, const char *optname)
2014 {
2015 int ret = 0;
2016
2017 if (string_match(optname, "client-sock") || opt == 'c') {
2018 if (!arg || *arg == '\0') {
2019 ret = -EINVAL;
2020 goto end;
2021 }
2022 if (lttng_is_setuid_setgid()) {
2023 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2024 "-c, --client-sock");
2025 } else {
2026 config_string_set(&config.client_unix_sock_path,
2027 strdup(arg));
2028 if (!config.client_unix_sock_path.value) {
2029 ret = -ENOMEM;
2030 PERROR("strdup");
2031 }
2032 }
2033 } else if (string_match(optname, "apps-sock") || opt == 'a') {
2034 if (!arg || *arg == '\0') {
2035 ret = -EINVAL;
2036 goto end;
2037 }
2038 if (lttng_is_setuid_setgid()) {
2039 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2040 "-a, --apps-sock");
2041 } else {
2042 config_string_set(&config.apps_unix_sock_path,
2043 strdup(arg));
2044 if (!config.apps_unix_sock_path.value) {
2045 ret = -ENOMEM;
2046 PERROR("strdup");
2047 }
2048 }
2049 } else if (string_match(optname, "daemonize") || opt == 'd') {
2050 config.daemonize = true;
2051 } else if (string_match(optname, "background") || opt == 'b') {
2052 config.background = true;
2053 } else if (string_match(optname, "group") || opt == 'g') {
2054 if (!arg || *arg == '\0') {
2055 ret = -EINVAL;
2056 goto end;
2057 }
2058 if (lttng_is_setuid_setgid()) {
2059 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2060 "-g, --group");
2061 } else {
2062 config_string_set(&config.tracing_group_name,
2063 strdup(arg));
2064 if (!config.tracing_group_name.value) {
2065 ret = -ENOMEM;
2066 PERROR("strdup");
2067 }
2068 }
2069 } else if (string_match(optname, "help") || opt == 'h') {
2070 ret = utils_show_help(8, "lttng-sessiond", help_msg);
2071 if (ret) {
2072 ERR("Cannot show --help for `lttng-sessiond`");
2073 perror("exec");
2074 }
2075 exit(ret ? EXIT_FAILURE : EXIT_SUCCESS);
2076 } else if (string_match(optname, "version") || opt == 'V') {
2077 fprintf(stdout, "%s\n", VERSION);
2078 exit(EXIT_SUCCESS);
2079 } else if (string_match(optname, "sig-parent") || opt == 'S') {
2080 config.sig_parent = true;
2081 } else if (string_match(optname, "kconsumerd-err-sock")) {
2082 if (!arg || *arg == '\0') {
2083 ret = -EINVAL;
2084 goto end;
2085 }
2086 if (lttng_is_setuid_setgid()) {
2087 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2088 "--kconsumerd-err-sock");
2089 } else {
2090 config_string_set(&config.kconsumerd_err_unix_sock_path,
2091 strdup(arg));
2092 if (!config.kconsumerd_err_unix_sock_path.value) {
2093 ret = -ENOMEM;
2094 PERROR("strdup");
2095 }
2096 }
2097 } else if (string_match(optname, "kconsumerd-cmd-sock")) {
2098 if (!arg || *arg == '\0') {
2099 ret = -EINVAL;
2100 goto end;
2101 }
2102 if (lttng_is_setuid_setgid()) {
2103 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2104 "--kconsumerd-cmd-sock");
2105 } else {
2106 config_string_set(&config.kconsumerd_cmd_unix_sock_path,
2107 strdup(arg));
2108 if (!config.kconsumerd_cmd_unix_sock_path.value) {
2109 ret = -ENOMEM;
2110 PERROR("strdup");
2111 }
2112 }
2113 } else if (string_match(optname, "ustconsumerd64-err-sock")) {
2114 if (!arg || *arg == '\0') {
2115 ret = -EINVAL;
2116 goto end;
2117 }
2118 if (lttng_is_setuid_setgid()) {
2119 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2120 "--ustconsumerd64-err-sock");
2121 } else {
2122 config_string_set(&config.consumerd64_err_unix_sock_path,
2123 strdup(arg));
2124 if (!config.consumerd64_err_unix_sock_path.value) {
2125 ret = -ENOMEM;
2126 PERROR("strdup");
2127 }
2128 }
2129 } else if (string_match(optname, "ustconsumerd64-cmd-sock")) {
2130 if (!arg || *arg == '\0') {
2131 ret = -EINVAL;
2132 goto end;
2133 }
2134 if (lttng_is_setuid_setgid()) {
2135 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2136 "--ustconsumerd64-cmd-sock");
2137 } else {
2138 config_string_set(&config.consumerd64_cmd_unix_sock_path,
2139 strdup(arg));
2140 if (!config.consumerd64_cmd_unix_sock_path.value) {
2141 ret = -ENOMEM;
2142 PERROR("strdup");
2143 }
2144 }
2145 } else if (string_match(optname, "ustconsumerd32-err-sock")) {
2146 if (!arg || *arg == '\0') {
2147 ret = -EINVAL;
2148 goto end;
2149 }
2150 if (lttng_is_setuid_setgid()) {
2151 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2152 "--ustconsumerd32-err-sock");
2153 } else {
2154 config_string_set(&config.consumerd32_err_unix_sock_path,
2155 strdup(arg));
2156 if (!config.consumerd32_err_unix_sock_path.value) {
2157 ret = -ENOMEM;
2158 PERROR("strdup");
2159 }
2160 }
2161 } else if (string_match(optname, "ustconsumerd32-cmd-sock")) {
2162 if (!arg || *arg == '\0') {
2163 ret = -EINVAL;
2164 goto end;
2165 }
2166 if (lttng_is_setuid_setgid()) {
2167 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2168 "--ustconsumerd32-cmd-sock");
2169 } else {
2170 config_string_set(&config.consumerd32_cmd_unix_sock_path,
2171 strdup(arg));
2172 if (!config.consumerd32_cmd_unix_sock_path.value) {
2173 ret = -ENOMEM;
2174 PERROR("strdup");
2175 }
2176 }
2177 } else if (string_match(optname, "no-kernel")) {
2178 config.no_kernel = true;
2179 } else if (string_match(optname, "quiet") || opt == 'q') {
2180 config.quiet = true;
2181 } else if (string_match(optname, "verbose") || opt == 'v') {
2182 /* The verbose level can be increased by passing -v multiple times. */
2183 if (arg) {
2184 /* Value obtained from config file */
2185 config.verbose = config_parse_value(arg);
2186 } else {
2187 /* -v used on command line */
2188 config.verbose++;
2189 }
2190 /* Clamp value to [0, 3] */
2191 config.verbose = config.verbose < 0 ? 0 :
2192 (config.verbose <= 3 ? config.verbose : 3);
2193 } else if (string_match(optname, "verbose-consumer")) {
2194 if (arg) {
2195 config.verbose_consumer = config_parse_value(arg);
2196 } else {
2197 config.verbose_consumer++;
2198 }
2199 } else if (string_match(optname, "consumerd32-path")) {
2200 if (!arg || *arg == '\0') {
2201 ret = -EINVAL;
2202 goto end;
2203 }
2204 if (lttng_is_setuid_setgid()) {
2205 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2206 "--consumerd32-path");
2207 } else {
2208 config_string_set(&config.consumerd32_bin_path,
2209 strdup(arg));
2210 if (!config.consumerd32_bin_path.value) {
2211 PERROR("strdup");
2212 ret = -ENOMEM;
2213 }
2214 }
2215 } else if (string_match(optname, "consumerd32-libdir")) {
2216 if (!arg || *arg == '\0') {
2217 ret = -EINVAL;
2218 goto end;
2219 }
2220 if (lttng_is_setuid_setgid()) {
2221 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2222 "--consumerd32-libdir");
2223 } else {
2224 config_string_set(&config.consumerd32_lib_dir,
2225 strdup(arg));
2226 if (!config.consumerd32_lib_dir.value) {
2227 PERROR("strdup");
2228 ret = -ENOMEM;
2229 }
2230 }
2231 } else if (string_match(optname, "consumerd64-path")) {
2232 if (!arg || *arg == '\0') {
2233 ret = -EINVAL;
2234 goto end;
2235 }
2236 if (lttng_is_setuid_setgid()) {
2237 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2238 "--consumerd64-path");
2239 } else {
2240 config_string_set(&config.consumerd64_bin_path,
2241 strdup(arg));
2242 if (!config.consumerd64_bin_path.value) {
2243 PERROR("strdup");
2244 ret = -ENOMEM;
2245 }
2246 }
2247 } else if (string_match(optname, "consumerd64-libdir")) {
2248 if (!arg || *arg == '\0') {
2249 ret = -EINVAL;
2250 goto end;
2251 }
2252 if (lttng_is_setuid_setgid()) {
2253 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2254 "--consumerd64-libdir");
2255 } else {
2256 config_string_set(&config.consumerd64_lib_dir,
2257 strdup(arg));
2258 if (!config.consumerd64_lib_dir.value) {
2259 PERROR("strdup");
2260 ret = -ENOMEM;
2261 }
2262 }
2263 } else if (string_match(optname, "pidfile") || opt == 'p') {
2264 if (!arg || *arg == '\0') {
2265 ret = -EINVAL;
2266 goto end;
2267 }
2268 if (lttng_is_setuid_setgid()) {
2269 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2270 "-p, --pidfile");
2271 } else {
2272 config_string_set(&config.pid_file_path, strdup(arg));
2273 if (!config.pid_file_path.value) {
2274 PERROR("strdup");
2275 ret = -ENOMEM;
2276 }
2277 }
2278 } else if (string_match(optname, "agent-tcp-port")) {
2279 if (!arg || *arg == '\0') {
2280 ret = -EINVAL;
2281 goto end;
2282 }
2283 if (lttng_is_setuid_setgid()) {
2284 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2285 "--agent-tcp-port");
2286 } else {
2287 unsigned long v;
2288
2289 errno = 0;
2290 v = strtoul(arg, NULL, 0);
2291 if (errno != 0 || !isdigit(arg[0])) {
2292 ERR("Wrong value in --agent-tcp-port parameter: %s", arg);
2293 return -1;
2294 }
2295 if (v == 0 || v >= 65535) {
2296 ERR("Port overflow in --agent-tcp-port parameter: %s", arg);
2297 return -1;
2298 }
2299 config.agent_tcp_port.begin = config.agent_tcp_port.end = (int) v;
2300 DBG3("Agent TCP port set to non default: %i", (int) v);
2301 }
2302 } else if (string_match(optname, "load") || opt == 'l') {
2303 if (!arg || *arg == '\0') {
2304 ret = -EINVAL;
2305 goto end;
2306 }
2307 if (lttng_is_setuid_setgid()) {
2308 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2309 "-l, --load");
2310 } else {
2311 config_string_set(&config.load_session_path, strdup(arg));
2312 if (!config.load_session_path.value) {
2313 PERROR("strdup");
2314 ret = -ENOMEM;
2315 }
2316 }
2317 } else if (string_match(optname, "kmod-probes")) {
2318 if (!arg || *arg == '\0') {
2319 ret = -EINVAL;
2320 goto end;
2321 }
2322 if (lttng_is_setuid_setgid()) {
2323 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2324 "--kmod-probes");
2325 } else {
2326 config_string_set(&config.kmod_probes_list, strdup(arg));
2327 if (!config.kmod_probes_list.value) {
2328 PERROR("strdup");
2329 ret = -ENOMEM;
2330 }
2331 }
2332 } else if (string_match(optname, "extra-kmod-probes")) {
2333 if (!arg || *arg == '\0') {
2334 ret = -EINVAL;
2335 goto end;
2336 }
2337 if (lttng_is_setuid_setgid()) {
2338 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2339 "--extra-kmod-probes");
2340 } else {
2341 config_string_set(&config.kmod_extra_probes_list,
2342 strdup(arg));
2343 if (!config.kmod_extra_probes_list.value) {
2344 PERROR("strdup");
2345 ret = -ENOMEM;
2346 }
2347 }
2348 } else if (string_match(optname, "config") || opt == 'f') {
2349 /* This is handled in set_options() thus silent skip. */
2350 goto end;
2351 } else {
2352 /* Unknown option or other error. The error message is
2353 * printed by getopt, so simply return. */
2354 ret = -1;
2355 }
2356
2357 end:
2358 if (ret == -EINVAL) {
2359 const char *opt_name = "unknown";
2360 int i;
2361
2362 for (i = 0; i < sizeof(long_options) / sizeof(struct option);
2363 i++) {
2364 if (opt == long_options[i].val) {
2365 opt_name = long_options[i].name;
2366 break;
2367 }
2368 }
2369
2370 WARN("Invalid argument provided for option \"%s\", using default value.",
2371 opt_name);
2372 }
2373
2374 return ret;
2375 }
2376
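/*
 * set_option() is reached through two paths: from getopt_long() in
 * set_options() below, where 'opt' is the option character and 'optname'
 * may be NULL for short options, and from config_entry_handler(), where
 * 'optname' is the long option name read from the daemon configuration
 * file. For example, "-v -v" on the command line and "verbose=2" in the
 * configuration file both end up in the "verbose" branch above.
 */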
2377 /*
2378 * config_entry_handler_cb used to handle options read from a config file.
2379 * See config_entry_handler_cb comment in common/config/session-config.h for the
2380 * return value conventions.
2381 */
2382 static int config_entry_handler(const struct config_entry *entry, void *unused)
2383 {
2384 int ret = 0, i;
2385
2386 if (!entry || !entry->name || !entry->value) {
2387 ret = -EINVAL;
2388 goto end;
2389 }
2390
2391 /* Check if the option is to be ignored */
2392 for (i = 0; i < sizeof(config_ignore_options) / sizeof(char *); i++) {
2393 if (!strcmp(entry->name, config_ignore_options[i])) {
2394 goto end;
2395 }
2396 }
2397
2398 for (i = 0; i < (sizeof(long_options) / sizeof(struct option)) - 1;
2399 i++) {
2400
2401 /* Ignore if not fully matched. */
2402 if (strcmp(entry->name, long_options[i].name)) {
2403 continue;
2404 }
2405
2406 /*
2407 * If the option takes no argument on the command line, we have to
2408 * check if the value is "true". We support non-zero numeric values,
2409 * true, on and yes.
2410 */
2411 if (!long_options[i].has_arg) {
2412 ret = config_parse_value(entry->value);
2413 if (ret <= 0) {
2414 if (ret) {
2415 WARN("Invalid configuration value \"%s\" for option %s",
2416 entry->value, entry->name);
2417 }
2418 /* False, skip boolean config option. */
2419 goto end;
2420 }
2421 }
2422
2423 ret = set_option(long_options[i].val, entry->value, entry->name);
2424 goto end;
2425 }
2426
2427 WARN("Unrecognized option \"%s\" in daemon configuration file.", entry->name);
2428
2429 end:
2430 return ret;
2431 }
2432
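/*
 * For reference, a minimal daemon configuration file sketch (assuming the
 * INI-style layout documented for lttng-sessiond, where entries live under
 * the section named by config_section_name and keys are the long option
 * names handled above):
 *
 *     [sessiond]
 *     daemonize=yes
 *     verbose=2
 *     agent-tcp-port=5345
 *
 * Boolean options accept non-zero numbers, "true", "on" and "yes", as
 * interpreted by config_parse_value().
 */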
2433 /*
2434 * Daemon configuration loading and argument parsing.
2435 */
2436 static int set_options(int argc, char **argv)
2437 {
2438 int ret = 0, c = 0, option_index = 0;
2439 int orig_optopt = optopt, orig_optind = optind;
2440 char *optstring;
2441 const char *config_path = NULL;
2442
2443 optstring = utils_generate_optstring(long_options,
2444 sizeof(long_options) / sizeof(struct option));
2445 if (!optstring) {
2446 ret = -ENOMEM;
2447 goto end;
2448 }
2449
2450 /* Check for the --config option */
2451 while ((c = getopt_long(argc, argv, optstring, long_options,
2452 &option_index)) != -1) {
2453 if (c == '?') {
2454 ret = -EINVAL;
2455 goto end;
2456 } else if (c != 'f') {
2457 /* if not equal to --config option. */
2458 continue;
2459 }
2460
2461 if (lttng_is_setuid_setgid()) {
2462 WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.",
2463 "-f, --config");
2464 } else {
2465 config_path = utils_expand_path(optarg);
2466 if (!config_path) {
2467 ERR("Failed to resolve path: %s", optarg);
2468 }
2469 }
2470 }
2471
2472 ret = config_get_section_entries(config_path, config_section_name,
2473 config_entry_handler, NULL);
2474 if (ret) {
2475 if (ret > 0) {
2476 ERR("Invalid configuration option at line %i", ret);
2477 ret = -1;
2478 }
2479 goto end;
2480 }
2481
2482 /* Reset getopt's global state */
2483 optopt = orig_optopt;
2484 optind = orig_optind;
2485 while (1) {
2486 option_index = -1;
2487 /*
2488 * getopt_long() will not set option_index if it encounters a
2489 * short option.
2490 */
2491 c = getopt_long(argc, argv, optstring, long_options,
2492 &option_index);
2493 if (c == -1) {
2494 break;
2495 }
2496
2497 /*
2498 * Pass NULL as the long option name if getopt_long() left the
2499 * index unset (i.e. a short option was parsed).
2500 */
2501 ret = set_option(c, optarg,
2502 option_index < 0 ? NULL :
2503 long_options[option_index].name);
2504 if (ret < 0) {
2505 break;
2506 }
2507 }
2508
2509 end:
2510 free(optstring);
2511 return ret;
2512 }
2513
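/*
 * Note that the command line is scanned twice above: the first pass only
 * looks for -f/--config so the configuration file can be loaded, and the
 * second pass (after resetting getopt's state) applies the remaining
 * options, which therefore take precedence over values read from the file.
 */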
2514 /*
2515 * Creates the application socket.
2516 */
2517 static int init_daemon_socket(void)
2518 {
2519 int ret = 0;
2520 mode_t old_umask;
2521
2522 old_umask = umask(0);
2523
2524 /* Create the application unix socket */
2525 apps_sock = lttcomm_create_unix_sock(config.apps_unix_sock_path.value);
2526 if (apps_sock < 0) {
2527 ERR("Create unix sock failed: %s", config.apps_unix_sock_path.value);
2528 ret = -1;
2529 goto end;
2530 }
2531
2532 /* Set the cloexec flag */
2533 ret = utils_set_fd_cloexec(apps_sock);
2534 if (ret < 0) {
2535 ERR("Unable to set CLOEXEC flag to the app Unix socket (fd: %d). "
2536 "Continuing but note that the consumer daemon will have a "
2537 "reference to this socket on exec()", apps_sock);
2538 }
2539
2540 /* File permission MUST be 666 */
2541 ret = chmod(config.apps_unix_sock_path.value,
2542 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
2543 if (ret < 0) {
2544 ERR("Set file permissions failed: %s", config.apps_unix_sock_path.value);
2545 PERROR("chmod");
2546 goto end;
2547 }
2548
2549 DBG3("Session daemon application socket %d created",
2550 apps_sock);
2551
2552 end:
2553 umask(old_umask);
2554 return ret;
2555 }
2556
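/*
 * The 0666 permissions above let any user's instrumented application
 * connect to the application registration socket. Access to the daemon's
 * control interface is restricted separately: the client socket is chown'd
 * to the tracing group in set_permissions() below when running as root.
 */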
2557 /*
2558 * Create lockfile using the rundir and return its fd.
2559 */
2560 static int create_lockfile(void)
2561 {
2562 return utils_create_lock_file(config.lock_file_path.value);
2563 }
2564
2565 /*
2566 * Check if the global socket is available, and if a daemon is answering at the
2567 * other side. If so, an error is returned.
2568 *
2569 * Also attempts to create and hold the lock file.
2570 */
2571 static int check_existing_daemon(void)
2572 {
2573 int ret = 0;
2574
2575 /* Is there anybody out there ? */
2576 if (lttng_session_daemon_alive()) {
2577 ret = -EEXIST;
2578 goto end;
2579 }
2580
2581 lockfile_fd = create_lockfile();
2582 if (lockfile_fd < 0) {
2583 ret = -EEXIST;
2584 goto end;
2585 }
2586 end:
2587 return ret;
2588 }
2589
2590 static void sessiond_cleanup_lock_file(void)
2591 {
2592 int ret;
2593
2594 /*
2595 * Clean up the lock file by deleting it and finally closing it, which
2596 * releases the file system lock.
2597 */
2598 if (lockfile_fd >= 0) {
2599 ret = remove(config.lock_file_path.value);
2600 if (ret < 0) {
2601 PERROR("remove lock file");
2602 }
2603 ret = close(lockfile_fd);
2604 if (ret < 0) {
2605 PERROR("close lock file");
2606 }
2607 }
2608 }
2609
2610 /*
2611 * Set the tracing group gid onto the client socket.
2612 *
2613 * The race window between mkdir and chown is OK because we are going from
2614 * more permissive (root.root) to less permissive (root.tracing).
2615 */
2616 static int set_permissions(char *rundir)
2617 {
2618 int ret;
2619 gid_t gid;
2620
2621 gid = utils_get_group_id(config.tracing_group_name.value);
2622
2623 /* Set lttng run dir */
2624 ret = chown(rundir, 0, gid);
2625 if (ret < 0) {
2626 ERR("Unable to set group on %s", rundir);
2627 PERROR("chown");
2628 }
2629
2630 /*
2631 * Ensure all applications and tracing group can search the run
2632 * dir. Allow everyone to read the directory, since it does not
2633 * buy us anything to hide its content.
2634 */
2635 ret = chmod(rundir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
2636 if (ret < 0) {
2637 ERR("Unable to set permissions on %s", rundir);
2638 PERROR("chmod");
2639 }
2640
2641 /* lttng client socket path */
2642 ret = chown(config.client_unix_sock_path.value, 0, gid);
2643 if (ret < 0) {
2644 ERR("Unable to set group on %s", config.client_unix_sock_path.value);
2645 PERROR("chown");
2646 }
2647
2648 /* kconsumer error socket path */
2649 ret = chown(kconsumer_data.err_unix_sock_path, 0, 0);
2650 if (ret < 0) {
2651 ERR("Unable to set group on %s", kconsumer_data.err_unix_sock_path);
2652 PERROR("chown");
2653 }
2654
2655 /* 64-bit ustconsumer error socket path */
2656 ret = chown(ustconsumer64_data.err_unix_sock_path, 0, 0);
2657 if (ret < 0) {
2658 ERR("Unable to set group on %s", ustconsumer64_data.err_unix_sock_path);
2659 PERROR("chown");
2660 }
2661
2662 /* 32-bit ustconsumer compat32 error socket path */
2663 ret = chown(ustconsumer32_data.err_unix_sock_path, 0, 0);
2664 if (ret < 0) {
2665 ERR("Unable to set group on %s", ustconsumer32_data.err_unix_sock_path);
2666 PERROR("chown");
2667 }
2668
2669 DBG("All permissions are set");
2670
2671 return ret;
2672 }
2673
2674 /*
2675 * Create the lttng run directory needed for all global sockets and pipe.
2676 */
2677 static int create_lttng_rundir(void)
2678 {
2679 int ret;
2680
2681 DBG3("Creating LTTng run directory: %s", config.rundir.value);
2682
2683 ret = mkdir(config.rundir.value, S_IRWXU);
2684 if (ret < 0) {
2685 if (errno != EEXIST) {
2686 ERR("Unable to create %s", config.rundir.value);
2687 goto error;
2688 } else {
2689 ret = 0;
2690 }
2691 }
2692
2693 error:
2694 return ret;
2695 }
2696
2697 /*
2698 * Set up the sockets and directory needed for the consumer daemons'
2699 * communication with the session daemon.
2700 */
2701 static int set_consumer_sockets(struct consumer_data *consumer_data)
2702 {
2703 int ret;
2704 char *path = NULL;
2705
2706 switch (consumer_data->type) {
2707 case LTTNG_CONSUMER_KERNEL:
2708 path = config.kconsumerd_path.value;
2709 break;
2710 case LTTNG_CONSUMER64_UST:
2711 path = config.consumerd64_path.value;
2712 break;
2713 case LTTNG_CONSUMER32_UST:
2714 path = config.consumerd32_path.value;
2715 break;
2716 default:
2717 ERR("Consumer type unknown");
2718 ret = -EINVAL;
2719 goto error;
2720 }
2721 assert(path);
2722
2723 DBG2("Creating consumer directory: %s", path);
2724
2725 ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP);
2726 if (ret < 0 && errno != EEXIST) {
2727 PERROR("mkdir");
2728 ERR("Failed to create %s", path);
2729 goto error;
2730 }
2731 if (is_root) {
2732 ret = chown(path, 0, utils_get_group_id(config.tracing_group_name.value));
2733 if (ret < 0) {
2734 ERR("Unable to set group on %s", path);
2735 PERROR("chown");
2736 goto error;
2737 }
2738 }
2739
2740 /* Create the consumerd error unix socket */
2741 consumer_data->err_sock =
2742 lttcomm_create_unix_sock(consumer_data->err_unix_sock_path);
2743 if (consumer_data->err_sock < 0) {
2744 ERR("Create unix sock failed: %s", consumer_data->err_unix_sock_path);
2745 ret = -1;
2746 goto error;
2747 }
2748
2749 /*
2750 * Set the CLOEXEC flag. Return code is useless because either way, the
2751 * show must go on.
2752 */
2753 ret = utils_set_fd_cloexec(consumer_data->err_sock);
2754 if (ret < 0) {
2755 PERROR("utils_set_fd_cloexec");
2756 /* continue anyway */
2757 }
2758
2759 /* File permission MUST be 660 */
2760 ret = chmod(consumer_data->err_unix_sock_path,
2761 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
2762 if (ret < 0) {
2763 ERR("Set file permissions failed: %s", consumer_data->err_unix_sock_path);
2764 PERROR("chmod");
2765 goto error;
2766 }
2767
2768 error:
2769 return ret;
2770 }
2771
2772 /*
2773 * Signal handler for the daemon
2774 *
2775 * Simply stop all worker threads, letting main() return gracefully after
2776 * joining all threads and calling cleanup().
2777 */
2778 static void sighandler(int sig)
2779 {
2780 switch (sig) {
2781 case SIGINT:
2782 DBG("SIGINT caught");
2783 stop_threads();
2784 break;
2785 case SIGTERM:
2786 DBG("SIGTERM caught");
2787 stop_threads();
2788 break;
2789 case SIGUSR1:
2790 CMM_STORE_SHARED(recv_child_signal, 1);
2791 break;
2792 default:
2793 break;
2794 }
2795 }
2796
2797 /*
2798 * Set up the signal handlers for:
2799 * SIGINT, SIGTERM, SIGUSR1 and SIGPIPE
2800 */
2801 static int set_signal_handler(void)
2802 {
2803 int ret = 0;
2804 struct sigaction sa;
2805 sigset_t sigset;
2806
2807 if ((ret = sigemptyset(&sigset)) < 0) {
2808 PERROR("sigemptyset");
2809 return ret;
2810 }
2811
2812 sa.sa_mask = sigset;
2813 sa.sa_flags = 0;
2814
2815 sa.sa_handler = sighandler;
2816 if ((ret = sigaction(SIGTERM, &sa, NULL)) < 0) {
2817 PERROR("sigaction");
2818 return ret;
2819 }
2820
2821 if ((ret = sigaction(SIGINT, &sa, NULL)) < 0) {
2822 PERROR("sigaction");
2823 return ret;
2824 }
2825
2826 if ((ret = sigaction(SIGUSR1, &sa, NULL)) < 0) {
2827 PERROR("sigaction");
2828 return ret;
2829 }
2830
2831 sa.sa_handler = SIG_IGN;
2832 if ((ret = sigaction(SIGPIPE, &sa, NULL)) < 0) {
2833 PERROR("sigaction");
2834 return ret;
2835 }
2836
2837 DBG("Signal handler set for SIGTERM, SIGUSR1, SIGPIPE and SIGINT");
2838
2839 return ret;
2840 }
2841
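/*
 * Note: recv_child_signal, set to 1 by the SIGUSR1 case above, is handed
 * to lttng_daemonize() in main(). This is presumably how the parent
 * process, when daemonizing, waits for the child to signal that it is
 * ready before exiting.
 */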
2842 /*
2843 * Raise the open files limit as high as practical. This daemon can open a
2844 * large number of file descriptors in order to consume multiple kernel traces.
2845 */
2846 static void set_ulimit(void)
2847 {
2848 int ret;
2849 struct rlimit lim;
2850
2851 /* The kernel does not allow an infinite limit for open files */
2852 lim.rlim_cur = 65535;
2853 lim.rlim_max = 65535;
2854
2855 ret = setrlimit(RLIMIT_NOFILE, &lim);
2856 if (ret < 0) {
2857 PERROR("failed to set open files limit");
2858 }
2859 }
2860
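/*
 * The raised RLIMIT_NOFILE value matters to the daemon's file descriptor
 * accounting: main() below calls lttng_fd_init() only after set_ulimit(),
 * as noted at that call site, presumably because the tracking reads the
 * current limit.
 */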
2861 static int write_pidfile(void)
2862 {
2863 return utils_create_pid_file(getpid(), config.pid_file_path.value);
2864 }
2865
2866 static int set_clock_plugin_env(void)
2867 {
2868 int ret = 0;
2869 char *env_value = NULL;
2870
2871 if (!config.lttng_ust_clock_plugin.value) {
2872 goto end;
2873 }
2874
2875 ret = asprintf(&env_value, "LTTNG_UST_CLOCK_PLUGIN=%s",
2876 config.lttng_ust_clock_plugin.value);
2877 if (ret < 0) {
2878 PERROR("asprintf");
2879 goto end;
2880 }
2881
2882 ret = putenv(env_value);
2883 if (ret) {
2884 free(env_value);
2885 PERROR("putenv of LTTNG_UST_CLOCK_PLUGIN");
2886 goto end;
2887 }
2888
2889 DBG("Updated LTTNG_UST_CLOCK_PLUGIN environment variable to \"%s\"",
2890 config.lttng_ust_clock_plugin.value);
2891 end:
2892 return ret;
2893 }
2894
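/*
 * A usage sketch (based on the behaviour documented for lttng-ust, not on
 * anything in this file): LTTNG_UST_CLOCK_PLUGIN names a shared object
 * providing an alternate trace clock. Exporting it from the session
 * daemon's environment makes it visible to the processes it spawns, e.g.
 * with a hypothetical plugin path:
 *
 *     LTTNG_UST_CLOCK_PLUGIN=/usr/lib/liblttng-ust-clock-override.so
 */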
2895 static void destroy_all_sessions_and_wait(void)
2896 {
2897 struct ltt_session *session, *tmp;
2898 struct ltt_session_list *session_list;
2899
2900 session_list = session_get_list();
2901 DBG("Initiating destruction of all sessions");
2902
2903 if (!session_list) {
2904 return;
2905 }
2906
2907 session_lock_list();
2908 /* Initiate the destruction of all sessions. */
2909 cds_list_for_each_entry_safe(session, tmp,
2910 &session_list->head, list) {
2911 if (!session_get(session)) {
2912 continue;
2913 }
2914
2915 session_lock(session);
2916 if (session->destroyed) {
2917 goto unlock_session;
2918 }
2919 (void) cmd_destroy_session(session,
2920 notification_thread_handle);
2921 unlock_session:
2922 session_unlock(session);
2923 session_put(session);
2924 }
2925 session_unlock_list();
2926
2927 /* Wait for the destruction of all sessions to complete. */
2928 DBG("Waiting for the destruction of all sessions to complete");
2929 session_list_wait_empty();
2930 DBG("Destruction of all sessions completed");
2931 }
2932
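/*
 * Note on the loop above: sessions are reference-counted. session_get()
 * keeps each element alive while it is being handled, cmd_destroy_session()
 * only *initiates* the destruction, and session_list_wait_empty() blocks
 * until the session list is empty, i.e. until every destruction has
 * completed.
 */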
2933 /*
2934 * main
2935 */
2936 int main(int argc, char **argv)
2937 {
2938 int ret = 0, retval = 0;
2939 void *status;
2940 const char *env_app_timeout;
2941 struct lttng_pipe *ust32_channel_monitor_pipe = NULL,
2942 *ust64_channel_monitor_pipe = NULL,
2943 *kernel_channel_monitor_pipe = NULL;
2944 struct lttng_thread *ht_cleanup_thread = NULL;
2945 struct timer_thread_parameters timer_thread_parameters;
2946 /* Rotation thread handle. */
2947 struct rotation_thread_handle *rotation_thread_handle = NULL;
2948 /* Queue of rotation jobs populated by the sessiond-timer. */
2949 struct rotation_thread_timer_queue *rotation_timer_queue = NULL;
2950 struct lttng_thread *client_thread = NULL;
2951
2952 init_kernel_workarounds();
2953
2954 rcu_register_thread();
2955
2956 if (set_signal_handler()) {
2957 retval = -1;
2958 goto exit_set_signal_handler;
2959 }
2960
2961 if (timer_signal_init()) {
2962 retval = -1;
2963 goto exit_set_signal_handler;
2964 }
2965
2966 page_size = sysconf(_SC_PAGESIZE);
2967 if (page_size < 0) {
2968 PERROR("sysconf _SC_PAGESIZE");
2969 page_size = LONG_MAX;
2970 WARN("Fallback page size to %ld", page_size);
2971 }
2972
2973 ret = sessiond_config_init(&config);
2974 if (ret) {
2975 retval = -1;
2976 goto exit_set_signal_handler;
2977 }
2978
2979 /*
2980 * Init config from environment variables.
2981 * Command line options override the environment configuration, as documented; apply the environment first.
2982 */
2983 sessiond_config_apply_env_config(&config);
2984
2985 /*
2986 * Parse arguments and load the daemon configuration file.
2987 *
2988 * We have an exit_options exit path to free memory reserved by
2989 * set_options. This is needed because the rest of sessiond_cleanup()
2990 * depends on ht_cleanup_thread, which depends on lttng_daemonize, which
2991 * depends on set_options.
2992 */
2993 progname = argv[0];
2994 if (set_options(argc, argv)) {
2995 retval = -1;
2996 goto exit_options;
2997 }
2998
2999 /*
3000 * Resolve all paths received as arguments, configuration option, or
3001 * through environment variable as absolute paths. This is necessary
3002 * since daemonizing causes the sessiond's current working directory
3003 * to '/'.
3004 */
3005 ret = sessiond_config_resolve_paths(&config);
3006 if (ret) {
3007 goto exit_options;
3008 }
3009
3010 /* Apply config. */
3011 lttng_opt_verbose = config.verbose;
3012 lttng_opt_quiet = config.quiet;
3013 kconsumer_data.err_unix_sock_path =
3014 config.kconsumerd_err_unix_sock_path.value;
3015 kconsumer_data.cmd_unix_sock_path =
3016 config.kconsumerd_cmd_unix_sock_path.value;
3017 ustconsumer32_data.err_unix_sock_path =
3018 config.consumerd32_err_unix_sock_path.value;
3019 ustconsumer32_data.cmd_unix_sock_path =
3020 config.consumerd32_cmd_unix_sock_path.value;
3021 ustconsumer64_data.err_unix_sock_path =
3022 config.consumerd64_err_unix_sock_path.value;
3023 ustconsumer64_data.cmd_unix_sock_path =
3024 config.consumerd64_cmd_unix_sock_path.value;
3025 set_clock_plugin_env();
3026
3027 sessiond_config_log(&config);
3028
3029 if (create_lttng_rundir()) {
3030 retval = -1;
3031 goto exit_options;
3032 }
3033
3034 /* Abort launch if a session daemon is already running. */
3035 if (check_existing_daemon()) {
3036 ERR("A session daemon is already running.");
3037 retval = -1;
3038 goto exit_options;
3039 }
3040
3041 /* Daemonize */
3042 if (config.daemonize || config.background) {
3043 int i;
3044
3045 ret = lttng_daemonize(&child_ppid, &recv_child_signal,
3046 !config.background);
3047 if (ret < 0) {
3048 retval = -1;
3049 goto exit_options;
3050 }
3051
3052 /*
3053 * We are in the child. Make sure all other file descriptors are
3054 * closed, in case we are called with more open file
3055 * descriptors than the standard ones and the lock file.
3056 */
3057 for (i = 3; i < sysconf(_SC_OPEN_MAX); i++) {
3058 if (i == lockfile_fd) {
3059 continue;
3060 }
3061 (void) close(i);
3062 }
3063 }
3064
3065 if (run_as_create_worker(argv[0]) < 0) {
3066 goto exit_create_run_as_worker_cleanup;
3067 }
3068
3069 /*
3070 * Starting from here, we can create threads. This needs to be after
3071 * lttng_daemonize due to RCU.
3072 */
3073
3074 /*
3075 * Initialize the health check subsystem. This call should set the
3076 * appropriate time values.
3077 */
3078 health_sessiond = health_app_create(NR_HEALTH_SESSIOND_TYPES);
3079 if (!health_sessiond) {
3080 PERROR("health_app_create error");
3081 retval = -1;
3082 goto exit_health_sessiond_cleanup;
3083 }
3084
3085 /* Create thread to clean up RCU hash tables */
3086 ht_cleanup_thread = launch_ht_cleanup_thread();
3087 if (!ht_cleanup_thread) {
3088 retval = -1;
3089 goto exit_ht_cleanup;
3090 }
3091
3092 /* Create thread quit pipe */
3093 if (sessiond_init_thread_quit_pipe()) {
3094 retval = -1;
3095 goto exit_init_data;
3096 }
3097
3098 /* Check if daemon is UID = 0 */
3099 is_root = !getuid();
3100 if (is_root) {
3101 /* Create global run dir with root access */
3102
3103 kernel_channel_monitor_pipe = lttng_pipe_open(0);
3104 if (!kernel_channel_monitor_pipe) {
3105 ERR("Failed to create kernel consumer channel monitor pipe");
3106 retval = -1;
3107 goto exit_init_data;
3108 }
3109 kconsumer_data.channel_monitor_pipe =
3110 lttng_pipe_release_writefd(
3111 kernel_channel_monitor_pipe);
3112 if (kconsumer_data.channel_monitor_pipe < 0) {
3113 retval = -1;
3114 goto exit_init_data;
3115 }
3116 }
3117
3118 /* Set consumer initial state */
3119 kernel_consumerd_state = CONSUMER_STOPPED;
3120 ust_consumerd_state = CONSUMER_STOPPED;
3121
3122 ust32_channel_monitor_pipe = lttng_pipe_open(0);
3123 if (!ust32_channel_monitor_pipe) {
3124 ERR("Failed to create 32-bit user space consumer channel monitor pipe");
3125 retval = -1;
3126 goto exit_init_data;
3127 }
3128 ustconsumer32_data.channel_monitor_pipe = lttng_pipe_release_writefd(
3129 ust32_channel_monitor_pipe);
3130 if (ustconsumer32_data.channel_monitor_pipe < 0) {
3131 retval = -1;
3132 goto exit_init_data;
3133 }
3134
3135 /*
3136 * The rotation_thread_timer_queue structure is shared between the
3137 * sessiond timer thread and the rotation thread. The main thread keeps
3138 * its ownership and destroys it when both threads have been joined.
3139 */
3140 rotation_timer_queue = rotation_thread_timer_queue_create();
3141 if (!rotation_timer_queue) {
3142 retval = -1;
3143 goto exit_init_data;
3144 }
3145 timer_thread_parameters.rotation_thread_job_queue =
3146 rotation_timer_queue;
3147
3148 ust64_channel_monitor_pipe = lttng_pipe_open(0);
3149 if (!ust64_channel_monitor_pipe) {
3150 ERR("Failed to create 64-bit user space consumer channel monitor pipe");
3151 retval = -1;
3152 goto exit_init_data;
3153 }
3154 ustconsumer64_data.channel_monitor_pipe = lttng_pipe_release_writefd(
3155 ust64_channel_monitor_pipe);
3156 if (ustconsumer64_data.channel_monitor_pipe < 0) {
3157 retval = -1;
3158 goto exit_init_data;
3159 }
3160
3161 /*
3162 * Init UST app hash table. We allocate the hash table here since
3163 * cleanup() can get called after this point.
3164 */
3165 if (ust_app_ht_alloc()) {
3166 ERR("Failed to allocate UST app hash table");
3167 retval = -1;
3168 goto exit_init_data;
3169 }
3170
3171 /*
3172 * Initialize agent app hash table. We allocate the hash table here
3173 * since cleanup() can get called after this point.
3174 */
3175 if (agent_app_ht_alloc()) {
3176 ERR("Failed to allocate Agent app hash table");
3177 retval = -1;
3178 goto exit_init_data;
3179 }
3180
3181 /*
3182 * These actions must be executed as root. We do them *after* setting up
3183 * the socket paths because we MUST check for another daemon using those
3184 * paths *before* trying to set up the kernel consumer sockets and
3185 * initialize the kernel tracer.
3186 */
3187 if (is_root) {
3188 if (set_consumer_sockets(&kconsumer_data)) {
3189 retval = -1;
3190 goto exit_init_data;
3191 }
3192
3193 /* Setup kernel tracer */
3194 if (!config.no_kernel) {
3195 init_kernel_tracer();
3196 if (kernel_tracer_fd >= 0) {
3197 ret = syscall_init_table();
3198 if (ret < 0) {
3199 ERR("Unable to populate syscall table. "
3200 "Syscall tracing won't work "
3201 "for this session daemon.");
3202 }
3203 }
3204 }
3205
3206 /* Set ulimit for open files */
3207 set_ulimit();
3208 }
3209 /* init lttng_fd tracking must be done after set_ulimit. */
3210 lttng_fd_init();
3211
3212 if (set_consumer_sockets(&ustconsumer64_data)) {
3213 retval = -1;
3214 goto exit_init_data;
3215 }
3216
3217 if (set_consumer_sockets(&ustconsumer32_data)) {
3218 retval = -1;
3219 goto exit_init_data;
3220 }
3221
3222 /* Setup the needed unix socket */
3223 if (init_daemon_socket()) {
3224 retval = -1;
3225 goto exit_init_data;
3226 }
3227
3228 /* Set credentials to socket */
3229 if (is_root && set_permissions(config.rundir.value)) {
3230 retval = -1;
3231 goto exit_init_data;
3232 }
3233
3234 /* Get parent pid if -S, --sig-parent is specified. */
3235 if (config.sig_parent) {
3236 ppid = getppid();
3237 }
3238
3239 /* Setup the kernel pipe for waking up the kernel thread */
3240 if (is_root && !config.no_kernel) {
3241 if (utils_create_pipe_cloexec(kernel_poll_pipe)) {
3242 retval = -1;
3243 goto exit_init_data;
3244 }
3245 }
3246
3247 /* Setup the thread apps communication pipe. */
3248 if (utils_create_pipe_cloexec(apps_cmd_pipe)) {
3249 retval = -1;
3250 goto exit_init_data;
3251 }
3252
3253 /* Setup the thread apps notify communication pipe. */
3254 if (utils_create_pipe_cloexec(apps_cmd_notify_pipe)) {
3255 retval = -1;
3256 goto exit_init_data;
3257 }
3258
3259 /* Initialize global buffer per UID and PID registry. */
3260 buffer_reg_init_uid_registry();
3261 buffer_reg_init_pid_registry();
3262
3263 /* Init UST command queue. */
3264 cds_wfcq_init(&ust_cmd_queue.head, &ust_cmd_queue.tail);
3265
3266 cmd_init();
3267
3268 /* Check for the application socket timeout env variable. */
3269 env_app_timeout = getenv(DEFAULT_APP_SOCKET_TIMEOUT_ENV);
3270 if (env_app_timeout) {
3271 config.app_socket_timeout = atoi(env_app_timeout);
3272 } else {
3273 config.app_socket_timeout = DEFAULT_APP_SOCKET_RW_TIMEOUT;
3274 }
3275
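/*
 * A hypothetical override of the application socket timeout (the exact
 * variable name comes from DEFAULT_APP_SOCKET_TIMEOUT_ENV in the defaults,
 * typically LTTNG_APP_SOCKET_TIMEOUT, expressed in seconds):
 *
 *     LTTNG_APP_SOCKET_TIMEOUT=10 lttng-sessiond --daemonize
 */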
3276 ret = write_pidfile();
3277 if (ret) {
3278 ERR("Error in write_pidfile");
3279 retval = -1;
3280 goto exit_init_data;
3281 }
3282
3283 /* Initialize communication library */
3284 lttcomm_init();
3285 /* Initialize TCP timeout values */
3286 lttcomm_inet_init();
3287
3288 if (load_session_init_data(&load_info) < 0) {
3289 retval = -1;
3290 goto exit_init_data;
3291 }
3292 load_info->path = config.load_session_path.value;
3293
3294 /* Create health-check thread. */
3295 if (!launch_health_management_thread()) {
3296 retval = -1;
3297 goto exit_health;
3298 }
3299
3300 /* notification_thread_data acquires the pipes' read side. */
3301 notification_thread_handle = notification_thread_handle_create(
3302 ust32_channel_monitor_pipe,
3303 ust64_channel_monitor_pipe,
3304 kernel_channel_monitor_pipe);
3305 if (!notification_thread_handle) {
3306 retval = -1;
3307 ERR("Failed to create notification thread shared data");
3308 goto exit_notification;
3309 }
3310
3311 /* Create notification thread. */
3312 if (!launch_notification_thread(notification_thread_handle)) {
3313 retval = -1;
3314 goto exit_notification;
3315 }
3316
3317 /* Create timer thread. */
3318 if (!launch_timer_thread(&timer_thread_parameters)) {
3319 retval = -1;
3320 goto exit_notification;
3321 }
3322
3323 /* rotation_thread_data acquires the pipes' read side. */
3324 rotation_thread_handle = rotation_thread_handle_create(
3325 rotation_timer_queue,
3326 notification_thread_handle);
3327 if (!rotation_thread_handle) {
3328 retval = -1;
3329 ERR("Failed to create rotation thread shared data");
3330 stop_threads();
3331 goto exit_rotation;
3332 }
3333
3334 /* Create rotation thread. */
3335 if (!launch_rotation_thread(rotation_thread_handle)) {
3336 retval = -1;
3337 goto exit_rotation;
3338 }
3339
3340 /* Create thread to manage the client socket */
3341 client_thread = launch_client_thread();
3342 if (!client_thread) {
3343 retval = -1;
3344 goto exit_client;
3345 }
3346
3347 /* Create thread to dispatch registration */
3348 ret = pthread_create(&dispatch_thread, default_pthread_attr(),
3349 thread_dispatch_ust_registration, (void *) NULL);
3350 if (ret) {
3351 errno = ret;
3352 PERROR("pthread_create dispatch");
3353 retval = -1;
3354 stop_threads();
3355 goto exit_dispatch;
3356 }
3357
3358 /* Create thread to manage application registration. */
3359 ret = pthread_create(&reg_apps_thread, default_pthread_attr(),
3360 thread_registration_apps, (void *) NULL);
3361 if (ret) {
3362 errno = ret;
3363 PERROR("pthread_create registration");
3364 retval = -1;
3365 stop_threads();
3366 goto exit_reg_apps;
3367 }
3368
3369 /* Create thread to manage application socket */
3370 ret = pthread_create(&apps_thread, default_pthread_attr(),
3371 thread_manage_apps, (void *) NULL);
3372 if (ret) {
3373 errno = ret;
3374 PERROR("pthread_create apps");
3375 retval = -1;
3376 stop_threads();
3377 goto exit_apps;
3378 }
3379
3380 /* Create thread to manage application notify socket */
3381 ret = pthread_create(&apps_notify_thread, default_pthread_attr(),
3382 ust_thread_manage_notify, (void *) NULL);
3383 if (ret) {
3384 errno = ret;
3385 PERROR("pthread_create notify");
3386 retval = -1;
3387 stop_threads();
3388 goto exit_apps_notify;
3389 }
3390
3391 /* Create agent registration thread. */
3392 ret = pthread_create(&agent_reg_thread, default_pthread_attr(),
3393 agent_thread_manage_registration, (void *) NULL);
3394 if (ret) {
3395 errno = ret;
3396 PERROR("pthread_create agent");
3397 retval = -1;
3398 stop_threads();
3399 goto exit_agent_reg;
3400 }
3401
3402 /* Don't start this thread if kernel tracing is not requested nor root */
3403 if (is_root && !config.no_kernel) {
3404 /* Create kernel thread to manage kernel event */
3405 ret = pthread_create(&kernel_thread, default_pthread_attr(),
3406 thread_manage_kernel, (void *) NULL);
3407 if (ret) {
3408 errno = ret;
3409 PERROR("pthread_create kernel");
3410 retval = -1;
3411 stop_threads();
3412 goto exit_kernel;
3413 }
3414 }
3415
3416 /* Create session loading thread. */
3417 ret = pthread_create(&load_session_thread, default_pthread_attr(),
3418 thread_load_session, load_info);
3419 if (ret) {
3420 errno = ret;
3421 PERROR("pthread_create load_session_thread");
3422 retval = -1;
3423 stop_threads();
3424 goto exit_load_session;
3425 }
3426
3427 /*
3428 * This is where we start awaiting program completion (e.g. through a
3429 * signal that asks the threads to tear down).
3430 */
3431
3432 ret = pthread_join(load_session_thread, &status);
3433 if (ret) {
3434 errno = ret;
3435 PERROR("pthread_join load_session_thread");
3436 retval = -1;
3437 }
3438
3439 /* Initiate teardown once activity occurs on the quit pipe. */
3440 sessiond_wait_for_quit_pipe(-1U);
3441
3442 /*
3443 * Ensure that the client thread is no longer accepting new commands,
3444 * which could cause new sessions to be created.
3445 */
3446 if (!lttng_thread_shutdown(client_thread)) {
3447 ERR("Failed to shutdown the client thread, continuing teardown");
3448 lttng_thread_put(client_thread);
3449 client_thread = NULL;
3450 }
3451
3452 destroy_all_sessions_and_wait();
3453 exit_load_session:
3454
3455 if (is_root && !config.no_kernel) {
3456 ret = pthread_join(kernel_thread, &status);
3457 if (ret) {
3458 errno = ret;
3459 PERROR("pthread_join");
3460 retval = -1;
3461 }
3462 }
3463 exit_kernel:
3464
3465 ret = pthread_join(agent_reg_thread, &status);
3466 if (ret) {
3467 errno = ret;
3468 PERROR("pthread_join agent");
3469 retval = -1;
3470 }
3471 exit_agent_reg:
3472
3473 ret = pthread_join(apps_notify_thread, &status);
3474 if (ret) {
3475 errno = ret;
3476 PERROR("pthread_join apps notify");
3477 retval = -1;
3478 }
3479 exit_apps_notify:
3480
3481 ret = pthread_join(apps_thread, &status);
3482 if (ret) {
3483 errno = ret;
3484 PERROR("pthread_join apps");
3485 retval = -1;
3486 }
3487 exit_apps:
3488
3489 ret = pthread_join(reg_apps_thread, &status);
3490 if (ret) {
3491 errno = ret;
3492 PERROR("pthread_join");
3493 retval = -1;
3494 }
3495 exit_reg_apps:
3496
3497 /*
3498 * Join dispatch thread after joining reg_apps_thread to ensure
3499 * we don't leak applications in the queue.
3500 */
3501 ret = pthread_join(dispatch_thread, &status);
3502 if (ret) {
3503 errno = ret;
3504 PERROR("pthread_join");
3505 retval = -1;
3506 }
3507 exit_dispatch:
3508 exit_client:
3509 exit_rotation:
3510 exit_notification:
3511 lttng_thread_list_shutdown_orphans();
3512 exit_health:
3513 exit_init_data:
3514 if (client_thread) {
3515 lttng_thread_put(client_thread);
3516 }
3517
3518 /*
3519 * Wait for all pending call_rcu work to complete before tearing
3520 * down data structures. call_rcu worker may be trying to
3521 * perform lookups in those structures.
3522 */
3523 rcu_barrier();
3524 /*
3525 * sessiond_cleanup() is called when no other thread is running, except
3526 * the ht_cleanup thread, which is needed to destroy the hash tables.
3527 */
3528 rcu_thread_online();
3529 sessiond_cleanup();
3530
3531 /*
3532 * Ensure all prior call_rcu are done. call_rcu callbacks may push
3533 * hash tables to the ht_cleanup thread. Therefore, we ensure that
3534 * the queue is empty before shutting down the clean-up thread.
3535 */
3536 rcu_barrier();
3537
3538 if (ht_cleanup_thread) {
3539 lttng_thread_shutdown(ht_cleanup_thread);
3540 lttng_thread_put(ht_cleanup_thread);
3541 }
3542
3543 rcu_thread_offline();
3544 rcu_unregister_thread();
3545
3546 if (rotation_thread_handle) {
3547 rotation_thread_handle_destroy(rotation_thread_handle);
3548 }
3549
3550 /*
3551 * After the rotation and timer thread have quit, we can safely destroy
3552 * the rotation_timer_queue.
3553 */
3554 rotation_thread_timer_queue_destroy(rotation_timer_queue);
3555 /*
3556 * The teardown of the notification system is performed after the
3557 * session daemon's teardown in order to allow it to be notified
3558 * of the active session and channels at the moment of the teardown.
3559 */
3560 if (notification_thread_handle) {
3561 notification_thread_handle_destroy(notification_thread_handle);
3562 }
3563 lttng_pipe_destroy(ust32_channel_monitor_pipe);
3564 lttng_pipe_destroy(ust64_channel_monitor_pipe);
3565 lttng_pipe_destroy(kernel_channel_monitor_pipe);
3566 exit_ht_cleanup:
3567
3568 health_app_destroy(health_sessiond);
3569 exit_health_sessiond_cleanup:
3570 exit_create_run_as_worker_cleanup:
3571
3572 exit_options:
3573 sessiond_cleanup_lock_file();
3574 sessiond_cleanup_options();
3575
3576 exit_set_signal_handler:
3577 if (!retval) {
3578 exit(EXIT_SUCCESS);
3579 } else {
3580 exit(EXIT_FAILURE);
3581 }
3582 }