Fix: health subsystem issues with shared code
[lttng-tools.git] / src / bin / lttng-sessiond / main.c
index 076aac54b1824659fc1ba88f195795084b334856..d999928feb8a0f06bfe24857fc3182f56c4f903f 100644 (file)
@@ -2,18 +2,18 @@
  * Copyright (C) 2011 - David Goulet <david.goulet@polymtl.ca>
  *                      Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; only version 2 of the License.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2 only,
+ * as published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA  02111-1307, USA.
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #define _GNU_SOURCE
@@ -21,7 +21,6 @@
 #include <grp.h>
 #include <limits.h>
 #include <pthread.h>
-#include <semaphore.h>
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -33,7 +32,7 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/wait.h>
-#include <urcu/futex.h>
+#include <urcu/uatomic.h>
 #include <unistd.h>
 #include <config.h>
 
 #include <common/compat/socket.h>
 #include <common/defaults.h>
 #include <common/kernel-consumer/kernel-consumer.h>
-#include <common/ust-consumer/ust-consumer.h>
+#include <common/futex.h>
+#include <common/relayd/relayd.h>
+#include <common/utils.h>
 
 #include "lttng-sessiond.h"
 #include "channel.h"
+#include "cmd.h"
+#include "consumer.h"
 #include "context.h"
 #include "event.h"
-#include "futex.h"
 #include "kernel.h"
+#include "kernel-consumer.h"
 #include "modprobe.h"
 #include "shm.h"
 #include "ust-ctl.h"
+#include "ust-consumer.h"
 #include "utils.h"
+#include "fd-limit.h"
+#include "health.h"
+#include "testpoint.h"
 
 #define CONSUMERD_FILE "lttng-consumerd"
 
-struct consumer_data {
-       enum lttng_consumer_type type;
-
-       pthread_t thread;       /* Worker thread interacting with the consumer */
-       sem_t sem;
-
-       /* Mutex to control consumerd pid assignation */
-       pthread_mutex_t pid_mutex;
-       pid_t pid;
-
-       int err_sock;
-       int cmd_sock;
-
-       /* consumer error and command Unix socket path */
-       char err_unix_sock_path[PATH_MAX];
-       char cmd_unix_sock_path[PATH_MAX];
-};
-
 /* Const values */
 const char default_home_dir[] = DEFAULT_HOME_DIR;
 const char default_tracing_group[] = DEFAULT_TRACING_GROUP;
@@ -83,6 +72,7 @@ const char default_global_apps_pipe[] = DEFAULT_GLOBAL_APPS_PIPE;
 
 const char *progname;
 const char *opt_tracing_group;
+static const char *opt_pidfile;
 static int opt_sig_parent;
 static int opt_verbose_consumer;
 static int opt_daemon;
@@ -91,13 +81,20 @@ static int is_root;                 /* Set to 1 if the daemon is running as root */
 static pid_t ppid;          /* Parent PID for --sig-parent option */
 static char *rundir;
 
-/* Consumer daemon specific control data */
+/*
+ * Consumer daemon specific control data. Every value not initialized here is
+ * set to 0 by the static definition.
+ */
 static struct consumer_data kconsumer_data = {
        .type = LTTNG_CONSUMER_KERNEL,
        .err_unix_sock_path = DEFAULT_KCONSUMERD_ERR_SOCK_PATH,
        .cmd_unix_sock_path = DEFAULT_KCONSUMERD_CMD_SOCK_PATH,
        .err_sock = -1,
        .cmd_sock = -1,
+       .pid_mutex = PTHREAD_MUTEX_INITIALIZER,
+       .lock = PTHREAD_MUTEX_INITIALIZER,
+       .cond = PTHREAD_COND_INITIALIZER,
+       .cond_mutex = PTHREAD_MUTEX_INITIALIZER,
 };
 static struct consumer_data ustconsumer64_data = {
        .type = LTTNG_CONSUMER64_UST,
@@ -105,6 +102,10 @@ static struct consumer_data ustconsumer64_data = {
        .cmd_unix_sock_path = DEFAULT_USTCONSUMERD64_CMD_SOCK_PATH,
        .err_sock = -1,
        .cmd_sock = -1,
+       .pid_mutex = PTHREAD_MUTEX_INITIALIZER,
+       .lock = PTHREAD_MUTEX_INITIALIZER,
+       .cond = PTHREAD_COND_INITIALIZER,
+       .cond_mutex = PTHREAD_MUTEX_INITIALIZER,
 };
 static struct consumer_data ustconsumer32_data = {
        .type = LTTNG_CONSUMER32_UST,
@@ -112,8 +113,13 @@ static struct consumer_data ustconsumer32_data = {
        .cmd_unix_sock_path = DEFAULT_USTCONSUMERD32_CMD_SOCK_PATH,
        .err_sock = -1,
        .cmd_sock = -1,
+       .pid_mutex = PTHREAD_MUTEX_INITIALIZER,
+       .lock = PTHREAD_MUTEX_INITIALIZER,
+       .cond = PTHREAD_COND_INITIALIZER,
+       .cond_mutex = PTHREAD_MUTEX_INITIALIZER,
 };
 
+/* Shared between threads */
 static int dispatch_thread_exit;
 
 /* Global application Unix socket path */
@@ -122,11 +128,13 @@ static char apps_unix_sock_path[PATH_MAX];
 static char client_unix_sock_path[PATH_MAX];
 /* global wait shm path for UST */
 static char wait_shm_path[PATH_MAX];
+/* Global health check unix path */
+static char health_unix_sock_path[PATH_MAX];
 
 /* Sockets and FDs */
 static int client_sock = -1;
 static int apps_sock = -1;
-static int kernel_tracer_fd = -1;
+int kernel_tracer_fd = -1;
 static int kernel_poll_pipe[2] = { -1, -1 };
 
 /*
@@ -147,7 +155,7 @@ static pthread_t reg_apps_thread;
 static pthread_t client_thread;
 static pthread_t kernel_thread;
 static pthread_t dispatch_thread;
-
+static pthread_t health_thread;
 
 /*
  * UST registration command queue. This queue is tied with a futex and uses a N
@@ -178,6 +186,53 @@ static const char *consumerd64_bin = CONFIG_CONSUMERD64_BIN;
 static const char *consumerd32_libdir = CONFIG_CONSUMERD32_LIBDIR;
 static const char *consumerd64_libdir = CONFIG_CONSUMERD64_LIBDIR;
 
+static const char *module_proc_lttng = "/proc/lttng";
+
+/*
+ * Consumer daemon state which is changed when spawning it, killing it or in
+ * case of a fatal error.
+ */
+enum consumerd_state {
+       CONSUMER_STARTED = 1,
+       CONSUMER_STOPPED = 2,
+       CONSUMER_ERROR   = 3,
+};
+
+/*
+ * This consumer daemon state is used to validate if a client command will be
+ * able to reach the consumer. If not, the client is informed. For instance,
+ * doing a "lttng start" when the consumer state is set to ERROR will return an
+ * error to the client.
+ *
+ * The following example shows a possible race condition of this scheme:
+ *
+ * consumer thread error happens
+ *                                    client cmd arrives
+ *                                    client cmd checks state -> still OK
+ * consumer thread exit, sets error
+ *                                    client cmd try to talk to consumer
+ *                                    ...
+ *
+ * However, since the consumer is a different daemon, we have no way of making
+ * sure the command will reach it safely even with this state flag. This is why
+ * we consider that up to the state validation during command processing, the
+ * command is safe. After that, we can not guarantee the correctness of the
+ * client request vis-a-vis the consumer.
+ */
+static enum consumerd_state ust_consumerd_state;
+static enum consumerd_state kernel_consumerd_state;
+
+/* Used for the health monitoring of the session daemon. See health.h */
+struct health_state health_thread_cmd;
+struct health_state health_thread_app_manage;
+struct health_state health_thread_app_reg;
+struct health_state health_thread_kernel;
+
+/*
+ * Socket timeout for receiving and sending in seconds.
+ */
+static int app_socket_timeout;
+
 static
 void setup_consumerd_path(void)
 {
@@ -317,54 +372,6 @@ error:
        return ret;
 }
 
-/*
- * Complete teardown of a kernel session. This free all data structure related
- * to a kernel session and update counter.
- */
-static void teardown_kernel_session(struct ltt_session *session)
-{
-       if (!session->kernel_session) {
-               DBG3("No kernel session when tearing down session");
-               return;
-       }
-
-       DBG("Tearing down kernel session");
-
-       /*
-        * If a custom kernel consumer was registered, close the socket before
-        * tearing down the complete kernel session structure
-        */
-       if (kconsumer_data.cmd_sock >= 0 &&
-                       session->kernel_session->consumer_fd != kconsumer_data.cmd_sock) {
-               lttcomm_close_unix_sock(session->kernel_session->consumer_fd);
-       }
-
-       trace_kernel_destroy_session(session->kernel_session);
-}
-
-/*
- * Complete teardown of all UST sessions. This will free everything on his path
- * and destroy the core essence of all ust sessions :)
- */
-static void teardown_ust_session(struct ltt_session *session)
-{
-       int ret;
-
-       if (!session->ust_session) {
-               DBG3("No UST session when tearing down session");
-               return;
-       }
-
-       DBG("Tearing down UST session(s)");
-
-       ret = ust_app_destroy_trace_all(session->ust_session);
-       if (ret) {
-               ERR("Error in ust_app_destroy_trace_all");
-       }
-
-       trace_ust_destroy_session(session->ust_session);
-}
-
 /*
  * Stop all threads by closing the thread quit pipe.
  */
@@ -380,7 +387,7 @@ static void stop_threads(void)
        }
 
        /* Dispatch thread */
-       dispatch_thread_exit = 1;
+       CMM_STORE_SHARED(dispatch_thread_exit, 1);
        futex_nto1_wake(&ust_cmd_queue.futex);
 }
 
@@ -389,12 +396,26 @@ static void stop_threads(void)
  */
 static void cleanup(void)
 {
-       int ret, i;
-       char *cmd;
+       int ret;
+       char *cmd = NULL;
        struct ltt_session *sess, *stmp;
 
        DBG("Cleaning up");
 
+       /* First things first, stop all threads */
+       utils_close_pipe(thread_quit_pipe);
+
+       /*
+        * If opt_pidfile is undefined, the default file will be wiped when
+        * removing the rundir.
+        */
+       if (opt_pidfile) {
+               ret = remove(opt_pidfile);
+               if (ret < 0) {
+                       PERROR("remove pidfile %s", opt_pidfile);
+               }
+       }
+
        DBG("Removing %s directory", rundir);
        ret = asprintf(&cmd, "rm -rf %s", rundir);
        if (ret < 0) {
@@ -407,6 +428,7 @@ static void cleanup(void)
                ERR("Unable to clean %s", rundir);
        }
        free(cmd);
+       free(rundir);
 
        DBG("Cleaning up all sessions");
 
@@ -417,17 +439,13 @@ static void cleanup(void)
                /* Cleanup ALL session */
                cds_list_for_each_entry_safe(sess, stmp,
                                &session_list_ptr->head, list) {
-                       teardown_kernel_session(sess);
-                       teardown_ust_session(sess);
-                       free(sess);
+                       cmd_destroy_session(sess, kernel_poll_pipe[1]);
                }
        }
 
        DBG("Closing all UST sockets");
        ust_app_clean_list();
 
-       pthread_mutex_destroy(&kconsumer_data.pid_mutex);
-
        if (is_root && !opt_no_kernel) {
                DBG2("Closing kernel fd");
                if (kernel_tracer_fd >= 0) {
@@ -440,35 +458,6 @@ static void cleanup(void)
                modprobe_remove_lttng_all();
        }
 
-       /*
-        * Closing all pipes used for communication between threads.
-        */
-       for (i = 0; i < 2; i++) {
-               if (kernel_poll_pipe[i] >= 0) {
-                       ret = close(kernel_poll_pipe[i]);
-                       if (ret) {
-                               PERROR("close");
-                       }
-                       
-               }
-       }
-       for (i = 0; i < 2; i++) {
-               if (thread_quit_pipe[i] >= 0) {
-                       ret = close(thread_quit_pipe[i]);
-                       if (ret) {
-                               PERROR("close");
-                       }
-               }
-       }
-       for (i = 0; i < 2; i++) {
-               if (apps_cmd_pipe[i] >= 0) {
-                       ret = close(apps_cmd_pipe[i]);
-                       if (ret) {
-                               PERROR("close");
-                       }
-               }
-       }
-
        /* <fun> */
        DBG("%c[%d;%dm*** assert failed :-) *** ==> %c[%dm%c[%d;%dm"
                        "Matthew, BEET driven development works!%c[%dm",
@@ -484,7 +473,7 @@ static void cleanup(void)
 static int send_unix_sock(int sock, void *buf, size_t len)
 {
        /* Check valid length */
-       if (len <= 0) {
+       if (len == 0) {
                return -1;
        }
 
@@ -509,139 +498,6 @@ static void clean_command_ctx(struct command_ctx **cmd_ctx)
        }
 }
 
-/*
- * Send all stream fds of kernel channel to the consumer.
- */
-static int send_kconsumer_channel_streams(struct consumer_data *consumer_data,
-               int sock, struct ltt_kernel_channel *channel,
-               uid_t uid, gid_t gid)
-{
-       int ret;
-       struct ltt_kernel_stream *stream;
-       struct lttcomm_consumer_msg lkm;
-
-       DBG("Sending streams of channel %s to kernel consumer",
-                       channel->channel->name);
-
-       /* Send channel */
-       lkm.cmd_type = LTTNG_CONSUMER_ADD_CHANNEL;
-       lkm.u.channel.channel_key = channel->fd;
-       lkm.u.channel.max_sb_size = channel->channel->attr.subbuf_size;
-       lkm.u.channel.mmap_len = 0;     /* for kernel */
-       DBG("Sending channel %d to consumer", lkm.u.channel.channel_key);
-       ret = lttcomm_send_unix_sock(sock, &lkm, sizeof(lkm));
-       if (ret < 0) {
-               PERROR("send consumer channel");
-               goto error;
-       }
-
-       /* Send streams */
-       cds_list_for_each_entry(stream, &channel->stream_list.head, list) {
-               if (!stream->fd) {
-                       continue;
-               }
-               lkm.cmd_type = LTTNG_CONSUMER_ADD_STREAM;
-               lkm.u.stream.channel_key = channel->fd;
-               lkm.u.stream.stream_key = stream->fd;
-               lkm.u.stream.state = stream->state;
-               lkm.u.stream.output = channel->channel->attr.output;
-               lkm.u.stream.mmap_len = 0;      /* for kernel */
-               lkm.u.stream.uid = uid;
-               lkm.u.stream.gid = gid;
-               strncpy(lkm.u.stream.path_name, stream->pathname, PATH_MAX - 1);
-               lkm.u.stream.path_name[PATH_MAX - 1] = '\0';
-               DBG("Sending stream %d to consumer", lkm.u.stream.stream_key);
-               ret = lttcomm_send_unix_sock(sock, &lkm, sizeof(lkm));
-               if (ret < 0) {
-                       PERROR("send consumer stream");
-                       goto error;
-               }
-               ret = lttcomm_send_fds_unix_sock(sock, &stream->fd, 1);
-               if (ret < 0) {
-                       PERROR("send consumer stream ancillary data");
-                       goto error;
-               }
-       }
-
-       DBG("consumer channel streams sent");
-
-       return 0;
-
-error:
-       return ret;
-}
-
-/*
- * Send all stream fds of the kernel session to the consumer.
- */
-static int send_kconsumer_session_streams(struct consumer_data *consumer_data,
-               struct ltt_kernel_session *session)
-{
-       int ret;
-       struct ltt_kernel_channel *chan;
-       struct lttcomm_consumer_msg lkm;
-       int sock = session->consumer_fd;
-
-       DBG("Sending metadata stream fd");
-
-       /* Extra protection. It's NOT supposed to be set to -1 at this point */
-       if (session->consumer_fd < 0) {
-               session->consumer_fd = consumer_data->cmd_sock;
-       }
-
-       if (session->metadata_stream_fd >= 0) {
-               /* Send metadata channel fd */
-               lkm.cmd_type = LTTNG_CONSUMER_ADD_CHANNEL;
-               lkm.u.channel.channel_key = session->metadata->fd;
-               lkm.u.channel.max_sb_size = session->metadata->conf->attr.subbuf_size;
-               lkm.u.channel.mmap_len = 0;     /* for kernel */
-               DBG("Sending metadata channel %d to consumer", lkm.u.stream.stream_key);
-               ret = lttcomm_send_unix_sock(sock, &lkm, sizeof(lkm));
-               if (ret < 0) {
-                       PERROR("send consumer channel");
-                       goto error;
-               }
-
-               /* Send metadata stream fd */
-               lkm.cmd_type = LTTNG_CONSUMER_ADD_STREAM;
-               lkm.u.stream.channel_key = session->metadata->fd;
-               lkm.u.stream.stream_key = session->metadata_stream_fd;
-               lkm.u.stream.state = LTTNG_CONSUMER_ACTIVE_STREAM;
-               lkm.u.stream.output = DEFAULT_KERNEL_CHANNEL_OUTPUT;
-               lkm.u.stream.mmap_len = 0;      /* for kernel */
-               lkm.u.stream.uid = session->uid;
-               lkm.u.stream.gid = session->gid;
-               strncpy(lkm.u.stream.path_name, session->metadata->pathname, PATH_MAX - 1);
-               lkm.u.stream.path_name[PATH_MAX - 1] = '\0';
-               DBG("Sending metadata stream %d to consumer", lkm.u.stream.stream_key);
-               ret = lttcomm_send_unix_sock(sock, &lkm, sizeof(lkm));
-               if (ret < 0) {
-                       PERROR("send consumer stream");
-                       goto error;
-               }
-               ret = lttcomm_send_fds_unix_sock(sock, &session->metadata_stream_fd, 1);
-               if (ret < 0) {
-                       PERROR("send consumer stream");
-                       goto error;
-               }
-       }
-
-       cds_list_for_each_entry(chan, &session->channel_list.head, list) {
-               ret = send_kconsumer_channel_streams(consumer_data, sock, chan,
-                               session->uid, session->gid);
-               if (ret < 0) {
-                       goto error;
-               }
-       }
-
-       DBG("consumer fds (metadata and channel streams) sent");
-
-       return 0;
-
-error:
-       return ret;
-}
-
 /*
  * Notify UST applications using the shm mmap futex.
  */
@@ -751,6 +607,7 @@ static int update_kernel_stream(struct consumer_data *consumer_data, int fd)
 {
        int ret = 0;
        struct ltt_session *session;
+       struct ltt_kernel_session *ksess;
        struct ltt_kernel_channel *channel;
 
        DBG("Updating kernel streams for channel fd %d", fd);
@@ -762,14 +619,9 @@ static int update_kernel_stream(struct consumer_data *consumer_data, int fd)
                        session_unlock(session);
                        continue;
                }
+               ksess = session->kernel_session;
 
-               /* This is not suppose to be -1 but this is an extra security check */
-               if (session->kernel_session->consumer_fd < 0) {
-                       session->kernel_session->consumer_fd = consumer_data->cmd_sock;
-               }
-
-               cds_list_for_each_entry(channel,
-                               &session->kernel_session->channel_list.head, list) {
+               cds_list_for_each_entry(channel, &ksess->channel_list.head, list) {
                        if (channel->fd == fd) {
                                DBG("Channel found, updating kernel streams");
                                ret = kernel_open_channel_stream(channel);
@@ -782,13 +634,26 @@ static int update_kernel_stream(struct consumer_data *consumer_data, int fd)
                                 * that tracing is started so it is safe to send our updated
                                 * stream fds.
                                 */
-                               if (session->kernel_session->consumer_fds_sent == 1) {
-                                       ret = send_kconsumer_channel_streams(consumer_data,
-                                                       session->kernel_session->consumer_fd, channel,
-                                                       session->uid, session->gid);
-                                       if (ret < 0) {
-                                               goto error;
+                               if (ksess->consumer_fds_sent == 1 && ksess->consumer != NULL) {
+                                       struct lttng_ht_iter iter;
+                                       struct consumer_socket *socket;
+
+                                       rcu_read_lock();
+                                       cds_lfht_for_each_entry(ksess->consumer->socks->ht,
+                                                       &iter.iter, socket, node.node) {
+                                               /* Code flow error */
+                                               assert(socket->fd >= 0);
+
+                                               pthread_mutex_lock(socket->lock);
+                                               ret = kernel_consumer_send_channel_stream(socket,
+                                                               channel, ksess);
+                                               pthread_mutex_unlock(socket->lock);
+                                               if (ret < 0) {
+                                                       rcu_read_unlock();
+                                                       goto error;
+                                               }
                                        }
+                                       rcu_read_unlock();
                                }
                                goto error;
                        }
@@ -833,31 +698,49 @@ static void update_ust_app(int app_sock)
  */
 static void *thread_manage_kernel(void *data)
 {
-       int ret, i, pollfd, update_poll_flag = 1;
+       int ret, i, pollfd, update_poll_flag = 1, err = -1;
        uint32_t revents, nb_fd;
        char tmp;
        struct lttng_poll_event events;
 
-       DBG("Thread manage kernel started");
+       DBG("[thread] Thread manage kernel started");
 
-       ret = create_thread_poll_set(&events, 2);
-       if (ret < 0) {
-               goto error_poll_create;
+       health_register(HEALTH_TYPE_KERNEL);
+
+       /*
+        * The first step of the loop below cleans this structure, which could
+        * free non-NULL pointers, so zero it before entering the loop.
+        */
+       memset(&events, 0, sizeof(events));
+
+       if (testpoint(thread_manage_kernel)) {
+               goto error_testpoint;
        }
 
-       ret = lttng_poll_add(&events, kernel_poll_pipe[0], LPOLLIN);
-       if (ret < 0) {
-               goto error;
+       health_code_update(&health_thread_kernel);
+
+       if (testpoint(thread_manage_kernel_before_loop)) {
+               goto error_testpoint;
        }
 
        while (1) {
+               health_code_update(&health_thread_kernel);
+
                if (update_poll_flag == 1) {
-                       /*
-                        * Reset number of fd in the poll set. Always 2 since there is the thread
-                        * quit pipe and the kernel pipe.
-                        */
-                       events.nb_fd = 2;
+                       /* Clean events object. We are about to populate it again. */
+                       lttng_poll_clean(&events);
+
+                       ret = create_thread_poll_set(&events, 2);
+                       if (ret < 0) {
+                               goto error_poll_create;
+                       }
+
+                       ret = lttng_poll_add(&events, kernel_poll_pipe[0], LPOLLIN);
+                       if (ret < 0) {
+                               goto error;
+                       }
 
+                       /* This will add the available kernel channel if any. */
                        ret = update_kernel_poll(&events);
                        if (ret < 0) {
                                goto error;
@@ -865,16 +748,13 @@ static void *thread_manage_kernel(void *data)
                        update_poll_flag = 0;
                }
 
-               nb_fd = LTTNG_POLL_GETNB(&events);
-
-               DBG("Thread kernel polling on %d fds", nb_fd);
-
-               /* Zeroed the poll events */
-               lttng_poll_reset(&events);
+               DBG("Thread kernel polling on %d fds", LTTNG_POLL_GETNB(&events));
 
                /* Poll infinite value of time */
        restart:
+               health_poll_update(&health_thread_kernel);
                ret = lttng_poll_wait(&events, -1);
+               health_poll_update(&health_thread_kernel);
                if (ret < 0) {
                        /*
                         * Restart interrupted system call.
@@ -890,20 +770,31 @@ static void *thread_manage_kernel(void *data)
                        continue;
                }
 
+               nb_fd = ret;
+
                for (i = 0; i < nb_fd; i++) {
                        /* Fetch once the poll data */
                        revents = LTTNG_POLL_GETEV(&events, i);
                        pollfd = LTTNG_POLL_GETFD(&events, i);
 
+                       health_code_update(&health_thread_kernel);
+
                        /* Thread quit pipe has been closed. Killing thread. */
                        ret = check_thread_quit_pipe(pollfd, revents);
                        if (ret) {
-                               goto error;
+                               err = 0;
+                               goto exit;
                        }
 
                        /* Check for data on kernel pipe */
                        if (pollfd == kernel_poll_pipe[0] && (revents & LPOLLIN)) {
-                               ret = read(kernel_poll_pipe[0], &tmp, 1);
+                               do {
+                                       ret = read(kernel_poll_pipe[0], &tmp, 1);
+                               } while (ret < 0 && errno == EINTR);
+                               /*
+                                * The return value is ignored here: any activity on this pipe
+                                * means an update is required anyway.
+                                */
                                update_poll_flag = 1;
                                continue;
                        } else {
@@ -926,19 +817,53 @@ static void *thread_manage_kernel(void *data)
                }
        }
 
+exit:
 error:
        lttng_poll_clean(&events);
 error_poll_create:
+error_testpoint:
+       utils_close_pipe(kernel_poll_pipe);
+       kernel_poll_pipe[0] = kernel_poll_pipe[1] = -1;
+       if (err) {
+               health_error(&health_thread_kernel);
+               ERR("Health error occurred in %s", __func__);
+               WARN("Kernel thread died unexpectedly. "
+                               "Kernel tracing can continue but CPU hotplug is disabled.");
+       }
+       health_unregister();
        DBG("Kernel thread dying");
        return NULL;
 }
 
+/*
+ * Set the consumer thread readiness state and signal its pthread condition.
+ */
+static void signal_consumer_condition(struct consumer_data *data, int state)
+{
+       pthread_mutex_lock(&data->cond_mutex);
+
+       /*
+        * The state is set before signaling. It can be any value; it is the
+        * waiter's job to correctly interpret this state, which is associated
+        * with the consumer pthread_cond.
+        *
+        * A value of 0 means that the corresponding thread of the consumer data
+        * was not started. 1 indicates that the thread has started and is ready
+        * for action. A negative value means that there was an error during the
+        * thread bootstrap.
+        */
+       data->consumer_thread_is_ready = state;
+       (void) pthread_cond_signal(&data->cond);
+
+       pthread_mutex_unlock(&data->cond_mutex);
+}
+
 /*
  * This thread manage the consumer error sent back to the session daemon.
  */
 static void *thread_manage_consumer(void *data)
 {
-       int sock = -1, i, ret, pollfd;
+       int sock = -1, i, ret, pollfd, err = -1;
        uint32_t revents, nb_fd;
        enum lttcomm_return_code code;
        struct lttng_poll_event events;
@@ -946,10 +871,26 @@ static void *thread_manage_consumer(void *data)
 
        DBG("[thread] Manage consumer started");
 
-       ret = lttcomm_listen_unix_sock(consumer_data->err_sock);
-       if (ret < 0) {
-               goto error_listen;
-       }
+       health_register(HEALTH_TYPE_CONSUMER);
+
+       /*
+        * Since the consumer thread can be spawned at any moment in time, we init
+        * the health to a poll status (1, which is a valid health over time).
+        * When the thread starts, we update here the health to a "code" path being
+        * an even value so this thread, when reaching a poll wait, does not
+        * trigger an error with an even value.
+        *
+        * Here is the use case we avoid.
+        *
+        * +1: the first poll update during initialization (main())
+        * +2 * x: multiple code update once in this thread.
+        * +1: poll wait in this thread (being a good health state).
+        * == even number which after the wait period shows as a bad health.
+        *
+        * In a nutshell, the following poll update to the health state brings back
+        * the state to an even value meaning a code path.
+        */
+       health_poll_update(&consumer_data->health);
 
        /*
         * Pass 2 as size here for the thread quit pipe and kconsumerd_err_sock.
@@ -960,16 +901,28 @@ static void *thread_manage_consumer(void *data)
                goto error_poll;
        }
 
+       /*
+        * The error socket here is already in a listening state which was done
+        * just before spawning this thread to avoid a race between the consumer
+        * daemon exec trying to connect and the listen() call.
+        */
        ret = lttng_poll_add(&events, consumer_data->err_sock, LPOLLIN | LPOLLRDHUP);
        if (ret < 0) {
                goto error;
        }
 
-       nb_fd = LTTNG_POLL_GETNB(&events);
+       health_code_update(&consumer_data->health);
 
        /* Inifinite blocking call, waiting for transmission */
 restart:
+       health_poll_update(&consumer_data->health);
+
+       if (testpoint(thread_manage_consumer)) {
+               goto error;
+       }
+
        ret = lttng_poll_wait(&events, -1);
+       health_poll_update(&consumer_data->health);
        if (ret < 0) {
                /*
                 * Restart interrupted system call.
@@ -980,15 +933,20 @@ restart:
                goto error;
        }
 
+       nb_fd = ret;
+
        for (i = 0; i < nb_fd; i++) {
                /* Fetch once the poll data */
                revents = LTTNG_POLL_GETEV(&events, i);
                pollfd = LTTNG_POLL_GETFD(&events, i);
 
+               health_code_update(&consumer_data->health);
+
                /* Thread quit pipe has been closed. Killing thread. */
                ret = check_thread_quit_pipe(pollfd, revents);
                if (ret) {
-                       goto error;
+                       err = 0;
+                       goto exit;
                }
 
                /* Event on the registration socket */
@@ -1005,6 +963,14 @@ restart:
                goto error;
        }
 
+       /*
+        * Set the CLOEXEC flag. Return code is useless because either way, the
+        * show must go on.
+        */
+       (void) utils_set_fd_cloexec(sock);
+
+       health_code_update(&consumer_data->health);
+
        DBG2("Receiving code from consumer err_sock");
 
        /* Getting status code from kconsumerd */
@@ -1014,17 +980,19 @@ restart:
                goto error;
        }
 
-       if (code == CONSUMERD_COMMAND_SOCK_READY) {
+       health_code_update(&consumer_data->health);
+
+       if (code == LTTCOMM_CONSUMERD_COMMAND_SOCK_READY) {
                consumer_data->cmd_sock =
                        lttcomm_connect_unix_sock(consumer_data->cmd_unix_sock_path);
                if (consumer_data->cmd_sock < 0) {
-                       sem_post(&consumer_data->sem);
+                       /* On error, signal condition and quit. */
+                       signal_consumer_condition(consumer_data, -1);
                        PERROR("consumer connect");
                        goto error;
                }
-               /* Signal condition to tell that the kconsumerd is ready */
-               sem_post(&consumer_data->sem);
-               DBG("consumer command socket ready");
+               signal_consumer_condition(consumer_data, 1);
+               DBG("Consumer command socket ready");
        } else {
                ERR("consumer error when waiting for SOCK_READY : %s",
                                lttcomm_get_readable_code(-code));
@@ -1042,12 +1010,13 @@ restart:
                goto error;
        }
 
-       /* Update number of fd */
-       nb_fd = LTTNG_POLL_GETNB(&events);
+       health_code_update(&consumer_data->health);
 
        /* Inifinite blocking call, waiting for transmission */
 restart_poll:
+       health_poll_update(&consumer_data->health);
        ret = lttng_poll_wait(&events, -1);
+       health_poll_update(&consumer_data->health);
        if (ret < 0) {
                /*
                 * Restart interrupted system call.
@@ -1058,15 +1027,20 @@ restart_poll:
                goto error;
        }
 
+       nb_fd = ret;
+
        for (i = 0; i < nb_fd; i++) {
                /* Fetch once the poll data */
                revents = LTTNG_POLL_GETEV(&events, i);
                pollfd = LTTNG_POLL_GETFD(&events, i);
 
+               health_code_update(&consumer_data->health);
+
                /* Thread quit pipe has been closed. Killing thread. */
                ret = check_thread_quit_pipe(pollfd, revents);
                if (ret) {
-                       goto error;
+                       err = 0;
+                       goto exit;
                }
 
                /* Event on the kconsumerd socket */
@@ -1078,6 +1052,8 @@ restart_poll:
                }
        }
 
+       health_code_update(&consumer_data->health);
+
        /* Wait for any kconsumerd error */
        ret = lttcomm_recv_unix_sock(sock, &code,
                        sizeof(enum lttcomm_return_code));
@@ -1088,7 +1064,19 @@ restart_poll:
 
        ERR("consumer return code : %s", lttcomm_get_readable_code(-code));
 
+exit:
 error:
+       /* Immediately flag the consumerd state as CONSUMER_ERROR */
+       if (consumer_data->type == LTTNG_CONSUMER_KERNEL) {
+               uatomic_set(&kernel_consumerd_state, CONSUMER_ERROR);
+       } else if (consumer_data->type == LTTNG_CONSUMER64_UST ||
+                       consumer_data->type == LTTNG_CONSUMER32_UST) {
+               uatomic_set(&ust_consumerd_state, CONSUMER_ERROR);
+       } else {
+               /* Code flow error... */
+               assert(0);
+       }
+
        if (consumer_data->err_sock >= 0) {
                ret = close(consumer_data->err_sock);
                if (ret) {
@@ -1114,7 +1102,11 @@ error:
 
        lttng_poll_clean(&events);
 error_poll:
-error_listen:
+       if (err) {
+               health_error(&consumer_data->health);
+               ERR("Health error occurred in %s", __func__);
+       }
+       health_unregister();
        DBG("consumer thread cleanup completed");
 
        return NULL;
@@ -1125,7 +1117,7 @@ error_listen:
  */
 static void *thread_manage_apps(void *data)
 {
-       int i, ret, pollfd;
+       int i, ret, pollfd, err = -1;
        uint32_t revents, nb_fd;
        struct ust_command ust_cmd;
        struct lttng_poll_event events;
@@ -1135,6 +1127,14 @@ static void *thread_manage_apps(void *data)
        rcu_register_thread();
        rcu_thread_online();
 
+       health_register(HEALTH_TYPE_APP_MANAGE);
+
+       if (testpoint(thread_manage_apps)) {
+               goto error_testpoint;
+       }
+
+       health_code_update(&health_thread_app_manage);
+
        ret = create_thread_poll_set(&events, 2);
        if (ret < 0) {
                goto error_poll_create;
@@ -1145,17 +1145,20 @@ static void *thread_manage_apps(void *data)
                goto error;
        }
 
-       while (1) {
-               /* Zeroed the events structure */
-               lttng_poll_reset(&events);
+       if (testpoint(thread_manage_apps_before_loop)) {
+               goto error;
+       }
 
-               nb_fd = LTTNG_POLL_GETNB(&events);
+       health_code_update(&health_thread_app_manage);
 
-               DBG("Apps thread polling on %d fds", nb_fd);
+       while (1) {
+               DBG("Apps thread polling on %d fds", LTTNG_POLL_GETNB(&events));
 
                /* Inifinite blocking call, waiting for transmission */
        restart:
+               health_poll_update(&health_thread_app_manage);
                ret = lttng_poll_wait(&events, -1);
+               health_poll_update(&health_thread_app_manage);
                if (ret < 0) {
                        /*
                         * Restart interrupted system call.
@@ -1166,15 +1169,20 @@ static void *thread_manage_apps(void *data)
                        goto error;
                }
 
+               nb_fd = ret;
+
                for (i = 0; i < nb_fd; i++) {
                        /* Fetch once the poll data */
                        revents = LTTNG_POLL_GETEV(&events, i);
                        pollfd = LTTNG_POLL_GETFD(&events, i);
 
+                       health_code_update(&health_thread_app_manage);
+
                        /* Thread quit pipe has been closed. Killing thread. */
                        ret = check_thread_quit_pipe(pollfd, revents);
                        if (ret) {
-                               goto error;
+                               err = 0;
+                               goto exit;
                        }
 
                        /* Inspect the apps cmd pipe */
@@ -1184,12 +1192,16 @@ static void *thread_manage_apps(void *data)
                                        goto error;
                                } else if (revents & LPOLLIN) {
                                        /* Empty pipe */
-                                       ret = read(apps_cmd_pipe[0], &ust_cmd, sizeof(ust_cmd));
+                                       do {
+                                               ret = read(apps_cmd_pipe[0], &ust_cmd, sizeof(ust_cmd));
+                                       } while (ret < 0 && errno == EINTR);
                                        if (ret < 0 || ret < sizeof(ust_cmd)) {
                                                PERROR("read apps cmd pipe");
                                                goto error;
                                        }
 
+                                       health_code_update(&health_thread_app_manage);
+
                                        /* Register applicaton to the session daemon */
                                        ret = ust_app_register(&ust_cmd.reg_msg,
                                                        ust_cmd.sock);
@@ -1199,6 +1211,8 @@ static void *thread_manage_apps(void *data)
                                                break;
                                        }
 
+                                       health_code_update(&health_thread_app_manage);
+
                                        /*
                                         * Validate UST version compatibility.
                                         */
@@ -1211,6 +1225,8 @@ static void *thread_manage_apps(void *data)
                                                update_ust_app(ust_cmd.sock);
                                        }
 
+                                       health_code_update(&health_thread_app_manage);
+
                                        ret = ust_app_register_done(ust_cmd.sock);
                                        if (ret < 0) {
                                                /*
@@ -1220,20 +1236,28 @@ static void *thread_manage_apps(void *data)
                                                ust_app_unregister(ust_cmd.sock);
                                        } else {
                                                /*
-                                                * We just need here to monitor the close of the UST
-                                                * socket and poll set monitor those by default.
-                                                * Listen on POLLIN (even if we never expect any
-                                                * data) to ensure that hangup wakes us.
+                                                * We only monitor the error events of the socket. This
+                                                * thread does not handle any incoming data from UST
+                                                * (POLLIN).
                                                 */
-                                               ret = lttng_poll_add(&events, ust_cmd.sock, LPOLLIN);
+                                               ret = lttng_poll_add(&events, ust_cmd.sock,
+                                                               LPOLLERR & LPOLLHUP & LPOLLRDHUP);
                                                if (ret < 0) {
                                                        goto error;
                                                }
 
+                                               /* Set socket timeout for both receiving and sending */
+                                               (void) lttcomm_setsockopt_rcv_timeout(ust_cmd.sock,
+                                                               app_socket_timeout);
+                                               (void) lttcomm_setsockopt_snd_timeout(ust_cmd.sock,
+                                                               app_socket_timeout);
+
                                                DBG("Apps with sock %d added to poll set",
                                                                ust_cmd.sock);
                                        }
 
+                                       health_code_update(&health_thread_app_manage);
+
                                        break;
                                }
                        } else {
@@ -1253,12 +1277,30 @@ static void *thread_manage_apps(void *data)
                                        break;
                                }
                        }
+
+                       health_code_update(&health_thread_app_manage);
                }
        }
 
+exit:
 error:
        lttng_poll_clean(&events);
 error_poll_create:
+error_testpoint:
+       utils_close_pipe(apps_cmd_pipe);
+       apps_cmd_pipe[0] = apps_cmd_pipe[1] = -1;
+
+       /*
+        * We don't clean the UST app hash table here since already registered
+        * applications can still be controlled so let them be until the session
+        * daemon dies or the applications stop.
+        */
+
+       if (err) {
+               health_error(&health_thread_app_manage);
+               ERR("Health error occurred in %s", __func__);
+       }
+       health_unregister();
        DBG("Application communication apps thread cleanup complete");
        rcu_thread_offline();
        rcu_unregister_thread();
@@ -1277,7 +1319,7 @@ static void *thread_dispatch_ust_registration(void *data)
 
        DBG("[thread] Dispatch UST command started");
 
-       while (!dispatch_thread_exit) {
+       while (!CMM_LOAD_SHARED(dispatch_thread_exit)) {
                /* Atomically prepare the queue futex */
                futex_nto1_prepare(&ust_cmd_queue.futex);
 
@@ -1303,18 +1345,28 @@ static void *thread_dispatch_ust_registration(void *data)
                         * call is blocking so we can be assured that the data will be read
                         * at some point in time or wait to the end of the world :)
                         */
-                       ret = write(apps_cmd_pipe[1], ust_cmd,
-                                       sizeof(struct ust_command));
-                       if (ret < 0) {
-                               PERROR("write apps cmd pipe");
-                               if (errno == EBADF) {
-                                       /*
-                                        * We can't inform the application thread to process
-                                        * registration. We will exit or else application
-                                        * registration will not occur and tracing will never
-                                        * start.
-                                        */
-                                       goto error;
+                       if (apps_cmd_pipe[1] >= 0) {
+                               do {
+                                       ret = write(apps_cmd_pipe[1], ust_cmd,
+                                                       sizeof(struct ust_command));
+                               } while (ret < 0 && errno == EINTR);
+                               if (ret < 0 || ret != sizeof(struct ust_command)) {
+                                       PERROR("write apps cmd pipe");
+                                       if (errno == EBADF) {
+                                               /*
+                                                * We can't inform the application thread to process
+                                                * registration. We will exit or else application
+                                                * registration will not occur and tracing will never
+                                                * start.
+                                                */
+                                               goto error;
+                                       }
+                               }
+                       } else {
+                               /* Application manager thread is not available. */
+                               ret = close(ust_cmd->sock);
+                               if (ret < 0) {
+                                       PERROR("close ust_cmd sock");
                                }
                        }
                        free(ust_cmd);
@@ -1334,7 +1386,7 @@ error:
  */
 static void *thread_registration_apps(void *data)
 {
-       int sock = -1, i, ret, pollfd;
+       int sock = -1, i, ret, pollfd, err = -1;
        uint32_t revents, nb_fd;
        struct lttng_poll_event events;
        /*
@@ -1345,7 +1397,13 @@ static void *thread_registration_apps(void *data)
 
        DBG("[thread] Manage application registration started");
 
-       ret = lttcomm_listen_unix_sock(apps_sock);
+       health_register(HEALTH_TYPE_APP_REG);
+
+       if (testpoint(thread_registration_apps)) {
+               goto error_testpoint;
+       }
+
+       ret = lttcomm_listen_unix_sock(apps_sock);
        if (ret < 0) {
                goto error_listen;
        }
@@ -1376,11 +1434,11 @@ static void *thread_registration_apps(void *data)
        while (1) {
                DBG("Accepting application registration");
 
-               nb_fd = LTTNG_POLL_GETNB(&events);
-
                /* Inifinite blocking call, waiting for transmission */
        restart:
+               health_poll_update(&health_thread_app_reg);
                ret = lttng_poll_wait(&events, -1);
+               health_poll_update(&health_thread_app_reg);
                if (ret < 0) {
                        /*
                         * Restart interrupted system call.
@@ -1391,7 +1449,11 @@ static void *thread_registration_apps(void *data)
                        goto error;
                }
 
+               nb_fd = ret;
+
                for (i = 0; i < nb_fd; i++) {
+                       health_code_update(&health_thread_app_reg);
+
                        /* Fetch once the poll data */
                        revents = LTTNG_POLL_GETEV(&events, i);
                        pollfd = LTTNG_POLL_GETFD(&events, i);
@@ -1399,7 +1461,8 @@ static void *thread_registration_apps(void *data)
                        /* Thread quit pipe has been closed. Killing thread. */
                        ret = check_thread_quit_pipe(pollfd, revents);
                        if (ret) {
-                               goto error;
+                               err = 0;
+                               goto exit;
                        }
 
                        /* Event on the registration socket */
@@ -1413,6 +1476,12 @@ static void *thread_registration_apps(void *data)
                                                goto error;
                                        }
 
+                                       /*
+                                        * Set the CLOEXEC flag. Return code is useless because
+                                        * either way, the show must go on.
+                                        */
+                                       (void) utils_set_fd_cloexec(sock);
+
                                        /* Create UST registration command for enqueuing */
                                        ust_cmd = zmalloc(sizeof(struct ust_command));
                                        if (ust_cmd == NULL) {
@@ -1424,6 +1493,18 @@ static void *thread_registration_apps(void *data)
                                         * Using message-based transmissions to ensure we don't
                                         * have to deal with partially received messages.
                                         */
+                                       ret = lttng_fd_get(LTTNG_FD_APPS, 1);
+                                       if (ret < 0) {
+                                               ERR("Exhausted file descriptors allowed for applications.");
+                                               free(ust_cmd);
+                                               ret = close(sock);
+                                               if (ret) {
+                                                       PERROR("close");
+                                               }
+                                               sock = -1;
+                                               continue;
+                                       }
+                                       health_code_update(&health_thread_app_reg);
                                        ret = lttcomm_recv_unix_sock(sock, &ust_cmd->reg_msg,
                                                        sizeof(struct ust_register_msg));
                                        if (ret < 0 || ret < sizeof(struct ust_register_msg)) {
@@ -1437,9 +1518,11 @@ static void *thread_registration_apps(void *data)
                                                if (ret) {
                                                        PERROR("close");
                                                }
+                                               lttng_fd_put(LTTNG_FD_APPS, 1);
                                                sock = -1;
                                                continue;
                                        }
+                                       health_code_update(&health_thread_app_reg);
 
                                        ust_cmd->sock = sock;
                                        sock = -1;
@@ -1467,7 +1550,13 @@ static void *thread_registration_apps(void *data)
                }
        }
 
+exit:
 error:
+       if (err) {
+               health_error(&health_thread_app_reg);
+               ERR("Health error occurred in %s", __func__);
+       }
+
        /* Notify that the registration thread is gone */
        notify_ust_apps(0);
 
@@ -1482,6 +1571,7 @@ error:
                if (ret) {
                        PERROR("close");
                }
+               lttng_fd_put(LTTNG_FD_APPS, 1);
        }
        unlink(apps_unix_sock_path);
 
@@ -1489,7 +1579,9 @@ error_poll_add:
        lttng_poll_clean(&events);
 error_listen:
 error_create_poll:
+error_testpoint:
        DBG("UST Registration thread cleanup complete");
+       health_unregister();
 
        return NULL;
 }
@@ -1500,59 +1592,110 @@ error_create_poll:
  */
 static int spawn_consumer_thread(struct consumer_data *consumer_data)
 {
-       int ret;
+       int ret, clock_ret;
        struct timespec timeout;
 
-       timeout.tv_sec = DEFAULT_SEM_WAIT_TIMEOUT;
-       timeout.tv_nsec = 0;
+       /* Make sure we set the readiness flag to 0 because we are NOT ready */
+       consumer_data->consumer_thread_is_ready = 0;
 
-       /* Setup semaphore */
-       ret = sem_init(&consumer_data->sem, 0, 0);
-       if (ret < 0) {
-               PERROR("sem_init consumer semaphore");
+       /* Setup pthread condition */
+       ret = pthread_condattr_init(&consumer_data->condattr);
+       if (ret != 0) {
+               errno = ret;
+               PERROR("pthread_condattr_init consumer data");
+               goto error;
+       }
+
+       /*
+        * Set the monotonic clock in order to make sure we DO NOT jump in time
+        * between the clock_gettime() call and the timedwait call. See bug #324
+        * for more details and how we noticed it.
+        */
+       ret = pthread_condattr_setclock(&consumer_data->condattr, CLOCK_MONOTONIC);
+       if (ret != 0) {
+               errno = ret;
+               PERROR("pthread_condattr_setclock consumer data");
                goto error;
        }
 
-       ret = pthread_create(&consumer_data->thread, NULL,
-                       thread_manage_consumer, consumer_data);
+       ret = pthread_cond_init(&consumer_data->cond, &consumer_data->condattr);
+       if (ret != 0) {
+               errno = ret;
+               PERROR("pthread_cond_init consumer data");
+               goto error;
+       }
+
+       ret = pthread_create(&consumer_data->thread, NULL, thread_manage_consumer,
+                       consumer_data);
        if (ret != 0) {
                PERROR("pthread_create consumer");
                ret = -1;
                goto error;
        }
 
+       /* We are about to wait on a pthread condition */
+       pthread_mutex_lock(&consumer_data->cond_mutex);
+
        /* Get time for sem_timedwait absolute timeout */
-       ret = clock_gettime(CLOCK_REALTIME, &timeout);
-       if (ret < 0) {
-               PERROR("clock_gettime spawn consumer");
-               /* Infinite wait for the kconsumerd thread to be ready */
-               ret = sem_wait(&consumer_data->sem);
-       } else {
-               /* Normal timeout if the gettime was successful */
-               timeout.tv_sec += DEFAULT_SEM_WAIT_TIMEOUT;
-               ret = sem_timedwait(&consumer_data->sem, &timeout);
+       clock_ret = clock_gettime(CLOCK_MONOTONIC, &timeout);
+       /*
+        * Set the timeout for the condition timed wait even if the clock gettime
+        * call fails since we might loop on that call and we want to avoid
+        * incrementing the timeout too many times.
+        */
+       timeout.tv_sec += DEFAULT_SEM_WAIT_TIMEOUT;
+
+       /*
+        * The following loop COULD be skipped in some conditions so this is why we
+        * set ret to 0 in order to make sure at least one round of the loop is
+        * done.
+        */
+       ret = 0;
+
+       /*
+        * Loop until the condition is reached or when a timeout is reached. Note
+        * that the pthread_cond_timedwait(P) man page specifies that EINTR can NOT
+        * be returned but the pthread_cond(3), from the glibc-doc, says that it is
+        * possible. This loop does not take any chances and works with both of
+        * them.
+        */
+       while (!consumer_data->consumer_thread_is_ready && ret != ETIMEDOUT) {
+               if (clock_ret < 0) {
+                       PERROR("clock_gettime spawn consumer");
+                       /* Infinite wait for the consumerd thread to be ready */
+                       ret = pthread_cond_wait(&consumer_data->cond,
+                                       &consumer_data->cond_mutex);
+               } else {
+                       ret = pthread_cond_timedwait(&consumer_data->cond,
+                                       &consumer_data->cond_mutex, &timeout);
+               }
        }
 
-       if (ret < 0) {
-               if (errno == ETIMEDOUT) {
+       /* Release the mutex guarding the pthread condition */
+       pthread_mutex_unlock(&consumer_data->cond_mutex);
+
+       if (ret != 0) {
+               errno = ret;
+               if (ret == ETIMEDOUT) {
                        /*
                         * Call has timed out so we kill the kconsumerd_thread and return
                         * an error.
                         */
-                       ERR("The consumer thread was never ready. Killing it");
+                       ERR("Condition timed out. The consumer thread was never ready."
+                                       " Killing it");
                        ret = pthread_cancel(consumer_data->thread);
                        if (ret < 0) {
                                PERROR("pthread_cancel consumer thread");
                        }
                } else {
-                       PERROR("semaphore wait failed consumer thread");
+                       PERROR("pthread_cond_wait failed consumer thread");
                }
                goto error;
        }
 
        pthread_mutex_lock(&consumer_data->pid_mutex);
        if (consumer_data->pid == 0) {
-               ERR("Kconsumerd did not start");
+               ERR("Consumerd did not start");
                pthread_mutex_unlock(&consumer_data->pid_mutex);
                goto error;
        }
@@ -1570,9 +1713,10 @@ error:
 static int join_consumer_thread(struct consumer_data *consumer_data)
 {
        void *status;
-       int ret;
 
-       if (consumer_data->pid != 0) {
+       /* Consumer pid must be a real one. */
+       if (consumer_data->pid > 0) {
+               int ret;
                ret = kill(consumer_data->pid, SIGTERM);
                if (ret) {
                        ERR("Error killing consumer daemon");
@@ -1753,6 +1897,16 @@ static int start_consumerd(struct consumer_data *consumer_data)
 {
        int ret;
 
+       /*
+        * Set the listen() state on the socket since there is a possible race
+        * between the exec() of the consumer daemon and this call if placed in the
+        * consumer thread. See bug #366 for more details.
+        */
+       ret = lttcomm_listen_unix_sock(consumer_data->err_sock);
+       if (ret < 0) {
+               goto error;
+       }
+
        pthread_mutex_lock(&consumer_data->pid_mutex);
        if (consumer_data->pid != 0) {
                pthread_mutex_unlock(&consumer_data->pid_mutex);
@@ -1782,15 +1936,31 @@ end:
        return 0;
 
 error:
+       /* Cleanup already created socket on error. */
+       if (consumer_data->err_sock >= 0) {
+               int err;
+
+               err = close(consumer_data->err_sock);
+               if (err < 0) {
+                       PERROR("close consumer data error socket");
+               }
+       }
        return ret;
 }
 
 /*
- * Check version of the lttng-modules.
+ * Compute health status of each consumer. If one of them is zero (bad
+ * state), we return 0.
  */
-static int validate_lttng_modules_version(void)
+static int check_consumer_health(void)
 {
-       return kernel_validate_version(kernel_tracer_fd);
+       int ret;
+
+       ret = health_check_state(HEALTH_TYPE_CONSUMER);
+
+       DBG3("Health consumer check %d", ret);
+
+       return ret;
 }
 
 /*
@@ -1815,7 +1985,7 @@ static int init_kernel_tracer(void)
        }
 
        /* Validate kernel version */
-       ret = validate_lttng_modules_version();
+       ret = kernel_validate_version(kernel_tracer_fd);
        if (ret < 0) {
                goto error_version;
        }
@@ -1835,7 +2005,7 @@ error_version:
                PERROR("close");
        }
        kernel_tracer_fd = -1;
-       return LTTCOMM_KERN_VERSION;
+       return LTTNG_ERR_KERN_VERSION;
 
 error_modules:
        ret = close(kernel_tracer_fd);
@@ -1850,37 +2020,67 @@ error:
        WARN("No kernel tracer available");
        kernel_tracer_fd = -1;
        if (!is_root) {
-               return LTTCOMM_NEED_ROOT_SESSIOND;
+               return LTTNG_ERR_NEED_ROOT_SESSIOND;
        } else {
-               return LTTCOMM_KERN_NA;
+               return LTTNG_ERR_KERN_NA;
        }
 }
 
+
 /*
- * Init tracing by creating trace directory and sending fds kernel consumer.
+ * Copy consumer output from the tracing session to the domain session. The
+ * function also applies the right modification on a per domain basis for the
+ * trace files destination directory.
  */
-static int init_kernel_tracing(struct ltt_kernel_session *session)
+static int copy_session_consumer(int domain, struct ltt_session *session)
 {
-       int ret = 0;
+       int ret;
+       const char *dir_name;
+       struct consumer_output *consumer;
+
+       assert(session);
+       assert(session->consumer);
 
-       if (session->consumer_fds_sent == 0) {
+       switch (domain) {
+       case LTTNG_DOMAIN_KERNEL:
+               DBG3("Copying tracing session consumer output in kernel session");
                /*
-                * Assign default kernel consumer socket if no consumer assigned to the
-                * kernel session. At this point, it's NOT supposed to be -1 but this is
-                * an extra security check.
+                * XXX: We should audit the session creation and what this function
+                * does "extra" in order to avoid a destroy since this function is used
+                * in the domain session creation (kernel and ust) only. Same for UST
+                * domain.
                 */
-               if (session->consumer_fd < 0) {
-                       session->consumer_fd = kconsumer_data.cmd_sock;
-               }
+               if (session->kernel_session->consumer) {
+                       consumer_destroy_output(session->kernel_session->consumer);
+               }
+               session->kernel_session->consumer =
+                       consumer_copy_output(session->consumer);
+               /* Ease our life a bit for the next part */
+               consumer = session->kernel_session->consumer;
+               dir_name = DEFAULT_KERNEL_TRACE_DIR;
+               break;
+       case LTTNG_DOMAIN_UST:
+               DBG3("Copying tracing session consumer output in UST session");
+               if (session->ust_session->consumer) {
+                       consumer_destroy_output(session->ust_session->consumer);
+               }
+               session->ust_session->consumer =
+                       consumer_copy_output(session->consumer);
+               /* Ease our life a bit for the next part */
+               consumer = session->ust_session->consumer;
+               dir_name = DEFAULT_UST_TRACE_DIR;
+               break;
+       default:
+               ret = LTTNG_ERR_UNKNOWN_DOMAIN;
+               goto error;
+       }
 
-               ret = send_kconsumer_session_streams(&kconsumer_data, session);
-               if (ret < 0) {
-                       ret = LTTCOMM_KERN_CONSUMER_FAIL;
-                       goto error;
-               }
+       /* Append correct directory to subdir */
+       strncat(consumer->subdir, dir_name,
+                       sizeof(consumer->subdir) - strlen(consumer->subdir) - 1);
+       DBG3("Copy session consumer subdir %s", consumer->subdir);
 
-               session->consumer_fds_sent = 1;
-       }
+       ret = LTTNG_OK;
 
 error:
        return ret;
@@ -1892,52 +2092,45 @@ error:
 static int create_ust_session(struct ltt_session *session,
                struct lttng_domain *domain)
 {
-       struct ltt_ust_session *lus = NULL;
        int ret;
+       struct ltt_ust_session *lus = NULL;
+
+       assert(session);
+       assert(domain);
+       assert(session->consumer);
 
        switch (domain->type) {
        case LTTNG_DOMAIN_UST:
                break;
        default:
-               ret = LTTCOMM_UNKNOWN_DOMAIN;
+               ERR("Unknown UST domain on create session %d", domain->type);
+               ret = LTTNG_ERR_UNKNOWN_DOMAIN;
                goto error;
        }
 
        DBG("Creating UST session");
 
-       lus = trace_ust_create_session(session->path, session->id, domain);
+       lus = trace_ust_create_session(session->path, session->id);
        if (lus == NULL) {
-               ret = LTTCOMM_UST_SESS_FAIL;
+               ret = LTTNG_ERR_UST_SESS_FAIL;
                goto error;
        }
 
-       ret = run_as_mkdir_recursive(lus->pathname, S_IRWXU | S_IRWXG,
-                       session->uid, session->gid);
-       if (ret < 0) {
-               if (ret != -EEXIST) {
-                       ERR("Trace directory creation error");
-                       ret = LTTCOMM_UST_SESS_FAIL;
-                       goto error;
-               }
-       }
-
-       /* The domain type dictate different actions on session creation */
-       switch (domain->type) {
-       case LTTNG_DOMAIN_UST:
-               /* No ustctl for the global UST domain */
-               break;
-       default:
-               ERR("Unknown UST domain on create session %d", domain->type);
-               goto error;
-       }
        lus->uid = session->uid;
        lus->gid = session->gid;
        session->ust_session = lus;
 
-       return LTTCOMM_OK;
+       /* Copy session output to the newly created UST session */
+       ret = copy_session_consumer(domain->type, session);
+       if (ret != LTTNG_OK) {
+               goto error;
+       }
+
+       return LTTNG_OK;
 
 error:
        free(lus);
+       session->ust_session = NULL;
        return ret;
 }
 
@@ -1952,50 +2145,54 @@ static int create_kernel_session(struct ltt_session *session)
 
        ret = kernel_create_session(session, kernel_tracer_fd);
        if (ret < 0) {
-               ret = LTTCOMM_KERN_SESS_FAIL;
+               ret = LTTNG_ERR_KERN_SESS_FAIL;
                goto error;
        }
 
-       /* Set kernel consumer socket fd */
-       if (kconsumer_data.cmd_sock >= 0) {
-               session->kernel_session->consumer_fd = kconsumer_data.cmd_sock;
+       /* Code flow safety */
+       assert(session->kernel_session);
+
+       /* Copy session output to the newly created Kernel session */
+       ret = copy_session_consumer(LTTNG_DOMAIN_KERNEL, session);
+       if (ret != LTTNG_OK) {
+               goto error;
        }
 
-       ret = run_as_mkdir_recursive(session->kernel_session->trace_path,
-                       S_IRWXU | S_IRWXG, session->uid, session->gid);
-       if (ret < 0) {
-               if (ret != -EEXIST) {
-                       ERR("Trace directory creation error");
-                       goto error;
+       /* Create directory(ies) on local filesystem. */
+       if (session->kernel_session->consumer->type == CONSUMER_DST_LOCAL &&
+                       strlen(session->kernel_session->consumer->dst.trace_path) > 0) {
+               ret = run_as_mkdir_recursive(
+                               session->kernel_session->consumer->dst.trace_path,
+                               S_IRWXU | S_IRWXG, session->uid, session->gid);
+               if (ret < 0) {
+                       if (ret != -EEXIST) {
+                               ERR("Trace directory creation error");
+                               goto error;
+                       }
                }
        }
+
        session->kernel_session->uid = session->uid;
        session->kernel_session->gid = session->gid;
 
+       return LTTNG_OK;
+
 error:
+       trace_kernel_destroy_session(session->kernel_session);
+       session->kernel_session = NULL;
        return ret;
 }
 
 /*
- * Check if the UID or GID match the session. Root user has access to all
- * sessions.
+ * Count the number of sessions permitted by uid/gid.
  */
-static int session_access_ok(struct ltt_session *session, uid_t uid, gid_t gid)
-{
-       if (uid != session->uid && gid != session->gid && uid != 0) {
-               return 0;
-       } else {
-               return 1;
-       }
-}
-
 static unsigned int lttng_sessions_count(uid_t uid, gid_t gid)
 {
        unsigned int i = 0;
        struct ltt_session *session;
 
        DBG("Counting number of available session for UID %d GID %d",
-               uid, gid);
+                       uid, gid);
        cds_list_for_each_entry(session, &session_list_ptr->head, list) {
                /*
                 * Only list the sessions the user can control.
@@ -2009,1330 +2206,128 @@ static unsigned int lttng_sessions_count(uid_t uid, gid_t gid)
 }
 
 /*
- * Using the session list, filled a lttng_session array to send back to the
- * client for session listing.
+ * Process the command requested by the lttng client within the command
+ * context structure. This function makes sure that the return structure (llm)
+ * is set and ready for transmission before returning.
+ *
+ * Return any error encountered or 0 for success.
  *
- * The session list lock MUST be acquired before calling this function. Use
- * session_lock_list() and session_unlock_list().
+ * "sock" is only used for special-case variable-length data.
  */
-static void list_lttng_sessions(struct lttng_session *sessions, uid_t uid,
-               gid_t gid)
+static int process_client_msg(struct command_ctx *cmd_ctx, int sock,
+               int *sock_error)
 {
-       unsigned int i = 0;
-       struct ltt_session *session;
-
-       DBG("Getting all available session for UID %d GID %d",
-               uid, gid);
-       /*
-        * Iterate over session list and append data after the control struct in
-        * the buffer.
-        */
-       cds_list_for_each_entry(session, &session_list_ptr->head, list) {
-               /*
-                * Only list the sessions the user can control.
-                */
-               if (!session_access_ok(session, uid, gid)) {
-                       continue;
-               }
-               strncpy(sessions[i].path, session->path, PATH_MAX);
-               sessions[i].path[PATH_MAX - 1] = '\0';
-               strncpy(sessions[i].name, session->name, NAME_MAX);
-               sessions[i].name[NAME_MAX - 1] = '\0';
-               sessions[i].enabled = session->enabled;
-               i++;
-       }
-}
+       int ret = LTTNG_OK;
+       int need_tracing_session = 1;
+       int need_domain;
 
-/*
- * Fill lttng_channel array of all channels.
- */
-static void list_lttng_channels(int domain, struct ltt_session *session,
-               struct lttng_channel *channels)
-{
-       int i = 0;
-       struct ltt_kernel_channel *kchan;
+       DBG("Processing client command %d", cmd_ctx->lsm->cmd_type);
 
-       DBG("Listing channels for session %s", session->name);
+       *sock_error = 0;
 
-       switch (domain) {
-       case LTTNG_DOMAIN_KERNEL:
-               /* Kernel channels */
-               if (session->kernel_session != NULL) {
-                       cds_list_for_each_entry(kchan,
-                                       &session->kernel_session->channel_list.head, list) {
-                               /* Copy lttng_channel struct to array */
-                               memcpy(&channels[i], kchan->channel, sizeof(struct lttng_channel));
-                               channels[i].enabled = kchan->enabled;
-                               i++;
-                       }
-               }
-               break;
-       case LTTNG_DOMAIN_UST:
-       {
-               struct lttng_ht_iter iter;
-               struct ltt_ust_channel *uchan;
-
-               cds_lfht_for_each_entry(session->ust_session->domain_global.channels->ht,
-                               &iter.iter, uchan, node.node) {
-                       strncpy(channels[i].name, uchan->name, LTTNG_SYMBOL_NAME_LEN);
-                       channels[i].attr.overwrite = uchan->attr.overwrite;
-                       channels[i].attr.subbuf_size = uchan->attr.subbuf_size;
-                       channels[i].attr.num_subbuf = uchan->attr.num_subbuf;
-                       channels[i].attr.switch_timer_interval =
-                               uchan->attr.switch_timer_interval;
-                       channels[i].attr.read_timer_interval =
-                               uchan->attr.read_timer_interval;
-                       channels[i].enabled = uchan->enabled;
-                       switch (uchan->attr.output) {
-                       case LTTNG_UST_MMAP:
-                       default:
-                               channels[i].attr.output = LTTNG_EVENT_MMAP;
-                               break;
-                       }
-                       i++;
-               }
+       switch (cmd_ctx->lsm->cmd_type) {
+       case LTTNG_CREATE_SESSION:
+       case LTTNG_DESTROY_SESSION:
+       case LTTNG_LIST_SESSIONS:
+       case LTTNG_LIST_DOMAINS:
+       case LTTNG_START_TRACE:
+       case LTTNG_STOP_TRACE:
+       case LTTNG_DATA_PENDING:
+               need_domain = 0;
                break;
-       }
        default:
-               break;
+               need_domain = 1;
        }
-}
 
-/*
- * Create a list of ust global domain events.
- */
-static int list_lttng_ust_global_events(char *channel_name,
-               struct ltt_ust_domain_global *ust_global, struct lttng_event **events)
-{
-       int i = 0, ret = 0;
-       unsigned int nb_event = 0;
-       struct lttng_ht_iter iter;
-       struct lttng_ht_node_str *node;
-       struct ltt_ust_channel *uchan;
-       struct ltt_ust_event *uevent;
-       struct lttng_event *tmp;
-
-       DBG("Listing UST global events for channel %s", channel_name);
-
-       rcu_read_lock();
-
-       lttng_ht_lookup(ust_global->channels, (void *)channel_name, &iter);
-       node = lttng_ht_iter_get_node_str(&iter);
-       if (node == NULL) {
-               ret = -LTTCOMM_UST_CHAN_NOT_FOUND;
+       if (opt_no_kernel && need_domain
+                       && cmd_ctx->lsm->domain.type == LTTNG_DOMAIN_KERNEL) {
+               if (!is_root) {
+                       ret = LTTNG_ERR_NEED_ROOT_SESSIOND;
+               } else {
+                       ret = LTTNG_ERR_KERN_NA;
+               }
                goto error;
        }
 
-       uchan = caa_container_of(&node->node, struct ltt_ust_channel, node.node);
-
-       nb_event += lttng_ht_get_count(uchan->events);
-
-       if (nb_event == 0) {
-               ret = nb_event;
-               goto error;
+       /* Deny register consumer if we already have a spawned consumer. */
+       if (cmd_ctx->lsm->cmd_type == LTTNG_REGISTER_CONSUMER) {
+               pthread_mutex_lock(&kconsumer_data.pid_mutex);
+               if (kconsumer_data.pid > 0) {
+                       ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
+                       pthread_mutex_unlock(&kconsumer_data.pid_mutex);
+                       goto error;
+               }
+               pthread_mutex_unlock(&kconsumer_data.pid_mutex);
        }
 
-       DBG3("Listing UST global %d events", nb_event);
-
-       tmp = zmalloc(nb_event * sizeof(struct lttng_event));
-       if (tmp == NULL) {
-               ret = -LTTCOMM_FATAL;
-               goto error;
+       /*
+        * Check for commands that don't need to allocate a returned payload. We do
+        * this here so we don't have to make the call for no payload at each
+        * command.
+        */
+       switch(cmd_ctx->lsm->cmd_type) {
+       case LTTNG_LIST_SESSIONS:
+       case LTTNG_LIST_TRACEPOINTS:
+       case LTTNG_LIST_TRACEPOINT_FIELDS:
+       case LTTNG_LIST_DOMAINS:
+       case LTTNG_LIST_CHANNELS:
+       case LTTNG_LIST_EVENTS:
+               break;
+       default:
+               /* Setup lttng message with no payload */
+               ret = setup_lttng_msg(cmd_ctx, 0);
+               if (ret < 0) {
+                       /* This label does not try to unlock the session */
+                       goto init_setup_error;
+               }
        }
 
-       cds_lfht_for_each_entry(uchan->events->ht, &iter.iter, uevent, node.node) {
-               strncpy(tmp[i].name, uevent->attr.name, LTTNG_SYMBOL_NAME_LEN);
-               tmp[i].name[LTTNG_SYMBOL_NAME_LEN - 1] = '\0';
-               tmp[i].enabled = uevent->enabled;
-               switch (uevent->attr.instrumentation) {
-               case LTTNG_UST_TRACEPOINT:
-                       tmp[i].type = LTTNG_EVENT_TRACEPOINT;
-                       break;
-               case LTTNG_UST_PROBE:
-                       tmp[i].type = LTTNG_EVENT_PROBE;
-                       break;
-               case LTTNG_UST_FUNCTION:
-                       tmp[i].type = LTTNG_EVENT_FUNCTION;
-                       break;
-               }
-               tmp[i].loglevel = uevent->attr.loglevel;
-               switch (uevent->attr.loglevel_type) {
-               case LTTNG_UST_LOGLEVEL_ALL:
-                       tmp[i].loglevel_type = LTTNG_EVENT_LOGLEVEL_ALL;
-                       break;
-               case LTTNG_UST_LOGLEVEL_RANGE:
-                       tmp[i].loglevel_type = LTTNG_EVENT_LOGLEVEL_RANGE;
-                       break;
-               case LTTNG_UST_LOGLEVEL_SINGLE:
-                       tmp[i].loglevel_type = LTTNG_EVENT_LOGLEVEL_SINGLE;
-                       break;
+       /* Commands that DO NOT need a session. */
+       switch (cmd_ctx->lsm->cmd_type) {
+       case LTTNG_CREATE_SESSION:
+       case LTTNG_CALIBRATE:
+       case LTTNG_LIST_SESSIONS:
+       case LTTNG_LIST_TRACEPOINTS:
+       case LTTNG_LIST_TRACEPOINT_FIELDS:
+               need_tracing_session = 0;
+               break;
+       default:
+               DBG("Getting session %s by name", cmd_ctx->lsm->session.name);
+               /*
+                * We keep the session list lock across _all_ commands
+                * for now, because the per-session lock does not
+                * handle teardown properly.
+                */
+               session_lock_list();
+               cmd_ctx->session = session_find_by_name(cmd_ctx->lsm->session.name);
+               if (cmd_ctx->session == NULL) {
+                       if (cmd_ctx->lsm->session.name != NULL) {
+                               ret = LTTNG_ERR_SESS_NOT_FOUND;
+                       } else {
+                               /* If no session name specified */
+                               ret = LTTNG_ERR_SELECT_SESS;
+                       }
+                       goto error;
+               } else {
+                       /* Acquire lock for the session */
+                       session_lock(cmd_ctx->session);
                }
-               i++;
+               break;
        }
 
-       ret = nb_event;
-       *events = tmp;
-
-error:
-       rcu_read_unlock();
-       return ret;
-}
-
-/*
- * Fill lttng_event array of all kernel events in the channel.
- */
-static int list_lttng_kernel_events(char *channel_name,
-               struct ltt_kernel_session *kernel_session, struct lttng_event **events)
-{
-       int i = 0, ret;
-       unsigned int nb_event;
-       struct ltt_kernel_event *event;
-       struct ltt_kernel_channel *kchan;
-
-       kchan = trace_kernel_get_channel_by_name(channel_name, kernel_session);
-       if (kchan == NULL) {
-               ret = LTTCOMM_KERN_CHAN_NOT_FOUND;
-               goto error;
+       if (!need_domain) {
+               goto skip_domain;
        }
 
-       nb_event = kchan->event_count;
-
-       DBG("Listing events for channel %s", kchan->channel->name);
-
-       if (nb_event == 0) {
-               ret = nb_event;
-               goto error;
-       }
-
-       *events = zmalloc(nb_event * sizeof(struct lttng_event));
-       if (*events == NULL) {
-               ret = LTTCOMM_FATAL;
-               goto error;
-       }
-
-       /* Kernel channels */
-       cds_list_for_each_entry(event, &kchan->events_list.head , list) {
-               strncpy((*events)[i].name, event->event->name, LTTNG_SYMBOL_NAME_LEN);
-               (*events)[i].name[LTTNG_SYMBOL_NAME_LEN - 1] = '\0';
-               (*events)[i].enabled = event->enabled;
-               switch (event->event->instrumentation) {
-                       case LTTNG_KERNEL_TRACEPOINT:
-                               (*events)[i].type = LTTNG_EVENT_TRACEPOINT;
-                               break;
-                       case LTTNG_KERNEL_KPROBE:
-                       case LTTNG_KERNEL_KRETPROBE:
-                               (*events)[i].type = LTTNG_EVENT_PROBE;
-                               memcpy(&(*events)[i].attr.probe, &event->event->u.kprobe,
-                                               sizeof(struct lttng_kernel_kprobe));
-                               break;
-                       case LTTNG_KERNEL_FUNCTION:
-                               (*events)[i].type = LTTNG_EVENT_FUNCTION;
-                               memcpy(&((*events)[i].attr.ftrace), &event->event->u.ftrace,
-                                               sizeof(struct lttng_kernel_function));
-                               break;
-                       case LTTNG_KERNEL_NOOP:
-                               (*events)[i].type = LTTNG_EVENT_NOOP;
-                               break;
-                       case LTTNG_KERNEL_SYSCALL:
-                               (*events)[i].type = LTTNG_EVENT_SYSCALL;
-                               break;
-                       case LTTNG_KERNEL_ALL:
-                               assert(0);
-                               break;
-               }
-               i++;
-       }
-
-       return nb_event;
-
-error:
-       return ret;
-}
-
-/*
- * Command LTTNG_DISABLE_CHANNEL processed by the client thread.
- */
-static int cmd_disable_channel(struct ltt_session *session,
-               int domain, char *channel_name)
-{
-       int ret;
-       struct ltt_ust_session *usess;
-
-       usess = session->ust_session;
-
-       switch (domain) {
-       case LTTNG_DOMAIN_KERNEL:
-       {
-               ret = channel_kernel_disable(session->kernel_session,
-                               channel_name);
-               if (ret != LTTCOMM_OK) {
-                       goto error;
-               }
-
-               kernel_wait_quiescent(kernel_tracer_fd);
-               break;
-       }
-       case LTTNG_DOMAIN_UST:
-       {
-               struct ltt_ust_channel *uchan;
-               struct lttng_ht *chan_ht;
-
-               chan_ht = usess->domain_global.channels;
-
-               uchan = trace_ust_find_channel_by_name(chan_ht, channel_name);
-               if (uchan == NULL) {
-                       ret = LTTCOMM_UST_CHAN_NOT_FOUND;
-                       goto error;
-               }
-
-               ret = channel_ust_disable(usess, domain, uchan);
-               if (ret != LTTCOMM_OK) {
-                       goto error;
-               }
-               break;
-       }
-#if 0
-       case LTTNG_DOMAIN_UST_PID_FOLLOW_CHILDREN:
-       case LTTNG_DOMAIN_UST_EXEC_NAME:
-       case LTTNG_DOMAIN_UST_PID:
-#endif
-       default:
-               ret = LTTCOMM_UNKNOWN_DOMAIN;
-               goto error;
-       }
-
-       ret = LTTCOMM_OK;
-
-error:
-       return ret;
-}
-
-/*
- * Command LTTNG_ENABLE_CHANNEL processed by the client thread.
- */
-static int cmd_enable_channel(struct ltt_session *session,
-               int domain, struct lttng_channel *attr)
-{
-       int ret;
-       struct ltt_ust_session *usess = session->ust_session;
-       struct lttng_ht *chan_ht;
-
-       DBG("Enabling channel %s for session %s", attr->name, session->name);
-
-       switch (domain) {
-       case LTTNG_DOMAIN_KERNEL:
-       {
-               struct ltt_kernel_channel *kchan;
-
-               kchan = trace_kernel_get_channel_by_name(attr->name,
-                               session->kernel_session);
-               if (kchan == NULL) {
-                       ret = channel_kernel_create(session->kernel_session,
-                                       attr, kernel_poll_pipe[1]);
-               } else {
-                       ret = channel_kernel_enable(session->kernel_session, kchan);
-               }
-
-               if (ret != LTTCOMM_OK) {
-                       goto error;
-               }
-
-               kernel_wait_quiescent(kernel_tracer_fd);
-               break;
-       }
-       case LTTNG_DOMAIN_UST:
-       {
-               struct ltt_ust_channel *uchan;
-
-               chan_ht = usess->domain_global.channels;
-
-               uchan = trace_ust_find_channel_by_name(chan_ht, attr->name);
-               if (uchan == NULL) {
-                       ret = channel_ust_create(usess, domain, attr);
-               } else {
-                       ret = channel_ust_enable(usess, domain, uchan);
-               }
-               break;
-       }
-#if 0
-       case LTTNG_DOMAIN_UST_PID_FOLLOW_CHILDREN:
-       case LTTNG_DOMAIN_UST_EXEC_NAME:
-       case LTTNG_DOMAIN_UST_PID:
-#endif
-       default:
-               ret = LTTCOMM_UNKNOWN_DOMAIN;
-               goto error;
-       }
-
-error:
-       return ret;
-}
-
-/*
- * Command LTTNG_DISABLE_EVENT processed by the client thread.
- */
-static int cmd_disable_event(struct ltt_session *session, int domain,
-               char *channel_name, char *event_name)
-{
-       int ret;
-
-       switch (domain) {
-       case LTTNG_DOMAIN_KERNEL:
-       {
-               struct ltt_kernel_channel *kchan;
-               struct ltt_kernel_session *ksess;
-
-               ksess = session->kernel_session;
-
-               kchan = trace_kernel_get_channel_by_name(channel_name, ksess);
-               if (kchan == NULL) {
-                       ret = LTTCOMM_KERN_CHAN_NOT_FOUND;
-                       goto error;
-               }
-
-               ret = event_kernel_disable_tracepoint(ksess, kchan, event_name);
-               if (ret != LTTCOMM_OK) {
-                       goto error;
-               }
-
-               kernel_wait_quiescent(kernel_tracer_fd);
-               break;
-       }
-       case LTTNG_DOMAIN_UST:
-       {
-               struct ltt_ust_channel *uchan;
-               struct ltt_ust_session *usess;
-
-               usess = session->ust_session;
-
-               uchan = trace_ust_find_channel_by_name(usess->domain_global.channels,
-                               channel_name);
-               if (uchan == NULL) {
-                       ret = LTTCOMM_UST_CHAN_NOT_FOUND;
-                       goto error;
-               }
-
-               ret = event_ust_disable_tracepoint(usess, domain, uchan, event_name);
-               if (ret != LTTCOMM_OK) {
-                       goto error;
-               }
-
-               DBG3("Disable UST event %s in channel %s completed", event_name,
-                               channel_name);
-               break;
-       }
-#if 0
-       case LTTNG_DOMAIN_UST_EXEC_NAME:
-       case LTTNG_DOMAIN_UST_PID:
-       case LTTNG_DOMAIN_UST_PID_FOLLOW_CHILDREN:
-#endif
-       default:
-               ret = LTTCOMM_UND;
-               goto error;
-       }
-
-       ret = LTTCOMM_OK;
-
-error:
-       return ret;
-}
-
-/*
- * Command LTTNG_DISABLE_ALL_EVENT processed by the client thread.
- */
-static int cmd_disable_event_all(struct ltt_session *session, int domain,
-               char *channel_name)
-{
-       int ret;
-
-       switch (domain) {
-       case LTTNG_DOMAIN_KERNEL:
-       {
-               struct ltt_kernel_session *ksess;
-               struct ltt_kernel_channel *kchan;
-
-               ksess = session->kernel_session;
-
-               kchan = trace_kernel_get_channel_by_name(channel_name, ksess);
-               if (kchan == NULL) {
-                       ret = LTTCOMM_KERN_CHAN_NOT_FOUND;
-                       goto error;
-               }
-
-               ret = event_kernel_disable_all(ksess, kchan);
-               if (ret != LTTCOMM_OK) {
-                       goto error;
-               }
-
-               kernel_wait_quiescent(kernel_tracer_fd);
-               break;
-       }
-       case LTTNG_DOMAIN_UST:
-       {
-               struct ltt_ust_session *usess;
-               struct ltt_ust_channel *uchan;
-
-               usess = session->ust_session;
-
-               uchan = trace_ust_find_channel_by_name(usess->domain_global.channels,
-                               channel_name);
-               if (uchan == NULL) {
-                       ret = LTTCOMM_UST_CHAN_NOT_FOUND;
-                       goto error;
-               }
-
-               ret = event_ust_disable_all_tracepoints(usess, domain, uchan);
-               if (ret != 0) {
-                       goto error;
-               }
-
-               DBG3("Disable all UST events in channel %s completed", channel_name);
-
-               break;
-       }
-#if 0
-       case LTTNG_DOMAIN_UST_EXEC_NAME:
-       case LTTNG_DOMAIN_UST_PID:
-       case LTTNG_DOMAIN_UST_PID_FOLLOW_CHILDREN:
-#endif
-       default:
-               ret = LTTCOMM_UND;
-               goto error;
-       }
-
-       ret = LTTCOMM_OK;
-
-error:
-       return ret;
-}
-
-/*
- * Command LTTNG_ADD_CONTEXT processed by the client thread.
- */
-static int cmd_add_context(struct ltt_session *session, int domain,
-               char *channel_name, char *event_name, struct lttng_event_context *ctx)
-{
-       int ret;
-
-       switch (domain) {
-       case LTTNG_DOMAIN_KERNEL:
-               /* Add kernel context to kernel tracer */
-               ret = context_kernel_add(session->kernel_session, ctx,
-                               event_name, channel_name);
-               if (ret != LTTCOMM_OK) {
-                       goto error;
-               }
-               break;
-       case LTTNG_DOMAIN_UST:
-       {
-               struct ltt_ust_session *usess = session->ust_session;
-
-               ret = context_ust_add(usess, domain, ctx, event_name, channel_name);
-               if (ret != LTTCOMM_OK) {
-                       goto error;
-               }
-               break;
-       }
-#if 0
-       case LTTNG_DOMAIN_UST_EXEC_NAME:
-       case LTTNG_DOMAIN_UST_PID:
-       case LTTNG_DOMAIN_UST_PID_FOLLOW_CHILDREN:
-#endif
-       default:
-               ret = LTTCOMM_UND;
-               goto error;
-       }
-
-       ret = LTTCOMM_OK;
-
-error:
-       return ret;
-}
-
-/*
- * Command LTTNG_ENABLE_EVENT processed by the client thread.
- */
-static int cmd_enable_event(struct ltt_session *session, int domain,
-               char *channel_name, struct lttng_event *event)
-{
-       int ret;
-       struct lttng_channel *attr;
-       struct ltt_ust_session *usess = session->ust_session;
-
-       switch (domain) {
-       case LTTNG_DOMAIN_KERNEL:
-       {
-               struct ltt_kernel_channel *kchan;
-
-               kchan = trace_kernel_get_channel_by_name(channel_name,
-                               session->kernel_session);
-               if (kchan == NULL) {
-                       attr = channel_new_default_attr(domain);
-                       if (attr == NULL) {
-                               ret = LTTCOMM_FATAL;
-                               goto error;
-                       }
-                       snprintf(attr->name, NAME_MAX, "%s", channel_name);
-
-                       /* This call will notify the kernel thread */
-                       ret = channel_kernel_create(session->kernel_session,
-                                       attr, kernel_poll_pipe[1]);
-                       if (ret != LTTCOMM_OK) {
-                               free(attr);
-                               goto error;
-                       }
-                       free(attr);
-               }
-
-               /* Get the newly created kernel channel pointer */
-               kchan = trace_kernel_get_channel_by_name(channel_name,
-                               session->kernel_session);
-               if (kchan == NULL) {
-                       /* This sould not happen... */
-                       ret = LTTCOMM_FATAL;
-                       goto error;
-               }
-
-               ret = event_kernel_enable_tracepoint(session->kernel_session, kchan,
-                               event);
-               if (ret != LTTCOMM_OK) {
-                       goto error;
-               }
-
-               kernel_wait_quiescent(kernel_tracer_fd);
-               break;
-       }
-       case LTTNG_DOMAIN_UST:
-       {
-               struct lttng_channel *attr;
-               struct ltt_ust_channel *uchan;
-
-               /* Get channel from global UST domain */
-               uchan = trace_ust_find_channel_by_name(usess->domain_global.channels,
-                               channel_name);
-               if (uchan == NULL) {
-                       /* Create default channel */
-                       attr = channel_new_default_attr(domain);
-                       if (attr == NULL) {
-                               ret = LTTCOMM_FATAL;
-                               goto error;
-                       }
-                       snprintf(attr->name, NAME_MAX, "%s", channel_name);
-                       attr->name[NAME_MAX - 1] = '\0';
-
-                       ret = channel_ust_create(usess, domain, attr);
-                       if (ret != LTTCOMM_OK) {
-                               free(attr);
-                               goto error;
-                       }
-                       free(attr);
-
-                       /* Get the newly created channel reference back */
-                       uchan = trace_ust_find_channel_by_name(
-                                       usess->domain_global.channels, channel_name);
-                       if (uchan == NULL) {
-                               /* Something is really wrong */
-                               ret = LTTCOMM_FATAL;
-                               goto error;
-                       }
-               }
-
-               /* At this point, the session and channel exist on the tracer */
-               ret = event_ust_enable_tracepoint(usess, domain, uchan, event);
-               if (ret != LTTCOMM_OK) {
-                       goto error;
-               }
-               break;
-       }
-#if 0
-       case LTTNG_DOMAIN_UST_EXEC_NAME:
-       case LTTNG_DOMAIN_UST_PID:
-       case LTTNG_DOMAIN_UST_PID_FOLLOW_CHILDREN:
-#endif
-       default:
-               ret = LTTCOMM_UND;
-               goto error;
-       }
-
-       ret = LTTCOMM_OK;
-
-error:
-       return ret;
-}
-
-/*
- * Command LTTNG_ENABLE_ALL_EVENT processed by the client thread.
- */
-static int cmd_enable_event_all(struct ltt_session *session, int domain,
-               char *channel_name, int event_type)
-{
-       int ret;
-       struct ltt_kernel_channel *kchan;
-
-       switch (domain) {
-       case LTTNG_DOMAIN_KERNEL:
-               kchan = trace_kernel_get_channel_by_name(channel_name,
-                               session->kernel_session);
-               if (kchan == NULL) {
-                       /* This call will notify the kernel thread */
-                       ret = channel_kernel_create(session->kernel_session, NULL,
-                                       kernel_poll_pipe[1]);
-                       if (ret != LTTCOMM_OK) {
-                               goto error;
-                       }
-
-                       /* Get the newly created kernel channel pointer */
-                       kchan = trace_kernel_get_channel_by_name(channel_name,
-                                       session->kernel_session);
-                       if (kchan == NULL) {
-                               /* This sould not happen... */
-                               ret = LTTCOMM_FATAL;
-                               goto error;
-                       }
-
-               }
-
-               switch (event_type) {
-               case LTTNG_EVENT_SYSCALL:
-                       ret = event_kernel_enable_all_syscalls(session->kernel_session,
-                                       kchan, kernel_tracer_fd);
-                       break;
-               case LTTNG_EVENT_TRACEPOINT:
-                       /*
-                        * This call enables all LTTNG_KERNEL_TRACEPOINTS and
-                        * events already registered to the channel.
-                        */
-                       ret = event_kernel_enable_all_tracepoints(session->kernel_session,
-                                       kchan, kernel_tracer_fd);
-                       break;
-               case LTTNG_EVENT_ALL:
-                       /* Enable syscalls and tracepoints */
-                       ret = event_kernel_enable_all(session->kernel_session,
-                                       kchan, kernel_tracer_fd);
-                       break;
-               default:
-                       ret = LTTCOMM_KERN_ENABLE_FAIL;
-                       goto error;
-               }
-
-               /* Manage return value */
-               if (ret != LTTCOMM_OK) {
-                       goto error;
-               }
-
-               kernel_wait_quiescent(kernel_tracer_fd);
-               break;
-       case LTTNG_DOMAIN_UST:
-       {
-               struct lttng_channel *attr;
-               struct ltt_ust_channel *uchan;
-               struct ltt_ust_session *usess = session->ust_session;
-
-               /* Get channel from global UST domain */
-               uchan = trace_ust_find_channel_by_name(usess->domain_global.channels,
-                               channel_name);
-               if (uchan == NULL) {
-                       /* Create default channel */
-                       attr = channel_new_default_attr(domain);
-                       if (attr == NULL) {
-                               ret = LTTCOMM_FATAL;
-                               goto error;
-                       }
-                       snprintf(attr->name, NAME_MAX, "%s", channel_name);
-                       attr->name[NAME_MAX - 1] = '\0';
-
-                       /* Use the internal command enable channel */
-                       ret = channel_ust_create(usess, domain, attr);
-                       if (ret != LTTCOMM_OK) {
-                               free(attr);
-                               goto error;
-                       }
-                       free(attr);
-
-                       /* Get the newly created channel reference back */
-                       uchan = trace_ust_find_channel_by_name(
-                                       usess->domain_global.channels, channel_name);
-                       if (uchan == NULL) {
-                               /* Something is really wrong */
-                               ret = LTTCOMM_FATAL;
-                               goto error;
-                       }
-               }
-
-               /* At this point, the session and channel exist on the tracer */
-
-               switch (event_type) {
-               case LTTNG_EVENT_ALL:
-               case LTTNG_EVENT_TRACEPOINT:
-                       ret = event_ust_enable_all_tracepoints(usess, domain, uchan);
-                       if (ret != LTTCOMM_OK) {
-                               goto error;
-                       }
-                       break;
-               default:
-                       ret = LTTCOMM_UST_ENABLE_FAIL;
-                       goto error;
-               }
-
-               /* Manage return value */
-               if (ret != LTTCOMM_OK) {
-                       goto error;
-               }
-
-               break;
-       }
-#if 0
-       case LTTNG_DOMAIN_UST_EXEC_NAME:
-       case LTTNG_DOMAIN_UST_PID:
-       case LTTNG_DOMAIN_UST_PID_FOLLOW_CHILDREN:
-#endif
-       default:
-               ret = LTTCOMM_UND;
-               goto error;
-       }
-
-       ret = LTTCOMM_OK;
-
-error:
-       return ret;
-}
-
-/*
- * Command LTTNG_LIST_TRACEPOINTS processed by the client thread.
- */
-static ssize_t cmd_list_tracepoints(int domain, struct lttng_event **events)
-{
-       int ret;
-       ssize_t nb_events = 0;
-
-       switch (domain) {
-       case LTTNG_DOMAIN_KERNEL:
-               nb_events = kernel_list_events(kernel_tracer_fd, events);
-               if (nb_events < 0) {
-                       ret = LTTCOMM_KERN_LIST_FAIL;
-                       goto error;
-               }
-               break;
-       case LTTNG_DOMAIN_UST:
-               nb_events = ust_app_list_events(events);
-               if (nb_events < 0) {
-                       ret = LTTCOMM_UST_LIST_FAIL;
-                       goto error;
-               }
-               break;
-       default:
-               ret = LTTCOMM_UND;
-               goto error;
-       }
-
-       return nb_events;
-
-error:
-       /* Return negative value to differentiate return code */
-       return -ret;
-}
-
-/*
- * Command LTTNG_START_TRACE processed by the client thread.
- */
-static int cmd_start_trace(struct ltt_session *session)
-{
-       int ret;
-       struct ltt_kernel_session *ksession;
-       struct ltt_ust_session *usess;
-
-       /* Short cut */
-       ksession = session->kernel_session;
-       usess = session->ust_session;
-
-       if (session->enabled) {
-               /* Already started. */
-               ret = LTTCOMM_TRACE_ALREADY_STARTED;
-               goto error;
-       }
-
-       session->enabled = 1;
-
-       /* Kernel tracing */
-       if (ksession != NULL) {
-               struct ltt_kernel_channel *kchan;
-
-               /* Open kernel metadata */
-               if (ksession->metadata == NULL) {
-                       ret = kernel_open_metadata(ksession, ksession->trace_path);
-                       if (ret < 0) {
-                               ret = LTTCOMM_KERN_META_FAIL;
-                               goto error;
-                       }
-               }
-
-               /* Open kernel metadata stream */
-               if (ksession->metadata_stream_fd < 0) {
-                       ret = kernel_open_metadata_stream(ksession);
-                       if (ret < 0) {
-                               ERR("Kernel create metadata stream failed");
-                               ret = LTTCOMM_KERN_STREAM_FAIL;
-                               goto error;
-                       }
-               }
-
-               /* For each channel */
-               cds_list_for_each_entry(kchan, &ksession->channel_list.head, list) {
-                       if (kchan->stream_count == 0) {
-                               ret = kernel_open_channel_stream(kchan);
-                               if (ret < 0) {
-                                       ret = LTTCOMM_KERN_STREAM_FAIL;
-                                       goto error;
-                               }
-                               /* Update the stream global counter */
-                               ksession->stream_count_global += ret;
-                       }
-               }
-
-               /* Setup kernel consumer socket and send fds to it */
-               ret = init_kernel_tracing(ksession);
-               if (ret < 0) {
-                       ret = LTTCOMM_KERN_START_FAIL;
-                       goto error;
-               }
-
-               /* This start the kernel tracing */
-               ret = kernel_start_session(ksession);
-               if (ret < 0) {
-                       ret = LTTCOMM_KERN_START_FAIL;
-                       goto error;
-               }
-
-               /* Quiescent wait after starting trace */
-               kernel_wait_quiescent(kernel_tracer_fd);
-       }
-
-       /* Flag session that trace should start automatically */
-       if (usess) {
-               usess->start_trace = 1;
-
-               ret = ust_app_start_trace_all(usess);
-               if (ret < 0) {
-                       ret = LTTCOMM_UST_START_FAIL;
-                       goto error;
-               }
-       }
-
-       ret = LTTCOMM_OK;
-
-error:
-       return ret;
-}
-
-/*
- * Command LTTNG_STOP_TRACE processed by the client thread.
- */
-static int cmd_stop_trace(struct ltt_session *session)
-{
-       int ret;
-       struct ltt_kernel_channel *kchan;
-       struct ltt_kernel_session *ksession;
-       struct ltt_ust_session *usess;
-
-       /* Short cut */
-       ksession = session->kernel_session;
-       usess = session->ust_session;
-
-       if (!session->enabled) {
-               ret = LTTCOMM_TRACE_ALREADY_STOPPED;
-               goto error;
-       }
-
-       session->enabled = 0;
-
-       /* Kernel tracer */
-       if (ksession != NULL) {
-               DBG("Stop kernel tracing");
-
-               /* Flush all buffers before stopping */
-               ret = kernel_metadata_flush_buffer(ksession->metadata_stream_fd);
-               if (ret < 0) {
-                       ERR("Kernel metadata flush failed");
-               }
-
-               cds_list_for_each_entry(kchan, &ksession->channel_list.head, list) {
-                       ret = kernel_flush_buffer(kchan);
-                       if (ret < 0) {
-                               ERR("Kernel flush buffer error");
-                       }
-               }
-
-               ret = kernel_stop_session(ksession);
-               if (ret < 0) {
-                       ret = LTTCOMM_KERN_STOP_FAIL;
-                       goto error;
-               }
-
-               kernel_wait_quiescent(kernel_tracer_fd);
-       }
-
-       if (usess) {
-               usess->start_trace = 0;
-
-               ret = ust_app_stop_trace_all(usess);
-               if (ret < 0) {
-                       ret = LTTCOMM_UST_STOP_FAIL;
-                       goto error;
-               }
-       }
-
-       ret = LTTCOMM_OK;
-
-error:
-       return ret;
-}
-
-/*
- * Command LTTNG_CREATE_SESSION processed by the client thread.
- */
-static int cmd_create_session(char *name, char *path, lttng_sock_cred *creds)
-{
-       int ret;
-
-       ret = session_create(name, path, LTTNG_SOCK_GET_UID_CRED(creds),
-                       LTTNG_SOCK_GET_GID_CRED(creds));
-       if (ret != LTTCOMM_OK) {
-               goto error;
-       }
-
-       ret = LTTCOMM_OK;
-
-error:
-       return ret;
-}
-
-/*
- * Command LTTNG_DESTROY_SESSION processed by the client thread.
- */
-static int cmd_destroy_session(struct ltt_session *session, char *name)
-{
-       int ret;
-
-       /* Clean kernel session teardown */
-       teardown_kernel_session(session);
-       /* UST session teardown */
-       teardown_ust_session(session);
-
-       /*
-        * Must notify the kernel thread here to update it's poll setin order
-        * to remove the channel(s)' fd just destroyed.
-        */
-       ret = notify_thread_pipe(kernel_poll_pipe[1]);
-       if (ret < 0) {
-               PERROR("write kernel poll pipe");
-       }
-
-       ret = session_destroy(session);
-
-       return ret;
-}
-
-/*
- * Command LTTNG_CALIBRATE processed by the client thread.
- */
-static int cmd_calibrate(int domain, struct lttng_calibrate *calibrate)
-{
-       int ret;
-
-       switch (domain) {
-       case LTTNG_DOMAIN_KERNEL:
-       {
-               struct lttng_kernel_calibrate kcalibrate;
-
-               kcalibrate.type = calibrate->type;
-               ret = kernel_calibrate(kernel_tracer_fd, &kcalibrate);
-               if (ret < 0) {
-                       ret = LTTCOMM_KERN_ENABLE_FAIL;
-                       goto error;
-               }
-               break;
-       }
-       case LTTNG_DOMAIN_UST:
-       {
-               struct lttng_ust_calibrate ucalibrate;
-
-               ucalibrate.type = calibrate->type;
-               ret = ust_app_calibrate_glb(&ucalibrate);
-               if (ret < 0) {
-                       ret = LTTCOMM_UST_CALIBRATE_FAIL;
-                       goto error;
-               }
-               break;
-       }
-       default:
-               ret = LTTCOMM_UND;
-               goto error;
-       }
-
-       ret = LTTCOMM_OK;
-
-error:
-       return ret;
-}
-
-/*
- * Command LTTNG_REGISTER_CONSUMER processed by the client thread.
- */
-static int cmd_register_consumer(struct ltt_session *session, int domain,
-               char *sock_path)
-{
-       int ret, sock;
-
-       switch (domain) {
-       case LTTNG_DOMAIN_KERNEL:
-               /* Can't register a consumer if there is already one */
-               if (session->kernel_session->consumer_fds_sent != 0) {
-                       ret = LTTCOMM_KERN_CONSUMER_FAIL;
-                       goto error;
-               }
-
-               sock = lttcomm_connect_unix_sock(sock_path);
-               if (sock < 0) {
-                       ret = LTTCOMM_CONNECT_FAIL;
-                       goto error;
-               }
-
-               session->kernel_session->consumer_fd = sock;
-               break;
-       default:
-               /* TODO: Userspace tracing */
-               ret = LTTCOMM_UND;
-               goto error;
-       }
-
-       ret = LTTCOMM_OK;
-
-error:
-       return ret;
-}
-
-/*
- * Command LTTNG_LIST_DOMAINS processed by the client thread.
- */
-static ssize_t cmd_list_domains(struct ltt_session *session,
-               struct lttng_domain **domains)
-{
-       int ret, index = 0;
-       ssize_t nb_dom = 0;
-
-       if (session->kernel_session != NULL) {
-               DBG3("Listing domains found kernel domain");
-               nb_dom++;
-       }
-
-       if (session->ust_session != NULL) {
-               DBG3("Listing domains found UST global domain");
-               nb_dom++;
-       }
-
-       *domains = zmalloc(nb_dom * sizeof(struct lttng_domain));
-       if (*domains == NULL) {
-               ret = -LTTCOMM_FATAL;
-               goto error;
-       }
-
-       if (session->kernel_session != NULL) {
-               (*domains)[index].type = LTTNG_DOMAIN_KERNEL;
-               index++;
-       }
-
-       if (session->ust_session != NULL) {
-               (*domains)[index].type = LTTNG_DOMAIN_UST;
-               index++;
-       }
-
-       return nb_dom;
-
-error:
-       return ret;
-}
-
-/*
- * Command LTTNG_LIST_CHANNELS processed by the client thread.
- */
-static ssize_t cmd_list_channels(int domain, struct ltt_session *session,
-               struct lttng_channel **channels)
-{
-       int ret;
-       ssize_t nb_chan = 0;
-
-       switch (domain) {
-       case LTTNG_DOMAIN_KERNEL:
-               if (session->kernel_session != NULL) {
-                       nb_chan = session->kernel_session->channel_count;
-               }
-               DBG3("Number of kernel channels %zd", nb_chan);
-               break;
-       case LTTNG_DOMAIN_UST:
-               if (session->ust_session != NULL) {
-                       nb_chan = lttng_ht_get_count(
-                                       session->ust_session->domain_global.channels);
-               }
-               DBG3("Number of UST global channels %zd", nb_chan);
-               break;
-       default:
-               *channels = NULL;
-               ret = -LTTCOMM_UND;
-               goto error;
-       }
-
-       if (nb_chan > 0) {
-               *channels = zmalloc(nb_chan * sizeof(struct lttng_channel));
-               if (*channels == NULL) {
-                       ret = -LTTCOMM_FATAL;
-                       goto error;
-               }
-
-               list_lttng_channels(domain, session, *channels);
-       } else {
-               *channels = NULL;
-       }
-
-       return nb_chan;
-
-error:
-       return ret;
-}
-
-/*
- * Command LTTNG_LIST_EVENTS processed by the client thread.
- */
-static ssize_t cmd_list_events(int domain, struct ltt_session *session,
-               char *channel_name, struct lttng_event **events)
-{
-       int ret = 0;
-       ssize_t nb_event = 0;
-
-       switch (domain) {
-       case LTTNG_DOMAIN_KERNEL:
-               if (session->kernel_session != NULL) {
-                       nb_event = list_lttng_kernel_events(channel_name,
-                                       session->kernel_session, events);
-               }
-               break;
-       case LTTNG_DOMAIN_UST:
-       {
-               if (session->ust_session != NULL) {
-                       nb_event = list_lttng_ust_global_events(channel_name,
-                                       &session->ust_session->domain_global, events);
-               }
-               break;
-       }
-       default:
-               ret = -LTTCOMM_UND;
-               goto error;
-       }
-
-       ret = nb_event;
-
-error:
-       return ret;
-}
-
-/*
- * Process the command requested by the lttng client within the command
- * context structure. This function make sure that the return structure (llm)
- * is set and ready for transmission before returning.
- *
- * Return any error encountered or 0 for success.
- */
-static int process_client_msg(struct command_ctx *cmd_ctx)
-{
-       int ret = LTTCOMM_OK;
-       int need_tracing_session = 1;
-       int need_domain;
-
-       DBG("Processing client command %d", cmd_ctx->lsm->cmd_type);
-
-       switch (cmd_ctx->lsm->cmd_type) {
-       case LTTNG_CREATE_SESSION:
-       case LTTNG_DESTROY_SESSION:
-       case LTTNG_LIST_SESSIONS:
-       case LTTNG_LIST_DOMAINS:
-       case LTTNG_START_TRACE:
-       case LTTNG_STOP_TRACE:
-               need_domain = 0;
-               break;
-       default:
-               need_domain = 1;
-       }
-
-       if (opt_no_kernel && need_domain
-                       && cmd_ctx->lsm->domain.type == LTTNG_DOMAIN_KERNEL) {
-               if (!is_root) {
-                       ret = LTTCOMM_NEED_ROOT_SESSIOND;
-               } else {
-                       ret = LTTCOMM_KERN_NA;
-               }
-               goto error;
-       }
-
-       /*
-        * Check for command that don't needs to allocate a returned payload. We do
-        * this here so we don't have to make the call for no payload at each
-        * command.
-        */
-       switch(cmd_ctx->lsm->cmd_type) {
-       case LTTNG_LIST_SESSIONS:
-       case LTTNG_LIST_TRACEPOINTS:
-       case LTTNG_LIST_DOMAINS:
-       case LTTNG_LIST_CHANNELS:
-       case LTTNG_LIST_EVENTS:
-               break;
-       default:
-               /* Setup lttng message with no payload */
-               ret = setup_lttng_msg(cmd_ctx, 0);
-               if (ret < 0) {
-                       /* This label does not try to unlock the session */
-                       goto init_setup_error;
-               }
-       }
-
-       /* Commands that DO NOT need a session. */
-       switch (cmd_ctx->lsm->cmd_type) {
-       case LTTNG_CREATE_SESSION:
-       case LTTNG_CALIBRATE:
-       case LTTNG_LIST_SESSIONS:
-       case LTTNG_LIST_TRACEPOINTS:
-               need_tracing_session = 0;
-               break;
-       default:
-               DBG("Getting session %s by name", cmd_ctx->lsm->session.name);
-               session_lock_list();
-               cmd_ctx->session = session_find_by_name(cmd_ctx->lsm->session.name);
-               session_unlock_list();
-               if (cmd_ctx->session == NULL) {
-                       if (cmd_ctx->lsm->session.name != NULL) {
-                               ret = LTTCOMM_SESS_NOT_FOUND;
-                       } else {
-                               /* If no session name specified */
-                               ret = LTTCOMM_SELECT_SESS;
-                       }
-                       goto error;
-               } else {
-                       /* Acquire lock for the session */
-                       session_lock(cmd_ctx->session);
-               }
-               break;
-       }
-
-       if (!need_domain) {
-               goto skip_domain;
-       }
-       /*
-        * Check domain type for specific "pre-action".
-        */
-       switch (cmd_ctx->lsm->domain.type) {
-       case LTTNG_DOMAIN_KERNEL:
-               if (!is_root) {
-                       ret = LTTCOMM_NEED_ROOT_SESSIOND;
-                       goto error;
-               }
+       /*
+        * Check domain type for specific "pre-action".
+        */
+       switch (cmd_ctx->lsm->domain.type) {
+       case LTTNG_DOMAIN_KERNEL:
+               if (!is_root) {
+                       ret = LTTNG_ERR_NEED_ROOT_SESSIOND;
+                       goto error;
+               }
 
                /* Kernel tracer check */
                if (kernel_tracer_fd == -1) {
@@ -3343,12 +2338,18 @@ static int process_client_msg(struct command_ctx *cmd_ctx)
                        }
                }
 
+               /* Consumer is in an ERROR state. Report back to client */
+               if (uatomic_read(&kernel_consumerd_state) == CONSUMER_ERROR) {
+                       ret = LTTNG_ERR_NO_KERNCONSUMERD;
+                       goto error;
+               }
+
                /* Need a session for kernel command */
                if (need_tracing_session) {
                        if (cmd_ctx->session->kernel_session == NULL) {
                                ret = create_kernel_session(cmd_ctx->session);
                                if (ret < 0) {
-                                       ret = LTTCOMM_KERN_SESS_FAIL;
+                                       ret = LTTNG_ERR_KERN_SESS_FAIL;
                                        goto error;
                                }
                        }
@@ -3356,61 +2357,108 @@ static int process_client_msg(struct command_ctx *cmd_ctx)
                        /* Start the kernel consumer daemon */
                        pthread_mutex_lock(&kconsumer_data.pid_mutex);
                        if (kconsumer_data.pid == 0 &&
-                                       cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER) {
+                                       cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER &&
+                                       cmd_ctx->session->start_consumer) {
                                pthread_mutex_unlock(&kconsumer_data.pid_mutex);
                                ret = start_consumerd(&kconsumer_data);
                                if (ret < 0) {
-                                       ret = LTTCOMM_KERN_CONSUMER_FAIL;
+                                       ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
                                        goto error;
                                }
+                               uatomic_set(&kernel_consumerd_state, CONSUMER_STARTED);
                        } else {
                                pthread_mutex_unlock(&kconsumer_data.pid_mutex);
                        }
+
+                       /*
+                        * The consumer was just spawned so we need to add the socket to
+                        * the consumer output of the session if it exists.
+                        */
+                       ret = consumer_create_socket(&kconsumer_data,
+                                       cmd_ctx->session->kernel_session->consumer);
+                       if (ret < 0) {
+                               goto error;
+                       }
                }
+
                break;
        case LTTNG_DOMAIN_UST:
        {
+               /* Consumer is in an ERROR state. Report back to client */
+               if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) {
+                       ret = LTTNG_ERR_NO_USTCONSUMERD;
+                       goto error;
+               }
+
                if (need_tracing_session) {
+                       /* Create UST session if none exist. */
                        if (cmd_ctx->session->ust_session == NULL) {
                                ret = create_ust_session(cmd_ctx->session,
                                                &cmd_ctx->lsm->domain);
-                               if (ret != LTTCOMM_OK) {
+                               if (ret != LTTNG_OK) {
                                        goto error;
                                }
                        }
+
                        /* Start the UST consumer daemons */
                        /* 64-bit */
                        pthread_mutex_lock(&ustconsumer64_data.pid_mutex);
                        if (consumerd64_bin[0] != '\0' &&
                                        ustconsumer64_data.pid == 0 &&
-                                       cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER) {
+                                       cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER &&
+                                       cmd_ctx->session->start_consumer) {
                                pthread_mutex_unlock(&ustconsumer64_data.pid_mutex);
                                ret = start_consumerd(&ustconsumer64_data);
                                if (ret < 0) {
-                                       ret = LTTCOMM_UST_CONSUMER64_FAIL;
-                                       ust_consumerd64_fd = -EINVAL;
+                                       ret = LTTNG_ERR_UST_CONSUMER64_FAIL;
+                                       uatomic_set(&ust_consumerd64_fd, -EINVAL);
                                        goto error;
                                }
 
-                               ust_consumerd64_fd = ustconsumer64_data.cmd_sock;
+                               uatomic_set(&ust_consumerd64_fd, ustconsumer64_data.cmd_sock);
+                               uatomic_set(&ust_consumerd_state, CONSUMER_STARTED);
                        } else {
                                pthread_mutex_unlock(&ustconsumer64_data.pid_mutex);
                        }
+
+                       /*
+                        * Setup socket for consumer 64 bit. No need for atomic access
+                        * since it was set above and can ONLY be set in this thread.
+                        */
+                       ret = consumer_create_socket(&ustconsumer64_data,
+                                       cmd_ctx->session->ust_session->consumer);
+                       if (ret < 0) {
+                               goto error;
+                       }
+
                        /* 32-bit */
                        if (consumerd32_bin[0] != '\0' &&
                                        ustconsumer32_data.pid == 0 &&
-                                       cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER) {
+                                       cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER &&
+                                       cmd_ctx->session->start_consumer) {
                                pthread_mutex_unlock(&ustconsumer32_data.pid_mutex);
                                ret = start_consumerd(&ustconsumer32_data);
                                if (ret < 0) {
-                                       ret = LTTCOMM_UST_CONSUMER32_FAIL;
-                                       ust_consumerd32_fd = -EINVAL;
+                                       ret = LTTNG_ERR_UST_CONSUMER32_FAIL;
+                                       uatomic_set(&ust_consumerd32_fd, -EINVAL);
                                        goto error;
                                }
-                               ust_consumerd32_fd = ustconsumer32_data.cmd_sock;
+
+                               uatomic_set(&ust_consumerd32_fd, ustconsumer32_data.cmd_sock);
+                               uatomic_set(&ust_consumerd_state, CONSUMER_STARTED);
                        } else {
                                pthread_mutex_unlock(&ustconsumer32_data.pid_mutex);
                        }
+
+                       /*
+                        * Setup socket for consumer 32 bit. No need for atomic access
+                        * since it was set above and can ONLY be set in this thread.
+                        */
+                       ret = consumer_create_socket(&ustconsumer32_data,
+                                       cmd_ctx->session->ust_session->consumer);
+                       if (ret < 0) {
+                               goto error;
+                       }
                }
                break;
        }
@@ -3419,6 +2467,25 @@ static int process_client_msg(struct command_ctx *cmd_ctx)
        }
 skip_domain:
 
+       /* Validate the consumer daemon state for start/stop trace commands */
+       if (cmd_ctx->lsm->cmd_type == LTTNG_START_TRACE ||
+                       cmd_ctx->lsm->cmd_type == LTTNG_STOP_TRACE) {
+               switch (cmd_ctx->lsm->domain.type) {
+               case LTTNG_DOMAIN_UST:
+                       if (uatomic_read(&ust_consumerd_state) != CONSUMER_STARTED) {
+                               ret = LTTNG_ERR_NO_USTCONSUMERD;
+                               goto error;
+                       }
+                       break;
+               case LTTNG_DOMAIN_KERNEL:
+                       if (uatomic_read(&kernel_consumerd_state) != CONSUMER_STARTED) {
+                               ret = LTTNG_ERR_NO_KERNCONSUMERD;
+                               goto error;
+                       }
+                       break;
+               }
+       }
+
        /*
         * Check that the UID or GID match that of the tracing session.
         * The root user can interact with all sessions.
@@ -3427,7 +2494,7 @@ skip_domain:
                if (!session_access_ok(cmd_ctx->session,
                                LTTNG_SOCK_GET_UID_CRED(&cmd_ctx->creds),
                                LTTNG_SOCK_GET_GID_CRED(&cmd_ctx->creds))) {
-                       ret = LTTCOMM_EPERM;
+                       ret = LTTNG_ERR_EPERM;
                        goto error;
                }
        }
@@ -3438,8 +2505,7 @@ skip_domain:
        {
                ret = cmd_add_context(cmd_ctx->session, cmd_ctx->lsm->domain.type,
                                cmd_ctx->lsm->u.context.channel_name,
-                               cmd_ctx->lsm->u.context.event_name,
-                               &cmd_ctx->lsm->u.context.ctx);
+                               &cmd_ctx->lsm->u.context.ctx, kernel_poll_pipe[1]);
                break;
        }
        case LTTNG_DISABLE_CHANNEL:
@@ -3463,17 +2529,46 @@ skip_domain:
                                cmd_ctx->lsm->u.disable.channel_name);
                break;
        }
+       case LTTNG_DISABLE_CONSUMER:
+       {
+               ret = cmd_disable_consumer(cmd_ctx->lsm->domain.type, cmd_ctx->session);
+               break;
+       }
        case LTTNG_ENABLE_CHANNEL:
        {
                ret = cmd_enable_channel(cmd_ctx->session, cmd_ctx->lsm->domain.type,
-                               &cmd_ctx->lsm->u.channel.chan);
+                               &cmd_ctx->lsm->u.channel.chan, kernel_poll_pipe[1]);
+               break;
+       }
+       case LTTNG_ENABLE_CONSUMER:
+       {
+               /*
+                * XXX: 0 means that this URI should be applied on the session. Should
+                * be a DOMAIN enum.
+                */
+               ret = cmd_enable_consumer(cmd_ctx->lsm->domain.type, cmd_ctx->session);
+               if (ret != LTTNG_OK) {
+                       goto error;
+               }
+
+               if (cmd_ctx->lsm->domain.type == 0) {
+                       /* Add the URI for the UST session if a consumer is present. */
+                       if (cmd_ctx->session->ust_session &&
+                                       cmd_ctx->session->ust_session->consumer) {
+                               ret = cmd_enable_consumer(LTTNG_DOMAIN_UST, cmd_ctx->session);
+                       } else if (cmd_ctx->session->kernel_session &&
+                                       cmd_ctx->session->kernel_session->consumer) {
+                               ret = cmd_enable_consumer(LTTNG_DOMAIN_KERNEL,
+                                               cmd_ctx->session);
+                       }
+               }
                break;
        }
        case LTTNG_ENABLE_EVENT:
        {
                ret = cmd_enable_event(cmd_ctx->session, cmd_ctx->lsm->domain.type,
                                cmd_ctx->lsm->u.enable.channel_name,
-                               &cmd_ctx->lsm->u.enable.event);
+                               &cmd_ctx->lsm->u.enable.event, NULL, kernel_poll_pipe[1]);
                break;
        }
        case LTTNG_ENABLE_ALL_EVENT:
@@ -3482,7 +2577,7 @@ skip_domain:
 
                ret = cmd_enable_event_all(cmd_ctx->session, cmd_ctx->lsm->domain.type,
                                cmd_ctx->lsm->u.enable.channel_name,
-                               cmd_ctx->lsm->u.enable.event.type);
+                               cmd_ctx->lsm->u.enable.event.type, NULL, kernel_poll_pipe[1]);
                break;
        }
        case LTTNG_LIST_TRACEPOINTS:
@@ -3492,6 +2587,7 @@ skip_domain:
 
                nb_events = cmd_list_tracepoints(cmd_ctx->lsm->domain.type, &events);
                if (nb_events < 0) {
+                       /* Return value is a negative lttng_error_code. */
                        ret = -nb_events;
                        goto error;
                }
@@ -3512,7 +2608,98 @@ skip_domain:
 
                free(events);
 
-               ret = LTTCOMM_OK;
+               ret = LTTNG_OK;
+               break;
+       }
+       case LTTNG_LIST_TRACEPOINT_FIELDS:
+       {
+               struct lttng_event_field *fields;
+               ssize_t nb_fields;
+
+               nb_fields = cmd_list_tracepoint_fields(cmd_ctx->lsm->domain.type,
+                               &fields);
+               if (nb_fields < 0) {
+                       /* Return value is a negative lttng_error_code. */
+                       ret = -nb_fields;
+                       goto error;
+               }
+
+               /*
+                * Setup lttng message with payload size set to the field list size in
+                * bytes and then copy list into the llm payload.
+                */
+               ret = setup_lttng_msg(cmd_ctx,
+                               sizeof(struct lttng_event_field) * nb_fields);
+               if (ret < 0) {
+                       free(fields);
+                       goto setup_error;
+               }
+
+               /* Copy field list into message payload */
+               memcpy(cmd_ctx->llm->payload, fields,
+                               sizeof(struct lttng_event_field) * nb_fields);
+
+               free(fields);
+
+               ret = LTTNG_OK;
+               break;
+       }
+       case LTTNG_SET_CONSUMER_URI:
+       {
+               size_t nb_uri, len;
+               struct lttng_uri *uris;
+
+               nb_uri = cmd_ctx->lsm->u.uri.size;
+               len = nb_uri * sizeof(struct lttng_uri);
+
+               if (nb_uri == 0) {
+                       ret = LTTNG_ERR_INVALID;
+                       goto error;
+               }
+
+               uris = zmalloc(len);
+               if (uris == NULL) {
+                       ret = LTTNG_ERR_FATAL;
+                       goto error;
+               }
+
+               /* Receive variable len data */
+               DBG("Receiving %zu URI(s) from client ...", nb_uri);
+               ret = lttcomm_recv_unix_sock(sock, uris, len);
+               if (ret <= 0) {
+                       DBG("No URIs received from client... continuing");
+                       *sock_error = 1;
+                       ret = LTTNG_ERR_SESSION_FAIL;
+                       free(uris);
+                       goto error;
+               }
+
+               ret = cmd_set_consumer_uri(cmd_ctx->lsm->domain.type, cmd_ctx->session,
+                               nb_uri, uris);
+               if (ret != LTTNG_OK) {
+                       free(uris);
+                       goto error;
+               }
+
+               /*
+                * XXX: 0 means that this URI should be applied on the session. Should
+                * be a DOMAIN enum.
+                */
+               if (cmd_ctx->lsm->domain.type == 0) {
+                       /* Add the URI for the UST session if a consumer is present. */
+                       if (cmd_ctx->session->ust_session &&
+                                       cmd_ctx->session->ust_session->consumer) {
+                               ret = cmd_set_consumer_uri(LTTNG_DOMAIN_UST, cmd_ctx->session,
+                                               nb_uri, uris);
+                       } else if (cmd_ctx->session->kernel_session &&
+                                       cmd_ctx->session->kernel_session->consumer) {
+                               ret = cmd_set_consumer_uri(LTTNG_DOMAIN_KERNEL,
+                                               cmd_ctx->session, nb_uri, uris);
+                       }
+               }
+
+               free(uris);
+
                break;
        }
        case LTTNG_START_TRACE:
@@ -3527,14 +2714,51 @@ skip_domain:
        }
        case LTTNG_CREATE_SESSION:
        {
-               ret = cmd_create_session(cmd_ctx->lsm->session.name,
-                               cmd_ctx->lsm->session.path, &cmd_ctx->creds);
+               size_t nb_uri, len;
+               struct lttng_uri *uris = NULL;
+
+               nb_uri = cmd_ctx->lsm->u.uri.size;
+               len = nb_uri * sizeof(struct lttng_uri);
+
+               if (nb_uri > 0) {
+                       uris = zmalloc(len);
+                       if (uris == NULL) {
+                               ret = LTTNG_ERR_FATAL;
+                               goto error;
+                       }
+
+                       /* Receive variable len data */
+                       DBG("Waiting for %zu URIs from client ...", nb_uri);
+                       ret = lttcomm_recv_unix_sock(sock, uris, len);
+                       if (ret <= 0) {
+                               DBG("No URIs received from client... continuing");
+                               *sock_error = 1;
+                               ret = LTTNG_ERR_SESSION_FAIL;
+                               free(uris);
+                               goto error;
+                       }
+
+                       if (nb_uri == 1 && uris[0].dtype != LTTNG_DST_PATH) {
+                               DBG("Creating session with ONE network URI is a bad call");
+                               ret = LTTNG_ERR_SESSION_FAIL;
+                               free(uris);
+                               goto error;
+                       }
+               }
+
+               ret = cmd_create_session_uri(cmd_ctx->lsm->session.name, uris, nb_uri,
+                       &cmd_ctx->creds);
+
+               free(uris);
+
                break;
        }
        case LTTNG_DESTROY_SESSION:
        {
-               ret = cmd_destroy_session(cmd_ctx->session,
-                               cmd_ctx->lsm->session.name);
+               ret = cmd_destroy_session(cmd_ctx->session, kernel_poll_pipe[1]);
+
+               /* Set session to NULL so we do not unlock it after free. */
+               cmd_ctx->session = NULL;
                break;
        }
        case LTTNG_LIST_DOMAINS:
@@ -3544,6 +2768,7 @@ skip_domain:
 
                nb_dom = cmd_list_domains(cmd_ctx->session, &domains);
                if (nb_dom < 0) {
+                       /* Return value is a negative lttng_error_code. */
                        ret = -nb_dom;
                        goto error;
                }
@@ -3559,17 +2784,18 @@ skip_domain:
 
                free(domains);
 
-               ret = LTTCOMM_OK;
+               ret = LTTNG_OK;
                break;
        }
        case LTTNG_LIST_CHANNELS:
        {
-               size_t nb_chan;
+               int nb_chan;
                struct lttng_channel *channels;
 
                nb_chan = cmd_list_channels(cmd_ctx->lsm->domain.type,
                                cmd_ctx->session, &channels);
                if (nb_chan < 0) {
+                       /* Return value is a negative lttng_error_code. */
                        ret = -nb_chan;
                        goto error;
                }
@@ -3585,7 +2811,7 @@ skip_domain:
 
                free(channels);
 
-               ret = LTTCOMM_OK;
+               ret = LTTNG_OK;
                break;
        }
        case LTTNG_LIST_EVENTS:
@@ -3596,81 +2822,334 @@ skip_domain:
                nb_event = cmd_list_events(cmd_ctx->lsm->domain.type, cmd_ctx->session,
                                cmd_ctx->lsm->u.list.channel_name, &events);
                if (nb_event < 0) {
+                       /* Return value is a negative lttng_error_code. */
                        ret = -nb_event;
                        goto error;
                }
 
-               ret = setup_lttng_msg(cmd_ctx, nb_event * sizeof(struct lttng_event));
-               if (ret < 0) {
-                       goto setup_error;
+               ret = setup_lttng_msg(cmd_ctx, nb_event * sizeof(struct lttng_event));
+               if (ret < 0) {
+                       goto setup_error;
+               }
+
+               /* Copy event list into message payload */
+               memcpy(cmd_ctx->llm->payload, events,
+                               nb_event * sizeof(struct lttng_event));
+
+               free(events);
+
+               ret = LTTNG_OK;
+               break;
+       }
+       case LTTNG_LIST_SESSIONS:
+       {
+               unsigned int nr_sessions;
+
+               session_lock_list();
+               nr_sessions = lttng_sessions_count(
+                               LTTNG_SOCK_GET_UID_CRED(&cmd_ctx->creds),
+                               LTTNG_SOCK_GET_GID_CRED(&cmd_ctx->creds));
+
+               ret = setup_lttng_msg(cmd_ctx, sizeof(struct lttng_session) * nr_sessions);
+               if (ret < 0) {
+                       session_unlock_list();
+                       goto setup_error;
+               }
+
+               /* Fill the session array */
+               cmd_list_lttng_sessions((struct lttng_session *)(cmd_ctx->llm->payload),
+                       LTTNG_SOCK_GET_UID_CRED(&cmd_ctx->creds),
+                       LTTNG_SOCK_GET_GID_CRED(&cmd_ctx->creds));
+
+               session_unlock_list();
+
+               ret = LTTNG_OK;
+               break;
+       }
+       case LTTNG_CALIBRATE:
+       {
+               ret = cmd_calibrate(cmd_ctx->lsm->domain.type,
+                               &cmd_ctx->lsm->u.calibrate);
+               break;
+       }
+       case LTTNG_REGISTER_CONSUMER:
+       {
+               struct consumer_data *cdata;
+
+               switch (cmd_ctx->lsm->domain.type) {
+               case LTTNG_DOMAIN_KERNEL:
+                       cdata = &kconsumer_data;
+                       break;
+               default:
+                       ret = LTTNG_ERR_UND;
+                       goto error;
+               }
+
+               ret = cmd_register_consumer(cmd_ctx->session, cmd_ctx->lsm->domain.type,
+                               cmd_ctx->lsm->u.reg.path, cdata);
+               break;
+       }
+       case LTTNG_ENABLE_EVENT_WITH_FILTER:
+       {
+               struct lttng_filter_bytecode *bytecode;
+
+               if (cmd_ctx->lsm->u.enable.bytecode_len > LTTNG_FILTER_MAX_LEN) {
+                       ret = LTTNG_ERR_FILTER_INVAL;
+                       goto error;
+               }
+               if (cmd_ctx->lsm->u.enable.bytecode_len == 0) {
+                       ret = LTTNG_ERR_FILTER_INVAL;
+                       goto error;
+               }
+               bytecode = zmalloc(cmd_ctx->lsm->u.enable.bytecode_len);
+               if (!bytecode) {
+                       ret = LTTNG_ERR_FILTER_NOMEM;
+                       goto error;
+               }
+               /* Receive var. len. data */
+               DBG("Receiving var len data from client ...");
+               ret = lttcomm_recv_unix_sock(sock, bytecode,
+                               cmd_ctx->lsm->u.enable.bytecode_len);
+               if (ret <= 0) {
+                       DBG("Nothing recv() from client var len data... continuing");
+                       *sock_error = 1;
+                       ret = LTTNG_ERR_FILTER_INVAL;
+                       goto error;
+               }
+
+               if (bytecode->len + sizeof(*bytecode)
+                               != cmd_ctx->lsm->u.enable.bytecode_len) {
+                       free(bytecode);
+                       ret = LTTNG_ERR_FILTER_INVAL;
+                       goto error;
+               }
+
+               ret = cmd_enable_event(cmd_ctx->session, cmd_ctx->lsm->domain.type,
+                               cmd_ctx->lsm->u.enable.channel_name,
+                               &cmd_ctx->lsm->u.enable.event, bytecode, kernel_poll_pipe[1]);
+               break;
+       }
+       case LTTNG_DATA_PENDING:
+       {
+               ret = cmd_data_pending(cmd_ctx->session);
+               break;
+       }
+       default:
+               ret = LTTNG_ERR_UND;
+               break;
+       }
+
+error:
+       if (cmd_ctx->llm == NULL) {
+               DBG("Missing llm structure. Allocating one.");
+               if (setup_lttng_msg(cmd_ctx, 0) < 0) {
+                       goto setup_error;
+               }
+       }
+       /* Set return code */
+       cmd_ctx->llm->ret_code = ret;
+setup_error:
+       if (cmd_ctx->session) {
+               session_unlock(cmd_ctx->session);
+       }
+       if (need_tracing_session) {
+               session_unlock_list();
+       }
+init_setup_error:
+       return ret;
+}
+
+/*
+ * Thread managing health check socket.
+ */
+static void *thread_manage_health(void *data)
+{
+       int sock = -1, new_sock = -1, ret, i, pollfd, err = -1;
+       uint32_t revents, nb_fd;
+       struct lttng_poll_event events;
+       struct lttcomm_health_msg msg;
+       struct lttcomm_health_data reply;
+
+       DBG("[thread] Manage health check started");
+
+       rcu_register_thread();
+
+       /* Create unix socket */
+       sock = lttcomm_create_unix_sock(health_unix_sock_path);
+       if (sock < 0) {
+               ERR("Unable to create health check Unix socket");
+               ret = -1;
+               goto error;
+       }
+
+       /*
+        * Set the CLOEXEC flag. Return code is useless because either way, the
+        * show must go on.
+        */
+       (void) utils_set_fd_cloexec(sock);
+
+       ret = lttcomm_listen_unix_sock(sock);
+       if (ret < 0) {
+               goto error;
+       }
+
+       /*
+        * Pass 2 as size here for the thread quit pipe and the health check
+        * socket. Nothing more will be added to this poll set.
+        */
+       ret = create_thread_poll_set(&events, 2);
+       if (ret < 0) {
+               goto error;
+       }
+
+       /* Add the health check socket */
+       ret = lttng_poll_add(&events, sock, LPOLLIN | LPOLLPRI);
+       if (ret < 0) {
+               goto error;
+       }
+
+       while (1) {
+               DBG("Health check ready");
+
+               /* Infinite blocking call, waiting for transmission */
+restart:
+               ret = lttng_poll_wait(&events, -1);
+               if (ret < 0) {
+                       /*
+                        * Restart interrupted system call.
+                        */
+                       if (errno == EINTR) {
+                               goto restart;
+                       }
+                       goto error;
+               }
+
+               nb_fd = ret;
+
+               for (i = 0; i < nb_fd; i++) {
+                       /* Fetch once the poll data */
+                       revents = LTTNG_POLL_GETEV(&events, i);
+                       pollfd = LTTNG_POLL_GETFD(&events, i);
+
+                       /* Thread quit pipe has been closed. Killing thread. */
+                       ret = check_thread_quit_pipe(pollfd, revents);
+                       if (ret) {
+                               err = 0;
+                               goto exit;
+                       }
+
+                       /* Event on the health check socket */
+                       if (pollfd == sock) {
+                               if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
+                                       ERR("Health socket poll error");
+                                       goto error;
+                               }
+                       }
+               }
+
+               new_sock = lttcomm_accept_unix_sock(sock);
+               if (new_sock < 0) {
+                       goto error;
+               }
+
+               /*
+                * Set the CLOEXEC flag. Return code is useless because either way, the
+                * show must go on.
+                */
+               (void) utils_set_fd_cloexec(new_sock);
+
+               DBG("Receiving data from client for health...");
+               ret = lttcomm_recv_unix_sock(new_sock, (void *)&msg, sizeof(msg));
+               if (ret <= 0) {
+                       DBG("Nothing recv() from client... continuing");
+                       ret = close(new_sock);
+                       if (ret) {
+                               PERROR("close");
+                       }
+                       new_sock = -1;
+                       continue;
+               }
+
+               rcu_thread_online();
+
+               switch (msg.component) {
+               case LTTNG_HEALTH_CMD:
+                       reply.ret_code = health_check_state(HEALTH_TYPE_CMD);
+                       break;
+               case LTTNG_HEALTH_APP_MANAGE:
+                       reply.ret_code = health_check_state(HEALTH_TYPE_APP_MANAGE);
+                       break;
+               case LTTNG_HEALTH_APP_REG:
+                       reply.ret_code = health_check_state(HEALTH_TYPE_APP_REG);
+                       break;
+               case LTTNG_HEALTH_KERNEL:
+                       reply.ret_code = health_check_state(HEALTH_TYPE_KERNEL);
+                       break;
+               case LTTNG_HEALTH_CONSUMER:
+                       reply.ret_code = check_consumer_health();
+                       break;
+               case LTTNG_HEALTH_ALL:
+                       reply.ret_code =
+                               health_check_state(HEALTH_TYPE_APP_MANAGE) &&
+                               health_check_state(HEALTH_TYPE_APP_REG) &&
+                               health_check_state(HEALTH_TYPE_CMD) &&
+                               health_check_state(HEALTH_TYPE_KERNEL) &&
+                               check_consumer_health();
+                       break;
+               default:
+                       reply.ret_code = LTTNG_ERR_UND;
+                       break;
+               }
+
+               /*
+                * Flip the return value: for the client, 0 means success and 1 means
+                * bad health, whereas inside the sessiond it is the opposite. Again,
+                * this is just to make things easier for us poor developers, who
+                * enjoy laziness a lot.
+                */
+               if (reply.ret_code == 0 || reply.ret_code == 1) {
+                       reply.ret_code = !reply.ret_code;
                }
 
-               /* Copy event list into message payload */
-               memcpy(cmd_ctx->llm->payload, events,
-                               nb_event * sizeof(struct lttng_event));
-
-               free(events);
-
-               ret = LTTCOMM_OK;
-               break;
-       }
-       case LTTNG_LIST_SESSIONS:
-       {
-               unsigned int nr_sessions;
-
-               session_lock_list();
-               nr_sessions = lttng_sessions_count(
-                               LTTNG_SOCK_GET_UID_CRED(&cmd_ctx->creds),
-                               LTTNG_SOCK_GET_GID_CRED(&cmd_ctx->creds));
+               DBG2("Health check return value %d", reply.ret_code);
 
-               ret = setup_lttng_msg(cmd_ctx, sizeof(struct lttng_session) * nr_sessions);
+               ret = send_unix_sock(new_sock, (void *) &reply, sizeof(reply));
                if (ret < 0) {
-                       session_unlock_list();
-                       goto setup_error;
+                       ERR("Failed to send health data back to client");
                }
 
-               /* Filled the session array */
-               list_lttng_sessions((struct lttng_session *)(cmd_ctx->llm->payload),
-                       LTTNG_SOCK_GET_UID_CRED(&cmd_ctx->creds),
-                       LTTNG_SOCK_GET_GID_CRED(&cmd_ctx->creds));
-
-               session_unlock_list();
-
-               ret = LTTCOMM_OK;
-               break;
-       }
-       case LTTNG_CALIBRATE:
-       {
-               ret = cmd_calibrate(cmd_ctx->lsm->domain.type,
-                               &cmd_ctx->lsm->u.calibrate);
-               break;
-       }
-       case LTTNG_REGISTER_CONSUMER:
-       {
-               ret = cmd_register_consumer(cmd_ctx->session, cmd_ctx->lsm->domain.type,
-                               cmd_ctx->lsm->u.reg.path);
-               break;
-       }
-       default:
-               ret = LTTCOMM_UND;
-               break;
+               /* End of transmission */
+               ret = close(new_sock);
+               if (ret) {
+                       PERROR("close");
+               }
+               new_sock = -1;
        }
 
+exit:
 error:
-       if (cmd_ctx->llm == NULL) {
-               DBG("Missing llm structure. Allocating one.");
-               if (setup_lttng_msg(cmd_ctx, 0) < 0) {
-                       goto setup_error;
+       if (err) {
+               ERR("Health error occurred in %s", __func__);
+       }
+       DBG("Health check thread dying");
+       unlink(health_unix_sock_path);
+       if (sock >= 0) {
+               ret = close(sock);
+               if (ret) {
+                       PERROR("close");
                }
        }
-       /* Set return code */
-       cmd_ctx->llm->ret_code = ret;
-setup_error:
-       if (cmd_ctx->session) {
-               session_unlock(cmd_ctx->session);
+       if (new_sock >= 0) {
+               ret = close(new_sock);
+               if (ret) {
+                       PERROR("close");
+               }
        }
-init_setup_error:
-       return ret;
+
+       lttng_poll_clean(&events);
+
+       rcu_unregister_thread();
+       return NULL;
 }
 
 /*
@@ -3679,7 +3158,8 @@ init_setup_error:
  */
 static void *thread_manage_clients(void *data)
 {
-       int sock = -1, ret, i, pollfd;
+       int sock = -1, ret, i, pollfd, err = -1;
+       int sock_error;
        uint32_t revents, nb_fd;
        struct command_ctx *cmd_ctx = NULL;
        struct lttng_poll_event events;
@@ -3688,9 +3168,17 @@ static void *thread_manage_clients(void *data)
 
        rcu_register_thread();
 
+       health_register(HEALTH_TYPE_CMD);
+
+       if (testpoint(thread_manage_clients)) {
+               goto error_testpoint;
+       }
+
+       health_code_update(&health_thread_cmd);
+
        ret = lttcomm_listen_unix_sock(client_sock);
        if (ret < 0) {
-               goto error;
+               goto error_listen;
        }
 
        /*
@@ -3699,7 +3187,7 @@ static void *thread_manage_clients(void *data)
         */
        ret = create_thread_poll_set(&events, 2);
        if (ret < 0) {
-               goto error;
+               goto error_create_poll;
        }
 
        /* Add the application registration socket */
@@ -3715,14 +3203,20 @@ static void *thread_manage_clients(void *data)
                kill(ppid, SIGUSR1);
        }
 
+       if (testpoint(thread_manage_clients_before_loop)) {
+               goto error;
+       }
+
+       health_code_update(&health_thread_cmd);
+
        while (1) {
                DBG("Accepting client command ...");
 
-               nb_fd = LTTNG_POLL_GETNB(&events);
-
                /* Inifinite blocking call, waiting for transmission */
        restart:
+               health_poll_update(&health_thread_cmd);
                ret = lttng_poll_wait(&events, -1);
+               health_poll_update(&health_thread_cmd);
                if (ret < 0) {
                        /*
                         * Restart interrupted system call.
@@ -3733,15 +3227,20 @@ static void *thread_manage_clients(void *data)
                        goto error;
                }
 
+               nb_fd = ret;
+
                for (i = 0; i < nb_fd; i++) {
                        /* Fetch once the poll data */
                        revents = LTTNG_POLL_GETEV(&events, i);
                        pollfd = LTTNG_POLL_GETFD(&events, i);
 
+                       health_code_update(&health_thread_cmd);
+
                        /* Thread quit pipe has been closed. Killing thread. */
                        ret = check_thread_quit_pipe(pollfd, revents);
                        if (ret) {
-                               goto error;
+                               err = 0;
+                               goto exit;
                        }
 
                        /* Event on the registration socket */
@@ -3755,11 +3254,19 @@ static void *thread_manage_clients(void *data)
 
                DBG("Wait for client response");
 
+               health_code_update(&health_thread_cmd);
+
                sock = lttcomm_accept_unix_sock(client_sock);
                if (sock < 0) {
                        goto error;
                }
 
+               /*
+                * Set the CLOEXEC flag. Return code is useless because either way, the
+                * show must go on.
+                */
+               (void) utils_set_fd_cloexec(sock);
+
                /* Set socket option for credentials retrieval */
                ret = lttcomm_setsockopt_creds_unix_sock(sock);
                if (ret < 0) {
@@ -3783,6 +3290,8 @@ static void *thread_manage_clients(void *data)
                cmd_ctx->llm = NULL;
                cmd_ctx->session = NULL;
 
+               health_code_update(&health_thread_cmd);
+
                /*
                 * Data is received from the lttng client. The struct
                 * lttcomm_session_msg (lsm) contains the command and data request of
@@ -3798,10 +3307,12 @@ static void *thread_manage_clients(void *data)
                                PERROR("close");
                        }
                        sock = -1;
-                       free(cmd_ctx);
+                       clean_command_ctx(&cmd_ctx);
                        continue;
                }
 
+               health_code_update(&health_thread_cmd);
+
                // TODO: Validate cmd_ctx including sanity check for
                // security purpose.
 
@@ -3812,18 +3323,29 @@ static void *thread_manage_clients(void *data)
                 * informations for the client. The command context struct contains
                 * everything this function may needs.
                 */
-               ret = process_client_msg(cmd_ctx);
+               ret = process_client_msg(cmd_ctx, sock, &sock_error);
                rcu_thread_offline();
                if (ret < 0) {
+                       if (sock_error) {
+                               ret = close(sock);
+                               if (ret) {
+                                       PERROR("close");
+                               }
+                               sock = -1;
+                       }
                        /*
                         * TODO: Inform client somehow of the fatal error. At
                         * this point, ret < 0 means that a zmalloc failed
-                        * (ENOMEM). Error detected but still accept command.
+                        * (ENOMEM). Error detected but still accept
+                        * command, unless a socket error has been
+                        * detected.
                         */
                        clean_command_ctx(&cmd_ctx);
                        continue;
                }
 
+               health_code_update(&health_thread_cmd);
+
                DBG("Sending response (size: %d, retcode: %s)",
                                cmd_ctx->lttng_msg_size,
                                lttng_strerror(-cmd_ctx->llm->ret_code));
@@ -3840,17 +3362,12 @@ static void *thread_manage_clients(void *data)
                sock = -1;
 
                clean_command_ctx(&cmd_ctx);
+
+               health_code_update(&health_thread_cmd);
        }
 
+exit:
 error:
-       DBG("Client thread dying");
-       unlink(client_unix_sock_path);
-       if (client_sock >= 0) {
-               ret = close(client_sock);
-               if (ret) {
-                       PERROR("close");
-               }
-       }
        if (sock >= 0) {
                ret = close(sock);
                if (ret) {
@@ -3861,6 +3378,26 @@ error:
        lttng_poll_clean(&events);
        clean_command_ctx(&cmd_ctx);
 
+error_listen:
+error_create_poll:
+error_testpoint:
+       unlink(client_unix_sock_path);
+       if (client_sock >= 0) {
+               ret = close(client_sock);
+               if (ret) {
+                       PERROR("close");
+               }
+       }
+
+       if (err) {
+               health_error(&health_thread_cmd);
+               ERR("Health error occurred in %s", __func__);
+       }
+
+       health_unregister();
+
+       DBG("Client thread dying");
+
        rcu_unregister_thread();
        return NULL;
 }
@@ -3891,6 +3428,7 @@ static void usage(void)
        fprintf(stderr, "  -S, --sig-parent                   Send SIGCHLD to parent pid to notify readiness.\n");
        fprintf(stderr, "  -q, --quiet                        No output at all.\n");
        fprintf(stderr, "  -v, --verbose                      Verbose mode. Activate DBG() macro.\n");
+       fprintf(stderr, "  -p, --pidfile FILE                 Write a pid to FILE name overriding the default value.\n");
        fprintf(stderr, "      --verbose-consumer             Verbose mode for consumer. Activate DBG() macro.\n");
        fprintf(stderr, "      --no-kernel                    Disable kernel tracer\n");
 }
@@ -3924,12 +3462,13 @@ static int parse_args(int argc, char **argv)
                { "verbose", 0, 0, 'v' },
                { "verbose-consumer", 0, 0, 'Z' },
                { "no-kernel", 0, 0, 'N' },
+               { "pidfile", 1, 0, 'p' },
                { NULL, 0, 0, 0 }
        };
 
        while (1) {
                int option_index = 0;
-               c = getopt_long(argc, argv, "dhqvVSN" "a:c:g:s:C:E:D:F:Z:u:t",
+               c = getopt_long(argc, argv, "dhqvVSN" "a:c:g:s:C:E:D:F:Z:u:t:p:",
                                long_options, &option_index);
                if (c == -1) {
                        break;
@@ -4006,6 +3545,9 @@ static int parse_args(int argc, char **argv)
                case 'T':
                        consumerd64_libdir = optarg;
                        break;
+               case 'p':
+                       opt_pidfile = optarg;
+                       break;
                default:
                        /* Unknown option or other error.
                         * Error is printed by getopt, just return */
@@ -4036,6 +3578,14 @@ static int init_daemon_socket(void)
                goto end;
        }
 
+       /* Set the cloexec flag */
+       ret = utils_set_fd_cloexec(client_sock);
+       if (ret < 0) {
+               ERR("Unable to set CLOEXEC flag to the client Unix socket (fd: %d). "
+                               "Continuing but note that the consumer daemon will have a "
+                               "reference to this socket on exec()", client_sock);
+       }
+
        /* File permission MUST be 660 */
        ret = chmod(client_unix_sock_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
        if (ret < 0) {
@@ -4052,6 +3602,14 @@ static int init_daemon_socket(void)
                goto end;
        }
 
+       /* Set the cloexec flag */
+       ret = utils_set_fd_cloexec(apps_sock);
+       if (ret < 0) {
+               ERR("Unable to set CLOEXEC flag to the app Unix socket (fd: %d). "
+                               "Continuing but note that the consumer daemon will have a "
+                               "reference to this socket on exec()", apps_sock);
+       }
+
        /* File permission MUST be 666 */
        ret = chmod(apps_unix_sock_path,
                        S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
@@ -4061,6 +3619,9 @@ static int init_daemon_socket(void)
                goto end;
        }
 
+       DBG3("Session daemon client socket %d and application socket %d created",
+                       client_sock, apps_sock);
+
 end:
        umask(old_umask);
        return ret;
@@ -4091,13 +3652,15 @@ static int set_permissions(char *rundir)
        int ret;
        gid_t gid;
 
-       gid = allowed_group();
-       if (gid < 0) {
+       ret = allowed_group();
+       if (ret < 0) {
                WARN("No tracing group detected");
                ret = 0;
                goto end;
        }
 
+       gid = ret;
+
        /* Set lttng run dir */
        ret = chown(rundir, 0, gid);
        if (ret < 0) {
@@ -4146,58 +3709,6 @@ end:
        return ret;
 }
 
-/*
- * Create the pipe used to wake up the kernel thread.
- * Closed in cleanup().
- */
-static int create_kernel_poll_pipe(void)
-{
-       int ret, i;
-
-       ret = pipe(kernel_poll_pipe);
-       if (ret < 0) {
-               PERROR("kernel poll pipe");
-               goto error;
-       }
-
-       for (i = 0; i < 2; i++) {
-               ret = fcntl(kernel_poll_pipe[i], F_SETFD, FD_CLOEXEC);
-               if (ret < 0) {
-                       PERROR("fcntl kernel_poll_pipe");
-                       goto error;
-               }
-       }
-
-error:
-       return ret;
-}
-
-/*
- * Create the application command pipe to wake thread_manage_apps.
- * Closed in cleanup().
- */
-static int create_apps_cmd_pipe(void)
-{
-       int ret, i;
-
-       ret = pipe(apps_cmd_pipe);
-       if (ret < 0) {
-               PERROR("apps cmd pipe");
-               goto error;
-       }
-
-       for (i = 0; i < 2; i++) {
-               ret = fcntl(apps_cmd_pipe[i], F_SETFD, FD_CLOEXEC);
-               if (ret < 0) {
-                       PERROR("fcntl apps_cmd_pipe");
-                       goto error;
-               }
-       }
-
-error:
-       return ret;
-}
-
 /*
  * Create the lttng run directory needed for all global sockets and pipe.
  */
@@ -4252,10 +3763,11 @@ static int set_consumer_sockets(struct consumer_data *consumer_data,
        ret = mkdir(path, S_IRWXU);
        if (ret < 0) {
                if (errno != EEXIST) {
+                       PERROR("mkdir");
                        ERR("Failed to create %s", path);
                        goto error;
                }
-               ret = 0;
+               ret = -1;
        }
 
        /* Create the kconsumerd error unix socket */
@@ -4362,6 +3874,38 @@ static void set_ulimit(void)
        }
 }
 
+/*
+ * Write the session daemon pid file. The path is opt_pidfile when set,
+ * else rundir/DEFAULT_LTTNG_SESSIOND_PIDFILE. Failures are not fatal.
+ */
+static void write_pidfile(void)
+{
+       int ret;
+       char pidfile_path[PATH_MAX];
+
+       assert(rundir);
+
+       if (opt_pidfile) {
+               /* snprintf, unlike strncpy, always NUL-terminates the buffer. */
+               ret = snprintf(pidfile_path, sizeof(pidfile_path), "%s",
+                               opt_pidfile);
+       } else {
+               /* No path was given: use the default file name in rundir. */
+               ret = snprintf(pidfile_path, sizeof(pidfile_path), "%s/"
+                               DEFAULT_LTTNG_SESSIOND_PIDFILE, rundir);
+       }
+       if (ret < 0 || (size_t) ret >= sizeof(pidfile_path)) {
+               ERR("Pidfile path is invalid or too long, not writing it");
+               goto error;
+       }
+
+       /* Best effort: the daemon keeps running even if the write fails. */
+       (void) utils_create_pid_file(getpid(), pidfile_path);
+
+error:
+       return;
+}
+
 /*
  * main
  */
@@ -4369,32 +3913,47 @@ int main(int argc, char **argv)
 {
        int ret = 0;
        void *status;
-       const char *home_path;
+       const char *home_path, *env_app_timeout;
 
        init_kernel_workarounds();
 
        rcu_register_thread();
 
-       /* Create thread quit pipe */
-       if ((ret = init_thread_quit_pipe()) < 0) {
-               goto error;
-       }
-
        setup_consumerd_path();
 
        /* Parse arguments */
        progname = argv[0];
-       if ((ret = parse_args(argc, argv) < 0)) {
+       if ((ret = parse_args(argc, argv)) < 0) {
                goto error;
        }
 
        /* Daemonize */
        if (opt_daemon) {
+               int i;
+
+               /*
+                * fork
+                * child: setsid, close FD 0, 1, 2, chdir /
+                * parent: exit (if fork is successful)
+                */
                ret = daemon(0, 0);
                if (ret < 0) {
                        PERROR("daemon");
                        goto error;
                }
+               /*
+                * We are in the child. Make sure all other file
+                * descriptors are closed, in case we are called with
+                * more opened file descriptors than the standard ones.
+                */
+               for (i = 3; i < sysconf(_SC_OPEN_MAX); i++) {
+                       (void) close(i);
+               }
+       }
+
+       /* Create thread quit pipe */
+       if ((ret = init_thread_quit_pipe()) < 0) {
+               goto error;
        }
 
        /* Check if daemon is UID = 0 */
@@ -4425,6 +3984,11 @@ int main(int argc, char **argv)
                                        DEFAULT_GLOBAL_APPS_WAIT_SHM_PATH);
                }
 
+               if (strlen(health_unix_sock_path) == 0) {
+                       snprintf(health_unix_sock_path, sizeof(health_unix_sock_path),
+                                       DEFAULT_GLOBAL_HEALTH_UNIX_SOCK);
+               }
+
                /* Setup kernel consumerd path */
                snprintf(kconsumer_data.err_unix_sock_path, PATH_MAX,
                                DEFAULT_KCONSUMERD_ERR_SOCK_PATH, rundir);
@@ -4475,8 +4039,18 @@ int main(int argc, char **argv)
                        snprintf(wait_shm_path, PATH_MAX,
                                        DEFAULT_HOME_APPS_WAIT_SHM_PATH, geteuid());
                }
+
+               /* Set health check Unix path */
+               if (strlen(health_unix_sock_path) == 0) {
+                       snprintf(health_unix_sock_path, sizeof(health_unix_sock_path),
+                                       DEFAULT_HOME_HEALTH_UNIX_SOCK, home_path);
+               }
        }
 
+       /* Set consumer initial state */
+       kernel_consumerd_state = CONSUMER_STOPPED;
+       ust_consumerd_state = CONSUMER_STOPPED;
+
        DBG("Client socket path %s", client_unix_sock_path);
        DBG("Application socket path %s", apps_unix_sock_path);
        DBG("LTTng run directory path: %s", rundir);
@@ -4515,6 +4089,12 @@ int main(int argc, char **argv)
                goto error;
        }
 
+       /*
+        * Init UST app hash table. Alloc hash table before this point since
+        * cleanup() can get called after that point.
+        */
+       ust_app_ht_alloc();
+
        /* After this point, we can safely call cleanup() with "goto exit" */
 
        /*
@@ -4537,6 +4117,8 @@ int main(int argc, char **argv)
                /* Set ulimit for open files */
                set_ulimit();
        }
+       /* init lttng_fd tracking must be done after set_ulimit. */
+       lttng_fd_init();
 
        ret = set_consumer_sockets(&ustconsumer64_data, rundir);
        if (ret < 0) {
@@ -4568,21 +4150,20 @@ int main(int argc, char **argv)
        }
 
        /* Setup the kernel pipe for waking up the kernel thread */
-       if ((ret = create_kernel_poll_pipe()) < 0) {
-               goto exit;
+       if (is_root && !opt_no_kernel) {
+               if ((ret = utils_create_pipe_cloexec(kernel_poll_pipe)) < 0) {
+                       goto exit;
+               }
        }
 
        /* Setup the thread apps communication pipe. */
-       if ((ret = create_apps_cmd_pipe()) < 0) {
+       if ((ret = utils_create_pipe_cloexec(apps_cmd_pipe)) < 0) {
                goto exit;
        }
 
        /* Init UST command queue. */
        cds_wfq_init(&ust_cmd_queue.queue);
 
-       /* Init UST app hash table */
-       ust_app_ht_alloc();
-
        /*
         * Get session list pointer. This pointer MUST NOT be free(). This list is
         * statically declared in session.c
@@ -4592,6 +4173,26 @@ int main(int argc, char **argv)
        /* Set up max poll set size */
        lttng_poll_set_max_size();
 
+       cmd_init();
+
+       /* Check for the application socket timeout env variable. */
+       env_app_timeout = getenv(DEFAULT_APP_SOCKET_TIMEOUT_ENV);
+       if (env_app_timeout) {
+               app_socket_timeout = atoi(env_app_timeout);
+       } else {
+               app_socket_timeout = DEFAULT_APP_SOCKET_RW_TIMEOUT;
+       }
+
+       write_pidfile();
+
+       /* Create thread to manage the health check socket */
+       ret = pthread_create(&health_thread, NULL,
+                       thread_manage_health, (void *) NULL);
+       if (ret != 0) {
+               PERROR("pthread_create health");
+               goto exit_health;
+       }
+
        /* Create thread to manage the client socket */
        ret = pthread_create(&client_thread, NULL,
                        thread_manage_clients, (void *) NULL);
@@ -4624,18 +4225,21 @@ int main(int argc, char **argv)
                goto exit_apps;
        }
 
-       /* Create kernel thread to manage kernel event */
-       ret = pthread_create(&kernel_thread, NULL,
-                       thread_manage_kernel, (void *) NULL);
-       if (ret != 0) {
-               PERROR("pthread_create kernel");
-               goto exit_kernel;
-       }
+       /* Only start this thread when running as root with kernel tracing enabled */
+       if (is_root && !opt_no_kernel) {
+               /* Create kernel thread to manage kernel event */
+               ret = pthread_create(&kernel_thread, NULL,
+                               thread_manage_kernel, (void *) NULL);
+               if (ret != 0) {
+                       PERROR("pthread_create kernel");
+                       goto exit_kernel;
+               }
 
-       ret = pthread_join(kernel_thread, &status);
-       if (ret != 0) {
-               PERROR("pthread_join");
-               goto error;     /* join error, exit without cleanup */
+               ret = pthread_join(kernel_thread, &status);
+               if (ret != 0) {
+                       PERROR("pthread_join");
+                       goto error;     /* join error, exit without cleanup */
+               }
        }
 
 exit_kernel:
@@ -4672,7 +4276,26 @@ exit_dispatch:
                goto error;     /* join error, exit without cleanup */
        }
 
+       ret = join_consumer_thread(&ustconsumer32_data);
+       if (ret != 0) {
+               PERROR("join_consumer ust32");
+               goto error;     /* join error, exit without cleanup */
+       }
+
+       ret = join_consumer_thread(&ustconsumer64_data);
+       if (ret != 0) {
+               PERROR("join_consumer ust64");
+               goto error;     /* join error, exit without cleanup */
+       }
+
 exit_client:
+       ret = pthread_join(health_thread, &status);
+       if (ret != 0) {
+               PERROR("pthread_join health thread");
+               goto error;     /* join error, exit without cleanup */
+       }
+
+exit_health:
 exit:
        /*
         * cleanup() is called when no other thread is running.
This page took 0.10327 seconds and 4 git commands to generate.