Fix: report error to client on consumerd error
[lttng-tools.git] / src / bin / lttng-sessiond / main.c
index 8654031f1becce5d4fa497ddba1c70b35850d25f..7327c3cb2262ddf4337b46c08f2d02160f1136e7 100644 (file)
@@ -2,18 +2,18 @@
  * Copyright (C) 2011 - David Goulet <david.goulet@polymtl.ca>
  *                      Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; only version 2 of the License.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2 only,
+ * as published by the Free Software Foundation.
  *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA  02111-1307, USA.
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #define _GNU_SOURCE
@@ -34,6 +34,7 @@
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <urcu/futex.h>
+#include <urcu/uatomic.h>
 #include <unistd.h>
 #include <config.h>
 
@@ -54,6 +55,7 @@
 #include "shm.h"
 #include "ust-ctl.h"
 #include "utils.h"
+#include "fd-limit.h"
 
 #define CONSUMERD_FILE "lttng-consumerd"
 
@@ -81,14 +83,10 @@ const char default_tracing_group[] = DEFAULT_TRACING_GROUP;
 const char default_ust_sock_dir[] = DEFAULT_UST_SOCK_DIR;
 const char default_global_apps_pipe[] = DEFAULT_GLOBAL_APPS_PIPE;
 
-/* Variables */
-int opt_verbose;    /* Not static for lttngerr.h */
-int opt_verbose_consumer;    /* Not static for lttngerr.h */
-int opt_quiet;      /* Not static for lttngerr.h */
-
 const char *progname;
 const char *opt_tracing_group;
 static int opt_sig_parent;
+static int opt_verbose_consumer;
 static int opt_daemon;
 static int opt_no_kernel;
 static int is_root;                    /* Set to 1 if the daemon is running as root */
@@ -100,16 +98,22 @@ static struct consumer_data kconsumer_data = {
        .type = LTTNG_CONSUMER_KERNEL,
        .err_unix_sock_path = DEFAULT_KCONSUMERD_ERR_SOCK_PATH,
        .cmd_unix_sock_path = DEFAULT_KCONSUMERD_CMD_SOCK_PATH,
+       .err_sock = -1,
+       .cmd_sock = -1,
 };
 static struct consumer_data ustconsumer64_data = {
        .type = LTTNG_CONSUMER64_UST,
        .err_unix_sock_path = DEFAULT_USTCONSUMERD64_ERR_SOCK_PATH,
        .cmd_unix_sock_path = DEFAULT_USTCONSUMERD64_CMD_SOCK_PATH,
+       .err_sock = -1,
+       .cmd_sock = -1,
 };
 static struct consumer_data ustconsumer32_data = {
        .type = LTTNG_CONSUMER32_UST,
        .err_unix_sock_path = DEFAULT_USTCONSUMERD32_ERR_SOCK_PATH,
        .cmd_unix_sock_path = DEFAULT_USTCONSUMERD32_CMD_SOCK_PATH,
+       .err_sock = -1,
+       .cmd_sock = -1,
 };
 
 static int dispatch_thread_exit;
@@ -122,22 +126,22 @@ static char client_unix_sock_path[PATH_MAX];
 static char wait_shm_path[PATH_MAX];
 
 /* Sockets and FDs */
-static int client_sock;
-static int apps_sock;
-static int kernel_tracer_fd;
-static int kernel_poll_pipe[2];
+static int client_sock = -1;
+static int apps_sock = -1;
+static int kernel_tracer_fd = -1;
+static int kernel_poll_pipe[2] = { -1, -1 };
 
 /*
  * Quit pipe for all threads. This permits a single cancellation point
  * for all threads when receiving an event on the pipe.
  */
-static int thread_quit_pipe[2];
+static int thread_quit_pipe[2] = { -1, -1 };
 
 /*
  * This pipe is used to inform the thread managing application communication
  * that a command is queued and ready to be processed.
  */
-static int apps_cmd_pipe[2];
+static int apps_cmd_pipe[2] = { -1, -1 };
 
 /* Pthread, Mutexes and Semaphores */
 static pthread_t apps_thread;
@@ -176,6 +180,40 @@ static const char *consumerd64_bin = CONFIG_CONSUMERD64_BIN;
 static const char *consumerd32_libdir = CONFIG_CONSUMERD32_LIBDIR;
 static const char *consumerd64_libdir = CONFIG_CONSUMERD64_LIBDIR;
 
+/*
+ * Consumer daemon state which is changed when spawning it, killing it or in
+ * case of a fatal error.
+ */
+enum consumerd_state {
+       CONSUMER_STARTED = 1,
+       CONSUMER_STOPPED = 2,
+       CONSUMER_ERROR   = 3,
+};
+
+/*
+ * This consumer daemon state is used to validate if a client command will be
+ * able to reach the consumer. If not, the client is informed. For instance,
+ * doing a "lttng start" when the consumer state is set to ERROR will return an
+ * error to the client.
+ *
+ * The following example shows a possible race condition of this scheme:
+ *
+ * consumer thread error happens
+ *                                    client cmd arrives
+ *                                    client cmd checks state -> still OK
+ * consumer thread exit, sets error
+ *                                    client cmd try to talk to consumer
+ *                                    ...
+ *
+ * However, since the consumer is a different daemon, we have no way of making
+ * sure the command will reach it safely even with this state flag. This is why
+ * we consider that up to the state validation during command processing, the
+ * command is safe. After that, we can not guarantee the correctness of the
+ * client request vis-a-vis the consumer.
+ */
+static enum consumerd_state ust_consumerd_state;
+static enum consumerd_state kernel_consumerd_state;
+
 static
 void setup_consumerd_path(void)
 {
@@ -332,7 +370,8 @@ static void teardown_kernel_session(struct ltt_session *session)
         * If a custom kernel consumer was registered, close the socket before
         * tearing down the complete kernel session structure
         */
-       if (session->kernel_session->consumer_fd != kconsumer_data.cmd_sock) {
+       if (kconsumer_data.cmd_sock >= 0 &&
+                       session->kernel_session->consumer_fd != kconsumer_data.cmd_sock) {
                lttcomm_close_unix_sock(session->kernel_session->consumer_fd);
        }
 
@@ -386,7 +425,7 @@ static void stop_threads(void)
  */
 static void cleanup(void)
 {
-       int ret;
+       int ret, i;
        char *cmd;
        struct ltt_session *sess, *stmp;
 
@@ -427,13 +466,43 @@ static void cleanup(void)
 
        if (is_root && !opt_no_kernel) {
                DBG2("Closing kernel fd");
-               close(kernel_tracer_fd);
+               if (kernel_tracer_fd >= 0) {
+                       ret = close(kernel_tracer_fd);
+                       if (ret) {
+                               PERROR("close");
+                       }
+               }
                DBG("Unloading kernel modules");
                modprobe_remove_lttng_all();
        }
 
-       close(thread_quit_pipe[0]);
-       close(thread_quit_pipe[1]);
+       /*
+        * Closing all pipes used for communication between threads.
+        */
+       for (i = 0; i < 2; i++) {
+               if (kernel_poll_pipe[i] >= 0) {
+                       ret = close(kernel_poll_pipe[i]);
+                       if (ret) {
+                               PERROR("close");
+                       }
+               }
+       }
+       for (i = 0; i < 2; i++) {
+               if (thread_quit_pipe[i] >= 0) {
+                       ret = close(thread_quit_pipe[i]);
+                       if (ret) {
+                               PERROR("close");
+                       }
+               }
+       }
+       for (i = 0; i < 2; i++) {
+               if (apps_cmd_pipe[i] >= 0) {
+                       ret = close(apps_cmd_pipe[i]);
+                       if (ret) {
+                               PERROR("close");
+                       }
+               }
+       }
 
        /* <fun> */
        DBG("%c[%d;%dm*** assert failed :-) *** ==> %c[%dm%c[%d;%dm"
@@ -497,7 +566,7 @@ static int send_kconsumer_channel_streams(struct consumer_data *consumer_data,
        DBG("Sending channel %d to consumer", lkm.u.channel.channel_key);
        ret = lttcomm_send_unix_sock(sock, &lkm, sizeof(lkm));
        if (ret < 0) {
-               perror("send consumer channel");
+               PERROR("send consumer channel");
                goto error;
        }
 
@@ -519,12 +588,12 @@ static int send_kconsumer_channel_streams(struct consumer_data *consumer_data,
                DBG("Sending stream %d to consumer", lkm.u.stream.stream_key);
                ret = lttcomm_send_unix_sock(sock, &lkm, sizeof(lkm));
                if (ret < 0) {
-                       perror("send consumer stream");
+                       PERROR("send consumer stream");
                        goto error;
                }
                ret = lttcomm_send_fds_unix_sock(sock, &stream->fd, 1);
                if (ret < 0) {
-                       perror("send consumer stream ancillary data");
+                       PERROR("send consumer stream ancillary data");
                        goto error;
                }
        }
@@ -550,21 +619,21 @@ static int send_kconsumer_session_streams(struct consumer_data *consumer_data,
 
        DBG("Sending metadata stream fd");
 
-       /* Extra protection. It's NOT supposed to be set to 0 at this point */
-       if (session->consumer_fd == 0) {
+       /* Extra protection. It's NOT supposed to be set to -1 at this point */
+       if (session->consumer_fd < 0) {
                session->consumer_fd = consumer_data->cmd_sock;
        }
 
-       if (session->metadata_stream_fd != 0) {
+       if (session->metadata_stream_fd >= 0) {
                /* Send metadata channel fd */
                lkm.cmd_type = LTTNG_CONSUMER_ADD_CHANNEL;
                lkm.u.channel.channel_key = session->metadata->fd;
                lkm.u.channel.max_sb_size = session->metadata->conf->attr.subbuf_size;
                lkm.u.channel.mmap_len = 0;     /* for kernel */
-               DBG("Sending metadata channel %d to consumer", lkm.u.stream.stream_key);
+               DBG("Sending metadata channel %d to consumer", lkm.u.channel.channel_key);
                ret = lttcomm_send_unix_sock(sock, &lkm, sizeof(lkm));
                if (ret < 0) {
-                       perror("send consumer channel");
+                       PERROR("send consumer channel");
                        goto error;
                }
 
@@ -582,12 +651,12 @@ static int send_kconsumer_session_streams(struct consumer_data *consumer_data,
                DBG("Sending metadata stream %d to consumer", lkm.u.stream.stream_key);
                ret = lttcomm_send_unix_sock(sock, &lkm, sizeof(lkm));
                if (ret < 0) {
-                       perror("send consumer stream");
+                       PERROR("send consumer stream");
                        goto error;
                }
                ret = lttcomm_send_fds_unix_sock(sock, &session->metadata_stream_fd, 1);
                if (ret < 0) {
-                       perror("send consumer stream");
+                       PERROR("send consumer stream");
                        goto error;
                }
        }
@@ -648,7 +717,7 @@ static int setup_lttng_msg(struct command_ctx *cmd_ctx, size_t size)
 
        cmd_ctx->llm = zmalloc(sizeof(struct lttcomm_lttng_msg) + buf_size);
        if (cmd_ctx->llm == NULL) {
-               perror("zmalloc");
+               PERROR("zmalloc");
                ret = -ENOMEM;
                goto error;
        }
@@ -729,8 +798,8 @@ static int update_kernel_stream(struct consumer_data *consumer_data, int fd)
                        continue;
                }
 
-               /* This is not suppose to be 0 but this is an extra security check */
-               if (session->kernel_session->consumer_fd == 0) {
+               /* This is not suppose to be -1 but this is an extra security check */
+               if (session->kernel_session->consumer_fd < 0) {
                        session->kernel_session->consumer_fd = consumer_data->cmd_sock;
                }
 
@@ -808,7 +877,7 @@ static void *thread_manage_kernel(void *data)
 
        ret = create_thread_poll_set(&events, 2);
        if (ret < 0) {
-               goto error;
+               goto error_poll_create;
        }
 
        ret = lttng_poll_add(&events, kernel_poll_pipe[0], LPOLLIN);
@@ -893,12 +962,9 @@ static void *thread_manage_kernel(void *data)
        }
 
 error:
-       DBG("Kernel thread dying");
-       close(kernel_poll_pipe[0]);
-       close(kernel_poll_pipe[1]);
-
        lttng_poll_clean(&events);
-
+error_poll_create:
+       DBG("Kernel thread dying");
        return NULL;
 }
 
@@ -907,7 +973,7 @@ error:
  */
 static void *thread_manage_consumer(void *data)
 {
-       int sock = 0, i, ret, pollfd;
+       int sock = -1, i, ret, pollfd;
        uint32_t revents, nb_fd;
        enum lttcomm_return_code code;
        struct lttng_poll_event events;
@@ -917,7 +983,7 @@ static void *thread_manage_consumer(void *data)
 
        ret = lttcomm_listen_unix_sock(consumer_data->err_sock);
        if (ret < 0) {
-               goto error;
+               goto error_listen;
        }
 
        /*
@@ -926,7 +992,7 @@ static void *thread_manage_consumer(void *data)
         */
        ret = create_thread_poll_set(&events, 2);
        if (ret < 0) {
-               goto error;
+               goto error_poll;
        }
 
        ret = lttng_poll_add(&events, consumer_data->err_sock, LPOLLIN | LPOLLRDHUP);
@@ -1058,16 +1124,44 @@ restart_poll:
        ERR("consumer return code : %s", lttcomm_get_readable_code(-code));
 
 error:
-       DBG("consumer thread dying");
-       close(consumer_data->err_sock);
-       close(consumer_data->cmd_sock);
-       close(sock);
+       /* Immediately set the consumerd state to stopped */
+       if (consumer_data->type == LTTNG_CONSUMER_KERNEL) {
+               uatomic_set(&kernel_consumerd_state, CONSUMER_ERROR);
+       } else if (consumer_data->type == LTTNG_CONSUMER64_UST ||
+                       consumer_data->type == LTTNG_CONSUMER32_UST) {
+               uatomic_set(&ust_consumerd_state, CONSUMER_ERROR);
+       } else {
+               /* Code flow error... */
+               assert(0);
+       }
+
+       if (consumer_data->err_sock >= 0) {
+               ret = close(consumer_data->err_sock);
+               if (ret) {
+                       PERROR("close");
+               }
+       }
+       if (consumer_data->cmd_sock >= 0) {
+               ret = close(consumer_data->cmd_sock);
+               if (ret) {
+                       PERROR("close");
+               }
+       }
+       if (sock >= 0) {
+               ret = close(sock);
+               if (ret) {
+                       PERROR("close");
+               }
+       }
 
        unlink(consumer_data->err_unix_sock_path);
        unlink(consumer_data->cmd_unix_sock_path);
        consumer_data->pid = 0;
 
        lttng_poll_clean(&events);
+error_poll:
+error_listen:
+       DBG("consumer thread cleanup completed");
 
        return NULL;
 }
@@ -1089,7 +1183,7 @@ static void *thread_manage_apps(void *data)
 
        ret = create_thread_poll_set(&events, 2);
        if (ret < 0) {
-               goto error;
+               goto error_poll_create;
        }
 
        ret = lttng_poll_add(&events, apps_cmd_pipe[0], LPOLLIN | LPOLLRDHUP);
@@ -1138,7 +1232,7 @@ static void *thread_manage_apps(void *data)
                                        /* Empty pipe */
                                        ret = read(apps_cmd_pipe[0], &ust_cmd, sizeof(ust_cmd));
                                        if (ret < 0 || ret < sizeof(ust_cmd)) {
-                                               perror("read apps cmd pipe");
+                                               PERROR("read apps cmd pipe");
                                                goto error;
                                        }
 
@@ -1209,12 +1303,9 @@ static void *thread_manage_apps(void *data)
        }
 
 error:
-       DBG("Application communication apps dying");
-       close(apps_cmd_pipe[0]);
-       close(apps_cmd_pipe[1]);
-
        lttng_poll_clean(&events);
-
+error_poll_create:
+       DBG("Application communication apps thread cleanup complete");
        rcu_thread_offline();
        rcu_unregister_thread();
        return NULL;
@@ -1261,7 +1352,7 @@ static void *thread_dispatch_ust_registration(void *data)
                        ret = write(apps_cmd_pipe[1], ust_cmd,
                                        sizeof(struct ust_command));
                        if (ret < 0) {
-                               perror("write apps cmd pipe");
+                               PERROR("write apps cmd pipe");
                                if (errno == EBADF) {
                                        /*
                                         * We can't inform the application thread to process
@@ -1289,7 +1380,7 @@ error:
  */
 static void *thread_registration_apps(void *data)
 {
-       int sock = 0, i, ret, pollfd;
+       int sock = -1, i, ret, pollfd;
        uint32_t revents, nb_fd;
        struct lttng_poll_event events;
        /*
@@ -1302,7 +1393,7 @@ static void *thread_registration_apps(void *data)
 
        ret = lttcomm_listen_unix_sock(apps_sock);
        if (ret < 0) {
-               goto error;
+               goto error_listen;
        }
 
        /*
@@ -1311,13 +1402,13 @@ static void *thread_registration_apps(void *data)
         */
        ret = create_thread_poll_set(&events, 2);
        if (ret < 0) {
-               goto error;
+               goto error_create_poll;
        }
 
        /* Add the application registration socket */
        ret = lttng_poll_add(&events, apps_sock, LPOLLIN | LPOLLRDHUP);
        if (ret < 0) {
-               goto error;
+               goto error_poll_add;
        }
 
        /* Notify all applications to register */
@@ -1371,7 +1462,7 @@ static void *thread_registration_apps(void *data)
                                        /* Create UST registration command for enqueuing */
                                        ust_cmd = zmalloc(sizeof(struct ust_command));
                                        if (ust_cmd == NULL) {
-                                               perror("ust command zmalloc");
+                                               PERROR("ust command zmalloc");
                                                goto error;
                                        }
 
@@ -1379,20 +1470,37 @@ static void *thread_registration_apps(void *data)
                                         * Using message-based transmissions to ensure we don't
                                         * have to deal with partially received messages.
                                         */
+                                       ret = lttng_fd_get(LTTNG_FD_APPS, 1);
+                                       if (ret < 0) {
+                                               ERR("Exhausted file descriptors allowed for applications.");
+                                               free(ust_cmd);
+                                               ret = close(sock);
+                                               if (ret) {
+                                                       PERROR("close");
+                                               }
+                                               sock = -1;
+                                               continue;
+                                       }
                                        ret = lttcomm_recv_unix_sock(sock, &ust_cmd->reg_msg,
                                                        sizeof(struct ust_register_msg));
                                        if (ret < 0 || ret < sizeof(struct ust_register_msg)) {
                                                if (ret < 0) {
-                                                       perror("lttcomm_recv_unix_sock register apps");
+                                                       PERROR("lttcomm_recv_unix_sock register apps");
                                                } else {
                                                        ERR("Wrong size received on apps register");
                                                }
                                                free(ust_cmd);
-                                               close(sock);
+                                               ret = close(sock);
+                                               if (ret) {
+                                                       PERROR("close");
+                                               }
+                                               lttng_fd_put(LTTNG_FD_APPS, 1);
+                                               sock = -1;
                                                continue;
                                        }
 
                                        ust_cmd->sock = sock;
+                                       sock = -1;
 
                                        DBG("UST registration received with pid:%d ppid:%d uid:%d"
                                                        " gid:%d sock:%d name:%s (version %d.%d)",
@@ -1418,16 +1526,29 @@ static void *thread_registration_apps(void *data)
        }
 
 error:
-       DBG("UST Registration thread dying");
-
        /* Notify that the registration thread is gone */
        notify_ust_apps(0);
 
-       close(apps_sock);
-       close(sock);
+       if (apps_sock >= 0) {
+               ret = close(apps_sock);
+               if (ret) {
+                       PERROR("close");
+               }
+       }
+       if (sock >= 0) {
+               ret = close(sock);
+               if (ret) {
+                       PERROR("close");
+               }
+               lttng_fd_put(LTTNG_FD_APPS, 1);
+       }
        unlink(apps_unix_sock_path);
 
+error_poll_add:
        lttng_poll_clean(&events);
+error_listen:
+error_create_poll:
+       DBG("UST Registration thread cleanup complete");
 
        return NULL;
 }
@@ -1667,17 +1788,17 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data)
                        break;
                }
                default:
-                       perror("unknown consumer type");
+                       PERROR("unknown consumer type");
                        exit(EXIT_FAILURE);
                }
                if (errno != 0) {
-                       perror("kernel start consumer exec");
+                       PERROR("kernel start consumer exec");
                }
                exit(EXIT_FAILURE);
        } else if (pid > 0) {
                ret = pid;
        } else {
-               perror("start consumer fork");
+               PERROR("start consumer fork");
                ret = -errno;
        }
 error:
@@ -1768,20 +1889,30 @@ static int init_kernel_tracer(void)
 
 error_version:
        modprobe_remove_lttng_control();
-       close(kernel_tracer_fd);
-       kernel_tracer_fd = 0;
+       ret = close(kernel_tracer_fd);
+       if (ret) {
+               PERROR("close");
+       }
+       kernel_tracer_fd = -1;
        return LTTCOMM_KERN_VERSION;
 
 error_modules:
-       close(kernel_tracer_fd);
+       ret = close(kernel_tracer_fd);
+       if (ret) {
+               PERROR("close");
+       }
 
 error_open:
        modprobe_remove_lttng_control();
 
 error:
        WARN("No kernel tracer available");
-       kernel_tracer_fd = 0;
-       return LTTCOMM_KERN_NA;
+       kernel_tracer_fd = -1;
+       if (!is_root) {
+               return LTTCOMM_NEED_ROOT_SESSIOND;
+       } else {
+               return LTTCOMM_KERN_NA;
+       }
 }
 
 /*
@@ -1794,10 +1925,10 @@ static int init_kernel_tracing(struct ltt_kernel_session *session)
        if (session->consumer_fds_sent == 0) {
                /*
                 * Assign default kernel consumer socket if no consumer assigned to the
-                * kernel session. At this point, it's NOT suppose to be 0 but this is
+                * kernel session. At this point, it's NOT supposed to be -1 but this is
                 * an extra security check.
                 */
-               if (session->consumer_fd == 0) {
+               if (session->consumer_fd < 0) {
                        session->consumer_fd = kconsumer_data.cmd_sock;
                }
 
@@ -1885,7 +2016,7 @@ static int create_kernel_session(struct ltt_session *session)
        }
 
        /* Set kernel consumer socket fd */
-       if (kconsumer_data.cmd_sock) {
+       if (kconsumer_data.cmd_sock >= 0) {
                session->kernel_session->consumer_fd = kconsumer_data.cmd_sock;
        }
 
@@ -2774,7 +2905,8 @@ static int cmd_start_trace(struct ltt_session *session)
        usess = session->ust_session;
 
        if (session->enabled) {
-               ret = LTTCOMM_UST_START_FAIL;
+               /* Already started. */
+               ret = LTTCOMM_TRACE_ALREADY_STARTED;
                goto error;
        }
 
@@ -2794,7 +2926,7 @@ static int cmd_start_trace(struct ltt_session *session)
                }
 
                /* Open kernel metadata stream */
-               if (ksession->metadata_stream_fd == 0) {
+               if (ksession->metadata_stream_fd < 0) {
                        ret = kernel_open_metadata_stream(ksession);
                        if (ret < 0) {
                                ERR("Kernel create metadata stream failed");
@@ -2866,7 +2998,7 @@ static int cmd_stop_trace(struct ltt_session *session)
        usess = session->ust_session;
 
        if (!session->enabled) {
-               ret = LTTCOMM_UST_STOP_FAIL;
+               ret = LTTCOMM_TRACE_ALREADY_STOPPED;
                goto error;
        }
 
@@ -2951,7 +3083,7 @@ static int cmd_destroy_session(struct ltt_session *session, char *name)
         */
        ret = notify_thread_pipe(kernel_poll_pipe[1]);
        if (ret < 0) {
-               perror("write kernel poll pipe");
+               PERROR("write kernel poll pipe");
        }
 
        ret = session_destroy(session);
@@ -3191,7 +3323,11 @@ static int process_client_msg(struct command_ctx *cmd_ctx)
 
        if (opt_no_kernel && need_domain
                        && cmd_ctx->lsm->domain.type == LTTNG_DOMAIN_KERNEL) {
-               ret = LTTCOMM_KERN_NA;
+               if (!is_root) {
+                       ret = LTTCOMM_NEED_ROOT_SESSIOND;
+               } else {
+                       ret = LTTCOMM_KERN_NA;
+               }
                goto error;
        }
 
@@ -3226,9 +3362,13 @@ static int process_client_msg(struct command_ctx *cmd_ctx)
                break;
        default:
                DBG("Getting session %s by name", cmd_ctx->lsm->session.name);
+               /*
+                * We keep the session list lock across _all_ commands
+                * for now, because the per-session lock does not
+                * handle teardown properly.
+                */
                session_lock_list();
                cmd_ctx->session = session_find_by_name(cmd_ctx->lsm->session.name);
-               session_unlock_list();
                if (cmd_ctx->session == NULL) {
                        if (cmd_ctx->lsm->session.name != NULL) {
                                ret = LTTCOMM_SESS_NOT_FOUND;
@@ -3253,12 +3393,12 @@ static int process_client_msg(struct command_ctx *cmd_ctx)
        switch (cmd_ctx->lsm->domain.type) {
        case LTTNG_DOMAIN_KERNEL:
                if (!is_root) {
-                       ret = LTTCOMM_KERN_NA;
+                       ret = LTTCOMM_NEED_ROOT_SESSIOND;
                        goto error;
                }
 
                /* Kernel tracer check */
-               if (kernel_tracer_fd == 0) {
+               if (kernel_tracer_fd == -1) {
                        /* Basically, load kernel tracer modules */
                        ret = init_kernel_tracer();
                        if (ret != 0) {
@@ -3266,6 +3406,12 @@ static int process_client_msg(struct command_ctx *cmd_ctx)
                        }
                }
 
+               /* Consumer is in an ERROR state. Report back to client */
+               if (uatomic_read(&kernel_consumerd_state) == CONSUMER_ERROR) {
+                       ret = LTTCOMM_NO_KERNCONSUMERD;
+                       goto error;
+               }
+
                /* Need a session for kernel command */
                if (need_tracing_session) {
                        if (cmd_ctx->session->kernel_session == NULL) {
@@ -3286,13 +3432,21 @@ static int process_client_msg(struct command_ctx *cmd_ctx)
                                        ret = LTTCOMM_KERN_CONSUMER_FAIL;
                                        goto error;
                                }
+                               uatomic_set(&kernel_consumerd_state, CONSUMER_STARTED);
                        } else {
                                pthread_mutex_unlock(&kconsumer_data.pid_mutex);
                        }
                }
+
                break;
        case LTTNG_DOMAIN_UST:
        {
+               /* Consumer is in an ERROR state. Report back to client */
+               if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) {
+                       ret = LTTCOMM_NO_USTCONSUMERD;
+                       goto error;
+               }
+
                if (need_tracing_session) {
                        if (cmd_ctx->session->ust_session == NULL) {
                                ret = create_ust_session(cmd_ctx->session,
@@ -3316,6 +3470,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx)
                                }
 
                                ust_consumerd64_fd = ustconsumer64_data.cmd_sock;
+                               uatomic_set(&ust_consumerd_state, CONSUMER_STARTED);
                        } else {
                                pthread_mutex_unlock(&ustconsumer64_data.pid_mutex);
                        }
@@ -3330,7 +3485,9 @@ static int process_client_msg(struct command_ctx *cmd_ctx)
                                        ust_consumerd32_fd = -EINVAL;
                                        goto error;
                                }
+
                                ust_consumerd32_fd = ustconsumer32_data.cmd_sock;
+                               uatomic_set(&ust_consumerd_state, CONSUMER_STARTED);
                        } else {
                                pthread_mutex_unlock(&ustconsumer32_data.pid_mutex);
                        }
@@ -3342,6 +3499,25 @@ static int process_client_msg(struct command_ctx *cmd_ctx)
        }
 skip_domain:
 
+       /* Validate consumer daemon state when start/stop trace command */
+       if (cmd_ctx->lsm->cmd_type == LTTNG_START_TRACE ||
+                       cmd_ctx->lsm->cmd_type == LTTNG_STOP_TRACE) {
+               switch (cmd_ctx->lsm->domain.type) {
+               case LTTNG_DOMAIN_UST:
+                       if (uatomic_read(&ust_consumerd_state) != CONSUMER_STARTED) {
+                               ret = LTTCOMM_NO_USTCONSUMERD;
+                               goto error;
+                       }
+                       break;
+               case LTTNG_DOMAIN_KERNEL:
+                       if (uatomic_read(&kernel_consumerd_state) != CONSUMER_STARTED) {
+                               ret = LTTCOMM_NO_KERNCONSUMERD;
+                               goto error;
+                       }
+                       break;
+               }
+       }
+
        /*
         * Check that the UID or GID match that of the tracing session.
         * The root user can interact with all sessions.
@@ -3458,6 +3634,11 @@ skip_domain:
        {
                ret = cmd_destroy_session(cmd_ctx->session,
                                cmd_ctx->lsm->session.name);
+               /*
+                * Set session to NULL so we do not unlock it after
+                * free.
+                */
+               cmd_ctx->session = NULL;
                break;
        }
        case LTTNG_LIST_DOMAINS:
@@ -3592,6 +3773,9 @@ setup_error:
        if (cmd_ctx->session) {
                session_unlock(cmd_ctx->session);
        }
+       if (need_tracing_session) {
+               session_unlock_list();
+       }
 init_setup_error:
        return ret;
 }
@@ -3602,7 +3786,7 @@ init_setup_error:
  */
 static void *thread_manage_clients(void *data)
 {
-       int sock = 0, ret, i, pollfd;
+       int sock = -1, ret, i, pollfd;
        uint32_t revents, nb_fd;
        struct command_ctx *cmd_ctx = NULL;
        struct lttng_poll_event events;
@@ -3692,14 +3876,14 @@ static void *thread_manage_clients(void *data)
                /* Allocate context command to process the client request */
                cmd_ctx = zmalloc(sizeof(struct command_ctx));
                if (cmd_ctx == NULL) {
-                       perror("zmalloc cmd_ctx");
+                       PERROR("zmalloc cmd_ctx");
                        goto error;
                }
 
                /* Allocate data buffer for reception */
                cmd_ctx->lsm = zmalloc(sizeof(struct lttcomm_session_msg));
                if (cmd_ctx->lsm == NULL) {
-                       perror("zmalloc cmd_ctx->lsm");
+                       PERROR("zmalloc cmd_ctx->lsm");
                        goto error;
                }
 
@@ -3716,8 +3900,12 @@ static void *thread_manage_clients(void *data)
                                sizeof(struct lttcomm_session_msg), &cmd_ctx->creds);
                if (ret <= 0) {
                        DBG("Nothing recv() from client... continuing");
-                       close(sock);
-                       free(cmd_ctx);
+                       ret = close(sock);
+                       if (ret) {
+                               PERROR("close");
+                       }
+                       sock = -1;
+                       clean_command_ctx(&cmd_ctx);
                        continue;
                }
 
@@ -3752,7 +3940,11 @@ static void *thread_manage_clients(void *data)
                }
 
                /* End of transmission */
-               close(sock);
+               ret = close(sock);
+               if (ret) {
+                       PERROR("close");
+               }
+               sock = -1;
 
                clean_command_ctx(&cmd_ctx);
        }
@@ -3760,8 +3952,18 @@ static void *thread_manage_clients(void *data)
 error:
        DBG("Client thread dying");
        unlink(client_unix_sock_path);
-       close(client_sock);
-       close(sock);
+       if (client_sock >= 0) {
+               ret = close(client_sock);
+               if (ret) {
+                       PERROR("close");
+               }
+       }
+       if (sock >= 0) {
+               ret = close(sock);
+               if (ret) {
+                       PERROR("close");
+               }
+       }
 
        lttng_poll_clean(&events);
        clean_command_ctx(&cmd_ctx);
@@ -3890,11 +4092,11 @@ static int parse_args(int argc, char **argv)
                        opt_no_kernel = 1;
                        break;
                case 'q':
-                       opt_quiet = 1;
+                       lttng_opt_quiet = 1;
                        break;
                case 'v':
                        /* Verbose level can increase using multiple -v */
-                       opt_verbose += 1;
+                       lttng_opt_verbose += 1;
                        break;
                case 'Z':
                        opt_verbose_consumer += 1;
@@ -3945,7 +4147,7 @@ static int init_daemon_socket(void)
        ret = chmod(client_unix_sock_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
        if (ret < 0) {
                ERR("Set file permissions failed: %s", client_unix_sock_path);
-               perror("chmod");
+               PERROR("chmod");
                goto end;
        }
 
@@ -3962,7 +4164,7 @@ static int init_daemon_socket(void)
                        S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
        if (ret < 0) {
                ERR("Set file permissions failed: %s", apps_unix_sock_path);
-               perror("chmod");
+               PERROR("chmod");
                goto end;
        }
 
@@ -4007,42 +4209,42 @@ static int set_permissions(char *rundir)
        ret = chown(rundir, 0, gid);
        if (ret < 0) {
                ERR("Unable to set group on %s", rundir);
-               perror("chown");
+               PERROR("chown");
        }
 
        /* Ensure tracing group can search the run dir */
        ret = chmod(rundir, S_IRWXU | S_IXGRP | S_IXOTH);
        if (ret < 0) {
                ERR("Unable to set permissions on %s", rundir);
-               perror("chmod");
+               PERROR("chmod");
        }
 
        /* lttng client socket path */
        ret = chown(client_unix_sock_path, 0, gid);
        if (ret < 0) {
                ERR("Unable to set group on %s", client_unix_sock_path);
-               perror("chown");
+               PERROR("chown");
        }
 
        /* kconsumer error socket path */
        ret = chown(kconsumer_data.err_unix_sock_path, 0, gid);
        if (ret < 0) {
                ERR("Unable to set group on %s", kconsumer_data.err_unix_sock_path);
-               perror("chown");
+               PERROR("chown");
        }
 
        /* 64-bit ustconsumer error socket path */
        ret = chown(ustconsumer64_data.err_unix_sock_path, 0, gid);
        if (ret < 0) {
                ERR("Unable to set group on %s", ustconsumer64_data.err_unix_sock_path);
-               perror("chown");
+               PERROR("chown");
        }
 
        /* 32-bit ustconsumer compat32 error socket path */
        ret = chown(ustconsumer32_data.err_unix_sock_path, 0, gid);
        if (ret < 0) {
                ERR("Unable to set group on %s", ustconsumer32_data.err_unix_sock_path);
-               perror("chown");
+               PERROR("chown");
        }
 
        DBG("All permissions are set");
@@ -4053,6 +4255,7 @@ end:
 
 /*
  * Create the pipe used to wake up the kernel thread.
+ * Closed in cleanup().
  */
 static int create_kernel_poll_pipe(void)
 {
@@ -4078,6 +4281,7 @@ error:
 
 /*
  * Create the application command pipe to wake thread_manage_apps.
+ * Closed in cleanup().
  */
 static int create_apps_cmd_pipe(void)
 {
@@ -4155,10 +4359,11 @@ static int set_consumer_sockets(struct consumer_data *consumer_data,
        ret = mkdir(path, S_IRWXU);
        if (ret < 0) {
                if (errno != EEXIST) {
+                       PERROR("mkdir");
                        ERR("Failed to create %s", path);
                        goto error;
                }
-               ret = 0;
+               ret = -1;
        }
 
        /* Create the kconsumerd error unix socket */
@@ -4219,7 +4424,7 @@ static int set_signal_handler(void)
        sigset_t sigset;
 
        if ((ret = sigemptyset(&sigset)) < 0) {
-               perror("sigemptyset");
+               PERROR("sigemptyset");
                return ret;
        }
 
@@ -4227,17 +4432,17 @@ static int set_signal_handler(void)
        sa.sa_mask = sigset;
        sa.sa_flags = 0;
        if ((ret = sigaction(SIGTERM, &sa, NULL)) < 0) {
-               perror("sigaction");
+               PERROR("sigaction");
                return ret;
        }
 
        if ((ret = sigaction(SIGINT, &sa, NULL)) < 0) {
-               perror("sigaction");
+               PERROR("sigaction");
                return ret;
        }
 
        if ((ret = sigaction(SIGPIPE, &sa, NULL)) < 0) {
-               perror("sigaction");
+               PERROR("sigaction");
                return ret;
        }
 
@@ -4261,7 +4466,7 @@ static void set_ulimit(void)
 
        ret = setrlimit(RLIMIT_NOFILE, &lim);
        if (ret < 0) {
-               perror("failed to set open files limit");
+               PERROR("failed to set open files limit");
        }
 }
 
@@ -4295,7 +4500,7 @@ int main(int argc, char **argv)
        if (opt_daemon) {
                ret = daemon(0, 0);
                if (ret < 0) {
-                       perror("daemon");
+                       PERROR("daemon");
                        goto error;
                }
        }
@@ -4380,6 +4585,10 @@ int main(int argc, char **argv)
                }
        }
 
+       /* Set consumer initial state */
+       kernel_consumerd_state = CONSUMER_STOPPED;
+       ust_consumerd_state = CONSUMER_STOPPED;
+
        DBG("Client socket path %s", client_unix_sock_path);
        DBG("Application socket path %s", apps_unix_sock_path);
        DBG("LTTng run directory path: %s", rundir);
@@ -4418,6 +4627,12 @@ int main(int argc, char **argv)
                goto error;
        }
 
+       /*
+        * Init UST app hash table. Alloc hash table before this point since
+        * cleanup() can get called after that point.
+        */
+       ust_app_ht_alloc();
+
        /* After this point, we can safely call cleanup() with "goto exit" */
 
        /*
@@ -4440,6 +4655,8 @@ int main(int argc, char **argv)
                /* Set ulimit for open files */
                set_ulimit();
        }
+       /* init lttng_fd tracking must be done after set_ulimit. */
+       lttng_fd_init();
 
        ret = set_consumer_sockets(&ustconsumer64_data, rundir);
        if (ret < 0) {
@@ -4483,9 +4700,6 @@ int main(int argc, char **argv)
        /* Init UST command queue. */
        cds_wfq_init(&ust_cmd_queue.queue);
 
-       /* Init UST app hash table */
-       ust_app_ht_alloc();
-
        /*
         * Get session list pointer. This pointer MUST NOT be free(). This list is
         * statically declared in session.c
@@ -4499,7 +4713,7 @@ int main(int argc, char **argv)
        ret = pthread_create(&client_thread, NULL,
                        thread_manage_clients, (void *) NULL);
        if (ret != 0) {
-               perror("pthread_create clients");
+               PERROR("pthread_create clients");
                goto exit_client;
        }
 
@@ -4507,7 +4721,7 @@ int main(int argc, char **argv)
        ret = pthread_create(&dispatch_thread, NULL,
                        thread_dispatch_ust_registration, (void *) NULL);
        if (ret != 0) {
-               perror("pthread_create dispatch");
+               PERROR("pthread_create dispatch");
                goto exit_dispatch;
        }
 
@@ -4515,7 +4729,7 @@ int main(int argc, char **argv)
        ret = pthread_create(&reg_apps_thread, NULL,
                        thread_registration_apps, (void *) NULL);
        if (ret != 0) {
-               perror("pthread_create registration");
+               PERROR("pthread_create registration");
                goto exit_reg_apps;
        }
 
@@ -4523,7 +4737,7 @@ int main(int argc, char **argv)
        ret = pthread_create(&apps_thread, NULL,
                        thread_manage_apps, (void *) NULL);
        if (ret != 0) {
-               perror("pthread_create apps");
+               PERROR("pthread_create apps");
                goto exit_apps;
        }
 
@@ -4531,47 +4745,47 @@ int main(int argc, char **argv)
        ret = pthread_create(&kernel_thread, NULL,
                        thread_manage_kernel, (void *) NULL);
        if (ret != 0) {
-               perror("pthread_create kernel");
+               PERROR("pthread_create kernel");
                goto exit_kernel;
        }
 
        ret = pthread_join(kernel_thread, &status);
        if (ret != 0) {
-               perror("pthread_join");
+               PERROR("pthread_join");
                goto error;     /* join error, exit without cleanup */
        }
 
 exit_kernel:
        ret = pthread_join(apps_thread, &status);
        if (ret != 0) {
-               perror("pthread_join");
+               PERROR("pthread_join");
                goto error;     /* join error, exit without cleanup */
        }
 
 exit_apps:
        ret = pthread_join(reg_apps_thread, &status);
        if (ret != 0) {
-               perror("pthread_join");
+               PERROR("pthread_join");
                goto error;     /* join error, exit without cleanup */
        }
 
 exit_reg_apps:
        ret = pthread_join(dispatch_thread, &status);
        if (ret != 0) {
-               perror("pthread_join");
+               PERROR("pthread_join");
                goto error;     /* join error, exit without cleanup */
        }
 
 exit_dispatch:
        ret = pthread_join(client_thread, &status);
        if (ret != 0) {
-               perror("pthread_join");
+               PERROR("pthread_join");
                goto error;     /* join error, exit without cleanup */
        }
 
        ret = join_consumer_thread(&kconsumer_data);
        if (ret != 0) {
-               perror("join_consumer");
+               PERROR("join_consumer");
                goto error;     /* join error, exit without cleanup */
        }
 
This page took 0.039804 seconds and 4 git commands to generate.