lttng.h API update: set filter becomes enable event with filter
[lttng-tools.git] / src / bin / lttng-sessiond / main.c
index b4755b0b8cce0d7eba70d60d4a474345ba2bbd1c..df4760602b5e4838f90cf124764fbbdad06d5ef4 100644 (file)
@@ -59,7 +59,6 @@
 #include "ust-consumer.h"
 #include "utils.h"
 #include "fd-limit.h"
-#include "filter.h"
 #include "health.h"
 #include "testpoint.h"
 
@@ -228,6 +227,11 @@ struct health_state health_thread_app_manage;
 struct health_state health_thread_app_reg;
 struct health_state health_thread_kernel;
 
+/*
+ * Socket timeout for receiving and sending in seconds.
+ */
+static int app_socket_timeout;
+
 static
 void setup_consumerd_path(void)
 {
@@ -392,7 +396,7 @@ static void stop_threads(void)
 static void cleanup(void)
 {
        int ret;
-       char *cmd;
+       char *cmd = NULL;
        struct ltt_session *sess, *stmp;
 
        DBG("Cleaning up");
@@ -442,9 +446,6 @@ static void cleanup(void)
                modprobe_remove_lttng_all();
        }
 
-       utils_close_pipe(kernel_poll_pipe);
-       utils_close_pipe(apps_cmd_pipe);
-
        /* <fun> */
        DBG("%c[%d;%dm*** assert failed :-) *** ==> %c[%dm%c[%d;%dm"
                        "Matthew, BEET driven development works!%c[%dm",
@@ -793,9 +794,13 @@ exit:
 error:
        lttng_poll_clean(&events);
 error_poll_create:
+       utils_close_pipe(kernel_poll_pipe);
+       kernel_poll_pipe[0] = kernel_poll_pipe[1] = -1;
        if (err) {
                health_error(&health_thread_kernel);
                ERR("Health error occurred in %s", __func__);
+               WARN("Kernel thread died unexpectedly. "
+                               "Kernel tracing can continue but CPU hotplug is disabled.");
        }
        health_exit(&health_thread_kernel);
        DBG("Kernel thread dying");
@@ -838,12 +843,24 @@ static void *thread_manage_consumer(void *data)
 
        DBG("[thread] Manage consumer started");
 
-       health_code_update(&consumer_data->health);
-
-       ret = lttcomm_listen_unix_sock(consumer_data->err_sock);
-       if (ret < 0) {
-               goto error_listen;
-       }
+       /*
+        * Since the consumer thread can be spawned at any moment in time, we init
+        * the health to a poll status (1, which is a valid health over time).
+        * When the thread starts, we update here the health to a "code" path being
+        * an even value so this thread, when reaching a poll wait, does not
+        * trigger an error with an even value.
+        *
+        * Here is the use case we avoid.
+        *
+        * +1: the first poll update during initialization (main())
+        * +2 * x: multiple code update once in this thread.
+        * +1: poll wait in this thread (being a good health state).
+        * == even number which after the wait period shows as a bad health.
+        *
+        * In a nutshell, the following poll update to the health state brings back
+        * the state to an even value meaning a code path.
+        */
+       health_poll_update(&consumer_data->health);
 
        /*
         * Pass 2 as size here for the thread quit pipe and kconsumerd_err_sock.
@@ -854,6 +871,11 @@ static void *thread_manage_consumer(void *data)
                goto error_poll;
        }
 
+       /*
+        * The error socket here is already in a listening state which was done
+        * just before spawning this thread to avoid a race between the consumer
+        * daemon exec trying to connect and the listen() call.
+        */
        ret = lttng_poll_add(&events, consumer_data->err_sock, LPOLLIN | LPOLLRDHUP);
        if (ret < 0) {
                goto error;
@@ -909,6 +931,12 @@ restart:
                goto error;
        }
 
+       /*
+        * Set the CLOEXEC flag. Return code is useless because either way, the
+        * show must go on.
+        */
+       (void) utils_set_fd_cloexec(sock);
+
        health_code_update(&consumer_data->health);
 
        DBG2("Receiving code from consumer err_sock");
@@ -1043,7 +1071,6 @@ error:
 
        lttng_poll_clean(&events);
 error_poll:
-error_listen:
        if (err) {
                health_error(&consumer_data->health);
                ERR("Health error occurred in %s", __func__);
@@ -1173,16 +1200,22 @@ static void *thread_manage_apps(void *data)
                                                ust_app_unregister(ust_cmd.sock);
                                        } else {
                                                /*
-                                                * We just need here to monitor the close of the UST
-                                                * socket and poll set monitor those by default.
-                                                * Listen on POLLIN (even if we never expect any
-                                                * data) to ensure that hangup wakes us.
+                                                * We only monitor the error events of the socket. This
+                                                * thread does not handle any incoming data from UST
+                                                * (POLLIN).
                                                 */
-                                               ret = lttng_poll_add(&events, ust_cmd.sock, LPOLLIN);
+                                               ret = lttng_poll_add(&events, ust_cmd.sock,
+                                                               LPOLLERR & LPOLLHUP & LPOLLRDHUP);
                                                if (ret < 0) {
                                                        goto error;
                                                }
 
+                                               /* Set socket timeout for both receiving and ending */
+                                               (void) lttcomm_setsockopt_rcv_timeout(ust_cmd.sock,
+                                                               app_socket_timeout);
+                                               (void) lttcomm_setsockopt_snd_timeout(ust_cmd.sock,
+                                                               app_socket_timeout);
+
                                                DBG("Apps with sock %d added to poll set",
                                                                ust_cmd.sock);
                                        }
@@ -1217,6 +1250,15 @@ exit:
 error:
        lttng_poll_clean(&events);
 error_poll_create:
+       utils_close_pipe(apps_cmd_pipe);
+       apps_cmd_pipe[0] = apps_cmd_pipe[1] = -1;
+
+       /*
+        * We don't clean the UST app hash table here since already registered
+        * applications can still be controlled so let them be until the session
+        * daemon dies or the applications stop.
+        */
+
        if (err) {
                health_error(&health_thread_app_manage);
                ERR("Health error occurred in %s", __func__);
@@ -1266,18 +1308,26 @@ static void *thread_dispatch_ust_registration(void *data)
                         * call is blocking so we can be assured that the data will be read
                         * at some point in time or wait to the end of the world :)
                         */
-                       ret = write(apps_cmd_pipe[1], ust_cmd,
-                                       sizeof(struct ust_command));
-                       if (ret < 0) {
-                               PERROR("write apps cmd pipe");
-                               if (errno == EBADF) {
-                                       /*
-                                        * We can't inform the application thread to process
-                                        * registration. We will exit or else application
-                                        * registration will not occur and tracing will never
-                                        * start.
-                                        */
-                                       goto error;
+                       if (apps_cmd_pipe[1] >= 0) {
+                               ret = write(apps_cmd_pipe[1], ust_cmd,
+                                               sizeof(struct ust_command));
+                               if (ret < 0) {
+                                       PERROR("write apps cmd pipe");
+                                       if (errno == EBADF) {
+                                               /*
+                                                * We can't inform the application thread to process
+                                                * registration. We will exit or else application
+                                                * registration will not occur and tracing will never
+                                                * start.
+                                                */
+                                               goto error;
+                                       }
+                               }
+                       } else {
+                               /* Application manager thread is not available. */
+                               ret = close(ust_cmd->sock);
+                               if (ret < 0) {
+                                       PERROR("close ust_cmd sock");
                                }
                        }
                        free(ust_cmd);
@@ -1383,6 +1433,12 @@ static void *thread_registration_apps(void *data)
                                                goto error;
                                        }
 
+                                       /*
+                                        * Set the CLOEXEC flag. Return code is useless because
+                                        * either way, the show must go on.
+                                        */
+                                       (void) utils_set_fd_cloexec(sock);
+
                                        /* Create UST registration command for enqueuing */
                                        ust_cmd = zmalloc(sizeof(struct ust_command));
                                        if (ust_cmd == NULL) {
@@ -1795,7 +1851,17 @@ error:
  */
 static int start_consumerd(struct consumer_data *consumer_data)
 {
-       int ret;
+       int ret, err;
+
+       /*
+        * Set the listen() state on the socket since there is a possible race
+        * between the exec() of the consumer daemon and this call if place in the
+        * consumer thread. See bug #366 for more details.
+        */
+       ret = lttcomm_listen_unix_sock(consumer_data->err_sock);
+       if (ret < 0) {
+               goto error;
+       }
 
        pthread_mutex_lock(&consumer_data->pid_mutex);
        if (consumer_data->pid != 0) {
@@ -1826,6 +1892,13 @@ end:
        return 0;
 
 error:
+       /* Cleanup already created socket on error. */
+       if (consumer_data->err_sock >= 0) {
+               err = close(consumer_data->err_sock);
+               if (err < 0) {
+                       PERROR("close consumer data error socket");
+               }
+       }
        return ret;
 }
 
@@ -2115,7 +2188,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock,
        case LTTNG_LIST_DOMAINS:
        case LTTNG_START_TRACE:
        case LTTNG_STOP_TRACE:
-       case LTTNG_DATA_AVAILABLE:
+       case LTTNG_DATA_PENDING:
                need_domain = 0;
                break;
        default:
@@ -2388,8 +2461,7 @@ skip_domain:
        {
                ret = cmd_add_context(cmd_ctx->session, cmd_ctx->lsm->domain.type,
                                cmd_ctx->lsm->u.context.channel_name,
-                               cmd_ctx->lsm->u.context.event_name,
-                               &cmd_ctx->lsm->u.context.ctx);
+                               &cmd_ctx->lsm->u.context.ctx, kernel_poll_pipe[1]);
                break;
        }
        case LTTNG_DISABLE_CHANNEL:
@@ -2452,7 +2524,7 @@ skip_domain:
        {
                ret = cmd_enable_event(cmd_ctx->session, cmd_ctx->lsm->domain.type,
                                cmd_ctx->lsm->u.enable.channel_name,
-                               &cmd_ctx->lsm->u.enable.event, kernel_poll_pipe[1]);
+                               &cmd_ctx->lsm->u.enable.event, NULL, kernel_poll_pipe[1]);
                break;
        }
        case LTTNG_ENABLE_ALL_EVENT:
@@ -2461,7 +2533,7 @@ skip_domain:
 
                ret = cmd_enable_event_all(cmd_ctx->session, cmd_ctx->lsm->domain.type,
                                cmd_ctx->lsm->u.enable.channel_name,
-                               cmd_ctx->lsm->u.enable.event.type, kernel_poll_pipe[1]);
+                               cmd_ctx->lsm->u.enable.event.type, NULL, kernel_poll_pipe[1]);
                break;
        }
        case LTTNG_LIST_TRACEPOINTS:
@@ -2773,15 +2845,15 @@ skip_domain:
                                cmd_ctx->lsm->u.reg.path, cdata);
                break;
        }
-       case LTTNG_SET_FILTER:
+       case LTTNG_ENABLE_EVENT_WITH_FILTER:
        {
                struct lttng_filter_bytecode *bytecode;
 
-               if (cmd_ctx->lsm->u.filter.bytecode_len > LTTNG_FILTER_MAX_LEN) {
+               if (cmd_ctx->lsm->u.enable.bytecode_len > LTTNG_FILTER_MAX_LEN) {
                        ret = LTTNG_ERR_FILTER_INVAL;
                        goto error;
                }
-               bytecode = zmalloc(cmd_ctx->lsm->u.filter.bytecode_len);
+               bytecode = zmalloc(cmd_ctx->lsm->u.enable.bytecode_len);
                if (!bytecode) {
                        ret = LTTNG_ERR_FILTER_NOMEM;
                        goto error;
@@ -2789,7 +2861,7 @@ skip_domain:
                /* Receive var. len. data */
                DBG("Receiving var len data from client ...");
                ret = lttcomm_recv_unix_sock(sock, bytecode,
-                               cmd_ctx->lsm->u.filter.bytecode_len);
+                               cmd_ctx->lsm->u.enable.bytecode_len);
                if (ret <= 0) {
                        DBG("Nothing recv() from client var len data... continuing");
                        *sock_error = 1;
@@ -2798,21 +2870,20 @@ skip_domain:
                }
 
                if (bytecode->len + sizeof(*bytecode)
-                               != cmd_ctx->lsm->u.filter.bytecode_len) {
+                               != cmd_ctx->lsm->u.enable.bytecode_len) {
                        free(bytecode);
                        ret = LTTNG_ERR_FILTER_INVAL;
                        goto error;
                }
 
-               ret = cmd_set_filter(cmd_ctx->session, cmd_ctx->lsm->domain.type,
-                               cmd_ctx->lsm->u.filter.channel_name,
-                               cmd_ctx->lsm->u.filter.event_name,
-                               bytecode);
+               ret = cmd_enable_event(cmd_ctx->session, cmd_ctx->lsm->domain.type,
+                               cmd_ctx->lsm->u.enable.channel_name,
+                               &cmd_ctx->lsm->u.enable.event, bytecode, kernel_poll_pipe[1]);
                break;
        }
-       case LTTNG_DATA_AVAILABLE:
+       case LTTNG_DATA_PENDING:
        {
-               ret = cmd_data_available(cmd_ctx->session);
+               ret = cmd_data_pending(cmd_ctx->session);
                break;
        }
        default:
@@ -2863,6 +2934,12 @@ static void *thread_manage_health(void *data)
                goto error;
        }
 
+       /*
+        * Set the CLOEXEC flag. Return code is useless because either way, the
+        * show must go on.
+        */
+       (void) utils_set_fd_cloexec(sock);
+
        ret = lttcomm_listen_unix_sock(sock);
        if (ret < 0) {
                goto error;
@@ -2927,6 +3004,12 @@ restart:
                        goto error;
                }
 
+               /*
+                * Set the CLOEXEC flag. Return code is useless because either way, the
+                * show must go on.
+                */
+               (void) utils_set_fd_cloexec(new_sock);
+
                DBG("Receiving data from client for health...");
                ret = lttcomm_recv_unix_sock(new_sock, (void *)&msg, sizeof(msg));
                if (ret <= 0) {
@@ -3043,7 +3126,7 @@ static void *thread_manage_clients(void *data)
 
        ret = lttcomm_listen_unix_sock(client_sock);
        if (ret < 0) {
-               goto error;
+               goto error_listen;
        }
 
        /*
@@ -3052,7 +3135,7 @@ static void *thread_manage_clients(void *data)
         */
        ret = create_thread_poll_set(&events, 2);
        if (ret < 0) {
-               goto error;
+               goto error_create_poll;
        }
 
        /* Add the application registration socket */
@@ -3124,6 +3207,12 @@ static void *thread_manage_clients(void *data)
                        goto error;
                }
 
+               /*
+                * Set the CLOEXEC flag. Return code is useless because either way, the
+                * show must go on.
+                */
+               (void) utils_set_fd_cloexec(sock);
+
                /* Set socket option for credentials retrieval */
                ret = lttcomm_setsockopt_creds_unix_sock(sock);
                if (ret < 0) {
@@ -3225,13 +3314,18 @@ static void *thread_manage_clients(void *data)
 
 exit:
 error:
-       if (err) {
-               health_error(&health_thread_cmd);
-               ERR("Health error occurred in %s", __func__);
+       if (sock >= 0) {
+               ret = close(sock);
+               if (ret) {
+                       PERROR("close");
+               }
        }
-       health_exit(&health_thread_cmd);
 
-       DBG("Client thread dying");
+       lttng_poll_clean(&events);
+       clean_command_ctx(&cmd_ctx);
+
+error_listen:
+error_create_poll:
        unlink(client_unix_sock_path);
        if (client_sock >= 0) {
                ret = close(client_sock);
@@ -3239,15 +3333,15 @@ error:
                        PERROR("close");
                }
        }
-       if (sock >= 0) {
-               ret = close(sock);
-               if (ret) {
-                       PERROR("close");
-               }
+
+       if (err) {
+               health_error(&health_thread_cmd);
+               ERR("Health error occurred in %s", __func__);
        }
 
-       lttng_poll_clean(&events);
-       clean_command_ctx(&cmd_ctx);
+       health_exit(&health_thread_cmd);
+
+       DBG("Client thread dying");
 
        rcu_unregister_thread();
        return NULL;
@@ -3424,6 +3518,14 @@ static int init_daemon_socket(void)
                goto end;
        }
 
+       /* Set the cloexec flag */
+       ret = utils_set_fd_cloexec(client_sock);
+       if (ret < 0) {
+               ERR("Unable to set CLOEXEC flag to the client Unix socket (fd: %d). "
+                               "Continuing but note that the consumer daemon will have a "
+                               "reference to this socket on exec()", client_sock);
+       }
+
        /* File permission MUST be 660 */
        ret = chmod(client_unix_sock_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
        if (ret < 0) {
@@ -3440,6 +3542,14 @@ static int init_daemon_socket(void)
                goto end;
        }
 
+       /* Set the cloexec flag */
+       ret = utils_set_fd_cloexec(apps_sock);
+       if (ret < 0) {
+               ERR("Unable to set CLOEXEC flag to the app Unix socket (fd: %d). "
+                               "Continuing but note that the consumer daemon will have a "
+                               "reference to this socket on exec()", apps_sock);
+       }
+
        /* File permission MUST be 666 */
        ret = chmod(apps_unix_sock_path,
                        S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
@@ -3449,6 +3559,9 @@ static int init_daemon_socket(void)
                goto end;
        }
 
+       DBG3("Session daemon client socket %d and application socket %d created",
+                       client_sock, apps_sock);
+
 end:
        umask(old_umask);
        return ret;
@@ -3708,7 +3821,7 @@ int main(int argc, char **argv)
 {
        int ret = 0;
        void *status;
-       const char *home_path;
+       const char *home_path, *env_app_timeout;
 
        init_kernel_workarounds();
 
@@ -3945,8 +4058,10 @@ int main(int argc, char **argv)
        }
 
        /* Setup the kernel pipe for waking up the kernel thread */
-       if ((ret = utils_create_pipe_cloexec(kernel_poll_pipe)) < 0) {
-               goto exit;
+       if (is_root && !opt_no_kernel) {
+               if ((ret = utils_create_pipe_cloexec(kernel_poll_pipe)) < 0) {
+                       goto exit;
+               }
        }
 
        /* Setup the thread apps communication pipe. */
@@ -3977,7 +4092,9 @@ int main(int argc, char **argv)
        /*
         * Init health counters of the consumer thread. We do a quick hack here to
         * the state of the consumer health is fine even if the thread is not
-        * started.  This is simply to ease our life and has no cost what so ever.
+        * started. Once the thread starts, the health state is updated with a poll
+        * value to set a health code path. This is simply to ease our life and has
+        * no cost what so ever.
         */
        health_init(&kconsumer_data.health);
        health_poll_update(&kconsumer_data.health);
@@ -3986,6 +4103,14 @@ int main(int argc, char **argv)
        health_init(&ustconsumer64_data.health);
        health_poll_update(&ustconsumer64_data.health);
 
+       /* Check for the application socket timeout env variable. */
+       env_app_timeout = getenv(DEFAULT_APP_SOCKET_TIMEOUT_ENV);
+       if (env_app_timeout) {
+               app_socket_timeout = atoi(env_app_timeout);
+       } else {
+               app_socket_timeout = DEFAULT_APP_SOCKET_RW_TIMEOUT;
+       }
+
        /* Create thread to manage the client socket */
        ret = pthread_create(&health_thread, NULL,
                        thread_manage_health, (void *) NULL);
@@ -4026,18 +4151,21 @@ int main(int argc, char **argv)
                goto exit_apps;
        }
 
-       /* Create kernel thread to manage kernel event */
-       ret = pthread_create(&kernel_thread, NULL,
-                       thread_manage_kernel, (void *) NULL);
-       if (ret != 0) {
-               PERROR("pthread_create kernel");
-               goto exit_kernel;
-       }
+       /* Don't start this thread if kernel tracing is not requested nor root */
+       if (is_root && !opt_no_kernel) {
+               /* Create kernel thread to manage kernel event */
+               ret = pthread_create(&kernel_thread, NULL,
+                               thread_manage_kernel, (void *) NULL);
+               if (ret != 0) {
+                       PERROR("pthread_create kernel");
+                       goto exit_kernel;
+               }
 
-       ret = pthread_join(kernel_thread, &status);
-       if (ret != 0) {
-               PERROR("pthread_join");
-               goto error;     /* join error, exit without cleanup */
+               ret = pthread_join(kernel_thread, &status);
+               if (ret != 0) {
+                       PERROR("pthread_join");
+                       goto error;     /* join error, exit without cleanup */
+               }
        }
 
 exit_kernel:
This page took 0.030331 seconds and 4 git commands to generate.