Fix: Wrong fd used by kernel_wait_quiescent
[lttng-tools.git] / src / bin / lttng-sessiond / main.c
index 736ec83ff8712d891f92d3a2724f7085672b924f..fc52d759b2130b8f1bdd2e122f24c1990f9b85d2 100644 (file)
@@ -228,6 +228,11 @@ struct health_state health_thread_app_manage;
 struct health_state health_thread_app_reg;
 struct health_state health_thread_kernel;
 
+/*
+ * Socket timeout for receiving and sending in seconds.
+ */
+static int app_socket_timeout;
+
 static
 void setup_consumerd_path(void)
 {
@@ -857,13 +862,6 @@ static void *thread_manage_consumer(void *data)
         */
        health_poll_update(&consumer_data->health);
 
-       health_code_update(&consumer_data->health);
-
-       ret = lttcomm_listen_unix_sock(consumer_data->err_sock);
-       if (ret < 0) {
-               goto error_listen;
-       }
-
        /*
         * Pass 2 as size here for the thread quit pipe and kconsumerd_err_sock.
         * Nothing more will be added to this poll set.
@@ -873,6 +871,11 @@ static void *thread_manage_consumer(void *data)
                goto error_poll;
        }
 
+       /*
+        * The error socket here is already in a listening state which was done
+        * just before spawning this thread to avoid a race between the consumer
+        * daemon exec trying to connect and the listen() call.
+        */
        ret = lttng_poll_add(&events, consumer_data->err_sock, LPOLLIN | LPOLLRDHUP);
        if (ret < 0) {
                goto error;
@@ -1068,7 +1071,6 @@ error:
 
        lttng_poll_clean(&events);
 error_poll:
-error_listen:
        if (err) {
                health_error(&consumer_data->health);
                ERR("Health error occurred in %s", __func__);
@@ -1208,6 +1210,12 @@ static void *thread_manage_apps(void *data)
                                                        goto error;
                                                }
 
+                                               /* Set socket timeout for both receiving and ending */
+                                               (void) lttcomm_setsockopt_rcv_timeout(ust_cmd.sock,
+                                                               app_socket_timeout);
+                                               (void) lttcomm_setsockopt_snd_timeout(ust_cmd.sock,
+                                                               app_socket_timeout);
+
                                                DBG("Apps with sock %d added to poll set",
                                                                ust_cmd.sock);
                                        }
@@ -1826,7 +1834,17 @@ error:
  */
 static int start_consumerd(struct consumer_data *consumer_data)
 {
-       int ret;
+       int ret, err;
+
+       /*
+        * Set the listen() state on the socket since there is a possible race
+        * between the exec() of the consumer daemon and this call if place in the
+        * consumer thread. See bug #366 for more details.
+        */
+       ret = lttcomm_listen_unix_sock(consumer_data->err_sock);
+       if (ret < 0) {
+               goto error;
+       }
 
        pthread_mutex_lock(&consumer_data->pid_mutex);
        if (consumer_data->pid != 0) {
@@ -1857,6 +1875,13 @@ end:
        return 0;
 
 error:
+       /* Cleanup already created socket on error. */
+       if (consumer_data->err_sock >= 0) {
+               err = close(consumer_data->err_sock);
+               if (err < 0) {
+                       PERROR("close consumer data error socket");
+               }
+       }
        return ret;
 }
 
@@ -3086,7 +3111,7 @@ static void *thread_manage_clients(void *data)
 
        ret = lttcomm_listen_unix_sock(client_sock);
        if (ret < 0) {
-               goto error;
+               goto error_listen;
        }
 
        /*
@@ -3095,7 +3120,7 @@ static void *thread_manage_clients(void *data)
         */
        ret = create_thread_poll_set(&events, 2);
        if (ret < 0) {
-               goto error;
+               goto error_create_poll;
        }
 
        /* Add the application registration socket */
@@ -3274,13 +3299,18 @@ static void *thread_manage_clients(void *data)
 
 exit:
 error:
-       if (err) {
-               health_error(&health_thread_cmd);
-               ERR("Health error occurred in %s", __func__);
+       if (sock >= 0) {
+               ret = close(sock);
+               if (ret) {
+                       PERROR("close");
+               }
        }
-       health_exit(&health_thread_cmd);
 
-       DBG("Client thread dying");
+       lttng_poll_clean(&events);
+       clean_command_ctx(&cmd_ctx);
+
+error_listen:
+error_create_poll:
        unlink(client_unix_sock_path);
        if (client_sock >= 0) {
                ret = close(client_sock);
@@ -3288,15 +3318,15 @@ error:
                        PERROR("close");
                }
        }
-       if (sock >= 0) {
-               ret = close(sock);
-               if (ret) {
-                       PERROR("close");
-               }
+
+       if (err) {
+               health_error(&health_thread_cmd);
+               ERR("Health error occurred in %s", __func__);
        }
 
-       lttng_poll_clean(&events);
-       clean_command_ctx(&cmd_ctx);
+       health_exit(&health_thread_cmd);
+
+       DBG("Client thread dying");
 
        rcu_unregister_thread();
        return NULL;
@@ -3776,7 +3806,7 @@ int main(int argc, char **argv)
 {
        int ret = 0;
        void *status;
-       const char *home_path;
+       const char *home_path, *env_app_timeout;
 
        init_kernel_workarounds();
 
@@ -4056,6 +4086,14 @@ int main(int argc, char **argv)
        health_init(&ustconsumer64_data.health);
        health_poll_update(&ustconsumer64_data.health);
 
+       /* Check for the application socket timeout env variable. */
+       env_app_timeout = getenv(DEFAULT_APP_SOCKET_TIMEOUT_ENV);
+       if (env_app_timeout) {
+               app_socket_timeout = atoi(env_app_timeout);
+       } else {
+               app_socket_timeout = DEFAULT_APP_SOCKET_RW_TIMEOUT;
+       }
+
        /* Create thread to manage the client socket */
        ret = pthread_create(&health_thread, NULL,
                        thread_manage_health, (void *) NULL);
This page took 0.024937 seconds and 4 git commands to generate.