Fix: LPOLLHUP and LPOLLERR when there is still data in pipe/socket
[lttng-tools.git] / src / bin / lttng-sessiond / main.c
index 5a7d153e568743c73365549bd9676311789b9c48..13bd649180452ad8346f8d7a5729446a9c58b157 100644 (file)
@@ -586,6 +586,34 @@ static int generate_lock_file_path(char *path, size_t len)
        return ret;
 }
 
+/*
+ * Wait on consumer process termination.
+ *
+ * Need to be called with the consumer data lock held or from a context
+ * ensuring no concurrent access to data (e.g: cleanup).
+ */
+static void wait_consumer(struct consumer_data *consumer_data)
+{
+       pid_t ret;
+       int status;
+
+       if (consumer_data->pid <= 0) {
+               return;
+       }
+
+       DBG("Waiting for complete teardown of consumerd (PID: %d)",
+                       consumer_data->pid);
+       ret = waitpid(consumer_data->pid, &status, 0);
+       if (ret == -1) {
+               PERROR("consumerd waitpid pid: %d", consumer_data->pid)
+       }
+       if (!WIFEXITED(status)) {
+               ERR("consumerd termination with error: %d",
+                               WEXITSTATUS(ret));
+       }
+       consumer_data->pid = 0;
+}
+
 /*
  * Cleanup the session daemon's data structures.
  */
@@ -680,6 +708,10 @@ static void sessiond_cleanup(void)
                }
        }
 
+       wait_consumer(&kconsumer_data);
+       wait_consumer(&ustconsumer64_data);
+       wait_consumer(&ustconsumer32_data);
+
        DBG("Cleaning up all agent apps");
        agent_app_ht_clean();
 
@@ -1133,31 +1165,33 @@ static void *thread_manage_kernel(void *data)
                        }
 
                        /* Check for data on kernel pipe */
-                       if (pollfd == kernel_poll_pipe[0] && (revents & LPOLLIN)) {
-                               (void) lttng_read(kernel_poll_pipe[0],
-                                       &tmp, 1);
-                               /*
-                                * Ret value is useless here, if this pipe gets any actions an
-                                * update is required anyway.
-                                */
-                               update_poll_flag = 1;
-                               continue;
-                       } else {
-                               /*
-                                * New CPU detected by the kernel. Adding kernel stream to
-                                * kernel session and updating the kernel consumer
-                                */
-                               if (revents & LPOLLIN) {
+                       if (revents & LPOLLIN) {
+                               if (pollfd == kernel_poll_pipe[0]) {
+                                       (void) lttng_read(kernel_poll_pipe[0],
+                                               &tmp, 1);
+                                       /*
+                                        * Ret value is useless here, if this pipe gets any actions an
+                                        * update is required anyway.
+                                        */
+                                       update_poll_flag = 1;
+                                       continue;
+                               } else {
+                                       /*
+                                        * New CPU detected by the kernel. Adding kernel stream to
+                                        * kernel session and updating the kernel consumer
+                                        */
                                        ret = update_kernel_stream(&kconsumer_data, pollfd);
                                        if (ret < 0) {
                                                continue;
                                        }
                                        break;
-                                       /*
-                                        * TODO: We might want to handle the LPOLLERR | LPOLLHUP
-                                        * and unregister kernel stream at this point.
-                                        */
                                }
+                       } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
+                               update_poll_flag = 1;
+                               continue;
+                       } else {
+                               ERR("Unexpected poll events %u for sock %d", revents, pollfd);
+                               goto error;
                        }
                }
        }
@@ -1287,9 +1321,14 @@ restart:
 
                /* Event on the registration socket */
                if (pollfd == consumer_data->err_sock) {
-                       if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
+                       if (revents & LPOLLIN) {
+                               continue;
+                       } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
                                ERR("consumer err socket poll error");
                                goto error;
+                       } else {
+                               ERR("Unexpected poll events %u for sock %d", revents, pollfd);
+                               goto error;
                        }
                }
        }
@@ -1419,7 +1458,8 @@ restart_poll:
 
                        if (pollfd == sock) {
                                /* Event on the consumerd socket */
-                               if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
+                               if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)
+                                               && !(revents & LPOLLIN)) {
                                        ERR("consumer err socket second poll error");
                                        goto error;
                                }
@@ -1437,6 +1477,11 @@ restart_poll:
 
                                goto exit;
                        } else if (pollfd == consumer_data->metadata_fd) {
+                               if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)
+                                               && !(revents & LPOLLIN)) {
+                                       ERR("consumer err metadata socket second poll error");
+                                       goto error;
+                               }
                                /* UST metadata requests */
                                ret = ust_consumer_metadata_request(
                                                &consumer_data->metadata_sock);
@@ -1500,7 +1545,6 @@ error:
 
        unlink(consumer_data->err_unix_sock_path);
        unlink(consumer_data->cmd_unix_sock_path);
-       consumer_data->pid = 0;
        pthread_mutex_unlock(&consumer_data->lock);
 
        /* Cleanup metadata socket mutex. */
@@ -1605,10 +1649,7 @@ static void *thread_manage_apps(void *data)
 
                        /* Inspect the apps cmd pipe */
                        if (pollfd == apps_cmd_pipe[0]) {
-                               if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
-                                       ERR("Apps command pipe error");
-                                       goto error;
-                               } else if (revents & LPOLLIN) {
+                               if (revents & LPOLLIN) {
                                        int sock;
 
                                        /* Empty pipe */
@@ -1621,9 +1662,8 @@ static void *thread_manage_apps(void *data)
                                        health_code_update();
 
                                        /*
-                                        * We only monitor the error events of the socket. This
-                                        * thread does not handle any incoming data from UST
-                                        * (POLLIN).
+                                        * Since this is a command socket (write then read),
+                                        * we only monitor the error events of the socket.
                                         */
                                        ret = lttng_poll_add(&events, sock,
                                                        LPOLLERR | LPOLLHUP | LPOLLRDHUP);
@@ -1632,6 +1672,12 @@ static void *thread_manage_apps(void *data)
                                        }
 
                                        DBG("Apps with sock %d added to poll set", sock);
+                               } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
+                                       ERR("Apps command pipe error");
+                                       goto error;
+                               } else {
+                                       ERR("Unknown poll events %u for sock %d", revents, pollfd);
+                                       goto error;
                                }
                        } else {
                                /*
@@ -1647,6 +1693,9 @@ static void *thread_manage_apps(void *data)
 
                                        /* Socket closed on remote end. */
                                        ust_app_unregister(pollfd);
+                               } else {
+                                       ERR("Unexpected poll events %u for sock %d", revents, pollfd);
+                                       goto error;
                                }
                        }
 
@@ -1787,7 +1836,16 @@ static void sanitize_wait_queue(struct ust_reg_wait_queue *wait_queue)
                                wait_queue->count--;
                                ust_app_destroy(wait_node->app);
                                free(wait_node);
+                               /*
+                                * Silence warning of use-after-free in
+                                * cds_list_for_each_entry_safe which uses
+                                * __typeof__(*wait_node).
+                                */
+                               wait_node = NULL;
                                break;
+                       } else {
+                               ERR("Unexpected poll events %u for sock %d", revents, pollfd);
+                               goto error;
                        }
                }
        }
@@ -1989,7 +2047,7 @@ static void *thread_dispatch_ust_registration(void *data)
                                 * Don't care about return value. Let the manage apps threads
                                 * handle app unregistration upon socket close.
                                 */
-                               (void) ust_app_register_done(app->sock);
+                               (void) ust_app_register_done(app);
 
                                /*
                                 * Even if the application socket has been closed, send the app
@@ -2031,6 +2089,22 @@ error:
                free(wait_node);
        }
 
+       /* Empty command queue. */
+       for (;;) {
+               /* Dequeue command for registration */
+               node = cds_wfcq_dequeue_blocking(&ust_cmd_queue.head, &ust_cmd_queue.tail);
+               if (node == NULL) {
+                       break;
+               }
+               ust_cmd = caa_container_of(node, struct ust_command, node);
+               ret = close(ust_cmd->sock);
+               if (ret < 0) {
+                       PERROR("close ust sock exit dispatch %d", ust_cmd->sock);
+               }
+               lttng_fd_put(LTTNG_FD_APPS, 1);
+               free(ust_cmd);
+       }
+
 error_testpoint:
        DBG("Dispatch thread dying");
        if (err) {
@@ -2132,10 +2206,7 @@ static void *thread_registration_apps(void *data)
 
                        /* Event on the registration socket */
                        if (pollfd == apps_sock) {
-                               if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
-                                       ERR("Register apps socket poll error");
-                                       goto error;
-                               } else if (revents & LPOLLIN) {
+                               if (revents & LPOLLIN) {
                                        sock = lttcomm_accept_unix_sock(apps_sock);
                                        if (sock < 0) {
                                                goto error;
@@ -2222,6 +2293,12 @@ static void *thread_registration_apps(void *data)
                                         * barrier with the exchange in cds_wfcq_enqueue.
                                         */
                                        futex_nto1_wake(&ust_cmd_queue.futex);
+                               } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
+                                       ERR("Register apps socket poll error");
+                                       goto error;
+                               } else {
+                                       ERR("Unexpected poll events %u for sock %d", revents, pollfd);
+                                       goto error;
                                }
                        }
                }
@@ -2919,6 +2996,8 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock,
 
        DBG("Processing client command %d", cmd_ctx->lsm->cmd_type);
 
+       assert(!rcu_read_ongoing());
+
        *sock_error = 0;
 
        switch (cmd_ctx->lsm->cmd_type) {
@@ -4006,6 +4085,7 @@ setup_error:
                session_unlock_list();
        }
 init_setup_error:
+       assert(!rcu_read_ongoing());
        return ret;
 }
 
@@ -4121,9 +4201,14 @@ restart:
 
                        /* Event on the registration socket */
                        if (pollfd == sock) {
-                               if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
+                               if (revents & LPOLLIN) {
+                                       continue;
+                               } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
                                        ERR("Health socket poll error");
                                        goto error;
+                               } else {
+                                       ERR("Unexpected poll events %u for sock %d", revents, pollfd);
+                                       goto error;
                                }
                        }
                }
@@ -4298,9 +4383,14 @@ static void *thread_manage_clients(void *data)
 
                        /* Event on the registration socket */
                        if (pollfd == client_sock) {
-                               if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
+                               if (revents & LPOLLIN) {
+                                       continue;
+                               } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
                                        ERR("Client socket poll error");
                                        goto error;
+                               } else {
+                                       ERR("Unexpected poll events %u for sock %d", revents, pollfd);
+                                       goto error;
                                }
                        }
                }
@@ -5973,6 +6063,10 @@ exit_apps:
        }
 exit_reg_apps:
 
+       /*
+        * Join dispatch thread after joining reg_apps_thread to ensure
+        * we don't leak applications in the queue.
+        */
        ret = pthread_join(dispatch_thread, &status);
        if (ret) {
                errno = ret;
@@ -6038,6 +6132,9 @@ exit_options:
        sessiond_cleanup_options();
 
 exit_set_signal_handler:
+       /* Ensure all prior call_rcu are done. */
+       rcu_barrier();
+
        if (!retval) {
                exit(EXIT_SUCCESS);
        } else {
This page took 0.028231 seconds and 4 git commands to generate.