From cf3af59ede4474f4e5123699ee2ee9ee85b7cadd Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 8 Aug 2011 00:03:04 -0400 Subject: [PATCH 1/1] ltt-sessiond teardown cleanup ltt-sessiond now kills (SIGTERM) the kconsumerd before exiting, so the LTTng module unload succeeds. Previously, the kconsumerd was still holding a reference on ltt-relay.ko -- it was only being released after ltt-sessiond closed its communication socket with kconsumerd. Now, all the teardown actually wakes each thread, getting them to return, and main() joins each of them before calling cleanup(). Previously, we had no guarantee that threads would not run concurrently with cleanup() executed from the signal handler. It would still be good to audit _each_ read call so we ensure that they are performed on O_NONBLOCK fcntl fds (as we did in the kconsumerd), so all the blocking is done in a poll() call, which can be woken up by the sessiond termination file descriptor too (from the signal handler). Failure to respect this could lead to races where sessiond blocks on a file descriptor and never really honour the signal for a thread, thus leading to blocking of the main() thread on a join. Signed-off-by: Mathieu Desnoyers --- ltt-sessiond/main.c | 180 +++++++++++++++++++++++++++----------------- 1 file changed, 111 insertions(+), 69 deletions(-) diff --git a/ltt-sessiond/main.c b/ltt-sessiond/main.c index 5636700d8..98bdb6a4d 100644 --- a/ltt-sessiond/main.c +++ b/ltt-sessiond/main.c @@ -146,10 +146,18 @@ static void teardown_kernel_session(struct ltt_session *session) } } +static void stop_threads(void) +{ + /* Stopping all threads */ + DBG("Terminating all threads"); + close(thread_quit_pipe[0]); + close(thread_quit_pipe[1]); +} + /* * Cleanup the daemon */ -static void cleanup() +static void cleanup(void) { int ret; char *cmd; @@ -163,11 +171,6 @@ static void cleanup() 27, 1, 31, 27, 0, 27, 1, 33, 27, 0); /* */ - /* Stopping all threads */ - DBG("Terminating all threads"); - close(thread_quit_pipe[0]); - close(thread_quit_pipe[1]); - DBG("Removing %s directory", LTTNG_RUNDIR); ret = asprintf(&cmd, "rm -rf " LTTNG_RUNDIR); if (ret < 0) { @@ -862,6 +865,23 @@ error: return ret; } +static int join_kconsumerd_thread(void) +{ + void *status; + int ret; + + if (kconsumerd_pid != 0) { + ret = kill(kconsumerd_pid, SIGTERM); + if (ret) { + ERR("Error killing kconsumerd"); + return ret; + } + return pthread_join(kconsumerd_thread, &status); + } else { + return 0; + } +} + /* * Fork and exec a kernel consumer daemon (kconsumerd). * @@ -2347,7 +2367,7 @@ static int parse_args(int argc, char **argv) * apps_sock - The communication socket for all UST apps. * client_sock - The communication of the cli tool (lttng). */ -static int init_daemon_socket() +static int init_daemon_socket(void) { int ret = 0; mode_t old_umask; @@ -2394,7 +2414,7 @@ end: /* * Check if the global socket is available. If yes, error is returned. */ -static int check_existing_daemon() +static int check_existing_daemon(void) { int ret; @@ -2536,26 +2556,27 @@ error: /* * Signal handler for the daemon + * + * Simply stop all worker threads, leaving main() return gracefully + * after joining all threads and calling cleanup(). */ static void sighandler(int sig) { switch (sig) { - case SIGPIPE: - DBG("SIGPIPE catched"); - return; - case SIGINT: - DBG("SIGINT catched"); - cleanup(); - break; - case SIGTERM: - DBG("SIGTERM catched"); - cleanup(); - break; - default: - break; + case SIGPIPE: + DBG("SIGPIPE catched"); + return; + case SIGINT: + DBG("SIGINT catched"); + stop_threads(); + break; + case SIGTERM: + DBG("SIGTERM catched"); + stop_threads(); + break; + default: + break; } - - exit(EXIT_SUCCESS); } /* @@ -2625,14 +2646,14 @@ int main(int argc, char **argv) const char *home_path; /* Create thread quit pipe */ - if (init_thread_quit_pipe() < 0) { - goto exit; + if ((ret = init_thread_quit_pipe()) < 0) { + goto error; } /* Parse arguments */ progname = argv[0]; if ((ret = parse_args(argc, argv) < 0)) { - goto exit; + goto error; } /* Daemonize */ @@ -2640,7 +2661,7 @@ int main(int argc, char **argv) ret = daemon(0, 0); if (ret < 0) { perror("daemon"); - goto exit; + goto error; } } @@ -2650,7 +2671,7 @@ int main(int argc, char **argv) if (is_root) { ret = create_lttng_rundir(); if (ret < 0) { - goto exit; + goto error; } if (strlen(apps_unix_sock_path) == 0) { @@ -2667,7 +2688,8 @@ int main(int argc, char **argv) if (home_path == NULL) { /* TODO: Add --socket PATH option */ ERR("Can't get HOME directory for sockets creation."); - goto exit; + ret = -EPERM; + goto error; } if (strlen(apps_unix_sock_path) == 0) { @@ -2693,10 +2715,10 @@ int main(int argc, char **argv) if ((ret = check_existing_daemon()) == 0) { ERR("Already running daemon.\n"); /* - * We do not goto error because we must not cleanup() because a daemon - * is already running. + * We do not goto exit because we must not cleanup() + * because a daemon is already running. */ - goto exit; + goto error; } /* After this point, we can safely call cleanup() so goto error is used */ @@ -2710,7 +2732,7 @@ int main(int argc, char **argv) if (is_root) { ret = set_kconsumerd_sockets(); if (ret < 0) { - goto error; + goto exit; } /* Setup kernel tracer */ @@ -2720,18 +2742,18 @@ int main(int argc, char **argv) set_ulimit(); } - if (set_signal_handler() < 0) { - goto error; + if ((ret = set_signal_handler()) < 0) { + goto exit; } /* Setup the needed unix socket */ - if (init_daemon_socket() < 0) { - goto error; + if ((ret = init_daemon_socket()) < 0) { + goto exit; } /* Set credentials to socket */ - if (is_root && (set_permissions() < 0)) { - goto error; + if (is_root && ((ret = set_permissions()) < 0)) { + goto exit; } /* Get parent pid if -S, --sig-parent is specified. */ @@ -2740,8 +2762,8 @@ int main(int argc, char **argv) } /* Setup the kernel pipe for waking up the kernel thread */ - if (create_kernel_poll_pipe() < 0) { - goto error; + if ((ret = create_kernel_poll_pipe()) < 0) { + goto exit; } /* @@ -2750,41 +2772,61 @@ int main(int argc, char **argv) */ session_list_ptr = get_session_list(); - while (1) { - /* Create thread to manage the client socket */ - ret = pthread_create(&client_thread, NULL, thread_manage_clients, (void *) NULL); - if (ret != 0) { - perror("pthread_create"); - goto error; - } + /* Create thread to manage the client socket */ + ret = pthread_create(&client_thread, NULL, thread_manage_clients, (void *) NULL); + if (ret != 0) { + perror("pthread_create"); + goto exit_client; + } - /* Create thread to manage application socket */ - ret = pthread_create(&apps_thread, NULL, thread_manage_apps, (void *) NULL); - if (ret != 0) { - perror("pthread_create"); - goto error; - } + /* Create thread to manage application socket */ + ret = pthread_create(&apps_thread, NULL, thread_manage_apps, (void *) NULL); + if (ret != 0) { + perror("pthread_create"); + goto exit_apps; + } - /* Create kernel thread to manage kernel event */ - ret = pthread_create(&kernel_thread, NULL, thread_manage_kernel, (void *) NULL); - if (ret != 0) { - perror("pthread_create"); - goto error; - } + /* Create kernel thread to manage kernel event */ + ret = pthread_create(&kernel_thread, NULL, thread_manage_kernel, (void *) NULL); + if (ret != 0) { + perror("pthread_create"); + goto exit_kernel; + } - ret = pthread_join(client_thread, &status); - if (ret != 0) { - perror("pthread_join"); - goto error; - } + ret = pthread_join(kernel_thread, &status); + if (ret != 0) { + perror("pthread_join"); + goto error; /* join error, exit without cleanup */ } - cleanup(); - exit(EXIT_SUCCESS); +exit_kernel: + ret = pthread_join(apps_thread, &status); + if (ret != 0) { + perror("pthread_join"); + goto error; /* join error, exit without cleanup */ + } -error: - cleanup(); +exit_apps: + ret = pthread_join(client_thread, &status); + if (ret != 0) { + perror("pthread_join"); + goto error; /* join error, exit without cleanup */ + } + + ret = join_kconsumerd_thread(); + if (ret != 0) { + perror("join_kconsumerd"); + goto error; /* join error, exit without cleanup */ + } +exit_client: exit: + /* + * cleanup() is called when no other thread is running. + */ + cleanup(); + if (!ret) + exit(EXIT_SUCCESS); +error: exit(EXIT_FAILURE); } -- 2.34.1