Clean-up: use a define for support thread count
[lttng-tools.git] / src / bin / lttng-sessiond / main.c
index db92191c8c31a9e77e4c2c48d002ecbbcd7e0369..f9b41d2707bd26aff4f95a72aa1885d2f2f09280 100644 (file)
@@ -74,7 +74,7 @@
 #include "notification-thread.h"
 #include "notification-thread-commands.h"
 #include "rotation-thread.h"
-#include "syscall.h"
+#include "lttng-syscall.h"
 #include "agent.h"
 #include "ht-cleanup.h"
 #include "sessiond-config.h"
@@ -299,13 +299,41 @@ struct rotation_thread_handle *rotation_thread_handle;
 struct lttng_ht *agent_apps_ht_by_sock = NULL;
 
 /*
- * Whether sessiond is ready for commands/notification channel/health check
+ * The initialization of the session daemon is done in multiple phases.
+ *
+ * While all threads are launched near-simultaneously, only some of them
+ * are needed to ensure the session daemon can start to respond to client
  * requests.
- * NR_LTTNG_SESSIOND_READY must match the number of calls to
- * sessiond_notify_ready().
+ *
+ * There are two important guarantees that we wish to offer with respect
+ * to the initialisation of the session daemon:
+ *   - When the daemonize/background launcher process exits, the sessiond
+ *     is fully able to respond to client requests,
+ *   - Auto-loaded sessions are visible to clients.
+ *
+ * In order to achieve this, a number of support threads have to be launched
+ * to allow the "client" thread to function properly. Moreover, since the
+ * "load session" thread needs the client thread, we must provide a way
+ * for the "load session" thread to know that the "client" thread is up
+ * and running.
+ *
+ * Hence, the support threads decrement the lttng_sessiond_ready counter
+ * while the "client" threads waits for it to reach 0. Once the "client" thread
+ * unblocks, it posts the message_thread_ready semaphore which allows the
+ * "load session" thread to progress.
+ *
+ * This implies that the "load session" thread is the last to be initialized
+ * and will explicitly call sessiond_signal_parents(), which signals the parents
+ * that the session daemon is fully initialized.
+ *
+ * The four (4) support threads are:
+ *  - agent_thread
+ *  - notification_thread
+ *  - rotation_thread
+ *  - health_thread
  */
-#define NR_LTTNG_SESSIOND_READY                5
-int lttng_sessiond_ready = NR_LTTNG_SESSIOND_READY;
+#define NR_LTTNG_SESSIOND_SUPPORT_THREADS 4
+int lttng_sessiond_ready = NR_LTTNG_SESSIOND_SUPPORT_THREADS;
 
 int sessiond_check_thread_quit_pipe(int fd, uint32_t events)
 {
@@ -314,28 +342,36 @@ int sessiond_check_thread_quit_pipe(int fd, uint32_t events)
 
 /* Notify parents that we are ready for cmd and health check */
 LTTNG_HIDDEN
-void sessiond_notify_ready(void)
+void sessiond_signal_parents(void)
 {
-       if (uatomic_sub_return(&lttng_sessiond_ready, 1) == 0) {
-               /*
-                * Notify parent pid that we are ready to accept command
-                * for client side.  This ppid is the one from the
-                * external process that spawned us.
-                */
-               if (config.sig_parent) {
-                       kill(ppid, SIGUSR1);
-               }
+       /*
+        * Notify parent pid that we are ready to accept command
+        * for client side.  This ppid is the one from the
+        * external process that spawned us.
+        */
+       if (config.sig_parent) {
+               kill(ppid, SIGUSR1);
+       }
 
-               /*
-                * Notify the parent of the fork() process that we are
-                * ready.
-                */
-               if (config.daemonize || config.background) {
-                       kill(child_ppid, SIGUSR1);
-               }
+       /*
+        * Notify the parent of the fork() process that we are
+        * ready.
+        */
+       if (config.daemonize || config.background) {
+               kill(child_ppid, SIGUSR1);
        }
 }
 
+LTTNG_HIDDEN
+void sessiond_notify_ready(void)
+{
+       /*
+        * The _return variant is used since the implied memory barriers are
+        * required.
+        */
+       (void) uatomic_sub_return(&lttng_sessiond_ready, 1);
+}
+
 static
 int __sessiond_set_thread_pollset(struct lttng_poll_event *events, size_t size,
                int *a_pipe)
@@ -631,21 +667,6 @@ static void sessiond_cleanup(void)
                free(load_info);
        }
 
-       /*
-        * Cleanup lock file by deleting it and finaly closing it which will
-        * release the file system lock.
-        */
-       if (lockfile_fd >= 0) {
-               ret = remove(config.lock_file_path.value);
-               if (ret < 0) {
-                       PERROR("remove lock file");
-               }
-               ret = close(lockfile_fd);
-               if (ret < 0) {
-                       PERROR("close lock file");
-               }
-       }
-
        /*
         * We do NOT rmdir rundir because there are other processes
         * using it, for instance lttng-relayd, which can start in
@@ -875,7 +896,7 @@ static int update_kernel_stream(struct consumer_data *consumer_data, int fd)
                        cds_lfht_for_each_entry(ksess->consumer->socks->ht,
                                        &iter.iter, socket, node.node) {
                                pthread_mutex_lock(socket->lock);
-                               ret = kernel_consumer_send_channel_stream(socket,
+                               ret = kernel_consumer_send_channel_streams(socket,
                                                channel, ksess,
                                                session->output_traces ? 1 : 0);
                                pthread_mutex_unlock(socket->lock);
@@ -4510,13 +4531,41 @@ static void *thread_manage_clients(void *data)
                goto error;
        }
 
-       sessiond_notify_ready();
        ret = sem_post(&load_info->message_thread_ready);
        if (ret) {
                PERROR("sem_post message_thread_ready");
                goto error;
        }
 
+       /*
+        * Wait until all support threads are initialized before accepting
+        * commands.
+        */
+       while (uatomic_read(&lttng_sessiond_ready) != 0) {
+               fd_set read_fds;
+               struct timeval timeout;
+
+               FD_ZERO(&read_fds);
+               FD_SET(thread_quit_pipe[0], &read_fds);
+               memset(&timeout, 0, sizeof(timeout));
+               timeout.tv_usec = 1000;
+
+               /*
+                * If a support thread failed to launch, it may signal that
+                * we must exit and the sessiond would never be marked as
+                * "ready".
+                *
+                * The timeout is set to 1ms, which serves as a way to
+                * pace down this check.
+                */
+               ret = select(thread_quit_pipe[0] + 1, &read_fds, NULL, NULL,
+                               &timeout);
+               if (ret > 0 || (ret < 0 && errno != EINTR)) {
+                       goto exit;
+               }
+               cmm_smp_rmb();
+       }
+
        /* This testpoint is after we signal readiness to the parent. */
        if (testpoint(sessiond_thread_manage_clients)) {
                goto error;
@@ -4929,7 +4978,7 @@ static int set_option(int opt, const char *arg, const char *optname)
        } else if (string_match(optname, "no-kernel")) {
                config.no_kernel = true;
        } else if (string_match(optname, "quiet") || opt == 'q') {
-               lttng_opt_quiet = true;
+               config.quiet = true;
        } else if (string_match(optname, "verbose") || opt == 'v') {
                /* Verbose level can increase using multiple -v */
                if (arg) {
@@ -5048,8 +5097,8 @@ static int set_option(int opt, const char *arg, const char *optname)
                                ERR("Port overflow in --agent-tcp-port parameter: %s", arg);
                                return -1;
                        }
-                       config.agent_tcp_port = (uint32_t) v;
-                       DBG3("Agent TCP port set to non default: %u", config.agent_tcp_port);
+                       config.agent_tcp_port.begin = config.agent_tcp_port.end = (int) v;
+                       DBG3("Agent TCP port set to non default: %i", (int) v);
                }
        } else if (string_match(optname, "load") || opt == 'l') {
                if (!arg || *arg == '\0') {
@@ -5332,18 +5381,57 @@ end:
        return ret;
 }
 
+/*
+ * Create lockfile using the rundir and return its fd.
+ */
+static int create_lockfile(void)
+{
+        return utils_create_lock_file(config.lock_file_path.value);
+}
+
 /*
  * Check if the global socket is available, and if a daemon is answering at the
  * other side. If yes, error is returned.
+ *
+ * Also attempts to create and hold the lock file.
  */
 static int check_existing_daemon(void)
 {
+       int ret = 0;
+
        /* Is there anybody out there ? */
        if (lttng_session_daemon_alive()) {
-               return -EEXIST;
+               ret = -EEXIST;
+               goto end;
        }
 
-       return 0;
+       lockfile_fd = create_lockfile();
+       if (lockfile_fd < 0) {
+               ret = -EEXIST;
+               goto end;
+       }
+end:
+       return ret;
+}
+
+static void sessiond_cleanup_lock_file(void)
+{
+       int ret;
+
+       /*
+        * Cleanup lock file by deleting it and finaly closing it which will
+        * release the file system lock.
+        */
+       if (lockfile_fd >= 0) {
+               ret = remove(config.lock_file_path.value);
+               if (ret < 0) {
+                       PERROR("remove lock file");
+               }
+               ret = close(lockfile_fd);
+               if (ret < 0) {
+                       PERROR("close lock file");
+               }
+       }
 }
 
 /*
@@ -5602,23 +5690,6 @@ static int write_pidfile(void)
         return utils_create_pid_file(getpid(), config.pid_file_path.value);
 }
 
-/*
- * Create lockfile using the rundir and return its fd.
- */
-static int create_lockfile(void)
-{
-        return utils_create_lock_file(config.lock_file_path.value);
-}
-
-/*
- * Write agent TCP port using the rundir.
- */
-static int write_agent_port(void)
-{
-       return utils_create_pid_file(config.agent_tcp_port,
-                       config.agent_port_file_path.value);
-}
-
 static int set_clock_plugin_env(void)
 {
        int ret = 0;
@@ -5701,9 +5772,9 @@ int main(int argc, char **argv)
        struct lttng_pipe *ust32_channel_monitor_pipe = NULL,
                        *ust64_channel_monitor_pipe = NULL,
                        *kernel_channel_monitor_pipe = NULL;
-       bool notification_thread_running = false;
-       bool rotation_thread_running = false;
-       bool timer_thread_running = false;
+       bool notification_thread_launched = false;
+       bool rotation_thread_launched = false;
+       bool timer_thread_launched = false;
        struct lttng_pipe *ust32_channel_rotate_pipe = NULL,
                        *ust64_channel_rotate_pipe = NULL,
                        *kernel_channel_rotate_pipe = NULL;
@@ -5786,6 +5857,18 @@ int main(int argc, char **argv)
 
        sessiond_config_log(&config);
 
+       if (create_lttng_rundir()) {
+               retval = -1;
+               goto exit_options;
+       }
+
+       /* Abort launch if a session daemon is already running. */
+       if (check_existing_daemon()) {
+               ERR("A session daemon is already running.");
+               retval = -1;
+               goto exit_options;
+       }
+
        /* Daemonize */
        if (config.daemonize || config.background) {
                int i;
@@ -5800,9 +5883,12 @@ int main(int argc, char **argv)
                /*
                 * We are in the child. Make sure all other file descriptors are
                 * closed, in case we are called with more opened file
-                * descriptors than the standard ones.
+                * descriptors than the standard ones and the lock file.
                 */
                for (i = 3; i < sysconf(_SC_OPEN_MAX); i++) {
+                       if (i == lockfile_fd) {
+                               continue;
+                       }
                        (void) close(i);
                }
        }
@@ -5841,12 +5927,6 @@ int main(int argc, char **argv)
 
        /* Check if daemon is UID = 0 */
        is_root = !getuid();
-
-       if (create_lttng_rundir()) {
-               retval = -1;
-               goto exit_init_data;
-       }
-
        if (is_root) {
                /* Create global run dir with root access */
 
@@ -5878,12 +5958,6 @@ int main(int argc, char **argv)
                }
        }
 
-       lockfile_fd = create_lockfile();
-       if (lockfile_fd < 0) {
-               retval = -1;
-               goto exit_init_data;
-       }
-
        /* Set consumer initial state */
        kernel_consumerd_state = CONSUMER_STOPPED;
        ust_consumerd_state = CONSUMER_STOPPED;
@@ -5950,19 +6024,6 @@ int main(int argc, char **argv)
                goto exit_init_data;
        }
 
-       /*
-        * See if daemon already exist.
-        */
-       if (check_existing_daemon()) {
-               ERR("Already running daemon.\n");
-               /*
-                * We do not goto exit because we must not cleanup()
-                * because a daemon is already running.
-                */
-               retval = -1;
-               goto exit_init_data;
-       }
-
        /*
         * Init UST app hash table. Alloc hash table before this point since
         * cleanup() can get called after that point.
@@ -6090,12 +6151,6 @@ int main(int argc, char **argv)
                retval = -1;
                goto exit_init_data;
        }
-       ret = write_agent_port();
-       if (ret) {
-               ERR("Error in write_agent_port");
-               retval = -1;
-               goto exit_init_data;
-       }
 
        /* Initialize communication library */
        lttcomm_init();
@@ -6148,7 +6203,7 @@ int main(int argc, char **argv)
                stop_threads();
                goto exit_notification;
        }
-       notification_thread_running = true;
+       notification_thread_launched = true;
 
        /* Create timer thread. */
        ret = pthread_create(&timer_thread, default_pthread_attr(),
@@ -6160,7 +6215,7 @@ int main(int argc, char **argv)
                stop_threads();
                goto exit_notification;
        }
-       timer_thread_running = true;
+       timer_thread_launched = true;
 
        /* rotation_thread_data acquires the pipes' read side. */
        rotation_thread_handle = rotation_thread_handle_create(
@@ -6188,7 +6243,7 @@ int main(int argc, char **argv)
                stop_threads();
                goto exit_rotation;
        }
-       rotation_thread_running = true;
+       rotation_thread_launched = true;
 
        /* Create thread to manage the client socket */
        ret = pthread_create(&client_thread, default_pthread_attr(),
@@ -6394,7 +6449,7 @@ exit_init_data:
         * of the active session and channels at the moment of the teardown.
         */
        if (notification_thread_handle) {
-               if (notification_thread_running) {
+               if (notification_thread_launched) {
                        notification_thread_command_quit(
                                        notification_thread_handle);
                        ret = pthread_join(notification_thread, &status);
@@ -6408,7 +6463,7 @@ exit_init_data:
        }
 
        if (rotation_thread_handle) {
-               if (rotation_thread_running) {
+               if (rotation_thread_launched) {
                        ret = pthread_join(rotation_thread, &status);
                        if (ret) {
                                errno = ret;
@@ -6419,7 +6474,7 @@ exit_init_data:
                rotation_thread_handle_destroy(rotation_thread_handle);
        }
 
-       if (timer_thread_running) {
+       if (timer_thread_launched) {
                kill(getpid(), LTTNG_SESSIOND_SIG_EXIT);
                ret = pthread_join(timer_thread, &status);
                if (ret) {
@@ -6455,6 +6510,7 @@ exit_health_sessiond_cleanup:
 exit_create_run_as_worker_cleanup:
 
 exit_options:
+       sessiond_cleanup_lock_file();
        sessiond_cleanup_options();
 
 exit_set_signal_handler:
This page took 0.028876 seconds and 4 git commands to generate.