Fix: agent may not be ready on launch
[lttng-tools.git] / src / bin / lttng-sessiond / main.c
index cec3a504a01371aed72f81f419f43a66539ba162..0620c7e70f83b3d33ae9cf5ea64dd15316784657 100644 (file)
@@ -74,7 +74,7 @@
 #include "notification-thread.h"
 #include "notification-thread-commands.h"
 #include "rotation-thread.h"
-#include "syscall.h"
+#include "lttng-syscall.h"
 #include "agent.h"
 #include "ht-cleanup.h"
 #include "sessiond-config.h"
@@ -304,7 +304,7 @@ struct lttng_ht *agent_apps_ht_by_sock = NULL;
  * NR_LTTNG_SESSIOND_READY must match the number of calls to
  * sessiond_notify_ready().
  */
-#define NR_LTTNG_SESSIOND_READY                5
+#define NR_LTTNG_SESSIOND_READY                6
 int lttng_sessiond_ready = NR_LTTNG_SESSIOND_READY;
 
 int sessiond_check_thread_quit_pipe(int fd, uint32_t events)
@@ -595,7 +595,8 @@ static void sessiond_cleanup(void)
                /* Cleanup ALL session */
                cds_list_for_each_entry_safe(sess, stmp,
                                &session_list_ptr->head, list) {
-                       cmd_destroy_session(sess, kernel_poll_pipe[1]);
+                       cmd_destroy_session(sess, kernel_poll_pipe[1],
+                                       notification_thread_handle);
                }
        }
 
@@ -630,21 +631,6 @@ static void sessiond_cleanup(void)
                free(load_info);
        }
 
-       /*
-        * Cleanup lock file by deleting it and finaly closing it which will
-        * release the file system lock.
-        */
-       if (lockfile_fd >= 0) {
-               ret = remove(config.lock_file_path.value);
-               if (ret < 0) {
-                       PERROR("remove lock file");
-               }
-               ret = close(lockfile_fd);
-               if (ret < 0) {
-                       PERROR("close lock file");
-               }
-       }
-
        /*
         * We do NOT rmdir rundir because there are other processes
         * using it, for instance lttng-relayd, which can start in
@@ -3771,7 +3757,8 @@ error_add_context:
        }
        case LTTNG_DESTROY_SESSION:
        {
-               ret = cmd_destroy_session(cmd_ctx->session, kernel_poll_pipe[1]);
+               ret = cmd_destroy_session(cmd_ctx->session, kernel_poll_pipe[1],
+                               notification_thread_handle);
 
                /* Set session to NULL so we do not unlock it after free. */
                cmd_ctx->session = NULL;
@@ -4200,13 +4187,12 @@ error_add_context:
 
                ret = cmd_rotation_set_schedule(cmd_ctx->session,
                                cmd_ctx->lsm->u.rotate_setup.timer_us,
-                               cmd_ctx->lsm->u.rotate_setup.size);
-               if (ret < 0) {
-                       ret = -ret;
+                               cmd_ctx->lsm->u.rotate_setup.size,
+                               notification_thread_handle);
+               if (ret != LTTNG_OK) {
                        goto error;
                }
 
-               ret = LTTNG_OK;
                break;
        }
        case LTTNG_ROTATION_SCHEDULE_GET_TIMER_PERIOD:
@@ -4928,7 +4914,7 @@ static int set_option(int opt, const char *arg, const char *optname)
        } else if (string_match(optname, "no-kernel")) {
                config.no_kernel = true;
        } else if (string_match(optname, "quiet") || opt == 'q') {
-               lttng_opt_quiet = true;
+               config.quiet = true;
        } else if (string_match(optname, "verbose") || opt == 'v') {
                /* Verbose level can increase using multiple -v */
                if (arg) {
@@ -5331,18 +5317,57 @@ end:
        return ret;
 }
 
+/*
+ * Create lockfile using the rundir and return its fd.
+ */
+static int create_lockfile(void)
+{
+        return utils_create_lock_file(config.lock_file_path.value);
+}
+
 /*
  * Check if the global socket is available, and if a daemon is answering at the
  * other side. If yes, error is returned.
+ *
+ * Also attempts to create and hold the lock file.
  */
 static int check_existing_daemon(void)
 {
+       int ret = 0;
+
        /* Is there anybody out there ? */
        if (lttng_session_daemon_alive()) {
-               return -EEXIST;
+               ret = -EEXIST;
+               goto end;
        }
 
-       return 0;
+       lockfile_fd = create_lockfile();
+       if (lockfile_fd < 0) {
+               ret = -EEXIST;
+               goto end;
+       }
+end:
+       return ret;
+}
+
+static void sessiond_cleanup_lock_file(void)
+{
+       int ret;
+
+       /*
+        * Cleanup lock file by deleting it and finally closing it which will
+        * release the file system lock.
+        */
+       if (lockfile_fd >= 0) {
+               ret = remove(config.lock_file_path.value);
+               if (ret < 0) {
+                       PERROR("remove lock file");
+               }
+               ret = close(lockfile_fd);
+               if (ret < 0) {
+                       PERROR("close lock file");
+               }
+       }
 }
 
 /*
@@ -5601,14 +5626,6 @@ static int write_pidfile(void)
         return utils_create_pid_file(getpid(), config.pid_file_path.value);
 }
 
-/*
- * Create lockfile using the rundir and return its fd.
- */
-static int create_lockfile(void)
-{
-        return utils_create_lock_file(config.lock_file_path.value);
-}
-
 /*
  * Write agent TCP port using the rundir.
  */
@@ -5700,15 +5717,16 @@ int main(int argc, char **argv)
        struct lttng_pipe *ust32_channel_monitor_pipe = NULL,
                        *ust64_channel_monitor_pipe = NULL,
                        *kernel_channel_monitor_pipe = NULL;
-       bool notification_thread_running = false;
-       bool rotation_thread_running = false;
-       bool timer_thread_running = false;
+       bool notification_thread_launched = false;
+       bool rotation_thread_launched = false;
+       bool timer_thread_launched = false;
        struct lttng_pipe *ust32_channel_rotate_pipe = NULL,
                        *ust64_channel_rotate_pipe = NULL,
                        *kernel_channel_rotate_pipe = NULL;
        struct timer_thread_parameters timer_thread_ctx;
        /* Queue of rotation jobs populated by the sessiond-timer. */
        struct rotation_thread_timer_queue *rotation_timer_queue = NULL;
+       sem_t notification_thread_ready;
 
        init_kernel_workarounds();
 
@@ -5784,6 +5802,18 @@ int main(int argc, char **argv)
 
        sessiond_config_log(&config);
 
+       if (create_lttng_rundir()) {
+               retval = -1;
+               goto exit_options;
+       }
+
+       /* Abort launch if a session daemon is already running. */
+       if (check_existing_daemon()) {
+               ERR("A session daemon is already running.");
+               retval = -1;
+               goto exit_options;
+       }
+
        /* Daemonize */
        if (config.daemonize || config.background) {
                int i;
@@ -5798,9 +5828,12 @@ int main(int argc, char **argv)
                /*
                 * We are in the child. Make sure all other file descriptors are
                 * closed, in case we are called with more opened file
-                * descriptors than the standard ones.
+                * descriptors than the standard ones and the lock file.
                 */
                for (i = 3; i < sysconf(_SC_OPEN_MAX); i++) {
+                       if (i == lockfile_fd) {
+                               continue;
+                       }
                        (void) close(i);
                }
        }
@@ -5839,12 +5872,6 @@ int main(int argc, char **argv)
 
        /* Check if daemon is UID = 0 */
        is_root = !getuid();
-
-       if (create_lttng_rundir()) {
-               retval = -1;
-               goto exit_init_data;
-       }
-
        if (is_root) {
                /* Create global run dir with root access */
 
@@ -5876,12 +5903,6 @@ int main(int argc, char **argv)
                }
        }
 
-       lockfile_fd = create_lockfile();
-       if (lockfile_fd < 0) {
-               retval = -1;
-               goto exit_init_data;
-       }
-
        /* Set consumer initial state */
        kernel_consumerd_state = CONSUMER_STOPPED;
        ust_consumerd_state = CONSUMER_STOPPED;
@@ -5948,19 +5969,6 @@ int main(int argc, char **argv)
                goto exit_init_data;
        }
 
-       /*
-        * See if daemon already exist.
-        */
-       if (check_existing_daemon()) {
-               ERR("Already running daemon.\n");
-               /*
-                * We do not goto exit because we must not cleanup()
-                * because a daemon is already running.
-                */
-               retval = -1;
-               goto exit_init_data;
-       }
-
        /*
         * Init UST app hash table. Alloc hash table before this point since
         * cleanup() can get called after that point.
@@ -6116,11 +6124,19 @@ int main(int argc, char **argv)
                goto exit_health;
        }
 
+       /*
+        * The rotation thread needs the notification thread to be ready before
+        * creating the rotate_notification_channel, so we use this semaphore as
+        * a rendezvous point.
+        */
+       sem_init(&notification_thread_ready, 0, 0);
+
        /* notification_thread_data acquires the pipes' read side. */
        notification_thread_handle = notification_thread_handle_create(
                        ust32_channel_monitor_pipe,
                        ust64_channel_monitor_pipe,
-                       kernel_channel_monitor_pipe);
+                       kernel_channel_monitor_pipe,
+                       &notification_thread_ready);
        if (!notification_thread_handle) {
                retval = -1;
                ERR("Failed to create notification thread shared data");
@@ -6138,7 +6154,7 @@ int main(int argc, char **argv)
                stop_threads();
                goto exit_notification;
        }
-       notification_thread_running = true;
+       notification_thread_launched = true;
 
        /* Create timer thread. */
        ret = pthread_create(&timer_thread, default_pthread_attr(),
@@ -6150,7 +6166,7 @@ int main(int argc, char **argv)
                stop_threads();
                goto exit_notification;
        }
-       timer_thread_running = true;
+       timer_thread_launched = true;
 
        /* rotation_thread_data acquires the pipes' read side. */
        rotation_thread_handle = rotation_thread_handle_create(
@@ -6158,7 +6174,9 @@ int main(int argc, char **argv)
                        ust64_channel_rotate_pipe,
                        kernel_channel_rotate_pipe,
                        thread_quit_pipe[0],
-                       rotation_timer_queue);
+                       rotation_timer_queue,
+                       notification_thread_handle,
+                       &notification_thread_ready);
        if (!rotation_thread_handle) {
                retval = -1;
                ERR("Failed to create rotation thread shared data");
@@ -6176,7 +6194,7 @@ int main(int argc, char **argv)
                stop_threads();
                goto exit_rotation;
        }
-       rotation_thread_running = true;
+       rotation_thread_launched = true;
 
        /* Create thread to manage the client socket */
        ret = pthread_create(&client_thread, default_pthread_attr(),
@@ -6346,6 +6364,7 @@ exit_dispatch:
 exit_client:
 exit_rotation:
 exit_notification:
+       sem_destroy(&notification_thread_ready);
        ret = pthread_join(health_thread, &status);
        if (ret) {
                errno = ret;
@@ -6381,7 +6400,7 @@ exit_init_data:
         * of the active session and channels at the moment of the teardown.
         */
        if (notification_thread_handle) {
-               if (notification_thread_running) {
+               if (notification_thread_launched) {
                        notification_thread_command_quit(
                                        notification_thread_handle);
                        ret = pthread_join(notification_thread, &status);
@@ -6395,7 +6414,7 @@ exit_init_data:
        }
 
        if (rotation_thread_handle) {
-               if (rotation_thread_running) {
+               if (rotation_thread_launched) {
                        ret = pthread_join(rotation_thread, &status);
                        if (ret) {
                                errno = ret;
@@ -6406,7 +6425,7 @@ exit_init_data:
                rotation_thread_handle_destroy(rotation_thread_handle);
        }
 
-       if (timer_thread_running) {
+       if (timer_thread_launched) {
                kill(getpid(), LTTNG_SESSIOND_SIG_EXIT);
                ret = pthread_join(timer_thread, &status);
                if (ret) {
@@ -6442,6 +6461,7 @@ exit_health_sessiond_cleanup:
 exit_create_run_as_worker_cleanup:
 
 exit_options:
+       sessiond_cleanup_lock_file();
        sessiond_cleanup_options();
 
 exit_set_signal_handler:
This page took 0.028011 seconds and 4 git commands to generate.