Fix: unbalanced ustconsumer32_data.pid_mutex lock
[lttng-tools.git] / src / bin / lttng-sessiond / main.c
index f01ca2e84b1f6e63bc0070d73d15fa06c6b01043..14dd1381ad036605f94fd75881b57b69693097e4 100644 (file)
@@ -76,10 +76,10 @@ static int opt_sig_parent;
 static int opt_verbose_consumer;
 static int opt_daemon, opt_background;
 static int opt_no_kernel;
-static int is_root;                    /* Set to 1 if the daemon is running as root */
 static pid_t ppid;          /* Parent PID for --sig-parent option */
 static pid_t child_ppid;    /* Internal parent PID use with daemonize. */
 static char *rundir;
+static int lockfile_fd = -1;
 
 /* Set to 1 when a SIGUSR1 signal is received. */
 static int recv_child_signal;
@@ -244,6 +244,9 @@ struct health_app *health_sessiond;
 /* JUL TCP port for registration. Used by the JUL thread. */
 unsigned int jul_tcp_port = DEFAULT_JUL_TCP_PORT;
 
+/* Am I root or not. */
+int is_root;                   /* Set to 1 if the daemon is running as root */
+
 /*
  * Whether sessiond is ready for commands/health check requests.
  * NR_LTTNG_SESSIOND_READY must match the number of calls to
@@ -456,6 +459,27 @@ static void close_consumer_sockets(void)
        }
 }
 
+/*
+ * Generate the full lock file path using the rundir.
+ *
+ * Return the snprintf() return value thus a negative value is an error.
+ */
+static int generate_lock_file_path(char *path, size_t len)
+{
+       int ret;
+
+       assert(path);
+       assert(rundir);
+
+       /* Build lockfile path from rundir. */
+       ret = snprintf(path, len, "%s/" DEFAULT_LTTNG_SESSIOND_LOCKFILE, rundir);
+       if (ret < 0) {
+               PERROR("snprintf lockfile path");
+       }
+
+       return ret;
+}
+
 /*
  * Cleanup the daemon
  */
@@ -537,14 +561,6 @@ static void cleanup(void)
        DBG("Removing directory %s", path);
        (void) rmdir(path);
 
-       /*
-        * We do NOT rmdir rundir because there are other processes
-        * using it, for instance lttng-relayd, which can start in
-        * parallel with this teardown.
-        */
-
-       free(rundir);
-
        DBG("Cleaning up all sessions");
 
        /* Destroy session list mutex */
@@ -576,6 +592,35 @@ static void cleanup(void)
 
        close_consumer_sockets();
 
+
+       /*
+        * Cleanup lock file by deleting it and finaly closing it which will
+        * release the file system lock.
+        */
+       if (lockfile_fd >= 0) {
+               char lockfile_path[PATH_MAX];
+
+               ret = generate_lock_file_path(lockfile_path, sizeof(lockfile_path));
+               if (ret > 0) {
+                       ret = remove(lockfile_path);
+                       if (ret < 0) {
+                               PERROR("remove lock file");
+                       }
+                       ret = close(lockfile_fd);
+                       if (ret < 0) {
+                               PERROR("close lock file");
+                       }
+               }
+       }
+
+       /*
+        * We do NOT rmdir rundir because there are other processes
+        * using it, for instance lttng-relayd, which can start in
+        * parallel with this teardown.
+        */
+
+       free(rundir);
+
        /* <fun> */
        DBG("%c[%d;%dm*** assert failed :-) *** ==> %c[%dm%c[%d;%dm"
                        "Matthew, BEET driven development works!%c[%dm",
@@ -1084,7 +1129,6 @@ restart:
        }
 
        health_code_update();
-
        if (code == LTTCOMM_CONSUMERD_COMMAND_SOCK_READY) {
                /* Connect both socket, command and metadata. */
                consumer_data->cmd_sock =
@@ -1241,13 +1285,13 @@ error:
                }
                consumer_data->cmd_sock = -1;
        }
-       if (*consumer_data->metadata_sock.fd_ptr >= 0) {
+       if (consumer_data->metadata_sock.fd_ptr &&
+           *consumer_data->metadata_sock.fd_ptr >= 0) {
                ret = close(*consumer_data->metadata_sock.fd_ptr);
                if (ret) {
                        PERROR("close");
                }
        }
-
        if (sock >= 0) {
                ret = close(sock);
                if (ret) {
@@ -1261,9 +1305,10 @@ error:
        pthread_mutex_unlock(&consumer_data->lock);
 
        /* Cleanup metadata socket mutex. */
-       pthread_mutex_destroy(consumer_data->metadata_sock.lock);
-       free(consumer_data->metadata_sock.lock);
-
+       if (consumer_data->metadata_sock.lock) {
+               pthread_mutex_destroy(consumer_data->metadata_sock.lock);
+               free(consumer_data->metadata_sock.lock);
+       }
        lttng_poll_clean(&events);
 error_poll:
        if (err) {
@@ -2085,19 +2130,23 @@ static int spawn_consumer_thread(struct consumer_data *consumer_data)
        if (ret != 0) {
                errno = ret;
                if (ret == ETIMEDOUT) {
+                       int pth_ret;
+
                        /*
                         * Call has timed out so we kill the kconsumerd_thread and return
                         * an error.
                         */
                        ERR("Condition timed out. The consumer thread was never ready."
                                        " Killing it");
-                       ret = pthread_cancel(consumer_data->thread);
-                       if (ret < 0) {
+                       pth_ret = pthread_cancel(consumer_data->thread);
+                       if (pth_ret < 0) {
                                PERROR("pthread_cancel consumer thread");
                        }
                } else {
                        PERROR("pthread_cond_wait failed consumer thread");
                }
+               /* Caller is expecting a negative value on failure. */
+               ret = -1;
                goto error;
        }
 
@@ -2183,10 +2232,11 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data)
                                consumer_to_use = consumerd32_bin;
                        } else {
                                DBG("Could not find any valid consumerd executable");
+                               ret = -EINVAL;
                                break;
                        }
                        DBG("Using kernel consumer at: %s",  consumer_to_use);
-                       execl(consumer_to_use,
+                       ret = execl(consumer_to_use,
                                "lttng-consumerd", verbosity, "-k",
                                "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path,
                                "--consumerd-err-sock", consumer_data->err_unix_sock_path,
@@ -2234,9 +2284,6 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data)
                        if (consumerd64_libdir[0] != '\0') {
                                free(tmpnew);
                        }
-                       if (ret) {
-                               goto error;
-                       }
                        break;
                }
                case LTTNG_CONSUMER32_UST:
@@ -2280,9 +2327,6 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data)
                        if (consumerd32_libdir[0] != '\0') {
                                free(tmpnew);
                        }
-                       if (ret) {
-                               goto error;
-                       }
                        break;
                }
                default:
@@ -2290,8 +2334,9 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data)
                        exit(EXIT_FAILURE);
                }
                if (errno != 0) {
-                       PERROR("kernel start consumer exec");
+                       PERROR("Consumer execl()");
                }
+               /* Reaching this point, we got a failure on our execl(). */
                exit(EXIT_FAILURE);
        } else if (pid > 0) {
                ret = pid;
@@ -2849,6 +2894,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock,
                        }
 
                        /* 32-bit */
+                       pthread_mutex_lock(&ustconsumer32_data.pid_mutex);
                        if (consumerd32_bin[0] != '\0' &&
                                        ustconsumer32_data.pid == 0 &&
                                        cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER) {
@@ -3053,7 +3099,9 @@ skip_domain:
                struct lttng_event *events;
                ssize_t nb_events;
 
+               session_lock_list();
                nb_events = cmd_list_tracepoints(cmd_ctx->lsm->domain.type, &events);
+               session_unlock_list();
                if (nb_events < 0) {
                        /* Return value is a negative lttng_error_code. */
                        ret = -nb_events;
@@ -3084,8 +3132,10 @@ skip_domain:
                struct lttng_event_field *fields;
                ssize_t nb_fields;
 
+               session_lock_list();
                nb_fields = cmd_list_tracepoint_fields(cmd_ctx->lsm->domain.type,
                                &fields);
+               session_unlock_list();
                if (nb_fields < 0) {
                        /* Return value is a negative lttng_error_code. */
                        ret = -nb_fields;
@@ -3668,7 +3718,7 @@ restart:
 
                rcu_thread_online();
 
-               reply.ret_code = 0;
+               memset(&reply, 0, sizeof(reply));
                for (i = 0; i < NR_HEALTH_SESSIOND_TYPES; i++) {
                        /*
                         * health_check_state returns 0 if health is
@@ -4021,12 +4071,13 @@ static int parse_args(int argc, char **argv)
                { "no-kernel", 0, 0, 'N' },
                { "pidfile", 1, 0, 'p' },
                { "jul-tcp-port", 1, 0, 'J' },
+               { "background", 0, 0, 'b' },
                { NULL, 0, 0, 0 }
        };
 
        while (1) {
                int option_index = 0;
-               c = getopt_long(argc, argv, "dhqvVSN" "a:c:g:s:C:E:D:F:Z:u:t:p:J:",
+               c = getopt_long(argc, argv, "dhqvVSN" "a:c:g:s:C:E:D:F:Z:u:t:p:J:b",
                                long_options, &option_index);
                if (c == -1) {
                        break;
@@ -4048,6 +4099,9 @@ static int parse_args(int argc, char **argv)
                case 'd':
                        opt_daemon = 1;
                        break;
+               case 'b':
+                       opt_background = 1;
+                       break;
                case 'g':
                        tracing_group_name = optarg;
                        break;
@@ -4504,6 +4558,24 @@ error:
        return;
 }
 
+/*
+ * Create lockfile using the rundir and return its fd.
+ */
+static int create_lockfile(void)
+{
+       int ret;
+       char lockfile_path[PATH_MAX];
+
+       ret = generate_lock_file_path(lockfile_path, sizeof(lockfile_path));
+       if (ret < 0) {
+               goto error;
+       }
+
+       ret = utils_create_lock_file(lockfile_path);
+error:
+       return ret;
+}
+
 /*
  * Write JUL TCP port using the rundir.
  */
@@ -4680,6 +4752,11 @@ int main(int argc, char **argv)
                }
        }
 
+       lockfile_fd = create_lockfile();
+       if (lockfile_fd < 0) {
+               goto error;
+       }
+
        /* Set consumer initial state */
        kernel_consumerd_state = CONSUMER_STOPPED;
        ust_consumerd_state = CONSUMER_STOPPED;
@@ -4903,7 +4980,7 @@ int main(int argc, char **argv)
        ret = pthread_create(&apps_notify_thread, NULL,
                        ust_thread_manage_notify, (void *) NULL);
        if (ret != 0) {
-               PERROR("pthread_create apps");
+               PERROR("pthread_create notify");
                goto exit_apps_notify;
        }
 
@@ -4911,7 +4988,7 @@ int main(int argc, char **argv)
        ret = pthread_create(&jul_reg_thread, NULL,
                        jul_thread_manage_registration, (void *) NULL);
        if (ret != 0) {
-               PERROR("pthread_create apps");
+               PERROR("pthread_create JUL");
                goto exit_jul_reg;
        }
 
This page took 0.028036 seconds and 4 git commands to generate.