Fix: health subsystem issues with shared code
[lttng-tools.git] / src / bin / lttng-sessiond / main.c
index 6f8f1486ca60de5d958352868b9912ddcbdcf24f..d999928feb8a0f06bfe24857fc3182f56c4f903f 100644 (file)
@@ -72,6 +72,7 @@ const char default_global_apps_pipe[] = DEFAULT_GLOBAL_APPS_PIPE;
 
 const char *progname;
 const char *opt_tracing_group;
+static const char *opt_pidfile;
 static int opt_sig_parent;
 static int opt_verbose_consumer;
 static int opt_daemon;
@@ -404,6 +405,17 @@ static void cleanup(void)
        /* First thing first, stop all threads */
        utils_close_pipe(thread_quit_pipe);
 
+       /*
+        * If opt_pidfile is undefined, the default file will be wiped when
+        * removing the rundir.
+        */
+       if (opt_pidfile) {
+               ret = remove(opt_pidfile);
+               if (ret < 0) {
+                       PERROR("remove pidfile %s", opt_pidfile);
+               }
+       }
+
        DBG("Removing %s directory", rundir);
        ret = asprintf(&cmd, "rm -rf %s", rundir);
        if (ret < 0) {
@@ -626,7 +638,7 @@ static int update_kernel_stream(struct consumer_data *consumer_data, int fd)
                                        struct lttng_ht_iter iter;
                                        struct consumer_socket *socket;
 
-
+                                       rcu_read_lock();
                                        cds_lfht_for_each_entry(ksess->consumer->socks->ht,
                                                        &iter.iter, socket, node.node) {
                                                /* Code flow error */
@@ -637,9 +649,11 @@ static int update_kernel_stream(struct consumer_data *consumer_data, int fd)
                                                                channel, ksess);
                                                pthread_mutex_unlock(socket->lock);
                                                if (ret < 0) {
+                                                       rcu_read_unlock();
                                                        goto error;
                                                }
                                        }
+                                       rcu_read_unlock();
                                }
                                goto error;
                        }
@@ -691,6 +705,14 @@ static void *thread_manage_kernel(void *data)
 
        DBG("[thread] Thread manage kernel started");
 
+       health_register(HEALTH_TYPE_KERNEL);
+
+       /*
+        * The first step of the while loop below cleans this structure, which
+        * could free non-NULL pointers, so zero it before entering the loop.
+        */
+       memset(&events, 0, sizeof(events));
+
        if (testpoint(thread_manage_kernel)) {
                goto error_testpoint;
        }
@@ -766,7 +788,13 @@ static void *thread_manage_kernel(void *data)
 
                        /* Check for data on kernel pipe */
                        if (pollfd == kernel_poll_pipe[0] && (revents & LPOLLIN)) {
-                               ret = read(kernel_poll_pipe[0], &tmp, 1);
+                               do {
+                                       ret = read(kernel_poll_pipe[0], &tmp, 1);
+                               } while (ret < 0 && errno == EINTR);
+                               /*
+                                * The return value is deliberately ignored: any activity on
+                                * this pipe means an update is required anyway.
+                                */
                                update_poll_flag = 1;
                                continue;
                        } else {
@@ -802,7 +830,7 @@ error_testpoint:
                WARN("Kernel thread died unexpectedly. "
                                "Kernel tracing can continue but CPU hotplug is disabled.");
        }
-       health_exit(&health_thread_kernel);
+       health_unregister();
        DBG("Kernel thread dying");
        return NULL;
 }
@@ -843,6 +871,8 @@ static void *thread_manage_consumer(void *data)
 
        DBG("[thread] Manage consumer started");
 
+       health_register(HEALTH_TYPE_CONSUMER);
+
        /*
         * Since the consumer thread can be spawned at any moment in time, we init
         * the health to a poll status (1, which is a valid health over time).
@@ -1076,7 +1106,7 @@ error_poll:
                health_error(&consumer_data->health);
                ERR("Health error occurred in %s", __func__);
        }
-       health_exit(&consumer_data->health);
+       health_unregister();
        DBG("consumer thread cleanup completed");
 
        return NULL;
@@ -1097,6 +1127,8 @@ static void *thread_manage_apps(void *data)
        rcu_register_thread();
        rcu_thread_online();
 
+       health_register(HEALTH_TYPE_APP_MANAGE);
+
        if (testpoint(thread_manage_apps)) {
                goto error_testpoint;
        }
@@ -1160,7 +1192,9 @@ static void *thread_manage_apps(void *data)
                                        goto error;
                                } else if (revents & LPOLLIN) {
                                        /* Empty pipe */
-                                       ret = read(apps_cmd_pipe[0], &ust_cmd, sizeof(ust_cmd));
+                                       do {
+                                               ret = read(apps_cmd_pipe[0], &ust_cmd, sizeof(ust_cmd));
+                                       } while (ret < 0 && errno == EINTR);
                                        if (ret < 0 || ret < sizeof(ust_cmd)) {
                                                PERROR("read apps cmd pipe");
                                                goto error;
@@ -1266,7 +1300,7 @@ error_testpoint:
                health_error(&health_thread_app_manage);
                ERR("Health error occurred in %s", __func__);
        }
-       health_exit(&health_thread_app_manage);
+       health_unregister();
        DBG("Application communication apps thread cleanup complete");
        rcu_thread_offline();
        rcu_unregister_thread();
@@ -1312,9 +1346,11 @@ static void *thread_dispatch_ust_registration(void *data)
                         * at some point in time or wait to the end of the world :)
                         */
                        if (apps_cmd_pipe[1] >= 0) {
-                               ret = write(apps_cmd_pipe[1], ust_cmd,
-                                               sizeof(struct ust_command));
-                               if (ret < 0) {
+                               do {
+                                       ret = write(apps_cmd_pipe[1], ust_cmd,
+                                                       sizeof(struct ust_command));
+                               } while (ret < 0 && errno == EINTR);
+                               if (ret < 0 || ret != sizeof(struct ust_command)) {
                                        PERROR("write apps cmd pipe");
                                        if (errno == EBADF) {
                                                /*
@@ -1361,6 +1397,8 @@ static void *thread_registration_apps(void *data)
 
        DBG("[thread] Manage application registration started");
 
+       health_register(HEALTH_TYPE_APP_REG);
+
        if (testpoint(thread_registration_apps)) {
                goto error_testpoint;
        }
@@ -1543,7 +1581,7 @@ error_listen:
 error_create_poll:
 error_testpoint:
        DBG("UST Registration thread cleanup complete");
-       health_exit(&health_thread_app_reg);
+       health_unregister();
 
        return NULL;
 }
@@ -1918,9 +1956,7 @@ static int check_consumer_health(void)
 {
        int ret;
 
-       ret = health_check_state(&kconsumer_data.health) &&
-               health_check_state(&ustconsumer32_data.health) &&
-               health_check_state(&ustconsumer64_data.health);
+       ret = health_check_state(HEALTH_TYPE_CONSUMER);
 
        DBG3("Health consumer check %d", ret);
 
@@ -2074,7 +2110,7 @@ static int create_ust_session(struct ltt_session *session,
 
        DBG("Creating UST session");
 
-       lus = trace_ust_create_session(session->path, session->id, domain);
+       lus = trace_ust_create_session(session->path, session->id);
        if (lus == NULL) {
                ret = LTTNG_ERR_UST_SESS_FAIL;
                goto error;
@@ -3038,26 +3074,26 @@ restart:
 
                switch (msg.component) {
                case LTTNG_HEALTH_CMD:
-                       reply.ret_code = health_check_state(&health_thread_cmd);
+                       reply.ret_code = health_check_state(HEALTH_TYPE_CMD);
                        break;
                case LTTNG_HEALTH_APP_MANAGE:
-                       reply.ret_code = health_check_state(&health_thread_app_manage);
+                       reply.ret_code = health_check_state(HEALTH_TYPE_APP_MANAGE);
                        break;
                case LTTNG_HEALTH_APP_REG:
-                       reply.ret_code = health_check_state(&health_thread_app_reg);
+                       reply.ret_code = health_check_state(HEALTH_TYPE_APP_REG);
                        break;
                case LTTNG_HEALTH_KERNEL:
-                       reply.ret_code = health_check_state(&health_thread_kernel);
+                       reply.ret_code = health_check_state(HEALTH_TYPE_KERNEL);
                        break;
                case LTTNG_HEALTH_CONSUMER:
                        reply.ret_code = check_consumer_health();
                        break;
                case LTTNG_HEALTH_ALL:
                        reply.ret_code =
-                               health_check_state(&health_thread_app_manage) &&
-                               health_check_state(&health_thread_app_reg) &&
-                               health_check_state(&health_thread_cmd) &&
-                               health_check_state(&health_thread_kernel) &&
+                               health_check_state(HEALTH_TYPE_APP_MANAGE) &&
+                               health_check_state(HEALTH_TYPE_APP_REG) &&
+                               health_check_state(HEALTH_TYPE_CMD) &&
+                               health_check_state(HEALTH_TYPE_KERNEL) &&
                                check_consumer_health();
                        break;
                default:
@@ -3132,6 +3168,8 @@ static void *thread_manage_clients(void *data)
 
        rcu_register_thread();
 
+       health_register(HEALTH_TYPE_CMD);
+
        if (testpoint(thread_manage_clients)) {
                goto error_testpoint;
        }
@@ -3356,7 +3394,7 @@ error_testpoint:
                ERR("Health error occurred in %s", __func__);
        }
 
-       health_exit(&health_thread_cmd);
+       health_unregister();
 
        DBG("Client thread dying");
 
@@ -3390,6 +3428,7 @@ static void usage(void)
        fprintf(stderr, "  -S, --sig-parent                   Send SIGCHLD to parent pid to notify readiness.\n");
        fprintf(stderr, "  -q, --quiet                        No output at all.\n");
        fprintf(stderr, "  -v, --verbose                      Verbose mode. Activate DBG() macro.\n");
+       fprintf(stderr, "  -p, --pidfile FILE                 Write a pid to FILE name overriding the default value.\n");
        fprintf(stderr, "      --verbose-consumer             Verbose mode for consumer. Activate DBG() macro.\n");
        fprintf(stderr, "      --no-kernel                    Disable kernel tracer\n");
 }
@@ -3423,12 +3462,13 @@ static int parse_args(int argc, char **argv)
                { "verbose", 0, 0, 'v' },
                { "verbose-consumer", 0, 0, 'Z' },
                { "no-kernel", 0, 0, 'N' },
+               { "pidfile", 1, 0, 'p' },
                { NULL, 0, 0, 0 }
        };
 
        while (1) {
                int option_index = 0;
-               c = getopt_long(argc, argv, "dhqvVSN" "a:c:g:s:C:E:D:F:Z:u:t",
+               c = getopt_long(argc, argv, "dhqvVSN" "a:c:g:s:C:E:D:F:Z:u:t:p:",
                                long_options, &option_index);
                if (c == -1) {
                        break;
@@ -3505,6 +3545,9 @@ static int parse_args(int argc, char **argv)
                case 'T':
                        consumerd64_libdir = optarg;
                        break;
+               case 'p':
+                       opt_pidfile = optarg;
+                       break;
                default:
                        /* Unknown option or other error.
                         * Error is printed by getopt, just return */
@@ -3831,6 +3874,38 @@ static void set_ulimit(void)
        }
 }
 
+/*
+ * Write the daemon pid to opt_pidfile if set, else to the default pidfile
+ * under rundir. Best effort: on failure the daemon runs without a pidfile.
+ */
+static void write_pidfile(void)
+{
+       int ret;
+       char pidfile_path[PATH_MAX];
+
+       assert(rundir);
+       /* snprintf always NUL-terminates; strncpy would not on a full buffer. */
+       if (opt_pidfile) {
+               ret = snprintf(pidfile_path, sizeof(pidfile_path), "%s",
+                               opt_pidfile);
+       } else {
+               ret = snprintf(pidfile_path, sizeof(pidfile_path), "%s/"
+                               DEFAULT_LTTNG_SESSIOND_PIDFILE, rundir);
+       }
+       if (ret < 0) {
+               PERROR("snprintf pidfile path");
+               return;
+       }
+       if ((size_t) ret >= sizeof(pidfile_path)) {
+               /* Never create a pidfile at a truncated, unintended path. */
+               ERR("Pidfile path is too long");
+               return;
+       }
+
+       /* Return value ignored on purpose: failing here is not fatal. */
+       (void) utils_create_pid_file(getpid(), pidfile_path);
+}
+
 /*
  * main
  */
@@ -4100,26 +4175,6 @@ int main(int argc, char **argv)
 
        cmd_init();
 
-       /* Init all health thread counters. */
-       health_init(&health_thread_cmd);
-       health_init(&health_thread_kernel);
-       health_init(&health_thread_app_manage);
-       health_init(&health_thread_app_reg);
-
-       /*
-        * Init health counters of the consumer thread. We do a quick hack here to
-        * the state of the consumer health is fine even if the thread is not
-        * started. Once the thread starts, the health state is updated with a poll
-        * value to set a health code path. This is simply to ease our life and has
-        * no cost what so ever.
-        */
-       health_init(&kconsumer_data.health);
-       health_poll_update(&kconsumer_data.health);
-       health_init(&ustconsumer32_data.health);
-       health_poll_update(&ustconsumer32_data.health);
-       health_init(&ustconsumer64_data.health);
-       health_poll_update(&ustconsumer64_data.health);
-
        /* Check for the application socket timeout env variable. */
        env_app_timeout = getenv(DEFAULT_APP_SOCKET_TIMEOUT_ENV);
        if (env_app_timeout) {
@@ -4128,6 +4183,8 @@ int main(int argc, char **argv)
                app_socket_timeout = DEFAULT_APP_SOCKET_RW_TIMEOUT;
        }
 
+       write_pidfile();
+
        /* Create thread to manage the client socket */
        ret = pthread_create(&health_thread, NULL,
                        thread_manage_health, (void *) NULL);
This page took 0.028447 seconds and 4 git commands to generate.