Fix: sessiond: kernel error accounting fd still open when unloading modules
[lttng-tools.git] / src / bin / lttng-sessiond / main.c
index a62719a1281fa5f508ac1c8b7fdc019357c73935..e0b5595f3b25adbd6fb98332420d9cc458435060 100644 (file)
@@ -49,9 +49,9 @@
 #include "consumer.h"
 #include "context.h"
 #include "event.h"
+#include "event-notifier-error-accounting.h"
 #include "kernel.h"
 #include "kernel-consumer.h"
-#include "shm.h"
 #include "lttng-ust-ctl.h"
 #include "ust-consumer.h"
 #include "utils.h"
@@ -74,6 +74,7 @@
 #include "register.h"
 #include "manage-apps.h"
 #include "manage-kernel.h"
+#include "modprobe.h"
 
 static const char *help_msg =
 #ifdef LTTNG_EMBED_HELP
@@ -83,6 +84,8 @@ NULL
 #endif
 ;
 
+#define EVENT_NOTIFIER_ERROR_COUNTER_NUMBER_OF_BUCKET_MAX 65535
+
 const char *progname;
 static int lockfile_fd = -1;
 static int opt_print_version;
@@ -120,6 +123,7 @@ static const struct option long_options[] = {
        { "load", required_argument, 0, 'l' },
        { "kmod-probes", required_argument, 0, '\0' },
        { "extra-kmod-probes", required_argument, 0, '\0' },
+       { "event-notifier-error-number-of-bucket", required_argument, 0, '\0' },
        { NULL, 0, 0, 0 }
 };
 
@@ -314,6 +318,9 @@ static void sessiond_cleanup(void)
 
        pthread_mutex_destroy(&session_list->lock);
 
+       DBG("Cleaning up all per-event notifier domain agents");
+       agent_by_event_notifier_domain_ht_destroy();
+
        DBG("Cleaning up all agent apps");
        agent_app_ht_clean();
        DBG("Closing all UST sockets");
@@ -694,6 +701,23 @@ static int set_option(int opt, const char *arg, const char *optname)
                                ret = -ENOMEM;
                        }
                }
+       } else if (string_match(optname, "event-notifier-error-number-of-bucket")) {
+               unsigned long v;
+
+               errno = 0;
+               v = strtoul(arg, NULL, 0);
+               if (errno != 0 || !isdigit(arg[0])) {
+                       ERR("Wrong value in --event-notifier-error-number-of-bucket parameter: %s", arg);
+                       return -1;
+               }
+               if (v == 0 || v >= EVENT_NOTIFIER_ERROR_COUNTER_NUMBER_OF_BUCKET_MAX) {
+                       ERR("Value out of range for --event-notifier-error-number-of-bucket parameter: %s", arg);
+                       return -1;
+               }
+               config.event_notifier_error_counter_bucket = (int) v;
+               DBG3("Number of event notifier error counter set to non default: %i",
+                               config.event_notifier_error_counter_bucket);
+               goto end;
        } else if (string_match(optname, "config") || opt == 'f') {
                /* This is handled in set_options() thus silent skip. */
                goto end;
@@ -1254,6 +1278,63 @@ static void destroy_all_sessions_and_wait(void)
        DBG("Destruction of all sessions completed");
 }
 
+static void unregister_all_triggers(void)
+{
+       enum lttng_error_code ret_code;
+       enum lttng_trigger_status trigger_status;
+       struct lttng_triggers *triggers = NULL;
+       unsigned int trigger_count, i;
+       const struct lttng_credentials creds = {
+                       .uid = LTTNG_OPTIONAL_INIT_VALUE(0),
+       };
+
+       DBG("Unregistering all triggers");
+
+       /*
+        * List all triggers as "root" since we wish to unregister all triggers.
+        */
+       ret_code = notification_thread_command_list_triggers(
+                       notification_thread_handle, creds.uid.value, &triggers);
+       if (ret_code != LTTNG_OK) {
+               ERR("Failed to list triggers while unregistering all triggers");
+               goto end;
+       }
+
+       trigger_status = lttng_triggers_get_count(triggers, &trigger_count);
+       assert(trigger_status == LTTNG_TRIGGER_STATUS_OK);
+
+       for (i = 0; i < trigger_count; i++) {
+               enum lttng_error_code ret_code;
+               uid_t trigger_owner;
+               const char *trigger_name;
+               const struct lttng_trigger *trigger =
+                               lttng_triggers_get_at_index(triggers, i);
+
+               assert(trigger);
+
+               trigger_status = lttng_trigger_get_owner_uid(
+                               trigger, &trigger_owner);
+               assert(trigger_status == LTTNG_TRIGGER_STATUS_OK);
+
+               trigger_status = lttng_trigger_get_name(trigger, &trigger_name);
+               assert(trigger_status == LTTNG_TRIGGER_STATUS_OK);
+
+               DBG("Unregistering trigger: trigger owner uid = %d, trigger name = '%s'",
+                               (int) trigger_owner, trigger_name);
+
+               ret_code = cmd_unregister_trigger(
+                               &creds, trigger, notification_thread_handle);
+               if (ret_code != LTTNG_OK) {
+                       ERR("Failed to unregister trigger: trigger owner uid = %d, trigger name = '%s', error: '%s'",
+                                       (int) trigger_owner, trigger_name,
+                                       lttng_strerror(-ret_code));
+                       /* Continue to unregister the remaining triggers. */
+               }
+       }
+end:
+       lttng_triggers_destroy(triggers);
+}
+
 static int run_as_worker_post_fork_cleanup(void *data)
 {
        struct sessiond_config *sessiond_config = data;
@@ -1302,6 +1383,7 @@ int main(int argc, char **argv)
        struct lttng_thread *notification_thread = NULL;
        struct lttng_thread *register_apps_thread = NULL;
 
+       logger_set_thread_name("Main", false);
        init_kernel_workarounds();
 
        rcu_register_thread();
@@ -1528,6 +1610,8 @@ int main(int argc, char **argv)
                goto stop_threads;
        }
 
+       event_notifier_error_accounting_init(config.event_notifier_error_counter_bucket);
+
        /*
         * Initialize agent app hash table. We allocate the hash table here
         * since cleanup() can get called after this point.
@@ -1538,6 +1622,11 @@ int main(int argc, char **argv)
                goto stop_threads;
        }
 
+       if (agent_by_event_notifier_domain_ht_create()) {
+               ERR("Failed to allocate per-event notifier domain agent hash table");
+               retval = -1;
+               goto stop_threads;
+       }
        /*
         * These actions must be executed as root. We do that *after* setting up
         * the sockets path because we MUST make the check for another daemon using
@@ -1725,6 +1814,18 @@ int main(int argc, char **argv)
                        retval = -1;
                        goto stop_threads;
                }
+
+               if (kernel_get_notification_fd() >= 0) {
+                       ret = notification_thread_command_add_tracer_event_source(
+                                       notification_thread_handle,
+                                       kernel_get_notification_fd(),
+                                       LTTNG_DOMAIN_KERNEL);
+                       if (ret != LTTNG_OK) {
+                               ERR("Failed to add kernel trigger event source to notification thread");
+                               retval = -1;
+                               goto stop_threads;
+                       }
+               }
        }
 
        /* Load sessions. */
@@ -1748,6 +1849,7 @@ int main(int argc, char **argv)
        sessiond_wait_for_quit_pipe(-1);
 
 stop_threads:
+
        /*
         * Ensure that the client thread is no longer accepting new commands,
         * which could cause new sessions to be created.
@@ -1759,6 +1861,13 @@ stop_threads:
 
        destroy_all_sessions_and_wait();
 
+       /*
+        * At this point no new trigger can be registered (no sessions are
+        * running/rotating) and clients can't connect to the session daemon
+        * anymore. Unregister all triggers.
+        */
+       unregister_all_triggers();
+
        if (register_apps_thread) {
                lttng_thread_shutdown(register_apps_thread);
                lttng_thread_put(register_apps_thread);
@@ -1778,11 +1887,39 @@ stop_threads:
        rcu_thread_online();
        sessiond_cleanup();
 
+       /*
+        * Wait for all pending call_rcu work to complete before shutting down
+        * the notification thread. This call_rcu work includes shutting down
+        * UST apps and event notifier pipes.
+        */
+       rcu_barrier();
+
        if (notification_thread) {
                lttng_thread_shutdown(notification_thread);
                lttng_thread_put(notification_thread);
        }
 
+       /*
+        * Error accounting teardown has to be done after the teardown of all
+        * event notifier pipes to ensure that no tracer may try to use the
+        * error accounting facilities.
+        */
+       event_notifier_error_accounting_fini();
+
+       /*
+        * Unloading the kernel modules needs to be done after all kernel
+        * ressources have been released. In our case, this includes the
+        * notification fd, the event notifier group fd, error accounting fd,
+        * all event and event notifier fds, etc.
+        *
+        * In short, at this point, we need to have called close() on all fds
+        * received from the kernel tracer.
+        */
+       if (is_root && !config.no_kernel) {
+               DBG("Unloading kernel modules");
+               modprobe_remove_lttng_all();
+       }
+
        /*
         * Ensure all prior call_rcu are done. call_rcu callbacks may push
         * hash tables to the ht_cleanup thread. Therefore, we ensure that
This page took 0.026189 seconds and 4 git commands to generate.