sessiond: Implement UST event notifier error counter
author Francis Deslauriers <francis.deslauriers@efficios.com>
Wed, 14 Oct 2020 19:33:46 +0000 (15:33 -0400)
committer Jérémie Galarneau <jeremie.galarneau@efficios.com>
Fri, 2 Apr 2021 15:51:15 +0000 (11:51 -0400)
Signed-off-by: Francis Deslauriers <francis.deslauriers@efficios.com>
Signed-off-by: Jérémie Galarneau <jeremie.galarneau@efficios.com>
Change-Id: I254b2cec39b8c61ad945376043b5b9053de98f6f

src/bin/lttng-sessiond/event-notifier-error-accounting.c
src/bin/lttng-sessiond/event-notifier-error-accounting.h
src/bin/lttng-sessiond/ust-app.c
src/bin/lttng-sessiond/ust-app.h

index 628ddcfbf7b72d15ae5890d97672c71744a8f0f5..6837fe8f34de7ece5b734b4aeeaf168f19f9ac3f 100644 (file)
 #include <sys/stat.h>
 #include <unistd.h>
 #include <urcu/compiler.h>
+#include <pthread.h>
 
 #include <common/error.h>
 #include <common/hashtable/hashtable.h>
 #include <common/index-allocator.h>
 #include <common/kernel-ctl/kernel-ctl.h>
+#include <common/shm.h>
 #include <lttng/trigger/trigger-internal.h>
 
 #include "event-notifier-error-accounting.h"
+#include "lttng-ust-error.h"
+#include "ust-app.h"
 
 #define ERROR_COUNTER_INDEX_HT_INITIAL_SIZE 16
 
@@ -27,6 +31,22 @@ struct index_ht_entry {
        struct rcu_head rcu_head;
 };
 
+struct error_account_entry {
+       struct lttng_ht_node_u64 node; /* Node of `error_counter_uid_ht`; initialized with the application's uid as key. */
+       struct rcu_head rcu_head;
+       struct ustctl_daemon_counter *daemon_counter; /* Counter the session daemon aggregates and clears. */
+       /*
+        * These `lttng_ust_abi_object_data` are anonymous handles to the
+        * counter objects.
+        * They are only used to be duplicated for each new application of the
+        * user. To destroy them, call with the `sock` parameter set to -1.
+        * e.g. `ustctl_release_object(-1, data)`;
+        */
+       struct lttng_ust_abi_object_data *counter;
+       struct lttng_ust_abi_object_data **cpu_counters; /* Array of `nr_counter_cpu_fds` per-CPU handles. */
+       int nr_counter_cpu_fds; /* Value returned by ustctl_get_nr_cpu_per_counter() at creation. */
+};
+
 struct kernel_error_account_entry {
        int kernel_event_notifier_error_counter_fd;
 };
@@ -36,9 +56,34 @@ static struct kernel_error_account_entry kernel_error_accountant;
 /* Hashtable mapping event notifier token to index_ht_entry. */
 static struct lttng_ht *error_counter_indexes_ht;
 
+/* Hashtable mapping uid to error_account_entry. */
+static struct lttng_ht *error_counter_uid_ht;
+
 static uint64_t error_counter_size;
 static struct lttng_index_allocator *index_allocator;
 
+/*
+ * Fetch the name and owner uid of `trigger` for use in log statements.
+ *
+ * `*trigger_name` is set to "(unset)" when the trigger has no name. Any
+ * other failure to get the name aborts.
+ */
+static inline void get_trigger_info_for_log(const struct lttng_trigger *trigger,
+               const char **trigger_name,
+               uid_t *trigger_owner_uid)
+{
+       /* Receives the result of each trigger accessor in turn. */
+       enum lttng_trigger_status status =
+                       lttng_trigger_get_name(trigger, trigger_name);
+
+       if (status == LTTNG_TRIGGER_STATUS_UNSET) {
+               /* No name available: log a placeholder instead. */
+               *trigger_name = "(unset)";
+       } else if (status != LTTNG_TRIGGER_STATUS_OK) {
+               /* No other status is expected from the name accessor. */
+               abort();
+       }
+
+       status = lttng_trigger_get_owner_uid(trigger, trigger_owner_uid);
+       assert(status == LTTNG_TRIGGER_STATUS_OK);
+}
+
 static inline
 const char *error_accounting_status_str(
                enum event_notifier_error_accounting_status status)
@@ -54,6 +99,8 @@ const char *error_accounting_status_str(
                return "NOMEM";
        case EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_NO_INDEX_AVAILABLE:
                return "NO_INDEX_AVAILABLE";
+       case EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_APP_DEAD:
+               return "APP_DEAD";
        default:
                abort();
        }
@@ -74,6 +121,15 @@ event_notifier_error_accounting_init(uint64_t nb_bucket)
        error_counter_indexes_ht = lttng_ht_new(
                        ERROR_COUNTER_INDEX_HT_INITIAL_SIZE, LTTNG_HT_TYPE_U64);
        if (!error_counter_indexes_ht) {
+               ERR("Failed to allocate error counter indices hash table");
+               status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_NOMEM;
+               goto error_index_allocator;
+       }
+
+       error_counter_uid_ht = lttng_ht_new(
+                       ERROR_COUNTER_INDEX_HT_INITIAL_SIZE, LTTNG_HT_TYPE_U64);
+       if (!error_counter_uid_ht) {
+               ERR("Failed to allocate UID to error counter accountant hash table");
                status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_NOMEM;
                goto error_index_allocator;
        }
@@ -111,6 +167,526 @@ enum event_notifier_error_accounting_status get_error_counter_index_for_token(
        return status;
 }
 
+#ifdef HAVE_LIBLTTNG_UST_CTL
+/*
+ * Look up the error accounting entry matching the uid of `app` in
+ * `error_counter_uid_ht`.
+ *
+ * Returns the entry, or NULL when no entry exists for that uid.
+ */
+static
+struct error_account_entry *get_uid_accounting_entry(const struct ust_app *app)
+{
+       uint64_t uid_key = app->uid;
+       struct lttng_ht_iter lookup_iter;
+       struct lttng_ht_node_u64 *found;
+
+       lttng_ht_lookup(error_counter_uid_ht, &uid_key, &lookup_iter);
+       found = lttng_ht_iter_get_node_u64(&lookup_iter);
+       if (!found) {
+               return NULL;
+       }
+
+       /* The hash table node is embedded in the accounting entry. */
+       return caa_container_of(found, struct error_account_entry, node);
+}
+
+/*
+ * Create the error accounting entry of the user owning `app` and add it to
+ * `error_counter_uid_ht`, keyed by the application's uid.
+ *
+ * An anonymous shm is created for each per-CPU counter, a daemon counter is
+ * created on top of those, and the counter object data handles (global and
+ * per-CPU) are created from the daemon counter.
+ *
+ * Returns the new entry on success, NULL on error.
+ */
+static
+struct error_account_entry *create_uid_accounting_entry(
+               const struct ust_app *app)
+{
+       int i, ret;
+       struct ustctl_daemon_counter *daemon_counter;
+       struct lttng_ust_abi_object_data *counter, **cpu_counters;
+       int *cpu_counter_fds = NULL;
+       struct error_account_entry *entry = NULL;
+       const struct ustctl_counter_dimension dimension = {
+               .size = error_counter_size,
+               .has_underflow = false,
+               .has_overflow = false,
+       };
+
+       entry = zmalloc(sizeof(struct error_account_entry));
+       if (!entry) {
+               PERROR("Failed to allocate event notifier error acounting entry");
+               goto error;
+       }
+
+       entry->nr_counter_cpu_fds = ustctl_get_nr_cpu_per_counter();
+       cpu_counter_fds = zmalloc(entry->nr_counter_cpu_fds * sizeof(*cpu_counter_fds));
+       if (!cpu_counter_fds) {
+               PERROR("Failed to allocate event notifier error counter file descriptors array: application uid = %d, application name = '%s', pid = %d, allocation size = %zu",
+                               (int) app->uid, app->name, (int) app->pid,
+                               entry->nr_counter_cpu_fds * sizeof(*cpu_counter_fds));
+               ret = -1;
+               goto error_counter_cpu_fds_alloc;
+       }
+
+       /* Initialize to an invalid fd value to close fds in case of error. */
+       for (i = 0; i < entry->nr_counter_cpu_fds; i++) {
+               cpu_counter_fds[i] = -1;
+       }
+
+       /* Array of pointers: size each element as a pointer, not as the struct. */
+       cpu_counters = zmalloc(entry->nr_counter_cpu_fds * sizeof(*cpu_counters));
+       if (!cpu_counters) {
+               PERROR("Failed to allocate event notifier error counter lttng_ust_abi_object_data array: application uid = %d, application name = '%s', pid = %d, allocation size = %zu",
+                               (int) app->uid, app->name, (int) app->pid,
+                               entry->nr_counter_cpu_fds * sizeof(*cpu_counters));
+               ret = -1;
+               goto error_counter_cpus_alloc;
+       }
+
+       for (i = 0; i < entry->nr_counter_cpu_fds; i++) {
+               cpu_counter_fds[i] = shm_create_anonymous("event-notifier-error-accounting");
+               if (cpu_counter_fds[i] == -1) {
+                       ERR("Failed to create event notifier error accounting shared memory for application user: application uid = %d, pid = %d, application name = '%s'",
+                                       (int) app->uid, (int) app->pid, app->name);
+                       goto error_shm_alloc;
+               }
+       }
+
+       /*
+        * Ownership of the file descriptors transferred to the ustctl object.
+        */
+       daemon_counter = ustctl_create_counter(1, &dimension, 0, -1,
+                       entry->nr_counter_cpu_fds, cpu_counter_fds,
+                       USTCTL_COUNTER_BITNESS_32,
+                       USTCTL_COUNTER_ARITHMETIC_MODULAR,
+                       USTCTL_COUNTER_ALLOC_PER_CPU,
+                       false);
+       if (!daemon_counter) {
+               goto error_create_daemon_counter;
+       }
+
+       /*
+        * The shm file descriptors now belong to the daemon counter. Forget
+        * them so that the error paths below, which fall through to the fd
+        * closing loop, do not close them a second time after
+        * ustctl_destroy_counter().
+        */
+       for (i = 0; i < entry->nr_counter_cpu_fds; i++) {
+               cpu_counter_fds[i] = -1;
+       }
+
+       ret = ustctl_create_counter_data(daemon_counter, &counter);
+       if (ret) {
+               ERR("Failed to create userspace tracer counter data for application user: uid = %d, pid = %d, application name = '%s'",
+                               (int) app->uid, (int) app->pid, app->name);
+               goto error_create_counter_data;
+       }
+
+       for (i = 0; i < entry->nr_counter_cpu_fds; i++) {
+               ret = ustctl_create_counter_cpu_data(daemon_counter, i,
+                               &cpu_counters[i]);
+               if (ret) {
+                       ERR("Failed to create userspace tracer counter cpu data for application user: uid = %d, pid = %d, application name = '%s'",
+                                       (int) app->uid, (int) app->pid,
+                                       app->name);
+                       goto error_create_counter_cpu_data;
+               }
+       }
+
+       entry->daemon_counter = daemon_counter;
+       entry->counter = counter;
+       entry->cpu_counters = cpu_counters;
+
+       lttng_ht_node_init_u64(&entry->node, app->uid);
+       lttng_ht_add_unique_u64(error_counter_uid_ht, &entry->node);
+
+       goto end;
+
+error_create_counter_cpu_data:
+       /* Teardown any allocated cpu counters. */
+       for (i = 0; i < entry->nr_counter_cpu_fds; i++) {
+               if (!cpu_counters[i]) {
+                       /*
+                        * Early-exit when error occurred before all cpu
+                        * counters could be initialized.
+                        */
+                       break;
+               }
+
+               ustctl_release_object(-1, cpu_counters[i]);
+               free(cpu_counters[i]);
+       }
+
+       /*
+        * `entry->counter` is not assigned yet on this path; the handle to
+        * release is the local `counter`.
+        */
+       ustctl_release_object(-1, counter);
+       free(counter);
+error_create_counter_data:
+       ustctl_destroy_counter(daemon_counter);
+error_create_daemon_counter:
+error_shm_alloc:
+       /*
+        * Close the per-cpu SHMs that were not handed-off to ustctl. Entries
+        * already handed-off were reset to -1 and are skipped.
+        */
+       if (cpu_counter_fds) {
+               for (i = 0; i < entry->nr_counter_cpu_fds; i++) {
+                       if (cpu_counter_fds[i] < 0) {
+                               /*
+                                * Early-exit when error occurred before all cpu
+                                * counter shm fds could be initialized.
+                                */
+                               break;
+                       }
+
+                       ret = close(cpu_counter_fds[i]);
+                       if (ret) {
+                               PERROR("Failed to close error counter per-CPU shm file descriptor: fd = %d", cpu_counter_fds[i]);
+                       }
+               }
+       }
+
+       free(cpu_counters);
+error_counter_cpus_alloc:
+error_counter_cpu_fds_alloc:
+       free(entry);
+error:
+       entry = NULL;
+end:
+       free(cpu_counter_fds);
+       return entry;
+}
+
+/*
+ * Send the counter object data `new_counter` to `app`, attaching it to the
+ * application's event notifier group. The application socket lock is held
+ * for the duration of the send.
+ *
+ * Returns OK on success, APP_DEAD when the send fails with -EPIPE or
+ * -LTTNG_UST_ERR_EXITING, and ERR on any other failure.
+ */
+static
+enum event_notifier_error_accounting_status send_counter_data_to_ust(
+               struct ust_app *app,
+               struct lttng_ust_abi_object_data *new_counter)
+{
+       int send_ret;
+
+       /* Attach counter to trigger group. */
+       pthread_mutex_lock(&app->sock_lock);
+       send_ret = ustctl_send_counter_data_to_ust(app->sock,
+                       app->event_notifier_group.object->handle, new_counter);
+       pthread_mutex_unlock(&app->sock_lock);
+
+       if (send_ret >= 0) {
+               return EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK;
+       }
+
+       if (send_ret == -EPIPE || send_ret == -LTTNG_UST_ERR_EXITING) {
+               DBG3("Failed to send counter data to application (application is dead): application name = '%s', pid = %d, ret = %d",
+                               app->name, app->pid, send_ret);
+               return EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_APP_DEAD;
+       }
+
+       ERR("Failed to send counter data to application: application name = '%s', pid = %d, ret = %d",
+                       app->name, app->pid, send_ret);
+       return EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR;
+}
+
+/*
+ * Send the per-CPU counter object data `counter_cpu`, belonging to `counter`,
+ * to `app`. The application socket lock is held for the duration of the
+ * send.
+ *
+ * Returns OK on success, APP_DEAD when the send fails with -EPIPE or
+ * -LTTNG_UST_ERR_EXITING, and ERR on any other failure.
+ */
+static
+enum event_notifier_error_accounting_status send_counter_cpu_data_to_ust(
+               struct ust_app *app,
+               struct lttng_ust_abi_object_data *counter,
+               struct lttng_ust_abi_object_data *counter_cpu)
+{
+       int send_ret;
+
+       pthread_mutex_lock(&app->sock_lock);
+       send_ret = ustctl_send_counter_cpu_data_to_ust(app->sock,
+                       counter, counter_cpu);
+       pthread_mutex_unlock(&app->sock_lock);
+
+       if (send_ret >= 0) {
+               return EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK;
+       }
+
+       if (send_ret == -EPIPE || send_ret == -LTTNG_UST_ERR_EXITING) {
+               DBG3("Failed to send counter CPU data to application: application name = '%s', pid = %d, ret = %d",
+                               app->name, app->pid, send_ret);
+               return EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_APP_DEAD;
+       }
+
+       ERR("Failed to send counter CPU data to application: application name = '%s', pid = %d, ret = %d",
+                       app->name, app->pid, send_ret);
+       return EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR;
+}
+
+/*
+ * Set up event notifier error accounting for a newly registered application.
+ *
+ * The accounting entry of the application's uid is looked up (and created if
+ * it does not exist yet). Its counter and per-CPU counter object data are
+ * then duplicated and sent to the application. On success, the duplicated
+ * handles are stored in `app->event_notifier_group`.
+ *
+ * Returns OK on success, NOMEM or ERR on failure. On failure, the handles
+ * duplicated so far are released and `app` is left untouched.
+ */
+enum event_notifier_error_accounting_status
+event_notifier_error_accounting_register_app(struct ust_app *app)
+{
+       int ret;
+       int i;
+       struct lttng_ust_abi_object_data *new_counter;
+       struct error_account_entry *entry;
+       enum event_notifier_error_accounting_status status;
+       struct lttng_ust_abi_object_data **cpu_counters = NULL;
+
+       /*
+        * Check if we already have a error counter for the user id of this
+        * app. If not, create one.
+        */
+       rcu_read_lock();
+       entry = get_uid_accounting_entry(app);
+       if (entry == NULL) {
+               entry = create_uid_accounting_entry(app);
+               if (!entry) {
+                       status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR;
+                       goto end;
+               }
+       }
+
+       /* Duplicate counter object data. */
+       ret = ustctl_duplicate_ust_object_data(&new_counter,
+                       entry->counter);
+       if (ret) {
+               ERR("Failed to duplicate event notifier error accounting counter for application user: application uid = %d, pid = %d, application name = '%s'",
+                               (int) app->uid, (int) app->pid, app->name);
+               status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR;
+               goto end;
+       }
+
+       status = send_counter_data_to_ust(app, new_counter);
+       if (status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) {
+               ERR("Failed to send counter data to application tracer: status = %s, application uid = %d, pid = %d, application name = '%s'",
+                               error_accounting_status_str(status),
+                               (int) app->uid, (int) app->pid, app->name);
+               status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR;
+               goto error_send_counter_data;
+       }
+
+       /* Array of pointers: size each element as a pointer, not as the struct. */
+       cpu_counters = zmalloc(entry->nr_counter_cpu_fds * sizeof(*cpu_counters));
+       if (!cpu_counters) {
+               PERROR("Failed to allocate event notifier error counter lttng_ust_abi_object_data array: application uid = %d, application name = '%s', pid = %d, allocation size = %zu",
+                               (int) app->uid, app->name, (int) app->pid,
+                               entry->nr_counter_cpu_fds * sizeof(*cpu_counters));
+               status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_NOMEM;
+               goto error_allocate_cpu_counters;
+       }
+
+       for (i = 0; i < entry->nr_counter_cpu_fds; i++) {
+               struct lttng_ust_abi_object_data *new_counter_cpu = NULL;
+
+               ret = ustctl_duplicate_ust_object_data(&new_counter_cpu,
+                               entry->cpu_counters[i]);
+               if (ret) {
+                       ERR("Failed to duplicate userspace tracer counter cpu data for application user: uid = %d, pid = %d, application name = '%s'",
+                                       (int) app->uid, (int) app->pid,
+                                       app->name);
+                       status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_NOMEM;
+                       goto error_duplicate_cpu_counter;
+               }
+
+               cpu_counters[i] = new_counter_cpu;
+
+               status = send_counter_cpu_data_to_ust(app, new_counter,
+                               new_counter_cpu);
+               if (status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) {
+                       ERR("Failed to send counter cpu data to application tracer: status = %s, application uid = %d, pid = %d, application name = '%s'",
+                                       error_accounting_status_str(status),
+                                       (int) app->uid, (int) app->pid,
+                                       app->name);
+                       status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR;
+                       goto error_send_cpu_counter_data;
+               }
+       }
+
+       /* Ownership of the duplicated handles moves to the application. */
+       app->event_notifier_group.counter = new_counter;
+       new_counter = NULL;
+       app->event_notifier_group.nr_counter_cpu = entry->nr_counter_cpu_fds;
+       app->event_notifier_group.counter_cpu = cpu_counters;
+       cpu_counters = NULL;
+       goto end;
+
+error_send_cpu_counter_data:
+error_duplicate_cpu_counter:
+       /* Teardown any duplicated cpu counters. */
+       for (i = 0; i < entry->nr_counter_cpu_fds; i++) {
+               if (!cpu_counters[i]) {
+                       /*
+                        * Early-exit when error occurred before all cpu
+                        * counters could be initialized.
+                        */
+                       break;
+               }
+
+               ustctl_release_object(-1, cpu_counters[i]);
+               free(cpu_counters[i]);
+       }
+
+       free(cpu_counters);
+error_allocate_cpu_counters:
+       /*
+        * Reached directly when the array allocation failed: there is no
+        * array to walk, only the duplicated counter to release.
+        */
+error_send_counter_data:
+       ustctl_release_object(-1, new_counter);
+       free(new_counter);
+end:
+       rcu_read_unlock();
+       return status;
+}
+
+/*
+ * Release the event notifier error counter handles held by `app`.
+ *
+ * Every per-CPU counter object data, the array holding them, and the counter
+ * object data of the application's event notifier group are released and
+ * freed.
+ *
+ * Returns OK on success, ERR when no accounting entry exists for the uid of
+ * the application (in which case nothing is released).
+ */
+enum event_notifier_error_accounting_status
+event_notifier_error_accounting_unregister_app(struct ust_app *app)
+{
+       enum event_notifier_error_accounting_status status =
+                       EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR;
+       int cpu;
+
+       rcu_read_lock();
+       if (!get_uid_accounting_entry(app)) {
+               ERR("Failed to find event notitifier error accounting entry on application teardown: pid = %d, application name = '%s'",
+                               app->pid, app->name);
+               goto end;
+       }
+
+       /* Per-CPU handles first, then the array that held them. */
+       for (cpu = 0; cpu < app->event_notifier_group.nr_counter_cpu; cpu++) {
+               struct lttng_ust_abi_object_data *cpu_data =
+                               app->event_notifier_group.counter_cpu[cpu];
+
+               ustctl_release_object(app->sock, cpu_data);
+               free(cpu_data);
+       }
+
+       free(app->event_notifier_group.counter_cpu);
+
+       ustctl_release_object(app->sock, app->event_notifier_group.counter);
+       free(app->event_notifier_group.counter);
+
+       status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK;
+end:
+       rcu_read_unlock();
+       return status;
+}
+
+/*
+ * Compute the error count of `trigger` across all userspace error counters.
+ *
+ * The counter index of the trigger's tracer token is looked up, then the
+ * value at that index is aggregated for every entry of
+ * `error_counter_uid_ht` and summed.
+ *
+ * On success, the sum is written to `*count` and OK is returned. `*count` is
+ * left untouched on error.
+ */
+static
+enum event_notifier_error_accounting_status
+event_notifier_error_accounting_ust_get_count(
+               const struct lttng_trigger *trigger, uint64_t *count)
+{
+       const uint64_t tracer_token = lttng_trigger_get_tracer_token(trigger);
+       enum event_notifier_error_accounting_status status;
+       struct error_account_entry *account;
+       struct lttng_ht_iter account_iter;
+       uint64_t counter_index, total = 0;
+       size_t dimension_indexes[1];
+       /* Only filled-in on error paths, for logging. */
+       uid_t trigger_owner_uid;
+       const char *trigger_name;
+
+       /*
+        * Every error counter is visited, whatever its uid: errors of a
+        * trigger can come from any application managed by this session
+        * daemon.
+        */
+
+       rcu_read_lock();
+
+       status = get_error_counter_index_for_token(tracer_token,
+                       &counter_index);
+       if (status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) {
+               get_trigger_info_for_log(trigger, &trigger_name,
+                               &trigger_owner_uid);
+
+               ERR("Failed to retrieve index for tracer token: token = %" PRIu64 ", trigger name = '%s', trigger owner uid = %d, status = %s",
+                               tracer_token, trigger_name,
+                               (int) trigger_owner_uid,
+                               error_accounting_status_str(status));
+               goto end;
+       }
+
+       dimension_indexes[0] = counter_index;
+
+       cds_lfht_for_each_entry(error_counter_uid_ht->ht, &account_iter.iter,
+                       account, node.node) {
+               int64_t value = 0;
+               bool overflow = false, underflow = false;
+               const int aggregate_ret = ustctl_counter_aggregate(
+                               account->daemon_counter, dimension_indexes,
+                               &value, &overflow, &underflow);
+
+               if (aggregate_ret == 0 && value >= 0) {
+                       /* Non-negative, so the conversion is lossless. */
+                       total += (uint64_t) value;
+                       continue;
+               }
+
+               get_trigger_info_for_log(trigger, &trigger_name,
+                               &trigger_owner_uid);
+
+               if (aggregate_ret) {
+                       ERR("Failed to aggregate event notifier error counter values of trigger: trigger name = '%s', trigger owner uid = %d",
+                                       trigger_name,
+                                       (int) trigger_owner_uid);
+               } else {
+                       /* Aggregation succeeded but yielded a negative value. */
+                       ERR("Negative event notifier error counter value encountered during aggregation: trigger name = '%s', trigger owner uid = %d, value = %" PRId64,
+                                       trigger_name,
+                                       (int) trigger_owner_uid,
+                                       value);
+               }
+
+               status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR;
+               goto end;
+       }
+
+       *count = total;
+       status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK;
+
+end:
+       rcu_read_unlock();
+       return status;
+}
+
+/*
+ * Clear the error count of `trigger` in all userspace error counters.
+ *
+ * The counter index of the trigger's tracer token is looked up, then the
+ * value at that index is cleared for every entry of `error_counter_uid_ht`.
+ * Iteration stops at the first counter that fails to clear.
+ *
+ * Returns OK on success, the lookup status when the index cannot be found,
+ * and ERR when clearing a counter fails.
+ */
+static
+enum event_notifier_error_accounting_status event_notifier_error_accounting_ust_clear(
+               const struct lttng_trigger *trigger)
+{
+       const uint64_t tracer_token = lttng_trigger_get_tracer_token(trigger);
+       enum event_notifier_error_accounting_status status;
+       struct error_account_entry *account;
+       struct lttng_ht_iter account_iter;
+       uint64_t counter_index;
+       size_t dimension_index;
+       /* Only filled-in on error paths, for logging. */
+       uid_t trigger_owner_uid;
+       const char *trigger_name;
+
+       /*
+        * Every error counter is visited, whatever its uid: errors of a
+        * trigger can come from any application managed by this session
+        * daemon.
+        */
+
+       rcu_read_lock();
+       status = get_error_counter_index_for_token(tracer_token,
+                       &counter_index);
+       if (status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) {
+               get_trigger_info_for_log(trigger, &trigger_name,
+                               &trigger_owner_uid);
+
+               ERR("Failed to retrieve index for tracer token: token = %" PRIu64 ", trigger name = '%s', trigger owner uid = %d, status = %s",
+                               tracer_token, trigger_name,
+                               (int) trigger_owner_uid,
+                               error_accounting_status_str(status));
+               goto end;
+       }
+
+       dimension_index = counter_index;
+
+       cds_lfht_for_each_entry(error_counter_uid_ht->ht, &account_iter.iter,
+                       account, node.node) {
+               if (!ustctl_counter_clear(account->daemon_counter,
+                               &dimension_index)) {
+                       /* Cleared; move on to the next uid's counter. */
+                       continue;
+               }
+
+               get_trigger_info_for_log(trigger, &trigger_name,
+                               &trigger_owner_uid);
+               ERR("Failed to clear event notifier counter value for trigger: counter uid = %d, trigger name = '%s', trigger owner uid = %d",
+                               (int) account->node.key, trigger_name,
+                               (int) trigger_owner_uid);
+               status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR;
+               goto end;
+       }
+
+       status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK;
+end:
+       rcu_read_unlock();
+       return status;
+}
+#endif /* HAVE_LIBLTTNG_UST_CTL */
+
 static
 enum event_notifier_error_accounting_status
 event_notifier_error_accounting_kernel_clear(
@@ -127,18 +703,13 @@ event_notifier_error_accounting_kernel_clear(
        if (status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) {
                uid_t trigger_owner_uid;
                const char *trigger_name;
-               const enum lttng_trigger_status trigger_status =
-                               lttng_trigger_get_owner_uid(
-                                               trigger, &trigger_owner_uid);
-
-               assert(trigger_status == LTTNG_TRIGGER_STATUS_OK);
-               if (lttng_trigger_get_name(trigger, &trigger_name) !=
-                               LTTNG_TRIGGER_STATUS_OK) {
-                       trigger_name = "(unnamed)";
-               }
 
-               ERR("Failed to get event notifier error counter index: trigger owner uid = %d, trigger name = '%s'",
-                               trigger_owner_uid, trigger_name);
+               get_trigger_info_for_log(
+                               trigger, &trigger_name, &trigger_owner_uid);
+
+               ERR("Failed to get event notifier error counter index: trigger owner uid = %d, trigger name = '%s', status = '%s'",
+                               trigger_owner_uid, trigger_name,
+                               error_accounting_status_str(status));
                goto end;
        }
 
@@ -149,7 +720,14 @@ event_notifier_error_accounting_kernel_clear(
                        kernel_error_accountant.kernel_event_notifier_error_counter_fd,
                        &counter_clear);
        if (ret) {
-               ERR("Failed to clear event notifier error counter");
+               uid_t trigger_owner_uid;
+               const char *trigger_name;
+
+               get_trigger_info_for_log(
+                               trigger, &trigger_name, &trigger_owner_uid);
+
+               ERR("Failed to clear kernel event notifier error counter: trigger owner uid = %d, trigger name = '%s'",
+                               trigger_owner_uid, trigger_name);
                status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR;
                goto end;
        }
@@ -274,23 +852,21 @@ event_notifier_error_accounting_register_event_notifier(
        {
                uid_t trigger_owner_uid;
                const char *trigger_name;
-               const enum lttng_trigger_status trigger_status =
-                               lttng_trigger_get_owner_uid(
-                                               trigger, &trigger_owner_uid);
-
-               assert(trigger_status == LTTNG_TRIGGER_STATUS_OK);
-               if (lttng_trigger_get_name(trigger, &trigger_name) !=
-                               LTTNG_TRIGGER_STATUS_OK) {
-                       trigger_name = "(unnamed)";
-               }
 
-               DBG("Event notifier error counter index not found for tracer token (allocating a new one): trigger owner = %d, trigger name = '%s', tracer token = %" PRIu64,
-                               trigger_owner_uid, trigger_name,
+               get_trigger_info_for_log(
+                               trigger, &trigger_name, &trigger_owner_uid);
+
+               DBG("Event notifier error counter index not found for tracer token (allocating a new one): trigger name = '%s', trigger owner uid = %d, tracer token = %" PRIu64,
+                               trigger_name, trigger_owner_uid,
                                lttng_trigger_get_tracer_token(trigger));
+
                status = create_error_counter_index_for_token(
                                lttng_trigger_get_tracer_token(trigger),
                                &local_error_counter_index);
                if (status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) {
+                       ERR("Error creating index for token: status = %s, trigger name = '%s', trigger owner uid = %d",
+                                       error_accounting_status_str(status),
+                                       trigger_name, trigger_owner_uid);
                        goto end;
                }
                /* fall-through. */
@@ -320,6 +896,8 @@ event_notifier_error_accounting_kernel_get_count(
        status = get_error_counter_index_for_token(
                        lttng_trigger_get_tracer_token(trigger), &error_counter_index);
        if (status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) {
+               ERR("Error getting index for token: status=%s",
+                               error_accounting_status_str(status));
                goto end;
        }
 
@@ -334,15 +912,9 @@ event_notifier_error_accounting_kernel_get_count(
        if (ret || counter_aggregate.value.value < 0) {
                uid_t trigger_owner_uid;
                const char *trigger_name;
-               const enum lttng_trigger_status trigger_status =
-                               lttng_trigger_get_owner_uid(
-                                               trigger, &trigger_owner_uid);
-
-               assert(trigger_status == LTTNG_TRIGGER_STATUS_OK);
-               if (lttng_trigger_get_name(trigger, &trigger_name) !=
-                               LTTNG_TRIGGER_STATUS_OK) {
-                       trigger_name = "(unnamed)";
-               }
+
+               get_trigger_info_for_log(trigger, &trigger_name,
+                               &trigger_owner_uid);
 
                if (counter_aggregate.value.value < 0) {
                        ERR("Invalid negative event notifier error counter value: trigger owner = %d, trigger name = '%s', value = %" PRId64,
@@ -376,7 +948,11 @@ event_notifier_error_accounting_get_count(
                return event_notifier_error_accounting_kernel_get_count(
                                trigger, count);
        case LTTNG_DOMAIN_UST:
+#ifdef HAVE_LIBLTTNG_UST_CTL
+               return event_notifier_error_accounting_ust_get_count(trigger, count);
+#else
                return EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK;
+#endif /* HAVE_LIBLTTNG_UST_CTL */
        default:
                abort();
        }
@@ -390,7 +966,11 @@ event_notifier_error_accounting_clear(const struct lttng_trigger *trigger)
        case LTTNG_DOMAIN_KERNEL:
                return event_notifier_error_accounting_kernel_clear(trigger);
        case LTTNG_DOMAIN_UST:
+#ifdef HAVE_LIBLTTNG_UST_CTL
+               return event_notifier_error_accounting_ust_clear(trigger);
+#else
                return EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK;
+#endif /* HAVE_LIBLTTNG_UST_CTL */
        default:
                abort();
        }
@@ -409,27 +989,38 @@ void event_notifier_error_accounting_unregister_event_notifier(
 {
        struct lttng_ht_iter iter;
        struct lttng_ht_node_u64 *node;
-       struct index_ht_entry *index_entry;
+       const uint64_t tracer_token = lttng_trigger_get_tracer_token(trigger);
        enum event_notifier_error_accounting_status status;
-       enum lttng_index_allocator_status index_alloc_status;
-       uint64_t tracer_token = lttng_trigger_get_tracer_token(trigger);
 
        status = event_notifier_error_accounting_clear(trigger);
        if (status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) {
-               ERR("Failed to clear event notifier error counter index");
+               /* Trigger details already logged by callee on error. */
+               ERR("Failed to clear event notifier error counter during unregistration of event notifier: status = '%s'",
+                               error_accounting_status_str(status));
        }
 
        rcu_read_lock();
        lttng_ht_lookup(error_counter_indexes_ht, &tracer_token, &iter);
        node = lttng_ht_iter_get_node_u64(&iter);
-       if(node) {
-               index_entry = caa_container_of(node, struct index_ht_entry, node);
+       if (node) {
+               struct index_ht_entry *index_entry = caa_container_of(
+                               node, typeof(*index_entry), node);
+               enum lttng_index_allocator_status index_alloc_status;
+
                index_alloc_status = lttng_index_allocator_release(
                                index_allocator,
                                index_entry->error_counter_index);
                if (index_alloc_status != LTTNG_INDEX_ALLOCATOR_STATUS_OK) {
-                       ERR("Failed to release event notifier error counter index: index = %" PRIu64,
-                                       index_entry->error_counter_index);
+                       uid_t trigger_owner_uid;
+                       const char *trigger_name;
+
+                       get_trigger_info_for_log(trigger, &trigger_name,
+                                       &trigger_owner_uid);
+
+                       ERR("Failed to release event notifier error counter index: index = %" PRIu64 ", trigger name = '%s', trigger owner uid = %d",
+                                       index_entry->error_counter_index,
+                                       trigger_name, (int) trigger_owner_uid);
+                       /* Don't exit, perform the rest of the clean-up. */
                }
 
                lttng_ht_del(error_counter_indexes_ht, &iter);
@@ -439,8 +1030,37 @@ void event_notifier_error_accounting_unregister_event_notifier(
        rcu_read_unlock();
 }
 
+#ifdef HAVE_LIBLTTNG_UST_CTL
+static void free_error_account_entry(struct rcu_head *head) /* call_rcu() callback: releases everything owned by one error_account_entry. */
+{
+       int i;
+       struct error_account_entry *entry =
+                       caa_container_of(head, typeof(*entry), rcu_head); /* Recover the entry that embeds this rcu_head. */
+
+       for (i = 0; i < entry->nr_counter_cpu_fds; i++) {
+               ustctl_release_object(-1, entry->cpu_counters[i]); /* sock == -1: anonymous handle, per the note in struct error_account_entry. */
+               free(entry->cpu_counters[i]); /* The handle's own memory is freed separately from the release. */
+       }
+
+       free(entry->cpu_counters); /* The array that held the per-CPU handles. */
+
+       ustctl_release_object(-1, entry->counter); /* Same release-then-free sequence for the top-level counter handle. */
+       free(entry->counter);
+
+       ustctl_destroy_counter(entry->daemon_counter); /* Destroyed only after all handles above were released. */
+
+       free(entry);
+}
+#else
+/* Empty stand-in so references to this symbol (e.g. the call_rcu() in event_notifier_error_accounting_fini()) still build; not called without UST support. */
+static void free_error_account_entry(struct rcu_head *head) {}
+#endif /* HAVE_LIBLTTNG_UST_CTL */
+
 void event_notifier_error_accounting_fini(void)
 {
+       struct lttng_ht_iter iter;
+       struct error_account_entry *uid_entry;
+
        lttng_index_allocator_destroy(index_allocator);
 
        if (kernel_error_accountant.kernel_event_notifier_error_counter_fd) {
@@ -452,8 +1072,22 @@ void event_notifier_error_accounting_fini(void)
        }
 
        /*
-        * Will assert if some error counters were not released (an internal
-        * error).
+        * FIXME error account entries are not reference-counted and torn
+        * down on last use. They exist from the moment of their first use
+        * up until the teardown of the session daemon.
+        */
+       rcu_read_lock();
+       cds_lfht_for_each_entry(error_counter_uid_ht->ht, &iter.iter,
+                       uid_entry, node.node) {
+               cds_lfht_del(error_counter_uid_ht->ht, &uid_entry->node.node);
+               call_rcu(&uid_entry->rcu_head, free_error_account_entry);
+       }
+       rcu_read_unlock();
+       lttng_ht_destroy(error_counter_uid_ht);
+
+       /*
+        * Will assert if some error counter indices were not released (an
+        * internal error).
         */
        lttng_ht_destroy(error_counter_indexes_ht);
 }
index 1dea73948337feb6b19a6eb6ea8ae3558e19df86..889efffa3c3316d1619b14380540f0a3d824e243 100644 (file)
@@ -9,14 +9,18 @@
 #define _EVENT_NOTIFIER_ERROR_ACCOUNTING_H
 
 #include <stdint.h>
+
 #include <lttng/trigger/trigger.h>
 
+#include "ust-app.h"
+
 enum event_notifier_error_accounting_status { /* Status codes returned by the event_notifier_error_accounting_*() functions declared in this header. */
        EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK,
        EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR,
        EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_NOT_FOUND,
        EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_NOMEM,
        EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_NO_INDEX_AVAILABLE,
+       EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_APP_DEAD, /* NOTE(review): presumably reported when the UST application went away mid-operation; confirm against the .c file. */
 };
 
 enum event_notifier_error_accounting_status
@@ -26,6 +30,28 @@ enum event_notifier_error_accounting_status
 event_notifier_error_accounting_register_kernel(
                int kernel_event_notifier_group_fd);
 
+#ifdef HAVE_LIBLTTNG_UST_CTL
+enum event_notifier_error_accounting_status
+event_notifier_error_accounting_register_app(struct ust_app *app);
+
+enum event_notifier_error_accounting_status
+event_notifier_error_accounting_unregister_app(struct ust_app *app);
+#else /* HAVE_LIBLTTNG_UST_CTL */
+static inline
+enum event_notifier_error_accounting_status
+event_notifier_error_accounting_register_app(struct ust_app *app) /* Stub for builds without HAVE_LIBLTTNG_UST_CTL: `app` is ignored. */
+{
+       return EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK; /* Always OK; there is nothing to register. */
+}
+
+static inline
+enum event_notifier_error_accounting_status
+event_notifier_error_accounting_unregister_app(struct ust_app *app) /* Stub for builds without HAVE_LIBLTTNG_UST_CTL: `app` is ignored. */
+{
+       return EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK; /* Always OK; there is nothing to unregister. */
+}
+#endif /* HAVE_LIBLTTNG_UST_CTL */
+
 enum event_notifier_error_accounting_status
 event_notifier_error_accounting_register_event_notifier(
                const struct lttng_trigger *trigger,
index 50d2f1932657e56f7863e5b23fd3ef8b48cad87d..d8546308deb2cc2fa77e7c1b698627626b881234 100644 (file)
@@ -7,11 +7,14 @@
  */
 
 #define _LGPL_SOURCE
+#include <errno.h>
+#include <fcntl.h>
 #include <inttypes.h>
 #include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
@@ -32,6 +35,7 @@
 #include <common/sessiond-comm/sessiond-comm.h>
 
 #include "buffer-registry.h"
+#include "condition-internal.h"
 #include "fd-limit.h"
 #include "health-sessiond.h"
 #include "ust-app.h"
@@ -44,6 +48,8 @@
 #include "notification-thread-commands.h"
 #include "rotate.h"
 #include "event.h"
+#include "event-notifier-error-accounting.h"
+
 
 struct lttng_ht *ust_app_ht;
 struct lttng_ht *ust_app_ht_by_sock;
@@ -999,6 +1005,8 @@ void delete_ust_app(struct ust_app *app)
         */
        if (app->event_notifier_group.object) {
                enum lttng_error_code ret_code;
+               enum event_notifier_error_accounting_status status;
+
                const int event_notifier_read_fd = lttng_pipe_get_readfd(
                                app->event_notifier_group.event_pipe);
 
@@ -1009,6 +1017,11 @@ void delete_ust_app(struct ust_app *app)
                        ERR("Failed to remove application tracer event source from notification thread");
                }
 
+               status = event_notifier_error_accounting_unregister_app(app);
+               if (status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) {
+                       ERR("Error unregistering app from event notifier error accounting");
+               }
+
                ustctl_release_object(sock, app->event_notifier_group.object);
                free(app->event_notifier_group.object);
        }
@@ -2106,6 +2119,7 @@ static int create_ust_event_notifier(struct ust_app *app,
 
        init_ust_event_notifier_from_event_rule(event_rule, &event_notifier);
        event_notifier.event.token = ua_event_notifier_rule->token;
+       event_notifier.error_counter_index = ua_event_notifier_rule->error_counter_index;
 
        /* Create UST event notifier against the tracer. */
        pthread_mutex_lock(&app->sock_lock);
@@ -3674,12 +3688,12 @@ int create_ust_app_event_notifier_rule(struct lttng_trigger *trigger,
        DBG2("UST app create token event rule completed: app = '%s' (ppid: %d), token = %" PRIu64,
                        app->name, app->ppid, lttng_trigger_get_tracer_token(trigger));
 
-end:
-       return ret;
+       goto end;
 
 error:
        /* The RCU read side lock is already being held by the caller. */
        delete_ust_app_event_notifier_rule(-1, ua_event_notifier_rule, app);
+end:
        return ret;
 }
 
@@ -3994,6 +4008,7 @@ int ust_app_setup_event_notifier_group(struct ust_app *app)
        int event_pipe_write_fd;
        struct lttng_ust_abi_object_data *event_notifier_group = NULL;
        enum lttng_error_code lttng_ret;
+       enum event_notifier_error_accounting_status event_notifier_error_accounting_status;
 
        assert(app);
 
@@ -4042,6 +4057,14 @@ int ust_app_setup_event_notifier_group(struct ust_app *app)
 
        /* Assign handle only when the complete setup is valid. */
        app->event_notifier_group.object = event_notifier_group;
+
+       event_notifier_error_accounting_status = event_notifier_error_accounting_register_app(app);
+       if (event_notifier_error_accounting_status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) {
+               ERR("Failed to setup event notifier error accounting for app");
+               ret = -1;
+               goto error;
+       }
+
        return ret;
 
 error:
@@ -5962,6 +5985,20 @@ void ust_app_global_update_all_event_notifier_rules(void)
        rcu_read_unlock();
 }
 
+void ust_app_update_event_notifier_error_count(struct lttng_trigger *trigger) /* Fetches the trigger's error count and stores it on the trigger's condition. */
+{
+       uint64_t error_count = 0; /* NOTE(review): if the lookup below fails without writing this, 0 is what gets stored; confirm that is intended. */
+       enum event_notifier_error_accounting_status status;
+       struct lttng_condition *condition = lttng_trigger_get_condition(trigger);
+
+       status = event_notifier_error_accounting_get_count(trigger, &error_count);
+       if (status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) {
+               ERR("Error getting trigger error count."); /* Logged only; execution continues to the setter below. */
+       }
+
+       lttng_condition_on_event_set_error_count(condition, error_count); /* Runs whether or not the lookup succeeded. */
+}
+
 /*
  * Add context to a specific channel for global UST domain.
  */
index 9b42d518d30f2e619ea7f89c5e4dc307454b37b7..7a4cf1537c9c577df02f13cd62e12f9bbbdac679 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <stdint.h>
 
+#include <common/index-allocator.h>
 #include <common/uuid.h>
 
 #include "trace-ust.h"
@@ -114,6 +115,7 @@ struct ust_app_event {
 
 struct ust_app_event_notifier_rule {
        int enabled;
+       uint64_t error_counter_index;
        int handle;
        struct lttng_ust_abi_object_data *obj;
        /* Holds a strong reference. */
@@ -319,6 +321,9 @@ struct ust_app {
                 */
                struct lttng_ust_abi_object_data *object;
                struct lttng_pipe *event_pipe;
+               struct lttng_ust_abi_object_data *counter;
+               struct lttng_ust_abi_object_data **counter_cpu;
+               int nr_counter_cpu;
        } event_notifier_group;
        /*
         * Hashtable indexing the application's event notifier rule's
@@ -355,6 +360,8 @@ void ust_app_global_update_all(struct ltt_ust_session *usess);
 void ust_app_global_update_event_notifier_rules(struct ust_app *app);
 void ust_app_global_update_all_event_notifier_rules(void);
 
+void ust_app_update_event_notifier_error_count(struct lttng_trigger *trigger);
+
 void ust_app_clean_list(void);
 int ust_app_ht_alloc(void);
 struct ust_app *ust_app_find_by_pid(pid_t pid);
@@ -579,7 +586,12 @@ unsigned int ust_app_get_nb_stream(struct ltt_ust_session *usess)
 {
        return 0;
 }
-
+static inline
+void ust_app_update_event_notifier_error_count(
+               struct lttng_trigger *lttng_trigger) /* NOTE(review): appears to be the no-op fallback next to the other static inline stubs here; confirm the enclosing #if. */
+{
+       return; /* Intentionally does nothing; the trigger is left untouched. */
+}
 static inline
 int ust_app_supported(void)
 {
This page took 0.058952 seconds and 4 git commands to generate.