From: Francis Deslauriers Date: Wed, 14 Oct 2020 19:33:46 +0000 (-0400) Subject: sessiond: Implement UST event notifier error counter X-Git-Tag: v2.13.0-rc1~174 X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=commitdiff_plain;h=533a90fb2d8d6892b86261951658cab6150a8ed6 sessiond: Implement UST event notifier error counter Signed-off-by: Francis Deslauriers Signed-off-by: Jérémie Galarneau Change-Id: I254b2cec39b8c61ad945376043b5b9053de98f6f --- diff --git a/src/bin/lttng-sessiond/event-notifier-error-accounting.c b/src/bin/lttng-sessiond/event-notifier-error-accounting.c index 628ddcfbf..6837fe8f3 100644 --- a/src/bin/lttng-sessiond/event-notifier-error-accounting.c +++ b/src/bin/lttng-sessiond/event-notifier-error-accounting.c @@ -10,14 +10,18 @@ #include #include #include +#include #include #include #include #include +#include #include #include "event-notifier-error-accounting.h" +#include "lttng-ust-error.h" +#include "ust-app.h" #define ERROR_COUNTER_INDEX_HT_INITIAL_SIZE 16 @@ -27,6 +31,22 @@ struct index_ht_entry { struct rcu_head rcu_head; }; +struct error_account_entry { + struct lttng_ht_node_u64 node; + struct rcu_head rcu_head; + struct ustctl_daemon_counter *daemon_counter; + /* + * Those `lttng_ust_abi_object_data` are anonymous handles to the counters + * objects. + * They are only used to be duplicated for each new applications of the + * user. To destroy them, call with the `sock` parameter set to -1. + * e.g. `ustctl_release_object(-1, data)`; + */ + struct lttng_ust_abi_object_data *counter; + struct lttng_ust_abi_object_data **cpu_counters; + int nr_counter_cpu_fds; +}; + struct kernel_error_account_entry { int kernel_event_notifier_error_counter_fd; }; @@ -36,9 +56,34 @@ static struct kernel_error_account_entry kernel_error_accountant; /* Hashtable mapping event notifier token to index_ht_entry. */ static struct lttng_ht *error_counter_indexes_ht; +/* Hashtable mapping uid to error_account_entry. 
*/ +static struct lttng_ht *error_counter_uid_ht; + static uint64_t error_counter_size; static struct lttng_index_allocator *index_allocator; +static inline void get_trigger_info_for_log(const struct lttng_trigger *trigger, + const char **trigger_name, + uid_t *trigger_owner_uid) +{ + enum lttng_trigger_status trigger_status; + + trigger_status = lttng_trigger_get_name(trigger, trigger_name); + switch (trigger_status) { + case LTTNG_TRIGGER_STATUS_OK: + break; + case LTTNG_TRIGGER_STATUS_UNSET: + *trigger_name = "(unset)"; + break; + default: + abort(); + } + + trigger_status = lttng_trigger_get_owner_uid(trigger, + trigger_owner_uid); + assert(trigger_status == LTTNG_TRIGGER_STATUS_OK); +} + static inline const char *error_accounting_status_str( enum event_notifier_error_accounting_status status) @@ -54,6 +99,8 @@ const char *error_accounting_status_str( return "NOMEM"; case EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_NO_INDEX_AVAILABLE: return "NO_INDEX_AVAILABLE"; + case EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_APP_DEAD: + return "APP_DEAD"; default: abort(); } @@ -74,6 +121,15 @@ event_notifier_error_accounting_init(uint64_t nb_bucket) error_counter_indexes_ht = lttng_ht_new( ERROR_COUNTER_INDEX_HT_INITIAL_SIZE, LTTNG_HT_TYPE_U64); if (!error_counter_indexes_ht) { + ERR("Failed to allocate error counter indices hash table"); + status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_NOMEM; + goto error_index_allocator; + } + + error_counter_uid_ht = lttng_ht_new( + ERROR_COUNTER_INDEX_HT_INITIAL_SIZE, LTTNG_HT_TYPE_U64); + if (!error_counter_uid_ht) { + ERR("Failed to allocate UID to error counter accountant hash table"); status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_NOMEM; goto error_index_allocator; } @@ -111,6 +167,526 @@ enum event_notifier_error_accounting_status get_error_counter_index_for_token( return status; } +#ifdef HAVE_LIBLTTNG_UST_CTL +static +struct error_account_entry *get_uid_accounting_entry(const struct ust_app *app) +{ + struct error_account_entry *entry; 
+ struct lttng_ht_node_u64 *node; + struct lttng_ht_iter iter; + uint64_t key = app->uid; + + lttng_ht_lookup(error_counter_uid_ht, &key, &iter); + node = lttng_ht_iter_get_node_u64(&iter); + if(node == NULL) { + entry = NULL; + } else { + entry = caa_container_of(node, struct error_account_entry, node); + } + + return entry; +} +
+/*
+ * Create the error accounting entry for the uid of `app` and add it to
+ * `error_counter_uid_ht`, keyed by that uid.
+ *
+ * One anonymous shm is created per counter CPU; the daemon-side counter is
+ * created over those shms, followed by the counter and per-CPU counter object
+ * handles stored in the entry.
+ *
+ * Returns the new entry on success, or NULL on error, in which case every
+ * resource created by this function is torn down.
+ */
+static
+struct error_account_entry *create_uid_accounting_entry(
+		const struct ust_app *app)
+{
+	int i, ret;
+	struct ustctl_daemon_counter *daemon_counter;
+	struct lttng_ust_abi_object_data *counter = NULL, **cpu_counters = NULL;
+	int *cpu_counter_fds = NULL;
+	struct error_account_entry *entry = NULL;
+	const struct ustctl_counter_dimension dimension = {
+		.size = error_counter_size,
+		.has_underflow = false,
+		.has_overflow = false,
+	};
+
+	entry = zmalloc(sizeof(*entry));
+	if (!entry) {
+		PERROR("Failed to allocate event notifier error acounting entry");
+		goto error;
+	}
+
+	entry->nr_counter_cpu_fds = ustctl_get_nr_cpu_per_counter();
+	cpu_counter_fds = zmalloc(entry->nr_counter_cpu_fds * sizeof(*cpu_counter_fds));
+	if (!cpu_counter_fds) {
+		PERROR("Failed to allocate event notifier error counter file descriptors array: application uid = %d, application name = '%s', pid = %d, allocation size = %zu",
+				(int) app->uid, app->name, (int) app->pid,
+				entry->nr_counter_cpu_fds * sizeof(*cpu_counter_fds));
+		goto error_counter_cpu_fds_alloc;
+	}
+
+	/*
+	 * Initialize to an invalid fd value so the error path can tell which
+	 * fds were actually created and need to be closed.
+	 */
+	for (i = 0; i < entry->nr_counter_cpu_fds; i++) {
+		cpu_counter_fds[i] = -1;
+	}
+
+	/*
+	 * Array of pointers: size it by the pointer type, not by the pointed-to
+	 * structure. It must be zero-initialized since the error path stops at
+	 * the first NULL element.
+	 */
+	cpu_counters = zmalloc(entry->nr_counter_cpu_fds * sizeof(*cpu_counters));
+	if (!cpu_counters) {
+		PERROR("Failed to allocate event notifier error counter lttng_ust_abi_object_data array: application uid = %d, application name = '%s', pid = %d, allocation size = %zu",
+				(int) app->uid, app->name, (int) app->pid,
+				entry->nr_counter_cpu_fds * sizeof(*cpu_counters));
+		goto error_counter_cpus_alloc;
+	}
+
+	for (i = 0; i < entry->nr_counter_cpu_fds; i++) {
+		cpu_counter_fds[i] = shm_create_anonymous("event-notifier-error-accounting");
+		if (cpu_counter_fds[i] == -1) {
+			ERR("Failed to create event notifier error accounting shared memory for application user: application uid = %d, pid = %d, application name = '%s'",
+					(int) app->uid, (int) app->pid, app->name);
+			goto error_shm_alloc;
+		}
+	}
+
+	/*
+	 * Ownership of the file descriptors transferred to the ustctl object.
+	 */
+	daemon_counter = ustctl_create_counter(1, &dimension, 0, -1,
+			entry->nr_counter_cpu_fds, cpu_counter_fds,
+			USTCTL_COUNTER_BITNESS_32,
+			USTCTL_COUNTER_ARITHMETIC_MODULAR,
+			USTCTL_COUNTER_ALLOC_PER_CPU,
+			false);
+	if (!daemon_counter) {
+		ERR("Failed to create event notifier error counter for application user: application uid = %d, pid = %d, application name = '%s'",
+				(int) app->uid, (int) app->pid, app->name);
+		goto error_create_daemon_counter;
+	}
+
+	ret = ustctl_create_counter_data(daemon_counter, &counter);
+	if (ret) {
+		ERR("Failed to create userspace tracer counter data for application user: uid = %d, pid = %d, application name = '%s'",
+				(int) app->uid, (int) app->pid, app->name);
+		goto error_create_counter_data;
+	}
+
+	for (i = 0; i < entry->nr_counter_cpu_fds; i++) {
+		ret = ustctl_create_counter_cpu_data(daemon_counter, i,
+				&cpu_counters[i]);
+		if (ret) {
+			ERR("Failed to create userspace tracer counter cpu data for application user: uid = %d, pid = %d, application name = '%s'",
+					(int) app->uid, (int) app->pid,
+					app->name);
+			goto error_create_counter_cpu_data;
+		}
+	}
+
+	/* Everything succeeded: hand the resources over to the entry. */
+	entry->daemon_counter = daemon_counter;
+	entry->counter = counter;
+	entry->cpu_counters = cpu_counters;
+
+	lttng_ht_node_init_u64(&entry->node, app->uid);
+	lttng_ht_add_unique_u64(error_counter_uid_ht, &entry->node);
+
+	goto end;
+
+error_create_counter_cpu_data:
+	/* Teardown any allocated cpu counters. */
+	for (i = 0; i < entry->nr_counter_cpu_fds; i++) {
+		if (!cpu_counters[i]) {
+			/*
+			 * Early-exit when error occurred before all cpu
+			 * counters could be initialized.
+			 */
+			break;
+		}
+
+		ustctl_release_object(-1, cpu_counters[i]);
+		free(cpu_counters[i]);
+	}
+
+	/*
+	 * `entry->counter` is only assigned on success; at this point the
+	 * handle still lives in the local `counter`.
+	 */
+	ustctl_release_object(-1, counter);
+	free(counter);
+error_create_counter_data:
+	ustctl_destroy_counter(daemon_counter);
+error_create_daemon_counter:
+error_shm_alloc:
+	/*
+	 * Error occurred before per-cpu SHMs were handed-off to ustctl.
+	 *
+	 * NOTE(review): this code is also reached after
+	 * ustctl_create_counter() was called, i.e. after the hand-off
+	 * described above. Confirm against the ustctl implementation that the
+	 * fds are not already closed by then (possible double close).
+	 */
+	if (cpu_counter_fds) {
+		for (i = 0; i < entry->nr_counter_cpu_fds; i++) {
+			if (cpu_counter_fds[i] < 0) {
+				/*
+				 * Early-exit when error occurred before all cpu
+				 * counter shm fds could be initialized.
+				 */
+				break;
+			}
+
+			ret = close(cpu_counter_fds[i]);
+			if (ret) {
+				PERROR("Failed to close error counter per-CPU shm file descriptor: fd = %d", cpu_counter_fds[i]);
+			}
+		}
+	}
+
+	free(cpu_counters);
+error_counter_cpus_alloc:
+error_counter_cpu_fds_alloc:
+	free(entry);
+error:
+	entry = NULL;
+end:
+	/* The fd array itself is only scratch storage; free it on every path. */
+	free(cpu_counter_fds);
+	return entry;
+}
+ +static +enum event_notifier_error_accounting_status send_counter_data_to_ust( + struct ust_app *app, + struct lttng_ust_abi_object_data *new_counter) +{ + int ret; + enum event_notifier_error_accounting_status status; + + /* Attach counter to trigger group. 
*/ + pthread_mutex_lock(&app->sock_lock); + ret = ustctl_send_counter_data_to_ust(app->sock, + app->event_notifier_group.object->handle, new_counter); + pthread_mutex_unlock(&app->sock_lock); + if (ret < 0) { + if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("Failed to send counter data to application: application name = '%s', pid = %d, ret = %d", + app->name, app->pid, ret); + status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR; + } else { + DBG3("Failed to send counter data to application (application is dead): application name = '%s', pid = %d, ret = %d", + app->name, app->pid, ret); + status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_APP_DEAD; + } + + goto end; + } + + status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK; +end: + return status; +} + +static +enum event_notifier_error_accounting_status send_counter_cpu_data_to_ust( + struct ust_app *app, + struct lttng_ust_abi_object_data *counter, + struct lttng_ust_abi_object_data *counter_cpu) +{ + int ret; + enum event_notifier_error_accounting_status status; + + pthread_mutex_lock(&app->sock_lock); + ret = ustctl_send_counter_cpu_data_to_ust(app->sock, + counter, counter_cpu); + pthread_mutex_unlock(&app->sock_lock); + if (ret < 0) { + if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("Failed to send counter CPU data to application: application name = '%s', pid = %d, ret = %d", + app->name, app->pid, ret); + status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR; + } else { + DBG3("Failed to send counter CPU data to application: application name = '%s', pid = %d, ret = %d", + app->name, app->pid, ret); + status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_APP_DEAD; + } + + goto end; + } + + status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK; +end: + return status; +} +
+/*
+ * Register `app` with the event notifier error accounting.
+ *
+ * Looks up the error accounting entry of the application's uid, creating it
+ * if it does not exist yet. The entry's counter and per-CPU counter handles
+ * are then duplicated and sent to the application. On success, the duplicated
+ * handles are stored in `app->event_notifier_group`.
+ *
+ * Returns EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK on success. On error,
+ * every handle duplicated by this call is released and `app` is left
+ * untouched.
+ */
+enum event_notifier_error_accounting_status
+event_notifier_error_accounting_register_app(struct ust_app *app)
+{
+	/* `i` is an int to match the type of `nr_counter_cpu_fds`. */
+	int ret, i;
+	struct lttng_ust_abi_object_data *new_counter;
+	struct error_account_entry *entry;
+	enum event_notifier_error_accounting_status status;
+	struct lttng_ust_abi_object_data **cpu_counters;
+
+	/*
+	 * Check if we already have a error counter for the user id of this
+	 * app. If not, create one.
+	 */
+	rcu_read_lock();
+	entry = get_uid_accounting_entry(app);
+	if (entry == NULL) {
+		entry = create_uid_accounting_entry(app);
+		if (!entry) {
+			status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR;
+			goto end;
+		}
+	}
+
+	/* Duplicate counter object data. */
+	ret = ustctl_duplicate_ust_object_data(&new_counter,
+			entry->counter);
+	if (ret) {
+		ERR("Failed to duplicate event notifier error accounting counter for application user: application uid = %d, pid = %d, application name = '%s'",
+				(int) app->uid, (int) app->pid, app->name);
+		status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR;
+		goto end;
+	}
+
+	status = send_counter_data_to_ust(app, new_counter);
+	if (status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) {
+		ERR("Failed to send counter data to application tracer: status = %s, application uid = %d, pid = %d, application name = '%s'",
+				error_accounting_status_str(status),
+				(int) app->uid, (int) app->pid, app->name);
+		status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR;
+		goto error_send_counter_data;
+	}
+
+	/*
+	 * Array of pointers, zero-initialized: the error path stops at the
+	 * first NULL element.
+	 */
+	cpu_counters = zmalloc(entry->nr_counter_cpu_fds * sizeof(*cpu_counters));
+	if (!cpu_counters) {
+		PERROR("Failed to allocate event notifier error counter lttng_ust_abi_object_data array: application uid = %d, application name = '%s', pid = %d, allocation size = %zu",
+				(int) app->uid, app->name, (int) app->pid,
+				entry->nr_counter_cpu_fds * sizeof(*cpu_counters));
+		status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_NOMEM;
+		goto error_allocate_cpu_counters;
+	}
+
+	for (i = 0; i < entry->nr_counter_cpu_fds; i++) {
+		struct lttng_ust_abi_object_data *new_counter_cpu = NULL;
+
+		ret = ustctl_duplicate_ust_object_data(&new_counter_cpu,
+				entry->cpu_counters[i]);
+		if (ret) {
+			ERR("Failed to duplicate userspace tracer counter cpu data for application user: uid = %d, pid = %d, application name = '%s'",
+					(int) app->uid, (int) app->pid,
+					app->name);
+			status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_NOMEM;
+			goto error_duplicate_cpu_counter;
+		}
+
+		/* Track the duplicate before sending so the error path releases it. */
+		cpu_counters[i] = new_counter_cpu;
+
+		status = send_counter_cpu_data_to_ust(app, new_counter,
+				new_counter_cpu);
+		if (status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) {
+			ERR("Failed to send counter cpu data to application tracer: status = %s, application uid = %d, pid = %d, application name = '%s'",
+					error_accounting_status_str(status),
+					(int) app->uid, (int) app->pid,
+					app->name);
+			status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR;
+			goto error_send_cpu_counter_data;
+		}
+	}
+
+	/* Success: the application now owns the duplicated handles. */
+	app->event_notifier_group.counter = new_counter;
+	new_counter = NULL;
+	app->event_notifier_group.nr_counter_cpu = entry->nr_counter_cpu_fds;
+	app->event_notifier_group.counter_cpu = cpu_counters;
+	cpu_counters = NULL;
+	goto end;
+
+error_send_cpu_counter_data:
+error_duplicate_cpu_counter:
+	/* Teardown any duplicated cpu counters. */
+	for (i = 0; i < entry->nr_counter_cpu_fds; i++) {
+		if (!cpu_counters[i]) {
+			/*
+			 * Early-exit when error occurred before all cpu
+			 * counters could be initialized.
+			 */
+			break;
+		}
+
+		ustctl_release_object(-1, cpu_counters[i]);
+		free(cpu_counters[i]);
+	}
+
+	free(cpu_counters);
+	/*
+	 * An allocation failure lands below the loop above: `cpu_counters` is
+	 * NULL in that case and must not be dereferenced.
+	 */
+error_allocate_cpu_counters:
+error_send_counter_data:
+	ustctl_release_object(-1, new_counter);
+	free(new_counter);
+end:
+	rcu_read_unlock();
+	return status;
+}
+ +enum event_notifier_error_accounting_status +event_notifier_error_accounting_unregister_app(struct ust_app *app) +{ + enum event_notifier_error_accounting_status status; + struct error_account_entry *entry; + int i; + + rcu_read_lock(); + entry = get_uid_accounting_entry(app); + if (entry == NULL) { + ERR("Failed to find event notitifier error accounting entry on application teardown: pid = %d, application name = '%s'", + app->pid, app->name); + status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR; + goto end; + } + + for (i = 0; i < app->event_notifier_group.nr_counter_cpu; i++) { + ustctl_release_object(app->sock, + app->event_notifier_group.counter_cpu[i]); + free(app->event_notifier_group.counter_cpu[i]); + } + + free(app->event_notifier_group.counter_cpu); + + ustctl_release_object(app->sock, app->event_notifier_group.counter); + free(app->event_notifier_group.counter); + + status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK; +end: + rcu_read_unlock(); + return status; +} + +static +enum event_notifier_error_accounting_status +event_notifier_error_accounting_ust_get_count( + const struct lttng_trigger *trigger, uint64_t *count) +{ + struct lttng_ht_iter iter; + struct error_account_entry *uid_entry; + uint64_t error_counter_index, global_sum = 0; + enum event_notifier_error_accounting_status status; + size_t dimension_indexes[1]; + const uint64_t tracer_token = lttng_trigger_get_tracer_token(trigger); + + /* + * Go over all error counters (ignoring uid) as a trigger (and trigger + * errors) can be generated from any applications that this session + * daemon is managing. 
+ */ + + rcu_read_lock(); + + status = get_error_counter_index_for_token( + tracer_token, + &error_counter_index); + if (status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) { + uid_t trigger_owner_uid; + const char *trigger_name; + + get_trigger_info_for_log(trigger, &trigger_name, + &trigger_owner_uid); + + ERR("Failed to retrieve index for tracer token: token = %" PRIu64 ", trigger name = '%s', trigger owner uid = %d, status = %s", + tracer_token, trigger_name, + (int) trigger_owner_uid, + error_accounting_status_str(status)); + goto end; + } + + dimension_indexes[0] = error_counter_index; + + cds_lfht_for_each_entry(error_counter_uid_ht->ht, &iter.iter, + uid_entry, node.node) { + int ret; + int64_t local_value = 0; + bool overflow = false, underflow = false; + + ret = ustctl_counter_aggregate(uid_entry->daemon_counter, + dimension_indexes, &local_value, &overflow, + &underflow); + if (ret || local_value < 0) { + uid_t trigger_owner_uid; + const char *trigger_name; + + get_trigger_info_for_log(trigger, &trigger_name, + &trigger_owner_uid); + + if (ret) { + ERR("Failed to aggregate event notifier error counter values of trigger: trigger name = '%s', trigger owner uid = %d", + trigger_name, + (int) trigger_owner_uid); + } else if (local_value < 0) { + ERR("Negative event notifier error counter value encountered during aggregation: trigger name = '%s', trigger owner uid = %d, value = %" PRId64, + trigger_name, + (int) trigger_owner_uid, + local_value); + } else { + abort(); + } + + status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR; + goto end; + } + + /* Cast is safe as negative values are checked-for above. 
*/ + global_sum += (uint64_t) local_value; + + } + + *count = global_sum; + status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK; + +end: + rcu_read_unlock(); + return status; +} + +static +enum event_notifier_error_accounting_status event_notifier_error_accounting_ust_clear( + const struct lttng_trigger *trigger) +{ + struct lttng_ht_iter iter; + struct error_account_entry *uid_entry; + uint64_t error_counter_index; + enum event_notifier_error_accounting_status status; + size_t dimension_index; + const uint64_t tracer_token = lttng_trigger_get_tracer_token(trigger); + + /* + * Go over all error counters (ignoring uid) as a trigger (and trigger + * errors) can be generated from any applications that this session + * daemon is managing. + */ + + rcu_read_lock(); + status = get_error_counter_index_for_token( + tracer_token, + &error_counter_index); + if (status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) { + uid_t trigger_owner_uid; + const char *trigger_name; + + get_trigger_info_for_log(trigger, &trigger_name, + &trigger_owner_uid); + + ERR("Failed to retrieve index for tracer token: token = %" PRIu64 ", trigger name = '%s', trigger owner uid = %d, status = %s", + tracer_token, trigger_name, + (int) trigger_owner_uid, + error_accounting_status_str(status)); + goto end; + } + + dimension_index = error_counter_index; + + cds_lfht_for_each_entry(error_counter_uid_ht->ht, &iter.iter, + uid_entry, node.node) { + const int ret = ustctl_counter_clear(uid_entry->daemon_counter, + &dimension_index); + + if (ret) { + uid_t trigger_owner_uid; + const char *trigger_name; + + get_trigger_info_for_log(trigger, &trigger_name, + &trigger_owner_uid); + ERR("Failed to clear event notifier counter value for trigger: counter uid = %d, trigger name = '%s', trigger owner uid = %d", + (int) uid_entry->node.key, trigger_name, + (int) trigger_owner_uid); + status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR; + goto end; + } + } + + status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK; +end: 
+ rcu_read_unlock(); + return status; +} +#endif /* HAVE_LIBLTTNG_UST_CTL */ + static enum event_notifier_error_accounting_status event_notifier_error_accounting_kernel_clear( @@ -127,18 +703,13 @@ event_notifier_error_accounting_kernel_clear( if (status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) { uid_t trigger_owner_uid; const char *trigger_name; - const enum lttng_trigger_status trigger_status = - lttng_trigger_get_owner_uid( - trigger, &trigger_owner_uid); - - assert(trigger_status == LTTNG_TRIGGER_STATUS_OK); - if (lttng_trigger_get_name(trigger, &trigger_name) != - LTTNG_TRIGGER_STATUS_OK) { - trigger_name = "(unnamed)"; - } - ERR("Failed to get event notifier error counter index: trigger owner uid = %d, trigger name = '%s'", - trigger_owner_uid, trigger_name); + get_trigger_info_for_log( + trigger, &trigger_name, &trigger_owner_uid); + + ERR("Failed to get event notifier error counter index: trigger owner uid = %d, trigger name = '%s', status = '%s'", + trigger_owner_uid, trigger_name, + error_accounting_status_str(status)); goto end; } @@ -149,7 +720,14 @@ event_notifier_error_accounting_kernel_clear( kernel_error_accountant.kernel_event_notifier_error_counter_fd, &counter_clear); if (ret) { - ERR("Failed to clear event notifier error counter"); + uid_t trigger_owner_uid; + const char *trigger_name; + + get_trigger_info_for_log( + trigger, &trigger_name, &trigger_owner_uid); + + ERR("Failed to clear kernel event notifier error counter: trigger owner uid = %d, trigger name = '%s'", + trigger_owner_uid, trigger_name); status = EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR; goto end; } @@ -274,23 +852,21 @@ event_notifier_error_accounting_register_event_notifier( { uid_t trigger_owner_uid; const char *trigger_name; - const enum lttng_trigger_status trigger_status = - lttng_trigger_get_owner_uid( - trigger, &trigger_owner_uid); - - assert(trigger_status == LTTNG_TRIGGER_STATUS_OK); - if (lttng_trigger_get_name(trigger, &trigger_name) != - 
LTTNG_TRIGGER_STATUS_OK) { - trigger_name = "(unnamed)"; - } - DBG("Event notifier error counter index not found for tracer token (allocating a new one): trigger owner = %d, trigger name = '%s', tracer token = %" PRIu64, - trigger_owner_uid, trigger_name, + get_trigger_info_for_log( + trigger, &trigger_name, &trigger_owner_uid); + + DBG("Event notifier error counter index not found for tracer token (allocating a new one): trigger name = '%s', trigger owner uid = %d, tracer token = %" PRIu64, + trigger_name, trigger_owner_uid, lttng_trigger_get_tracer_token(trigger)); + status = create_error_counter_index_for_token( lttng_trigger_get_tracer_token(trigger), &local_error_counter_index); if (status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) { + ERR("Error creating index for token: status = %s, trigger name = '%s', trigger owner uid = %d", + error_accounting_status_str(status), + trigger_name, trigger_owner_uid); goto end; } /* fall-through. */ @@ -320,6 +896,8 @@ event_notifier_error_accounting_kernel_get_count( status = get_error_counter_index_for_token( lttng_trigger_get_tracer_token(trigger), &error_counter_index); if (status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) { + ERR("Error getting index for token: status=%s", + error_accounting_status_str(status)); goto end; } @@ -334,15 +912,9 @@ event_notifier_error_accounting_kernel_get_count( if (ret || counter_aggregate.value.value < 0) { uid_t trigger_owner_uid; const char *trigger_name; - const enum lttng_trigger_status trigger_status = - lttng_trigger_get_owner_uid( - trigger, &trigger_owner_uid); - - assert(trigger_status == LTTNG_TRIGGER_STATUS_OK); - if (lttng_trigger_get_name(trigger, &trigger_name) != - LTTNG_TRIGGER_STATUS_OK) { - trigger_name = "(unnamed)"; - } + + get_trigger_info_for_log(trigger, &trigger_name, + &trigger_owner_uid); if (counter_aggregate.value.value < 0) { ERR("Invalid negative event notifier error counter value: trigger owner = %d, trigger name = '%s', value = %" PRId64, @@ -376,7 
+948,11 @@ event_notifier_error_accounting_get_count( return event_notifier_error_accounting_kernel_get_count( trigger, count); case LTTNG_DOMAIN_UST: +#ifdef HAVE_LIBLTTNG_UST_CTL + return event_notifier_error_accounting_ust_get_count(trigger, count); +#else return EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK; +#endif /* HAVE_LIBLTTNG_UST_CTL */ default: abort(); } @@ -390,7 +966,11 @@ event_notifier_error_accounting_clear(const struct lttng_trigger *trigger) case LTTNG_DOMAIN_KERNEL: return event_notifier_error_accounting_kernel_clear(trigger); case LTTNG_DOMAIN_UST: +#ifdef HAVE_LIBLTTNG_UST_CTL + return event_notifier_error_accounting_ust_clear(trigger); +#else return EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK; +#endif /* HAVE_LIBLTTNG_UST_CTL */ default: abort(); } @@ -409,27 +989,38 @@ void event_notifier_error_accounting_unregister_event_notifier( { struct lttng_ht_iter iter; struct lttng_ht_node_u64 *node; - struct index_ht_entry *index_entry; + const uint64_t tracer_token = lttng_trigger_get_tracer_token(trigger); enum event_notifier_error_accounting_status status; - enum lttng_index_allocator_status index_alloc_status; - uint64_t tracer_token = lttng_trigger_get_tracer_token(trigger); status = event_notifier_error_accounting_clear(trigger); if (status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) { - ERR("Failed to clear event notifier error counter index"); + /* Trigger details already logged by callee on error. 
*/ + ERR("Failed to clear event notifier error counter during unregistration of event notifier: status = '%s'", + error_accounting_status_str(status)); } rcu_read_lock(); lttng_ht_lookup(error_counter_indexes_ht, &tracer_token, &iter); node = lttng_ht_iter_get_node_u64(&iter); - if(node) { - index_entry = caa_container_of(node, struct index_ht_entry, node); + if (node) { + struct index_ht_entry *index_entry = caa_container_of( + node, typeof(*index_entry), node); + enum lttng_index_allocator_status index_alloc_status; + index_alloc_status = lttng_index_allocator_release( index_allocator, index_entry->error_counter_index); if (index_alloc_status != LTTNG_INDEX_ALLOCATOR_STATUS_OK) { - ERR("Failed to release event notifier error counter index: index = %" PRIu64, - index_entry->error_counter_index); + uid_t trigger_owner_uid; + const char *trigger_name; + + get_trigger_info_for_log(trigger, &trigger_name, + &trigger_owner_uid); + + ERR("Failed to release event notifier error counter index: index = %" PRIu64 ", trigger name = '%s', trigger owner uid = %d", + index_entry->error_counter_index, + trigger_name, (int) trigger_owner_uid); + /* Don't exit, perform the rest of the clean-up. */ } lttng_ht_del(error_counter_indexes_ht, &iter); @@ -439,8 +1030,37 @@ void event_notifier_error_accounting_unregister_event_notifier( rcu_read_unlock(); } +#ifdef HAVE_LIBLTTNG_UST_CTL +static void free_error_account_entry(struct rcu_head *head) +{ + int i; + struct error_account_entry *entry = + caa_container_of(head, typeof(*entry), rcu_head); + + for (i = 0; i < entry->nr_counter_cpu_fds; i++) { + ustctl_release_object(-1, entry->cpu_counters[i]); + free(entry->cpu_counters[i]); + } + + free(entry->cpu_counters); + + ustctl_release_object(-1, entry->counter); + free(entry->counter); + + ustctl_destroy_counter(entry->daemon_counter); + + free(entry); +} +#else +/* Not called without UST support. 
*/ +static void free_error_account_entry(struct rcu_head *head) {} +#endif /* HAVE_LIBLTTNG_UST_CTL */ + void event_notifier_error_accounting_fini(void) { + struct lttng_ht_iter iter; + struct error_account_entry *uid_entry; + lttng_index_allocator_destroy(index_allocator); if (kernel_error_accountant.kernel_event_notifier_error_counter_fd) { @@ -452,8 +1072,22 @@ void event_notifier_error_accounting_fini(void) } /* - * Will assert if some error counters were not released (an internal - * error). + * FIXME error account entries are not reference-counted and torn + * down on last use. They exist from the moment of their first use + * up until the teardown of the session daemon. + */ + rcu_read_lock(); + cds_lfht_for_each_entry(error_counter_uid_ht->ht, &iter.iter, + uid_entry, node.node) { + cds_lfht_del(error_counter_uid_ht->ht, &uid_entry->node.node); + call_rcu(&uid_entry->rcu_head, free_error_account_entry); + } + rcu_read_unlock(); + lttng_ht_destroy(error_counter_uid_ht); + + /* + * Will assert if some error counter indices were not released (an + * internal error). 
*/ lttng_ht_destroy(error_counter_indexes_ht); } diff --git a/src/bin/lttng-sessiond/event-notifier-error-accounting.h b/src/bin/lttng-sessiond/event-notifier-error-accounting.h index 1dea73948..889efffa3 100644 --- a/src/bin/lttng-sessiond/event-notifier-error-accounting.h +++ b/src/bin/lttng-sessiond/event-notifier-error-accounting.h @@ -9,14 +9,18 @@ #define _EVENT_NOTIFIER_ERROR_ACCOUNTING_H #include + #include +#include "ust-app.h" + enum event_notifier_error_accounting_status { EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK, EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_ERR, EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_NOT_FOUND, EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_NOMEM, EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_NO_INDEX_AVAILABLE, + EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_APP_DEAD, }; enum event_notifier_error_accounting_status @@ -26,6 +30,28 @@ enum event_notifier_error_accounting_status event_notifier_error_accounting_register_kernel( int kernel_event_notifier_group_fd); +#ifdef HAVE_LIBLTTNG_UST_CTL +enum event_notifier_error_accounting_status +event_notifier_error_accounting_register_app(struct ust_app *app); + +enum event_notifier_error_accounting_status +event_notifier_error_accounting_unregister_app(struct ust_app *app); +#else /* HAVE_LIBLTTNG_UST_CTL */ +static inline +enum event_notifier_error_accounting_status +event_notifier_error_accounting_register_app(struct ust_app *app) +{ + return EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK; +} + +static inline +enum event_notifier_error_accounting_status +event_notifier_error_accounting_unregister_app(struct ust_app *app) +{ + return EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK; +} +#endif /* HAVE_LIBLTTNG_UST_CTL */ + enum event_notifier_error_accounting_status event_notifier_error_accounting_register_event_notifier( const struct lttng_trigger *trigger, diff --git a/src/bin/lttng-sessiond/ust-app.c b/src/bin/lttng-sessiond/ust-app.c index 50d2f1932..d8546308d 100644 --- a/src/bin/lttng-sessiond/ust-app.c +++ 
b/src/bin/lttng-sessiond/ust-app.c @@ -7,11 +7,14 @@ */ #define _LGPL_SOURCE +#include +#include #include #include #include #include #include +#include #include #include #include @@ -32,6 +35,7 @@ #include #include "buffer-registry.h" +#include "condition-internal.h" #include "fd-limit.h" #include "health-sessiond.h" #include "ust-app.h" @@ -44,6 +48,8 @@ #include "notification-thread-commands.h" #include "rotate.h" #include "event.h" +#include "event-notifier-error-accounting.h" + struct lttng_ht *ust_app_ht; struct lttng_ht *ust_app_ht_by_sock; @@ -999,6 +1005,8 @@ void delete_ust_app(struct ust_app *app) */ if (app->event_notifier_group.object) { enum lttng_error_code ret_code; + enum event_notifier_error_accounting_status status; + const int event_notifier_read_fd = lttng_pipe_get_readfd( app->event_notifier_group.event_pipe); @@ -1009,6 +1017,11 @@ void delete_ust_app(struct ust_app *app) ERR("Failed to remove application tracer event source from notification thread"); } + status = event_notifier_error_accounting_unregister_app(app); + if (status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) { + ERR("Error unregistering app from event notifier error accounting"); + } + ustctl_release_object(sock, app->event_notifier_group.object); free(app->event_notifier_group.object); } @@ -2106,6 +2119,7 @@ static int create_ust_event_notifier(struct ust_app *app, init_ust_event_notifier_from_event_rule(event_rule, &event_notifier); event_notifier.event.token = ua_event_notifier_rule->token; + event_notifier.error_counter_index = ua_event_notifier_rule->error_counter_index; /* Create UST event notifier against the tracer. 
*/ pthread_mutex_lock(&app->sock_lock); @@ -3674,12 +3688,12 @@ int create_ust_app_event_notifier_rule(struct lttng_trigger *trigger, DBG2("UST app create token event rule completed: app = '%s' (ppid: %d), token = %" PRIu64, app->name, app->ppid, lttng_trigger_get_tracer_token(trigger)); -end: - return ret; + goto end; error: /* The RCU read side lock is already being held by the caller. */ delete_ust_app_event_notifier_rule(-1, ua_event_notifier_rule, app); +end: return ret; } @@ -3994,6 +4008,7 @@ int ust_app_setup_event_notifier_group(struct ust_app *app) int event_pipe_write_fd; struct lttng_ust_abi_object_data *event_notifier_group = NULL; enum lttng_error_code lttng_ret; + enum event_notifier_error_accounting_status event_notifier_error_accounting_status; assert(app); @@ -4042,6 +4057,14 @@ int ust_app_setup_event_notifier_group(struct ust_app *app) /* Assign handle only when the complete setup is valid. */ app->event_notifier_group.object = event_notifier_group; + + event_notifier_error_accounting_status = event_notifier_error_accounting_register_app(app); + if (event_notifier_error_accounting_status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) { + ERR("Failed to setup event notifier error accounting for app"); + ret = -1; + goto error; + } + return ret; error: @@ -5962,6 +5985,20 @@ void ust_app_global_update_all_event_notifier_rules(void) rcu_read_unlock(); } +void ust_app_update_event_notifier_error_count(struct lttng_trigger *trigger) +{ + uint64_t error_count = 0; + enum event_notifier_error_accounting_status status; + struct lttng_condition *condition = lttng_trigger_get_condition(trigger); + + status = event_notifier_error_accounting_get_count(trigger, &error_count); + if (status != EVENT_NOTIFIER_ERROR_ACCOUNTING_STATUS_OK) { + ERR("Error getting trigger error count."); + } + + lttng_condition_on_event_set_error_count(condition, error_count); +} + /* * Add context to a specific channel for global UST domain. 
*/ diff --git a/src/bin/lttng-sessiond/ust-app.h b/src/bin/lttng-sessiond/ust-app.h index 9b42d518d..7a4cf1537 100644 --- a/src/bin/lttng-sessiond/ust-app.h +++ b/src/bin/lttng-sessiond/ust-app.h @@ -11,6 +11,7 @@ #include +#include #include #include "trace-ust.h" @@ -114,6 +115,7 @@ struct ust_app_event { struct ust_app_event_notifier_rule { int enabled; + uint64_t error_counter_index; int handle; struct lttng_ust_abi_object_data *obj; /* Holds a strong reference. */ @@ -319,6 +321,9 @@ struct ust_app { */ struct lttng_ust_abi_object_data *object; struct lttng_pipe *event_pipe; + struct lttng_ust_abi_object_data *counter; + struct lttng_ust_abi_object_data **counter_cpu; + int nr_counter_cpu; } event_notifier_group; /* * Hashtable indexing the application's event notifier rule's @@ -355,6 +360,8 @@ void ust_app_global_update_all(struct ltt_ust_session *usess); void ust_app_global_update_event_notifier_rules(struct ust_app *app); void ust_app_global_update_all_event_notifier_rules(void); +void ust_app_update_event_notifier_error_count(struct lttng_trigger *trigger); + void ust_app_clean_list(void); int ust_app_ht_alloc(void); struct ust_app *ust_app_find_by_pid(pid_t pid); @@ -579,7 +586,12 @@ unsigned int ust_app_get_nb_stream(struct ltt_ust_session *usess) { return 0; } - +static inline +void ust_app_update_event_notifier_error_count( + struct lttng_trigger *lttng_trigger) +{ + return; +} static inline int ust_app_supported(void) {