X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=blobdiff_plain;f=src%2Fbin%2Flttng-sessiond%2Fnotification-thread-events.c;h=ccd285a8103d6a344bcb3344abbd982f079a0c13;hp=1a71131f0f90d398c37468db093372a3449120e8;hb=98dfca790cb8fcd3bac11a4f4f58c933f99d2a1a;hpb=ac16173e318279dee29504820e3c2ad8ea597712 diff --git a/src/bin/lttng-sessiond/notification-thread-events.c b/src/bin/lttng-sessiond/notification-thread-events.c index 1a71131f0..ccd285a81 100644 --- a/src/bin/lttng-sessiond/notification-thread-events.c +++ b/src/bin/lttng-sessiond/notification-thread-events.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include #include @@ -48,6 +50,9 @@ #define CLIENT_POLL_MASK_IN (LPOLLIN | LPOLLERR | LPOLLHUP | LPOLLRDHUP) #define CLIENT_POLL_MASK_IN_OUT (CLIENT_POLL_MASK_IN | LPOLLOUT) +/* The tracers currently limit the capture size to PIPE_BUF (4kb on linux). */ +#define MAX_CAPTURE_SIZE (PIPE_BUF) + enum lttng_object_type { LTTNG_OBJECT_TYPE_UNKNOWN, LTTNG_OBJECT_TYPE_NONE, @@ -193,6 +198,11 @@ int client_handle_transmission_status( enum client_transmission_status transmission_status, struct notification_thread_state *state); +static +int handle_one_event_notifier_notification( + struct notification_thread_state *state, + int pipe, enum lttng_domain_type domain); + static void free_lttng_trigger_ht_element_rcu(struct rcu_head *node); @@ -318,6 +328,38 @@ int match_session(struct cds_lfht_node *node, const void *key) return !strcmp(session_info->name, name); } +static +const char *notification_command_type_str( + enum notification_thread_command_type type) +{ + switch (type) { + case NOTIFICATION_COMMAND_TYPE_REGISTER_TRIGGER: + return "REGISTER_TRIGGER"; + case NOTIFICATION_COMMAND_TYPE_UNREGISTER_TRIGGER: + return "UNREGISTER_TRIGGER"; + case NOTIFICATION_COMMAND_TYPE_ADD_CHANNEL: + return "ADD_CHANNEL"; + case NOTIFICATION_COMMAND_TYPE_REMOVE_CHANNEL: + return "REMOVE_CHANNEL"; + case NOTIFICATION_COMMAND_TYPE_SESSION_ROTATION_ONGOING: + return "SESSION_ROTATION_ONGOING"; + case NOTIFICATION_COMMAND_TYPE_SESSION_ROTATION_COMPLETED: + return "SESSION_ROTATION_COMPLETED"; + case NOTIFICATION_COMMAND_TYPE_ADD_TRACER_EVENT_SOURCE: + return "ADD_TRACER_EVENT_SOURCE"; + case NOTIFICATION_COMMAND_TYPE_REMOVE_TRACER_EVENT_SOURCE: + return "REMOVE_TRACER_EVENT_SOURCE"; + case NOTIFICATION_COMMAND_TYPE_LIST_TRIGGERS: + return "LIST_TRIGGERS"; + case NOTIFICATION_COMMAND_TYPE_QUIT: + return "QUIT"; + case NOTIFICATION_COMMAND_TYPE_CLIENT_COMMUNICATION_UPDATE: + return "CLIENT_COMMUNICATION_UPDATE"; + default: + abort(); + } +} + /* * Match trigger based on name and credentials only. * Name duplication is NOT allowed for the same uid. @@ -1609,7 +1651,7 @@ int handle_notification_thread_command_add_channel( DBG("[notification-thread] Adding channel %s from session %s, channel key = %" PRIu64 " in %s domain", channel_name, session_name, channel_key_int, - channel_domain == LTTNG_DOMAIN_KERNEL ? "kernel" : "user space"); + lttng_domain_type_str(channel_domain)); CDS_INIT_LIST_HEAD(&trigger_list); @@ -1710,7 +1752,7 @@ int handle_notification_thread_command_remove_channel( struct channel_info *channel_info; DBG("[notification-thread] Removing channel key = %" PRIu64 " in %s domain", - channel_key, domain == LTTNG_DOMAIN_KERNEL ? "kernel" : "user space"); + channel_key, lttng_domain_type_str(domain)); rcu_read_lock(); @@ -1948,6 +1990,59 @@ end: return ret; } +static +int drain_event_notifier_notification_pipe( + struct notification_thread_state *state, + int pipe, enum lttng_domain_type domain) +{ + struct lttng_poll_event events = {0}; + int ret; + + ret = lttng_poll_create(&events, 1, LTTNG_CLOEXEC); + if (ret < 0) { + ERR("[notification-thread] Error creating lttng_poll_event"); + goto end; + } + + ret = lttng_poll_add(&events, pipe, LPOLLIN); + if (ret < 0) { + ERR("[notification-thread] Error adding fd event notifier notification pipe to lttng_poll_event: fd = %d", + pipe); + goto end; + } + + while (true) { + /* + * Continue to consume notifications as long as there are new + * ones coming in. The tracer has been asked to stop producing + * them. + * + * LPOLLIN is explicitly checked since LPOLLHUP is implicitly + * monitored (on Linux, at least) and will be returned when + * the pipe is closed but empty. + */ + ret = lttng_poll_wait_interruptible(&events, 0); + if (ret == 0 || (LTTNG_POLL_GETEV(&events, 0) & LPOLLIN) == 0) { + /* No more notification to be read on this pipe. */ + ret = 0; + goto end; + } else if (ret < 0) { + PERROR("Failed on lttng_poll_wait_interruptible() call"); + ret = -1; + goto end; + } + + ret = handle_one_event_notifier_notification(state, pipe, domain); + if (ret) { + ERR("[notification-thread] Error consuming an event notifier notification from pipe: fd = %d", + pipe); + } + } +end: + lttng_poll_clean(&events); + return ret; +} + static int handle_notification_thread_command_remove_tracer_event_source( struct notification_thread_state *state, @@ -2004,6 +2099,24 @@ int handle_notification_thread_command_remove_tracer_event_source( source_element->is_fd_in_poll_set = false; + ret = drain_event_notifier_notification_pipe(state, tracer_event_source_fd, + source_element->domain); + if (ret) { + ERR("[notification-thread] Error draining event notifier notification: tracer_event_source_fd = %d, domain = %s", + tracer_event_source_fd, + lttng_domain_type_str(source_element->domain)); + cmd_result = LTTNG_ERR_FATAL; + goto end; + } + + /* + * The drain_event_notifier_notification_pipe() call might have read + * data from an fd that we received in event in the latest _poll_wait() + * call. Make sure the thread call poll_wait() again to ensure we have + * a clean state. + */ + state->restart_poll = true; + end: free(source_element); *_cmd_result = cmd_result; @@ -2565,11 +2678,15 @@ int handle_notification_thread_command_register_trigger( ret = evaluate_session_condition_for_client(condition, state, &evaluation, &object_uid, &object_gid); + LTTNG_OPTIONAL_SET(&object_creds.uid, object_uid); + LTTNG_OPTIONAL_SET(&object_creds.gid, object_gid); break; case LTTNG_OBJECT_TYPE_CHANNEL: ret = evaluate_channel_condition_for_client(condition, state, &evaluation, &object_uid, &object_gid); + LTTNG_OPTIONAL_SET(&object_creds.uid, object_uid); + LTTNG_OPTIONAL_SET(&object_creds.gid, object_gid); break; case LTTNG_OBJECT_TYPE_NONE: ret = 0; @@ -2585,9 +2702,6 @@ int handle_notification_thread_command_register_trigger( goto error_put_client_list; } - LTTNG_OPTIONAL_SET(&object_creds.uid, object_uid); - LTTNG_OPTIONAL_SET(&object_creds.gid, object_gid); - DBG("Newly registered trigger's condition evaluated to %s", evaluation ? "true" : "false"); if (!evaluation) { @@ -2789,22 +2903,22 @@ int handle_notification_thread_command( struct notification_thread_command, cmd_list_node); cds_list_del(&cmd->cmd_list_node); pthread_mutex_unlock(&handle->cmd_queue.lock); + + DBG("[notification-thread] Received `%s` command", + notification_command_type_str(cmd->type)); switch (cmd->type) { case NOTIFICATION_COMMAND_TYPE_REGISTER_TRIGGER: - DBG("[notification-thread] Received register trigger command"); ret = handle_notification_thread_command_register_trigger(state, cmd->parameters.register_trigger.trigger, &cmd->reply_code); break; case NOTIFICATION_COMMAND_TYPE_UNREGISTER_TRIGGER: - DBG("[notification-thread] Received unregister trigger command"); ret = handle_notification_thread_command_unregister_trigger( state, cmd->parameters.unregister_trigger.trigger, &cmd->reply_code); break; case NOTIFICATION_COMMAND_TYPE_ADD_CHANNEL: - DBG("[notification-thread] Received add channel command"); ret = handle_notification_thread_command_add_channel( state, cmd->parameters.add_channel.session.name, @@ -2817,7 +2931,6 @@ int handle_notification_thread_command( &cmd->reply_code); break; case NOTIFICATION_COMMAND_TYPE_REMOVE_CHANNEL: - DBG("[notification-thread] Received remove channel command"); ret = handle_notification_thread_command_remove_channel( state, cmd->parameters.remove_channel.key, cmd->parameters.remove_channel.domain, @@ -2825,9 +2938,6 @@ int handle_notification_thread_command( break; case NOTIFICATION_COMMAND_TYPE_SESSION_ROTATION_ONGOING: case NOTIFICATION_COMMAND_TYPE_SESSION_ROTATION_COMPLETED: - DBG("[notification-thread] Received session rotation %s command", - cmd->type == NOTIFICATION_COMMAND_TYPE_SESSION_ROTATION_ONGOING ? - "ongoing" : "completed"); ret = handle_notification_thread_command_session_rotation( state, cmd->type, @@ -2866,7 +2976,6 @@ int handle_notification_thread_command( break; } case NOTIFICATION_COMMAND_TYPE_QUIT: - DBG("[notification-thread] Received quit command"); cmd->reply_code = LTTNG_OK; ret = 1; goto end; @@ -4133,6 +4242,286 @@ end: return ret; } +static +struct lttng_event_notifier_notification *recv_one_event_notifier_notification( + int notification_pipe_read_fd, enum lttng_domain_type domain) +{ + int ret; + uint64_t token; + struct lttng_event_notifier_notification *notification = NULL; + char *capture_buffer = NULL; + size_t capture_buffer_size; + void *reception_buffer; + size_t reception_size; + + struct lttng_ust_abi_event_notifier_notification ust_notification; + struct lttng_kernel_event_notifier_notification kernel_notification; + + /* Init lttng_event_notifier_notification */ + switch(domain) { + case LTTNG_DOMAIN_UST: + reception_buffer = (void *) &ust_notification; + reception_size = sizeof(ust_notification); + break; + case LTTNG_DOMAIN_KERNEL: + reception_buffer = (void *) &kernel_notification; + reception_size = sizeof(kernel_notification); + break; + default: + abort(); + } + + /* + * The monitoring pipe only holds messages smaller than PIPE_BUF, + * ensuring that read/write of tracer notifications are atomic. + */ + ret = lttng_read(notification_pipe_read_fd, reception_buffer, + reception_size); + if (ret != reception_size) { + PERROR("Failed to read from event source notification pipe: fd = %d, size to read = %zu, ret = %d", + notification_pipe_read_fd, reception_size, ret); + ret = -1; + goto end; + } + + switch(domain) { + case LTTNG_DOMAIN_UST: + token = ust_notification.token; + capture_buffer_size = ust_notification.capture_buf_size; + break; + case LTTNG_DOMAIN_KERNEL: + token = kernel_notification.token; + capture_buffer_size = 0; + break; + default: + abort(); + } + + if (capture_buffer_size == 0) { + capture_buffer = NULL; + goto skip_capture; + } + + if (capture_buffer_size > MAX_CAPTURE_SIZE) { + ERR("[notification-thread] Event notifier has a capture payload size which exceeds the maximum allowed size: capture_payload_size = %zu bytes, max allowed size = %d bytes", + capture_buffer_size, MAX_CAPTURE_SIZE); + goto end; + } + + capture_buffer = zmalloc(capture_buffer_size); + if (!capture_buffer) { + ERR("[notification-thread] Failed to allocate capture buffer"); + goto end; + } + + /* Fetch additional payload (capture). */ + ret = lttng_read(notification_pipe_read_fd, capture_buffer, capture_buffer_size); + if (ret != capture_buffer_size) { + ERR("[notification-thread] Failed to read from event source pipe (fd = %i)", + notification_pipe_read_fd); + goto end; + } + +skip_capture: + notification = lttng_event_notifier_notification_create(token, domain, + capture_buffer, capture_buffer_size); + if (notification == NULL) { + goto end; + } + + /* + * Ownership transfered to the lttng_event_notifier_notification object. + */ + capture_buffer = NULL; + +end: + free(capture_buffer); + return notification; +} + +static +int dispatch_one_event_notifier_notification(struct notification_thread_state *state, + struct lttng_event_notifier_notification *notification) +{ + struct cds_lfht_node *node; + struct cds_lfht_iter iter; + struct notification_trigger_tokens_ht_element *element; + enum lttng_trigger_status trigger_status; + struct lttng_evaluation *evaluation = NULL; + enum action_executor_status executor_status; + struct notification_client_list *client_list = NULL; + const char *trigger_name; + int ret; + unsigned int capture_count = 0; + + /* Find triggers associated with this token. */ + rcu_read_lock(); + cds_lfht_lookup(state->trigger_tokens_ht, + hash_key_u64(¬ification->tracer_token, lttng_ht_seed), + match_trigger_token, ¬ification->tracer_token, &iter); + node = cds_lfht_iter_get_node(&iter); + if (caa_unlikely(!node)) { + /* + * This is not an error, slow consumption of the tracer + * notifications can lead to situations where a trigger is + * removed but we still get tracer notifications matching a + * trigger that no longer exists. + */ + ret = 0; + goto end_unlock; + } + + element = caa_container_of(node, + struct notification_trigger_tokens_ht_element, + node); + + if (!lttng_trigger_should_fire(element->trigger)) { + ret = 0; + goto end_unlock; + } + + lttng_trigger_fire(element->trigger); + + trigger_status = lttng_trigger_get_name(element->trigger, &trigger_name); + assert(trigger_status == LTTNG_TRIGGER_STATUS_OK); + + if (lttng_condition_event_rule_get_capture_descriptor_count( + lttng_trigger_get_const_condition(element->trigger), + &capture_count) != LTTNG_CONDITION_STATUS_OK) { + ERR("Failed to get capture count"); + ret = -1; + goto end; + } + + if (!notification->capture_buffer && capture_count != 0) { + ERR("Expected capture but capture buffer is null"); + ret = -1; + goto end; + } + + evaluation = lttng_evaluation_event_rule_create( + container_of(lttng_trigger_get_const_condition( + element->trigger), + struct lttng_condition_event_rule, + parent), + trigger_name, + notification->capture_buffer, + notification->capture_buf_size, false); + + if (evaluation == NULL) { + ERR("[notification-thread] Failed to create event rule hit evaluation while creating and enqueuing action executor job"); + ret = -1; + goto end_unlock; + } + client_list = get_client_list_from_condition(state, + lttng_trigger_get_const_condition(element->trigger)); + executor_status = action_executor_enqueue(state->executor, + element->trigger, evaluation, NULL, client_list); + switch (executor_status) { + case ACTION_EXECUTOR_STATUS_OK: + ret = 0; + break; + case ACTION_EXECUTOR_STATUS_OVERFLOW: + { + struct notification_client_list_element *client_list_element, + *tmp; + + /* + * Not a fatal error; this is expected and simply means the + * executor has too much work queued already. + */ + ret = 0; + + /* No clients subscribed to notifications for this trigger. */ + if (!client_list) { + break; + } + + /* Warn clients that a notification (or more) was dropped. */ + pthread_mutex_lock(&client_list->lock); + cds_list_for_each_entry_safe(client_list_element, tmp, + &client_list->list, node) { + enum client_transmission_status transmission_status; + struct notification_client *client = + client_list_element->client; + + pthread_mutex_lock(&client->lock); + ret = client_notification_overflow(client); + if (ret) { + /* Fatal error. */ + goto next_client; + } + + transmission_status = + client_flush_outgoing_queue(client); + ret = client_handle_transmission_status( + client, transmission_status, state); + if (ret) { + /* Fatal error. */ + goto next_client; + } +next_client: + pthread_mutex_unlock(&client->lock); + if (ret) { + break; + } + } + + pthread_mutex_unlock(&client_list->lock); + break; + } + case ACTION_EXECUTOR_STATUS_INVALID: + case ACTION_EXECUTOR_STATUS_ERROR: + /* Fatal error, shut down everything. */ + ERR("Fatal error encoutered while enqueuing action to the action executor"); + ret = -1; + goto end_unlock; + default: + /* Unhandled error. */ + abort(); + } + +end_unlock: + notification_client_list_put(client_list); + rcu_read_unlock(); +end: + return ret; +} + +static +int handle_one_event_notifier_notification( + struct notification_thread_state *state, + int pipe, enum lttng_domain_type domain) +{ + int ret = 0; + struct lttng_event_notifier_notification *notification = NULL; + + notification = recv_one_event_notifier_notification(pipe, domain); + if (notification == NULL) { + /* Reception failed, don't consider it fatal. */ + ERR("[notification-thread] Error receiving an event notifier notification from tracer: fd = %i, domain = %s", + pipe, lttng_domain_type_str(domain)); + goto end; + } + + ret = dispatch_one_event_notifier_notification(state, notification); + if (ret) { + ERR("[notification-thread] Error dispatching an event notifier notification from tracer: fd = %i, domain = %s", + pipe, lttng_domain_type_str(domain)); + goto end; + } + +end: + lttng_event_notifier_notification_destroy(notification); + return ret; +} + +int handle_notification_thread_event_notification(struct notification_thread_state *state, + int pipe, enum lttng_domain_type domain) +{ + return handle_one_event_notifier_notification(state, pipe, domain); +} + int handle_notification_thread_channel_sample( struct notification_thread_state *state, int pipe, enum lttng_domain_type domain) @@ -4186,8 +4575,7 @@ int handle_notification_thread_channel_sample( */ DBG("[notification-thread] Received a sample for an unknown channel from consumerd, key = %" PRIu64 " in %s domain", latest_sample.key.key, - domain == LTTNG_DOMAIN_KERNEL ? "kernel" : - "user space"); + lttng_domain_type_str(domain)); goto end_unlock; } channel_info = caa_container_of(node, struct channel_info,