sessiond: notification-thread: Missing action executor status handling
[lttng-tools.git] / src / bin / lttng-sessiond / notification-thread-events.c
index d620159246a54368d44fa9b9391f4007212c54f6..ccd285a8103d6a344bcb3344abbd982f079a0c13 100644 (file)
@@ -21,6 +21,8 @@
 #include <common/macros.h>
 #include <lttng/condition/condition.h>
 #include <lttng/action/action-internal.h>
+#include <lttng/action/group-internal.h>
+#include <lttng/domain-internal.h>
 #include <lttng/notification/notification-internal.h>
 #include <lttng/condition/condition-internal.h>
 #include <lttng/condition/buffer-usage-internal.h>
@@ -48,6 +50,9 @@
 #define CLIENT_POLL_MASK_IN (LPOLLIN | LPOLLERR | LPOLLHUP | LPOLLRDHUP)
 #define CLIENT_POLL_MASK_IN_OUT (CLIENT_POLL_MASK_IN | LPOLLOUT)
 
+/* The tracers currently limit the capture size to PIPE_BUF (4kb on linux). */
+#define MAX_CAPTURE_SIZE (PIPE_BUF)
+
 enum lttng_object_type {
        LTTNG_OBJECT_TYPE_UNKNOWN,
        LTTNG_OBJECT_TYPE_NONE,
@@ -125,19 +130,6 @@ struct lttng_condition_list_element {
        struct cds_list_head node;
 };
 
-/*
- * Facilities to carry the different notifications type in the action processing
- * code path.
- */
-struct lttng_event_notifier_notification {
-       union {
-               struct lttng_ust_event_notifier_notification *ust;
-               struct lttng_kernel_event_notifier_notification *kernel;
-       } notification;
-       uint64_t token;
-       enum lttng_domain_type type;
-};
-
 struct channel_state_sample {
        struct channel_key key;
        struct cds_lfht_node channel_state_ht_node;
@@ -206,6 +198,11 @@ int client_handle_transmission_status(
                enum client_transmission_status transmission_status,
                struct notification_thread_state *state);
 
+static
+int handle_one_event_notifier_notification(
+               struct notification_thread_state *state,
+               int pipe, enum lttng_domain_type domain);
+
 static
 void free_lttng_trigger_ht_element_rcu(struct rcu_head *node);
 
@@ -331,6 +328,38 @@ int match_session(struct cds_lfht_node *node, const void *key)
        return !strcmp(session_info->name, name);
 }
 
+static
+const char *notification_command_type_str(
+               enum notification_thread_command_type type)
+{
+       switch (type) {
+       case NOTIFICATION_COMMAND_TYPE_REGISTER_TRIGGER:
+               return "REGISTER_TRIGGER";
+       case NOTIFICATION_COMMAND_TYPE_UNREGISTER_TRIGGER:
+               return "UNREGISTER_TRIGGER";
+       case NOTIFICATION_COMMAND_TYPE_ADD_CHANNEL:
+               return "ADD_CHANNEL";
+       case NOTIFICATION_COMMAND_TYPE_REMOVE_CHANNEL:
+               return "REMOVE_CHANNEL";
+       case NOTIFICATION_COMMAND_TYPE_SESSION_ROTATION_ONGOING:
+               return "SESSION_ROTATION_ONGOING";
+       case NOTIFICATION_COMMAND_TYPE_SESSION_ROTATION_COMPLETED:
+               return "SESSION_ROTATION_COMPLETED";
+       case NOTIFICATION_COMMAND_TYPE_ADD_TRACER_EVENT_SOURCE:
+               return "ADD_TRACER_EVENT_SOURCE";
+       case NOTIFICATION_COMMAND_TYPE_REMOVE_TRACER_EVENT_SOURCE:
+               return "REMOVE_TRACER_EVENT_SOURCE";
+       case NOTIFICATION_COMMAND_TYPE_LIST_TRIGGERS:
+               return "LIST_TRIGGERS";
+       case NOTIFICATION_COMMAND_TYPE_QUIT:
+               return "QUIT";
+       case NOTIFICATION_COMMAND_TYPE_CLIENT_COMMUNICATION_UPDATE:
+               return "CLIENT_COMMUNICATION_UPDATE";
+       default:
+               abort();
+       }
+}
+
 /*
  * Match trigger based on name and credentials only.
  * Name duplication is NOT allowed for the same uid.
@@ -1622,7 +1651,7 @@ int handle_notification_thread_command_add_channel(
 
        DBG("[notification-thread] Adding channel %s from session %s, channel key = %" PRIu64 " in %s domain",
                        channel_name, session_name, channel_key_int,
-                       channel_domain == LTTNG_DOMAIN_KERNEL ? "kernel" : "user space");
+                       lttng_domain_type_str(channel_domain));
 
        CDS_INIT_LIST_HEAD(&trigger_list);
 
@@ -1723,7 +1752,7 @@ int handle_notification_thread_command_remove_channel(
        struct channel_info *channel_info;
 
        DBG("[notification-thread] Removing channel key = %" PRIu64 " in %s domain",
-                       channel_key, domain == LTTNG_DOMAIN_KERNEL ? "kernel" : "user space");
+                       channel_key, lttng_domain_type_str(domain));
 
        rcu_read_lock();
 
@@ -1961,6 +1990,59 @@ end:
        return ret;
 }
 
+static
+int drain_event_notifier_notification_pipe(
+               struct notification_thread_state *state,
+               int pipe, enum lttng_domain_type domain)
+{
+       struct lttng_poll_event events = {0};
+       int ret;
+
+       ret = lttng_poll_create(&events, 1, LTTNG_CLOEXEC);
+       if (ret < 0) {
+               ERR("[notification-thread] Error creating lttng_poll_event");
+               goto end;
+       }
+
+       ret = lttng_poll_add(&events, pipe, LPOLLIN);
+       if (ret < 0) {
+               ERR("[notification-thread] Error adding fd event notifier notification pipe to lttng_poll_event: fd = %d",
+                               pipe);
+               goto end;
+       }
+
+       while (true) {
+               /*
+                * Continue to consume notifications as long as there are new
+                * ones coming in. The tracer has been asked to stop producing
+                * them.
+                *
+                * LPOLLIN is explicitly checked since LPOLLHUP is implicitly
+                * monitored (on Linux, at least) and will be returned when
+                * the pipe is closed but empty.
+                */
+               ret = lttng_poll_wait_interruptible(&events, 0);
+               if (ret == 0 || (LTTNG_POLL_GETEV(&events, 0) & LPOLLIN) == 0) {
+                       /* No more notification to be read on this pipe. */
+                       ret = 0;
+                       goto end;
+               } else if (ret < 0) {
+                       PERROR("Failed on lttng_poll_wait_interruptible() call");
+                       ret = -1;
+                       goto end;
+               }
+
+               ret = handle_one_event_notifier_notification(state, pipe, domain);
+               if (ret) {
+                       ERR("[notification-thread] Error consuming an event notifier notification from pipe: fd = %d",
+                                       pipe);
+               }
+       }
+end:
+       lttng_poll_clean(&events);
+       return ret;
+}
+
 static
 int handle_notification_thread_command_remove_tracer_event_source(
                struct notification_thread_state *state,
@@ -2017,6 +2099,24 @@ int handle_notification_thread_command_remove_tracer_event_source(
 
        source_element->is_fd_in_poll_set = false;
 
+       ret = drain_event_notifier_notification_pipe(state, tracer_event_source_fd,
+                       source_element->domain);
+       if (ret) {
+               ERR("[notification-thread] Error draining event notifier notification: tracer_event_source_fd = %d, domain = %s",
+                               tracer_event_source_fd,
+                               lttng_domain_type_str(source_element->domain));
+               cmd_result = LTTNG_ERR_FATAL;
+               goto end;
+       }
+
+       /*
+        * The drain_event_notifier_notification_pipe() call might have read
+        * data from an fd that we received in event in the latest _poll_wait()
+        * call. Make sure the thread call poll_wait() again to ensure we have
+        * a clean state.
+        */
+       state->restart_poll = true;
+
 end:
        free(source_element);
        *_cmd_result = cmd_result;
@@ -2578,11 +2678,15 @@ int handle_notification_thread_command_register_trigger(
                ret = evaluate_session_condition_for_client(condition, state,
                                &evaluation, &object_uid,
                                &object_gid);
+               LTTNG_OPTIONAL_SET(&object_creds.uid, object_uid);
+               LTTNG_OPTIONAL_SET(&object_creds.gid, object_gid);
                break;
        case LTTNG_OBJECT_TYPE_CHANNEL:
                ret = evaluate_channel_condition_for_client(condition, state,
                                &evaluation, &object_uid,
                                &object_gid);
+               LTTNG_OPTIONAL_SET(&object_creds.uid, object_uid);
+               LTTNG_OPTIONAL_SET(&object_creds.gid, object_gid);
                break;
        case LTTNG_OBJECT_TYPE_NONE:
                ret = 0;
@@ -2598,9 +2702,6 @@ int handle_notification_thread_command_register_trigger(
                goto error_put_client_list;
        }
 
-       LTTNG_OPTIONAL_SET(&object_creds.uid, object_uid);
-       LTTNG_OPTIONAL_SET(&object_creds.gid, object_gid);
-
        DBG("Newly registered trigger's condition evaluated to %s",
                        evaluation ? "true" : "false");
        if (!evaluation) {
@@ -2802,22 +2903,22 @@ int handle_notification_thread_command(
                        struct notification_thread_command, cmd_list_node);
        cds_list_del(&cmd->cmd_list_node);
        pthread_mutex_unlock(&handle->cmd_queue.lock);
+
+       DBG("[notification-thread] Received `%s` command",
+                       notification_command_type_str(cmd->type));
        switch (cmd->type) {
        case NOTIFICATION_COMMAND_TYPE_REGISTER_TRIGGER:
-               DBG("[notification-thread] Received register trigger command");
                ret = handle_notification_thread_command_register_trigger(state,
                                cmd->parameters.register_trigger.trigger,
                                &cmd->reply_code);
                break;
        case NOTIFICATION_COMMAND_TYPE_UNREGISTER_TRIGGER:
-               DBG("[notification-thread] Received unregister trigger command");
                ret = handle_notification_thread_command_unregister_trigger(
                                state,
                                cmd->parameters.unregister_trigger.trigger,
                                &cmd->reply_code);
                break;
        case NOTIFICATION_COMMAND_TYPE_ADD_CHANNEL:
-               DBG("[notification-thread] Received add channel command");
                ret = handle_notification_thread_command_add_channel(
                                state,
                                cmd->parameters.add_channel.session.name,
@@ -2830,7 +2931,6 @@ int handle_notification_thread_command(
                                &cmd->reply_code);
                break;
        case NOTIFICATION_COMMAND_TYPE_REMOVE_CHANNEL:
-               DBG("[notification-thread] Received remove channel command");
                ret = handle_notification_thread_command_remove_channel(
                                state, cmd->parameters.remove_channel.key,
                                cmd->parameters.remove_channel.domain,
@@ -2838,9 +2938,6 @@ int handle_notification_thread_command(
                break;
        case NOTIFICATION_COMMAND_TYPE_SESSION_ROTATION_ONGOING:
        case NOTIFICATION_COMMAND_TYPE_SESSION_ROTATION_COMPLETED:
-               DBG("[notification-thread] Received session rotation %s command",
-                               cmd->type == NOTIFICATION_COMMAND_TYPE_SESSION_ROTATION_ONGOING ?
-                               "ongoing" : "completed");
                ret = handle_notification_thread_command_session_rotation(
                                state,
                                cmd->type,
@@ -2879,7 +2976,6 @@ int handle_notification_thread_command(
                break;
        }
        case NOTIFICATION_COMMAND_TYPE_QUIT:
-               DBG("[notification-thread] Received quit command");
                cmd->reply_code = LTTNG_OK;
                ret = 1;
                goto end;
@@ -4146,34 +4242,30 @@ end:
        return ret;
 }
 
-int handle_notification_thread_event_notification(struct notification_thread_state *state,
-               int notification_pipe_read_fd,
-               enum lttng_domain_type domain)
+static
+struct lttng_event_notifier_notification *recv_one_event_notifier_notification(
+               int notification_pipe_read_fd, enum lttng_domain_type domain)
 {
        int ret;
-       struct lttng_ust_event_notifier_notification ust_notification;
-       struct lttng_kernel_event_notifier_notification kernel_notification;
-       struct cds_lfht_node *node;
-       struct cds_lfht_iter iter;
-       struct notification_trigger_tokens_ht_element *element;
-       enum lttng_action_type action_type;
-       const struct lttng_action *action;
-       struct lttng_event_notifier_notification notification;
+       uint64_t token;
+       struct lttng_event_notifier_notification *notification = NULL;
+       char *capture_buffer = NULL;
+       size_t capture_buffer_size;
        void *reception_buffer;
        size_t reception_size;
 
-       notification.type = domain;
+       struct lttng_ust_abi_event_notifier_notification ust_notification;
+       struct lttng_kernel_event_notifier_notification kernel_notification;
 
+       /* Init lttng_event_notifier_notification */
        switch(domain) {
        case LTTNG_DOMAIN_UST:
                reception_buffer = (void *) &ust_notification;
                reception_size = sizeof(ust_notification);
-               notification.notification.ust = &ust_notification;
                break;
        case LTTNG_DOMAIN_KERNEL:
                reception_buffer = (void *) &kernel_notification;
                reception_size = sizeof(kernel_notification);
-               notification.notification.kernel = &kernel_notification;
                break;
        default:
                abort();
@@ -4194,26 +4286,86 @@ int handle_notification_thread_event_notification(struct notification_thread_sta
 
        switch(domain) {
        case LTTNG_DOMAIN_UST:
-               notification.token = ust_notification.token;
+               token = ust_notification.token;
+               capture_buffer_size = ust_notification.capture_buf_size;
                break;
        case LTTNG_DOMAIN_KERNEL:
-               notification.token = kernel_notification.token;
+               token = kernel_notification.token;
+               capture_buffer_size = 0;
                break;
        default:
                abort();
        }
 
+       if (capture_buffer_size == 0) {
+               capture_buffer = NULL;
+               goto skip_capture;
+       }
+
+       if (capture_buffer_size > MAX_CAPTURE_SIZE) {
+               ERR("[notification-thread] Event notifier has a capture payload size which exceeds the maximum allowed size: capture_payload_size = %zu bytes, max allowed size = %d bytes",
+                               capture_buffer_size, MAX_CAPTURE_SIZE);
+               goto end;
+       }
+
+       capture_buffer = zmalloc(capture_buffer_size);
+       if (!capture_buffer) {
+               ERR("[notification-thread] Failed to allocate capture buffer");
+               goto end;
+       }
+
+       /* Fetch additional payload (capture). */
+       ret = lttng_read(notification_pipe_read_fd, capture_buffer, capture_buffer_size);
+       if (ret != capture_buffer_size) {
+               ERR("[notification-thread] Failed to read from event source pipe (fd = %i)",
+                               notification_pipe_read_fd);
+               goto end;
+       }
+
+skip_capture:
+       notification = lttng_event_notifier_notification_create(token, domain,
+                       capture_buffer, capture_buffer_size);
+       if (notification == NULL) {
+               goto end;
+       }
+
+       /*
+        * Ownership transfered to the lttng_event_notifier_notification object.
+        */
+       capture_buffer = NULL;
+
+end:
+       free(capture_buffer);
+       return notification;
+}
+
+static
+int dispatch_one_event_notifier_notification(struct notification_thread_state *state,
+               struct lttng_event_notifier_notification *notification)
+{
+       struct cds_lfht_node *node;
+       struct cds_lfht_iter iter;
+       struct notification_trigger_tokens_ht_element *element;
+       enum lttng_trigger_status trigger_status;
+       struct lttng_evaluation *evaluation = NULL;
+       enum action_executor_status executor_status;
+       struct notification_client_list *client_list = NULL;
+       const char *trigger_name;
+       int ret;
+       unsigned int capture_count = 0;
+
        /* Find triggers associated with this token. */
        rcu_read_lock();
        cds_lfht_lookup(state->trigger_tokens_ht,
-                       hash_key_u64(&notification.token, lttng_ht_seed),
-                       match_trigger_token, &notification.token, &iter);
+                       hash_key_u64(&notification->tracer_token, lttng_ht_seed),
+                       match_trigger_token, &notification->tracer_token, &iter);
        node = cds_lfht_iter_get_node(&iter);
-       if (caa_likely(!node)) {
+       if (caa_unlikely(!node)) {
                /*
-                * This is not an error, slow consumption of the pipe can lead
-                * to situations where a trigger is removed but we still get
-                * tracer notification matching to a previous trigger.
+                * This is not an error, slow consumption of the tracer
+                * notifications can lead to situations where a trigger is
+                * removed but we still get tracer notifications matching a
+                * trigger that no longer exists.
                 */
                ret = 0;
                goto end_unlock;
@@ -4223,22 +4375,153 @@ int handle_notification_thread_event_notification(struct notification_thread_sta
                        struct notification_trigger_tokens_ht_element,
                        node);
 
-       action = lttng_trigger_get_const_action(element->trigger);
-       action_type = lttng_action_get_type(action);
-       DBG("Received message from tracer event source: event source fd = %d, token = %" PRIu64 ", action type = '%s'",
-                       notification_pipe_read_fd, notification.token,
-                       lttng_action_type_string(action_type));
+       if (!lttng_trigger_should_fire(element->trigger)) {
+               ret = 0;
+               goto end_unlock;
+       }
 
-       /* TODO: Perform actions */
+       lttng_trigger_fire(element->trigger);
 
-       ret = 0;
+       trigger_status = lttng_trigger_get_name(element->trigger, &trigger_name);
+       assert(trigger_status == LTTNG_TRIGGER_STATUS_OK);
+
+       if (lttng_condition_event_rule_get_capture_descriptor_count(
+                           lttng_trigger_get_const_condition(element->trigger),
+                           &capture_count) != LTTNG_CONDITION_STATUS_OK) {
+               ERR("Failed to get capture count");
+               ret = -1;
+               goto end;
+       }
+
+       if (!notification->capture_buffer && capture_count != 0) {
+               ERR("Expected capture but capture buffer is null");
+               ret = -1;
+               goto end;
+       }
+
+       evaluation = lttng_evaluation_event_rule_create(
+                       container_of(lttng_trigger_get_const_condition(
+                                                    element->trigger),
+                                       struct lttng_condition_event_rule,
+                                       parent),
+                       trigger_name,
+                       notification->capture_buffer,
+                       notification->capture_buf_size, false);
+
+       if (evaluation == NULL) {
+               ERR("[notification-thread] Failed to create event rule hit evaluation while creating and enqueuing action executor job");
+               ret = -1;
+               goto end_unlock;
+       }
+       client_list = get_client_list_from_condition(state,
+                       lttng_trigger_get_const_condition(element->trigger));
+       executor_status = action_executor_enqueue(state->executor,
+                       element->trigger, evaluation, NULL, client_list);
+       switch (executor_status) {
+       case ACTION_EXECUTOR_STATUS_OK:
+               ret = 0;
+               break;
+       case ACTION_EXECUTOR_STATUS_OVERFLOW:
+       {
+               struct notification_client_list_element *client_list_element,
+                               *tmp;
+
+               /*
+                * Not a fatal error; this is expected and simply means the
+                * executor has too much work queued already.
+                */
+               ret = 0;
+
+               /* No clients subscribed to notifications for this trigger. */
+               if (!client_list) {
+                       break;
+               }
+
+               /* Warn clients that a notification (or more) was dropped. */
+               pthread_mutex_lock(&client_list->lock);
+               cds_list_for_each_entry_safe(client_list_element, tmp,
+                               &client_list->list, node) {
+                       enum client_transmission_status transmission_status;
+                       struct notification_client *client =
+                                       client_list_element->client;
+
+                       pthread_mutex_lock(&client->lock);
+                       ret = client_notification_overflow(client);
+                       if (ret) {
+                               /* Fatal error. */
+                               goto next_client;
+                       }
+
+                       transmission_status =
+                                       client_flush_outgoing_queue(client);
+                       ret = client_handle_transmission_status(
+                                       client, transmission_status, state);
+                       if (ret) {
+                               /* Fatal error. */
+                               goto next_client;
+                       }
+next_client:
+                       pthread_mutex_unlock(&client->lock);
+                       if (ret) {
+                               break;
+                       }
+               }
+
+               pthread_mutex_unlock(&client_list->lock);
+               break;
+       }
+       case ACTION_EXECUTOR_STATUS_INVALID:
+       case ACTION_EXECUTOR_STATUS_ERROR:
+               /* Fatal error, shut down everything. */
+               ERR("Fatal error encoutered while enqueuing action to the action executor");
+               ret = -1;
+               goto end_unlock;
+       default:
+               /* Unhandled error. */
+               abort();
+       }
 
 end_unlock:
+       notification_client_list_put(client_list);
        rcu_read_unlock();
 end:
        return ret;
 }
 
+static
+int handle_one_event_notifier_notification(
+               struct notification_thread_state *state,
+               int pipe, enum lttng_domain_type domain)
+{
+       int ret = 0;
+       struct lttng_event_notifier_notification *notification = NULL;
+
+       notification = recv_one_event_notifier_notification(pipe, domain);
+       if (notification == NULL) {
+               /* Reception failed, don't consider it fatal. */
+               ERR("[notification-thread] Error receiving an event notifier notification from tracer: fd = %i, domain = %s",
+                               pipe, lttng_domain_type_str(domain));
+               goto end;
+       }
+
+       ret = dispatch_one_event_notifier_notification(state, notification);
+       if (ret) {
+               ERR("[notification-thread] Error dispatching an event notifier notification from tracer: fd = %i, domain = %s",
+                               pipe, lttng_domain_type_str(domain));
+               goto end;
+       }
+
+end:
+       lttng_event_notifier_notification_destroy(notification);
+       return ret;
+}
+
+int handle_notification_thread_event_notification(struct notification_thread_state *state,
+               int pipe, enum lttng_domain_type domain)
+{
+       return handle_one_event_notifier_notification(state, pipe, domain);
+}
+
 int handle_notification_thread_channel_sample(
                struct notification_thread_state *state, int pipe,
                enum lttng_domain_type domain)
@@ -4292,8 +4575,7 @@ int handle_notification_thread_channel_sample(
                 */
                DBG("[notification-thread] Received a sample for an unknown channel from consumerd, key = %" PRIu64 " in %s domain",
                                latest_sample.key.key,
-                               domain == LTTNG_DOMAIN_KERNEL ? "kernel" :
-                                       "user space");
+                               lttng_domain_type_str(domain));
                goto end_unlock;
        }
        channel_info = caa_container_of(node, struct channel_info,
This page took 0.02996 seconds and 4 git commands to generate.