notification-thread: remove fd from pollset on LPOLLHUP and friends
authorFrancis Deslauriers <francis.deslauriers@efficios.com>
Wed, 26 May 2021 20:05:16 +0000 (16:05 -0400)
committerJérémie Galarneau <jeremie.galarneau@efficios.com>
Mon, 12 Jul 2021 21:35:19 +0000 (17:35 -0400)
When an app dies, it's possible that the notification thread gets an
epoll event (`LPOLLHUP`) that the socket was closed before it gets the
_REMOVE_TRACER_SOURCE command for that source.

In such cases, the notification thread should simply remove the file
descriptor from the pollset and drain the notification on that file
descriptor. It should _not_ remove the _source_element object from the
list.

The removal from the list should only be done when it receives the
_REMOVE_TRACER_SOURCE command.

Signed-off-by: Francis Deslauriers <francis.deslauriers@efficios.com>
Signed-off-by: Jérémie Galarneau <jeremie.galarneau@efficios.com>
Change-Id: I9525315f9e92d0f6ae5e84e26b83a6b7207dce54

src/bin/lttng-sessiond/notification-thread-events.c
src/bin/lttng-sessiond/notification-thread-events.h
src/bin/lttng-sessiond/notification-thread.c

index c7037a482bfcdeccf70e0159089467196b690a79..c5ee4b11ed02052a77f014103b675e93336d6f71 100644 (file)
@@ -2110,95 +2110,115 @@ end:
 }
 
 static
-int handle_notification_thread_command_remove_tracer_event_source(
-               struct notification_thread_state *state,
-               int tracer_event_source_fd,
-               enum lttng_error_code *_cmd_result)
+struct notification_event_tracer_event_source_element *
+find_tracer_event_source_element(struct notification_thread_state *state,
+               int tracer_event_source_fd)
 {
-       int ret = 0;
-       bool found = false;
-       enum lttng_error_code cmd_result = LTTNG_OK;
-       struct notification_event_tracer_event_source_element *source_element = NULL, *tmp;
+       struct notification_event_tracer_event_source_element *source_element;
 
-       cds_list_for_each_entry_safe(source_element, tmp,
+       cds_list_for_each_entry(source_element,
                        &state->tracer_event_sources_list, node) {
-               if (source_element->fd != tracer_event_source_fd) {
-                       continue;
+               if (source_element->fd == tracer_event_source_fd) {
+                       goto end;
                }
-
-               DBG("Removed tracer event source from poll set: tracer_event_source_fd = %d, domain = '%s'",
-                               tracer_event_source_fd,
-                               lttng_domain_type_str(source_element->domain));
-               cds_list_del(&source_element->node);
-               found = true;
-               break;
        }
 
-       if (!found) {
-               /*
-                * This is temporarily allowed since the poll activity set is
-                * not properly cleaned-up for the moment. This is adressed in
-                * an upcoming fix.
-                */
-               source_element = NULL;
-               goto end;
-       }
+       source_element = NULL;
+end:
+       return NULL;
+}
 
-       if (!source_element->is_fd_in_poll_set) {
-               /* Skip the poll set removal. */
-               goto end;
-       }
+static
+int remove_tracer_event_source_from_pollset(
+               struct notification_thread_state *state,
+               struct notification_event_tracer_event_source_element *source_element)
+{
+       int ret = 0;
+
+       assert(source_element->is_fd_in_poll_set);
 
        DBG3("Removing tracer event source from poll set: tracer_event_source_fd = %d, domain = '%s'",
-                       tracer_event_source_fd,
+                       source_element->fd,
                        lttng_domain_type_str(source_element->domain));
 
        /* Removing the fd from the event poll set. */
-       ret = lttng_poll_del(&state->events, tracer_event_source_fd);
+       ret = lttng_poll_del(&state->events, source_element->fd);
        if (ret < 0) {
                ERR("Failed to remove tracer event source from poll set: tracer_event_source_fd = %d, domain = '%s'",
-                               tracer_event_source_fd,
+                               source_element->fd,
                                lttng_domain_type_str(source_element->domain));
-               cmd_result = LTTNG_ERR_FATAL;
+               ret = -1;
                goto end;
        }
 
        source_element->is_fd_in_poll_set = false;
 
-       ret = drain_event_notifier_notification_pipe(state, tracer_event_source_fd,
+       ret = drain_event_notifier_notification_pipe(state, source_element->fd,
                        source_element->domain);
        if (ret) {
                ERR("Error draining event notifier notification: tracer_event_source_fd = %d, domain = %s",
-                               tracer_event_source_fd,
+                               source_element->fd,
                                lttng_domain_type_str(source_element->domain));
-               cmd_result = LTTNG_ERR_FATAL;
+               ret = -1;
                goto end;
        }
 
-       /*
-        * The drain_event_notifier_notification_pipe() call might have read
-        * data from an fd that we received in event in the latest _poll_wait()
-        * call. Make sure the thread call poll_wait() again to ensure we have
-        * a clean state.
-        */
-       state->restart_poll = true;
-
 end:
-       free(source_element);
-       *_cmd_result = cmd_result;
        return ret;
 }
 
-int handle_notification_thread_remove_tracer_event_source_no_result(
+int handle_notification_thread_tracer_event_source_died(
                struct notification_thread_state *state,
                int tracer_event_source_fd)
 {
-       int ret;
-       enum lttng_error_code cmd_result;
+       int ret = 0;
+       struct notification_event_tracer_event_source_element *source_element;
+
+       source_element = find_tracer_event_source_element(state,
+                       tracer_event_source_fd);
+
+       assert(source_element);
+
+       ret = remove_tracer_event_source_from_pollset(state, source_element);
+       if (ret) {
+               ERR("Failed to remove dead tracer event source from poll set");
+       }
 
-       ret = handle_notification_thread_command_remove_tracer_event_source(
-                       state, tracer_event_source_fd, &cmd_result);
-       (void) cmd_result;
+       return ret;
+}
+
+static
+int handle_notification_thread_command_remove_tracer_event_source(
+               struct notification_thread_state *state,
+               int tracer_event_source_fd,
+               enum lttng_error_code *_cmd_result)
+{
+       int ret = 0;
+       enum lttng_error_code cmd_result = LTTNG_OK;
+       struct notification_event_tracer_event_source_element *source_element = NULL;
+
+       source_element = find_tracer_event_source_element(state,
+                       tracer_event_source_fd);
+
+       assert(source_element);
+
+       /* Remove the tracer source from the list. */
+       cds_list_del(&source_element->node);
+
+       if (!source_element->is_fd_in_poll_set) {
+               /* Skip the poll set removal. */
+               goto end;
+       }
+
+       ret = remove_tracer_event_source_from_pollset(state, source_element);
+       if (ret) {
+               ERR("Failed to remove tracer event source from poll set");
+               cmd_result = LTTNG_ERR_FATAL;
+       }
+
+end:
+       free(source_element);
+       *_cmd_result = cmd_result;
        return ret;
 }
 
index dffd26773ad328dfea75b7a63f04ca5581259217..265719181638a64cfe6f4d8727adea65486c3d10 100644 (file)
@@ -32,9 +32,9 @@ int handle_notification_thread_client_disconnect_all(
 int handle_notification_thread_trigger_unregister_all(
                struct notification_thread_state *state);
 
-int handle_notification_thread_remove_tracer_event_source_no_result(
-               struct notification_thread_state *state,
-               int tracer_event_source_fd);
+int handle_notification_thread_tracer_event_source_died(
+               struct notification_thread_state *state,
+               int tracer_event_source_fd);
 
 int handle_notification_thread_client_in(
                struct notification_thread_state *state,
index d2f5a944129d21e49c9b4a9b2b9ce1ec85952d56..0b39a5a3e61cb489c8340ef51a12926a3a76959a 100644 (file)
@@ -568,7 +568,7 @@ static int handle_event_notification_pipe(int event_source_fd,
        int ret = 0;
 
        if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
-               ret = handle_notification_thread_remove_tracer_event_source_no_result(
+               ret = handle_notification_thread_tracer_event_source_died(
                                state, event_source_fd);
                if (ret) {
                        ERR("Failed to remove event notification pipe from poll set: fd = %d",
This page took 0.030981 seconds and 4 git commands to generate.