Fix: agent may not be ready on launch
[lttng-tools.git] / src / bin / lttng-sessiond / agent-thread.c
index 5f4815165fed49c113a8e9ce875c5a374890b7c6..ddc6c7e62aeab3c5e058b0ddd0225eb76ffc1d1d 100644 (file)
@@ -15,7 +15,6 @@
  * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
-#define _GNU_SOURCE
 #define _LGPL_SOURCE
 #include <assert.h>
 
@@ -28,6 +27,7 @@
 
 #include "fd-limit.h"
 #include "agent-thread.h"
+#include "agent.h"
 #include "lttng-sessiond.h"
 #include "session.h"
 #include "utils.h"
@@ -73,53 +73,6 @@ static void update_agent_app(struct agent_app *app)
        session_unlock_list();
 }
 
-/*
- * Destroy a agent application by socket.
- */
-static void destroy_agent_app(int sock)
-{
-       struct agent_app *app;
-
-       assert(sock >= 0);
-
-       /*
-        * Not finding an application is a very important error that should NEVER
-        * happen. The hash table deletion is ONLY done through this call even on
-        * thread cleanup.
-        */
-       rcu_read_lock();
-       app = agent_find_app_by_sock(sock);
-       assert(app);
-       rcu_read_unlock();
-
-       /* RCU read side lock is taken in this function call. */
-       agent_delete_app(app);
-
-       /* The application is freed in a RCU call but the socket is closed here. */
-       agent_destroy_app(app);
-}
-
-/*
- * Cleanup remaining agent apps in the hash table. This should only be called in
- * the exit path of the thread.
- */
-static void clean_agent_apps_ht(void)
-{
-       struct lttng_ht_node_ulong *node;
-       struct lttng_ht_iter iter;
-
-       DBG3("[agent-thread] Cleaning agent apps ht");
-
-       rcu_read_lock();
-       cds_lfht_for_each_entry(agent_apps_ht_by_sock->ht, &iter.iter, node, node) {
-               struct agent_app *app;
-
-               app = caa_container_of(node, struct agent_app, node);
-               destroy_agent_app(app->sock->fd);
-       }
-       rcu_read_unlock();
-}
-
 /*
  * Create and init socket from uri.
  */
@@ -135,8 +88,8 @@ static struct lttcomm_sock *init_tcp_socket(void)
         */
        ret = uri_parse(default_reg_uri, &uri);
        assert(ret);
-       assert(agent_tcp_port);
-       uri->port = agent_tcp_port;
+       assert(config.agent_tcp_port);
+       uri->port = config.agent_tcp_port;
 
        sock = lttcomm_alloc_sock_from_uri(uri);
        uri_free(uri);
@@ -163,7 +116,7 @@ static struct lttcomm_sock *init_tcp_socket(void)
        }
 
        DBG("[agent-thread] Listening on TCP port %u and socket %d",
-                       agent_tcp_port, sock->fd);
+                       config.agent_tcp_port, sock->fd);
 
        return sock;
 
@@ -181,7 +134,7 @@ static void destroy_tcp_socket(struct lttcomm_sock *sock)
 {
        assert(sock);
 
-       DBG3("[agent-thread] Destroy TCP socket on port %u", agent_tcp_port);
+       DBG3("[agent-thread] Destroy TCP socket on port %u", config.agent_tcp_port);
 
        /* This will return gracefully if fd is invalid. */
        sock->ops->close(sock);
@@ -295,11 +248,12 @@ void *agent_thread_manage_registration(void *data)
        }
 
        reg_sock = init_tcp_socket();
+       sessiond_notify_ready();
        if (!reg_sock) {
                goto error_tcp_socket;
        }
 
-       /* Add create valid TCP socket to poll set. */
+       /* Add TCP socket to poll set. */
        ret = lttng_poll_add(&events, reg_sock->fd,
                        LPOLLIN | LPOLLERR | LPOLLHUP | LPOLLRDHUP);
        if (ret < 0) {
@@ -307,8 +261,7 @@ void *agent_thread_manage_registration(void *data)
        }
 
        while (1) {
-               DBG3("[agent-thread] Manage agent polling",
-                               LTTNG_POLL_GETNB(&events));
+               DBG3("[agent-thread] Manage agent polling");
 
                /* Inifinite blocking call, waiting for transmission */
 restart:
@@ -343,39 +296,26 @@ restart:
                                goto exit;
                        }
 
-                       /*
-                        * Check first if this is a POLLERR since POLLIN is also included
-                        * in an error value thus checking first.
-                        */
-                       if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
-                               /* Removing from the poll set */
-                               ret = lttng_poll_del(&events, pollfd);
-                               if (ret < 0) {
-                                       goto error;
-                               }
-
-                               destroy_agent_app(pollfd);
-                       } else if (revents & (LPOLLIN)) {
+                       if (revents & LPOLLIN) {
                                int new_fd;
                                struct agent_app *app = NULL;
 
-                               /* Pollin event of agent app socket should NEVER happen. */
                                assert(pollfd == reg_sock->fd);
-
                                new_fd = handle_registration(reg_sock, &app);
                                if (new_fd < 0) {
-                                       WARN("[agent-thread] agent registration failed. Ignoring.");
-                                       /* Somehow the communication failed. Just continue. */
                                        continue;
                                }
                                /* Should not have a NULL app on success. */
                                assert(app);
 
-                               /* Only add poll error event to only detect shutdown. */
+                               /*
+                                * Since this is a command socket (write then read),
+                                * add only poll error events, to detect shutdown.
+                                */
                                ret = lttng_poll_add(&events, new_fd,
                                                LPOLLERR | LPOLLHUP | LPOLLRDHUP);
                                if (ret < 0) {
-                                       destroy_agent_app(new_fd);
+                                       agent_destroy_app_by_sock(new_fd);
                                        continue;
                                }
 
@@ -383,10 +323,26 @@ restart:
                                update_agent_app(app);
 
                                /* On failure, the poll will detect it and clean it up. */
-                               (void) agent_send_registration_done(app);
+                               ret = agent_send_registration_done(app);
+                               if (ret < 0) {
+                                       /* Removing from the poll set */
+                                       ret = lttng_poll_del(&events, new_fd);
+                                       if (ret < 0) {
+                                               goto error;
+                                       }
+                                       agent_destroy_app_by_sock(new_fd);
+                                       continue;
+                               }
+                       } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
+                               /* Removing from the poll set */
+                               ret = lttng_poll_del(&events, pollfd);
+                               if (ret < 0) {
+                                       goto error;
+                               }
+                               agent_destroy_app_by_sock(pollfd);
                        } else {
-                               ERR("Unknown poll events %u for sock %d", revents, pollfd);
-                               continue;
+                               ERR("Unexpected poll events %u for sock %d", revents, pollfd);
+                               goto error;
                        }
                }
        }
@@ -401,11 +357,6 @@ error_tcp_socket:
 error_poll_create:
        DBG("[agent-thread] is cleaning up and stopping.");
 
-       if (agent_apps_ht_by_sock) {
-               clean_agent_apps_ht();
-               lttng_ht_destroy(agent_apps_ht_by_sock);
-       }
-
        rcu_thread_offline();
        rcu_unregister_thread();
        return NULL;
This page took 0.025748 seconds and 4 git commands to generate.