Add env var UST_REGISTER_TIMEOUT
[lttng-ust.git] / libust / lttng-ust-comm.c
index 1c7b35a36125fa952824898aa11b7ba21c55b794..8f0cb85b95d2074c1ff2c362e69ce3de1eb7e994 100644 (file)
@@ -27,6 +27,8 @@
 #include <lttng-ust-comm.h>
 #include <ust/usterr-signal-safe.h>
 #include <pthread.h>
+#include <semaphore.h>
+#include <time.h>
 #include <assert.h>
 
 /*
@@ -38,25 +40,43 @@ static pthread_mutex_t lttng_ust_comm_mutex = PTHREAD_MUTEX_INITIALIZER;
 /* Should the ust comm thread quit ? */
 static int lttng_ust_comm_should_quit;
 
+/*
+ * Wait for either of these before continuing to the main
+ * program:
+ * - the register_done message from sessiond daemon
+ *   (will let the sessiond daemon enable sessions before main
+ *   starts.)
+ * - sessiond daemon is not reachable.
+ * - timeout (ensuring applications are resilient to session
+ *   daemon problems).
+ */
+static sem_t constructor_wait;
+
 /*
  * Info about socket and associated listener thread.
  */
 struct sock_info {
+       const char *name;
        char sock_path[PATH_MAX];
        int socket;
        pthread_t ust_listener; /* listener thread */
+       int root_handle;
 };
 
 /* Socket from app (connect) to session daemon (listen) for communication */
 struct sock_info global_apps = {
+       .name = "global",
        .sock_path = DEFAULT_GLOBAL_APPS_UNIX_SOCK,
        .socket = -1,
+       .root_handle = -1,
 };
 
 /* TODO: allow global_apps_sock_path override */
 
 struct sock_info local_apps = {
+       .name = "local",
        .socket = -1,
+       .root_handle = -1,
 };
 
 static
@@ -99,9 +119,9 @@ int send_reply(int sock, struct lttcomm_ust_reply *lur)
 {
        ssize_t len;
 
-       len = lttcomm_send_unix_sock(sock, &lur, sizeof(lur));
+       len = lttcomm_send_unix_sock(sock, lur, sizeof(*lur));
        switch (len) {
-       case sizeof(lur):
+       case sizeof(*lur):
                DBG("message successfully sent");
                return 0;
        case -1:
@@ -117,62 +137,96 @@ int send_reply(int sock, struct lttcomm_ust_reply *lur)
 }
 
 static
-int handle_message(int sock, struct lttcomm_ust_msg *lum)
+int handle_register_done(void)
+{
+       int ret;
+
+       ret = sem_post(&constructor_wait);
+       assert(!ret);
+       return 0;
+}
+
+static
+int handle_message(struct sock_info *sock_info,
+               int sock, struct lttcomm_ust_msg *lum)
 {
        int ret = 0;
+       const struct objd_ops *ops;
+       struct lttcomm_ust_reply lur;
 
        pthread_mutex_lock(&lttng_ust_comm_mutex);
 
+       memset(&lur, 0, sizeof(lur));
+
        if (lttng_ust_comm_should_quit) {
-               ret = 0;
+               ret = -EPERM;
                goto end;
        }
 
-       switch (lum->cmd_type) {
-       case UST_CREATE_SESSION:
-       {
-               struct lttcomm_ust_reply lur;
-
-               DBG("Handling create session message");
-               memset(&lur, 0, sizeof(lur));
-               lur.cmd_type = UST_CREATE_SESSION;
-               ret = lttng_abi_create_session();
-               if (ret >= 0) {
-                       lur.ret_val = ret;
-                       lur.ret_code = LTTCOMM_OK;
-               } else {
-                       lur.ret_code = LTTCOMM_SESSION_FAIL;
-               }
-               ret = send_reply(sock, &lur);
-               break;
+       ops = objd_ops(lum->handle);
+       if (!ops) {
+               ret = -ENOENT;
+               goto end;
        }
-       case UST_RELEASE:
-       {
-               struct lttcomm_ust_reply lur;
-
-               DBG("Handling release message, handle: %d",
-                       lum->handle);
-               memset(&lur, 0, sizeof(lur));
-               lur.cmd_type = UST_RELEASE;
-               ret = objd_unref(lum->handle);
-               if (!ret) {
-                       lur.ret_code = LTTCOMM_OK;
-               } else {
-                       lur.ret_code = LTTCOMM_ERR;
-               }
-               ret = send_reply(sock, &lur);
+
+       switch (lum->cmd) {
+       case LTTNG_UST_REGISTER_DONE:
+               if (lum->handle == LTTNG_UST_ROOT_HANDLE)
+                       ret = handle_register_done();
+               else
+                       ret = -EINVAL;
+               break;
+       case LTTNG_UST_RELEASE:
+               if (lum->handle == LTTNG_UST_ROOT_HANDLE)
+                       ret = -EPERM;
+               else
+                       ret = objd_unref(lum->handle);
                break;
-       }
        default:
-               ERR("Unimplemented command %d", (int) lum->cmd_type);
-               ret = -1;
-               goto end;
+               if (ops->cmd)
+                       ret = ops->cmd(lum->handle, lum->cmd,
+                                       (unsigned long) &lum->u);
+               else
+                       ret = -ENOSYS;
+               break;
        }
+
 end:
+       lur.handle = lum->handle;
+       lur.cmd = lum->cmd;
+       lur.ret_val = ret;
+       if (ret >= 0) {
+               lur.ret_code = LTTCOMM_OK;
+       } else {
+               lur.ret_code = LTTCOMM_SESSION_FAIL;
+       }
+       ret = send_reply(sock, &lur);
+
        pthread_mutex_unlock(&lttng_ust_comm_mutex);
        return ret;
 }
 
+static
+void cleanup_sock_info(struct sock_info *sock_info)
+{
+       int ret;
+
+       if (sock_info->socket != -1) {
+               ret = close(sock_info->socket);
+               if (ret) {
+                       ERR("Error closing local apps socket");
+               }
+               sock_info->socket = -1;
+       }
+       if (sock_info->root_handle != -1) {
+               ret = objd_unref(sock_info->root_handle);
+               if (ret) {
+                       ERR("Error unref root handle");
+               }
+               sock_info->root_handle = -1;
+       }
+}
+
 /*
  * This thread does not allocate any resource, except within
  * handle_message, within mutex protection. This mutex protects against
@@ -198,54 +252,83 @@ restart:
        if (sock_info->socket != -1) {
                ret = close(sock_info->socket);
                if (ret) {
-                       ERR("Error closing local apps socket");
+                       ERR("Error closing %s apps socket", sock_info->name);
                }
                sock_info->socket = -1;
        }
+
        /* Check for sessiond availability with pipe TODO */
 
        /* Register */
        ret = lttcomm_connect_unix_sock(sock_info->sock_path);
        if (ret < 0) {
-               ERR("Error connecting to global apps socket");
+               ERR("Error connecting to %s apps socket", sock_info->name);
+               /*
+                * If we cannot find the sessiond daemon, don't delay
+                * constructor execution.
+                */
+               ret = handle_register_done();
+               assert(!ret);
                pthread_mutex_unlock(&lttng_ust_comm_mutex);
+               sleep(5);
                goto restart;
-       } else {
-               sock_info->socket = sock = ret;
-               pthread_mutex_unlock(&lttng_ust_comm_mutex);
+       }
+
+       sock_info->socket = sock = ret;
+
+       /*
+        * Create only one root handle per listener thread for the whole
+        * process lifetime.
+        */
+       if (sock_info->root_handle == -1) {
+               ret = lttng_abi_create_root_handle();
+               if (ret) {
+                       ERR("Error creating root handle");
+                       pthread_mutex_unlock(&lttng_ust_comm_mutex);
+                       goto quit;
+               }
+               sock_info->root_handle = ret;
        }
 
        ret = register_app_to_sessiond(sock);
        if (ret < 0) {
-               ERR("Error registering app to local apps socket");
+               ERR("Error registering to %s apps socket", sock_info->name);
+               /*
+                * If we cannot register to the sessiond daemon, don't
+                * delay constructor execution.
+                */
+               ret = handle_register_done();
+               assert(!ret);
+               pthread_mutex_unlock(&lttng_ust_comm_mutex);
                sleep(5);
                goto restart;
        }
+       pthread_mutex_unlock(&lttng_ust_comm_mutex);
+
        for (;;) {
                ssize_t len;
                struct lttcomm_ust_msg lum;
 
-               /* Receive session handle */
                len = lttcomm_recv_unix_sock(sock, &lum, sizeof(lum));
                switch (len) {
                case 0: /* orderly shutdown */
-                       DBG("ltt-sessiond has performed an orderly shutdown\n");
+                       DBG("%s ltt-sessiond has performed an orderly shutdown\n", sock_info->name);
                        goto end;
                case sizeof(lum):
                        DBG("message received\n");
-                       ret = handle_message(sock, &lum);
+                       ret = handle_message(sock_info, sock, &lum);
                        if (ret < 0) {
-                               ERR("Error handling message\n");
+                               ERR("Error handling message for %s socket", sock_info->name);
                        }
                        continue;
                case -1:
                        if (errno == ECONNRESET) {
-                               ERR("remote end closed connection\n");
+                               ERR("%s remote end closed connection\n", sock_info->name);
                                goto end;
                        }
                        goto end;
                default:
-                       ERR("incorrect message size: %zd\n", len);
+                       ERR("incorrect message size (%s socket): %zd\n", sock_info->name, len);
                        continue;
                }
 
@@ -256,6 +339,45 @@ quit:
        return NULL;
 }
 
+/*
+ * Return values: -1: don't wait. 0: wait forever. 1: timeout wait.
+ */
+static
+int get_timeout(struct timespec *constructor_timeout)
+{
+       long constructor_delay_ms = LTTNG_UST_DEFAULT_CONSTRUCTOR_TIMEOUT_MS;
+       char *str_delay;
+       int ret;
+
+       str_delay = getenv("UST_REGISTER_TIMEOUT");
+       if (str_delay) {
+               constructor_delay_ms = strtol(str_delay, NULL, 10);
+       }
+
+       switch (constructor_delay_ms) {
+       case -1:/* fall-through */
+       case 0:
+               return constructor_delay_ms;
+       default:
+               break;
+       }
+
+       /*
+        * If we are unable to find the current time, don't wait.
+        */
+       ret = clock_gettime(CLOCK_REALTIME, constructor_timeout);
+       if (ret) {
+               return -1;
+       }
+
+       constructor_timeout->tv_nsec =
+               constructor_timeout->tv_nsec + (constructor_delay_ms * 1000000UL);
+       if (constructor_timeout->tv_nsec >= 1000000000UL) {
+               constructor_timeout->tv_sec++;
+               constructor_timeout->tv_nsec -= 1000000000UL;
+       }
+       return 1;
+}
 
 /*
  * sessiond monitoring thread: monitor presence of global and per-user
@@ -265,20 +387,51 @@ quit:
 
 void __attribute__((constructor)) lttng_ust_comm_init(void)
 {
+       struct timespec constructor_timeout;
+       int timeout_mode;
        int ret;
 
        init_usterr();
 
+       timeout_mode = get_timeout(&constructor_timeout);
+
+       ret = sem_init(&constructor_wait, 0, 2);
+       assert(!ret);
+
        ret = setup_local_apps_socket();
        if (ret) {
                ERR("Error setting up to local apps socket");
        }
-#if 0
+
+       /*
+        * Wait for the pthread cond to let us continue to main program
+        * execution. Hold mutex across thread creation, so we start
+        * waiting for the condition before the threads can signal its
+        * completion.
+        */
+       pthread_mutex_lock(&lttng_ust_comm_mutex);
        ret = pthread_create(&global_apps.ust_listener, NULL,
                        ust_listener_thread, &global_apps);
-#endif //0
        ret = pthread_create(&local_apps.ust_listener, NULL,
                        ust_listener_thread, &local_apps);
+
+       switch (timeout_mode) {
+       case 1: /* timeout wait */
+               ret = sem_timedwait(&constructor_wait, &constructor_timeout);
+               if (ret < 0 && errno == ETIMEDOUT) {
+                       ERR("Timed out waiting for ltt-sessiond");
+               } else {
+                       assert(!ret);
+               }
+               break;
+       case 0: /* wait forever */
+               ret = sem_wait(&constructor_wait);
+               assert(!ret);
+               break;
+       case -1:/* no timeout */
+               break;
+       }
+       pthread_mutex_unlock(&lttng_ust_comm_mutex);
 }
 
 void __attribute__((destructor)) lttng_ust_comm_exit(void)
@@ -306,20 +459,15 @@ void __attribute__((destructor)) lttng_ust_comm_exit(void)
                ERR("Error cancelling global ust listener thread");
        }
 #endif //0
-       if (global_apps.socket != -1) {
-               ret = close(global_apps.socket);
-               assert(!ret);
-       }
+
+       cleanup_sock_info(&global_apps);
 
        ret = pthread_cancel(local_apps.ust_listener);
        if (ret) {
                ERR("Error cancelling local ust listener thread");
        }
 
-       if (local_apps.socket != -1) {
-               ret = close(local_apps.socket);
-               assert(!ret);
-       }
+       cleanup_sock_info(&local_apps);
 
        lttng_ust_abi_exit();
        ltt_events_exit();
This page took 0.029612 seconds and 4 git commands to generate.