Fix: sessiond wait futex: handle spurious futex wakeups
[lttng-ust.git] / src / lib / lttng-ust / lttng-ust-comm.c
index d67550797c5443aef3d6342db83c612450458cdb..7f34efe7a2f95ec9e934c0005d163bd71cd0ffcd 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * SPDX-License-Identifier: LGPL-2.1-only
  *
- * Copyright (C) 2011 David Goulet <david.goulet@polymtl.ca>
+ * Copyright (C) 2011 EfficiOS Inc.
  * Copyright (C) 2011 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
  */
 
@@ -118,6 +118,28 @@ static int lttng_ust_comm_should_quit;
  */
 int lttng_ust_loaded __attribute__((weak));
 
+/*
+ * Notes on async-signal-safety of ust lock: a few libc functions are used
+ * which are not strictly async-signal-safe:
+ *
+ * - pthread_setcancelstate
+ * - pthread_mutex_lock
+ * - pthread_mutex_unlock
+ *
+ * As of glibc 2.35, the implementation of pthread_setcancelstate only
+ * touches TLS data, and it appears to be safe to use from signal
+ * handlers. If the libc implementation changes, this will need to be
+ * revisited, and we may ask glibc to provide an async-signal-safe
+ * pthread_setcancelstate.
+ *
+ * As of glibc 2.35, the implementation of pthread_mutex_lock/unlock
+ * for fast mutexes only relies on the pthread_mutex_t structure.
+ * Disabling signals around all uses of this mutex ensures
+ * signal-safety. If the libc implementation changes and eventually uses
+ * other global resources, this will need to be revisited and we may
+ * need to implement our own mutex.
+ */
+
 /*
  * Return 0 on success, -1 if should quit.
  * The lock is taken in both cases.
@@ -134,13 +156,13 @@ int ust_lock(void)
        sigfillset(&sig_all_blocked);
        ret = pthread_sigmask(SIG_SETMASK, &sig_all_blocked, &orig_mask);
        if (ret) {
-               ERR("pthread_sigmask: %s", strerror(ret));
+               ERR("pthread_sigmask: ret=%d", ret);
        }
        if (!URCU_TLS(ust_mutex_nest)++)
                pthread_mutex_lock(&ust_mutex);
        ret = pthread_sigmask(SIG_SETMASK, &orig_mask, NULL);
        if (ret) {
-               ERR("pthread_sigmask: %s", strerror(ret));
+               ERR("pthread_sigmask: ret=%d", ret);
        }
        if (lttng_ust_comm_should_quit) {
                return -1;
@@ -166,13 +188,13 @@ void ust_lock_nocheck(void)
        sigfillset(&sig_all_blocked);
        ret = pthread_sigmask(SIG_SETMASK, &sig_all_blocked, &orig_mask);
        if (ret) {
-               ERR("pthread_sigmask: %s", strerror(ret));
+               ERR("pthread_sigmask: ret=%d", ret);
        }
        if (!URCU_TLS(ust_mutex_nest)++)
                pthread_mutex_lock(&ust_mutex);
        ret = pthread_sigmask(SIG_SETMASK, &orig_mask, NULL);
        if (ret) {
-               ERR("pthread_sigmask: %s", strerror(ret));
+               ERR("pthread_sigmask: ret=%d", ret);
        }
 }
 
@@ -187,13 +209,13 @@ void ust_unlock(void)
        sigfillset(&sig_all_blocked);
        ret = pthread_sigmask(SIG_SETMASK, &sig_all_blocked, &orig_mask);
        if (ret) {
-               ERR("pthread_sigmask: %s", strerror(ret));
+               ERR("pthread_sigmask: ret=%d", ret);
        }
        if (!--URCU_TLS(ust_mutex_nest))
                pthread_mutex_unlock(&ust_mutex);
        ret = pthread_sigmask(SIG_SETMASK, &orig_mask, NULL);
        if (ret) {
-               ERR("pthread_sigmask: %s", strerror(ret));
+               ERR("pthread_sigmask: ret=%d", ret);
        }
        if (lttng_ust_cancelstate_disable_pop()) {
                ERR("lttng_ust_cancelstate_disable_pop");
@@ -1735,18 +1757,25 @@ void wait_for_sessiond(struct sock_info *sock_info)
 
        DBG("Waiting for %s apps sessiond", sock_info->name);
        /* Wait for futex wakeup */
-       if (uatomic_read((int32_t *) sock_info->wait_shm_mmap))
-               goto end_wait;
-
-       while (lttng_ust_futex_async((int32_t *) sock_info->wait_shm_mmap,
-                       FUTEX_WAIT, 0, NULL, NULL, 0)) {
+       while (!uatomic_read((int32_t *) sock_info->wait_shm_mmap)) {
+               if (!lttng_ust_futex_async((int32_t *) sock_info->wait_shm_mmap, FUTEX_WAIT, 0, NULL, NULL, 0)) {
+                       /*
+                        * Prior queued wakeups queued by unrelated code
+                        * using the same address can cause futex wait to
+                        * return 0 even through the futex value is still
+                        * 0 (spurious wakeups). Check the value again
+                        * in user-space to validate whether it really
+                        * differs from 0.
+                        */
+                       continue;
+               }
                switch (errno) {
-               case EWOULDBLOCK:
+               case EAGAIN:
                        /* Value already changed. */
                        goto end_wait;
                case EINTR:
                        /* Retry if interrupted by signal. */
-                       break;  /* Get out of switch. */
+                       break;  /* Get out of switch. Check again. */
                case EFAULT:
                        wait_poll_fallback = 1;
                        DBG(
This page took 0.026315 seconds and 4 git commands to generate.