fix: handle EINTR correctly in get_cpu_mask_from_sysfs
diff --git a/src/urcu-call-rcu-impl.h b/src/urcu-call-rcu-impl.h
index bfa53f8e09f920a833559ba7b334a04bf3cfcf47..2ea1efcd19c978f4a144aa4f7300ecdae8e67101 100644
--- a/src/urcu-call-rcu-impl.h
+++ b/src/urcu-call-rcu-impl.h
@@ -1,30 +1,15 @@
+// SPDX-FileCopyrightText: 2010 Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+//
+// SPDX-License-Identifier: LGPL-2.1-or-later
+
 /*
- * urcu-call-rcu.c
- *
  * Userspace RCU library - batch memory reclamation with kernel API
- *
- * Copyright (c) 2010 Paul E. McKenney <paulmck@linux.vnet.ibm.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #define _LGPL_SOURCE
 #include <stdio.h>
 #include <pthread.h>
 #include <signal.h>
-#include <assert.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
 #include <sched.h>
 
 #include "compat-getcpu.h"
-#include "urcu/wfcqueue.h"
-#include "urcu-call-rcu.h"
-#include "urcu-pointer.h"
-#include "urcu/list.h"
-#include "urcu/futex.h"
-#include "urcu/tls-compat.h"
-#include "urcu/ref.h"
+#include <urcu/assert.h>
+#include <urcu/wfcqueue.h>
+#include <urcu/call-rcu.h>
+#include <urcu/pointer.h>
+#include <urcu/list.h>
+#include <urcu/futex.h>
+#include <urcu/tls-compat.h>
+#include <urcu/ref.h>
 #include "urcu-die.h"
+#include "urcu-utils.h"
+#include "compat-smp.h"
 
 #define SET_AFFINITY_CHECK_PERIOD              (1U << 8)       /* 256 */
 #define SET_AFFINITY_CHECK_PERIOD_MASK         (SET_AFFINITY_CHECK_PERIOD - 1)
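
Note that the commit subject refers to get_cpu_mask_from_sysfs(), a helper behind the compat-smp.h include added above; its own diff is not part of this page, which only shows this file switching from sysconf(_SC_NPROCESSORS_CONF) to get_possible_cpus_array_len() in a later hunk. As a hedged illustration of the EINTR pitfall the subject names, here is a minimal sketch of a sysfs read loop that retries correctly on interruption; the function name, path handling and structure are assumptions, not the actual compat-smp code:

    #include <errno.h>
    #include <fcntl.h>
    #include <sys/types.h>
    #include <unistd.h>

    /*
     * Sketch: read a sysfs CPU mask, retrying on EINTR.  In a do/while
     * loop a `continue` jumps to the loop condition, so a retry written
     * that way would exit the loop while bytes < 0; the infinite loop
     * with an explicit break avoids that pitfall.
     */
    static ssize_t read_sysfs_mask(const char *path, char *buf, size_t max_bytes)
    {
            size_t total = 0;
            int fd = open(path, O_RDONLY);

            if (fd < 0)
                    return -1;
            for (;;) {
                    ssize_t bytes = read(fd, buf + total, max_bytes - total);

                    if (bytes < 0) {
                            if (errno == EINTR)
                                    continue;       /* Interrupted: retry the read. */
                            (void) close(fd);
                            return -1;              /* Real error: report it. */
                    }
                    total += (size_t) bytes;
                    if (bytes == 0 || total == max_bytes)
                            break;                  /* EOF or buffer full. */
            }
            (void) close(fd);
            return (ssize_t) total;
    }

A typical path would be "/sys/devices/system/cpu/possible", matching the helper's name.
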
@@ -79,6 +67,10 @@ struct call_rcu_completion_work {
        struct call_rcu_completion *completion;
 };
 
+enum crdf_flags {
+       CRDF_FLAG_JOIN_THREAD = (1 << 0),
+};
+
 /*
  * List of all call_rcu_data structures to keep valgrind happy.
  * Protected by call_rcu_mutex.
@@ -99,6 +91,8 @@ static pthread_mutex_t call_rcu_mutex = PTHREAD_MUTEX_INITIALIZER;
 
 static struct call_rcu_data *default_call_rcu_data;
 
+static struct urcu_atfork *registered_rculfhash_atfork;
+
 /*
  * If the sched_getcpu() and sysconf(_SC_NPROCESSORS_CONF) calls are
  * available, then we can have call_rcu threads assigned to individual
@@ -116,11 +110,11 @@ static struct call_rcu_data *default_call_rcu_data;
  */
 
 static struct call_rcu_data **per_cpu_call_rcu_data;
-static long maxcpus;
+static long cpus_array_len;
 
-static void maxcpus_reset(void)
+static void cpus_array_len_reset(void)
 {
-       maxcpus = 0;
+       cpus_array_len = 0;
 }
 
 /* Allocate the array if it has not already been allocated. */
@@ -130,15 +124,15 @@ static void alloc_cpu_call_rcu_data(void)
        struct call_rcu_data **p;
        static int warned = 0;
 
-       if (maxcpus != 0)
+       if (cpus_array_len != 0)
                return;
-       maxcpus = sysconf(_SC_NPROCESSORS_CONF);
-       if (maxcpus <= 0) {
+       cpus_array_len = get_possible_cpus_array_len();
+       if (cpus_array_len <= 0) {
                return;
        }
-       p = malloc(maxcpus * sizeof(*per_cpu_call_rcu_data));
+       p = malloc(cpus_array_len * sizeof(*per_cpu_call_rcu_data));
        if (p != NULL) {
-               memset(p, '\0', maxcpus * sizeof(*per_cpu_call_rcu_data));
+               memset(p, '\0', cpus_array_len * sizeof(*per_cpu_call_rcu_data));
                rcu_set_pointer(&per_cpu_call_rcu_data, p);
        } else {
                if (!warned) {
@@ -156,9 +150,9 @@ static void alloc_cpu_call_rcu_data(void)
  * constant.
  */
 static struct call_rcu_data **per_cpu_call_rcu_data = NULL;
-static const long maxcpus = -1;
+static const long cpus_array_len = -1;
 
-static void maxcpus_reset(void)
+static void cpus_array_len_reset(void)
 {
 }
 
@@ -195,7 +189,7 @@ static void call_rcu_unlock(pthread_mutex_t *pmp)
  * Losing affinity can be caused by CPU hotunplug/hotplug, or by
  * cpuset(7).
  */
-#if HAVE_SCHED_SETAFFINITY
+#ifdef HAVE_SCHED_SETAFFINITY
 static
 int set_thread_cpu_affinity(struct call_rcu_data *crdp)
 {
@@ -211,11 +205,8 @@ int set_thread_cpu_affinity(struct call_rcu_data *crdp)
 
        CPU_ZERO(&mask);
        CPU_SET(crdp->cpu_affinity, &mask);
-#if SCHED_SETAFFINITY_ARGS == 2
-       ret = sched_setaffinity(0, &mask);
-#else
        ret = sched_setaffinity(0, sizeof(mask), &mask);
-#endif
+
        /*
         * EINVAL is fine: can be caused by hotunplugged CPUs, or by
         * cpuset(7). This is why we should always retry if we detect
@@ -229,7 +220,7 @@ int set_thread_cpu_affinity(struct call_rcu_data *crdp)
 }
 #else
 static
-int set_thread_cpu_affinity(struct call_rcu_data *crdp)
+int set_thread_cpu_affinity(struct call_rcu_data *crdp __attribute__((unused)))
 {
        return 0;
 }
@@ -239,17 +230,25 @@ static void call_rcu_wait(struct call_rcu_data *crdp)
 {
        /* Read call_rcu list before read futex */
        cmm_smp_mb();
-       if (uatomic_read(&crdp->futex) != -1)
-               return;
-       while (futex_async(&crdp->futex, FUTEX_WAIT, -1,
-                       NULL, NULL, 0)) {
+       while (uatomic_read(&crdp->futex) == -1) {
+               if (!futex_async(&crdp->futex, FUTEX_WAIT, -1, NULL, NULL, 0)) {
+                       /*
+                        * Prior wakeups queued by unrelated code
+                        * using the same address can cause futex wait to
+                        * return 0 even though the futex value is still
+                        * -1 (spurious wakeups). Check the value again
+                        * in user-space to validate whether it really
+                        * differs from -1.
+                        */
+                       continue;
+               }
                switch (errno) {
-               case EWOULDBLOCK:
+               case EAGAIN:
                        /* Value already changed. */
                        return;
                case EINTR:
                        /* Retry if interrupted by signal. */
-                       break;  /* Get out of switch. */
+                       break;  /* Get out of switch. Check again. */
                default:
                        /* Unexpected error. */
                        urcu_die(errno);
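
The rewritten loop above is the essence of the spurious-wakeup fix: every return from FUTEX_WAIT, including a successful one, leads back to re-checking the futex word in user space. The EWOULDBLOCK to EAGAIN change is cosmetic, since the two names share a value on Linux and EAGAIN is what futex(2) documents for "value did not match". The skeleton of the pattern, wrapped in a function for clarity (a condensed sketch of the code above, not a separate API):

    /* Skeleton of the wait loop, for a futex word expected to be -1. */
    static void futex_wait_minus_one(int32_t *futexp)
    {
            while (uatomic_read(futexp) == -1) {
                    if (!futex_async(futexp, FUTEX_WAIT, -1, NULL, NULL, 0))
                            continue;        /* 0 may be spurious: re-check. */
                    switch (errno) {
                    case EAGAIN:
                            return;          /* Value already changed. */
                    case EINTR:
                            break;           /* Signal: loop and re-check. */
                    default:
                            urcu_die(errno); /* Unexpected error. */
                    }
            }
    }
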
@@ -273,17 +272,25 @@ static void call_rcu_completion_wait(struct call_rcu_completion *completion)
 {
        /* Read completion barrier count before read futex */
        cmm_smp_mb();
-       if (uatomic_read(&completion->futex) != -1)
-               return;
-       while (futex_async(&completion->futex, FUTEX_WAIT, -1,
-                       NULL, NULL, 0)) {
+       while (uatomic_read(&completion->futex) == -1) {
+               if (!futex_async(&completion->futex, FUTEX_WAIT, -1, NULL, NULL, 0)) {
+                       /*
+                        * Prior wakeups queued by unrelated code
+                        * using the same address can cause futex wait to
+                        * return 0 even though the futex value is still
+                        * -1 (spurious wakeups). Check the value again
+                        * in user-space to validate whether it really
+                        * differs from -1.
+                        */
+                       continue;
+               }
                switch (errno) {
-               case EWOULDBLOCK:
+               case EAGAIN:
                        /* Value already changed. */
                        return;
                case EINTR:
                        /* Retry if interrupted by signal. */
-                       break;  /* Get out of switch. */
+                       break;  /* Get out of switch. Check again. */
                default:
                        /* Unexpected error. */
                        urcu_die(errno);
@@ -354,8 +361,8 @@ static void *call_rcu_thread(void *arg)
                cds_wfcq_init(&cbs_tmp_head, &cbs_tmp_tail);
                splice_ret = __cds_wfcq_splice_blocking(&cbs_tmp_head,
                        &cbs_tmp_tail, &crdp->cbs_head, &crdp->cbs_tail);
-               assert(splice_ret != CDS_WFCQ_RET_WOULDBLOCK);
-               assert(splice_ret != CDS_WFCQ_RET_DEST_NON_EMPTY);
+               urcu_posix_assert(splice_ret != CDS_WFCQ_RET_WOULDBLOCK);
+               urcu_posix_assert(splice_ret != CDS_WFCQ_RET_DEST_NON_EMPTY);
                if (splice_ret != CDS_WFCQ_RET_SRC_EMPTY) {
                        synchronize_rcu();
                        cbcount = 0;
@@ -416,6 +423,7 @@ static void call_rcu_data_init(struct call_rcu_data **crdpp,
 {
        struct call_rcu_data *crdp;
        int ret;
+       sigset_t newmask, oldmask;
 
        crdp = malloc(sizeof(*crdp));
        if (crdp == NULL)
@@ -428,11 +436,19 @@ static void call_rcu_data_init(struct call_rcu_data **crdpp,
        cds_list_add(&crdp->list, &call_rcu_data_list);
        crdp->cpu_affinity = cpu_affinity;
        crdp->gp_count = 0;
-       cmm_smp_mb();  /* Structure initialized before pointer is planted. */
-       *crdpp = crdp;
+       rcu_set_pointer(crdpp, crdp);
+
+       ret = sigfillset(&newmask);
+       urcu_posix_assert(!ret);
+       ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
+       urcu_posix_assert(!ret);
+
        ret = pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp);
        if (ret)
                urcu_die(ret);
+
+       ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
+       urcu_posix_assert(!ret);
 }
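
Blocking every signal before pthread_create() makes the new call_rcu worker start with a fully blocked signal mask (a thread inherits its creator's mask), so application signal handlers can never run on the worker; the caller's own mask is restored immediately afterwards. The bare pattern, with error checking elided and worker_fn standing in for any thread function (a sketch, not the library's API):

    #include <pthread.h>
    #include <signal.h>

    sigset_t newmask, oldmask;
    pthread_t tid;

    sigfillset(&newmask);                            /* Every signal... */
    pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);  /* ...blocked in this thread. */
    pthread_create(&tid, NULL, worker_fn, NULL);     /* New thread inherits the mask. */
    pthread_sigmask(SIG_SETMASK, &oldmask, NULL);    /* Restore the creator's mask. */
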
 
 /*
@@ -453,11 +469,11 @@ struct call_rcu_data *get_cpu_call_rcu_data(int cpu)
        pcpu_crdp = rcu_dereference(per_cpu_call_rcu_data);
        if (pcpu_crdp == NULL)
                return NULL;
-       if (!warned && maxcpus > 0 && (cpu < 0 || maxcpus <= cpu)) {
+       if (!warned && cpus_array_len > 0 && (cpu < 0 || cpus_array_len <= cpu)) {
                fprintf(stderr, "[error] liburcu: get CPU # out of range\n");
                warned = 1;
        }
-       if (cpu < 0 || maxcpus <= cpu)
+       if (cpu < 0 || cpus_array_len <= cpu)
                return NULL;
        return rcu_dereference(pcpu_crdp[cpu]);
 }
@@ -515,7 +531,7 @@ int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp)
 
        call_rcu_lock(&call_rcu_mutex);
        alloc_cpu_call_rcu_data();
-       if (cpu < 0 || maxcpus <= cpu) {
+       if (cpu < 0 || cpus_array_len <= cpu) {
                if (!warned) {
                        fprintf(stderr, "[error] liburcu: set CPU # out of range\n");
                        warned = 1;
@@ -544,22 +560,27 @@ int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp)
 
 /*
  * Return a pointer to the default call_rcu_data structure, creating
- * one if need be.  Because we never free call_rcu_data structures,
- * we don't need to be in an RCU read-side critical section.
+ * one if need be.
+ *
+ * A call to this function that intends to use the returned
+ * call_rcu_data should be protected by an RCU read-side critical
+ * section.
  */
 
 struct call_rcu_data *get_default_call_rcu_data(void)
 {
-       if (default_call_rcu_data != NULL)
-               return rcu_dereference(default_call_rcu_data);
+       struct call_rcu_data *crdp;
+
+       crdp = rcu_dereference(default_call_rcu_data);
+       if (crdp != NULL)
+               return crdp;
+
        call_rcu_lock(&call_rcu_mutex);
-       if (default_call_rcu_data != NULL) {
-               call_rcu_unlock(&call_rcu_mutex);
-               return default_call_rcu_data;
-       }
-       call_rcu_data_init(&default_call_rcu_data, 0, -1);
+       if (default_call_rcu_data == NULL)
+               call_rcu_data_init(&default_call_rcu_data, 0, -1);
+       crdp = default_call_rcu_data;
        call_rcu_unlock(&call_rcu_mutex);
-       return default_call_rcu_data;
+
+       return crdp;
 }
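
The reworked function above is classic double-checked creation: a lock-free fast path through rcu_dereference(), a re-check under call_rcu_mutex before initializing, and, crucially, the returned pointer is the one read while the lock was held rather than a fresh read of the global after unlock. A generic sketch of the pattern, with hypothetical names (struct obj, make_obj):

    #include <pthread.h>
    #include <urcu/pointer.h>

    struct obj;
    extern struct obj *make_obj(void);

    static pthread_mutex_t obj_mutex = PTHREAD_MUTEX_INITIALIZER;
    static struct obj *default_obj;

    struct obj *get_default_obj(void)
    {
            struct obj *p;

            p = rcu_dereference(default_obj);       /* Lock-free fast path. */
            if (p != NULL)
                    return p;
            pthread_mutex_lock(&obj_mutex);
            if (default_obj == NULL)
                    rcu_set_pointer(&default_obj, make_obj());  /* Publish once. */
            p = default_obj;                        /* Read under the lock. */
            pthread_mutex_unlock(&obj_mutex);
            return p;
    }
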
 
 /*
@@ -580,7 +601,7 @@ struct call_rcu_data *get_call_rcu_data(void)
        if (URCU_TLS(thread_call_rcu_data) != NULL)
                return URCU_TLS(thread_call_rcu_data);
 
-       if (maxcpus > 0) {
+       if (cpus_array_len > 0) {
                crd = get_cpu_call_rcu_data(urcu_sched_getcpu());
                if (crd)
                        return crd;
@@ -631,7 +652,7 @@ int create_all_cpu_call_rcu_data(unsigned long flags)
        call_rcu_lock(&call_rcu_mutex);
        alloc_cpu_call_rcu_data();
        call_rcu_unlock(&call_rcu_mutex);
-       if (maxcpus <= 0) {
+       if (cpus_array_len <= 0) {
                errno = EINVAL;
                return -EINVAL;
        }
@@ -639,7 +660,7 @@ int create_all_cpu_call_rcu_data(unsigned long flags)
                errno = ENOMEM;
                return -ENOMEM;
        }
-       for (i = 0; i < maxcpus; i++) {
+       for (i = 0; i < cpus_array_len; i++) {
                call_rcu_lock(&call_rcu_mutex);
                if (get_cpu_call_rcu_data(i)) {
                        call_rcu_unlock(&call_rcu_mutex);
@@ -738,7 +759,8 @@ void call_rcu(struct rcu_head *head,
  * a list corruption bug in the 0.7.x series. The equivalent fix
  * appeared in 0.6.8 for the stable-0.6 branch.
  */
-void call_rcu_data_free(struct call_rcu_data *crdp)
+static
+void _call_rcu_data_free(struct call_rcu_data *crdp, unsigned int flags)
 {
        if (crdp == NULL || crdp == default_call_rcu_data) {
                return;
@@ -749,9 +771,13 @@ void call_rcu_data_free(struct call_rcu_data *crdp)
                while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0)
                        (void) poll(NULL, 0, 1);
        }
+       call_rcu_lock(&call_rcu_mutex);
        if (!cds_wfcq_empty(&crdp->cbs_head, &crdp->cbs_tail)) {
-               /* Create default call rcu data if need be */
+               call_rcu_unlock(&call_rcu_mutex);
+               /* Create default call rcu data if need be. */
+               /* CBs queued here will be handed to the default list. */
                (void) get_default_call_rcu_data();
+               call_rcu_lock(&call_rcu_mutex);
                __cds_wfcq_splice_blocking(&default_call_rcu_data->cbs_head,
                        &default_call_rcu_data->cbs_tail,
                        &crdp->cbs_head, &crdp->cbs_tail);
@@ -760,13 +786,24 @@ void call_rcu_data_free(struct call_rcu_data *crdp)
                wake_call_rcu_thread(default_call_rcu_data);
        }
 
-       call_rcu_lock(&call_rcu_mutex);
        cds_list_del(&crdp->list);
        call_rcu_unlock(&call_rcu_mutex);
 
+       if (flags & CRDF_FLAG_JOIN_THREAD) {
+               int ret;
+
+               ret = pthread_join(get_call_rcu_thread(crdp), NULL);
+               if (ret)
+                       urcu_die(ret);
+       }
        free(crdp);
 }
 
+void call_rcu_data_free(struct call_rcu_data *crdp)
+{
+       _call_rcu_data_free(crdp, CRDF_FLAG_JOIN_THREAD);
+}
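
Splitting the free path lets the public call_rcu_data_free() join the worker thread, so the thread's resources are actually reclaimed, while the fork-child path further down passes 0 because the worker no longer exists in the child. A hedged usage sketch for the public API:

    /* Sketch: create, use and reclaim a dedicated call_rcu worker. */
    struct call_rcu_data *crdp = create_call_rcu_data(0, -1);

    if (crdp) {
            set_thread_call_rcu_data(crdp);
            /* ... this thread's call_rcu() work is queued to crdp ... */
            set_thread_call_rcu_data(NULL);
            call_rcu_data_free(crdp);   /* Now also joins the worker thread. */
    }
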
+
 /*
  * Clean up all the per-CPU call_rcu threads.
  */
@@ -776,10 +813,10 @@ void free_all_cpu_call_rcu_data(void)
        struct call_rcu_data **crdp;
        static int warned = 0;
 
-       if (maxcpus <= 0)
+       if (cpus_array_len <= 0)
                return;
 
-       crdp = malloc(sizeof(*crdp) * maxcpus);
+       crdp = malloc(sizeof(*crdp) * cpus_array_len);
        if (!crdp) {
                if (!warned) {
                        fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
@@ -788,7 +825,7 @@ void free_all_cpu_call_rcu_data(void)
                return;
        }
 
-       for (cpu = 0; cpu < maxcpus; cpu++) {
+       for (cpu = 0; cpu < cpus_array_len; cpu++) {
                crdp[cpu] = get_cpu_call_rcu_data(cpu);
                if (crdp[cpu] == NULL)
                        continue;
@@ -799,7 +836,7 @@ void free_all_cpu_call_rcu_data(void)
         * call_rcu_data to become quiescent.
         */
        synchronize_rcu();
-       for (cpu = 0; cpu < maxcpus; cpu++) {
+       for (cpu = 0; cpu < cpus_array_len; cpu++) {
                if (crdp[cpu] == NULL)
                        continue;
                call_rcu_data_free(crdp[cpu]);
@@ -858,7 +895,7 @@ void rcu_barrier(void)
                goto online;
        }
 
-       completion = calloc(sizeof(*completion), 1);
+       completion = calloc(1, sizeof(*completion));
        if (!completion)
                urcu_die(errno);
 
@@ -873,7 +910,7 @@ void rcu_barrier(void)
        cds_list_for_each_entry(crdp, &call_rcu_data_list, list) {
                struct call_rcu_completion_work *work;
 
-               work = calloc(sizeof(*work), 1);
+               work = calloc(1, sizeof(*work));
                if (!work)
                        urcu_die(errno);
                work->completion = completion;
@@ -907,9 +944,14 @@ online:
 void call_rcu_before_fork(void)
 {
        struct call_rcu_data *crdp;
+       struct urcu_atfork *atfork;
 
        call_rcu_lock(&call_rcu_mutex);
 
+       atfork = registered_rculfhash_atfork;
+       if (atfork)
+               atfork->before_fork(atfork->priv);
+
        cds_list_for_each_entry(crdp, &call_rcu_data_list, list) {
                uatomic_or(&crdp->flags, URCU_CALL_RCU_PAUSE);
                cmm_smp_mb__after_uatomic_or();
@@ -929,6 +971,7 @@ void call_rcu_before_fork(void)
 void call_rcu_after_fork_parent(void)
 {
        struct call_rcu_data *crdp;
+       struct urcu_atfork *atfork;
 
        cds_list_for_each_entry(crdp, &call_rcu_data_list, list)
                uatomic_and(&crdp->flags, ~URCU_CALL_RCU_PAUSE);
@@ -936,6 +979,9 @@ void call_rcu_after_fork_parent(void)
                while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSED) != 0)
                        (void) poll(NULL, 0, 1);
        }
+       atfork = registered_rculfhash_atfork;
+       if (atfork)
+               atfork->after_fork_parent(atfork->priv);
        call_rcu_unlock(&call_rcu_mutex);
 }
 
@@ -947,10 +993,15 @@ void call_rcu_after_fork_parent(void)
 void call_rcu_after_fork_child(void)
 {
        struct call_rcu_data *crdp, *next;
+       struct urcu_atfork *atfork;
 
        /* Release the mutex. */
        call_rcu_unlock(&call_rcu_mutex);
 
+       atfork = registered_rculfhash_atfork;
+       if (atfork)
+               atfork->after_fork_child(atfork->priv);
+
        /* Do nothing when call_rcu() has not been used */
        if (cds_list_empty(&call_rcu_data_list))
                return;
@@ -963,7 +1014,7 @@ void call_rcu_after_fork_child(void)
        (void)get_default_call_rcu_data();
 
        /* Cleanup call_rcu_data pointers before use */
-       maxcpus_reset();
+       cpus_array_len_reset();
        free(per_cpu_call_rcu_data);
        rcu_set_pointer(&per_cpu_call_rcu_data, NULL);
        URCU_TLS(thread_call_rcu_data) = NULL;
@@ -977,6 +1028,85 @@ void call_rcu_after_fork_child(void)
                if (crdp == default_call_rcu_data)
                        continue;
                uatomic_set(&crdp->flags, URCU_CALL_RCU_STOPPED);
+               /*
+                * Do not join the thread because it does not exist in
+                * the child.
+                */
+               _call_rcu_data_free(crdp, 0);
+       }
+}
+
+void urcu_register_rculfhash_atfork(struct urcu_atfork *atfork)
+{
+       if (CMM_LOAD_SHARED(registered_rculfhash_atfork))
+               return;
+       call_rcu_lock(&call_rcu_mutex);
+       if (!registered_rculfhash_atfork)
+               registered_rculfhash_atfork = atfork;
+       call_rcu_unlock(&call_rcu_mutex);
+}
+
+/*
+ * This unregistration function is deprecated, meant only for internal
+ * use by rculfhash.
+ */
+__attribute__((__noreturn__))
+void urcu_unregister_rculfhash_atfork(struct urcu_atfork *atfork __attribute__((unused)))
+{
+       urcu_die(EPERM);
+}
+
+/*
+ * Tear down the default call_rcu worker thread if there are no queued
+ * callbacks on process exit. This prevents leaking memory.
+ *
+ * Here is how an application can ensure graceful teardown of this
+ * worker thread:
+ *
+ * - An application queuing call_rcu callbacks should invoke
+ *   rcu_barrier() before it exits.
+ * - When chaining call_rcu callbacks, the number of calls to
+ *   rcu_barrier() on application exit must match at least the maximum
+ *   number of chained callbacks.
+ * - If an application chains callbacks endlessly, it would have to be
+ *   modified to stop chaining callbacks when it detects an application
+ *   exit (e.g. with a flag), and wait for quiescence with rcu_barrier()
+ *   after setting that flag.
+ * - The statements above also apply to a library which queues call_rcu
+ *   callbacks, except that it needs to invoke rcu_barrier() in its
+ *   library destructor.
+ *
+ * Note that this function does not presume it is being called when the
+ * application is single-threaded even though this is invoked from a
+ * destructor: this function synchronizes against concurrent calls to
+ * get_default_call_rcu_data().
+ */
+static void urcu_call_rcu_exit(void)
+{
+       struct call_rcu_data *crdp;
+       bool teardown = true;
+
+       if (default_call_rcu_data == NULL)
+               return;
+       call_rcu_lock(&call_rcu_mutex);
+       /*
+        * If the application leaves callbacks in the default call_rcu
+        * worker queue, keep the default worker in place.
+        */
+       crdp = default_call_rcu_data;
+       if (!crdp) {
+               teardown = false;
+               goto unlock;
+       }
+       if (!cds_wfcq_empty(&crdp->cbs_head, &crdp->cbs_tail)) {
+               teardown = false;
+               goto unlock;
+       }
+       rcu_set_pointer(&default_call_rcu_data, NULL);
+unlock:
+       call_rcu_unlock(&call_rcu_mutex);
+       if (teardown) {
+               synchronize_rcu();
                call_rcu_data_free(crdp);
        }
 }
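
In line with the teardown rules spelled out in the comment above urcu_call_rcu_exit(), an application that queues callbacks can make this destructor's cleanup effective with something like the following sketch (stop_queueing_callbacks() is a hypothetical application-side flag setter):

    static void app_shutdown(void)
    {
            stop_queueing_callbacks();  /* No new call_rcu() after this point. */
            rcu_barrier();              /* Drain queued callbacks; repeat once
                                           per level of callback chaining. */
    }
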