/*
 * urcu-call-rcu.c
 *
 * Userspace RCU library - batch memory reclamation with kernel API
 *
 * Copyright (c) 2010 Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#define _LGPL_SOURCE
#include <stdio.h>
#include <pthread.h>
#include <signal.h>
#include <assert.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <poll.h>
#include <sys/time.h>
#include <unistd.h>
#include <sched.h>

#include "compat-getcpu.h"
#include <urcu/wfcqueue.h>
#include <urcu/call-rcu.h>
#include <urcu/pointer.h>
#include <urcu/list.h>
#include <urcu/futex.h>
#include <urcu/tls-compat.h>
#include <urcu/ref.h>
#include "urcu-die.h"
#include "urcu-utils.h"

#define SET_AFFINITY_CHECK_PERIOD	(1U << 8)	/* 256 */
#define SET_AFFINITY_CHECK_PERIOD_MASK	(SET_AFFINITY_CHECK_PERIOD - 1)
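
/*
 * A note on the check in set_thread_cpu_affinity() below: because the
 * period is a power of two, "++gp_count & SET_AFFINITY_CHECK_PERIOD_MASK"
 * is zero only once every SET_AFFINITY_CHECK_PERIOD (256) increments, so
 * the (comparatively expensive) affinity re-check runs only that often.
 */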

/* Data structure that identifies a call_rcu thread. */

struct call_rcu_data {
	/*
	 * We do not align head on a different cache-line than tail
	 * mainly because call_rcu callback-invocation threads use
	 * batching ("splice") to get an entire list of callbacks, which
	 * effectively empties the queue, and requires touching the tail
	 * anyway.
	 */
	struct cds_wfcq_tail cbs_tail;
	struct cds_wfcq_head cbs_head;
	unsigned long flags;
	int32_t futex;
	unsigned long qlen; /* maintained for debugging. */
	pthread_t tid;
	int cpu_affinity;
	unsigned long gp_count;
	struct cds_list_head list;
} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));

struct call_rcu_completion {
	int barrier_count;
	int32_t futex;
	struct urcu_ref ref;
};

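/*
 * rcu_barrier() enqueues one such work item on every call_rcu thread;
 * each item decrements barrier_count as it runs, and the last one to
 * complete wakes the barrier waiter through the completion futex.
 */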
struct call_rcu_completion_work {
	struct rcu_head head;
	struct call_rcu_completion *completion;
};

/*
 * List of all call_rcu_data structures to keep valgrind happy.
 * Protected by call_rcu_mutex.
 */

static CDS_LIST_HEAD(call_rcu_data_list);

/* Link a thread using call_rcu() to its call_rcu thread. */

static DEFINE_URCU_TLS(struct call_rcu_data *, thread_call_rcu_data);

/*
 * Guard call_rcu thread creation and atfork handlers.
 */
static pthread_mutex_t call_rcu_mutex = PTHREAD_MUTEX_INITIALIZER;

/* If a given thread does not have its own call_rcu thread, this is the default. */

static struct call_rcu_data *default_call_rcu_data;

static struct urcu_atfork *registered_rculfhash_atfork;
static unsigned long registered_rculfhash_atfork_refcount;

/*
 * If the sched_getcpu() and sysconf(_SC_NPROCESSORS_CONF) calls are
 * available, then we can have call_rcu threads assigned to individual
 * CPUs rather than only to specific threads.
 */

#if defined(HAVE_SYSCONF) && (defined(HAVE_SCHED_GETCPU) || defined(HAVE_GETCPUID))

/*
 * Pointer to array of pointers to per-CPU call_rcu_data structures
 * and # CPUs. per_cpu_call_rcu_data is an RCU-protected pointer to an
 * array of RCU-protected pointers to call_rcu_data. call_rcu acts as an
 * RCU read-side and reads per_cpu_call_rcu_data and the per-cpu pointer
 * without mutex. The call_rcu_mutex protects updates.
 */

static struct call_rcu_data **per_cpu_call_rcu_data;
static long maxcpus;

static void maxcpus_reset(void)
{
	maxcpus = 0;
}

/* Allocate the array if it has not already been allocated. */

static void alloc_cpu_call_rcu_data(void)
{
	struct call_rcu_data **p;
	static int warned = 0;

	if (maxcpus != 0)
		return;
	maxcpus = sysconf(_SC_NPROCESSORS_CONF);
	if (maxcpus <= 0) {
		return;
	}
	p = malloc(maxcpus * sizeof(*per_cpu_call_rcu_data));
	if (p != NULL) {
		memset(p, '\0', maxcpus * sizeof(*per_cpu_call_rcu_data));
		rcu_set_pointer(&per_cpu_call_rcu_data, p);
	} else {
		if (!warned) {
			fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
		}
		warned = 1;
	}
}

#else /* #if defined(HAVE_SYSCONF) && (defined(HAVE_SCHED_GETCPU) || defined(HAVE_GETCPUID)) */

/*
 * per_cpu_call_rcu_data should be constant, but some functions below, used both
 * for cases where the cpu number is available and not available, assume it is
 * not constant.
 */
static struct call_rcu_data **per_cpu_call_rcu_data = NULL;
static const long maxcpus = -1;

static void maxcpus_reset(void)
{
}

static void alloc_cpu_call_rcu_data(void)
{
}

#endif /* #else #if defined(HAVE_SYSCONF) && (defined(HAVE_SCHED_GETCPU) || defined(HAVE_GETCPUID)) */

/* Acquire the specified pthread mutex. */

static void call_rcu_lock(pthread_mutex_t *pmp)
{
	int ret;

	ret = pthread_mutex_lock(pmp);
	if (ret)
		urcu_die(ret);
}

/* Release the specified pthread mutex. */

static void call_rcu_unlock(pthread_mutex_t *pmp)
{
	int ret;

	ret = pthread_mutex_unlock(pmp);
	if (ret)
		urcu_die(ret);
}

/*
 * Periodically retry setting CPU affinity if we migrate.
 * Losing affinity can be caused by CPU hotunplug/hotplug, or by
 * cpuset(7).
 */
#ifdef HAVE_SCHED_SETAFFINITY
static
int set_thread_cpu_affinity(struct call_rcu_data *crdp)
{
	cpu_set_t mask;
	int ret;

	if (crdp->cpu_affinity < 0)
		return 0;
	if (++crdp->gp_count & SET_AFFINITY_CHECK_PERIOD_MASK)
		return 0;
	if (urcu_sched_getcpu() == crdp->cpu_affinity)
		return 0;

	CPU_ZERO(&mask);
	CPU_SET(crdp->cpu_affinity, &mask);
#if SCHED_SETAFFINITY_ARGS == 2
	ret = sched_setaffinity(0, &mask);
#else
	ret = sched_setaffinity(0, sizeof(mask), &mask);
#endif
	/*
	 * EINVAL is fine: can be caused by hotunplugged CPUs, or by
	 * cpuset(7). This is why we should always retry if we detect
	 * migration.
	 */
	if (ret && errno == EINVAL) {
		ret = 0;
		errno = 0;
	}
	return ret;
}
#else
static
int set_thread_cpu_affinity(struct call_rcu_data *crdp __attribute__((unused)))
{
	return 0;
}
#endif

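/*
 * Futex protocol for the wait/wake pairs below: the futex value is -1
 * while a waiter is (about to be) blocked, and 0 otherwise.  The waiter
 * decrements the futex to -1 before sleeping; a waker that observes -1
 * resets it to 0 and issues FUTEX_WAKE.
 */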
static void call_rcu_wait(struct call_rcu_data *crdp)
{
	/* Read call_rcu list before read futex */
	cmm_smp_mb();
	while (uatomic_read(&crdp->futex) == -1) {
		if (!futex_async(&crdp->futex, FUTEX_WAIT, -1, NULL, NULL, 0)) {
			/*
			 * Prior wakeups queued by unrelated code
			 * using the same address can cause futex wait to
			 * return 0 even though the futex value is still
			 * -1 (spurious wakeups). Check the value again
			 * in user-space to validate whether it really
			 * differs from -1.
			 */
			continue;
		}
		switch (errno) {
		case EAGAIN:
			/* Value already changed. */
			return;
		case EINTR:
			/* Retry if interrupted by signal. */
			break;	/* Get out of switch. Check again. */
		default:
			/* Unexpected error. */
			urcu_die(errno);
		}
	}
}

static void call_rcu_wake_up(struct call_rcu_data *crdp)
{
	/* Write to call_rcu list before reading/writing futex */
	cmm_smp_mb();
	if (caa_unlikely(uatomic_read(&crdp->futex) == -1)) {
		uatomic_set(&crdp->futex, 0);
		if (futex_async(&crdp->futex, FUTEX_WAKE, 1,
				NULL, NULL, 0) < 0)
			urcu_die(errno);
	}
}

static void call_rcu_completion_wait(struct call_rcu_completion *completion)
{
	/* Read completion barrier count before read futex */
	cmm_smp_mb();
	while (uatomic_read(&completion->futex) == -1) {
		if (!futex_async(&completion->futex, FUTEX_WAIT, -1, NULL, NULL, 0)) {
			/*
			 * Prior wakeups queued by unrelated code
			 * using the same address can cause futex wait to
			 * return 0 even though the futex value is still
			 * -1 (spurious wakeups). Check the value again
			 * in user-space to validate whether it really
			 * differs from -1.
			 */
			continue;
		}
		switch (errno) {
		case EAGAIN:
			/* Value already changed. */
			return;
		case EINTR:
			/* Retry if interrupted by signal. */
			break;	/* Get out of switch. Check again. */
		default:
			/* Unexpected error. */
			urcu_die(errno);
		}
	}
}

static void call_rcu_completion_wake_up(struct call_rcu_completion *completion)
{
	/* Write to completion barrier count before reading/writing futex */
	cmm_smp_mb();
	if (caa_unlikely(uatomic_read(&completion->futex) == -1)) {
		uatomic_set(&completion->futex, 0);
		if (futex_async(&completion->futex, FUTEX_WAKE, 1,
				NULL, NULL, 0) < 0)
			urcu_die(errno);
	}
}

/* This is the code run by each call_rcu thread. */

static void *call_rcu_thread(void *arg)
{
	unsigned long cbcount;
	struct call_rcu_data *crdp = (struct call_rcu_data *) arg;
	int rt = !!(uatomic_read(&crdp->flags) & URCU_CALL_RCU_RT);

	if (set_thread_cpu_affinity(crdp))
		urcu_die(errno);

	/*
	 * If callbacks take a read-side lock, we need to be registered.
	 */
	rcu_register_thread();

	URCU_TLS(thread_call_rcu_data) = crdp;
	if (!rt) {
		uatomic_dec(&crdp->futex);
		/* Decrement futex before reading call_rcu list */
		cmm_smp_mb();
	}
	for (;;) {
		struct cds_wfcq_head cbs_tmp_head;
		struct cds_wfcq_tail cbs_tmp_tail;
		struct cds_wfcq_node *cbs, *cbs_tmp_n;
		enum cds_wfcq_ret splice_ret;

		if (set_thread_cpu_affinity(crdp))
			urcu_die(errno);

		if (uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSE) {
			/*
			 * Pause requested. Become quiescent: remove
			 * ourselves from all global lists, and don't
			 * process any callback. The callback lists may
			 * still be non-empty though.
			 */
			rcu_unregister_thread();
			cmm_smp_mb__before_uatomic_or();
			uatomic_or(&crdp->flags, URCU_CALL_RCU_PAUSED);
			while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSE) != 0)
				(void) poll(NULL, 0, 1);
			uatomic_and(&crdp->flags, ~URCU_CALL_RCU_PAUSED);
			cmm_smp_mb__after_uatomic_and();
			rcu_register_thread();
		}

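		/*
		 * Splice all queued callbacks onto a local list in one
		 * batch, emptying the shared queue, then wait for a
		 * grace period before invoking them.
		 */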
		cds_wfcq_init(&cbs_tmp_head, &cbs_tmp_tail);
		splice_ret = __cds_wfcq_splice_blocking(&cbs_tmp_head,
			&cbs_tmp_tail, &crdp->cbs_head, &crdp->cbs_tail);
		assert(splice_ret != CDS_WFCQ_RET_WOULDBLOCK);
		assert(splice_ret != CDS_WFCQ_RET_DEST_NON_EMPTY);
		if (splice_ret != CDS_WFCQ_RET_SRC_EMPTY) {
			synchronize_rcu();
			cbcount = 0;
			__cds_wfcq_for_each_blocking_safe(&cbs_tmp_head,
					&cbs_tmp_tail, cbs, cbs_tmp_n) {
				struct rcu_head *rhp;

				rhp = caa_container_of(cbs,
					struct rcu_head, next);
				rhp->func(rhp);
				cbcount++;
			}
			uatomic_sub(&crdp->qlen, cbcount);
		}
		if (uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOP)
			break;
		rcu_thread_offline();
		if (!rt) {
			if (cds_wfcq_empty(&crdp->cbs_head,
					&crdp->cbs_tail)) {
				call_rcu_wait(crdp);
				(void) poll(NULL, 0, 10);
				uatomic_dec(&crdp->futex);
				/*
				 * Decrement futex before reading
				 * call_rcu list.
				 */
				cmm_smp_mb();
			} else {
				(void) poll(NULL, 0, 10);
			}
		} else {
			(void) poll(NULL, 0, 10);
		}
		rcu_thread_online();
	}
	if (!rt) {
		/*
		 * Read call_rcu list before write futex.
		 */
		cmm_smp_mb();
		uatomic_set(&crdp->futex, 0);
	}
	uatomic_or(&crdp->flags, URCU_CALL_RCU_STOPPED);
	rcu_unregister_thread();
	return NULL;
}

/*
 * Create both a call_rcu thread and the corresponding call_rcu_data
 * structure, linking the structure in as specified.  Caller must hold
 * call_rcu_mutex.
 */

static void call_rcu_data_init(struct call_rcu_data **crdpp,
			       unsigned long flags,
			       int cpu_affinity)
{
	struct call_rcu_data *crdp;
	int ret;

	crdp = malloc(sizeof(*crdp));
	if (crdp == NULL)
		urcu_die(errno);
	memset(crdp, '\0', sizeof(*crdp));
	cds_wfcq_init(&crdp->cbs_head, &crdp->cbs_tail);
	crdp->qlen = 0;
	crdp->futex = 0;
	crdp->flags = flags;
	cds_list_add(&crdp->list, &call_rcu_data_list);
	crdp->cpu_affinity = cpu_affinity;
	crdp->gp_count = 0;
	cmm_smp_mb();  /* Structure initialized before pointer is planted. */
	*crdpp = crdp;
	ret = pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp);
	if (ret)
		urcu_die(ret);
}

/*
 * Return a pointer to the call_rcu_data structure for the specified
 * CPU, returning NULL if there is none.  We cannot automatically
 * create it because the platform we are running on might not define
 * urcu_sched_getcpu().
 *
 * The call to this function and use of the returned call_rcu_data
 * should be protected by an RCU read-side lock.
 */

struct call_rcu_data *get_cpu_call_rcu_data(int cpu)
{
	static int warned = 0;
	struct call_rcu_data **pcpu_crdp;

	pcpu_crdp = rcu_dereference(per_cpu_call_rcu_data);
	if (pcpu_crdp == NULL)
		return NULL;
	if (!warned && maxcpus > 0 && (cpu < 0 || maxcpus <= cpu)) {
		fprintf(stderr, "[error] liburcu: get CPU # out of range\n");
		warned = 1;
	}
	if (cpu < 0 || maxcpus <= cpu)
		return NULL;
	return rcu_dereference(pcpu_crdp[cpu]);
}
URCU_ATTR_ALIAS(urcu_stringify(get_cpu_call_rcu_data))
struct call_rcu_data *alias_get_cpu_call_rcu_data();

/*
 * Return the tid corresponding to the call_rcu thread whose
 * call_rcu_data structure is specified.
 */

pthread_t get_call_rcu_thread(struct call_rcu_data *crdp)
{
	return crdp->tid;
}
URCU_ATTR_ALIAS(urcu_stringify(get_call_rcu_thread))
pthread_t alias_get_call_rcu_thread();

/*
 * Create a call_rcu_data structure (with thread) and return a pointer.
 */

static struct call_rcu_data *__create_call_rcu_data(unsigned long flags,
						    int cpu_affinity)
{
	struct call_rcu_data *crdp;

	call_rcu_data_init(&crdp, flags, cpu_affinity);
	return crdp;
}

URCU_ATTR_ALIAS(urcu_stringify(create_call_rcu_data))
struct call_rcu_data *alias_create_call_rcu_data();
struct call_rcu_data *create_call_rcu_data(unsigned long flags,
					   int cpu_affinity)
{
	struct call_rcu_data *crdp;

	call_rcu_lock(&call_rcu_mutex);
	crdp = __create_call_rcu_data(flags, cpu_affinity);
	call_rcu_unlock(&call_rcu_mutex);
	return crdp;
}

/*
 * Set the specified CPU to use the specified call_rcu_data structure.
 *
 * Use NULL to remove a CPU's call_rcu_data structure, but it is
 * the caller's responsibility to dispose of the removed structure.
 * Use get_cpu_call_rcu_data() to obtain a pointer to the old structure
 * (prior to NULLing it out, of course).
 *
 * The caller must wait for a grace period to pass between return from
 * set_cpu_call_rcu_data() and call to call_rcu_data_free() passing the
 * previous call rcu data as argument.
 */

int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp)
{
	static int warned = 0;

	call_rcu_lock(&call_rcu_mutex);
	alloc_cpu_call_rcu_data();
	if (cpu < 0 || maxcpus <= cpu) {
		if (!warned) {
			fprintf(stderr, "[error] liburcu: set CPU # out of range\n");
			warned = 1;
		}
		call_rcu_unlock(&call_rcu_mutex);
		errno = EINVAL;
		return -EINVAL;
	}

	if (per_cpu_call_rcu_data == NULL) {
		call_rcu_unlock(&call_rcu_mutex);
		errno = ENOMEM;
		return -ENOMEM;
	}

	if (per_cpu_call_rcu_data[cpu] != NULL && crdp != NULL) {
		call_rcu_unlock(&call_rcu_mutex);
		errno = EEXIST;
		return -EEXIST;
	}

	rcu_set_pointer(&per_cpu_call_rcu_data[cpu], crdp);
	call_rcu_unlock(&call_rcu_mutex);
	return 0;
}
URCU_ATTR_ALIAS(urcu_stringify(set_cpu_call_rcu_data))
int alias_set_cpu_call_rcu_data();
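
/*
 * A minimal sketch of the removal protocol described above (the names
 * are illustrative, not part of this file):
 *
 *	old = get_cpu_call_rcu_data(cpu);
 *	set_cpu_call_rcu_data(cpu, NULL);
 *	synchronize_rcu();	(wait out readers of the old pointer)
 *	call_rcu_data_free(old);
 */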

/*
 * Return a pointer to the default call_rcu_data structure, creating
 * one if need be.  Because we never free call_rcu_data structures,
 * we don't need to be in an RCU read-side critical section.
 */

struct call_rcu_data *get_default_call_rcu_data(void)
{
	if (default_call_rcu_data != NULL)
		return rcu_dereference(default_call_rcu_data);
	call_rcu_lock(&call_rcu_mutex);
	if (default_call_rcu_data != NULL) {
		call_rcu_unlock(&call_rcu_mutex);
		return default_call_rcu_data;
	}
	call_rcu_data_init(&default_call_rcu_data, 0, -1);
	call_rcu_unlock(&call_rcu_mutex);
	return default_call_rcu_data;
}
URCU_ATTR_ALIAS(urcu_stringify(get_default_call_rcu_data))
struct call_rcu_data *alias_get_default_call_rcu_data();

/*
 * Return the call_rcu_data structure that applies to the currently
 * running thread.  Any call_rcu_data structure assigned specifically
 * to this thread has first priority, followed by any call_rcu_data
 * structure assigned to the CPU on which the thread is running,
 * followed by the default call_rcu_data structure.  If there is not
 * yet a default call_rcu_data structure, one will be created.
 *
 * Calls to this function and use of the returned call_rcu_data should
 * be protected by an RCU read-side lock.
 */
struct call_rcu_data *get_call_rcu_data(void)
{
	struct call_rcu_data *crd;

	if (URCU_TLS(thread_call_rcu_data) != NULL)
		return URCU_TLS(thread_call_rcu_data);

	if (maxcpus > 0) {
		crd = get_cpu_call_rcu_data(urcu_sched_getcpu());
		if (crd)
			return crd;
	}

	return get_default_call_rcu_data();
}
URCU_ATTR_ALIAS(urcu_stringify(get_call_rcu_data))
struct call_rcu_data *alias_get_call_rcu_data();

/*
 * Return a pointer to this task's call_rcu_data if there is one.
 */

struct call_rcu_data *get_thread_call_rcu_data(void)
{
	return URCU_TLS(thread_call_rcu_data);
}
URCU_ATTR_ALIAS(urcu_stringify(get_thread_call_rcu_data))
struct call_rcu_data *alias_get_thread_call_rcu_data();

/*
 * Set this task's call_rcu_data structure as specified, regardless
 * of whether or not this task already had one.  (This allows switching
 * to and from real-time call_rcu threads, for example.)
 *
 * Use NULL to remove a thread's call_rcu_data structure, but it is
 * the caller's responsibility to dispose of the removed structure.
 * Use get_thread_call_rcu_data() to obtain a pointer to the old structure
 * (prior to NULLing it out, of course).
 */

void set_thread_call_rcu_data(struct call_rcu_data *crdp)
{
	URCU_TLS(thread_call_rcu_data) = crdp;
}
URCU_ATTR_ALIAS(urcu_stringify(set_thread_call_rcu_data))
void alias_set_thread_call_rcu_data();
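
/*
 * Example, as a sketch (not prescriptive): give the current thread its
 * own real-time call_rcu thread with no CPU affinity:
 *
 *	crdp = create_call_rcu_data(URCU_CALL_RCU_RT, -1);
 *	set_thread_call_rcu_data(crdp);
 */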

/*
 * Create a separate call_rcu thread for each CPU.  This does not
 * replace a pre-existing call_rcu thread -- use the set_cpu_call_rcu_data()
 * function if you want that behavior.  Should be paired with
 * free_all_cpu_call_rcu_data() to tear down these call_rcu worker
 * threads.
 */

int create_all_cpu_call_rcu_data(unsigned long flags)
{
	int i;
	struct call_rcu_data *crdp;
	int ret;

	call_rcu_lock(&call_rcu_mutex);
	alloc_cpu_call_rcu_data();
	call_rcu_unlock(&call_rcu_mutex);
	if (maxcpus <= 0) {
		errno = EINVAL;
		return -EINVAL;
	}
	if (per_cpu_call_rcu_data == NULL) {
		errno = ENOMEM;
		return -ENOMEM;
	}
	for (i = 0; i < maxcpus; i++) {
		call_rcu_lock(&call_rcu_mutex);
		if (get_cpu_call_rcu_data(i)) {
			call_rcu_unlock(&call_rcu_mutex);
			continue;
		}
		crdp = __create_call_rcu_data(flags, i);
		if (crdp == NULL) {
			call_rcu_unlock(&call_rcu_mutex);
			errno = ENOMEM;
			return -ENOMEM;
		}
		call_rcu_unlock(&call_rcu_mutex);
		if ((ret = set_cpu_call_rcu_data(i, crdp)) != 0) {
			call_rcu_data_free(crdp);

			/* it has been created by another thread */
			if (ret == -EEXIST)
				continue;

			return ret;
		}
	}
	return 0;
}
URCU_ATTR_ALIAS(urcu_stringify(create_all_cpu_call_rcu_data))
int alias_create_all_cpu_call_rcu_data();
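
/*
 * Typical pairing, as a sketch:
 *
 *	if (create_all_cpu_call_rcu_data(0))
 *		... inspect errno ...
 *	... use call_rcu() ...
 *	free_all_cpu_call_rcu_data();
 */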

/*
 * Wake up the call_rcu thread corresponding to the specified
 * call_rcu_data structure.
 */
static void wake_call_rcu_thread(struct call_rcu_data *crdp)
{
	if (!(_CMM_LOAD_SHARED(crdp->flags) & URCU_CALL_RCU_RT))
		call_rcu_wake_up(crdp);
}

static void _call_rcu(struct rcu_head *head,
		      void (*func)(struct rcu_head *head),
		      struct call_rcu_data *crdp)
{
	cds_wfcq_node_init(&head->next);
	head->func = func;
	cds_wfcq_enqueue(&crdp->cbs_head, &crdp->cbs_tail, &head->next);
	uatomic_inc(&crdp->qlen);
	wake_call_rcu_thread(crdp);
}

/*
 * Schedule a function to be invoked after a subsequent grace period.
 * This is the only function that must be called -- the others are
 * only present to allow applications to tune their use of RCU for
 * maximum performance.
 *
 * Note that unless a call_rcu thread has already been created, the
 * first invocation of call_rcu() will create one.  So, if you need
 * the first invocation of call_rcu() to be fast, make sure to create
 * a call_rcu thread first.  One way to accomplish this is
 * "get_call_rcu_data();", and another is create_all_cpu_call_rcu_data().
 *
 * call_rcu must be called by registered RCU read-side threads.
 */
void call_rcu(struct rcu_head *head,
	      void (*func)(struct rcu_head *head))
{
	struct call_rcu_data *crdp;

	/* Holding rcu read-side lock across use of per-cpu crdp */
	_rcu_read_lock();
	crdp = get_call_rcu_data();
	_call_rcu(head, func, crdp);
	_rcu_read_unlock();
}
URCU_ATTR_ALIAS(urcu_stringify(call_rcu)) void alias_call_rcu();
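
/*
 * A minimal usage sketch ("struct foo" and free_foo() are illustrative,
 * not part of this file): embed an rcu_head in the protected object,
 * unlink the object, then hand it to call_rcu() for deferred freeing.
 * The calling thread must be a registered reader, per the comment above.
 *
 *	struct foo {
 *		struct rcu_head rcu;
 *		int data;
 *	};
 *
 *	static void free_foo(struct rcu_head *head)
 *	{
 *		struct foo *p = caa_container_of(head, struct foo, rcu);
 *
 *		free(p);
 *	}
 *
 *	... unlink p so no new readers can find it ...
 *	call_rcu(&p->rcu, free_foo);
 */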

/*
 * Free up the specified call_rcu_data structure, terminating the
 * associated call_rcu thread.  The caller must have previously
 * removed the call_rcu_data structure from per-thread or per-CPU
 * usage.  For example, set_cpu_call_rcu_data(cpu, NULL) for per-CPU
 * call_rcu_data structures or set_thread_call_rcu_data(NULL) for
 * per-thread call_rcu_data structures.
 *
 * We silently refuse to free up the default call_rcu_data structure
 * because that is where we put any leftover callbacks.  Note that
 * the possibility of self-spawning callbacks makes it impossible
 * to execute all the callbacks in finite time without putting any
 * newly spawned callbacks somewhere else.  The "somewhere else" of
 * last resort is the default call_rcu_data structure.
 *
 * We also silently refuse to free NULL pointers.  This simplifies
 * the calling code.
 *
 * The caller must wait for a grace period to pass between return from
 * set_cpu_call_rcu_data() and call to call_rcu_data_free() passing the
 * previous call rcu data as argument.
 *
 * Note: introducing __cds_wfcq_splice_blocking() in this function fixed
 * a list corruption bug in the 0.7.x series.  The equivalent fix
 * appeared in 0.6.8 for the stable-0.6 branch.
 */
void call_rcu_data_free(struct call_rcu_data *crdp)
{
	if (crdp == NULL || crdp == default_call_rcu_data) {
		return;
	}
	if ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0) {
		uatomic_or(&crdp->flags, URCU_CALL_RCU_STOP);
		wake_call_rcu_thread(crdp);
		while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0)
			(void) poll(NULL, 0, 1);
	}
	call_rcu_lock(&call_rcu_mutex);
	if (!cds_wfcq_empty(&crdp->cbs_head, &crdp->cbs_tail)) {
		call_rcu_unlock(&call_rcu_mutex);
		/* Create default call rcu data if need be. */
		/* CBs queued here will be handed to the default list. */
		(void) get_default_call_rcu_data();
		call_rcu_lock(&call_rcu_mutex);
		__cds_wfcq_splice_blocking(&default_call_rcu_data->cbs_head,
			&default_call_rcu_data->cbs_tail,
			&crdp->cbs_head, &crdp->cbs_tail);
		uatomic_add(&default_call_rcu_data->qlen,
			uatomic_read(&crdp->qlen));
		wake_call_rcu_thread(default_call_rcu_data);
	}

	cds_list_del(&crdp->list);
	call_rcu_unlock(&call_rcu_mutex);

	free(crdp);
}
URCU_ATTR_ALIAS(urcu_stringify(call_rcu_data_free))
void alias_call_rcu_data_free();

/*
 * Clean up all the per-CPU call_rcu threads.
 */
void free_all_cpu_call_rcu_data(void)
{
	int cpu;
	struct call_rcu_data **crdp;
	static int warned = 0;

	if (maxcpus <= 0)
		return;

	crdp = malloc(sizeof(*crdp) * maxcpus);
	if (!crdp) {
		if (!warned) {
			fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
		}
		warned = 1;
		return;
	}

	for (cpu = 0; cpu < maxcpus; cpu++) {
		crdp[cpu] = get_cpu_call_rcu_data(cpu);
		if (crdp[cpu] == NULL)
			continue;
		set_cpu_call_rcu_data(cpu, NULL);
	}
	/*
	 * Wait for call_rcu sites acting as RCU readers of the
	 * call_rcu_data to become quiescent.
	 */
	synchronize_rcu();
	for (cpu = 0; cpu < maxcpus; cpu++) {
		if (crdp[cpu] == NULL)
			continue;
		call_rcu_data_free(crdp[cpu]);
	}
	free(crdp);
}
#ifdef RCU_QSBR
/* ABI6 has a non-namespaced free_all_cpu_call_rcu_data for qsbr */
#undef free_all_cpu_call_rcu_data
URCU_ATTR_ALIAS("urcu_qsbr_free_all_cpu_call_rcu_data")
void free_all_cpu_call_rcu_data();
#define free_all_cpu_call_rcu_data urcu_qsbr_free_all_cpu_call_rcu_data
#else
URCU_ATTR_ALIAS(urcu_stringify(free_all_cpu_call_rcu_data))
void alias_free_all_cpu_call_rcu_data();
#endif

static
void free_completion(struct urcu_ref *ref)
{
	struct call_rcu_completion *completion;

	completion = caa_container_of(ref, struct call_rcu_completion, ref);
	free(completion);
}

static
void _rcu_barrier_complete(struct rcu_head *head)
{
	struct call_rcu_completion_work *work;
	struct call_rcu_completion *completion;

	work = caa_container_of(head, struct call_rcu_completion_work, head);
	completion = work->completion;
	if (!uatomic_sub_return(&completion->barrier_count, 1))
		call_rcu_completion_wake_up(completion);
	urcu_ref_put(&completion->ref, free_completion);
	free(work);
}

/*
 * Wait for all in-flight call_rcu callbacks to complete execution.
 */
void rcu_barrier(void)
{
	struct call_rcu_data *crdp;
	struct call_rcu_completion *completion;
	int count = 0;
	int was_online;

	/* Put in offline state in QSBR. */
	was_online = _rcu_read_ongoing();
	if (was_online)
		rcu_thread_offline();
	/*
	 * Calling rcu_barrier() from within an RCU read-side critical
	 * section is an error.
	 */
	if (_rcu_read_ongoing()) {
		static int warned = 0;

		if (!warned) {
			fprintf(stderr, "[error] liburcu: rcu_barrier() called from within RCU read-side critical section.\n");
		}
		warned = 1;
		goto online;
	}

	completion = calloc(1, sizeof(*completion));
	if (!completion)
		urcu_die(errno);

	call_rcu_lock(&call_rcu_mutex);
	cds_list_for_each_entry(crdp, &call_rcu_data_list, list)
		count++;

	/* Referenced by rcu_barrier() and each call_rcu thread. */
	urcu_ref_set(&completion->ref, count + 1);
	completion->barrier_count = count;

	cds_list_for_each_entry(crdp, &call_rcu_data_list, list) {
		struct call_rcu_completion_work *work;

		work = calloc(1, sizeof(*work));
		if (!work)
			urcu_die(errno);
		work->completion = completion;
		_call_rcu(&work->head, _rcu_barrier_complete, crdp);
	}
	call_rcu_unlock(&call_rcu_mutex);

	/* Wait for them */
	for (;;) {
		uatomic_dec(&completion->futex);
		/* Decrement futex before reading barrier_count */
		cmm_smp_mb();
		if (!uatomic_read(&completion->barrier_count))
			break;
		call_rcu_completion_wait(completion);
	}

	urcu_ref_put(&completion->ref, free_completion);

online:
	if (was_online)
		rcu_thread_online();
}
URCU_ATTR_ALIAS(urcu_stringify(rcu_barrier))
void alias_rcu_barrier();
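
/*
 * Example sketch: drain pending callbacks before tearing down state
 * they reference (free_foo() as in the call_rcu() example above):
 *
 *	call_rcu(&p->rcu, free_foo);
 *	...
 *	rcu_barrier();	(all previously queued callbacks have now run)
 *	... safe to release resources free_foo() depends on ...
 */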

/*
 * Acquire the call_rcu_mutex in order to ensure that the child sees
 * all of the call_rcu() data structures in a consistent state.  Ensure
 * that all call_rcu threads are in a quiescent state across fork.
 * Suitable for pthread_atfork() and friends.
 */
void call_rcu_before_fork(void)
{
	struct call_rcu_data *crdp;
	struct urcu_atfork *atfork;

	call_rcu_lock(&call_rcu_mutex);

	atfork = registered_rculfhash_atfork;
	if (atfork)
		atfork->before_fork(atfork->priv);

	cds_list_for_each_entry(crdp, &call_rcu_data_list, list) {
		uatomic_or(&crdp->flags, URCU_CALL_RCU_PAUSE);
		cmm_smp_mb__after_uatomic_or();
		wake_call_rcu_thread(crdp);
	}
	cds_list_for_each_entry(crdp, &call_rcu_data_list, list) {
		while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSED) == 0)
			(void) poll(NULL, 0, 1);
	}
}
URCU_ATTR_ALIAS(urcu_stringify(call_rcu_before_fork))
void alias_call_rcu_before_fork();

/*
 * Clean up call_rcu data structures in the parent of a successful fork()
 * that is not followed by exec() in the child.  Suitable for
 * pthread_atfork() and friends.
 */
void call_rcu_after_fork_parent(void)
{
	struct call_rcu_data *crdp;
	struct urcu_atfork *atfork;

	cds_list_for_each_entry(crdp, &call_rcu_data_list, list)
		uatomic_and(&crdp->flags, ~URCU_CALL_RCU_PAUSE);
	cds_list_for_each_entry(crdp, &call_rcu_data_list, list) {
		while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSED) != 0)
			(void) poll(NULL, 0, 1);
	}
	atfork = registered_rculfhash_atfork;
	if (atfork)
		atfork->after_fork_parent(atfork->priv);
	call_rcu_unlock(&call_rcu_mutex);
}
URCU_ATTR_ALIAS(urcu_stringify(call_rcu_after_fork_parent))
void alias_call_rcu_after_fork_parent();

/*
 * Clean up call_rcu data structures in the child of a successful fork()
 * that is not followed by exec().  Suitable for pthread_atfork() and
 * friends.
 */
void call_rcu_after_fork_child(void)
{
	struct call_rcu_data *crdp, *next;
	struct urcu_atfork *atfork;

	/* Release the mutex. */
	call_rcu_unlock(&call_rcu_mutex);

	atfork = registered_rculfhash_atfork;
	if (atfork)
		atfork->after_fork_child(atfork->priv);

	/* Do nothing when call_rcu() has not been used */
	if (cds_list_empty(&call_rcu_data_list))
		return;

	/*
	 * Allocate a new default call_rcu_data structure in order
	 * to get a working call_rcu thread to go with it.
	 */
	default_call_rcu_data = NULL;
	(void)get_default_call_rcu_data();

	/* Clean up call_rcu_data pointers before use */
	maxcpus_reset();
	free(per_cpu_call_rcu_data);
	rcu_set_pointer(&per_cpu_call_rcu_data, NULL);
	URCU_TLS(thread_call_rcu_data) = NULL;

	/*
	 * Dispose of all of the rest of the call_rcu_data structures.
	 * Leftover call_rcu callbacks will be merged into the new
	 * default call_rcu thread queue.
	 */
	cds_list_for_each_entry_safe(crdp, next, &call_rcu_data_list, list) {
		if (crdp == default_call_rcu_data)
			continue;
		uatomic_set(&crdp->flags, URCU_CALL_RCU_STOPPED);
		call_rcu_data_free(crdp);
	}
}
URCU_ATTR_ALIAS(urcu_stringify(call_rcu_after_fork_child))
void alias_call_rcu_after_fork_child();
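
/*
 * Typical registration of the three handlers above, as a sketch, so
 * that fork(2) remains safe in a process using call_rcu():
 *
 *	ret = pthread_atfork(call_rcu_before_fork,
 *			call_rcu_after_fork_parent,
 *			call_rcu_after_fork_child);
 */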

void urcu_register_rculfhash_atfork(struct urcu_atfork *atfork)
{
	call_rcu_lock(&call_rcu_mutex);
	if (registered_rculfhash_atfork_refcount++)
		goto end;
	registered_rculfhash_atfork = atfork;
end:
	call_rcu_unlock(&call_rcu_mutex);
}
URCU_ATTR_ALIAS(urcu_stringify(urcu_register_rculfhash_atfork))
void alias_urcu_register_rculfhash_atfork();

void urcu_unregister_rculfhash_atfork(struct urcu_atfork *atfork __attribute__((unused)))
{
	call_rcu_lock(&call_rcu_mutex);
	if (--registered_rculfhash_atfork_refcount)
		goto end;
	registered_rculfhash_atfork = NULL;
end:
	call_rcu_unlock(&call_rcu_mutex);
}
URCU_ATTR_ALIAS(urcu_stringify(urcu_unregister_rculfhash_atfork))
void alias_urcu_unregister_rculfhash_atfork();