urcu-call-rcu-impl.h

   1 /*
   2  * urcu-call-rcu.c
   3  *
   4  * Userspace RCU library - batch memory reclamation with kernel API
   5  *
   6  * Copyright (c) 2010 Paul E. McKenney <paulmck@linux.vnet.ibm.com>
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with this library; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #define _GNU_SOURCE
  24 #define _LGPL_SOURCE
  25 #include <stdio.h>
  26 #include <pthread.h>
  27 #include <signal.h>
  28 #include <assert.h>
  29 #include <stdlib.h>
  30 #include <stdint.h>
  31 #include <string.h>
  32 #include <errno.h>
  33 #include <poll.h>
  34 #include <sys/time.h>
  35 #include <unistd.h>
  36 #include <sched.h>
  37
  38 #include "config.h"
  39 #include "urcu/wfcqueue.h"
  40 #include "urcu-call-rcu.h"
  41 #include "urcu-pointer.h"
  42 #include "urcu/list.h"
  43 #include "urcu/futex.h"
  44 #include "urcu/tls-compat.h"
  45 #include "urcu/ref.h"
  46 #include "urcu-die.h"
  47
/*
 * Data structure that identifies a call_rcu thread: the callback queue
 * that thread drains, plus the state used to wake, pause and stop it.
 */

struct call_rcu_data {
        /*
         * We do not align head on a different cache-line than tail
         * mainly because call_rcu callback-invocation threads use
         * batching ("splice") to get an entire list of callbacks, which
         * effectively empties the queue, and requires to touch the tail
         * anyway.
         */
        struct cds_wfcq_tail cbs_tail;
        struct cds_wfcq_head cbs_head;
        /* URCU_CALL_RCU_* bits (RT, STOP, STOPPED, PAUSE, PAUSED). */
        unsigned long flags;
        /* Decremented to -1 by the worker before it sleeps; wakers reset it to 0. */
        int32_t futex;
        unsigned long qlen; /* maintained for debugging. */
        /* Worker thread started by call_rcu_data_init(). */
        pthread_t tid;
        /* CPU the worker pins itself to; a negative value means no pinning. */
        int cpu_affinity;
        /* Link in call_rcu_data_list. */
        struct cds_list_head list;
} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
  67
/*
 * Shared state of one rcu_barrier() invocation.
 */
struct call_rcu_completion {
        /* Barrier callbacks that have not run yet; the waiter leaves at 0. */
        int barrier_count;
        /* Decremented by the waiter before sleeping; reset to 0 by the waker. */
        int32_t futex;
        /* One reference for rcu_barrier() plus one per queued barrier callback. */
        struct urcu_ref ref;
};
  73
/*
 * Per-worker barrier callback: one is queued on every call_rcu_data by
 * rcu_barrier() and freed by _rcu_barrier_complete().
 */
struct call_rcu_completion_work {
        /* Callback head handed to _call_rcu(). */
        struct rcu_head head;
        /* Completion shared by all work items of the same rcu_barrier(). */
        struct call_rcu_completion *completion;
};
  78
/*
 * List of all call_rcu_data structures to keep valgrind happy.
 * rcu_barrier() and the fork handlers also walk it to reach every
 * call_rcu thread. Protected by call_rcu_mutex.
 */

static CDS_LIST_HEAD(call_rcu_data_list);

/* Link a thread using call_rcu() to its call_rcu thread. */

static DEFINE_URCU_TLS(struct call_rcu_data *, thread_call_rcu_data);

/*
 * Guard call_rcu thread creation and atfork handlers.
 */
static pthread_mutex_t call_rcu_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * If a given thread does not have its own call_rcu thread, this is the
 * default. Created on demand by get_default_call_rcu_data().
 */

static struct call_rcu_data *default_call_rcu_data;
  98
  99 /*
 100  * If the sched_getcpu() and sysconf(_SC_NPROCESSORS_CONF) calls are
 101  * available, then we can have call_rcu threads assigned to individual
 102  * CPUs rather than only to specific threads.
 103  */
 104
 105 #ifdef HAVE_SCHED_GETCPU
 106
 107 static int urcu_sched_getcpu(void)
 108 {
 109         return sched_getcpu();
 110 }
 111
 112 #else /* #ifdef HAVE_SCHED_GETCPU */
 113
 114 static int urcu_sched_getcpu(void)
 115 {
 116         return -1;
 117 }
 118
 119 #endif /* #else #ifdef HAVE_SCHED_GETCPU */
 120
 121 #if defined(HAVE_SYSCONF) && defined(HAVE_SCHED_GETCPU)
 122
/*
 * Pointer to array of pointers to per-CPU call_rcu_data structures
 * and # CPUs. per_cpu_call_rcu_data is a RCU-protected pointer to an
 * array of RCU-protected pointers to call_rcu_data. call_rcu acts as a
 * RCU read-side and reads per_cpu_call_rcu_data and the per-cpu pointer
 * without mutex. The call_rcu_mutex protects updates.
 */

static struct call_rcu_data **per_cpu_call_rcu_data;
/* 0 until alloc_cpu_call_rcu_data() has queried the CPU count. */
static long maxcpus;

/*
 * Forget the cached CPU count so that the next call to
 * alloc_cpu_call_rcu_data() queries it and allocates the array again.
 */
static void maxcpus_reset(void)
{
        maxcpus = 0;
}
 138
 139 /* Allocate the array if it has not already been allocated. */
 140
 141 static void alloc_cpu_call_rcu_data(void)
 142 {
 143         struct call_rcu_data **p;
 144         static int warned = 0;
 145
 146         if (maxcpus != 0)
 147                 return;
 148         maxcpus = sysconf(_SC_NPROCESSORS_CONF);
 149         if (maxcpus <= 0) {
 150                 return;
 151         }
 152         p = malloc(maxcpus * sizeof(*per_cpu_call_rcu_data));
 153         if (p != NULL) {
 154                 memset(p, '\0', maxcpus * sizeof(*per_cpu_call_rcu_data));
 155                 rcu_set_pointer(&per_cpu_call_rcu_data, p);
 156         } else {
 157                 if (!warned) {
 158                         fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
 159                 }
 160                 warned = 1;
 161         }
 162 }
 163
 164 #else /* #if defined(HAVE_SYSCONF) && defined(HAVE_SCHED_GETCPU) */
 165
/*
 * per_cpu_call_rcu_data should be constant, but some functions below,
 * used both for cases where the cpu number is available and not
 * available, assume it is not constant.
 */
static struct call_rcu_data **per_cpu_call_rcu_data = NULL;
/* Never positive in this configuration, so per-CPU lookups are skipped. */
static const long maxcpus = -1;

/* Nothing to reset: maxcpus is a constant in this configuration. */
static void maxcpus_reset(void)
{
}

/* No per-CPU array is allocated in this configuration. */
static void alloc_cpu_call_rcu_data(void)
{
}
 181
 182 #endif /* #else #if defined(HAVE_SYSCONF) && defined(HAVE_SCHED_GETCPU) */
 183
 184 /* Acquire the specified pthread mutex. */
 185
 186 static void call_rcu_lock(pthread_mutex_t *pmp)
 187 {
 188         int ret;
 189
 190         ret = pthread_mutex_lock(pmp);
 191         if (ret)
 192                 urcu_die(ret);
 193 }
 194
 195 /* Release the specified pthread mutex. */
 196
 197 static void call_rcu_unlock(pthread_mutex_t *pmp)
 198 {
 199         int ret;
 200
 201         ret = pthread_mutex_unlock(pmp);
 202         if (ret)
 203                 urcu_die(ret);
 204 }
 205
 206 #if HAVE_SCHED_SETAFFINITY
 207 static
 208 int set_thread_cpu_affinity(struct call_rcu_data *crdp)
 209 {
 210         cpu_set_t mask;
 211
 212         if (crdp->cpu_affinity < 0)
 213                 return 0;
 214
 215         CPU_ZERO(&mask);
 216         CPU_SET(crdp->cpu_affinity, &mask);
 217 #if SCHED_SETAFFINITY_ARGS == 2
 218         return sched_setaffinity(0, &mask);
 219 #else
 220         return sched_setaffinity(0, sizeof(mask), &mask);
 221 #endif
 222 }
 223 #else
 224 static
 225 int set_thread_cpu_affinity(struct call_rcu_data *crdp)
 226 {
 227         return 0;
 228 }
 229 #endif
 230
 231 static void call_rcu_wait(struct call_rcu_data *crdp)
 232 {
 233         /* Read call_rcu list before read futex */
 234         cmm_smp_mb();
 235         if (uatomic_read(&crdp->futex) == -1)
 236                 futex_async(&crdp->futex, FUTEX_WAIT, -1,
 237                       NULL, NULL, 0);
 238 }
 239
 240 static void call_rcu_wake_up(struct call_rcu_data *crdp)
 241 {
 242         /* Write to call_rcu list before reading/writing futex */
 243         cmm_smp_mb();
 244         if (caa_unlikely(uatomic_read(&crdp->futex) == -1)) {
 245                 uatomic_set(&crdp->futex, 0);
 246                 futex_async(&crdp->futex, FUTEX_WAKE, 1,
 247                       NULL, NULL, 0);
 248         }
 249 }
 250
 251 static void call_rcu_completion_wait(struct call_rcu_completion *completion)
 252 {
 253         /* Read completion barrier count before read futex */
 254         cmm_smp_mb();
 255         if (uatomic_read(&completion->futex) == -1)
 256                 futex_async(&completion->futex, FUTEX_WAIT, -1,
 257                       NULL, NULL, 0);
 258 }
 259
 260 static void call_rcu_completion_wake_up(struct call_rcu_completion *completion)
 261 {
 262         /* Write to completion barrier count before reading/writing futex */
 263         cmm_smp_mb();
 264         if (caa_unlikely(uatomic_read(&completion->futex) == -1)) {
 265                 uatomic_set(&completion->futex, 0);
 266                 futex_async(&completion->futex, FUTEX_WAKE, 1,
 267                       NULL, NULL, 0);
 268         }
 269 }
 270
/*
 * This is the code run by each call_rcu thread.
 *
 * arg is the call_rcu_data this worker serves. The worker repeatedly
 * grabs the whole callback queue, waits for a grace period, invokes the
 * callbacks, then sleeps until more work arrives. It leaves the loop
 * once URCU_CALL_RCU_STOP is set and reports that through
 * URCU_CALL_RCU_STOPPED. Always returns NULL.
 */

static void *call_rcu_thread(void *arg)
{
        unsigned long cbcount;
        struct call_rcu_data *crdp = (struct call_rcu_data *) arg;
        /* RT workers never sleep on the futex; they only poll. */
        int rt = !!(uatomic_read(&crdp->flags) & URCU_CALL_RCU_RT);
        int ret;

        ret = set_thread_cpu_affinity(crdp);
        if (ret)
                urcu_die(errno);

        /*
         * If callbacks take a read-side lock, we need to be registered.
         */
        rcu_register_thread();

        /* call_rcu() issued from a callback lands on this worker's queue. */
        URCU_TLS(thread_call_rcu_data) = crdp;
        if (!rt) {
                uatomic_dec(&crdp->futex);
                /* Decrement futex before reading call_rcu list */
                cmm_smp_mb();
        }
        for (;;) {
                struct cds_wfcq_head cbs_tmp_head;
                struct cds_wfcq_tail cbs_tmp_tail;
                struct cds_wfcq_node *cbs, *cbs_tmp_n;
                enum cds_wfcq_ret splice_ret;

                if (uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSE) {
                        /*
                         * Pause requested. Become quiescent: remove
                         * ourself from all global lists, and don't
                         * process any callback. The callback lists may
                         * still be non-empty though.
                         */
                        rcu_unregister_thread();
                        cmm_smp_mb__before_uatomic_or();
                        uatomic_or(&crdp->flags, URCU_CALL_RCU_PAUSED);
                        /* Stay parked until the PAUSE request is withdrawn. */
                        while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSE) != 0)
                                poll(NULL, 0, 1);
                        uatomic_and(&crdp->flags, ~URCU_CALL_RCU_PAUSED);
                        cmm_smp_mb__after_uatomic_and();
                        rcu_register_thread();
                }

                /* Move every queued callback onto a private, local queue. */
                cds_wfcq_init(&cbs_tmp_head, &cbs_tmp_tail);
                splice_ret = __cds_wfcq_splice_blocking(&cbs_tmp_head,
                        &cbs_tmp_tail, &crdp->cbs_head, &crdp->cbs_tail);
                assert(splice_ret != CDS_WFCQ_RET_WOULDBLOCK);
                assert(splice_ret != CDS_WFCQ_RET_DEST_NON_EMPTY);
                if (splice_ret != CDS_WFCQ_RET_SRC_EMPTY) {
                        /* One grace period covers the whole batch. */
                        synchronize_rcu();
                        cbcount = 0;
                        __cds_wfcq_for_each_blocking_safe(&cbs_tmp_head,
                                        &cbs_tmp_tail, cbs, cbs_tmp_n) {
                                struct rcu_head *rhp;

                                rhp = caa_container_of(cbs,
                                        struct rcu_head, next);
                                rhp->func(rhp);
                                cbcount++;
                        }
                        uatomic_sub(&crdp->qlen, cbcount);
                }
                /* STOP is only honoured after the current batch has run. */
                if (uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOP)
                        break;
                rcu_thread_offline();
                if (!rt) {
                        if (cds_wfcq_empty(&crdp->cbs_head,
                                        &crdp->cbs_tail)) {
                                call_rcu_wait(crdp);
                                poll(NULL, 0, 10);
                                uatomic_dec(&crdp->futex);
                                /*
                                 * Decrement futex before reading
                                 * call_rcu list.
                                 */
                                cmm_smp_mb();
                        } else {
                                poll(NULL, 0, 10);
                        }
                } else {
                        poll(NULL, 0, 10);
                }
                rcu_thread_online();
        }
        if (!rt) {
                /*
                 * Read call_rcu list before write futex.
                 */
                cmm_smp_mb();
                uatomic_set(&crdp->futex, 0);
        }
        /* crdp is not touched past this point. */
        uatomic_or(&crdp->flags, URCU_CALL_RCU_STOPPED);
        rcu_unregister_thread();
        return NULL;
}
 370
 371 /*
 372  * Create both a call_rcu thread and the corresponding call_rcu_data
 373  * structure, linking the structure in as specified.  Caller must hold
 374  * call_rcu_mutex.
 375  */
 376
 377 static void call_rcu_data_init(struct call_rcu_data **crdpp,
 378                                unsigned long flags,
 379                                int cpu_affinity)
 380 {
 381         struct call_rcu_data *crdp;
 382         int ret;
 383
 384         crdp = malloc(sizeof(*crdp));
 385         if (crdp == NULL)
 386                 urcu_die(errno);
 387         memset(crdp, '\0', sizeof(*crdp));
 388         cds_wfcq_init(&crdp->cbs_head, &crdp->cbs_tail);
 389         crdp->qlen = 0;
 390         crdp->futex = 0;
 391         crdp->flags = flags;
 392         cds_list_add(&crdp->list, &call_rcu_data_list);
 393         crdp->cpu_affinity = cpu_affinity;
 394         cmm_smp_mb();  /* Structure initialized before pointer is planted. */
 395         *crdpp = crdp;
 396         ret = pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp);
 397         if (ret)
 398                 urcu_die(ret);
 399 }
 400
 401 /*
 402  * Return a pointer to the call_rcu_data structure for the specified
 403  * CPU, returning NULL if there is none.  We cannot automatically
 404  * created it because the platform we are running on might not define
 405  * urcu_sched_getcpu().
 406  *
 407  * The call to this function and use of the returned call_rcu_data
 408  * should be protected by RCU read-side lock.
 409  */
 410
 411 struct call_rcu_data *get_cpu_call_rcu_data(int cpu)
 412 {
 413         static int warned = 0;
 414         struct call_rcu_data **pcpu_crdp;
 415
 416         pcpu_crdp = rcu_dereference(per_cpu_call_rcu_data);
 417         if (pcpu_crdp == NULL)
 418                 return NULL;
 419         if (!warned && maxcpus > 0 && (cpu < 0 || maxcpus <= cpu)) {
 420                 fprintf(stderr, "[error] liburcu: get CPU # out of range\n");
 421                 warned = 1;
 422         }
 423         if (cpu < 0 || maxcpus <= cpu)
 424                 return NULL;
 425         return rcu_dereference(pcpu_crdp[cpu]);
 426 }
 427
/*
 * Return the tid corresponding to the call_rcu thread whose
 * call_rcu_data structure is specified. crdp must not be NULL: it is
 * dereferenced unconditionally.
 */

pthread_t get_call_rcu_thread(struct call_rcu_data *crdp)
{
        return crdp->tid;
}
 437
/*
 * Create a call_rcu_data structure (with thread) and return a pointer.
 * Thin wrapper around call_rcu_data_init(), which requires the caller
 * to hold call_rcu_mutex.
 */

static struct call_rcu_data *__create_call_rcu_data(unsigned long flags,
                                                    int cpu_affinity)
{
        struct call_rcu_data *crdp;

        /* call_rcu_data_init() stores the new structure in crdp. */
        call_rcu_data_init(&crdp, flags, cpu_affinity);
        return crdp;
}
 450
 451 struct call_rcu_data *create_call_rcu_data(unsigned long flags,
 452                                            int cpu_affinity)
 453 {
 454         struct call_rcu_data *crdp;
 455
 456         call_rcu_lock(&call_rcu_mutex);
 457         crdp = __create_call_rcu_data(flags, cpu_affinity);
 458         call_rcu_unlock(&call_rcu_mutex);
 459         return crdp;
 460 }
 461
 462 /*
 463  * Set the specified CPU to use the specified call_rcu_data structure.
 464  *
 465  * Use NULL to remove a CPU's call_rcu_data structure, but it is
 466  * the caller's responsibility to dispose of the removed structure.
 467  * Use get_cpu_call_rcu_data() to obtain a pointer to the old structure
 468  * (prior to NULLing it out, of course).
 469  *
 470  * The caller must wait for a grace-period to pass between return from
 471  * set_cpu_call_rcu_data() and call to call_rcu_data_free() passing the
 472  * previous call rcu data as argument.
 473  */
 474
 475 int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp)
 476 {
 477         static int warned = 0;
 478
 479         call_rcu_lock(&call_rcu_mutex);
 480         alloc_cpu_call_rcu_data();
 481         if (cpu < 0 || maxcpus <= cpu) {
 482                 if (!warned) {
 483                         fprintf(stderr, "[error] liburcu: set CPU # out of range\n");
 484                         warned = 1;
 485                 }
 486                 call_rcu_unlock(&call_rcu_mutex);
 487                 errno = EINVAL;
 488                 return -EINVAL;
 489         }
 490
 491         if (per_cpu_call_rcu_data == NULL) {
 492                 call_rcu_unlock(&call_rcu_mutex);
 493                 errno = ENOMEM;
 494                 return -ENOMEM;
 495         }
 496
 497         if (per_cpu_call_rcu_data[cpu] != NULL && crdp != NULL) {
 498                 call_rcu_unlock(&call_rcu_mutex);
 499                 errno = EEXIST;
 500                 return -EEXIST;
 501         }
 502
 503         rcu_set_pointer(&per_cpu_call_rcu_data[cpu], crdp);
 504         call_rcu_unlock(&call_rcu_mutex);
 505         return 0;
 506 }
 507
 508 /*
 509  * Return a pointer to the default call_rcu_data structure, creating
 510  * one if need be.  Because we never free call_rcu_data structures,
 511  * we don't need to be in an RCU read-side critical section.
 512  */
 513
 514 struct call_rcu_data *get_default_call_rcu_data(void)
 515 {
 516         if (default_call_rcu_data != NULL)
 517                 return rcu_dereference(default_call_rcu_data);
 518         call_rcu_lock(&call_rcu_mutex);
 519         if (default_call_rcu_data != NULL) {
 520                 call_rcu_unlock(&call_rcu_mutex);
 521                 return default_call_rcu_data;
 522         }
 523         call_rcu_data_init(&default_call_rcu_data, 0, -1);
 524         call_rcu_unlock(&call_rcu_mutex);
 525         return default_call_rcu_data;
 526 }
 527
 528 /*
 529  * Return the call_rcu_data structure that applies to the currently
 530  * running thread.  Any call_rcu_data structure assigned specifically
 531  * to this thread has first priority, followed by any call_rcu_data
 532  * structure assigned to the CPU on which the thread is running,
 533  * followed by the default call_rcu_data structure.  If there is not
 534  * yet a default call_rcu_data structure, one will be created.
 535  *
 536  * Calls to this function and use of the returned call_rcu_data should
 537  * be protected by RCU read-side lock.
 538  */
 539 struct call_rcu_data *get_call_rcu_data(void)
 540 {
 541         struct call_rcu_data *crd;
 542
 543         if (URCU_TLS(thread_call_rcu_data) != NULL)
 544                 return URCU_TLS(thread_call_rcu_data);
 545
 546         if (maxcpus > 0) {
 547                 crd = get_cpu_call_rcu_data(urcu_sched_getcpu());
 548                 if (crd)
 549                         return crd;
 550         }
 551
 552         return get_default_call_rcu_data();
 553 }
 554
/*
 * Return a pointer to this task's call_rcu_data if there is one, i.e.
 * the current value of the thread-local thread_call_rcu_data, which may
 * be NULL.
 */

struct call_rcu_data *get_thread_call_rcu_data(void)
{
        return URCU_TLS(thread_call_rcu_data);
}
 563
/*
 * Set this task's call_rcu_data structure as specified, regardless
 * of whether or not this task already had one.  (This allows switching
 * to and from real-time call_rcu threads, for example.)
 *
 * Use NULL to remove a thread's call_rcu_data structure, but it is
 * the caller's responsibility to dispose of the removed structure.
 * Use get_thread_call_rcu_data() to obtain a pointer to the old structure
 * (prior to NULLing it out, of course).
 *
 * Only the calling thread's thread-local pointer is changed.
 */

void set_thread_call_rcu_data(struct call_rcu_data *crdp)
{
        URCU_TLS(thread_call_rcu_data) = crdp;
}
 579
 580 /*
 581  * Create a separate call_rcu thread for each CPU.  This does not
 582  * replace a pre-existing call_rcu thread -- use the set_cpu_call_rcu_data()
 583  * function if you want that behavior. Should be paired with
 584  * free_all_cpu_call_rcu_data() to teardown these call_rcu worker
 585  * threads.
 586  */
 587
 588 int create_all_cpu_call_rcu_data(unsigned long flags)
 589 {
 590         int i;
 591         struct call_rcu_data *crdp;
 592         int ret;
 593
 594         call_rcu_lock(&call_rcu_mutex);
 595         alloc_cpu_call_rcu_data();
 596         call_rcu_unlock(&call_rcu_mutex);
 597         if (maxcpus <= 0) {
 598                 errno = EINVAL;
 599                 return -EINVAL;
 600         }
 601         if (per_cpu_call_rcu_data == NULL) {
 602                 errno = ENOMEM;
 603                 return -ENOMEM;
 604         }
 605         for (i = 0; i < maxcpus; i++) {
 606                 call_rcu_lock(&call_rcu_mutex);
 607                 if (get_cpu_call_rcu_data(i)) {
 608                         call_rcu_unlock(&call_rcu_mutex);
 609                         continue;
 610                 }
 611                 crdp = __create_call_rcu_data(flags, i);
 612                 if (crdp == NULL) {
 613                         call_rcu_unlock(&call_rcu_mutex);
 614                         errno = ENOMEM;
 615                         return -ENOMEM;
 616                 }
 617                 call_rcu_unlock(&call_rcu_mutex);
 618                 if ((ret = set_cpu_call_rcu_data(i, crdp)) != 0) {
 619                         call_rcu_data_free(crdp);
 620
 621                         /* it has been created by other thread */
 622                         if (ret == -EEXIST)
 623                                 continue;
 624
 625                         return ret;
 626                 }
 627         }
 628         return 0;
 629 }
 630
 631 /*
 632  * Wake up the call_rcu thread corresponding to the specified
 633  * call_rcu_data structure.
 634  */
 635 static void wake_call_rcu_thread(struct call_rcu_data *crdp)
 636 {
 637         if (!(_CMM_LOAD_SHARED(crdp->flags) & URCU_CALL_RCU_RT))
 638                 call_rcu_wake_up(crdp);
 639 }
 640
/*
 * Queue callback func, carried by head, on the given call_rcu_data and
 * wake its worker thread. The caller chooses crdp; see call_rcu() and
 * rcu_barrier() for the two users.
 */
static void _call_rcu(struct rcu_head *head,
                      void (*func)(struct rcu_head *head),
                      struct call_rcu_data *crdp)
{
        cds_wfcq_node_init(&head->next);
        head->func = func;
        /* The callback must be fully set up before it becomes visible. */
        cds_wfcq_enqueue(&crdp->cbs_head, &crdp->cbs_tail, &head->next);
        uatomic_inc(&crdp->qlen);
        wake_call_rcu_thread(crdp);
}
 651
/*
 * Schedule a function to be invoked after a following grace period.
 * This is the only function that must be called -- the others are
 * only present to allow applications to tune their use of RCU for
 * maximum performance.
 *
 * Note that unless a call_rcu thread has already been created,
 * the first invocation of call_rcu() will create one.  So, if you
 * need the first invocation of call_rcu() to be fast, make sure
 * to create a call_rcu thread first.  One way to accomplish this is
 * "get_call_rcu_data();", and another is create_all_cpu_call_rcu_data().
 *
 * call_rcu must be called by registered RCU read-side threads.
 */
void call_rcu(struct rcu_head *head,
              void (*func)(struct rcu_head *head))
{
        struct call_rcu_data *crdp;

        /* Holding rcu read-side lock across use of per-cpu crdp */
        rcu_read_lock();
        /* Per-thread, then per-CPU, then default call_rcu_data. */
        crdp = get_call_rcu_data();
        _call_rcu(head, func, crdp);
        rcu_read_unlock();
}
 677
 678 /*
 679  * Free up the specified call_rcu_data structure, terminating the
 680  * associated call_rcu thread.  The caller must have previously
 681  * removed the call_rcu_data structure from per-thread or per-CPU
 682  * usage.  For example, set_cpu_call_rcu_data(cpu, NULL) for per-CPU
 683  * call_rcu_data structures or set_thread_call_rcu_data(NULL) for
 684  * per-thread call_rcu_data structures.
 685  *
 686  * We silently refuse to free up the default call_rcu_data structure
 687  * because that is where we put any leftover callbacks.  Note that
 688  * the possibility of self-spawning callbacks makes it impossible
 689  * to execute all the callbacks in finite time without putting any
 690  * newly spawned callbacks somewhere else.  The "somewhere else" of
 691  * last resort is the default call_rcu_data structure.
 692  *
 693  * We also silently refuse to free NULL pointers.  This simplifies
 694  * the calling code.
 695  *
 696  * The caller must wait for a grace-period to pass between return from
 697  * set_cpu_call_rcu_data() and call to call_rcu_data_free() passing the
 698  * previous call rcu data as argument.
 699  *
 700  * Note: introducing __cds_wfcq_splice_blocking() in this function fixed
 701  * a list corruption bug in the 0.7.x series. The equivalent fix
 702  * appeared in 0.6.8 for the stable-0.6 branch.
 703  */
 704 void call_rcu_data_free(struct call_rcu_data *crdp)
 705 {
 706         if (crdp == NULL || crdp == default_call_rcu_data) {
 707                 return;
 708         }
 709         if ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0) {
 710                 uatomic_or(&crdp->flags, URCU_CALL_RCU_STOP);
 711                 wake_call_rcu_thread(crdp);
 712                 while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0)
 713                         poll(NULL, 0, 1);
 714         }
 715         if (!cds_wfcq_empty(&crdp->cbs_head, &crdp->cbs_tail)) {
 716                 /* Create default call rcu data if need be */
 717                 (void) get_default_call_rcu_data();
 718                 __cds_wfcq_splice_blocking(&default_call_rcu_data->cbs_head,
 719                         &default_call_rcu_data->cbs_tail,
 720                         &crdp->cbs_head, &crdp->cbs_tail);
 721                 uatomic_add(&default_call_rcu_data->qlen,
 722                             uatomic_read(&crdp->qlen));
 723                 wake_call_rcu_thread(default_call_rcu_data);
 724         }
 725
 726         call_rcu_lock(&call_rcu_mutex);
 727         cds_list_del(&crdp->list);
 728         call_rcu_unlock(&call_rcu_mutex);
 729
 730         free(crdp);
 731 }
 732
 733 /*
 734  * Clean up all the per-CPU call_rcu threads.
 735  */
 736 void free_all_cpu_call_rcu_data(void)
 737 {
 738         int cpu;
 739         struct call_rcu_data **crdp;
 740         static int warned = 0;
 741
 742         if (maxcpus <= 0)
 743                 return;
 744
 745         crdp = malloc(sizeof(*crdp) * maxcpus);
 746         if (!crdp) {
 747                 if (!warned) {
 748                         fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
 749                 }
 750                 warned = 1;
 751                 return;
 752         }
 753
 754         for (cpu = 0; cpu < maxcpus; cpu++) {
 755                 crdp[cpu] = get_cpu_call_rcu_data(cpu);
 756                 if (crdp[cpu] == NULL)
 757                         continue;
 758                 set_cpu_call_rcu_data(cpu, NULL);
 759         }
 760         /*
 761          * Wait for call_rcu sites acting as RCU readers of the
 762          * call_rcu_data to become quiescent.
 763          */
 764         synchronize_rcu();
 765         for (cpu = 0; cpu < maxcpus; cpu++) {
 766                 if (crdp[cpu] == NULL)
 767                         continue;
 768                 call_rcu_data_free(crdp[cpu]);
 769         }
 770         free(crdp);
 771 }
 772
/*
 * Release callback passed to urcu_ref_put() for a call_rcu_completion:
 * recovers the enclosing completion from its ref member and frees it.
 */
static
void free_completion(struct urcu_ref *ref)
{
        struct call_rcu_completion *completion;

        completion = caa_container_of(ref, struct call_rcu_completion, ref);
        free(completion);
}
 781
 782 static
 783 void _rcu_barrier_complete(struct rcu_head *head)
 784 {
 785         struct call_rcu_completion_work *work;
 786         struct call_rcu_completion *completion;
 787
 788         work = caa_container_of(head, struct call_rcu_completion_work, head);
 789         completion = work->completion;
 790         if (!uatomic_sub_return(&completion->barrier_count, 1))
 791                 call_rcu_completion_wake_up(completion);
 792         urcu_ref_put(&completion->ref, free_completion);
 793         free(work);
 794 }
 795
/*
 * Wait for all in-flight call_rcu callbacks to complete execution.
 *
 * A barrier callback is queued behind the pending callbacks of every
 * call_rcu_data in call_rcu_data_list; the caller then sleeps until all
 * of them have run. Calling this from within an RCU read-side critical
 * section is an error: it is reported once on stderr and the function
 * returns without waiting.
 */
void rcu_barrier(void)
{
        struct call_rcu_data *crdp;
        struct call_rcu_completion *completion;
        int count = 0;
        int was_online;

        /* Put in offline state in QSBR. */
        was_online = rcu_read_ongoing();
        if (was_online)
                rcu_thread_offline();
        /*
         * Calling a rcu_barrier() within a RCU read-side critical
         * section is an error.
         */
        if (rcu_read_ongoing()) {
                static int warned = 0;

                if (!warned) {
                        fprintf(stderr, "[error] liburcu: rcu_barrier() called from within RCU read-side critical section.\n");
                }
                warned = 1;
                goto online;
        }

        completion = calloc(sizeof(*completion), 1);
        if (!completion)
                urcu_die(errno);

        /* The mutex keeps the list stable between counting and queueing. */
        call_rcu_lock(&call_rcu_mutex);
        cds_list_for_each_entry(crdp, &call_rcu_data_list, list)
                count++;

        /* Referenced by rcu_barrier() and each call_rcu thread. */
        urcu_ref_set(&completion->ref, count + 1);
        completion->barrier_count = count;

        cds_list_for_each_entry(crdp, &call_rcu_data_list, list) {
                struct call_rcu_completion_work *work;

                work = calloc(sizeof(*work), 1);
                if (!work)
                        urcu_die(errno);
                work->completion = completion;
                /* Freed by _rcu_barrier_complete() once it has run. */
                _call_rcu(&work->head, _rcu_barrier_complete, crdp);
        }
        call_rcu_unlock(&call_rcu_mutex);

        /* Wait for them */
        for (;;) {
                uatomic_dec(&completion->futex);
                /* Decrement futex before reading barrier_count */
                cmm_smp_mb();
                if (!uatomic_read(&completion->barrier_count))
                        break;
                call_rcu_completion_wait(completion);
        }

        /* Drop the reference held by rcu_barrier() itself. */
        urcu_ref_put(&completion->ref, free_completion);

online:
        if (was_online)
                rcu_thread_online();
}
 863
 864 /*
 865  * Acquire the call_rcu_mutex in order to ensure that the child sees
 866  * all of the call_rcu() data structures in a consistent state. Ensure
 867  * that all call_rcu threads are in a quiescent state across fork.
 868  * Suitable for pthread_atfork() and friends.
 869  */
 870 void call_rcu_before_fork(void)
 871 {
 872         struct call_rcu_data *crdp;
 873
 874         call_rcu_lock(&call_rcu_mutex);
 875
 876         cds_list_for_each_entry(crdp, &call_rcu_data_list, list) {
 877                 uatomic_or(&crdp->flags, URCU_CALL_RCU_PAUSE);
 878                 cmm_smp_mb__after_uatomic_or();
 879                 wake_call_rcu_thread(crdp);
 880         }
 881         cds_list_for_each_entry(crdp, &call_rcu_data_list, list) {
 882                 while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSED) == 0)
 883                         poll(NULL, 0, 1);
 884         }
 885 }
 886
 887 /*
 888  * Clean up call_rcu data structures in the parent of a successful fork()
 889  * that is not followed by exec() in the child.  Suitable for
 890  * pthread_atfork() and friends.
 891  */
 892 void call_rcu_after_fork_parent(void)
 893 {
 894         struct call_rcu_data *crdp;
 895
 896         cds_list_for_each_entry(crdp, &call_rcu_data_list, list)
 897                 uatomic_and(&crdp->flags, ~URCU_CALL_RCU_PAUSE);
 898         cds_list_for_each_entry(crdp, &call_rcu_data_list, list) {
 899                 while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSED) != 0)
 900                         poll(NULL, 0, 1);
 901         }
 902         call_rcu_unlock(&call_rcu_mutex);
 903 }
 904
 905 /*
 906  * Clean up call_rcu data structures in the child of a successful fork()
 907  * that is not followed by exec().  Suitable for pthread_atfork() and
 908  * friends.
 909  */
 910 void call_rcu_after_fork_child(void)
 911 {
 912         struct call_rcu_data *crdp, *next;
 913
 914         /* Release the mutex. */
 915         call_rcu_unlock(&call_rcu_mutex);
 916
 917         /* Do nothing when call_rcu() has not been used */
 918         if (cds_list_empty(&call_rcu_data_list))
 919                 return;
 920
 921         /*
 922          * Allocate a new default call_rcu_data structure in order
 923          * to get a working call_rcu thread to go with it.
 924          */
 925         default_call_rcu_data = NULL;
 926         (void)get_default_call_rcu_data();
 927
 928         /* Cleanup call_rcu_data pointers before use */
 929         maxcpus_reset();
 930         free(per_cpu_call_rcu_data);
 931         rcu_set_pointer(&per_cpu_call_rcu_data, NULL);
 932         URCU_TLS(thread_call_rcu_data) = NULL;
 933
 934         /*
 935          * Dispose of all of the rest of the call_rcu_data structures.
 936          * Leftover call_rcu callbacks will be merged into the new
 937          * default call_rcu thread queue.
 938          */
 939         cds_list_for_each_entry_safe(crdp, next, &call_rcu_data_list, list) {
 940                 if (crdp == default_call_rcu_data)
 941                         continue;
 942                 uatomic_set(&crdp->flags, URCU_CALL_RCU_STOPPED);
 943                 call_rcu_data_free(crdp);
 944         }
 945 }