Fix: urcu: futex wait: handle spurious futex wakeups
[urcu.git] / src / urcu.c
... / ...
CommitLineData
1/*
2 * urcu.c
3 *
4 * Userspace RCU library
5 *
6 * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
7 * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 *
23 * IBM's contributions to this file may be relicensed under LGPLv2 or later.
24 */
25
26#define URCU_NO_COMPAT_IDENTIFIERS
27#define _BSD_SOURCE
28#define _LGPL_SOURCE
29#define _DEFAULT_SOURCE
30#include <stdio.h>
31#include <pthread.h>
32#include <signal.h>
33#include <assert.h>
34#include <stdlib.h>
35#include <stdint.h>
36#include <string.h>
37#include <errno.h>
38#include <stdbool.h>
39#include <poll.h>
40
41#include <urcu/config.h>
42#include <urcu/arch.h>
43#include <urcu/wfcqueue.h>
44#include <urcu/map/urcu.h>
45#include <urcu/static/urcu.h>
46#include <urcu/pointer.h>
47#include <urcu/tls-compat.h>
48
49#include "urcu-die.h"
50#include "urcu-wait.h"
51#include "urcu-utils.h"
52
53#define URCU_API_MAP
54/* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */
55#undef _LGPL_SOURCE
56#include <urcu/urcu.h>
57#define _LGPL_SOURCE
58
/*
 * If a reader is really non-cooperative and refuses to commit its
 * rcu_active_readers count to memory (there is no barrier in the reader
 * per-se), kick it after KICK_READER_LOOPS loops waiting for it.
 */
#define KICK_READER_LOOPS	10

/*
 * Active attempts to check for reader Q.S. before calling futex().
 */
#define RCU_QS_ACTIVE_ATTEMPTS	100

/*
 * membarrier(): invoke the membarrier system call when the headers
 * provide its syscall number.
 *
 * If the headers do not support membarrier system call, fall back on
 * RCU_MB: the macro then ignores its arguments and evaluates to the
 * negative value -ENOSYS. The expansion is parenthesized so that it
 * stays a single operand whatever expression it is used in.
 */
#ifdef __NR_membarrier
# define membarrier(...)	syscall(__NR_membarrier, __VA_ARGS__)
#else
# define membarrier(...)	(-ENOSYS)
#endif
77
/*
 * Command values handed to membarrier() as its first argument.
 *
 * MEMBARRIER_CMD_QUERY is zero; every other command is a distinct bit,
 * which lets the result of a query be tested with a bitwise AND against
 * these constants.
 */
enum membarrier_cmd {
	MEMBARRIER_CMD_QUERY				= 0,
	MEMBARRIER_CMD_SHARED				= 1 << 0,
	/* bit 1: reserved for MEMBARRIER_CMD_SHARED_EXPEDITED */
	/* bit 2: reserved for MEMBARRIER_CMD_PRIVATE */
	MEMBARRIER_CMD_PRIVATE_EXPEDITED		= 1 << 3,
	MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED	= 1 << 4,
};
86
#ifdef RCU_MEMBARRIER
/* Non-zero once rcu_init() has run; later rcu_init() calls return early. */
static int init_done;
/*
 * Set to 1 by rcu_sys_membarrier_init() once the private expedited
 * membarrier command has been registered; selects the command used by
 * smp_mb_master().
 */
static int urcu_memb_has_sys_membarrier_private_expedited;

#ifndef CONFIG_RCU_FORCE_SYS_MEMBARRIER
/*
 * Explicitly initialize to zero because we can't alias a non-static
 * uninitialized variable.
 */
int urcu_memb_has_sys_membarrier = 0;
/* Second symbol name bound to the same variable. */
URCU_ATTR_ALIAS("urcu_memb_has_sys_membarrier")
extern int rcu_has_sys_membarrier_memb;
#endif

/* Declared as a constructor; defined further down for this flavor. */
void __attribute__((constructor)) rcu_init(void);
#endif
103
#ifdef RCU_MB
/*
 * RCU_MB flavor: rcu_init() is an intentionally empty function. It is
 * still defined because rcu_register_thread() calls rcu_init()
 * unconditionally.
 */
void rcu_init(void)
{
}
/* Alias symbol for rcu_init(). */
URCU_ATTR_ALIAS(urcu_stringify(rcu_init))
void alias_rcu_init(void);
#endif
111
#ifdef RCU_SIGNAL
/* Non-zero once rcu_init() has run; later rcu_init() calls return early. */
static int init_done;

/* Constructor / destructor declarations; both are defined further down. */
void __attribute__((constructor)) rcu_init(void);
void __attribute__((destructor)) rcu_exit(void);
#endif
118
/*
 * rcu_gp_lock ensures mutual exclusion between threads calling
 * synchronize_rcu().
 */
static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;
/*
 * rcu_registry_lock ensures mutual exclusion between threads
 * registering and unregistering themselves to/from the registry, and
 * with threads reading that registry from synchronize_rcu(). However,
 * this lock is not held all the way through the completion of awaiting
 * for the grace period. It is sporadically released between iterations
 * on the registry.
 * rcu_registry_lock may nest inside rcu_gp_lock.
 */
static pthread_mutex_t rcu_registry_lock = PTHREAD_MUTEX_INITIALIZER;
/*
 * Global grace-period state. ctr starts at URCU_GP_COUNT and has its
 * URCU_GP_CTR_PHASE bit flipped by synchronize_rcu(); the futex member
 * is what wait_gp() sleeps on.
 */
struct urcu_gp rcu_gp = { .ctr = URCU_GP_COUNT };
/* Alias symbol for rcu_gp. */
URCU_ATTR_ALIAS(urcu_stringify(rcu_gp))
extern struct urcu_gp alias_rcu_gp;

/*
 * Written to only by each individual reader. Read by both the reader and the
 * writers.
 */
DEFINE_URCU_TLS(struct urcu_reader, rcu_reader);
DEFINE_URCU_TLS_ALIAS(struct urcu_reader, rcu_reader, alias_rcu_reader);

/*
 * List of registered reader threads. Entries are added by
 * rcu_register_thread() and removed by rcu_unregister_thread(), both
 * under rcu_registry_lock.
 */
static CDS_LIST_HEAD(registry);

/*
 * Queue keeping threads awaiting to wait for a grace period. Contains
 * struct gp_waiters_thread objects.
 */
static DEFINE_URCU_WAIT_QUEUE(gp_waiters);
152
/*
 * Acquire @mutex, dying through urcu_die() on any unexpected error.
 *
 * With DISTRUST_SIGNALS_EXTREME defined, the mutex is polled with
 * pthread_mutex_trylock() instead of being waited on. Between attempts
 * the calling thread services its own need_mb request (memory barrier,
 * clear the flag, memory barrier) and then sleeps for 10 ms.
 */
static void mutex_lock(pthread_mutex_t *mutex)
{
#ifdef DISTRUST_SIGNALS_EXTREME
	for (;;) {
		int err = pthread_mutex_trylock(mutex);

		if (err == 0)
			break;
		/* Only "busy" and "interrupted" are worth retrying. */
		if (err != EBUSY && err != EINTR)
			urcu_die(err);
		if (CMM_LOAD_SHARED(URCU_TLS(rcu_reader).need_mb)) {
			cmm_smp_mb();
			_CMM_STORE_SHARED(URCU_TLS(rcu_reader).need_mb, 0);
			cmm_smp_mb();
		}
		(void) poll(NULL, 0, 10);
	}
#else /* #ifdef DISTRUST_SIGNALS_EXTREME */
	int err = pthread_mutex_lock(mutex);

	if (err)
		urcu_die(err);
#endif /* #else #ifdef DISTRUST_SIGNALS_EXTREME */
}
174
/*
 * Release @mutex. Any non-zero return from pthread_mutex_unlock() is
 * handed to urcu_die().
 */
static void mutex_unlock(pthread_mutex_t *mutex)
{
	int err = pthread_mutex_unlock(mutex);

	if (err != 0)
		urcu_die(err);
}
183
#ifdef RCU_MEMBARRIER
/*
 * Write-side memory barrier for the RCU_MEMBARRIER flavor.
 *
 * When sys_membarrier is flagged as available, issue it with the private
 * expedited command if that one was registered, otherwise with the
 * shared command; a failing call goes to urcu_die(errno). When it is not
 * available, a plain cmm_smp_mb() is executed instead.
 */
static void smp_mb_master(void)
{
	if (!caa_likely(urcu_memb_has_sys_membarrier)) {
		cmm_smp_mb();
		return;
	}

	if (membarrier(urcu_memb_has_sys_membarrier_private_expedited ?
			MEMBARRIER_CMD_PRIVATE_EXPEDITED :
			MEMBARRIER_CMD_SHARED, 0))
		urcu_die(errno);
}
#endif
197
#ifdef RCU_MB
/*
 * Write-side memory barrier for the RCU_MB flavor: a plain
 * cmm_smp_mb() on the calling thread.
 */
static void smp_mb_master(void)
{
	cmm_smp_mb();
}
#endif
204
#ifdef RCU_SIGNAL
/*
 * Make every registered reader thread execute a cmm_smp_mb(), so that
 * the compiler barriers around rcu read lock can be considered real
 * memory barriers.
 *
 * Each reader gets its need_mb flag raised and is sent SIGRCU; the
 * function then waits until every flag has been cleared again.
 */
static void force_mb_all_readers(void)
{
	struct urcu_reader *reader;

	/* No registered reader: nothing to do. */
	if (cds_list_empty(&registry))
		return;

	/*
	 * pthread_kill has a cmm_smp_mb(). But beware, we assume it performs
	 * a cache flush on architectures with non-coherent cache. Let's play
	 * safe and don't assume anything : we use cmm_smp_mc() to make sure the
	 * cache flush is enforced.
	 */
	cds_list_for_each_entry(reader, &registry, node) {
		CMM_STORE_SHARED(reader->need_mb, 1);
		pthread_kill(reader->tid, SIGRCU);
	}

	/*
	 * Wait for sighandler (and thus mb()) to execute on every thread.
	 *
	 * The pthread_kill() in the loop below is never reached on systems
	 * that deliver signals correctly and in a timely manner. It is,
	 * however, not uncommon for kernels to have bugs resulting in lost
	 * or unduly delayed signals, hence the re-send every millisecond.
	 *
	 * If this second pthread_kill() executes much at all, we suggest
	 * testing the underlying kernel and filing the relevant bug report.
	 * For Linux kernels, we recommend getting the Linux Test Project
	 * (LTP).
	 */
	cds_list_for_each_entry(reader, &registry, node) {
		for (;;) {
			if (!CMM_LOAD_SHARED(reader->need_mb))
				break;
			pthread_kill(reader->tid, SIGRCU);
			(void) poll(NULL, 0, 1);
		}
	}

	/* read ->need_mb before ending the barrier */
	cmm_smp_mb();
}

/*
 * Write-side memory barrier for the RCU_SIGNAL flavor: delegate to
 * force_mb_all_readers().
 */
static void smp_mb_master(void)
{
	force_mb_all_readers();
}
#endif /* #ifdef RCU_SIGNAL */
253
254/*
255 * synchronize_rcu() waiting. Single thread.
256 * Always called with rcu_registry lock held. Releases this lock and
257 * grabs it again. Holds the lock when it returns.
258 */
259static void wait_gp(void)
260{
261 /*
262 * Read reader_gp before read futex. smp_mb_master() needs to
263 * be called with the rcu registry lock held in RCU_SIGNAL
264 * flavor.
265 */
266 smp_mb_master();
267 /* Temporarily unlock the registry lock. */
268 mutex_unlock(&rcu_registry_lock);
269 while (uatomic_read(&rcu_gp.futex) == -1) {
270 if (!futex_async(&rcu_gp.futex, FUTEX_WAIT, -1, NULL, NULL, 0)) {
271 /*
272 * Prior queued wakeups queued by unrelated code
273 * using the same address can cause futex wait to
274 * return 0 even through the futex value is still
275 * -1 (spurious wakeups). Check the value again
276 * in user-space to validate whether it really
277 * differs from -1.
278 */
279 continue;
280 }
281 switch (errno) {
282 case EAGAIN:
283 /* Value already changed. */
284 goto end;
285 case EINTR:
286 /* Retry if interrupted by signal. */
287 break; /* Get out of switch. Check again. */
288 default:
289 /* Unexpected error. */
290 urcu_die(errno);
291 }
292 }
293end:
294 /*
295 * Re-lock the registry lock before the next loop.
296 */
297 mutex_lock(&rcu_registry_lock);
298}
299
300/*
301 * Always called with rcu_registry lock held. Releases this lock between
302 * iterations and grabs it again. Holds the lock when it returns.
303 */
304static void wait_for_readers(struct cds_list_head *input_readers,
305 struct cds_list_head *cur_snap_readers,
306 struct cds_list_head *qsreaders)
307{
308 unsigned int wait_loops = 0;
309 struct urcu_reader *index, *tmp;
310#ifdef HAS_INCOHERENT_CACHES
311 unsigned int wait_gp_loops = 0;
312#endif /* HAS_INCOHERENT_CACHES */
313
314 /*
315 * Wait for each thread URCU_TLS(rcu_reader).ctr to either
316 * indicate quiescence (not nested), or observe the current
317 * rcu_gp.ctr value.
318 */
319 for (;;) {
320 if (wait_loops < RCU_QS_ACTIVE_ATTEMPTS)
321 wait_loops++;
322 if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) {
323 uatomic_dec(&rcu_gp.futex);
324 /* Write futex before read reader_gp */
325 smp_mb_master();
326 }
327
328 cds_list_for_each_entry_safe(index, tmp, input_readers, node) {
329 switch (urcu_common_reader_state(&rcu_gp, &index->ctr)) {
330 case URCU_READER_ACTIVE_CURRENT:
331 if (cur_snap_readers) {
332 cds_list_move(&index->node,
333 cur_snap_readers);
334 break;
335 }
336 /* Fall-through */
337 case URCU_READER_INACTIVE:
338 cds_list_move(&index->node, qsreaders);
339 break;
340 case URCU_READER_ACTIVE_OLD:
341 /*
342 * Old snapshot. Leaving node in
343 * input_readers will make us busy-loop
344 * until the snapshot becomes current or
345 * the reader becomes inactive.
346 */
347 break;
348 }
349 }
350
351#ifndef HAS_INCOHERENT_CACHES
352 if (cds_list_empty(input_readers)) {
353 if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) {
354 /* Read reader_gp before write futex */
355 smp_mb_master();
356 uatomic_set(&rcu_gp.futex, 0);
357 }
358 break;
359 } else {
360 if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) {
361 /* wait_gp unlocks/locks registry lock. */
362 wait_gp();
363 } else {
364 /* Temporarily unlock the registry lock. */
365 mutex_unlock(&rcu_registry_lock);
366 caa_cpu_relax();
367 /*
368 * Re-lock the registry lock before the
369 * next loop.
370 */
371 mutex_lock(&rcu_registry_lock);
372 }
373 }
374#else /* #ifndef HAS_INCOHERENT_CACHES */
375 /*
376 * BUSY-LOOP. Force the reader thread to commit its
377 * URCU_TLS(rcu_reader).ctr update to memory if we wait
378 * for too long.
379 */
380 if (cds_list_empty(input_readers)) {
381 if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) {
382 /* Read reader_gp before write futex */
383 smp_mb_master();
384 uatomic_set(&rcu_gp.futex, 0);
385 }
386 break;
387 } else {
388 if (wait_gp_loops == KICK_READER_LOOPS) {
389 smp_mb_master();
390 wait_gp_loops = 0;
391 }
392 if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) {
393 /* wait_gp unlocks/locks registry lock. */
394 wait_gp();
395 wait_gp_loops++;
396 } else {
397 /* Temporarily unlock the registry lock. */
398 mutex_unlock(&rcu_registry_lock);
399 caa_cpu_relax();
400 /*
401 * Re-lock the registry lock before the
402 * next loop.
403 */
404 mutex_lock(&rcu_registry_lock);
405 }
406 }
407#endif /* #else #ifndef HAS_INCOHERENT_CACHES */
408 }
409}
410
411void synchronize_rcu(void)
412{
413 CDS_LIST_HEAD(cur_snap_readers);
414 CDS_LIST_HEAD(qsreaders);
415 DEFINE_URCU_WAIT_NODE(wait, URCU_WAIT_WAITING);
416 struct urcu_waiters waiters;
417
418 /*
419 * Add ourself to gp_waiters queue of threads awaiting to wait
420 * for a grace period. Proceed to perform the grace period only
421 * if we are the first thread added into the queue.
422 * The implicit memory barrier before urcu_wait_add()
423 * orders prior memory accesses of threads put into the wait
424 * queue before their insertion into the wait queue.
425 */
426 if (urcu_wait_add(&gp_waiters, &wait) != 0) {
427 /* Not first in queue: will be awakened by another thread. */
428 urcu_adaptative_busy_wait(&wait);
429 /* Order following memory accesses after grace period. */
430 cmm_smp_mb();
431 return;
432 }
433 /* We won't need to wake ourself up */
434 urcu_wait_set_state(&wait, URCU_WAIT_RUNNING);
435
436 mutex_lock(&rcu_gp_lock);
437
438 /*
439 * Move all waiters into our local queue.
440 */
441 urcu_move_waiters(&waiters, &gp_waiters);
442
443 mutex_lock(&rcu_registry_lock);
444
445 if (cds_list_empty(&registry))
446 goto out;
447
448 /*
449 * All threads should read qparity before accessing data structure
450 * where new ptr points to. Must be done within rcu_registry_lock
451 * because it iterates on reader threads.
452 */
453 /* Write new ptr before changing the qparity */
454 smp_mb_master();
455
456 /*
457 * Wait for readers to observe original parity or be quiescent.
458 * wait_for_readers() can release and grab again rcu_registry_lock
459 * interally.
460 */
461 wait_for_readers(&registry, &cur_snap_readers, &qsreaders);
462
463 /*
464 * Must finish waiting for quiescent state for original parity before
465 * committing next rcu_gp.ctr update to memory. Failure to do so could
466 * result in the writer waiting forever while new readers are always
467 * accessing data (no progress). Enforce compiler-order of load
468 * URCU_TLS(rcu_reader).ctr before store to rcu_gp.ctr.
469 */
470 cmm_barrier();
471
472 /*
473 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
474 * model easier to understand. It does not have a big performance impact
475 * anyway, given this is the write-side.
476 */
477 cmm_smp_mb();
478
479 /* Switch parity: 0 -> 1, 1 -> 0 */
480 CMM_STORE_SHARED(rcu_gp.ctr, rcu_gp.ctr ^ URCU_GP_CTR_PHASE);
481
482 /*
483 * Must commit rcu_gp.ctr update to memory before waiting for quiescent
484 * state. Failure to do so could result in the writer waiting forever
485 * while new readers are always accessing data (no progress). Enforce
486 * compiler-order of store to rcu_gp.ctr before load rcu_reader ctr.
487 */
488 cmm_barrier();
489
490 /*
491 *
492 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
493 * model easier to understand. It does not have a big performance impact
494 * anyway, given this is the write-side.
495 */
496 cmm_smp_mb();
497
498 /*
499 * Wait for readers to observe new parity or be quiescent.
500 * wait_for_readers() can release and grab again rcu_registry_lock
501 * interally.
502 */
503 wait_for_readers(&cur_snap_readers, NULL, &qsreaders);
504
505 /*
506 * Put quiescent reader list back into registry.
507 */
508 cds_list_splice(&qsreaders, &registry);
509
510 /*
511 * Finish waiting for reader threads before letting the old ptr
512 * being freed. Must be done within rcu_registry_lock because it
513 * iterates on reader threads.
514 */
515 smp_mb_master();
516out:
517 mutex_unlock(&rcu_registry_lock);
518 mutex_unlock(&rcu_gp_lock);
519
520 /*
521 * Wakeup waiters only after we have completed the grace period
522 * and have ensured the memory barriers at the end of the grace
523 * period have been issued.
524 */
525 urcu_wake_all_waiters(&waiters);
526}
527URCU_ATTR_ALIAS(urcu_stringify(synchronize_rcu))
528void alias_synchronize_rcu();
529
530/*
531 * library wrappers to be used by non-LGPL compatible source code.
532 */
533
534void rcu_read_lock(void)
535{
536 _rcu_read_lock();
537}
538URCU_ATTR_ALIAS(urcu_stringify(rcu_read_lock))
539void alias_rcu_read_lock();
540
541void rcu_read_unlock(void)
542{
543 _rcu_read_unlock();
544}
545URCU_ATTR_ALIAS(urcu_stringify(rcu_read_unlock))
546void alias_rcu_read_unlock();
547
548int rcu_read_ongoing(void)
549{
550 return _rcu_read_ongoing();
551}
552URCU_ATTR_ALIAS(urcu_stringify(rcu_read_ongoing))
553void alias_rcu_read_ongoing();
554
555void rcu_register_thread(void)
556{
557 URCU_TLS(rcu_reader).tid = pthread_self();
558 assert(URCU_TLS(rcu_reader).need_mb == 0);
559 assert(!(URCU_TLS(rcu_reader).ctr & URCU_GP_CTR_NEST_MASK));
560
561 mutex_lock(&rcu_registry_lock);
562 assert(!URCU_TLS(rcu_reader).registered);
563 URCU_TLS(rcu_reader).registered = 1;
564 rcu_init(); /* In case gcc does not support constructor attribute */
565 cds_list_add(&URCU_TLS(rcu_reader).node, &registry);
566 mutex_unlock(&rcu_registry_lock);
567}
568URCU_ATTR_ALIAS(urcu_stringify(rcu_register_thread))
569void alias_rcu_register_thread();
570
571void rcu_unregister_thread(void)
572{
573 mutex_lock(&rcu_registry_lock);
574 assert(URCU_TLS(rcu_reader).registered);
575 URCU_TLS(rcu_reader).registered = 0;
576 cds_list_del(&URCU_TLS(rcu_reader).node);
577 mutex_unlock(&rcu_registry_lock);
578}
579URCU_ATTR_ALIAS(urcu_stringify(rcu_unregister_thread))
580void alias_rcu_unregister_thread();
581
582#ifdef RCU_MEMBARRIER
583
584#ifdef CONFIG_RCU_FORCE_SYS_MEMBARRIER
585static
586void rcu_sys_membarrier_status(bool available)
587{
588 if (!available)
589 abort();
590}
591#else
592static
593void rcu_sys_membarrier_status(bool available)
594{
595 if (!available)
596 return;
597 urcu_memb_has_sys_membarrier = 1;
598}
599#endif
600
601static
602void rcu_sys_membarrier_init(void)
603{
604 bool available = false;
605 int mask;
606
607 mask = membarrier(MEMBARRIER_CMD_QUERY, 0);
608 if (mask >= 0) {
609 if (mask & MEMBARRIER_CMD_PRIVATE_EXPEDITED) {
610 if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0))
611 urcu_die(errno);
612 urcu_memb_has_sys_membarrier_private_expedited = 1;
613 available = true;
614 } else if (mask & MEMBARRIER_CMD_SHARED) {
615 available = true;
616 }
617 }
618 rcu_sys_membarrier_status(available);
619}
620
621void rcu_init(void)
622{
623 if (init_done)
624 return;
625 init_done = 1;
626 rcu_sys_membarrier_init();
627}
628URCU_ATTR_ALIAS(urcu_stringify(rcu_init))
629void alias_rcu_init(void);
630#endif
631
632#ifdef RCU_SIGNAL
/*
 * SIGRCU handler, installed by rcu_init(). None of the three signal
 * handler arguments is used.
 */
static void sigrcu_handler(int signo __attribute__((unused)),
		siginfo_t *siginfo __attribute__((unused)),
		void *context __attribute__((unused)))
{
	/*
	 * Executing this cmm_smp_mb() is the only purpose of this signal handler.
	 * It punctually promotes cmm_barrier() into cmm_smp_mb() on every thread it is
	 * executed on.
	 */
	cmm_smp_mb();
	/*
	 * Clear this thread's need_mb flag, which force_mb_all_readers()
	 * polls while waiting for the barrier to have executed.
	 */
	_CMM_STORE_SHARED(URCU_TLS(rcu_reader).need_mb, 0);
	cmm_smp_mb();
}
646
647/*
648 * rcu_init constructor. Called when the library is linked, but also when
649 * reader threads are calling rcu_register_thread().
650 * Should only be called by a single thread at a given time. This is ensured by
651 * holing the rcu_registry_lock from rcu_register_thread() or by running
652 * at library load time, which should not be executed by multiple
653 * threads nor concurrently with rcu_register_thread() anyway.
654 */
655void rcu_init(void)
656{
657 struct sigaction act;
658 int ret;
659
660 if (init_done)
661 return;
662 init_done = 1;
663
664 act.sa_sigaction = sigrcu_handler;
665 act.sa_flags = SA_SIGINFO | SA_RESTART;
666 sigemptyset(&act.sa_mask);
667 ret = sigaction(SIGRCU, &act, NULL);
668 if (ret)
669 urcu_die(errno);
670}
671URCU_ATTR_ALIAS(urcu_stringify(rcu_init))
672void alias_rcu_init(void);
673
/*
 * Destructor for the RCU_SIGNAL flavor. Intentionally performs no work;
 * see the comment in the body for why.
 */
void rcu_exit(void)
{
	/*
	 * Don't unregister the SIGRCU signal handler anymore, because
	 * call_rcu threads could still be using it shortly before the
	 * application exits.
	 * Assertion disabled because call_rcu threads are now rcu
	 * readers, and left running at exit.
	 * assert(cds_list_empty(&registry));
	 */
}
/* Alias symbol for rcu_exit(). */
URCU_ATTR_ALIAS(urcu_stringify(rcu_exit))
void alias_rcu_exit(void);
687
688#endif /* #ifdef RCU_SIGNAL */
689
/*
 * Instantiate the flavor descriptor for this flavor, plus its alias.
 * NOTE(review): the exact contents come from the DEFINE_RCU_FLAVOR
 * macros, which are defined outside this file - confirm there.
 */
DEFINE_RCU_FLAVOR(rcu_flavor);
DEFINE_RCU_FLAVOR_ALIAS(rcu_flavor, alias_rcu_flavor);
692
693#include "urcu-call-rcu-impl.h"
694#include "urcu-defer-impl.h"
This page took 0.03173 seconds and 4 git commands to generate.