urcu-bp: use sys_membarrier when available
[urcu.git] / urcu-bp.c
1 /*
2 * urcu-bp.c
3 *
4 * Userspace RCU library, "bulletproof" version.
5 *
6 * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
7 * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 *
23 * IBM's contributions to this file may be relicensed under LGPLv2 or later.
24 */
25
26 #define _GNU_SOURCE
27 #define _LGPL_SOURCE
28 #include <stdio.h>
29 #include <pthread.h>
30 #include <signal.h>
31 #include <assert.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <errno.h>
35 #include <poll.h>
36 #include <unistd.h>
37 #include <sys/mman.h>
38
39 #include "urcu/wfcqueue.h"
40 #include "urcu/map/urcu-bp.h"
41 #include "urcu/static/urcu-bp.h"
42 #include "urcu-pointer.h"
43 #include "urcu/tls-compat.h"
44
45 #include "urcu-die.h"
46
47 /* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */
48 #undef _LGPL_SOURCE
49 #include "urcu-bp.h"
50 #define _LGPL_SOURCE
51
52 #ifndef MAP_ANONYMOUS
53 #define MAP_ANONYMOUS MAP_ANON
54 #endif
55
#ifdef __linux__
/*
 * On Linux, simply forward to the native mremap() and hand its result
 * (new address or MAP_FAILED) back to the caller.
 */
static
void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	void *remapped;

	remapped = mremap(old_address, old_size, new_size, flags);
	return remapped;
}
#else /* !__linux__ */

#define MREMAP_MAYMOVE	1
#define MREMAP_FIXED	2

/*
 * Stand-in for mremap() on non-Linux systems. It never remaps anything:
 * it always reports MAP_FAILED so the caller falls back to another
 * strategy. Requests carrying MREMAP_MAYMOVE are not supported and trip
 * the assertion. This is not a generic replacement.
 */
static
void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	assert(!(flags & MREMAP_MAYMOVE));

	return MAP_FAILED;
}
#endif /* __linux__ */
81
/* Sleep delay in ms */
#define RCU_SLEEP_DELAY_MS	10
/* Number of reader slots provisioned in the very first arena chunk. */
#define INIT_NR_THREADS		8
/*
 * Byte size of the first arena chunk: chunk header followed by
 * INIT_NR_THREADS reader slots. The expansion is fully parenthesized so
 * the macro can be used safely inside larger expressions.
 */
#define ARENA_INIT_ALLOC		\
	(sizeof(struct registry_chunk)	\
	+ INIT_NR_THREADS * sizeof(struct rcu_reader))

/*
 * Active attempts to check for reader Q.S. before calling sleep().
 */
#define RCU_QS_ACTIVE_ATTEMPTS	100
93
/*
 * Count of rcu_bp_init() calls not yet balanced by rcu_bp_exit().
 * Modified with init_lock held.
 */
static int rcu_bp_refcount;
96
97 /*
98 * RCU_MEMBARRIER is only possibly available on Linux.
99 */
100 #ifdef __linux__
101 #include <urcu/syscall-compat.h>
102 #endif
103
/*
 * Wrap the membarrier system call. When the headers do not provide
 * SYS_membarrier, the wrapper evaluates to a negative error value, so
 * the query done at init time fails and the cmm_smp_mb() fallback is
 * used instead.
 */
#ifdef SYS_membarrier
# define membarrier(...)	syscall(SYS_membarrier, __VA_ARGS__)
#else
# define membarrier(...)	(-ENOSYS)
#endif
110
/* Command values passed as first argument to membarrier(). */
enum membarrier_cmd {
	/* Query: the result is tested as a bitmask of supported commands. */
	MEMBARRIER_CMD_QUERY	= 0,
	/* Command issued by the write-side master barrier. */
	MEMBARRIER_CMD_SHARED	= (1 << 0),
};
115
116 static
117 void __attribute__((constructor)) rcu_bp_init(void);
118 static
119 void __attribute__((destructor)) rcu_bp_exit(void);
120
121 int urcu_bp_has_sys_membarrier;
122
123 /*
124 * rcu_gp_lock ensures mutual exclusion between threads calling
125 * synchronize_rcu().
126 */
127 static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;
128 /*
129 * rcu_registry_lock ensures mutual exclusion between threads
130 * registering and unregistering themselves to/from the registry, and
131 * with threads reading that registry from synchronize_rcu(). However,
132 * this lock is not held all the way through the completion of awaiting
133 * for the grace period. It is sporadically released between iterations
134 * on the registry.
135 * rcu_registry_lock may nest inside rcu_gp_lock.
136 */
137 static pthread_mutex_t rcu_registry_lock = PTHREAD_MUTEX_INITIALIZER;
138
139 static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
140 static int initialized;
141
142 static pthread_key_t urcu_bp_key;
143
144 struct rcu_gp rcu_gp = { .ctr = RCU_GP_COUNT };
145
146 /*
147 * Pointer to registry elements. Written to only by each individual reader. Read
148 * by both the reader and the writers.
149 */
150 DEFINE_URCU_TLS(struct rcu_reader *, rcu_reader);
151
152 static CDS_LIST_HEAD(registry);
153
154 struct registry_chunk {
155 size_t data_len; /* data length */
156 size_t used; /* amount of data used */
157 struct cds_list_head node; /* chunk_list node */
158 char data[];
159 };
160
161 struct registry_arena {
162 struct cds_list_head chunk_list;
163 };
164
165 static struct registry_arena registry_arena = {
166 .chunk_list = CDS_LIST_HEAD_INIT(registry_arena.chunk_list),
167 };
168
169 /* Saved fork signal mask, protected by rcu_gp_lock */
170 static sigset_t saved_fork_signal_mask;
171
/*
 * Acquire @mutex, terminating the process through urcu_die() on any
 * unexpected error. When DISTRUST_SIGNALS_EXTREME is defined, the lock
 * is polled with trylock and a 10 ms pause between attempts instead of
 * blocking.
 */
static void mutex_lock(pthread_mutex_t *mutex)
{
	int err;

#ifdef DISTRUST_SIGNALS_EXTREME
	for (;;) {
		err = pthread_mutex_trylock(mutex);
		if (err == 0)
			break;
		/* Only "busy" and "interrupted" are worth retrying. */
		if (err != EBUSY && err != EINTR)
			urcu_die(err);
		poll(NULL,0,10);
	}
#else /* #ifdef DISTRUST_SIGNALS_EXTREME */
	err = pthread_mutex_lock(mutex);
	if (err != 0)
		urcu_die(err);
#endif /* #else #ifdef DISTRUST_SIGNALS_EXTREME */
}
188
/*
 * Release @mutex; any error from the pthread layer is fatal and is
 * reported through urcu_die().
 */
static void mutex_unlock(pthread_mutex_t *mutex)
{
	int err = pthread_mutex_unlock(mutex);

	if (err != 0)
		urcu_die(err);
}
197
198 static void smp_mb_master(void)
199 {
200 if (caa_likely(urcu_bp_has_sys_membarrier))
201 (void) membarrier(MEMBARRIER_CMD_SHARED, 0);
202 else
203 cmm_smp_mb();
204 }
205
206 /*
207 * Always called with rcu_registry lock held. Releases this lock between
208 * iterations and grabs it again. Holds the lock when it returns.
209 */
210 static void wait_for_readers(struct cds_list_head *input_readers,
211 struct cds_list_head *cur_snap_readers,
212 struct cds_list_head *qsreaders)
213 {
214 unsigned int wait_loops = 0;
215 struct rcu_reader *index, *tmp;
216
217 /*
218 * Wait for each thread URCU_TLS(rcu_reader).ctr to either
219 * indicate quiescence (not nested), or observe the current
220 * rcu_gp.ctr value.
221 */
222 for (;;) {
223 if (wait_loops < RCU_QS_ACTIVE_ATTEMPTS)
224 wait_loops++;
225
226 cds_list_for_each_entry_safe(index, tmp, input_readers, node) {
227 switch (rcu_reader_state(&index->ctr)) {
228 case RCU_READER_ACTIVE_CURRENT:
229 if (cur_snap_readers) {
230 cds_list_move(&index->node,
231 cur_snap_readers);
232 break;
233 }
234 /* Fall-through */
235 case RCU_READER_INACTIVE:
236 cds_list_move(&index->node, qsreaders);
237 break;
238 case RCU_READER_ACTIVE_OLD:
239 /*
240 * Old snapshot. Leaving node in
241 * input_readers will make us busy-loop
242 * until the snapshot becomes current or
243 * the reader becomes inactive.
244 */
245 break;
246 }
247 }
248
249 if (cds_list_empty(input_readers)) {
250 break;
251 } else {
252 /* Temporarily unlock the registry lock. */
253 mutex_unlock(&rcu_registry_lock);
254 if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS)
255 (void) poll(NULL, 0, RCU_SLEEP_DELAY_MS);
256 else
257 caa_cpu_relax();
258 /* Re-lock the registry lock before the next loop. */
259 mutex_lock(&rcu_registry_lock);
260 }
261 }
262 }
263
264 void synchronize_rcu(void)
265 {
266 CDS_LIST_HEAD(cur_snap_readers);
267 CDS_LIST_HEAD(qsreaders);
268 sigset_t newmask, oldmask;
269 int ret;
270
271 ret = sigfillset(&newmask);
272 assert(!ret);
273 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
274 assert(!ret);
275
276 mutex_lock(&rcu_gp_lock);
277
278 mutex_lock(&rcu_registry_lock);
279
280 if (cds_list_empty(&registry))
281 goto out;
282
283 /* All threads should read qparity before accessing data structure
284 * where new ptr points to. */
285 /* Write new ptr before changing the qparity */
286 smp_mb_master();
287
288 /*
289 * Wait for readers to observe original parity or be quiescent.
290 * wait_for_readers() can release and grab again rcu_registry_lock
291 * interally.
292 */
293 wait_for_readers(&registry, &cur_snap_readers, &qsreaders);
294
295 /*
296 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
297 * model easier to understand. It does not have a big performance impact
298 * anyway, given this is the write-side.
299 */
300 cmm_smp_mb();
301
302 /* Switch parity: 0 -> 1, 1 -> 0 */
303 CMM_STORE_SHARED(rcu_gp.ctr, rcu_gp.ctr ^ RCU_GP_CTR_PHASE);
304
305 /*
306 * Must commit qparity update to memory before waiting for other parity
307 * quiescent state. Failure to do so could result in the writer waiting
308 * forever while new readers are always accessing data (no progress).
309 * Ensured by CMM_STORE_SHARED and CMM_LOAD_SHARED.
310 */
311
312 /*
313 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
314 * model easier to understand. It does not have a big performance impact
315 * anyway, given this is the write-side.
316 */
317 cmm_smp_mb();
318
319 /*
320 * Wait for readers to observe new parity or be quiescent.
321 * wait_for_readers() can release and grab again rcu_registry_lock
322 * interally.
323 */
324 wait_for_readers(&cur_snap_readers, NULL, &qsreaders);
325
326 /*
327 * Put quiescent reader list back into registry.
328 */
329 cds_list_splice(&qsreaders, &registry);
330
331 /*
332 * Finish waiting for reader threads before letting the old ptr being
333 * freed.
334 */
335 smp_mb_master();
336 out:
337 mutex_unlock(&rcu_registry_lock);
338 mutex_unlock(&rcu_gp_lock);
339 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
340 assert(!ret);
341 }
342
/*
 * Out-of-line library wrappers around the static inline read-side
 * primitives, for use by source code that is not LGPL-compatible.
 */

/* Wrapper calling _rcu_read_lock(). */
void rcu_read_lock(void)
{
	_rcu_read_lock();
}

/* Wrapper calling _rcu_read_unlock(). */
void rcu_read_unlock(void)
{
	_rcu_read_unlock();
}

/* Wrapper returning the value of _rcu_read_ongoing(). */
int rcu_read_ongoing(void)
{
	int ongoing = _rcu_read_ongoing();

	return ongoing;
}
361
362 /*
363 * Only grow for now. If empty, allocate a ARENA_INIT_ALLOC sized chunk.
364 * Else, try expanding the last chunk. If this fails, allocate a new
365 * chunk twice as big as the last chunk.
366 * Memory used by chunks _never_ moves. A chunk could theoretically be
367 * freed when all "used" slots are released, but we don't do it at this
368 * point.
369 */
370 static
371 void expand_arena(struct registry_arena *arena)
372 {
373 struct registry_chunk *new_chunk, *last_chunk;
374 size_t old_chunk_len, new_chunk_len;
375
376 /* No chunk. */
377 if (cds_list_empty(&arena->chunk_list)) {
378 assert(ARENA_INIT_ALLOC >=
379 sizeof(struct registry_chunk)
380 + sizeof(struct rcu_reader));
381 new_chunk_len = ARENA_INIT_ALLOC;
382 new_chunk = mmap(NULL, new_chunk_len,
383 PROT_READ | PROT_WRITE,
384 MAP_ANONYMOUS | MAP_PRIVATE,
385 -1, 0);
386 if (new_chunk == MAP_FAILED)
387 abort();
388 bzero(new_chunk, new_chunk_len);
389 new_chunk->data_len =
390 new_chunk_len - sizeof(struct registry_chunk);
391 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
392 return; /* We're done. */
393 }
394
395 /* Try expanding last chunk. */
396 last_chunk = cds_list_entry(arena->chunk_list.prev,
397 struct registry_chunk, node);
398 old_chunk_len =
399 last_chunk->data_len + sizeof(struct registry_chunk);
400 new_chunk_len = old_chunk_len << 1;
401
402 /* Don't allow memory mapping to move, just expand. */
403 new_chunk = mremap_wrapper(last_chunk, old_chunk_len,
404 new_chunk_len, 0);
405 if (new_chunk != MAP_FAILED) {
406 /* Should not have moved. */
407 assert(new_chunk == last_chunk);
408 bzero((char *) last_chunk + old_chunk_len,
409 new_chunk_len - old_chunk_len);
410 last_chunk->data_len =
411 new_chunk_len - sizeof(struct registry_chunk);
412 return; /* We're done. */
413 }
414
415 /* Remap did not succeed, we need to add a new chunk. */
416 new_chunk = mmap(NULL, new_chunk_len,
417 PROT_READ | PROT_WRITE,
418 MAP_ANONYMOUS | MAP_PRIVATE,
419 -1, 0);
420 if (new_chunk == MAP_FAILED)
421 abort();
422 bzero(new_chunk, new_chunk_len);
423 new_chunk->data_len =
424 new_chunk_len - sizeof(struct registry_chunk);
425 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
426 }
427
428 static
429 struct rcu_reader *arena_alloc(struct registry_arena *arena)
430 {
431 struct registry_chunk *chunk;
432 struct rcu_reader *rcu_reader_reg;
433 int expand_done = 0; /* Only allow to expand once per alloc */
434 size_t len = sizeof(struct rcu_reader);
435
436 retry:
437 cds_list_for_each_entry(chunk, &arena->chunk_list, node) {
438 if (chunk->data_len - chunk->used < len)
439 continue;
440 /* Find spot */
441 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
442 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
443 rcu_reader_reg++) {
444 if (!rcu_reader_reg->alloc) {
445 rcu_reader_reg->alloc = 1;
446 chunk->used += len;
447 return rcu_reader_reg;
448 }
449 }
450 }
451
452 if (!expand_done) {
453 expand_arena(arena);
454 expand_done = 1;
455 goto retry;
456 }
457
458 return NULL;
459 }
460
461 /* Called with signals off and mutex locked */
462 static
463 void add_thread(void)
464 {
465 struct rcu_reader *rcu_reader_reg;
466 int ret;
467
468 rcu_reader_reg = arena_alloc(&registry_arena);
469 if (!rcu_reader_reg)
470 abort();
471 ret = pthread_setspecific(urcu_bp_key, rcu_reader_reg);
472 if (ret)
473 abort();
474
475 /* Add to registry */
476 rcu_reader_reg->tid = pthread_self();
477 assert(rcu_reader_reg->ctr == 0);
478 cds_list_add(&rcu_reader_reg->node, &registry);
479 /*
480 * Reader threads are pointing to the reader registry. This is
481 * why its memory should never be relocated.
482 */
483 URCU_TLS(rcu_reader) = rcu_reader_reg;
484 }
485
486 /* Called with mutex locked */
487 static
488 void cleanup_thread(struct registry_chunk *chunk,
489 struct rcu_reader *rcu_reader_reg)
490 {
491 rcu_reader_reg->ctr = 0;
492 cds_list_del(&rcu_reader_reg->node);
493 rcu_reader_reg->tid = 0;
494 rcu_reader_reg->alloc = 0;
495 chunk->used -= sizeof(struct rcu_reader);
496 }
497
498 static
499 struct registry_chunk *find_chunk(struct rcu_reader *rcu_reader_reg)
500 {
501 struct registry_chunk *chunk;
502
503 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
504 if (rcu_reader_reg < (struct rcu_reader *) &chunk->data[0])
505 continue;
506 if (rcu_reader_reg >= (struct rcu_reader *) &chunk->data[chunk->data_len])
507 continue;
508 return chunk;
509 }
510 return NULL;
511 }
512
513 /* Called with signals off and mutex locked */
514 static
515 void remove_thread(struct rcu_reader *rcu_reader_reg)
516 {
517 cleanup_thread(find_chunk(rcu_reader_reg), rcu_reader_reg);
518 URCU_TLS(rcu_reader) = NULL;
519 }
520
521 /* Disable signals, take mutex, add to registry */
522 void rcu_bp_register(void)
523 {
524 sigset_t newmask, oldmask;
525 int ret;
526
527 ret = sigfillset(&newmask);
528 if (ret)
529 abort();
530 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
531 if (ret)
532 abort();
533
534 /*
535 * Check if a signal concurrently registered our thread since
536 * the check in rcu_read_lock().
537 */
538 if (URCU_TLS(rcu_reader))
539 goto end;
540
541 /*
542 * Take care of early registration before urcu_bp constructor.
543 */
544 rcu_bp_init();
545
546 mutex_lock(&rcu_registry_lock);
547 add_thread();
548 mutex_unlock(&rcu_registry_lock);
549 end:
550 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
551 if (ret)
552 abort();
553 }
554
555 /* Disable signals, take mutex, remove from registry */
556 static
557 void rcu_bp_unregister(struct rcu_reader *rcu_reader_reg)
558 {
559 sigset_t newmask, oldmask;
560 int ret;
561
562 ret = sigfillset(&newmask);
563 if (ret)
564 abort();
565 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
566 if (ret)
567 abort();
568
569 mutex_lock(&rcu_registry_lock);
570 remove_thread(rcu_reader_reg);
571 mutex_unlock(&rcu_registry_lock);
572 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
573 if (ret)
574 abort();
575 rcu_bp_exit();
576 }
577
/*
 * Destructor attached to urcu_bp_key: called with the exiting thread's
 * registry slot as @rcu_key, it removes that thread from the registry.
 */
static
void urcu_bp_thread_exit_notifier(void *rcu_key)
{
	struct rcu_reader *exiting_reader = rcu_key;

	rcu_bp_unregister(exiting_reader);
}
587
588 static
589 void rcu_bp_init(void)
590 {
591 mutex_lock(&init_lock);
592 if (!rcu_bp_refcount++) {
593 int ret;
594
595 ret = pthread_key_create(&urcu_bp_key,
596 urcu_bp_thread_exit_notifier);
597 if (ret)
598 abort();
599 ret = membarrier(MEMBARRIER_CMD_QUERY, 0);
600 if (ret >= 0 && (ret & MEMBARRIER_CMD_SHARED)) {
601 urcu_bp_has_sys_membarrier = 1;
602 }
603 initialized = 1;
604 }
605 mutex_unlock(&init_lock);
606 }
607
608 static
609 void rcu_bp_exit(void)
610 {
611 mutex_lock(&init_lock);
612 if (!--rcu_bp_refcount) {
613 struct registry_chunk *chunk, *tmp;
614 int ret;
615
616 cds_list_for_each_entry_safe(chunk, tmp,
617 &registry_arena.chunk_list, node) {
618 munmap(chunk, chunk->data_len
619 + sizeof(struct registry_chunk));
620 }
621 ret = pthread_key_delete(urcu_bp_key);
622 if (ret)
623 abort();
624 }
625 mutex_unlock(&init_lock);
626 }
627
628 /*
629 * Holding the rcu_gp_lock and rcu_registry_lock across fork will make
630 * sure we fork() don't race with a concurrent thread executing with
631 * any of those locks held. This ensures that the registry and data
632 * protected by rcu_gp_lock are in a coherent state in the child.
633 */
634 void rcu_bp_before_fork(void)
635 {
636 sigset_t newmask, oldmask;
637 int ret;
638
639 ret = sigfillset(&newmask);
640 assert(!ret);
641 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
642 assert(!ret);
643 mutex_lock(&rcu_gp_lock);
644 mutex_lock(&rcu_registry_lock);
645 saved_fork_signal_mask = oldmask;
646 }
647
648 void rcu_bp_after_fork_parent(void)
649 {
650 sigset_t oldmask;
651 int ret;
652
653 oldmask = saved_fork_signal_mask;
654 mutex_unlock(&rcu_registry_lock);
655 mutex_unlock(&rcu_gp_lock);
656 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
657 assert(!ret);
658 }
659
660 /*
661 * Prune all entries from registry except our own thread. Fits the Linux
662 * fork behavior. Called with rcu_gp_lock and rcu_registry_lock held.
663 */
664 static
665 void urcu_bp_prune_registry(void)
666 {
667 struct registry_chunk *chunk;
668 struct rcu_reader *rcu_reader_reg;
669
670 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
671 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
672 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
673 rcu_reader_reg++) {
674 if (!rcu_reader_reg->alloc)
675 continue;
676 if (rcu_reader_reg->tid == pthread_self())
677 continue;
678 cleanup_thread(chunk, rcu_reader_reg);
679 }
680 }
681 }
682
683 void rcu_bp_after_fork_child(void)
684 {
685 sigset_t oldmask;
686 int ret;
687
688 urcu_bp_prune_registry();
689 oldmask = saved_fork_signal_mask;
690 mutex_unlock(&rcu_registry_lock);
691 mutex_unlock(&rcu_gp_lock);
692 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
693 assert(!ret);
694 }
695
/* Out-of-line wrapper returning _rcu_dereference(@p). */
void *rcu_dereference_sym_bp(void *p)
{
	return _rcu_dereference(p);
}

/*
 * Store @v into *@p after a write barrier, so earlier writes are
 * ordered before the pointer store. Returns @v.
 */
void *rcu_set_pointer_sym_bp(void **p, void *v)
{
	cmm_wmb();
	uatomic_set(p, v);
	return v;
}

/*
 * Exchange *@p with @v after a write barrier. Returns the value
 * produced by uatomic_xchg().
 */
void *rcu_xchg_pointer_sym_bp(void **p, void *v)
{
	void *previous;

	cmm_wmb();
	previous = uatomic_xchg(p, v);
	return previous;
}

/*
 * Compare-and-exchange on *@p (expected @old, replacement @_new) after
 * a write barrier. Returns the value produced by uatomic_cmpxchg().
 */
void *rcu_cmpxchg_pointer_sym_bp(void **p, void *old, void *_new)
{
	void *previous;

	cmm_wmb();
	previous = uatomic_cmpxchg(p, old, _new);
	return previous;
}
719
720 DEFINE_RCU_FLAVOR(rcu_flavor);
721
722 #include "urcu-call-rcu-impl.h"
723 #include "urcu-defer-impl.h"
This page took 0.04334 seconds and 4 git commands to generate.