Fix: compat_futex_noasync race condition
[urcu.git] / urcu-bp.c
1 /*
2 * urcu-bp.c
3 *
4 * Userspace RCU library, "bulletproof" version.
5 *
6 * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
7 * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 *
23 * IBM's contributions to this file may be relicensed under LGPLv2 or later.
24 */
25
26 #define _GNU_SOURCE
27 #define _LGPL_SOURCE
28 #include <stdio.h>
29 #include <pthread.h>
30 #include <signal.h>
31 #include <assert.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <errno.h>
35 #include <poll.h>
36 #include <unistd.h>
37 #include <sys/mman.h>
38
39 #include "urcu/wfcqueue.h"
40 #include "urcu/map/urcu-bp.h"
41 #include "urcu/static/urcu-bp.h"
42 #include "urcu-pointer.h"
43 #include "urcu/tls-compat.h"
44
45 #include "urcu-die.h"
46
47 /* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */
48 #undef _LGPL_SOURCE
49 #include "urcu-bp.h"
50 #define _LGPL_SOURCE
51
52 #ifndef MAP_ANONYMOUS
53 #define MAP_ANONYMOUS MAP_ANON
54 #endif
55
#ifdef __linux__
/*
 * Resize the mapping at @old_address from @old_size to @new_size bytes
 * by forwarding all arguments, unchanged, to mremap().
 * Returns whatever mremap() returns.
 */
static
void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	void *remapped;

	remapped = mremap(old_address, old_size, new_size, flags);
	return remapped;
}
#else /* #ifdef __linux__ */

#define MREMAP_MAYMOVE	1
#define MREMAP_FIXED	2

/*
 * mremap stand-in for non-Linux systems. It never resizes anything and
 * always returns MAP_FAILED. Callers must not pass MREMAP_MAYMOVE (this
 * is asserted). This is not a generic mremap replacement.
 */
static
void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	assert((flags & MREMAP_MAYMOVE) == 0);

	return MAP_FAILED;
}
#endif /* #else #ifdef __linux__ */
81
/* Sleep delay in ms, used as the poll() timeout between reader scans. */
#define RCU_SLEEP_DELAY_MS	10
/* Number of reader slots the first arena chunk has room for. */
#define INIT_NR_THREADS		8
/*
 * Size in bytes of the first arena chunk: one chunk header followed by
 * INIT_NR_THREADS reader slots. The expansion is fully parenthesized so
 * the macro can be used safely inside larger expressions.
 */
#define ARENA_INIT_ALLOC		\
	(sizeof(struct registry_chunk)	\
	+ INIT_NR_THREADS * sizeof(struct rcu_reader))

/*
 * Active attempts to check for reader Q.S. before calling sleep().
 */
#define RCU_QS_ACTIVE_ATTEMPTS	100
93
94 static
95 int rcu_bp_refcount;
96
97 static
98 void __attribute__((constructor)) rcu_bp_init(void);
99 static
100 void __attribute__((destructor)) rcu_bp_exit(void);
101
102 static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;
103
104 static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
105 static int initialized;
106
107 static pthread_key_t urcu_bp_key;
108
109 struct rcu_gp rcu_gp = { .ctr = RCU_GP_COUNT };
110
111 /*
112 * Pointer to registry elements. Written to only by each individual reader. Read
113 * by both the reader and the writers.
114 */
115 DEFINE_URCU_TLS(struct rcu_reader *, rcu_reader);
116
117 static CDS_LIST_HEAD(registry);
118
119 struct registry_chunk {
120 size_t data_len; /* data length */
121 size_t used; /* amount of data used */
122 struct cds_list_head node; /* chunk_list node */
123 char data[];
124 };
125
126 struct registry_arena {
127 struct cds_list_head chunk_list;
128 };
129
130 static struct registry_arena registry_arena = {
131 .chunk_list = CDS_LIST_HEAD_INIT(registry_arena.chunk_list),
132 };
133
134 /* Saved fork signal mask, protected by rcu_gp_lock */
135 static sigset_t saved_fork_signal_mask;
136
/*
 * Acquire @mutex. Any locking error is fatal and reported through
 * urcu_die().
 *
 * When DISTRUST_SIGNALS_EXTREME is defined, the mutex is acquired by
 * repeatedly calling pthread_mutex_trylock(), sleeping 10 ms between
 * attempts; EBUSY and EINTR are the only tolerated errors.
 */
static void mutex_lock(pthread_mutex_t *mutex)
{
	int err;

#ifdef DISTRUST_SIGNALS_EXTREME
	for (;;) {
		err = pthread_mutex_trylock(mutex);
		if (!err)
			break;
		if (err != EBUSY && err != EINTR)
			urcu_die(err);
		poll(NULL,0,10);
	}
#else /* #ifdef DISTRUST_SIGNALS_EXTREME */
	err = pthread_mutex_lock(mutex);
	if (err)
		urcu_die(err);
#endif /* #else #ifdef DISTRUST_SIGNALS_EXTREME */
}
153
/*
 * Release @mutex. An unlock error is fatal and reported through
 * urcu_die().
 */
static void mutex_unlock(pthread_mutex_t *mutex)
{
	int err = pthread_mutex_unlock(mutex);

	if (err != 0)
		urcu_die(err);
}
162
163 static void wait_for_readers(struct cds_list_head *input_readers,
164 struct cds_list_head *cur_snap_readers,
165 struct cds_list_head *qsreaders)
166 {
167 unsigned int wait_loops = 0;
168 struct rcu_reader *index, *tmp;
169
170 /*
171 * Wait for each thread URCU_TLS(rcu_reader).ctr to either
172 * indicate quiescence (not nested), or observe the current
173 * rcu_gp.ctr value.
174 */
175 for (;;) {
176 if (wait_loops < RCU_QS_ACTIVE_ATTEMPTS)
177 wait_loops++;
178
179 cds_list_for_each_entry_safe(index, tmp, input_readers, node) {
180 switch (rcu_reader_state(&index->ctr)) {
181 case RCU_READER_ACTIVE_CURRENT:
182 if (cur_snap_readers) {
183 cds_list_move(&index->node,
184 cur_snap_readers);
185 break;
186 }
187 /* Fall-through */
188 case RCU_READER_INACTIVE:
189 cds_list_move(&index->node, qsreaders);
190 break;
191 case RCU_READER_ACTIVE_OLD:
192 /*
193 * Old snapshot. Leaving node in
194 * input_readers will make us busy-loop
195 * until the snapshot becomes current or
196 * the reader becomes inactive.
197 */
198 break;
199 }
200 }
201
202 if (cds_list_empty(input_readers)) {
203 break;
204 } else {
205 if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS)
206 (void) poll(NULL, 0, RCU_SLEEP_DELAY_MS);
207 else
208 caa_cpu_relax();
209 }
210 }
211 }
212
213 void synchronize_rcu(void)
214 {
215 CDS_LIST_HEAD(cur_snap_readers);
216 CDS_LIST_HEAD(qsreaders);
217 sigset_t newmask, oldmask;
218 int ret;
219
220 ret = sigfillset(&newmask);
221 assert(!ret);
222 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
223 assert(!ret);
224
225 mutex_lock(&rcu_gp_lock);
226
227 if (cds_list_empty(&registry))
228 goto out;
229
230 /* All threads should read qparity before accessing data structure
231 * where new ptr points to. */
232 /* Write new ptr before changing the qparity */
233 cmm_smp_mb();
234
235 /*
236 * Wait for readers to observe original parity or be quiescent.
237 */
238 wait_for_readers(&registry, &cur_snap_readers, &qsreaders);
239
240 /*
241 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
242 * model easier to understand. It does not have a big performance impact
243 * anyway, given this is the write-side.
244 */
245 cmm_smp_mb();
246
247 /* Switch parity: 0 -> 1, 1 -> 0 */
248 CMM_STORE_SHARED(rcu_gp.ctr, rcu_gp.ctr ^ RCU_GP_CTR_PHASE);
249
250 /*
251 * Must commit qparity update to memory before waiting for other parity
252 * quiescent state. Failure to do so could result in the writer waiting
253 * forever while new readers are always accessing data (no progress).
254 * Ensured by CMM_STORE_SHARED and CMM_LOAD_SHARED.
255 */
256
257 /*
258 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
259 * model easier to understand. It does not have a big performance impact
260 * anyway, given this is the write-side.
261 */
262 cmm_smp_mb();
263
264 /*
265 * Wait for readers to observe new parity or be quiescent.
266 */
267 wait_for_readers(&cur_snap_readers, NULL, &qsreaders);
268
269 /*
270 * Put quiescent reader list back into registry.
271 */
272 cds_list_splice(&qsreaders, &registry);
273
274 /*
275 * Finish waiting for reader threads before letting the old ptr being
276 * freed.
277 */
278 cmm_smp_mb();
279 out:
280 mutex_unlock(&rcu_gp_lock);
281 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
282 assert(!ret);
283 }
284
285 /*
286 * library wrappers to be used by non-LGPL compatible source code.
287 */
288
/* Out-of-line symbol forwarding to _rcu_read_lock(). */
void rcu_read_lock(void)
{
	_rcu_read_lock();
}
293
/* Out-of-line symbol forwarding to _rcu_read_unlock(). */
void rcu_read_unlock(void)
{
	_rcu_read_unlock();
}
298
/*
 * Out-of-line symbol forwarding to _rcu_read_ongoing(); returns its
 * result unchanged.
 */
int rcu_read_ongoing(void)
{
	int ongoing = _rcu_read_ongoing();

	return ongoing;
}
303
304 /*
305 * Only grow for now. If empty, allocate a ARENA_INIT_ALLOC sized chunk.
306 * Else, try expanding the last chunk. If this fails, allocate a new
307 * chunk twice as big as the last chunk.
308 * Memory used by chunks _never_ moves. A chunk could theoretically be
309 * freed when all "used" slots are released, but we don't do it at this
310 * point.
311 */
312 static
313 void expand_arena(struct registry_arena *arena)
314 {
315 struct registry_chunk *new_chunk, *last_chunk;
316 size_t old_chunk_len, new_chunk_len;
317
318 /* No chunk. */
319 if (cds_list_empty(&arena->chunk_list)) {
320 assert(ARENA_INIT_ALLOC >=
321 sizeof(struct registry_chunk)
322 + sizeof(struct rcu_reader));
323 new_chunk_len = ARENA_INIT_ALLOC;
324 new_chunk = mmap(NULL, new_chunk_len,
325 PROT_READ | PROT_WRITE,
326 MAP_ANONYMOUS | MAP_PRIVATE,
327 -1, 0);
328 if (new_chunk == MAP_FAILED)
329 abort();
330 bzero(new_chunk, new_chunk_len);
331 new_chunk->data_len =
332 new_chunk_len - sizeof(struct registry_chunk);
333 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
334 return; /* We're done. */
335 }
336
337 /* Try expanding last chunk. */
338 last_chunk = cds_list_entry(arena->chunk_list.prev,
339 struct registry_chunk, node);
340 old_chunk_len =
341 last_chunk->data_len + sizeof(struct registry_chunk);
342 new_chunk_len = old_chunk_len << 1;
343
344 /* Don't allow memory mapping to move, just expand. */
345 new_chunk = mremap_wrapper(last_chunk, old_chunk_len,
346 new_chunk_len, 0);
347 if (new_chunk != MAP_FAILED) {
348 /* Should not have moved. */
349 assert(new_chunk == last_chunk);
350 bzero((char *) last_chunk + old_chunk_len,
351 new_chunk_len - old_chunk_len);
352 last_chunk->data_len =
353 new_chunk_len - sizeof(struct registry_chunk);
354 return; /* We're done. */
355 }
356
357 /* Remap did not succeed, we need to add a new chunk. */
358 new_chunk = mmap(NULL, new_chunk_len,
359 PROT_READ | PROT_WRITE,
360 MAP_ANONYMOUS | MAP_PRIVATE,
361 -1, 0);
362 if (new_chunk == MAP_FAILED)
363 abort();
364 bzero(new_chunk, new_chunk_len);
365 new_chunk->data_len =
366 new_chunk_len - sizeof(struct registry_chunk);
367 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
368 }
369
370 static
371 struct rcu_reader *arena_alloc(struct registry_arena *arena)
372 {
373 struct registry_chunk *chunk;
374 struct rcu_reader *rcu_reader_reg;
375 int expand_done = 0; /* Only allow to expand once per alloc */
376 size_t len = sizeof(struct rcu_reader);
377
378 retry:
379 cds_list_for_each_entry(chunk, &arena->chunk_list, node) {
380 if (chunk->data_len - chunk->used < len)
381 continue;
382 /* Find spot */
383 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
384 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
385 rcu_reader_reg++) {
386 if (!rcu_reader_reg->alloc) {
387 rcu_reader_reg->alloc = 1;
388 chunk->used += len;
389 return rcu_reader_reg;
390 }
391 }
392 }
393
394 if (!expand_done) {
395 expand_arena(arena);
396 expand_done = 1;
397 goto retry;
398 }
399
400 return NULL;
401 }
402
403 /* Called with signals off and mutex locked */
404 static
405 void add_thread(void)
406 {
407 struct rcu_reader *rcu_reader_reg;
408 int ret;
409
410 rcu_reader_reg = arena_alloc(&registry_arena);
411 if (!rcu_reader_reg)
412 abort();
413 ret = pthread_setspecific(urcu_bp_key, rcu_reader_reg);
414 if (ret)
415 abort();
416
417 /* Add to registry */
418 rcu_reader_reg->tid = pthread_self();
419 assert(rcu_reader_reg->ctr == 0);
420 cds_list_add(&rcu_reader_reg->node, &registry);
421 /*
422 * Reader threads are pointing to the reader registry. This is
423 * why its memory should never be relocated.
424 */
425 URCU_TLS(rcu_reader) = rcu_reader_reg;
426 }
427
428 /* Called with mutex locked */
429 static
430 void cleanup_thread(struct registry_chunk *chunk,
431 struct rcu_reader *rcu_reader_reg)
432 {
433 rcu_reader_reg->ctr = 0;
434 cds_list_del(&rcu_reader_reg->node);
435 rcu_reader_reg->tid = 0;
436 rcu_reader_reg->alloc = 0;
437 chunk->used -= sizeof(struct rcu_reader);
438 }
439
440 static
441 struct registry_chunk *find_chunk(struct rcu_reader *rcu_reader_reg)
442 {
443 struct registry_chunk *chunk;
444
445 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
446 if (rcu_reader_reg < (struct rcu_reader *) &chunk->data[0])
447 continue;
448 if (rcu_reader_reg >= (struct rcu_reader *) &chunk->data[chunk->data_len])
449 continue;
450 return chunk;
451 }
452 return NULL;
453 }
454
455 /* Called with signals off and mutex locked */
456 static
457 void remove_thread(struct rcu_reader *rcu_reader_reg)
458 {
459 cleanup_thread(find_chunk(rcu_reader_reg), rcu_reader_reg);
460 URCU_TLS(rcu_reader) = NULL;
461 }
462
463 /* Disable signals, take mutex, add to registry */
464 void rcu_bp_register(void)
465 {
466 sigset_t newmask, oldmask;
467 int ret;
468
469 ret = sigfillset(&newmask);
470 if (ret)
471 abort();
472 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
473 if (ret)
474 abort();
475
476 /*
477 * Check if a signal concurrently registered our thread since
478 * the check in rcu_read_lock().
479 */
480 if (URCU_TLS(rcu_reader))
481 goto end;
482
483 /*
484 * Take care of early registration before urcu_bp constructor.
485 */
486 rcu_bp_init();
487
488 mutex_lock(&rcu_gp_lock);
489 add_thread();
490 mutex_unlock(&rcu_gp_lock);
491 end:
492 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
493 if (ret)
494 abort();
495 }
496
497 /* Disable signals, take mutex, remove from registry */
498 static
499 void rcu_bp_unregister(struct rcu_reader *rcu_reader_reg)
500 {
501 sigset_t newmask, oldmask;
502 int ret;
503
504 ret = sigfillset(&newmask);
505 if (ret)
506 abort();
507 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
508 if (ret)
509 abort();
510
511 mutex_lock(&rcu_gp_lock);
512 remove_thread(rcu_reader_reg);
513 mutex_unlock(&rcu_gp_lock);
514 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
515 if (ret)
516 abort();
517 rcu_bp_exit();
518 }
519
/*
 * Destructor attached to urcu_bp_key: remove the exiting thread from
 * the registry. @rcu_key is the value stored under the key, i.e. the
 * thread's reader slot.
 */
static
void urcu_bp_thread_exit_notifier(void *rcu_key)
{
	struct rcu_reader *reader = rcu_key;

	rcu_bp_unregister(reader);
}
529
530 static
531 void rcu_bp_init(void)
532 {
533 mutex_lock(&init_lock);
534 if (!rcu_bp_refcount++) {
535 int ret;
536
537 ret = pthread_key_create(&urcu_bp_key,
538 urcu_bp_thread_exit_notifier);
539 if (ret)
540 abort();
541 initialized = 1;
542 }
543 mutex_unlock(&init_lock);
544 }
545
546 static
547 void rcu_bp_exit(void)
548 {
549 mutex_lock(&init_lock);
550 if (!--rcu_bp_refcount) {
551 struct registry_chunk *chunk, *tmp;
552 int ret;
553
554 cds_list_for_each_entry_safe(chunk, tmp,
555 &registry_arena.chunk_list, node) {
556 munmap(chunk, chunk->data_len
557 + sizeof(struct registry_chunk));
558 }
559 ret = pthread_key_delete(urcu_bp_key);
560 if (ret)
561 abort();
562 }
563 mutex_unlock(&init_lock);
564 }
565
566 /*
567 * Holding the rcu_gp_lock across fork will make sure we fork() don't race with
568 * a concurrent thread executing with this same lock held. This ensures that the
569 * registry is in a coherent state in the child.
570 */
571 void rcu_bp_before_fork(void)
572 {
573 sigset_t newmask, oldmask;
574 int ret;
575
576 ret = sigfillset(&newmask);
577 assert(!ret);
578 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
579 assert(!ret);
580 mutex_lock(&rcu_gp_lock);
581 saved_fork_signal_mask = oldmask;
582 }
583
584 void rcu_bp_after_fork_parent(void)
585 {
586 sigset_t oldmask;
587 int ret;
588
589 oldmask = saved_fork_signal_mask;
590 mutex_unlock(&rcu_gp_lock);
591 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
592 assert(!ret);
593 }
594
595 /*
596 * Prune all entries from registry except our own thread. Fits the Linux
597 * fork behavior. Called with rcu_gp_lock held.
598 */
599 static
600 void urcu_bp_prune_registry(void)
601 {
602 struct registry_chunk *chunk;
603 struct rcu_reader *rcu_reader_reg;
604
605 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
606 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
607 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
608 rcu_reader_reg++) {
609 if (!rcu_reader_reg->alloc)
610 continue;
611 if (rcu_reader_reg->tid == pthread_self())
612 continue;
613 cleanup_thread(chunk, rcu_reader_reg);
614 }
615 }
616 }
617
618 void rcu_bp_after_fork_child(void)
619 {
620 sigset_t oldmask;
621 int ret;
622
623 urcu_bp_prune_registry();
624 oldmask = saved_fork_signal_mask;
625 mutex_unlock(&rcu_gp_lock);
626 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
627 assert(!ret);
628 }
629
/*
 * Out-of-line symbol applying _rcu_dereference() to @p and returning
 * the result.
 */
void *rcu_dereference_sym_bp(void *p)
{
	void *value = _rcu_dereference(p);

	return value;
}
634
/*
 * Store @v into *@p with uatomic_set(), after issuing cmm_wmb().
 * Returns @v.
 */
void *rcu_set_pointer_sym_bp(void **p, void *v)
{
	/* Barrier first, then publish the pointer. */
	cmm_wmb();
	uatomic_set(p, v);

	return v;
}
641
/*
 * Exchange *@p with @v using uatomic_xchg(), after issuing cmm_wmb().
 * Returns the value produced by uatomic_xchg().
 */
void *rcu_xchg_pointer_sym_bp(void **p, void *v)
{
	void *previous;

	cmm_wmb();
	previous = uatomic_xchg(p, v);
	return previous;
}
647
/*
 * Compare-and-exchange on *@p (expected @old, replacement @_new) using
 * uatomic_cmpxchg(), after issuing cmm_wmb(). Returns the value
 * produced by uatomic_cmpxchg().
 */
void *rcu_cmpxchg_pointer_sym_bp(void **p, void *old, void *_new)
{
	void *previous;

	cmm_wmb();
	previous = uatomic_cmpxchg(p, old, _new);
	return previous;
}
653
654 DEFINE_RCU_FLAVOR(rcu_flavor);
655
656 #include "urcu-call-rcu-impl.h"
657 #include "urcu-defer-impl.h"
This page took 0.041798 seconds and 4 git commands to generate.