72a7609e11badfca7b7313fca71617497d3f3a17
4 * Userspace RCU library - RCU Judy Array Range Support
6 * Copyright 2012-2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30 #include <urcu/rcuja.h>
31 #include <urcu/compiler.h>
32 #include <urcu/arch.h>
33 #include <urcu-pointer.h>
34 #include <urcu/uatomic.h>
35 #include <urcu/rcuja-range.h>
36 #include <urcu-flavor.h>
38 #include "rcuja-internal.h"
41 * Discussion about order of lookup/lock vs allocated node deletion.
43 * - If node deletion returns before call to
44 * cds_ja_range_lookup(), the node will not be found by lookup.
45 * - If node deletion is called after cds_ja_range_lock() returns a
46 * non-NULL range, the deletion will wait until the lock is released
47 * before it takes place.
48 * - If node deletion call/return overlaps with the call to
49 * cds_ja_range_lookup() and return from cds_ja_range_lock(), the node
50 * may or may not be found by each of cds_ja_range_lookup() and
51 * cds_ja_range_lock().
55 * Discussion about order of lookup/lock vs allocated node add. Assuming
56 * no concurrent delete.
58 * - If node add returns before call to
59 * cds_ja_range_lookup(), the node will be found by lookup.
60 * - If node add is called after cds_ja_range_lookup returns, the node
61 * will not be found by lookup.
62 * - If node add call/return overlaps with the call to and return from
63 * cds_ja_range_lookup(), the node may or may not be found.
64 * - If node add call/return overlaps with call to cds_ja_range_lookup()
65 * and return from cds_ja_range_lock(), in the specific case where
66 * cds_ja_range_lookup() _does_ succeed, then cds_ja_range_lock() will
67 * succeed (still assuming no concurrent deletion).
71 * Discussion: concurrent deletion of contiguous allocated ranges.
73 * Ensuring that merge of contiguous free ranges is always performed, we
74 * need to ensure locking of concurrent removal of contiguous allocated
75 * ranges one with respect to another. This is done by locking the
76 * ranges prior to and after the range to remove, even if that range is
77 * allocated. This serializes removal of contiguous ranges. The only
78 * cases for which there is no range to lock is when removing an
79 * allocated range starting at 0, and/or ending at the end of the key
84 * Discussion: concurrent lookup vs add
86 * When executed concurrently with node add, the inequality
87 * lookup can see no node for the looked-up range, because a range can
88 * be shrinked. This can happen if, for instance, we lookup key 2
89 * between addition of a "free" range for values [1,2], and removal of
90 * the old "free" range for values [0,2]. We would then fail to observe
91 * any range for key 2. Given that the lookup is performed during a
92 * range transition, we can safely return that there is no allocated
97 * Discussion: concurrent lookup vs del
99 * There is no special case for lookups performed concurrently with node
100 * del, because node del either replaces the node with the exact same
101 * start key (see duplicates guarantees), or replaces it with a larger
102 * range containing the prior range. Therefore, we are sure that
103 * inequality lookups will see the larger range before the old range is
104 * deleted, in whichever direction the lookup is performed.
108 * Discussion of the type state transitions.
110 * State transitions of "type" always go from either:
112 * CDS_JA_RANGE_FREE -> CDS_JA_RANGE_REMOVED
114 * CDS_JA_RANGE_ALLOCATED -> CDS_JA_RANGE_REMOVED
116 * A range type never changes otherwise.
119 //#define RANGE_DEBUG
124 #define dbg_printf(fmt, args...) \
125 fprintf(stderr, "[debug rcuja-range %lu %s()@%s:%u] " fmt, \
126 (unsigned long) gettid(), __func__, \
127 __FILE__, __LINE__, ## args)
129 #define dbg_printf(fmt, args...) \
131 /* do nothing but check printf format */ \
133 fprintf(stderr, "[debug rcuja-range %lu %s()@%s:%u] " fmt, \
134 (unsigned long) gettid(), __func__, \
135 __FILE__, __LINE__, ## args); \
139 enum cds_ja_range_type
{
140 CDS_JA_RANGE_ALLOCATED
,
142 CDS_JA_RANGE_REMOVED
,
146 * Range goes from start (inclusive) to end (inclusive).
147 * Range start is used as node key in the Judy array.
149 struct cds_ja_range
{
151 struct cds_ja_node ja_node
;
152 pthread_mutex_t lock
;
154 enum cds_ja_range_type type
;
156 /* not required on lookup fast-path */
158 struct rcu_head head
;
161 struct cds_ja_range
*cds_ja_range_lookup(struct cds_ja
*ja
, uint64_t key
)
163 struct cds_ja_node
*node
, *last_node
;
164 struct cds_ja_range
*range
;
166 dbg_printf("key: %" PRIu64
"\n", key
);
167 node
= cds_ja_lookup_below_equal(ja
, key
, NULL
);
171 * Get the last of duplicate chain. Adding a node to Judy array
172 * duplicates inserts them at the end of the chain.
174 cds_ja_for_each_duplicate_rcu(node
)
176 range
= caa_container_of(last_node
, struct cds_ja_range
, ja_node
);
178 /* Check if range is currently hidden by concurrent add */
179 if (range
->end
< key
)
183 * If last node in the duplicates is removed or free, we can
184 * consider that either a removal or add operation is in
185 * progress, or removal is the last completed operation to
186 * update this range. We can therefore consider that this area
189 if (range
->type
!= CDS_JA_RANGE_ALLOCATED
)
192 * We found an allocated range. We can return it for use with
193 * RCU read-side protection for existence. However, we have no
194 * mutual exclusion against removal at this point.
200 * Provide mutual exclusion against removal.
202 struct cds_ja_range
*cds_ja_range_lock(struct cds_ja_range
*range
)
204 pthread_mutex_lock(&range
->lock
);
206 if (range
->type
== CDS_JA_RANGE_REMOVED
)
211 pthread_mutex_unlock(&range
->lock
);
215 void cds_ja_range_unlock(struct cds_ja_range
*range
)
217 pthread_mutex_unlock(&range
->lock
);
221 struct cds_ja_range
*range_create(
222 uint64_t start
, /* inclusive */
223 uint64_t end
, /* inclusive */
225 enum cds_ja_range_type type
)
227 struct cds_ja_range
*range
;
229 range
= calloc(sizeof(*range
), 1);
232 range
->start
= start
;
236 pthread_mutex_init(&range
->lock
, NULL
);
241 void free_range_cb(struct rcu_head
*head
)
243 struct cds_ja_range
*range
=
244 caa_container_of(head
, struct cds_ja_range
, head
);
249 void free_range(struct cds_ja_range
*range
)
255 void rcu_free_range(struct cds_ja
*ja
, struct cds_ja_range
*range
)
257 cds_lfht_rcu_flavor(ja
->ht
)->update_call_rcu(&range
->head
,
261 int cds_ja_range_add(struct cds_ja
*ja
,
262 uint64_t start
, /* inclusive */
263 uint64_t end
, /* inclusive */
266 struct cds_ja_node
*old_node
;
267 struct cds_ja_range
*old_range
, *new_range
, *ranges
[3];
268 unsigned int nr_ranges
, i
;
271 if (start
> end
|| end
== UINT64_MAX
)
275 dbg_printf("start: %" PRIu64
", end: %" PRIu64
", priv %p\n",
278 * Find if requested range is entirely contained within a single
281 old_node
= cds_ja_lookup_below_equal(ja
, start
, NULL
);
282 /* Range hidden by concurrent add */
286 old_range
= caa_container_of(old_node
, struct cds_ja_range
, ja_node
);
288 /* Range hidden by concurrent add */
289 if (old_range
->end
< start
)
292 /* We now know that old_range overlaps with our range */
293 switch (CMM_LOAD_SHARED(old_range
->type
)) {
294 case CDS_JA_RANGE_ALLOCATED
:
296 case CDS_JA_RANGE_FREE
:
298 case CDS_JA_RANGE_REMOVED
:
302 /* We do not fit entirely within the range */
303 if (old_range
->end
< end
)
306 pthread_mutex_lock(&old_range
->lock
);
308 if (old_range
->type
== CDS_JA_RANGE_REMOVED
) {
309 pthread_mutex_unlock(&old_range
->lock
);
313 /* Create replacement ranges: at most 2 free and 1 allocated */
314 if (start
== old_range
->start
) {
315 if (end
== old_range
->end
) {
317 ranges
[0] = new_range
= range_create(start
, end
,
318 priv
, CDS_JA_RANGE_ALLOCATED
);
322 assert(old_range
->end
> end
);
323 ranges
[0] = new_range
= range_create(start
, end
,
324 priv
, CDS_JA_RANGE_ALLOCATED
);
325 ranges
[1] = range_create(end
+ 1, old_range
->end
,
326 NULL
, CDS_JA_RANGE_FREE
);
330 if (end
== old_range
->end
) {
332 assert(old_range
->start
< start
);
333 ranges
[0] = range_create(old_range
->start
, start
- 1,
334 NULL
, CDS_JA_RANGE_FREE
);
335 ranges
[1] = new_range
= range_create(start
, end
,
336 priv
, CDS_JA_RANGE_ALLOCATED
);
340 assert(old_range
->start
< start
);
341 assert(old_range
->end
> end
);
342 ranges
[0] = range_create(old_range
->start
, start
- 1,
343 NULL
, CDS_JA_RANGE_FREE
);
344 ranges
[1] = new_range
= range_create(start
, end
,
345 priv
, CDS_JA_RANGE_ALLOCATED
);
346 ranges
[2] = range_create(end
+ 1, old_range
->end
,
347 NULL
, CDS_JA_RANGE_FREE
);
352 /* Add replacement ranges to Judy array */
353 for (i
= 0; i
< nr_ranges
; i
++) {
354 dbg_printf("ADD RANGE: %" PRIu64
"-%" PRIu64
" %s.\n",
355 ranges
[i
]->start
, ranges
[i
]->end
,
356 ranges
[i
]->type
== CDS_JA_RANGE_ALLOCATED
?
357 "allocated" : "free");
358 pthread_mutex_lock(&ranges
[i
]->lock
);
359 ret
= cds_ja_add(ja
, ranges
[i
]->start
, &ranges
[i
]->ja_node
);
364 * We add replacement ranges _before_ removing old ranges, so
365 * concurrent traversals will always see one or the other. This
366 * is OK because we temporarily have a duplicate key, and Judy
367 * arrays provide key existence guarantee for lookups performed
368 * concurrently with add followed by del of duplicate keys.
371 dbg_printf("REM RANGE: %" PRIu64
"-%" PRIu64
" %s.\n",
372 old_range
->start
, old_range
->end
,
373 old_range
->type
== CDS_JA_RANGE_ALLOCATED
?
374 "allocated" : "free");
375 /* Remove old free range */
376 ret
= cds_ja_del(ja
, old_range
->start
, &old_range
->ja_node
);
378 old_range
->type
= CDS_JA_RANGE_REMOVED
;
379 pthread_mutex_unlock(&old_range
->lock
);
380 for (i
= 0; i
< nr_ranges
; i
++)
381 pthread_mutex_unlock(&ranges
[i
]->lock
);
383 rcu_free_range(ja
, old_range
);
385 dbg_printf("<SUCCEED>\n");
390 int cds_ja_range_del(struct cds_ja
*ja
, struct cds_ja_range
*range
)
392 struct cds_ja_node
*prev_node
, *next_node
;
393 struct cds_ja_range
*new_range
;
394 struct cds_ja_range
*merge_ranges
[3], *lock_ranges
[3];
395 unsigned int nr_merge
, nr_lock
, i
;
400 dbg_printf("start: %" PRIu64
", end %" PRIu64
", priv: %p\n",
401 range
->start
, range
->end
, range
->priv
);
407 * Range has been concurrently updated.
409 if (range
->type
!= CDS_JA_RANGE_ALLOCATED
)
412 if (range
->start
> 0) {
413 struct cds_ja_range
*prev_range
;
415 prev_node
= cds_ja_lookup_below_equal(ja
, range
->start
- 1,
420 prev_range
= caa_container_of(prev_node
,
421 struct cds_ja_range
, ja_node
);
422 /* Prev range temporarily hidden due to concurrent add. */
423 if (prev_range
->end
!= range
->start
- 1)
426 lock_ranges
[nr_lock
++] = prev_range
;
427 if (prev_range
->type
!= CDS_JA_RANGE_ALLOCATED
)
428 merge_ranges
[nr_merge
++] = prev_range
;
431 lock_ranges
[nr_lock
++] = range
;
432 merge_ranges
[nr_merge
++] = range
;
434 if (range
->end
< UINT64_MAX
- 1) {
435 struct cds_ja_range
*next_range
;
437 next_node
= cds_ja_lookup_below_equal(ja
, range
->end
+ 1,
439 /* Next range temporarily hidden due to concurrent add. */
443 next_range
= caa_container_of(next_node
,
444 struct cds_ja_range
, ja_node
);
445 if (next_range
->start
!= range
->end
+ 1)
448 lock_ranges
[nr_lock
++] = next_range
;
449 if (next_range
->type
!= CDS_JA_RANGE_ALLOCATED
)
450 merge_ranges
[nr_merge
++] = next_range
;
453 /* Acquire locks in increasing key order for range merge */
454 for (i
= 0; i
< nr_lock
; i
++)
455 pthread_mutex_lock(&lock_ranges
[i
]->lock
);
456 if (range
->type
!= CDS_JA_RANGE_ALLOCATED
) {
460 /* Ensure they are valid */
461 for (i
= 0; i
< nr_lock
; i
++) {
462 if (lock_ranges
[i
]->type
== CDS_JA_RANGE_REMOVED
)
466 /* Create new free range */
467 start
= merge_ranges
[0]->start
;
468 end
= merge_ranges
[nr_merge
- 1]->end
;
469 new_range
= range_create(start
, end
, NULL
, CDS_JA_RANGE_FREE
);
470 pthread_mutex_lock(&new_range
->lock
);
472 dbg_printf("ADD RANGE: %" PRIu64
"-%" PRIu64
" %s.\n",
473 new_range
->start
, new_range
->end
,
474 new_range
->type
== CDS_JA_RANGE_ALLOCATED
?
475 "allocated" : "free");
477 ret
= cds_ja_add(ja
, start
, &new_range
->ja_node
);
480 /* Remove old ranges */
481 for (i
= 0; i
< nr_merge
; i
++) {
483 dbg_printf("REM RANGE: %" PRIu64
"-%" PRIu64
" %s.\n",
484 merge_ranges
[i
]->start
, merge_ranges
[i
]->end
,
485 merge_ranges
[i
]->type
== CDS_JA_RANGE_ALLOCATED
?
486 "allocated" : "free");
487 ret
= cds_ja_del(ja
, merge_ranges
[i
]->start
,
488 &merge_ranges
[i
]->ja_node
);
490 merge_ranges
[i
]->type
= CDS_JA_RANGE_REMOVED
;
492 for (i
= 0; i
< nr_lock
; i
++)
493 pthread_mutex_unlock(&lock_ranges
[i
]->lock
);
494 pthread_mutex_unlock(&new_range
->lock
);
495 /* Free old merged ranges */
496 for (i
= 0; i
< nr_merge
; i
++)
497 rcu_free_range(ja
, merge_ranges
[i
]);
499 dbg_printf("<SUCCEED>\n");
505 for (i
= 0; i
< nr_lock
; i
++)
506 pthread_mutex_unlock(&lock_ranges
[i
]->lock
);
510 for (i
= 0; i
< nr_lock
; i
++)
511 pthread_mutex_unlock(&lock_ranges
[i
]->lock
);
515 struct cds_ja
*_cds_ja_range_new(unsigned int key_bits
,
516 const struct rcu_flavor_struct
*flavor
)
518 struct cds_ja_range
*range
;
522 ja
= _cds_ja_new(key_bits
, flavor
);
525 range
= range_create(0, UINT64_MAX
- 1, NULL
, CDS_JA_RANGE_FREE
);
528 cds_lfht_rcu_flavor(ja
->ht
)->read_lock();
529 ret
= cds_ja_add(ja
, 0, &range
->ja_node
);
530 cds_lfht_rcu_flavor(ja
->ht
)->read_unlock();
538 ret
= cds_ja_destroy(ja
);
543 int cds_ja_range_validate(struct cds_ja
*ja
)
545 uint64_t iter_key
, start
, end
, last_end
= UINT64_MAX
;
546 struct cds_ja_node
*ja_node
, *last_node
;
549 cds_lfht_rcu_flavor(ja
->ht
)->read_lock();
550 cds_ja_for_each_key_rcu(ja
, iter_key
, ja_node
) {
551 struct cds_ja_range
*range
;
552 struct cds_ja_node
*first_node
;
554 first_node
= ja_node
;
555 cds_ja_for_each_duplicate_rcu(ja_node
)
557 if (last_node
!= first_node
) {
558 struct cds_ja_range
*first_range
= caa_container_of(first_node
,
559 struct cds_ja_range
, ja_node
);
560 struct cds_ja_range
*last_range
= caa_container_of(last_node
,
561 struct cds_ja_range
, ja_node
);
562 fprintf(stderr
, "found duplicate node: first %" PRIu64
"-%" PRIu64
" last %" PRIu64
"-%" PRIu64
"\n",
563 first_range
->start
, first_range
->end
, last_range
->start
, last_range
->end
);
566 range
= caa_container_of(last_node
,
567 struct cds_ja_range
, ja_node
);
568 start
= range
->start
;
570 if (last_end
!= UINT64_MAX
) {
571 if (start
!= last_end
+ 1) {
572 fprintf(stderr
, "ja range discrepancy: last end: %" PRIu64
", start: %" PRIu64
"\n",
579 if (last_end
!= UINT64_MAX
- 1) {
580 fprintf(stderr
, "ja range error: end of last range is: %" PRIu64
"\n",
584 cds_lfht_rcu_flavor(ja
->ht
)->read_unlock();
588 int cds_ja_range_destroy(struct cds_ja
*ja
,
589 void (*free_priv
)(void *ptr
))
592 struct cds_ja_node
*ja_node
;
595 cds_lfht_rcu_flavor(ja
->ht
)->read_lock();
596 cds_ja_for_each_key_rcu(ja
, key
, ja_node
) {
597 struct cds_ja_node
*tmp_node
;
599 cds_ja_for_each_duplicate_safe_rcu(ja_node
, tmp_node
) {
600 struct cds_ja_range
*range
;
602 range
= caa_container_of(ja_node
,
603 struct cds_ja_range
, ja_node
);
604 ret
= cds_ja_del(ja
, key
, &range
->ja_node
);
608 free_priv(range
->priv
);
609 /* Alone using Judy array, OK to free now */
613 cds_lfht_rcu_flavor(ja
->ht
)->read_unlock();
614 return cds_ja_destroy(ja
);
617 cds_lfht_rcu_flavor(ja
->ht
)->read_unlock();
This page took 0.055635 seconds and 4 git commands to generate.