rcuja range: serialize freeing of contiguous ranges
/*
 * rcuja/rcuja-range.c
 *
 * Userspace RCU library - RCU Judy Array Range Support
 *
 * Copyright 2012-2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#define _LGPL_SOURCE
#include <stdint.h>
#include <errno.h>
#include <limits.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <pthread.h>
#include <urcu/rcuja.h>
#include <urcu/compiler.h>
#include <urcu/arch.h>
#include <urcu-pointer.h>
#include <urcu/uatomic.h>
#include <urcu/rcuja-range.h>
#include <urcu-flavor.h>

#include "rcuja-internal.h"

/*
 * Discussion about order of lookup/lock vs allocated node deletion.
 *
 * - If node deletion returns before the call to
 *   cds_ja_range_lookup(), the node will not be found by lookup.
 * - If node deletion is called after cds_ja_range_lock() returns a
 *   non-NULL range, the deletion will wait until the lock is released
 *   before it takes place.
 * - If node deletion call/return overlaps with the call to
 *   cds_ja_range_lookup() and the return from cds_ja_range_lock(), the
 *   node may or may not be found by each of cds_ja_range_lookup() and
 *   cds_ja_range_lock().
 */

/*
 * Discussion about order of lookup/lock vs allocated node add,
 * assuming no concurrent delete.
 *
 * - If node add returns before the call to
 *   cds_ja_range_lookup(), the node will be found by lookup.
 * - If node add is called after cds_ja_range_lookup() returns, the
 *   node will not be found by lookup.
 * - If node add call/return overlaps with the call to and return from
 *   cds_ja_range_lookup(), the node may or may not be found.
 * - If node add call/return overlaps with the call to
 *   cds_ja_range_lookup() and the return from cds_ja_range_lock(), in
 *   the specific case where cds_ja_range_lookup() _does_ succeed, then
 *   cds_ja_range_lock() will succeed (still assuming no concurrent
 *   deletion).
 */
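
/*
 * Example interleaving (added for illustration, not part of the
 * original file). A reader that wins the race for the range lock
 * delays a concurrent deletion until it is done with the range:
 *
 *   Thread A (reader)                  Thread B (deleter)
 *   ---------------------------------  ------------------------------
 *   rcu_read_lock()
 *   r = cds_ja_range_lookup(ja, key)
 *   cds_ja_range_lock(r) -> non-NULL
 *                                      cds_ja_range_del(ja, r) blocks
 *                                      on r->lock
 *   ... use r exclusively ...
 *   cds_ja_range_unlock(r)
 *   rcu_read_unlock()
 *                                      deletion proceeds, marks r
 *                                      CDS_JA_RANGE_REMOVED
 */
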
/*
 * Discussion: concurrent deletion of contiguous allocated ranges.
 *
 * To guarantee that merging of contiguous free ranges is always
 * performed, we need to serialize concurrent removals of contiguous
 * allocated ranges with respect to one another. This is done by
 * locking the ranges immediately prior to and after the range being
 * removed, even if those ranges are allocated. The only cases for
 * which there is no neighboring range to lock are when removing an
 * allocated range starting at 0, and/or ending at the end of the key
 * space.
 */
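
/*
 * Illustration (added for clarity, not in the original file): removing
 * allocated range B locks its immediate neighbors A and C as well,
 * whatever their type, so two concurrent removals of contiguous
 * allocated ranges cannot both miss a free-range merge:
 *
 *   key space:  ... [   A   ][   B   ][   C   ] ...
 *   locks:          A->lock   B->lock  C->lock   (increasing key order)
 */
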
/*
 * Discussion of the type state transitions.
 *
 * State transitions of "type" always go from either:
 *
 * CDS_JA_RANGE_FREE -> CDS_JA_RANGE_REMOVED
 * or
 * CDS_JA_RANGE_ALLOCATED -> CDS_JA_RANGE_REMOVED
 *
 * A range type never changes otherwise.
 */

struct cds_ja_range *cds_ja_range_lookup(struct cds_ja *ja, uint64_t key)
{
	struct cds_ja_node *node, *last_node;
	struct cds_ja_range *range;

	node = cds_ja_lookup_below_equal(ja, key, NULL);
	assert(node);
	/*
	 * Get the last node of the duplicate chain: adding a node to
	 * Judy array duplicates inserts it at the end of the chain.
	 */
	cds_ja_for_each_duplicate_rcu(node)
		last_node = node;
	range = caa_container_of(last_node, struct cds_ja_range, ja_node);
	/*
	 * If the last node in the duplicate chain is removed or free,
	 * we can consider that either a removal or an add operation is
	 * in progress, or that removal is the last completed operation
	 * updating this range. We can therefore consider that this area
	 * is not allocated.
	 */
	if (range->type != CDS_JA_RANGE_ALLOCATED)
		return NULL;
	/*
	 * We found an allocated range. We can return it for use with
	 * RCU read-side protection for existence. However, we have no
	 * mutual exclusion against removal at this point.
	 */
	return range;
}

/*
 * Provide mutual exclusion against removal.
 */
struct cds_ja_range *cds_ja_range_lock(struct cds_ja_range *range)
{
	pthread_mutex_lock(&range->lock);

	if (range->type == CDS_JA_RANGE_REMOVED)
		goto removed;
	return range;

removed:
	pthread_mutex_unlock(&range->lock);
	return NULL;
}

void cds_ja_range_unlock(struct cds_ja_range *range)
{
	pthread_mutex_unlock(&range->lock);
}
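
/*
 * Sketch (added for illustration, not part of the original file): how
 * a caller would typically combine cds_ja_range_lookup() and
 * cds_ja_range_lock(). Assumes the calling thread is registered with
 * the RCU flavor backing the Judy array and holds its read-side lock
 * across lookup and lock.
 */
static __attribute__((unused))
struct cds_ja_range *example_lookup_and_lock(struct cds_ja *ja,
		uint64_t key)
{
	struct cds_ja_range *range;

	range = cds_ja_range_lookup(ja, key);	/* existence: RCU */
	if (!range)
		return NULL;	/* key falls within a non-allocated area */
	/*
	 * The lock can still fail if the range is removed between
	 * lookup and lock; the caller may then retry the lookup.
	 */
	return cds_ja_range_lock(range);	/* exclusion: range mutex */
}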

static
struct cds_ja_range *range_create(
		uint64_t start,		/* inclusive */
		uint64_t end,		/* inclusive */
		enum cds_ja_range_type type)
{
	struct cds_ja_range *range;

	range = calloc(1, sizeof(*range));
	if (!range)
		return NULL;
	range->start = start;
	range->end = end;
	range->type = type;
	pthread_mutex_init(&range->lock, NULL);
	return range;
}

static
void free_range_cb(struct rcu_head *head)
{
	struct cds_ja_range *range =
		caa_container_of(head, struct cds_ja_range, head);
	free(range);
}

static
void free_range(struct cds_ja_range *range)
{
	free(range);
}

static
void rcu_free_range(struct cds_ja *ja, struct cds_ja_range *range)
{
	cds_lfht_rcu_flavor(ja->ht)->update_call_rcu(&range->head,
			free_range_cb);
}
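
/*
 * Note (added): a range unlinked from the Judy array can still be
 * referenced by concurrent RCU readers, so rcu_free_range() defers the
 * actual free until after a grace period of the array's RCU flavor.
 */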

struct cds_ja_range *cds_ja_range_add(struct cds_ja *ja,
		uint64_t start,		/* inclusive */
		uint64_t end)		/* inclusive */
{
	struct cds_ja_node *old_node, *old_node_end;
	struct cds_ja_range *old_range, *old_range_end, *new_range, *ranges[3];
	unsigned int nr_ranges, i;
	int ret;

retry:
	/*
	 * Find if the requested range is entirely contained within a
	 * single free range.
	 */
	old_node = cds_ja_lookup_below_equal(ja, start, NULL);
	assert(old_node);

	old_range = caa_container_of(old_node, struct cds_ja_range, ja_node);
	switch (CMM_LOAD_SHARED(old_range->type)) {
	case CDS_JA_RANGE_ALLOCATED:
		return NULL;
	case CDS_JA_RANGE_FREE:
		break;
	case CDS_JA_RANGE_REMOVED:
		goto retry;
	}

	old_node_end = cds_ja_lookup_below_equal(ja, end, NULL);
	assert(old_node_end);
	old_range_end = caa_container_of(old_node_end,
			struct cds_ja_range, ja_node);
	if (old_range_end != old_range) {
		switch (CMM_LOAD_SHARED(old_range_end->type)) {
		case CDS_JA_RANGE_ALLOCATED:
		case CDS_JA_RANGE_FREE:		/* fall-through */
			return NULL;
		case CDS_JA_RANGE_REMOVED:
			goto retry;
		}
	}

	pthread_mutex_lock(&old_range->lock);

	if (old_range->type == CDS_JA_RANGE_REMOVED) {
		pthread_mutex_unlock(&old_range->lock);
		goto retry;
	}

	/* Create replacement ranges: at most 2 free and 1 allocated */
	if (start == old_range->start) {
		if (end == old_range->end) {
			/* 1 range */
			ranges[0] = new_range = range_create(start, end,
					CDS_JA_RANGE_ALLOCATED);
			nr_ranges = 1;
		} else {
			/* 2 ranges */
			ranges[0] = new_range = range_create(start, end,
					CDS_JA_RANGE_ALLOCATED);
			ranges[1] = range_create(end + 1, old_range->end,
					CDS_JA_RANGE_FREE);
			nr_ranges = 2;
		}
	} else {
		if (end == old_range->end) {
			/* 2 ranges */
			ranges[0] = range_create(old_range->start, start - 1,
					CDS_JA_RANGE_FREE);
			ranges[1] = new_range = range_create(start, end,
					CDS_JA_RANGE_ALLOCATED);
			nr_ranges = 2;
		} else {
			/* 3 ranges */
			ranges[0] = range_create(old_range->start, start - 1,
					CDS_JA_RANGE_FREE);
			ranges[1] = new_range = range_create(start, end,
					CDS_JA_RANGE_ALLOCATED);
			ranges[2] = range_create(end + 1, old_range->end,
					CDS_JA_RANGE_FREE);
			nr_ranges = 3;
		}
	}
	/* Bail out if any replacement range allocation failed */
	for (i = 0; i < nr_ranges; i++) {
		if (!ranges[i])
			goto alloc_error;
	}

	/* Add replacement ranges to Judy array */
	for (i = 0; i < nr_ranges; i++) {
		ret = cds_ja_add(ja, ranges[i]->start, &ranges[i]->ja_node);
		assert(!ret);
	}

	/*
	 * We add replacement ranges _before_ removing old ranges, so
	 * concurrent traversals will always see one or the other. This
	 * is OK because we temporarily have a duplicate key, and Judy
	 * arrays provide a key existence guarantee for lookups
	 * performed concurrently with add followed by del of duplicate
	 * keys.
	 */

	/* Remove old free range */
	ret = cds_ja_del(ja, old_range->start, &old_range->ja_node);
	assert(!ret);
	old_range->type = CDS_JA_RANGE_REMOVED;
	pthread_mutex_unlock(&old_range->lock);

	rcu_free_range(ja, old_range);

	return new_range;

alloc_error:
	for (i = 0; i < nr_ranges; i++)
		free(ranges[i]);	/* free(NULL) is a no-op */
	pthread_mutex_unlock(&old_range->lock);
	return NULL;
}
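
/*
 * Sketch (added for illustration, not part of the original file):
 * reserving a fixed range and releasing it. The bounds are arbitrary
 * example values; a NULL return means the request overlaps an already
 * allocated range (or that allocating the replacement ranges failed).
 */
static __attribute__((unused))
int example_reserve(struct cds_ja *ja)
{
	struct cds_ja_range *range;

	range = cds_ja_range_add(ja, 0x1000, 0x1fff);	/* inclusive bounds */
	if (!range)
		return -EBUSY;
	/* ... keys 0x1000 to 0x1fff are owned by this caller ... */
	return cds_ja_range_del(ja, range);
}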

int cds_ja_range_del(struct cds_ja *ja, struct cds_ja_range *range)
{
	struct cds_ja_node *prev_node, *next_node;
	struct cds_ja_range *new_range;
	struct cds_ja_range *merge_ranges[3], *lock_ranges[3];
	unsigned int nr_merge, nr_lock, i;
	uint64_t start, end;
	int ret;

retry:
	nr_merge = 0;
	nr_lock = 0;
	/* Beware of wrap-around: a range starting at 0 has no previous range */
	prev_node = NULL;
	if (range->start != 0)
		prev_node = cds_ja_lookup_below_equal(ja,
				range->start - 1, NULL);
	if (prev_node) {
		struct cds_ja_range *prev_range;

		prev_range = caa_container_of(prev_node,
				struct cds_ja_range, ja_node);
		lock_ranges[nr_lock++] = prev_range;
		if (prev_range->type != CDS_JA_RANGE_ALLOCATED)
			merge_ranges[nr_merge++] = prev_range;
	}

	lock_ranges[nr_lock++] = range;
	merge_ranges[nr_merge++] = range;

	/* Likewise, a range ending at UINT64_MAX has no next range */
	next_node = NULL;
	if (range->end != UINT64_MAX)
		next_node = cds_ja_lookup_above_equal(ja,
				range->end + 1, NULL);
	if (next_node) {
		struct cds_ja_range *next_range;

		next_range = caa_container_of(next_node,
				struct cds_ja_range, ja_node);
		lock_ranges[nr_lock++] = next_range;
		if (next_range->type != CDS_JA_RANGE_ALLOCATED)
			merge_ranges[nr_merge++] = next_range;
	}

	/* Acquire locks in increasing key order for range merge */
	for (i = 0; i < nr_lock; i++)
		pthread_mutex_lock(&lock_ranges[i]->lock);
	/* Ensure locked ranges are still valid (not concurrently removed) */
	for (i = 0; i < nr_lock; i++) {
		if (lock_ranges[i]->type == CDS_JA_RANGE_REMOVED)
			goto unlock_retry;
	}

	/* Create new free range spanning all ranges to merge */
	start = merge_ranges[0]->start;
	end = merge_ranges[nr_merge - 1]->end;
	new_range = range_create(start, end, CDS_JA_RANGE_FREE);
	if (!new_range)
		goto unlock_enomem;
	ret = cds_ja_add(ja, start, &new_range->ja_node);
	assert(!ret);

	/* Remove old ranges */
	for (i = 0; i < nr_merge; i++) {
		ret = cds_ja_del(ja, merge_ranges[i]->start,
				&merge_ranges[i]->ja_node);
		assert(!ret);
		merge_ranges[i]->type = CDS_JA_RANGE_REMOVED;
	}
	for (i = 0; i < nr_lock; i++)
		pthread_mutex_unlock(&lock_ranges[i]->lock);
	/* Free old merged ranges */
	for (i = 0; i < nr_merge; i++)
		rcu_free_range(ja, merge_ranges[i]);

	return 0;

	/* error/retry paths */
unlock_enomem:
	for (i = 0; i < nr_lock; i++)
		pthread_mutex_unlock(&lock_ranges[i]->lock);
	return -ENOMEM;

unlock_retry:
	for (i = 0; i < nr_lock; i++)
		pthread_mutex_unlock(&lock_ranges[i]->lock);
	goto retry;
}

int cds_ja_range_init(struct cds_ja *ja)
{
	struct cds_ja_node *node;
	struct cds_ja_range *range;
	int ret;

	/*
	 * Sanity check: the Judy array should be empty.
	 */
	node = cds_ja_lookup_above_equal(ja, 0, NULL);
	if (node)
		return -EINVAL;
	range = range_create(0, UINT64_MAX, CDS_JA_RANGE_FREE);
	if (!range)
		return -ENOMEM;
	ret = cds_ja_add(ja, 0, &range->ja_node);
	if (ret)
		free_range(range);
	return ret;
}

int cds_ja_range_fini(struct cds_ja *ja)
{
	uint64_t key;
	struct cds_ja_node *ja_node;
	int ret = 0;

	cds_ja_for_each_key_rcu(ja, key, ja_node) {
		struct cds_ja_node *tmp_node;

		cds_ja_for_each_duplicate_safe_rcu(ja_node, tmp_node) {
			struct cds_ja_range *range;

			range = caa_container_of(ja_node,
					struct cds_ja_range, ja_node);
			ret = cds_ja_del(ja, key, &range->ja_node);
			if (ret) {
				goto end;
			}
			/* Sole user of the Judy array: OK to free immediately */
			free_range(range);
		}
	}
end:
	return ret;
}
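
/*
 * Sketch (added for illustration, not part of the original file): full
 * lifecycle of a range-managed Judy array. Error handling is
 * abbreviated; the calling thread is assumed to be registered with the
 * RCU flavor backing the array and to satisfy its read-side locking
 * requirements.
 */
static __attribute__((unused))
int example_lifecycle(struct cds_ja *ja)
{
	struct cds_ja_range *range;
	int ret;

	ret = cds_ja_range_init(ja);	/* one free range: [0, UINT64_MAX] */
	if (ret)
		return ret;
	range = cds_ja_range_add(ja, 10, 20);	/* carve out [10, 20] */
	if (!range)
		return -EBUSY;
	ret = cds_ja_range_del(ja, range);	/* release and re-merge */
	if (ret)
		return ret;
	return cds_ja_range_fini(ja);	/* flush ranges before destroying ja */
}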