*
* Userspace RCU library - RCU Judy Array
*
- * Copyright 2012 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * Copyright (C) 2000 - 2002 Hewlett-Packard Company
+ * Copyright 2012-2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
#include <errno.h>
#include <limits.h>
#include <string.h>
+#include <assert.h>
#include <urcu/rcuja.h>
#include <urcu/compiler.h>
#include <urcu/arch.h>
-#include <assert.h>
#include <urcu-pointer.h>
#include <urcu/uatomic.h>
-#include <stdint.h>
#include "rcuja-internal.h"
JA_RECOMPACT_DEL,
};
+enum ja_lookup_inequality {
+ JA_LOOKUP_BE,
+ JA_LOOKUP_AE,
+};
+
+enum ja_direction {
+ JA_LEFT,
+ JA_RIGHT,
+ JA_LEFTMOST,
+ JA_RIGHTMOST,
+};
+
static
struct cds_ja_inode *_ja_node_mask_ptr(struct cds_ja_inode_flag *node)
{
return NULL;
}
memset(p, 0, len);
- uatomic_inc(&ja->nr_nodes_allocated);
+ if (ja_debug_counters())
+ uatomic_inc(&ja->nr_nodes_allocated);
return p;
}
void free_cds_ja_node(struct cds_ja *ja, struct cds_ja_inode *node)
{
free(node);
- if (node)
+ if (ja_debug_counters() && node)
uatomic_inc(&ja->nr_nodes_freed);
}
}
static
-struct cds_ja_inode_flag *ja_linear_node_get_left(const struct cds_ja_type *type,
+struct cds_ja_inode_flag *ja_linear_node_get_direction(const struct cds_ja_type *type,
struct cds_ja_inode *node,
- unsigned int n)
+ int n, uint8_t *result_key,
+ enum ja_direction dir)
{
uint8_t nr_child;
uint8_t *values;
struct cds_ja_inode_flag **pointers;
- struct cds_ja_inode_flag *ptr;
- unsigned int i, match_idx;
- int match_v = -1;
+ struct cds_ja_inode_flag *ptr, *match_ptr = NULL;
+ unsigned int i;
+ int match_v;
assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);
+ assert(dir == JA_LEFT || dir == JA_RIGHT);
+
+ if (dir == JA_LEFT) {
+ match_v = -1;
+ } else {
+ match_v = JA_ENTRY_PER_NODE;
+ }
nr_child = ja_linear_node_get_nr_child(type, node);
cmm_smp_rmb(); /* read nr_child before values and pointers */
assert(type->type_class != RCU_JA_LINEAR || nr_child >= type->min_child);
values = &node->u.data[1];
+ pointers = (struct cds_ja_inode_flag **) align_ptr_size(&values[type->max_linear_child]);
for (i = 0; i < nr_child; i++) {
unsigned int v;
v = CMM_LOAD_SHARED(values[i]);
- if (v < n && (int) v > match_v) {
- match_v = v;
- match_idx = i;
+ ptr = CMM_LOAD_SHARED(pointers[i]);
+ if (!ptr)
+ continue;
+ if (dir == JA_LEFT) {
+ if ((int) v < n && (int) v > match_v) {
+ match_v = v;
+ match_ptr = ptr;
+ }
+ } else {
+ if ((int) v > n && (int) v < match_v) {
+ match_v = v;
+ match_ptr = ptr;
+ }
}
}
- if (match_v < 0) {
+
+ if (!match_ptr) {
return NULL;
}
- pointers = (struct cds_ja_inode_flag **) align_ptr_size(&values[type->max_linear_child]);
- ptr = rcu_dereference(pointers[match_idx]);
- return ptr;
+ assert(match_v >= 0 && match_v < JA_ENTRY_PER_NODE);
+
+ *result_key = (uint8_t) match_v;
+ return rcu_dereference(match_ptr);
}
static
}
static
-struct cds_ja_inode_flag *ja_pool_node_get_left(const struct cds_ja_type *type,
+struct cds_ja_inode_flag *ja_pool_node_get_direction(const struct cds_ja_type *type,
struct cds_ja_inode *node,
- unsigned int n)
+ int n, uint8_t *result_key,
+ enum ja_direction dir)
{
unsigned int pool_nr;
- int match_v = -1;
+ int match_v;
struct cds_ja_inode_flag *match_node_flag = NULL;
assert(type->type_class == RCU_JA_POOL);
+ assert(dir == JA_LEFT || dir == JA_RIGHT);
+
+ if (dir == JA_LEFT) {
+ match_v = -1;
+ } else {
+ match_v = JA_ENTRY_PER_NODE;
+ }
for (pool_nr = 0; pool_nr < (1U << type->nr_pool_order); pool_nr++) {
struct cds_ja_inode *pool =
j, &v, &iter);
if (!iter)
continue;
- if (v < n && (int) v > match_v) {
- match_v = v;
- match_node_flag = iter;
+ if (dir == JA_LEFT) {
+ if ((int) v < n && (int) v > match_v) {
+ match_v = v;
+ match_node_flag = iter;
+ }
+ } else {
+ if ((int) v > n && (int) v < match_v) {
+ match_v = v;
+ match_node_flag = iter;
+ }
}
}
}
+ if (match_node_flag)
+ *result_key = (uint8_t) match_v;
return match_node_flag;
}
}
static
-struct cds_ja_inode_flag *ja_pigeon_node_get_left(const struct cds_ja_type *type,
+struct cds_ja_inode_flag *ja_pigeon_node_get_direction(const struct cds_ja_type *type,
struct cds_ja_inode *node,
- unsigned int n)
+ int n, uint8_t *result_key,
+ enum ja_direction dir)
{
struct cds_ja_inode_flag **child_node_flag_ptr;
struct cds_ja_inode_flag *child_node_flag;
int i;
assert(type->type_class == RCU_JA_PIGEON);
-
- /* n - 1 is first value left of n */
- for (i = n - 1; i >= 0; i--) {
- child_node_flag_ptr = &((struct cds_ja_inode_flag **) node->u.data)[i];
- child_node_flag = rcu_dereference(*child_node_flag_ptr);
- if (child_node_flag) {
- dbg_printf("ja_pigeon_node_get_left child_node_flag %p\n",
- child_node_flag);
- return child_node_flag;
+ assert(dir == JA_LEFT || dir == JA_RIGHT);
+
+ if (dir == JA_LEFT) {
+ /* n - 1 is first value left of n */
+ for (i = n - 1; i >= 0; i--) {
+ child_node_flag_ptr = &((struct cds_ja_inode_flag **) node->u.data)[i];
+ child_node_flag = rcu_dereference(*child_node_flag_ptr);
+ if (child_node_flag) {
+ dbg_printf("ja_pigeon_node_get_left child_node_flag %p\n",
+ child_node_flag);
+ *result_key = (uint8_t) i;
+ return child_node_flag;
+ }
+ }
+ } else {
+ /* n + 1 is first value right of n */
+ for (i = n + 1; i < JA_ENTRY_PER_NODE; i++) {
+ child_node_flag_ptr = &((struct cds_ja_inode_flag **) node->u.data)[i];
+ child_node_flag = rcu_dereference(*child_node_flag_ptr);
+ if (child_node_flag) {
+ dbg_printf("ja_pigeon_node_get_right child_node_flag %p\n",
+ child_node_flag);
+ *result_key = (uint8_t) i;
+ return child_node_flag;
+ }
}
}
return NULL;
}
static
-struct cds_ja_inode_flag *ja_node_get_left(struct cds_ja_inode_flag *node_flag,
- unsigned int n)
+struct cds_ja_inode_flag *ja_node_get_direction(struct cds_ja_inode_flag *node_flag,
+ int n, uint8_t *result_key,
+ enum ja_direction dir)
{
unsigned int type_index;
struct cds_ja_inode *node;
switch (type->type_class) {
case RCU_JA_LINEAR:
- return ja_linear_node_get_left(type, node, n);
+ return ja_linear_node_get_direction(type, node, n, result_key, dir);
case RCU_JA_POOL:
- return ja_pool_node_get_left(type, node, n);
+ return ja_pool_node_get_direction(type, node, n, result_key, dir);
case RCU_JA_PIGEON:
- return ja_pigeon_node_get_left(type, node, n);
+ return ja_pigeon_node_get_direction(type, node, n, result_key, dir);
default:
assert(0);
return (void *) -1UL;
}
static
-struct cds_ja_inode_flag *ja_node_get_rightmost(struct cds_ja_inode_flag *node_flag)
+struct cds_ja_inode_flag *ja_node_get_leftright(struct cds_ja_inode_flag *node_flag,
+ unsigned int n, uint8_t *result_key,
+ enum ja_direction dir)
{
- return ja_node_get_left(node_flag, JA_ENTRY_PER_NODE);
+ return ja_node_get_direction(node_flag, n, result_key, dir);
+}
+
+static
+struct cds_ja_inode_flag *ja_node_get_minmax(struct cds_ja_inode_flag *node_flag,
+ uint8_t *result_key,
+ enum ja_direction dir)
+{
+ switch (dir) {
+ case JA_LEFTMOST:
+ return ja_node_get_direction(node_flag,
+ -1, result_key, JA_RIGHT);
+ case JA_RIGHTMOST:
+ return ja_node_get_direction(node_flag,
+ JA_ENTRY_PER_NODE, result_key, JA_LEFT);
+ default:
+ assert(0);
+ }
}
static
dbg_printf("Using fallback for %u children, node type index: %u, mode %s\n",
new_shadow_node->nr_child, old_type_index, mode == JA_RECOMPACT_ADD_NEXT ? "add_next" :
(mode == JA_RECOMPACT_DEL ? "del" : "add_same"));
- uatomic_inc(&ja->node_fallback_count_distribution[new_shadow_node->nr_child]);
+ if (ja_debug_counters())
+ uatomic_inc(&ja->node_fallback_count_distribution[new_shadow_node->nr_child]);
}
/* Return pointer to new recompacted node through old_node_flag_ptr */
} else {
new_type_index++;
dbg_printf("Add fallback to type %d\n", new_type_index);
- uatomic_inc(&ja->nr_fallback);
+ if (ja_debug_counters())
+ uatomic_inc(&ja->nr_fallback);
fallback = 1;
goto retry;
}
unsigned int tree_depth, i;
struct cds_ja_inode_flag *node_flag;
- if (caa_unlikely(key > ja->key_max))
+ if (caa_unlikely(key > ja->key_max || key == UINT64_MAX))
return NULL;
tree_depth = ja->tree_depth;
node_flag = rcu_dereference(ja->root);
return (struct cds_ja_node *) node_flag;
}
-struct cds_ja_node *cds_ja_lookup_lower_equal(struct cds_ja *ja, uint64_t key)
+static
+struct cds_ja_node *cds_ja_lookup_inequality(struct cds_ja *ja, uint64_t key,
+ uint64_t *result_key, enum ja_lookup_inequality mode)
{
int tree_depth, level;
struct cds_ja_inode_flag *node_flag, *cur_node_depth[JA_MAX_DEPTH];
+ uint8_t cur_key[JA_MAX_DEPTH];
+ uint64_t _result_key = 0;
+ enum ja_direction dir;
- if (caa_unlikely(key > ja->key_max || !key))
+ switch (mode) {
+ case JA_LOOKUP_BE:
+ case JA_LOOKUP_AE:
+ if (caa_unlikely(key > ja->key_max || key == UINT64_MAX))
+ return NULL;
+ break;
+ default:
return NULL;
+ }
memset(cur_node_depth, 0, sizeof(cur_node_depth));
+ memset(cur_key, 0, sizeof(cur_key));
tree_depth = ja->tree_depth;
node_flag = rcu_dereference(ja->root);
cur_node_depth[0] = node_flag;
node_flag = ja_node_get_nth(node_flag, NULL, iter_key);
if (!ja_node_ptr(node_flag))
break;
+ cur_key[level - 1] = iter_key;
cur_node_depth[level] = node_flag;
- dbg_printf("cds_ja_lookup iter key lookup %u finds node_flag %p\n",
+ dbg_printf("cds_ja_lookup_inequality iter key lookup %u finds node_flag %p\n",
(unsigned int) iter_key, node_flag);
}
if (level == tree_depth) {
/* Last level lookup succeded. We got an equal match. */
+ if (result_key)
+ *result_key = key;
return (struct cds_ja_node *) node_flag;
}
/*
- * Find highest value left of current node.
+ * Find highest value left/right of current node.
* Current node is cur_node_depth[level].
- * Start at current level. If we cannot find any key left of
- * ours, go one level up, seek highest value left of current
- * (recursively), and when we find one, get the rightmost child
- * of its rightmost child (recursively).
+ * Start at current level. If we cannot find any key left/right
+ * of ours, go one level up, seek highest value left/right of
+ * current (recursively), and when we find one, get the
+ * rightmost/leftmost child of its rightmost/leftmost child
+ * (recursively).
*/
+ switch (mode) {
+ case JA_LOOKUP_BE:
+ dir = JA_LEFT;
+ break;
+ case JA_LOOKUP_AE:
+ dir = JA_RIGHT;
+ break;
+ default:
+ assert(0);
+ }
for (; level > 0; level--) {
uint8_t iter_key;
iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - level - 1)));
- node_flag = ja_node_get_left(cur_node_depth[level - 1],
- iter_key);
- /* If found left sibling, find rightmost child. */
+ node_flag = ja_node_get_leftright(cur_node_depth[level - 1],
+ iter_key, &cur_key[level - 1], dir);
+ dbg_printf("cds_ja_lookup_inequality find sibling from %u at %u finds node_flag %p\n",
+ (unsigned int) iter_key, (unsigned int) cur_key[level - 1],
+ node_flag);
+ /* If found left/right sibling, find rightmost/leftmost child. */
if (ja_node_ptr(node_flag))
break;
}
if (!level) {
- /* Reached the root and could not find a left sibling. */
+ /* Reached the root and could not find a left/right sibling. */
return NULL;
}
/*
* From this point, we are guaranteed to be able to find a
- * "lower than" match. ja_attach_node() and ja_detach_node()
- * both guarantee that it is not possible for a lookup to reach
- * a dead-end.
+ * "below than"/"above than" match. ja_attach_node() and
+ * ja_detach_node() both guarantee that it is not possible for a
+ * lookup to reach a dead-end.
*/
- /* Find rightmost child of rightmost child (recursively). */
+ /*
+ * Find rightmost/leftmost child of rightmost/leftmost child
+ * (recursively).
+ */
+ switch (mode) {
+ case JA_LOOKUP_BE:
+ dir = JA_RIGHTMOST;
+ break;
+ case JA_LOOKUP_AE:
+ dir = JA_LEFTMOST;
+ break;
+ default:
+ assert(0);
+ }
for (; level < tree_depth; level++) {
- node_flag = ja_node_get_rightmost(node_flag);
- /* If found left sibling, find rightmost child. */
+ node_flag = ja_node_get_minmax(node_flag, &cur_key[level - 1], dir);
+ dbg_printf("cds_ja_lookup_inequality find minmax at %u finds node_flag %p\n",
+ (unsigned int) cur_key[level - 1],
+ node_flag);
if (!ja_node_ptr(node_flag))
break;
}
assert(level == tree_depth);
+ if (result_key) {
+ for (level = 1; level < tree_depth; level++) {
+ _result_key |= ((uint64_t) cur_key[level - 1])
+ << (JA_BITS_PER_BYTE * (tree_depth - level - 1));
+ }
+ *result_key = _result_key;
+ }
return (struct cds_ja_node *) node_flag;
}
+struct cds_ja_node *cds_ja_lookup_below_equal(struct cds_ja *ja,
+ uint64_t key, uint64_t *result_key)
+{
+ dbg_printf("cds_ja_lookup_below_equal key %" PRIu64 "\n", key);
+ return cds_ja_lookup_inequality(ja, key, result_key, JA_LOOKUP_BE);
+}
+
+struct cds_ja_node *cds_ja_lookup_above_equal(struct cds_ja *ja,
+ uint64_t key, uint64_t *result_key)
+{
+ dbg_printf("cds_ja_lookup_above_equal key %" PRIu64 "\n", key);
+ return cds_ja_lookup_inequality(ja, key, result_key, JA_LOOKUP_AE);
+}
+
/*
* We reached an unpopulated node. Create it and the children we need,
* and then attach the entire branch to the current node. This may
struct cds_ja_inode_flag *parent_node_flag,
struct cds_ja_inode_flag **node_flag_ptr,
struct cds_ja_inode_flag *node_flag,
+ struct cds_ja_node *last_node,
struct cds_ja_node *node)
{
struct cds_ja_shadow_node *shadow_node;
- int ret = 0;
+ struct cds_ja_node *iter_node;
+ int ret = 0, found = 0;
shadow_node = rcuja_shadow_lookup_lock(ja->ht, parent_node_flag);
if (!shadow_node) {
return -EAGAIN;
}
- if (ja_node_ptr(*node_flag_ptr) != ja_node_ptr(node_flag)) {
+ /*
+ * Ensure that previous node is still there at end of list.
+ */
+ iter_node = (struct cds_ja_node *) ja_node_ptr(node_flag);
+ if ((struct cds_ja_node *) ja_node_ptr(*node_flag_ptr) != iter_node) {
+ ret = -EAGAIN;
+ goto end;
+ }
+ cds_ja_for_each_duplicate(iter_node) {
+ if (iter_node == last_node)
+ found = 1;
+ }
+ if (!found) {
ret = -EAGAIN;
goto end;
}
/*
- * Add node to head of list. Safe against concurrent RCU read
- * traversals.
+ * Add node to tail of list to ensure that RCU traversals will
+ * always see either the prior node or the newly added if
+ * executed concurrently with a sequence of add followed by del
+ * on the same key. Safe against concurrent RCU read traversals.
*/
- node->next = (struct cds_ja_node *) node_flag;
- rcu_assign_pointer(*node_flag_ptr, (struct cds_ja_inode_flag *) node);
+ node->next = NULL;
+ rcu_assign_pointer(last_node->next, node);
end:
rcuja_shadow_unlock(shadow_node);
return ret;
**node_flag_ptr;
int ret;
- if (caa_unlikely(key > ja->key_max)) {
+ if (caa_unlikely(key > ja->key_max || key == UINT64_MAX)) {
return -EINVAL;
}
tree_depth = ja->tree_depth;
node_flag,
key, i, node);
} else {
+ struct cds_ja_node *iter_node, *last_node = NULL;
+
if (unique_node_ret) {
*unique_node_ret = (struct cds_ja_node *) ja_node_ptr(node_flag);
return -EEXIST;
}
+ /* Find last duplicate */
+ iter_node = (struct cds_ja_node *) ja_node_ptr(node_flag);
+ cds_ja_for_each_duplicate_rcu(iter_node)
+ last_node = iter_node;
+
dbg_printf("cds_ja_add duplicate parent2_node_flag %p parent_node_flag %p node_flag_ptr %p node_flag %p\n",
parent2_node_flag, parent_node_flag, node_flag_ptr, node_flag);
parent_attach_node_flag,
attach_node_flag_ptr,
attach_node_flag,
+ last_node,
node);
}
if (ret == -EAGAIN || ret == -EEXIST)
int nr_snapshot;
int ret;
- if (caa_unlikely(key > ja->key_max))
+ if (caa_unlikely(key > ja->key_max || key == UINT64_MAX))
return -EINVAL;
tree_depth = ja->tree_depth;
return NULL;
}
-/*
- * Called from RCU read-side CS.
- */
-__attribute__((visibility("protected")))
-void rcuja_free_all_children(struct cds_ja_shadow_node *shadow_node,
- struct cds_ja_inode_flag *node_flag,
- void (*rcu_free_node)(struct cds_ja_node *node))
-{
- unsigned int type_index;
- struct cds_ja_inode *node;
- const struct cds_ja_type *type;
-
- node = ja_node_ptr(node_flag);
- assert(node != NULL);
- type_index = ja_node_type(node_flag);
- type = &ja_types[type_index];
-
- switch (type->type_class) {
- case RCU_JA_LINEAR:
- {
- uint8_t nr_child =
- ja_linear_node_get_nr_child(type, node);
- unsigned int i;
-
- for (i = 0; i < nr_child; i++) {
- struct cds_ja_inode_flag *iter;
- struct cds_ja_node *node_iter, *n;
- uint8_t v;
-
- ja_linear_node_get_ith_pos(type, node, i, &v, &iter);
- node_iter = (struct cds_ja_node *) iter;
- cds_ja_for_each_duplicate_safe(node_iter, n) {
- rcu_free_node(node_iter);
- }
- }
- break;
- }
- case RCU_JA_POOL:
- {
- unsigned int pool_nr;
-
- for (pool_nr = 0; pool_nr < (1U << type->nr_pool_order); pool_nr++) {
- struct cds_ja_inode *pool =
- ja_pool_node_get_ith_pool(type, node, pool_nr);
- uint8_t nr_child =
- ja_linear_node_get_nr_child(type, pool);
- unsigned int j;
-
- for (j = 0; j < nr_child; j++) {
- struct cds_ja_inode_flag *iter;
- struct cds_ja_node *node_iter, *n;
- uint8_t v;
-
- ja_linear_node_get_ith_pos(type, pool, j, &v, &iter);
- node_iter = (struct cds_ja_node *) iter;
- cds_ja_for_each_duplicate_safe(node_iter, n) {
- rcu_free_node(node_iter);
- }
- }
- }
- break;
- }
- case RCU_JA_NULL:
- break;
- case RCU_JA_PIGEON:
- {
- unsigned int i;
-
- for (i = 0; i < JA_ENTRY_PER_NODE; i++) {
- struct cds_ja_inode_flag *iter;
- struct cds_ja_node *node_iter, *n;
-
- iter = ja_pigeon_node_get_ith_pos(type, node, i);
- node_iter = (struct cds_ja_node *) iter;
- cds_ja_for_each_duplicate_safe(node_iter, n) {
- rcu_free_node(node_iter);
- }
- }
- break;
- }
- default:
- assert(0);
- }
-}
-
static
void print_debug_fallback_distribution(struct cds_ja *ja)
{
unsigned long na, nf, nr_fallback;
int ret = 0;
+ if (!ja_debug_counters())
+ return 0;
+
fallback_ratio = (double) uatomic_read(&ja->nr_fallback);
fallback_ratio /= (double) uatomic_read(&ja->nr_nodes_allocated);
nr_fallback = uatomic_read(&ja->nr_fallback);
}
/*
- * There should be no more concurrent add to the judy array while it is
- * being destroyed (ensured by the caller).
+ * There should be no more concurrent add, delete, nor look-up performed
+ * on the Judy array while it is being destroyed (ensured by the
+ * caller).
*/
-int cds_ja_destroy(struct cds_ja *ja,
- void (*rcu_free_node)(struct cds_ja_node *node))
+int cds_ja_destroy(struct cds_ja *ja)
{
const struct rcu_flavor_struct *flavor;
int ret;
flavor = cds_lfht_rcu_flavor(ja->ht);
rcuja_shadow_prune(ja->ht,
- RCUJA_SHADOW_CLEAR_FREE_NODE | RCUJA_SHADOW_CLEAR_FREE_LOCK,
- rcu_free_node);
+ RCUJA_SHADOW_CLEAR_FREE_NODE | RCUJA_SHADOW_CLEAR_FREE_LOCK);
flavor->thread_offline();
ret = rcuja_delete_ht(ja->ht);
if (ret)