rcuja: lookup lower equal cannot reach dead-end
[userspace-rcu.git] / rcuja / rcuja.c
index e5145ea36a3b22dbec54cd4a45eeefe7334e343a..74bb3910a384b67cc410ff09a12c953447b3cded 100644 (file)
@@ -34,7 +34,6 @@
 #include <stdint.h>
 
 #include "rcuja-internal.h"
-#include "bitfield.h"
 
 #ifndef abs
 #define abs_int(a)     ((int) (a) > 0 ? (int) (a) : -((int) (a)))
@@ -128,7 +127,7 @@ const struct cds_ja_type ja_types[] = {
         * Upon node removal below min_child, if child pool is filled
         * beyond capacity, we roll back to pigeon.
         */
-       { .type_class = RCU_JA_PIGEON, .min_child = 89, .max_child = ja_type_7_max_child, .order = 10, },
+       { .type_class = RCU_JA_PIGEON, .min_child = 83, .max_child = ja_type_7_max_child, .order = 10, },
 
        { .type_class = RCU_JA_NULL, .min_child = 0, .max_child = ja_type_8_max_child, },
 };
@@ -176,7 +175,7 @@ const struct cds_ja_type ja_types[] = {
         * Upon node removal below min_child, if child pool is filled
         * beyond capacity, we roll back to pigeon.
         */
-       { .type_class = RCU_JA_PIGEON, .min_child = 101, .max_child = ja_type_7_max_child, .order = 11, },
+       { .type_class = RCU_JA_PIGEON, .min_child = 95, .max_child = ja_type_7_max_child, .order = 11, },
 
        { .type_class = RCU_JA_NULL, .min_child = 0, .max_child = ja_type_8_max_child, },
 };
@@ -244,11 +243,6 @@ enum ja_recompact {
        JA_RECOMPACT_DEL,
 };
 
-static
-unsigned long node_fallback_count_distribution[JA_ENTRY_PER_NODE];
-static
-unsigned long nr_nodes_allocated, nr_nodes_freed;
-
 static
 struct cds_ja_inode *_ja_node_mask_ptr(struct cds_ja_inode_flag *node)
 {
@@ -291,7 +285,9 @@ struct cds_ja_inode *ja_node_ptr(struct cds_ja_inode_flag *node)
        }
 }
 
-struct cds_ja_inode *alloc_cds_ja_node(const struct cds_ja_type *ja_type)
+static
+struct cds_ja_inode *alloc_cds_ja_node(struct cds_ja *ja,
+               const struct cds_ja_type *ja_type)
 {
        size_t len = 1U << ja_type->order;
        void *p;
@@ -302,15 +298,15 @@ struct cds_ja_inode *alloc_cds_ja_node(const struct cds_ja_type *ja_type)
                return NULL;
        }
        memset(p, 0, len);
-       uatomic_inc(&nr_nodes_allocated);
+       uatomic_inc(&ja->nr_nodes_allocated);
        return p;
 }
 
-void free_cds_ja_node(struct cds_ja_inode *node)
+void free_cds_ja_node(struct cds_ja *ja, struct cds_ja_inode *node)
 {
        free(node);
        if (node)
-               uatomic_inc(&nr_nodes_freed);
+               uatomic_inc(&ja->nr_nodes_freed);
 }
 
 #define __JA_ALIGN_MASK(v, mask)       (((v) + (mask)) & ~(mask))
@@ -373,6 +369,43 @@ struct cds_ja_inode_flag *ja_linear_node_get_nth(const struct cds_ja_type *type,
        return ptr;
 }
 
+static
+struct cds_ja_inode_flag *ja_linear_node_get_left(const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               unsigned int n)
+{
+       uint8_t nr_child;
+       uint8_t *values;
+       struct cds_ja_inode_flag **pointers;
+       struct cds_ja_inode_flag *ptr;
+       unsigned int i, match_idx;
+       int match_v = -1;
+
+       assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);
+
+       nr_child = ja_linear_node_get_nr_child(type, node);
+       cmm_smp_rmb();  /* read nr_child before values and pointers */
+       assert(nr_child <= type->max_linear_child);
+       assert(type->type_class != RCU_JA_LINEAR || nr_child >= type->min_child);
+
+       values = &node->u.data[1];
+       for (i = 0; i < nr_child; i++) {
+               unsigned int v;
+
+               v = CMM_LOAD_SHARED(values[i]);
+               if (v < n && (int) v > match_v) {
+                       match_v = v;
+                       match_idx = i;
+               }
+       }
+       if (match_v < 0) {
+               return NULL;
+       }
+       pointers = (struct cds_ja_inode_flag **) align_ptr_size(&values[type->max_linear_child]);
+       ptr = rcu_dereference(pointers[match_idx]);
+       return ptr;
+}
+
 static
 void ja_linear_node_get_ith_pos(const struct cds_ja_type *type,
                struct cds_ja_inode *node,
@@ -445,6 +478,42 @@ struct cds_ja_inode *ja_pool_node_get_ith_pool(const struct cds_ja_type *type,
                &node->u.data[(unsigned int) i << type->pool_size_order];
 }
 
+static
+struct cds_ja_inode_flag *ja_pool_node_get_left(const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               unsigned int n)
+{
+       unsigned int pool_nr;
+       int match_v = -1;
+       struct cds_ja_inode_flag *match_node_flag = NULL;
+
+       assert(type->type_class == RCU_JA_POOL);
+
+       for (pool_nr = 0; pool_nr < (1U << type->nr_pool_order); pool_nr++) {
+               struct cds_ja_inode *pool =
+                       ja_pool_node_get_ith_pool(type,
+                               node, pool_nr);
+               uint8_t nr_child =
+                       ja_linear_node_get_nr_child(type, pool);
+               unsigned int j;
+
+               for (j = 0; j < nr_child; j++) {
+                       struct cds_ja_inode_flag *iter;
+                       uint8_t v;
+
+                       ja_linear_node_get_ith_pos(type, pool,
+                                       j, &v, &iter);
+                       if (!iter)
+                               continue;
+                       if (v < n && (int) v > match_v) {
+                               match_v = v;
+                               match_node_flag = iter;
+                       }
+               }
+       }
+       return match_node_flag;
+}
+
 static
 struct cds_ja_inode_flag *ja_pigeon_node_get_nth(const struct cds_ja_type *type,
                struct cds_ja_inode *node,
@@ -464,6 +533,30 @@ struct cds_ja_inode_flag *ja_pigeon_node_get_nth(const struct cds_ja_type *type,
        return child_node_flag;
 }
 
+static
+struct cds_ja_inode_flag *ja_pigeon_node_get_left(const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               unsigned int n)
+{
+       struct cds_ja_inode_flag **child_node_flag_ptr;
+       struct cds_ja_inode_flag *child_node_flag;
+       int i;
+
+       assert(type->type_class == RCU_JA_PIGEON);
+
+       /* n - 1 is first value left of n */
+       for (i = n - 1; i >= 0; i--) {
+               child_node_flag_ptr = &((struct cds_ja_inode_flag **) node->u.data)[i];
+               child_node_flag = rcu_dereference(*child_node_flag_ptr);
+               if (child_node_flag) {
+                       dbg_printf("ja_pigeon_node_get_left child_node_flag %p\n",
+                               child_node_flag);
+                       return child_node_flag;
+               }
+       }
+       return NULL;
+}
+
 static
 struct cds_ja_inode_flag *ja_pigeon_node_get_ith_pos(const struct cds_ja_type *type,
                struct cds_ja_inode *node,
@@ -506,6 +599,38 @@ struct cds_ja_inode_flag *ja_node_get_nth(struct cds_ja_inode_flag *node_flag,
        }
 }
 
+static
+struct cds_ja_inode_flag *ja_node_get_left(struct cds_ja_inode_flag *node_flag,
+               unsigned int n)
+{
+       unsigned int type_index;
+       struct cds_ja_inode *node;
+       const struct cds_ja_type *type;
+
+       node = ja_node_ptr(node_flag);
+       assert(node != NULL);
+       type_index = ja_node_type(node_flag);
+       type = &ja_types[type_index];
+
+       switch (type->type_class) {
+       case RCU_JA_LINEAR:
+               return ja_linear_node_get_left(type, node, n);
+       case RCU_JA_POOL:
+               return ja_pool_node_get_left(type, node, n);
+       case RCU_JA_PIGEON:
+               return ja_pigeon_node_get_left(type, node, n);
+       default:
+               assert(0);
+               return (void *) -1UL;
+       }
+}
+
+static
+struct cds_ja_inode_flag *ja_node_get_rightmost(struct cds_ja_inode_flag *node_flag)
+{
+       return ja_node_get_left(node_flag, JA_ENTRY_PER_NODE);
+}
+
 static
 int ja_linear_node_set_nth(const struct cds_ja_type *type,
                struct cds_ja_inode *node,
@@ -1238,7 +1363,7 @@ retry:            /* for fallback */
                        old_type_index, new_type_index);
        new_type = &ja_types[new_type_index];
        if (new_type_index != NODE_INDEX_NULL) {
-               new_node = alloc_cds_ja_node(new_type);
+               new_node = alloc_cds_ja_node(ja, new_type);
                if (!new_node)
                        return -ENOMEM;
 
@@ -1285,7 +1410,7 @@ retry:            /* for fallback */
                dbg_printf("Recompact inherit lock from %p\n", shadow_node);
                new_shadow_node = rcuja_shadow_set(ja->ht, new_node_flag, shadow_node, ja, level);
                if (!new_shadow_node) {
-                       free_cds_ja_node(new_node);
+                       free_cds_ja_node(ja, new_node);
                        return -ENOMEM;
                }
                if (fallback)
@@ -1409,7 +1534,7 @@ skip_copy:
                dbg_printf("Using fallback for %u children, node type index: %u, mode %s\n",
                        new_shadow_node->nr_child, old_type_index, mode == JA_RECOMPACT_ADD_NEXT ? "add_next" :
                                (mode == JA_RECOMPACT_DEL ? "del" : "add_same"));
-               uatomic_inc(&node_fallback_count_distribution[new_shadow_node->nr_child]);
+               uatomic_inc(&ja->node_fallback_count_distribution[new_shadow_node->nr_child]);
        }
 
        /* Return pointer to new recompacted node through old_node_flag_ptr */
@@ -1613,6 +1738,88 @@ struct cds_hlist_head cds_ja_lookup(struct cds_ja *ja, uint64_t key)
        return head;
 }
 
+struct cds_hlist_head cds_ja_lookup_lower_equal(struct cds_ja *ja, uint64_t key)
+{
+       int tree_depth, level;
+       struct cds_ja_inode_flag *node_flag, *cur_node_depth[JA_MAX_DEPTH];
+       struct cds_hlist_head head = { NULL };
+
+       if (caa_unlikely(key > ja->key_max || !key))
+               return head;
+
+       memset(cur_node_depth, 0, sizeof(cur_node_depth));
+       tree_depth = ja->tree_depth;
+       node_flag = rcu_dereference(ja->root);
+       cur_node_depth[0] = node_flag;
+
+       /* level 0: root node */
+       if (!ja_node_ptr(node_flag))
+               return head;
+
+       for (level = 1; level < tree_depth; level++) {
+               uint8_t iter_key;
+
+               iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - level - 1)));
+               node_flag = ja_node_get_nth(node_flag, NULL, iter_key);
+               if (!ja_node_ptr(node_flag))
+                       break;
+               cur_node_depth[level] = node_flag;
+               dbg_printf("cds_ja_lookup iter key lookup %u finds node_flag %p\n",
+                               (unsigned int) iter_key, node_flag);
+       }
+
+       if (level == tree_depth) {
+               /* Last level lookup succeded. We got an equal match. */
+               head.next = (struct cds_hlist_node *) node_flag;
+               return head;
+       }
+
+       /*
+        * Find highest value left of current node.
+        * Current node is cur_node_depth[level].
+        * Start at current level. If we cannot find any key left of
+        * ours, go one level up, seek highest value left of current
+        * (recursively), and when we find one, get the rightmost child
+        * of its rightmost child (recursively).
+        */
+       for (; level > 0; level--) {
+               uint8_t iter_key;
+
+               iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - level - 1)));
+               node_flag = ja_node_get_left(cur_node_depth[level - 1],
+                               iter_key);
+               /* If found left sibling, find rightmost child. */
+               if (ja_node_ptr(node_flag))
+                       break;
+       }
+
+       if (!level) {
+               /* Reached the root and could not find a left sibling. */
+               return head;
+       }
+
+       level++;
+
+       /*
+        * From this point, we are guaranteed to be able to find a
+        * "lower than" match. ja_detach_node() guarantees that it is
+        * not possible for a lookup to reach a dead-end.
+        */
+
+       /* Find rightmost child of rightmost child (recursively). */
+       for (; level < tree_depth; level++) {
+               node_flag = ja_node_get_rightmost(node_flag);
+               /* If found left sibling, find rightmost child. */
+               if (!ja_node_ptr(node_flag))
+                       break;
+       }
+
+       assert(level == tree_depth);
+
+       head.next = (struct cds_hlist_node *) node_flag;
+       return head;
+}
+
 /*
  * We reached an unpopulated node. Create it and the children we need,
  * and then attach the entire branch to the current node. This may
@@ -1674,6 +1881,27 @@ int ja_attach_node(struct cds_ja *ja,
                goto unlock_parent;
        }
 
+       /*
+        * Perform a lookup query to handle the case where
+        * old_node_flag_ptr is NULL. We cannot use it to check if the
+        * node has been populated between RCU lookup and mutex
+        * acquisition.
+        */
+       if (!old_node_flag_ptr) {
+               uint8_t iter_key;
+               struct cds_ja_inode_flag *lookup_node_flag;
+               struct cds_ja_inode_flag **lookup_node_flag_ptr;
+
+               iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (ja->tree_depth - level)));
+               lookup_node_flag = ja_node_get_nth(attach_node_flag,
+                       &lookup_node_flag_ptr,
+                       iter_key);
+               if (lookup_node_flag) {
+                       ret = -EEXIST;
+                       goto unlock_parent;
+               }
+       }
+
        if (attach_node_flag_ptr && ja_node_ptr(*attach_node_flag_ptr) !=
                        ja_node_ptr(attach_node_flag)) {
                /*
@@ -1701,8 +1929,10 @@ int ja_attach_node(struct cds_ja *ja,
                        iter_key,
                        iter_node_flag,
                        NULL, i);
-               if (ret)
+               if (ret) {
+                       dbg_printf("branch creation error %d\n", ret);
                        goto check_error;
+               }
                created_nodes[nr_created_nodes++] = iter_dest_node_flag;
                iter_node_flag = iter_dest_node_flag;
        }
@@ -1726,8 +1956,10 @@ int ja_attach_node(struct cds_ja *ja,
                        iter_key,
                        iter_node_flag,
                        shadow_node, level - 1);
-               if (ret)
+               if (ret) {
+                       dbg_printf("branch publish error %d\n", ret);
                        goto check_error;
+               }
                /*
                 * Attach branch
                 */
@@ -1793,8 +2025,10 @@ end:
        return ret;
 }
 
-int cds_ja_add(struct cds_ja *ja, uint64_t key,
-               struct cds_ja_node *new_node)
+static
+int _cds_ja_add(struct cds_ja *ja, uint64_t key,
+               struct cds_ja_node *new_node,
+               struct cds_ja_node **unique_node_ret)
 {
        unsigned int tree_depth, i;
        struct cds_ja_inode_flag *attach_node_flag,
@@ -1847,6 +2081,7 @@ retry:
        if (!ja_node_ptr(node_flag)) {
                dbg_printf("cds_ja_add NULL parent2_node_flag %p parent_node_flag %p node_flag_ptr %p node_flag %p\n",
                                parent2_node_flag, parent_node_flag, node_flag_ptr, node_flag);
+
                attach_node_flag = parent_node_flag;
                attach_node_flag_ptr = parent_node_flag_ptr;
                parent_attach_node_flag = parent2_node_flag;
@@ -1858,8 +2093,14 @@ retry:
                                node_flag,
                                key, i, new_node);
        } else {
+               if (unique_node_ret) {
+                       *unique_node_ret = (struct cds_ja_node *) ja_node_ptr(node_flag);
+                       return -EEXIST;
+               }
+
                dbg_printf("cds_ja_add duplicate parent2_node_flag %p parent_node_flag %p node_flag_ptr %p node_flag %p\n",
                                parent2_node_flag, parent_node_flag, node_flag_ptr, node_flag);
+
                attach_node_flag = node_flag;
                attach_node_flag_ptr = node_flag_ptr;
                parent_attach_node_flag = parent_node_flag;
@@ -1876,6 +2117,25 @@ retry:
        return ret;
 }
 
+int cds_ja_add(struct cds_ja *ja, uint64_t key,
+               struct cds_ja_node *new_node)
+{
+       return _cds_ja_add(ja, key, new_node, NULL);
+}
+
+struct cds_ja_node *cds_ja_add_unique(struct cds_ja *ja, uint64_t key,
+               struct cds_ja_node *new_node)
+{
+       int ret;
+       struct cds_ja_node *ret_node;
+
+       ret = _cds_ja_add(ja, key, new_node, &ret_node);
+       if (ret == -EEXIST)
+               return ret_node;
+       else
+               return new_node;
+}
+
 /*
  * Note: there is no need to lookup the pointer address associated with
  * each node's nth item after taking the lock: it's already been done by
@@ -1886,6 +2146,11 @@ retry:
  * However, when a child is removed from "linear" nodes, its pointer
  * is set to NULL. We therefore check, while holding the locks, if this
  * pointer is NULL, and return -ENOENT to the caller if it is the case.
+ *
+ * ja_detach_node() ensures that a lookup will _never_ see a branch that
+ * leads to a dead-end: when removing branch, it makes sure to perform
+ * the "cut" at the highest node that has only one child, effectively
+ * replacing it with a NULL pointer.
  */
 static
 int ja_detach_node(struct cds_ja *ja,
@@ -2312,7 +2577,7 @@ void rcuja_free_all_children(struct cds_ja_shadow_node *shadow_node,
                                struct cds_hlist_node *pos, *tmp;
                                uint8_t v;
 
-                               ja_linear_node_get_ith_pos(type, node, j, &v, &iter);
+                               ja_linear_node_get_ith_pos(type, pool, j, &v, &iter);
                                if (!iter)
                                        continue;
                                head.next = (struct cds_hlist_node *) iter;
@@ -2351,17 +2616,47 @@ void rcuja_free_all_children(struct cds_ja_shadow_node *shadow_node,
 }
 
 static
-void print_debug_fallback_distribution(void)
+void print_debug_fallback_distribution(struct cds_ja *ja)
 {
        int i;
 
        fprintf(stderr, "Fallback node distribution:\n");
        for (i = 0; i < JA_ENTRY_PER_NODE; i++) {
-               if (!node_fallback_count_distribution[i])
+               if (!ja->node_fallback_count_distribution[i])
                        continue;
                fprintf(stderr, "       %3u: %4lu\n",
-                       i, node_fallback_count_distribution[i]);
+                       i, ja->node_fallback_count_distribution[i]);
+       }
+}
+
+static
+int ja_final_checks(struct cds_ja *ja)
+{
+       double fallback_ratio;
+       unsigned long na, nf, nr_fallback;
+       int ret = 0;
+
+       fallback_ratio = (double) uatomic_read(&ja->nr_fallback);
+       fallback_ratio /= (double) uatomic_read(&ja->nr_nodes_allocated);
+       nr_fallback = uatomic_read(&ja->nr_fallback);
+       if (nr_fallback)
+               fprintf(stderr,
+                       "[warning] RCU Judy Array used %lu fallback node(s) (ratio: %g)\n",
+                       uatomic_read(&ja->nr_fallback),
+                       fallback_ratio);
+
+       na = uatomic_read(&ja->nr_nodes_allocated);
+       nf = uatomic_read(&ja->nr_nodes_freed);
+       dbg_printf("Nodes allocated: %lu, Nodes freed: %lu.\n", na, nf);
+       if (nr_fallback)
+               print_debug_fallback_distribution(ja);
+
+       if (na != nf) {
+               fprintf(stderr, "[error] Judy array leaked %ld nodes. Allocated: %lu, freed: %lu.\n",
+                       (long) na - nf, na, nf);
+               ret = -1;
        }
+       return ret;
 }
 
 /*
@@ -2382,16 +2677,12 @@ int cds_ja_destroy(struct cds_ja *ja,
        ret = rcuja_delete_ht(ja->ht);
        if (ret)
                return ret;
+
+       /* Wait for in-flight call_rcu free to complete. */
+       flavor->barrier();
+
        flavor->thread_online();
-       if (uatomic_read(&ja->nr_fallback))
-               fprintf(stderr,
-                       "[warning] RCU Judy Array used %lu fallback node(s)\n",
-                       uatomic_read(&ja->nr_fallback));
-       fprintf(stderr, "Nodes allocated: %lu, Nodes freed: %lu. Fallback ratio: %g\n",
-               uatomic_read(&nr_nodes_allocated),
-               uatomic_read(&nr_nodes_freed),
-               (double) uatomic_read(&ja->nr_fallback) / (double) uatomic_read(&nr_nodes_allocated));
-       print_debug_fallback_distribution();
+       ret = ja_final_checks(ja);
        free(ja);
-       return 0;
+       return ret;
 }
This page took 0.029191 seconds and 4 git commands to generate.