* the "dummy node" tables.
* - There is one dummy node table per hash index order. The size of
* each dummy node table is half the number of hashes contained in
- * this order.
- * - call_rcu is used to garbage-collect the old order table.
+ * this order (except for order 0).
+ * - synchronize_rcu is used to garbage-collect the old dummy node table.
* - The per-order dummy node tables contain a compact version of the
* hash table nodes. These tables are invariant after they are
* populated into the hash table.
- *
+ *
+ * Dummy node tables:
+ *
+ * hash table  hash table  the last dummy    all dummy node tables
+ * order       size        node table size   0   1   2   3   4   5   6 (index)
+ * 0           1           1                 1
+ * 1           2           1                 1   1
+ * 2           4           2                 1   1   2
+ * 3           8           4                 1   1   2   4
+ * 4           16          8                 1   1   2   4   8
+ * 5           32          16                1   1   2   4   8   16
+ * 6           64          32                1   1   2   4   8   16  32
+ *
+ * When growing/shrinking, we only focus on the last dummy node table,
+ * whose size is (!order ? 1 : (1 << (order - 1))).
+ *
+ * Example for growing/shrinking:
+ * grow hash table from order 5 to 6: init the index=6 dummy node table
+ * shrink hash table from order 6 to 5: fini the index=6 dummy node table
+ *
* A bit of ascii art explanation:
*
* Order index is the off-by-one compare to the actual power of 2 because
*
* order bits reverse
* 0 0 000 000
- * |
- * 1 | 1 001 100 <- <-
- * | | | |
- * 2 | | 2 010 010 | |
+ * 1 | 1 001 100 <-
+ * 2 | | 2 010 010 <- |
* | | | 3 011 110 | <- |
- * | | | | | | |
* 3 -> | | | 4 100 001 | |
* -> | | 5 101 101 |
* -> | 6 110 011
goto insert;
if (likely(clear_flag(iter)->p.reverse_hash > node->p.reverse_hash))
goto insert;
+
/* dummy node is the first node of the identical-hash-value chain */
if (dummy && clear_flag(iter)->p.reverse_hash == node->p.reverse_hash)
goto insert;
+
next = rcu_dereference(clear_flag(iter)->p.next);
if (unlikely(is_removed(next)))
goto gc_node;
+
+ /* uniquely add */
if (unique_ret
&& !is_dummy(next)
- && clear_flag(iter)->p.reverse_hash == node->p.reverse_hash
- && !ht->compare_fct(node->key, node->key_len,
- clear_flag(iter)->key,
- clear_flag(iter)->key_len)) {
- unique_ret->node = clear_flag(iter);
- unique_ret->next = next;
+ && clear_flag(iter)->p.reverse_hash == node->p.reverse_hash) {
+ struct cds_lfht_iter d_iter = { .node = node, .next = iter, };
+
+ /*
+ * A unique add inserts the node as the first node of
+ * the identical-hash-value node chain.
+ *
+ * This semantic ensures that duplicated keys are never
+ * observable in the table, including when the table is
+ * observed node by node through forward iteration.
+ */
+ cds_lfht_next_duplicate(ht, &d_iter);
+ if (!d_iter.node)
+ goto insert;
+
+ *unique_ret = d_iter;
return;
}
+
/* Only account for identical reverse hash once */
if (iter_prev->p.reverse_hash != clear_flag(iter)->p.reverse_hash
&& !is_dummy(next))
static
void init_table(struct cds_lfht *ht,
- unsigned long first_order, unsigned long len_order)
+ unsigned long first_order, unsigned long last_order)
{
- unsigned long i, end_order;
+ unsigned long i;
- dbg_printf("init table: first_order %lu end_order %lu\n",
- first_order, first_order + len_order);
- end_order = first_order + len_order;
- for (i = first_order; i < end_order; i++) {
+ dbg_printf("init table: first_order %lu last_order %lu\n",
+ first_order, last_order);
+ for (i = first_order; i <= last_order; i++) {
unsigned long len;
len = !i ? 1 : 1UL << (i - 1);
static
void fini_table(struct cds_lfht *ht,
- unsigned long first_order, unsigned long len_order)
+ unsigned long first_order, unsigned long last_order)
{
- long i, end_order;
+ long i;
void *free_by_rcu = NULL;
- dbg_printf("fini table: first_order %lu end_order %lu\n",
- first_order, first_order + len_order);
- end_order = first_order + len_order;
+ dbg_printf("fini table: first_order %lu last_order %lu\n",
+ first_order, last_order);
assert(first_order > 0);
- for (i = end_order - 1; i >= first_order; i--) {
+ for (i = last_order; i >= first_order; i--) {
unsigned long len;
len = !i ? 1 : 1UL << (i - 1);
}
}
+/*
+ * cds_lfht_create_dummy - allocate and link the dummy node tables of a
+ * freshly created hash table.
+ *
+ * @ht:   hash table whose ht->t.tbl[] entries are populated.
+ * @size: number of buckets; tables for orders 0 up to
+ *        get_count_order_ulong(size) (inclusive) are created.
+ *
+ * The order 0 table holds a single dummy node with reverse hash 0 that
+ * points to the end marker. Each following order holds
+ * 1 << (order - 1) dummy nodes; the node at index j carries the
+ * bit-reversed value of hash (j + len) and is linked immediately after
+ * the already-linked dummy node found for index j in the lower-order
+ * tables.
+ *
+ * Allocation failures are only detected through assert().
+ */
+static
+void cds_lfht_create_dummy(struct cds_lfht *ht, unsigned long size)
+{
+	struct _cds_lfht_node *prev, *node;
+	unsigned long order, end_order, len, i, j;
+
+	ht->t.tbl[0] = calloc(1, sizeof(struct _cds_lfht_node));
+	assert(ht->t.tbl[0]);
+
+	/* %lu needs unsigned long arguments; a bare 0 would be passed as int. */
+	dbg_printf("create dummy: order %lu index %lu hash %lu\n",
+		   0UL, 0UL, 0UL);
+	ht->t.tbl[0]->nodes[0].next = flag_dummy(get_end());
+	ht->t.tbl[0]->nodes[0].reverse_hash = 0;
+
+	/* size does not change below, so compute the loop bound once. */
+	end_order = get_count_order_ulong(size) + 1;
+	for (order = 1; order < end_order; order++) {
+		len = 1UL << (order - 1);
+		ht->t.tbl[order] = calloc(1, len * sizeof(struct _cds_lfht_node));
+		assert(ht->t.tbl[order]);
+
+		/*
+		 * Walk the lower-order tables in step with j so that prev
+		 * is the predecessor after which node j gets linked.
+		 */
+		i = 0;
+		prev = ht->t.tbl[i]->nodes;
+		for (j = 0; j < len; j++) {
+			if (j & (j - 1)) {	/* Between power of 2 */
+				prev++;
+			} else if (j) {		/* At each power of 2 */
+				i++;
+				prev = ht->t.tbl[i]->nodes;
+			}
+
+			node = &ht->t.tbl[order]->nodes[j];
+			dbg_printf("create dummy: order %lu index %lu hash %lu\n",
+				   order, j, j + len);
+			/* Splice the new dummy node right after prev. */
+			node->next = prev->next;
+			assert(is_dummy(node->next));
+			node->reverse_hash = bit_reverse_ulong(j + len);
+			prev->next = flag_dummy((struct cds_lfht_node *)node);
+		}
+	}
+}
+
+
struct cds_lfht *_cds_lfht_new(cds_lfht_hash_fct hash_fct,
cds_lfht_compare_fct compare_fct,
unsigned long hash_seed,
ht->percpu_count = alloc_per_cpu_items_count();
/* this mutex should not nest in read-side C.S. */
pthread_mutex_init(&ht->resize_mutex, NULL);
- order = get_count_order_ulong(max(init_size, MIN_TABLE_SIZE)) + 1;
ht->flags = flags;
- ht->cds_lfht_rcu_thread_offline();
- pthread_mutex_lock(&ht->resize_mutex);
- ht->t.resize_target = 1UL << (order - 1);
- init_table(ht, 0, order);
- pthread_mutex_unlock(&ht->resize_mutex);
- ht->cds_lfht_rcu_thread_online();
+ order = get_count_order_ulong(max(init_size, MIN_TABLE_SIZE));
+ ht->t.resize_target = 1UL << order;
+ cds_lfht_create_dummy(ht, 1UL << order);
+ ht->t.size = 1UL << order;
return ht;
}
{
unsigned long old_order, new_order;
- old_order = get_count_order_ulong(old_size) + 1;
- new_order = get_count_order_ulong(new_size) + 1;
+ old_order = get_count_order_ulong(old_size);
+ new_order = get_count_order_ulong(new_size);
dbg_printf("resize from %lu (order %lu) to %lu (order %lu) buckets\n",
old_size, old_order, new_size, new_order);
assert(new_size > old_size);
- init_table(ht, old_order, new_order - old_order);
+ init_table(ht, old_order + 1, new_order);
}
/* called with resize mutex held */
unsigned long old_order, new_order;
new_size = max(new_size, MIN_TABLE_SIZE);
- old_order = get_count_order_ulong(old_size) + 1;
- new_order = get_count_order_ulong(new_size) + 1;
+ old_order = get_count_order_ulong(old_size);
+ new_order = get_count_order_ulong(new_size);
dbg_printf("resize from %lu (order %lu) to %lu (order %lu) buckets\n",
old_size, old_order, new_size, new_order);
assert(new_size < old_size);
/* Remove and unlink all dummy nodes to remove. */
- fini_table(ht, new_order, old_order - new_order);
+ fini_table(ht, new_order + 1, old_order);
}