* order bits reverse
* 0 0 000 000
* |
- * 1 | 1 001 100 <-
- * | | |
- * 2 | | 2 010 010 |
- * | | | 3 011 110 <- |
- * | | | | | |
+ * 1 | 1 001 100 <- <-
+ * | | | |
+ * 2 | | 2 010 010 | |
+ * | | | 3 011 110 | <- |
+ * | | | | | | |
* 3 -> | | | 4 100 001 | |
* -> | | 5 101 101 |
* -> | 6 110 011
#define CHAIN_LEN_TARGET 1
#define CHAIN_LEN_RESIZE_THRESHOLD 3
+/*
+ * Define the minimum table size. Protects against hash table resize overload
+ * when too many entries are added quickly before the resize can complete.
+ * This is especially the case if the table could be shrunk to a size of 1.
+ * TODO: we might want to make the add/remove operations help the resize to
+ * add or remove dummy nodes when a resize is ongoing to ensure upper-bound on
+ * chain length.
+ */
+#define MIN_TABLE_SIZE 128
+
#ifndef max
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif
cds_lfht_hash_fct hash_fct;
cds_lfht_compare_fct compare_fct;
unsigned long hash_seed;
+ int flags;
pthread_mutex_t resize_mutex; /* resize mutex: add/del mutex */
unsigned int in_progress_resize, in_progress_destroy;
void (*cds_lfht_call_rcu)(struct rcu_head *head,
{
unsigned long count;
+ if (!(ht->flags & CDS_LFHT_AUTO_RESIZE))
+ return;
count = uatomic_read(&ht->count);
/*
* Use bucket-local length for small table expand and for
{
struct cds_lfht_node *iter_prev, *iter, *next, *new_next;
+ assert(!is_dummy(dummy));
+ assert(!is_removed(dummy));
+ assert(!is_dummy(node));
+ assert(!is_removed(node));
for (;;) {
iter_prev = dummy;
/* We can always skip the dummy node initially */
struct _cds_lfht_node *lookup;
unsigned long hash, index, order;
+ assert(!is_dummy(node));
+ assert(!is_removed(node));
if (!t->size) {
assert(dummy);
node->p.next = flag_dummy(NULL);
*/
index = hash & (t->size - 1);
order = get_count_order_ulong(index + 1);
- lookup = &t->tbl[order]->nodes[index & ((1UL << (order - 1)) - 1)];
+ lookup = &t->tbl[order]->nodes[index & ((!order ? 0 : (1UL << (order - 1))) - 1)];
iter_prev = (struct cds_lfht_node *) lookup;
/* We can always skip the dummy node initially */
iter = rcu_dereference(iter_prev->p.next);
assert(iter_prev->p.reverse_hash <= node->p.reverse_hash);
for (;;) {
+ /* TODO: check if removed */
if (unlikely(!clear_flag(iter)))
goto insert;
+ /* TODO: check if removed */
if (likely(clear_flag(iter)->p.reverse_hash > node->p.reverse_hash))
goto insert;
next = rcu_dereference(clear_flag(iter)->p.next);
insert:
assert(node != clear_flag(iter));
assert(!is_removed(iter_prev));
+ assert(!is_removed(iter));
assert(iter_prev != node);
if (!dummy)
node->p.next = clear_flag(iter);
/* Garbage collect logically removed nodes in the bucket */
index = hash & (t->size - 1);
order = get_count_order_ulong(index + 1);
- lookup = &t->tbl[order]->nodes[index & ((1UL << (order - 1)) - 1)];
+ lookup = &t->tbl[order]->nodes[index & (!order ? 0 : ((1UL << (order - 1)) - 1))];
dummy_node = (struct cds_lfht_node *) lookup;
_cds_lfht_gc_bucket(dummy_node, node);
return node;
unsigned long hash, index, order;
/* logically delete the node */
+ assert(!is_dummy(node));
+ assert(!is_removed(node));
old = rcu_dereference(node->p.next);
do {
next = old;
/* We performed the (logical) deletion. */
flagged = 1;
- if (dummy_removal)
- node = clear_flag(node);
-
/*
* Ensure that the node is not visible to readers anymore: lookup for
* the node, and remove it (along with any other logically removed node)
* if found.
*/
hash = bit_reverse_ulong(node->p.reverse_hash);
- /*
- * When removing a dummy node, we need to consider the lower
- * order table, so we don't end up looking up the dummy nodes we
- * are currently removing.
- */
-
- if (dummy_removal)
- index = hash & ((t->size >> 1) - 1);
- else
- index = hash & (t->size - 1);
+ assert(t->size > 0);
+ index = hash & (t->size - 1);
order = get_count_order_ulong(index + 1);
- lookup = &t->tbl[order]->nodes[index & ((1UL << (order - 1)) - 1)];
+ lookup = &t->tbl[order]->nodes[index & (!order ? 0 : ((1UL << (order - 1)) - 1))];
dummy = (struct cds_lfht_node *) lookup;
_cds_lfht_gc_bucket(dummy, node);
end:
len = !i ? 1 : 1UL << (i - 1);
dbg_printf("fini order %lu len: %lu\n", i, len);
+ /*
+ * Update table size. Need to shrink this table prior to
+ * removal so gc lookups use non-logically-removed dummy
+ * nodes.
+ */
+ t->size = 1UL << (i - 2);
/* Unlink */
for (j = 0; j < len; j++) {
- struct cds_lfht_node *new_node =
+ struct cds_lfht_node *fini_node =
(struct cds_lfht_node *) &t->tbl[i]->nodes[j];
dbg_printf("fini entry: i %lu j %lu hash %lu\n",
i, j, !i ? 0 : (1UL << (i - 1)) + j);
- new_node->p.reverse_hash =
+ fini_node->p.reverse_hash =
bit_reverse_ulong(!i ? 0 : (1UL << (i - 1)) + j);
- (void) _cds_lfht_remove(ht, t, new_node, 1);
+ (void) _cds_lfht_remove(ht, t, fini_node, 1);
if (CMM_LOAD_SHARED(ht->in_progress_destroy))
break;
}
ht->cds_lfht_call_rcu(&t->tbl[i]->head, cds_lfht_free_level);
- /* Update table size */
- t->size = (i == 1) ? 0 : 1UL << (i - 2);
dbg_printf("fini new size: %lu\n", t->size);
if (CMM_LOAD_SHARED(ht->in_progress_destroy))
break;
cds_lfht_compare_fct compare_fct,
unsigned long hash_seed,
unsigned long init_size,
+ int flags,
void (*cds_lfht_call_rcu)(struct rcu_head *head,
void (*func)(struct rcu_head *head)),
void (*cds_lfht_synchronize_rcu)(void))
ht->percpu_count = alloc_per_cpu_items_count();
/* this mutex should not nest in read-side C.S. */
pthread_mutex_init(&ht->resize_mutex, NULL);
- order = get_count_order_ulong(max(init_size, 1)) + 1;
+ order = get_count_order_ulong(max(init_size, MIN_TABLE_SIZE)) + 1;
ht->t = calloc(1, sizeof(struct cds_lfht)
+ (order * sizeof(struct rcu_level *)));
ht->t->size = 0;
+ ht->flags = flags;
pthread_mutex_lock(&ht->resize_mutex);
init_table(ht, ht->t, 0, order);
pthread_mutex_unlock(&ht->resize_mutex);
t = rcu_dereference(ht->t);
index = hash & (t->size - 1);
order = get_count_order_ulong(index + 1);
- lookup = &t->tbl[order]->nodes[index & ((1UL << (order - 1)) - 1)];
+ lookup = &t->tbl[order]->nodes[index & (!order ? 0 : ((1UL << (order - 1))) - 1)];
dbg_printf("lookup hash %lu index %lu order %lu aridx %lu\n",
- hash, index, order, index & ((1UL << (order - 1)) - 1));
+ hash, index, order, index & (!order ? 0 : ((1UL << (order - 1)) - 1)));
node = (struct cds_lfht_node *) lookup;
for (;;) {
if (unlikely(!node))
unsigned long old_order, new_order;
struct rcu_table *new_t;
- new_size = max(new_size, 1);
+ new_size = max(new_size, MIN_TABLE_SIZE);
old_order = get_count_order_ulong(old_size) + 1;
new_order = get_count_order_ulong(new_size) + 1;
printf("resize from %lu (order %lu) to %lu (order %lu) buckets\n",
memcpy(&new_t->tbl, &old_t->tbl,
new_order * sizeof(struct rcu_level *));
new_t->size = !new_order ? 1 : (1UL << (new_order - 1));
+ assert(new_t->size == new_size);
new_t->resize_target = new_t->size;
new_t->resize_initiated = 0;
rcu_assign_pointer(ht->t, new_t);
/*
- * We need to wait for all reader threads to reach Q.S. (and
+ * We need to wait for all add operations to reach Q.S. (and
* thus use the new table for lookups) before we can start
- * releasing the old dummy nodes.
+ * releasing the old dummy nodes. Otherwise their lookup will
+ * return a logically removed node as insert position.
*/
ht->cds_lfht_synchronize_rcu();
}
static
-unsigned long resize_target_update_count(struct rcu_table *t,
- unsigned long count)
+void resize_target_update_count(struct rcu_table *t,
+ unsigned long count)
{
- count = max(count, 1);
- return uatomic_set(&t->resize_target, count);
+ count = max(count, MIN_TABLE_SIZE);
+ uatomic_set(&t->resize_target, count);
}
void cds_lfht_resize(struct cds_lfht *ht, unsigned long new_size)
{
struct rcu_table *t = rcu_dereference(ht->t);
- unsigned long target_size;
- target_size = resize_target_update_count(t, new_size);
+ resize_target_update_count(t, new_size);
CMM_STORE_SHARED(t->resize_initiated, 1);
pthread_mutex_lock(&ht->resize_mutex);
_do_cds_lfht_resize(ht);
unsigned long count)
{
struct rcu_resize_work *work;
- unsigned long target_size;
- target_size = resize_target_update_count(t, count);
+ if (!(ht->flags & CDS_LFHT_AUTO_RESIZE))
+ return;
+ resize_target_update_count(t, count);
if (!CMM_LOAD_SHARED(t->resize_initiated)) {
uatomic_inc(&ht->in_progress_resize);
cmm_smp_mb(); /* increment resize count before calling it */