+ dbg_printf("init table: first_order %lu last_order %lu\n",
+ first_order, last_order);
+ assert(first_order > MIN_TABLE_ORDER);
+ for (i = first_order; i <= last_order; i++) {
+ unsigned long len;
+
+ len = 1UL << (i - 1);
+ dbg_printf("init order %lu len: %lu\n", i, len);
+
+ /* Stop expand if the resize target changes under us */
+ if (CMM_LOAD_SHARED(ht->resize_target) < (1UL << i))
+ break;
+
+ cds_lfht_alloc_bucket_table(ht, i);
+
+ /*
+ * Set all bucket nodes reverse hash values for a level and
+ * link all bucket nodes into the table.
+ */
+ init_table_populate(ht, i, len);
+
+ /*
+ * Update table size.
+ */
+ cmm_smp_wmb(); /* populate data before RCU size */
+ CMM_STORE_SHARED(ht->size, 1UL << i);
+
+ dbg_printf("init new size: %lu\n", 1UL << i);
+ if (CMM_LOAD_SHARED(ht->in_progress_destroy))
+ break;
+ }
+}
+
+/*
+ * Holding RCU read lock to protect _cds_lfht_remove against memory
+ * reclaim that could be performed by other call_rcu worker threads (ABA
+ * problem).
+ * For a single level, we logically remove and garbage collect each node.
+ *
+ * As a design choice, we perform logical removal and garbage collection on a
+ * node-per-node basis to simplify this algorithm. We also assume keeping good
+ * cache locality of the operation would outweigh the possible performance gain
+ * that could be achieved by batching garbage collection for multiple levels.
+ * However, this would have to be justified by benchmarks.
+ *
+ * Concurrent removal and add operations are helping us perform garbage
+ * collection of logically removed nodes. We guarantee that all logically
+ * removed nodes have been garbage-collected (unlinked) before call_rcu is
+ * invoked to free a whole level of bucket nodes (after a grace period).
+ *
+ * Logical removal and garbage collection can therefore be done in batch or on a
+ * node-per-node basis, as long as the guarantee above holds.
+ *
+ * When we reach a certain length, we can split this removal over many worker
+ * threads, based on the number of CPUs available in the system. This should
+ * take care of not letting the resize process lag behind too many concurrent
+ * updater threads actively inserting into the hash table.
+ */
+/*
+ * Logically remove and garbage-collect one partition [start, start + len)
+ * of the order-"i" bucket nodes.  Holds the RCU read lock for the whole
+ * partition so that _cds_lfht_gc_bucket is protected against memory
+ * reclaim performed by other call_rcu worker threads (ABA problem, see
+ * the comment above).
+ */
+static
+void remove_table_partition(struct cds_lfht *ht, unsigned long i,
+ unsigned long start, unsigned long len)
+{
+ /* Order i spans flat bucket indexes [size, 2 * size). */
+ unsigned long j, size = 1UL << (i - 1);
+
+ assert(i > MIN_TABLE_ORDER);
+ ht->flavor->read_lock();
+ for (j = size + start; j < size + start + len; j++) {
+ /* Bucket node of this level, about to be unlinked. */
+ struct cds_lfht_node *fini_bucket = bucket_at(ht, j);
+ /* Lower-level bucket from which the GC walk starts. */
+ struct cds_lfht_node *parent_bucket = bucket_at(ht, j - size);
+
+ assert(j >= size && j < (size << 1));
+ dbg_printf("remove entry: order %lu index %lu hash %lu\n",
+ i, j, j);
+ /* Set the REMOVED_FLAG to freeze the ->next for gc */
+ uatomic_or(&fini_bucket->next, REMOVED_FLAG);
+ _cds_lfht_gc_bucket(parent_bucket, fini_bucket);
+ }
+ ht->flavor->read_unlock();
+}
+
+/*
+ * Remove (unlink) all bucket nodes of table order "i".  Small levels are
+ * handled inline by the calling thread; larger ones are split across
+ * per-CPU partitions via partition_resize_helper, so the resize does not
+ * lag behind concurrent updaters.
+ */
+static
+void remove_table(struct cds_lfht *ht, unsigned long i, unsigned long len)
+{
+
+ assert(nr_cpus_mask != -1);
+ /* Single-threaded path: too few buckets to be worth partitioning. */
+ if (nr_cpus_mask < 0 || len < 2 * MIN_PARTITION_PER_THREAD) {
+ /* Bracket the work so this (resize) thread is seen by RCU. */
+ ht->flavor->thread_online();
+ remove_table_partition(ht, i, 0, len);
+ ht->flavor->thread_offline();
+ return;
+ }
+ /* Fan the level out over worker threads, one partition each. */
+ partition_resize_helper(ht, i, len, remove_table_partition);
+}
+
+/*
+ * fini_table() is never called for first_order == 0, which is why
+ * free_by_rcu_order == 0 can be used as criterion to know if free must
+ * be called.
+ */
+static
+void fini_table(struct cds_lfht *ht,
+ unsigned long first_order, unsigned long last_order)
+{
+ /*
+ * Signed loop counter: the loop counts down and compares against an
+ * unsigned bound.  NOTE(review): "i" is long but printed with %lu
+ * below; values stay small and positive, but %ld would be the exact
+ * specifier — confirm against the project's dbg_printf conventions.
+ */
+ long i;
+ /* Order whose bucket table free is deferred past a grace period. */
+ unsigned long free_by_rcu_order = 0;
+
+ dbg_printf("fini table: first_order %lu last_order %lu\n",
+ first_order, last_order);
+ assert(first_order > MIN_TABLE_ORDER);
+ /* Tear levels down from the largest order to the smallest. */
+ for (i = last_order; i >= first_order; i--) {
+ unsigned long len;
+
+ len = 1UL << (i - 1);
+ dbg_printf("fini order %lu len: %lu\n", i, len);
+
+ /* Stop shrink if the resize target changes under us */
+ if (CMM_LOAD_SHARED(ht->resize_target) > (1UL << (i - 1)))
+ break;
+
+ /* Publish the smaller size before removing this level. */
+ cmm_smp_wmb(); /* populate data before RCU size */
+ CMM_STORE_SHARED(ht->size, 1UL << (i - 1));
+
+ /*
+ * We need to wait for all add operations to reach Q.S. (and
+ * thus use the new table for lookups) before we can start
+ * releasing the old bucket nodes. Otherwise their lookup will
+ * return a logically removed node as insert position.
+ */
+ ht->flavor->update_synchronize_rcu();
+ /* Free the level removed on the previous iteration, if any. */
+ if (free_by_rcu_order)
+ cds_lfht_free_bucket_table(ht, free_by_rcu_order);
+
+ /*
+ * Set "removed" flag in bucket nodes about to be removed.
+ * Unlink all now-logically-removed bucket node pointers.
+ * Concurrent add/remove operation are helping us doing
+ * the gc.
+ */
+ remove_table(ht, i, len);
+
+ /* This level's table is freed after the next grace period. */
+ free_by_rcu_order = i;
+
+ dbg_printf("fini new size: %lu\n", 1UL << i);
+ if (CMM_LOAD_SHARED(ht->in_progress_destroy))
+ break;
+ }
+
+ /* Flush the last deferred free after a final grace period. */
+ if (free_by_rcu_order) {
+ ht->flavor->update_synchronize_rcu();
+ cds_lfht_free_bucket_table(ht, free_by_rcu_order);
+ }
+}
+
+/*
+ * Allocate and link the initial bucket-node levels for a table of
+ * "size" buckets (size is a power of two).  Called at creation time,
+ * before the table is visible to any reader, so plain (non-atomic)
+ * stores to ->next are sufficient here.
+ */
+static
+void cds_lfht_create_bucket(struct cds_lfht *ht, unsigned long size)
+{
+ struct cds_lfht_node *prev, *node;
+ unsigned long order, len, i;
+
+ cds_lfht_alloc_bucket_table(ht, 0);
+
+ dbg_printf("create bucket: order 0 index 0 hash 0\n");
+ /* Order 0: single bucket whose next points at the end marker. */
+ node = bucket_at(ht, 0);
+ node->next = flag_bucket(get_end());
+ node->reverse_hash = 0;
+
+ /* Build each higher order by splicing its buckets into the list. */
+ for (order = 1; order < cds_lfht_get_count_order_ulong(size) + 1; order++) {
+ len = 1UL << (order - 1);
+ cds_lfht_alloc_bucket_table(ht, order);
+
+ for (i = 0; i < len; i++) {
+ /*
+ * Now, we are trying to init the node with the
+ * hash=(len+i) (which is also a bucket with the
+ * index=(len+i)) and insert it into the hash table,
+ * so this node has to be inserted after the bucket
+ * with the index=(len+i)&(len-1)=i. And because there
+ * is no other non-bucket node nor bucket node with
+ * larger index/hash inserted, so the bucket node
+ * being inserted should be inserted directly linked
+ * after the bucket node with index=i.
+ */
+ prev = bucket_at(ht, i);
+ node = bucket_at(ht, len + i);
+
+ dbg_printf("create bucket: order %lu index %lu hash %lu\n",
+ order, len + i, len + i);
+ /* Ordering key: the bit-reversed bucket index. */
+ node->reverse_hash = bit_reverse_ulong(len + i);
+
+ /* insert after prev */
+ assert(is_bucket(prev->next));
+ node->next = prev->next;
+ prev->next = flag_bucket(node);
+ }
+ }
+}
+
+struct cds_lfht *_cds_lfht_new(unsigned long init_size,
+ unsigned long min_nr_alloc_buckets,
+ unsigned long max_nr_buckets,
+ int flags,
+ const struct cds_lfht_mm_type *mm,
+ const struct rcu_flavor_struct *flavor,
+ pthread_attr_t *attr)
+{
+ struct cds_lfht *ht;
+ unsigned long order;
+
+ /* min_nr_alloc_buckets must be power of two */
+ if (!min_nr_alloc_buckets || (min_nr_alloc_buckets & (min_nr_alloc_buckets - 1)))
+ return NULL;
+
+ /* init_size must be power of two */
+ if (!init_size || (init_size & (init_size - 1)))
+ return NULL;
+
+ /*
+ * Memory management plugin default.
+ */
+ if (!mm) {
+ if (CAA_BITS_PER_LONG > 32
+ && max_nr_buckets
+ && max_nr_buckets <= (1ULL << 32)) {
+ /*
+ * For 64-bit architectures, with max number of
+ * buckets small enough not to use the entire
+ * 64-bit memory mapping space (and allowing a
+ * fair number of hash table instances), use the
+ * mmap allocator, which is faster than the
+ * order allocator.
+ */
+ mm = &cds_lfht_mm_mmap;
+ } else {
+ /*
+ * The fallback is to use the order allocator.
+ */
+ mm = &cds_lfht_mm_order;
+ }
+ }
+
+ /* max_nr_buckets == 0 for order based mm means infinite */
+ if (mm == &cds_lfht_mm_order && !max_nr_buckets)
+ max_nr_buckets = 1UL << (MAX_TABLE_ORDER - 1);
+
+ /* max_nr_buckets must be power of two */
+ if (!max_nr_buckets || (max_nr_buckets & (max_nr_buckets - 1)))
+ return NULL;
+
+ min_nr_alloc_buckets = max(min_nr_alloc_buckets, MIN_TABLE_SIZE);
+ init_size = max(init_size, MIN_TABLE_SIZE);
+ max_nr_buckets = max(max_nr_buckets, min_nr_alloc_buckets);
+ init_size = min(init_size, max_nr_buckets);
+
+ ht = mm->alloc_cds_lfht(min_nr_alloc_buckets, max_nr_buckets);
+ assert(ht);
+ assert(ht->mm == mm);
+ assert(ht->bucket_at == mm->bucket_at);
+
+ ht->flags = flags;
+ ht->flavor = flavor;
+ ht->resize_attr = attr;
+ alloc_split_items_count(ht);