+/*
+ * fls: find last (most significant) set bit.
+ * Returns 0 if no bit is set, else the 1-based position of the most
+ * significant bit (1 to 32 for fls_u32, 1 to 64 for fls_u64).
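+ * For instance, fls_u32(0) == 0, fls_u32(1) == 1,
+ * fls_u32(0x80000000U) == 32 and fls_u64(1ULL << 40) == 41.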
+ */
+#if defined(__i386) || defined(__x86_64)
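+/*
+ * x86 bsr places the index of the most significant set bit in the
+ * destination and sets ZF when the source is 0 (leaving the destination
+ * undefined), so the jnz skips the -1 fixup for non-zero inputs, while a
+ * zero input ends up with r == -1 and thus fls == 0.
+ */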
+static inline
+unsigned int fls_u32(uint32_t x)
+{
+ int r;
+
+ asm("bsrl %1,%0\n\t"
+ "jnz 1f\n\t"
+ "movl $-1,%0\n\t"
+ "1:\n\t"
+ : "=r" (r) : "rm" (x));
+ return r + 1;
+}
+#define HAS_FLS_U32
+#endif
+
+#if defined(__x86_64)
+static inline
+unsigned int fls_u64(uint64_t x)
+{
+ long r;
+
+ asm("bsrq %1,%0\n\t"
+ "jnz 1f\n\t"
+ "movq $-1,%0\n\t"
+ "1:\n\t"
+ : "=r" (r) : "rm" (x));
+ return r + 1;
+}
+#define HAS_FLS_U64
+#endif
+
+#ifndef HAS_FLS_U64
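+/*
+ * Portable fallback: binary search for the most significant set bit by
+ * shifting the value left in halving steps (32, 16, 8, 4, 2, 1) and
+ * lowering the candidate position whenever the upper half is empty.
+ */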
+static __attribute__((unused))
+unsigned int fls_u64(uint64_t x)
+{
+ unsigned int r = 64;
+
+ if (!x)
+ return 0;
+
+ if (!(x & 0xFFFFFFFF00000000ULL)) {
+ x <<= 32;
+ r -= 32;
+ }
+ if (!(x & 0xFFFF000000000000ULL)) {
+ x <<= 16;
+ r -= 16;
+ }
+ if (!(x & 0xFF00000000000000ULL)) {
+ x <<= 8;
+ r -= 8;
+ }
+ if (!(x & 0xF000000000000000ULL)) {
+ x <<= 4;
+ r -= 4;
+ }
+ if (!(x & 0xC000000000000000ULL)) {
+ x <<= 2;
+ r -= 2;
+ }
+ if (!(x & 0x8000000000000000ULL)) {
+ x <<= 1;
+ r -= 1;
+ }
+ return r;
+}
+#endif
+
+#ifndef HAS_FLS_U32
+static __attribute__((unused))
+unsigned int fls_u32(uint32_t x)
+{
+ unsigned int r = 32;
+
+ if (!x)
+ return 0;
+ if (!(x & 0xFFFF0000U)) {
+ x <<= 16;
+ r -= 16;
+ }
+ if (!(x & 0xFF000000U)) {
+ x <<= 8;
+ r -= 8;
+ }
+ if (!(x & 0xF0000000U)) {
+ x <<= 4;
+ r -= 4;
+ }
+ if (!(x & 0xC0000000U)) {
+ x <<= 2;
+ r -= 2;
+ }
+ if (!(x & 0x80000000U)) {
+ x <<= 1;
+ r -= 1;
+ }
+ return r;
+}
+#endif
+
+unsigned int fls_ulong(unsigned long x)
+{
+#if (CAA_BITS_PER_LONG == 32)
+ return fls_u32(x);
+#else
+ return fls_u64(x);
+#endif
+}
+
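+/*
+ * get_count_order returns the smallest order (power-of-two exponent)
+ * covering x, i.e. ceil(log2(x)). For instance, get_count_order_u32(1) == 0,
+ * get_count_order_u32(4) == 2 and get_count_order_u32(5) == 3.
+ */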
+int get_count_order_u32(uint32_t x)
+{
+ int order;
+
+ order = fls_u32(x) - 1;
+ if (x & (x - 1))
+ order++;
+ return order;
+}
+
+int get_count_order_ulong(unsigned long x)
+{
+ int order;
+
+ order = fls_ulong(x) - 1;
+ if (x & (x - 1))
+ order++;
+ return order;
+}
+
+static
+void cds_lfht_resize_lazy(struct cds_lfht *ht, struct rcu_table *t, int growth);
+
+/*
+ * If the sched_getcpu() and sysconf(_SC_NPROCESSORS_CONF) calls are
+ * available, then we support hash table item accounting.
+ * In the unfortunate event that the number of CPUs reported is
+ * inaccurate, we use modulo arithmetic on the number of CPUs we got.
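+ *
+ * The accounting is split: each CPU updates its own slot in
+ * ht->percpu_count, and every 2^COUNT_COMMIT_ORDER local updates a
+ * whole batch of 1UL << COUNT_COMMIT_ORDER is folded into the global
+ * ht->count, which therefore only approximates the item count.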
+ */
+#if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF)
+
+static
+void cds_lfht_resize_lazy_count(struct cds_lfht *ht, struct rcu_table *t,
+ unsigned long count);
+
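+/*
+ * nr_cpus_mask states: -1 means not yet initialized, -2 means
+ * sysconf() failed and per-cpu accounting is disabled, and any value
+ * >= 0 is the power-of-two mask applied to sched_getcpu() results.
+ */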
+static long nr_cpus_mask = -1;
+
+static
+struct ht_items_count *alloc_per_cpu_items_count(void)
+{
+ struct ht_items_count *count;
+
+ switch (nr_cpus_mask) {
+ case -2:
+ return NULL;
+ case -1:
+ {
+ long maxcpus;
+
+ maxcpus = sysconf(_SC_NPROCESSORS_CONF);
+ if (maxcpus <= 0) {
+ nr_cpus_mask = -2;
+ return NULL;
+ }
+ /*
+ * round up number of CPUs to next power of two, so we
+ * can use & for modulo.
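+ * For example, a report of 6 CPUs is rounded up to 8, so
+ * nr_cpus_mask becomes 7 and "cpu & nr_cpus_mask" stays within [0, 7].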
+ */
+ maxcpus = 1UL << get_count_order_ulong(maxcpus);
+ nr_cpus_mask = maxcpus - 1;
+ }
+ /* Fall-through */
+ default:
+ return calloc(nr_cpus_mask + 1, sizeof(*count));
+ }
+}
+
+static
+void free_per_cpu_items_count(struct ht_items_count *count)
+{
+ free(count);
+}
+
+static
+int ht_get_cpu(void)
+{
+ int cpu;
+
+ assert(nr_cpus_mask >= 0);
+ cpu = sched_getcpu();
+ if (unlikely(cpu < 0))
+ return cpu;
+ else
+ return cpu & nr_cpus_mask;
+}
+
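+/*
+ * Resize requests driven by item count: whenever a committed global
+ * count reaches a power of two, it is compared against the current
+ * table size. If count >= t->size << CHAIN_LEN_RESIZE_THRESHOLD on add
+ * (or drops back below it on removal), a lazy resize to
+ * count >> (CHAIN_LEN_TARGET - 1) buckets is requested.
+ */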
+static
+void ht_count_add(struct cds_lfht *ht, struct rcu_table *t)
+{
+ unsigned long percpu_count;
+ int cpu;
+
+ if (unlikely(!ht->percpu_count))
+ return;
+ cpu = ht_get_cpu();
+ if (unlikely(cpu < 0))
+ return;
+ percpu_count = uatomic_add_return(&ht->percpu_count[cpu].add, 1);
+ if (unlikely(!(percpu_count & ((1UL << COUNT_COMMIT_ORDER) - 1)))) {
+ unsigned long count;
+
+ dbg_printf("add percpu %lu\n", percpu_count);
+ count = uatomic_add_return(&ht->count,
+ 1UL << COUNT_COMMIT_ORDER);
+ /* If power of 2 */
+ if (!(count & (count - 1))) {
+ if ((count >> CHAIN_LEN_RESIZE_THRESHOLD)
+ < t->size)
+ return;
+ dbg_printf("add set global %lu\n", count);
+ cds_lfht_resize_lazy_count(ht, t,
+ count >> (CHAIN_LEN_TARGET - 1));
+ }
+ }
+}
+
+static
+void ht_count_remove(struct cds_lfht *ht, struct rcu_table *t)
+{
+ unsigned long percpu_count;
+ int cpu;
+
+ if (unlikely(!ht->percpu_count))
+ return;
+ cpu = ht_get_cpu();
+ if (unlikely(cpu < 0))
+ return;
+ percpu_count = uatomic_add_return(&ht->percpu_count[cpu].remove, -1);
+ if (unlikely(!(percpu_count & ((1UL << COUNT_COMMIT_ORDER) - 1)))) {
+ unsigned long count;
+
+ dbg_printf("remove percpu %lu\n", percpu_count);
+ count = uatomic_add_return(&ht->count,
+ -(1UL << COUNT_COMMIT_ORDER));
+ /* If power of 2 */
+ if (!(count & (count - 1))) {
+ if ((count >> CHAIN_LEN_RESIZE_THRESHOLD)
+ >= t->size)
+ return;
+ dbg_printf("remove set global %lu\n", count);
+ cds_lfht_resize_lazy_count(ht, t,
+ count >> (CHAIN_LEN_TARGET - 1));
+ }
+ }
+}
+
+#else /* #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */
+
+static const long nr_cpus_mask = -1;
+
+static
+struct ht_items_count *alloc_per_cpu_items_count(void)
+{
+ return NULL;
+}
+
+static
+void free_per_cpu_items_count(struct ht_items_count *count)
+{
+}
+
+static
+void ht_count_add(struct cds_lfht *ht, struct rcu_table *t)
+{
+}
+
+static
+void ht_count_remove(struct cds_lfht *ht, struct rcu_table *t)
+{
+}
+
+#endif /* #else #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */
+
+static
+void check_resize(struct cds_lfht *ht, struct rcu_table *t,
+ uint32_t chain_len)
+{
+ unsigned long count;
+
+ if (!(ht->flags & CDS_LFHT_AUTO_RESIZE))
+ return;
+ count = uatomic_read(&ht->count);
+ /*
+ * Use bucket-local length for small table expand and for
+ * environments lacking per-cpu data support.
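+ * Once the approximated item count reaches the first commit batch
+ * (1UL << COUNT_COMMIT_ORDER), growth is driven by ht_count_add() /
+ * ht_count_remove() instead, hence the early return below.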
+ */
+ if (count >= (1UL << COUNT_COMMIT_ORDER))
+ return;
+ if (chain_len > 100)
+ dbg_printf("WARNING: large chain length: %u.\n",
+ chain_len);
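+ /*
+ * Grow by enough orders to bring the chain length back toward
+ * CHAIN_LEN_TARGET: assuming CHAIN_LEN_TARGET == 1, a chain of
+ * length 8 requests get_count_order_u32(8) == 3 extra orders
+ * (8 times as many buckets).
+ */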
+ if (chain_len >= CHAIN_LEN_RESIZE_THRESHOLD)
+ cds_lfht_resize_lazy(ht, t,
+ get_count_order_u32(chain_len - (CHAIN_LEN_TARGET - 1)));
+}
+