test_urcu_wfq_dynlink_CFLAGS = -DDYNAMIC_LINK_TEST $(AM_CFLAGS)
test_urcu_wfq_dynlink_LDADD = $(URCU_COMMON_LIB)
-test_urcu_lfs_SOURCES = test_urcu_lfs.c $(URCU_CDS_LIB) $(URCU_DEFER)
-test_urcu_lfs_dynlink_SOURCES = test_urcu_lfs.c $(URCU_DEFER)
+test_urcu_lfs_SOURCES = test_urcu_lfs.c $(URCU) $(URCU_CDS_LIB)
+test_urcu_lfs_dynlink_SOURCES = test_urcu_lfs.c $(URCU)
test_urcu_lfs_dynlink_CFLAGS = -DDYNAMIC_LINK_TEST $(AM_CFLAGS)
test_urcu_lfs_dynlink_LDADD = $(URCU_CDS_LIB)
static unsigned int nr_enqueuers;
static unsigned int nr_dequeuers;
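+/* Element under test: queue node plus an rcu_head for call_rcu() reclaim. */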
+struct test {
+ struct cds_lfq_node_rcu list;
+ struct rcu_head rcu;
+};
+
static struct cds_lfq_queue_rcu q;
void *thr_enqueuer(void *_count)
cmm_smp_mb();
for (;;) {
- struct cds_lfq_node_rcu *node = malloc(sizeof(*node));
+ struct test *node = malloc(sizeof(*node));
if (!node)
goto fail;
- cds_lfq_node_init_rcu(node);
+ cds_lfq_node_init_rcu(&node->list);
rcu_read_lock();
- cds_lfq_enqueue_rcu(&q, node);
+ cds_lfq_enqueue_rcu(&q, &node->list);
rcu_read_unlock();
nr_successful_enqueues++;
}
+static
+void free_node_cb(struct rcu_head *head)
+{
+ struct test *node =
+ caa_container_of(head, struct test, rcu);
+ free(node);
+}
+
void *thr_dequeuer(void *_count)
{
unsigned long long *count = _count;
cmm_smp_mb();
for (;;) {
- struct cds_lfq_node_rcu *node;
+ struct cds_lfq_node_rcu *qnode;
+ struct test *node;
rcu_read_lock();
- node = cds_lfq_dequeue_rcu(&q);
+ qnode = cds_lfq_dequeue_rcu(&q);
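+ /* list is the first member, so a NULL qnode maps back to a NULL node. */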
+ node = caa_container_of(qnode, struct test, list);
rcu_read_unlock();
if (node) {
- defer_rcu(free, node);
+ call_rcu(&node->rcu, free_node_cb);
nr_successful_dequeues++;
}
void test_end(struct cds_lfq_queue_rcu *q, unsigned long long *nr_dequeues)
{
- struct cds_lfq_node_rcu *node;
+ struct cds_lfq_node_rcu *snode;
do {
- rcu_read_lock();
- node = cds_lfq_dequeue_rcu(q);
- rcu_read_unlock();
- if (node) {
+ snode = cds_lfq_dequeue_rcu(q);
+ if (snode) {
+ struct test *node;
+
+ node = caa_container_of(snode, struct test, list);
free(node); /* no more concurrent access */
(*nr_dequeues)++;
}
- } while (node);
+ } while (snode);
}
void show_usage(int argc, char **argv)
count_enqueuer = malloc(2 * sizeof(*count_enqueuer) * nr_enqueuers);
count_dequeuer = malloc(2 * sizeof(*count_dequeuer) * nr_dequeuers);
cds_lfq_init_rcu(&q, call_rcu);
+ err = create_all_cpu_call_rcu_data(0);
+ assert(!err);
next_aff = 0;
tot_successful_enqueues,
tot_successful_dequeues + end_dequeues);
+ free_all_cpu_call_rcu_data();
free(count_enqueuer);
free(count_dequeuer);
free(tid_enqueuer);
static unsigned int nr_enqueuers;
static unsigned int nr_dequeuers;
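+/* Element under test: stack node plus an rcu_head for call_rcu() reclaim. */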
+struct test {
+ struct cds_lfs_node_rcu list;
+ struct rcu_head rcu;
+};
+
static struct cds_lfs_stack_rcu s;
void *thr_enqueuer(void *_count)
cmm_smp_mb();
for (;;) {
- struct cds_lfs_node_rcu *node = malloc(sizeof(*node));
+ struct test *node = malloc(sizeof(*node));
if (!node)
goto fail;
- cds_lfs_node_init_rcu(node);
+ cds_lfs_node_init_rcu(&node->list);
/* No rcu read-side is needed for push */
- cds_lfs_push_rcu(&s, node);
+ cds_lfs_push_rcu(&s, &node->list);
nr_successful_enqueues++;
if (unlikely(wdelay))
}
+static
+void free_node_cb(struct rcu_head *head)
+{
+ struct test *node =
+ caa_container_of(head, struct test, rcu);
+ free(node);
+}
+
void *thr_dequeuer(void *_count)
{
unsigned long long *count = _count;
cmm_smp_mb();
for (;;) {
- struct cds_lfs_node_rcu *node;
+ struct cds_lfs_node_rcu *snode;
+ struct test *node;
rcu_read_lock();
- node = cds_lfs_pop_rcu(&s);
+ snode = cds_lfs_pop_rcu(&s);
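+ /* list is the first member, so a NULL snode maps back to a NULL node. */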
+ node = caa_container_of(snode, struct test, list);
rcu_read_unlock();
if (node) {
- defer_rcu(free, node);
+ call_rcu(&node->rcu, free_node_cb);
nr_successful_dequeues++;
}
nr_dequeues++;
void test_end(struct cds_lfs_stack_rcu *s, unsigned long long *nr_dequeues)
{
- struct cds_lfs_node_rcu *node;
+ struct cds_lfs_node_rcu *snode;
do {
- node = cds_lfs_pop_rcu(s);
- if (node) {
+ snode = cds_lfs_pop_rcu(s);
+ if (snode) {
+ struct test *node;
+
+ node = caa_container_of(snode, struct test, list);
free(node);
(*nr_dequeues)++;
}
- } while (node);
+ } while (snode);
}
void show_usage(int argc, char **argv)
count_enqueuer = malloc(2 * sizeof(*count_enqueuer) * nr_enqueuers);
count_dequeuer = malloc(2 * sizeof(*count_dequeuer) * nr_dequeuers);
cds_lfs_init_rcu(&s);
+ err = create_all_cpu_call_rcu_data(0);
+ assert(!err);
next_aff = 0;
tot_successful_enqueues,
tot_successful_dequeues + end_dequeues);
+ free_all_cpu_call_rcu_data();
free(count_enqueuer);
free(count_dequeuer);
free(tid_enqueuer);
/*
* Create a separate call_rcu thread for each CPU. This does not
* replace a pre-existing call_rcu thread -- use the set_cpu_call_rcu_data()
- * function if you want that behavior.
+ * function if you want that behavior. Should be paired with
+ * free_all_cpu_call_rcu_data() to tear down these call_rcu worker
+ * threads.
*/
int create_all_cpu_call_rcu_data(unsigned long flags)
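For reference, the intended pairing (mirrored in the test changes above) looks roughly like this. A minimal sketch, not part of the patch; the include path and the workload() callback are assumptions:

	#include <assert.h>
	#include <urcu.h>		/* assumed to provide the call_rcu API */

	static void run_with_per_cpu_call_rcu(void (*workload)(void))
	{
		int err;

		err = create_all_cpu_call_rcu_data(0);
		assert(!err);			/* one call_rcu worker per CPU */

		workload();			/* code that enqueues call_rcu() callbacks */

		free_all_cpu_call_rcu_data();	/* tear the per-CPU workers down */
	}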
was_online = rcu_reader.ctr;
/* All threads should read qparity before accessing data structure
- * where new ptr points to.
- */
- /* Write new ptr before changing the qparity */
- cmm_smp_mb();
-
- /*
+ * where new ptr points to. In the was_online case below, the needed
+ * memory barrier is provided by rcu_thread_offline().
+ *
* Mark the writer thread offline to make sure we don't wait for
* our own quiescent state. This allows using synchronize_rcu()
* in threads registered as readers.
*/
- if (was_online) {
- CMM_STORE_SHARED(rcu_reader.ctr, 0);
- cmm_smp_mb(); /* write rcu_reader.ctr before read futex */
- wake_up_gp();
- }
+ if (was_online)
+ rcu_thread_offline();
+ else
+ cmm_smp_mb();
mutex_lock(&rcu_gp_lock);
* freed.
*/
if (was_online)
- _CMM_STORE_SHARED(rcu_reader.ctr,
- CMM_LOAD_SHARED(rcu_gp_ctr));
- cmm_smp_mb();
+ rcu_thread_online();
+ else
+ cmm_smp_mb();
}
#else /* !(CAA_BITS_PER_LONG < 64) */
void synchronize_rcu(void)
* our own quiescent state. This allows using synchronize_rcu()
* in threads registered as readers.
*/
- cmm_smp_mb();
- if (was_online) {
- CMM_STORE_SHARED(rcu_reader.ctr, 0);
- cmm_smp_mb(); /* write rcu_reader.ctr before read futex */
- wake_up_gp();
- }
+ if (was_online)
+ rcu_thread_offline();
+ else
+ cmm_smp_mb();
mutex_lock(&rcu_gp_lock);
if (cds_list_empty(&registry))
mutex_unlock(&rcu_gp_lock);
if (was_online)
- _CMM_STORE_SHARED(rcu_reader.ctr,
- CMM_LOAD_SHARED(rcu_gp_ctr));
- cmm_smp_mb();
+ rcu_thread_online();
+ else
+ cmm_smp_mb();
}
#endif /* !(CAA_BITS_PER_LONG < 64) */
#endif
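Since synchronize_rcu() now takes the caller offline around the grace period, a registered QSBR reader thread can wait for a grace period itself. A minimal sketch, not part of the patch, assuming the usual urcu-qsbr.h entry points:

	#define _LGPL_SOURCE
	#include <urcu-qsbr.h>

	static void reader_thread_work(void)
	{
		rcu_register_thread();

		/* ... read-side critical sections, periodic rcu_quiescent_state() ... */

		/*
		 * The thread is marked offline internally, so it does not
		 * wait for its own quiescent state.
		 */
		synchronize_rcu();

		rcu_unregister_thread();
	}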
#ifdef CONFIG_RCU_SMP
+#ifndef cmm_smp_mb
#define cmm_smp_mb() cmm_mb()
+#endif
+#ifndef cmm_smp_rmb
#define cmm_smp_rmb() cmm_rmb()
+#endif
+#ifndef cmm_smp_wmb
#define cmm_smp_wmb() cmm_wmb()
+#endif
+#ifndef cmm_smp_mc
#define cmm_smp_mc() cmm_mc()
+#endif
+#ifndef cmm_smp_rmc
#define cmm_smp_rmc() cmm_rmc()
+#endif
+#ifndef cmm_smp_wmc
#define cmm_smp_wmc() cmm_wmc()
+#endif
+#ifndef cmm_smp_read_barrier_depends
#define cmm_smp_read_barrier_depends() cmm_read_barrier_depends()
+#endif
#else
+#ifndef cmm_smp_mb
#define cmm_smp_mb() cmm_barrier()
+#endif
+#ifndef cmm_smp_rmb
#define cmm_smp_rmb() cmm_barrier()
+#endif
+#ifndef cmm_smp_wmb
#define cmm_smp_wmb() cmm_barrier()
+#endif
+#ifndef cmm_smp_mc
#define cmm_smp_mc() cmm_barrier()
+#endif
+#ifndef cmm_smp_rmc
#define cmm_smp_rmc() cmm_barrier()
+#endif
+#ifndef cmm_smp_wmc
#define cmm_smp_wmc() cmm_barrier()
+#endif
+#ifndef cmm_smp_read_barrier_depends
#define cmm_smp_read_barrier_depends()
#endif
+#endif
#ifndef caa_cpu_relax
#define caa_cpu_relax() cmm_barrier()
rval; \
})
+#define mftb() \
+ ({ \
+ unsigned long long rval; \
+ asm volatile("mftb %0" : "=r" (rval)); \
+ rval; \
+ })
+
typedef unsigned long long cycles_t;
-static inline cycles_t caa_get_cycles (void)
+#ifdef __powerpc64__
+static inline cycles_t caa_get_cycles(void)
{
- long h, l;
+ return (cycles_t) mftb();
+}
+#else
+static inline cycles_t caa_get_cycles(void)
+{
+ unsigned long h, l;
for (;;) {
h = mftbu();
return (((cycles_t) h) << 32) + l;
}
}
+#endif
#ifdef __cplusplus
}
#ifdef CONFIG_RCU_HAVE_FENCE
#define cmm_mb() asm volatile("mfence":::"memory")
-#define cmm_rmb() asm volatile("lfence":::"memory")
-#define cmm_wmb() asm volatile("sfence"::: "memory")
+
+/*
+ * Define cmm_rmb/cmm_wmb as "strict" barriers, which may be needed when
+ * using SSE or working with I/O areas. cmm_smp_rmb/cmm_smp_wmb are
+ * only compiler barriers, which is enough for general use.
+ */
+#define cmm_rmb() asm volatile("lfence":::"memory")
+#define cmm_wmb() asm volatile("sfence"::: "memory")
+#define cmm_smp_rmb() cmm_barrier()
+#define cmm_smp_wmb() cmm_barrier()
#else
/*
- * Some non-Intel clones support out of order store. cmm_wmb() ceases to be a
- * nop for these.
+ * We leave smp_rmb/smp_wmb as full barriers for processors that do not have
+ * fence instructions.
+ *
+ * An empty cmm_smp_rmb() may not be enough on old PentiumPro multiprocessor
+ * systems, due to an erratum. The Linux kernel says that "Even distro
+ * kernels should think twice before enabling this", but for now let's
+ * be conservative and leave the full barrier on 32-bit processors. Also,
+ * the IDT WinChip supports weak store ordering, and the kernel may enable
+ * it behind our back; cmm_smp_wmb() ceases to be a no-op for these processors.
*/
#define cmm_mb() asm volatile("lock; addl $0,0(%%esp)":::"memory")
#define cmm_rmb() asm volatile("lock; addl $0,0(%%esp)":::"memory")
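To illustrate the distinction drawn in the comment above: publishing ordinary cacheable data only needs the cmm_smp_*() flavour, while cmm_rmb()/cmm_wmb() fences are reserved for SSE non-temporal stores or I/O areas. A minimal sketch, not part of the patch; foo/foo_ready are made-up names and CMM_STORE_SHARED() is assumed to come from urcu/system.h:

	static int foo;
	static int foo_ready;

	static void publish_foo(int v)
	{
		foo = v;
		cmm_smp_wmb();	/* compiler barrier: enough for cacheable memory on x86 */
		CMM_STORE_SHARED(foo_ready, 1);
		/* cmm_wmb() (sfence) would only be needed after non-temporal stores */
	}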
}
}
-
/* Get typed element from list at a given position. */
#define cds_list_entry(ptr, type, member) \
((type *) ((char *) (ptr) - (unsigned long) (&((type *) 0)->member)))
+/* Get first entry from a list. */
+#define cds_list_first_entry(ptr, type, member) \
+ cds_list_entry((ptr)->next, type, member)
+
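A minimal usage sketch for the new helper, not part of the patch; struct foo and foo_list are made-up names, and the list helpers are assumed to be the usual ones from urcu/list.h:

	struct foo {
		int value;
		struct cds_list_head node;
	};

	static CDS_LIST_HEAD(foo_list);

	static struct foo *peek_first(void)
	{
		if (cds_list_empty(&foo_list))
			return NULL;
		return cds_list_first_entry(&foo_list, struct foo, node);
	}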
/* Iterate forward over the elements of the list. */
#define cds_list_for_each(pos, head) \