summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
9d33508)
Also makes the read fast-path twice as fast:
7 cycles instead of 14 on an 8-core x86_64.
Mathieu :
I limited the number of nested readers to 256. This should be enough and lets us
use testb generically.
Changed the 64-bit code to make it the same as the 32-bit code. I prefer to have
the exact same behavior on both architectures.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
SRC_DEP=`echo $^ | sed 's/[^ ]*.h//g'`
SRC_DEP=`echo $^ | sed 's/[^ ]*.h//g'`
-all: test_urcu test_urcu_timing test_rwlock_timing test_urcu_yield
+all: test_urcu test_urcu_timing test_rwlock_timing test_urcu_yield urcu-asm.S \
+ urcu-asm.o
-test_urcu: urcu.o test_urcu.c
+test_urcu: urcu.o test_urcu.c urcu.h
$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-test_urcu_yield: urcu-yield.o test_urcu.c
+test_urcu_yield: urcu-yield.o test_urcu.c urcu.h
$(CC) -DDEBUG_YIELD ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
$(CC) -DDEBUG_YIELD ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-test_urcu_timing: urcu.o test_urcu_timing.c
+test_urcu_timing: urcu.o test_urcu_timing.c urcu.h
$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-test_rwlock_timing: urcu.o test_rwlock_timing.c
+test_rwlock_timing: urcu.o test_rwlock_timing.c urcu.h
$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
urcu.o: urcu.c urcu.h
$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
urcu.o: urcu.c urcu.h
urcu-yield.o: urcu.c urcu.h
$(CC) -DDEBUG_YIELD ${CFLAGS} $(LDFLAGS) -c -o $@ $(SRC_DEP)
urcu-yield.o: urcu.c urcu.h
$(CC) -DDEBUG_YIELD ${CFLAGS} $(LDFLAGS) -c -o $@ $(SRC_DEP)
+urcu-asm.S: urcu-asm.c urcu.h
+ $(CC) ${CFLAGS} -S -o $@ $(SRC_DEP)
+
+urcu-asm.o: urcu-asm.c urcu.h
+ $(CC) ${CFLAGS} -c -o $@ $(SRC_DEP)
+
- rm -f urcu.o test_urcu test_urcu_timing
+ rm -f *.o test_urcu test_urcu_timing test_rwlock_timing urcu-asm.S \
+ test_urcu_yield
void *thr_reader(void *arg)
{
void *thr_reader(void *arg)
{
struct test_array *local_ptr;
printf("thread_begin %s, thread id : %lx, tid %lu\n",
struct test_array *local_ptr;
printf("thread_begin %s, thread id : %lx, tid %lu\n",
urcu_register_thread();
for (;;) {
urcu_register_thread();
for (;;) {
- rcu_read_lock(&qparity);
local_ptr = rcu_dereference(test_rcu_pointer);
if (local_ptr)
assert(local_ptr->a == 8);
local_ptr = rcu_dereference(test_rcu_pointer);
if (local_ptr)
assert(local_ptr->a == 8);
- rcu_read_unlock(&qparity);
if (!test_duration())
break;
}
if (!test_duration())
break;
}
+void show_usage(int argc, char **argv)
+{
+ printf("Usage : %s duration (s)", argv[0]);
+#ifdef DEBUG_YIELD
+ printf(" [-r] [-w] (yield reader and/or writer)");
+#endif
+ printf("\n");
+}
+
int main(int argc, char **argv)
{
int err;
int main(int argc, char **argv)
{
int err;
- printf("Usage : %s duration (s) [-r] [-w] "
- "(yield reader and/or writer)\n", argv[0]);
+ show_usage(argc, argv);
return -1;
}
err = sscanf(argv[1], "%lu", &duration);
if (err != 1) {
return -1;
}
err = sscanf(argv[1], "%lu", &duration);
if (err != 1) {
- printf("Usage : %s duration (s) [-r] [-w] "
- "(yield reader and/or writer)\n", argv[0]);
+ show_usage(argc, argv);
void *thr_reader(void *arg)
{
void *thr_reader(void *arg)
{
struct test_array *local_ptr;
cycles_t time1, time2;
struct test_array *local_ptr;
cycles_t time1, time2;
time1 = get_cycles();
for (i = 0; i < OUTER_READ_LOOP; i++) {
for (j = 0; j < INNER_READ_LOOP; j++) {
time1 = get_cycles();
for (i = 0; i < OUTER_READ_LOOP; i++) {
for (j = 0; j < INNER_READ_LOOP; j++) {
- rcu_read_lock(&qparity);
local_ptr = rcu_dereference(test_rcu_pointer);
if (local_ptr) {
assert(local_ptr->a == 8);
}
local_ptr = rcu_dereference(test_rcu_pointer);
if (local_ptr) {
assert(local_ptr->a == 8);
}
- rcu_read_unlock(&qparity);
}
}
time2 = get_cycles();
}
}
time2 = get_cycles();
pthread_mutex_t urcu_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t urcu_mutex = PTHREAD_MUTEX_INITIALIZER;
-/* Global quiescent period parity */
-int urcu_qparity;
+/* Global grace period counter */
+int urcu_gp_ctr;
-int __thread urcu_active_readers[2];
+int __thread urcu_active_readers;
/* Thread IDs of registered readers */
#define INIT_NUM_THREADS 4
/* Thread IDs of registered readers */
#define INIT_NUM_THREADS 4
/*
* called with urcu_mutex held.
*/
/*
* called with urcu_mutex held.
*/
-static int switch_next_urcu_qparity(void)
+static void switch_next_urcu_qparity(void)
- int old_parity = urcu_qparity;
- urcu_qparity = 1 - old_parity;
- return old_parity;
+ urcu_gp_ctr ^= RCU_GP_CTR_BIT;
}
static void force_mb_all_threads(void)
}
static void force_mb_all_threads(void)
-void wait_for_quiescent_state(int parity)
+void wait_for_quiescent_state(void)
{
struct reader_data *index;
{
struct reader_data *index;
- while (index->urcu_active_readers[parity] != 0)
+ while (rcu_old_gp_ongoing(index->urcu_active_readers))
static void switch_qparity(void)
{
static void switch_qparity(void)
{
/* All threads should read qparity before accessing data structure. */
/* Write ptr before changing the qparity */
force_mb_all_threads();
debug_yield_write();
/* All threads should read qparity before accessing data structure. */
/* Write ptr before changing the qparity */
force_mb_all_threads();
debug_yield_write();
- prev_parity = switch_next_urcu_qparity();
+ switch_next_urcu_qparity();
debug_yield_write();
/*
* Wait for previous parity to be empty of readers.
*/
debug_yield_write();
/*
* Wait for previous parity to be empty of readers.
*/
- wait_for_quiescent_state(prev_parity);
+ wait_for_quiescent_state();
}
void synchronize_rcu(void)
}
void synchronize_rcu(void)
}
reader_data[num_readers].tid = id;
/* reference to the TLS of _this_ reader thread. */
}
reader_data[num_readers].tid = id;
/* reference to the TLS of _this_ reader thread. */
- reader_data[num_readers].urcu_active_readers = urcu_active_readers;
+ reader_data[num_readers].urcu_active_readers = &urcu_active_readers;
* Distributed under GPLv2
*/
* Distributed under GPLv2
*/
/* The "volatile" is due to gcc bugs */
#define barrier() __asm__ __volatile__("": : :"memory")
/* The "volatile" is due to gcc bugs */
#define barrier() __asm__ __volatile__("": : :"memory")
-/* Global quiescent period parity */
-extern int urcu_qparity;
+/*
+ * Limiting the nesting level to 256 to keep instructions small in the read
+ * fast-path.
+ */
+#define RCU_GP_COUNT (1U << 0)
+#define RCU_GP_CTR_BIT (1U << 8)
+#define RCU_GP_CTR_NEST_MASK (RCU_GP_CTR_BIT - 1)
+
+/* Global quiescent period counter with low-order bits unused. */
+extern int urcu_gp_ctr;
-extern int __thread urcu_active_readers[2];
+extern int __thread urcu_active_readers;
-static inline int get_urcu_qparity(void)
+static inline int rcu_old_gp_ongoing(int *value)
+ int v;
+
+ if (value == NULL)
+ return 0;
+ debug_yield_write();
+ v = ACCESS_ONCE(*value);
+ debug_yield_write();
+ return (v & RCU_GP_CTR_NEST_MASK) &&
+ ((v ^ ACCESS_ONCE(urcu_gp_ctr)) & RCU_GP_CTR_BIT);
-/*
- * urcu_parity should be declared on the caller's stack.
- */
-static inline void rcu_read_lock(int *urcu_parity)
+static inline void rcu_read_lock(void)
- *urcu_parity = get_urcu_qparity();
+ tmp = urcu_active_readers;
- urcu_active_readers[*urcu_parity]++;
+ if (!(tmp & RCU_GP_CTR_NEST_MASK))
+ urcu_active_readers = urcu_gp_ctr + RCU_GP_COUNT;
+ else
+ urcu_active_readers = tmp + RCU_GP_COUNT;
debug_yield_read();
/*
* Increment active readers count before accessing the pointer.
debug_yield_read();
/*
* Increment active readers count before accessing the pointer.
-static inline void rcu_read_unlock(int *urcu_parity)
+static inline void rcu_read_unlock(void)
{
debug_yield_read();
barrier();
{
debug_yield_read();
barrier();
* Finish using rcu before decrementing the pointer.
* See force_mb_all_threads().
*/
* Finish using rcu before decrementing the pointer.
* See force_mb_all_threads().
*/
- urcu_active_readers[*urcu_parity]--;
+ urcu_active_readers -= RCU_GP_COUNT;