+ cmm_barrier();
+
+ /*
+ *
+ * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
+ * model easier to understand. It does not have a big performance impact
+ * anyway, given this is the write-side.
+ */
+ cmm_smp_mb();
+
+ /*
+ * Wait for each thread URCU_TLS(rcu_reader).ctr count to become 0.
+ */
+ for (;;) {
+ if (wait_loops < RCU_QS_ACTIVE_ATTEMPTS)
+ wait_loops++;
+ if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) {
+ uatomic_dec(&gp_futex);
+ /* Write futex before read reader_gp */
+ smp_mb_master(RCU_MB_GROUP);
+ }
+
+ cds_list_for_each_entry_safe(index, tmp, &registry, node) {
+ if (!rcu_gp_ongoing(&index->ctr))
+ cds_list_move(&index->node, &qsreaders);
+ }
+
+#ifndef HAS_INCOHERENT_CACHES
+ if (cds_list_empty(&registry)) {
+ if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) {
+ /* Read reader_gp before write futex */
+ smp_mb_master(RCU_MB_GROUP);
+ uatomic_set(&gp_futex, 0);
+ }
+ break;
+ } else {
+ /* Temporarily unlock the registry lock. */
+ mutex_unlock(&rcu_registry_lock);
+ if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS)
+ wait_gp();
+ else
+ caa_cpu_relax();
+ /* Re-lock the registry lock before the next loop. */
+ mutex_lock(&rcu_registry_lock);
+ }
+#else /* #ifndef HAS_INCOHERENT_CACHES */