urcu.h

   1 #ifndef _URCU_H
   2 #define _URCU_H
   3
   4 /*
   5  * urcu.h
   6  *
   7  * Userspace RCU header
   8  *
   9  * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
  10  *
 * Credits for Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  12  * for inspiration coming from the Linux kernel RCU and rcu-preempt.
  13  *
  14  * The barrier, mb, rmb, wmb, atomic_inc, smp_read_barrier_depends, ACCESS_ONCE
  15  * and rcu_dereference primitives come from the Linux kernel.
  16  *
  17  * Distributed under GPLv2
  18  */
  19
  20 #include <stdlib.h>
  21 #include <pthread.h>
  22
  23 /* The "volatile" is due to gcc bugs */
  24 #define barrier() __asm__ __volatile__("": : :"memory")
  25
  26 #define likely(x)       __builtin_expect(!!(x), 1)
  27 #define unlikely(x)     __builtin_expect(!!(x), 0)
  28
  29 /*
  30  * Assume the architecture has coherent caches. Blackfin will want this unset.
  31  */
  32 #define CONFIG_HAVE_MEM_COHERENCY 1
  33
  34 /* Assume P4 or newer */
  35 #define CONFIG_HAVE_FENCE 1
  36
  37 /* Assume SMP machine, given we don't have this information */
  38 #define CONFIG_SMP 1
  39
  40
  41 #ifdef CONFIG_HAVE_MEM_COHERENCY
  42 /*
  43  * Caches are coherent, no need to flush them.
  44  */
  45 #define mc()    barrier()
  46 #define rmc()   barrier()
  47 #define wmc()   barrier()
  48 #else
  49 #error "The architecture must create its own cache flush primitives"
  50 #define mc()    arch_cache_flush()
  51 #define rmc()   arch_cache_flush_read()
  52 #define wmc()   arch_cache_flush_write()
  53 #endif
  54
  55
  56 #ifdef CONFIG_HAVE_MEM_COHERENCY
  57
  58 /* x86 32/64 specific */
  59 #ifdef CONFIG_HAVE_FENCE
  60 #define mb()    asm volatile("mfence":::"memory")
  61 #define rmb()   asm volatile("lfence":::"memory")
  62 #define wmb()   asm volatile("sfence"::: "memory")
  63 #else
  64 /*
  65  * Some non-Intel clones support out of order store. wmb() ceases to be a
  66  * nop for these.
  67  */
  68 #define mb()    asm volatile("lock; addl $0,0(%%esp)":::"memory")
  69 #define rmb()   asm volatile("lock; addl $0,0(%%esp)":::"memory")
  70 #define wmb()   asm volatile("lock; addl $0,0(%%esp)"::: "memory")
  71 #endif
  72
  73 #else /* !CONFIG_HAVE_MEM_COHERENCY */
  74
  75 /*
  76  * Without cache coherency, the memory barriers become cache flushes.
  77  */
  78 #define mb()    mc()
  79 #define rmb()   rmc()
  80 #define wmb()   wmc()
  81
  82 #endif /* !CONFIG_HAVE_MEM_COHERENCY */
  83
  84
  85 #ifdef CONFIG_SMP
  86 #define smp_mb()        mb()
  87 #define smp_rmb()       rmb()
  88 #define smp_wmb()       wmb()
  89 #define smp_mc()        mc()
  90 #define smp_rmc()       rmc()
  91 #define smp_wmc()       wmc()
  92 #else
  93 #define smp_mb()        barrier()
  94 #define smp_rmb()       barrier()
  95 #define smp_wmb()       barrier()
  96 #define smp_mc()        barrier()
  97 #define smp_rmc()       barrier()
  98 #define smp_wmc()       barrier()
  99 #endif
 100
 101 /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
 102 static inline void rep_nop(void)
 103 {
 104         asm volatile("rep; nop" ::: "memory");
 105 }
 106
 107 static inline void cpu_relax(void)
 108 {
 109         rep_nop();
 110 }
 111
 112 static inline void atomic_inc(int *v)
 113 {
 114         asm volatile("lock; incl %0"
 115                      : "+m" (*v));
 116 }
 117
 118 #define xchg(ptr, v)                                                    \
 119         ((__typeof__(*(ptr)))__xchg((unsigned long)(v), (ptr), sizeof(*(ptr))))
 120
 121 struct __xchg_dummy {
 122         unsigned long a[100];
 123 };
 124 #define __xg(x) ((struct __xchg_dummy *)(x))
 125
 126 /*
 127  * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
 128  * Note 2: xchg has side effect, so that attribute volatile is necessary,
 129  *        but generally the primitive is invalid, *ptr is output argument. --ANK
 130  * x is considered local, ptr is considered remote.
 131  */
 132 static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
 133                                    int size)
 134 {
 135         switch (size) {
 136         case 1:
 137                 asm volatile("xchgb %b0,%1"
 138                              : "=q" (x)
 139                              : "m" (*__xg(ptr)), "0" (x)
 140                              : "memory");
 141                 break;
 142         case 2:
 143                 asm volatile("xchgw %w0,%1"
 144                              : "=r" (x)
 145                              : "m" (*__xg(ptr)), "0" (x)
 146                              : "memory");
 147                 break;
 148         case 4:
 149                 asm volatile("xchgl %k0,%1"
 150                              : "=r" (x)
 151                              : "m" (*__xg(ptr)), "0" (x)
 152                              : "memory");
 153                 break;
 154         case 8:
 155                 asm volatile("xchgq %0,%1"
 156                              : "=r" (x)
 157                              : "m" (*__xg(ptr)), "0" (x)
 158                              : "memory");
 159                 break;
 160         }
 161         smp_wmc();
 162         return x;
 163 }
 164
 165 /* Nop everywhere except on alpha. */
 166 #define smp_read_barrier_depends()
 167
 168 /*
 169  * Prevent the compiler from merging or refetching accesses.  The compiler
 170  * is also forbidden from reordering successive instances of ACCESS_ONCE(),
 171  * but only when the compiler is aware of some particular ordering.  One way
 172  * to make the compiler aware of ordering is to put the two invocations of
 173  * ACCESS_ONCE() in different C statements.
 174  *
 175  * This macro does absolutely -nothing- to prevent the CPU from reordering,
 176  * merging, or refetching absolutely anything at any time.  Its main intended
 177  * use is to mediate communication between process-level code and irq/NMI
 178  * handlers, all running on the same CPU.
 179  */
 180 #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
 181
 182 /*
 183  * Identify a shared load. A smp_rmc() or smp_mc() should come before the load.
 184  */
 185 #define _LOAD_SHARED(p)        ACCESS_ONCE(p)
 186
 187 /*
 188  * Load a data from shared memory, doing a cache flush if required.
 189  */
 190 #define LOAD_SHARED(p) \
 191         ({ \
 192                 smp_rmc(); \
 193                 _LOAD_SHARED(p); \
 194         })
 195
 196
 197 /*
 198  * Identify a shared store. A smp_wmc() or smp_mc() should follow the store.
 199  */
 200 #define _STORE_SHARED(x, v) \
 201         do { \
 202                 (x) = (v); \
 203         } while (0)
 204
 205 /*
 206  * Store v into x, where x is located in shared memory. Performs the required
 207  * cache flush after writing.
 208  */
 209 #define STORE_SHARED(x, v) \
 210         do { \
 211                 _STORE_SHARED(x, v); \
 212                 smp_wmc(); \
 213         } while (0)
 214
 215 /**
 216  * rcu_dereference - fetch an RCU-protected pointer in an
 217  * RCU read-side critical section.  This pointer may later
 218  * be safely dereferenced.
 219  *
 220  * Inserts memory barriers on architectures that require them
 221  * (currently only the Alpha), and, more importantly, documents
 222  * exactly which pointers are protected by RCU.
 223  */
 224
 225 #define rcu_dereference(p)     ({ \
 226                                 typeof(p) _________p1 = LOAD_SHARED(p); \
 227                                 smp_read_barrier_depends(); \
 228                                 (_________p1); \
 229                                 })
 230
 231 #define SIGURCU SIGUSR1
 232
 233 /*
 234  * If a reader is really non-cooperative and refuses to commit its
 235  * urcu_active_readers count to memory (there is no barrier in the reader
 236  * per-se), kick it after a few loops waiting for it.
 237  */
 238 #define KICK_READER_LOOPS 10000
 239
 240 #ifdef DEBUG_YIELD
 241 #include <sched.h>
 242 #include <time.h>
 243 #include <pthread.h>
 244 #include <unistd.h>
 245
 246 #define YIELD_READ      (1 << 0)
 247 #define YIELD_WRITE     (1 << 1)
 248
 249 /* Updates without DEBUG_FULL_MB are much slower. Account this in the delay */
 250 #ifdef DEBUG_FULL_MB
 251 /* maximum sleep delay, in us */
 252 #define MAX_SLEEP 50
 253 #else
 254 #define MAX_SLEEP 30000
 255 #endif
 256
 257 extern unsigned int yield_active;
 258 extern unsigned int __thread rand_yield;
 259
 260 static inline void debug_yield_read(void)
 261 {
 262         if (yield_active & YIELD_READ)
 263                 if (rand_r(&rand_yield) & 0x1)
 264                         usleep(rand_r(&rand_yield) % MAX_SLEEP);
 265 }
 266
 267 static inline void debug_yield_write(void)
 268 {
 269         if (yield_active & YIELD_WRITE)
 270                 if (rand_r(&rand_yield) & 0x1)
 271                         usleep(rand_r(&rand_yield) % MAX_SLEEP);
 272 }
 273
 274 static inline void debug_yield_init(void)
 275 {
 276         rand_yield = time(NULL) ^ pthread_self();
 277 }
 278 #else
 279 static inline void debug_yield_read(void)
 280 {
 281 }
 282
 283 static inline void debug_yield_write(void)
 284 {
 285 }
 286
 287 static inline void debug_yield_init(void)
 288 {
 289
 290 }
 291 #endif
 292
 293 #ifdef DEBUG_FULL_MB
 294 static inline void reader_barrier()
 295 {
 296         smp_mb();
 297 }
 298 #else
 299 static inline void reader_barrier()
 300 {
 301         barrier();
 302 }
 303 #endif
 304
 305 /*
 306  * The trick here is that RCU_GP_CTR_BIT must be a multiple of 8 so we can use a
 307  * full 8-bits, 16-bits or 32-bits bitmask for the lower order bits.
 308  */
 309 #define RCU_GP_COUNT            (1UL << 0)
 310 /* Use the amount of bits equal to half of the architecture long size */
 311 #define RCU_GP_CTR_BIT          (1UL << (sizeof(long) << 2))
 312 #define RCU_GP_CTR_NEST_MASK    (RCU_GP_CTR_BIT - 1)
 313
/*
 * Global quiescent period counter with low-order bits unused.
 * Using a long rather than a char to eliminate false register dependencies
 * causing stalls on some architectures.
 */
extern long urcu_gp_ctr;

/*
 * Per-thread reader state. rcu_read_lock() either copies urcu_gp_ctr into it
 * (outermost lock) or adds RCU_GP_COUNT to it (nested lock);
 * rcu_read_unlock() subtracts RCU_GP_COUNT.
 */
extern long __thread urcu_active_readers;
 322
 323 static inline int rcu_old_gp_ongoing(long *value)
 324 {
 325         long v;
 326
 327         if (value == NULL)
 328                 return 0;
 329         /*
 330          * Make sure both tests below are done on the same version of *value
 331          * to insure consistency.
 332          */
 333         v = LOAD_SHARED(*value);
 334         return (v & RCU_GP_CTR_NEST_MASK) &&
 335                  ((v ^ urcu_gp_ctr) & RCU_GP_CTR_BIT);
 336 }
 337
 338 static inline void rcu_read_lock(void)
 339 {
 340         long tmp;
 341
 342         tmp = urcu_active_readers;
 343         /* urcu_gp_ctr = RCU_GP_COUNT | (~RCU_GP_CTR_BIT or RCU_GP_CTR_BIT) */
 344         /*
 345          * The data dependency "read urcu_gp_ctr, write urcu_active_readers",
 346          * serializes those two memory operations. The memory barrier in the
 347          * signal handler ensures we receive the proper memory commit barriers
 348          * required by _STORE_SHARED and _LOAD_SHARED whenever communication
 349          * with the writer is needed.
 350          */
 351         if (likely(!(tmp & RCU_GP_CTR_NEST_MASK)))
 352                 _STORE_SHARED(urcu_active_readers, _LOAD_SHARED(urcu_gp_ctr));
 353         else
 354                 _STORE_SHARED(urcu_active_readers, tmp + RCU_GP_COUNT);
 355         /*
 356          * Increment active readers count before accessing the pointer.
 357          * See force_mb_all_threads().
 358          */
 359         reader_barrier();
 360 }
 361
 362 static inline void rcu_read_unlock(void)
 363 {
 364         reader_barrier();
 365         /*
 366          * Finish using rcu before decrementing the pointer.
 367          * See force_mb_all_threads().
 368          */
 369         _STORE_SHARED(urcu_active_readers, urcu_active_readers - RCU_GP_COUNT);
 370 }
 371
 372 /**
 373  * rcu_assign_pointer - assign (publicize) a pointer to a newly
 374  * initialized structure that will be dereferenced by RCU read-side
 375  * critical sections.  Returns the value assigned.
 376  *
 377  * Inserts memory barriers on architectures that require them
 378  * (pretty much all of them other than x86), and also prevents
 379  * the compiler from reordering the code that initializes the
 380  * structure after the pointer assignment.  More importantly, this
 381  * call documents which pointers will be dereferenced by RCU read-side
 382  * code.
 383  */
 384
 385 #define rcu_assign_pointer(p, v) \
 386         ({ \
 387                 if (!__builtin_constant_p(v) || \
 388                     ((v) != NULL)) \
 389                         wmb(); \
 390                 STORE_SHARED(p, v); \
 391         })
 392
 393 #define rcu_xchg_pointer(p, v) \
 394         ({ \
 395                 if (!__builtin_constant_p(v) || \
 396                     ((v) != NULL)) \
 397                         wmb(); \
 398                 xchg(p, v); \
 399         })
 400
 401 extern void synchronize_rcu(void);
 402
 403 /*
 404  * Exchanges the pointer and waits for quiescent state.
 405  * The pointer returned can be freed.
 406  */
 407 #define urcu_publish_content(p, v) \
 408         ({ \
 409                 void *oldptr; \
 410                 oldptr = rcu_xchg_pointer(p, v); \
 411                 synchronize_rcu(); \
 412                 oldptr; \
 413         })
 414
/*
 * Reader thread registration.
 *
 * Both functions are defined elsewhere.
 * NOTE(review): presumably a reader thread calls urcu_register_thread()
 * before its first rcu_read_lock() and urcu_unregister_thread() before it
 * exits - confirm against the implementation.
 */
extern void urcu_register_thread(void);
extern void urcu_unregister_thread(void);
 420
 421 #endif /* _URCU_H */