Update formal model from local copy
[urcu.git] / formal-model / urcu-controldataflow / urcu.spin
index 12f841ce46ed7225be2c8fe83b63d938483a00e6..8075506ec66c4cb21dca6dbb9efa8a5e8a8c9f9e 100644
  * output exchanged. Therefore, i post-dominating j ensures that every path
  * passing by j will pass by i before reaching the output.
  *
+ * Prefetch and speculative execution
+ *
+ * If an instruction depends on the result of a previous branch but has no
+ * side-effects, it can be executed before the branch result is known.
+ * However, it must be restarted if a core-synchronizing instruction is issued.
+ * Note that instructions which depend on the result of the speculated
+ * instruction but have side-effects must also depend on completion of the
+ * branch, in addition to the speculatively executed instruction.
+ *
  * Other considerations
  *
  * Note about "volatile" keyword dependency : The compiler will order volatile
  * Nested calls are not supported.
  */
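
The token-passing encoding used later in this model enforces exactly these
speculation rules (see the consumetoken2 argument added to PROCEDURE_READ_LOCK
and the CLEAR_TOKENS call after the first wait loop). As a minimal stand-alone
illustration, not part of urcu.spin and with invented names, a side-effect-free
read may fire early but has to be replayed once a core-synchronizing step
clears its token:

	/* Hypothetical sketch: a speculated read replayed after a sync step. */
	#define TOK_READ	(1 << 0)
	#define TOK_SYNC	(1 << 1)

	#define PRODUCE_TOKENS(state, bits)	state = state | (bits)
	#define CLEAR_TOKENS(state, bits)	state = state & ~(bits)

	byte tokens = 0;

	active proctype speculate()
	{
		do
		:: !(tokens & TOK_READ) ->
			/* Speculative read: no side-effect, may execute early. */
			PRODUCE_TOKENS(tokens, TOK_READ)
		:: (tokens & TOK_READ) && !(tokens & TOK_SYNC) ->
			/* Core-synchronizing instruction: discard the speculated read. */
			CLEAR_TOKENS(tokens, TOK_READ);
			PRODUCE_TOKENS(tokens, TOK_SYNC)
		:: (tokens & TOK_READ) && (tokens & TOK_SYNC) ->
			break
		od
	}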
 
+/*
+ * Only Alpha has out-of-order cache bank loads. Other architectures (Intel,
+ * PowerPC, ARM) ensure that dependent reads won't be reordered; cf.
+ * http://www.linuxjournal.com/article/8212
+ */
+#ifdef ARCH_ALPHA
+#define HAVE_OOO_CACHE_READ
+#endif
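
As a usage note (assuming the plain Spin toolchain rather than whatever
Makefile ships with this model), the Alpha variant is selected by defining
ARCH_ALPHA when generating the verifier, which turns on HAVE_OOO_CACHE_READ
and with it the out-of-order cache reads in ooo_mem() further down; leaving it
undefined makes ooo_mem() fall back to smp_rmb() on the read side:

	spin -a -DARCH_ALPHA urcu.spin	# generate pan.c with OOO cache-bank reads modelled
	cc -o pan pan.c
	./pan				# exhaustive safety verification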
+
 /*
  * Each process has its own data in cache. Caches are randomly updated.
  * smp_wmb and smp_rmb force cache updates (write and read), smp_mb forces
@@ -146,38 +164,34 @@ typedef per_proc_bitfield {
 };
 
 #define DECLARE_CACHED_VAR(type, x)    \
-       type mem_##x;                   \
-       per_proc_##type cached_##x;     \
-       per_proc_bitfield cache_dirty_##x;
-
-#define INIT_CACHED_VAR(x, v, j)       \
-       mem_##x = v;                    \
-       cache_dirty_##x.bitfield = 0;   \
-       j = 0;                          \
-       do                              \
-       :: j < NR_PROCS ->              \
-               cached_##x.val[j] = v;  \
-               j++                     \
-       :: j >= NR_PROCS -> break       \
-       od;
+       type mem_##x;
+
+#define DECLARE_PROC_CACHED_VAR(type, x)\
+       type cached_##x;                \
+       bit cache_dirty_##x;
+
+#define INIT_CACHED_VAR(x, v)          \
+       mem_##x = v;
+
+#define INIT_PROC_CACHED_VAR(x, v)     \
+       cache_dirty_##x = 0;            \
+       cached_##x = v;
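
To make the refactoring concrete, here is how the new per-process macros
expand for one variable (expansion reconstructed by hand from the definitions
above):

	DECLARE_PROC_CACHED_VAR(byte, urcu_gp_ctr);
	/* expands to: */
	byte cached_urcu_gp_ctr;
	bit cache_dirty_urcu_gp_ctr;

	INIT_PROC_CACHED_VAR(urcu_gp_ctr, 1);
	/* expands to: */
	cache_dirty_urcu_gp_ctr = 0;
	cached_urcu_gp_ctr = 1;

Each proctype now carries its own cached copy and dirty bit as local variables
instead of indexing a global per_proc array by get_pid(), which is why the
per-process initialization loop moved out of INIT_CACHED_VAR and into the
proctypes below.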
 
-#define IS_CACHE_DIRTY(x, id)  (cache_dirty_##x.bitfield & (1 << id))
+#define IS_CACHE_DIRTY(x, id)  (cache_dirty_##x)
 
-#define READ_CACHED_VAR(x)     (cached_##x.val[get_pid()])
+#define READ_CACHED_VAR(x)     (cached_##x)
 
 #define WRITE_CACHED_VAR(x, v)                         \
        atomic {                                        \
-               cached_##x.val[get_pid()] = v;          \
-               cache_dirty_##x.bitfield =              \
-                       cache_dirty_##x.bitfield | (1 << get_pid());    \
+               cached_##x = v;                         \
+               cache_dirty_##x = 1;                    \
        }
 
 #define CACHE_WRITE_TO_MEM(x, id)                      \
        if                                              \
        :: IS_CACHE_DIRTY(x, id) ->                     \
-               mem_##x = cached_##x.val[id];           \
-               cache_dirty_##x.bitfield =              \
-                       cache_dirty_##x.bitfield & (~(1 << id));        \
+               mem_##x = cached_##x;                   \
+               cache_dirty_##x = 0;                    \
        :: else ->                                      \
                skip                                    \
        fi;
@@ -185,7 +199,7 @@ typedef per_proc_bitfield {
 #define CACHE_READ_FROM_MEM(x, id)     \
        if                              \
        :: !IS_CACHE_DIRTY(x, id) ->    \
-               cached_##x.val[id] = mem_##x;\
+               cached_##x = mem_##x;   \
        :: else ->                      \
                skip                    \
        fi;
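
The dirty-bit protocol implemented by these simplified macros can be read off
a short sequence; the concrete value below is only an illustration, although
rcu_ptr is a real variable of the model:

	WRITE_CACHED_VAR(rcu_ptr, 1);			/* cached_rcu_ptr = 1, dirty bit set */
	CACHE_READ_FROM_MEM(rcu_ptr, get_pid());	/* skipped: line is dirty, local value kept */
	CACHE_WRITE_TO_MEM(rcu_ptr, get_pid());		/* mem_rcu_ptr = 1, dirty bit cleared */
	CACHE_READ_FROM_MEM(rcu_ptr, get_pid());	/* reloads cached_rcu_ptr from mem_rcu_ptr */

A dirty line is never clobbered by a cache refresh, and a write becomes
globally visible only once CACHE_WRITE_TO_MEM has flushed it, either at random
in ooo_mem() or forced by the write barriers.
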
@@ -206,7 +220,7 @@ typedef per_proc_bitfield {
        fi;
 
 /* Must consume all prior read tokens. All subsequent reads depend on it. */
-inline smp_rmb(i, j)
+inline smp_rmb(i)
 {
        atomic {
                CACHE_READ_FROM_MEM(urcu_gp_ctr, get_pid());
@@ -229,7 +243,7 @@ inline smp_rmb(i, j)
 }
 
 /* Must consume all prior write tokens. All subsequent writes depend on it. */
-inline smp_wmb(i, j)
+inline smp_wmb(i)
 {
        atomic {
                CACHE_WRITE_TO_MEM(urcu_gp_ctr, get_pid());
@@ -253,11 +267,11 @@ inline smp_wmb(i, j)
 
 /* Synchronization point. Must consume all prior read and write tokens. All
  * subsequent reads and writes depend on it. */
-inline smp_mb(i, j)
+inline smp_mb(i)
 {
        atomic {
-               smp_wmb(i, j);
-               smp_rmb(i, j);
+               smp_wmb(i);
+               smp_rmb(i);
        }
 }
 
@@ -284,28 +298,29 @@ inline smp_mb_recv(i, j)
 {
        do
        :: (reader_barrier[get_readerid()] == 1) ->
-               smp_mb(i, j);
+               /*
+                * We choose to ignore cycles caused by the writer busy-looping
+                * while waiting for the reader and sending barrier requests,
+                * with the reader always servicing them without continuing
+                * its own execution.
+                */
+progress_ignoring_mb1:
+               smp_mb(i);
                reader_barrier[get_readerid()] = 0;
        :: 1 ->
-               /* We choose to ignore writer's non-progress caused from the
-                * reader ignoring the writer's mb() requests */
-#ifdef WRITER_PROGRESS
-progress_writer_from_reader:
-#endif
+               /*
+                        * We choose to ignore the writer's non-progress caused
+                        * by the reader ignoring the writer's mb() requests.
+                */
+progress_ignoring_mb2:
                break;
        od;
 }
 
-#ifdef WRITER_PROGRESS
-//#define PROGRESS_LABEL(progressid)   progress_writer_progid_##progressid:
-#define PROGRESS_LABEL(progressid)
-#else
-#define PROGRESS_LABEL(progressid)
-#endif
+#define PROGRESS_LABEL(progressid)     progress_writer_progid_##progressid:
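
The progress labels are what Spin's non-progress-cycle search keys on.
Assuming the standard spin/pan workflow rather than this model's own Makefile,
the writer-progress check would be run roughly as:

	spin -a -DWRITER_PROGRESS urcu.spin
	cc -DNP -o pan pan.c	# NP enables non-progress-cycle detection
	./pan -l -f		# -l: search for non-progress cycles, -f: weak fairness

Placing PROGRESS_LABEL inside the (reader_barrier[i] == 1) branch marks the
busy-wait on an unresponsive reader as progress, so, as the surrounding
comments explain, a reader that simply ignores barrier requests is not
reported as a writer non-progress cycle.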
 
 #define smp_mb_send(i, j, progressid)                                          \
 {                                                                              \
-       smp_mb(i, j);                                                           \
+       smp_mb(i);                                                              \
        i = 0;                                                                  \
        do                                                                      \
        :: i < NR_READERS ->                                                    \
@@ -315,22 +330,23 @@ progress_writer_from_reader:
                 * interest, given the reader has the ability to totally ignore \
                 * barrier requests.                                            \
                 */                                                             \
-PROGRESS_LABEL(progressid)                                                     \
                do                                                              \
-               :: (reader_barrier[i] == 1) -> skip;                            \
+               :: (reader_barrier[i] == 1) ->                                  \
+PROGRESS_LABEL(progressid)                                                     \
+                       skip;                                                   \
                :: (reader_barrier[i] == 0) -> break;                           \
                od;                                                             \
                i++;                                                            \
        :: i >= NR_READERS ->                                                   \
                break                                                           \
        od;                                                                     \
-       smp_mb(i, j);                                                           \
+       smp_mb(i);                                                              \
 }
 
 #else
 
-#define smp_mb_send(i, j, progressid)  smp_mb(i, j)
-#define smp_mb_reader  smp_mb
+#define smp_mb_send(i, j, progressid)  smp_mb(i)
+#define smp_mb_reader(i, j)            smp_mb(i)
 #define smp_mb_recv(i, j)
 
 #endif
@@ -386,6 +402,7 @@ inline ooo_mem(i)
                        i++
                :: i >= SLAB_SIZE -> break
                od;
+#ifdef HAVE_OOO_CACHE_READ
                RANDOM_CACHE_READ_FROM_MEM(urcu_gp_ctr, get_pid());
                i = 0;
                do
@@ -403,6 +420,9 @@ inline ooo_mem(i)
                        i++
                :: i >= SLAB_SIZE -> break
                od;
+#else
+               smp_rmb(i);
+#endif /* HAVE_OOO_CACHE_READ */
        }
 }
 
@@ -419,8 +439,8 @@ int _proc_urcu_reader;
 #define READ_PROD_B_IF_FALSE           (1 << 2)
 #define READ_PROD_C_IF_TRUE_READ       (1 << 3)
 
-#define PROCEDURE_READ_LOCK(base, consumetoken, producetoken)                          \
-       :: CONSUME_TOKENS(proc_urcu_reader, consumetoken, READ_PROD_A_READ << base) ->  \
+#define PROCEDURE_READ_LOCK(base, consumetoken, consumetoken2, producetoken)           \
+       :: CONSUME_TOKENS(proc_urcu_reader, (consumetoken | consumetoken2), READ_PROD_A_READ << base) ->        \
                ooo_mem(i);                                                             \
                tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]);             \
                PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_A_READ << base);             \
@@ -434,13 +454,14 @@ int _proc_urcu_reader;
                        PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_B_IF_FALSE << base); \
                fi;                                                                     \
        /* IF TRUE */                                                                   \
-       :: CONSUME_TOKENS(proc_urcu_reader, READ_PROD_B_IF_TRUE << base,                \
+       :: CONSUME_TOKENS(proc_urcu_reader, consumetoken, /* prefetch */                \
                          READ_PROD_C_IF_TRUE_READ << base) ->                          \
                ooo_mem(i);                                                             \
                tmp2 = READ_CACHED_VAR(urcu_gp_ctr);                                    \
                PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_C_IF_TRUE_READ << base);     \
        :: CONSUME_TOKENS(proc_urcu_reader,                                             \
-                         (READ_PROD_C_IF_TRUE_READ     /* pre-dominant */              \
+                         (READ_PROD_B_IF_TRUE                                          \
+                         | READ_PROD_C_IF_TRUE_READ    /* pre-dominant */              \
                          | READ_PROD_A_READ) << base,          /* WAR */               \
                          producetoken) ->                                              \
                ooo_mem(i);                                                             \
@@ -470,14 +491,14 @@ int _proc_urcu_reader;
                          consumetoken,                                                 \
                          READ_PROC_READ_UNLOCK << base) ->                             \
                ooo_mem(i);                                                             \
-               tmp2 = READ_CACHED_VAR(urcu_active_readers[get_readerid()]);            \
+               tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]);             \
                PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_READ_UNLOCK << base);        \
        :: CONSUME_TOKENS(proc_urcu_reader,                                             \
                          consumetoken                                                  \
                          | (READ_PROC_READ_UNLOCK << base),    /* WAR */               \
                          producetoken) ->                                              \
                ooo_mem(i);                                                             \
-               WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp2 - 1);        \
+               WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp - 1);         \
                PRODUCE_TOKENS(proc_urcu_reader, producetoken);                         \
        skip
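
For readers of the patch who do not have the rest of urcu.spin at hand: the
token macros used throughout these procedures are, in this model, bit
operations on a per-process scoreboard word. The following is a reconstruction
for illustration only and may differ in detail from the real definitions
elsewhere in the file:

	/* Sketch only: token bookkeeping on a scoreboard word. */
	#define PRODUCE_TOKENS(state, bits)	\
		state = state | (bits)

	#define CONSUME_TOKENS(state, bits_consumed, bits_produced)	\
		((!((state) & (bits_produced)))				\
		 && (((state) & (bits_consumed)) == (bits_consumed)))

	#define CLEAR_TOKENS(state, bits)	\
		state = state & ~(bits)

A guarded statement becomes executable once every token it consumes has been
produced and its own output token is still clear. With the new consumetoken2
argument, the urcu_gp_ctr read marked /* prefetch */ waits only on
consumetoken, so it can be speculated before the branch outcome
(READ_PROD_B_IF_TRUE) and before the tokens passed as consumetoken2, while the
store that follows, which has a side-effect, still consumes
READ_PROD_B_IF_TRUE, matching the speculative-execution rules added to the
header comment.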
 
@@ -641,7 +662,6 @@ non_atomic3_end:
                                        skip;
                                fi;
                        }
-               :: 1 -> skip;
                fi;
 
                goto non_atomic3_skip;
@@ -654,7 +674,7 @@ non_atomic3_skip:
 
                atomic {
                        if
-                       PROCEDURE_READ_LOCK(READ_LOCK_BASE, READ_PROD_NONE, READ_LOCK_OUT);
+                       PROCEDURE_READ_LOCK(READ_LOCK_BASE, READ_PROD_NONE, 0, READ_LOCK_OUT);
 
                        :: CONSUME_TOKENS(proc_urcu_reader,
                                          READ_LOCK_OUT,                /* post-dominant */
@@ -662,7 +682,7 @@ non_atomic3_skip:
                                smp_mb_reader(i, j);
                                PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FIRST_MB);
 
-                       PROCEDURE_READ_LOCK(READ_LOCK_NESTED_BASE, READ_PROC_FIRST_MB | READ_LOCK_OUT,
+                       PROCEDURE_READ_LOCK(READ_LOCK_NESTED_BASE, READ_PROC_FIRST_MB, READ_LOCK_OUT,
                                            READ_LOCK_NESTED_OUT);
 
                        :: CONSUME_TOKENS(proc_urcu_reader,
@@ -720,12 +740,12 @@ rmb1_end:
                        /* reading urcu_active_readers, which have been written by
                         * READ_UNLOCK_OUT : RAW */
                        PROCEDURE_READ_LOCK(READ_LOCK_UNROLL_BASE,
-                                           READ_UNLOCK_OUT             /* RAW */
-                                           | READ_PROC_SECOND_MB       /* mb() orders reads */
-                                           | READ_PROC_FIRST_MB        /* mb() orders reads */
-                                           | READ_LOCK_NESTED_OUT      /* RAW */
+                                           READ_PROC_SECOND_MB         /* mb() orders reads */
+                                           | READ_PROC_FIRST_MB,       /* mb() orders reads */
+                                           READ_LOCK_NESTED_OUT        /* RAW */
                                            | READ_LOCK_OUT             /* RAW */
-                                           | READ_UNLOCK_NESTED_OUT,   /* RAW */
+                                           | READ_UNLOCK_NESTED_OUT    /* RAW */
+                                           | READ_UNLOCK_OUT,          /* RAW */
                                            READ_LOCK_OUT_UNROLL);
 
 
@@ -810,14 +830,14 @@ rmb2_end:
        goto end;
 rmb1:
 #ifndef NO_RMB
-       smp_rmb(i, j);
+       smp_rmb(i);
 #else
        ooo_mem(i);
 #endif
        goto rmb1_end;
 rmb2:
 #ifndef NO_RMB
-       smp_rmb(i, j);
+       smp_rmb(i);
 #else
        ooo_mem(i);
 #endif
@@ -833,6 +853,41 @@ active proctype urcu_reader()
        byte i, j, nest_i;
        byte tmp, tmp2;
 
+       /* Keep in sync manually with smp_rmb, smp_wmb, ooo_mem and init() */
+       DECLARE_PROC_CACHED_VAR(byte, urcu_gp_ctr);
+       /* Note: currently only one reader */
+       DECLARE_PROC_CACHED_VAR(byte, urcu_active_readers[NR_READERS]);
+       /* RCU data */
+       DECLARE_PROC_CACHED_VAR(bit, rcu_data[SLAB_SIZE]);
+
+       /* RCU pointer */
+#if (SLAB_SIZE == 2)
+       DECLARE_PROC_CACHED_VAR(bit, rcu_ptr);
+#else
+       DECLARE_PROC_CACHED_VAR(byte, rcu_ptr);
+#endif
+
+       atomic {
+               INIT_PROC_CACHED_VAR(urcu_gp_ctr, 1);
+               INIT_PROC_CACHED_VAR(rcu_ptr, 0);
+
+               i = 0;
+               do
+               :: i < NR_READERS ->
+                       INIT_PROC_CACHED_VAR(urcu_active_readers[i], 0);
+                       i++;
+               :: i >= NR_READERS -> break
+               od;
+               INIT_PROC_CACHED_VAR(rcu_data[0], WINE);
+               i = 1;
+               do
+               :: i < SLAB_SIZE ->
+                       INIT_PROC_CACHED_VAR(rcu_data[i], POISON);
+                       i++
+               :: i >= SLAB_SIZE -> break
+               od;
+       }
+
        wait_init_done();
 
        assert(get_pid() < NR_PROCS);
@@ -922,12 +977,48 @@ active proctype urcu_writer()
                                 * GP update. Needed to test single flip case.
                                 */
 
+       /* Keep in sync manually with smp_rmb, smp_wmb, ooo_mem and init() */
+       DECLARE_PROC_CACHED_VAR(byte, urcu_gp_ctr);
+       /* Note: currently only one reader */
+       DECLARE_PROC_CACHED_VAR(byte, urcu_active_readers[NR_READERS]);
+       /* RCU data */
+       DECLARE_PROC_CACHED_VAR(bit, rcu_data[SLAB_SIZE]);
+
+       /* RCU pointer */
+#if (SLAB_SIZE == 2)
+       DECLARE_PROC_CACHED_VAR(bit, rcu_ptr);
+#else
+       DECLARE_PROC_CACHED_VAR(byte, rcu_ptr);
+#endif
+
+       atomic {
+               INIT_PROC_CACHED_VAR(urcu_gp_ctr, 1);
+               INIT_PROC_CACHED_VAR(rcu_ptr, 0);
+
+               i = 0;
+               do
+               :: i < NR_READERS ->
+                       INIT_PROC_CACHED_VAR(urcu_active_readers[i], 0);
+                       i++;
+               :: i >= NR_READERS -> break
+               od;
+               INIT_PROC_CACHED_VAR(rcu_data[0], WINE);
+               i = 1;
+               do
+               :: i < SLAB_SIZE ->
+                       INIT_PROC_CACHED_VAR(rcu_data[i], POISON);
+                       i++
+               :: i >= SLAB_SIZE -> break
+               od;
+       }
+
+
        wait_init_done();
 
        assert(get_pid() < NR_PROCS);
 
        do
-       :: (loop_nr < 4) ->
+       :: (loop_nr < 3) ->
 #ifdef WRITER_PROGRESS
 progress_writer1:
 #endif
@@ -968,7 +1059,7 @@ progress_writer1:
                :: CONSUME_TOKENS(proc_urcu_writer,
                                  WRITE_DATA,
                                  WRITE_PROC_WMB) ->
-                       smp_wmb(i, j);
+                       smp_wmb(i);
                        PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_WMB);
 
                :: CONSUME_TOKENS(proc_urcu_writer,
@@ -1003,10 +1094,11 @@ smp_mb_send1_end:
                        PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_WRITE_GP);
 
                :: CONSUME_TOKENS(proc_urcu_writer,
-                                 //WRITE_PROC_FIRST_WRITE_GP   /* TEST ADDING SYNC CORE */
+                                 //WRITE_PROC_FIRST_WRITE_GP | /* TEST ADDING SYNC CORE */
                                  WRITE_PROC_FIRST_MB,  /* can be reordered before/after flips */
                                  WRITE_PROC_FIRST_WAIT | WRITE_PROC_FIRST_WAIT_LOOP) ->
                        ooo_mem(i);
+                       //smp_mb(i);    /* TEST */
                        /* ONLY WAITING FOR READER 0 */
                        tmp2 = READ_CACHED_VAR(urcu_active_readers[0]);
 #ifndef SINGLE_FLIP
@@ -1034,6 +1126,12 @@ smp_mb_send1_end:
 #ifndef GEN_ERROR_WRITER_PROGRESS
                        goto smp_mb_send2;
 smp_mb_send2_end:
+                       /* The memory barrier will invalidate the
+                        * second read, done as a prefetch. Note that all
+                        * instructions with side-effects depending on
+                        * WRITE_PROC_SECOND_READ_GP should also depend on
+                        * completion of this busy-waiting loop. */
+                       CLEAR_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP);
 #else
                        ooo_mem(i);
 #endif
@@ -1042,17 +1140,21 @@ smp_mb_send2_end:
 
                /* second flip */
                :: CONSUME_TOKENS(proc_urcu_writer,
-                                 WRITE_PROC_FIRST_WAIT         /* Control dependency : need to branch out of
-                                                                * the loop to execute the next flip (CHECK) */
-                                 | WRITE_PROC_FIRST_WRITE_GP
+                                 //WRITE_PROC_FIRST_WAIT |     //test  /* no dependency. Could pre-fetch, no side-effect. */
+                                 WRITE_PROC_FIRST_WRITE_GP
                                  | WRITE_PROC_FIRST_READ_GP
                                  | WRITE_PROC_FIRST_MB,
                                  WRITE_PROC_SECOND_READ_GP) ->
                        ooo_mem(i);
+                       //smp_mb(i);    /* TEST */
                        tmpa = READ_CACHED_VAR(urcu_gp_ctr);
                        PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP);
                :: CONSUME_TOKENS(proc_urcu_writer,
-                                 WRITE_PROC_FIRST_MB
+                                 WRITE_PROC_FIRST_WAIT                 /* dependency on first wait, because this
+                                                                        * instruction has globally observable
+                                                                        * side-effects.
+                                                                        */
+                                 | WRITE_PROC_FIRST_MB
                                  | WRITE_PROC_WMB
                                  | WRITE_PROC_FIRST_READ_GP
                                  | WRITE_PROC_FIRST_WRITE_GP
@@ -1063,11 +1165,12 @@ smp_mb_send2_end:
                        PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WRITE_GP);
 
                :: CONSUME_TOKENS(proc_urcu_writer,
-                                 //WRITE_PROC_FIRST_WRITE_GP   /* TEST ADDING SYNC CORE */
+                                 //WRITE_PROC_FIRST_WRITE_GP | /* TEST ADDING SYNC CORE */
                                  WRITE_PROC_FIRST_WAIT
                                  | WRITE_PROC_FIRST_MB,        /* can be reordered before/after flips */
                                  WRITE_PROC_SECOND_WAIT | WRITE_PROC_SECOND_WAIT_LOOP) ->
                        ooo_mem(i);
+                       //smp_mb(i);    /* TEST */
                        /* ONLY WAITING FOR READER 0 */
                        tmp2 = READ_CACHED_VAR(urcu_active_readers[0]);
                        if
@@ -1079,7 +1182,7 @@ smp_mb_send2_end:
                        fi;
 
                :: CONSUME_TOKENS(proc_urcu_writer,
-                                 //WRITE_PROC_FIRST_WRITE_GP   /* TEST ADDING SYNC CORE */
+                                 //WRITE_PROC_FIRST_WRITE_GP | /* TEST ADDING SYNC CORE */
                                  WRITE_PROC_SECOND_WRITE_GP
                                  | WRITE_PROC_FIRST_WRITE_GP
                                  | WRITE_PROC_SECOND_READ_GP
@@ -1151,6 +1254,12 @@ end_writer:
        :: 1 ->
 #ifdef WRITER_PROGRESS
 progress_writer2:
+#endif
+#ifdef READER_PROGRESS
+               /*
+                * Make sure we don't block the reader's progress.
+                */
+               smp_mb_send(i, j, 5);
 #endif
                skip;
        od;
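
A matching usage note for the reader-progress run (again assuming the plain
spin/pan workflow):

	spin -a -DREADER_PROGRESS urcu.spin
	cc -DNP -o pan pan.c
	./pan -l -f

The smp_mb_send() added to this otherwise idle loop is there, as its comment
says, to make sure the idle writer does not block the reader's progress during
that check.
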
@@ -1184,13 +1293,13 @@ init {
        byte i, j;
 
        atomic {
-               INIT_CACHED_VAR(urcu_gp_ctr, 1, j);
-               INIT_CACHED_VAR(rcu_ptr, 0, j);
+               INIT_CACHED_VAR(urcu_gp_ctr, 1);
+               INIT_CACHED_VAR(rcu_ptr, 0);
 
                i = 0;
                do
                :: i < NR_READERS ->
-                       INIT_CACHED_VAR(urcu_active_readers[i], 0, j);
+                       INIT_CACHED_VAR(urcu_active_readers[i], 0);
                        ptr_read_first[i] = 1;
                        ptr_read_second[i] = 1;
                        data_read_first[i] = WINE;
@@ -1198,11 +1307,11 @@ init {
                        i++;
                :: i >= NR_READERS -> break
                od;
-               INIT_CACHED_VAR(rcu_data[0], WINE, j);
+               INIT_CACHED_VAR(rcu_data[0], WINE);
                i = 1;
                do
                :: i < SLAB_SIZE ->
-                       INIT_CACHED_VAR(rcu_data[i], POISON, j);
+                       INIT_CACHED_VAR(rcu_data[i], POISON);
                        i++
                :: i >= SLAB_SIZE -> break
                od;