X-Git-Url: https://git.lttng.org/?a=blobdiff_plain;f=formal-model%2Furcu-controldataflow%2Furcu.spin;h=8075506ec66c4cb21dca6dbb9efa8a5e8a8c9f9e;hb=7aafc6ea87a87157d9e110b15da1829e767aab96;hp=dd5d17d32b33cbad437caae1ab2da8c9dffdc842;hpb=f24274b95c646682a448694eb0c759311cb7a5af;p=urcu.git diff --git a/formal-model/urcu-controldataflow/urcu.spin b/formal-model/urcu-controldataflow/urcu.spin index dd5d17d..8075506 100644 --- a/formal-model/urcu-controldataflow/urcu.spin +++ b/formal-model/urcu-controldataflow/urcu.spin @@ -98,6 +98,15 @@ * output exchanged. Therefore, i post-dominating j ensures that every path * passing by j will pass by i before reaching the output. * + * Prefetch and speculative execution + * + * If an instruction depends on the result of a previous branch, but it does not + * have side-effects, it can be executed before the branch result is known. + * however, it must be restarted if a core-synchronizing instruction is issued. + * Note that instructions which depend on the speculative instruction result + * but that have side-effects must depend on the branch completion in addition + * to the speculatively executed instruction. + * * Other considerations * * Note about "volatile" keyword dependency : The compiler will order volatile @@ -130,6 +139,7 @@ * Only Alpha has out-of-order cache bank loads. Other architectures (intel, * powerpc, arm) ensure that dependent reads won't be reordered. c.f. * http://www.linuxjournal.com/article/8212) + */ #ifdef ARCH_ALPHA #define HAVE_OOO_CACHE_READ #endif @@ -154,38 +164,34 @@ typedef per_proc_bitfield { }; #define DECLARE_CACHED_VAR(type, x) \ - type mem_##x; \ - per_proc_##type cached_##x; \ - per_proc_bitfield cache_dirty_##x; - -#define INIT_CACHED_VAR(x, v, j) \ - mem_##x = v; \ - cache_dirty_##x.bitfield = 0; \ - j = 0; \ - do \ - :: j < NR_PROCS -> \ - cached_##x.val[j] = v; \ - j++ \ - :: j >= NR_PROCS -> break \ - od; + type mem_##x; + +#define DECLARE_PROC_CACHED_VAR(type, x)\ + type cached_##x; \ + bit cache_dirty_##x; + +#define INIT_CACHED_VAR(x, v) \ + mem_##x = v; + +#define INIT_PROC_CACHED_VAR(x, v) \ + cache_dirty_##x = 0; \ + cached_##x = v; -#define IS_CACHE_DIRTY(x, id) (cache_dirty_##x.bitfield & (1 << id)) +#define IS_CACHE_DIRTY(x, id) (cache_dirty_##x) -#define READ_CACHED_VAR(x) (cached_##x.val[get_pid()]) +#define READ_CACHED_VAR(x) (cached_##x) #define WRITE_CACHED_VAR(x, v) \ atomic { \ - cached_##x.val[get_pid()] = v; \ - cache_dirty_##x.bitfield = \ - cache_dirty_##x.bitfield | (1 << get_pid()); \ + cached_##x = v; \ + cache_dirty_##x = 1; \ } #define CACHE_WRITE_TO_MEM(x, id) \ if \ :: IS_CACHE_DIRTY(x, id) -> \ - mem_##x = cached_##x.val[id]; \ - cache_dirty_##x.bitfield = \ - cache_dirty_##x.bitfield & (~(1 << id)); \ + mem_##x = cached_##x; \ + cache_dirty_##x = 0; \ :: else -> \ skip \ fi; @@ -193,7 +199,7 @@ typedef per_proc_bitfield { #define CACHE_READ_FROM_MEM(x, id) \ if \ :: !IS_CACHE_DIRTY(x, id) -> \ - cached_##x.val[id] = mem_##x;\ + cached_##x = mem_##x; \ :: else -> \ skip \ fi; @@ -340,7 +346,7 @@ PROGRESS_LABEL(progressid) \ #else #define smp_mb_send(i, j, progressid) smp_mb(i) -#define smp_mb_reader smp_mb(i) +#define smp_mb_reader(i, j) smp_mb(i) #define smp_mb_recv(i, j) #endif @@ -433,8 +439,8 @@ int _proc_urcu_reader; #define READ_PROD_B_IF_FALSE (1 << 2) #define READ_PROD_C_IF_TRUE_READ (1 << 3) -#define PROCEDURE_READ_LOCK(base, consumetoken, producetoken) \ - :: CONSUME_TOKENS(proc_urcu_reader, consumetoken, READ_PROD_A_READ << base) -> \ +#define PROCEDURE_READ_LOCK(base, consumetoken, consumetoken2, producetoken) \ + :: CONSUME_TOKENS(proc_urcu_reader, (consumetoken | consumetoken2), READ_PROD_A_READ << base) -> \ ooo_mem(i); \ tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]); \ PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_A_READ << base); \ @@ -448,13 +454,14 @@ int _proc_urcu_reader; PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_B_IF_FALSE << base); \ fi; \ /* IF TRUE */ \ - :: CONSUME_TOKENS(proc_urcu_reader, READ_PROD_B_IF_TRUE << base, \ + :: CONSUME_TOKENS(proc_urcu_reader, consumetoken, /* prefetch */ \ READ_PROD_C_IF_TRUE_READ << base) -> \ ooo_mem(i); \ tmp2 = READ_CACHED_VAR(urcu_gp_ctr); \ PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_C_IF_TRUE_READ << base); \ :: CONSUME_TOKENS(proc_urcu_reader, \ - (READ_PROD_C_IF_TRUE_READ /* pre-dominant */ \ + (READ_PROD_B_IF_TRUE \ + | READ_PROD_C_IF_TRUE_READ /* pre-dominant */ \ | READ_PROD_A_READ) << base, /* WAR */ \ producetoken) -> \ ooo_mem(i); \ @@ -484,14 +491,14 @@ int _proc_urcu_reader; consumetoken, \ READ_PROC_READ_UNLOCK << base) -> \ ooo_mem(i); \ - tmp2 = READ_CACHED_VAR(urcu_active_readers[get_readerid()]); \ + tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]); \ PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_READ_UNLOCK << base); \ :: CONSUME_TOKENS(proc_urcu_reader, \ consumetoken \ | (READ_PROC_READ_UNLOCK << base), /* WAR */ \ producetoken) -> \ ooo_mem(i); \ - WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp2 - 1); \ + WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp - 1); \ PRODUCE_TOKENS(proc_urcu_reader, producetoken); \ skip @@ -667,7 +674,7 @@ non_atomic3_skip: atomic { if - PROCEDURE_READ_LOCK(READ_LOCK_BASE, READ_PROD_NONE, READ_LOCK_OUT); + PROCEDURE_READ_LOCK(READ_LOCK_BASE, READ_PROD_NONE, 0, READ_LOCK_OUT); :: CONSUME_TOKENS(proc_urcu_reader, READ_LOCK_OUT, /* post-dominant */ @@ -675,7 +682,7 @@ non_atomic3_skip: smp_mb_reader(i, j); PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FIRST_MB); - PROCEDURE_READ_LOCK(READ_LOCK_NESTED_BASE, READ_PROC_FIRST_MB | READ_LOCK_OUT, + PROCEDURE_READ_LOCK(READ_LOCK_NESTED_BASE, READ_PROC_FIRST_MB, READ_LOCK_OUT, READ_LOCK_NESTED_OUT); :: CONSUME_TOKENS(proc_urcu_reader, @@ -733,12 +740,12 @@ rmb1_end: /* reading urcu_active_readers, which have been written by * READ_UNLOCK_OUT : RAW */ PROCEDURE_READ_LOCK(READ_LOCK_UNROLL_BASE, - READ_UNLOCK_OUT /* RAW */ - | READ_PROC_SECOND_MB /* mb() orders reads */ - | READ_PROC_FIRST_MB /* mb() orders reads */ - | READ_LOCK_NESTED_OUT /* RAW */ + READ_PROC_SECOND_MB /* mb() orders reads */ + | READ_PROC_FIRST_MB, /* mb() orders reads */ + READ_LOCK_NESTED_OUT /* RAW */ | READ_LOCK_OUT /* RAW */ - | READ_UNLOCK_NESTED_OUT, /* RAW */ + | READ_UNLOCK_NESTED_OUT /* RAW */ + | READ_UNLOCK_OUT, /* RAW */ READ_LOCK_OUT_UNROLL); @@ -846,6 +853,41 @@ active proctype urcu_reader() byte i, j, nest_i; byte tmp, tmp2; + /* Keep in sync manually with smp_rmb, smp_wmb, ooo_mem and init() */ + DECLARE_PROC_CACHED_VAR(byte, urcu_gp_ctr); + /* Note ! currently only one reader */ + DECLARE_PROC_CACHED_VAR(byte, urcu_active_readers[NR_READERS]); + /* RCU data */ + DECLARE_PROC_CACHED_VAR(bit, rcu_data[SLAB_SIZE]); + + /* RCU pointer */ +#if (SLAB_SIZE == 2) + DECLARE_PROC_CACHED_VAR(bit, rcu_ptr); +#else + DECLARE_PROC_CACHED_VAR(byte, rcu_ptr); +#endif + + atomic { + INIT_PROC_CACHED_VAR(urcu_gp_ctr, 1); + INIT_PROC_CACHED_VAR(rcu_ptr, 0); + + i = 0; + do + :: i < NR_READERS -> + INIT_PROC_CACHED_VAR(urcu_active_readers[i], 0); + i++; + :: i >= NR_READERS -> break + od; + INIT_PROC_CACHED_VAR(rcu_data[0], WINE); + i = 1; + do + :: i < SLAB_SIZE -> + INIT_PROC_CACHED_VAR(rcu_data[i], POISON); + i++ + :: i >= SLAB_SIZE -> break + od; + } + wait_init_done(); assert(get_pid() < NR_PROCS); @@ -935,6 +977,42 @@ active proctype urcu_writer() * GP update. Needed to test single flip case. */ + /* Keep in sync manually with smp_rmb, smp_wmb, ooo_mem and init() */ + DECLARE_PROC_CACHED_VAR(byte, urcu_gp_ctr); + /* Note ! currently only one reader */ + DECLARE_PROC_CACHED_VAR(byte, urcu_active_readers[NR_READERS]); + /* RCU data */ + DECLARE_PROC_CACHED_VAR(bit, rcu_data[SLAB_SIZE]); + + /* RCU pointer */ +#if (SLAB_SIZE == 2) + DECLARE_PROC_CACHED_VAR(bit, rcu_ptr); +#else + DECLARE_PROC_CACHED_VAR(byte, rcu_ptr); +#endif + + atomic { + INIT_PROC_CACHED_VAR(urcu_gp_ctr, 1); + INIT_PROC_CACHED_VAR(rcu_ptr, 0); + + i = 0; + do + :: i < NR_READERS -> + INIT_PROC_CACHED_VAR(urcu_active_readers[i], 0); + i++; + :: i >= NR_READERS -> break + od; + INIT_PROC_CACHED_VAR(rcu_data[0], WINE); + i = 1; + do + :: i < SLAB_SIZE -> + INIT_PROC_CACHED_VAR(rcu_data[i], POISON); + i++ + :: i >= SLAB_SIZE -> break + od; + } + + wait_init_done(); assert(get_pid() < NR_PROCS); @@ -1016,10 +1094,11 @@ smp_mb_send1_end: PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_WRITE_GP); :: CONSUME_TOKENS(proc_urcu_writer, - //WRITE_PROC_FIRST_WRITE_GP /* TEST ADDING SYNC CORE */ + //WRITE_PROC_FIRST_WRITE_GP | /* TEST ADDING SYNC CORE */ WRITE_PROC_FIRST_MB, /* can be reordered before/after flips */ WRITE_PROC_FIRST_WAIT | WRITE_PROC_FIRST_WAIT_LOOP) -> ooo_mem(i); + //smp_mb(i); /* TEST */ /* ONLY WAITING FOR READER 0 */ tmp2 = READ_CACHED_VAR(urcu_active_readers[0]); #ifndef SINGLE_FLIP @@ -1047,6 +1126,12 @@ smp_mb_send1_end: #ifndef GEN_ERROR_WRITER_PROGRESS goto smp_mb_send2; smp_mb_send2_end: + /* The memory barrier will invalidate the + * second read done as prefetching. Note that all + * instructions with side-effects depending on + * WRITE_PROC_SECOND_READ_GP should also depend on + * completion of this busy-waiting loop. */ + CLEAR_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP); #else ooo_mem(i); #endif @@ -1055,17 +1140,21 @@ smp_mb_send2_end: /* second flip */ :: CONSUME_TOKENS(proc_urcu_writer, - WRITE_PROC_FIRST_WAIT /* Control dependency : need to branch out of - * the loop to execute the next flip (CHECK) */ - | WRITE_PROC_FIRST_WRITE_GP + //WRITE_PROC_FIRST_WAIT | //test /* no dependency. Could pre-fetch, no side-effect. */ + WRITE_PROC_FIRST_WRITE_GP | WRITE_PROC_FIRST_READ_GP | WRITE_PROC_FIRST_MB, WRITE_PROC_SECOND_READ_GP) -> ooo_mem(i); + //smp_mb(i); /* TEST */ tmpa = READ_CACHED_VAR(urcu_gp_ctr); PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP); :: CONSUME_TOKENS(proc_urcu_writer, - WRITE_PROC_FIRST_MB + WRITE_PROC_FIRST_WAIT /* dependency on first wait, because this + * instruction has globally observable + * side-effects. + */ + | WRITE_PROC_FIRST_MB | WRITE_PROC_WMB | WRITE_PROC_FIRST_READ_GP | WRITE_PROC_FIRST_WRITE_GP @@ -1076,11 +1165,12 @@ smp_mb_send2_end: PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WRITE_GP); :: CONSUME_TOKENS(proc_urcu_writer, - //WRITE_PROC_FIRST_WRITE_GP /* TEST ADDING SYNC CORE */ + //WRITE_PROC_FIRST_WRITE_GP | /* TEST ADDING SYNC CORE */ WRITE_PROC_FIRST_WAIT | WRITE_PROC_FIRST_MB, /* can be reordered before/after flips */ WRITE_PROC_SECOND_WAIT | WRITE_PROC_SECOND_WAIT_LOOP) -> ooo_mem(i); + //smp_mb(i); /* TEST */ /* ONLY WAITING FOR READER 0 */ tmp2 = READ_CACHED_VAR(urcu_active_readers[0]); if @@ -1092,7 +1182,7 @@ smp_mb_send2_end: fi; :: CONSUME_TOKENS(proc_urcu_writer, - //WRITE_PROC_FIRST_WRITE_GP /* TEST ADDING SYNC CORE */ + //WRITE_PROC_FIRST_WRITE_GP | /* TEST ADDING SYNC CORE */ WRITE_PROC_SECOND_WRITE_GP | WRITE_PROC_FIRST_WRITE_GP | WRITE_PROC_SECOND_READ_GP @@ -1203,13 +1293,13 @@ init { byte i, j; atomic { - INIT_CACHED_VAR(urcu_gp_ctr, 1, j); - INIT_CACHED_VAR(rcu_ptr, 0, j); + INIT_CACHED_VAR(urcu_gp_ctr, 1); + INIT_CACHED_VAR(rcu_ptr, 0); i = 0; do :: i < NR_READERS -> - INIT_CACHED_VAR(urcu_active_readers[i], 0, j); + INIT_CACHED_VAR(urcu_active_readers[i], 0); ptr_read_first[i] = 1; ptr_read_second[i] = 1; data_read_first[i] = WINE; @@ -1217,11 +1307,11 @@ init { i++; :: i >= NR_READERS -> break od; - INIT_CACHED_VAR(rcu_data[0], WINE, j); + INIT_CACHED_VAR(rcu_data[0], WINE); i = 1; do :: i < SLAB_SIZE -> - INIT_CACHED_VAR(rcu_data[i], POISON, j); + INIT_CACHED_VAR(rcu_data[i], POISON); i++ :: i >= SLAB_SIZE -> break od;