#ifndef _URCU_ARCH_UATOMIC_X86_H
#define _URCU_ARCH_UATOMIC_X86_H

/*
 * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
 * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved.
 * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
 * Copyright (c) 2009 Mathieu Desnoyers
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 *
 * Code inspired from libuatomic_ops-1.2, inherited in part from the
 * Boehm-Demers-Weiser conservative garbage collector.
 */

#include <urcu/compiler.h>
#include <urcu/system.h>

#define UATOMIC_HAS_ATOMIC_BYTE
#define UATOMIC_HAS_ATOMIC_SHORT

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Derived from AO_compare_and_swap() and AO_test_and_set_full().
 */

struct __uatomic_dummy {
	unsigned long v[10];
};
#define __hp(x)	((struct __uatomic_dummy *)(x))
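
/*
 * Note: the __uatomic_dummy type gives the "+m"/"=m" asm constraints below a
 * memory operand wide enough for every access size handled in this file;
 * __hp() simply casts the caller's pointer to that type so the compiler does
 * not derive a narrower memory operand from the original pointer type.
 */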

#define _uatomic_set(addr, v)	((void) CMM_STORE_SHARED(*(addr), (v)))

/* cmpxchg */

static inline __attribute__((always_inline))
unsigned long __uatomic_cmpxchg(void *addr, unsigned long old,
				unsigned long _new, int len)
{
	switch (len) {
	case 1:
	{
		unsigned char result = old;

		__asm__ __volatile__(
		"lock; cmpxchgb %2, %1"
			: "+a"(result), "+m"(*__hp(addr))
			: "q"((unsigned char)_new)
			: "memory");
		return result;
	}
	case 2:
	{
		unsigned short result = old;

		__asm__ __volatile__(
		"lock; cmpxchgw %2, %1"
			: "+a"(result), "+m"(*__hp(addr))
			: "r"((unsigned short)_new)
			: "memory");
		return result;
	}
	case 4:
	{
		unsigned int result = old;

		__asm__ __volatile__(
		"lock; cmpxchgl %2, %1"
			: "+a"(result), "+m"(*__hp(addr))
			: "r"((unsigned int)_new)
			: "memory");
		return result;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		unsigned long result = old;

		__asm__ __volatile__(
		"lock; cmpxchgq %2, %1"
			: "+a"(result), "+m"(*__hp(addr))
			: "r"((unsigned long)_new)
			: "memory");
		return result;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return 0;
}

#define _uatomic_cmpxchg(addr, old, _new)				\
	((__typeof__(*(addr))) __uatomic_cmpxchg((addr),		\
					caa_cast_long_keep_sign(old),	\
					caa_cast_long_keep_sign(_new),	\
					sizeof(*(addr))))
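
/*
 * Illustrative usage sketch (not part of the definitions in this header):
 * cmpxchg returns the value found in *addr before the operation, so a caller
 * detects a successful update by comparing the return value with the expected
 * "old" value. With a hypothetical variable "flag":
 *
 *	static int flag;
 *
 *	if (uatomic_cmpxchg(&flag, 0, 1) == 0) {
 *		... the 0 -> 1 transition was done atomically by this thread ...
 *	}
 */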

/* xchg */

static inline __attribute__((always_inline))
unsigned long __uatomic_exchange(void *addr, unsigned long val, int len)
{
	/* Note: the "xchg" instruction does not need a "lock" prefix. */
	switch (len) {
	case 1:
	{
		unsigned char result;
		__asm__ __volatile__(
		"xchgb %0, %1"
			: "=q"(result), "+m"(*__hp(addr))
			: "0" ((unsigned char)val)
			: "memory");
		return result;
	}
	case 2:
	{
		unsigned short result;
		__asm__ __volatile__(
		"xchgw %0, %1"
			: "=r"(result), "+m"(*__hp(addr))
			: "0" ((unsigned short)val)
			: "memory");
		return result;
	}
	case 4:
	{
		unsigned int result;
		__asm__ __volatile__(
		"xchgl %0, %1"
			: "=r"(result), "+m"(*__hp(addr))
			: "0" ((unsigned int)val)
			: "memory");
		return result;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		unsigned long result;
		__asm__ __volatile__(
		"xchgq %0, %1"
			: "=r"(result), "+m"(*__hp(addr))
			: "0" ((unsigned long)val)
			: "memory");
		return result;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return 0;
}

#define _uatomic_xchg(addr, v)						\
	((__typeof__(*(addr))) __uatomic_exchange((addr),		\
					caa_cast_long_keep_sign(v),	\
					sizeof(*(addr))))
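
/*
 * Note: on x86, an "xchg" with a memory operand is implicitly locked, so the
 * exchange above is a full memory barrier even though no "lock" prefix or
 * explicit fence is emitted.
 */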

/* uatomic_add_return */

static inline __attribute__((always_inline))
unsigned long __uatomic_add_return(void *addr, unsigned long val,
				   int len)
{
	switch (len) {
	case 1:
	{
		unsigned char result = val;

		__asm__ __volatile__(
		"lock; xaddb %1, %0"
			: "+m"(*__hp(addr)), "+q" (result)
			:
			: "memory");
		return result + (unsigned char)val;
	}
	case 2:
	{
		unsigned short result = val;

		__asm__ __volatile__(
		"lock; xaddw %1, %0"
			: "+m"(*__hp(addr)), "+r" (result)
			:
			: "memory");
		return result + (unsigned short)val;
	}
	case 4:
	{
		unsigned int result = val;

		__asm__ __volatile__(
		"lock; xaddl %1, %0"
			: "+m"(*__hp(addr)), "+r" (result)
			:
			: "memory");
		return result + (unsigned int)val;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		unsigned long result = val;

		__asm__ __volatile__(
		"lock; xaddq %1, %0"
			: "+m"(*__hp(addr)), "+r" (result)
			:
			: "memory");
		return result + (unsigned long)val;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return 0;
}

#define _uatomic_add_return(addr, v)					\
	((__typeof__(*(addr))) __uatomic_add_return((addr),		\
					caa_cast_long_keep_sign(v),	\
					sizeof(*(addr))))
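
/*
 * Note: "xadd" leaves the previous value of *addr in its register operand,
 * so __uatomic_add_return() adds "val" back to that result in order to
 * return the value *after* the addition, as add_return semantics require.
 */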

/* uatomic_and */

static inline __attribute__((always_inline))
void __uatomic_and(void *addr, unsigned long val, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; andb %1, %0"
			: "=m"(*__hp(addr))
			: "iq" ((unsigned char)val)
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; andw %1, %0"
			: "=m"(*__hp(addr))
			: "ir" ((unsigned short)val)
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; andl %1, %0"
			: "=m"(*__hp(addr))
			: "ir" ((unsigned int)val)
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; andq %1, %0"
			: "=m"(*__hp(addr))
			: "er" ((unsigned long)val)
			: "memory");
		return;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_and(addr, v)						\
	(__uatomic_and((addr), caa_cast_long_keep_sign(v), sizeof(*(addr))))

/* uatomic_or */

static inline __attribute__((always_inline))
void __uatomic_or(void *addr, unsigned long val, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; orb %1, %0"
			: "=m"(*__hp(addr))
			: "iq" ((unsigned char)val)
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; orw %1, %0"
			: "=m"(*__hp(addr))
			: "ir" ((unsigned short)val)
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; orl %1, %0"
			: "=m"(*__hp(addr))
			: "ir" ((unsigned int)val)
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; orq %1, %0"
			: "=m"(*__hp(addr))
			: "er" ((unsigned long)val)
			: "memory");
		return;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_or(addr, v)						\
	(__uatomic_or((addr), caa_cast_long_keep_sign(v), sizeof(*(addr))))

/* uatomic_add */

static inline __attribute__((always_inline))
void __uatomic_add(void *addr, unsigned long val, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; addb %1, %0"
			: "=m"(*__hp(addr))
			: "iq" ((unsigned char)val)
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; addw %1, %0"
			: "=m"(*__hp(addr))
			: "ir" ((unsigned short)val)
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; addl %1, %0"
			: "=m"(*__hp(addr))
			: "ir" ((unsigned int)val)
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; addq %1, %0"
			: "=m"(*__hp(addr))
			: "er" ((unsigned long)val)
			: "memory");
		return;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_add(addr, v)						\
	(__uatomic_add((addr), caa_cast_long_keep_sign(v), sizeof(*(addr))))


/* uatomic_inc */

static inline __attribute__((always_inline))
void __uatomic_inc(void *addr, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; incb %0"
			: "=m"(*__hp(addr))
			:
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; incw %0"
			: "=m"(*__hp(addr))
			:
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; incl %0"
			: "=m"(*__hp(addr))
			:
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; incq %0"
			: "=m"(*__hp(addr))
			:
			: "memory");
		return;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_inc(addr)	(__uatomic_inc((addr), sizeof(*(addr))))

/* uatomic_dec */

static inline __attribute__((always_inline))
void __uatomic_dec(void *addr, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; decb %0"
			: "=m"(*__hp(addr))
			:
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; decw %0"
			: "=m"(*__hp(addr))
			:
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; decl %0"
			: "=m"(*__hp(addr))
			:
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; decq %0"
			: "=m"(*__hp(addr))
			:
			: "memory");
		return;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_dec(addr)	(__uatomic_dec((addr), sizeof(*(addr))))

#if ((CAA_BITS_PER_LONG != 64) && defined(CONFIG_RCU_COMPAT_ARCH))
extern int __rcu_cas_avail;
extern int __rcu_cas_init(void);

#define UATOMIC_COMPAT(insn)						\
	((caa_likely(__rcu_cas_avail > 0))				\
	? (_uatomic_##insn)						\
		: ((caa_unlikely(__rcu_cas_avail < 0)			\
			? ((__rcu_cas_init() > 0)			\
				? (_uatomic_##insn)			\
				: (compat_uatomic_##insn))		\
			: (compat_uatomic_##insn))))
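
/*
 * Dispatch logic of UATOMIC_COMPAT(), as encoded above: a positive
 * __rcu_cas_avail means the CPU has already been probed and supports the
 * required instructions, so the native _uatomic_* path is used; a negative
 * value means "not probed yet" and triggers a lazy __rcu_cas_init() probe;
 * zero, or a failed probe, falls back to the compat_uatomic_* implementations
 * provided for CPUs lacking those instructions.
 */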

/*
 * We leave the return value so we don't break the ABI, but remove the
 * return value from the API.
 */
extern unsigned long _compat_uatomic_set(void *addr,
					 unsigned long _new, int len);
#define compat_uatomic_set(addr, _new)					\
	((void) _compat_uatomic_set((addr),				\
				caa_cast_long_keep_sign(_new),		\
				sizeof(*(addr))))


extern unsigned long _compat_uatomic_xchg(void *addr,
					  unsigned long _new, int len);
#define compat_uatomic_xchg(addr, _new)					\
	((__typeof__(*(addr))) _compat_uatomic_xchg((addr),		\
					caa_cast_long_keep_sign(_new),	\
					sizeof(*(addr))))

extern unsigned long _compat_uatomic_cmpxchg(void *addr, unsigned long old,
					     unsigned long _new, int len);
#define compat_uatomic_cmpxchg(addr, old, _new)				\
	((__typeof__(*(addr))) _compat_uatomic_cmpxchg((addr),		\
					caa_cast_long_keep_sign(old),	\
					caa_cast_long_keep_sign(_new),	\
					sizeof(*(addr))))

extern void _compat_uatomic_and(void *addr, unsigned long _new, int len);
#define compat_uatomic_and(addr, v)					\
	(_compat_uatomic_and((addr),					\
			caa_cast_long_keep_sign(v),			\
			sizeof(*(addr))))

extern void _compat_uatomic_or(void *addr, unsigned long _new, int len);
#define compat_uatomic_or(addr, v)					\
	(_compat_uatomic_or((addr),					\
			caa_cast_long_keep_sign(v),			\
			sizeof(*(addr))))

extern unsigned long _compat_uatomic_add_return(void *addr,
						unsigned long _new, int len);
#define compat_uatomic_add_return(addr, v)				\
	((__typeof__(*(addr))) _compat_uatomic_add_return((addr),	\
					caa_cast_long_keep_sign(v),	\
					sizeof(*(addr))))

#define compat_uatomic_add(addr, v)					\
	((void)compat_uatomic_add_return((addr), (v)))
#define compat_uatomic_inc(addr)					\
	(compat_uatomic_add((addr), 1))
#define compat_uatomic_dec(addr)					\
	(compat_uatomic_add((addr), -1))

#else
#define UATOMIC_COMPAT(insn)	(_uatomic_##insn)
#endif

/* Read is atomic even in compat mode */
#define uatomic_set(addr, v)			\
		UATOMIC_COMPAT(set(addr, v))

#define uatomic_cmpxchg(addr, old, _new)	\
		UATOMIC_COMPAT(cmpxchg(addr, old, _new))
#define uatomic_xchg(addr, v)			\
		UATOMIC_COMPAT(xchg(addr, v))

#define uatomic_and(addr, v)			\
		UATOMIC_COMPAT(and(addr, v))
#define cmm_smp_mb__before_uatomic_and()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_and()		cmm_barrier()

#define uatomic_or(addr, v)			\
		UATOMIC_COMPAT(or(addr, v))
#define cmm_smp_mb__before_uatomic_or()		cmm_barrier()
#define cmm_smp_mb__after_uatomic_or()		cmm_barrier()

#define uatomic_add_return(addr, v)		\
		UATOMIC_COMPAT(add_return(addr, v))

#define uatomic_add(addr, v)	UATOMIC_COMPAT(add(addr, v))
#define cmm_smp_mb__before_uatomic_add()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_add()		cmm_barrier()

#define uatomic_inc(addr)	UATOMIC_COMPAT(inc(addr))
#define cmm_smp_mb__before_uatomic_inc()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_inc()		cmm_barrier()

#define uatomic_dec(addr)	UATOMIC_COMPAT(dec(addr))
#define cmm_smp_mb__before_uatomic_dec()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_dec()		cmm_barrier()
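
/*
 * On x86, the lock-prefixed instructions (and the implicitly locked xchg)
 * used by these operations already act as full memory barriers, so the
 * cmm_smp_mb__before/after helpers above only need to be compiler barriers
 * (cmm_barrier()).
 */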

#ifdef __cplusplus
}
#endif

#include <urcu/uatomic/generic.h>

#endif /* _URCU_ARCH_UATOMIC_X86_H */