#ifndef _URCU_ARCH_UATOMIC_X86_H
#define _URCU_ARCH_UATOMIC_X86_H

/*
 * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
 * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved.
 * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
 * Copyright (c) 2009 Mathieu Desnoyers
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 *
 * Code inspired from libuatomic_ops-1.2, inherited in part from the
 * Boehm-Demers-Weiser conservative garbage collector.
 */

#include <urcu/arch.h>
#include <urcu/config.h>
#include <urcu/compiler.h>
#include <urcu/system.h>

#define UATOMIC_HAS_ATOMIC_BYTE
#define UATOMIC_HAS_ATOMIC_SHORT

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Derived from AO_compare_and_swap() and AO_test_and_set_full().
 */

/*
 * The __hp() macro casts the void pointer "x" to a pointer to a structure
 * containing an array of char of the specified size. This allows passing the
 * @addr arguments of the following inline functions as "m" and "+m" operands
 * to the assembly.
 */

#define __hp(size, x)	((struct { char v[size]; } *)(x))
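
/*
 * Illustrative expansion (not part of the API): for a 4-byte object,
 *
 *	__hp(4, addr)
 *
 * expands to
 *
 *	((struct { char v[4]; } *)(addr))
 *
 * so that "+m"(*__hp(4, addr)) gives the assembler a memory operand of
 * exactly 4 bytes, letting the compiler know precisely which bytes the
 * instruction may read and write instead of treating "addr" as an opaque
 * void pointer.
 */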

#define _uatomic_set(addr, v) ((void) CMM_STORE_SHARED(*(addr), (v)))

/* cmpxchg */

static inline __attribute__((always_inline))
unsigned long __uatomic_cmpxchg(void *addr, unsigned long old,
				unsigned long _new, int len)
{
	switch (len) {
	case 1:
	{
		unsigned char result = old;

		__asm__ __volatile__(
		"lock; cmpxchgb %2, %1"
			: "+a"(result), "+m"(*__hp(len, addr))
			: "q"((unsigned char)_new)
			: "memory");
		return result;
	}
	case 2:
	{
		unsigned short result = old;

		__asm__ __volatile__(
		"lock; cmpxchgw %2, %1"
			: "+a"(result), "+m"(*__hp(len, addr))
			: "r"((unsigned short)_new)
			: "memory");
		return result;
	}
	case 4:
	{
		unsigned int result = old;

		__asm__ __volatile__(
		"lock; cmpxchgl %2, %1"
			: "+a"(result), "+m"(*__hp(len, addr))
			: "r"((unsigned int)_new)
			: "memory");
		return result;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		unsigned long result = old;

		__asm__ __volatile__(
		"lock; cmpxchgq %2, %1"
			: "+a"(result), "+m"(*__hp(len, addr))
			: "r"((unsigned long)_new)
			: "memory");
		return result;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return 0;
}

#define _uatomic_cmpxchg(addr, old, _new)				\
	((__typeof__(*(addr))) __uatomic_cmpxchg((addr),		\
					caa_cast_long_keep_sign(old),	\
					caa_cast_long_keep_sign(_new),	\
					sizeof(*(addr))))
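
/*
 * Illustrative sketch (example code, not part of this header; "counter" and
 * "newval" are hypothetical): a typical compare-and-swap retry loop built on
 * the public uatomic_cmpxchg() wrapper defined at the end of this file.
 * uatomic_cmpxchg() returns the value that was in *addr before the
 * operation, so the CAS succeeded iff that value equals the expected "old".
 * uatomic_read() comes from <urcu/uatomic/generic.h>, included at the
 * bottom of this file:
 *
 *	static unsigned long counter;
 *	unsigned long old, newval;
 *
 *	do {
 *		old = uatomic_read(&counter);
 *		newval = old + 42;
 *	} while (uatomic_cmpxchg(&counter, old, newval) != old);
 */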

/* xchg */

static inline __attribute__((always_inline))
unsigned long __uatomic_exchange(void *addr, unsigned long val, int len)
{
	/* Note: the "xchg" instruction does not need a "lock" prefix. */
	switch (len) {
	case 1:
	{
		unsigned char result;
		__asm__ __volatile__(
		"xchgb %0, %1"
			: "=q"(result), "+m"(*__hp(len, addr))
			: "0" ((unsigned char)val)
			: "memory");
		return result;
	}
	case 2:
	{
		unsigned short result;
		__asm__ __volatile__(
		"xchgw %0, %1"
			: "=r"(result), "+m"(*__hp(len, addr))
			: "0" ((unsigned short)val)
			: "memory");
		return result;
	}
	case 4:
	{
		unsigned int result;
		__asm__ __volatile__(
		"xchgl %0, %1"
			: "=r"(result), "+m"(*__hp(len, addr))
			: "0" ((unsigned int)val)
			: "memory");
		return result;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		unsigned long result;
		__asm__ __volatile__(
		"xchgq %0, %1"
			: "=r"(result), "+m"(*__hp(len, addr))
			: "0" ((unsigned long)val)
			: "memory");
		return result;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return 0;
}

#define _uatomic_xchg(addr, v)						\
	((__typeof__(*(addr))) __uatomic_exchange((addr),		\
					caa_cast_long_keep_sign(v),	\
					sizeof(*(addr))))
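
/*
 * The "xchg" instruction needs no "lock" prefix above because the exchange
 * is implicitly locked whenever one operand is a memory location.
 *
 * Illustrative sketch (example code, not part of this header; "lock_word",
 * example_lock() and example_unlock() are hypothetical): a minimal
 * test-and-set spinlock built on the public uatomic_xchg() and uatomic_set()
 * wrappers defined at the end of this file:
 *
 *	static int lock_word;	(0 = unlocked, 1 = locked)
 *
 *	static void example_lock(void)
 *	{
 *		while (uatomic_xchg(&lock_word, 1))
 *			caa_cpu_relax();	(busy-wait hint)
 *	}
 *
 *	static void example_unlock(void)
 *	{
 *		uatomic_set(&lock_word, 0);
 *	}
 */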

/* uatomic_add_return */

static inline __attribute__((always_inline))
unsigned long __uatomic_add_return(void *addr, unsigned long val,
				   int len)
{
	switch (len) {
	case 1:
	{
		unsigned char result = val;

		__asm__ __volatile__(
		"lock; xaddb %1, %0"
			: "+m"(*__hp(len, addr)), "+q" (result)
			:
			: "memory");
		return result + (unsigned char)val;
	}
	case 2:
	{
		unsigned short result = val;

		__asm__ __volatile__(
		"lock; xaddw %1, %0"
			: "+m"(*__hp(len, addr)), "+r" (result)
			:
			: "memory");
		return result + (unsigned short)val;
	}
	case 4:
	{
		unsigned int result = val;

		__asm__ __volatile__(
		"lock; xaddl %1, %0"
			: "+m"(*__hp(len, addr)), "+r" (result)
			:
			: "memory");
		return result + (unsigned int)val;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		unsigned long result = val;

		__asm__ __volatile__(
		"lock; xaddq %1, %0"
			: "+m"(*__hp(len, addr)), "+r" (result)
			:
			: "memory");
		return result + (unsigned long)val;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return 0;
}

#define _uatomic_add_return(addr, v)					\
	((__typeof__(*(addr))) __uatomic_add_return((addr),		\
					caa_cast_long_keep_sign(v),	\
					sizeof(*(addr))))
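
/*
 * Note that "xadd" leaves the *previous* value of the memory operand in the
 * source register, which is why the cases above add "val" back before
 * returning: uatomic_add_return() yields the *new* value.
 *
 * Illustrative sketch (example code, not part of this header; "nr_events"
 * is hypothetical):
 *
 *	static unsigned long nr_events;
 *
 *	if (uatomic_add_return(&nr_events, 1) == 1000000)
 *		printf("one million events seen\n");
 */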

/* uatomic_and */

static inline __attribute__((always_inline))
void __uatomic_and(void *addr, unsigned long val, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; andb %1, %0"
			: "=m"(*__hp(len, addr))
			: "iq" ((unsigned char)val)
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; andw %1, %0"
			: "=m"(*__hp(len, addr))
			: "ir" ((unsigned short)val)
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; andl %1, %0"
			: "=m"(*__hp(len, addr))
			: "ir" ((unsigned int)val)
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; andq %1, %0"
			: "=m"(*__hp(len, addr))
			: "er" ((unsigned long)val)
			: "memory");
		return;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_and(addr, v)						\
	(__uatomic_and((addr), caa_cast_long_keep_sign(v), sizeof(*(addr))))

/* uatomic_or */

static inline __attribute__((always_inline))
void __uatomic_or(void *addr, unsigned long val, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; orb %1, %0"
			: "=m"(*__hp(len, addr))
			: "iq" ((unsigned char)val)
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; orw %1, %0"
			: "=m"(*__hp(len, addr))
			: "ir" ((unsigned short)val)
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; orl %1, %0"
			: "=m"(*__hp(len, addr))
			: "ir" ((unsigned int)val)
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; orq %1, %0"
			: "=m"(*__hp(len, addr))
			: "er" ((unsigned long)val)
			: "memory");
		return;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_or(addr, v)						\
	(__uatomic_or((addr), caa_cast_long_keep_sign(v), sizeof(*(addr))))

/* uatomic_add */

static inline __attribute__((always_inline))
void __uatomic_add(void *addr, unsigned long val, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; addb %1, %0"
			: "=m"(*__hp(len, addr))
			: "iq" ((unsigned char)val)
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; addw %1, %0"
			: "=m"(*__hp(len, addr))
			: "ir" ((unsigned short)val)
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; addl %1, %0"
			: "=m"(*__hp(len, addr))
			: "ir" ((unsigned int)val)
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; addq %1, %0"
			: "=m"(*__hp(len, addr))
			: "er" ((unsigned long)val)
			: "memory");
		return;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_add(addr, v)						\
	(__uatomic_add((addr), caa_cast_long_keep_sign(v), sizeof(*(addr))))

/* uatomic_inc */

static inline __attribute__((always_inline))
void __uatomic_inc(void *addr, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; incb %0"
			: "=m"(*__hp(len, addr))
			:
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; incw %0"
			: "=m"(*__hp(len, addr))
			:
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; incl %0"
			: "=m"(*__hp(len, addr))
			:
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; incq %0"
			: "=m"(*__hp(len, addr))
			:
			: "memory");
		return;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_inc(addr)	(__uatomic_inc((addr), sizeof(*(addr))))

/* uatomic_dec */

static inline __attribute__((always_inline))
void __uatomic_dec(void *addr, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; decb %0"
			: "=m"(*__hp(len, addr))
			:
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; decw %0"
			: "=m"(*__hp(len, addr))
			:
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; decl %0"
			: "=m"(*__hp(len, addr))
			:
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; decq %0"
			: "=m"(*__hp(len, addr))
			:
			: "memory");
		return;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_dec(addr)	(__uatomic_dec((addr), sizeof(*(addr))))
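
/*
 * The void-returning operations above (_uatomic_and, _uatomic_or,
 * _uatomic_add, _uatomic_inc, _uatomic_dec) update memory atomically but do
 * not report the new value, so they fit counters and flag words whose
 * updated value is not needed immediately.
 *
 * Illustrative sketch (example code, not part of this header; "flags" and
 * "nr_active" are hypothetical), using the public wrappers defined at the
 * end of this file:
 *
 *	static unsigned long flags, nr_active;
 *
 *	uatomic_or(&flags, 0x1UL);	(set bit 0 atomically)
 *	uatomic_and(&flags, ~0x1UL);	(clear bit 0 atomically)
 *	uatomic_inc(&nr_active);	(one more active worker)
 *	uatomic_dec(&nr_active);	(worker done)
 */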

#if ((CAA_BITS_PER_LONG != 64) && defined(URCU_ARCH_I386))

/* For backwards compat */
#define CONFIG_RCU_COMPAT_ARCH 1

extern int __rcu_cas_avail;
extern int __rcu_cas_init(void);

#define UATOMIC_COMPAT(insn)						\
	((caa_likely(__rcu_cas_avail > 0))				\
	? (_uatomic_##insn)						\
	: ((caa_unlikely(__rcu_cas_avail < 0)				\
		? ((__rcu_cas_init() > 0)				\
			? (_uatomic_##insn)				\
			: (compat_uatomic_##insn))			\
		: (compat_uatomic_##insn))))
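
/*
 * Reading the dispatch above: __rcu_cas_avail starts out negative, so the
 * first atomic operation on an i386 build calls __rcu_cas_init()
 * (presumably a runtime check for "cmpxchg" support, which i386 lacks).
 * Afterwards each UATOMIC_COMPAT(insn) use resolves to either the native
 * _uatomic_##insn defined above or the library-provided
 * compat_uatomic_##insn emulation. Illustrative expansion (names as defined
 * in this file):
 *
 *	uatomic_inc(&x)
 *	  -> UATOMIC_COMPAT(inc(&x))
 *	  -> _uatomic_inc(&x)		when __rcu_cas_avail > 0
 *	  -> compat_uatomic_inc(&x)	otherwise
 */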

/*
 * We leave the return value so we don't break the ABI, but remove the
 * return value from the API.
 */
extern unsigned long _compat_uatomic_set(void *addr,
					 unsigned long _new, int len);
#define compat_uatomic_set(addr, _new)					\
	((void) _compat_uatomic_set((addr),				\
				caa_cast_long_keep_sign(_new),		\
				sizeof(*(addr))))


extern unsigned long _compat_uatomic_xchg(void *addr,
					  unsigned long _new, int len);
#define compat_uatomic_xchg(addr, _new)					\
	((__typeof__(*(addr))) _compat_uatomic_xchg((addr),		\
					caa_cast_long_keep_sign(_new),	\
					sizeof(*(addr))))

extern unsigned long _compat_uatomic_cmpxchg(void *addr, unsigned long old,
					     unsigned long _new, int len);
#define compat_uatomic_cmpxchg(addr, old, _new)				\
	((__typeof__(*(addr))) _compat_uatomic_cmpxchg((addr),		\
					caa_cast_long_keep_sign(old),	\
					caa_cast_long_keep_sign(_new),	\
					sizeof(*(addr))))

extern void _compat_uatomic_and(void *addr, unsigned long _new, int len);
#define compat_uatomic_and(addr, v)					\
	(_compat_uatomic_and((addr),					\
			caa_cast_long_keep_sign(v),			\
			sizeof(*(addr))))

extern void _compat_uatomic_or(void *addr, unsigned long _new, int len);
#define compat_uatomic_or(addr, v)					\
	(_compat_uatomic_or((addr),					\
			caa_cast_long_keep_sign(v),			\
			sizeof(*(addr))))

extern unsigned long _compat_uatomic_add_return(void *addr,
						unsigned long _new, int len);
#define compat_uatomic_add_return(addr, v)				\
	((__typeof__(*(addr))) _compat_uatomic_add_return((addr),	\
					caa_cast_long_keep_sign(v),	\
					sizeof(*(addr))))

#define compat_uatomic_add(addr, v)					\
	((void)compat_uatomic_add_return((addr), (v)))
#define compat_uatomic_inc(addr)					\
	(compat_uatomic_add((addr), 1))
#define compat_uatomic_dec(addr)					\
	(compat_uatomic_add((addr), -1))

#else
#define UATOMIC_COMPAT(insn)	(_uatomic_##insn)
#endif

/*
 * uatomic_read() (from <urcu/uatomic/generic.h>) is atomic even in compat
 * mode, so only the operations below need the UATOMIC_COMPAT dispatch.
 */
#define uatomic_set(addr, v)			\
		UATOMIC_COMPAT(set(addr, v))

#define uatomic_cmpxchg(addr, old, _new)	\
		UATOMIC_COMPAT(cmpxchg(addr, old, _new))
#define uatomic_xchg(addr, v)			\
		UATOMIC_COMPAT(xchg(addr, v))

#define uatomic_and(addr, v)			\
		UATOMIC_COMPAT(and(addr, v))
#define cmm_smp_mb__before_uatomic_and()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_and()		cmm_barrier()

#define uatomic_or(addr, v)			\
		UATOMIC_COMPAT(or(addr, v))
#define cmm_smp_mb__before_uatomic_or()		cmm_barrier()
#define cmm_smp_mb__after_uatomic_or()		cmm_barrier()

#define uatomic_add_return(addr, v)		\
		UATOMIC_COMPAT(add_return(addr, v))

#define uatomic_add(addr, v)	UATOMIC_COMPAT(add(addr, v))
#define cmm_smp_mb__before_uatomic_add()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_add()		cmm_barrier()

#define uatomic_inc(addr)	UATOMIC_COMPAT(inc(addr))
#define cmm_smp_mb__before_uatomic_inc()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_inc()		cmm_barrier()

#define uatomic_dec(addr)	UATOMIC_COMPAT(dec(addr))
#define cmm_smp_mb__before_uatomic_dec()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_dec()		cmm_barrier()
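
/*
 * On x86, lock-prefixed instructions already act as full memory barriers,
 * which is why each cmm_smp_mb__before_uatomic_*() /
 * cmm_smp_mb__after_uatomic_*() pair above reduces to a compiler barrier
 * (cmm_barrier()) rather than a real fence.
 *
 * Illustrative sketch (example code, not part of this header; "data",
 * "idx", "value" and "nr_published" are hypothetical): portable callers
 * still place the barrier macros around the atomic so that architectures
 * with weaker ordering emit the fence they need:
 *
 *	data[idx] = value;
 *	cmm_smp_mb__before_uatomic_add();
 *	uatomic_add(&nr_published, 1);
 *	cmm_smp_mb__after_uatomic_add();
 */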

#ifdef __cplusplus
}
#endif

#include <urcu/uatomic/generic.h>

#endif /* _URCU_ARCH_UATOMIC_X86_H */