#ifndef _URCU_ARCH_UATOMIC_X86_H
#define _URCU_ARCH_UATOMIC_X86_H

/*
 * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
 * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved.
 * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
 * Copyright (c) 2009 Mathieu Desnoyers
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 *
 * Code inspired from libuatomic_ops-1.2, inherited in part from the
 * Boehm-Demers-Weiser conservative garbage collector.
 */

#include <urcu/config.h>
#include <urcu/compiler.h>
#include <urcu/system.h>

#define UATOMIC_HAS_ATOMIC_BYTE
#define UATOMIC_HAS_ATOMIC_SHORT

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Derived from AO_compare_and_swap() and AO_test_and_set_full().
 */

/*
 * The __hp() macro casts the void pointer "x" to a pointer to a structure
 * containing an array of char of the specified size. This allows passing the
 * @addr arguments of the following inline functions as "m" and "+m" operands
 * to the assembly.
 */

#define __hp(size, x)	((struct { char v[size]; } *)(x))
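
/*
 * Illustrative sketch (not part of the API): for a 4-byte object, the
 * "+m"(*__hp(4, addr)) operand hands the compiler a memory operand of
 * exactly 4 bytes, e.g. for a hypothetical counter:
 *
 *	unsigned int counter;
 *
 *	__asm__ __volatile__("lock; incl %0"
 *		: "+m"(*__hp(sizeof(counter), &counter)) : : "memory");
 *
 * A plain void pointer could not be dereferenced to form a correctly
 * sized "m" constraint, which is what the structure cast provides.
 */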

#define _uatomic_set(addr, v)	((void) CMM_STORE_SHARED(*(addr), (v)))

/* cmpxchg */

static inline __attribute__((always_inline))
unsigned long __uatomic_cmpxchg(void *addr, unsigned long old,
				unsigned long _new, int len)
{
	switch (len) {
	case 1:
	{
		unsigned char result = old;

		__asm__ __volatile__(
		"lock; cmpxchgb %2, %1"
			: "+a"(result), "+m"(*__hp(len, addr))
			: "q"((unsigned char)_new)
			: "memory");
		return result;
	}
	case 2:
	{
		unsigned short result = old;

		__asm__ __volatile__(
		"lock; cmpxchgw %2, %1"
			: "+a"(result), "+m"(*__hp(len, addr))
			: "r"((unsigned short)_new)
			: "memory");
		return result;
	}
	case 4:
	{
		unsigned int result = old;

		__asm__ __volatile__(
		"lock; cmpxchgl %2, %1"
			: "+a"(result), "+m"(*__hp(len, addr))
			: "r"((unsigned int)_new)
			: "memory");
		return result;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		unsigned long result = old;

		__asm__ __volatile__(
		"lock; cmpxchgq %2, %1"
			: "+a"(result), "+m"(*__hp(len, addr))
			: "r"((unsigned long)_new)
			: "memory");
		return result;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return 0;
}

#define _uatomic_cmpxchg(addr, old, _new)				      \
	((__typeof__(*(addr))) __uatomic_cmpxchg((addr),		      \
						caa_cast_long_keep_sign(old), \
						caa_cast_long_keep_sign(_new),\
						sizeof(*(addr))))
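
/*
 * Usage sketch (illustrative only; callers normally use the public
 * uatomic_cmpxchg() wrapper defined at the end of this file). A typical
 * compare-and-swap retry loop on a hypothetical counter:
 *
 *	unsigned long old, seen;
 *
 *	do {
 *		old = CMM_LOAD_SHARED(counter);
 *		seen = _uatomic_cmpxchg(&counter, old, old + 1);
 *	} while (seen != old);
 *
 * The return value is the previous content of *addr; the update took
 * effect only if it equals the expected "old" value.
 */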

/* xchg */

static inline __attribute__((always_inline))
unsigned long __uatomic_exchange(void *addr, unsigned long val, int len)
{
	/* Note: the "xchg" instruction does not need a "lock" prefix. */
	switch (len) {
	case 1:
	{
		unsigned char result;
		__asm__ __volatile__(
		"xchgb %0, %1"
			: "=q"(result), "+m"(*__hp(len, addr))
			: "0" ((unsigned char)val)
			: "memory");
		return result;
	}
	case 2:
	{
		unsigned short result;
		__asm__ __volatile__(
		"xchgw %0, %1"
			: "=r"(result), "+m"(*__hp(len, addr))
			: "0" ((unsigned short)val)
			: "memory");
		return result;
	}
	case 4:
	{
		unsigned int result;
		__asm__ __volatile__(
		"xchgl %0, %1"
			: "=r"(result), "+m"(*__hp(len, addr))
			: "0" ((unsigned int)val)
			: "memory");
		return result;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		unsigned long result;
		__asm__ __volatile__(
		"xchgq %0, %1"
			: "=r"(result), "+m"(*__hp(len, addr))
			: "0" ((unsigned long)val)
			: "memory");
		return result;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return 0;
}

#define _uatomic_xchg(addr, v)						      \
	((__typeof__(*(addr))) __uatomic_exchange((addr),		      \
						caa_cast_long_keep_sign(v),   \
						sizeof(*(addr))))
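
/*
 * Usage sketch (illustrative only): xchg performs an unconditional swap
 * with full-barrier semantics, e.g. stealing a hypothetical pending-work
 * pointer so that only one thread ever processes it:
 *
 *	struct work *w = _uatomic_xchg(&pending, NULL);
 *
 *	if (w)
 *		process(w);
 *
 * "pending", "struct work" and process() are placeholders, not part of
 * this header.
 */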

/* uatomic_add_return */

static inline __attribute__((always_inline))
unsigned long __uatomic_add_return(void *addr, unsigned long val,
				   int len)
{
	switch (len) {
	case 1:
	{
		unsigned char result = val;

		__asm__ __volatile__(
		"lock; xaddb %1, %0"
			: "+m"(*__hp(len, addr)), "+q" (result)
			:
			: "memory");
		return result + (unsigned char)val;
	}
	case 2:
	{
		unsigned short result = val;

		__asm__ __volatile__(
		"lock; xaddw %1, %0"
			: "+m"(*__hp(len, addr)), "+r" (result)
			:
			: "memory");
		return result + (unsigned short)val;
	}
	case 4:
	{
		unsigned int result = val;

		__asm__ __volatile__(
		"lock; xaddl %1, %0"
			: "+m"(*__hp(len, addr)), "+r" (result)
			:
			: "memory");
		return result + (unsigned int)val;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		unsigned long result = val;

		__asm__ __volatile__(
		"lock; xaddq %1, %0"
			: "+m"(*__hp(len, addr)), "+r" (result)
			:
			: "memory");
		return result + (unsigned long)val;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return 0;
}

#define _uatomic_add_return(addr, v)					      \
	((__typeof__(*(addr))) __uatomic_add_return((addr),		      \
						caa_cast_long_keep_sign(v),   \
						sizeof(*(addr))))
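
/*
 * Usage sketch (illustrative only): xadd returns the value the location
 * held before the addition, so the new value is obtained by adding the
 * operand back, as done above. A hypothetical ticket counter:
 *
 *	unsigned int my_ticket = _uatomic_add_return(&next_ticket, 1);
 *
 * hands out 1, 2, 3, ... to successive callers when next_ticket starts
 * at 0.
 */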

/* uatomic_and */

static inline __attribute__((always_inline))
void __uatomic_and(void *addr, unsigned long val, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; andb %1, %0"
			: "=m"(*__hp(len, addr))
			: "iq" ((unsigned char)val)
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; andw %1, %0"
			: "=m"(*__hp(len, addr))
			: "ir" ((unsigned short)val)
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; andl %1, %0"
			: "=m"(*__hp(len, addr))
			: "ir" ((unsigned int)val)
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; andq %1, %0"
			: "=m"(*__hp(len, addr))
			: "er" ((unsigned long)val)
			: "memory");
		return;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_and(addr, v)						      \
	(__uatomic_and((addr), caa_cast_long_keep_sign(v), sizeof(*(addr))))

/* uatomic_or */

static inline __attribute__((always_inline))
void __uatomic_or(void *addr, unsigned long val, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; orb %1, %0"
			: "=m"(*__hp(len, addr))
			: "iq" ((unsigned char)val)
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; orw %1, %0"
			: "=m"(*__hp(len, addr))
			: "ir" ((unsigned short)val)
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; orl %1, %0"
			: "=m"(*__hp(len, addr))
			: "ir" ((unsigned int)val)
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; orq %1, %0"
			: "=m"(*__hp(len, addr))
			: "er" ((unsigned long)val)
			: "memory");
		return;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_or(addr, v)						      \
	(__uatomic_or((addr), caa_cast_long_keep_sign(v), sizeof(*(addr))))
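
/*
 * Usage sketch (illustrative only): the and/or primitives provide atomic
 * bit clearing and setting, e.g. on a hypothetical flags word:
 *
 *	#define FLAG_DIRTY	(1U << 0)
 *
 *	_uatomic_or(&flags, FLAG_DIRTY);	// set the bit
 *	_uatomic_and(&flags, ~FLAG_DIRTY);	// clear the bit
 *
 * Neither primitive returns the previous value; use _uatomic_cmpxchg()
 * when the old flags are needed.
 */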

/* uatomic_add */

static inline __attribute__((always_inline))
void __uatomic_add(void *addr, unsigned long val, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; addb %1, %0"
			: "=m"(*__hp(len, addr))
			: "iq" ((unsigned char)val)
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; addw %1, %0"
			: "=m"(*__hp(len, addr))
			: "ir" ((unsigned short)val)
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; addl %1, %0"
			: "=m"(*__hp(len, addr))
			: "ir" ((unsigned int)val)
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; addq %1, %0"
			: "=m"(*__hp(len, addr))
			: "er" ((unsigned long)val)
			: "memory");
		return;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_add(addr, v)						      \
	(__uatomic_add((addr), caa_cast_long_keep_sign(v), sizeof(*(addr))))
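
/*
 * Usage note (illustrative only): _uatomic_add() discards the result and
 * therefore maps to "lock add" rather than "lock xadd", e.g. for a
 * hypothetical statistics counter:
 *
 *	_uatomic_add(&bytes_sent, nbytes);
 *
 * Use _uatomic_add_return() when the updated value is needed.
 */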


/* uatomic_inc */

static inline __attribute__((always_inline))
void __uatomic_inc(void *addr, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; incb %0"
			: "=m"(*__hp(len, addr))
			:
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; incw %0"
			: "=m"(*__hp(len, addr))
			:
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; incl %0"
			: "=m"(*__hp(len, addr))
			:
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; incq %0"
			: "=m"(*__hp(len, addr))
			:
			: "memory");
		return;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_inc(addr)	(__uatomic_inc((addr), sizeof(*(addr))))

/* uatomic_dec */

static inline __attribute__((always_inline))
void __uatomic_dec(void *addr, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; decb %0"
			: "=m"(*__hp(len, addr))
			:
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; decw %0"
			: "=m"(*__hp(len, addr))
			:
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; decl %0"
			: "=m"(*__hp(len, addr))
			:
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; decq %0"
			: "=m"(*__hp(len, addr))
			:
			: "memory");
		return;
	}
#endif
	}
	/*
	 * generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_dec(addr)	(__uatomic_dec((addr), sizeof(*(addr))))
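
/*
 * Usage sketch (illustrative only): inc/dec map to "lock inc"/"lock dec"
 * and do not return the new value, so a hypothetical reference count that
 * must detect the final release pairs them with the add-return form:
 *
 *	_uatomic_inc(&refcount);			// take a reference
 *	if (_uatomic_add_return(&refcount, -1) == 0)	// drop a reference
 *		release(obj);
 *
 * refcount, obj and release() are placeholders, not part of this header.
 */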

#if ((CAA_BITS_PER_LONG != 64) && defined(CONFIG_RCU_COMPAT_ARCH))
extern int __rcu_cas_avail;
extern int __rcu_cas_init(void);

#define UATOMIC_COMPAT(insn)						      \
	((caa_likely(__rcu_cas_avail > 0))				      \
	? (_uatomic_##insn)						      \
		: ((caa_unlikely(__rcu_cas_avail < 0)			      \
			? ((__rcu_cas_init() > 0)			      \
				? (_uatomic_##insn)			      \
				: (compat_uatomic_##insn))		      \
			: (compat_uatomic_##insn))))
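
/*
 * Dispatch sketch (illustrative only): __rcu_cas_avail acts as a cached
 * tri-state probe result (< 0: not probed yet, 0: cmpxchg unavailable,
 * > 0: available), so UATOMIC_COMPAT(cmpxchg(a, o, n)) behaves roughly
 * like:
 *
 *	if (__rcu_cas_avail > 0)
 *		_uatomic_cmpxchg(a, o, n);
 *	else if (__rcu_cas_avail < 0 && __rcu_cas_init() > 0)
 *		_uatomic_cmpxchg(a, o, n);
 *	else
 *		compat_uatomic_cmpxchg(a, o, n);
 */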

/*
 * We leave the return value so we don't break the ABI, but remove the
 * return value from the API.
 */
extern unsigned long _compat_uatomic_set(void *addr,
					 unsigned long _new, int len);
#define compat_uatomic_set(addr, _new)					      \
	((void) _compat_uatomic_set((addr),				      \
				caa_cast_long_keep_sign(_new),		      \
				sizeof(*(addr))))


extern unsigned long _compat_uatomic_xchg(void *addr,
					  unsigned long _new, int len);
#define compat_uatomic_xchg(addr, _new)					      \
	((__typeof__(*(addr))) _compat_uatomic_xchg((addr),		      \
						caa_cast_long_keep_sign(_new), \
						sizeof(*(addr))))

extern unsigned long _compat_uatomic_cmpxchg(void *addr, unsigned long old,
					     unsigned long _new, int len);
#define compat_uatomic_cmpxchg(addr, old, _new)				      \
	((__typeof__(*(addr))) _compat_uatomic_cmpxchg((addr),		      \
						caa_cast_long_keep_sign(old),  \
						caa_cast_long_keep_sign(_new), \
						sizeof(*(addr))))

extern void _compat_uatomic_and(void *addr, unsigned long _new, int len);
#define compat_uatomic_and(addr, v)					      \
	(_compat_uatomic_and((addr),					      \
			caa_cast_long_keep_sign(v),			      \
			sizeof(*(addr))))

extern void _compat_uatomic_or(void *addr, unsigned long _new, int len);
#define compat_uatomic_or(addr, v)					      \
	(_compat_uatomic_or((addr),					      \
			caa_cast_long_keep_sign(v),			      \
			sizeof(*(addr))))

extern unsigned long _compat_uatomic_add_return(void *addr,
						unsigned long _new, int len);
#define compat_uatomic_add_return(addr, v)				      \
	((__typeof__(*(addr))) _compat_uatomic_add_return((addr),	      \
						caa_cast_long_keep_sign(v),   \
						sizeof(*(addr))))

#define compat_uatomic_add(addr, v)					      \
	((void)compat_uatomic_add_return((addr), (v)))
#define compat_uatomic_inc(addr)					      \
	(compat_uatomic_add((addr), 1))
#define compat_uatomic_dec(addr)					      \
	(compat_uatomic_add((addr), -1))

#else
#define UATOMIC_COMPAT(insn)	(_uatomic_##insn)
#endif

/* Read is atomic even in compat mode */
#define uatomic_set(addr, v)			\
		UATOMIC_COMPAT(set(addr, v))

#define uatomic_cmpxchg(addr, old, _new)	\
		UATOMIC_COMPAT(cmpxchg(addr, old, _new))
#define uatomic_xchg(addr, v)			\
		UATOMIC_COMPAT(xchg(addr, v))

#define uatomic_and(addr, v)			\
		UATOMIC_COMPAT(and(addr, v))
#define cmm_smp_mb__before_uatomic_and()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_and()		cmm_barrier()

#define uatomic_or(addr, v)			\
		UATOMIC_COMPAT(or(addr, v))
#define cmm_smp_mb__before_uatomic_or()		cmm_barrier()
#define cmm_smp_mb__after_uatomic_or()		cmm_barrier()

#define uatomic_add_return(addr, v)		\
		UATOMIC_COMPAT(add_return(addr, v))

#define uatomic_add(addr, v)	UATOMIC_COMPAT(add(addr, v))
#define cmm_smp_mb__before_uatomic_add()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_add()		cmm_barrier()

#define uatomic_inc(addr)	UATOMIC_COMPAT(inc(addr))
#define cmm_smp_mb__before_uatomic_inc()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_inc()		cmm_barrier()

#define uatomic_dec(addr)	UATOMIC_COMPAT(dec(addr))
#define cmm_smp_mb__before_uatomic_dec()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_dec()		cmm_barrier()
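
/*
 * Usage sketch (illustrative only): on x86, lock-prefixed instructions and
 * xchg already imply a full memory barrier, which is why the
 * cmm_smp_mb__before/after_uatomic_*() helpers above reduce to compiler
 * barriers. A hypothetical producer can therefore publish data with:
 *
 *	CMM_STORE_SHARED(data, value);
 *	cmm_smp_mb__before_uatomic_inc();	// compiler barrier on x86
 *	uatomic_inc(&data_ready_count);
 *
 * and the same code remains correctly ordered on architectures where
 * these helpers expand to real barriers.
 */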

#ifdef __cplusplus
}
#endif

#include <urcu/uatomic/generic.h>

#endif /* _URCU_ARCH_UATOMIC_X86_H */