+unsigned long _atomic_add_return(void *addr, unsigned long val,
+				 int len)
+{
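+	/*
+	 * xadd atomically exchanges the register operand with the memory
+	 * operand and stores their sum in memory: after the instruction,
+	 * "result" holds the previous value of *addr.  Adding "val" back
+	 * to it yields the value after the update, which is what an
+	 * add_return primitive must return.
+	 */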
+	switch (len) {
+	case 1:
+	{
+		unsigned char result = val;
+
+		__asm__ __volatile__(
+			"lock; xaddb %1, %0"
+			: "+m"(*__hp(addr)), "+q" (result)
+			:
+			: "memory");
+		return result + (unsigned char)val;
+	}
+	case 2:
+	{
+		unsigned short result = val;
+
+		__asm__ __volatile__(
+			"lock; xaddw %1, %0"
+			: "+m"(*__hp(addr)), "+r" (result)
+			:
+			: "memory");
+		return result + (unsigned short)val;
+	}
+	case 4:
+	{
+		unsigned int result = val;
+
+		__asm__ __volatile__(
+			"lock; xaddl %1, %0"
+			: "+m"(*__hp(addr)), "+r" (result)
+			:
+			: "memory");
+		return result + (unsigned int)val;
+	}
+#if (BITS_PER_LONG == 64)
+	case 8:
+	{
+		unsigned long result = val;
+
+		__asm__ __volatile__(
+			"lock; xaddq %1, %0"
+			: "+m"(*__hp(addr)), "+r" (result)
+			:
+			: "memory");
+		return result + (unsigned long)val;
+	}
+#endif
+	}
+	/* generate an illegal instruction. Cannot catch this with linker tricks
+	 * when optimizations are disabled. */
+	__asm__ __volatile__("ud2");
+	return 0;
+}
+
+#define atomic_add_return(addr, v) \
+	((__typeof__(*(addr))) _atomic_add_return((addr), \
+						  (unsigned long)(v), \
+						  sizeof(*(addr))))
+
+#define atomic_sub_return(addr, v) atomic_add_return((addr), -(v))
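+
+/*
+ * Usage sketch (illustrative only; "count" is a hypothetical variable and
+ * a 64-bit build is assumed, so an unsigned long counter hits the xaddq
+ * case):
+ *
+ *	unsigned long count = 0;
+ *
+ *	atomic_add_return(&count, 3);	returns 3, count is now 3
+ *	atomic_sub_return(&count, 1);	returns 2, count is now 2
+ *
+ * Both expand to _atomic_add_return() with sizeof(*(addr)) selecting the
+ * operand size; the value returned is the counter value after the update,
+ * obtained atomically with respect to concurrent updaters.
+ */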
+
+/* atomic_add, atomic_sub */
+
+static inline __attribute__((always_inline))
+void _atomic_add(void *addr, unsigned long val, int len)