From: Mathieu Desnoyers
Date: Mon, 28 Sep 2009 19:54:13 +0000 (-0400)
Subject: Cleanup headers
X-Git-Tag: v0.1~22
X-Git-Url: https://git.lttng.org/?p=urcu.git;a=commitdiff_plain;h=ec4e58a3aba2084440012f8ccac3a31eb6101183

Cleanup headers

* atomic_ -> uatomic (to remove namespace clash with libkcompat)
* moved arch.h, compiler.h, arch_uatomic.h to /usr/include/urcu/ to make sure
  we do not pollute system headers.

Also add call_rcu() documentation to README.

Signed-off-by: Mathieu Desnoyers
---
diff --git a/Makefile.inc b/Makefile.inc
index ae62f54..cde31cc 100644
--- a/Makefile.inc
+++ b/Makefile.inc
@@ -16,15 +16,15 @@ ifeq (${ARCHTYPE},)
 	@exit 1
 endif
 
-arch.h: arch_${ARCHTYPE}.h
-	cp -f arch_${ARCHTYPE}.h arch.h
+urcu/arch.h: urcu/arch_${ARCHTYPE}.h
+	cp -f urcu/arch_${ARCHTYPE}.h urcu/arch.h
 
-arch_atomic.h: arch_atomic_${ARCHTYPE}.h
-	cp -f arch_atomic_${ARCHTYPE}.h arch_atomic.h
+urcu/arch_uatomic.h: urcu/arch_uatomic_${ARCHTYPE}.h
+	cp -f urcu/arch_uatomic_${ARCHTYPE}.h urcu/arch_uatomic.h
 
-urcu.h: arch.h arch_atomic.h
+urcu.h: urcu/arch.h urcu/arch_uatomic.h
 
-urcu-qsbr.h: arch.h arch_atomic.h
+urcu-qsbr.h: urcu/arch.h urcu/arch_uatomic.h
 
 urcu.o: urcu.c urcu.h
 	$(CC) -fPIC ${CFLAGS} -c -o $@ $(SRC_DEP)
@@ -61,12 +61,14 @@ subdirs:
 install: liburcu.so
 	cp -f liburcu.so liburcu-mb.so liburcu-qsbr.so liburcu-defer.so \
 		/usr/lib/
-	cp -f arch.h arch_atomic.h compiler.h \
-		urcu.h urcu-static.h \
+	mkdir -p /usr/include/urcu
+	cp -f urcu/arch.h urcu/arch_uatomic.h urcu/compiler.h \
+		/usr/include/urcu/
+	cp -f urcu.h urcu-static.h \
 		urcu-qsbr.h urcu-qsbr-static.h \
 		urcu-defer.h urcu-defer-static.h \
 		/usr/include/
 
 clean:
-	rm -f *.o *.so arch.h arch_atomic.h
+	rm -f *.o *.so urcu/arch.h urcu/arch_uatomic.h
 	-for d in ${DIRS}; do cd $${d}; ${MAKE} clean; done
diff --git a/README b/README
index b47fe48..e5c04b1 100644
--- a/README
+++ b/README
@@ -70,6 +70,15 @@ Writing
 After, synchronize_rcu() must be called. When it returns, the old
 values are not in usage anymore.
 
+Usage of liburcu-defer
+
+	* #include <urcu-defer.h>
+	* Link with "-lurcu-defer"
+	* Provides call_rcu() primitive to enqueue delayed callbacks. Queued
+	  callbacks are executed in batch periodically after a grace period.
+	  Do _not_ use call_rcu() within a read-side critical section, because
+	  it may call synchronize_rcu() if the thread queue is full.
+
 Being careful with signals
 
 The liburcu library uses signals internally. The signal handler is
diff --git a/arch_atomic_ppc.h b/arch_atomic_ppc.h
deleted file mode 100644
index 54c5f5c..0000000
--- a/arch_atomic_ppc.h
+++ /dev/null
@@ -1,235 +0,0 @@
-#ifndef _ARCH_ATOMIC_PPC_H
-#define _ARCH_ATOMIC_PPC_H
-
-/*
- * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
- * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved.
- * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
- * Copyright (c) 2009 Mathieu Desnoyers
- *
- * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
- * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
- *
- * Permission is hereby granted to use or copy this program
- * for any purpose, provided the above notices are retained on all copies.
- * Permission to modify the code and to distribute modified code is granted,
- * provided the above notices are retained, and a notice that the code was
- * modified is included with the above copyright notice.
- *
- * Code inspired from libatomic_ops-1.2, inherited in part from the
- * Boehm-Demers-Weiser conservative garbage collector.
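
As a quick illustration of the liburcu-defer usage documented in the README
hunk above, here is a minimal sketch. It assumes the
call_rcu(void (*fct)(void *), void *) prototype exposed by urcu-defer.h and
that the calling threads are already registered with the library; struct node,
shared_node, update_node() and free_node() are placeholder names, not part of
the library.

	#include <stdlib.h>

	#include <urcu.h>		/* rcu_assign_pointer() */
	#include <urcu-defer.h>		/* call_rcu() */

	struct node {
		int value;
	};

	static struct node *shared_node;

	/* Invoked in batch by the defer thread, after a grace period. */
	static void free_node(void *p)
	{
		free(p);
	}

	/* Updater side: publish a new node, defer reclaim of the old one. */
	static void update_node(int value)
	{
		struct node *new_node, *old_node;

		new_node = malloc(sizeof(*new_node));
		if (!new_node)
			return;
		new_node->value = value;
		old_node = shared_node;		/* single updater assumed */
		rcu_assign_pointer(shared_node, new_node);
		/* Never enqueue from within rcu_read_lock()/rcu_read_unlock(). */
		call_rcu(free_node, old_node);
	}

Readers access shared_node under rcu_read_lock() with rcu_dereference(); the
deferred free_node() only runs once all such readers have left their critical
sections.
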
- */ - -#include - -#ifndef __SIZEOF_LONG__ -#ifdef __powerpc64__ -#define __SIZEOF_LONG__ 8 -#else -#define __SIZEOF_LONG__ 4 -#endif -#endif - -#ifndef BITS_PER_LONG -#define BITS_PER_LONG (__SIZEOF_LONG__ * 8) -#endif - -#define ILLEGAL_INSTR ".long 0xd00d00" - -#ifndef _INCLUDE_API_H - -#define atomic_set(addr, v) \ -do { \ - ACCESS_ONCE(*(addr)) = (v); \ -} while (0) - -#define atomic_read(addr) ACCESS_ONCE(*(addr)) - -/* - * Using a isync as second barrier for exchange to provide acquire semantic. - * According to atomic_ops/sysdeps/gcc/powerpc.h, the documentation is "fairly - * explicit that this also has acquire semantics." - * Derived from AO_compare_and_swap(), but removed the comparison. - */ - -/* xchg */ - -static inline __attribute__((always_inline)) -unsigned long _atomic_exchange(void *addr, unsigned long val, int len) -{ - switch (len) { - case 4: - { - unsigned int result; - - __asm__ __volatile__( - "lwsync\n" - "1:\t" "lwarx %0,0,%1\n" /* load and reserve */ - "stwcx. %2,0,%1\n" /* else store conditional */ - "bne- 1b\n" /* retry if lost reservation */ - "isync\n" - : "=&r"(result) - : "r"(addr), "r"(val) - : "memory", "cc"); - - return result; - } -#if (BITS_PER_LONG == 64) - case 8: - { - unsigned long result; - - __asm__ __volatile__( - "lwsync\n" - "1:\t" "ldarx %0,0,%1\n" /* load and reserve */ - "stdcx. %2,0,%1\n" /* else store conditional */ - "bne- 1b\n" /* retry if lost reservation */ - "isync\n" - : "=&r"(result) - : "r"(addr), "r"(val) - : "memory", "cc"); - - return result; - } -#endif - } - /* generate an illegal instruction. Cannot catch this with linker tricks - * when optimizations are disabled. */ - __asm__ __volatile__(ILLEGAL_INSTR); - return 0; -} - -#define xchg(addr, v) \ - ((__typeof__(*(addr))) _atomic_exchange((addr), (unsigned long)(v), \ - sizeof(*(addr)))) -/* cmpxchg */ - -static inline __attribute__((always_inline)) -unsigned long _atomic_cmpxchg(void *addr, unsigned long old, - unsigned long _new, int len) -{ - switch (len) { - case 4: - { - unsigned int old_val; - - __asm__ __volatile__( - "lwsync\n" - "1:\t" "lwarx %0,0,%1\n" /* load and reserve */ - "cmpd %0,%3\n" /* if load is not equal to */ - "bne 2f\n" /* old, fail */ - "stwcx. %2,0,%1\n" /* else store conditional */ - "bne- 1b\n" /* retry if lost reservation */ - "isync\n" - "2:\n" - : "=&r"(old_val) - : "r"(addr), "r"((unsigned int)_new), - "r"((unsigned int)old) - : "memory", "cc"); - - return old_val; - } -#if (BITS_PER_LONG == 64) - case 8: - { - unsigned long old_val; - - __asm__ __volatile__( - "lwsync\n" - "1:\t" "ldarx %0,0,%1\n" /* load and reserve */ - "cmpd %0,%3\n" /* if load is not equal to */ - "bne 2f\n" /* old, fail */ - "stdcx. %2,0,%1\n" /* else store conditional */ - "bne- 1b\n" /* retry if lost reservation */ - "isync\n" - "2:\n" - : "=&r"(old_val), - : "r"(addr), "r"((unsigned long)_new), - "r"((unsigned long)old) - : "memory", "cc"); - - return old_val; - } -#endif - } - /* generate an illegal instruction. Cannot catch this with linker tricks - * when optimizations are disabled. 
*/ - __asm__ __volatile__(ILLEGAL_INSTR); - return 0; -} - - -#define cmpxchg(addr, old, _new) \ - ((__typeof__(*(addr))) _atomic_cmpxchg((addr), (unsigned long)(old),\ - (unsigned long)(_new), \ - sizeof(*(addr)))) - -/* atomic_add_return */ - -static inline __attribute__((always_inline)) -unsigned long _atomic_add_return(void *addr, unsigned long val, - int len) -{ - switch (len) { - case 4: - { - unsigned int result; - - __asm__ __volatile__( - "lwsync\n" - "1:\t" "lwarx %0,0,%1\n" /* load and reserve */ - "add %0,%2,%0\n" /* add val to value loaded */ - "stwcx. %0,0,%1\n" /* store conditional */ - "bne- 1b\n" /* retry if lost reservation */ - "isync\n" - : "=&r"(result) - : "r"(addr), "r"(val) - : "memory", "cc"); - - return result; - } -#if (BITS_PER_LONG == 64) - case 8: - { - unsigned long result; - - __asm__ __volatile__( - "lwsync\n" - "1:\t" "ldarx %0,0,%1\n" /* load and reserve */ - "add %0,%2,%0\n" /* add val to value loaded */ - "stdcx. %0,0,%1\n" /* store conditional */ - "bne- 1b\n" /* retry if lost reservation */ - "isync\n" - : "=&r"(result) - : "r"(addr), "r"(val) - : "memory", "cc"); - - return result; - } -#endif - } - /* generate an illegal instruction. Cannot catch this with linker tricks - * when optimizations are disabled. */ - __asm__ __volatile__(ILLEGAL_INSTR); - return 0; -} - - -#define atomic_add_return(addr, v) \ - ((__typeof__(*(addr))) _atomic_add_return((addr), \ - (unsigned long)(v), \ - sizeof(*(addr)))) - -/* atomic_sub_return, atomic_add, atomic_sub, atomic_inc, atomic_dec */ - -#define atomic_sub_return(addr, v) atomic_add_return((addr), -(v)) - -#define atomic_add(addr, v) (void)atomic_add_return((addr), (v)) -#define atomic_sub(addr, v) (void)atomic_sub_return((addr), (v)) - -#define atomic_inc(addr) atomic_add((addr), 1) -#define atomic_dec(addr) atomic_add((addr), -1) - -#endif /* #ifndef _INCLUDE_API_H */ - -#endif /* ARCH_ATOMIC_PPC_H */ diff --git a/arch_atomic_s390.h b/arch_atomic_s390.h deleted file mode 100644 index 8857d42..0000000 --- a/arch_atomic_s390.h +++ /dev/null @@ -1,103 +0,0 @@ -#ifndef _ARCH_ATOMIC_S390_H -#define _ARCH_ATOMIC_S390_H - -/* - * Atomic exchange operations for the S390 architecture. Based on information - * taken from the Principles of Operation Appendix A "Conditional Swapping - * Instructions (CS, CDS)". - * - * Copyright (c) 2009 Novell, Inc. - * Author: Jan Blunck - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#ifndef __SIZEOF_LONG__ -#ifdef __s390x__ -#define __SIZEOF_LONG__ 8 -#else -#define __SIZEOF_LONG__ 4 -#endif -#endif - -#ifndef BITS_PER_LONG -#define BITS_PER_LONG (__SIZEOF_LONG__ * 8) -#endif - -#ifndef _INCLUDE_API_H - -static inline __attribute__((always_inline)) -unsigned int atomic_exchange_32(volatile unsigned int *addr, unsigned int val) -{ - unsigned int result; - - __asm__ __volatile__( - "0: cs %0,%2,%1\n" - " brc 4,0b\n" - : "=&r"(result), "=m" (*addr) - : "r"(val), "m" (*addr) - : "memory", "cc"); - - return result; -} - -#if (BITS_PER_LONG == 64) - -static inline __attribute__((always_inline)) -unsigned long atomic_exchange_64(volatile unsigned long *addr, - unsigned long val) -{ - unsigned long result; - - __asm__ __volatile__( - "0: csg %0,%2,%1\n" - " brc 4,0b\n" - : "=&r"(result), "=m" (*addr) - : "r"(val), "m" (*addr) - : "memory", "cc"); - - return result; -} - -#endif - -static inline __attribute__((always_inline)) -unsigned long _atomic_exchange(volatile void *addr, unsigned long val, int len) -{ - switch (len) { - case 4: - return atomic_exchange_32(addr, val); -#if (BITS_PER_LONG == 64) - case 8: - return atomic_exchange_64(addr, val); -#endif - default: - __asm__ __volatile__(".long 0xd00d00"); - } - - return 0; -} - -#define xchg(addr, v) \ - (__typeof__(*(addr))) _atomic_exchange((addr), (unsigned long)(v), \ - sizeof(*(addr))) - -#endif /* #ifndef _INCLUDE_API_H */ - -#endif /* ARCH_ATOMIC_S390_H */ diff --git a/arch_atomic_x86.h b/arch_atomic_x86.h deleted file mode 100644 index 3eedc3f..0000000 --- a/arch_atomic_x86.h +++ /dev/null @@ -1,404 +0,0 @@ -#ifndef _ARCH_ATOMIC_X86_H -#define _ARCH_ATOMIC_X86_H - -/* - * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved. - * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved. - * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P. - * Copyright (c) 2009 Mathieu Desnoyers - * - * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED - * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - * - * Permission is hereby granted to use or copy this program - * for any purpose, provided the above notices are retained on all copies. - * Permission to modify the code and to distribute modified code is granted, - * provided the above notices are retained, and a notice that the code was - * modified is included with the above copyright notice. - * - * Code inspired from libatomic_ops-1.2, inherited in part from the - * Boehm-Demers-Weiser conservative garbage collector. - */ - -#include - -#ifndef BITS_PER_LONG -#define BITS_PER_LONG (__SIZEOF_LONG__ * 8) -#endif - -#ifndef _INCLUDE_API_H - -/* - * Derived from AO_compare_and_swap() and AO_test_and_set_full(). 
- */ - -struct __atomic_dummy { - unsigned long v[10]; -}; -#define __hp(x) ((struct __atomic_dummy *)(x)) - -#define atomic_set(addr, v) \ -do { \ - ACCESS_ONCE(*(addr)) = (v); \ -} while (0) - -#define atomic_read(addr) ACCESS_ONCE(*(addr)) - -/* cmpxchg */ - -static inline __attribute__((always_inline)) -unsigned long _atomic_cmpxchg(void *addr, unsigned long old, - unsigned long _new, int len) -{ - switch (len) { - case 1: - { - unsigned char result = old; - - __asm__ __volatile__( - "lock; cmpxchgb %2, %1" - : "+a"(result), "+m"(*__hp(addr)) - : "q"((unsigned char)_new) - : "memory"); - return result; - } - case 2: - { - unsigned short result = old; - - __asm__ __volatile__( - "lock; cmpxchgw %2, %1" - : "+a"(result), "+m"(*__hp(addr)) - : "r"((unsigned short)_new) - : "memory"); - return result; - } - case 4: - { - unsigned int result = old; - - __asm__ __volatile__( - "lock; cmpxchgl %2, %1" - : "+a"(result), "+m"(*__hp(addr)) - : "r"((unsigned int)_new) - : "memory"); - return result; - } -#if (BITS_PER_LONG == 64) - case 8: - { - unsigned long result = old; - - __asm__ __volatile__( - "lock; cmpxchgq %2, %1" - : "+a"(result), "+m"(*__hp(addr)) - : "r"((unsigned long)_new) - : "memory"); - return result; - } -#endif - } - /* generate an illegal instruction. Cannot catch this with linker tricks - * when optimizations are disabled. */ - __asm__ __volatile__("ud2"); - return 0; -} - -#define cmpxchg(addr, old, _new) \ - ((__typeof__(*(addr))) _atomic_cmpxchg((addr), (unsigned long)(old),\ - (unsigned long)(_new), \ - sizeof(*(addr)))) - -/* xchg */ - -static inline __attribute__((always_inline)) -unsigned long _atomic_exchange(void *addr, unsigned long val, int len) -{ - /* Note: the "xchg" instruction does not need a "lock" prefix. */ - switch (len) { - case 1: - { - unsigned char result; - __asm__ __volatile__( - "xchgb %0, %1" - : "=q"(result), "+m"(*__hp(addr)) - : "0" ((unsigned char)val) - : "memory"); - return result; - } - case 2: - { - unsigned short result; - __asm__ __volatile__( - "xchgw %0, %1" - : "=r"(result), "+m"(*__hp(addr)) - : "0" ((unsigned short)val) - : "memory"); - return result; - } - case 4: - { - unsigned int result; - __asm__ __volatile__( - "xchgl %0, %1" - : "=r"(result), "+m"(*__hp(addr)) - : "0" ((unsigned int)val) - : "memory"); - return result; - } -#if (BITS_PER_LONG == 64) - case 8: - { - unsigned long result; - __asm__ __volatile__( - "xchgq %0, %1" - : "=r"(result), "+m"(*__hp(addr)) - : "0" ((unsigned long)val) - : "memory"); - return result; - } -#endif - } - /* generate an illegal instruction. Cannot catch this with linker tricks - * when optimizations are disabled. 
*/ - __asm__ __volatile__("ud2"); - return 0; -} - -#define xchg(addr, v) \ - ((__typeof__(*(addr))) _atomic_exchange((addr), (unsigned long)(v), \ - sizeof(*(addr)))) - -/* atomic_add_return, atomic_sub_return */ - -static inline __attribute__((always_inline)) -unsigned long _atomic_add_return(void *addr, unsigned long val, - int len) -{ - switch (len) { - case 1: - { - unsigned char result = val; - - __asm__ __volatile__( - "lock; xaddb %1, %0" - : "+m"(*__hp(addr)), "+q" (result) - : - : "memory"); - return result + (unsigned char)val; - } - case 2: - { - unsigned short result = val; - - __asm__ __volatile__( - "lock; xaddw %1, %0" - : "+m"(*__hp(addr)), "+r" (result) - : - : "memory"); - return result + (unsigned short)val; - } - case 4: - { - unsigned int result = val; - - __asm__ __volatile__( - "lock; xaddl %1, %0" - : "+m"(*__hp(addr)), "+r" (result) - : - : "memory"); - return result + (unsigned int)val; - } -#if (BITS_PER_LONG == 64) - case 8: - { - unsigned long result = val; - - __asm__ __volatile__( - "lock; xaddq %1, %0" - : "+m"(*__hp(addr)), "+r" (result) - : - : "memory"); - return result + (unsigned long)val; - } -#endif - } - /* generate an illegal instruction. Cannot catch this with linker tricks - * when optimizations are disabled. */ - __asm__ __volatile__("ud2"); - return 0; -} - -#define atomic_add_return(addr, v) \ - ((__typeof__(*(addr))) _atomic_add_return((addr), \ - (unsigned long)(v), \ - sizeof(*(addr)))) - -#define atomic_sub_return(addr, v) atomic_add_return((addr), -(v)) - -/* atomic_add, atomic_sub */ - -static inline __attribute__((always_inline)) -void _atomic_add(void *addr, unsigned long val, int len) -{ - switch (len) { - case 1: - { - __asm__ __volatile__( - "lock; addb %1, %0" - : "=m"(*__hp(addr)) - : "iq" ((unsigned char)val) - : "memory"); - return; - } - case 2: - { - __asm__ __volatile__( - "lock; addw %1, %0" - : "=m"(*__hp(addr)) - : "ir" ((unsigned short)val) - : "memory"); - return; - } - case 4: - { - __asm__ __volatile__( - "lock; addl %1, %0" - : "=m"(*__hp(addr)) - : "ir" ((unsigned int)val) - : "memory"); - return; - } -#if (BITS_PER_LONG == 64) - case 8: - { - __asm__ __volatile__( - "lock; addq %1, %0" - : "=m"(*__hp(addr)) - : "er" ((unsigned long)val) - : "memory"); - return; - } -#endif - } - /* generate an illegal instruction. Cannot catch this with linker tricks - * when optimizations are disabled. */ - __asm__ __volatile__("ud2"); - return; -} - -#define atomic_add(addr, v) \ - (_atomic_add((addr), (unsigned long)(v), sizeof(*(addr)))) - -#define atomic_sub(addr, v) atomic_add((addr), -(v)) - - -/* atomic_inc */ - -static inline __attribute__((always_inline)) -void _atomic_inc(void *addr, int len) -{ - switch (len) { - case 1: - { - __asm__ __volatile__( - "lock; incb %0" - : "=m"(*__hp(addr)) - : - : "memory"); - return; - } - case 2: - { - __asm__ __volatile__( - "lock; incw %0" - : "=m"(*__hp(addr)) - : - : "memory"); - return; - } - case 4: - { - __asm__ __volatile__( - "lock; incl %0" - : "=m"(*__hp(addr)) - : - : "memory"); - return; - } -#if (BITS_PER_LONG == 64) - case 8: - { - __asm__ __volatile__( - "lock; incq %0" - : "=m"(*__hp(addr)) - : - : "memory"); - return; - } -#endif - } - /* generate an illegal instruction. Cannot catch this with linker tricks - * when optimizations are disabled. 
*/ - __asm__ __volatile__("ud2"); - return; -} - -#define atomic_inc(addr) (_atomic_inc((addr), sizeof(*(addr)))) - -/* atomic_dec */ - -static inline __attribute__((always_inline)) -void _atomic_dec(void *addr, int len) -{ - switch (len) { - case 1: - { - __asm__ __volatile__( - "lock; decb %0" - : "=m"(*__hp(addr)) - : - : "memory"); - return; - } - case 2: - { - __asm__ __volatile__( - "lock; decw %0" - : "=m"(*__hp(addr)) - : - : "memory"); - return; - } - case 4: - { - __asm__ __volatile__( - "lock; decl %0" - : "=m"(*__hp(addr)) - : - : "memory"); - return; - } -#if (BITS_PER_LONG == 64) - case 8: - { - __asm__ __volatile__( - "lock; decq %0" - : "=m"(*__hp(addr)) - : - : "memory"); - return; - } -#endif - } - /* generate an illegal instruction. Cannot catch this with linker tricks - * when optimizations are disabled. */ - __asm__ __volatile__("ud2"); - return; -} - -#define atomic_dec(addr) (_atomic_dec((addr), sizeof(*(addr)))) - -#endif /* #ifndef _INCLUDE_API_H */ - -#endif /* ARCH_ATOMIC_X86_H */ diff --git a/arch_ppc.h b/arch_ppc.h deleted file mode 100644 index 9c4ec91..0000000 --- a/arch_ppc.h +++ /dev/null @@ -1,119 +0,0 @@ -#ifndef _ARCH_PPC_H -#define _ARCH_PPC_H - -/* - * arch_ppc.h: trivial definitions for the powerpc architecture. - * - * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. - * Copyright (c) 2009 Mathieu Desnoyers - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -* - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include -#include - -#define CONFIG_HAVE_FENCE 1 -#define CONFIG_HAVE_MEM_COHERENCY - -#ifndef BITS_PER_LONG -#define BITS_PER_LONG (__SIZEOF_LONG__ * 8) -#endif - -#define mb() asm volatile("sync":::"memory") -#define rmb() asm volatile("sync":::"memory") -#define wmb() asm volatile("sync"::: "memory") - -/* - * Architectures without cache coherency need something like the following: - * - * #define mb() mc() - * #define rmb() rmc() - * #define wmb() wmc() - * #define mc() arch_cache_flush() - * #define rmc() arch_cache_flush_read() - * #define wmc() arch_cache_flush_write() - */ - -#define mc() barrier() -#define rmc() barrier() -#define wmc() barrier() - -/* Assume SMP machine, given we don't have this information */ -#define CONFIG_SMP 1 - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() -#define smp_mc() mc() -#define smp_rmc() rmc() -#define smp_wmc() wmc() -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_mc() barrier() -#define smp_rmc() barrier() -#define smp_wmc() barrier() -#endif - -/* Nop everywhere except on alpha. */ -#define smp_read_barrier_depends() - -static inline void cpu_relax(void) -{ - barrier(); -} - -/* - * Serialize core instruction execution. Also acts as a compiler barrier. 
- */ -static inline void sync_core() -{ - asm volatile("isync" : : : "memory"); -} - -#define mftbl() \ - ({ \ - unsigned long rval; \ - asm volatile("mftbl %0" : "=r" (rval)); \ - rval; \ - }) - -#define mftbu() \ - ({ \ - unsigned long rval; \ - asm volatile("mftbu %0" : "=r" (rval)); \ - rval; \ - }) - -typedef unsigned long long cycles_t; - -static inline cycles_t get_cycles (void) -{ - long h, l; - - for (;;) { - h = mftbu(); - barrier(); - l = mftbl(); - barrier(); - if (mftbu() == h) - return (((cycles_t) h) << 32) + l; - } -} - -#endif /* _ARCH_PPC_H */ diff --git a/arch_s390.h b/arch_s390.h deleted file mode 100644 index dba7240..0000000 --- a/arch_s390.h +++ /dev/null @@ -1,80 +0,0 @@ -#ifndef _ARCH_S390_H -#define _ARCH_S390_H - -/* - * Trivial definitions for the S390 architecture based on information from the - * Principles of Operation "CPU Serialization" (5-91), "BRANCH ON CONDITION" - * (7-25) and "STORE CLOCK" (7-169). - * - * Copyright (c) 2009 Novell, Inc. - * Author: Jan Blunck - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include - -#define CONFIG_HAVE_MEM_COHERENCY -/* Assume SMP machine, given we don't have this information */ -#define CONFIG_SMP 1 - -#ifndef BITS_PER_LONG -#define BITS_PER_LONG (__SIZEOF_LONG__ * 8) -#endif - -#define mb() __asm__ __volatile__("bcr 15,0" : : : "memory") -#define rmb() __asm__ __volatile__("bcr 15,0" : : : "memory"); -#define wmb() __asm__ __volatile__("bcr 15,0" : : : "memory"); -#define mc() barrier() -#define rmc() barrier() -#define wmc() barrier() - -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() -#define smp_mc() mc() -#define smp_rmc() rmc() -#define smp_wmc() wmc() - -/* Nop everywhere except on alpha. */ -#define smp_read_barrier_depends() - -static inline void cpu_relax(void) -{ - barrier(); -} - -static inline void sync_core() -{ - __asm__ __volatile__("bcr 15,0" : : : "memory"); -} - -typedef unsigned long long cycles_t; - -static inline cycles_t get_cycles (void) -{ - cycles_t cycles; - - __asm__ __volatile__("stck %0" : "=m" (cycles) : : "cc", "memory" ); - - return cycles; -} - -#endif /* _ARCH_S390_H */ diff --git a/arch_x86.h b/arch_x86.h deleted file mode 100644 index 8a57325..0000000 --- a/arch_x86.h +++ /dev/null @@ -1,135 +0,0 @@ -#ifndef _ARCH_X86_H -#define _ARCH_X86_H - -/* - * arch_x86.h: trivial definitions for the x86 architecture. - * - * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. 
- * Copyright (c) 2009 Mathieu Desnoyers - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. -* - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include -#include - -/* Assume P4 or newer */ -#define CONFIG_HAVE_FENCE 1 -#define CONFIG_HAVE_MEM_COHERENCY - -#ifndef BITS_PER_LONG -#define BITS_PER_LONG (__SIZEOF_LONG__ * 8) -#endif - -#ifdef CONFIG_HAVE_FENCE -#define mb() asm volatile("mfence":::"memory") -#define rmb() asm volatile("lfence":::"memory") -#define wmb() asm volatile("sfence"::: "memory") -#else -/* - * Some non-Intel clones support out of order store. wmb() ceases to be a - * nop for these. - */ -#define mb() asm volatile("lock; addl $0,0(%%esp)":::"memory") -#define rmb() asm volatile("lock; addl $0,0(%%esp)":::"memory") -#define wmb() asm volatile("lock; addl $0,0(%%esp)"::: "memory") -#endif - -/* - * Architectures without cache coherency need something like the following: - * - * #define mb() mc() - * #define rmb() rmc() - * #define wmb() wmc() - * #define mc() arch_cache_flush() - * #define rmc() arch_cache_flush_read() - * #define wmc() arch_cache_flush_write() - */ - -#define mc() barrier() -#define rmc() barrier() -#define wmc() barrier() - -/* Assume SMP machine, given we don't have this information */ -#define CONFIG_SMP 1 - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() -#define smp_mc() mc() -#define smp_rmc() rmc() -#define smp_wmc() wmc() -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_mc() barrier() -#define smp_rmc() barrier() -#define smp_wmc() barrier() -#endif - -/* Nop everywhere except on alpha. */ -#define smp_read_barrier_depends() - -static inline void rep_nop(void) -{ - asm volatile("rep; nop" : : : "memory"); -} - -static inline void cpu_relax(void) -{ - rep_nop(); -} - -/* - * Serialize core instruction execution. Also acts as a compiler barrier. - */ -#ifdef __PIC__ -/* - * Cannot use cpuid because it clobbers the ebx register and clashes - * with -fPIC : - * error: PIC register 'ebx' clobbered in 'asm' - */ -static inline void sync_core(void) -{ - mb(); -} -#else -static inline void sync_core(void) -{ - asm volatile("cpuid" : : : "memory", "eax", "ebx", "ecx", "edx"); -} -#endif - -#define rdtscll(val) \ - do { \ - unsigned int __a, __d; \ - asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \ - (val) = ((unsigned long long)__a) \ - | (((unsigned long long)__d) << 32); \ - } while(0) - -typedef unsigned long long cycles_t; - -static inline cycles_t get_cycles(void) -{ - cycles_t ret = 0; - - rdtscll(ret); - return ret; -} - -#endif /* _ARCH_X86_H */ diff --git a/compiler.h b/compiler.h deleted file mode 100644 index 99972f3..0000000 --- a/compiler.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef _COMPILER_H -#define _COMPILER_H - -/* - * compiler.h - * - * Compiler definitions. 
- * - * Copyright (c) 2009 Mathieu Desnoyers - * - * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED - * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - * - * Permission is hereby granted to use or copy this program - * for any purpose, provided the above notices are retained on all copies. - * Permission to modify the code and to distribute modified code is granted, - * provided the above notices are retained, and a notice that the code was - * modified is included with the above copyright notice. - */ - -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) - -#define barrier() asm volatile("" : : : "memory"); - -/* - * Instruct the compiler to perform only a single access to a variable - * (prohibits merging and refetching). The compiler is also forbidden to reorder - * successive instances of ACCESS_ONCE(), but only when the compiler is aware of - * particular ordering. Compiler ordering can be ensured, for example, by - * putting two ACCESS_ONCE() in separate C statements. - * - * This macro does absolutely -nothing- to prevent the CPU from reordering, - * merging, or refetching absolutely anything at any time. Its main intended - * use is to mediate communication between process-level code and irq/NMI - * handlers, all running on the same CPU. - */ -#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&x) - -#if (__GNUC__ == 4) -#define __compiler_offsetof(a, b) __builtin_offsetof(a, b) -#endif - -#ifdef __compiler_offsetof -#define offsetof(TYPE, MEMBER) __compiler_offsetof(TYPE, MEMBER) -#else -#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) -#endif - -#endif /* _COMPILER_H */ diff --git a/tests/Makefile.inc b/tests/Makefile.inc index f62256b..20eef58 100644 --- a/tests/Makefile.inc +++ b/tests/Makefile.inc @@ -9,7 +9,7 @@ endif LIBDIR=.. 
-CFLAGS+=-I ${LIBDIR} +CFLAGS+=-I${LIBDIR} URCU_SIGNAL=${LIBDIR}/urcu.o ${LIBDIR}/urcu.h URCU_SIGNAL_YIELD=${LIBDIR}/urcu-yield.o ${LIBDIR}/urcu.h @@ -24,7 +24,7 @@ all: test_urcu test_urcu_dynamic_link test_urcu_timing \ urcutorture-yield test_mutex test_looplen test_urcu_gc \ test_urcu_gc_mb test_qsbr_gc test_qsbr_lgc test_urcu_lgc \ test_urcu_lgc_mb test_qsbr_dynamic_link test_urcu_mb_defer \ - test_atomic + test_uatomic api.h: ${APIHEADER} cp -f ${APIHEADER} api.h @@ -109,7 +109,7 @@ urcutorture: urcutorture.c rcutorture.h api.h ${URCU_SIGNAL} urcutorture-yield: urcutorture.c ${URCU_SIGNAL_YIELD} rcutorture.h api.h $(CC) -DDEBUG_YIELD ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP) -test_atomic: test_atomic.c ../arch_atomic.h +test_uatomic: test_uatomic.c ../urcu/arch_uatomic.h $(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP) ,PHONY: clean @@ -122,4 +122,5 @@ clean: urcutorture-yield liburcu.so api.h \ test_mutex test_urcu_gc test_urcu_gc_mb urcu-asm-1.S \ test_qsbr_lgc test_qsbr_gc test_looplen test_urcu_lgc \ - test_urcu_lgc_mb test_qsbr_dynamic_link test_urcu_mb_defer + test_urcu_lgc_mb test_qsbr_dynamic_link test_urcu_mb_defer \ + test_uatomic diff --git a/tests/api_gcc.h b/tests/api_gcc.h index 721af0a..c53b2c1 100644 --- a/tests/api_gcc.h +++ b/tests/api_gcc.h @@ -285,7 +285,6 @@ cmpxchg(volatile long *ptr, long oldval, long newval) #include #include #include -#include /* #include "atomic.h" */ /* diff --git a/tests/rcutorture.h b/tests/rcutorture.h index 7d4948e..712d59d 100644 --- a/tests/rcutorture.h +++ b/tests/rcutorture.h @@ -116,7 +116,7 @@ void *rcu_read_perf_test(void *arg) rcu_register_thread(); run_on(me); - atomic_inc(&nthreadsrunning); + uatomic_inc(&nthreadsrunning); while (goflag == GOFLAG_INIT) poll(NULL, 0, 1); mark_rcu_quiescent_state(); @@ -141,7 +141,7 @@ void *rcu_update_perf_test(void *arg) { long long n_updates_local = 0; - atomic_inc(&nthreadsrunning); + uatomic_inc(&nthreadsrunning); while (goflag == GOFLAG_INIT) poll(NULL, 0, 1); while (goflag == GOFLAG_RUN) { @@ -156,7 +156,7 @@ void perftestinit(void) { init_per_thread(n_reads_pt, 0LL); init_per_thread(n_updates_pt, 0LL); - atomic_set(&nthreadsrunning, 0); + uatomic_set(&nthreadsrunning, 0); } void perftestrun(int nthreads, int nreaders, int nupdaters) @@ -165,7 +165,7 @@ void perftestrun(int nthreads, int nreaders, int nupdaters) int duration = 1; smp_mb(); - while (atomic_read(&nthreadsrunning) < nthreads) + while (uatomic_read(&nthreadsrunning) < nthreads) poll(NULL, 0, 1); goflag = GOFLAG_RUN; smp_mb(); diff --git a/tests/test_atomic.c b/tests/test_atomic.c deleted file mode 100644 index 585c8ca..0000000 --- a/tests/test_atomic.c +++ /dev/null @@ -1,66 +0,0 @@ -#include -#include -#include - -#if (defined(__i386__) || defined(__x86_64__)) -#define HAS_ATOMIC_BYTE -#define HAS_ATOMIC_SHORT -#endif - -struct testvals { -#ifdef HAS_ATOMIC_BYTE - unsigned char c; -#endif -#ifdef HAS_ATOMIC_SHORT - unsigned short s; -#endif - unsigned int i; - unsigned long l; -}; - -static struct testvals vals; - -#define do_test(ptr) \ -do { \ - __typeof__(*(ptr)) v; \ - \ - atomic_add(ptr, 10); \ - assert(atomic_read(ptr) == 10); \ - atomic_add(ptr, -11UL); \ - assert(atomic_read(ptr) == (__typeof__(*(ptr)))-1UL); \ - v = cmpxchg(ptr, -1UL, 22); \ - assert(atomic_read(ptr) == 22); \ - assert(v == (__typeof__(*(ptr)))-1UL); \ - v = cmpxchg(ptr, 33, 44); \ - assert(atomic_read(ptr) == 22); \ - assert(v == 22); \ - v = xchg(ptr, 55); \ - assert(atomic_read(ptr) == 55); \ - assert(v == 22); \ - atomic_set(ptr, 22); \ - atomic_inc(ptr); 
\ - assert(atomic_read(ptr) == 23); \ - atomic_dec(ptr); \ - assert(atomic_read(ptr) == 22); \ - v = atomic_add_return(ptr, 100); \ - assert(v == 122); \ - assert(atomic_read(ptr) == 122); \ - v = atomic_sub_return(ptr, 1); \ - assert(v == 121); \ - assert(atomic_read(ptr) == 121); \ -} while (0) - -int main(int argc, char **argv) -{ -#ifdef HAS_ATOMIC_BYTE - do_test(&vals.c); -#endif -#ifdef HAS_ATOMIC_SHORT - do_test(&vals.s); -#endif - do_test(&vals.i); - do_test(&vals.l); - printf("Atomic ops test OK\n"); - - return 0; -} diff --git a/tests/test_looplen.c b/tests/test_looplen.c index 5006951..9e2ee2b 100644 --- a/tests/test_looplen.c +++ b/tests/test_looplen.c @@ -33,7 +33,7 @@ #include #include -#include "../arch.h" +#include #if defined(_syscall0) _syscall0(pid_t, gettid) @@ -55,7 +55,7 @@ static inline pid_t gettid(void) #else #define debug_yield_read() #endif -#include "../urcu.h" +#include static inline void loop_sleep(unsigned long l) { diff --git a/tests/test_mutex.c b/tests/test_mutex.c index e94819a..e3b1b64 100644 --- a/tests/test_mutex.c +++ b/tests/test_mutex.c @@ -33,7 +33,7 @@ #include #include -#include "../arch.h" +#include /* Make this big enough to include the POWER5+ L3 cacheline size of 256B */ #define CACHE_LINE_SIZE 4096 @@ -61,7 +61,7 @@ static inline pid_t gettid(void) #else #define debug_yield_read() #endif -#include "../urcu.h" +#include struct test_array { int a; diff --git a/tests/test_perthreadlock.c b/tests/test_perthreadlock.c index 7402f01..ea47e46 100644 --- a/tests/test_perthreadlock.c +++ b/tests/test_perthreadlock.c @@ -33,7 +33,7 @@ #include #include -#include "../arch.h" +#include /* Make this big enough to include the POWER5+ L3 cacheline size of 256B */ #define CACHE_LINE_SIZE 4096 @@ -61,7 +61,7 @@ static inline pid_t gettid(void) #else #define debug_yield_read() #endif -#include "../urcu.h" +#include struct test_array { int a; diff --git a/tests/test_perthreadlock_timing.c b/tests/test_perthreadlock_timing.c index d5bd912..10720cb 100644 --- a/tests/test_perthreadlock_timing.c +++ b/tests/test_perthreadlock_timing.c @@ -32,7 +32,7 @@ #include #include -#include "../arch.h" +#include /* Make this big enough to include the POWER5+ L3 cacheline size of 256B */ #define CACHE_LINE_SIZE 4096 @@ -52,7 +52,7 @@ static inline pid_t gettid(void) } #endif -#include "../urcu.h" +#include struct test_array { int a; diff --git a/tests/test_qsbr.c b/tests/test_qsbr.c index 4379771..cf2fec2 100644 --- a/tests/test_qsbr.c +++ b/tests/test_qsbr.c @@ -33,7 +33,7 @@ #include #include -#include "../arch.h" +#include /* Make this big enough to include the POWER5+ L3 cacheline size of 256B */ #define CACHE_LINE_SIZE 4096 @@ -61,7 +61,7 @@ static inline pid_t gettid(void) #else #define debug_yield_read() #endif -#include "../urcu-qsbr.h" +#include "urcu-qsbr.h" struct test_array { int a; diff --git a/tests/test_qsbr_gc.c b/tests/test_qsbr_gc.c index 004672d..d32d1a0 100644 --- a/tests/test_qsbr_gc.c +++ b/tests/test_qsbr_gc.c @@ -33,7 +33,7 @@ #include #include -#include "../arch.h" +#include /* Make this big enough to include the POWER5+ L3 cacheline size of 256B */ #define CACHE_LINE_SIZE 4096 @@ -57,7 +57,7 @@ static inline pid_t gettid(void) #endif #define _LGPL_SOURCE -#include "../urcu-qsbr.h" +#include struct test_array { int a; diff --git a/tests/test_qsbr_timing.c b/tests/test_qsbr_timing.c index 2a8963a..3585f83 100644 --- a/tests/test_qsbr_timing.c +++ b/tests/test_qsbr_timing.c @@ -30,7 +30,7 @@ #include #include #include -#include "../arch.h" +#include /* 
Make this big enough to include the POWER5+ L3 cacheline size of 256B */ #define CACHE_LINE_SIZE 4096 @@ -51,7 +51,7 @@ static inline pid_t gettid(void) #endif #define _LGPL_SOURCE -#include "../urcu-qsbr.h" +#include pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER; diff --git a/tests/test_rwlock.c b/tests/test_rwlock.c index c7edd32..d3f072c 100644 --- a/tests/test_rwlock.c +++ b/tests/test_rwlock.c @@ -33,7 +33,7 @@ #include #include -#include "../arch.h" +#include /* Make this big enough to include the POWER5+ L3 cacheline size of 256B */ #define CACHE_LINE_SIZE 4096 @@ -61,7 +61,7 @@ static inline pid_t gettid(void) #else #define debug_yield_read() #endif -#include "../urcu.h" +#include struct test_array { int a; diff --git a/tests/test_rwlock_timing.c b/tests/test_rwlock_timing.c index 5bc93d3..b26f83d 100644 --- a/tests/test_rwlock_timing.c +++ b/tests/test_rwlock_timing.c @@ -31,7 +31,7 @@ #include #include #include -#include "../arch.h" +#include /* Make this big enough to include the POWER5+ L3 cacheline size of 256B */ #define CACHE_LINE_SIZE 4096 @@ -51,7 +51,7 @@ static inline pid_t gettid(void) } #endif -#include "../urcu.h" +#include struct test_array { int a; diff --git a/tests/test_uatomic.c b/tests/test_uatomic.c new file mode 100644 index 0000000..6c11a2d --- /dev/null +++ b/tests/test_uatomic.c @@ -0,0 +1,66 @@ +#include +#include +#include + +#if (defined(__i386__) || defined(__x86_64__)) +#define HAS_ATOMIC_BYTE +#define HAS_ATOMIC_SHORT +#endif + +struct testvals { +#ifdef HAS_ATOMIC_BYTE + unsigned char c; +#endif +#ifdef HAS_ATOMIC_SHORT + unsigned short s; +#endif + unsigned int i; + unsigned long l; +}; + +static struct testvals vals; + +#define do_test(ptr) \ +do { \ + __typeof__(*(ptr)) v; \ + \ + uatomic_add(ptr, 10); \ + assert(uatomic_read(ptr) == 10); \ + uatomic_add(ptr, -11UL); \ + assert(uatomic_read(ptr) == (__typeof__(*(ptr)))-1UL); \ + v = uatomic_cmpxchg(ptr, -1UL, 22); \ + assert(uatomic_read(ptr) == 22); \ + assert(v == (__typeof__(*(ptr)))-1UL); \ + v = uatomic_cmpxchg(ptr, 33, 44); \ + assert(uatomic_read(ptr) == 22); \ + assert(v == 22); \ + v = uatomic_xchg(ptr, 55); \ + assert(uatomic_read(ptr) == 55); \ + assert(v == 22); \ + uatomic_set(ptr, 22); \ + uatomic_inc(ptr); \ + assert(uatomic_read(ptr) == 23); \ + uatomic_dec(ptr); \ + assert(uatomic_read(ptr) == 22); \ + v = uatomic_add_return(ptr, 100); \ + assert(v == 122); \ + assert(uatomic_read(ptr) == 122); \ + v = uatomic_sub_return(ptr, 1); \ + assert(v == 121); \ + assert(uatomic_read(ptr) == 121); \ +} while (0) + +int main(int argc, char **argv) +{ +#ifdef HAS_ATOMIC_BYTE + do_test(&vals.c); +#endif +#ifdef HAS_ATOMIC_SHORT + do_test(&vals.s); +#endif + do_test(&vals.i); + do_test(&vals.l); + printf("Atomic ops test OK\n"); + + return 0; +} diff --git a/tests/test_urcu.c b/tests/test_urcu.c index 18683bf..8d090eb 100644 --- a/tests/test_urcu.c +++ b/tests/test_urcu.c @@ -33,7 +33,7 @@ #include #include -#include "../arch.h" +#include /* Make this big enough to include the POWER5+ L3 cacheline size of 256B */ #define CACHE_LINE_SIZE 4096 @@ -61,7 +61,7 @@ static inline pid_t gettid(void) #else #define debug_yield_read() #endif -#include "../urcu.h" +#include struct test_array { int a; diff --git a/tests/test_urcu_defer.c b/tests/test_urcu_defer.c index 0961b8d..1c6f742 100644 --- a/tests/test_urcu_defer.c +++ b/tests/test_urcu_defer.c @@ -33,7 +33,7 @@ #include #include -#include "../arch.h" +#include /* Make this big enough to include the POWER5+ L3 cacheline size of 256B */ 
#define CACHE_LINE_SIZE 4096 @@ -61,8 +61,8 @@ static inline pid_t gettid(void) #else #define debug_yield_read() #endif -#include "../urcu.h" -#include "../urcu-defer.h" +#include +#include struct test_array { int a; diff --git a/tests/test_urcu_gc.c b/tests/test_urcu_gc.c index 213c68b..60f7816 100644 --- a/tests/test_urcu_gc.c +++ b/tests/test_urcu_gc.c @@ -33,7 +33,7 @@ #include #include -#include "../arch.h" +#include /* Make this big enough to include the POWER5+ L3 cacheline size of 256B */ #define CACHE_LINE_SIZE 4096 @@ -61,7 +61,7 @@ static inline pid_t gettid(void) #else #define debug_yield_read() #endif -#include "../urcu.h" +#include struct test_array { int a; diff --git a/tests/test_urcu_timing.c b/tests/test_urcu_timing.c index a3ca783..27d9730 100644 --- a/tests/test_urcu_timing.c +++ b/tests/test_urcu_timing.c @@ -30,7 +30,7 @@ #include #include #include -#include "../arch.h" +#include /* Make this big enough to include the POWER5+ L3 cacheline size of 256B */ #define CACHE_LINE_SIZE 4096 @@ -51,7 +51,7 @@ static inline pid_t gettid(void) #endif #define _LGPL_SOURCE -#include "../urcu.h" +#include pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER; diff --git a/tests/urcu-asm.c b/tests/urcu-asm.c index 66be709..dd26604 100644 --- a/tests/urcu-asm.c +++ b/tests/urcu-asm.c @@ -20,7 +20,7 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -#include "../urcu.h" +#include void show_read_lock(void) { diff --git a/tests/urcutorture.c b/tests/urcutorture.c index a0aa5dd..9af450d 100644 --- a/tests/urcutorture.c +++ b/tests/urcutorture.c @@ -3,8 +3,8 @@ #include #include #include -#include "../arch_atomic.h" #include "api.h" #define _LGPL_SOURCE -#include "../urcu.h" +#include +#include #include "rcutorture.h" diff --git a/urcu-defer-static.h b/urcu-defer-static.h index 427f3d3..dd875ca 100644 --- a/urcu-defer-static.h +++ b/urcu-defer-static.h @@ -32,8 +32,8 @@ #include #include -#include -#include +#include +#include /* diff --git a/urcu-defer.c b/urcu-defer.c index ccf3511..e71b0cf 100644 --- a/urcu-defer.c +++ b/urcu-defer.c @@ -111,8 +111,8 @@ static void internal_urcu_unlock(pthread_mutex_t *mutex) */ static void wake_up_defer(void) { - if (unlikely(atomic_read(&defer_thread_futex) == -1)) { - atomic_set(&defer_thread_futex, 0); + if (unlikely(uatomic_read(&defer_thread_futex) == -1)) { + uatomic_set(&defer_thread_futex, 0); futex(&defer_thread_futex, FUTEX_WAKE, 1, NULL, NULL, 0); } @@ -137,15 +137,15 @@ static unsigned long rcu_defer_num_callbacks(void) */ static void wait_defer(void) { - atomic_dec(&defer_thread_futex); + uatomic_dec(&defer_thread_futex); smp_mb(); /* Write futex before read queue */ if (rcu_defer_num_callbacks()) { smp_mb(); /* Read queue before write futex */ /* Callbacks are queued, don't wait. */ - atomic_set(&defer_thread_futex, 0); + uatomic_set(&defer_thread_futex, 0); } else { smp_rmb(); /* Read queue before read futex */ - if (atomic_read(&defer_thread_futex) == -1) + if (uatomic_read(&defer_thread_futex) == -1) futex(&defer_thread_futex, FUTEX_WAIT, -1, NULL, NULL, 0); } diff --git a/urcu-qsbr-static.h b/urcu-qsbr-static.h index 87305cb..0d73bcf 100644 --- a/urcu-qsbr-static.h +++ b/urcu-qsbr-static.h @@ -36,8 +36,8 @@ #include #include -#include -#include +#include +#include /* * Identify a shared load. A smp_rmc() or smp_mc() should come before the load. 
@@ -191,8 +191,8 @@ extern int gp_futex; */ static inline void wake_up_gp(void) { - if (unlikely(atomic_read(&gp_futex) == -1)) { - atomic_set(&gp_futex, 0); + if (unlikely(uatomic_read(&gp_futex) == -1)) { + uatomic_set(&gp_futex, 0); futex(&gp_futex, FUTEX_WAKE, 1, NULL, NULL, 0); } @@ -286,7 +286,7 @@ static inline void _rcu_thread_online(void) if (!__builtin_constant_p(_new) || \ ((_new) != NULL)) \ wmb(); \ - cmpxchg(p, old, _new); \ + uatomic_cmpxchg(p, old, _new); \ }) /** @@ -300,7 +300,7 @@ static inline void _rcu_thread_online(void) if (!__builtin_constant_p(v) || \ ((v) != NULL)) \ wmb(); \ - xchg(p, v); \ + uatomic_xchg(p, v); \ }) /* diff --git a/urcu-qsbr.c b/urcu-qsbr.c index dac6649..f5103ee 100644 --- a/urcu-qsbr.c +++ b/urcu-qsbr.c @@ -106,17 +106,17 @@ static void internal_urcu_unlock(void) */ static void wait_gp(struct reader_registry *index) { - atomic_dec(&gp_futex); + uatomic_dec(&gp_futex); smp_mb(); /* Write futex before read reader_gp */ if (!rcu_gp_ongoing(index->rcu_reader_qs_gp)) { /* Read reader_gp before write futex */ smp_mb(); /* Callbacks are queued, don't wait. */ - atomic_set(&gp_futex, 0); + uatomic_set(&gp_futex, 0); } else { /* Read reader_gp before read futex */ smp_rmb(); - if (atomic_read(&gp_futex) == -1) + if (uatomic_read(&gp_futex) == -1) futex(&gp_futex, FUTEX_WAIT, -1, NULL, NULL, 0); } @@ -287,13 +287,13 @@ void *rcu_assign_pointer_sym(void **p, void *v) void *rcu_cmpxchg_pointer_sym(void **p, void *old, void *_new) { wmb(); - return cmpxchg(p, old, _new); + return uatomic_cmpxchg(p, old, _new); } void *rcu_xchg_pointer_sym(void **p, void *v) { wmb(); - return xchg(p, v); + return uatomic_xchg(p, v); } void *rcu_publish_content_sym(void **p, void *v) diff --git a/urcu-static.h b/urcu-static.h index 3caa0f9..0a23ee5 100644 --- a/urcu-static.h +++ b/urcu-static.h @@ -34,8 +34,8 @@ #include #include -#include -#include +#include +#include /* * Identify a shared load. A smp_rmc() or smp_mc() should come before the load. @@ -227,8 +227,8 @@ extern int gp_futex; */ static inline void wake_up_gp(void) { - if (unlikely(atomic_read(&gp_futex) == -1)) { - atomic_set(&gp_futex, 0); + if (unlikely(uatomic_read(&gp_futex) == -1)) { + uatomic_set(&gp_futex, 0); futex(&gp_futex, FUTEX_WAKE, 1, NULL, NULL, 0); } @@ -323,7 +323,7 @@ static inline void _rcu_read_unlock(void) if (!__builtin_constant_p(_new) || \ ((_new) != NULL)) \ wmb(); \ - cmpxchg(p, old, _new); \ + uatomic_cmpxchg(p, old, _new); \ }) /** @@ -337,7 +337,7 @@ static inline void _rcu_read_unlock(void) if (!__builtin_constant_p(v) || \ ((v) != NULL)) \ wmb(); \ - xchg(p, v); \ + uatomic_xchg(p, v); \ }) /* diff --git a/urcu.c b/urcu.c index 07661a3..b323711 100644 --- a/urcu.c +++ b/urcu.c @@ -211,17 +211,17 @@ static void force_mb_all_threads(void) */ static void wait_gp(struct reader_registry *index) { - atomic_dec(&gp_futex); + uatomic_dec(&gp_futex); force_mb_single_thread(index); /* Write futex before read reader_gp */ if (!rcu_old_gp_ongoing(index->urcu_active_readers)) { /* Read reader_gp before write futex */ force_mb_single_thread(index); /* Callbacks are queued, don't wait. 
*/ - atomic_set(&gp_futex, 0); + uatomic_set(&gp_futex, 0); } else { /* Read reader_gp before read futex */ force_mb_single_thread(index); - if (atomic_read(&gp_futex) == -1) + if (uatomic_read(&gp_futex) == -1) futex(&gp_futex, FUTEX_WAIT, -1, NULL, NULL, 0); } @@ -373,13 +373,13 @@ void *rcu_assign_pointer_sym(void **p, void *v) void *rcu_xchg_pointer_sym(void **p, void *v) { wmb(); - return xchg(p, v); + return uatomic_xchg(p, v); } void *rcu_cmpxchg_pointer_sym(void **p, void *old, void *_new) { wmb(); - return cmpxchg(p, old, _new); + return uatomic_cmpxchg(p, old, _new); } void *rcu_publish_content_sym(void **p, void *v) diff --git a/urcu/arch_ppc.h b/urcu/arch_ppc.h new file mode 100644 index 0000000..8dfd6d1 --- /dev/null +++ b/urcu/arch_ppc.h @@ -0,0 +1,119 @@ +#ifndef _URCU_ARCH_PPC_H +#define _URCU_ARCH_PPC_H + +/* + * arch_ppc.h: trivial definitions for the powerpc architecture. + * + * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. + * Copyright (c) 2009 Mathieu Desnoyers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. +* + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include + +#define CONFIG_HAVE_FENCE 1 +#define CONFIG_HAVE_MEM_COHERENCY + +#ifndef BITS_PER_LONG +#define BITS_PER_LONG (__SIZEOF_LONG__ * 8) +#endif + +#define mb() asm volatile("sync":::"memory") +#define rmb() asm volatile("sync":::"memory") +#define wmb() asm volatile("sync"::: "memory") + +/* + * Architectures without cache coherency need something like the following: + * + * #define mb() mc() + * #define rmb() rmc() + * #define wmb() wmc() + * #define mc() arch_cache_flush() + * #define rmc() arch_cache_flush_read() + * #define wmc() arch_cache_flush_write() + */ + +#define mc() barrier() +#define rmc() barrier() +#define wmc() barrier() + +/* Assume SMP machine, given we don't have this information */ +#define CONFIG_SMP 1 + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#define smp_mc() mc() +#define smp_rmc() rmc() +#define smp_wmc() wmc() +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_mc() barrier() +#define smp_rmc() barrier() +#define smp_wmc() barrier() +#endif + +/* Nop everywhere except on alpha. */ +#define smp_read_barrier_depends() + +static inline void cpu_relax(void) +{ + barrier(); +} + +/* + * Serialize core instruction execution. Also acts as a compiler barrier. 
+ */ +static inline void sync_core() +{ + asm volatile("isync" : : : "memory"); +} + +#define mftbl() \ + ({ \ + unsigned long rval; \ + asm volatile("mftbl %0" : "=r" (rval)); \ + rval; \ + }) + +#define mftbu() \ + ({ \ + unsigned long rval; \ + asm volatile("mftbu %0" : "=r" (rval)); \ + rval; \ + }) + +typedef unsigned long long cycles_t; + +static inline cycles_t get_cycles (void) +{ + long h, l; + + for (;;) { + h = mftbu(); + barrier(); + l = mftbl(); + barrier(); + if (mftbu() == h) + return (((cycles_t) h) << 32) + l; + } +} + +#endif /* _URCU_ARCH_PPC_H */ diff --git a/urcu/arch_s390.h b/urcu/arch_s390.h new file mode 100644 index 0000000..6c69a46 --- /dev/null +++ b/urcu/arch_s390.h @@ -0,0 +1,80 @@ +#ifndef _ARCH_S390_H +#define _ARCH_S390_H + +/* + * Trivial definitions for the S390 architecture based on information from the + * Principles of Operation "CPU Serialization" (5-91), "BRANCH ON CONDITION" + * (7-25) and "STORE CLOCK" (7-169). + * + * Copyright (c) 2009 Novell, Inc. + * Author: Jan Blunck + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include + +#define CONFIG_HAVE_MEM_COHERENCY +/* Assume SMP machine, given we don't have this information */ +#define CONFIG_SMP 1 + +#ifndef BITS_PER_LONG +#define BITS_PER_LONG (__SIZEOF_LONG__ * 8) +#endif + +#define mb() __asm__ __volatile__("bcr 15,0" : : : "memory") +#define rmb() __asm__ __volatile__("bcr 15,0" : : : "memory"); +#define wmb() __asm__ __volatile__("bcr 15,0" : : : "memory"); +#define mc() barrier() +#define rmc() barrier() +#define wmc() barrier() + +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#define smp_mc() mc() +#define smp_rmc() rmc() +#define smp_wmc() wmc() + +/* Nop everywhere except on alpha. */ +#define smp_read_barrier_depends() + +static inline void cpu_relax(void) +{ + barrier(); +} + +static inline void sync_core() +{ + __asm__ __volatile__("bcr 15,0" : : : "memory"); +} + +typedef unsigned long long cycles_t; + +static inline cycles_t get_cycles (void) +{ + cycles_t cycles; + + __asm__ __volatile__("stck %0" : "=m" (cycles) : : "cc", "memory" ); + + return cycles; +} + +#endif /* _ARCH_S390_H */ diff --git a/urcu/arch_uatomic_ppc.h b/urcu/arch_uatomic_ppc.h new file mode 100644 index 0000000..486b974 --- /dev/null +++ b/urcu/arch_uatomic_ppc.h @@ -0,0 +1,235 @@ +#ifndef _URCU_ARCH_UATOMIC_PPC_H +#define _URCU_ARCH_UATOMIC_PPC_H + +/* + * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved. 
+ * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved. + * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2009 Mathieu Desnoyers + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED + * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program + * for any purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is granted, + * provided the above notices are retained, and a notice that the code was + * modified is included with the above copyright notice. + * + * Code inspired from libuatomic_ops-1.2, inherited in part from the + * Boehm-Demers-Weiser conservative garbage collector. + */ + +#include + +#ifndef __SIZEOF_LONG__ +#ifdef __powerpc64__ +#define __SIZEOF_LONG__ 8 +#else +#define __SIZEOF_LONG__ 4 +#endif +#endif + +#ifndef BITS_PER_LONG +#define BITS_PER_LONG (__SIZEOF_LONG__ * 8) +#endif + +#define ILLEGAL_INSTR ".long 0xd00d00" + +#ifndef _INCLUDE_API_H + +#define uatomic_set(addr, v) \ +do { \ + ACCESS_ONCE(*(addr)) = (v); \ +} while (0) + +#define uatomic_read(addr) ACCESS_ONCE(*(addr)) + +/* + * Using a isync as second barrier for exchange to provide acquire semantic. + * According to uatomic_ops/sysdeps/gcc/powerpc.h, the documentation is "fairly + * explicit that this also has acquire semantics." + * Derived from AO_compare_and_swap(), but removed the comparison. + */ + +/* xchg */ + +static inline __attribute__((always_inline)) +unsigned long _uatomic_exchange(void *addr, unsigned long val, int len) +{ + switch (len) { + case 4: + { + unsigned int result; + + __asm__ __volatile__( + "lwsync\n" + "1:\t" "lwarx %0,0,%1\n" /* load and reserve */ + "stwcx. %2,0,%1\n" /* else store conditional */ + "bne- 1b\n" /* retry if lost reservation */ + "isync\n" + : "=&r"(result) + : "r"(addr), "r"(val) + : "memory", "cc"); + + return result; + } +#if (BITS_PER_LONG == 64) + case 8: + { + unsigned long result; + + __asm__ __volatile__( + "lwsync\n" + "1:\t" "ldarx %0,0,%1\n" /* load and reserve */ + "stdcx. %2,0,%1\n" /* else store conditional */ + "bne- 1b\n" /* retry if lost reservation */ + "isync\n" + : "=&r"(result) + : "r"(addr), "r"(val) + : "memory", "cc"); + + return result; + } +#endif + } + /* generate an illegal instruction. Cannot catch this with linker tricks + * when optimizations are disabled. */ + __asm__ __volatile__(ILLEGAL_INSTR); + return 0; +} + +#define uatomic_xchg(addr, v) \ + ((__typeof__(*(addr))) _uatomic_exchange((addr), (unsigned long)(v), \ + sizeof(*(addr)))) +/* cmpxchg */ + +static inline __attribute__((always_inline)) +unsigned long _uatomic_cmpxchg(void *addr, unsigned long old, + unsigned long _new, int len) +{ + switch (len) { + case 4: + { + unsigned int old_val; + + __asm__ __volatile__( + "lwsync\n" + "1:\t" "lwarx %0,0,%1\n" /* load and reserve */ + "cmpd %0,%3\n" /* if load is not equal to */ + "bne 2f\n" /* old, fail */ + "stwcx. %2,0,%1\n" /* else store conditional */ + "bne- 1b\n" /* retry if lost reservation */ + "isync\n" + "2:\n" + : "=&r"(old_val) + : "r"(addr), "r"((unsigned int)_new), + "r"((unsigned int)old) + : "memory", "cc"); + + return old_val; + } +#if (BITS_PER_LONG == 64) + case 8: + { + unsigned long old_val; + + __asm__ __volatile__( + "lwsync\n" + "1:\t" "ldarx %0,0,%1\n" /* load and reserve */ + "cmpd %0,%3\n" /* if load is not equal to */ + "bne 2f\n" /* old, fail */ + "stdcx. 
+			"bne- 1b\n"		/* retry if lost reservation */
+			"isync\n"
+		"2:\n"
+			: "=&r"(old_val)
+			: "r"(addr), "r"((unsigned long)_new),
+			  "r"((unsigned long)old)
+			: "memory", "cc");
+
+		return old_val;
+	}
+#endif
+	}
+	/* generate an illegal instruction. Cannot catch this with linker tricks
+	 * when optimizations are disabled. */
+	__asm__ __volatile__(ILLEGAL_INSTR);
+	return 0;
+}
+
+
+#define uatomic_cmpxchg(addr, old, _new) \
+	((__typeof__(*(addr))) _uatomic_cmpxchg((addr), (unsigned long)(old),\
+						(unsigned long)(_new), \
+						sizeof(*(addr))))
+
+/* uatomic_add_return */
+
+static inline __attribute__((always_inline))
+unsigned long _uatomic_add_return(void *addr, unsigned long val,
+				  int len)
+{
+	switch (len) {
+	case 4:
+	{
+		unsigned int result;
+
+		__asm__ __volatile__(
+			"lwsync\n"
+		"1:\t"	"lwarx %0,0,%1\n"	/* load and reserve */
+			"add %0,%2,%0\n"	/* add val to value loaded */
+			"stwcx. %0,0,%1\n"	/* store conditional */
+			"bne- 1b\n"		/* retry if lost reservation */
+			"isync\n"
+			: "=&r"(result)
+			: "r"(addr), "r"(val)
+			: "memory", "cc");
+
+		return result;
+	}
+#if (BITS_PER_LONG == 64)
+	case 8:
+	{
+		unsigned long result;
+
+		__asm__ __volatile__(
+			"lwsync\n"
+		"1:\t"	"ldarx %0,0,%1\n"	/* load and reserve */
+			"add %0,%2,%0\n"	/* add val to value loaded */
+			"stdcx. %0,0,%1\n"	/* store conditional */
+			"bne- 1b\n"		/* retry if lost reservation */
+			"isync\n"
+			: "=&r"(result)
+			: "r"(addr), "r"(val)
+			: "memory", "cc");
+
+		return result;
+	}
+#endif
+	}
+	/* generate an illegal instruction. Cannot catch this with linker tricks
+	 * when optimizations are disabled. */
+	__asm__ __volatile__(ILLEGAL_INSTR);
+	return 0;
+}
+
+
+#define uatomic_add_return(addr, v) \
+	((__typeof__(*(addr))) _uatomic_add_return((addr), \
+						(unsigned long)(v), \
+						sizeof(*(addr))))
+
+/* uatomic_sub_return, uatomic_add, uatomic_sub, uatomic_inc, uatomic_dec */
+
+#define uatomic_sub_return(addr, v) uatomic_add_return((addr), -(v))
+
+#define uatomic_add(addr, v) (void)uatomic_add_return((addr), (v))
+#define uatomic_sub(addr, v) (void)uatomic_sub_return((addr), (v))
+
+#define uatomic_inc(addr) uatomic_add((addr), 1)
+#define uatomic_dec(addr) uatomic_add((addr), -1)
+
+#endif /* #ifndef _INCLUDE_API_H */
+
+#endif /* _URCU_ARCH_UATOMIC_PPC_H */
diff --git a/urcu/arch_uatomic_s390.h b/urcu/arch_uatomic_s390.h
new file mode 100644
index 0000000..c289c74
--- /dev/null
+++ b/urcu/arch_uatomic_s390.h
@@ -0,0 +1,99 @@
+#ifndef _URCU_ARCH_ATOMIC_S390_H
+#define _URCU_ARCH_ATOMIC_S390_H
+
+/*
+ * Atomic exchange operations for the S390 architecture. Based on information
+ * taken from the Principles of Operation Appendix A "Conditional Swapping
+ * Instructions (CS, CDS)".
+ *
+ * Copyright (c) 2009 Novell, Inc.
+ * Author: Jan Blunck
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __SIZEOF_LONG__
+#ifdef __s390x__
+#define __SIZEOF_LONG__ 8
+#else
+#define __SIZEOF_LONG__ 4
+#endif
+#endif
+
+#ifndef BITS_PER_LONG
+#define BITS_PER_LONG (__SIZEOF_LONG__ * 8)
+#endif
+
+static inline __attribute__((always_inline))
+unsigned int uatomic_exchange_32(volatile unsigned int *addr, unsigned int val)
+{
+	unsigned int result;
+
+	__asm__ __volatile__(
+		"0: cs %0,%2,%1\n"
+		"   brc 4,0b\n"
+		: "=&r"(result), "=m" (*addr)
+		: "r"(val), "m" (*addr)
+		: "memory", "cc");
+
+	return result;
+}
+
+#if (BITS_PER_LONG == 64)
+
+static inline __attribute__((always_inline))
+unsigned long uatomic_exchange_64(volatile unsigned long *addr,
+				  unsigned long val)
+{
+	unsigned long result;
+
+	__asm__ __volatile__(
+		"0: csg %0,%2,%1\n"
+		"   brc 4,0b\n"
+		: "=&r"(result), "=m" (*addr)
+		: "r"(val), "m" (*addr)
+		: "memory", "cc");
+
+	return result;
+}
+
+#endif
+
+static inline __attribute__((always_inline))
+unsigned long _uatomic_exchange(volatile void *addr, unsigned long val, int len)
+{
+	switch (len) {
+	case 4:
+		return uatomic_exchange_32(addr, val);
+#if (BITS_PER_LONG == 64)
+	case 8:
+		return uatomic_exchange_64(addr, val);
+#endif
+	default:
+		__asm__ __volatile__(".long 0xd00d00");
+	}
+
+	return 0;
+}
+
+#define uatomic_xchg(addr, v) \
+	(__typeof__(*(addr))) _uatomic_exchange((addr), (unsigned long)(v), \
+						sizeof(*(addr)))
+
+#endif /* _URCU_ARCH_ATOMIC_S390_H */
diff --git a/urcu/arch_uatomic_x86.h b/urcu/arch_uatomic_x86.h
new file mode 100644
index 0000000..43de9e6
--- /dev/null
+++ b/urcu/arch_uatomic_x86.h
@@ -0,0 +1,400 @@
+#ifndef _URCU_ARCH_UATOMIC_X86_H
+#define _URCU_ARCH_UATOMIC_X86_H
+
+/*
+ * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
+ * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved.
+ * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2009 Mathieu Desnoyers
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program
+ * for any purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is granted,
+ * provided the above notices are retained, and a notice that the code was
+ * modified is included with the above copyright notice.
+ *
+ * Code inspired from libatomic_ops-1.2, inherited in part from the
+ * Boehm-Demers-Weiser conservative garbage collector.
+ */
+
+#include
+
+#ifndef BITS_PER_LONG
+#define BITS_PER_LONG (__SIZEOF_LONG__ * 8)
+#endif
+
+/*
+ * Derived from AO_compare_and_swap() and AO_test_and_set_full().
+ */
+
+struct __uatomic_dummy {
+	unsigned long v[10];
+};
+#define __hp(x) ((struct __uatomic_dummy *)(x))
+
+#define uatomic_set(addr, v) \
+do { \
+	ACCESS_ONCE(*(addr)) = (v); \
+} while (0)
+
+#define uatomic_read(addr) ACCESS_ONCE(*(addr))
+
+/* cmpxchg */
+
+static inline __attribute__((always_inline))
+unsigned long _uatomic_cmpxchg(void *addr, unsigned long old,
+			       unsigned long _new, int len)
+{
+	switch (len) {
+	case 1:
+	{
+		unsigned char result = old;
+
+		__asm__ __volatile__(
+		"lock; cmpxchgb %2, %1"
+			: "+a"(result), "+m"(*__hp(addr))
+			: "q"((unsigned char)_new)
+			: "memory");
+		return result;
+	}
+	case 2:
+	{
+		unsigned short result = old;
+
+		__asm__ __volatile__(
+		"lock; cmpxchgw %2, %1"
+			: "+a"(result), "+m"(*__hp(addr))
+			: "r"((unsigned short)_new)
+			: "memory");
+		return result;
+	}
+	case 4:
+	{
+		unsigned int result = old;
+
+		__asm__ __volatile__(
+		"lock; cmpxchgl %2, %1"
+			: "+a"(result), "+m"(*__hp(addr))
+			: "r"((unsigned int)_new)
+			: "memory");
+		return result;
+	}
+#if (BITS_PER_LONG == 64)
+	case 8:
+	{
+		unsigned long result = old;
+
+		__asm__ __volatile__(
+		"lock; cmpxchgq %2, %1"
+			: "+a"(result), "+m"(*__hp(addr))
+			: "r"((unsigned long)_new)
+			: "memory");
+		return result;
+	}
+#endif
+	}
+	/* generate an illegal instruction. Cannot catch this with linker tricks
+	 * when optimizations are disabled. */
+	__asm__ __volatile__("ud2");
+	return 0;
+}
+
+#define uatomic_cmpxchg(addr, old, _new) \
+	((__typeof__(*(addr))) _uatomic_cmpxchg((addr), (unsigned long)(old),\
+						(unsigned long)(_new), \
+						sizeof(*(addr))))
+
+/* xchg */
+
+static inline __attribute__((always_inline))
+unsigned long _uatomic_exchange(void *addr, unsigned long val, int len)
+{
+	/* Note: the "xchg" instruction does not need a "lock" prefix. */
+	switch (len) {
+	case 1:
+	{
+		unsigned char result;
+		__asm__ __volatile__(
+		"xchgb %0, %1"
+			: "=q"(result), "+m"(*__hp(addr))
+			: "0" ((unsigned char)val)
+			: "memory");
+		return result;
+	}
+	case 2:
+	{
+		unsigned short result;
+		__asm__ __volatile__(
+		"xchgw %0, %1"
+			: "=r"(result), "+m"(*__hp(addr))
+			: "0" ((unsigned short)val)
+			: "memory");
+		return result;
+	}
+	case 4:
+	{
+		unsigned int result;
+		__asm__ __volatile__(
+		"xchgl %0, %1"
+			: "=r"(result), "+m"(*__hp(addr))
+			: "0" ((unsigned int)val)
+			: "memory");
+		return result;
+	}
+#if (BITS_PER_LONG == 64)
+	case 8:
+	{
+		unsigned long result;
+		__asm__ __volatile__(
+		"xchgq %0, %1"
+			: "=r"(result), "+m"(*__hp(addr))
+			: "0" ((unsigned long)val)
+			: "memory");
+		return result;
+	}
+#endif
+	}
+	/* generate an illegal instruction. Cannot catch this with linker tricks
+	 * when optimizations are disabled. */
+	__asm__ __volatile__("ud2");
+	return 0;
+}
+
+#define uatomic_xchg(addr, v) \
+	((__typeof__(*(addr))) _uatomic_exchange((addr), (unsigned long)(v), \
+						sizeof(*(addr))))
+
+/* uatomic_add_return, uatomic_sub_return */
+
+static inline __attribute__((always_inline))
+unsigned long _uatomic_add_return(void *addr, unsigned long val,
+				  int len)
+{
+	switch (len) {
+	case 1:
+	{
+		unsigned char result = val;
+
+		__asm__ __volatile__(
+		"lock; xaddb %1, %0"
+			: "+m"(*__hp(addr)), "+q" (result)
+			:
+			: "memory");
+		return result + (unsigned char)val;
+	}
+	case 2:
+	{
+		unsigned short result = val;
+
+		__asm__ __volatile__(
+		"lock; xaddw %1, %0"
+			: "+m"(*__hp(addr)), "+r" (result)
+			:
+			: "memory");
+		return result + (unsigned short)val;
+	}
+	case 4:
+	{
+		unsigned int result = val;
+
+		__asm__ __volatile__(
+		"lock; xaddl %1, %0"
+			: "+m"(*__hp(addr)), "+r" (result)
+			:
+			: "memory");
+		return result + (unsigned int)val;
+	}
+#if (BITS_PER_LONG == 64)
+	case 8:
+	{
+		unsigned long result = val;
+
+		__asm__ __volatile__(
+		"lock; xaddq %1, %0"
+			: "+m"(*__hp(addr)), "+r" (result)
+			:
+			: "memory");
+		return result + (unsigned long)val;
+	}
+#endif
+	}
+	/* generate an illegal instruction. Cannot catch this with linker tricks
+	 * when optimizations are disabled. */
+	__asm__ __volatile__("ud2");
+	return 0;
+}
+
+#define uatomic_add_return(addr, v) \
+	((__typeof__(*(addr))) _uatomic_add_return((addr), \
+						(unsigned long)(v), \
+						sizeof(*(addr))))
+
+#define uatomic_sub_return(addr, v) uatomic_add_return((addr), -(v))
+
+/* uatomic_add, uatomic_sub */
+
+static inline __attribute__((always_inline))
+void _uatomic_add(void *addr, unsigned long val, int len)
+{
+	switch (len) {
+	case 1:
+	{
+		__asm__ __volatile__(
+		"lock; addb %1, %0"
+			: "=m"(*__hp(addr))
+			: "iq" ((unsigned char)val)
+			: "memory");
+		return;
+	}
+	case 2:
+	{
+		__asm__ __volatile__(
+		"lock; addw %1, %0"
+			: "=m"(*__hp(addr))
+			: "ir" ((unsigned short)val)
+			: "memory");
+		return;
+	}
+	case 4:
+	{
+		__asm__ __volatile__(
+		"lock; addl %1, %0"
+			: "=m"(*__hp(addr))
+			: "ir" ((unsigned int)val)
+			: "memory");
+		return;
+	}
+#if (BITS_PER_LONG == 64)
+	case 8:
+	{
+		__asm__ __volatile__(
+		"lock; addq %1, %0"
+			: "=m"(*__hp(addr))
+			: "er" ((unsigned long)val)
+			: "memory");
+		return;
+	}
+#endif
+	}
+	/* generate an illegal instruction. Cannot catch this with linker tricks
+	 * when optimizations are disabled. */
+	__asm__ __volatile__("ud2");
+	return;
+}
+
+#define uatomic_add(addr, v) \
+	(_uatomic_add((addr), (unsigned long)(v), sizeof(*(addr))))
+
+#define uatomic_sub(addr, v) uatomic_add((addr), -(v))
+
+
+/* uatomic_inc */
+
+static inline __attribute__((always_inline))
+void _uatomic_inc(void *addr, int len)
+{
+	switch (len) {
+	case 1:
+	{
+		__asm__ __volatile__(
+		"lock; incb %0"
+			: "=m"(*__hp(addr))
+			:
+			: "memory");
+		return;
+	}
+	case 2:
+	{
+		__asm__ __volatile__(
+		"lock; incw %0"
+			: "=m"(*__hp(addr))
+			:
+			: "memory");
+		return;
+	}
+	case 4:
+	{
+		__asm__ __volatile__(
+		"lock; incl %0"
+			: "=m"(*__hp(addr))
+			:
+			: "memory");
+		return;
+	}
+#if (BITS_PER_LONG == 64)
+	case 8:
+	{
+		__asm__ __volatile__(
+		"lock; incq %0"
+			: "=m"(*__hp(addr))
+			:
+			: "memory");
+		return;
+	}
+#endif
+	}
+	/* generate an illegal instruction. Cannot catch this with linker tricks
+	 * when optimizations are disabled. */
+	__asm__ __volatile__("ud2");
+	return;
+}
+
+#define uatomic_inc(addr) (_uatomic_inc((addr), sizeof(*(addr))))
+
+/* uatomic_dec */
+
+static inline __attribute__((always_inline))
+void _uatomic_dec(void *addr, int len)
+{
+	switch (len) {
+	case 1:
+	{
+		__asm__ __volatile__(
+		"lock; decb %0"
+			: "=m"(*__hp(addr))
+			:
+			: "memory");
+		return;
+	}
+	case 2:
+	{
+		__asm__ __volatile__(
+		"lock; decw %0"
+			: "=m"(*__hp(addr))
+			:
+			: "memory");
+		return;
+	}
+	case 4:
+	{
+		__asm__ __volatile__(
+		"lock; decl %0"
+			: "=m"(*__hp(addr))
+			:
+			: "memory");
+		return;
+	}
+#if (BITS_PER_LONG == 64)
+	case 8:
+	{
+		__asm__ __volatile__(
+		"lock; decq %0"
+			: "=m"(*__hp(addr))
+			:
+			: "memory");
+		return;
+	}
+#endif
+	}
+	/* generate an illegal instruction. Cannot catch this with linker tricks
+	 * when optimizations are disabled. */
+	__asm__ __volatile__("ud2");
+	return;
+}
+
+#define uatomic_dec(addr) (_uatomic_dec((addr), sizeof(*(addr))))
+
+#endif /* #ifndef _INCLUDE_API_H */
+
+#endif /* _URCU_ARCH_UATOMIC_X86_H */
diff --git a/urcu/arch_x86.h b/urcu/arch_x86.h
new file mode 100644
index 0000000..29612e0
--- /dev/null
+++ b/urcu/arch_x86.h
@@ -0,0 +1,135 @@
+#ifndef _URCU_ARCH_X86_H
+#define _URCU_ARCH_X86_H
+
+/*
+ * arch_x86.h: trivial definitions for the x86 architecture.
+ *
+ * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
+ * Copyright (c) 2009 Mathieu Desnoyers
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include
+#include
+
+/* Assume P4 or newer */
+#define CONFIG_HAVE_FENCE 1
+#define CONFIG_HAVE_MEM_COHERENCY
+
+#ifndef BITS_PER_LONG
+#define BITS_PER_LONG (__SIZEOF_LONG__ * 8)
+#endif
+
+#ifdef CONFIG_HAVE_FENCE
+#define mb() asm volatile("mfence":::"memory")
+#define rmb() asm volatile("lfence":::"memory")
+#define wmb() asm volatile("sfence"::: "memory")
+#else
+/*
+ * Some non-Intel clones support out of order store. wmb() ceases to be a
+ * nop for these.
+ */
+#define mb() asm volatile("lock; addl $0,0(%%esp)":::"memory")
+#define rmb() asm volatile("lock; addl $0,0(%%esp)":::"memory")
+#define wmb() asm volatile("lock; addl $0,0(%%esp)"::: "memory")
+#endif
+
+/*
+ * Architectures without cache coherency need something like the following:
+ *
+ * #define mb()		mc()
+ * #define rmb()	rmc()
+ * #define wmb()	wmc()
+ * #define mc()		arch_cache_flush()
+ * #define rmc()	arch_cache_flush_read()
+ * #define wmc()	arch_cache_flush_write()
+ */
+
+#define mc() barrier()
+#define rmc() barrier()
+#define wmc() barrier()
+
+/* Assume SMP machine, given we don't have this information */
+#define CONFIG_SMP 1
+
+#ifdef CONFIG_SMP
+#define smp_mb() mb()
+#define smp_rmb() rmb()
+#define smp_wmb() wmb()
+#define smp_mc() mc()
+#define smp_rmc() rmc()
+#define smp_wmc() wmc()
+#else
+#define smp_mb() barrier()
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#define smp_mc() barrier()
+#define smp_rmc() barrier()
+#define smp_wmc() barrier()
+#endif
+
+/* Nop everywhere except on alpha. */
+#define smp_read_barrier_depends()
+
+static inline void rep_nop(void)
+{
+	asm volatile("rep; nop" : : : "memory");
+}
+
+static inline void cpu_relax(void)
+{
+	rep_nop();
+}
+
+/*
+ * Serialize core instruction execution. Also acts as a compiler barrier.
+ */
+#ifdef __PIC__
+/*
+ * Cannot use cpuid because it clobbers the ebx register and clashes
+ * with -fPIC :
+ * error: PIC register 'ebx' clobbered in 'asm'
+ */
+static inline void sync_core(void)
+{
+	mb();
+}
+#else
+static inline void sync_core(void)
+{
+	asm volatile("cpuid" : : : "memory", "eax", "ebx", "ecx", "edx");
+}
+#endif
+
+#define rdtscll(val) \
+	do { \
+		unsigned int __a, __d; \
+		asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \
+		(val) = ((unsigned long long)__a) \
+			| (((unsigned long long)__d) << 32); \
+	} while(0)
+
+typedef unsigned long long cycles_t;
+
+static inline cycles_t get_cycles(void)
+{
+	cycles_t ret = 0;
+
+	rdtscll(ret);
+	return ret;
+}
+
+#endif /* _URCU_ARCH_X86_H */
diff --git a/urcu/compiler.h b/urcu/compiler.h
new file mode 100644
index 0000000..6f6d3e9
--- /dev/null
+++ b/urcu/compiler.h
@@ -0,0 +1,50 @@
+#ifndef _URCU_COMPILER_H
+#define _URCU_COMPILER_H
+
+/*
+ * compiler.h
+ *
+ * Compiler definitions.
+ *
+ * Copyright (c) 2009 Mathieu Desnoyers
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program
+ * for any purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is granted,
+ * provided the above notices are retained, and a notice that the code was
+ * modified is included with the above copyright notice.
+ */
+
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+
+#define barrier() asm volatile("" : : : "memory")
+
+/*
+ * Instruct the compiler to perform only a single access to a variable
+ * (prohibits merging and refetching). The compiler is also forbidden to reorder
+ * successive instances of ACCESS_ONCE(), but only if the compiler is aware of
+ * some particular ordering. Compiler ordering can be ensured, for example, by
+ * putting two ACCESS_ONCE() in separate C statements.
+ *
+ * This macro does absolutely -nothing- to prevent the CPU from reordering,
+ * merging, or refetching absolutely anything at any time.  Its main intended
+ * use is to mediate communication between process-level code and irq/NMI
+ * handlers, all running on the same CPU.
+ */
+#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&x)
+
+#if (__GNUC__ == 4)
+#define __compiler_offsetof(a, b) __builtin_offsetof(a, b)
+#endif
+
+#ifdef __compiler_offsetof
+#define offsetof(TYPE, MEMBER) __compiler_offsetof(TYPE, MEMBER)
+#else
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+#endif /* _URCU_COMPILER_H */
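
For illustration only, here is a minimal sketch of how the renamed uatomic_* primitives above might be used from application code. It is not part of the commit; it assumes the headers were installed under /usr/include/urcu/ by the install target, that the generated urcu/arch_uatomic.h matches the build architecture and pulls in urcu/compiler.h for ACCESS_ONCE(), and that the counter variable and values are made up for the example.

/*
 * Hypothetical usage sketch of the uatomic API (not part of this commit).
 * Assumes <urcu/arch_uatomic.h> is the installed, architecture-specific
 * header generated by the build.
 */
#include <stdio.h>
#include <urcu/arch_uatomic.h>

static unsigned long counter;

int main(void)
{
	unsigned long old;

	uatomic_set(&counter, 0UL);	/* plain store through ACCESS_ONCE() */
	uatomic_add(&counter, 5UL);	/* atomic add, result discarded */
	uatomic_inc(&counter);		/* counter is now 6 */

	/* Stores 10 only if the current value is 6; returns the old value. */
	old = uatomic_cmpxchg(&counter, 6UL, 10UL);

	/* Unconditionally stores 42; returns the value it replaced (10). */
	old = uatomic_xchg(&counter, 42UL);

	printf("previous=%lu current=%lu\n", old, uatomic_read(&counter));
	return 0;
}

Note that, as added by this commit, the s390 header only provides uatomic_xchg(); the cmpxchg/add/inc family shown above is available in the ppc and x86 headers.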