[sr-dev] git:andrei/armv7: core: armv7 & improved armv6 support

Andrei Pelinescu-Onciul andrei at iptel.org
Mon Sep 19 17:41:24 CEST 2011


Module: sip-router
Branch: andrei/armv7
Commit: b092c2d10f4672dbcf387603ce08c86f10a7cdc4
URL:    http://git.sip-router.org/cgi-bin/gitweb.cgi/sip-router/?a=commit;h=b092c2d10f4672dbcf387603ce08c86f10a7cdc4

Author: Andrei Pelinescu-Onciul <andrei at iptel.org>
Committer: Andrei Pelinescu-Onciul <andrei at iptel.org>
Date:   Mon Sep 19 17:21:52 2011 +0200

core: armv7 & improved armv6 support

Experimental armv7 support and armv6 SMP support: dmb/dsb based barriers
on armv7, CP15 based barriers on armv6, ldrex/strex atomic ops and locks
on both.

---

 atomic/atomic_arm.h    |   67 ++++++++++++++++++++++++++++++++++++++----------
 atomic/atomic_native.h |    4 +-
 fastlock.h             |   63 +++++++++++++++++++++++++++++++++-----------
 3 files changed, 102 insertions(+), 32 deletions(-)
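
For the gist without wading through the macros: on armv7 the memory
barriers become plain "dmb" and the atomic ops stay ldrex/strex based
(the old swp based exchange is disabled; the patch notes it does not
work on armv7). The standalone sketch below shows roughly what the
generated primitives look like; it is an illustration only -- the name
atomic_xchg_int and the assumption of a gcc-style compiler targeting
armv7 (-march=armv7-a) are mine, not part of the patch:

#define membar() asm volatile ("dmb" : : : "memory")

/* illustrative only -- the real code is generated by the
 * ATOMIC_FUNC_DECL*() macros in atomic/atomic_arm.h */
inline static int atomic_xchg_int(volatile int *var, int v)
{
	int ret, tmp;

	asm volatile(
		"1:  ldrex %0, [%3] \n\t"      /* ret = *var (exclusive load) */
		"    strex %1, %4, [%3] \n\t"  /* try *var = v; %1 = 0 on success */
		"    cmp   %1, #0 \n\t"
		"    bne   1b \n\t"            /* lost exclusivity => retry */
		: "=&r"(ret), "=&r"(tmp), "=m"(*var)
		: "r"(var), "r"(v)
		: "cc");
	return ret;
}

On armv6 only the barrier differs: membar() uses the CP15 operation
"mcr p15, 0, r0, c7, c10, 5" instead of "dmb".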

diff --git a/atomic/atomic_arm.h b/atomic/atomic_arm.h
index b5497a2..dc61111 100644
--- a/atomic/atomic_arm.h
+++ b/atomic/atomic_arm.h
@@ -25,6 +25,7 @@
  * - NOSMP
  * - __CPU_arm
  * - __CPU_arm6    - armv6 support (supports atomic ops via ldrex/strex)
+ * - __CPU_arm7    - armv7 support
  * @ingroup atomic
  */
 
@@ -47,13 +48,39 @@
 #ifdef NOSMP
 #define HAVE_ASM_INLINE_MEMBAR
 #define membar() asm volatile ("" : : : "memory") /* gcc do not cache barrier*/
+
+#else /* SMP */
+
+#ifdef __CPU_arm7
+
+#define HAVE_ASM_INLINE_MEMBAR
+#define membar() asm volatile ("dmb" : : : "memory")
+
+#elif defined __CPU_arm6
+
+#define HAVE_ASM_INLINE_MEMBAR
+/* arm6 implements memory barriers using CP15 */
+#define membar() asm volatile ("mcr p15, 0, %0, c7, c10, 5" \
+								: : "r"(0) : "memory")
+
+#else
+#warning SMP not supported for arm atomic ops, try compiling with -DNOSMP
+/* fall back to default lock based barriers (don't define HAVE_ASM...) */
+#endif /* __CPU_arm7 / __CPU_arm6 */
+
+#endif /* NOSMP */
+
+
+/* all other membars are either empty or the same as membar(),
+   irrespective of the SMP or NOSMP mode */
+#ifdef HAVE_ASM_INLINE_MEMBAR
+
 #define membar_read()  membar()
 #define membar_write() membar()
 #define membar_depends()   do {} while(0) /* really empty, not even a cc bar.*/
-/* lock barriers: empty, not needed for NOSMP; the lock/unlock should already
- * contain gcc barriers*/
-#define membar_enter_lock() do {} while(0)
-#define membar_leave_lock() do {} while(0)
+/* lock barriers */
+#define membar_enter_lock()  membar()
+#define membar_leave_lock()  membar()
 /* membars after or before atomic_ops or atomic_setget -> use these or
  *  mb_<atomic_op_name>() if you need a memory barrier in one of these
  *  situations (on some archs where the atomic operations imply memory
@@ -65,19 +92,18 @@
 #define membar_write_atomic_setget()	membar_write()
 #define membar_read_atomic_op()			membar_read()
 #define membar_read_atomic_setget()		membar_read()
-#else /* SMP */
-#warning SMP not supported for arm atomic ops, try compiling with -DNOSMP
-/* fall back to default lock based barriers (don't define HAVE_ASM...) */
-#endif /* NOSMP */
 
+#endif /* HAVE_ASM_INLINE_MEMBAR */
 
-#ifdef __CPU_arm6
+
+
+#if defined __CPU_arm6  || defined __CPU_arm7
 
 
 #define HAVE_ASM_INLINE_ATOMIC_OPS
 
 /* hack to get some membars */
-#ifndef NOSMP
+#if !defined NOSMP && !defined HAVE_ASM_INLINE_MEMBAR
 #include "atomic_unknown.h"
 #endif
 
@@ -138,7 +164,19 @@
 		return RET_EXPR; \
 	}
 
+#define ATOMIC_XCHG_DECL(NAME, P_TYPE) \
+	inline static P_TYPE atomic_##NAME##_##P_TYPE (volatile P_TYPE *var, \
+														P_TYPE v ) \
+	{ \
+		P_TYPE ret, tmp; \
+		asm volatile( \
+			ATOMIC_ASM_OP2() \
+			: "=&r"(ret), "=&r"(tmp), "=m"(*var) : "r"(var), "r"(v) : "cc" \
+			); \
+		return ret; \
+	}
 
+/* old swp based version (doesn't work on arm7)
 #define ATOMIC_XCHG_DECL(NAME, P_TYPE) \
 	inline static P_TYPE atomic_##NAME##_##P_TYPE (volatile P_TYPE *var, \
 														P_TYPE v ) \
@@ -151,6 +189,7 @@
 			); \
 		return ret; \
 	}
+*/
 
 
 /* cmpxchg: %5=old, %4=new_v, %3=var
@@ -186,8 +225,8 @@ ATOMIC_FUNC_DECL1(and,     "and  %1, %0, %4", int, void, /* no return */ )
 ATOMIC_FUNC_DECL1(or,      "orr  %1, %0, %4", int, void, /* no return */ )
 ATOMIC_FUNC_DECL(inc_and_test, "add  %1, %0, #1", int, int, ret==0 )
 ATOMIC_FUNC_DECL(dec_and_test, "sub  %1, %0, #1", int, int, ret==0 )
-//ATOMIC_FUNC_DECL2(get_and_set, /* no extra op needed */ , int, int,  ret)
-ATOMIC_XCHG_DECL(get_and_set, int)
+ATOMIC_FUNC_DECL2(get_and_set, "" /* no extra op needed */ , int, int,  ret)
+//ATOMIC_XCHG_DECL(get_and_set, int)
 ATOMIC_CMPXCHG_DECL(cmpxchg, int)
 ATOMIC_FUNC_DECL1(add,     "add  %1, %0, %4", int, int, ret )
 
@@ -197,8 +236,8 @@ ATOMIC_FUNC_DECL1(and,     "and  %1, %0, %4", long, void, /* no return */ )
 ATOMIC_FUNC_DECL1(or,      "orr  %1, %0, %4", long, void, /* no return */ )
 ATOMIC_FUNC_DECL(inc_and_test, "add  %1, %0, #1", long, long, ret==0 )
 ATOMIC_FUNC_DECL(dec_and_test, "sub  %1, %0, #1", long, long, ret==0 )
-//ATOMIC_FUNC_DECL2(get_and_set, /* no extra op needed */ , long, long,  ret)
-ATOMIC_XCHG_DECL(get_and_set, long)
+ATOMIC_FUNC_DECL2(get_and_set, "" /* no extra op needed */ , long, long,  ret)
+//ATOMIC_XCHG_DECL(get_and_set, long)
 ATOMIC_CMPXCHG_DECL(cmpxchg, long)
 ATOMIC_FUNC_DECL1(add,     "add  %1, %0, %4", long, long, ret )
 
diff --git a/atomic/atomic_native.h b/atomic/atomic_native.h
index 499e846..3086269 100644
--- a/atomic/atomic_native.h
+++ b/atomic/atomic_native.h
@@ -30,7 +30,7 @@
  * - __CPU_ppc, __CPU_ppc64 - see atomic_ppc.h
  * - __CPU_sparc - see atomic_sparc.h
  * - __CPU_sparc64, SPARC64_MODE - see atomic_sparc64.h
- * - __CPU_arm, __CPU_arm6 - see atomic_arm.h
+ * - __CPU_arm, __CPU_arm6, __CPU_arm7 - see atomic_arm.h
  * - __CPU_alpha - see atomic_alpha.h
  * @ingroup atomic
  */
@@ -69,7 +69,7 @@
 
 #include "atomic_sparc.h"
 
-#elif defined __CPU_arm || defined __CPU_arm6
+#elif defined __CPU_arm || defined __CPU_arm6 || defined __CPU_arm7
 
 #include "atomic_arm.h"
 
diff --git a/fastlock.h b/fastlock.h
index a09f0e9..c17322b 100644
--- a/fastlock.h
+++ b/fastlock.h
@@ -47,6 +47,7 @@
  * 2006-11-22  arm early clobber added: according to the swp instruction 
  *              specification the address register must be != from the other 2
  *              (Julien Blache <jblache at debian.org>)
+ * 2011-09-19  arm v7 and arm v6 smp experimental support (andrei)
  *
  */
 
@@ -91,7 +92,7 @@ typedef  volatile int fl_lock_t;
 #elif defined(__CPU_sparc64)
 #ifndef NOSMP
 #define membar_getlock() \
-	asm volatile ("membar #StoreStore | #StoreLoad \n\t" : : : "memory");
+	asm volatile ("membar #StoreStore | #StoreLoad \n\t" : : : "memory")
 	/* can be either StoreStore|StoreLoad or LoadStore|LoadLoad
 	 * since ldstub acts both as a store and as a load */
 #else
@@ -102,40 +103,55 @@ typedef  volatile int fl_lock_t;
 #elif  defined(__CPU_sparc)
 #define membar_getlock()/* no need for a compiler barrier, already included */
 
-#elif defined __CPU_arm || defined __CPU_arm6
+#elif defined __CPU_arm
 #ifndef NOSMP
-#warning smp not supported on arm* (no membars), try compiling with -DNOSMP
+#warning smp not supported on arm < 6 (no membars), try compiling with -DNOSMP
+#endif /* NOSMP */
+#define membar_getlock()
+
+#elif defined __CPU_arm6
+#ifndef NOSMP
+#define membar_getlock() asm volatile ("mcr p15, 0, %0, c7, c10, 5" \
+										: : "r"(0) : "memory")
+#else /* NOSMP */
+#define membar_getlock()
+#endif /* NOSMP */
+
+#elif defined __CPU_arm7
+#ifndef NOSMP
+#define membar_getlock() asm volatile ("dmb" : : : "memory")
+#else /* NOSMP */
+#define membar_getlock()
 #endif /* NOSMP */
-#define membar_getlock() 
 
 #elif defined(__CPU_ppc) || defined(__CPU_ppc64)
 #ifndef NOSMP
 #define membar_getlock() \
-	asm volatile("lwsync \n\t" : : : "memory");
+	asm volatile("lwsync \n\t" : : : "memory")
 #else
-#define membar_getlock() 
+#define membar_getlock()
 #endif /* NOSMP */
 
 #elif defined __CPU_mips2 || defined __CPU_mips64
 #ifndef NOSMP
 #define membar_getlock() \
-	asm volatile("sync \n\t" : : : "memory");
+	asm volatile("sync \n\t" : : : "memory")
 #else
-#define membar_getlock() 
+#define membar_getlock()
 #endif /* NOSMP */
 
 #elif defined __CPU_mips
 #ifndef NOSMP
 #warning smp not supported on mips1 (no membars), try compiling with -DNOSMP
 #endif
-#define membar_getlock() 
+#define membar_getlock()
 
 #elif defined __CPU_alpha
 #ifndef NOSMP
 #define membar_getlock() \
-	asm volatile("mb \n\t" : : : "memory");
+	asm volatile("mb \n\t" : : : "memory")
 #else
-#define membar_getlock() 
+#define membar_getlock()
 #endif /* NOSMP */
 
 #else /* __CPU_xxx */
@@ -211,7 +227,7 @@ inline static int tsl(fl_lock_t* lock)
 			"swp %0, %2, [%3] \n\t"
 			: "=&r" (val), "=m"(*lock) : "r"(1), "r" (lock) : "memory"
 	);
-#elif defined __CPU_arm6
+#elif defined __CPU_arm6 || defined __CPU_arm7
 	asm volatile(
 			"   ldrex %0, [%2] \n\t" 
 			"   cmp %0, #0 \n\t"
@@ -219,6 +235,7 @@ inline static int tsl(fl_lock_t* lock)
 			/* if %0!=0 => either it was 1 initially or was 0
 			 * and somebody changed it just before the strexeq (so the 
 			 * lock is taken) => it's safe to return %0 */
+			/* membar_getlock() must be called outside this function */
 			: "=&r"(val), "=m"(*lock) : "r"(lock), "r"(1) : "cc"
 	);
 #elif defined(__CPU_ppc) || defined(__CPU_ppc64)
@@ -362,14 +379,28 @@ inline static void release_lock(fl_lock_t* lock)
 			"stb %%g0, [%1] \n\t"
 			: "=m"(*lock) : "r" (lock) : "memory"
 	);
-#elif defined __CPU_arm || defined __CPU_arm6
-#ifndef NOSMP
+#elif defined __CPU_arm || defined __CPU_arm6 || defined __CPU_arm7
+#if !defined NOSMP && defined __CPU_arm
 #warning arm* smp mode not supported (no membars), try compiling with -DNOSMP
 #endif
+	/* misuse membar_getlock() as the pre-release barrier */
+	membar_getlock();
 	asm volatile(
-		" str %1, [%2] \n\r" 
-		: "=m"(*lock) : "r"(0), "r"(lock) : "memory"
+		" str %1, [%2] \n\r"
+		: "=m"(*lock) : "r"(0), "r"(lock) : "cc", "memory"
 	);
+#ifdef __CPU_arm6
+	/* drain store buffer: drain the per processor buffer into the L1 cache
+	   making all the changes visible to other processors */
+	asm volatile(
+			"mcr p15, 0, %0, c7, c10, 4 \n\r"  /* DSB equiv. on arm6 */
+			: : "r" (0) : "memory"
+			);
+#elif defined __CPU_arm7
+	/* drain store buffer: drain the per processor buffer into the L1 cache
+	   making all the changes visible to other processors */
+	asm volatile( "dsb \n\r" : : : "memory");
+#endif /* __CPU_arm6 / __CPU_arm7 */
 #elif defined(__CPU_ppc) || defined(__CPU_ppc64)
 	asm volatile(
 			/* "sync\n\t"  lwsync is faster and will work



