[sr-dev] git:andrei/armv7: core: armv7 & improved armv6 support
Andrei Pelinescu-Onciul
andrei at iptel.org
Mon Sep 19 17:41:24 CEST 2011
Module: sip-router
Branch: andrei/armv7
Commit: b092c2d10f4672dbcf387603ce08c86f10a7cdc4
URL: http://git.sip-router.org/cgi-bin/gitweb.cgi/sip-router/?a=commit;h=b092c2d10f4672dbcf387603ce08c86f10a7cdc4
Author: Andrei Pelinescu-Onciul <andrei at iptel.org>
Committer: Andrei Pelinescu-Onciul <andrei at iptel.org>
Date: Mon Sep 19 17:21:52 2011 +0200
core: armv7 & improved armv6 support
Experimental armv7 support and armv6 SMP support.
---
atomic/atomic_arm.h | 67 ++++++++++++++++++++++++++++++++++++++----------
atomic/atomic_native.h | 4 +-
fastlock.h | 63 +++++++++++++++++++++++++++++++++-----------
3 files changed, 102 insertions(+), 32 deletions(-)
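A quick note on the barrier instructions used below: ARMv7 has a dedicated
"dmb" (data memory barrier) instruction, while ARMv6 expresses the same
barrier through a CP15 cache-control register write. Written out as a small
standalone function for readability (the function name my_membar() is
illustrative and not part of the patch; the __CPU_* macros are assumed to be
supplied by the build system as usual):

/* illustrative sketch of the full memory barrier this patch relies on */
static inline void my_membar(void)
{
#if defined __CPU_arm7
	asm volatile ("dmb" : : : "memory");        /* ARMv7 data memory barrier */
#elif defined __CPU_arm6
	asm volatile ("mcr p15, 0, %0, c7, c10, 5"  /* ARMv6 DMB via CP15 */
	              : : "r"(0) : "memory");
#else
	asm volatile ("" : : : "memory");           /* compiler-only barrier (NOSMP) */
#endif
}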
diff --git a/atomic/atomic_arm.h b/atomic/atomic_arm.h
index b5497a2..dc61111 100644
--- a/atomic/atomic_arm.h
+++ b/atomic/atomic_arm.h
@@ -25,6 +25,7 @@
* - NOSMP
* - __CPU_arm
* - __CPU_arm6 - armv6 support (supports atomic ops via ldrex/strex)
+ * - __CPU_arm7 - armv7 support
* @ingroup atomic
*/
@@ -47,13 +48,39 @@
#ifdef NOSMP
#define HAVE_ASM_INLINE_MEMBAR
#define membar() asm volatile ("" : : : "memory") /* compiler barrier: keeps gcc from caching values across it */
+
+#else /* SMP */
+
+#ifdef __CPU_arm7
+
+#define HAVE_ASM_INLINE_MEMBAR
+#define membar() asm volatile ("dmb" : : : "memory")
+
+#elif defined __CPU_arm6
+
+#define HAVE_ASM_INLINE_MEMBAR
+/* arm6 implements memory barriers using CP15 */
+#define membar() asm volatile ("mcr p15, 0, %0, c7, c10, 5" \
+ : : "r"(0) : "memory")
+
+#else
+#warning SMP not supported for arm atomic ops, try compiling with -DNOSMP
+/* fall back to default lock based barriers (don't define HAVE_ASM...) */
+#endif /* __CPU_arm7 / __CPU_arm6 */
+
+#endif /* NOSMP */
+
+
+/* all other membars are either empty or the same as membar(),
+ irrespective of the SMP or NOSMP mode */
+#ifdef HAVE_ASM_INLINE_MEMBAR
+
#define membar_read() membar()
#define membar_write() membar()
#define membar_depends() do {} while(0) /* really empty, not even a cc bar.*/
-/* lock barriers: empty, not needed for NOSMP; the lock/unlock should already
- * contain gcc barriers*/
-#define membar_enter_lock() do {} while(0)
-#define membar_leave_lock() do {} while(0)
+/* lock barriers */
+#define membar_enter_lock() membar()
+#define membar_leave_lock() membar()
/* membars after or before atomic_ops or atomic_setget -> use these or
* mb_<atomic_op_name>() if you need a memory barrier in one of these
* situations (on some archs where the atomic operations imply memory
@@ -65,19 +92,18 @@
#define membar_write_atomic_setget() membar_write()
#define membar_read_atomic_op() membar_read()
#define membar_read_atomic_setget() membar_read()
-#else /* SMP */
-#warning SMP not supported for arm atomic ops, try compiling with -DNOSMP
-/* fall back to default lock based barriers (don't define HAVE_ASM...) */
-#endif /* NOSMP */
+#endif /* HAVE_ASM_INLINE_MEMBAR */
-#ifdef __CPU_arm6
+
+
+#if defined __CPU_arm6 || defined __CPU_arm7
#define HAVE_ASM_INLINE_ATOMIC_OPS
/* hack to get some membars */
-#ifndef NOSMP
+#if !defined NOSMP && !defined HAVE_ASM_INLINE_MEMBAR
#include "atomic_unknown.h"
#endif
@@ -138,7 +164,19 @@
return RET_EXPR; \
}
+#define ATOMIC_XCHG_DECL(NAME, P_TYPE) \
+ inline static P_TYPE atomic_##NAME##_##P_TYPE (volatile P_TYPE *var, \
+ P_TYPE v ) \
+ { \
+ P_TYPE ret, tmp; /* tmp: strex status flag */ \
+ asm volatile( \
+ ATOMIC_ASM_OP2() \
+ : "=&r"(ret), "=&r"(tmp), "=m"(*var) : "r"(var), "r"(v) : "cc" \
+ ); \
+ return ret; \
+ }
+/* old swp based version (doesn't work on arm7)
#define ATOMIC_XCHG_DECL(NAME, P_TYPE) \
inline static P_TYPE atomic_##NAME##_##P_TYPE (volatile P_TYPE *var, \
P_TYPE v ) \
@@ -151,6 +189,7 @@
); \
return ret; \
}
+*/
/* cmpxchg: %5=old, %4=new_v, %3=var
@@ -186,8 +225,8 @@ ATOMIC_FUNC_DECL1(and, "and %1, %0, %4", int, void, /* no return */ )
ATOMIC_FUNC_DECL1(or, "orr %1, %0, %4", int, void, /* no return */ )
ATOMIC_FUNC_DECL(inc_and_test, "add %1, %0, #1", int, int, ret==0 )
ATOMIC_FUNC_DECL(dec_and_test, "sub %1, %0, #1", int, int, ret==0 )
-//ATOMIC_FUNC_DECL2(get_and_set, /* no extra op needed */ , int, int, ret)
-ATOMIC_XCHG_DECL(get_and_set, int)
+ATOMIC_FUNC_DECL2(get_and_set, "" /* no extra op needed */ , int, int, ret)
+//ATOMIC_XCHG_DECL(get_and_set, int)
ATOMIC_CMPXCHG_DECL(cmpxchg, int)
ATOMIC_FUNC_DECL1(add, "add %1, %0, %4", int, int, ret )
@@ -197,8 +236,8 @@ ATOMIC_FUNC_DECL1(and, "and %1, %0, %4", long, void, /* no return */ )
ATOMIC_FUNC_DECL1(or, "orr %1, %0, %4", long, void, /* no return */ )
ATOMIC_FUNC_DECL(inc_and_test, "add %1, %0, #1", long, long, ret==0 )
ATOMIC_FUNC_DECL(dec_and_test, "sub %1, %0, #1", long, long, ret==0 )
-//ATOMIC_FUNC_DECL2(get_and_set, /* no extra op needed */ , long, long, ret)
-ATOMIC_XCHG_DECL(get_and_set, long)
+ATOMIC_FUNC_DECL2(get_and_set, "" /* no extra op needed */ , long, long, ret)
+//ATOMIC_XCHG_DECL(get_and_set, long)
ATOMIC_CMPXCHG_DECL(cmpxchg, long)
ATOMIC_FUNC_DECL1(add, "add %1, %0, %4", long, long, ret )
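For reference, the ldrex/strex retry loop that the new macros generate looks
roughly like the hand-written function below. ATOMIC_ASM_OP2() is not shown in
this diff, so the exact asm body is reconstructed here; my_atomic_get_and_set_int
is an illustrative name and the code needs ARMv6 or later:

/* sketch of an atomic exchange built on ldrex/strex (ARMv6/ARMv7) */
static inline int my_atomic_get_and_set_int(volatile int *var, int v)
{
	int ret, tmp;
	asm volatile(
		"1:  ldrex %0, [%3]      \n\t"  /* ret = *var (exclusive load) */
		"    strex %1, %4, [%3]  \n\t"  /* try *var = v; tmp = 0 on success */
		"    cmp   %1, #0        \n\t"
		"    bne   1b            \n\t"  /* reservation lost => retry */
		: "=&r"(ret), "=&r"(tmp), "=m"(*var)
		: "r"(var), "r"(v)
		: "cc");
	return ret;                         /* previous value of *var */
}

The old swp-based version kept above in a comment is a single instruction, but,
as the commit comment notes, it does not work on arm7 (swp is deprecated from
ARMv6 onward), which is why the exchange is rebuilt on ldrex/strex.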
diff --git a/atomic/atomic_native.h b/atomic/atomic_native.h
index 499e846..3086269 100644
--- a/atomic/atomic_native.h
+++ b/atomic/atomic_native.h
@@ -30,7 +30,7 @@
* - __CPU_ppc, __CPU_ppc64 - see atomic_ppc.h
* - __CPU_sparc - see atomic_sparc.h
* - __CPU_sparc64, SPARC64_MODE - see atomic_sparc64.h
- * - __CPU_arm, __CPU_arm6 - see atomic_arm.h
+ * - __CPU_arm, __CPU_arm6, __CPU_arm7 - see atomic_arm.h
* - __CPU_alpha - see atomic_alpha.h
* @ingroup atomic
*/
@@ -69,7 +69,7 @@
#include "atomic_sparc.h"
-#elif defined __CPU_arm || defined __CPU_arm6
+#elif defined __CPU_arm || defined __CPU_arm6 || defined __CPU_arm7
#include "atomic_arm.h"
diff --git a/fastlock.h b/fastlock.h
index a09f0e9..c17322b 100644
--- a/fastlock.h
+++ b/fastlock.h
@@ -47,6 +47,7 @@
* 2006-11-22 arm early clobber added: according to the swp instruction
* specification the address register must be != from the other 2
* (Julien Blache <jblache at debian.org>)
+ * 2011-09-19 arm v7 and arm v6 smp experimental support (andrei)
*
*/
@@ -91,7 +92,7 @@ typedef volatile int fl_lock_t;
#elif defined(__CPU_sparc64)
#ifndef NOSMP
#define membar_getlock() \
- asm volatile ("membar #StoreStore | #StoreLoad \n\t" : : : "memory");
+ asm volatile ("membar #StoreStore | #StoreLoad \n\t" : : : "memory")
/* can be either StoreStore|StoreLoad or LoadStore|LoadLoad
* since ldstub acts both as a store and as a load */
#else
@@ -102,40 +103,55 @@ typedef volatile int fl_lock_t;
#elif defined(__CPU_sparc)
#define membar_getlock()/* no need for a compiler barrier, already included */
-#elif defined __CPU_arm || defined __CPU_arm6
+#elif defined __CPU_arm
#ifndef NOSMP
-#warning smp not supported on arm* (no membars), try compiling with -DNOSMP
+#warning smp not supported on arm < 6 (no membars), try compiling with -DNOSMP
+#endif /* NOSMP */
+#define membar_getlock()
+
+#elif defined __CPU_arm6
+#ifndef NOSMP
+#define membar_getlock() asm volatile ("mcr p15, 0, %0, c7, c10, 5" \
+ : : "r"(0) : "memory")
+#else /* NOSMP */
+#define membar_getlock()
+#endif /* NOSMP */
+
+#elif defined __CPU_arm7
+#ifndef NOSMP
+#define membar_getlock() asm volatile ("dmb" : : : "memory")
+#else /* NOSMP */
+#define membar_getlock()
#endif /* NOSMP */
-#define membar_getlock()
#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
#ifndef NOSMP
#define membar_getlock() \
- asm volatile("lwsync \n\t" : : : "memory");
+ asm volatile("lwsync \n\t" : : : "memory")
#else
-#define membar_getlock()
+#define membar_getlock()
#endif /* NOSMP */
#elif defined __CPU_mips2 || defined __CPU_mips64
#ifndef NOSMP
#define membar_getlock() \
- asm volatile("sync \n\t" : : : "memory");
+ asm volatile("sync \n\t" : : : "memory")
#else
-#define membar_getlock()
+#define membar_getlock()
#endif /* NOSMP */
#elif defined __CPU_mips
#ifndef NOSMP
#warning smp not supported on mips1 (no membars), try compiling with -DNOSMP
#endif
-#define membar_getlock()
+#define membar_getlock()
#elif defined __CPU_alpha
#ifndef NOSMP
#define membar_getlock() \
- asm volatile("mb \n\t" : : : "memory");
+ asm volatile("mb \n\t" : : : "memory")
#else
-#define membar_getlock()
+#define membar_getlock()
#endif /* NOSMP */
#else /* __CPU_xxx */
@@ -211,7 +227,7 @@ inline static int tsl(fl_lock_t* lock)
"swp %0, %2, [%3] \n\t"
: "=&r" (val), "=m"(*lock) : "r"(1), "r" (lock) : "memory"
);
-#elif defined __CPU_arm6
+#elif defined __CPU_arm6 || defined __CPU_arm7
asm volatile(
" ldrex %0, [%2] \n\t"
" cmp %0, #0 \n\t"
@@ -219,6 +235,7 @@ inline static int tsl(fl_lock_t* lock)
/* if %0!=0 => either it was 1 initially or was 0
* and somebody changed it just before the strexeq (so the
* lock is taken) => it's safe to return %0 */
+ /* membar_getlock must be called outside this function */
: "=&r"(val), "=m"(*lock) : "r"(lock), "r"(1) : "cc"
);
#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
@@ -362,14 +379,28 @@ inline static void release_lock(fl_lock_t* lock)
"stb %%g0, [%1] \n\t"
: "=m"(*lock) : "r" (lock) : "memory"
);
-#elif defined __CPU_arm || defined __CPU_arm6
-#ifndef NOSMP
+#elif defined __CPU_arm || defined __CPU_arm6 || defined __CPU_arm7
+#if !defined NOSMP && defined __CPU_arm
#warning arm* smp mode not supported (no membars), try compiling with -DNOSMP
#endif
+ /* misuse membar_getlock as the pre-release barrier */
+ membar_getlock();
asm volatile(
- " str %1, [%2] \n\r"
- : "=m"(*lock) : "r"(0), "r"(lock) : "memory"
+ " str %1, [%2] \n\r"
+ : "=m"(*lock) : "r"(0), "r"(lock) : "cc", "memory"
);
+#ifdef __CPU_arm6
+ /* drain the store buffer: flush the per-processor write buffer into
+ the L1 cache, making all the changes visible to other processors */
+ asm volatile(
+ "mcr p15, 0, %0, c7, c10, 4 \n\r" /* DSB equiv. on arm6*/
+ : : "r" (0) : "memory"
+ );
+#elif defined __CPU_arm7
+ /* drain the store buffer: flush the per-processor write buffer into
+ the L1 cache, making all the changes visible to other processors */
+ asm volatile( "dsb \n\r" : : : "memory");
+#endif /* __CPU_arm6 / __CPU_arm7 */
#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
asm volatile(
/* "sync\n\t" lwsync is faster and will work