Diffstat (limited to 'arch/arm/include')
-rw-r--r--  arch/arm/include/asm/cacheflush.h    8
-rw-r--r--  arch/arm/include/asm/hwcap.h         2
-rw-r--r--  arch/arm/include/asm/mutex.h       119
-rw-r--r--  arch/arm/include/asm/pgtable.h      40
-rw-r--r--  arch/arm/include/asm/vfpmacros.h    12
5 files changed, 39 insertions, 142 deletions
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index ce110c3..c03ae91 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -222,7 +222,9 @@ static inline void vivt_flush_cache_mm(struct mm_struct *mm)
static inline void
vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
- if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm)))
+ struct mm_struct *mm = vma->vm_mm;
+
+ if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)))
__cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end),
vma->vm_flags);
}
@@ -230,7 +232,9 @@ vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned
static inline void
vivt_flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn)
{
- if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
+ struct mm_struct *mm = vma->vm_mm;
+
+ if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) {
unsigned long addr = user_addr & PAGE_MASK;
__cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags);
}
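
The two hunks above make the VIVT flush helpers tolerate a vma whose vm_mm is NULL: the flush is skipped only when an mm is present and the current CPU is not in its cpumask; with no mm attached the flush proceeds unconditionally. A minimal userspace model of that decision, with stand-in types (mm_model, vma_model and should_flush_user_range are illustrative names, not kernel structures):

/* Userspace model of the VIVT flush decision above, not kernel code. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct mm_model  { uint32_t cpu_mask; };    /* stand-in for mm_cpumask()  */
struct vma_model { struct mm_model *mm; };  /* stand-in for vma->vm_mm    */

static bool should_flush_user_range(const struct vma_model *vma, int this_cpu)
{
    const struct mm_model *mm = vma->mm;
    /* Mirrors: if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) */
    return !mm || (mm->cpu_mask & (1u << this_cpu));
}

int main(void)
{
    struct mm_model mm = { .cpu_mask = 1u << 2 };  /* mm active on CPU 2 only */
    struct vma_model with_mm = { .mm = &mm }, without_mm = { .mm = NULL };

    printf("CPU 0, mm present : flush=%d\n", should_flush_user_range(&with_mm, 0));    /* 0 */
    printf("CPU 2, mm present : flush=%d\n", should_flush_user_range(&with_mm, 2));    /* 1 */
    printf("CPU 0, mm == NULL : flush=%d\n", should_flush_user_range(&without_mm, 0)); /* 1 */
    return 0;
}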
diff --git a/arch/arm/include/asm/hwcap.h b/arch/arm/include/asm/hwcap.h
index c93a22a..2a0e8b8 100644
--- a/arch/arm/include/asm/hwcap.h
+++ b/arch/arm/include/asm/hwcap.h
@@ -4,6 +4,7 @@
/*
* HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP
*/
+
#define HWCAP_SWP (1 << 0)
#define HWCAP_HALF (1 << 1)
#define HWCAP_THUMB (1 << 2)
@@ -25,6 +26,7 @@
#define HWCAP_IDIVT (1 << 18)
#define HWCAP_IDIV (HWCAP_IDIVA | HWCAP_IDIVT)
+
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
/*
* This yields a mask that user programs can use to figure out what
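
These HWCAP bits are exported to userspace through the ELF auxiliary vector (AT_HWCAP). A small sketch of how a program might consume them, using glibc's getauxval() and only the bit values visible in this header:

/* Sketch: query ARM hwcaps from the auxiliary vector. The HWCAP_* values
 * below are copied from the definitions shown in this header. */
#include <stdio.h>
#include <sys/auxv.h>           /* getauxval(), AT_HWCAP (glibc >= 2.16) */

#define HWCAP_SWP    (1 << 0)
#define HWCAP_HALF   (1 << 1)
#define HWCAP_THUMB  (1 << 2)
#define HWCAP_IDIVT  (1 << 18)

int main(void)
{
    unsigned long hwcap = getauxval(AT_HWCAP);

    printf("SWP instruction    : %s\n", (hwcap & HWCAP_SWP)   ? "yes" : "no");
    printf("Halfword accesses  : %s\n", (hwcap & HWCAP_HALF)  ? "yes" : "no");
    printf("Thumb support      : %s\n", (hwcap & HWCAP_THUMB) ? "yes" : "no");
    printf("HW divide (Thumb)  : %s\n", (hwcap & HWCAP_IDIVT) ? "yes" : "no");
    return 0;
}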
diff --git a/arch/arm/include/asm/mutex.h b/arch/arm/include/asm/mutex.h
index 93226cf..b1479fd 100644
--- a/arch/arm/include/asm/mutex.h
+++ b/arch/arm/include/asm/mutex.h
@@ -7,121 +7,10 @@
*/
#ifndef _ASM_MUTEX_H
#define _ASM_MUTEX_H
-
-#if __LINUX_ARM_ARCH__ < 6
-/* On pre-ARMv6 hardware the swp based implementation is the most efficient. */
-# include <asm-generic/mutex-xchg.h>
-#else
-
/*
- * Attempting to lock a mutex on ARMv6+ can be done with a bastardized
- * atomic decrement (it is not a reliable atomic decrement but it satisfies
- * the defined semantics for our purpose, while being smaller and faster
- * than a real atomic decrement or atomic swap. The idea is to attempt
- * decrementing the lock value only once. If once decremented it isn't zero,
- * or if its store-back fails due to a dispute on the exclusive store, we
- * simply bail out immediately through the slow path where the lock will be
- * reattempted until it succeeds.
+ * On pre-ARMv6 hardware this results in a swp-based implementation,
+ * which is the most efficient. For ARMv6+, we emit a pair of exclusive
+ * accesses instead.
*/
-static inline void
-__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
- int __ex_flag, __res;
-
- __asm__ (
-
- "ldrex %0, [%2] \n\t"
- "sub %0, %0, #1 \n\t"
- "strex %1, %0, [%2] "
-
- : "=&r" (__res), "=&r" (__ex_flag)
- : "r" (&(count)->counter)
- : "cc","memory" );
-
- __res |= __ex_flag;
- if (unlikely(__res != 0))
- fail_fn(count);
-}
-
-static inline int
-__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
- int __ex_flag, __res;
-
- __asm__ (
-
- "ldrex %0, [%2] \n\t"
- "sub %0, %0, #1 \n\t"
- "strex %1, %0, [%2] "
-
- : "=&r" (__res), "=&r" (__ex_flag)
- : "r" (&(count)->counter)
- : "cc","memory" );
-
- __res |= __ex_flag;
- if (unlikely(__res != 0))
- __res = fail_fn(count);
- return __res;
-}
-
-/*
- * Same trick is used for the unlock fast path. However the original value,
- * rather than the result, is used to test for success in order to have
- * better generated assembly.
- */
-static inline void
-__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
- int __ex_flag, __res, __orig;
-
- __asm__ (
-
- "ldrex %0, [%3] \n\t"
- "add %1, %0, #1 \n\t"
- "strex %2, %1, [%3] "
-
- : "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
- : "r" (&(count)->counter)
- : "cc","memory" );
-
- __orig |= __ex_flag;
- if (unlikely(__orig != 0))
- fail_fn(count);
-}
-
-/*
- * If the unlock was done on a contended lock, or if the unlock simply fails
- * then the mutex remains locked.
- */
-#define __mutex_slowpath_needs_to_unlock() 1
-
-/*
- * For __mutex_fastpath_trylock we use another construct which could be
- * described as a "single value cmpxchg".
- *
- * This provides the needed trylock semantics like cmpxchg would, but it is
- * lighter and less generic than a true cmpxchg implementation.
- */
-static inline int
-__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
- int __ex_flag, __res, __orig;
-
- __asm__ (
-
- "1: ldrex %0, [%3] \n\t"
- "subs %1, %0, #1 \n\t"
- "strexeq %2, %1, [%3] \n\t"
- "movlt %0, #0 \n\t"
- "cmpeq %2, #0 \n\t"
- "bgt 1b "
-
- : "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
- : "r" (&count->counter)
- : "cc", "memory" );
-
- return __orig;
-}
-
-#endif
+#include <asm-generic/mutex-xchg.h>
#endif
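
The replacement comment describes the generic xchg-based fastpath pulled in from <asm-generic/mutex-xchg.h>: roughly, a single atomic exchange on a counter where 1 means unlocked, 0 means locked with no waiters, and a negative value means locked with possible waiters. The following is a hedged userspace sketch of that protocol in C11 atomics; it illustrates the single-exchange fastpath idea, not the kernel's actual implementation:

/* Minimal single-exchange mutex sketch (C11 atomics), for illustration only. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int lock_count = 1;                  /* 1 == unlocked */

static void slowpath_lock(atomic_int *count)
{
    /* The kernel queues and sleeps here; spinning keeps the sketch short.
     * Leaving the counter at -1 marks the lock as possibly contended. */
    while (atomic_exchange(count, -1) != 1)
        ;
}

static void mutex_lock_sketch(atomic_int *count)
{
    if (atomic_exchange(count, 0) != 1)            /* fastpath: one exchange */
        slowpath_lock(count);
}

static void mutex_unlock_sketch(atomic_int *count)
{
    /* A previous value below 0 means the slow path was taken; the kernel
     * would wake a waiter here, the sketch just notes it. */
    if (atomic_exchange(count, 1) < 0)
        puts("unlock: waiters may exist (slow-path wakeup would happen here)");
}

int main(void)
{
    mutex_lock_sketch(&lock_count);
    puts("locked");
    mutex_unlock_sketch(&lock_count);
    puts("unlocked");
    return 0;
}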
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 5750704..6afd081 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -360,6 +360,18 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
#define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext)
#define pte_clear(mm,addr,ptep) set_pte_ext(ptep, __pte(0), 0)
+#define pte_none(pte) (!pte_val(pte))
+#define pte_present(pte) (pte_val(pte) & L_PTE_PRESENT)
+#define pte_write(pte) (!(pte_val(pte) & L_PTE_RDONLY))
+#define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY)
+#define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG)
+#define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN))
+#define pte_special(pte) (0)
+
+#define pte_present_user(pte) \
+ ((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \
+ (L_PTE_PRESENT | L_PTE_USER))
+
#if __LINUX_ARM_ARCH__ < 6
static inline void __sync_icache_dcache(pte_t pteval)
{
@@ -371,25 +383,15 @@ extern void __sync_icache_dcache(pte_t pteval);
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval)
{
- if (addr >= TASK_SIZE)
- set_pte_ext(ptep, pteval, 0);
- else {
+ unsigned long ext = 0;
+
+ if (addr < TASK_SIZE && pte_present_user(pteval)) {
__sync_icache_dcache(pteval);
- set_pte_ext(ptep, pteval, PTE_EXT_NG);
+ ext |= PTE_EXT_NG;
}
-}
-#define pte_none(pte) (!pte_val(pte))
-#define pte_present(pte) (pte_val(pte) & L_PTE_PRESENT)
-#define pte_write(pte) (!(pte_val(pte) & L_PTE_RDONLY))
-#define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY)
-#define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG)
-#define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN))
-#define pte_special(pte) (0)
-
-#define pte_present_user(pte) \
- ((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \
- (L_PTE_PRESENT | L_PTE_USER))
+ set_pte_ext(ptep, pteval, ext);
+}
#define PTE_BIT_FUNC(fn,op) \
static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
@@ -416,13 +418,13 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
*
* 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
* 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- * <--------------- offset --------------------> <- type --> 0 0 0
+ * <--------------- offset ----------------------> < type -> 0 0 0
*
- * This gives us up to 63 swap files and 32GB per swap file. Note that
+ * This gives us up to 31 swap files and 64GB per swap file. Note that
* the offset field is always non-zero.
*/
#define __SWP_TYPE_SHIFT 3
-#define __SWP_TYPE_BITS 6
+#define __SWP_TYPE_BITS 5
#define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1)
#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
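
With __SWP_TYPE_SHIFT = 3 and __SWP_TYPE_BITS = 5, the type field occupies bits 3-7 and the offset the remaining 24 high bits, so 2^24 four-KiB pages gives the 64 GB per swap file quoted in the updated comment. A small self-contained model of that layout (the helper names are illustrative, not the kernel's macros):

/* Worked model of the swap-entry layout described in the comment above. */
#include <stdio.h>

#define SWP_TYPE_SHIFT   3
#define SWP_TYPE_BITS    5
#define SWP_TYPE_MASK    ((1UL << SWP_TYPE_BITS) - 1)
#define SWP_OFFSET_SHIFT (SWP_TYPE_BITS + SWP_TYPE_SHIFT)    /* = 8 */

static unsigned long swp_entry(unsigned long type, unsigned long offset)
{
    return ((type & SWP_TYPE_MASK) << SWP_TYPE_SHIFT) | (offset << SWP_OFFSET_SHIFT);
}

int main(void)
{
    unsigned long max_offset = (1UL << (32 - SWP_OFFSET_SHIFT)) - 1;   /* 24 offset bits */
    unsigned long long max_bytes = (unsigned long long)(max_offset + 1) * 4096;

    printf("entry(type=3, offset=0x1234) = 0x%08lx\n", swp_entry(3, 0x1234));
    printf("offset bits: %d, max swap file size: %llu GiB\n",
           32 - SWP_OFFSET_SHIFT, max_bytes >> 30);
    return 0;
}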
diff --git a/arch/arm/include/asm/vfpmacros.h b/arch/arm/include/asm/vfpmacros.h
index 3d5fc41..c49c8f7 100644
--- a/arch/arm/include/asm/vfpmacros.h
+++ b/arch/arm/include/asm/vfpmacros.h
@@ -27,9 +27,9 @@
#if __LINUX_ARM_ARCH__ <= 6
ldr \tmp, =elf_hwcap @ may not have MVFR regs
ldr \tmp, [\tmp, #0]
- tst \tmp, #HWCAP_VFPv3D16
- ldceq p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31}
- addne \base, \base, #32*4 @ step over unused register space
+ tst \tmp, #HWCAP_VFPD32
+ ldcnel p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31}
+ addeq \base, \base, #32*4 @ step over unused register space
#else
VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0
and \tmp, \tmp, #MVFR0_A_SIMD_MASK @ A_SIMD field
@@ -51,9 +51,9 @@
#if __LINUX_ARM_ARCH__ <= 6
ldr \tmp, =elf_hwcap @ may not have MVFR regs
ldr \tmp, [\tmp, #0]
- tst \tmp, #HWCAP_VFPv3D16
- stceq p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31}
- addne \base, \base, #32*4 @ step over unused register space
+ tst \tmp, #HWCAP_VFPD32
+ stcnel p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31}
+ addeq \base, \base, #32*4 @ step over unused register space
#else
VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0
and \tmp, \tmp, #MVFR0_A_SIMD_MASK @ A_SIMD field
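
Both hunks flip the sense of the conditional load/store so the d16-d31 transfer happens when the new HWCAP_VFPD32 bit is set, rather than when the old HWCAP_VFPv3D16 bit was clear. A hypothetical userspace probe for the same condition follows; the bit position of HWCAP_VFPD32 is assumed here (its definition is not visible in this diff), so verify it against your tree's asm/hwcap.h:

/* Hypothetical probe: does the VFP expose 32 double registers (d0-d31)? */
#include <stdio.h>
#include <sys/auxv.h>                 /* getauxval(), AT_HWCAP */

#ifndef HWCAP_VFPD32
#define HWCAP_VFPD32 (1UL << 19)      /* assumption: mainline bit position */
#endif

int main(void)
{
    unsigned long hwcap = getauxval(AT_HWCAP);

    if (hwcap & HWCAP_VFPD32)
        puts("VFP has d16-d31: context save/restore covers 32 double registers");
    else
        puts("VFP has d0-d15 only: the macros above step over the upper register space");
    return 0;
}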