diff options
author | Andrew Dodd <atd7@cornell.edu> | 2013-02-16 18:41:04 -0500 |
---|---|---|
committer | Andrew Dodd <atd7@cornell.edu> | 2013-02-27 09:19:08 -0500 |
commit | b08797f2afdfc604c3143f8725d058aeef8ddcb2 (patch) | |
tree | c59e963bd6931d4e9f9526034ab402cc551f18ae /arch | |
parent | cbfae70f1dcaf3cc6e93061179dad80caa1597fe (diff) | |
parent | 54ea5b40f067cf098cac639973c6628c6944cfb2 (diff) | |
download | kernel_samsung_smdk4412-b08797f2afdfc604c3143f8725d058aeef8ddcb2.zip kernel_samsung_smdk4412-b08797f2afdfc604c3143f8725d058aeef8ddcb2.tar.gz kernel_samsung_smdk4412-b08797f2afdfc604c3143f8725d058aeef8ddcb2.tar.bz2 |
Merge remote-tracking branch 'kernelorg/linux-3.0.y' into 3_0_64
Conflicts:
arch/arm/Kconfig
arch/arm/include/asm/hwcap.h
arch/arm/kernel/smp.c
arch/arm/plat-samsung/adc.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_drv.h
drivers/mmc/core/sd.c
drivers/net/tun.c
drivers/net/usb/usbnet.c
drivers/regulator/max8997.c
drivers/usb/core/hub.c
drivers/usb/host/xhci.h
drivers/usb/serial/qcserial.c
fs/jbd2/transaction.c
include/linux/migrate.h
kernel/sys.c
kernel/time/timekeeping.c
lib/genalloc.c
mm/memory-failure.c
mm/memory_hotplug.c
mm/mempolicy.c
mm/page_alloc.c
mm/vmalloc.c
mm/vmscan.c
mm/vmstat.c
scripts/Kbuild.include
Change-Id: I91e2d85c07320c7ccfc04cf98a448e89bed6ade6
Diffstat (limited to 'arch')
136 files changed, 1229 insertions, 577 deletions
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index e756d04..b15162f 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -14,8 +14,8 @@ */ -#define ATOMIC_INIT(i) ( (atomic_t) { (i) } ) -#define ATOMIC64_INIT(i) ( (atomic64_t) { (i) } ) +#define ATOMIC_INIT(i) { (i) } +#define ATOMIC64_INIT(i) { (i) } #define atomic_read(v) (*(volatile int *)&(v)->counter) #define atomic64_read(v) (*(volatile long *)&(v)->counter) diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h index 06edfef..3eeb47c 100644 --- a/arch/alpha/include/asm/socket.h +++ b/arch/alpha/include/asm/socket.h @@ -69,9 +69,11 @@ #define SO_RXQ_OVFL 40 +#ifdef __KERNEL__ /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. */ #define SOCK_NONBLOCK 0x40000000 +#endif /* __KERNEL__ */ #endif /* _ASM_SOCKET_H */ diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b9e66ff..af7e74d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1281,6 +1281,43 @@ config ARM_ERRATA_763722 depends on CPU_V7 && SMP help This option enables the workaround for the 763722 Cortex-A15 erratum. + +config ARM_ERRATA_764369 + bool "ARM errata: Data cache line maintenance operation by MVA may not succeed" + depends on CPU_V7 && SMP + help + This option enables the workaround for erratum 764369 + affecting Cortex-A9 MPCore with two or more processors (all + current revisions). Under certain timing circumstances, a data + cache line maintenance operation by MVA targeting an Inner + Shareable memory region may fail to proceed up to either the + Point of Coherency or to the Point of Unification of the + system. This workaround adds a DSB instruction before the + relevant cache maintenance functions and sets a specific bit + in the diagnostic control register of the SCU. + +config PL310_ERRATA_769419 + bool "PL310 errata: no automatic Store Buffer drain" + depends on CACHE_L2X0 + help + On revisions of the PL310 prior to r3p2, the Store Buffer does + not automatically drain. This can cause normal, non-cacheable + writes to be retained when the memory system is idle, leading + to suboptimal I/O performance for drivers using coherent DMA. + This option adds a write barrier to the cpu_idle loop so that, + on systems with an outer cache, the store buffer is drained + explicitly. + +config ARM_ERRATA_775420 + bool "ARM errata: A data cache maintenance operation which aborts, might lead to deadlock" + depends on CPU_V7 + help + This option enables the workaround for the 775420 Cortex-A9 (r2p2, + r2p6,r2p8,r2p10,r3p0) erratum. In case a date cache maintenance + operation aborts with MMU exception, it might cause the processor + to deadlock. This workaround puts DSB before executing ISB if + an abort may occur on cache maintenance. + endmenu source "arch/arm/common/Kconfig" @@ -1345,32 +1382,6 @@ source "drivers/pci/Kconfig" source "drivers/pcmcia/Kconfig" -config ARM_ERRATA_764369 - bool "ARM errata: Data cache line maintenance operation by MVA may not succeed" - depends on CPU_V7 && SMP - help - This option enables the workaround for erratum 764369 - affecting Cortex-A9 MPCore with two or more processors (all - current revisions). Under certain timing circumstances, a data - cache line maintenance operation by MVA targeting an Inner - Shareable memory region may fail to proceed up to either the - Point of Coherency or to the Point of Unification of the - system. This workaround adds a DSB instruction before the - relevant cache maintenance functions and sets a specific bit - in the diagnostic control register of the SCU. - -config PL310_ERRATA_769419 - bool "PL310 errata: no automatic Store Buffer drain" - depends on CACHE_L2X0 - help - On revisions of the PL310 prior to r3p2, the Store Buffer does - not automatically drain. This can cause normal, non-cacheable - writes to be retained when the memory system is idle, leading - to suboptimal I/O performance for drivers using coherent DMA. - This option adds a write barrier to the cpu_idle loop so that, - on systems with an outer cache, the store buffer is drained - explicitly. - endmenu menu "Kernel Features" @@ -1945,6 +1956,7 @@ source "drivers/cpufreq/Kconfig" config CPU_FREQ_IMX tristate "CPUfreq driver for i.MX CPUs" depends on ARCH_MXC && CPU_FREQ + select CPU_FREQ_TABLE help This enables the CPUfreq driver for i.MX CPUs. diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 3a959994..63cfa2e 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -577,6 +577,7 @@ __armv7_mmu_cache_on: mcrne p15, 0, r0, c8, c7, 0 @ flush I,D TLBs #endif mrc p15, 0, r0, c1, c0, 0 @ read control reg + bic r0, r0, #1 << 28 @ clear SCTLR.TRE orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement orr r0, r0, #0x003c @ write buffer #ifdef CONFIG_MMU diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig index 2bf2243..166d6aa 100644 --- a/arch/arm/configs/mxs_defconfig +++ b/arch/arm/configs/mxs_defconfig @@ -29,7 +29,6 @@ CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_PREEMPT_VOLUNTARY=y CONFIG_AEABI=y -CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 CONFIG_AUTO_ZRELADDR=y CONFIG_FPE_NWFPE=y CONFIG_NET=y diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index ce110c3..c03ae91 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -222,7 +222,9 @@ static inline void vivt_flush_cache_mm(struct mm_struct *mm) static inline void vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) + struct mm_struct *mm = vma->vm_mm; + + if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) __cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end), vma->vm_flags); } @@ -230,7 +232,9 @@ vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned static inline void vivt_flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn) { - if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { + struct mm_struct *mm = vma->vm_mm; + + if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) { unsigned long addr = user_addr & PAGE_MASK; __cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags); } diff --git a/arch/arm/include/asm/hwcap.h b/arch/arm/include/asm/hwcap.h index c93a22a..2a0e8b8 100644 --- a/arch/arm/include/asm/hwcap.h +++ b/arch/arm/include/asm/hwcap.h @@ -4,6 +4,7 @@ /* * HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP */ + #define HWCAP_SWP (1 << 0) #define HWCAP_HALF (1 << 1) #define HWCAP_THUMB (1 << 2) @@ -25,6 +26,7 @@ #define HWCAP_IDIVT (1 << 18) #define HWCAP_IDIV (HWCAP_IDIVA | HWCAP_IDIVT) + #if defined(__KERNEL__) && !defined(__ASSEMBLY__) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm/include/asm/mutex.h b/arch/arm/include/asm/mutex.h index 93226cf..b1479fd 100644 --- a/arch/arm/include/asm/mutex.h +++ b/arch/arm/include/asm/mutex.h @@ -7,121 +7,10 @@ */ #ifndef _ASM_MUTEX_H #define _ASM_MUTEX_H - -#if __LINUX_ARM_ARCH__ < 6 -/* On pre-ARMv6 hardware the swp based implementation is the most efficient. */ -# include <asm-generic/mutex-xchg.h> -#else - /* - * Attempting to lock a mutex on ARMv6+ can be done with a bastardized - * atomic decrement (it is not a reliable atomic decrement but it satisfies - * the defined semantics for our purpose, while being smaller and faster - * than a real atomic decrement or atomic swap. The idea is to attempt - * decrementing the lock value only once. If once decremented it isn't zero, - * or if its store-back fails due to a dispute on the exclusive store, we - * simply bail out immediately through the slow path where the lock will be - * reattempted until it succeeds. + * On pre-ARMv6 hardware this results in a swp-based implementation, + * which is the most efficient. For ARMv6+, we emit a pair of exclusive + * accesses instead. */ -static inline void -__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - int __ex_flag, __res; - - __asm__ ( - - "ldrex %0, [%2] \n\t" - "sub %0, %0, #1 \n\t" - "strex %1, %0, [%2] " - - : "=&r" (__res), "=&r" (__ex_flag) - : "r" (&(count)->counter) - : "cc","memory" ); - - __res |= __ex_flag; - if (unlikely(__res != 0)) - fail_fn(count); -} - -static inline int -__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - int __ex_flag, __res; - - __asm__ ( - - "ldrex %0, [%2] \n\t" - "sub %0, %0, #1 \n\t" - "strex %1, %0, [%2] " - - : "=&r" (__res), "=&r" (__ex_flag) - : "r" (&(count)->counter) - : "cc","memory" ); - - __res |= __ex_flag; - if (unlikely(__res != 0)) - __res = fail_fn(count); - return __res; -} - -/* - * Same trick is used for the unlock fast path. However the original value, - * rather than the result, is used to test for success in order to have - * better generated assembly. - */ -static inline void -__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - int __ex_flag, __res, __orig; - - __asm__ ( - - "ldrex %0, [%3] \n\t" - "add %1, %0, #1 \n\t" - "strex %2, %1, [%3] " - - : "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag) - : "r" (&(count)->counter) - : "cc","memory" ); - - __orig |= __ex_flag; - if (unlikely(__orig != 0)) - fail_fn(count); -} - -/* - * If the unlock was done on a contended lock, or if the unlock simply fails - * then the mutex remains locked. - */ -#define __mutex_slowpath_needs_to_unlock() 1 - -/* - * For __mutex_fastpath_trylock we use another construct which could be - * described as a "single value cmpxchg". - * - * This provides the needed trylock semantics like cmpxchg would, but it is - * lighter and less generic than a true cmpxchg implementation. - */ -static inline int -__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - int __ex_flag, __res, __orig; - - __asm__ ( - - "1: ldrex %0, [%3] \n\t" - "subs %1, %0, #1 \n\t" - "strexeq %2, %1, [%3] \n\t" - "movlt %0, #0 \n\t" - "cmpeq %2, #0 \n\t" - "bgt 1b " - - : "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag) - : "r" (&count->counter) - : "cc", "memory" ); - - return __orig; -} - -#endif +#include <asm-generic/mutex-xchg.h> #endif diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 5750704..6afd081 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -360,6 +360,18 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext) #define pte_clear(mm,addr,ptep) set_pte_ext(ptep, __pte(0), 0) +#define pte_none(pte) (!pte_val(pte)) +#define pte_present(pte) (pte_val(pte) & L_PTE_PRESENT) +#define pte_write(pte) (!(pte_val(pte) & L_PTE_RDONLY)) +#define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY) +#define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG) +#define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN)) +#define pte_special(pte) (0) + +#define pte_present_user(pte) \ + ((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \ + (L_PTE_PRESENT | L_PTE_USER)) + #if __LINUX_ARM_ARCH__ < 6 static inline void __sync_icache_dcache(pte_t pteval) { @@ -371,25 +383,15 @@ extern void __sync_icache_dcache(pte_t pteval); static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval) { - if (addr >= TASK_SIZE) - set_pte_ext(ptep, pteval, 0); - else { + unsigned long ext = 0; + + if (addr < TASK_SIZE && pte_present_user(pteval)) { __sync_icache_dcache(pteval); - set_pte_ext(ptep, pteval, PTE_EXT_NG); + ext |= PTE_EXT_NG; } -} -#define pte_none(pte) (!pte_val(pte)) -#define pte_present(pte) (pte_val(pte) & L_PTE_PRESENT) -#define pte_write(pte) (!(pte_val(pte) & L_PTE_RDONLY)) -#define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY) -#define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG) -#define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN)) -#define pte_special(pte) (0) - -#define pte_present_user(pte) \ - ((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \ - (L_PTE_PRESENT | L_PTE_USER)) + set_pte_ext(ptep, pteval, ext); +} #define PTE_BIT_FUNC(fn,op) \ static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } @@ -416,13 +418,13 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 - * <--------------- offset --------------------> <- type --> 0 0 0 + * <--------------- offset ----------------------> < type -> 0 0 0 * - * This gives us up to 63 swap files and 32GB per swap file. Note that + * This gives us up to 31 swap files and 64GB per swap file. Note that * the offset field is always non-zero. */ #define __SWP_TYPE_SHIFT 3 -#define __SWP_TYPE_BITS 6 +#define __SWP_TYPE_BITS 5 #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) diff --git a/arch/arm/include/asm/vfpmacros.h b/arch/arm/include/asm/vfpmacros.h index 3d5fc41..c49c8f7 100644 --- a/arch/arm/include/asm/vfpmacros.h +++ b/arch/arm/include/asm/vfpmacros.h @@ -27,9 +27,9 @@ #if __LINUX_ARM_ARCH__ <= 6 ldr \tmp, =elf_hwcap @ may not have MVFR regs ldr \tmp, [\tmp, #0] - tst \tmp, #HWCAP_VFPv3D16 - ldceq p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31} - addne \base, \base, #32*4 @ step over unused register space + tst \tmp, #HWCAP_VFPD32 + ldcnel p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31} + addeq \base, \base, #32*4 @ step over unused register space #else VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0 and \tmp, \tmp, #MVFR0_A_SIMD_MASK @ A_SIMD field @@ -51,9 +51,9 @@ #if __LINUX_ARM_ARCH__ <= 6 ldr \tmp, =elf_hwcap @ may not have MVFR regs ldr \tmp, [\tmp, #0] - tst \tmp, #HWCAP_VFPv3D16 - stceq p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31} - addne \base, \base, #32*4 @ step over unused register space + tst \tmp, #HWCAP_VFPD32 + stcnel p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31} + addeq \base, \base, #32*4 @ step over unused register space #else VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0 and \tmp, \tmp, #MVFR0_A_SIMD_MASK @ A_SIMD field diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index e550be9..6f4ecfc 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -288,18 +288,26 @@ static inline int skip_secondary_calibrate(void) asmlinkage void __cpuinit secondary_start_kernel(void) { struct mm_struct *mm = &init_mm; - unsigned int cpu = smp_processor_id(); + unsigned int cpu; + + /* + * The identity mapping is uncached (strongly ordered), so + * switch away from it before attempting any exclusive accesses. + */ + cpu_switch_mm(mm->pgd, mm); + enter_lazy_tlb(mm, current); + local_flush_tlb_all(); /* * All kernel threads share the same mm context; grab a * reference and switch to it. */ + cpu = smp_processor_id(); atomic_inc(&mm->mm_count); current->active_mm = mm; cpumask_set_cpu(cpu, mm_cpumask(mm)); - cpu_switch_mm(mm->pgd, mm); - enter_lazy_tlb(mm, current); - local_flush_tlb_all(); + + printk("CPU%u: Booted secondary processor\n", cpu); printk("CPU%u: Booted secondary processor\n", cpu); diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index 40ee7e5..0951a32 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -108,10 +108,12 @@ static void set_segfault(struct pt_regs *regs, unsigned long addr) { siginfo_t info; + down_read(¤t->mm->mmap_sem); if (find_vma(current->mm, addr) == NULL) info.si_code = SEGV_MAPERR; else info.si_code = SEGV_ACCERR; + up_read(¤t->mm->mmap_sem); info.si_signo = SIGSEGV; info.si_errno = 0; diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index 62e7c61..0264ab4 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -115,7 +115,7 @@ int kernel_execve(const char *filename, "Ir" (THREAD_START_SP - sizeof(regs)), "r" (®s), "Ir" (sizeof(regs)) - : "r0", "r1", "r2", "r3", "ip", "lr", "memory"); + : "r0", "r1", "r2", "r3", "r8", "r9", "ip", "lr", "memory"); out: return ret; diff --git a/arch/arm/mach-at91/at91rm9200_devices.c b/arch/arm/mach-at91/at91rm9200_devices.c index 7227755..871a818 100644 --- a/arch/arm/mach-at91/at91rm9200_devices.c +++ b/arch/arm/mach-at91/at91rm9200_devices.c @@ -454,7 +454,7 @@ static struct i2c_gpio_platform_data pdata = { static struct platform_device at91rm9200_twi_device = { .name = "i2c-gpio", - .id = -1, + .id = 0, .dev.platform_data = &pdata, }; diff --git a/arch/arm/mach-at91/at91sam9260_devices.c b/arch/arm/mach-at91/at91sam9260_devices.c index 39f81f4..89a8414 100644 --- a/arch/arm/mach-at91/at91sam9260_devices.c +++ b/arch/arm/mach-at91/at91sam9260_devices.c @@ -459,7 +459,7 @@ static struct i2c_gpio_platform_data pdata = { static struct platform_device at91sam9260_twi_device = { .name = "i2c-gpio", - .id = -1, + .id = 0, .dev.platform_data = &pdata, }; diff --git a/arch/arm/mach-at91/at91sam9261_devices.c b/arch/arm/mach-at91/at91sam9261_devices.c index 5004bf0..5d43cf4 100644 --- a/arch/arm/mach-at91/at91sam9261_devices.c +++ b/arch/arm/mach-at91/at91sam9261_devices.c @@ -276,7 +276,7 @@ static struct i2c_gpio_platform_data pdata = { static struct platform_device at91sam9261_twi_device = { .name = "i2c-gpio", - .id = -1, + .id = 0, .dev.platform_data = &pdata, }; diff --git a/arch/arm/mach-at91/at91sam9263_devices.c b/arch/arm/mach-at91/at91sam9263_devices.c index a050f41..2bbd163 100644 --- a/arch/arm/mach-at91/at91sam9263_devices.c +++ b/arch/arm/mach-at91/at91sam9263_devices.c @@ -534,7 +534,7 @@ static struct i2c_gpio_platform_data pdata = { static struct platform_device at91sam9263_twi_device = { .name = "i2c-gpio", - .id = -1, + .id = 0, .dev.platform_data = &pdata, }; diff --git a/arch/arm/mach-at91/at91sam9rl_devices.c b/arch/arm/mach-at91/at91sam9rl_devices.c index aacb19d..659870e 100644 --- a/arch/arm/mach-at91/at91sam9rl_devices.c +++ b/arch/arm/mach-at91/at91sam9rl_devices.c @@ -319,7 +319,7 @@ static struct i2c_gpio_platform_data pdata = { static struct platform_device at91sam9rl_twi_device = { .name = "i2c-gpio", - .id = -1, + .id = 0, .dev.platform_data = &pdata, }; diff --git a/arch/arm/mach-dove/include/mach/pm.h b/arch/arm/mach-dove/include/mach/pm.h index 3ad9f94..11799c3 100644 --- a/arch/arm/mach-dove/include/mach/pm.h +++ b/arch/arm/mach-dove/include/mach/pm.h @@ -45,7 +45,7 @@ static inline int pmu_to_irq(int pin) static inline int irq_to_pmu(int irq) { - if (IRQ_DOVE_PMU_START < irq && irq < NR_IRQS) + if (IRQ_DOVE_PMU_START <= irq && irq < NR_IRQS) return irq - IRQ_DOVE_PMU_START; return -EINVAL; diff --git a/arch/arm/mach-dove/irq.c b/arch/arm/mach-dove/irq.c index f07fd16..9f2fd10 100644 --- a/arch/arm/mach-dove/irq.c +++ b/arch/arm/mach-dove/irq.c @@ -61,8 +61,20 @@ static void pmu_irq_ack(struct irq_data *d) int pin = irq_to_pmu(d->irq); u32 u; + /* + * The PMU mask register is not RW0C: it is RW. This means that + * the bits take whatever value is written to them; if you write + * a '1', you will set the interrupt. + * + * Unfortunately this means there is NO race free way to clear + * these interrupts. + * + * So, let's structure the code so that the window is as small as + * possible. + */ u = ~(1 << (pin & 31)); - writel(u, PMU_INTERRUPT_CAUSE); + u &= readl_relaxed(PMU_INTERRUPT_CAUSE); + writel_relaxed(u, PMU_INTERRUPT_CAUSE); } static struct irq_chip pmu_irq_chip = { diff --git a/arch/arm/mach-imx/mach-mx21ads.c b/arch/arm/mach-imx/mach-mx21ads.c index 74ac889..a37fe02 100644 --- a/arch/arm/mach-imx/mach-mx21ads.c +++ b/arch/arm/mach-imx/mach-mx21ads.c @@ -32,7 +32,7 @@ * Memory-mapped I/O on MX21ADS base board */ #define MX21ADS_MMIO_BASE_ADDR 0xf5000000 -#define MX21ADS_MMIO_SIZE SZ_16M +#define MX21ADS_MMIO_SIZE 0xc00000 #define MX21ADS_REG_ADDR(offset) (void __force __iomem *) \ (MX21ADS_MMIO_BASE_ADDR + (offset)) diff --git a/arch/arm/mach-omap2/opp.c b/arch/arm/mach-omap2/opp.c index ab8b35b..0627494 100644 --- a/arch/arm/mach-omap2/opp.c +++ b/arch/arm/mach-omap2/opp.c @@ -53,7 +53,7 @@ int __init omap_init_opp_table(struct omap_opp_def *opp_def, omap_table_init = 1; /* Lets now register with OPP library */ - for (i = 0; i < opp_def_size; i++) { + for (i = 0; i < opp_def_size; i++, opp_def++) { struct omap_hwmod *oh; struct device *dev; @@ -86,7 +86,6 @@ int __init omap_init_opp_table(struct omap_opp_def *opp_def, __func__, opp_def->freq, opp_def->hwmod_name, i, r); } - opp_def++; } return 0; diff --git a/arch/arm/mach-orion5x/mpp.h b/arch/arm/mach-orion5x/mpp.h index eac6897..db70e79 100644 --- a/arch/arm/mach-orion5x/mpp.h +++ b/arch/arm/mach-orion5x/mpp.h @@ -65,8 +65,8 @@ #define MPP8_GIGE MPP(8, 0x1, 0, 0, 1, 1, 1) #define MPP9_UNUSED MPP(9, 0x0, 0, 0, 1, 1, 1) -#define MPP9_GPIO MPP(9, 0x0, 0, 0, 1, 1, 1) -#define MPP9_GIGE MPP(9, 0x1, 1, 1, 1, 1, 1) +#define MPP9_GPIO MPP(9, 0x0, 1, 1, 1, 1, 1) +#define MPP9_GIGE MPP(9, 0x1, 0, 0, 1, 1, 1) #define MPP10_UNUSED MPP(10, 0x0, 0, 0, 1, 1, 1) #define MPP10_GPIO MPP(10, 0x0, 1, 1, 1, 1, 1) diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index a655d3d..82ab2c5 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -211,6 +211,9 @@ ENTRY(v7_coherent_user_range) * isn't mapped, just try the next page. */ 9001: +#ifdef CONFIG_ARM_ERRATA_775420 + dsb +#endif mov r12, r12, lsr #12 mov r12, r12, lsl #12 add r12, r12, #4096 diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index f96d2c7..5663650 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -467,25 +467,27 @@ static void dma_cache_maint_page(struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, void (*op)(const void *, size_t, int)) { + unsigned long pfn; + size_t left = size; + + pfn = page_to_pfn(page) + offset / PAGE_SIZE; + offset %= PAGE_SIZE; + /* * A single sg entry may refer to multiple physically contiguous * pages. But we still need to process highmem pages individually. * If highmem is not configured then the bulk of this loop gets * optimized out. */ - size_t left = size; do { size_t len = left; void *vaddr; + page = pfn_to_page(pfn); + if (PageHighMem(page)) { - if (len + offset > PAGE_SIZE) { - if (offset >= PAGE_SIZE) { - page += offset / PAGE_SIZE; - offset %= PAGE_SIZE; - } + if (len + offset > PAGE_SIZE) len = PAGE_SIZE - offset; - } vaddr = kmap_high_get(page); if (vaddr) { vaddr += offset; @@ -502,7 +504,7 @@ static void dma_cache_maint_page(struct page *page, unsigned long offset, op(vaddr, len, dir); } offset = 0; - page++; + pfn++; left -= len; } while (left); } diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 6f66c0e..c398b12 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -293,7 +293,9 @@ good_area: return fault; check_stack: - if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) + /* Don't allow expansion below FIRST_USER_ADDRESS */ + if (vma->vm_flags & VM_GROWSDOWN && + addr >= FIRST_USER_ADDRESS && !expand_stack(vma, addr)) goto good_area; out: return fault; diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 1a8d4aa..8fda9f7 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -236,8 +236,6 @@ void __sync_icache_dcache(pte_t pteval) struct page *page; struct address_space *mapping; - if (!pte_present_user(pteval)) - return; if (cache_is_vipt_nonaliasing() && !pte_exec(pteval)) /* only flush non-aliasing VIPT caches for exec mappings */ return; diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 594d677..19d9369 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -467,7 +467,7 @@ static void __init build_mem_type_table(void) } for (i = 0; i < 16; i++) { - unsigned long v = pgprot_val(protection_map[i]); + pteval_t v = pgprot_val(protection_map[i]); protection_map[i] = __pgprot(v | user_pgprot); } diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S index ebd4290..f2e2e9f 100644 --- a/arch/arm/mm/tlb-v7.S +++ b/arch/arm/mm/tlb-v7.S @@ -39,10 +39,18 @@ ENTRY(v7wbi_flush_user_tlb_range) mov r0, r0, lsr #PAGE_SHIFT @ align address mov r1, r1, lsr #PAGE_SHIFT asid r3, r3 @ mask ASID +#ifdef CONFIG_ARM_ERRATA_720789 + ALT_SMP(W(mov) r3, #0 ) + ALT_UP(W(nop) ) +#endif orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA mov r1, r1, lsl #PAGE_SHIFT 1: +#ifdef CONFIG_ARM_ERRATA_720789 + ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable) +#else ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) +#endif ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA add r0, r0, #PAGE_SZ @@ -70,7 +78,11 @@ ENTRY(v7wbi_flush_kern_tlb_range) mov r0, r0, lsl #PAGE_SHIFT mov r1, r1, lsl #PAGE_SHIFT 1: +#ifdef CONFIG_ARM_ERRATA_720789 + ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable) +#else ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) +#endif ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA add r0, r0, #PAGE_SZ cmp r0, r1 diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c index 0719f49..9f422ba 100644 --- a/arch/arm/plat-s3c24xx/dma.c +++ b/arch/arm/plat-s3c24xx/dma.c @@ -431,7 +431,7 @@ s3c2410_dma_canload(struct s3c2410_dma_chan *chan) * when necessary. */ -int s3c2410_dma_enqueue(unsigned int channel, void *id, +int s3c2410_dma_enqueue(enum dma_ch channel, void *id, dma_addr_t data, int size) { struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel); diff --git a/arch/arm/plat-samsung/adc.c b/arch/arm/plat-samsung/adc.c index 389e7e0..0c59b1d 100644 --- a/arch/arm/plat-samsung/adc.c +++ b/arch/arm/plat-samsung/adc.c @@ -173,6 +173,11 @@ int s3c_adc_start(struct s3c_adc_client *client, spin_lock_irqsave(&adc->lock, flags); + if (client->is_ts && adc->ts_pend) { + spin_unlock_irqrestore(&adc->lock, flags); + return -EAGAIN; + } + client->convert_cb = s3c_convert_done; client->wait = pwake; client->result = -1; diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index c00eb89..0e46a47 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -588,11 +588,14 @@ static int __init vfp_init(void) elf_hwcap |= HWCAP_VFPv3; /* - * Check for VFPv3 D16. CPUs in this configuration - * only have 16 x 64bit registers. + * Check for VFPv3 D16 and VFPv4 D16. CPUs in + * this configuration only have 16 x 64bit + * registers. */ if (((fmrx(MVFR0) & MVFR0_A_SIMD_MASK)) == 1) - elf_hwcap |= HWCAP_VFPv3D16; + elf_hwcap |= HWCAP_VFPv3D16; /* also v4-D16 */ + else + elf_hwcap |= HWCAP_VFPD32; } #endif /* diff --git a/arch/cris/include/asm/io.h b/arch/cris/include/asm/io.h index 32567bc..ac12ae2 100644 --- a/arch/cris/include/asm/io.h +++ b/arch/cris/include/asm/io.h @@ -133,12 +133,39 @@ static inline void writel(unsigned int b, volatile void __iomem *addr) #define insb(port,addr,count) (cris_iops ? cris_iops->read_io(port,addr,1,count) : 0) #define insw(port,addr,count) (cris_iops ? cris_iops->read_io(port,addr,2,count) : 0) #define insl(port,addr,count) (cris_iops ? cris_iops->read_io(port,addr,4,count) : 0) -#define outb(data,port) if (cris_iops) cris_iops->write_io(port,(void*)(unsigned)data,1,1) -#define outw(data,port) if (cris_iops) cris_iops->write_io(port,(void*)(unsigned)data,2,1) -#define outl(data,port) if (cris_iops) cris_iops->write_io(port,(void*)(unsigned)data,4,1) -#define outsb(port,addr,count) if(cris_iops) cris_iops->write_io(port,(void*)addr,1,count) -#define outsw(port,addr,count) if(cris_iops) cris_iops->write_io(port,(void*)addr,2,count) -#define outsl(port,addr,count) if(cris_iops) cris_iops->write_io(port,(void*)addr,3,count) +static inline void outb(unsigned char data, unsigned int port) +{ + if (cris_iops) + cris_iops->write_io(port, (void *) &data, 1, 1); +} +static inline void outw(unsigned short data, unsigned int port) +{ + if (cris_iops) + cris_iops->write_io(port, (void *) &data, 2, 1); +} +static inline void outl(unsigned int data, unsigned int port) +{ + if (cris_iops) + cris_iops->write_io(port, (void *) &data, 4, 1); +} +static inline void outsb(unsigned int port, const void *addr, + unsigned long count) +{ + if (cris_iops) + cris_iops->write_io(port, (void *)addr, 1, count); +} +static inline void outsw(unsigned int port, const void *addr, + unsigned long count) +{ + if (cris_iops) + cris_iops->write_io(port, (void *)addr, 2, count); +} +static inline void outsl(unsigned int port, const void *addr, + unsigned long count) +{ + if (cris_iops) + cris_iops->write_io(port, (void *)addr, 4, count); +} /* * Convert a physical pointer to a virtual kernel pointer for /dev/mem diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 4468814..6fcc9a0 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -18,8 +18,8 @@ #include <asm/system.h> -#define ATOMIC_INIT(i) ((atomic_t) { (i) }) -#define ATOMIC64_INIT(i) ((atomic64_t) { (i) }) +#define ATOMIC_INIT(i) { (i) } +#define ATOMIC64_INIT(i) { (i) } #define atomic_read(v) (*(volatile int *)&(v)->counter) #define atomic64_read(v) (*(volatile long *)&(v)->counter) diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index 7c928da..d8de182 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -321,11 +321,12 @@ #define __NR_syncfs 1329 #define __NR_setns 1330 #define __NR_sendmmsg 1331 +#define __NR_accept4 1334 #ifdef __KERNEL__ -#define NR_syscalls 308 /* length of syscall table */ +#define NR_syscalls 311 /* length of syscall table */ /* * The following defines stop scripts/checksyscalls.sh from complaining about diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 97dd2ab..df477f8 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1777,6 +1777,9 @@ sys_call_table: data8 sys_syncfs data8 sys_setns // 1330 data8 sys_sendmmsg + data8 sys_ni_syscall /* process_vm_readv */ + data8 sys_ni_syscall /* process_vm_writev */ + data8 sys_accept4 .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 782c3a35..3540c5e 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -23,7 +23,6 @@ #include <linux/ioport.h> #include <linux/kernel_stat.h> #include <linux/ptrace.h> -#include <linux/random.h> /* for rand_initialize_irq() */ #include <linux/signal.h> #include <linux/smp.h> #include <linux/threads.h> diff --git a/arch/m68k/include/asm/entry_mm.h b/arch/m68k/include/asm/entry_mm.h index 73b8c8f..bdace4b 100644 --- a/arch/m68k/include/asm/entry_mm.h +++ b/arch/m68k/include/asm/entry_mm.h @@ -35,8 +35,8 @@ /* the following macro is used when enabling interrupts */ #if defined(MACH_ATARI_ONLY) - /* block out HSYNC on the atari */ -#define ALLOWINT (~0x400) + /* block out HSYNC = ipl 2 on the atari */ +#define ALLOWINT (~0x500) #define MAX_NOINT_IPL 3 #else /* portable version */ diff --git a/arch/m68k/include/asm/signal.h b/arch/m68k/include/asm/signal.h index 5bc09c7..0b6b0e5 100644 --- a/arch/m68k/include/asm/signal.h +++ b/arch/m68k/include/asm/signal.h @@ -156,7 +156,7 @@ typedef struct sigaltstack { static inline void sigaddset(sigset_t *set, int _sig) { asm ("bfset %0{%1,#1}" - : "+od" (*set) + : "+o" (*set) : "id" ((_sig - 1) ^ 31) : "cc"); } @@ -164,7 +164,7 @@ static inline void sigaddset(sigset_t *set, int _sig) static inline void sigdelset(sigset_t *set, int _sig) { asm ("bfclr %0{%1,#1}" - : "+od" (*set) + : "+o" (*set) : "id" ((_sig - 1) ^ 31) : "cc"); } @@ -180,7 +180,7 @@ static inline int __gen_sigismember(sigset_t *set, int _sig) int ret; asm ("bfextu %1{%2,#1},%0" : "=d" (ret) - : "od" (*set), "id" ((_sig-1) ^ 31) + : "o" (*set), "id" ((_sig-1) ^ 31) : "cc"); return ret; } diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c index 8623f8d..9a5932e 100644 --- a/arch/m68k/kernel/sys_m68k.c +++ b/arch/m68k/kernel/sys_m68k.c @@ -479,9 +479,13 @@ sys_atomic_cmpxchg_32(unsigned long newval, int oldval, int d3, int d4, int d5, goto bad_access; } - mem_value = *mem; + /* + * No need to check for EFAULT; we know that the page is + * present and writable. + */ + __get_user(mem_value, mem); if (mem_value == oldval) - *mem = newval; + __put_user(newval, mem); pte_unmap_unlock(pte, ptl); up_read(&mm->mmap_sem); diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 884819c..9aa60f6 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -236,7 +236,7 @@ KBUILD_CPPFLAGS += -D"DATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0)" LDFLAGS += -m $(ld-emul) ifdef CONFIG_MIPS -CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -xc /dev/null | \ +CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \ egrep -vw '__GNUC_(|MINOR_|PATCHLEVEL_)_' | \ sed -e 's/^\#define /-D/' -e "s/ /='/" -e "s/$$/'/") ifdef CONFIG_64BIT diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 97f8bf6..adda036 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -60,6 +60,8 @@ struct thread_info { register struct thread_info *__current_thread_info __asm__("$28"); #define current_thread_info() __current_thread_info +#endif /* !__ASSEMBLY__ */ + /* thread information allocation */ #if defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_32BIT) #define THREAD_SIZE_ORDER (1) @@ -97,8 +99,6 @@ register struct thread_info *__current_thread_info __asm__("$28"); #define free_thread_info(info) kfree(info) -#endif /* !__ASSEMBLY__ */ - #define PREEMPT_ACTIVE 0x10000000 /* diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index 83bba33..8b3c62c 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -100,7 +100,7 @@ obj-$(CONFIG_MIPS_MACHINE) += mips_machine.o obj-$(CONFIG_OF) += prom.o -CFLAGS_cpu-bugs64.o = $(shell if $(CC) $(KBUILD_CFLAGS) -Wa,-mdaddi -c -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-DHAVE_AS_SET_DADDI"; fi) +CFLAGS_cpu-bugs64.o = $(shell if $(CC) $(KBUILD_CFLAGS) -Wa,-mdaddi -c -o /dev/null -x c /dev/null >/dev/null 2>&1; then echo "-DHAVE_AS_SET_DADDI"; fi) obj-$(CONFIG_HAVE_STD_PC_SERIAL_PORT) += 8250-platform.o diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c index f4546e9..23817a6 100644 --- a/arch/mips/kernel/kgdb.c +++ b/arch/mips/kernel/kgdb.c @@ -283,6 +283,15 @@ static int kgdb_mips_notify(struct notifier_block *self, unsigned long cmd, struct pt_regs *regs = args->regs; int trap = (regs->cp0_cause & 0x7c) >> 2; +#ifdef CONFIG_KPROBES + /* + * Return immediately if the kprobes fault notifier has set + * DIE_PAGE_FAULT. + */ + if (cmd == DIE_PAGE_FAULT) + return NOTIFY_DONE; +#endif /* CONFIG_KPROBES */ + /* Userspace events, ignore. */ if (user_mode(regs)) return NOTIFY_DONE; diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index a81176f..be281c6 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -1,5 +1,6 @@ #include <asm/asm-offsets.h> #include <asm/page.h> +#include <asm/thread_info.h> #include <asm-generic/vmlinux.lds.h> #undef mips @@ -73,7 +74,7 @@ SECTIONS .data : { /* Data */ . = . + DATAOFFSET; /* for CONFIG_MAPPED_KERNEL */ - INIT_TASK_DATA(PAGE_SIZE) + INIT_TASK_DATA(THREAD_SIZE) NOSAVE_DATA CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) READ_MOSTLY_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) diff --git a/arch/mn10300/Makefile b/arch/mn10300/Makefile index 7120282..3eb4a52 100644 --- a/arch/mn10300/Makefile +++ b/arch/mn10300/Makefile @@ -26,7 +26,7 @@ CHECKFLAGS += PROCESSOR := unset UNIT := unset -KBUILD_CFLAGS += -mam33 -mmem-funcs -DCPU=AM33 +KBUILD_CFLAGS += -mam33 -DCPU=AM33 $(call cc-option,-mmem-funcs,) KBUILD_AFLAGS += -mam33 -DCPU=AM33 ifeq ($(CONFIG_MN10300_CURRENT_IN_E2),y) diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 26fd114..3706cf0 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -248,7 +248,7 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) #define atomic_sub_and_test(i,v) (atomic_sub_return((i),(v)) == 0) -#define ATOMIC_INIT(i) ((atomic_t) { (i) }) +#define ATOMIC_INIT(i) { (i) } #define smp_mb__before_atomic_dec() smp_mb() #define smp_mb__after_atomic_dec() smp_mb() @@ -257,7 +257,7 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) #ifdef CONFIG_64BIT -#define ATOMIC64_INIT(i) ((atomic64_t) { (i) }) +#define ATOMIC64_INIT(i) { (i) } static __inline__ s64 __atomic64_add_return(s64 i, atomic64_t *v) diff --git a/arch/parisc/include/asm/prefetch.h b/arch/parisc/include/asm/prefetch.h index c5edc60..1ee7c82 100644 --- a/arch/parisc/include/asm/prefetch.h +++ b/arch/parisc/include/asm/prefetch.h @@ -21,7 +21,12 @@ #define ARCH_HAS_PREFETCH static inline void prefetch(const void *addr) { - __asm__("ldw 0(%0), %%r0" : : "r" (addr)); + __asm__( +#ifndef CONFIG_PA20 + /* Need to avoid prefetch of NULL on PA7300LC */ + " extrw,u,= %0,31,32,%%r0\n" +#endif + " ldw 0(%0), %%r0" : : "r" (addr)); } /* LDD is a PA2.0 addition. */ diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 6f05944..07ef351 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -552,7 +552,7 @@ * entry (identifying the physical page) and %r23 up with * the from tlb entry (or nothing if only a to entry---for * clear_user_page_asm) */ - .macro do_alias spc,tmp,tmp1,va,pte,prot,fault + .macro do_alias spc,tmp,tmp1,va,pte,prot,fault,patype cmpib,COND(<>),n 0,\spc,\fault ldil L%(TMPALIAS_MAP_START),\tmp #if defined(CONFIG_64BIT) && (TMPALIAS_MAP_START >= 0x80000000) @@ -581,7 +581,15 @@ */ cmpiclr,= 0x01,\tmp,%r0 ldi (_PAGE_DIRTY|_PAGE_READ|_PAGE_WRITE),\prot +.ifc \patype,20 depd,z \prot,8,7,\prot +.else +.ifc \patype,11 + depw,z \prot,8,7,\prot +.else + .error "undefined PA type to do_alias" +.endif +.endif /* * OK, it is in the temp alias region, check whether "from" or "to". * Check "subtle" note in pacache.S re: r23/r26. @@ -1185,7 +1193,7 @@ dtlb_miss_20w: nop dtlb_check_alias_20w: - do_alias spc,t0,t1,va,pte,prot,dtlb_fault + do_alias spc,t0,t1,va,pte,prot,dtlb_fault,20 idtlbt pte,prot @@ -1209,7 +1217,7 @@ nadtlb_miss_20w: nop nadtlb_check_alias_20w: - do_alias spc,t0,t1,va,pte,prot,nadtlb_emulate + do_alias spc,t0,t1,va,pte,prot,nadtlb_emulate,20 idtlbt pte,prot @@ -1241,7 +1249,7 @@ dtlb_miss_11: nop dtlb_check_alias_11: - do_alias spc,t0,t1,va,pte,prot,dtlb_fault + do_alias spc,t0,t1,va,pte,prot,dtlb_fault,11 idtlba pte,(va) idtlbp prot,(va) @@ -1273,7 +1281,7 @@ nadtlb_miss_11: nop nadtlb_check_alias_11: - do_alias spc,t0,t1,va,pte,prot,nadtlb_emulate + do_alias spc,t0,t1,va,pte,prot,nadtlb_emulate,11 idtlba pte,(va) idtlbp prot,(va) @@ -1300,7 +1308,7 @@ dtlb_miss_20: nop dtlb_check_alias_20: - do_alias spc,t0,t1,va,pte,prot,dtlb_fault + do_alias spc,t0,t1,va,pte,prot,dtlb_fault,20 idtlbt pte,prot @@ -1326,7 +1334,7 @@ nadtlb_miss_20: nop nadtlb_check_alias_20: - do_alias spc,t0,t1,va,pte,prot,nadtlb_emulate + do_alias spc,t0,t1,va,pte,prot,nadtlb_emulate,20 idtlbt pte,prot @@ -1453,7 +1461,7 @@ naitlb_miss_20w: nop naitlb_check_alias_20w: - do_alias spc,t0,t1,va,pte,prot,naitlb_fault + do_alias spc,t0,t1,va,pte,prot,naitlb_fault,20 iitlbt pte,prot @@ -1507,7 +1515,7 @@ naitlb_miss_11: nop naitlb_check_alias_11: - do_alias spc,t0,t1,va,pte,prot,itlb_fault + do_alias spc,t0,t1,va,pte,prot,itlb_fault,11 iitlba pte,(%sr0, va) iitlbp prot,(%sr0, va) @@ -1553,7 +1561,7 @@ naitlb_miss_20: nop naitlb_check_alias_20: - do_alias spc,t0,t1,va,pte,prot,naitlb_fault + do_alias spc,t0,t1,va,pte,prot,naitlb_fault,20 iitlbt pte,prot diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index 93ff3d9..5d7218a 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -692,7 +692,7 @@ ENTRY(flush_icache_page_asm) /* Purge any old translation */ - pitlb (%sr0,%r28) + pitlb (%sr4,%r28) ldil L%icache_stride, %r1 ldw R%icache_stride(%r1), %r1 @@ -706,27 +706,29 @@ ENTRY(flush_icache_page_asm) sub %r25, %r1, %r25 -1: fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) + /* fic only has the type 26 form on PA1.1, requiring an + * explicit space specification, so use %sr4 */ +1: fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) cmpb,COND(<<) %r28, %r25,1b - fic,m %r1(%r28) + fic,m %r1(%sr4,%r28) sync bv %r0(%r2) - pitlb (%sr0,%r25) + pitlb (%sr4,%r25) .exit .procend diff --git a/arch/parisc/kernel/signal32.c b/arch/parisc/kernel/signal32.c index e141324..d0ea054 100644 --- a/arch/parisc/kernel/signal32.c +++ b/arch/parisc/kernel/signal32.c @@ -67,7 +67,8 @@ put_sigset32(compat_sigset_t __user *up, sigset_t *set, size_t sz) { compat_sigset_t s; - if (sz != sizeof *set) panic("put_sigset32()"); + if (sz != sizeof *set) + return -EINVAL; sigset_64to32(&s, set); return copy_to_user(up, &s, sizeof s); @@ -79,7 +80,8 @@ get_sigset32(compat_sigset_t __user *up, sigset_t *set, size_t sz) compat_sigset_t s; int r; - if (sz != sizeof *set) panic("put_sigset32()"); + if (sz != sizeof *set) + return -EINVAL; if ((r = copy_from_user(&s, up, sz)) == 0) { sigset_32to64(set, &s); diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index c9b9322..7ea75d1 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -73,6 +73,8 @@ static unsigned long get_shared_area(struct address_space *mapping, struct vm_area_struct *vma; int offset = mapping ? get_offset(mapping) : 0; + offset = (offset + (pgoff << PAGE_SHIFT)) & 0x3FF000; + addr = DCACHE_ALIGN(addr - offset) + offset; for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) { diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index fa6f2b8..64a9998 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -50,8 +50,10 @@ SECTIONS . = KERNEL_BINARY_TEXT_START; _text = .; /* Text and read-only data */ - .text ALIGN(16) : { + .head ALIGN(16) : { HEAD_TEXT + } = 0 + .text ALIGN(16) : { TEXT_TEXT SCHED_TEXT LOCK_TEXT @@ -65,7 +67,7 @@ SECTIONS *(.fixup) *(.lock.text) /* out-of-line lock text */ *(.gnu.warning) - } = 0 + } /* End of text section */ _etext = .; diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 1cf20bd..33a3580 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -126,11 +126,11 @@ static inline u64 cputime64_to_jiffies64(const cputime_t ct) /* * Convert cputime <-> microseconds */ -extern u64 __cputime_msec_factor; +extern u64 __cputime_usec_factor; static inline unsigned long cputime_to_usecs(const cputime_t ct) { - return mulhdu(ct, __cputime_msec_factor) * USEC_PER_MSEC; + return mulhdu(ct, __cputime_usec_factor); } static inline cputime_t usecs_to_cputime(const unsigned long us) @@ -143,7 +143,7 @@ static inline cputime_t usecs_to_cputime(const unsigned long us) sec = us / 1000000; if (ct) { ct *= tb_ticks_per_sec; - do_div(ct, 1000); + do_div(ct, 1000000); } if (sec) ct += (cputime_t) sec * tb_ticks_per_sec; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c5cae0d..764e99c 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1000,7 +1000,8 @@ /* Macros for setting and retrieving special purpose registers */ #ifndef __ASSEMBLY__ #define mfmsr() ({unsigned long rval; \ - asm volatile("mfmsr %0" : "=r" (rval)); rval;}) + asm volatile("mfmsr %0" : "=r" (rval) : \ + : "memory"); rval;}) #ifdef CONFIG_PPC_BOOK3S_64 #define __mtmsrd(v, l) asm volatile("mtmsrd %0," __stringify(l) \ : : "r" (v) : "memory") diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 36e1c8a..4c12a751 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -75,6 +75,7 @@ int main(void) DEFINE(SIGSEGV, SIGSEGV); DEFINE(NMI_MASK, NMI_MASK); DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr)); + DEFINE(THREAD_DSCR_INHERIT, offsetof(struct thread_struct, dscr_inherit)); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index d834425..654fc53 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -380,6 +380,12 @@ _GLOBAL(ret_from_fork) li r3,0 b syscall_exit + .section ".toc","aw" +DSCR_DEFAULT: + .tc dscr_default[TC],dscr_default + + .section ".text" + /* * This routine switches between two different tasks. The process * state of one is saved on its kernel stack. Then the state @@ -519,9 +525,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) mr r1,r8 /* start using new stack pointer */ std r7,PACAKSAVE(r13) - ld r6,_CCR(r1) - mtcrf 0xFF,r6 - #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION ld r0,THREAD_VRSAVE(r4) @@ -530,14 +533,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_PPC64 BEGIN_FTR_SECTION + lwz r6,THREAD_DSCR_INHERIT(r4) + ld r7,DSCR_DEFAULT@toc(2) ld r0,THREAD_DSCR(r4) - cmpd r0,r25 - beq 1f + cmpwi r6,0 + bne 1f + ld r0,0(r7) +1: cmpd r0,r25 + beq 2f mtspr SPRN_DSCR,r0 -1: +2: END_FTR_SECTION_IFSET(CPU_FTR_DSCR) #endif + ld r6,_CCR(r1) + mtcrf 0xFF,r6 + /* r3-r13 are destroyed -- Cort */ REST_8GPRS(14, r1) REST_10GPRS(22, r1) diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index bf99cfa..6324008 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -245,9 +245,9 @@ __ftrace_make_nop(struct module *mod, /* * On PPC32 the trampoline looks like: - * 0x3d, 0x60, 0x00, 0x00 lis r11,sym@ha - * 0x39, 0x6b, 0x00, 0x00 addi r11,r11,sym@l - * 0x7d, 0x69, 0x03, 0xa6 mtctr r11 + * 0x3d, 0x80, 0x00, 0x00 lis r12,sym@ha + * 0x39, 0x8c, 0x00, 0x00 addi r12,r12,sym@l + * 0x7d, 0x89, 0x03, 0xa6 mtctr r12 * 0x4e, 0x80, 0x04, 0x20 bctr */ @@ -262,9 +262,9 @@ __ftrace_make_nop(struct module *mod, pr_devel(" %08x %08x ", jmp[0], jmp[1]); /* verify that this is what we expect it to be */ - if (((jmp[0] & 0xffff0000) != 0x3d600000) || - ((jmp[1] & 0xffff0000) != 0x396b0000) || - (jmp[2] != 0x7d6903a6) || + if (((jmp[0] & 0xffff0000) != 0x3d800000) || + ((jmp[1] & 0xffff0000) != 0x398c0000) || + (jmp[2] != 0x7d8903a6) || (jmp[3] != 0x4e800420)) { printk(KERN_ERR "Not a trampoline\n"); return -EINVAL; diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index ba50409..e8befef 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -425,7 +425,7 @@ _STATIC(__after_prom_start) tovirt(r6,r6) /* on booke, we already run at PAGE_OFFSET */ #endif -#ifdef CONFIG_CRASH_DUMP +#ifdef CONFIG_RELOCATABLE /* * Check if the kernel has to be running as relocatable kernel based on the * variable __run_at_load, if it is set the kernel is treated as relocatable diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index f832773..449a7e0 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -187,8 +187,8 @@ int apply_relocate(Elf32_Shdr *sechdrs, static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val) { - if (entry->jump[0] == 0x3d600000 + ((val + 0x8000) >> 16) - && entry->jump[1] == 0x396b0000 + (val & 0xffff)) + if (entry->jump[0] == 0x3d800000 + ((val + 0x8000) >> 16) + && entry->jump[1] == 0x398c0000 + (val & 0xffff)) return 1; return 0; } @@ -215,10 +215,9 @@ static uint32_t do_plt_call(void *location, entry++; } - /* Stolen from Paul Mackerras as well... */ - entry->jump[0] = 0x3d600000+((val+0x8000)>>16); /* lis r11,sym@ha */ - entry->jump[1] = 0x396b0000 + (val&0xffff); /* addi r11,r11,sym@l*/ - entry->jump[2] = 0x7d6903a6; /* mtctr r11 */ + entry->jump[0] = 0x3d800000+((val+0x8000)>>16); /* lis r12,sym@ha */ + entry->jump[1] = 0x398c0000 + (val&0xffff); /* addi r12,r12,sym@l*/ + entry->jump[2] = 0x7d8903a6; /* mtctr r12 */ entry->jump[3] = 0x4e800420; /* bctr */ DEBUGP("Initialized plt for 0x%x at %p\n", val, entry); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 91e52df..5596397 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -794,16 +794,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, #endif /* CONFIG_PPC_STD_MMU_64 */ #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_DSCR)) { - if (current->thread.dscr_inherit) { - p->thread.dscr_inherit = 1; - p->thread.dscr = current->thread.dscr; - } else if (0 != dscr_default) { - p->thread.dscr_inherit = 1; - p->thread.dscr = dscr_default; - } else { - p->thread.dscr_inherit = 0; - p->thread.dscr = 0; - } + p->thread.dscr_inherit = current->thread.dscr_inherit; + p->thread.dscr = current->thread.dscr; } #endif diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index cb22024..9321d0f 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1497,9 +1497,14 @@ long arch_ptrace(struct task_struct *child, long request, if (index < PT_FPR0) { tmp = ptrace_get_reg(child, (int) index); } else { + unsigned int fpidx = index - PT_FPR0; + flush_fp_to_thread(child); - tmp = ((unsigned long *)child->thread.fpr) - [TS_FPRWIDTH * (index - PT_FPR0)]; + if (fpidx < (PT_FPSCR - PT_FPR0)) + tmp = ((unsigned long *)child->thread.fpr) + [fpidx * TS_FPRWIDTH]; + else + tmp = child->thread.fpscr.val; } ret = put_user(tmp, datalp); break; @@ -1525,9 +1530,14 @@ long arch_ptrace(struct task_struct *child, long request, if (index < PT_FPR0) { ret = ptrace_put_reg(child, index, data); } else { + unsigned int fpidx = index - PT_FPR0; + flush_fp_to_thread(child); - ((unsigned long *)child->thread.fpr) - [TS_FPRWIDTH * (index - PT_FPR0)] = data; + if (fpidx < (PT_FPSCR - PT_FPR0)) + ((unsigned long *)child->thread.fpr) + [fpidx * TS_FPRWIDTH] = data; + else + child->thread.fpscr.val = data; ret = 0; } break; diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index f0f2199..cd51a5c 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -192,6 +192,14 @@ static ssize_t show_dscr_default(struct sysdev_class *class, return sprintf(buf, "%lx\n", dscr_default); } +static void update_dscr(void *dummy) +{ + if (!current->thread.dscr_inherit) { + current->thread.dscr = dscr_default; + mtspr(SPRN_DSCR, dscr_default); + } +} + static ssize_t __used store_dscr_default(struct sysdev_class *class, struct sysdev_class_attribute *attr, const char *buf, size_t count) @@ -204,6 +212,8 @@ static ssize_t __used store_dscr_default(struct sysdev_class *class, return -EINVAL; dscr_default = val; + on_each_cpu(update_dscr, NULL, 1); + return count; } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 2de304a..818d809 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -168,13 +168,13 @@ EXPORT_SYMBOL_GPL(ppc_tb_freq); #ifdef CONFIG_VIRT_CPU_ACCOUNTING /* * Factors for converting from cputime_t (timebase ticks) to - * jiffies, milliseconds, seconds, and clock_t (1/USER_HZ seconds). + * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds). * These are all stored as 0.64 fixed-point binary fractions. */ u64 __cputime_jiffies_factor; EXPORT_SYMBOL(__cputime_jiffies_factor); -u64 __cputime_msec_factor; -EXPORT_SYMBOL(__cputime_msec_factor); +u64 __cputime_usec_factor; +EXPORT_SYMBOL(__cputime_usec_factor); u64 __cputime_sec_factor; EXPORT_SYMBOL(__cputime_sec_factor); u64 __cputime_clockt_factor; @@ -192,8 +192,8 @@ static void calc_cputime_factors(void) div128_by_32(HZ, 0, tb_ticks_per_sec, &res); __cputime_jiffies_factor = res.result_low; - div128_by_32(1000, 0, tb_ticks_per_sec, &res); - __cputime_msec_factor = res.result_low; + div128_by_32(1000000, 0, tb_ticks_per_sec, &res); + __cputime_usec_factor = res.result_low; div128_by_32(1, 0, tb_ticks_per_sec, &res); __cputime_sec_factor = res.result_low; div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res); @@ -859,13 +859,8 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, void update_vsyscall_tz(void) { - /* Make userspace gettimeofday spin until we're done. */ - ++vdso_data->tb_update_count; - smp_mb(); vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; vdso_data->tz_dsttime = sys_tz.tz_dsttime; - smp_mb(); - ++vdso_data->tb_update_count; } static void __init clocksource_init(void) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 1a01414..6889f26 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -935,8 +935,9 @@ static int emulate_instruction(struct pt_regs *regs) cpu_has_feature(CPU_FTR_DSCR)) { PPC_WARN_EMULATED(mtdscr, regs); rd = (instword >> 21) & 0x1f; - mtspr(SPRN_DSCR, regs->gpr[rd]); + current->thread.dscr = regs->gpr[rd]; current->thread.dscr_inherit = 1; + mtspr(SPRN_DSCR, current->thread.dscr); return 0; } #endif diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index 549bb2c..ded8a1a 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -79,6 +79,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, run->dcr.dcrn = dcrn; run->dcr.data = 0; run->dcr.is_write = 0; + vcpu->arch.dcr_is_write = 0; vcpu->arch.io_gpr = rt; vcpu->arch.dcr_needed = 1; kvmppc_account_exit(vcpu, DCR_EXITS); @@ -100,6 +101,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, run->dcr.dcrn = dcrn; run->dcr.data = kvmppc_get_gpr(vcpu, rs); run->dcr.is_write = 1; + vcpu->arch.dcr_is_write = 1; vcpu->arch.dcr_needed = 1; kvmppc_account_exit(vcpu, DCR_EXITS); emulated = EMULATE_DO_DCR; diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 1b5dc1a..daf793b 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -85,9 +85,11 @@ void __init wii_memory_fixups(void) wii_hole_start = p[0].base + p[0].size; wii_hole_size = p[1].base - wii_hole_start; - pr_info("MEM1: <%08llx %08llx>\n", p[0].base, p[0].size); + pr_info("MEM1: <%08llx %08llx>\n", + (unsigned long long) p[0].base, (unsigned long long) p[0].size); pr_info("HOLE: <%08lx %08lx>\n", wii_hole_start, wii_hole_size); - pr_info("MEM2: <%08llx %08llx>\n", p[1].base, p[1].size); + pr_info("MEM2: <%08llx %08llx>\n", + (unsigned long long) p[1].base, (unsigned long long) p[1].size); p[0].size += wii_hole_size + p[1].size; diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 42541bb..ace1784 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -975,7 +975,7 @@ static int cpu_cmd(void) /* print cpus waiting or in xmon */ printf("cpus stopped:"); count = 0; - for (cpu = 0; cpu < NR_CPUS; ++cpu) { + for_each_possible_cpu(cpu) { if (cpumask_test_cpu(cpu, &cpus_in_xmon)) { if (count == 0) printf(" %x", cpu); diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 88829a4..a4b8f60 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -126,4 +126,32 @@ static inline unsigned long long get_clock_monotonic(void) return get_clock_xt() - sched_clock_base_cc; } +/** + * tod_to_ns - convert a TOD format value to nanoseconds + * @todval: to be converted TOD format value + * Returns: number of nanoseconds that correspond to the TOD format value + * + * Converting a 64 Bit TOD format value to nanoseconds means that the value + * must be divided by 4.096. In order to achieve that we multiply with 125 + * and divide by 512: + * + * ns = (todval * 125) >> 9; + * + * In order to avoid an overflow with the multiplication we can rewrite this. + * With a split todval == 2^32 * th + tl (th upper 32 bits, tl lower 32 bits) + * we end up with + * + * ns = ((2^32 * th + tl) * 125 ) >> 9; + * -> ns = (2^23 * th * 125) + ((tl * 125) >> 9); + * + */ +static inline unsigned long long tod_to_ns(unsigned long long todval) +{ + unsigned long long ns; + + ns = ((todval >> 32) << 23) * 125; + ns += ((todval & 0xffffffff) * 125) >> 9; + return ns; +} + #endif diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 53acaa8..f98af03 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -631,7 +631,6 @@ asmlinkage unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg) return -EFAULT; if (a.offset & ~PAGE_MASK) return -EINVAL; - a.addr = (unsigned long) compat_ptr(a.addr); return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); } @@ -642,7 +641,6 @@ asmlinkage long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg) if (copy_from_user(&a, arg, sizeof(a))) return -EFAULT; - a.addr = (unsigned long) compat_ptr(a.addr); return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); } diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index dff9330..943ea0e 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -63,7 +63,7 @@ static DEFINE_PER_CPU(struct clock_event_device, comparators); */ unsigned long long notrace __kprobes sched_clock(void) { - return (get_clock_monotonic() * 125) >> 9; + return tod_to_ns(get_clock_monotonic()); } /* diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 35c21bf..a3db4c8 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -358,7 +358,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) return 0; } - sltime = ((vcpu->arch.sie_block->ckc - now)*125)>>9; + sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index d814f79..6903d44 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -567,6 +567,7 @@ static void pfault_interrupt(unsigned int ext_int_code, tsk->thread.pfault_wait = 0; list_del(&tsk->thread.list); wake_up_process(tsk); + put_task_struct(tsk); } else { /* Completion interrupt was faster than initial * interrupt. Set pfault_wait to -1 so the initial @@ -576,14 +577,22 @@ static void pfault_interrupt(unsigned int ext_int_code, put_task_struct(tsk); } else { /* signal bit not set -> a real page is missing. */ - if (tsk->thread.pfault_wait == -1) { + if (tsk->thread.pfault_wait == 1) { + /* Already on the list with a reference: put to sleep */ + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_tsk_need_resched(tsk); + } else if (tsk->thread.pfault_wait == -1) { /* Completion interrupt was faster than the initial * interrupt (pfault_wait == -1). Set pfault_wait * back to zero and exit. */ tsk->thread.pfault_wait = 0; } else { /* Initial interrupt arrived before completion - * interrupt. Let the task sleep. */ + * interrupt. Let the task sleep. + * An extra task reference is needed since a different + * cpu may set the task state to TASK_RUNNING again + * before the scheduler is reached. */ + get_task_struct(tsk); tsk->thread.pfault_wait = 1; list_add(&tsk->thread.list, &pfault_list); set_task_state(tsk, TASK_UNINTERRUPTIBLE); @@ -608,6 +617,7 @@ static int __cpuinit pfault_cpu_notify(struct notifier_block *self, list_del(&thread->list); tsk = container_of(thread, struct task_struct, thread); wake_up_process(tsk); + put_task_struct(tsk); } spin_unlock_irq(&pfault_lock); break; diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 65cb06e..4ccf9f5 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -183,7 +183,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, addr = start; len = (unsigned long) nr_pages << PAGE_SHIFT; end = start + len; - if (end < start) + if ((end < start) || (end > TASK_SIZE)) goto slow_irqon; /* diff --git a/arch/sh/include/asm/elf.h b/arch/sh/include/asm/elf.h index f38112b..978b7fd 100644 --- a/arch/sh/include/asm/elf.h +++ b/arch/sh/include/asm/elf.h @@ -202,9 +202,9 @@ extern void __kernel_vsyscall; if (vdso_enabled) \ NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_BASE); \ else \ - NEW_AUX_ENT(AT_IGNORE, 0); + NEW_AUX_ENT(AT_IGNORE, 0) #else -#define VSYSCALL_AUX_ENT +#define VSYSCALL_AUX_ENT NEW_AUX_ENT(AT_IGNORE, 0) #endif /* CONFIG_VSYSCALL */ #ifdef CONFIG_SH_FPU diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 2e79419..9e70257 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -590,6 +590,9 @@ config SYSVIPC_COMPAT depends on COMPAT && SYSVIPC default y +config KEYS_COMPAT + def_bool y if COMPAT && KEYS + endmenu source "net/Kconfig" diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h index 1770610..f368cef 100644 --- a/arch/sparc/include/asm/hugetlb.h +++ b/arch/sparc/include/asm/hugetlb.h @@ -58,14 +58,20 @@ static inline pte_t huge_pte_wrprotect(pte_t pte) static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - ptep_set_wrprotect(mm, addr, ptep); + pte_t old_pte = *ptep; + set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); } static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty) { - return ptep_set_access_flags(vma, addr, ptep, pte, dirty); + int changed = !pte_same(*ptep, pte); + if (changed) { + set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + flush_tlb_page(vma, addr); + } + return changed; } static inline pte_t huge_ptep_get(pte_t *ptep) diff --git a/arch/sparc/kernel/central.c b/arch/sparc/kernel/central.c index 7eef3f7..f5ddc0b 100644 --- a/arch/sparc/kernel/central.c +++ b/arch/sparc/kernel/central.c @@ -268,4 +268,4 @@ static int __init sunfire_init(void) return 0; } -subsys_initcall(sunfire_init); +fs_initcall(sunfire_init); diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 6860d40..904ed63 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -513,11 +513,13 @@ static u64 nop_for_index(int idx) static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) { - u64 val, mask = mask_for_index(idx); + u64 enc, val, mask = mask_for_index(idx); + + enc = perf_event_get_enc(cpuc->events[idx]); val = cpuc->pcr; val &= ~mask; - val |= hwc->config; + val |= event_encoding(enc, idx); cpuc->pcr = val; pcr_ops->write(cpuc->pcr); @@ -1380,8 +1382,6 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry, { unsigned long ufp; - perf_callchain_store(entry, regs->tpc); - ufp = regs->u_regs[UREG_I6] + STACK_BIAS; do { struct sparc_stackf *usf, sf; @@ -1402,8 +1402,6 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry, { unsigned long ufp; - perf_callchain_store(entry, regs->tpc); - ufp = regs->u_regs[UREG_I6] & 0xffffffffUL; do { struct sparc_stackf32 *usf, sf; @@ -1422,6 +1420,11 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry, void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { + perf_callchain_store(entry, regs->tpc); + + if (!current->mm) + return; + flushw_user(); if (test_thread_flag(TIF_32BIT)) perf_callchain_user_32(entry, regs); diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index d58260b..77d4761 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -309,9 +309,7 @@ void do_rt_sigreturn(struct pt_regs *regs) err |= restore_fpu_state(regs, fpu_save); err |= __copy_from_user(&set, &sf->mask, sizeof(sigset_t)); - err |= do_sigaltstack(&sf->stack, NULL, (unsigned long)sf); - - if (err) + if (err || do_sigaltstack(&sf->stack, NULL, (unsigned long)sf) == -EFAULT) goto segv; err |= __get_user(rwin_save, &sf->rwin_save); diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 908b47a..10c9b36 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -519,12 +519,12 @@ SYSCALL_DEFINE1(sparc64_personality, unsigned long, personality) { int ret; - if (current->personality == PER_LINUX32 && - personality == PER_LINUX) - personality = PER_LINUX32; + if (personality(current->personality) == PER_LINUX32 && + personality(personality) == PER_LINUX) + personality |= PER_LINUX32; ret = sys_personality(personality); - if (ret == PER_LINUX32) - ret = PER_LINUX; + if (personality(ret) == PER_LINUX32) + ret &= ~PER_LINUX32; return ret; } diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S index 1d7e274..7f5f65d 100644 --- a/arch/sparc/kernel/syscalls.S +++ b/arch/sparc/kernel/syscalls.S @@ -212,24 +212,20 @@ linux_sparc_syscall: 3: stx %o0, [%sp + PTREGS_OFF + PT_V9_I0] ret_sys_call: ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %g3 - ldx [%sp + PTREGS_OFF + PT_V9_TNPC], %l1 ! pc = npc sra %o0, 0, %o0 mov %ulo(TSTATE_XCARRY | TSTATE_ICARRY), %g2 sllx %g2, 32, %g2 - /* Check if force_successful_syscall_return() - * was invoked. - */ - ldub [%g6 + TI_SYS_NOERROR], %l2 - brnz,a,pn %l2, 80f - stb %g0, [%g6 + TI_SYS_NOERROR] - cmp %o0, -ERESTART_RESTARTBLOCK bgeu,pn %xcc, 1f - andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT), %l6 -80: + andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT), %g0 + ldx [%sp + PTREGS_OFF + PT_V9_TNPC], %l1 ! pc = npc + +2: + stb %g0, [%g6 + TI_SYS_NOERROR] /* System call success, clear Carry condition code. */ andn %g3, %g2, %g3 +3: stx %g3, [%sp + PTREGS_OFF + PT_V9_TSTATE] bne,pn %icc, linux_syscall_trace2 add %l1, 0x4, %l2 ! npc = npc+4 @@ -238,20 +234,20 @@ ret_sys_call: stx %l2, [%sp + PTREGS_OFF + PT_V9_TNPC] 1: + /* Check if force_successful_syscall_return() + * was invoked. + */ + ldub [%g6 + TI_SYS_NOERROR], %l2 + brnz,pn %l2, 2b + ldx [%sp + PTREGS_OFF + PT_V9_TNPC], %l1 ! pc = npc /* System call failure, set Carry condition code. * Also, get abs(errno) to return to the process. */ - andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT), %l6 sub %g0, %o0, %o0 - or %g3, %g2, %g3 stx %o0, [%sp + PTREGS_OFF + PT_V9_I0] - stx %g3, [%sp + PTREGS_OFF + PT_V9_TSTATE] - bne,pn %icc, linux_syscall_trace2 - add %l1, 0x4, %l2 ! npc = npc+4 - stx %l1, [%sp + PTREGS_OFF + PT_V9_TPC] + ba,pt %xcc, 3b + or %g3, %g2, %g3 - b,pt %xcc, rtrap - stx %l2, [%sp + PTREGS_OFF + PT_V9_TNPC] linux_syscall_trace2: call syscall_trace_leave add %sp, PTREGS_OFF, %o0 diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index f566518..248fb67 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -74,7 +74,7 @@ sys_call_table32: .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy /*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid -/*280*/ .word sys32_tee, sys_add_key, sys_request_key, sys_keyctl, compat_sys_openat +/*280*/ .word sys32_tee, sys_add_key, sys_request_key, compat_sys_keyctl, compat_sys_openat .word sys_mkdirat, sys_mknodat, sys_fchownat, compat_sys_futimesat, compat_sys_fstatat64 /*290*/ .word sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat .word sys_fchmodat, sys_faccessat, compat_sys_pselect6, compat_sys_ppoll, sys_unshare diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 8e073d8..6ff4d78 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2118,6 +2118,9 @@ EXPORT_SYMBOL(_PAGE_CACHE); #ifdef CONFIG_SPARSEMEM_VMEMMAP unsigned long vmemmap_table[VMEMMAP_SIZE]; +static long __meminitdata addr_start, addr_end; +static int __meminitdata node_start; + int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) { unsigned long vstart = (unsigned long) start; @@ -2148,15 +2151,30 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) *vmem_pp = pte_base | __pa(block); - printk(KERN_INFO "[%p-%p] page_structs=%lu " - "node=%d entry=%lu/%lu\n", start, block, nr, - node, - addr >> VMEMMAP_CHUNK_SHIFT, - VMEMMAP_SIZE); + /* check to see if we have contiguous blocks */ + if (addr_end != addr || node_start != node) { + if (addr_start) + printk(KERN_DEBUG " [%lx-%lx] on node %d\n", + addr_start, addr_end-1, node_start); + addr_start = addr; + node_start = node; + } + addr_end = addr + VMEMMAP_CHUNK; } } return 0; } + +void __meminit vmemmap_populate_print_last(void) +{ + if (addr_start) { + printk(KERN_DEBUG " [%lx-%lx] on node %d\n", + addr_start, addr_end-1, node_start); + addr_start = 0; + addr_end = 0; + node_start = 0; + } +} #endif /* CONFIG_SPARSEMEM_VMEMMAP */ static void prot_init_common(unsigned long page_none, diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index b57a594..874162a 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -495,11 +495,11 @@ xcall_fetch_glob_regs: stx %o7, [%g1 + GR_SNAP_O7] stx %i7, [%g1 + GR_SNAP_I7] /* Don't try this at home kids... */ - rdpr %cwp, %g2 - sub %g2, 1, %g7 + rdpr %cwp, %g3 + sub %g3, 1, %g7 wrpr %g7, %cwp mov %i7, %g7 - wrpr %g2, %cwp + wrpr %g3, %cwp stx %g7, [%g1 + GR_SNAP_RPC] sethi %hi(trap_block), %g7 or %g7, %lo(trap_block), %g7 diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 0249b8b..532a2a4 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -11,6 +11,7 @@ config TILE select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP select GENERIC_IRQ_SHOW + select HAVE_SYSCALL_WRAPPERS if TILEGX select SYS_HYPERVISOR # FIXME: investigate whether we need/want these options. diff --git a/arch/tile/Makefile b/arch/tile/Makefile index 17acce7..04c637c 100644 --- a/arch/tile/Makefile +++ b/arch/tile/Makefile @@ -26,6 +26,10 @@ $(error Set TILERA_ROOT or CROSS_COMPILE when building $(ARCH) on $(HOST_ARCH)) endif endif +# The tile compiler may emit .eh_frame information for backtracing. +# In kernel modules, this causes load failures due to unsupported relocations. +KBUILD_CFLAGS += -fno-asynchronous-unwind-tables + ifneq ($(CONFIG_DEBUG_EXTRA_FLAGS),"") KBUILD_CFLAGS += $(CONFIG_DEBUG_EXTRA_FLAGS) endif diff --git a/arch/tile/include/asm/bitops.h b/arch/tile/include/asm/bitops.h index 16f1fa5..bd186c4 100644 --- a/arch/tile/include/asm/bitops.h +++ b/arch/tile/include/asm/bitops.h @@ -77,6 +77,11 @@ static inline int ffs(int x) return __builtin_ffs(x); } +static inline int fls64(__u64 w) +{ + return (sizeof(__u64) * 8) - __builtin_clzll(w); +} + /** * fls - find last set bit in word * @x: the word to search @@ -90,12 +95,7 @@ static inline int ffs(int x) */ static inline int fls(int x) { - return (sizeof(int) * 8) - __builtin_clz(x); -} - -static inline int fls64(__u64 w) -{ - return (sizeof(__u64) * 8) - __builtin_clzll(w); + return fls64((unsigned int) x); } static inline unsigned int __arch_hweight32(unsigned int w) diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index a7869ad..41459d8 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -406,19 +406,17 @@ int compat_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, * Set up registers for signal handler. * Registers that we don't modify keep the value they had from * user-space at the time we took the signal. + * We always pass siginfo and mcontext, regardless of SA_SIGINFO, + * since some things rely on this (e.g. glibc's debug/segfault.c). */ regs->pc = ptr_to_compat_reg(ka->sa.sa_handler); regs->ex1 = PL_ICS_EX1(USER_PL, 1); /* set crit sec in handler */ regs->sp = ptr_to_compat_reg(frame); regs->lr = restorer; regs->regs[0] = (unsigned long) usig; - - if (ka->sa.sa_flags & SA_SIGINFO) { - /* Need extra arguments, so mark to restore caller-saves. */ - regs->regs[1] = ptr_to_compat_reg(&frame->info); - regs->regs[2] = ptr_to_compat_reg(&frame->uc); - regs->flags |= PT_FLAGS_CALLER_SAVES; - } + regs->regs[1] = ptr_to_compat_reg(&frame->info); + regs->regs[2] = ptr_to_compat_reg(&frame->uc); + regs->flags |= PT_FLAGS_CALLER_SAVES; /* * Notify any tracer that was single-stepping it. diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 41474fb..aa365c5 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -271,6 +271,12 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval) } #define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) +#define __HAVE_ARCH_PTE_SAME +static inline int pte_same(pte_t pte_a, pte_t pte_b) +{ + return !((pte_val(pte_a) ^ pte_val(pte_b)) & ~_PAGE_NEWPAGE); +} + /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. @@ -346,11 +352,11 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr); #define update_mmu_cache(vma,address,ptep) do ; while (0) /* Encode and de-code a swap entry */ -#define __swp_type(x) (((x).val >> 4) & 0x3f) +#define __swp_type(x) (((x).val >> 5) & 0x1f) #define __swp_offset(x) ((x).val >> 11) #define __swp_entry(type, offset) \ - ((swp_entry_t) { ((type) << 4) | ((offset) << 11) }) + ((swp_entry_t) { ((type) << 5) | ((offset) << 11) }) #define __pte_to_swp_entry(pte) \ ((swp_entry_t) { pte_val(pte_mkuptodate(pte)) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 37357a5..a0e9bda 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1451,6 +1451,15 @@ config ARCH_USES_PG_UNCACHED def_bool y depends on X86_PAT +config ARCH_RANDOM + def_bool y + prompt "x86 architectural random number generator" if EXPERT + ---help--- + Enable the x86 architectural RDRAND instruction + (Intel Bull Mountain technology) to generate random numbers. + If supported, this is a high bandwidth, cryptographically + secure hardware random number generator. + config EFI bool "EFI runtime service support" depends on ACPI diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index be6d9e3..3470624 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -2460,10 +2460,12 @@ ENTRY(aesni_cbc_dec) pxor IN3, STATE4 movaps IN4, IV #else - pxor (INP), STATE2 - pxor 0x10(INP), STATE3 pxor IN1, STATE4 movaps IN2, IV + movups (INP), IN1 + pxor IN1, STATE2 + movups 0x10(INP), IN2 + pxor IN2, STATE3 #endif movups STATE1, (OUTP) movups STATE2, 0x10(OUTP) diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index c1870dd..26af1e3 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -208,7 +208,7 @@ sysexit_from_sys_call: testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) jnz ia32_ret_from_sys_call TRACE_IRQS_ON - sti + ENABLE_INTERRUPTS(CLBR_NONE) movl %eax,%esi /* second arg, syscall return value */ cmpl $0,%eax /* is it < 0? */ setl %al /* 1 if so, 0 if not */ @@ -218,7 +218,7 @@ sysexit_from_sys_call: GET_THREAD_INFO(%r10) movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi - cli + DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF testl %edi,TI_flags(%r10) jz \exit diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h new file mode 100644 index 0000000..0d9ec77 --- /dev/null +++ b/arch/x86/include/asm/archrandom.h @@ -0,0 +1,75 @@ +/* + * This file is part of the Linux kernel. + * + * Copyright (c) 2011, Intel Corporation + * Authors: Fenghua Yu <fenghua.yu@intel.com>, + * H. Peter Anvin <hpa@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +#ifndef ASM_X86_ARCHRANDOM_H +#define ASM_X86_ARCHRANDOM_H + +#include <asm/processor.h> +#include <asm/cpufeature.h> +#include <asm/alternative.h> +#include <asm/nops.h> + +#define RDRAND_RETRY_LOOPS 10 + +#define RDRAND_INT ".byte 0x0f,0xc7,0xf0" +#ifdef CONFIG_X86_64 +# define RDRAND_LONG ".byte 0x48,0x0f,0xc7,0xf0" +#else +# define RDRAND_LONG RDRAND_INT +#endif + +#ifdef CONFIG_ARCH_RANDOM + +#define GET_RANDOM(name, type, rdrand, nop) \ +static inline int name(type *v) \ +{ \ + int ok; \ + alternative_io("movl $0, %0\n\t" \ + nop, \ + "\n1: " rdrand "\n\t" \ + "jc 2f\n\t" \ + "decl %0\n\t" \ + "jnz 1b\n\t" \ + "2:", \ + X86_FEATURE_RDRAND, \ + ASM_OUTPUT2("=r" (ok), "=a" (*v)), \ + "0" (RDRAND_RETRY_LOOPS)); \ + return ok; \ +} + +#ifdef CONFIG_X86_64 + +GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5); +GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4); + +#else + +GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3); +GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3); + +#endif /* CONFIG_X86_64 */ + +#endif /* CONFIG_ARCH_RANDOM */ + +extern void x86_init_rdrand(struct cpuinfo_x86 *c); + +#endif /* ASM_X86_ARCHRANDOM_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 71cc380..c5d941f 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -173,7 +173,7 @@ #define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */ #define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */ #define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */ -#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */ +#define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index effff47..43876f1 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -31,6 +31,56 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte) ptep->pte_low = pte.pte_low; } +#define pmd_read_atomic pmd_read_atomic +/* + * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with + * a "*pmdp" dereference done by gcc. Problem is, in certain places + * where pte_offset_map_lock is called, concurrent page faults are + * allowed, if the mmap_sem is hold for reading. An example is mincore + * vs page faults vs MADV_DONTNEED. On the page fault side + * pmd_populate rightfully does a set_64bit, but if we're reading the + * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen + * because gcc will not read the 64bit of the pmd atomically. To fix + * this all places running pmd_offset_map_lock() while holding the + * mmap_sem in read mode, shall read the pmdp pointer using this + * function to know if the pmd is null nor not, and in turn to know if + * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd + * operations. + * + * Without THP if the mmap_sem is hold for reading, the + * pmd can only transition from null to not null while pmd_read_atomic runs. + * So there's no need of literally reading it atomically. + * + * With THP if the mmap_sem is hold for reading, the pmd can become + * THP or null or point to a pte (and in turn become "stable") at any + * time under pmd_read_atomic, so it's mandatory to read it atomically + * with cmpxchg8b. + */ +#ifndef CONFIG_TRANSPARENT_HUGEPAGE +static inline pmd_t pmd_read_atomic(pmd_t *pmdp) +{ + pmdval_t ret; + u32 *tmp = (u32 *)pmdp; + + ret = (pmdval_t) (*tmp); + if (ret) { + /* + * If the low part is null, we must not read the high part + * or we can end up with a partial pmd. + */ + smp_rmb(); + ret |= ((pmdval_t)*(tmp + 1)) << 32; + } + + return (pmd_t) { ret }; +} +#else /* CONFIG_TRANSPARENT_HUGEPAGE */ +static inline pmd_t pmd_read_atomic(pmd_t *pmdp) +{ + return (pmd_t) { atomic64_read((atomic64_t *)pmdp) }; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) { set_64bit((unsigned long long *)(ptep), native_pte_val(pte)); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 18601c8..884507e 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -146,8 +146,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd) static inline int pmd_large(pmd_t pte) { - return (pmd_flags(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == - (_PAGE_PSE | _PAGE_PRESENT); + return pmd_flags(pte) & _PAGE_PSE; } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -415,7 +414,13 @@ static inline int pte_hidden(pte_t pte) static inline int pmd_present(pmd_t pmd) { - return pmd_flags(pmd) & _PAGE_PRESENT; + /* + * Checking for _PAGE_PSE is needed too because + * split_huge_page will temporarily clear the present bit (but + * the _PAGE_PSE flag will remain set at all times while the + * _PAGE_PRESENT bit is clear). + */ + return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE); } static inline int pmd_none(pmd_t pmd) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 5d9c61d..e5f7248 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -99,7 +99,6 @@ struct cpuinfo_x86 { u16 apicid; u16 initial_apicid; u16 x86_clflush_size; -#ifdef CONFIG_SMP /* number of cores as seen by the OS: */ u16 booted_cores; /* Physical processor id: */ @@ -110,7 +109,6 @@ struct cpuinfo_x86 { u8 compute_unit_id; /* Index into per_cpu list: */ u16 cpu_index; -#endif } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define X86_VENDOR_INTEL 0 diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 94e7618..f332d64 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -187,21 +187,14 @@ static inline int v8086_mode(struct pt_regs *regs) #endif } -/* - * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode - * when it traps. The previous stack will be directly underneath the saved - * registers, and 'sp/ss' won't even have been saved. Thus the '®s->sp'. - * - * This is valid only for kernel mode traps. - */ -static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) -{ #ifdef CONFIG_X86_32 - return (unsigned long)(®s->sp); +extern unsigned long kernel_stack_pointer(struct pt_regs *regs); #else +static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) +{ return regs->sp; -#endif } +#endif #define GET_IP(regs) ((regs)->ip) #define GET_FP(regs) ((regs)->bp) diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index c2ff2a1..f0d89d9 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -93,10 +93,6 @@ do { \ "memory"); \ } while (0) -/* - * disable hlt during certain critical i/o operations - */ -#define HAVE_DISABLE_HLT #else /* frame pointer must be last for get_wchan */ @@ -392,9 +388,6 @@ static inline void clflush(volatile void *__p) #define nop() asm volatile ("nop") -void disable_hlt(void); -void enable_hlt(void); - void cpu_idle_wait(void); extern unsigned long arch_align_stack(unsigned long sp); diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 0310da6..1d44903 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -1,6 +1,7 @@ #ifndef _ASM_X86_TRAPS_H #define _ASM_X86_TRAPS_H +#include <linux/kprobes.h> #include <asm/debugreg.h> #include <asm/siginfo.h> /* TRAP_TRACE, ... */ @@ -87,4 +88,29 @@ asmlinkage void smp_thermal_interrupt(void); asmlinkage void mce_threshold_interrupt(void); #endif +/* Interrupts/Exceptions */ +enum { + X86_TRAP_DE = 0, /* 0, Divide-by-zero */ + X86_TRAP_DB, /* 1, Debug */ + X86_TRAP_NMI, /* 2, Non-maskable Interrupt */ + X86_TRAP_BP, /* 3, Breakpoint */ + X86_TRAP_OF, /* 4, Overflow */ + X86_TRAP_BR, /* 5, Bound Range Exceeded */ + X86_TRAP_UD, /* 6, Invalid Opcode */ + X86_TRAP_NM, /* 7, Device Not Available */ + X86_TRAP_DF, /* 8, Double Fault */ + X86_TRAP_OLD_MF, /* 9, Coprocessor Segment Overrun */ + X86_TRAP_TS, /* 10, Invalid TSS */ + X86_TRAP_NP, /* 11, Segment Not Present */ + X86_TRAP_SS, /* 12, Stack Segment Fault */ + X86_TRAP_GP, /* 13, General Protection Fault */ + X86_TRAP_PF, /* 14, Page Fault */ + X86_TRAP_SPURIOUS, /* 15, Spurious Interrupt */ + X86_TRAP_MF, /* 16, x87 Floating-Point Exception */ + X86_TRAP_AC, /* 17, Alignment Check */ + X86_TRAP_MC, /* 18, Machine Check */ + X86_TRAP_XF, /* 19, SIMD Floating-Point Exception */ + X86_TRAP_IRET = 32, /* 32, IRET Exception */ +}; + #endif /* _ASM_X86_TRAPS_H */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 4558f0d..479d03c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -416,12 +416,14 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header, return 0; } - if (intsrc->source_irq == 0 && intsrc->global_irq == 2) { + if (intsrc->source_irq == 0) { if (acpi_skip_timer_override) { - printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n"); + printk(PREFIX "BIOS IRQ0 override ignored.\n"); return 0; } - if (acpi_fix_pin2_polarity && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) { + + if ((intsrc->global_irq == 2) && acpi_fix_pin2_polarity + && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) { intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK; printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n"); } @@ -1327,17 +1329,12 @@ static int __init dmi_disable_acpi(const struct dmi_system_id *d) } /* - * Force ignoring BIOS IRQ0 pin2 override + * Force ignoring BIOS IRQ0 override */ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) { - /* - * The ati_ixp4x0_rev() early PCI quirk should have set - * the acpi_skip_timer_override flag already: - */ if (!acpi_skip_timer_override) { - WARN(1, KERN_ERR "ati_ixp4x0 quirk not complete.\n"); - pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", + pr_notice("%s detected: Ignoring BIOS IRQ0 override\n", d->ident); acpi_skip_timer_override = 1; } @@ -1431,7 +1428,7 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = { * is enabled. This input is incorrectly designated the * ISA IRQ 0 via an interrupt source override even though * it is wired to the output of the master 8259A and INTIN0 - * is not connected at all. Force ignoring BIOS IRQ0 pin2 + * is not connected at all. Force ignoring BIOS IRQ0 * override in that cases. */ { @@ -1466,6 +1463,14 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"), }, }, + { + .callback = dmi_ignore_irq0_timer_override, + .ident = "FUJITSU SIEMENS", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "AMILO PRO V2030"), + }, + }, {} }; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a81f2d5..4c734e6 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -161,7 +161,7 @@ static const unsigned char * const k7_nops[ASM_NOP_MAX+2] = #endif #ifdef P6_NOP1 -static const unsigned char __initconst_or_module p6nops[] = +static const unsigned char p6nops[] = { P6_NOP1, P6_NOP2, @@ -220,7 +220,7 @@ void __init arch_init_ideal_nops(void) ideal_nops = intel_nops; #endif } - + break; default: #ifdef CONFIG_X86_64 ideal_nops = k8_nops; diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index bae1efe..be16854 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -154,16 +154,14 @@ int amd_get_subcaches(int cpu) { struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link; unsigned int mask; - int cuid = 0; + int cuid; if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) return 0; pci_read_config_dword(link, 0x1d4, &mask); -#ifdef CONFIG_SMP cuid = cpu_data(cpu).compute_unit_id; -#endif return (mask >> (4 * cuid)) & 0xf; } @@ -172,7 +170,7 @@ int amd_set_subcaches(int cpu, int mask) static unsigned int reset, ban; struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu)); unsigned int reg; - int cuid = 0; + int cuid; if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf) return -EINVAL; @@ -190,9 +188,7 @@ int amd_set_subcaches(int cpu, int mask) pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000); } -#ifdef CONFIG_SMP cuid = cpu_data(cpu).compute_unit_id; -#endif mask <<= 4 * cuid; mask |= (0xf ^ (1 << cuid)) << 26; diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 6042981..0e3a82a 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -15,6 +15,7 @@ CFLAGS_common.o := $(nostackp) obj-y := intel_cacheinfo.o scattered.o topology.o obj-y += proc.o capflags.o powerflags.o common.o obj-y += vmware.o hypervisor.o sched.o mshyperv.o +obj-y += rdrand.o obj-$(CONFIG_X86_32) += bugs.o obj-$(CONFIG_X86_64) += bugs_64.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index b13ed39..a93741d 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -146,7 +146,6 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP /* calling is from identify_secondary_cpu() ? */ if (!c->cpu_index) return; @@ -190,7 +189,6 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) valid_k7: ; -#endif } static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) @@ -556,6 +554,34 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } } + /* + * The way access filter has a performance penalty on some workloads. + * Disable it on the affected CPUs. + */ + if ((c->x86 == 0x15) && + (c->x86_model >= 0x02) && (c->x86_model < 0x20)) { + u64 val; + + if (!rdmsrl_safe(0xc0011021, &val) && !(val & 0x1E)) { + val |= 0x1E; + checking_wrmsrl(0xc0011021, val); + } + } + + /* + * The way access filter has a performance penalty on some workloads. + * Disable it on the affected CPUs. + */ + if ((c->x86 == 0x15) && + (c->x86_model >= 0x02) && (c->x86_model < 0x20)) { + u64 val; + + if (!rdmsrl_safe(0xc0011021, &val) && !(val & 0x1E)) { + val |= 0x1E; + checking_wrmsrl(0xc0011021, val); + } + } + cpu_detect_cache_sizes(c); /* Multi core CPU? */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 22a073d..1579ab9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -15,6 +15,7 @@ #include <asm/stackprotector.h> #include <asm/perf_event.h> #include <asm/mmu_context.h> +#include <asm/archrandom.h> #include <asm/hypervisor.h> #include <asm/processor.h> #include <asm/sections.h> @@ -675,9 +676,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_early_init) this_cpu->c_early_init(c); -#ifdef CONFIG_SMP c->cpu_index = 0; -#endif filter_cpuid_features(c, false); setup_smep(c); @@ -760,10 +759,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) c->apicid = c->initial_apicid; # endif #endif - -#ifdef CONFIG_X86_HT c->phys_proc_id = c->initial_apicid; -#endif } setup_smep(c); @@ -857,6 +853,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) #endif init_hypervisor(c); + x86_init_rdrand(c); /* * Clear/Set all flags overriden by options, need do it diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index ed6086e..e0dc000 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -179,7 +179,6 @@ static void __cpuinit trap_init_f00f_bug(void) static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP /* calling is from identify_secondary_cpu() ? */ if (!c->cpu_index) return; @@ -196,7 +195,6 @@ static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) WARN_ONCE(1, "WARNING: SMP operation may be unreliable" "with B stepping processors.\n"); } -#endif } static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 1e8d66c..362190b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -101,15 +101,19 @@ static struct severity { }; /* - * If the EIPV bit is set, it means the saved IP is the - * instruction which caused the MCE. + * If mcgstatus indicated that ip/cs on the stack were + * no good, then "m->cs" will be zero and we will have + * to assume the worst case (IN_KERNEL) as we actually + * have no idea what we were executing when the machine + * check hit. + * If we do have a good "m->cs" (or a faked one in the + * case we were executing in VM86 mode) we can use it to + * distinguish an exception taken in user from from one + * taken in the kernel. */ static int error_context(struct mce *m) { - if (m->mcgstatus & MCG_STATUS_EIPV) - return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL; - /* Unknown, assume kernel */ - return IN_KERNEL; + return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL; } int mce_severity(struct mce *a, int tolerant, char **msg) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index ff1ae9b..1396edf 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -122,9 +122,7 @@ void mce_setup(struct mce *m) m->time = get_seconds(); m->cpuvendor = boot_cpu_data.x86_vendor; m->cpuid = cpuid_eax(1); -#ifdef CONFIG_SMP m->socketid = cpu_data(m->extcpu).phys_proc_id; -#endif m->apicid = cpu_data(m->extcpu).initial_apicid; rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); } @@ -453,6 +451,13 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) if (regs && (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV))) { m->ip = regs->ip; m->cs = regs->cs; + /* + * When in VM86 mode make the cs look like ring 3 + * always. This is a lie, but it's better than passing + * the additional vm86 bit around everywhere. + */ + if (v8086_mode(regs)) + m->cs |= 3; } else { m->ip = 0; m->cs = 0; @@ -990,6 +995,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) */ add_taint(TAINT_MACHINE_CHECK); + mce_get_rip(&m, regs); severity = mce_severity(&m, tolerant, NULL); /* @@ -1028,7 +1034,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) mce_ring_add(m.addr >> PAGE_SHIFT); - mce_get_rip(&m, regs); mce_log(&m); if (severity > worst) { diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index bb0adad..b97aa72 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -52,6 +52,7 @@ struct threshold_block { unsigned int cpu; u32 address; u16 interrupt_enable; + bool interrupt_capable; u16 threshold_limit; struct kobject kobj; struct list_head miscj; @@ -64,11 +65,9 @@ struct threshold_bank { }; static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); -#ifdef CONFIG_SMP static unsigned char shared_bank[NR_BANKS] = { 0, 0, 0, 0, 1 }; -#endif static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ @@ -86,6 +85,21 @@ struct thresh_restart { u16 old_limit; }; +static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits) +{ + /* + * bank 4 supports APIC LVT interrupts implicitly since forever. + */ + if (bank == 4) + return true; + + /* + * IntP: interrupt present; if this bit is set, the thresholding + * bank can generate APIC LVT interrupts + */ + return msr_high_bits & BIT(28); +} + static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) { int msr = (hi & MASK_LVTOFF_HI) >> 20; @@ -107,8 +121,10 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) return 1; }; -/* must be called with correct cpu affinity */ -/* Called via smp_call_function_single() */ +/* + * Called via smp_call_function_single(), must be called with correct + * cpu affinity. + */ static void threshold_restart_bank(void *_tr) { struct thresh_restart *tr = _tr; @@ -131,6 +147,12 @@ static void threshold_restart_bank(void *_tr) (new_count & THRESHOLD_MAX); } + /* clear IntType */ + hi &= ~MASK_INT_TYPE_HI; + + if (!tr->b->interrupt_capable) + goto done; + if (tr->set_lvt_off) { if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) { /* set new lvt offset */ @@ -139,9 +161,10 @@ static void threshold_restart_bank(void *_tr) } } - tr->b->interrupt_enable ? - (hi = (hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : - (hi &= ~MASK_INT_TYPE_HI); + if (tr->b->interrupt_enable) + hi |= INT_TYPE_APIC; + + done: hi |= MASK_COUNT_EN_HI; wrmsr(tr->b->address, lo, hi); @@ -202,18 +225,21 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) if (!block) per_cpu(bank_map, cpu) |= (1 << bank); -#ifdef CONFIG_SMP + if (shared_bank[bank] && c->cpu_core_id) break; -#endif - offset = setup_APIC_mce(offset, - (high & MASK_LVTOFF_HI) >> 20); memset(&b, 0, sizeof(b)); - b.cpu = cpu; - b.bank = bank; - b.block = block; - b.address = address; + b.cpu = cpu; + b.bank = bank; + b.block = block; + b.address = address; + b.interrupt_capable = lvt_interrupt_supported(bank, high); + + if (b.interrupt_capable) { + int new = (high & MASK_LVTOFF_HI) >> 20; + offset = setup_APIC_mce(offset, new); + } mce_threshold_block_init(&b, offset); mce_threshold_vector = amd_threshold_interrupt; @@ -313,6 +339,9 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size) struct thresh_restart tr; unsigned long new; + if (!b->interrupt_capable) + return -EINVAL; + if (strict_strtoul(buf, 0, &new) < 0) return -EINVAL; @@ -471,6 +500,7 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, b->cpu = cpu; b->address = address; b->interrupt_enable = 0; + b->interrupt_capable = lvt_interrupt_supported(bank, high); b->threshold_limit = THRESHOLD_MAX; INIT_LIST_HEAD(&b->miscj); diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 27c6251..99cd9d2 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -322,17 +322,6 @@ device_initcall(thermal_throttle_init_device); #endif /* CONFIG_SYSFS */ -/* - * Set up the most two significant bit to notify mce log that this thermal - * event type. - * This is a temp solution. May be changed in the future with mce log - * infrasture. - */ -#define CORE_THROTTLED (0) -#define CORE_POWER_LIMIT ((__u64)1 << 62) -#define PACKAGE_THROTTLED ((__u64)2 << 62) -#define PACKAGE_POWER_LIMIT ((__u64)3 << 62) - static void notify_thresholds(__u64 msr_val) { /* check whether the interrupt handler is defined; @@ -362,27 +351,23 @@ static void intel_thermal_interrupt(void) if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, THERMAL_THROTTLING_EVENT, CORE_LEVEL) != 0) - mce_log_therm_throt_event(CORE_THROTTLED | msr_val); + mce_log_therm_throt_event(msr_val); if (this_cpu_has(X86_FEATURE_PLN)) - if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, + therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, POWER_LIMIT_EVENT, - CORE_LEVEL) != 0) - mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val); + CORE_LEVEL); if (this_cpu_has(X86_FEATURE_PTS)) { rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); - if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, + therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, THERMAL_THROTTLING_EVENT, - PACKAGE_LEVEL) != 0) - mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val); + PACKAGE_LEVEL); if (this_cpu_has(X86_FEATURE_PLN)) - if (therm_throt_process(msr_val & + therm_throt_process(msr_val & PACKAGE_THERM_STATUS_POWER_LIMIT, POWER_LIMIT_EVENT, - PACKAGE_LEVEL) != 0) - mce_log_therm_throt_event(PACKAGE_POWER_LIMIT - | msr_val); + PACKAGE_LEVEL); } } diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index fe29c1d..4b50c96 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -437,6 +437,7 @@ static __initconst const struct x86_pmu amd_pmu = { * 0x023 DE PERF_CTL[2:0] * 0x02D LS PERF_CTL[3] * 0x02E LS PERF_CTL[3,0] + * 0x031 LS PERF_CTL[2:0] (**) * 0x043 CU PERF_CTL[2:0] * 0x045 CU PERF_CTL[2:0] * 0x046 CU PERF_CTL[2:0] @@ -450,10 +451,12 @@ static __initconst const struct x86_pmu amd_pmu = { * 0x0DD LS PERF_CTL[5:0] * 0x0DE LS PERF_CTL[5:0] * 0x0DF LS PERF_CTL[5:0] + * 0x1C0 EX PERF_CTL[5:3] * 0x1D6 EX PERF_CTL[5:0] * 0x1D8 EX PERF_CTL[5:0] * - * (*) depending on the umask all FPU counters may be used + * (*) depending on the umask all FPU counters may be used + * (**) only one unitmask enabled at a time */ static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); @@ -503,6 +506,12 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev return &amd_f15_PMC3; case 0x02E: return &amd_f15_PMC30; + case 0x031: + if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1) + return &amd_f15_PMC20; + return &emptyconstraint; + case 0x1C0: + return &amd_f15_PMC53; default: return &amd_f15_PMC50; } diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 62ac8cb..72c365a 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -64,12 +64,10 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) static int show_cpuinfo(struct seq_file *m, void *v) { struct cpuinfo_x86 *c = v; - unsigned int cpu = 0; + unsigned int cpu; int i; -#ifdef CONFIG_SMP cpu = c->cpu_index; -#endif seq_printf(m, "processor\t: %u\n" "vendor_id\t: %s\n" "cpu family\t: %d\n" diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c new file mode 100644 index 0000000..feca286 --- /dev/null +++ b/arch/x86/kernel/cpu/rdrand.c @@ -0,0 +1,73 @@ +/* + * This file is part of the Linux kernel. + * + * Copyright (c) 2011, Intel Corporation + * Authors: Fenghua Yu <fenghua.yu@intel.com>, + * H. Peter Anvin <hpa@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +#include <asm/processor.h> +#include <asm/archrandom.h> +#include <asm/sections.h> + +static int __init x86_rdrand_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_RDRAND); + return 1; +} +__setup("nordrand", x86_rdrand_setup); + +/* We can't use arch_get_random_long() here since alternatives haven't run */ +static inline int rdrand_long(unsigned long *v) +{ + int ok; + asm volatile("1: " RDRAND_LONG "\n\t" + "jc 2f\n\t" + "decl %0\n\t" + "jnz 1b\n\t" + "2:" + : "=r" (ok), "=a" (*v) + : "0" (RDRAND_RETRY_LOOPS)); + return ok; +} + +/* + * Force a reseed cycle; we are architecturally guaranteed a reseed + * after no more than 512 128-bit chunks of random data. This also + * acts as a test of the CPU capability. + */ +#define RESEED_LOOP ((512*128)/sizeof(unsigned long)) + +void __cpuinit x86_init_rdrand(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_ARCH_RANDOM + unsigned long tmp; + int i, count, ok; + + if (!cpu_has(c, X86_FEATURE_RDRAND)) + return; /* Nothing to do */ + + for (count = i = 0; i < RESEED_LOOP; i++) { + ok = rdrand_long(&tmp); + if (ok) + count++; + } + + if (count != RESEED_LOOP) + clear_cpu_cap(c, X86_FEATURE_RDRAND); +#endif +} diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index c7f64e6..ea6106c 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -31,7 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) const struct cpuid_bit *cb; static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { - { X86_FEATURE_DTS, CR_EAX, 0, 0x00000006, 0 }, + { X86_FEATURE_DTHERM, CR_EAX, 0, 0x00000006, 0 }, { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index edb3d46..2df1252 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1029,7 +1029,7 @@ ENTRY(xen_sysenter_target) ENTRY(xen_hypervisor_callback) CFI_STARTPROC - pushl_cfi $0 + pushl_cfi $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL TRACE_IRQS_OFF @@ -1071,14 +1071,15 @@ ENTRY(xen_failsafe_callback) 2: mov 8(%esp),%es 3: mov 12(%esp),%fs 4: mov 16(%esp),%gs + /* EAX == 0 => Category 1 (Bad segment) + EAX != 0 => Category 2 (Bad IRET) */ testl %eax,%eax popl_cfi %eax lea 16(%esp),%esp CFI_ADJUST_CFA_OFFSET -16 jz 5f - addl $16,%esp - jmp iret_exc # EAX != 0 => Category 2 (Bad IRET) -5: pushl_cfi $0 # EAX == 0 => Category 1 (Bad segment) + jmp iret_exc +5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL jmp ret_from_exception CFI_ENDPROC diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 8a445a0..dd4dba4 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1308,7 +1308,7 @@ ENTRY(xen_failsafe_callback) CFI_RESTORE r11 addq $0x30,%rsp CFI_ADJUST_CFA_OFFSET -0x30 - pushq_cfi $0 + pushq_cfi $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL jmp error_exit CFI_ENDPROC diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index aa083d3..0aa649e 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -427,7 +427,7 @@ void hpet_msi_unmask(struct irq_data *data) /* unmask it */ cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); - cfg |= HPET_TN_FSB; + cfg |= HPET_TN_ENABLE | HPET_TN_FSB; hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); } @@ -438,7 +438,7 @@ void hpet_msi_mask(struct irq_data *data) /* mask it */ cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); - cfg &= ~HPET_TN_FSB; + cfg &= ~(HPET_TN_ENABLE | HPET_TN_FSB); hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); } diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index b727450..53ab9ff 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -162,6 +162,7 @@ static unsigned int verify_ucode_size(int cpu, const u8 *buf, unsigned int size) #define F1XH_MPB_MAX_SIZE 2048 #define F14H_MPB_MAX_SIZE 1824 #define F15H_MPB_MAX_SIZE 4096 +#define F16H_MPB_MAX_SIZE 3458 switch (c->x86) { case 0x14: @@ -170,6 +171,9 @@ static unsigned int verify_ucode_size(int cpu, const u8 *buf, unsigned int size) case 0x15: max_size = F15H_MPB_MAX_SIZE; break; + case 0x16: + max_size = F16H_MPB_MAX_SIZE; + break; default: max_size = F1XH_MPB_MAX_SIZE; break; diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index f924280..c4e2465 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -297,20 +297,31 @@ static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t size) { unsigned long val; - int cpu = dev->id; - int ret = 0; - char *end; + int cpu; + ssize_t ret = 0, tmp_ret; - val = simple_strtoul(buf, &end, 0); - if (end == buf) + /* allow reload only from the BSP */ + if (boot_cpu_data.cpu_index != dev->id) return -EINVAL; - if (val == 1) { - get_online_cpus(); - if (cpu_online(cpu)) - ret = reload_for_cpu(cpu); - put_online_cpus(); + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + + if (val != 1) + return size; + + get_online_cpus(); + for_each_online_cpu(cpu) { + tmp_ret = reload_for_cpu(cpu); + if (tmp_ret != 0) + pr_warn("Error reloading microcode on CPU %d\n", cpu); + + /* save retval of the first encountered reload error */ + if (!ret) + ret = tmp_ret; } + put_online_cpus(); if (!ret) ret = size; diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 12fcbe2..f7d1a64 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -175,6 +175,9 @@ static int msr_open(struct inode *inode, struct file *file) unsigned int cpu; struct cpuinfo_x86 *c; + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + cpu = iminor(file->f_path.dentry->d_inode); if (cpu >= nr_cpu_ids || !cpu_online(cpu)) return -ENXIO; /* No such CPU */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e1ba8cb..4272502 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -341,34 +341,10 @@ void (*pm_idle)(void); EXPORT_SYMBOL(pm_idle); #endif -#ifdef CONFIG_X86_32 -/* - * This halt magic was a workaround for ancient floppy DMA - * wreckage. It should be safe to remove. - */ -static int hlt_counter; -void disable_hlt(void) -{ - hlt_counter++; -} -EXPORT_SYMBOL(disable_hlt); - -void enable_hlt(void) -{ - hlt_counter--; -} -EXPORT_SYMBOL(enable_hlt); - -static inline int hlt_use_halt(void) -{ - return (!hlt_counter && boot_cpu_data.hlt_works_ok); -} -#else static inline int hlt_use_halt(void) { return 1; } -#endif /* * We use this if we don't have any better diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 807c2a2..911e16d 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -21,6 +21,7 @@ #include <linux/signal.h> #include <linux/perf_event.h> #include <linux/hw_breakpoint.h> +#include <linux/module.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -164,6 +165,35 @@ static inline bool invalid_selector(u16 value) #define FLAG_MASK FLAG_MASK_32 +/* + * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode + * when it traps. The previous stack will be directly underneath the saved + * registers, and 'sp/ss' won't even have been saved. Thus the '®s->sp'. + * + * Now, if the stack is empty, '®s->sp' is out of range. In this + * case we try to take the previous stack. To always return a non-null + * stack pointer we fall back to regs as stack if no previous stack + * exists. + * + * This is valid only for kernel mode traps. + */ +unsigned long kernel_stack_pointer(struct pt_regs *regs) +{ + unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1); + unsigned long sp = (unsigned long)®s->sp; + struct thread_info *tinfo; + + if (context == (sp & ~(THREAD_SIZE - 1))) + return sp; + + tinfo = (struct thread_info *)context; + if (tinfo->previous_esp) + return tinfo->previous_esp; + + return (unsigned long)regs; +} +EXPORT_SYMBOL_GPL(kernel_stack_pointer); + static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) { BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index d4a705f..89d6877 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -452,6 +452,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"), }, }, + { /* Handle problems with rebooting on the Precision M6600. */ + .callback = set_pci_reboot, + .ident = "Dell OptiPlex 990", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"), + }, + }, { } }; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index afaf384..6c4e9ff 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -631,6 +631,83 @@ static __init void reserve_ibft_region(void) static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; +static bool __init snb_gfx_workaround_needed(void) +{ +#ifdef CONFIG_PCI + int i; + u16 vendor, devid; + static const u16 snb_ids[] = { + 0x0102, + 0x0112, + 0x0122, + 0x0106, + 0x0116, + 0x0126, + 0x010a, + }; + + /* Assume no if something weird is going on with PCI */ + if (!early_pci_allowed()) + return false; + + vendor = read_pci_config_16(0, 2, 0, PCI_VENDOR_ID); + if (vendor != 0x8086) + return false; + + devid = read_pci_config_16(0, 2, 0, PCI_DEVICE_ID); + for (i = 0; i < ARRAY_SIZE(snb_ids); i++) + if (devid == snb_ids[i]) + return true; +#endif + + return false; +} + +/* + * Sandy Bridge graphics has trouble with certain ranges, exclude + * them from allocation. + */ +static void __init trim_snb_memory(void) +{ + static const unsigned long bad_pages[] = { + 0x20050000, + 0x20110000, + 0x20130000, + 0x20138000, + 0x40004000, + }; + int i; + + if (!snb_gfx_workaround_needed()) + return; + + printk(KERN_DEBUG "reserving inaccessible SNB gfx pages\n"); + + /* + * Reserve all memory below the 1 MB mark that has not + * already been reserved. + */ + memblock_reserve(0, 1<<20); + + for (i = 0; i < ARRAY_SIZE(bad_pages); i++) { + if (memblock_reserve(bad_pages[i], PAGE_SIZE)) + printk(KERN_WARNING "failed to reserve 0x%08lx\n", + bad_pages[i]); + } +} + +/* + * Here we put platform-specific memory range workarounds, i.e. + * memory known to be corrupt or otherwise in need to be reserved on + * specific platforms. + * + * If this gets used more widely it could use a real dispatch mechanism. + */ +static void __init trim_platform_memory_ranges(void) +{ + trim_snb_memory(); +} + static void __init trim_bios_range(void) { /* @@ -651,6 +728,7 @@ static void __init trim_bios_range(void) * take them out. */ e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1); + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); } @@ -929,6 +1007,8 @@ void __init setup_arch(char **cmdline_p) setup_trampolines(); + trim_platform_memory_ranges(); + init_gbpages(); /* max_pfn_mapped is updated here */ @@ -937,8 +1017,21 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_X86_64 if (max_pfn > max_low_pfn) { - max_pfn_mapped = init_memory_mapping(1UL<<32, - max_pfn<<PAGE_SHIFT); + int i; + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (ei->addr + ei->size <= 1UL << 32) + continue; + + if (ei->type == E820_RESERVED) + continue; + + max_pfn_mapped = init_memory_mapping( + ei->addr < 1UL << 32 ? 1UL << 32 : ei->addr, + ei->addr + ei->size); + } + /* can we preseve max_low_pfn ?*/ max_low_pfn = max_pfn; } diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 71f4727..5a98aa2 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -185,10 +185,22 @@ void __init setup_per_cpu_areas(void) #endif rc = -EINVAL; if (pcpu_chosen_fc != PCPU_FC_PAGE) { - const size_t atom_size = cpu_has_pse ? PMD_SIZE : PAGE_SIZE; const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE; + size_t atom_size; + /* + * On 64bit, use PMD_SIZE for atom_size so that embedded + * percpu areas are aligned to PMD. This, in the future, + * can also allow using PMD mappings in vmalloc area. Use + * PAGE_SIZE on 32bit as vmalloc space is highly contended + * and large vmalloc area allocs can easily fail. + */ +#ifdef CONFIG_X86_64 + atom_size = PMD_SIZE; +#else + atom_size = PAGE_SIZE; +#endif rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, atom_size, pcpu_cpu_distance, diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index f581a18..df7d12c 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -56,9 +56,16 @@ static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) } /* - * search for a shareable pmd page for hugetlb. + * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() + * and returns the corresponding pte. While this is not necessary for the + * !shared pmd case because we can allocate the pmd later as well, it makes the + * code much cleaner. pmd allocation is essential for the shared case because + * pud has to be populated inside the same i_mmap_mutex section - otherwise + * racing tasks could either miss the sharing (see huge_pte_offset) or select a + * bad pmd for sharing. */ -static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) +static pte_t * +huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) { struct vm_area_struct *vma = find_vma(mm, addr); struct address_space *mapping = vma->vm_file->f_mapping; @@ -68,9 +75,10 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) struct vm_area_struct *svma; unsigned long saddr; pte_t *spte = NULL; + pte_t *pte; if (!vma_shareable(vma, addr)) - return; + return (pte_t *)pmd_alloc(mm, pud, addr); mutex_lock(&mapping->i_mmap_mutex); vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { @@ -97,7 +105,9 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) put_page(virt_to_page(spte)); spin_unlock(&mm->page_table_lock); out: + pte = (pte_t *)pmd_alloc(mm, pud, addr); mutex_unlock(&mapping->i_mmap_mutex); + return pte; } /* @@ -142,8 +152,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, } else { BUG_ON(sz != PMD_SIZE); if (pud_none(*pud)) - huge_pmd_share(mm, addr, pud); - pte = (pte_t *) pmd_alloc(mm, pud, addr); + pte = huge_pmd_share(mm, addr, pud); + else + pte = (pte_t *)pmd_alloc(mm, pud, addr); } } BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 87488b9..c22c423 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -28,36 +28,50 @@ int direct_gbpages #endif ; -static void __init find_early_table_space(unsigned long end, int use_pse, - int use_gbpages) +struct map_range { + unsigned long start; + unsigned long end; + unsigned page_size_mask; +}; + +/* + * First calculate space needed for kernel direct mapping page tables to cover + * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB + * pages. Then find enough contiguous space for those page tables. + */ +static void __init find_early_table_space(struct map_range *mr, int nr_range) { - unsigned long puds, pmds, ptes, tables, start = 0, good_end = end; + int i; + unsigned long puds = 0, pmds = 0, ptes = 0, tables; + unsigned long start = 0, good_end; phys_addr_t base; - puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; - tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + for (i = 0; i < nr_range; i++) { + unsigned long range, extra; - if (use_gbpages) { - unsigned long extra; + range = mr[i].end - mr[i].start; + puds += (range + PUD_SIZE - 1) >> PUD_SHIFT; - extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); - pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; - } else - pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; - - tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); + if (mr[i].page_size_mask & (1 << PG_LEVEL_1G)) { + extra = range - ((range >> PUD_SHIFT) << PUD_SHIFT); + pmds += (extra + PMD_SIZE - 1) >> PMD_SHIFT; + } else { + pmds += (range + PMD_SIZE - 1) >> PMD_SHIFT; + } - if (use_pse) { - unsigned long extra; - - extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); + if (mr[i].page_size_mask & (1 << PG_LEVEL_2M)) { + extra = range - ((range >> PMD_SHIFT) << PMD_SHIFT); #ifdef CONFIG_X86_32 - extra += PMD_SIZE; + extra += PMD_SIZE; #endif - ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; - } else - ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; + } else { + ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT; + } + } + tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); #ifdef CONFIG_X86_32 @@ -74,8 +88,9 @@ static void __init find_early_table_space(unsigned long end, int use_pse, pgt_buf_end = pgt_buf_start; pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); - printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", - end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT); + printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n", + mr[nr_range - 1].end - 1, pgt_buf_start << PAGE_SHIFT, + (pgt_buf_top << PAGE_SHIFT) - 1); } void __init native_pagetable_reserve(u64 start, u64 end) @@ -83,12 +98,6 @@ void __init native_pagetable_reserve(u64 start, u64 end) memblock_x86_reserve_range(start, end, "PGTABLE"); } -struct map_range { - unsigned long start; - unsigned long end; - unsigned page_size_mask; -}; - #ifdef CONFIG_X86_32 #define NR_RANGE_MR 3 #else /* CONFIG_X86_64 */ @@ -260,7 +269,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, * nodes are discovered. */ if (!after_bootmem) - find_early_table_space(end, use_pse, use_gbpages); + find_early_table_space(mr, nr_range); for (i = 0; i < nr_range; i++) ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 68894fd..a00c588 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -55,7 +55,7 @@ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, val |= counter_config->extra; event &= model->event_mask ? model->event_mask : 0xFF; val |= event & 0xFF; - val |= (event & 0x0F00) << 24; + val |= (u64)(event & 0x0F00) << 24; return val; } diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 6dd8955..0951b81 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -521,3 +521,20 @@ static void sb600_disable_hpet_bar(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar); + +/* + * Twinhead H12Y needs us to block out a region otherwise we map devices + * there and any access kills the box. + * + * See: https://bugzilla.kernel.org/show_bug.cgi?id=10231 + * + * Match off the LPC and svid/sdid (older kernels lose the bridge subvendor) + */ +static void __devinit twinhead_reserve_killing_zone(struct pci_dev *dev) +{ + if (dev->subsystem_vendor == 0x14FF && dev->subsystem_device == 0xA003) { + pr_info("Reserving memory on Twinhead H12Y\n"); + request_mem_region(0xFFB00000, 0x100000, "twinhead"); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index ac3aa54..0fba86d 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -38,7 +38,7 @@ #include <asm/cacheflush.h> #include <asm/fixmap.h> -static pgd_t save_pgd __initdata; +static pgd_t *save_pgd __initdata; static unsigned long efi_flags __initdata; static void __init early_code_mapping_set_exec(int executable) @@ -61,12 +61,20 @@ static void __init early_code_mapping_set_exec(int executable) void __init efi_call_phys_prelog(void) { unsigned long vaddress; + int pgd; + int n_pgds; early_code_mapping_set_exec(1); local_irq_save(efi_flags); - vaddress = (unsigned long)__va(0x0UL); - save_pgd = *pgd_offset_k(0x0UL); - set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress)); + + n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); + save_pgd = kmalloc(n_pgds * sizeof(pgd_t), GFP_KERNEL); + + for (pgd = 0; pgd < n_pgds; pgd++) { + save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE); + vaddress = (unsigned long)__va(pgd * PGDIR_SIZE); + set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress)); + } __flush_tlb_all(); } @@ -75,7 +83,11 @@ void __init efi_call_phys_epilog(void) /* * After the lock is released, the original page table is restored. */ - set_pgd(pgd_offset_k(0x0UL), save_pgd); + int pgd; + int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); + for (pgd = 0; pgd < n_pgds; pgd++) + set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]); + kfree(save_pgd); __flush_tlb_all(); local_irq_restore(efi_flags); early_code_mapping_set_exec(0); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0fb662a..9f808af 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -62,6 +62,7 @@ #include <asm/reboot.h> #include <asm/stackprotector.h> #include <asm/hypervisor.h> +#include <asm/pci_x86.h> #include "xen-ops.h" #include "mmu.h" @@ -197,6 +198,9 @@ static void __init xen_banner(void) xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); } +#define CPUID_THERM_POWER_LEAF 6 +#define APERFMPERF_PRESENT 0 + static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; @@ -217,6 +221,11 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, maskedx = cpuid_leaf1_edx_mask; break; + case CPUID_THERM_POWER_LEAF: + /* Disabling APERFMPERF for kernel usage */ + maskecx = ~(1 << APERFMPERF_PRESENT); + break; + case 0xb: /* Suppress extended topology stuff */ maskebx = 0; @@ -794,7 +803,16 @@ static void xen_write_cr4(unsigned long cr4) native_write_cr4(cr4); } - +#ifdef CONFIG_X86_64 +static inline unsigned long xen_read_cr8(void) +{ + return 0; +} +static inline void xen_write_cr8(unsigned long val) +{ + BUG_ON(val); +} +#endif static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; @@ -959,6 +977,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_cr4_safe = native_read_cr4_safe, .write_cr4 = xen_write_cr4, +#ifdef CONFIG_X86_64 + .read_cr8 = xen_read_cr8, + .write_cr8 = xen_write_cr8, +#endif + .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, @@ -966,6 +989,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, + .read_tscp = native_read_tscp, + .iret = xen_iret, .irq_enable_sysexit = xen_sysexit, #ifdef CONFIG_X86_64 @@ -1259,8 +1284,10 @@ asmlinkage void __init xen_start_kernel(void) /* Make sure ACS will be enabled */ pci_request_acs(); } - - +#ifdef CONFIG_PCI + /* PCI BIOS service won't work from a PV guest. */ + pci_probe &= ~PCI_PROBE_BIOS; +#endif xen_raw_console_write("about to get started...\n"); xen_setup_runstate_info(0); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 5f76c0a..d957dce 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -320,8 +320,13 @@ static pteval_t pte_mfn_to_pfn(pteval_t val) { if (val & _PAGE_PRESENT) { unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; + unsigned long pfn = mfn_to_pfn(mfn); + pteval_t flags = val & PTE_FLAGS_MASK; - val = ((pteval_t)mfn_to_pfn(mfn) << PAGE_SHIFT) | flags; + if (unlikely(pfn == ~0)) + val = flags & ~_PAGE_PRESENT; + else + val = ((pteval_t)pfn << PAGE_SHIFT) | flags; } return val; diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 58efeb9..2f7847d 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -683,6 +683,7 @@ int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte) unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; + int ret = 0; pfn = page_to_pfn(page); if (!PageHighMem(page)) { @@ -706,6 +707,24 @@ int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte) list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); spin_unlock_irqrestore(&m2p_override_lock, flags); + /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in + * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other + * pfn so that the following mfn_to_pfn(mfn) calls will return the + * pfn from the m2p_override (the backend pfn) instead. + * We need to do this because the pages shared by the frontend + * (xen-blkfront) can be already locked (lock_page, called by + * do_read_cache_page); when the userspace backend tries to use them + * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so + * do_blockdev_direct_IO is going to try to lock the same pages + * again resulting in a deadlock. + * As a side effect get_user_pages_fast might not be safe on the + * frontend pages while they are being shared with the backend, + * because mfn_to_pfn (that ends up being called by GUPF) will + * return the backend pfn rather than the frontend pfn. */ + ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); + if (ret == 0 && get_phys_to_machine(pfn) == mfn) + set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); + return 0; } EXPORT_SYMBOL_GPL(m2p_add_override); @@ -717,6 +736,7 @@ int m2p_remove_override(struct page *page, bool clear_pte) unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; + int ret = 0; pfn = page_to_pfn(page); mfn = get_phys_to_machine(pfn); @@ -743,6 +763,22 @@ int m2p_remove_override(struct page *page, bool clear_pte) /* No tlb flush necessary because the caller already * left the pte unmapped. */ + /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present + * somewhere in this domain, even before being added to the + * m2p_override (see comment above in m2p_add_override). + * If there are no other entries in the m2p_override corresponding + * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for + * the original pfn (the one shared by the frontend): the backend + * cannot do any IO on this page anymore because it has been + * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of + * the original pfn causes mfn_to_pfn(mfn) to return the frontend + * pfn again. */ + mfn &= ~FOREIGN_FRAME_BIT; + ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); + if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) && + m2p_find_override(mfn) == NULL) + set_phys_to_machine(pfn, mfn); + return 0; } EXPORT_SYMBOL_GPL(m2p_remove_override); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index f8dcda4..5669564 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -15,6 +15,7 @@ #include <asm/e820.h> #include <asm/setup.h> #include <asm/acpi.h> +#include <asm/numa.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> @@ -463,4 +464,7 @@ void __init xen_arch_setup(void) boot_option_idle_override = IDLE_HALT; fiddle_vdso(); +#ifdef CONFIG_NUMA + numa_off = 1; +#endif } |