Diffstat (limited to 'arch/arm/kernel')
51 files changed, 4706 insertions, 2384 deletions
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 16eed6a..6dccbbf 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -29,7 +29,7 @@ obj-$(CONFIG_MODULES) += armksyms.o module.o
 obj-$(CONFIG_ARTHUR) += arthur.o
 obj-$(CONFIG_ISA_DMA) += dma-isa.o
 obj-$(CONFIG_PCI) += bios32.o isa.o
-obj-$(CONFIG_ARM_CPU_SUSPEND) += sleep.o suspend.o
+obj-$(CONFIG_PM_SLEEP) += sleep.o
 obj-$(CONFIG_HAVE_SCHED_CLOCK) += sched_clock.o
 obj-$(CONFIG_SMP) += smp.o smp_tlb.o
 obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o
@@ -37,25 +37,14 @@ obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o
 obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
 obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
-obj-$(CONFIG_KPROBES) += kprobes.o kprobes-common.o
-ifdef CONFIG_THUMB2_KERNEL
-obj-$(CONFIG_KPROBES) += kprobes-thumb.o
-else
-obj-$(CONFIG_KPROBES) += kprobes-arm.o
-endif
-obj-$(CONFIG_ARM_KPROBES_TEST) += test-kprobes.o
-test-kprobes-objs := kprobes-test.o
-ifdef CONFIG_THUMB2_KERNEL
-test-kprobes-objs += kprobes-test-thumb.o
-else
-test-kprobes-objs += kprobes-test-arm.o
-endif
+obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o
 obj-$(CONFIG_ATAGS_PROC) += atags.o
 obj-$(CONFIG_OABI_COMPAT) += sys_oabi-compat.o
 obj-$(CONFIG_ARM_THUMBEE) += thumbee.o
 obj-$(CONFIG_KGDB) += kgdb.o
 obj-$(CONFIG_ARM_UNWIND) += unwind.o
 obj-$(CONFIG_HAVE_TCM) += tcm.o
+obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate_asm.o
 obj-$(CONFIG_OF) += devtree.o
 obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
 obj-$(CONFIG_SWP_EMULATE) += swp_emulate.o
@@ -73,7 +62,6 @@ obj-$(CONFIG_IWMMXT) += iwmmxt.o
 obj-$(CONFIG_CPU_HAS_PMU) += pmu.o
 obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o
 AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt
-obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o
 
 ifneq ($(CONFIG_ARCH_EBSA110),y)
 obj-y += io.o
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 5b0bce6..acca35a 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -7,7 +7,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-#include <linux/export.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/cryptohash.h>
@@ -49,6 +49,9 @@
 extern void __aeabi_ulcmp(void);
 
 extern void fpundefinstr(void);
+
+EXPORT_SYMBOL(__backtrace);
+
 	/* platform dependent support */
 EXPORT_SYMBOL(__udelay);
 EXPORT_SYMBOL(__const_udelay);
@@ -109,6 +112,9 @@ EXPORT_SYMBOL(__put_user_4);
 EXPORT_SYMBOL(__put_user_8);
 #endif
 
+	/* crypto hash */
+EXPORT_SYMBOL(sha_transform);
+
 /* gcc lib functions */
 EXPORT_SYMBOL(__ashldi3);
 EXPORT_SYMBOL(__ashrdi3);
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 1429d89..16baba2 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -20,7 +20,6 @@
 #include <asm/thread_info.h>
 #include <asm/memory.h>
 #include <asm/procinfo.h>
-#include <asm/hardware/cache-l2x0.h>
 #include <linux/kbuild.h>
 
 /*
@@ -93,17 +92,6 @@ int main(void)
   DEFINE(S_OLD_R0, offsetof(struct pt_regs, ARM_ORIG_r0));
   DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
   BLANK();
-#ifdef CONFIG_CACHE_L2X0
-  DEFINE(L2X0_R_PHY_BASE, offsetof(struct l2x0_regs, phy_base));
-  DEFINE(L2X0_R_AUX_CTRL, offsetof(struct l2x0_regs, aux_ctrl));
-  DEFINE(L2X0_R_TAG_LATENCY, offsetof(struct l2x0_regs, tag_latency));
-  DEFINE(L2X0_R_DATA_LATENCY, offsetof(struct l2x0_regs, data_latency));
-  DEFINE(L2X0_R_FILTER_START, offsetof(struct l2x0_regs, filter_start));
-  DEFINE(L2X0_R_FILTER_END, offsetof(struct l2x0_regs, filter_end));
-  DEFINE(L2X0_R_PREFETCH_CTRL, offsetof(struct l2x0_regs, prefetch_ctrl));
-  DEFINE(L2X0_R_PWR_CTRL, offsetof(struct l2x0_regs, pwr_ctrl));
-  BLANK();
-#endif
 #ifdef CONFIG_CPU_HAS_ASID
   DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id));
   BLANK();
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index b530e91..e4ee050 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -5,7 +5,7 @@
  *
  * Bits taken from various places.
  */
-#include <linux/export.h>
+#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include <linux/slab.h>
@@ -412,9 +412,6 @@ void pcibios_fixup_bus(struct pci_bus *bus)
 		printk(KERN_INFO "PCI: bus%d: Fast back to back transfers %sabled\n",
 			bus->number, (features & PCI_COMMAND_FAST_BACK) ? "en" : "dis");
 }
-#ifdef CONFIG_HOTPLUG
-EXPORT_SYMBOL(pcibios_fixup_bus);
-#endif
 
 /*
  * Convert from Linux-centric to bus-centric addresses for bridge devices.
@@ -434,7 +431,6 @@ pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
 	region->start = res->start - offset;
 	region->end = res->end - offset;
 }
-EXPORT_SYMBOL(pcibios_resource_to_bus);
 
 void __devinit
 pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
@@ -451,7 +447,12 @@ pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
 	res->start = region->start + offset;
 	res->end = region->end + offset;
 }
+
+#ifdef CONFIG_HOTPLUG
+EXPORT_SYMBOL(pcibios_fixup_bus);
+EXPORT_SYMBOL(pcibios_resource_to_bus);
 EXPORT_SYMBOL(pcibios_bus_to_resource);
+#endif
 
 /*
  * Swizzle the device pin each time we cross a bridge.
@@ -475,7 +476,7 @@ static u8 __devinit pcibios_swizzle(struct pci_dev *dev, u8 *pin)
 
 /*
  * Map a slot/pin to an IRQ.
  */
-static int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+static int pcibios_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
 {
 	struct pci_sys_data *sys = dev->sysdata;
 	int irq = -1;
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 463ff4a..80f7896 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -178,7 +178,7 @@
 		CALL(sys_ni_syscall) /* vm86 */
 		CALL(sys_ni_syscall) /* was sys_query_module */
 		CALL(sys_poll)
-		CALL(sys_ni_syscall) /* was nfsservctl */
+		CALL(sys_nfsservctl) /* 170 */
 		CALL(sys_setresgid16)
 		CALL(sys_getresgid16)
 		CALL(sys_prctl)
@@ -385,8 +385,6 @@
 		CALL(sys_syncfs)
 		CALL(sys_sendmmsg) /* 375 */
 		CALL(sys_setns)
-		CALL(sys_process_vm_readv)
-		CALL(sys_process_vm_writev)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
diff --git a/arch/arm/kernel/crash_dump.c b/arch/arm/kernel/crash_dump.c
index 5d1286d..90c50d4 100644
--- a/arch/arm/kernel/crash_dump.c
+++ b/arch/arm/kernel/crash_dump.c
@@ -39,7 +39,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
 	if (!csize)
 		return 0;
 
-	vaddr = ioremap(__pfn_to_phys(pfn), PAGE_SIZE);
+	vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
 	if (!vaddr)
 		return -ENOMEM;
 
diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S
index 204e216..bcd66e0 100644
--- a/arch/arm/kernel/debug.S
+++ b/arch/arm/kernel/debug.S
@@ -22,7 +22,7 @@
 #if defined(CONFIG_DEBUG_ICEDCC)
 		@@ debug using ARM EmbeddedICE DCC channel
 
-		.macro	addruart, rp, rv, tmp
+		.macro	addruart, rp, rv
 		.endm
 
 #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
@@ -106,7 +106,7 @@
 #ifdef CONFIG_MMU
 		.macro	addruart_current, rx, tmp1, tmp2
-		addruart	\tmp1, \tmp2, \rx
+		addruart	\tmp1, \tmp2
 		mrc	p15, 0, \rx, c1, c0
 		tst	\rx, #1
 		moveq	\rx, \tmp1
@@ -151,8 +151,6 @@ printhex:
 		adr	r2, hexbuf
 		b	printascii
 ENDPROC(printhex2)
 
-hexbuf:		.space 16
-
 		.ltorg
 
 ENTRY(printascii)
@@ -177,3 +175,5 @@ ENTRY(printch)
 		mov	r0, #0
 		b	1b
 ENDPROC(printch)
+
+hexbuf:		.space 16
diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c
index bee7f9d..0cdd7b4 100644
--- a/arch/arm/kernel/devtree.c
+++ b/arch/arm/kernel/devtree.c
@@ -9,7 +9,7 @@
  */
 #include <linux/init.h>
-#include <linux/export.h>
+#include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/bootmem.h>
@@ -132,3 +132,17 @@ struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys)
 
 	return mdesc_best;
 }
+
+/**
+ * irq_create_of_mapping - Hook to resolve OF irq specifier into a Linux irq#
+ *
+ * Currently the mapping mechanism is trivial; simple flat hwirq numbers are
+ * mapped 1:1 onto Linux irq numbers.  Cascaded irq controllers are not
+ * supported.
+ */
+unsigned int irq_create_of_mapping(struct device_node *controller,
+				const u32 *intspec, unsigned int intsize)
+{
+	return intspec[0];
+}
+EXPORT_SYMBOL_GPL(irq_create_of_mapping);
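[Editor's note: the hunk above makes the OF interrupt mapping explicit: whatever hwirq number appears in the device-tree specifier is returned unchanged as the Linux IRQ number. A minimal sketch, with hypothetical driver names, of how this gets consumed — irq_of_parse_and_map() resolves the node's "interrupts" property and calls irq_create_of_mapping() internally:]

    #include <linux/interrupt.h>
    #include <linux/of.h>
    #include <linux/of_irq.h>

    /* Sketch only: my_isr and my_request_irq are hypothetical; np and
     * my_dev are assumed to come from the caller's probe path. */
    static irqreturn_t my_isr(int irq, void *dev_id)
    {
            return IRQ_HANDLED;
    }

    static int my_request_irq(struct device_node *np, void *my_dev)
    {
            /* With the flat 1:1 mapping above, this is simply
             * intspec[0] from the device tree. */
            unsigned int irq = irq_of_parse_and_map(np, 0);

            if (!irq)
                    return -EINVAL;
            return request_irq(irq, my_isr, 0, "mydev", my_dev);
    }

[Since cascaded controllers are not supported here, a specifier longer than one cell still only has its first cell honoured.]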
diff --git a/arch/arm/kernel/dma.c b/arch/arm/kernel/dma.c
index 7b829d9..2c4a185 100644
--- a/arch/arm/kernel/dma.c
+++ b/arch/arm/kernel/dma.c
@@ -23,7 +23,7 @@
 
 #include <asm/mach/dma.h>
 
-DEFINE_RAW_SPINLOCK(dma_spin_lock);
+DEFINE_SPINLOCK(dma_spin_lock);
 EXPORT_SYMBOL(dma_spin_lock);
 
 static dma_t *dma_chan[MAX_DMA_CHANNELS];
diff --git a/arch/arm/kernel/ecard.c b/arch/arm/kernel/ecard.c
index 4dd0eda..d165001 100644
--- a/arch/arm/kernel/ecard.c
+++ b/arch/arm/kernel/ecard.c
@@ -237,7 +237,7 @@ static void ecard_init_pgtables(struct mm_struct *mm)
 	memcpy(dst_pgd, src_pgd, sizeof(pgd_t) * (IO_SIZE / PGDIR_SIZE));
 
-	src_pgd = pgd_offset(mm, (unsigned long)EASI_BASE);
+	src_pgd = pgd_offset(mm, EASI_BASE);
 	dst_pgd = pgd_offset(mm, EASI_START);
 
 	memcpy(dst_pgd, src_pgd, sizeof(pgd_t) * (EASI_SIZE / PGDIR_SIZE));
@@ -674,37 +674,44 @@ static int __init ecard_probeirqhw(void)
 #define ecard_probeirqhw() (0)
 #endif
 
-static void __iomem *__ecard_address(ecard_t *ec, card_type_t type, card_speed_t speed)
+#ifndef IO_EC_MEMC8_BASE
+#define IO_EC_MEMC8_BASE 0
+#endif
+
+static unsigned int __ecard_address(ecard_t *ec, card_type_t type, card_speed_t speed)
 {
-	void __iomem *address = NULL;
+	unsigned long address = 0;
 	int slot = ec->slot_no;
 
 	if (ec->slot_no == 8)
-		return ECARD_MEMC8_BASE;
+		return IO_EC_MEMC8_BASE;
 
 	ectcr &= ~(1 << slot);
 
 	switch (type) {
 	case ECARD_MEMC:
 		if (slot < 4)
-			address = ECARD_MEMC_BASE + (slot << 14);
+			address = IO_EC_MEMC_BASE + (slot << 12);
 		break;
 
 	case ECARD_IOC:
 		if (slot < 4)
-			address = ECARD_IOC_BASE + (slot << 14);
+			address = IO_EC_IOC_BASE + (slot << 12);
+#ifdef IO_EC_IOC4_BASE
 		else
-			address = ECARD_IOC4_BASE + ((slot - 4) << 14);
+			address = IO_EC_IOC4_BASE + ((slot - 4) << 12);
+#endif
 		if (address)
-			address += speed << 19;
+			address += speed << 17;
 		break;
 
+#ifdef IO_EC_EASI_BASE
 	case ECARD_EASI:
-		address = ECARD_EASI_BASE + (slot << 24);
+		address = IO_EC_EASI_BASE + (slot << 22);
 		if (speed == ECARD_FAST)
 			ectcr |= 1 << slot;
 		break;
-
+#endif
 	default:
 		break;
 	}
@@ -983,7 +990,6 @@ ecard_probe(int slot, card_type_t type)
 	ecard_t **ecp;
 	ecard_t *ec;
 	struct ex_ecid cid;
-	void __iomem *addr;
 	int i, rc;
 
 	ec = ecard_alloc_card(type, slot);
@@ -993,7 +999,7 @@ ecard_probe(int slot, card_type_t type)
 	}
 
 	rc = -ENODEV;
-	if ((addr = __ecard_address(ec, type, ECARD_SYNC)) == NULL)
+	if ((ec->podaddr = __ecard_address(ec, type, ECARD_SYNC)) == 0)
 		goto nodev;
 
 	cid.r_zero = 1;
@@ -1013,7 +1019,7 @@ ecard_probe(int slot, card_type_t type)
 	ec->cid.fiqmask = cid.r_fiqmask;
 	ec->cid.fiqoff = ecard_gets24(cid.r_fiqoff);
 	ec->fiqaddr =
-	ec->irqaddr = addr;
+	ec->irqaddr = (void __iomem *)ioaddr(ec->podaddr);
 
 	if (ec->cid.is) {
 		ec->irqmask = ec->cid.irqmask;
@@ -1042,8 +1048,10 @@ ecard_probe(int slot, card_type_t type)
 		set_irq_flags(ec->irq, IRQF_VALID);
 	}
 
+#ifdef IO_EC_MEMC8_BASE
 	if (slot == 8)
 		ec->irq = 11;
+#endif
 #ifdef CONFIG_ARCH_RPC
 	/* On RiscPC, only first two slots have DMA capability */
 	if (slot < 2)
@@ -1089,7 +1097,9 @@ static int __init ecard_init(void)
 		ecard_probe(slot, ECARD_IOC);
 	}
 
+#ifdef IO_EC_MEMC8_BASE
 	ecard_probe(8, ECARD_IOC);
+#endif
 
 	irqhw = ecard_probeirqhw();
 
diff --git a/arch/arm/kernel/elf.c b/arch/arm/kernel/elf.c
index ddba41d..9b05c6a 100644
--- a/arch/arm/kernel/elf.c
+++ b/arch/arm/kernel/elf.c
@@ -1,4 +1,4 @@
-#include <linux/export.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/personality.h>
 #include <linux/binfmts.h>
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index ece0996..2cd0076 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -24,59 +24,26 @@
 #include <asm/unwind.h>
 #include <asm/unistd.h>
 #include <asm/tls.h>
-#include <asm/system.h>
 
 #include "entry-header.S"
 #include <asm/entry-macro-multi.S>
 
 /*
- * Interrupt handling.
+ * Interrupt handling.  Preserves r7, r8, r9
 */
 	.macro	irq_handler
 #ifdef CONFIG_MULTI_IRQ_HANDLER
-	ldr	r1, =handle_arch_irq
+	ldr	r5, =handle_arch_irq
 	mov	r0, sp
-	ldr	r1, [r1]
+	ldr	r5, [r5]
 	adr	lr, BSYM(9997f)
-	teq	r1, #0
-	movne	pc, r1
+	teq	r5, #0
+	movne	pc, r5
 #endif
 	arch_irq_handler_default
 9997:
 	.endm
 
-	.macro	pabt_helper
-	@ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5
-#ifdef MULTI_PABORT
-	ldr	ip, .LCprocfns
-	mov	lr, pc
-	ldr	pc, [ip, #PROCESSOR_PABT_FUNC]
-#else
-	bl	CPU_PABORT_HANDLER
-#endif
-	.endm
-
-	.macro	dabt_helper
-
-	@
-	@ Call the processor-specific abort handler:
-	@
-	@  r2 - pt_regs
-	@  r4 - aborted context pc
-	@  r5 - aborted context psr
-	@
-	@ The abort handler must return the aborted address in r0, and
-	@ the fault status register in r1.  r9 must be preserved.
-	@
-#ifdef MULTI_DABORT
-	ldr	ip, .LCprocfns
-	mov	lr, pc
-	ldr	pc, [ip, #PROCESSOR_DABT_FUNC]
-#else
-	bl	CPU_DABORT_HANDLER
-#endif
-	.endm
-
 #ifdef CONFIG_KPROBES
 	.section	.kprobes.text,"ax",%progbits
 #else
@@ -159,74 +126,106 @@ ENDPROC(__und_invalid)
 SPFIX(	subeq	sp, sp, #4	)
 	stmia	sp, {r1 - r12}
 
-	ldmia	r0, {r3 - r5}
-	add	r7, sp, #S_SP - 4	@ here for interlock avoidance
-	mov	r6, #-1			@  ""  ""      ""       ""
-	add	r2, sp, #(S_FRAME_SIZE + \stack_hole - 4)
-SPFIX(	addeq	r2, r2, #4	)
-	str	r3, [sp, #-4]!		@ save the "real" r0 copied
+	ldmia	r0, {r1 - r3}
+	add	r5, sp, #S_SP - 4	@ here for interlock avoidance
+	mov	r4, #-1			@  ""  ""      ""       ""
+	add	r0, sp, #(S_FRAME_SIZE + \stack_hole - 4)
+SPFIX(	addeq	r0, r0, #4	)
+	str	r1, [sp, #-4]!		@ save the "real" r0 copied
 					@ from the exception stack
 
-	mov	r3, lr
+	mov	r1, lr
 
 	@
 	@ We are now ready to fill in the remaining blanks on the stack:
 	@
-	@  r2 - sp_svc
-	@  r3 - lr_svc
-	@  r4 - lr_<exception>, already fixed up for correct return/restart
-	@  r5 - spsr_<exception>
-	@  r6 - orig_r0 (see pt_regs definition in ptrace.h)
+	@  r0 - sp_svc
+	@  r1 - lr_svc
+	@  r2 - lr_<exception>, already fixed up for correct return/restart
+	@  r3 - spsr_<exception>
+	@  r4 - orig_r0 (see pt_regs definition in ptrace.h)
 	@
-	stmia	r7, {r2 - r6}
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-	bl	trace_hardirqs_off
-#endif
+	stmia	r5, {r0 - r4}
 	.endm
 
 	.align	5
 __dabt_svc:
 	svc_entry
+
+	@
+	@ get ready to re-enable interrupts if appropriate
+	@
+	mrs	r9, cpsr
+	tst	r3, #PSR_I_BIT
+	biceq	r9, r9, #PSR_I_BIT
+
+	@
+	@ Call the processor-specific abort handler:
+	@
+	@  r2 - aborted context pc
+	@  r3 - aborted context cpsr
+	@
+	@ The abort handler must return the aborted address in r0, and
+	@ the fault status register in r1.  r9 must be preserved.
+	@
+#ifdef MULTI_DABORT
+	ldr	r4, .LCprocfns
+	mov	lr, pc
+	ldr	pc, [r4, #PROCESSOR_DABT_FUNC]
+#else
+	bl	CPU_DABORT_HANDLER
+#endif
+
+	@
+	@ set desired IRQ state, then call main handler
+	@
+	debug_entry r1
+	msr	cpsr_c, r9
 	mov	r2, sp
-	dabt_helper
+	bl	do_DataAbort
 
 	@
 	@ IRQs off again before pulling preserved data off the stack
 	@
 	disable_irq_notrace
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-	tst	r5, #PSR_I_BIT
-	bleq	trace_hardirqs_on
-	tst	r5, #PSR_I_BIT
-	blne	trace_hardirqs_off
-#endif
-	svc_exit r5				@ return from exception
+	@
+	@ restore SPSR and restart the instruction
+	@
+	ldr	r2, [sp, #S_PSR]
+	svc_exit r2				@ return from exception
 UNWIND(.fnend		)
 ENDPROC(__dabt_svc)
 
 	.align	5
 __irq_svc:
 	svc_entry
-	irq_handler
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+	bl	trace_hardirqs_off
+#endif
 #ifdef CONFIG_PREEMPT
 	get_thread_info tsk
 	ldr	r8, [tsk, #TI_PREEMPT]		@ get preempt count
+	add	r7, r8, #1			@ increment it
+	str	r7, [tsk, #TI_PREEMPT]
+#endif
+
+	irq_handler
+#ifdef CONFIG_PREEMPT
+	str	r8, [tsk, #TI_PREEMPT]		@ restore preempt count
 	ldr	r0, [tsk, #TI_FLAGS]		@ get flags
 	teq	r8, #0				@ if preempt count != 0
 	movne	r0, #0				@ force flags to 0
 	tst	r0, #_TIF_NEED_RESCHED
 	blne	svc_preempt
 #endif
-
+	ldr	r4, [sp, #S_PSR]		@ irqs are already disabled
 #ifdef CONFIG_TRACE_IRQFLAGS
-	@ The parent context IRQs must have been enabled to get here in
-	@ the first place, so there's no point checking the PSR I bit.
-	bl	trace_hardirqs_on
+	tst	r4, #PSR_I_BIT
+	bleq	trace_hardirqs_on
 #endif
-	svc_exit r5				@ return from exception
+	svc_exit r4				@ return from exception
 UNWIND(.fnend		)
 ENDPROC(__irq_svc)
 
@@ -242,19 +241,6 @@ svc_preempt:
 	b	1b
 #endif
 
-__und_fault:
-	@ Correct the PC such that it is pointing at the instruction
-	@ which caused the fault.  If the faulting instruction was ARM
-	@ the PC will be pointing at the next instruction, and have to
-	@ subtract 4.  Otherwise, it is Thumb, and the PC will be
-	@ pointing at the second half of the Thumb instruction.  We
-	@ have to subtract 2.
-	ldr	r2, [r0, #S_PC]
-	sub	r2, r2, r1
-	str	r2, [r0, #S_PC]
-	b	do_undefinstr
-ENDPROC(__und_fault)
-
 	.align	5
 __und_svc:
 #ifdef CONFIG_KPROBES
@@ -265,6 +251,7 @@ __und_svc:
 #else
 	svc_entry
 #endif
+
 	@
 	@ call emulation code, which returns using r9 if it has emulated
 	@ the instruction, or the more conventional lr if we are to treat
 	@ this as a real undefined instruction
 	@
 	@  r0 - instruction
 	@
-#ifndef CONFIG_THUMB2_KERNEL
-	ldr	r0, [r4, #-4]
+#ifndef CONFIG_THUMB2_KERNEL
+	ldr	r0, [r2, #-4]
 #else
-	mov	r1, #2
-	ldrh	r0, [r4, #-2]			@ Thumb instruction at LR - 2
-	cmp	r0, #0xe800			@ 32-bit instruction if xx >= 0
-	blo	__und_svc_fault
-	ldrh	r9, [r4]			@ bottom 16 bits
-	add	r4, r4, #2
-	str	r4, [sp, #S_PC]
-	orr	r0, r9, r0, lsl #16
+	ldrh	r0, [r2, #-2]			@ Thumb instruction at LR - 2
+	and	r9, r0, #0xf800
+	cmp	r9, #0xe800			@ 32-bit instruction if xx >= 0
+	ldrhhs	r9, [r2]			@ bottom 16 bits
+	orrhs	r0, r9, r0, lsl #16
 #endif
-	adr	r9, BSYM(__und_svc_finish)
-	mov	r2, r4
+	adr	r9, BSYM(1f)
 	bl	call_fpe
 
-	mov	r1, #4				@ PC correction to apply
-__und_svc_fault:
 	mov	r0, sp				@ struct pt_regs *regs
-	bl	__und_fault
+	bl	do_undefinstr
 
 	@
 	@ IRQs off again before pulling preserved data off the stack
 	@
-__und_svc_finish:
-	disable_irq_notrace
+1:	disable_irq_notrace
 
 	@
 	@ restore SPSR and restart the instruction
 	@
-	ldr	r5, [sp, #S_PSR]		@ Get SVC cpsr
-#ifdef CONFIG_TRACE_IRQFLAGS
-	tst	r5, #PSR_I_BIT
-	bleq	trace_hardirqs_on
-	tst	r5, #PSR_I_BIT
-	blne	trace_hardirqs_off
-#endif
-	svc_exit r5				@ return from exception
+	ldr	r2, [sp, #S_PSR]		@ Get SVC cpsr
+	svc_exit r2				@ return from exception
 UNWIND(.fnend		)
 ENDPROC(__und_svc)
 
 	.align	5
 __pabt_svc:
 	svc_entry
+
+	@
+	@ re-enable interrupts if appropriate
+	@
+	mrs	r9, cpsr
+	tst	r3, #PSR_I_BIT
+	biceq	r9, r9, #PSR_I_BIT
+
+	mov	r0, r2			@ pass address of aborted instruction.
+#ifdef MULTI_PABORT
+	ldr	r4, .LCprocfns
+	mov	lr, pc
+	ldr	pc, [r4, #PROCESSOR_PABT_FUNC]
+#else
+	bl	CPU_PABORT_HANDLER
+#endif
+	debug_entry r1
+	msr	cpsr_c, r9			@ Maybe enable interrupts
 	mov	r2, sp				@ regs
-	pabt_helper
+	bl	do_PrefetchAbort		@ call abort handler
 
 	@
 	@ IRQs off again before pulling preserved data off the stack
 	@
 	disable_irq_notrace
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-	tst	r5, #PSR_I_BIT
-	bleq	trace_hardirqs_on
-	tst	r5, #PSR_I_BIT
-	blne	trace_hardirqs_off
-#endif
-	svc_exit r5				@ return from exception
+	@
+	@ restore SPSR and restart the instruction
+	@
+	ldr	r2, [sp, #S_PSR]
+	svc_exit r2				@ return from exception
 UNWIND(.fnend		)
 ENDPROC(__pabt_svc)
 
@@ -361,23 +351,23 @@ ENDPROC(__pabt_svc)
 ARM(	stmib	sp, {r1 - r12}	)
 THUMB(	stmia	sp, {r0 - r12}	)
 
-	ldmia	r0, {r3 - r5}
+	ldmia	r0, {r1 - r3}
 	add	r0, sp, #S_PC		@ here for interlock avoidance
-	mov	r6, #-1			@  ""  ""     ""        ""
+	mov	r4, #-1			@  ""  ""     ""        ""
 
-	str	r3, [sp]		@ save the "real" r0 copied
+	str	r1, [sp]		@ save the "real" r0 copied
 					@ from the exception stack
 
 	@
 	@ We are now ready to fill in the remaining blanks on the stack:
 	@
-	@  r4 - lr_<exception>, already fixed up for correct return/restart
-	@  r5 - spsr_<exception>
-	@  r6 - orig_r0 (see pt_regs definition in ptrace.h)
+	@  r2 - lr_<exception>, already fixed up for correct return/restart
+	@  r3 - spsr_<exception>
+	@  r4 - orig_r0 (see pt_regs definition in ptrace.h)
 	@
 	@ Also, separately save sp_usr and lr_usr
 	@
-	stmia	r0, {r4 - r6}
+	stmia	r0, {r2 - r4}
 ARM(	stmdb	r0, {sp, lr}^	)
 THUMB(	store_user_sp_lr r0, r1, S_SP - S_PC	)
 
@@ -390,14 +380,10 @@ ENDPROC(__pabt_svc)
 	@ Clear FP to mark the first stack frame
 	@
 	zero_fp
-
-#ifdef CONFIG_IRQSOFF_TRACER
-	bl	trace_hardirqs_off
-#endif
 	.endm
 
 	.macro	kuser_cmpxchg_check
-#if !defined(CONFIG_CPU_32v6K) && !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
+#if __LINUX_ARM_ARCH__ < 6 && !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
 #ifndef CONFIG_MMU
 #warning "NPTL on non MMU needs fixing"
 #else
 	@ Make sure our user space atomic helper is restarted
 	@ if it was interrupted in a critical region.  Here we
 	@ perform a quick test inline since it should be false
 	@ 99.9999% of the time.  The rest is done out of line.
-	cmp	r4, #TASK_SIZE
-	blhs	kuser_cmpxchg64_fixup
+	cmp	r2, #TASK_SIZE
+	blhs	kuser_cmpxchg_fixup
 #endif
 #endif
 	.endm
 
 	.align	5
 __dabt_usr:
 	usr_entry
 	kuser_cmpxchg_check
+
+	@
+	@ Call the processor-specific abort handler:
+	@
+	@  r2 - aborted context pc
+	@  r3 - aborted context cpsr
+	@
+	@ The abort handler must return the aborted address in r0, and
+	@ the fault status register in r1.
+	@
+#ifdef MULTI_DABORT
+	ldr	r4, .LCprocfns
+	mov	lr, pc
+	ldr	pc, [r4, #PROCESSOR_DABT_FUNC]
+#else
+	bl	CPU_DABORT_HANDLER
+#endif
+
+	@
+	@ IRQs on, then call the main handler
+	@
+	debug_entry r1
+	enable_irq
 	mov	r2, sp
-	dabt_helper
-	b	ret_from_exception
+	adr	lr, BSYM(ret_from_exception)
+	b	do_DataAbort
 UNWIND(.fnend		)
 ENDPROC(__dabt_usr)
 
@@ -425,8 +434,28 @@ ENDPROC(__dabt_usr)
 __irq_usr:
 	usr_entry
 	kuser_cmpxchg_check
-	irq_handler
 
+#ifdef CONFIG_IRQSOFF_TRACER
+	bl	trace_hardirqs_off
+#endif
+
 	get_thread_info tsk
 #ifdef CONFIG_PREEMPT
+	ldr	r8, [tsk, #TI_PREEMPT]		@ get preempt count
+	add	r7, r8, #1			@ increment it
+	str	r7, [tsk, #TI_PREEMPT]
+#endif
+
+	irq_handler
+#ifdef CONFIG_PREEMPT
+	ldr	r0, [tsk, #TI_PREEMPT]
+	str	r8, [tsk, #TI_PREEMPT]
+	teq	r0, r7
+ ARM(	strne	r0, [r0, -r0]	)
+ THUMB(	movne	r0, #0		)
+ THUMB(	strne	r0, [r0]	)
+#endif
+
 	mov	why, #0
 	b	ret_to_user_from_irq
 UNWIND(.fnend		)
@@ -438,93 +467,55 @@ ENDPROC(__irq_usr)
 __und_usr:
 	usr_entry
 
-	mov	r2, r4
-	mov	r3, r5
-
-	@ r2 = regs->ARM_pc, which is either 2 or 4 bytes ahead of the
-	@      faulting instruction depending on Thumb mode.
-	@ r3 = regs->ARM_cpsr
 	@
-	@ The emulation code returns using r9 if it has emulated the
-	@ instruction, or the more conventional lr if we are to treat
-	@ this as a real undefined instruction
+	@ fall through to the emulation code, which returns using r9 if
+	@ it has emulated the instruction, or the more conventional lr
+	@ if we are to treat this as a real undefined instruction
+	@
+	@  r0 - instruction
 	@
 	adr	r9, BSYM(ret_from_exception)
-
+	adr	lr, BSYM(__und_usr_unknown)
 	tst	r3, #PSR_T_BIT			@ Thumb mode?
-	bne	__und_usr_thumb
-	sub	r4, r2, #4			@ ARM instr at LR - 4
-1:	ldrt	r0, [r4]
+	itet	eq				@ explicit IT needed for the 1f label
+	subeq	r4, r2, #4			@ ARM instr at LR - 4
+	subne	r4, r2, #2			@ Thumb instr at LR - 2
+1:	ldreqt	r0, [r4]
 #ifdef CONFIG_CPU_ENDIAN_BE8
-	rev	r0, r0				@ little endian instruction
+	reveq	r0, r0				@ little endian instruction
 #endif
-	@ r0 = 32-bit ARM instruction which caused the exception
-	@ r2 = PC value for the following instruction (:= regs->ARM_pc)
-	@ r4 = PC value for the faulting instruction
-	@ lr = 32-bit undefined instruction function
-	adr	lr, BSYM(__und_usr_fault_32)
-	b	call_fpe
-
-__und_usr_thumb:
+	beq	call_fpe
 	@ Thumb instruction
-	sub	r4, r2, #2			@ First half of thumb instr at LR - 2
-#if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7
-/*
- * Thumb-2 instruction handling.  Note that because pre-v6 and >= v6 platforms
- * can never be supported in a single kernel, this code is not applicable at
- * all when __LINUX_ARM_ARCH__ < 6.  This allows simplifying assumptions to be
- * made about .arch directives.
- */
-#if __LINUX_ARM_ARCH__ < 7
-/* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */
-#define NEED_CPU_ARCHITECTURE
-	ldr	r5, .LCcpu_architecture
-	ldr	r5, [r5]
-	cmp	r5, #CPU_ARCH_ARMv7
-	blo	__und_usr_fault_16		@ 16bit undefined instruction
-/*
- * The following code won't get run unless the running CPU really is v7, so
- * coding round the lack of ldrht on older arches is pointless.  Temporarily
- * override the assembler target arch with the minimum required instead:
- */
-	.arch	armv6t2
-#endif
-2:	ldrht	r5, [r4]
-	cmp	r5, #0xe800			@ 32bit instruction if xx != 0
-	blo	__und_usr_fault_16		@ 16bit undefined instruction
-3:	ldrht	r0, [r2]
+#if __LINUX_ARM_ARCH__ >= 7
+2:
+ ARM(	ldrht	r5, [r4], #2	)
+ THUMB(	ldrht	r5, [r4]	)
+ THUMB(	add	r4, r4, #2	)
+	and	r0, r5, #0xf800			@ mask bits 111x x... .... ....
+	cmp	r0, #0xe800			@ 32bit instruction if xx != 0
+	blo	__und_usr_unknown
+3:	ldrht	r0, [r4]
 	add	r2, r2, #2			@ r2 is PC + 2, make it PC + 4
-	str	r2, [sp, #S_PC]			@ it's a 2x16bit instr, update
-	orr	r0, r0, r5, lsl #16
-	adr	lr, BSYM(__und_usr_fault_32)
-	@ r0 = the two 16-bit Thumb instructions which caused the exception
-	@ r2 = PC value for the following Thumb instruction (:= regs->ARM_pc)
-	@ r4 = PC value for the first 16-bit Thumb instruction
-	@ lr = 32bit undefined instruction function
-
-#if __LINUX_ARM_ARCH__ < 7
-/* If the target arch was overridden, change it back: */
-#ifdef CONFIG_CPU_32v6K
-	.arch	armv6k
+	orr	r0, r0, r5, lsl #16
 #else
-	.arch	armv6
-#endif
-#endif /* __LINUX_ARM_ARCH__ < 7 */
-#else /* !(CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7) */
-	b	__und_usr_fault_16
+	b	__und_usr_unknown
 #endif
-	UNWIND(.fnend)
+ UNWIND(.fnend		)
 ENDPROC(__und_usr)
 
+	@
+	@ fallthrough to call_fpe
+	@
+
 /*
- * The out of line fixup for the ldrt instructions above.
+ * The out of line fixup for the ldrt above.
 */
 	.pushsection .fixup, "ax"
 4:	mov	pc, r9
 	.popsection
 	.pushsection __ex_table,"a"
 	.long	1b, 4b
-#if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7
+#if __LINUX_ARM_ARCH__ >= 7
 	.long	2b, 4b
 	.long	3b, 4b
 #endif
@@ -548,12 +539,11 @@ ENDPROC(__und_usr)
 * NEON handler code.
 *
 * Emulators may wish to make use of the following registers:
- * r0  = instruction opcode (32-bit ARM or two 16-bit Thumb)
- * r2  = PC value to resume execution after successful emulation
+ * r0  = instruction opcode.
+ * r2  = PC+4
 * r9  = normal "successful" return address
- * r10 = this thread's thread_info structure
+ * r10 = this thread's thread_info structure.
 * lr  = unrecognised instruction return address
- * IRQs disabled, FIQs enabled.
 */
 	@
 	@ Fall-through from Thumb-2 __und_usr
@@ -634,12 +624,6 @@ call_fpe:
 	movw_pc	lr				@ CP#14 (Debug)
 	movw_pc	lr				@ CP#15 (Control)
 
-#ifdef NEED_CPU_ARCHITECTURE
-	.align	2
-.LCcpu_architecture:
-	.word	__cpu_architecture
-#endif
-
 #ifdef CONFIG_NEON
 	.align	6
 
@@ -688,23 +672,29 @@ ENTRY(no_fp)
 	mov	pc, lr
 ENDPROC(no_fp)
 
-__und_usr_fault_32:
-	mov	r1, #4
-	b	1f
-__und_usr_fault_16:
-	mov	r1, #2
-1:	enable_irq
+__und_usr_unknown:
+	enable_irq
 	mov	r0, sp
 	adr	lr, BSYM(ret_from_exception)
-	b	__und_fault
-ENDPROC(__und_usr_fault_32)
-ENDPROC(__und_usr_fault_16)
+	b	do_undefinstr
+ENDPROC(__und_usr_unknown)
 
 	.align	5
 __pabt_usr:
 	usr_entry
+
+	mov	r0, r2			@ pass address of aborted instruction.
+#ifdef MULTI_PABORT
+	ldr	r4, .LCprocfns
+	mov	lr, pc
+	ldr	pc, [r4, #PROCESSOR_PABT_FUNC]
+#else
+	bl	CPU_PABORT_HANDLER
+#endif
+	debug_entry r1
+	enable_irq				@ Enable interrupts
 	mov	r2, sp				@ regs
-	pabt_helper
+	bl	do_PrefetchAbort		@ call abort handler
 UNWIND(.fnend		)
 	/* fall through */
 /*
@@ -768,12 +758,31 @@ ENDPROC(__switch_to)
 /*
 * User helpers.
 *
+ * These are segments of kernel provided user code reachable from user space
+ * at a fixed address in kernel memory.  This is used to provide user space
+ * with some operations which require kernel help because of unimplemented
+ * native feature and/or instructions in many ARM CPUs. The idea is for
+ * this code to be executed directly in user mode for best efficiency but
+ * which is too intimate with the kernel counter part to be left to user
+ * libraries.  In fact this code might even differ from one CPU to another
+ * depending on the available instruction set and restrictions like on
+ * SMP systems.  In other words, the kernel reserves the right to change
+ * this code as needed without warning.
Only the entry points and their
+ * results are guaranteed to be stable.
+ *
 * Each segment is 32-byte aligned and will be moved to the top of the high
 * vector page.  New segments (if ever needed) must be added in front of
 * existing ones.  This mechanism should be used only for things that are
 * really small and justified, and not be abused freely.
 *
- * See Documentation/arm/kernel_user_helpers.txt for formal definitions.
+ * User space is expected to implement those things inline when optimizing
+ * for a processor that has the necessary native support, but only if such
+ * resulting binaries are already to be incompatible with earlier ARM
+ * processors due to the use of unsupported instructions other than what
+ * is provided here.  In other words don't make binaries unable to run on
+ * earlier processors just for the sake of not using these kernel helpers
+ * if your compiled code is not going to use the new instructions for other
+ * purpose.
 */
 THUMB(	.arm	)
 
@@ -790,104 +799,97 @@ ENDPROC(__switch_to)
 __kuser_helper_start:
 
 /*
- * Due to the length of some sequences, __kuser_cmpxchg64 spans 2 regular
- * kuser "slots", therefore 0xffff0f80 is not used as a valid entry point.
+ * Reference prototype:
+ *
+ *	void __kernel_memory_barrier(void)
+ *
+ * Input:
+ *
+ *	lr = return address
+ *
+ * Output:
+ *
+ *	none
+ *
+ * Clobbered:
+ *
+ *	none
+ *
+ * Definition and user space usage example:
+ *
+ *	typedef void (__kernel_dmb_t)(void);
+ *	#define __kernel_dmb (*(__kernel_dmb_t *)0xffff0fa0)
+ *
+ * Apply any needed memory barrier to preserve consistency with data modified
+ * manually and __kuser_cmpxchg usage.
+ *
+ * This could be used as follows:
+ *
+ * #define __kernel_dmb() \
+ *	asm volatile ( "mov r0, #0xffff0fff; mov lr, pc; sub pc, r0, #95" \
+ *		: : : "r0", "lr","cc" )
 */
-__kuser_cmpxchg64:				@ 0xffff0f60
-
-#if defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
-
-	/*
-	 * Poor you.  No fast solution possible...
-	 * The kernel itself must perform the operation.
-	 * A special ghost syscall is used for that (see traps.c).
-	 */
-	stmfd	sp!, {r7, lr}
-	ldr	r7, 1f			@ it's 20 bits
-	swi	__ARM_NR_cmpxchg64
-	ldmfd	sp!, {r7, pc}
-1:	.word	__ARM_NR_cmpxchg64
-
-#elif defined(CONFIG_CPU_32v6K)
-
-	stmfd	sp!, {r4, r5, r6, r7}
-	ldrd	r4, r5, [r0]			@ load old val
-	ldrd	r6, r7, [r1]			@ load new val
-	smp_dmb	arm
-1:	ldrexd	r0, r1, [r2]			@ load current val
-	eors	r3, r0, r4			@ compare with oldval (1)
-	eoreqs	r3, r1, r5			@ compare with oldval (2)
-	strexdeq r3, r6, r7, [r2]		@ store newval if eq
-	teqeq	r3, #1				@ success?
-	beq	1b				@ if no then retry
-	smp_dmb	arm
-	rsbs	r0, r3, #0			@ set returned val and C flag
-	ldmfd	sp!, {r4, r5, r6, r7}
-	bx	lr
-
-#elif !defined(CONFIG_SMP)
-
-#ifdef CONFIG_MMU
-
-	/*
-	 * The only thing that can break atomicity in this cmpxchg64
-	 * implementation is either an IRQ or a data abort exception
-	 * causing another process/thread to be scheduled in the middle of
-	 * the critical sequence.  The same strategy as for cmpxchg is used.
-	 */
-	stmfd	sp!, {r4, r5, r6, lr}
-	ldmia	r0, {r4, r5}			@ load old val
-	ldmia	r1, {r6, lr}			@ load new val
-1:	ldmia	r2, {r0, r1}			@ load current val
-	eors	r3, r0, r4			@ compare with oldval (1)
-	eoreqs	r3, r1, r5			@ compare with oldval (2)
-2:	stmeqia	r2, {r6, lr}			@ store newval if eq
-	rsbs	r0, r3, #0			@ set return val and C flag
-	ldmfd	sp!, {r4, r5, r6, pc}
-
-	.text
-kuser_cmpxchg64_fixup:
-	@ Called from kuser_cmpxchg_fixup.
-	@ r4 = address of interrupted insn (must be preserved).
-	@ sp = saved regs. r7 and r8 are clobbered.
-	@ 1b = first critical insn, 2b = last critical insn.
-	@ If r4 >= 1b and r4 <= 2b then saved pc_usr is set to 1b.
-	mov	r7, #0xffff0fff
-	sub	r7, r7, #(0xffff0fff - (0xffff0f60 + (1b - __kuser_cmpxchg64)))
-	subs	r8, r4, r7
-	rsbcss	r8, r8, #(2b - 1b)
-	strcs	r7, [sp, #S_PC]
-#if __LINUX_ARM_ARCH__ < 6
-	bcc	kuser_cmpxchg32_fixup
-#endif
-	mov	pc, lr
-	.previous
-
-#else
-#warning "NPTL on non MMU needs fixing"
-	mov	r0, #-1
-	adds	r0, r0, #0
-	usr_ret	lr
-#endif
-
-#else
-#error "incoherent kernel configuration"
-#endif
-
-	/* pad to next slot */
-	.rept	(16 - (. - __kuser_cmpxchg64)/4)
-	.word	0
-	.endr
-
-	.align	5
-
 __kuser_memory_barrier:				@ 0xffff0fa0
 	smp_dmb	arm
 	usr_ret	lr
 
 	.align	5
 
+/*
+ * Reference prototype:
+ *
+ *	int __kernel_cmpxchg(int oldval, int newval, int *ptr)
+ *
+ * Input:
+ *
+ *	r0 = oldval
+ *	r1 = newval
+ *	r2 = ptr
+ *	lr = return address
+ *
+ * Output:
+ *
+ *	r0 = returned value (zero or non-zero)
+ *	C flag = set if r0 == 0, clear if r0 != 0
+ *
+ * Clobbered:
+ *
+ *	r3, ip, flags
+ *
+ * Definition and user space usage example:
+ *
+ *	typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
+ *	#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
+ *
+ * Atomically store newval in *ptr if *ptr is equal to oldval for user space.
+ * Return zero if *ptr was changed or non-zero if no exchange happened.
+ * The C flag is also set if *ptr was changed to allow for assembly
+ * optimization in the calling code.
+ *
+ * Notes:
+ *
+ *    - This routine already includes memory barriers as needed.
+ *
+ * For example, a user space atomic_add implementation could look like this:
+ *
+ * #define atomic_add(ptr, val) \
+ *	({ register unsigned int *__ptr asm("r2") = (ptr); \
+ *	   register unsigned int __result asm("r1"); \
+ *	   asm volatile ( \
+ *	       "1: @ atomic_add\n\t" \
+ *	       "ldr	r0, [r2]\n\t" \
+ *	       "mov	r3, #0xffff0fff\n\t" \
+ *	       "add	lr, pc, #4\n\t" \
+ *	       "add	r1, r0, %2\n\t" \
+ *	       "add	pc, r3, #(0xffff0fc0 - 0xffff0fff)\n\t" \
+ *	       "bcc	1b" \
+ *	       : "=&r" (__result) \
+ *	       : "r" (__ptr), "rIL" (val) \
+ *	       : "r0","r3","ip","lr","cc","memory" ); \
+ *	   __result; })
+ */
+
 __kuser_cmpxchg:				@ 0xffff0fc0
 
 #if defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
@@ -923,15 +925,15 @@ __kuser_cmpxchg:			@ 0xffff0fc0
 	usr_ret	lr
 
 	.text
-kuser_cmpxchg32_fixup:
+kuser_cmpxchg_fixup:
 	@ Called from kuser_cmpxchg_check macro.
-	@ r4 = address of interrupted insn (must be preserved).
+	@ r2 = address of interrupted insn (must be preserved).
 	@ sp = saved regs. r7 and r8 are clobbered.
 	@ 1b = first critical insn, 2b = last critical insn.
-	@ If r4 >= 1b and r4 <= 2b then saved pc_usr is set to 1b.
+	@ If r2 >= 1b and r2 <= 2b then saved pc_usr is set to 1b.
 	mov	r7, #0xffff0fff
 	sub	r7, r7, #(0xffff0fff - (0xffff0fc0 + (1b - __kuser_cmpxchg)))
-	subs	r8, r4, r7
+	subs	r8, r2, r7
 	rsbcss	r8, r8, #(2b - 1b)
 	strcs	r7, [sp, #S_PC]
 	mov	pc, lr
@@ -961,6 +963,39 @@ kuser_cmpxchg_fixup:
 
 	.align	5
 
+/*
+ * Reference prototype:
+ *
+ *	int __kernel_get_tls(void)
+ *
+ * Input:
+ *
+ *	lr = return address
+ *
+ * Output:
+ *
+ *	r0 = TLS value
+ *
+ * Clobbered:
+ *
+ *	none
+ *
+ * Definition and user space usage example:
+ *
+ *	typedef int (__kernel_get_tls_t)(void);
+ *	#define __kernel_get_tls (*(__kernel_get_tls_t *)0xffff0fe0)
+ *
+ * Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
+ *
+ * This could be used as follows:
+ *
+ * #define __kernel_get_tls() \
+ *	({ register unsigned int __val asm("r0"); \
+ *	   asm( "mov r0, #0xffff0fff; mov lr, pc; sub pc, r0, #31" \
+ *		: "=r" (__val) : : "lr","cc" ); \
+ *	   __val; })
 */
 
 __kuser_get_tls:				@ 0xffff0fe0
 	ldr	r0, [pc, #(16 - 8)]	@ read TLS, set in kuser_get_tls_init
 	usr_ret	lr
@@ -969,6 +1004,19 @@ __kuser_get_tls:			@ 0xffff0fe0
 	.word	0			@ 0xffff0ff0 software TLS value, then
 	.endr				@ pad up to __kuser_helper_version
 
+/*
+ * Reference declaration:
+ *
+ *	extern unsigned int __kernel_helper_version;
+ *
+ * Definition and user space usage example:
+ *
+ *	#define __kernel_helper_version (*(unsigned int *)0xffff0ffc)
+ *
+ * User space may read this to determine the current number of helpers
+ * available.
+ */
+
 __kuser_helper_version:				@ 0xffff0ffc
 	.word	((__kuser_helper_end - __kuser_helper_start) >> 5)
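[Editor's note: the version word makes the helpers discoverable at run time before user space jumps to a fixed address. A minimal user-space sketch, assuming the helper numbering implied by the 32-byte slot arithmetic above ('>> 5'), under which a version of at least 3 means the memory barrier helper at 0xffff0fa0 exists:]

    /* Sketch only; the addresses are the fixed kuser ABI quoted in
     * the comments above. */
    #define __kernel_helper_version (*(unsigned int *)0xffff0ffc)

    typedef void (__kernel_dmb_t)(void);
    #define __kernel_dmb (*(__kernel_dmb_t *)0xffff0fa0)

    static void dmb_or_fallback(void)
    {
            if (__kernel_helper_version >= 3)
                    __kernel_dmb();
            /* else: fall back (e.g. a syscall or compiler barrier),
             * depending on what the application can tolerate */
    }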
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 9d95a46..051166c 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -76,21 +76,26 @@
 #ifndef CONFIG_THUMB2_KERNEL
 	.macro	svc_exit, rpsr
 	msr	spsr_cxsf, \rpsr
-#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)
-	@ We must avoid clrex due to Cortex-A15 erratum #830321
-	sub	r0, sp, #4			@ uninhabited address
-	strex	r1, r2, [r0]			@ clear the exclusive monitor
-#endif
+#if defined(CONFIG_CPU_V6)
+	ldr	r0, [sp]
+	strex	r1, r2, [sp]			@ clear the exclusive monitor
+	ldmib	sp, {r1 - pc}^			@ load r1 - pc, cpsr
+#elif defined(CONFIG_CPU_32v6K)
+	clrex					@ clear the exclusive monitor
 	ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
+#else
+	ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
+#endif
 	.endm
 
 	.macro	restore_user_regs, fast = 0, offset = 0
 	ldr	r1, [sp, #\offset + S_PSR]	@ get calling cpsr
 	ldr	lr, [sp, #\offset + S_PC]!	@ get pc
 	msr	spsr_cxsf, r1			@ save in spsr_svc
-#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)
-	@ We must avoid clrex due to Cortex-A15 erratum #830321
+#if defined(CONFIG_CPU_V6)
 	strex	r1, r2, [sp]			@ clear the exclusive monitor
+#elif defined(CONFIG_CPU_32v6K)
+	clrex					@ clear the exclusive monitor
 #endif
 	.if	\fast
 	ldmdb	sp, {r1 - lr}^			@ get calling r1 - lr
@@ -116,30 +121,26 @@
 	.endm
 #else	/* CONFIG_THUMB2_KERNEL */
 	.macro	svc_exit, rpsr
-	ldr	lr, [sp, #S_SP]			@ top of the stack
-	ldrd	r0, r1, [sp, #S_LR]		@ calling lr and pc
-
-	@ We must avoid clrex due to Cortex-A15 erratum #830321
-	strex	r2, r1, [sp, #S_LR]		@ clear the exclusive monitor
-
-	stmdb	lr!, {r0, r1, \rpsr}		@ calling lr and rfe context
+	clrex					@ clear the exclusive monitor
+	ldr	r0, [sp, #S_SP]			@ top of the stack
+	ldr	r1, [sp, #S_PC]			@ return address
+	tst	r0, #4				@ orig stack 8-byte aligned?
+	stmdb	r0, {r1, \rpsr}			@ rfe context
 	ldmia	sp, {r0 - r12}
-	mov	sp, lr
-	ldr	lr, [sp], #4
+	ldr	lr, [sp, #S_LR]
+	addeq	sp, sp, #S_FRAME_SIZE - 8	@ aligned
+	addne	sp, sp, #S_FRAME_SIZE - 4	@ not aligned
 	rfeia	sp!
 	.endm
 
 	.macro	restore_user_regs, fast = 0, offset = 0
+	clrex					@ clear the exclusive monitor
 	mov	r2, sp
 	load_user_sp_lr r2, r3, \offset + S_SP	@ calling sp, lr
 	ldr	r1, [sp, #\offset + S_PSR]	@ get calling cpsr
 	ldr	lr, [sp, #\offset + S_PC]	@ get pc
 	add	sp, sp, #\offset + S_SP
 	msr	spsr_cxsf, r1			@ save in spsr_svc
-
-	@ We must avoid clrex due to Cortex-A15 erratum #830321
-	strex	r1, r2, [sp]			@ clear the exclusive monitor
-
 	.if	\fast
 	ldmdb	sp, {r1 - r12}			@ get calling r1 - r12
 	.else
@@ -164,6 +165,25 @@
 	.endm
 #endif	/* !CONFIG_THUMB2_KERNEL */
 
+	@
+	@ Debug exceptions are taken as prefetch or data aborts.
+	@ We must disable preemption during the handler so that
+	@ we can access the debug registers safely.
+	@
+	.macro	debug_entry, fsr
+#if defined(CONFIG_HAVE_HW_BREAKPOINT) && defined(CONFIG_PREEMPT)
+	ldr	r4, =0x40f		@ mask out fsr.fs
+	and	r5, r4, \fsr
+	cmp	r5, #2			@ debug exception
+	bne	1f
+	get_thread_info r10
+	ldr	r6, [r10, #TI_PREEMPT]	@ get preempt count
+	add	r11, r6, #1		@ increment it
+	str	r11, [r10, #TI_PREEMPT]
+1:
+#endif
+	.endm
+
 /*
 * These are the registers used in the syscall handler, and allow us to
 * have in theory up to 7 arguments to a function - r0 to r6.
diff --git a/arch/arm/kernel/etm.c b/arch/arm/kernel/etm.c
index 36d20bd..496b8b8 100644
--- a/arch/arm/kernel/etm.c
+++ b/arch/arm/kernel/etm.c
@@ -15,6 +15,7 @@
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/io.h>
+#include <linux/slab.h>
 #include <linux/sysrq.h>
 #include <linux/device.h>
 #include <linux/clk.h>
@@ -24,7 +25,6 @@
 #include <linux/miscdevice.h>
 #include <linux/vmalloc.h>
 #include <linux/mutex.h>
-#include <linux/module.h>
 #include <asm/hardware/coresight.h>
 #include <asm/sections.h>
@@ -37,26 +37,36 @@ MODULE_AUTHOR("Alexander Shishkin");
 struct tracectx {
 	unsigned int	etb_bufsz;
 	void __iomem	*etb_regs;
-	void __iomem	*etm_regs;
+	void __iomem	**etm_regs;
+	int		etm_regs_count;
 	unsigned long	flags;
 	int		ncmppairs;
 	int		etm_portsz;
+	u32		etb_fc;
+	unsigned long	range_start;
+	unsigned long	range_end;
+	unsigned long	data_range_start;
+	unsigned long	data_range_end;
+	bool		dump_initial_etb;
 	struct device	*dev;
 	struct clk	*emu_clk;
 	struct mutex	mutex;
 };
 
-static struct tracectx tracer;
+static struct tracectx tracer = {
+	.range_start = (unsigned long)_stext,
+	.range_end = (unsigned long)_etext,
+};
 
 static inline bool trace_isrunning(struct tracectx *t)
 {
 	return !!(t->flags & TRACER_RUNNING);
 }
 
-static int etm_setup_address_range(struct tracectx *t, int n,
+static int etm_setup_address_range(struct tracectx *t, int id, int n,
 		unsigned long start, unsigned long end, int exclude, int data)
 {
-	u32 flags = ETMAAT_ARM | ETMAAT_IGNCONTEXTID | ETMAAT_NSONLY | \
+	u32 flags = ETMAAT_ARM | ETMAAT_IGNCONTEXTID | ETMAAT_IGNSECURITY |
 		    ETMAAT_NOVALCMP;
 
 	if (n < 1 || n > t->ncmppairs)
@@ -72,95 +82,155 @@ static int etm_setup_address_range(struct tracectx *t, int n,
 		flags |= ETMAAT_IEXEC;
 
 	/* first comparator for the range */
-	etm_writel(t, flags, ETMR_COMP_ACC_TYPE(n * 2));
-	etm_writel(t, start, ETMR_COMP_VAL(n * 2));
+	etm_writel(t, id, flags, ETMR_COMP_ACC_TYPE(n * 2));
+	etm_writel(t, id, start, ETMR_COMP_VAL(n * 2));
 
 	/* second comparator is right next to it */
-	etm_writel(t, flags, ETMR_COMP_ACC_TYPE(n * 2 + 1));
-	etm_writel(t, end, ETMR_COMP_VAL(n * 2 + 1));
-
-	flags = exclude ? ETMTE_INCLEXCL : 0;
-	etm_writel(t, flags | (1 << n), ETMR_TRACEENCTRL);
+	etm_writel(t, id, flags, ETMR_COMP_ACC_TYPE(n * 2 + 1));
+	etm_writel(t, id, end, ETMR_COMP_VAL(n * 2 + 1));
+
+	if (data) {
+		flags = exclude ? ETMVDC3_EXCLONLY : 0;
+		if (exclude)
+			n += 8;
+		etm_writel(t, id, flags | BIT(n), ETMR_VIEWDATACTRL3);
+	} else {
+		flags = exclude ? ETMTE_INCLEXCL : 0;
+		etm_writel(t, id, flags | (1 << n), ETMR_TRACEENCTRL);
+	}
 
 	return 0;
 }
 
-static int trace_start(struct tracectx *t)
+static int trace_start_etm(struct tracectx *t, int id)
 {
 	u32 v;
 	unsigned long timeout = TRACER_TIMEOUT;
 
-	etb_unlock(t);
-
-	etb_writel(t, 0, ETBR_FORMATTERCTRL);
-	etb_writel(t, 1, ETBR_CTRL);
-
-	etb_lock(t);
-
-	/* configure etm */
 	v = ETMCTRL_OPTS | ETMCTRL_PROGRAM | ETMCTRL_PORTSIZE(t->etm_portsz);
 
 	if (t->flags & TRACER_CYCLE_ACC)
 		v |= ETMCTRL_CYCLEACCURATE;
 
-	etm_unlock(t);
+	if (t->flags & TRACER_TRACE_DATA)
+		v |= ETMCTRL_DATA_DO_ADDR;
+
+	etm_unlock(t, id);
 
-	etm_writel(t, v, ETMR_CTRL);
+	etm_writel(t, id, v, ETMR_CTRL);
 
-	while (!(etm_readl(t, ETMR_CTRL) & ETMCTRL_PROGRAM) && --timeout)
+	while (!(etm_readl(t, id, ETMR_CTRL) & ETMCTRL_PROGRAM) && --timeout)
 		;
 	if (!timeout) {
 		dev_dbg(t->dev, "Waiting for progbit to assert timed out\n");
-		etm_lock(t);
+		etm_lock(t, id);
 		return -EFAULT;
 	}
 
-	etm_setup_address_range(t, 1, (unsigned long)_stext,
-			(unsigned long)_etext, 0, 0);
-	etm_writel(t, 0, ETMR_TRACEENCTRL2);
-	etm_writel(t, 0, ETMR_TRACESSCTRL);
-	etm_writel(t, 0x6f, ETMR_TRACEENEVT);
+	if (t->range_start || t->range_end)
+		etm_setup_address_range(t, id, 1,
+					t->range_start, t->range_end, 0, 0);
+	else
+		etm_writel(t, id, ETMTE_INCLEXCL, ETMR_TRACEENCTRL);
+
+	etm_writel(t, id, 0, ETMR_TRACEENCTRL2);
+	etm_writel(t, id, 0, ETMR_TRACESSCTRL);
+	etm_writel(t, id, 0x6f, ETMR_TRACEENEVT);
+
+	etm_writel(t, id, 0, ETMR_VIEWDATACTRL1);
+	etm_writel(t, id, 0, ETMR_VIEWDATACTRL2);
+
+	if (t->data_range_start || t->data_range_end)
+		etm_setup_address_range(t, id, 2, t->data_range_start,
+					t->data_range_end, 0, 1);
+	else
+		etm_writel(t, id, ETMVDC3_EXCLONLY, ETMR_VIEWDATACTRL3);
+
+	etm_writel(t, id, 0x6f, ETMR_VIEWDATAEVT);
 
 	v &= ~ETMCTRL_PROGRAM;
 	v |= ETMCTRL_PORTSEL;
 
-	etm_writel(t, v, ETMR_CTRL);
+	etm_writel(t, id, v, ETMR_CTRL);
 
 	timeout = TRACER_TIMEOUT;
-	while (etm_readl(t, ETMR_CTRL) & ETMCTRL_PROGRAM && --timeout)
+	while (etm_readl(t, id, ETMR_CTRL) & ETMCTRL_PROGRAM && --timeout)
 		;
 	if (!timeout) {
 		dev_dbg(t->dev, "Waiting for progbit to deassert timed out\n");
-		etm_lock(t);
+		etm_lock(t, id);
 		return -EFAULT;
 	}
 
-	etm_lock(t);
+	etm_lock(t, id);
+	return 0;
+}
+
+static int trace_start(struct tracectx *t)
+{
+	int ret;
+	int id;
+	u32 etb_fc = t->etb_fc;
+
+	etb_unlock(t);
+
+	t->dump_initial_etb = false;
+	etb_writel(t, 0, ETBR_WRITEADDR);
+	etb_writel(t, etb_fc, ETBR_FORMATTERCTRL);
+	etb_writel(t, 1, ETBR_CTRL);
+
+	etb_lock(t);
+
+	/* configure etm(s) */
+	for (id = 0; id < t->etm_regs_count; id++) {
+		ret = trace_start_etm(t, id);
+		if (ret)
+			return ret;
+	}
 
 	t->flags |= TRACER_RUNNING;
 
 	return 0;
 }
 
-static int trace_stop(struct tracectx *t)
+static int trace_stop_etm(struct tracectx *t, int id)
 {
 	unsigned long timeout = TRACER_TIMEOUT;
 
-	etm_unlock(t);
+	etm_unlock(t, id);
 
-	etm_writel(t, 0x440, ETMR_CTRL);
-	while (!(etm_readl(t, ETMR_CTRL) & ETMCTRL_PROGRAM) && --timeout)
+	etm_writel(t, id, 0x441, ETMR_CTRL);
+	while (!(etm_readl(t, id, ETMR_CTRL) & ETMCTRL_PROGRAM) && --timeout)
 		;
 	if (!timeout) {
 		dev_dbg(t->dev, "Waiting for progbit to assert timed out\n");
-		etm_lock(t);
+		etm_lock(t, id);
 		return -EFAULT;
 	}
 
-	etm_lock(t);
+	etm_lock(t, id);
+	return 0;
+}
+
+static int trace_stop(struct tracectx *t)
+{
+	int id;
+	int ret;
+	unsigned long timeout = TRACER_TIMEOUT;
+	u32 etb_fc = t->etb_fc;
+
+	for (id = 0; id < t->etm_regs_count; id++) {
+		ret = trace_stop_etm(t, id);
+		if (ret)
+			return ret;
+	}
 
 	etb_unlock(t);
-	etb_writel(t, ETBFF_MANUAL_FLUSH, ETBR_FORMATTERCTRL);
+	if (etb_fc) {
+		etb_fc |= ETBFF_STOPFL;
+		etb_writel(t, t->etb_fc, ETBR_FORMATTERCTRL);
+	}
+	etb_writel(t, etb_fc | ETBFF_MANUAL_FLUSH, ETBR_FORMATTERCTRL);
 
 	timeout = TRACER_TIMEOUT;
 	while (etb_readl(t, ETBR_FORMATTERCTRL) &
@@ -185,24 +255,15 @@ static int trace_stop(struct tracectx *t)
 static int etb_getdatalen(struct tracectx *t)
 {
 	u32 v;
-	int rp, wp;
+	int wp;
 
 	v = etb_readl(t, ETBR_STATUS);
 
 	if (v & 1)
 		return t->etb_bufsz;
 
-	rp = etb_readl(t, ETBR_READADDR);
 	wp = etb_readl(t, ETBR_WRITEADDR);
-
-	if (rp > wp) {
-		etb_writel(t, 0, ETBR_READADDR);
-		etb_writel(t, 0, ETBR_WRITEADDR);
-
-		return 0;
-	}
-
-	return wp - rp;
+	return wp;
 }
 
 /* sysrq+v will always stop the running trace and leave it at that */
@@ -235,21 +296,18 @@ static void etm_dump(void)
 		printk("%08x", cpu_to_be32(etb_readl(t, ETBR_READMEM)));
 	printk(KERN_INFO "\n--- ETB buffer end ---\n");
 
-	/* deassert the overflow bit */
-	etb_writel(t, 1, ETBR_CTRL);
-	etb_writel(t, 0, ETBR_CTRL);
-
-	etb_writel(t, 0, ETBR_TRIGGERCOUNT);
-	etb_writel(t, 0, ETBR_READADDR);
-	etb_writel(t, 0, ETBR_WRITEADDR);
-
 	etb_lock(t);
 }
 
 static void sysrq_etm_dump(int key)
 {
+	if (!mutex_trylock(&tracer.mutex)) {
+		printk(KERN_INFO "Tracing hardware busy\n");
+		return;
+	}
 	dev_dbg(tracer.dev, "Dumping ETB buffer\n");
 	etm_dump();
+	mutex_unlock(&tracer.mutex);
 }
 
 static struct sysrq_key_op sysrq_etm_op = {
@@ -276,6 +334,10 @@ static ssize_t etb_read(struct file *file, char __user *data,
 	struct tracectx *t = file->private_data;
 	u32 first = 0;
 	u32 *buf;
+	int wpos;
+	int skip;
+	long wlength;
+	loff_t pos = *ppos;
 
 	mutex_lock(&t->mutex);
 
@@ -287,31 +349,39 @@ static ssize_t etb_read(struct file *file, char __user *data,
 	etb_unlock(t);
 
 	total = etb_getdatalen(t);
+	if (total == 0 && t->dump_initial_etb)
+		total = t->etb_bufsz;
 	if (total == t->etb_bufsz)
 		first = etb_readl(t, ETBR_WRITEADDR);
 
+	if (pos > total * 4) {
+		skip = 0;
+		wpos = total;
+	} else {
+		skip = (int)pos % 4;
+		wpos = (int)pos / 4;
+	}
+	total -= wpos;
+	first = (first + wpos) % t->etb_bufsz;
+
 	etb_writel(t, first, ETBR_READADDR);
 
-	length = min(total * 4, (int)len);
-	buf = vmalloc(length);
+	wlength = min(total, DIV_ROUND_UP(skip + (int)len, 4));
+	length = min(total * 4 - skip, (int)len);
+	buf = vmalloc(wlength * 4);
 
-	dev_dbg(t->dev, "ETB buffer length: %d\n", total);
+	dev_dbg(t->dev, "ETB read %ld bytes to %lld from %ld words at %d\n",
+		length, pos, wlength, first);
+	dev_dbg(t->dev, "ETB buffer length: %d\n", total + wpos);
 	dev_dbg(t->dev, "ETB status reg: %x\n", etb_readl(t, ETBR_STATUS));
-	for (i = 0; i < length / 4; i++)
+	for (i = 0; i < wlength; i++)
 		buf[i] = etb_readl(t, ETBR_READMEM);
 
-	/* the only way to deassert overflow bit in ETB status is this */
-	etb_writel(t, 1, ETBR_CTRL);
-	etb_writel(t, 0, ETBR_CTRL);
-
-	etb_writel(t, 0, ETBR_WRITEADDR);
-	etb_writel(t, 0, ETBR_READADDR);
-	etb_writel(t, 0, ETBR_TRIGGERCOUNT);
-
 	etb_lock(t);
 
-	length -= copy_to_user(data, buf, length);
+	length -= copy_to_user(data, (u8 *)buf + skip, length);
 	vfree(buf);
+	*ppos = pos + length;
 
 out:
 	mutex_unlock(&t->mutex);
@@ -348,28 +418,17 @@ static int __devinit etb_probe(struct amba_device *dev, const struct amba_id *id)
 	if (ret)
 		goto out;
 
+	mutex_lock(&t->mutex);
 	t->etb_regs = ioremap_nocache(dev->res.start, resource_size(&dev->res));
 	if (!t->etb_regs) {
 		ret = -ENOMEM;
 		goto out_release;
 	}
 
+	t->dev = &dev->dev;
+	t->dump_initial_etb = true;
 	amba_set_drvdata(dev, t);
 
-	etb_miscdev.parent = &dev->dev;
-
-	ret = misc_register(&etb_miscdev);
-	if (ret)
-		goto out_unmap;
-
-	t->emu_clk = clk_get(&dev->dev, "emu_src_ck");
-	if (IS_ERR(t->emu_clk)) {
-		dev_dbg(&dev->dev, "Failed to obtain emu_src_ck.\n");
-		return -EFAULT;
-	}
-
-	clk_enable(t->emu_clk);
-
 	etb_unlock(t);
 	t->etb_bufsz = etb_readl(t, ETBR_DEPTH);
 	dev_dbg(&dev->dev, "Size: %x\n", t->etb_bufsz);
@@ -378,6 +437,20 @@ static int __devinit etb_probe(struct amba_device *dev, const struct amba_id *id)
 	etb_writel(t, 0, ETBR_CTRL);
 	etb_writel(t, 0x1000, ETBR_FORMATTERCTRL);
 	etb_lock(t);
+	mutex_unlock(&t->mutex);
+
+	etb_miscdev.parent = &dev->dev;
+
+	ret = misc_register(&etb_miscdev);
+	if (ret)
+		goto out_unmap;
+
+	/* Get optional clock. Currently used to select clock source on omap3 */
+	t->emu_clk = clk_get(&dev->dev, "emu_src_ck");
+	if (IS_ERR(t->emu_clk))
+		dev_dbg(&dev->dev, "Failed to obtain emu_src_ck.\n");
+	else
+		clk_enable(t->emu_clk);
 
 	dev_dbg(&dev->dev, "ETB AMBA driver initialized.\n");
 
@@ -385,10 +458,13 @@ static int __devinit etb_probe(struct amba_device *dev, const struct amba_id *id)
 out:
 	return ret;
 
 out_unmap:
+	mutex_lock(&t->mutex);
 	amba_set_drvdata(dev, NULL);
 	iounmap(t->etb_regs);
+	t->etb_regs = NULL;
 
 out_release:
+	mutex_unlock(&t->mutex);
 	amba_release_regions(dev);
 
 	return ret;
@@ -403,8 +479,10 @@ static int etb_remove(struct amba_device *dev)
 	iounmap(t->etb_regs);
 	t->etb_regs = NULL;
 
-	clk_disable(t->emu_clk);
-	clk_put(t->emu_clk);
+	if (!IS_ERR(t->emu_clk)) {
+		clk_disable(t->emu_clk);
+		clk_put(t->emu_clk);
+	}
 
 	amba_release_regions(dev);
 
@@ -448,7 +526,10 @@ static ssize_t trace_running_store(struct kobject *kobj,
 		return -EINVAL;
 
 	mutex_lock(&tracer.mutex);
-	ret = value ? trace_start(&tracer) : trace_stop(&tracer);
+	if (!tracer.etb_regs)
+		ret = -ENODEV;
+	else
+		ret = value ? trace_start(&tracer) : trace_stop(&tracer);
 	mutex_unlock(&tracer.mutex);
 
 	return ret ? : n;
@@ -463,36 +544,50 @@ static ssize_t trace_info_show(struct kobject *kobj,
 {
 	u32 etb_wa, etb_ra, etb_st, etb_fc, etm_ctrl, etm_st;
 	int datalen;
+	int id;
+	int ret;
 
-	etb_unlock(&tracer);
-	datalen = etb_getdatalen(&tracer);
-	etb_wa = etb_readl(&tracer, ETBR_WRITEADDR);
-	etb_ra = etb_readl(&tracer, ETBR_READADDR);
-	etb_st = etb_readl(&tracer, ETBR_STATUS);
-	etb_fc = etb_readl(&tracer, ETBR_FORMATTERCTRL);
-	etb_lock(&tracer);
-
-	etm_unlock(&tracer);
-	etm_ctrl = etm_readl(&tracer, ETMR_CTRL);
-	etm_st = etm_readl(&tracer, ETMR_STATUS);
-	etm_lock(&tracer);
+	mutex_lock(&tracer.mutex);
+	if (tracer.etb_regs) {
+		etb_unlock(&tracer);
+		datalen = etb_getdatalen(&tracer);
+		etb_wa = etb_readl(&tracer, ETBR_WRITEADDR);
+		etb_ra = etb_readl(&tracer, ETBR_READADDR);
+		etb_st = etb_readl(&tracer, ETBR_STATUS);
+		etb_fc = etb_readl(&tracer, ETBR_FORMATTERCTRL);
+		etb_lock(&tracer);
+	} else {
+		etb_wa = etb_ra = etb_st = etb_fc = ~0;
+		datalen = -1;
+	}
 
-	return sprintf(buf, "Trace buffer len: %d\nComparator pairs: %d\n"
+	ret = sprintf(buf, "Trace buffer len: %d\nComparator pairs: %d\n"
 			"ETBR_WRITEADDR:\t%08x\n"
 			"ETBR_READADDR:\t%08x\n"
 			"ETBR_STATUS:\t%08x\n"
-			"ETBR_FORMATTERCTRL:\t%08x\n"
-			"ETMR_CTRL:\t%08x\n"
-			"ETMR_STATUS:\t%08x\n",
+			"ETBR_FORMATTERCTRL:\t%08x\n",
 			datalen,
 			tracer.ncmppairs,
 			etb_wa,
 			etb_ra,
 			etb_st,
-			etb_fc,
+			etb_fc
+			);
+
+	for (id = 0; id < tracer.etm_regs_count; id++) {
+		etm_unlock(&tracer, id);
+		etm_ctrl = etm_readl(&tracer, id, ETMR_CTRL);
+		etm_st = etm_readl(&tracer, id, ETMR_STATUS);
+		etm_lock(&tracer, id);
+		ret += sprintf(buf + ret, "ETMR_CTRL:\t%08x\n"
+			"ETMR_STATUS:\t%08x\n",
 			etm_ctrl,
 			etm_st
 			);
+	}
+	mutex_unlock(&tracer.mutex);
+
+	return ret;
 }
 
 static struct kobj_attribute trace_info_attr =
@@ -531,42 +626,121 @@ static ssize_t trace_mode_store(struct kobject *kobj,
 
 static struct kobj_attribute trace_mode_attr =
 	__ATTR(trace_mode, 0644, trace_mode_show, trace_mode_store);
 
+static ssize_t trace_range_show(struct kobject *kobj,
+			  struct kobj_attribute *attr,
+			  char *buf)
+{
+	return sprintf(buf, "%08lx %08lx\n",
+			tracer.range_start, tracer.range_end);
+}
+
+static ssize_t trace_range_store(struct kobject *kobj,
+			   struct kobj_attribute *attr,
+			   const char *buf, size_t n)
+{
+	unsigned long range_start, range_end;
+
+	if (sscanf(buf, "%lx %lx", &range_start, &range_end) != 2)
+		return -EINVAL;
+
+	mutex_lock(&tracer.mutex);
+	tracer.range_start = range_start;
+	tracer.range_end = range_end;
+	mutex_unlock(&tracer.mutex);
+
+	return n;
+}
+
+
+static struct kobj_attribute trace_range_attr =
+	__ATTR(trace_range, 0644, trace_range_show, trace_range_store);
+
+static ssize_t trace_data_range_show(struct kobject *kobj,
+			  struct kobj_attribute *attr,
+			  char *buf)
+{
+	unsigned long range_start;
+	u64 range_end;
+	mutex_lock(&tracer.mutex);
+	range_start = tracer.data_range_start;
+	range_end = tracer.data_range_end;
+	if (!range_end && (tracer.flags & TRACER_TRACE_DATA))
+		range_end = 0x100000000ULL;
+	mutex_unlock(&tracer.mutex);
+	return sprintf(buf, "%08lx %08llx\n", range_start, range_end);
+}
+
+static ssize_t trace_data_range_store(struct kobject *kobj,
+			   struct kobj_attribute *attr,
+			   const char *buf, size_t n)
+{
+	unsigned long range_start;
+	u64 range_end;
+
+	if (sscanf(buf, "%lx %llx", &range_start, &range_end) != 2)
+		return -EINVAL;
+
+	mutex_lock(&tracer.mutex);
+	tracer.data_range_start = range_start;
+	tracer.data_range_end = (unsigned long)range_end;
+	if (range_end)
+		tracer.flags |= TRACER_TRACE_DATA;
+	else
+		tracer.flags &= ~TRACER_TRACE_DATA;
+	mutex_unlock(&tracer.mutex);
+
+	return n;
+}
+
+
+static struct kobj_attribute trace_data_range_attr =
+	__ATTR(trace_data_range, 0644,
+	       trace_data_range_show, trace_data_range_store);
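[Editor's note: trace_range and trace_data_range each parse two hex addresses with sscanf(), as shown above, so they can be driven from user space with a plain write. A sketch — the sysfs directory name is illustrative only; the real path depends on the platform's AMBA device name:]

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            /* Hypothetical path and address window. */
            int fd = open("/sys/devices/platform/etm/trace_range", O_WRONLY);

            if (fd < 0)
                    return 1;
            /* restrict instruction tracing to one kernel text region */
            dprintf(fd, "%08lx %08lx\n", 0xc0008000UL, 0xc0100000UL);
            close(fd);
            return 0;
    }

[Writing "0 0" clears the window and takes the else branch in trace_start_etm() above: exclude mode with no comparators selected, i.e. nothing excluded.]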
 static int __devinit etm_probe(struct amba_device *dev, const struct amba_id *id)
 {
 	struct tracectx *t = &tracer;
 	int ret = 0;
+	void __iomem **new_regs;
+	int new_count;
 
-	if (t->etm_regs) {
-		dev_dbg(&dev->dev, "ETM already initialized\n");
-		ret = -EBUSY;
+	mutex_lock(&t->mutex);
+	new_count = t->etm_regs_count + 1;
+	new_regs = krealloc(t->etm_regs,
+				sizeof(t->etm_regs[0]) * new_count, GFP_KERNEL);
+
+	if (!new_regs) {
+		dev_dbg(&dev->dev, "Failed to allocate ETM register array\n");
+		ret = -ENOMEM;
 		goto out;
 	}
+	t->etm_regs = new_regs;
 
 	ret = amba_request_regions(dev, NULL);
 	if (ret)
 		goto out;
 
-	t->etm_regs = ioremap_nocache(dev->res.start, resource_size(&dev->res));
-	if (!t->etm_regs) {
+	t->etm_regs[t->etm_regs_count] =
+		ioremap_nocache(dev->res.start, resource_size(&dev->res));
+	if (!t->etm_regs[t->etm_regs_count]) {
 		ret = -ENOMEM;
 		goto out_release;
 	}
 
-	amba_set_drvdata(dev, t);
+	amba_set_drvdata(dev, t->etm_regs[t->etm_regs_count]);
 
-	mutex_init(&t->mutex);
-	t->dev = &dev->dev;
-	t->flags = TRACER_CYCLE_ACC;
+	t->flags = TRACER_CYCLE_ACC | TRACER_TRACE_DATA;
 	t->etm_portsz = 1;
 
-	etm_unlock(t);
-	(void)etm_readl(t, ETMMR_PDSR);
+	etm_unlock(t, t->etm_regs_count);
+	(void)etm_readl(t, t->etm_regs_count, ETMMR_PDSR);
 	/* dummy first read */
-	(void)etm_readl(&tracer, ETMMR_OSSRR);
+	(void)etm_readl(&tracer, t->etm_regs_count, ETMMR_OSSRR);
 
-	t->ncmppairs = etm_readl(t, ETMR_CONFCODE) & 0xf;
-	etm_writel(t, 0x440, ETMR_CTRL);
-	etm_lock(t);
+	t->ncmppairs = etm_readl(t, t->etm_regs_count, ETMR_CONFCODE) & 0xf;
+	etm_writel(t, t->etm_regs_count, 0x441, ETMR_CTRL);
+	etm_writel(t, t->etm_regs_count, new_count, ETMR_TRACEIDR);
+	etm_lock(t, t->etm_regs_count);
 
 	ret = sysfs_create_file(&dev->dev.kobj,
 			&trace_running_attr.attr);
@@ -582,36 +756,68 @@ static int __devinit etm_probe(struct amba_device *dev, const struct amba_id *id)
 	if (ret)
 		dev_dbg(&dev->dev, "Failed to create trace_mode in sysfs\n");
 
-	dev_dbg(t->dev, "ETM AMBA driver initialized.\n");
+	ret = sysfs_create_file(&dev->dev.kobj, &trace_range_attr.attr);
+	if (ret)
+		dev_dbg(&dev->dev, "Failed to create trace_range in sysfs\n");
+
+	ret = sysfs_create_file(&dev->dev.kobj, &trace_data_range_attr.attr);
+	if (ret)
+		dev_dbg(&dev->dev,
+			"Failed to create trace_data_range in sysfs\n");
+
+	dev_dbg(&dev->dev, "ETM AMBA driver initialized.\n");
+
+	/* Enable formatter if there are multiple trace sources */
+	if (new_count > 1)
+		t->etb_fc = ETBFF_ENFCONT | ETBFF_ENFTC;
+
+	t->etm_regs_count = new_count;
 
 out:
+	mutex_unlock(&t->mutex);
 	return ret;
 
 out_unmap:
 	amba_set_drvdata(dev, NULL);
-	iounmap(t->etm_regs);
+	iounmap(t->etm_regs[t->etm_regs_count]);
 
 out_release:
 	amba_release_regions(dev);
+	mutex_unlock(&t->mutex);
 
 	return ret;
 }
 
 static int etm_remove(struct amba_device *dev)
 {
-	struct tracectx *t = amba_get_drvdata(dev);
+	int i;
+	struct tracectx *t = &tracer;
+	void __iomem *etm_regs = amba_get_drvdata(dev);
+
+	sysfs_remove_file(&dev->dev.kobj, &trace_running_attr.attr);
+	sysfs_remove_file(&dev->dev.kobj, &trace_info_attr.attr);
+	sysfs_remove_file(&dev->dev.kobj, &trace_mode_attr.attr);
+	sysfs_remove_file(&dev->dev.kobj, &trace_range_attr.attr);
+	sysfs_remove_file(&dev->dev.kobj, &trace_data_range_attr.attr);
 
 	amba_set_drvdata(dev, NULL);
 
-	iounmap(t->etm_regs);
-	t->etm_regs = NULL;
+	mutex_lock(&t->mutex);
+	for (i = 0; i < t->etm_regs_count; i++)
+		if (t->etm_regs[i] == etm_regs)
+			break;
+	for (; i < t->etm_regs_count - 1; i++)
+		t->etm_regs[i] = t->etm_regs[i + 1];
+	t->etm_regs_count--;
+	if (!t->etm_regs_count) {
+		kfree(t->etm_regs);
+		t->etm_regs = NULL;
+	}
+	mutex_unlock(&t->mutex);
 
+	iounmap(etm_regs);
 	amba_release_regions(dev);
 
-	sysfs_remove_file(&dev->dev.kobj, &trace_running_attr.attr);
-	sysfs_remove_file(&dev->dev.kobj, &trace_info_attr.attr);
-	sysfs_remove_file(&dev->dev.kobj, &trace_mode_attr.attr);
-
 	return 0;
 }
 
@@ -620,6 +826,10 @@ static struct amba_id etm_ids[] = {
 		.id	= 0x0003b921,
 		.mask	= 0x0007ffff,
 	},
+	{
+		.id	= 0x0003b950,
+		.mask	= 0x0007ffff,
+	},
 	{ 0, 0 },
 };
 
@@ -637,6 +847,8 @@ static int __init etm_init(void)
 {
 	int retval;
 
+	mutex_init(&tracer.mutex);
+
 	retval = amba_driver_register(&etb_driver);
 	if (retval) {
 		printk(KERN_ERR "Failed to register etb\n");
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index d46f259..6b1e0ad 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -32,16 +32,8 @@
 * numbers for r1.
 *
 */
-	.arm
-
 	__HEAD
 ENTRY(stext)
-
- THUMB(	adr	r9, BSYM(1f)	)	@ Kernel is always entered in ARM.
- THUMB(	bx	r9		)	@ If this is a Thumb-2 kernel,
- THUMB(	.thumb			)	@ switch to Thumb now.
- THUMB(1: ) - setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode @ and irqs disabled #ifndef CONFIG_CPU_CP15 diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 3606e85..673151c 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -21,7 +21,6 @@ #include <asm/memory.h> #include <asm/thread_info.h> #include <asm/system.h> -#include <asm/pgtable.h> #ifdef CONFIG_DEBUG_LL #include <mach/debug-macro.S> @@ -39,14 +38,11 @@ #error KERNEL_RAM_VADDR must start at 0xXXXX8000 #endif -#define PG_DIR_SIZE 0x4000 -#define PMD_ORDER 2 - .globl swapper_pg_dir - .equ swapper_pg_dir, KERNEL_RAM_VADDR - PG_DIR_SIZE + .equ swapper_pg_dir, KERNEL_RAM_VADDR - 0x4000 .macro pgtbl, rd, phys - add \rd, \phys, #TEXT_OFFSET - PG_DIR_SIZE + add \rd, \phys, #TEXT_OFFSET - 0x4000 .endm #ifdef CONFIG_XIP_KERNEL @@ -75,16 +71,8 @@ * crap here - that's what the boot loader (or in extreme, well justified * circumstances, zImage) is for. */ - .arm - __HEAD ENTRY(stext) - - THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM. - THUMB( bx r9 ) @ If this is a Thumb-2 kernel, - THUMB( .thumb ) @ switch to Thumb now. - THUMB(1: ) - setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode @ and irqs disabled mrc p15, 0, r9, c0, c0 @ get processor id @@ -99,7 +87,7 @@ ENTRY(stext) sub r4, r3, r4 @ (PHYS_OFFSET - PAGE_OFFSET) add r8, r8, r4 @ PHYS_OFFSET #else - ldr r8, =PHYS_OFFSET @ always constant in this case + ldr r8, =PLAT_PHYS_OFFSET #endif /* @@ -152,11 +140,11 @@ __create_page_tables: pgtbl r4, r8 @ page table address /* - * Clear the swapper page table + * Clear the 16K level 1 swapper page table */ mov r0, r4 mov r3, #0 - add r6, r0, #PG_DIR_SIZE + add r6, r0, #0x4000 1: str r3, [r0], #4 str r3, [r0], #4 str r3, [r0], #4 @@ -175,30 +163,30 @@ __create_page_tables: sub r0, r0, r3 @ virt->phys offset add r5, r5, r0 @ phys __enable_mmu add r6, r6, r0 @ phys __enable_mmu_end - mov r5, r5, lsr #SECTION_SHIFT - mov r6, r6, lsr #SECTION_SHIFT + mov r5, r5, lsr #20 + mov r6, r6, lsr #20 -1: orr r3, r7, r5, lsl #SECTION_SHIFT @ flags + kernel base - str r3, [r4, r5, lsl #PMD_ORDER] @ identity mapping - cmp r5, r6 - addlo r5, r5, #1 @ next section - blo 1b +1: orr r3, r7, r5, lsl #20 @ flags + kernel base + str r3, [r4, r5, lsl #2] @ identity mapping + teq r5, r6 + addne r5, r5, #1 @ next section + bne 1b /* * Now setup the pagetables for our kernel direct * mapped region. */ mov r3, pc - mov r3, r3, lsr #SECTION_SHIFT - orr r3, r7, r3, lsl #SECTION_SHIFT - add r0, r4, #(KERNEL_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER) - str r3, [r0, #((KERNEL_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]! + mov r3, r3, lsr #20 + orr r3, r7, r3, lsl #20 + add r0, r4, #(KERNEL_START & 0xff000000) >> 18 + str r3, [r0, #(KERNEL_START & 0x00f00000) >> 18]! ldr r6, =(KERNEL_END - 1) - add r0, r0, #1 << PMD_ORDER - add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER) + add r0, r0, #4 + add r6, r4, r6, lsr #18 1: cmp r0, r6 - add r3, r3, #1 << SECTION_SHIFT - strls r3, [r0], #1 << PMD_ORDER + add r3, r3, #1 << 20 + strls r3, [r0], #4 bls 1b #ifdef CONFIG_XIP_KERNEL @@ -207,11 +195,11 @@ __create_page_tables: */ add r3, r8, #TEXT_OFFSET orr r3, r3, r7 - add r0, r4, #(KERNEL_RAM_VADDR & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER) - str r3, [r0, #(KERNEL_RAM_VADDR & 0x00f00000) >> (SECTION_SHIFT - PMD_ORDER)]! + add r0, r4, #(KERNEL_RAM_VADDR & 0xff000000) >> 18 + str r3, [r0, #(KERNEL_RAM_VADDR & 0x00f00000) >> 18]! 
ldr r6, =(_end - 1) add r0, r0, #4 - add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER) + add r6, r4, r6, lsr #18 1: cmp r0, r6 add r3, r3, #1 << 20 strls r3, [r0], #4 @@ -221,17 +209,14 @@ __create_page_tables: /* * Then map boot params address in r2 or * the first 1MB of ram if boot params address is not specified. - * We map 2 sections in case the ATAGs/DTB crosses a section boundary. */ - mov r0, r2, lsr #SECTION_SHIFT - movs r0, r0, lsl #SECTION_SHIFT + mov r0, r2, lsr #20 + movs r0, r0, lsl #20 moveq r0, r8 sub r3, r0, r8 add r3, r3, #PAGE_OFFSET - add r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER) + add r3, r4, r3, lsr #18 orr r6, r7, r0 - str r6, [r3], #1 << PMD_ORDER - add r6, r6, #1 << SECTION_SHIFT str r6, [r3] #ifdef CONFIG_DEBUG_LL @@ -241,23 +226,23 @@ __create_page_tables: * This allows debug messages to be output * via a serial console before paging_init. */ - addruart r7, r3, r0 + addruart r7, r3 - mov r3, r3, lsr #SECTION_SHIFT - mov r3, r3, lsl #PMD_ORDER + mov r3, r3, lsr #20 + mov r3, r3, lsl #2 add r0, r4, r3 rsb r3, r3, #0x4000 @ PTRS_PER_PGD*sizeof(long) cmp r3, #0x0800 @ limit to 512MB movhi r3, #0x0800 add r6, r0, r3 - mov r3, r7, lsr #SECTION_SHIFT + mov r3, r7, lsr #20 ldr r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags - orr r3, r7, r3, lsl #SECTION_SHIFT + orr r3, r7, r3, lsl #20 1: str r3, [r0], #4 - add r3, r3, #1 << SECTION_SHIFT - cmp r0, r6 - blo 1b + add r3, r3, #1 << 20 + teq r0, r6 + bne 1b #else /* CONFIG_DEBUG_ICEDCC */ /* we don't need any serial debugging mappings for ICEDCC */ @@ -269,7 +254,7 @@ __create_page_tables: * If we're using the NetWinder or CATS, we also need to map * in the 16550-type serial port for the debug messages */ - add r0, r4, #0xff000000 >> (SECTION_SHIFT - PMD_ORDER) + add r0, r4, #0xff000000 >> 18 orr r3, r7, #0x7c000000 str r3, [r0] #endif @@ -279,10 +264,10 @@ __create_page_tables: * Similar reasons here - for debug. This is * only for Acorn RiscPC architectures. */ - add r0, r4, #0x02000000 >> (SECTION_SHIFT - PMD_ORDER) + add r0, r4, #0x02000000 >> 18 orr r3, r7, #0x02000000 str r3, [r0] - add r0, r4, #0xd8000000 >> (SECTION_SHIFT - PMD_ORDER) + add r0, r4, #0xd8000000 >> 18 str r3, [r0] #endif #endif @@ -495,8 +480,13 @@ __fixup_pv_table: add r5, r5, r3 @ adjust table end address add r7, r7, r3 @ adjust __pv_phys_offset address str r8, [r7] @ save computed PHYS_OFFSET to __pv_phys_offset +#ifndef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT mov r6, r3, lsr #24 @ constant for add/sub instructions teq r3, r6, lsl #24 @ must be 16MiB aligned +#else + mov r6, r3, lsr #16 @ constant for add/sub instructions + teq r3, r6, lsl #16 @ must be 64kiB aligned +#endif THUMB( it ne @ cross section branch ) bne __error str r6, [r7, #4] @ save to __pv_offset @@ -512,8 +502,20 @@ ENDPROC(__fixup_pv_table) .text __fixup_a_pv_table: #ifdef CONFIG_THUMB2_KERNEL - lsls r6, #24 - beq 2f +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT + lsls r0, r6, #24 + lsr r6, #8 + beq 1f + clz r7, r0 + lsr r0, #24 + lsl r0, r7 + bic r0, 0x0080 + lsrs r7, #1 + orrcs r0, #0x0080 + orr r0, r0, r7, lsl #12 +#endif +1: lsls r6, #24 + beq 4f clz r7, r6 lsr r6, #24 lsl r6, r7 @@ -522,25 +524,43 @@ __fixup_a_pv_table: orrcs r6, #0x0080 orr r6, r6, r7, lsl #12 orr r6, #0x4000 - b 2f -1: add r7, r3 - ldrh ip, [r7, #2] + b 4f +2: @ at this point the C flag is always clear + add r7, r3 +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT + ldrh ip, [r7] + tst ip, 0x0400 @ the i bit tells us LS or MS byte + beq 3f + cmp r0, #0 @ set C flag, and ... 
+	biceq	ip, 0x0400	@ immediate zero value has a special encoding
+	streqh	ip, [r7]	@ that requires the i bit cleared
+#endif
+3:	ldrh	ip, [r7, #2]
 	and	ip, 0x8f00
-	orr	ip, r6	@ mask in offset bits 31-24
+	orrcc	ip, r6	@ mask in offset bits 31-24
+	orrcs	ip, r0	@ mask in offset bits 23-16
 	strh	ip, [r7, #2]
-2:	cmp	r4, r5
+4:	cmp	r4, r5
 	ldrcc	r7, [r4], #4	@ use branch for delay slot
-	bcc	1b
+	bcc	2b
 	bx	lr
 #else
-	b	2f
-1:	ldr	ip, [r7, r3]
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
+	and	r0, r6, #255	@ offset bits 23-16
+	mov	r6, r6, lsr #8	@ offset bits 31-24
+#else
+	mov	r0, #0	@ just in case...
+#endif
+	b	3f
+2:	ldr	ip, [r7, r3]
 	bic	ip, ip, #0x000000ff
-	orr	ip, ip, r6	@ mask in offset bits 31-24
+	tst	ip, #0x400	@ rotate shift tells us LS or MS byte
+	orrne	ip, ip, r6	@ mask in offset bits 31-24
+	orreq	ip, ip, r0	@ mask in offset bits 23-16
 	str	ip, [r7, r3]
-2:	cmp	r4, r5
+3:	cmp	r4, r5
 	ldrcc	r7, [r4], #4	@ use branch for delay slot
-	bcc	1b
+	bcc	2b
 	mov	pc, lr
 #endif
ENDPROC(__fixup_a_pv_table)
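The fixup above rewrites the low byte of each add/sub instruction listed in the pv_table, so the 8-bit immediate field ends up carrying bits 31-24 of the PHYS_OFFSET - PAGE_OFFSET delta (or bits 23-16 in the 64KiB-aligned CONFIG_ARM_PATCH_PHYS_VIRT_16BIT case). A minimal C sketch of the plain, 16MiB-aligned case; illustrative only, the function name is made up:

	#include <stdint.h>

	/* Patch one add/sub instruction in place: clear the 8-bit
	 * immediate field and fold in bits 31-24 of the delta - the
	 * "bic ip, ip, #0x000000ff" / "orr ip, ip, r6" pair above.
	 */
	static void fixup_one_pv_insn(uint32_t *insn, uint32_t delta)
	{
		uint32_t imm = delta >> 24;	/* cf. "mov r6, r3, lsr #24" */

		*insn = (*insn & ~0x000000ffu) | imm;
	}

Because the delta has to survive in that single byte, __fixup_pv_table insists on 16MiB alignment ("teq r3, r6, lsl #24") before patching anything.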
diff --git a/arch/arm/kernel/hibernate.c b/arch/arm/kernel/hibernate.c
new file mode 100644
index 0000000..354cefc
--- /dev/null
+++ b/arch/arm/kernel/hibernate.c
@@ -0,0 +1,470 @@
+/*
+ * Hibernation support specific for ARM
+ *
+ * Copyright (C) 2010 Nokia Corporation
+ * Copyright (C) 2010 Texas Instruments, Inc.
+ * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+ *
+ * Contact: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <asm/sections.h>
+
+/*
+ * Image of the saved processor state
+ *
+ * coprocessor 15 registers (RW)
+ */
+struct saved_context_cortex_a8 {
+	/* CR0 */
+	u32 cssr;	/* Cache Size Selection */
+	/* CR1 */
+	u32 cr;		/* Control */
+	u32 cacr;	/* Coprocessor Access Control */
+	/* CR2 */
+	u32 ttb_0r;	/* Translation Table Base 0 */
+	u32 ttb_1r;	/* Translation Table Base 1 */
+	u32 ttbcr;	/* Translation Table Base Control */
+	/* CR3 */
+	u32 dacr;	/* Domain Access Control */
+	/* CR5 */
+	u32 d_fsr;	/* Data Fault Status */
+	u32 i_fsr;	/* Instruction Fault Status */
+	u32 d_afsr;	/* Data Auxiliary Fault Status */
+	u32 i_afsr;	/* Instruction Auxiliary Fault Status */
+	/* CR6 */
+	u32 d_far;	/* Data Fault Address */
+	u32 i_far;	/* Instruction Fault Address */
+	/* CR7 */
+	u32 par;	/* Physical Address */
+	/* CR9 */ /* FIXME: Are they necessary? */
+	u32 pmcontrolr;	/* Performance Monitor Control */
+	u32 cesr;	/* Count Enable Set */
+	u32 cecr;	/* Count Enable Clear */
+	u32 ofsr;	/* Overflow Flag Status */
+	u32 sir;	/* Software Increment */
+	u32 pcsr;	/* Performance Counter Selection */
+	u32 ccr;	/* Cycle Count */
+	u32 esr;	/* Event Selection */
+	u32 pmcountr;	/* Performance Monitor Count */
+	u32 uer;	/* User Enable */
+	u32 iesr;	/* Interrupt Enable Set */
+	u32 iecr;	/* Interrupt Enable Clear */
+	u32 l2clr;	/* L2 Cache Lockdown */
+	/* CR10 */
+	u32 d_tlblr;	/* Data TLB Lockdown Register */
+	u32 i_tlblr;	/* Instruction TLB Lockdown Register */
+	u32 prrr;	/* Primary Region Remap Register */
+	u32 nrrr;	/* Normal Memory Remap Register */
+	/* CR11 */
+	u32 pleuar;	/* PLE User Accessibility */
+	u32 plecnr;	/* PLE Channel Number */
+	u32 plecr;	/* PLE Control */
+	u32 pleisar;	/* PLE Internal Start Address */
+	u32 pleiear;	/* PLE Internal End Address */
+	u32 plecidr;	/* PLE Context ID */
+	/* CR12 */
+	u32 snsvbar;	/* Secure or Nonsecure Vector Base Address */
+	/* CR13 */
+	u32 fcse;	/* FCSE PID */
+	u32 cid;	/* Context ID */
+	u32 urwtpid;	/* User read/write Thread and Process ID */
+	u32 urotpid;	/* User read-only Thread and Process ID */
+	u32 potpid;	/* Privileged only Thread and Process ID */
+} __packed;
+
+struct saved_context_cortex_a9 {
+	/* CR0 */
+	u32 cssr;	/* Cache Size Selection */
+	/* CR1 */
+	u32 cr;
+	u32 actlr;
+	u32 cacr;
+	u32 sder;
+	u32 vcr;
+	/* CR2 */
+	u32 ttb_0r;	/* Translation Table Base 0 */
+	u32 ttb_1r;	/* Translation Table Base 1 */
+	u32 ttbcr;	/* Translation Table Base Control */
+	/* CR3 */
+	u32 dacr;	/* Domain Access Control */
+	/* CR5 */
+	u32 d_fsr;	/* Data Fault Status */
+	u32 i_fsr;	/* Instruction Fault Status */
+	u32 d_afsr;	/* Data Auxiliary Fault Status */
+	u32 i_afsr;	/* Instruction Auxiliary Fault Status */
+	/* CR6 */
+	u32 d_far;	/* Data Fault Address */
+	u32 i_far;	/* Instruction Fault Address */
+	/* CR7 */
+	u32 par;	/* Physical Address */
+	/* CR9 */ /* FIXME: Are they necessary?
*/ + u32 pmcontrolr; /* Performance Monitor Control */ + u32 cesr; /* Count Enable Set */ + u32 cecr; /* Count Enable Clear */ + u32 ofsr; /* Overflow Flag Status */ + u32 pcsr; /* Performance Counter Selection */ + u32 ccr; /* Cycle Count */ + u32 esr; /* Event Selection */ + u32 pmcountr; /* Performance Monitor Count */ + u32 uer; /* User Enable */ + u32 iesr; /* Interrupt Enable Set */ + u32 iecr; /* Interrupt Enable Clear */ + /* CR10 */ + u32 d_tlblr; /* Data TLB Lockdown Register */ + u32 prrr; /* Primary Region Remap Register */ + u32 nrrr; /* Normal Memory Remap Register */ + /* CR11 */ + /* CR12 */ + u32 vbar; + u32 mvbar; + u32 vir; + /* CR13 */ + u32 fcse; /* FCSE PID */ + u32 cid; /* Context ID */ + u32 urwtpid; /* User read/write Thread and Process ID */ + u32 urotpid; /* User read-only Thread and Process ID */ + u32 potpid; /* Privileged only Thread and Process ID */ + /* CR15 */ + u32 mtlbar; +} __packed; + +union saved_context { + struct saved_context_cortex_a8 cortex_a8; + struct saved_context_cortex_a9 cortex_a9; +}; + +/* Used in hibernate_asm.S */ +#define USER_CONTEXT_SIZE (15 * sizeof(u32)) +unsigned long saved_context_r0[USER_CONTEXT_SIZE]; +unsigned long saved_cpsr; +unsigned long saved_context_r13_svc; +unsigned long saved_context_r14_svc; +unsigned long saved_spsr_svc; + +static union saved_context saved_context; + +/* + * pfn_is_nosave - check if given pfn is in the 'nosave' section + */ +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) + >> PAGE_SHIFT; + unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) + >> PAGE_SHIFT; + + return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); +} + +#define PART_NUM_CORTEX_A8 (0xC08) +#define PART_NUM_CORTEX_A9 (0xC09) + +static inline u32 arm_primary_part_number(void) +{ + u32 id; + + asm volatile ("mrc p15, 0, %0, c0, c0, 0" : "=r"(id)); + + /* Is this ARM? 
*/ + if ((id & 0xff000000) != 0x41000000) + return UINT_MAX; + + id >>= 4; + id &= 0xfff; + return id; +} + +static inline void __save_processor_state_a8( + struct saved_context_cortex_a8 *ctxt) +{ + /* CR0 */ + asm volatile ("mrc p15, 2, %0, c0, c0, 0" : "=r"(ctxt->cssr)); + /* CR1 */ + asm volatile ("mrc p15, 0, %0, c1, c0, 0" : "=r"(ctxt->cr)); + asm volatile ("mrc p15, 0, %0, c1, c0, 2" : "=r"(ctxt->cacr)); + /* CR2 */ + asm volatile ("mrc p15, 0, %0, c2, c0, 0" : "=r"(ctxt->ttb_0r)); + asm volatile ("mrc p15, 0, %0, c2, c0, 1" : "=r"(ctxt->ttb_1r)); + asm volatile ("mrc p15, 0, %0, c2, c0, 2" : "=r"(ctxt->ttbcr)); + /* CR3 */ + asm volatile ("mrc p15, 0, %0, c3, c0, 0" : "=r"(ctxt->dacr)); + /* CR5 */ + asm volatile ("mrc p15, 0, %0, c5, c0, 0" : "=r"(ctxt->d_fsr)); + asm volatile ("mrc p15, 0, %0, c5, c0, 1" : "=r"(ctxt->i_fsr)); + asm volatile ("mrc p15, 0, %0, c5, c1, 0" : "=r"(ctxt->d_afsr)); + asm volatile ("mrc p15, 0, %0, c5, c1, 1" : "=r"(ctxt->i_afsr)); + /* CR6 */ + asm volatile ("mrc p15, 0, %0, c6, c0, 0" : "=r"(ctxt->d_far)); + asm volatile ("mrc p15, 0, %0, c6, c0, 2" : "=r"(ctxt->i_far)); + /* CR7 */ + asm volatile ("mrc p15, 0, %0, c7, c4, 0" : "=r"(ctxt->par)); + /* CR9 */ + asm volatile ("mrc p15, 0, %0, c9, c12, 0" : "=r"(ctxt->pmcontrolr)); + asm volatile ("mrc p15, 0, %0, c9, c12, 1" : "=r"(ctxt->cesr)); + asm volatile ("mrc p15, 0, %0, c9, c12, 2" : "=r"(ctxt->cecr)); + asm volatile ("mrc p15, 0, %0, c9, c12, 3" : "=r"(ctxt->ofsr)); + asm volatile ("mrc p15, 0, %0, c9, c12, 4" : "=r"(ctxt->sir)); + asm volatile ("mrc p15, 0, %0, c9, c12, 5" : "=r"(ctxt->pcsr)); + asm volatile ("mrc p15, 0, %0, c9, c13, 0" : "=r"(ctxt->ccr)); + asm volatile ("mrc p15, 0, %0, c9, c13, 1" : "=r"(ctxt->esr)); + asm volatile ("mrc p15, 0, %0, c9, c13, 2" : "=r"(ctxt->pmcountr)); + asm volatile ("mrc p15, 0, %0, c9, c14, 0" : "=r"(ctxt->uer)); + asm volatile ("mrc p15, 0, %0, c9, c14, 1" : "=r"(ctxt->iesr)); + asm volatile ("mrc p15, 0, %0, c9, c14, 2" : "=r"(ctxt->iecr)); + asm volatile ("mrc p15, 1, %0, c9, c0, 0" : "=r"(ctxt->l2clr)); + /* CR10 */ + asm volatile ("mrc p15, 0, %0, c10, c0, 0" : "=r"(ctxt->d_tlblr)); + asm volatile ("mrc p15, 0, %0, c10, c0, 1" : "=r"(ctxt->i_tlblr)); + asm volatile ("mrc p15, 0, %0, c10, c2, 0" : "=r"(ctxt->prrr)); + asm volatile ("mrc p15, 0, %0, c10, c2, 1" : "=r"(ctxt->nrrr)); + /* CR11 */ + asm volatile ("mrc p15, 0, %0, c11, c1, 0" : "=r"(ctxt->pleuar)); + asm volatile ("mrc p15, 0, %0, c11, c2, 0" : "=r"(ctxt->plecnr)); + asm volatile ("mrc p15, 0, %0, c11, c4, 0" : "=r"(ctxt->plecr)); + asm volatile ("mrc p15, 0, %0, c11, c5, 0" : "=r"(ctxt->pleisar)); + asm volatile ("mrc p15, 0, %0, c11, c7, 0" : "=r"(ctxt->pleiear)); + asm volatile ("mrc p15, 0, %0, c11, c15, 0" : "=r"(ctxt->plecidr)); + /* CR12 */ + asm volatile ("mrc p15, 0, %0, c12, c0, 0" : "=r"(ctxt->snsvbar)); + /* CR13 */ + asm volatile ("mrc p15, 0, %0, c13, c0, 0" : "=r"(ctxt->fcse)); + asm volatile ("mrc p15, 0, %0, c13, c0, 1" : "=r"(ctxt->cid)); + asm volatile ("mrc p15, 0, %0, c13, c0, 2" : "=r"(ctxt->urwtpid)); + asm volatile ("mrc p15, 0, %0, c13, c0, 3" : "=r"(ctxt->urotpid)); + asm volatile ("mrc p15, 0, %0, c13, c0, 4" : "=r"(ctxt->potpid)); +} + +static inline void __save_processor_state_a9( + struct saved_context_cortex_a9 *ctxt) +{ + /* CR0 */ + asm volatile ("mrc p15, 2, %0, c0, c0, 0" : "=r"(ctxt->cssr)); + /* CR1 */ + asm volatile ("mrc p15, 0, %0, c1, c0, 0" : "=r"(ctxt->cr)); + asm volatile ("mrc p15, 0, %0, c1, c0, 1" : "=r"(ctxt->actlr)); + asm volatile ("mrc 
p15, 0, %0, c1, c0, 2" : "=r"(ctxt->cacr)); +#ifndef CONFIG_ARM_TRUSTZONE + asm volatile ("mrc p15, 0, %0, c1, c1, 1" : "=r"(ctxt->sder)); + asm volatile ("mrc p15, 0, %0, c1, c1, 3" : "=r"(ctxt->vcr)); +#endif + /* CR2 */ + asm volatile ("mrc p15, 0, %0, c2, c0, 0" : "=r"(ctxt->ttb_0r)); + asm volatile ("mrc p15, 0, %0, c2, c0, 1" : "=r"(ctxt->ttb_1r)); + asm volatile ("mrc p15, 0, %0, c2, c0, 2" : "=r"(ctxt->ttbcr)); + /* CR3 */ + asm volatile ("mrc p15, 0, %0, c3, c0, 0" : "=r"(ctxt->dacr)); + /* CR5 */ + asm volatile ("mrc p15, 0, %0, c5, c0, 0" : "=r"(ctxt->d_fsr)); + asm volatile ("mrc p15, 0, %0, c5, c0, 1" : "=r"(ctxt->i_fsr)); + asm volatile ("mrc p15, 0, %0, c5, c1, 0" : "=r"(ctxt->d_afsr)); + asm volatile ("mrc p15, 0, %0, c5, c1, 1" : "=r"(ctxt->i_afsr)); + /* CR6 */ + asm volatile ("mrc p15, 0, %0, c6, c0, 0" : "=r"(ctxt->d_far)); + asm volatile ("mrc p15, 0, %0, c6, c0, 2" : "=r"(ctxt->i_far)); + /* CR7 */ + asm volatile ("mrc p15, 0, %0, c7, c4, 0" : "=r"(ctxt->par)); + /* CR9 */ + asm volatile ("mrc p15, 0, %0, c9, c12, 0" : "=r"(ctxt->pmcontrolr)); + asm volatile ("mrc p15, 0, %0, c9, c12, 1" : "=r"(ctxt->cesr)); + asm volatile ("mrc p15, 0, %0, c9, c12, 2" : "=r"(ctxt->cecr)); + asm volatile ("mrc p15, 0, %0, c9, c12, 3" : "=r"(ctxt->ofsr)); + asm volatile ("mrc p15, 0, %0, c9, c12, 5" : "=r"(ctxt->pcsr)); + asm volatile ("mrc p15, 0, %0, c9, c13, 0" : "=r"(ctxt->ccr)); + asm volatile ("mrc p15, 0, %0, c9, c13, 1" : "=r"(ctxt->esr)); + asm volatile ("mrc p15, 0, %0, c9, c13, 2" : "=r"(ctxt->pmcountr)); + asm volatile ("mrc p15, 0, %0, c9, c14, 0" : "=r"(ctxt->uer)); + asm volatile ("mrc p15, 0, %0, c9, c14, 1" : "=r"(ctxt->iesr)); + asm volatile ("mrc p15, 0, %0, c9, c14, 2" : "=r"(ctxt->iecr)); + /* CR10 */ + asm volatile ("mrc p15, 0, %0, c10, c0, 0" : "=r"(ctxt->d_tlblr)); + asm volatile ("mrc p15, 0, %0, c10, c2, 0" : "=r"(ctxt->prrr)); + asm volatile ("mrc p15, 0, %0, c10, c2, 1" : "=r"(ctxt->nrrr)); + /* CR11 */ + /* CR12 */ + asm volatile ("mrc p15, 0, %0, c12, c0, 0" : "=r"(ctxt->vbar)); +#ifndef CONFIG_ARM_TRUSTZONE + asm volatile ("mrc p15, 0, %0, c12, c0, 1" : "=r"(ctxt->mvbar)); + asm volatile ("mrc p15, 0, %0, c12, c1, 1" : "=r"(ctxt->vir)); +#endif + /* CR13 */ + asm volatile ("mrc p15, 0, %0, c13, c0, 0" : "=r"(ctxt->fcse)); + asm volatile ("mrc p15, 0, %0, c13, c0, 1" : "=r"(ctxt->cid)); + asm volatile ("mrc p15, 0, %0, c13, c0, 2" : "=r"(ctxt->urwtpid)); + asm volatile ("mrc p15, 0, %0, c13, c0, 3" : "=r"(ctxt->urotpid)); + asm volatile ("mrc p15, 0, %0, c13, c0, 4" : "=r"(ctxt->potpid)); + /* CR15*/ +#ifndef CONFIG_ARM_TRUSTZONE + asm volatile ("mrc p15, 5, %0, c15, c7, 2" : "=r"(ctxt->mtlbar)); +#endif +} + +static inline void __save_processor_state(union saved_context *ctxt) +{ + switch (arm_primary_part_number()) { + case PART_NUM_CORTEX_A8: + __save_processor_state_a8(&ctxt->cortex_a8); + break; + case PART_NUM_CORTEX_A9: + __save_processor_state_a9(&ctxt->cortex_a9); + break; + default: + WARN(true, "Hibernation is not supported for this processor.(%d)", + arm_primary_part_number()); + } +} + +static inline void __restore_processor_state_a8( + struct saved_context_cortex_a8 *ctxt) +{ + /* CR0 */ + asm volatile ("mcr p15, 2, %0, c0, c0, 0" : : "r"(ctxt->cssr)); + /* CR1 */ + asm volatile ("mcr p15, 0, %0, c1, c0, 0" : : "r"(ctxt->cr)); + asm volatile ("mcr p15, 0, %0, c1, c0, 2" : : "r"(ctxt->cacr)); + /* CR2 */ + asm volatile ("mcr p15, 0, %0, c2, c0, 0" : : "r"(ctxt->ttb_0r)); + asm volatile ("mcr p15, 0, %0, c2, c0, 1" : : "r"(ctxt->ttb_1r)); + 
asm volatile ("mcr p15, 0, %0, c2, c0, 2" : : "r"(ctxt->ttbcr)); + /* CR3 */ + asm volatile ("mcr p15, 0, %0, c3, c0, 0" : : "r"(ctxt->dacr)); + /* CR5 */ + asm volatile ("mcr p15, 0, %0, c5, c0, 0" : : "r"(ctxt->d_fsr)); + asm volatile ("mcr p15, 0, %0, c5, c0, 1" : : "r"(ctxt->i_fsr)); + asm volatile ("mcr p15, 0, %0, c5, c1, 0" : : "r"(ctxt->d_afsr)); + asm volatile ("mcr p15, 0, %0, c5, c1, 1" : : "r"(ctxt->i_afsr)); + /* CR6 */ + asm volatile ("mcr p15, 0, %0, c6, c0, 0" : : "r"(ctxt->d_far)); + asm volatile ("mcr p15, 0, %0, c6, c0, 2" : : "r"(ctxt->i_far)); + /* CR7 */ + asm volatile ("mcr p15, 0, %0, c7, c4, 0" : : "r"(ctxt->par)); + /* CR9 */ + asm volatile ("mcr p15, 0, %0, c9, c12, 0" : : "r"(ctxt->pmcontrolr)); + asm volatile ("mcr p15, 0, %0, c9, c12, 1" : : "r"(ctxt->cesr)); + asm volatile ("mcr p15, 0, %0, c9, c12, 2" : : "r"(ctxt->cecr)); + asm volatile ("mcr p15, 0, %0, c9, c12, 3" : : "r"(ctxt->ofsr)); + asm volatile ("mcr p15, 0, %0, c9, c12, 4" : : "r"(ctxt->sir)); + asm volatile ("mcr p15, 0, %0, c9, c12, 5" : : "r"(ctxt->pcsr)); + asm volatile ("mcr p15, 0, %0, c9, c13, 0" : : "r"(ctxt->ccr)); + asm volatile ("mcr p15, 0, %0, c9, c13, 1" : : "r"(ctxt->esr)); + asm volatile ("mcr p15, 0, %0, c9, c13, 2" : : "r"(ctxt->pmcountr)); + asm volatile ("mcr p15, 0, %0, c9, c14, 0" : : "r"(ctxt->uer)); + asm volatile ("mcr p15, 0, %0, c9, c14, 1" : : "r"(ctxt->iesr)); + asm volatile ("mcr p15, 0, %0, c9, c14, 2" : : "r"(ctxt->iecr)); + asm volatile ("mcr p15, 1, %0, c9, c0, 0" : : "r"(ctxt->l2clr)); + /* CR10 */ + asm volatile ("mcr p15, 0, %0, c10, c0, 0" : : "r"(ctxt->d_tlblr)); + asm volatile ("mcr p15, 0, %0, c10, c0, 1" : : "r"(ctxt->i_tlblr)); + asm volatile ("mcr p15, 0, %0, c10, c2, 0" : : "r"(ctxt->prrr)); + asm volatile ("mcr p15, 0, %0, c10, c2, 1" : : "r"(ctxt->nrrr)); + /* CR11 */ + asm volatile ("mcr p15, 0, %0, c11, c1, 0" : : "r"(ctxt->pleuar)); + asm volatile ("mcr p15, 0, %0, c11, c2, 0" : : "r"(ctxt->plecnr)); + asm volatile ("mcr p15, 0, %0, c11, c4, 0" : : "r"(ctxt->plecr)); + asm volatile ("mcr p15, 0, %0, c11, c5, 0" : : "r"(ctxt->pleisar)); + asm volatile ("mcr p15, 0, %0, c11, c7, 0" : : "r"(ctxt->pleiear)); + asm volatile ("mcr p15, 0, %0, c11, c15, 0" : : "r"(ctxt->plecidr)); + /* CR12 */ + asm volatile ("mcr p15, 0, %0, c12, c0, 0" : : "r"(ctxt->snsvbar)); + /* CR13 */ + asm volatile ("mcr p15, 0, %0, c13, c0, 0" : : "r"(ctxt->fcse)); + asm volatile ("mcr p15, 0, %0, c13, c0, 1" : : "r"(ctxt->cid)); + asm volatile ("mcr p15, 0, %0, c13, c0, 2" : : "r"(ctxt->urwtpid)); + asm volatile ("mcr p15, 0, %0, c13, c0, 3" : : "r"(ctxt->urotpid)); + asm volatile ("mcr p15, 0, %0, c13, c0, 4" : : "r"(ctxt->potpid)); +} + +static inline void __restore_processor_state_a9( + struct saved_context_cortex_a9 *ctxt) +{ + /* CR0 */ + asm volatile ("mcr p15, 2, %0, c0, c0, 0" : : "r"(ctxt->cssr)); + /* CR1 */ + asm volatile ("mcr p15, 0, %0, c1, c0, 0" : : "r"(ctxt->cr)); + asm volatile ("mcr p15, 0, %0, c1, c0, 1" : : "r"(ctxt->actlr)); + asm volatile ("mcr p15, 0, %0, c1, c0, 2" : : "r"(ctxt->cacr)); +#ifndef CONFIG_ARM_TRUSTZONE + asm volatile ("mcr p15, 0, %0, c1, c1, 1" : : "r"(ctxt->sder)); + asm volatile ("mcr p15, 0, %0, c1, c1, 3" : : "r"(ctxt->vcr)); +#endif + /* CR2 */ + asm volatile ("mcr p15, 0, %0, c2, c0, 0" : : "r"(ctxt->ttb_0r)); + asm volatile ("mcr p15, 0, %0, c2, c0, 1" : : "r"(ctxt->ttb_1r)); + asm volatile ("mcr p15, 0, %0, c2, c0, 2" : : "r"(ctxt->ttbcr)); + /* CR3 */ + asm volatile ("mcr p15, 0, %0, c3, c0, 0" : : "r"(ctxt->dacr)); + /* CR5 */ + 
asm volatile ("mcr p15, 0, %0, c5, c0, 0" : : "r"(ctxt->d_fsr)); + asm volatile ("mcr p15, 0, %0, c5, c0, 1" : : "r"(ctxt->i_fsr)); + asm volatile ("mcr p15, 0, %0, c5, c1, 0" : : "r"(ctxt->d_afsr)); + asm volatile ("mcr p15, 0, %0, c5, c1, 1" : : "r"(ctxt->i_afsr)); + /* CR6 */ + asm volatile ("mcr p15, 0, %0, c6, c0, 0" : : "r"(ctxt->d_far)); + asm volatile ("mcr p15, 0, %0, c6, c0, 2" : : "r"(ctxt->i_far)); + /* CR7 */ + asm volatile ("mcr p15, 0, %0, c7, c4, 0" : : "r"(ctxt->par)); + /* CR9 */ + asm volatile ("mcr p15, 0, %0, c9, c12, 0" : : "r"(ctxt->pmcontrolr)); + asm volatile ("mcr p15, 0, %0, c9, c12, 1" : : "r"(ctxt->cesr)); + asm volatile ("mcr p15, 0, %0, c9, c12, 2" : : "r"(ctxt->cecr)); + asm volatile ("mcr p15, 0, %0, c9, c12, 3" : : "r"(ctxt->ofsr)); + asm volatile ("mcr p15, 0, %0, c9, c12, 5" : : "r"(ctxt->pcsr)); + asm volatile ("mcr p15, 0, %0, c9, c13, 0" : : "r"(ctxt->ccr)); + asm volatile ("mcr p15, 0, %0, c9, c13, 1" : : "r"(ctxt->esr)); + asm volatile ("mcr p15, 0, %0, c9, c13, 2" : : "r"(ctxt->pmcountr)); + asm volatile ("mcr p15, 0, %0, c9, c14, 0" : : "r"(ctxt->uer)); + asm volatile ("mcr p15, 0, %0, c9, c14, 1" : : "r"(ctxt->iesr)); + asm volatile ("mcr p15, 0, %0, c9, c14, 2" : : "r"(ctxt->iecr)); + /* CR10 */ + asm volatile ("mcr p15, 0, %0, c10, c0, 0" : : "r"(ctxt->d_tlblr)); + asm volatile ("mcr p15, 0, %0, c10, c2, 0" : : "r"(ctxt->prrr)); + asm volatile ("mcr p15, 0, %0, c10, c2, 1" : : "r"(ctxt->nrrr)); + /* CR11 */ + /* CR12 */ + asm volatile ("mcr p15, 0, %0, c12, c0, 0" : : "r"(ctxt->vbar)); +#ifndef CONFIG_ARM_TRUSTZONE + asm volatile ("mcr p15, 0, %0, c12, c0, 1" : : "r"(ctxt->mvbar)); + asm volatile ("mcr p15, 0, %0, c12, c1, 1" : : "r"(ctxt->vir)); +#endif + /* CR13 */ + asm volatile ("mcr p15, 0, %0, c13, c0, 0" : : "r"(ctxt->fcse)); + asm volatile ("mcr p15, 0, %0, c13, c0, 1" : : "r"(ctxt->cid)); + asm volatile ("mcr p15, 0, %0, c13, c0, 2" : : "r"(ctxt->urwtpid)); + asm volatile ("mcr p15, 0, %0, c13, c0, 3" : : "r"(ctxt->urotpid)); + asm volatile ("mcr p15, 0, %0, c13, c0, 4" : : "r"(ctxt->potpid)); + /* CR15 */ +#ifndef CONFIG_ARM_TRUSTZONE + asm volatile ("mcr p15, 5, %0, c15, c7, 2" : : "r"(ctxt->mtlbar)); +#endif +} + +static inline void __restore_processor_state(union saved_context *ctxt) +{ + switch (arm_primary_part_number()) { + case PART_NUM_CORTEX_A8: + __restore_processor_state_a8(&ctxt->cortex_a8); + break; + case PART_NUM_CORTEX_A9: + __restore_processor_state_a9(&ctxt->cortex_a9); + break; + default: + WARN(true, "Hibernation is not supported for this processor.(%d)", + arm_primary_part_number()); + } +} + +void save_processor_state(void) +{ + preempt_disable(); + __save_processor_state(&saved_context); +} + +void restore_processor_state(void) +{ + __restore_processor_state(&saved_context); + preempt_enable(); +} diff --git a/arch/arm/kernel/hibernate_asm.S b/arch/arm/kernel/hibernate_asm.S new file mode 100644 index 0000000..9538789 --- /dev/null +++ b/arch/arm/kernel/hibernate_asm.S @@ -0,0 +1,139 @@ +/* + * Hibernation support specific for ARM + * + * Copyright (C) 2010 Nokia Corporation + * Copyright (C) 2010 Texas Instruments, Inc. + * Copyright (C) 2006 Rafael J. 
Wysocki <rjw@sisk.pl>
+ *
+ * Contact: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/linkage.h>
+	.text
+ENTRY(swsusp_arch_suspend)
+	/*
+	 * Save current program status register
+	 */
+	ldr	r3, .Lsaved_cpsr
+	mrs	r0, cpsr
+	str	r0, [r3]
+
+	/*
+	 * Change to system (user) mode
+	 */
+	mov	r1, r0
+	orr	r1, r1, #0x1f
+	msr	cpsr_c, r1
+
+	/*
+	 * Save User context
+	 */
+	ldr	r3, .Lsaved_context_r0
+	stmia	r3, {r0-r14}
+
+	/*
+	 * Go back to original SVC mode
+	 */
+	msr	cpsr_c, r0
+
+	/*
+	 * Save SVC context
+	 */
+	ldr	r3, .Lsaved_context_r13_svc
+	stmia	r3, {r13-r14}
+	ldr	r3, .Lsaved_spsr_svc
+	mrs	r1, spsr
+	str	r1, [r3]
+
+	bl	swsusp_save
+
+	/*
+	 * Restore return address
+	 */
+	ldr	r3, .Lsaved_context_r14_svc
+	ldr	lr, [r3]
+	mov	pc, lr
+ENDPROC(swsusp_arch_suspend)
+
+ENTRY(swsusp_arch_resume)
+	/*
+	 * restore_pblist is the head of the list of pages to be restored
+	 */
+	ldr	r0, .Lrestore_pblist
+	ldr	r6, [r0]
+
+.Lcopy_loop:
+	ldr	r4, [r6]	/* src, i.e. the present address */
+	ldr	r5, [r6, #4]	/* dst, i.e. the original address */
+
+	/* No. of entries in one page, where each entry is 4 bytes */
+	mov	r9, #1024
+
+.Lcopy_one_page:
+	/*
+	 * This loop could be optimized by using stm and ldm.
+	 */
+	ldr	r8, [r4], #4
+	str	r8, [r5], #4
+	subs	r9, r9, #1
+	bne	.Lcopy_one_page
+
+	/*
+	 * The last field of struct pbe is a pointer to the next pbe structure
+	 */
+	ldr	r6, [r6, #8]
+	cmp	r6, #0
+	bne	.Lcopy_loop
+
+	/*
+	 * Restore SVC context
+	 */
+	ldr	r3, .Lsaved_context_r13_svc
+	ldmia	r3, {r13-r14}
+	ldr	r3, .Lsaved_spsr_svc
+	ldr	r1, [r3]
+	msr	spsr_cxsf, r1
+
+	mrs	r0, cpsr	/* Save current mode into r0 */
+
+	/*
+	 * Change to system (user) mode
+	 */
+	mov	r1, r0
+	orr	r1, r1, #0x1f
+	msr	cpsr_c, r1
+
+	/*
+	 * Restore User context
+	 */
+	ldr	r3, .Lsaved_context_r0
+	ldmia	r3, {r0-r14}
+	ldr	r3, .Lsaved_cpsr
+	ldr	r1, [r3]
+	msr	cpsr_cxsf, r1
+
+	msr	cpsr_c, r0	/* Restore original mode from r0 */
+
+	/*
+	 * Flush TLB (Invalidate unified TLB unlocked entries)
+	 */
+	mov	r1, #0
+	mcr	p15, 0, r1, c8, c7, 0
+
+	/* Set the return value */
+	mov	r0, #0
+
+	/* Restore return address */
+	ldr	r3, .Lsaved_context_r14_svc
+	ldr	lr, [r3]
+	mov	pc, lr
+ENDPROC(swsusp_arch_resume)
+	.align	4
+.Lsaved_context_r0:		.long	saved_context_r0
+.Lsaved_cpsr:			.long	saved_cpsr
+.Lsaved_context_r13_svc:	.long	saved_context_r13_svc
+.Lsaved_context_r14_svc:	.long	saved_context_r14_svc
+.Lsaved_spsr_svc:		.long	saved_spsr_svc
+.Lrestore_pblist:		.long	restore_pblist
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 2bc1a8e..87acc25 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -45,6 +45,7 @@ static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[ARM_MAX_WRP]);
 
 /* Number of BRP/WRP registers on this CPU. */
 static int core_num_brps;
+static int core_num_reserved_brps;
 static int core_num_wrps;
 
 /* Debug architecture version. */
@@ -136,11 +137,10 @@ static u8 get_debug_arch(void)
 	u32 didr;
 
 	/* Do we implement the extended CPUID interface? */
-	if (((read_cpuid_id() >> 16) & 0xf) != 0xf) {
-		pr_warning("CPUID feature registers not supported. "
-			   "Assuming v6 debug is present.\n");
+	if (WARN_ONCE((((read_cpuid_id() >> 16) & 0xf) != 0xf),
+		"CPUID feature registers not supported. 
" + "Assuming v6 debug is present.\n")) return ARM_DEBUG_ARCH_V6; - } ARM_DBG_READ(c0, 0, didr); return (didr >> 16) & 0xf; @@ -154,27 +154,10 @@ u8 arch_get_debug_arch(void) static int debug_arch_supported(void) { u8 arch = get_debug_arch(); - - /* We don't support the memory-mapped interface. */ - return (arch >= ARM_DEBUG_ARCH_V6 && arch <= ARM_DEBUG_ARCH_V7_ECP14) || - arch >= ARM_DEBUG_ARCH_V7_1; -} - -/* Can we determine the watchpoint access type from the fsr? */ -static int debug_exception_updates_fsr(void) -{ - return 0; + return arch >= ARM_DEBUG_ARCH_V6 && arch <= ARM_DEBUG_ARCH_V7_ECP14; } -/* Determine number of WRP registers available. */ -static int get_num_wrp_resources(void) -{ - u32 didr; - ARM_DBG_READ(c0, 0, didr); - return ((didr >> 28) & 0xf) + 1; -} - -/* Determine number of BRP registers available. */ +/* Determine number of BRP register available. */ static int get_num_brp_resources(void) { u32 didr; @@ -193,10 +176,9 @@ static int core_has_mismatch_brps(void) static int get_num_wrps(void) { /* - * On debug architectures prior to 7.1, when a watchpoint fires, the - * only way to work out which watchpoint it was is by disassembling - * the faulting instruction and working out the address of the memory - * access. + * FIXME: When a watchpoint fires, the only way to work out which + * watchpoint it was is by disassembling the faulting instruction + * and working out the address of the memory access. * * Furthermore, we can only do this if the watchpoint was precise * since imprecise watchpoints prevent us from calculating register @@ -210,17 +192,36 @@ static int get_num_wrps(void) * [the ARM ARM states that the DFAR is UNKNOWN, but experience shows * that it is set on some implementations]. */ - if (get_debug_arch() < ARM_DEBUG_ARCH_V7_1) - return 1; - return get_num_wrp_resources(); +#if 0 + int wrps; + u32 didr; + ARM_DBG_READ(c0, 0, didr); + wrps = ((didr >> 28) & 0xf) + 1; +#endif + int wrps = 1; + + if (core_has_mismatch_brps() && wrps >= get_num_brp_resources()) + wrps = get_num_brp_resources() - 1; + + return wrps; +} + +/* We reserve one breakpoint for each watchpoint. */ +static int get_num_reserved_brps(void) +{ + if (core_has_mismatch_brps()) + return get_num_wrps(); + return 0; } /* Determine number of usable BRPs available. */ static int get_num_brps(void) { int brps = get_num_brp_resources(); - return core_has_mismatch_brps() ? brps - 1 : brps; + if (core_has_mismatch_brps()) + brps -= get_num_reserved_brps(); + return brps; } /* @@ -238,7 +239,7 @@ static int enable_monitor_mode(void) /* Ensure that halting mode is disabled. */ if (WARN_ONCE(dscr & ARM_DSCR_HDBGEN, - "halting debug mode enabled. Unable to access hardware resources.\n")) { + "halting debug mode enabled. Unable to access hardware resources.\n")) { ret = -EPERM; goto out; } @@ -254,7 +255,6 @@ static int enable_monitor_mode(void) ARM_DBG_WRITE(c1, 0, (dscr | ARM_DSCR_MDBGEN)); break; case ARM_DEBUG_ARCH_V7_ECP14: - case ARM_DEBUG_ARCH_V7_1: ARM_DBG_WRITE(c2, 2, (dscr | ARM_DSCR_MDBGEN)); break; default: @@ -346,10 +346,24 @@ int arch_install_hw_breakpoint(struct perf_event *bp) val_base = ARM_BASE_BVR; slots = (struct perf_event **)__get_cpu_var(bp_on_reg); max_slots = core_num_brps; + if (info->step_ctrl.enabled) { + /* Override the breakpoint data with the step data. 
*/ + addr = info->trigger & ~0x3; + ctrl = encode_ctrl_reg(info->step_ctrl); + } } else { /* Watchpoint */ - ctrl_base = ARM_BASE_WCR; - val_base = ARM_BASE_WVR; + if (info->step_ctrl.enabled) { + /* Install into the reserved breakpoint region. */ + ctrl_base = ARM_BASE_BCR + core_num_brps; + val_base = ARM_BASE_BVR + core_num_brps; + /* Override the watchpoint data with the step data. */ + addr = info->trigger & ~0x3; + ctrl = encode_ctrl_reg(info->step_ctrl); + } else { + ctrl_base = ARM_BASE_WCR; + val_base = ARM_BASE_WVR; + } slots = (struct perf_event **)__get_cpu_var(wp_on_reg); max_slots = core_num_wrps; } @@ -368,17 +382,6 @@ int arch_install_hw_breakpoint(struct perf_event *bp) goto out; } - /* Override the breakpoint data with the step data. */ - if (info->step_ctrl.enabled) { - addr = info->trigger & ~0x3; - ctrl = encode_ctrl_reg(info->step_ctrl); - if (info->ctrl.type != ARM_BREAKPOINT_EXECUTE) { - i = 0; - ctrl_base = ARM_BASE_BCR + core_num_brps; - val_base = ARM_BASE_BVR + core_num_brps; - } - } - /* Setup the address register. */ write_wb_reg(val_base + i, addr); @@ -402,7 +405,10 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) max_slots = core_num_brps; } else { /* Watchpoint */ - base = ARM_BASE_WCR; + if (info->step_ctrl.enabled) + base = ARM_BASE_BCR + core_num_brps; + else + base = ARM_BASE_WCR; slots = (struct perf_event **)__get_cpu_var(wp_on_reg); max_slots = core_num_wrps; } @@ -420,13 +426,6 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot\n")) return; - /* Ensure that we disable the mismatch breakpoint. */ - if (info->ctrl.type != ARM_BREAKPOINT_EXECUTE && - info->step_ctrl.enabled) { - i = 0; - base = ARM_BASE_BCR + core_num_brps; - } - /* Reset the control register. */ write_wb_reg(base + i, 0); } @@ -626,35 +625,19 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) info->address &= ~alignment_mask; info->ctrl.len <<= offset; - if (!bp->overflow_handler) { - /* - * Mismatch breakpoints are required for single-stepping - * breakpoints. - */ - if (!core_has_mismatch_brps()) - return -EINVAL; - - /* We don't allow mismatch breakpoints in kernel space. */ - if (arch_check_bp_in_kernelspace(bp)) - return -EPERM; - - /* - * Per-cpu breakpoints are not supported by our stepping - * mechanism. - */ - if (!bp->hw.bp_target) - return -EINVAL; - - /* - * We only support specific access types if the fsr - * reports them. - */ - if (!debug_exception_updates_fsr() && - (info->ctrl.type == ARM_BREAKPOINT_LOAD || - info->ctrl.type == ARM_BREAKPOINT_STORE)) - return -EINVAL; + /* + * Currently we rely on an overflow handler to take + * care of single-stepping the breakpoint when it fires. + * In the case of userspace breakpoints on a core with V7 debug, + * we can use the mismatch feature as a poor-man's hardware + * single-step, but this only works for per-task breakpoints. 
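+	 * (A mismatch breakpoint fires on every address except the one
+	 * programmed, so it can only emulate a step while the task that
+	 * owns it is running - a per-CPU breakpoint would trap whatever
+	 * code happened to run next.)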
+ */ + if (WARN_ONCE(!bp->overflow_handler && + (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_brps() + || !bp->hw.bp_target), + "overflow handler required but none found\n")) { + ret = -EINVAL; } - out: return ret; } @@ -683,64 +666,34 @@ static void disable_single_step(struct perf_event *bp) arch_install_hw_breakpoint(bp); } -static void watchpoint_handler(unsigned long addr, unsigned int fsr, - struct pt_regs *regs) +static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) { - int i, access; - u32 val, ctrl_reg, alignment_mask; + int i; struct perf_event *wp, **slots; struct arch_hw_breakpoint *info; - struct arch_hw_breakpoint_ctrl ctrl; slots = (struct perf_event **)__get_cpu_var(wp_on_reg); + /* Without a disassembler, we can only handle 1 watchpoint. */ + BUG_ON(core_num_wrps > 1); + for (i = 0; i < core_num_wrps; ++i) { rcu_read_lock(); wp = slots[i]; - if (wp == NULL) - goto unlock; + if (wp == NULL) { + rcu_read_unlock(); + continue; + } - info = counter_arch_bp(wp); /* - * The DFAR is an unknown value on debug architectures prior - * to 7.1. Since we only allow a single watchpoint on these - * older CPUs, we can set the trigger to the lowest possible - * faulting address. + * The DFAR is an unknown value. Since we only allow a + * single watchpoint, we can set the trigger to the lowest + * possible faulting address. */ - if (debug_arch < ARM_DEBUG_ARCH_V7_1) { - BUG_ON(i > 0); - info->trigger = wp->attr.bp_addr; - } else { - if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) - alignment_mask = 0x7; - else - alignment_mask = 0x3; - - /* Check if the watchpoint value matches. */ - val = read_wb_reg(ARM_BASE_WVR + i); - if (val != (addr & ~alignment_mask)) - goto unlock; - - /* Possible match, check the byte address select. */ - ctrl_reg = read_wb_reg(ARM_BASE_WCR + i); - decode_ctrl_reg(ctrl_reg, &ctrl); - if (!((1 << (addr & alignment_mask)) & ctrl.len)) - goto unlock; - - /* Check that the access type matches. */ - if (debug_exception_updates_fsr()) { - access = (fsr & ARM_FSR_ACCESS_MASK) ? - HW_BREAKPOINT_W : HW_BREAKPOINT_R; - if (!(access & hw_breakpoint_type(wp))) - goto unlock; - } - - /* We have a winner. */ - info->trigger = addr; - } - + info = counter_arch_bp(wp); + info->trigger = wp->attr.bp_addr; pr_debug("watchpoint fired: address = 0x%x\n", info->trigger); perf_bp_event(wp, regs); @@ -752,7 +705,6 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, if (!wp->overflow_handler) enable_single_step(wp, instruction_pointer(regs)); -unlock: rcu_read_unlock(); } } @@ -765,7 +717,7 @@ static void watchpoint_single_step_handler(unsigned long pc) slots = (struct perf_event **)__get_cpu_var(wp_on_reg); - for (i = 0; i < core_num_wrps; ++i) { + for (i = 0; i < core_num_reserved_brps; ++i) { rcu_read_lock(); wp = slots[i]; @@ -844,7 +796,7 @@ unlock: /* * Called from either the Data Abort Handler [watchpoint] or the - * Prefetch Abort Handler [breakpoint] with interrupts disabled. + * Prefetch Abort Handler [breakpoint] with preemption disabled. */ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, struct pt_regs *regs) @@ -852,10 +804,8 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, int ret = 0; u32 dscr; - preempt_disable(); - - if (interrupts_enabled(regs)) - local_irq_enable(); + /* We must be called with preemption disabled. */ + WARN_ON(preemptible()); /* We only handle watchpoints and hardware breakpoints. 
*/ ARM_DBG_READ(c1, 0, dscr); @@ -868,12 +818,16 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, case ARM_ENTRY_ASYNC_WATCHPOINT: WARN(1, "Asynchronous watchpoint exception taken. Debugging results may be unreliable\n"); case ARM_ENTRY_SYNC_WATCHPOINT: - watchpoint_handler(addr, fsr, regs); + watchpoint_handler(addr, regs); break; default: ret = 1; /* Unhandled fault. */ } + /* + * Re-enable preemption after it was disabled in the + * low-level exception handling code. + */ preempt_enable(); return ret; @@ -882,31 +836,11 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, /* * One-time initialisation. */ -static cpumask_t debug_err_mask; - -static int debug_reg_trap(struct pt_regs *regs, unsigned int instr) -{ - int cpu = smp_processor_id(); - - pr_warning("Debug register access (0x%x) caused undefined instruction on CPU %d\n", - instr, cpu); - - /* Set the error flag for this CPU and skip the faulting instruction. */ - cpumask_set_cpu(cpu, &debug_err_mask); - instruction_pointer(regs) += 4; - return 0; -} - -static struct undef_hook debug_reg_hook = { - .instr_mask = 0x0fe80f10, - .instr_val = 0x0e000e10, - .fn = debug_reg_trap, -}; - -static void reset_ctrl_regs(void *unused) +static void reset_ctrl_regs(void *info) { - int i, raw_num_brps, err = 0, cpu = smp_processor_id(); + int i, cpu = smp_processor_id(); u32 dbg_power; + cpumask_t *cpumask = info; /* * v7 debug contains save and restore registers so that debug state @@ -916,57 +850,38 @@ static void reset_ctrl_regs(void *unused) * Access Register to avoid taking undefined instruction exceptions * later on. */ - switch (debug_arch) { - case ARM_DEBUG_ARCH_V6: - case ARM_DEBUG_ARCH_V6_1: - /* ARMv6 cores just need to reset the registers. */ - goto reset_regs; - case ARM_DEBUG_ARCH_V7_ECP14: + if (debug_arch >= ARM_DEBUG_ARCH_V7_ECP14) { /* * Ensure sticky power-down is clear (i.e. debug logic is * powered up). */ asm volatile("mrc p14, 0, %0, c1, c5, 4" : "=r" (dbg_power)); - if ((dbg_power & 0x1) == 0) - err = -EPERM; - break; - case ARM_DEBUG_ARCH_V7_1: + if ((dbg_power & 0x1) == 0) { + pr_warning("CPU %d debug is powered down!\n", cpu); + cpumask_or(cpumask, cpumask, cpumask_of(cpu)); + return; + } + /* - * Ensure the OS double lock is clear. + * Unconditionally clear the lock by writing a value + * other than 0xC5ACCE55 to the access register. */ - asm volatile("mrc p14, 0, %0, c1, c3, 4" : "=r" (dbg_power)); - if ((dbg_power & 0x1) == 1) - err = -EPERM; - break; - } + asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0)); + isb(); - if (err) { - pr_warning("CPU %d debug is powered down!\n", cpu); - cpumask_or(&debug_err_mask, &debug_err_mask, cpumask_of(cpu)); - return; + /* + * Clear any configured vector-catch events before + * enabling monitor mode. + */ + asm volatile("mcr p14, 0, %0, c0, c7, 0" : : "r" (0)); + isb(); } - /* - * Unconditionally clear the lock by writing a value - * other than 0xC5ACCE55 to the access register. - */ - asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0)); - isb(); - - /* - * Clear any configured vector-catch events before - * enabling monitor mode. - */ - asm volatile("mcr p14, 0, %0, c0, c7, 0" : : "r" (0)); - isb(); - -reset_regs: if (enable_monitor_mode()) return; /* We must also reset any reserved registers. 
*/ - raw_num_brps = get_num_brp_resources(); - for (i = 0; i < raw_num_brps; ++i) { + for (i = 0; i < core_num_brps + core_num_reserved_brps; ++i) { write_wb_reg(ARM_BASE_BCR + i, 0UL); write_wb_reg(ARM_BASE_BVR + i, 0UL); } @@ -982,7 +897,6 @@ static int __cpuinit dbg_reset_notify(struct notifier_block *self, { if (action == CPU_ONLINE) smp_call_function_single((int)cpu, reset_ctrl_regs, NULL, 1); - return NOTIFY_OK; } @@ -993,6 +907,7 @@ static struct notifier_block __cpuinitdata dbg_reset_nb = { static int __init arch_hw_breakpoint_init(void) { u32 dscr; + cpumask_t cpumask = { CPU_BITS_NONE }; debug_arch = get_debug_arch(); @@ -1003,31 +918,28 @@ static int __init arch_hw_breakpoint_init(void) /* Determine how many BRPs/WRPs are available. */ core_num_brps = get_num_brps(); + core_num_reserved_brps = get_num_reserved_brps(); core_num_wrps = get_num_wrps(); - /* - * We need to tread carefully here because DBGSWENABLE may be - * driven low on this core and there isn't an architected way to - * determine that. - */ - register_undef_hook(&debug_reg_hook); + pr_info("found %d breakpoint and %d watchpoint registers.\n", + core_num_brps + core_num_reserved_brps, core_num_wrps); + + if (core_num_reserved_brps) + pr_info("%d breakpoint(s) reserved for watchpoint " + "single-step.\n", core_num_reserved_brps); /* * Reset the breakpoint resources. We assume that a halting * debugger will leave the world in a nice state for us. */ - on_each_cpu(reset_ctrl_regs, NULL, 1); - unregister_undef_hook(&debug_reg_hook); - if (!cpumask_empty(&debug_err_mask)) { + on_each_cpu(reset_ctrl_regs, &cpumask, 1); + if (!cpumask_empty(&cpumask)) { core_num_brps = 0; + core_num_reserved_brps = 0; core_num_wrps = 0; return 0; } - pr_info("found %d " "%s" "breakpoint and %d watchpoint registers.\n", - core_num_brps, core_has_mismatch_brps() ? "(+1 reserved) " : - "", core_num_wrps); - ARM_DBG_READ(c1, 0, dscr); if (dscr & ARM_DSCR_HDBGEN) { max_watchpoint_len = 4; diff --git a/arch/arm/kernel/io.c b/arch/arm/kernel/io.c index dcd5b4d..f447030 100644 --- a/arch/arm/kernel/io.c +++ b/arch/arm/kernel/io.c @@ -1,4 +1,4 @@ -#include <linux/export.h> +#include <linux/module.h> #include <linux/types.h> #include <linux/io.h> diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 87c8be5..83bbad0 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -22,6 +22,7 @@ * Naturally it's not a 1:1 relation, but there are similarities. */ #include <linux/kernel_stat.h> +#include <linux/module.h> #include <linux/signal.h> #include <linux/ioport.h> #include <linux/interrupt.h> @@ -34,8 +35,8 @@ #include <linux/list.h> #include <linux/kallsyms.h> #include <linux/proc_fs.h> +#include <linux/ftrace.h> -#include <asm/exception.h> #include <asm/system.h> #include <asm/mach/arch.h> #include <asm/mach/irq.h> @@ -58,17 +59,20 @@ int arch_show_interrupts(struct seq_file *p, int prec) #ifdef CONFIG_SMP show_ipi_list(p, prec); #endif +#ifdef CONFIG_LOCAL_TIMERS + show_local_irqs(p, prec); +#endif seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count); return 0; } /* - * handle_IRQ handles all hardware IRQ's. Decoded IRQs should - * not come via this function. Instead, they should provide their - * own 'handler'. Used by platform code implementing C-based 1st - * level decoding. + * do_IRQ handles all hardware IRQ's. Decoded IRQs should not + * come via this function. 
Instead, they should provide their + * own 'handler' */ -void handle_IRQ(unsigned int irq, struct pt_regs *regs) +asmlinkage void __exception_irq_entry +asm_do_IRQ(unsigned int irq, struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -93,15 +97,6 @@ void handle_IRQ(unsigned int irq, struct pt_regs *regs) set_irq_regs(old_regs); } -/* - * asm_do_IRQ is the interface to be used from assembly code. - */ -asmlinkage void __exception_irq_entry -asm_do_IRQ(unsigned int irq, struct pt_regs *regs) -{ - handle_IRQ(irq, regs); -} - void set_irq_flags(unsigned int irq, unsigned int iflags) { unsigned long clr = 0, set = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; @@ -136,63 +131,54 @@ int __init arch_probe_nr_irqs(void) #ifdef CONFIG_HOTPLUG_CPU -static bool migrate_one_irq(struct irq_desc *desc) +static bool migrate_one_irq(struct irq_data *d) { - struct irq_data *d = irq_desc_get_irq_data(desc); - const struct cpumask *affinity = d->affinity; - struct irq_chip *c; + unsigned int cpu = cpumask_any_and(d->affinity, cpu_online_mask); bool ret = false; - /* - * If this is a per-CPU interrupt, or the affinity does not - * include this CPU, then we have nothing to do. - */ - if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity)) - return false; - - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - affinity = cpu_online_mask; + if (cpu >= nr_cpu_ids) { + cpu = cpumask_any(cpu_online_mask); ret = true; } - c = irq_data_get_irq_chip(d); - if (!c->irq_set_affinity) - pr_debug("IRQ%u: unable to set affinity\n", d->irq); - else if (c->irq_set_affinity(d, affinity, true) == IRQ_SET_MASK_OK && ret) - cpumask_copy(d->affinity, affinity); + pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", d->irq, d->node, cpu); + + d->chip->irq_set_affinity(d, cpumask_of(cpu), true); return ret; } /* - * The current CPU has been marked offline. Migrate IRQs off this CPU. - * If the affinity settings do not allow other CPUs, force them onto any + * The CPU has been marked offline. Migrate IRQs off this CPU. If + * the affinity settings do not allow other CPUs, force them onto any * available CPU. - * - * Note: we must iterate over all IRQs, whether they have an attached - * action structure or not, as we need to get chained interrupts too. 
 */
void migrate_irqs(void)
{
-	unsigned int i;
+	unsigned int i, cpu = smp_processor_id();
 	struct irq_desc *desc;
 	unsigned long flags;
 
 	local_irq_save(flags);
 
 	for_each_irq_desc(i, desc) {
+		struct irq_data *d = &desc->irq_data;
 		bool affinity_broken = false;
 
-		if (!desc)
-			continue;
-
 		raw_spin_lock(&desc->lock);
-		affinity_broken = migrate_one_irq(desc);
+		do {
+			if (desc->action == NULL)
+				break;
+
+			if (d->node != cpu)
+				break;
+
+			affinity_broken = migrate_one_irq(d);
+		} while (0);
 		raw_spin_unlock(&desc->lock);
 
 		if (affinity_broken && printk_ratelimit())
-			pr_warning("IRQ%u no longer affine to CPU%u\n", i,
-				smp_processor_id());
+			pr_warning("IRQ%u no longer affine to CPU%u\n", i, cpu);
 	}
 
 	local_irq_restore(flags);
diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S
index a087838..7fa3bb0 100644
--- a/arch/arm/kernel/iwmmxt.S
+++ b/arch/arm/kernel/iwmmxt.S
@@ -195,10 +195,10 @@ ENTRY(iwmmxt_task_disable)
 
 	@ enable access to CP0 and CP1
 	XSC(mrc	p15, 0, r4, c15, c1, 0)
-	XSC(orr	r4, r4, #0x3)
+	XSC(orr	r4, r4, #0xf)
 	XSC(mcr	p15, 0, r4, c15, c1, 0)
 	PJ4(mrc p15, 0, r4, c1, c0, 2)
-	PJ4(orr	r4, r4, #0xf)
+	PJ4(orr	r4, r4, #0x3)
 	PJ4(mcr	p15, 0, r4, c1, c0, 2)
 
 	mov	r0, #0				@ nothing to load
@@ -313,7 +313,7 @@ ENTRY(iwmmxt_task_switch)
 	teq	r2, r3				@ next task owns it?
 	movne	pc, lr				@ no: leave Concan disabled
 
 1:	@ flip Concan access
 	XSC(eor	r1, r1, #0x3)
 	XSC(mcr	p15, 0, r1, c15, c1, 0)
 	PJ4(eor	r1, r1, #0xf)
diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
new file mode 100644
index 0000000..15eeff6
--- /dev/null
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -0,0 +1,1670 @@
+/*
+ * arch/arm/kernel/kprobes-decode.c
+ *
+ * Copyright (C) 2006, 2007 Motorola Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+/*
+ * We do not have hardware single-stepping on ARM.  This
+ * effort is further complicated by the ARM not having a
+ * "next PC" register.  Instructions that change the PC
+ * can't be safely single-stepped in an MP environment, so
+ * we have a lot of work to do:
+ *
+ * In the prepare phase:
+ *   *) If it is an instruction that does anything
+ *      with the CPU mode, we reject it for a kprobe.
+ *      (This is out of laziness rather than need.  The
+ *      instructions could be simulated.)
+ *
+ *   *) Otherwise, decode the instruction, rewriting its
+ *      registers to take fixed, ordered registers and
+ *      setting a handler for it to run the instruction.
+ *
+ * In the execution phase by an instruction's handler:
+ *
+ *   *) If the PC is written to by the instruction, the
+ *      instruction must be fully simulated in software.
+ *
+ *   *) Otherwise, a modified form of the instruction is
+ *      directly executed.  Its handler calls the
+ *      instruction in insn[0].  In insn[1] is a
+ *      "mov pc, lr" to return.
+ *
+ *      Before calling, load up the reordered registers
+ *      from the original instruction's registers.  If one
+ *      of the original input registers is the PC, compute
+ *      and adjust the appropriate input register.
+ *
+ *      After the call completes, copy the output registers to
+ *      the original instruction's original registers.
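+ *
+ *      With this layout, an emulated instruction's slot ends up as:
+ *
+ *          insn[0]  the modified copy of the instruction
+ *          insn[1]  0xe1a0f00e  @ mov pc, lr  (KPROBE_RETURN_INSTRUCTION)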
+ * + * We don't use a real breakpoint instruction since that + * would have us in the kernel go from SVC mode to SVC + * mode losing the link register. Instead we use an + * undefined instruction. To simplify processing, the + * undefined instruction used for kprobes must be reserved + * exclusively for kprobes use. + * + * TODO: ifdef out some instruction decoding based on architecture. + */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> + +#define sign_extend(x, signbit) ((x) | (0 - ((x) & (1 << (signbit))))) + +#define branch_displacement(insn) sign_extend(((insn) & 0xffffff) << 2, 25) + +#define is_r15(insn, bitpos) (((insn) & (0xf << bitpos)) == (0xf << bitpos)) + +/* + * Test if load/store instructions writeback the address register. + * if P (bit 24) == 0 or W (bit 21) == 1 + */ +#define is_writeback(insn) ((insn ^ 0x01000000) & 0x01200000) + +#define PSR_fs (PSR_f|PSR_s) + +#define KPROBE_RETURN_INSTRUCTION 0xe1a0f00e /* mov pc, lr */ + +typedef long (insn_0arg_fn_t)(void); +typedef long (insn_1arg_fn_t)(long); +typedef long (insn_2arg_fn_t)(long, long); +typedef long (insn_3arg_fn_t)(long, long, long); +typedef long (insn_4arg_fn_t)(long, long, long, long); +typedef long long (insn_llret_0arg_fn_t)(void); +typedef long long (insn_llret_3arg_fn_t)(long, long, long); +typedef long long (insn_llret_4arg_fn_t)(long, long, long, long); + +union reg_pair { + long long dr; +#ifdef __LITTLE_ENDIAN + struct { long r0, r1; }; +#else + struct { long r1, r0; }; +#endif +}; + +/* + * For STR and STM instructions, an ARM core may choose to use either + * a +8 or a +12 displacement from the current instruction's address. + * Whichever value is chosen for a given core, it must be the same for + * both instructions and may not change. This function measures it. + */ + +static int str_pc_offset; + +static void __init find_str_pc_offset(void) +{ + int addr, scratch, ret; + + __asm__ ( + "sub %[ret], pc, #4 \n\t" + "str pc, %[addr] \n\t" + "ldr %[scr], %[addr] \n\t" + "sub %[ret], %[scr], %[ret] \n\t" + : [ret] "=r" (ret), [scr] "=r" (scratch), [addr] "+m" (addr)); + + str_pc_offset = ret; +} + +/* + * The insnslot_?arg_r[w]flags() functions below are to keep the + * msr -> *fn -> mrs instruction sequences indivisible so that + * the state of the CPSR flags aren't inadvertently modified + * just before or just after the call. 
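+ * (The call itself is made via "mov lr, pc; mov pc, %[fn]" rather
+ * than bl so the slot address can be taken from a register, and "cc"
+ * appears in each clobber list because the executed instruction may
+ * change the flags.)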
+ */ + +static inline long __kprobes +insnslot_0arg_rflags(long cpsr, insn_0arg_fn_t *fn) +{ + register long ret asm("r0"); + + __asm__ __volatile__ ( + "msr cpsr_fs, %[cpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + : "=r" (ret) + : [cpsr] "r" (cpsr), [fn] "r" (fn) + : "lr", "cc" + ); + return ret; +} + +static inline long long __kprobes +insnslot_llret_0arg_rflags(long cpsr, insn_llret_0arg_fn_t *fn) +{ + register long ret0 asm("r0"); + register long ret1 asm("r1"); + union reg_pair fnr; + + __asm__ __volatile__ ( + "msr cpsr_fs, %[cpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + : "=r" (ret0), "=r" (ret1) + : [cpsr] "r" (cpsr), [fn] "r" (fn) + : "lr", "cc" + ); + fnr.r0 = ret0; + fnr.r1 = ret1; + return fnr.dr; +} + +static inline long __kprobes +insnslot_1arg_rflags(long r0, long cpsr, insn_1arg_fn_t *fn) +{ + register long rr0 asm("r0") = r0; + register long ret asm("r0"); + + __asm__ __volatile__ ( + "msr cpsr_fs, %[cpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + : "=r" (ret) + : "0" (rr0), [cpsr] "r" (cpsr), [fn] "r" (fn) + : "lr", "cc" + ); + return ret; +} + +static inline long __kprobes +insnslot_2arg_rflags(long r0, long r1, long cpsr, insn_2arg_fn_t *fn) +{ + register long rr0 asm("r0") = r0; + register long rr1 asm("r1") = r1; + register long ret asm("r0"); + + __asm__ __volatile__ ( + "msr cpsr_fs, %[cpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + : "=r" (ret) + : "0" (rr0), "r" (rr1), + [cpsr] "r" (cpsr), [fn] "r" (fn) + : "lr", "cc" + ); + return ret; +} + +static inline long __kprobes +insnslot_3arg_rflags(long r0, long r1, long r2, long cpsr, insn_3arg_fn_t *fn) +{ + register long rr0 asm("r0") = r0; + register long rr1 asm("r1") = r1; + register long rr2 asm("r2") = r2; + register long ret asm("r0"); + + __asm__ __volatile__ ( + "msr cpsr_fs, %[cpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + : "=r" (ret) + : "0" (rr0), "r" (rr1), "r" (rr2), + [cpsr] "r" (cpsr), [fn] "r" (fn) + : "lr", "cc" + ); + return ret; +} + +static inline long long __kprobes +insnslot_llret_3arg_rflags(long r0, long r1, long r2, long cpsr, + insn_llret_3arg_fn_t *fn) +{ + register long rr0 asm("r0") = r0; + register long rr1 asm("r1") = r1; + register long rr2 asm("r2") = r2; + register long ret0 asm("r0"); + register long ret1 asm("r1"); + union reg_pair fnr; + + __asm__ __volatile__ ( + "msr cpsr_fs, %[cpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + : "=r" (ret0), "=r" (ret1) + : "0" (rr0), "r" (rr1), "r" (rr2), + [cpsr] "r" (cpsr), [fn] "r" (fn) + : "lr", "cc" + ); + fnr.r0 = ret0; + fnr.r1 = ret1; + return fnr.dr; +} + +static inline long __kprobes +insnslot_4arg_rflags(long r0, long r1, long r2, long r3, long cpsr, + insn_4arg_fn_t *fn) +{ + register long rr0 asm("r0") = r0; + register long rr1 asm("r1") = r1; + register long rr2 asm("r2") = r2; + register long rr3 asm("r3") = r3; + register long ret asm("r0"); + + __asm__ __volatile__ ( + "msr cpsr_fs, %[cpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + : "=r" (ret) + : "0" (rr0), "r" (rr1), "r" (rr2), "r" (rr3), + [cpsr] "r" (cpsr), [fn] "r" (fn) + : "lr", "cc" + ); + return ret; +} + +static inline long __kprobes +insnslot_1arg_rwflags(long r0, long *cpsr, insn_1arg_fn_t *fn) +{ + register long rr0 asm("r0") = r0; + register long ret asm("r0"); + long oldcpsr = *cpsr; + long newcpsr; + + __asm__ __volatile__ ( + "msr cpsr_fs, %[oldcpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + "mrs %[newcpsr], cpsr \n\t" + : "=r" (ret), [newcpsr] "=r" (newcpsr) + : "0" (rr0), [oldcpsr] 
"r" (oldcpsr), [fn] "r" (fn) + : "lr", "cc" + ); + *cpsr = (oldcpsr & ~PSR_fs) | (newcpsr & PSR_fs); + return ret; +} + +static inline long __kprobes +insnslot_2arg_rwflags(long r0, long r1, long *cpsr, insn_2arg_fn_t *fn) +{ + register long rr0 asm("r0") = r0; + register long rr1 asm("r1") = r1; + register long ret asm("r0"); + long oldcpsr = *cpsr; + long newcpsr; + + __asm__ __volatile__ ( + "msr cpsr_fs, %[oldcpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + "mrs %[newcpsr], cpsr \n\t" + : "=r" (ret), [newcpsr] "=r" (newcpsr) + : "0" (rr0), "r" (rr1), [oldcpsr] "r" (oldcpsr), [fn] "r" (fn) + : "lr", "cc" + ); + *cpsr = (oldcpsr & ~PSR_fs) | (newcpsr & PSR_fs); + return ret; +} + +static inline long __kprobes +insnslot_3arg_rwflags(long r0, long r1, long r2, long *cpsr, + insn_3arg_fn_t *fn) +{ + register long rr0 asm("r0") = r0; + register long rr1 asm("r1") = r1; + register long rr2 asm("r2") = r2; + register long ret asm("r0"); + long oldcpsr = *cpsr; + long newcpsr; + + __asm__ __volatile__ ( + "msr cpsr_fs, %[oldcpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + "mrs %[newcpsr], cpsr \n\t" + : "=r" (ret), [newcpsr] "=r" (newcpsr) + : "0" (rr0), "r" (rr1), "r" (rr2), + [oldcpsr] "r" (oldcpsr), [fn] "r" (fn) + : "lr", "cc" + ); + *cpsr = (oldcpsr & ~PSR_fs) | (newcpsr & PSR_fs); + return ret; +} + +static inline long __kprobes +insnslot_4arg_rwflags(long r0, long r1, long r2, long r3, long *cpsr, + insn_4arg_fn_t *fn) +{ + register long rr0 asm("r0") = r0; + register long rr1 asm("r1") = r1; + register long rr2 asm("r2") = r2; + register long rr3 asm("r3") = r3; + register long ret asm("r0"); + long oldcpsr = *cpsr; + long newcpsr; + + __asm__ __volatile__ ( + "msr cpsr_fs, %[oldcpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + "mrs %[newcpsr], cpsr \n\t" + : "=r" (ret), [newcpsr] "=r" (newcpsr) + : "0" (rr0), "r" (rr1), "r" (rr2), "r" (rr3), + [oldcpsr] "r" (oldcpsr), [fn] "r" (fn) + : "lr", "cc" + ); + *cpsr = (oldcpsr & ~PSR_fs) | (newcpsr & PSR_fs); + return ret; +} + +static inline long long __kprobes +insnslot_llret_4arg_rwflags(long r0, long r1, long r2, long r3, long *cpsr, + insn_llret_4arg_fn_t *fn) +{ + register long rr0 asm("r0") = r0; + register long rr1 asm("r1") = r1; + register long rr2 asm("r2") = r2; + register long rr3 asm("r3") = r3; + register long ret0 asm("r0"); + register long ret1 asm("r1"); + long oldcpsr = *cpsr; + long newcpsr; + union reg_pair fnr; + + __asm__ __volatile__ ( + "msr cpsr_fs, %[oldcpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + "mrs %[newcpsr], cpsr \n\t" + : "=r" (ret0), "=r" (ret1), [newcpsr] "=r" (newcpsr) + : "0" (rr0), "r" (rr1), "r" (rr2), "r" (rr3), + [oldcpsr] "r" (oldcpsr), [fn] "r" (fn) + : "lr", "cc" + ); + *cpsr = (oldcpsr & ~PSR_fs) | (newcpsr & PSR_fs); + fnr.r0 = ret0; + fnr.r1 = ret1; + return fnr.dr; +} + +/* + * To avoid the complications of mimicing single-stepping on a + * processor without a Next-PC or a single-step mode, and to + * avoid having to deal with the side-effects of boosting, we + * simulate or emulate (almost) all ARM instructions. + * + * "Simulation" is where the instruction's behavior is duplicated in + * C code. "Emulation" is where the original instruction is rewritten + * and executed, often by altering its registers. + * + * By having all behavior of the kprobe'd instruction completed before + * returning from the kprobe_handler(), all locks (scheduler and + * interrupt) can safely be released. 
There is no need for secondary
+ * breakpoints, no race with MP or preemptible kernels, nor having to
+ * clean up resource counts at a later time, impacting overall system
+ * performance. By rewriting the instruction, only the minimum registers
+ * need to be loaded and saved back, optimizing performance.
+ *
+ * Calling the insnslot_*_rwflags version of a function doesn't hurt
+ * anything even when the CPSR flags aren't updated by the
+ * instruction. It's just a little slower in return for saving
+ * a little space by not having a duplicate function that doesn't
+ * update the flags. (The same optimization can be said for
+ * instructions that do or don't perform register writeback.)
+ * Also, instructions can either read the flags, only write the
+ * flags, or read and write the flags. To save on the number of
+ * function combinations, rather than for sheer performance, the
+ * flag functions just assume both read and write of the flags.
+ */
+
+static void __kprobes simulate_bbl(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	long iaddr = (long)p->addr;
+	int disp = branch_displacement(insn);
+
+	if (insn & (1 << 24))
+		regs->ARM_lr = iaddr + 4;
+
+	regs->ARM_pc = iaddr + 8 + disp;
+}
+
+static void __kprobes simulate_blx1(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	long iaddr = (long)p->addr;
+	int disp = branch_displacement(insn);
+
+	regs->ARM_lr = iaddr + 4;
+	regs->ARM_pc = iaddr + 8 + disp + ((insn >> 23) & 0x2);
+	regs->ARM_cpsr |= PSR_T_BIT;
+}
+
+static void __kprobes simulate_blx2bx(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	int rm = insn & 0xf;
+	long rmv = regs->uregs[rm];
+
+	if (insn & (1 << 5))
+		regs->ARM_lr = (long)p->addr + 4;
+
+	regs->ARM_pc = rmv & ~0x1;
+	regs->ARM_cpsr &= ~PSR_T_BIT;
+	if (rmv & 0x1)
+		regs->ARM_cpsr |= PSR_T_BIT;
+}
+
+static void __kprobes simulate_mrs(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 12) & 0xf;
+	unsigned long mask = 0xf8ff03df; /* Mask out execution state */
+	regs->uregs[rd] = regs->ARM_cpsr & mask;
+}
+
+static void __kprobes simulate_ldm1stm1(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	int rn = (insn >> 16) & 0xf;
+	int lbit = insn & (1 << 20);
+	int wbit = insn & (1 << 21);
+	int ubit = insn & (1 << 23);
+	int pbit = insn & (1 << 24);
+	long *addr = (long *)regs->uregs[rn];
+	int reg_bit_vector;
+	int reg_count;
+
+	reg_count = 0;
+	reg_bit_vector = insn & 0xffff;
+	while (reg_bit_vector) {
+		reg_bit_vector &= (reg_bit_vector - 1);
+		++reg_count;
+	}
+
+	if (!ubit)
+		addr -= reg_count;
+	addr += (!pbit == !ubit);
+
+	reg_bit_vector = insn & 0xffff;
+	while (reg_bit_vector) {
+		int reg = __ffs(reg_bit_vector);
+		reg_bit_vector &= (reg_bit_vector - 1);
+		if (lbit)
+			regs->uregs[reg] = *addr++;
+		else
+			*addr++ = regs->uregs[reg];
+	}
+
+	if (wbit) {
+		if (!ubit)
+			addr -= reg_count;
+		addr -= (!pbit == !ubit);
+		regs->uregs[rn] = (long)addr;
+	}
+}
+
+static void __kprobes simulate_stm1_pc(struct kprobe *p, struct pt_regs *regs)
+{
+	regs->ARM_pc = (long)p->addr + str_pc_offset;
+	simulate_ldm1stm1(p, regs);
+	regs->ARM_pc = (long)p->addr + 4;
+}
+
+static void __kprobes simulate_mov_ipsp(struct kprobe *p, struct pt_regs *regs)
+{
+	regs->uregs[12] = regs->uregs[13];
+}
+
+static void __kprobes emulate_ldrd(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	long ppc =
(long)p->addr + 8; + int rd = (insn >> 12) & 0xf; + int rn = (insn >> 16) & 0xf; + int rm = insn & 0xf; /* rm may be invalid, don't care. */ + long rmv = (rm == 15) ? ppc : regs->uregs[rm]; + long rnv = (rn == 15) ? ppc : regs->uregs[rn]; + + /* Not following the C calling convention here, so need asm(). */ + __asm__ __volatile__ ( + "ldr r0, %[rn] \n\t" + "ldr r1, %[rm] \n\t" + "msr cpsr_fs, %[cpsr]\n\t" + "mov lr, pc \n\t" + "mov pc, %[i_fn] \n\t" + "str r0, %[rn] \n\t" /* in case of writeback */ + "str r2, %[rd0] \n\t" + "str r3, %[rd1] \n\t" + : [rn] "+m" (rnv), + [rd0] "=m" (regs->uregs[rd]), + [rd1] "=m" (regs->uregs[rd+1]) + : [rm] "m" (rmv), + [cpsr] "r" (regs->ARM_cpsr), + [i_fn] "r" (i_fn) + : "r0", "r1", "r2", "r3", "lr", "cc" + ); + if (is_writeback(insn)) + regs->uregs[rn] = rnv; +} + +static void __kprobes emulate_strd(struct kprobe *p, struct pt_regs *regs) +{ + insn_4arg_fn_t *i_fn = (insn_4arg_fn_t *)&p->ainsn.insn[0]; + kprobe_opcode_t insn = p->opcode; + long ppc = (long)p->addr + 8; + int rd = (insn >> 12) & 0xf; + int rn = (insn >> 16) & 0xf; + int rm = insn & 0xf; + long rnv = (rn == 15) ? ppc : regs->uregs[rn]; + /* rm/rmv may be invalid, don't care. */ + long rmv = (rm == 15) ? ppc : regs->uregs[rm]; + long rnv_wb; + + rnv_wb = insnslot_4arg_rflags(rnv, rmv, regs->uregs[rd], + regs->uregs[rd+1], + regs->ARM_cpsr, i_fn); + if (is_writeback(insn)) + regs->uregs[rn] = rnv_wb; +} + +static void __kprobes emulate_ldr(struct kprobe *p, struct pt_regs *regs) +{ + insn_llret_3arg_fn_t *i_fn = (insn_llret_3arg_fn_t *)&p->ainsn.insn[0]; + kprobe_opcode_t insn = p->opcode; + long ppc = (long)p->addr + 8; + union reg_pair fnr; + int rd = (insn >> 12) & 0xf; + int rn = (insn >> 16) & 0xf; + int rm = insn & 0xf; + long rdv; + long rnv = (rn == 15) ? ppc : regs->uregs[rn]; + long rmv = (rm == 15) ? ppc : regs->uregs[rm]; + long cpsr = regs->ARM_cpsr; + + fnr.dr = insnslot_llret_3arg_rflags(rnv, 0, rmv, cpsr, i_fn); + if (rn != 15) + regs->uregs[rn] = fnr.r0; /* Save Rn in case of writeback. */ + rdv = fnr.r1; + + if (rd == 15) { +#if __LINUX_ARM_ARCH__ >= 5 + cpsr &= ~PSR_T_BIT; + if (rdv & 0x1) + cpsr |= PSR_T_BIT; + regs->ARM_cpsr = cpsr; + rdv &= ~0x1; +#else + rdv &= ~0x2; +#endif + } + regs->uregs[rd] = rdv; +} + +static void __kprobes emulate_str(struct kprobe *p, struct pt_regs *regs) +{ + insn_3arg_fn_t *i_fn = (insn_3arg_fn_t *)&p->ainsn.insn[0]; + kprobe_opcode_t insn = p->opcode; + long iaddr = (long)p->addr; + int rd = (insn >> 12) & 0xf; + int rn = (insn >> 16) & 0xf; + int rm = insn & 0xf; + long rdv = (rd == 15) ? iaddr + str_pc_offset : regs->uregs[rd]; + long rnv = (rn == 15) ? iaddr + 8 : regs->uregs[rn]; + long rmv = regs->uregs[rm]; /* rm/rmv may be invalid, don't care. */ + long rnv_wb; + + rnv_wb = insnslot_3arg_rflags(rnv, rdv, rmv, regs->ARM_cpsr, i_fn); + if (rn != 15) + regs->uregs[rn] = rnv_wb; /* Save Rn in case of writeback. 
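+		 * The slot instruction was rewritten with Rn renamed to r0
+		 * and the slot returns r0, so rnv_wb holds the
+		 * post-writeback base address.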
 */
+}
+
+static void __kprobes emulate_sat(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 12) & 0xf;
+	int rm = insn & 0xf;
+	long rmv = regs->uregs[rm];
+
+	/* Writes Q flag */
+	regs->uregs[rd] = insnslot_1arg_rwflags(rmv, &regs->ARM_cpsr, i_fn);
+}
+
+static void __kprobes emulate_sel(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 12) & 0xf;
+	int rn = (insn >> 16) & 0xf;
+	int rm = insn & 0xf;
+	long rnv = regs->uregs[rn];
+	long rmv = regs->uregs[rm];
+
+	/* Reads GE bits */
+	regs->uregs[rd] = insnslot_2arg_rflags(rnv, rmv, regs->ARM_cpsr, i_fn);
+}
+
+static void __kprobes emulate_none(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_0arg_fn_t *i_fn = (insn_0arg_fn_t *)&p->ainsn.insn[0];
+
+	insnslot_0arg_rflags(regs->ARM_cpsr, i_fn);
+}
+
+static void __kprobes emulate_nop(struct kprobe *p, struct pt_regs *regs)
+{
+}
+
+static void __kprobes
+emulate_rd12_modify(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 12) & 0xf;
+	long rdv = regs->uregs[rd];
+
+	regs->uregs[rd] = insnslot_1arg_rflags(rdv, regs->ARM_cpsr, i_fn);
+}
+
+static void __kprobes
+emulate_rd12rn0_modify(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 12) & 0xf;
+	int rn = insn & 0xf;
+	long rdv = regs->uregs[rd];
+	long rnv = regs->uregs[rn];
+
+	regs->uregs[rd] = insnslot_2arg_rflags(rdv, rnv, regs->ARM_cpsr, i_fn);
+}
+
+static void __kprobes emulate_rd12rm0(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 12) & 0xf;
+	int rm = insn & 0xf;
+	long rmv = regs->uregs[rm];
+
+	regs->uregs[rd] = insnslot_1arg_rflags(rmv, regs->ARM_cpsr, i_fn);
+}
+
+static void __kprobes
+emulate_rd12rn16rm0_rwflags(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 12) & 0xf;
+	int rn = (insn >> 16) & 0xf;
+	int rm = insn & 0xf;
+	long rnv = regs->uregs[rn];
+	long rmv = regs->uregs[rm];
+
+	regs->uregs[rd] =
+		insnslot_2arg_rwflags(rnv, rmv, &regs->ARM_cpsr, i_fn);
+}
+
+static void __kprobes
+emulate_rd16rn12rs8rm0_rwflags(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_3arg_fn_t *i_fn = (insn_3arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 16) & 0xf;
+	int rn = (insn >> 12) & 0xf;
+	int rs = (insn >> 8) & 0xf;
+	int rm = insn & 0xf;
+	long rnv = regs->uregs[rn];
+	long rsv = regs->uregs[rs];
+	long rmv = regs->uregs[rm];
+
+	regs->uregs[rd] =
+		insnslot_3arg_rwflags(rnv, rsv, rmv, &regs->ARM_cpsr, i_fn);
+}
+
+static void __kprobes
+emulate_rd16rs8rm0_rwflags(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 16) & 0xf;
+	int rs = (insn >> 8) & 0xf;
+	int rm = insn & 0xf;
+	long rsv = regs->uregs[rs];
+	long rmv = regs->uregs[rm];
+
+	regs->uregs[rd] =
+		insnslot_2arg_rwflags(rsv, rmv, &regs->ARM_cpsr, i_fn);
+}
+
+static void __kprobes
+emulate_rdhi16rdlo12rs8rm0_rwflags(struct kprobe *p, struct pt_regs *regs)
+{
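+	/*
+	 * 64-bit result: RdHi:RdLo are passed in through r0/r1, and the
+	 * updated pair comes back from the slot as a reg_pair in r0/r1.
+	 */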
+	insn_llret_4arg_fn_t *i_fn = (insn_llret_4arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	union reg_pair fnr;
+	int rdhi = (insn >> 16) & 0xf;
+	int rdlo = (insn >> 12) & 0xf;
+	int rs = (insn >> 8) & 0xf;
+	int rm = insn & 0xf;
+	long rsv = regs->uregs[rs];
+	long rmv = regs->uregs[rm];
+
+	fnr.dr = insnslot_llret_4arg_rwflags(regs->uregs[rdhi],
+					     regs->uregs[rdlo], rsv, rmv,
+					     &regs->ARM_cpsr, i_fn);
+	regs->uregs[rdhi] = fnr.r0;
+	regs->uregs[rdlo] = fnr.r1;
+}
+
+static void __kprobes
+emulate_alu_imm_rflags(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 12) & 0xf;
+	int rn = (insn >> 16) & 0xf;
+	long rnv = (rn == 15) ? (long)p->addr + 8 : regs->uregs[rn];
+
+	regs->uregs[rd] = insnslot_1arg_rflags(rnv, regs->ARM_cpsr, i_fn);
+}
+
+static void __kprobes
+emulate_alu_imm_rwflags(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 12) & 0xf;
+	int rn = (insn >> 16) & 0xf;
+	long rnv = (rn == 15) ? (long)p->addr + 8 : regs->uregs[rn];
+
+	regs->uregs[rd] = insnslot_1arg_rwflags(rnv, &regs->ARM_cpsr, i_fn);
+}
+
+static void __kprobes
+emulate_alu_tests_imm(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	int rn = (insn >> 16) & 0xf;
+	long rnv = (rn == 15) ? (long)p->addr + 8 : regs->uregs[rn];
+
+	insnslot_1arg_rwflags(rnv, &regs->ARM_cpsr, i_fn);
+}
+
+static void __kprobes
+emulate_alu_rflags(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_3arg_fn_t *i_fn = (insn_3arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	long ppc = (long)p->addr + 8;
+	int rd = (insn >> 12) & 0xf;
+	int rn = (insn >> 16) & 0xf;	/* rn/rnv/rs/rsv may be */
+	int rs = (insn >> 8) & 0xf;	/* invalid, don't care. */
+	int rm = insn & 0xf;
+	long rnv = (rn == 15) ? ppc : regs->uregs[rn];
+	long rmv = (rm == 15) ? ppc : regs->uregs[rm];
+	long rsv = regs->uregs[rs];
+
+	regs->uregs[rd] =
+		insnslot_3arg_rflags(rnv, rmv, rsv, regs->ARM_cpsr, i_fn);
+}
+
+static void __kprobes
+emulate_alu_rwflags(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_3arg_fn_t *i_fn = (insn_3arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	long ppc = (long)p->addr + 8;
+	int rd = (insn >> 12) & 0xf;
+	int rn = (insn >> 16) & 0xf;	/* rn/rnv/rs/rsv may be */
+	int rs = (insn >> 8) & 0xf;	/* invalid, don't care. */
+	int rm = insn & 0xf;
+	long rnv = (rn == 15) ? ppc : regs->uregs[rn];
+	long rmv = (rm == 15) ? ppc : regs->uregs[rm];
+	long rsv = regs->uregs[rs];
+
+	regs->uregs[rd] =
+		insnslot_3arg_rwflags(rnv, rmv, rsv, &regs->ARM_cpsr, i_fn);
+}
+
+static void __kprobes
+emulate_alu_tests(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_3arg_fn_t *i_fn = (insn_3arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	long ppc = (long)p->addr + 8;
+	int rn = (insn >> 16) & 0xf;
+	int rs = (insn >> 8) & 0xf;	/* rs/rsv may be invalid, don't care. */
+	int rm = insn & 0xf;
+	long rnv = (rn == 15) ? ppc : regs->uregs[rn];
+	long rmv = (rm == 15) ? ppc : regs->uregs[rm];
+	long rsv = regs->uregs[rs];
+
+	insnslot_3arg_rwflags(rnv, rmv, rsv, &regs->ARM_cpsr, i_fn);
+}
+
+static enum kprobe_insn __kprobes
+prep_emulate_ldr_str(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	int not_imm = (insn & (1 << 26)) ?
		      (insn & (1 << 25)) : (~insn & (1 << 22));
+
+	if (is_writeback(insn) && is_r15(insn, 16))
+		return INSN_REJECTED;	/* Writeback to PC */
+
+	insn &= 0xfff00fff;
+	insn |= 0x00001000;	/* Rn = r0, Rd = r1 */
+	if (not_imm) {
+		insn &= ~0xf;
+		insn |= 2;	/* Rm = r2 */
+	}
+	asi->insn[0] = insn;
+	asi->insn_handler = (insn & (1 << 20)) ? emulate_ldr : emulate_str;
+	return INSN_GOOD;
+}
+
+static enum kprobe_insn __kprobes
+prep_emulate_rd12_modify(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	if (is_r15(insn, 12))
+		return INSN_REJECTED;	/* Rd is PC */
+
+	insn &= 0xffff0fff;	/* Rd = r0 */
+	asi->insn[0] = insn;
+	asi->insn_handler = emulate_rd12_modify;
+	return INSN_GOOD;
+}
+
+static enum kprobe_insn __kprobes
+prep_emulate_rd12rn0_modify(kprobe_opcode_t insn,
+			    struct arch_specific_insn *asi)
+{
+	if (is_r15(insn, 12))
+		return INSN_REJECTED;	/* Rd is PC */
+
+	insn &= 0xffff0ff0;	/* Rd = r0 */
+	insn |= 0x00000001;	/* Rn = r1 */
+	asi->insn[0] = insn;
+	asi->insn_handler = emulate_rd12rn0_modify;
+	return INSN_GOOD;
+}
+
+static enum kprobe_insn __kprobes
+prep_emulate_rd12rm0(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	if (is_r15(insn, 12))
+		return INSN_REJECTED;	/* Rd is PC */
+
+	insn &= 0xffff0ff0;	/* Rd = r0, Rm = r0 */
+	asi->insn[0] = insn;
+	asi->insn_handler = emulate_rd12rm0;
+	return INSN_GOOD;
+}
+
+static enum kprobe_insn __kprobes
+prep_emulate_rd12rn16rm0_wflags(kprobe_opcode_t insn,
+				struct arch_specific_insn *asi)
+{
+	if (is_r15(insn, 12))
+		return INSN_REJECTED;	/* Rd is PC */
+
+	insn &= 0xfff00ff0;	/* Rd = r0, Rn = r0 */
+	insn |= 0x00000001;	/* Rm = r1 */
+	asi->insn[0] = insn;
+	asi->insn_handler = emulate_rd12rn16rm0_rwflags;
+	return INSN_GOOD;
+}
+
+static enum kprobe_insn __kprobes
+prep_emulate_rd16rs8rm0_wflags(kprobe_opcode_t insn,
+			       struct arch_specific_insn *asi)
+{
+	if (is_r15(insn, 16))
+		return INSN_REJECTED;	/* Rd is PC */
+
+	insn &= 0xfff0f0f0;	/* Rd = r0, Rs = r0 */
+	insn |= 0x00000001;	/* Rm = r1 */
+	asi->insn[0] = insn;
+	asi->insn_handler = emulate_rd16rs8rm0_rwflags;
+	return INSN_GOOD;
+}
+
+static enum kprobe_insn __kprobes
+prep_emulate_rd16rn12rs8rm0_wflags(kprobe_opcode_t insn,
+				   struct arch_specific_insn *asi)
+{
+	if (is_r15(insn, 16))
+		return INSN_REJECTED;	/* Rd is PC */
+
+	insn &= 0xfff000f0;	/* Rd = r0, Rn = r0 */
+	insn |= 0x00000102;	/* Rs = r1, Rm = r2 */
+	asi->insn[0] = insn;
+	asi->insn_handler = emulate_rd16rn12rs8rm0_rwflags;
+	return INSN_GOOD;
+}
+
+static enum kprobe_insn __kprobes
+prep_emulate_rdhi16rdlo12rs8rm0_wflags(kprobe_opcode_t insn,
+				       struct arch_specific_insn *asi)
+{
+	if (is_r15(insn, 16) || is_r15(insn, 12))
+		return INSN_REJECTED;	/* RdHi or RdLo is PC */
+
+	insn &= 0xfff000f0;	/* RdHi = r0, RdLo = r1 */
+	insn |= 0x00001203;	/* Rs = r2, Rm = r3 */
+	asi->insn[0] = insn;
+	asi->insn_handler = emulate_rdhi16rdlo12rs8rm0_rwflags;
+	return INSN_GOOD;
+}
+
+/*
+ * For the instruction masking and comparisons in all the "space_*"
+ * functions below, do _not_ rearrange the order of tests unless
+ * you're very, very sure of what you are doing. For the sake of
+ * efficiency, the masks for some tests sometimes assume that other
+ * tests have been done before them, which lets each pattern cover as
+ * broad a set of instructions as possible and reduces the total
+ * number of tests needed.
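+ *
+ * For example, in space_cccc_000x() the MRS test
+ * ((insn & 0x0ff000f0) == 0x01000000) is only safe because the
+ * enclosing test has already restricted the encoding to
+ * cccc 0001 0xx0 xxxx xxxx xxxx xxx0 xxxx.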
+ */ + +static enum kprobe_insn __kprobes +space_1111(kprobe_opcode_t insn, struct arch_specific_insn *asi) +{ + /* memory hint : 1111 0100 x001 xxxx xxxx xxxx xxxx xxxx : */ + /* PLDI : 1111 0100 x101 xxxx xxxx xxxx xxxx xxxx : */ + /* PLDW : 1111 0101 x001 xxxx xxxx xxxx xxxx xxxx : */ + /* PLD : 1111 0101 x101 xxxx xxxx xxxx xxxx xxxx : */ + if ((insn & 0xfe300000) == 0xf4100000) { + asi->insn_handler = emulate_nop; + return INSN_GOOD_NO_SLOT; + } + + /* BLX(1) : 1111 101x xxxx xxxx xxxx xxxx xxxx xxxx : */ + if ((insn & 0xfe000000) == 0xfa000000) { + asi->insn_handler = simulate_blx1; + return INSN_GOOD_NO_SLOT; + } + + /* CPS : 1111 0001 0000 xxx0 xxxx xxxx xx0x xxxx */ + /* SETEND: 1111 0001 0000 0001 xxxx xxxx 0000 xxxx */ + + /* SRS : 1111 100x x1x0 xxxx xxxx xxxx xxxx xxxx */ + /* RFE : 1111 100x x0x1 xxxx xxxx xxxx xxxx xxxx */ + + /* Coprocessor instructions... */ + /* MCRR2 : 1111 1100 0100 xxxx xxxx xxxx xxxx xxxx : (Rd != Rn) */ + /* MRRC2 : 1111 1100 0101 xxxx xxxx xxxx xxxx xxxx : (Rd != Rn) */ + /* LDC2 : 1111 110x xxx1 xxxx xxxx xxxx xxxx xxxx */ + /* STC2 : 1111 110x xxx0 xxxx xxxx xxxx xxxx xxxx */ + /* CDP2 : 1111 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */ + /* MCR2 : 1111 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */ + /* MRC2 : 1111 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */ + + return INSN_REJECTED; +} + +static enum kprobe_insn __kprobes +space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi) +{ + /* cccc 0001 0xx0 xxxx xxxx xxxx xxxx xxx0 xxxx */ + if ((insn & 0x0f900010) == 0x01000000) { + + /* MRS cpsr : cccc 0001 0000 xxxx xxxx xxxx 0000 xxxx */ + if ((insn & 0x0ff000f0) == 0x01000000) { + if (is_r15(insn, 12)) + return INSN_REJECTED; /* Rd is PC */ + asi->insn_handler = simulate_mrs; + return INSN_GOOD_NO_SLOT; + } + + /* SMLALxy : cccc 0001 0100 xxxx xxxx xxxx 1xx0 xxxx */ + if ((insn & 0x0ff00090) == 0x01400080) + return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn, + asi); + + /* SMULWy : cccc 0001 0010 xxxx xxxx xxxx 1x10 xxxx */ + /* SMULxy : cccc 0001 0110 xxxx xxxx xxxx 1xx0 xxxx */ + if ((insn & 0x0ff000b0) == 0x012000a0 || + (insn & 0x0ff00090) == 0x01600080) + return prep_emulate_rd16rs8rm0_wflags(insn, asi); + + /* SMLAxy : cccc 0001 0000 xxxx xxxx xxxx 1xx0 xxxx : Q */ + /* SMLAWy : cccc 0001 0010 xxxx xxxx xxxx 1x00 xxxx : Q */ + if ((insn & 0x0ff00090) == 0x01000080 || + (insn & 0x0ff000b0) == 0x01200080) + return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi); + + /* BXJ : cccc 0001 0010 xxxx xxxx xxxx 0010 xxxx */ + /* MSR : cccc 0001 0x10 xxxx xxxx xxxx 0000 xxxx */ + /* MRS spsr : cccc 0001 0100 xxxx xxxx xxxx 0000 xxxx */ + + /* Other instruction encodings aren't yet defined */ + return INSN_REJECTED; + } + + /* cccc 0001 0xx0 xxxx xxxx xxxx xxxx 0xx1 xxxx */ + else if ((insn & 0x0f900090) == 0x01000010) { + + /* BLX(2) : cccc 0001 0010 xxxx xxxx xxxx 0011 xxxx */ + /* BX : cccc 0001 0010 xxxx xxxx xxxx 0001 xxxx */ + if ((insn & 0x0ff000d0) == 0x01200010) { + if ((insn & 0x0ff000ff) == 0x0120003f) + return INSN_REJECTED; /* BLX pc */ + asi->insn_handler = simulate_blx2bx; + return INSN_GOOD_NO_SLOT; + } + + /* CLZ : cccc 0001 0110 xxxx xxxx xxxx 0001 xxxx */ + if ((insn & 0x0ff000f0) == 0x01600010) + return prep_emulate_rd12rm0(insn, asi); + + /* QADD : cccc 0001 0000 xxxx xxxx xxxx 0101 xxxx :Q */ + /* QSUB : cccc 0001 0010 xxxx xxxx xxxx 0101 xxxx :Q */ + /* QDADD : cccc 0001 0100 xxxx xxxx xxxx 0101 xxxx :Q */ + /* QDSUB : cccc 0001 0110 xxxx xxxx xxxx 0101 xxxx :Q */ + if ((insn & 0x0f9000f0) == 0x01000050) + return 
prep_emulate_rd12rn16rm0_wflags(insn, asi);
+
+		/* BKPT : 1110 0001 0010 xxxx xxxx xxxx 0111 xxxx */
+		/* SMC  : cccc 0001 0110 xxxx xxxx xxxx 0111 xxxx */
+
+		/* Other instruction encodings aren't yet defined */
+		return INSN_REJECTED;
+	}
+
+	/* cccc 0000 xxxx xxxx xxxx xxxx xxxx 1001 xxxx */
+	else if ((insn & 0x0f0000f0) == 0x00000090) {
+
+		/* MUL    : cccc 0000 0000 xxxx xxxx xxxx 1001 xxxx :   */
+		/* MULS   : cccc 0000 0001 xxxx xxxx xxxx 1001 xxxx :cc */
+		/* MLA    : cccc 0000 0010 xxxx xxxx xxxx 1001 xxxx :   */
+		/* MLAS   : cccc 0000 0011 xxxx xxxx xxxx 1001 xxxx :cc */
+		/* UMAAL  : cccc 0000 0100 xxxx xxxx xxxx 1001 xxxx :   */
+		/* undef  : cccc 0000 0101 xxxx xxxx xxxx 1001 xxxx :   */
+		/* MLS    : cccc 0000 0110 xxxx xxxx xxxx 1001 xxxx :   */
+		/* undef  : cccc 0000 0111 xxxx xxxx xxxx 1001 xxxx :   */
+		/* UMULL  : cccc 0000 1000 xxxx xxxx xxxx 1001 xxxx :   */
+		/* UMULLS : cccc 0000 1001 xxxx xxxx xxxx 1001 xxxx :cc */
+		/* UMLAL  : cccc 0000 1010 xxxx xxxx xxxx 1001 xxxx :   */
+		/* UMLALS : cccc 0000 1011 xxxx xxxx xxxx 1001 xxxx :cc */
+		/* SMULL  : cccc 0000 1100 xxxx xxxx xxxx 1001 xxxx :   */
+		/* SMULLS : cccc 0000 1101 xxxx xxxx xxxx 1001 xxxx :cc */
+		/* SMLAL  : cccc 0000 1110 xxxx xxxx xxxx 1001 xxxx :   */
+		/* SMLALS : cccc 0000 1111 xxxx xxxx xxxx 1001 xxxx :cc */
+		if ((insn & 0x00d00000) == 0x00500000)
+			return INSN_REJECTED;
+		else if ((insn & 0x00e00000) == 0x00000000)
+			return prep_emulate_rd16rs8rm0_wflags(insn, asi);
+		else if ((insn & 0x00a00000) == 0x00200000)
+			return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
+		else
+			return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn,
+								      asi);
+	}
+
+	/* cccc 000x xxxx xxxx xxxx xxxx xxxx 1xx1 xxxx */
+	else if ((insn & 0x0e000090) == 0x00000090) {
+
+		/* SWP   : cccc 0001 0000 xxxx xxxx xxxx 1001 xxxx */
+		/* SWPB  : cccc 0001 0100 xxxx xxxx xxxx 1001 xxxx */
+		/* ???   : cccc 0001 0x01 xxxx xxxx xxxx 1001 xxxx */
+		/* ???   : cccc 0001 0x10 xxxx xxxx xxxx 1001 xxxx */
+		/* ???   : cccc 0001 0x11 xxxx xxxx xxxx 1001 xxxx */
+		/* STREX : cccc 0001 1000 xxxx xxxx xxxx 1001 xxxx */
+		/* LDREX : cccc 0001 1001 xxxx xxxx xxxx 1001 xxxx */
+		/* STREXD: cccc 0001 1010 xxxx xxxx xxxx 1001 xxxx */
+		/* LDREXD: cccc 0001 1011 xxxx xxxx xxxx 1001 xxxx */
+		/* STREXB: cccc 0001 1100 xxxx xxxx xxxx 1001 xxxx */
+		/* LDREXB: cccc 0001 1101 xxxx xxxx xxxx 1001 xxxx */
+		/* STREXH: cccc 0001 1110 xxxx xxxx xxxx 1001 xxxx */
+		/* LDREXH: cccc 0001 1111 xxxx xxxx xxxx 1001 xxxx */
+
+		/* LDRD  : cccc 000x xxx0 xxxx xxxx xxxx 1101 xxxx */
+		/* STRD  : cccc 000x xxx0 xxxx xxxx xxxx 1111 xxxx */
+		/* LDRH  : cccc 000x xxx1 xxxx xxxx xxxx 1011 xxxx */
+		/* STRH  : cccc 000x xxx0 xxxx xxxx xxxx 1011 xxxx */
+		/* LDRSB : cccc 000x xxx1 xxxx xxxx xxxx 1101 xxxx */
+		/* LDRSH : cccc 000x xxx1 xxxx xxxx xxxx 1111 xxxx */
+		if ((insn & 0x0f0000f0) == 0x01000090) {
+			if ((insn & 0x0fb000f0) == 0x01000090) {
+				/* SWP/SWPB */
+				return prep_emulate_rd12rn16rm0_wflags(insn,
+								       asi);
+			} else {
+				/* STREX/LDREX variants and unallocated space */
+				return INSN_REJECTED;
+			}
+
+		} else if ((insn & 0x0e1000d0) == 0x000000d0) {
+			/* STRD/LDRD */
+			if ((insn & 0x0000e000) == 0x0000e000)
+				return INSN_REJECTED;	/* Rd is LR or PC */
+			if (is_writeback(insn) && is_r15(insn, 16))
+				return INSN_REJECTED;	/* Writeback to PC */
+
+			insn &= 0xfff00fff;
+			insn |= 0x00002000;	/* Rn = r0, Rd = r2 */
+			if (!(insn & (1 << 22))) {
+				/* Register index */
+				insn &= ~0xf;
+				insn |= 1;	/* Rm = r1 */
+			}
+			asi->insn[0] = insn;
+			asi->insn_handler =
+				(insn & (1 << 5)) ?
emulate_strd : emulate_ldrd; + return INSN_GOOD; + } + + /* LDRH/STRH/LDRSB/LDRSH */ + if (is_r15(insn, 12)) + return INSN_REJECTED; /* Rd is PC */ + return prep_emulate_ldr_str(insn, asi); + } + + /* cccc 000x xxxx xxxx xxxx xxxx xxxx xxxx xxxx */ + + /* + * ALU op with S bit and Rd == 15 : + * cccc 000x xxx1 xxxx 1111 xxxx xxxx xxxx + */ + if ((insn & 0x0e10f000) == 0x0010f000) + return INSN_REJECTED; + + /* + * "mov ip, sp" is the most common kprobe'd instruction by far. + * Check and optimize for it explicitly. + */ + if (insn == 0xe1a0c00d) { + asi->insn_handler = simulate_mov_ipsp; + return INSN_GOOD_NO_SLOT; + } + + /* + * Data processing: Immediate-shift / Register-shift + * ALU op : cccc 000x xxxx xxxx xxxx xxxx xxxx xxxx + * CPY : cccc 0001 1010 xxxx xxxx 0000 0000 xxxx + * MOV : cccc 0001 101x xxxx xxxx xxxx xxxx xxxx + * *S (bit 20) updates condition codes + * ADC/SBC/RSC reads the C flag + */ + insn &= 0xfff00ff0; /* Rn = r0, Rd = r0 */ + insn |= 0x00000001; /* Rm = r1 */ + if (insn & 0x010) { + insn &= 0xfffff0ff; /* register shift */ + insn |= 0x00000200; /* Rs = r2 */ + } + asi->insn[0] = insn; + + if ((insn & 0x0f900000) == 0x01100000) { + /* + * TST : cccc 0001 0001 xxxx xxxx xxxx xxxx xxxx + * TEQ : cccc 0001 0011 xxxx xxxx xxxx xxxx xxxx + * CMP : cccc 0001 0101 xxxx xxxx xxxx xxxx xxxx + * CMN : cccc 0001 0111 xxxx xxxx xxxx xxxx xxxx + */ + asi->insn_handler = emulate_alu_tests; + } else { + /* ALU ops which write to Rd */ + asi->insn_handler = (insn & (1 << 20)) ? /* S-bit */ + emulate_alu_rwflags : emulate_alu_rflags; + } + return INSN_GOOD; +} + +static enum kprobe_insn __kprobes +space_cccc_001x(kprobe_opcode_t insn, struct arch_specific_insn *asi) +{ + /* MOVW : cccc 0011 0000 xxxx xxxx xxxx xxxx xxxx */ + /* MOVT : cccc 0011 0100 xxxx xxxx xxxx xxxx xxxx */ + if ((insn & 0x0fb00000) == 0x03000000) + return prep_emulate_rd12_modify(insn, asi); + + /* hints : cccc 0011 0010 0000 xxxx xxxx xxxx xxxx */ + if ((insn & 0x0fff0000) == 0x03200000) { + unsigned op2 = insn & 0x000000ff; + if (op2 == 0x01 || op2 == 0x04) { + /* YIELD : cccc 0011 0010 0000 xxxx xxxx 0000 0001 */ + /* SEV : cccc 0011 0010 0000 xxxx xxxx 0000 0100 */ + asi->insn[0] = insn; + asi->insn_handler = emulate_none; + return INSN_GOOD; + } else if (op2 <= 0x03) { + /* NOP : cccc 0011 0010 0000 xxxx xxxx 0000 0000 */ + /* WFE : cccc 0011 0010 0000 xxxx xxxx 0000 0010 */ + /* WFI : cccc 0011 0010 0000 xxxx xxxx 0000 0011 */ + /* + * We make WFE and WFI true NOPs to avoid stalls due + * to missing events whilst processing the probe. 
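+			 * (The probed WFE's wakeup event may fire and be
+			 * consumed while the probe is being handled;
+			 * executing the WFE in the slot afterwards would
+			 * then wait for an event that never comes.)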
+ */ + asi->insn_handler = emulate_nop; + return INSN_GOOD_NO_SLOT; + } + /* For DBG and unallocated hints it's safest to reject them */ + return INSN_REJECTED; + } + + /* + * MSR : cccc 0011 0x10 xxxx xxxx xxxx xxxx xxxx + * ALU op with S bit and Rd == 15 : + * cccc 001x xxx1 xxxx 1111 xxxx xxxx xxxx + */ + if ((insn & 0x0fb00000) == 0x03200000 || /* MSR */ + (insn & 0x0e10f000) == 0x0210f000) /* ALU s-bit, R15 */ + return INSN_REJECTED; + + /* + * Data processing: 32-bit Immediate + * ALU op : cccc 001x xxxx xxxx xxxx xxxx xxxx xxxx + * MOV : cccc 0011 101x xxxx xxxx xxxx xxxx xxxx + * *S (bit 20) updates condition codes + * ADC/SBC/RSC reads the C flag + */ + insn &= 0xfff00fff; /* Rn = r0 and Rd = r0 */ + asi->insn[0] = insn; + + if ((insn & 0x0f900000) == 0x03100000) { + /* + * TST : cccc 0011 0001 xxxx xxxx xxxx xxxx xxxx + * TEQ : cccc 0011 0011 xxxx xxxx xxxx xxxx xxxx + * CMP : cccc 0011 0101 xxxx xxxx xxxx xxxx xxxx + * CMN : cccc 0011 0111 xxxx xxxx xxxx xxxx xxxx + */ + asi->insn_handler = emulate_alu_tests_imm; + } else { + /* ALU ops which write to Rd */ + asi->insn_handler = (insn & (1 << 20)) ? /* S-bit */ + emulate_alu_imm_rwflags : emulate_alu_imm_rflags; + } + return INSN_GOOD; +} + +static enum kprobe_insn __kprobes +space_cccc_0110__1(kprobe_opcode_t insn, struct arch_specific_insn *asi) +{ + /* SEL : cccc 0110 1000 xxxx xxxx xxxx 1011 xxxx GE: !!! */ + if ((insn & 0x0ff000f0) == 0x068000b0) { + if (is_r15(insn, 12)) + return INSN_REJECTED; /* Rd is PC */ + insn &= 0xfff00ff0; /* Rd = r0, Rn = r0 */ + insn |= 0x00000001; /* Rm = r1 */ + asi->insn[0] = insn; + asi->insn_handler = emulate_sel; + return INSN_GOOD; + } + + /* SSAT : cccc 0110 101x xxxx xxxx xxxx xx01 xxxx :Q */ + /* USAT : cccc 0110 111x xxxx xxxx xxxx xx01 xxxx :Q */ + /* SSAT16 : cccc 0110 1010 xxxx xxxx xxxx 0011 xxxx :Q */ + /* USAT16 : cccc 0110 1110 xxxx xxxx xxxx 0011 xxxx :Q */ + if ((insn & 0x0fa00030) == 0x06a00010 || + (insn & 0x0fb000f0) == 0x06a00030) { + if (is_r15(insn, 12)) + return INSN_REJECTED; /* Rd is PC */ + insn &= 0xffff0ff0; /* Rd = r0, Rm = r0 */ + asi->insn[0] = insn; + asi->insn_handler = emulate_sat; + return INSN_GOOD; + } + + /* REV : cccc 0110 1011 xxxx xxxx xxxx 0011 xxxx */ + /* REV16 : cccc 0110 1011 xxxx xxxx xxxx 1011 xxxx */ + /* RBIT : cccc 0110 1111 xxxx xxxx xxxx 0011 xxxx */ + /* REVSH : cccc 0110 1111 xxxx xxxx xxxx 1011 xxxx */ + if ((insn & 0x0ff00070) == 0x06b00030 || + (insn & 0x0ff00070) == 0x06f00030) + return prep_emulate_rd12rm0(insn, asi); + + /* ??? : cccc 0110 0000 xxxx xxxx xxxx xxx1 xxxx : */ + /* SADD16 : cccc 0110 0001 xxxx xxxx xxxx 0001 xxxx :GE */ + /* SADDSUBX : cccc 0110 0001 xxxx xxxx xxxx 0011 xxxx :GE */ + /* SSUBADDX : cccc 0110 0001 xxxx xxxx xxxx 0101 xxxx :GE */ + /* SSUB16 : cccc 0110 0001 xxxx xxxx xxxx 0111 xxxx :GE */ + /* SADD8 : cccc 0110 0001 xxxx xxxx xxxx 1001 xxxx :GE */ + /* ??? : cccc 0110 0001 xxxx xxxx xxxx 1011 xxxx : */ + /* ??? : cccc 0110 0001 xxxx xxxx xxxx 1101 xxxx : */ + /* SSUB8 : cccc 0110 0001 xxxx xxxx xxxx 1111 xxxx :GE */ + /* QADD16 : cccc 0110 0010 xxxx xxxx xxxx 0001 xxxx : */ + /* QADDSUBX : cccc 0110 0010 xxxx xxxx xxxx 0011 xxxx : */ + /* QSUBADDX : cccc 0110 0010 xxxx xxxx xxxx 0101 xxxx : */ + /* QSUB16 : cccc 0110 0010 xxxx xxxx xxxx 0111 xxxx : */ + /* QADD8 : cccc 0110 0010 xxxx xxxx xxxx 1001 xxxx : */ + /* ??? : cccc 0110 0010 xxxx xxxx xxxx 1011 xxxx : */ + /* ??? 
: cccc 0110 0010 xxxx xxxx xxxx 1101 xxxx : */ + /* QSUB8 : cccc 0110 0010 xxxx xxxx xxxx 1111 xxxx : */ + /* SHADD16 : cccc 0110 0011 xxxx xxxx xxxx 0001 xxxx : */ + /* SHADDSUBX : cccc 0110 0011 xxxx xxxx xxxx 0011 xxxx : */ + /* SHSUBADDX : cccc 0110 0011 xxxx xxxx xxxx 0101 xxxx : */ + /* SHSUB16 : cccc 0110 0011 xxxx xxxx xxxx 0111 xxxx : */ + /* SHADD8 : cccc 0110 0011 xxxx xxxx xxxx 1001 xxxx : */ + /* ??? : cccc 0110 0011 xxxx xxxx xxxx 1011 xxxx : */ + /* ??? : cccc 0110 0011 xxxx xxxx xxxx 1101 xxxx : */ + /* SHSUB8 : cccc 0110 0011 xxxx xxxx xxxx 1111 xxxx : */ + /* ??? : cccc 0110 0100 xxxx xxxx xxxx xxx1 xxxx : */ + /* UADD16 : cccc 0110 0101 xxxx xxxx xxxx 0001 xxxx :GE */ + /* UADDSUBX : cccc 0110 0101 xxxx xxxx xxxx 0011 xxxx :GE */ + /* USUBADDX : cccc 0110 0101 xxxx xxxx xxxx 0101 xxxx :GE */ + /* USUB16 : cccc 0110 0101 xxxx xxxx xxxx 0111 xxxx :GE */ + /* UADD8 : cccc 0110 0101 xxxx xxxx xxxx 1001 xxxx :GE */ + /* ??? : cccc 0110 0101 xxxx xxxx xxxx 1011 xxxx : */ + /* ??? : cccc 0110 0101 xxxx xxxx xxxx 1101 xxxx : */ + /* USUB8 : cccc 0110 0101 xxxx xxxx xxxx 1111 xxxx :GE */ + /* UQADD16 : cccc 0110 0110 xxxx xxxx xxxx 0001 xxxx : */ + /* UQADDSUBX : cccc 0110 0110 xxxx xxxx xxxx 0011 xxxx : */ + /* UQSUBADDX : cccc 0110 0110 xxxx xxxx xxxx 0101 xxxx : */ + /* UQSUB16 : cccc 0110 0110 xxxx xxxx xxxx 0111 xxxx : */ + /* UQADD8 : cccc 0110 0110 xxxx xxxx xxxx 1001 xxxx : */ + /* ??? : cccc 0110 0110 xxxx xxxx xxxx 1011 xxxx : */ + /* ??? : cccc 0110 0110 xxxx xxxx xxxx 1101 xxxx : */ + /* UQSUB8 : cccc 0110 0110 xxxx xxxx xxxx 1111 xxxx : */ + /* UHADD16 : cccc 0110 0111 xxxx xxxx xxxx 0001 xxxx : */ + /* UHADDSUBX : cccc 0110 0111 xxxx xxxx xxxx 0011 xxxx : */ + /* UHSUBADDX : cccc 0110 0111 xxxx xxxx xxxx 0101 xxxx : */ + /* UHSUB16 : cccc 0110 0111 xxxx xxxx xxxx 0111 xxxx : */ + /* UHADD8 : cccc 0110 0111 xxxx xxxx xxxx 1001 xxxx : */ + /* ??? : cccc 0110 0111 xxxx xxxx xxxx 1011 xxxx : */ + /* ??? : cccc 0110 0111 xxxx xxxx xxxx 1101 xxxx : */ + /* UHSUB8 : cccc 0110 0111 xxxx xxxx xxxx 1111 xxxx : */ + if ((insn & 0x0f800010) == 0x06000010) { + if ((insn & 0x00300000) == 0x00000000 || + (insn & 0x000000e0) == 0x000000a0 || + (insn & 0x000000e0) == 0x000000c0) + return INSN_REJECTED; /* Unallocated space */ + return prep_emulate_rd12rn16rm0_wflags(insn, asi); + } + + /* PKHBT : cccc 0110 1000 xxxx xxxx xxxx x001 xxxx : */ + /* PKHTB : cccc 0110 1000 xxxx xxxx xxxx x101 xxxx : */ + if ((insn & 0x0ff00030) == 0x06800010) + return prep_emulate_rd12rn16rm0_wflags(insn, asi); + + /* SXTAB16 : cccc 0110 1000 xxxx xxxx xxxx 0111 xxxx : */ + /* SXTB16 : cccc 0110 1000 1111 xxxx xxxx 0111 xxxx : */ + /* ??? : cccc 0110 1001 xxxx xxxx xxxx 0111 xxxx : */ + /* SXTAB : cccc 0110 1010 xxxx xxxx xxxx 0111 xxxx : */ + /* SXTB : cccc 0110 1010 1111 xxxx xxxx 0111 xxxx : */ + /* SXTAH : cccc 0110 1011 xxxx xxxx xxxx 0111 xxxx : */ + /* SXTH : cccc 0110 1011 1111 xxxx xxxx 0111 xxxx : */ + /* UXTAB16 : cccc 0110 1100 xxxx xxxx xxxx 0111 xxxx : */ + /* UXTB16 : cccc 0110 1100 1111 xxxx xxxx 0111 xxxx : */ + /* ??? 
: cccc 0110 1101 xxxx xxxx xxxx 0111 xxxx :   */
+	/* UXTAB  : cccc 0110 1110 xxxx xxxx xxxx 0111 xxxx :   */
+	/* UXTB   : cccc 0110 1110 1111 xxxx xxxx 0111 xxxx :   */
+	/* UXTAH  : cccc 0110 1111 xxxx xxxx xxxx 0111 xxxx :   */
+	/* UXTH   : cccc 0110 1111 1111 xxxx xxxx 0111 xxxx :   */
+	if ((insn & 0x0f8000f0) == 0x06800070) {
+		if ((insn & 0x00300000) == 0x00100000)
+			return INSN_REJECTED;	/* Unallocated space */
+
+		if ((insn & 0x000f0000) == 0x000f0000)
+			return prep_emulate_rd12rm0(insn, asi);
+		else
+			return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+	}
+
+	/* Other instruction encodings aren't yet defined */
+	return INSN_REJECTED;
+}
+
+static enum kprobe_insn __kprobes
+space_cccc_0111__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	/* Undef : cccc 0111 1111 xxxx xxxx xxxx 1111 xxxx */
+	if ((insn & 0x0ff000f0) == 0x07f000f0)
+		return INSN_REJECTED;
+
+	/* SMLALD : cccc 0111 0100 xxxx xxxx xxxx 00x1 xxxx */
+	/* SMLSLD : cccc 0111 0100 xxxx xxxx xxxx 01x1 xxxx */
+	if ((insn & 0x0ff00090) == 0x07400010)
+		return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn, asi);
+
+	/* SMLAD  : cccc 0111 0000 xxxx xxxx xxxx 00x1 xxxx :Q */
+	/* SMUAD  : cccc 0111 0000 xxxx 1111 xxxx 00x1 xxxx :Q */
+	/* SMLSD  : cccc 0111 0000 xxxx xxxx xxxx 01x1 xxxx :Q */
+	/* SMUSD  : cccc 0111 0000 xxxx 1111 xxxx 01x1 xxxx :  */
+	/* SMMLA  : cccc 0111 0101 xxxx xxxx xxxx 00x1 xxxx :  */
+	/* SMMUL  : cccc 0111 0101 xxxx 1111 xxxx 00x1 xxxx :  */
+	/* USADA8 : cccc 0111 1000 xxxx xxxx xxxx 0001 xxxx :  */
+	/* USAD8  : cccc 0111 1000 xxxx 1111 xxxx 0001 xxxx :  */
+	if ((insn & 0x0ff00090) == 0x07000010 ||
+	    (insn & 0x0ff000d0) == 0x07500010 ||
+	    (insn & 0x0ff000f0) == 0x07800010) {
+
+		if ((insn & 0x0000f000) == 0x0000f000)
+			return prep_emulate_rd16rs8rm0_wflags(insn, asi);
+		else
+			return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
+	}
+
+	/* SMMLS  : cccc 0111 0101 xxxx xxxx xxxx 11x1 xxxx :  */
+	if ((insn & 0x0ff000d0) == 0x075000d0)
+		return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
+
+	/* SBFX   : cccc 0111 101x xxxx xxxx xxxx x101 xxxx :  */
+	/* UBFX   : cccc 0111 111x xxxx xxxx xxxx x101 xxxx :  */
+	if ((insn & 0x0fa00070) == 0x07a00050)
+		return prep_emulate_rd12rm0(insn, asi);
+
+	/* BFI    : cccc 0111 110x xxxx xxxx xxxx x001 xxxx :  */
+	/* BFC    : cccc 0111 110x xxxx xxxx xxxx x001 1111 :  */
+	if ((insn & 0x0fe00070) == 0x07c00010) {
+
+		if ((insn & 0x0000000f) == 0x0000000f)
+			return prep_emulate_rd12_modify(insn, asi);
+		else
+			return prep_emulate_rd12rn0_modify(insn, asi);
+	}
+
+	return INSN_REJECTED;
+}
+
+static enum kprobe_insn __kprobes
+space_cccc_01xx(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	/* LDR   : cccc 01xx x0x1 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRB  : cccc 01xx x1x1 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRBT : cccc 01x0 x111 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRT  : cccc 01x0 x011 xxxx xxxx xxxx xxxx xxxx */
+	/* STR   : cccc 01xx x0x0 xxxx xxxx xxxx xxxx xxxx */
+	/* STRB  : cccc 01xx x1x0 xxxx xxxx xxxx xxxx xxxx */
+	/* STRBT : cccc 01x0 x110 xxxx xxxx xxxx xxxx xxxx */
+	/* STRT  : cccc 01x0 x010 xxxx xxxx xxxx xxxx xxxx */
+
+	if ((insn & 0x00500000) == 0x00500000 && is_r15(insn, 12))
+		return INSN_REJECTED;	/* LDRB into PC */
+
+	return prep_emulate_ldr_str(insn, asi);
+}
+
+static enum kprobe_insn __kprobes
+space_cccc_100x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	/* LDM(2) : cccc 100x x101 xxxx 0xxx xxxx xxxx xxxx */
+	/* LDM(3) : cccc 100x x1x1 xxxx 1xxx xxxx xxxx xxxx */
+	if ((insn & 0x0e708000) == 0x08500000 ||
+	    (insn & 0x0e508000) == 0x08508000)
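+		/*
+		 * LDM(2) loads user-mode registers and LDM(3) also
+		 * restores the CPSR from the SPSR; neither can be
+		 * simulated safely from SVC mode.
+		 */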
return INSN_REJECTED; + + /* LDM(1) : cccc 100x x0x1 xxxx xxxx xxxx xxxx xxxx */ + /* STM(1) : cccc 100x x0x0 xxxx xxxx xxxx xxxx xxxx */ + asi->insn_handler = ((insn & 0x108000) == 0x008000) ? /* STM & R15 */ + simulate_stm1_pc : simulate_ldm1stm1; + return INSN_GOOD_NO_SLOT; +} + +static enum kprobe_insn __kprobes +space_cccc_101x(kprobe_opcode_t insn, struct arch_specific_insn *asi) +{ + /* B : cccc 1010 xxxx xxxx xxxx xxxx xxxx xxxx */ + /* BL : cccc 1011 xxxx xxxx xxxx xxxx xxxx xxxx */ + asi->insn_handler = simulate_bbl; + return INSN_GOOD_NO_SLOT; +} + +static enum kprobe_insn __kprobes +space_cccc_11xx(kprobe_opcode_t insn, struct arch_specific_insn *asi) +{ + /* Coprocessor instructions... */ + /* MCRR : cccc 1100 0100 xxxx xxxx xxxx xxxx xxxx : (Rd!=Rn) */ + /* MRRC : cccc 1100 0101 xxxx xxxx xxxx xxxx xxxx : (Rd!=Rn) */ + /* LDC : cccc 110x xxx1 xxxx xxxx xxxx xxxx xxxx */ + /* STC : cccc 110x xxx0 xxxx xxxx xxxx xxxx xxxx */ + /* CDP : cccc 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */ + /* MCR : cccc 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */ + /* MRC : cccc 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */ + + /* SVC : cccc 1111 xxxx xxxx xxxx xxxx xxxx xxxx */ + + return INSN_REJECTED; +} + +static unsigned long __kprobes __check_eq(unsigned long cpsr) +{ + return cpsr & PSR_Z_BIT; +} + +static unsigned long __kprobes __check_ne(unsigned long cpsr) +{ + return (~cpsr) & PSR_Z_BIT; +} + +static unsigned long __kprobes __check_cs(unsigned long cpsr) +{ + return cpsr & PSR_C_BIT; +} + +static unsigned long __kprobes __check_cc(unsigned long cpsr) +{ + return (~cpsr) & PSR_C_BIT; +} + +static unsigned long __kprobes __check_mi(unsigned long cpsr) +{ + return cpsr & PSR_N_BIT; +} + +static unsigned long __kprobes __check_pl(unsigned long cpsr) +{ + return (~cpsr) & PSR_N_BIT; +} + +static unsigned long __kprobes __check_vs(unsigned long cpsr) +{ + return cpsr & PSR_V_BIT; +} + +static unsigned long __kprobes __check_vc(unsigned long cpsr) +{ + return (~cpsr) & PSR_V_BIT; +} + +static unsigned long __kprobes __check_hi(unsigned long cpsr) +{ + cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ + return cpsr & PSR_C_BIT; +} + +static unsigned long __kprobes __check_ls(unsigned long cpsr) +{ + cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ + return (~cpsr) & PSR_C_BIT; +} + +static unsigned long __kprobes __check_ge(unsigned long cpsr) +{ + cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + return (~cpsr) & PSR_N_BIT; +} + +static unsigned long __kprobes __check_lt(unsigned long cpsr) +{ + cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + return cpsr & PSR_N_BIT; +} + +static unsigned long __kprobes __check_gt(unsigned long cpsr) +{ + unsigned long temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + temp |= (cpsr << 1); /* PSR_N_BIT |= PSR_Z_BIT */ + return (~temp) & PSR_N_BIT; +} + +static unsigned long __kprobes __check_le(unsigned long cpsr) +{ + unsigned long temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + temp |= (cpsr << 1); /* PSR_N_BIT |= PSR_Z_BIT */ + return temp & PSR_N_BIT; +} + +static unsigned long __kprobes __check_al(unsigned long cpsr) +{ + return true; +} + +static kprobe_check_cc * const condition_checks[16] = { + &__check_eq, &__check_ne, &__check_cs, &__check_cc, + &__check_mi, &__check_pl, &__check_vs, &__check_vc, + &__check_hi, &__check_ls, &__check_ge, &__check_lt, + &__check_gt, &__check_le, &__check_al, &__check_al +}; + +/* Return: + * INSN_REJECTED If instruction is one not allowed to kprobe, + * INSN_GOOD If instruction is supported and uses instruction 
slot,
+ * INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot.
+ *
+ * For instructions we don't want to kprobe (INSN_REJECTED return result):
+ * These are generally ones that modify the processor state, making
+ * them "hard" to simulate, such as those that switch processor modes
+ * or make accesses in alternate modes. Any of these could be simulated
+ * if the work were put into it, but the return would be low considering
+ * they should also be very rare.
+ */
+enum kprobe_insn __kprobes
+arm_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	asi->insn_check_cc = condition_checks[insn>>28];
+	asi->insn[1] = KPROBE_RETURN_INSTRUCTION;
+
+	if ((insn & 0xf0000000) == 0xf0000000)
+
+		return space_1111(insn, asi);
+
+	else if ((insn & 0x0e000000) == 0x00000000)
+
+		return space_cccc_000x(insn, asi);
+
+	else if ((insn & 0x0e000000) == 0x02000000)
+
+		return space_cccc_001x(insn, asi);
+
+	else if ((insn & 0x0f000010) == 0x06000010)
+
+		return space_cccc_0110__1(insn, asi);
+
+	else if ((insn & 0x0f000010) == 0x07000010)
+
+		return space_cccc_0111__1(insn, asi);
+
+	else if ((insn & 0x0c000000) == 0x04000000)
+
+		return space_cccc_01xx(insn, asi);
+
+	else if ((insn & 0x0e000000) == 0x08000000)
+
+		return space_cccc_100x(insn, asi);
+
+	else if ((insn & 0x0e000000) == 0x0a000000)
+
+		return space_cccc_101x(insn, asi);
+
+	return space_cccc_11xx(insn, asi);
+}
+
+void __init arm_kprobe_decode_init(void)
+{
+	find_str_pc_offset();
+}
diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c
index 129c116..1656c87 100644
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/kernel/kprobes.c
@@ -28,16 +28,14 @@
 #include <asm/traps.h>
 #include <asm/cacheflush.h>
 
-#include "kprobes.h"
-
 #define MIN_STACK_SIZE(addr) 				\
 	min((unsigned long)MAX_STACK_SIZE,		\
 	    (unsigned long)current_thread_info() + THREAD_START_SP - (addr))
 
-#define flush_insns(addr, size)				\
+#define flush_insns(addr, cnt)				\
 	flush_icache_range((unsigned long)(addr),	\
 			   (unsigned long)(addr) +	\
-			   (size))
+			   sizeof(kprobe_opcode_t) * (cnt))
 
 /* Used as a marker in ARM_pc to note when we're in a jprobe.
*/ #define JPROBE_MAGIC_ADDR 0xffffffff @@ -51,35 +49,16 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) kprobe_opcode_t insn; kprobe_opcode_t tmp_insn[MAX_INSN_SIZE]; unsigned long addr = (unsigned long)p->addr; - bool thumb; - kprobe_decode_insn_t *decode_insn; int is; - if (in_exception_text(addr)) + if (addr & 0x3 || in_exception_text(addr)) return -EINVAL; -#ifdef CONFIG_THUMB2_KERNEL - thumb = true; - addr &= ~1; /* Bit 0 would normally be set to indicate Thumb code */ - insn = ((u16 *)addr)[0]; - if (is_wide_instruction(insn)) { - insn <<= 16; - insn |= ((u16 *)addr)[1]; - decode_insn = thumb32_kprobe_decode_insn; - } else - decode_insn = thumb16_kprobe_decode_insn; -#else /* !CONFIG_THUMB2_KERNEL */ - thumb = false; - if (addr & 0x3) - return -EINVAL; insn = *p->addr; - decode_insn = arm_kprobe_decode_insn; -#endif - p->opcode = insn; p->ainsn.insn = tmp_insn; - switch ((*decode_insn)(insn, &p->ainsn)) { + switch (arm_kprobe_decode_insn(insn, &p->ainsn)) { case INSN_REJECTED: /* not supported */ return -EINVAL; @@ -89,10 +68,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return -ENOMEM; for (is = 0; is < MAX_INSN_SIZE; ++is) p->ainsn.insn[is] = tmp_insn[is]; - flush_insns(p->ainsn.insn, - sizeof(p->ainsn.insn[0]) * MAX_INSN_SIZE); - p->ainsn.insn_fn = (kprobe_insn_fn_t *) - ((uintptr_t)p->ainsn.insn | thumb); + flush_insns(p->ainsn.insn, MAX_INSN_SIZE); break; case INSN_GOOD_NO_SLOT: /* instruction doesn't need insn slot */ @@ -103,88 +79,24 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return 0; } -#ifdef CONFIG_THUMB2_KERNEL - -/* - * For a 32-bit Thumb breakpoint spanning two memory words we need to take - * special precautions to insert the breakpoint atomically, especially on SMP - * systems. This is achieved by calling this arming function using stop_machine. - */ -static int __kprobes set_t32_breakpoint(void *addr) -{ - ((u16 *)addr)[0] = KPROBE_THUMB32_BREAKPOINT_INSTRUCTION >> 16; - ((u16 *)addr)[1] = KPROBE_THUMB32_BREAKPOINT_INSTRUCTION & 0xffff; - flush_insns(addr, 2*sizeof(u16)); - return 0; -} - -void __kprobes arch_arm_kprobe(struct kprobe *p) -{ - uintptr_t addr = (uintptr_t)p->addr & ~1; /* Remove any Thumb flag */ - - if (!is_wide_instruction(p->opcode)) { - *(u16 *)addr = KPROBE_THUMB16_BREAKPOINT_INSTRUCTION; - flush_insns(addr, sizeof(u16)); - } else if (addr & 2) { - /* A 32-bit instruction spanning two words needs special care */ - stop_machine(set_t32_breakpoint, (void *)addr, &cpu_online_map); - } else { - /* Word aligned 32-bit instruction can be written atomically */ - u32 bkp = KPROBE_THUMB32_BREAKPOINT_INSTRUCTION; -#ifndef __ARMEB__ /* Swap halfwords for little-endian */ - bkp = (bkp >> 16) | (bkp << 16); -#endif - *(u32 *)addr = bkp; - flush_insns(addr, sizeof(u32)); - } -} - -#else /* !CONFIG_THUMB2_KERNEL */ - void __kprobes arch_arm_kprobe(struct kprobe *p) { - kprobe_opcode_t insn = p->opcode; - kprobe_opcode_t brkp = KPROBE_ARM_BREAKPOINT_INSTRUCTION; - if (insn >= 0xe0000000) - brkp |= 0xe0000000; /* Unconditional instruction */ - else - brkp |= insn & 0xf0000000; /* Copy condition from insn */ - *p->addr = brkp; - flush_insns(p->addr, sizeof(p->addr[0])); + *p->addr = KPROBE_BREAKPOINT_INSTRUCTION; + flush_insns(p->addr, 1); } -#endif /* !CONFIG_THUMB2_KERNEL */ - /* * The actual disarming is done here on each CPU and synchronized using * stop_machine. 
This synchronization is necessary on SMP to avoid removing * a probe between the moment the 'Undefined Instruction' exception is raised * and the moment the exception handler reads the faulting instruction from - * memory. It is also needed to atomically set the two half-words of a 32-bit - * Thumb breakpoint. + * memory. */ int __kprobes __arch_disarm_kprobe(void *p) { struct kprobe *kp = p; -#ifdef CONFIG_THUMB2_KERNEL - u16 *addr = (u16 *)((uintptr_t)kp->addr & ~1); - kprobe_opcode_t insn = kp->opcode; - unsigned int len; - - if (is_wide_instruction(insn)) { - ((u16 *)addr)[0] = insn>>16; - ((u16 *)addr)[1] = insn; - len = 2*sizeof(u16); - } else { - ((u16 *)addr)[0] = insn; - len = sizeof(u16); - } - flush_insns(addr, len); - -#else /* !CONFIG_THUMB2_KERNEL */ *kp->addr = kp->opcode; - flush_insns(kp->addr, sizeof(kp->addr[0])); -#endif + flush_insns(kp->addr, 1); return 0; } @@ -218,24 +130,12 @@ static void __kprobes set_current_kprobe(struct kprobe *p) __get_cpu_var(current_kprobe) = p; } -static void __kprobes -singlestep_skip(struct kprobe *p, struct pt_regs *regs) +static void __kprobes singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { -#ifdef CONFIG_THUMB2_KERNEL - regs->ARM_cpsr = it_advance(regs->ARM_cpsr); - if (is_wide_instruction(p->opcode)) - regs->ARM_pc += 4; - else - regs->ARM_pc += 2; -#else regs->ARM_pc += 4; -#endif -} - -static inline void __kprobes -singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) -{ - p->ainsn.insn_singlestep(p, regs); + if (p->ainsn.insn_check_cc(regs->ARM_cpsr)) + p->ainsn.insn_handler(p, regs); } /* @@ -249,23 +149,11 @@ void __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p, *cur; struct kprobe_ctlblk *kcb; + kprobe_opcode_t *addr = (kprobe_opcode_t *)regs->ARM_pc; kcb = get_kprobe_ctlblk(); cur = kprobe_running(); - -#ifdef CONFIG_THUMB2_KERNEL - /* - * First look for a probe which was registered using an address with - * bit 0 set, this is the usual situation for pointers to Thumb code. - * If not found, fallback to looking for one with bit 0 clear. - */ - p = get_kprobe((kprobe_opcode_t *)(regs->ARM_pc | 1)); - if (!p) - p = get_kprobe((kprobe_opcode_t *)regs->ARM_pc); - -#else /* ! CONFIG_THUMB2_KERNEL */ - p = get_kprobe((kprobe_opcode_t *)regs->ARM_pc); -#endif + p = get_kprobe(addr); if (p) { if (cur) { @@ -285,8 +173,7 @@ void __kprobes kprobe_handler(struct pt_regs *regs) /* impossible cases */ BUG(); } - } else if (p->ainsn.insn_check_cc(regs->ARM_cpsr)) { - /* Probe hit and conditional execution check ok. */ + } else { set_current_kprobe(p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; @@ -306,13 +193,6 @@ void __kprobes kprobe_handler(struct pt_regs *regs) } reset_current_kprobe(); } - } else { - /* - * Probe hit but conditional execution check failed, - * so just skip the instruction and continue as if - * nothing had happened. - */ - singlestep_skip(p, regs); } } else if (cur) { /* We probably hit a jprobe. Call its break handler. 
*/ @@ -420,11 +300,7 @@ void __naked __kprobes kretprobe_trampoline(void) "bl trampoline_handler \n\t" "mov lr, r0 \n\t" "ldmia sp!, {r0 - r11} \n\t" -#ifdef CONFIG_THUMB2_KERNEL - "bx lr \n\t" -#else "mov pc, lr \n\t" -#endif : : : "memory"); } @@ -502,22 +378,11 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) struct jprobe *jp = container_of(p, struct jprobe, kp); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); long sp_addr = regs->ARM_sp; - long cpsr; kcb->jprobe_saved_regs = *regs; memcpy(kcb->jprobes_stack, (void *)sp_addr, MIN_STACK_SIZE(sp_addr)); regs->ARM_pc = (long)jp->entry; - - cpsr = regs->ARM_cpsr | PSR_I_BIT; -#ifdef CONFIG_THUMB2_KERNEL - /* Set correct Thumb state in cpsr */ - if (regs->ARM_pc & 1) - cpsr |= PSR_T_BIT; - else - cpsr &= ~PSR_T_BIT; -#endif - regs->ARM_cpsr = cpsr; - + regs->ARM_cpsr |= PSR_I_BIT; preempt_disable(); return 1; } @@ -539,12 +404,7 @@ void __kprobes jprobe_return(void) * This is to prevent any simulated instruction from writing * over the regs when they are accessing the stack. */ -#ifdef CONFIG_THUMB2_KERNEL - "sub r0, %0, %1 \n\t" - "mov sp, r0 \n\t" -#else "sub sp, %0, %1 \n\t" -#endif "ldr r0, ="__stringify(JPROBE_MAGIC_ADDR)"\n\t" "str %0, [sp, %2] \n\t" "str r0, [sp, %3] \n\t" @@ -555,28 +415,15 @@ void __kprobes jprobe_return(void) * Return to the context saved by setjmp_pre_handler * and restored by longjmp_break_handler. */ -#ifdef CONFIG_THUMB2_KERNEL - "ldr lr, [sp, %2] \n\t" /* lr = saved sp */ - "ldrd r0, r1, [sp, %5] \n\t" /* r0,r1 = saved lr,pc */ - "ldr r2, [sp, %4] \n\t" /* r2 = saved psr */ - "stmdb lr!, {r0, r1, r2} \n\t" /* push saved lr and */ - /* rfe context */ - "ldmia sp, {r0 - r12} \n\t" - "mov sp, lr \n\t" - "ldr lr, [sp], #4 \n\t" - "rfeia sp! \n\t" -#else "ldr r0, [sp, %4] \n\t" "msr cpsr_cxsf, r0 \n\t" "ldmia sp, {r0 - pc} \n\t" -#endif : : "r" (kcb->jprobe_saved_regs.ARM_sp), "I" (sizeof(struct pt_regs) * 2), "J" (offsetof(struct pt_regs, ARM_sp)), "J" (offsetof(struct pt_regs, ARM_pc)), - "J" (offsetof(struct pt_regs, ARM_cpsr)), - "J" (offsetof(struct pt_regs, ARM_lr)) + "J" (offsetof(struct pt_regs, ARM_cpsr)) : "memory", "cc"); } @@ -613,44 +460,17 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p) return 0; } -#ifdef CONFIG_THUMB2_KERNEL - -static struct undef_hook kprobes_thumb16_break_hook = { - .instr_mask = 0xffff, - .instr_val = KPROBE_THUMB16_BREAKPOINT_INSTRUCTION, - .cpsr_mask = MODE_MASK, - .cpsr_val = SVC_MODE, - .fn = kprobe_trap_handler, -}; - -static struct undef_hook kprobes_thumb32_break_hook = { +static struct undef_hook kprobes_break_hook = { .instr_mask = 0xffffffff, - .instr_val = KPROBE_THUMB32_BREAKPOINT_INSTRUCTION, + .instr_val = KPROBE_BREAKPOINT_INSTRUCTION, .cpsr_mask = MODE_MASK, .cpsr_val = SVC_MODE, .fn = kprobe_trap_handler, }; -#else /* !CONFIG_THUMB2_KERNEL */ - -static struct undef_hook kprobes_arm_break_hook = { - .instr_mask = 0x0fffffff, - .instr_val = KPROBE_ARM_BREAKPOINT_INSTRUCTION, - .cpsr_mask = MODE_MASK, - .cpsr_val = SVC_MODE, - .fn = kprobe_trap_handler, -}; - -#endif /* !CONFIG_THUMB2_KERNEL */ - int __init arch_init_kprobes() { arm_kprobe_decode_init(); -#ifdef CONFIG_THUMB2_KERNEL - register_undef_hook(&kprobes_thumb16_break_hook); - register_undef_hook(&kprobes_thumb32_break_hook); -#else - register_undef_hook(&kprobes_arm_break_hook); -#endif + register_undef_hook(&kprobes_break_hook); return 0; } diff --git a/arch/arm/kernel/leds.c b/arch/arm/kernel/leds.c index 0bcd383..136e837 100644 --- a/arch/arm/kernel/leds.c +++ 
b/arch/arm/kernel/leds.c @@ -7,11 +7,12 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include <linux/export.h> +#include <linux/module.h> #include <linux/init.h> +#include <linux/notifier.h> +#include <linux/cpu.h> #include <linux/sysdev.h> #include <linux/syscore_ops.h> -#include <linux/string.h> #include <asm/leds.h> @@ -102,6 +103,25 @@ static struct syscore_ops leds_syscore_ops = { .resume = leds_resume, }; +static int leds_idle_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + switch (val) { + case IDLE_START: + leds_event(led_idle_start); + break; + case IDLE_END: + leds_event(led_idle_end); + break; + } + + return 0; +} + +static struct notifier_block leds_idle_nb = { + .notifier_call = leds_idle_notifier, +}; + static int __init leds_init(void) { int ret; @@ -110,8 +130,12 @@ static int __init leds_init(void) ret = sysdev_register(&leds_device); if (ret == 0) ret = sysdev_create_file(&leds_device, &attr_event); - if (ret == 0) + + if (ret == 0) { register_syscore_ops(&leds_syscore_ops); + idle_notifier_register(&leds_idle_nb); + } + return ret; } diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 1e9be5d..016d6a0 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -33,7 +33,7 @@ * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off. */ #undef MODULES_VADDR -#define MODULES_VADDR (((unsigned long)_etext + ~PMD_MASK) & PMD_MASK) +#define MODULES_VADDR (((unsigned long)_etext + ~PGDIR_MASK) & PGDIR_MASK) #endif #ifdef CONFIG_MMU @@ -43,7 +43,25 @@ void *module_alloc(unsigned long size) GFP_KERNEL, PAGE_KERNEL_EXEC, -1, __builtin_return_address(0)); } -#endif +#else /* CONFIG_MMU */ +void *module_alloc(unsigned long size) +{ + return size == 0 ? NULL : vmalloc(size); +} +#endif /* !CONFIG_MMU */ + +void module_free(struct module *module, void *region) +{ + vfree(region); +} + +int module_frob_arch_sections(Elf_Ehdr *hdr, + Elf_Shdr *sechdrs, + char *secstrings, + struct module *mod) +{ + return 0; +} int apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, @@ -247,6 +265,15 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, return 0; } +int +apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relsec, struct module *module) +{ + printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n", + module->name); + return -ENOEXEC; +} + struct mod_unwind_map { const Elf_Shdr *unw_sec; const Elf_Shdr *txt_sec; @@ -323,11 +350,7 @@ int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, #endif s = find_mod_section(hdr, sechdrs, ".alt.smp.init"); if (s && !is_smp()) -#ifdef CONFIG_SMP_ON_UP fixup_smp((void *)s->sh_addr, s->sh_size); -#else - return -EINVAL; -#endif return 0; } diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 4a2db48..5e1ac82 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -12,10 +12,9 @@ */ #define pr_fmt(fmt) "hw perfevents: " fmt -#include <linux/bitmap.h> #include <linux/interrupt.h> #include <linux/kernel.h> -#include <linux/export.h> +#include <linux/module.h> #include <linux/perf_event.h> #include <linux/platform_device.h> #include <linux/spinlock.h> @@ -27,8 +26,16 @@ #include <asm/pmu.h> #include <asm/stacktrace.h> +static struct platform_device *pmu_device; + /* - * ARMv6 supports a maximum of 3 events, starting from index 0. 
If we add + * Hardware lock to serialize accesses to PMU registers. Needed for the + * read/modify/write sequences. + */ +static DEFINE_RAW_SPINLOCK(pmu_lock); + +/* + * ARMv6 supports a maximum of 3 events, starting from index 1. If we add * another platform that supports more, we need to increase this to be the * largest of all platforms. * @@ -36,24 +43,62 @@ * cycle counter CCNT + 31 events counters CNT0..30. * Cortex-A8 has 1+4 counters, Cortex-A9 has 1+6 counters. */ -#define ARMPMU_MAX_HWEVENTS 32 +#define ARMPMU_MAX_HWEVENTS 33 -static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events); -static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask); -static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); +/* The events for a given CPU. */ +struct cpu_hw_events { + /* + * The events that are active on the CPU for the given index. Index 0 + * is reserved. + */ + struct perf_event *events[ARMPMU_MAX_HWEVENTS]; -#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) + /* + * A 1 bit for an index indicates that the counter is being used for + * an event. A 0 means that the counter can be used. + */ + unsigned long used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)]; + + /* + * A 1 bit for an index indicates that the counter is actively being + * used. + */ + unsigned long active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)]; +}; +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); + +struct arm_pmu { + enum arm_perf_pmu_ids id; + const char *name; + irqreturn_t (*handle_irq)(int irq_num, void *dev); + void (*enable)(struct hw_perf_event *evt, int idx); + void (*disable)(struct hw_perf_event *evt, int idx); + int (*get_event_idx)(struct cpu_hw_events *cpuc, + struct hw_perf_event *hwc); + u32 (*read_counter)(int idx); + void (*write_counter)(int idx, u32 val); + void (*start)(void); + void (*stop)(void); + void (*reset)(void *); + const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + const unsigned (*event_map)[PERF_COUNT_HW_MAX]; + u32 raw_event_mask; + int num_events; + u64 max_period; +}; /* Set at runtime when we know what CPU type we are. 
*/ -static struct arm_pmu *cpu_pmu; +static const struct arm_pmu *armpmu; enum arm_perf_pmu_ids armpmu_get_pmu_id(void) { int id = -ENODEV; - if (cpu_pmu != NULL) - id = cpu_pmu->id; + if (armpmu != NULL) + id = armpmu->id; return id; } @@ -64,8 +109,8 @@ armpmu_get_max_events(void) { int max_events = 0; - if (cpu_pmu != NULL) - max_events = cpu_pmu->num_events; + if (armpmu != NULL) + max_events = armpmu->num_events; return max_events; } @@ -85,11 +130,7 @@ EXPORT_SYMBOL_GPL(perf_num_counters); #define CACHE_OP_UNSUPPORTED 0xFFFF static int -armpmu_map_cache_event(const unsigned (*cache_map) - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX], - u64 config) +armpmu_map_cache_event(u64 config) { unsigned int cache_type, cache_op, cache_result, ret; @@ -105,7 +146,7 @@ armpmu_map_cache_event(const unsigned (*cache_map) if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) return -EINVAL; - ret = (int)(*cache_map)[cache_type][cache_op][cache_result]; + ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result]; if (ret == CACHE_OP_UNSUPPORTED) return -ENOENT; @@ -114,51 +155,23 @@ armpmu_map_cache_event(const unsigned (*cache_map) } static int -armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config) +armpmu_map_event(u64 config) { - int mapping; - - if (config >= PERF_COUNT_HW_MAX) - return -ENOENT; - - mapping = (*event_map)[config]; - return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping; + int mapping = (*armpmu->event_map)[config]; + return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping; } static int -armpmu_map_raw_event(u32 raw_event_mask, u64 config) +armpmu_map_raw_event(u64 config) { - return (int)(config & raw_event_mask); + return (int)(config & armpmu->raw_event_mask); } -static int map_cpu_event(struct perf_event *event, - const unsigned (*event_map)[PERF_COUNT_HW_MAX], - const unsigned (*cache_map) - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX], - u32 raw_event_mask) -{ - u64 config = event->attr.config; - - switch (event->attr.type) { - case PERF_TYPE_HARDWARE: - return armpmu_map_event(event_map, config); - case PERF_TYPE_HW_CACHE: - return armpmu_map_cache_event(cache_map, config); - case PERF_TYPE_RAW: - return armpmu_map_raw_event(raw_event_mask, config); - } - - return -ENOENT; -} - -int +static int armpmu_event_set_period(struct perf_event *event, struct hw_perf_event *hwc, int idx) { - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); s64 left = local64_read(&hwc->period_left); s64 period = hwc->sample_period; int ret = 0; @@ -189,12 +202,11 @@ armpmu_event_set_period(struct perf_event *event, return ret; } -u64 +static u64 armpmu_event_update(struct perf_event *event, struct hw_perf_event *hwc, - int idx) + int idx, int overflow) { - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); u64 delta, prev_raw_count, new_raw_count; again: @@ -205,7 +217,13 @@ again: new_raw_count) != prev_raw_count) goto again; - delta = (new_raw_count - prev_raw_count) & armpmu->max_period; + new_raw_count &= armpmu->max_period; + prev_raw_count &= armpmu->max_period; + + if (overflow) + delta = armpmu->max_period - prev_raw_count + new_raw_count + 1; + else + delta = new_raw_count - prev_raw_count; local64_add(delta, &event->count); local64_sub(delta, &hwc->period_left); @@ -222,15 +240,17 @@ armpmu_read(struct perf_event *event) if (hwc->idx < 0) return; - armpmu_event_update(event, hwc, hwc->idx); + armpmu_event_update(event, hwc, hwc->idx, 0); } static void 
armpmu_stop(struct perf_event *event, int flags) { - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; + if (!armpmu) + return; + /* * ARM pmu always has to update the counter, so ignore * PERF_EF_UPDATE, see comments in armpmu_start(). @@ -238,7 +258,7 @@ armpmu_stop(struct perf_event *event, int flags) if (!(hwc->state & PERF_HES_STOPPED)) { armpmu->disable(hwc, hwc->idx); barrier(); /* why? */ - armpmu_event_update(event, hwc, hwc->idx); + armpmu_event_update(event, hwc, hwc->idx, 0); hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; } } @@ -246,9 +266,11 @@ armpmu_stop(struct perf_event *event, int flags) static void armpmu_start(struct perf_event *event, int flags) { - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; + if (!armpmu) + return; + /* * ARM pmu always has to reprogram the period, so ignore * PERF_EF_RELOAD, see the comment below. @@ -271,16 +293,16 @@ armpmu_start(struct perf_event *event, int flags) static void armpmu_del(struct perf_event *event, int flags) { - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *hw_events = armpmu->get_hw_events(); + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; WARN_ON(idx < 0); + clear_bit(idx, cpuc->active_mask); armpmu_stop(event, PERF_EF_UPDATE); - hw_events->events[idx] = NULL; - clear_bit(idx, hw_events->used_mask); + cpuc->events[idx] = NULL; + clear_bit(idx, cpuc->used_mask); perf_event_update_userpage(event); } @@ -288,8 +310,7 @@ armpmu_del(struct perf_event *event, int flags) static int armpmu_add(struct perf_event *event, int flags) { - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *hw_events = armpmu->get_hw_events(); + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx; int err = 0; @@ -297,7 +318,7 @@ armpmu_add(struct perf_event *event, int flags) perf_pmu_disable(event->pmu); /* If we don't have a space for the counter then finish early. */ - idx = armpmu->get_event_idx(hw_events, hwc); + idx = armpmu->get_event_idx(cpuc, hwc); if (idx < 0) { err = idx; goto out; @@ -309,7 +330,8 @@ armpmu_add(struct perf_event *event, int flags) */ event->hw.idx = idx; armpmu->disable(hwc, idx); - hw_events->events[idx] = event; + cpuc->events[idx] = event; + set_bit(idx, cpuc->active_mask); hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; if (flags & PERF_EF_START) @@ -323,172 +345,148 @@ out: return err; } +static struct pmu pmu; + static int -validate_event(struct pmu_hw_events *hw_events, +validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) { - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event fake_event = event->hw; - struct pmu *leader_pmu = event->group_leader->pmu; - - if (is_software_event(event)) - return 1; - if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF) + if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF) return 1; - if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) - return 1; - - return armpmu->get_event_idx(hw_events, &fake_event) >= 0; + return armpmu->get_event_idx(cpuc, &fake_event) >= 0; } static int validate_group(struct perf_event *event) { struct perf_event *sibling, *leader = event->group_leader; - struct pmu_hw_events fake_pmu; - DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS); + struct cpu_hw_events fake_pmu; - /* - * Initialise the fake PMU. 
We only need to populate the - * used_mask for the purposes of validation. - */ - memset(fake_used_mask, 0, sizeof(fake_used_mask)); - fake_pmu.used_mask = fake_used_mask; + memset(&fake_pmu, 0, sizeof(fake_pmu)); if (!validate_event(&fake_pmu, leader)) - return -EINVAL; + return -ENOSPC; list_for_each_entry(sibling, &leader->sibling_list, group_entry) { if (!validate_event(&fake_pmu, sibling)) - return -EINVAL; + return -ENOSPC; } if (!validate_event(&fake_pmu, event)) - return -EINVAL; + return -ENOSPC; return 0; } static irqreturn_t armpmu_platform_irq(int irq, void *dev) { - struct arm_pmu *armpmu = (struct arm_pmu *) dev; - struct platform_device *plat_device = armpmu->plat_device; - struct arm_pmu_platdata *plat = dev_get_platdata(&plat_device->dev); + struct arm_pmu_platdata *plat = dev_get_platdata(&pmu_device->dev); return plat->handle_irq(irq, dev, armpmu->handle_irq); } -static void -armpmu_release_hardware(struct arm_pmu *armpmu) -{ - int i, irq, irqs; - struct platform_device *pmu_device = armpmu->plat_device; - - irqs = min(pmu_device->num_resources, num_possible_cpus()); - - for (i = 0; i < irqs; ++i) { - if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs)) - continue; - irq = platform_get_irq(pmu_device, i); - if (irq >= 0) - free_irq(irq, armpmu); - } - - release_pmu(armpmu->type); -} - static int -armpmu_reserve_hardware(struct arm_pmu *armpmu) +armpmu_reserve_hardware(void) { struct arm_pmu_platdata *plat; irq_handler_t handle_irq; - int i, err, irq, irqs; - struct platform_device *pmu_device = armpmu->plat_device; + int i, err = -ENODEV, irq; - if (!pmu_device) - return -ENODEV; - - err = reserve_pmu(armpmu->type); - if (err) { + pmu_device = reserve_pmu(ARM_PMU_DEVICE_CPU); + if (IS_ERR(pmu_device)) { pr_warning("unable to reserve pmu\n"); - return err; + return PTR_ERR(pmu_device); } + init_pmu(ARM_PMU_DEVICE_CPU); + plat = dev_get_platdata(&pmu_device->dev); if (plat && plat->handle_irq) handle_irq = armpmu_platform_irq; else handle_irq = armpmu->handle_irq; - irqs = min(pmu_device->num_resources, num_possible_cpus()); - if (irqs < 1) { + if (pmu_device->num_resources < 1) { pr_err("no irqs for PMUs defined\n"); return -ENODEV; } - for (i = 0; i < irqs; ++i) { - err = 0; + for (i = 0; i < pmu_device->num_resources; ++i) { irq = platform_get_irq(pmu_device, i); if (irq < 0) continue; - /* - * If we have a single PMU interrupt that we can't shift, - * assume that we're running on a uniprocessor machine and - * continue. Otherwise, continue without this interrupt. 
- */ - if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { - pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, i); - continue; - } - err = request_irq(irq, handle_irq, IRQF_DISABLED | IRQF_NOBALANCING, - "arm-pmu", armpmu); + "armpmu", NULL); if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); - armpmu_release_hardware(armpmu); - return err; + pr_warning("unable to request IRQ%d for ARM perf " + "counters\n", irq); + break; } + } - cpumask_set_cpu(i, &armpmu->active_irqs); + if (err) { + for (i = i - 1; i >= 0; --i) { + irq = platform_get_irq(pmu_device, i); + if (irq >= 0) + free_irq(irq, NULL); + } + release_pmu(pmu_device); + pmu_device = NULL; } - return 0; + return err; } static void -hw_perf_event_destroy(struct perf_event *event) +armpmu_release_hardware(void) { - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - atomic_t *active_events = &armpmu->active_events; - struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex; + int i, irq; - if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) { - armpmu_release_hardware(armpmu); - mutex_unlock(pmu_reserve_mutex); + for (i = pmu_device->num_resources - 1; i >= 0; --i) { + irq = platform_get_irq(pmu_device, i); + if (irq >= 0) + free_irq(irq, NULL); } + armpmu->stop(); + + release_pmu(pmu_device); + pmu_device = NULL; } -static int -event_requires_mode_exclusion(struct perf_event_attr *attr) +static atomic_t active_events = ATOMIC_INIT(0); +static DEFINE_MUTEX(pmu_reserve_mutex); + +static void +hw_perf_event_destroy(struct perf_event *event) { - return attr->exclude_idle || attr->exclude_user || - attr->exclude_kernel || attr->exclude_hv; + if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) { + armpmu_release_hardware(); + mutex_unlock(&pmu_reserve_mutex); + } } static int __hw_perf_event_init(struct perf_event *event) { - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int mapping, err; - mapping = armpmu->map_event(event); + /* Decode the generic type into an ARM event identifier. */ + if (PERF_TYPE_HARDWARE == event->attr.type) { + mapping = armpmu_map_event(event->attr.config); + } else if (PERF_TYPE_HW_CACHE == event->attr.type) { + mapping = armpmu_map_cache_event(event->attr.config); + } else if (PERF_TYPE_RAW == event->attr.type) { + mapping = armpmu_map_raw_event(event->attr.config); + } else { + pr_debug("event type %x not supported\n", event->attr.type); + return -EOPNOTSUPP; + } if (mapping < 0) { pr_debug("event %x:%llx not supported\n", event->attr.type, @@ -497,40 +495,37 @@ __hw_perf_event_init(struct perf_event *event) } /* - * We don't assign an index until we actually place the event onto - * hardware. Use -1 to signify that we haven't decided where to put it - * yet. For SMP systems, each core has its own PMU so we can't do any - * clever allocation or constraints checking at this point. - */ - hwc->idx = -1; - hwc->config_base = 0; - hwc->config = 0; - hwc->event_base = 0; - - /* * Check whether we need to exclude the counter from certain modes. + * The ARM performance counters are on all of the time so if someone + * has asked us for some excludes then we have to fail. 
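+ * (A request with any of attr.exclude_user, exclude_kernel, exclude_hv + * or exclude_idle set, as the perf tool's :u and :k event modifiers do, + * is therefore refused with -EPERM just below.)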
*/ - if ((!armpmu->set_event_filter || - armpmu->set_event_filter(hwc, &event->attr)) && - event_requires_mode_exclusion(&event->attr)) { + if (event->attr.exclude_kernel || event->attr.exclude_user || + event->attr.exclude_hv || event->attr.exclude_idle) { pr_debug("ARM performance counters do not support " "mode exclusion\n"); return -EPERM; } /* - * Store the event encoding into the config_base field. + * We don't assign an index until we actually place the event onto + * hardware. Use -1 to signify that we haven't decided where to put it + * yet. For SMP systems, each core has its own PMU so we can't do any + * clever allocation or constraints checking at this point. + */ + hwc->idx = -1; + + /* + * Store the event encoding into the config_base field. config and + * event_base are unused as the only 2 things we need to know are + * the event mapping and the counter to use. The counter to use is + * also the index and the config_base is the event type. */ - hwc->config_base |= (unsigned long)mapping; + hwc->config_base = (unsigned long)mapping; + hwc->config = 0; + hwc->event_base = 0; if (!hwc->sample_period) { - /* - * For non-sampling runs, limit the sample_period to half - * of the counter width. That way, the new counter value - * is far less likely to overtake the previous one unless - * you have some serious IRQ latency issues. - */ - hwc->sample_period = armpmu->max_period >> 1; + hwc->sample_period = armpmu->max_period; hwc->last_period = hwc->sample_period; local64_set(&hwc->period_left, hwc->sample_period); } @@ -547,23 +542,32 @@ static int armpmu_event_init(struct perf_event *event) { - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); int err = 0; - atomic_t *active_events = &armpmu->active_events; - if (armpmu->map_event(event) == -ENOENT) + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: return -ENOENT; + } + + if (!armpmu) + return -ENODEV; event->destroy = hw_perf_event_destroy; - if (!atomic_inc_not_zero(active_events)) { - mutex_lock(&armpmu->reserve_mutex); - if (atomic_read(active_events) == 0) - err = armpmu_reserve_hardware(armpmu); + if (!atomic_inc_not_zero(&active_events)) { + mutex_lock(&pmu_reserve_mutex); + if (atomic_read(&active_events) == 0) { + err = armpmu_reserve_hardware(); + } if (!err) - atomic_inc(active_events); - mutex_unlock(&armpmu->reserve_mutex); + atomic_inc(&active_events); + mutex_unlock(&pmu_reserve_mutex); } if (err) @@ -578,9 +582,22 @@ static void armpmu_enable(struct pmu *pmu) { - struct arm_pmu *armpmu = to_arm_pmu(pmu); - struct pmu_hw_events *hw_events = armpmu->get_hw_events(); - int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); + /* Enable all of the perf events on hardware. 
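+ * (Walk this CPU's event array and re-enable each installed counter; + * the PMU itself is started below only if at least one event was found.)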
*/ + int idx, enabled = 0; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (!armpmu) + return; + + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + + if (!event) + continue; + + armpmu->enable(&event->hw, idx); + enabled = 1; + } if (enabled) armpmu->start(); @@ -588,32 +605,20 @@ static void armpmu_enable(struct pmu *pmu) static void armpmu_disable(struct pmu *pmu) { - struct arm_pmu *armpmu = to_arm_pmu(pmu); - armpmu->stop(); + if (armpmu) + armpmu->stop(); } -static void __init armpmu_init(struct arm_pmu *armpmu) -{ - atomic_set(&armpmu->active_events, 0); - mutex_init(&armpmu->reserve_mutex); - - armpmu->pmu = (struct pmu) { - .pmu_enable = armpmu_enable, - .pmu_disable = armpmu_disable, - .event_init = armpmu_event_init, - .add = armpmu_add, - .del = armpmu_del, - .start = armpmu_start, - .stop = armpmu_stop, - .read = armpmu_read, - }; -} - -int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type) -{ - armpmu_init(armpmu); - return perf_pmu_register(&armpmu->pmu, name, type); -} +static struct pmu pmu = { + .pmu_enable = armpmu_enable, + .pmu_disable = armpmu_disable, + .event_init = armpmu_event_init, + .add = armpmu_add, + .del = armpmu_del, + .start = armpmu_start, + .stop = armpmu_stop, + .read = armpmu_read, +}; /* Include the PMU-specific implementations. */ #include "perf_event_xscale.c" @@ -625,75 +630,14 @@ int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type) * This requires SMP to be available, so exists as a separate initcall. */ static int __init -cpu_pmu_reset(void) -{ - if (cpu_pmu && cpu_pmu->reset) - return on_each_cpu(cpu_pmu->reset, NULL, 1); - return 0; -} -arch_initcall(cpu_pmu_reset); - -/* - * PMU platform driver and devicetree bindings. - */ -static struct of_device_id armpmu_of_device_ids[] = { - {.compatible = "arm,cortex-a9-pmu"}, - {.compatible = "arm,cortex-a8-pmu"}, - {.compatible = "arm,arm1136-pmu"}, - {.compatible = "arm,arm1176-pmu"}, - {}, -}; - -static struct platform_device_id armpmu_plat_device_ids[] = { - {.name = "arm-pmu"}, - {}, -}; - -static int __devinit armpmu_device_probe(struct platform_device *pdev) +armpmu_reset(void) { - if (!cpu_pmu) - return -ENODEV; - - cpu_pmu->plat_device = pdev; + if (armpmu && armpmu->reset) + return on_each_cpu(armpmu->reset, NULL, 1); return 0; } +arch_initcall(armpmu_reset); -static struct platform_driver armpmu_driver = { - .driver = { - .name = "arm-pmu", - .of_match_table = armpmu_of_device_ids, - }, - .probe = armpmu_device_probe, - .id_table = armpmu_plat_device_ids, -}; - -static int __init register_pmu_driver(void) -{ - return platform_driver_register(&armpmu_driver); -} -device_initcall(register_pmu_driver); - -static struct pmu_hw_events *armpmu_get_cpu_events(void) -{ - return &__get_cpu_var(cpu_hw_events); -} - -static void __init cpu_pmu_init(struct arm_pmu *armpmu) -{ - int cpu; - for_each_possible_cpu(cpu) { - struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu); - events->events = per_cpu(hw_events, cpu); - events->used_mask = per_cpu(used_mask, cpu); - raw_spin_lock_init(&events->pmu_lock); - } - armpmu->get_hw_events = armpmu_get_cpu_events; - armpmu->type = ARM_PMU_DEVICE_CPU; -} - -/* - * CPU PMU identification and registration. 
- */ static int __init init_hw_perf_events(void) { @@ -707,22 +651,22 @@ init_hw_perf_events(void) case 0xB360: /* ARM1136 */ case 0xB560: /* ARM1156 */ case 0xB760: /* ARM1176 */ - cpu_pmu = armv6pmu_init(); + armpmu = armv6pmu_init(); break; case 0xB020: /* ARM11mpcore */ - cpu_pmu = armv6mpcore_pmu_init(); + armpmu = armv6mpcore_pmu_init(); break; case 0xC080: /* Cortex-A8 */ - cpu_pmu = armv7_a8_pmu_init(); + armpmu = armv7_a8_pmu_init(); break; case 0xC090: /* Cortex-A9 */ - cpu_pmu = armv7_a9_pmu_init(); + armpmu = armv7_a9_pmu_init(); break; case 0xC050: /* Cortex-A5 */ - cpu_pmu = armv7_a5_pmu_init(); + armpmu = armv7_a5_pmu_init(); break; case 0xC0F0: /* Cortex-A15 */ - cpu_pmu = armv7_a15_pmu_init(); + armpmu = armv7_a15_pmu_init(); break; } /* Intel CPUs [xscale]. */ @@ -730,23 +674,23 @@ init_hw_perf_events(void) part_number = (cpuid >> 13) & 0x7; switch (part_number) { case 1: - cpu_pmu = xscale1pmu_init(); + armpmu = xscale1pmu_init(); break; case 2: - cpu_pmu = xscale2pmu_init(); + armpmu = xscale2pmu_init(); break; } } - if (cpu_pmu) { + if (armpmu) { pr_info("enabled with %s PMU driver, %d counters available\n", - cpu_pmu->name, cpu_pmu->num_events); - cpu_pmu_init(cpu_pmu); - armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW); + armpmu->name, armpmu->num_events); } else { pr_info("no hardware support available\n"); } + perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); + return 0; } early_initcall(init_hw_perf_events); diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 0ad3c6f..f1e8dd9 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -54,7 +54,7 @@ enum armv6_perf_types { }; enum armv6_counters { - ARMV6_CYCLE_COUNTER = 0, + ARMV6_CYCLE_COUNTER = 1, ARMV6_COUNTER0, ARMV6_COUNTER1, }; @@ -173,20 +173,6 @@ static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, }; enum armv6mpcore_perf_types { @@ -324,20 +310,6 @@ static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, }; static inline unsigned long @@ -433,7 +405,6 @@ armv6pmu_enable_event(struct hw_perf_event *hwc, int idx) { unsigned long val, mask, evt, flags; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); if (ARMV6_CYCLE_COUNTER == idx) { mask = 0; @@ -455,12 +426,12 @@ armv6pmu_enable_event(struct hw_perf_event *hwc, * Mask out the current event and set the counter to count the event * that we're interested in. 
*/ - raw_spin_lock_irqsave(&events->pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = armv6_pmcr_read(); val &= ~mask; val |= evt; armv6_pmcr_write(val); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static irqreturn_t @@ -469,7 +440,7 @@ armv6pmu_handle_irq(int irq_num, { unsigned long pmcr = armv6_pmcr_read(); struct perf_sample_data data; - struct pmu_hw_events *cpuc; + struct cpu_hw_events *cpuc; struct pt_regs *regs; int idx; @@ -488,12 +459,11 @@ armv6pmu_handle_irq(int irq_num, perf_sample_data_init(&data, 0); cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx < cpu_pmu->num_events; ++idx) { + for (idx = 0; idx <= armpmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; - /* Ignore if we don't have an event. */ - if (!event) + if (!test_bit(idx, cpuc->active_mask)) continue; /* @@ -504,13 +474,13 @@ armv6pmu_handle_irq(int irq_num, continue; hwc = &event->hw; - armpmu_event_update(event, hwc, idx); + armpmu_event_update(event, hwc, idx, 1); data.period = event->hw.last_period; if (!armpmu_event_set_period(event, hwc, idx)) continue; - if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(hwc, idx); + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); } /* @@ -529,30 +499,28 @@ static void armv6pmu_start(void) { unsigned long flags, val; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); - raw_spin_lock_irqsave(&events->pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = armv6_pmcr_read(); val |= ARMV6_PMCR_ENABLE; armv6_pmcr_write(val); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void armv6pmu_stop(void) { unsigned long flags, val; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); - raw_spin_lock_irqsave(&events->pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = armv6_pmcr_read(); val &= ~ARMV6_PMCR_ENABLE; armv6_pmcr_write(val); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static int -armv6pmu_get_event_idx(struct pmu_hw_events *cpuc, +armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *event) { /* Always place a cycle counter into the cycle counter. */ @@ -582,7 +550,6 @@ armv6pmu_disable_event(struct hw_perf_event *hwc, int idx) { unsigned long val, mask, evt, flags; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); if (ARMV6_CYCLE_COUNTER == idx) { mask = ARMV6_PMCR_CCOUNT_IEN; @@ -603,12 +570,12 @@ armv6pmu_disable_event(struct hw_perf_event *hwc, * of ETM bus signal assertion cycles. The external reporting should * be disabled and so this should never increment. */ - raw_spin_lock_irqsave(&events->pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = armv6_pmcr_read(); val &= ~mask; val |= evt; armv6_pmcr_write(val); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void @@ -616,7 +583,6 @@ armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, int idx) { unsigned long val, mask, flags, evt = 0; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); if (ARMV6_CYCLE_COUNTER == idx) { mask = ARMV6_PMCR_CCOUNT_IEN; @@ -633,21 +599,15 @@ armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, * Unlike UP ARMv6, we don't have a way of stopping the counters. We * simply disable the interrupt reporting. 
*/ - raw_spin_lock_irqsave(&events->pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = armv6_pmcr_read(); val &= ~mask; val |= evt; armv6_pmcr_write(val); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); -} - -static int armv6_map_event(struct perf_event *event) -{ - return map_cpu_event(event, &armv6_perf_map, - &armv6_perf_cache_map, 0xFF); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } -static struct arm_pmu armv6pmu = { +static const struct arm_pmu armv6pmu = { .id = ARM_PERF_PMU_ID_V6, .name = "v6", .handle_irq = armv6pmu_handle_irq, @@ -658,12 +618,14 @@ static struct arm_pmu armv6pmu = { .get_event_idx = armv6pmu_get_event_idx, .start = armv6pmu_start, .stop = armv6pmu_stop, - .map_event = armv6_map_event, + .cache_map = &armv6_perf_cache_map, + .event_map = &armv6_perf_map, + .raw_event_mask = 0xFF, .num_events = 3, .max_period = (1LLU << 32) - 1, }; -static struct arm_pmu *__init armv6pmu_init(void) +static const struct arm_pmu *__init armv6pmu_init(void) { return &armv6pmu; } @@ -675,14 +637,7 @@ static struct arm_pmu *__init armv6pmu_init(void) * disable the interrupt reporting and update the event. When unthrottling we * reset the period and enable the interrupt reporting. */ - -static int armv6mpcore_map_event(struct perf_event *event) -{ - return map_cpu_event(event, &armv6mpcore_perf_map, - &armv6mpcore_perf_cache_map, 0xFF); -} - -static struct arm_pmu armv6mpcore_pmu = { +static const struct arm_pmu armv6mpcore_pmu = { .id = ARM_PERF_PMU_ID_V6MP, .name = "v6mpcore", .handle_irq = armv6pmu_handle_irq, @@ -693,22 +648,24 @@ static struct arm_pmu armv6mpcore_pmu = { .get_event_idx = armv6pmu_get_event_idx, .start = armv6pmu_start, .stop = armv6pmu_stop, - .map_event = armv6mpcore_map_event, + .cache_map = &armv6mpcore_perf_cache_map, + .event_map = &armv6mpcore_perf_map, + .raw_event_mask = 0xFF, .num_events = 3, .max_period = (1LLU << 32) - 1, }; -static struct arm_pmu *__init armv6mpcore_pmu_init(void) +static const struct arm_pmu *__init armv6mpcore_pmu_init(void) { return &armv6mpcore_pmu; } #else -static struct arm_pmu *__init armv6pmu_init(void) +static const struct arm_pmu *__init armv6pmu_init(void) { return NULL; } -static struct arm_pmu *__init armv6mpcore_pmu_init(void) +static const struct arm_pmu *__init armv6mpcore_pmu_init(void) { return NULL; } diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 510456d..462aefb 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -17,9 +17,6 @@ */ #ifdef CONFIG_CPU_V7 - -static struct arm_pmu armv7pmu; - /* * Common ARMv7 event types * @@ -301,20 +298,6 @@ static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, }; /* @@ -426,20 +409,6 @@ static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - 
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, }; /* @@ -679,24 +648,23 @@ static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] }; /* - * Perf Events' indices + * Perf Events counters */ -#define ARMV7_IDX_CYCLE_COUNTER 0 -#define ARMV7_IDX_COUNTER0 1 -#define ARMV7_IDX_COUNTER_LAST (ARMV7_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) - -#define ARMV7_MAX_COUNTERS 32 -#define ARMV7_COUNTER_MASK (ARMV7_MAX_COUNTERS - 1) +enum armv7_counters { + ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */ + ARMV7_COUNTER0 = 2, /* First event counter */ +}; /* - * ARMv7 low level PMNC access + * The cycle counter is ARMV7_CYCLE_COUNTER. + * The first event counter is ARMV7_COUNTER0. + * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1). */ +#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1) /* - * Perf Event to low level counters mapping + * ARMv7 low level PMNC access */ -#define ARMV7_IDX_TO_COUNTER(x) \ - (((x) - ARMV7_IDX_COUNTER0) & ARMV7_COUNTER_MASK) /* * Per-CPU PMNC: config reg @@ -712,76 +680,103 @@ static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] #define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ /* - * FLAG: counters overflow flag status reg + * Available counters */ -#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ -#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK +#define ARMV7_CNT0 0 /* First event counter */ +#define ARMV7_CCNT 31 /* Cycle counter */ + +/* Perf Event to low level counters mapping */ +#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0) + +/* + * CNTENS: counters enable reg + */ +#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_CNTENS_C (1 << ARMV7_CCNT) + +/* + * CNTENC: counters disable reg + */ +#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_CNTENC_C (1 << ARMV7_CCNT) + +/* + * INTENS: counters overflow interrupt enable reg + */ +#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_INTENS_C (1 << ARMV7_CCNT) + +/* + * INTENC: counters overflow interrupt disable reg + */ +#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_INTENC_C (1 << ARMV7_CCNT) + +/* + * EVTSEL: Event selection reg + */ +#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */ /* - * PMXEVTYPER: Event selection reg + * SELECT: Counter selection reg */ -#define ARMV7_EVTYPE_MASK 0xc80000ff /* Mask for writable bits */ -#define ARMV7_EVTYPE_EVENT 0xff /* Mask for EVENT bits */ +#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */ /* - * Event filters for PMUv2 + * FLAG: counters overflow flag status reg */ -#define ARMV7_EXCLUDE_PL1 (1 << 31) -#define ARMV7_EXCLUDE_USER (1 << 30) -#define ARMV7_INCLUDE_HYP (1 << 27) +#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_FLAG_C (1 << ARMV7_CCNT) +#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ +#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK -static inline u32 armv7_pmnc_read(void) +static inline unsigned long armv7_pmnc_read(void) { u32 val; asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); return val; } -static inline void armv7_pmnc_write(u32 val) +static inline void armv7_pmnc_write(unsigned long val) { val &= ARMV7_PMNC_MASK; isb(); asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); } -static inline int armv7_pmnc_has_overflowed(u32 pmnc) +static inline int armv7_pmnc_has_overflowed(unsigned long pmnc) { return 
pmnc & ARMV7_OVERFLOWED_MASK; } -static inline int armv7_pmnc_counter_valid(int idx) -{ - return idx >= ARMV7_IDX_CYCLE_COUNTER && idx <= ARMV7_IDX_COUNTER_LAST; -} - -static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx) +static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc, + enum armv7_counters counter) { int ret = 0; - u32 counter; - if (!armv7_pmnc_counter_valid(idx)) { + if (counter == ARMV7_CYCLE_COUNTER) + ret = pmnc & ARMV7_FLAG_C; + else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST)) + ret = pmnc & ARMV7_FLAG_P(counter); + else pr_err("CPU%u checking wrong counter %d overflow status\n", - smp_processor_id(), idx); - } else { - counter = ARMV7_IDX_TO_COUNTER(idx); - ret = pmnc & BIT(counter); - } + smp_processor_id(), counter); return ret; } -static inline int armv7_pmnc_select_counter(int idx) +static inline int armv7_pmnc_select_counter(unsigned int idx) { - u32 counter; + u32 val; - if (!armv7_pmnc_counter_valid(idx)) { - pr_err("CPU%u selecting wrong PMNC counter %d\n", - smp_processor_id(), idx); - return -EINVAL; + if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) { + pr_err("CPU%u selecting wrong PMNC counter" + " %d\n", smp_processor_id(), idx); + return -1; } - counter = ARMV7_IDX_TO_COUNTER(idx); - asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter)); + val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); isb(); return idx; @@ -789,99 +784,123 @@ static inline int armv7_pmnc_select_counter(int idx) static inline u32 armv7pmu_read_counter(int idx) { - u32 value = 0; + unsigned long value = 0; - if (!armv7_pmnc_counter_valid(idx)) + if (idx == ARMV7_CYCLE_COUNTER) + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); + else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { + if (armv7_pmnc_select_counter(idx) == idx) + asm volatile("mrc p15, 0, %0, c9, c13, 2" + : "=r" (value)); + } else pr_err("CPU%u reading wrong counter %d\n", smp_processor_id(), idx); - else if (idx == ARMV7_IDX_CYCLE_COUNTER) - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); - else if (armv7_pmnc_select_counter(idx) == idx) - asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value)); return value; } static inline void armv7pmu_write_counter(int idx, u32 value) { - if (!armv7_pmnc_counter_valid(idx)) + if (idx == ARMV7_CYCLE_COUNTER) + asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); + else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { + if (armv7_pmnc_select_counter(idx) == idx) + asm volatile("mcr p15, 0, %0, c9, c13, 2" + : : "r" (value)); + } else pr_err("CPU%u writing wrong counter %d\n", smp_processor_id(), idx); - else if (idx == ARMV7_IDX_CYCLE_COUNTER) - asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); - else if (armv7_pmnc_select_counter(idx) == idx) - asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value)); } -static inline void armv7_pmnc_write_evtsel(int idx, u32 val) +static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val) { if (armv7_pmnc_select_counter(idx) == idx) { - val &= ARMV7_EVTYPE_MASK; + val &= ARMV7_EVTSEL_MASK; asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); } } -static inline int armv7_pmnc_enable_counter(int idx) +static inline u32 armv7_pmnc_enable_counter(unsigned int idx) { - u32 counter; + u32 val; - if (!armv7_pmnc_counter_valid(idx)) { - pr_err("CPU%u enabling wrong PMNC counter %d\n", - smp_processor_id(), idx); - return -EINVAL; + if 
((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u enabling wrong PMNC counter" + " %d\n", smp_processor_id(), idx); + return -1; } - counter = ARMV7_IDX_TO_COUNTER(idx); - asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter))); + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_CNTENS_C; + else + val = ARMV7_CNTENS_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); + return idx; } -static inline int armv7_pmnc_disable_counter(int idx) +static inline u32 armv7_pmnc_disable_counter(unsigned int idx) { - u32 counter; + u32 val; - if (!armv7_pmnc_counter_valid(idx)) { - pr_err("CPU%u disabling wrong PMNC counter %d\n", - smp_processor_id(), idx); - return -EINVAL; + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u disabling wrong PMNC counter" + " %d\n", smp_processor_id(), idx); + return -1; } - counter = ARMV7_IDX_TO_COUNTER(idx); - asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter))); + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_CNTENC_C; + else + val = ARMV7_CNTENC_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); + return idx; } -static inline int armv7_pmnc_enable_intens(int idx) +static inline u32 armv7_pmnc_enable_intens(unsigned int idx) { - u32 counter; + u32 val; - if (!armv7_pmnc_counter_valid(idx)) { - pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n", - smp_processor_id(), idx); - return -EINVAL; + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u enabling wrong PMNC counter" + " interrupt enable %d\n", smp_processor_id(), idx); + return -1; } - counter = ARMV7_IDX_TO_COUNTER(idx); - asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter))); + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_INTENS_C; + else + val = ARMV7_INTENS_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val)); + return idx; } -static inline int armv7_pmnc_disable_intens(int idx) +static inline u32 armv7_pmnc_disable_intens(unsigned int idx) { - u32 counter; + u32 val; - if (!armv7_pmnc_counter_valid(idx)) { - pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n", - smp_processor_id(), idx); - return -EINVAL; + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u disabling wrong PMNC counter" + " interrupt enable %d\n", smp_processor_id(), idx); + return -1; } - counter = ARMV7_IDX_TO_COUNTER(idx); - asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter))); - isb(); - /* Clear the overflow flag in case an interrupt is pending. 
*/ - asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (BIT(counter))); - isb(); + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_INTENC_C; + else + val = ARMV7_INTENC_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); return idx; } @@ -926,14 +945,14 @@ static void armv7_pmnc_dump_regs(void) asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); printk(KERN_INFO "CCNT =0x%08x\n", val); - for (cnt = ARMV7_IDX_COUNTER0; cnt <= ARMV7_IDX_COUNTER_LAST; cnt++) { + for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) { armv7_pmnc_select_counter(cnt); asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); printk(KERN_INFO "CNT[%d] count =0x%08x\n", - ARMV7_IDX_TO_COUNTER(cnt), val); + cnt-ARMV7_EVENT_CNT_TO_CNTx, val); asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", - ARMV7_IDX_TO_COUNTER(cnt), val); + cnt-ARMV7_EVENT_CNT_TO_CNTx, val); } } #endif @@ -941,13 +960,12 @@ static void armv7_pmnc_dump_regs(void) static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) { unsigned long flags; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); /* * Enable counter and interrupt, and set the counter to count * the event that we're interested in. */ - raw_spin_lock_irqsave(&events->pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); /* * Disable counter @@ -956,10 +974,9 @@ static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) /* * Set event (if destined for PMNx counters) - * We only need to set the event for the cycle counter if we - * have the ability to perform event filtering. + * We don't need to set the event if it's a cycle count */ - if (armv7pmu.set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER) + if (idx != ARMV7_CYCLE_COUNTER) armv7_pmnc_write_evtsel(idx, hwc->config_base); /* @@ -972,18 +989,17 @@ static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) */ armv7_pmnc_enable_counter(idx); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) { unsigned long flags; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); /* * Disable counter and interrupt */ - raw_spin_lock_irqsave(&events->pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); /* * Disable counter @@ -995,14 +1011,14 @@ static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) */ armv7_pmnc_disable_intens(idx); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) { - u32 pmnc; + unsigned long pmnc; struct perf_sample_data data; - struct pmu_hw_events *cpuc; + struct cpu_hw_events *cpuc; struct pt_regs *regs; int idx; @@ -1025,12 +1041,11 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) perf_sample_data_init(&data, 0); cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx < cpu_pmu->num_events; ++idx) { + for (idx = 0; idx <= armpmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; - /* Ignore if we don't have an event. 
*/ - if (!event) + if (!test_bit(idx, cpuc->active_mask)) continue; /* @@ -1041,13 +1056,13 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) continue; hwc = &event->hw; - armpmu_event_update(event, hwc, idx); + armpmu_event_update(event, hwc, idx, 1); data.period = event->hw.last_period; if (!armpmu_event_set_period(event, hwc, idx)) continue; - if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(hwc, idx); + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); } /* @@ -1065,114 +1080,61 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) static void armv7pmu_start(void) { unsigned long flags; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); - raw_spin_lock_irqsave(&events->pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); /* Enable all counters */ armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void armv7pmu_stop(void) { unsigned long flags; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); - raw_spin_lock_irqsave(&events->pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); /* Disable all counters */ armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } -static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc, +static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *event) { int idx; - unsigned long evtype = event->config_base & ARMV7_EVTYPE_EVENT; /* Always place a cycle counter into the cycle counter. */ - if (evtype == ARMV7_PERFCTR_CPU_CYCLES) { - if (test_and_set_bit(ARMV7_IDX_CYCLE_COUNTER, cpuc->used_mask)) + if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) { + if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask)) return -EAGAIN; - return ARMV7_IDX_CYCLE_COUNTER; - } + return ARMV7_CYCLE_COUNTER; + } else { + /* + * For anything other than a cycle counter, try and use + * the events counters + */ + for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } - /* - * For anything other than a cycle counter, try and use - * the events counters - */ - for (idx = ARMV7_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) { - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; + /* The counters are all in use. */ + return -EAGAIN; } - - /* The counters are all in use. */ - return -EAGAIN; -} - -/* - * Add an event filter to a given event. This will only work for PMUv2 PMUs. - */ -static int armv7pmu_set_event_filter(struct hw_perf_event *event, - struct perf_event_attr *attr) -{ - unsigned long config_base = 0; - - if (attr->exclude_idle) - return -EPERM; - if (attr->exclude_user) - config_base |= ARMV7_EXCLUDE_USER; - if (attr->exclude_kernel) - config_base |= ARMV7_EXCLUDE_PL1; - if (!attr->exclude_hv) - config_base |= ARMV7_INCLUDE_HYP; - - /* - * Install the filter into config_base as this is used to - * construct the event type. - */ - event->config_base = config_base; - - return 0; } static void armv7pmu_reset(void *info) { - u32 idx, nb_cnt = cpu_pmu->num_events; + u32 idx, nb_cnt = armpmu->num_events; /* The counter and interrupt enable registers are unknown at reset. 
*/ - for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) + for (idx = 1; idx < nb_cnt; ++idx) armv7pmu_disable_event(NULL, idx); /* Initialize & Reset PMNC: C and P bits */ armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); } -static int armv7_a8_map_event(struct perf_event *event) -{ - return map_cpu_event(event, &armv7_a8_perf_map, - &armv7_a8_perf_cache_map, 0xFF); -} - -static int armv7_a9_map_event(struct perf_event *event) -{ - return map_cpu_event(event, &armv7_a9_perf_map, - &armv7_a9_perf_cache_map, 0xFF); -} - -static int armv7_a5_map_event(struct perf_event *event) -{ - return map_cpu_event(event, &armv7_a5_perf_map, - &armv7_a5_perf_cache_map, 0xFF); -} - -static int armv7_a15_map_event(struct perf_event *event) -{ - return map_cpu_event(event, &armv7_a15_perf_map, - &armv7_a15_perf_cache_map, 0xFF); -} - static struct arm_pmu armv7pmu = { .handle_irq = armv7pmu_handle_irq, .enable = armv7pmu_enable_event, @@ -1183,6 +1145,7 @@ static struct arm_pmu armv7pmu = { .start = armv7pmu_start, .stop = armv7pmu_stop, .reset = armv7pmu_reset, + .raw_event_mask = 0xFF, .max_period = (1LLU << 32) - 1, }; @@ -1197,59 +1160,62 @@ static u32 __init armv7_read_num_pmnc_events(void) return nb_cnt + 1; } -static struct arm_pmu *__init armv7_a8_pmu_init(void) +static const struct arm_pmu *__init armv7_a8_pmu_init(void) { armv7pmu.id = ARM_PERF_PMU_ID_CA8; armv7pmu.name = "ARMv7 Cortex-A8"; - armv7pmu.map_event = armv7_a8_map_event; + armv7pmu.cache_map = &armv7_a8_perf_cache_map; + armv7pmu.event_map = &armv7_a8_perf_map; armv7pmu.num_events = armv7_read_num_pmnc_events(); return &armv7pmu; } -static struct arm_pmu *__init armv7_a9_pmu_init(void) +static const struct arm_pmu *__init armv7_a9_pmu_init(void) { armv7pmu.id = ARM_PERF_PMU_ID_CA9; armv7pmu.name = "ARMv7 Cortex-A9"; - armv7pmu.map_event = armv7_a9_map_event; + armv7pmu.cache_map = &armv7_a9_perf_cache_map; + armv7pmu.event_map = &armv7_a9_perf_map; armv7pmu.num_events = armv7_read_num_pmnc_events(); return &armv7pmu; } -static struct arm_pmu *__init armv7_a5_pmu_init(void) +static const struct arm_pmu *__init armv7_a5_pmu_init(void) { armv7pmu.id = ARM_PERF_PMU_ID_CA5; armv7pmu.name = "ARMv7 Cortex-A5"; - armv7pmu.map_event = armv7_a5_map_event; + armv7pmu.cache_map = &armv7_a5_perf_cache_map; + armv7pmu.event_map = &armv7_a5_perf_map; armv7pmu.num_events = armv7_read_num_pmnc_events(); return &armv7pmu; } -static struct arm_pmu *__init armv7_a15_pmu_init(void) +static const struct arm_pmu *__init armv7_a15_pmu_init(void) { armv7pmu.id = ARM_PERF_PMU_ID_CA15; armv7pmu.name = "ARMv7 Cortex-A15"; - armv7pmu.map_event = armv7_a15_map_event; + armv7pmu.cache_map = &armv7_a15_perf_cache_map; + armv7pmu.event_map = &armv7_a15_perf_map; armv7pmu.num_events = armv7_read_num_pmnc_events(); - armv7pmu.set_event_filter = armv7pmu_set_event_filter; return &armv7pmu; } #else -static struct arm_pmu *__init armv7_a8_pmu_init(void) +static const struct arm_pmu *__init armv7_a8_pmu_init(void) { return NULL; } -static struct arm_pmu *__init armv7_a9_pmu_init(void) +static const struct arm_pmu *__init armv7_a9_pmu_init(void) { return NULL; } -static struct arm_pmu *__init armv7_a5_pmu_init(void) +static const struct arm_pmu *__init armv7_a5_pmu_init(void) { return NULL; } -static struct arm_pmu *__init armv7_a15_pmu_init(void) +static const struct arm_pmu *__init armv7_a15_pmu_init(void) { return NULL; } diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 9fc2c95..39affbe 100644 --- 
a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -40,7 +40,7 @@ enum xscale_perf_types { }; enum xscale_counters { - XSCALE_CYCLE_COUNTER = 0, + XSCALE_CYCLE_COUNTER = 1, XSCALE_COUNTER0, XSCALE_COUNTER1, XSCALE_COUNTER2, @@ -144,20 +144,6 @@ static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, }; #define XSCALE_PMU_ENABLE 0x001 @@ -222,7 +208,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev) { unsigned long pmnc; struct perf_sample_data data; - struct pmu_hw_events *cpuc; + struct cpu_hw_events *cpuc; struct pt_regs *regs; int idx; @@ -249,24 +235,24 @@ xscale1pmu_handle_irq(int irq_num, void *dev) perf_sample_data_init(&data, 0); cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx < cpu_pmu->num_events; ++idx) { + for (idx = 0; idx <= armpmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; - if (!event) + if (!test_bit(idx, cpuc->active_mask)) continue; if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) continue; hwc = &event->hw; - armpmu_event_update(event, hwc, idx); + armpmu_event_update(event, hwc, idx, 1); data.period = event->hw.last_period; if (!armpmu_event_set_period(event, hwc, idx)) continue; - if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(hwc, idx); + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); } irq_work_run(); @@ -284,7 +270,6 @@ static void xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) { unsigned long val, mask, evt, flags; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); switch (idx) { case XSCALE_CYCLE_COUNTER: @@ -306,19 +291,18 @@ xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) return; } - raw_spin_lock_irqsave(&events->pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = xscale1pmu_read_pmnc(); val &= ~mask; val |= evt; xscale1pmu_write_pmnc(val); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) { unsigned long val, mask, evt, flags; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); switch (idx) { case XSCALE_CYCLE_COUNTER: @@ -338,16 +322,16 @@ xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) return; } - raw_spin_lock_irqsave(&events->pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = xscale1pmu_read_pmnc(); val &= ~mask; val |= evt; xscale1pmu_write_pmnc(val); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static int -xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc, +xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *event) { if (XSCALE_PERFCTR_CCNT == event->config_base) { @@ -370,26 +354,24 @@ static void xscale1pmu_start(void) { unsigned long flags, val; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); - raw_spin_lock_irqsave(&events->pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = xscale1pmu_read_pmnc(); val |= XSCALE_PMU_ENABLE; xscale1pmu_write_pmnc(val); - 
raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void xscale1pmu_stop(void) { unsigned long flags, val; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); - raw_spin_lock_irqsave(&events->pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = xscale1pmu_read_pmnc(); val &= ~XSCALE_PMU_ENABLE; xscale1pmu_write_pmnc(val); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static inline u32 @@ -428,13 +410,7 @@ xscale1pmu_write_counter(int counter, u32 val) } } -static int xscale_map_event(struct perf_event *event) -{ - return map_cpu_event(event, &xscale_perf_map, - &xscale_perf_cache_map, 0xFF); -} - -static struct arm_pmu xscale1pmu = { +static const struct arm_pmu xscale1pmu = { .id = ARM_PERF_PMU_ID_XSCALE1, .name = "xscale1", .handle_irq = xscale1pmu_handle_irq, @@ -445,12 +421,14 @@ static struct arm_pmu xscale1pmu = { .get_event_idx = xscale1pmu_get_event_idx, .start = xscale1pmu_start, .stop = xscale1pmu_stop, - .map_event = xscale_map_event, + .cache_map = &xscale_perf_cache_map, + .event_map = &xscale_perf_map, + .raw_event_mask = 0xFF, .num_events = 3, .max_period = (1LLU << 32) - 1, }; -static struct arm_pmu *__init xscale1pmu_init(void) +static const struct arm_pmu *__init xscale1pmu_init(void) { return &xscale1pmu; } @@ -568,7 +546,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev) { unsigned long pmnc, of_flags; struct perf_sample_data data; - struct pmu_hw_events *cpuc; + struct cpu_hw_events *cpuc; struct pt_regs *regs; int idx; @@ -589,24 +567,24 @@ xscale2pmu_handle_irq(int irq_num, void *dev) perf_sample_data_init(&data, 0); cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx < cpu_pmu->num_events; ++idx) { + for (idx = 0; idx <= armpmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; - if (!event) + if (!test_bit(idx, cpuc->active_mask)) continue; - if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx)) + if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx)) continue; hwc = &event->hw; - armpmu_event_update(event, hwc, idx); + armpmu_event_update(event, hwc, idx, 1); data.period = event->hw.last_period; if (!armpmu_event_set_period(event, hwc, idx)) continue; - if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(hwc, idx); + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); } irq_work_run(); @@ -624,7 +602,6 @@ static void xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) { unsigned long flags, ien, evtsel; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); ien = xscale2pmu_read_int_enable(); evtsel = xscale2pmu_read_event_select(); @@ -658,17 +635,16 @@ xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) return; } - raw_spin_lock_irqsave(&events->pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); xscale2pmu_write_event_select(evtsel); xscale2pmu_write_int_enable(ien); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) { - unsigned long flags, ien, evtsel, of_flags; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + unsigned long flags, ien, evtsel; ien = xscale2pmu_read_int_enable(); evtsel = xscale2pmu_read_event_select(); @@ -676,46 +652,40 @@ xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) switch (idx) { case XSCALE_CYCLE_COUNTER: ien &= 
~XSCALE2_CCOUNT_INT_EN; - of_flags = XSCALE2_CCOUNT_OVERFLOW; break; case XSCALE_COUNTER0: ien &= ~XSCALE2_COUNT0_INT_EN; evtsel &= ~XSCALE2_COUNT0_EVT_MASK; evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; - of_flags = XSCALE2_COUNT0_OVERFLOW; break; case XSCALE_COUNTER1: ien &= ~XSCALE2_COUNT1_INT_EN; evtsel &= ~XSCALE2_COUNT1_EVT_MASK; evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; - of_flags = XSCALE2_COUNT1_OVERFLOW; break; case XSCALE_COUNTER2: ien &= ~XSCALE2_COUNT2_INT_EN; evtsel &= ~XSCALE2_COUNT2_EVT_MASK; evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; - of_flags = XSCALE2_COUNT2_OVERFLOW; break; case XSCALE_COUNTER3: ien &= ~XSCALE2_COUNT3_INT_EN; evtsel &= ~XSCALE2_COUNT3_EVT_MASK; evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; - of_flags = XSCALE2_COUNT3_OVERFLOW; break; default: WARN_ONCE(1, "invalid counter number (%d)\n", idx); return; } - raw_spin_lock_irqsave(&events->pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); xscale2pmu_write_event_select(evtsel); xscale2pmu_write_int_enable(ien); - xscale2pmu_write_overflow_flags(of_flags); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static int -xscale2pmu_get_event_idx(struct pmu_hw_events *cpuc, +xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *event) { int idx = xscale1pmu_get_event_idx(cpuc, event); @@ -734,26 +704,24 @@ static void xscale2pmu_start(void) { unsigned long flags, val; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); - raw_spin_lock_irqsave(&events->pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; val |= XSCALE_PMU_ENABLE; xscale2pmu_write_pmnc(val); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void xscale2pmu_stop(void) { unsigned long flags, val; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); - raw_spin_lock_irqsave(&events->pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = xscale2pmu_read_pmnc(); val &= ~XSCALE_PMU_ENABLE; xscale2pmu_write_pmnc(val); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static inline u32 @@ -804,7 +772,7 @@ xscale2pmu_write_counter(int counter, u32 val) } } -static struct arm_pmu xscale2pmu = { +static const struct arm_pmu xscale2pmu = { .id = ARM_PERF_PMU_ID_XSCALE2, .name = "xscale2", .handle_irq = xscale2pmu_handle_irq, @@ -815,22 +783,24 @@ static struct arm_pmu xscale2pmu = { .get_event_idx = xscale2pmu_get_event_idx, .start = xscale2pmu_start, .stop = xscale2pmu_stop, - .map_event = xscale_map_event, + .cache_map = &xscale_perf_cache_map, + .event_map = &xscale_perf_map, + .raw_event_mask = 0xFF, .num_events = 5, .max_period = (1LLU << 32) - 1, }; -static struct arm_pmu *__init xscale2pmu_init(void) +static const struct arm_pmu *__init xscale2pmu_init(void) { return &xscale2pmu; } #else -static struct arm_pmu *__init xscale1pmu_init(void) +static const struct arm_pmu *__init xscale1pmu_init(void) { return NULL; } -static struct arm_pmu *__init xscale2pmu_init(void) +static const struct arm_pmu *__init xscale2pmu_init(void) { return NULL; } diff --git a/arch/arm/kernel/pj4-cp0.c b/arch/arm/kernel/pj4-cp0.c index 679cf4d..a4b1b07 100644 --- a/arch/arm/kernel/pj4-cp0.c +++ b/arch/arm/kernel/pj4-cp0.c @@ -10,6 +10,7 @@ * published by the Free Software Foundation. 
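
Throughout the xscale hunks above, every PMNC and event-select update is a read-modify-write performed under the single global pmu_lock that this older code uses in place of the newer per-PMU lock. A minimal sketch of the pattern; read_pmnc()/write_pmnc() stand in for the mrc/mcr coprocessor accessors and are assumed names, not real kernel symbols:

    static DEFINE_RAW_SPINLOCK(pmu_lock);

    static void pmnc_modify(u32 clear, u32 set)
    {
        unsigned long flags;
        u32 val;

        raw_spin_lock_irqsave(&pmu_lock, flags);
        val = read_pmnc();                /* mrc p14, ... (assumed accessor) */
        val = (val & ~clear) | set;
        write_pmnc(val);                  /* mcr p14, ... (assumed accessor) */
        raw_spin_unlock_irqrestore(&pmu_lock, flags);
    }

The _irqsave variant is not optional here: the disable path is also invoked from the overflow interrupt handler (armpmu->disable() on a throttled event), so the lock must be taken with interrupts masked to avoid self-deadlock.
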
*/ +#include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/signal.h> diff --git a/arch/arm/kernel/pmu.c b/arch/arm/kernel/pmu.c index 2334bf8..2c79eec 100644 --- a/arch/arm/kernel/pmu.c +++ b/arch/arm/kernel/pmu.c @@ -10,27 +10,139 @@ * */ +#define pr_fmt(fmt) "PMU: " fmt + +#include <linux/cpumask.h> #include <linux/err.h> +#include <linux/interrupt.h> #include <linux/kernel.h> #include <linux/module.h> +#include <linux/platform_device.h> #include <asm/pmu.h> -/* - * PMU locking to ensure mutual exclusion between different subsystems. - */ -static unsigned long pmu_lock[BITS_TO_LONGS(ARM_NUM_PMU_DEVICES)]; +static volatile long pmu_lock; -int -reserve_pmu(enum arm_pmu_type type) +static struct platform_device *pmu_devices[ARM_NUM_PMU_DEVICES]; + +static int __devinit pmu_device_probe(struct platform_device *pdev) +{ + + if (pdev->id < 0 || pdev->id >= ARM_NUM_PMU_DEVICES) { + pr_warning("received registration request for unknown " + "device %d\n", pdev->id); + return -EINVAL; + } + + if (pmu_devices[pdev->id]) + pr_warning("registering new PMU device type %d overwrites " + "previous registration!\n", pdev->id); + else + pr_info("registered new PMU device of type %d\n", + pdev->id); + + pmu_devices[pdev->id] = pdev; + return 0; +} + +static struct platform_driver pmu_driver = { + .driver = { + .name = "arm-pmu", + }, + .probe = pmu_device_probe, +}; + +static int __init register_pmu_driver(void) { - return test_and_set_bit_lock(type, pmu_lock) ? -EBUSY : 0; + return platform_driver_register(&pmu_driver); +} +device_initcall(register_pmu_driver); + +struct platform_device * +reserve_pmu(enum arm_pmu_type device) +{ + struct platform_device *pdev; + + if (test_and_set_bit_lock(device, &pmu_lock)) { + pdev = ERR_PTR(-EBUSY); + } else if (pmu_devices[device] == NULL) { + clear_bit_unlock(device, &pmu_lock); + pdev = ERR_PTR(-ENODEV); + } else { + pdev = pmu_devices[device]; + } + + return pdev; } EXPORT_SYMBOL_GPL(reserve_pmu); -void -release_pmu(enum arm_pmu_type type) +int +release_pmu(struct platform_device *pdev) { - clear_bit_unlock(type, pmu_lock); + if (WARN_ON(pdev != pmu_devices[pdev->id])) + return -EINVAL; + clear_bit_unlock(pdev->id, &pmu_lock); + return 0; } EXPORT_SYMBOL_GPL(release_pmu); + +static int +set_irq_affinity(int irq, + unsigned int cpu) +{ +#ifdef CONFIG_SMP + int err = irq_set_affinity(irq, cpumask_of(cpu)); + if (err) + pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", + irq, cpu); + return err; +#else + return -EINVAL; +#endif +} + +static int +init_cpu_pmu(void) +{ + int i, irqs, err = 0; + struct platform_device *pdev = pmu_devices[ARM_PMU_DEVICE_CPU]; + + if (!pdev) + return -ENODEV; + + irqs = pdev->num_resources; + + /* + * If we have a single PMU interrupt that we can't shift, assume that + * we're running on a uniprocessor machine and continue. 
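
The reworked reserve_pmu()/release_pmu() above hand out the probed platform device itself and use a single bit lock for mutual exclusion. From a client's point of view the lifecycle is roughly this (a sketch; the error values are the ones returned in the hunk):

    struct platform_device *pdev;
    int err;

    pdev = reserve_pmu(ARM_PMU_DEVICE_CPU);
    if (IS_ERR(pdev))
        return PTR_ERR(pdev);   /* -EBUSY: already claimed; -ENODEV: never probed */

    err = init_pmu(ARM_PMU_DEVICE_CPU);  /* e.g. route per-CPU IRQ affinities */
    /* ... claim the interrupts via platform_get_irq(pdev, i) ... */

    release_pmu(pdev);
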
+ */ + if (irqs == 1 && !irq_can_set_affinity(platform_get_irq(pdev, 0))) + return 0; + + for (i = 0; i < irqs; ++i) { + err = set_irq_affinity(platform_get_irq(pdev, i), i); + if (err) + break; + } + + return err; +} + +int +init_pmu(enum arm_pmu_type device) +{ + int err = 0; + + switch (device) { + case ARM_PMU_DEVICE_CPU: + err = init_cpu_pmu(); + break; + default: + pr_warning("attempt to initialise unknown device %d\n", + device); + err = -EINVAL; + } + + return err; +} +EXPORT_SYMBOL_GPL(init_pmu); diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index d9e3c61..e5cfa6a 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -10,7 +10,7 @@ */ #include <stdarg.h> -#include <linux/export.h> +#include <linux/module.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> @@ -30,10 +30,9 @@ #include <linux/uaccess.h> #include <linux/random.h> #include <linux/hw_breakpoint.h> -#include <linux/cpuidle.h> +#include <linux/console.h> #include <asm/cacheflush.h> -#include <asm/leds.h> #include <asm/processor.h> #include <asm/system.h> #include <asm/thread_notify.h> @@ -63,6 +62,18 @@ static volatile int hlt_counter; #include <mach/system.h> +#ifdef CONFIG_SMP +void arch_trigger_all_cpu_backtrace(void) +{ + smp_send_all_cpu_backtrace(); +} +#else +void arch_trigger_all_cpu_backtrace(void) +{ + dump_stack(); +} +#endif + void disable_hlt(void) { hlt_counter++; @@ -92,8 +103,37 @@ static int __init hlt_setup(char *__unused) __setup("nohlt", nohlt_setup); __setup("hlt", hlt_setup); +#ifdef CONFIG_ARM_FLUSH_CONSOLE_ON_RESTART +void arm_machine_flush_console(void) +{ + printk("\n"); + pr_emerg("Restarting %s\n", linux_banner); + if (console_trylock()) { + console_unlock(); + return; + } + + mdelay(50); + + local_irq_disable(); + if (!console_trylock()) + pr_emerg("arm_restart: Console was locked! 
Busting\n"); + else + pr_emerg("arm_restart: Console was locked!\n"); + console_unlock(); +} +#else +void arm_machine_flush_console(void) +{ +} +#endif + void arm_machine_restart(char mode, const char *cmd) { + /* Flush the console to make sure all the relevant messages make it + * out to the console drivers */ + arm_machine_flush_console(); + /* Disable interrupts first */ local_irq_disable(); local_fiq_disable(); @@ -125,7 +165,6 @@ void arm_machine_restart(char mode, const char *cmd) */ mdelay(1000); printk("Reboot failed -- System halted\n"); - local_irq_disable(); while (1); } @@ -184,8 +223,8 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { + idle_notifier_call_chain(IDLE_START); tick_nohz_stop_sched_tick(1); - leds_event(led_idle_start); while (!need_resched()) { #ifdef CONFIG_HOTPLUG_CPU if (cpu_is_offline(smp_processor_id())) @@ -201,8 +240,7 @@ void cpu_idle(void) cpu_relax(); } else { stop_critical_timings(); - if (cpuidle_idle_call()) - pm_idle(); + pm_idle(); start_critical_timings(); /* * This will eventually be removed - pm_idle @@ -213,8 +251,8 @@ void cpu_idle(void) local_irq_enable(); } } - leds_event(led_idle_end); tick_nohz_restart_sched_tick(); + idle_notifier_call_chain(IDLE_END); preempt_enable_no_resched(); schedule(); preempt_disable(); @@ -241,7 +279,6 @@ void machine_shutdown(void) void machine_halt(void) { machine_shutdown(); - local_irq_disable(); while (1); } @@ -258,6 +295,77 @@ void machine_restart(char *cmd) arm_pm_restart(reboot_mode, cmd); } +/* + * dump a block of kernel memory from around the given address + */ +static void show_data(unsigned long addr, int nbytes, const char *name) +{ + int i, j; + int nlines; + u32 *p; + + /* + * don't attempt to dump non-kernel addresses or + * values that are probably just small negative numbers + */ + if (addr < PAGE_OFFSET || addr > -256UL) + return; + + printk("\n%s: %#lx:\n", name, addr); + + /* + * round address down to a 32 bit boundary + * and always dump a multiple of 32 bytes + */ + p = (u32 *)(addr & ~(sizeof(u32) - 1)); + nbytes += (addr & (sizeof(u32) - 1)); + nlines = (nbytes + 31) / 32; + + + for (i = 0; i < nlines; i++) { + /* + * just display low 16 bits of address to keep + * each line of the dump < 80 characters + */ + printk("%04lx ", (unsigned long)p & 0xffff); + for (j = 0; j < 8; j++) { + u32 data; + if (probe_kernel_address(p, data)) { + printk(" ********"); + } else { + printk(" %08x", data); + } + ++p; + } + printk("\n"); + } +} + +static void show_extra_register_data(struct pt_regs *regs, int nbytes) +{ + mm_segment_t fs; + + fs = get_fs(); + set_fs(KERNEL_DS); + show_data(regs->ARM_pc - nbytes, nbytes * 2, "PC"); + show_data(regs->ARM_lr - nbytes, nbytes * 2, "LR"); + show_data(regs->ARM_sp - nbytes, nbytes * 2, "SP"); + show_data(regs->ARM_ip - nbytes, nbytes * 2, "IP"); + show_data(regs->ARM_fp - nbytes, nbytes * 2, "FP"); + show_data(regs->ARM_r0 - nbytes, nbytes * 2, "R0"); + show_data(regs->ARM_r1 - nbytes, nbytes * 2, "R1"); + show_data(regs->ARM_r2 - nbytes, nbytes * 2, "R2"); + show_data(regs->ARM_r3 - nbytes, nbytes * 2, "R3"); + show_data(regs->ARM_r4 - nbytes, nbytes * 2, "R4"); + show_data(regs->ARM_r5 - nbytes, nbytes * 2, "R5"); + show_data(regs->ARM_r6 - nbytes, nbytes * 2, "R6"); + show_data(regs->ARM_r7 - nbytes, nbytes * 2, "R7"); + show_data(regs->ARM_r8 - nbytes, nbytes * 2, "R8"); + show_data(regs->ARM_r9 - nbytes, nbytes * 2, "R9"); + show_data(regs->ARM_r10 - nbytes, nbytes * 2, "R10"); + set_fs(fs); +} + void __show_regs(struct 
pt_regs *regs) { unsigned long flags; @@ -317,6 +425,8 @@ void __show_regs(struct pt_regs *regs) printk("Control: %08x%s\n", ctrl, buf); } #endif + + show_extra_register_data(regs, 128); } void show_regs(struct pt_regs * regs) @@ -324,7 +434,7 @@ void show_regs(struct pt_regs * regs) printk("\n"); printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm); __show_regs(regs); - dump_stack(); + __backtrace(); } ATOMIC_NOTIFIER_HEAD(thread_notify_head); @@ -468,7 +578,6 @@ EXPORT_SYMBOL(kernel_thread); unsigned long get_wchan(struct task_struct *p) { struct stackframe frame; - unsigned long stack_page; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; @@ -477,11 +586,9 @@ unsigned long get_wchan(struct task_struct *p) frame.sp = thread_saved_sp(p); frame.lr = 0; /* recovered from the stack */ frame.pc = thread_saved_pc(p); - stack_page = (unsigned long)task_stack_page(p); do { - if (frame.sp < stack_page || - frame.sp >= stack_page + THREAD_SIZE || - unwind_frame(&frame) < 0) + int ret = unwind_frame(&frame); + if (ret < 0) return 0; if (!in_sched_functions(frame.pc)) return frame.pc; diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 90fa8b3..172ae01 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -12,7 +12,6 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/mm.h> -#include <linux/elf.h> #include <linux/smp.h> #include <linux/ptrace.h> #include <linux/user.h> @@ -229,12 +228,34 @@ static struct undef_hook thumb_break_hook = { .fn = break_trap, }; +static int thumb2_break_trap(struct pt_regs *regs, unsigned int instr) +{ + unsigned int instr2; + void __user *pc; + + /* Check the second half of the instruction. */ + pc = (void __user *)(instruction_pointer(regs) + 2); + + if (processor_mode(regs) == SVC_MODE) { + instr2 = *(u16 *) pc; + } else { + get_user(instr2, (u16 __user *)pc); + } + + if (instr2 == 0xa000) { + ptrace_break(current, regs); + return 0; + } else { + return 1; + } +} + static struct undef_hook thumb2_break_hook = { - .instr_mask = 0xffffffff, - .instr_val = 0xf7f0a000, + .instr_mask = 0xffff, + .instr_val = 0xf7f0, .cpsr_mask = PSR_T_BIT, .cpsr_val = PSR_T_BIT, - .fn = break_trap, + .fn = thumb2_break_trap, }; static int __init ptrace_break_init(void) @@ -375,7 +396,7 @@ static long ptrace_hbp_idx_to_num(int idx) /* * Handle hitting a HW-breakpoint. 
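
The ptrace hunk above splits recognition of the 32-bit Thumb-2 breakpoint encoding 0xf7f0a000 into two halfword checks: the undef hook now matches only the leading halfword (mask/value 0xffff/0xf7f0), and thumb2_break_trap() fetches and tests the trailing one. Condensed, the predicate being implemented is:

    /* true iff two consecutive halfwords form the Thumb-2 breakpoint 0xf7f0a000 */
    static bool is_thumb2_bkpt(u16 first, u16 second)
    {
        return first == 0xf7f0 && second == 0xa000;
    }

Note the asymmetry in the handler: in SVC mode the second halfword is read with a plain dereference, while the user-mode path must go through get_user().
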
*/ -static void ptrace_hbptriggered(struct perf_event *bp, +static void ptrace_hbptriggered(struct perf_event *bp, int unused, struct perf_sample_data *data, struct pt_regs *regs) { @@ -458,8 +479,7 @@ static struct perf_event *ptrace_hbp_create(struct task_struct *tsk, int type) attr.bp_type = type; attr.disabled = 1; - return register_user_hw_breakpoint(&attr, ptrace_hbptriggered, NULL, - tsk); + return register_user_hw_breakpoint(&attr, ptrace_hbptriggered, tsk); } static int ptrace_gethbpregs(struct task_struct *tsk, long num, diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S index d0cdedf..9cf4cbf 100644 --- a/arch/arm/kernel/relocate_kernel.S +++ b/arch/arm/kernel/relocate_kernel.S @@ -57,8 +57,7 @@ relocate_new_kernel: mov r0,#0 ldr r1,kexec_mach_type ldr r2,kexec_boot_atags - ARM( mov pc, lr ) - THUMB( bx lr ) + mov pc,lr .align diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c index 8085417..0b13a72 100644 --- a/arch/arm/kernel/return_address.c +++ b/arch/arm/kernel/return_address.c @@ -8,7 +8,7 @@ * under the terms of the GNU General Public License version 2 as published by * the Free Software Foundation. */ -#include <linux/export.h> +#include <linux/module.h> #include <linux/ftrace.h> #if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 8fc2c8f..7e51962 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -7,7 +7,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include <linux/export.h> +#include <linux/module.h> #include <linux/kernel.h> #include <linux/stddef.h> #include <linux/ioport.h> @@ -29,8 +29,6 @@ #include <linux/fs.h> #include <linux/proc_fs.h> #include <linux/memblock.h> -#include <linux/bug.h> -#include <linux/compiler.h> #include <asm/unified.h> #include <asm/cpu.h> @@ -44,7 +42,6 @@ #include <asm/cacheflush.h> #include <asm/cachetype.h> #include <asm/tlbflush.h> -#include <asm/system.h> #include <asm/prom.h> #include <asm/mach/arch.h> @@ -52,6 +49,9 @@ #include <asm/mach/time.h> #include <asm/traps.h> #include <asm/unwind.h> +#ifdef CONFIG_MIDAS_COMMON +#include <plat/cpu.h> +#endif #if defined(CONFIG_DEPRECATED_PARAM_STRUCT) #include "compat.h" @@ -78,6 +78,9 @@ __setup("fpe=", fpe_setup); extern void paging_init(struct machine_desc *desc); extern void sanity_check_meminfo(void); extern void reboot_setup(char *str); +#ifdef CONFIG_DMA_CMA +extern void setup_dma_zone(struct machine_desc *desc); +#endif unsigned int processor_id; EXPORT_SYMBOL(processor_id); @@ -118,13 +121,6 @@ struct outer_cache_fns outer_cache __read_mostly; EXPORT_SYMBOL(outer_cache); #endif -/* - * Cached cpu_architecture() result for use by assembler code. - * C code should use the cpu_architecture() function instead of accessing this - * variable directly. - */ -int __cpu_architecture __read_mostly = CPU_ARCH_UNKNOWN; - struct stack { u32 irq[3]; u32 abt[3]; @@ -220,7 +216,7 @@ static const char *proc_arch[] = { "?(17)", }; -static int __get_cpu_architecture(void) +int cpu_architecture(void) { int cpu_arch; @@ -253,22 +249,11 @@ static int __get_cpu_architecture(void) return cpu_arch; } -int __pure cpu_architecture(void) -{ - BUG_ON(__cpu_architecture == CPU_ARCH_UNKNOWN); - - return __cpu_architecture; -} - static int cpu_has_aliasing_icache(unsigned int arch) { int aliasing_icache; unsigned int id_reg, num_sets, line_size; - /* PIPT caches never alias. 
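
The cpu_has_aliasing_icache() helper whose context appears above decides whether a VIPT instruction cache can alias. Reconstructed from the CCSIDR fields the function decodes, the test reduces to whether one cache way spans more than a page (a sketch of the arithmetic, not the verbatim kernel code):

    /* ARMv7 CCSIDR, after selecting the L1 I-cache via CSSELR (assumed) */
    unsigned int line_size = 4 << ((id_reg & 0x7) + 2);      /* bytes */
    unsigned int num_sets  = ((id_reg >> 13) & 0x7fff) + 1;
    int aliasing = (line_size * num_sets) > PAGE_SIZE;

If the index used to look up a line extends beyond the page-offset bits, two virtual aliases of one physical page can land in different cache lines, which is what the VIPT I-cache aliasing flag records.
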
*/ - if (icache_is_pipt()) - return 0; - /* arch specifies the register format */ switch (arch) { case CPU_ARCH_ARMv7: @@ -301,25 +286,18 @@ static void __init cacheid_init(void) if (arch >= CPU_ARCH_ARMv6) { if ((cachetype & (7 << 29)) == 4 << 29) { /* ARMv7 register format */ - arch = CPU_ARCH_ARMv7; cacheid = CACHEID_VIPT_NONALIASING; - switch (cachetype & (3 << 14)) { - case (1 << 14): + if ((cachetype & (3 << 14)) == 1 << 14) cacheid |= CACHEID_ASID_TAGGED; - break; - case (3 << 14): - cacheid |= CACHEID_PIPT; - break; - } + else if (cpu_has_aliasing_icache(CPU_ARCH_ARMv7)) + cacheid |= CACHEID_VIPT_I_ALIASING; + } else if (cachetype & (1 << 23)) { + cacheid = CACHEID_VIPT_ALIASING; } else { - arch = CPU_ARCH_ARMv6; - if (cachetype & (1 << 23)) - cacheid = CACHEID_VIPT_ALIASING; - else - cacheid = CACHEID_VIPT_NONALIASING; + cacheid = CACHEID_VIPT_NONALIASING; + if (cpu_has_aliasing_icache(CPU_ARCH_ARMv6)) + cacheid |= CACHEID_VIPT_I_ALIASING; } - if (cpu_has_aliasing_icache(arch)) - cacheid |= CACHEID_VIPT_I_ALIASING; } else { cacheid = CACHEID_VIVT; } @@ -327,11 +305,10 @@ static void __init cacheid_init(void) printk("CPU: %s data cache, %s instruction cache\n", cache_is_vivt() ? "VIVT" : cache_is_vipt_aliasing() ? "VIPT aliasing" : - cache_is_vipt_nonaliasing() ? "PIPT / VIPT nonaliasing" : "unknown", + cache_is_vipt_nonaliasing() ? "VIPT nonaliasing" : "unknown", cache_is_vivt() ? "VIVT" : icache_is_vivt_asid_tagged() ? "VIVT ASID tagged" : icache_is_vipt_aliasing() ? "VIPT aliasing" : - icache_is_pipt() ? "PIPT" : cache_is_vipt_nonaliasing() ? "VIPT nonaliasing" : "unknown"); } @@ -372,6 +349,54 @@ static void __init feat_v6_fixup(void) elf_hwcap &= ~HWCAP_TLS; } +static void __init setup_processor(void) +{ + struct proc_info_list *list; + + /* + * locate processor in the list of supported processor + * types. The linker builds this table for us from the + * entries in arch/arm/mm/proc-*.S + */ + list = lookup_processor_type(read_cpuid_id()); + if (!list) { + printk("CPU configuration botched (ID %08x), unable " + "to continue.\n", read_cpuid_id()); + while (1); + } + + cpu_name = list->cpu_name; + +#ifdef MULTI_CPU + processor = *list->proc; +#endif +#ifdef MULTI_TLB + cpu_tlb = *list->tlb; +#endif +#ifdef MULTI_USER + cpu_user = *list->user; +#endif +#ifdef MULTI_CACHE + cpu_cache = *list->cache; +#endif + + printk("CPU: %s [%08x] revision %d (ARMv%s), cr=%08lx\n", + cpu_name, read_cpuid_id(), read_cpuid_id() & 15, + proc_arch[cpu_architecture()], cr_alignment); + + sprintf(init_utsname()->machine, "%s%c", list->arch_name, ENDIANNESS); + sprintf(elf_platform, "%s%c", list->elf_name, ENDIANNESS); + elf_hwcap = list->elf_hwcap; +#ifndef CONFIG_ARM_THUMB + elf_hwcap &= ~HWCAP_THUMB; +#endif + + feat_v6_fixup(); + + cacheid_init(); + cpu_proc_init(); +} + /* * cpu_init - initialise one CPU. * @@ -387,8 +412,6 @@ void cpu_init(void) BUG(); } - cpu_proc_init(); - /* * Define the placement constraint for the inline asm directive below. * In Thumb-2, msr with an immediate value is not allowed. @@ -425,57 +448,6 @@ void cpu_init(void) : "r14"); } -static void __init setup_processor(void) -{ - struct proc_info_list *list; - - /* - * locate processor in the list of supported processor - * types. 
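
setup_processor(), shown being moved above, relies on lookup_processor_type() to find the matching entry in the linker-built proc_info table. The real routine is assembly in head-common.S; in C it would amount to roughly the following (a sketch — the loop and function name are illustrative, though __proc_info_begin/__proc_info_end and the cpu_val/cpu_mask fields are the real symbols):

    extern struct proc_info_list __proc_info_begin, __proc_info_end;

    static struct proc_info_list *find_proc_info(u32 cpuid)
    {
        struct proc_info_list *p;

        for (p = &__proc_info_begin; p < &__proc_info_end; p++)
            if ((cpuid & p->cpu_mask) == p->cpu_val)
                return p;
        return NULL;   /* triggers the "CPU configuration botched" path */
    }
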
The linker builds this table for us from the - * entries in arch/arm/mm/proc-*.S - */ - list = lookup_processor_type(read_cpuid_id()); - if (!list) { - printk("CPU configuration botched (ID %08x), unable " - "to continue.\n", read_cpuid_id()); - while (1); - } - - cpu_name = list->cpu_name; - __cpu_architecture = __get_cpu_architecture(); - -#ifdef MULTI_CPU - processor = *list->proc; -#endif -#ifdef MULTI_TLB - cpu_tlb = *list->tlb; -#endif -#ifdef MULTI_USER - cpu_user = *list->user; -#endif -#ifdef MULTI_CACHE - cpu_cache = *list->cache; -#endif - - printk("CPU: %s [%08x] revision %d (ARMv%s), cr=%08lx\n", - cpu_name, read_cpuid_id(), read_cpuid_id() & 15, - proc_arch[cpu_architecture()], cr_alignment); - - snprintf(init_utsname()->machine, __NEW_UTS_LEN + 1, "%s%c", - list->arch_name, ENDIANNESS); - snprintf(elf_platform, ELF_PLATFORM_SIZE, "%s%c", - list->elf_name, ENDIANNESS); - elf_hwcap = list->elf_hwcap; -#ifndef CONFIG_ARM_THUMB - elf_hwcap &= ~HWCAP_THUMB; -#endif - - feat_v6_fixup(); - - cacheid_init(); - cpu_init(); -} - void __init dump_machine_table(void) { struct machine_desc *p; @@ -851,8 +823,25 @@ static struct machine_desc * __init setup_machine_tags(unsigned int nr) if (__atags_pointer) tags = phys_to_virt(__atags_pointer); - else if (mdesc->atag_offset) - tags = (void *)(PAGE_OFFSET + mdesc->atag_offset); + else if (mdesc->boot_params) { +#ifdef CONFIG_MMU + /* + * We still are executing with a minimal MMU mapping created + * with the presumption that the machine default for this + * is located in the first MB of RAM. Anything else will + * fault and silently hang the kernel at this point. + */ + if (mdesc->boot_params < PHYS_OFFSET || + mdesc->boot_params >= PHYS_OFFSET + SZ_1M) { + printk(KERN_WARNING + "Default boot params at physical 0x%08lx out of reach\n", + mdesc->boot_params); + } else +#endif + { + tags = phys_to_virt(mdesc->boot_params); + } + } #if defined(CONFIG_DEPRECATED_PARAM_STRUCT) /* @@ -875,7 +864,7 @@ static struct machine_desc * __init setup_machine_tags(unsigned int nr) } if (mdesc->fixup) - mdesc->fixup(tags, &from, &meminfo); + mdesc->fixup(mdesc, tags, &from, &meminfo); if (tags->hdr.tag == ATAG_CORE) { if (meminfo.nr_banks != 0) @@ -895,6 +884,8 @@ void __init setup_arch(char **cmdline_p) { struct machine_desc *mdesc; + unwind_init(); + setup_processor(); mdesc = setup_machine_fdt(__atags_pointer); if (!mdesc) @@ -902,11 +893,8 @@ void __init setup_arch(char **cmdline_p) machine_desc = mdesc; machine_name = mdesc->name; -#ifdef CONFIG_ZONE_DMA - if (mdesc->dma_zone_size) { - extern unsigned long arm_dma_zone_size; - arm_dma_zone_size = mdesc->dma_zone_size; - } +#ifdef CONFIG_DMA_CMA + setup_dma_zone(mdesc); #endif if (mdesc->soft_reboot) reboot_setup("s"); @@ -936,6 +924,7 @@ void __init setup_arch(char **cmdline_p) #endif reserve_crashkernel(); + cpu_init(); tcm_init(); #ifdef CONFIG_MULTI_IRQ_HANDLER @@ -1061,6 +1050,10 @@ static int c_show(struct seq_file *m, void *v) seq_puts(m, "\n"); +#ifdef CONFIG_MIDAS_COMMON + if (soc_is_exynos4412()) + seq_printf(m, "Chip revision\t: %04x\n", samsung_rev()); +#endif seq_printf(m, "Hardware\t: %s\n", machine_name); seq_printf(m, "Revision\t: %04x\n", system_rev); seq_printf(m, "Serial\t\t: %08x%08x\n", diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index c1d9c77..b02ce1d 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -179,44 +179,23 @@ static int restore_iwmmxt_context(struct iwmmxt_sigframe *frame) static int preserve_vfp_context(struct vfp_sigframe __user 
*frame) { - struct thread_info *thread = current_thread_info(); - struct vfp_hard_struct *h = &thread->vfpstate.hard; const unsigned long magic = VFP_MAGIC; const unsigned long size = VFP_STORAGE_SIZE; int err = 0; - vfp_sync_hwstate(thread); __put_user_error(magic, &frame->magic, err); __put_user_error(size, &frame->size, err); - /* - * Copy the floating point registers. There can be unused - * registers see asm/hwcap.h for details. - */ - err |= __copy_to_user(&frame->ufp.fpregs, &h->fpregs, - sizeof(h->fpregs)); - /* - * Copy the status and control register. - */ - __put_user_error(h->fpscr, &frame->ufp.fpscr, err); - - /* - * Copy the exception registers. - */ - __put_user_error(h->fpexc, &frame->ufp_exc.fpexc, err); - __put_user_error(h->fpinst, &frame->ufp_exc.fpinst, err); - __put_user_error(h->fpinst2, &frame->ufp_exc.fpinst2, err); + if (err) + return -EFAULT; - return err ? -EFAULT : 0; + return vfp_preserve_user_clear_hwstate(&frame->ufp, &frame->ufp_exc); } static int restore_vfp_context(struct vfp_sigframe __user *frame) { - struct thread_info *thread = current_thread_info(); - struct vfp_hard_struct *h = &thread->vfpstate.hard; unsigned long magic; unsigned long size; - unsigned long fpexc; int err = 0; __get_user_error(magic, &frame->magic, err); @@ -227,33 +206,7 @@ static int restore_vfp_context(struct vfp_sigframe __user *frame) if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE) return -EINVAL; - vfp_flush_hwstate(thread); - - /* - * Copy the floating point registers. There can be unused - * registers see asm/hwcap.h for details. - */ - err |= __copy_from_user(&h->fpregs, &frame->ufp.fpregs, - sizeof(h->fpregs)); - /* - * Copy the status and control register. - */ - __get_user_error(h->fpscr, &frame->ufp.fpscr, err); - - /* - * Sanitise and restore the exception registers. - */ - __get_user_error(fpexc, &frame->ufp_exc.fpexc, err); - /* Ensure the VFP is enabled. */ - fpexc |= FPEXC_EN; - /* Ensure FPINST2 is invalid and the exception flag is cleared. */ - fpexc &= ~(FPEXC_EX | FPEXC_FP2V); - h->fpexc = fpexc; - - __get_user_error(h->fpinst, &frame->ufp_exc.fpinst, err); - __get_user_error(h->fpinst2, &frame->ufp_exc.fpinst2, err); - - return err ? -EFAULT : 0; + return vfp_restore_user_hwstate(&frame->ufp, &frame->ufp_exc); } #endif @@ -486,23 +439,12 @@ setup_return(struct pt_regs *regs, struct k_sigaction *ka, */ thumb = handler & 1; -#if __LINUX_ARM_ARCH__ >= 6 - /* - * Clear the If-Then Thumb-2 execution state. ARM spec - * requires this to be all 000s in ARM mode. Snapdragon - * S4/Krait misbehaves on a Thumb=>ARM signal transition - * without this. - * - * We must do this whenever we are running on a Thumb-2 - * capable CPU, which includes ARMv6T2. However, we elect - * to do this whenever we're on an ARMv6 or later CPU for - * simplicity. - */ - cpsr &= ~PSR_IT_MASK; -#endif - if (thumb) { cpsr |= PSR_T_BIT; +#if __LINUX_ARM_ARCH__ >= 7 + /* clear the If-Then Thumb-2 execution state */ + cpsr &= ~PSR_IT_MASK; +#endif } else cpsr &= ~PSR_T_BIT; } diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 020e99c..6398ead 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -8,64 +8,96 @@ .text /* - * Save CPU state for a suspend. This saves the CPU general purpose - * registers, and allocates space on the kernel stack to save the CPU - * specific registers and some other data for resume. 
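
Both versions of the suspend path below stack a small header that cpu_resume later consumes with a single ldmia. Reading the stmfd/ldmia pairs in the hunk, the MULTI_CPU layout is, lowest address first (an illustrative struct, not one defined anywhere in the kernel):

    struct suspend_save {
        u32 vp_offset;        /* r1: v:p offset */
        u32 virt_sp;          /* restored straight into sp */
        u32 ret_fn;           /* virtual return function, loaded into lr */
        u32 resume_fn_phys;   /* cpu_do_resume, physical, loaded into pc */
        u8  cpu_state[];      /* CPU_SLEEP_SIZE bytes from cpu_do_suspend */
    };

sleep_save_sp holds the physical address of this block (one slot per CPU on SMP), which is why cpu_resume can reload everything before the MMU is turned back on.
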
- * r0 = suspend function arg0 - * r1 = suspend function + * Save CPU state for a suspend + * r1 = v:p offset + * r3 = virtual return function + * Note: sp is decremented to allocate space for CPU state on stack + * r0-r3,r9,r10,lr corrupted */ -ENTRY(__cpu_suspend) - stmfd sp!, {r4 - r11, lr} +ENTRY(cpu_suspend) + mov r9, lr #ifdef MULTI_CPU ldr r10, =processor - ldr r4, [r10, #CPU_SLEEP_SIZE] @ size of CPU sleep state + mov r2, sp @ current virtual SP + ldr r0, [r10, #CPU_SLEEP_SIZE] @ size of CPU sleep state + ldr ip, [r10, #CPU_DO_RESUME] @ virtual resume function + sub sp, sp, r0 @ allocate CPU state on stack + mov r0, sp @ save pointer + add ip, ip, r1 @ convert resume fn to phys + stmfd sp!, {r1, r2, r3, ip} @ save v:p, virt SP, retfn, phys resume fn + ldr r3, =sleep_save_sp + add r2, sp, r1 @ convert SP to phys +#ifdef CONFIG_SMP + ALT_SMP(mrc p15, 0, lr, c0, c0, 5) + ALT_UP(mov lr, #0) + and lr, lr, #15 + str r2, [r3, lr, lsl #2] @ save phys SP #else - ldr r4, =cpu_suspend_size + str r2, [r3] @ save phys SP #endif - mov r5, sp @ current virtual SP - add r4, r4, #12 @ Space for pgd, virt sp, phys resume fn - sub sp, sp, r4 @ allocate CPU state on stack - stmfd sp!, {r0, r1} @ save suspend func arg and pointer - add r0, sp, #8 @ save pointer to save block - mov r1, r4 @ size of save block - mov r2, r5 @ virtual SP + mov lr, pc + ldr pc, [r10, #CPU_DO_SUSPEND] @ save CPU state +#else + mov r2, sp @ current virtual SP + ldr r0, =cpu_suspend_size + sub sp, sp, r0 @ allocate CPU state on stack + mov r0, sp @ save pointer + stmfd sp!, {r1, r2, r3} @ save v:p, virt SP, return fn ldr r3, =sleep_save_sp + add r2, sp, r1 @ convert SP to phys #ifdef CONFIG_SMP ALT_SMP(mrc p15, 0, lr, c0, c0, 5) ALT_UP(mov lr, #0) and lr, lr, #15 - add r3, r3, lr, lsl #2 + str r2, [r3, lr, lsl #2] @ save phys SP +#else + str r2, [r3] @ save phys SP +#endif + bl cpu_do_suspend #endif - bl __cpu_suspend_save - adr lr, BSYM(cpu_suspend_abort) - ldmfd sp!, {r0, pc} @ call suspend fn -ENDPROC(__cpu_suspend) - .ltorg -cpu_suspend_abort: - ldmia sp!, {r1 - r3} @ pop phys pgd, virt SP, phys resume fn - teq r0, #0 - moveq r0, #1 @ force non-zero value - mov sp, r2 - ldmfd sp!, {r4 - r11, pc} -ENDPROC(cpu_suspend_abort) + @ flush data cache +#ifdef MULTI_CACHE + ldr r10, =cpu_cache + mov lr, r9 + ldr pc, [r10, #CACHE_FLUSH_KERN_ALL] +#else + mov lr, r9 + b __cpuc_flush_kern_all +#endif +ENDPROC(cpu_suspend) + .ltorg /* * r0 = control register value + * r1 = v:p offset (preserved by cpu_do_resume) + * r2 = phys page table base + * r3 = L1 section flags */ - .align 5 ENTRY(cpu_resume_mmu) + adr r4, cpu_resume_turn_mmu_on + mov r4, r4, lsr #20 + orr r3, r3, r4, lsl #20 + ldr r5, [r2, r4, lsl #2] @ save old mapping + str r3, [r2, r4, lsl #2] @ setup 1:1 mapping for mmu code + sub r2, r2, r1 ldr r3, =cpu_resume_after_mmu - mcr p15, 0, r0, c1, c0, 0 @ turn on MMU, I-cache, etc - mrc p15, 0, r0, c0, c0, 0 @ read id reg - mov r0, r0 - mov r0, r0 - mov pc, r3 @ jump to virtual address + bic r1, r0, #CR_C @ ensure D-cache is disabled + b cpu_resume_turn_mmu_on ENDPROC(cpu_resume_mmu) + .ltorg + .align 5 +cpu_resume_turn_mmu_on: + mcr p15, 0, r1, c1, c0, 0 @ turn on MMU, I-cache, etc + mrc p15, 0, r1, c0, c0, 0 @ read id reg + mov r1, r1 + mov r1, r1 + mov pc, r3 @ jump to virtual address +ENDPROC(cpu_resume_turn_mmu_on) cpu_resume_after_mmu: - bl cpu_init @ restore the und/abt/irq banked regs - mov r0, #0 @ return zero on success - ldmfd sp!, {r4 - r11, pc} + str r5, [r2, r4, lsl #2] @ restore old mapping + mcr p15, 0, r0, c1, 
c0, 0 @ turn on D-cache + mov pc, lr ENDPROC(cpu_resume_after_mmu) /* @@ -88,11 +120,20 @@ ENTRY(cpu_resume) ldr r0, sleep_save_sp @ stack phys addr #endif setmode PSR_I_BIT | PSR_F_BIT | SVC_MODE, r1 @ set SVC, irqs off - @ load phys pgd, stack, resume fn - ARM( ldmia r0!, {r1, sp, pc} ) -THUMB( ldmia r0!, {r1, r2, r3} ) +#ifdef MULTI_CPU + @ load v:p, stack, return fn, resume fn + ARM( ldmia r0!, {r1, sp, lr, pc} ) +THUMB( ldmia r0!, {r1, r2, r3, r4} ) THUMB( mov sp, r2 ) -THUMB( bx r3 ) +THUMB( mov lr, r3 ) +THUMB( bx r4 ) +#else + @ load v:p, stack, return fn + ARM( ldmia r0!, {r1, sp, lr} ) +THUMB( ldmia r0!, {r1, r2, lr} ) +THUMB( mov sp, r2 ) + b cpu_do_resume +#endif ENDPROC(cpu_resume) sleep_save_sp: diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index bfa0eeb..6f4ecfc 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -16,6 +16,7 @@ #include <linux/cache.h> #include <linux/profile.h> #include <linux/errno.h> +#include <linux/ftrace.h> #include <linux/mm.h> #include <linux/err.h> #include <linux/cpu.h> @@ -26,12 +27,10 @@ #include <linux/clockchips.h> #include <linux/completion.h> -#include <linux/atomic.h> +#include <asm/atomic.h> #include <asm/cacheflush.h> #include <asm/cpu.h> #include <asm/cputype.h> -#include <asm/exception.h> -#include <asm/topology.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -40,7 +39,8 @@ #include <asm/tlbflush.h> #include <asm/ptrace.h> #include <asm/localtimer.h> -#include <asm/smp_plat.h> + +#include <mach/sec_debug.h> /* * as from 2.5, kernels no longer have an init_tasks structure @@ -55,8 +55,11 @@ enum ipi_msg_type { IPI_CALL_FUNC, IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, + IPI_CPU_BACKTRACE, }; +static DECLARE_COMPLETION(cpu_running); + int __cpuinit __cpu_up(unsigned int cpu) { struct cpuinfo_arm *ci = &per_cpu(cpu_data, cpu); @@ -116,20 +119,12 @@ int __cpuinit __cpu_up(unsigned int cpu) */ ret = boot_secondary(cpu, idle); if (ret == 0) { - unsigned long timeout; - /* * CPU was successfully started, wait for it * to come online or time out. */ - timeout = jiffies + HZ; - while (time_before(jiffies, timeout)) { - if (cpu_online(cpu)) - break; - - udelay(10); - barrier(); - } + wait_for_completion_timeout(&cpu_running, + msecs_to_jiffies(1000)); if (!cpu_online(cpu)) { pr_crit("CPU%u: failed to come online\n", cpu); @@ -261,20 +256,6 @@ void __ref cpu_die(void) } #endif /* CONFIG_HOTPLUG_CPU */ -int __cpu_logical_map[NR_CPUS]; - -void __init smp_setup_processor_id(void) -{ - int i; - u32 cpu = is_smp() ? read_cpuid_mpidr() & 0xff : 0; - - cpu_logical_map(0) = cpu; - for (i = 1; i < NR_CPUS; ++i) - cpu_logical_map(i) = i == cpu ? 0 : i; - - printk(KERN_INFO "Booting Linux on physical CPU %d\n", cpu); -} - /* * Called by both boot and secondaries to move global data into * per-processor storage. @@ -284,8 +265,20 @@ static void __cpuinit smp_store_cpu_info(unsigned int cpuid) struct cpuinfo_arm *cpu_info = &per_cpu(cpu_data, cpuid); cpu_info->loops_per_jiffy = loops_per_jiffy; +} - store_cpu_topology(cpuid); +/* + * Skip the secondary calibration on architectures sharing clock + * with primary cpu. Archs can use ARCH_SKIP_SECONDARY_CALIBRATE + * for this. 
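
One subtlety in skip_secondary_calibrate(), introduced by the comment above and defined just below: it returns 0 when calibration should be skipped and a nonzero errno when it should run, so the caller's "if (skip_secondary_calibrate()) calibrate_delay();" reads inverted but is correct. A plainer boolean restatement (a sketch; IS_ENABLED() assumes a kernel new enough to provide it):

    static inline bool shares_primary_clock(void)
    {
        return IS_ENABLED(CONFIG_ARCH_SKIP_SECONDARY_CALIBRATE);
    }

    /* in secondary_start_kernel() */
    if (!shares_primary_clock())
        calibrate_delay();
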
+ */ +static inline int skip_secondary_calibrate(void) +{ +#ifdef CONFIG_ARCH_SKIP_SECONDARY_CALIBRATE + return 0; +#else + return -ENXIO; +#endif } /* @@ -316,6 +309,8 @@ asmlinkage void __cpuinit secondary_start_kernel(void) printk("CPU%u: Booted secondary processor\n", cpu); + printk("CPU%u: Booted secondary processor\n", cpu); + cpu_init(); preempt_disable(); trace_hardirqs_off(); @@ -327,29 +322,24 @@ asmlinkage void __cpuinit secondary_start_kernel(void) notify_cpu_starting(cpu); - calibrate_delay(); + if (skip_secondary_calibrate()) + calibrate_delay(); smp_store_cpu_info(cpu); /* * OK, now it's safe to let the boot CPU continue. Wait for * the CPU migration code to notice that the CPU is online - * before we continue. + * before we continue - which happens after __cpu_up returns. */ set_cpu_online(cpu, true); + complete(&cpu_running); /* * Setup the percpu timer for this CPU. */ percpu_timer_setup(); - while (!cpu_active(cpu)) - cpu_relax(); - - /* - * cpu_active bit is set, so it's safe to enalbe interrupts - * now. - */ local_irq_enable(); local_fiq_enable(); @@ -385,8 +375,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) { unsigned int ncores = num_possible_cpus(); - init_cpu_topology(); - smp_store_cpu_info(smp_processor_id()); /* @@ -394,7 +382,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus) */ if (max_cpus > ncores) max_cpus = ncores; - if (ncores > 1 && max_cpus) { + + if (max_cpus > 1) { /* * Enable the local timer or broadcast device for the * boot CPU, but only if we have more than one CPU. @@ -402,14 +391,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) percpu_timer_setup(); /* - * Initialise the present map, which describes the set of CPUs - * actually populated at the present time. A platform should - * re-initialize the map in platform_smp_prepare_cpus() if - * present != possible (e.g. physical hotplug). - */ - init_cpu_present(&cpu_possible_map); - - /* * Initialise the SCU if there are more than one CPU * and let them know where to start. 
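
The __cpu_up()/secondary_start_kernel() hunks above replace a jiffies polling loop with a completion. Condensed, the handshake is (all names taken from the hunks):

    static DECLARE_COMPLETION(cpu_running);

    /* boot CPU, in __cpu_up() */
    ret = boot_secondary(cpu, idle);
    if (ret == 0) {
        wait_for_completion_timeout(&cpu_running,
                                    msecs_to_jiffies(1000));
        if (!cpu_online(cpu))
            pr_crit("CPU%u: failed to come online\n", cpu);
    }

    /* secondary CPU, in secondary_start_kernel() */
    set_cpu_online(cpu, true);
    complete(&cpu_running);

Completing only after set_cpu_online() is what makes the boot CPU's cpu_online() check meaningful once the wait returns.
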
*/ @@ -441,6 +422,7 @@ static const char *ipi_types[NR_IPI] = { S(IPI_CALL_FUNC, "Function call interrupts"), S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), S(IPI_CPU_STOP, "CPU stop interrupts"), + S(IPI_CPU_BACKTRACE, "CPU backtrace"), }; void show_ipi_list(struct seq_file *p, int prec) @@ -466,6 +448,10 @@ u64 smp_irq_stat_cpu(unsigned int cpu) for (i = 0; i < NR_IPI; i++) sum += __get_irq_stat(cpu, ipi_irqs[i]); +#ifdef CONFIG_LOCAL_TIMERS + sum += __get_irq_stat(cpu, local_timer_irqs); +#endif + return sum; } @@ -480,6 +466,38 @@ static void ipi_timer(void) evt->event_handler(evt); } +#ifdef CONFIG_LOCAL_TIMERS +asmlinkage void __exception_irq_entry do_local_timer(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + int cpu = smp_processor_id(); + + if (local_timer_ack()) { + __inc_irq_stat(cpu, local_timer_irqs); + sec_debug_irq_log(0, do_local_timer, 1); + irq_enter(); + ipi_timer(); + irq_exit(); + sec_debug_irq_log(0, do_local_timer, 2); + } else + sec_debug_irq_log(0, do_local_timer, 3); + + set_irq_regs(old_regs); +} + +void show_local_irqs(struct seq_file *p, int prec) +{ + unsigned int cpu; + + seq_printf(p, "%*s: ", prec, "LOC"); + + for_each_present_cpu(cpu) + seq_printf(p, "%10u ", __get_irq_stat(cpu, local_timer_irqs)); + + seq_printf(p, " Local timer interrupts\n"); +} +#endif + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST static void smp_timer_broadcast(const struct cpumask *mask) { @@ -530,11 +548,11 @@ static void percpu_timer_stop(void) unsigned int cpu = smp_processor_id(); struct clock_event_device *evt = &per_cpu(percpu_clockevent, cpu); - local_timer_stop(evt); + evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt); } #endif -static DEFINE_RAW_SPINLOCK(stop_lock); +static DEFINE_SPINLOCK(stop_lock); /* * ipi_cpu_stop - handle IPI from smp_send_stop() @@ -543,10 +561,10 @@ static void ipi_cpu_stop(unsigned int cpu) { if (system_state == SYSTEM_BOOTING || system_state == SYSTEM_RUNNING) { - raw_spin_lock(&stop_lock); + spin_lock(&stop_lock); printk(KERN_CRIT "CPU%u: stopping\n", cpu); dump_stack(); - raw_spin_unlock(&stop_lock); + spin_unlock(&stop_lock); } set_cpu_online(cpu, false); @@ -554,19 +572,69 @@ static void ipi_cpu_stop(unsigned int cpu) local_fiq_disable(); local_irq_disable(); + flush_cache_all(); + local_flush_tlb_all(); + while (1) cpu_relax(); } +static cpumask_t backtrace_mask; +static DEFINE_RAW_SPINLOCK(backtrace_lock); + +/* "in progress" flag of arch_trigger_all_cpu_backtrace */ +static unsigned long backtrace_flag; + +void smp_send_all_cpu_backtrace(void) +{ + unsigned int this_cpu = smp_processor_id(); + int i; + + if (test_and_set_bit(0, &backtrace_flag)) + /* + * If there is already a trigger_all_cpu_backtrace() in progress + * (backtrace_flag == 1), don't output double cpu dump infos. 
+ */ + return; + + cpumask_copy(&backtrace_mask, cpu_online_mask); + cpu_clear(this_cpu, backtrace_mask); + + pr_info("Backtrace for cpu %d (current):\n", this_cpu); + dump_stack(); + + pr_info("\nsending IPI to all other CPUs:\n"); + smp_cross_call(&backtrace_mask, IPI_CPU_BACKTRACE); + + /* Wait for up to 10 seconds for all other CPUs to do the backtrace */ + for (i = 0; i < 10 * 1000; i++) { + if (cpumask_empty(&backtrace_mask)) + break; + mdelay(1); + } + + clear_bit(0, &backtrace_flag); + smp_mb__after_clear_bit(); +} + /* - * Main handler for inter-processor interrupts + * ipi_cpu_backtrace - handle IPI from smp_send_all_cpu_backtrace() */ -asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) +static void ipi_cpu_backtrace(unsigned int cpu, struct pt_regs *regs) { - handle_IPI(ipinr, regs); + if (cpu_isset(cpu, backtrace_mask)) { + raw_spin_lock(&backtrace_lock); + pr_warning("IPI backtrace for cpu %d\n", cpu); + show_regs(regs); + raw_spin_unlock(&backtrace_lock); + cpu_clear(cpu, backtrace_mask); + } } -void handle_IPI(int ipinr, struct pt_regs *regs) +/* + * Main handler for inter-processor interrupts + */ +asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) { unsigned int cpu = smp_processor_id(); struct pt_regs *old_regs = set_irq_regs(regs); @@ -574,6 +642,8 @@ void handle_IPI(int ipinr, struct pt_regs *regs) if (ipinr >= IPI_TIMER && ipinr < IPI_TIMER + NR_IPI) __inc_irq_stat(cpu, ipi_irqs[ipinr - IPI_TIMER]); + sec_debug_irq_log(ipinr, do_IPI, 1); + switch (ipinr) { case IPI_TIMER: irq_enter(); @@ -603,11 +673,18 @@ void handle_IPI(int ipinr, struct pt_regs *regs) irq_exit(); break; + case IPI_CPU_BACKTRACE: + ipi_cpu_backtrace(cpu, regs); + break; + default: printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); break; } + + sec_debug_irq_log(ipinr, do_IPI, 2); + set_irq_regs(old_regs); } @@ -643,3 +720,13 @@ int setup_profiling_timer(unsigned int multiplier) { return -EINVAL; } + +static void flush_all_cpu_cache(void *info) +{ + flush_cache_all(); +} + +void flush_all_cpu_caches(void) +{ + on_each_cpu(flush_all_cpu_cache, NULL, 1); +} diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c index 8f5dd79..1936649 100644 --- a/arch/arm/kernel/smp_scu.c +++ b/arch/arm/kernel/smp_scu.c @@ -15,13 +15,18 @@ #include <asm/cacheflush.h> #include <asm/cputype.h> +#include <plat/cpu.h> + #define SCU_CTRL 0x00 #define SCU_CONFIG 0x04 #define SCU_CPU_STATUS 0x08 #define SCU_INVALIDATE 0x0c #define SCU_FPGA_REVISION 0x10 -#ifdef CONFIG_SMP +#ifdef CONFIG_MACH_PX +extern void logbuf_force_unlock(void); +#endif + /* * Get the number of CPU cores from the SCU configuration */ @@ -52,6 +57,10 @@ void scu_enable(void __iomem *scu_base) if (scu_ctrl & 1) return; + if ((soc_is_exynos4412() && (samsung_rev() >= EXYNOS4412_REV_1_0)) || + soc_is_exynos4210()) + scu_ctrl |= (1<<3); + scu_ctrl |= 1; __raw_writel(scu_ctrl, scu_base + SCU_CTRL); @@ -60,8 +69,11 @@ void scu_enable(void __iomem *scu_base) * initialised is visible to the other CPUs. */ flush_cache_all(); -} + +#ifdef CONFIG_MACH_PX + logbuf_force_unlock(); #endif +} /* * Set the executing CPUs power mode as defined. 
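
In the scu_enable() hunk above, the extra (1<<3) set for EXYNOS4210 and later EXYNOS4412 revisions is bit 3 of the Cortex-A9 SCU control register — the speculative linefills enable in ARM's TRM (that reading comes from the TRM, not from this patch). Decoded, the write amounts to:

    #define SCU_CTRL_ENABLE          (1 << 0)
    #define SCU_CTRL_SPEC_LINEFILLS  (1 << 3)   /* name assumed */

    scu_ctrl = __raw_readl(scu_base + SCU_CTRL);
    scu_ctrl |= SCU_CTRL_SPEC_LINEFILLS | SCU_CTRL_ENABLE;
    __raw_writel(scu_ctrl, scu_base + SCU_CTRL);
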
This will be in diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index a8a6682..2c277d4 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -19,7 +19,6 @@ #include <linux/io.h> #include <asm/smp_twd.h> -#include <asm/localtimer.h> #include <asm/hardware/gic.h> /* set up by the platform code */ @@ -27,8 +26,6 @@ void __iomem *twd_base; static unsigned long twd_timer_rate; -static struct clock_event_device __percpu **twd_evt; - static void twd_set_mode(enum clock_event_mode mode, struct clock_event_device *clk) { @@ -83,12 +80,6 @@ int twd_timer_ack(void) return 0; } -void twd_timer_stop(struct clock_event_device *clk) -{ - twd_set_mode(CLOCK_EVT_MODE_UNUSED, clk); - disable_percpu_irq(clk->irq); -} - static void __cpuinit twd_calibrate_rate(void) { unsigned long count; @@ -128,43 +119,11 @@ static void __cpuinit twd_calibrate_rate(void) } } -static irqreturn_t twd_handler(int irq, void *dev_id) -{ - struct clock_event_device *evt = *(struct clock_event_device **)dev_id; - - if (twd_timer_ack()) { - evt->event_handler(evt); - return IRQ_HANDLED; - } - - return IRQ_NONE; -} - /* * Setup the local clock events for a CPU. */ void __cpuinit twd_timer_setup(struct clock_event_device *clk) { - struct clock_event_device **this_cpu_clk; - - if (!twd_evt) { - int err; - - twd_evt = alloc_percpu(struct clock_event_device *); - if (!twd_evt) { - pr_err("twd: can't allocate memory\n"); - return; - } - - err = request_percpu_irq(clk->irq, twd_handler, - "twd", twd_evt); - if (err) { - pr_err("twd: can't register interrupt %d (%d)\n", - clk->irq, err); - return; - } - } - twd_calibrate_rate(); clk->name = "local_timer"; @@ -178,10 +137,8 @@ void __cpuinit twd_timer_setup(struct clock_event_device *clk) clk->max_delta_ns = clockevent_delta2ns(0xffffffff, clk); clk->min_delta_ns = clockevent_delta2ns(0xf, clk); - this_cpu_clk = __this_cpu_ptr(twd_evt); - *this_cpu_clk = clk; + /* Make sure our local interrupt controller has this enabled */ + gic_enable_ppi(clk->irq); clockevents_register_device(clk); - - enable_percpu_irq(clk->irq, 0); } diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index af4e8c8..b3337e7 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -1,4 +1,4 @@ -#include <linux/export.h> +#include <linux/module.h> #include <linux/sched.h> #include <linux/stacktrace.h> @@ -31,7 +31,7 @@ int notrace unwind_frame(struct stackframe *frame) high = ALIGN(low, THREAD_SIZE); /* check current frame pointer is within bounds */ - if (fp < low + 12 || fp > high - 4) + if (fp < (low + 12) || fp + 4 >= high) return -EINVAL; /* restore the registers from the stack frame */ @@ -94,20 +94,19 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) if (tsk != current) { #ifdef CONFIG_SMP /* - * What guarantees do we have here that 'tsk' is not - * running on another CPU? For now, ignore it as we - * can't guarantee we won't explode. + * What guarantees do we have here that 'tsk' + * is not running on another CPU? 
+ * + * We guarantee that this function will be used for + * latencytop only :-) */ - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; - return; -#else + /* BUG(); */ +#endif data.no_sched_functions = 1; frame.fp = thread_saved_fp(tsk); frame.sp = thread_saved_sp(tsk); frame.lr = 0; /* recovered from the stack */ frame.pc = thread_saved_pc(tsk); -#endif } else { register unsigned long current_sp asm ("sp"); diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index 5d9b1ee..0951a32 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -185,7 +185,7 @@ static int swp_handler(struct pt_regs *regs, unsigned int instr) unsigned int address, destreg, data, type; unsigned int res = 0; - perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->ARM_pc); + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, regs->ARM_pc); if (current->pid != previous_pid) { pr_debug("\"%s\" (%ld) uses deprecated SWP{B} instruction\n", diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index 76cbb05..0264ab4 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -12,7 +12,7 @@ * have a non-standard calling sequence on the Linux/arm * platform. */ -#include <linux/export.h> +#include <linux/module.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/mm.h> diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c index 30e302d..f5cf660 100644 --- a/arch/arm/kernel/tcm.c +++ b/arch/arm/kernel/tcm.c @@ -19,8 +19,6 @@ #include "tcm.h" static struct gen_pool *tcm_pool; -static bool dtcm_present; -static bool itcm_present; /* TCM section definitions from the linker */ extern char __itcm_start, __sitcm_text, __eitcm_text; @@ -92,18 +90,6 @@ void tcm_free(void *addr, size_t len) } EXPORT_SYMBOL(tcm_free); -bool tcm_dtcm_present(void) -{ - return dtcm_present; -} -EXPORT_SYMBOL(tcm_dtcm_present); - -bool tcm_itcm_present(void) -{ - return itcm_present; -} -EXPORT_SYMBOL(tcm_itcm_present); - static int __init setup_tcm_bank(u8 type, u8 bank, u8 banks, u32 *offset) { @@ -148,10 +134,6 @@ static int __init setup_tcm_bank(u8 type, u8 bank, u8 banks, (tcm_region & 1) ? 
"" : "not "); } - /* Not much fun you can do with a size 0 bank */ - if (tcm_size == 0) - return 0; - /* Force move the TCM bank to where we want it, enable */ tcm_region = *offset | (tcm_region & 0x00000ffeU) | 1; @@ -183,20 +165,12 @@ void __init tcm_init(void) u32 tcm_status = read_cpuid_tcmstatus(); u8 dtcm_banks = (tcm_status >> 16) & 0x03; u8 itcm_banks = (tcm_status & 0x03); - size_t dtcm_code_sz = &__edtcm_data - &__sdtcm_data; - size_t itcm_code_sz = &__eitcm_text - &__sitcm_text; char *start; char *end; char *ram; int ret; int i; - /* Values greater than 2 for D/ITCM banks are "reserved" */ - if (dtcm_banks > 2) - dtcm_banks = 0; - if (itcm_banks > 2) - itcm_banks = 0; - /* Setup DTCM if present */ if (dtcm_banks > 0) { for (i = 0; i < dtcm_banks; i++) { @@ -204,13 +178,6 @@ void __init tcm_init(void) if (ret) return; } - /* This means you compiled more code than fits into DTCM */ - if (dtcm_code_sz > (dtcm_end - DTCM_OFFSET)) { - pr_info("CPU DTCM: %u bytes of code compiled to " - "DTCM but only %lu bytes of DTCM present\n", - dtcm_code_sz, (dtcm_end - DTCM_OFFSET)); - goto no_dtcm; - } dtcm_res.end = dtcm_end - 1; request_resource(&iomem_resource, &dtcm_res); dtcm_iomap[0].length = dtcm_end - DTCM_OFFSET; @@ -219,16 +186,12 @@ void __init tcm_init(void) start = &__sdtcm_data; end = &__edtcm_data; ram = &__dtcm_start; - memcpy(start, ram, dtcm_code_sz); - pr_debug("CPU DTCM: copied data from %p - %p\n", - start, end); - dtcm_present = true; - } else if (dtcm_code_sz) { - pr_info("CPU DTCM: %u bytes of code compiled to DTCM but no " - "DTCM banks present in CPU\n", dtcm_code_sz); + /* This means you compiled more code than fits into DTCM */ + BUG_ON((end - start) > (dtcm_end - DTCM_OFFSET)); + memcpy(start, ram, (end-start)); + pr_debug("CPU DTCM: copied data from %p - %p\n", start, end); } -no_dtcm: /* Setup ITCM if present */ if (itcm_banks > 0) { for (i = 0; i < itcm_banks; i++) { @@ -236,13 +199,6 @@ no_dtcm: if (ret) return; } - /* This means you compiled more code than fits into ITCM */ - if (itcm_code_sz > (itcm_end - ITCM_OFFSET)) { - pr_info("CPU ITCM: %u bytes of code compiled to " - "ITCM but only %lu bytes of ITCM present\n", - itcm_code_sz, (itcm_end - ITCM_OFFSET)); - return; - } itcm_res.end = itcm_end - 1; request_resource(&iomem_resource, &itcm_res); itcm_iomap[0].length = itcm_end - ITCM_OFFSET; @@ -251,13 +207,10 @@ no_dtcm: start = &__sitcm_text; end = &__eitcm_text; ram = &__itcm_start; - memcpy(start, ram, itcm_code_sz); - pr_debug("CPU ITCM: copied code from %p - %p\n", - start, end); - itcm_present = true; - } else if (itcm_code_sz) { - pr_info("CPU ITCM: %u bytes of code compiled to ITCM but no " - "ITCM banks present in CPU\n", itcm_code_sz); + /* This means you compiled more code than fits into ITCM */ + BUG_ON((end - start) > (itcm_end - ITCM_OFFSET)); + memcpy(start, ram, (end-start)); + pr_debug("CPU ITCM: copied code from %p - %p\n", start, end); } } @@ -268,6 +221,7 @@ no_dtcm: */ static int __init setup_tcm_pool(void) { + u32 tcm_status = read_cpuid_tcmstatus(); u32 dtcm_pool_start = (u32) &__edtcm_data; u32 itcm_pool_start = (u32) &__eitcm_text; int ret; @@ -282,7 +236,7 @@ static int __init setup_tcm_pool(void) pr_debug("Setting up TCM memory pool\n"); /* Add the rest of DTCM to the TCM pool */ - if (dtcm_present) { + if (tcm_status & (0x03 << 16)) { if (dtcm_pool_start < dtcm_end) { ret = gen_pool_add(tcm_pool, dtcm_pool_start, dtcm_end - dtcm_pool_start, -1); @@ -299,7 +253,7 @@ static int __init setup_tcm_pool(void) } /* Add the rest of ITCM 
to the TCM pool */ - if (itcm_present) { + if (tcm_status & 0x03) { if (itcm_pool_start < itcm_end) { ret = gen_pool_add(tcm_pool, itcm_pool_start, itcm_end - itcm_pool_start, -1); diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 8c57dd3..cb634c3 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -11,7 +11,7 @@ * This file contains the ARM-specific time handling details: * reading the RTC at bootup, etc... */ -#include <linux/export.h> +#include <linux/module.h> #include <linux/kernel.h> #include <linux/interrupt.h> #include <linux/time.h> @@ -39,11 +39,13 @@ */ static struct sys_timer *system_timer; -#if defined(CONFIG_RTC_DRV_CMOS) || defined(CONFIG_RTC_DRV_CMOS_MODULE) || \ - defined(CONFIG_NVRAM) || defined(CONFIG_NVRAM_MODULE) +#if defined(CONFIG_RTC_DRV_CMOS) || defined(CONFIG_RTC_DRV_CMOS_MODULE) /* this needs a better home */ DEFINE_SPINLOCK(rtc_lock); + +#ifdef CONFIG_RTC_DRV_CMOS_MODULE EXPORT_SYMBOL(rtc_lock); +#endif #endif /* pc-style 'CMOS' RTC support */ /* change this if you have some constant time drift */ diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index d45fd22..56b2715 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -21,14 +21,12 @@ #include <linux/kdebug.h> #include <linux/module.h> #include <linux/kexec.h> -#include <linux/bug.h> #include <linux/delay.h> #include <linux/init.h> #include <linux/sched.h> -#include <linux/atomic.h> +#include <asm/atomic.h> #include <asm/cacheflush.h> -#include <asm/exception.h> #include <asm/system.h> #include <asm/unistd.h> #include <asm/traps.h> @@ -37,13 +35,7 @@ #include "signal.h" -static const char *handler[]= { - "prefetch abort", - "data abort", - "address exception", - "interrupt", - "undefined instruction", -}; +static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" }; void *vectors_page; @@ -263,7 +255,7 @@ static int __die(const char *str, int err, struct thread_info *thread, struct pt return ret; } -static DEFINE_RAW_SPINLOCK(die_lock); +static DEFINE_SPINLOCK(die_lock); /* * This function is protected against re-entrancy. 
@@ -275,11 +267,9 @@ void die(const char *str, struct pt_regs *regs, int err) oops_enter(); - raw_spin_lock_irq(&die_lock); + spin_lock_irq(&die_lock); console_verbose(); bust_spinlocks(1); - if (!user_mode(regs)) - report_bug(regs->ARM_pc, regs); ret = __die(str, err, thread, regs); if (regs && kexec_should_crash(thread->task)) @@ -287,7 +277,7 @@ void die(const char *str, struct pt_regs *regs, int err) bust_spinlocks(0); add_taint(TAINT_DIE); - raw_spin_unlock_irq(&die_lock); + spin_unlock_irq(&die_lock); oops_exit(); if (in_interrupt()) @@ -311,43 +301,25 @@ void arm_notify_die(const char *str, struct pt_regs *regs, } } -#ifdef CONFIG_GENERIC_BUG - -int is_valid_bugaddr(unsigned long pc) -{ -#ifdef CONFIG_THUMB2_KERNEL - unsigned short bkpt; -#else - unsigned long bkpt; -#endif - - if (probe_kernel_address((unsigned *)pc, bkpt)) - return 0; - - return bkpt == BUG_INSTR_VALUE; -} - -#endif - static LIST_HEAD(undef_hook); -static DEFINE_RAW_SPINLOCK(undef_lock); +static DEFINE_SPINLOCK(undef_lock); void register_undef_hook(struct undef_hook *hook) { unsigned long flags; - raw_spin_lock_irqsave(&undef_lock, flags); + spin_lock_irqsave(&undef_lock, flags); list_add(&hook->node, &undef_hook); - raw_spin_unlock_irqrestore(&undef_lock, flags); + spin_unlock_irqrestore(&undef_lock, flags); } void unregister_undef_hook(struct undef_hook *hook) { unsigned long flags; - raw_spin_lock_irqsave(&undef_lock, flags); + spin_lock_irqsave(&undef_lock, flags); list_del(&hook->node); - raw_spin_unlock_irqrestore(&undef_lock, flags); + spin_unlock_irqrestore(&undef_lock, flags); } static int call_undef_hook(struct pt_regs *regs, unsigned int instr) @@ -356,53 +328,43 @@ static int call_undef_hook(struct pt_regs *regs, unsigned int instr) unsigned long flags; int (*fn)(struct pt_regs *regs, unsigned int instr) = NULL; - raw_spin_lock_irqsave(&undef_lock, flags); + spin_lock_irqsave(&undef_lock, flags); list_for_each_entry(hook, &undef_hook, node) if ((instr & hook->instr_mask) == hook->instr_val && (regs->ARM_cpsr & hook->cpsr_mask) == hook->cpsr_val) fn = hook->fn; - raw_spin_unlock_irqrestore(&undef_lock, flags); + spin_unlock_irqrestore(&undef_lock, flags); return fn ? fn(regs, instr) : 1; } asmlinkage void __exception do_undefinstr(struct pt_regs *regs) { + unsigned int correction = thumb_mode(regs) ? 2 : 4; unsigned int instr; siginfo_t info; void __user *pc; + /* + * According to the ARM ARM, PC is 2 or 4 bytes ahead, + * depending whether we're in Thumb mode or not. + * Correct this offset. 
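
The correction described here compensates for how exception entry saved the PC: on an undef exception the stored value points one instruction past the faulting one. Illustrated with an assumed example address:

    /* saved exception PC vs. faulting address:
     *   ARM state:   fault at 0xc0100040, saved ARM_pc = 0xc0100044
     *   Thumb state: fault at 0xc0100040, saved ARM_pc = 0xc0100042
     * so the handler rewinds by 4 or 2 before decoding the instruction. */

Note that this reverted user path fetches only a halfword in Thumb mode, so a 32-bit Thumb-2 undefined instruction is only partially decoded; the newer code being removed handled the second halfword via is_wide_instruction().
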
+ */ + regs->ARM_pc -= correction; + pc = (void __user *)instruction_pointer(regs); if (processor_mode(regs) == SVC_MODE) { -#ifdef CONFIG_THUMB2_KERNEL - if (thumb_mode(regs)) { - instr = ((u16 *)pc)[0]; - if (is_wide_instruction(instr)) { - instr <<= 16; - instr |= ((u16 *)pc)[1]; - } - } else -#endif - instr = *(u32 *) pc; + instr = *(u32 *) pc; } else if (thumb_mode(regs)) { - if (get_user(instr, (u16 __user *)pc)) - goto die_sig; - if (is_wide_instruction(instr)) { - unsigned int instr2; - if (get_user(instr2, (u16 __user *)pc+1)) - goto die_sig; - instr <<= 16; - instr |= instr2; - } - } else if (get_user(instr, (u32 __user *)pc)) { - goto die_sig; + get_user(instr, (u16 __user *)pc); + } else { + get_user(instr, (u32 __user *)pc); } if (call_undef_hook(regs, instr) == 0) return; -die_sig: #ifdef CONFIG_DEBUG_USER if (user_debug & UDBG_UNDEFINED) { printk(KERN_INFO "%s (%d): undefined instruction: pc=%p\n", @@ -731,6 +693,16 @@ baddataabort(int code, unsigned long instr, struct pt_regs *regs) arm_notify_die("unknown data abort code", regs, &info, instr, 0); } +void __attribute__((noreturn)) __bug(const char *file, int line) +{ + printk(KERN_CRIT"kernel BUG at %s:%d!\n", file, line); + *(int *)0 = 0; + + /* Avoid "noreturn function does return" */ + for (;;); +} +EXPORT_SYMBOL(__bug); + void __readwrite_bug(const char *fn) { printk("%s called, but not implemented\n", fn); diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index 00df012..d2cb0b3 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -39,7 +39,7 @@ #include <linux/kernel.h> #include <linux/init.h> -#include <linux/export.h> +#include <linux/module.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -67,7 +67,7 @@ EXPORT_SYMBOL(__aeabi_unwind_cpp_pr2); struct unwind_ctrl_block { unsigned long vrs[16]; /* virtual register set */ - const unsigned long *insn; /* pointer to the current instructions word */ + unsigned long *insn; /* pointer to the current instructions word */ int entries; /* number of entries left to interpret */ int byte; /* current byte number in the instructions word */ }; @@ -83,9 +83,8 @@ enum regs { PC = 15 }; -extern const struct unwind_idx __start_unwind_idx[]; -static const struct unwind_idx *__origin_unwind_idx; -extern const struct unwind_idx __stop_unwind_idx[]; +extern struct unwind_idx __start_unwind_idx[]; +extern struct unwind_idx __stop_unwind_idx[]; static DEFINE_SPINLOCK(unwind_lock); static LIST_HEAD(unwind_tables); @@ -99,99 +98,45 @@ static LIST_HEAD(unwind_tables); }) /* - * Binary search in the unwind index. The entries are + * Binary search in the unwind index. The entries entries are * guaranteed to be sorted in ascending order by the linker. - * - * start = first entry - * origin = first entry with positive offset (or stop if there is no such entry) - * stop - 1 = last entry */ -static const struct unwind_idx *search_index(unsigned long addr, - const struct unwind_idx *start, - const struct unwind_idx *origin, - const struct unwind_idx *stop) +static struct unwind_idx *search_index(unsigned long addr, + struct unwind_idx *first, + struct unwind_idx *last) { - unsigned long addr_prel31; - - pr_debug("%s(%08lx, %p, %p, %p)\n", - __func__, addr, start, origin, stop); - - /* - * only search in the section with the matching sign. This way the - * prel31 numbers can be compared as unsigned longs. 
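
The prel31 values mentioned above are 31-bit self-relative offsets: the unwind format reserves bit 31, so a decoder sign-extends bit 30 and adds the field's own address. That is exactly what the prel31_to_addr() helper used throughout this file does; written out as a function for clarity (the kernel's version is a macro with the same body):

    /* decode a prel31 field at ptr into an absolute address */
    static unsigned long prel31_to_addr(const unsigned long *ptr)
    {
        /* shift up, then arithmetic-shift down to sign-extend bit 30 */
        long offset = (((long)*ptr) << 1) >> 1;

        return (unsigned long)ptr + offset;
    }
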
-	 */
-	if (addr < (unsigned long)start)
-		/* negative offsets: [start; origin) */
-		stop = origin;
-	else
-		/* positive offsets: [origin; stop) */
-		start = origin;
-
-	/* prel31 for address relavive to start */
-	addr_prel31 = (addr - (unsigned long)start) & 0x7fffffff;
+	pr_debug("%s(%08lx, %p, %p)\n", __func__, addr, first, last);
 
-	while (start < stop - 1) {
-		const struct unwind_idx *mid = start + ((stop - start) >> 1);
-
-		/*
-		 * As addr_prel31 is relative to start an offset is needed to
-		 * make it relative to mid.
-		 */
-		if (addr_prel31 - ((unsigned long)mid - (unsigned long)start) <
-				mid->addr_offset)
-			stop = mid;
-		else {
-			/* keep addr_prel31 relative to start */
-			addr_prel31 -= ((unsigned long)mid -
-					(unsigned long)start);
-			start = mid;
-		}
-	}
-
-	if (likely(start->addr_offset <= addr_prel31))
-		return start;
-	else {
+	if (addr < first->addr) {
 		pr_warning("unwind: Unknown symbol address %08lx\n", addr);
 		return NULL;
-	}
-}
+	} else if (addr >= last->addr)
+		return last;
 
-static const struct unwind_idx *unwind_find_origin(
-		const struct unwind_idx *start, const struct unwind_idx *stop)
-{
-	pr_debug("%s(%p, %p)\n", __func__, start, stop);
-	while (start < stop) {
-		const struct unwind_idx *mid = start + ((stop - start) >> 1);
+	while (first < last - 1) {
+		struct unwind_idx *mid = first + ((last - first + 1) >> 1);
 
-		if (mid->addr_offset >= 0x40000000)
-			/* negative offset */
-			start = mid + 1;
+		if (addr < mid->addr)
+			last = mid;
 		else
-			/* positive offset */
-			stop = mid;
+			first = mid;
 	}
-	pr_debug("%s -> %p\n", __func__, stop);
-	return stop;
+
+	return first;
 }
 
-static const struct unwind_idx *unwind_find_idx(unsigned long addr)
+static struct unwind_idx *unwind_find_idx(unsigned long addr)
 {
-	const struct unwind_idx *idx = NULL;
+	struct unwind_idx *idx = NULL;
 	unsigned long flags;
 
 	pr_debug("%s(%08lx)\n", __func__, addr);
 
-	if (core_kernel_text(addr)) {
-		if (unlikely(!__origin_unwind_idx))
-			__origin_unwind_idx =
-				unwind_find_origin(__start_unwind_idx,
-						__stop_unwind_idx);
-
+	if (core_kernel_text(addr))
 		/* main unwind table */
 		idx = search_index(addr, __start_unwind_idx,
-				__origin_unwind_idx,
-				__stop_unwind_idx);
-	} else {
+				__stop_unwind_idx - 1);
+	else {
 		/* module unwind tables */
@@ -200,8 +145,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr)
 			if (addr >= table->begin_addr &&
 			    addr < table->end_addr) {
 				idx = search_index(addr, table->start,
-						table->origin,
-						table->stop);
+						table->stop - 1);
 				/* Move-to-front to exploit common traces */
 				list_move(&table->list, &unwind_tables);
 				break;
@@ -330,7 +274,7 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
 int unwind_frame(struct stackframe *frame)
 {
 	unsigned long high, low;
-	const struct unwind_idx *idx;
+	struct unwind_idx *idx;
 	struct unwind_ctrl_block ctrl;
 
 	/* only go to a higher address on the stack */
@@ -455,6 +399,7 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size,
 			      unsigned long text_size)
 {
 	unsigned long flags;
+	struct unwind_idx *idx;
 	struct unwind_table *tab = kmalloc(sizeof(*tab), GFP_KERNEL);
 
 	pr_debug("%s(%08lx, %08lx, %08lx, %08lx)\n", __func__, start, size,
@@ -463,12 +408,15 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size,
 	if (!tab)
 		return tab;
 
-	tab->start = (const struct unwind_idx *)start;
-	tab->stop = (const struct unwind_idx *)(start + size);
-	tab->origin = unwind_find_origin(tab->start, tab->stop);
+	tab->start = (struct unwind_idx *)start;
+	tab->stop = (struct unwind_idx *)(start + size);
 	tab->begin_addr = text_addr;
 	tab->end_addr = text_addr + text_size;
 
+	/* Convert the symbol addresses to absolute values */
+	for (idx = tab->start; idx < tab->stop; idx++)
+		idx->addr = prel31_to_addr(&idx->addr);
+
 	spin_lock_irqsave(&unwind_lock, flags);
 	list_add_tail(&tab->list, &unwind_tables);
 	spin_unlock_irqrestore(&unwind_lock, flags);
@@ -489,3 +437,16 @@ void unwind_table_del(struct unwind_table *tab)
 
 	kfree(tab);
 }
+
+int __init unwind_init(void)
+{
+	struct unwind_idx *idx;
+
+	/* Convert the symbol addresses to absolute values */
+	for (idx = __start_unwind_idx; idx < __stop_unwind_idx; idx++)
+		idx->addr = prel31_to_addr(&idx->addr);
+
+	pr_debug("unwind: ARM stack unwinding initialised\n");
+
+	return 0;
+}
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 20b3041..e5287f2 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -21,13 +21,10 @@
 #define ARM_CPU_KEEP(x)
 #endif
 
-#if (defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)) || \
-	defined(CONFIG_GENERIC_BUG)
+#if defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)
 #define ARM_EXIT_KEEP(x)	x
-#define ARM_EXIT_DISCARD(x)
 #else
 #define ARM_EXIT_KEEP(x)
-#define ARM_EXIT_DISCARD(x)	x
 #endif
 
 OUTPUT_ARCH(arm)
@@ -41,12 +38,58 @@ jiffies = jiffies_64 + 4;
 
 SECTIONS
 {
+#ifdef CONFIG_XIP_KERNEL
+	. = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR);
+#else
+	. = PAGE_OFFSET + TEXT_OFFSET;
+#endif
+
+	.init : {			/* Init code and data		*/
+		_stext = .;
+		_sinittext = .;
+			HEAD_TEXT
+			INIT_TEXT
+			ARM_EXIT_KEEP(EXIT_TEXT)
+		_einittext = .;
+		ARM_CPU_DISCARD(PROC_INFO)
+		__arch_info_begin = .;
+			*(.arch.info.init)
+		__arch_info_end = .;
+		__tagtable_begin = .;
+			*(.taglist.init)
+		__tagtable_end = .;
+#ifdef CONFIG_SMP_ON_UP
+		__smpalt_begin = .;
+			*(.alt.smp.init)
+		__smpalt_end = .;
+#endif
+
+		__pv_table_begin = .;
+			*(.pv_table)
+		__pv_table_end = .;
+
+		INIT_SETUP(16)
+
+		INIT_CALLS
+		CON_INITCALL
+		SECURITY_INITCALL
+		INIT_RAM_FS
+
+#ifndef CONFIG_XIP_KERNEL
+		__init_begin = _stext;
+		INIT_DATA
+		ARM_EXIT_KEEP(EXIT_DATA)
+#endif
+	}
+
+	PERCPU_SECTION(32)
+
+#ifndef CONFIG_XIP_KERNEL
+	. = ALIGN(PAGE_SIZE);
+	__init_end = .;
+#endif
+
 	/*
-	 * XXX: The linker does not define how output sections are
-	 * assigned to input sections when there are multiple statements
-	 * matching the same input section name.  There is no documented
-	 * order of matching.
-	 *
 	 * unwind exit sections must be discarded before the rest of the
 	 * unwind sections get included.
 	 */
@@ -55,9 +98,6 @@ SECTIONS
 		*(.ARM.extab.exit.text)
 		ARM_CPU_DISCARD(*(.ARM.exidx.cpuexit.text))
 		ARM_CPU_DISCARD(*(.ARM.extab.cpuexit.text))
-		ARM_EXIT_DISCARD(EXIT_TEXT)
-		ARM_EXIT_DISCARD(EXIT_DATA)
-		EXIT_CALL
 #ifndef CONFIG_HOTPLUG
 		*(.ARM.exidx.devexit.text)
 		*(.ARM.extab.devexit.text)
@@ -66,24 +106,10 @@ SECTIONS
 		*(.fixup)
 		*(__ex_table)
 #endif
-#ifndef CONFIG_SMP_ON_UP
-		*(.alt.smp.init)
-#endif
-		*(.discard)
-		*(.discard.*)
 	}
-#ifdef CONFIG_XIP_KERNEL
-	. = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR);
-#else
-	. = PAGE_OFFSET + TEXT_OFFSET;
-#endif
-	.head.text : {
-		_text = .;
-		HEAD_TEXT
-	}
 
 	.text : {			/* Real text segment		*/
-		_stext = .;		/* Text and read-only data	*/
+		_text = .;		/* Text and read-only data	*/
 		__exception_text_start = .;
 			*(.exception.text)
 		__exception_text_end = .;
@@ -96,6 +122,8 @@ SECTIONS
 			*(.fixup)
 #endif
 		*(.gnu.warning)
+		*(.rodata)
+		*(.rodata.*)
 		*(.glue_7)
 		*(.glue_7t)
 		. = ALIGN(4);
@@ -124,63 +152,10 @@ SECTIONS
 
 	_etext = .;			/* End of text and rodata section */
 
-#ifndef CONFIG_XIP_KERNEL
-	. = ALIGN(PAGE_SIZE);
-	__init_begin = .;
-#endif
-
-	INIT_TEXT_SECTION(8)
-	.exit.text : {
-		ARM_EXIT_KEEP(EXIT_TEXT)
-	}
-	.init.proc.info : {
-		ARM_CPU_DISCARD(PROC_INFO)
-	}
-	.init.arch.info : {
-		__arch_info_begin = .;
-		*(.arch.info.init)
-		__arch_info_end = .;
-	}
-	.init.tagtable : {
-		__tagtable_begin = .;
-		*(.taglist.init)
-		__tagtable_end = .;
-	}
-#ifdef CONFIG_SMP_ON_UP
-	.init.smpalt : {
-		__smpalt_begin = .;
-		*(.alt.smp.init)
-		__smpalt_end = .;
-	}
-#endif
-	.init.pv_table : {
-		__pv_table_begin = .;
-		*(.pv_table)
-		__pv_table_end = .;
-	}
-	.init.data : {
-#ifndef CONFIG_XIP_KERNEL
-		INIT_DATA
-#endif
-		INIT_SETUP(16)
-		INIT_CALLS
-		CON_INITCALL
-		SECURITY_INITCALL
-		INIT_RAM_FS
-	}
-#ifndef CONFIG_XIP_KERNEL
-	.exit.data : {
-		ARM_EXIT_KEEP(EXIT_DATA)
-	}
-#endif
-
-	PERCPU_SECTION(32)
-
 #ifdef CONFIG_XIP_KERNEL
 	__data_loc = ALIGN(4);		/* location in binary */
 	. = PAGE_OFFSET + TEXT_OFFSET;
 #else
-	__init_end = .;
 	. = ALIGN(THREAD_SIZE);
 	__data_loc = .;
 #endif
@@ -292,6 +267,15 @@ SECTIONS
 
 	STABS_DEBUG
 	.comment 0 : { *(.comment) }
+
+	/* Default discards */
+	DISCARDS
+
+#ifndef CONFIG_SMP_ON_UP
+	/DISCARD/ : {
+		*(.alt.smp.init)
+	}
+#endif
 }
 
 /*
diff --git a/arch/arm/kernel/xscale-cp0.c b/arch/arm/kernel/xscale-cp0.c
index e42adc6..1796157 100644
--- a/arch/arm/kernel/xscale-cp0.c
+++ b/arch/arm/kernel/xscale-cp0.c
@@ -8,6 +8,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/signal.h>
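For context on the do_undefinstr() hunks above: on an undefined-instruction exception the saved PC points one instruction width past the faulting instruction, so the handler first backs it up by 2 bytes (Thumb) or 4 bytes (ARM) before fetching the opcode. The following is a minimal userspace sketch of that bookkeeping, not kernel code; struct fake_regs and faulting_pc() are illustrative stand-ins for pt_regs and the correction at the top of do_undefinstr(), while PSR_T_BIT is the real Thumb-state bit of the CPSR.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the kernel's pt_regs and thumb_mode(). */
#define PSR_T_BIT	0x00000020	/* Thumb state bit in the CPSR */

struct fake_regs {
	uint32_t pc;			/* PC as saved on exception entry */
	uint32_t cpsr;
};

static int thumb_mode(const struct fake_regs *regs)
{
	return regs->cpsr & PSR_T_BIT;
}

/* Mirrors the correction at the top of the patched do_undefinstr(). */
static uint32_t faulting_pc(struct fake_regs *regs)
{
	uint32_t correction = thumb_mode(regs) ? 2 : 4;

	regs->pc -= correction;
	return regs->pc;
}

int main(void)
{
	struct fake_regs arm = { .pc = 0x8004, .cpsr = 0 };
	struct fake_regs thumb = { .pc = 0x8002, .cpsr = PSR_T_BIT };

	/* ARM state: 32-bit instructions, so the fault was at 0x8000 */
	printf("ARM:   0x%08x\n", (unsigned int)faulting_pc(&arm));

	/* Thumb state: 16-bit instructions, so only 2 bytes back */
	printf("Thumb: 0x%08x\n", (unsigned int)faulting_pc(&thumb));

	return 0;
}

The same mode test also decides the access width for reading the opcode, which is why the patched handler uses a 16-bit get_user() in Thumb state and a 32-bit one otherwise.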
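The unwind.c rework above revolves around the prel31 format of ARM unwind index entries: each 32-bit word stores a signed 31-bit offset relative to the word's own address. The new unwind_init() and unwind_table_add() decode every entry to an absolute address once, so search_index() can binary-search plain addresses. Below is a standalone sketch of the decode arithmetic with made-up example addresses; it mirrors the prel31_to_addr() macro in unwind.c but is not the kernel code, and it assumes the usual two's-complement arithmetic right shift.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Decode a prel31 word stored at address 'where': sign-extend bits
 * [30:0] of the word, then add the word's own address.
 */
static uint32_t prel31_to_addr(uint32_t word, uint32_t where)
{
	/* shift bit 30 into the sign position, then shift back */
	int32_t offset = (int32_t)(word << 1) >> 1;

	return where + (uint32_t)offset;
}

int main(void)
{
	/* an entry at 0x8000 holding +0x100 refers to 0x8100 */
	assert(prel31_to_addr(0x00000100, 0x8000) == 0x8100);

	/* 0x7ffffff8 encodes -8, so the same entry would refer to 0x7ff8 */
	assert(prel31_to_addr(0x7ffffff8, 0x8000) == 0x7ff8);

	printf("prel31 decode OK\n");
	return 0;
}

Once the entries are absolute they compare as ordinary unsigned values, which is what lets the patched search_index() drop the sign-aware, offset-relative midpoint arithmetic of the prel31 version.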