From f5b42c93d891cc9e6528b4e1ab11160ce670e67d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 6 Feb 2011 23:04:40 +0000 Subject: tile: Convert irq_chip to new functions Signed-off-by: Thomas Gleixner Signed-off-by: Chris Metcalf --- arch/tile/kernel/irq.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index 128805e..a3a12c4 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c @@ -176,43 +176,43 @@ void disable_percpu_irq(unsigned int irq) EXPORT_SYMBOL(disable_percpu_irq); /* Mask an interrupt. */ -static void tile_irq_chip_mask(unsigned int irq) +static void tile_irq_chip_mask(struct irq_data *d) { - mask_irqs(1UL << irq); + mask_irqs(1UL << d->irq); } /* Unmask an interrupt. */ -static void tile_irq_chip_unmask(unsigned int irq) +static void tile_irq_chip_unmask(struct irq_data *d) { - unmask_irqs(1UL << irq); + unmask_irqs(1UL << d->irq); } /* * Clear an interrupt before processing it so that any new assertions * will trigger another irq. */ -static void tile_irq_chip_ack(unsigned int irq) +static void tile_irq_chip_ack(struct irq_data *d) { - if ((unsigned long)get_irq_chip_data(irq) != IS_HW_CLEARED) - clear_irqs(1UL << irq); + if ((unsigned long)irq_data_get_irq_chip_data(d) != IS_HW_CLEARED) + clear_irqs(1UL << d->irq); } /* * For per-cpu interrupts, we need to avoid unmasking any interrupts * that we disabled via disable_percpu_irq(). */ -static void tile_irq_chip_eoi(unsigned int irq) +static void tile_irq_chip_eoi(struct irq_data *d) { - if (!(__get_cpu_var(irq_disable_mask) & (1UL << irq))) - unmask_irqs(1UL << irq); + if (!(__get_cpu_var(irq_disable_mask) & (1UL << d->irq))) + unmask_irqs(1UL << d->irq); } static struct irq_chip tile_irq_chip = { .name = "tile_irq_chip", - .ack = tile_irq_chip_ack, - .eoi = tile_irq_chip_eoi, - .mask = tile_irq_chip_mask, - .unmask = tile_irq_chip_unmask, + .irq_ack = tile_irq_chip_ack, + .irq_eoi = tile_irq_chip_eoi, + .irq_mask = tile_irq_chip_mask, + .irq_unmask = tile_irq_chip_unmask, }; void __init init_IRQ(void) -- cgit v1.1 From 14536076dfa382a4b302709ffd3041ae10107144 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 6 Feb 2011 23:04:42 +0000 Subject: tile: Use proper accessor functions in show_interrupt() Signed-off-by: Thomas Gleixner Signed-off-by: Chris Metcalf --- arch/tile/kernel/irq.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index a3a12c4..0baa758 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c @@ -277,8 +277,10 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - raw_spin_lock_irqsave(&irq_desc[i].lock, flags); - action = irq_desc[i].action; + struct irq_desc *desc = irq_to_desc(i); + + raw_spin_lock_irqsave(&desc->lock, flags); + action = desc->action; if (!action) goto skip; seq_printf(p, "%3d: ", i); @@ -288,7 +290,7 @@ int show_interrupts(struct seq_file *p, void *v) for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); #endif - seq_printf(p, " %14s", irq_desc[i].chip->name); + seq_printf(p, " %14s", get_irq_desc_chip(desc)->name); seq_printf(p, " %s", action->name); for (action = action->next; action; action = action->next) @@ -296,7 +298,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); + 
raw_spin_unlock_irqrestore(&desc->lock, flags); } return 0; } -- cgit v1.1 From 2cb82400719e085a3c226cf7cce8950208f09a06 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Sun, 27 Feb 2011 18:52:24 -0500 Subject: arch/tile: catch up with section naming convention in 2.6.35 The convention changed to, e.g., ".data..page_aligned". This commit fixes the places in the tile architecture that were still using the old convention. One tile-specific section (.init.page) was dropped in favor of just using an "aligned" attribute. Sam Ravnborg pointed out __PAGE_ALIGNED_BSS, etc. Signed-off-by: Chris Metcalf --- arch/tile/kernel/head_32.S | 4 ++-- arch/tile/kernel/vmlinux.lds.S | 5 +---- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S index 90e7c44..05b5f4d 100644 --- a/arch/tile/kernel/head_32.S +++ b/arch/tile/kernel/head_32.S @@ -133,7 +133,7 @@ ENTRY(_start) } ENDPROC(_start) -.section ".bss.page_aligned","w" +__PAGE_ALIGNED_BSS .align PAGE_SIZE ENTRY(empty_zero_page) .fill PAGE_SIZE,1,0 @@ -148,7 +148,7 @@ ENTRY(empty_zero_page) .word (\bits1) | (HV_CPA_TO_PFN(\cpa) << HV_PTE_INDEX_PFN) .endm -.section ".data.page_aligned","wa" +__PAGE_ALIGNED_DATA .align PAGE_SIZE ENTRY(swapper_pg_dir) /* diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S index 25fdc0c..4e211c1 100644 --- a/arch/tile/kernel/vmlinux.lds.S +++ b/arch/tile/kernel/vmlinux.lds.S @@ -59,10 +59,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); VMLINUX_SYMBOL(_sinitdata) = .; - .init.page : AT (ADDR(.init.page) - LOAD_OFFSET) { - *(.init.page) - } :data =0 - INIT_DATA_SECTION(16) + INIT_DATA_SECTION(16) :data =0 PERCPU(PAGE_SIZE) . = ALIGN(PAGE_SIZE); VMLINUX_SYMBOL(_einitdata) = .; -- cgit v1.1 From 04f7a3f12e10032ee3d44df1a509dbf5b2001fce Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 28 Feb 2011 13:08:32 -0500 Subject: arch/tile: bug fix: exec'ed task thought it was still single-stepping To handle single-step, tile mmap's a page of memory in the process space for each thread and uses it to construct a version of the instruction that we want to single step. If the process exec's, though, we lose that mapping, and the kernel needs to be aware that it will need to recreate it if the exec'ed process then tries to single-step as well. Also correct some int32_t to s32 for better kernel style.
Signed-off-by: Chris Metcalf --- arch/tile/kernel/process.c | 4 ++++ arch/tile/kernel/single_step.c | 21 +++++++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index e90eb53..5db8b5b 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -574,6 +574,8 @@ SYSCALL_DEFINE4(execve, const char __user *, path, goto out; error = do_execve(filename, argv, envp, regs); putname(filename); + if (error == 0) + single_step_execve(); out: return error; } @@ -593,6 +595,8 @@ long compat_sys_execve(const char __user *path, goto out; error = compat_do_execve(filename, argv, envp, regs); putname(filename); + if (error == 0) + single_step_execve(); out: return error; } diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c index 1eb3b39..84a729e 100644 --- a/arch/tile/kernel/single_step.c +++ b/arch/tile/kernel/single_step.c @@ -56,7 +56,7 @@ enum mem_op { MEMOP_STORE_POSTINCR }; -static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, int32_t offset) +static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset) { tile_bundle_bits result; @@ -254,6 +254,18 @@ P("\n"); return bundle; } +/* + * Called after execve() has started the new image. This allows us + * to reset the info state. Note that the the mmap'ed memory, if there + * was any, has already been unmapped by the exec. + */ +void single_step_execve(void) +{ + struct thread_info *ti = current_thread_info(); + kfree(ti->step_state); + ti->step_state = NULL; +} + /** * single_step_once() - entry point when single stepping has been triggered. * @regs: The machine register state @@ -373,7 +385,7 @@ void single_step_once(struct pt_regs *regs) /* branches */ case BRANCH_OPCODE_X1: { - int32_t offset = signExtend17(get_BrOff_X1(bundle)); + s32 offset = signExtend17(get_BrOff_X1(bundle)); /* * For branches, we use a rewriting trick to let the @@ -731,4 +743,9 @@ void single_step_once(struct pt_regs *regs) __insn_mtspr(SPR_SINGLE_STEP_EN_K_K, 1 << USER_PL); } +void single_step_execve(void) +{ + /* Nothing */ +} + #endif /* !__tilegx__ */ -- cgit v1.1 From 13371731487896a6ef158b1cd74297f40a3da4bb Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 28 Feb 2011 13:21:52 -0500 Subject: arch/tile: fix __ndelay etc to work better The current implementations of __ndelay and __udelay call a hypervisor service to delay, but the hypervisor service isn't actually implemented very well, and the consensus is that Linux should handle figuring this out natively and not use a hypervisor service. By converting nanoseconds to cycles, and then spinning until the cycle counter reaches the desired cycle, we get several benefits: first, we are sensitive to the actual clock speed; second, we use less power by issuing a slow SPR read once every six cycles while we delay; and third, we properly handle the case of an interrupt by exiting at the target time rather than after some number of cycles. Signed-off-by: Chris Metcalf --- arch/tile/kernel/entry.S | 6 ------ arch/tile/kernel/time.c | 10 ++++++++++ 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S index fd8dc42..c3aa067 100644 --- a/arch/tile/kernel/entry.S +++ b/arch/tile/kernel/entry.S @@ -38,12 +38,6 @@ STD_ENTRY(kernel_execve) jrp lr STD_ENDPROC(kernel_execve) -/* Delay a fixed number of cycles. 
*/ -STD_ENTRY(__delay) - { addi r0, r0, -1; bnzt r0, . } - jrp lr - STD_ENDPROC(__delay) - /* * We don't run this function directly, but instead copy it to a page * we map into every user process. See vdso_setup(). diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index f2e156e..49a605b 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -224,3 +224,13 @@ int setup_profiling_timer(unsigned int multiplier) { return -EINVAL; } + +/* + * Use the tile timer to convert nsecs to core clock cycles, relying + * on it having the same frequency as SPR_CYCLE. + */ +cycles_t ns2cycles(unsigned long nsecs) +{ + struct clock_event_device *dev = &__get_cpu_var(tile_timer); + return ((u64)nsecs * dev->mult) >> dev->shift; +} -- cgit v1.1 From b2ce2bdaf942172914a9a39b26065ff7aacdf962 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 28 Feb 2011 13:24:37 -0500 Subject: arch/tile: stop disabling INTCTRL_1 interrupts during hypervisor downcalls The problem was that this could lead to IPIs being disabled during the softirq processing after a hypervisor downcall (e.g. for I/O), since both IPI and device interrupts use the INTCTRL_1 downcall mechanism. When this happened at the wrong time, it could lead to deadlock. Luckily, we were already maintaining the per-interrupt state we need, and using it in the proper way in the hypervisor, so all we had to do was to change Linux to stop blocking downcall interrupts for the entire length of the downcall. (Now they're blocked while we're executing the downcall routine itself, but not while we're executing any subsequent softirq routines.) The hypervisor is doing a very small amount of work it no longer needs to do (masking INTCTRL_1 on entry to the client interrupt routine), but doing so means that older versions of Tile Linux will continue to work with a current hypervisor, so that seems reasonable. Signed-off-by: Chris Metcalf --- arch/tile/kernel/intvec_32.S | 54 ++++---------------------------------------- 1 file changed, 5 insertions(+), 49 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 5eed4a0..abf92f5 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -32,10 +32,6 @@ # error "No support for kernel preemption currently" #endif -#if INT_INTCTRL_K < 32 || INT_INTCTRL_K >= 48 -# error INT_INTCTRL_K coded to set high interrupt mask -#endif - #define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg) #define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR) @@ -1199,46 +1195,6 @@ STD_ENTRY(interrupt_return) STD_ENDPROC(interrupt_return) /* - * This interrupt variant clears the INT_INTCTRL_K interrupt mask bit - * before returning, so we can properly get more downcalls. - */ - .pushsection .text.handle_interrupt_downcall,"ax" -handle_interrupt_downcall: - finish_interrupt_save handle_interrupt_downcall - check_single_stepping normal, .Ldispatch_downcall -.Ldispatch_downcall: - - /* Clear INTCTRL_K from the set of interrupts we ever enable. */ - GET_INTERRUPTS_ENABLED_MASK_PTR(r30) - { - addi r30, r30, 4 - movei r31, INT_MASK(INT_INTCTRL_K) - } - { - lw r20, r30 - nor r21, r31, zero - } - and r20, r20, r21 - sw r30, r20 - - { - jalr r0 - PTREGS_PTR(r0, PTREGS_OFFSET_BASE) - } - FEEDBACK_REENTER(handle_interrupt_downcall) - - /* Allow INTCTRL_K to be enabled next time we enable interrupts. 
*/ - lw r20, r30 - or r20, r20, r31 - sw r30, r20 - - { - movei r30, 0 /* not an NMI */ - j interrupt_return - } - STD_ENDPROC(handle_interrupt_downcall) - - /* * Some interrupts don't check for single stepping */ .pushsection .text.handle_interrupt_no_single_step,"ax" @@ -2014,17 +1970,17 @@ int_unalign: #endif int_hand INT_INTCTRL_0, INTCTRL_0, bad_intr int_hand INT_MESSAGE_RCV_DWNCL, MESSAGE_RCV_DWNCL, \ - hv_message_intr, handle_interrupt_downcall + hv_message_intr int_hand INT_DEV_INTR_DWNCL, DEV_INTR_DWNCL, \ - tile_dev_intr, handle_interrupt_downcall + tile_dev_intr int_hand INT_I_ASID, I_ASID, bad_intr int_hand INT_D_ASID, D_ASID, bad_intr int_hand INT_DMATLB_MISS_DWNCL, DMATLB_MISS_DWNCL, \ - do_page_fault, handle_interrupt_downcall + do_page_fault int_hand INT_SNITLB_MISS_DWNCL, SNITLB_MISS_DWNCL, \ - do_page_fault, handle_interrupt_downcall + do_page_fault int_hand INT_DMATLB_ACCESS_DWNCL, DMATLB_ACCESS_DWNCL, \ - do_page_fault, handle_interrupt_downcall + do_page_fault int_hand INT_SN_CPL, SN_CPL, bad_intr int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap #if CHIP_HAS_AUX_PERF_COUNTERS() -- cgit v1.1 From bbeee4b2815dd318e9ec9d092d7f79061cc8ba36 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 28 Feb 2011 13:32:14 -0500 Subject: arch/tile: warn and retry if an IPI is not accepted by the target cpu Previously we assumed this was impossible, but in fact it can happen. Handle it gracefully by retrying after issuing a warning. Signed-off-by: Chris Metcalf --- arch/tile/kernel/smp.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index 9575b37..a429310 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c @@ -36,6 +36,22 @@ static unsigned long __iomem *ipi_mappings[NR_CPUS]; /* Set by smp_send_stop() to avoid recursive panics. 
*/ static int stopping_cpus; +static void __send_IPI_many(HV_Recipient *recip, int nrecip, int tag) +{ + int sent = 0; + while (sent < nrecip) { + int rc = hv_send_message(recip, nrecip, + (HV_VirtAddr)&tag, sizeof(tag)); + if (rc < 0) { + if (!stopping_cpus) /* avoid recursive panic */ + panic("hv_send_message returned %d", rc); + break; + } + WARN_ONCE(rc == 0, "hv_send_message() returned zero\n"); + sent += rc; + } +} + void send_IPI_single(int cpu, int tag) { HV_Recipient recip = { @@ -43,14 +59,13 @@ void send_IPI_single(int cpu, int tag) .x = cpu % smp_width, .state = HV_TO_BE_SENT }; - int rc = hv_send_message(&recip, 1, (HV_VirtAddr)&tag, sizeof(tag)); - BUG_ON(rc <= 0); + __send_IPI_many(&recip, 1, tag); } void send_IPI_many(const struct cpumask *mask, int tag) { HV_Recipient recip[NR_CPUS]; - int cpu, sent; + int cpu; int nrecip = 0; int my_cpu = smp_processor_id(); for_each_cpu(cpu, mask) { @@ -61,17 +76,7 @@ void send_IPI_many(const struct cpumask *mask, int tag) r->x = cpu % smp_width; r->state = HV_TO_BE_SENT; } - sent = 0; - while (sent < nrecip) { - int rc = hv_send_message(recip, nrecip, - (HV_VirtAddr)&tag, sizeof(tag)); - if (rc <= 0) { - if (!stopping_cpus) /* avoid recursive panic */ - panic("hv_send_message returned %d", rc); - break; - } - sent += rc; - } + __send_IPI_many(recip, nrecip, tag); } void send_IPI_allbutself(int tag) -- cgit v1.1 From 0b989cac90144565b8780ddde36e6a927f8ca7ba Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 28 Feb 2011 15:22:40 -0500 Subject: arch/tile: use a cleaner technique to enable interrupt for cpu_idle() Previously we used iret to atomically return to kernel PL with interrupts enabled. However, it turns out that we are architecturally guaranteed that we can just set and clear the "interrupt critical section" and only interrupt on the following instruction, so we now do that instead, since it's cleaner. Signed-off-by: Chris Metcalf --- arch/tile/kernel/entry.S | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S index c3aa067..431e9ae 100644 --- a/arch/tile/kernel/entry.S +++ b/arch/tile/kernel/entry.S @@ -91,23 +91,17 @@ STD_ENTRY(smp_nap) /* * Enable interrupts racelessly and then nap until interrupted. + * Architecturally, we are guaranteed that enabling interrupts via + * mtspr to INTERRUPT_CRITICAL_SECTION only interrupts at the next PC. * This function's _cpu_idle_nap address is special; see intvec.S. * When interrupted at _cpu_idle_nap, we bump the PC forward 8, and * as a result return to the function that called _cpu_idle(). */ STD_ENTRY(_cpu_idle) - { - lnk r0 - movei r1, KERNEL_PL - } - { - addli r0, r0, _cpu_idle_nap - . - mtspr INTERRUPT_CRITICAL_SECTION, r1 - } + movei r1, 1 + mtspr INTERRUPT_CRITICAL_SECTION, r1 IRQ_ENABLE(r2, r3) /* unmask, but still with ICS set */ - mtspr SPR_EX_CONTEXT_K_1, r1 /* Kernel PL, ICS clear */ - mtspr SPR_EX_CONTEXT_K_0, r0 - iret + mtspr INTERRUPT_CRITICAL_SECTION, zero .global _cpu_idle_nap _cpu_idle_nap: nap -- cgit v1.1 From 3cebbafd28e6f91677f3becffcdf9150b74a4e0c Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 28 Feb 2011 15:30:16 -0500 Subject: arch/tile: fix two bugs in the backtracer code The first is that we were using an incorrect hand-rolled variant of __kernel_text_address() which didn't handle module PCs. We now just use the standard API. 
The second was that we weren't accounting for the three-level page table when we were trying to pre-verify the addresses on the 64-bit TILE-Gx processor; we now do that correctly. Signed-off-by: Chris Metcalf --- arch/tile/kernel/stack.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index 0d54106b..dd81713 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -44,13 +44,6 @@ static int in_kernel_stack(struct KBacktraceIterator *kbt, VirtualAddress sp) return sp >= kstack_base && sp < kstack_base + THREAD_SIZE; } -/* Is address in the specified kernel code? */ -static int in_kernel_text(VirtualAddress address) -{ - return (address >= MEM_SV_INTRPT && - address < MEM_SV_INTRPT + HPAGE_SIZE); -} - /* Is address valid for reading? */ static int valid_address(struct KBacktraceIterator *kbt, VirtualAddress address) { @@ -63,6 +56,23 @@ static int valid_address(struct KBacktraceIterator *kbt, VirtualAddress address) if (l1_pgtable == NULL) return 0; /* can't read user space in other tasks */ +#ifdef CONFIG_64BIT + /* Find the real l1_pgtable by looking in the l0_pgtable. */ + pte = l1_pgtable[HV_L0_INDEX(address)]; + if (!hv_pte_get_present(pte)) + return 0; + pfn = hv_pte_get_pfn(pte); + if (pte_huge(pte)) { + if (!pfn_valid(pfn)) { + pr_err("L0 huge page has bad pfn %#lx\n", pfn); + return 0; + } + return hv_pte_get_present(pte) && hv_pte_get_readable(pte); + } + page = pfn_to_page(pfn); + BUG_ON(PageHighMem(page)); /* No HIGHMEM on 64-bit. */ + l1_pgtable = (HV_PTE *)pfn_to_kaddr(pfn); +#endif pte = l1_pgtable[HV_L1_INDEX(address)]; if (!hv_pte_get_present(pte)) return 0; @@ -92,7 +102,7 @@ static bool read_memory_func(void *result, VirtualAddress address, { int retval; struct KBacktraceIterator *kbt = (struct KBacktraceIterator *)vkbt; - if (in_kernel_text(address)) { + if (__kernel_text_address(address)) { /* OK to read kernel code. */ } else if (address >= PAGE_OFFSET) { /* We only tolerate kernel-space reads of this task's stack */ @@ -132,7 +142,7 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) } } if (EX1_PL(p->ex1) == KERNEL_PL && - in_kernel_text(p->pc) && + __kernel_text_address(p->pc) && in_kernel_stack(kbt, p->sp) && p->sp >= sp) { if (kbt->verbose) -- cgit v1.1 From 5fb682b0644cd20015d9b0e3ca6921ad5533f4ba Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 28 Feb 2011 15:58:39 -0500 Subject: arch/tile: fix some comments and whitespace This is a grab bag of changes with no actual change to generated code. This includes whitespace and comment typos, plus a couple of stale comments being removed. Signed-off-by: Chris Metcalf --- arch/tile/kernel/intvec_32.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index abf92f5..eabf1ef 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -1584,7 +1584,7 @@ ENTRY(sys_cmpxchg) * about aliasing among multiple mappings of the same physical page, * and we ignore the low 3 bits so we have one lock that covers * both a cmpxchg64() and a cmpxchg() on either its low or high word. - * NOTE: this code must match __atomic_hashed_lock() in lib/atomic.c. + * NOTE: this must match __atomic_hashed_lock() in lib/atomic_32.c. 
*/ #if ATOMIC_LOCKS_FOUND_VIA_TABLE() @@ -1718,7 +1718,7 @@ ENTRY(sys_cmpxchg) /* * Perform the actual cmpxchg or atomic_update. - * Note that __futex_mark_unlocked() in uClibc relies on + * Note that the system header relies on * atomic_update() to always perform an "mf", so don't make * it optional or conditional without modifying that code. */ -- cgit v1.1 From 76c567fbba50c3da2f4d40e2e551bab26cfd4381 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 28 Feb 2011 16:37:34 -0500 Subject: arch/tile: support 4KB page size as well as 64KB The Tilera architecture traditionally supports 64KB page sizes to improve TLB utilization and improve performance when the hardware is being used primarily to run a single application. For more generic server scenarios, it can be beneficial to run with 4KB page sizes, so this commit allows that to be specified (by modifying the arch/tile/include/hv/pagesize.h header). As part of this change, we also re-worked the PTE management slightly so that PTE writes all go through a __set_pte() function where we can do some additional validation. The set_pte_order() function was eliminated since the "order" argument wasn't being used. One bug uncovered was in the PCI DMA code, which wasn't properly flushing the specified range. This was benign with 64KB pages, but with 4KB pages we were getting some larger flushes wrong. The per-cpu memory reservation code also needed updating to conform with the newer percpu stuff; before it always chose 64KB, and that was always correct, but with 4KB granularity we now have to pay closer attention and reserve the amount of memory that will be requested when the percpu code starts allocating. Signed-off-by: Chris Metcalf --- arch/tile/kernel/intvec_32.S | 16 +++++++++++++--- arch/tile/kernel/machine_kexec.c | 7 +++++-- arch/tile/kernel/pci-dma.c | 38 +++++++++++++++++++------------------- arch/tile/kernel/process.c | 2 +- arch/tile/kernel/setup.c | 20 ++++++++++++-------- 5 files changed, 50 insertions(+), 33 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index eabf1ef..fffcfa6 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -1556,7 +1556,10 @@ STD_ENTRY(_sys_clone) .align 64 /* Align much later jump on the start of a cache line. */ #if !ATOMIC_LOCKS_FOUND_VIA_TABLE() - nop; nop + nop +#if PAGE_SIZE >= 0x10000 + nop +#endif #endif ENTRY(sys_cmpxchg) @@ -1587,6 +1590,10 @@ ENTRY(sys_cmpxchg) * NOTE: this must match __atomic_hashed_lock() in lib/atomic_32.c. */ +#if (PAGE_OFFSET & 0xffff) != 0 +# error Code here assumes PAGE_OFFSET can be loaded with just hi16() +#endif + #if ATOMIC_LOCKS_FOUND_VIA_TABLE() { /* Check for unaligned input. */ @@ -1679,11 +1686,14 @@ ENTRY(sys_cmpxchg) lw r26, r0 } { - /* atomic_locks is page aligned so this suffices to get its addr. */ - auli r21, zero, hi16(atomic_locks) + auli r21, zero, ha16(atomic_locks) bbns r23, .Lcmpxchg_badaddr } +#if PAGE_SIZE < 0x10000 + /* atomic_locks is page-aligned so for big pages we don't need this. */ + addli r21, r21, lo16(atomic_locks) +#endif { /* * Insert the hash bits into the page-aligned pointer. 
diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c index 0d8b9e9..e00d717 100644 --- a/arch/tile/kernel/machine_kexec.c +++ b/arch/tile/kernel/machine_kexec.c @@ -240,8 +240,11 @@ static void setup_quasi_va_is_pa(void) pte = hv_pte(_PAGE_KERNEL | _PAGE_HUGE_PAGE); pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); - for (i = 0; i < pgd_index(PAGE_OFFSET); i++) - pgtable[i] = pfn_pte(i << (HPAGE_SHIFT - PAGE_SHIFT), pte); + for (i = 0; i < pgd_index(PAGE_OFFSET); i++) { + unsigned long pfn = i << (HPAGE_SHIFT - PAGE_SHIFT); + if (pfn_valid(pfn)) + __set_pte(&pgtable[i], pfn_pte(pfn, pte)); + } } diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c index 5ad5e13..658752b2 100644 --- a/arch/tile/kernel/pci-dma.c +++ b/arch/tile/kernel/pci-dma.c @@ -86,6 +86,21 @@ EXPORT_SYMBOL(dma_free_coherent); * can count on nothing having been touched. */ +/* Flush a PA range from cache page by page. */ +static void __dma_map_pa_range(dma_addr_t dma_addr, size_t size) +{ + struct page *page = pfn_to_page(PFN_DOWN(dma_addr)); + size_t bytesleft = PAGE_SIZE - (dma_addr & (PAGE_SIZE - 1)); + + while ((ssize_t)size > 0) { + /* Flush the page. */ + homecache_flush_cache(page++, 0); + + /* Figure out if we need to continue on the next page. */ + size -= bytesleft; + bytesleft = PAGE_SIZE; + } +} /* * dma_map_single can be passed any memory address, and there appear @@ -97,26 +112,12 @@ EXPORT_SYMBOL(dma_free_coherent); dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, enum dma_data_direction direction) { - struct page *page; - dma_addr_t dma_addr; - int thispage; + dma_addr_t dma_addr = __pa(ptr); BUG_ON(!valid_dma_direction(direction)); WARN_ON(size == 0); - dma_addr = __pa(ptr); - - /* We might have been handed a buffer that wraps a page boundary */ - while ((int)size > 0) { - /* The amount to flush that's on this page */ - thispage = PAGE_SIZE - ((unsigned long)ptr & (PAGE_SIZE - 1)); - thispage = min((int)thispage, (int)size); - /* Is this valid for any page we could be handed? 
*/ - page = pfn_to_page(kaddr_to_pfn(ptr)); - homecache_flush_cache(page, 0); - ptr += thispage; - size -= thispage; - } + __dma_map_pa_range(dma_addr, size); return dma_addr; } @@ -140,10 +141,8 @@ int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, WARN_ON(nents == 0 || sglist->length == 0); for_each_sg(sglist, sg, nents, i) { - struct page *page; sg->dma_address = sg_phys(sg); - page = pfn_to_page(sg->dma_address >> PAGE_SHIFT); - homecache_flush_cache(page, 0); + __dma_map_pa_range(sg->dma_address, sg->length); } return nents; @@ -163,6 +162,7 @@ dma_addr_t dma_map_page(struct device *dev, struct page *page, { BUG_ON(!valid_dma_direction(direction)); + BUG_ON(offset + size > PAGE_SIZE); homecache_flush_cache(page, 0); return page_to_pa(page) + offset; diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 5db8b5b..b9cd962 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -165,7 +165,7 @@ void free_thread_info(struct thread_info *info) kfree(step_state); } - free_page((unsigned long)info); + free_pages((unsigned long)info, THREAD_SIZE_ORDER); } static void save_arch_state(struct thread_struct *t); diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index f185736..3696b18 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -59,6 +59,8 @@ unsigned long __initdata node_memmap_pfn[MAX_NUMNODES]; unsigned long __initdata node_percpu_pfn[MAX_NUMNODES]; unsigned long __initdata node_free_pfn[MAX_NUMNODES]; +static unsigned long __initdata node_percpu[MAX_NUMNODES]; + #ifdef CONFIG_HIGHMEM /* Page frame index of end of lowmem on each controller. */ unsigned long __cpuinitdata node_lowmem_end_pfn[MAX_NUMNODES]; @@ -554,7 +556,6 @@ static void __init setup_bootmem_allocator(void) reserve_bootmem(crashk_res.start, crashk_res.end - crashk_res.start + 1, 0); #endif - } void *__init alloc_remap(int nid, unsigned long size) @@ -568,11 +569,13 @@ void *__init alloc_remap(int nid, unsigned long size) static int __init percpu_size(void) { - int size = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE); -#ifdef CONFIG_MODULES - if (size < PERCPU_ENOUGH_ROOM) - size = PERCPU_ENOUGH_ROOM; -#endif + int size = __per_cpu_end - __per_cpu_start; + size += PERCPU_MODULE_RESERVE; + size += PERCPU_DYNAMIC_EARLY_SIZE; + if (size < PCPU_MIN_UNIT_SIZE) + size = PCPU_MIN_UNIT_SIZE; + size = roundup(size, PAGE_SIZE); + /* In several places we assume the per-cpu data fits on a huge page. 
*/ BUG_ON(kdata_huge && size > HPAGE_SIZE); return size; @@ -589,7 +592,6 @@ static inline unsigned long alloc_bootmem_pfn(int size, unsigned long goal) static void __init zone_sizes_init(void) { unsigned long zones_size[MAX_NR_ZONES] = { 0 }; - unsigned long node_percpu[MAX_NUMNODES] = { 0 }; int size = percpu_size(); int num_cpus = smp_height * smp_width; int i; @@ -674,7 +676,7 @@ static void __init zone_sizes_init(void) NODE_DATA(i)->bdata = NODE_DATA(0)->bdata; free_area_init_node(i, zones_size, start, NULL); - printk(KERN_DEBUG " DMA zone: %ld per-cpu pages\n", + printk(KERN_DEBUG " Normal zone: %ld per-cpu pages\n", PFN_UP(node_percpu[i])); /* Track the type of memory on each node */ @@ -1312,6 +1314,8 @@ static void *__init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) BUG_ON(size % PAGE_SIZE != 0); pfn_offset[nid] += size / PAGE_SIZE; + BUG_ON(node_percpu[nid] < size); + node_percpu[nid] -= size; if (percpu_pfn[cpu] == 0) percpu_pfn[cpu] = pfn; return pfn_to_kaddr(pfn); -- cgit v1.1 From 0dccb0489f9a5a13a33e828ab965aa49685d12f8 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 17 Mar 2011 14:32:06 -0400 Subject: arch/tile: support newer binutils assembler shift semantics This change supports building the kernel with newer binutils where a shift of greater than the word size is no longer interpreted silently as modulo the word size, but instead generates a warning. Signed-off-by: Chris Metcalf --- arch/tile/kernel/head_32.S | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S index 05b5f4d..1a39b7c 100644 --- a/arch/tile/kernel/head_32.S +++ b/arch/tile/kernel/head_32.S @@ -145,7 +145,7 @@ ENTRY(empty_zero_page) .endif .word HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED | \ (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) - .word (\bits1) | (HV_CPA_TO_PFN(\cpa) << HV_PTE_INDEX_PFN) + .word (\bits1) | (HV_CPA_TO_PFN(\cpa) << (HV_PTE_INDEX_PFN - 32)) .endm __PAGE_ALIGNED_DATA @@ -158,12 +158,14 @@ ENTRY(swapper_pg_dir) */ .set addr, 0 .rept (MEM_USER_INTRPT - PAGE_OFFSET) >> PGDIR_SHIFT - PTE addr + PAGE_OFFSET, addr, HV_PTE_READABLE | HV_PTE_WRITABLE + PTE addr + PAGE_OFFSET, addr, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ + (1 << (HV_PTE_INDEX_WRITABLE - 32)) .set addr, addr + PGDIR_SIZE .endr /* The true text VAs are mapped as VA = PA + MEM_SV_INTRPT */ - PTE MEM_SV_INTRPT, 0, HV_PTE_READABLE | HV_PTE_EXECUTABLE + PTE MEM_SV_INTRPT, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ + (1 << (HV_PTE_INDEX_EXECUTABLE - 32)) .org swapper_pg_dir + HV_L1_SIZE END(swapper_pg_dir) @@ -176,6 +178,7 @@ ENTRY(swapper_pg_dir) __INITDATA .align CHIP_L2_LINE_SIZE() ENTRY(swapper_pgprot) - PTE 0, 0, HV_PTE_READABLE | HV_PTE_WRITABLE, 1 + PTE 0, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ + (1 << (HV_PTE_INDEX_WRITABLE - 32)), 1 .align CHIP_L2_LINE_SIZE() END(swapper_pgprot) -- cgit v1.1
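
To see why the head_32.S change in the final patch subtracts 32 from the bit indices, consider how a 64-bit PTE is emitted as two 32-bit words: a bit whose index is 32 or higher belongs in the high word, shifted by (index - 32). Shifting a 32-bit quantity by 32 or more is what older binutils silently treated as modulo the word size and what newer binutils now warns about (and in C it is undefined behavior outright). The sketch below shows the same pattern in C; the PTE_INDEX_* values and the build_pte_words() helper are hypothetical, chosen only for illustration, and are not the hypervisor's actual HV_PTE_INDEX_* definitions.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical bit indices, for illustration only; not the real HV_PTE_INDEX_* values. */
#define PTE_INDEX_PRESENT    0	/* lives in the low 32-bit word  */
#define PTE_INDEX_READABLE  35	/* lives in the high 32-bit word */
#define PTE_INDEX_WRITABLE  36	/* lives in the high 32-bit word */

/*
 * Build a 64-bit PTE as two 32-bit words, the way 32-bit ".word"
 * directives must.  Bits with index >= 32 are shifted by (index - 32)
 * into the high word; writing "1u << 35" instead would be undefined in
 * C, and older assemblers silently wrapped such shifts modulo 32.
 */
static void build_pte_words(uint32_t *lo, uint32_t *hi)
{
	*lo = 1u << PTE_INDEX_PRESENT;
	*hi = (1u << (PTE_INDEX_READABLE - 32)) |
	      (1u << (PTE_INDEX_WRITABLE - 32));
}

int main(void)
{
	uint32_t lo, hi;

	build_pte_words(&lo, &hi);
	printf("pte words: hi=%#010x lo=%#010x\n", hi, lo);
	return 0;
}

With these assumed indices the program prints hi=0x00000018 lo=0x00000001, i.e. bits 35 and 36 land at positions 3 and 4 of the high word, mirroring the (HV_PTE_INDEX_* - 32) shifts the patch introduces.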