From 578b7cd1518f8d1b17a7fb1671d3d756c9cb49f1 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 7 Apr 2010 14:44:28 +1000 Subject: powerpc/vio: Add modalias support BenH: Added to vio_cmo_dev_attrs as well Provide a modalias entry for VIO devices in sysfs. I believe this was another initrd generation bugfix for anaconda. Signed-off-by: David Woodhouse Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vio.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 8223717..2f57956 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -958,9 +958,12 @@ viodev_cmo_rd_attr(allocated); static ssize_t name_show(struct device *, struct device_attribute *, char *); static ssize_t devspec_show(struct device *, struct device_attribute *, char *); +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf); static struct device_attribute vio_cmo_dev_attrs[] = { __ATTR_RO(name), __ATTR_RO(devspec), + __ATTR_RO(modalias), __ATTR(cmo_desired, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH, viodev_cmo_desired_show, viodev_cmo_desired_set), __ATTR(cmo_entitled, S_IRUGO, viodev_cmo_entitled_show, NULL), @@ -1320,9 +1323,27 @@ static ssize_t devspec_show(struct device *dev, return sprintf(buf, "%s\n", of_node ? of_node->full_name : "none"); } +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + const struct vio_dev *vio_dev = to_vio_dev(dev); + struct device_node *dn; + const char *cp; + + dn = dev->archdata.of_node; + if (!dn) + return -ENODEV; + cp = of_get_property(dn, "compatible", NULL); + if (!cp) + return -ENODEV; + + return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp); +} + static struct device_attribute vio_dev_attrs[] = { __ATTR_RO(name), __ATTR_RO(devspec), + __ATTR_RO(modalias), __ATTR_NULL }; -- cgit v1.1 From 9c7cc234dc5edf5379fbbab4973f6704f59bc57b Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 29 Mar 2010 23:59:25 +0000 Subject: powerpc: Disable interrupts for data breakpoint exceptions Data address breakpoint exceptions are currently handled along with page faults, which require interrupts to remain enabled. Since exception handling for data breakpoints isn't preempt-safe, we handle them separately. Signed-off-by: K.Prasad Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64s.S | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e3be98f..3e423fb 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -735,8 +735,11 @@ _STATIC(do_hash_page) std r3,_DAR(r1) std r4,_DSISR(r1) - andis. r0,r4,0xa450 /* weird error? */ + andis. r0,r4,0xa410 /* weird error? */ bne- handle_page_fault /* if not, try to insert a HPTE */ + andis. r0,r4,DSISR_DABRMATCH@h + bne- handle_dabr_fault + BEGIN_FTR_SECTION andis. r0,r4,0x0020 /* Is it a segment table fault? */ bne- do_ste_alloc /* If so handle it */ @@ -823,6 +826,14 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES) bl .raw_local_irq_restore b 11f +/* We have a data breakpoint exception - handle it */ +handle_dabr_fault: + ld r4,_DAR(r1) + ld r5,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_dabr + b .ret_from_except_lite + /* Here we have a page fault that hash_page can't handle. 
*/ handle_page_fault: ENABLE_INTS -- cgit v1.1 From 6fe9d1facb5346a615f9b571df3b91593afb29c3 Mon Sep 17 00:00:00 2001 From: Vaidyanathan Srinivasan Date: Wed, 31 Mar 2010 21:39:24 +0000 Subject: powerpc/pseries: Export data from new hcall H_EM_GET_PARMS Add support for the H_EM_GET_PARMS hcall, which returns data related to power modes from the platform. Export the data directly to user space for administrative tools to interpret and use. cat /proc/powerpc/lparcfg will export power mode data. Signed-off-by: Vaidyanathan Srinivasan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/lparcfg.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index c2c70e1..50362b6 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -38,7 +38,7 @@ #include #include -#define MODULE_VERS "1.8" +#define MODULE_VERS "1.9" #define MODULE_NAME "lparcfg" /* #define LPARCFG_DEBUG */ @@ -487,6 +487,14 @@ static void splpar_dispatch_data(struct seq_file *m) seq_printf(m, "dispatch_dispersions=%lu\n", dispatch_dispersions); } +static void parse_em_data(struct seq_file *m) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + if (plpar_hcall(H_GET_EM_PARMS, retbuf) == H_SUCCESS) + seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]); +} + static int pseries_lparcfg_data(struct seq_file *m, void *v) { int partition_potential_processors; @@ -541,6 +549,8 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) seq_printf(m, "slb_size=%d\n", mmu_slb_size); + parse_em_data(m); + return 0; } -- cgit v1.1 From fe1691e3f49d41452832f5aee2b952bd201ccab1 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Tue, 2 Mar 2010 05:37:09 +0000 Subject: powerpc/8xx: Optimize TLB Miss handlers This removes a couple of instructions from the TLB Miss handlers without changing functionality. Signed-off-by: Joakim Tjernlund Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_8xx.S | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 3ef743f..ecc4a02 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -343,17 +343,14 @@ InstructionTLBMiss: cmpwi cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT bne- cr0, 2f - /* Clear PP lsb, 0x400 */ - rlwinm r10, r10, 0, 22, 20 - /* The Linux PTE won't go exactly into the MMU TLB. - * Software indicator bits 22 and 28 must be clear. + * Software indicator bits 21 and 28 must be clear. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior * of the MMU. */ li r11, 0x00f0 - rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ + rlwimi r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */ DO_8xx_CPU6(0x2d80, r3) mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ @@ -444,9 +441,7 @@ DataStoreTLBMiss: /* Honour kernel RO, User NA */ /* 0x200 == Extended encoding, bit 22 */ - /* r11 = (r10 & _PAGE_USER) >> 2 */ - rlwinm r11, r10, 32-2, 0x200 - or r10, r11, r10 + rlwimi r10, r10, 32-2, 0x200 /* Copy USER to bit 22, 0x200 */ /* r11 = (r10 & _PAGE_RW) >> 1 */ rlwinm r11, r10, 32-1, 0x200 or r10, r11, r10 -- cgit v1.1 From 4afb0be7ccda0ca551cc37572bab74ba4a3c18dd Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Tue, 2 Mar 2010 05:37:10 +0000 Subject: powerpc/8xx: Avoid testing for kernel space in ITLB Miss. 
Only modules will cause ITLB Misses as we always pin the first 8MB of kernel memory. Signed-off-by: Joakim Tjernlund Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_8xx.S | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index ecc4a02..84ca1d9 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -318,12 +318,16 @@ InstructionTLBMiss: /* If we are faulting a kernel address, we have to use the * kernel page tables. */ +#ifdef CONFIG_MODULES + /* Only modules will cause ITLB Misses as we always + * pin the first 8MB of kernel memory */ andi. r11, r10, 0x0800 /* Address >= 0x80000000 */ beq 3f lis r11, swapper_pg_dir@h ori r11, r11, swapper_pg_dir@l rlwimi r10, r11, 0, 2, 19 3: +#endif lwz r11, 0(r10) /* Get the level 1 entry */ rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ beq 2f /* If zero, don't try to find a pte */ -- cgit v1.1 From d069cb4373fe0d451357c4d3769623a7564dfa9f Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Tue, 2 Mar 2010 05:37:11 +0000 Subject: powerpc/8xx: Don't touch ACCESSED when no SWAP. Only the swap function cares about the ACCESSED bit in the pte. Do not waste cycles updating ACCESSED when swap is not compiled into the kernel. Signed-off-by: Joakim Tjernlund Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_8xx.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 84ca1d9..6478a96 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -343,10 +343,11 @@ InstructionTLBMiss: mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ lwz r10, 0(r11) /* Get the pte */ +#ifdef CONFIG_SWAP andi. r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT cmpwi cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT bne- cr0, 2f - +#endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 21 and 28 must be clear. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior * of the MMU. */ @@ -439,10 +440,11 @@ DataStoreTLBMiss: * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5)); * r10 = (r10 & ~PRESENT) | r11; */ +#ifdef CONFIG_SWAP rlwinm r11, r10, 32-5, _PAGE_PRESENT and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT - +#endif /* Honour kernel RO, User NA */ /* 0x200 == Extended encoding, bit 22 */ rlwimi r10, r10, 32-2, 0x200 /* Copy USER to bit 22, 0x200 */ -- cgit v1.1 From 469d62be9263b92f2c3329540cbb1c076111f4f3 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Tue, 2 Mar 2010 05:37:12 +0000 Subject: powerpc/8xx: Use SPRG2 and DAR registers to stash r11 and cr. This avoids storing these registers in memory. CPU6 errata will still use the old way. Remove some G2 leftover accesses from 2.4. Signed-off-by: Joakim Tjernlund Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_8xx.S | 49 +++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 6478a96..1f1a04b 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -71,9 +71,6 @@ _ENTRY(_start); * in the first level table, but that would require many changes to the * Linux page directory/table functions that I don't want to do right now. 
* - * I used to use SPRG2 for a temporary register in the TLB handler, but it - * has since been put to other uses. I now use a hack to save a register - * and the CCR at memory location 0.....Someday I'll fix this..... * -- Dan */ .globl __start @@ -302,8 +299,13 @@ InstructionTLBMiss: DO_8xx_CPU6(0x3f80, r3) mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ mfcr r10 +#ifdef CONFIG_8xx_CPU6 stw r10, 0(r0) stw r11, 4(r0) +#else + mtspr SPRN_DAR, r10 + mtspr SPRN_SPRG2, r11 +#endif mfspr r10, SPRN_SRR0 /* Get effective address of fault */ #ifdef CONFIG_8xx_CPU15 addi r11, r10, 0x1000 @@ -359,13 +361,19 @@ InstructionTLBMiss: DO_8xx_CPU6(0x2d80, r3) mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ - mfspr r10, SPRN_M_TW /* Restore registers */ + /* Restore registers */ +#ifndef CONFIG_8xx_CPU6 + mfspr r10, SPRN_DAR + mtcr r10 + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r11, SPRN_SPRG2 +#else lwz r11, 0(r0) mtcr r11 lwz r11, 4(r0) -#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif + mfspr r10, SPRN_M_TW rfi 2: mfspr r11, SPRN_SRR1 @@ -375,13 +383,20 @@ InstructionTLBMiss: rlwinm r11, r11, 0, 0xffff mtspr SPRN_SRR1, r11 - mfspr r10, SPRN_M_TW /* Restore registers */ + /* Restore registers */ +#ifndef CONFIG_8xx_CPU6 + mfspr r10, SPRN_DAR + mtcr r10 + li r11, 0x00f0 + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r11, SPRN_SPRG2 +#else lwz r11, 0(r0) mtcr r11 lwz r11, 4(r0) -#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif + mfspr r10, SPRN_M_TW b InstructionAccess . = 0x1200 @@ -392,8 +407,13 @@ DataStoreTLBMiss: DO_8xx_CPU6(0x3f80, r3) mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ mfcr r10 +#ifdef CONFIG_8xx_CPU6 stw r10, 0(r0) stw r11, 4(r0) +#else + mtspr SPRN_DAR, r10 + mtspr SPRN_SPRG2, r11 +#endif mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ /* If we are faulting a kernel address, we have to use the @@ -461,18 +481,24 @@ DataStoreTLBMiss: * of the MMU. */ 2: li r11, 0x00f0 - mtspr SPRN_DAR,r11 /* Tag DAR */ rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ DO_8xx_CPU6(0x3d80, r3) mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ - mfspr r10, SPRN_M_TW /* Restore registers */ + /* Restore registers */ +#ifndef CONFIG_8xx_CPU6 + mfspr r10, SPRN_DAR + mtcr r10 + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r11, SPRN_SPRG2 +#else + mtspr SPRN_DAR, r11 /* Tag DAR */ lwz r11, 0(r0) mtcr r11 lwz r11, 4(r0) -#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif + mfspr r10, SPRN_M_TW rfi /* This is an instruction TLB error on the MPC8xx. This could be due @@ -684,9 +710,6 @@ start_here: tophys(r4,r2) addi r4,r4,THREAD /* init task's THREAD */ mtspr SPRN_SPRG_THREAD,r4 - li r3,0 - /* XXX What is that for ? SPRG2 appears otherwise unused on 8xx */ - mtspr SPRN_SPRG2,r3 /* 0 => r1 has kernel sp */ /* stack */ lis r1,init_thread_union@ha -- cgit v1.1 From f6d8c8bb1d360272d795927d39f3d2c5934e77d9 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 29 Mar 2010 05:33:34 +0000 Subject: powerpc/vio: Add missing unlock in error path Add an unlock before exiting the function. A simplified version of the semantic patch that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r exists@ expression E1; identifier f; @@ f (...) { <+... * spin_lock_irq (E1,...); ... 
when != E1 * return ...; ...+> } // Signed-off-by: Julia Lawall Acked-by: Stephen Rothwell Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 2f57956..2a3428b 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -645,8 +645,10 @@ void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) found = 1; break; } - if (!found) + if (!found) { + spin_unlock_irqrestore(&vio_cmo.lock, flags); return; + } /* Increase/decrease in desired device entitlement */ if (desired >= viodev->cmo.desired) { -- cgit v1.1 From 21dbeb91a24d867af0e98ba155bfa80d2906344f Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 26 Mar 2010 12:03:29 +0000 Subject: powerpc: Use set_cpus_allowed_ptr Use set_cpus_allowed_ptr rather than set_cpus_allowed. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression E1,E2; @@ - set_cpus_allowed(E1, cpumask_of_cpu(E2)) + set_cpus_allowed_ptr(E1, cpumask_of(E2)) @@ expression E; identifier I; @@ - set_cpus_allowed(E, I) + set_cpus_allowed_ptr(E, &I) // Signed-off-by: Julia Lawall Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index c2ee144..e36f94f 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -561,12 +561,12 @@ void __init smp_cpus_done(unsigned int max_cpus) * se we pin us down to CPU 0 for a short while */ old_mask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(boot_cpuid)); + set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid)); if (smp_ops && smp_ops->setup_cpu) smp_ops->setup_cpu(boot_cpuid); - set_cpus_allowed(current, old_mask); + set_cpus_allowed_ptr(current, &old_mask); snapshot_timebases(); -- cgit v1.1 From e9bbc8cde0e3c33b42ddbe1b02108cb5c97275eb Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 18 Feb 2010 12:11:51 +0000 Subject: powerpc/pseries: Call ibm,os-term if the ibm,extended-os-term is present We have had issues in the past with ibm,os-term initiating shutdown of a partition. This is confusing to the user, especially if panic_timeout is non zero. The temporary fix was to avoid calling ibm,os-term if a panic_timeout was set and since we set it on every boot we basically never call ibm,os-term. An extended version of ibm,os-term has since been implemented which gives us the behaviour we want: "When the platform supports extended ibm,os-term behavior, the return to the RTAS will always occur unless there is a kernel assisted dump active as initiated by an ibm,configure-kernel-dump call." This patch checks for the ibm,extended-os-term property and calls ibm,os-term if it exists. 
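For illustration, the token check added below follows a standard RTAS pattern: rtas_token() looks a name up in the /rtas device-tree node and returns RTAS_UNKNOWN_SERVICE when the property is absent, so on older firmware the function simply bails out before any RTAS call is made. A minimal C sketch of the guard (not the literal kernel code):

    /* Only call ibm,os-term when firmware guarantees it will return. */
    if (rtas_token("ibm,os-term") == RTAS_UNKNOWN_SERVICE ||
        rtas_token("ibm,extended-os-term") == RTAS_UNKNOWN_SERVICE)
            return;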
Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/rtas.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 7436784..0e1ec6f 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -691,10 +691,14 @@ void rtas_os_term(char *str) { int status; - if (panic_timeout) - return; - - if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term") + /* + * Firmware with the ibm,extended-os-term property is guaranteed + * to always return from an ibm,os-term call. Earlier versions without + * this property may terminate the partition which we want to avoid + * since it interferes with panic_timeout. + */ + if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term") || + RTAS_UNKNOWN_SERVICE == rtas_token("ibm,extended-os-term")) return; snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str); @@ -705,8 +709,7 @@ void rtas_os_term(char *str) } while (rtas_busy_delay(status)); if (status != 0) - printk(KERN_EMERG "ibm,os-term call failed %d\n", - status); + printk(KERN_EMERG "ibm,os-term call failed %d\n", status); } static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE; -- cgit v1.1 From a32aaf14513da776556ad9995de8d83cd76ae60a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 7 Apr 2010 18:09:15 +1000 Subject: powerpc/vio: Add power management support Adds support for suspend/resume for VIO devices. This is needed to support HMC-initiated hibernation. Signed-off-by: Brian King Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vio.c | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 2a3428b..b8e311d 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1381,6 +1381,29 @@ static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env) return 0; } +static int vio_pm_suspend(struct device *dev) +{ + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + if (pm && pm->suspend) + return pm->suspend(dev); + return 0; +} + +static int vio_pm_resume(struct device *dev) +{ + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + if (pm && pm->resume) + return pm->resume(dev); + return 0; +} + +const struct dev_pm_ops vio_dev_pm_ops = { + .suspend = vio_pm_suspend, + .resume = vio_pm_resume, +}; + static struct bus_type vio_bus_type = { .name = "vio", .dev_attrs = vio_dev_attrs, @@ -1388,6 +1411,7 @@ static struct bus_type vio_bus_type = { .match = vio_bus_match, .probe = vio_bus_probe, .remove = vio_bus_remove, + .pm = &vio_dev_pm_ops, }; /** -- cgit v1.1 From 359e4284a3f37aba7fd06d993863de2509d86f54 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 7 Apr 2010 18:10:20 +1000 Subject: powerpc: Add kprobe-based event tracer This patch ports the kprobe-based event tracer to powerpc. It is based on the x86 port. This brings powerpc on par with x86. 
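As a usage note, the tracer resolves fetch arguments such as %gpr3 (r3 carries the first function argument in the powerpc ABI) through the two lookup helpers added in the ptrace.c diff below; a minimal C sketch of the round trip, with register names exactly as registered in regoffset_table:

    int off = regs_query_register_offset("gpr3");     /* offsetof(struct pt_regs, gpr[3]) */
    const char *name = regs_query_register_name(off); /* "gpr3" again */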
Signed-off-by: Mahesh Salgaonkar Acked-by: Masami Hiramatsu Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/ptrace.c | 103 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index ed2cfe1..7a0c0199 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -39,6 +39,109 @@ #include /* + * The parameter save area on the stack is used to store arguments being passed + * to callee function and is located at fixed offset from stack pointer. + */ +#ifdef CONFIG_PPC32 +#define PARAMETER_SAVE_AREA_OFFSET 24 /* bytes */ +#else /* CONFIG_PPC32 */ +#define PARAMETER_SAVE_AREA_OFFSET 48 /* bytes */ +#endif + +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define STR(s) #s /* convert to string */ +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define GPR_OFFSET_NAME(num) \ + {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])} +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +static const struct pt_regs_offset regoffset_table[] = { + GPR_OFFSET_NAME(0), + GPR_OFFSET_NAME(1), + GPR_OFFSET_NAME(2), + GPR_OFFSET_NAME(3), + GPR_OFFSET_NAME(4), + GPR_OFFSET_NAME(5), + GPR_OFFSET_NAME(6), + GPR_OFFSET_NAME(7), + GPR_OFFSET_NAME(8), + GPR_OFFSET_NAME(9), + GPR_OFFSET_NAME(10), + GPR_OFFSET_NAME(11), + GPR_OFFSET_NAME(12), + GPR_OFFSET_NAME(13), + GPR_OFFSET_NAME(14), + GPR_OFFSET_NAME(15), + GPR_OFFSET_NAME(16), + GPR_OFFSET_NAME(17), + GPR_OFFSET_NAME(18), + GPR_OFFSET_NAME(19), + GPR_OFFSET_NAME(20), + GPR_OFFSET_NAME(21), + GPR_OFFSET_NAME(22), + GPR_OFFSET_NAME(23), + GPR_OFFSET_NAME(24), + GPR_OFFSET_NAME(25), + GPR_OFFSET_NAME(26), + GPR_OFFSET_NAME(27), + GPR_OFFSET_NAME(28), + GPR_OFFSET_NAME(29), + GPR_OFFSET_NAME(30), + GPR_OFFSET_NAME(31), + REG_OFFSET_NAME(nip), + REG_OFFSET_NAME(msr), + REG_OFFSET_NAME(ctr), + REG_OFFSET_NAME(link), + REG_OFFSET_NAME(xer), + REG_OFFSET_NAME(ccr), +#ifdef CONFIG_PPC64 + REG_OFFSET_NAME(softe), +#else + REG_OFFSET_NAME(mq), +#endif + REG_OFFSET_NAME(trap), + REG_OFFSET_NAME(dar), + REG_OFFSET_NAME(dsisr), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_query_register_name() - query register name from its offset + * @offset: the offset of a register in struct pt_regs. + * + * regs_query_register_name() returns the name of a register from its + * offset in struct pt_regs. If the @offset is invalid, this returns NULL; + */ +const char *regs_query_register_name(unsigned int offset) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (roff->offset == offset) + return roff->name; + return NULL; +} + +/* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. 
*/ -- cgit v1.1 From 471c70ff39809af783c7718defe574a9ba81dd26 Mon Sep 17 00:00:00 2001 From: Torez Smith Date: Fri, 5 Mar 2010 10:43:01 +0000 Subject: powerpc/booke: Add Stack Marking support to Booke Exception Prolog This patch adds a marker to the exception stack frame to aid in debugging. It's already inserted on other platforms and xmon recognizes it and identifies exception frames when showing stack traces. Signed-off-by: Torez Smith Signed-off-by: Dave Kleikamp Signed-off-by: Josh Boyer --- arch/powerpc/kernel/head_booke.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 50504ae..a0bf158 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -1,6 +1,7 @@ #ifndef __HEAD_BOOKE_H__ #define __HEAD_BOOKE_H__ +#include /* for STACK_FRAME_REGS_MARKER */ /* * Macros used for common Book-e exception handling */ @@ -48,6 +49,9 @@ stw r10,0(r11); \ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ stw r0,GPR0(r11); \ + lis r10, STACK_FRAME_REGS_MARKER@ha;/* exception frame marker */ \ + addi r10, r10, STACK_FRAME_REGS_MARKER@l; \ + stw r10, 8(r11); \ SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) -- cgit v1.1 From 795033c344d88dc6aa5106d0cc358656f29bd722 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Fri, 5 Mar 2010 10:43:07 +0000 Subject: powerpc/44x: break out cpu init code into stand-alone function The 47x platform supports multiple cores and shares code with 44x. Break out code that is common for initializing the primary and secondary cpus into a function which can be called for both. Signed-off-by: Dave Kleikamp Signed-off-by: Josh Boyer --- arch/powerpc/kernel/head_44x.S | 330 +++++++++++++++++++++-------------------- 1 file changed, 171 insertions(+), 159 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 711368b..39be049 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -69,165 +69,7 @@ _ENTRY(_start); mr r27,r7 li r24,0 /* CPU number */ -/* - * In case the firmware didn't do it, we apply some workarounds - * that are good for all 440 core variants here - */ - mfspr r3,SPRN_CCR0 - rlwinm r3,r3,0,0,27 /* disable icache prefetch */ - isync - mtspr SPRN_CCR0,r3 - isync - sync - -/* - * Set up the initial MMU state - * - * We are still executing code at the virtual address - * mappings set by the firmware for the base of RAM. - * - * We first invalidate all TLB entries but the one - * we are running from. We then load the KERNELBASE - * mappings so we can begin to use kernel addresses - * natively and so the interrupt vector locations are - * permanently pinned (necessary since Book E - * implementations always have translation enabled). - * - * TODO: Use the known TLB entry we are running from to - * determine which physical region we are located - * in. This can be used to determine where in RAM - * (on a shared CPU system) or PCI memory space - * (on a DRAMless system) we are located. - * For now, we assume a perfect world which means - * we are located at the base of DRAM (physical 0). - */ - -/* - * Search TLB for entry that we are currently using. - * Invalidate all entries but the one we are using. - */ - /* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */ - mfspr r3,SPRN_PID /* Get PID */ - mfmsr r4 /* Get MSR */ - andi. r4,r4,MSR_IS@l /* TS=1? 
*/ - beq wmmucr /* If not, leave STS=0 */ - oris r3,r3,PPC44x_MMUCR_STS@h /* Set STS=1 */ -wmmucr: mtspr SPRN_MMUCR,r3 /* Put MMUCR */ - sync - - bl invstr /* Find our address */ -invstr: mflr r5 /* Make it accessible */ - tlbsx r23,0,r5 /* Find entry we are in */ - li r4,0 /* Start at TLB entry 0 */ - li r3,0 /* Set PAGEID inval value */ -1: cmpw r23,r4 /* Is this our entry? */ - beq skpinv /* If so, skip the inval */ - tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */ -skpinv: addi r4,r4,1 /* Increment */ - cmpwi r4,64 /* Are we done? */ - bne 1b /* If not, repeat */ - isync /* If so, context change */ - -/* - * Configure and load pinned entry into TLB slot 63. - */ - - lis r3,PAGE_OFFSET@h - ori r3,r3,PAGE_OFFSET@l - - /* Kernel is at the base of RAM */ - li r4, 0 /* Load the kernel physical address */ - - /* Load the kernel PID = 0 */ - li r0,0 - mtspr SPRN_PID,r0 - sync - - /* Initialize MMUCR */ - li r5,0 - mtspr SPRN_MMUCR,r5 - sync - - /* pageid fields */ - clrrwi r3,r3,10 /* Mask off the effective page number */ - ori r3,r3,PPC44x_TLB_VALID | PPC44x_TLB_256M - - /* xlat fields */ - clrrwi r4,r4,10 /* Mask off the real page number */ - /* ERPN is 0 for first 4GB page */ - - /* attrib fields */ - /* Added guarded bit to protect against speculative loads/stores */ - li r5,0 - ori r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G) - - li r0,63 /* TLB slot 63 */ - - tlbwe r3,r0,PPC44x_TLB_PAGEID /* Load the pageid fields */ - tlbwe r4,r0,PPC44x_TLB_XLAT /* Load the translation fields */ - tlbwe r5,r0,PPC44x_TLB_ATTRIB /* Load the attrib/access fields */ - - /* Force context change */ - mfmsr r0 - mtspr SPRN_SRR1, r0 - lis r0,3f@h - ori r0,r0,3f@l - mtspr SPRN_SRR0,r0 - sync - rfi - - /* If necessary, invalidate original entry we used */ -3: cmpwi r23,63 - beq 4f - li r6,0 - tlbwe r6,r23,PPC44x_TLB_PAGEID - isync - -4: -#ifdef CONFIG_PPC_EARLY_DEBUG_44x - /* Add UART mapping for early debug. */ - - /* pageid fields */ - lis r3,PPC44x_EARLY_DEBUG_VIRTADDR@h - ori r3,r3,PPC44x_TLB_VALID|PPC44x_TLB_TS|PPC44x_TLB_64K - - /* xlat fields */ - lis r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW@h - ori r4,r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH - - /* attrib fields */ - li r5,(PPC44x_TLB_SW|PPC44x_TLB_SR|PPC44x_TLB_I|PPC44x_TLB_G) - li r0,62 /* TLB slot 0 */ - - tlbwe r3,r0,PPC44x_TLB_PAGEID - tlbwe r4,r0,PPC44x_TLB_XLAT - tlbwe r5,r0,PPC44x_TLB_ATTRIB - - /* Force context change */ - isync -#endif /* CONFIG_PPC_EARLY_DEBUG_44x */ - - /* Establish the interrupt vector offsets */ - SET_IVOR(0, CriticalInput); - SET_IVOR(1, MachineCheck); - SET_IVOR(2, DataStorage); - SET_IVOR(3, InstructionStorage); - SET_IVOR(4, ExternalInput); - SET_IVOR(5, Alignment); - SET_IVOR(6, Program); - SET_IVOR(7, FloatingPointUnavailable); - SET_IVOR(8, SystemCall); - SET_IVOR(9, AuxillaryProcessorUnavailable); - SET_IVOR(10, Decrementer); - SET_IVOR(11, FixedIntervalTimer); - SET_IVOR(12, WatchdogTimer); - SET_IVOR(13, DataTLBError); - SET_IVOR(14, InstructionTLBError); - SET_IVOR(15, DebugCrit); - - /* Establish the interrupt vector base */ - lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */ - mtspr SPRN_IVPR,r4 + bl init_cpu_state /* * This is where the main kernel code starts. @@ -647,6 +489,176 @@ _GLOBAL(set_context) blr /* + * Init CPU state. This is called at boot time or for secondary CPUs + * to setup initial TLB entries, setup IVORs, etc... 
+ */ +_GLOBAL(init_cpu_state) + mflr r22 +/* + * In case the firmware didn't do it, we apply some workarounds + * that are good for all 440 core variants here + */ + mfspr r3,SPRN_CCR0 + rlwinm r3,r3,0,0,27 /* disable icache prefetch */ + isync + mtspr SPRN_CCR0,r3 + isync + sync + +/* + * Set up the initial MMU state + * + * We are still executing code at the virtual address + * mappings set by the firmware for the base of RAM. + * + * We first invalidate all TLB entries but the one + * we are running from. We then load the KERNELBASE + * mappings so we can begin to use kernel addresses + * natively and so the interrupt vector locations are + * permanently pinned (necessary since Book E + * implementations always have translation enabled). + * + * TODO: Use the known TLB entry we are running from to + * determine which physical region we are located + * in. This can be used to determine where in RAM + * (on a shared CPU system) or PCI memory space + * (on a DRAMless system) we are located. + * For now, we assume a perfect world which means + * we are located at the base of DRAM (physical 0). + */ + +/* + * Search TLB for entry that we are currently using. + * Invalidate all entries but the one we are using. + */ + /* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */ + mfspr r3,SPRN_PID /* Get PID */ + mfmsr r4 /* Get MSR */ + andi. r4,r4,MSR_IS@l /* TS=1? */ + beq wmmucr /* If not, leave STS=0 */ + oris r3,r3,PPC44x_MMUCR_STS@h /* Set STS=1 */ +wmmucr: mtspr SPRN_MMUCR,r3 /* Put MMUCR */ + sync + + bl invstr /* Find our address */ +invstr: mflr r5 /* Make it accessible */ + tlbsx r23,0,r5 /* Find entry we are in */ + li r4,0 /* Start at TLB entry 0 */ + li r3,0 /* Set PAGEID inval value */ +1: cmpw r23,r4 /* Is this our entry? */ + beq skpinv /* If so, skip the inval */ + tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */ +skpinv: addi r4,r4,1 /* Increment */ + cmpwi r4,64 /* Are we done? */ + bne 1b /* If not, repeat */ + isync /* If so, context change */ + +/* + * Configure and load pinned entry into TLB slot 63. + */ + + lis r3,PAGE_OFFSET@h + ori r3,r3,PAGE_OFFSET@l + + /* Kernel is at the base of RAM */ + li r4, 0 /* Load the kernel physical address */ + + /* Load the kernel PID = 0 */ + li r0,0 + mtspr SPRN_PID,r0 + sync + + /* Initialize MMUCR */ + li r5,0 + mtspr SPRN_MMUCR,r5 + sync + + /* pageid fields */ + clrrwi r3,r3,10 /* Mask off the effective page number */ + ori r3,r3,PPC44x_TLB_VALID | PPC44x_TLB_256M + + /* xlat fields */ + clrrwi r4,r4,10 /* Mask off the real page number */ + /* ERPN is 0 for first 4GB page */ + + /* attrib fields */ + /* Added guarded bit to protect against speculative loads/stores */ + li r5,0 + ori r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G) + + li r0,63 /* TLB slot 63 */ + + tlbwe r3,r0,PPC44x_TLB_PAGEID /* Load the pageid fields */ + tlbwe r4,r0,PPC44x_TLB_XLAT /* Load the translation fields */ + tlbwe r5,r0,PPC44x_TLB_ATTRIB /* Load the attrib/access fields */ + + /* Force context change */ + mfmsr r0 + mtspr SPRN_SRR1, r0 + lis r0,3f@h + ori r0,r0,3f@l + mtspr SPRN_SRR0,r0 + sync + rfi + + /* If necessary, invalidate original entry we used */ +3: cmpwi r23,63 + beq 4f + li r6,0 + tlbwe r6,r23,PPC44x_TLB_PAGEID + isync + +4: +#ifdef CONFIG_PPC_EARLY_DEBUG_44x + /* Add UART mapping for early debug. 
*/ + + /* pageid fields */ + lis r3,PPC44x_EARLY_DEBUG_VIRTADDR@h + ori r3,r3,PPC44x_TLB_VALID|PPC44x_TLB_TS|PPC44x_TLB_64K + + /* xlat fields */ + lis r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW@h + ori r4,r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH + + /* attrib fields */ + li r5,(PPC44x_TLB_SW|PPC44x_TLB_SR|PPC44x_TLB_I|PPC44x_TLB_G) + li r0,62 /* TLB slot 0 */ + + tlbwe r3,r0,PPC44x_TLB_PAGEID + tlbwe r4,r0,PPC44x_TLB_XLAT + tlbwe r5,r0,PPC44x_TLB_ATTRIB + + /* Force context change */ + isync +#endif /* CONFIG_PPC_EARLY_DEBUG_44x */ + + /* Establish the interrupt vector offsets */ + SET_IVOR(0, CriticalInput); + SET_IVOR(1, MachineCheck); + SET_IVOR(2, DataStorage); + SET_IVOR(3, InstructionStorage); + SET_IVOR(4, ExternalInput); + SET_IVOR(5, Alignment); + SET_IVOR(6, Program); + SET_IVOR(7, FloatingPointUnavailable); + SET_IVOR(8, SystemCall); + SET_IVOR(9, AuxillaryProcessorUnavailable); + SET_IVOR(10, Decrementer); + SET_IVOR(11, FixedIntervalTimer); + SET_IVOR(12, WatchdogTimer); + SET_IVOR(13, DataTLBError); + SET_IVOR(14, InstructionTLBError); + SET_IVOR(15, DebugCrit); + + /* Establish the interrupt vector base */ + lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */ + mtspr SPRN_IVPR,r4 + + addis r22,r22,KERNELBASE@h + mtlr r22 + blr + +/* * We put a few things here that have to be page-aligned. This stuff * goes at the beginning of the data segment, which is page-aligned. */ -- cgit v1.1 From e7f75ad01d590243904c2d95ab47e6b2e9ef6dad Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Fri, 5 Mar 2010 10:43:12 +0000 Subject: powerpc/47x: Base ppc476 support This patch adds the base support for the 476 processor. The code was primarily written by Ben Herrenschmidt and Torez Smith, but I've been maintaining it for a while. The goal is to have a single binary that will run on 44x and 47x, but we still have some details to work out. The biggest is that the L1 cache line size differs on the two platforms, but it's currently a compile-time option. 
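At runtime, choosing between the two cores mostly rides on the MMU feature bits set by the cputable entries in the diff below. In C the test reduces to mmu_has_feature(); a hedged sketch (the comments describe intent, not literal kernel code):

    if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
            /* 47x path, e.g. the 128-byte D-cache line size */
    } else {
            /* classic 44x path */
    }

In assembly the same selection is done with the BEGIN_MMU_FTR_SECTION/END_MMU_FTR_SECTION_IFSET patching macros visible in the entry_32.S hunks below.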
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Torez Smith Signed-off-by: Dave Kleikamp Signed-off-by: Josh Boyer --- arch/powerpc/kernel/cputable.c | 13 ++ arch/powerpc/kernel/entry_32.S | 5 + arch/powerpc/kernel/head_44x.S | 502 +++++++++++++++++++++++++++++++++++++++-- arch/powerpc/kernel/misc_32.S | 9 +- arch/powerpc/kernel/smp.c | 8 + 5 files changed, 519 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 8af4949..a1d8458 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1701,6 +1701,19 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_440A, .platform = "ppc440", }, + { /* 476 core */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x11a50000, + .cpu_name = "476", + .cpu_features = CPU_FTRS_47X, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_47x | + MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .platform = "ppc470", + }, { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 1175a85..ed4aeb9 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -373,11 +373,13 @@ syscall_exit_cont: bnel- load_dbcr0 #endif #ifdef CONFIG_44x +BEGIN_MMU_FTR_SECTION lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 bne- 2f 1: +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_47x) #endif /* CONFIG_44x */ BEGIN_FTR_SECTION lwarx r7,0,r1 @@ -848,6 +850,9 @@ resume_kernel: /* interrupts are hard-disabled at this point */ restore: #ifdef CONFIG_44x +BEGIN_MMU_FTR_SECTION + b 1f +END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 39be049..1acd175 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -37,6 +37,7 @@ #include #include #include +#include #include "head_booke.h" @@ -191,7 +192,7 @@ interrupt_base: #endif /* Data TLB Error Interrupt */ - START_EXCEPTION(DataTLBError) + START_EXCEPTION(DataTLBError44x) mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */ mtspr SPRN_SPRG_WSCRATCH1, r11 mtspr SPRN_SPRG_WSCRATCH2, r12 @@ -282,7 +283,7 @@ tlb_44x_patch_hwater_D: mfspr r10,SPRN_DEAR /* Jump to common tlb load */ - b finish_tlb_load + b finish_tlb_load_44x 2: /* The bailout. Restore registers to pre-exception conditions @@ -302,7 +303,7 @@ tlb_44x_patch_hwater_D: * information from different registers and bailout * to a different point. */ - START_EXCEPTION(InstructionTLBError) + START_EXCEPTION(InstructionTLBError44x) mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */ mtspr SPRN_SPRG_WSCRATCH1, r11 mtspr SPRN_SPRG_WSCRATCH2, r12 @@ -378,7 +379,7 @@ tlb_44x_patch_hwater_I: mfspr r10,SPRN_SRR0 /* Jump to common TLB load point */ - b finish_tlb_load + b finish_tlb_load_44x 2: /* The bailout. Restore registers to pre-exception conditions @@ -392,15 +393,7 @@ tlb_44x_patch_hwater_I: mfspr r10, SPRN_SPRG_RSCRATCH0 b InstructionStorage - /* Debug Interrupt */ - DEBUG_CRIT_EXCEPTION - -/* - * Local functions - */ - /* - * Both the instruction and data TLB miss get to this * point to load the TLB. 
* r10 - EA of fault @@ -410,7 +403,7 @@ tlb_44x_patch_hwater_I: * MMUCR - loaded with proper value when we get here * Upon exit, we reload everything and RFI. */ -finish_tlb_load: +finish_tlb_load_44x: /* Combine RPN & ERPN an write WS 0 */ rlwimi r11,r12,0,0,31-PAGE_SHIFT tlbwe r11,r13,PPC44x_TLB_XLAT @@ -443,6 +436,227 @@ finish_tlb_load: mfspr r10, SPRN_SPRG_RSCRATCH0 rfi /* Force context change */ +/* TLB error interrupts for 476 + */ +#ifdef CONFIG_PPC_47x + START_EXCEPTION(DataTLBError47x) + mtspr SPRN_SPRG_WSCRATCH0,r10 /* Save some working registers */ + mtspr SPRN_SPRG_WSCRATCH1,r11 + mtspr SPRN_SPRG_WSCRATCH2,r12 + mtspr SPRN_SPRG_WSCRATCH3,r13 + mfcr r11 + mtspr SPRN_SPRG_WSCRATCH4,r11 + mfspr r10,SPRN_DEAR /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11,PAGE_OFFSET@h + cmplw cr0,r10,r11 + blt+ 3f + lis r11,swapper_pg_dir@h + ori r11,r11, swapper_pg_dir@l + li r12,0 /* MMUCR = 0 */ + b 4f + + /* Get the PGD for the current thread and setup MMUCR */ +3: mfspr r11,SPRN_SPRG3 + lwz r11,PGDIR(r11) + mfspr r12,SPRN_PID /* Get PID */ +4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */ + + /* Mask of required permission bits. Note that while we + * do copy ESR:ST to _PAGE_RW position as trying to write + * to an RO page is pretty common, we don't do it with + * _PAGE_DIRTY. We could do it, but it's a fairly rare + * event so I'd rather take the overhead when it happens + * rather than adding an instruction here. We should measure + * whether the whole thing is worth it in the first place + * as we could avoid loading SPRN_ESR completely in the first + * place... + * + * TODO: Is it worth doing that mfspr & rlwimi in the first + * place or can we save a couple of instructions here ? + */ + mfspr r12,SPRN_ESR + li r13,_PAGE_PRESENT|_PAGE_ACCESSED + rlwimi r13,r12,10,30,30 + + /* Load the PTE */ + /* Compute pgdir/pmd offset */ + rlwinm r12,r10,PPC44x_PGD_OFF_SHIFT,PPC44x_PGD_OFF_MASK_BIT,29 + lwzx r11,r12,r11 /* Get pgd/pmd entry */ + + /* Word 0 is EPN,V,TS,DSIZ */ + li r12,PPC47x_TLB0_VALID | PPC47x_TLBE_SIZE + rlwimi r10,r12,0,32-PAGE_SHIFT,31 /* Insert valid and page size*/ + li r12,0 + tlbwe r10,r12,0 + + /* XXX can we do better ? Need to make sure tlbwe has established + * latch V bit in MMUCR0 before the PTE is loaded further down */ +#ifdef CONFIG_SMP + isync +#endif + + rlwinm. r12,r11,0,0,20 /* Extract pt base address */ + /* Compute pte address */ + rlwimi r12,r10,PPC44x_PTE_ADD_SHIFT,PPC44x_PTE_ADD_MASK_BIT,28 + beq 2f /* Bail if no table */ + lwz r11,0(r12) /* Get high word of pte entry */ + + /* XXX can we do better ? maybe insert a known 0 bit from r11 into the + * bottom of r12 to create a data dependency... We can also use r10 + * as destination nowadays + */ +#ifdef CONFIG_SMP + lwsync +#endif + lwz r12,4(r12) /* Get low word of pte entry */ + + andc. r13,r13,r12 /* Check permission */ + + /* Jump to common tlb load */ + beq finish_tlb_load_47x + +2: /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mfspr r11,SPRN_SPRG_RSCRATCH4 + mtcr r11 + mfspr r13,SPRN_SPRG_RSCRATCH3 + mfspr r12,SPRN_SPRG_RSCRATCH2 + mfspr r11,SPRN_SPRG_RSCRATCH1 + mfspr r10,SPRN_SPRG_RSCRATCH0 + b DataStorage + + /* Instruction TLB Error Interrupt */ + /* + * Nearly the same as above, except we get our + * information from different registers and bailout + * to a different point. 
+ */ + START_EXCEPTION(InstructionTLBError47x) + mtspr SPRN_SPRG_WSCRATCH0,r10 /* Save some working registers */ + mtspr SPRN_SPRG_WSCRATCH1,r11 + mtspr SPRN_SPRG_WSCRATCH2,r12 + mtspr SPRN_SPRG_WSCRATCH3,r13 + mfcr r11 + mtspr SPRN_SPRG_WSCRATCH4,r11 + mfspr r10,SPRN_SRR0 /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11,PAGE_OFFSET@h + cmplw cr0,r10,r11 + blt+ 3f + lis r11,swapper_pg_dir@h + ori r11,r11, swapper_pg_dir@l + li r12,0 /* MMUCR = 0 */ + b 4f + + /* Get the PGD for the current thread and setup MMUCR */ +3: mfspr r11,SPRN_SPRG_THREAD + lwz r11,PGDIR(r11) + mfspr r12,SPRN_PID /* Get PID */ +4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */ + + /* Make up the required permissions */ + li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC + + /* Load PTE */ + /* Compute pgdir/pmd offset */ + rlwinm r12,r10,PPC44x_PGD_OFF_SHIFT,PPC44x_PGD_OFF_MASK_BIT,29 + lwzx r11,r12,r11 /* Get pgd/pmd entry */ + + /* Word 0 is EPN,V,TS,DSIZ */ + li r12,PPC47x_TLB0_VALID | PPC47x_TLBE_SIZE + rlwimi r10,r12,0,32-PAGE_SHIFT,31 /* Insert valid and page size*/ + li r12,0 + tlbwe r10,r12,0 + + /* XXX can we do better ? Need to make sure tlbwe has established + * latch V bit in MMUCR0 before the PTE is loaded further down */ +#ifdef CONFIG_SMP + isync +#endif + + rlwinm. r12,r11,0,0,20 /* Extract pt base address */ + /* Compute pte address */ + rlwimi r12,r10,PPC44x_PTE_ADD_SHIFT,PPC44x_PTE_ADD_MASK_BIT,28 + beq 2f /* Bail if no table */ + + lwz r11,0(r12) /* Get high word of pte entry */ + /* XXX can we do better ? maybe insert a known 0 bit from r11 into the + * bottom of r12 to create a data dependency... We can also use r10 + * as destination nowadays + */ +#ifdef CONFIG_SMP + lwsync +#endif + lwz r12,4(r12) /* Get low word of pte entry */ + + andc. r13,r13,r12 /* Check permission */ + + /* Jump to common TLB load point */ + beq finish_tlb_load_47x + +2: /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mfspr r11, SPRN_SPRG_RSCRATCH4 + mtcr r11 + mfspr r13, SPRN_SPRG_RSCRATCH3 + mfspr r12, SPRN_SPRG_RSCRATCH2 + mfspr r11, SPRN_SPRG_RSCRATCH1 + mfspr r10, SPRN_SPRG_RSCRATCH0 + b InstructionStorage + +/* + * Both the instruction and data TLB miss get to this + * point to load the TLB. + * r10 - free to use + * r11 - PTE high word value + * r12 - PTE low word value + * r13 - free to use + * MMUCR - loaded with proper value when we get here + * Upon exit, we reload everything and RFI. + */ +finish_tlb_load_47x: + /* Combine RPN & ERPN an write WS 1 */ + rlwimi r11,r12,0,0,31-PAGE_SHIFT + tlbwe r11,r13,1 + + /* And make up word 2 */ + li r10,0xf85 /* Mask to apply from PTE */ + rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */ + and r11,r12,r10 /* Mask PTE bits to keep */ + andi. r10,r12,_PAGE_USER /* User page ? */ + beq 1f /* nope, leave U bits empty */ + rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ +1: tlbwe r11,r13,2 + + /* Done...restore registers and get out of here. + */ + mfspr r11, SPRN_SPRG_RSCRATCH4 + mtcr r11 + mfspr r13, SPRN_SPRG_RSCRATCH3 + mfspr r12, SPRN_SPRG_RSCRATCH2 + mfspr r11, SPRN_SPRG_RSCRATCH1 + mfspr r10, SPRN_SPRG_RSCRATCH0 + rfi + +#endif /* CONFIG_PPC_47x */ + + /* Debug Interrupt */ + /* + * This statement needs to exist at the end of the IVPR + * definition just in case you end up taking a debug + * exception within another exception. 
+ */ + DEBUG_CRIT_EXCEPTION + /* * Global functions */ @@ -491,9 +705,18 @@ _GLOBAL(set_context) /* * Init CPU state. This is called at boot time or for secondary CPUs * to setup initial TLB entries, setup IVORs, etc... + * */ _GLOBAL(init_cpu_state) mflr r22 +#ifdef CONFIG_PPC_47x + /* We use the PVR to differenciate 44x cores from 476 */ + mfspr r3,SPRN_PVR + srwi r3,r3,16 + cmplwi cr0,r3,PVR_476@h + beq head_start_47x +#endif /* CONFIG_PPC_47x */ + /* * In case the firmware didn't do it, we apply some workarounds * that are good for all 440 core variants here @@ -506,7 +729,7 @@ _GLOBAL(init_cpu_state) sync /* - * Set up the initial MMU state + * Set up the initial MMU state for 44x * * We are still executing code at the virtual address * mappings set by the firmware for the base of RAM. @@ -646,16 +869,257 @@ skpinv: addi r4,r4,1 /* Increment */ SET_IVOR(10, Decrementer); SET_IVOR(11, FixedIntervalTimer); SET_IVOR(12, WatchdogTimer); - SET_IVOR(13, DataTLBError); - SET_IVOR(14, InstructionTLBError); + SET_IVOR(13, DataTLBError44x); + SET_IVOR(14, InstructionTLBError44x); SET_IVOR(15, DebugCrit); + b head_start_common + + +#ifdef CONFIG_PPC_47x + +#ifdef CONFIG_SMP + +/* Entry point for secondary 47x processors */ +_GLOBAL(start_secondary_47x) + mr r24,r3 /* CPU number */ + + bl init_cpu_state + + /* Now we need to bolt the rest of kernel memory which + * is done in C code. We must be careful because our task + * struct or our stack can (and will probably) be out + * of reach of the initial 256M TLB entry, so we use a + * small temporary stack in .bss for that. This works + * because only one CPU at a time can be in this code + */ + lis r1,temp_boot_stack@h + ori r1,r1,temp_boot_stack@l + addi r1,r1,1024-STACK_FRAME_OVERHEAD + li r0,0 + stw r0,0(r1) + bl mmu_init_secondary + + /* Now we can get our task struct and real stack pointer */ + + /* Get current_thread_info and current */ + lis r1,secondary_ti@ha + lwz r1,secondary_ti@l(r1) + lwz r2,TI_TASK(r1) + + /* Current stack pointer */ + addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + li r0,0 + stw r0,0(r1) + + /* Kernel stack for exception entry in SPRG3 */ + addi r4,r2,THREAD /* init task's THREAD */ + mtspr SPRN_SPRG3,r4 + + b start_secondary + +#endif /* CONFIG_SMP */ + +/* + * Set up the initial MMU state for 44x + * + * We are still executing code at the virtual address + * mappings set by the firmware for the base of RAM. + */ + +head_start_47x: + /* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */ + mfspr r3,SPRN_PID /* Get PID */ + mfmsr r4 /* Get MSR */ + andi. r4,r4,MSR_IS@l /* TS=1? */ + beq 1f /* If not, leave STS=0 */ + oris r3,r3,PPC47x_MMUCR_STS@h /* Set STS=1 */ +1: mtspr SPRN_MMUCR,r3 /* Put MMUCR */ + sync + + /* Find the entry we are running from */ + bl 1f +1: mflr r23 + tlbsx r23,0,r23 + tlbre r24,r23,0 + tlbre r25,r23,1 + tlbre r26,r23,2 + +/* + * Cleanup time + */ + + /* Initialize MMUCR */ + li r5,0 + mtspr SPRN_MMUCR,r5 + sync + +clear_all_utlb_entries: + + #; Set initial values. + + addis r3,0,0x8000 + addi r4,0,0 + addi r5,0,0 + b clear_utlb_entry + + #; Align the loop to speed things up. + + .align 6 + +clear_utlb_entry: + + tlbwe r4,r3,0 + tlbwe r5,r3,1 + tlbwe r5,r3,2 + addis r3,r3,0x2000 + cmpwi r3,0 + bne clear_utlb_entry + addis r3,0,0x8000 + addis r4,r4,0x100 + cmpwi r4,0 + bne clear_utlb_entry + + #; Restore original entry. 
+ + oris r23,r23,0x8000 /* specify the way */ + tlbwe r24,r23,0 + tlbwe r25,r23,1 + tlbwe r26,r23,2 + +/* + * Configure and load pinned entry into TLB for the kernel core + */ + + lis r3,PAGE_OFFSET@h + ori r3,r3,PAGE_OFFSET@l + + /* Kernel is at the base of RAM */ + li r4, 0 /* Load the kernel physical address */ + + /* Load the kernel PID = 0 */ + li r0,0 + mtspr SPRN_PID,r0 + sync + + /* Word 0 */ + clrrwi r3,r3,12 /* Mask off the effective page number */ + ori r3,r3,PPC47x_TLB0_VALID | PPC47x_TLB0_256M + + /* Word 1 */ + clrrwi r4,r4,12 /* Mask off the real page number */ + /* ERPN is 0 for first 4GB page */ + /* Word 2 */ + li r5,0 + ori r5,r5,PPC47x_TLB2_S_RWX +#ifdef CONFIG_SMP + ori r5,r5,PPC47x_TLB2_M +#endif + + /* We write to way 0 and bolted 0 */ + lis r0,0x8800 + tlbwe r3,r0,0 + tlbwe r4,r0,1 + tlbwe r5,r0,2 + +/* + * Configure SSPCR, ISPCR and USPCR for now to search everything, we can fix + * them up later + */ + LOAD_REG_IMMEDIATE(r3, 0x9abcdef0) + mtspr SPRN_SSPCR,r3 + mtspr SPRN_USPCR,r3 + LOAD_REG_IMMEDIATE(r3, 0x12345670) + mtspr SPRN_ISPCR,r3 + + /* Force context change */ + mfmsr r0 + mtspr SPRN_SRR1, r0 + lis r0,3f@h + ori r0,r0,3f@l + mtspr SPRN_SRR0,r0 + sync + rfi + + /* Invalidate original entry we used */ +3: + rlwinm r24,r24,0,21,19 /* clear the "valid" bit */ + tlbwe r24,r23,0 + addi r24,0,0 + tlbwe r24,r23,1 + tlbwe r24,r23,2 + isync /* Clear out the shadow TLB entries */ + +#ifdef CONFIG_PPC_EARLY_DEBUG_44x + /* Add UART mapping for early debug. */ + + /* Word 0 */ + lis r3,PPC44x_EARLY_DEBUG_VIRTADDR@h + ori r3,r3,PPC47x_TLB0_VALID | PPC47x_TLB0_TS | PPC47x_TLB0_1M + + /* Word 1 */ + lis r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW@h + ori r4,r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH + + /* Word 2 */ + li r5,(PPC47x_TLB2_S_RW | PPC47x_TLB2_IMG) + + /* Bolted in way 0, bolt slot 5, we -hope- we don't hit the same + * congruence class as the kernel, we need to make sure of it at + * some point + */ + lis r0,0x8d00 + tlbwe r3,r0,0 + tlbwe r4,r0,1 + tlbwe r5,r0,2 + + /* Force context change */ + isync +#endif /* CONFIG_PPC_EARLY_DEBUG_44x */ + + /* Establish the interrupt vector offsets */ + SET_IVOR(0, CriticalInput); + SET_IVOR(1, MachineCheckA); + SET_IVOR(2, DataStorage); + SET_IVOR(3, InstructionStorage); + SET_IVOR(4, ExternalInput); + SET_IVOR(5, Alignment); + SET_IVOR(6, Program); + SET_IVOR(7, FloatingPointUnavailable); + SET_IVOR(8, SystemCall); + SET_IVOR(9, AuxillaryProcessorUnavailable); + SET_IVOR(10, Decrementer); + SET_IVOR(11, FixedIntervalTimer); + SET_IVOR(12, WatchdogTimer); + SET_IVOR(13, DataTLBError47x); + SET_IVOR(14, InstructionTLBError47x); + SET_IVOR(15, DebugCrit); + + /* We configure icbi to invalidate 128 bytes at a time since the + * current 32-bit kernel code isn't too happy with icache != dcache + * block size + */ + mfspr r3,SPRN_CCR0 + oris r3,r3,0x0020 + mtspr SPRN_CCR0,r3 + isync + +#endif /* CONFIG_PPC_47x */ + +/* + * Here we are back to code that is common between 44x and 47x + * + * We proceed to further kernel initialization and return to the + * main kernel entry + */ +head_start_common: /* Establish the interrupt vector base */ lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */ mtspr SPRN_IVPR,r4 addis r22,r22,KERNELBASE@h mtlr r22 + isync blr /* @@ -683,3 +1147,9 @@ swapper_pg_dir: */ abatron_pteptrs: .space 8 + +#ifdef CONFIG_SMP + .align 12 +temp_boot_stack: + .space 1024 +#endif /* CONFIG_SMP */ diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 8649f53..8043d1b 100644 --- 
a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -441,7 +441,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) addi r3,r3,L1_CACHE_BYTES bdnz 0b sync -#ifndef CONFIG_44x +#ifdef CONFIG_44x /* We don't flush the icache on 44x. Those have a virtual icache * and we don't have access to the virtual address here (it's * not the page vaddr but where it's mapped in user space). The @@ -449,15 +449,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) * a change in the address space occurs, before returning to * user space */ +BEGIN_MMU_FTR_SECTION + blr +END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x) +#endif /* CONFIG_44x */ mtctr r4 1: icbi 0,r6 addi r6,r6,L1_CACHE_BYTES bdnz 1b sync isync -#endif /* CONFIG_44x */ blr +#ifndef CONFIG_BOOKE /* * Flush a particular page from the data cache to RAM, identified * by its physical address. We turn off the MMU so we can just use @@ -490,6 +494,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) mtmsr r10 /* restore DR */ isync blr +#endif /* CONFIG_BOOKE */ /* * Clear pages using the dcbz instruction, which doesn't cause any diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index e36f94f..3fe4de2 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -495,6 +495,14 @@ int __devinit start_secondary(void *unused) current->active_mm = &init_mm; smp_store_cpu_info(cpu); + +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) + /* Clear any pending timer interrupts */ + mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS); + + /* Enable decrementer interrupt */ + mtspr(SPRN_TCR, TCR_DIE); +#endif set_dec(tb_ticks_per_jiffy); preempt_disable(); cpu_callin_map[cpu] = 1; -- cgit v1.1 From fc5e709731429bc2db27897630e7c0089f297680 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Fri, 5 Mar 2010 03:43:18 +0000 Subject: powerpc/476: add machine check handler for 47x core The 47x core's MCSR varies from 44x, so it needs its own machine check handler. 
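For context, the hook wired into the cputable entry below is invoked from the generic trap path; machine_check_exception() in traps.c does roughly the following (a simplified sketch of the logic of this era, not the verbatim function):

    int recover = 0;

    if (cur_cpu_spec && cur_cpu_spec->machine_check)
            recover = cur_cpu_spec->machine_check(regs); /* machine_check_47x here */
    if (recover > 0)
            return; /* handled; otherwise fall through to the fatal path */

so machine_check_47x() returning 0 after decoding MCSR lets the generic unrecoverable-error handling run.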
Signed-off-by: Dave Kleikamp Signed-off-by: Josh Boyer --- arch/powerpc/kernel/cputable.c | 1 + arch/powerpc/kernel/traps.c | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index a1d8458..ad6baf8 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1712,6 +1712,7 @@ static struct cpu_spec __initdata cpu_specs[] = { MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, .icache_bsize = 32, .dcache_bsize = 128, + .machine_check = machine_check_47x, .platform = "ppc470", }, { /* default match */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 29d128e..ca7ce85 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -380,6 +380,46 @@ int machine_check_440A(struct pt_regs *regs) } return 0; } + +int machine_check_47x(struct pt_regs *regs) +{ + unsigned long reason = get_mc_reason(regs); + u32 mcsr; + + printk(KERN_ERR "Machine check in kernel mode.\n"); + if (reason & ESR_IMCP) { + printk(KERN_ERR + "Instruction Synchronous Machine Check exception\n"); + mtspr(SPRN_ESR, reason & ~ESR_IMCP); + return 0; + } + mcsr = mfspr(SPRN_MCSR); + if (mcsr & MCSR_IB) + printk(KERN_ERR "Instruction Read PLB Error\n"); + if (mcsr & MCSR_DRB) + printk(KERN_ERR "Data Read PLB Error\n"); + if (mcsr & MCSR_DWB) + printk(KERN_ERR "Data Write PLB Error\n"); + if (mcsr & MCSR_TLBP) + printk(KERN_ERR "TLB Parity Error\n"); + if (mcsr & MCSR_ICP) { + flush_instruction_cache(); + printk(KERN_ERR "I-Cache Parity Error\n"); + } + if (mcsr & MCSR_DCSP) + printk(KERN_ERR "D-Cache Search Parity Error\n"); + if (mcsr & PPC47x_MCSR_GPR) + printk(KERN_ERR "GPR Parity Error\n"); + if (mcsr & PPC47x_MCSR_FPR) + printk(KERN_ERR "FPR Parity Error\n"); + if (mcsr & PPC47x_MCSR_IPR) + printk(KERN_ERR "Machine Check exception is imprecise\n"); + + /* Clear MCSR */ + mtspr(SPRN_MCSR, mcsr); + + return 0; +} #elif defined(CONFIG_E500) int machine_check_e500(struct pt_regs *regs) { -- cgit v1.1 From 221c185d4e11b4061409da5d592779ced484614c Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Fri, 5 Mar 2010 10:43:24 +0000 Subject: powerpc/476: Add isync after loading mmu and debug spr's 476 requires an isync after loading MMU and debug related SPR's. Some of these are in performance-critical paths and may need to be optimized, but initially, we're playing it safe. Signed-off-by: Torez Smith Signed-off-by: Dave Kleikamp Signed-off-by: Josh Boyer --- arch/powerpc/kernel/kprobes.c | 3 +++ arch/powerpc/kernel/process.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index b36f074..c533525 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -114,6 +114,9 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) #ifdef CONFIG_PPC_ADV_DEBUG_REGS regs->msr &= ~MSR_CE; mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); +#ifdef CONFIG_PPC_47x + isync(); +#endif #endif /* diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e4d71ce..9d255b4 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -371,6 +371,9 @@ int set_dabr(unsigned long dabr) /* XXX should we have a CPU_FTR_HAS_DABR ? 
*/ #ifdef CONFIG_PPC_ADV_DEBUG_REGS mtspr(SPRN_DAC1, dabr); +#ifdef CONFIG_PPC_47x + isync(); +#endif #elif defined(CONFIG_PPC_BOOK3S) mtspr(SPRN_DABR, dabr); #endif -- cgit v1.1 From b4e8c8dd8456c1d3685fb5b715c9795d250f500e Mon Sep 17 00:00:00 2001 From: Torez Smith Date: Fri, 5 Mar 2010 10:45:54 +0000 Subject: powerpc/4xx: Simple platform for the ISS 4xx simulator This is a trivial 4xx platform that uses the new simple bsp from Josh and is handy to use in simulators such as ISS or even Mambo, which don't properly implement most of the actual devices in the SoC but really only the core. Signed-off-by: Torez Smith Signed-off-by: Dave Kleikamp Signed-off-by: Josh Boyer --- arch/powerpc/kernel/cputable.c | 15 +++++++++++++++ arch/powerpc/kernel/head_44x.S | 2 ++ 2 files changed, 17 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index ad6baf8..9556be9 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1715,6 +1715,21 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_47x, .platform = "ppc470", }, + { /* 476 iss */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00050000, + .cpu_name = "476", + .cpu_features = CPU_FTRS_47X, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_47x | + MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 1acd175..5ab484e 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -715,6 +715,8 @@ _GLOBAL(init_cpu_state) srwi r3,r3,16 cmplwi cr0,r3,PVR_476@h beq head_start_47x + cmplwi cr0,r3,PVR_476_ISS@h + beq head_start_47x #endif /* CONFIG_PPC_47x */ /* -- cgit v1.1 From d5f86fe3457f48f27eecd40c605e7876d026af7c Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 26 Apr 2010 15:32:33 +0000 Subject: powerpc/cpumask: Convert rtasd to new cpumask API Use cpumask_first, cpumask_next in rtasd code.
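The pattern being converted is a simple round-robin of deferred work across the online CPUs; as a generic sketch (hypothetical work item, not the rtasd code itself):

/* Sketch: bounce a delayed work item to the next online CPU.
 * cpumask_next() returns >= nr_cpu_ids past the end of the mask,
 * at which point we wrap around with cpumask_first(). */
static void bounce_to_next_cpu(struct delayed_work *work, unsigned long delay)
{
	int cpu;

	cpu = cpumask_next(smp_processor_id(), cpu_online_mask);
	if (cpu >= nr_cpu_ids)
		cpu = cpumask_first(cpu_online_mask);

	schedule_delayed_work_on(cpu, work, delay);
}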
Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/rtasd.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 4190eae..e907ca6 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -411,9 +411,9 @@ static void rtas_event_scan(struct work_struct *w) get_online_cpus(); - cpu = next_cpu(smp_processor_id(), cpu_online_map); - if (cpu == NR_CPUS) { - cpu = first_cpu(cpu_online_map); + cpu = cpumask_next(smp_processor_id(), cpu_online_mask); + if (cpu >= nr_cpu_ids) { + cpu = cpumask_first(cpu_online_mask); if (first_pass) { first_pass = 0; @@ -466,8 +466,8 @@ static void start_event_scan(void) /* Retreive errors from nvram if any */ retreive_nvram_error_log(); - schedule_delayed_work_on(first_cpu(cpu_online_map), &event_scan_work, - event_scan_delay); + schedule_delayed_work_on(cpumask_first(cpu_online_mask), + &event_scan_work, event_scan_delay); } static int __init rtas_init(void) -- cgit v1.1 From bfb9126defa80cbed6d91ed9685b238b0d7e81c4 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 26 Apr 2010 15:32:34 +0000 Subject: powerpc/cpumask: Convert smp_cpus_done to new cpumask API Use the new cpumask_* functions and dynamically allocate the cpumask in smp_cpus_done. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 3fe4de2..17523a0 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -562,19 +562,22 @@ int setup_profiling_timer(unsigned int multiplier) void __init smp_cpus_done(unsigned int max_cpus) { - cpumask_t old_mask; + cpumask_var_t old_mask; /* We want the setup_cpu() here to be called from CPU 0, but our * init thread may have been "borrowed" by another CPU in the meantime * se we pin us down to CPU 0 for a short while */ - old_mask = current->cpus_allowed; + alloc_cpumask_var(&old_mask, GFP_NOWAIT); + cpumask_copy(old_mask, &current->cpus_allowed); set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid)); if (smp_ops && smp_ops->setup_cpu) smp_ops->setup_cpu(boot_cpuid); - set_cpus_allowed_ptr(current, &old_mask); + set_cpus_allowed_ptr(current, old_mask); + + free_cpumask_var(old_mask); snapshot_timebases(); -- cgit v1.1 From b6decb707952c678d110699abb5ed86d45ca6927 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 26 Apr 2010 15:32:35 +0000 Subject: powerpc/cpumask: Convert fixup_irqs to new cpumask API Use new cpumask_* functions, and dynamically allocate cpumask in fixup_irqs.
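The cpumask_var_t lifecycle these conversions rely on has one common shape throughout; a minimal sketch (hypothetical caller, simplified error handling):

/* Sketch: save the current affinity, pin to a target mask, restore.
 * With CONFIG_CPUMASK_OFFSTACK the mask is heap allocated; otherwise
 * alloc_cpumask_var()/free_cpumask_var() are effectively no-ops. */
static int run_pinned(const struct cpumask *target)
{
	cpumask_var_t saved;

	if (!alloc_cpumask_var(&saved, GFP_KERNEL))
		return -ENOMEM;

	cpumask_copy(saved, &current->cpus_allowed);
	set_cpus_allowed_ptr(current, target);
	/* ... do the work that must run on the target CPUs ... */
	set_cpus_allowed_ptr(current, saved);

	free_cpumask_var(saved);
	return 0;
}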
Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 17 ++++++++++------- arch/powerpc/kernel/smp.c | 4 ++-- 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 64f6f20..250ee2e 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -290,30 +290,33 @@ u64 arch_irq_stat_cpu(unsigned int cpu) } #ifdef CONFIG_HOTPLUG_CPU -void fixup_irqs(cpumask_t map) +void fixup_irqs(const struct cpumask *map) { struct irq_desc *desc; unsigned int irq; static int warned; + cpumask_var_t mask; - for_each_irq(irq) { - cpumask_t mask; + alloc_cpumask_var(&mask, GFP_KERNEL); + for_each_irq(irq) { desc = irq_to_desc(irq); if (desc && desc->status & IRQ_PER_CPU) continue; - cpumask_and(&mask, desc->affinity, &map); - if (any_online_cpu(mask) == NR_CPUS) { + cpumask_and(mask, desc->affinity, map); + if (cpumask_any(mask) >= nr_cpu_ids) { printk("Breaking affinity for irq %i\n", irq); - mask = map; + cpumask_copy(mask, map); } if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, &mask); + desc->chip->set_affinity(irq, mask); else if (desc->action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } + free_cpumask_var(mask); + local_irq_enable(); mdelay(1); local_irq_disable(); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 17523a0..62e82c2 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -313,7 +313,7 @@ int generic_cpu_disable(void) set_cpu_online(cpu, false); #ifdef CONFIG_PPC64 vdso_data->processorCount--; - fixup_irqs(cpu_online_map); + fixup_irqs(cpu_online_mask); #endif return 0; } @@ -333,7 +333,7 @@ int generic_cpu_enable(unsigned int cpu) cpu_relax(); #ifdef CONFIG_PPC64 - fixup_irqs(cpu_online_map); + fixup_irqs(cpu_online_mask); /* counter the irq disable in fixup_irqs */ local_irq_enable(); #endif -- cgit v1.1 From 2c2df038450cbf628426183c9efffc17cfea3406 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 26 Apr 2010 15:32:39 +0000 Subject: powerpc/cpumask: Refactor /proc/cpuinfo code This separates the per cpu output from the summary output at the end of the file, making it easier to convert to the new cpumask API in a subsequent patch. 
Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup-common.c | 62 ++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 29 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 48f0a00..58699a4 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -161,6 +161,38 @@ extern u32 cpu_temp_both(unsigned long cpu); DEFINE_PER_CPU(unsigned int, cpu_pvr); #endif +static void show_cpuinfo_summary(struct seq_file *m) +{ + struct device_node *root; + const char *model = NULL; +#if defined(CONFIG_SMP) && defined(CONFIG_PPC32) + unsigned long bogosum = 0; + int i; + for_each_online_cpu(i) + bogosum += loops_per_jiffy; + seq_printf(m, "total bogomips\t: %lu.%02lu\n", + bogosum/(500000/HZ), bogosum/(5000/HZ) % 100); +#endif /* CONFIG_SMP && CONFIG_PPC32 */ + seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq); + if (ppc_md.name) + seq_printf(m, "platform\t: %s\n", ppc_md.name); + root = of_find_node_by_path("/"); + if (root) + model = of_get_property(root, "model", NULL); + if (model) + seq_printf(m, "model\t\t: %s\n", model); + of_node_put(root); + + if (ppc_md.show_cpuinfo != NULL) + ppc_md.show_cpuinfo(m); + +#ifdef CONFIG_PPC32 + /* Display the amount of memory */ + seq_printf(m, "Memory\t\t: %d MB\n", + (unsigned int)(total_memory / (1024 * 1024))); +#endif +} + static int show_cpuinfo(struct seq_file *m, void *v) { unsigned long cpu_id = (unsigned long)v - 1; @@ -169,35 +201,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned short min; if (cpu_id == NR_CPUS) { - struct device_node *root; - const char *model = NULL; -#if defined(CONFIG_SMP) && defined(CONFIG_PPC32) - unsigned long bogosum = 0; - int i; - for_each_online_cpu(i) - bogosum += loops_per_jiffy; - seq_printf(m, "total bogomips\t: %lu.%02lu\n", - bogosum/(500000/HZ), bogosum/(5000/HZ) % 100); -#endif /* CONFIG_SMP && CONFIG_PPC32 */ - seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq); - if (ppc_md.name) - seq_printf(m, "platform\t: %s\n", ppc_md.name); - root = of_find_node_by_path("/"); - if (root) - model = of_get_property(root, "model", NULL); - if (model) - seq_printf(m, "model\t\t: %s\n", model); - of_node_put(root); - - if (ppc_md.show_cpuinfo != NULL) - ppc_md.show_cpuinfo(m); - -#ifdef CONFIG_PPC32 - /* Display the amount of memory */ - seq_printf(m, "Memory\t\t: %d MB\n", - (unsigned int)(total_memory / (1024 * 1024))); -#endif - + show_cpuinfo_summary(m); return 0; } -- cgit v1.1 From e6532c63cc3dbefc79936fc9c9c68a151004fe46 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 26 Apr 2010 15:32:40 +0000 Subject: powerpc/cpumask: Convert /proc/cpuinfo to new cpumask API Use new cpumask API in /proc/cpuinfo code. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup-common.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 58699a4..8dcec47 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -200,11 +200,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned short maj; unsigned short min; - if (cpu_id == NR_CPUS) { - show_cpuinfo_summary(m); - return 0; - } - /* We only show online cpus: disable preempt (overzealous, I * knew) to prevent cpu going down. 
*/ preempt_disable(); @@ -312,19 +307,28 @@ static int show_cpuinfo(struct seq_file *m, void *v) #endif preempt_enable(); + + /* If this is the last cpu, print the summary */ + if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids) + show_cpuinfo_summary(m); + return 0; } static void *c_start(struct seq_file *m, loff_t *pos) { - unsigned long i = *pos; - - return i <= NR_CPUS ? (void *)(i + 1) : NULL; + if (*pos == 0) /* just in case, cpu 0 is not the first */ + *pos = cpumask_first(cpu_online_mask); + else + *pos = cpumask_next(*pos - 1, cpu_online_mask); + if ((*pos) < nr_cpu_ids) + return (void *)(unsigned long)(*pos + 1); + return NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) { - ++*pos; + (*pos)++; return c_start(m, pos); } -- cgit v1.1 From cc1ba8ea6dde3f049b2b365d8fdc13976aee25cb Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 26 Apr 2010 15:32:41 +0000 Subject: powerpc/cpumask: Dynamically allocate cpu_sibling_map and cpu_core_map cpumasks Dynamically allocate cpu_sibling_map and cpu_core_map cpumasks. We don't need to set_cpu_online() the boot cpu in smp_prepare_boot_cpu, init/main.c does it for us. We also postpone setting of the boot cpu in cpu_sibling_map and cpu_core_map until when the memory allocator is available (smp_prepare_cpus), similar to x86. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 62e82c2..39babb1 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -59,8 +59,8 @@ struct thread_info *secondary_ti; -DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; -DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE; +DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); +DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); @@ -271,6 +271,16 @@ void __init smp_prepare_cpus(unsigned int max_cpus) smp_store_cpu_info(boot_cpuid); cpu_callin_map[boot_cpuid] = 1; + for_each_possible_cpu(cpu) { + zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu), + GFP_KERNEL, cpu_to_node(cpu)); + zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu), + GFP_KERNEL, cpu_to_node(cpu)); + } + + cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid)); + cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid)); + if (smp_ops) if (smp_ops->probe) max_cpus = smp_ops->probe(); @@ -289,10 +299,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) void __devinit smp_prepare_boot_cpu(void) { BUG_ON(smp_processor_id() != boot_cpuid); - - set_cpu_online(boot_cpuid, true); - cpu_set(boot_cpuid, per_cpu(cpu_sibling_map, boot_cpuid)); - cpu_set(boot_cpuid, per_cpu(cpu_core_map, boot_cpuid)); #ifdef CONFIG_PPC64 paca[boot_cpuid].__current = current; #endif @@ -525,15 +531,15 @@ int __devinit start_secondary(void *unused) for (i = 0; i < threads_per_core; i++) { if (cpu_is_offline(base + i)) continue; - cpu_set(cpu, per_cpu(cpu_sibling_map, base + i)); - cpu_set(base + i, per_cpu(cpu_sibling_map, cpu)); + cpumask_set_cpu(cpu, cpu_sibling_mask(base + i)); + cpumask_set_cpu(base + i, cpu_sibling_mask(cpu)); /* cpu_core_map should be a superset of * cpu_sibling_map even if we don't have cache * information, so update the former here, too. 
*/ - cpu_set(cpu, per_cpu(cpu_core_map, base +i)); - cpu_set(base + i, per_cpu(cpu_core_map, cpu)); + cpumask_set_cpu(cpu, cpu_core_mask(base + i)); + cpumask_set_cpu(base + i, cpu_core_mask(cpu)); } l2_cache = cpu_to_l2cache(cpu); for_each_online_cpu(i) { @@ -541,8 +547,8 @@ int __devinit start_secondary(void *unused) if (!np) continue; if (np == l2_cache) { - cpu_set(cpu, per_cpu(cpu_core_map, i)); - cpu_set(i, per_cpu(cpu_core_map, cpu)); + cpumask_set_cpu(cpu, cpu_core_mask(i)); + cpumask_set_cpu(i, cpu_core_mask(cpu)); } of_node_put(np); } @@ -602,10 +608,10 @@ int __cpu_disable(void) /* Update sibling maps */ base = cpu_first_thread_in_core(cpu); for (i = 0; i < threads_per_core; i++) { - cpu_clear(cpu, per_cpu(cpu_sibling_map, base + i)); - cpu_clear(base + i, per_cpu(cpu_sibling_map, cpu)); - cpu_clear(cpu, per_cpu(cpu_core_map, base +i)); - cpu_clear(base + i, per_cpu(cpu_core_map, cpu)); + cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i)); + cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu)); + cpumask_clear_cpu(cpu, cpu_core_mask(base + i)); + cpumask_clear_cpu(base + i, cpu_core_mask(cpu)); } l2_cache = cpu_to_l2cache(cpu); @@ -614,8 +620,8 @@ int __cpu_disable(void) if (!np) continue; if (np == l2_cache) { - cpu_clear(cpu, per_cpu(cpu_core_map, i)); - cpu_clear(i, per_cpu(cpu_core_map, cpu)); + cpumask_clear_cpu(cpu, cpu_core_mask(i)); + cpumask_clear_cpu(i, cpu_core_mask(cpu)); } of_node_put(np); } -- cgit v1.1 From 828a69869ba266cabb486a6b59ea8643d56b33ce Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 26 Apr 2010 15:32:44 +0000 Subject: powerpc/cpumask: Update some comments Since the *_map cpumask variants are deprecated, change the comments to instead refer to *_mask. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup-common.c | 6 +++--- arch/powerpc/kernel/smp.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 8dcec47..5e4d852 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -394,14 +394,14 @@ static void __init cpu_init_thread_core_maps(int tpc) /** * setup_cpu_maps - initialize the following cpu maps: - * cpu_possible_map - * cpu_present_map + * cpu_possible_mask + * cpu_present_mask * * Having the possible map set up early allows us to restrict allocations * of things like irqstacks to num_possible_cpus() rather than NR_CPUS. * * We do not initialize the online map here; cpus set their own bits in - * cpu_online_map as they come up. + * cpu_online_mask as they come up. * * This function is valid only for Open Firmware systems. finish_device_tree * must be called before using this. diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 39babb1..bf36616 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -468,7 +468,7 @@ out: return id; } -/* Must be called when no change can occur to cpu_present_map, +/* Must be called when no change can occur to cpu_present_mask, * i.e. during cpu online or offline. */ static struct device_node *cpu_to_l2cache(int cpu) -- cgit v1.1 From 78e2e68a2b79f394b7cd61e07987a8a89af907f7 Mon Sep 17 00:00:00 2001 From: Li Yang Date: Fri, 7 May 2010 16:38:34 +0800 Subject: powerpc/fsl-booke: Fix InstructionTLBError execute permission check In CONFIG_PTE_64BIT the PTE format has unique permission bits for user and supervisor execute. 
However on !CONFIG_PTE_64BIT we overload the supervisor bit to imply user execute with _PAGE_USER set. This allows us to use the same permission check mask for user or supervisor code on !CONFIG_PTE_64BIT. However, on CONFIG_PTE_64BIT we map _PAGE_EXEC to _PAGE_BAP_UX so we need a different permission mask based on the fault coming from a kernel address or user space. Without unique permission masks we see issues like the following with modules: Unable to handle kernel paging request for instruction fetch Faulting instruction address: 0xf938d040 Oops: Kernel access of bad area, sig: 11 [#1] Signed-off-by: Li Yang Signed-off-by: Jin Qing Signed-off-by: Kumar Gala --- arch/powerpc/kernel/head_fsl_booke.S | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 7255265..edd4a57 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -639,6 +639,13 @@ interrupt_base: rlwinm r12,r12,0,16,1 mtspr SPRN_MAS1,r12 + /* Make up the required permissions for kernel code */ +#ifdef CONFIG_PTE_64BIT + li r13,_PAGE_PRESENT | _PAGE_BAP_SX + oris r13,r13,_PAGE_ACCESSED@h +#else + li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC +#endif b 4f /* Get the PGD for the current thread */ 3: mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) -4: - /* Make up the required permissions */ + /* Make up the required permissions for user code */ #ifdef CONFIG_PTE_64BIT - li r13,_PAGE_PRESENT | _PAGE_EXEC + li r13,_PAGE_PRESENT | _PAGE_BAP_UX oris r13,r13,_PAGE_ACCESSED@h #else li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC #endif +4: FIND_PTE andc. r13,r13,r11 /* Check permission */ -- cgit v1.1 From 78f622377f7d31d988db350a43c5689dd5f31876 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 13 May 2010 14:38:21 -0500 Subject: powerpc/fsl-booke: Move loadcam_entry back to asm code to fix SMP ftrace When we build with ftrace enabled it's possible that loadcam_entry would have used the stack pointer (even though the code doesn't need it). We call loadcam_entry in __secondary_start before the stack is set up. To ensure that loadcam_entry doesn't use the stack pointer the easiest solution is to just have it in asm code. Signed-off-by: Kumar Gala --- arch/powerpc/kernel/asm-offsets.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 957ceb7..0271b58 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -448,6 +448,14 @@ int main(void) DEFINE(PGD_T_LOG2, PGD_T_LOG2); DEFINE(PTE_T_LOG2, PTE_T_LOG2); #endif +#ifdef CONFIG_FSL_BOOKE + DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam)); + DEFINE(TLBCAM_MAS0, offsetof(struct tlbcam, MAS0)); + DEFINE(TLBCAM_MAS1, offsetof(struct tlbcam, MAS1)); + DEFINE(TLBCAM_MAS2, offsetof(struct tlbcam, MAS2)); + DEFINE(TLBCAM_MAS3, offsetof(struct tlbcam, MAS3)); + DEFINE(TLBCAM_MAS7, offsetof(struct tlbcam, MAS7)); +#endif #ifdef CONFIG_KVM_EXIT_TIMING DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, -- cgit v1.1 From 7358650e9e9a81c854dc4582b4193eb5ea500bf6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 19 May 2010 02:12:32 +0000 Subject: powerpc/rtasd: Don't start event scan if scan rate is zero There appear to be Pegasos systems which have the rtas-event-scan RTAS tokens, but on which the event scan always fails.
They also have an event-scan-rate property containing 0, which means call event scan 0 times per minute. So interpret a scan rate of 0 to mean don't scan at all. This fixes the problem on the Pegasos machines and makes sense as well. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/rtasd.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index e907ca6..638883e 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -490,6 +490,12 @@ static int __init rtas_init(void) return -ENODEV; } + if (!rtas_event_scan_rate) { + /* Broken firmware: take a rate of zero to mean don't scan */ + printk(KERN_DEBUG "rtasd: scan rate is 0, not scanning\n"); + return 0; + } + /* Make room for the sequence number */ rtas_error_log_max = rtas_get_error_log_max(); rtas_error_log_buffer_max = rtas_error_log_max + sizeof(int); -- cgit v1.1 From abb17f9c3a92c5acf30e749efdf0419b7f50a5b8 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Wed, 19 May 2010 02:56:29 +0000 Subject: powerpc: Use common cpu_die (fixes SMP+SUSPEND build) Configuring a powerpc 32 bit kernel for both SMP and SUSPEND turns on CPU_HOTPLUG to enable disable_nonboot_cpus to be called by the common suspend code. Previously the definition of cpu_die for ppc32 was in the powermac platform code, causing it to be undefined if that platform was not selected. arch/powerpc/kernel/built-in.o: In function 'cpu_idle': arch/powerpc/kernel/idle.c:98: undefined reference to 'cpu_die' Move the code from setup_64 to smp.c and rename the power mac versions to their specific names. Note that this does not set up the cpu_die pointers in either smp_ops (request a given cpu die) or ppc_md (make this cpu die) for other platforms, but there are generic versions in smp.c. Reported-by: Matt Sealey Reported-by: Kumar Gala Signed-off-by: Milton Miller Signed-off-by: Anton Vorontsov Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup_64.c | 6 ------ arch/powerpc/kernel/smp.c | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 9143891..cea6698 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -573,12 +573,6 @@ void ppc64_boot_msg(unsigned int src, const char *msg) printk("[boot]%04x %s\n", src, msg); } -void cpu_die(void) -{ - if (ppc_md.cpu_die) - ppc_md.cpu_die(); -} - #ifdef CONFIG_SMP #define PCPU_DYN_SIZE () diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index bf36616..5c196d1 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -648,4 +648,10 @@ void cpu_hotplug_driver_unlock() { mutex_unlock(&powerpc_cpu_hotplug_driver_mutex); } + +void cpu_die(void) +{ + if (ppc_md.cpu_die) + ppc_md.cpu_die(); +} #endif -- cgit v1.1 From a1263c71448aa70afb6097fdedf93c3dff5a7a15 Mon Sep 17 00:00:00 2001 From: Brian King Date: Fri, 14 May 2010 12:04:41 +0000 Subject: powerpc/vio: Switch VIO Bus PM to use generic helpers Switch to use the generic power management helpers.
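The generic helpers resolve the driver's dev_pm_ops on the bus's behalf, so a subsystem with no special PM requirements can point its bus_type at them directly; a hedged sketch of the shape (hypothetical bus, using the GENERIC_SUBSYS_PM_OPS macro seen in the diff below):

/* Sketch: defer all PM callbacks to the generic subsystem helpers,
 * which forward to the bound driver's dev_pm_ops when present. */
static struct bus_type example_bus_type = {
	.name  = "example",
	.match = example_bus_match,	/* hypothetical match routine */
	.pm    = GENERIC_SUBSYS_PM_OPS,
};

This removes the suspend/resume trampoline boilerplate each bus would otherwise carry, as the deletion below illustrates.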
Signed-off-by: Brian King Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vio.c | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index b8e311d..9ce7b62 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1381,29 +1381,6 @@ static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env) return 0; } -static int vio_pm_suspend(struct device *dev) -{ - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - if (pm && pm->suspend) - return pm->suspend(dev); - return 0; -} - -static int vio_pm_resume(struct device *dev) -{ - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - if (pm && pm->resume) - return pm->resume(dev); - return 0; -} - -const struct dev_pm_ops vio_dev_pm_ops = { - .suspend = vio_pm_suspend, - .resume = vio_pm_resume, -}; - static struct bus_type vio_bus_type = { .name = "vio", .dev_attrs = vio_dev_attrs, @@ -1411,7 +1388,7 @@ static struct bus_type vio_bus_type = { .match = vio_bus_match, .probe = vio_bus_probe, .remove = vio_bus_remove, - .pm = &vio_dev_pm_ops, + .pm = GENERIC_SUBSYS_PM_OPS, }; /** -- cgit v1.1 From 5b339bdf164d8aee394609768f7e2e4415b0252a Mon Sep 17 00:00:00 2001 From: Sonny Rao Date: Mon, 10 May 2010 15:13:41 +0000 Subject: powerpc/pci: Check devices status property when scanning OF tree We ran into an issue where it looks like we're not properly ignoring a PCI device with a non-good status property when we walk the device tree and instantiate the Linux side PCI devices. However, the EEH init code does look for the property and disables EEH on these devices. This leaves us in an inconsistent state where we are poking at a supposedly bad piece of hardware and RTAS will block our config cycles because EEH isn't enabled anyway.
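The added check is the standard way to honour a node's status property during a bus scan; as a generic sketch (hypothetical per-child setup routine):

/* Sketch: skip children whose status property is present and not
 * "okay"/"ok"; a missing status counts as available. */
for_each_child_of_node(node, child) {
	if (!of_device_is_available(child))
		continue;	/* e.g. status = "disabled": don't probe it */
	setup_child(child);	/* hypothetical per-device scan/setup */
}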
Signed-off-by: Sonny Rao Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci_of_scan.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index cd11d5c..6ddb795f 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -310,6 +310,8 @@ static void __devinit __of_scan_bus(struct device_node *node, /* Scan direct children */ for_each_child_of_node(node, child) { pr_debug(" * %s\n", child->full_name); + if (!of_device_is_available(child)) + continue; reg = of_get_property(child, "reg", &reglen); if (reg == NULL || reglen < 20) continue; -- cgit v1.1 From 426b6cb478e60352a463a0d1ec75c1c9fab30b13 Mon Sep 17 00:00:00 2001 From: Maxim Uvarov Date: Tue, 11 May 2010 05:41:08 +0000 Subject: powerpc/crashdump: Do not fail on NULL pointer dereferencing Signed-off-by: Maxim Uvarov Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 6f4613d..341d8af 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -375,6 +375,9 @@ void default_machine_crash_shutdown(struct pt_regs *regs) for_each_irq(i) { struct irq_desc *desc = irq_to_desc(i); + if (!desc || !desc->chip || !desc->chip->eoi) + continue; + if (desc->status & IRQ_INPROGRESS) desc->chip->eoi(i); -- cgit v1.1 From 0644079410065567e3bb31fcb8e6441f2b7685a9 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 10 May 2010 16:25:51 +0000 Subject: powerpc/kdump: CPUs assume the context of the oopsing CPU We wrap the crash_shutdown_handles[] calls with longjmp/setjmp, so if any of them fault we can recover. The problem is we add a hook to the debugger fault handler hook which calls longjmp unconditionally. This first part of kdump is run before we marshall the other CPUs, so there is a very good chance some CPU on the box is going to page fault. And when it does it hits the longjmp code and assumes the context of the oopsing CPU. The machine gets very confused when it has 10 CPUs all with the same stack, all thinking they have the same CPU id. I get even more confused trying to debug it. The patch below adds crash_shutdown_cpu and uses it to specify which cpu is in the protected region. Since it can only be -1 or the oopsing CPU, we don't need to use memory barriers since it is only valid on the local CPU - no other CPU will ever see a value that matches its local CPU id. Eventually we should switch the order and marshall all CPUs before doing the crash_shutdown_handles[] calls, but that is a bigger fix.
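The guard pattern described above, reduced to a sketch (simplified from the crash.c logic in the diff below):

/* Sketch: only the CPU that armed the jump buffer may escape a
 * fault via longjmp(); every other CPU takes the normal path. */
static int crash_shutdown_cpu = -1;

static int guarded_fault_handler(struct pt_regs *regs)
{
	if (crash_shutdown_cpu == smp_processor_id())
		longjmp(crash_shutdown_buf, 1);
	return 0;	/* not ours: fall through to regular handling */
}

The caller brackets each handler invocation with crash_shutdown_cpu = smp_processor_id(); ... crash_shutdown_cpu = -1; so a fault on any other CPU can no longer hijack the oopsing CPU's context.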
Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 341d8af..7ced58d 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -347,10 +347,12 @@ int crash_shutdown_unregister(crash_shutdown_t handler) EXPORT_SYMBOL(crash_shutdown_unregister); static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; +static int crash_shutdown_cpu = -1; static int handle_fault(struct pt_regs *regs) { - longjmp(crash_shutdown_buf, 1); + if (crash_shutdown_cpu == smp_processor_id()) + longjmp(crash_shutdown_buf, 1); return 0; } @@ -391,6 +393,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) */ old_handler = __debugger_fault_handler; __debugger_fault_handler = handle_fault; + crash_shutdown_cpu = smp_processor_id(); for (i = 0; crash_shutdown_handles[i]; i++) { if (setjmp(crash_shutdown_buf) == 0) { /* @@ -404,6 +407,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) asm volatile("sync; isync"); } } + crash_shutdown_cpu = -1; __debugger_fault_handler = old_handler; /* -- cgit v1.1 From 5d7a87217de48b234b3c8ff8a73059947d822e07 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 10 May 2010 16:27:38 +0000 Subject: powerpc/kdump: Use chip->shutdown to disable IRQs I saw this in a kdump kernel: IOMMU table initialized, virtual merging enabled Interrupt 155954 (real) is invalid, disabling it. Interrupt 155953 (real) is invalid, disabling it. ie we took some spurious interrupts. default_machine_crash_shutdown tries to disable all interrupt sources but uses chip->disable which maps to the default action of: static void default_disable(unsigned int irq) { } If we use chip->shutdown, then we actually mask the IRQ: static void default_shutdown(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); desc->chip->mask(irq); desc->status |= IRQ_MASKED; } Not sure why we don't implement a ->disable action for xics.c, or why default_disable doesn't mask the interrupt. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 7ced58d..cca7c8f 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -384,7 +384,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) desc->chip->eoi(i); if (!(desc->status & IRQ_DISABLED)) - desc->chip->disable(i); + desc->chip->shutdown(i); } /* -- cgit v1.1 From 095c7965f4dc870ed2b65143b1e2610de653416c Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 10 May 2010 18:59:18 +0000 Subject: powerpc: Use more accurate limit for first segment memory allocations Author: Milton Miller On large machines we are running out of room below 256MB. In some cases we only need to ensure the allocation is in the first segment, which may be 256MB or 1TB. Add slb0_limit and use it to specify the upper limit for the irqstack and emergency stacks. On a large ppc64 box, this fixes a panic at boot when the crashkernel= option is specified (previously we would run out of memory below 256MB). 
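For the record, the two bounds fall straight out of the segment geometry; a sketch of the arithmetic (SID_SHIFT = 28 and SID_SHIFT_1T = 40 on these CPUs):

/* Sketch: segment-0 sizes implied by the shifts used in the patch. */
#define SEG0_SIZE_256M	(1UL << 28)	/* 0x10000000, the old hardcoded cap */
#define SEG0_SIZE_1T	(1UL << 40)	/* usable with CPU_FTR_1T_SEGMENT */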
Signed-off-by: Milton Miller Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup_64.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index cea6698..f3fb5a7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -424,9 +424,18 @@ void __init setup_system(void) DBG(" <- setup_system()\n"); } +static u64 slb0_limit(void) +{ + if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) { + return 1UL << SID_SHIFT_1T; + } + return 1UL << SID_SHIFT; +} + #ifdef CONFIG_IRQSTACKS static void __init irqstack_early_init(void) { + u64 limit = slb0_limit(); unsigned int i; /* @@ -436,10 +445,10 @@ static void __init irqstack_early_init(void) for_each_possible_cpu(i) { softirq_ctx[i] = (struct thread_info *) __va(lmb_alloc_base(THREAD_SIZE, - THREAD_SIZE, 0x10000000)); + THREAD_SIZE, limit)); hardirq_ctx[i] = (struct thread_info *) __va(lmb_alloc_base(THREAD_SIZE, - THREAD_SIZE, 0x10000000)); + THREAD_SIZE, limit)); } } #else @@ -470,7 +479,7 @@ static void __init exc_lvl_early_init(void) */ static void __init emergency_stack_init(void) { - unsigned long limit; + u64 limit; unsigned int i; /* @@ -482,7 +491,7 @@ static void __init emergency_stack_init(void) * bringup, we need to get at them in real mode. This means they * must also be within the RMO region. */ - limit = min(0x10000000ULL, lmb.rmo_size); + limit = min(slb0_limit(), lmb.rmo_size); for_each_possible_cpu(i) { unsigned long sp; -- cgit v1.1 From 1fc711f7ffb01089efc58042cfdbac8573d1b59a Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 13 May 2010 19:40:11 +0000 Subject: powerpc/kexec: Fix race in kexec shutdown In kexec_prepare_cpus, the primary CPU IPIs the secondary CPUs to kexec_smp_down(). kexec_smp_down() calls kexec_smp_wait() which sets the hw_cpu_id() to -1. The primary does this while leaving IRQs on which means the primary can take a timer interrupt which can lead to IPIing one of the secondary CPUs (say, for a scheduler re-balance) but since the secondary CPU now has a hw_cpu_id = -1, we IPI CPU -1... Kaboom! We are hitting this case regularly on POWER7 machines. There is also a second race, where the primary will tear down the MMU mappings before knowing the secondaries have entered real mode. Also, the secondaries are clearing out any pending IPIs before guaranteeing that no more will be received. This changes kexec_prepare_cpus() so that we turn off IRQs in the primary CPU much earlier. It adds a paca flag to say that the secondaries have entered the kexec_smp_down() IPI and turned off IRQs, rather than overloading hw_cpu_id with -1. This new paca flag is again used to indicate when the secondaries have entered real mode. It also ensures that all CPUs have their IRQs off before we clear out any pending IPI requests (in kexec_cpu_down()) to ensure there are no trailing IPIs left unacknowledged.
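The resulting handshake is a small per-CPU state machine; schematically (condensed from the helpers in the patch below):

/* Sketch: each CPU advances paca[cpu].kexec_state monotonically,
 *   KEXEC_STATE_NONE -> KEXEC_STATE_IRQS_OFF -> KEXEC_STATE_REAL_MODE,
 * and the primary spins until all others reach the required state
 * before it takes the next teardown step. */
static void wait_for_kexec_state(int wait_state)
{
	int i, me = smp_processor_id();

	for (i = 0; i < NR_CPUS; i++) {
		if (i == me || !cpu_possible(i))
			continue;
		while (paca[i].kexec_state < wait_state)
			cpu_relax();
	}
	mb();	/* order the state reads against whatever follows */
}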
Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/machine_kexec_64.c | 48 ++++++++++++++++++++++++---------- arch/powerpc/kernel/misc_64.S | 8 +++--- arch/powerpc/kernel/paca.c | 2 ++ 4 files changed, 42 insertions(+), 17 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 0271b58..1b784ff9 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -184,6 +184,7 @@ int main(void) #endif /* CONFIG_PPC_STD_MMU_64 */ DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); + DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr)); DEFINE(PACA_STARTSPURR, offsetof(struct paca_struct, startspurr)); DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 040bd1d..26f9900 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -155,33 +155,38 @@ void kexec_copy_flush(struct kimage *image) #ifdef CONFIG_SMP -/* FIXME: we should schedule this function to be called on all cpus based - * on calling the interrupts, but we would like to call it off irq level - * so that the interrupt controller is clean. - */ +static int kexec_all_irq_disabled = 0; + static void kexec_smp_down(void *arg) { + local_irq_disable(); + mb(); /* make sure our irqs are disabled before we say they are */ + get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF; + while(kexec_all_irq_disabled == 0) + cpu_relax(); + mb(); /* make sure all irqs are disabled before this */ + /* + * Now every CPU has IRQs off, we can clear out any pending + * IPIs and be sure that no more will come in after this. 
+ */ if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(0, 1); - local_irq_disable(); kexec_smp_wait(); /* NOTREACHED */ } -static void kexec_prepare_cpus(void) +static void kexec_prepare_cpus_wait(int wait_state) { int my_cpu, i, notified=-1; - smp_call_function(kexec_smp_down, NULL, /* wait */0); my_cpu = get_cpu(); - - /* check the others cpus are now down (via paca hw cpu id == -1) */ + /* Make sure each CPU has atleast made it to the state we need */ for (i=0; i < NR_CPUS; i++) { if (i == my_cpu) continue; - while (paca[i].hw_cpu_id != -1) { + while (paca[i].kexec_state < wait_state) { barrier(); if (!cpu_possible(i)) { printk("kexec: cpu %d hw_cpu_id %d is not" @@ -201,20 +206,35 @@ static void kexec_prepare_cpus(void) } if (i != notified) { printk( "kexec: waiting for cpu %d (physical" - " %d) to go down\n", - i, paca[i].hw_cpu_id); + " %d) to enter %i state\n", + i, paca[i].hw_cpu_id, wait_state); notified = i; } } } + mb(); +} + +static void kexec_prepare_cpus(void) +{ + + smp_call_function(kexec_smp_down, NULL, /* wait */0); + local_irq_disable(); + mb(); /* make sure IRQs are disabled before we say they are */ + get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF; + + kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF); + /* we are sure every CPU has IRQs off at this point */ + kexec_all_irq_disabled = 1; /* after we tell the others to go down */ if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(0, 0); - put_cpu(); + /* Before removing MMU mapings make sure all CPUs have entered real mode */ + kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE); - local_irq_disable(); + put_cpu(); } #else /* ! SMP */ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index a5cf9c1..a2b18df 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -24,6 +24,7 @@ #include #include #include +#include .text @@ -471,6 +472,10 @@ _GLOBAL(kexec_wait) 1: mflr r5 addi r5,r5,kexec_flag-1b + li r4,KEXEC_STATE_REAL_MODE + stb r4,PACAKEXECSTATE(r13) + SYNC + 99: HMT_LOW #ifdef CONFIG_KEXEC /* use no memory without kexec */ lwz r4,0(r5) @@ -494,14 +499,11 @@ kexec_flag: * note: this is a terminal routine, it does not save lr * * get phys id from paca - * set paca id to -1 to say we got here * switch to real mode * join other cpus in kexec_wait(phys_id) */ _GLOBAL(kexec_smp_wait) lhz r3,PACAHWCPUID(r13) - li r4,-1 - sth r4,PACAHWCPUID(r13) /* let others know we left */ bl real_mode b .kexec_wait diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 0c40c6f..f88acf0 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -18,6 +18,7 @@ #include #include #include +#include /* This symbol is provided by the linker - let it fill in the paca * field correctly */ @@ -97,6 +98,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) new_paca->kernelbase = (unsigned long) _stext; new_paca->kernel_msr = MSR_KERNEL; new_paca->hw_cpu_id = 0xffff; + new_paca->kexec_state = KEXEC_STATE_NONE; new_paca->__current = &init_task; #ifdef CONFIG_PPC_STD_MMU_64 new_paca->slb_shadow_ptr = &slb_shadow[cpu]; -- cgit v1.1 From 60adec6226bbcf061d4c2d10944fced209d1847d Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 13 May 2010 19:40:11 +0000 Subject: powerpc/kdump: Fix race in kdump shutdown When we are crashing, the crashing/primary CPU IPIs the secondaries to turn off IRQs, go into real mode and wait in kexec_wait. While this is happening, the primary tears down all the MMU maps. 
Unfortunately the primary doesn't check to make sure the secondaries have entered real mode before doing this. On PHYP machines, the secondaries can take a long time shutting down the IRQ controller as RTAS calls are needed. These RTAS calls need to be serialised, which results in the secondaries contending in lock_rtas() and hence taking a long time to shut down. We've hit this on large POWER7 machines, where some secondaries are still waiting in lock_rtas() when the primary tears down the HPTEs. This patch makes sure all secondaries are in real mode before the primary tears down the MMU. It uses the new kexec_state entry in the paca. It times out if the secondaries don't reach real mode after 10sec. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index cca7c8f..8c066d6 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -162,6 +162,32 @@ static void crash_kexec_prepare_cpus(int cpu) /* Leave the IPI callback set */ } +/* wait for all the CPUs to hit real mode but timeout if they don't come in */ +static void crash_kexec_wait_realmode(int cpu) +{ + unsigned int msecs; + int i; + + msecs = 10000; + for (i=0; i < NR_CPUS && msecs > 0; i++) { + if (i == cpu) + continue; + + while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { + barrier(); + if (!cpu_possible(i)) { + break; + } + if (!cpu_online(i)) { + break; + } + msecs--; + mdelay(1); + } + } + mb(); +} + /* * This function will be called by secondary cpus or by kexec cpu * if soft-reset is activated to stop some CPUs. @@ -419,6 +445,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) crash_kexec_prepare_cpus(crashing_cpu); cpu_set(crashing_cpu, cpus_in_crash); crash_kexec_stop_spus(); + crash_kexec_wait_realmode(crashing_cpu); if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(1, 0); } -- cgit v1.1 From dd04c63c96425af9b6741f3abf0ad25d6b1c0e8d Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 16 May 2010 20:01:28 +0000 Subject: powerpc: Remove check of ibm,smt-snooze-delay OF property I'm not sure why we have code for parsing an ibm,smt-snooze-delay OF property. Since we have a smt-snooze-delay= boot option and we can also set it at runtime via sysfs, it should be safe to get rid of this code.
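The deleted parser followed the usual OF accessor pattern; for reference, a generic sketch (hypothetical property name and caller):

/* Sketch: read an optional cell from /options; of_find_node_by_path()
 * takes a reference that must be dropped with of_node_put(). */
static int read_options_cell(const char *prop, unsigned int *val)
{
	struct device_node *options;
	const unsigned int *p;

	options = of_find_node_by_path("/options");
	if (!options)
		return -ENODEV;

	p = of_get_property(options, prop, NULL);
	if (p)
		*val = *p;

	of_node_put(options);
	return p ? 0 : -ENOENT;
}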
Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/sysfs.c | 29 ----------------------------- 1 file changed, 29 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index e235e52..158fb73 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -67,33 +67,6 @@ static ssize_t show_smt_snooze_delay(struct sys_device *dev, static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, store_smt_snooze_delay); -/* Only parse OF options if the matching cmdline option was not specified */ -static int smt_snooze_cmdline; - -static int __init smt_setup(void) -{ - struct device_node *options; - const unsigned int *val; - unsigned int cpu; - - if (!cpu_has_feature(CPU_FTR_SMT)) - return -ENODEV; - - options = of_find_node_by_path("/options"); - if (!options) - return -ENODEV; - - val = of_get_property(options, "ibm,smt-snooze-delay", NULL); - if (!smt_snooze_cmdline && val) { - for_each_possible_cpu(cpu) - per_cpu(smt_snooze_delay, cpu) = *val; - } - - of_node_put(options); - return 0; -} -__initcall(smt_setup); - static int __init setup_smt_snooze_delay(char *str) { unsigned int cpu; @@ -102,8 +75,6 @@ static int __init setup_smt_snooze_delay(char *str) if (!cpu_has_feature(CPU_FTR_SMT)) return 1; - smt_snooze_cmdline = 1; - if (get_option(&str, &snooze)) { for_each_possible_cpu(cpu) per_cpu(smt_snooze_delay, cpu) = snooze; -- cgit v1.1 From b878dc00595440586874952dd85ce9b803360b87 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 16 May 2010 20:02:39 +0000 Subject: powerpc: Use smt_snooze_delay=-1 to always busy loop Right now if we want to busy loop and not give up any time to the hypervisor we put a very large value into smt_snooze_delay. This is sometimes useful when running a single partition and you want to avoid any latencies due to the hypervisor or CPU power state transitions. While this works, it's a bit ugly - how big a number is enough now that we have NO_HZ and can be idle for a very long time. The patch below makes smt_snooze_delay signed, and a negative value means loop forever: echo -1 > /sys/devices/system/cpu/cpu0/smt_snooze_delay This change shouldn't affect the existing userspace tools (eg ppc64_cpu), but I'm cc-ing Nathan just to be sure.
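On the consuming side, the idle loop can then treat a negative value as "poll forever"; a simplified sketch (not the actual pseries idle code, names and timebase helpers assumed):

/* Sketch: busy-poll until the snooze delay expires; a negative
 * delay means spin indefinitely rather than entering a sleep state. */
long snooze = per_cpu(smt_snooze_delay, cpu);
u64 start = get_tb();

while (!need_resched()) {
	if (snooze >= 0 &&
	    tb_ticks_since(start) > snooze * tb_ticks_per_usec)
		break;		/* delay expired: allow deeper idle */
	HMT_low();		/* drop SMT thread priority while spinning */
}
HMT_medium();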
Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/sysfs.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 158fb73..c0d8c20 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -35,7 +35,7 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); #ifdef CONFIG_PPC64 /* Time in microseconds we delay before sleeping in the idle loop */ -DEFINE_PER_CPU(unsigned long, smt_snooze_delay) = { 100 }; +DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; static ssize_t store_smt_snooze_delay(struct sys_device *dev, struct sysdev_attribute *attr, @@ -44,9 +44,9 @@ static ssize_t store_smt_snooze_delay(struct sys_device *dev, { struct cpu *cpu = container_of(dev, struct cpu, sysdev); ssize_t ret; - unsigned long snooze; + long snooze; - ret = sscanf(buf, "%lu", &snooze); + ret = sscanf(buf, "%ld", &snooze); if (ret != 1) return -EINVAL; @@ -61,7 +61,7 @@ static ssize_t show_smt_snooze_delay(struct sys_device *dev, { struct cpu *cpu = container_of(dev, struct cpu, sysdev); - return sprintf(buf, "%lu\n", per_cpu(smt_snooze_delay, cpu->sysdev.id)); + return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->sysdev.id)); } static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, @@ -70,15 +70,14 @@ static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, static int __init setup_smt_snooze_delay(char *str) { unsigned int cpu; - int snooze; + long snooze; if (!cpu_has_feature(CPU_FTR_SMT)) return 1; - if (get_option(&str, &snooze)) { - for_each_possible_cpu(cpu) - per_cpu(smt_snooze_delay, cpu) = snooze; - } + snooze = simple_strtol(str, NULL, 10); + for_each_possible_cpu(cpu) + per_cpu(smt_snooze_delay, cpu) = snooze; return 1; } -- cgit v1.1 From 99ec28f183daa450faa7bdad6f932364ae325648 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sun, 9 May 2010 17:39:05 +0000 Subject: powerpc: Remove unused 'protect4gb' boot parameter The 'protect4gb' boot parameter was introduced to avoid allocating DMA space across the 4GB boundary in 2007 (commit 569975591c5530fdc9c7a3c45122e5e46f075a74). In 2008, the IOMMU was fixed to use the boundary_mask parameter per device properly. So the 'protect4gb' workaround was removed (commit 383af9525bb27f927511874f6306247ec13f1c28). But somehow the 'protect4gb' boot parameter that was used to enable the workaround was missed. Signed-off-by: FUJITA Tomonori Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/iommu.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index ec94f90..d583917 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -43,20 +43,9 @@ #define DBG(...) static int novmerge; -static int protect4gb = 1; static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int); -static int __init setup_protect4gb(char *str) -{ - if (strcmp(str, "on") == 0) - protect4gb = 1; - else if (strcmp(str, "off") == 0) - protect4gb = 0; - - return 1; -} - static int __init setup_iommu(char *str) { if (!strcmp(str, "novmerge")) @@ -66,7 +55,6 @@ -__setup("protect4gb=", setup_protect4gb); __setup("iommu=", setup_iommu); static unsigned long iommu_range_alloc(struct device *dev, -- cgit v1.1
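For reference, boot options of this style are registered with __setup(); the surviving iommu= handler above follows the same shape. A generic sketch (hypothetical option name):

/* Sketch: an early "example=on"/"example=off" boot option.
 * Returning 1 marks the option as consumed by this handler. */
static int example_enabled = 1;

static int __init setup_example(char *str)
{
	if (strcmp(str, "on") == 0)
		example_enabled = 1;
	else if (strcmp(str, "off") == 0)
		example_enabled = 0;
	return 1;
}
__setup("example=", setup_example);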