From 3dc68d9b58ae644cee8e218e3dcde0dceb5c47a3 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 May 2007 19:27:04 +0200 Subject: [PATCH] x86-64: revert x86_64-mm-add-genapic_force This is obsoleted by new Ingo genapic patches. Cc: Suresh Siddha Cc: Andi Kleen Cc: "Li, Shaohua" Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/asm-x86_64/genapic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/genapic.h b/include/asm-x86_64/genapic.h index b80f4bb..a0e9a4b 100644 --- a/include/asm-x86_64/genapic.h +++ b/include/asm-x86_64/genapic.h @@ -30,6 +30,6 @@ struct genapic { }; -extern struct genapic *genapic, *genapic_force, apic_flat; +extern struct genapic *genapic; #endif -- cgit v1.1 From a86f34b49f32b238d16b2e3bf6c9a5391a3f683f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 May 2007 19:27:04 +0200 Subject: [PATCH] x86: revert x86_64-mm-fix-the-irqbalance-quirk-for-e7320-e7520-e7525 Obsoleted by Ingo's genapic stuff. 
Cc: Ingo Molnar Cc: Suresh Siddha Cc: Andi Kleen Cc: "Li, Shaohua" Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/asm-i386/genapic.h | 2 +- include/asm-i386/irq.h | 2 -- include/asm-x86_64/proto.h | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-i386/genapic.h b/include/asm-i386/genapic.h index fd2be59..8ffbb0f 100644 --- a/include/asm-i386/genapic.h +++ b/include/asm-i386/genapic.h @@ -122,6 +122,6 @@ struct genapic { APICFUNC(phys_pkg_id) \ } -extern struct genapic *genapic, apic_default; +extern struct genapic *genapic; #endif diff --git a/include/asm-i386/irq.h b/include/asm-i386/irq.h index 11761cd..9e15ce0 100644 --- a/include/asm-i386/irq.h +++ b/include/asm-i386/irq.h @@ -37,8 +37,6 @@ static __inline__ int irq_canonicalize(int irq) extern int irqbalance_disable(char *str); #endif -extern void quirk_intel_irqbalance(void); - #ifdef CONFIG_HOTPLUG_CPU extern void fixup_irqs(cpumask_t map); #endif diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index b6e65a6..6688cf9 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -82,7 +82,6 @@ extern void syscall32_cpu_init(void); extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end); extern void early_quirks(void); -extern void quirk_intel_irqbalance(void); extern void check_efer(void); extern int unhandled_signal(struct task_struct *tsk, int sig); -- cgit v1.1 From 07c7c4744400f93a7c52b32159c31d823e1747a5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 2 May 2007 19:27:04 +0200 Subject: [PATCH] x86-64: always use physical delivery mode on > 8 CPUs Remove clustered APIC mode. There's little point in the use of clustered APIC mode, broadcasting is limited to within the cluster only, and chipsets have bugs in this area as well. So default to physical APIC mode when the CPU count is large, and default to logical APIC mode when the CPU count is 8 or smaller. 
(this patch only removes the use of genapic_cluster and cleans up the resulting genapic.c file - removal of all remaining traces of clustered mode will be done by another patch.) Signed-off-by: Ingo Molnar Signed-off-by: Andi Kleen Cc: Suresh Siddha Cc: Andi Kleen Cc: "Li, Shaohua" Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton --- include/asm-x86_64/genapic.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/genapic.h b/include/asm-x86_64/genapic.h index a0e9a4b..d7e516c 100644 --- a/include/asm-x86_64/genapic.h +++ b/include/asm-x86_64/genapic.h @@ -29,7 +29,9 @@ struct genapic { unsigned int (*phys_pkg_id)(int index_msb); }; - extern struct genapic *genapic; +extern struct genapic apic_flat; +extern struct genapic apic_physflat; + #endif -- cgit v1.1 From 3c43f03908de98fa8f7a9e8fc9411ebf4c2de298 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 2 May 2007 19:27:04 +0200 Subject: [PATCH] x86: default to physical mode on hotplug CPU kernels Default to physical mode on hotplug CPU kernels. Further simplify and clean up the APIC initialization code. Signed-off-by: Ingo Molnar Signed-off-by: Andi Kleen Cc: Suresh Siddha Cc: Andi Kleen Cc: "Li, Shaohua" Cc: "Eric W. 
Biederman" Signed-off-by: Andrew Morton --- include/asm-i386/genapic.h | 4 ++-- include/asm-i386/mach-bigsmp/mach_apic.h | 2 +- include/asm-i386/mach-default/mach_apic.h | 2 +- include/asm-i386/mach-es7000/mach_apic.h | 2 +- include/asm-i386/mach-generic/mach_apic.h | 2 +- include/asm-i386/mach-numaq/mach_apic.h | 2 +- include/asm-i386/mach-summit/mach_apic.h | 2 +- include/asm-i386/mach-visws/mach_apic.h | 2 +- include/asm-x86_64/apic.h | 2 +- 9 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/asm-i386/genapic.h b/include/asm-i386/genapic.h index 8ffbb0f..33e3ffe 100644 --- a/include/asm-i386/genapic.h +++ b/include/asm-i386/genapic.h @@ -36,7 +36,7 @@ struct genapic { void (*init_apic_ldr)(void); physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); - void (*clustered_apic_check)(void); + void (*setup_apic_routing)(void); int (*multi_timer_check)(int apic, int irq); int (*apicid_to_node)(int logical_apicid); int (*cpu_to_logical_apicid)(int cpu); @@ -99,7 +99,7 @@ struct genapic { APICFUNC(check_apicid_present) \ APICFUNC(init_apic_ldr) \ APICFUNC(ioapic_phys_id_map) \ - APICFUNC(clustered_apic_check) \ + APICFUNC(setup_apic_routing) \ APICFUNC(multi_timer_check) \ APICFUNC(apicid_to_node) \ APICFUNC(cpu_to_logical_apicid) \ diff --git a/include/asm-i386/mach-bigsmp/mach_apic.h b/include/asm-i386/mach-bigsmp/mach_apic.h index 18b19a7..ebd319f 100644 --- a/include/asm-i386/mach-bigsmp/mach_apic.h +++ b/include/asm-i386/mach-bigsmp/mach_apic.h @@ -71,7 +71,7 @@ static inline void init_apic_ldr(void) apic_write_around(APIC_LDR, val); } -static inline void clustered_apic_check(void) +static inline void setup_apic_routing(void) { printk("Enabling APIC mode: %s. 
Using %d I/O APICs\n", "Physflat", nr_ioapics); diff --git a/include/asm-i386/mach-default/mach_apic.h b/include/asm-i386/mach-default/mach_apic.h index 3ef6292..6db1c3b 100644 --- a/include/asm-i386/mach-default/mach_apic.h +++ b/include/asm-i386/mach-default/mach_apic.h @@ -54,7 +54,7 @@ static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) return phys_map; } -static inline void clustered_apic_check(void) +static inline void setup_apic_routing(void) { printk("Enabling APIC mode: %s. Using %d I/O APICs\n", "Flat", nr_ioapics); diff --git a/include/asm-i386/mach-es7000/mach_apic.h b/include/asm-i386/mach-es7000/mach_apic.h index 2633368..8e8b394 100644 --- a/include/asm-i386/mach-es7000/mach_apic.h +++ b/include/asm-i386/mach-es7000/mach_apic.h @@ -81,7 +81,7 @@ static inline void enable_apic_mode(void) } extern int apic_version [MAX_APICS]; -static inline void clustered_apic_check(void) +static inline void setup_apic_routing(void) { int apic = bios_cpu_apicid[smp_processor_id()]; printk("Enabling APIC mode: %s. 
Using %d I/O APICs, target cpus %lx\n", diff --git a/include/asm-i386/mach-generic/mach_apic.h b/include/asm-i386/mach-generic/mach_apic.h index d9dc039..a236e70 100644 --- a/include/asm-i386/mach-generic/mach_apic.h +++ b/include/asm-i386/mach-generic/mach_apic.h @@ -13,7 +13,7 @@ #define apic_id_registered (genapic->apic_id_registered) #define init_apic_ldr (genapic->init_apic_ldr) #define ioapic_phys_id_map (genapic->ioapic_phys_id_map) -#define clustered_apic_check (genapic->clustered_apic_check) +#define setup_apic_routing (genapic->setup_apic_routing) #define multi_timer_check (genapic->multi_timer_check) #define apicid_to_node (genapic->apicid_to_node) #define cpu_to_logical_apicid (genapic->cpu_to_logical_apicid) diff --git a/include/asm-i386/mach-numaq/mach_apic.h b/include/asm-i386/mach-numaq/mach_apic.h index 9d15809..5e5e7dd 100644 --- a/include/asm-i386/mach-numaq/mach_apic.h +++ b/include/asm-i386/mach-numaq/mach_apic.h @@ -34,7 +34,7 @@ static inline void init_apic_ldr(void) /* Already done in NUMA-Q firmware */ } -static inline void clustered_apic_check(void) +static inline void setup_apic_routing(void) { printk("Enabling APIC mode: %s. Using %d I/O APICs\n", "NUMA-Q", nr_ioapics); diff --git a/include/asm-i386/mach-summit/mach_apic.h b/include/asm-i386/mach-summit/mach_apic.h index 43e5bd8..732f776 100644 --- a/include/asm-i386/mach-summit/mach_apic.h +++ b/include/asm-i386/mach-summit/mach_apic.h @@ -80,7 +80,7 @@ static inline int apic_id_registered(void) return 1; } -static inline void clustered_apic_check(void) +static inline void setup_apic_routing(void) { printk("Enabling APIC mode: Summit. 
Using %d I/O APICs\n", nr_ioapics); diff --git a/include/asm-i386/mach-visws/mach_apic.h b/include/asm-i386/mach-visws/mach_apic.h index 18afe6b..efac6f0 100644 --- a/include/asm-i386/mach-visws/mach_apic.h +++ b/include/asm-i386/mach-visws/mach_apic.h @@ -47,7 +47,7 @@ static inline void summit_check(char *oem, char *productid) { } -static inline void clustered_apic_check(void) +static inline void setup_apic_routing(void) { } diff --git a/include/asm-x86_64/apic.h b/include/asm-x86_64/apic.h index 7cfb39c..2f3b013 100644 --- a/include/asm-x86_64/apic.h +++ b/include/asm-x86_64/apic.h @@ -83,7 +83,7 @@ extern void setup_secondary_APIC_clock (void); extern int APIC_init_uniprocessor (void); extern void disable_APIC_timer(void); extern void enable_APIC_timer(void); -extern void clustered_apic_check(void); +extern void setup_apic_routing(void); extern void setup_APIC_extened_lvt(unsigned char lvt_off, unsigned char vector, unsigned char msg_type, unsigned char mask); -- cgit v1.1 From 00f1ea696702163b7411d2316264525996c66ed3 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 2 May 2007 19:27:04 +0200 Subject: [PATCH] x86: adjust inclusion of asm/fixmap.h Move inclusion of asm/fixmap.h to where it is really used rather than where it may have been used long ago (requires a few other adjustments to includes due to previous implicit dependencies). 
Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- include/asm-i386/hpet.h | 2 -- include/asm-i386/kexec.h | 5 ----- include/asm-i386/pgalloc.h | 1 - include/asm-i386/smp.h | 7 ++----- include/asm-x86_64/ipi.h | 4 +--- include/asm-x86_64/pgalloc.h | 1 - include/asm-x86_64/pgtable.h | 1 - include/asm-x86_64/smp.h | 3 +-- 8 files changed, 4 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/asm-i386/hpet.h b/include/asm-i386/hpet.h index fc03cf9..dddeedf 100644 --- a/include/asm-i386/hpet.h +++ b/include/asm-i386/hpet.h @@ -28,8 +28,6 @@ #include -#include - /* * Documentation on HPET can be found at: * http://www.intel.com/ial/home/sp/pcmmspec.htm diff --git a/include/asm-i386/kexec.h b/include/asm-i386/kexec.h index 4dfc9f5..c5b4ab9 100644 --- a/include/asm-i386/kexec.h +++ b/include/asm-i386/kexec.h @@ -21,7 +21,6 @@ #ifndef __ASSEMBLY__ -#include #include #include @@ -29,10 +28,6 @@ * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. * I.e. Maximum page that is mapped directly into kernel memory, * and kmap is not required. - * - * Someone correct me if FIXADDR_START - PAGEOFFSET is not the correct - * calculation for the amount of memory directly mappable into the - * kernel memory space. 
*/ /* Maximum physical address we can use pages from */ diff --git a/include/asm-i386/pgalloc.h b/include/asm-i386/pgalloc.h index c8dc2d0..4743017 100644 --- a/include/asm-i386/pgalloc.h +++ b/include/asm-i386/pgalloc.h @@ -1,7 +1,6 @@ #ifndef _I386_PGALLOC_H #define _I386_PGALLOC_H -#include #include #include /* for struct page */ diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index 6bf0033..9cab153 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h @@ -11,16 +11,13 @@ #include #endif -#ifdef CONFIG_X86_LOCAL_APIC -#ifndef __ASSEMBLY__ -#include +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__) #include #include +#include #ifdef CONFIG_X86_IO_APIC #include #endif -#include -#endif #endif #define BAD_APICID 0xFFu diff --git a/include/asm-x86_64/ipi.h b/include/asm-x86_64/ipi.h index 2a5c162..ffa6f15 100644 --- a/include/asm-x86_64/ipi.h +++ b/include/asm-x86_64/ipi.h @@ -18,10 +18,8 @@ * Subject to the GNU Public License, v.2 */ -#include #include -#include -#include +#include /* * the following functions deal with sending IPIs between CPUs. diff --git a/include/asm-x86_64/pgalloc.h b/include/asm-x86_64/pgalloc.h index 4e28b60..31d4971 100644 --- a/include/asm-x86_64/pgalloc.h +++ b/include/asm-x86_64/pgalloc.h @@ -1,7 +1,6 @@ #ifndef _X86_64_PGALLOC_H #define _X86_64_PGALLOC_H -#include #include #include #include diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index 730bd60..5957361 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -6,7 +6,6 @@ * the x86-64 page table tree. 
*/ #include -#include #include #include #include diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index de592a4..f4236d7 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h @@ -10,10 +10,9 @@ #include extern int disable_apic; -#include #include -#include #include +#include #include #ifdef CONFIG_SMP -- cgit v1.1 From b0354795c9c8fef2fadf8f867586c78efd9a1dc9 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 2 May 2007 19:27:04 +0200 Subject: [PATCH] x86-64: adjust inclusion of asm/vsyscall32.h Avoid including asm/vsyscall32.h in virtually every source file. Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- include/asm-x86_64/fixmap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/fixmap.h b/include/asm-x86_64/fixmap.h index 1b620db..e90e167 100644 --- a/include/asm-x86_64/fixmap.h +++ b/include/asm-x86_64/fixmap.h @@ -15,7 +15,6 @@ #include #include #include -#include /* * Here we define all the compile-time 'special' virtual -- cgit v1.1 From 9964cf7d776600724ef5f1b33303ceadc588b8ba Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 2 May 2007 19:27:05 +0200 Subject: [PATCH] x86: consolidate smp_send_stop() Synchronize i386's smp_send_stop() with x86-64's in only try-locking the call lock to prevent deadlocks when called from panic(). In both versions, disable interrupts before clearing the CPU off the online map to eliminate races with IRQ handlers inspecting this map. Also in both versions, save/restore interrupts rather than disabling/enabling them. On x86-64, eliminate one function used here by folding it into its single caller, convert to static, and rename for consistency with i386 (lkcd may like this). 
Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- include/asm-x86_64/smp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index f4236d7..d570442 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h @@ -37,7 +37,6 @@ extern void lock_ipi_call_lock(void); extern void unlock_ipi_call_lock(void); extern int smp_num_siblings; extern void smp_send_reschedule(int cpu); -void smp_stop_cpu(void); extern cpumask_t cpu_sibling_map[NR_CPUS]; extern cpumask_t cpu_core_map[NR_CPUS]; -- cgit v1.1 From f76c392380a40008ee6ecaea4e5a51a3a10282c4 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:05 +0200 Subject: [PATCH] i386: No need to use -traditional for processing asm in i386/kernel/ No need to use -traditional for processing asm in i386/kernel/ Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen --- include/asm-i386/percpu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-i386/percpu.h b/include/asm-i386/percpu.h index 510ae1d..a10e7c6 100644 --- a/include/asm-i386/percpu.h +++ b/include/asm-i386/percpu.h @@ -20,10 +20,10 @@ #ifdef CONFIG_SMP #define PER_CPU(var, cpu) \ movl __per_cpu_offset(,cpu,4), cpu; \ - addl $per_cpu__/**/var, cpu; + addl $per_cpu__##var, cpu; #else /* ! SMP */ #define PER_CPU(var, cpu) \ - movl $per_cpu__/**/var, cpu; + movl $per_cpu__##var, cpu; #endif /* SMP */ #endif /* !__ASSEMBLY__ */ -- cgit v1.1 From f5e8861583a591020176c90c10c6a130fed4f3ec Mon Sep 17 00:00:00 2001 From: takada Date: Wed, 2 May 2007 19:27:05 +0200 Subject: [PATCH] i386: pit_latch_buggy has no effect Eliminated the arch/i386/kernel/timers in 2.6.18, use clocksources instead. pit_latch_buggy was referenced in timers/timer_tsc.c, and currently removed. Therefore nobody refers to it. Until 2.6.17, MediaGX's TSC works correctly. after 2.6.18, warned "TSC appears to be running slowly. 
Marking it as unstable". So marked unstable TSC when CS55x0. Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/asm-i386/timer.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/asm-i386/timer.h b/include/asm-i386/timer.h index 12dd67b..153770e 100644 --- a/include/asm-i386/timer.h +++ b/include/asm-i386/timer.h @@ -9,8 +9,6 @@ void setup_pit_timer(void); unsigned long long native_sched_clock(void); unsigned long native_calculate_cpu_khz(void); -/* Modifiers for buggy PIT handling */ -extern int pit_latch_buggy; extern int timer_ack; extern int no_timer_check; extern int no_sync_cmos_clock; -- cgit v1.1 From 0949be35095b53dbaa72db700cb5074c5c249629 Mon Sep 17 00:00:00 2001 From: Simon Arlott Date: Wed, 2 May 2007 19:27:05 +0200 Subject: [PATCH] i386: Add an option for the VIA C7 which sets appropriate L1 cache The VIA C7 is a 686 (with TSC) that supports MMX, SSE and SSE2, it also has a cache line length of 64 according to http://www.digit-life.com/articles2/cpu/rmma-via-c7.html. This patch sets gcc to -march=686 and selects the correct cache shift. 
Signed-off-by: Simon Arlott Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Dave Jones Cc: Alan Cox Signed-off-by: Andrew Morton --- include/asm-i386/module.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-i386/module.h b/include/asm-i386/module.h index 02f8f54..7e5fda6 100644 --- a/include/asm-i386/module.h +++ b/include/asm-i386/module.h @@ -54,6 +54,8 @@ struct mod_arch_specific #define MODULE_PROC_FAMILY "CYRIXIII " #elif defined CONFIG_MVIAC3_2 #define MODULE_PROC_FAMILY "VIAC3-2 " +#elif defined CONFIG_MVIAC7 +#define MODULE_PROC_FAMILY "VIAC7 " #elif defined CONFIG_MGEODEGX1 #define MODULE_PROC_FAMILY "GEODEGX1 " #elif defined CONFIG_MGEODE_LX -- cgit v1.1 From 803d80f65038f77c4681a0d7708e9d693e68aaa8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:05 +0200 Subject: [PATCH] x86-64: Some cleanup in time.c Move prototypes into header files Remove unneeded includes. Signed-off-by: Andi Kleen --- include/asm-x86_64/proto.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index 6688cf9..f64949f 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -122,6 +122,8 @@ extern void smp_local_timer_interrupt(void); long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); +void i8254_timer_resume(void); + #define round_up(x,y) (((x) + (y) - 1) & ~((y)-1)) #define round_down(x,y) ((x) & ~((y)-1)) -- cgit v1.1 From 86c0baf123e474b6eb404798926ecf62b426bf3a Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Wed, 2 May 2007 19:27:05 +0200 Subject: [PATCH] i386: Change sysenter_setup to __cpuinit & improve __INIT, __INITDATA Change sysenter_setup to __cpuinit. Change __INIT & __INITDATA to be cpu hotplug aware. 
Resolve MODPOST warnings similar to: WARNING: vmlinux - Section mismatch: reference to .init.text:sysenter_setup from .text between 'identify_cpu' (at offset 0xc040a380) and 'detect_ht' and WARNING: vmlinux - Section mismatch: reference to .init.data:vsyscall_int80_end from .text between 'sysenter_setup' (at offset 0xc041a269) and 'enable_sep_cpu' WARNING: vmlinux - Section mismatch: reference to .init.data:vsyscall_int80_start from .text between 'sysenter_setup' (at offset 0xc041a26e) and 'enable_sep_cpu' WARNING: vmlinux - Section mismatch: reference to .init.data:vsyscall_sysenter_end from .text between 'sysenter_setup' (at offset 0xc041a275) and 'enable_sep_cpu' WARNING: vmlinux - Section mismatch: reference to .init.data:vsyscall_sysenter_start from .text between 'sysenter_setup' (at offset 0xc041a27a) and 'enable_sep_cpu' Signed-off-by: Prarit Bhargava Signed-off-by: Andi Kleen --- include/linux/init.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/init.h b/include/linux/init.h index e290a01..9abf120 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -52,9 +52,14 @@ #endif /* For assembly routines */ +#ifdef CONFIG_HOTPLUG_CPU +#define __INIT .section ".text","ax" +#define __INITDATA .section ".data","aw" +#else #define __INIT .section ".init.text","ax" -#define __FINIT .previous #define __INITDATA .section ".init.data","aw" +#endif +#define __FINIT .previous #ifndef __ASSEMBLY__ /* -- cgit v1.1 From 6d1c426158131b11d05d66e7dd6bf91e5b1b4fc7 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 2 May 2007 19:27:06 +0200 Subject: [PATCH] i386: Update __copy_to_user_inatomic linuxdoc description Explicitly specify that the caller should pin the user memory; otherwise the function will sleep Signed-off-by: Aneesh Kumar K.V Signed-off-by: Andi Kleen --- include/asm-i386/uaccess.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git 
a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h index 70829ae..e2aa5e0 100644 --- a/include/asm-i386/uaccess.h +++ b/include/asm-i386/uaccess.h @@ -397,7 +397,19 @@ unsigned long __must_check __copy_from_user_ll_nocache(void *to, unsigned long __must_check __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, unsigned long n); -/* +/** + * __copy_to_user_inatomic: - Copy a block of data into user space, with less checking. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. + * @n: Number of bytes to copy. + * + * Context: User context only. + * + * Copy data from kernel space to user space. Caller must check + * the specified block with access_ok() before calling this function. + * The caller should also make sure he pins the user space address + * so that the we don't result in page fault and sleep. + * * Here we special-case 1, 2 and 4-byte copy_*_user invocations. On a fault * we return the initial request size (1, 2 or 4), as copy_*_user should do. * If a store crosses a page boundary and gets a fault, the x86 will not write -- cgit v1.1 From 973efae21beb2feda138f152ed06d4204774d93c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:06 +0200 Subject: [PATCH] i386: clean up mach_reboot_fixups The reboot_fixups stuff seems to be a bit of a mess, specifically the header is in linux/ when it's a purely i386-specific piece of code. I'm not sure why it has its config option; it's only currently needed for "geode-gx1/cs5530a", so perhaps whatever config option controls that hardware should enable this? 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen --- include/asm-i386/reboot_fixups.h | 6 ++++++ include/linux/reboot_fixups.h | 10 ---------- 2 files changed, 6 insertions(+), 10 deletions(-) create mode 100644 include/asm-i386/reboot_fixups.h delete mode 100644 include/linux/reboot_fixups.h (limited to 'include') diff --git a/include/asm-i386/reboot_fixups.h b/include/asm-i386/reboot_fixups.h new file mode 100644 index 0000000..0cb7d87 --- /dev/null +++ b/include/asm-i386/reboot_fixups.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_REBOOT_FIXUPS_H +#define _LINUX_REBOOT_FIXUPS_H + +extern void mach_reboot_fixups(void); + +#endif /* _LINUX_REBOOT_FIXUPS_H */ diff --git a/include/linux/reboot_fixups.h b/include/linux/reboot_fixups.h deleted file mode 100644 index 480ea2d..0000000 --- a/include/linux/reboot_fixups.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _LINUX_REBOOT_FIXUPS_H -#define _LINUX_REBOOT_FIXUPS_H - -#ifdef CONFIG_X86_REBOOTFIXUPS -extern void mach_reboot_fixups(void); -#else -#define mach_reboot_fixups() ((void)(0)) -#endif - -#endif /* _LINUX_REBOOT_FIXUPS_H */ -- cgit v1.1 From f9d09645d6157fefa18ff75930737060c8092ddb Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 May 2007 19:27:06 +0200 Subject: [PATCH] x86-64: Remove unused set_seg_base The set_seg_base function isn't used anywhere (2.6.21-rc3-git1) Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen --- include/asm-x86_64/desc.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/desc.h b/include/asm-x86_64/desc.h index 913d6ac..7726e74 100644 --- a/include/asm-x86_64/desc.h +++ b/include/asm-x86_64/desc.h @@ -107,16 +107,6 @@ static inline void set_ldt_desc(unsigned cpu, void *addr, int size) DESC_LDT, size * 8 - 1); } -static inline void set_seg_base(unsigned cpu, int entry, void *base) -{ - struct desc_struct *d = &cpu_gdt(cpu)[entry]; - u32 addr = (u32)(u64)base; - BUG_ON((u64)base >> 32); - d->base0 = addr & 0xffff; - d->base1 = 
(addr >> 16) & 0xff; - d->base2 = (addr >> 24) & 0xff; -} - #define LDT_entry_a(info) \ ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) /* Don't allow setting of the lm bit. It is useless anyways because -- cgit v1.1 From fbc16f2c2a0e16dbd75ac85d3b6db97f92b642ba Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Wed, 2 May 2007 19:27:06 +0200 Subject: [PATCH] x86-64: Remove duplicated code for reading control registers On Tue, Mar 13, 2007 at 05:33:09AM -0700, Randy.Dunlap wrote: > On Tue, 13 Mar 2007, Glauber de Oliveira Costa wrote: > > > Tiny cleanup: > > > > In x86_64, the same functions for reading cr3 and writing cr{3,4} are > > defined in tlbflush.h and system.h, with just a name change. > > The only difference is the clobbering of memory, which seems a safe, and > > even needed change for the write_cr4. This patch removes the duplicate. > > write_cr3() is moved to system.h for consistency. > > missing patch..... > thanks. Attached now -- Glauber de Oliveira Costa Red Hat Inc. 
"Free as in Freedom" Signed-off-by: Andi Kleen --- include/asm-x86_64/system.h | 7 ++++++- include/asm-x86_64/tlbflush.h | 33 +++++---------------------------- 2 files changed, 11 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h index bd376bc..213b7fe 100644 --- a/include/asm-x86_64/system.h +++ b/include/asm-x86_64/system.h @@ -89,6 +89,11 @@ static inline unsigned long read_cr3(void) return cr3; } +static inline void write_cr3(unsigned long val) +{ + asm volatile("movq %0,%%cr3" :: "r" (val) : "memory"); +} + static inline unsigned long read_cr4(void) { unsigned long cr4; @@ -98,7 +103,7 @@ static inline unsigned long read_cr4(void) static inline void write_cr4(unsigned long val) { - asm volatile("movq %0,%%cr4" :: "r" (val)); + asm volatile("movq %0,%%cr4" :: "r" (val) : "memory"); } #define stts() write_cr0(8 | read_cr0()) diff --git a/include/asm-x86_64/tlbflush.h b/include/asm-x86_64/tlbflush.h index 983bd29..512401b 100644 --- a/include/asm-x86_64/tlbflush.h +++ b/include/asm-x86_64/tlbflush.h @@ -3,41 +3,18 @@ #include #include - -static inline unsigned long get_cr3(void) -{ - unsigned long cr3; - asm volatile("mov %%cr3,%0" : "=r" (cr3)); - return cr3; -} - -static inline void set_cr3(unsigned long cr3) -{ - asm volatile("mov %0,%%cr3" :: "r" (cr3) : "memory"); -} +#include static inline void __flush_tlb(void) { - set_cr3(get_cr3()); -} - -static inline unsigned long get_cr4(void) -{ - unsigned long cr4; - asm volatile("mov %%cr4,%0" : "=r" (cr4)); - return cr4; -} - -static inline void set_cr4(unsigned long cr4) -{ - asm volatile("mov %0,%%cr4" :: "r" (cr4) : "memory"); + write_cr3(read_cr3()); } static inline void __flush_tlb_all(void) { - unsigned long cr4 = get_cr4(); - set_cr4(cr4 & ~X86_CR4_PGE); /* clear PGE */ - set_cr4(cr4); /* write old PGE again and flush TLBs */ + unsigned long cr4 = read_cr4(); + write_cr4(cr4 & ~X86_CR4_PGE); /* clear PGE */ + write_cr4(cr4); /* write 
old PGE again and flush TLBs */ } #define __flush_tlb_one(addr) \ -- cgit v1.1 From 6b37f5a20c0e5c334c010a587058354215433e92 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 2 May 2007 19:27:06 +0200 Subject: [PATCH] x86-64: fix cpu MHz reporting on constant_tsc cpus This patch fixes the reporting of cpu_mhz in /proc/cpuinfo on CPUs with a constant TSC rate and a kernel with disabled cpufreq. Signed-off-by: Mark Langsdorf Signed-off-by: Joerg Roedel Signed-off-by: Andi Kleen arch/x86_64/kernel/apic.c | 2 - arch/x86_64/kernel/time.c | 58 +++++++++++++++++++++++++++++++++++++++--- arch/x86_64/kernel/tsc.c | 12 +++++--- arch/x86_64/kernel/tsc_sync.c | 2 - include/asm-x86_64/proto.h | 1 5 files changed, 65 insertions(+), 10 deletions(-) --- include/asm-x86_64/proto.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index f64949f..7842702 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -92,6 +92,7 @@ extern unsigned long table_start, table_end; extern int exception_trace; extern unsigned cpu_khz; +extern unsigned tsc_khz; extern void no_iommu_init(void); extern int force_iommu, no_iommu; -- cgit v1.1 From e65845045588806fa5c8df8a4f4253516515a5e3 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 2 May 2007 19:27:06 +0200 Subject: [PATCH] x86-64: dma_ops as const The dma_ops structure can be const since it never changes after boot. 
Signed-off-by: Stephen Hemminger Signed-off-by: Andi Kleen --- include/asm-x86_64/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/dma-mapping.h b/include/asm-x86_64/dma-mapping.h index d2af227..6897e2a 100644 --- a/include/asm-x86_64/dma-mapping.h +++ b/include/asm-x86_64/dma-mapping.h @@ -52,7 +52,7 @@ struct dma_mapping_ops { }; extern dma_addr_t bad_dma_address; -extern struct dma_mapping_ops* dma_ops; +extern const struct dma_mapping_ops* dma_ops; extern int iommu_merge; static inline int dma_mapping_error(dma_addr_t dma_addr) -- cgit v1.1 From 9d291e787b2b71d1b57e5fbb24ba9c70e748ed84 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 2 May 2007 19:27:06 +0200 Subject: [PATCH] x86-64: Assembly safe page.h and pgtable.h This patch makes pgtable.h and page.h safe to include in assembly files like head.S, allowing us to use symbolic constants instead of hard coded numbers when referring to the page tables. This patch copies asm-sparc64/const.h to asm-x86_64 to get a definition of _AC(), a very convenient macro that allows us to force the type when we are compiling the code in C and to drop all of the type information when we are using the constant in assembly. Previously this was done with multiple definitions of the same constant. const.h was modified slightly so that it works when given CONFIG options as arguments. This patch adds #ifndef __ASSEMBLY__ ... #endif and _AC(1,UL) where appropriate so the assembler won't choke on the header files. Otherwise nothing should have changed. AK: added const.h to exported headers to fix headers_check Signed-off-by: Eric W. 
Biederman Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen --- include/asm-x86_64/Kbuild | 1 + include/asm-x86_64/const.h | 20 ++++++++++++++++++++ include/asm-x86_64/page.h | 28 ++++++++++------------------ include/asm-x86_64/pgtable.h | 33 +++++++++++++++++++++------------ 4 files changed, 52 insertions(+), 30 deletions(-) create mode 100644 include/asm-x86_64/const.h (limited to 'include') diff --git a/include/asm-x86_64/Kbuild b/include/asm-x86_64/Kbuild index ebd7117..242296e 100644 --- a/include/asm-x86_64/Kbuild +++ b/include/asm-x86_64/Kbuild @@ -18,3 +18,4 @@ header-y += vsyscall32.h unifdef-y += mce.h unifdef-y += mtrr.h unifdef-y += vsyscall.h +unifdef-y += const.h diff --git a/include/asm-x86_64/const.h b/include/asm-x86_64/const.h new file mode 100644 index 0000000..54fb08f --- /dev/null +++ b/include/asm-x86_64/const.h @@ -0,0 +1,20 @@ +/* const.h: Macros for dealing with constants. */ + +#ifndef _X86_64_CONST_H +#define _X86_64_CONST_H + +/* Some constant macros are used in both assembler and + * C code. Therefore we cannot annotate them always with + * 'UL' and other type specificers unilaterally. We + * use the following macros to deal with this. 
+ */ + +#ifdef __ASSEMBLY__ +#define _AC(X,Y) X +#else +#define __AC(X,Y) (X##Y) +#define _AC(X,Y) __AC(X,Y) +#endif + + +#endif /* !(_X86_64_CONST_H) */ diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h index 10f3461..d554b94 100644 --- a/include/asm-x86_64/page.h +++ b/include/asm-x86_64/page.h @@ -1,14 +1,11 @@ #ifndef _X86_64_PAGE_H #define _X86_64_PAGE_H +#include /* PAGE_SHIFT determines the page size */ #define PAGE_SHIFT 12 -#ifdef __ASSEMBLY__ -#define PAGE_SIZE (0x1 << PAGE_SHIFT) -#else -#define PAGE_SIZE (1UL << PAGE_SHIFT) -#endif +#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) #define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK) @@ -33,10 +30,10 @@ #define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1)) -#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT) +#define LARGE_PAGE_SIZE (_AC(1,UL) << PMD_SHIFT) #define HPAGE_SHIFT PMD_SHIFT -#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) +#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) @@ -76,29 +73,24 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) -#define __PHYSICAL_START ((unsigned long)CONFIG_PHYSICAL_START) -#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) -#define __START_KERNEL_map 0xffffffff80000000UL -#define __PAGE_OFFSET 0xffff810000000000UL +#endif /* !__ASSEMBLY__ */ -#else #define __PHYSICAL_START CONFIG_PHYSICAL_START #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) #define __START_KERNEL_map 0xffffffff80000000 #define __PAGE_OFFSET 0xffff810000000000 -#endif /* !__ASSEMBLY__ */ /* to align the pointer to the (next) page boundary */ #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) /* See Documentation/x86_64/mm.txt for a description of the memory map. 
*/ #define __PHYSICAL_MASK_SHIFT 46 -#define __PHYSICAL_MASK ((1UL << __PHYSICAL_MASK_SHIFT) - 1) +#define __PHYSICAL_MASK ((_AC(1,UL) << __PHYSICAL_MASK_SHIFT) - 1) #define __VIRTUAL_MASK_SHIFT 48 -#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) +#define __VIRTUAL_MASK ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1) -#define KERNEL_TEXT_SIZE (40UL*1024*1024) -#define KERNEL_TEXT_START 0xffffffff80000000UL +#define KERNEL_TEXT_SIZE (40*1024*1024) +#define KERNEL_TEXT_START 0xffffffff80000000 #ifndef __ASSEMBLY__ @@ -106,7 +98,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; #endif /* __ASSEMBLY__ */ -#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) +#define PAGE_OFFSET __PAGE_OFFSET /* Note: __pa(&symbol_visible_to_c) should be always replaced with __pa_symbol. Otherwise you risk miscompilation. */ diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index 5957361..c514deb 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -1,6 +1,9 @@ #ifndef _X86_64_PGTABLE_H #define _X86_64_PGTABLE_H +#include +#ifndef __ASSEMBLY__ + /* * This file contains the functions and defines necessary to modify and use * the x86-64 page table tree. 
@@ -30,6 +33,8 @@ extern void clear_kernel_mapping(unsigned long addr, unsigned long size); extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) +#endif /* !__ASSEMBLY__ */ + /* * PGDIR_SHIFT determines what a top-level page table entry can map */ @@ -54,6 +59,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; */ #define PTRS_PER_PTE 512 +#ifndef __ASSEMBLY__ + #define pte_ERROR(e) \ printk("%s:%d: bad pte %p(%016lx).\n", __FILE__, __LINE__, &(e), pte_val(e)) #define pmd_ERROR(e) \ @@ -117,22 +124,23 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long #define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK)) -#define PMD_SIZE (1UL << PMD_SHIFT) +#endif /* !__ASSEMBLY__ */ + +#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_SIZE (_AC(1,UL) << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE-1)) -#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1) #define FIRST_USER_ADDRESS 0 -#ifndef __ASSEMBLY__ -#define MAXMEM 0x3fffffffffffUL -#define VMALLOC_START 0xffffc20000000000UL -#define VMALLOC_END 0xffffe1ffffffffffUL -#define MODULES_VADDR 0xffffffff88000000UL -#define MODULES_END 0xfffffffffff00000UL +#define MAXMEM 0x3fffffffffff +#define VMALLOC_START 0xffffc20000000000 +#define VMALLOC_END 0xffffe1ffffffffff +#define MODULES_VADDR 0xffffffff88000000 +#define MODULES_END 0xfffffffffff00000 #define MODULES_LEN (MODULES_END - MODULES_VADDR) #define _PAGE_BIT_PRESENT 0 @@ -158,7 +166,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long #define _PAGE_GLOBAL 0x100 /* Global TLB entry */ #define _PAGE_PROTNONE 0x080 /* If not present */ -#define _PAGE_NX (1UL<<_PAGE_BIT_NX) +#define _PAGE_NX (_AC(1,UL)<<_PAGE_BIT_NX) 
#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) @@ -220,6 +228,8 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC +#ifndef __ASSEMBLY__ + static inline unsigned long pgd_bad(pgd_t pgd) { return pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); @@ -405,8 +415,6 @@ extern spinlock_t pgd_lock; extern struct page *pgd_list; void vmalloc_sync_all(void); -#endif /* !__ASSEMBLY__ */ - extern int kern_addr_valid(unsigned long addr); #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ @@ -436,5 +444,6 @@ extern int kern_addr_valid(unsigned long addr); #define __HAVE_ARCH_PTEP_SET_WRPROTECT #define __HAVE_ARCH_PTE_SAME #include +#endif /* !__ASSEMBLY__ */ #endif /* _X86_64_PGTABLE_H */ -- cgit v1.1 From 67dcbb6bc6537aea92a2466bfc75f015b00e465e Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 2 May 2007 19:27:06 +0200 Subject: [PATCH] x86-64: Clean up the early boot page table - Merge physmem_pgt and ident_pgt, removing physmem_pgt. The merge is broken as soon as mm/init.c:init_memory_mapping is run. - As physmem_pgt is gone don't export it in pgtable.h. - Use defines from pgtable.h for page permissions. - Fix the physical memory identity mapping so it is at the correct address. - Remove the physical memory mapping from wakeup_level4_pgt; it is at the wrong address so we can't possibly be using it. - Simplify NEXT_PAGE. The work to calculate the phys_ alias of the labels was very cool. Unfortunately it was a brittle special purpose hack that makes maintenance more difficult. Instead just use label - __START_KERNEL_map like we do everywhere else in assembly. Signed-off-by: Eric W. 
Biederman Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen --- include/asm-x86_64/pgtable.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index c514deb..5a5d43b 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -14,7 +14,6 @@ #include extern pud_t level3_kernel_pgt[512]; -extern pud_t level3_physmem_pgt[512]; extern pud_t level3_ident_pgt[512]; extern pmd_t level2_kernel_pgt[512]; extern pgd_t init_level4_pgt[]; -- cgit v1.1 From 30f472895401fbe8e64f861a2569bc9acb098741 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 2 May 2007 19:27:07 +0200 Subject: [PATCH] x86-64: cleanup segments Move __KERNEL32_CS up into the unused gdt entry. __KERNEL32_CS is used when entering the kernel so putting it first is useful when trying to keep boot gdt sizes to a minimum. Set the accessed bit on all gdt entries. We don't care so there is no need for the cpu to burn the extra cycles, and it potentially allows the pages to be immutable. Plus it is confusing when debugging and your gdt entries mysteriously change. Signed-off-by: Eric W. 
Biederman Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen --- include/asm-x86_64/segment.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/segment.h b/include/asm-x86_64/segment.h index 334ddcd..adf2bf1 100644 --- a/include/asm-x86_64/segment.h +++ b/include/asm-x86_64/segment.h @@ -6,7 +6,7 @@ #define __KERNEL_CS 0x10 #define __KERNEL_DS 0x18 -#define __KERNEL32_CS 0x38 +#define __KERNEL32_CS 0x08 /* * we cannot use the same code segment descriptor for user and kernel -- cgit v1.1 From 3c321bceb4a626639ab43a5a24d884930e511826 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 2 May 2007 19:27:07 +0200 Subject: [PATCH] x86-64: Add EFER to the register set saved by save_processor_state EFER varies like %cr4 depending on the cpu capabilities, and which cpu capabilities we want to make use of. So save/restore it to make certain we have the same EFER value when we are done. Signed-off-by: Eric W. Biederman Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen --- include/asm-x86_64/suspend.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-x86_64/suspend.h b/include/asm-x86_64/suspend.h index bc7f817..a42306c 100644 --- a/include/asm-x86_64/suspend.h +++ b/include/asm-x86_64/suspend.h @@ -17,6 +17,7 @@ struct saved_context { u16 ds, es, fs, gs, ss; unsigned long gs_base, gs_kernel_base, fs_base; unsigned long cr0, cr2, cr3, cr4, cr8; + unsigned long efer; u16 gdt_pad; u16 gdt_limit; unsigned long gdt_base; -- cgit v1.1 From 7db681d7e4038ad205b5face5cf7f7815633e1b5 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 2 May 2007 19:27:07 +0200 Subject: [PATCH] x86-64: wakeup.S rename registers to reflect right names o Use appropriate names for 64bit registers. Signed-off-by: Eric W. 
Biederman Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen --- include/asm-x86_64/suspend.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/suspend.h b/include/asm-x86_64/suspend.h index a42306c..9c3f8de 100644 --- a/include/asm-x86_64/suspend.h +++ b/include/asm-x86_64/suspend.h @@ -45,12 +45,12 @@ extern unsigned long saved_context_eflags; extern void fix_processor_context(void); #ifdef CONFIG_ACPI_SLEEP -extern unsigned long saved_eip; -extern unsigned long saved_esp; -extern unsigned long saved_ebp; -extern unsigned long saved_ebx; -extern unsigned long saved_esi; -extern unsigned long saved_edi; +extern unsigned long saved_rip; +extern unsigned long saved_rsp; +extern unsigned long saved_rbp; +extern unsigned long saved_rbx; +extern unsigned long saved_rsi; +extern unsigned long saved_rdi; /* routines for saving/restoring kernel state */ extern int acpi_save_state_mem(void); -- cgit v1.1 From cfd243d4af7c7f8f52f5cb99d3932d9074b039ff Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 2 May 2007 19:27:07 +0200 Subject: [PATCH] x86-64: Remove the identity mapping as early as possible With the rewrite of the SMP trampoline and the early page allocator there is nothing that needs identity mapped pages, once we start executing C code. So add zap_identity_mappings into head64.c and remove zap_low_mappings() from much later in the code. The functions are subtly different thus the name change. This also kills boot_level4_pgt which was from an earlier attempt to move the identity mappings as early as possible, and is now no longer needed. Essentially I have replaced boot_level4_pgt with trampoline_level4_pgt in trampoline.S Signed-off-by: Eric W. 
Biederman Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen --- include/asm-x86_64/pgtable.h | 1 - include/asm-x86_64/proto.h | 2 -- 2 files changed, 3 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index 5a5d43b..703f024 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -17,7 +17,6 @@ extern pud_t level3_kernel_pgt[512]; extern pud_t level3_ident_pgt[512]; extern pmd_t level2_kernel_pgt[512]; extern pgd_t init_level4_pgt[]; -extern pgd_t boot_level4_pgt[]; extern unsigned long __supported_pte_mask; #define swapper_pg_dir init_level4_pgt diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index 7842702..3f8f285 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -11,8 +11,6 @@ struct pt_regs; extern void start_kernel(void); extern void pda_init(int); -extern void zap_low_mappings(int cpu); - extern void early_idt_handler(void); extern void mcheck_init(struct cpuinfo_x86 *c); -- cgit v1.1 From 0dbf7028c0c1f266c9631139450a1502d3cd457e Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 2 May 2007 19:27:07 +0200 Subject: [PATCH] x86: __pa and __pa_symbol address space separation Currently __pa_symbol is for use with symbols in the kernel address map and __pa is for use with pointers into the physical memory map. But the code is implemented so you can usually interchange the two. __pa which is much more common can be implemented much more cheaply if it doesn't have to worry about any other kernel address spaces. This is especially true with a relocatable kernel as __pa_symbol needs to perform an extra variable read to resolve the address. There is a third macro that is added for the vsyscall data __pa_vsymbol for finding the physical addresses of vsyscall pages. Most of this patch is simply sorting through the references to __pa or __pa_symbol and using the proper one. 
A little of it is continuing to use a physical address when we have it instead of recalculating it several times. swapper_pgd is now NULL. leave_mm now uses init_mm.pgd and init_mm.pgd is initialized at boot (instead of compile time) to the physmem virtual mapping of init_level4_pgd. The physical address changed. Except for the for EMPTY_ZERO page all of the remaining references to __pa_symbol appear to be during kernel initialization. So this should reduce the cost of __pa in the common case, even on a relocated kernel. As this is technically a semantic change we need to be on the lookout for anything I missed. But it works for me (tm). Signed-off-by: Eric W. Biederman Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen --- include/asm-x86_64/page.h | 6 ++---- include/asm-x86_64/pgtable.h | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h index d554b94..4974433 100644 --- a/include/asm-x86_64/page.h +++ b/include/asm-x86_64/page.h @@ -102,17 +102,15 @@ typedef struct { unsigned long pgprot; } pgprot_t; /* Note: __pa(&symbol_visible_to_c) should be always replaced with __pa_symbol. Otherwise you risk miscompilation. */ -#define __pa(x) (((unsigned long)(x)>=__START_KERNEL_map)?(unsigned long)(x) - (unsigned long)__START_KERNEL_map:(unsigned long)(x) - PAGE_OFFSET) +#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET) /* __pa_symbol should be used for C visible symbols. This seems to be the official gcc blessed way to do such arithmetic. 
*/ #define __pa_symbol(x) \ ({unsigned long v; \ asm("" : "=r" (v) : "0" (x)); \ - __pa(v); }) + (v - __START_KERNEL_map); }) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) -#define __boot_va(x) __va(x) -#define __boot_pa(x) __pa(x) #ifdef CONFIG_FLATMEM #define pfn_valid(pfn) ((pfn) < end_pfn) #endif diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index 703f024..c1865e3 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -19,7 +19,7 @@ extern pmd_t level2_kernel_pgt[512]; extern pgd_t init_level4_pgt[]; extern unsigned long __supported_pte_mask; -#define swapper_pg_dir init_level4_pgt +#define swapper_pg_dir ((pgd_t *)NULL) extern void paging_init(void); extern void clear_kernel_mapping(unsigned long addr, unsigned long size); @@ -29,7 +29,7 @@ extern void clear_kernel_mapping(unsigned long addr, unsigned long size); * for zero-mapped memory areas etc.. */ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; -#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) +#define ZERO_PAGE(vaddr) (pfn_to_page(__pa_symbol(&empty_zero_page) >> PAGE_SHIFT)) #endif /* !__ASSEMBLY__ */ -- cgit v1.1 From 1ab60e0f72f71ec54831e525a3e1154f1c092408 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 2 May 2007 19:27:07 +0200 Subject: [PATCH] x86-64: Relocatable Kernel Support This patch modifies the x86_64 kernel so that it can be loaded and run at any 2M aligned address, below 512G. The technique used is to compile the decompressor with -fPIC and modify it so the decompressor is fully relocatable. For the main kernel the page tables are modified so the kernel remains at the same virtual address. In addition a variable phys_base is kept that holds the physical address the kernel is loaded at. __pa_symbol is modified to add that when we take the address of a kernel symbol. When loaded with a normal bootloader the decompressor will decompress the kernel to 2M and it will run there. 
This both ensures the relocation code is always working, and makes it easier to use 2M pages for the kernel and the cpu. AK: changed to not make RELOCATABLE default in Kconfig Signed-off-by: Eric W. Biederman Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen --- include/asm-x86_64/page.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h index 4974433..40a24d0 100644 --- a/include/asm-x86_64/page.h +++ b/include/asm-x86_64/page.h @@ -61,6 +61,8 @@ typedef struct { unsigned long pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; +extern unsigned long phys_base; + #define pte_val(x) ((x).pte) #define pmd_val(x) ((x).pmd) #define pud_val(x) ((x).pud) @@ -101,14 +103,14 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define PAGE_OFFSET __PAGE_OFFSET /* Note: __pa(&symbol_visible_to_c) should be always replaced with __pa_symbol. - Otherwise you risk miscompilation. */ + Otherwise you risk miscompilation. */ #define __pa(x) ((unsigned long)(x) - PAGE_OFFSET) /* __pa_symbol should be used for C visible symbols. This seems to be the official gcc blessed way to do such arithmetic. */ #define __pa_symbol(x) \ ({unsigned long v; \ asm("" : "=r" (v) : "0" (x)); \ - (v - __START_KERNEL_map); }) + ((v - __START_KERNEL_map) + phys_base); }) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) #ifdef CONFIG_FLATMEM -- cgit v1.1 From 6a50a664ca0cfd2a487525f10cec3ff4d570b5e8 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 2 May 2007 19:27:08 +0200 Subject: [PATCH] x86-64: build-time checking o X86_64 kernel should run from 2MB aligned address for two reasons. - Performance. - For relocatable kernels, page tables are updated based on difference between compile time address and load time physical address. This difference should be multiple of 2MB as kernel text and data is mapped using 2MB pages and PMD should be pointing to a 2MB aligned address. 
Life is simpler if both compile time and load time kernel addresses are 2MB aligned. o Flag the error at compile time if one is trying to build a kernel which does not meet alignment restrictions. Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen Cc: "Eric W. Biederman" Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/asm-x86_64/page.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h index 40a24d0..b17fc16 100644 --- a/include/asm-x86_64/page.h +++ b/include/asm-x86_64/page.h @@ -78,6 +78,7 @@ extern unsigned long phys_base; #endif /* !__ASSEMBLY__ */ #define __PHYSICAL_START CONFIG_PHYSICAL_START +#define __KERNEL_ALIGN 0x200000 #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) #define __START_KERNEL_map 0xffffffff80000000 #define __PAGE_OFFSET 0xffff810000000000 -- cgit v1.1 From 184c44d2049c4db7ef6ec65794546954da2c6a0e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 May 2007 19:27:08 +0200 Subject: [PATCH] x86-64: fix x86_64-mm-sched-clock-share Fix for the following patch. Provide dummy cpufreq functions when CPUFREQ is not compiled in. 
Cc: Andi Kleen Cc: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/linux/cpufreq.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 0899e2c..cb9b2ec 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -32,7 +32,15 @@ * CPUFREQ NOTIFIER INTERFACE * *********************************************************************/ +#ifdef CONFIG_CPU_FREQ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list); +#else +static inline int cpufreq_register_notifier(struct notifier_block *nb, + unsigned int list) +{ + return 0; +} +#endif int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list); #define CPUFREQ_TRANSITION_NOTIFIER (0) @@ -261,17 +269,22 @@ int cpufreq_set_policy(struct cpufreq_policy *policy); int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); int cpufreq_update_policy(unsigned int cpu); -/* query the current CPU frequency (in kHz). If zero, cpufreq couldn't detect it */ -unsigned int cpufreq_get(unsigned int cpu); -/* query the last known CPU freq (in kHz). If zero, cpufreq couldn't detect it */ +/* + * query the last known CPU freq (in kHz). 
If zero, cpufreq couldn't detect it + */ #ifdef CONFIG_CPU_FREQ unsigned int cpufreq_quick_get(unsigned int cpu); +unsigned int cpufreq_get(unsigned int cpu); #else static inline unsigned int cpufreq_quick_get(unsigned int cpu) { return 0; } +static inline unsigned int cpufreq_get(unsigned int cpu) +{ + return 0; +} #endif -- cgit v1.1 From e073ae1b34d5600ffc550407625dcb2d4cf46c6e Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Wed, 2 May 2007 19:27:08 +0200 Subject: [PATCH] x86-64: Set HASHDIST_DEFAULT to 1 for x86_64 NUMA Enable system hashtable memory to be distributed among nodes on x86_64 NUMA Forcing the kernel to use node interleaved vmalloc instead of bootmem for the system hashtable memory (alloc_large_system_hash) reduces the memory imbalance on node 0 by around 40MB on a 8 node x86_64 NUMA box: Before the following patch, on bootup of a 8 node box: Node 0 MemTotal: 3407488 kB Node 0 MemFree: 3206296 kB Node 0 MemUsed: 201192 kB Node 0 Active: 7012 kB Node 0 Inactive: 512 kB Node 0 Dirty: 0 kB Node 0 Writeback: 0 kB Node 0 FilePages: 1912 kB Node 0 Mapped: 420 kB Node 0 AnonPages: 5612 kB Node 0 PageTables: 468 kB Node 0 NFS_Unstable: 0 kB Node 0 Bounce: 0 kB Node 0 Slab: 5408 kB Node 0 SReclaimable: 644 kB Node 0 SUnreclaim: 4764 kB After the patch (or using hashdist=1 on the kernel command line): Node 0 MemTotal: 3407488 kB Node 0 MemFree: 3247608 kB Node 0 MemUsed: 159880 kB Node 0 Active: 3012 kB Node 0 Inactive: 616 kB Node 0 Dirty: 0 kB Node 0 Writeback: 0 kB Node 0 FilePages: 2424 kB Node 0 Mapped: 380 kB Node 0 AnonPages: 1200 kB Node 0 PageTables: 396 kB Node 0 NFS_Unstable: 0 kB Node 0 Bounce: 0 kB Node 0 Slab: 6304 kB Node 0 SReclaimable: 1596 kB Node 0 SUnreclaim: 4708 kB I guess it is a good idea to keep HASHDIST_DEFAULT "on" for x86_64 NUMA since x86_64 has no dearth of vmalloc space? Or maybe enable hash distribution for all 64bit NUMA arches? The following patch does it only for x86_64. 
I ran a HPC MPI benchmark -- 'Ansys wingsolid', which takes up quite a bit of memory and uses up tlb entries. This was on a 4 way, 2 socket Tyan AMD box (non vsmp), with 8G total memory (4G pernode). The results with and without hash distribution are: 1. Vanilla - runtime of 1188.000s 2. With hashdist=1 runtime of 1154.000s Oprofile output for the duration of run is: 1. Vanilla: PU: AMD64 processors, speed 2411.16 MHz (estimated) Counted L1_AND_L2_DTLB_MISSES events (L1 and L2 DTLB misses) with a unit mask of 0x00 (No unit mask) count 500 samples % app name symbol name 163054 6.5513 libansys1.so MultiFront::decompose(int, int, Elemset *, int *, int, int, int) 162061 6.5114 libansys3.so blockSaxpy6L_fd 162042 6.5107 libansys3.so blockInnerProduct6L_fd 156286 6.2794 libansys3.so maxb33_ 87879 3.5309 libansys1.so elmatrixmultpcg_ 84857 3.4095 libansys4.so saxpy_pcg 58637 2.3560 libansys4.so .st4560 46612 1.8728 libansys4.so .st4282 43043 1.7294 vmlinux-t copy_user_generic_string 41326 1.6604 libansys3.so blockSaxpyBackSolve6L_fd 41288 1.6589 libansys3.so blockInnerProductBackSolve6L_fd 2. With hashdist=1 CPU: AMD64 processors, speed 2411.13 MHz (estimated) Counted L1_AND_L2_DTLB_MISSES events (L1 and L2 DTLB misses) with a unit mask of 0x00 (No unit mask) count 500 samples % app name symbol name 162993 6.9814 libansys1.so MultiFront::decompose(int, int, Elemset *, int *, int, int, int) 160799 6.8874 libansys3.so blockInnerProduct6L_fd 160459 6.8729 libansys3.so blockSaxpy6L_fd 156018 6.6826 libansys3.so maxb33_ 84700 3.6279 libansys4.so saxpy_pcg 83434 3.5737 libansys1.so elmatrixmultpcg_ 58074 2.4875 libansys4.so .st4560 46000 1.9703 libansys4.so .st4282 41166 1.7632 libansys3.so blockSaxpyBackSolve6L_fd 41033 1.7575 libansys3.so blockInnerProductBackSolve6L_fd 35762 1.5318 libansys1.so inner_product_sub 35591 1.5245 libansys1.so inner_product_sub2 28259 1.2104 libansys4.so addVectors Signed-off-by: Pravin B. 
Shelar Signed-off-by: Ravikiran Thirumalai Signed-off-by: Shai Fultheim Signed-off-by: Andi Kleen Acked-by: Christoph Lameter Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/linux/bootmem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 81c07cd..0365ec9 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -122,9 +122,9 @@ extern void *alloc_large_system_hash(const char *tablename, #define HASH_EARLY 0x00000001 /* Allocating during early boot? */ /* Only NUMA needs hash distribution. - * IA64 is known to have sufficient vmalloc space. + * IA64 and x86_64 have sufficient vmalloc space. */ -#if defined(CONFIG_NUMA) && defined(CONFIG_IA64) +#if defined(CONFIG_NUMA) && (defined(CONFIG_IA64) || defined(CONFIG_X86_64)) #define HASHDIST_DEFAULT 1 #else #define HASHDIST_DEFAULT 0 -- cgit v1.1 From 1833d6bc72893265f22addd79cf52e6987496e0f Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 2 May 2007 19:27:08 +0200 Subject: [PATCH] i386: modpost apic related warning fixes o Modpost generates warnings for i386 if compiled with CONFIG_RELOCATABLE=y WARNING: vmlinux - Section mismatch: reference to .init.text:find_unisys_acpi_oem_table from .text between 'acpi_madt_oem_check' (at offset 0xc0101eda) and 'enable_apic_mode' WARNING: vmlinux - Section mismatch: reference to .init.text:acpi_get_table_header_early from .text between 'acpi_madt_oem_check' (at offset 0xc0101ef0) and 'enable_apic_mode' WARNING: vmlinux - Section mismatch: reference to .init.text:parse_unisys_oem from .text between 'acpi_madt_oem_check' (at offset 0xc0101f2e) and 'enable_apic_mode' WARNING: vmlinux - Section mismatch: reference to .init.text:setup_unisys from .text between 'acpi_madt_oem_check' (at offset 0xc0101f37) and 'enable_apic_mode'WARNING: vmlinux - Section mismatch: reference to .init.text:parse_unisys_oem from .text between 'mps_oem_check' (at offset 0xc0101ec7) and 
'acpi_madt_oem_check' WARNING: vmlinux - Section mismatch: reference to .init.text:es7000_sw_apic from .text between 'enable_apic_mode' (at offset 0xc0101f48) and 'check_apicid_present' o Some functions which are inline (acpi_madt_oem_check) are not inlined by compiler as these functions are accessed using function pointer. These functions are put in .text section and they in-turn access __init type functions hence modpost generates warnings. o Do not iniline acpi_madt_oem_check, instead make it __init. Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Len Brown Signed-off-by: Andrew Morton --- include/asm-i386/mach-es7000/mach_apic.h | 7 ------- include/asm-i386/mach-es7000/mach_mpparse.h | 32 ----------------------------- 2 files changed, 39 deletions(-) (limited to 'include') diff --git a/include/asm-i386/mach-es7000/mach_apic.h b/include/asm-i386/mach-es7000/mach_apic.h index 8e8b394..2d97892 100644 --- a/include/asm-i386/mach-es7000/mach_apic.h +++ b/include/asm-i386/mach-es7000/mach_apic.h @@ -73,13 +73,6 @@ static inline void init_apic_ldr(void) apic_write_around(APIC_LDR, val); } -extern void es7000_sw_apic(void); -static inline void enable_apic_mode(void) -{ - es7000_sw_apic(); - return; -} - extern int apic_version [MAX_APICS]; static inline void setup_apic_routing(void) { diff --git a/include/asm-i386/mach-es7000/mach_mpparse.h b/include/asm-i386/mach-es7000/mach_mpparse.h index 24990e5..b9fb784 100644 --- a/include/asm-i386/mach-es7000/mach_mpparse.h +++ b/include/asm-i386/mach-es7000/mach_mpparse.h @@ -18,18 +18,6 @@ extern int parse_unisys_oem (char *oemptr); extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); extern void setup_unisys(void); -static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid) -{ - if (mpc->mpc_oemptr) { - struct mp_config_oemtable *oem_table = - (struct mp_config_oemtable *)mpc->mpc_oemptr; - if (!strncmp(oem, "UNISYS", 6)) - return parse_unisys_oem((char 
*)oem_table); - } - return 0; -} - #ifdef CONFIG_ACPI static inline int es7000_check_dsdt(void) @@ -41,26 +29,6 @@ static inline int es7000_check_dsdt(void) return 1; return 0; } - -/* Hook from generic ACPI tables.c */ -static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - unsigned long oem_addr; - if (!find_unisys_acpi_oem_table(&oem_addr)) { - if (es7000_check_dsdt()) - return parse_unisys_oem((char *)oem_addr); - else { - setup_unisys(); - return 1; - } - } - return 0; -} -#else -static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return 0; -} #endif #endif /* __ASM_MACH_MPPARSE_H */ -- cgit v1.1 From 5a90cf205c922707ffed2d8f87cefd942e96b0ba Mon Sep 17 00:00:00 2001 From: john stultz Date: Wed, 2 May 2007 19:27:08 +0200 Subject: [PATCH] x86: Log reason why TSC was marked unstable Change mark_tsc_unstable() so it takes a string argument, which holds the reason the TSC was marked unstable. This is then displayed the first time mark_tsc_unstable is called. This should help us better debug why the TSC was marked unstable on certain systems and allow us to make sure we're not being overly paranoid when throwing out this troublesome clocksource. 
Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/asm-i386/mach-summit/mach_mpparse.h | 4 ++-- include/asm-i386/tsc.h | 2 +- include/asm-x86_64/timex.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-i386/mach-summit/mach_mpparse.h b/include/asm-i386/mach-summit/mach_mpparse.h index 9426839..c252053 100644 --- a/include/asm-i386/mach-summit/mach_mpparse.h +++ b/include/asm-i386/mach-summit/mach_mpparse.h @@ -30,7 +30,7 @@ static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, (!strncmp(productid, "VIGIL SMP", 9) || !strncmp(productid, "EXA", 3) || !strncmp(productid, "RUTHLESS SMP", 12))){ - mark_tsc_unstable(); + mark_tsc_unstable("Summit based system"); use_cyclone = 1; /*enable cyclone-timer*/ setup_summit(); return 1; @@ -44,7 +44,7 @@ static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) if (!strncmp(oem_id, "IBM", 3) && (!strncmp(oem_table_id, "SERVIGIL", 8) || !strncmp(oem_table_id, "EXA", 3))){ - mark_tsc_unstable(); + mark_tsc_unstable("Summit based system"); use_cyclone = 1; /*enable cyclone-timer*/ setup_summit(); return 1; diff --git a/include/asm-i386/tsc.h b/include/asm-i386/tsc.h index 84016ff..3469766 100644 --- a/include/asm-i386/tsc.h +++ b/include/asm-i386/tsc.h @@ -53,7 +53,7 @@ static __always_inline cycles_t get_cycles_sync(void) } extern void tsc_init(void); -extern void mark_tsc_unstable(void); +extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); extern void init_tsc_clocksource(void); diff --git a/include/asm-x86_64/timex.h b/include/asm-x86_64/timex.h index 8c6808a..f6527e1 100644 --- a/include/asm-x86_64/timex.h +++ b/include/asm-x86_64/timex.h @@ -27,6 +27,6 @@ extern int read_current_timer(unsigned long *timer_value); #define NS_SCALE 10 /* 2^10, carefully chosen */ #define US_SCALE 32 /* 2^32, arbitralrily chosen */ -extern void 
mark_tsc_unstable(void); +extern void mark_tsc_unstable(char *msg); extern void set_cyc2ns_scale(unsigned long khz); #endif -- cgit v1.1 From 8b8ca80e192b10eecc01fc44a2902510af86f73b Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 2 May 2007 19:27:09 +0200 Subject: [PATCH] x86-64: configurable fake numa node sizes Extends the numa=fake x86_64 command-line option to allow for configurable node sizes. These nodes can be used in conjunction with cpusets for coarse memory resource management. The old command-line option is still supported: numa=fake=32 gives 32 fake NUMA nodes, ignoring the NUMA setup of the actual machine. But now you may configure your system for the node sizes of your choice: numa=fake=2*512,1024,2*256 gives two 512M nodes, one 1024M node, two 256M nodes, and the rest of system memory to a sixth node. The existing hash function is maintained to support the various node sizes that are possible with this implementation. Each node of the same size receives roughly the same amount of available pages, regardless of any reserved memory with its address range. The total available pages on the system is calculated and divided by the number of equal nodes to allocate. These nodes are then dynamically allocated and their borders extended until such time as their number of available pages reaches the required size. Configurable node sizes are recommended when used in conjunction with cpusets for memory control because it eliminates the overhead associated with scanning the zonelists of many smaller full nodes on page_alloc(). 
Cc: Andi Kleen Signed-off-by: David Rientjes Signed-off-by: Andi Kleen Cc: Paul Jackson Cc: Christoph Lameter Signed-off-by: Andrew Morton --- include/asm-x86_64/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/mmzone.h b/include/asm-x86_64/mmzone.h index fb558fb..19a8937 100644 --- a/include/asm-x86_64/mmzone.h +++ b/include/asm-x86_64/mmzone.h @@ -49,7 +49,7 @@ extern int pfn_valid(unsigned long pfn); #ifdef CONFIG_NUMA_EMU #define FAKE_NODE_MIN_SIZE (64*1024*1024) -#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1ul)) +#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1uL)) #endif #endif -- cgit v1.1 From 692174b97d5b871f4b0f648b1fb17aa37b955876 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 May 2007 19:27:09 +0200 Subject: [PATCH] i386: Initialize esp0 properly all the time Whenever we schedule, __switch_to calls load_esp0 which does: tss->esp0 = thread->esp0; This is never initialized for the initial thread (ie "swapper"), so when we're scheduling that, we end up setting esp0 to 0. This is fine: the swapper never leaves ring 0, so this field is never used. lguest, however, gets upset that we're trying to use an unmapped page as our kernel stack. Rather than work around it there, let's initialize it. 
Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/asm-i386/processor.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 11bf899..01ae0ff 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -421,6 +421,7 @@ struct thread_struct { }; #define INIT_THREAD { \ + .esp0 = sizeof(init_stack) + (long)&init_stack, \ .vm86_info = NULL, \ .sysenter_cs = __KERNEL_CS, \ .io_bitmap_ptr = NULL, \ -- cgit v1.1 From eab0c72aecd7982b2c848f7d493ba379efcef15e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 May 2007 19:27:09 +0200 Subject: [PATCH] x86-64: Introduce load_TLS to the "for" loop. GCC (4.1 at least) unrolls it anyway, but I can't believe this code was ever justifiable. (I've also submitted a patch which cleans up i386, which is even uglier). Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/asm-x86_64/desc.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/desc.h b/include/asm-x86_64/desc.h index 7726e74..ac991b5 100644 --- a/include/asm-x86_64/desc.h +++ b/include/asm-x86_64/desc.h @@ -135,16 +135,13 @@ static inline void set_ldt_desc(unsigned cpu, void *addr, int size) (info)->useable == 0 && \ (info)->lm == 0) -#if TLS_SIZE != 24 -# error update this code. 
-#endif - static inline void load_TLS(struct thread_struct *t, unsigned int cpu) { + unsigned int i; u64 *gdt = (u64 *)(cpu_gdt(cpu) + GDT_ENTRY_TLS_MIN); - gdt[0] = t->tls_array[0]; - gdt[1] = t->tls_array[1]; - gdt[2] = t->tls_array[2]; + + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) + gdt[i] = t->tls_array[i]; } /* -- cgit v1.1 From 79e030114a8d97a1dcd593ab84fb986f8c91c536 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 2 May 2007 19:27:09 +0200 Subject: [PATCH] i386: Allow i386 crash kernels to handle x86_64 dumps The specific case I am encountering is kdump under Xen with a 64 bit hypervisor and 32 bit kernel/userspace. The dump created is 64 bit due to the hypervisor but the dump kernel is 32 bit for maximum compatibility. It's possibly less likely to be useful in a purely native scenario but I see no reason to disallow it. [akpm@linux-foundation.org: build fix] Signed-off-by: Ian Campbell Signed-off-by: Andi Kleen Acked-by: Vivek Goyal Cc: Horms Cc: Magnus Damm Cc: "Eric W. Biederman" Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/asm-i386/kexec.h | 3 +++ include/linux/crash_dump.h | 8 ++++++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/asm-i386/kexec.h b/include/asm-i386/kexec.h index c5b4ab9..bcb5b21 100644 --- a/include/asm-i386/kexec.h +++ b/include/asm-i386/kexec.h @@ -42,6 +42,9 @@ /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_386 +/* We can also handle crash dumps from 64 bit kernel. 
*/ +#define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64) + #define MAX_NOTE_BYTES 1024 /* CPU does not save ss and esp on stack if execution is already diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 3250365..22c7ac5 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -14,5 +14,13 @@ extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, extern const struct file_operations proc_vmcore_operations; extern struct proc_dir_entry *proc_vmcore; +/* Architecture code defines this if there are other possible ELF + * machine types, e.g. on bi-arch capable hardware. */ +#ifndef vmcore_elf_check_arch_cross +#define vmcore_elf_check_arch_cross(x) 0 +#endif + +#define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) + #endif /* CONFIG_CRASH_DUMP */ #endif /* LINUX_CRASHDUMP_H */ -- cgit v1.1 From ae1ee11be77f51cedb6c569887dddc70c163ab6d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 May 2007 19:27:10 +0200 Subject: [PATCH] i386: Use per-cpu variables for GDT, PDA Allocating PDA and GDT at boot is a pain. Using simple per-cpu variables adds happiness (although we need the GDT page-aligned for Xen, which we do in a followup patch). 
[akpm@linux-foundation.org: build fix] Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/asm-generic/percpu.h | 1 + include/asm-i386/desc.h | 1 + include/asm-i386/pda.h | 7 +++---- include/asm-i386/processor.h | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 1963762..d984a90 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -1,6 +1,7 @@ #ifndef _ASM_GENERIC_PERCPU_H_ #define _ASM_GENERIC_PERCPU_H_ #include +#include #define __GENERIC_PER_CPU #ifdef CONFIG_SMP diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index 050831f..53c5916 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h @@ -22,6 +22,7 @@ struct Xgt_desc_struct { extern struct Xgt_desc_struct idt_descr; DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); +DECLARE_PER_CPU(struct desc_struct, cpu_gdt[GDT_ENTRIES]); extern struct Xgt_desc_struct early_gdt_descr; static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) diff --git a/include/asm-i386/pda.h b/include/asm-i386/pda.h index b12d59a..aef7f73 100644 --- a/include/asm-i386/pda.h +++ b/include/asm-i386/pda.h @@ -8,6 +8,7 @@ #include #include +#include struct i386_pda { @@ -18,10 +19,8 @@ struct i386_pda struct pt_regs *irq_regs; }; -extern struct i386_pda *_cpu_pda[]; - -#define cpu_pda(i) (_cpu_pda[i]) - +DECLARE_PER_CPU(struct i386_pda, _cpu_pda); +#define cpu_pda(i) (&per_cpu(_cpu_pda, (i))) #define pda_offset(field) offsetof(struct i386_pda, field) extern void __bad_pda_field(void); diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 01ae0ff..cd940be 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -743,7 +743,7 @@ extern unsigned long boot_option_idle_override; extern void enable_sep_cpu(void); extern int sysenter_setup(void); -extern int 
init_gdt(int cpu, struct task_struct *idle); +extern void init_gdt(int cpu, struct task_struct *idle); extern void cpu_set_gdt(int); extern void secondary_cpu_init(void); -- cgit v1.1 From bf50467204b435421d8de33ad080fa46c6f3d50b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 May 2007 19:27:10 +0200 Subject: [PATCH] i386: Use per-cpu GDT immediately upon boot Now we are no longer dynamically allocating the GDT, we don't need the "cpu_gdt_table" at all: we can switch straight from "boot_gdt_table" to the per-cpu GDT. This means initializing the cpu_gdt array in C. The boot CPU uses the per-cpu var directly, then in smp_prepare_cpus() it switches to the per-cpu copy just allocated. For secondary CPUs, the early_gdt_descr is set to point directly to their per-cpu copy. For UP the code is very simple: it keeps using the "per-cpu" GDT as per SMP, but we never have to move. Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/asm-i386/desc.h | 2 -- include/asm-i386/processor.h | 1 - 2 files changed, 3 deletions(-) (limited to 'include') diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index 53c5916..a75ae6b 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h @@ -12,8 +12,6 @@ #include -extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; - struct Xgt_desc_struct { unsigned short size; unsigned long address __attribute__((packed)); diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index cd940be..b25a2f5 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -743,7 +743,6 @@ extern unsigned long boot_option_idle_override; extern void enable_sep_cpu(void); extern int sysenter_setup(void); -extern void init_gdt(int cpu, struct task_struct *idle); extern void cpu_set_gdt(int); extern void secondary_cpu_init(void); -- cgit v1.1 From d2cbcc49e2bfd6eaa44d7e4e5e5f171aaa5ec80d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 
May 2007 19:27:10 +0200 Subject: [PATCH] i386: clean up cpu_init() We now have cpu_init() and secondary_cpu_init() doing nothing but calling _cpu_init() with the same arguments. Rename _cpu_init() to cpu_init() and use it as a replacement for secondary_cpu_init(). Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/asm-i386/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index b25a2f5..80f7e8a 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -744,6 +744,6 @@ extern void enable_sep_cpu(void); extern int sysenter_setup(void); extern void cpu_set_gdt(int); -extern void secondary_cpu_init(void); +extern void cpu_init(void); #endif /* __ASM_I386_PROCESSOR_H */ -- cgit v1.1 From 90a0a06aa81692028864c21f981905fda46b1208 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 May 2007 19:27:10 +0200 Subject: [PATCH] i386: rationalize paravirt wrappers paravirt.c used to implement native versions of all low-level functions. Far cleaner is to have the native versions exposed in the headers and as inline native_XXX, and if !CONFIG_PARAVIRT, then simply #define XXX native_XXX. There are several nice side effects: 1) write_dt_entry() now takes the correct "struct desc_struct *" not "void *". 2) load_TLS is reintroduced to the for loop, not manually unrolled with a #error in case the bounds ever change. 3) Macros become inlines, with type checking. 4) Access to the native versions is trivial for KVM, lguest, Xen and others who might want it. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Avi Kivity Signed-off-by: Andrew Morton --- include/asm-i386/desc.h | 82 +++++++++++++++------- include/asm-i386/io.h | 15 ++-- include/asm-i386/irqflags.h | 61 +++++++++++----- include/asm-i386/msr.h | 163 +++++++++++++++++++++++++++++-------------- include/asm-i386/paravirt.h | 17 ++--- include/asm-i386/processor.h | 94 +++++++++++++++++++------ include/asm-i386/system.h | 139 ++++++++++++++++++++++-------------- 7 files changed, 384 insertions(+), 187 deletions(-) (limited to 'include') diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index a75ae6b..13f701e 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h @@ -57,45 +57,33 @@ static inline void pack_gate(__u32 *a, __u32 *b, #ifdef CONFIG_PARAVIRT #include #else -#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) - -#define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) -#define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr)) +#define load_TR_desc() native_load_tr_desc() +#define load_gdt(dtr) native_load_gdt(dtr) +#define load_idt(dtr) native_load_idt(dtr) #define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr)) #define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt)) -#define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr)) -#define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr)) -#define store_tr(tr) __asm__ ("str %0":"=m" (tr)) +#define store_gdt(dtr) native_store_gdt(dtr) +#define store_idt(dtr) native_store_idt(dtr) +#define store_tr(tr) (tr = native_store_tr()) #define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt)) -#if TLS_SIZE != 24 -# error update this code. 
-#endif - -static inline void load_TLS(struct thread_struct *t, unsigned int cpu) -{ -#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] - C(0); C(1); C(2); -#undef C -} +#define load_TLS(t, cpu) native_load_tls(t, cpu) +#define set_ldt native_set_ldt #define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) #define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) #define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) +#endif -static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b) +static inline void write_dt_entry(struct desc_struct *dt, + int entry, u32 entry_low, u32 entry_high) { - __u32 *lp = (__u32 *)((char *)dt + entry*8); - *lp = entry_a; - *(lp+1) = entry_b; + dt[entry].a = entry_low; + dt[entry].b = entry_high; } -#define set_ldt native_set_ldt -#endif /* CONFIG_PARAVIRT */ - -static inline fastcall void native_set_ldt(const void *addr, - unsigned int entries) +static inline void native_set_ldt(const void *addr, unsigned int entries) { if (likely(entries == 0)) __asm__ __volatile__("lldt %w0"::"q" (0)); @@ -111,6 +99,48 @@ static inline fastcall void native_set_ldt(const void *addr, } } + +static inline void native_load_tr_desc(void) +{ + asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); +} + +static inline void native_load_gdt(const struct Xgt_desc_struct *dtr) +{ + asm volatile("lgdt %0"::"m" (*dtr)); +} + +static inline void native_load_idt(const struct Xgt_desc_struct *dtr) +{ + asm volatile("lidt %0"::"m" (*dtr)); +} + +static inline void native_store_gdt(struct Xgt_desc_struct *dtr) +{ + asm ("sgdt %0":"=m" (*dtr)); +} + +static inline void native_store_idt(struct Xgt_desc_struct *dtr) +{ + asm ("sidt %0":"=m" (*dtr)); +} + +static inline unsigned long native_store_tr(void) +{ + unsigned long tr; + asm ("str %0":"=r" (tr)); + return tr; +} + +static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) +{ + unsigned int i; + struct 
desc_struct *gdt = get_cpu_gdt_table(cpu); + + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) + gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; +} + static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg) { __u32 a, b; diff --git a/include/asm-i386/io.h b/include/asm-i386/io.h index 59fe616..e797586 100644 --- a/include/asm-i386/io.h +++ b/include/asm-i386/io.h @@ -250,19 +250,22 @@ static inline void flush_write_buffers(void) #endif /* __KERNEL__ */ +static inline void native_io_delay(void) +{ + asm volatile("outb %%al,$0x80" : : : "memory"); +} + #if defined(CONFIG_PARAVIRT) #include #else -#define __SLOW_DOWN_IO "outb %%al,$0x80;" - static inline void slow_down_io(void) { - __asm__ __volatile__( - __SLOW_DOWN_IO + native_io_delay(); #ifdef REALLY_SLOW_IO - __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO + native_io_delay(); + native_io_delay(); + native_io_delay(); #endif - : : ); } #endif diff --git a/include/asm-i386/irqflags.h b/include/asm-i386/irqflags.h index 17b18cf..c1cdd09 100644 --- a/include/asm-i386/irqflags.h +++ b/include/asm-i386/irqflags.h @@ -10,6 +10,42 @@ #ifndef _ASM_IRQFLAGS_H #define _ASM_IRQFLAGS_H +#ifndef __ASSEMBLY__ +static inline unsigned long native_save_fl(void) +{ + unsigned long f; + asm volatile("pushfl ; popl %0":"=g" (f): /* no input */); + return f; +} + +static inline void native_restore_fl(unsigned long f) +{ + asm volatile("pushl %0 ; popfl": /* no output */ + :"g" (f) + :"memory", "cc"); +} + +static inline void native_irq_disable(void) +{ + asm volatile("cli": : :"memory"); +} + +static inline void native_irq_enable(void) +{ + asm volatile("sti": : :"memory"); +} + +static inline void native_safe_halt(void) +{ + asm volatile("sti; hlt": : :"memory"); +} + +static inline void native_halt(void) +{ + asm volatile("hlt": : :"memory"); +} +#endif /* __ASSEMBLY__ */ + #ifdef CONFIG_PARAVIRT #include #else @@ -17,35 +53,22 @@ static inline unsigned long __raw_local_save_flags(void) { - unsigned long 
flags; - - __asm__ __volatile__( - "pushfl ; popl %0" - : "=g" (flags) - : /* no input */ - ); - - return flags; + return native_save_fl(); } static inline void raw_local_irq_restore(unsigned long flags) { - __asm__ __volatile__( - "pushl %0 ; popfl" - : /* no output */ - :"g" (flags) - :"memory", "cc" - ); + native_restore_fl(flags); } static inline void raw_local_irq_disable(void) { - __asm__ __volatile__("cli" : : : "memory"); + native_irq_disable(); } static inline void raw_local_irq_enable(void) { - __asm__ __volatile__("sti" : : : "memory"); + native_irq_enable(); } /* @@ -54,7 +77,7 @@ static inline void raw_local_irq_enable(void) */ static inline void raw_safe_halt(void) { - __asm__ __volatile__("sti; hlt" : : : "memory"); + native_safe_halt(); } /* @@ -63,7 +86,7 @@ static inline void raw_safe_halt(void) */ static inline void halt(void) { - __asm__ __volatile__("hlt": : :"memory"); + native_halt(); } /* diff --git a/include/asm-i386/msr.h b/include/asm-i386/msr.h index 2ad3f30..00acaa8 100644 --- a/include/asm-i386/msr.h +++ b/include/asm-i386/msr.h @@ -1,6 +1,74 @@ #ifndef __ASM_MSR_H #define __ASM_MSR_H +#include + +static inline unsigned long long native_read_msr(unsigned int msr) +{ + unsigned long long val; + + asm volatile("rdmsr" : "=A" (val) : "c" (msr)); + return val; +} + +static inline unsigned long long native_read_msr_safe(unsigned int msr, + int *err) +{ + unsigned long long val; + + asm volatile("2: rdmsr ; xorl %0,%0\n" + "1:\n\t" + ".section .fixup,\"ax\"\n\t" + "3: movl %3,%0 ; jmp 1b\n\t" + ".previous\n\t" + ".section __ex_table,\"a\"\n" + " .align 4\n\t" + " .long 2b,3b\n\t" + ".previous" + : "=r" (*err), "=A" (val) + : "c" (msr), "i" (-EFAULT)); + + return val; +} + +static inline void native_write_msr(unsigned int msr, unsigned long long val) +{ + asm volatile("wrmsr" : : "c" (msr), "A"(val)); +} + +static inline int native_write_msr_safe(unsigned int msr, + unsigned long long val) +{ + int err; + asm volatile("2: wrmsr ; xorl 
%0,%0\n" + "1:\n\t" + ".section .fixup,\"ax\"\n\t" + "3: movl %4,%0 ; jmp 1b\n\t" + ".previous\n\t" + ".section __ex_table,\"a\"\n" + " .align 4\n\t" + " .long 2b,3b\n\t" + ".previous" + : "=a" (err) + : "c" (msr), "0" ((u32)val), "d" ((u32)(val>>32)), + "i" (-EFAULT)); + return err; +} + +static inline unsigned long long native_read_tsc(void) +{ + unsigned long long val; + asm volatile("rdtsc" : "=A" (val)); + return val; +} + +static inline unsigned long long native_read_pmc(void) +{ + unsigned long long val; + asm volatile("rdpmc" : "=A" (val)); + return val; +} + #ifdef CONFIG_PARAVIRT #include #else @@ -11,22 +79,20 @@ * pointer indirection), this allows gcc to optimize better */ -#define rdmsr(msr,val1,val2) \ - __asm__ __volatile__("rdmsr" \ - : "=a" (val1), "=d" (val2) \ - : "c" (msr)) +#define rdmsr(msr,val1,val2) \ + do { \ + unsigned long long __val = native_read_msr(msr); \ + val1 = __val; \ + val2 = __val >> 32; \ + } while(0) -#define wrmsr(msr,val1,val2) \ - __asm__ __volatile__("wrmsr" \ - : /* no outputs */ \ - : "c" (msr), "a" (val1), "d" (val2)) +#define wrmsr(msr,val1,val2) \ + native_write_msr(msr, ((unsigned long long)val2 << 32) | val1) -#define rdmsrl(msr,val) do { \ - unsigned long l__,h__; \ - rdmsr (msr, l__, h__); \ - val = l__; \ - val |= ((u64)h__<<32); \ -} while(0) +#define rdmsrl(msr,val) \ + do { \ + (val) = native_read_msr(msr); \ + } while(0) static inline void wrmsrl (unsigned long msr, unsigned long long val) { @@ -37,50 +103,41 @@ static inline void wrmsrl (unsigned long msr, unsigned long long val) } /* wrmsr with exception handling */ -#define wrmsr_safe(msr,a,b) ({ int ret__; \ - asm volatile("2: wrmsr ; xorl %0,%0\n" \ - "1:\n\t" \ - ".section .fixup,\"ax\"\n\t" \ - "3: movl %4,%0 ; jmp 1b\n\t" \ - ".previous\n\t" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n\t" \ - " .long 2b,3b\n\t" \ - ".previous" \ - : "=a" (ret__) \ - : "c" (msr), "0" (a), "d" (b), "i" (-EFAULT));\ - ret__; }) +#define wrmsr_safe(msr,val1,val2) \ 
+ (native_write_msr_safe(msr, ((unsigned long long)val2 << 32) | val1)) /* rdmsr with exception handling */ -#define rdmsr_safe(msr,a,b) ({ int ret__; \ - asm volatile("2: rdmsr ; xorl %0,%0\n" \ - "1:\n\t" \ - ".section .fixup,\"ax\"\n\t" \ - "3: movl %4,%0 ; jmp 1b\n\t" \ - ".previous\n\t" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n\t" \ - " .long 2b,3b\n\t" \ - ".previous" \ - : "=r" (ret__), "=a" (*(a)), "=d" (*(b)) \ - : "c" (msr), "i" (-EFAULT));\ - ret__; }) - -#define rdtsc(low,high) \ - __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) - -#define rdtscl(low) \ - __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx") - -#define rdtscll(val) \ - __asm__ __volatile__("rdtsc" : "=A" (val)) +#define rdmsr_safe(msr,p1,p2) \ + ({ \ + int __err; \ + unsigned long long __val = native_read_msr_safe(msr, &__err);\ + (*p1) = __val; \ + (*p2) = __val >> 32; \ + __err; \ + }) + +#define rdtsc(low,high) \ + do { \ + u64 _l = native_read_tsc(); \ + (low) = (u32)_l; \ + (high) = _l >> 32; \ + } while(0) + +#define rdtscl(low) \ + do { \ + (low) = native_read_tsc(); \ + } while(0) + +#define rdtscll(val) ((val) = native_read_tsc()) #define write_tsc(val1,val2) wrmsr(0x10, val1, val2) -#define rdpmc(counter,low,high) \ - __asm__ __volatile__("rdpmc" \ - : "=a" (low), "=d" (high) \ - : "c" (counter)) +#define rdpmc(counter,low,high) \ + do { \ + u64 _l = native_read_pmc(); \ + low = (u32)_l; \ + high = _l >> 32; \ + } while(0) #endif /* !CONFIG_PARAVIRT */ #ifdef CONFIG_SMP diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index e63f1e44..32acebc 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -29,6 +29,7 @@ struct thread_struct; struct Xgt_desc_struct; struct tss_struct; struct mm_struct; +struct desc_struct; struct paravirt_ops { unsigned int kernel_rpl; @@ -105,14 +106,13 @@ struct paravirt_ops void (*set_ldt)(const void *desc, unsigned entries); unsigned long (*store_tr)(void); void (*load_tls)(struct 
thread_struct *t, unsigned int cpu); - void (*write_ldt_entry)(void *dt, int entrynum, - u32 low, u32 high); - void (*write_gdt_entry)(void *dt, int entrynum, - u32 low, u32 high); - void (*write_idt_entry)(void *dt, int entrynum, - u32 low, u32 high); - void (*load_esp0)(struct tss_struct *tss, - struct thread_struct *thread); + void (*write_ldt_entry)(struct desc_struct *, + int entrynum, u32 low, u32 high); + void (*write_gdt_entry)(struct desc_struct *, + int entrynum, u32 low, u32 high); + void (*write_idt_entry)(struct desc_struct *, + int entrynum, u32 low, u32 high); + void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t); void (*set_iopl_mask)(unsigned mask); @@ -232,6 +232,7 @@ static inline void halt(void) #define get_kernel_rpl() (paravirt_ops.kernel_rpl) +/* These should all do BUG_ON(_err), but our headers are too tangled. */ #define rdmsr(msr,val1,val2) do { \ int _err; \ u64 _l = paravirt_ops.read_msr(msr,&_err); \ diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 80f7e8a..96edfdf 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -147,7 +147,7 @@ static inline void detect_ht(struct cpuinfo_x86 *c) {} #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ -static inline fastcall void native_cpuid(unsigned int *eax, unsigned int *ebx, +static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { /* ecx is often an input as well as an output. 
*/ @@ -545,13 +545,7 @@ static inline void rep_nop(void) #define cpu_relax() rep_nop() -#ifdef CONFIG_PARAVIRT -#include -#else -#define paravirt_enabled() 0 -#define __cpuid native_cpuid - -static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread) +static inline void native_load_esp0(struct tss_struct *tss, struct thread_struct *thread) { tss->esp0 = thread->esp0; /* This can only happen when SEP is enabled, no need to test "SEP"arately */ @@ -561,24 +555,60 @@ static inline void load_esp0(struct tss_struct *tss, struct thread_struct *threa } } -/* - * These special macros can be used to get or set a debugging register - */ -#define get_debugreg(var, register) \ - __asm__("movl %%db" #register ", %0" \ - :"=r" (var)) -#define set_debugreg(value, register) \ - __asm__("movl %0,%%db" #register \ - : /* no output */ \ - :"r" (value)) -#define set_iopl_mask native_set_iopl_mask -#endif /* CONFIG_PARAVIRT */ +static inline unsigned long native_get_debugreg(int regno) +{ + unsigned long val = 0; /* Damn you, gcc! 
*/ + + switch (regno) { + case 0: + asm("movl %%db0, %0" :"=r" (val)); break; + case 1: + asm("movl %%db1, %0" :"=r" (val)); break; + case 2: + asm("movl %%db2, %0" :"=r" (val)); break; + case 3: + asm("movl %%db3, %0" :"=r" (val)); break; + case 6: + asm("movl %%db6, %0" :"=r" (val)); break; + case 7: + asm("movl %%db7, %0" :"=r" (val)); break; + default: + BUG(); + } + return val; +} + +static inline void native_set_debugreg(int regno, unsigned long value) +{ + switch (regno) { + case 0: + asm("movl %0,%%db0" : /* no output */ :"r" (value)); + break; + case 1: + asm("movl %0,%%db1" : /* no output */ :"r" (value)); + break; + case 2: + asm("movl %0,%%db2" : /* no output */ :"r" (value)); + break; + case 3: + asm("movl %0,%%db3" : /* no output */ :"r" (value)); + break; + case 6: + asm("movl %0,%%db6" : /* no output */ :"r" (value)); + break; + case 7: + asm("movl %0,%%db7" : /* no output */ :"r" (value)); + break; + default: + BUG(); + } +} /* * Set IOPL bits in EFLAGS from given mask */ -static fastcall inline void native_set_iopl_mask(unsigned mask) +static inline void native_set_iopl_mask(unsigned mask) { unsigned int reg; __asm__ __volatile__ ("pushfl;" @@ -591,6 +621,28 @@ static fastcall inline void native_set_iopl_mask(unsigned mask) : "i" (~X86_EFLAGS_IOPL), "r" (mask)); } +#ifdef CONFIG_PARAVIRT +#include +#else +#define paravirt_enabled() 0 +#define __cpuid native_cpuid + +static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread) +{ + native_load_esp0(tss, thread); +} + +/* + * These special macros can be used to get or set a debugging register + */ +#define get_debugreg(var, register) \ + (var) = native_get_debugreg(register) +#define set_debugreg(value, register) \ + native_set_debugreg(register, value) + +#define set_iopl_mask native_set_iopl_mask +#endif /* CONFIG_PARAVIRT */ + /* * Generic CPUID function * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx diff --git a/include/asm-i386/system.h 
b/include/asm-i386/system.h index a6d20d9..c3a58c0 100644 --- a/include/asm-i386/system.h +++ b/include/asm-i386/system.h @@ -88,65 +88,96 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" \ #define savesegment(seg, value) \ asm volatile("mov %%" #seg ",%0":"=rm" (value)) + +static inline void native_clts(void) +{ + asm volatile ("clts"); +} + +static inline unsigned long native_read_cr0(void) +{ + unsigned long val; + asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); + return val; +} + +static inline void native_write_cr0(unsigned long val) +{ + asm volatile("movl %0,%%cr0": :"r" (val)); +} + +static inline unsigned long native_read_cr2(void) +{ + unsigned long val; + asm volatile("movl %%cr2,%0\n\t" :"=r" (val)); + return val; +} + +static inline void native_write_cr2(unsigned long val) +{ + asm volatile("movl %0,%%cr2": :"r" (val)); +} + +static inline unsigned long native_read_cr3(void) +{ + unsigned long val; + asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); + return val; +} + +static inline void native_write_cr3(unsigned long val) +{ + asm volatile("movl %0,%%cr3": :"r" (val)); +} + +static inline unsigned long native_read_cr4(void) +{ + unsigned long val; + asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); + return val; +} + +static inline unsigned long native_read_cr4_safe(void) +{ + unsigned long val; + /* This could fault if %cr4 does not exist */ + asm("1: movl %%cr4, %0 \n" + "2: \n" + ".section __ex_table,\"a\" \n" + ".long 1b,2b \n" + ".previous \n" + : "=r" (val): "0" (0)); + return val; +} + +static inline void native_write_cr4(unsigned long val) +{ + asm volatile("movl %0,%%cr4": :"r" (val)); +} + +static inline void native_wbinvd(void) +{ + asm volatile("wbinvd": : :"memory"); +} + + #ifdef CONFIG_PARAVIRT #include #else -#define read_cr0() ({ \ - unsigned int __dummy; \ - __asm__ __volatile__( \ - "movl %%cr0,%0\n\t" \ - :"=r" (__dummy)); \ - __dummy; \ -}) -#define write_cr0(x) \ - __asm__ __volatile__("movl %0,%%cr0": :"r" (x)) - -#define read_cr2() ({ \ - 
unsigned int __dummy; \ - __asm__ __volatile__( \ - "movl %%cr2,%0\n\t" \ - :"=r" (__dummy)); \ - __dummy; \ -}) -#define write_cr2(x) \ - __asm__ __volatile__("movl %0,%%cr2": :"r" (x)) - -#define read_cr3() ({ \ - unsigned int __dummy; \ - __asm__ ( \ - "movl %%cr3,%0\n\t" \ - :"=r" (__dummy)); \ - __dummy; \ -}) -#define write_cr3(x) \ - __asm__ __volatile__("movl %0,%%cr3": :"r" (x)) - -#define read_cr4() ({ \ - unsigned int __dummy; \ - __asm__( \ - "movl %%cr4,%0\n\t" \ - :"=r" (__dummy)); \ - __dummy; \ -}) -#define read_cr4_safe() ({ \ - unsigned int __dummy; \ - /* This could fault if %cr4 does not exist */ \ - __asm__("1: movl %%cr4, %0 \n" \ - "2: \n" \ - ".section __ex_table,\"a\" \n" \ - ".long 1b,2b \n" \ - ".previous \n" \ - : "=r" (__dummy): "0" (0)); \ - __dummy; \ -}) -#define write_cr4(x) \ - __asm__ __volatile__("movl %0,%%cr4": :"r" (x)) - -#define wbinvd() \ - __asm__ __volatile__ ("wbinvd": : :"memory") +#define read_cr0() (native_read_cr0()) +#define write_cr0(x) (native_write_cr0(x)) +#define read_cr2() (native_read_cr2()) +#define write_cr2(x) (native_write_cr2(x)) +#define read_cr3() (native_read_cr3()) +#define write_cr3(x) (native_write_cr3(x)) +#define read_cr4() (native_read_cr4()) +#define read_cr4_safe() (native_read_cr4_safe()) +#define write_cr4(x) (native_write_cr4(x)) +#define wbinvd() (native_wbinvd()) /* Clear the 'TS' bit */ -#define clts() __asm__ __volatile__ ("clts") +#define clts() (native_clts()) + #endif/* CONFIG_PARAVIRT */ /* Set the 'TS' bit */ -- cgit v1.1 From d01ad8dd56527be72947b4b9997bb2c05783c3ed Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 2 May 2007 19:27:10 +0200 Subject: [PATCH] x86: Improve handling of kernel mappings in change_page_attr Fix various broken corner cases in i386 and x86-64 change_page_attr. 
AK: split off from tighten kernel image access rights Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- include/asm-i386/pgtable.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index c3b58d4..143ddc4 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -159,6 +159,7 @@ void paging_init(void); extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC; #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) +#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD) #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) @@ -166,6 +167,7 @@ extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC; #define PAGE_KERNEL __pgprot(__PAGE_KERNEL) #define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) #define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) +#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX) #define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) #define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) -- cgit v1.1 From 6fb14755a676282a4e6caa05a08c92db8e45cfff Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 2 May 2007 19:27:10 +0200 Subject: [PATCH] x86: tighten kernel image page access rights On x86-64, kernel memory freed after init can be entirely unmapped instead of just getting 'poisoned' by overwriting with a debug pattern. On i386 and x86-64 (under CONFIG_DEBUG_RODATA), kernel text and bug table can also be write-protected. Compared to the first version, this one prevents re-creating deleted mappings in the kernel image range on x86-64, if those got removed previously. This, together with the original changes, prevents temporarily having inconsistent mappings when cacheability attributes are being changed on such pages (e.g. from AGP code). 
While on i386 such duplicate mappings don't exist, the same change is done there, too, both for consistency and because checking pte_present() before using various other pte_XXX functions is a requirement anyway. At once, i386 code gets adjusted to use pte_huge() instead of open coding this. AK: split out cpa() changes Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- include/linux/poison.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/poison.h b/include/linux/poison.h index 3e628f9..89580b7 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -26,9 +26,6 @@ /********** arch/$ARCH/mm/init.c **********/ #define POISON_FREE_INITMEM 0xcc -/********** arch/x86_64/mm/init.c **********/ -#define POISON_FREE_INITDATA 0xba - /********** arch/ia64/hp/common/sba_iommu.c **********/ /* * arch/ia64/hp/common/sba_iommu.c uses a 16-byte poison string with a -- cgit v1.1 From b4531e863dbd06b5d336afefdb37483b690dea59 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:10 +0200 Subject: [PATCH] i386: Use X86_EFLAGS_IF in irqflags.h. Move X86_EFLAGS_IF et al out to a new header: processor-flags.h, so we can include it from irqflags.h and use it in raw_irqs_disabled_flags(). As a side-effect, we could now use these flags in .S files. 
Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen --- include/asm-i386/irqflags.h | 3 ++- include/asm-i386/processor-flags.h | 26 ++++++++++++++++++++++++++ include/asm-i386/processor.h | 22 +--------------------- 3 files changed, 29 insertions(+), 22 deletions(-) create mode 100644 include/asm-i386/processor-flags.h (limited to 'include') diff --git a/include/asm-i386/irqflags.h b/include/asm-i386/irqflags.h index c1cdd09..eff8585 100644 --- a/include/asm-i386/irqflags.h +++ b/include/asm-i386/irqflags.h @@ -9,6 +9,7 @@ */ #ifndef _ASM_IRQFLAGS_H #define _ASM_IRQFLAGS_H +#include #ifndef __ASSEMBLY__ static inline unsigned long native_save_fl(void) @@ -119,7 +120,7 @@ static inline unsigned long __raw_local_irq_save(void) static inline int raw_irqs_disabled_flags(unsigned long flags) { - return !(flags & (1 << 9)); + return !(flags & X86_EFLAGS_IF); } static inline int raw_irqs_disabled(void) diff --git a/include/asm-i386/processor-flags.h b/include/asm-i386/processor-flags.h new file mode 100644 index 0000000..b4711c2 --- /dev/null +++ b/include/asm-i386/processor-flags.h @@ -0,0 +1,26 @@ +#ifndef __ASM_I386_PROCESSOR_FLAGS_H +#define __ASM_I386_PROCESSOR_FLAGS_H +/* Various flags defined: can be included from assembler. 
*/ + +/* + * EFLAGS bits + */ +#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ +#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ +#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ +#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ +#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ +#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ +#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ +#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ +#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ +#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ +#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ +#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ +#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ +#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ +#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ +#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ +#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ + +#endif /* __ASM_I386_PROCESSOR_FLAGS_H */ diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 96edfdf..11838df 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -21,6 +21,7 @@ #include #include #include +#include /* flag for disabling the tsc */ extern int tsc_disable; @@ -126,27 +127,6 @@ extern void detect_ht(struct cpuinfo_x86 *c); static inline void detect_ht(struct cpuinfo_x86 *c) {} #endif -/* - * EFLAGS bits - */ -#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ -#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ -#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ -#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ -#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ -#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ -#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ -#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ -#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ -#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ -#define 
X86_EFLAGS_NT 0x00004000 /* Nested Task */ -#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ -#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ -#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ -#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ -#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ -#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ - static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { -- cgit v1.1 From 2bff73830c3df5f575d3bc21bf19df1a10bf7091 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 2 May 2007 19:27:10 +0200 Subject: [PATCH] x86-64: use lru instead of page->index and page->private for pgd lists management. x86_64 currently simulates a list using the index and private fields of the page struct. Seems that the code was inherited from i386. But x86_64 does not use the slab to allocate pgds and pmds etc. So the lru field is not used by the slab and therefore available. This patch uses standard list operations on page->lru to realize pgd tracking. 
Signed-off-by: Christoph Lameter Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/asm-x86_64/pgalloc.h | 14 +++----------- include/asm-x86_64/pgtable.h | 2 +- 2 files changed, 4 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/pgalloc.h b/include/asm-x86_64/pgalloc.h index 31d4971..8bb5646 100644 --- a/include/asm-x86_64/pgalloc.h +++ b/include/asm-x86_64/pgalloc.h @@ -44,24 +44,16 @@ static inline void pgd_list_add(pgd_t *pgd) struct page *page = virt_to_page(pgd); spin_lock(&pgd_lock); - page->index = (pgoff_t)pgd_list; - if (pgd_list) - pgd_list->private = (unsigned long)&page->index; - pgd_list = page; - page->private = (unsigned long)&pgd_list; + list_add(&page->lru, &pgd_list); spin_unlock(&pgd_lock); } static inline void pgd_list_del(pgd_t *pgd) { - struct page *next, **pprev, *page = virt_to_page(pgd); + struct page *page = virt_to_page(pgd); spin_lock(&pgd_lock); - next = (struct page *)page->index; - pprev = (struct page **)page->private; - *pprev = next; - if (next) - next->private = (unsigned long)pprev; + list_del(&page->lru); spin_unlock(&pgd_lock); } diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index c1865e3..599993f 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -410,7 +410,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) extern spinlock_t pgd_lock; -extern struct page *pgd_list; +extern struct list_head pgd_list; void vmalloc_sync_all(void); extern int kern_addr_valid(unsigned long addr); -- cgit v1.1 From ca906e42312781c38b7a9625109fc65b937ca56c Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 2 May 2007 19:27:10 +0200 Subject: [PATCH] x86: sys_ioperm() prototype cleanup - there's no reason for duplicating the prototype from include/linux/syscalls.h in include/asm-x86_64/unistd.h - every file should #include the headers containing the prototypes 
for its global functions Signed-off-by: Adrian Bunk Signed-off-by: Andi Kleen --- include/asm-x86_64/unistd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h index c5f596e..576b297 100644 --- a/include/asm-x86_64/unistd.h +++ b/include/asm-x86_64/unistd.h @@ -655,7 +655,6 @@ __SYSCALL(__NR_move_pages, sys_move_pages) #include asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs); -asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on); struct sigaction; asmlinkage long sys_rt_sigaction(int sig, const struct sigaction __user *act, -- cgit v1.1 From 4fbb5968810b237e81977f131986b9efd5245368 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 May 2007 19:27:11 +0200 Subject: [PATCH] i386: cleanup GDT Access Now we have an explicit per-cpu GDT variable, we don't need to keep the descriptors around to use them to find the GDT: expose cpu_gdt directly. We could go further and make load_gdt() pack the descriptor for us, or even assume it means "load the current cpu's GDT" which is what it always does.
Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/asm-i386/desc.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index 13f701e..4a97406 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h @@ -18,16 +18,13 @@ struct Xgt_desc_struct { unsigned short pad; } __attribute__ ((packed)); -extern struct Xgt_desc_struct idt_descr; -DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); DECLARE_PER_CPU(struct desc_struct, cpu_gdt[GDT_ENTRIES]); -extern struct Xgt_desc_struct early_gdt_descr; - static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) { - return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address; + return per_cpu(cpu_gdt, cpu); } +extern struct Xgt_desc_struct idt_descr; extern struct desc_struct idt_table[]; extern void set_intr_gate(unsigned int irq, void * addr); -- cgit v1.1 From 01a2f435564b4baab61328b4018d36464468f57b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:11 +0200 Subject: [PATCH] i386: Add smp_ops interface Add a smp_ops interface. This abstracts the API defined by <linux/smp.h> for use within arch/i386. The primary intent is that it be used by a paravirtualizing hypervisor to implement SMP, but it could also be used by non-APIC-using sub-architectures. This is related to CONFIG_PARAVIRT, but is implemented unconditionally since it is simpler that way and not a highly performance-sensitive interface.
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Ingo Molnar Cc: James Bottomley --- include/asm-i386/smp.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'include') diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index 9cab153..2d083cb 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h @@ -49,6 +49,59 @@ extern void cpu_exit_clear(void); extern void cpu_uninit(void); #endif +struct smp_ops +{ + void (*smp_prepare_boot_cpu)(void); + void (*smp_prepare_cpus)(unsigned max_cpus); + int (*cpu_up)(unsigned cpu); + void (*smp_cpus_done)(unsigned max_cpus); + + void (*smp_send_stop)(void); + void (*smp_send_reschedule)(int cpu); + int (*smp_call_function_mask)(cpumask_t mask, + void (*func)(void *info), void *info, + int wait); +}; + +extern struct smp_ops smp_ops; + +static inline void smp_prepare_boot_cpu(void) +{ + smp_ops.smp_prepare_boot_cpu(); +} +static inline void smp_prepare_cpus(unsigned int max_cpus) +{ + smp_ops.smp_prepare_cpus(max_cpus); +} +static inline int __cpu_up(unsigned int cpu) +{ + return smp_ops.cpu_up(cpu); +} +static inline void smp_cpus_done(unsigned int max_cpus) +{ + smp_ops.smp_cpus_done(max_cpus); +} + +static inline void smp_send_stop(void) +{ + smp_ops.smp_send_stop(); +} +static inline void smp_send_reschedule(int cpu) +{ + smp_ops.smp_send_reschedule(cpu); +} +static inline int smp_call_function_mask(cpumask_t mask, + void (*func) (void *info), void *info, + int wait) +{ + return smp_ops.smp_call_function_mask(mask, func, info, wait); +} + +void native_smp_prepare_boot_cpu(void); +void native_smp_prepare_cpus(unsigned int max_cpus); +int native_cpu_up(unsigned int cpunum); +void native_smp_cpus_done(unsigned int max_cpus); + #ifndef CONFIG_PARAVIRT #define startup_ipi_hook(phys_apicid, start_eip, start_esp) \ do { } while (0) -- cgit v1.1 From 07f3331c6bfd27a06dfb0ca9fa4f06dec6606876 Mon Sep 17 00:00:00 2001 From: Jeremy 
Fitzhardinge Date: Wed, 2 May 2007 19:27:11 +0200 Subject: [PATCH] i386: Add machine_ops interface to abstract halting and rebooting machine_ops is an interface for the machine_* functions defined in <linux/reboot.h>. This is intended to allow hypervisors to intercept the reboot process, but it could be used to implement other x86 subarchitecture reboots. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen --- include/asm-i386/reboot.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/asm-i386/reboot.h (limited to 'include') diff --git a/include/asm-i386/reboot.h b/include/asm-i386/reboot.h new file mode 100644 index 0000000..e9e3ffc --- /dev/null +++ b/include/asm-i386/reboot.h @@ -0,0 +1,20 @@ +#ifndef _ASM_REBOOT_H +#define _ASM_REBOOT_H + +struct pt_regs; + +struct machine_ops +{ + void (*restart)(char *cmd); + void (*halt)(void); + void (*power_off)(void); + void (*shutdown)(void); + void (*crash_shutdown)(struct pt_regs *); + void (*emergency_restart)(void); +}; + +extern struct machine_ops machine_ops; + +void machine_real_restart(unsigned char *code, int length); + +#endif /* _ASM_REBOOT_H */ -- cgit v1.1 From b00742d399513a4100c24cc2accefdc1bb1e0b15 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:11 +0200 Subject: [PATCH] x86-64: Account for module percpu space separately from kernel percpu Rather than using a single constant PERCPU_ENOUGH_ROOM, compute it as the sum of kernel_percpu + PERCPU_MODULE_RESERVE. This is now common to all architectures; if an architecture wants to set PERCPU_ENOUGH_ROOM to something special, then it may do so (ia64 is the only one which does). Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Eric W.
Biederman Cc: Andi Kleen --- include/asm-alpha/percpu.h | 14 -------------- include/asm-sparc64/percpu.h | 10 ---------- include/asm-x86_64/percpu.h | 10 ---------- include/linux/percpu.h | 9 ++++++++- 4 files changed, 8 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/asm-alpha/percpu.h b/include/asm-alpha/percpu.h index 651ebb1..48348fe 100644 --- a/include/asm-alpha/percpu.h +++ b/include/asm-alpha/percpu.h @@ -1,20 +1,6 @@ #ifndef __ALPHA_PERCPU_H #define __ALPHA_PERCPU_H -/* - * Increase the per cpu area for Alpha so that - * modules using percpu area can load. - */ -#ifdef CONFIG_MODULES -# define PERCPU_MODULE_RESERVE 8192 -#else -# define PERCPU_MODULE_RESERVE 0 -#endif - -#define PERCPU_ENOUGH_ROOM \ - (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ - PERCPU_MODULE_RESERVE) - #include #endif /* __ALPHA_PERCPU_H */ diff --git a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h index 0d3df76..ced8cbd 100644 --- a/include/asm-sparc64/percpu.h +++ b/include/asm-sparc64/percpu.h @@ -5,16 +5,6 @@ #ifdef CONFIG_SMP -#ifdef CONFIG_MODULES -# define PERCPU_MODULE_RESERVE 8192 -#else -# define PERCPU_MODULE_RESERVE 0 -#endif - -#define PERCPU_ENOUGH_ROOM \ - (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ - PERCPU_MODULE_RESERVE) - extern void setup_per_cpu_areas(void); extern unsigned long __per_cpu_base; diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h index 5ed0ef3..c6fbb67 100644 --- a/include/asm-x86_64/percpu.h +++ b/include/asm-x86_64/percpu.h @@ -11,16 +11,6 @@ #include -#ifdef CONFIG_MODULES -# define PERCPU_MODULE_RESERVE 8192 -#else -# define PERCPU_MODULE_RESERVE 0 -#endif - -#define PERCPU_ENOUGH_ROOM \ - (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ - PERCPU_MODULE_RESERVE) - #define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) #define __my_cpu_offset() read_pda(data_offset) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 
600e3d3..b72be2f 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -11,9 +11,16 @@ /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ #ifndef PERCPU_ENOUGH_ROOM -#define PERCPU_ENOUGH_ROOM 32768 +#ifdef CONFIG_MODULES +#define PERCPU_MODULE_RESERVE 8192 +#else +#define PERCPU_MODULE_RESERVE 0 #endif +#define PERCPU_ENOUGH_ROOM \ + (__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE) +#endif /* PERCPU_ENOUGH_ROOM */ + /* * Must be an lvalue. Since @var must be a simple identifier, * we force a syntax error here if it isn't. -- cgit v1.1 From b92e9fac400d4ae5bc7a75c568e9844ec53ea329 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 2 May 2007 19:27:11 +0200 Subject: [PATCH] x86: fix amd64-agp aperture validation Under CONFIG_DISCONTIGMEM, assuming that a !pfn_valid() implies all subsequent pfn-s are also invalid is wrong. Thus replace this by explicitly checking against the E820 map. AK: make e820 on x86-64 not initdata Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Acked-by: Mark Langsdorf --- include/asm-i386/e820.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-i386/e820.h b/include/asm-i386/e820.h index c5b8fc6..096a2a8 100644 --- a/include/asm-i386/e820.h +++ b/include/asm-i386/e820.h @@ -38,6 +38,7 @@ extern struct e820map e820; extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type); +extern int e820_any_mapped(u64 start, u64 end, unsigned type); extern void find_max_pfn(void); extern void register_bootmem_low_pages(unsigned long max_low_pfn); extern void e820_register_memory(void); -- cgit v1.1 From 5d02d7ae73ac9446f20bbf604b04a74637178b35 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:11 +0200 Subject: [PATCH] x86-64: Use X86_EFLAGS_IF in x86-64/irqflags.h. As per i386 patch: move X86_EFLAGS_IF et al out to a new header: processor-flags.h, so we can include it from irqflags.h and use it in raw_irqs_disabled_flags(). 
As a side-effect, we could now use these flags in .S files. Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen --- include/asm-x86_64/irqflags.h | 9 +++++---- include/asm-x86_64/processor-flags.h | 26 ++++++++++++++++++++++++++ include/asm-x86_64/processor.h | 22 +--------------------- 3 files changed, 32 insertions(+), 25 deletions(-) create mode 100644 include/asm-x86_64/processor-flags.h (limited to 'include') diff --git a/include/asm-x86_64/irqflags.h b/include/asm-x86_64/irqflags.h index cce6937..86e70fe 100644 --- a/include/asm-x86_64/irqflags.h +++ b/include/asm-x86_64/irqflags.h @@ -9,6 +9,7 @@ */ #ifndef _ASM_IRQFLAGS_H #define _ASM_IRQFLAGS_H +#include #ifndef __ASSEMBLY__ /* @@ -53,19 +54,19 @@ static inline void raw_local_irq_disable(void) { unsigned long flags = __raw_local_save_flags(); - raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); + raw_local_irq_restore((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); } static inline void raw_local_irq_enable(void) { unsigned long flags = __raw_local_save_flags(); - raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); + raw_local_irq_restore((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); } static inline int raw_irqs_disabled_flags(unsigned long flags) { - return !(flags & (1<<9)) || (flags & (1 << 18)); + return !(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC); } #else /* CONFIG_X86_VSMP */ @@ -82,7 +83,7 @@ static inline void raw_local_irq_enable(void) static inline int raw_irqs_disabled_flags(unsigned long flags) { - return !(flags & (1 << 9)); + return !(flags & X86_EFLAGS_IF); } #endif diff --git a/include/asm-x86_64/processor-flags.h b/include/asm-x86_64/processor-flags.h new file mode 100644 index 0000000..806112f --- /dev/null +++ b/include/asm-x86_64/processor-flags.h @@ -0,0 +1,26 @@ +#ifndef __ASM_X86_64_PROCESSOR_FLAGS_H +#define __ASM_X86_64_PROCESSOR_FLAGS_H +/* Various flags defined: can be included from assembler. 
*/ + +/* + * EFLAGS bits + */ +#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ +#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ +#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ +#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ +#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ +#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ +#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ +#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ +#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ +#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ +#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ +#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ +#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ +#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ +#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ +#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ +#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ + +#endif /* __ASM_X86_64_PROCESSOR_FLAGS_H */ diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h index 76552d7..2c1497d 100644 --- a/include/asm-x86_64/processor.h +++ b/include/asm-x86_64/processor.h @@ -20,6 +20,7 @@ #include #include #include +#include #define TF_MASK 0x00000100 #define IF_MASK 0x00000200 @@ -103,27 +104,6 @@ extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); extern unsigned short num_cache_leaves; /* - * EFLAGS bits - */ -#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ -#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ -#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ -#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ -#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ -#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ -#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ -#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ -#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ -#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ -#define 
X86_EFLAGS_NT 0x00004000 /* Nested Task */ -#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ -#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ -#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ -#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ -#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ -#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ - -/* * Intel CPU features in CR4 */ #define X86_CR4_VME 0x0001 /* enable vm86 extensions */ -- cgit v1.1 From bbf30a1650be396b5467f769f4fbee715f16ec36 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 2 May 2007 19:27:12 +0200 Subject: [PATCH] x86-64: fix arithmetic in comment The xmm space on x86_64 is 256 bytes. Signed-off-by: Avi Kivity Signed-off-by: Andi Kleen --- include/asm-x86_64/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h index 2c1497d..6a11734 100644 --- a/include/asm-x86_64/processor.h +++ b/include/asm-x86_64/processor.h @@ -181,7 +181,7 @@ struct i387_fxsave_struct { u32 mxcsr; u32 mxcsr_mask; u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ - u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 128 bytes */ + u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ u32 padding[24]; } __attribute__ ((aligned (16))); -- cgit v1.1 From 1353ebb4b48151e3810d9a60449edd43a90ea3c3 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:12 +0200 Subject: [PATCH] i386: Clean up asm-i386/bugs.h Most of asm-i386/bugs.h is code which should be in a C file, so put it there. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Linus Torvalds --- include/asm-i386/alternative.h | 1 + include/asm-i386/bugs.h | 194 +---------------------------------------- 2 files changed, 5 insertions(+), 190 deletions(-) (limited to 'include') diff --git a/include/asm-i386/alternative.h b/include/asm-i386/alternative.h index b8fa955..dbc1a29 100644 --- a/include/asm-i386/alternative.h +++ b/include/asm-i386/alternative.h @@ -16,6 +16,7 @@ struct alt_instr { u8 pad; }; +extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); struct module; diff --git a/include/asm-i386/bugs.h b/include/asm-i386/bugs.h index c90c7c4..df539b3 100644 --- a/include/asm-i386/bugs.h +++ b/include/asm-i386/bugs.h @@ -1,198 +1,12 @@ /* - * include/asm-i386/bugs.h - * - * Copyright (C) 1994 Linus Torvalds - * - * Cyrix stuff, June 1998 by: - * - Rafael R. Reilova (moved everything from head.S), - * - * - Channing Corn (tests & fixes), - * - Andrew D. Balsa (code cleanup). - */ - -/* * This is included by init/main.c to check for architecture-dependent bugs. * * Needs: * void check_bugs(void); */ +#ifndef _ASM_I386_BUG_H +#define _ASM_I386_BUG_H -#include -#include -#include -#include -#include - -static int __init no_halt(char *s) -{ - boot_cpu_data.hlt_works_ok = 0; - return 1; -} - -__setup("no-hlt", no_halt); - -static int __init mca_pentium(char *s) -{ - mca_pentium_flag = 1; - return 1; -} - -__setup("mca-pentium", mca_pentium); - -static int __init no_387(char *s) -{ - boot_cpu_data.hard_math = 0; - write_cr0(0xE | read_cr0()); - return 1; -} - -__setup("no387", no_387); - -static double __initdata x = 4195835.0; -static double __initdata y = 3145727.0; - -/* - * This used to check for exceptions.. - * However, it turns out that to support that, - * the XMM trap handlers basically had to - * be buggy. 
So let's have a correct XMM trap - * handler, and forget about printing out - * some status at boot. - * - * We should really only care about bugs here - * anyway. Not features. - */ -static void __init check_fpu(void) -{ - if (!boot_cpu_data.hard_math) { -#ifndef CONFIG_MATH_EMULATION - printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); - printk(KERN_EMERG "Giving up.\n"); - for (;;) ; -#endif - return; - } - -/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */ - /* Test for the divl bug.. */ - __asm__("fninit\n\t" - "fldl %1\n\t" - "fdivl %2\n\t" - "fmull %2\n\t" - "fldl %1\n\t" - "fsubp %%st,%%st(1)\n\t" - "fistpl %0\n\t" - "fwait\n\t" - "fninit" - : "=m" (*&boot_cpu_data.fdiv_bug) - : "m" (*&x), "m" (*&y)); - if (boot_cpu_data.fdiv_bug) - printk("Hmm, FPU with FDIV bug.\n"); -} - -static void __init check_hlt(void) -{ - if (paravirt_enabled()) - return; - - printk(KERN_INFO "Checking 'hlt' instruction... "); - if (!boot_cpu_data.hlt_works_ok) { - printk("disabled\n"); - return; - } - halt(); - halt(); - halt(); - halt(); - printk("OK.\n"); -} - -/* - * Most 386 processors have a bug where a POPAD can lock the - * machine even from user space. - */ - -static void __init check_popad(void) -{ -#ifndef CONFIG_X86_POPAD_OK - int res, inp = (int) &res; - - printk(KERN_INFO "Checking for popad bug... "); - __asm__ __volatile__( - "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " - : "=&a" (res) - : "d" (inp) - : "ecx", "edi" ); - /* If this fails, it means that any user program may lock the CPU hard. Too bad. */ - if (res != 12345678) printk( "Buggy.\n" ); - else printk( "OK.\n" ); -#endif -} - -/* - * Check whether we are able to run this kernel safely on SMP. - * - * - In order to run on a i386, we need to be compiled for i386 - * (for due to lack of "invlpg" and working WP on a i386) - * - In order to run on anything without a TSC, we need to be - * compiled for a i486. 
- * - In order to support the local APIC on a buggy Pentium machine, - * we need to be compiled with CONFIG_X86_GOOD_APIC disabled, - * which happens implicitly if compiled for a Pentium or lower - * (unless an advanced selection of CPU features is used) as an - * otherwise config implies a properly working local APIC without - * the need to do extra reads from the APIC. -*/ - -static void __init check_config(void) -{ -/* - * We'd better not be a i386 if we're configured to use some - * i486+ only features! (WP works in supervisor mode and the - * new "invlpg" and "bswap" instructions) - */ -#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP) - if (boot_cpu_data.x86 == 3) - panic("Kernel requires i486+ for 'invlpg' and other features"); -#endif - -/* - * If we configured ourselves for a TSC, we'd better have one! - */ -#ifdef CONFIG_X86_TSC - if (!cpu_has_tsc && !tsc_disable) - panic("Kernel compiled for Pentium+, requires TSC feature!"); -#endif - -/* - * If we were told we had a good local APIC, check for buggy Pentia, - * i.e. all B steppings and the C2 stepping of P54C when using their - * integrated APIC (see 11AP erratum in "Pentium Processor - * Specification Update"). - */ -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC) - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL - && cpu_has_apic - && boot_cpu_data.x86 == 5 - && boot_cpu_data.x86_model == 2 - && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11)) - panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!"); -#endif -} - -extern void alternative_instructions(void); +extern void __init check_bugs(void); -static void __init check_bugs(void) -{ - identify_cpu(&boot_cpu_data); -#ifndef CONFIG_SMP - printk("CPU: "); - print_cpu_info(&boot_cpu_data); -#endif - check_config(); - check_fpu(); - check_hlt(); - check_popad(); - init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 
6 : boot_cpu_data.x86); - alternative_instructions(); -} +#endif /* _ASM_I386_BUG_H */ -- cgit v1.1 From a6c4e076ee4c1ea670e4faa55814e63dd08e3f29 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:12 +0200 Subject: [PATCH] i386: clean up identify_cpu identify_cpu() is used to identify both the boot CPU and secondary CPUs, but it performs some actions which only apply to the boot CPU. Those functions are therefore really __init functions, but because they're called by identify_cpu(), they must be marked __cpuinit. This patch splits identify_cpu() into identify_boot_cpu() and identify_secondary_cpu(), and calls the appropriate init functions from each. Also, identify_boot_cpu() and all the functions it dominates are marked __init. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen --- include/asm-i386/processor.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 11838df..9d895cc 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -116,7 +116,8 @@ extern char ignore_fpu_irq; void __init cpu_detect(struct cpuinfo_x86 *c); -extern void identify_cpu(struct cpuinfo_x86 *); +extern void identify_boot_cpu(void); +extern void identify_secondary_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); extern unsigned short num_cache_leaves; -- cgit v1.1 From d4f7a2c18e59e0304a1c733589ce14fc02fec1bd Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:12 +0200 Subject: [PATCH] i386: Relocate VDSO ELF headers to match mapped location with COMPAT_VDSO Some versions of libc can't deal with a VDSO which doesn't have its ELF headers matching its mapped address. COMPAT_VDSO maps the VDSO at a specific system-wide fixed address. 
Previously this was all done at build time, on the grounds that the fixed VDSO address is always at the top of the address space. However, a hypervisor may reserve some of that address space, pushing the fixmap address down. This patch does the adjustment dynamically at runtime, depending on the runtime location of the VDSO fixmap. [ Patch has been through several hands: Jan Beulich wrote the original version; Zach reworked it, and Jeremy converted it to relocate phdrs as well as sections. ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Zachary Amsden Cc: "Jan Beulich" Cc: Eric W. Biederman Cc: Andi Kleen Cc: Ingo Molnar Cc: Roland McGrath --- include/asm-i386/elf.h | 28 ++++++++++------------------ include/asm-i386/fixmap.h | 8 ++------ include/linux/elf.h | 17 +++++++++++++++++ 3 files changed, 29 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h index 952b3ee..d304ab4 100644 --- a/include/asm-i386/elf.h +++ b/include/asm-i386/elf.h @@ -133,39 +133,31 @@ extern int dump_task_extended_fpu (struct task_struct *, struct user_fxsr_struct #define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs) #define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO)) -#define VDSO_BASE ((unsigned long)current->mm->context.vdso) - -#ifdef CONFIG_COMPAT_VDSO -# define VDSO_COMPAT_BASE VDSO_HIGH_BASE -# define VDSO_PRELINK VDSO_HIGH_BASE -#else -# define VDSO_COMPAT_BASE VDSO_BASE -# define VDSO_PRELINK 0 -#endif +#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso) +#define VDSO_PRELINK 0 #define VDSO_SYM(x) \ - (VDSO_COMPAT_BASE + (unsigned long)(x) - VDSO_PRELINK) + (VDSO_CURRENT_BASE + (unsigned long)(x) - VDSO_PRELINK) #define VDSO_HIGH_EHDR ((const struct elfhdr *) VDSO_HIGH_BASE) -#define VDSO_EHDR ((const struct elfhdr *) VDSO_COMPAT_BASE) +#define VDSO_EHDR ((const struct elfhdr *) VDSO_CURRENT_BASE) extern void __kernel_vsyscall; #define VDSO_ENTRY
VDSO_SYM(&__kernel_vsyscall) -#ifndef CONFIG_COMPAT_VDSO -#define ARCH_HAS_SETUP_ADDITIONAL_PAGES struct linux_binprm; + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES extern int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack); -#endif extern unsigned int vdso_enabled; -#define ARCH_DLINFO \ -do if (vdso_enabled) { \ - NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ - NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_COMPAT_BASE); \ +#define ARCH_DLINFO \ +do if (vdso_enabled) { \ + NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \ } while (0) #endif diff --git a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h index 3e9f610..e5651b2 100644 --- a/include/asm-i386/fixmap.h +++ b/include/asm-i386/fixmap.h @@ -19,13 +19,9 @@ * Leave one empty page between vmalloc'ed areas and * the start of the fixmap. */ -#ifndef CONFIG_COMPAT_VDSO extern unsigned long __FIXADDR_TOP; -#else -#define __FIXADDR_TOP 0xfffff000 -#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) -#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) -#endif +#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) +#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) #ifndef __ASSEMBLY__ #include diff --git a/include/linux/elf.h b/include/linux/elf.h index 60713e6..8b17ffe 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -83,6 +83,23 @@ typedef __s64 Elf64_Sxword; #define DT_DEBUG 21 #define DT_TEXTREL 22 #define DT_JMPREL 23 +#define DT_ENCODING 32 +#define OLD_DT_LOOS 0x60000000 +#define DT_LOOS 0x6000000d +#define DT_HIOS 0x6ffff000 +#define DT_VALRNGLO 0x6ffffd00 +#define DT_VALRNGHI 0x6ffffdff +#define DT_ADDRRNGLO 0x6ffffe00 +#define DT_ADDRRNGHI 0x6ffffeff +#define DT_VERSYM 0x6ffffff0 +#define DT_RELACOUNT 0x6ffffff9 +#define DT_RELCOUNT 0x6ffffffa +#define DT_FLAGS_1 0x6ffffffb +#define DT_VERDEF 0x6ffffffc +#define DT_VERDEFNUM 0x6ffffffd +#define DT_VERNEED 0x6ffffffe +#define DT_VERNEEDNUM 0x6fffffff +#define OLD_DT_HIOS 0x6fffffff #define 
DT_LOPROC 0x70000000 #define DT_HIPROC 0x7fffffff -- cgit v1.1 From 1dbf527c51c6c20c19869c8125cb5b87c3d09506 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:12 +0200 Subject: [PATCH] i386: Make COMPAT_VDSO runtime selectable. Now that relocation of the VDSO for COMPAT_VDSO users is done at runtime rather than compile time, it is possible to enable/disable compat mode at runtime. This patch allows you to enable COMPAT_VDSO mode with "vdso=2" on the kernel command line, or via sysctl. (Switching on a running system shouldn't be done lightly; any process which was relying on the compat VDSO will be upset if it goes away.) The COMPAT_VDSO config option still exists, but if enabled it just makes vdso_enabled default to VDSO_COMPAT. +From: Hugh Dickins Fix oops from i386-make-compat_vdso-runtime-selectable.patch. Even mingetty at system startup finds it easy to trigger an oops while reading /proc/PID/maps: though it has a good hold on the mm itself, that cannot stop exit_mm() from resetting tsk->mm to NULL. (It is usually show_map()'s call to get_gate_vma() which oopses, and I expect we could change that to check priv->tail_vma instead; but no matter, even m_start()'s call just after get_task_mm() is racy.) Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Zachary Amsden Cc: "Jan Beulich" Cc: Eric W. 
Biederman Cc: Andi Kleen Cc: Ingo Molnar Cc: Roland McGrath --- include/asm-i386/page.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h index 7b19f45..fd3f64a 100644 --- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h @@ -143,9 +143,7 @@ extern int page_is_ram(unsigned long pagenr); #include #include -#ifndef CONFIG_COMPAT_VDSO #define __HAVE_ARCH_GATE_AREA 1 -#endif #endif /* __KERNEL__ */ #endif /* _I386_PAGE_H */ -- cgit v1.1 From c169859d6dfc7471ef9f2dbd720936e17906a084 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:12 +0200 Subject: [PATCH] x86-64: Clean up asm-x86_64/bugs.h Most of asm-x86_64/bugs.h is code which should be in a C file, so put it there. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Linus Torvalds --- include/asm-x86_64/alternative.h | 1 + include/asm-x86_64/bugs.h | 30 ++++-------------------------- 2 files changed, 5 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/alternative.h b/include/asm-x86_64/alternative.h index a6657b4..67ebea3 100644 --- a/include/asm-x86_64/alternative.h +++ b/include/asm-x86_64/alternative.h @@ -16,6 +16,7 @@ struct alt_instr { u8 pad[5]; }; +extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); struct module; diff --git a/include/asm-x86_64/bugs.h b/include/asm-x86_64/bugs.h index d86c5dd..b33dc04 100644 --- a/include/asm-x86_64/bugs.h +++ b/include/asm-x86_64/bugs.h @@ -1,28 +1,6 @@ -/* - * include/asm-x86_64/bugs.h - * - * Copyright (C) 1994 Linus Torvalds - * Copyright (C) 2000 SuSE - * - * This is included by init/main.c to check for architecture-dependent bugs. 
- * - * Needs: - * void check_bugs(void); - */ +#ifndef _ASM_X86_64_BUGS_H +#define _ASM_X86_64_BUGS_H -#include -#include -#include -#include +void check_bugs(void); -extern void alternative_instructions(void); - -static void __init check_bugs(void) -{ - identify_cpu(&boot_cpu_data); -#if !defined(CONFIG_SMP) - printk("CPU: "); - print_cpu_info(&boot_cpu_data); -#endif - alternative_instructions(); -} +#endif /* _ASM_X86_64_BUGS_H */ -- cgit v1.1 From f039b754714a422959027cb18bb33760eb8153f0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:12 +0200 Subject: [PATCH] x86: Don't use MWAIT on AMD Family 10 It doesn't put the CPU into deeper sleep states, so it's better to use the standard idle loop to save power. But allow to reenable it anyways for benchmarking. I also removed the obsolete idle=halt on i386 Cc: andreas.herrmann@amd.com Signed-off-by: Andi Kleen --- include/asm-i386/processor.h | 2 ++ include/asm-x86_64/proto.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 9d895cc..882d3f8 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -779,4 +779,6 @@ extern int sysenter_setup(void); extern void cpu_set_gdt(int); extern void cpu_init(void); +extern int force_mwait; + #endif /* __ASM_I386_PROCESSOR_H */ diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index 3f8f285..98063bc 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -119,6 +119,8 @@ extern int gsi_irq_sharing(int gsi); extern void smp_local_timer_interrupt(void); +extern int force_mwait; + long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); void i8254_timer_resume(void); -- cgit v1.1 From 4bc5aa91fb1e544ad37805520030a0d9fc6e11d3 Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Wed, 2 May 2007 19:27:12 +0200 Subject: [PATCH] x86: Clean up x86 control register and MSR macros (corrected) This patch is based on Rusty's recent cleanup of the EFLAGS-related macros; it extends the same kind of cleanup to control registers and MSRs. It also unifies these between i386 and x86-64; at least with regards to MSRs, the two had definitely gotten out of sync. Signed-off-by: H. Peter Anvin Signed-off-by: Andi Kleen --- include/asm-i386/Kbuild | 2 + include/asm-i386/msr-index.h | 273 ++++++++++++++++++++++++++++++++++ include/asm-i386/msr.h | 237 +----------------------------- include/asm-i386/processor-flags.h | 65 +++++++++ include/asm-i386/processor.h | 35 ----- include/asm-x86_64/Kbuild | 3 +- include/asm-x86_64/msr-index.h | 1 + include/asm-x86_64/msr.h | 274 +---------------------------------- include/asm-x86_64/processor-flags.h | 27 +--- include/asm-x86_64/processor.h | 31 ---- 10 files changed, 356 insertions(+), 592 deletions(-) create mode 100644 include/asm-i386/msr-index.h create mode 100644 include/asm-x86_64/msr-index.h (limited to 'include') diff --git a/include/asm-i386/Kbuild b/include/asm-i386/Kbuild index 5ae93af..cbf6e8f 100644 --- a/include/asm-i386/Kbuild +++ b/include/asm-i386/Kbuild @@ -3,8 +3,10 @@ include include/asm-generic/Kbuild.asm header-y += boot.h header-y += debugreg.h header-y += ldt.h +header-y += msr-index.h header-y += ptrace-abi.h header-y += ucontext.h +unifdef-y += msr.h unifdef-y += mtrr.h unifdef-y += vm86.h diff --git a/include/asm-i386/msr-index.h b/include/asm-i386/msr-index.h new file mode 100644 index 0000000..f119080 --- /dev/null +++ b/include/asm-i386/msr-index.h @@ -0,0 +1,273 @@ +#ifndef __ASM_MSR_INDEX_H +#define __ASM_MSR_INDEX_H + +/* CPU model specific register (MSR) numbers */ + +/* x86-64 specific MSRs */ +#define MSR_EFER 0xc0000080 /* extended feature register */ +#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ +#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL 
target */ +#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */ +#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ +#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ +#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ +#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ + +/* EFER bits: */ +#define _EFER_SCE 0 /* SYSCALL/SYSRET */ +#define _EFER_LME 8 /* Long mode enable */ +#define _EFER_LMA 10 /* Long mode active (read-only) */ +#define _EFER_NX 11 /* No execute enable */ + +#define EFER_SCE (1<<_EFER_SCE) +#define EFER_LME (1<<_EFER_LME) +#define EFER_LMA (1<<_EFER_LMA) +#define EFER_NX (1<<_EFER_NX) + +/* Intel MSRs. Some also available on other CPUs */ +#define MSR_IA32_PERFCTR0 0x000000c1 +#define MSR_IA32_PERFCTR1 0x000000c2 +#define MSR_FSB_FREQ 0x000000cd + +#define MSR_MTRRcap 0x000000fe +#define MSR_IA32_BBL_CR_CTL 0x00000119 + +#define MSR_IA32_SYSENTER_CS 0x00000174 +#define MSR_IA32_SYSENTER_ESP 0x00000175 +#define MSR_IA32_SYSENTER_EIP 0x00000176 + +#define MSR_IA32_MCG_CAP 0x00000179 +#define MSR_IA32_MCG_STATUS 0x0000017a +#define MSR_IA32_MCG_CTL 0x0000017b + +#define MSR_IA32_PEBS_ENABLE 0x000003f1 +#define MSR_IA32_DS_AREA 0x00000600 +#define MSR_IA32_PERF_CAPABILITIES 0x00000345 + +#define MSR_MTRRfix64K_00000 0x00000250 +#define MSR_MTRRfix16K_80000 0x00000258 +#define MSR_MTRRfix16K_A0000 0x00000259 +#define MSR_MTRRfix4K_C0000 0x00000268 +#define MSR_MTRRfix4K_C8000 0x00000269 +#define MSR_MTRRfix4K_D0000 0x0000026a +#define MSR_MTRRfix4K_D8000 0x0000026b +#define MSR_MTRRfix4K_E0000 0x0000026c +#define MSR_MTRRfix4K_E8000 0x0000026d +#define MSR_MTRRfix4K_F0000 0x0000026e +#define MSR_MTRRfix4K_F8000 0x0000026f +#define MSR_MTRRdefType 0x000002ff + +#define MSR_IA32_DEBUGCTLMSR 0x000001d9 +#define MSR_IA32_LASTBRANCHFROMIP 0x000001db +#define MSR_IA32_LASTBRANCHTOIP 0x000001dc +#define MSR_IA32_LASTINTFROMIP 0x000001dd +#define MSR_IA32_LASTINTTOIP 0x000001de + +#define MSR_IA32_MC0_CTL 0x00000400 +#define 
MSR_IA32_MC0_STATUS 0x00000401 +#define MSR_IA32_MC0_ADDR 0x00000402 +#define MSR_IA32_MC0_MISC 0x00000403 + +#define MSR_P6_PERFCTR0 0x000000c1 +#define MSR_P6_PERFCTR1 0x000000c2 +#define MSR_P6_EVNTSEL0 0x00000186 +#define MSR_P6_EVNTSEL1 0x00000187 + +/* K7/K8 MSRs. Not complete. See the architecture manual for a more + complete list. */ +#define MSR_K7_EVNTSEL0 0xc0010000 +#define MSR_K7_PERFCTR0 0xc0010004 +#define MSR_K7_EVNTSEL1 0xc0010001 +#define MSR_K7_PERFCTR1 0xc0010005 +#define MSR_K7_EVNTSEL2 0xc0010002 +#define MSR_K7_PERFCTR2 0xc0010006 +#define MSR_K7_EVNTSEL3 0xc0010003 +#define MSR_K7_PERFCTR3 0xc0010007 +#define MSR_K8_TOP_MEM1 0xc001001a +#define MSR_K7_CLK_CTL 0xc001001b +#define MSR_K8_TOP_MEM2 0xc001001d +#define MSR_K8_SYSCFG 0xc0010010 +#define MSR_K7_HWCR 0xc0010015 +#define MSR_K8_HWCR 0xc0010015 +#define MSR_K7_FID_VID_CTL 0xc0010041 +#define MSR_K7_FID_VID_STATUS 0xc0010042 +#define MSR_K8_ENABLE_C1E 0xc0010055 + +/* K6 MSRs */ +#define MSR_K6_EFER 0xc0000080 +#define MSR_K6_STAR 0xc0000081 +#define MSR_K6_WHCR 0xc0000082 +#define MSR_K6_UWCCR 0xc0000085 +#define MSR_K6_EPMR 0xc0000086 +#define MSR_K6_PSOR 0xc0000087 +#define MSR_K6_PFIR 0xc0000088 + +/* Centaur-Hauls/IDT defined MSRs. 
*/ +#define MSR_IDT_FCR1 0x00000107 +#define MSR_IDT_FCR2 0x00000108 +#define MSR_IDT_FCR3 0x00000109 +#define MSR_IDT_FCR4 0x0000010a + +#define MSR_IDT_MCR0 0x00000110 +#define MSR_IDT_MCR1 0x00000111 +#define MSR_IDT_MCR2 0x00000112 +#define MSR_IDT_MCR3 0x00000113 +#define MSR_IDT_MCR4 0x00000114 +#define MSR_IDT_MCR5 0x00000115 +#define MSR_IDT_MCR6 0x00000116 +#define MSR_IDT_MCR7 0x00000117 +#define MSR_IDT_MCR_CTRL 0x00000120 + +/* VIA Cyrix defined MSRs*/ +#define MSR_VIA_FCR 0x00001107 +#define MSR_VIA_LONGHAUL 0x0000110a +#define MSR_VIA_RNG 0x0000110b +#define MSR_VIA_BCR2 0x00001147 + +/* Transmeta defined MSRs */ +#define MSR_TMTA_LONGRUN_CTRL 0x80868010 +#define MSR_TMTA_LONGRUN_FLAGS 0x80868011 +#define MSR_TMTA_LRTI_READOUT 0x80868018 +#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a + +/* Intel defined MSRs. */ +#define MSR_IA32_P5_MC_ADDR 0x00000000 +#define MSR_IA32_P5_MC_TYPE 0x00000001 +#define MSR_IA32_TSC 0x00000010 +#define MSR_IA32_PLATFORM_ID 0x00000017 +#define MSR_IA32_EBL_CR_POWERON 0x0000002a + +#define MSR_IA32_APICBASE 0x0000001b +#define MSR_IA32_APICBASE_BSP (1<<8) +#define MSR_IA32_APICBASE_ENABLE (1<<11) +#define MSR_IA32_APICBASE_BASE (0xfffff<<12) + +#define MSR_IA32_UCODE_WRITE 0x00000079 +#define MSR_IA32_UCODE_REV 0x0000008b + +#define MSR_IA32_PERF_STATUS 0x00000198 +#define MSR_IA32_PERF_CTL 0x00000199 + +#define MSR_IA32_MPERF 0x000000e7 +#define MSR_IA32_APERF 0x000000e8 + +#define MSR_IA32_THERM_CONTROL 0x0000019a +#define MSR_IA32_THERM_INTERRUPT 0x0000019b +#define MSR_IA32_THERM_STATUS 0x0000019c +#define MSR_IA32_MISC_ENABLE 0x000001a0 + +/* Intel Model 6 */ +#define MSR_P6_EVNTSEL0 0x00000186 +#define MSR_P6_EVNTSEL1 0x00000187 + +/* P4/Xeon+ specific */ +#define MSR_IA32_MCG_EAX 0x00000180 +#define MSR_IA32_MCG_EBX 0x00000181 +#define MSR_IA32_MCG_ECX 0x00000182 +#define MSR_IA32_MCG_EDX 0x00000183 +#define MSR_IA32_MCG_ESI 0x00000184 +#define MSR_IA32_MCG_EDI 0x00000185 +#define MSR_IA32_MCG_EBP 0x00000186 +#define 
MSR_IA32_MCG_ESP 0x00000187 +#define MSR_IA32_MCG_EFLAGS 0x00000188 +#define MSR_IA32_MCG_EIP 0x00000189 +#define MSR_IA32_MCG_RESERVED 0x0000018a + +/* Pentium IV performance counter MSRs */ +#define MSR_P4_BPU_PERFCTR0 0x00000300 +#define MSR_P4_BPU_PERFCTR1 0x00000301 +#define MSR_P4_BPU_PERFCTR2 0x00000302 +#define MSR_P4_BPU_PERFCTR3 0x00000303 +#define MSR_P4_MS_PERFCTR0 0x00000304 +#define MSR_P4_MS_PERFCTR1 0x00000305 +#define MSR_P4_MS_PERFCTR2 0x00000306 +#define MSR_P4_MS_PERFCTR3 0x00000307 +#define MSR_P4_FLAME_PERFCTR0 0x00000308 +#define MSR_P4_FLAME_PERFCTR1 0x00000309 +#define MSR_P4_FLAME_PERFCTR2 0x0000030a +#define MSR_P4_FLAME_PERFCTR3 0x0000030b +#define MSR_P4_IQ_PERFCTR0 0x0000030c +#define MSR_P4_IQ_PERFCTR1 0x0000030d +#define MSR_P4_IQ_PERFCTR2 0x0000030e +#define MSR_P4_IQ_PERFCTR3 0x0000030f +#define MSR_P4_IQ_PERFCTR4 0x00000310 +#define MSR_P4_IQ_PERFCTR5 0x00000311 +#define MSR_P4_BPU_CCCR0 0x00000360 +#define MSR_P4_BPU_CCCR1 0x00000361 +#define MSR_P4_BPU_CCCR2 0x00000362 +#define MSR_P4_BPU_CCCR3 0x00000363 +#define MSR_P4_MS_CCCR0 0x00000364 +#define MSR_P4_MS_CCCR1 0x00000365 +#define MSR_P4_MS_CCCR2 0x00000366 +#define MSR_P4_MS_CCCR3 0x00000367 +#define MSR_P4_FLAME_CCCR0 0x00000368 +#define MSR_P4_FLAME_CCCR1 0x00000369 +#define MSR_P4_FLAME_CCCR2 0x0000036a +#define MSR_P4_FLAME_CCCR3 0x0000036b +#define MSR_P4_IQ_CCCR0 0x0000036c +#define MSR_P4_IQ_CCCR1 0x0000036d +#define MSR_P4_IQ_CCCR2 0x0000036e +#define MSR_P4_IQ_CCCR3 0x0000036f +#define MSR_P4_IQ_CCCR4 0x00000370 +#define MSR_P4_IQ_CCCR5 0x00000371 +#define MSR_P4_ALF_ESCR0 0x000003ca +#define MSR_P4_ALF_ESCR1 0x000003cb +#define MSR_P4_BPU_ESCR0 0x000003b2 +#define MSR_P4_BPU_ESCR1 0x000003b3 +#define MSR_P4_BSU_ESCR0 0x000003a0 +#define MSR_P4_BSU_ESCR1 0x000003a1 +#define MSR_P4_CRU_ESCR0 0x000003b8 +#define MSR_P4_CRU_ESCR1 0x000003b9 +#define MSR_P4_CRU_ESCR2 0x000003cc +#define MSR_P4_CRU_ESCR3 0x000003cd +#define MSR_P4_CRU_ESCR4 0x000003e0 +#define 
MSR_P4_CRU_ESCR5 0x000003e1 +#define MSR_P4_DAC_ESCR0 0x000003a8 +#define MSR_P4_DAC_ESCR1 0x000003a9 +#define MSR_P4_FIRM_ESCR0 0x000003a4 +#define MSR_P4_FIRM_ESCR1 0x000003a5 +#define MSR_P4_FLAME_ESCR0 0x000003a6 +#define MSR_P4_FLAME_ESCR1 0x000003a7 +#define MSR_P4_FSB_ESCR0 0x000003a2 +#define MSR_P4_FSB_ESCR1 0x000003a3 +#define MSR_P4_IQ_ESCR0 0x000003ba +#define MSR_P4_IQ_ESCR1 0x000003bb +#define MSR_P4_IS_ESCR0 0x000003b4 +#define MSR_P4_IS_ESCR1 0x000003b5 +#define MSR_P4_ITLB_ESCR0 0x000003b6 +#define MSR_P4_ITLB_ESCR1 0x000003b7 +#define MSR_P4_IX_ESCR0 0x000003c8 +#define MSR_P4_IX_ESCR1 0x000003c9 +#define MSR_P4_MOB_ESCR0 0x000003aa +#define MSR_P4_MOB_ESCR1 0x000003ab +#define MSR_P4_MS_ESCR0 0x000003c0 +#define MSR_P4_MS_ESCR1 0x000003c1 +#define MSR_P4_PMH_ESCR0 0x000003ac +#define MSR_P4_PMH_ESCR1 0x000003ad +#define MSR_P4_RAT_ESCR0 0x000003bc +#define MSR_P4_RAT_ESCR1 0x000003bd +#define MSR_P4_SAAT_ESCR0 0x000003ae +#define MSR_P4_SAAT_ESCR1 0x000003af +#define MSR_P4_SSU_ESCR0 0x000003be +#define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */ + +#define MSR_P4_TBPU_ESCR0 0x000003c2 +#define MSR_P4_TBPU_ESCR1 0x000003c3 +#define MSR_P4_TC_ESCR0 0x000003c4 +#define MSR_P4_TC_ESCR1 0x000003c5 +#define MSR_P4_U2L_ESCR0 0x000003b0 +#define MSR_P4_U2L_ESCR1 0x000003b1 + +/* Intel Core-based CPU performance counters */ +#define MSR_CORE_PERF_FIXED_CTR0 0x00000309 +#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a +#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b +#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d +#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e +#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390 + +/* Geode defined MSRs */ +#define MSR_GEODE_BUSCONT_CONF0 0x00001900 + +#endif /* __ASM_MSR_INDEX_H */ diff --git a/include/asm-i386/msr.h b/include/asm-i386/msr.h index 00acaa8..9559894 100644 --- a/include/asm-i386/msr.h +++ b/include/asm-i386/msr.h @@ -1,6 +1,11 @@ #ifndef __ASM_MSR_H #define 
__ASM_MSR_H +#include + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ + #include static inline unsigned long long native_read_msr(unsigned int msr) @@ -153,234 +158,6 @@ static inline void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) wrmsr(msr_no, l, h); } #endif /* CONFIG_SMP */ - -/* symbolic names for some interesting MSRs */ -/* Intel defined MSRs. */ -#define MSR_IA32_P5_MC_ADDR 0 -#define MSR_IA32_P5_MC_TYPE 1 -#define MSR_IA32_PLATFORM_ID 0x17 -#define MSR_IA32_EBL_CR_POWERON 0x2a - -#define MSR_IA32_APICBASE 0x1b -#define MSR_IA32_APICBASE_BSP (1<<8) -#define MSR_IA32_APICBASE_ENABLE (1<<11) -#define MSR_IA32_APICBASE_BASE (0xfffff<<12) - -#define MSR_IA32_UCODE_WRITE 0x79 -#define MSR_IA32_UCODE_REV 0x8b - -#define MSR_P6_PERFCTR0 0xc1 -#define MSR_P6_PERFCTR1 0xc2 -#define MSR_FSB_FREQ 0xcd - - -#define MSR_IA32_BBL_CR_CTL 0x119 - -#define MSR_IA32_SYSENTER_CS 0x174 -#define MSR_IA32_SYSENTER_ESP 0x175 -#define MSR_IA32_SYSENTER_EIP 0x176 - -#define MSR_IA32_MCG_CAP 0x179 -#define MSR_IA32_MCG_STATUS 0x17a -#define MSR_IA32_MCG_CTL 0x17b - -/* P4/Xeon+ specific */ -#define MSR_IA32_MCG_EAX 0x180 -#define MSR_IA32_MCG_EBX 0x181 -#define MSR_IA32_MCG_ECX 0x182 -#define MSR_IA32_MCG_EDX 0x183 -#define MSR_IA32_MCG_ESI 0x184 -#define MSR_IA32_MCG_EDI 0x185 -#define MSR_IA32_MCG_EBP 0x186 -#define MSR_IA32_MCG_ESP 0x187 -#define MSR_IA32_MCG_EFLAGS 0x188 -#define MSR_IA32_MCG_EIP 0x189 -#define MSR_IA32_MCG_RESERVED 0x18A - -#define MSR_P6_EVNTSEL0 0x186 -#define MSR_P6_EVNTSEL1 0x187 - -#define MSR_IA32_PERF_STATUS 0x198 -#define MSR_IA32_PERF_CTL 0x199 - -#define MSR_IA32_MPERF 0xE7 -#define MSR_IA32_APERF 0xE8 - -#define MSR_IA32_THERM_CONTROL 0x19a -#define MSR_IA32_THERM_INTERRUPT 0x19b -#define MSR_IA32_THERM_STATUS 0x19c -#define MSR_IA32_MISC_ENABLE 0x1a0 - -#define MSR_IA32_DEBUGCTLMSR 0x1d9 -#define MSR_IA32_LASTBRANCHFROMIP 0x1db -#define MSR_IA32_LASTBRANCHTOIP 0x1dc -#define MSR_IA32_LASTINTFROMIP 0x1dd -#define MSR_IA32_LASTINTTOIP 0x1de - 
-#define MSR_IA32_MC0_CTL 0x400 -#define MSR_IA32_MC0_STATUS 0x401 -#define MSR_IA32_MC0_ADDR 0x402 -#define MSR_IA32_MC0_MISC 0x403 - -#define MSR_IA32_PEBS_ENABLE 0x3f1 -#define MSR_IA32_DS_AREA 0x600 -#define MSR_IA32_PERF_CAPABILITIES 0x345 - -/* Pentium IV performance counter MSRs */ -#define MSR_P4_BPU_PERFCTR0 0x300 -#define MSR_P4_BPU_PERFCTR1 0x301 -#define MSR_P4_BPU_PERFCTR2 0x302 -#define MSR_P4_BPU_PERFCTR3 0x303 -#define MSR_P4_MS_PERFCTR0 0x304 -#define MSR_P4_MS_PERFCTR1 0x305 -#define MSR_P4_MS_PERFCTR2 0x306 -#define MSR_P4_MS_PERFCTR3 0x307 -#define MSR_P4_FLAME_PERFCTR0 0x308 -#define MSR_P4_FLAME_PERFCTR1 0x309 -#define MSR_P4_FLAME_PERFCTR2 0x30a -#define MSR_P4_FLAME_PERFCTR3 0x30b -#define MSR_P4_IQ_PERFCTR0 0x30c -#define MSR_P4_IQ_PERFCTR1 0x30d -#define MSR_P4_IQ_PERFCTR2 0x30e -#define MSR_P4_IQ_PERFCTR3 0x30f -#define MSR_P4_IQ_PERFCTR4 0x310 -#define MSR_P4_IQ_PERFCTR5 0x311 -#define MSR_P4_BPU_CCCR0 0x360 -#define MSR_P4_BPU_CCCR1 0x361 -#define MSR_P4_BPU_CCCR2 0x362 -#define MSR_P4_BPU_CCCR3 0x363 -#define MSR_P4_MS_CCCR0 0x364 -#define MSR_P4_MS_CCCR1 0x365 -#define MSR_P4_MS_CCCR2 0x366 -#define MSR_P4_MS_CCCR3 0x367 -#define MSR_P4_FLAME_CCCR0 0x368 -#define MSR_P4_FLAME_CCCR1 0x369 -#define MSR_P4_FLAME_CCCR2 0x36a -#define MSR_P4_FLAME_CCCR3 0x36b -#define MSR_P4_IQ_CCCR0 0x36c -#define MSR_P4_IQ_CCCR1 0x36d -#define MSR_P4_IQ_CCCR2 0x36e -#define MSR_P4_IQ_CCCR3 0x36f -#define MSR_P4_IQ_CCCR4 0x370 -#define MSR_P4_IQ_CCCR5 0x371 -#define MSR_P4_ALF_ESCR0 0x3ca -#define MSR_P4_ALF_ESCR1 0x3cb -#define MSR_P4_BPU_ESCR0 0x3b2 -#define MSR_P4_BPU_ESCR1 0x3b3 -#define MSR_P4_BSU_ESCR0 0x3a0 -#define MSR_P4_BSU_ESCR1 0x3a1 -#define MSR_P4_CRU_ESCR0 0x3b8 -#define MSR_P4_CRU_ESCR1 0x3b9 -#define MSR_P4_CRU_ESCR2 0x3cc -#define MSR_P4_CRU_ESCR3 0x3cd -#define MSR_P4_CRU_ESCR4 0x3e0 -#define MSR_P4_CRU_ESCR5 0x3e1 -#define MSR_P4_DAC_ESCR0 0x3a8 -#define MSR_P4_DAC_ESCR1 0x3a9 -#define MSR_P4_FIRM_ESCR0 0x3a4 -#define MSR_P4_FIRM_ESCR1 
0x3a5 -#define MSR_P4_FLAME_ESCR0 0x3a6 -#define MSR_P4_FLAME_ESCR1 0x3a7 -#define MSR_P4_FSB_ESCR0 0x3a2 -#define MSR_P4_FSB_ESCR1 0x3a3 -#define MSR_P4_IQ_ESCR0 0x3ba -#define MSR_P4_IQ_ESCR1 0x3bb -#define MSR_P4_IS_ESCR0 0x3b4 -#define MSR_P4_IS_ESCR1 0x3b5 -#define MSR_P4_ITLB_ESCR0 0x3b6 -#define MSR_P4_ITLB_ESCR1 0x3b7 -#define MSR_P4_IX_ESCR0 0x3c8 -#define MSR_P4_IX_ESCR1 0x3c9 -#define MSR_P4_MOB_ESCR0 0x3aa -#define MSR_P4_MOB_ESCR1 0x3ab -#define MSR_P4_MS_ESCR0 0x3c0 -#define MSR_P4_MS_ESCR1 0x3c1 -#define MSR_P4_PMH_ESCR0 0x3ac -#define MSR_P4_PMH_ESCR1 0x3ad -#define MSR_P4_RAT_ESCR0 0x3bc -#define MSR_P4_RAT_ESCR1 0x3bd -#define MSR_P4_SAAT_ESCR0 0x3ae -#define MSR_P4_SAAT_ESCR1 0x3af -#define MSR_P4_SSU_ESCR0 0x3be -#define MSR_P4_SSU_ESCR1 0x3bf /* guess: not defined in manual */ -#define MSR_P4_TBPU_ESCR0 0x3c2 -#define MSR_P4_TBPU_ESCR1 0x3c3 -#define MSR_P4_TC_ESCR0 0x3c4 -#define MSR_P4_TC_ESCR1 0x3c5 -#define MSR_P4_U2L_ESCR0 0x3b0 -#define MSR_P4_U2L_ESCR1 0x3b1 - -/* AMD Defined MSRs */ -#define MSR_K6_EFER 0xC0000080 -#define MSR_K6_STAR 0xC0000081 -#define MSR_K6_WHCR 0xC0000082 -#define MSR_K6_UWCCR 0xC0000085 -#define MSR_K6_EPMR 0xC0000086 -#define MSR_K6_PSOR 0xC0000087 -#define MSR_K6_PFIR 0xC0000088 - -#define MSR_K7_EVNTSEL0 0xC0010000 -#define MSR_K7_EVNTSEL1 0xC0010001 -#define MSR_K7_EVNTSEL2 0xC0010002 -#define MSR_K7_EVNTSEL3 0xC0010003 -#define MSR_K7_PERFCTR0 0xC0010004 -#define MSR_K7_PERFCTR1 0xC0010005 -#define MSR_K7_PERFCTR2 0xC0010006 -#define MSR_K7_PERFCTR3 0xC0010007 -#define MSR_K7_HWCR 0xC0010015 -#define MSR_K7_CLK_CTL 0xC001001b -#define MSR_K7_FID_VID_CTL 0xC0010041 -#define MSR_K7_FID_VID_STATUS 0xC0010042 - -#define MSR_K8_ENABLE_C1E 0xC0010055 - -/* extended feature register */ -#define MSR_EFER 0xc0000080 - -/* EFER bits: */ - -/* Execute Disable enable */ -#define _EFER_NX 11 -#define EFER_NX (1<<_EFER_NX) - -/* Centaur-Hauls/IDT defined MSRs. 
*/ -#define MSR_IDT_FCR1 0x107 -#define MSR_IDT_FCR2 0x108 -#define MSR_IDT_FCR3 0x109 -#define MSR_IDT_FCR4 0x10a - -#define MSR_IDT_MCR0 0x110 -#define MSR_IDT_MCR1 0x111 -#define MSR_IDT_MCR2 0x112 -#define MSR_IDT_MCR3 0x113 -#define MSR_IDT_MCR4 0x114 -#define MSR_IDT_MCR5 0x115 -#define MSR_IDT_MCR6 0x116 -#define MSR_IDT_MCR7 0x117 -#define MSR_IDT_MCR_CTRL 0x120 - -/* VIA Cyrix defined MSRs*/ -#define MSR_VIA_FCR 0x1107 -#define MSR_VIA_LONGHAUL 0x110a -#define MSR_VIA_RNG 0x110b -#define MSR_VIA_BCR2 0x1147 - -/* Transmeta defined MSRs */ -#define MSR_TMTA_LONGRUN_CTRL 0x80868010 -#define MSR_TMTA_LONGRUN_FLAGS 0x80868011 -#define MSR_TMTA_LRTI_READOUT 0x80868018 -#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a - -/* Intel Core-based CPU performance counters */ -#define MSR_CORE_PERF_FIXED_CTR0 0x309 -#define MSR_CORE_PERF_FIXED_CTR1 0x30a -#define MSR_CORE_PERF_FIXED_CTR2 0x30b -#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x38d -#define MSR_CORE_PERF_GLOBAL_STATUS 0x38e -#define MSR_CORE_PERF_GLOBAL_CTRL 0x38f -#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x390 - -/* Geode defined MSRs */ -#define MSR_GEODE_BUSCONT_CONF0 0x1900 - +#endif +#endif #endif /* __ASM_MSR_H */ diff --git a/include/asm-i386/processor-flags.h b/include/asm-i386/processor-flags.h index b4711c2..5404e90 100644 --- a/include/asm-i386/processor-flags.h +++ b/include/asm-i386/processor-flags.h @@ -23,4 +23,69 @@ #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ +/* + * Basic CPU control in CR0 + */ +#define X86_CR0_PE 0x00000001 /* Protection Enable */ +#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor */ +#define X86_CR0_EM 0x00000004 /* Emulation */ +#define X86_CR0_TS 0x00000008 /* Task Switched */ +#define X86_CR0_ET 0x00000010 /* Extension Type */ +#define X86_CR0_NE 0x00000020 /* Numeric Error */ +#define X86_CR0_WP 0x00010000 /* Write Protect */ +#define X86_CR0_AM 0x00040000 /* Alignment Mask */ +#define X86_CR0_NW 
0x20000000 /* Not Write-through */ +#define X86_CR0_CD 0x40000000 /* Cache Disable */ +#define X86_CR0_PG 0x80000000 /* Paging */ + +/* + * Paging options in CR3 + */ +#define X86_CR3_PWT 0x00000008 /* Page Write Through */ +#define X86_CR3_PCD 0x00000010 /* Page Cache Disable */ + +/* + * Intel CPU features in CR4 + */ +#define X86_CR4_VME 0x00000001 /* enable vm86 extensions */ +#define X86_CR4_PVI 0x00000002 /* virtual interrupts flag enable */ +#define X86_CR4_TSD 0x00000004 /* disable time stamp at ipl 3 */ +#define X86_CR4_DE 0x00000008 /* enable debugging extensions */ +#define X86_CR4_PSE 0x00000010 /* enable page size extensions */ +#define X86_CR4_PAE 0x00000020 /* enable physical address extensions */ +#define X86_CR4_MCE 0x00000040 /* Machine check enable */ +#define X86_CR4_PGE 0x00000080 /* enable global pages */ +#define X86_CR4_PCE 0x00000100 /* enable performance counters at ipl 3 */ +#define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */ +#define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ +#define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ + +/* + * x86-64 Task Priority Register, CR8 + */ +#define X86_CR8_TPR 0x00000007 /* task priority register */ + +/* + * AMD and Transmeta use MSRs for configuration; see + */ + +/* + * NSC/Cyrix CPU configuration register indexes + */ +#define CX86_PCR0 0x20 +#define CX86_GCR 0xb8 +#define CX86_CCR0 0xc0 +#define CX86_CCR1 0xc1 +#define CX86_CCR2 0xc2 +#define CX86_CCR3 0xc3 +#define CX86_CCR4 0xe8 +#define CX86_CCR5 0xe9 +#define CX86_CCR6 0xea +#define CX86_CCR7 0xeb +#define CX86_PCR1 0xf0 +#define CX86_DIR0 0xfe +#define CX86_DIR1 0xff +#define CX86_ARR_BASE 0xc4 +#define CX86_RCR_BASE 0xdc + #endif /* __ASM_I386_PROCESSOR_FLAGS_H */ diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 882d3f8..77e2632 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -143,21 +143,6 @@ static inline void 
native_cpuid(unsigned int *eax, unsigned int *ebx, #define load_cr3(pgdir) write_cr3(__pa(pgdir)) /* - * Intel CPU features in CR4 - */ -#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ -#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ -#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ -#define X86_CR4_DE 0x0008 /* enable debugging extensions */ -#define X86_CR4_PSE 0x0010 /* enable page size extensions */ -#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ -#define X86_CR4_MCE 0x0040 /* Machine check enable */ -#define X86_CR4_PGE 0x0080 /* enable global pages */ -#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ -#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ -#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ - -/* * Save the cr4 feature set we're using (ie * Pentium 4MB enable and PPro Global page * enable), so that any CPU's that boot up @@ -184,26 +169,6 @@ static inline void clear_in_cr4 (unsigned long mask) } /* - * NSC/Cyrix CPU configuration register indexes - */ - -#define CX86_PCR0 0x20 -#define CX86_GCR 0xb8 -#define CX86_CCR0 0xc0 -#define CX86_CCR1 0xc1 -#define CX86_CCR2 0xc2 -#define CX86_CCR3 0xc3 -#define CX86_CCR4 0xe8 -#define CX86_CCR5 0xe9 -#define CX86_CCR6 0xea -#define CX86_CCR7 0xeb -#define CX86_PCR1 0xf0 -#define CX86_DIR0 0xfe -#define CX86_DIR1 0xff -#define CX86_ARR_BASE 0xc4 -#define CX86_RCR_BASE 0xdc - -/* * NSC/Cyrix CPU indexed register access macros */ diff --git a/include/asm-x86_64/Kbuild b/include/asm-x86_64/Kbuild index 242296e..89ad1fc 100644 --- a/include/asm-x86_64/Kbuild +++ b/include/asm-x86_64/Kbuild @@ -8,7 +8,7 @@ header-y += boot.h header-y += bootsetup.h header-y += debugreg.h header-y += ldt.h -header-y += msr.h +header-y += msr-index.h header-y += prctl.h header-y += ptrace-abi.h header-y += sigcontext32.h @@ -16,6 +16,7 @@ header-y += ucontext.h header-y += vsyscall32.h unifdef-y += mce.h +unifdef-y 
+= msr.h unifdef-y += mtrr.h unifdef-y += vsyscall.h unifdef-y += const.h diff --git a/include/asm-x86_64/msr-index.h b/include/asm-x86_64/msr-index.h new file mode 100644 index 0000000..d77a63f --- /dev/null +++ b/include/asm-x86_64/msr-index.h @@ -0,0 +1 @@ +#include diff --git a/include/asm-x86_64/msr.h b/include/asm-x86_64/msr.h index 902f9a5..a524f03 100644 --- a/include/asm-x86_64/msr.h +++ b/include/asm-x86_64/msr.h @@ -1,6 +1,8 @@ #ifndef X86_64_MSR_H #define X86_64_MSR_H 1 +#include + #ifndef __ASSEMBLY__ /* * Access to machine-specific registers (available on 586 and better only) @@ -157,9 +159,6 @@ static inline unsigned int cpuid_edx(unsigned int op) return edx; } -#define MSR_IA32_UCODE_WRITE 0x79 -#define MSR_IA32_UCODE_REV 0x8b - #ifdef CONFIG_SMP void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); @@ -172,269 +171,6 @@ static inline void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) { wrmsr(msr_no, l, h); } -#endif /* CONFIG_SMP */ - -#endif - -/* AMD/K8 specific MSRs */ -#define MSR_EFER 0xc0000080 /* extended feature register */ -#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ -#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ -#define MSR_CSTAR 0xc0000083 /* compatibility mode SYSCALL target */ -#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ -#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ -#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ -#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow (or USER_GS from kernel) */ -/* EFER bits: */ -#define _EFER_SCE 0 /* SYSCALL/SYSRET */ -#define _EFER_LME 8 /* Long mode enable */ -#define _EFER_LMA 10 /* Long mode active (read-only) */ -#define _EFER_NX 11 /* No execute enable */ - -#define EFER_SCE (1<<_EFER_SCE) -#define EFER_LME (1<<_EFER_LME) -#define EFER_LMA (1<<_EFER_LMA) -#define EFER_NX (1<<_EFER_NX) - -/* Intel MSRs. 
Some also available on other CPUs */ -#define MSR_IA32_TSC 0x10 -#define MSR_IA32_PLATFORM_ID 0x17 - -#define MSR_IA32_PERFCTR0 0xc1 -#define MSR_IA32_PERFCTR1 0xc2 -#define MSR_FSB_FREQ 0xcd - -#define MSR_MTRRcap 0x0fe -#define MSR_IA32_BBL_CR_CTL 0x119 - -#define MSR_IA32_SYSENTER_CS 0x174 -#define MSR_IA32_SYSENTER_ESP 0x175 -#define MSR_IA32_SYSENTER_EIP 0x176 - -#define MSR_IA32_MCG_CAP 0x179 -#define MSR_IA32_MCG_STATUS 0x17a -#define MSR_IA32_MCG_CTL 0x17b - -#define MSR_IA32_EVNTSEL0 0x186 -#define MSR_IA32_EVNTSEL1 0x187 - -#define MSR_IA32_DEBUGCTLMSR 0x1d9 -#define MSR_IA32_LASTBRANCHFROMIP 0x1db -#define MSR_IA32_LASTBRANCHTOIP 0x1dc -#define MSR_IA32_LASTINTFROMIP 0x1dd -#define MSR_IA32_LASTINTTOIP 0x1de - -#define MSR_IA32_PEBS_ENABLE 0x3f1 -#define MSR_IA32_DS_AREA 0x600 -#define MSR_IA32_PERF_CAPABILITIES 0x345 - -#define MSR_MTRRfix64K_00000 0x250 -#define MSR_MTRRfix16K_80000 0x258 -#define MSR_MTRRfix16K_A0000 0x259 -#define MSR_MTRRfix4K_C0000 0x268 -#define MSR_MTRRfix4K_C8000 0x269 -#define MSR_MTRRfix4K_D0000 0x26a -#define MSR_MTRRfix4K_D8000 0x26b -#define MSR_MTRRfix4K_E0000 0x26c -#define MSR_MTRRfix4K_E8000 0x26d -#define MSR_MTRRfix4K_F0000 0x26e -#define MSR_MTRRfix4K_F8000 0x26f -#define MSR_MTRRdefType 0x2ff - -#define MSR_IA32_MC0_CTL 0x400 -#define MSR_IA32_MC0_STATUS 0x401 -#define MSR_IA32_MC0_ADDR 0x402 -#define MSR_IA32_MC0_MISC 0x403 - -#define MSR_P6_PERFCTR0 0xc1 -#define MSR_P6_PERFCTR1 0xc2 -#define MSR_P6_EVNTSEL0 0x186 -#define MSR_P6_EVNTSEL1 0x187 - -/* K7/K8 MSRs. Not complete. See the architecture manual for a more complete list. 
*/ -#define MSR_K7_EVNTSEL0 0xC0010000 -#define MSR_K7_PERFCTR0 0xC0010004 -#define MSR_K7_EVNTSEL1 0xC0010001 -#define MSR_K7_PERFCTR1 0xC0010005 -#define MSR_K7_EVNTSEL2 0xC0010002 -#define MSR_K7_PERFCTR2 0xC0010006 -#define MSR_K7_EVNTSEL3 0xC0010003 -#define MSR_K7_PERFCTR3 0xC0010007 -#define MSR_K8_TOP_MEM1 0xC001001A -#define MSR_K8_TOP_MEM2 0xC001001D -#define MSR_K8_SYSCFG 0xC0010010 -#define MSR_K8_HWCR 0xC0010015 - -/* K6 MSRs */ -#define MSR_K6_EFER 0xC0000080 -#define MSR_K6_STAR 0xC0000081 -#define MSR_K6_WHCR 0xC0000082 -#define MSR_K6_UWCCR 0xC0000085 -#define MSR_K6_PSOR 0xC0000087 -#define MSR_K6_PFIR 0xC0000088 - -/* Centaur-Hauls/IDT defined MSRs. */ -#define MSR_IDT_FCR1 0x107 -#define MSR_IDT_FCR2 0x108 -#define MSR_IDT_FCR3 0x109 -#define MSR_IDT_FCR4 0x10a - -#define MSR_IDT_MCR0 0x110 -#define MSR_IDT_MCR1 0x111 -#define MSR_IDT_MCR2 0x112 -#define MSR_IDT_MCR3 0x113 -#define MSR_IDT_MCR4 0x114 -#define MSR_IDT_MCR5 0x115 -#define MSR_IDT_MCR6 0x116 -#define MSR_IDT_MCR7 0x117 -#define MSR_IDT_MCR_CTRL 0x120 - -/* VIA Cyrix defined MSRs*/ -#define MSR_VIA_FCR 0x1107 -#define MSR_VIA_LONGHAUL 0x110a -#define MSR_VIA_RNG 0x110b -#define MSR_VIA_BCR2 0x1147 - -/* Intel defined MSRs. 
*/ -#define MSR_IA32_P5_MC_ADDR 0 -#define MSR_IA32_P5_MC_TYPE 1 -#define MSR_IA32_PLATFORM_ID 0x17 -#define MSR_IA32_EBL_CR_POWERON 0x2a - -#define MSR_IA32_APICBASE 0x1b -#define MSR_IA32_APICBASE_BSP (1<<8) -#define MSR_IA32_APICBASE_ENABLE (1<<11) -#define MSR_IA32_APICBASE_BASE (0xfffff<<12) - -/* P4/Xeon+ specific */ -#define MSR_IA32_MCG_EAX 0x180 -#define MSR_IA32_MCG_EBX 0x181 -#define MSR_IA32_MCG_ECX 0x182 -#define MSR_IA32_MCG_EDX 0x183 -#define MSR_IA32_MCG_ESI 0x184 -#define MSR_IA32_MCG_EDI 0x185 -#define MSR_IA32_MCG_EBP 0x186 -#define MSR_IA32_MCG_ESP 0x187 -#define MSR_IA32_MCG_EFLAGS 0x188 -#define MSR_IA32_MCG_EIP 0x189 -#define MSR_IA32_MCG_RESERVED 0x18A - -#define MSR_P6_EVNTSEL0 0x186 -#define MSR_P6_EVNTSEL1 0x187 - -#define MSR_IA32_PERF_STATUS 0x198 -#define MSR_IA32_PERF_CTL 0x199 - -#define MSR_IA32_MPERF 0xE7 -#define MSR_IA32_APERF 0xE8 - -#define MSR_IA32_THERM_CONTROL 0x19a -#define MSR_IA32_THERM_INTERRUPT 0x19b -#define MSR_IA32_THERM_STATUS 0x19c -#define MSR_IA32_MISC_ENABLE 0x1a0 - -#define MSR_IA32_DEBUGCTLMSR 0x1d9 -#define MSR_IA32_LASTBRANCHFROMIP 0x1db -#define MSR_IA32_LASTBRANCHTOIP 0x1dc -#define MSR_IA32_LASTINTFROMIP 0x1dd -#define MSR_IA32_LASTINTTOIP 0x1de - -#define MSR_IA32_MC0_CTL 0x400 -#define MSR_IA32_MC0_STATUS 0x401 -#define MSR_IA32_MC0_ADDR 0x402 -#define MSR_IA32_MC0_MISC 0x403 - -/* Pentium IV performance counter MSRs */ -#define MSR_P4_BPU_PERFCTR0 0x300 -#define MSR_P4_BPU_PERFCTR1 0x301 -#define MSR_P4_BPU_PERFCTR2 0x302 -#define MSR_P4_BPU_PERFCTR3 0x303 -#define MSR_P4_MS_PERFCTR0 0x304 -#define MSR_P4_MS_PERFCTR1 0x305 -#define MSR_P4_MS_PERFCTR2 0x306 -#define MSR_P4_MS_PERFCTR3 0x307 -#define MSR_P4_FLAME_PERFCTR0 0x308 -#define MSR_P4_FLAME_PERFCTR1 0x309 -#define MSR_P4_FLAME_PERFCTR2 0x30a -#define MSR_P4_FLAME_PERFCTR3 0x30b -#define MSR_P4_IQ_PERFCTR0 0x30c -#define MSR_P4_IQ_PERFCTR1 0x30d -#define MSR_P4_IQ_PERFCTR2 0x30e -#define MSR_P4_IQ_PERFCTR3 0x30f -#define MSR_P4_IQ_PERFCTR4 0x310 
-#define MSR_P4_IQ_PERFCTR5 0x311 -#define MSR_P4_BPU_CCCR0 0x360 -#define MSR_P4_BPU_CCCR1 0x361 -#define MSR_P4_BPU_CCCR2 0x362 -#define MSR_P4_BPU_CCCR3 0x363 -#define MSR_P4_MS_CCCR0 0x364 -#define MSR_P4_MS_CCCR1 0x365 -#define MSR_P4_MS_CCCR2 0x366 -#define MSR_P4_MS_CCCR3 0x367 -#define MSR_P4_FLAME_CCCR0 0x368 -#define MSR_P4_FLAME_CCCR1 0x369 -#define MSR_P4_FLAME_CCCR2 0x36a -#define MSR_P4_FLAME_CCCR3 0x36b -#define MSR_P4_IQ_CCCR0 0x36c -#define MSR_P4_IQ_CCCR1 0x36d -#define MSR_P4_IQ_CCCR2 0x36e -#define MSR_P4_IQ_CCCR3 0x36f -#define MSR_P4_IQ_CCCR4 0x370 -#define MSR_P4_IQ_CCCR5 0x371 -#define MSR_P4_ALF_ESCR0 0x3ca -#define MSR_P4_ALF_ESCR1 0x3cb -#define MSR_P4_BPU_ESCR0 0x3b2 -#define MSR_P4_BPU_ESCR1 0x3b3 -#define MSR_P4_BSU_ESCR0 0x3a0 -#define MSR_P4_BSU_ESCR1 0x3a1 -#define MSR_P4_CRU_ESCR0 0x3b8 -#define MSR_P4_CRU_ESCR1 0x3b9 -#define MSR_P4_CRU_ESCR2 0x3cc -#define MSR_P4_CRU_ESCR3 0x3cd -#define MSR_P4_CRU_ESCR4 0x3e0 -#define MSR_P4_CRU_ESCR5 0x3e1 -#define MSR_P4_DAC_ESCR0 0x3a8 -#define MSR_P4_DAC_ESCR1 0x3a9 -#define MSR_P4_FIRM_ESCR0 0x3a4 -#define MSR_P4_FIRM_ESCR1 0x3a5 -#define MSR_P4_FLAME_ESCR0 0x3a6 -#define MSR_P4_FLAME_ESCR1 0x3a7 -#define MSR_P4_FSB_ESCR0 0x3a2 -#define MSR_P4_FSB_ESCR1 0x3a3 -#define MSR_P4_IQ_ESCR0 0x3ba -#define MSR_P4_IQ_ESCR1 0x3bb -#define MSR_P4_IS_ESCR0 0x3b4 -#define MSR_P4_IS_ESCR1 0x3b5 -#define MSR_P4_ITLB_ESCR0 0x3b6 -#define MSR_P4_ITLB_ESCR1 0x3b7 -#define MSR_P4_IX_ESCR0 0x3c8 -#define MSR_P4_IX_ESCR1 0x3c9 -#define MSR_P4_MOB_ESCR0 0x3aa -#define MSR_P4_MOB_ESCR1 0x3ab -#define MSR_P4_MS_ESCR0 0x3c0 -#define MSR_P4_MS_ESCR1 0x3c1 -#define MSR_P4_PMH_ESCR0 0x3ac -#define MSR_P4_PMH_ESCR1 0x3ad -#define MSR_P4_RAT_ESCR0 0x3bc -#define MSR_P4_RAT_ESCR1 0x3bd -#define MSR_P4_SAAT_ESCR0 0x3ae -#define MSR_P4_SAAT_ESCR1 0x3af -#define MSR_P4_SSU_ESCR0 0x3be -#define MSR_P4_SSU_ESCR1 0x3bf /* guess: not defined in manual */ -#define MSR_P4_TBPU_ESCR0 0x3c2 -#define MSR_P4_TBPU_ESCR1 0x3c3 -#define 
MSR_P4_TC_ESCR0 0x3c4 -#define MSR_P4_TC_ESCR1 0x3c5 -#define MSR_P4_U2L_ESCR0 0x3b0 -#define MSR_P4_U2L_ESCR1 0x3b1 - -/* Intel Core-based CPU performance counters */ -#define MSR_CORE_PERF_FIXED_CTR0 0x309 -#define MSR_CORE_PERF_FIXED_CTR1 0x30a -#define MSR_CORE_PERF_FIXED_CTR2 0x30b -#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x38d -#define MSR_CORE_PERF_GLOBAL_STATUS 0x38e -#define MSR_CORE_PERF_GLOBAL_CTRL 0x38f -#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x390 - -#endif +#endif /* CONFIG_SMP */ +#endif /* __ASSEMBLY__ */ +#endif /* X86_64_MSR_H */ diff --git a/include/asm-x86_64/processor-flags.h b/include/asm-x86_64/processor-flags.h index 806112f..ec99a57 100644 --- a/include/asm-x86_64/processor-flags.h +++ b/include/asm-x86_64/processor-flags.h @@ -1,26 +1 @@ -#ifndef __ASM_X86_64_PROCESSOR_FLAGS_H -#define __ASM_X86_64_PROCESSOR_FLAGS_H -/* Various flags defined: can be included from assembler. */ - -/* - * EFLAGS bits - */ -#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ -#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ -#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ -#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ -#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ -#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ -#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ -#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ -#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ -#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ -#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ -#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ -#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ -#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ -#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ -#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ -#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ - -#endif /* __ASM_X86_64_PROCESSOR_FLAGS_H */ +#include diff --git a/include/asm-x86_64/processor.h 
b/include/asm-x86_64/processor.h index 6a11734..461ffe4 100644 --- a/include/asm-x86_64/processor.h +++ b/include/asm-x86_64/processor.h @@ -104,21 +104,6 @@ extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); extern unsigned short num_cache_leaves; /* - * Intel CPU features in CR4 - */ -#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ -#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ -#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ -#define X86_CR4_DE 0x0008 /* enable debugging extensions */ -#define X86_CR4_PSE 0x0010 /* enable page size extensions */ -#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ -#define X86_CR4_MCE 0x0040 /* Machine check enable */ -#define X86_CR4_PGE 0x0080 /* enable global pages */ -#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ -#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ -#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ - -/* * Save the cr4 feature set we're using (ie * Pentium 4MB enable and PPro Global page * enable), so that any CPU's that boot up @@ -407,22 +392,6 @@ static inline void prefetchw(void *x) #define cpu_relax() rep_nop() /* - * NSC/Cyrix CPU configuration register indexes - */ -#define CX86_CCR0 0xc0 -#define CX86_CCR1 0xc1 -#define CX86_CCR2 0xc2 -#define CX86_CCR3 0xc3 -#define CX86_CCR4 0xe8 -#define CX86_CCR5 0xe9 -#define CX86_CCR6 0xea -#define CX86_CCR7 0xeb -#define CX86_DIR0 0xfe -#define CX86_DIR1 0xff -#define CX86_ARR_BASE 0xc4 -#define CX86_RCR_BASE 0xdc - -/* * NSC/Cyrix CPU indexed register access macros */ -- cgit v1.1 From d0175ab64412aabc93da8682aaa99124d6815056 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: Remove smp_alt_instructions The .smp_altinstructions section and its corresponding symbols are completely unused, so remove them. 
Also, remove stray #ifdef __KERNEL__ in asm-i386/alternative.h Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Andi Kleen --- include/asm-i386/alternative.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-i386/alternative.h b/include/asm-i386/alternative.h index dbc1a29..4d518ee 100644 --- a/include/asm-i386/alternative.h +++ b/include/asm-i386/alternative.h @@ -1,8 +1,6 @@ #ifndef _I386_ALTERNATIVE_H #define _I386_ALTERNATIVE_H -#ifdef __KERNEL__ - #include #include #include @@ -32,9 +30,7 @@ static inline void alternatives_smp_module_add(struct module *mod, char *name, void *text, void *text_end) {} static inline void alternatives_smp_module_del(struct module *mod) {} static inline void alternatives_smp_switch(int smp) {} -#endif - -#endif +#endif /* CONFIG_SMP */ /* * Alternative instructions for different CPU types or capabilities. -- cgit v1.1 From a75c54f933bd8db9f4a609bd128663c179b3e6a1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: i386 separate hardware-defined TSS from Linux additions On Thu, 2007-03-29 at 13:16 +0200, Andi Kleen wrote: > Please clean it up properly with two structs. Not sure about this, now I've done it. Running it here. If you like it, I can do x86-64 as well. == lguest defines its own TSS struct because the "struct tss_struct" contains linux-specific additions. Andi asked me to split the struct in processor.h. Unfortunately it makes usage a little awkward. 
Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen --- include/asm-i386/processor.h | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 77e2632..9222604 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -291,7 +291,8 @@ typedef struct { struct thread_struct; -struct tss_struct { +/* This is the TSS defined by the hardware. */ +struct i386_hw_tss { unsigned short back_link,__blh; unsigned long esp0; unsigned short ss0,__ss0h; @@ -315,6 +316,11 @@ struct tss_struct { unsigned short gs, __gsh; unsigned short ldt, __ldth; unsigned short trace, io_bitmap_base; +} __attribute__((packed)); + +struct tss_struct { + struct i386_hw_tss x86_tss; + /* * The extra 1 is there because the CPU will access an * additional byte beyond the end of the IO permission @@ -381,10 +387,12 @@ struct thread_struct { * be within the limit. */ #define INIT_TSS { \ - .esp0 = sizeof(init_stack) + (long)&init_stack, \ - .ss0 = __KERNEL_DS, \ - .ss1 = __KERNEL_CS, \ - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ + .x86_tss = { \ + .esp0 = sizeof(init_stack) + (long)&init_stack, \ + .ss0 = __KERNEL_DS, \ + .ss1 = __KERNEL_CS, \ + .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ + }, \ .io_bitmap = { [ 0 ... 
IO_BITMAP_LONGS] = ~0 }, \ } @@ -493,10 +501,10 @@ static inline void rep_nop(void) static inline void native_load_esp0(struct tss_struct *tss, struct thread_struct *thread) { - tss->esp0 = thread->esp0; + tss->x86_tss.esp0 = thread->esp0; /* This can only happen when SEP is enabled, no need to test "SEP"arately */ - if (unlikely(tss->ss1 != thread->sysenter_cs)) { - tss->ss1 = thread->sysenter_cs; + if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { + tss->x86_tss.ss1 = thread->sysenter_cs; wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); } } -- cgit v1.1 From 45876233605c268e929a7875081e129debe34bdc Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: PARAVIRT: use paravirt_nop to consistently mark no-op operations Add a _paravirt_nop function for use as a stub for no-op operations, and paravirt_nop #defined void * version to make using it easier (since all its uses are as a void *). This is useful to allow the patcher to automatically identify noop operations so it can simply nop out the callsite. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Acked-by: Ingo Molnar [mingo] but only as a cleanup of the current open-coded (void *) casts. My problem with this is that it loses the types. 
Not that there is much to check for, but still, this adds some assumptions about how function calls look like --- include/asm-i386/paravirt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 32acebc..f0bdaea 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -434,6 +434,9 @@ static inline void pmd_clear(pmd_t *pmdp) #define arch_leave_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_NONE) #define arch_flush_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_FLUSH) +void _paravirt_nop(void); +#define paravirt_nop ((void *)_paravirt_nop) + /* These all sit in the .parainstructions section to tell us what to patch. */ struct paravirt_patch { u8 *instr; /* original instructions */ -- cgit v1.1 From 3dc494e86d1c93afd4c66385f270899dbfae483d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: PARAVIRT: Add pagetable accessors to pack and unpack pagetable entries Add a set of accessors to pack, unpack and modify page table entries (at all levels). This allows a paravirt implementation to control the contents of pgd/pmd/pte entries. For example, Xen uses this to convert the (pseudo-)physical address into a machine address when populating a pagetable entry, and converting back to pphys address when an entry is read. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Acked-by: Ingo Molnar --- include/asm-i386/page.h | 79 +++++++++++++++++++++++++++++++++------ include/asm-i386/paravirt.h | 52 ++++++++++++++++++++------ include/asm-i386/pgtable-2level.h | 26 ++++++++++--- include/asm-i386/pgtable-3level.h | 63 ++++++++++++++++++------------- include/asm-i386/pgtable.h | 2 + 5 files changed, 169 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h index fd3f64a..818ac8b 100644 --- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h @@ -12,7 +12,6 @@ #ifdef __KERNEL__ #ifndef __ASSEMBLY__ - #ifdef CONFIG_X86_USE_3DNOW #include @@ -42,26 +41,81 @@ * These are used to make use of C type-checking.. */ extern int nx_enabled; + #ifdef CONFIG_X86_PAE extern unsigned long long __supported_pte_mask; typedef struct { unsigned long pte_low, pte_high; } pte_t; typedef struct { unsigned long long pmd; } pmd_t; typedef struct { unsigned long long pgd; } pgd_t; typedef struct { unsigned long long pgprot; } pgprot_t; -#define pmd_val(x) ((x).pmd) -#define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) -#define __pmd(x) ((pmd_t) { (x) } ) + +static inline unsigned long long native_pgd_val(pgd_t pgd) +{ + return pgd.pgd; +} + +static inline unsigned long long native_pmd_val(pmd_t pmd) +{ + return pmd.pmd; +} + +static inline unsigned long long native_pte_val(pte_t pte) +{ + return pte.pte_low | ((unsigned long long)pte.pte_high << 32); +} + +static inline pgd_t native_make_pgd(unsigned long long val) +{ + return (pgd_t) { val }; +} + +static inline pmd_t native_make_pmd(unsigned long long val) +{ + return (pmd_t) { val }; +} + +static inline pte_t native_make_pte(unsigned long long val) +{ + return (pte_t) { .pte_low = val, .pte_high = (val >> 32) } ; +} + +#ifndef CONFIG_PARAVIRT +#define pmd_val(x) native_pmd_val(x) +#define __pmd(x) native_make_pmd(x) +#endif + #define HPAGE_SHIFT 21 #include 
-#else +#else /* !CONFIG_X86_PAE */ typedef struct { unsigned long pte_low; } pte_t; typedef struct { unsigned long pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; #define boot_pte_t pte_t /* or would you rather have a typedef */ -#define pte_val(x) ((x).pte_low) + +static inline unsigned long native_pgd_val(pgd_t pgd) +{ + return pgd.pgd; +} + +static inline unsigned long native_pte_val(pte_t pte) +{ + return pte.pte_low; +} + +static inline pgd_t native_make_pgd(unsigned long val) +{ + return (pgd_t) { val }; +} + +static inline pte_t native_make_pte(unsigned long val) +{ + return (pte_t) { .pte_low = val }; +} + #define HPAGE_SHIFT 22 #include -#endif +#endif /* CONFIG_X86_PAE */ + #define PTE_MASK PAGE_MASK #ifdef CONFIG_HUGETLB_PAGE @@ -71,13 +125,16 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #endif -#define pgd_val(x) ((x).pgd) #define pgprot_val(x) ((x).pgprot) - -#define __pte(x) ((pte_t) { (x) } ) -#define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) +#ifndef CONFIG_PARAVIRT +#define pgd_val(x) native_pgd_val(x) +#define __pgd(x) native_make_pgd(x) +#define pte_val(x) native_pte_val(x) +#define __pte(x) native_make_pte(x) +#endif + #endif /* !__ASSEMBLY__ */ /* to align the pointer to the (next) page boundary */ diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index f0bdaea..0aacb13 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -2,7 +2,6 @@ #define __ASM_PARAVIRT_H /* Various instructions on x86 need to be replaced for * para-virtualization: those hooks are defined here. 
*/ -#include #include #include @@ -25,6 +24,8 @@ #define CLBR_ANY 0x7 #ifndef __ASSEMBLY__ +#include + struct thread_struct; struct Xgt_desc_struct; struct tss_struct; @@ -55,11 +56,6 @@ struct paravirt_ops int (*set_wallclock)(unsigned long); void (*time_init)(void); - /* All the function pointers here are declared as "fastcall" - so that we get a specific register-based calling - convention. This makes it easier to implement inline - assembler replacements. */ - void (*cpuid)(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx); @@ -139,16 +135,33 @@ struct paravirt_ops void (*release_pd)(u32 pfn); void (*set_pte)(pte_t *ptep, pte_t pteval); - void (*set_pte_at)(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval); + void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval); void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); - void (*pte_update)(struct mm_struct *mm, u32 addr, pte_t *ptep); - void (*pte_update_defer)(struct mm_struct *mm, u32 addr, pte_t *ptep); + void (*pte_update)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + void (*pte_update_defer)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + + pte_t (*ptep_get_and_clear)(pte_t *ptep); + #ifdef CONFIG_X86_PAE void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); - void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); + void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); void (*set_pud)(pud_t *pudp, pud_t pudval); - void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void (*pmd_clear)(pmd_t *pmdp); + + unsigned long long (*pte_val)(pte_t); + unsigned long long (*pmd_val)(pmd_t); + unsigned long long (*pgd_val)(pgd_t); + + pte_t (*make_pte)(unsigned long long pte); + pmd_t (*make_pmd)(unsigned long long pmd); + pgd_t (*make_pgd)(unsigned long long pgd); +#else + 
unsigned long (*pte_val)(pte_t); + unsigned long (*pgd_val)(pgd_t); + + pte_t (*make_pte)(unsigned long pte); + pgd_t (*make_pgd)(unsigned long pgd); #endif void (*set_lazy_mode)(int mode); @@ -219,6 +232,8 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, #define read_cr4_safe(x) paravirt_ops.read_cr4_safe() #define write_cr4(x) paravirt_ops.write_cr4(x) +#define raw_ptep_get_and_clear(xp) (paravirt_ops.ptep_get_and_clear(xp)) + static inline void raw_safe_halt(void) { paravirt_ops.safe_halt(); @@ -304,6 +319,17 @@ static inline void halt(void) (paravirt_ops.write_idt_entry((dt), (entry), (low), (high))) #define set_iopl_mask(mask) (paravirt_ops.set_iopl_mask(mask)) +#define __pte(x) paravirt_ops.make_pte(x) +#define __pgd(x) paravirt_ops.make_pgd(x) + +#define pte_val(x) paravirt_ops.pte_val(x) +#define pgd_val(x) paravirt_ops.pgd_val(x) + +#ifdef CONFIG_X86_PAE +#define __pmd(x) paravirt_ops.make_pmd(x) +#define pmd_val(x) paravirt_ops.pmd_val(x) +#endif + /* The paravirtualized I/O functions */ static inline void slow_down_io(void) { paravirt_ops.io_delay(); @@ -344,6 +370,7 @@ static inline void setup_secondary_clock(void) } #endif + #ifdef CONFIG_SMP static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, unsigned long start_esp) @@ -371,7 +398,8 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) paravirt_ops.set_pte(ptep, pteval); } -static inline void set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval) +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) { paravirt_ops.set_pte_at(mm, addr, ptep, pteval); } diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h index 38c3fcc..043a2bc 100644 --- a/include/asm-i386/pgtable-2level.h +++ b/include/asm-i386/pgtable-2level.h @@ -11,10 +11,23 @@ * within a page table are directly modified. Thus, the following * hook is made available. 
*/ +static inline void native_set_pte(pte_t *ptep , pte_t pte) +{ + *ptep = pte; +} +static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep , pte_t pte) +{ + native_set_pte(ptep, pte); +} +static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + *pmdp = pmd; +} #ifndef CONFIG_PARAVIRT -#define set_pte(pteptr, pteval) (*(pteptr) = pteval) -#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) -#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) +#define set_pte(pteptr, pteval) native_set_pte(pteptr, pteval) +#define set_pte_at(mm,addr,ptep,pteval) native_set_pte_at(mm, addr, ptep, pteval) +#define set_pmd(pmdptr, pmdval) native_set_pmd(pmdptr, pmdval) #endif #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval) @@ -23,11 +36,14 @@ #define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) -#define raw_ptep_get_and_clear(xp) __pte(xchg(&(xp)->pte_low, 0)) +static inline pte_t native_ptep_get_and_clear(pte_t *xp) +{ + return __pte(xchg(&xp->pte_low, 0)); +} #define pte_page(x) pfn_to_page(pte_pfn(x)) #define pte_none(x) (!(x).pte_low) -#define pte_pfn(x) ((unsigned long)(((x).pte_low >> PAGE_SHIFT))) +#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) #define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) #define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h index 7a2318f..be6017f 100644 --- a/include/asm-i386/pgtable-3level.h +++ b/include/asm-i386/pgtable-3level.h @@ -42,20 +42,23 @@ static inline int pte_exec_kernel(pte_t pte) return pte_x(pte); } -#ifndef CONFIG_PARAVIRT /* Rules for using set_pte: the pte being assigned *must* be * either not present or in a state where the hardware will * not attempt to update the pte. 
In places where this is * not possible, use pte_get_and_clear to obtain the old pte * value and then use set_pte to update it. -ben */ -static inline void set_pte(pte_t *ptep, pte_t pte) +static inline void native_set_pte(pte_t *ptep, pte_t pte) { ptep->pte_high = pte.pte_high; smp_wmb(); ptep->pte_low = pte.pte_low; } -#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) +static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep , pte_t pte) +{ + native_set_pte(ptep, pte); +} /* * Since this is only called on user PTEs, and the page fault handler @@ -63,7 +66,8 @@ static inline void set_pte(pte_t *ptep, pte_t pte) * we are justified in merely clearing the PTE present bit, followed * by a set. The ordering here is important. */ -static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) +static inline void native_set_pte_present(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) { ptep->pte_low = 0; smp_wmb(); @@ -72,32 +76,48 @@ static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte ptep->pte_low = pte.pte_low; } -#define set_pte_atomic(pteptr,pteval) \ - set_64bit((unsigned long long *)(pteptr),pte_val(pteval)) -#define set_pmd(pmdptr,pmdval) \ - set_64bit((unsigned long long *)(pmdptr),pmd_val(pmdval)) -#define set_pud(pudptr,pudval) \ - (*(pudptr) = (pudval)) +static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) +{ + set_64bit((unsigned long long *)(ptep),native_pte_val(pte)); +} +static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + set_64bit((unsigned long long *)(pmdp),native_pmd_val(pmd)); +} +static inline void native_set_pud(pud_t *pudp, pud_t pud) +{ + *pudp = pud; +} /* * For PTEs and PDEs, we must clear the P-bit first when clearing a page table * entry, so clear the bottom half first and enforce ordering with a compiler * barrier. 
*/ -static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { ptep->pte_low = 0; smp_wmb(); ptep->pte_high = 0; } -static inline void pmd_clear(pmd_t *pmd) +static inline void native_pmd_clear(pmd_t *pmd) { u32 *tmp = (u32 *)pmd; *tmp = 0; smp_wmb(); *(tmp + 1) = 0; } + +#ifndef CONFIG_PARAVIRT +#define set_pte(ptep, pte) native_set_pte(ptep, pte) +#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) +#define set_pte_present(mm, addr, ptep, pte) native_set_pte_present(mm, addr, ptep, pte) +#define set_pte_atomic(ptep, pte) native_set_pte_atomic(ptep, pte) +#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd) +#define set_pud(pudp, pud) native_set_pud(pudp, pud) +#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep) +#define pmd_clear(pmd) native_pmd_clear(pmd) #endif /* @@ -119,7 +139,7 @@ static inline void pud_clear (pud_t * pud) { } #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ pmd_index(address)) -static inline pte_t raw_ptep_get_and_clear(pte_t *ptep) +static inline pte_t native_ptep_get_and_clear(pte_t *ptep) { pte_t res; @@ -146,28 +166,21 @@ static inline int pte_none(pte_t pte) static inline unsigned long pte_pfn(pte_t pte) { - return (pte.pte_low >> PAGE_SHIFT) | - (pte.pte_high << (32 - PAGE_SHIFT)); + return pte_val(pte) >> PAGE_SHIFT; } extern unsigned long long __supported_pte_mask; static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { - pte_t pte; - - pte.pte_high = (page_nr >> (32 - PAGE_SHIFT)) | \ - (pgprot_val(pgprot) >> 32); - pte.pte_high &= (__supported_pte_mask >> 32); - pte.pte_low = ((page_nr << PAGE_SHIFT) | pgprot_val(pgprot)) & \ - __supported_pte_mask; - return pte; + return __pte((((unsigned long long)page_nr << PAGE_SHIFT) | + pgprot_val(pgprot)) & __supported_pte_mask); } static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - 
return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | \ - pgprot_val(pgprot)) & __supported_pte_mask); + return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | + pgprot_val(pgprot)) & __supported_pte_mask); } /* diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 143ddc4..147f255 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -266,6 +266,8 @@ static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return p #define pte_update(mm, addr, ptep) do { } while (0) #define pte_update_defer(mm, addr, ptep) do { } while (0) #define paravirt_map_pt_hook(slot, va, pfn) do { } while (0) + +#define raw_ptep_get_and_clear(xp) native_ptep_get_and_clear(xp) #endif /* -- cgit v1.1 From b239fb2501117bf3aeb4dd6926edd855be92333d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: PARAVIRT: Hooks to set up initial pagetable This patch introduces paravirt_ops hooks to control how the kernel's initial pagetable is set up. In the case of a native boot, the very early bootstrap code creates a simple non-PAE pagetable to map the kernel and physical memory. When the VM subsystem is initialized, it creates a proper pagetable which respects the PAE mode, large pages, etc. When booting under a hypervisor, there are many possibilities for what paging environment the hypervisor establishes for the guest kernel, so the construction of the kernel's pagetable depends on the hypervisor. In the case of Xen, the hypervisor boots the kernel with a fully constructed pagetable, which is already using PAE if necessary. Also, Xen requires particular care when constructing pagetables to make sure all pagetables are always mapped read-only. In order to make this easier, the kernel's initial pagetable construction has been changed to only allocate and initialize a pagetable page if there's no page already present in the pagetable. 
This allows the Xen paravirt backend to make a copy of the hypervisor-provided pagetable, allowing the kernel to establish any more mappings it needs while keeping the existing ones. A slightly subtle point which is worth highlighting here is that Xen requires all kernel mappings to share the same pte_t pages between all pagetables, so that updating a kernel page's mapping in one pagetable is reflected in all other pagetables. This makes it possible to allocate a page and attach it to a pagetable without having to explicitly enumerate that page's mapping in all pagetables. And: +From: "Eric W. Biederman" If we don't set the leaf page table entries it is quite possible that we will inherit an incorrect page table entry from the initial boot page table setup in head.S. So we need to redo the effort here, so we pick up PSE, PGE and the like. Hypervisors like Xen require that their page tables be read-only, which is slightly incompatible with our low identity mappings; however, I discussed this with Jeremy and he has modified the Xen early set_pte function to avoid problems in this area. Signed-off-by: Eric W. Biederman Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Acked-by: William Irwin Cc: Ingo Molnar --- include/asm-i386/paravirt.h | 17 ++++++++++++++++- include/asm-i386/pgtable.h | 16 ++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 0aacb13..c49b44c 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -2,10 +2,11 @@ #define __ASM_PARAVIRT_H /* Various instructions on x86 need to be replaced for * para-virtualization: those hooks are defined here. 
*/ + +#ifdef CONFIG_PARAVIRT #include #include -#ifdef CONFIG_PARAVIRT /* These are the most performance critical ops, so we want to be able to patch * callers */ #define PARAVIRT_IRQ_DISABLE 0 @@ -50,6 +51,9 @@ struct paravirt_ops char *(*memory_setup)(void); void (*init_IRQ)(void); + void (*pagetable_setup_start)(pgd_t *pgd_base); + void (*pagetable_setup_done)(pgd_t *pgd_base); + void (*banner)(void); unsigned long (*get_wallclock)(void); @@ -370,6 +374,17 @@ static inline void setup_secondary_clock(void) } #endif +static inline void paravirt_pagetable_setup_start(pgd_t *base) +{ + if (paravirt_ops.pagetable_setup_start) + (*paravirt_ops.pagetable_setup_start)(base); +} + +static inline void paravirt_pagetable_setup_done(pgd_t *base) +{ + if (paravirt_ops.pagetable_setup_done) + (*paravirt_ops.pagetable_setup_done)(base); +} #ifdef CONFIG_SMP static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 147f255..0790ad6 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -514,6 +514,22 @@ do { \ * tables contain all the necessary information. */ #define update_mmu_cache(vma,address,pte) do { } while (0) + +void native_pagetable_setup_start(pgd_t *base); +void native_pagetable_setup_done(pgd_t *base); + +#ifndef CONFIG_PARAVIRT +static inline void paravirt_pagetable_setup_start(pgd_t *base) +{ + native_pagetable_setup_start(base); +} + +static inline void paravirt_pagetable_setup_done(pgd_t *base) +{ + native_pagetable_setup_done(base); +} +#endif /* !CONFIG_PARAVIRT */ + #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_FLATMEM -- cgit v1.1 From 90caccb9758e88db68a69553689baee38254287b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: PARAVIRT: Allocate a fixmap slot Allocate a fixmap slot for use by a paravirt_ops implementation. This is intended for early-boot bootstrap mappings. 
Once the zones and allocator have been set up, it would be better to use get_vm_area() to allocate some virtual space. Xen uses this to map the hypervisor's shared info page, which doesn't have a pseudo-physical page number, and therefore can't be mapped ordinarily. It is needed early because it contains the vcpu state, including the interrupt mask. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Acked-by: Ingo Molnar --- include/asm-i386/fixmap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h index e5651b2..80ea052 100644 --- a/include/asm-i386/fixmap.h +++ b/include/asm-i386/fixmap.h @@ -84,6 +84,9 @@ enum fixed_addresses { #ifdef CONFIG_PCI_MMCONFIG FIX_PCIE_MCFG, #endif +#ifdef CONFIG_PARAVIRT + FIX_PARAVIRT_BOOTMAP, +#endif __end_of_permanent_fixed_addresses, /* temporary boot-time mappings, used before ioremap() is functional */ #define NR_FIX_BTMAPS 16 -- cgit v1.1 From 5311ab62cdc7788784971ed816ce85e926f3e994 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: PARAVIRT: Allow paravirt backend to choose kernel PMD sharing Normally when running in PAE mode, the 4th PMD maps the kernel address space, which can be shared among all processes (since they all need the same kernel mappings). Xen, however, does not allow guests to have the kernel pmd shared between page tables, so parameterize pgtable.c to allow both modes of operation. There are several side-effects of this. One is that vmalloc will update the kernel address space mappings, and those updates need to be propagated into all processes if the kernel mappings are not intrinsically shared. In the non-PAE case, this is done by maintaining a pgd_list of all processes; this list is used when all process pagetables must be updated. 
pgd_list is threaded via otherwise unused entries in the page structure for the pgd, which means that the pgd must be page-sized for this to work. Normally the PAE pgd is only 4x64 byte entries large, but Xen requires the PAE pgd to be page aligned anyway, so this patch forces the pgd to be page aligned+sized when the kernel pmd is unshared, to accommodate both these requirements. Also, since there may be several distinct kernel pmds (if the user/kernel split is below 3G), there's no point in allocating them from a slab cache; they're just allocated with get_free_page and initialized appropriately. (Of course they could be cached if there is just a single kernel pmd - which is the default with a 3G user/kernel split - but it doesn't seem worthwhile to add yet another case into this code). [ Many thanks to wli for review comments. ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: William Lee Irwin III Signed-off-by: Andi Kleen Cc: Zachary Amsden Cc: Christoph Lameter Acked-by: Ingo Molnar Signed-off-by: Andrew Morton --- include/asm-i386/paravirt.h | 1 + include/asm-i386/pgtable-2level-defs.h | 2 ++ include/asm-i386/pgtable-2level.h | 2 -- include/asm-i386/pgtable-3level-defs.h | 6 ++++++ include/asm-i386/pgtable-3level.h | 2 -- include/asm-i386/pgtable.h | 2 ++ 6 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index c49b44c..f93599d 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -35,6 +35,7 @@ struct desc_struct; struct paravirt_ops { unsigned int kernel_rpl; + int shared_kernel_pmd; int paravirt_enabled; const char *name; diff --git a/include/asm-i386/pgtable-2level-defs.h b/include/asm-i386/pgtable-2level-defs.h index 0251807..0f71c9f 100644 --- a/include/asm-i386/pgtable-2level-defs.h +++ b/include/asm-i386/pgtable-2level-defs.h @@ -1,6 +1,8 @@ #ifndef _I386_PGTABLE_2LEVEL_DEFS_H #define _I386_PGTABLE_2LEVEL_DEFS_H +#define SHARED_KERNEL_PMD 
0 + /* * traditional i386 two-level paging structure: */ diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h index 043a2bc..781fe4b 100644 --- a/include/asm-i386/pgtable-2level.h +++ b/include/asm-i386/pgtable-2level.h @@ -82,6 +82,4 @@ static inline int pte_exec_kernel(pte_t pte) #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -void vmalloc_sync_all(void); - #endif /* _I386_PGTABLE_2LEVEL_H */ diff --git a/include/asm-i386/pgtable-3level-defs.h b/include/asm-i386/pgtable-3level-defs.h index eb3a1ea..c0df89f 100644 --- a/include/asm-i386/pgtable-3level-defs.h +++ b/include/asm-i386/pgtable-3level-defs.h @@ -1,6 +1,12 @@ #ifndef _I386_PGTABLE_3LEVEL_DEFS_H #define _I386_PGTABLE_3LEVEL_DEFS_H +#ifdef CONFIG_PARAVIRT +#define SHARED_KERNEL_PMD (paravirt_ops.shared_kernel_pmd) +#else +#define SHARED_KERNEL_PMD 1 +#endif + /* * PGDIR_SHIFT determines what a top-level page table entry can map */ diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h index be6017f..664bfee 100644 --- a/include/asm-i386/pgtable-3level.h +++ b/include/asm-i386/pgtable-3level.h @@ -200,6 +200,4 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) #define __pmd_free_tlb(tlb, x) do { } while (0) -#define vmalloc_sync_all() ((void)0) - #endif /* _I386_PGTABLE_3LEVEL_H */ diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 0790ad6..5b88a6a 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -243,6 +243,8 @@ static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; re static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return pte; } +extern void vmalloc_sync_all(void); + #ifdef CONFIG_X86_PAE # include #else -- cgit v1.1 From d6dd61c831226f9cd7750885da04d360d6455101 Mon Sep 17 
00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] x86: PARAVIRT: add hooks to intercept mm creation and destruction Add hooks to allow a paravirt implementation to track the lifetime of an mm. Paravirtualization requires three hooks, but only two are needed in common code. They are: arch_dup_mmap, which is called when a new mmap is created at fork arch_exit_mmap, which is called when the last process reference to an mm is dropped, which typically happens on exit and exec. The third hook is activate_mm, which is called from the arch-specific activate_mm() macro/function, and so doesn't need stub versions for other architectures. It's called when an mm is first used. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: linux-arch@vger.kernel.org Cc: James Bottomley Acked-by: Ingo Molnar --- include/asm-alpha/mmu_context.h | 1 + include/asm-arm/mmu_context.h | 1 + include/asm-arm26/mmu_context.h | 2 ++ include/asm-avr32/mmu_context.h | 1 + include/asm-cris/mmu_context.h | 2 ++ include/asm-frv/mmu_context.h | 1 + include/asm-generic/mm_hooks.h | 18 ++++++++++++++++++ include/asm-h8300/mmu_context.h | 1 + include/asm-i386/mmu_context.h | 17 +++++++++++++++-- include/asm-i386/paravirt.h | 23 +++++++++++++++++++++++ include/asm-ia64/mmu_context.h | 1 + include/asm-m32r/mmu_context.h | 1 + include/asm-m68k/mmu_context.h | 1 + include/asm-m68knommu/mmu_context.h | 1 + include/asm-mips/mmu_context.h | 1 + include/asm-parisc/mmu_context.h | 1 + include/asm-powerpc/mmu_context.h | 1 + include/asm-ppc/mmu_context.h | 1 + include/asm-s390/mmu_context.h | 2 ++ include/asm-sh/mmu_context.h | 1 + include/asm-sh64/mmu_context.h | 2 +- include/asm-sparc/mmu_context.h | 2 ++ include/asm-sparc64/mmu_context.h | 1 + include/asm-um/mmu_context.h | 2 ++ include/asm-v850/mmu_context.h | 2 ++ include/asm-x86_64/mmu_context.h | 1 + include/asm-xtensa/mmu_context.h | 1 + 27 files changed, 86 insertions(+), 3 deletions(-) create mode 
100644 include/asm-generic/mm_hooks.h (limited to 'include') diff --git a/include/asm-alpha/mmu_context.h b/include/asm-alpha/mmu_context.h index fe249e9..0bd7bd2 100644 --- a/include/asm-alpha/mmu_context.h +++ b/include/asm-alpha/mmu_context.h @@ -10,6 +10,7 @@ #include #include #include +#include /* * Force a context reload. This is needed when we change the page diff --git a/include/asm-arm/mmu_context.h b/include/asm-arm/mmu_context.h index d1a65b1..f8755c8 100644 --- a/include/asm-arm/mmu_context.h +++ b/include/asm-arm/mmu_context.h @@ -16,6 +16,7 @@ #include #include #include +#include void __check_kvm_seq(struct mm_struct *mm); diff --git a/include/asm-arm26/mmu_context.h b/include/asm-arm26/mmu_context.h index 1a929bf..16c821f 100644 --- a/include/asm-arm26/mmu_context.h +++ b/include/asm-arm26/mmu_context.h @@ -13,6 +13,8 @@ #ifndef __ASM_ARM_MMU_CONTEXT_H #define __ASM_ARM_MMU_CONTEXT_H +#include + #define init_new_context(tsk,mm) 0 #define destroy_context(mm) do { } while(0) diff --git a/include/asm-avr32/mmu_context.h b/include/asm-avr32/mmu_context.h index 31add1a..c37c391 100644 --- a/include/asm-avr32/mmu_context.h +++ b/include/asm-avr32/mmu_context.h @@ -15,6 +15,7 @@ #include #include #include +#include /* * The MMU "context" consists of two things: diff --git a/include/asm-cris/mmu_context.h b/include/asm-cris/mmu_context.h index e6e659d..72ba08d 100644 --- a/include/asm-cris/mmu_context.h +++ b/include/asm-cris/mmu_context.h @@ -1,6 +1,8 @@ #ifndef __CRIS_MMU_CONTEXT_H #define __CRIS_MMU_CONTEXT_H +#include + extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm); extern void get_mmu_context(struct mm_struct *mm); extern void destroy_context(struct mm_struct *mm); diff --git a/include/asm-frv/mmu_context.h b/include/asm-frv/mmu_context.h index 72edcaa..c7daa39 100644 --- a/include/asm-frv/mmu_context.h +++ b/include/asm-frv/mmu_context.h @@ -15,6 +15,7 @@ #include #include #include +#include static inline void 
enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h new file mode 100644 index 0000000..67dea81 --- /dev/null +++ b/include/asm-generic/mm_hooks.h @@ -0,0 +1,18 @@ +/* + * Define generic no-op hooks for arch_dup_mmap and arch_exit_mmap, to + * be included in asm-FOO/mmu_context.h for any arch FOO which doesn't + * need to hook these. + */ +#ifndef _ASM_GENERIC_MM_HOOKS_H +#define _ASM_GENERIC_MM_HOOKS_H + +static inline void arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) +{ +} + +static inline void arch_exit_mmap(struct mm_struct *mm) +{ +} + +#endif /* _ASM_GENERIC_MM_HOOKS_H */ diff --git a/include/asm-h8300/mmu_context.h b/include/asm-h8300/mmu_context.h index 5c165f7..f44b730 100644 --- a/include/asm-h8300/mmu_context.h +++ b/include/asm-h8300/mmu_context.h @@ -4,6 +4,7 @@ #include #include #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/include/asm-i386/mmu_context.h b/include/asm-i386/mmu_context.h index e6aa30f..8198d1c 100644 --- a/include/asm-i386/mmu_context.h +++ b/include/asm-i386/mmu_context.h @@ -5,6 +5,16 @@ #include #include #include +#include +#ifndef CONFIG_PARAVIRT +#include + +static inline void paravirt_activate_mm(struct mm_struct *prev, + struct mm_struct *next) +{ +} +#endif /* !CONFIG_PARAVIRT */ + /* * Used for LDT copy/destruction. 
@@ -65,7 +75,10 @@ static inline void switch_mm(struct mm_struct *prev, #define deactivate_mm(tsk, mm) \ asm("movl %0,%%gs": :"r" (0)); -#define activate_mm(prev, next) \ - switch_mm((prev),(next),NULL) +#define activate_mm(prev, next) \ + do { \ + paravirt_activate_mm(prev, next); \ + switch_mm((prev),(next),NULL); \ + } while(0); #endif diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index f93599d..61c03f1 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -119,6 +119,12 @@ struct paravirt_ops void (*io_delay)(void); + void (*activate_mm)(struct mm_struct *prev, + struct mm_struct *next); + void (*dup_mmap)(struct mm_struct *oldmm, + struct mm_struct *mm); + void (*exit_mmap)(struct mm_struct *mm); + #ifdef CONFIG_X86_LOCAL_APIC void (*apic_write)(unsigned long reg, unsigned long v); void (*apic_write_atomic)(unsigned long reg, unsigned long v); @@ -395,6 +401,23 @@ static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, } #endif +static inline void paravirt_activate_mm(struct mm_struct *prev, + struct mm_struct *next) +{ + paravirt_ops.activate_mm(prev, next); +} + +static inline void arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) +{ + paravirt_ops.dup_mmap(oldmm, mm); +} + +static inline void arch_exit_mmap(struct mm_struct *mm) +{ + paravirt_ops.exit_mmap(mm); +} + #define __flush_tlb() paravirt_ops.flush_tlb_user() #define __flush_tlb_global() paravirt_ops.flush_tlb_kernel() #define __flush_tlb_single(addr) paravirt_ops.flush_tlb_single(addr) diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h index b5c6508..cef2400 100644 --- a/include/asm-ia64/mmu_context.h +++ b/include/asm-ia64/mmu_context.h @@ -29,6 +29,7 @@ #include #include +#include struct ia64_ctx { spinlock_t lock; diff --git a/include/asm-m32r/mmu_context.h b/include/asm-m32r/mmu_context.h index 1f40d4a..91909e5 100644 --- a/include/asm-m32r/mmu_context.h +++ 
b/include/asm-m32r/mmu_context.h @@ -15,6 +15,7 @@ #include #include #include +#include /* * Cache of MMU context last used. diff --git a/include/asm-m68k/mmu_context.h b/include/asm-m68k/mmu_context.h index 231d11b..894dacb 100644 --- a/include/asm-m68k/mmu_context.h +++ b/include/asm-m68k/mmu_context.h @@ -1,6 +1,7 @@ #ifndef __M68K_MMU_CONTEXT_H #define __M68K_MMU_CONTEXT_H +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/include/asm-m68knommu/mmu_context.h b/include/asm-m68knommu/mmu_context.h index 6c077d3..9ccee42 100644 --- a/include/asm-m68knommu/mmu_context.h +++ b/include/asm-m68knommu/mmu_context.h @@ -4,6 +4,7 @@ #include #include #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/include/asm-mips/mmu_context.h b/include/asm-mips/mmu_context.h index fe065d6..65024ff 100644 --- a/include/asm-mips/mmu_context.h +++ b/include/asm-mips/mmu_context.h @@ -20,6 +20,7 @@ #include #include #endif /* SMTC */ +#include /* * For the fast tlb miss handlers, we keep a per cpu array of pointers diff --git a/include/asm-parisc/mmu_context.h b/include/asm-parisc/mmu_context.h index 9c05836..bad6902 100644 --- a/include/asm-parisc/mmu_context.h +++ b/include/asm-parisc/mmu_context.h @@ -5,6 +5,7 @@ #include #include #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/include/asm-powerpc/mmu_context.h b/include/asm-powerpc/mmu_context.h index 083ac91..c0d7795 100644 --- a/include/asm-powerpc/mmu_context.h +++ b/include/asm-powerpc/mmu_context.h @@ -10,6 +10,7 @@ #include #include #include +#include /* * Copyright (C) 2001 PPC 64 Team, IBM Corp diff --git a/include/asm-ppc/mmu_context.h b/include/asm-ppc/mmu_context.h index 2bc8589..a6441a0 100644 --- a/include/asm-ppc/mmu_context.h +++ b/include/asm-ppc/mmu_context.h @@ -6,6 +6,7 @@ #include #include #include +#include /* * On 32-bit PowerPC 
6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs diff --git a/include/asm-s390/mmu_context.h b/include/asm-s390/mmu_context.h index 1d21da2..501cb9b 100644 --- a/include/asm-s390/mmu_context.h +++ b/include/asm-s390/mmu_context.h @@ -10,6 +10,8 @@ #define __S390_MMU_CONTEXT_H #include +#include + /* * get a new mmu context.. S390 don't know about contexts. */ diff --git a/include/asm-sh/mmu_context.h b/include/asm-sh/mmu_context.h index 3420244..01acaaa 100644 --- a/include/asm-sh/mmu_context.h +++ b/include/asm-sh/mmu_context.h @@ -12,6 +12,7 @@ #include #include #include +#include /* * The MMU "context" consists of two things: diff --git a/include/asm-sh64/mmu_context.h b/include/asm-sh64/mmu_context.h index 8c860da..507bf72 100644 --- a/include/asm-sh64/mmu_context.h +++ b/include/asm-sh64/mmu_context.h @@ -27,7 +27,7 @@ extern unsigned long mmu_context_cache; #include - +#include /* Current mm's pgd */ extern pgd_t *mmu_pdtp_cache; diff --git a/include/asm-sparc/mmu_context.h b/include/asm-sparc/mmu_context.h index ed1e01d..671a997 100644 --- a/include/asm-sparc/mmu_context.h +++ b/include/asm-sparc/mmu_context.h @@ -5,6 +5,8 @@ #ifndef __ASSEMBLY__ +#include + static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h index 2337eb4..8d12903 100644 --- a/include/asm-sparc64/mmu_context.h +++ b/include/asm-sparc64/mmu_context.h @@ -9,6 +9,7 @@ #include #include #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/include/asm-um/mmu_context.h b/include/asm-um/mmu_context.h index f709c78..9aa4b44 100644 --- a/include/asm-um/mmu_context.h +++ b/include/asm-um/mmu_context.h @@ -6,6 +6,8 @@ #ifndef __UM_MMU_CONTEXT_H #define __UM_MMU_CONTEXT_H +#include + #include "linux/sched.h" #include "choose-mode.h" #include "um_mmu.h" diff --git a/include/asm-v850/mmu_context.h b/include/asm-v850/mmu_context.h 
index f521c80..01daacd 100644 --- a/include/asm-v850/mmu_context.h +++ b/include/asm-v850/mmu_context.h @@ -1,6 +1,8 @@ #ifndef __V850_MMU_CONTEXT_H__ #define __V850_MMU_CONTEXT_H__ +#include + #define destroy_context(mm) ((void)0) #define init_new_context(tsk,mm) 0 #define switch_mm(prev,next,tsk) ((void)0) diff --git a/include/asm-x86_64/mmu_context.h b/include/asm-x86_64/mmu_context.h index af03b9f..0cce83a 100644 --- a/include/asm-x86_64/mmu_context.h +++ b/include/asm-x86_64/mmu_context.h @@ -7,6 +7,7 @@ #include #include #include +#include /* * possibly do the LDT unload here? diff --git a/include/asm-xtensa/mmu_context.h b/include/asm-xtensa/mmu_context.h index f14851f..92f9483 100644 --- a/include/asm-xtensa/mmu_context.h +++ b/include/asm-xtensa/mmu_context.h @@ -18,6 +18,7 @@ #include #include #include +#include #define XCHAL_MMU_ASID_BITS 8 -- cgit v1.1 From 98de032b681d8a7532d44dfc66aa5c0c1c755a9d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] i386: PARAVIRT: rename struct paravirt_patch to paravirt_patch_site for clarity Rename struct paravirt_patch to paravirt_patch_site, so that it clearly refers to a callsite, and not the patch which may be applied to that callsite. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Zachary Amsden --- include/asm-i386/alternative.h | 8 +++++--- include/asm-i386/paravirt.h | 5 ++++- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-i386/alternative.h b/include/asm-i386/alternative.h index 4d518ee..5b59d07 100644 --- a/include/asm-i386/alternative.h +++ b/include/asm-i386/alternative.h @@ -115,12 +115,14 @@ static inline void alternatives_smp_switch(int smp) {} #define LOCK_PREFIX "" #endif -struct paravirt_patch; +struct paravirt_patch_site; #ifdef CONFIG_PARAVIRT -void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end); +void apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end); #else static inline void -apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end) +apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end) {} #define __start_parainstructions NULL #define __stop_parainstructions NULL diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 61c03f1..b4cc2fc 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -505,13 +505,16 @@ void _paravirt_nop(void); #define paravirt_nop ((void *)_paravirt_nop) /* These all sit in the .parainstructions section to tell us what to patch. 
*/ -struct paravirt_patch { +struct paravirt_patch_site { u8 *instr; /* original instructions */ u8 instrtype; /* type of this instruction */ u8 len; /* length of original instruction */ u16 clobbers; /* what registers you may clobber */ }; +extern struct paravirt_patch_site __parainstructions[], + __parainstructions_end[]; + #define paravirt_alt(insn_string, typenum, clobber) \ "771:\n\t" insn_string "\n" "772:\n" \ ".pushsection .parainstructions,\"a\"\n" \ -- cgit v1.1 From d582203578a1f3d408e27bb9042e8635954cd320 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] i386: PARAVIRT: Use patch site IDs computed from offset in paravirt_ops structure Use patch type identifiers derived from the offset of the operation in the paravirt_ops structure. This avoids having to maintain a separate enum for patch site types. Also, since the identifier is derived from the offset into paravirt_ops, the offset can be derived from the identifier. This is used to remove replicated information in the various callsite macros, which has been a source of bugs in the past. This patch also drops the fused save_fl+cli operation, which doesn't really add much and makes things more complex - specifically because it breaks the 1:1 relationship between identifiers and offsets. If this operation turns out to be particularly beneficial, then the right answer is to define a new entrypoint for it. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Zachary Amsden --- include/asm-i386/paravirt.h | 177 +++++++++++++++++++++++--------------------- 1 file changed, 92 insertions(+), 85 deletions(-) (limited to 'include') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index b4cc2fc..1dbc01f 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -4,19 +4,8 @@ * para-virtualization: those hooks are defined here. 
*/ #ifdef CONFIG_PARAVIRT -#include #include -/* These are the most performance critical ops, so we want to be able to patch - * callers */ -#define PARAVIRT_IRQ_DISABLE 0 -#define PARAVIRT_IRQ_ENABLE 1 -#define PARAVIRT_RESTORE_FLAGS 2 -#define PARAVIRT_SAVE_FLAGS 3 -#define PARAVIRT_SAVE_FLAGS_IRQ_DISABLE 4 -#define PARAVIRT_INTERRUPT_RETURN 5 -#define PARAVIRT_STI_SYSEXIT 6 - /* Bitmask of what can be clobbered: usually at least eax. */ #define CLBR_NONE 0x0 #define CLBR_EAX 0x1 @@ -191,6 +180,28 @@ struct paravirt_ops extern struct paravirt_ops paravirt_ops; +#define PARAVIRT_PATCH(x) \ + (offsetof(struct paravirt_ops, x) / sizeof(void *)) + +#define paravirt_type(type) \ + [paravirt_typenum] "i" (PARAVIRT_PATCH(type)) +#define paravirt_clobber(clobber) \ + [paravirt_clobber] "i" (clobber) + +#define PARAVIRT_CALL "call *paravirt_ops+%c[paravirt_typenum]*4;" + +#define _paravirt_alt(insn_string, type, clobber) \ + "771:\n\t" insn_string "\n" "772:\n" \ + ".pushsection .parainstructions,\"a\"\n" \ + " .long 771b\n" \ + " .byte " type "\n" \ + " .byte 772b-771b\n" \ + " .short " clobber "\n" \ + ".popsection\n" + +#define paravirt_alt(insn_string) \ + _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") + #define paravirt_enabled() (paravirt_ops.paravirt_enabled) static inline void load_esp0(struct tss_struct *tss, @@ -515,93 +526,89 @@ struct paravirt_patch_site { extern struct paravirt_patch_site __parainstructions[], __parainstructions_end[]; -#define paravirt_alt(insn_string, typenum, clobber) \ - "771:\n\t" insn_string "\n" "772:\n" \ - ".pushsection .parainstructions,\"a\"\n" \ - " .long 771b\n" \ - " .byte " __stringify(typenum) "\n" \ - " .byte 772b-771b\n" \ - " .short " __stringify(clobber) "\n" \ - ".popsection" - static inline unsigned long __raw_local_save_flags(void) { unsigned long f; - __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" - "call *%1;" - "popl %%edx; popl %%ecx", - PARAVIRT_SAVE_FLAGS, CLBR_NONE) - : 
"=a"(f): "m"(paravirt_ops.save_fl) - : "memory", "cc"); + asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;" + PARAVIRT_CALL + "popl %%edx; popl %%ecx") + : "=a"(f) + : paravirt_type(save_fl), + paravirt_clobber(CLBR_NONE) + : "memory", "cc"); return f; } static inline void raw_local_irq_restore(unsigned long f) { - __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" - "call *%1;" - "popl %%edx; popl %%ecx", - PARAVIRT_RESTORE_FLAGS, CLBR_EAX) - : "=a"(f) : "m" (paravirt_ops.restore_fl), "0"(f) - : "memory", "cc"); + asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;" + PARAVIRT_CALL + "popl %%edx; popl %%ecx") + : "=a"(f) + : "0"(f), + paravirt_type(restore_fl), + paravirt_clobber(CLBR_EAX) + : "memory", "cc"); } static inline void raw_local_irq_disable(void) { - __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" - "call *%0;" - "popl %%edx; popl %%ecx", - PARAVIRT_IRQ_DISABLE, CLBR_EAX) - : : "m" (paravirt_ops.irq_disable) - : "memory", "eax", "cc"); + asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;" + PARAVIRT_CALL + "popl %%edx; popl %%ecx") + : + : paravirt_type(irq_disable), + paravirt_clobber(CLBR_EAX) + : "memory", "eax", "cc"); } static inline void raw_local_irq_enable(void) { - __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" - "call *%0;" - "popl %%edx; popl %%ecx", - PARAVIRT_IRQ_ENABLE, CLBR_EAX) - : : "m" (paravirt_ops.irq_enable) - : "memory", "eax", "cc"); + asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;" + PARAVIRT_CALL + "popl %%edx; popl %%ecx") + : + : paravirt_type(irq_enable), + paravirt_clobber(CLBR_EAX) + : "memory", "eax", "cc"); } static inline unsigned long __raw_local_irq_save(void) { unsigned long f; - __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" - "call *%1; pushl %%eax;" - "call *%2; popl %%eax;" - "popl %%edx; popl %%ecx", - PARAVIRT_SAVE_FLAGS_IRQ_DISABLE, - CLBR_NONE) - : "=a"(f) - : "m" (paravirt_ops.save_fl), - "m" (paravirt_ops.irq_disable) - : "memory", 
"cc"); + f = __raw_local_save_flags(); + raw_local_irq_disable(); return f; } -#define CLI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;" \ - "call *paravirt_ops+%c[irq_disable];" \ - "popl %%edx; popl %%ecx", \ - PARAVIRT_IRQ_DISABLE, CLBR_EAX) +#define CLI_STRING \ + _paravirt_alt("pushl %%ecx; pushl %%edx;" \ + "call *paravirt_ops+%c[paravirt_cli_type]*4;" \ + "popl %%edx; popl %%ecx", \ + "%c[paravirt_cli_type]", "%c[paravirt_clobber]") + +#define STI_STRING \ + _paravirt_alt("pushl %%ecx; pushl %%edx;" \ + "call *paravirt_ops+%c[paravirt_sti_type]*4;" \ + "popl %%edx; popl %%ecx", \ + "%c[paravirt_sti_type]", "%c[paravirt_clobber]") -#define STI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;" \ - "call *paravirt_ops+%c[irq_enable];" \ - "popl %%edx; popl %%ecx", \ - PARAVIRT_IRQ_ENABLE, CLBR_EAX) #define CLI_STI_CLOBBERS , "%eax" -#define CLI_STI_INPUT_ARGS \ +#define CLI_STI_INPUT_ARGS \ , \ - [irq_disable] "i" (offsetof(struct paravirt_ops, irq_disable)), \ - [irq_enable] "i" (offsetof(struct paravirt_ops, irq_enable)) + [paravirt_cli_type] "i" (PARAVIRT_PATCH(irq_disable)), \ + [paravirt_sti_type] "i" (PARAVIRT_PATCH(irq_enable)), \ + paravirt_clobber(CLBR_EAX) + +#undef PARAVIRT_CALL #else /* __ASSEMBLY__ */ -#define PARA_PATCH(ptype, clobbers, ops) \ +#define PARA_PATCH(off) ((off) / 4) + +#define PARA_SITE(ptype, clobbers, ops) \ 771:; \ ops; \ 772:; \ @@ -612,25 +619,25 @@ static inline unsigned long __raw_local_irq_save(void) .short clobbers; \ .popsection -#define INTERRUPT_RETURN \ - PARA_PATCH(PARAVIRT_INTERRUPT_RETURN, CLBR_ANY, \ - jmp *%cs:paravirt_ops+PARAVIRT_iret) - -#define DISABLE_INTERRUPTS(clobbers) \ - PARA_PATCH(PARAVIRT_IRQ_DISABLE, clobbers, \ - pushl %ecx; pushl %edx; \ - call *paravirt_ops+PARAVIRT_irq_disable; \ - popl %edx; popl %ecx) \ - -#define ENABLE_INTERRUPTS(clobbers) \ - PARA_PATCH(PARAVIRT_IRQ_ENABLE, clobbers, \ - pushl %ecx; pushl %edx; \ - call *%cs:paravirt_ops+PARAVIRT_irq_enable; \ - popl %edx; popl %ecx) - -#define 
ENABLE_INTERRUPTS_SYSEXIT \ - PARA_PATCH(PARAVIRT_STI_SYSEXIT, CLBR_ANY, \ - jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit) +#define INTERRUPT_RETURN \ + PARA_SITE(PARA_PATCH(PARAVIRT_iret), CLBR_ANY, \ + jmp *%cs:paravirt_ops+PARAVIRT_iret) + +#define DISABLE_INTERRUPTS(clobbers) \ + PARA_SITE(PARA_PATCH(PARAVIRT_irq_disable), clobbers, \ + pushl %ecx; pushl %edx; \ + call *%cs:paravirt_ops+PARAVIRT_irq_disable; \ + popl %edx; popl %ecx) \ + +#define ENABLE_INTERRUPTS(clobbers) \ + PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable), clobbers, \ + pushl %ecx; pushl %edx; \ + call *%cs:paravirt_ops+PARAVIRT_irq_enable; \ + popl %edx; popl %ecx) + +#define ENABLE_INTERRUPTS_SYSEXIT \ + PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable_sysexit), CLBR_ANY, \ + jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit) #define GET_CR0_INTO_EAX \ call *paravirt_ops+PARAVIRT_read_cr0 -- cgit v1.1 From 42c24fa22e86365055fc931d833f26165e687c19 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] i386: PARAVIRT: Fix patch site clobbers to include return register Fix a few clobbers to include the return register. The clobbers set is the set of all registers modified (or may be modified) by the code snippet, regardless of whether it was deliberate or accidental. Also, make sure that callsites which are used in contexts which don't allow clobbers actually save and restore all clobberable registers. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Zachary Amsden --- include/asm-i386/paravirt.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 1dbc01f..87fd431 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -535,7 +535,7 @@ static inline unsigned long __raw_local_save_flags(void) "popl %%edx; popl %%ecx") : "=a"(f) : paravirt_type(save_fl), - paravirt_clobber(CLBR_NONE) + paravirt_clobber(CLBR_EAX) : "memory", "cc"); return f; } @@ -620,27 +620,29 @@ static inline unsigned long __raw_local_irq_save(void) .popsection #define INTERRUPT_RETURN \ - PARA_SITE(PARA_PATCH(PARAVIRT_iret), CLBR_ANY, \ + PARA_SITE(PARA_PATCH(PARAVIRT_iret), CLBR_NONE, \ jmp *%cs:paravirt_ops+PARAVIRT_iret) #define DISABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(PARAVIRT_irq_disable), clobbers, \ - pushl %ecx; pushl %edx; \ + pushl %eax; pushl %ecx; pushl %edx; \ call *%cs:paravirt_ops+PARAVIRT_irq_disable; \ - popl %edx; popl %ecx) \ + popl %edx; popl %ecx; popl %eax) \ #define ENABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable), clobbers, \ - pushl %ecx; pushl %edx; \ + pushl %eax; pushl %ecx; pushl %edx; \ call *%cs:paravirt_ops+PARAVIRT_irq_enable; \ - popl %edx; popl %ecx) + popl %edx; popl %ecx; popl %eax) #define ENABLE_INTERRUPTS_SYSEXIT \ - PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable_sysexit), CLBR_ANY, \ + PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable_sysexit), CLBR_NONE, \ jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit) #define GET_CR0_INTO_EAX \ - call *paravirt_ops+PARAVIRT_read_cr0 + push %ecx; push %edx; \ + call *paravirt_ops+PARAVIRT_read_cr0; \ + pop %edx; pop %ecx #endif /* __ASSEMBLY__ */ #endif /* CONFIG_PARAVIRT */ -- cgit v1.1 From f8822f42019eceed19cc6c0f985a489e17796ed8 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 
Subject: [PATCH] i386: PARAVIRT: Consistently wrap paravirt ops callsites to make them patchable Wrap a set of interesting paravirt_ops calls in a wrapper which makes the callsites available for patching. Unfortunately this is pretty ugly because there's no way to get gcc to generate a function call, but also wrap just the callsite itself with the necessary labels. This patch supports functions with 0-4 arguments, and either void or returning a value. 64-bit arguments must be split into a pair of 32-bit arguments (lower word first). Small structures are returned in registers. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Zachary Amsden Cc: Anthony Liguori --- include/asm-i386/paravirt.h | 686 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 560 insertions(+), 126 deletions(-) (limited to 'include') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 87fd431..837457b 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -124,7 +124,7 @@ struct paravirt_ops void (*flush_tlb_user)(void); void (*flush_tlb_kernel)(void); - void (*flush_tlb_single)(u32 addr); + void (*flush_tlb_single)(unsigned long addr); void (*map_pt_hook)(int type, pte_t *va, u32 pfn); @@ -188,7 +188,7 @@ extern struct paravirt_ops paravirt_ops; #define paravirt_clobber(clobber) \ [paravirt_clobber] "i" (clobber) -#define PARAVIRT_CALL "call *paravirt_ops+%c[paravirt_typenum]*4;" +#define PARAVIRT_CALL "call *(paravirt_ops+%c[paravirt_typenum]*4);" #define _paravirt_alt(insn_string, type, clobber) \ "771:\n\t" insn_string "\n" "772:\n" \ @@ -199,26 +199,234 @@ extern struct paravirt_ops paravirt_ops; " .short " clobber "\n" \ ".popsection\n" -#define paravirt_alt(insn_string) \ +#define paravirt_alt(insn_string) \ _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") -#define paravirt_enabled() (paravirt_ops.paravirt_enabled) +#define PVOP_CALL0(__rettype, __op) \ + ({ \ + __rettype 
__ret; \ + if (sizeof(__rettype) > sizeof(unsigned long)) { \ + unsigned long long __tmp; \ + unsigned long __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=A" (__tmp), "=c" (__ecx) \ + : paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } else { \ + unsigned long __tmp, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__tmp), "=d" (__edx), \ + "=c" (__ecx) \ + : paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } \ + __ret; \ + }) +#define PVOP_VCALL0(__op) \ + ({ \ + unsigned long __eax, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ + : paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + }) + +#define PVOP_CALL1(__rettype, __op, arg1) \ + ({ \ + __rettype __ret; \ + if (sizeof(__rettype) > sizeof(unsigned long)) { \ + unsigned long long __tmp; \ + unsigned long __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=A" (__tmp), "=c" (__ecx) \ + : "a" ((u32)(arg1)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } else { \ + unsigned long __tmp, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__tmp), "=d" (__edx), \ + "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } \ + __ret; \ + }) +#define PVOP_VCALL1(__op, arg1) \ + ({ \ + unsigned long __eax, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + }) + +#define PVOP_CALL2(__rettype, __op, arg1, arg2) \ + ({ \ + __rettype __ret; \ + if (sizeof(__rettype) > sizeof(unsigned long)) { \ + unsigned long long __tmp; \ + unsigned 
long __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=A" (__tmp), "=c" (__ecx) \ + : "a" ((u32)(arg1)), \ + "d" ((u32)(arg2)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } else { \ + unsigned long __tmp, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__tmp), "=d" (__edx), \ + "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + "1" ((u32)(arg2)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } \ + __ret; \ + }) +#define PVOP_VCALL2(__op, arg1, arg2) \ + ({ \ + unsigned long __eax, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + "1" ((u32)(arg2)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + }) + +#define PVOP_CALL3(__rettype, __op, arg1, arg2, arg3) \ + ({ \ + __rettype __ret; \ + if (sizeof(__rettype) > sizeof(unsigned long)) { \ + unsigned long long __tmp; \ + unsigned long __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=A" (__tmp), "=c" (__ecx) \ + : "a" ((u32)(arg1)), \ + "d" ((u32)(arg2)), \ + "1" ((u32)(arg3)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } else { \ + unsigned long __tmp, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__tmp), "=d" (__edx), \ + "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + "1" ((u32)(arg2)), \ + "2" ((u32)(arg3)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } \ + __ret; \ + }) +#define PVOP_VCALL3(__op, arg1, arg2, arg3) \ + ({ \ + unsigned long __eax, __edx, __ecx; \ + asm volatile(paravirt_alt(PARAVIRT_CALL) \ + : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + "1" ((u32)(arg2)), \ + "2" ((u32)(arg3)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) 
\ + : "memory", "cc"); \ + }) + +#define PVOP_CALL4(__rettype, __op, arg1, arg2, arg3, arg4) \ + ({ \ + __rettype __ret; \ + if (sizeof(__rettype) > sizeof(unsigned long)) { \ + unsigned long long __tmp; \ + unsigned long __ecx; \ + asm volatile("push %[_arg4]; " \ + paravirt_alt(PARAVIRT_CALL) \ + "lea 4(%%esp),%%esp" \ + : "=A" (__tmp), "=c" (__ecx) \ + : "a" ((u32)(arg1)), \ + "d" ((u32)(arg2)), \ + "1" ((u32)(arg3)), \ + [_arg4] "mr" ((u32)(arg4)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc",); \ + __ret = (__rettype)__tmp; \ + } else { \ + unsigned long __tmp, __edx, __ecx; \ + asm volatile("push %[_arg4]; " \ + paravirt_alt(PARAVIRT_CALL) \ + "lea 4(%%esp),%%esp" \ + : "=a" (__tmp), "=d" (__edx), "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + "1" ((u32)(arg2)), \ + "2" ((u32)(arg3)), \ + [_arg4]"mr" ((u32)(arg4)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + __ret = (__rettype)__tmp; \ + } \ + __ret; \ + }) +#define PVOP_VCALL4(__op, arg1, arg2, arg3, arg4) \ + ({ \ + unsigned long __eax, __edx, __ecx; \ + asm volatile("push %[_arg4]; " \ + paravirt_alt(PARAVIRT_CALL) \ + "lea 4(%%esp),%%esp" \ + : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ + : "0" ((u32)(arg1)), \ + "1" ((u32)(arg2)), \ + "2" ((u32)(arg3)), \ + [_arg4]"mr" ((u32)(arg4)), \ + paravirt_type(__op), \ + paravirt_clobber(CLBR_ANY) \ + : "memory", "cc"); \ + }) + +static inline int paravirt_enabled(void) +{ + return paravirt_ops.paravirt_enabled; +} static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread) { - paravirt_ops.load_esp0(tss, thread); + PVOP_VCALL2(load_esp0, tss, thread); } #define ARCH_SETUP paravirt_ops.arch_setup(); static inline unsigned long get_wallclock(void) { - return paravirt_ops.get_wallclock(); + return PVOP_CALL0(unsigned long, get_wallclock); } static inline int set_wallclock(unsigned long nowtime) { - return paravirt_ops.set_wallclock(nowtime); + return PVOP_CALL1(int, 
set_wallclock, nowtime); } static inline void (*choose_time_init(void))(void) @@ -230,127 +438,208 @@ static inline void (*choose_time_init(void))(void) static inline void __cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { - paravirt_ops.cpuid(eax, ebx, ecx, edx); + PVOP_VCALL4(cpuid, eax, ebx, ecx, edx); } /* * These special macros can be used to get or set a debugging register */ -#define get_debugreg(var, reg) var = paravirt_ops.get_debugreg(reg) -#define set_debugreg(val, reg) paravirt_ops.set_debugreg(reg, val) +static inline unsigned long paravirt_get_debugreg(int reg) +{ + return PVOP_CALL1(unsigned long, get_debugreg, reg); +} +#define get_debugreg(var, reg) var = paravirt_get_debugreg(reg) +static inline void set_debugreg(unsigned long val, int reg) +{ + PVOP_VCALL2(set_debugreg, reg, val); +} -#define clts() paravirt_ops.clts() +static inline void clts(void) +{ + PVOP_VCALL0(clts); +} -#define read_cr0() paravirt_ops.read_cr0() -#define write_cr0(x) paravirt_ops.write_cr0(x) +static inline unsigned long read_cr0(void) +{ + return PVOP_CALL0(unsigned long, read_cr0); +} -#define read_cr2() paravirt_ops.read_cr2() -#define write_cr2(x) paravirt_ops.write_cr2(x) +static inline void write_cr0(unsigned long x) +{ + PVOP_VCALL1(write_cr0, x); +} + +static inline unsigned long read_cr2(void) +{ + return PVOP_CALL0(unsigned long, read_cr2); +} + +static inline void write_cr2(unsigned long x) +{ + PVOP_VCALL1(write_cr2, x); +} + +static inline unsigned long read_cr3(void) +{ + return PVOP_CALL0(unsigned long, read_cr3); +} -#define read_cr3() paravirt_ops.read_cr3() -#define write_cr3(x) paravirt_ops.write_cr3(x) +static inline void write_cr3(unsigned long x) +{ + PVOP_VCALL1(write_cr3, x); +} -#define read_cr4() paravirt_ops.read_cr4() -#define read_cr4_safe(x) paravirt_ops.read_cr4_safe() -#define write_cr4(x) paravirt_ops.write_cr4(x) +static inline unsigned long read_cr4(void) +{ + return PVOP_CALL0(unsigned long, read_cr4); 
+} +static inline unsigned long read_cr4_safe(void) +{ + return PVOP_CALL0(unsigned long, read_cr4_safe); +} -#define raw_ptep_get_and_clear(xp) (paravirt_ops.ptep_get_and_clear(xp)) +static inline void write_cr4(unsigned long x) +{ + PVOP_VCALL1(write_cr4, x); +} static inline void raw_safe_halt(void) { - paravirt_ops.safe_halt(); + PVOP_VCALL0(safe_halt); } static inline void halt(void) { - paravirt_ops.safe_halt(); + PVOP_VCALL0(safe_halt); +} + +static inline void wbinvd(void) +{ + PVOP_VCALL0(wbinvd); } -#define wbinvd() paravirt_ops.wbinvd() #define get_kernel_rpl() (paravirt_ops.kernel_rpl) +static inline u64 paravirt_read_msr(unsigned msr, int *err) +{ + return PVOP_CALL2(u64, read_msr, msr, err); +} +static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) +{ + return PVOP_CALL3(int, write_msr, msr, low, high); +} + /* These should all do BUG_ON(_err), but our headers are too tangled. */ -#define rdmsr(msr,val1,val2) do { \ - int _err; \ - u64 _l = paravirt_ops.read_msr(msr,&_err); \ - val1 = (u32)_l; \ - val2 = _l >> 32; \ +#define rdmsr(msr,val1,val2) do { \ + int _err; \ + u64 _l = paravirt_read_msr(msr, &_err); \ + val1 = (u32)_l; \ + val2 = _l >> 32; \ } while(0) -#define wrmsr(msr,val1,val2) do { \ - u64 _l = ((u64)(val2) << 32) | (val1); \ - paravirt_ops.write_msr((msr), _l); \ +#define wrmsr(msr,val1,val2) do { \ + paravirt_write_msr(msr, val1, val2); \ } while(0) -#define rdmsrl(msr,val) do { \ - int _err; \ - val = paravirt_ops.read_msr((msr),&_err); \ +#define rdmsrl(msr,val) do { \ + int _err; \ + val = paravirt_read_msr(msr, &_err); \ } while(0) -#define wrmsrl(msr,val) (paravirt_ops.write_msr((msr),(val))) -#define wrmsr_safe(msr,a,b) ({ \ - u64 _l = ((u64)(b) << 32) | (a); \ - paravirt_ops.write_msr((msr),_l); \ -}) +#define wrmsrl(msr,val) ((void)paravirt_write_msr(msr, val, 0)) +#define wrmsr_safe(msr,a,b) paravirt_write_msr(msr, a, b) /* rdmsr with exception handling */ -#define rdmsr_safe(msr,a,b) ({ \ - int _err; 
\ - u64 _l = paravirt_ops.read_msr(msr,&_err); \ - (*a) = (u32)_l; \ - (*b) = _l >> 32; \ +#define rdmsr_safe(msr,a,b) ({ \ + int _err; \ + u64 _l = paravirt_read_msr(msr, &_err); \ + (*a) = (u32)_l; \ + (*b) = _l >> 32; \ _err; }) -#define rdtsc(low,high) do { \ - u64 _l = paravirt_ops.read_tsc(); \ - low = (u32)_l; \ - high = _l >> 32; \ + +static inline u64 paravirt_read_tsc(void) +{ + return PVOP_CALL0(u64, read_tsc); +} +#define rdtsc(low,high) do { \ + u64 _l = paravirt_read_tsc(); \ + low = (u32)_l; \ + high = _l >> 32; \ } while(0) -#define rdtscl(low) do { \ - u64 _l = paravirt_ops.read_tsc(); \ - low = (int)_l; \ +#define rdtscl(low) do { \ + u64 _l = paravirt_read_tsc(); \ + low = (int)_l; \ } while(0) -#define rdtscll(val) (val = paravirt_ops.read_tsc()) +#define rdtscll(val) (val = paravirt_read_tsc()) #define get_scheduled_cycles(val) (val = paravirt_ops.get_scheduled_cycles()) #define calculate_cpu_khz() (paravirt_ops.get_cpu_khz()) #define write_tsc(val1,val2) wrmsr(0x10, val1, val2) -#define rdpmc(counter,low,high) do { \ - u64 _l = paravirt_ops.read_pmc(); \ - low = (u32)_l; \ - high = _l >> 32; \ -} while(0) +static inline unsigned long long paravirt_read_pmc(int counter) +{ + return PVOP_CALL1(u64, read_pmc, counter); +} -#define load_TR_desc() (paravirt_ops.load_tr_desc()) -#define load_gdt(dtr) (paravirt_ops.load_gdt(dtr)) -#define load_idt(dtr) (paravirt_ops.load_idt(dtr)) -#define set_ldt(addr, entries) (paravirt_ops.set_ldt((addr), (entries))) -#define store_gdt(dtr) (paravirt_ops.store_gdt(dtr)) -#define store_idt(dtr) (paravirt_ops.store_idt(dtr)) -#define store_tr(tr) ((tr) = paravirt_ops.store_tr()) -#define load_TLS(t,cpu) (paravirt_ops.load_tls((t),(cpu))) -#define write_ldt_entry(dt, entry, low, high) \ - (paravirt_ops.write_ldt_entry((dt), (entry), (low), (high))) -#define write_gdt_entry(dt, entry, low, high) \ - (paravirt_ops.write_gdt_entry((dt), (entry), (low), (high))) -#define write_idt_entry(dt, entry, low, high) \ - 
(paravirt_ops.write_idt_entry((dt), (entry), (low), (high))) -#define set_iopl_mask(mask) (paravirt_ops.set_iopl_mask(mask)) - -#define __pte(x) paravirt_ops.make_pte(x) -#define __pgd(x) paravirt_ops.make_pgd(x) - -#define pte_val(x) paravirt_ops.pte_val(x) -#define pgd_val(x) paravirt_ops.pgd_val(x) +#define rdpmc(counter,low,high) do { \ + u64 _l = paravirt_read_pmc(counter); \ + low = (u32)_l; \ + high = _l >> 32; \ +} while(0) -#ifdef CONFIG_X86_PAE -#define __pmd(x) paravirt_ops.make_pmd(x) -#define pmd_val(x) paravirt_ops.pmd_val(x) -#endif +static inline void load_TR_desc(void) +{ + PVOP_VCALL0(load_tr_desc); +} +static inline void load_gdt(const struct Xgt_desc_struct *dtr) +{ + PVOP_VCALL1(load_gdt, dtr); +} +static inline void load_idt(const struct Xgt_desc_struct *dtr) +{ + PVOP_VCALL1(load_idt, dtr); +} +static inline void set_ldt(const void *addr, unsigned entries) +{ + PVOP_VCALL2(set_ldt, addr, entries); +} +static inline void store_gdt(struct Xgt_desc_struct *dtr) +{ + PVOP_VCALL1(store_gdt, dtr); +} +static inline void store_idt(struct Xgt_desc_struct *dtr) +{ + PVOP_VCALL1(store_idt, dtr); +} +static inline unsigned long paravirt_store_tr(void) +{ + return PVOP_CALL0(unsigned long, store_tr); +} +#define store_tr(tr) ((tr) = paravirt_store_tr()) +static inline void load_TLS(struct thread_struct *t, unsigned cpu) +{ + PVOP_VCALL2(load_tls, t, cpu); +} +static inline void write_ldt_entry(void *dt, int entry, u32 low, u32 high) +{ + PVOP_VCALL4(write_ldt_entry, dt, entry, low, high); +} +static inline void write_gdt_entry(void *dt, int entry, u32 low, u32 high) +{ + PVOP_VCALL4(write_gdt_entry, dt, entry, low, high); +} +static inline void write_idt_entry(void *dt, int entry, u32 low, u32 high) +{ + PVOP_VCALL4(write_idt_entry, dt, entry, low, high); +} +static inline void set_iopl_mask(unsigned mask) +{ + PVOP_VCALL1(set_iopl_mask, mask); +} /* The paravirtualized I/O functions */ static inline void slow_down_io(void) { @@ -368,27 +657,27 @@ static 
inline void slow_down_io(void) { */ static inline void apic_write(unsigned long reg, unsigned long v) { - paravirt_ops.apic_write(reg,v); + PVOP_VCALL2(apic_write, reg, v); } static inline void apic_write_atomic(unsigned long reg, unsigned long v) { - paravirt_ops.apic_write_atomic(reg,v); + PVOP_VCALL2(apic_write_atomic, reg, v); } static inline unsigned long apic_read(unsigned long reg) { - return paravirt_ops.apic_read(reg); + return PVOP_CALL1(unsigned long, apic_read, reg); } static inline void setup_boot_clock(void) { - paravirt_ops.setup_boot_clock(); + PVOP_VCALL0(setup_boot_clock); } static inline void setup_secondary_clock(void) { - paravirt_ops.setup_secondary_clock(); + PVOP_VCALL0(setup_secondary_clock); } #endif @@ -408,93 +697,205 @@ static inline void paravirt_pagetable_setup_done(pgd_t *base) static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, unsigned long start_esp) { - return paravirt_ops.startup_ipi_hook(phys_apicid, start_eip, start_esp); + PVOP_VCALL3(startup_ipi_hook, phys_apicid, start_eip, start_esp); } #endif static inline void paravirt_activate_mm(struct mm_struct *prev, struct mm_struct *next) { - paravirt_ops.activate_mm(prev, next); + PVOP_VCALL2(activate_mm, prev, next); } static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { - paravirt_ops.dup_mmap(oldmm, mm); + PVOP_VCALL2(dup_mmap, oldmm, mm); } static inline void arch_exit_mmap(struct mm_struct *mm) { - paravirt_ops.exit_mmap(mm); + PVOP_VCALL1(exit_mmap, mm); } -#define __flush_tlb() paravirt_ops.flush_tlb_user() -#define __flush_tlb_global() paravirt_ops.flush_tlb_kernel() -#define __flush_tlb_single(addr) paravirt_ops.flush_tlb_single(addr) +static inline void __flush_tlb(void) +{ + PVOP_VCALL0(flush_tlb_user); +} +static inline void __flush_tlb_global(void) +{ + PVOP_VCALL0(flush_tlb_kernel); +} +static inline void __flush_tlb_single(unsigned long addr) +{ + PVOP_VCALL1(flush_tlb_single, addr); +} -#define 
paravirt_map_pt_hook(type, va, pfn) paravirt_ops.map_pt_hook(type, va, pfn) +static inline void paravirt_map_pt_hook(int type, pte_t *va, u32 pfn) +{ + PVOP_VCALL3(map_pt_hook, type, va, pfn); +} -#define paravirt_alloc_pt(pfn) paravirt_ops.alloc_pt(pfn) -#define paravirt_release_pt(pfn) paravirt_ops.release_pt(pfn) +static inline void paravirt_alloc_pt(unsigned pfn) +{ + PVOP_VCALL1(alloc_pt, pfn); +} +static inline void paravirt_release_pt(unsigned pfn) +{ + PVOP_VCALL1(release_pt, pfn); +} -#define paravirt_alloc_pd(pfn) paravirt_ops.alloc_pd(pfn) -#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) \ - paravirt_ops.alloc_pd_clone(pfn, clonepfn, start, count) -#define paravirt_release_pd(pfn) paravirt_ops.release_pd(pfn) +static inline void paravirt_alloc_pd(unsigned pfn) +{ + PVOP_VCALL1(alloc_pd, pfn); +} -static inline void set_pte(pte_t *ptep, pte_t pteval) +static inline void paravirt_alloc_pd_clone(unsigned pfn, unsigned clonepfn, + unsigned start, unsigned count) +{ + PVOP_VCALL4(alloc_pd_clone, pfn, clonepfn, start, count); +} +static inline void paravirt_release_pd(unsigned pfn) { - paravirt_ops.set_pte(ptep, pteval); + PVOP_VCALL1(release_pd, pfn); } -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval) +static inline void pte_update(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - paravirt_ops.set_pte_at(mm, addr, ptep, pteval); + PVOP_VCALL3(pte_update, mm, addr, ptep); } -static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) +static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - paravirt_ops.set_pmd(pmdp, pmdval); + PVOP_VCALL3(pte_update_defer, mm, addr, ptep); } -static inline void pte_update(struct mm_struct *mm, u32 addr, pte_t *ptep) +#ifdef CONFIG_X86_PAE +static inline pte_t __pte(unsigned long long val) { - paravirt_ops.pte_update(mm, addr, ptep); + unsigned long long ret = PVOP_CALL2(unsigned long long, make_pte, + val, val >> 
32); + return (pte_t) { ret, ret >> 32 }; } -static inline void pte_update_defer(struct mm_struct *mm, u32 addr, pte_t *ptep) +static inline pmd_t __pmd(unsigned long long val) { - paravirt_ops.pte_update_defer(mm, addr, ptep); + return (pmd_t) { PVOP_CALL2(unsigned long long, make_pmd, val, val >> 32) }; +} + +static inline pgd_t __pgd(unsigned long long val) +{ + return (pgd_t) { PVOP_CALL2(unsigned long long, make_pgd, val, val >> 32) }; +} + +static inline unsigned long long pte_val(pte_t x) +{ + return PVOP_CALL2(unsigned long long, pte_val, x.pte_low, x.pte_high); +} + +static inline unsigned long long pmd_val(pmd_t x) +{ + return PVOP_CALL2(unsigned long long, pmd_val, x.pmd, x.pmd >> 32); +} + +static inline unsigned long long pgd_val(pgd_t x) +{ + return PVOP_CALL2(unsigned long long, pgd_val, x.pgd, x.pgd >> 32); +} + +static inline void set_pte(pte_t *ptep, pte_t pteval) +{ + PVOP_VCALL3(set_pte, ptep, pteval.pte_low, pteval.pte_high); +} + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + /* 5 arg words */ + paravirt_ops.set_pte_at(mm, addr, ptep, pteval); } -#ifdef CONFIG_X86_PAE static inline void set_pte_atomic(pte_t *ptep, pte_t pteval) { - paravirt_ops.set_pte_atomic(ptep, pteval); + PVOP_VCALL3(set_pte_atomic, ptep, pteval.pte_low, pteval.pte_high); } -static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) +static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) { + /* 5 arg words */ paravirt_ops.set_pte_present(mm, addr, ptep, pte); } +static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + PVOP_VCALL3(set_pmd, pmdp, pmdval.pmd, pmdval.pmd >> 32); +} + static inline void set_pud(pud_t *pudp, pud_t pudval) { - paravirt_ops.set_pud(pudp, pudval); + PVOP_VCALL3(set_pud, pudp, pudval.pgd.pgd, pudval.pgd.pgd >> 32); } static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 
{ - paravirt_ops.pte_clear(mm, addr, ptep); + PVOP_VCALL3(pte_clear, mm, addr, ptep); } static inline void pmd_clear(pmd_t *pmdp) { - paravirt_ops.pmd_clear(pmdp); + PVOP_VCALL1(pmd_clear, pmdp); +} + +static inline pte_t raw_ptep_get_and_clear(pte_t *p) +{ + unsigned long long val = PVOP_CALL1(unsigned long long, ptep_get_and_clear, p); + return (pte_t) { val, val >> 32 }; +} +#else /* !CONFIG_X86_PAE */ +static inline pte_t __pte(unsigned long val) +{ + return (pte_t) { PVOP_CALL1(unsigned long, make_pte, val) }; } -#endif + +static inline pgd_t __pgd(unsigned long val) +{ + return (pgd_t) { PVOP_CALL1(unsigned long, make_pgd, val) }; +} + +static inline unsigned long pte_val(pte_t x) +{ + return PVOP_CALL1(unsigned long, pte_val, x.pte_low); +} + +static inline unsigned long pgd_val(pgd_t x) +{ + return PVOP_CALL1(unsigned long, pgd_val, x.pgd); +} + +static inline void set_pte(pte_t *ptep, pte_t pteval) +{ + PVOP_VCALL2(set_pte, ptep, pteval.pte_low); +} + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + PVOP_VCALL4(set_pte_at, mm, addr, ptep, pteval.pte_low); +} + +static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + PVOP_VCALL2(set_pmd, pmdp, pmdval.pud.pgd.pgd); +} + +static inline pte_t raw_ptep_get_and_clear(pte_t *p) +{ + return (pte_t) { PVOP_CALL1(unsigned long, ptep_get_and_clear, p) }; +} +#endif /* CONFIG_X86_PAE */ /* Lazy mode for batching updates / context switch */ #define PARAVIRT_LAZY_NONE 0 @@ -503,14 +904,37 @@ static inline void pmd_clear(pmd_t *pmdp) #define PARAVIRT_LAZY_FLUSH 3 #define __HAVE_ARCH_ENTER_LAZY_CPU_MODE -#define arch_enter_lazy_cpu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_CPU) -#define arch_leave_lazy_cpu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_NONE) -#define arch_flush_lazy_cpu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_FLUSH) +static inline void arch_enter_lazy_cpu_mode(void) +{ + PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_CPU); +} + +static 
inline void arch_leave_lazy_cpu_mode(void) +{ + PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_NONE); +} + +static inline void arch_flush_lazy_cpu_mode(void) +{ + PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_FLUSH); +} + #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE -#define arch_enter_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_MMU) -#define arch_leave_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_NONE) -#define arch_flush_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_FLUSH) +static inline void arch_enter_lazy_mmu_mode(void) +{ + PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_MMU); +} + +static inline void arch_leave_lazy_mmu_mode(void) +{ + PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_NONE); +} + +static inline void arch_flush_lazy_mmu_mode(void) +{ + PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_FLUSH); +} void _paravirt_nop(void); #define paravirt_nop ((void *)_paravirt_nop) @@ -603,6 +1027,16 @@ static inline unsigned long __raw_local_irq_save(void) paravirt_clobber(CLBR_EAX) #undef PARAVIRT_CALL +#undef PVOP_VCALL0 +#undef PVOP_CALL0 +#undef PVOP_VCALL1 +#undef PVOP_CALL1 +#undef PVOP_VCALL2 +#undef PVOP_CALL2 +#undef PVOP_VCALL3 +#undef PVOP_CALL3 +#undef PVOP_VCALL4 +#undef PVOP_CALL4 #else /* __ASSEMBLY__ */ -- cgit v1.1 From 294688c028e80fd467cdd22da79f62c5f311eaf5 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] i386: PARAVIRT: Document asm-i386/paravirt.h Clean things up, and broadly document: - the paravirt_ops functions themselves - the patching mechanism Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell --- include/asm-i386/paravirt.h | 131 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 121 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 837457b..8bfaf10 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -21,6 +21,14 @@ struct 
Xgt_desc_struct; struct tss_struct; struct mm_struct; struct desc_struct; + +/* Lazy mode for batching updates / context switch */ +enum paravirt_lazy_mode { + PARAVIRT_LAZY_NONE = 0, + PARAVIRT_LAZY_MMU = 1, + PARAVIRT_LAZY_CPU = 2, +}; + struct paravirt_ops { unsigned int kernel_rpl; @@ -37,22 +45,33 @@ struct paravirt_ops */ unsigned (*patch)(u8 type, u16 clobber, void *firstinsn, unsigned len); + /* Basic arch-specific setup */ void (*arch_setup)(void); char *(*memory_setup)(void); void (*init_IRQ)(void); + void (*time_init)(void); + /* + * Called before/after init_mm pagetable setup. setup_start + * may reset %cr3, and may pre-install parts of the pagetable; + * pagetable setup is expected to preserve any existing + * mapping. + */ void (*pagetable_setup_start)(pgd_t *pgd_base); void (*pagetable_setup_done)(pgd_t *pgd_base); + /* Print a banner to identify the environment */ void (*banner)(void); + /* Get and set time of day */ unsigned long (*get_wallclock)(void); int (*set_wallclock)(unsigned long); - void (*time_init)(void); + /* cpuid emulation, mostly so that caps bits can be disabled */ void (*cpuid)(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx); + /* hooks for various privileged instructions */ unsigned long (*get_debugreg)(int regno); void (*set_debugreg)(int regno, unsigned long value); @@ -71,15 +90,23 @@ struct paravirt_ops unsigned long (*read_cr4)(void); void (*write_cr4)(unsigned long); + /* + * Get/set interrupt state. save_fl and restore_fl are only + * expected to use X86_EFLAGS_IF; all other bits + * returned from save_fl are undefined, and may be ignored by + * restore_fl. + */ unsigned long (*save_fl)(void); void (*restore_fl)(unsigned long); void (*irq_disable)(void); void (*irq_enable)(void); void (*safe_halt)(void); void (*halt)(void); + void (*wbinvd)(void); - /* err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ + /* MSR, PMC and TSC operations. + err = 0/-EFAULT. wrmsr returns 0/-EFAULT. 
*/ u64 (*read_msr)(unsigned int msr, int *err); int (*write_msr)(unsigned int msr, u64 val); @@ -88,6 +115,7 @@ struct paravirt_ops u64 (*get_scheduled_cycles)(void); unsigned long (*get_cpu_khz)(void); + /* Segment descriptor handling */ void (*load_tr_desc)(void); void (*load_gdt)(const struct Xgt_desc_struct *); void (*load_idt)(const struct Xgt_desc_struct *); @@ -105,9 +133,12 @@ struct paravirt_ops void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t); void (*set_iopl_mask)(unsigned mask); - void (*io_delay)(void); + /* + * Hooks for intercepting the creation/use/destruction of an + * mm_struct. + */ void (*activate_mm)(struct mm_struct *prev, struct mm_struct *next); void (*dup_mmap)(struct mm_struct *oldmm, @@ -115,30 +146,43 @@ struct paravirt_ops void (*exit_mmap)(struct mm_struct *mm); #ifdef CONFIG_X86_LOCAL_APIC + /* + * Direct APIC operations, principally for VMI. Ideally + * these shouldn't be in this interface. + */ void (*apic_write)(unsigned long reg, unsigned long v); void (*apic_write_atomic)(unsigned long reg, unsigned long v); unsigned long (*apic_read)(unsigned long reg); void (*setup_boot_clock)(void); void (*setup_secondary_clock)(void); + + void (*startup_ipi_hook)(int phys_apicid, + unsigned long start_eip, + unsigned long start_esp); #endif + /* TLB operations */ void (*flush_tlb_user)(void); void (*flush_tlb_kernel)(void); void (*flush_tlb_single)(unsigned long addr); void (*map_pt_hook)(int type, pte_t *va, u32 pfn); + /* Hooks for allocating/releasing pagetable pages */ void (*alloc_pt)(u32 pfn); void (*alloc_pd)(u32 pfn); void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); void (*release_pt)(u32 pfn); void (*release_pd)(u32 pfn); + /* Pagetable manipulation functions */ void (*set_pte)(pte_t *ptep, pte_t pteval); - void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval); + void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval); void 
(*set_pmd)(pmd_t *pmdp, pmd_t pmdval); void (*pte_update)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); - void (*pte_update_defer)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + void (*pte_update_defer)(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); pte_t (*ptep_get_and_clear)(pte_t *ptep); @@ -164,13 +208,12 @@ struct paravirt_ops pgd_t (*make_pgd)(unsigned long pgd); #endif - void (*set_lazy_mode)(int mode); + /* Set deferred update mode, used for batching operations. */ + void (*set_lazy_mode)(enum paravirt_lazy_mode mode); /* These two are jmp to, not actually called. */ void (*irq_enable_sysexit)(void); void (*iret)(void); - - void (*startup_ipi_hook)(int phys_apicid, unsigned long start_eip, unsigned long start_esp); }; /* Mark a paravirt probe function. */ @@ -188,8 +231,10 @@ extern struct paravirt_ops paravirt_ops; #define paravirt_clobber(clobber) \ [paravirt_clobber] "i" (clobber) -#define PARAVIRT_CALL "call *(paravirt_ops+%c[paravirt_typenum]*4);" - +/* + * Generate some code, and mark it as patchable by the + * apply_paravirt() alternate instruction patcher. + */ #define _paravirt_alt(insn_string, type, clobber) \ "771:\n\t" insn_string "\n" "772:\n" \ ".pushsection .parainstructions,\"a\"\n" \ @@ -199,9 +244,74 @@ extern struct paravirt_ops paravirt_ops; " .short " clobber "\n" \ ".popsection\n" +/* Generate patchable code, with the default asm parameters. */ #define paravirt_alt(insn_string) \ _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") +/* + * This generates an indirect call based on the operation type number. + * The type number, computed in PARAVIRT_PATCH, is derived from the + * offset into the paravirt_ops structure, and can therefore be freely + * converted back into a structure offset. 
+ */ +#define PARAVIRT_CALL "call *(paravirt_ops+%c[paravirt_typenum]*4);" + +/* + * These macros are intended to wrap calls into a paravirt_ops + * operation, so that they can be later identified and patched at + * runtime. + * + * Normally, a call to a pv_op function is a simple indirect call: + * (paravirt_ops.operations)(args...). + * + * Unfortunately, this is a relatively slow operation for modern CPUs, + * because it cannot necessarily determine what the destination + * address is. In this case, the address is a runtime constant, so at + * the very least we can patch the call to be a simple direct call, or + * ideally, patch an inline implementation into the callsite. (Direct + * calls are essentially free, because the call and return addresses + * are completely predictable.) + * + * These macros rely on the standard gcc "regparm(3)" calling + * convention, in which the first three arguments are placed in %eax, + * %edx, %ecx (in that order), and the remaining arguments are placed + * on the stack. All caller-save registers (eax,edx,ecx) are expected + * to be modified (either clobbered or used for return values). + * + * The call instruction itself is marked by placing its start address + * and size into the .parainstructions section, so that + * apply_paravirt() in arch/i386/kernel/alternative.c can do the + * appropriate patching under the control of the backend paravirt_ops + * implementation. + * + * Unfortunately there's no way to get gcc to generate the args setup + * for the call, and then allow the call itself to be generated by an + * inline asm. Because of this, we must do the complete arg setup and + * return value handling from within these macros. This is fairly + * cumbersome. + * + * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments. + * It could be extended to more arguments, but there would be little + * to be gained from that. 
For each number of arguments, there are + * the two VCALL and CALL variants for void and non-void functions. + * + * When there is a return value, the invoker of the macro must specify + * the return type. The macro then uses sizeof() on that type to + * determine whether it's a 32 or 64 bit value, and places the return + * in the right register(s) (just %eax for 32-bit, and %edx:%eax for + * 64-bit). + * + * 64-bit arguments are passed as a pair of adjacent 32-bit arguments + * in low,high order. + * + * Small structures are passed and returned in registers. The macro + * calling convention can't directly deal with this, so the wrapper + * functions must do this. + * + * These PVOP_* macros are only defined within this header. This + * means that all uses must be wrapped in inline functions. This also + * makes sure the incoming and outgoing types are always correct. + */ #define PVOP_CALL0(__rettype, __op) \ ({ \ __rettype __ret; \ @@ -1026,6 +1136,7 @@ static inline unsigned long __raw_local_irq_save(void) [paravirt_sti_type] "i" (PARAVIRT_PATCH(irq_enable)), \ paravirt_clobber(CLBR_EAX) +/* Make sure as little as possible of this mess escapes. */ #undef PARAVIRT_CALL #undef PVOP_VCALL0 #undef PVOP_CALL0 -- cgit v1.1 From 63f70270ccd981ce40a8ff58c03a8c2e97e368be Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:14 +0200 Subject: [PATCH] i386: PARAVIRT: add common patching machinery Implement the actual patching machinery. 
paravirt_patch_default() contains the logic to automatically patch a callsite based on a few simple rules: - if the paravirt_op function is paravirt_nop, then patch nops - if the paravirt_op function is a jmp target, then jmp to it - if the paravirt_op function is callable and doesn't clobber too much for the callsite, call it directly paravirt_patch_default is suitable as a default implementation of paravirt_ops.patch, will remove most of the expensive indirect calls in favour of either a direct call or a pile of nops. Backends may implement their own patcher, however. There are several helper functions to help with this: paravirt_patch_nop nop out a callsite paravirt_patch_ignore leave the callsite as-is paravirt_patch_call patch a call if the caller and callee have compatible clobbers paravirt_patch_jmp patch in a jmp paravirt_patch_insns patch some literal instructions over the callsite, if they fit This patch also implements more direct patches for the native case, so that when running on native hardware many common operations are implemented inline. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Zachary Amsden Cc: Anthony Liguori Acked-by: Ingo Molnar --- include/asm-i386/paravirt.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 8bfaf10..4b3d508 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -248,6 +248,18 @@ extern struct paravirt_ops paravirt_ops; #define paravirt_alt(insn_string) \ _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") +unsigned paravirt_patch_nop(void); +unsigned paravirt_patch_ignore(unsigned len); +unsigned paravirt_patch_call(void *target, u16 tgt_clobbers, + void *site, u16 site_clobbers, + unsigned len); +unsigned paravirt_patch_jmp(void *target, void *site, unsigned len); +unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len); + +unsigned paravirt_patch_insns(void *site, unsigned len, + const char *start, const char *end); + + /* * This generates an indirect call based on the operation type number. * The type number, computed in PARAVIRT_PATCH, is derived from the -- cgit v1.1 From d4c104771a1c58e3de2a888b73b0ba1b54c0ae76 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: add flush_tlb_others paravirt_op This patch adds a pv_op for flush_tlb_others. Linux running on native hardware uses cross-CPU IPIs to flush the TLB on any CPU which may have a particular mm's pagetable entries cached in its TLB. This is inefficient in a paravirtualized environment, since the hypervisor knows which real CPUs actually contain cached mappings, which may be a small subset of a guest's VCPUs. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen --- include/asm-i386/paravirt.h | 9 +++++++++ include/asm-i386/tlbflush.h | 19 +++++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 4b3d508..f880b06 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -15,6 +15,7 @@ #ifndef __ASSEMBLY__ #include +#include struct thread_struct; struct Xgt_desc_struct; @@ -165,6 +166,8 @@ struct paravirt_ops void (*flush_tlb_user)(void); void (*flush_tlb_kernel)(void); void (*flush_tlb_single)(unsigned long addr); + void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, + unsigned long va); void (*map_pt_hook)(int type, pte_t *va, u32 pfn); @@ -853,6 +856,12 @@ static inline void __flush_tlb_single(unsigned long addr) PVOP_VCALL1(flush_tlb_single, addr); } +static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, + unsigned long va) +{ + PVOP_VCALL3(flush_tlb_others, &cpumask, mm, va); +} + static inline void paravirt_map_pt_hook(int type, pte_t *va, u32 pfn) { PVOP_VCALL3(map_pt_hook, type, va, pfn); diff --git a/include/asm-i386/tlbflush.h b/include/asm-i386/tlbflush.h index 4dd8284..db7f77e 100644 --- a/include/asm-i386/tlbflush.h +++ b/include/asm-i386/tlbflush.h @@ -79,11 +79,15 @@ * - flush_tlb_range(vma, start, end) flushes a range of pages * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables + * - flush_tlb_others(cpumask, mm, va) flushes TLBs on other cpus * * ..but the i386 has somewhat limited tlb flushing capabilities, * and page-granular flushes are available only on i486 and up. 
*/ +#define TLB_FLUSH_ALL 0xffffffff + + #ifndef CONFIG_SMP #define flush_tlb() __flush_tlb() @@ -110,7 +114,12 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, __flush_tlb(); } -#else +static inline void native_flush_tlb_others(const cpumask_t *cpumask, + struct mm_struct *mm, unsigned long va) +{ +} + +#else /* SMP */ #include @@ -129,6 +138,9 @@ static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long st flush_tlb_mm(vma->vm_mm); } +void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm, + unsigned long va); + #define TLBSTATE_OK 1 #define TLBSTATE_LAZY 2 @@ -139,8 +151,11 @@ struct tlb_state char __cacheline_padding[L1_CACHE_BYTES-8]; }; DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); +#endif /* SMP */ - +#ifndef CONFIG_PARAVIRT +#define flush_tlb_others(mask, mm, va) \ + native_flush_tlb_others(&mask, mm, va) #endif #define flush_tlb_kernel_range(start, end) flush_tlb_all() -- cgit v1.1 From a27fe809b82c5e18932fcceded28d0d1481ce7bb Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: revert map_pt_hook. Back out the map_pt_hook to clear the way for kmap_atomic_pte. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Zachary Amsden --- include/asm-i386/paravirt.h | 7 ------- include/asm-i386/pgtable.h | 23 ++++------------------- 2 files changed, 4 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index f880b06..10f44af 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -169,8 +169,6 @@ struct paravirt_ops void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, unsigned long va); - void (*map_pt_hook)(int type, pte_t *va, u32 pfn); - /* Hooks for allocating/releasing pagetable pages */ void (*alloc_pt)(u32 pfn); void (*alloc_pd)(u32 pfn); @@ -862,11 +860,6 @@ static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, PVOP_VCALL3(flush_tlb_others, &cpumask, mm, va); } -static inline void paravirt_map_pt_hook(int type, pte_t *va, u32 pfn) -{ - PVOP_VCALL3(map_pt_hook, type, va, pfn); -} - static inline void paravirt_alloc_pt(unsigned pfn) { PVOP_VCALL1(alloc_pt, pfn); diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 5b88a6a..6599f2a 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -267,7 +267,6 @@ extern void vmalloc_sync_all(void); */ #define pte_update(mm, addr, ptep) do { } while (0) #define pte_update_defer(mm, addr, ptep) do { } while (0) -#define paravirt_map_pt_hook(slot, va, pfn) do { } while (0) #define raw_ptep_get_and_clear(xp) native_ptep_get_and_clear(xp) #endif @@ -476,24 +475,10 @@ extern pte_t *lookup_address(unsigned long address); #endif #if defined(CONFIG_HIGHPTE) -#define pte_offset_map(dir, address) \ -({ \ - pte_t *__ptep; \ - unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ - __ptep = (pte_t *)kmap_atomic(pfn_to_page(pfn),KM_PTE0);\ - paravirt_map_pt_hook(KM_PTE0,__ptep, pfn); \ - __ptep = __ptep + pte_index(address); \ - __ptep; \ -}) -#define pte_offset_map_nested(dir, address) \ -({ \ - pte_t *__ptep; \ - 
unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ - __ptep = (pte_t *)kmap_atomic(pfn_to_page(pfn),KM_PTE1);\ - paravirt_map_pt_hook(KM_PTE1,__ptep, pfn); \ - __ptep = __ptep + pte_index(address); \ - __ptep; \ -}) +#define pte_offset_map(dir, address) \ + ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) +#define pte_offset_map_nested(dir, address) \ + ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE1) + pte_index(address)) #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) #else -- cgit v1.1 From ce6234b5298902aaec831a67d5f8d9bd2ef5a488 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: add kmap_atomic_pte for mapping highpte pages Xen and VMI both have special requirements when mapping a highmem pte page into the kernel address space. These can be dealt with by adding a new kmap_atomic_pte() function for mapping highptes, and hooking it into the paravirt_ops infrastructure. Xen specifically wants to map the pte page RO, so this patch exposes a helper function, kmap_atomic_prot, which maps the page with the specified page protections. This also adds a kmap_flush_unused() function to clear out the cached kmap mappings. Xen needs this to clear out any potential stray RW mappings of pages which will become part of a pagetable. [ Zach - vmi.c will need some attention after this patch. It wasn't immediately obvious to me what needs to be done. 
] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Zachary Amsden --- include/asm-i386/highmem.h | 6 ++++++ include/asm-i386/paravirt.h | 15 +++++++++++++++ include/asm-i386/pgtable.h | 4 ++-- include/linux/highmem.h | 6 ++++++ 4 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-i386/highmem.h b/include/asm-i386/highmem.h index e9a34eb..13cdcd6 100644 --- a/include/asm-i386/highmem.h +++ b/include/asm-i386/highmem.h @@ -24,6 +24,7 @@ #include #include #include +#include /* declarations for highmem.c */ extern unsigned long highstart_pfn, highend_pfn; @@ -67,11 +68,16 @@ extern void FASTCALL(kunmap_high(struct page *page)); void *kmap(struct page *page); void kunmap(struct page *page); +void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot); void *kmap_atomic(struct page *page, enum km_type type); void kunmap_atomic(void *kvaddr, enum km_type type); void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); struct page *kmap_atomic_to_page(void *ptr); +#ifndef CONFIG_PARAVIRT +#define kmap_atomic_pte(page, type) kmap_atomic(page, type) +#endif + #define flush_cache_kmaps() do { } while (0) #endif /* __KERNEL__ */ diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 10f44af..5048b41 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -16,7 +16,9 @@ #ifndef __ASSEMBLY__ #include #include +#include +struct page; struct thread_struct; struct Xgt_desc_struct; struct tss_struct; @@ -187,6 +189,10 @@ struct paravirt_ops pte_t (*ptep_get_and_clear)(pte_t *ptep); +#ifdef CONFIG_HIGHPTE + void *(*kmap_atomic_pte)(struct page *page, enum km_type type); +#endif + #ifdef CONFIG_X86_PAE void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); @@ -884,6 +890,15 @@ static inline void paravirt_release_pd(unsigned pfn) PVOP_VCALL1(release_pd, pfn); } +#ifdef 
CONFIG_HIGHPTE +static inline void *kmap_atomic_pte(struct page *page, enum km_type type) +{ + unsigned long ret; + ret = PVOP_CALL2(unsigned long, kmap_atomic_pte, page, type); + return (void *)ret; +} +#endif + static inline void pte_update(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 6599f2a..befc697 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -476,9 +476,9 @@ extern pte_t *lookup_address(unsigned long address); #if defined(CONFIG_HIGHPTE) #define pte_offset_map(dir, address) \ - ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) #define pte_offset_map_nested(dir, address) \ - ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE1) + pte_index(address)) + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address)) #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) #else diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 645d440..bca8e2d 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -27,6 +27,8 @@ static inline void flush_kernel_dcache_page(struct page *page) unsigned int nr_free_highpages(void); extern unsigned long totalhigh_pages; +void kmap_flush_unused(void); + #else /* CONFIG_HIGHMEM */ static inline unsigned int nr_free_highpages(void) { return 0; } @@ -44,9 +46,13 @@ static inline void *kmap(struct page *page) #define kmap_atomic(page, idx) \ ({ pagefault_disable(); page_address(page); }) +#define kmap_atomic_prot(page, idx, prot) kmap_atomic(page, idx) + #define kunmap_atomic(addr, idx) do { pagefault_enable(); } while (0) #define kmap_atomic_pfn(pfn, idx) kmap_atomic(pfn_to_page(pfn), (idx)) #define kmap_atomic_to_page(ptr) virt_to_page(ptr) + +#define kmap_flush_unused() do {} while(0) #endif #endif /* CONFIG_HIGHMEM */ -- cgit v1.1 From 
4e0fa85602a4fa219fc3a9c053d5140bf987d3e3 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: Use enums for paravirt lazy flush modi Remove #defines, add enum for PARAVIRT_LAZY_FLUSH. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen --- include/asm-i386/paravirt.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 5048b41..c545192 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -30,6 +30,7 @@ enum paravirt_lazy_mode { PARAVIRT_LAZY_NONE = 0, PARAVIRT_LAZY_MMU = 1, PARAVIRT_LAZY_CPU = 2, + PARAVIRT_LAZY_FLUSH = 3, }; struct paravirt_ops @@ -1036,12 +1037,6 @@ static inline pte_t raw_ptep_get_and_clear(pte_t *p) } #endif /* CONFIG_X86_PAE */ -/* Lazy mode for batching updates / context switch */ -#define PARAVIRT_LAZY_NONE 0 -#define PARAVIRT_LAZY_MMU 1 -#define PARAVIRT_LAZY_CPU 2 -#define PARAVIRT_LAZY_FLUSH 3 - #define __HAVE_ARCH_ENTER_LAZY_CPU_MODE static inline void arch_enter_lazy_cpu_mode(void) { -- cgit v1.1 From 1a45b7aaa5051489b46afbc48509bd91f8b4a1ba Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: Clean up paravirt patchable wrappers Replace all the open-coded macros for generating calls with a pair of more general macros (__PVOP_CALL/VCALL), and redefine all the PVOP_V?CALL[0-4] in terms of them. 
[ Andrew, Andi: this should slot in immediately after "Document asm-i386/paravirt.h" (paravirt_ops-document-asm-i386-paravirth.patch) ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Ingo Molnar --- include/asm-i386/paravirt.h | 248 +++++++++++--------------------------------- 1 file changed, 60 insertions(+), 188 deletions(-) (limited to 'include') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index c545192..2ba1896 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -332,211 +332,81 @@ unsigned paravirt_patch_insns(void *site, unsigned len, * means that all uses must be wrapped in inline functions. This also * makes sure the incoming and outgoing types are always correct. */ -#define PVOP_CALL0(__rettype, __op) \ - ({ \ - __rettype __ret; \ - if (sizeof(__rettype) > sizeof(unsigned long)) { \ - unsigned long long __tmp; \ - unsigned long __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=A" (__tmp), "=c" (__ecx) \ - : paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } else { \ - unsigned long __tmp, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__tmp), "=d" (__edx), \ - "=c" (__ecx) \ - : paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } \ - __ret; \ - }) -#define PVOP_VCALL0(__op) \ +#define __PVOP_CALL(rettype, op, pre, post, ...) 
\ ({ \ + rettype __ret; \ unsigned long __eax, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ - : paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - }) - -#define PVOP_CALL1(__rettype, __op, arg1) \ - ({ \ - __rettype __ret; \ - if (sizeof(__rettype) > sizeof(unsigned long)) { \ - unsigned long long __tmp; \ - unsigned long __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=A" (__tmp), "=c" (__ecx) \ - : "a" ((u32)(arg1)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } else { \ - unsigned long __tmp, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__tmp), "=d" (__edx), \ - "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } \ - __ret; \ - }) -#define PVOP_VCALL1(__op, arg1) \ - ({ \ - unsigned long __eax, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - }) - -#define PVOP_CALL2(__rettype, __op, arg1, arg2) \ - ({ \ - __rettype __ret; \ - if (sizeof(__rettype) > sizeof(unsigned long)) { \ - unsigned long long __tmp; \ - unsigned long __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=A" (__tmp), "=c" (__ecx) \ - : "a" ((u32)(arg1)), \ - "d" ((u32)(arg2)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } else { \ - unsigned long __tmp, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__tmp), "=d" (__edx), \ - "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - "1" ((u32)(arg2)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } \ - __ret; \ - }) -#define PVOP_VCALL2(__op, 
arg1, arg2) \ - ({ \ - unsigned long __eax, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - "1" ((u32)(arg2)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - }) - -#define PVOP_CALL3(__rettype, __op, arg1, arg2, arg3) \ - ({ \ - __rettype __ret; \ - if (sizeof(__rettype) > sizeof(unsigned long)) { \ - unsigned long long __tmp; \ - unsigned long __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=A" (__tmp), "=c" (__ecx) \ - : "a" ((u32)(arg1)), \ - "d" ((u32)(arg2)), \ - "1" ((u32)(arg3)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } else { \ - unsigned long __tmp, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__tmp), "=d" (__edx), \ + if (sizeof(rettype) > sizeof(unsigned long)) { \ + asm volatile(pre \ + paravirt_alt(PARAVIRT_CALL) \ + post \ + : "=a" (__eax), "=d" (__edx), \ "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - "1" ((u32)(arg2)), \ - "2" ((u32)(arg3)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ + : paravirt_type(op), \ + paravirt_clobber(CLBR_ANY), \ + ##__VA_ARGS__ \ : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ - } \ - __ret; \ - }) -#define PVOP_VCALL3(__op, arg1, arg2, arg3) \ - ({ \ - unsigned long __eax, __edx, __ecx; \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ - : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - "1" ((u32)(arg2)), \ - "2" ((u32)(arg3)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc"); \ - }) - -#define PVOP_CALL4(__rettype, __op, arg1, arg2, arg3, arg4) \ - ({ \ - __rettype __ret; \ - if (sizeof(__rettype) > sizeof(unsigned long)) { \ - unsigned long long __tmp; \ - unsigned long __ecx; \ - asm volatile("push %[_arg4]; " \ - paravirt_alt(PARAVIRT_CALL) \ - "lea 4(%%esp),%%esp" \ - : "=A" (__tmp), "=c" (__ecx) \ - : "a" ((u32)(arg1)), \ - 
"d" ((u32)(arg2)), \ - "1" ((u32)(arg3)), \ - [_arg4] "mr" ((u32)(arg4)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ - : "memory", "cc",); \ - __ret = (__rettype)__tmp; \ + __ret = (rettype)((((u64)__edx) << 32) | __eax); \ } else { \ - unsigned long __tmp, __edx, __ecx; \ - asm volatile("push %[_arg4]; " \ + asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ - "lea 4(%%esp),%%esp" \ - : "=a" (__tmp), "=d" (__edx), "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - "1" ((u32)(arg2)), \ - "2" ((u32)(arg3)), \ - [_arg4]"mr" ((u32)(arg4)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ + post \ + : "=a" (__eax), "=d" (__edx), \ + "=c" (__ecx) \ + : paravirt_type(op), \ + paravirt_clobber(CLBR_ANY), \ + ##__VA_ARGS__ \ : "memory", "cc"); \ - __ret = (__rettype)__tmp; \ + __ret = (rettype)__eax; \ } \ __ret; \ }) -#define PVOP_VCALL4(__op, arg1, arg2, arg3, arg4) \ +#define __PVOP_VCALL(op, pre, post, ...) \ ({ \ unsigned long __eax, __edx, __ecx; \ - asm volatile("push %[_arg4]; " \ + asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ - "lea 4(%%esp),%%esp" \ + post \ : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \ - : "0" ((u32)(arg1)), \ - "1" ((u32)(arg2)), \ - "2" ((u32)(arg3)), \ - [_arg4]"mr" ((u32)(arg4)), \ - paravirt_type(__op), \ - paravirt_clobber(CLBR_ANY) \ + : paravirt_type(op), \ + paravirt_clobber(CLBR_ANY), \ + ##__VA_ARGS__ \ : "memory", "cc"); \ }) +#define PVOP_CALL0(rettype, op) \ + __PVOP_CALL(rettype, op, "", "") +#define PVOP_VCALL0(op) \ + __PVOP_VCALL(op, "", "") + +#define PVOP_CALL1(rettype, op, arg1) \ + __PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1))) +#define PVOP_VCALL1(op, arg1) \ + __PVOP_VCALL(op, "", "", "0" ((u32)(arg1))) + +#define PVOP_CALL2(rettype, op, arg1, arg2) \ + __PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)), "1" ((u32)(arg2))) +#define PVOP_VCALL2(op, arg1, arg2) \ + __PVOP_VCALL(op, "", "", "0" ((u32)(arg1)), "1" ((u32)(arg2))) + +#define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ + __PVOP_CALL(rettype, 
op, "", "", "0" ((u32)(arg1)), \ + "1"((u32)(arg2)), "2"((u32)(arg3))) +#define PVOP_VCALL3(op, arg1, arg2, arg3) \ + __PVOP_VCALL(op, "", "", "0" ((u32)(arg1)), "1"((u32)(arg2)), \ + "2"((u32)(arg3))) + +#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ + __PVOP_CALL(rettype, op, \ + "push %[_arg4];", "lea 4(%%esp),%%esp;", \ + "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ + "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) +#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ + __PVOP_VCALL(op, \ + "push %[_arg4];", "lea 4(%%esp),%%esp;", \ + "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ + "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) + static inline int paravirt_enabled(void) { return paravirt_ops.paravirt_enabled; @@ -1162,6 +1032,8 @@ static inline unsigned long __raw_local_irq_save(void) /* Make sure as little as possible of this mess escapes. */ #undef PARAVIRT_CALL +#undef __PVOP_CALL +#undef __PVOP_VCALL #undef PVOP_VCALL0 #undef PVOP_CALL0 #undef PVOP_VCALL1 -- cgit v1.1 From 4cdd9c8931767e1c56a51a1078d33a8c340f4405 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: drop unused ptep_get_and_clear In shadow mode hypervisors, ptep_get_and_clear achieves the desired purpose of keeping the shadows in sync by issuing a native_get_and_clear, followed by a call to pte_update, which indicates the PTE has been modified. Direct mode hypervisors (Xen) have no need for this anyway, and will trap the update using writable pagetables. This means no hypervisor makes use of ptep_get_and_clear; there is no reason to have it in the paravirt-ops structure. Change confusing terminology about raw vs. native functions into consistent use of native_pte_xxx for operations which do not invoke paravirt-ops. 
Signed-off-by: Zachary Amsden Signed-off-by: Andi Kleen --- include/asm-i386/paravirt.h | 13 +------------ include/asm-i386/pgtable.h | 4 +--- 2 files changed, 2 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 2ba1896..e2e7f98 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -188,8 +188,6 @@ struct paravirt_ops void (*pte_update_defer)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); - pte_t (*ptep_get_and_clear)(pte_t *ptep); - #ifdef CONFIG_HIGHPTE void *(*kmap_atomic_pte)(struct page *page, enum km_type type); #endif @@ -859,12 +857,8 @@ static inline void pmd_clear(pmd_t *pmdp) PVOP_VCALL1(pmd_clear, pmdp); } -static inline pte_t raw_ptep_get_and_clear(pte_t *p) -{ - unsigned long long val = PVOP_CALL1(unsigned long long, ptep_get_and_clear, p); - return (pte_t) { val, val >> 32 }; -} #else /* !CONFIG_X86_PAE */ + static inline pte_t __pte(unsigned long val) { return (pte_t) { PVOP_CALL1(unsigned long, make_pte, val) }; @@ -900,11 +894,6 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) { PVOP_VCALL2(set_pmd, pmdp, pmdval.pud.pgd.pgd); } - -static inline pte_t raw_ptep_get_and_clear(pte_t *p) -{ - return (pte_t) { PVOP_CALL1(unsigned long, ptep_get_and_clear, p) }; -} #endif /* CONFIG_X86_PAE */ #define __HAVE_ARCH_ENTER_LAZY_CPU_MODE diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index befc697..e7ddd23 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -267,8 +267,6 @@ extern void vmalloc_sync_all(void); */ #define pte_update(mm, addr, ptep) do { } while (0) #define pte_update_defer(mm, addr, ptep) do { } while (0) - -#define raw_ptep_get_and_clear(xp) native_ptep_get_and_clear(xp) #endif /* @@ -335,7 +333,7 @@ do { \ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_t pte = 
raw_ptep_get_and_clear(ptep); + pte_t pte = native_ptep_get_and_clear(ptep); pte_update(mm, addr, ptep); return pte; } -- cgit v1.1 From 7a61d35d4b4056e7711031202da7605e052f4137 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: Page-align the GDT Xen wants a dedicated page for the GDT. I believe VMI likes it too. lguest, KVM and native don't care. Simple transformation to page-aligned "struct gdt_page". Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Acked-by: Jeremy Fitzhardinge --- include/asm-i386/desc.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index 4a97406..c547403 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h @@ -18,10 +18,15 @@ struct Xgt_desc_struct { unsigned short pad; } __attribute__ ((packed)); -DECLARE_PER_CPU(struct desc_struct, cpu_gdt[GDT_ENTRIES]); +struct gdt_page +{ + struct desc_struct gdt[GDT_ENTRIES]; +} __attribute__((aligned(PAGE_SIZE))); +DECLARE_PER_CPU(struct gdt_page, gdt_page); + static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) { - return per_cpu(cpu_gdt, cpu); + return per_cpu(gdt_page, cpu).gdt; } extern struct Xgt_desc_struct idt_descr; -- cgit v1.1 From 7c3576d261ce046789a7db14f43303f8120910c7 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:16 +0200 Subject: [PATCH] i386: Convert PDA into the percpu section Currently x86 (similar to x86-64) has a special per-cpu structure called "i386_pda" which can be easily and efficiently referenced via the %fs register. An ELF section is more flexible than a structure, allowing any piece of code to use this area. Indeed, such a section already exists: the per-cpu area. So this patch: (1) Removes the PDA and uses per-cpu variables for each current member. (2) Replaces the __KERNEL_PDA segment with __KERNEL_PERCPU. 
(3) Creates a per-cpu mirror of __per_cpu_offset called this_cpu_off, which can be used to calculate addresses for this CPU's variables. (4) Simplifies startup, because %fs doesn't need to be loaded with a special segment at early boot; it can be deferred until the first percpu area is allocated (or never for UP). The result is less code and one less x86-specific concept. Signed-off-by: Rusty Russell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Andi Kleen --- include/asm-i386/current.h | 5 +- include/asm-i386/irq_regs.h | 12 ++-- include/asm-i386/pda.h | 99 -------------------------------- include/asm-i386/percpu.h | 132 ++++++++++++++++++++++++++++++++++++++++--- include/asm-i386/processor.h | 2 +- include/asm-i386/segment.h | 6 +- include/asm-i386/smp.h | 4 +- 7 files changed, 139 insertions(+), 121 deletions(-) delete mode 100644 include/asm-i386/pda.h (limited to 'include') diff --git a/include/asm-i386/current.h b/include/asm-i386/current.h index 5252ee0..d352485 100644 --- a/include/asm-i386/current.h +++ b/include/asm-i386/current.h @@ -1,14 +1,15 @@ #ifndef _I386_CURRENT_H #define _I386_CURRENT_H -#include #include +#include struct task_struct; +DECLARE_PER_CPU(struct task_struct *, current_task); static __always_inline struct task_struct *get_current(void) { - return read_pda(pcurrent); + return x86_read_percpu(current_task); } #define current get_current() diff --git a/include/asm-i386/irq_regs.h b/include/asm-i386/irq_regs.h index a1b3f7f..3368b20 100644 --- a/include/asm-i386/irq_regs.h +++ b/include/asm-i386/irq_regs.h @@ -1,25 +1,27 @@ /* * Per-cpu current frame pointer - the location of the last exception frame on - * the stack, stored in the PDA. + * the stack, stored in the per-cpu area. 
* * Jeremy Fitzhardinge */ #ifndef _ASM_I386_IRQ_REGS_H #define _ASM_I386_IRQ_REGS_H -#include +#include + +DECLARE_PER_CPU(struct pt_regs *, irq_regs); static inline struct pt_regs *get_irq_regs(void) { - return read_pda(irq_regs); + return x86_read_percpu(irq_regs); } static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) { struct pt_regs *old_regs; - old_regs = read_pda(irq_regs); - write_pda(irq_regs, new_regs); + old_regs = get_irq_regs(); + x86_write_percpu(irq_regs, new_regs); return old_regs; } diff --git a/include/asm-i386/pda.h b/include/asm-i386/pda.h deleted file mode 100644 index aef7f73..0000000 --- a/include/asm-i386/pda.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - Per-processor Data Areas - Jeremy Fitzhardinge 2006 - Based on asm-x86_64/pda.h by Andi Kleen. - */ -#ifndef _I386_PDA_H -#define _I386_PDA_H - -#include -#include -#include - -struct i386_pda -{ - struct i386_pda *_pda; /* pointer to self */ - - int cpu_number; - struct task_struct *pcurrent; /* current process */ - struct pt_regs *irq_regs; -}; - -DECLARE_PER_CPU(struct i386_pda, _cpu_pda); -#define cpu_pda(i) (&per_cpu(_cpu_pda, (i))) -#define pda_offset(field) offsetof(struct i386_pda, field) - -extern void __bad_pda_field(void); - -/* This variable is never instantiated. It is only used as a stand-in - for the real per-cpu PDA memory, so that gcc can understand what - memory operations the inline asms() below are performing. This - eliminates the need to make the asms volatile or have memory - clobbers, so gcc can readily analyse them. 
*/ -extern struct i386_pda _proxy_pda; - -#define pda_to_op(op,field,val) \ - do { \ - typedef typeof(_proxy_pda.field) T__; \ - if (0) { T__ tmp__; tmp__ = (val); } \ - switch (sizeof(_proxy_pda.field)) { \ - case 1: \ - asm(op "b %1,%%fs:%c2" \ - : "+m" (_proxy_pda.field) \ - :"ri" ((T__)val), \ - "i"(pda_offset(field))); \ - break; \ - case 2: \ - asm(op "w %1,%%fs:%c2" \ - : "+m" (_proxy_pda.field) \ - :"ri" ((T__)val), \ - "i"(pda_offset(field))); \ - break; \ - case 4: \ - asm(op "l %1,%%fs:%c2" \ - : "+m" (_proxy_pda.field) \ - :"ri" ((T__)val), \ - "i"(pda_offset(field))); \ - break; \ - default: __bad_pda_field(); \ - } \ - } while (0) - -#define pda_from_op(op,field) \ - ({ \ - typeof(_proxy_pda.field) ret__; \ - switch (sizeof(_proxy_pda.field)) { \ - case 1: \ - asm(op "b %%fs:%c1,%0" \ - : "=r" (ret__) \ - : "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - case 2: \ - asm(op "w %%fs:%c1,%0" \ - : "=r" (ret__) \ - : "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - case 4: \ - asm(op "l %%fs:%c1,%0" \ - : "=r" (ret__) \ - : "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - default: __bad_pda_field(); \ - } \ - ret__; }) - -/* Return a pointer to a pda field */ -#define pda_addr(field) \ - ((typeof(_proxy_pda.field) *)((unsigned char *)read_pda(_pda) + \ - pda_offset(field))) - -#define read_pda(field) pda_from_op("mov",field) -#define write_pda(field,val) pda_to_op("mov",field,val) -#define add_pda(field,val) pda_to_op("add",field,val) -#define sub_pda(field,val) pda_to_op("sub",field,val) -#define or_pda(field,val) pda_to_op("or",field,val) - -#endif /* _I386_PDA_H */ diff --git a/include/asm-i386/percpu.h b/include/asm-i386/percpu.h index a10e7c6..c5f12f0 100644 --- a/include/asm-i386/percpu.h +++ b/include/asm-i386/percpu.h @@ -1,9 +1,30 @@ #ifndef __ARCH_I386_PERCPU__ #define __ARCH_I386_PERCPU__ -#ifndef __ASSEMBLY__ -#include -#else +#ifdef __ASSEMBLY__ + +/* + * PER_CPU finds an address of 
a per-cpu variable. + * + * Args: + * var - variable name + * reg - 32bit register + * + * The resulting address is stored in the "reg" argument. + * + * Example: + * PER_CPU(cpu_gdt_descr, %ebx) + */ +#ifdef CONFIG_SMP +#define PER_CPU(var, reg) \ + movl %fs:per_cpu__this_cpu_off, reg; \ + addl $per_cpu__##var, reg +#else /* ! SMP */ +#define PER_CPU(var, reg) \ + movl $per_cpu__##var, reg; +#endif /* SMP */ + +#else /* ...!ASSEMBLY */ /* * PER_CPU finds an address of a per-cpu variable. @@ -18,14 +39,107 @@ * PER_CPU(cpu_gdt_descr, %ebx) */ #ifdef CONFIG_SMP -#define PER_CPU(var, cpu) \ - movl __per_cpu_offset(,cpu,4), cpu; \ - addl $per_cpu__##var, cpu; -#else /* ! SMP */ -#define PER_CPU(var, cpu) \ - movl $per_cpu__##var, cpu; +/* Same as generic implementation except for optimized local access. */ +#define __GENERIC_PER_CPU + +/* This is used for other cpus to find our section. */ +extern unsigned long __per_cpu_offset[]; + +/* Separate out the type, so (int[3], foo) works. */ +#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name +#define DEFINE_PER_CPU(type, name) \ + __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name + +/* We can use this directly for local CPU (faster). */ +DECLARE_PER_CPU(unsigned long, this_cpu_off); + +/* var is in discarded region: offset to particular copy we want */ +#define per_cpu(var, cpu) (*({ \ + extern int simple_indentifier_##var(void); \ + RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); })) + +#define __raw_get_cpu_var(var) (*({ \ + extern int simple_indentifier_##var(void); \ + RELOC_HIDE(&per_cpu__##var, x86_read_percpu(this_cpu_off)); \ +})) + +#define __get_cpu_var(var) __raw_get_cpu_var(var) + +/* A macro to avoid #include hell... 
*/ +#define percpu_modcopy(pcpudst, src, size) \ +do { \ + unsigned int __i; \ + for_each_possible_cpu(__i) \ + memcpy((pcpudst)+__per_cpu_offset[__i], \ + (src), (size)); \ +} while (0) + +#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) +#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) + +/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */ +#define __percpu_seg "%%fs:" +#else /* !SMP */ +#include +#define __percpu_seg "" #endif /* SMP */ +/* For arch-specific code, we can use direct single-insn ops (they + * don't give an lvalue though). */ +extern void __bad_percpu_size(void); + +#define percpu_to_op(op,var,val) \ + do { \ + typedef typeof(var) T__; \ + if (0) { T__ tmp__; tmp__ = (val); } \ + switch (sizeof(var)) { \ + case 1: \ + asm(op "b %1,"__percpu_seg"%0" \ + : "+m" (var) \ + :"ri" ((T__)val)); \ + break; \ + case 2: \ + asm(op "w %1,"__percpu_seg"%0" \ + : "+m" (var) \ + :"ri" ((T__)val)); \ + break; \ + case 4: \ + asm(op "l %1,"__percpu_seg"%0" \ + : "+m" (var) \ + :"ri" ((T__)val)); \ + break; \ + default: __bad_percpu_size(); \ + } \ + } while (0) + +#define percpu_from_op(op,var) \ + ({ \ + typeof(var) ret__; \ + switch (sizeof(var)) { \ + case 1: \ + asm(op "b "__percpu_seg"%1,%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ + case 2: \ + asm(op "w "__percpu_seg"%1,%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ + case 4: \ + asm(op "l "__percpu_seg"%1,%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ + default: __bad_percpu_size(); \ + } \ + ret__; }) + +#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var) +#define x86_write_percpu(var,val) percpu_to_op("mov", per_cpu__##var, val) +#define x86_add_percpu(var,val) percpu_to_op("add", per_cpu__##var, val) +#define x86_sub_percpu(var,val) percpu_to_op("sub", per_cpu__##var, val) +#define x86_or_percpu(var,val) percpu_to_op("or", per_cpu__##var, val) #endif /* !__ASSEMBLY__ */ #endif /* __ARCH_I386_PERCPU__ */ diff 
--git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 9222604..ced2da8 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -377,7 +377,7 @@ struct thread_struct { .vm86_info = NULL, \ .sysenter_cs = __KERNEL_CS, \ .io_bitmap_ptr = NULL, \ - .fs = __KERNEL_PDA, \ + .fs = __KERNEL_PERCPU, \ } /* diff --git a/include/asm-i386/segment.h b/include/asm-i386/segment.h index 065f10b..07e7062 100644 --- a/include/asm-i386/segment.h +++ b/include/asm-i386/segment.h @@ -39,7 +39,7 @@ * 25 - APM BIOS support * * 26 - ESPFIX small SS - * 27 - PDA [ per-cpu private data area ] + * 27 - per-cpu [ offset to per-cpu data area ] * 28 - unused * 29 - unused * 30 - unused @@ -74,8 +74,8 @@ #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14) #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) -#define GDT_ENTRY_PDA (GDT_ENTRY_KERNEL_BASE + 15) -#define __KERNEL_PDA (GDT_ENTRY_PDA * 8) +#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15) +#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8) #define GDT_ENTRY_DOUBLEFAULT_TSS 31 diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index 2d083cb..090abc1 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h @@ -8,7 +8,6 @@ #include #include #include -#include #endif #if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__) @@ -112,7 +111,8 @@ do { } while (0) * from the initial startup. We map APIC_BASE very early in page_setup(), * so this is correct in the x86 case. 
*/ -#define raw_smp_processor_id() (read_pda(cpu_number)) +DECLARE_PER_CPU(int, cpu_number); +#define raw_smp_processor_id() (x86_read_percpu(cpu_number)) extern cpumask_t cpu_callout_map; extern cpumask_t cpu_callin_map; -- cgit v1.1 From 978c038ec944e4f2c940b0975c6acb433203a9be Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:16 +0200 Subject: [PATCH] i386: cleanups to help using per-cpu variables from asm This patch does a few small cleanups: - use PER_CPU_NAME to generate the names of per-cpu variables - use lea to add the per_cpu offset in PER_CPU(), because it doesn't affect condition flags - add PER_CPU_VAR which allows direct access to pre-cpu variables with the %fs: prefix on SMP. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Andi Kleen --- include/asm-i386/percpu.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-i386/percpu.h b/include/asm-i386/percpu.h index c5f12f0..cdcb63d 100644 --- a/include/asm-i386/percpu.h +++ b/include/asm-i386/percpu.h @@ -16,12 +16,14 @@ * PER_CPU(cpu_gdt_descr, %ebx) */ #ifdef CONFIG_SMP -#define PER_CPU(var, reg) \ - movl %fs:per_cpu__this_cpu_off, reg; \ - addl $per_cpu__##var, reg +#define PER_CPU(var, reg) \ + movl %fs:per_cpu__##this_cpu_off, reg; \ + lea per_cpu__##var(reg), reg +#define PER_CPU_VAR(var) %fs:per_cpu__##var #else /* ! SMP */ -#define PER_CPU(var, reg) \ - movl $per_cpu__##var, reg; +#define PER_CPU(var, reg) \ + movl $per_cpu__##var, reg +#define PER_CPU_VAR(var) per_cpu__##var #endif /* SMP */ #else /* ...!ASSEMBLY */ -- cgit v1.1 From 1956c73bb5bf81ee577ed7d3c64e3cad876ad2a5 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:16 +0200 Subject: [PATCH] i386: Define per_cpu_offset Define per_cpu_offset in asm-i386/percpu.h when SMP defined, like asm-generic/percpu.h does for UP. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Andi Kleen --- include/asm-i386/percpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-i386/percpu.h b/include/asm-i386/percpu.h index cdcb63d..f54830b 100644 --- a/include/asm-i386/percpu.h +++ b/include/asm-i386/percpu.h @@ -47,6 +47,8 @@ /* This is used for other cpus to find our section. */ extern unsigned long __per_cpu_offset[]; +#define per_cpu_offset(x) (__per_cpu_offset[x]) + /* Separate out the type, so (int[3], foo) works. */ #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name #define DEFINE_PER_CPU(type, name) \ -- cgit v1.1 From c5413fbe894924ddb8aa474a4d4da52e7a6c7e0b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:16 +0200 Subject: [PATCH] i386: Fix UP gdt bugs Fixes two problems with the GDT when compiling for uniprocessor: - There's no percpu segment, so trying to load its selector into %fs fails. Use a null selector instead. - The real gdt needs to be loaded at some point. Do it in cpu_init(). 
Signed-off-by: Chris Wright Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell --- include/asm-i386/processor.h | 1 + include/asm-i386/segment.h | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index ced2da8..70f3515 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -750,6 +750,7 @@ extern void enable_sep_cpu(void); extern int sysenter_setup(void); extern void cpu_set_gdt(int); +extern void switch_to_new_gdt(void); extern void cpu_init(void); extern int force_mwait; diff --git a/include/asm-i386/segment.h b/include/asm-i386/segment.h index 07e7062..597a47c 100644 --- a/include/asm-i386/segment.h +++ b/include/asm-i386/segment.h @@ -75,7 +75,11 @@ #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) #define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15) +#ifdef CONFIG_SMP #define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8) +#else +#define __KERNEL_PERCPU 0 +#endif #define GDT_ENTRY_DOUBLEFAULT_TSS 31 -- cgit v1.1 From 57decbda6a2a7c400b2a3b3b12e52ccbdc977118 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:16 +0200 Subject: [PATCH] x86: update for i386 and x86-64 check_bugs Remove spurious comments, headers and keywords from x86-64 bugs.[ch]. 
Use identify_boot_cpu() AK: merged with other patch Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen --- include/asm-i386/bugs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-i386/bugs.h b/include/asm-i386/bugs.h index df539b3..d28979f 100644 --- a/include/asm-i386/bugs.h +++ b/include/asm-i386/bugs.h @@ -7,6 +7,6 @@ #ifndef _ASM_I386_BUG_H #define _ASM_I386_BUG_H -extern void __init check_bugs(void); +void check_bugs(void); #endif /* _ASM_I386_BUG_H */ -- cgit v1.1 From e0bb8643974397a8d36670e06e6a54bb84f3289f Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Wed, 2 May 2007 19:27:16 +0200 Subject: [PATCH] i386: Convert VMI timer to use clock events Convert VMI timer to use clock events, making it properly able to use the NO_HZ infrastructure. On UP systems, with no local APIC, we just continue to route these events through the PIT. On systems with a local APIC, or SMP, we provide a single source interrupt chip which creates the local timer IRQ. It actually gets delivered by the APIC hardware, but we don't want to use the same local APIC clocksource processing, so we create our own handler here. 
Signed-off-by: Zachary Amsden Signed-off-by: Andi Kleen CC: Dan Hecht CC: Ingo Molnar CC: Thomas Gleixner --- include/asm-i386/vmi_time.h | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/asm-i386/vmi_time.h b/include/asm-i386/vmi_time.h index c3a1fcf..213930b 100644 --- a/include/asm-i386/vmi_time.h +++ b/include/asm-i386/vmi_time.h @@ -53,22 +53,8 @@ extern unsigned long long vmi_get_sched_cycles(void); extern unsigned long vmi_cpu_khz(void); #ifdef CONFIG_X86_LOCAL_APIC -extern void __init vmi_timer_setup_boot_alarm(void); -extern void __devinit vmi_timer_setup_secondary_alarm(void); -extern void apic_vmi_timer_interrupt(void); -#endif - -#ifdef CONFIG_NO_IDLE_HZ -extern int vmi_stop_hz_timer(void); -extern void vmi_account_time_restart_hz_timer(void); -#else -static inline int vmi_stop_hz_timer(void) -{ - return 0; -} -static inline void vmi_account_time_restart_hz_timer(void) -{ -} +extern void __devinit vmi_time_bsp_init(void); +extern void __devinit vmi_time_ap_init(void); #endif /* -- cgit v1.1 From 441d40dca024deb305a5e3d5003e8cd9d364d10f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:16 +0200 Subject: [PATCH] x86: PARAVIRT: Jeremy Fitzhardinge The other symbols used to delineate the alt-instructions sections have the form __foo/__foo_end. Rename parainstructions to match. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Rusty Russell Signed-off-by: Andrew Morton --- include/asm-i386/alternative.h | 4 ++-- include/asm-x86_64/alternative.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-i386/alternative.h b/include/asm-i386/alternative.h index 5b59d07..2774673 100644 --- a/include/asm-i386/alternative.h +++ b/include/asm-i386/alternative.h @@ -124,8 +124,8 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, struct paravirt_patch_site *end) {} -#define __start_parainstructions NULL -#define __stop_parainstructions NULL +#define __parainstructions NULL +#define __parainstructions_end NULL #endif #endif /* _I386_ALTERNATIVE_H */ diff --git a/include/asm-x86_64/alternative.h b/include/asm-x86_64/alternative.h index 67ebea3..a09fe85 100644 --- a/include/asm-x86_64/alternative.h +++ b/include/asm-x86_64/alternative.h @@ -142,8 +142,8 @@ void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end); static inline void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end) {} -#define __start_parainstructions NULL -#define __stop_parainstructions NULL +#define __parainstructions NULL +#define __parainstructions_end NULL #endif #endif /* _X86_64_ALTERNATIVE_H */ -- cgit v1.1 From 03df4f6ee997589a84d5f9492c6419183724c710 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:17 +0200 Subject: [PATCH] i386: Clean up ELF note generation Three cleanups: 1: ELF notes are never mapped, so there's no need to have any access flags in their phdr. 2: When generating them from asm, tell the assembler to use a SHT_NOTE section type. There doesn't seem to be a way to do this from C. 3: Use ANSI rather than traditional cpp behaviour to stringify the macro argument. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Eric W. 
Biederman --- include/asm-generic/vmlinux.lds.h | 2 +- include/linux/elfnote.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 9fcc8d9..f3806a74 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -208,7 +208,7 @@ } #define NOTES \ - .notes : { *(.note.*) } :note + .notes : { *(.note.*) } :note #define INITCALLS \ *(.initcall0.init) \ diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h index 67396db..9a1e067 100644 --- a/include/linux/elfnote.h +++ b/include/linux/elfnote.h @@ -39,12 +39,12 @@ * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef) */ #define ELFNOTE(name, type, desctype, descdata) \ -.pushsection .note.name ; \ +.pushsection .note.name, "",@note ; \ .align 4 ; \ .long 2f - 1f /* namesz */ ; \ .long 4f - 3f /* descsz */ ; \ .long type ; \ -1:.asciz "name" ; \ +1:.asciz #name ; \ 2:.align 4 ; \ 3:desctype descdata ; \ 4:.align 4 ; \ -- cgit v1.1 From 856f44ff4af6e57fdc39a8b2bec498c88438bd27 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:17 +0200 Subject: [PATCH] x86-64: Move mtrr prototypes from proto.h to mtrr.h Signed-off-by: Andi Kleen --- include/asm-x86_64/mtrr.h | 8 ++++++++ include/asm-x86_64/proto.h | 7 ------- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/mtrr.h b/include/asm-x86_64/mtrr.h index d6135b2..66809ec 100644 --- a/include/asm-x86_64/mtrr.h +++ b/include/asm-x86_64/mtrr.h @@ -135,6 +135,14 @@ struct mtrr_gentry32 #endif /* CONFIG_COMPAT */ +#ifdef CONFIG_MTRR +extern void mtrr_ap_init(void); +extern void mtrr_bp_init(void); +#else +#define mtrr_ap_init() do {} while (0) +#define mtrr_bp_init() do {} while (0) +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_MTRR_H */ diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index 98063bc..85255db 100644 --- a/include/asm-x86_64/proto.h 
+++ b/include/asm-x86_64/proto.h @@ -14,13 +14,6 @@ extern void pda_init(int); extern void early_idt_handler(void); extern void mcheck_init(struct cpuinfo_x86 *c); -#ifdef CONFIG_MTRR -extern void mtrr_ap_init(void); -extern void mtrr_bp_init(void); -#else -#define mtrr_ap_init() do {} while (0) -#define mtrr_bp_init() do {} while (0) -#endif extern void init_memory_mapping(unsigned long start, unsigned long end); extern void system_call(void); -- cgit v1.1 From 2b3b4835c94226681c496de9446d456dcf42ed08 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Wed, 2 May 2007 19:27:17 +0200 Subject: [PATCH] x86: Adds mtrr_save_fixed_ranges() for use in two later patches. In this current implementation which is used in other patches, mtrr_save_fixed_ranges() accepts a dummy void pointer because in the current implementation of one of these patches, this function may be called from smp_call_function_single() which requires that this function takes a void pointer argument. This function calls get_fixed_ranges(), passing mtrr_state.fixed_ranges which is the element of the static struct which stores our current backup of the fixed-range MTRR values which all CPUs shall be using. Because mtrr_save_fixed_ranges calls get_fixed_ranges after kernel initialisation time, __init needs to be removed from the declaration of get_fixed_ranges(). If CONFIG_MTRR is not set, we define mtrr_save_fixed_ranges as an empty statement because there is nothing to do. 
AK: Moved prototypes for x86-64 around to fix warnings Signed-off-by: Bernhard Kaindl Signed-off-by: Andi Kleen Cc: Andrew Morton Cc: Andi Kleen Cc: Dave Jones --- include/asm-i386/mtrr.h | 2 ++ include/asm-x86_64/mtrr.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/asm-i386/mtrr.h b/include/asm-i386/mtrr.h index 07f063a..02a41b9 100644 --- a/include/asm-i386/mtrr.h +++ b/include/asm-i386/mtrr.h @@ -69,6 +69,7 @@ struct mtrr_gentry /* The following functions are for use by other drivers */ # ifdef CONFIG_MTRR +extern void mtrr_save_fixed_ranges(void *); extern int mtrr_add (unsigned long base, unsigned long size, unsigned int type, char increment); extern int mtrr_add_page (unsigned long base, unsigned long size, @@ -79,6 +80,7 @@ extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi); extern void mtrr_ap_init(void); extern void mtrr_bp_init(void); # else +#define mtrr_save_fixed_ranges(arg) do {} while (0) static __inline__ int mtrr_add (unsigned long base, unsigned long size, unsigned int type, char increment) { diff --git a/include/asm-x86_64/mtrr.h b/include/asm-x86_64/mtrr.h index 66809ec..1b326cb 100644 --- a/include/asm-x86_64/mtrr.h +++ b/include/asm-x86_64/mtrr.h @@ -138,9 +138,11 @@ struct mtrr_gentry32 #ifdef CONFIG_MTRR extern void mtrr_ap_init(void); extern void mtrr_bp_init(void); +extern void mtrr_save_fixed_ranges(void *); #else #define mtrr_ap_init() do {} while (0) #define mtrr_bp_init() do {} while (0) +#define mtrr_save_fixed_ranges(arg) do {} while (0) #endif #endif /* __KERNEL__ */ -- cgit v1.1 From 2b1f6278d77c1f2f669346fc2bb48012b5e9495a Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Wed, 2 May 2007 19:27:17 +0200 Subject: [PATCH] x86: Save the MTRRs of the BSP before booting an AP Applied fix by Andew Morton: http://lkml.org/lkml/2007/4/8/88 - Fix `make headers_check'. 
AMD and Intel x86 CPU manuals state that it is the responsibility of system software to initialize and maintain MTRR consistency across all processors in Multi-Processing Environments. Quote from page 188 of the AMD64 System Programming manual (Volume 2): 7.6.5 MTRRs in Multi-Processing Environments "In multi-processing environments, the MTRRs located in all processors must characterize memory in the same way. Generally, this means that identical values are written to the MTRRs used by the processors." (short omission here) "Failure to do so may result in coherency violations or loss of atomicity. Processor implementations do not check the MTRR settings in other processors to ensure consistency. It is the responsibility of system software to initialize and maintain MTRR consistency across all processors." Current Linux MTRR code already implements the above in the case that the BIOS does not properly initialize MTRRs on the secondary processors, but the case where the fixed-range MTRRs of the boot processor are changed after Linux started to boot, before the initialisation of a secondary processor, is not handled yet. In this case, secondary processors are currently initialized by Linux with MTRRs which the boot processor had very early, when mtrr_bp_init() did run, but not with the MTRRs which the boot processor uses at the time when that secondary processor is actually booted, causing differing MTRR contents on the secondary processors. Such a situation happens on Acer Ferrari 1000 and 5000 notebooks where the BIOS enables and sets AMD-specific IORR bits in the fixed-range MTRRs of the boot processor when it transitions the system into ACPI mode. The SMI handler of the BIOS does this in SMM, entered while Linux ACPI code runs acpi_enable(). Other occasions where the SMI handler of the BIOS may change bits in the MTRRs could occur as well.
To initialize newly booted secondary processors with the fixed-range MTRRs which the boot processor uses at that time, this patch saves the fixed-range MTRRs of the boot processor before new secondary processors are started. When the secondary processors run their Linux initialisation code, their fixed-range MTRRs will be updated with the saved fixed-range MTRRs. If CONFIG_MTRR is not set, we define mtrr_save_state as an empty statement because there is nothing to do. Possible TODOs: *) CPU-hotplugging outside of SMP suspend/resume is not yet tested with this patch. *) If, even in this case, an AP never runs i386/do_boot_cpu or x86_64/cpu_up, then the calls to mtrr_save_state() could be replaced by calls to mtrr_save_fixed_ranges(NULL) and mtrr_save_state() would not be needed. That would need either verification of the CPU-hotplug code or at least a test on a >2 CPU machine. *) The MTRRs of other running processors are not yet checked at this time but it might be interesting to synchronize the MTRRs of all processors before booting. That would be an incremental patch, but of rather low priority since there is no machine known so far which would require this.
AK: moved prototypes on x86-64 around to fix warnings Signed-off-by: Bernhard Kaindl Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Dave Jones --- include/asm-i386/mtrr.h | 2 ++ include/asm-x86_64/mtrr.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/asm-i386/mtrr.h b/include/asm-i386/mtrr.h index 02a41b9..7e9c7cc 100644 --- a/include/asm-i386/mtrr.h +++ b/include/asm-i386/mtrr.h @@ -70,6 +70,7 @@ struct mtrr_gentry /* The following functions are for use by other drivers */ # ifdef CONFIG_MTRR extern void mtrr_save_fixed_ranges(void *); +extern void mtrr_save_state(void); extern int mtrr_add (unsigned long base, unsigned long size, unsigned int type, char increment); extern int mtrr_add_page (unsigned long base, unsigned long size, @@ -81,6 +82,7 @@ extern void mtrr_ap_init(void); extern void mtrr_bp_init(void); # else #define mtrr_save_fixed_ranges(arg) do {} while (0) +#define mtrr_save_state() do {} while (0) static __inline__ int mtrr_add (unsigned long base, unsigned long size, unsigned int type, char increment) { diff --git a/include/asm-x86_64/mtrr.h b/include/asm-x86_64/mtrr.h index 1b326cb..b557c48 100644 --- a/include/asm-x86_64/mtrr.h +++ b/include/asm-x86_64/mtrr.h @@ -139,10 +139,12 @@ struct mtrr_gentry32 extern void mtrr_ap_init(void); extern void mtrr_bp_init(void); extern void mtrr_save_fixed_ranges(void *); +extern void mtrr_save_state(void); #else #define mtrr_ap_init() do {} while (0) #define mtrr_bp_init() do {} while (0) #define mtrr_save_fixed_ranges(arg) do {} while (0) +#define mtrr_save_state() do {} while (0) #endif #endif /* __KERNEL__ */ -- cgit v1.1 From de938c51d5fec4ae03af64b06beb15d4423ec611 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Wed, 2 May 2007 19:27:17 +0200 Subject: [PATCH] i386: Enable support for fixed-range IORRs to keep RdMem & WrMem in sync If our copy of the MTRRs of the BSP has RdMem or WrMem set, and we are running on an AMD64/K8 system, the 
boot CPU must have had MtrrFixDramEn and MtrrFixDramModEn set (otherwise our RDMSR would have copied these bits cleared), so we set them on this CPU as well. This allows us to keep the AMD64/K8 RdMem and WrMem bits in sync across the CPUs of SMP systems in order to fullfill the duty of system software to "initialize and maintain MTRR consistency across all processors." as written in the AMD and Intel manuals. If an WRMSR instruction fails because MtrrFixDramModEn is not set, I expect that also the Intel-style MTRR bits are not updated. AK: minor cleanup, moved MSR defines around Signed-off-by: Bernhard Kaindl Signed-off-by: Andi Kleen Cc: Andrew Morton Cc: Andi Kleen Cc: Dave Jones --- include/asm-i386/msr-index.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/asm-i386/msr-index.h b/include/asm-i386/msr-index.h index f119080..a02eb29 100644 --- a/include/asm-i386/msr-index.h +++ b/include/asm-i386/msr-index.h @@ -87,6 +87,11 @@ #define MSR_K7_CLK_CTL 0xc001001b #define MSR_K8_TOP_MEM2 0xc001001d #define MSR_K8_SYSCFG 0xc0010010 + +#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */ +#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */ +#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */ + #define MSR_K7_HWCR 0xc0010015 #define MSR_K8_HWCR 0xc0010015 #define MSR_K7_FID_VID_CTL 0xc0010041 -- cgit v1.1 From f2b218dd6199983b120a96bc6531c1b81f4090d8 Mon Sep 17 00:00:00 2001 From: Fernando Luis VazquezCao Date: Wed, 2 May 2007 19:27:17 +0200 Subject: [PATCH] i386: safe_apic_wait_icr_idle - i386 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit apic_wait_icr_idle looks like this: static __inline__ void apic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } The busy loop in this function would not be problematic if the corresponding status bit in the ICR were always updated, but that does not seem to be the 
case under certain crash scenarios. Kdump uses an IPI to stop the other CPUs in the event of a crash, but when any of the other CPUs are locked-up inside the NMI handler the CPU that sends the IPI will end up looping forever in the ICR check, effectively hard-locking the whole system. Quoting from Intel's "MultiProcessor Specification" (Version 1.4), B-3: "A local APIC unit indicates successful dispatch of an IPI by resetting the Delivery Status bit in the Interrupt Command Register (ICR). The operating system polls the delivery status bit after sending an INIT or STARTUP IPI until the command has been dispatched. A period of 20 microseconds should be sufficient for IPI dispatch to complete under normal operating conditions. If the IPI is not successfully dispatched, the operating system can abort the command. Alternatively, the operating system can retry the IPI by writing the lower 32-bit double word of the ICR. This “time-out” mechanism can be implemented through an external interrupt, if interrupts are enabled on the processor, or through execution of an instruction or time-stamp counter spin loop." Intel's documentation suggests the implementation of a time-out mechanism, which, by the way, is already being open-coded in some parts of the kernel that tinker with ICR. Create an apic_wait_icr_idle replacement that implements the time-out mechanism and that can be used to solve the aforementioned problem.
AK: moved both functions out of line AK: added improved loop from Keith Owens Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Andi Kleen --- include/asm-i386/apic.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h index a19810a..1e8f6f2 100644 --- a/include/asm-i386/apic.h +++ b/include/asm-i386/apic.h @@ -2,6 +2,7 @@ #define __ASM_APIC_H #include +#include #include #include #include @@ -64,12 +65,8 @@ static __inline fastcall unsigned long native_apic_read(unsigned long reg) return *((volatile unsigned long *)(APIC_BASE+reg)); } -static __inline__ void apic_wait_icr_idle(void) -{ - while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY ) - cpu_relax(); -} - +void apic_wait_icr_idle(void); +unsigned long safe_apic_wait_icr_idle(void); int get_physical_broadcast(void); #ifdef CONFIG_X86_GOOD_APIC -- cgit v1.1 From 8339e9fba33aa3205f541478c413982c0ac5a37f Mon Sep 17 00:00:00 2001 From: Fernando Luis VazquezCao Date: Wed, 2 May 2007 19:27:17 +0200 Subject: [PATCH] x86-64: safe_apic_wait_icr_idle - x86_64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit apic_wait_icr_idle looks like this: static __inline__ void apic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } The busy loop in this function would not be problematic if the corresponding status bit in the ICR were always updated, but that does not seem to be the case under certain crash scenarios. Kdump uses an IPI to stop the other CPUs in the event of a crash, but when any of the other CPUs are locked-up inside the NMI handler the CPU that sends the IPI will end up looping forever in the ICR check, effectively hard-locking the whole system. Quoting from Intel's "MultiProcessor Specification" (Version 1.4), B-3: "A local APIC unit indicates successful dispatch of an IPI by resetting the Delivery Status bit in the Interrupt Command Register (ICR). 
The operating system polls the delivery status bit after sending an INIT or STARTUP IPI until the command has been dispatched. A period of 20 microseconds should be sufficient for IPI dispatch to complete under normal operating conditions. If the IPI is not successfully dispatched, the operating system can abort the command. Alternatively, the operating system can retry the IPI by writing the lower 32-bit double word of the ICR. This “time-out” mechanism can be implemented through an external interrupt, if interrupts are enabled on the processor, or through execution of an instruction or time-stamp counter spin loop." Intel's documentation suggests the implementation of a time-out mechanism, which, by the way, is already being open-coded in some parts of the kernel that tinker with ICR. Create an apic_wait_icr_idle replacement that implements the time-out mechanism and that can be used to solve the aforementioned problem. AK: moved both functions out of line AK: Added improved loop from Keith Owens Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Andi Kleen --- include/asm-x86_64/apic.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/apic.h b/include/asm-x86_64/apic.h index 2f3b013..45e9fca 100644 --- a/include/asm-x86_64/apic.h +++ b/include/asm-x86_64/apic.h @@ -2,6 +2,7 @@ #define __ASM_APIC_H #include +#include #include #include #include @@ -47,11 +48,8 @@ static __inline unsigned int apic_read(unsigned long reg) return *((volatile unsigned int *)(APIC_BASE+reg)); } -static __inline__ void apic_wait_icr_idle(void) -{ - while (apic_read( APIC_ICR ) & APIC_ICR_BUSY) - cpu_relax(); -} +extern void apic_wait_icr_idle(void); +extern unsigned int safe_apic_wait_icr_idle(void); static inline void ack_APIC_irq(void) { -- cgit v1.1 From 9062d888aa448318e38792b6879a795dd10adda4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Luis=20=5B**=20ISO-8859-1=20charset=20**=5D=20V?= 
=?UTF-8?q?=E1zquezCao?= Date: Wed, 2 May 2007 19:27:18 +0200 Subject: [PATCH] x86-64: __send_IPI_dest_field - x86_64 Implement __send_IPI_dest_field which can be used to send IPIs when the "destination shorthand" field of the ICR is set to 00 (destination field). Use it whenever possible. Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Andi Kleen --- include/asm-x86_64/ipi.h | 54 ++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/ipi.h b/include/asm-x86_64/ipi.h index ffa6f15..26961e6 100644 --- a/include/asm-x86_64/ipi.h +++ b/include/asm-x86_64/ipi.h @@ -74,10 +74,39 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, unsign apic_write(APIC_ICR, cfg); } +/* + * This is used to send an IPI with no shorthand notation (the destination is + * specified in bits 56 to 63 of the ICR). + */ +static inline void __send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest) +{ + unsigned long cfg; + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + + /* + * prepare target chip field + */ + cfg = __prepare_ICR2(mask); + apic_write(APIC_ICR2, cfg); + + /* + * program the ICR + */ + cfg = __prepare_ICR(0, vector, dest); + + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + apic_write(APIC_ICR, cfg); +} static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) { - unsigned long cfg, flags; + unsigned long flags; unsigned long query_cpu; /* @@ -86,28 +115,9 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) * - mbligh */ local_irq_save(flags); - for_each_cpu_mask(query_cpu, mask) { - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - /* - * prepare target chip field - */ - cfg = __prepare_ICR2(x86_cpu_to_apicid[query_cpu]); - apic_write(APIC_ICR2, cfg); - - /* - * program the ICR - */ - cfg = __prepare_ICR(0, vector, APIC_DEST_PHYSICAL); - - /* - * Send the IPI. 
The write to APIC_ICR fires this off. - */ - apic_write(APIC_ICR, cfg); + __send_IPI_dest_field(x86_cpu_to_apicid[query_cpu], + vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); } -- cgit v1.1 From 70ae77f497a57b3ef6b0987b6310327264517cb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Luis=20=5B**=20ISO-8859-1=20charset=20**=5D=20V?= =?UTF-8?q?=E1zquezCao?= Date: Wed, 2 May 2007 19:27:18 +0200 Subject: [PATCH] x86-64: Use safe_apic_wait_icr_idle in __send_IPI_dest_field - x86_64 Use safe_apic_wait_icr_idle to check ICR idle bit if the vector is NMI_VECTOR to avoid potential hangups in the event of crash when kdump tries to stop the other CPUs. Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Andi Kleen --- include/asm-x86_64/ipi.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/ipi.h b/include/asm-x86_64/ipi.h index 26961e6..a7c75ea 100644 --- a/include/asm-x86_64/ipi.h +++ b/include/asm-x86_64/ipi.h @@ -85,7 +85,10 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, unsigned /* * Wait for idle. 
*/ - apic_wait_icr_idle(); + if (unlikely(vector == NMI_VECTOR)) + safe_apic_wait_icr_idle(); + else + apic_wait_icr_idle(); /* * prepare target chip field -- cgit v1.1 From 57a4f91ae5571edd7c0428285d8df16bb8bf5f40 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:18 +0200 Subject: [PATCH] x86-64: Auto compute __NR_syscall_max at compile time No need to maintain it anymore Signed-off-by: Andi Kleen --- include/asm-x86_64/unistd.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h index 576b297..26e23e0 100644 --- a/include/asm-x86_64/unistd.h +++ b/include/asm-x86_64/unistd.h @@ -620,8 +620,6 @@ __SYSCALL(__NR_vmsplice, sys_vmsplice) #define __NR_move_pages 279 __SYSCALL(__NR_move_pages, sys_move_pages) -#define __NR_syscall_max __NR_move_pages - #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_STAT -- cgit v1.1 From c2c1accd4b2f9c82fb89d40611c7f581948db255 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Wed, 2 May 2007 19:27:19 +0200 Subject: [PATCH] i386: pte clear optimization When exiting from an address space, no special hypervisor notification of page table updates needs to occur; direct page table hypervisors, such as Xen, switch to another address space first (init_mm) and unprotect the page tables to avoid the cost of trapping to the hypervisor for each pte_clear. Shadow mode hypervisors, such as VMI and lhype don't need to do the extra work of calling through paravirt-ops, and can just directly clear the page table entries without notifying the hypervisor, since all the page tables are about to be freed. So introduce native_pte_clear functions which bypass any paravirt-ops notification. This results in a significant performance win for VMI and removes some indirect calls from zap_pte_range. 
Note the 3-level paging already had a native_pte_clear function, thus demanding argument conformance and extra args for the 2-level definition. Signed-off-by: Zachary Amsden Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/asm-i386/pgtable-2level.h | 5 +++++ include/asm-i386/pgtable.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h index 781fe4b..85d9005 100644 --- a/include/asm-i386/pgtable-2level.h +++ b/include/asm-i386/pgtable-2level.h @@ -36,6 +36,11 @@ static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) #define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) +static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *xp) +{ + *xp = __pte(0); +} + static inline pte_t native_ptep_get_and_clear(pte_t *xp) { return __pte(xchg(&xp->pte_low, 0)); diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index e7ddd23..00e97a9 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -344,7 +344,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long pte_t pte; if (full) { pte = *ptep; - pte_clear(mm, addr, ptep); + native_pte_clear(mm, addr, ptep); } else { pte = ptep_get_and_clear(mm, addr, ptep); } -- cgit v1.1 From 142dd975911fdd82b1b6f6617cd20ac90a8ccf00 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Wed, 2 May 2007 19:27:19 +0200 Subject: [PATCH] i386: pte xchg optimization In situations where page table updates need only be made locally, and there is no cross-processor A/D bit races involved, we need not use the heavyweight xchg instruction to atomically fetch and clear page table entries. Instead, we can just read and clear them directly. 
This introduces a neat optimization for non-SMP kernels; drop the atomic xchg operations from page table updates. Thanks to Michel Lespinasse for noting this potential optimization. Signed-off-by: Zachary Amsden Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/asm-i386/pgtable-2level.h | 14 ++++++++++++++ include/asm-i386/pgtable-3level.h | 14 ++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'include') diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h index 85d9005..3daab67 100644 --- a/include/asm-i386/pgtable-2level.h +++ b/include/asm-i386/pgtable-2level.h @@ -41,10 +41,24 @@ static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pt *xp = __pte(0); } +/* local pte updates need not use xchg for locking */ +static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) +{ + pte_t res; + + res = *ptep; + native_pte_clear(NULL, 0, ptep); + return res; +} + +#ifdef CONFIG_SMP static inline pte_t native_ptep_get_and_clear(pte_t *xp) { return __pte(xchg(&xp->pte_low, 0)); } +#else +#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) +#endif #define pte_page(x) pfn_to_page(pte_pfn(x)) #define pte_none(x) (!(x).pte_low) diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h index 664bfee..45b0241 100644 --- a/include/asm-i386/pgtable-3level.h +++ b/include/asm-i386/pgtable-3level.h @@ -139,6 +139,17 @@ static inline void pud_clear (pud_t * pud) { } #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ pmd_index(address)) +/* local pte updates need not use xchg for locking */ +static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) +{ + pte_t res; + + res = *ptep; + native_pte_clear(NULL, 0, ptep); + return res; +} + +#ifdef CONFIG_SMP static inline pte_t native_ptep_get_and_clear(pte_t *ptep) { pte_t res; @@ -150,6 +161,9 @@ static inline pte_t native_ptep_get_and_clear(pte_t *ptep) return res; } +#else +#define 
native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) +#endif #define __HAVE_ARCH_PTE_SAME static inline int pte_same(pte_t a, pte_t b) -- cgit v1.1 From 9e5e3162b2d5e4466187ecd63c9eec2de33cb7bc Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Wed, 2 May 2007 19:27:19 +0200 Subject: [PATCH] i386: pte simplify ops Add comment and condense code to make use of native_local_ptep_get_and_clear function. Also, it turns out the 2-level and 3-level paging definitions were identical, so move the common definition into pgtable.h Signed-off-by: Zachary Amsden Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/asm-i386/pgtable-2level.h | 10 ---------- include/asm-i386/pgtable-3level.h | 10 ---------- include/asm-i386/pgtable.h | 17 +++++++++++++++-- 3 files changed, 15 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h index 3daab67..a50fd17 100644 --- a/include/asm-i386/pgtable-2level.h +++ b/include/asm-i386/pgtable-2level.h @@ -41,16 +41,6 @@ static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pt *xp = __pte(0); } -/* local pte updates need not use xchg for locking */ -static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) -{ - pte_t res; - - res = *ptep; - native_pte_clear(NULL, 0, ptep); - return res; -} - #ifdef CONFIG_SMP static inline pte_t native_ptep_get_and_clear(pte_t *xp) { diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h index 45b0241..eb0f1d7 100644 --- a/include/asm-i386/pgtable-3level.h +++ b/include/asm-i386/pgtable-3level.h @@ -139,16 +139,6 @@ static inline void pud_clear (pud_t * pud) { } #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ pmd_index(address)) -/* local pte updates need not use xchg for locking */ -static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) -{ - pte_t res; - - res = *ptep; - native_pte_clear(NULL, 0, ptep); - return res; 
-} - #ifdef CONFIG_SMP static inline pte_t native_ptep_get_and_clear(pte_t *ptep) { diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 00e97a9..c6b8b94 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -269,6 +269,16 @@ extern void vmalloc_sync_all(void); #define pte_update_defer(mm, addr, ptep) do { } while (0) #endif +/* local pte updates need not use xchg for locking */ +static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) +{ + pte_t res = *ptep; + + /* Pure native function needs no input for mm, addr */ + native_pte_clear(NULL, 0, ptep); + return res; +} + /* * We only update the dirty/accessed state if we set * the dirty bit by hand in the kernel, since the hardware @@ -343,8 +353,11 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long { pte_t pte; if (full) { - pte = *ptep; - native_pte_clear(mm, addr, ptep); + /* + * Full address destruction in progress; paravirt does not + * care about updates and native needs no locking + */ + pte = native_local_ptep_get_and_clear(ptep); } else { pte = ptep_get_and_clear(mm, addr, ptep); } -- cgit v1.1 From 09198e68501a7e34737cd9264d266f42429abcdc Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:20 +0200 Subject: [PATCH] i386: Clean up NMI watchdog code - Introduce a wd_ops structure - Convert the various nmi watchdogs over to it - This allows to split the perfctr reservation from the watchdog setup cleanly. 
- Do perfctr reservation globally as it should have always been - Remove dead code referenced only by unused EXPORT_SYMBOLs Signed-off-by: Andi Kleen --- include/asm-i386/nmi.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/asm-i386/nmi.h b/include/asm-i386/nmi.h index b04333e..fb1e133 100644 --- a/include/asm-i386/nmi.h +++ b/include/asm-i386/nmi.h @@ -50,4 +50,12 @@ void __trigger_all_cpu_backtrace(void); #endif +void lapic_watchdog_stop(void); +int lapic_watchdog_init(unsigned nmi_hz); +int lapic_wd_event(unsigned nmi_hz); +unsigned lapic_adjust_nmi_hz(unsigned hz); +int lapic_watchdog_ok(void); +void disable_lapic_nmi_watchdog(void); +void enable_lapic_nmi_watchdog(void); + #endif /* ASM_NMI_H */ -- cgit v1.1 From 05cb007dac9a50148daf87d0b9469e0cd05fd5e7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:20 +0200 Subject: [PATCH] x86-64: Use the 32bit wd_ops for 64bit too. This mainly removes a lot of code, replacing it with calls into the new 32bit perfctr-watchdog.c Signed-off-by: Andi Kleen --- include/asm-x86_64/nmi.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/asm-x86_64/nmi.h b/include/asm-x86_64/nmi.h index 72375e7..d0a7f53 100644 --- a/include/asm-x86_64/nmi.h +++ b/include/asm-x86_64/nmi.h @@ -80,4 +80,13 @@ extern int unknown_nmi_panic; void __trigger_all_cpu_backtrace(void); #define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() + +void lapic_watchdog_stop(void); +int lapic_watchdog_init(unsigned nmi_hz); +int lapic_wd_event(unsigned nmi_hz); +unsigned lapic_adjust_nmi_hz(unsigned hz); +int lapic_watchdog_ok(void); +void disable_lapic_nmi_watchdog(void); +void enable_lapic_nmi_watchdog(void); + #endif /* ASM_NMI_H */ -- cgit v1.1 From c7f81c9453375d6416658995eafd3397cb9bba1d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:20 +0200 Subject: [PATCH] i386: Verify important CPUID bits in real mode Check some CPUID 
bits that are needed for compiler generated code early in boot. When the system is still in real mode before changing the VESA BIOS mode it is possible to still display a visible error message on the screen. Similar to x86-64. Includes cleanups from Eric Biederman Signed-off-by: Andi Kleen --- include/asm-i386/cpufeature.h | 3 +++ include/asm-i386/required-features.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 include/asm-i386/required-features.h (limited to 'include') diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h index d1b8e4a..e66d004 100644 --- a/include/asm-i386/cpufeature.h +++ b/include/asm-i386/cpufeature.h @@ -7,7 +7,10 @@ #ifndef __ASM_I386_CPUFEATURE_H #define __ASM_I386_CPUFEATURE_H +#ifndef __ASSEMBLY__ #include +#endif +#include #define NCAPINTS 7 /* N 32-bit words worth of info */ diff --git a/include/asm-i386/required-features.h b/include/asm-i386/required-features.h new file mode 100644 index 0000000..9db866c --- /dev/null +++ b/include/asm-i386/required-features.h @@ -0,0 +1,34 @@ +#ifndef _ASM_REQUIRED_FEATURES_H +#define _ASM_REQUIRED_FEATURES_H 1 + +/* Define minimum CPUID feature set for kernel These bits are checked + really early to actually display a visible error message before the + kernel dies. Only add word 0 bits here + + Some requirements that are not in CPUID yet are also in the + CONFIG_X86_MINIMUM_CPU mode which is checked too. + + The real information is in arch/i386/Kconfig.cpu, this just converts + the CONFIGs into a bitmask */ + +#ifdef CONFIG_X86_PAE +#define NEED_PAE (1< Date: Wed, 2 May 2007 19:27:20 +0200 Subject: [PATCH] i386: Evaluate constant cpu features at runtime Redefine cpu_has() to evaluate cpu features already checked in early boot at compile time. This way the compiler might eliminate some dead code. 
Signed-off-by: Andi Kleen --- include/asm-i386/cpufeature.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h index e66d004..20e849a 100644 --- a/include/asm-i386/cpufeature.h +++ b/include/asm-i386/cpufeature.h @@ -106,8 +106,12 @@ #define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */ #define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */ -#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability) -#define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability) +#define cpu_has(c, bit) \ + ((__builtin_constant_p(bit) && (bit) < 32 && \ + (1UL << (bit)) & REQUIRED_MASK1) ? \ + 1 : \ + test_bit(bit, (c)->x86_capability)) +#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) #define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) #define cpu_has_vme boot_cpu_has(X86_FEATURE_VME) -- cgit v1.1 From e859dc553c857f4672b3bbb73ee9170a901f8712 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:20 +0200 Subject: [PATCH] i386: Implement alternative_io for i386 Ported from x86-64. Signed-off-by: Andi Kleen --- include/asm-i386/alternative.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/asm-i386/alternative.h b/include/asm-i386/alternative.h index 2774673..0f70b37 100644 --- a/include/asm-i386/alternative.h +++ b/include/asm-i386/alternative.h @@ -82,6 +82,21 @@ static inline void alternatives_smp_switch(int smp) {} "663:\n\t" newinstr "\n664:\n" /* replacement */\ ".previous" :: "i" (feature), ##input) +/* Like alternative_input, but with a single output argument */ +#define alternative_io(oldinstr, newinstr, feature, output, input...) 
\ + asm volatile ("661:\n\t" oldinstr "\n662:\n" \ + ".section .altinstructions,\"a\"\n" \ + " .align 4\n" \ + " .long 661b\n" /* label */ \ + " .long 663f\n" /* new instruction */ \ + " .byte %c[feat]\n" /* feature bit */ \ + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ + ".section .altinstr_replacement,\"ax\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" : output : [feat] "i" (feature), ##input) + /* * Alternative inline assembly for SMP. * -- cgit v1.1 From 3aefbe0746580a710d4392a884ac1e4aac7c728f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:20 +0200 Subject: [PATCH] i386: Implement X86_FEATURE_SYNC_RDTSC on i386 Syncs up with x86-64. Signed-off-by: Andi Kleen --- include/asm-i386/cpufeature.h | 1 + include/asm-i386/tsc.h | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h index 20e849a..b8a3a5a 100644 --- a/include/asm-i386/cpufeature.h +++ b/include/asm-i386/cpufeature.h @@ -79,6 +79,7 @@ #define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ #define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ #define X86_FEATURE_LAPIC_TIMER_BROKEN (3*32+ 14) /* lapic timer broken in C1 */ +#define X86_FEATURE_SYNC_RDTSC (3*32+15) /* RDTSC synchronizes the CPU */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ diff --git a/include/asm-i386/tsc.h b/include/asm-i386/tsc.h index 3469766..0181f9d 100644 --- a/include/asm-i386/tsc.h +++ b/include/asm-i386/tsc.h @@ -35,7 +35,6 @@ static inline cycles_t get_cycles(void) static __always_inline cycles_t get_cycles_sync(void) { unsigned long long ret; -#ifdef X86_FEATURE_SYNC_RDTSC unsigned eax; /* @@ -44,9 +43,6 @@ static __always_inline cycles_t get_cycles_sync(void) */ alternative_io("cpuid", ASM_NOP2, 
X86_FEATURE_SYNC_RDTSC, "=a" (eax), "0" (1) : "ebx","ecx","edx","memory"); -#else - sync_core(); -#endif rdtscll(ret); return ret; -- cgit v1.1 From 9bccb23dc5fc2d268ab676e2d4212d29e230fd86 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:20 +0200 Subject: [PATCH] i386: Add X86_FEATURE_RDTSCP Following x86-64 Signed-off-by: Andi Kleen --- include/asm-i386/cpufeature.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h index b8a3a5a..f514e90 100644 --- a/include/asm-i386/cpufeature.h +++ b/include/asm-i386/cpufeature.h @@ -52,6 +52,7 @@ #define X86_FEATURE_MP (1*32+19) /* MP Capable. */ #define X86_FEATURE_NX (1*32+20) /* Execute Disable */ #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ +#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */ #define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ #define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ #define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */ -- cgit v1.1 From c5bcb5635a03da3158f121ae20ccbbf72b4fc62a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:21 +0200 Subject: [PATCH] x86: Use RDTSCP for synchronous get_cycles if possible RDTSCP is already synchronous and doesn't need an explicit CPUID. This is a little faster and more importantly avoids VMEXITs on Hypervisors. 
Original patch from Joerg Roedel, but reworked by AK Also includes miscompilation fix by Eric Biederman Cc: "Joerg Roedel" Signed-off-by: Andi Kleen --- include/asm-i386/tsc.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/asm-i386/tsc.h b/include/asm-i386/tsc.h index 0181f9d..3f3c1fa 100644 --- a/include/asm-i386/tsc.h +++ b/include/asm-i386/tsc.h @@ -38,6 +38,15 @@ static __always_inline cycles_t get_cycles_sync(void) unsigned eax; /* + * Use RDTSCP if possible; it is guaranteed to be synchronous + * and doesn't cause a VMEXIT on Hypervisors + */ + alternative_io(ASM_NOP3, ".byte 0x0f,0x01,0xf9", X86_FEATURE_RDTSCP, + "=A" (ret), "0" (0ULL) : "ecx", "memory"); + if (ret) + return ret; + + /* * Don't do an additional sync on CPUs where we know * RDTSC is already synchronous: */ -- cgit v1.1 From 02b64dab5675bc08048c7f70cbb0d8a417d20dbe Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 2 May 2007 19:27:21 +0200 Subject: [PATCH] i386: white space fixes in i387.h Signed-off-by: Jan Kiszka Signed-off-by: Andi Kleen --- include/asm-i386/i387.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-i386/i387.h b/include/asm-i386/i387.h index 434936c..49dc8e1 100644 --- a/include/asm-i386/i387.h +++ b/include/asm-i386/i387.h @@ -83,8 +83,8 @@ static inline void __save_init_fpu( struct task_struct *tsk ) #define __clear_fpu( tsk ) \ do { \ - if (task_thread_info(tsk)->status & TS_USEDFPU) { \ - asm volatile("fnclex ; fwait"); \ + if (task_thread_info(tsk)->status & TS_USEDFPU) { \ + asm volatile("fnclex ; fwait"); \ task_thread_info(tsk)->status &= ~TS_USEDFPU; \ stts(); \ } \ @@ -113,7 +113,7 @@ static inline void save_init_fpu( struct task_struct *tsk ) __clear_fpu( tsk ); \ preempt_enable(); \ } while (0) - \ + /* * FPU state interaction... 
*/ -- cgit v1.1 From c41bf8fa5e777b6a8a19cf2484937a7167eac77f Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 2 May 2007 19:27:21 +0200 Subject: [PATCH] i386: avoid redundant preempt_disable in __unlazy_fpu There are two callers of __unlazy_fpu, unlazy_fpu and __switch_to, and none of them appear to require additional preempt_disable/enable here. Let's open-code save_init_fpu in __unlazy_fpu to save a few ops. Signed-off-by: Jan Kiszka Signed-off-by: Andi Kleen --- include/asm-i386/i387.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-i386/i387.h b/include/asm-i386/i387.h index 49dc8e1..cdd1e24 100644 --- a/include/asm-i386/i387.h +++ b/include/asm-i386/i387.h @@ -74,11 +74,12 @@ static inline void __save_init_fpu( struct task_struct *tsk ) task_thread_info(tsk)->status &= ~TS_USEDFPU; } -#define __unlazy_fpu( tsk ) do { \ - if (task_thread_info(tsk)->status & TS_USEDFPU) \ - save_init_fpu( tsk ); \ - else \ - tsk->fpu_counter = 0; \ +#define __unlazy_fpu( tsk ) do { \ + if (task_thread_info(tsk)->status & TS_USEDFPU) { \ + __save_init_fpu(tsk); \ + stts(); \ + } else \ + tsk->fpu_counter = 0; \ } while (0) #define __clear_fpu( tsk ) \ -- cgit v1.1