Define per_cpu_offset in asm-i386/percpu.h when SMP defined, like
asm-generic/percpu.h does for UP.
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <ak@suse.de>
---
include/asm-i386/percpu.h | 2 ++
1 file changed, 2 insertions(+)
==================================================================---
a/include/asm-i386/percpu.h
+++ b/include/asm-i386/percpu.h
@@ -34,6 +34,8 @@
/* This is used for other cpus to find our section. */
extern unsigned long __per_cpu_offset[];
+
+#define per_cpu_offset(x) (__per_cpu_offset[x])
/* Separate out the type, so (int[3], foo) works. */
#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
--
Jeremy Fitzhardinge
2007-Apr-18 13:02 UTC
[patch 1/6] i386: Account for module percpu space separately from kernel percpu
Rather than using a single constant PERCPU_ENOUGH_ROOM, compute it as
the sum of kernel_percpu + PERCPU_MODULE_RESERVE. This is now common
to all architectures; if an architecture wants to set
PERCPU_ENOUGH_ROOM to something special, then it may do so (ia64 is
the only one which does).
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Andi Kleen <ak@suse.de>
---
include/asm-alpha/percpu.h | 14 --------------
include/asm-sparc64/percpu.h | 10 ----------
include/asm-x86_64/percpu.h | 10 ----------
include/linux/percpu.h | 9 ++++++++-
init/main.c | 7 ++-----
kernel/module.c | 2 +-
6 files changed, 11 insertions(+), 41 deletions(-)
==================================================================---
a/include/asm-alpha/percpu.h
+++ b/include/asm-alpha/percpu.h
@@ -1,19 +1,5 @@
#ifndef __ALPHA_PERCPU_H
#define __ALPHA_PERCPU_H
-
-/*
- * Increase the per cpu area for Alpha so that
- * modules using percpu area can load.
- */
-#ifdef CONFIG_MODULES
-# define PERCPU_MODULE_RESERVE 8192
-#else
-# define PERCPU_MODULE_RESERVE 0
-#endif
-
-#define PERCPU_ENOUGH_ROOM \
- (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \
- PERCPU_MODULE_RESERVE)
#include <asm-generic/percpu.h>
==================================================================---
a/include/asm-sparc64/percpu.h
+++ b/include/asm-sparc64/percpu.h
@@ -4,16 +4,6 @@
#include <linux/compiler.h>
#ifdef CONFIG_SMP
-
-#ifdef CONFIG_MODULES
-# define PERCPU_MODULE_RESERVE 8192
-#else
-# define PERCPU_MODULE_RESERVE 0
-#endif
-
-#define PERCPU_ENOUGH_ROOM \
- (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \
- PERCPU_MODULE_RESERVE)
extern void setup_per_cpu_areas(void);
==================================================================---
a/include/asm-x86_64/percpu.h
+++ b/include/asm-x86_64/percpu.h
@@ -10,16 +10,6 @@
#ifdef CONFIG_SMP
#include <asm/pda.h>
-
-#ifdef CONFIG_MODULES
-# define PERCPU_MODULE_RESERVE 8192
-#else
-# define PERCPU_MODULE_RESERVE 0
-#endif
-
-#define PERCPU_ENOUGH_ROOM \
- (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \
- PERCPU_MODULE_RESERVE)
#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
#define __my_cpu_offset() read_pda(data_offset)
==================================================================---
a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -11,8 +11,15 @@
/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */
#ifndef PERCPU_ENOUGH_ROOM
-#define PERCPU_ENOUGH_ROOM 32768
+#ifdef CONFIG_MODULES
+#define PERCPU_MODULE_RESERVE 8192
+#else
+#define PERCPU_MODULE_RESERVE 0
#endif
+
+#define PERCPU_ENOUGH_ROOM \
+ (__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE)
+#endif /* PERCPU_ENOUGH_ROOM */
/*
* Must be an lvalue. Since @var must be a simple identifier,
==================================================================---
a/init/main.c
+++ b/init/main.c
@@ -369,11 +369,8 @@ static void __init setup_per_cpu_areas(v
unsigned long nr_possible_cpus = num_possible_cpus();
/* Copy section for each CPU (we discard the original) */
- size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
-#ifdef CONFIG_MODULES
- if (size < PERCPU_ENOUGH_ROOM)
- size = PERCPU_ENOUGH_ROOM;
-#endif
+
+ size = ALIGN(PERCPU_ENOUGH_ROOM, SMP_CACHE_BYTES);
ptr = alloc_bootmem(size * nr_possible_cpus);
for_each_possible_cpu(i) {
==================================================================---
a/kernel/module.c
+++ b/kernel/module.c
@@ -430,7 +430,7 @@ static int percpu_modinit(void)
pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated,
GFP_KERNEL);
/* Static in-kernel percpu data (used). */
- pcpu_size[0] = -ALIGN(__per_cpu_end-__per_cpu_start, SMP_CACHE_BYTES);
+ pcpu_size[0] = -(__per_cpu_end-__per_cpu_start);
/* Free room. */
pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0];
if (pcpu_size[1] < 0) {
--
Xen wants a dedicated page for the GDT. I believe VMI likes it too.
lguest, KVM and native don't care.
Simple transformation to page-aligned "struct gdt_page".
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Jeremy Fitzhardinge <jeremy@xensource.com>
---
arch/i386/kernel/cpu/common.c | 6 +++---
arch/i386/kernel/entry.S | 2 +-
arch/i386/kernel/head.S | 2 +-
arch/i386/kernel/traps.c | 2 +-
include/asm-i386/desc.h | 9 +++++++--
5 files changed, 13 insertions(+), 8 deletions(-)
==================================================================---
a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -22,7 +22,7 @@
#include "cpu.h"
-DEFINE_PER_CPU(struct desc_struct, cpu_gdt[GDT_ENTRIES]) = {
+DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
[GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 },
[GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 },
[GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 },
@@ -48,8 +48,8 @@ DEFINE_PER_CPU(struct desc_struct, cpu_g
[GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 },
[GDT_ENTRY_PDA] = { 0x00000000, 0x00c09200 }, /* set in setup_pda */
-};
-EXPORT_PER_CPU_SYMBOL_GPL(cpu_gdt);
+} };
+EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
DEFINE_PER_CPU(struct i386_pda, _cpu_pda) = {
._pda = &per_cpu___cpu_pda,
==================================================================---
a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -558,7 +558,7 @@ END(syscall_badsys)
#define FIXUP_ESPFIX_STACK \
/* since we are on a wrong stack, we cant make it a C code :( */ \
movl %fs:PDA_cpu, %ebx; \
- PER_CPU(cpu_gdt, %ebx); \
+ PER_CPU(gdt_page, %ebx); \
GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
addl %esp, %eax; \
pushl $__KERNEL_DS; \
==================================================================---
a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -599,7 +599,7 @@ idt_descr:
.word 0 # 32 bit align gdt_desc.address
ENTRY(early_gdt_descr)
.word GDT_ENTRIES*8-1
- .long per_cpu__cpu_gdt /* Overwritten for secondary CPUs */
+ .long per_cpu__gdt_page /* Overwritten for secondary CPUs */
/*
* The boot_gdt_table must mirror the equivalent in setup.S and is
==================================================================---
a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -1037,7 +1037,7 @@ fastcall unsigned long patch_espfix_desc
fastcall unsigned long patch_espfix_desc(unsigned long uesp,
unsigned long kesp)
{
- struct desc_struct *gdt = __get_cpu_var(cpu_gdt);
+ struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt;
unsigned long base = (kesp - uesp) & -THREAD_SIZE;
unsigned long new_kesp = kesp - base;
unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
==================================================================---
a/include/asm-i386/desc.h
+++ b/include/asm-i386/desc.h
@@ -18,10 +18,15 @@ struct Xgt_desc_struct {
unsigned short pad;
} __attribute__ ((packed));
-DECLARE_PER_CPU(struct desc_struct, cpu_gdt[GDT_ENTRIES]);
+struct gdt_page
+{
+ struct desc_struct gdt[GDT_ENTRIES];
+} __attribute__((aligned(PAGE_SIZE)));
+DECLARE_PER_CPU(struct gdt_page, gdt_page);
+
static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
{
- return per_cpu(cpu_gdt, cpu);
+ return per_cpu(gdt_page, cpu).gdt;
}
extern struct Xgt_desc_struct idt_descr;
--
Currently x86 (similar to x86-64) has a special per-cpu structure
called "i386_pda" which can be easily and efficiently referenced via
the %fs register. An ELF section is more flexible than a structure,
allowing any piece of code to use this area. Indeed, such a section
already exists: the per-cpu area.
So this patch:
(1) Removes the PDA and uses per-cpu variables for each current member.
(2) Replaces the __KERNEL_PDA segment with __KERNEL_PERCPU.
(3) Creates a per-cpu mirror of __per_cpu_offset called this_cpu_off, which
can be used to calculate addresses for this CPU's variables.
(4) Simplifies startup, because %fs doesn't need to be loaded with a
special segment at early boot; it can be deferred until the first
percpu area is allocated (or never for UP).
The result is less code and one less x86-specific concept.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Andi Kleen <ak@suse.de>
---
arch/i386/kernel/asm-offsets.c | 5 -
arch/i386/kernel/cpu/common.c | 17 -----
arch/i386/kernel/entry.S | 5 -
arch/i386/kernel/head.S | 31 +--------
arch/i386/kernel/i386_ksyms.c | 2
arch/i386/kernel/irq.c | 3
arch/i386/kernel/process.c | 12 ++-
arch/i386/kernel/smpboot.c | 34 ++++------
arch/i386/kernel/vmi.c | 6 -
arch/i386/kernel/vmlinux.lds.S | 1
include/asm-i386/current.h | 5 -
include/asm-i386/irq_regs.h | 12 ++-
include/asm-i386/pda.h | 99 ------------------------------
include/asm-i386/percpu.h | 132 +++++++++++++++++++++++++++++++++++++---
include/asm-i386/processor.h | 2
include/asm-i386/segment.h | 6 -
include/asm-i386/smp.h | 4 -
17 files changed, 179 insertions(+), 197 deletions(-)
==================================================================---
a/arch/i386/kernel/asm-offsets.c
+++ b/arch/i386/kernel/asm-offsets.c
@@ -15,7 +15,6 @@
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <asm/elf.h>
-#include <asm/pda.h>
#define DEFINE(sym, val) \
asm volatile("\n->" #sym " %0 " #val : : "i" (val))
@@ -101,10 +100,6 @@ void foo(void)
OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
- BLANK();
- OFFSET(PDA_cpu, i386_pda, cpu_number);
- OFFSET(PDA_pcurrent, i386_pda, pcurrent);
-
#ifdef CONFIG_PARAVIRT
BLANK();
OFFSET(PARAVIRT_enabled, paravirt_ops, paravirt_enabled);
==================================================================---
a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -18,7 +18,6 @@
#include <asm/apic.h>
#include <mach_apic.h>
#endif
-#include <asm/pda.h>
#include "cpu.h"
@@ -47,12 +46,9 @@ DEFINE_PER_CPU(struct gdt_page, gdt_page
[GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */
[GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 },
- [GDT_ENTRY_PDA] = { 0x00000000, 0x00c09200 }, /* set in setup_pda */
+ [GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 },
} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
-
-DEFINE_PER_CPU(struct i386_pda, _cpu_pda);
-EXPORT_PER_CPU_SYMBOL(_cpu_pda);
static int cachesize_override __cpuinitdata = -1;
static int disable_x86_fxsr __cpuinitdata;
@@ -627,20 +623,13 @@ void __init early_cpu_init(void)
#endif
}
-/* Make sure %gs is initialized properly in idle threads */
+/* Make sure %fs is initialized properly in idle threads */
struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
{
memset(regs, 0, sizeof(struct pt_regs));
- regs->xfs = __KERNEL_PDA;
+ regs->xfs = __KERNEL_PERCPU;
return regs;
}
-
-/* Initial PDA used by boot CPU */
-struct i386_pda boot_pda = {
- ._pda = &boot_pda,
- .cpu_number = 0,
- .pcurrent = &init_task,
-};
/*
* cpu_init() initializes state that is per-CPU. Some data is already
==================================================================---
a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -132,7 +132,7 @@ 1:
movl $(__USER_DS), %edx; \
movl %edx, %ds; \
movl %edx, %es; \
- movl $(__KERNEL_PDA), %edx; \
+ movl $(__KERNEL_PERCPU), %edx; \
movl %edx, %fs
#define RESTORE_INT_REGS \
@@ -560,7 +560,6 @@ END(syscall_badsys)
#define FIXUP_ESPFIX_STACK \
/* since we are on a wrong stack, we cant make it a C code :( */ \
- movl %fs:PDA_cpu, %ebx; \
PER_CPU(gdt_page, %ebx); \
GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
addl %esp, %eax; \
@@ -685,7 +684,7 @@ error_code:
pushl %fs
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET fs, 0*/
- movl $(__KERNEL_PDA), %ecx
+ movl $(__KERNEL_PERCPU), %ecx
movl %ecx, %fs
UNWIND_ESPFIX_STACK
popl %ecx
==================================================================---
a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -317,12 +317,12 @@ 2: movl %cr0,%eax
movl %eax,%cr0
call check_x87
- call setup_pda
lgdt early_gdt_descr
lidt idt_descr
ljmp $(__KERNEL_CS),$1f
1: movl $(__KERNEL_DS),%eax # reload all the segment registers
movl %eax,%ss # after changing gdt.
+ movl %eax,%fs # gets reset once there's real percpu
movl $(__USER_DS),%eax # DS/ES contains default USER segment
movl %eax,%ds
@@ -331,9 +331,6 @@ 1: movl $(__KERNEL_DS),%eax # reload all
xorl %eax,%eax # Clear GS and LDT
movl %eax,%gs
lldt %ax
-
- movl $(__KERNEL_PDA),%eax
- mov %eax,%fs
cld # gcc2 wants the direction flag cleared at all times
pushl $0 # fake return address for unwinder
@@ -341,7 +338,11 @@ 1: movl $(__KERNEL_DS),%eax # reload all
movb ready, %cl
movb $1, ready
cmpb $0,%cl # the first CPU calls start_kernel
- jne initialize_secondary # all other CPUs call initialize_secondary
+ je 1f
+ movl $(__KERNEL_PERCPU), %eax
+ movl %eax,%fs # set this cpu's percpu
+ jmp initialize_secondary # all other CPUs call initialize_secondary
+1:
#endif /* CONFIG_SMP */
jmp start_kernel
@@ -362,23 +363,6 @@ check_x87:
ALIGN
1: movb $1,X86_HARD_MATH
.byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */
- ret
-
-/*
- * Point the GDT at this CPU's PDA. On boot this will be
- * cpu_gdt_table and boot_pda; for secondary CPUs, these will be
- * that CPU's GDT and PDA.
- */
-ENTRY(setup_pda)
- /* get the PDA pointer */
- movl start_pda, %eax
-
- /* slot the PDA address into the GDT */
- mov early_gdt_descr+2, %ecx
- mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */
- shr $16, %eax
- mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */
- mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */
ret
/*
@@ -553,9 +537,6 @@ ENTRY(empty_zero_page)
* This starts the data section.
*/
.data
-ENTRY(start_pda)
- .long boot_pda
-
ENTRY(stack_start)
.long init_thread_union+THREAD_SIZE
.long __BOOT_DS
==================================================================---
a/arch/i386/kernel/i386_ksyms.c
+++ b/arch/i386/kernel/i386_ksyms.c
@@ -28,5 +28,3 @@ EXPORT_SYMBOL(__read_lock_failed);
#endif
EXPORT_SYMBOL(csum_partial);
-
-EXPORT_SYMBOL(_proxy_pda);
==================================================================---
a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -23,6 +23,9 @@
DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
EXPORT_PER_CPU_SYMBOL(irq_stat);
+
+DEFINE_PER_CPU(struct pt_regs *, irq_regs);
+EXPORT_PER_CPU_SYMBOL(irq_regs);
/*
* 'what should we do if we get a hw irq event on an illegal vector'.
==================================================================---
a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -39,6 +39,7 @@
#include <linux/random.h>
#include <linux/personality.h>
#include <linux/tick.h>
+#include <linux/percpu.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -57,7 +58,6 @@
#include <asm/tlbflush.h>
#include <asm/cpu.h>
-#include <asm/pda.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -65,6 +65,12 @@ static int hlt_counter;
unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);
+
+DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
+
+DEFINE_PER_CPU(int, cpu_number);
+EXPORT_PER_CPU_SYMBOL(cpu_number);
/*
* Return saved PC of a blocked thread.
@@ -343,7 +349,7 @@ int kernel_thread(int (*fn)(void *), voi
regs.xds = __USER_DS;
regs.xes = __USER_DS;
- regs.xfs = __KERNEL_PDA;
+ regs.xfs = __KERNEL_PERCPU;
regs.orig_eax = -1;
regs.eip = (unsigned long) kernel_thread_helper;
regs.xcs = __KERNEL_CS | get_kernel_rpl();
@@ -712,7 +718,7 @@ struct task_struct fastcall * __switch_t
if (prev->gs | next->gs)
loadsegment(gs, next->gs);
- write_pda(pcurrent, next_p);
+ x86_write_percpu(current_task, next_p);
return prev_p;
}
==================================================================---
a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -53,7 +53,6 @@
#include <asm/desc.h>
#include <asm/arch_hooks.h>
#include <asm/nmi.h>
-#include <asm/pda.h>
#include <mach_apic.h>
#include <mach_wakecpu.h>
@@ -98,6 +97,9 @@ EXPORT_SYMBOL(x86_cpu_to_apicid);
EXPORT_SYMBOL(x86_cpu_to_apicid);
u8 apicid_2_node[MAX_APICID];
+
+DEFINE_PER_CPU(unsigned long, this_cpu_off);
+EXPORT_PER_CPU_SYMBOL(this_cpu_off);
/*
* Trampoline 80x86 program as an array.
@@ -456,7 +458,6 @@ extern struct {
void * esp;
unsigned short ss;
} stack_start;
-extern struct i386_pda *start_pda;
#ifdef CONFIG_NUMA
@@ -784,20 +785,17 @@ static inline struct task_struct * alloc
/* Initialize the CPU's GDT. This is either the boot CPU doing itself
(still using the master per-cpu area), or a CPU doing it for a
secondary which will soon come up. */
-static __cpuinit void init_gdt(int cpu, struct task_struct *idle)
+static __cpuinit void init_gdt(int cpu)
{
struct desc_struct *gdt = get_cpu_gdt_table(cpu);
- struct i386_pda *pda = &per_cpu(_cpu_pda, cpu);
-
- pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a,
- (u32 *)&gdt[GDT_ENTRY_PDA].b,
- (unsigned long)pda, sizeof(*pda) - 1,
- 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */
-
- memset(pda, 0, sizeof(*pda));
- pda->_pda = pda;
- pda->cpu_number = cpu;
- pda->pcurrent = idle;
+
+ pack_descriptor((u32 *)&gdt[GDT_ENTRY_PERCPU].a,
+ (u32 *)&gdt[GDT_ENTRY_PERCPU].b,
+ __per_cpu_offset[cpu], 0xFFFFF,
+ 0x80 | DESCTYPE_S | 0x2, 0x8);
+
+ per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
+ per_cpu(cpu_number, cpu) = cpu;
}
/* Defined in head.S */
@@ -824,9 +822,9 @@ static int __cpuinit do_boot_cpu(int api
if (IS_ERR(idle))
panic("failed fork for CPU %d", cpu);
- init_gdt(cpu, idle);
+ init_gdt(cpu);
+ per_cpu(current_task, cpu) = idle;
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
- start_pda = cpu_pda(cpu);
idle->thread.eip = (unsigned long) start_secondary;
/* start_eip had better be page-aligned! */
@@ -1188,14 +1186,14 @@ static inline void switch_to_new_gdt(voi
gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
- asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory");
+ asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
}
void __init native_smp_prepare_boot_cpu(void)
{
unsigned int cpu = smp_processor_id();
- init_gdt(cpu, current);
+ init_gdt(cpu);
switch_to_new_gdt();
cpu_set(cpu, cpu_online_map);
==================================================================---
a/arch/i386/kernel/vmi.c
+++ b/arch/i386/kernel/vmi.c
@@ -524,8 +524,6 @@ static void vmi_pmd_clear(pmd_t *pmd)
#endif
#ifdef CONFIG_SMP
-extern void setup_pda(void);
-
static void __devinit
vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
unsigned long start_esp)
@@ -550,12 +548,10 @@ vmi_startup_ipi_hook(int phys_apicid, un
ap.ds = __USER_DS;
ap.es = __USER_DS;
- ap.fs = __KERNEL_PDA;
+ ap.fs = __KERNEL_PERCPU;
ap.gs = 0;
ap.eflags = 0;
-
- setup_pda();
#ifdef CONFIG_X86_PAE
/* efer should match BSP efer. */
==================================================================---
a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -26,7 +26,6 @@ OUTPUT_ARCH(i386)
OUTPUT_ARCH(i386)
ENTRY(phys_startup_32)
jiffies = jiffies_64;
-_proxy_pda = 0;
PHDRS {
text PT_LOAD FLAGS(5); /* R_E */
==================================================================---
a/include/asm-i386/current.h
+++ b/include/asm-i386/current.h
@@ -1,14 +1,15 @@
#ifndef _I386_CURRENT_H
#define _I386_CURRENT_H
-#include <asm/pda.h>
#include <linux/compiler.h>
+#include <asm/percpu.h>
struct task_struct;
+DECLARE_PER_CPU(struct task_struct *, current_task);
static __always_inline struct task_struct *get_current(void)
{
- return read_pda(pcurrent);
+ return x86_read_percpu(current_task);
}
#define current get_current()
==================================================================---
a/include/asm-i386/irq_regs.h
+++ b/include/asm-i386/irq_regs.h
@@ -1,25 +1,27 @@
/*
* Per-cpu current frame pointer - the location of the last exception frame on
- * the stack, stored in the PDA.
+ * the stack, stored in the per-cpu area.
*
* Jeremy Fitzhardinge <jeremy@goop.org>
*/
#ifndef _ASM_I386_IRQ_REGS_H
#define _ASM_I386_IRQ_REGS_H
-#include <asm/pda.h>
+#include <asm/percpu.h>
+
+DECLARE_PER_CPU(struct pt_regs *, irq_regs);
static inline struct pt_regs *get_irq_regs(void)
{
- return read_pda(irq_regs);
+ return x86_read_percpu(irq_regs);
}
static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
{
struct pt_regs *old_regs;
- old_regs = read_pda(irq_regs);
- write_pda(irq_regs, new_regs);
+ old_regs = get_irq_regs();
+ x86_write_percpu(irq_regs, new_regs);
return old_regs;
}
==================================================================---
a/include/asm-i386/pda.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- Per-processor Data Areas
- Jeremy Fitzhardinge <jeremy@goop.org> 2006
- Based on asm-x86_64/pda.h by Andi Kleen.
- */
-#ifndef _I386_PDA_H
-#define _I386_PDA_H
-
-#include <linux/stddef.h>
-#include <linux/types.h>
-#include <asm/percpu.h>
-
-struct i386_pda
-{
- struct i386_pda *_pda; /* pointer to self */
-
- int cpu_number;
- struct task_struct *pcurrent; /* current process */
- struct pt_regs *irq_regs;
-};
-
-DECLARE_PER_CPU(struct i386_pda, _cpu_pda);
-#define cpu_pda(i) (&per_cpu(_cpu_pda, (i)))
-#define pda_offset(field) offsetof(struct i386_pda, field)
-
-extern void __bad_pda_field(void);
-
-/* This variable is never instantiated. It is only used as a stand-in
- for the real per-cpu PDA memory, so that gcc can understand what
- memory operations the inline asms() below are performing. This
- eliminates the need to make the asms volatile or have memory
- clobbers, so gcc can readily analyse them. */
-extern struct i386_pda _proxy_pda;
-
-#define pda_to_op(op,field,val) \
- do { \
- typedef typeof(_proxy_pda.field) T__; \
- if (0) { T__ tmp__; tmp__ = (val); } \
- switch (sizeof(_proxy_pda.field)) { \
- case 1: \
- asm(op "b %1,%%fs:%c2" \
- : "+m" (_proxy_pda.field) \
- :"ri" ((T__)val), \
- "i"(pda_offset(field))); \
- break; \
- case 2: \
- asm(op "w %1,%%fs:%c2" \
- : "+m" (_proxy_pda.field) \
- :"ri" ((T__)val), \
- "i"(pda_offset(field))); \
- break; \
- case 4: \
- asm(op "l %1,%%fs:%c2" \
- : "+m" (_proxy_pda.field) \
- :"ri" ((T__)val), \
- "i"(pda_offset(field))); \
- break; \
- default: __bad_pda_field(); \
- } \
- } while (0)
-
-#define pda_from_op(op,field) \
- ({ \
- typeof(_proxy_pda.field) ret__; \
- switch (sizeof(_proxy_pda.field)) { \
- case 1: \
- asm(op "b %%fs:%c1,%0" \
- : "=r" (ret__) \
- : "i" (pda_offset(field)), \
- "m" (_proxy_pda.field)); \
- break; \
- case 2: \
- asm(op "w %%fs:%c1,%0" \
- : "=r" (ret__) \
- : "i" (pda_offset(field)), \
- "m" (_proxy_pda.field)); \
- break; \
- case 4: \
- asm(op "l %%fs:%c1,%0" \
- : "=r" (ret__) \
- : "i" (pda_offset(field)), \
- "m" (_proxy_pda.field)); \
- break; \
- default: __bad_pda_field(); \
- } \
- ret__; })
-
-/* Return a pointer to a pda field */
-#define pda_addr(field) \
- ((typeof(_proxy_pda.field) *)((unsigned char *)read_pda(_pda) + \
- pda_offset(field)))
-
-#define read_pda(field) pda_from_op("mov",field)
-#define write_pda(field,val) pda_to_op("mov",field,val)
-#define add_pda(field,val) pda_to_op("add",field,val)
-#define sub_pda(field,val) pda_to_op("sub",field,val)
-#define or_pda(field,val) pda_to_op("or",field,val)
-
-#endif /* _I386_PDA_H */
==================================================================---
a/include/asm-i386/percpu.h
+++ b/include/asm-i386/percpu.h
@@ -1,9 +1,30 @@
#ifndef __ARCH_I386_PERCPU__
#define __ARCH_I386_PERCPU__
-#ifndef __ASSEMBLY__
-#include <asm-generic/percpu.h>
-#else
+#ifdef __ASSEMBLY__
+
+/*
+ * PER_CPU finds an address of a per-cpu variable.
+ *
+ * Args:
+ * var - variable name
+ * reg - 32bit register
+ *
+ * The resulting address is stored in the "reg" argument.
+ *
+ * Example:
+ * PER_CPU(cpu_gdt_descr, %ebx)
+ */
+#ifdef CONFIG_SMP
+#define PER_CPU(var, reg) \
+ movl %fs:per_cpu__this_cpu_off, reg; \
+ addl $per_cpu__##var, reg
+#else /* ! SMP */
+#define PER_CPU(var, reg) \
+ movl $per_cpu__##var, reg;
+#endif /* SMP */
+
+#else /* ...!ASSEMBLY */
/*
* PER_CPU finds an address of a per-cpu variable.
@@ -18,14 +39,107 @@
* PER_CPU(cpu_gdt_descr, %ebx)
*/
#ifdef CONFIG_SMP
-#define PER_CPU(var, cpu) \
- movl __per_cpu_offset(,cpu,4), cpu; \
- addl $per_cpu__##var, cpu;
-#else /* ! SMP */
-#define PER_CPU(var, cpu) \
- movl $per_cpu__##var, cpu;
+/* Same as generic implementation except for optimized local access. */
+#define __GENERIC_PER_CPU
+
+/* This is used for other cpus to find our section. */
+extern unsigned long __per_cpu_offset[];
+
+/* Separate out the type, so (int[3], foo) works. */
+#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
+#define DEFINE_PER_CPU(type, name) \
+ __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
+
+/* We can use this directly for local CPU (faster). */
+DECLARE_PER_CPU(unsigned long, this_cpu_off);
+
+/* var is in discarded region: offset to particular copy we want */
+#define per_cpu(var, cpu) (*({ \
+ extern int simple_indentifier_##var(void); \
+ RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); }))
+
+#define __raw_get_cpu_var(var) (*({ \
+ extern int simple_indentifier_##var(void); \
+ RELOC_HIDE(&per_cpu__##var, x86_read_percpu(this_cpu_off)); \
+}))
+
+#define __get_cpu_var(var) __raw_get_cpu_var(var)
+
+/* A macro to avoid #include hell... */
+#define percpu_modcopy(pcpudst, src, size) \
+do { \
+ unsigned int __i; \
+ for_each_possible_cpu(__i) \
+ memcpy((pcpudst)+__per_cpu_offset[__i], \
+ (src), (size)); \
+} while (0)
+
+#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
+#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
+
+/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
+#define __percpu_seg "%%fs:"
+#else /* !SMP */
+#include <asm-generic/percpu.h>
+#define __percpu_seg ""
#endif /* SMP */
+/* For arch-specific code, we can use direct single-insn ops (they
+ * don't give an lvalue though). */
+extern void __bad_percpu_size(void);
+
+#define percpu_to_op(op,var,val) \
+ do { \
+ typedef typeof(var) T__; \
+ if (0) { T__ tmp__; tmp__ = (val); } \
+ switch (sizeof(var)) { \
+ case 1: \
+ asm(op "b %1,"__percpu_seg"%0" \
+ : "+m" (var) \
+ :"ri" ((T__)val)); \
+ break; \
+ case 2: \
+ asm(op "w %1,"__percpu_seg"%0" \
+ : "+m" (var) \
+ :"ri" ((T__)val)); \
+ break; \
+ case 4: \
+ asm(op "l %1,"__percpu_seg"%0" \
+ : "+m" (var) \
+ :"ri" ((T__)val)); \
+ break; \
+ default: __bad_percpu_size(); \
+ } \
+ } while (0)
+
+#define percpu_from_op(op,var) \
+ ({ \
+ typeof(var) ret__; \
+ switch (sizeof(var)) { \
+ case 1: \
+ asm(op "b "__percpu_seg"%1,%0" \
+ : "=r" (ret__) \
+ : "m" (var)); \
+ break; \
+ case 2: \
+ asm(op "w "__percpu_seg"%1,%0" \
+ : "=r" (ret__) \
+ : "m" (var)); \
+ break; \
+ case 4: \
+ asm(op "l "__percpu_seg"%1,%0" \
+ : "=r" (ret__) \
+ : "m" (var)); \
+ break; \
+ default: __bad_percpu_size(); \
+ } \
+ ret__; })
+
+#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
+#define x86_write_percpu(var,val) percpu_to_op("mov", per_cpu__##var, val)
+#define x86_add_percpu(var,val) percpu_to_op("add", per_cpu__##var, val)
+#define x86_sub_percpu(var,val) percpu_to_op("sub", per_cpu__##var, val)
+#define x86_or_percpu(var,val) percpu_to_op("or", per_cpu__##var, val)
#endif /* !__ASSEMBLY__ */
#endif /* __ARCH_I386_PERCPU__ */
==================================================================---
a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -425,7 +425,7 @@ struct thread_struct {
.vm86_info = NULL, \
.sysenter_cs = __KERNEL_CS, \
.io_bitmap_ptr = NULL, \
- .fs = __KERNEL_PDA, \
+ .fs = __KERNEL_PERCPU, \
}
/*
==================================================================---
a/include/asm-i386/segment.h
+++ b/include/asm-i386/segment.h
@@ -39,7 +39,7 @@
* 25 - APM BIOS support
*
* 26 - ESPFIX small SS
- * 27 - PDA [ per-cpu private data area ]
+ * 27 - per-cpu [ offset to per-cpu data area ]
* 28 - unused
* 29 - unused
* 30 - unused
@@ -74,8 +74,8 @@
#define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14)
#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
-#define GDT_ENTRY_PDA (GDT_ENTRY_KERNEL_BASE + 15)
-#define __KERNEL_PDA (GDT_ENTRY_PDA * 8)
+#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15)
+#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8)
#define GDT_ENTRY_DOUBLEFAULT_TSS 31
==================================================================---
a/include/asm-i386/smp.h
+++ b/include/asm-i386/smp.h
@@ -8,7 +8,6 @@
#include <linux/kernel.h>
#include <linux/threads.h>
#include <linux/cpumask.h>
-#include <asm/pda.h>
#endif
#if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__)
@@ -112,7 +111,8 @@ do { } while (0)
* from the initial startup. We map APIC_BASE very early in page_setup(),
* so this is correct in the x86 case.
*/
-#define raw_smp_processor_id() (read_pda(cpu_number))
+DECLARE_PER_CPU(int, cpu_number);
+#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
extern cpumask_t cpu_callout_map;
extern cpumask_t cpu_callin_map;
--
Jeremy Fitzhardinge
2007-Apr-18 13:02 UTC
[patch 2/6] Allow percpu variables to be page-aligned
Let's allow page-alignment in general for per-cpu data (wanted by Xen, and
Ingo suggested KVM as well).
Because larger alignments can use more room, we increase the max per-cpu
memory to 64k rather than 32k: it's getting a little tight.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
arch/alpha/kernel/vmlinux.lds.S | 2 +-
arch/arm/kernel/vmlinux.lds.S | 2 +-
arch/cris/arch-v32/vmlinux.lds.S | 1 +
arch/frv/kernel/vmlinux.lds.S | 1 +
arch/i386/kernel/vmlinux.lds.S | 2 +-
arch/m32r/kernel/vmlinux.lds.S | 2 +-
arch/mips/kernel/vmlinux.lds.S | 2 +-
arch/parisc/kernel/vmlinux.lds.S | 2 +-
arch/powerpc/kernel/setup_64.c | 4 ++--
arch/powerpc/kernel/vmlinux.lds.S | 6 +-----
arch/ppc/kernel/vmlinux.lds.S | 2 +-
arch/s390/kernel/vmlinux.lds.S | 2 +-
arch/sh/kernel/vmlinux.lds.S | 2 +-
arch/sh64/kernel/vmlinux.lds.S | 2 +-
arch/sparc/kernel/vmlinux.lds.S | 2 +-
arch/sparc64/kernel/smp.c | 6 +++---
arch/x86_64/kernel/setup64.c | 4 ++--
arch/x86_64/kernel/vmlinux.lds.S | 2 +-
arch/xtensa/kernel/vmlinux.lds.S | 2 +-
init/main.c | 4 ++--
kernel/module.c | 8 ++++----
21 files changed, 29 insertions(+), 31 deletions(-)
==================================================================---
a/arch/alpha/kernel/vmlinux.lds.S
+++ b/arch/alpha/kernel/vmlinux.lds.S
@@ -69,7 +69,7 @@ SECTIONS
. = ALIGN(8);
SECURITY_INIT
- . = ALIGN(64);
+ . = ALIGN(8192);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
==================================================================---
a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -59,7 +59,7 @@ SECTIONS
usr/built-in.o(.init.ramfs)
__initramfs_end = .;
#endif
- . = ALIGN(64);
+ . = ALIGN(4096);
__per_cpu_start = .;
*(.data.percpu)
__per_cpu_end = .;
==================================================================---
a/arch/cris/arch-v32/vmlinux.lds.S
+++ b/arch/cris/arch-v32/vmlinux.lds.S
@@ -91,6 +91,7 @@ SECTIONS
}
SECURITY_INIT
+ . = ALIGN (8192);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
==================================================================---
a/arch/frv/kernel/vmlinux.lds.S
+++ b/arch/frv/kernel/vmlinux.lds.S
@@ -57,6 +57,7 @@ SECTIONS
__alt_instructions_end = .;
.altinstr_replacement : { *(.altinstr_replacement) }
+ . = ALIGN(4096);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
==================================================================---
a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -194,7 +194,7 @@ SECTIONS
__initramfs_end = .;
}
#endif
- . = ALIGN(L1_CACHE_BYTES);
+ . = ALIGN(4096);
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
__per_cpu_start = .;
*(.data.percpu)
==================================================================---
a/arch/m32r/kernel/vmlinux.lds.S
+++ b/arch/m32r/kernel/vmlinux.lds.S
@@ -110,7 +110,7 @@ SECTIONS
__initramfs_end = .;
#endif
- . = ALIGN(32);
+ . = ALIGN(4096);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
==================================================================---
a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -119,7 +119,7 @@ SECTIONS
.init.ramfs : { *(.init.ramfs) }
__initramfs_end = .;
#endif
- . = ALIGN(32);
+ . = ALIGN(_PAGE_SIZE);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
==================================================================---
a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -181,7 +181,7 @@ SECTIONS
.init.ramfs : { *(.init.ramfs) }
__initramfs_end = .;
#endif
- . = ALIGN(32);
+ . = ALIGN(ASM_PAGE_SIZE);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
==================================================================---
a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -583,14 +583,14 @@ void __init setup_per_cpu_areas(void)
char *ptr;
/* Copy section for each CPU (we discard the original) */
- size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
+ size = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE);
#ifdef CONFIG_MODULES
if (size < PERCPU_ENOUGH_ROOM)
size = PERCPU_ENOUGH_ROOM;
#endif
for_each_possible_cpu(i) {
- ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size);
+ ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size);
if (!ptr)
panic("Cannot allocate cpu data for CPU %d\n", i);
==================================================================---
a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -139,11 +139,7 @@ SECTIONS
__initramfs_end = .;
}
#endif
-#ifdef CONFIG_PPC32
- . = ALIGN(32);
-#else
- . = ALIGN(128);
-#endif
+ . = ALIGN(PAGE_SIZE);
.data.percpu : {
__per_cpu_start = .;
*(.data.percpu)
==================================================================---
a/arch/ppc/kernel/vmlinux.lds.S
+++ b/arch/ppc/kernel/vmlinux.lds.S
@@ -130,7 +130,7 @@ SECTIONS
__ftr_fixup : { *(__ftr_fixup) }
__stop___ftr_fixup = .;
- . = ALIGN(32);
+ . = ALIGN(4096);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
==================================================================---
a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -99,7 +99,7 @@ SECTIONS
. = ALIGN(2);
__initramfs_end = .;
#endif
- . = ALIGN(256);
+ . = ALIGN(4096);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
==================================================================---
a/arch/sh/kernel/vmlinux.lds.S
+++ b/arch/sh/kernel/vmlinux.lds.S
@@ -54,7 +54,7 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
.data.page_aligned : { *(.data.page_aligned) }
- . = ALIGN(L1_CACHE_BYTES);
+ . = ALIGN(PAGE_SIZE);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
==================================================================---
a/arch/sh64/kernel/vmlinux.lds.S
+++ b/arch/sh64/kernel/vmlinux.lds.S
@@ -85,7 +85,7 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
.data.page_aligned : C_PHYS(.data.page_aligned) { *(.data.page_aligned) }
- . = ALIGN(L1_CACHE_BYTES);
+ . = ALIGN(PAGE_SIZE);
__per_cpu_start = .;
.data.percpu : C_PHYS(.data.percpu) { *(.data.percpu) }
__per_cpu_end = . ;
==================================================================---
a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -65,7 +65,7 @@ SECTIONS
__initramfs_end = .;
#endif
- . = ALIGN(32);
+ . = ALIGN(4096);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
==================================================================---
a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -1449,11 +1449,11 @@ void __init setup_per_cpu_areas(void)
/* Copy section for each CPU (we discard the original) */
goal = PERCPU_ENOUGH_ROOM;
- __per_cpu_shift = 0;
- for (size = 1UL; size < goal; size <<= 1UL)
+ __per_cpu_shift = PAGE_SHIFT;
+ for (size = PAGE_SIZE; size < goal; size <<= 1UL)
__per_cpu_shift++;
- ptr = alloc_bootmem(size * NR_CPUS);
+ ptr = alloc_bootmem_pages(size * NR_CPUS);
__per_cpu_base = ptr - __per_cpu_start;
==================================================================---
a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -103,9 +103,9 @@ void __init setup_per_cpu_areas(void)
if (!NODE_DATA(cpu_to_node(i))) {
printk("cpu with no node %d, num_online_nodes %d\n",
i, num_online_nodes());
- ptr = alloc_bootmem(size);
+ ptr = alloc_bootmem_pages(size);
} else {
- ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size);
+ ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size);
}
if (!ptr)
panic("Cannot allocate cpu data for CPU %d\n", i);
==================================================================---
a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -194,7 +194,7 @@ SECTIONS
__initramfs_end = .;
#endif
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+ . = ALIGN(4096);
__per_cpu_start = .;
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) }
__per_cpu_end = .;
==================================================================---
a/arch/xtensa/kernel/vmlinux.lds.S
+++ b/arch/xtensa/kernel/vmlinux.lds.S
@@ -198,7 +198,7 @@ SECTIONS
__ftr_fixup : { *(__ftr_fixup) }
__stop___ftr_fixup = .;
- . = ALIGN(32);
+ . = ALIGN(4096);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
==================================================================---
a/init/main.c
+++ b/init/main.c
@@ -370,8 +370,8 @@ static void __init setup_per_cpu_areas(v
/* Copy section for each CPU (we discard the original) */
- size = ALIGN(PERCPU_ENOUGH_ROOM, SMP_CACHE_BYTES);
- ptr = alloc_bootmem(size * nr_possible_cpus);
+ size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
+ ptr = alloc_bootmem_pages(size * nr_possible_cpus);
for_each_possible_cpu(i) {
__per_cpu_offset[i] = ptr - __per_cpu_start;
==================================================================---
a/kernel/module.c
+++ b/kernel/module.c
@@ -346,10 +346,10 @@ static void *percpu_modalloc(unsigned lo
unsigned int i;
void *ptr;
- if (align > SMP_CACHE_BYTES) {
- printk(KERN_WARNING "%s: per-cpu alignment %li > %i\n",
- name, align, SMP_CACHE_BYTES);
- align = SMP_CACHE_BYTES;
+ if (align > PAGE_SIZE) {
+ printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
+ name, align, PAGE_SIZE);
+ align = PAGE_SIZE;
}
ptr = __per_cpu_start;
--
Hi Andi,

This is a series of patches based on your latest queue (as of the other day, at least). It includes:
- the most recent patch to compute the appropriate amount of percpu space to allocate, using a separate reservation for modules where needed.
- make the percpu sections page-aligned, so that percpu variables can be page aligned if needed (which is used by gdt_page)
- page-align the gdt
- remove the pda and convert all pda usages into percpu variables (percpu variables still use the %fs prefix mechanism the pda used)
- some improvements to asm-i386/percpu.h to make asm access to percpu variables easy
- define per_cpu_offset in asm-i386/percpu.h, to match asm-generic/

Thanks,
J
--
Jeremy Fitzhardinge wrote:
> This is a series of patches based on your latest queue (as of the
> other day, at least).
>
BTW, the From: line attributions got dropped from a few of these. These:
    Allow percpu variables to be page-aligned
    Page-align the GDT
should be From: Rusty. He did most of the work on the others, but I changed them enough that he shouldn't be saddled with the blame when things break ;)

J
> - some improvements to asm-i386/percpu.h to make asm access to percpu
> variables easy

One nitpick: I'd really like PER_CPU() renamed to PER_CPU_ADDR(). That's a separate patch, but I think it would be far clearer.

Thanks,
Rusty.
Jeremy Fitzhardinge
2007-Apr-18 13:02 UTC
[patch 5/6] cleanups to help using per-cpu variables from asm
This patch does a few small cleanups: - use PER_CPU_NAME to generate the names of per-cpu variables - use lea to add the per_cpu offset in PER_CPU(), because it doesn't affect condition flags - add PER_CPU_VAR which allows direct access to per-cpu variables with the %fs: prefix on SMP. Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Andi Kleen <ak@suse.de> --- include/asm-i386/percpu.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) ==================================================================--- a/include/asm-i386/percpu.h +++ b/include/asm-i386/percpu.h @@ -16,12 +16,14 @@ * PER_CPU(cpu_gdt_descr, %ebx) */ #ifdef CONFIG_SMP +#define PER_CPU(var, reg) \ + movl %fs:per_cpu__##this_cpu_off, reg; \ + lea per_cpu__##var(reg), reg +#define PER_CPU_VAR(var) %fs:per_cpu__##var +#else /* ! SMP */ #define PER_CPU(var, reg) \ - movl %fs:per_cpu__this_cpu_off, reg; \ - addl $per_cpu__##var, reg -#else /* ! SMP */ -#define PER_CPU(var, reg) \ - movl $per_cpu__##var, reg; + movl $per_cpu__##var, reg +#define PER_CPU_VAR(var) per_cpu__##var #endif /* SMP */ #else /* ...!ASSEMBLY */ --