Isaku Yamahata
2008-Nov-25 13:03 UTC
[PATCH 0/5] ia64/pv_ops, xen: binary patch optimization
This patch set is for binary patch optimization for paravirt_ops. The binary patch optimization is important on native case because the paravirt_ops overhead can be reduced by converting indirect call into in-place execution or direct call. The first patch imports helper functions which themselves doesn't interesting things. The second patch replaces the indirect function calls with a special call written in gcc extended inline asm and introduces native methods. The third patch introduces binary patch for kernel modules. The forth patch suppress false positive warnings which were caused by the previous patches. The last patch implements xen methods. For convenience the working full source is available from http://people.valinux.co.jp/~yamahata/xen-ia64/for_eagl/linux-2.6-ia64-pv-ops.git/ branch: ia64-pv-ops-2008nov25-xen-ia64-optimized-domu-binary-patch For the status of this patch series http://wiki.xensource.com/xenwiki/XenIA64/UpstreamMerge thanks, Diffstat: arch/ia64/include/asm/intrinsics.h | 6 +- arch/ia64/include/asm/module.h | 6 + arch/ia64/include/asm/paravirt.h | 8 + arch/ia64/include/asm/paravirt_patch.h | 143 +++++++ arch/ia64/include/asm/paravirt_privop.h | 347 ++++++++++++++++- arch/ia64/include/asm/xen/privop.h | 4 + arch/ia64/kernel/Makefile | 3 +- arch/ia64/kernel/efi.c | 1 + arch/ia64/kernel/module.c | 32 ++ arch/ia64/kernel/paravirt.c | 520 ++++++++++++++++++++++++- arch/ia64/kernel/paravirt_patch.c | 509 +++++++++++++++++++++++ arch/ia64/kernel/paravirtentry.S | 101 ++++- arch/ia64/kernel/setup.c | 1 + arch/ia64/kernel/vmlinux.lds.S | 24 ++ arch/ia64/kvm/vtlb.c | 2 + arch/ia64/xen/hypercall.S | 2 + arch/ia64/xen/xen_pv_ops.c | 667 +++++++++++++++++++++++++++++++ 17 files changed, 2351 insertions(+), 25 deletions(-)
Isaku Yamahata
2008-Nov-25 13:03 UTC
[PATCH 1/5] ia64/pv_op/binarypatch: add helper functions to support binary patching for paravirt_ops.
add helper functions to support binary patching for paravirt_ops. Signed-off-by: Isaku Yamahata <yamahata at valinux.co.jp> --- arch/ia64/include/asm/paravirt_patch.h | 143 +++++++++ arch/ia64/kernel/paravirt_patch.c | 509 ++++++++++++++++++++++++++++++++ arch/ia64/kernel/paravirtentry.S | 58 ++++ arch/ia64/kernel/vmlinux.lds.S | 24 ++ 4 files changed, 734 insertions(+), 0 deletions(-) create mode 100644 arch/ia64/include/asm/paravirt_patch.h create mode 100644 arch/ia64/kernel/paravirt_patch.c diff --git a/arch/ia64/include/asm/paravirt_patch.h b/arch/ia64/include/asm/paravirt_patch.h new file mode 100644 index 0000000..23d4fbf --- /dev/null +++ b/arch/ia64/include/asm/paravirt_patch.h @@ -0,0 +1,143 @@ +/****************************************************************************** + * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __ASM_PARAVIRT_PATCH_H +#define __ASM_PARAVIRT_PATCH_H + +#ifdef __ASSEMBLY__ + + .section .paravirt_branches, "a" + .previous +#define PARAVIRT_PATCH_SITE_BR(type) \ + { \ + [1:] ; \ + br.cond.sptk.many 2f ; \ + nop.b 0 ; \ + nop.b 0;; ; \ + } ; \ + 2: \ + .xdata8 ".paravirt_branches", 1b, type + +#else + +#include <linux/stringify.h> +#include <asm/intrinsics.h> + +/* for binary patch */ +struct paravirt_patch_site_bundle { + void *sbundle; + void *ebundle; + unsigned long type; +}; + +/* label means the beginning of new bundle */ +#define paravirt_alt_bundle(instr, privop) \ + "\t998:\n" \ + "\t" instr "\n" \ + "\t999:\n" \ + "\t.pushsection .paravirt_bundles, \"a\"\n" \ + "\t.popsection\n" \ + "\t.xdata8 \".paravirt_bundles\", 998b, 999b, " \ + __stringify(privop) "\n" + + +struct paravirt_patch_bundle_elem { + const void *sbundle; + const void *ebundle; + unsigned long type; +}; + + +struct paravirt_patch_site_inst { + unsigned long stag; + unsigned long etag; + unsigned long type; +}; + +#define paravirt_alt_inst(instr, privop) \ + "\t[998:]\n" \ + "\t" instr "\n" \ + "\t[999:]\n" \ + "\t.pushsection .paravirt_insts, \"a\"\n" \ + "\t.popsection\n" \ + "\t.xdata8 \".paravirt_insts\", 998b, 999b, " \ + __stringify(privop) "\n" + +struct paravirt_patch_site_branch { + unsigned long tag; + unsigned long type; +}; + +struct paravirt_patch_branch_target { + const void *entry; + unsigned long type; +}; + +void +__paravirt_patch_apply_branch( + unsigned long tag, unsigned long type, + const struct paravirt_patch_branch_target *entries, + unsigned int nr_entries); + +void +paravirt_patch_reloc_br(unsigned long tag, const void *target); + +void +paravirt_patch_reloc_brl(unsigned long tag, const void *target); + + +#ifdef ASM_SUPPORTED +unsigned long +ia64_native_patch_bundle(void *sbundle, void *ebundle, unsigned long type); + +unsigned long +__paravirt_patch_apply_bundle(void *sbundle, void *ebundle, unsigned long type, + const struct paravirt_patch_bundle_elem *elems, + unsigned long nelems, + const struct paravirt_patch_bundle_elem **found); + +void +paravirt_patch_apply_bundle(const struct paravirt_patch_site_bundle *start, + const struct paravirt_patch_site_bundle *end); + +void +paravirt_patch_apply_inst(const struct paravirt_patch_site_inst *start, + const struct paravirt_patch_site_inst *end); + +void paravirt_patch_apply(void); +#else +#define paravirt_patch_apply_bundle(start, end) do { } while (0) +#define paravirt_patch_apply_inst(start, end) do { } while (0) +#define paravirt_patch_apply() do { } while (0) +#endif + +#endif /* !__ASSEMBLEY__ */ + +#endif /* __ASM_PARAVIRT_PATCH_H */ + +/* + * Local variables: + * mode: C + * c-set-style: "linux" + * c-basic-offset: 8 + * tab-width: 8 + * indent-tabs-mode: t + * End: + */ diff --git a/arch/ia64/kernel/paravirt_patch.c b/arch/ia64/kernel/paravirt_patch.c new file mode 100644 index 0000000..079ee38 --- /dev/null +++ b/arch/ia64/kernel/paravirt_patch.c @@ -0,0 +1,509 @@ +/****************************************************************************** + * linux/arch/ia64/xen/paravirt_patch.c + * + * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/init.h> +#include <asm/intrinsics.h> +#include <asm/kprobes.h> +#include <asm/paravirt.h> +#include <asm/paravirt_patch.h> + +/* + * flush_icache_range() can't be used here. + * we are here before cpu_init() which initializes + * ia64_i_cache_stride_shift. flush_icache_range() uses it. + */ +void __init_or_module +paravirt_flush_i_cache_range(const void *instr, unsigned long size) +{ +#ifdef ASM_SUPPORTED +#define paravirt_fc_i(addr) \ + asm volatile ("fc.i %0":: "r"(addr): "memory"); +#else + extern void paravirt_fc_i(unsigned long addr); +#endif + unsigned long i; + + for (i = 0; i < size; i += sizeof(bundle_t)) + paravirt_fc_i(instr + i) +} + +bundle_t* __init_or_module +paravirt_get_bundle(unsigned long tag) +{ + return (bundle_t *)(tag & ~3UL); +} + +unsigned long __init_or_module +paravirt_get_slot(unsigned long tag) +{ + return tag & 3UL; +} + +unsigned long __init_or_module +paravirt_get_num_inst(unsigned long stag, unsigned long etag) +{ + bundle_t *sbundle = paravirt_get_bundle(stag); + unsigned long sslot = paravirt_get_slot(stag); + bundle_t *ebundle = paravirt_get_bundle(etag); + unsigned long eslot = paravirt_get_slot(etag); + + return (ebundle - sbundle) * 3 + eslot - sslot + 1; +} + +unsigned long __init_or_module +paravirt_get_next_tag(unsigned long tag) +{ + unsigned long slot = paravirt_get_slot(tag); + + switch (slot) { + case 0: + case 1: + return tag + 1; + case 2: { + bundle_t *bundle = paravirt_get_bundle(tag); + return (unsigned long)(bundle + 1); + } + default: + BUG(); + } + /* NOTREACHED */ +} + +cmp_inst_t __init_or_module +paravirt_read_slot0(const bundle_t *bundle) +{ + cmp_inst_t inst; + inst.l = bundle->quad0.slot0; + return inst; +} + +cmp_inst_t __init_or_module +paravirt_read_slot1(const bundle_t *bundle) +{ + cmp_inst_t inst; + inst.l = bundle->quad0.slot1_p0 | + ((unsigned long long)bundle->quad1.slot1_p1 << 18UL); + return inst; +} + +cmp_inst_t __init_or_module +paravirt_read_slot2(const bundle_t *bundle) +{ + cmp_inst_t inst; + inst.l = bundle->quad1.slot2; + return inst; +} + +cmp_inst_t __init_or_module +paravirt_read_inst(unsigned long tag) +{ + bundle_t *bundle = paravirt_get_bundle(tag); + unsigned long slot = paravirt_get_slot(tag); + + switch (slot) { + case 0: + return paravirt_read_slot0(bundle); + case 1: + return paravirt_read_slot1(bundle); + case 2: + return paravirt_read_slot2(bundle); + default: + BUG(); + } + /* NOTREACHED */ +} + +void __init_or_module +paravirt_write_slot0(bundle_t *bundle, cmp_inst_t inst) +{ + bundle->quad0.slot0 = inst.l; +} + +void __init_or_module +paravirt_write_slot1(bundle_t *bundle, cmp_inst_t inst) +{ + bundle->quad0.slot1_p0 = inst.l; + bundle->quad1.slot1_p1 = inst.l >> 18UL; +} + +void __init_or_module +paravirt_write_slot2(bundle_t *bundle, cmp_inst_t inst) +{ + bundle->quad1.slot2 = inst.l; +} + +void __init_or_module +paravirt_write_inst(unsigned long tag, cmp_inst_t inst) +{ + bundle_t *bundle = paravirt_get_bundle(tag); + unsigned long slot = paravirt_get_slot(tag); + + switch (slot) { + case 0: + paravirt_write_slot0(bundle, inst); + break; + case 1: + paravirt_write_slot1(bundle, inst); + break; + case 2: + paravirt_write_slot2(bundle, inst); + break; + default: + BUG(); + break; + } + paravirt_flush_i_cache_range(bundle, sizeof(*bundle)); +} + +/* for debug */ +void +paravirt_print_bundle(const bundle_t *bundle) +{ + const unsigned long *quad = (const unsigned long *)bundle; + cmp_inst_t slot0 = paravirt_read_slot0(bundle); + cmp_inst_t slot1 = paravirt_read_slot1(bundle); + cmp_inst_t slot2 = paravirt_read_slot2(bundle); + + printk(KERN_DEBUG + "bundle 0x%p 0x%016lx 0x%016lx\n", bundle, quad[0], quad[1]); + printk(KERN_DEBUG + "bundle template 0x%x\n", + bundle->quad0.template); + printk(KERN_DEBUG + "slot0 0x%lx slot1_p0 0x%lx slot1_p1 0x%lx slot2 0x%lx\n", + (unsigned long)bundle->quad0.slot0, + (unsigned long)bundle->quad0.slot1_p0, + (unsigned long)bundle->quad1.slot1_p1, + (unsigned long)bundle->quad1.slot2); + printk(KERN_DEBUG + "slot0 0x%016llx slot1 0x%016llx slot2 0x%016llx\n", + slot0.l, slot1.l, slot2.l); +} + +static int noreplace_paravirt __init_or_module = 0; + +static int __init setup_noreplace_paravirt(char *str) +{ + noreplace_paravirt = 1; + return 1; +} +__setup("noreplace-paravirt", setup_noreplace_paravirt); + +#ifdef ASM_SUPPORTED +static void __init_or_module +fill_nop_bundle(void *sbundle, void *ebundle) +{ + extern const char paravirt_nop_bundle[]; + extern const unsigned long paravirt_nop_bundle_size; + + void *bundle = sbundle; + + BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0); + BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0); + + while (bundle < ebundle) { + memcpy(bundle, paravirt_nop_bundle, paravirt_nop_bundle_size); + + bundle += paravirt_nop_bundle_size; + } +} + +/* helper function */ +unsigned long __init_or_module +__paravirt_patch_apply_bundle(void *sbundle, void *ebundle, unsigned long type, + const struct paravirt_patch_bundle_elem *elems, + unsigned long nelems, + const struct paravirt_patch_bundle_elem **found) +{ + unsigned long used = 0; + unsigned long i; + + BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0); + BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0); + + found = NULL; + for (i = 0; i < nelems; i++) { + const struct paravirt_patch_bundle_elem *p = &elems[i]; + if (p->type == type) { + unsigned long need = p->ebundle - p->sbundle; + unsigned long room = ebundle - sbundle; + + if (found != NULL) + *found = p; + + if (room < need) { + /* no room to replace. skip it */ + printk(KERN_DEBUG + "the space is too small to put " + "bundles. type %ld need %ld room %ld\n", + type, need, room); + break; + } + + used = need; + memcpy(sbundle, p->sbundle, used); + break; + } + } + + return used; +} + +void __init_or_module +paravirt_patch_apply_bundle(const struct paravirt_patch_site_bundle *start, + const struct paravirt_patch_site_bundle *end) +{ + const struct paravirt_patch_site_bundle *p; + + if (noreplace_paravirt) + return; + if (pv_init_ops.patch_bundle == NULL) + return; + + for (p = start; p < end; p++) { + unsigned long used; + + used = (*pv_init_ops.patch_bundle)(p->sbundle, p->ebundle, + p->type); + if (used == 0) + continue; + + fill_nop_bundle(p->sbundle + used, p->ebundle); + paravirt_flush_i_cache_range(p->sbundle, + p->ebundle - p->sbundle); + } + ia64_sync_i(); + ia64_srlz_i(); +} + +/* + * nop.i, nop.m, nop.f instruction are same format. + * but nop.b has differennt format. + * This doesn't support nop.b for now. + */ +static void __init_or_module +fill_nop_inst(unsigned long stag, unsigned long etag) +{ + extern const bundle_t paravirt_nop_mfi_inst_bundle[]; + unsigned long tag; + const cmp_inst_t nop_inst + paravirt_read_slot0(paravirt_nop_mfi_inst_bundle); + + for (tag = stag; tag < etag; tag = paravirt_get_next_tag(tag)) + paravirt_write_inst(tag, nop_inst); +} + +void __init_or_module +paravirt_patch_apply_inst(const struct paravirt_patch_site_inst *start, + const struct paravirt_patch_site_inst *end) +{ + const struct paravirt_patch_site_inst *p; + + if (noreplace_paravirt) + return; + if (pv_init_ops.patch_inst == NULL) + return; + + for (p = start; p < end; p++) { + unsigned long tag; + bundle_t *sbundle; + bundle_t *ebundle; + + tag = (*pv_init_ops.patch_inst)(p->stag, p->etag, p->type); + if (tag == p->stag) + continue; + + fill_nop_inst(tag, p->etag); + sbundle = paravirt_get_bundle(p->stag); + ebundle = paravirt_get_bundle(p->etag) + 1; + paravirt_flush_i_cache_range(sbundle, (ebundle - sbundle) * + sizeof(bundle_t)); + } + ia64_sync_i(); + ia64_srlz_i(); +} +#endif /* ASM_SUPPOTED */ + +/* brl.cond.sptk.many <target64> X3 */ +typedef union inst_x3_op { + cmp_inst_t inst; + struct { + unsigned long qp: 6; + unsigned long btyp: 3; + unsigned long unused: 3; + unsigned long p: 1; + unsigned long imm20b: 20; + unsigned long wh: 2; + unsigned long d: 1; + unsigned long i: 1; + unsigned long opcode: 4; + }; + unsigned long l; +} inst_x3_op_t; + +typedef union inst_x3_imm { + cmp_inst_t inst; + struct { + unsigned long unused: 2; + unsigned long imm39: 39; + }; + unsigned long l; +} inst_x3_imm_t; + +void __init_or_module +paravirt_patch_reloc_brl(unsigned long tag, const void *target) +{ + unsigned long tag_op = paravirt_get_next_tag(tag); + unsigned long tag_imm = tag; + bundle_t *bundle = paravirt_get_bundle(tag); + + cmp_inst_t inst_op = paravirt_read_inst(tag_op); + cmp_inst_t inst_imm = paravirt_read_inst(tag_imm); + + inst_x3_op_t inst_x3_op = { .l = inst_op.l }; + inst_x3_imm_t inst_x3_imm = { .l = inst_imm.l }; + + unsigned long imm60 + ((unsigned long)target - (unsigned long)bundle) >> 4; + + BUG_ON(paravirt_get_slot(tag) != 1); /* MLX */ + BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0); + + /* imm60[59] 1bit */ + inst_x3_op.i = (imm60 >> 59) & 1; + /* imm60[19:0] 20bit */ + inst_x3_op.imm20b = imm60 & ((1UL << 20) - 1); + /* imm60[58:20] 39bit */ + inst_x3_imm.imm39 = (imm60 >> 20) & ((1UL << 39) - 1); + + inst_op.l = inst_x3_op.l; + inst_imm.l = inst_x3_imm.l; + + paravirt_write_inst(tag_op, inst_op); + paravirt_write_inst(tag_imm, inst_imm); +} + +/* br.cond.sptk.many <target25> B1 */ +typedef union inst_b1 { + cmp_inst_t inst; + struct { + unsigned long qp: 6; + unsigned long btype: 3; + unsigned long unused: 3; + unsigned long p: 1; + unsigned long imm20b: 20; + unsigned long wh: 2; + unsigned long d: 1; + unsigned long s: 1; + unsigned long opcode: 4; + }; + unsigned long l; +} inst_b1_t; + +void __init +paravirt_patch_reloc_br(unsigned long tag, const void *target) +{ + bundle_t *bundle = paravirt_get_bundle(tag); + cmp_inst_t inst = paravirt_read_inst(tag); + unsigned long target25 = (unsigned long)target - (unsigned long)bundle; + inst_b1_t inst_b1; + + BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0); + + inst_b1.l = inst.l; + if (target25 & (1UL << 63)) + inst_b1.s = 1; + else + inst_b1.s = 0; + + inst_b1.imm20b = target25 >> 4; + inst.l = inst_b1.l; + + paravirt_write_inst(tag, inst); +} + +void __init +__paravirt_patch_apply_branch( + unsigned long tag, unsigned long type, + const struct paravirt_patch_branch_target *entries, + unsigned int nr_entries) +{ + unsigned int i; + for (i = 0; i < nr_entries; i++) { + if (entries[i].type == type) { + paravirt_patch_reloc_br(tag, entries[i].entry); + break; + } + } +} + +static void __init +paravirt_patch_apply_branch(const struct paravirt_patch_site_branch *start, + const struct paravirt_patch_site_branch *end) +{ + const struct paravirt_patch_site_branch *p; + + if (noreplace_paravirt) + return; + if (pv_init_ops.patch_branch == NULL) + return; + + for (p = start; p < end; p++) + (*pv_init_ops.patch_branch)(p->tag, p->type); + + ia64_sync_i(); + ia64_srlz_i(); +} + +void __init +paravirt_patch_apply(void) +{ + extern const char __start_paravirt_bundles[]; + extern const char __stop_paravirt_bundles[]; + extern const char __start_paravirt_insts[]; + extern const char __stop_paravirt_insts[]; + extern const char __start_paravirt_branches[]; + extern const char __stop_paravirt_branches[]; + + paravirt_patch_apply_bundle((const struct paravirt_patch_site_bundle *) + __start_paravirt_bundles, + (const struct paravirt_patch_site_bundle *) + __stop_paravirt_bundles); + paravirt_patch_apply_inst((const struct paravirt_patch_site_inst *) + __start_paravirt_insts, + (const struct paravirt_patch_site_inst *) + __stop_paravirt_insts); + paravirt_patch_apply_branch((const struct paravirt_patch_site_branch *) + __start_paravirt_branches, + (const struct paravirt_patch_site_branch *) + __stop_paravirt_branches); +} + +/* + * Local variables: + * mode: C + * c-set-style: "linux" + * c-basic-offset: 8 + * tab-width: 8 + * indent-tabs-mode: t + * End: + */ diff --git a/arch/ia64/kernel/paravirtentry.S b/arch/ia64/kernel/paravirtentry.S index 2f42fcb..733e31a 100644 --- a/arch/ia64/kernel/paravirtentry.S +++ b/arch/ia64/kernel/paravirtentry.S @@ -58,3 +58,61 @@ BRANCH_PROC(switch_to, r22, b7) BRANCH_PROC_UNWINFO(leave_syscall, r22, b7) BRANCH_PROC(work_processed_syscall, r2, b7) BRANCH_PROC_UNWINFO(leave_kernel, r22, b7) + + +#ifdef CONFIG_MODULES +#define __INIT_OR_MODULE .text +#define __INITDATA_OR_MODULE .data +#else +#define __INIT_OR_MODULE __INIT +#define __INITDATA_OR_MODULE __INITDATA +#endif /* CONFIG_MODULES */ + +#ifdef __INTEL_COMPILER + __INIT_OR_MODULE + GLOBAL_ENTRY(paravirt_fc_i) + fc.i r32 + br.ret.sptk.many rp + END(paravirt_fc_i) + __FINIT +#endif + + __INIT_OR_MODULE + .align 32 + GLOBAL_ENTRY(paravirt_nop_b_inst_bundle) + { + nop.b 0 + nop.b 0 + nop.b 0 + } + END(paravirt_nop_b_inst_bundle) + __FINIT + + /* NOTE: nop.[mfi] has same format */ + __INIT_OR_MODULE + GLOBAL_ENTRY(paravirt_nop_mfi_inst_bundle) + { + nop.m 0 + nop.f 0 + nop.i 0 + } + END(paravirt_nop_mfi_inst_bundle) + __FINIT + + __INIT_OR_MODULE + GLOBAL_ENTRY(paravirt_nop_bundle) +paravirt_nop_bundle_start: + { + nop 0 + nop 0 + nop 0 + } +paravirt_nop_bundle_end: + END(paravirt_nop_bundle) + __FINIT + + __INITDATA_OR_MODULE + .align 8 + .global paravirt_nop_bundle_size +paravirt_nop_bundle_size: + data8 paravirt_nop_bundle_end - paravirt_nop_bundle_start diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 92ae7e8..794d168 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -169,6 +169,30 @@ SECTIONS __end___mckinley_e9_bundles = .; } +#if defined(CONFIG_PARAVIRT) + . = ALIGN(16); + .paravirt_bundles : AT(ADDR(.paravirt_bundles) - LOAD_OFFSET) + { + __start_paravirt_bundles = .; + *(.paravirt_bundles) + __stop_paravirt_bundles = .; + } + . = ALIGN(16); + .paravirt_insts : AT(ADDR(.paravirt_insts) - LOAD_OFFSET) + { + __start_paravirt_insts = .; + *(.paravirt_insts) + __stop_paravirt_insts = .; + } + . = ALIGN(16); + .paravirt_branches : AT(ADDR(.paravirt_branches) - LOAD_OFFSET) + { + __start_paravirt_branches = .; + *(.paravirt_branches) + __stop_paravirt_branches = .; + } +#endif + #if defined(CONFIG_IA64_GENERIC) /* Machine Vector */ . = ALIGN(16); -- 1.6.0.2
Isaku Yamahata
2008-Nov-25 13:03 UTC
[PATCH 2/5] ia64/pv_ops: implement binary patching optimization for native.
implement binary patching optimization for pv_cpu_ops. With this optimization, indirect call for pv_cpu_ops methods can be converted into inline execution or direct call. Signed-off-by: Isaku Yamahata <yamahata at valinux.co.jp> --- arch/ia64/include/asm/intrinsics.h | 6 +- arch/ia64/include/asm/paravirt.h | 8 + arch/ia64/include/asm/paravirt_privop.h | 341 ++++++++++++++++++++- arch/ia64/kernel/Makefile | 3 +- arch/ia64/kernel/paravirt.c | 520 ++++++++++++++++++++++++++++++- arch/ia64/kernel/paravirtentry.S | 43 ++-- arch/ia64/kernel/setup.c | 1 + 7 files changed, 897 insertions(+), 25 deletions(-) diff --git a/arch/ia64/include/asm/intrinsics.h b/arch/ia64/include/asm/intrinsics.h index a3e44a5..fbe2ad9 100644 --- a/arch/ia64/include/asm/intrinsics.h +++ b/arch/ia64/include/asm/intrinsics.h @@ -201,7 +201,11 @@ extern long ia64_cmpxchg_called_with_bad_pointer (void); #ifndef __ASSEMBLY__ #if defined(CONFIG_PARAVIRT) && defined(__KERNEL__) -#define IA64_INTRINSIC_API(name) pv_cpu_ops.name +#ifdef ASM_SUPPORTED +# define IA64_INTRINSIC_API(name) paravirt_ ## name +#else +# define IA64_INTRINSIC_API(name) pv_cpu_ops.name +#endif #define IA64_INTRINSIC_MACRO(name) paravirt_ ## name #else #define IA64_INTRINSIC_API(name) ia64_native_ ## name diff --git a/arch/ia64/include/asm/paravirt.h b/arch/ia64/include/asm/paravirt.h index fc433f6..2eb0a98 100644 --- a/arch/ia64/include/asm/paravirt.h +++ b/arch/ia64/include/asm/paravirt.h @@ -118,6 +118,14 @@ struct pv_init_ops { int (*arch_setup_nomca)(void); void (*post_smp_prepare_boot_cpu)(void); + +#ifdef ASM_SUPPORTED + unsigned long (*patch_bundle)(void *sbundle, void *ebundle, + unsigned long type); + unsigned long (*patch_inst)(unsigned long stag, unsigned long etag, + unsigned long type); +#endif + void (*patch_branch)(unsigned long tag, unsigned long type); }; extern struct pv_init_ops pv_init_ops; diff --git a/arch/ia64/include/asm/paravirt_privop.h b/arch/ia64/include/asm/paravirt_privop.h index 0b59742..ee941cb 100644 --- a/arch/ia64/include/asm/paravirt_privop.h +++ b/arch/ia64/include/asm/paravirt_privop.h @@ -60,12 +60,18 @@ extern unsigned long ia64_native_getreg_func(int regnum); /* Instructions paravirtualized for performance */ /************************************************/ +#ifndef ASM_SUPPORTED +#define paravirt_ssm_i() pv_cpu_ops.ssm_i() +#define paravirt_rsm_i() pv_cpu_ops.rsm_i() +#define __paravirt_getreg() pv_cpu_ops.getreg() +#endif + /* mask for ia64_native_ssm/rsm() must be constant.("i" constraing). * static inline function doesn't satisfy it. */ #define paravirt_ssm(mask) \ do { \ if ((mask) == IA64_PSR_I) \ - pv_cpu_ops.ssm_i(); \ + paravirt_ssm_i(); \ else \ ia64_native_ssm(mask); \ } while (0) @@ -73,7 +79,7 @@ extern unsigned long ia64_native_getreg_func(int regnum); #define paravirt_rsm(mask) \ do { \ if ((mask) == IA64_PSR_I) \ - pv_cpu_ops.rsm_i(); \ + paravirt_rsm_i(); \ else \ ia64_native_rsm(mask); \ } while (0) @@ -87,7 +93,7 @@ extern unsigned long ia64_native_getreg_func(int regnum); if ((reg) == _IA64_REG_IP) \ res = ia64_native_getreg(_IA64_REG_IP); \ else \ - res = pv_cpu_ops.getreg(reg); \ + res = __paravirt_getreg(reg); \ res; \ }) @@ -122,4 +128,333 @@ void paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch); IA64_PARAVIRT_ASM_FUNC(work_processed_syscall) #define ia64_leave_kernel IA64_PARAVIRT_ASM_FUNC(leave_kernel) + +#if defined(CONFIG_PARAVIRT) +/****************************************************************************** + * binary patching infrastructure + */ +#define PARAVIRT_PATCH_TYPE_FC 1 +#define PARAVIRT_PATCH_TYPE_THASH 2 +#define PARAVIRT_PATCH_TYPE_GET_CPUID 3 +#define PARAVIRT_PATCH_TYPE_GET_PMD 4 +#define PARAVIRT_PATCH_TYPE_PTCGA 5 +#define PARAVIRT_PATCH_TYPE_GET_RR 6 +#define PARAVIRT_PATCH_TYPE_SET_RR 7 +#define PARAVIRT_PATCH_TYPE_SET_RR0_TO_RR4 8 +#define PARAVIRT_PATCH_TYPE_SSM_I 9 +#define PARAVIRT_PATCH_TYPE_RSM_I 10 +#define PARAVIRT_PATCH_TYPE_GET_PSR_I 11 +#define PARAVIRT_PATCH_TYPE_INTRIN_LOCAL_IRQ_RESTORE 12 + +/* PARAVIRT_PATY_TYPE_[GS]ETREG + _IA64_REG_xxx */ +#define PARAVIRT_PATCH_TYPE_GETREG 0x10000000 +#define PARAVIRT_PATCH_TYPE_SETREG 0x20000000 + +/* + * struct task_struct* (*ia64_switch_to)(void* next_task); + * void *ia64_leave_syscall; + * void *ia64_work_processed_syscall + * void *ia64_leave_kernel; + */ + +#define PARAVIRT_PATCH_TYPE_BR_START 0x30000000 +#define PARAVIRT_PATCH_TYPE_BR_SWITCH_TO \ + (PARAVIRT_PATCH_TYPE_BR_START + 0) +#define PARAVIRT_PATCH_TYPE_BR_LEAVE_SYSCALL \ + (PARAVIRT_PATCH_TYPE_BR_START + 1) +#define PARAVIRT_PATCH_TYPE_BR_WORK_PROCESSED_SYSCALL \ + (PARAVIRT_PATCH_TYPE_BR_START + 2) +#define PARAVIRT_PATCH_TYPE_BR_LEAVE_KERNEL \ + (PARAVIRT_PATCH_TYPE_BR_START + 3) + +#ifdef ASM_SUPPORTED +#include <asm/paravirt_patch.h> + +/* + * pv_cpu_ops calling stub. + * normal function call convension can't be written by gcc + * inline assembly. + * + * from the caller's point of view, + * the following registers will be clobbered. + * r2, r3 + * r8-r15 + * r16, r17 + * b6, b7 + * p6-p15 + * ar.ccv + * + * from the callee's point of view , + * the following registers can be used. + * r2, r3: scratch + * r8: scratch, input argument0 and return value + * r0-r15: scratch, input argument1-5 + * b6: return pointer + * b7: scratch + * p6-p15: scratch + * ar.ccv: scratch + * + * other registers must not be changed. especially + * b0: rp: preserved. gcc ignores b0 in clobbered register. + * r16: saved gp + */ +/* 5 bundles */ +#define __PARAVIRT_BR \ + ";;\n" \ + "{ .mlx\n" \ + "nop 0\n" \ + "movl r2 = %[op_addr]\n"/* get function pointer address */ \ + ";;\n" \ + "}\n" \ + "1:\n" \ + "{ .mii\n" \ + "ld8 r2 = [r2]\n" /* load function descriptor address */ \ + "mov r17 = ip\n" /* get ip to calc return address */ \ + "mov r16 = gp\n" /* save gp */ \ + ";;\n" \ + "}\n" \ + "{ .mii\n" \ + "ld8 r3 = [r2], 8\n" /* load entry address */ \ + "adds r17 = 1f - 1b, r17\n" /* calculate return address */ \ + ";;\n" \ + "mov b7 = r3\n" /* set entry address */ \ + "}\n" \ + "{ .mib\n" \ + "ld8 gp = [r2]\n" /* load gp value */ \ + "mov b6 = r17\n" /* set return address */ \ + "br.cond.sptk.few b7\n" /* intrinsics are very short isns */ \ + "}\n" \ + "1:\n" \ + "{ .mii\n" \ + "mov gp = r16\n" /* restore gp value */ \ + "nop 0\n" \ + "nop 0\n" \ + ";;\n" \ + "}\n" + +#define PARAVIRT_OP(op) \ + [op_addr] "i"(&pv_cpu_ops.op) + +#define PARAVIRT_TYPE(type) \ + PARAVIRT_PATCH_TYPE_ ## type + +#define PARAVIRT_REG_CLOBBERS0 \ + "r2", "r3", /*"r8",*/ "r9", "r10", "r11", "r14", \ + "r15", "r16", "r17" + +#define PARAVIRT_REG_CLOBBERS1 \ + "r2","r3", /*"r8",*/ "r9", "r10", "r11", "r14", \ + "r15", "r16", "r17" + +#define PARAVIRT_REG_CLOBBERS2 \ + "r2", "r3", /*"r8", "r9",*/ "r10", "r11", "r14", \ + "r15", "r16", "r17" + +#define PARAVIRT_REG_CLOBBERS5 \ + "r2", "r3", /*"r8", "r9", "r10", "r11", "r14",*/ \ + "r15", "r16", "r17" + +#define PARAVIRT_BR_CLOBBERS \ + "b6", "b7" + +#define PARAVIRT_PR_CLOBBERS \ + "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15" + +#define PARAVIRT_AR_CLOBBERS \ + "ar.ccv" + +#define PARAVIRT_CLOBBERS0 \ + PARAVIRT_REG_CLOBBERS0, \ + PARAVIRT_BR_CLOBBERS, \ + PARAVIRT_PR_CLOBBERS, \ + PARAVIRT_AR_CLOBBERS, \ + "memory" + +#define PARAVIRT_CLOBBERS1 \ + PARAVIRT_REG_CLOBBERS1, \ + PARAVIRT_BR_CLOBBERS, \ + PARAVIRT_PR_CLOBBERS, \ + PARAVIRT_AR_CLOBBERS, \ + "memory" + +#define PARAVIRT_CLOBBERS2 \ + PARAVIRT_REG_CLOBBERS2, \ + PARAVIRT_BR_CLOBBERS, \ + PARAVIRT_PR_CLOBBERS, \ + PARAVIRT_AR_CLOBBERS, \ + "memory" + +#define PARAVIRT_CLOBBERS5 \ + PARAVIRT_REG_CLOBBERS5, \ + PARAVIRT_BR_CLOBBERS, \ + PARAVIRT_PR_CLOBBERS, \ + PARAVIRT_AR_CLOBBERS, \ + "memory" + +#define PARAVIRT_BR0(op, type) \ + register unsigned long ia64_clobber asm ("r8"); \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(type)) \ + : "=r"(ia64_clobber) \ + : PARAVIRT_OP(op) \ + : PARAVIRT_CLOBBERS0) + +#define PARAVIRT_BR0_RET(op, type) \ + register unsigned long ia64_intri_res asm ("r8"); \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(type)) \ + : "=r"(ia64_intri_res) \ + : PARAVIRT_OP(op) \ + : PARAVIRT_CLOBBERS0) + +#define PARAVIRT_BR1(op, type, arg1) \ + register unsigned long __##arg1 asm ("r8") = arg1; \ + register unsigned long ia64_clobber asm ("r8"); \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(type)) \ + : "=r"(ia64_clobber) \ + : PARAVIRT_OP(op), "0"(__##arg1) \ + : PARAVIRT_CLOBBERS1) + +#define PARAVIRT_BR1_RET(op, type, arg1) \ + register unsigned long ia64_intri_res asm ("r8"); \ + register unsigned long __##arg1 asm ("r8") = arg1; \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(type)) \ + : "=r"(ia64_intri_res) \ + : PARAVIRT_OP(op), "0"(__##arg1) \ + : PARAVIRT_CLOBBERS1) + +#define PARAVIRT_BR2(op, type, arg1, arg2) \ + register unsigned long __##arg1 asm ("r8") = arg1; \ + register unsigned long __##arg2 asm ("r9") = arg2; \ + register unsigned long ia64_clobber1 asm ("r8"); \ + register unsigned long ia64_clobber2 asm ("r9"); \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(type)) \ + : "=r"(ia64_clobber1), "=r"(ia64_clobber2) \ + : PARAVIRT_OP(op), "0"(__##arg1), "1"(__##arg2) \ + : PARAVIRT_CLOBBERS2) + + +#define PARAVIRT_DEFINE_CPU_OP0(op, type) \ + static inline void \ + paravirt_ ## op (void) \ + { \ + PARAVIRT_BR0(op, type); \ + } + +#define PARAVIRT_DEFINE_CPU_OP0_RET(op, type) \ + static inline unsigned long \ + paravirt_ ## op (void) \ + { \ + PARAVIRT_BR0_RET(op, type); \ + return ia64_intri_res; \ + } + +#define PARAVIRT_DEFINE_CPU_OP1(op, type) \ + static inline void \ + paravirt_ ## op (unsigned long arg1) \ + { \ + PARAVIRT_BR1(op, type, arg1); \ + } + +#define PARAVIRT_DEFINE_CPU_OP1_RET(op, type) \ + static inline unsigned long \ + paravirt_ ## op (unsigned long arg1) \ + { \ + PARAVIRT_BR1_RET(op, type, arg1); \ + return ia64_intri_res; \ + } + +#define PARAVIRT_DEFINE_CPU_OP2(op, type) \ + static inline void \ + paravirt_ ## op (unsigned long arg1, \ + unsigned long arg2) \ + { \ + PARAVIRT_BR2(op, type, arg1, arg2); \ + } + + +PARAVIRT_DEFINE_CPU_OP1(fc, FC); +PARAVIRT_DEFINE_CPU_OP1_RET(thash, THASH) +PARAVIRT_DEFINE_CPU_OP1_RET(get_cpuid, GET_CPUID) +PARAVIRT_DEFINE_CPU_OP1_RET(get_pmd, GET_PMD) +PARAVIRT_DEFINE_CPU_OP2(ptcga, PTCGA) +PARAVIRT_DEFINE_CPU_OP1_RET(get_rr, GET_RR) +PARAVIRT_DEFINE_CPU_OP2(set_rr, SET_RR) +PARAVIRT_DEFINE_CPU_OP0(ssm_i, SSM_I) +PARAVIRT_DEFINE_CPU_OP0(rsm_i, RSM_I) +PARAVIRT_DEFINE_CPU_OP0_RET(get_psr_i, GET_PSR_I) +PARAVIRT_DEFINE_CPU_OP1(intrin_local_irq_restore, INTRIN_LOCAL_IRQ_RESTORE) + +static inline void +paravirt_set_rr0_to_rr4(unsigned long val0, unsigned long val1, + unsigned long val2, unsigned long val3, + unsigned long val4) +{ + register unsigned long __val0 asm ("r8") = val0; + register unsigned long __val1 asm ("r9") = val1; + register unsigned long __val2 asm ("r10") = val2; + register unsigned long __val3 asm ("r11") = val3; + register unsigned long __val4 asm ("r14") = val4; + + register unsigned long ia64_clobber0 asm ("r8"); + register unsigned long ia64_clobber1 asm ("r9"); + register unsigned long ia64_clobber2 asm ("r10"); + register unsigned long ia64_clobber3 asm ("r11"); + register unsigned long ia64_clobber4 asm ("r14"); + + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, + PARAVIRT_TYPE(SET_RR0_TO_RR4)) + : "=r"(ia64_clobber0), + "=r"(ia64_clobber1), + "=r"(ia64_clobber2), + "=r"(ia64_clobber3), + "=r"(ia64_clobber4) + : PARAVIRT_OP(set_rr0_to_rr4), + "0"(__val0), "1"(__val1), "2"(__val2), + "3"(__val3), "4"(__val4) + : PARAVIRT_CLOBBERS5); +} + +/* unsigned long paravirt_getreg(int reg) */ +#define __paravirt_getreg(reg) \ + ({ \ + register unsigned long ia64_intri_res asm ("r8"); \ + register unsigned long __reg asm ("r8") = (reg); \ + \ + BUILD_BUG_ON(!__builtin_constant_p(reg)); \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(GETREG) \ + + (reg)) \ + : "=r"(ia64_intri_res) \ + : PARAVIRT_OP(getreg), "0"(__reg) \ + : PARAVIRT_CLOBBERS1); \ + \ + ia64_intri_res; \ + }) + +/* void paravirt_setreg(int reg, unsigned long val) */ +#define paravirt_setreg(reg, val) \ + do { \ + register unsigned long __val asm ("r8") = val; \ + register unsigned long __reg asm ("r9") = reg; \ + register unsigned long ia64_clobber1 asm ("r8"); \ + register unsigned long ia64_clobber2 asm ("r9"); \ + \ + BUILD_BUG_ON(!__builtin_constant_p(reg)); \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(SETREG) \ + + (reg)) \ + : "=r"(ia64_clobber1), \ + "=r"(ia64_clobber2) \ + : PARAVIRT_OP(setreg), \ + "1"(__reg), "0"(__val) \ + : PARAVIRT_CLOBBERS2); \ + } while (0) + +#endif /* ASM_SUPPORTED */ +#endif /* CONFIG_PARAVIRT && ASM_SUPPOTED */ + #endif /* _ASM_IA64_PARAVIRT_PRIVOP_H */ diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 8dc9df8..dbc19e4 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -36,7 +36,8 @@ obj-$(CONFIG_PCI_MSI) += msi_ia64.o mca_recovery-y += mca_drv.o mca_drv_asm.o obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o -obj-$(CONFIG_PARAVIRT) += paravirt.o paravirtentry.o +obj-$(CONFIG_PARAVIRT) += paravirt.o paravirtentry.o \ + paravirt_patch.o obj-$(CONFIG_IA64_ESI) += esi.o ifneq ($(CONFIG_IA64_ESI),) diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index 6bc33a6..158d524 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c @@ -46,13 +46,23 @@ struct pv_info pv_info = { * initialization hooks. */ -struct pv_init_ops pv_init_ops; +static void __init +ia64_native_patch_branch(unsigned long tag, unsigned long type); + +struct pv_init_ops pv_init_ops +{ +#ifdef ASM_SUPPORTED + .patch_bundle = ia64_native_patch_bundle, +#endif + .patch_branch = ia64_native_patch_branch, +}; /*************************************************************************** * pv_cpu_ops * intrinsics hooks. */ +#ifndef ASM_SUPPORTED /* ia64_native_xxx are macros so that we have to make them real functions */ #define DEFINE_VOID_FUNC1(name) \ @@ -274,6 +284,261 @@ ia64_native_setreg_func(int regnum, unsigned long val) break; } } +#else + +#define __DEFINE_FUNC(name, code) \ + extern const char ia64_native_ ## name ## _direct_start[]; \ + extern const char ia64_native_ ## name ## _direct_end[]; \ + asm (".align 32\n" \ + ".proc ia64_native_" #name "_func\n" \ + "ia64_native_" #name "_func:\n" \ + "ia64_native_" #name "_direct_start:\n" \ + code \ + "ia64_native_" #name "_direct_end:\n" \ + "br.cond.sptk.many b6\n" \ + ".endp ia64_native_" #name "_func\n") + +#define DEFINE_VOID_FUNC0(name, code) \ + extern void \ + ia64_native_ ## name ## _func(void); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_VOID_FUNC1(name, code) \ + extern void \ + ia64_native_ ## name ## _func(unsigned long arg); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_VOID_FUNC2(name, code) \ + extern void \ + ia64_native_ ## name ## _func(unsigned long arg0, \ + unsigned long arg1); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_FUNC0(name, code) \ + extern unsigned long \ + ia64_native_ ## name ## _func(void); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_FUNC1(name, type, code) \ + extern unsigned long \ + ia64_native_ ## name ## _func(type arg); \ + __DEFINE_FUNC(name, code) + +DEFINE_VOID_FUNC1(fc, + "fc r8\n"); +DEFINE_VOID_FUNC1(intrin_local_irq_restore, + ";;\n" + " cmp.ne p6, p7 = r8, r0\n" + ";;\n" + "(p6) ssm psr.i\n" + "(p7) rsm psr.i\n" + ";;\n" + "(p6) srlz.d\n"); + +DEFINE_VOID_FUNC2(ptcga, + "ptc.ga r8, r9\n"); +DEFINE_VOID_FUNC2(set_rr, + "mov rr[r8] = r9\n"); + +/* ia64_native_getreg(_IA64_REG_PSR) & IA64_PSR_I */ +DEFINE_FUNC0(get_psr_i, + "mov r2 = " __stringify(1 << IA64_PSR_I_BIT) "\n" + "mov r8 = psr\n" + ";;\n" + "and r8 = r2, r8\n"); + +DEFINE_FUNC1(thash, unsigned long, + "thash r8 = r8\n"); +DEFINE_FUNC1(get_cpuid, int, + "mov r8 = cpuid[r8]\n"); +DEFINE_FUNC1(get_pmd, int, + "mov r8 = pmd[r8]\n"); +DEFINE_FUNC1(get_rr, unsigned long, + "mov r8 = rr[r8]\n"); + +DEFINE_VOID_FUNC0(ssm_i, + "ssm psr.i\n"); +DEFINE_VOID_FUNC0(rsm_i, + "rsm psr.i\n"); + +extern void +ia64_native_set_rr0_to_rr4_func(unsigned long val0, unsigned long val1, + unsigned long val2, unsigned long val3, + unsigned long val4); +__DEFINE_FUNC(set_rr0_to_rr4, + "mov rr[r0] = r8\n" + "movl r2 = 0x2000000000000000\n" + ";;\n" + "mov rr[r2] = r9\n" + "shl r3 = r2, 1\n" /* movl r3 = 0x4000000000000000 */ + ";;\n" + "add r2 = r2, r3\n" /* movl r2 = 0x6000000000000000 */ + "mov rr[r3] = r10\n" + ";;\n" + "mov rr[r2] = r11\n" + "shl r3 = r3, 1\n" /* movl r3 = 0x8000000000000000 */ + ";;\n" + "mov rr[r3] = r14\n"); + +extern unsigned long ia64_native_getreg_func(int regnum); +asm(".global ia64_native_getreg_func\n"); +#define __DEFINE_GET_REG(id, reg) \ + "mov r2 = " __stringify(_IA64_REG_ ## id) "\n" \ + ";;\n" \ + "cmp.eq p6, p0 = r2, r8\n" \ + ";;\n" \ + "(p6) mov r8 = " #reg "\n" \ + "(p6) br.cond.sptk.many b6\n" \ + ";;\n" +#define __DEFINE_GET_AR(id, reg) __DEFINE_GET_REG(AR_ ## id, ar.reg) +#define __DEFINE_GET_CR(id, reg) __DEFINE_GET_REG(CR_ ## id, cr.reg) + +__DEFINE_FUNC(getreg, + __DEFINE_GET_REG(GP, gp) + /*__DEFINE_GET_REG(IP, ip)*/ /* returned ip value shouldn't be constant */ + __DEFINE_GET_REG(PSR, psr) + __DEFINE_GET_REG(TP, tp) + __DEFINE_GET_REG(SP, sp) + + __DEFINE_GET_REG(AR_KR0, ar0) + __DEFINE_GET_REG(AR_KR1, ar1) + __DEFINE_GET_REG(AR_KR2, ar2) + __DEFINE_GET_REG(AR_KR3, ar3) + __DEFINE_GET_REG(AR_KR4, ar4) + __DEFINE_GET_REG(AR_KR5, ar5) + __DEFINE_GET_REG(AR_KR6, ar6) + __DEFINE_GET_REG(AR_KR7, ar7) + __DEFINE_GET_AR(RSC, rsc) + __DEFINE_GET_AR(BSP, bsp) + __DEFINE_GET_AR(BSPSTORE, bspstore) + __DEFINE_GET_AR(RNAT, rnat) + __DEFINE_GET_AR(FCR, fcr) + __DEFINE_GET_AR(EFLAG, eflag) + __DEFINE_GET_AR(CSD, csd) + __DEFINE_GET_AR(SSD, ssd) + __DEFINE_GET_REG(AR_CFLAG, ar27) + __DEFINE_GET_AR(FSR, fsr) + __DEFINE_GET_AR(FIR, fir) + __DEFINE_GET_AR(FDR, fdr) + __DEFINE_GET_AR(CCV, ccv) + __DEFINE_GET_AR(UNAT, unat) + __DEFINE_GET_AR(FPSR, fpsr) + __DEFINE_GET_AR(ITC, itc) + __DEFINE_GET_AR(PFS, pfs) + __DEFINE_GET_AR(LC, lc) + __DEFINE_GET_AR(EC, ec) + + __DEFINE_GET_CR(DCR, dcr) + __DEFINE_GET_CR(ITM, itm) + __DEFINE_GET_CR(IVA, iva) + __DEFINE_GET_CR(PTA, pta) + __DEFINE_GET_CR(IPSR, ipsr) + __DEFINE_GET_CR(ISR, isr) + __DEFINE_GET_CR(IIP, iip) + __DEFINE_GET_CR(IFA, ifa) + __DEFINE_GET_CR(ITIR, itir) + __DEFINE_GET_CR(IIPA, iipa) + __DEFINE_GET_CR(IFS, ifs) + __DEFINE_GET_CR(IIM, iim) + __DEFINE_GET_CR(IHA, iha) + __DEFINE_GET_CR(LID, lid) + __DEFINE_GET_CR(IVR, ivr) + __DEFINE_GET_CR(TPR, tpr) + __DEFINE_GET_CR(EOI, eoi) + __DEFINE_GET_CR(IRR0, irr0) + __DEFINE_GET_CR(IRR1, irr1) + __DEFINE_GET_CR(IRR2, irr2) + __DEFINE_GET_CR(IRR3, irr3) + __DEFINE_GET_CR(ITV, itv) + __DEFINE_GET_CR(PMV, pmv) + __DEFINE_GET_CR(CMCV, cmcv) + __DEFINE_GET_CR(LRR0, lrr0) + __DEFINE_GET_CR(LRR1, lrr1) + + "mov r8 = -1\n" /* unsupported case */ + ); + +extern void ia64_native_setreg_func(int regnum, unsigned long val); +asm(".global ia64_native_setreg_func\n"); +#define __DEFINE_SET_REG(id, reg) \ + "mov r2 = " __stringify(_IA64_REG_ ## id) "\n" \ + ";;\n" \ + "cmp.eq p6, p0 = r2, r9\n" \ + ";;\n" \ + "(p6) mov " #reg " = r8\n" \ + "(p6) br.cond.sptk.many b6\n" \ + ";;\n" +#define __DEFINE_SET_AR(id, reg) __DEFINE_SET_REG(AR_ ## id, ar.reg) +#define __DEFINE_SET_CR(id, reg) __DEFINE_SET_REG(CR_ ## id, cr.reg) +__DEFINE_FUNC(setreg, + "mov r2 = " __stringify(_IA64_REG_PSR_L) "\n" + ";;\n" + "cmp.eq p6, p0 = r2, r9\n" + ";;\n" + "(p6) mov psr.l = r8\n" +#ifdef HAVE_SERIALIZE_DIRECTIVE + ".serialize.data\n" +#endif + "(p6) br.cond.sptk.many b6\n" + __DEFINE_SET_REG(GP, gp) + __DEFINE_SET_REG(SP, sp) + + __DEFINE_SET_REG(AR_KR0, ar0) + __DEFINE_SET_REG(AR_KR1, ar1) + __DEFINE_SET_REG(AR_KR2, ar2) + __DEFINE_SET_REG(AR_KR3, ar3) + __DEFINE_SET_REG(AR_KR4, ar4) + __DEFINE_SET_REG(AR_KR5, ar5) + __DEFINE_SET_REG(AR_KR6, ar6) + __DEFINE_SET_REG(AR_KR7, ar7) + __DEFINE_SET_AR(RSC, rsc) + __DEFINE_SET_AR(BSP, bsp) + __DEFINE_SET_AR(BSPSTORE, bspstore) + __DEFINE_SET_AR(RNAT, rnat) + __DEFINE_SET_AR(FCR, fcr) + __DEFINE_SET_AR(EFLAG, eflag) + __DEFINE_SET_AR(CSD, csd) + __DEFINE_SET_AR(SSD, ssd) + __DEFINE_SET_REG(AR_CFLAG, ar27) + __DEFINE_SET_AR(FSR, fsr) + __DEFINE_SET_AR(FIR, fir) + __DEFINE_SET_AR(FDR, fdr) + __DEFINE_SET_AR(CCV, ccv) + __DEFINE_SET_AR(UNAT, unat) + __DEFINE_SET_AR(FPSR, fpsr) + __DEFINE_SET_AR(ITC, itc) + __DEFINE_SET_AR(PFS, pfs) + __DEFINE_SET_AR(LC, lc) + __DEFINE_SET_AR(EC, ec) + + __DEFINE_SET_CR(DCR, dcr) + __DEFINE_SET_CR(ITM, itm) + __DEFINE_SET_CR(IVA, iva) + __DEFINE_SET_CR(PTA, pta) + __DEFINE_SET_CR(IPSR, ipsr) + __DEFINE_SET_CR(ISR, isr) + __DEFINE_SET_CR(IIP, iip) + __DEFINE_SET_CR(IFA, ifa) + __DEFINE_SET_CR(ITIR, itir) + __DEFINE_SET_CR(IIPA, iipa) + __DEFINE_SET_CR(IFS, ifs) + __DEFINE_SET_CR(IIM, iim) + __DEFINE_SET_CR(IHA, iha) + __DEFINE_SET_CR(LID, lid) + __DEFINE_SET_CR(IVR, ivr) + __DEFINE_SET_CR(TPR, tpr) + __DEFINE_SET_CR(EOI, eoi) + __DEFINE_SET_CR(IRR0, irr0) + __DEFINE_SET_CR(IRR1, irr1) + __DEFINE_SET_CR(IRR2, irr2) + __DEFINE_SET_CR(IRR3, irr3) + __DEFINE_SET_CR(ITV, itv) + __DEFINE_SET_CR(PMV, pmv) + __DEFINE_SET_CR(CMCV, cmcv) + __DEFINE_SET_CR(LRR0, lrr0) + __DEFINE_SET_CR(LRR1, lrr1) + ); +#endif struct pv_cpu_ops pv_cpu_ops = { .fc = ia64_native_fc_func, @@ -368,3 +633,256 @@ struct pv_time_ops pv_time_ops = { .do_steal_accounting = ia64_native_do_steal_accounting, .sched_clock = ia64_native_sched_clock, }; + +/*************************************************************************** + * binary pacthing + * pv_init_ops.patch_bundle + */ + +#ifdef ASM_SUPPORTED +#define IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg) \ + __DEFINE_FUNC(get_ ## name, \ + ";;\n" \ + "mov r8 = " #reg "\n" \ + ";;\n") + +#define IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg) \ + __DEFINE_FUNC(set_ ## name, \ + ";;\n" \ + "mov " #reg " = r8\n" \ + ";;\n") + +#define IA64_NATIVE_PATCH_DEFINE_REG(name, reg) \ + IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg); \ + IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg) \ + +#define IA64_NATIVE_PATCH_DEFINE_AR(name, reg) \ + IA64_NATIVE_PATCH_DEFINE_REG(ar_ ## name, ar.reg) + +#define IA64_NATIVE_PATCH_DEFINE_CR(name, reg) \ + IA64_NATIVE_PATCH_DEFINE_REG(cr_ ## name, cr.reg) + + +IA64_NATIVE_PATCH_DEFINE_GET_REG(psr, psr); +IA64_NATIVE_PATCH_DEFINE_GET_REG(tp, tp); + +/* IA64_NATIVE_PATCH_DEFINE_SET_REG(psr_l, psr.l); */ +__DEFINE_FUNC(set_psr_l, + ";;\n" + "mov psr.l = r8\n" +#ifdef HAVE_SERIALIZE_DIRECTIVE + ".serialize.data\n" +#endif + ";;\n"); + +IA64_NATIVE_PATCH_DEFINE_REG(gp, gp); +IA64_NATIVE_PATCH_DEFINE_REG(sp, sp); + +IA64_NATIVE_PATCH_DEFINE_REG(kr0, ar0); +IA64_NATIVE_PATCH_DEFINE_REG(kr1, ar1); +IA64_NATIVE_PATCH_DEFINE_REG(kr2, ar2); +IA64_NATIVE_PATCH_DEFINE_REG(kr3, ar3); +IA64_NATIVE_PATCH_DEFINE_REG(kr4, ar4); +IA64_NATIVE_PATCH_DEFINE_REG(kr5, ar5); +IA64_NATIVE_PATCH_DEFINE_REG(kr6, ar6); +IA64_NATIVE_PATCH_DEFINE_REG(kr7, ar7); + +IA64_NATIVE_PATCH_DEFINE_AR(rsc, rsc); +IA64_NATIVE_PATCH_DEFINE_AR(bsp, bsp); +IA64_NATIVE_PATCH_DEFINE_AR(bspstore, bspstore); +IA64_NATIVE_PATCH_DEFINE_AR(rnat, rnat); +IA64_NATIVE_PATCH_DEFINE_AR(fcr, fcr); +IA64_NATIVE_PATCH_DEFINE_AR(eflag, eflag); +IA64_NATIVE_PATCH_DEFINE_AR(csd, csd); +IA64_NATIVE_PATCH_DEFINE_AR(ssd, ssd); +IA64_NATIVE_PATCH_DEFINE_REG(ar27, ar27); +IA64_NATIVE_PATCH_DEFINE_AR(fsr, fsr); +IA64_NATIVE_PATCH_DEFINE_AR(fir, fir); +IA64_NATIVE_PATCH_DEFINE_AR(fdr, fdr); +IA64_NATIVE_PATCH_DEFINE_AR(ccv, ccv); +IA64_NATIVE_PATCH_DEFINE_AR(unat, unat); +IA64_NATIVE_PATCH_DEFINE_AR(fpsr, fpsr); +IA64_NATIVE_PATCH_DEFINE_AR(itc, itc); +IA64_NATIVE_PATCH_DEFINE_AR(pfs, pfs); +IA64_NATIVE_PATCH_DEFINE_AR(lc, lc); +IA64_NATIVE_PATCH_DEFINE_AR(ec, ec); + +IA64_NATIVE_PATCH_DEFINE_CR(dcr, dcr); +IA64_NATIVE_PATCH_DEFINE_CR(itm, itm); +IA64_NATIVE_PATCH_DEFINE_CR(iva, iva); +IA64_NATIVE_PATCH_DEFINE_CR(pta, pta); +IA64_NATIVE_PATCH_DEFINE_CR(ipsr, ipsr); +IA64_NATIVE_PATCH_DEFINE_CR(isr, isr); +IA64_NATIVE_PATCH_DEFINE_CR(iip, iip); +IA64_NATIVE_PATCH_DEFINE_CR(ifa, ifa); +IA64_NATIVE_PATCH_DEFINE_CR(itir, itir); +IA64_NATIVE_PATCH_DEFINE_CR(iipa, iipa); +IA64_NATIVE_PATCH_DEFINE_CR(ifs, ifs); +IA64_NATIVE_PATCH_DEFINE_CR(iim, iim); +IA64_NATIVE_PATCH_DEFINE_CR(iha, iha); +IA64_NATIVE_PATCH_DEFINE_CR(lid, lid); +IA64_NATIVE_PATCH_DEFINE_CR(ivr, ivr); +IA64_NATIVE_PATCH_DEFINE_CR(tpr, tpr); +IA64_NATIVE_PATCH_DEFINE_CR(eoi, eoi); +IA64_NATIVE_PATCH_DEFINE_CR(irr0, irr0); +IA64_NATIVE_PATCH_DEFINE_CR(irr1, irr1); +IA64_NATIVE_PATCH_DEFINE_CR(irr2, irr2); +IA64_NATIVE_PATCH_DEFINE_CR(irr3, irr3); +IA64_NATIVE_PATCH_DEFINE_CR(itv, itv); +IA64_NATIVE_PATCH_DEFINE_CR(pmv, pmv); +IA64_NATIVE_PATCH_DEFINE_CR(cmcv, cmcv); +IA64_NATIVE_PATCH_DEFINE_CR(lrr0, lrr0); +IA64_NATIVE_PATCH_DEFINE_CR(lrr1, lrr1); + +static const struct paravirt_patch_bundle_elem ia64_native_patch_bundle_elems[] +__initdata_or_module +{ +#define IA64_NATIVE_PATCH_BUNDLE_ELEM(name, type) \ + { \ + (void*)ia64_native_ ## name ## _direct_start, \ + (void*)ia64_native_ ## name ## _direct_end, \ + PARAVIRT_PATCH_TYPE_ ## type, \ + } + + IA64_NATIVE_PATCH_BUNDLE_ELEM(fc, FC), + IA64_NATIVE_PATCH_BUNDLE_ELEM(thash, THASH), + IA64_NATIVE_PATCH_BUNDLE_ELEM(get_cpuid, GET_CPUID), + IA64_NATIVE_PATCH_BUNDLE_ELEM(get_pmd, GET_PMD), + IA64_NATIVE_PATCH_BUNDLE_ELEM(ptcga, PTCGA), + IA64_NATIVE_PATCH_BUNDLE_ELEM(get_rr, GET_RR), + IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr, SET_RR), + IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr0_to_rr4, SET_RR0_TO_RR4), + IA64_NATIVE_PATCH_BUNDLE_ELEM(ssm_i, SSM_I), + IA64_NATIVE_PATCH_BUNDLE_ELEM(rsm_i, RSM_I), + IA64_NATIVE_PATCH_BUNDLE_ELEM(get_psr_i, GET_PSR_I), + IA64_NATIVE_PATCH_BUNDLE_ELEM(intrin_local_irq_restore, + INTRIN_LOCAL_IRQ_RESTORE), + +#define IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg) \ + { \ + (void*)ia64_native_get_ ## name ## _direct_start, \ + (void*)ia64_native_get_ ## name ## _direct_end, \ + PARAVIRT_PATCH_TYPE_GETREG + _IA64_REG_ ## reg, \ + } + +#define IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg) \ + { \ + (void*)ia64_native_set_ ## name ## _direct_start, \ + (void*)ia64_native_set_ ## name ## _direct_end, \ + PARAVIRT_PATCH_TYPE_SETREG + _IA64_REG_ ## reg, \ + } + +#define IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(name, reg) \ + IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg), \ + IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg) \ + +#define IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(name, reg) \ + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar_ ## name, AR_ ## reg) + +#define IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(name, reg) \ + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(cr_ ## name, CR_ ## reg) + + IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(psr, PSR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(tp, TP), + + IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(psr_l, PSR_L), + + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(gp, GP), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(sp, SP), + + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr0, AR_KR0), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr1, AR_KR1), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr2, AR_KR2), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr3, AR_KR3), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr4, AR_KR4), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr5, AR_KR5), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr6, AR_KR6), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr7, AR_KR7), + + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rsc, RSC), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bsp, BSP), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bspstore, BSPSTORE), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rnat, RNAT), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fcr, FCR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(eflag, EFLAG), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(csd, CSD), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ssd, SSD), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar27, AR_CFLAG), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fsr, FSR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fir, FIR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fdr, FDR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ccv, CCV), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(unat, UNAT), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fpsr, FPSR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(itc, ITC), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(pfs, PFS), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(lc, LC), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ec, EC), + + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(dcr, DCR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itm, ITM), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iva, IVA), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pta, PTA), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ipsr, IPSR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(isr, ISR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iip, IIP), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifa, IFA), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itir, ITIR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iipa, IIPA), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifs, IFS), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iim, IIM), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iha, IHA), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lid, LID), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ivr, IVR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(tpr, TPR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(eoi, EOI), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr0, IRR0), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr1, IRR1), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr2, IRR2), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr3, IRR3), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itv, ITV), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pmv, PMV), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(cmcv, CMCV), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr0, LRR0), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr1, LRR1), +}; + +unsigned long __init_or_module +ia64_native_patch_bundle(void *sbundle, void *ebundle, unsigned long type) +{ + const unsigned long nelems = sizeof(ia64_native_patch_bundle_elems) / + sizeof(ia64_native_patch_bundle_elems[0]); + + return __paravirt_patch_apply_bundle(sbundle, ebundle, type, + ia64_native_patch_bundle_elems, + nelems, NULL); +} +#endif /* ASM_SUPPOTED */ + +extern const char ia64_native_switch_to[]; +extern const char ia64_native_leave_syscall[]; +extern const char ia64_native_work_processed_syscall[]; +extern const char ia64_native_leave_kernel[]; + +const struct paravirt_patch_branch_target ia64_native_branch_target[] +__initconst = { +#define PARAVIRT_BR_TARGET(name, type) \ + { \ + ia64_native_ ## name, \ + PARAVIRT_PATCH_TYPE_BR_ ## type, \ + } + PARAVIRT_BR_TARGET(switch_to, SWITCH_TO), + PARAVIRT_BR_TARGET(leave_syscall, LEAVE_SYSCALL), + PARAVIRT_BR_TARGET(work_processed_syscall, WORK_PROCESSED_SYSCALL), + PARAVIRT_BR_TARGET(leave_kernel, LEAVE_KERNEL), +}; + +static void __init +ia64_native_patch_branch(unsigned long tag, unsigned long type) +{ + const unsigned long nelem + sizeof(ia64_native_branch_target) / + sizeof(ia64_native_branch_target[0]); + __paravirt_patch_apply_branch(tag, type, + ia64_native_branch_target, nelem); +} diff --git a/arch/ia64/kernel/paravirtentry.S b/arch/ia64/kernel/paravirtentry.S index 733e31a..19e87aa 100644 --- a/arch/ia64/kernel/paravirtentry.S +++ b/arch/ia64/kernel/paravirtentry.S @@ -20,8 +20,11 @@ * */ +#include <linux/init.h> #include <asm/asmmacro.h> #include <asm/asm-offsets.h> +#include <asm/paravirt_privop.h> +#include <asm/paravirt_patch.h> #include "entry.h" #define DATA8(sym, init_value) \ @@ -32,32 +35,34 @@ data8 init_value ; \ .popsection -#define BRANCH(targ, reg, breg) \ - movl reg=targ ; \ - ;; \ - ld8 reg=[reg] ; \ - ;; \ - mov breg=reg ; \ +#define BRANCH(targ, reg, breg, type) \ + PARAVIRT_PATCH_SITE_BR(PARAVIRT_PATCH_TYPE_BR_ ## type) ; \ + ;; \ + movl reg=targ ; \ + ;; \ + ld8 reg=[reg] ; \ + ;; \ + mov breg=reg ; \ br.cond.sptk.many breg -#define BRANCH_PROC(sym, reg, breg) \ - DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \ - GLOBAL_ENTRY(paravirt_ ## sym) ; \ - BRANCH(paravirt_ ## sym ## _targ, reg, breg) ; \ +#define BRANCH_PROC(sym, reg, breg, type) \ + DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \ + GLOBAL_ENTRY(paravirt_ ## sym) ; \ + BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ; \ END(paravirt_ ## sym) -#define BRANCH_PROC_UNWINFO(sym, reg, breg) \ - DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \ - GLOBAL_ENTRY(paravirt_ ## sym) ; \ - PT_REGS_UNWIND_INFO(0) ; \ - BRANCH(paravirt_ ## sym ## _targ, reg, breg) ; \ +#define BRANCH_PROC_UNWINFO(sym, reg, breg, type) \ + DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \ + GLOBAL_ENTRY(paravirt_ ## sym) ; \ + PT_REGS_UNWIND_INFO(0) ; \ + BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ; \ END(paravirt_ ## sym) -BRANCH_PROC(switch_to, r22, b7) -BRANCH_PROC_UNWINFO(leave_syscall, r22, b7) -BRANCH_PROC(work_processed_syscall, r2, b7) -BRANCH_PROC_UNWINFO(leave_kernel, r22, b7) +BRANCH_PROC(switch_to, r22, b7, SWITCH_TO) +BRANCH_PROC_UNWINFO(leave_syscall, r22, b7, LEAVE_SYSCALL) +BRANCH_PROC(work_processed_syscall, r2, b7, WORK_PROCESSED_SYSCALL) +BRANCH_PROC_UNWINFO(leave_kernel, r22, b7, LEAVE_KERNEL) #ifdef CONFIG_MODULES diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 865af27..a76ad4e 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -537,6 +537,7 @@ setup_arch (char **cmdline_p) paravirt_arch_setup_early(); ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist); + paravirt_patch_apply(); *cmdline_p = __va(ia64_boot_param->command_line); strlcpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE); -- 1.6.0.2
Isaku Yamahata
2008-Nov-25 13:03 UTC
[PATCH 3/5] ia64/pv_ops/bp/module: support binary patching for kernel module.
support binary patching for kernel module. Signed-off-by: Isaku Yamahata <yamahata at valinux.co.jp> --- arch/ia64/include/asm/module.h | 6 ++++++ arch/ia64/kernel/module.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 0 deletions(-) diff --git a/arch/ia64/include/asm/module.h b/arch/ia64/include/asm/module.h index d2da61e..908eaef 100644 --- a/arch/ia64/include/asm/module.h +++ b/arch/ia64/include/asm/module.h @@ -16,6 +16,12 @@ struct mod_arch_specific { struct elf64_shdr *got; /* global offset table */ struct elf64_shdr *opd; /* official procedure descriptors */ struct elf64_shdr *unwind; /* unwind-table section */ +#ifdef CONFIG_PARAVIRT + struct elf64_shdr *paravirt_bundles; + /* paravirt_alt_bundle_patch table */ + struct elf64_shdr *paravirt_insts; + /* paravirt_alt_inst_patch table */ +#endif unsigned long gp; /* global-pointer for module */ void *core_unw_table; /* core unwind-table cookie returned by unwinder */ diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index aaa7d90..34fe425 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -446,6 +446,14 @@ module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, mod->arch.opd = s; else if (strcmp(".IA_64.unwind", secstrings + s->sh_name) == 0) mod->arch.unwind = s; +#ifdef CONFIG_PARAVIRT + else if (strcmp(".paravirt_bundles", + secstrings + s->sh_name) == 0) + mod->arch.paravirt_bundles = s; + else if (strcmp(".paravirt_insts", + secstrings + s->sh_name) == 0) + mod->arch.paravirt_insts = s; +#endif if (!mod->arch.core_plt || !mod->arch.init_plt || !mod->arch.got || !mod->arch.opd) { printk(KERN_ERR "%s: sections missing\n", mod->name); @@ -921,6 +929,30 @@ module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mo DEBUGP("%s: init: entry=%p\n", __func__, mod->init); if (mod->arch.unwind) register_unwind_table(mod); +#ifdef CONFIG_PARAVIRT + if (mod->arch.paravirt_bundles) { + struct paravirt_patch_site_bundle *start + (struct paravirt_patch_site_bundle *) + mod->arch.paravirt_bundles->sh_addr; + struct paravirt_patch_site_bundle *end + (struct paravirt_patch_site_bundle *) + (mod->arch.paravirt_bundles->sh_addr + + mod->arch.paravirt_bundles->sh_size); + + paravirt_patch_apply_bundle(start, end); + } + if (mod->arch.paravirt_insts) { + struct paravirt_patch_site_inst *start + (struct paravirt_patch_site_inst *) + mod->arch.paravirt_insts->sh_addr; + struct paravirt_patch_site_inst *end + (struct paravirt_patch_site_inst *) + (mod->arch.paravirt_insts->sh_addr + + mod->arch.paravirt_insts->sh_size); + + paravirt_patch_apply_inst(start, end); + } +#endif return 0; } -- 1.6.0.2
Isaku Yamahata
2008-Nov-25 13:03 UTC
[PATCH 4/5] ia64/pv_ops/binary patch: define paravirt_dv_serialize_data() and suppress false positive warning.
define paravirt_dv_serialize_data() and insert it to suppress false positive warnings. Signed-off-by: Isaku Yamahata <yamahata at valinux.co.jp> --- arch/ia64/include/asm/paravirt_privop.h | 6 ++++++ arch/ia64/kernel/efi.c | 1 + arch/ia64/kvm/vtlb.c | 2 ++ 3 files changed, 9 insertions(+), 0 deletions(-) diff --git a/arch/ia64/include/asm/paravirt_privop.h b/arch/ia64/include/asm/paravirt_privop.h index ee941cb..4baaae9 100644 --- a/arch/ia64/include/asm/paravirt_privop.h +++ b/arch/ia64/include/asm/paravirt_privop.h @@ -119,6 +119,12 @@ void paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch); #endif /* CONFIG_PARAVIRT */ +#if defined(CONFIG_PARAVIRT) && defined(ASM_SUPPORTED) +#define paravirt_dv_serialize_data() ia64_dv_serialize_data() +#else +#define paravirt_dv_serialize_data() /* nothing */ +#endif + /* these routines utilize privilege-sensitive or performance-sensitive * privileged instructions so the code must be replaced with * paravirtualized versions */ diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index efaff15..7ef80e8 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -456,6 +456,7 @@ efi_map_pal_code (void) GRANULEROUNDDOWN((unsigned long) pal_vaddr), pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)), IA64_GRANULE_SHIFT); + paravirt_dv_serialize_data(); ia64_set_psr(psr); /* restore psr */ } diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c index e22b933..500c878 100644 --- a/arch/ia64/kvm/vtlb.c +++ b/arch/ia64/kvm/vtlb.c @@ -210,6 +210,7 @@ void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type) phy_pte &= ~PAGE_FLAGS_RV_MASK; psr = ia64_clear_ic(); ia64_itc(type, va, phy_pte, itir_ps(itir)); + paravirt_dv_serialize_data(); ia64_set_psr(psr); } @@ -464,6 +465,7 @@ int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir, phy_pte &= ~PAGE_FLAGS_RV_MASK; psr = ia64_clear_ic(); ia64_itc(type, ifa, phy_pte, ps); + paravirt_dv_serialize_data(); ia64_set_psr(psr); } if (!(pte&VTLB_PTE_IO)) -- 1.6.0.2
Isaku Yamahata
2008-Nov-25 13:03 UTC
[PATCH 5/5] ia64/pv_ops/bp/xen: implemented binary patchable pv_cpu_ops.
implemented xen binary patch for pv_cpu_ops. Signed-off-by: Isaku Yamahata <yamahata at valinux.co.jp> --- arch/ia64/include/asm/xen/privop.h | 4 + arch/ia64/xen/hypercall.S | 2 + arch/ia64/xen/xen_pv_ops.c | 667 ++++++++++++++++++++++++++++++++++++ 3 files changed, 673 insertions(+), 0 deletions(-) diff --git a/arch/ia64/include/asm/xen/privop.h b/arch/ia64/include/asm/xen/privop.h index 2261dda..e5fbaee 100644 --- a/arch/ia64/include/asm/xen/privop.h +++ b/arch/ia64/include/asm/xen/privop.h @@ -82,8 +82,10 @@ extern unsigned long xen_thash(unsigned long addr); extern unsigned long xen_get_cpuid(int index); extern unsigned long xen_get_pmd(int index); +#ifndef ASM_SUPPORTED extern unsigned long xen_get_eflag(void); /* see xen_ia64_getreg */ extern void xen_set_eflag(unsigned long); /* see xen_ia64_setreg */ +#endif /************************************************/ /* Instructions paravirtualized for performance */ @@ -108,6 +110,7 @@ extern void xen_set_eflag(unsigned long); /* see xen_ia64_setreg */ #define xen_get_virtual_pend() \ (*(((uint8_t *)XEN_MAPPEDREGS->interrupt_mask_addr) - 1)) +#ifndef ASM_SUPPORTED /* Although all privileged operations can be left to trap and will * be properly handled by Xen, some are frequent enough that we use * hyperprivops for performance. */ @@ -125,6 +128,7 @@ extern void xen_set_rr0_to_rr4(unsigned long val0, unsigned long val1, unsigned long val4); extern void xen_set_kr(unsigned long index, unsigned long val); extern void xen_ptcga(unsigned long addr, unsigned long size); +#endif /* !ASM_SUPPORTED */ #endif /* !__ASSEMBLY__ */ diff --git a/arch/ia64/xen/hypercall.S b/arch/ia64/xen/hypercall.S index 45e02bb..e32dae4 100644 --- a/arch/ia64/xen/hypercall.S +++ b/arch/ia64/xen/hypercall.S @@ -9,6 +9,7 @@ #include <asm/intrinsics.h> #include <asm/xen/privop.h> +#ifdef __INTEL_COMPILER /* * Hypercalls without parameter. */ @@ -72,6 +73,7 @@ GLOBAL_ENTRY(xen_set_rr0_to_rr4) br.ret.sptk.many rp ;; END(xen_set_rr0_to_rr4) +#endif GLOBAL_ENTRY(xen_send_ipi) mov r14=r32 diff --git a/arch/ia64/xen/xen_pv_ops.c b/arch/ia64/xen/xen_pv_ops.c index eda13a8..a2825fa 100644 --- a/arch/ia64/xen/xen_pv_ops.c +++ b/arch/ia64/xen/xen_pv_ops.c @@ -154,6 +154,13 @@ xen_post_smp_prepare_boot_cpu(void) xen_setup_vcpu_info_placement(); } +#ifdef ASM_SUPPORTED +static unsigned long __init_or_module +xen_patch_bundle(void *sbundle, void *ebundle, unsigned long type); +#endif +static void __init +xen_patch_branch(unsigned long tag, unsigned long type); + static struct pv_init_ops xen_init_ops __initdata = { .banner = xen_banner, @@ -164,6 +171,10 @@ static struct pv_init_ops xen_init_ops __initdata = { .arch_setup_nomca = xen_arch_setup_nomca, .post_smp_prepare_boot_cpu = xen_post_smp_prepare_boot_cpu, +#ifdef ASM_SUPPORTED + .patch_bundle = xen_patch_bundle, +#endif + .patch_branch = xen_patch_branch, }; /*************************************************************************** @@ -214,6 +225,7 @@ static struct pv_patchdata xen_patchdata __initdata = { * intrinsics hooks. */ +#ifndef ASM_SUPPORTED static void xen_set_itm_with_offset(unsigned long val) { @@ -381,6 +393,412 @@ xen_intrin_local_irq_restore(unsigned long mask) else xen_rsm_i(); } +#else +#define __DEFINE_FUNC(name, code) \ + extern const char xen_ ## name ## _direct_start[]; \ + extern const char xen_ ## name ## _direct_end[]; \ + asm (".align 32\n" \ + ".proc xen_" #name "\n" \ + "xen_" #name ":\n" \ + "xen_" #name "_direct_start:\n" \ + code \ + "xen_" #name "_direct_end:\n" \ + "br.cond.sptk.many b6\n" \ + ".endp xen_" #name "\n") + +#define DEFINE_VOID_FUNC0(name, code) \ + extern void \ + xen_ ## name (void); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_VOID_FUNC1(name, code) \ + extern void \ + xen_ ## name (unsigned long arg); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_VOID_FUNC2(name, code) \ + extern void \ + xen_ ## name (unsigned long arg0, \ + unsigned long arg1); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_FUNC0(name, code) \ + extern unsigned long \ + xen_ ## name (void); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_FUNC1(name, type, code) \ + extern unsigned long \ + xen_ ## name (type arg); \ + __DEFINE_FUNC(name, code) + +#define XEN_PSR_I_ADDR_ADDR (XSI_BASE + XSI_PSR_I_ADDR_OFS) + +/* + * static void xen_set_itm_with_offset(unsigned long val) + * xen_set_itm(val - XEN_MAPPEDREGS->itc_offset); + */ +/* 2 bundles */ +DEFINE_VOID_FUNC1(set_itm_with_offset, + "mov r2 = " __stringify(XSI_BASE) " + " + __stringify(XSI_ITC_OFFSET_OFS) "\n" + ";;\n" + "ld8 r3 = [r2]\n" + ";;\n" + "sub r8 = r8, r3\n" + "break " __stringify(HYPERPRIVOP_SET_ITM) "\n"); + +/* + * static unsigned long xen_get_itm_with_offset(void) + * return ia64_native_getreg(_IA64_REG_CR_ITM) + XEN_MAPPEDREGS->itc_offset; + */ +/* 2 bundles */ +DEFINE_FUNC0(get_itm_with_offset, + "mov r2 = " __stringify(XSI_BASE) " + " + __stringify(XSI_ITC_OFFSET_OFS) "\n" + ";;\n" + "ld8 r3 = [r2]\n" + "mov r8 = cr.itm\n" + ";;\n" + "add r8 = r8, r2\n"); + +/* + * static void xen_set_itc(unsigned long val) + * unsigned long mitc; + * + * WARN_ON(!irqs_disabled()); + * mitc = ia64_native_getreg(_IA64_REG_AR_ITC); + * XEN_MAPPEDREGS->itc_offset = val - mitc; + * XEN_MAPPEDREGS->itc_last = val; + */ +/* 2 bundles */ +DEFINE_VOID_FUNC1(set_itc, + "mov r2 = " __stringify(XSI_BASE) " + " + __stringify(XSI_ITC_LAST_OFS) "\n" + "mov r3 = ar.itc\n" + ";;\n" + "sub r3 = r8, r3\n" + "st8 [r2] = r8, " + __stringify(XSI_ITC_LAST_OFS) " - " + __stringify(XSI_ITC_OFFSET_OFS) "\n" + ";;\n" + "st8 [r2] = r3\n"); + +/* + * static unsigned long xen_get_itc(void) + * unsigned long res; + * unsigned long itc_offset; + * unsigned long itc_last; + * unsigned long ret_itc_last; + * + * itc_offset = XEN_MAPPEDREGS->itc_offset; + * do { + * itc_last = XEN_MAPPEDREGS->itc_last; + * res = ia64_native_getreg(_IA64_REG_AR_ITC); + * res += itc_offset; + * if (itc_last >= res) + * res = itc_last + 1; + * ret_itc_last = cmpxchg(&XEN_MAPPEDREGS->itc_last, + * itc_last, res); + * } while (unlikely(ret_itc_last != itc_last)); + * return res; + */ +/* 5 bundles */ +DEFINE_FUNC0(get_itc, + "mov r2 = " __stringify(XSI_BASE) " + " + __stringify(XSI_ITC_OFFSET_OFS) "\n" + ";;\n" + "ld8 r9 = [r2], " __stringify(XSI_ITC_LAST_OFS) " - " + __stringify(XSI_ITC_OFFSET_OFS) "\n" + /* r9 = itc_offset */ + /* r2 = XSI_ITC_OFFSET */ + "888:\n" + "mov r8 = ar.itc\n" /* res = ar.itc */ + ";;\n" + "ld8 r3 = [r2]\n" /* r3 = itc_last */ + "add r8 = r8, r9\n" /* res = ar.itc + itc_offset */ + ";;\n" + "cmp.gtu p6, p0 = r3, r8\n" + ";;\n" + "(p6) add r8 = 1, r3\n" /* if (itc_last > res) itc_last + 1 */ + ";;\n" + "mov ar.ccv = r8\n" + ";;\n" + "cmpxchg8.acq r10 = [r2], r8, ar.ccv\n" + ";;\n" + "cmp.ne p6, p0 = r10, r3\n" + "(p6) hint @pause\n" + "(p6) br.cond.spnt 888b\n"); + +DEFINE_VOID_FUNC1(fc, + "break " __stringify(HYPERPRIVOP_FC) "\n"); + +/* + * psr_i_addr_addr = XEN_PSR_I_ADDR_ADDR + * masked_addr = *psr_i_addr_addr + * pending_intr_addr = masked_addr - 1 + * if (val & IA64_PSR_I) { + * masked = *masked_addr + * *masked_addr = 0:xen_set_virtual_psr_i(1) + * compiler barrier + * if (masked) { + * uint8_t pending = *pending_intr_addr; + * if (pending) + * XEN_HYPER_SSM_I + * } + * } else { + * *masked_addr = 1:xen_set_virtual_psr_i(0) + * } + */ +/* 6 bundles */ +DEFINE_VOID_FUNC1(intrin_local_irq_restore, + /* r8 = input value: 0 or IA64_PSR_I + * p6 = (flags & IA64_PSR_I) + * = if clause + * p7 = !(flags & IA64_PSR_I) + * = else clause + */ + "cmp.ne p6, p7 = r8, r0\n" + "mov r9 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n" + ";;\n" + /* r9 = XEN_PSR_I_ADDR */ + "ld8 r9 = [r9]\n" + ";;\n" + + /* r10 = masked previous value */ + "(p6) ld1.acq r10 = [r9]\n" + ";;\n" + + /* p8 = !masked interrupt masked previously? */ + "(p6) cmp.ne.unc p8, p0 = r10, r0\n" + + /* p7 = else clause */ + "(p7) mov r11 = 1\n" + ";;\n" + /* masked = 1 */ + "(p7) st1.rel [r9] = r11\n" + + /* p6 = if clause */ + /* masked = 0 + * r9 = masked_addr - 1 + * = pending_intr_addr + */ + "(p8) st1.rel [r9] = r0, -1\n" + ";;\n" + /* r8 = pending_intr */ + "(p8) ld1.acq r11 = [r9]\n" + ";;\n" + /* p9 = interrupt pending? */ + "(p8) cmp.ne.unc p9, p10 = r11, r0\n" + ";;\n" + "(p10) mf\n" + /* issue hypercall to trigger interrupt */ + "(p9) break " __stringify(HYPERPRIVOP_SSM_I) "\n"); + +DEFINE_VOID_FUNC2(ptcga, + "break " __stringify(HYPERPRIVOP_PTC_GA) "\n"); +DEFINE_VOID_FUNC2(set_rr, + "break " __stringify(HYPERPRIVOP_SET_RR) "\n"); + +/* + * tmp = XEN_MAPPEDREGS->interrupt_mask_addr = XEN_PSR_I_ADDR_ADDR; + * tmp = *tmp + * tmp = *tmp; + * psr_i = tmp? 0: IA64_PSR_I; + */ +/* 4 bundles */ +DEFINE_FUNC0(get_psr_i, + "mov r9 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n" + ";;\n" + "ld8 r9 = [r9]\n" /* r9 = XEN_PSR_I_ADDR */ + "mov r8 = 0\n" /* psr_i = 0 */ + ";;\n" + "ld1 r9 = [r9]\n" /* r9 = XEN_PSR_I */ + ";;\n" + "cmp.eq.unc p6, p0 = r9, r0\n" /* p6 = (XEN_PSR_I != 0) */ + ";;\n" + "(p6) mov r8 = " __stringify(1 << IA64_PSR_I_BIT) "\n"); + +DEFINE_FUNC1(thash, unsigned long, + "break " __stringify(HYPERPRIVOP_THASH) "\n"); +DEFINE_FUNC1(get_cpuid, int, + "break " __stringify(HYPERPRIVOP_GET_CPUID) "\n"); +DEFINE_FUNC1(get_pmd, int, + "break " __stringify(HYPERPRIVOP_GET_PMD) "\n"); +DEFINE_FUNC1(get_rr, unsigned long, + "break " __stringify(HYPERPRIVOP_GET_RR) "\n"); + +/* + * void xen_privop_ssm_i(void) + * + * int masked = !xen_get_virtual_psr_i(); + * // masked = *(*XEN_MAPPEDREGS->interrupt_mask_addr) + * xen_set_virtual_psr_i(1) + * // *(*XEN_MAPPEDREGS->interrupt_mask_addr) = 0 + * // compiler barrier + * if (masked) { + * uint8_t* pend_int_addr + * (uint8_t*)(*XEN_MAPPEDREGS->interrupt_mask_addr) - 1; + * uint8_t pending = *pend_int_addr; + * if (pending) + * XEN_HYPER_SSM_I + * } + */ +/* 4 bundles */ +DEFINE_VOID_FUNC0(ssm_i, + "mov r8 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n" + ";;\n" + "ld8 r8 = [r8]\n" /* r8 = XEN_PSR_I_ADDR */ + ";;\n" + "ld1 r9 = [r8]\n" /* r9 = XEN_PSR_I */ + ";;\n" + "st1 [r8] = r0, -1\n" /* psr_i = 0. enable interrupt + * r8 = XEN_PSR_I_ADDR - 1 + * = pend_int_addr + */ + "cmp.eq.unc p0, p6 = r9, r0\n"/* p6 = !XEN_PSR_I + * previously interrupt + * masked? + */ + "mf\n" + ";;\n" + "(p6) ld1 r8 = [r8]\n" /* r8 = xen_pend_int */ + ";;\n" + "(p6) cmp.eq.unc p6, p7 = r8, r0\n" /*interrupt pending?*/ + ";;\n" + "(p6) mf\n" + /* issue hypercall to get interrupt */ + "(p7) break " __stringify(HYPERPRIVOP_SSM_I) "\n" + ";;\n"); + +/* + * psr_i_addr_addr = XEN_MAPPEDREGS->interrupt_mask_addr + * = XEN_PSR_I_ADDR_ADDR; + * psr_i_addr = *psr_i_addr_addr; + * *psr_i_addr = 1; + */ +/* 2 bundles */ +DEFINE_VOID_FUNC0(rsm_i, + "mov r8 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n" + /* r8 = XEN_PSR_I_ADDR */ + "mov r9 = 1\n" + ";;\n" + "ld8 r8 = [r8]\n" /* r8 = XEN_PSR_I */ + ";;\n" + "st1 [r8] = r9\n"); /* XEN_PSR_I = 1 */ + +extern void +xen_set_rr0_to_rr4(unsigned long val0, unsigned long val1, + unsigned long val2, unsigned long val3, + unsigned long val4); +__DEFINE_FUNC(set_rr0_to_rr4, + "break " __stringify(HYPERPRIVOP_SET_RR0_TO_RR4) "\n"); + + +extern unsigned long xen_getreg(int regnum); +#define __DEFINE_GET_REG(id, privop) \ + "mov r2 = " __stringify(_IA64_REG_ ## id) "\n" \ + ";;\n" \ + "cmp.eq p6, p0 = r2, r8\n" \ + ";;\n" \ + "(p6) break " __stringify(HYPERPRIVOP_GET_ ## privop) "\n" \ + "(p6) br.cond.sptk.many b6\n" \ + ";;\n" + +__DEFINE_FUNC(getreg, + __DEFINE_GET_REG(PSR, PSR) +#ifdef CONFIG_IA32_SUPPORT + __DEFINE_GET_REG(AR_EFLAG, EFLAG) +#endif + + /* get_itc */ + "mov r2 = " __stringify(_IA64_REG_AR_ITC) "\n" + ";;\n" + "cmp.eq p6, p0 = r2, r8\n" + ";;\n" + "(p6) br.cond.spnt xen_get_itc\n" + ";;\n" + + /* get itm */ + "mov r2 = " __stringify(_IA64_REG_CR_ITM) "\n" + ";;\n" + "cmp.eq p6, p0 = r2, r8\n" + ";;\n" + "(p6) br.cond.spnt xen_get_itm_with_offset\n" + ";;\n" + + __DEFINE_GET_REG(CR_IVR, IVR) + __DEFINE_GET_REG(CR_TPR, TPR) + + /* fall back */ + "movl r2 = ia64_native_getreg_func\n" + ";;\n" + "mov b7 = r2\n" + ";;\n" + "br.cond.sptk.many b7\n"); + +extern void xen_setreg(int regnum, unsigned long val); +#define __DEFINE_SET_REG(id, privop) \ + "mov r2 = " __stringify(_IA64_REG_ ## id) "\n" \ + ";;\n" \ + "cmp.eq p6, p0 = r2, r9\n" \ + ";;\n" \ + "(p6) break " __stringify(HYPERPRIVOP_ ## privop) "\n" \ + "(p6) br.cond.sptk.many b6\n" \ + ";;\n" + +__DEFINE_FUNC(setreg, + /* kr0 .. kr 7*/ + /* + * if (_IA64_REG_AR_KR0 <= regnum && + * regnum <= _IA64_REG_AR_KR7) { + * register __index asm ("r8") = regnum - _IA64_REG_AR_KR0 + * register __val asm ("r9") = val + * "break HYPERPRIVOP_SET_KR" + * } + */ + "mov r17 = r9\n" + "mov r2 = " __stringify(_IA64_REG_AR_KR0) "\n" + ";;\n" + "cmp.ge p6, p0 = r9, r2\n" + "sub r17 = r17, r2\n" + ";;\n" + "(p6) cmp.ge.unc p7, p0 = " + __stringify(_IA64_REG_AR_KR7) " - " __stringify(_IA64_REG_AR_KR0) + ", r17\n" + ";;\n" + "(p7) mov r9 = r8\n" + ";;\n" + "(p7) mov r8 = r17\n" + "(p7) break " __stringify(HYPERPRIVOP_SET_KR) "\n" + + /* set itm */ + "mov r2 = " __stringify(_IA64_REG_CR_ITM) "\n" + ";;\n" + "cmp.eq p6, p0 = r2, r8\n" + ";;\n" + "(p6) br.cond.spnt xen_set_itm_with_offset\n" + + /* set itc */ + "mov r2 = " __stringify(_IA64_REG_AR_ITC) "\n" + ";;\n" + "cmp.eq p6, p0 = r2, r8\n" + ";;\n" + "(p6) br.cond.spnt xen_set_itc\n" + +#ifdef CONFIG_IA32_SUPPORT + __DEFINE_SET_REG(AR_EFLAG, SET_EFLAG) +#endif + __DEFINE_SET_REG(CR_TPR, SET_TPR) + __DEFINE_SET_REG(CR_EOI, EOI) + + /* fall back */ + "movl r2 = ia64_native_setreg_func\n" + ";;\n" + "mov b7 = r2\n" + ";;\n" + "br.cond.sptk.many b7\n"); +#endif static struct pv_cpu_ops xen_cpu_ops __initdata = { .fc = xen_fc, @@ -486,3 +904,252 @@ xen_setup_pv_ops(void) paravirt_cpu_asm_init(&xen_cpu_asm_switch); } + +#ifdef ASM_SUPPORTED +/*************************************************************************** + * binary pacthing + * pv_init_ops.patch_bundle + */ + +#define DEFINE_FUNC_GETREG(name, privop) \ + DEFINE_FUNC0(get_ ## name, \ + "break "__stringify(HYPERPRIVOP_GET_ ## privop) "\n") + +DEFINE_FUNC_GETREG(psr, PSR); +DEFINE_FUNC_GETREG(eflag, EFLAG); +DEFINE_FUNC_GETREG(ivr, IVR); +DEFINE_FUNC_GETREG(tpr, TPR); + +#define DEFINE_FUNC_SET_KR(n) \ + DEFINE_VOID_FUNC0(set_kr ## n, \ + ";;\n" \ + "mov r9 = r8\n" \ + "mov r8 = " #n "\n" \ + "break " __stringify(HYPERPRIVOP_SET_KR) "\n") + +DEFINE_FUNC_SET_KR(0); +DEFINE_FUNC_SET_KR(1); +DEFINE_FUNC_SET_KR(2); +DEFINE_FUNC_SET_KR(3); +DEFINE_FUNC_SET_KR(4); +DEFINE_FUNC_SET_KR(5); +DEFINE_FUNC_SET_KR(6); +DEFINE_FUNC_SET_KR(7); + +#define __DEFINE_FUNC_SETREG(name, privop) \ + DEFINE_VOID_FUNC0(name, \ + "break "__stringify(HYPERPRIVOP_ ## privop) "\n") + +#define DEFINE_FUNC_SETREG(name, privop) \ + __DEFINE_FUNC_SETREG(set_ ## name, SET_ ## privop) + +DEFINE_FUNC_SETREG(eflag, EFLAG); +DEFINE_FUNC_SETREG(tpr, TPR); +__DEFINE_FUNC_SETREG(eoi, EOI); + +extern const char xen_check_events[]; +extern const char __xen_intrin_local_irq_restore_direct_start[]; +extern const char __xen_intrin_local_irq_restore_direct_end[]; +extern const unsigned long __xen_intrin_local_irq_restore_direct_reloc; + +asm ( + ".align 32\n" + ".proc xen_check_events\n" + "xen_check_events:\n" + /* masked = 0 + * r9 = masked_addr - 1 + * = pending_intr_addr + */ + "st1.rel [r9] = r0, -1\n" + ";;\n" + /* r8 = pending_intr */ + "ld1.acq r11 = [r9]\n" + ";;\n" + /* p9 = interrupt pending? */ + "cmp.ne p9, p10 = r11, r0\n" + ";;\n" + "(p10) mf\n" + /* issue hypercall to trigger interrupt */ + "(p9) break " __stringify(HYPERPRIVOP_SSM_I) "\n" + "br.cond.sptk.many b6\n" + ".endp xen_check_events\n" + "\n" + ".align 32\n" + ".proc __xen_intrin_local_irq_restore_direct\n" + "__xen_intrin_local_irq_restore_direct:\n" + "__xen_intrin_local_irq_restore_direct_start:\n" + "1:\n" + "{\n" + "cmp.ne p6, p7 = r8, r0\n" + "mov r17 = ip\n" /* get ip to calc return address */ + "mov r9 = "__stringify(XEN_PSR_I_ADDR_ADDR) "\n" + ";;\n" + "}\n" + "{\n" + /* r9 = XEN_PSR_I_ADDR */ + "ld8 r9 = [r9]\n" + ";;\n" + /* r10 = masked previous value */ + "(p6) ld1.acq r10 = [r9]\n" + "adds r17 = 1f - 1b, r17\n" /* calculate return address */ + ";;\n" + "}\n" + "{\n" + /* p8 = !masked interrupt masked previously? */ + "(p6) cmp.ne.unc p8, p0 = r10, r0\n" + "\n" + /* p7 = else clause */ + "(p7) mov r11 = 1\n" + ";;\n" + "(p8) mov b6 = r17\n" /* set return address */ + "}\n" + "{\n" + /* masked = 1 */ + "(p7) st1.rel [r9] = r11\n" + "\n" + "[99:]\n" + "(p8) brl.cond.dptk.few xen_check_events\n" + "}\n" + /* pv calling stub is 5 bundles. fill nop to adjust return address */ + "{\n" + "nop 0\n" + "nop 0\n" + "nop 0\n" + "}\n" + "1:\n" + "__xen_intrin_local_irq_restore_direct_end:\n" + ".endp __xen_intrin_local_irq_restore_direct\n" + "\n" + ".align 8\n" + "__xen_intrin_local_irq_restore_direct_reloc:\n" + "data8 99b\n" +); + +static struct paravirt_patch_bundle_elem xen_patch_bundle_elems[] +__initdata_or_module +{ +#define XEN_PATCH_BUNDLE_ELEM(name, type) \ + { \ + (void*)xen_ ## name ## _direct_start, \ + (void*)xen_ ## name ## _direct_end, \ + PARAVIRT_PATCH_TYPE_ ## type, \ + } + + XEN_PATCH_BUNDLE_ELEM(fc, FC), + XEN_PATCH_BUNDLE_ELEM(thash, THASH), + XEN_PATCH_BUNDLE_ELEM(get_cpuid, GET_CPUID), + XEN_PATCH_BUNDLE_ELEM(get_pmd, GET_PMD), + XEN_PATCH_BUNDLE_ELEM(ptcga, PTCGA), + XEN_PATCH_BUNDLE_ELEM(get_rr, GET_RR), + XEN_PATCH_BUNDLE_ELEM(set_rr, SET_RR), + XEN_PATCH_BUNDLE_ELEM(set_rr0_to_rr4, SET_RR0_TO_RR4), + XEN_PATCH_BUNDLE_ELEM(ssm_i, SSM_I), + XEN_PATCH_BUNDLE_ELEM(rsm_i, RSM_I), + XEN_PATCH_BUNDLE_ELEM(get_psr_i, GET_PSR_I), + { + (void*)__xen_intrin_local_irq_restore_direct_start, + (void*)__xen_intrin_local_irq_restore_direct_end, + PARAVIRT_PATCH_TYPE_INTRIN_LOCAL_IRQ_RESTORE, + }, + +#define XEN_PATCH_BUNDLE_ELEM_GETREG(name, reg) \ + { \ + xen_get_ ## name ## _direct_start, \ + xen_get_ ## name ## _direct_end, \ + PARAVIRT_PATCH_TYPE_GETREG + _IA64_REG_ ## reg, \ + } + + XEN_PATCH_BUNDLE_ELEM_GETREG(psr, PSR), + XEN_PATCH_BUNDLE_ELEM_GETREG(eflag, AR_EFLAG), + + XEN_PATCH_BUNDLE_ELEM_GETREG(ivr, CR_IVR), + XEN_PATCH_BUNDLE_ELEM_GETREG(tpr, CR_TPR), + + XEN_PATCH_BUNDLE_ELEM_GETREG(itc, AR_ITC), + XEN_PATCH_BUNDLE_ELEM_GETREG(itm_with_offset, CR_ITM), + + +#define __XEN_PATCH_BUNDLE_ELEM_SETREG(name, reg) \ + { \ + xen_ ## name ## _direct_start, \ + xen_ ## name ## _direct_end, \ + PARAVIRT_PATCH_TYPE_SETREG + _IA64_REG_ ## reg, \ + } + +#define XEN_PATCH_BUNDLE_ELEM_SETREG(name, reg) \ + __XEN_PATCH_BUNDLE_ELEM_SETREG(set_ ## name, reg) + + XEN_PATCH_BUNDLE_ELEM_SETREG(kr0, AR_KR0), + XEN_PATCH_BUNDLE_ELEM_SETREG(kr1, AR_KR1), + XEN_PATCH_BUNDLE_ELEM_SETREG(kr2, AR_KR2), + XEN_PATCH_BUNDLE_ELEM_SETREG(kr3, AR_KR3), + XEN_PATCH_BUNDLE_ELEM_SETREG(kr4, AR_KR4), + XEN_PATCH_BUNDLE_ELEM_SETREG(kr5, AR_KR5), + XEN_PATCH_BUNDLE_ELEM_SETREG(kr6, AR_KR6), + XEN_PATCH_BUNDLE_ELEM_SETREG(kr7, AR_KR7), + + XEN_PATCH_BUNDLE_ELEM_SETREG(eflag, AR_EFLAG), + XEN_PATCH_BUNDLE_ELEM_SETREG(tpr, CR_TPR), + __XEN_PATCH_BUNDLE_ELEM_SETREG(eoi, CR_EOI), + + XEN_PATCH_BUNDLE_ELEM_SETREG(itc, AR_ITC), + XEN_PATCH_BUNDLE_ELEM_SETREG(itm_with_offset, CR_ITM), +}; + +static unsigned long __init_or_module +xen_patch_bundle(void *sbundle, void *ebundle, unsigned long type) +{ + const unsigned long nelems = sizeof(xen_patch_bundle_elems) / + sizeof(xen_patch_bundle_elems[0]); + unsigned long used; + const struct paravirt_patch_bundle_elem *found; + + used = __paravirt_patch_apply_bundle(sbundle, ebundle, type, + xen_patch_bundle_elems, nelems, + &found); + + if (found == NULL) + /* fallback */ + return ia64_native_patch_bundle(sbundle, ebundle, type); + if (used == 0) + return used; + + /* relocation */ + switch (type) { + case PARAVIRT_PATCH_TYPE_INTRIN_LOCAL_IRQ_RESTORE: { + unsigned long reloc + __xen_intrin_local_irq_restore_direct_reloc; + unsigned long reloc_offset = reloc - (unsigned long) + __xen_intrin_local_irq_restore_direct_start; + unsigned long tag = (unsigned long)sbundle + reloc_offset; + paravirt_patch_reloc_brl(tag, xen_check_events); + break; + } + default: + /* nothing */ + break; + } + return used; +} +#endif /* ASM_SUPPOTED */ + +const struct paravirt_patch_branch_target xen_branch_target[] +__initconst = { +#define PARAVIRT_BR_TARGET(name, type) \ + { \ + &xen_ ## name, \ + PARAVIRT_PATCH_TYPE_BR_ ## type, \ + } + PARAVIRT_BR_TARGET(switch_to, SWITCH_TO), + PARAVIRT_BR_TARGET(leave_syscall, LEAVE_SYSCALL), + PARAVIRT_BR_TARGET(work_processed_syscall, WORK_PROCESSED_SYSCALL), + PARAVIRT_BR_TARGET(leave_kernel, LEAVE_KERNEL), +}; + +static void __init +xen_patch_branch(unsigned long tag, unsigned long type) +{ + const unsigned long nelem + sizeof(xen_branch_target) / sizeof(xen_branch_target[0]); + __paravirt_patch_apply_branch(tag, type, xen_branch_target, nelem); +} -- 1.6.0.2
Isaku Yamahata
2008-Dec-01 02:37 UTC
[PATCH 0/5] ia64/pv_ops, xen: binary patch optimization
Hi Tony. I'd like to merge those patches and the other xen domU optimization patches at the next merge window. i.e. for 2.6.29. What do you think? I think that they are just enhancements of the already merged patches or ia64 porting from x86 paravirt techniques and that their quality is enough for merge. Although people want some twist, I believe they would be minor. thanks, On Tue, Nov 25, 2008 at 10:03:50PM +0900, Isaku Yamahata wrote:> This patch set is for binary patch optimization for paravirt_ops. > The binary patch optimization is important on native case because > the paravirt_ops overhead can be reduced by converting indirect > call into in-place execution or direct call. > > > The first patch imports helper functions which themselves doesn't interesting > things. > The second patch replaces the indirect function calls with a special > call written in gcc extended inline asm and introduces native methods. > The third patch introduces binary patch for kernel modules. > The forth patch suppress false positive warnings which were caused by > the previous patches. > The last patch implements xen methods. > > > For convenience the working full source is available from > http://people.valinux.co.jp/~yamahata/xen-ia64/for_eagl/linux-2.6-ia64-pv-ops.git/ > branch: ia64-pv-ops-2008nov25-xen-ia64-optimized-domu-binary-patch > > For the status of this patch series > http://wiki.xensource.com/xenwiki/XenIA64/UpstreamMerge > > thanks, > > Diffstat: > arch/ia64/include/asm/intrinsics.h | 6 +- > arch/ia64/include/asm/module.h | 6 + > arch/ia64/include/asm/paravirt.h | 8 + > arch/ia64/include/asm/paravirt_patch.h | 143 +++++++ > arch/ia64/include/asm/paravirt_privop.h | 347 ++++++++++++++++- > arch/ia64/include/asm/xen/privop.h | 4 + > arch/ia64/kernel/Makefile | 3 +- > arch/ia64/kernel/efi.c | 1 + > arch/ia64/kernel/module.c | 32 ++ > arch/ia64/kernel/paravirt.c | 520 ++++++++++++++++++++++++- > arch/ia64/kernel/paravirt_patch.c | 509 +++++++++++++++++++++++ > arch/ia64/kernel/paravirtentry.S | 101 ++++- > arch/ia64/kernel/setup.c | 1 + > arch/ia64/kernel/vmlinux.lds.S | 24 ++ > arch/ia64/kvm/vtlb.c | 2 + > arch/ia64/xen/hypercall.S | 2 + > arch/ia64/xen/xen_pv_ops.c | 667 +++++++++++++++++++++++++++++++ > 17 files changed, 2351 insertions(+), 25 deletions(-)-- yamahata
Reasonably Related Threads
- [PATCH 0/5] ia64/pv_ops, xen: binary patch optimization
- [PATCH 0/5] ia64/pv_ops, xen: binary patch optimization TAKE 2
- [PATCH 0/5] ia64/pv_ops, xen: binary patch optimization TAKE 2
- [PATCH 0/5] ia64/pv_ops, xen: binary patch optimization TAKE 3
- [PATCH 0/5] ia64/pv_ops, xen: binary patch optimization TAKE 3