Andrew Cooper
2013-Sep-30 17:46 UTC
[PATCH v3 0/2] Improvements for domain_crash_synchronous
This patch series provides improvements for identifying the cause of a domain_crash_synchronous from assembly code. There are minimal changes from v2, and again are all in the second patch. More tweaks to the printed string, and a less ambiguous name of the label before entering the UNLIKELY() section. ~Andrew -- 1.7.10.4
Andrew Cooper
2013-Sep-30 17:46 UTC
[Patch v3 1/2] x86/traps: Record last extable faulting address
... so the following patch can identify the location of faults leading to a decision to crash a domain. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> Acked-by: Keir Fraser <keir@xen.org> CC: Keir Fraser <keir@xen.org> CC: Jan Beulich <JBeulich@suse.com> --- xen/arch/x86/traps.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 72e8566..771e59a 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -89,6 +89,7 @@ static char __read_mostly opt_nmi[10] = "fatal"; string_param("nmi", opt_nmi); DEFINE_PER_CPU(u64, efer); +static DEFINE_PER_CPU(unsigned long, last_extable_addr); DEFINE_PER_CPU_READ_MOSTLY(u32, ler_msr); @@ -550,6 +551,7 @@ static inline void do_trap( { dprintk(XENLOG_ERR, "Trap %d: %p -> %p\n", trapnr, _p(regs->eip), _p(fixup)); + this_cpu(last_extable_addr) = regs->eip; regs->eip = fixup; return; } @@ -1038,6 +1040,7 @@ void do_invalid_op(struct cpu_user_regs *regs) die: if ( (fixup = search_exception_table(regs->eip)) != 0 ) { + this_cpu(last_extable_addr) = regs->eip; regs->eip = fixup; return; } @@ -1370,6 +1373,7 @@ void do_page_fault(struct cpu_user_regs *regs) perfc_incr(copy_user_faults); if ( unlikely(regs->error_code & PFEC_reserved_bit) ) reserved_bit_page_fault(addr, regs); + this_cpu(last_extable_addr) = regs->eip; regs->eip = fixup; return; } @@ -3062,6 +3066,7 @@ void do_general_protection(struct cpu_user_regs *regs) { dprintk(XENLOG_INFO, "GPF (%04x): %p -> %p\n", regs->error_code, _p(regs->eip), _p(fixup)); + this_cpu(last_extable_addr) = regs->eip; regs->eip = fixup; return; } -- 1.7.10.4
Andrew Cooper
2013-Sep-30 17:46 UTC
[Patch v3 2/2] Xen/x86: Improve information from domain_crash_synchronous
As it currently stands, the string "domain_crash_sync called from entry.S" is not helpful at identifying why the domain was crashed, and a debug build of Xen doesn''t help the matter This patch improves the information printed, by pointing to where the crash decision was made. Specific improvements include: * Moving the ascii string "domain_crash_sync called from entry.S\n" away from some semi-hot code cache lines. * Moving the printk into C code (especially as this_cpu() is miserable to use in assembly code) * Undo the previous confusing situation of having the domain_crash_synchronous() as a macro in C code, yet a global symbol in assembly code. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> Acked-by: Keir Fraser <keir@xen.org> CC: Keir Fraser <keir@xen.org> CC: Jan Beulich <JBeulich@suse.com> --- Changes in v3: * Tweak printed string format * Use less ambiguious name for the tag outside the UNLIKELY() section --- xen/arch/x86/traps.c | 11 +++++++++ xen/arch/x86/x86_64/compat/entry.S | 11 ++++++--- xen/arch/x86/x86_64/entry.S | 48 ++++++++++++++++++------------------ xen/include/asm-x86/asm_defns.h | 4 +++ xen/include/xen/sched.h | 7 ++++++ 5 files changed, 53 insertions(+), 28 deletions(-) diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 771e59a..6c7bd99 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -3748,6 +3748,17 @@ unsigned long do_get_debugreg(int reg) return -EINVAL; } +void asm_domain_crash_synchronous(unsigned long addr) +{ + if ( addr == 0 ) + addr = this_cpu(last_extable_addr); + + printk("domain_crash_sync called from entry.S: fault at %p ", _p(addr)); + print_symbol("%s\n", addr); + + __domain_crash_synchronous(); +} + /* * Local variables: * mode: C diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S index c0afe2c..594b0b9 100644 --- a/xen/arch/x86/x86_64/compat/entry.S +++ b/xen/arch/x86/x86_64/compat/entry.S @@ -329,7 +329,10 @@ UNLIKELY_END(compat_bounce_failsafe) movzwl TRAPBOUNCE_cs(%rdx),%eax /* Null selectors (0-3) are not allowed. */ testl $~3,%eax - jz domain_crash_synchronous +UNLIKELY_START(z, compat_bounce_null_selector) + lea UNLIKELY_DISPATCH_LABEL(compat_bounce_null_selector)(%rip), %rdi + jmp asm_domain_crash_synchronous /* Does not return */ +__UNLIKELY_END(compat_bounce_null_selector) movl %eax,UREGS_cs+8(%rsp) movl TRAPBOUNCE_eip(%rdx),%eax movl %eax,UREGS_rip+8(%rsp) @@ -339,10 +342,10 @@ UNLIKELY_END(compat_bounce_failsafe) xorl %edi,%edi jmp .Lft13 .previous - _ASM_EXTABLE(.Lft1, domain_crash_synchronous) + _ASM_EXTABLE(.Lft1, dom_crash_sync_extable) _ASM_EXTABLE(.Lft2, compat_crash_page_fault) _ASM_EXTABLE(.Lft3, compat_crash_page_fault_4) - _ASM_EXTABLE(.Lft4, domain_crash_synchronous) + _ASM_EXTABLE(.Lft4, dom_crash_sync_extable) _ASM_EXTABLE(.Lft5, compat_crash_page_fault_4) _ASM_EXTABLE(.Lft6, compat_crash_page_fault_8) _ASM_EXTABLE(.Lft7, compat_crash_page_fault) @@ -363,7 +366,7 @@ compat_crash_page_fault: .Lft14: mov %edi,%fs movl %esi,%edi call show_page_walk - jmp domain_crash_synchronous + jmp dom_crash_sync_extable .section .fixup,"ax" .Lfx14: xorl %edi,%edi diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S index f64e871..3ea4683 100644 --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -370,7 +370,10 @@ create_bounce_frame: sbb %ecx,%ecx # In +ve address space? Then okay. cmpq %rax,%rsi adc %ecx,%ecx # Above Xen private area? Then okay. - jg domain_crash_synchronous +UNLIKELY_START(g, create_bounce_frame_bad_sp) + lea UNLIKELY_DISPATCH_LABEL(create_bounce_frame_bad_sp)(%rip), %rdi + jmp asm_domain_crash_synchronous /* Does not return */ +__UNLIKELY_END(create_bounce_frame_bad_sp) movb TRAPBOUNCE_flags(%rdx),%cl subq $40,%rsi movq UREGS_ss+8(%rsp),%rax @@ -429,26 +432,26 @@ UNLIKELY_END(bounce_failsafe) movq $FLAT_KERNEL_CS,UREGS_cs+8(%rsp) movq TRAPBOUNCE_eip(%rdx),%rax testq %rax,%rax - jz domain_crash_synchronous +UNLIKELY_START(z, create_bounce_frame_bad_bounce_ip) + lea UNLIKELY_DISPATCH_LABEL(create_bounce_frame_bad_bounce_ip)(%rip), %rdi + jmp asm_domain_crash_synchronous /* Does not return */ +__UNLIKELY_END(create_bounce_frame_bad_bounce_ip) movq %rax,UREGS_rip+8(%rsp) ret - _ASM_EXTABLE(.Lft2, domain_crash_synchronous) - _ASM_EXTABLE(.Lft3, domain_crash_synchronous) - _ASM_EXTABLE(.Lft4, domain_crash_synchronous) - _ASM_EXTABLE(.Lft5, domain_crash_synchronous) - _ASM_EXTABLE(.Lft6, domain_crash_synchronous) - _ASM_EXTABLE(.Lft7, domain_crash_synchronous) - _ASM_EXTABLE(.Lft8, domain_crash_synchronous) - _ASM_EXTABLE(.Lft9, domain_crash_synchronous) - _ASM_EXTABLE(.Lft10, domain_crash_synchronous) - _ASM_EXTABLE(.Lft11, domain_crash_synchronous) - _ASM_EXTABLE(.Lft12, domain_crash_synchronous) - _ASM_EXTABLE(.Lft13, domain_crash_synchronous) - -domain_crash_synchronous_string: - .asciz "domain_crash_sync called from entry.S\n" - -ENTRY(domain_crash_synchronous) + _ASM_EXTABLE(.Lft2, dom_crash_sync_extable) + _ASM_EXTABLE(.Lft3, dom_crash_sync_extable) + _ASM_EXTABLE(.Lft4, dom_crash_sync_extable) + _ASM_EXTABLE(.Lft5, dom_crash_sync_extable) + _ASM_EXTABLE(.Lft6, dom_crash_sync_extable) + _ASM_EXTABLE(.Lft7, dom_crash_sync_extable) + _ASM_EXTABLE(.Lft8, dom_crash_sync_extable) + _ASM_EXTABLE(.Lft9, dom_crash_sync_extable) + _ASM_EXTABLE(.Lft10, dom_crash_sync_extable) + _ASM_EXTABLE(.Lft11, dom_crash_sync_extable) + _ASM_EXTABLE(.Lft12, dom_crash_sync_extable) + _ASM_EXTABLE(.Lft13, dom_crash_sync_extable) + +ENTRY(dom_crash_sync_extable) # Get out of the guest-save area of the stack. GET_STACK_BASE(%rax) leaq STACK_CPUINFO_FIELD(guest_cpu_user_regs)(%rax),%rsp @@ -459,11 +462,8 @@ ENTRY(domain_crash_synchronous) setz %al leal (%rax,%rax,2),%eax orb %al,UREGS_cs(%rsp) - # printk(domain_crash_synchronous_string) - leaq domain_crash_synchronous_string(%rip),%rdi - xorl %eax,%eax - call printk - jmp __domain_crash_synchronous + xorl %edi,%edi + jmp asm_domain_crash_synchronous /* Does not return */ /* No special register assumptions. */ ENTRY(ret_from_intr) diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h index 25032d5..a4601ba 100644 --- a/xen/include/asm-x86/asm_defns.h +++ b/xen/include/asm-x86/asm_defns.h @@ -35,10 +35,14 @@ void ret_from_intr(void); #ifdef __ASSEMBLY__ #define UNLIKELY_START(cond, tag) \ + .Ldispatch.tag: \ j##cond .Lunlikely.tag; \ .subsection 1; \ .Lunlikely.tag: +#define UNLIKELY_DISPATCH_LABEL(tag) \ + .Ldispatch.tag + #define UNLIKELY_DONE(cond, tag) \ j##cond .Llikely.tag diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index 0013a8d..1765e18 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -547,6 +547,13 @@ void __domain_crash_synchronous(void) __attribute__((noreturn)); __domain_crash_synchronous(); \ } while (0) +/* + * Called from assembly code, with an optional address to help indicate why + * the crash occured. If addr is 0, look up address from last extable + * redirection. + */ +void asm_domain_crash_synchronous(unsigned long addr) __attribute__((noreturn)); + #define set_current_state(_s) do { current->state = (_s); } while (0) void scheduler_init(void); int sched_init_vcpu(struct vcpu *v, unsigned int processor); -- 1.7.10.4