Sandeep Raju via llvm-dev
2015-Aug-18 21:31 UTC
[llvm-dev] Unwind info in .debug_frame incorrect for addresses beyond function epilog
Hi All, We discovered an issue with the unwind info generated by LLVM in the .debug_frame section. It appears that the cfa_offset for instructions beyond the function epilog is incorrect. The problem is best depicted with the following testcase: $ cat t.c int foo(int i, int j, int k, int l, int m, int n, int o, int p, int q) { return 0; } int main() { return foo (1,2,3,4,5,6,7,8,9); } Compiled thusly: $ clang --target=i686-pc-linux -fomit-frame-pointer -m32 -march=i686 -fPIC -g -o t t.c Running it in a debugger (look for annotations "<====" ): (gdb) file t Reading symbols from t...done. (gdb) disassemble foo Dump of assembler code for function foo: 0x08048380 <+0>: push %ebp 0x08048381 <+1>: push %ebx 0x08048382 <+2>: push %edi 0x08048383 <+3>: push %esi 0x08048384 <+4>: sub $0x34,%esp 0x08048387 <+7>: mov 0x68(%esp),%eax 0x0804838b <+11>: mov 0x64(%esp),%ecx 0x0804838f <+15>: mov 0x60(%esp),%edx 0x08048393 <+19>: mov 0x5c(%esp),%esi 0x08048397 <+23>: mov 0x58(%esp),%edi 0x0804839b <+27>: mov 0x54(%esp),%ebx 0x0804839f <+31>: mov 0x50(%esp),%ebp 0x080483a3 <+35>: mov %eax,0xc(%esp) 0x080483a7 <+39>: mov 0x4c(%esp),%eax 0x080483ab <+43>: mov %eax,0x8(%esp) 0x080483af <+47>: mov 0x48(%esp),%eax 0x080483b3 <+51>: mov %eax,0x4(%esp) 0x080483b7 <+55>: xor %eax,%eax 0x080483b9 <+57>: mov %eax,(%esp) 0x080483bc <+60>: mov 0x4(%esp),%eax 0x080483c0 <+64>: mov %eax,0x30(%esp) 0x080483c4 <+68>: mov 0x8(%esp),%eax 0x080483c8 <+72>: mov %eax,0x2c(%esp) 0x080483cc <+76>: mov %ebp,0x28(%esp) 0x080483d0 <+80>: mov %ebx,0x24(%esp) 0x080483d4 <+84>: mov %edi,0x20(%esp) 0x080483d8 <+88>: mov %esi,0x1c(%esp) 0x080483dc <+92>: mov %edx,0x18(%esp) 0x080483e0 <+96>: mov %ecx,0x14(%esp) 0x080483e4 <+100>: mov 0xc(%esp),%ecx 0x080483e8 <+104>: mov %ecx,0x10(%esp) 0x080483ec <+108>: mov (%esp),%eax 0x080483ef <+111>: add $0x34,%esp <==== epilog 0x080483f2 <+114>: pop %esi 0x080483f3 <+115>: pop %edi 0x080483f4 <+116>: pop %ebx 0x080483f5 <+117>: pop %ebp 0x080483f6 <+118>: ret End of 
assembler dump. (gdb) break *0x080483ef <==== break just before the epilog to illustrate the issue Breakpoint 1 at 0x80483ef: file t.c, line 3. (gdb) run Starting program: /auto/wssanraju-sjc/unw/issue1_3/t warning: Unable to find dynamic linker breakpoint function. GDB will be unable to debug shared library initializers and track explicitly loaded dynamic code. warning: Could not load shared library symbols for 2 libraries, e.g. /lib/libc.so.6. Use the "info sharedlibrary" command to see the complete listing. Do you need "set solib-search-path" or "set sysroot"? Breakpoint 1, 0x080483ef in foo (i=1, j=2, k=3, l=4, m=5, n=6, o=7, p=8, q=9) at t.c:3 3 return 0; (gdb) bt <==== debugger able to generate correct backtrace #0 0x080483ef in foo (i=1, j=2, k=3, l=4, m=5, n=6, o=7, p=8, q=9) at t.c:3 #1 0x080484c2 in main () at t.c:8 (gdb) stepi <==== execute one instruction, now we're in the epilog 0x080483f2 in foo (i=8, j=7, k=6, l=5, m=4, n=3, o=2, p=1, q=134513858) at t.c:3 3 return 0; (gdb) bt <==== debugger not able to generate correct backtrace and arguments are displayed incorrectly #0 0x080483f2 in foo (i=8, j=7, k=6, l=5, m=4, n=3, o=2, p=1, q=134513858) at t.c:3 #1 0x00000006 in ?? () #2 0x00000004 in ?? () #3 0x00000003 in ?? () #4 0x00000002 in ?? () #5 0x00000001 in ?? () #6 0x00000005 in ?? () #7 0x00000009 in ?? () #8 0x00000008 in ?? () #9 0x00000007 in ?? () #10 0x080496f4 in ?? () Backtrace stopped: previous frame inner to this frame (corrupt stack?) 
(gdb) Now, we look at the .debug_frame section in the object file for the same testcase: $ clang --target=i686-pc-linux -fomit-frame-pointer -m32 -march=i686 -fPIC -c -o t.o -g t.c $ readelf --debug-dump=frames t.o The section .debug_frame contains: 00000000 00000010 ffffffff CIE Version: 4 Augmentation: "" Code alignment factor: 4 Data alignment factor: 0 Return address column: 1 DW_CFA_advance_loc: 240 to 000000f0 DW_CFA_same_value: r12 DW_CFA_advance_loc4: 2416320528 to 90062100 00000014 00000024 00000000 FDE cie=00000000 pc=00000000..00000077 DW_CFA_advance_loc: 4 to 00000004 DW_CFA_def_cfa_offset: 8 DW_CFA_advance_loc: 4 to 00000008 DW_CFA_def_cfa_offset: 12 DW_CFA_advance_loc: 4 to 0000000c DW_CFA_def_cfa_offset: 16 DW_CFA_advance_loc: 4 to 00000010 DW_CFA_def_cfa_offset: 20 DW_CFA_advance_loc: 12 to 0000001c <=== for all addresses after 0x1c, the offset is 72 ! DW_CFA_def_cfa_offset: 72 DW_CFA_offset: r6 at cfa+0 DW_CFA_offset: r7 at cfa+0 DW_CFA_offset: r3 at cfa+0 DW_CFA_offset: r5 at cfa+0 DW_CFA_nop ... ... 
Disassembly of foo, with relative addresses: $ objdump --disassemble t.o t.o: file format elf32-i386 Disassembly of section .text: 00000000 <foo>: 0: 55 push %ebp 1: 53 push %ebx 2: 57 push %edi 3: 56 push %esi 4: 83 ec 34 sub $0x34,%esp 7: 8b 44 24 68 mov 0x68(%esp),%eax b: 8b 4c 24 64 mov 0x64(%esp),%ecx f: 8b 54 24 60 mov 0x60(%esp),%edx 13: 8b 74 24 5c mov 0x5c(%esp),%esi 17: 8b 7c 24 58 mov 0x58(%esp),%edi 1b: 8b 5c 24 54 mov 0x54(%esp),%ebx 1f: 8b 6c 24 50 mov 0x50(%esp),%ebp 23: 89 44 24 0c mov %eax,0xc(%esp) 27: 8b 44 24 4c mov 0x4c(%esp),%eax 2b: 89 44 24 08 mov %eax,0x8(%esp) 2f: 8b 44 24 48 mov 0x48(%esp),%eax 33: 89 44 24 04 mov %eax,0x4(%esp) 37: 31 c0 xor %eax,%eax 39: 89 04 24 mov %eax,(%esp) 3c: 8b 44 24 04 mov 0x4(%esp),%eax 40: 89 44 24 30 mov %eax,0x30(%esp) 44: 8b 44 24 08 mov 0x8(%esp),%eax 48: 89 44 24 2c mov %eax,0x2c(%esp) 4c: 89 6c 24 28 mov %ebp,0x28(%esp) 50: 89 5c 24 24 mov %ebx,0x24(%esp) 54: 89 7c 24 20 mov %edi,0x20(%esp) 58: 89 74 24 1c mov %esi,0x1c(%esp) 5c: 89 54 24 18 mov %edx,0x18(%esp) 60: 89 4c 24 14 mov %ecx,0x14(%esp) 64: 8b 4c 24 0c mov 0xc(%esp),%ecx 68: 89 4c 24 10 mov %ecx,0x10(%esp) 6c: 8b 04 24 mov (%esp),%eax 6f: 83 c4 34 add $0x34,%esp 72: 5e pop %esi 73: 5f pop %edi 74: 5b pop %ebx 75: 5d pop %ebp 76: c3 ret 77: 90 nop Since at the function epilog, the stack pointer is adjusted, it needs to generate a new offset for instructions after esp? The use case for this is, when the execution is interrupted in the function epilog by a signal handler, it will not be able to unwind the backtrace correctly. Any pointers on how to fix this issue? Thanks, Sandeep
Rafael Espíndola via llvm-dev
2015-Dec-09 19:39 UTC
[llvm-dev] Unwind info in .debug_frame incorrect for addresses beyond function epilog
I think this is pr20774. Cheers, Rafael On 18 August 2015 at 17:31, Sandeep Raju via llvm-dev <llvm-dev at lists.llvm.org> wrote:> Hi All, > > We discovered an issue with the unwind info generated by LLVM in the > .debug_frame section. It appears that the cfa_offset for instructions > beyond the function epilog is incorrect. > > The problem is best depicted with the following testcase: > > $ cat t.c > int foo(int i, int j, int k, int l, int m, int n, int o, int p, int q) > { > return 0; > } > > int main() > { > return foo (1,2,3,4,5,6,7,8,9); > } > > Compiled thusly: > > $ clang --target=i686-pc-linux -fomit-frame-pointer -m32 -march=i686 > -fPIC -g -o t t.c > > Running it in a debugger (look for annotations "<====" ): > > (gdb) file t > Reading symbols from t...done. > (gdb) disassemble foo > Dump of assembler code for function foo: > 0x08048380 <+0>: push %ebp > 0x08048381 <+1>: push %ebx > 0x08048382 <+2>: push %edi > 0x08048383 <+3>: push %esi > 0x08048384 <+4>: sub $0x34,%esp > 0x08048387 <+7>: mov 0x68(%esp),%eax > 0x0804838b <+11>: mov 0x64(%esp),%ecx > 0x0804838f <+15>: mov 0x60(%esp),%edx > 0x08048393 <+19>: mov 0x5c(%esp),%esi > 0x08048397 <+23>: mov 0x58(%esp),%edi > 0x0804839b <+27>: mov 0x54(%esp),%ebx > 0x0804839f <+31>: mov 0x50(%esp),%ebp > 0x080483a3 <+35>: mov %eax,0xc(%esp) > 0x080483a7 <+39>: mov 0x4c(%esp),%eax > 0x080483ab <+43>: mov %eax,0x8(%esp) > 0x080483af <+47>: mov 0x48(%esp),%eax > 0x080483b3 <+51>: mov %eax,0x4(%esp) > 0x080483b7 <+55>: xor %eax,%eax > 0x080483b9 <+57>: mov %eax,(%esp) > 0x080483bc <+60>: mov 0x4(%esp),%eax > 0x080483c0 <+64>: mov %eax,0x30(%esp) > 0x080483c4 <+68>: mov 0x8(%esp),%eax > 0x080483c8 <+72>: mov %eax,0x2c(%esp) > 0x080483cc <+76>: mov %ebp,0x28(%esp) > 0x080483d0 <+80>: mov %ebx,0x24(%esp) > 0x080483d4 <+84>: mov %edi,0x20(%esp) > 0x080483d8 <+88>: mov %esi,0x1c(%esp) > 0x080483dc <+92>: mov %edx,0x18(%esp) > 0x080483e0 <+96>: mov %ecx,0x14(%esp) > 0x080483e4 <+100>: mov 0xc(%esp),%ecx > 0x080483e8 
<+104>: mov %ecx,0x10(%esp) > 0x080483ec <+108>: mov (%esp),%eax > 0x080483ef <+111>: add $0x34,%esp <==== epilog > 0x080483f2 <+114>: pop %esi > 0x080483f3 <+115>: pop %edi > 0x080483f4 <+116>: pop %ebx > 0x080483f5 <+117>: pop %ebp > 0x080483f6 <+118>: ret > End of assembler dump. > (gdb) break *0x080483ef <==== break just before the epilog to > illustrate the issue > Breakpoint 1 at 0x80483ef: file t.c, line 3. > (gdb) run > Starting program: /auto/wssanraju-sjc/unw/issue1_3/t > warning: Unable to find dynamic linker breakpoint function. > GDB will be unable to debug shared library initializers > and track explicitly loaded dynamic code. > warning: Could not load shared library symbols for 2 libraries, e.g. > /lib/libc.so.6. > Use the "info sharedlibrary" command to see the complete listing. > Do you need "set solib-search-path" or "set sysroot"? > > Breakpoint 1, 0x080483ef in foo (i=1, j=2, k=3, l=4, m=5, n=6, o=7, > p=8, q=9) at t.c:3 > 3 return 0; > (gdb) bt <==== debugger able to generate correct backtrace > #0 0x080483ef in foo (i=1, j=2, k=3, l=4, m=5, n=6, o=7, p=8, q=9) at t.c:3 > #1 0x080484c2 in main () at t.c:8 > (gdb) stepi <==== execute one instruction, now we're in the epilog > 0x080483f2 in foo (i=8, j=7, k=6, l=5, m=4, n=3, o=2, p=1, q=134513858) at t.c:3 > 3 return 0; > (gdb) bt <==== debugger not able to generate correct backtrace and > arguments are displayed incorrectly > #0 0x080483f2 in foo (i=8, j=7, k=6, l=5, m=4, n=3, o=2, p=1, > q=134513858) at t.c:3 > #1 0x00000006 in ?? () > #2 0x00000004 in ?? () > #3 0x00000003 in ?? () > #4 0x00000002 in ?? () > #5 0x00000001 in ?? () > #6 0x00000005 in ?? () > #7 0x00000009 in ?? () > #8 0x00000008 in ?? () > #9 0x00000007 in ?? () > #10 0x080496f4 in ?? () > Backtrace stopped: previous frame inner to this frame (corrupt stack?) 
> (gdb) > > Now, we look at the .debug_frame section in the object file for the > same testcase: > > $ clang --target=i686-pc-linux -fomit-frame-pointer -m32 -march=i686 > -fPIC -c -o t.o -g t.c > $ readelf --debug-dump=frames t.o > The section .debug_frame contains: > > 00000000 00000010 ffffffff CIE > Version: 4 > Augmentation: "" > Code alignment factor: 4 > Data alignment factor: 0 > Return address column: 1 > > DW_CFA_advance_loc: 240 to 000000f0 > DW_CFA_same_value: r12 > DW_CFA_advance_loc4: 2416320528 to 90062100 > > 00000014 00000024 00000000 FDE cie=00000000 pc=00000000..00000077 > DW_CFA_advance_loc: 4 to 00000004 > DW_CFA_def_cfa_offset: 8 > DW_CFA_advance_loc: 4 to 00000008 > DW_CFA_def_cfa_offset: 12 > DW_CFA_advance_loc: 4 to 0000000c > DW_CFA_def_cfa_offset: 16 > DW_CFA_advance_loc: 4 to 00000010 > DW_CFA_def_cfa_offset: 20 > DW_CFA_advance_loc: 12 to 0000001c <=== for all addresses after > 0x1c, the offset is 72 ! > DW_CFA_def_cfa_offset: 72 > DW_CFA_offset: r6 at cfa+0 > DW_CFA_offset: r7 at cfa+0 > DW_CFA_offset: r3 at cfa+0 > DW_CFA_offset: r5 at cfa+0 > DW_CFA_nop > ... > ... 
> > Disassembly of foo, with relative addresses: > > $ objdump --disassemble t.o > > t.o: file format elf32-i386 > > Disassembly of section .text: > > > 00000000 <foo>: > 0: 55 push %ebp > 1: 53 push %ebx > 2: 57 push %edi > 3: 56 push %esi > 4: 83 ec 34 sub $0x34,%esp > 7: 8b 44 24 68 mov 0x68(%esp),%eax > b: 8b 4c 24 64 mov 0x64(%esp),%ecx > f: 8b 54 24 60 mov 0x60(%esp),%edx > 13: 8b 74 24 5c mov 0x5c(%esp),%esi > 17: 8b 7c 24 58 mov 0x58(%esp),%edi > 1b: 8b 5c 24 54 mov 0x54(%esp),%ebx > 1f: 8b 6c 24 50 mov 0x50(%esp),%ebp > 23: 89 44 24 0c mov %eax,0xc(%esp) > 27: 8b 44 24 4c mov 0x4c(%esp),%eax > 2b: 89 44 24 08 mov %eax,0x8(%esp) > 2f: 8b 44 24 48 mov 0x48(%esp),%eax > 33: 89 44 24 04 mov %eax,0x4(%esp) > 37: 31 c0 xor %eax,%eax > 39: 89 04 24 mov %eax,(%esp) > 3c: 8b 44 24 04 mov 0x4(%esp),%eax > 40: 89 44 24 30 mov %eax,0x30(%esp) > 44: 8b 44 24 08 mov 0x8(%esp),%eax > 48: 89 44 24 2c mov %eax,0x2c(%esp) > 4c: 89 6c 24 28 mov %ebp,0x28(%esp) > 50: 89 5c 24 24 mov %ebx,0x24(%esp) > 54: 89 7c 24 20 mov %edi,0x20(%esp) > 58: 89 74 24 1c mov %esi,0x1c(%esp) > 5c: 89 54 24 18 mov %edx,0x18(%esp) > 60: 89 4c 24 14 mov %ecx,0x14(%esp) > 64: 8b 4c 24 0c mov 0xc(%esp),%ecx > 68: 89 4c 24 10 mov %ecx,0x10(%esp) > 6c: 8b 04 24 mov (%esp),%eax > 6f: 83 c4 34 add $0x34,%esp > 72: 5e pop %esi > 73: 5f pop %edi > 74: 5b pop %ebx > 75: 5d pop %ebp > 76: c3 ret > 77: 90 nop > > > Since at the function epilog, the stack pointer is adjusted, it needs > to generate a new offset for instructions after esp? > > The use case for this is, when the execution is interrupted in the > function epilog by a signal handler, it will not be able to unwind the > backtrace correctly. > > Any pointers on how to fix this issue? > > Thanks, > Sandeep > _______________________________________________ > LLVM Developers mailing list > llvm-dev at lists.llvm.org > http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev