I'm porting the musl C library to ARM Thumb. It looks like inline asm is failing in some cases. Here's one: The lseek system call looks like this: ... off_t result; return syscall(SYS__llseek, fd, offset>>32, offset, &result, whence) ? -1 : result; ... Which eventually goes through this macro: static inline long __syscall5(long n, long a, long b, long c, long d, long e) { register long r7 __asm__("r7") = n; register long r0 __asm__("r0") = a; register long r1 __asm__("r1") = b; register long r2 __asm__("r2") = c; register long r3 __asm__("r3") = d; register long r4 __asm__("r4") = e; __asm_syscall("r"(r7), "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4)); } And then this macro: #define __asm_syscall(...) do { \ __asm__ __volatile__ ( "svc 0" \ : "=r"(r0) : __VA_ARGS__ : "memory"); \ return r0; \ } while (0) Gives: Disassembly of section .text: 00000000 <lseek>: 0: b590 push {r4, r7, lr} 2: af01 add r7, sp, #4 4: b083 sub sp, #12 6: 278c movs r7, #140 ; 0x8c 8: 46ec mov ip, sp a: 4619 mov r1, r3 c: 68bc ldr r4, [r7, #8] ; XXX r7 is clobbered here. e: 4663 mov r3, ip 10: df00 svc 0 12: f7ff fffe bl 0 <__syscall_ret> 16: 9a00 ldr r2, [sp, #0] 18: 9901 ldr r1, [sp, #4] 1a: 2800 cmp r0, #0 1c: bf1c itt ne 1e: f04f 32ff movne.w r2, #4294967295 ; 0xffffffff 22: f04f 31ff movne.w r1, #4294967295 ; 0xffffffff 26: 4610 mov r0, r2 28: b003 add sp, #12 2a: bd90 pop {r4, r7, pc} The question is, does the line register long r7 __asm__("r7") = n; make any guarantee about the value of r7 in the asm block? Adding the -fomit-frame-pointer flag fixes it, but is the bug in the code or in the compiler? -Rich
Hi Richard, My belief is that we don't fully support this syntax. I believe, and I'm sure Renato on CC will tell me if I'm wrong as he implemented it, that we only support this for non-allocatable registers, such as SP or FP. Cheers, James On Tue, 10 Feb 2015 at 21:29, Richard Pennington <rich at pennware.com> wrote:> I'm porting the musl C library to ARM Thumb. It looks like inline asm is > failing in some cases. Here's one: > > The lseek system call looks like this: > ... > off_t result; > return syscall(SYS__llseek, fd, offset>>32, offset, &result, > whence) ? -1 : result; > ... > > Which eventually goes through this macro: > > static inline long __syscall5(long n, long a, long b, long c, long d, > long e) > { > register long r7 __asm__("r7") = n; > register long r0 __asm__("r0") = a; > register long r1 __asm__("r1") = b; > register long r2 __asm__("r2") = c; > register long r3 __asm__("r3") = d; > register long r4 __asm__("r4") = e; > __asm_syscall("r"(r7), "0"(r0), "r"(r1), "r"(r2), "r"(r3), > "r"(r4)); > } > > And then this macro: > #define __asm_syscall(...) do { \ > __asm__ __volatile__ ( "svc 0" \ > : "=r"(r0) : __VA_ARGS__ : "memory"); \ > return r0; \ > } while (0) > > Gives: > Disassembly of section .text: > > 00000000 <lseek>: > 0: b590 push {r4, r7, lr} > 2: af01 add r7, sp, #4 > 4: b083 sub sp, #12 > 6: 278c movs r7, #140 ; 0x8c > 8: 46ec mov ip, sp > a: 4619 mov r1, r3 > c: 68bc ldr r4, [r7, #8] ; XXX r7 is clobbered > here. > e: 4663 mov r3, ip > 10: df00 svc 0 > 12: f7ff fffe bl 0 <__syscall_ret> > 16: 9a00 ldr r2, [sp, #0] > 18: 9901 ldr r1, [sp, #4] > 1a: 2800 cmp r0, #0 > 1c: bf1c itt ne > 1e: f04f 32ff movne.w r2, #4294967295 ; 0xffffffff > 22: f04f 31ff movne.w r1, #4294967295 ; 0xffffffff > 26: 4610 mov r0, r2 > 28: b003 add sp, #12 > 2a: bd90 pop {r4, r7, pc} > > The question is, does the line > > register long r7 __asm__("r7") = n; > > make any guarantee about the value of r7 in the asm block? 
> > Adding the -fomit-frame-pointer flag fixes it, but is the bug in the > code or in the compiler? > > -Rich > _______________________________________________ > cfe-dev mailing list > cfe-dev at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-dev >-------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20150210/d8c1fbdf/attachment.html>
On 11 February 2015 at 06:39, James Molloy <james at jamesmolloy.co.uk> wrote: > My belief is that we don't fully support this syntax. I believe, and I'm > sure Renato on CC will tell me if I'm wrong as he implemented it, that we > only support this for non-allocatable registers, such as SP or FP. Hi James, We do support that syntax, and that's the only syntax for local allocatable registers we support. But I'm not sure if we guarantee that the register won't be clobbered in between, especially if you're using macros in inline functions. The main issue here is that such a macro would be expanded inside a function that is already using a lot of registers for whatever reasons, and even though the compiler can try to guarantee it will assign a specific register inside the inline asm, it won't have much effect outside of it, and this is what I think is going on. >> 6: 278c movs r7, #140 ; 0x8c >> 8: 46ec mov ip, sp >> a: 4619 mov r1, r3 >> c: 68bc ldr r4, [r7, #8] ; XXX r7 is clobbered >> e: 4663 mov r3, ip >> 10: df00 svc 0 >> 12: f7ff fffe bl 0 <__syscall_ret> That code seems very odd... It may be a result of the outside function (lseek) intermixing code with the macro after some heavy optimisation. >> register long r7 __asm__("r7") = n; >> make any guarantee about the value of r7 in the asm block? I believe the guarantee is IFF the inline asm immediately follows, which is what you have, so I'm not sure why r7 is being clobbered in this case. It may be a side effect of some other optimisation or just that the guarantee is not being kept in that special case. >> Adding the -fomit-frame-pointer flag fixes it, That sounds like a coincidence... :) >> but is the bug in the code or in the compiler? Hard to say. Inline asm is a poorly documented GNU extension that was not designed in any meaningful way but evolved from whatever was out there pretty much like a genetic algorithm. 
Clang/LLVM tries to rationalise the implementation and, for obvious reasons, will get different behaviour, not necessarily wrong. Basically, YMMV. The first thing is to duplicate the macro code inside the static inline function and see if the problem disappears. Also, try with lower optimisation levels and if that fixes it, run bugpoint on a reduced case to spot what pass "breaks" it. I think we have to approach the problem in a different way, though. Is there a better way of doing this? cheers, --renato
On 02/10/2015 03:10 PM, Richard Pennington wrote:> I'm porting the musl C library to ARM Thumb. It looks like inline asm > is failing in some cases. Here's one:I've put together a simple test file and am puzzled by the results. If I compile this for Thumb: typedef long long off_t; #if 1 long foo(long); #else #define foo(x) (x) #endif #if 1 inline long __syscall5(n, a, b, c, d, e) { register long r7 __asm__("r7") = n; register long r0 __asm__("r0") = a; register long r1 __asm__("r1") = b; register long r2 __asm__("r2") = c; register long r3 __asm__("r3") = d; register long r4 __asm__("r4") = e; do { __asm__ __volatile__ ( "svc 0" : "=r"(r0) : "r"(r7), "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4) : "memory"); return r0; } while (0); } #else #define __syscall5(n, a, b, c, d, e) \ ({ \ register long r7 __asm__("r7") = n; \ register long r0 __asm__("r0") = a; \ register long r1 __asm__("r1") = b; \ register long r2 __asm__("r2") = c; \ register long r3 __asm__("r3") = d; \ register long r4 __asm__("r4") = e; \ do { __asm__ __volatile__ ( "svc 0" : "=r"(r0) : "r"(r7), "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4) : "memory"); return r0; } while (0); \ ;r0; }) #endif off_t lseek(int fd, off_t offset, int whence) { off_t result; return foo(__syscall5(140,((long) (fd)),((long) (offset>>32)), ((long) (offset)),((long) (&result)),((long) (whence)))) ? -1 : result; } I get .globl lseek .align 2 .type lseek,%function .code 16 @ @lseek .thumb_func lseek: .fnstart .Leh_func_begin0: @ BB#0: @ %entry push {r4, r7, lr} add r7, sp, #4 sub sp, #12 movs r7, #140 mov r12, sp mov r1, r3 ldr r4, [r7, #8] mov r3, r12 @APP svc #0 @NO_APP bl foo ldr r2, [sp] ldr r1, [sp, #4] cmp r0, #0 itt ne movne.w r2, #-1 movne.w r1, #-1 mov r0, r2 add sp, #12 pop {r4, r7, pc} .Ltmp0: .size lseek, .Ltmp0-lseek .cantunwind .fnend where the "ldr r4, [r7, #8]" instruction uses r7 as a frame pointer even though it was over-written earlier. 
If I use -fomit-frame-pointer or any other combination of the conditionals (macro vs. inline, used as a function parameter vs. not), the code emitted is correct. Is there something in the original code that causes it to break while the other forms do not? Or are the other forms working just because of luck? Should the original code work? -Rich