carl-llvm-dev@petosoft.com via llvm-dev
2018-Dec-29 17:46 UTC
[llvm-dev] Branch folding optimisation on the AVR platform produces out of order code
Hi everyone,

I'm getting a miscompilation from LLVM IR on the AVR platform. Studying the generated assembly, it looks like the basic blocks end up out of order during the branch folding optimisation phase.

This is the source LLVM IR that I am working from:

define hidden void @_TF4main9i2cUpdateFT8registerVs5UInt85valueS0__T_(i8, i8) local_unnamed_addr #1 {
entry:
  switch i8 %0, label %9 [
    i8 6, label %2
    i8 7, label %8
  ]

; <label>:2: ; preds = %entry
  %3 = icmp ugt i8 %1, 90
  %4 = icmp ult i8 %1, 5
  %. = select i1 %4, i8 5, i8 %1
  %5 = select i1 %3, i8 90, i8 %.
  store i8 %5, i8* getelementptr inbounds (%Vs5UInt8, %Vs5UInt8* @_Tv4main11delayFactorVs5UInt8, i64 0, i32 0), align 1
  %6 = zext i8 %5 to i32
  %7 = mul nuw nsw i32 %6, 100
  store i32 %7, i32* getelementptr inbounds (%Vs6UInt32, %Vs6UInt32* @_Tv4main7delayUsVs6UInt32, i64 0, i32 0), align 4
  tail call void @_TF3AVR11writeEEPROMFT7addressVs6UInt165valueVs5UInt8_T_(i16 34, i8 %5)
  br label %9

; <label>:8: ; preds = %entry
  %not. = icmp ne i8 %1, 0
  %.2 = zext i1 %not. 
to i8
  store i1 %not., i1* getelementptr inbounds (%Sb, %Sb* @_Tv4main7enabledSb, i64 0, i32 0), align 1
  tail call void @_TF3AVR11writeEEPROMFT7addressVs6UInt165valueVs5UInt8_T_(i16 35, i8 %.2)
  br label %9

; <label>:9: ; preds = %8, %2, %entry
  ret void
}

When this is compiled for the AVR architecture using llc with -O3, it produces this assembly language:

00000420 <_TF4main9i2cUpdateFT8registerVs5UInt85valueS0__T_>:
 420: 1f 93 push r17
 422: 87 30 cpi r24, 0x07 ; 7
 424: 09 f4 brne .+2 ; 0x428 <LBB4_1>
 426: 26 c0 rjmp .+76 ; 0x474 <LBB4_8>

00000428 <LBB4_1>:
 428: 86 30 cpi r24, 0x06 ; 6
 42a: 09 f0 breq .+2 ; 0x42e <LBB4_2>
 42c: 21 c0 rjmp .+66 ; 0x470 <LBB4_7>

0000042e <LBB4_2>:
 42e: 85 e0 ldi r24, 0x05 ; 5
 430: 65 30 cpi r22, 0x05 ; 5
 432: 08 f0 brcs .+2 ; 0x436 <LBB4_4>
 434: 86 2f mov r24, r22

00000436 <LBB4_4>:
 436: 1a e5 ldi r17, 0x5A ; 90
 438: 6b 35 cpi r22, 0x5B ; 91
 43a: 08 f4 brcc .+2 ; 0x43e <LBB4_6>
 43c: 18 2f mov r17, r24

0000043e <LBB4_6>:
 43e: 10 93 b8 01 sts 0x01B8, r17
 442: 61 2f mov r22, r17
 444: 77 27 eor r23, r23
 446: 24 e6 ldi r18, 0x64 ; 100
 448: 30 e0 ldi r19, 0x00 ; 0
 44a: 80 e0 ldi r24, 0x00 ; 0
 44c: 90 e0 ldi r25, 0x00 ; 0
 44e: 48 2f mov r20, r24
 450: 59 2f mov r21, r25
 452: 0e 94 33 16 call 0x2c66 ; 0x2c66 <__mulsi3>
 456: 90 93 bf 01 sts 0x01BF, r25
 45a: 80 93 be 01 sts 0x01BE, r24
 45e: 70 93 bd 01 sts 0x01BD, r23
 462: 60 93 bc 01 sts 0x01BC, r22
 466: 82 e2 ldi r24, 0x22 ; 34
 468: 90 e0 ldi r25, 0x00 ; 0
 46a: 61 2f mov r22, r17
 46c: 0e 94 fd 02 call 0x5fa ; 0x5fa <_TF3AVR11writeEEPROMFT7addressVs6UInt165valueVs5UInt8_T_>

00000470 <LBB4_7>:
 470: 1f 91 pop r17
 472: 08 95 ret

00000474 <LBB4_8>:
 474: 21 e0 ldi r18, 0x01 ; 1
 476: 60 30 cpi r22, 0x00 ; 0
 478: 09 f4 brne .+2 ; 0x47c <LBB4_10>
 47a: 20 e0 ldi r18, 0x00 ; 0

0000047c <LBB4_10>:
 47c: 82 2f mov r24, r18
 47e: 81 70 andi r24, 0x01 ; 1
 480: 80 93 c0 01 sts 0x01C0, r24
 484: 83 e2 ldi r24, 0x23 ; 35
 486: 90 e0 ldi r25, 0x00 ; 0
 488: 62 2f mov r22, r18
 48a: 0e 94 fd 02 call 0x5fa ; 0x5fa 
<_TF3AVR11writeEEPROMFT7addressVs6UInt165valueVs5UInt8_T_>
; *** falls through to the next method ***

0000048e <_TF4main11updateDelayFVs5UInt8T_>:
 48e: 1f 93 push r17
 490: 95 e0 ...

It looks to me like either block LBB4_7 should be last, or there should be an rjmp to LBB4_7 at the end of LBB4_10. As it stands, LBB4_10 has no terminator, so after the call returns, execution falls through into the next function.

When compiled with -O0 the code stays in the correct order, although obviously it's much more verbose and inefficient, with loads of "register spill" stuff, whatever that means!

Using -print-after-all I managed to work out that the mis-optimisation occurs in the optimisation pass "Control Flow Optimizer". The thing I'm finding confusing is that this pass seems to be shared code, not target-specific code. Is there a way to understand how this pass works and, in particular, whether there are any hooks or configuration coming in from the target-specific AVR code in my branch that could be causing this behaviour?

Thanks for any help or advice you guys can give.

Carl
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20181229/5fa786e0/attachment.html>