search for: lbb0_3

Displaying 20 results from an estimated 59 matches for "lbb0_3".

2015 Oct 27 (4 replies)
How can I tell llvm, that a branch is preferred ?
...correct? I see nothing in the specs for "branch" or "switch". And __builtin_expect does nothing, of that I am sure. Unfortunately LLVM has this knack for ordering my one most crucial part of code exactly opposite to what I want. It emits (x86_64):

    cmpq    %r15, (%rax,%rdx)
    jne     LBB0_3
Ltmp18:
    leaq    8(%rax,%rdx), %rcx
    jmp     LBB0_4
LBB0_3:
    addq    $8, %rcx
LBB0_4:

when I want:

    cmpq    %r15, (%rax,%rdx)
    je      LBB0_3
    addq    $8, %rcx
    jmp     LBB0_4
LBB0_3:
    leaq    8(%rax,%rdx), %rcx
LBB0_4:

since that saves me executing a jump 99.9% of the time. Is there anything I can do? Ciao Nat!
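For reference, the usual way to state such a preference in source is __builtin_expect, which clang lowers to branch-weight metadata via the llvm.expect intrinsic. A minimal sketch; the function and variable names are hypothetical, not from the original post:

    /* Minimal sketch, assuming the equal case is the rare one, as in the post. */
    #define unlikely(x) __builtin_expect(!!(x), 0)

    long f(long *p, long v, long cur) {
        if (unlikely(*p == v))      /* taken ~0.1% of the time */
            return cur + 8;         /* cold path, should be moved out of line */
        return p[1] + 8;            /* hot path, ideally the fall-through */
    }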
2020 Jun 01 (3 replies)
Aarch64: unaligned access despite -mstrict-align
...$ cat test.s

    .text
    .file   "test.c"
    .globl  f                       // -- Begin function f
    .p2align 2
    .type   f,@function
f:                                  // @f
// %bb.0:
    adrp    x8, g
    ldr     x10, [x8, :lo12:g]
    ldr     x9, [x0]
    ldr     x8, [x10]
    rev     x9, x9
    rev     x8, x8
    cmp     x8, x9
    b.ne    .LBB0_3
// %bb.1:
    ldr     x8, [x10, #8]
    ldr     x9, [x0, #8]
    rev     x8, x8
    rev     x9, x9
    cmp     x8, x9
    b.ne    .LBB0_3
// %bb.2:
    mov     w0, wzr
    ret
.LBB0_3:
    cmp     x8, x9
    mov     w8, #-1
    cneg    w0, w8, hs
    ret
.Lfunc_end0:
    .size   f, .Lfunc_end0-f
                                    // -- End function
    .ident  "clang ve...
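The excerpt omits test.c itself. Judging from the listing (two 8-byte word loads, byte-reversed compares, 0/±1 result), f looks like a 16-byte memcmp against a global pointer; a hedged reconstruction, with the exact source and argument order an assumption:

    /* Assumed shape of test.c; g as a global pointer matches the extra
       indirection (ldr x10, [x8, :lo12:g]) in the listing. */
    #include <string.h>

    const char *g;

    int f(const char *p) {
        return memcmp(p, g, 16);   /* expanded to two 8-byte word compares */
    }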
2018 Apr 12 (3 replies)
[RFC] __builtin_constant_p() Improvements
...# %entry
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register %rbp
    movl    %edi, -16(%rbp)
    cmpl    $0, -16(%rbp)
    je      .LBB0_2
# %bb.1:                # %if.then
    movl    $42, -8(%rbp)
    movl    $0, -4(%rbp)
    movl    -4(%rbp), %eax
    movl    %eax, -12(%rbp)
    jmp     .LBB0_3
.LBB0_2:                # %if.else
    movl    $927, -12(%rbp)         # imm = 0x39F
.LBB0_3:                # %return
    movl    -12(%rbp), %eax
    popq    %rbp
    retq

If the patch looks okay to people, I can shove it onto Phabricator for a review. (My phab-fu is bad.) Thoughts? -bw...
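The RFC's source example is not included in the excerpt. A hedged guess that is consistent with the -O0 listing (42 stored to a local but unused, 0 selected in the then-branch, 927 in the else-branch); all names are assumptions:

    /* Hedged guess at the test function behind the listing; not taken
       verbatim from the RFC. */
    int foo(int a) {
        int r;
        if (a) {
            int b = 42;
            r = __builtin_constant_p(b);   /* evaluates to 0 in this listing */
        } else {
            r = 927;
        }
        return r;
    }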
2019 Jun 30 (6 replies)
[hexagon][PowerPC] code regression (sub-optimal code) on LLVM 9 when generating hardware loops, and the "llvm.uadd" intrinsic.
...                                 // encoding: [A,0x48'A',A,0x5c'A',0xe0,0xf1,0x40,0x75]
                                    // fixup A - offset: 0, value: .LBB0_5, kind: fixup_Hexagon_B15_PCREL
// %bb.2:
    { r0 = #-100 }                  // encoding: [0x80,0xf3,0xdf,0x78]
.LBB0_3:                            // %while.body
                                    // =>This Inner Loop Header: Depth=1
    {
      r3 = add(r0,#1)
      r4 = memw(r2++#4)
      memw(r1++#4) = r4.new
    }                               // encoding: [0x23,0x40,0x00,0xb0,0x24,0x40,0x82,0x9b,0x08,0xd2,0xa1,0xab]...
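The "llvm.uadd" intrinsic named in the subject is reachable from C via __builtin_add_overflow; a minimal sketch, not the poster's test case:

    /* Minimal sketch: with unsigned operands, clang lowers this builtin to
       llvm.uadd.with.overflow.i32, the intrinsic named in the subject line. */
    #include <stdbool.h>

    bool add_overflows(unsigned a, unsigned b, unsigned *sum) {
        return __builtin_add_overflow(a, b, sum);
    }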
2015 Sep 01 (2 replies)
[RFC] New pass: LoopExitValues
...uter)
        for (int Inner = 0; Inner < Size; ++Inner)
            Dst[Outer * Size + Inner] = Src[Outer * Size + Inner] * Val;
}

With LoopExitValues
-------------------------------
matrix_mul:
    testl   %edi, %edi
    je      .LBB0_5
    xorl    %r9d, %r9d
    xorl    %r8d, %r8d
.LBB0_2:
    xorl    %r11d, %r11d
.LBB0_3:
    movl    %r9d, %r10d
    movl    (%rdx,%r10,4), %eax
    imull   %ecx, %eax
    movl    %eax, (%rsi,%r10,4)
    incl    %r11d
    incl    %r9d
    cmpl    %r11d, %edi
    jne     .LBB0_3
    incl    %r8d
    cmpl    %edi, %r8d
    jne     .LBB0_2
.LBB0_5:
    retq

Without LoopExitValues:
-----------------------------------
m...
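A hedged reconstruction of the full matrix_mul source, since the outer loop header is truncated above; the signature is inferred from the x86_64 argument registers and is an assumption:

    /* Hedged reconstruction; the cut-off outer loop header is assumed to
       mirror the inner one. */
    void matrix_mul(int Size, int *Dst, const int *Src, int Val) {
        for (int Outer = 0; Outer < Size; ++Outer)
            for (int Inner = 0; Inner < Size; ++Inner)
                Dst[Outer * Size + Inner] = Src[Outer * Size + Inner] * Val;
    }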
2016 May 27 (2 replies)
Handling post-inc users in LSR
...i32 %StoredValue, i32* %StoredAddr
  %cmp = icmp sgt i64 %K, 1
  br i1 %cmp, label %for.body, label %for.end

for.end:
  ret void
}

# Output in AArch64, where you can see the redundant add instructions for the stored value, the store address, and in the cmp:

foo:
    .cfi_startproc
// BB#0:
    cmp     w0, #2
    b.lt    .LBB0_3
// BB#1:
    sxtw    x9, w0
    add     w8, w0, #1
.LBB0_2:
    add     x10, x1, x9, lsl #2
    add     x9, x9, #1
    str     w8, [x10, #4]
    add     w8, w8, #1
    cmp     x9, #1
    b.gt    .LBB0_2
.LBB0_3:
    ret
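For readers without the full IR, a minimal C sketch (not the poster's foo) of the post-increment pattern in question, where the store address, the stored value, and the exit compare all consume the incremented induction variable:

    /* Sketch only; the original test case's bounds and types are assumptions. */
    void g(int *a, long n) {
        for (long i = 0; i < n; ) {
            ++i;
            a[i] = (int)(i + 1);   /* users of the post-incremented i */
        }
    }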
2012 Jan 13 (2 replies)
[LLVMdev] Odd weak symbol thing on i386
...lldiv_t r;

    r.quot = num / denom;
    r.rem = num % denom;
    if (num >= 0 && r.rem < 0) {
        r.quot++;
        r.rem -= denom;
    }
    return (r);
}

I get the following code emitted for the return if the alias line is present:

LBB0_3:                 # %if.end
    movl    64(%esp), %eax
    movsd   24(%esp), %xmm0
    movsd   32(%esp), %xmm1
    movsd   %xmm1, 8(%eax)
    movsd   %xmm0, (%eax)
    addl    $56, %esp
    popl    %esi
    ret
.Ltmp0:
    .size   _lldiv, .Ltmp0-_lldiv
    .weak   lldiv
lldiv = _lldiv

And this if it isn't:

LBB0_3:...
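The message refers to "the alias line" without quoting it; a sketch of the usual GNU C spelling, with the exact form used in the original file an assumption:

    #include <stdlib.h>

    /* Definition, abbreviated from the post. */
    lldiv_t _lldiv(long long num, long long denom) {
        lldiv_t r;
        r.quot = num / denom;
        r.rem  = num % denom;
        if (num >= 0 && r.rem < 0) { r.quot++; r.rem -= denom; }
        return r;
    }

    /* "The alias line": a weak alias making lldiv resolve to _lldiv,
       which produces the .weak/lldiv = _lldiv directives in the listing. */
    extern __typeof(_lldiv) lldiv __attribute__((weak, alias("_lldiv")));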
2015 Aug 31 (2 replies)
[RFC] New pass: LoopExitValues
Hello LLVM,

This is a proposal for a new pass that improves performance and code size in some nested loop situations. The pass is target independent.

From the description in the file header: This optimization finds loop exit values reevaluated after the loop execution and replaces them with the corresponding exit values if they are available. Such sequences can arise after the
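An illustrative (assumed) sketch of the pattern described, not taken from the RFC itself: a value recomputed after the loop although its exit value is already available:

    /* p + n recomputes exactly what the pointer induction variable already
       holds when the loop exits; names and shape are assumptions. */
    int sum_and_tail(const int *p, int n, const int **end) {
        int s = 0;
        for (int i = 0; i < n; ++i)
            s += p[i];
        *end = p + n;      /* equals the exit value of the pointer IV */
        return s;
    }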
2020 Jul 20 (2 replies)
[ARM] Should Use Load and Store with Register Offset
...void* src, size_t len) {
    char* save = (char*)dst;
    for (size_t i = 0; i < len; ++i)
        *((char*)(dst + i)) = *((char*)(src + i));
    return save;
}

clang --target=armv6m-none-eabi -Os -fomit-frame-pointer

memcpy_alt1:
    push    {r4, lr}
    cmp     r2, #0
    beq     .LBB0_3
    mov     r3, r0
.LBB0_2:
    ldrb    r4, [r1]
    strb    r4, [r3]
    adds    r1, r1, #1
    adds    r3, r3, #1
    subs    r2, r2, #1
    bne     .LBB0_2
.LBB0_3:
    pop     {r4, pc}

arm-none-eabi-gcc -march=armv6-m -Os

memcpy_alt1:
    movs    r3, #0...
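For comparison, an indexed form of the same loop, written so each access is naturally a base-plus-register-offset load or store; memcpy_alt2 is a hypothetical name, not from the thread:

    #include <stddef.h>

    /* Hypothetical variant: d[i]/s[i] map directly onto ldrb/strb with a
       register offset, e.g. ldrb r4, [r1, r3]. */
    void *memcpy_alt2(void *dst, const void *src, size_t len) {
        char *d = (char *)dst;
        const char *s = (const char *)src;
        for (size_t i = 0; i < len; ++i)
            d[i] = s[i];
        return dst;
    }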
2019 Jul 01 (0 replies)
[hexagon][PowerPC] code regression (sub-optimal code) on LLVM 9 when generating hardware loops, and the "llvm.uadd" intrinsic.
...                                 // fixup A - offset: 0, value: .LBB0_5, kind: fixup_Hexagon_B15_PCREL
// %bb.2:
    { r0 = #-100 }                  // encoding: [0x80,0xf3,0xdf,0x78]
.LBB0_3:                            // %while.body
                                    // =>This Inner Loop Header: Depth=1
    {
      r3 = add(r0,#1)
      r4 = memw(r2++#4)...
2018 Sep 20 (3 replies)
Comparing Clang and GCC: only clang stores updated value in each iteration.
...  .type   b,@function
b:                              # @b
# %bb.0:                        # %entry
    lrl     %r0, a
.LBB0_1:                        # %do.body
                                # =>This Inner Loop Header: Depth=1
    cije    %r0, 0, .LBB0_3
# %bb.2:                        # %if.then
                                #   in Loop: Header=BB0_1 Depth=1
    ahi     %r0, 1
    strl    %r0, a
.LBB0_3:                        # %do.cond
                                #   in Loop: Header=BB0_1 Depth=1...
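The C source is not included in the excerpt; a hedged guess at b() based on the listing (global a conditionally incremented and stored back inside the loop), with the loop condition, which is cut off, an assumption:

    /* Hedged guess; clang keeps the ahi + strl pair inside the loop, the
       behavior the thread title complains about. */
    int a, c;

    void b(void) {
        do {
            if (a != 0)
                a++;
        } while (c);
    }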
2018 Apr 13 (0 replies)
[RFC] __builtin_constant_p() Improvements
...offset %rbp, -16
>         movq    %rsp, %rbp
>         .cfi_def_cfa_register %rbp
>         movl    %edi, -16(%rbp)
>         cmpl    $0, -16(%rbp)
>         je      .LBB0_2
> # %bb.1:                # %if.then
>         movl    $42, -8(%rbp)
>         movl    $0, -4(%rbp)
>         movl    -4(%rbp), %eax
>         movl    %eax, -12(%rbp)
>         jmp     .LBB0_3
> .LBB0_2:                # %if.else
>         movl    $927, -12(%rbp)         # imm = 0x39F
> .LBB0_3:                # %return
>         movl    -12(%rbp), %eax
>         popq    %rbp
>         retq
>
> If the patch looks okay to people, I can shove it onto Phabricator for a
> r...
2020 Jul 21 (2 replies)
[ARM] Should Use Load and Store with Register Offset
...ompiling with clang and confirmed that Clang's generated assembly is equivalent to GCC's for the code snippet I posted above.

clang --target=armv6m-none-eabi -Oz -fomit-frame-pointer

memcpy_alt1:
    push    {r4, lr}
    movs    r3, #0
.LBB0_1:
    cmp     r2, r3
    beq     .LBB0_3
    ldrb    r4, [r1, r3]
    strb    r4, [r0, r3]
    adds    r3, r3, #1
    b       .LBB0_1
.LBB0_3:
    pop     {r4, pc}

On the other hand, GCC at -O2 still uses the register-offset load and store instructions, while Clang at -O2 generates the same assembly as at -Os: immediate-offs...
2017 May 30 (3 replies)
[atomics][AArch64] Possible bug in cmpxchg lowering
...efine i1 @foo(i32* %obj, i32 %old, i32 %new) {
entry:
  %v0 = cmpxchg weak volatile i32* %obj, i32 %old, i32 %new release acquire
  %v1 = extractvalue { i32, i1 } %v0, 1
  ret i1 %v1
}

to the equivalent of the following on AArch64:

    ldxr    w8, [x0]
    cmp     w8, w1
    b.ne    .LBB0_3
// BB#1:                // %cmpxchg.trystore
    stlxr   w8, w2, [x0]
    cbz     w8, .LBB0_4
// BB#2:                // %cmpxchg.failure
    mov     w0, wzr
    ret
.LBB0_3:                // %cmpxchg.nostore
    clrex
    mov     w0, wzr...
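An assumed C11 rendering of the IR above, for readers who prefer atomics in C (the post itself gives only the IR; the emphasis in the original highlights the "release acquire" orderings and the plain, non-acquire ldxr):

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Assumed C equivalent of @foo: weak compare-exchange with release
       ordering on success and acquire ordering on failure; foo_c is a
       hypothetical name. */
    bool foo_c(_Atomic int *obj, int old, int desired) {
        return atomic_compare_exchange_weak_explicit(
            obj, &old, desired, memory_order_release, memory_order_acquire);
    }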
2011 Feb 07 (1 reply)
[LLVMdev] Post-inc combining
...i<n2;i+=n3) { s+=a[i]; }, with GCC, I get the following loop body, with a post-modify load:

.L4:
    add     r1, r1, r3
    ldr     r4, [ip], r6
    rsb     r5, r3, r1
    cmp     r2, r5
    add     r0, r0, r4
    bgt     .L4

With LLVM, however, I get:

.LBB0_3:                @ %for.body
                        @ =>This Inner Loop Header: Depth=1
    add     r12, lr, r3
    ldr     lr, [r0, lr, lsl #2]
    add     r1, lr, r1
    cmp     r12, r2
    mov     lr, r12
    blt     .LBB0_3

, which doe...
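A hedged reconstruction of the truncated loop, with the cut-off initialization an assumption:

    /* Only the header's tail survives in the excerpt; the rest is assumed. */
    int strided_sum(const int *a, int n1, int n2, int n3) {
        int s = 0;
        for (int i = n1; i < n2; i += n3)
            s += a[i];      /* GCC uses a post-modify load: ldr r4, [ip], r6 */
        return s;
    }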
2016 May 27 (0 replies)
Handling post-inc users in LSR
...> br i1 %cmp, label %for.body, label %for.end
>
> for.end:
>   ret void
> }
>
> # Output in AArch64 where you can see redundant add instructions for stored value, store address, and in cmp:
>
> foo:
>         .cfi_startproc
> // BB#0:
>         cmp     w0, #2
>         b.lt    .LBB0_3
> // BB#1:
>         sxtw    x9, w0
>         add     w8, w0, #1
> .LBB0_2:
>         add     x10, x1, x9, lsl #2
>         add     x9, x9, #1
>         str     w8, [x10, #4]
>         add     w8, w8, #1
>         cmp     x9, #1
>         b.gt    .LBB0_2
> .LBB0_3:
>         ret
2018 Nov 06 (4 replies)
Rather poor code optimisation of current clang/LLVM targeting Intel x86 (both -64 and -32)
...0_4:                            # in Loop: Header=BB0_2 Depth=1
    add     rdi, 1
    test    esi, esi
    je      .LBB0_5
.LBB0_2:                            # =>This Loop Header: Depth=1
    add     esi, -1
    movzx   edx, byte ptr [rdi]
    shl     edx, 24
    xor     edx, eax
    mov     ecx, -8
    mov     eax, edx
.LBB0_3:                            # Parent Loop BB0_2 Depth=1
                                    |  # 4 instructions instead of 6, r8 not clobbered!
    lea     r8d, [rax + rax]        |  add eax, eax
    mov     edx, r8d                |  # CF is set from the MSB of EAX
    xor     edx, -306674912         |  sbb edx, edx
    test    eax, eax...
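The constant -306674912 is 0xEDB88320, the CRC-32 polynomial, so the inner loop is presumably a bit-at-a-time CRC; a hedged sketch, not the poster's exact source:

    #include <stdint.h>

    /* Hedged guess at the loop being compiled; the shl 24 / MSB test / xor
       with the polynomial in the listing match this shape. */
    uint32_t crc32_bitwise(uint32_t crc, const uint8_t *p, unsigned n) {
        while (n--) {
            crc ^= (uint32_t)*p++ << 24;
            for (int k = 0; k < 8; ++k)     /* the mov ecx, -8 bit counter */
                crc = (crc & 0x80000000u) ? (crc << 1) ^ 0xEDB88320u
                                          : (crc << 1);
        }
        return crc;
    }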
2017 Dec 19 (4 replies)
A code layout related side-effect introduced by rL318299
...--------------
~/workarea/llvm-r318298/dbuild/bin/opt -loop-rotate -S < b.ll | ~/workarea/llvm-r318298/dbuild/bin/llc

    .cfi_startproc
# BB#0:                 # %entry
    pushq   %rax
    .cfi_def_cfa_offset 16
    movl    $i, %eax
    cmpq    %rax, %rsi
    ja      .LBB0_5
# BB#1:
    movl    $i, %eax
    .p2align 4, 0x90
.LBB0_3:                # %while.body
                        # =>This Inner Loop Header: Depth=1
    movq    (%rdi), %rcx
    movq    %rcx, (%rsi)
    movq    8(%rdi), %rcx
    movq    %rcx, (%rsi)
    addq    $6, %rsi
    cmpq    %rdx, %rsi
    jae     .LBB0_4
# BB#2:                 # %while.cond...
2016 Aug 05 (3 replies)
enabling interleaved access loop vectorization
...in[i + 2] + in[i * 2]; } }

We don't vectorize this loop at all, because we calculate the cost of the in[i * 2] gather to be 14 cycles per lane (!). This is an overestimate we need to fix, since the vectorized code is actually fairly decent - e.g. forcing vectorization, with SSE4.2, we get:

.LBB0_3:                # %vector.body
                        # =>This Inner Loop Header: Depth=1
    movdqu  (%rdi,%rax,4), %xmm3
    movd    %xmm0, %rcx
    movdqu  4(%rdi,%rcx,4), %xmm4
    paddd   %xmm3, %xmm4
    movdqu  8(%rdi,%rcx,4), %xmm3
    paddd   %xmm4, %xmm3
    movdqa  %xmm1, %xmm4
    paddq   %xmm4,...
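A hedged reconstruction of the truncated loop; the beginning of the statement is cut off, so the earlier terms, the names, and the signature are assumptions:

    /* Assumed shape; only "in[i + 2] + in[i * 2]" survives in the excerpt. */
    void f(int *out, const int *in, int n) {
        for (int i = 0; i < n; ++i)
            out[i] = in[i] + in[i + 1] + in[i + 2] + in[i * 2];
    }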
2017 Nov 20 (2 replies)
Nowaday Scalar Evolution's Problem.
...                        ; eax++
    cmp     eax, 4          ; cmpv = (eax == 4)
    je      .LBB0_4         ; if (cmpv == true) goto LBB0_4
.LBB0_2:
    cmp     eax, 10         ; cmpv = (eax == 10)
    jne     .LBB0_5         ; if (cmpv == false) goto LBB0_5
    jmp     .LBB0_3         ; goto LBB0_3
.LBB0_4:
    mov     eax, 5          ; eax = 5
    jmp     .LBB0_5         ; goto LBB0_5
.LBB0_3:
    ret                     ; return;
.Lfunc_end0:

The loop doesn't even get deleted! What's happening to SCEV? Yes, reason...
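A hedged sketch of the sort of loop in question, whose trip count SCEV should be able to compute so that indvars/loop-deletion fold it away entirely; not the poster's exact source:

    /* The counter provably reaches 10 (1,2,3,4->5,6,...,10), matching the
       inc / cmp 4 / set 5 / cmp 10 sequence in the listing; terminates. */
    int f(void) {
        int i = 0;
        while (i != 10) {
            i++;
            if (i == 4)
                i = 5;
        }
        return 0;
    }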