Displaying 20 results from an estimated 86 matches for "lbb0_2".
Did you mean:
lbb0_1
2020 Oct 06
2
Optimizing assembly generated for tail call
...ation case. Below is an example (https://godbolt.org/z/ao15xE):
> void g1();
> void g2();
> void f(bool v) {
> if (v) {
> g1();
> } else {
> g2();
> }
> }
>
The assembly generated is as follows:
> f(bool): # @f(bool)
> testb %dil, %dil
> je .LBB0_2
> jmp g1() # TAILCALL
> .LBB0_2:
> jmp g2() # TAILCALL
>
However, in this specific case (where no function epilogue is needed), one
can actually change 'je .LBB0_2' to 'je g2()' directly, thus saving a jump.
Is there any way I could instruct LLVM to do this? For my use c...
2010 Sep 01
5
[LLVMdev] equivalent IR, different asm
...kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE
## BB#0:
pushq %r14
pushq %rbx
subq $8, %rsp
movq %rsi, %rbx
movq %rdi, %r14
movq %rdx, %rdi
movq %rcx, %rsi
callq __ZN7WebCore4viziEPKNS_20RenderBoxModelObjectEPNS_10StyleImageE
movq %rax, %rcx
shrq $32, %rcx
testl %ecx, %ecx
je LBB0_2
## BB#1:
imull (%rbx), %eax
cltd
idivl %ecx
movl %eax, (%r14)
LBB0_2:
addq $8, %rsp
popq %rbx
popq %r14
ret
$ llc opt-fail.ll -o -
.section __TEXT,__text,regular,pure_instructions
.globl __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE
.align 4, 0x90
__ZN7WebCore...
2010 Sep 01
0
[LLVMdev] equivalent IR, different asm
...BB#0:
> pushq %r14
> pushq %rbx
> subq $8, %rsp
> movq %rsi, %rbx
> movq %rdi, %r14
> movq %rdx, %rdi
> movq %rcx, %rsi
> callq __ZN7WebCore4viziEPKNS_20RenderBoxModelObjectEPNS_10StyleImageE
> movq %rax, %rcx
> shrq $32, %rcx
> testl %ecx, %ecx
> je LBB0_2
> ## BB#1:
> imull (%rbx), %eax
> cltd
> idivl %ecx
> movl %eax, (%r14)
> LBB0_2:
> addq $8, %rsp
> popq %rbx
> popq %r14
> ret
>
>
> $ llc opt-fail.ll -o -
>
> .section __TEXT,__text,regular,pure_instructions
> .globl __ZN7WebCore6kolos1...
2013 Aug 19
2
[LLVMdev] Duplicate loading of double constants
...uble
constants,
e.g.
$ cat t.c
double f(double* p, int n)
{
double s = 0;
if (n)
s += *p;
return s;
}
$ clang -S -O3 t.c -o -
...
f: # @f
.cfi_startproc
# BB#0:
xorps %xmm0, %xmm0
testl %esi, %esi
je .LBB0_2
# BB#1:
xorps %xmm0, %xmm0
addsd (%rdi), %xmm0
.LBB0_2:
ret
...
Note that there are 2 xorps instructions, the one in BB#1 being clearly
redundant
as it's dominated by the first one. Two xorps come from 2 FsFLD0SD
generated by
instruction selection and never eliminat...
2018 Apr 12
3
[RFC] __builtin_constant_p() Improvements
...movel %ecx, %eax
retq
And this code at -O0:
bar: # @bar
.cfi_startproc
# %bb.0: # %entry
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
movl %edi, -16(%rbp)
cmpl $0, -16(%rbp)
je .LBB0_2
# %bb.1: # %if.then
movl $42, -8(%rbp)
movl $0, -4(%rbp)
movl -4(%rbp), %eax
movl %eax, -12(%rbp)
jmp .LBB0_3
.LBB0_2: # %if.else
movl $927, -12(%rbp) # imm = 0x39F
.LBB0_3: # %return
movl -12(%rbp)...
2015 Sep 01
2
[RFC] New pass: LoopExitValues
...Outer = 0; Outer < Size; ++Outer)
for (int Inner = 0; Inner < Size; ++Inner)
Dst[Outer * Size + Inner] = Src[Outer * Size + Inner] * Val;
}
With LoopExitValues
-------------------------------
matrix_mul:
testl %edi, %edi
je .LBB0_5
xorl %r9d, %r9d
xorl %r8d, %r8d
.LBB0_2:
xorl %r11d, %r11d
.LBB0_3:
movl %r9d, %r10d
movl (%rdx,%r10,4), %eax
imull %ecx, %eax
movl %eax, (%rsi,%r10,4)
incl %r11d
incl %r9d
cmpl %r11d, %edi
jne .LBB0_3
incl %r8d
cmpl %edi, %r8d
jne .LBB0_2
.LBB0_5:
retq
Without LoopExitValues:
------...
2013 Aug 20
0
[LLVMdev] Duplicate loading of double constants
...double s = 0;
> if (n)
> s += *p;
> return s;
> }
> $ clang -S -O3 t.c -o -
> ...
> f: # @f
> .cfi_startproc
> # BB#0:
> xorps %xmm0, %xmm0
> testl %esi, %esi
> je .LBB0_2
> # BB#1:
> xorps %xmm0, %xmm0
> addsd (%rdi), %xmm0
> .LBB0_2:
> ret
> ...
>
Thanks. Please file a bug for this on llvm.org/bugs .
The crux of the problem is that machine CSE runs before register allocation
and is consequently extremely conservati...
2016 May 27
2
Handling post-inc users in LSR
...cmp sgt i64 %K, 1
br i1 %cmp, label %for.body, label %for.end
for.end:
ret void
}
# Output in AArch64 where you can see redundant add instructions for
stored value, store address, and in cmp :
foo:
.cfi_startproc
// BB#0:
cmp w0, #2
b.lt .LBB0_3
// BB#1:
sxtw x9, w0
add w8, w0, #1
.LBB0_2:
add x10, x1, x9, lsl #2
add x9, x9, #1
str w8, [x10, #4]
add w8, w8, #1
cmp x9, #1
b.gt .LBB0_2
.LBB0_3:
ret
2019 Sep 14
2
Side-channel resistant values
...ill doesn’t work:
int test_cmov(int left, int right, int *alt) {
return __builtin_unpredictable(left < right) ? *alt : 999;
}
Should generate:
test_cmov:
movl $999, %eax
cmpl %esi, %edi
cmovll (%rdx), %eax
retq
But currently generates:
test_cmov:
movl $999, %eax
cmpl %esi, %edi
jge .LBB0_2
movl (%rdx), %eax
.LBB0_2:
retq
> On Sep 14, 2019, at 12:18 AM, Sanjay Patel <spatel at rotateright.com> wrote:
>
> I'm not sure if this is the entire problem, but SimplifyCFG loses the 'unpredictable' metadata when it converts a set of cmp/br into a switch:
> ht...
2019 Sep 02
3
AVX2 codegen - question reg. FMA generation
...est case that has an fmul/fadd
sequence on <8 x float> vector types, I don't see the x86-64 code
generator (with cpu set to haswell or later types) turning it into an
AVX2 FMA instructions. Here's the snippet in the output it generates:
$ llc -O3 -mcpu=skylake
---------------------
.LBB0_2: # =>This Inner Loop Header: Depth=1
vbroadcastss (%rsi,%rdx,4), %ymm0
vmulps (%rdi,%rcx), %ymm0, %ymm0
vaddps (%rax,%rcx), %ymm0, %ymm0
vmovups %ymm0, (%rax,%rcx)
incq %rdx
addq $32, %rcx
cmpq $15, %rdx
jle .LBB0_2
-----------------------
$ llc --version
LLVM (ht...
2010 Sep 01
2
[LLVMdev] equivalent IR, different asm
...>> subq $8, %rsp
>> movq %rsi, %rbx
>> movq %rdi, %r14
>> movq %rdx, %rdi
>> movq %rcx, %rsi
>> callq __ZN7WebCore4viziEPKNS_20RenderBoxModelObjectEPNS_10StyleImageE
>> movq %rax, %rcx
>> shrq $32, %rcx
>> testl %ecx, %ecx
>> je LBB0_2
>> ## BB#1:
>> imull (%rbx), %eax
>> cltd
>> idivl %ecx
>> movl %eax, (%r14)
>> LBB0_2:
>> addq $8, %rsp
>> popq %rbx
>> popq %r14
>> ret
>>
>>
>> $ llc opt-fail.ll -o -
>>
>> .section __TEXT,__tex...
2015 Aug 31
2
[RFC] New pass: LoopExitValues
Hello LLVM,
This is a proposal for a new pass that improves performance and code
size in some nested loop situations. The pass is target independent.
>From the description in the file header:
This optimization finds loop exit values reevaluated after the loop
execution and replaces them by the corresponding exit values if they
are available. Such sequences can arise after the
2012 Jan 12
1
[LLVMdev] A question of Sparc assembly generated by llc
...%l1
or %g0, %l1, %o0
call printf
nop
ld [%fp+-12], %o2
ld [%fp+-8], %l2
sethi %hi(.L.strQ521), %l3
add %l3, %lo(.L.strQ521), %o0
or %g0, %l2, %o1
call MY_FUNCTION
nop
or %g0, 1, %i0
(subcc %l1, 0, %l1 ! This line is added by me. It was not there)
bne .LBB0_2
nop
! BB#1:
subcc %l2, 0, %l2
or %g0, %l0, %i0
.LBB0_2:
.......
I am not an expert on Sparc assembly, but I read from somewhere that
branching instructions are set by the status flags. The first 'bne'
statement appeared before any subcc or any other cc opcodes. The code...
2018 Apr 13
0
[RFC] __builtin_constant_p() Improvements
...# @bar
> .cfi_startproc
> # %bb.0: # %entry
> pushq %rbp
> .cfi_def_cfa_offset 16
> .cfi_offset %rbp, -16
> movq %rsp, %rbp
> .cfi_def_cfa_register %rbp
> movl %edi, -16(%rbp)
> cmpl $0, -16(%rbp)
> je .LBB0_2
> # %bb.1: # %if.then
> movl $42, -8(%rbp)
> movl $0, -4(%rbp)
> movl -4(%rbp), %eax
> movl %eax, -12(%rbp)
> jmp .LBB0_3
> .LBB0_2: # %if.else
> movl $927, -12(%rbp) # imm = 0x39F
> .LBB0_3:...
2016 May 27
0
Handling post-inc users in LSR
...d:
> ret void
> }
>
>
> # Output in AArch64 where you can see redundant add instructions for stored value, store address, and in cmp :
>
> foo:
> .cfi_startproc
> // BB#0:
> cmp w0, #2
> b.lt .LBB0_3
> // BB#1:
> sxtw x9, w0
> add w8, w0, #1
> .LBB0_2:
> add x10, x1, x9, lsl #2
> add x9, x9, #1
> str w8, [x10, #4]
> add w8, w8, #1
> cmp x9, #1
> b.gt .LBB0_2
> .LBB0_3:
> ret
> _______________________________________________
> LLVM Developers mailing list
> llvm-dev at lists.llvm.org
> http://lists.llvm...
2018 Nov 06
4
Rather poor code optimisation of current clang/LLVM targeting Intel x86 (both -64 and -32)
...// these 4 lines is
crc >>= 1; // rather poor!
}
return ~crc;
}
See <https://godbolt.org/z/eYJeWt> (-O1) and <https://godbolt.org/z/zeExHm> (-O2)
crc32be: # @crc32be
xor eax, eax
test esi, esi
jne .LBB0_2
jmp .LBB0_5
.LBB0_4: # in Loop: Header=BB0_2 Depth=1
add rdi, 1
test esi, esi
je .LBB0_5
.LBB0_2: # =>This Loop Header: Depth=1
add esi, -1
movzx edx, byte ptr [rdi]
shl edx, 24
xor edx, eax
mov ecx,...
2015 Mar 03
2
[LLVMdev] Need a clue to improve the optimization of some C code
....S. In case someone is interested, here is the assembler code and the IR that produced it.
Relevant LLVM generated x86_64 assembler portion with -Os
~~~
testq %r12, %r12
je LBB0_5
## BB#1:
movq -8(%r12), %rcx
movq (%rcx), %rax
movq -8(%rax), %rdx
andq %r15, %rdx
cmpq %r15, (%rax,%rdx)
je LBB0_2
## BB#3:
addq $8, %rcx
jmp LBB0_4
LBB0_2:
leaq 8(%rdx,%rax), %rcx
LBB0_4:
movq %r12, %rdi
movq %r15, %rsi
movq %r14, %rdx
callq *(%rcx)
movq %rax, %rbx
LBB0_5:
~~~
Better/tighter assembler code would be (saves 2 instructions, one jump less)
~~~
testq %r12, %r12
je LBB0_5
movq -8(%r12),...
2019 Sep 14
2
Side-channel resistant values
...> movl $999, %eax
>> cmpl %esi, %edi
>> cmovll (%rdx), %eax
>> retq
>>
>> But currently generates:
>>
>> test_cmov:
>> movl $999, %eax
>> cmpl %esi, %edi
>> jge .LBB0_2
>> movl (%rdx), %eax
>> .LBB0_2:
>> retq
>>
>>
>>
>> > On Sep 14, 2019, at 12:18 AM, Sanjay Patel <spatel at rotateright.com> wrote:
>> >
>> > I'm not sure if this is the entire problem, but SimplifyCFG...
2014 Sep 02
3
[LLVMdev] LICM promoting memory to scalar
...ooii
// BB#0: // %entry
cbz w0, .LBB0_5
// BB#1: // %for.body.lr.ph
mov w8, wzr
cmp w0, #0 // =0
cinc w9, w0, lt
asr w9, w9, #1
adrp x10, globalvar
.LBB0_2: // %for.body
// =>This Inner Loop Header: Depth=1
cmp w8, w9
b.hs .LBB0_4
// BB#3: // %if.then
// in Loop: Header=BB0_2 Depth=1...
2011 Feb 18
0
[LLVMdev] Adding "S" suffixed ARM/Thumb2 instructions
On Feb 17, 2011, at 10:35 PM, Вадим Марковцев wrote:
> Hello everyone,
>
> I've added the "S" suffixed versions of ARM and Thumb2 instructions to tablegen. Those are, for example, "movs" or "muls".
> Of course, some instructions have already had their twins, such as add/adds, and I left them untouched.
Adding separate "s" instructions is