Displaying 20 results from an estimated 93 matches for "imul".
Did you mean:
impl
2015 Dec 17
2
llvm-3.6 MCAsmParser x64 Error "invalid operand for instruction" when msb set
Hello,
I am experiencing problems, when trying to assemble these two x86-64 Opcodes
"add r64, imm32"
"imul r64, r64, imm32"
When having the most significant bit set for imm32, for example:
"add rax, 0x80000000", "add rax, 0xffffffff", ...
"imul rbx, rsi, 0x80000000", "imul rbx, rsi, 0xffffffff", ...
The Error Message I receive is the following:
"Instru...
2014 Jul 13
2
[LLVMdev] IMUL x86 instruction
Hi,
The x86 CPU IMUL instruction has forms such as:
IMUL reg
EDX:EAX ← EAX * reg
reg, EAX and EDX are 32bit registers.
How can I represent this sort of instruction in LLVM IR ?
It is really a 32-bit * 32-bit = 64-bit multiply, but no LLVM IR exists to do that.
Or, a similar question:
What LLVM IR would produce this IMUL instru...
2018 Dec 01
2
Where's the optimiser gone? (part 5.c): missed tail calls, and more...
...|
__int64 __fastcall mul(__int64 foo, __int64 bar)
{
return foo * bar;
}
push esi | mov ecx, dword ptr [esp + 16]
mov ecx, dword ptr [esp + 16] | mov edx, dword ptr [esp + 12]
mov esi, dword ptr [esp + 8] | imul edx, dword ptr [esp + 8]
mov eax, ecx | mov eax, dword ptr [esp + 4]
imul ecx, dword ptr [esp + 12] | imul ecx, eax
mul esi | add ecx, edx
imul esi, dword ptr [esp + 20] | mul dword ptr [esp + 12]
add...
2005 Feb 22
5
[LLVMdev] Area for improvement
...is generates the following X86 code:
.text
.align 16
.globl init_board
.type init_board, @function
init_board:
subl $4, %esp
movl %esi, (%esp)
movl 8(%esp), %eax
movl $0, %ecx
.LBBinit_board_1: # loopexit.1
imull $7, %ecx, %edx
movl %eax, %esi
addl %edx, %esi
movb $46, (%esi)
imull $7, %ecx, %edx
movl %eax, %esi
addl %edx, %esi
leal 1(%esi), %edx
movb $46, (%edx)
imull $7, %ecx, %edx
movl %eax, %esi
addl %edx, %esi...
2005 Feb 22
0
[LLVMdev] Area for improvement
When I increased COLS to the point where the loop could no longer be
unrolled, the selection dag code generator generated effectively the
same code as the default X86 code generator. Lots of redundant
imul/movl/addl sequences. It can't clean it up either. Only unrolling
all nested loops permits it to be optimized away, regardless of code
generator.
Jeff Cohen wrote:
> I noticed that fourinarow is one of the programs in which LLVM is much
> slower than GCC, so I decided to take a look...
2005 Feb 22
0
[LLVMdev] Area for improvement
...WS+1])
> {
> int i,j;
>
> for (i=0;i<COLS;i++)
> for (j=0;j<ROWS;j++)
> b[i][j]='.';
> for (i=0;i<COLS;i++)
> b[i][ROWS]=0;
> }
>
> This generates the following X86 code:
> imull $7, %ecx, %edx
> movl %eax, %esi
> addl %edx, %esi
> movb $46, (%esi)
> imull $7, %ecx, %edx
> movl %eax, %esi
> addl %edx, %esi
> leal 1(%esi), %edx
... (many many copies of this, see the end of the email for full output)
...
> T...
2005 Feb 22
2
[LLVMdev] Area for improvement
...t;
>> for (i=0;i<COLS;i++)
>> for (j=0;j<ROWS;j++)
>> b[i][j]='.';
>> for (i=0;i<COLS;i++)
>> b[i][ROWS]=0;
>> }
>>
>> This generates the following X86 code:
>> imull $7, %ecx, %edx
>> movl %eax, %esi
>> addl %edx, %esi
>> movb $46, (%esi)
>> imull $7, %ecx, %edx
>> movl %eax, %esi
>> addl %edx, %esi
>> leal 1(%esi), %edx
>
> ... (many many copies of this, see the end of t...
2011 Dec 14
2
[LLVMdev] Failure to optimize ? operator
...e for the two functions:
==============================================
_f1: pushl %ebp xorl %eax, %eax movl
%esp, %ebp movl 8(%ebp), %edx testl %edx, %edx
jle L5 popl %ebp ret .p2align 4,,7L5:
movl %edx, %ecx imull %edx, %ecx popl %ebp
leal 3(%ecx,%ecx,4), %eax imull %edx, %eax
leal 1(%eax,%ecx,2), %eax ret .p2align 4,,15
_f2:
pushl %ebp xorl %eax, %eax movl %esp,
%ebp movl 8(%ebp), %edx testl %edx, %edx jle...
2010 Sep 01
5
[LLVMdev] equivalent IR, different asm
...20RenderBoxModelObjectEPNS_10StyleImageE
## BB#0:
pushq %r14
pushq %rbx
subq $8, %rsp
movq %rsi, %rbx
movq %rdi, %r14
movq %rdx, %rdi
movq %rcx, %rsi
callq __ZN7WebCore4viziEPKNS_20RenderBoxModelObjectEPNS_10StyleImageE
movq %rax, %rcx
shrq $32, %rcx
testl %ecx, %ecx
je LBB0_2
## BB#1:
imull (%rbx), %eax
cltd
idivl %ecx
movl %eax, (%r14)
LBB0_2:
addq $8, %rsp
popq %rbx
popq %r14
ret
$ llc opt-fail.ll -o -
.section __TEXT,__text,regular,pure_instructions
.globl __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE
.align 4, 0x90
__ZN7WebCore6kolos1ERiS0_PK...
2007 Apr 30
0
[LLVMdev] Bootstrap Failure -- Expected Differences?
...g F .text 000000e4 alias_sets_conflict_p
> @@ -11617,23 +11617,23 @@
> 39c: R_386_32 gt_ggc_mx_varray_head_tag
> 3a0: R_386_32 gt_pch_nx_varray_head_tag
>
> -000003b8 <__FUNCTION__.20147>:
> +000003b8 <__FUNCTION__.20062>:
> 3b8: 66 69 6e 64 5f 62 imul $0x625f,0x64(%esi),%bp
> 3be: 61 popa
> - 3bf: 73 65 jae 426 <__FUNCTION__.20952+0xa>
> + 3bf: 73 65 jae 426 <__FUNCTION__.20866+0xa>
> 3c1: 5f pop %edi
> 3c2: 64 65 63 6c 00 2f arp...
2005 Feb 22
0
[LLVMdev] Area for improvement
...S;i++)
>>> for (j=0;j<ROWS;j++)
>>> b[i][j]='.';
>>> for (i=0;i<COLS;i++)
>>> b[i][ROWS]=0;
>>> }
>>>
>>> This generates the following X86 code:
>>> imull $7, %ecx, %edx
>>> movl %eax, %esi
>>> addl %edx, %esi
>>> movb $46, (%esi)
>>> imull $7, %ecx, %edx
>>> movl %eax, %esi
>>> addl %edx, %esi
>>> leal 1(%esi), %edx
>>
>> ... (many...
2018 Dec 01
2
Where's the optimiser gone? (part 5.b): missed tail calls, and more...
...|
pop ebp |
ret |
long long mul(long long foo, long long bar)
{
return foo * bar;
}
mul: # @mul
push ebp
mov ebp, esp
push esi
mov ecx, dword ptr [ebp + 16]
mov esi, dword ptr [ebp + 8]
mov eax, ecx
imul ecx, dword ptr [ebp + 12]
mul esi
imul esi, dword ptr [ebp + 20]
add edx, ecx
add edx, esi
pop esi
pop ebp
ret
2012 Feb 17
0
[LLVMdev] Folding an insertelt chain
On Feb 17, 2012, at 12:50 AM, Ivan Llopard wrote:
> Hello,
>
> I've added a little combining operation in DAGCombiner to fold a chain of insertelt nodes if that chain is proved to fully overwrite the very first source vector. In which case, I supposed a build_vector is better. It seems to be safe but I don't know if it is correctly implemented or if it is already done somewhere
2007 Apr 27
2
[LLVMdev] Bootstrap Failure -- Expected Differences?
The saga continues.
I've been tracking the interface changes and merging them with
the refactoring work I'm doing. I got as far as building stage3
of llvm-gcc but the object files from stage2 and stage3 differ:
warning: ./cc1-checksum.o differs
warning: ./cc1plus-checksum.o differs
(Are the above two ok?)
The list below is clearly bad. I think it's every object file in
the
2019 Mar 01
2
Condition removed? Difference between LLVM and GCC on a small testcase
...es anyone know which optimization pass removes the condition? Thanks!
C code:
extern void bar(int, int);
void foo(int a) {
int b, d;
if (a > 114) {
b = a * 58;
} else {
d = a * 51;
}
bar(b, d);
}
clang.7.0.1 -O2, LLVM generated assembly:
0: 6b c7 3a imul $0x3a,%edi,%eax
3: 6b f7 33 imul $0x33,%edi,%esi
6: 89 c7 mov %eax,%edi
8: e9 00 00 00 00 jmpq d <foo+0xd>
GCC.5.2.0 -O3, GCC generated assembly:
0: 83 ff 72 cmp $0x72,%edi
3: 7f...
2011 Dec 14
0
[LLVMdev] Failure to optimize ? operator
On Tue, Dec 13, 2011 at 5:59 AM, Brent Walker <brenthwalker at gmail.com> wrote:
> The following seemingly identical functions, get compiled to quite
> different machine code. The first is correctly optimized (the
> computation of var y is nicely moved into the else branch of the "if"
> statement), which the second one is not (the full computation of var y
> is
2010 Sep 01
0
[LLVMdev] equivalent IR, different asm
...; pushq %rbx
> subq $8, %rsp
> movq %rsi, %rbx
> movq %rdi, %r14
> movq %rdx, %rdi
> movq %rcx, %rsi
> callq __ZN7WebCore4viziEPKNS_20RenderBoxModelObjectEPNS_10StyleImageE
> movq %rax, %rcx
> shrq $32, %rcx
> testl %ecx, %ecx
> je LBB0_2
> ## BB#1:
> imull (%rbx), %eax
> cltd
> idivl %ecx
> movl %eax, (%r14)
> LBB0_2:
> addq $8, %rsp
> popq %rbx
> popq %r14
> ret
>
>
> $ llc opt-fail.ll -o -
>
> .section __TEXT,__text,regular,pure_instructions
> .globl __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxMod...
2012 Feb 17
3
[LLVMdev] Folding an insertelt chain
Hello,
I've added a little combining operation in DAGCombiner to fold a chain
of insertelt nodes if that chain is proved to fully overwrite the very
first source vector. In which case, I supposed a build_vector is better.
It seems to be safe but I don't know if it is correctly implemented or
if it is already done somewhere else. Please find attached the patch.
Regards,
Ivan
2017 Jul 01
2
KNL Assembly Code for Matrix Multiplication
...Parent Loop BB0_2 Depth=2
>>>>> # => This Inner Loop
>>>>> Header: Depth=3
>>>>> # this bb will run 15 times
>>>>> vmovq rax, xmm9
>>>>> imul r10, r9, 4000
>>>>> lea rbx, [rdi + r10]
>>>>> *vpmuludq zmm14, zmm10, zmm2 ; this is BB for vector here we
>>>>> have to do gather for B due to arbitrary addresses so here
>>>>> zmm10=[8,9,10,11,12,13,14,15]. it means zmm10 contains...
2014 Oct 24
3
[LLVMdev] IndVar widening in IndVarSimplify causing performance regression on GPU programs
...4, and setp.lt.s64
instructions which are more expensive than their 32-bit counterparts.
Indeed, the SASS code (disassembly of the actual machine code running on
GPUs) of the version with widening looks significantly longer.
Without widening (7 instructions):
.L_1:
/*0048*/ IMUL R2, R0, R0;
/*0050*/ IADD R0, R0, 0x1;
/*0058*/ ST.E [R4], R2;
/*0060*/ ISETP.NE.AND P0, PT, R0, c[0x0][0x140], PT;
/*0068*/ IADD R4.CC, R4, 0x4;
/*0070*/ IADD.X R5, R5, RZ;...