thr3ads.net - llvm dev - [LLVMdev] How to improve code generated for 'getelementptr' ? [Feb 2012]

If this information is useful, please help other people find it:
Share via:

Jeroen Dobbelaere

2012-Feb-02 15:18 UTC

[LLVMdev] How to improve code generated for 'getelementptr' ?

Hi all,

I am working on an llvm backend for a processor with a relatively simple
instruction set.
For small loops, the code that is produced depends heavily on how the loop is
specified:
The less information we provide to clang, the better the loop code becomes...

Any idea how I can teach llvm that we don't have load/store instructions
with a register index, so that it is more efficient to convert Init1 to
incrementing a pointer instead of recomputing the address of 'data[i]'
every time?


The sample C-code looks like :
---
/* Stores 1 into data[0] through data[99], addressing each element by index. */
void Init1(int* data)
{
  int i=0;
  for (i=0; i<100; ++i) {
    /* Indexed store: the element address depends on i on every iteration. */
    data[i]=1;
  }
}

/* Stores 4 into every int in the half-open range [p, e), advancing p itself. */
void Init2(int* p, int* e)
{
  while (p!=e) {
    /* Store through p, then step p to the next int. */
    *p++=4;
  }
}
---

This produces the following assembly code for -O3 (see below).
NOTE: a branch instruction has a delay slot

For Init1, the loop body consists of 8 instructions.
For Init2, it consists of 5 instructions which is already much better
(The optimal would use 4 instructions)

Question: how can we teach llvm to provide code like Init2, for input of Init1 ?

---
	.text
	.globl	_Init1
;; Compiler output (-O3) for Init1: stores 1 into data[0] .. data[99].
;; NOTE(review): register roles are inferred from the matching IR, not from an
;; ISA manual -- R0 = data, R1 = i, R2 = constant 1, R3 = bound 100,
;; R4/R5 = scratch for the element address. Confirm against the backend ABI.
;; The loop body is 8 instructions because the address data + i*4 is rebuilt
;; from the index on every iteration instead of incrementing a pointer.
_Init1:                                 ;; @Init1
;; BB#0:                                ;; %entry
	;; Loop setup: i = 0, value to store = 1, trip bound = 100.
	ldi  R1 , 0
	ldi  R2 , 1
	ldi  R3 , 100
_BB1_1:                                 ;; %for.body
                                        ;; =>This Inner Loop Header: Depth=1
	;; Keep the old i in R4 for addressing, then advance i in R1.
	mov  R4 , R1
	add  R1 , 1
	;; R5 = copy of the base pointer; R4 = old i scaled by 4 (shift left by 2).
	mov  R5 , R0
	sll  R4 , 2
	;; Loop test on the already-incremented i; presumably sets the condition
	;; that bcc24 reads -- TODO confirm against the ISA.
	cmpne  R1 , R3
	;; R5 = address of data[old i].
	add  R5 , R4
	bcc24  _BB1_1
	;; Branch delay slot: the store is issued after the branch, presumably on
	;; both the taken and the fall-through path.
	sw  R2 , R5, 0  
;; BB#2:                                ;; %for.end
	;; Presumably a return through r15; the nop fills the branch delay slot.
	b  r15
	nop

	.globl	_Init2
;; Compiler output (-O3) for Init2: stores 4 into every int in [p, e).
;; NOTE(review): register roles are inferred from the matching IR, not from an
;; ISA manual -- R0 = p, R1 = e, R2 = constant 4. Confirm against the backend ABI.
;; The loop body is 5 instructions: the pointer itself is advanced, so no
;; per-iteration address computation is needed.
_Init2:                                 ;; @Init2
;; BB#0:                                ;; %entry
	;; Guard: skip the loop entirely when p == e.
	cmpeq  R0 , R1
	bcc24  _BB2_3
	nop
;; BB#1:
	;; Value to store, loaded once outside the loop.
	ldi  R2 , 4
_BB2_2:                                 ;; %while.body
                                        ;; =>This Inner Loop Header: Depth=1
	;; Store 4 at the current pointer, then step the pointer by one int (4 bytes).
	sw  R2 , R0, 0  
	add  R0 , 4
	;; Loop again while the pointer has not reached e.
	cmpne  R1 , R0
	bcc24  _BB2_2
	;; Delay slot left empty; presumably the slot a 4-instruction loop would fill.
	nop
_BB2_3:                                 ;; %while.end
	;; Presumably a return through r15; the nop fills the branch delay slot.
	b  r15
	nop
---




For reference, the .ll file:
---
; ModuleID = 'loop_test.c'
target datalayout =
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f64:32:32-v64:32:32-v128:32:32-n32-s0:32:32-a0:0:32-S32"
target triple = "arch--"

; Init1 after optimization: an index-based loop that stores 1 to data[i]
; for i = 0 .. 99.
define void @Init1(i32* nocapture %data) nounwind {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  ; %i.01 is the induction variable: 0 on entry, %inc on the back edge.
  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  ; Address of data[i], recomputed from the index on every iteration.
  %arrayidx = getelementptr inbounds i32* %data, i32 %i.01
  store i32 1, i32* %arrayidx, align 4, !tbaa !0
  %inc = add nsw i32 %i.01, 1
  ; Exit once the incremented index reaches 100.
  %exitcond = icmp eq i32 %inc, 100
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; Init2 after optimization: a pointer-based loop that stores 4 to every i32
; in the range [%p, %e).
define void @Init2(i32* %p, i32* %e) nounwind {
entry:
  ; Guard: an empty range (%p == %e) skips the loop body entirely.
  %cmp1 = icmp eq i32* %p, %e
  br i1 %cmp1, label %while.end, label %while.body

while.body:                                       ; preds = %entry, %while.body
  ; %p.addr.02 is the running pointer: %p on entry, the advanced pointer on the back edge.
  %p.addr.02 = phi i32* [ %incdec.ptr, %while.body ], [ %p, %entry ]
  ; Next element: the current pointer advanced by one i32.
  %incdec.ptr = getelementptr inbounds i32* %p.addr.02, i32 1
  ; The store targets the current (not yet advanced) pointer.
  store i32 4, i32* %p.addr.02, align 4, !tbaa !0
  ; Exit once the advanced pointer reaches %e.
  %cmp = icmp eq i32* %incdec.ptr, %e
  br i1 %cmp, label %while.end, label %while.body

while.end:                                        ; preds = %while.body, %entry
  ret void
}

!0 = metadata !{metadata !"int", metadata !1}
!1 = metadata !{metadata !"omnipotent char", metadata !2}
!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
---

Greetings,

Jeroen Dobbelaere

Apparently Analogous Threads

Search for more maybe matching threads

llvm dev - Feb 2012 - [LLVMdev] How to improve code generated for 'getelementptr' ?

[LLVMdev] How to improve code generated for 'getelementptr' ?

Apparently Analogous Threads

Wisdom of the Ancients