thr3ads.net - llvm dev - [LLVMdev] How can I remove these redundant copy between registers? [May 2015]

If this information is useful, please help other people find it:
Share via:

zan jyu Wong

2015-May-21 13:21 UTC

[LLVMdev] How can I remove these redundant copy between registers?

Hi,

I've been working on a Blackfin backend (llvm-3.6.0) based on the previous
one that was removed in llvm-3.1.
llc generates code like this:

 29     p1 = r2;
 30     r5 = [p1];
 31     p1 = r2;
 32     r6 = [p1 + 4];
 33     r5 = r6 + r5;
 34     r6 = [p0 + -4];
 35     r5 *= r6;
 36     p1 = r2;
 37     r6 = [p1 + 8];
 38     p1 = r2;

p1 and r2 are in different register classes.
A p* register can be used to load/store values from memory, while an r*
register cannot.

As we can see, lines 31, 36, and 38 can be deleted. How can I configure llc to
do this? Or do I have to write a custom pass to do this optimization? Any
suggestions are welcome.

Thanks,

Huang
-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20150521/3f5110e8/attachment.html>

Samuel Crow

2015-May-21 16:24 UTC

head link

[LLVMdev] How can I remove these redundant copy between registers?

On May 21, 2015, at 7:21 AM, zan jyu Wong wrote:
> Hi, 
> 
> I've been working on a Blackfin backend (llvm-3.6.0) based on the
> previous one that was removed in llvm-3.1.
> llc generates codes like this:
> 
>  29     p1 = r2;
>  30     r5 = [p1];
>  31     p1 = r2;
>  32     r6 = [p1 + 4];
>  33     r5 = r6 + r5;
>  34     r6 = [p0 + -4];
>  35     r5 *= r6;
>  36     p1 = r2;
>  37     r6 = [p1 + 8];
>  38     p1 = r2;
> 
> p1 and r2 are in different register classes.
> A p* register can be used for load/store values from memory while a r*
> register can not.
> 
> As we can see, line 31, 36, 38 can be deleted. How can I configure llc to
> do this? Or do I have to write a custom pass to do this optimization? Any
> suggestion is welcome.
> 
> Thanks,
> 
> Huang
Hello Huang,

Silly as this may sound, did you run OPT on the bitcode first before using LLC?

Cheers,

Sam

zan jyu Wong

2015-May-22 02:26 UTC

head link

[LLVMdev] How can I remove these redundant copy between registers?

Hi Sam, thanks for your help.

I had never noticed OPT before. I tried running it on the bitcode, but
I still get the code listed above.
FYI, this is what I did:
$ clang -c -m32 -O3 -emit-llvm ex11.c -o ex11.bc
$ opt -S -gvn ex11.bc > ex11.ll
$ llc -march=bfin ex11.ll
Is there anything I'm missing?


And this is what I did before:
$ clang -S -m32 -emit-llvm -O3 file.c -o file.ll
$ llc -march=bfin file.ll

Original C Source File:

  1 typedef struct state {
  2     int V[8][8];
  3     int *offset[8];
  4 } state_t;
  5
  6 void foo(state_t* state, int ch, int *buffer)
  7 {
  8     int *offset = state->offset[ch];
  9
 10     int idx, i;
 11     for (i = 0, idx = 0; i < 100; i++, idx += 5) {
 12         //long long tmp = 0;
 13         int tmp = 0;
 14         for (int j = 0; j < 2; j++) {
 15             tmp += state->V[ch][offset[i]+2*j+0]*buffer[idx + j];
 16             tmp += state->V[ch][offset[i]+2*j+1]*buffer[idx + j];
 17         }
 18
 19         // disable optimization
 20         //volatile long long ret = tmp;
 21         volatile int ret = tmp;
 22     }
 23 }

The .ll file after running OPT on the .bc file:
; ModuleID = 'ex11.bc'
target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
target triple = "i386-apple-macosx10.10.0"

%struct.state = type { [8 x [8 x i32]], [8 x i32*] }

; Function Attrs: nounwind ssp
define void @foo(%struct.state* nocapture readonly %state, i32 %ch, i32*
nocapture readonly %buffer) #0 {
entry:
  %ret = alloca i32, align 4
  %arrayidx = getelementptr inbounds %struct.state* %state, i32 0, i32 1,
i32 %ch
  %0 = load i32** %arrayidx, align 4, !tbaa !2
  br label %for.cond3.preheader

for.cond3.preheader:                              ; preds %for.cond3.preheader,
%entry
  %i.052 = phi i32 [ 0, %entry ], [ %inc27, %for.cond3.preheader ]
  %idx.051 = phi i32 [ 0, %entry ], [ %add28, %for.cond3.preheader ]
  %arrayidx6 = getelementptr inbounds i32* %0, i32 %i.052
  %1 = load i32* %arrayidx6, align 4, !tbaa !6
  %arrayidx9 = getelementptr inbounds %struct.state* %state, i32 0, i32 0,
i32 %ch, i32 %1
  %2 = load i32* %arrayidx9, align 4, !tbaa !6
  %arrayidx11 = getelementptr inbounds i32* %buffer, i32 %idx.051
  %3 = load i32* %arrayidx11, align 4, !tbaa !6
  %add17 = add nsw i32 %1, 1
  %arrayidx20 = getelementptr inbounds %struct.state* %state, i32 0, i32 0,
i32 %ch, i32 %add17
  %4 = load i32* %arrayidx20, align 4, !tbaa !6
  %tmp = add i32 %4, %2
  %tmp48 = mul i32 %tmp, %3
  %add.1 = add nsw i32 %1, 2
  %arrayidx9.1 = getelementptr inbounds %struct.state* %state, i32 0, i32
0, i32 %ch, i32 %add.1
  %5 = load i32* %arrayidx9.1, align 4, !tbaa !6
  %add10.1 = add nuw nsw i32 %idx.051, 1
  %arrayidx11.1 = getelementptr inbounds i32* %buffer, i32 %add10.1
  %6 = load i32* %arrayidx11.1, align 4, !tbaa !6
  %add17.1 = add nsw i32 %1, 3
  %arrayidx20.1 = getelementptr inbounds %struct.state* %state, i32 0, i32
0, i32 %ch, i32 %add17.1
  %7 = load i32* %arrayidx20.1, align 4, !tbaa !6
  %tmp.1 = add i32 %7, %5
  %tmp48.1 = mul i32 %tmp.1, %6
  %add24.1 = add i32 %tmp48.1, %tmp48
  store volatile i32 %add24.1, i32* %ret, align 4
  %inc27 = add nuw nsw i32 %i.052, 1
  %add28 = add nuw nsw i32 %idx.051, 5
  %exitcond53 = icmp eq i32 %inc27, 100
  br i1 %exitcond53, label %for.end29, label %for.cond3.preheader

for.end29:                                        ; preds %for.cond3.preheader
  ret void
}

attributes #0 = { nounwind ssp "less-precise-fpmad"="false"
"no-frame-pointer-elim"="true"
"no-frame-pointer-elim-non-leaf"
"no-infs-fp-math"="false"
"no-nans-fp-math"="false"
"stack-protector-buffer-size"="8"
"unsafe-fp-math"="false"
"use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"PIC Level", i32 2}
!1 = !{!"clang version 3.6.0 (tags/RELEASE_360/final)"}
!2 = !{!3, !3, i64 0}
!3 = !{!"any pointer", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}
!6 = !{!7, !7, i64 0}
!7 = !{!"int", !4, i64 0}

And the generated .s file:

    .text
    .macosx_version_min 10, 10
    .file    "ex11.ll"
    .globl    foo
    .align    4
    .type    foo, at function
foo:                                    // @foo
// BB#0:                                // %entry
    link 16;
    [fp - 4] = r4;
    [fp - 8] = r5;
    [fp - 12] = r6;
    r3 = r1 << 2;
    r4 = r0 + r3;
    r3 = 0 (x);
    r2 += 4;
    p0 = r4;
    r4 = [p0 + 256];
    p0 = r2;
LBB0_1:                                 // %for.cond3.preheader
                                        // =>This Inner Loop Header: Depth=1
    r2 = r1 << 5;
    r2 = r0 + r2;
    r5 = r4 + r3;
    p1 = r5;
    r5 = [p1];
    r5 = r5 << 2;
    r2 = r2 + r5;
    p1 = r2;              <--------------
    r5 = [p1];
    p1 = r2;             <--------------- redundant copy
    r6 = [p1 + 4];
    r5 = r6 + r5;
    r6 = [p0 + -4];
    r5 *= r6;
    p1 = r2;             <--------------- redundant copy
    r6 = [p1 + 8];
    p1 = r2;             <--------------- redundant copy
    r2 = [p1 + 12];
    r2 = r2 + r6;
    r6 = [p0];
    r2 *= r6;
    r2 = r2 + r5;
    [fp - 16] = r2;
    r2 = p0;
    r2 += 20;
    r3 += 4;
    r5 = 400 (z);
    cc = r3 == r5;
    p0 = r2;
    if !cc jump LBB0_1;
    jump LBB0_2;
LBB0_2:                                 // %for.end29
    r6 = [fp - 12];
    r5 = [fp - 8];
    r4 = [fp - 4];
    unlink;
    rts;
Ltmp0:
    .size    foo, Ltmp0-foo


Huang

On Fri, May 22, 2015 at 12:24 AM, Samuel Crow <samueldcrow at gmail.com>
wrote:
>
> On May 21, 2015, at 7:21 AM, zan jyu Wong wrote:
>
> > Hi,
> >
> > I've been working on a Blackfin backend (llvm-3.6.0) based on the
> previous one that was removed in llvm-3.1.
> > llc generates codes like this:
> >
> >  29     p1 = r2;
> >  30     r5 = [p1];
> >  31     p1 = r2;
> >  32     r6 = [p1 + 4];
> >  33     r5 = r6 + r5;
> >  34     r6 = [p0 + -4];
> >  35     r5 *= r6;
> >  36     p1 = r2;
> >  37     r6 = [p1 + 8];
> >  38     p1 = r2;
> >
> > p1 and r2 are in different register classes.
> > A p* register can be used for load/store values from memory while a r*
> register can not.
> >
> > As we can see, line 31, 36, 38 can be deleted. How can I configure llc
> to do this? Or do I have to write a custom pass to do this optimization?
> Any suggestion is welcome.
> >
> > Thanks,
> >
> > Huang
>
> Hello Huang,
>
> Silly as this may sound, did you run OPT on the bitcode first before using
> LLC?
>
> Cheers,
>
> Sam
-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20150522/71f36cf0/attachment.html>

llvm dev - May 2015 - [LLVMdev] How can I remove these redundant copy between registers?

[LLVMdev] How can I remove these redundant copy between registers?

[LLVMdev] How can I remove these redundant copy between registers?

[LLVMdev] How can I remove these redundant copy between registers?