Hello, I've noticed a new possible missed optimization while testing more
trivial code.
This time it's not with a xor but with a multiplication instruction, and
the example is a little bit more involved.
C code:
typedef short t;
t foo(t a, t b)
{
t a4 = a*b;
return a4;
}
Argument "a" is passed in R15:R14, argument "b" in R13:R12, and the return
value is stored in R15:R14.
The mul instruction takes two 8-bit registers and returns a 16-bit result in
R1:R0; this is handled in the SelectionDAG the same way as on x86 (btw, mul is
marked as commutable).
Asm code:
mul r12, r15
mov r8, r0
mul r12, r14
mov r9, r0
mov r10, r1
add r10, r8
mul r13, r14
mov r15, r0
add r15, r10
mov r14, r9
This can be tuned further to the following:
mov r8, r14
mov r9, r15
mul r12, r8
mov r14, r0
mov r15, r1
mul r12, r9
add r15, r0
mul r13, r8
add r15, r0
The difference between both versions is that the second has one instruction
less and saves a scratch register.
If we start by multiplying the lower parts of both arguments instead of
mixing upper and lower parts from the start, we can save the r8 temporary
used in the first example and a later move. Notice that the second version
stores the result of a.low*b.low directly into R15:R14. I'm unsure if this is
related to http://llvm.org/bugs/show_bug.cgi?id=8112
I've attached a txt file with the register coalescing output in case it's
useful, as requested in the previous emails.
Thanks
-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20100909/2836177c/attachment.html>
-------------- next part --------------
********** SIMPLE REGISTER COALESCING **********
********** Function: foo
********** JOINING INTERVALS ***********
entry:
4 %reg1027<def> = MOVRdRr %R12<kill>
Inspecting R12,inf = [0,6:0) 0 at 0*-(6) and %reg1027,0.000000e+00 = [6,54:0)
0 at 6-(54):
Joined. Result = R12,inf = [0,54:0) 0 at 0*-(54)
12 %reg1026<def> = MOVRdRr %R13<kill>
Inspecting R13,inf = [0,14:0) 0 at 0*-(14) and %reg1026,0.000000e+00 =
[14,94:0) 0 at 14-(94):
Joined. Result = R13,inf = [0,94:0) 0 at 0*-(94)
20 %reg1025<def> = MOVRdRr %R14<kill>
Inspecting R14,inf = [0,22:0)[134,146:1) 0 at 0*-(22) 1 at 134-(146) and
%reg1025,0.000000e+00 = [22,94:0) 0 at 22-(94):
Joined. Result = R14,inf = [0,94:0)[134,146:1) 0 at 0*-(94) 1 at 134-(146)
28 %reg1024<def> = MOVRdRr %R15<kill>
Inspecting R15,inf = [0,30:0)[126,146:1) 0 at 0*-(30) 1 at 126-(146) and
%reg1024,0.000000e+00 = [30,38:0) 0 at 30-(38):
Joined. Result = R15,inf = [0,38:0)[126,146:1) 0 at 0*-(38) 1 at 126-(146)
44 %reg1028<def> = MOVRdRr %R0<kill>
Inspecting R0,inf = [38,46:0)[54,62:1)[94,102:2) 0 at 38-(46) 1 at 54-(62) 2
at 94-(102) and %reg1028,0.000000e+00 = [46,86:0) 0 at 46-(86): Interference!
60 %reg1029<def> = MOVRdRr %R0<kill>
Inspecting R0,inf = [38,46:0)[54,62:1)[94,102:2) 0 at 38-(46) 1 at 54-(62) 2
at 94-(102) and %reg1029,0.000000e+00 = [62,134:0) 0 at 62-(134): Interference!
68 %reg1030<def> = MOVRdRr %R1<kill>
Inspecting R1,inf = [38,39:0)[54,70:1)[94,95:2) 0 at 38-(39) 1 at 54-(70) 2
at 94-(95) and %reg1030,0.000000e+00 = [70,78:0) 0 at 70-(78):
Joined. Result = R1,inf = [38,39:1)[54,78:0)[94,95:2) 0 at 54-(78) 1 at
38-(39) 2 at 94-(95)
100 %reg1032<def> = MOVRdRr %R0<kill>
Inspecting R0,inf = [38,46:0)[54,62:1)[94,102:2) 0 at 38-(46) 1 at 54-(62) 2
at 94-(102) and %reg1032,0.000000e+00 = [102,110:0) 0 at 102-(110):
Joined. Result = R0,inf = [38,46:1)[54,62:2)[94,110:0) 0 at 94-(110) 1 at
38-(46) 2 at 54-(62)
124 %R15<def> = MOVRdRr %reg1033<kill>
Inspecting %reg1033,0.000000e+00 = [110,118:1)[118,126:0) 0 at 118-(126) 1 at
110-(118) and R15,inf = [0,38:0)[126,146:1) 0 at 0*-(38) 1 at 126-(146):
Joined. Result = R15,inf = [0,38:0)[110,118:2)[118,146:1) 0 at 0*-(38) 1 at
118-(146) 2 at 110-(118)
132 %R14<def> = MOVRdRr %reg1029<kill>
Inspecting %reg1029,0.000000e+00 = [62,134:0) 0 at 62-(134) and R14,inf =
[0,94:0)[134,146:1) 0 at 0*-(94) 1 at 134-(146): Interference!
76 %reg1031<def> = MOVRdRr %R1<kill>
Inspecting R1,inf = [38,39:1)[54,78:0)[94,95:2) 0 at 54-(78) 1 at 38-(39) 2
at 94-(95) and %reg1031,0.000000e+00 = [78,86:1)[86,118:0) 0 at 86-(118) 1 at
78-(86): Interference!
108 %R15<def> = MOVRdRr %R0<kill>
Can not coalesce physregs.
44 %reg1028<def> = MOVRdRr %R0<kill>
Inspecting R0,inf = [38,46:1)[54,62:2)[94,110:0) 0 at 94-(110) 1 at 38-(46) 2
at 54-(62) and %reg1028,0.000000e+00 = [46,86:0) 0 at 46-(86): Interference!
60 %reg1029<def> = MOVRdRr %R0<kill>
Inspecting R0,inf = [38,46:1)[54,62:2)[94,110:0) 0 at 94-(110) 1 at 38-(46) 2
at 54-(62) and %reg1029,0.000000e+00 = [62,134:0) 0 at 62-(134): Interference!
132 %R14<def> = MOVRdRr %reg1029<kill>
Inspecting %reg1029,0.000000e+00 = [62,134:0) 0 at 62-(134) and R14,inf =
[0,94:0)[134,146:1) 0 at 0*-(94) 1 at 134-(146): Interference!
76 %reg1031<def> = MOVRdRr %R1<kill>
Inspecting R1,inf = [38,39:1)[54,78:0)[94,95:2) 0 at 54-(78) 1 at 38-(39) 2
at 94-(95) and %reg1031,0.000000e+00 = [78,86:1)[86,118:0) 0 at 86-(118) 1 at
78-(86): Interference!
********** INTERVALS POST JOINING **********
R14,inf = [0,94:0)[134,146:1) 0 at 0*-(94) 1 at 134-(146)
%reg1031,0.000000e+00 = [78,86:1)[86,118:0) 0 at 86-(118) 1 at 78-(86)
R1,inf = [38,39:1)[54,78:0)[94,95:2) 0 at 54-(78) 1 at 38-(39) 2 at 94-(95)
R12,inf = [0,54:0) 0 at 0*-(54)
%reg1028,0.000000e+00 = [46,86:0) 0 at 46-(86)
SREG,inf = [86,87:0)[118,119:1) 0 at 86-(87) 1 at 118-(119)
R0,inf = [38,46:1)[54,62:2)[94,110:0) 0 at 94-(110) 1 at 38-(46) 2 at 54-(62)
R15,inf = [0,38:0)[110,118:2)[118,146:1) 0 at 0*-(38) 1 at 118-(146) 2 at
110-(118)
R13,inf = [0,94:0) 0 at 0*-(94)
%reg1029,0.000000e+00 = [62,134:0) 0 at 62-(134)
********** INTERVALS **********
R14,inf = [0,94:0)[134,146:1) 0 at 0*-(94) 1 at 134-(146)
%reg1031,0.000000e+00 = [78,86:1)[86,118:0) 0 at 86-(118) 1 at 78-(86)
R1,inf = [38,39:1)[54,78:0)[94,95:2) 0 at 54-(78) 1 at 38-(39) 2 at 94-(95)
R12,inf = [0,54:0) 0 at 0*-(54)
%reg1028,0.000000e+00 = [46,86:0) 0 at 46-(86)
SREG,inf = [86,87:0)[118,119:1) 0 at 86-(87) 1 at 118-(119)
R0,inf = [38,46:1)[54,62:2)[94,110:0) 0 at 94-(110) 1 at 38-(46) 2 at 54-(62)
R15,inf = [0,38:0)[110,118:2)[118,146:1) 0 at 0*-(38) 1 at 118-(146) 2 at
110-(118)
R13,inf = [0,94:0) 0 at 0*-(94)
%reg1029,0.000000e+00 = [62,134:0) 0 at 62-(134)
********** MACHINEINSTRS **********
BB#0: # derived from entry
36 MULRdRr %R12, %R15<kill>, %R1<imp-def,dead>, %R0<imp-def>
44 %reg1028<def> = MOVRdRr %R0<kill>
52 MULRdRr %R12<kill>, %R14, %R1<imp-def>, %R0<imp-def>
60 %reg1029<def> = MOVRdRr %R0<kill>
76 %reg1031<def> = MOVRdRr %R1<kill>
84 %reg1031<def> = ADDRdRr %reg1031, %reg1028<kill>,
%SREG<imp-def,dead>
92 MULRdRr %R13<kill>, %R14<kill>, %R1<imp-def,dead>,
%R0<imp-def>
108 %R15<def> = MOVRdRr %R0<kill>
116 %R15<def> = ADDRdRr %R15, %reg1031<kill>,
%SREG<imp-def,dead>
132 %R14<def> = MOVRdRr %reg1029<kill>
144 RET %R15<imp-use,kill>, %R14<imp-use,kill>