Hello
since my broad RFC request didn't catch any responses, let me get a bit
more into the nitty-gritty:
I tried to get llvm (3.7) to optimize superfluous allocas away, but so
far I haven't figured out how. Is there no optimizer for this ? Should I
adorn something with some attributes ? As far as I can tell no, but I am
no llvm expert...
For what I want to do, I will probably have a few dozen combinable
allocas in each function of my generated code. So it would be worthwhile
to reduce stack space needs.
Ciao
Nat!
---
; Struct of two i32 fields; @g below passes pointers to it to @f.
%struct.a_b = type { i32, i32 }
; @g fills two separate stack objects (%x, then %y) and passes each one to @f
; in turn.
; NOTE: the declaration of @f and attribute group #0 are not part of this
; excerpt.
; Function Attrs: nounwind ssp uwtable
define void @g() #0 {
entry:
; Two distinct stack slots, both allocated in the entry block.
%x = alloca %struct.a_b, align 4
%y = alloca %struct.a_b, align 4
; Field 0 of %x = 1, field 1 of %x = 2.
%a = getelementptr inbounds %struct.a_b, %struct.a_b* %x, i32 0, i32 0
store i32 1, i32* %a, align 4
%b = getelementptr inbounds %struct.a_b, %struct.a_b* %x, i32 0, i32 1
store i32 2, i32* %b, align 4
call void @f(%struct.a_b* %x)
; %x is not referenced again in this function after the call above.
; Field 0 of %y = 1, field 1 of %y = 3.
%a1 = getelementptr inbounds %struct.a_b, %struct.a_b* %y, i32 0, i32 0
store i32 1, i32* %a1, align 4
%b2 = getelementptr inbounds %struct.a_b, %struct.a_b* %y, i32 0, i32 1
store i32 3, i32* %b2, align 4
call void @f(%struct.a_b* %y)
ret void
}
---
produces the following x86_64-apple-macosx10.10.0 output (with -O4)
_g:
0000000000000000 pushq %rbp
0000000000000001 movq %rsp, %rbp
0000000000000004 subq $0x10, %rsp
0000000000000008 movabsq $0x200000001, %rax ## imm = 0x200000001
0000000000000012 movq %rax, -0x8(%rbp)
0000000000000016 leaq -0x8(%rbp), %rdi
--------------------------------^^^^
000000000000001a callq 0x1f
000000000000001f movabsq $0x300000001, %rax ## imm = 0x300000001
0000000000000029 movq %rax, -0x10(%rbp)
000000000000002d leaq -0x10(%rbp), %rdi
--------------------------------^^^^^
0000000000000031 callq 0x36
0000000000000036 addq $0x10, %rsp
000000000000003a popq %rbp
000000000000003b retq
000000000000003c nopl (%rax)
// Test case for stack-slot reuse: fills x and passes it to f(), then fills y
// and passes it to f(). x is not referenced after the first call.
// NOTE(review): struct a_b and f() are declared outside this excerpt.
void g( void)
{
struct a_b x;
struct a_b y;
x.a = 1;
x.b = 2;
// Last use of x in this function.
f( &x);
// x no longer needed
// expect y to reuse x space
y.a = 1;
y.b = 3;
f( &y);
}
Caldarale, Charles R via llvm-dev
2015-Aug-31 12:30 UTC
[llvm-dev] alloca combining, not (yet) possible ?
> From: llvm-dev [mailto:llvm-dev-bounces at lists.llvm.org] On Behalf Of Nat! via llvm-dev
> Subject: [llvm-dev] alloca combining, not (yet) possible ?
>
> I tried to get llvm (3.7) to optimize superfluous allocas away, but
> so far I haven't figured out how. Should I adorn something with
> some attributes ?

Yes.

> void g( void)
> {
> struct a_b x;
> struct a_b y;
>
> x.a = 1;
> x.b = 2;
> f( &x);
>
> // x no longer needed
> // expect y to reuse x space
> y.a = 1;
> y.b = 3;
> f( &y);
> }

You have not provided us with the declaration for f(). Unless its argument is marked with the nocapture attribute, the compilation of g() cannot assume that f() has not retained a pointer to the x struct and is using it in the second call.

- Chuck
Caldarale, Charles R schrieb:
> You have not provided us with the declaration for f(). Unless its argument is marked with the nocapture attribute, the compilation of g() cannot assume that f() has not retained a pointer to the x struct and is using it in the second call.
>

Thanks a lot for the input. Yes, I forgot to do that. The C function declaration would have been

void f( struct a_b *p);

which compiled into

declare void @f(%struct.a_b*) #2

with

attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }

---

I could not figure out how to decorate my C code to emit the nocapture attribute; __attribute__((nocapture)) is unknown. So I tried to modify the IR code by hand to read thusly:

declare void @f(%struct.a_b* nocapture) #1

But in the end, it didn't make a difference: when I compiled it with

../llvm-build.d/bin/llc -O3 -o test-combine-alloca.s test-combine-alloca.ir

it still used two allocas.

From a C perspective, I find it weird that it should concern the caller if the called function "mistakenly" holds onto an alloca buffer that will be invalid soon anyway. But I guess that's C++ magic somehow :)

Ciao
Nat!
----
; ModuleID = 'test-combine-alloca.c'
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"

; Struct of two i32 fields; @g passes pointers to it to @f.
%struct.a_b = type { i32, i32 }

; External callee; its pointer parameter carries the nocapture attribute.
declare void @f(%struct.a_b* nocapture) #1

; @g fills two separate stack objects (%x, then %y) and passes each one to @f
; in turn.
; Function Attrs: nounwind ssp uwtable
define void @g() #0 {
entry:
  ; Two distinct stack slots, both allocated in the entry block.
  %x = alloca %struct.a_b, align 4
  %y = alloca %struct.a_b, align 4
  ; Field 0 of %x = 1, field 1 of %x = 2.
  %a = getelementptr inbounds %struct.a_b, %struct.a_b* %x, i32 0, i32 0
  store i32 1, i32* %a, align 4
  %b = getelementptr inbounds %struct.a_b, %struct.a_b* %x, i32 0, i32 1
  store i32 2, i32* %b, align 4
  call void @f(%struct.a_b* %x)
  ; %x is not referenced again in this function after the call above.
  ; Field 0 of %y = 1, field 1 of %y = 3.
  %a1 = getelementptr inbounds %struct.a_b, %struct.a_b* %y, i32 0, i32 0
  store i32 1, i32* %a1, align 4
  %b2 = getelementptr inbounds %struct.a_b, %struct.a_b* %y, i32 0, i32 1
  store i32 3, i32* %b2, align 4
  call void @f(%struct.a_b* %y)
  ret void
}

attributes #0 = { nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"PIC Level", i32 2}
!1 = !{!"clang version 3.7.0 (http://llvm.org/git/clang.git 36ba449caa88f710520cdce148457e5a75e9dabc) (http://llvm.org/git/llvm.git dccade93466c50834dbaa5f4dabb81e90d768c40)"}
----
Seemingly Similar Threads
- alloca combining, not (yet) possible ?
- Expected constant simplification not happening
- Updated llc does not compile my .ll files any more [addrspace on AVR problem?]
- Updated llc does not compile my .ll files any more [addrspace on AVR problem?]
- Expected constant simplification not happening