Hello, since my broad RFC request didn't get any responses, let me get a bit more into the nitty-gritty: I tried to get LLVM (3.7) to optimize superfluous allocas away, but so far I haven't figured out how. Is there no optimizer for this? Should I adorn something with some attributes? As far as I can tell, no, but I am no LLVM expert... For what I want to do, I will probably have a few dozen combinable allocas in each function of my generated code. So it would be worthwhile to reduce stack space needs. Ciao Nat! --- %struct.a_b = type { i32, i32 } ; Function Attrs: nounwind ssp uwtable define void @g() #0 { entry: %x = alloca %struct.a_b, align 4 %y = alloca %struct.a_b, align 4 %a = getelementptr inbounds %struct.a_b, %struct.a_b* %x, i32 0, i32 0 store i32 1, i32* %a, align 4 %b = getelementptr inbounds %struct.a_b, %struct.a_b* %x, i32 0, i32 1 store i32 2, i32* %b, align 4 call void @f(%struct.a_b* %x) %a1 = getelementptr inbounds %struct.a_b, %struct.a_b* %y, i32 0, i32 0 store i32 1, i32* %a1, align 4 %b2 = getelementptr inbounds %struct.a_b, %struct.a_b* %y, i32 0, i32 1 store i32 3, i32* %b2, align 4 call void @f(%struct.a_b* %y) ret void } --- produces the following x86_64-apple-macosx10.10.0 output (with -O4) _g: 0000000000000000 pushq %rbp 0000000000000001 movq %rsp, %rbp 0000000000000004 subq $0x10, %rsp 0000000000000008 movabsq $0x200000001, %rax ## imm = 0x200000001 0000000000000012 movq %rax, -0x8(%rbp) 0000000000000016 leaq -0x8(%rbp), %rdi --------------------------------^^^^ 000000000000001a callq 0x1f 000000000000001f movabsq $0x300000001, %rax ## imm = 0x300000001 0000000000000029 movq %rax, -0x10(%rbp) 000000000000002d leaq -0x10(%rbp), %rdi --------------------------------^^^^^ 0000000000000031 callq 0x36 0000000000000036 addq $0x10, %rsp 000000000000003a popq %rbp 000000000000003b retq 000000000000003c nopl (%rax) void g( void) { struct a_b x; struct a_b y; x.a = 1; x.b = 2; f( &x); // x no longer needed // expect y to reuse x space y.a = 1; y.b = 3; f( &y); }
Caldarale, Charles R via llvm-dev
2015-Aug-31 12:30 UTC
[llvm-dev] alloca combining, not (yet) possible ?
> From: llvm-dev [mailto:llvm-dev-bounces at lists.llvm.org] On Behalf Of Nat! via llvm-dev > Subject: [llvm-dev] alloca combining, not (yet) possible ? > I tried to get llvm (3.7) to optimize superflous allocas away, but > so far I haven't figured out how. Should I adorn something with > some attributes ? Yes. > void g( void) > { > struct a_b x; > struct a_b y; > > x.a = 1; > x.b = 2; > f( &x); > > // x no longer needed > // expect y to reuse x space > y.a = 1; > y.b = 3; > f( &y); > } You have not provided us with the declaration for f(). Unless its argument is marked with the nocapture attribute, the compilation of g() cannot assume that f() has not retained a pointer to the x struct and is using it in the second call. - Chuck
Caldarale, Charles R schrieb: > You have not provided us with the declaration for f(). Unless its argument is marked with the nocapture attribute, the compilation of g() cannot assume that f() has not retained a pointer to the x struct and is using it in the second call. > Thanks a lot for the input. Yes, I forgot to do that. The C function declaration would have been void f( struct a_b *p); which compiled into declare void @f(%struct.a_b*) #2 with attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" } --- I could not figure out how to decorate my C code to emit the nocapture attribute; __attribute__((nocapture)) is unknown. So I tried to modify the IR code by hand to read thusly: declare void @f(%struct.a_b* nocapture) #1 But in the end, it didn't make a difference: when I compiled it with ../llvm-build.d/bin/llc -O3 -o test-combine-alloca.s test-combine-alloca.ir it still used two allocas. From a C perspective, I find it weird that it should concern the caller if the called function "mistakenly" holds onto an alloca buffer that will be invalid soon anyway. But I guess that's C++ magic somehow :) Ciao Nat!
---- ; ModuleID = 'test-combine-alloca.c' target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.10.0" %struct.a_b = type { i32, i32 } declare void @f(%struct.a_b* nocapture) #1 ; Function Attrs: nounwind ssp uwtable define void @g() #0 { entry: %x = alloca %struct.a_b, align 4 %y = alloca %struct.a_b, align 4 %a = getelementptr inbounds %struct.a_b, %struct.a_b* %x, i32 0, i32 0 store i32 1, i32* %a, align 4 %b = getelementptr inbounds %struct.a_b, %struct.a_b* %x, i32 0, i32 1 store i32 2, i32* %b, align 4 call void @f(%struct.a_b* %x) %a1 = getelementptr inbounds %struct.a_b, %struct.a_b* %y, i32 0, i32 0 store i32 1, i32* %a1, align 4 %b2 = getelementptr inbounds %struct.a_b, %struct.a_b* %y, i32 0, i32 1 store i32 3, i32* %b2, align 4 call void @f(%struct.a_b* %y) ret void } attributes #0 = { nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.module.flags = !{!0} !llvm.ident = !{!1} !0 = !{i32 1, !"PIC Level", i32 2} !1 = !{!"clang version 3.7.0 (http://llvm.org/git/clang.git 36ba449caa88f710520cdce148457e5a75e9dabc) (http://llvm.org/git/llvm.git dccade93466c50834dbaa5f4dabb81e90d768c40)"} ----
Possibly Parallel Threads
- alloca combining, not (yet) possible ?
- Expected constant simplification not happening
- Updated llc does not compile my .ll files any more [addrspace on AVR problem?]
- Updated llc does not compile my .ll files any more [addrspace on AVR problem?]
- Expected constant simplification not happening