Hello all,
My app uses the LLVM JIT as a runtime engine for image analysis (similar to
OpenCL). I'm placing values into a struct that is passed by reference to a
series of functions. After running the createStandardModulePasses(...) set of
optimizations, the function calls are all inlined, but the struct store/load
pairs haven't been optimized away. I've attached the code below. What
I'd like to see is the store/load pairs promoted to registers, and the
stores with no associated loads removed, so that further optimizations can be
performed.
Thanks for your help,
mike
--
Mike Woodworth
mike at divergentmedia.com
The code (after optimizations):
; ModuleID = '/Users/mike/Desktop/Development/ScopeBox3/build/Beta Release/ScopeBox.app/Contents/Resources/Main2vuy.i386kernel'
target datalayout =
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
target triple = "i386-apple-darwin9.0.0"
; Context struct that the email says is passed by reference to the kernel
; functions: nine i32 fields. Going by the GEP value names in @Main, fields
; 0-8 are presumably x, y, yVal, crVal, cbVal, rVal, gVal, bVal, threadID
; (names inferred, not present in the type itself).
%struct.AKEKernelContext = type { i32, i32, i32, i32, i32, i32, i32, i32, i32 }
; @Main(i8* %inputBuffer, i32 %threadSliceNum)
;
; Runs a 1080 x 960 nested loop. Each inner iteration:
;   1. loads three bytes from %inputBuffer at byte offsets
;      outer*3840 + inner*4 + {0, 1, 2} and zero-extends them to i32;
;   2. computes three fixed-point results (multiply-add, then ashr by 8);
;   3. stores nine i32 fields into the stack-allocated
;      %struct.AKEKernelContext (%.compoundliteral);
;   4. reloads field 7 and uses it as an index to increment one i32 counter
;      in each of three tables located at fixed absolute addresses.
;
; What this listing demonstrates: the store to field 7 (via %bVal) and the
; load of field 7 (via %.idx.i) form a store/load pair that survived
; optimization, and the other eight stores have no matching load anywhere in
; this function. Every visible use of %.compoundliteral is a GEP feeding one of
; those stores or that single load.
;
; NOTE(review): the induction variables and several GEP indices are i64 even
; though the datalayout declares 32-bit pointers (p:32:32:32). That may mean
; target data was not available to the optimizer when the passes ran -- confirm
; how the pass manager was set up.
;
; Wrapped lines from the original email have been rejoined so that each
; instruction and each "; preds" comment sits on a single line; no instruction
; has been changed.
define void @Main(i8* nocapture %inputBuffer, i32 %threadSliceNum) nounwind {
entry:
  %.compoundliteral = alloca %struct.AKEKernelContext, align 4
  ; Addresses of struct fields 0..8, hoisted out of the loops.
  %x139 = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 0
  %y141 = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 1
  %yVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 2
  %crVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 3
  %cbVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 4
  %rVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 5
  %gVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 6
  %bVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 7
  %threadID = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 8
  ; Second address of field 7 -- the same field as %bVal, but written with an
  ; i64 first index and without 'inbounds'. This is the pointer the surviving
  ; load reads through.
  %.idx.i = getelementptr %struct.AKEKernelContext* %.compoundliteral, i64 0, i32 7
  br label %for.cond21.preheader

for.cond21.preheader:                             ; preds = %for.inc152, %entry
  ; Outer loop: %indvar10 counts 0..1079; %tmp20 is the byte offset outer*3840.
  %indvar10 = phi i64 [ %indvar.next11, %for.inc152 ], [ 0, %entry ]
  %tmp20 = mul i64 %indvar10, 3840
  ; 3840 is a multiple of 4, so the low two bits of %tmp20 are zero and these
  ; 'or' instructions are equivalent to +1 and +2.
  %tmp2227 = or i64 %tmp20, 1
  %tmp2428 = or i64 %tmp20, 2
  %tmp26 = trunc i64 %indvar10 to i32
  br label %for.body25

for.body25:                                       ; preds = %for.body25, %for.cond21.preheader
  ; Inner loop: %indvar4 counts 0..959; %tmp19 is the byte offset inner*4.
  %indvar4 = phi i64 [ 0, %for.cond21.preheader ], [ %indvar.next5, %for.body25 ]
  %tmp19 = shl i64 %indvar4, 2
  %tmp21 = add i64 %tmp20, %tmp19
  %arrayidx = getelementptr i8* %inputBuffer, i64 %tmp21
  %tmp23 = add i64 %tmp2227, %tmp19
  %arrayidx42 = getelementptr i8* %inputBuffer, i64 %tmp23
  %tmp25 = add i64 %tmp2428, %tmp19
  %arrayidx49 = getelementptr i8* %inputBuffer, i64 %tmp25
  ; %tmp = inner*2, truncated to i32; stored into field 0 below.
  %tmp7 = shl i64 %indvar4, 1
  %tmp = trunc i64 %tmp7 to i32
  ; Load bytes at offsets +0, +1, +2 and widen to i32.
  %tmp37 = load i8* %arrayidx, align 1, !tbaa !0
  %conv = zext i8 %tmp37 to i32
  %tmp43 = load i8* %arrayidx42, align 1, !tbaa !0
  %conv44 = zext i8 %tmp43 to i32
  %tmp50 = load i8* %arrayidx49, align 1, !tbaa !0
  %conv51 = zext i8 %tmp50 to i32
  ; Fixed-point arithmetic on the three bytes (b0 = %conv, b1 = %conv44,
  ; b2 = %conv51):
  ;   %shr.i189   = (298*b1 - 4640 + 459*(b2-128)) >> 8
  ;   %shr33.i194 = (298*b1 - 4640 -  55*(b0-128) - 136*(b2-128)) >> 8
  ;   %shr47.i197 = (298*b1 - 4640 + 541*(b0-128)) >> 8
  ; Presumably a YCbCr -> RGB conversion, judging by the destination field
  ; names; confirm against the kernel source.
  %sub.i183 = add nsw i32 %conv, -128
  %sub6.i184 = add nsw i32 %conv51, -128
  %tmp.i185 = mul i32 %conv44, 298
  %mul17.i186 = mul nsw i32 %sub6.i184, 459
  %add18.i187 = add i32 %tmp.i185, -4640
  %add19.i188 = add i32 %mul17.i186, %add18.i187
  %shr.i189 = ashr i32 %add19.i188, 8
  %mul26.i190 = mul nsw i32 %sub.i183, -55
  %mul30.i191 = mul nsw i32 %sub6.i184, -136
  %add31.i192 = add i32 %add18.i187, %mul26.i190
  %add32.i193 = add i32 %add31.i192, %mul30.i191
  %shr33.i194 = ashr i32 %add32.i193, 8
  %mul40.i195 = mul nsw i32 %sub.i183, 541
  %add46.i196 = add i32 %add18.i187, %mul40.i195
  %shr47.i197 = ashr i32 %add46.i196, 8
  ; Populate all nine struct fields. Only the store through %bVal (field 7) is
  ; read back below; the other eight have no load in this function.
  store i32 %tmp, i32* %x139, align 4, !tbaa !2
  store i32 %tmp26, i32* %y141, align 4, !tbaa !2
  store i32 %conv44, i32* %yVal, align 4, !tbaa !2
  store i32 %conv, i32* %crVal, align 4, !tbaa !2
  store i32 %conv51, i32* %cbVal, align 4, !tbaa !2
  store i32 %shr.i189, i32* %rVal, align 4, !tbaa !2
  store i32 %shr33.i194, i32* %gVal, align 4, !tbaa !2
  store i32 %shr47.i197, i32* %bVal, align 4, !tbaa !2
  store i32 %threadSliceNum, i32* %threadID, align 4, !tbaa !2
  ; Reload of field 7: reads back the %shr47.i197 value stored through %bVal
  ; just above. This is the store/load pair the email asks about.
  %.idx.val.i = load i32* %.idx.i, align 4, !tbaa !2
  ; Increment element [%.idx.val.i] in each of three i32 tables at absolute
  ; addresses 33807872, 33812992 and 33818112 (5120 bytes apart). Presumably
  ; these are globals resolved to addresses by the JIT; confirm.
  %arrayidx.i.i.i = getelementptr inbounds i32* inttoptr (i32 33807872 to i32*), i32 %.idx.val.i
  %tmp6.i.i.i = load i32* %arrayidx.i.i.i, align 4, !tbaa !2
  %inc.i.i.i = add i32 %tmp6.i.i.i, 1
  store i32 %inc.i.i.i, i32* %arrayidx.i.i.i, align 4, !tbaa !2
  %arrayidx.i.i6.i = getelementptr inbounds i32* inttoptr (i32 33812992 to i32*), i32 %.idx.val.i
  %tmp6.i.i7.i = load i32* %arrayidx.i.i6.i, align 4, !tbaa !2
  %inc.i.i8.i = add i32 %tmp6.i.i7.i, 1
  store i32 %inc.i.i8.i, i32* %arrayidx.i.i6.i, align 4, !tbaa !2
  %arrayidx.i.i3.i = getelementptr inbounds i32* inttoptr (i32 33818112 to i32*), i32 %.idx.val.i
  %tmp6.i.i4.i = load i32* %arrayidx.i.i3.i, align 4, !tbaa !2
  %inc.i.i5.i = add i32 %tmp6.i.i4.i, 1
  store i32 %inc.i.i5.i, i32* %arrayidx.i.i3.i, align 4, !tbaa !2
  ; Inner loop back-edge: exit after 960 iterations.
  %indvar.next5 = add i64 %indvar4, 1
  %exitcond6 = icmp eq i64 %indvar.next5, 960
  br i1 %exitcond6, label %for.inc152, label %for.body25

for.inc152:                                       ; preds = %for.body25
  ; Outer loop back-edge: exit after 1080 iterations.
  %indvar.next11 = add i64 %indvar10, 1
  %exitcond18 = icmp eq i64 %indvar.next11, 1080
  br i1 %exitcond18, label %for.end154, label %for.cond21.preheader

for.end154:                                       ; preds = %for.inc152
  ret void
}
; TBAA type tree referenced by the !tbaa tags in @Main:
;   !1 "Simple C/C++ TBAA" (root)  <-  !0 "omnipotent char"  <-  !2 "int"
; The i8 loads from %inputBuffer are tagged !0; every i32 load/store is
; tagged !2.
!0 = metadata !{metadata !"omnipotent char", metadata !1}
!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
!2 = metadata !{metadata !"int", metadata !0}