Hello all, my app uses the LLVM JIT as a runtime engine for image analysis (similar to OpenCL). I'm placing values into a struct to pass by reference to a series of functions. After running the createStandardModulePasses(...) set of optimizations, the function calls are all inlined, but the struct store/load pairs haven't been optimized away. I've attached the code below. What I'd like to see is the store/load pairs promoted to registers, and the stores with no associated loads removed, so that further optimizations can be performed. Thanks for your help, mike -- Mike Woodworth mike at divergentmedia.com The code (after optimizations): ModuleID = '/Users/mike/Desktop/Development/ScopeBox3/build/Beta Release/ScopeBox.app/Contents/Resources/Main2vuy.i386kernel' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" target triple = "i386-apple-darwin9.0.0" %struct.AKEKernelContext = type { i32, i32, i32, i32, i32, i32, i32, i32, i32 } define void @Main(i8* nocapture %inputBuffer, i32 %threadSliceNum) nounwind { entry: %.compoundliteral = alloca %struct.AKEKernelContext, align 4 %x139 = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 0 %y141 = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 1 %yVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 2 %crVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 3 %cbVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 4 %rVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 5 %gVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 6 %bVal = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 7 %threadID = getelementptr inbounds %struct.AKEKernelContext* %.compoundliteral, i32 0, i32 8 %.idx.i 
= getelementptr %struct.AKEKernelContext* %.compoundliteral, i64 0, i32 7 br label %for.cond21.preheader for.cond21.preheader: ; preds = %for.inc152, %entry %indvar10 = phi i64 [ %indvar.next11, %for.inc152 ], [ 0, %entry ] %tmp20 = mul i64 %indvar10, 3840 %tmp2227 = or i64 %tmp20, 1 %tmp2428 = or i64 %tmp20, 2 %tmp26 = trunc i64 %indvar10 to i32 br label %for.body25 for.body25: ; preds = %for.body25, %for.cond21.preheader %indvar4 = phi i64 [ 0, %for.cond21.preheader ], [ %indvar.next5, %for.body25 ] %tmp19 = shl i64 %indvar4, 2 %tmp21 = add i64 %tmp20, %tmp19 %arrayidx = getelementptr i8* %inputBuffer, i64 %tmp21 %tmp23 = add i64 %tmp2227, %tmp19 %arrayidx42 = getelementptr i8* %inputBuffer, i64 %tmp23 %tmp25 = add i64 %tmp2428, %tmp19 %arrayidx49 = getelementptr i8* %inputBuffer, i64 %tmp25 %tmp7 = shl i64 %indvar4, 1 %tmp = trunc i64 %tmp7 to i32 %tmp37 = load i8* %arrayidx, align 1, !tbaa !0 %conv = zext i8 %tmp37 to i32 %tmp43 = load i8* %arrayidx42, align 1, !tbaa !0 %conv44 = zext i8 %tmp43 to i32 %tmp50 = load i8* %arrayidx49, align 1, !tbaa !0 %conv51 = zext i8 %tmp50 to i32 %sub.i183 = add nsw i32 %conv, -128 %sub6.i184 = add nsw i32 %conv51, -128 %tmp.i185 = mul i32 %conv44, 298 %mul17.i186 = mul nsw i32 %sub6.i184, 459 %add18.i187 = add i32 %tmp.i185, -4640 %add19.i188 = add i32 %mul17.i186, %add18.i187 %shr.i189 = ashr i32 %add19.i188, 8 %mul26.i190 = mul nsw i32 %sub.i183, -55 %mul30.i191 = mul nsw i32 %sub6.i184, -136 %add31.i192 = add i32 %add18.i187, %mul26.i190 %add32.i193 = add i32 %add31.i192, %mul30.i191 %shr33.i194 = ashr i32 %add32.i193, 8 %mul40.i195 = mul nsw i32 %sub.i183, 541 %add46.i196 = add i32 %add18.i187, %mul40.i195 %shr47.i197 = ashr i32 %add46.i196, 8 store i32 %tmp, i32* %x139, align 4, !tbaa !2 /// these stores should be removed store i32 %tmp26, i32* %y141, align 4, !tbaa !2 store i32 %conv44, i32* %yVal, align 4, !tbaa !2 store i32 %conv, i32* %crVal, align 4, !tbaa !2 store i32 %conv51, i32* %cbVal, align 4, !tbaa !2 store 
i32 %shr.i189, i32* %rVal, align 4, !tbaa !2 store i32 %shr33.i194, i32* %gVal, align 4, !tbaa !2 store i32 %shr47.i197, i32* %bVal, align 4, !tbaa !2 store i32 %threadSliceNum, i32* %threadID, align 4, !tbaa !2 %.idx.val.i = load i32* %.idx.i, align 4, !tbaa !2 /// and this load replaced by the original reg (%shr47.i197 above) %arrayidx.i.i.i = getelementptr inbounds i32* inttoptr (i32 33807872 to i32*), i32 %.idx.val.i %tmp6.i.i.i = load i32* %arrayidx.i.i.i, align 4, !tbaa !2 %inc.i.i.i = add i32 %tmp6.i.i.i, 1 store i32 %inc.i.i.i, i32* %arrayidx.i.i.i, align 4, !tbaa !2 %arrayidx.i.i6.i = getelementptr inbounds i32* inttoptr (i32 33812992 to i32*), i32 %.idx.val.i %tmp6.i.i7.i = load i32* %arrayidx.i.i6.i, align 4, !tbaa !2 %inc.i.i8.i = add i32 %tmp6.i.i7.i, 1 store i32 %inc.i.i8.i, i32* %arrayidx.i.i6.i, align 4, !tbaa !2 %arrayidx.i.i3.i = getelementptr inbounds i32* inttoptr (i32 33818112 to i32*), i32 %.idx.val.i %tmp6.i.i4.i = load i32* %arrayidx.i.i3.i, align 4, !tbaa !2 %inc.i.i5.i = add i32 %tmp6.i.i4.i, 1 store i32 %inc.i.i5.i, i32* %arrayidx.i.i3.i, align 4, !tbaa !2 %indvar.next5 = add i64 %indvar4, 1 %exitcond6 = icmp eq i64 %indvar.next5, 960 br i1 %exitcond6, label %for.inc152, label %for.body25 for.inc152: ; preds = %for.body25 %indvar.next11 = add i64 %indvar10, 1 %exitcond18 = icmp eq i64 %indvar.next11, 1080 br i1 %exitcond18, label %for.end154, label %for.cond21.preheader for.end154: ; preds = %for.inc152 ret void } !0 = metadata !{metadata !"omnipotent char", metadata !1} !1 = metadata !{metadata !"Simple C/C++ TBAA", null} !2 = metadata !{metadata !"int", metadata !0}