Vincent Lejeune
2012-Oct-20 20:23 UTC
[LLVMdev] RegisterCoalescing pass crashes with ImplicitDef registers
Hi, below is the output of the command "llc -march=r600 -mcpu=cayman -print-before-all -debug-only=regalloc file.shader" run with llvm3.2svn. The register coalescing pass crashes when joining vreg12:sel_z with vreg13, because it tries to access the live interval of vreg13, which is undefined. I don't know whether this is a bug in the pass or whether my backend should do something specific before the pass runs. It worked with LLVM 3.1; I don't know whether a new requirement related to register coalescing was introduced between 3.1 and current trunk. Regards, Vincent *** IR Dump Before Preliminary module verification *** define void @main() { call void @llvm.AMDGPU.reserve.reg(i32 0) call void @llvm.AMDGPU.reserve.reg(i32 1) call void @llvm.AMDGPU.reserve.reg(i32 2) call void @llvm.AMDGPU.reserve.reg(i32 3) %1 = call float @llvm.R600.load.input(i32 4) %2 = insertelement <4 x float> undef, float %1, i32 0 %3 = call float @llvm.R600.load.input(i32 5) %4 = insertelement <4 x float> %2, float %3, i32 1 %5 = call float @llvm.R600.load.input(i32 6) %6 = insertelement <4 x float> %4, float %5, i32 2 %7 = call float @llvm.R600.load.input(i32 7) %8 = insertelement <4 x float> %6, float %7, i32 3 %9 = shufflevector <4 x float> undef, <4 x float> %8, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %10 = shufflevector <4 x float> %8, <4 x float> %8, <2 x i32> <i32 0, i32 1> %11 = shufflevector <2 x float> %10, <2 x float> %10, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %12 = shufflevector <4 x float> undef, <4 x float> %11, <4 x i32> <i32 4, i32 5, i32 2, i32 3> %13 = fsub <2 x float> zeroinitializer, %10 %14 = shufflevector <2 x float> %13, <2 x float> %13, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %15 = shufflevector <4 x float> %12, <4 x float> %14, <4 x i32> <i32 0, i32 1, i32 4, i32 5> %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %17 = extractelement <4 x float> %16, i32 0 call void @llvm.AMDGPU.store.output(float %17, i32 8) %18 
= extractelement <4 x float> %16, i32 1 call void @llvm.AMDGPU.store.output(float %18, i32 9) %19 = extractelement <4 x float> %16, i32 2 call void @llvm.AMDGPU.store.output(float %19, i32 10) %20 = extractelement <4 x float> %16, i32 3 call void @llvm.AMDGPU.store.output(float %20, i32 11) %21 = extractelement <4 x float> %9, i32 0 call void @llvm.AMDGPU.store.output(float %21, i32 4) %22 = extractelement <4 x float> %9, i32 1 call void @llvm.AMDGPU.store.output(float %22, i32 5) %23 = extractelement <4 x float> %9, i32 2 call void @llvm.AMDGPU.store.output(float %23, i32 6) %24 = extractelement <4 x float> %9, i32 3 call void @llvm.AMDGPU.store.output(float %24, i32 7) ret void } *** IR Dump Before Module Verifier *** define void @main() { call void @llvm.AMDGPU.reserve.reg(i32 0) call void @llvm.AMDGPU.reserve.reg(i32 1) call void @llvm.AMDGPU.reserve.reg(i32 2) call void @llvm.AMDGPU.reserve.reg(i32 3) %1 = call float @llvm.R600.load.input(i32 4) %2 = insertelement <4 x float> undef, float %1, i32 0 %3 = call float @llvm.R600.load.input(i32 5) %4 = insertelement <4 x float> %2, float %3, i32 1 %5 = call float @llvm.R600.load.input(i32 6) %6 = insertelement <4 x float> %4, float %5, i32 2 %7 = call float @llvm.R600.load.input(i32 7) %8 = insertelement <4 x float> %6, float %7, i32 3 %9 = shufflevector <4 x float> undef, <4 x float> %8, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %10 = shufflevector <4 x float> %8, <4 x float> %8, <2 x i32> <i32 0, i32 1> %11 = shufflevector <2 x float> %10, <2 x float> %10, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %12 = shufflevector <4 x float> undef, <4 x float> %11, <4 x i32> <i32 4, i32 5, i32 2, i32 3> %13 = fsub <2 x float> zeroinitializer, %10 %14 = shufflevector <2 x float> %13, <2 x float> %13, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %15 = shufflevector <4 x float> %12, <4 x float> %14, <4 x i32> <i32 0, i32 1, i32 4, i32 5> %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 4, i32 5, i32 6, 
i32 7> %17 = extractelement <4 x float> %16, i32 0 call void @llvm.AMDGPU.store.output(float %17, i32 8) %18 = extractelement <4 x float> %16, i32 1 call void @llvm.AMDGPU.store.output(float %18, i32 9) %19 = extractelement <4 x float> %16, i32 2 call void @llvm.AMDGPU.store.output(float %19, i32 10) %20 = extractelement <4 x float> %16, i32 3 call void @llvm.AMDGPU.store.output(float %20, i32 11) %21 = extractelement <4 x float> %9, i32 0 call void @llvm.AMDGPU.store.output(float %21, i32 4) %22 = extractelement <4 x float> %9, i32 1 call void @llvm.AMDGPU.store.output(float %22, i32 5) %23 = extractelement <4 x float> %9, i32 2 call void @llvm.AMDGPU.store.output(float %23, i32 6) %24 = extractelement <4 x float> %9, i32 3 call void @llvm.AMDGPU.store.output(float %24, i32 7) ret void } *** IR Dump Before Lower Garbage Collection Instructions *** define void @main() { call void @llvm.AMDGPU.reserve.reg(i32 0) call void @llvm.AMDGPU.reserve.reg(i32 1) call void @llvm.AMDGPU.reserve.reg(i32 2) call void @llvm.AMDGPU.reserve.reg(i32 3) %1 = call float @llvm.R600.load.input(i32 4) %2 = insertelement <4 x float> undef, float %1, i32 0 %3 = call float @llvm.R600.load.input(i32 5) %4 = insertelement <4 x float> %2, float %3, i32 1 %5 = call float @llvm.R600.load.input(i32 6) %6 = insertelement <4 x float> %4, float %5, i32 2 %7 = call float @llvm.R600.load.input(i32 7) %8 = insertelement <4 x float> %6, float %7, i32 3 %9 = shufflevector <4 x float> undef, <4 x float> %8, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %10 = shufflevector <4 x float> %8, <4 x float> %8, <2 x i32> <i32 0, i32 1> %11 = shufflevector <2 x float> %10, <2 x float> %10, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %12 = shufflevector <4 x float> undef, <4 x float> %11, <4 x i32> <i32 4, i32 5, i32 2, i32 3> %13 = fsub <2 x float> zeroinitializer, %10 %14 = shufflevector <2 x float> %13, <2 x float> %13, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %15 = shufflevector <4 x float> %12, <4 x float> 
%14, <4 x i32> <i32 0, i32 1, i32 4, i32 5> %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %17 = extractelement <4 x float> %16, i32 0 call void @llvm.AMDGPU.store.output(float %17, i32 8) %18 = extractelement <4 x float> %16, i32 1 call void @llvm.AMDGPU.store.output(float %18, i32 9) %19 = extractelement <4 x float> %16, i32 2 call void @llvm.AMDGPU.store.output(float %19, i32 10) %20 = extractelement <4 x float> %16, i32 3 call void @llvm.AMDGPU.store.output(float %20, i32 11) %21 = extractelement <4 x float> %9, i32 0 call void @llvm.AMDGPU.store.output(float %21, i32 4) %22 = extractelement <4 x float> %9, i32 1 call void @llvm.AMDGPU.store.output(float %22, i32 5) %23 = extractelement <4 x float> %9, i32 2 call void @llvm.AMDGPU.store.output(float %23, i32 6) %24 = extractelement <4 x float> %9, i32 3 call void @llvm.AMDGPU.store.output(float %24, i32 7) ret void } *** IR Dump Before Remove unreachable blocks from the CFG *** define void @main() { call void @llvm.AMDGPU.reserve.reg(i32 0) call void @llvm.AMDGPU.reserve.reg(i32 1) call void @llvm.AMDGPU.reserve.reg(i32 2) call void @llvm.AMDGPU.reserve.reg(i32 3) %1 = call float @llvm.R600.load.input(i32 4) %2 = insertelement <4 x float> undef, float %1, i32 0 %3 = call float @llvm.R600.load.input(i32 5) %4 = insertelement <4 x float> %2, float %3, i32 1 %5 = call float @llvm.R600.load.input(i32 6) %6 = insertelement <4 x float> %4, float %5, i32 2 %7 = call float @llvm.R600.load.input(i32 7) %8 = insertelement <4 x float> %6, float %7, i32 3 %9 = shufflevector <4 x float> undef, <4 x float> %8, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %10 = shufflevector <4 x float> %8, <4 x float> %8, <2 x i32> <i32 0, i32 1> %11 = shufflevector <2 x float> %10, <2 x float> %10, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %12 = shufflevector <4 x float> undef, <4 x float> %11, <4 x i32> <i32 4, i32 5, i32 2, i32 3> %13 = fsub <2 x float> zeroinitializer, %10 %14 = 
shufflevector <2 x float> %13, <2 x float> %13, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %15 = shufflevector <4 x float> %12, <4 x float> %14, <4 x i32> <i32 0, i32 1, i32 4, i32 5> %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %17 = extractelement <4 x float> %16, i32 0 call void @llvm.AMDGPU.store.output(float %17, i32 8) %18 = extractelement <4 x float> %16, i32 1 call void @llvm.AMDGPU.store.output(float %18, i32 9) %19 = extractelement <4 x float> %16, i32 2 call void @llvm.AMDGPU.store.output(float %19, i32 10) %20 = extractelement <4 x float> %16, i32 3 call void @llvm.AMDGPU.store.output(float %20, i32 11) %21 = extractelement <4 x float> %9, i32 0 call void @llvm.AMDGPU.store.output(float %21, i32 4) %22 = extractelement <4 x float> %9, i32 1 call void @llvm.AMDGPU.store.output(float %22, i32 5) %23 = extractelement <4 x float> %9, i32 2 call void @llvm.AMDGPU.store.output(float %23, i32 6) %24 = extractelement <4 x float> %9, i32 3 call void @llvm.AMDGPU.store.output(float %24, i32 7) ret void } *** IR Dump Before Lower invoke and unwind, for unwindless code generators *** define void @main() { call void @llvm.AMDGPU.reserve.reg(i32 0) call void @llvm.AMDGPU.reserve.reg(i32 1) call void @llvm.AMDGPU.reserve.reg(i32 2) call void @llvm.AMDGPU.reserve.reg(i32 3) %1 = call float @llvm.R600.load.input(i32 4) %2 = insertelement <4 x float> undef, float %1, i32 0 %3 = call float @llvm.R600.load.input(i32 5) %4 = insertelement <4 x float> %2, float %3, i32 1 %5 = call float @llvm.R600.load.input(i32 6) %6 = insertelement <4 x float> %4, float %5, i32 2 %7 = call float @llvm.R600.load.input(i32 7) %8 = insertelement <4 x float> %6, float %7, i32 3 %9 = shufflevector <4 x float> undef, <4 x float> %8, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %10 = shufflevector <4 x float> %8, <4 x float> %8, <2 x i32> <i32 0, i32 1> %11 = shufflevector <2 x float> %10, <2 x float> %10, <4 x i32> <i32 0, i32 1, i32 undef, i32 
undef> %12 = shufflevector <4 x float> undef, <4 x float> %11, <4 x i32> <i32 4, i32 5, i32 2, i32 3> %13 = fsub <2 x float> zeroinitializer, %10 %14 = shufflevector <2 x float> %13, <2 x float> %13, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %15 = shufflevector <4 x float> %12, <4 x float> %14, <4 x i32> <i32 0, i32 1, i32 4, i32 5> %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %17 = extractelement <4 x float> %16, i32 0 call void @llvm.AMDGPU.store.output(float %17, i32 8) %18 = extractelement <4 x float> %16, i32 1 call void @llvm.AMDGPU.store.output(float %18, i32 9) %19 = extractelement <4 x float> %16, i32 2 call void @llvm.AMDGPU.store.output(float %19, i32 10) %20 = extractelement <4 x float> %16, i32 3 call void @llvm.AMDGPU.store.output(float %20, i32 11) %21 = extractelement <4 x float> %9, i32 0 call void @llvm.AMDGPU.store.output(float %21, i32 4) %22 = extractelement <4 x float> %9, i32 1 call void @llvm.AMDGPU.store.output(float %22, i32 5) %23 = extractelement <4 x float> %9, i32 2 call void @llvm.AMDGPU.store.output(float %23, i32 6) %24 = extractelement <4 x float> %9, i32 3 call void @llvm.AMDGPU.store.output(float %24, i32 7) ret void } *** IR Dump Before Remove unreachable blocks from the CFG *** define void @main() { call void @llvm.AMDGPU.reserve.reg(i32 0) call void @llvm.AMDGPU.reserve.reg(i32 1) call void @llvm.AMDGPU.reserve.reg(i32 2) call void @llvm.AMDGPU.reserve.reg(i32 3) %1 = call float @llvm.R600.load.input(i32 4) %2 = insertelement <4 x float> undef, float %1, i32 0 %3 = call float @llvm.R600.load.input(i32 5) %4 = insertelement <4 x float> %2, float %3, i32 1 %5 = call float @llvm.R600.load.input(i32 6) %6 = insertelement <4 x float> %4, float %5, i32 2 %7 = call float @llvm.R600.load.input(i32 7) %8 = insertelement <4 x float> %6, float %7, i32 3 %9 = shufflevector <4 x float> undef, <4 x float> %8, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %10 = shufflevector <4 x float> %8, <4 
x float> %8, <2 x i32> <i32 0, i32 1> %11 = shufflevector <2 x float> %10, <2 x float> %10, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %12 = shufflevector <4 x float> undef, <4 x float> %11, <4 x i32> <i32 4, i32 5, i32 2, i32 3> %13 = fsub <2 x float> zeroinitializer, %10 %14 = shufflevector <2 x float> %13, <2 x float> %13, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %15 = shufflevector <4 x float> %12, <4 x float> %14, <4 x i32> <i32 0, i32 1, i32 4, i32 5> %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %17 = extractelement <4 x float> %16, i32 0 call void @llvm.AMDGPU.store.output(float %17, i32 8) %18 = extractelement <4 x float> %16, i32 1 call void @llvm.AMDGPU.store.output(float %18, i32 9) %19 = extractelement <4 x float> %16, i32 2 call void @llvm.AMDGPU.store.output(float %19, i32 10) %20 = extractelement <4 x float> %16, i32 3 call void @llvm.AMDGPU.store.output(float %20, i32 11) %21 = extractelement <4 x float> %9, i32 0 call void @llvm.AMDGPU.store.output(float %21, i32 4) %22 = extractelement <4 x float> %9, i32 1 call void @llvm.AMDGPU.store.output(float %22, i32 5) %23 = extractelement <4 x float> %9, i32 2 call void @llvm.AMDGPU.store.output(float %23, i32 6) %24 = extractelement <4 x float> %9, i32 3 call void @llvm.AMDGPU.store.output(float %24, i32 7) ret void } *** IR Dump Before Optimize for code generation *** define void @main() { call void @llvm.AMDGPU.reserve.reg(i32 0) call void @llvm.AMDGPU.reserve.reg(i32 1) call void @llvm.AMDGPU.reserve.reg(i32 2) call void @llvm.AMDGPU.reserve.reg(i32 3) %1 = call float @llvm.R600.load.input(i32 4) %2 = insertelement <4 x float> undef, float %1, i32 0 %3 = call float @llvm.R600.load.input(i32 5) %4 = insertelement <4 x float> %2, float %3, i32 1 %5 = call float @llvm.R600.load.input(i32 6) %6 = insertelement <4 x float> %4, float %5, i32 2 %7 = call float @llvm.R600.load.input(i32 7) %8 = insertelement <4 x float> %6, float %7, i32 3 %9 = 
shufflevector <4 x float> undef, <4 x float> %8, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %10 = shufflevector <4 x float> %8, <4 x float> %8, <2 x i32> <i32 0, i32 1> %11 = shufflevector <2 x float> %10, <2 x float> %10, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %12 = shufflevector <4 x float> undef, <4 x float> %11, <4 x i32> <i32 4, i32 5, i32 2, i32 3> %13 = fsub <2 x float> zeroinitializer, %10 %14 = shufflevector <2 x float> %13, <2 x float> %13, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %15 = shufflevector <4 x float> %12, <4 x float> %14, <4 x i32> <i32 0, i32 1, i32 4, i32 5> %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %17 = extractelement <4 x float> %16, i32 0 call void @llvm.AMDGPU.store.output(float %17, i32 8) %18 = extractelement <4 x float> %16, i32 1 call void @llvm.AMDGPU.store.output(float %18, i32 9) %19 = extractelement <4 x float> %16, i32 2 call void @llvm.AMDGPU.store.output(float %19, i32 10) %20 = extractelement <4 x float> %16, i32 3 call void @llvm.AMDGPU.store.output(float %20, i32 11) %21 = extractelement <4 x float> %9, i32 0 call void @llvm.AMDGPU.store.output(float %21, i32 4) %22 = extractelement <4 x float> %9, i32 1 call void @llvm.AMDGPU.store.output(float %22, i32 5) %23 = extractelement <4 x float> %9, i32 2 call void @llvm.AMDGPU.store.output(float %23, i32 6) %24 = extractelement <4 x float> %9, i32 3 call void @llvm.AMDGPU.store.output(float %24, i32 7) ret void } *** IR Dump Before Insert stack protectors *** define void @main() { call void @llvm.AMDGPU.reserve.reg(i32 0) call void @llvm.AMDGPU.reserve.reg(i32 1) call void @llvm.AMDGPU.reserve.reg(i32 2) call void @llvm.AMDGPU.reserve.reg(i32 3) %1 = call float @llvm.R600.load.input(i32 4) %2 = insertelement <4 x float> undef, float %1, i32 0 %3 = call float @llvm.R600.load.input(i32 5) %4 = insertelement <4 x float> %2, float %3, i32 1 %5 = call float @llvm.R600.load.input(i32 6) %6 = insertelement <4 x float> %4, 
float %5, i32 2 %7 = call float @llvm.R600.load.input(i32 7) %8 = insertelement <4 x float> %6, float %7, i32 3 %9 = shufflevector <4 x float> undef, <4 x float> %8, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %10 = shufflevector <4 x float> %8, <4 x float> %8, <2 x i32> <i32 0, i32 1> %11 = shufflevector <2 x float> %10, <2 x float> %10, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %12 = shufflevector <4 x float> undef, <4 x float> %11, <4 x i32> <i32 4, i32 5, i32 2, i32 3> %13 = fsub <2 x float> zeroinitializer, %10 %14 = shufflevector <2 x float> %13, <2 x float> %13, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %15 = shufflevector <4 x float> %12, <4 x float> %14, <4 x i32> <i32 0, i32 1, i32 4, i32 5> %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %17 = extractelement <4 x float> %16, i32 0 call void @llvm.AMDGPU.store.output(float %17, i32 8) %18 = extractelement <4 x float> %16, i32 1 call void @llvm.AMDGPU.store.output(float %18, i32 9) %19 = extractelement <4 x float> %16, i32 2 call void @llvm.AMDGPU.store.output(float %19, i32 10) %20 = extractelement <4 x float> %16, i32 3 call void @llvm.AMDGPU.store.output(float %20, i32 11) %21 = extractelement <4 x float> %9, i32 0 call void @llvm.AMDGPU.store.output(float %21, i32 4) %22 = extractelement <4 x float> %9, i32 1 call void @llvm.AMDGPU.store.output(float %22, i32 5) %23 = extractelement <4 x float> %9, i32 2 call void @llvm.AMDGPU.store.output(float %23, i32 6) %24 = extractelement <4 x float> %9, i32 3 call void @llvm.AMDGPU.store.output(float %24, i32 7) ret void } *** IR Dump Before Preliminary module verification *** define void @main() { call void @llvm.AMDGPU.reserve.reg(i32 0) call void @llvm.AMDGPU.reserve.reg(i32 1) call void @llvm.AMDGPU.reserve.reg(i32 2) call void @llvm.AMDGPU.reserve.reg(i32 3) %1 = call float @llvm.R600.load.input(i32 4) %2 = insertelement <4 x float> undef, float %1, i32 0 %3 = call float @llvm.R600.load.input(i32 5) %4 = 
insertelement <4 x float> %2, float %3, i32 1 %5 = call float @llvm.R600.load.input(i32 6) %6 = insertelement <4 x float> %4, float %5, i32 2 %7 = call float @llvm.R600.load.input(i32 7) %8 = insertelement <4 x float> %6, float %7, i32 3 %9 = shufflevector <4 x float> undef, <4 x float> %8, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %10 = shufflevector <4 x float> %8, <4 x float> %8, <2 x i32> <i32 0, i32 1> %11 = shufflevector <2 x float> %10, <2 x float> %10, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %12 = shufflevector <4 x float> undef, <4 x float> %11, <4 x i32> <i32 4, i32 5, i32 2, i32 3> %13 = fsub <2 x float> zeroinitializer, %10 %14 = shufflevector <2 x float> %13, <2 x float> %13, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %15 = shufflevector <4 x float> %12, <4 x float> %14, <4 x i32> <i32 0, i32 1, i32 4, i32 5> %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %17 = extractelement <4 x float> %16, i32 0 call void @llvm.AMDGPU.store.output(float %17, i32 8) %18 = extractelement <4 x float> %16, i32 1 call void @llvm.AMDGPU.store.output(float %18, i32 9) %19 = extractelement <4 x float> %16, i32 2 call void @llvm.AMDGPU.store.output(float %19, i32 10) %20 = extractelement <4 x float> %16, i32 3 call void @llvm.AMDGPU.store.output(float %20, i32 11) %21 = extractelement <4 x float> %9, i32 0 call void @llvm.AMDGPU.store.output(float %21, i32 4) %22 = extractelement <4 x float> %9, i32 1 call void @llvm.AMDGPU.store.output(float %22, i32 5) %23 = extractelement <4 x float> %9, i32 2 call void @llvm.AMDGPU.store.output(float %23, i32 6) %24 = extractelement <4 x float> %9, i32 3 call void @llvm.AMDGPU.store.output(float %24, i32 7) ret void } *** IR Dump Before Module Verifier *** define void @main() { call void @llvm.AMDGPU.reserve.reg(i32 0) call void @llvm.AMDGPU.reserve.reg(i32 1) call void @llvm.AMDGPU.reserve.reg(i32 2) call void @llvm.AMDGPU.reserve.reg(i32 3) %1 = call float 
@llvm.R600.load.input(i32 4) %2 = insertelement <4 x float> undef, float %1, i32 0 %3 = call float @llvm.R600.load.input(i32 5) %4 = insertelement <4 x float> %2, float %3, i32 1 %5 = call float @llvm.R600.load.input(i32 6) %6 = insertelement <4 x float> %4, float %5, i32 2 %7 = call float @llvm.R600.load.input(i32 7) %8 = insertelement <4 x float> %6, float %7, i32 3 %9 = shufflevector <4 x float> undef, <4 x float> %8, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %10 = shufflevector <4 x float> %8, <4 x float> %8, <2 x i32> <i32 0, i32 1> %11 = shufflevector <2 x float> %10, <2 x float> %10, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %12 = shufflevector <4 x float> undef, <4 x float> %11, <4 x i32> <i32 4, i32 5, i32 2, i32 3> %13 = fsub <2 x float> zeroinitializer, %10 %14 = shufflevector <2 x float> %13, <2 x float> %13, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %15 = shufflevector <4 x float> %12, <4 x float> %14, <4 x i32> <i32 0, i32 1, i32 4, i32 5> %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %17 = extractelement <4 x float> %16, i32 0 call void @llvm.AMDGPU.store.output(float %17, i32 8) %18 = extractelement <4 x float> %16, i32 1 call void @llvm.AMDGPU.store.output(float %18, i32 9) %19 = extractelement <4 x float> %16, i32 2 call void @llvm.AMDGPU.store.output(float %19, i32 10) %20 = extractelement <4 x float> %16, i32 3 call void @llvm.AMDGPU.store.output(float %20, i32 11) %21 = extractelement <4 x float> %9, i32 0 call void @llvm.AMDGPU.store.output(float %21, i32 4) %22 = extractelement <4 x float> %9, i32 1 call void @llvm.AMDGPU.store.output(float %22, i32 5) %23 = extractelement <4 x float> %9, i32 2 call void @llvm.AMDGPU.store.output(float %23, i32 6) %24 = extractelement <4 x float> %9, i32 3 call void @llvm.AMDGPU.store.output(float %24, i32 7) ret void } # *** IR Dump Before Expand ISel Pseudo-instructions ***: # Machine code for function main: SSA Function Live Ins: %T1_W in %vreg0, 
%T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X BB#0: derived from LLVM BB %0 Live Ins: %T1_W %T1_Z %T1_Y %T1_X %vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3 %vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2 %vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1 %vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0 RESERVE_REG 0 %vreg4<def> = FNEG_R600 %vreg3; R600_Reg32:%vreg4 R600_TReg32:%vreg3 %vreg5<def> = MOV_IMM_F32 0.000000e+00; R600_Reg32:%vreg5 %vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 %vreg7<def> = FNEG_R600 %vreg2; R600_Reg32:%vreg7 R600_TReg32:%vreg2 %vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 %vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10 %vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6 RESERVE_REG 1 RESERVE_REG 2 %vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8 %vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13 %vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13 RESERVE_REG 3 %vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15 %vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15 %T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 %T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14 %T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16 %T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17 %T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1 %T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0 
RETURN # End machine code for function main. # *** IR Dump Before Tail Duplication ***: # Machine code for function main: SSA Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X BB#0: derived from LLVM BB %0 Live Ins: %T1_W %T1_Z %T1_Y %T1_X %vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3 %vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2 %vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1 %vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0 %vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3 %vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5 %vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 %vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2 %vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 %vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10 %vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6 %vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8 %vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13 %vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13 %vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15 %vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15 %T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 %T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14 %T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16 
%T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17 %T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1 %T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0 RETURN # End machine code for function main. # *** IR Dump Before Optimize machine instruction PHIs ***: # Machine code for function main: SSA Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X BB#0: derived from LLVM BB %0 Live Ins: %T1_W %T1_Z %T1_Y %T1_X %vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3 %vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2 %vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1 %vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0 %vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3 %vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5 %vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 %vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2 %vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 %vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10 %vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6 %vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8 %vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13 %vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13 %vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15 %vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15 %T2_X<def> = COPY %vreg3; 
R600_TReg32:%vreg3 %vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 %T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14 %T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16 %T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17 %T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1 %T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0 RETURN # End machine code for function main. # *** IR Dump Before Slot index numbering ***: # Machine code for function main: SSA Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X BB#0: derived from LLVM BB %0 Live Ins: %T1_W %T1_Z %T1_Y %T1_X %vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3 %vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2 %vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1 %vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0 %vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3 %vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5 %vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 %vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2 %vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 %vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10 %vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6 %vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8 %vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13 %vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; 
R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13 %vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15 %vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15 %T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 %T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14 %T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16 %T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17 %T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1 %T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0 RETURN # End machine code for function main. # *** IR Dump Before Merge disjoint stack slots ***: # Machine code for function main: SSA Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X 0BBB#0: derived from LLVM BB %0 Live Ins: %T1_W %T1_Z %T1_Y %T1_X 16B%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3 32B%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2 48B%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1 64B%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0 80B%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3 96B%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5 112B%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 128B%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2 144B%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 160B%vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10 176B%vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; 
R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6 192B%vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8 208B%vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13 224B%vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13 240B%vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15 256B%vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15 272B%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3 288B%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 304B%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 320B%vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14 336B%T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16 352B%T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17 368B%T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3 384B%T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 400B%T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1 416B%T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0 432BRETURN # End machine code for function main. 
# *** IR Dump Before Local Stack Slot Allocation ***: # Machine code for function main: SSA Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X BB#0: derived from LLVM BB %0 Live Ins: %T1_W %T1_Z %T1_Y %T1_X %vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3 %vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2 %vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1 %vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0 %vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3 %vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5 %vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 %vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2 %vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 %vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10 %vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6 %vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8 %vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13 %vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13 %vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15 %vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15 %T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 %T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14 %T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16 %T2_W<def> = COPY %vreg17; 
R600_Reg32:%vreg17 %T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1 %T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0 RETURN # End machine code for function main. # *** IR Dump Before Remove dead machine instructions ***: # Machine code for function main: SSA Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X BB#0: derived from LLVM BB %0 Live Ins: %T1_W %T1_Z %T1_Y %T1_X %vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3 %vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2 %vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1 %vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0 %vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3 %vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5 %vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 %vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2 %vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 %vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10 %vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6 %vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8 %vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13 %vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13 %vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15 %vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15 %T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %vreg16<def> = COPY 
%vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 %T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14 %T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16 %T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17 %T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1 %T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0 RETURN # End machine code for function main. # *** IR Dump Before Machine Loop Invariant Code Motion ***: # Machine code for function main: SSA Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X BB#0: derived from LLVM BB %0 Live Ins: %T1_W %T1_Z %T1_Y %T1_X %vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3 %vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2 %vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1 %vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0 %vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3 %vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5 %vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 %vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2 %vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 %vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10 %vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6 %vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8 %vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13 %vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 
R600_Reg32:%vreg13 %vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15 %vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15 %T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 %T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14 %T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16 %T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17 %T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1 %T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0 RETURN # End machine code for function main. # *** IR Dump Before Machine Common Subexpression Elimination ***: # Machine code for function main: SSA Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X BB#0: derived from LLVM BB %0 Live Ins: %T1_W %T1_Z %T1_Y %T1_X %vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3 %vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2 %vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1 %vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0 %vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3 %vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5 %vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 %vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2 %vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 %vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10 %vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6 
%vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8 %vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13 %vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13 %vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15 %vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15 %T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 %T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14 %T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16 %T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17 %T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1 %T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0 RETURN # End machine code for function main. # *** IR Dump Before Machine code sinking ***: # Machine code for function main: SSA Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X BB#0: derived from LLVM BB %0 Live Ins: %T1_W %T1_Z %T1_Y %T1_X %vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3 %vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2 %vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1 %vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0 %vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3 %vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5 %vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 %vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2 %vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 
0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 %vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10 %vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6 %vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8 %vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13 %vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13 %vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15 %vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15 %T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 %T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14 %T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16 %T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17 %T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1 %T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0 RETURN # End machine code for function main. 
# *** IR Dump Before Peephole Optimizations ***: # Machine code for function main: SSA Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X BB#0: derived from LLVM BB %0 Live Ins: %T1_W %T1_Z %T1_Y %T1_X %vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3 %vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2 %vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1 %vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0 %vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3 %vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5 %vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 %vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2 %vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 %vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10 %vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6 %vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8 %vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13 %vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13 %vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15 %vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15 %T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 %T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14 %T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16 %T2_W<def> = COPY %vreg17; 
R600_Reg32:%vreg17 %T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1 %T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0 RETURN # End machine code for function main. # *** IR Dump Before Process Implicit Definitions ***: # Machine code for function main: SSA Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X BB#0: derived from LLVM BB %0 Live Ins: %T1_W %T1_Z %T1_Y %T1_X %vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3 %vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2 %vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1 %vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0 %vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3 %vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5 %vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 %vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2 %vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 %vreg10<def> = IMPLICIT_DEF; R600_Reg128:%vreg10 %vreg9<def,tied1> = INSERT_SUBREG %vreg10<tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6 %vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8 %vreg13<def> = IMPLICIT_DEF; R600_Reg32:%vreg13 %vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13 %vreg15<def> = IMPLICIT_DEF; R600_Reg32:%vreg15 %vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15 %T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %vreg16<def> = COPY 
%vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 %T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14 %T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16 %T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17 %T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1 %T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0 RETURN # End machine code for function main. # *** IR Dump Before Remove unreachable machine basic blocks ***: # Machine code for function main: SSA Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X BB#0: derived from LLVM BB %0 Live Ins: %T1_W %T1_Z %T1_Y %T1_X %vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3 %vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2 %vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1 %vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0 %vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3 %vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5 %vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 %vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2 %vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 %vreg9<def,tied1> = INSERT_SUBREG %vreg10<undef,tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6 %vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8 %vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, %vreg13<undef>, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13 %vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, 
%vreg15<undef>, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15 %T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 %T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14 %T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16 %T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17 %T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1 %T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0 RETURN # End machine code for function main. # *** IR Dump Before Live Variable Analysis ***: # Machine code for function main: SSA Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X BB#0: derived from LLVM BB %0 Live Ins: %T1_W %T1_Z %T1_Y %T1_X %vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3 %vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2 %vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1 %vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0 %vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3 %vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5 %vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 %vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2 %vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 %vreg9<def,tied1> = INSERT_SUBREG %vreg10<undef,tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6 %vreg11<def,tied1> = INSERT_SUBREG %vreg9<tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8 %vreg12<def,tied1> = INSERT_SUBREG %vreg11<tied0>, 
%vreg13<undef>, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13 %vreg14<def,tied1> = INSERT_SUBREG %vreg12<tied0>, %vreg15<undef>, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15 %T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 %T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %vreg17<def> = COPY %vreg14:sel_y; R600_Reg32:%vreg17 R600_Reg128:%vreg14 %T2_Z<def> = COPY %vreg16; R600_Reg32:%vreg16 %T2_W<def> = COPY %vreg17; R600_Reg32:%vreg17 %T1_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %T1_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %T1_Z<def> = COPY %vreg1; R600_TReg32:%vreg1 %T1_W<def> = COPY %vreg0; R600_TReg32:%vreg0 RETURN # End machine code for function main. # *** IR Dump Before Eliminate PHI nodes for register allocation ***: # Machine code for function main: SSA Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X BB#0: derived from LLVM BB %0 Live Ins: %T1_W %T1_Z %T1_Y %T1_X %vreg3<def> = COPY %T1_X<kill>; R600_TReg32:%vreg3 %vreg2<def> = COPY %T1_Y<kill>; R600_TReg32:%vreg2 %vreg1<def> = COPY %T1_Z<kill>; R600_TReg32:%vreg1 %vreg0<def> = COPY %T1_W<kill>; R600_TReg32:%vreg0 %vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3 %vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5 %vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 %vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2 %vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 %vreg9<def,tied1> = INSERT_SUBREG %vreg10<undef,tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6 
%vreg11<def,tied1> = INSERT_SUBREG %vreg9<kill,tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8 %vreg12<def,tied1> = INSERT_SUBREG %vreg11<kill,tied0>, %vreg13<undef>, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13 %vreg14<def,tied1> = INSERT_SUBREG %vreg12<kill,tied0>, %vreg15<undef>, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15 %T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 %T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %vreg17<def> = COPY %vreg14:sel_y<kill>; R600_Reg32:%vreg17 R600_Reg128:%vreg14 %T2_Z<def> = COPY %vreg16<kill>; R600_Reg32:%vreg16 %T2_W<def> = COPY %vreg17<kill>; R600_Reg32:%vreg17 %T1_X<def> = COPY %vreg3<kill>; R600_TReg32:%vreg3 %T1_Y<def> = COPY %vreg2<kill>; R600_TReg32:%vreg2 %T1_Z<def> = COPY %vreg1<kill>; R600_TReg32:%vreg1 %T1_W<def> = COPY %vreg0<kill>; R600_TReg32:%vreg0 RETURN %T1_W<imp-use,kill>, %T1_Z<imp-use,kill>, %T1_Y<imp-use,kill>, %T1_X<imp-use,kill>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill> # End machine code for function main. 
# *** IR Dump Before Two-Address instruction pass ***: # Machine code for function main: Post SSA Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X BB#0: derived from LLVM BB %0 Live Ins: %T1_W %T1_Z %T1_Y %T1_X %vreg3<def> = COPY %T1_X<kill>; R600_TReg32:%vreg3 %vreg2<def> = COPY %T1_Y<kill>; R600_TReg32:%vreg2 %vreg1<def> = COPY %T1_Z<kill>; R600_TReg32:%vreg1 %vreg0<def> = COPY %T1_W<kill>; R600_TReg32:%vreg0 %vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3 %vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5 %vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 %vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2 %vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 %vreg9<def,tied1> = INSERT_SUBREG %vreg10<undef,tied0>, %vreg6<kill>, sel_x; R600_Reg128:%vreg9,%vreg10 R600_Reg32:%vreg6 %vreg11<def,tied1> = INSERT_SUBREG %vreg9<kill,tied0>, %vreg8<kill>, sel_y; R600_Reg128:%vreg11,%vreg9 R600_Reg32:%vreg8 %vreg12<def,tied1> = INSERT_SUBREG %vreg11<kill,tied0>, %vreg13<undef>, sel_z; R600_Reg128:%vreg12,%vreg11 R600_Reg32:%vreg13 %vreg14<def,tied1> = INSERT_SUBREG %vreg12<kill,tied0>, %vreg15<undef>, sel_w; R600_Reg128:%vreg14,%vreg12 R600_Reg32:%vreg15 %T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 %T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %vreg17<def> = COPY %vreg14:sel_y<kill>; R600_Reg32:%vreg17 R600_Reg128:%vreg14 %T2_Z<def> = COPY %vreg16<kill>; R600_Reg32:%vreg16 %T2_W<def> = COPY %vreg17<kill>; R600_Reg32:%vreg17 %T1_X<def> = COPY %vreg3<kill>; 
R600_TReg32:%vreg3 %T1_Y<def> = COPY %vreg2<kill>; R600_TReg32:%vreg2 %T1_Z<def> = COPY %vreg1<kill>; R600_TReg32:%vreg1 %T1_W<def> = COPY %vreg0<kill>; R600_TReg32:%vreg0 RETURN %T1_W<imp-use,kill>, %T1_Z<imp-use,kill>, %T1_Y<imp-use,kill>, %T1_X<imp-use,kill>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill> # End machine code for function main. # *** IR Dump Before Slot index numbering ***: # Machine code for function main: Post SSA Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X BB#0: derived from LLVM BB %0 Live Ins: %T1_W %T1_Z %T1_Y %T1_X %vreg3<def> = COPY %T1_X<kill>; R600_TReg32:%vreg3 %vreg2<def> = COPY %T1_Y<kill>; R600_TReg32:%vreg2 %vreg1<def> = COPY %T1_Z<kill>; R600_TReg32:%vreg1 %vreg0<def> = COPY %T1_W<kill>; R600_TReg32:%vreg0 %vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3 %vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5 %vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 %vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2 %vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 %vreg9:sel_x<def,read-undef> = COPY %vreg6<kill>; R600_Reg128:%vreg9 R600_Reg32:%vreg6 %vreg11<def> = COPY %vreg9<kill>; R600_Reg128:%vreg11,%vreg9 %vreg11:sel_y<def> = COPY %vreg8<kill>; R600_Reg128:%vreg11 R600_Reg32:%vreg8 %vreg12<def> = COPY %vreg11<kill>; R600_Reg128:%vreg12,%vreg11 %vreg12:sel_z<def> = COPY %vreg13<undef>; R600_Reg128:%vreg12 R600_Reg32:%vreg13 %vreg14<def> = COPY %vreg12<kill>; R600_Reg128:%vreg14,%vreg12 %vreg14:sel_w<def> = COPY %vreg15<undef>; R600_Reg128:%vreg14 
R600_Reg32:%vreg15 %T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3 %vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 %T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 %vreg17<def> = COPY %vreg14:sel_y<kill>; R600_Reg32:%vreg17 R600_Reg128:%vreg14 %T2_Z<def> = COPY %vreg16<kill>; R600_Reg32:%vreg16 %T2_W<def> = COPY %vreg17<kill>; R600_Reg32:%vreg17 %T1_X<def> = COPY %vreg3<kill>; R600_TReg32:%vreg3 %T1_Y<def> = COPY %vreg2<kill>; R600_TReg32:%vreg2 %T1_Z<def> = COPY %vreg1<kill>; R600_TReg32:%vreg1 %T1_W<def> = COPY %vreg0<kill>; R600_TReg32:%vreg0 RETURN %T1_W<imp-use,kill>, %T1_Z<imp-use,kill>, %T1_Y<imp-use,kill>, %T1_X<imp-use,kill>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill> # End machine code for function main. # *** IR Dump Before Live Interval Analysis ***: # Machine code for function main: Post SSA Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X 0BBB#0: derived from LLVM BB %0 Live Ins: %T1_W %T1_Z %T1_Y %T1_X 16B%vreg3<def> = COPY %T1_X<kill>; R600_TReg32:%vreg3 32B%vreg2<def> = COPY %T1_Y<kill>; R600_TReg32:%vreg2 48B%vreg1<def> = COPY %T1_Z<kill>; R600_TReg32:%vreg1 64B%vreg0<def> = COPY %T1_W<kill>; R600_TReg32:%vreg0 80B%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3 96B%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5 112B%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 128B%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2 144B%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 160B%vreg9:sel_x<def,read-undef> = COPY %vreg6<kill>; R600_Reg128:%vreg9 
R600_Reg32:%vreg6 176B%vreg11<def> = COPY %vreg9<kill>; R600_Reg128:%vreg11,%vreg9 192B%vreg11:sel_y<def> = COPY %vreg8<kill>; R600_Reg128:%vreg11 R600_Reg32:%vreg8 208B%vreg12<def> = COPY %vreg11<kill>; R600_Reg128:%vreg12,%vreg11 224B%vreg12:sel_z<def> = COPY %vreg13<undef>; R600_Reg128:%vreg12 R600_Reg32:%vreg13 240B%vreg14<def> = COPY %vreg12<kill>; R600_Reg128:%vreg14,%vreg12 256B%vreg14:sel_w<def> = COPY %vreg15<undef>; R600_Reg128:%vreg14 R600_Reg32:%vreg15 272B%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3 288B%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 304B%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 320B%vreg17<def> = COPY %vreg14:sel_y<kill>; R600_Reg32:%vreg17 R600_Reg128:%vreg14 336B%T2_Z<def> = COPY %vreg16<kill>; R600_Reg32:%vreg16 352B%T2_W<def> = COPY %vreg17<kill>; R600_Reg32:%vreg17 368B%T1_X<def> = COPY %vreg3<kill>; R600_TReg32:%vreg3 384B%T1_Y<def> = COPY %vreg2<kill>; R600_TReg32:%vreg2 400B%T1_Z<def> = COPY %vreg1<kill>; R600_TReg32:%vreg1 416B%T1_W<def> = COPY %vreg0<kill>; R600_TReg32:%vreg0 432BRETURN %T1_W<imp-use,kill>, %T1_Z<imp-use,kill>, %T1_Y<imp-use,kill>, %T1_X<imp-use,kill>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill> # End machine code for function main. 
********** COMPUTING LIVE INTERVALS ********** ********** Function: main BB#0:# derived from 16B%vreg3<def> = COPY %T1_X<kill>; R600_TReg32:%vreg3 register: %vreg3 +[16r,368r:0) 32B%vreg2<def> = COPY %T1_Y<kill>; R600_TReg32:%vreg2 register: %vreg2 +[32r,384r:0) 48B%vreg1<def> = COPY %T1_Z<kill>; R600_TReg32:%vreg1 register: %vreg1 +[48r,400r:0) 64B%vreg0<def> = COPY %T1_W<kill>; R600_TReg32:%vreg0 register: %vreg0 +[64r,416r:0) 80B%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3 register: %vreg4 +[80r,112r:0) 96B%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5 register: %vreg5 +[96r,144r:0) 112B%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 register: %vreg6 +[112r,160r:0) 128B%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2 register: %vreg7 +[128r,144r:0) 144B%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 register: %vreg8 +[144r,192r:0) 160B%vreg9:sel_x<def,read-undef> = COPY %vreg6<kill>; R600_Reg128:%vreg9 R600_Reg32:%vreg6 register: %vreg9 +[160r,176r:0) 176B%vreg11<def> = COPY %vreg9<kill>; R600_Reg128:%vreg11,%vreg9 register: %vreg11 +[176r,208r:0) 192B%vreg11:sel_y<def> = COPY %vreg8<kill>; R600_Reg128:%vreg11 R600_Reg32:%vreg8 register: %vreg11 replace range with [176r,192r:1) RESULT: [176r,192r:1)[192r,208r:0) 0 at 192r 1 at 176r 208B%vreg12<def> = COPY %vreg11<kill>; R600_Reg128:%vreg12,%vreg11 register: %vreg12 +[208r,240r:0) 224B%vreg12:sel_z<def> = COPY %vreg13<undef>; R600_Reg128:%vreg12 R600_Reg32:%vreg13 register: %vreg12 replace range with [208r,224r:1) RESULT: [208r,224r:1)[224r,240r:0) 0 at 224r 1 at 208r 240B%vreg14<def> = COPY %vreg12<kill>; R600_Reg128:%vreg14,%vreg12 register: %vreg14 
+[240r,320r:0) 256B%vreg14:sel_w<def> = COPY %vreg15<undef>; R600_Reg128:%vreg14 R600_Reg32:%vreg15 register: %vreg14 replace range with [240r,256r:1) RESULT: [240r,256r:1)[256r,320r:0) 0 at 256r 1 at 240r 272B%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3 288B%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 register: %vreg16 +[288r,336r:0) 304B%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 320B%vreg17<def> = COPY %vreg14:sel_y<kill>; R600_Reg32:%vreg17 R600_Reg128:%vreg14 register: %vreg17 +[320r,352r:0) 336B%T2_Z<def> = COPY %vreg16<kill>; R600_Reg32:%vreg16 352B%T2_W<def> = COPY %vreg17<kill>; R600_Reg32:%vreg17 368B%T1_X<def> = COPY %vreg3<kill>; R600_TReg32:%vreg3 384B%T1_Y<def> = COPY %vreg2<kill>; R600_TReg32:%vreg2 400B%T1_Z<def> = COPY %vreg1<kill>; R600_TReg32:%vreg1 416B%T1_W<def> = COPY %vreg0<kill>; R600_TReg32:%vreg0 432BRETURN %T1_W<imp-use,kill>, %T1_Z<imp-use,kill>, %T1_Y<imp-use,kill>, %T1_X<imp-use,kill>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill> Computing live-in reg-units in ABI blocks. 0BBB#0 T1_W#0 T1_Z#0 T1_Y#0 T1_X#0 Created 4 new intervals. 
********** INTERVALS ********** T1_W = [0B,64r:0)[416r,432r:1) 0 at 0B-phi 1 at 416r T1_X = [0B,16r:0)[368r,432r:1) 0 at 0B-phi 1 at 368r T1_Y = [0B,32r:0)[384r,432r:1) 0 at 0B-phi 1 at 384r T1_Z = [0B,48r:0)[400r,432r:1) 0 at 0B-phi 1 at 400r %vreg0 = [64r,416r:0) 0 at 64r %vreg1 = [48r,400r:0) 0 at 48r %vreg2 = [32r,384r:0) 0 at 32r %vreg3 = [16r,368r:0) 0 at 16r %vreg4 = [80r,112r:0) 0 at 80r %vreg5 = [96r,144r:0) 0 at 96r %vreg6 = [112r,160r:0) 0 at 112r %vreg7 = [128r,144r:0) 0 at 128r %vreg8 = [144r,192r:0) 0 at 144r %vreg9 = [160r,176r:0) 0 at 160r %vreg11 = [176r,192r:1)[192r,208r:0) 0 at 192r 1 at 176r %vreg12 = [208r,224r:1)[224r,240r:0) 0 at 224r 1 at 208r %vreg13 = EMPTY %vreg14 = [240r,256r:1)[256r,320r:0) 0 at 256r 1 at 240r %vreg15 = EMPTY %vreg16 = [288r,336r:0) 0 at 288r %vreg17 = [320r,352r:0) 0 at 320r ********** MACHINEINSTRS ********** # Machine code for function main: Post SSA Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X 0BBB#0: derived from LLVM BB %0 Live Ins: %T1_W %T1_Z %T1_Y %T1_X 16B%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3 32B%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2 48B%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1 64B%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0 80B%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3 96B%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5 112B%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 128B%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2 144B%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 160B%vreg9:sel_x<def,read-undef> = COPY %vreg6<kill>; 
R600_Reg128:%vreg9 R600_Reg32:%vreg6 176B%vreg11<def> = COPY %vreg9<kill>; R600_Reg128:%vreg11,%vreg9 192B%vreg11:sel_y<def> = COPY %vreg8<kill>; R600_Reg128:%vreg11 R600_Reg32:%vreg8 208B%vreg12<def> = COPY %vreg11<kill>; R600_Reg128:%vreg12,%vreg11 224B%vreg12:sel_z<def> = COPY %vreg13<undef>; R600_Reg128:%vreg12 R600_Reg32:%vreg13 240B%vreg14<def> = COPY %vreg12<kill>; R600_Reg128:%vreg14,%vreg12 256B%vreg14:sel_w<def> = COPY %vreg15<undef>; R600_Reg128:%vreg14 R600_Reg32:%vreg15 272B%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3 288B%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 304B%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 320B%vreg17<def> = COPY %vreg14:sel_y<kill>; R600_Reg32:%vreg17 R600_Reg128:%vreg14 336B%T2_Z<def> = COPY %vreg16<kill>; R600_Reg32:%vreg16 352B%T2_W<def> = COPY %vreg17<kill>; R600_Reg32:%vreg17 368B%T1_X<def> = COPY %vreg3<kill>; R600_TReg32:%vreg3 384B%T1_Y<def> = COPY %vreg2<kill>; R600_TReg32:%vreg2 400B%T1_Z<def> = COPY %vreg1<kill>; R600_TReg32:%vreg1 416B%T1_W<def> = COPY %vreg0<kill>; R600_TReg32:%vreg0 432BRETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill> # End machine code for function main. 
# *** IR Dump Before Debug Variable Analysis ***: # Machine code for function main: Post SSA Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X 0BBB#0: derived from LLVM BB %0 Live Ins: %T1_W %T1_Z %T1_Y %T1_X 16B%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3 32B%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2 48B%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1 64B%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0 80B%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3 96B%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5 112B%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 128B%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2 144B%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 160B%vreg9:sel_x<def,read-undef> = COPY %vreg6<kill>; R600_Reg128:%vreg9 R600_Reg32:%vreg6 176B%vreg11<def> = COPY %vreg9<kill>; R600_Reg128:%vreg11,%vreg9 192B%vreg11:sel_y<def> = COPY %vreg8<kill>; R600_Reg128:%vreg11 R600_Reg32:%vreg8 208B%vreg12<def> = COPY %vreg11<kill>; R600_Reg128:%vreg12,%vreg11 224B%vreg12:sel_z<def> = COPY %vreg13<undef>; R600_Reg128:%vreg12 R600_Reg32:%vreg13 240B%vreg14<def> = COPY %vreg12<kill>; R600_Reg128:%vreg14,%vreg12 256B%vreg14:sel_w<def> = COPY %vreg15<undef>; R600_Reg128:%vreg14 R600_Reg32:%vreg15 272B%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3 288B%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 304B%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 320B%vreg17<def> = COPY %vreg14:sel_y<kill>; R600_Reg32:%vreg17 R600_Reg128:%vreg14 336B%T2_Z<def> = COPY %vreg16<kill>; R600_Reg32:%vreg16 352B%T2_W<def> = COPY 
%vreg17<kill>; R600_Reg32:%vreg17 368B%T1_X<def> = COPY %vreg3<kill>; R600_TReg32:%vreg3 384B%T1_Y<def> = COPY %vreg2<kill>; R600_TReg32:%vreg2 400B%T1_Z<def> = COPY %vreg1<kill>; R600_TReg32:%vreg1 416B%T1_W<def> = COPY %vreg0<kill>; R600_TReg32:%vreg0 432BRETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill> # End machine code for function main. # *** IR Dump Before Simple Register Coalescing ***: # Machine code for function main: Post SSA Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 Function Live Outs: %T1_W %T1_Z %T1_Y %T1_X %T2_W %T2_Z %T2_Y %T2_X 0BBB#0: derived from LLVM BB %0 Live Ins: %T1_W %T1_Z %T1_Y %T1_X 16B%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3 32B%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2 48B%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1 64B%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0 80B%vreg4<def> = MOV 1, 0, 0, 0, %vreg3, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg4 R600_TReg32:%vreg3 96B%vreg5<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg5 112B%vreg6<def> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg6,%vreg4,%vreg5 128B%vreg7<def> = MOV 1, 0, 0, 0, %vreg2, 1, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg7 R600_TReg32:%vreg2 144B%vreg8<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg8,%vreg7,%vreg5 160B%vreg9:sel_x<def,read-undef> = COPY %vreg6<kill>; R600_Reg128:%vreg9 R600_Reg32:%vreg6 176B%vreg11<def> = COPY %vreg9<kill>; R600_Reg128:%vreg11,%vreg9 192B%vreg11:sel_y<def> = COPY %vreg8<kill>; R600_Reg128:%vreg11 R600_Reg32:%vreg8 208B%vreg12<def> = COPY %vreg11<kill>; R600_Reg128:%vreg12,%vreg11 224B%vreg12:sel_z<def> = COPY %vreg13<undef>; R600_Reg128:%vreg12 R600_Reg32:%vreg13 240B%vreg14<def> = COPY %vreg12<kill>; 
R600_Reg128:%vreg14,%vreg12 256B%vreg14:sel_w<def> = COPY %vreg15<undef>; R600_Reg128:%vreg14 R600_Reg32:%vreg15 272B%T2_X<def> = COPY %vreg3; R600_TReg32:%vreg3 288B%vreg16<def> = COPY %vreg14:sel_x; R600_Reg32:%vreg16 R600_Reg128:%vreg14 304B%T2_Y<def> = COPY %vreg2; R600_TReg32:%vreg2 320B%vreg17<def> = COPY %vreg14:sel_y<kill>; R600_Reg32:%vreg17 R600_Reg128:%vreg14 336B%T2_Z<def> = COPY %vreg16<kill>; R600_Reg32:%vreg16 352B%T2_W<def> = COPY %vreg17<kill>; R600_Reg32:%vreg17 368B%T1_X<def> = COPY %vreg3<kill>; R600_TReg32:%vreg3 384B%T1_Y<def> = COPY %vreg2<kill>; R600_TReg32:%vreg2 400B%T1_Z<def> = COPY %vreg1<kill>; R600_TReg32:%vreg1 416B%T1_W<def> = COPY %vreg0<kill>; R600_TReg32:%vreg0 432BRETURN %T1_W<imp-use>, %T1_Z<imp-use>, %T1_Y<imp-use>, %T1_X<imp-use>, %T2_W<imp-use,kill>, %T2_Z<imp-use,kill>, %T2_Y<imp-use,kill>, %T2_X<imp-use,kill> # End machine code for function main. ********** SIMPLE REGISTER COALESCING ********** ********** Function: main ********** JOINING INTERVALS *********** : 16B%vreg3<def> = COPY %T1_X; R600_TReg32:%vreg3 Considering merging %vreg3 with %T1_X Can only merge into reserved registers. 32B%vreg2<def> = COPY %T1_Y; R600_TReg32:%vreg2 Considering merging %vreg2 with %T1_Y Can only merge into reserved registers. 48B%vreg1<def> = COPY %T1_Z; R600_TReg32:%vreg1 Considering merging %vreg1 with %T1_Z Can only merge into reserved registers. 64B%vreg0<def> = COPY %T1_W; R600_TReg32:%vreg0 Considering merging %vreg0 with %T1_W Can only merge into reserved registers. 
160B%vreg9:sel_x<def,read-undef> = COPY %vreg6<kill>; R600_Reg128:%vreg9 R600_Reg32:%vreg6 Considering merging to R600_Reg128 with %vreg6 in %vreg9:sel_x RHS = %vreg6 [112r,160r:0) 0 at 112r LHS = %vreg9 [160r,176r:0) 0 at 160r merge %vreg9:0 at 160r into %vreg6:0 at 112r --> @112r erased:160r%vreg9:sel_x<def,read-undef> = COPY %vreg6<kill>; R600_Reg128:%vreg9 R600_Reg32:%vreg6 AllocationOrder(R600_Reg128) = [ %T0_XYZW %T1_XYZW %T2_XYZW %T3_XYZW %T4_XYZW %T5_XYZW %T6_XYZW %T7_XYZW %T8_XYZW %T9_XYZW %T10_XYZW %T11_XYZW %T12_XYZW %T13_XYZW %T14_XYZW %T15_XYZW %T16_XYZW %T17_XYZW %T18_XYZW %T19_XYZW %T20_XYZW %T21_XYZW %T22_XYZW %T23_XYZW %T24_XYZW %T25_XYZW %T26_XYZW %T27_XYZW %T28_XYZW %T29_XYZW %T30_XYZW %T31_XYZW %T32_XYZW %T33_XYZW %T34_XYZW %T35_XYZW %T36_XYZW %T37_XYZW %T38_XYZW %T39_XYZW %T40_XYZW %T41_XYZW %T42_XYZW %T43_XYZW %T44_XYZW %T45_XYZW %T46_XYZW %T47_XYZW %T48_XYZW %T49_XYZW %T50_XYZW %T51_XYZW %T52_XYZW %T53_XYZW %T54_XYZW %T55_XYZW %T56_XYZW %T57_XYZW %T58_XYZW %T59_XYZW %T60_XYZW %T61_XYZW %T62_XYZW %T63_XYZW %T64_XYZW %T65_XYZW %T66_XYZW %T67_XYZW %T68_XYZW %T69_XYZW %T70_XYZW %T71_XYZW %T72_XYZW %T73_XYZW %T74_XYZW %T75_XYZW %T76_XYZW %T77_XYZW %T78_XYZW %T79_XYZW %T80_XYZW %T81_XYZW %T82_XYZW %T83_XYZW %T84_XYZW %T85_XYZW %T86_XYZW %T87_XYZW %T88_XYZW %T89_XYZW %T90_XYZW %T91_XYZW %T92_XYZW %T93_XYZW %T94_XYZW %T95_XYZW %T96_XYZW %T97_XYZW %T98_XYZW %T99_XYZW %T100_XYZW %T101_XYZW %T102_XYZW %T103_XYZW %T104_XYZW %T105_XYZW %T106_XYZW %T107_XYZW %T108_XYZW %T109_XYZW %T110_XYZW %T111_XYZW %T112_XYZW %T113_XYZW %T114_XYZW %T115_XYZW %T116_XYZW %T117_XYZW %T118_XYZW %T119_XYZW %T120_XYZW %T121_XYZW %T122_XYZW %T123_XYZW %T124_XYZW %T125_XYZW %T126_XYZW %T127_XYZW ] updated: 112B%vreg9:sel_x<def,read-undef> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg128:%vreg9 R600_Reg32:%vreg4,%vreg5 Joined. 
Result = %vreg9[112r,176r:0) 0 at 112r 176B%vreg11<def> = COPY %vreg9; R600_Reg128:%vreg11,%vreg9 Considering merging to R600_Reg128 with %vreg9 in %vreg11 RHS = %vreg9 [112r,176r:0) 0 at 112r LHS = %vreg11 [176r,192r:1)[192r,208r:0) 0 at 192r 1 at 176r merge %vreg11:1 at 176r into %vreg9:0 at 112r --> @112r erased:176r%vreg11<def> = COPY %vreg9; R600_Reg128:%vreg11,%vreg9 updated: 112B%vreg11:sel_x<def,read-undef> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg128:%vreg11 R600_Reg32:%vreg4,%vreg5 Joined. Result = %vreg11[112r,192r:0)[192r,208r:1) 0 at 112r 1 at 192r 192B%vreg11:sel_y<def> = COPY %vreg8<kill>; R600_Reg128:%vreg11 R600_Reg32:%vreg8 Considering merging to R600_Reg128 with %vreg8 in %vreg11:sel_y RHS = %vreg8 [144r,192r:0) 0 at 144r LHS = %vreg11 [112r,192r:0)[192r,208r:1) 0 at 112r 1 at 192r merge %vreg11:1 at 192r into %vreg8:0 at 144r --> @144r pruned %vreg11 at 144r: [112r,144r:0)[192r,208r:1) 0 at 112r 1 at 192r erased:192r%vreg11:sel_y<def> = COPY %vreg8<kill>; R600_Reg128:%vreg11 R600_Reg32:%vreg8 restoring liveness to 2 points: [112r,144r:0)[144r,208r:1) 0 at 112r 1 at 144r updated: 144B%vreg11:sel_y<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg128:%vreg11 R600_Reg32:%vreg7,%vreg5 Joined. 
Result = %vreg11[112r,144r:0)[144r,208r:1) 0 at 112r 1 at 144r 208B%vreg12<def> = COPY %vreg11; R600_Reg128:%vreg12,%vreg11 Considering merging to R600_Reg128 with %vreg11 in %vreg12 RHS = %vreg11 [112r,144r:0)[144r,208r:1) 0 at 112r 1 at 144r LHS = %vreg12 [208r,224r:1)[224r,240r:0) 0 at 224r 1 at 208r merge %vreg12:1 at 208r into %vreg11:1 at 144r --> @144r erased:208r%vreg12<def> = COPY %vreg11; R600_Reg128:%vreg12,%vreg11 updated: 144B%vreg12:sel_y<def> = ADD 0, 0, 1, 0, 0, 0, %vreg7<kill>, 0, 0, 0, %vreg5<kill>, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg128:%vreg12 R600_Reg32:%vreg7,%vreg5 updated: 112B%vreg12:sel_x<def,read-undef> = ADD 0, 0, 1, 0, 0, 0, %vreg4<kill>, 0, 0, 0, %vreg5, 0, 0, 0, 1, pred:%PRED_SEL_OFF, 0; R600_Reg128:%vreg12 R600_Reg32:%vreg4,%vreg5 Joined. Result = %vreg12[112r,144r:0)[144r,224r:1)[224r,240r:2) 0 at 112r 1 at 144r 2 at 224r 224B%vreg12:sel_z<def> = COPY %vreg13<undef>; R600_Reg128:%vreg12 R600_Reg32:%vreg13 Considering merging to R600_Reg128 with %vreg13 in %vreg12:sel_z RHS = %vreg13 EMPTY LHS = %vreg12 [112r,144r:0)[144r,224r:1)[224r,240r:2) 0 at 112r 1 at 144r 2 at 224r llc: /home/vlj/llvm/include/llvm/ADT/SmallVector.h:143: const T& llvm::SmallVectorTemplateCommon<T, <template-parameter-1-2> >::operator[](unsigned int) const [with T = int; <template-parameter-1-2> = void; llvm::SmallVectorTemplateCommon<T, <template-parameter-1-2> >::const_reference = const int&]: Assertion `begin() + idx < end()' failed. 
0 llc 0x00000000014c4020 1 llc 0x00000000014c3cf6 2 libpthread.so.0 0x0000003dd820efe0 3 libc.so.6 0x0000003dd7e35925 gsignal + 53 4 libc.so.6 0x0000003dd7e370d8 abort + 328 5 libc.so.6 0x0000003dd7e2e6a2 6 libc.so.6 0x0000003dd7e2e752 7 llc 0x000000000094760b 8 llc 0x00000000010c8923 9 llc 0x00000000010caef9 10 llc 0x00000000010cb0e6 11 llc 0x00000000010c7f54 12 llc 0x00000000010cb2d0 13 llc 0x00000000010cb497 14 llc 0x00000000010cb618 15 llc 0x00000000010cba3d 16 llc 0x000000000102e90d llvm::MachineFunctionPass::runOnFunction(llvm::Function&) + 95 17 llc 0x00000000013ea521 llvm::FPPassManager::runOnFunction(llvm::Function&) + 383 18 llc 0x00000000013ea734 llvm::FPPassManager::runOnModule(llvm::Module&) + 102 19 llc 0x00000000013eaa5c llvm::MPPassManager::runOnModule(llvm::Module&) + 442 20 llc 0x00000000013eaf74 llvm::PassManagerImpl::run(llvm::Module&) + 120 21 llc 0x00000000013eb127 llvm::PassManager::run(llvm::Module&) + 39 22 llc 0x0000000000816ff3 main + 4244 23 libc.so.6 0x0000003dd7e21735 __libc_start_main + 245 24 llc 0x0000000000815989 Stack dump: 0.Program arguments: llc -march=r600 -mcpu=cayman -print-before-all -debug-only=regalloc /home/vlj/shader 1.Running pass 'Function Pass Manager' on module '/home/vlj/shader'. 2.Running pass 'Simple Register Coalescing' on function '@main'
Jakob Stoklund Olesen
2012-Oct-20 22:05 UTC
[LLVMdev] RegisterCoalescing pass crashes with ImplicitDef registers
On Oct 20, 2012, at 1:23 PM, Vincent Lejeune <vljn at ovi.com> wrote: > below is an output of "llc -march=r600 -mcpu=cayman -print-before-all -debug-only=regalloc file.shader" command from llvm3.2svn. > The register coalescing pass crashes when joining vreg12:sel_z with vreg13 registers, because it tries to access the interval liveness of vreg13... which is undefined. > > I don't know if it's a bug of the pass, or if my backend should do something specific before calling the pass. > It worked with llvm 3.1, I don't know if there was a requirement introduced between 3.1 and current trunk related to register coalescing. It's probably a simple bug in the new coalescing algorithm. Could you get me a better stack trace from a debug build? /jakob -------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20121020/406625d6/attachment.html>
Vincent Lejeune
2012-Oct-20 22:37 UTC
[LLVMdev] RegisterCoalescing pass crashes with ImplicitDef registers
Here it is : Starting program: /home/vlj/llvmbin/bin/llc -march=r600 -mcpu=cayman /home/vlj/shader [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib64/libthread_db.so.1". llc: /home/vlj/llvm/include/llvm/ADT/SmallVector.h:143: const T& llvm::SmallVectorTemplateCommon<T, <template-parameter-1-2> >::operator[](unsigned int) const [with T = int; <template-parameter-1-2> = void; llvm::SmallVectorTemplateCommon<T, <template-parameter-1-2> >::const_reference = const int&]: Assertion `begin() + idx < end()' failed. Program received signal SIGABRT, Aborted. 0x0000003dd7e35925 in raise () from /lib64/libc.so.6 Missing separate debuginfos, use: debuginfo-install glibc-2.15-57.fc17.x86_64 libgcc-4.7.2-2.fc17.x86_64 libstdc++-4.7.2-2.fc17.x86_64 (gdb) bt #0 0x0000003dd7e35925 in raise () from /lib64/libc.so.6 #1 0x0000003dd7e370d8 in abort () from /lib64/libc.so.6 #2 0x0000003dd7e2e6a2 in __assert_fail_base () from /lib64/libc.so.6 #3 0x0000003dd7e2e752 in __assert_fail () from /lib64/libc.so.6 #4 0x000000000094760b in llvm::SmallVectorTemplateCommon<int, void>::operator[] (this=0x7fffffffd268, idx=0) at /home/vlj/llvm/include/llvm/ADT/SmallVector.h:143 #5 0x00000000010c8e83 in (anonymous namespace)::JoinVals::getAssignments (this=0x7fffffffd230) at RegisterCoalescer.cpp:1305 #6 0x00000000010cb459 in (anonymous namespace)::RegisterCoalescer::joinVirtRegs (this=0x21b62b0, CP=...) at RegisterCoalescer.cpp:1861 #7 0x00000000010cb646 in (anonymous namespace)::RegisterCoalescer::joinIntervals (this=0x21b62b0, CP=...) 
at RegisterCoalescer.cpp:1883 #8 0x00000000010c84b4 in (anonymous namespace)::RegisterCoalescer::joinCopy (this=0x21b62b0, CopyMI=0x21e8cf8, Again=@0x7fffffffd7a2: false) at RegisterCoalescer.cpp:1002 #9 0x00000000010cb830 in (anonymous namespace)::RegisterCoalescer::copyCoalesceWorkList (this=0x21b62b0, From=0) at RegisterCoalescer.cpp:1924 #10 0x00000000010cb9f7 in (anonymous namespace)::RegisterCoalescer::copyCoalesceInMBB (this=0x21b62b0, MBB=0x21e88e0) at RegisterCoalescer.cpp:1947 #11 0x00000000010cbb78 in (anonymous namespace)::RegisterCoalescer::joinAllIntervals (this=0x21b62b0) at RegisterCoalescer.cpp:1960 #12 0x00000000010cbf9d in (anonymous namespace)::RegisterCoalescer::runOnMachineFunction (this=0x21b62b0, fn=...) at RegisterCoalescer.cpp:2016 #13 0x000000000102ee6d in llvm::MachineFunctionPass::runOnFunction (this=0x21b62b0, F=...) at MachineFunctionPass.cpp:33 #14 0x00000000013eaa81 in llvm::FPPassManager::runOnFunction (this=0x21b5b80, F=...) at PassManager.cpp:1498 #15 0x00000000013eac94 in llvm::FPPassManager::runOnModule (this=0x21b5b80, M=...) at PassManager.cpp:1518 #16 0x00000000013eafbc in llvm::MPPassManager::runOnModule (this=0x21b0e70, M=...) at PassManager.cpp:1572 #17 0x00000000013eb4d4 in llvm::PassManagerImpl::run (this=0x21b0b60, M=...) at PassManager.cpp:1655 #18 0x00000000013eb687 in llvm::PassManager::run (this=0x7fffffffdd60, M=...) 
at PassManager.cpp:1684 #19 0x0000000000816ff3 in main (argc=4, argv=0x7fffffffe108) at llc.cpp:362 Thanks for your fast answer. Regards, Vincent >________________________________ > De : Jakob Stoklund Olesen <stoklund at 2pi.dk> >À : Vincent Lejeune <vljn at ovi.com> >Cc : "llvmdev at cs.uiuc.edu" <llvmdev at cs.uiuc.edu> >Envoyé le : Dimanche 21 octobre 2012 0h05 >Objet : Re: [LLVMdev] RegisterCoalescing pass crashes with ImplicitDef registers > > > > >On Oct 20, 2012, at 1:23 PM, Vincent Lejeune <vljn at ovi.com> wrote: > >below is an output of "llc -march=r600 -mcpu=cayman -print-before-all -debug-only=regalloc file.shader" command from llvm3.2svn. >>The register coalescing pass crashes when joining vreg12:sel_z with vreg13 registers, because it tries to access the interval liveness of vreg13... which is undefined. >> >>I don't know if it's a bug of the pass, or if my backend should do something specific before calling the pass. >>It worked with llvm 3.1, I don't know if there was a requirement introduced between 3.1 and current trunk related to register coalescing. >> > >It's probably a simple bug in the new coalescing algorithm. Could you get me a better stack trace from a debug build? > > >/jakob > > > >
Apparently Analogous Threads
- [LLVMdev] RegisterCoalescing Pass seems to ignore part of CFG.
- [LLVMdev] RegisterCoalescing Pass seems to ignore part of CFG.
- [LLVMdev] RegisterCoalescing Pass seems to ignore part of CFG.
- [LLVMdev] RegisterCoalescing Pass seems to ignore part of CFG.
- [LLVMdev] RegisterCoalescing Pass seems to ignore part of CFG.