Dmitry N. Mikushin
2012-Jul-10 22:15 UTC
[LLVMdev] [NVPTX] CUDA inline PTX asm definitions scoping "{" "}" is broken
Hi,

Looks like "{" and "}" are lost when trying to use the combination of Clang and NVPTX, which may result in a clash of definitions of the function-scope and asm-scope. Here is an example:

> cat test.cu
__attribute__((device)) __attribute__((nv_linkonce_odr)) __inline__ int __any(int a) {
    int result;
    asm __volatile__ ("{ \n\t"
        ".reg .pred \t%%p1; \n\t"
        ".reg .pred \t%%p2; \n\t"
        "setp.ne.u32 \t%%p1, %1, 0; \n\t"
        "vote.any.pred \t%%p2, %%p1; \n\t"
        "selp.s32 \t%0, 1, 0, %%p2; \n\t"
        "}" : "=r"(result) : "r"(a));
    return result;
}

> clang -cc1 -emit-llvm -fcuda-is-device -triple ptx64-unknown-unknown test.cu -o test.ll
> cat test.ll
; ModuleID = 'test.cu'
target datalayout = "e-p:64:64-i64:64:64-f64:64:64-n1:8:16:32:64"
target triple = "ptx64-unknown-unknown"

define ptx_device i32 @_Z5__anyi(i32 %a) nounwind inlinehint {
entry:
  %a.addr = alloca i32, align 4
  %result = alloca i32, align 4
  store i32 %a, i32* %a.addr, align 4
  %0 = load i32* %a.addr, align 4
  %1 = call i32 asm sideeffect "$( \0A\09.reg .pred \09%p1; \0A\09.reg .pred \09%p2; \0A\09setp.ne.u32 \09%p1, $1, 0; \0A\09vote.any.pred \09%p2, %p1; \0A\09selp.s32 \09$0, 1, 0, %p2; \0A\09$)", "=r,r"(i32 %0) nounwind, !srcloc !0
  store i32 %1, i32* %result, align 4
  %2 = load i32* %result, align 4
  ret i32 %2
}

!0 = metadata !{i32 127, i32 132, i32 166, i32 200, i32 242, i32 285, i32 327}

> llc -march=nvptx64 test.ll -o test.ptx
> cat test.ptx
//
// Generated by LLVM NVPTX Back-End
//

.version 3.0
.target sm_10, texmode_independent
.address_size 64


    // .globl _Z5__anyi
.visible .global .align 4 .b8 __local_depot0[8];

.func (.reg .b32 func_retval0) _Z5__anyi(
    .reg .b32 _Z5__anyi_param_0
) // @_Z5__anyi
{
    .reg .b64 %SP;
    .reg .b64 %SPL;
    .reg .pred %p<396>;
    .reg .s16 %rc<396>;
    .reg .s16 %rs<396>;
    .reg .s32 %r<396>;
    .reg .s64 %rl<396>;
    .reg .f32 %f<396>;
    .reg .f64 %fl<396>;

// BB#0: // %entry
    mov.u64 %SP, __local_depot0;
    mov.b32 %r0, _Z5__anyi_param_0;
    st.global.u32 [%SP+0], %r0;
    // inline asm

    .reg .pred %p1;
    .reg .pred %p2;
    setp.ne.u32 %p1, %r0, 0;
    vote.any.pred %p2, %p1;
    selp.s32 %r0, 1, 0, %p2;

    // inline asm
    st.global.u32 [%SP+4], %r0;
    mov.b32 func_retval0, %r0;
    ret;
}

> ptxas test.ptx -o test.cubin
ptxas test.ptx, line 33; error : Duplicate definition of variable '%p1'
ptxas test.ptx, line 34; error : Duplicate definition of variable '%p2'
ptxas test.ptx, line 36; error : Instruction 'vote' requires .target sm_12 or higher
ptxas fatal : Ptx assembly aborted due to errors

- D.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20120711/56136610/attachment.html>
Chad Rosier
2012-Jul-10 22:20 UTC
[LLVMdev] [NVPTX] CUDA inline PTX asm definitions scoping "{" "}" is broken
Dmitry,

You might be better served by filing this as a bug (http://llvm.org/bugs/). Please include a test case and the steps to reproduce (i.e., what you've provided below).

Chad

On Jul 10, 2012, at 3:15 PM, Dmitry N. Mikushin wrote:

> Hi,
>
> Looks like "{" and "}" are lost when trying to use the combination of Clang and NVPTX, which may result into clash of definitions of the function-scope and asm-scope. Here is an example:
>
> > cat test.cu
> __attribute__((device)) __attribute__((nv_linkonce_odr)) __inline__ int __any(int a) {
>     int result;
>     asm __volatile__ ("{ \n\t"
>         ".reg .pred \t%%p1; \n\t"
>         ".reg .pred \t%%p2; \n\t"
>         "setp.ne.u32 \t%%p1, %1, 0; \n\t"
>         "vote.any.pred \t%%p2, %%p1; \n\t"
>         "selp.s32 \t%0, 1, 0, %%p2; \n\t"
>         "}" : "=r"(result) : "r"(a));
>     return result;
> }
>
> > clang -cc1 -emit-llvm -fcuda-is-device -triple ptx64-unknown-unknown test.cu -o test.ll
> > cat test.ll
> ; ModuleID = 'test.cu'
> target datalayout = "e-p:64:64-i64:64:64-f64:64:64-n1:8:16:32:64"
> target triple = "ptx64-unknown-unknown"
>
> define ptx_device i32 @_Z5__anyi(i32 %a) nounwind inlinehint {
> entry:
>   %a.addr = alloca i32, align 4
>   %result = alloca i32, align 4
>   store i32 %a, i32* %a.addr, align 4
>   %0 = load i32* %a.addr, align 4
>   %1 = call i32 asm sideeffect "$( \0A\09.reg .pred \09%p1; \0A\09.reg .pred \09%p2; \0A\09setp.ne.u32 \09%p1, $1, 0; \0A\09vote.any.pred \09%p2, %p1; \0A\09selp.s32 \09$0, 1, 0, %p2; \0A\09$)", "=r,r"(i32 %0) nounwind, !srcloc !0
>   store i32 %1, i32* %result, align 4
>   %2 = load i32* %result, align 4
>   ret i32 %2
> }
>
> !0 = metadata !{i32 127, i32 132, i32 166, i32 200, i32 242, i32 285, i32 327}
>
> > llc -march=nvptx64 test.ll -o test.ptx
> > cat test.ptx
> //
> // Generated by LLVM NVPTX Back-End
> //
>
> .version 3.0
> .target sm_10, texmode_independent
> .address_size 64
>
>
>     // .globl _Z5__anyi
> .visible .global .align 4 .b8 __local_depot0[8];
>
> .func (.reg .b32 func_retval0) _Z5__anyi(
>     .reg .b32 _Z5__anyi_param_0
> ) // @_Z5__anyi
> {
>     .reg .b64 %SP;
>     .reg .b64 %SPL;
>     .reg .pred %p<396>;
>     .reg .s16 %rc<396>;
>     .reg .s16 %rs<396>;
>     .reg .s32 %r<396>;
>     .reg .s64 %rl<396>;
>     .reg .f32 %f<396>;
>     .reg .f64 %fl<396>;
>
> // BB#0: // %entry
>     mov.u64 %SP, __local_depot0;
>     mov.b32 %r0, _Z5__anyi_param_0;
>     st.global.u32 [%SP+0], %r0;
>     // inline asm
>
>     .reg .pred %p1;
>     .reg .pred %p2;
>     setp.ne.u32 %p1, %r0, 0;
>     vote.any.pred %p2, %p1;
>     selp.s32 %r0, 1, 0, %p2;
>
>     // inline asm
>     st.global.u32 [%SP+4], %r0;
>     mov.b32 func_retval0, %r0;
>     ret;
> }
>
> > ptxas test.ptx -o test.cubin
> ptxas test.ptx, line 33; error : Duplicate definition of variable '%p1'
> ptxas test.ptx, line 34; error : Duplicate definition of variable '%p2'
> ptxas test.ptx, line 36; error : Instruction 'vote' requires .target sm_12 or higher
> ptxas fatal : Ptx assembly aborted due to errors
>
> - D.
>
> _______________________________________________
> LLVM Developers mailing list
> LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20120710/b29a1f43/attachment.html>
Dmitry N. Mikushin
2012-Jul-10 22:26 UTC
[LLVMdev] [NVPTX] CUDA inline PTX asm definitions scoping "{" "}" is broken
Yes, sure, good idea, because it might also be Clang-related.

http://llvm.org/bugs/show_bug.cgi?id=13322

2012/7/11 Chad Rosier <mcrosier at apple.com>

> Dmitry,
> You might be better served by filing this as a bug (http://llvm.org/bugs/).
> Please include a test case and the steps to reproduce (i.e., what you've
> provided below).
>
> Chad
>
> On Jul 10, 2012, at 3:15 PM, Dmitry N. Mikushin wrote:
>
> Hi,
>
> Looks like "{" and "}" are lost when trying to use the combination of
> Clang and NVPTX, which may result into clash of definitions of the
> function-scope and asm-scope. Here is an example:
>
> > cat test.cu
> __attribute__((device)) __attribute__((nv_linkonce_odr)) __inline__ int
> __any(int a) {
>     int result;
>     asm __volatile__ ("{ \n\t"
>         ".reg .pred \t%%p1; \n\t"
>         ".reg .pred \t%%p2; \n\t"
>         "setp.ne.u32 \t%%p1, %1, 0; \n\t"
>         "vote.any.pred \t%%p2, %%p1; \n\t"
>         "selp.s32 \t%0, 1, 0, %%p2; \n\t"
>         "}" : "=r"(result) : "r"(a));
>     return result;
> }
>
> > clang -cc1 -emit-llvm -fcuda-is-device -triple ptx64-unknown-unknown
> test.cu -o test.ll
> > cat test.ll
> ; ModuleID = 'test.cu'
> target datalayout = "e-p:64:64-i64:64:64-f64:64:64-n1:8:16:32:64"
> target triple = "ptx64-unknown-unknown"
>
> define ptx_device i32 @_Z5__anyi(i32 %a) nounwind inlinehint {
> entry:
>   %a.addr = alloca i32, align 4
>   %result = alloca i32, align 4
>   store i32 %a, i32* %a.addr, align 4
>   %0 = load i32* %a.addr, align 4
>   %1 = call i32 asm sideeffect "$( \0A\09.reg .pred \09%p1; \0A\09.reg
> .pred \09%p2; \0A\09setp.ne.u32 \09%p1, $1, 0; \0A\09vote.any.pred \09%p2,
> %p1; \0A\09selp.s32 \09$0, 1, 0, %p2; \0A\09$)", "=r,r"(i32 %0) nounwind,
> !srcloc !0
>   store i32 %1, i32* %result, align 4
>   %2 = load i32* %result, align 4
>   ret i32 %2
> }
>
> !0 = metadata !{i32 127, i32 132, i32 166, i32 200, i32 242, i32 285, i32
> 327}
>
> > llc -march=nvptx64 test.ll -o test.ptx
> > cat test.ptx
> //
> // Generated by LLVM NVPTX Back-End
> //
>
> .version 3.0
> .target sm_10, texmode_independent
> .address_size 64
>
>
>     // .globl _Z5__anyi
> .visible .global .align 4 .b8 __local_depot0[8];
>
> .func (.reg .b32 func_retval0) _Z5__anyi(
>     .reg .b32 _Z5__anyi_param_0
> ) // @_Z5__anyi
> {
>     .reg .b64 %SP;
>     .reg .b64 %SPL;
>     .reg .pred %p<396>;
>     .reg .s16 %rc<396>;
>     .reg .s16 %rs<396>;
>     .reg .s32 %r<396>;
>     .reg .s64 %rl<396>;
>     .reg .f32 %f<396>;
>     .reg .f64 %fl<396>;
>
> // BB#0: // %entry
>     mov.u64 %SP, __local_depot0;
>     mov.b32 %r0, _Z5__anyi_param_0;
>     st.global.u32 [%SP+0], %r0;
>     // inline asm
>
>     .reg .pred %p1;
>     .reg .pred %p2;
>     setp.ne.u32 %p1, %r0, 0;
>     vote.any.pred %p2, %p1;
>     selp.s32 %r0, 1, 0, %p2;
>
>     // inline asm
>     st.global.u32 [%SP+4], %r0;
>     mov.b32 func_retval0, %r0;
>     ret;
> }
>
> > ptxas test.ptx -o test.cubin
> ptxas test.ptx, line 33; error : Duplicate definition of variable '%p1'
> ptxas test.ptx, line 34; error : Duplicate definition of variable '%p2'
> ptxas test.ptx, line 36; error : Instruction 'vote' requires .target
> sm_12 or higher
> ptxas fatal : Ptx assembly aborted due to errors
>
> - D.
>
> _______________________________________________
> LLVM Developers mailing list
> LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev
>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20120711/0ff0bbcc/attachment.html>
Possibly Parallel Threads
- [LLVMdev] [NVPTX] CUDA inline PTX asm definitions scoping "{" "}" is broken
- [LLVMdev] [NVPTX] CUDA inline PTX asm definitions scoping "{" "}" is broken
- [GPUCC] how to remove _ZL21__nvvm_reflect_anchorv() automatically?
- Debug info for Cuda
- Debug info for Cuda