I have written test.ll as below and ran 'opt' on it as
"opt -std-compile-opts test.ll -S -o -". But the output shows that there
is code motion around the barrier intrinsics.
test.ll
-------
; ModuleID = 'test.bc'
define void @test(i16* %I_0, i16* %I_1, i16* %I_2, i16* %I_3, i16* %O_0) {
entry:
%T_0 = load volatile i16* %I_0
%T_1 = load volatile i16* %I_1
%T_2 = load volatile i16* %I_2
%T_3 = load volatile i16* %I_3
call void @llvm.nvvm.barrier0()
%T_5 = add i16 %T_1, %T_3
call void @llvm.nvvm.barrier0()
%T_7 = mul i16 %T_0, %T_2
%T_8 = xor i16 %T_2, %T_0
%T_9 = mul i16 %T_0, %T_1
call void @llvm.nvvm.barrier0()
%T_11 = sub i16 %T_7, %T_5
%T_12 = add i16 %T_8, %T_9
%T_13 = add i16 %T_11, %T_12
store volatile i16 %T_13, i16* %O_0
ret void
}
;declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare void @llvm.nvvm.barrier0()
output
---------
define void @test(i16* nocapture %I_0, i16* nocapture %I_1, i16* nocapture
%I_2, i16* nocapture %I_3, i16* nocapture %O_0) nounwind {
entry:
%T_0 = load volatile i16* %I_0
%T_1 = load volatile i16* %I_1
%T_2 = load volatile i16* %I_2
%T_3 = load volatile i16* %I_3
tail call void @llvm.nvvm.barrier0()
tail call void @llvm.nvvm.barrier0()
%T_8 = xor i16 %T_2, %T_0
tail call void @llvm.nvvm.barrier0()
%sum = add i16 %T_3, %T_1
%tmp = add i16 %T_2, %T_1
%tmp1 = mul i16 %tmp, %T_0
%T_11 = sub i16 %T_8, %sum
%T_13 = add i16 %T_11, %tmp1
store volatile i16 %T_13, i16* %O_0
ret void
}
Thanks
On Tue, Sep 30, 2014 at 9:50 PM, Eli Bendersky <eliben at google.com> wrote:
>
>
> On Tue, Sep 30, 2014 at 9:04 AM, RAVI KORSA <ravi.korsa at gmail.com> wrote:
>
>> Is there any guarantee that the nvptx intrinsic "llvm.nvvm.barrier0" will
>> not be moved around by opt?
>> In other words, can I expect all the instructions above
>> "llvm.nvvm.barrier0" to remain above it and those below it to remain below,
>> after all the opt passes are run?
>>
>
> AFAIU, yes. Here's the definition:
>
> def int_nvvm_barrier0 : GCCBuiltin<"__nvvm_bar0">,
> Intrinsic<[], [], [IntrNoDuplicate]>;
>
> Note that IntrNoDuplicate is the only intrinsic attribute. It has no other
> attributes (like IntrNoMem) that would make it permissible for LLVM
> optimizations to reorder things around it. By default, the optimizers would
> not do this for function calls; only if these function calls are marked
> with special attributes that permit this.
>
> Eli
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20140930/1eba4aee/attachment.html>
llvm.nvvm.barrier0 corresponds to __syncthreads in CUDA. Moving around the arithmetic instructions in your example should be fine, because they do not access memory. Jingyue On Tue Sep 30 2014 at 10:45:00 AM RAVI KORSA <ravi.korsa at gmail.com> wrote:> I have written test.ll as below and ran 'opt' on it as > " opt -std-compile-opts test.ll -S -o -" . But the output shows that there > is code motion around the barrier intrinsics. > > test.ll > ------- > ; ModuleID = 'test.bc' > > define void @test(i16* %I_0, i16* %I_1, i16* %I_2, i16* %I_3, i16* %O_0) { > entry: > %T_0 = load volatile i16* %I_0 > %T_1 = load volatile i16* %I_1 > %T_2 = load volatile i16* %I_2 > %T_3 = load volatile i16* %I_3 > call void @llvm.nvvm.barrier0() > %T_5 = add i16 %T_1, %T_3 > call void @llvm.nvvm.barrier0() > %T_7 = mul i16 %T_0, %T_2 > %T_8 = xor i16 %T_2, %T_0 > %T_9 = mul i16 %T_0, %T_1 > call void @llvm.nvvm.barrier0() > %T_11 = sub i16 %T_7, %T_5 > %T_12 = add i16 %T_8, %T_9 > %T_13 = add i16 %T_11, %T_12 > store volatile i16 %T_13, i16* %O_0 > ret void > } > > ;declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() > declare void @llvm.nvvm.barrier0() > > output > --------- > define void @test(i16* nocapture %I_0, i16* nocapture %I_1, i16* nocapture > %I_2, i16* nocapture %I_3, i16* nocapture %O_0) nounwind { > entry: > %T_0 = load volatile i16* %I_0 > %T_1 = load volatile i16* %I_1 > %T_2 = load volatile i16* %I_2 > %T_3 = load volatile i16* %I_3 > tail call void @llvm.nvvm.barrier0() > tail call void @llvm.nvvm.barrier0() > %T_8 = xor i16 %T_2, %T_0 > tail call void @llvm.nvvm.barrier0() > %sum = add i16 %T_3, %T_1 > %tmp = add i16 %T_2, %T_1 > %tmp1 = mul i16 %tmp, %T_0 > %T_11 = sub i16 %T_8, %sum > %T_13 = add i16 %T_11, %tmp1 > store volatile i16 %T_13, i16* %O_0 > ret void > } > > Thanks > > On Tue, Sep 30, 2014 at 9:50 PM, Eli Bendersky <eliben at google.com> wrote: > >> >> >> On Tue, Sep 30, 2014 at 9:04 AM, RAVI KORSA <ravi.korsa at gmail.com> wrote: >> >>> is there any guarantee 
that the nvptx intrinsic "llvm.nvvm.barrier0" >>> will not be moved around by opt ? >>> In other words, can I expect all the instructions above >>> "llvm.nvvm.barrier0" to remain above it and those below it to remain below, >>> after all the opt passes are run ? >>> >> >> AFAIU, yes. Here's the definition: >> >> def int_nvvm_barrier0 : GCCBuiltin<"__nvvm_bar0">, >> Intrinsic<[], [], [IntrNoDuplicate]>; >> >> Note that IntrNoDuplicate is the only intrinsic attribute. It has no >> other attributes (like IntrNoMem) that would make it permissible for LLVM >> optimizations to reorder things around it. By default, the optimizers would >> not do this for function calls; only if these function calls are marked >> with special attributes that permit this. >> >> Eli >> >> > _______________________________________________ > LLVM Developers mailing list > LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev >-------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20140930/97ca66c0/attachment.html>
The actual reason I wanted such an intrinsic is to solve a similar problem on X86. Say I want to read the "mxcsr" register (which is the status register for SSE instructions) after a particular instruction; then I need a kind of barrier intrinsic which will not allow the arithmetic instructions to move around it. Otherwise I will be reading the status of some other instruction. Thanks -------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20140930/d10f0a0c/attachment.html>