Displaying 2 results from an estimated 2 matches for "__internal_dsmul".
2012 Jul 11
2
[LLVMdev] [NVPTX] llc -march=nvptx64 -mcpu=sm_20 generates invalid zero align for device function params
...problem does not occur if compiled
for sm_10.
> cat test.ll
; ModuleID = '__kernelgen_main_module'
target datalayout = "e-p:64:64-i64:64:64-f64:64:64-n1:8:16:32:64"
target triple = "ptx64-unknown-unknown"
%struct.float2 = type { float, float }
define ptx_device void @__internal_dsmul(%struct.float2* noalias nocapture
sret %agg.result, %struct.float2* nocapture byval %x, %struct.float2*
nocapture byval %y) nounwind inlinehint alwaysinline {
entry:
%y1 = getelementptr inbounds %struct.float2* %x, i64 0, i32 1
%0 = load float* %y1, align 4
%sub = fsub float -0.000000e+00, %0...
2012 Nov 09
0
[LLVMdev] [NVPTX] llc -march=nvptx64 -mcpu=sm_20 generates invalid zero align for device function params
...<< GVar->getAlignment();
Could you please review and commit? Do you think it needs a test case?
Thanks,
- D.
dmikushin@hp2:~/forge/align0> llc -march=nvptx64 -mcpu=sm_20 align0.ll -o -
//
// Generated by LLVM NVPTX Back-End
//
.version 3.1
.target sm_20
.address_size 64
// .globl __internal_dsmul
.visible .func __internal_dsmul(
.param .b64 __internal_dsmul_param_0,
.param .align 4 .b8 __internal_dsmul_param_1[8],
.param .align 4 .b8 __internal_dsmul_param_2[8]
) // @__internal_dsmul
{
.reg .pred %p<396>;
.reg .s16 %rc<396>;
.reg .s16...