Displaying 1 result from an estimated 1 match for "thread_per_block".
2012 Jul 21
3
Use GPU in R with .Call
...resout_ptr=REAL(resout);
vecAdd_kernel(a_ptr,b_ptr,resout_ptr,len);
UNPROTECT(1);
return resout;
}
(b) Next, the host function and the kernel are in a *SEPARATE* file
called "VecAdd_kernel.cu".
=======================file VecAdd_kernel.cu========================
// NOTE(review): presumably the number of threads per block that the host
// wrapper uses when launching the kernel -- its use is not visible in this
// excerpt, so confirm against the launch site. If it is the block size, note
// that 100 is not a multiple of the 32-thread warp size.
#define THREAD_PER_BLOCK 100
/*
 * Element-wise vector addition: c[i] = a[i] + b[i] for every i in [0, len).
 *
 * Each thread handles at most one element, chosen by its flat 1-D global
 * index (only the x component of the block and thread indices is used).
 * Threads whose index is not below len return without reading or writing.
 *
 * a, b : input arrays, read at indices below len
 * c    : output array, written at indices below len
 * len  : number of elements to add
 */
__global__ void VecAdd(double *a,double *b, double *c,int len) {
    const int gid = blockIdx.x * blockDim.x + threadIdx.x;

    // Surplus threads past the end of the data have nothing to do.
    if (gid >= len) {
        return;
    }

    c[gid] = a[gid] + b[gid];
}
void vecAdd_kernel(double *ain,double *bin,double *cout,int len){
int alloc_size;
alloc_size=len*sizeof(double);...