I have a single .cu file. I have a array which will be used by all threads in all the blocks per grid. So i wanted to declare it as deviceconstant.
I have done like this:-
deviceconstant int* conarray;
global void readTexels(int n, int* oarray)
{
int ind=blockIdx.x + blockDim.x + threadIdx.x;
if(ind<n)
oarray[ind]=conarray[ind];
}
int main()
{
int N = 10;
cudaMalloc((void**)&conarray,sizeof(int)N);
int data = (int)malloc(Nsizeof(int));
for (int i = 0; i < N; i++)
{
data[i] = i;
}
cudaMemcpyToSymbol(“conarray”,&data,N*sizeof(int));
//Kernel Call goes here
}
I am just trying to copy the value of constant array ( carray) to the argument passed to the kernel call.
But i am not getting the same value as there is data.
void checkCUDAError(const char msg)
{
cudaError_t err = cudaGetLastError();
if( cudaSuccess != err)
{
fprintf(stderr, “Cuda error: %s: %s.\n”, msg, cudaGetErrorString( err) );
exit(EXIT_FAILURE);
}
} constant int conarray; global void readTexels(int n, int* d_out)
{
int ind=blockIdx.x + blockDim.x + threadIdx.x;
int tid=threadIdx.x;
if(ind<n)
d_out[ind]=conarray[ind];
} #define NUM_THREADS 512
int main()
{
int N = 10;
cudaMalloc((void**)&conarray,sizeof(int)*N);
checkCUDAError(“initilize constant array”);
int nBlocks = N/NUM_THREADS + ((N % NUM_THREADS)?1:0);
int* d_out;
cudaMalloc((void**)&d_out,sizeof(int)N);
int h_out = (int*)malloc(sizeof(int)N);
int data = (int)malloc(Nsizeof(int));
for (int i = 0; i < N; i++)
data[i] = i;
cudaMemcpy(conarray,data,Nsizeof(int),cudaMemcpyHostToDevic
e);
checkCUDAError(“copy data to constant array”);
readTexels<<<nBlocks,NUM_THREADS>>>(N,d_out);
checkCUDAError(“Call Kernel Function”);
cudaMemcpy(h_out, d_out,Nsizeof(int), cudaMemcpyDeviceToHost);
checkCUDAError(“copy from device to host”);
for (int i = 0; i < N; i++)
printf("%d %d\n ",data[i],h_out[i]);
}