c++ - Output of cuda program is not what was expected -


#include <cuda_runtime.h>
#include <stdio.h>
#include <cuda.h>
#include <stdlib.h>

/*
 * Stores a pointer to a string literal into one slot of `c` per block.
 *
 * Launch layout: a 2D grid with one thread per block; the slot index is
 * blockIdx.y * gridDim.x + blockIdx.x, so `c` must have at least
 * gridDim.x * gridDim.y elements.
 *
 * NOTE(review): the value stored is the address of a device-side string
 * literal, not of a buffer obtained from cudaMalloc. Whether the host can
 * later cudaMemcpy from that address is not guaranteed -- this is the likely
 * reason the program does not print what was expected. TODO confirm.
 */
__global__ void setval(char **c)
{
    c[(blockIdx.y * gridDim.x) + blockIdx.x] = "hello\0";
}

/*
 * Allocates a device array of 6 char pointers, lets the kernel fill it, then
 * copies the pointer table back and tries to copy/print each string.
 */
int main()
{
    char **gpu = NULL;
    cudaMalloc((void**)&gpu, 6 * sizeof(char *));
    int i;
    /* cannot access second level directly
    for (i = 0; i < 6; i++) {
        cudaMalloc((void**)&gpu[i], 10 * sizeof(char));
    }*/

    dim3 grid(3, 2);  /* 3 x 2 blocks = 6 slots, matching the allocation above */
    setval<<<grid, 1>>>(gpu);

    char *p = (char*)malloc(10 * sizeof(char));
    char *x[6];

    /* Bring the table of device pointers back to the host. */
    cudaMemcpy(x, gpu, 6 * sizeof(char*), cudaMemcpyDeviceToHost);
    for (i = 0; i < 6; i++) {
        /* NOTE(review): copies 10 bytes although the literal is shorter. */
        cudaMemcpy(p, x[i], 10 * sizeof(char), cudaMemcpyDeviceToHost);
        //put synchronize here if problem
        printf("%s\n", p);
    }

    getchar();
    return 0;
}

Based on the suggestions, I revised the code to make the concept correct. But the code is still not working :(. Any help would be appreciated.

Try this -- I tested it on a GTX 285 under CUDA 3.2 -- it's a bit more restrictive than the current version, but it works.

#include <stdio.h>
#include <string.h>

/*
 * Writes the NUL-terminated string "hola" into the buffer selected by the
 * block index.
 *
 * Launch layout: 1D grid, one thread per block; block b writes to word[b].
 * Preconditions: `word` is a device array with at least gridDim.x entries,
 * and every word[b] points to a device buffer of at least 5 bytes.
 */
__global__ void setvalues(char** word)
{
    volatile char* myword = word[blockIdx.x];

    myword[0] = 'h';
    myword[1] = 'o';
    myword[2] = 'l';
    myword[3] = 'a';
    myword[4] = '\0';
}

/*
 * Builds a device-side array of device string buffers, runs the kernel on it,
 * then copies every buffer back to the host and prints it.
 */
int main()
{
    const size_t buffersize = 32;
    const int nobjects = 10;

    char*  h_x[nobjects];  /* host-side table holding DEVICE buffer addresses */
    char** d_x = 0;        /* device-side copy of that table */

    cudaMalloc( (void**)(&d_x), nobjects * sizeof(char*) );

    /* Each buffer is allocated from the host; the resulting device address is
       kept in host memory so it can be uploaded as a table afterwards. */
    for ( int i=0; i < nobjects; i++ )
    {
        h_x[i] = NULL;
        cudaMalloc( (void**)(&h_x[i]), buffersize * sizeof(char) );
        printf("h_x[%d] = %lx\n",i,(unsigned long)h_x[i]);
    }

    cudaMemcpy( d_x, h_x, nobjects*sizeof(char*), cudaMemcpyHostToDevice);
    printf("copied h_x[] d_x[]\n");

    /* 13 = 12 characters plus the terminating NUL. */
    char msg[] = "hello world!";
    cudaMemcpy( h_x[0], msg, 13*sizeof(char), cudaMemcpyHostToDevice );

    /*  force thread synchronization
        (cudaThreadSynchronize is deprecated in later CUDA releases in favor
        of cudaDeviceSynchronize)  */
    cudaError_t err = cudaThreadSynchronize();

    /*  check , display error  */
    if ( cudaSuccess != err )
    {
        fprintf( stderr, "cuda error in file '%s' in line %i : %s.\n",
                __FILE__, __LINE__, cudaGetErrorString( err) );
    }

    setvalues<<<nobjects,1>>>(d_x);

    /*  force thread synchronization  */
    err = cudaThreadSynchronize();

    /*  check , display error  */
    if ( cudaSuccess != err )
    {
        fprintf( stderr, "cuda error in file '%s' in line %i : %s.\n",
                __FILE__, __LINE__, cudaGetErrorString( err) );
    }

    printf("kernel completed successfully.  woot.\n\n");

    char p[buffersize];

    printf("d_x = %lx\n", (unsigned long)d_x );
    printf("h_x = %lx\n", (unsigned long)h_x );

    cudaMemcpy( h_x, d_x, nobjects*sizeof(char*), cudaMemcpyDeviceToHost);

    printf("d_x = %lx\n", (unsigned long)d_x );
    printf("h_x = %lx\n", (unsigned long)h_x );

    /* Copy each device buffer back and print it as a C string. */
    for ( int i=0; i < nobjects; i++ )
    {
        cudaMemcpy( &p, h_x[i], buffersize*sizeof(char), cudaMemcpyDeviceToHost);
        printf("%d p[] = %s\n",i,p);
    }

    /*  force thread synchronization  */
    err = cudaThreadSynchronize();

    /*  check , display error  */
    if ( cudaSuccess != err )
    {
        fprintf( stderr, "cuda error in file '%s' in line %i : %s.\n",
                __FILE__, __LINE__, cudaGetErrorString( err) );
    }

    getchar();

    return 0;
}

As @jon notes, you can't pass x (as you had declared it) to the GPU, because its address lives on the CPU. In the code above, I create an array of char*'s and pass them to a char** that I allocated on the GPU. Hope this helps!


Comments

Popular posts from this blog

c++ - Is it possible to compile a VST on linux? -

java - Output of Eclipse is rubbish -

jquery - Confused with JSON data and normal data in Django ajax request -