C++ - Output of CUDA program is not what was expected
#include <cuda_runtime.h>
#include <stdio.h>
#include <cuda.h>
#include <stdlib.h>

/*
 * Kernel: each block stores a pointer to the literal "hello\0" into one
 * slot of the pointer table `c`.
 *
 * Launch layout: a 2D grid with one thread per block; the slot index is
 * blockIdx.y * gridDim.x + blockIdx.x, so `c` must have at least
 * gridDim.x * gridDim.y entries.
 *
 * NOTE(review): the value stored is the address of a string literal that
 * appears in device code. The host later passes that address to
 * cudaMemcpy and reads 10 bytes from it. Whether that address is a valid
 * cudaMemcpy source is the point under discussion in this question --
 * the logic is intentionally left as posted.
 */
__global__ void setval(char **c)
{
    c[(blockIdx.y * gridDim.x) + blockIdx.x] = "hello\0";
}

/*
 * Allocates a device table of 6 char* entries, launches setval on a 3x2
 * grid to fill it, copies the table back to the host, and then tries to
 * copy and print the string behind each entry.
 */
int main()
{
    char **gpu = NULL;
    cudaMalloc((void**)&gpu, 6 * sizeof(char *));

    int i;
    /* cannot access second level directly
    for (i = 0; i < 6; i++) {
        cudaMalloc((void**)&gpu[i], 10 * sizeof(char));
    }*/

    dim3 grid(3, 2);              /* 3 * 2 = 6 blocks, one per table slot */
    setval<<<grid, 1>>>(gpu);

    char *p = (char*)malloc(10 * sizeof(char));
    char *x[6];                   /* host copy of the device pointer table */
    cudaMemcpy(x, gpu, 6 * sizeof(char*), cudaMemcpyDeviceToHost);

    for (i = 0; i < 6; i++) {
        cudaMemcpy(p, x[i], 10 * sizeof(char), cudaMemcpyDeviceToHost);
        //put synchronize here if problem
        printf("%s\n", p);
    }

    getchar();
    return 0;
}

/*
 * Based on the suggestions, I revised my code to make the concept correct.
 * But the code is still not working :(. Any help would be appreciated.
 */
Try this -- I tested it on a GTX 285 under CUDA 3.2 -- so it's a bit more restrictive than the current version, but it works.
#include <stdio.h>
#include <string.h>

/*
 * Kernel: each block writes the NUL-terminated string "hola" into the
 * buffer that its entry of `word` points to.
 *
 * Launch layout: 1D grid, one block per string (indexed by blockIdx.x);
 * threadIdx is not used, so launch with one thread per block.
 * Precondition: word[blockIdx.x] must point to a writable device buffer
 * of at least 5 bytes for every block in the grid.
 */
__global__ void setvalues(char** word)
{
    volatile char* myword = word[blockIdx.x];

    myword[0] = 'h';
    myword[1] = 'o';
    myword[2] = 'l';
    myword[3] = 'a';
    myword[4] = '\0';
}

/*
 * Prints a diagnostic to stderr when `err` is not cudaSuccess.
 * Does not abort: the program keeps running, as in the original
 * inline checks. `line` is the source line of the call being checked.
 */
static void reportCudaError(cudaError_t err, int line)
{
    if (cudaSuccess != err) {
        fprintf(stderr, "cuda error in file '%s' in line %i : %s.\n",
                __FILE__, line, cudaGetErrorString(err));
    }
}

/*
 * Builds a table of device string buffers, lets the kernel fill each
 * buffer, then copies every buffer back to the host and prints it.
 *
 * h_x lives on the host but holds DEVICE addresses (one per buffer);
 * d_x is the device-side copy of that table, which is what the kernel
 * receives.
 *
 * NOTE: cudaThreadSynchronize() is deprecated in newer CUDA toolkits in
 * favor of cudaDeviceSynchronize(); it is kept here as posted.
 */
int main()
{
    const size_t buffersize = 32;
    const int nobjects = 10;

    char* h_x[nobjects];
    char** d_x = 0;

    reportCudaError(cudaMalloc((void**)(&d_x), nobjects * sizeof(char*)), __LINE__);

    /* Allocate one device buffer per string; keep the addresses on the host. */
    for (int i = 0; i < nobjects; i++) {
        h_x[i] = NULL;
        reportCudaError(cudaMalloc((void**)(&h_x[i]), buffersize * sizeof(char)), __LINE__);
        printf("h_x[%d] = %lx\n", i, (unsigned long)h_x[i]);
    }

    /* Publish the table of device addresses to the device. */
    reportCudaError(cudaMemcpy(d_x, h_x, nobjects * sizeof(char*), cudaMemcpyHostToDevice), __LINE__);
    printf("copied h_x[] d_x[]\n");

    /* Seed the first buffer from the host; sizeof(msg) includes the NUL. */
    char msg[] = "hello world!";
    reportCudaError(cudaMemcpy(h_x[0], msg, sizeof(msg), cudaMemcpyHostToDevice), __LINE__);

    /* force thread synchronization, then check for and display any error */
    reportCudaError(cudaThreadSynchronize(), __LINE__);

    setvalues<<<nobjects, 1>>>(d_x);
    /* Launch-configuration errors are only visible through cudaGetLastError(). */
    reportCudaError(cudaGetLastError(), __LINE__);

    /* force thread synchronization, then check for and display any error */
    reportCudaError(cudaThreadSynchronize(), __LINE__);

    printf("kernel completed successfully. woot.\n\n");

    char p[buffersize];

    printf("d_x = %lx\n", (unsigned long)d_x);
    printf("h_x = %lx\n", (unsigned long)h_x);

    reportCudaError(cudaMemcpy(h_x, d_x, nobjects * sizeof(char*), cudaMemcpyDeviceToHost), __LINE__);

    printf("d_x = %lx\n", (unsigned long)d_x);
    printf("h_x = %lx\n", (unsigned long)h_x);

    /* Fetch each device buffer into p and print it. */
    for (int i = 0; i < nobjects; i++) {
        reportCudaError(cudaMemcpy(p, h_x[i], buffersize * sizeof(char), cudaMemcpyDeviceToHost), __LINE__);
        printf("%d p[] = %s\n", i, p);
    }

    /* force thread synchronization, then check for and display any error */
    reportCudaError(cudaThreadSynchronize(), __LINE__);

    /* Release the per-string buffers and the device pointer table. */
    for (int i = 0; i < nobjects; i++) {
        reportCudaError(cudaFree(h_x[i]), __LINE__);
    }
    reportCudaError(cudaFree(d_x), __LINE__);

    getchar();
    return 0;
}

/*
 * As @jon notes, you can't pass x (as you had declared it) to the GPU,
 * because it's an address that lives on the CPU. In the code above, I
 * create an array of char*'s and pass them to a char** that was
 * allocated on the GPU. Hope this helps!
 */
Comments
Post a Comment