diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/cuda_drvapi_dynlink.c b/Samples/0_Introduction/matrixMulDynlinkJIT/cuda_drvapi_dynlink.c index 8adc4d87a..f6d375f2b 100644 --- a/Samples/0_Introduction/matrixMulDynlinkJIT/cuda_drvapi_dynlink.c +++ b/Samples/0_Introduction/matrixMulDynlinkJIT/cuda_drvapi_dynlink.c @@ -242,12 +242,23 @@ static CUresult LOAD_LIBRARY(CUDADRIVER *pInstance) if (*pInstance == NULL) { printf("LoadLibrary \"%s\" failed!\n", __CudaLibName); - return CUDA_ERROR_UNKNOWN; + exit(EXIT_FAILURE); } return CUDA_SUCCESS; } +CUresult GET_DRIVER_HANDLE(CUDADRIVER *pInstance) +{ + *pInstance = GetModuleHandle(__CudaLibName); + if (*pInstance) { + return CUDA_SUCCESS; + } + else { + return CUDA_ERROR_UNKNOWN; + } +} + #define GET_PROC_EX(name, alias, required) \ alias = (t##name *)GetProcAddress(CudaDrvLib, #name); \ if (alias == NULL && required) { \ @@ -269,6 +280,13 @@ static CUresult LOAD_LIBRARY(CUDADRIVER *pInstance) return CUDA_ERROR_UNKNOWN; \ } +#define GET_PROC_ERROR_FUNCTIONS(name, alias, required) \ + alias = (t##name *)GetProcAddress(CudaDrvLib, #name); \ + if (alias == NULL && required) { \ + printf("Failed to find error function \"%s\" in %s\n", #name, __CudaLibName); \ + exit(EXIT_FAILURE); \ + } + #elif defined(__unix__) || defined(__QNX__) || defined(__APPLE__) || defined(__MACOSX) #include @@ -293,12 +311,23 @@ static CUresult LOAD_LIBRARY(CUDADRIVER *pInstance) if (*pInstance == NULL) { printf("dlopen \"%s\" failed!\n", __CudaLibName); - return CUDA_ERROR_UNKNOWN; + exit(EXIT_FAILURE); } return CUDA_SUCCESS; } +CUresult GET_DRIVER_HANDLE(CUDADRIVER *pInstance) +{ + *pInstance = dlopen(__CudaLibName, RTLD_LAZY); + if (*pInstance) { + return CUDA_SUCCESS; + } + else { + return CUDA_ERROR_UNKNOWN; + } +} + #define GET_PROC_EX(name, alias, required) \ alias = (t##name *)dlsym(CudaDrvLib, #name); \ if (alias == NULL && required) { \ @@ -320,6 +349,13 @@ static CUresult LOAD_LIBRARY(CUDADRIVER *pInstance) return CUDA_ERROR_UNKNOWN; \ } +#define GET_PROC_ERROR_FUNCTIONS(name, alias, required) \ + alias = (t##name *)dlsym(CudaDrvLib, #name); \ + if (alias == NULL && required) { \ + printf("Failed to find error function \"%s\" in %s\n", #name, __CudaLibName); \ + exit(EXIT_FAILURE); \ + } + #else #error unsupported platform #endif @@ -338,11 +374,19 @@ static CUresult LOAD_LIBRARY(CUDADRIVER *pInstance) #define GET_PROC_V2(name) GET_PROC_EX_V2(name, name, 1) #define GET_PROC_V3(name) GET_PROC_EX_V3(name, name, 1) +CUresult INIT_ERROR_FUNCTIONS(void) +{ + CUDADRIVER CudaDrvLib; + CUresult result = CUDA_SUCCESS; + result = GET_DRIVER_HANDLE(&CudaDrvLib); + GET_PROC_ERROR_FUNCTIONS(cuGetErrorString, cuGetErrorString, 1); + return result; +} + CUresult CUDAAPI cuInit(unsigned int Flags, int cudaVersion) { CUDADRIVER CudaDrvLib; int driverVer = 1000; - CHECKED_CALL(LOAD_LIBRARY(&CudaDrvLib)); // cuInit is required; alias it to _cuInit @@ -619,6 +663,5 @@ CUresult CUDAAPI cuInit(unsigned int Flags, int cudaVersion) GET_PROC(cuGraphicsD3D9RegisterResource); #endif } - return CUDA_SUCCESS; } diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h b/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h index 7c61ff2a4..befd53bb2 100644 --- a/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h +++ b/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h @@ -42,11 +42,21 @@ inline int ftoi(float value) { return (value >= 0 ? static_cast(value + 0.5 #ifndef checkCudaErrors #define checkCudaErrors(err) __checkCudaErrors(err, __FILE__, __LINE__) +extern "C" CUresult INIT_ERROR_FUNCTIONS(void); + // These are the inline versions for all of the SDK helper functions inline void __checkCudaErrors(CUresult err, const char *file, const int line) { if (CUDA_SUCCESS != err) { const char *errorStr = NULL; + + if (!cuGetErrorString) { + CUresult result = INIT_ERROR_FUNCTIONS(); + if (result != CUDA_SUCCESS) { + printf("CUDA driver API failed"); + exit(EXIT_FAILURE); + } + } cuGetErrorString(err, &errorStr); fprintf(stderr, "checkCudaErrors() Driver API error = %04d \"%s\" from file <%s>, " diff --git a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree.cu b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree.cu index 60112fdae..c96f2dd94 100644 --- a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree.cu +++ b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree.cu @@ -502,11 +502,12 @@ __global__ void build_quadtree_kernel(Quadtree_node *nodes, Points *points, Para if (!(params.depth >= params.max_depth || num_points <= params.min_points_per_node)) { // The last thread launches new blocks. if (threadIdx.x == NUM_THREADS_PER_BLOCK - 1) { - // The children. - Quadtree_node *children = &nodes[params.num_nodes_at_this_level - (node.id() & ~3)]; + // The children. Move to the next-level slice relative to this 4-node group, + // and select the 4-children group of this node (local index within its parent). + Quadtree_node *children = &nodes[params.num_nodes_at_this_level]; - // The offsets of the children at their level. - int child_offset = 4 * node.id(); + // The offset of this node's 4 children within the next-level slice (local to group). + int child_offset = 4 * (node.id() & 3); // Set IDs. children[child_offset + 0].set_id(4 * node.id() + 0);