diff --git a/projects/hip/docs/markdown/hip_faq.md b/projects/hip/docs/markdown/hip_faq.md index 830b6423db..0bbb797cc1 100644 --- a/projects/hip/docs/markdown/hip_faq.md +++ b/projects/hip/docs/markdown/hip_faq.md @@ -237,3 +237,6 @@ To disable it and use the legancy grid launch method, please either change the d $HIP/include/hip/hcc_detail/hip_runtime_api.h $HIP/include/hip/hcc_detail/host_defines.h Or pass "-DGENERIC_GRID_LAUNCH=0" to hipcc at application compilation time. + +### What is maximum limit of Generic Grid Launch parameters (grid and block)? +Product of (grid.x and block.x), (grid.y and block.y) or (grid.z and block.z) should always be less than UINT_MAX. \ No newline at end of file diff --git a/projects/hip/src/hip_module.cpp b/projects/hip/src/hip_module.cpp index a8255ea725..415b93e457 100644 --- a/projects/hip/src/hip_module.cpp +++ b/projects/hip/src/hip_module.cpp @@ -134,10 +134,10 @@ extern hipError_t ihipGetDeviceProperties(hipDeviceProp_t* props, int device); return ihipLogStatus(hipStatus); \ } -hipError_t ihipModuleLaunchKernel(TlsData *tls, hipFunction_t f, uint32_t globalWorkSizeX, - uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, - uint32_t localWorkSizeX, uint32_t localWorkSizeY, - uint32_t localWorkSizeZ, size_t sharedMemBytes, +hipError_t ihipModuleLaunchKernel(TlsData *tls, hipFunction_t f, uint32_t gridSizeX, + uint32_t gridSizeY, uint32_t gridSizeZ, + uint32_t blockSizeX, uint32_t blockSizeY, + uint32_t blockSizeZ, size_t sharedMemBytes, hipStream_t hStream, void** kernelParams, void** extra, hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags, bool isStreamLocked = 0, void** impCoopParams = 0) { @@ -146,6 +146,14 @@ hipError_t ihipModuleLaunchKernel(TlsData *tls, hipFunction_t f, uint32_t global auto ctx = ihipGetTlsDefaultCtx(); hipError_t ret = hipSuccess; + size_t globalWorkSizeX = (size_t)gridSizeX * (size_t)blockSizeX; + size_t globalWorkSizeY = (size_t)gridSizeY * (size_t)blockSizeY; + size_t globalWorkSizeZ = (size_t)gridSizeZ * (size_t)blockSizeZ; + if(globalWorkSizeX > UINT32_MAX || globalWorkSizeY > UINT32_MAX || globalWorkSizeZ > UINT32_MAX) + { + return hipErrorInvalidConfiguration; + } + if (ctx == nullptr) { ret = hipErrorInvalidDevice; @@ -202,8 +210,8 @@ hipError_t ihipModuleLaunchKernel(TlsData *tls, hipFunction_t f, uint32_t global lp.dynamic_group_mem_bytes = sharedMemBytes; // TODO - this should be part of preLaunchKernel. hStream = ihipPreLaunchKernel( - hStream, dim3(globalWorkSizeX/localWorkSizeX, globalWorkSizeY/localWorkSizeY, globalWorkSizeZ/localWorkSizeZ), - dim3(localWorkSizeX, localWorkSizeY, localWorkSizeZ), &lp, f->_name.c_str(), isStreamLocked); + hStream, dim3(globalWorkSizeX/blockSizeX, globalWorkSizeY/blockSizeY, globalWorkSizeZ/blockSizeZ), + dim3(blockSizeX, blockSizeY, blockSizeZ), &lp, f->_name.c_str(), isStreamLocked); hsa_kernel_dispatch_packet_t aql; @@ -212,9 +220,9 @@ hipError_t ihipModuleLaunchKernel(TlsData *tls, hipFunction_t f, uint32_t global // aql.completion_signal._handle = 0; // aql.kernarg_address = 0; - aql.workgroup_size_x = localWorkSizeX; - aql.workgroup_size_y = localWorkSizeY; - aql.workgroup_size_z = localWorkSizeZ; + aql.workgroup_size_x = blockSizeX; + aql.workgroup_size_y = blockSizeY; + aql.workgroup_size_z = blockSizeZ; aql.grid_size_x = globalWorkSizeX; aql.grid_size_y = globalWorkSizeY; aql.grid_size_z = globalWorkSizeZ; @@ -275,7 +283,7 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, uint32_t gr HIP_INIT_API(hipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra); return ihipLogStatus(ihipModuleLaunchKernel(tls, - f, blockDimX * gridDimX, blockDimY * gridDimY, gridDimZ * blockDimZ, blockDimX, blockDimY, + f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr, 0)); } @@ -287,8 +295,12 @@ hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) { HIP_INIT_API(hipExtModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra); + + if(localWorkSizeX == 0 || localWorkSizeY == 0 || localWorkSizeZ == 0) + return hipErrorInvalidValue; + return ihipLogStatus(ihipModuleLaunchKernel(tls, - f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY, + f, globalWorkSizeX/localWorkSizeX, globalWorkSizeY/localWorkSizeY, globalWorkSizeZ/localWorkSizeZ, localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags)); } @@ -300,8 +312,12 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, hipEvent_t startEvent, hipEvent_t stopEvent) { HIP_INIT_API(hipHccModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra); + + if(localWorkSizeX == 0 || localWorkSizeY == 0 || localWorkSizeZ == 0) + return hipErrorInvalidValue; + return ihipLogStatus(ihipModuleLaunchKernel(tls, - f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY, + f, globalWorkSizeX/localWorkSizeX, globalWorkSizeY/localWorkSizeY, globalWorkSizeZ/localWorkSizeZ, localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, 0)); } @@ -352,9 +368,9 @@ hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList const hipLaunchParams& lp = launchParamsList[i]; result = ihipModuleLaunchKernel(tls, kds[i], - lp.gridDim.x * lp.blockDim.x, - lp.gridDim.y * lp.blockDim.y, - lp.gridDim.z * lp.blockDim.z, + lp.gridDim.x, + lp.gridDim.y, + lp.gridDim.z, lp.blockDim.x, lp.blockDim.y, lp.blockDim.z, lp.sharedMem, lp.stream, lp.args, nullptr, nullptr, nullptr, 0, @@ -458,9 +474,9 @@ hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, // launch the main kernel result = ihipModuleLaunchKernel(tls, kd, - gridDim.x * blockDimX.x, - gridDim.y * blockDimX.y, - gridDim.z * blockDimX.z, + gridDim.x, + gridDim.y, + gridDim.z, blockDimX.x, blockDimX.y, blockDimX.z, sharedMemBytes, stream, kernelParams, nullptr, nullptr, nullptr, 0, true, impCoopParams); @@ -612,9 +628,9 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL impCoopParams[0] = &mg_info_ptr[i]; result = ihipModuleLaunchKernel(tls, kds[i], - lp.gridDim.x * lp.blockDim.x, - lp.gridDim.y * lp.blockDim.y, - lp.gridDim.z * lp.blockDim.z, + lp.gridDim.x, + lp.gridDim.y, + lp.gridDim.z, lp.blockDim.x, lp.blockDim.y, lp.blockDim.z, lp.sharedMem, lp.stream, lp.args, nullptr, nullptr, nullptr, 0, @@ -1469,6 +1485,6 @@ hipError_t hipLaunchKernel( &szKernArg, HIP_LAUNCH_PARAM_END}; - return ihipLogStatus(ihipModuleLaunchKernel(tls, kd, numBlocks.x * dimBlocks.x, numBlocks.y * dimBlocks.y, numBlocks.z * dimBlocks.z, + return ihipLogStatus(ihipModuleLaunchKernel(tls, kd, numBlocks.x, numBlocks.y, numBlocks.z, dimBlocks.x, dimBlocks.y, dimBlocks.z, sharedMemBytes, stream, nullptr, (void**)&config, nullptr, nullptr, 0)); }