diff --git a/projects/hip/docs/markdown/hip_porting_driver_api.md b/projects/hip/docs/markdown/hip_porting_driver_api.md index 7d02666af3..8e66780add 100644 --- a/projects/hip/docs/markdown/hip_porting_driver_api.md +++ b/projects/hip/docs/markdown/hip_porting_driver_api.md @@ -105,11 +105,13 @@ hip-clang emits a global variable `__hip_gpubin_handle` of void** type with link #### Kernel Launching hip-clang supports kernel launching by CUDA `<<<>>>` syntax, hipLaunchKernel, and hipLaunchKernelGGL. The latter two are macros which expand to CUDA `<<<>>>` syntax. -In host code, hip-clang emits a stub function with the same name and arguments as the kernel. In the body of this function, hipSetupArgument is called for each kernel argument, then hipLaunchByPtr is called with a function pointer to the stub function. - When the executable or shared library is loaded by the dynamic linker, the initilization functions are called. In the initialization functions, when `__hipRegisterFatBinary` is called, the code objects containing all kernels are loaded; when `__hipRegisterFunction` is called, the stub functions are associated with the corresponding kernels in code objects. -In the host code, for the `<<<>>>` statement, hip-clang first emits call of hipConfigureCall to set up the threads and grids, then emits call of the stub function with the given arguments. In the stub function, when the runtime host API function hipLaunchByPtr is called, the real kernel associated with the stub function is launched. +hip-clang implements two sets of kernel launching APIs. + +By default, in the host code, for the `<<<>>>` statement, hip-clang first emits call of hipConfigureCall to set up the threads and grids, then emits call of the stub function with the given arguments. In the stub function, hipSetupArgument is called for each kernel argument, then hipLaunchByPtr is called with a function pointer to the stub function. In hipLaunchByPtr, the real kernel associated with the stub function is launched. + +If HIP program is compiled with -fhip-new-launch-api, in the host code, for the `<<<>>>` statement, hip-clang first emits call of `__hipPushCallConfiguration` to save the grid dimension, block dimension, shared memory usage and stream to a stack, then emits call of the stub function with the given arguments. In the stub function, `__hipPopCallConfiguration` is called to get the saved grid dimension, block dimension, shared memory usage and stream, then hipLaunchKernel is called with a function pointer to the stub function. In hipLaunchKernel, the real kernel associated with the stub function is launched. ### NVCC Implementation Notes diff --git a/projects/hip/include/hip/hcc_detail/hip_runtime_api.h b/projects/hip/include/hip/hcc_detail/hip_runtime_api.h index 35b08be2ff..7568b0144a 100644 --- a/projects/hip/include/hip/hcc_detail/hip_runtime_api.h +++ b/projects/hip/include/hip/hcc_detail/hip_runtime_api.h @@ -1519,34 +1519,6 @@ hipError_t hipMemcpyToSymbol(void*, const void*, size_t, size_t, hipMemcpyKind, } // Namespace hip_impl. #endif -#ifdef __cplusplus -extern "C" { -#endif - -/** - * @brief C compliant kernel launch API - * - * @param [in] function_address - kernel function pointer. - * @param [in] numBlocks - number of blocks - * @param [in] dimBlocks - dimension of a block - * @param [in] args - kernel arguments - * @param [in] sharedMemBytes - Amount of dynamic shared memory to allocate for this kernel. The - * Kernel can access this with HIP_DYNAMIC_SHARED. - * @param [in] stream - Stream where the kernel should be dispatched. May be 0, in which case th - * default stream is used with associated synchronization rules. - * - * @returns #hipSuccess, #hipErrorInvalidValue, hipInvalidDevice - * - */ - -hipError_t hipLaunchKernel(const void* function_address, - dim3 numBlocks, dim3 dimBlocks, void** args, - size_t sharedMemBytes, hipStream_t stream); - -#ifdef __cplusplus -} -#endif - #if defined(__cplusplus) extern "C" { #endif @@ -3055,6 +3027,65 @@ hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset); hipError_t hipLaunchByPtr(const void* func); +/** + * @brief Push configuration of a kernel launch. + * + * @param [in] gridDim grid dimension specified as multiple of blockDim. + * @param [in] blockDim block dimensions specified in work-items + * @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The + * kernel can access this with HIP_DYNAMIC_SHARED. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + * + */ + +hipError_t __hipPushCallConfiguration(dim3 gridDim, + dim3 blockDim, + size_t sharedMem __dparm(0), + hipStream_t stream __dparm(0)); + +/** + * @brief Pop configuration of a kernel launch. + * + * @param [out] gridDim grid dimension specified as multiple of blockDim. + * @param [out] blockDim block dimensions specified in work-items + * @param [out] sharedMem Amount of dynamic shared memory to allocate for this kernel. The + * kernel can access this with HIP_DYNAMIC_SHARED. + * @param [out] stream Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + * + */ +hipError_t __hipPopCallConfiguration(dim3 *gridDim, + dim3 *blockDim, + size_t *sharedMem, + hipStream_t *stream); + +/** + * @brief C compliant kernel launch API + * + * @param [in] function_address - kernel stub function pointer. + * @param [in] numBlocks - number of blocks + * @param [in] dimBlocks - dimension of a block + * @param [in] args - kernel arguments + * @param [in] sharedMemBytes - Amount of dynamic shared memory to allocate for this kernel. The + * Kernel can access this with HIP_DYNAMIC_SHARED. + * @param [in] stream - Stream where the kernel should be dispatched. May be 0, in which case th + * default stream is used with associated synchronization rules. + * + * @returns #hipSuccess, #hipErrorInvalidValue, hipInvalidDevice + * + */ + +hipError_t hipLaunchKernel(const void* function_address, + dim3 numBlocks, + dim3 dimBlocks, + void** args, + size_t sharedMemBytes __dparm(0), + hipStream_t stream __dparm(0)); /** * @}