From b70b2c4e9d05f4c01bb3b71d28e1a88e459ca1d6 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Tue, 28 May 2019 16:58:55 +0530 Subject: [PATCH] Header changes for cooperative groups Change-Id: I5f3acca94275d74adc97adcb168aed9f74951189 [ROCm/clr commit: 4af81134ba51c61664bba1062e30ae61b12a4d3c] --- .../include/hip/hcc_detail/hip_runtime_api.h | 93 +++++++++++++++++++ .../clr/hipamd/include/hip/hip_runtime_api.h | 4 + 2 files changed, 97 insertions(+) diff --git a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h index d870963101..ba8d2a21c5 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h @@ -275,6 +275,15 @@ typedef struct dim3 { #endif } dim3; +typedef struct hipLaunchParams_t { + void* func; ///< Device function symbol + dim3 gridDim; ///< Grid dimentions + dim3 blockDim; ///< Block dimentions + void **args; ///< Arguments + size_t sharedMem; ///< Shared memory + hipStream_t stream; ///< Stream identifier +} hipLaunchParams; + // Doxygen end group GlobalDefs /** @} */ @@ -2842,6 +2851,62 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigne unsigned int sharedMemBytes, hipStream_t stream, void** kernelParams, void** extra); +/** + * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed + * to kernelparams or extra, where thread blocks can cooperate and synchronize as they execute + * + * @param [in] f Kernel to launch. + * @param [in] gridDim Grid dimensions specified as multiple of blockDim. + * @param [in] blockDim Block dimensions specified in work-items + * @param [in] kernelParams A list of kernel arguments + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The + * kernel can access this with HIP_DYNAMIC_SHARED. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th + * default stream is used with associated synchronization rules. + * + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + */ +hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDimX, + void** kernelParams, unsigned int sharedMemBytes, + hipStream_t stream); + +/** + * @brief Launches kernels on multiple devices where thread blocks can cooperate and + * synchronize as they execute. + * + * @param [in] hipLaunchParams List of launch parameters, one per device. + * @param [in] numDevices Size of the launchParamsList array. + * @param [in] flags Flags to control launch behavior. + * + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + */ +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags); + +/** + * @brief Returns occupancy for a device function. + * + * @param [out] numBlocks Returned occupancy + * @param [in] func Kernel function for which occupancy is calulated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynamicSMemSize Per - block dynamic shared memory usage intended, in bytes + */ +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( + int* numBlocks, const void* f, int blockSize, size_t dynamicSMemSize); + +/** + * @brief Returns occupancy for a device function. + * + * @param [out] numBlocks Returned occupancy + * @param [in] func Kernel function for which occupancy is calulated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynamicSMemSize Per - block dynamic shared memory usage intended, in bytes + * @param [in] flags Extra flags for occupancy calculation (currently ignored) + */ +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, const void* f, int blockSize, size_t dynamicSMemSize, unsigned int flags); + + // doxygen end Version Management /** * @} @@ -3170,6 +3235,34 @@ hipError_t hipBindTextureToMipmappedArray(const texture& tex, return hipSuccess; } +template +inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( + int* numBlocks, T f, int blockSize, size_t dynamicSMemSize) { + return hipOccupancyMaxActiveBlocksPerMultiprocessor( + numBlocks, reinterpret_cast(f), blockSize, dynamicSMemSize); +} + +template +inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, T f, int blockSize, size_t dynamicSMemSize, unsigned int flags) { + return hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + numBlocks, reinterpret_cast(f), blockSize, dynamicSMemSize, flags); +} + +template +inline hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim, + void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream) { + return hipLaunchCooperativeKernel(reinterpret_cast(f), gridDim, + blockDim, kernelParams, sharedMemBytes, stream); +} + +template +inline hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + unsigned int numDevices, unsigned int flags = 0) { + return hipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags); +} + + /* * @brief Unbinds the textuer bound to @p tex * diff --git a/projects/clr/hipamd/include/hip/hip_runtime_api.h b/projects/clr/hipamd/include/hip/hip_runtime_api.h index e7ecede8c1..e3c10766e9 100644 --- a/projects/clr/hipamd/include/hip/hip_runtime_api.h +++ b/projects/clr/hipamd/include/hip/hip_runtime_api.h @@ -115,6 +115,8 @@ typedef struct hipDeviceProp_t { int canMapHostMemory; ///< Check whether HIP can map host memory int gcnArch; ///< AMD GCN Arch Value. Eg: 803, 701 int integrated; ///< APU vs dGPU + int cooperativeLaunch; ///< HIP device supports cooperative launch + int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple devices } hipDeviceProp_t; @@ -291,6 +293,8 @@ typedef enum hipDeviceAttribute_t { ///< Multiprocessor. hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices. hipDeviceAttributeIntegrated, ///< iGPU + hipDeviceAttributeCooperativeLaunch, ///< Support cooperative launch + hipDeviceAttributeCooperativeMultiDeviceLaunch, ///< Support cooperative launch on multiple devices } hipDeviceAttribute_t; enum hipComputeMode {