SWDEV-271416 - Remove HIP_DYNAMIC_SHARED macro in hip
Change-Id: I12f39ea8438eb7ce76d8ffb2151b4faa93689048
[ROCm/hip commit: 090b2829b9]
Šī revīzija ir iekļauta:
@@ -24,10 +24,6 @@ cat >$tmp/hip_macros.h <<EOF
|
||||
#define __launch_bounds__(...) \
|
||||
select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__)
|
||||
|
||||
// Macro to replace extern __shared__ declarations
|
||||
// to local variable definitions
|
||||
#define HIP_DYNAMIC_SHARED(type, var) \
|
||||
type* var = (type*)__amdgcn_get_dynamicgroupbaseptr();
|
||||
EOF
|
||||
|
||||
cat >$tmp/hip_pch.h <<EOF
|
||||
@@ -35,7 +31,6 @@ cat >$tmp/hip_pch.h <<EOF
|
||||
#include "hip/hip_fp16.h"
|
||||
EOF
|
||||
|
||||
|
||||
cat >$tmp/hip_pch.mcin <<EOF
|
||||
.type __hip_pch,@object
|
||||
.section .hip_pch,"aMS",@progbits,1
|
||||
|
||||
@@ -1863,14 +1863,6 @@ sub simpleSubstitutions {
|
||||
$ft{'define'} += s/\bcudaTextureTypeCubemapLayered\b/hipTextureTypeCubemapLayered/g;
|
||||
}
|
||||
|
||||
# CUDA extern __shared__ syntax replace with HIP_DYNAMIC_SHARED() macro
|
||||
sub transformExternShared {
|
||||
no warnings qw/uninitialized/;
|
||||
my $k = 0;
|
||||
$k += s/extern\s+([\w\(\)]+)?\s*__shared__\s+([\w:<>\s]+)\s+(\w+)\s*\[\s*\]\s*;/HIP_DYNAMIC_SHARED($1 $2, $3)/g;
|
||||
$ft{'extern_shared'} += $k;
|
||||
}
|
||||
|
||||
# CUDA Kernel Launch Syntax
|
||||
sub transformKernelLaunch {
|
||||
no warnings qw/uninitialized/;
|
||||
|
||||
@@ -1269,14 +1269,6 @@ unsigned __smid(void)
|
||||
return (se_id << HW_ID_CU_ID_SIZE) + cu_id;
|
||||
}
|
||||
|
||||
// Macro to replace extern __shared__ declarations
|
||||
// to local variable definitions
|
||||
#define HIP_DYNAMIC_SHARED(type, var) \
|
||||
type* var = (type*)__amdgcn_get_dynamicgroupbaseptr();
|
||||
|
||||
#define HIP_DYNAMIC_SHARED_ATTRIBUTE
|
||||
|
||||
|
||||
#endif //defined(__clang__) && defined(__HIP__)
|
||||
|
||||
|
||||
|
||||
@@ -3065,8 +3065,8 @@ hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned
|
||||
* @param [in] blockDimX X block dimensions specified in work-items
|
||||
* @param [in] blockDimY Y grid dimension specified in work-items
|
||||
* @param [in] blockDimZ Z grid dimension specified in work-items
|
||||
* @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The
|
||||
* kernel can access this with HIP_DYNAMIC_SHARED.
|
||||
* @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The
|
||||
* HIP-Clang compiler provides support for extern shared declarations.
|
||||
* @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th
|
||||
* default stream is used with associated synchronization rules.
|
||||
* @param [in] kernelParams
|
||||
@@ -3092,8 +3092,8 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigne
|
||||
* @param [in] gridDim Grid dimensions specified as multiple of blockDim.
|
||||
* @param [in] blockDim Block dimensions specified in work-items
|
||||
* @param [in] kernelParams A list of kernel arguments
|
||||
* @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The
|
||||
* kernel can access this with HIP_DYNAMIC_SHARED.
|
||||
* @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The
|
||||
* HIP-Clang compiler provides support for extern shared declarations.
|
||||
* @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th
|
||||
* default stream is used with associated synchronization rules.
|
||||
*
|
||||
@@ -3294,8 +3294,8 @@ hipError_t hipProfilerStop();
|
||||
*
|
||||
* @param [in] gridDim grid dimension specified as multiple of blockDim.
|
||||
* @param [in] blockDim block dimensions specified in work-items
|
||||
* @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The
|
||||
* kernel can access this with HIP_DYNAMIC_SHARED.
|
||||
* @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The
|
||||
* HIP-Clang compiler provides support for extern shared declarations.
|
||||
* @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the
|
||||
* default stream is used with associated synchronization rules.
|
||||
*
|
||||
@@ -3334,8 +3334,8 @@ hipError_t hipLaunchByPtr(const void* func);
|
||||
*
|
||||
* @param [in] gridDim grid dimension specified as multiple of blockDim.
|
||||
* @param [in] blockDim block dimensions specified in work-items
|
||||
* @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The
|
||||
* kernel can access this with HIP_DYNAMIC_SHARED.
|
||||
* @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The
|
||||
* HIP-Clang compiler provides support for extern shared declarations.
|
||||
* @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the
|
||||
* default stream is used with associated synchronization rules.
|
||||
*
|
||||
@@ -3354,7 +3354,7 @@ hipError_t __hipPushCallConfiguration(dim3 gridDim,
|
||||
* @param [out] gridDim grid dimension specified as multiple of blockDim.
|
||||
* @param [out] blockDim block dimensions specified in work-items
|
||||
* @param [out] sharedMem Amount of dynamic shared memory to allocate for this kernel. The
|
||||
* kernel can access this with HIP_DYNAMIC_SHARED.
|
||||
* HIP-Clang compiler provides support for extern shared declarations.
|
||||
* @param [out] stream Stream where the kernel should be dispatched. May be 0, in which case the
|
||||
* default stream is used with associated synchronization rules.
|
||||
*
|
||||
@@ -3373,8 +3373,8 @@ hipError_t __hipPopCallConfiguration(dim3 *gridDim,
|
||||
* @param [in] numBlocks - number of blocks
|
||||
* @param [in] dimBlocks - dimension of a block
|
||||
* @param [in] args - kernel arguments
|
||||
* @param [in] sharedMemBytes - Amount of dynamic shared memory to allocate for this kernel. The
|
||||
* Kernel can access this with HIP_DYNAMIC_SHARED.
|
||||
* @param [in] sharedMemBytes - Amount of dynamic shared memory to allocate for this kernel. The
|
||||
* HIP-Clang compiler provides support for extern shared declarations.
|
||||
* @param [in] stream - Stream where the kernel should be dispatched. May be 0, in which case th
|
||||
* default stream is used with associated synchronization rules.
|
||||
*
|
||||
|
||||
@@ -43,7 +43,7 @@ THE SOFTWARE.
|
||||
* @param [in] blockDimY Y grid dimension specified in work-items
|
||||
* @param [in] blockDimZ Z grid dimension specified in work-items
|
||||
* @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The
|
||||
kernel can access this with HIP_DYNAMIC_SHARED.
|
||||
HIP-Clang compiler provides support for extern shared declarations
|
||||
* @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th
|
||||
default stream is used with associated synchronization rules.
|
||||
* @param [in] kernelParams
|
||||
|
||||
@@ -95,14 +95,6 @@ typedef int hipLaunchParm;
|
||||
|
||||
#define HIP_SYMBOL(X) &X
|
||||
|
||||
/**
|
||||
* extern __shared__
|
||||
*/
|
||||
|
||||
#define HIP_DYNAMIC_SHARED(type, var) extern __shared__ type var[];
|
||||
|
||||
#define HIP_DYNAMIC_SHARED_ATTRIBUTE
|
||||
|
||||
#ifdef __HIP_DEVICE_COMPILE__
|
||||
#define abort_() \
|
||||
{ asm("trap;"); }
|
||||
|
||||
@@ -19,10 +19,13 @@ We will be using the Simple Matrix Transpose application from the previous tutor
|
||||
|
||||
## Shared Memory
|
||||
|
||||
Shared memory is way more faster than that of global and constant memory and accessible to all the threads in the block. For In the same sourcecode, we will use the `HIP_DYNAMIC_SHARED` keyword to declare dynamic shared memory as follows:
|
||||
Shared memory is way more faster than that of global and constant memory and accessible to all the threads in the block.
|
||||
|
||||
Previously, it was essential to declare dynamic shared memory using the HIP_DYNAMIC_SHARED macro for accuracy, as using static shared memory in the same kernel could result in overlapping memory ranges and data-races.
|
||||
|
||||
Now, the HIP-Clang compiler provides support for extern shared declarations, and the HIP_DYNAMIC_SHARED option is no longer required. You may use the standard extern definition:
|
||||
extern __shared__ type var[];
|
||||
|
||||
` HIP_DYNAMIC_SHARED(float, sharedMem) `
|
||||
here the first parameter is the data type while the second one is the variable name.
|
||||
|
||||
The other important change is:
|
||||
```
|
||||
|
||||
@@ -35,8 +35,7 @@ THE SOFTWARE.
|
||||
|
||||
// Device (Kernel) function, it must be void
|
||||
__global__ void matrixTranspose(float* out, float* in, const int width) {
|
||||
// declare dynamic shared memory
|
||||
HIP_DYNAMIC_SHARED(float, sharedMem);
|
||||
extern __shared__ float sharedMem[];
|
||||
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
@@ -49,8 +49,7 @@ __global__ void matrixTranspose_static_shared(float* out, float* in,
|
||||
|
||||
__global__ void matrixTranspose_dynamic_shared(float* out, float* in,
|
||||
const int width) {
|
||||
// declare dynamic shared memory
|
||||
HIP_DYNAMIC_SHARED(float, sharedMem)
|
||||
extern __shared__ float sharedMem[];
|
||||
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
@@ -122,8 +122,7 @@ __global__ void matrixTranspose_static_shared(float* out, float* in,
|
||||
|
||||
__global__ void matrixTranspose_dynamic_shared(float* out, float* in,
|
||||
const int width) {
|
||||
// declare dynamic shared memory
|
||||
HIP_DYNAMIC_SHARED(float, sharedMem)
|
||||
extern __shared__ float sharedMem[];
|
||||
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
@@ -49,12 +49,7 @@ template <typename T>
|
||||
__global__ void testExternSharedKernel(const T* A_d, const T* B_d, T* C_d,
|
||||
size_t numElements, size_t groupElements) {
|
||||
// declare dynamic shared memory
|
||||
#if defined(__HIP_PLATFORM_AMD__)
|
||||
HIP_DYNAMIC_SHARED(T, sdata)
|
||||
#else
|
||||
HIP_DYNAMIC_SHARED(__align__(sizeof(T)) unsigned char, my_sdata)
|
||||
T* sdata = reinterpret_cast<T*>(my_sdata);
|
||||
#endif
|
||||
extern __shared__ double sdata[];
|
||||
|
||||
size_t gid = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t tid = threadIdx.x;
|
||||
|
||||
@@ -33,7 +33,7 @@ THE SOFTWARE.
|
||||
#define SIZE LEN * 4
|
||||
|
||||
__global__ void vectorAdd(float* Ad, float* Bd) {
|
||||
HIP_DYNAMIC_SHARED(float, sBd);
|
||||
extern __shared__ float sBd[];
|
||||
int tx = threadIdx.x;
|
||||
for (int i = 0; i < LEN / 64; i++) {
|
||||
sBd[tx + i * 64] = Ad[tx + i * 64] + 1.0f;
|
||||
|
||||
Atsaukties uz šo jaunā problēmā
Block a user