SWDEV-271416 - Remove HIP_DYNAMIC_SHARED macro in hip

Change-Id: I12f39ea8438eb7ce76d8ffb2151b4faa93689048


[ROCm/clr commit: e4fdbfcf5b]
Cette révision appartient à :
Julia Jiang
2021-02-04 16:22:01 -05:00
Parent 32e178cbca
révision 0e20fbfbec
12 fichiers modifiés avec 23 ajouts et 57 suppressions
-5
Voir le fichier
@@ -24,10 +24,6 @@ cat >$tmp/hip_macros.h <<EOF
#define __launch_bounds__(...) \
select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__)
// Macro to replace extern __shared__ declarations
// to local variable definitions
#define HIP_DYNAMIC_SHARED(type, var) \
type* var = (type*)__amdgcn_get_dynamicgroupbaseptr();
EOF
cat >$tmp/hip_pch.h <<EOF
@@ -35,7 +31,6 @@ cat >$tmp/hip_pch.h <<EOF
#include "hip/hip_fp16.h"
EOF
cat >$tmp/hip_pch.mcin <<EOF
.type __hip_pch,@object
.section .hip_pch,"aMS",@progbits,1
-8
Voir le fichier
@@ -1863,14 +1863,6 @@ sub simpleSubstitutions {
$ft{'define'} += s/\bcudaTextureTypeCubemapLayered\b/hipTextureTypeCubemapLayered/g;
}
# CUDA extern __shared__ syntax replace with HIP_DYNAMIC_SHARED() macro
sub transformExternShared {
no warnings qw/uninitialized/;
my $k = 0;
$k += s/extern\s+([\w\(\)]+)?\s*__shared__\s+([\w:<>\s]+)\s+(\w+)\s*\[\s*\]\s*;/HIP_DYNAMIC_SHARED($1 $2, $3)/g;
$ft{'extern_shared'} += $k;
}
# CUDA Kernel Launch Syntax
sub transformKernelLaunch {
no warnings qw/uninitialized/;
-8
Voir le fichier
@@ -1269,14 +1269,6 @@ unsigned __smid(void)
return (se_id << HW_ID_CU_ID_SIZE) + cu_id;
}
// Macro to replace extern __shared__ declarations
// to local variable definitions
#define HIP_DYNAMIC_SHARED(type, var) \
type* var = (type*)__amdgcn_get_dynamicgroupbaseptr();
#define HIP_DYNAMIC_SHARED_ATTRIBUTE
#endif //defined(__clang__) && defined(__HIP__)
+11 -11
Voir le fichier
@@ -3065,8 +3065,8 @@ hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned
* @param [in] blockDimX X block dimensions specified in work-items
* @param [in] blockDimY Y grid dimension specified in work-items
* @param [in] blockDimZ Z grid dimension specified in work-items
* @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The
* kernel can access this with HIP_DYNAMIC_SHARED.
* @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The
* HIP-Clang compiler provides support for extern shared declarations.
* @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th
* default stream is used with associated synchronization rules.
* @param [in] kernelParams
@@ -3092,8 +3092,8 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigne
* @param [in] gridDim Grid dimensions specified as multiple of blockDim.
* @param [in] blockDim Block dimensions specified in work-items
* @param [in] kernelParams A list of kernel arguments
* @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The
* kernel can access this with HIP_DYNAMIC_SHARED.
* @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The
* HIP-Clang compiler provides support for extern shared declarations.
* @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th
* default stream is used with associated synchronization rules.
*
@@ -3294,8 +3294,8 @@ hipError_t hipProfilerStop();
*
* @param [in] gridDim grid dimension specified as multiple of blockDim.
* @param [in] blockDim block dimensions specified in work-items
* @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The
* kernel can access this with HIP_DYNAMIC_SHARED.
* @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The
* HIP-Clang compiler provides support for extern shared declarations.
* @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the
* default stream is used with associated synchronization rules.
*
@@ -3334,8 +3334,8 @@ hipError_t hipLaunchByPtr(const void* func);
*
* @param [in] gridDim grid dimension specified as multiple of blockDim.
* @param [in] blockDim block dimensions specified in work-items
* @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The
* kernel can access this with HIP_DYNAMIC_SHARED.
* @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The
* HIP-Clang compiler provides support for extern shared declarations.
* @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the
* default stream is used with associated synchronization rules.
*
@@ -3354,7 +3354,7 @@ hipError_t __hipPushCallConfiguration(dim3 gridDim,
* @param [out] gridDim grid dimension specified as multiple of blockDim.
* @param [out] blockDim block dimensions specified in work-items
* @param [out] sharedMem Amount of dynamic shared memory to allocate for this kernel. The
* kernel can access this with HIP_DYNAMIC_SHARED.
* HIP-Clang compiler provides support for extern shared declarations.
* @param [out] stream Stream where the kernel should be dispatched. May be 0, in which case the
* default stream is used with associated synchronization rules.
*
@@ -3373,8 +3373,8 @@ hipError_t __hipPopCallConfiguration(dim3 *gridDim,
* @param [in] numBlocks - number of blocks
* @param [in] dimBlocks - dimension of a block
* @param [in] args - kernel arguments
* @param [in] sharedMemBytes - Amount of dynamic shared memory to allocate for this kernel. The
* Kernel can access this with HIP_DYNAMIC_SHARED.
* @param [in] sharedMemBytes - Amount of dynamic shared memory to allocate for this kernel. The
* HIP-Clang compiler provides support for extern shared declarations.
* @param [in] stream - Stream where the kernel should be dispatched. May be 0, in which case th
* default stream is used with associated synchronization rules.
*
+1 -1
Voir le fichier
@@ -43,7 +43,7 @@ THE SOFTWARE.
* @param [in] blockDimY Y grid dimension specified in work-items
* @param [in] blockDimZ Z grid dimension specified in work-items
* @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The
kernel can access this with HIP_DYNAMIC_SHARED.
HIP-Clang compiler provides support for extern shared declarations
* @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th
default stream is used with associated synchronization rules.
* @param [in] kernelParams
-8
Voir le fichier
@@ -95,14 +95,6 @@ typedef int hipLaunchParm;
#define HIP_SYMBOL(X) &X
/**
* extern __shared__
*/
#define HIP_DYNAMIC_SHARED(type, var) extern __shared__ type var[];
#define HIP_DYNAMIC_SHARED_ATTRIBUTE
#ifdef __HIP_DEVICE_COMPILE__
#define abort_() \
{ asm("trap;"); }
+6 -3
Voir le fichier
@@ -19,10 +19,13 @@ We will be using the Simple Matrix Transpose application from the previous tutor
## Shared Memory
Shared memory is way more faster than that of global and constant memory and accessible to all the threads in the block. For In the same sourcecode, we will use the `HIP_DYNAMIC_SHARED` keyword to declare dynamic shared memory as follows:
Shared memory is way more faster than that of global and constant memory and accessible to all the threads in the block.
Previously, it was essential to declare dynamic shared memory using the HIP_DYNAMIC_SHARED macro for accuracy, as using static shared memory in the same kernel could result in overlapping memory ranges and data-races.
Now, the HIP-Clang compiler provides support for extern shared declarations, and the HIP_DYNAMIC_SHARED option is no longer required. You may use the standard extern definition:
extern __shared__ type var[];
` HIP_DYNAMIC_SHARED(float, sharedMem) `
here the first parameter is the data type while the second one is the variable name.
The other important change is:
```
+1 -2
Voir le fichier
@@ -35,8 +35,7 @@ THE SOFTWARE.
// Device (Kernel) function, it must be void
__global__ void matrixTranspose(float* out, float* in, const int width) {
// declare dynamic shared memory
HIP_DYNAMIC_SHARED(float, sharedMem);
extern __shared__ float sharedMem[];
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
+1 -2
Voir le fichier
@@ -49,8 +49,7 @@ __global__ void matrixTranspose_static_shared(float* out, float* in,
__global__ void matrixTranspose_dynamic_shared(float* out, float* in,
const int width) {
// declare dynamic shared memory
HIP_DYNAMIC_SHARED(float, sharedMem)
extern __shared__ float sharedMem[];
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
+1 -2
Voir le fichier
@@ -122,8 +122,7 @@ __global__ void matrixTranspose_static_shared(float* out, float* in,
__global__ void matrixTranspose_dynamic_shared(float* out, float* in,
const int width) {
// declare dynamic shared memory
HIP_DYNAMIC_SHARED(float, sharedMem)
extern __shared__ float sharedMem[];
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
+1 -6
Voir le fichier
@@ -49,12 +49,7 @@ template <typename T>
__global__ void testExternSharedKernel(const T* A_d, const T* B_d, T* C_d,
size_t numElements, size_t groupElements) {
// declare dynamic shared memory
#if defined(__HIP_PLATFORM_AMD__)
HIP_DYNAMIC_SHARED(T, sdata)
#else
HIP_DYNAMIC_SHARED(__align__(sizeof(T)) unsigned char, my_sdata)
T* sdata = reinterpret_cast<T*>(my_sdata);
#endif
extern __shared__ double sdata[];
size_t gid = (blockIdx.x * blockDim.x + threadIdx.x);
size_t tid = threadIdx.x;
+1 -1
Voir le fichier
@@ -33,7 +33,7 @@ THE SOFTWARE.
#define SIZE LEN * 4
__global__ void vectorAdd(float* Ad, float* Bd) {
HIP_DYNAMIC_SHARED(float, sBd);
extern __shared__ float sBd[];
int tx = threadIdx.x;
for (int i = 0; i < LEN / 64; i++) {
sBd[tx + i * 64] = Ad[tx + i * 64] + 1.0f;