SWDEV-339113 - update coordinate in HIP sample kernels (#2736)

Change-Id: I8ea179b4ba8f1c0ebec830a5aa5947e843f06e42
Αυτή η υποβολή περιλαμβάνεται σε:
ROCm CI Service Account
2022-06-15 11:22:27 +05:30
υποβλήθηκε από GitHub
γονέας 14a136c3e2
υποβολή a94de8f202
6 αρχεία άλλαξαν με 10 προσθήκες και 10 διαγραφές
@@ -37,8 +37,8 @@ THE SOFTWARE.
// Device (Kernel) function, it must be void
__global__ void matrixTranspose(float* out, float* in, const int width) {
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
out[y * width + x] = in[x * width + y];
}
@@ -35,8 +35,8 @@ THE SOFTWARE.
// Device (Kernel) function, it must be void
__global__ void matrixTranspose(float* out, float* in, const int width) {
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
out[y * width + x] = in[x * width + y];
}
@@ -38,8 +38,8 @@ THE SOFTWARE.
__global__ void matrixTranspose(float* out, float* in, const int width) {
__shared__ float sharedMem[WIDTH * WIDTH];
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
sharedMem[y * width + x] = in[x * width + y];
@@ -36,7 +36,7 @@ THE SOFTWARE.
// Device (Kernel) function, it must be void
__global__ void matrixTranspose(float* out, float* in, const int width) {
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int x = blockDim.x * blockIdx.x + threadIdx.x;
float val = in[x];
@@ -36,8 +36,8 @@ THE SOFTWARE.
// Device (Kernel) function, it must be void
__global__ void matrixTranspose(float* out, float* in, const int width) {
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
float val = in[y * width + x];
out[x * width + y] = __shfl(val, y * width + x);
@@ -31,7 +31,7 @@ We will be using the Simple Matrix Transpose application from the previous tutor
In the same source code that we used for MatrixTranspose, we'll add the following:
```
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
int y = blockDim.y * blockIdx.y + threadIdx.y;
out[x*width + y] = __shfl(val,y*width + x);
```