SWDEV-339113 - update coordinate in HIP sample kernels (#2736)
Change-Id: I8ea179b4ba8f1c0ebec830a5aa5947e843f06e42
이 커밋은 다음에 포함됨:
@@ -37,8 +37,8 @@ THE SOFTWARE.
|
||||
|
||||
// Device (Kernel) function, it must be void
|
||||
__global__ void matrixTranspose(float* out, float* in, const int width) {
|
||||
- int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
- int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
+ int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
+ int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
out[y * width + x] = in[x * width + y];
|
||||
}
|
||||
|
||||
@@ -35,8 +35,8 @@ THE SOFTWARE.
|
||||
|
||||
// Device (Kernel) function, it must be void
|
||||
__global__ void matrixTranspose(float* out, float* in, const int width) {
|
||||
- int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
- int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
+ int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
+ int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
out[y * width + x] = in[x * width + y];
|
||||
}
|
||||
|
||||
@@ -38,8 +38,8 @@ THE SOFTWARE.
|
||||
__global__ void matrixTranspose(float* out, float* in, const int width) {
|
||||
__shared__ float sharedMem[WIDTH * WIDTH];
|
||||
|
||||
- int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
- int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
+ int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
+ int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
sharedMem[y * width + x] = in[x * width + y];
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ THE SOFTWARE.
|
||||
|
||||
// Device (Kernel) function, it must be void
|
||||
__global__ void matrixTranspose(float* out, float* in, const int width) {
|
||||
- int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
+ int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
float val = in[x];
|
||||
|
||||
|
||||
@@ -36,8 +36,8 @@ THE SOFTWARE.
|
||||
|
||||
// Device (Kernel) function, it must be void
|
||||
__global__ void matrixTranspose(float* out, float* in, const int width) {
|
||||
- int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
- int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
+ int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
+ int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
float val = in[y * width + x];
|
||||
|
||||
out[x * width + y] = __shfl(val, y * width + x);
|
||||
|
||||
@@ -31,7 +31,7 @@ We will be using the Simple Matrix Transpose application from the previous tutor
|
||||
|
||||
In the same source code that we used for MatrixTranspose, we'll add the following:
|
||||
```
|
||||
- int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
+ int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
out[x*width + y] = __shfl(val,y*width + x);
|
||||
```
|
||||
|
||||
|
||||
새 이슈에서 참조
사용자 차단