Update hipLaunchParm.cpp
changing the condition, a = b, as Sam suggested
[ROCm/hip commit: 9e78ef99e5]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
b6e18b5af8
Коммит
76546f91dd
@@ -30,7 +30,8 @@ THE SOFTWARE.
|
||||
|
||||
static const int BLOCK_DIM_SIZE = 1024;
|
||||
|
||||
// This test is to verify Struct with variables to check the hipLaunchKernel() support, read from device.
|
||||
// This test is to verify Struct with variables to check the hipLaunchKernel()
|
||||
// support, read from device.
|
||||
typedef struct hipLaunchKernelStruct1 {
|
||||
int li; // local int
|
||||
float lf; // local float
|
||||
@@ -67,95 +68,137 @@ typedef struct hipLaunchKernelStruct5 {
|
||||
} hipLaunchKernelStruct_t5;
|
||||
|
||||
|
||||
// This test is to verify struct with aligned(8), right now it's broken on hcc & hip-clang
|
||||
// This test is to verify struct with aligned(8), right now it's broken
|
||||
// on hcc & hip-clang
|
||||
typedef struct hipLaunchKernelStruct6 {
|
||||
char c1;
|
||||
short int si;
|
||||
} __attribute__ ((aligned(8))) hipLaunchKernelStruct_t6;
|
||||
} __attribute__((aligned(8))) hipLaunchKernelStruct_t6;
|
||||
|
||||
// This test is to verify struct with aligned(16), right now it's broken on hcc & hip-clang
|
||||
// This test is to verify struct with aligned(16), right now it's broken
|
||||
// on hcc & hip-clang
|
||||
typedef struct hipLaunchKernelStruct7 {
|
||||
char c1;
|
||||
short int si;
|
||||
} __attribute__ ((aligned(16))) hipLaunchKernelStruct_t7;
|
||||
} __attribute__((aligned(16))) hipLaunchKernelStruct_t7;
|
||||
|
||||
// This test is to verify struct with packed & aligned, size should be 4Bytes, , right now it's broken on hcc & hip-clang
|
||||
// This test is to verify struct with packed & aligned, size should be 4Bytes,
|
||||
// right now it's broken on hcc & hip-clang
|
||||
typedef struct hipLaunchKernelStruct8 {
|
||||
char c1;
|
||||
short int si;
|
||||
bool b;
|
||||
} __attribute__ ((packed, aligned(4))) hipLaunchKernelStruct_t8;
|
||||
}__attribute__((packed, aligned(4))) hipLaunchKernelStruct_t8;
|
||||
|
||||
// Passing struct to a hipLaunchKernel(), read and write into the same struct
|
||||
__global__ void hipLaunchKernelStructFunc1(hipLaunchParm lp, hipLaunchKernelStruct_t1 hipLaunchKernelStruct_, bool* result_d1) {
|
||||
// Passing struct to a hipLaunchKernel(),
|
||||
// read and write into the same struct
|
||||
__global__ void hipLaunchKernelStructFunc1(hipLaunchParm lp,
|
||||
hipLaunchKernelStruct_t1 hipLaunchKernelStruct_,
|
||||
bool* result_d1) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
// set the result to true if the condition met
|
||||
result_d1[x] = ((hipLaunchKernelStruct_.li == 1) && (hipLaunchKernelStruct_.lf == 1.0) && (hipLaunchKernelStruct_.result == false)) ? true : false;
|
||||
result_d1[x] = ((hipLaunchKernelStruct_.li == 1)
|
||||
&& (hipLaunchKernelStruct_.lf == 1.0)
|
||||
&& (hipLaunchKernelStruct_.result == false));
|
||||
}
|
||||
|
||||
// Passing struct to a hipLaunchKernel(), checks padding, read and write into the same struct
|
||||
__global__ void hipLaunchKernelStructFunc2(hipLaunchParm lp, hipLaunchKernelStruct_t2 hipLaunchKernelStruct_, bool* result_d2) {
|
||||
// Passing struct to a hipLaunchKernel(), checks padding,
|
||||
// read and write into the same struct
|
||||
__global__ void hipLaunchKernelStructFunc2(hipLaunchParm lp,
|
||||
hipLaunchKernelStruct_t2 hipLaunchKernelStruct_,
|
||||
bool* result_d2) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
// set the result to true if the condition met
|
||||
result_d2[x] = ((hipLaunchKernelStruct_.c1 == 'a') && (hipLaunchKernelStruct_.l1 == 1.0)
|
||||
&& (hipLaunchKernelStruct_.c2 == 'b') && (hipLaunchKernelStruct_.l2 == 2.0) ) ? true : false;
|
||||
result_d2[x] = ((hipLaunchKernelStruct_.c1 == 'a')
|
||||
&& (hipLaunchKernelStruct_.l1 == 1.0)
|
||||
&& (hipLaunchKernelStruct_.c2 == 'b')
|
||||
&& (hipLaunchKernelStruct_.l2 == 2.0) );
|
||||
}
|
||||
|
||||
// Passing struct to a hipLaunchKernel(), checks padding, read and write into the same struct
|
||||
__global__ void hipLaunchKernelStructFunc3(hipLaunchParm lp, hipLaunchKernelStruct_t3 hipLaunchKernelStruct_, bool* result_d3) {
|
||||
// Passing struct to a hipLaunchKernel(), checks padding,
|
||||
// read and write into the same struct
|
||||
__global__ void hipLaunchKernelStructFunc3(hipLaunchParm lp,
|
||||
hipLaunchKernelStruct_t3 hipLaunchKernelStruct_,
|
||||
bool* result_d3) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
// set the result to true if the condition met
|
||||
result_d3[x] = ((hipLaunchKernelStruct_.bf1 == 1) && (hipLaunchKernelStruct_.bf2 == 1)
|
||||
&& (hipLaunchKernelStruct_.l1 == 1.0) && (hipLaunchKernelStruct_.bf3 == 1) ) ? true : false;
|
||||
result_d3[x] = ((hipLaunchKernelStruct_.bf1 == 1)
|
||||
&& (hipLaunchKernelStruct_.bf2 == 1)
|
||||
&& (hipLaunchKernelStruct_.l1 == 1.0)
|
||||
&& (hipLaunchKernelStruct_.bf3 == 1) );
|
||||
}
|
||||
|
||||
// Passing empty struct to a hipLaunchKernel(), check the size of 1Byte, set the result_d4 to true if condition met
|
||||
__global__ void hipLaunchKernelStructFunc4(hipLaunchParm lp, hipLaunchKernelStruct_t4 hipLaunchKernelStruct_, bool* result_d4) {
|
||||
// Passing empty struct to a hipLaunchKernel(), check the size of 1Byte,
|
||||
// set the result_d4 to true if condition met
|
||||
__global__ void hipLaunchKernelStructFunc4(hipLaunchParm lp,
|
||||
hipLaunchKernelStruct_t4 hipLaunchKernelStruct_,
|
||||
bool* result_d4) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
// set the result to true if the condition met
|
||||
result_d4[x] = (sizeof(hipLaunchKernelStruct_) == 1) ? true : false;
|
||||
result_d4[x] = (sizeof(hipLaunchKernelStruct_) == 1);
|
||||
}
|
||||
|
||||
// Passing struct with pointer object to a hipLaunchKernel()
|
||||
__global__ void hipLaunchKernelStructFunc5(hipLaunchParm lp, hipLaunchKernelStruct_t5 hipLaunchKernelStruct_, bool* result_d5) {
|
||||
__global__ void hipLaunchKernelStructFunc5(hipLaunchParm lp,
|
||||
hipLaunchKernelStruct_t5 hipLaunchKernelStruct_,
|
||||
bool* result_d5) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
// set the result to true if the condition met
|
||||
result_d5[x] = ((hipLaunchKernelStruct_.c1 == 'c') && (*hipLaunchKernelStruct_.cp == 'p')) ? true : false;
|
||||
result_d5[x] = ((hipLaunchKernelStruct_.c1 == 'c')
|
||||
&& (*hipLaunchKernelStruct_.cp == 'p'));
|
||||
}
|
||||
|
||||
// Passing struct which is aligned to 8Byte to a hipLaunchKernel(), set the result_d6 to true if condition met
|
||||
__global__ void hipLaunchKernelStructFunc6(hipLaunchParm lp, hipLaunchKernelStruct_t6 hipLaunchKernelStruct_, bool* result_d6) {
|
||||
// Passing struct which is aligned to 8Byte to a hipLaunchKernel(),
|
||||
// set the result_d6 to true if condition met
|
||||
__global__ void hipLaunchKernelStructFunc6(hipLaunchParm lp,
|
||||
hipLaunchKernelStruct_t6 hipLaunchKernelStruct_,
|
||||
bool* result_d6) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
// set the result to true if the condition met
|
||||
int *p = (int*)(&hipLaunchKernelStruct_); // get the address of the struct
|
||||
// size_t(p)%8 will be 0 if aligned to 8Byte address space
|
||||
result_d6[x] = ((hipLaunchKernelStruct_.c1 == 'c') && (hipLaunchKernelStruct_.si == 1) && ((size_t(p))%8 ==0)) ? true : false;
|
||||
// get the address of the struct
|
||||
// size_t(p)%8 will be 0 if aligned to 8Byte address space
|
||||
int *p = (int*)(&hipLaunchKernelStruct_);
|
||||
result_d6[x] = ((hipLaunchKernelStruct_.c1 == 'c')
|
||||
&& (hipLaunchKernelStruct_.si == 1)
|
||||
&& ((size_t(p))%8 ==0));
|
||||
}
|
||||
|
||||
// Passing struct which is aligned to 16Byte to a hipLaunchKernel(), set the result_d7 to true if condition met
|
||||
__global__ void hipLaunchKernelStructFunc7(hipLaunchParm lp, hipLaunchKernelStruct_t7 hipLaunchKernelStruct_, bool* result_d7) {
|
||||
// Passing struct which is aligned to 16Byte to a hipLaunchKernel(),
|
||||
// set the result_d7 to true if condition met
|
||||
__global__ void hipLaunchKernelStructFunc7(hipLaunchParm lp,
|
||||
hipLaunchKernelStruct_t7 hipLaunchKernelStruct_,
|
||||
bool* result_d7) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
// set the result to true if the condition met
|
||||
int *p = (int*)(&hipLaunchKernelStruct_); // get the address of the struct
|
||||
// size_t(p)%16 will be 0 if aligned to 16Byte address space
|
||||
result_d7[x] = ((hipLaunchKernelStruct_.c1 == 'c') && (hipLaunchKernelStruct_.si == 1) && ((size_t(p))%16 ==0) ) ? true : false;
|
||||
// get the address of the struct
|
||||
// size_t(p)%16 will be 0 if aligned to 16Byte address space
|
||||
int *p = (int*)(&hipLaunchKernelStruct_);
|
||||
result_d7[x] = ((hipLaunchKernelStruct_.c1 == 'c')
|
||||
&& (hipLaunchKernelStruct_.si == 1)
|
||||
&& ((size_t(p))%16 ==0) );
|
||||
}
|
||||
|
||||
// Passing struct which is packed & aligned to 4Byte to a hipLaunchKernel(), set the result_d8 to true if condition met
|
||||
__global__ void hipLaunchKernelStructFunc8(hipLaunchParm lp, hipLaunchKernelStruct_t8 hipLaunchKernelStruct_, bool* result_d8) {
|
||||
// Passing struct which is packed & aligned to 4Byte to a hipLaunchKernel(),
|
||||
// set the result_d8 to true if condition met
|
||||
__global__ void hipLaunchKernelStructFunc8(hipLaunchParm lp,
|
||||
hipLaunchKernelStruct_t8 hipLaunchKernelStruct_,
|
||||
bool* result_d8) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
// set the result to true if the condition met
|
||||
int *p = (int*)(&hipLaunchKernelStruct_); // get the address of the xth element, struct[x],
|
||||
// size_t(p)%4 will be 0 if aligned to 4Byte address space
|
||||
result_d8[x] = ((hipLaunchKernelStruct_.c1 == 'c') && (hipLaunchKernelStruct_.si == 1) && ((size_t(p))%4 ==0) ) ? true : false;
|
||||
// get the address of the xth element, struct[x],
|
||||
// size_t(p)%4 will be 0 if aligned to 4Byte address space
|
||||
int *p = (int*)(&hipLaunchKernelStruct_);
|
||||
result_d8[x] = ((hipLaunchKernelStruct_.c1 == 'c')
|
||||
&& (hipLaunchKernelStruct_.si == 1)
|
||||
&& ((size_t(p))%4 ==0) );
|
||||
}
|
||||
|
||||
__global__ void vAdd(hipLaunchParm lp, float* a) {}
|
||||
@@ -165,29 +208,29 @@ __global__ void vAdd(hipLaunchParm lp, float* a) {}
|
||||
#define WRAP(...) __VA_ARGS__
|
||||
|
||||
#include <sys/time.h>
|
||||
#define GPU_PRINT_TIME(cmd, elapsed, quiet) \
|
||||
do { \
|
||||
struct timeval start, stop; \
|
||||
float elapsed; \
|
||||
gettimeofday(&start, NULL); \
|
||||
hipDeviceSynchronize(); \
|
||||
cmd; \
|
||||
hipDeviceSynchronize(); \
|
||||
gettimeofday(&stop, NULL); \
|
||||
#define GPU_PRINT_TIME(cmd, elapsed, quiet) \
|
||||
do { \
|
||||
struct timeval start, stop; \
|
||||
float elapsed; \
|
||||
gettimeofday(&start, NULL); \
|
||||
hipDeviceSynchronize(); \
|
||||
cmd; \
|
||||
hipDeviceSynchronize(); \
|
||||
gettimeofday(&stop, NULL); \
|
||||
} while (0);
|
||||
|
||||
|
||||
#define MY_LAUNCH(command, doTrace, msg) \
|
||||
{ \
|
||||
if (doTrace) printf("TRACE: %s %s\n", msg, #command); \
|
||||
command; \
|
||||
#define MY_LAUNCH(command, doTrace, msg) \
|
||||
{ \
|
||||
if (doTrace) printf("TRACE: %s %s\n", msg, #command); \
|
||||
command; \
|
||||
}
|
||||
|
||||
|
||||
#define MY_LAUNCH_WITH_PAREN(command, doTrace, msg) \
|
||||
{ \
|
||||
if (doTrace) printf("TRACE: %s %s\n", msg, #command); \
|
||||
(command); \
|
||||
#define MY_LAUNCH_WITH_PAREN(command, doTrace, msg) \
|
||||
{ \
|
||||
if (doTrace) printf("TRACE: %s %s\n", msg, #command); \
|
||||
(command); \
|
||||
}
|
||||
|
||||
|
||||
@@ -202,7 +245,8 @@ int main() {
|
||||
hipMalloc((void**)&result_d1, BLOCK_DIM_SIZE*sizeof(bool));
|
||||
hipHostMalloc((void**)&result_h1, BLOCK_DIM_SIZE*sizeof(bool));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
result_d1[k] = false; // initialize to false, will be set to true if the struct is accessible from device.
|
||||
result_d1[k] = false; // initialize to false, will be set to true
|
||||
// if the struct is accessible from device.
|
||||
}
|
||||
hipLaunchKernelStruct_h1.li = 1;
|
||||
hipLaunchKernelStruct_h1.lf = 1.0;
|
||||
@@ -214,7 +258,8 @@ int main() {
|
||||
hipMalloc((void**)&result_d2, BLOCK_DIM_SIZE*sizeof(bool));
|
||||
hipHostMalloc((void**)&result_h2, BLOCK_DIM_SIZE*sizeof(bool));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
result_d2[k] = false; // initialize to false, will be set to true if the struct is accessible from device.
|
||||
result_d2[k] = false; // initialize to false, will be set to
|
||||
// true if the struct is accessible from device.
|
||||
}
|
||||
hipLaunchKernelStruct_h2.c1 = 'a';
|
||||
hipLaunchKernelStruct_h2.l1 = 1.0;
|
||||
@@ -228,13 +273,15 @@ int main() {
|
||||
hipMalloc((void**)&result_d3, BLOCK_DIM_SIZE*sizeof(bool));
|
||||
hipHostMalloc((void**)&result_h3, BLOCK_DIM_SIZE*sizeof(bool));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
result_d2[k] = false; // initialize to false, will be set to true if the struct is accessible from device.
|
||||
result_d2[k] = false; // initialize to false, will be set to
|
||||
// true if the struct is accessible from device.
|
||||
}
|
||||
hipLaunchKernelStruct_h3.bf1 = 1;
|
||||
hipLaunchKernelStruct_h3.bf2 = 1;
|
||||
hipLaunchKernelStruct_h3.l1 = 1.0;
|
||||
hipLaunchKernelStruct_h3.bf3 = 1;
|
||||
hipLaunchKernelStruct_h3.result = false; // This will be set to true if the the condition is satisfied, from device side
|
||||
hipLaunchKernelStruct_h3.result = false; // This will be set to true if
|
||||
// the the condition is satisfied, from device side
|
||||
|
||||
// empty struct
|
||||
hipLaunchKernelStruct_t4 hipLaunchKernelStruct_h4;
|
||||
@@ -242,21 +289,25 @@ int main() {
|
||||
hipMalloc((void**)&result_d4, BLOCK_DIM_SIZE*sizeof(bool));
|
||||
hipHostMalloc((void**)&result_h4, BLOCK_DIM_SIZE*sizeof(bool));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
result_d4[k] = false; // initialize to false, will be set to true if the struct size is 1Byte, from device size
|
||||
result_d4[k] = false; // initialize to false, will be set to
|
||||
// true if the struct size is 1Byte, from device size
|
||||
}
|
||||
|
||||
// Passing struct with pointer object to a hipLaunchKernel()
|
||||
hipLaunchKernelStruct_t5 hipLaunchKernelStruct_h5;
|
||||
char* cp_d5; // This is passed as pointer to struct member, struct.cp = &cp_d5
|
||||
// This is passed as pointer to struct member, struct.cp = &cp_d5
|
||||
char* cp_d5;
|
||||
bool *result_d5, *result_h5;
|
||||
hipMalloc((void**)&result_d5, BLOCK_DIM_SIZE*sizeof(bool));
|
||||
hipMalloc((void**)&cp_d5, sizeof(char)); // allocating memory for char pointer on device
|
||||
// allocating memory for char pointer on device
|
||||
hipMalloc((void**)&cp_d5, sizeof(char));
|
||||
hipHostMalloc((void**)&result_h5, BLOCK_DIM_SIZE*sizeof(bool));
|
||||
*cp_d5 = 'p'; // initializing memory to 'p'
|
||||
hipLaunchKernelStruct_h5.c1 = 'c';
|
||||
hipLaunchKernelStruct_h5.cp = cp_d5;
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
result_d5[k] = false; // initialize to false, will be set to true if the struct size is 1Byte, from device size
|
||||
result_d5[k] = false; // initialize to false, will be set to
|
||||
// true if the struct size is 1Byte, from device size
|
||||
}
|
||||
|
||||
// Passing struct with aligned(8)
|
||||
@@ -267,7 +318,8 @@ int main() {
|
||||
hipLaunchKernelStruct_h6.c1 = 'c';
|
||||
hipLaunchKernelStruct_h6.si = 1;
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
result_d6[k] = false; // initialize to false, will be set to true if the struct size is 1Byte, from device size
|
||||
result_d6[k] = false; // initialize to false, will be set to
|
||||
// true if the struct size is 1Byte, from device size
|
||||
}
|
||||
|
||||
// Passing struct with aligned(16)
|
||||
@@ -278,7 +330,8 @@ int main() {
|
||||
hipLaunchKernelStruct_h7.c1 = 'c';
|
||||
hipLaunchKernelStruct_h7.si = 1;
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
result_d7[k] = false; // initialize to false, will be set to true if the struct size is 1Byte, from device size
|
||||
result_d7[k] = false; // initialize to false, will be set to
|
||||
// true if the struct size is 1Byte, from device size
|
||||
}
|
||||
// Passing struct with packed aligned to 6Bytes
|
||||
hipLaunchKernelStruct_t8 hipLaunchKernelStruct_h8;
|
||||
@@ -288,7 +341,8 @@ int main() {
|
||||
hipLaunchKernelStruct_h8.c1 = 'c';
|
||||
hipLaunchKernelStruct_h8.si = 1;
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
result_d8[k] = false; // initialize to false, will be set to true if the struct size is 1Byte, from device size
|
||||
result_d8[k] = false; // initialize to false, will be set to
|
||||
// true if the struct size is 1Byte, from device size
|
||||
}
|
||||
|
||||
// Test the different hipLaunchParm options:
|
||||
@@ -296,68 +350,89 @@ int main() {
|
||||
hipLaunchKernel(vAdd, 1024, dim3(1), 0, 0, Ad);
|
||||
hipLaunchKernel(vAdd, dim3(1024), 1, 0, 0, Ad);
|
||||
hipLaunchKernel(vAdd, dim3(1024), dim3(1), 0, 0, Ad);
|
||||
hipLaunchKernel(hipLaunchKernelStructFunc1, dim3(BLOCK_DIM_SIZE), dim3(1), 0, 0, hipLaunchKernelStruct_h1, result_d1);
|
||||
hipLaunchKernel(hipLaunchKernelStructFunc2, dim3(BLOCK_DIM_SIZE), dim3(1), 0, 0, hipLaunchKernelStruct_h2, result_d2);
|
||||
hipLaunchKernel(hipLaunchKernelStructFunc3, dim3(BLOCK_DIM_SIZE), dim3(1), 0, 0, hipLaunchKernelStruct_h3, result_d3);
|
||||
hipLaunchKernel(hipLaunchKernelStructFunc4, dim3(BLOCK_DIM_SIZE), dim3(1), 0, 0, hipLaunchKernelStruct_h4, result_d4);
|
||||
hipLaunchKernel(hipLaunchKernelStructFunc5, dim3(BLOCK_DIM_SIZE), dim3(1), 0, 0, hipLaunchKernelStruct_h5, result_d5);
|
||||
hipLaunchKernel(hipLaunchKernelStructFunc6, dim3(BLOCK_DIM_SIZE), dim3(1), 0, 0, hipLaunchKernelStruct_h6, result_d6);
|
||||
hipLaunchKernel(hipLaunchKernelStructFunc7, dim3(BLOCK_DIM_SIZE), dim3(1), 0, 0, hipLaunchKernelStruct_h7, result_d7);
|
||||
hipLaunchKernel(hipLaunchKernelStructFunc8, dim3(BLOCK_DIM_SIZE), dim3(1), 0, 0, hipLaunchKernelStruct_h8, result_d8);
|
||||
hipLaunchKernel(hipLaunchKernelStructFunc1, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(1), 0, 0, hipLaunchKernelStruct_h1, result_d1);
|
||||
hipLaunchKernel(hipLaunchKernelStructFunc2, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(1), 0, 0, hipLaunchKernelStruct_h2, result_d2);
|
||||
hipLaunchKernel(hipLaunchKernelStructFunc3, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(1), 0, 0, hipLaunchKernelStruct_h3, result_d3);
|
||||
hipLaunchKernel(hipLaunchKernelStructFunc4, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(1), 0, 0, hipLaunchKernelStruct_h4, result_d4);
|
||||
hipLaunchKernel(hipLaunchKernelStructFunc5, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(1), 0, 0, hipLaunchKernelStruct_h5, result_d5);
|
||||
hipLaunchKernel(hipLaunchKernelStructFunc6, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(1), 0, 0, hipLaunchKernelStruct_h6, result_d6);
|
||||
hipLaunchKernel(hipLaunchKernelStructFunc7, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(1), 0, 0, hipLaunchKernelStruct_h7, result_d7);
|
||||
hipLaunchKernel(hipLaunchKernelStructFunc8, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(1), 0, 0, hipLaunchKernelStruct_h8, result_d8);
|
||||
|
||||
// Validation part of the struct, hipLaunchKernelStructFunc1
|
||||
hipMemcpy(result_h1, result_d1, BLOCK_DIM_SIZE*sizeof(bool), hipMemcpyDeviceToHost);
|
||||
hipMemcpy(result_h1, result_d1, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost);
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
HIPASSERT(result_h1[k] == true);
|
||||
|
||||
// Validation part of the struct, hipLaunchKernelStructFunc2
|
||||
hipMemcpy(result_h2, result_d2, BLOCK_DIM_SIZE*sizeof(bool), hipMemcpyDeviceToHost);
|
||||
hipMemcpy(result_h2, result_d2, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost);
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
HIPASSERT(result_h2[k] == true);
|
||||
|
||||
// Validation part of the struct, hipLaunchKernelStructFunc3
|
||||
hipMemcpy(result_h3, result_d3, BLOCK_DIM_SIZE*sizeof(bool), hipMemcpyDeviceToHost);
|
||||
hipMemcpy(result_h3, result_d3, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost);
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
HIPASSERT(result_h3[k] == true);
|
||||
|
||||
// Validation part of the struct, hipLaunchKernelStructFunc4
|
||||
hipMemcpy(result_h4, result_d4, BLOCK_DIM_SIZE*sizeof(bool), hipMemcpyDeviceToHost);
|
||||
hipMemcpy(result_h4, result_d4, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost);
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
HIPASSERT(result_h4[k] == true);
|
||||
|
||||
// Validation part of the struct, hipLaunchKernelStructFunc5
|
||||
hipMemcpy(result_h5, result_d5, BLOCK_DIM_SIZE*sizeof(bool), hipMemcpyDeviceToHost);
|
||||
hipMemcpy(result_h5, result_d5, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost);
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
HIPASSERT(result_h5[k] == true);
|
||||
|
||||
#if DISABLE_TEST // alignment is broken hence disabled the validation part
|
||||
// alignment is broken hence disabled the validation part
|
||||
#if DISABLE_TEST
|
||||
// Validation part of the struct, hipLaunchKernelStructFunc6
|
||||
hipMemcpy(result_h6, result_d6, BLOCK_DIM_SIZE*sizeof(bool), hipMemcpyDeviceToHost);
|
||||
hipMemcpy(result_h6, result_d6, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost);
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
HIPASSERT(result_h6[k] == true);
|
||||
|
||||
// Validation part of the struct, hipLaunchKernelStructFunc7
|
||||
hipMemcpy(result_h7, result_d7, BLOCK_DIM_SIZE*sizeof(bool), hipMemcpyDeviceToHost);
|
||||
hipMemcpy(result_h7, result_d7, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost);
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
HIPASSERT(result_h7[k] == true);
|
||||
|
||||
// Validation part of the struct, hipLaunchKernelStructFunc7
|
||||
hipMemcpy(result_h8, result_d8, BLOCK_DIM_SIZE*sizeof(bool), hipMemcpyDeviceToHost);
|
||||
hipMemcpy(result_h8, result_d8, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost);
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
HIPASSERT(result_h8[k] == true);
|
||||
#endif
|
||||
|
||||
// Test case with hipLaunchKernel inside another macro:
|
||||
float e0;
|
||||
GPU_PRINT_TIME(hipLaunchKernel(vAdd, dim3(1024), dim3(1), 0, 0, Ad), e0, j);
|
||||
GPU_PRINT_TIME(WRAP(hipLaunchKernel(vAdd, dim3(1024), dim3(1), 0, 0, Ad)), e0, j);
|
||||
GPU_PRINT_TIME(hipLaunchKernel(vAdd, dim3(1024),
|
||||
dim3(1), 0, 0, Ad), e0, j);
|
||||
GPU_PRINT_TIME(WRAP(hipLaunchKernel(vAdd, dim3(1024),
|
||||
dim3(1), 0, 0, Ad)), e0, j);
|
||||
|
||||
#ifdef EXTRA_PARENS_1
|
||||
// Don't wrap hipLaunchKernel in extra set of parens:
|
||||
GPU_PRINT_TIME((hipLaunchKernel(vAdd, dim3(1024), dim3(1), 0, 0, Ad)), e0, j);
|
||||
GPU_PRINT_TIME((hipLaunchKernel(vAdd, dim3(1024),
|
||||
dim3(1), 0, 0, Ad)), e0, j);
|
||||
#endif
|
||||
|
||||
MY_LAUNCH(hipLaunchKernel(vAdd, dim3(1024), dim3(1), 0, 0, Ad), true, "firstCall");
|
||||
MY_LAUNCH(hipLaunchKernel(vAdd, dim3(1024), dim3(1),
|
||||
0, 0, Ad), true, "firstCall");
|
||||
|
||||
float* A;
|
||||
float e1;
|
||||
@@ -365,7 +440,8 @@ int main() {
|
||||
|
||||
#ifdef EXTRA_PARENS_2
|
||||
// MY_LAUNCH_WITH_PAREN wraps cmd in () which can cause issues.
|
||||
MY_LAUNCH_WITH_PAREN(hipLaunchKernel(vAdd, dim3(1024), dim3(1), 0, 0, Ad), true, "firstCall");
|
||||
MY_LAUNCH_WITH_PAREN(hipLaunchKernel(vAdd, dim3(1024),
|
||||
dim3(1), 0, 0, Ad), true, "firstCall");
|
||||
#endif
|
||||
|
||||
hipFree((void **)&result_h1);
|
||||
|
||||
Ссылка в новой задаче
Block a user