SWDEV-230423 - [dtest] Adding Stress tests.
http://ontrack-internal.amd.com/browse/SWDEV-230423
1. Moving stress folder from hip/test/src to hip/test.
2. Adding Stream stress tests.
These stress tests create multiple streams and launches kernel on them
in multiple combinations for 100000 iterations. These tests will test
the stability of streams created using hipStreamCreate, hipStreamCreateWithPriority
and hipStreamCreateWithFlags.
3. Adding printf stress tests using simple kernels.
4. Adding printf stress tests using a complex kernel.
Change-Id: Idcd26707fb9504ab8dbe6cebcbb32ade61bf1483
[ROCm/hip-tests commit: f699e87bd2]
Šī revīzija ir iekļauta:
revīziju iesūtīja
Sumanth Tumbalam Gooty
vecāks
d802a4c601
revīzija
d90a8fd966
@@ -1,7 +1,13 @@
|
||||
add_custom_target(build_stress_test)
|
||||
add_executable(StressTest EXCLUDE_FROM_ALL ../hipTestMain/main.cc)
|
||||
add_executable(StressTest EXCLUDE_FROM_ALL ../hipTestMain/main.cc ../hipTestMain/hip_test_context.cc)
|
||||
set_property(TARGET StressTest PROPERTY CXX_STANDARD 17)
|
||||
target_link_libraries(StressTest PRIVATE stdc++fs)
|
||||
add_dependencies(build_stress_test StressTest)
|
||||
add_custom_target(stress_test COMMAND StressTest)
|
||||
#add_dependencies(stress_test build_stress_test)
|
||||
add_subdirectory(memory)
|
||||
target_link_libraries(StressTest PRIVATE memory ht_context)
|
||||
if(HIP_PLATFORM MATCHES "amd")
|
||||
add_subdirectory(printf)
|
||||
add_subdirectory(stream)
|
||||
target_link_libraries(StressTest PRIVATE printf stream)
|
||||
endif()
|
||||
target_link_libraries(StressTest PRIVATE memory)
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
# Common Tests - Test independent of all platforms
|
||||
set(TEST_SRC
|
||||
Stress_printf_ComplexKernels.cc
|
||||
Stress_printf_SimpleKernels.cc
|
||||
)
|
||||
|
||||
# Create shared lib of all tests
|
||||
add_library(printf SHARED EXCLUDE_FROM_ALL ${TEST_SRC})
|
||||
|
||||
# Add dependency on build_tests to build it on this custom target
|
||||
add_dependencies(build_stress_test printf)
|
||||
@@ -0,0 +1,511 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#ifdef __linux__
|
||||
#include "printf_common.h"
|
||||
#endif
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#define MAX_BLOCK_SIZE 523
|
||||
#define MAX_GRID_SIZE 503
|
||||
#define CHUNK_SIZE 1024
|
||||
#define NUM_STREAM 4
|
||||
#define CONST_WEIGHTING_FACT1 7
|
||||
#define CONST_WEIGHTING_FACT2 5
|
||||
|
||||
namespace hipPrintfStressTest {
|
||||
struct printInfo {
|
||||
uint32_t printSizeinBytes, lineCount;
|
||||
};
|
||||
|
||||
__device__ __host__ struct printInfo startPrint(uint32_t tid,
|
||||
uint32_t iterCount, uint32_t *a, uint32_t *b) {
|
||||
uint32_t printSize = 0;
|
||||
uint32_t lineCount = 0;
|
||||
// The 2nd modulus operand is arbitrarily chosen as 7 below to
|
||||
// diversify the printf output as much as possible while also being
|
||||
// a prime number. This number is fixed to 7 and should not be changed.
|
||||
uint32_t mod = tid % 7;
|
||||
// Perform some calculations and print the values.
|
||||
uint32_t uiresult;
|
||||
int32_t iresult;
|
||||
float fresult;
|
||||
for (uint32_t count = 0; count < iterCount; count++) {
|
||||
if (0 == mod) {
|
||||
// Perform Vector Multiplication a(i)*b(i)
|
||||
// Print both tid and result
|
||||
uiresult = a[tid]*b[tid];
|
||||
printSize +=
|
||||
printf("tid %u: Value of result=%u or %x\n",
|
||||
tid, uiresult, uiresult);
|
||||
lineCount++;
|
||||
} else if (1 == mod) {
|
||||
// Perform Array Addition a(i) + b(i)
|
||||
// Print both tid and result
|
||||
uiresult = a[tid] + b[tid];
|
||||
printSize +=
|
||||
printf("tid %u: Value of result=%u or %x \n",
|
||||
tid, uiresult, uiresult);
|
||||
lineCount++;
|
||||
} else if (2 == mod) {
|
||||
// Perform Array Subtraction a(i) - b(i)
|
||||
// Print both tid and result (as both int, uint)
|
||||
iresult = a[tid] - b[tid];
|
||||
printSize +=
|
||||
printf("tid %u: Value of result=%d or %x\n",
|
||||
tid, iresult, iresult);
|
||||
lineCount++;
|
||||
} else if (3 == mod) {
|
||||
// Perform Sum of Squares a(i)*a(i) + b(i)*b(i)
|
||||
// Print both tid and result
|
||||
uiresult = a[tid]*a[tid] + b[tid]*b[tid];
|
||||
printSize +=
|
||||
printf("tid %u: Value of result=%u or %x\n",
|
||||
tid, uiresult, uiresult);
|
||||
lineCount++;
|
||||
} else if (4 == mod) {
|
||||
// Perform (a(i)*a(i) + b(i)*b(i))/a(i)*b(i)
|
||||
// Print both tid and result (in float upto 2 decimal precision)
|
||||
fresult = (a[tid]*a[tid] + b[tid]*b[tid])/(a[tid]*b[tid]);
|
||||
printSize +=
|
||||
printf("tid %u: Value of result[%d] = %.2f or %.2e\n",
|
||||
tid, tid, fresult, fresult);
|
||||
lineCount++;
|
||||
} else if (5 == mod) {
|
||||
// Perform (a(i)*a(i) - b(i)*b(i))/a(i)*b(i)
|
||||
// Print both tid and result (in float upto 4 decimal precision)
|
||||
fresult = (a[tid]*a[tid] - b[tid]*b[tid])/(a[tid]*b[tid]);
|
||||
printSize +=
|
||||
printf("tid %u: Value of result[%d] = %.4f or %.4e \n",
|
||||
tid, tid, fresult, fresult);
|
||||
lineCount++;
|
||||
} else if (6 == mod) {
|
||||
// Perform (a(i)*a(i) + b(i)*b(i))/(a(i)*a(i) - b(i)*b(i))
|
||||
// Print both tid and result (in float upto 6 decimal precision)
|
||||
fresult = (a[tid]*a[tid] + b[tid]*b[tid])/
|
||||
(a[tid]*a[tid] - b[tid]*b[tid]);
|
||||
printSize +=
|
||||
printf("tid %u: Value of result[%d] = %.6f or %.6e \n",
|
||||
tid, tid, fresult, fresult);
|
||||
lineCount++;
|
||||
}
|
||||
// Print a random character string of variable size
|
||||
// and number.
|
||||
const char* msg;
|
||||
for (int i = 0; i < 12; i++) {
|
||||
int imod = (i % 6);
|
||||
if (0 == imod) {
|
||||
msg = "jhwehde2hl";
|
||||
} else if (1 == imod) {
|
||||
msg = "jhwehde2hlmc,prmlsl4";
|
||||
} else if (2 == imod) {
|
||||
msg = "xkdojdewnd34dMMnl2o4AAdeBEjbX0";
|
||||
} else if (3 == imod) {
|
||||
msg = "mcropkaA234dmelmfhja44ndalomkfokdMDFK328";
|
||||
} else if (4 == imod) {
|
||||
msg =
|
||||
"udnekc8939MDkdnjj3knsdlmnekdlgJNls328419i905409dfm";
|
||||
} else if (5 == imod) {
|
||||
msg =
|
||||
"lfjweknm4349u34sdlk09j3mAADDSDkeffe575675fdvfLKMWMORMFREKLkl";
|
||||
}
|
||||
printSize += printf("tid %u: %s imod = %d \n", tid, msg, imod);
|
||||
lineCount++;
|
||||
}
|
||||
// Print a long string with data
|
||||
msg =
|
||||
"jheku83290dnmnd##9u9BJKHFJLKsMMMMdkejwejjj232indnfdmsnndnsdn****bsXxZz";
|
||||
float pi = 3.141592;
|
||||
uint32_t unum = 123456789;
|
||||
int32_t inum = -123456789;
|
||||
printSize +=
|
||||
printf("%s,%d,%s,%u,%s,%x,%s,%f,%s,%e\n",
|
||||
msg, inum, msg, unum, msg, unum, msg, pi, msg, pi);
|
||||
lineCount++;
|
||||
// Print different data types using different specifiers
|
||||
float fmaxvalue = std::numeric_limits<float>::max();
|
||||
float fminvalue = std::numeric_limits<float>::min();
|
||||
double dmaxvalue = std::numeric_limits<double>::max();
|
||||
double dminvalue = std::numeric_limits<double>::min();
|
||||
printSize +=
|
||||
printf("%f, %f, %e, %e \n", fmaxvalue, fminvalue, fmaxvalue, fminvalue);
|
||||
printSize +=
|
||||
printf("%f, %f, %e, %e \n", dmaxvalue, dminvalue, dmaxvalue, dminvalue);
|
||||
printSize +=
|
||||
printf("%a, %a, %A, %A \n", fmaxvalue, fminvalue, fmaxvalue, fminvalue);
|
||||
printSize +=
|
||||
printf("%a, %a, %A, %A \n", dmaxvalue, dminvalue, dmaxvalue, dminvalue);
|
||||
lineCount+=4;
|
||||
size_t size_tmaxvalue = std::numeric_limits<size_t>::max();
|
||||
size_t size_tminvalue = std::numeric_limits<size_t>::min();
|
||||
long long llmaxvalue = std::numeric_limits<long long>::max();
|
||||
long long llminvalue = std::numeric_limits<long long>::min();
|
||||
unsigned long long ullmaxvalue =
|
||||
std::numeric_limits<unsigned long long>::max();
|
||||
unsigned long long ullminvalue =
|
||||
std::numeric_limits<unsigned long long>::min();
|
||||
long lmaxvalue = std::numeric_limits<long>::max();
|
||||
long lminvalue = std::numeric_limits<long>::min();
|
||||
unsigned long ulmaxvalue = std::numeric_limits<unsigned long>::max();
|
||||
unsigned long ulminvalue = std::numeric_limits<unsigned long>::min();
|
||||
short smaxvalue = std::numeric_limits<short>::max();
|
||||
short sminvalue = std::numeric_limits<short>::min();
|
||||
unsigned short usmaxvalue = std::numeric_limits<unsigned short>::max();
|
||||
unsigned short usminvalue = std::numeric_limits<unsigned short>::min();
|
||||
char cmaxvalue = std::numeric_limits<char>::max();
|
||||
char cminvalue = std::numeric_limits<char>::min();
|
||||
unsigned char ucmaxvalue = std::numeric_limits<unsigned char>::max();
|
||||
unsigned char ucminvalue = std::numeric_limits<unsigned char>::min();
|
||||
int32_t imaxvalue = std::numeric_limits<int32_t>::max();
|
||||
int32_t iminvalue = std::numeric_limits<int32_t>::min();
|
||||
uint32_t uimaxvalue = std::numeric_limits<uint32_t>::max();
|
||||
uint32_t uiminvalue = std::numeric_limits<uint32_t>::min();
|
||||
printSize +=
|
||||
printf("%zu, %zu, %lli, %lli, %llu, %llu, %li, %li, %lu, %lu\n",
|
||||
size_tmaxvalue, size_tminvalue, llmaxvalue, llminvalue,
|
||||
ullmaxvalue, ullminvalue, lmaxvalue, lminvalue,
|
||||
ulmaxvalue, ulminvalue);
|
||||
printSize +=
|
||||
printf("%zx, %zx, %llx, %llx, %llx, %llx, %lx, %lx, %lx, %lx\n",
|
||||
size_tmaxvalue, size_tminvalue, llmaxvalue, llminvalue,
|
||||
ullmaxvalue, ullminvalue, lmaxvalue, lminvalue,
|
||||
ulmaxvalue, ulminvalue);
|
||||
printSize +=
|
||||
printf("%zX, %zX, %llX, %llX, %llX, %llX, %lX, %lX, %lX, %lX\n",
|
||||
size_tmaxvalue, size_tminvalue, llmaxvalue, llminvalue,
|
||||
ullmaxvalue, ullminvalue, lmaxvalue, lminvalue,
|
||||
ulmaxvalue, ulminvalue);
|
||||
printSize +=
|
||||
printf("%zo, %zo, %llo, %llo, %llo, %llo, %lo, %lo, %lo, %lo\n",
|
||||
size_tmaxvalue, size_tminvalue, llmaxvalue, llminvalue,
|
||||
ullmaxvalue, ullminvalue, lmaxvalue, lminvalue,
|
||||
ulmaxvalue, ulminvalue);
|
||||
printSize +=
|
||||
printf("%hd, %hd, %hu, %hu, %hhd, %hhd, %hhu, %hhu, %d, %d, %u, %u\n",
|
||||
smaxvalue, sminvalue, usmaxvalue, usminvalue,
|
||||
cmaxvalue, cminvalue, ucmaxvalue, ucminvalue,
|
||||
imaxvalue, iminvalue, uimaxvalue, uiminvalue);
|
||||
printSize +=
|
||||
printf("%hx, %hx, %hx, %hx, %hhx, %hhx, %hhx, %hhx, %x, %x, %x, %x\n",
|
||||
smaxvalue, sminvalue, usmaxvalue, usminvalue,
|
||||
cmaxvalue, cminvalue, ucmaxvalue, ucminvalue,
|
||||
imaxvalue, iminvalue, uimaxvalue, uiminvalue);
|
||||
printSize +=
|
||||
printf("%hX, %hX, %hX, %hX, %hhX, %hhX, %hhX, %hhX, %X, %X, %X, %X\n",
|
||||
smaxvalue, sminvalue, usmaxvalue, usminvalue,
|
||||
cmaxvalue, cminvalue, ucmaxvalue, ucminvalue,
|
||||
imaxvalue, iminvalue, uimaxvalue, uiminvalue);
|
||||
printSize +=
|
||||
printf("%ho, %ho, %ho, %ho, %hho, %hho, %hho, %hho, %o, %o, %o, %o\n",
|
||||
smaxvalue, sminvalue, usmaxvalue, usminvalue,
|
||||
cmaxvalue, cminvalue, ucmaxvalue, ucminvalue,
|
||||
imaxvalue, iminvalue, uimaxvalue, uiminvalue);
|
||||
printSize +=
|
||||
printf("%c, %c, %c, %c\n", cmaxvalue, cminvalue, ucmaxvalue, ucminvalue);
|
||||
lineCount+=9;
|
||||
}
|
||||
struct printInfo pInfo = {printSize, lineCount};
|
||||
return pInfo;
|
||||
}
|
||||
// This kernel is launched only in X dimension
|
||||
__global__ void kernel_complex_opX(uint32_t *a, uint32_t *b,
|
||||
uint32_t iterCount) {
|
||||
uint32_t tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
startPrint(tid, iterCount, a, b);
|
||||
}
|
||||
// This kernel is launched only in Y dimension
|
||||
__global__ void kernel_complex_opY(uint32_t *a, uint32_t *b,
|
||||
uint32_t iterCount) {
|
||||
uint32_t tid = hipThreadIdx_y + hipBlockIdx_y * hipBlockDim_y;
|
||||
startPrint(tid, iterCount, a, b);
|
||||
}
|
||||
// This kernel is launched only in Z dimension
|
||||
__global__ void kernel_complex_opZ(uint32_t *a, uint32_t *b,
|
||||
uint32_t iterCount) {
|
||||
uint32_t tid = hipThreadIdx_z + hipBlockIdx_z * hipBlockDim_z;
|
||||
startPrint(tid, iterCount, a, b);
|
||||
}
|
||||
#ifdef __linux__
|
||||
// Performs printf stress test on a single GPU using multiple streams.
|
||||
bool test_printf_multistream(uint32_t num_blocks,
|
||||
uint32_t threads_per_block,
|
||||
uint32_t iterCount) {
|
||||
uint32_t buffsize = num_blocks*threads_per_block;
|
||||
size_t actualFileSize = 0;
|
||||
uint32_t totalActualLinecount = 0;
|
||||
uint32_t *Ah, *Bh;
|
||||
uint32_t *Ad, *Bd;
|
||||
Ah = new uint32_t[buffsize];
|
||||
Bh = new uint32_t[buffsize];
|
||||
for (uint32_t i = 0; i < buffsize; i++) {
|
||||
Ah[i] = i + 1;
|
||||
Bh[i] = buffsize - i;
|
||||
}
|
||||
HIP_CHECK(hipMalloc(&Ad, buffsize*sizeof(uint32_t)));
|
||||
HIP_CHECK(hipMalloc(&Bd, buffsize*sizeof(uint32_t)));
|
||||
HIP_CHECK(hipMemcpy(Ad, Ah, buffsize*sizeof(uint32_t),
|
||||
hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(Bd, Bh, buffsize*sizeof(uint32_t),
|
||||
hipMemcpyHostToDevice));
|
||||
// DO NOT PUT ANY PRINTF WITHIN THIS BLOCK OF CODE
|
||||
{
|
||||
CaptureStream captured(stdout);
|
||||
hipStream_t stream[NUM_STREAM];
|
||||
for (int i = 0; i < NUM_STREAM; i++) {
|
||||
HIP_CHECK(hipStreamCreate(&stream[i]));
|
||||
hipLaunchKernelGGL(kernel_complex_opX, dim3(num_blocks, 1, 1),
|
||||
dim3(threads_per_block, 1, 1),
|
||||
0, stream[i], Ad, Bd, iterCount);
|
||||
hipLaunchKernelGGL(kernel_complex_opY, dim3(1, num_blocks, 1),
|
||||
dim3(1, threads_per_block, 1),
|
||||
0, stream[i], Ad, Bd, iterCount);
|
||||
hipLaunchKernelGGL(kernel_complex_opZ, dim3(1, 1, num_blocks),
|
||||
dim3(1, 1, threads_per_block),
|
||||
0, stream[i], Ad, Bd, iterCount);
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
for (int i = 0; i < NUM_STREAM; i++) {
|
||||
HIP_CHECK(hipStreamDestroy(stream[i]));
|
||||
}
|
||||
std::ifstream CapturedData = captured.getCapturedData();
|
||||
char *buffer = new char[CHUNK_SIZE];
|
||||
while (CapturedData.good()) {
|
||||
CapturedData.getline(buffer, CHUNK_SIZE);
|
||||
totalActualLinecount++;
|
||||
}
|
||||
delete[] buffer;
|
||||
struct stat st;
|
||||
if (stat(captured.getTempFilename(), &st)) {
|
||||
printf("Temp File not found \n");
|
||||
return false;
|
||||
}
|
||||
actualFileSize = st.st_size;
|
||||
}
|
||||
struct printInfo pInfo;
|
||||
size_t estimatedPrintSize = 0;
|
||||
// DO NOT PUT ANY PRINTF WITHIN THIS BLOCK OF CODE
|
||||
uint32_t lop = 0;
|
||||
{
|
||||
CaptureStream captured(stdout);
|
||||
for (int j = 0; j < NUM_STREAM; j++) {
|
||||
for (uint32_t tid = 0; tid < (buffsize); tid++) {
|
||||
pInfo = startPrint(tid, iterCount, Ah, Bh);
|
||||
lop += pInfo.lineCount;
|
||||
estimatedPrintSize += pInfo.printSizeinBytes;
|
||||
}
|
||||
for (uint32_t tid = 0; tid < (buffsize); tid++) {
|
||||
pInfo = startPrint(tid, iterCount, Ah, Bh);
|
||||
lop += pInfo.lineCount;
|
||||
estimatedPrintSize += pInfo.printSizeinBytes;
|
||||
}
|
||||
for (uint32_t tid = 0; tid < (buffsize); tid++) {
|
||||
pInfo = startPrint(tid, iterCount, Ah, Bh);
|
||||
lop += pInfo.lineCount;
|
||||
estimatedPrintSize += pInfo.printSizeinBytes;
|
||||
}
|
||||
}
|
||||
}
|
||||
printf("estimatedPrintSize = %zu, actualFileSize = %zu\n",
|
||||
estimatedPrintSize, actualFileSize);
|
||||
printf("estimatedLinesPrinted = %u, actualLinesPrinted = %u\n",
|
||||
lop, totalActualLinecount-1);
|
||||
HIP_CHECK(hipFree(Bd));
|
||||
HIP_CHECK(hipFree(Ad));
|
||||
delete[] Bh;
|
||||
delete[] Ah;
|
||||
if ((estimatedPrintSize != actualFileSize)||
|
||||
(lop != (totalActualLinecount-1))) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool test_printf_multigpu(int gpu,
|
||||
uint32_t num_blocks,
|
||||
uint32_t threads_per_block,
|
||||
uint32_t iterCount,
|
||||
size_t *actualFileSize,
|
||||
uint32_t *totalActualLinecount) {
|
||||
uint32_t buffsize = num_blocks*threads_per_block;
|
||||
uint32_t *Ah, *Bh;
|
||||
uint32_t *Ad, *Bd;
|
||||
HIP_CHECK(hipSetDevice(gpu));
|
||||
Ah = new uint32_t[buffsize];
|
||||
Bh = new uint32_t[buffsize];
|
||||
for (uint32_t i = 0; i < buffsize; i++) {
|
||||
Ah[i] = i + 1;
|
||||
Bh[i] = buffsize - i;
|
||||
}
|
||||
HIP_CHECK(hipMalloc(&Ad, buffsize*sizeof(uint32_t)));
|
||||
HIP_CHECK(hipMalloc(&Bd, buffsize*sizeof(uint32_t)));
|
||||
HIP_CHECK(hipMemcpy(Ad, Ah, buffsize*sizeof(uint32_t),
|
||||
hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(Bd, Bh, buffsize*sizeof(uint32_t),
|
||||
hipMemcpyHostToDevice));
|
||||
// DO NOT PUT ANY PRINTF WITHIN THIS BLOCK OF CODE
|
||||
{
|
||||
CaptureStream captured(stdout);
|
||||
hipLaunchKernelGGL(kernel_complex_opX, dim3(num_blocks, 1, 1),
|
||||
dim3(threads_per_block, 1, 1),
|
||||
0, 0, Ad, Bd, iterCount);
|
||||
hipLaunchKernelGGL(kernel_complex_opY, dim3(1, num_blocks, 1),
|
||||
dim3(1, threads_per_block, 1),
|
||||
0, 0, Ad, Bd, iterCount);
|
||||
hipLaunchKernelGGL(kernel_complex_opZ, dim3(1, 1, num_blocks),
|
||||
dim3(1, 1, threads_per_block),
|
||||
0, 0, Ad, Bd, iterCount);
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
std::ifstream CapturedData = captured.getCapturedData();
|
||||
char *buffer = new char[CHUNK_SIZE];
|
||||
while (CapturedData.good()) {
|
||||
CapturedData.getline(buffer, CHUNK_SIZE);
|
||||
*totalActualLinecount += 1;
|
||||
}
|
||||
delete[] buffer;
|
||||
struct stat st;
|
||||
if (stat(captured.getTempFilename(), &st)) {
|
||||
printf("Temp File not found \n");
|
||||
return false;
|
||||
}
|
||||
*actualFileSize += st.st_size;
|
||||
}
|
||||
HIP_CHECK(hipFree(Bd));
|
||||
HIP_CHECK(hipFree(Ad));
|
||||
delete[] Bh;
|
||||
delete[] Ah;
|
||||
*totalActualLinecount -= 1; // Removing Empty Line
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
return true;
|
||||
}
|
||||
|
||||
// Performs printf stress test on all GPUs present in the system.
|
||||
bool testPrintfMultGPU(int numOfGPUs,
|
||||
uint32_t num_blocks,
|
||||
uint32_t threads_per_block,
|
||||
uint32_t iterCount) {
|
||||
uint32_t buffsize = num_blocks*threads_per_block;
|
||||
size_t actualFileSize = 0;
|
||||
uint32_t totalActualLinecount = 0;
|
||||
for (int gpu = 0; gpu < numOfGPUs; gpu++) {
|
||||
test_printf_multigpu(gpu, num_blocks, threads_per_block,
|
||||
iterCount, &actualFileSize, &totalActualLinecount);
|
||||
}
|
||||
struct printInfo pInfo;
|
||||
size_t estimatedPrintSize = 0;
|
||||
uint32_t *Ah, *Bh;
|
||||
Ah = new uint32_t[buffsize];
|
||||
Bh = new uint32_t[buffsize];
|
||||
for (uint32_t i = 0; i < buffsize; i++) {
|
||||
Ah[i] = i + 1;
|
||||
Bh[i] = buffsize - i;
|
||||
}
|
||||
// DO NOT PUT ANY PRINTF WITHIN THIS BLOCK OF CODE
|
||||
uint32_t lop = 0;
|
||||
{
|
||||
CaptureStream captured(stdout);
|
||||
for (int gpu = 0; gpu < numOfGPUs; gpu++) {
|
||||
for (uint32_t tid = 0; tid < (buffsize); tid++) {
|
||||
pInfo = startPrint(tid, iterCount, Ah, Bh);
|
||||
lop += pInfo.lineCount;
|
||||
estimatedPrintSize += pInfo.printSizeinBytes;
|
||||
}
|
||||
for (uint32_t tid = 0; tid < (buffsize); tid++) {
|
||||
pInfo = startPrint(tid, iterCount, Ah, Bh);
|
||||
lop += pInfo.lineCount;
|
||||
estimatedPrintSize += pInfo.printSizeinBytes;
|
||||
}
|
||||
for (uint32_t tid = 0; tid < (buffsize); tid++) {
|
||||
pInfo = startPrint(tid, iterCount, Ah, Bh);
|
||||
lop += pInfo.lineCount;
|
||||
estimatedPrintSize += pInfo.printSizeinBytes;
|
||||
}
|
||||
}
|
||||
}
|
||||
delete[] Bh;
|
||||
delete[] Ah;
|
||||
printf("estimatedPrintSize = %zu, actualFileSize = %zu\n",
|
||||
estimatedPrintSize, actualFileSize);
|
||||
printf("estimatedLinesPrinted = %u, actualLinesPrinted = %u\n",
|
||||
lop, totalActualLinecount);
|
||||
if ((estimatedPrintSize != actualFileSize)||
|
||||
(lop != totalActualLinecount)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
} // namespace hipPrintfStressTest
|
||||
|
||||
TEST_CASE("Stress_printf_ComplexKernelMultStream") {
|
||||
#ifdef __linux__
|
||||
printf("Test - Stress_printf_ComplexKernelMultStream start\n");
|
||||
bool TestPassed = true;
|
||||
uint threads_per_block = MAX_BLOCK_SIZE;
|
||||
// N provide the print limit
|
||||
unsigned int print_limit = 4; // = 4 GB
|
||||
uint32_t iterCount = 1;
|
||||
// num_blocks is calculated using an approximate formula to arrive at
|
||||
// the required print data quantity. CONST_WEIGHTING_FACT1 and
|
||||
// CONST_WEIGHTING_FACT2 are empirically determined.
|
||||
uint32_t num_blocks = (MAX_GRID_SIZE*print_limit)/CONST_WEIGHTING_FACT1
|
||||
- (CONST_WEIGHTING_FACT2*print_limit);
|
||||
TestPassed =
|
||||
hipPrintfStressTest::test_printf_multistream(num_blocks, threads_per_block,
|
||||
iterCount);
|
||||
REQUIRE(TestPassed);
|
||||
printf("Test - Stress_printf_ComplexKernelMultStream completed \n");
|
||||
#else
|
||||
printf("This test is skipped due to non linux environment.\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST_CASE("Stress_printf_ComplexKernelMultStreamMultGpu") {
|
||||
#ifdef __linux__
|
||||
printf("Test - Stress_printf_ComplexKernelMultStreamMultGpu start \n");
|
||||
bool TestPassed = true;
|
||||
uint threads_per_block = MAX_BLOCK_SIZE;
|
||||
// N provide the print limit
|
||||
unsigned int print_limit = 4; // = 4 GB
|
||||
uint32_t iterCount = 1;
|
||||
int numOfGPUs = 0;
|
||||
hipGetDeviceCount(&numOfGPUs);
|
||||
if (numOfGPUs < 2) {
|
||||
printf("Skipping test because numOfGPUs < 2\n");
|
||||
return;
|
||||
}
|
||||
// num_blocks is calculated using an approximate formula to arrive at
|
||||
// the required print data quantity. CONST_WEIGHTING_FACT1 and
|
||||
// CONST_WEIGHTING_FACT2 are empirically determined.
|
||||
uint32_t num_blocks =
|
||||
(((MAX_GRID_SIZE*print_limit)/CONST_WEIGHTING_FACT1 -
|
||||
(CONST_WEIGHTING_FACT2*print_limit))*4)/numOfGPUs;
|
||||
TestPassed =
|
||||
hipPrintfStressTest::testPrintfMultGPU(numOfGPUs, num_blocks,
|
||||
threads_per_block,
|
||||
iterCount);
|
||||
REQUIRE(TestPassed);
|
||||
printf("Test - Stress_printf_ComplexKernelMultStreamMultGpu end \n");
|
||||
#else
|
||||
printf("This test is skipped due to non linux environment.\n");
|
||||
#endif
|
||||
}
|
||||
@@ -0,0 +1,790 @@
|
||||
/*
|
||||
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include <hip/hip_runtime.h>
|
||||
#ifdef __linux__
|
||||
#include "printf_common.h"
|
||||
#endif
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#define BLOCK_SIZE 512
|
||||
#define GRID_SIZE 512
|
||||
#define CHUNK_SIZE 256
|
||||
#define CONST_STR "Hello World from Device.Iam printing 55 bytes of data.\n"
|
||||
#define CONST_STR1 "Hello World from Device.Iam printing from even thread.\n"
|
||||
#define CONST_STR2 "Hello World from Device.This is odd thread.\n"
|
||||
#define CONST_STR3 "Hello World from Device. The sum of all threadID = "
|
||||
|
||||
namespace hipPrintfStressTest {
|
||||
struct SizeStruct {
|
||||
unsigned int block_size;
|
||||
unsigned int grid_size;
|
||||
unsigned int iteration;
|
||||
};
|
||||
// These values are empirically determined for kernel_divergent_str3
|
||||
// Any modification to the function or CONST_STR3 will change these values
|
||||
const struct SizeStruct EmpiricalValues1[12] = {
|
||||
{512, 512, 16},
|
||||
{512, 512, 32},
|
||||
{512, 512, 48},
|
||||
{512, 512, 64},
|
||||
{512, 512, 80},
|
||||
{512, 512, 96},
|
||||
{512, 512, 110},
|
||||
{512, 512, 126},
|
||||
{512, 512, 140},
|
||||
{512, 512, 156},
|
||||
{512, 512, 172},
|
||||
{512, 512, 186}
|
||||
};
|
||||
// These values are empirically determined for kernel_dependent_calc
|
||||
// and kernel_dependent_calc_atomic.
|
||||
// Any modification to the functions will change these values.
|
||||
const struct SizeStruct EmpiricalValues2[12] = {
|
||||
{512, 512, 20},
|
||||
{512, 512, 40},
|
||||
{512, 512, 60},
|
||||
{512, 512, 80},
|
||||
{512, 512, 100},
|
||||
{512, 512, 120},
|
||||
{512, 512, 140},
|
||||
{512, 512, 160},
|
||||
{512, 512, 180},
|
||||
{512, 512, 200},
|
||||
{512, 512, 220},
|
||||
{512, 512, 240}
|
||||
};
|
||||
// Print a constant string in a kernel for 'n' iterations per thread
|
||||
// using 'b' block size and 'g' grid size such that
|
||||
// (total bytes per iteration)*n*b*g ≈ N GB where N is user input.
|
||||
__global__ void kernel_printf_conststr(uint iterCount) {
|
||||
for (uint count = 0; count < iterCount; count++) {
|
||||
printf("%s", CONST_STR);
|
||||
}
|
||||
}
|
||||
// Print 2 different constant strings (using if and else conditionals)
|
||||
// in a kernel for 'n' iterations per thread using 'b' block size and
|
||||
// 'g' grid size such that (total bytes per iteration)*n*b*g ≈ N GB,
|
||||
// where N is user input.
|
||||
__global__ void kernel_printf_two_conditionalstr(uint iterCount) {
|
||||
uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
uint mod_tid = (tid % 2);
|
||||
if (0 == mod_tid) {
|
||||
for (uint count = 0; count < iterCount; count++) {
|
||||
printf("%s", CONST_STR1);
|
||||
}
|
||||
} else {
|
||||
for (uint count = 0; count < iterCount; count++) {
|
||||
printf("%s", CONST_STR2);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Print a constant string (using only if condition) in a kernel for 'n'
|
||||
// iterations per thread using 'b' block size and 'g' grid size such that
|
||||
// (total bytes per iteration)*n*b*g ≈ N GB, where N is user input.
|
||||
__global__ void kernel_printf_single_conditionalstr(uint iterCount) {
|
||||
uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
uint mod_tid = (tid % 2);
|
||||
if (0 == mod_tid) {
|
||||
for (uint count = 0; count < iterCount; count++) {
|
||||
printf("%s", CONST_STR1);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Please do not nodify this function.
|
||||
// Any modification to this function will fail the test case.
|
||||
// Print variable size string using integer data in a kernel for 'n'
|
||||
// iterations per thread using 'b' block size and 'g' grid size such
|
||||
// that (total bytes per iteration)*n*b*g ≈ N GB, where N is user input.
|
||||
__global__ void kernel_printf_variablestr(uint iterCount, int *ret) {
|
||||
uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int retlocal = 0;
|
||||
const char *const_str =
|
||||
"Hello World from Device.Iam printing (threadID,number)=";
|
||||
for (int count = 0; count < (const int)iterCount; count++) {
|
||||
retlocal += printf("%s%u,%d\n", const_str, tid, count);
|
||||
retlocal += printf("%s%u,%d\n", const_str, tid, 10*count);
|
||||
retlocal += printf("%s%u,%d\n", const_str, tid, 100*count);
|
||||
retlocal += printf("%s%u,%d\n", const_str, tid, 1000*count);
|
||||
}
|
||||
ret[tid] = retlocal;
|
||||
}
|
||||
// Please do not nodify this function.
|
||||
// Any modification to this function will fail the test case.
|
||||
// Perform dependent calculations and print the result after each
|
||||
// calculation in a kernel for 'n' iterations per thread using 'b' block
|
||||
// size and 'g' grid size such that
|
||||
// (total bytes per iteration)*n*b*g ≈ N GB, where N is user input.
|
||||
__global__ void kernel_dependent_calc(uint32_t iterCount, int *ret) {
|
||||
uint32_t tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int retlocal = 0;
|
||||
const char *const_str =
|
||||
"Hello World from Device.Iam printing number=";
|
||||
for (int count = 0; count < (const int)iterCount; count++) {
|
||||
uint32_t x = tid + count;
|
||||
retlocal += printf("%s%u\n", const_str, x);
|
||||
uint32_t y = x + tid;
|
||||
retlocal += printf("%s%u\n", const_str, y);
|
||||
uint32_t z = x*y;
|
||||
retlocal += printf("%s%u\n", const_str, z);
|
||||
uint32_t a = z/(tid + 1);
|
||||
retlocal += printf("%s%u\n", const_str, a);
|
||||
}
|
||||
ret[tid] = retlocal;
|
||||
}
|
||||
// Please do not nodify this function.
|
||||
// Any modification to this function will fail the test case.
|
||||
// Perform atomic calculations and print the result after each
|
||||
// calculation in a kernel for 'n' iterations per thread using 'b' block
|
||||
// size and 'g' grid size such that
|
||||
// (total bytes per iteration)*n*b*g ≈ N GB, where N is user input.
|
||||
__global__ void kernel_dependent_calc_atomic(uint32_t iterCount,
|
||||
int *ret) {
|
||||
uint32_t tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int retlocal = 0;
|
||||
const char *const_str =
|
||||
"Hello World from Device.Iam printing number=";
|
||||
for (uint32_t count = 0; count < iterCount; count++) {
|
||||
uint32_t x = tid;
|
||||
atomicAdd(&x, count);
|
||||
retlocal += printf("%s%u\n", const_str, x);
|
||||
uint32_t y = x;
|
||||
atomicAdd(&y, tid);
|
||||
retlocal += printf("%s%u\n", const_str, y);
|
||||
uint32_t z = y;
|
||||
atomicSub(&z, count);
|
||||
retlocal += printf("%s%u\n", const_str, z);
|
||||
uint32_t a = z;
|
||||
atomicAnd(&a, 0x0000ffff);
|
||||
retlocal += printf("%s%u\n", const_str, a);
|
||||
}
|
||||
ret[tid] = retlocal;
|
||||
}
|
||||
// Print variable size string using floating point data of varying
|
||||
// precision in a kernel for 'n' iterations per thread using 'b' block
|
||||
// size and 'g' grid size such that
|
||||
// (total bytes per iteration)*n*b*g ≈ N GB, where N is user input.
|
||||
__device__ __host__ int printPi(int maxPrecision) {
|
||||
int printSize = 0;
|
||||
size_t expo = 1000000000000;
|
||||
double pi = 3.1415926535;
|
||||
double piScaled = pi*expo;
|
||||
const char *const_str =
|
||||
"Hello World from Device.Iam printing decimal number=";
|
||||
for (int prec = 0; prec <= maxPrecision ; prec++) {
|
||||
printSize += printf("%s%.*f %.*e\n", const_str, prec, pi,
|
||||
prec, piScaled);
|
||||
}
|
||||
return printSize;
|
||||
}
|
||||
|
||||
__global__ void kernel_decimal_calculation(uint iterCount,
|
||||
int maxPrecision) {
|
||||
for (int count = 0; count < (const int)iterCount; count++) {
|
||||
printPi(maxPrecision);
|
||||
}
|
||||
}
|
||||
// Print the value of shared memory variable using a stream of size 'n',
|
||||
// 'b' block size and 'g' grid size such that
|
||||
// (total bytes per thread)*n*b*g ≈ N GB, where N is user input.
|
||||
__global__ void kernel_shared_mem() {
|
||||
__shared__ uint32_t sharedMem;
|
||||
sharedMem = 0;
|
||||
__syncthreads();
|
||||
atomicAdd(&sharedMem, hipThreadIdx_x);
|
||||
__syncthreads();
|
||||
printf("%s%u\n", CONST_STR3, sharedMem);
|
||||
}
|
||||
// Synchronize the prints in a block using __syncthreads. Only 1 block
|
||||
// is launched in a stream of size 'n'. The size of the block is 'b'.
|
||||
// (total bytes per thread)*n*b ≈ N GB. where N is user input.
|
||||
__global__ void kernel_synchronized_printf() {
|
||||
printf("%s%u\n", CONST_STR3, 0);
|
||||
__syncthreads();
|
||||
printf("%s%u\n", CONST_STR3, 1);
|
||||
__syncthreads();
|
||||
printf("%s%u\n", CONST_STR3, 2);
|
||||
}
|
||||
#ifdef __linux__
|
||||
// Launches kernel_printf_conststr to generate the printf log file
|
||||
// and validates the generated file size and number of printed lines
|
||||
// with the calculated file size and lines.
|
||||
bool test_printf_conststr(uint32_t num_blocks, uint32_t threads_per_block,
|
||||
uint32_t print_limit) {
|
||||
uint32_t iterCount = 0;
|
||||
uint32_t sizePrintString = (sizeof(CONST_STR)-1); // Excluding NULL character
|
||||
// Calculate the number of iterations from print_limit.
|
||||
size_t stress_limit_bytes = ((size_t)print_limit*1024*1024*1024);
|
||||
iterCount = static_cast<uint32_t>(1 +
|
||||
stress_limit_bytes/(num_blocks*threads_per_block*sizePrintString));
|
||||
// Calculate expected lines of print and file size.
|
||||
uint32_t totalExpectedLines = num_blocks*threads_per_block*iterCount;
|
||||
size_t expectedFileSize = ((size_t)totalExpectedLines*sizePrintString);
|
||||
size_t actualFileSize = 0;
|
||||
uint32_t totalActualLinecount = 0;
|
||||
// DO NOT PUT ANY PRINTF WITHIN THIS BLOCK OF CODE
|
||||
{
|
||||
CaptureStream captured(stdout);
|
||||
hipLaunchKernelGGL(kernel_printf_conststr, dim3(num_blocks, 1, 1),
|
||||
dim3(threads_per_block, 1, 1),
|
||||
0, 0, iterCount);
|
||||
HIP_CHECK(hipStreamSynchronize(0));
|
||||
std::ifstream CapturedData = captured.getCapturedData();
|
||||
char *buffer = new char[CHUNK_SIZE];
|
||||
while (CapturedData.good()) {
|
||||
CapturedData.getline(buffer, CHUNK_SIZE);
|
||||
totalActualLinecount++;
|
||||
}
|
||||
delete[] buffer;
|
||||
struct stat st;
|
||||
if (stat(captured.getTempFilename(), &st)) {
|
||||
printf("Temp File not found \n");
|
||||
return false;
|
||||
}
|
||||
actualFileSize = st.st_size;
|
||||
}
|
||||
printf("totalExpectedLines = %u \n", totalExpectedLines);
|
||||
// Excluding the trailing newline
|
||||
printf("totalActualLinecount = %u \n", totalActualLinecount-1);
|
||||
printf("expectedFileSize = %zu \n", expectedFileSize);
|
||||
printf("actualFileSize = %zu \n", actualFileSize);
|
||||
if ((totalExpectedLines != (totalActualLinecount - 1))||
|
||||
(expectedFileSize != actualFileSize)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Launches kernel_printf_two_conditionalstr to generate the printf log file
|
||||
// and validates the generated file size and number of printed lines
|
||||
// with the calculated file size and lines.
|
||||
bool test_printf_two_conditionalstr(uint32_t num_blocks,
|
||||
uint32_t threads_per_block,
|
||||
uint32_t print_limit) {
|
||||
uint32_t iterCount = 0;
|
||||
uint32_t sizePrintStringEven, sizePrintStringOdd, avgsizePrintString;
|
||||
sizePrintStringEven = (sizeof(CONST_STR1)-1); // Excluding NULL character
|
||||
sizePrintStringOdd = (sizeof(CONST_STR2)-1); // Excluding NULL character
|
||||
avgsizePrintString = (sizePrintStringEven + sizePrintStringOdd)/2;
|
||||
// Calculate the number of iterations from print_limit
|
||||
size_t stress_limit_bytes = ((size_t)print_limit*1024*1024*1024);
|
||||
iterCount = static_cast<uint32_t>(1 +
|
||||
stress_limit_bytes/(num_blocks*threads_per_block*avgsizePrintString));
|
||||
// Calculate expected lines of print and file size.
|
||||
uint32_t totalExpectedEvenLines, totalExpectedOddLines;
|
||||
// 0, 1, 2, 3
|
||||
// 0, 1, 2
|
||||
totalExpectedEvenLines = ((num_blocks*threads_per_block)%2 == 0)?
|
||||
(num_blocks*threads_per_block*iterCount)/2 :
|
||||
(((num_blocks*threads_per_block)/2)+ 1)*iterCount;
|
||||
totalExpectedOddLines = (num_blocks*threads_per_block*iterCount
|
||||
- totalExpectedEvenLines);
|
||||
size_t expectedFileSize =
|
||||
((size_t)totalExpectedEvenLines*sizePrintStringEven +
|
||||
(size_t)totalExpectedOddLines*sizePrintStringOdd);
|
||||
size_t actualFileSize = 0;
|
||||
uint32_t totalActualEvenLines = 0, totalActualOddLines = 0;
|
||||
// DO NOT PUT ANY PRINTF WITHIN THIS BLOCK OF CODE
|
||||
{
|
||||
CaptureStream captured(stdout);
|
||||
hipLaunchKernelGGL(kernel_printf_two_conditionalstr,
|
||||
dim3(num_blocks, 1, 1),
|
||||
dim3(threads_per_block, 1, 1),
|
||||
0, 0, iterCount);
|
||||
HIP_CHECK(hipStreamSynchronize(0));
|
||||
std::ifstream CapturedData = captured.getCapturedData();
|
||||
char *buffer = new char[CHUNK_SIZE];
|
||||
while (CapturedData.good()) {
|
||||
CapturedData.getline(buffer, CHUNK_SIZE);
|
||||
uint32_t bufferlen = strlen(buffer);
|
||||
if ((sizePrintStringEven - 1) == bufferlen) {
|
||||
totalActualEvenLines++;
|
||||
} else if ((sizePrintStringOdd - 1) == bufferlen) {
|
||||
totalActualOddLines++;
|
||||
}
|
||||
}
|
||||
delete[] buffer;
|
||||
struct stat st;
|
||||
if (stat(captured.getTempFilename(), &st)) {
|
||||
printf("Temp File not found \n");
|
||||
return false;
|
||||
}
|
||||
actualFileSize = st.st_size;
|
||||
}
|
||||
printf("totalExpectedEvenLines = %u \n", totalExpectedEvenLines);
|
||||
printf("totalActualEvenLines = %u \n", totalActualEvenLines);
|
||||
printf("totalExpectedOddLines = %u \n", totalExpectedOddLines);
|
||||
printf("totalActualOddLines = %u \n", totalActualOddLines);
|
||||
printf("expectedFileSize = %zu \n", expectedFileSize);
|
||||
printf("actualFileSize = %zu \n", actualFileSize);
|
||||
if ((totalExpectedEvenLines != totalActualEvenLines)||
|
||||
(totalExpectedOddLines != totalActualOddLines)||
|
||||
(expectedFileSize != actualFileSize)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Launches kernel_printf_single_conditionalstr to generate the printf log
|
||||
// and validates the generated file size and number of printed lines
|
||||
// with the calculated file size and lines.
|
||||
bool test_printf_single_conditionalstr(uint32_t num_blocks,
|
||||
uint32_t threads_per_block,
|
||||
uint32_t print_limit) {
|
||||
uint32_t iterCount = 0;
|
||||
uint32_t sizePrintStringEven = (sizeof(CONST_STR1)-1);
|
||||
// Excluding NULL character
|
||||
// Calculate the number of iterations from print_limit
|
||||
size_t stress_limit_bytes = ((size_t)print_limit*1024*1024*1024);
|
||||
iterCount = static_cast<uint32_t>((2*stress_limit_bytes)/
|
||||
(num_blocks*threads_per_block*sizePrintStringEven));
|
||||
// Calculate expected lines of print and file size.
|
||||
uint32_t totalExpectedLines;
|
||||
totalExpectedLines = ((num_blocks*threads_per_block)%2 == 0)?
|
||||
(num_blocks*threads_per_block*iterCount)/2 :
|
||||
(((num_blocks*threads_per_block)/2)+ 1)*iterCount;
|
||||
size_t expectedFileSize =
|
||||
(size_t)totalExpectedLines*sizePrintStringEven;
|
||||
size_t actualFileSize = 0;
|
||||
uint32_t totalActualLines = 0;
|
||||
// DO NOT PUT ANY PRINTF WITHIN THIS BLOCK OF CODE
|
||||
{
|
||||
CaptureStream captured(stdout);
|
||||
hipLaunchKernelGGL(kernel_printf_single_conditionalstr,
|
||||
dim3(num_blocks, 1, 1),
|
||||
dim3(threads_per_block, 1, 1),
|
||||
0, 0, iterCount);
|
||||
HIP_CHECK(hipStreamSynchronize(0));
|
||||
std::ifstream CapturedData = captured.getCapturedData();
|
||||
char *buffer = new char[CHUNK_SIZE];
|
||||
while (CapturedData.good()) {
|
||||
CapturedData.getline(buffer, CHUNK_SIZE);
|
||||
totalActualLines++;
|
||||
}
|
||||
delete[] buffer;
|
||||
struct stat st;
|
||||
if (stat(captured.getTempFilename(), &st)) {
|
||||
printf("Temp File not found \n");
|
||||
return false;
|
||||
}
|
||||
actualFileSize = st.st_size;
|
||||
}
|
||||
printf("totalExpectedLines = %u \n", totalExpectedLines);
|
||||
printf("totalActualLines = %u \n", totalActualLines-1);
|
||||
printf("expectedFileSize = %zu \n", expectedFileSize);
|
||||
printf("actualFileSize = %zu \n", actualFileSize);
|
||||
if ((totalExpectedLines != (totalActualLines - 1))||
|
||||
(expectedFileSize != actualFileSize)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Launches kernel_printf_variablestr Or kernel_dependent_calc Or
|
||||
// kernel_dependent_calc_atomic to generate the printf log
|
||||
// and validates the generated file size and number of printed lines
|
||||
// with the calculated file size and lines.
|
||||
bool test_variable_str(uint32_t print_limit,
|
||||
void(*func)(uint32_t, int *),
|
||||
const struct SizeStruct* table) {
|
||||
uint32_t iterCount = table[print_limit - 1].iteration;
|
||||
uint32_t num_blocks = table[print_limit - 1].grid_size;
|
||||
uint32_t threads_per_block = table[print_limit - 1].block_size;
|
||||
// Calculate expected lines of print and file size.
|
||||
size_t actualFileSize = 0;
|
||||
uint32_t totalActualLines = 0;
|
||||
uint32_t totalExpectedLines = 4*iterCount*num_blocks*threads_per_block;
|
||||
size_t expectedFileSize = 0;
|
||||
|
||||
uint32_t buffsize = threads_per_block*num_blocks;
|
||||
int32_t *Ah;
|
||||
int32_t *Ad;
|
||||
Ah = new int32_t[buffsize];
|
||||
for (uint32_t i = 0; i < buffsize; i++) {
|
||||
Ah[i] = 0;
|
||||
}
|
||||
HIP_CHECK(hipMalloc(&Ad, buffsize*sizeof(int32_t)));
|
||||
HIP_CHECK(hipMemcpy(Ad, Ah, buffsize*sizeof(int32_t),
|
||||
hipMemcpyHostToDevice));
|
||||
// DO NOT PUT ANY PRINTF WITHIN THIS BLOCK OF CODE
|
||||
{
|
||||
CaptureStream captured(stdout);
|
||||
hipLaunchKernelGGL(func, dim3(num_blocks, 1, 1),
|
||||
dim3(threads_per_block, 1, 1),
|
||||
0, 0, iterCount, Ad);
|
||||
HIP_CHECK(hipStreamSynchronize(0));
|
||||
HIP_CHECK(hipMemcpy(Ah, Ad, buffsize*sizeof(int32_t),
|
||||
hipMemcpyDeviceToHost));
|
||||
std::ifstream CapturedData = captured.getCapturedData();
|
||||
char *buffer = new char[CHUNK_SIZE];
|
||||
while (CapturedData.good()) {
|
||||
CapturedData.getline(buffer, CHUNK_SIZE);
|
||||
totalActualLines++;
|
||||
}
|
||||
delete[] buffer;
|
||||
struct stat st;
|
||||
if (stat(captured.getTempFilename(), &st)) {
|
||||
printf("Temp File not found \n");
|
||||
return false;
|
||||
}
|
||||
actualFileSize = st.st_size;
|
||||
}
|
||||
for (uint32_t i = 0; i < buffsize; i++) {
|
||||
expectedFileSize += Ah[i];
|
||||
}
|
||||
HIP_CHECK(hipFree(Ad));
|
||||
delete[] Ah;
|
||||
printf("totalExpectedLines = %u \n", totalExpectedLines);
|
||||
printf("totalActualLines = %u \n", totalActualLines-1);
|
||||
printf("expectedFileSize = %zu \n", expectedFileSize);
|
||||
printf("actualFileSize = %zu \n", actualFileSize);
|
||||
if ((totalExpectedLines != (totalActualLines - 1))||
|
||||
(expectedFileSize != actualFileSize)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Launches kernel_decimal_calculation to generate the printf log file
|
||||
// and validates the generated file size and number of printed lines
|
||||
// with the calculated file size and lines.
|
||||
bool test_decimal_str(uint32_t num_blocks, uint32_t threads_per_block,
|
||||
uint32_t print_limit) {
|
||||
// Calculate the number of iterations from print_limit
|
||||
size_t stress_limit_bytes = ((size_t)print_limit*1024*1024*1024);
|
||||
int maxPrecision = 10;
|
||||
int totalPrintSizePerIter = printPi(maxPrecision);
|
||||
uint32_t iterCount = static_cast<uint32_t>(1+ stress_limit_bytes/
|
||||
(num_blocks*threads_per_block*totalPrintSizePerIter));
|
||||
// Calculate expected lines of print and file size.
|
||||
size_t actualFileSize = 0;
|
||||
size_t expectedFileSize =
|
||||
(size_t)num_blocks*threads_per_block*iterCount*totalPrintSizePerIter;
|
||||
uint32_t totalActualLines = 0;
|
||||
uint32_t totalExpectedLines =
|
||||
(maxPrecision + 1)*iterCount*num_blocks*threads_per_block;
|
||||
// DO NOT PUT ANY PRINTF WITHIN THIS BLOCK OF CODE
|
||||
{
|
||||
CaptureStream captured(stdout);
|
||||
hipLaunchKernelGGL(kernel_decimal_calculation, dim3(num_blocks, 1, 1),
|
||||
dim3(threads_per_block, 1, 1),
|
||||
0, 0, iterCount, maxPrecision);
|
||||
HIP_CHECK(hipStreamSynchronize(0));
|
||||
std::ifstream CapturedData = captured.getCapturedData();
|
||||
char *buffer = new char[CHUNK_SIZE];
|
||||
while (CapturedData.good()) {
|
||||
CapturedData.getline(buffer, CHUNK_SIZE);
|
||||
totalActualLines++;
|
||||
}
|
||||
delete[] buffer;
|
||||
struct stat st;
|
||||
if (stat(captured.getTempFilename(), &st)) {
|
||||
printf("Temp File not found \n");
|
||||
return false;
|
||||
}
|
||||
actualFileSize = st.st_size;
|
||||
}
|
||||
printf("totalExpectedLines = %u \n", totalExpectedLines);
|
||||
printf("totalActualLines = %u \n", totalActualLines-1);
|
||||
printf("expectedFileSize = %zu \n", expectedFileSize);
|
||||
printf("actualFileSize = %zu \n", actualFileSize);
|
||||
if ((totalExpectedLines != (totalActualLines - 1))||
|
||||
(expectedFileSize != actualFileSize)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Launches kernel_shared_mem to generate the printf log file
|
||||
// and validates the generated file size and number of printed lines
|
||||
// with the calculated file size and lines.
|
||||
bool test_shared_mem(uint32_t num_blocks, uint32_t threads_per_block,
|
||||
uint32_t print_limit) {
|
||||
// Calculate the number of iterations from print_limit
|
||||
size_t stress_limit_bytes = ((size_t)print_limit*1024*1024*1024);
|
||||
unsigned total_0_to_blksize = (BLOCK_SIZE - 1)*BLOCK_SIZE / 2;
|
||||
char buffer[CHUNK_SIZE];
|
||||
int totalPrintSizePerThread = snprintf(buffer, CHUNK_SIZE,
|
||||
"%s%u\n", CONST_STR3, total_0_to_blksize);
|
||||
uint32_t iterCount = static_cast<uint32_t>(1+ stress_limit_bytes/
|
||||
(num_blocks*threads_per_block*totalPrintSizePerThread));
|
||||
// Calculate expected lines of print and file size.
|
||||
size_t actualFileSize = 0;
|
||||
size_t expectedFileSize =
|
||||
(size_t)num_blocks*threads_per_block*iterCount*totalPrintSizePerThread;
|
||||
uint32_t totalActualLines = 0;
|
||||
uint32_t totalExpectedLines = iterCount*num_blocks*threads_per_block;
|
||||
// DO NOT PUT ANY PRINTF WITHIN THIS BLOCK OF CODE
|
||||
{
|
||||
CaptureStream captured(stdout);
|
||||
hipStream_t stream;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
for (int count = 0; count < (const int)iterCount; count++) {
|
||||
HIP_CHECK(hipLaunchKernel((const void*)kernel_shared_mem,
|
||||
dim3(num_blocks, 1, 1), dim3(threads_per_block, 1, 1),
|
||||
NULL, 0, stream));
|
||||
}
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
std::ifstream CapturedData = captured.getCapturedData();
|
||||
char *buffer = new char[CHUNK_SIZE];
|
||||
while (CapturedData.good()) {
|
||||
CapturedData.getline(buffer, CHUNK_SIZE);
|
||||
totalActualLines++;
|
||||
}
|
||||
delete[] buffer;
|
||||
struct stat st;
|
||||
if (stat(captured.getTempFilename(), &st)) {
|
||||
printf("Temp File not found \n");
|
||||
return false;
|
||||
}
|
||||
actualFileSize = st.st_size;
|
||||
}
|
||||
printf("totalExpectedLines = %u \n", totalExpectedLines);
|
||||
printf("totalActualLines = %u \n", totalActualLines-1);
|
||||
printf("expectedFileSize = %zu \n", expectedFileSize);
|
||||
printf("actualFileSize = %zu \n", actualFileSize);
|
||||
if ((totalExpectedLines != (totalActualLines - 1))||
|
||||
(expectedFileSize != actualFileSize)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Launches kernel_synchronized_printf to generate the printf log file
|
||||
// and validates the generated file size and number of printed lines
|
||||
// with the calculated file size and lines.
|
||||
bool test_synchronized_printf(uint32_t num_blocks,
|
||||
uint32_t threads_per_block,
|
||||
uint32_t print_limit) {
|
||||
// Calculate the number of iterations from print_limit
|
||||
size_t stress_limit_bytes = ((size_t)print_limit*1024*1024*1024);
|
||||
char buffer0[CHUNK_SIZE], buffer1[CHUNK_SIZE], buffer2[CHUNK_SIZE];
|
||||
int totalPrintSizePerThread = snprintf(buffer0, CHUNK_SIZE,
|
||||
"%s%u\n", CONST_STR3, 0);
|
||||
totalPrintSizePerThread += snprintf(buffer1, CHUNK_SIZE,
|
||||
"%s%u\n", CONST_STR3, 1);
|
||||
totalPrintSizePerThread += snprintf(buffer2, CHUNK_SIZE,
|
||||
"%s%u\n", CONST_STR3, 2);
|
||||
uint32_t iterCount = static_cast<uint32_t>(1+ stress_limit_bytes/
|
||||
(num_blocks*threads_per_block*totalPrintSizePerThread));
|
||||
// Calculate expected lines of print and file size.
|
||||
size_t actualFileSize = 0;
|
||||
size_t expectedFileSize =
|
||||
(size_t)num_blocks*threads_per_block*iterCount*totalPrintSizePerThread;
|
||||
uint32_t totalActualLines = 0;
|
||||
uint32_t totalExpectedLines = 3*iterCount*num_blocks*threads_per_block;
|
||||
bool TestPassed = true;
|
||||
size_t len = strlen(buffer0) - 1;
|
||||
// DO NOT PUT ANY PRINTF WITHIN THIS BLOCK OF CODE
|
||||
{
|
||||
CaptureStream captured(stdout);
|
||||
hipStream_t stream;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
for (int count = 0; count < (const int)iterCount; count++) {
|
||||
HIP_CHECK(hipLaunchKernel((const void*)kernel_synchronized_printf,
|
||||
dim3(num_blocks, 1, 1), dim3(threads_per_block, 1, 1),
|
||||
NULL, 0, stream));
|
||||
}
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
std::ifstream CapturedData = captured.getCapturedData();
|
||||
char *buffer = new char[CHUNK_SIZE];
|
||||
while (CapturedData.good()) {
|
||||
CapturedData.getline(buffer, CHUNK_SIZE);
|
||||
if (!strcmp(buffer, "")) {
|
||||
break;
|
||||
}
|
||||
if (0 == ((totalActualLines / threads_per_block) % 3)) {
|
||||
if (strncmp(buffer, buffer0, len)) {
|
||||
TestPassed = false;
|
||||
break;
|
||||
}
|
||||
} else if (1 == ((totalActualLines / threads_per_block) % 3)) {
|
||||
if (strncmp(buffer, buffer1, len)) {
|
||||
TestPassed = false;
|
||||
break;
|
||||
}
|
||||
} else if (2 == ((totalActualLines / threads_per_block) % 3)) {
|
||||
if (strncmp(buffer, buffer2, len)) {
|
||||
TestPassed = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
totalActualLines++;
|
||||
}
|
||||
delete[] buffer;
|
||||
struct stat st;
|
||||
if (stat(captured.getTempFilename(), &st)) {
|
||||
printf("Temp File not found");
|
||||
return false;
|
||||
}
|
||||
actualFileSize = st.st_size;
|
||||
}
|
||||
printf("totalExpectedLines = %u \n", totalExpectedLines);
|
||||
printf("totalActualLines = %u \n", totalActualLines);
|
||||
printf("expectedFileSize = %zu \n", expectedFileSize);
|
||||
printf("actualFileSize = %zu \n", actualFileSize);
|
||||
if ((TestPassed == false)||
|
||||
(expectedFileSize != actualFileSize)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
} // namespace hipPrintfStressTest
|
||||
|
||||
TEST_CASE("Stress_printf_ConstStr") {
|
||||
#ifdef __linux__
|
||||
printf("Test: Stress_printf_ConstStr\n");
|
||||
bool TestPassed = true;
|
||||
uint threads_per_block = BLOCK_SIZE;
|
||||
uint num_blocks = GRID_SIZE;
|
||||
// N provide the print limit
|
||||
unsigned int print_limit = 1; // = 1 GB
|
||||
TestPassed =
|
||||
hipPrintfStressTest::test_printf_conststr(num_blocks, threads_per_block,
|
||||
print_limit);
|
||||
REQUIRE(TestPassed);
|
||||
#else
|
||||
printf("This test is skipped due to non linux environment.\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST_CASE("Stress_printf_IfElseConditionalStr") {
|
||||
#ifdef __linux__
|
||||
printf("Test: Stress_printf_IfElseConditionalStr\n");
|
||||
bool TestPassed = true;
|
||||
uint threads_per_block = BLOCK_SIZE;
|
||||
uint num_blocks = GRID_SIZE;
|
||||
// N provide the print limit
|
||||
unsigned int print_limit = 1; // = 1 GB
|
||||
TestPassed =
|
||||
hipPrintfStressTest::test_printf_two_conditionalstr(num_blocks,
|
||||
threads_per_block, print_limit);
|
||||
REQUIRE(TestPassed);
|
||||
#else
|
||||
printf("This test is skipped due to non linux environment.\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST_CASE("Stress_printf_IfConditionalStr") {
|
||||
#ifdef __linux__
|
||||
printf("Test: Stress_printf_IfConditionalStr\n");
|
||||
bool TestPassed = true;
|
||||
uint threads_per_block = BLOCK_SIZE;
|
||||
uint num_blocks = GRID_SIZE;
|
||||
// N provide the print limit
|
||||
unsigned int print_limit = 1; // = 1 GB
|
||||
TestPassed =
|
||||
hipPrintfStressTest::test_printf_single_conditionalstr(num_blocks,
|
||||
threads_per_block, print_limit);
|
||||
REQUIRE(TestPassed);
|
||||
#else
|
||||
printf("This test is skipped due to non linux environment.\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST_CASE("Stress_printf_VariableStr") {
|
||||
#ifdef __linux__
|
||||
printf("Test: Stress_printf_VariableStr\n");
|
||||
bool TestPassed = true;
|
||||
// N provide the print limit
|
||||
unsigned int print_limit = 1; // = 1 GB
|
||||
TestPassed = hipPrintfStressTest::test_variable_str(print_limit,
|
||||
hipPrintfStressTest::kernel_printf_variablestr,
|
||||
hipPrintfStressTest::EmpiricalValues1);
|
||||
REQUIRE(TestPassed);
|
||||
#else
|
||||
printf("This test is skipped due to non linux environment.\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST_CASE("Stress_printf_DependentCalc") {
|
||||
#ifdef __linux__
|
||||
printf("Test: Stress_printf_DependentCalc\n");
|
||||
bool TestPassed = true;
|
||||
// N provide the print limit
|
||||
unsigned int print_limit = 1; // = 1 GB
|
||||
TestPassed = hipPrintfStressTest::test_variable_str(print_limit,
|
||||
hipPrintfStressTest::kernel_dependent_calc,
|
||||
hipPrintfStressTest::EmpiricalValues2);
|
||||
REQUIRE(TestPassed);
|
||||
#else
|
||||
printf("This test is skipped due to non linux environment.\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST_CASE("Stress_printf_DecimalStr") {
|
||||
#ifdef __linux__
|
||||
printf("Test: Stress_printf_DecimalStr\n");
|
||||
bool TestPassed = true;
|
||||
uint threads_per_block = BLOCK_SIZE;
|
||||
uint num_blocks = GRID_SIZE;
|
||||
// N provide the print limit
|
||||
unsigned int print_limit = 1; // = 1 GB
|
||||
TestPassed = hipPrintfStressTest::test_decimal_str(num_blocks,
|
||||
threads_per_block, print_limit);
|
||||
REQUIRE(TestPassed);
|
||||
#else
|
||||
printf("This test is skipped due to non linux environment.\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST_CASE("Stress_printf_SharedMem") {
|
||||
#ifdef __linux__
|
||||
printf("Test: Stress_printf_SharedMem\n");
|
||||
bool TestPassed = true;
|
||||
uint threads_per_block = BLOCK_SIZE;
|
||||
uint num_blocks = GRID_SIZE;
|
||||
// N provide the print limit
|
||||
unsigned int print_limit = 1; // = 1 GB
|
||||
TestPassed = hipPrintfStressTest::test_shared_mem(num_blocks,
|
||||
threads_per_block, print_limit);
|
||||
REQUIRE(TestPassed);
|
||||
#else
|
||||
printf("This test is skipped due to non linux environment.\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST_CASE("Stress_printf_SynchronizedPrintf") {
|
||||
#ifdef __linux__
|
||||
printf("Test: Stress_printf_SynchronizedPrintf\n");
|
||||
bool TestPassed = true;
|
||||
uint threads_per_block = BLOCK_SIZE;
|
||||
// N provide the print limit
|
||||
unsigned int print_limit = 1; // = 1 GB
|
||||
TestPassed = hipPrintfStressTest::test_synchronized_printf(1,
|
||||
threads_per_block, print_limit);
|
||||
REQUIRE(TestPassed);
|
||||
#else
|
||||
printf("This test is skipped due to non linux environment.\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST_CASE("Stress_printf_AtomicCalc") {
|
||||
#ifdef __linux__
|
||||
printf("Test: Stress_printf_AtomicCalc\n");
|
||||
bool TestPassed = true;
|
||||
// N provide the print limit
|
||||
unsigned int print_limit = 1; // = 1 GB
|
||||
TestPassed = hipPrintfStressTest::test_variable_str(print_limit,
|
||||
hipPrintfStressTest::kernel_dependent_calc_atomic,
|
||||
hipPrintfStressTest::EmpiricalValues2);
|
||||
REQUIRE(TestPassed);
|
||||
#else
|
||||
printf("This test is skipped due to non linux environment.\n");
|
||||
#endif
|
||||
}
|
||||
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _STRESSTEST_PRINTF_COMMON_H_
|
||||
#define _STRESSTEST_PRINTF_COMMON_H_
|
||||
|
||||
#include <errno.h>
|
||||
#include <error.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <math.h>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
struct CaptureStream {
|
||||
int saved_fd;
|
||||
int orig_fd;
|
||||
int temp_fd;
|
||||
|
||||
char tempname[13] = "mytestXXXXXX";
|
||||
|
||||
explicit CaptureStream(FILE *original) {
|
||||
orig_fd = fileno(original);
|
||||
saved_fd = dup(orig_fd);
|
||||
|
||||
if ((temp_fd = mkstemp(tempname)) == -1) {
|
||||
error(0, errno, "Error");
|
||||
assert(false);
|
||||
}
|
||||
|
||||
fflush(nullptr);
|
||||
if (dup2(temp_fd, orig_fd) == -1) {
|
||||
error(0, errno, "Error");
|
||||
assert(false);
|
||||
}
|
||||
if (close(temp_fd) != 0) {
|
||||
error(0, errno, "Error");
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
void restoreStream() {
|
||||
if (saved_fd == -1)
|
||||
return;
|
||||
fflush(nullptr);
|
||||
if (dup2(saved_fd, orig_fd) == -1) {
|
||||
error(0, errno, "Error");
|
||||
assert(false);
|
||||
}
|
||||
if (close(saved_fd) != 0) {
|
||||
error(0, errno, "Error");
|
||||
assert(false);
|
||||
}
|
||||
saved_fd = -1;
|
||||
}
|
||||
|
||||
const char *getTempFilename() {
|
||||
return (const char*)tempname;
|
||||
}
|
||||
|
||||
std::ifstream getCapturedData() {
|
||||
restoreStream();
|
||||
std::ifstream temp(tempname);
|
||||
return temp;
|
||||
}
|
||||
|
||||
~CaptureStream() {
|
||||
restoreStream();
|
||||
if (remove(tempname) != 0) {
|
||||
error(0, errno, "Error");
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#endif // _STRESSTEST_PRINTF_COMMON_H_
|
||||
@@ -0,0 +1,10 @@
|
||||
# Common Tests - Test independent of all platforms
|
||||
set(TEST_SRC
|
||||
Stress_hipStreamCreate.cc
|
||||
)
|
||||
|
||||
# Create shared lib of all tests
|
||||
add_library(stream SHARED EXCLUDE_FROM_ALL ${TEST_SRC})
|
||||
|
||||
# Add dependency on build_tests to build it on this custom target
|
||||
add_dependencies(build_stress_test stream)
|
||||
@@ -0,0 +1,203 @@
|
||||
/*
|
||||
Copyright (c) 2021-Present Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <cstdio>
|
||||
#include <cassert>
|
||||
|
||||
#define NUM_ITER 100000
|
||||
#define TOTALSEQ 18
|
||||
|
||||
namespace hipStreamCreateStressTest {
|
||||
__global__ void kernel_do_nothing() {
|
||||
// do nothing
|
||||
}
|
||||
|
||||
int stream_seq[TOTALSEQ][4] = {
|
||||
{0, 1, 2, 0} , // Launch0->Launch1->Launch2->Sync0
|
||||
{0, 2, 1, 0} , // Launch0->Launch2->Launch1->Sync0
|
||||
{1, 0, 2, 0} , // Launch1->Launch0->Launch2->Sync0
|
||||
{1, 2, 0, 0} , // Launch1->Launch2->Launch0->Sync0
|
||||
{2, 0, 1, 0} , // Launch2->Launch0->Launch1->Sync0
|
||||
{2, 1, 0, 0} , // Launch2->Launch1->Launch0->Sync0
|
||||
{0, 1, 2, 1} , // Launch0->Launch1->Launch2->Sync1
|
||||
{0, 2, 1, 1} , // Launch0->Launch2->Launch1->Sync1
|
||||
{1, 0, 2, 1} , // Launch1->Launch0->Launch2->Sync1
|
||||
{1, 2, 0, 1} , // Launch1->Launch2->Launch0->Sync1
|
||||
{2, 0, 1, 1} , // Launch2->Launch0->Launch1->Sync1
|
||||
{2, 1, 0, 1} , // Launch2->Launch1->Launch0->Sync1
|
||||
{0, 1, 2, 2} , // Launch0->Launch1->Launch2->Sync2
|
||||
{0, 2, 1, 2} , // Launch0->Launch2->Launch1->Sync2
|
||||
{1, 0, 2, 2} , // Launch1->Launch0->Launch2->Sync2
|
||||
{1, 2, 0, 2} , // Launch1->Launch2->Launch0->Sync2
|
||||
{2, 0, 1, 2} , // Launch2->Launch0->Launch1->Sync2
|
||||
{2, 1, 0, 2} // Launch2->Launch1->Launch0->Sync2
|
||||
};
|
||||
|
||||
/**
|
||||
* Scenario: This test extends the DTEST introduced for SWDEV-238360 to test
|
||||
* all the possible scenarios mentioned under comments section
|
||||
* in SWDEV-237846.
|
||||
*/
|
||||
|
||||
void testhipStreamCreate(int *stream_sequence) {
|
||||
printf("%s: Testing sequence %d->%d->%d->sync(%d) \n", __func__,
|
||||
stream_sequence[0], stream_sequence[1], stream_sequence[2],
|
||||
stream_sequence[3]);
|
||||
// Streams
|
||||
hipStream_t stream[3];
|
||||
stream[0] = 0;
|
||||
HIP_CHECK(hipStreamCreate(&stream[1]));
|
||||
HIP_CHECK(hipStreamCreate(&stream[2]));
|
||||
// Run test loop
|
||||
for (int k = 0; k < NUM_ITER; ++k) {
|
||||
// Sync
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipLaunchKernel((const void*)kernel_do_nothing,
|
||||
dim3(1, 1, 1), dim3(1, 1, 1), NULL, 0,
|
||||
stream[stream_sequence[0]]));
|
||||
HIP_CHECK(hipLaunchKernel((const void*)kernel_do_nothing,
|
||||
dim3(1, 1, 1), dim3(1, 1, 1), NULL, 0,
|
||||
stream[stream_sequence[1]]));
|
||||
HIP_CHECK(hipLaunchKernel((const void*)kernel_do_nothing,
|
||||
dim3(1, 1, 1), dim3(1, 1, 1), NULL, 0,
|
||||
stream[stream_sequence[2]]));
|
||||
// Sync stream 1
|
||||
HIP_CHECK(hipStreamSynchronize(stream[stream_sequence[3]]));
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
// Clean up
|
||||
HIP_CHECK(hipStreamDestroy(stream[1]));
|
||||
HIP_CHECK(hipStreamDestroy(stream[2]));
|
||||
}
|
||||
/**
|
||||
* Scenario: This test extends the above test by using 2 streams
|
||||
* (of highest and lowest priority) created using hipStreamCreateWithPriority
|
||||
* along with the default stream.
|
||||
*/
|
||||
void testhipStreamCreatePriority(int *stream_sequence,
|
||||
unsigned int flag) {
|
||||
printf("%s: Testing sequence %d->%d->%d->sync(%d) \n", __func__,
|
||||
stream_sequence[0], stream_sequence[1], stream_sequence[2],
|
||||
stream_sequence[3]);
|
||||
// Streams
|
||||
hipStream_t stream[3];
|
||||
stream[0] = 0;
|
||||
int priority_low = 0;
|
||||
int priority_high = 0;
|
||||
HIP_CHECK(hipDeviceGetStreamPriorityRange(&priority_low, &priority_high));
|
||||
if (priority_low == priority_high) {
|
||||
printf("Exiting test since priorities are not supported \n");
|
||||
return;
|
||||
}
|
||||
HIP_CHECK(hipStreamCreateWithPriority(&stream[1],
|
||||
flag, priority_high));
|
||||
HIP_CHECK(hipStreamCreateWithPriority(&stream[2],
|
||||
flag, priority_low));
|
||||
// Run test loop
|
||||
for (int k = 0; k < NUM_ITER; ++k) {
|
||||
// Sync
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipLaunchKernel((const void*)kernel_do_nothing,
|
||||
dim3(1, 1, 1), dim3(1, 1, 1), NULL, 0,
|
||||
stream[stream_sequence[0]]));
|
||||
HIP_CHECK(hipLaunchKernel((const void*)kernel_do_nothing,
|
||||
dim3(1, 1, 1), dim3(1, 1, 1), NULL, 0,
|
||||
stream[stream_sequence[1]]));
|
||||
HIP_CHECK(hipLaunchKernel((const void*)kernel_do_nothing,
|
||||
dim3(1, 1, 1), dim3(1, 1, 1), NULL, 0,
|
||||
stream[stream_sequence[2]]));
|
||||
// Sync stream 1
|
||||
HIP_CHECK(hipStreamSynchronize(stream[stream_sequence[3]]));
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
// Clean up
|
||||
HIP_CHECK(hipStreamDestroy(stream[1]));
|
||||
HIP_CHECK(hipStreamDestroy(stream[2]));
|
||||
}
|
||||
/**
|
||||
* Scenario: This test extends the above test by using 2 streams
|
||||
* created using hipStreamCreateWithFlags along with the default stream.
|
||||
*/
|
||||
void testhipStreamCreateFlags(int *stream_sequence,
|
||||
unsigned int flag) {
|
||||
printf("%s: Testing sequence %d->%d->%d->sync(%d) \n", __func__,
|
||||
stream_sequence[0], stream_sequence[1], stream_sequence[2],
|
||||
stream_sequence[3]);
|
||||
// Streams
|
||||
hipStream_t stream[3];
|
||||
stream[0] = 0;
|
||||
HIP_CHECK(hipStreamCreateWithFlags(&stream[1], flag));
|
||||
HIP_CHECK(hipStreamCreateWithFlags(&stream[2], flag));
|
||||
// Run test loop
|
||||
for (int k = 0; k < NUM_ITER; ++k) {
|
||||
// Sync
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipLaunchKernel((const void*)kernel_do_nothing,
|
||||
dim3(1, 1, 1), dim3(1, 1, 1), NULL, 0,
|
||||
stream[stream_sequence[0]]));
|
||||
HIP_CHECK(hipLaunchKernel((const void*)kernel_do_nothing,
|
||||
dim3(1, 1, 1), dim3(1, 1, 1), NULL, 0,
|
||||
stream[stream_sequence[1]]));
|
||||
HIP_CHECK(hipLaunchKernel((const void*)kernel_do_nothing,
|
||||
dim3(1, 1, 1), dim3(1, 1, 1), NULL, 0,
|
||||
stream[stream_sequence[2]]));
|
||||
// Sync stream 1
|
||||
HIP_CHECK(hipStreamSynchronize(stream[stream_sequence[3]]));
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
// Clean up
|
||||
HIP_CHECK(hipStreamDestroy(stream[1]));
|
||||
HIP_CHECK(hipStreamDestroy(stream[2]));
|
||||
}
|
||||
} // namespace hipStreamCreateStressTest
|
||||
|
||||
TEST_CASE("Stress_hipStreamCreate_SyncTest") {
|
||||
printf("hipStreamCreate stress test:\n");
|
||||
for (int i = 0; i < TOTALSEQ; i++) {
|
||||
hipStreamCreateStressTest::testhipStreamCreate(
|
||||
hipStreamCreateStressTest::stream_seq[i]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Stress_hipStreamCreatePriority_SyncTest") {
|
||||
printf("hipStreamCreateWithPriority(hipStreamDefault) stress test:\n");
|
||||
for (int i = 0; i < TOTALSEQ; i++) {
|
||||
hipStreamCreateStressTest::testhipStreamCreatePriority(
|
||||
hipStreamCreateStressTest::stream_seq[i], hipStreamDefault);
|
||||
}
|
||||
printf("hipStreamCreateWithPriority(hipStreamNonBlocking) stress test:\n");
|
||||
for (int i = 0; i < TOTALSEQ; i++) {
|
||||
hipStreamCreateStressTest::testhipStreamCreatePriority(
|
||||
hipStreamCreateStressTest::stream_seq[i], hipStreamNonBlocking);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Stress_hipStreamCreateWithFlags_SyncTest") {
|
||||
printf("hipStreamCreateWithFlags(hipStreamDefault) stress test:\n");
|
||||
for (int i = 0; i < TOTALSEQ; i++) {
|
||||
hipStreamCreateStressTest::testhipStreamCreateFlags(
|
||||
hipStreamCreateStressTest::stream_seq[i], hipStreamDefault);
|
||||
}
|
||||
printf("hipStreamCreateWithFlags(hipStreamNonBlocking) stress test:\n");
|
||||
for (int i = 0; i < TOTALSEQ; i++) {
|
||||
hipStreamCreateStressTest::testhipStreamCreateFlags(
|
||||
hipStreamCreateStressTest::stream_seq[i], hipStreamNonBlocking);
|
||||
}
|
||||
}
|
||||
Atsaukties uz šo jaunā problēmā
Block a user