SWDEV-475568 - Fix compilation issues

Fix compilation issues in the "make perf_test" target under
hip-tests.

Change-Id: Ib03328a2fb13375fa44626a42202b1eeb177b8b2


[ROCm/hip-tests commit: a2f37dfa3a]
Bu işleme şunda yer alıyor:
taosang2
2024-07-24 16:10:27 -04:00
işlemeyi yapan: Rakesh Roy
ebeveyn ad52c74707
işleme bb901bef4a
7 değiştirilmiş dosya ile 50 ekleme ve 49 silme
+3 -4
Dosyayı Görüntüle
@@ -244,8 +244,7 @@ TEST_CASE("Perf_hipPerfDotProduct") {
if (nGpu < 1) {
HipTest::HIP_SKIP_TEST("Skipping because devices < 1");
}
hipDeviceProp_t props = {0};
props = {0};
hipDeviceProp_t props;
HIP_CHECK(hipSetDevice(p_gpuDevice));
HIP_CHECK(hipGetDeviceProperties(&props, p_gpuDevice));
int nx, ny, nz;
@@ -286,8 +285,8 @@ TEST_CASE("Perf_hipPerfDotProduct") {
srand(time(NULL));
for (int i = 0; i < size; ++i) {
hx[i] = 2.0 * static_cast<double>rand() / static_cast<double>RAND_MAX - 1.0;
hy[i] = 2.0 * static_cast<double>rand() / static_cast<double>RAND_MAX - 1.0;
hx[i] = 2.0 * static_cast<double>(rand()) / static_cast<double>(RAND_MAX) - 1.0;
hy[i] = 2.0 * static_cast<double>(rand()) / static_cast<double>(RAND_MAX) - 1.0;
hresult_xy += hx[i] * hy[i];
hresult_xx += hx[i] * hx[i];
+14 -24
Dosyayı Görüntüle
@@ -47,7 +47,6 @@ static unsigned int numCoords = sizeof(coords) / sizeof(coordRec);
template <typename T>
__global__ void float_mad_kernel(uint *out, uint width, T xPos, T yPos,
T xStep, T yStep, uint maxIter) {
#pragma FP_CONTRACT ON
int tid = (blockIdx.x * blockDim.x + threadIdx.x);
int i = tid % width;
int j = tid / width;
@@ -71,7 +70,6 @@ template <typename T>
__global__ void float_mandel_unroll_kernel(uint *out, uint width, T xPos,
T yPos, T xStep, T yStep, uint maxIter) {
#pragma FP_CONTRACT ON
int tid = (blockIdx.x * blockDim.x + threadIdx.x);
int i = tid % width;
int j = tid / width;
@@ -176,7 +174,6 @@ __global__ void float_mandel_unroll_kernel(uint *out, uint width, T xPos,
template <typename T>
__global__ void double_mad_kernel(uint *out, uint width, T xPos, T yPos, T xStep, T yStep,
uint maxIter) {
#pragma FP_CONTRACT ON
int tid = (blockIdx.x * blockDim.x + threadIdx.x);
int i = tid % width;
int j = tid / width;
@@ -199,7 +196,6 @@ __global__ void double_mad_kernel(uint *out, uint width, T xPos, T yPos, T xSte
template <typename T>
__global__ void double_mandel_unroll_kernel(uint *out, uint width, T xPos,
T yPos, T xStep, T yStep, uint maxIter) {
#pragma FP_CONTRACT ON
int tid = (blockIdx.x * blockDim.x + threadIdx.x);
int i = tid % width;
@@ -304,8 +300,6 @@ __global__ void double_mandel_unroll_kernel(uint *out, uint width, T xPos,
out[tid] = (uint)ccount;
};
static const unsigned int FMA_EXPECTEDVALUES_INDEX = 15;
// Expected results for each kernel run at each coord
unsigned long long expectedIters[] = {
203277748ull, 2147483648ull, 120254651ull, 203277748ull, 2147483648ull,
@@ -336,7 +330,7 @@ class hipPerfMandelBrot {
}
void open(int deviceID);
bool run(unsigned int testCase, unsigned int deviceId);
bool run(unsigned int testCase);
void printResults(void);
// array of function pointers
@@ -391,7 +385,7 @@ void hipPerfMandelBrot::open(int deviceId) {
HipTest::HIP_SKIP_TEST("Skipping because devices < 1");
}
HIP_CHECK(hipSetDevice(deviceId));
hipDeviceProp_t props = {0};
hipDeviceProp_t props;
HIP_CHECK(hipGetDeviceProperties(&props, deviceId));
std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name
<< " with " << props.multiProcessorCount << " CUs" << " and device id: " << deviceId
@@ -401,7 +395,6 @@ void hipPerfMandelBrot::open(int deviceId) {
}
void hipPerfMandelBrot::printResults() {
int numkernels = getNumKernels();
int numStreams = getNumStreams();
std::cout << "\n" <<"Measured perf for kernels in GFLOPS on "
@@ -424,7 +417,7 @@ void hipPerfMandelBrot::float_mad(uint *out, uint width, float xPos, float yPos
int blocks, int threads_per_block, int kernelCnt) {
int streamCnt = getNumStreams();
hipLaunchKernelGGL(float_mad_kernel<float>, dim3(blocks), dim3(threads_per_block), 0,
streams[kernelCnt % streamCnt], out, width_, xPos, yPos, xStep, yStep,
streams[kernelCnt % streamCnt], out, width, xPos, yPos, xStep, yStep,
maxIter);
}
@@ -433,7 +426,7 @@ void hipPerfMandelBrot::float_mandel_unroll(uint *out, uint width, float xPos,
int blocks, int threads_per_block, int kernelCnt) {
int streamCnt = getNumStreams();
hipLaunchKernelGGL(float_mandel_unroll_kernel<float>, dim3(blocks), dim3(threads_per_block), 0,
streams[kernelCnt % streamCnt], out, width_, xPos, yPos, xStep, yStep, maxIter);
streams[kernelCnt % streamCnt], out, width, xPos, yPos, xStep, yStep, maxIter);
}
void hipPerfMandelBrot::double_mad(uint *out, uint width, float xPos, float yPos,
@@ -441,7 +434,7 @@ void hipPerfMandelBrot::double_mad(uint *out, uint width, float xPos, float yPo
int blocks, int threads_per_block, int kernelCnt) {
int streamCnt = getNumStreams();
hipLaunchKernelGGL(double_mad_kernel<double>, dim3(blocks), dim3(threads_per_block), 0,
streams[kernelCnt % streamCnt], out, width_, xPos, yPos, xStep, yStep, maxIter);
streams[kernelCnt % streamCnt], out, width, xPos, yPos, xStep, yStep, maxIter);
}
void hipPerfMandelBrot::double_mandel_unroll(uint *out, uint width, float xPos, float yPos,
@@ -449,10 +442,10 @@ void hipPerfMandelBrot::double_mandel_unroll(uint *out, uint width, float xPos,
int blocks, int threads_per_block, int kernelCnt) {
int streamCnt = getNumStreams();
hipLaunchKernelGGL(float_mandel_unroll_kernel<double>, dim3(blocks), dim3(threads_per_block), 0,
streams[kernelCnt % streamCnt], out, width_, xPos, yPos, xStep, yStep, maxIter);
streams[kernelCnt % streamCnt], out, width, xPos, yPos, xStep, yStep, maxIter);
}
bool hipPerfMandelBrot::run(unsigned int testCase,unsigned int deviceId) {
bool hipPerfMandelBrot::run(unsigned int testCase) {
unsigned int numStreams = getNumStreams();
coordIdx = testCase % numCoords;
@@ -462,8 +455,8 @@ bool hipPerfMandelBrot::run(unsigned int testCase,unsigned int deviceId) {
// Maximum iteration count
maxIter = 32768;
uint * hPtr[numKernels];
uint * dPtr[numKernels];
uint ** hPtr = new uint *[numKernels];
uint ** dPtr = new uint *[numKernels];
// Width is divisible by 4 because the mandelbrot kernel processes 4 pixels at once.
width_ = 256;
@@ -487,11 +480,6 @@ bool hipPerfMandelBrot::run(unsigned int testCase,unsigned int deviceId) {
int threads_per_block = 64;
int blocks = (threads/threads_per_block) + (threads % threads_per_block);
float xStep = static_cast<float>(coords[coordIdx].width / (double)width_);
float yStep = static_cast<float>(-coords[coordIdx].width / (double)width_);
float xPos = static_cast<float>(coords[coordIdx].x - 0.5 * coords[coordIdx].width);
float yPos = static_cast<float>(coords[coordIdx].y + 0.5 * coords[coordIdx].width);
// Copy memory asynchronously and concurrently from host to device
for (uint i = 0; i < numKernels; i++) {
HIP_CHECK(hipMemcpy(dPtr[i], hPtr[i], bufSize, hipMemcpyHostToDevice));
@@ -593,6 +581,8 @@ bool hipPerfMandelBrot::run(unsigned int testCase,unsigned int deviceId) {
HIP_CHECK(hipHostFree(hPtr[i]));
HIP_CHECK(hipFree(dPtr[i]));
}
delete [] hPtr;
delete [] dPtr;
return true;
}
@@ -632,7 +622,7 @@ TEST_CASE("Perf_hipPerfMandelbrot") {
SECTION("warm-up kernel default stream executes serially") {
mandelbrotCompute.setNumStreams(1);
mandelbrotCompute.setNumKernels(1);
REQUIRE(true == mandelbrotCompute.run(100/*Random number*/, deviceId));
REQUIRE(true == mandelbrotCompute.run(100/*Random number*/));
}
#endif
SECTION("run all - sync") {
@@ -640,7 +630,7 @@ TEST_CASE("Perf_hipPerfMandelbrot") {
do {
mandelbrotCompute.setNumStreams(1);
mandelbrotCompute.setNumKernels(1);
REQUIRE(true == mandelbrotCompute.run(i, deviceId));
REQUIRE(true == mandelbrotCompute.run(i));
i++;
}while(i < 12);
mandelbrotCompute.printResults();
@@ -651,7 +641,7 @@ TEST_CASE("Perf_hipPerfMandelbrot") {
do {
mandelbrotCompute.setNumStreams(2);
mandelbrotCompute.setNumKernels(2);
REQUIRE(true == mandelbrotCompute.run(i, deviceId));
REQUIRE(true == mandelbrotCompute.run(i));
i++;
}while(i < 12);
mandelbrotCompute.printResults();
+1 -1
Dosyayı Görüntüle
@@ -89,7 +89,7 @@ TEST_CASE("Perf_hipPerfDispatchSpeed") {
int p_gpuDevice = 0;
int p_tests = -1;
hipError_t err = hipSuccess;
hipDeviceProp_t props = {0};
hipDeviceProp_t props;
HIP_CHECK(hipGetDeviceProperties(&props, p_gpuDevice));
unsigned int testListSize = sizeof(testList) / sizeof(testStruct);
+4 -2
Dosyayı Görüntüle
@@ -185,8 +185,8 @@ void hipPerfSampleRate::run(unsigned int test) {
void ** dPtr;
void * hOutPtr;
void * dOutPtr;
void * hInPtr[numBufs_];
void * dInPtr[numBufs_];
void ** hInPtr = new void *[numBufs_];
void ** dInPtr = new void *[numBufs_];
outBufSize_ =
sizes[NUM_SIZES - 1] * sizes[NUM_SIZES - 1] * typeSizes[NUM_TYPES - 1];
@@ -265,6 +265,8 @@ void hipPerfSampleRate::run(unsigned int test) {
}
HIP_CHECK(hipHostFree(hOutPtr));
HIP_CHECK(hipFree(dPtr));
delete [] hInPtr;
delete [] dInPtr;
}
+13 -7
Dosyayı Görüntüle
@@ -105,12 +105,12 @@ void hipPerfDeviceConcurrency::close() {
bool hipPerfDeviceConcurrency::run(unsigned int testCase, int numGpus) {
static int deviceId;
uint * hPtr[numGpus];
uint * dPtr[numGpus];
hipStream_t streams[numGpus];
int numCUs[numGpus];
unsigned int maxIter[numGpus];
unsigned long long expectedIters[numGpus];
uint ** hPtr = new uint*[numGpus];
uint ** dPtr = new uint*[numGpus];
hipStream_t * streams = new hipStream_t[numGpus];
int *numCUs = new int[numGpus];
unsigned int *maxIter = new unsigned int[numGpus];
unsigned long long *expectedIters = new unsigned long long[numGpus];
int threads, threads_per_block, blocks;
float xStep, yStep, xPos, yPos;
@@ -121,7 +121,7 @@ bool hipPerfDeviceConcurrency::run(unsigned int testCase, int numGpus) {
}
HIP_CHECK(hipSetDevice(deviceId));
hipDeviceProp_t props = {0};
hipDeviceProp_t props;
HIP_CHECK(hipGetDeviceProperties(&props, i));
if (testCase != 0) {
std::cout << "info: running on bus " << "0x" << props.pciBusID
@@ -220,6 +220,12 @@ bool hipPerfDeviceConcurrency::run(unsigned int testCase, int numGpus) {
if (testCase == 0) {
deviceId++;
}
delete [] hPtr;
delete [] dPtr;
delete [] streams;
delete [] numCUs;
delete [] maxIter;
delete [] expectedIters;
return true;
}
+9 -8
Dosyayı Görüntüle
@@ -225,7 +225,7 @@ bool hipPerfStreamConcurrency::open(int deviceId) {
}
HIP_CHECK(hipSetDevice(deviceId));
hipDeviceProp_t props = {0};
hipDeviceProp_t props;
HIP_CHECK(hipGetDeviceProperties(&props, deviceId));
std::cout << "info: running on bus " << "0x" << props.pciBusID
<< " " << props.name << " with " << props.multiProcessorCount << " CUs"
@@ -250,12 +250,12 @@ bool hipPerfStreamConcurrency::run(unsigned int testCase,
// Maximum iteration count
// maxIter = 8388608 * (engine_clock / 1000).serial execution
maxIter = (unsigned int)(((8388608 * (static_cast<float>clkFrequency / 1000))
maxIter = (unsigned int)(((8388608 * (static_cast<float>(clkFrequency) / 1000))
* numCUs) / 128);
maxIter = (maxIter + 15) & ~15;
hipStream_t streams[numStreams];
uint * hPtr[numKernels];
uint * dPtr[numKernels];
hipStream_t *streams = new hipStream_t[numStreams];
uint ** hPtr = new uint*[numKernels];
uint ** dPtr = new uint*[numKernels];
// Width is divisible by 4 because the mandelbrot kernel
// processes 4 pixels at once.
@@ -327,9 +327,6 @@ bool hipPerfStreamConcurrency::run(unsigned int testCase,
<< numStreams <<" stream (s): " << all_kernel_time.count() << std::endl;
}
unsigned long long expected =
(unsigned long long)width_ * (unsigned long long)maxIter;
for (uint i = 0 ; i < numStreams; i++) {
HIP_CHECK(hipStreamDestroy(streams[i]));
}
@@ -339,6 +336,10 @@ bool hipPerfStreamConcurrency::run(unsigned int testCase,
HIP_CHECK(hipHostFree(hPtr[i]));
HIP_CHECK(hipFree(dPtr[i]));
}
delete [] streams;
delete [] hPtr;
delete [] dPtr;
return true;
}
+6 -3
Dosyayı Görüntüle
@@ -59,7 +59,7 @@ bool hipPerfStreamCreateCopyDestroy::open(int deviceId) {
HipTest::HIP_SKIP_TEST("Skipping because devices < 1");
}
HIP_CHECK(hipSetDevice(deviceId));
hipDeviceProp_t props = {0};
hipDeviceProp_t props;
HIP_CHECK(hipGetDeviceProperties(&props, deviceId));
std::cout << "info: running on bus " << "0x" << props.pciBusID
<< " " << props.name << " with " << props.multiProcessorCount << " CUs"
@@ -71,10 +71,10 @@ bool hipPerfStreamCreateCopyDestroy::run(unsigned int testNumber) {
numStreams_ = totalStreams_[testNumber % TotalStreams];
size_t iter = Iterations / (numStreams_ * (static_cast<size_t>(1)
<< (testNumber / TotalBufs + 1)));
hipStream_t streams[numStreams_];
hipStream_t *streams = new hipStream_t[numStreams_];
numBuffers_ = totalBuffers_[testNumber / TotalBufs];
float* dSrc[numBuffers_];
float ** dSrc = new float*[numBuffers_];
size_t nBytes = BufSize * sizeof(float);
for (size_t b = 0; b < numBuffers_; ++b) {
@@ -120,6 +120,9 @@ bool hipPerfStreamCreateCopyDestroy::run(unsigned int testNumber) {
for (size_t b = 0; b < numBuffers_; ++b) {
HIP_CHECK(hipFree(dSrc[b]));
}
delete [] streams;
delete [] dSrc;
return true;
}