SWDEV-475568 - Fix compilation issues

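Fix compilation issues hit when running "make perf_test" under hip-tests: correct malformed static_cast expressions, replace variable-length arrays with heap allocations released via delete[], drop the "= {0}" initializers on hipDeviceProp_t, and remove unused variables, parameters, and FP_CONTRACT pragmas. Change-Id: Ib03328a2fb13375fa44626a42202b1eeb177b8b2 [ROCm/hip-tests commit: a2f37dfa3a]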
2024-07-24 16:10:27 -04:00
@@ -244,8 +244,7 @@ TEST_CASE("Perf_hipPerfDotProduct") {
  if (nGpu < 1) {
    HipTest::HIP_SKIP_TEST("Skipping because devices < 1");
  }
-  hipDeviceProp_t props = {0};
-  props = {0};
+  hipDeviceProp_t props;
  HIP_CHECK(hipSetDevice(p_gpuDevice));
  HIP_CHECK(hipGetDeviceProperties(&props, p_gpuDevice));
  int nx, ny, nz;
@@ -286,8 +285,8 @@ TEST_CASE("Perf_hipPerfDotProduct") {
  srand(time(NULL));

  for (int i = 0; i < size; ++i) {
-    hx[i] = 2.0 * static_cast<double>rand() / static_cast<double>RAND_MAX - 1.0;
-    hy[i] = 2.0 * static_cast<double>rand() / static_cast<double>RAND_MAX - 1.0;
+    hx[i] = 2.0 * static_cast<double>(rand()) / static_cast<double>(RAND_MAX) - 1.0;
+    hy[i] = 2.0 * static_cast<double>(rand()) / static_cast<double>(RAND_MAX) - 1.0;

    hresult_xy += hx[i] * hy[i];
    hresult_xx += hx[i] * hx[i];
@@ -47,7 +47,6 @@ static unsigned int numCoords = sizeof(coords) / sizeof(coordRec);
 template <typename T>
 __global__ void float_mad_kernel(uint *out, uint width, T xPos, T yPos,
                                   T xStep, T yStep, uint maxIter) {
-#pragma FP_CONTRACT ON
  int tid = (blockIdx.x * blockDim.x + threadIdx.x);
  int i = tid % width;
  int j = tid / width;
@@ -71,7 +70,6 @@ template <typename T>
 __global__ void float_mandel_unroll_kernel(uint *out, uint width, T xPos,
    T yPos, T xStep, T yStep, uint maxIter) {

-#pragma FP_CONTRACT ON
  int tid = (blockIdx.x * blockDim.x + threadIdx.x);
  int i = tid % width;
  int j = tid / width;
@@ -176,7 +174,6 @@ __global__ void float_mandel_unroll_kernel(uint *out, uint width, T xPos,
 template <typename T>
 __global__ void double_mad_kernel(uint *out, uint width, T xPos,  T yPos, T xStep, T yStep,
                                   uint maxIter) {
-#pragma FP_CONTRACT ON
  int tid = (blockIdx.x * blockDim.x + threadIdx.x);
  int i = tid % width;
  int j = tid / width;
@@ -199,7 +196,6 @@ __global__ void double_mad_kernel(uint *out, uint width, T xPos,  T yPos, T xSte
 template <typename T>
 __global__ void double_mandel_unroll_kernel(uint *out, uint width, T xPos,
                  T yPos, T xStep, T yStep, uint maxIter) {
-#pragma FP_CONTRACT ON
  int tid = (blockIdx.x * blockDim.x + threadIdx.x);

  int i = tid % width;
@@ -304,8 +300,6 @@ __global__ void double_mandel_unroll_kernel(uint *out, uint width, T xPos,
    out[tid] = (uint)ccount;
 };

-static const unsigned int FMA_EXPECTEDVALUES_INDEX = 15;
-
 // Expected results for each kernel run at each coord
 unsigned long long expectedIters[] = {
    203277748ull,  2147483648ull, 120254651ull,  203277748ull,  2147483648ull,
@@ -336,7 +330,7 @@ class hipPerfMandelBrot {
  }

  void open(int deviceID);
-  bool run(unsigned int testCase, unsigned int deviceId);
+  bool run(unsigned int testCase);
  void printResults(void);

  // array of funtion pointers
@@ -391,7 +385,7 @@ void hipPerfMandelBrot::open(int deviceId) {
    HipTest::HIP_SKIP_TEST("Skipping because devices < 1");
  }
  HIP_CHECK(hipSetDevice(deviceId));
-  hipDeviceProp_t props = {0};
+  hipDeviceProp_t props;
  HIP_CHECK(hipGetDeviceProperties(&props, deviceId));
  std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name
    << " with " << props.multiProcessorCount << " CUs" << " and device id: " << deviceId
@@ -401,7 +395,6 @@ void hipPerfMandelBrot::open(int deviceId) {
 }

 void hipPerfMandelBrot::printResults() {
-  int numkernels = getNumKernels();
  int numStreams = getNumStreams();

  std::cout << "\n" <<"Measured perf for kernels in GFLOPS on "
@@ -424,7 +417,7 @@ void hipPerfMandelBrot::float_mad(uint *out, uint width, float xPos,  float yPos
                                   int blocks, int threads_per_block, int kernelCnt) {
  int streamCnt = getNumStreams();
  hipLaunchKernelGGL(float_mad_kernel<float>, dim3(blocks), dim3(threads_per_block), 0,
-                      streams[kernelCnt % streamCnt], out, width_, xPos, yPos, xStep, yStep,
+                      streams[kernelCnt % streamCnt], out, width, xPos, yPos, xStep, yStep,
                      maxIter);
 }

@@ -433,7 +426,7 @@ void hipPerfMandelBrot::float_mandel_unroll(uint *out, uint width, float xPos,
                             int blocks, int threads_per_block, int kernelCnt) {
  int streamCnt = getNumStreams();
  hipLaunchKernelGGL(float_mandel_unroll_kernel<float>, dim3(blocks), dim3(threads_per_block), 0,
-                  streams[kernelCnt % streamCnt], out, width_, xPos, yPos, xStep, yStep, maxIter);
+                  streams[kernelCnt % streamCnt], out, width, xPos, yPos, xStep, yStep, maxIter);
 }

 void hipPerfMandelBrot::double_mad(uint *out, uint width, float xPos,  float yPos,
@@ -441,7 +434,7 @@ void hipPerfMandelBrot::double_mad(uint *out, uint width, float xPos,  float yPo
                               int blocks, int threads_per_block, int kernelCnt) {
  int streamCnt = getNumStreams();
  hipLaunchKernelGGL(double_mad_kernel<double>, dim3(blocks), dim3(threads_per_block), 0,
-                  streams[kernelCnt % streamCnt], out, width_, xPos, yPos, xStep, yStep, maxIter);
+                  streams[kernelCnt % streamCnt], out, width, xPos, yPos, xStep, yStep, maxIter);
 }

 void hipPerfMandelBrot::double_mandel_unroll(uint *out, uint width, float xPos,  float yPos,
@@ -449,10 +442,10 @@ void hipPerfMandelBrot::double_mandel_unroll(uint *out, uint width, float xPos,
                              int blocks, int threads_per_block, int kernelCnt) {
  int streamCnt = getNumStreams();
  hipLaunchKernelGGL(float_mandel_unroll_kernel<double>, dim3(blocks), dim3(threads_per_block), 0,
-                  streams[kernelCnt % streamCnt], out, width_, xPos, yPos, xStep, yStep, maxIter);
+                  streams[kernelCnt % streamCnt], out, width, xPos, yPos, xStep, yStep, maxIter);
 }

-bool hipPerfMandelBrot::run(unsigned int testCase,unsigned int deviceId) {
+bool hipPerfMandelBrot::run(unsigned int testCase) {
  unsigned int numStreams = getNumStreams();
  coordIdx = testCase % numCoords;

@@ -462,8 +455,8 @@ bool hipPerfMandelBrot::run(unsigned int testCase,unsigned int deviceId) {
  // Maximum iteration count
  maxIter = 32768;

-  uint * hPtr[numKernels];
-  uint * dPtr[numKernels];
+  uint ** hPtr = new uint *[numKernels];
+  uint ** dPtr = new uint *[numKernels];

  // Width is divisible by 4 because the mandelbrot kernel processes 4 pixels at once.
  width_ = 256;
@@ -487,11 +480,6 @@ bool hipPerfMandelBrot::run(unsigned int testCase,unsigned int deviceId) {
  int threads_per_block  = 64;
  int blocks = (threads/threads_per_block) + (threads % threads_per_block);

-  float xStep = static_cast<float>(coords[coordIdx].width / (double)width_);
-  float yStep = static_cast<float>(-coords[coordIdx].width / (double)width_);
-  float xPos = static_cast<float>(coords[coordIdx].x - 0.5 * coords[coordIdx].width);
-  float yPos = static_cast<float>(coords[coordIdx].y + 0.5 * coords[coordIdx].width);
-
  // Copy memory asynchronously and concurrently from host to device
  for (uint i = 0; i < numKernels; i++) {
    HIP_CHECK(hipMemcpy(dPtr[i], hPtr[i], bufSize, hipMemcpyHostToDevice));
@@ -593,6 +581,8 @@ bool hipPerfMandelBrot::run(unsigned int testCase,unsigned int deviceId) {
    HIP_CHECK(hipHostFree(hPtr[i]));
    HIP_CHECK(hipFree(dPtr[i]));
  }
+  delete [] hPtr;
+  delete [] dPtr;
  return true;
 }

@@ -632,7 +622,7 @@ TEST_CASE("Perf_hipPerfMandelbrot") {
  SECTION("warm-up kernel default stream executes serially") {
    mandelbrotCompute.setNumStreams(1);
    mandelbrotCompute.setNumKernels(1);
-    REQUIRE(true == mandelbrotCompute.run(100/*Random number*/, deviceId));
+    REQUIRE(true == mandelbrotCompute.run(100/*Random number*/));
  }
  #endif
  SECTION("run all - sync") {
@@ -640,7 +630,7 @@ TEST_CASE("Perf_hipPerfMandelbrot") {
    do {
      mandelbrotCompute.setNumStreams(1);
      mandelbrotCompute.setNumKernels(1);
-      REQUIRE(true == mandelbrotCompute.run(i, deviceId));
+      REQUIRE(true == mandelbrotCompute.run(i));
      i++;
    }while(i < 12);
    mandelbrotCompute.printResults();
@@ -651,7 +641,7 @@ TEST_CASE("Perf_hipPerfMandelbrot") {
    do {
      mandelbrotCompute.setNumStreams(2);
      mandelbrotCompute.setNumKernels(2);
-      REQUIRE(true == mandelbrotCompute.run(i, deviceId));
+      REQUIRE(true == mandelbrotCompute.run(i));
      i++;
    }while(i < 12);
    mandelbrotCompute.printResults();
@@ -89,7 +89,7 @@ TEST_CASE("Perf_hipPerfDispatchSpeed") {
  int p_gpuDevice = 0;
  int p_tests = -1;
  hipError_t err = hipSuccess;
-  hipDeviceProp_t props = {0};
+  hipDeviceProp_t props;
  HIP_CHECK(hipGetDeviceProperties(&props, p_gpuDevice));

  unsigned int testListSize = sizeof(testList) / sizeof(testStruct);
@@ -185,8 +185,8 @@ void hipPerfSampleRate::run(unsigned int test) {
  void ** dPtr;
  void *  hOutPtr;
  void *  dOutPtr;
-  void *  hInPtr[numBufs_];
-  void *  dInPtr[numBufs_];
+  void ** hInPtr = new void *[numBufs_];
+  void ** dInPtr = new void *[numBufs_];

  outBufSize_ =
      sizes[NUM_SIZES - 1] * sizes[NUM_SIZES - 1] * typeSizes[NUM_TYPES - 1];
@@ -265,6 +265,8 @@ void hipPerfSampleRate::run(unsigned int test) {
  }
  HIP_CHECK(hipHostFree(hOutPtr));
  HIP_CHECK(hipFree(dPtr));
+  delete [] hInPtr;
+  delete [] dInPtr;
 }


@@ -105,12 +105,12 @@ void hipPerfDeviceConcurrency::close() {

 bool hipPerfDeviceConcurrency::run(unsigned int testCase, int numGpus) {
  static int deviceId;
-  uint * hPtr[numGpus];
-  uint * dPtr[numGpus];
-  hipStream_t streams[numGpus];
-  int numCUs[numGpus];
-  unsigned int maxIter[numGpus];
-  unsigned long long expectedIters[numGpus];
+  uint ** hPtr = new uint*[numGpus];
+  uint ** dPtr = new uint*[numGpus];
+  hipStream_t * streams = new hipStream_t[numGpus];
+  int *numCUs = new int[numGpus];
+  unsigned int *maxIter = new unsigned int[numGpus];
+  unsigned long long *expectedIters = new unsigned long long[numGpus];

  int threads, threads_per_block, blocks;
  float xStep, yStep, xPos, yPos;
@@ -121,7 +121,7 @@ bool hipPerfDeviceConcurrency::run(unsigned int testCase, int numGpus) {
    }

    HIP_CHECK(hipSetDevice(deviceId));
-    hipDeviceProp_t props = {0};
+    hipDeviceProp_t props;
    HIP_CHECK(hipGetDeviceProperties(&props, i));
    if (testCase != 0) {
    std::cout << "info: running on bus " << "0x" << props.pciBusID
@@ -220,6 +220,12 @@ bool hipPerfDeviceConcurrency::run(unsigned int testCase, int numGpus) {
  if (testCase == 0) {
    deviceId++;
  }
+  delete [] hPtr;
+  delete [] dPtr;
+  delete [] streams;
+  delete [] numCUs;
+  delete [] maxIter;
+  delete [] expectedIters;
  return true;
 }

@@ -225,7 +225,7 @@ bool hipPerfStreamConcurrency::open(int deviceId) {
  }

  HIP_CHECK(hipSetDevice(deviceId));
-  hipDeviceProp_t props = {0};
+  hipDeviceProp_t props;
  HIP_CHECK(hipGetDeviceProperties(&props, deviceId));
  std::cout << "info: running on bus " << "0x" << props.pciBusID
    << " " << props.name << " with " << props.multiProcessorCount << " CUs"
@@ -250,12 +250,12 @@ bool hipPerfStreamConcurrency::run(unsigned int testCase,

  // Maximum iteration count
  // maxIter = 8388608 * (engine_clock / 1000).serial execution
-  maxIter = (unsigned int)(((8388608 * (static_cast<float>clkFrequency / 1000))
+  maxIter = (unsigned int)(((8388608 * (static_cast<float>(clkFrequency) / 1000))
                                                        * numCUs) / 128);
  maxIter = (maxIter + 15) & ~15;
-  hipStream_t streams[numStreams];
-  uint * hPtr[numKernels];
-  uint * dPtr[numKernels];
+  hipStream_t *streams = new hipStream_t[numStreams];
+  uint ** hPtr = new uint*[numKernels];
+  uint ** dPtr = new uint*[numKernels];

  // Width is divisible by 4 because the mandelbrot kernel
  // processes 4 pixels at once.
@@ -327,9 +327,6 @@ bool hipPerfStreamConcurrency::run(unsigned int testCase,
  << numStreams <<" stream (s): " << all_kernel_time.count() << std::endl;
  }

-  unsigned long long expected =
-    (unsigned long long)width_ * (unsigned long long)maxIter;
-
  for (uint i = 0 ; i < numStreams; i++) {
    HIP_CHECK(hipStreamDestroy(streams[i]));
  }
@@ -339,6 +336,10 @@ bool hipPerfStreamConcurrency::run(unsigned int testCase,
    HIP_CHECK(hipHostFree(hPtr[i]));
    HIP_CHECK(hipFree(dPtr[i]));
  }
+
+  delete [] streams;
+  delete [] hPtr;
+  delete [] dPtr;
  return true;
 }

@@ -59,7 +59,7 @@ bool hipPerfStreamCreateCopyDestroy::open(int deviceId) {
    HipTest::HIP_SKIP_TEST("Skipping because devices < 1");
  }
  HIP_CHECK(hipSetDevice(deviceId));
-  hipDeviceProp_t props = {0};
+  hipDeviceProp_t props;
  HIP_CHECK(hipGetDeviceProperties(&props, deviceId));
  std::cout << "info: running on bus " << "0x" << props.pciBusID
  << " " << props.name << " with " << props.multiProcessorCount << " CUs"
@@ -71,10 +71,10 @@ bool hipPerfStreamCreateCopyDestroy::run(unsigned int testNumber) {
  numStreams_ = totalStreams_[testNumber % TotalStreams];
  size_t iter = Iterations / (numStreams_ * (static_cast<size_t>(1)
                 << (testNumber / TotalBufs + 1)));
-  hipStream_t streams[numStreams_];
+  hipStream_t *streams = new hipStream_t[numStreams_];

  numBuffers_ = totalBuffers_[testNumber / TotalBufs];
-  float* dSrc[numBuffers_];
+  float ** dSrc = new float*[numBuffers_];
  size_t nBytes = BufSize * sizeof(float);

  for (size_t b = 0; b < numBuffers_; ++b) {
@@ -120,6 +120,9 @@ bool hipPerfStreamCreateCopyDestroy::run(unsigned int testNumber) {
  for (size_t b = 0; b < numBuffers_; ++b) {
    HIP_CHECK(hipFree(dSrc[b]));
  }
+
+  delete [] streams;
+  delete [] dSrc;
  return true;
 }