diff --git a/projects/hip-tests/catch/perftests/compute/hipPerfDotProduct.cc b/projects/hip-tests/catch/perftests/compute/hipPerfDotProduct.cc index f29f605478..aad97f0630 100644 --- a/projects/hip-tests/catch/perftests/compute/hipPerfDotProduct.cc +++ b/projects/hip-tests/catch/perftests/compute/hipPerfDotProduct.cc @@ -244,8 +244,7 @@ TEST_CASE("Perf_hipPerfDotProduct") { if (nGpu < 1) { HipTest::HIP_SKIP_TEST("Skipping because devices < 1"); } - hipDeviceProp_t props = {0}; - props = {0}; + hipDeviceProp_t props; HIP_CHECK(hipSetDevice(p_gpuDevice)); HIP_CHECK(hipGetDeviceProperties(&props, p_gpuDevice)); int nx, ny, nz; @@ -286,8 +285,8 @@ TEST_CASE("Perf_hipPerfDotProduct") { srand(time(NULL)); for (int i = 0; i < size; ++i) { - hx[i] = 2.0 * static_castrand() / static_castRAND_MAX - 1.0; - hy[i] = 2.0 * static_castrand() / static_castRAND_MAX - 1.0; + hx[i] = 2.0 * static_cast(rand()) / static_cast(RAND_MAX) - 1.0; + hy[i] = 2.0 * static_cast(rand()) / static_cast(RAND_MAX) - 1.0; hresult_xy += hx[i] * hy[i]; hresult_xx += hx[i] * hx[i]; diff --git a/projects/hip-tests/catch/perftests/compute/hipPerfMandelbrot.cc b/projects/hip-tests/catch/perftests/compute/hipPerfMandelbrot.cc index 53e4f750d7..a500b7df3e 100644 --- a/projects/hip-tests/catch/perftests/compute/hipPerfMandelbrot.cc +++ b/projects/hip-tests/catch/perftests/compute/hipPerfMandelbrot.cc @@ -47,7 +47,6 @@ static unsigned int numCoords = sizeof(coords) / sizeof(coordRec); template __global__ void float_mad_kernel(uint *out, uint width, T xPos, T yPos, T xStep, T yStep, uint maxIter) { -#pragma FP_CONTRACT ON int tid = (blockIdx.x * blockDim.x + threadIdx.x); int i = tid % width; int j = tid / width; @@ -71,7 +70,6 @@ template __global__ void float_mandel_unroll_kernel(uint *out, uint width, T xPos, T yPos, T xStep, T yStep, uint maxIter) { -#pragma FP_CONTRACT ON int tid = (blockIdx.x * blockDim.x + threadIdx.x); int i = tid % width; int j = tid / width; @@ -176,7 +174,6 @@ __global__ void float_mandel_unroll_kernel(uint *out, uint width, T xPos, template __global__ void double_mad_kernel(uint *out, uint width, T xPos, T yPos, T xStep, T yStep, uint maxIter) { -#pragma FP_CONTRACT ON int tid = (blockIdx.x * blockDim.x + threadIdx.x); int i = tid % width; int j = tid / width; @@ -199,7 +196,6 @@ __global__ void double_mad_kernel(uint *out, uint width, T xPos, T yPos, T xSte template __global__ void double_mandel_unroll_kernel(uint *out, uint width, T xPos, T yPos, T xStep, T yStep, uint maxIter) { -#pragma FP_CONTRACT ON int tid = (blockIdx.x * blockDim.x + threadIdx.x); int i = tid % width; @@ -304,8 +300,6 @@ __global__ void double_mandel_unroll_kernel(uint *out, uint width, T xPos, out[tid] = (uint)ccount; }; -static const unsigned int FMA_EXPECTEDVALUES_INDEX = 15; - // Expected results for each kernel run at each coord unsigned long long expectedIters[] = { 203277748ull, 2147483648ull, 120254651ull, 203277748ull, 2147483648ull, @@ -336,7 +330,7 @@ class hipPerfMandelBrot { } void open(int deviceID); - bool run(unsigned int testCase, unsigned int deviceId); + bool run(unsigned int testCase); void printResults(void); // array of funtion pointers @@ -391,7 +385,7 @@ void hipPerfMandelBrot::open(int deviceId) { HipTest::HIP_SKIP_TEST("Skipping because devices < 1"); } HIP_CHECK(hipSetDevice(deviceId)); - hipDeviceProp_t props = {0}; + hipDeviceProp_t props; HIP_CHECK(hipGetDeviceProperties(&props, deviceId)); std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name << " with " << props.multiProcessorCount << " CUs" << " and device id: " << deviceId @@ -401,7 +395,6 @@ void hipPerfMandelBrot::open(int deviceId) { } void hipPerfMandelBrot::printResults() { - int numkernels = getNumKernels(); int numStreams = getNumStreams(); std::cout << "\n" <<"Measured perf for kernels in GFLOPS on " @@ -424,7 +417,7 @@ void hipPerfMandelBrot::float_mad(uint *out, uint width, float xPos, float yPos int blocks, int threads_per_block, int kernelCnt) { int streamCnt = getNumStreams(); hipLaunchKernelGGL(float_mad_kernel, dim3(blocks), dim3(threads_per_block), 0, - streams[kernelCnt % streamCnt], out, width_, xPos, yPos, xStep, yStep, + streams[kernelCnt % streamCnt], out, width, xPos, yPos, xStep, yStep, maxIter); } @@ -433,7 +426,7 @@ void hipPerfMandelBrot::float_mandel_unroll(uint *out, uint width, float xPos, int blocks, int threads_per_block, int kernelCnt) { int streamCnt = getNumStreams(); hipLaunchKernelGGL(float_mandel_unroll_kernel, dim3(blocks), dim3(threads_per_block), 0, - streams[kernelCnt % streamCnt], out, width_, xPos, yPos, xStep, yStep, maxIter); + streams[kernelCnt % streamCnt], out, width, xPos, yPos, xStep, yStep, maxIter); } void hipPerfMandelBrot::double_mad(uint *out, uint width, float xPos, float yPos, @@ -441,7 +434,7 @@ void hipPerfMandelBrot::double_mad(uint *out, uint width, float xPos, float yPo int blocks, int threads_per_block, int kernelCnt) { int streamCnt = getNumStreams(); hipLaunchKernelGGL(double_mad_kernel, dim3(blocks), dim3(threads_per_block), 0, - streams[kernelCnt % streamCnt], out, width_, xPos, yPos, xStep, yStep, maxIter); + streams[kernelCnt % streamCnt], out, width, xPos, yPos, xStep, yStep, maxIter); } void hipPerfMandelBrot::double_mandel_unroll(uint *out, uint width, float xPos, float yPos, @@ -449,10 +442,10 @@ void hipPerfMandelBrot::double_mandel_unroll(uint *out, uint width, float xPos, int blocks, int threads_per_block, int kernelCnt) { int streamCnt = getNumStreams(); hipLaunchKernelGGL(float_mandel_unroll_kernel, dim3(blocks), dim3(threads_per_block), 0, - streams[kernelCnt % streamCnt], out, width_, xPos, yPos, xStep, yStep, maxIter); + streams[kernelCnt % streamCnt], out, width, xPos, yPos, xStep, yStep, maxIter); } -bool hipPerfMandelBrot::run(unsigned int testCase,unsigned int deviceId) { +bool hipPerfMandelBrot::run(unsigned int testCase) { unsigned int numStreams = getNumStreams(); coordIdx = testCase % numCoords; @@ -462,8 +455,8 @@ bool hipPerfMandelBrot::run(unsigned int testCase,unsigned int deviceId) { // Maximum iteration count maxIter = 32768; - uint * hPtr[numKernels]; - uint * dPtr[numKernels]; + uint ** hPtr = new uint *[numKernels]; + uint ** dPtr = new uint *[numKernels]; // Width is divisible by 4 because the mandelbrot kernel processes 4 pixels at once. width_ = 256; @@ -487,11 +480,6 @@ bool hipPerfMandelBrot::run(unsigned int testCase,unsigned int deviceId) { int threads_per_block = 64; int blocks = (threads/threads_per_block) + (threads % threads_per_block); - float xStep = static_cast(coords[coordIdx].width / (double)width_); - float yStep = static_cast(-coords[coordIdx].width / (double)width_); - float xPos = static_cast(coords[coordIdx].x - 0.5 * coords[coordIdx].width); - float yPos = static_cast(coords[coordIdx].y + 0.5 * coords[coordIdx].width); - // Copy memory asynchronously and concurrently from host to device for (uint i = 0; i < numKernels; i++) { HIP_CHECK(hipMemcpy(dPtr[i], hPtr[i], bufSize, hipMemcpyHostToDevice)); @@ -593,6 +581,8 @@ bool hipPerfMandelBrot::run(unsigned int testCase,unsigned int deviceId) { HIP_CHECK(hipHostFree(hPtr[i])); HIP_CHECK(hipFree(dPtr[i])); } + delete [] hPtr; + delete [] dPtr; return true; } @@ -632,7 +622,7 @@ TEST_CASE("Perf_hipPerfMandelbrot") { SECTION("warm-up kernel default stream executes serially") { mandelbrotCompute.setNumStreams(1); mandelbrotCompute.setNumKernels(1); - REQUIRE(true == mandelbrotCompute.run(100/*Random number*/, deviceId)); + REQUIRE(true == mandelbrotCompute.run(100/*Random number*/)); } #endif SECTION("run all - sync") { @@ -640,7 +630,7 @@ TEST_CASE("Perf_hipPerfMandelbrot") { do { mandelbrotCompute.setNumStreams(1); mandelbrotCompute.setNumKernels(1); - REQUIRE(true == mandelbrotCompute.run(i, deviceId)); + REQUIRE(true == mandelbrotCompute.run(i)); i++; }while(i < 12); mandelbrotCompute.printResults(); @@ -651,7 +641,7 @@ TEST_CASE("Perf_hipPerfMandelbrot") { do { mandelbrotCompute.setNumStreams(2); mandelbrotCompute.setNumKernels(2); - REQUIRE(true == mandelbrotCompute.run(i, deviceId)); + REQUIRE(true == mandelbrotCompute.run(i)); i++; }while(i < 12); mandelbrotCompute.printResults(); diff --git a/projects/hip-tests/catch/perftests/dispatch/hipPerfDispatchSpeed.cc b/projects/hip-tests/catch/perftests/dispatch/hipPerfDispatchSpeed.cc index 671cb9a1f0..7f6c25e388 100644 --- a/projects/hip-tests/catch/perftests/dispatch/hipPerfDispatchSpeed.cc +++ b/projects/hip-tests/catch/perftests/dispatch/hipPerfDispatchSpeed.cc @@ -89,7 +89,7 @@ TEST_CASE("Perf_hipPerfDispatchSpeed") { int p_gpuDevice = 0; int p_tests = -1; hipError_t err = hipSuccess; - hipDeviceProp_t props = {0}; + hipDeviceProp_t props; HIP_CHECK(hipGetDeviceProperties(&props, p_gpuDevice)); unsigned int testListSize = sizeof(testList) / sizeof(testStruct); diff --git a/projects/hip-tests/catch/perftests/memory/hipPerfSampleRate.cc b/projects/hip-tests/catch/perftests/memory/hipPerfSampleRate.cc index c0cba90b87..9a083fa3ec 100644 --- a/projects/hip-tests/catch/perftests/memory/hipPerfSampleRate.cc +++ b/projects/hip-tests/catch/perftests/memory/hipPerfSampleRate.cc @@ -185,8 +185,8 @@ void hipPerfSampleRate::run(unsigned int test) { void ** dPtr; void * hOutPtr; void * dOutPtr; - void * hInPtr[numBufs_]; - void * dInPtr[numBufs_]; + void ** hInPtr = new void *[numBufs_]; + void ** dInPtr = new void *[numBufs_]; outBufSize_ = sizes[NUM_SIZES - 1] * sizes[NUM_SIZES - 1] * typeSizes[NUM_TYPES - 1]; @@ -265,6 +265,8 @@ void hipPerfSampleRate::run(unsigned int test) { } HIP_CHECK(hipHostFree(hOutPtr)); HIP_CHECK(hipFree(dPtr)); + delete [] hInPtr; + delete [] dInPtr; } diff --git a/projects/hip-tests/catch/perftests/stream/hipPerfDeviceConcurrency.cc b/projects/hip-tests/catch/perftests/stream/hipPerfDeviceConcurrency.cc index 5e54f5eaab..c0ea3e4638 100644 --- a/projects/hip-tests/catch/perftests/stream/hipPerfDeviceConcurrency.cc +++ b/projects/hip-tests/catch/perftests/stream/hipPerfDeviceConcurrency.cc @@ -105,12 +105,12 @@ void hipPerfDeviceConcurrency::close() { bool hipPerfDeviceConcurrency::run(unsigned int testCase, int numGpus) { static int deviceId; - uint * hPtr[numGpus]; - uint * dPtr[numGpus]; - hipStream_t streams[numGpus]; - int numCUs[numGpus]; - unsigned int maxIter[numGpus]; - unsigned long long expectedIters[numGpus]; + uint ** hPtr = new uint*[numGpus]; + uint ** dPtr = new uint*[numGpus]; + hipStream_t * streams = new hipStream_t[numGpus]; + int *numCUs = new int[numGpus]; + unsigned int *maxIter = new unsigned int[numGpus]; + unsigned long long *expectedIters = new unsigned long long[numGpus]; int threads, threads_per_block, blocks; float xStep, yStep, xPos, yPos; @@ -121,7 +121,7 @@ bool hipPerfDeviceConcurrency::run(unsigned int testCase, int numGpus) { } HIP_CHECK(hipSetDevice(deviceId)); - hipDeviceProp_t props = {0}; + hipDeviceProp_t props; HIP_CHECK(hipGetDeviceProperties(&props, i)); if (testCase != 0) { std::cout << "info: running on bus " << "0x" << props.pciBusID @@ -220,6 +220,12 @@ bool hipPerfDeviceConcurrency::run(unsigned int testCase, int numGpus) { if (testCase == 0) { deviceId++; } + delete [] hPtr; + delete [] dPtr; + delete [] streams; + delete [] numCUs; + delete [] maxIter; + delete [] expectedIters; return true; } diff --git a/projects/hip-tests/catch/perftests/stream/hipPerfStreamConcurrency.cc b/projects/hip-tests/catch/perftests/stream/hipPerfStreamConcurrency.cc index 1fcbd9fde8..2c86c27503 100644 --- a/projects/hip-tests/catch/perftests/stream/hipPerfStreamConcurrency.cc +++ b/projects/hip-tests/catch/perftests/stream/hipPerfStreamConcurrency.cc @@ -225,7 +225,7 @@ bool hipPerfStreamConcurrency::open(int deviceId) { } HIP_CHECK(hipSetDevice(deviceId)); - hipDeviceProp_t props = {0}; + hipDeviceProp_t props; HIP_CHECK(hipGetDeviceProperties(&props, deviceId)); std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name << " with " << props.multiProcessorCount << " CUs" @@ -250,12 +250,12 @@ bool hipPerfStreamConcurrency::run(unsigned int testCase, // Maximum iteration count // maxIter = 8388608 * (engine_clock / 1000).serial execution - maxIter = (unsigned int)(((8388608 * (static_castclkFrequency / 1000)) + maxIter = (unsigned int)(((8388608 * (static_cast(clkFrequency) / 1000)) * numCUs) / 128); maxIter = (maxIter + 15) & ~15; - hipStream_t streams[numStreams]; - uint * hPtr[numKernels]; - uint * dPtr[numKernels]; + hipStream_t *streams = new hipStream_t[numStreams]; + uint ** hPtr = new uint*[numKernels]; + uint ** dPtr = new uint*[numKernels]; // Width is divisible by 4 because the mandelbrot kernel // processes 4 pixels at once. @@ -327,9 +327,6 @@ bool hipPerfStreamConcurrency::run(unsigned int testCase, << numStreams <<" stream (s): " << all_kernel_time.count() << std::endl; } - unsigned long long expected = - (unsigned long long)width_ * (unsigned long long)maxIter; - for (uint i = 0 ; i < numStreams; i++) { HIP_CHECK(hipStreamDestroy(streams[i])); } @@ -339,6 +336,10 @@ bool hipPerfStreamConcurrency::run(unsigned int testCase, HIP_CHECK(hipHostFree(hPtr[i])); HIP_CHECK(hipFree(dPtr[i])); } + + delete [] streams; + delete [] hPtr; + delete [] dPtr; return true; } diff --git a/projects/hip-tests/catch/perftests/stream/hipPerfStreamCreateCopyDestroy.cc b/projects/hip-tests/catch/perftests/stream/hipPerfStreamCreateCopyDestroy.cc index ff8b1d3b67..edbe4c0046 100644 --- a/projects/hip-tests/catch/perftests/stream/hipPerfStreamCreateCopyDestroy.cc +++ b/projects/hip-tests/catch/perftests/stream/hipPerfStreamCreateCopyDestroy.cc @@ -59,7 +59,7 @@ bool hipPerfStreamCreateCopyDestroy::open(int deviceId) { HipTest::HIP_SKIP_TEST("Skipping because devices < 1"); } HIP_CHECK(hipSetDevice(deviceId)); - hipDeviceProp_t props = {0}; + hipDeviceProp_t props; HIP_CHECK(hipGetDeviceProperties(&props, deviceId)); std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name << " with " << props.multiProcessorCount << " CUs" @@ -71,10 +71,10 @@ bool hipPerfStreamCreateCopyDestroy::run(unsigned int testNumber) { numStreams_ = totalStreams_[testNumber % TotalStreams]; size_t iter = Iterations / (numStreams_ * (static_cast(1) << (testNumber / TotalBufs + 1))); - hipStream_t streams[numStreams_]; + hipStream_t *streams = new hipStream_t[numStreams_]; numBuffers_ = totalBuffers_[testNumber / TotalBufs]; - float* dSrc[numBuffers_]; + float ** dSrc = new float*[numBuffers_]; size_t nBytes = BufSize * sizeof(float); for (size_t b = 0; b < numBuffers_; ++b) { @@ -120,6 +120,9 @@ bool hipPerfStreamCreateCopyDestroy::run(unsigned int testNumber) { for (size_t b = 0; b < numBuffers_; ++b) { HIP_CHECK(hipFree(dSrc[b])); } + + delete [] streams; + delete [] dSrc; return true; }