SWDEV-475568 - Fix compiling issues
Fix the compilation errors that occur when running "make perf_test"
under hip-tests.
Change-Id: Ib03328a2fb13375fa44626a42202b1eeb177b8b2
[ROCm/hip-tests commit: a2f37dfa3a]
Bu işleme şunda yer alıyor:
@@ -244,8 +244,7 @@ TEST_CASE("Perf_hipPerfDotProduct") {
|
||||
if (nGpu < 1) {
|
||||
HipTest::HIP_SKIP_TEST("Skipping because devices < 1");
|
||||
}
|
||||
hipDeviceProp_t props = {0};
|
||||
props = {0};
|
||||
hipDeviceProp_t props;
|
||||
HIP_CHECK(hipSetDevice(p_gpuDevice));
|
||||
HIP_CHECK(hipGetDeviceProperties(&props, p_gpuDevice));
|
||||
int nx, ny, nz;
|
||||
@@ -286,8 +285,8 @@ TEST_CASE("Perf_hipPerfDotProduct") {
|
||||
srand(time(NULL));
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
hx[i] = 2.0 * static_cast<double>rand() / static_cast<double>RAND_MAX - 1.0;
|
||||
hy[i] = 2.0 * static_cast<double>rand() / static_cast<double>RAND_MAX - 1.0;
|
||||
hx[i] = 2.0 * static_cast<double>(rand()) / static_cast<double>(RAND_MAX) - 1.0;
|
||||
hy[i] = 2.0 * static_cast<double>(rand()) / static_cast<double>(RAND_MAX) - 1.0;
|
||||
|
||||
hresult_xy += hx[i] * hy[i];
|
||||
hresult_xx += hx[i] * hx[i];
|
||||
|
||||
@@ -47,7 +47,6 @@ static unsigned int numCoords = sizeof(coords) / sizeof(coordRec);
|
||||
template <typename T>
|
||||
__global__ void float_mad_kernel(uint *out, uint width, T xPos, T yPos,
|
||||
T xStep, T yStep, uint maxIter) {
|
||||
#pragma FP_CONTRACT ON
|
||||
int tid = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
int i = tid % width;
|
||||
int j = tid / width;
|
||||
@@ -71,7 +70,6 @@ template <typename T>
|
||||
__global__ void float_mandel_unroll_kernel(uint *out, uint width, T xPos,
|
||||
T yPos, T xStep, T yStep, uint maxIter) {
|
||||
|
||||
#pragma FP_CONTRACT ON
|
||||
int tid = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
int i = tid % width;
|
||||
int j = tid / width;
|
||||
@@ -176,7 +174,6 @@ __global__ void float_mandel_unroll_kernel(uint *out, uint width, T xPos,
|
||||
template <typename T>
|
||||
__global__ void double_mad_kernel(uint *out, uint width, T xPos, T yPos, T xStep, T yStep,
|
||||
uint maxIter) {
|
||||
#pragma FP_CONTRACT ON
|
||||
int tid = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
int i = tid % width;
|
||||
int j = tid / width;
|
||||
@@ -199,7 +196,6 @@ __global__ void double_mad_kernel(uint *out, uint width, T xPos, T yPos, T xSte
|
||||
template <typename T>
|
||||
__global__ void double_mandel_unroll_kernel(uint *out, uint width, T xPos,
|
||||
T yPos, T xStep, T yStep, uint maxIter) {
|
||||
#pragma FP_CONTRACT ON
|
||||
int tid = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
|
||||
int i = tid % width;
|
||||
@@ -304,8 +300,6 @@ __global__ void double_mandel_unroll_kernel(uint *out, uint width, T xPos,
|
||||
out[tid] = (uint)ccount;
|
||||
};
|
||||
|
||||
static const unsigned int FMA_EXPECTEDVALUES_INDEX = 15;
|
||||
|
||||
// Expected results for each kernel run at each coord
|
||||
unsigned long long expectedIters[] = {
|
||||
203277748ull, 2147483648ull, 120254651ull, 203277748ull, 2147483648ull,
|
||||
@@ -336,7 +330,7 @@ class hipPerfMandelBrot {
|
||||
}
|
||||
|
||||
void open(int deviceID);
|
||||
bool run(unsigned int testCase, unsigned int deviceId);
|
||||
bool run(unsigned int testCase);
|
||||
void printResults(void);
|
||||
|
||||
// array of function pointers
|
||||
@@ -391,7 +385,7 @@ void hipPerfMandelBrot::open(int deviceId) {
|
||||
HipTest::HIP_SKIP_TEST("Skipping because devices < 1");
|
||||
}
|
||||
HIP_CHECK(hipSetDevice(deviceId));
|
||||
hipDeviceProp_t props = {0};
|
||||
hipDeviceProp_t props;
|
||||
HIP_CHECK(hipGetDeviceProperties(&props, deviceId));
|
||||
std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name
|
||||
<< " with " << props.multiProcessorCount << " CUs" << " and device id: " << deviceId
|
||||
@@ -401,7 +395,6 @@ void hipPerfMandelBrot::open(int deviceId) {
|
||||
}
|
||||
|
||||
void hipPerfMandelBrot::printResults() {
|
||||
int numkernels = getNumKernels();
|
||||
int numStreams = getNumStreams();
|
||||
|
||||
std::cout << "\n" <<"Measured perf for kernels in GFLOPS on "
|
||||
@@ -424,7 +417,7 @@ void hipPerfMandelBrot::float_mad(uint *out, uint width, float xPos, float yPos
|
||||
int blocks, int threads_per_block, int kernelCnt) {
|
||||
int streamCnt = getNumStreams();
|
||||
hipLaunchKernelGGL(float_mad_kernel<float>, dim3(blocks), dim3(threads_per_block), 0,
|
||||
streams[kernelCnt % streamCnt], out, width_, xPos, yPos, xStep, yStep,
|
||||
streams[kernelCnt % streamCnt], out, width, xPos, yPos, xStep, yStep,
|
||||
maxIter);
|
||||
}
|
||||
|
||||
@@ -433,7 +426,7 @@ void hipPerfMandelBrot::float_mandel_unroll(uint *out, uint width, float xPos,
|
||||
int blocks, int threads_per_block, int kernelCnt) {
|
||||
int streamCnt = getNumStreams();
|
||||
hipLaunchKernelGGL(float_mandel_unroll_kernel<float>, dim3(blocks), dim3(threads_per_block), 0,
|
||||
streams[kernelCnt % streamCnt], out, width_, xPos, yPos, xStep, yStep, maxIter);
|
||||
streams[kernelCnt % streamCnt], out, width, xPos, yPos, xStep, yStep, maxIter);
|
||||
}
|
||||
|
||||
void hipPerfMandelBrot::double_mad(uint *out, uint width, float xPos, float yPos,
|
||||
@@ -441,7 +434,7 @@ void hipPerfMandelBrot::double_mad(uint *out, uint width, float xPos, float yPo
|
||||
int blocks, int threads_per_block, int kernelCnt) {
|
||||
int streamCnt = getNumStreams();
|
||||
hipLaunchKernelGGL(double_mad_kernel<double>, dim3(blocks), dim3(threads_per_block), 0,
|
||||
streams[kernelCnt % streamCnt], out, width_, xPos, yPos, xStep, yStep, maxIter);
|
||||
streams[kernelCnt % streamCnt], out, width, xPos, yPos, xStep, yStep, maxIter);
|
||||
}
|
||||
|
||||
void hipPerfMandelBrot::double_mandel_unroll(uint *out, uint width, float xPos, float yPos,
|
||||
@@ -449,10 +442,10 @@ void hipPerfMandelBrot::double_mandel_unroll(uint *out, uint width, float xPos,
|
||||
int blocks, int threads_per_block, int kernelCnt) {
|
||||
int streamCnt = getNumStreams();
|
||||
hipLaunchKernelGGL(float_mandel_unroll_kernel<double>, dim3(blocks), dim3(threads_per_block), 0,
|
||||
streams[kernelCnt % streamCnt], out, width_, xPos, yPos, xStep, yStep, maxIter);
|
||||
streams[kernelCnt % streamCnt], out, width, xPos, yPos, xStep, yStep, maxIter);
|
||||
}
|
||||
|
||||
bool hipPerfMandelBrot::run(unsigned int testCase,unsigned int deviceId) {
|
||||
bool hipPerfMandelBrot::run(unsigned int testCase) {
|
||||
unsigned int numStreams = getNumStreams();
|
||||
coordIdx = testCase % numCoords;
|
||||
|
||||
@@ -462,8 +455,8 @@ bool hipPerfMandelBrot::run(unsigned int testCase,unsigned int deviceId) {
|
||||
// Maximum iteration count
|
||||
maxIter = 32768;
|
||||
|
||||
uint * hPtr[numKernels];
|
||||
uint * dPtr[numKernels];
|
||||
uint ** hPtr = new uint *[numKernels];
|
||||
uint ** dPtr = new uint *[numKernels];
|
||||
|
||||
// Width is divisible by 4 because the mandelbrot kernel processes 4 pixels at once.
|
||||
width_ = 256;
|
||||
@@ -487,11 +480,6 @@ bool hipPerfMandelBrot::run(unsigned int testCase,unsigned int deviceId) {
|
||||
int threads_per_block = 64;
|
||||
int blocks = (threads/threads_per_block) + (threads % threads_per_block);
|
||||
|
||||
float xStep = static_cast<float>(coords[coordIdx].width / (double)width_);
|
||||
float yStep = static_cast<float>(-coords[coordIdx].width / (double)width_);
|
||||
float xPos = static_cast<float>(coords[coordIdx].x - 0.5 * coords[coordIdx].width);
|
||||
float yPos = static_cast<float>(coords[coordIdx].y + 0.5 * coords[coordIdx].width);
|
||||
|
||||
// Copy memory asynchronously and concurrently from host to device
|
||||
for (uint i = 0; i < numKernels; i++) {
|
||||
HIP_CHECK(hipMemcpy(dPtr[i], hPtr[i], bufSize, hipMemcpyHostToDevice));
|
||||
@@ -593,6 +581,8 @@ bool hipPerfMandelBrot::run(unsigned int testCase,unsigned int deviceId) {
|
||||
HIP_CHECK(hipHostFree(hPtr[i]));
|
||||
HIP_CHECK(hipFree(dPtr[i]));
|
||||
}
|
||||
delete [] hPtr;
|
||||
delete [] dPtr;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -632,7 +622,7 @@ TEST_CASE("Perf_hipPerfMandelbrot") {
|
||||
SECTION("warm-up kernel default stream executes serially") {
|
||||
mandelbrotCompute.setNumStreams(1);
|
||||
mandelbrotCompute.setNumKernels(1);
|
||||
REQUIRE(true == mandelbrotCompute.run(100/*Random number*/, deviceId));
|
||||
REQUIRE(true == mandelbrotCompute.run(100/*Random number*/));
|
||||
}
|
||||
#endif
|
||||
SECTION("run all - sync") {
|
||||
@@ -640,7 +630,7 @@ TEST_CASE("Perf_hipPerfMandelbrot") {
|
||||
do {
|
||||
mandelbrotCompute.setNumStreams(1);
|
||||
mandelbrotCompute.setNumKernels(1);
|
||||
REQUIRE(true == mandelbrotCompute.run(i, deviceId));
|
||||
REQUIRE(true == mandelbrotCompute.run(i));
|
||||
i++;
|
||||
}while(i < 12);
|
||||
mandelbrotCompute.printResults();
|
||||
@@ -651,7 +641,7 @@ TEST_CASE("Perf_hipPerfMandelbrot") {
|
||||
do {
|
||||
mandelbrotCompute.setNumStreams(2);
|
||||
mandelbrotCompute.setNumKernels(2);
|
||||
REQUIRE(true == mandelbrotCompute.run(i, deviceId));
|
||||
REQUIRE(true == mandelbrotCompute.run(i));
|
||||
i++;
|
||||
}while(i < 12);
|
||||
mandelbrotCompute.printResults();
|
||||
|
||||
@@ -89,7 +89,7 @@ TEST_CASE("Perf_hipPerfDispatchSpeed") {
|
||||
int p_gpuDevice = 0;
|
||||
int p_tests = -1;
|
||||
hipError_t err = hipSuccess;
|
||||
hipDeviceProp_t props = {0};
|
||||
hipDeviceProp_t props;
|
||||
HIP_CHECK(hipGetDeviceProperties(&props, p_gpuDevice));
|
||||
|
||||
unsigned int testListSize = sizeof(testList) / sizeof(testStruct);
|
||||
|
||||
@@ -185,8 +185,8 @@ void hipPerfSampleRate::run(unsigned int test) {
|
||||
void ** dPtr;
|
||||
void * hOutPtr;
|
||||
void * dOutPtr;
|
||||
void * hInPtr[numBufs_];
|
||||
void * dInPtr[numBufs_];
|
||||
void ** hInPtr = new void *[numBufs_];
|
||||
void ** dInPtr = new void *[numBufs_];
|
||||
|
||||
outBufSize_ =
|
||||
sizes[NUM_SIZES - 1] * sizes[NUM_SIZES - 1] * typeSizes[NUM_TYPES - 1];
|
||||
@@ -265,6 +265,8 @@ void hipPerfSampleRate::run(unsigned int test) {
|
||||
}
|
||||
HIP_CHECK(hipHostFree(hOutPtr));
|
||||
HIP_CHECK(hipFree(dPtr));
|
||||
delete [] hInPtr;
|
||||
delete [] dInPtr;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -105,12 +105,12 @@ void hipPerfDeviceConcurrency::close() {
|
||||
|
||||
bool hipPerfDeviceConcurrency::run(unsigned int testCase, int numGpus) {
|
||||
static int deviceId;
|
||||
uint * hPtr[numGpus];
|
||||
uint * dPtr[numGpus];
|
||||
hipStream_t streams[numGpus];
|
||||
int numCUs[numGpus];
|
||||
unsigned int maxIter[numGpus];
|
||||
unsigned long long expectedIters[numGpus];
|
||||
uint ** hPtr = new uint*[numGpus];
|
||||
uint ** dPtr = new uint*[numGpus];
|
||||
hipStream_t * streams = new hipStream_t[numGpus];
|
||||
int *numCUs = new int[numGpus];
|
||||
unsigned int *maxIter = new unsigned int[numGpus];
|
||||
unsigned long long *expectedIters = new unsigned long long[numGpus];
|
||||
|
||||
int threads, threads_per_block, blocks;
|
||||
float xStep, yStep, xPos, yPos;
|
||||
@@ -121,7 +121,7 @@ bool hipPerfDeviceConcurrency::run(unsigned int testCase, int numGpus) {
|
||||
}
|
||||
|
||||
HIP_CHECK(hipSetDevice(deviceId));
|
||||
hipDeviceProp_t props = {0};
|
||||
hipDeviceProp_t props;
|
||||
HIP_CHECK(hipGetDeviceProperties(&props, i));
|
||||
if (testCase != 0) {
|
||||
std::cout << "info: running on bus " << "0x" << props.pciBusID
|
||||
@@ -220,6 +220,12 @@ bool hipPerfDeviceConcurrency::run(unsigned int testCase, int numGpus) {
|
||||
if (testCase == 0) {
|
||||
deviceId++;
|
||||
}
|
||||
delete [] hPtr;
|
||||
delete [] dPtr;
|
||||
delete [] streams;
|
||||
delete [] numCUs;
|
||||
delete [] maxIter;
|
||||
delete [] expectedIters;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -225,7 +225,7 @@ bool hipPerfStreamConcurrency::open(int deviceId) {
|
||||
}
|
||||
|
||||
HIP_CHECK(hipSetDevice(deviceId));
|
||||
hipDeviceProp_t props = {0};
|
||||
hipDeviceProp_t props;
|
||||
HIP_CHECK(hipGetDeviceProperties(&props, deviceId));
|
||||
std::cout << "info: running on bus " << "0x" << props.pciBusID
|
||||
<< " " << props.name << " with " << props.multiProcessorCount << " CUs"
|
||||
@@ -250,12 +250,12 @@ bool hipPerfStreamConcurrency::run(unsigned int testCase,
|
||||
|
||||
// Maximum iteration count
|
||||
// maxIter = 8388608 * (engine_clock / 1000).serial execution
|
||||
maxIter = (unsigned int)(((8388608 * (static_cast<float>clkFrequency / 1000))
|
||||
maxIter = (unsigned int)(((8388608 * (static_cast<float>(clkFrequency) / 1000))
|
||||
* numCUs) / 128);
|
||||
maxIter = (maxIter + 15) & ~15;
|
||||
hipStream_t streams[numStreams];
|
||||
uint * hPtr[numKernels];
|
||||
uint * dPtr[numKernels];
|
||||
hipStream_t *streams = new hipStream_t[numStreams];
|
||||
uint ** hPtr = new uint*[numKernels];
|
||||
uint ** dPtr = new uint*[numKernels];
|
||||
|
||||
// Width is divisible by 4 because the mandelbrot kernel
|
||||
// processes 4 pixels at once.
|
||||
@@ -327,9 +327,6 @@ bool hipPerfStreamConcurrency::run(unsigned int testCase,
|
||||
<< numStreams <<" stream (s): " << all_kernel_time.count() << std::endl;
|
||||
}
|
||||
|
||||
unsigned long long expected =
|
||||
(unsigned long long)width_ * (unsigned long long)maxIter;
|
||||
|
||||
for (uint i = 0 ; i < numStreams; i++) {
|
||||
HIP_CHECK(hipStreamDestroy(streams[i]));
|
||||
}
|
||||
@@ -339,6 +336,10 @@ bool hipPerfStreamConcurrency::run(unsigned int testCase,
|
||||
HIP_CHECK(hipHostFree(hPtr[i]));
|
||||
HIP_CHECK(hipFree(dPtr[i]));
|
||||
}
|
||||
|
||||
delete [] streams;
|
||||
delete [] hPtr;
|
||||
delete [] dPtr;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -59,7 +59,7 @@ bool hipPerfStreamCreateCopyDestroy::open(int deviceId) {
|
||||
HipTest::HIP_SKIP_TEST("Skipping because devices < 1");
|
||||
}
|
||||
HIP_CHECK(hipSetDevice(deviceId));
|
||||
hipDeviceProp_t props = {0};
|
||||
hipDeviceProp_t props;
|
||||
HIP_CHECK(hipGetDeviceProperties(&props, deviceId));
|
||||
std::cout << "info: running on bus " << "0x" << props.pciBusID
|
||||
<< " " << props.name << " with " << props.multiProcessorCount << " CUs"
|
||||
@@ -71,10 +71,10 @@ bool hipPerfStreamCreateCopyDestroy::run(unsigned int testNumber) {
|
||||
numStreams_ = totalStreams_[testNumber % TotalStreams];
|
||||
size_t iter = Iterations / (numStreams_ * (static_cast<size_t>(1)
|
||||
<< (testNumber / TotalBufs + 1)));
|
||||
hipStream_t streams[numStreams_];
|
||||
hipStream_t *streams = new hipStream_t[numStreams_];
|
||||
|
||||
numBuffers_ = totalBuffers_[testNumber / TotalBufs];
|
||||
float* dSrc[numBuffers_];
|
||||
float ** dSrc = new float*[numBuffers_];
|
||||
size_t nBytes = BufSize * sizeof(float);
|
||||
|
||||
for (size_t b = 0; b < numBuffers_; ++b) {
|
||||
@@ -120,6 +120,9 @@ bool hipPerfStreamCreateCopyDestroy::run(unsigned int testNumber) {
|
||||
for (size_t b = 0; b < numBuffers_; ++b) {
|
||||
HIP_CHECK(hipFree(dSrc[b]));
|
||||
}
|
||||
|
||||
delete [] streams;
|
||||
delete [] dSrc;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle