diff --git a/projects/hip-tests/perftests/compute/hipPerfMandelbrot.cpp b/projects/hip-tests/perftests/compute/hipPerfMandelbrot.cpp deleted file mode 100644 index c4234d8c37..0000000000 --- a/projects/hip-tests/perftests/compute/hipPerfMandelbrot.cpp +++ /dev/null @@ -1,747 +0,0 @@ -/* - Copyright (c) 2015-2020 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ - -/* HIT_START - * BUILD: %t %s ../../src/test_common.cpp EXCLUDE_HIP_PLATFORM nvcc - * TEST: %t - * HIT_END - */ - -#include -#include -#include "test_common.h" -#include -#include -#include -#include -#include - -typedef struct { - double x; - double y; - double width; -} coordRec; - -coordRec coords[] = { - {0.0, 0.0, 4.0}, // Whole set - {0.0, 0.0, 0.00001}, // All black - {-0.0180789661868, 0.6424294066162, 0.00003824140}, // Hit detail -}; - -static unsigned int numCoords = sizeof(coords) / sizeof(coordRec); - -template -__global__ void float_mad_kernel(uint *out, uint width, T xPos, T yPos, T xStep, T yStep, - uint maxIter) { - -#pragma FP_CONTRACT ON - int tid = (blockIdx.x * blockDim.x + threadIdx.x); - int i = tid % width; - int j = tid / width; - float x0 = (float)(xPos + xStep*i); - float y0 = (float)(yPos + yStep*j); - - float x = x0; - float y = y0; - - uint iter = 0; - float tmp; - for (iter = 0; (x*x + y*y <= 4.0f) && (iter < maxIter); iter++) { - tmp = x; - x = fma(-y,y,fma(x,x,x0)); - y = fma(2.0f*tmp,y,y0); - } - - out[tid] = iter; -}; - -template -__global__ void float_mandel_unroll_kernel(uint *out, uint width, T xPos, - T yPos, T xStep, T yStep, uint maxIter) { - -#pragma FP_CONTRACT ON - int tid = (blockIdx.x * blockDim.x + threadIdx.x); - int i = tid % width; - int j = tid / width; - float x0 = (float)(xPos + xStep*(float)i); - float y0 = (float)(yPos + yStep*(float)j); - - float x = x0; - float y = y0; - -#define FAST - uint iter = 0; - float tmp; - int stay; - int ccount = 0; - stay = (x*x+y*y) <= 4.0; - float savx = x; - float savy = y; -#ifdef FAST - for (iter = 0; (iter < maxIter); iter+=16) { -#else - for (iter = 0; stay && (iter < maxIter); iter+=16) { -#endif - x = savx; - y = savy; - - // Two iterations - tmp = fma(-y,y, fma(x,x,x0)); - y = fma(2.0f*x,y,y0); - x = fma(-y,y, fma(tmp,tmp,x0)); - y = fma(2.0f*tmp,y,y0); - - // Two iterations - tmp = fma(-y,y, fma(x,x,x0)); - y = fma(2.0f*x,y,y0); - x = fma(-y,y, fma(tmp,tmp,x0)); - y = fma(2.0f*tmp,y,y0); - - // Two iterations - tmp = fma(-y,y, fma(x,x,x0)); - y = fma(2.0f*x,y,y0); - x = fma(-y,y, fma(tmp,tmp,x0)); - y = fma(2.0f*tmp,y,y0); - - // Two iterations - tmp = fma(-y,y, fma(x,x,x0)); - y = fma(2.0f*x,y,y0); - x = fma(-y,y, fma(tmp,tmp,x0)); - y = fma(2.0f*tmp,y,y0); - - // Two iterations - tmp = fma(-y,y, fma(x,x,x0)); - y = fma(2.0f*x,y,y0); - x = fma(-y,y, fma(tmp,tmp,x0)); - y = fma(2.0f*tmp,y,y0); - - // Two iterations - tmp = fma(-y,y, fma(x,x,x0)); - y = fma(2.0f*x,y,y0); - x = fma(-y,y, fma(tmp,tmp,x0)); - y = fma(2.0f*tmp,y,y0); - - // Two iterations - tmp = fma(-y,y, fma(x,x,x0)); - y = fma(2.0f*x,y,y0); - x = fma(-y,y, fma(tmp,tmp,x0)); - y = fma(2.0f*tmp,y,y0); - - // Two iterations - tmp = fma(-y,y, fma(x,x,x0)); - y = fma(2.0f*x,y,y0); - x = fma(-y,y, fma(tmp,tmp,x0)); - y = fma(2.0f*tmp,y,y0); - - stay = (x*x+y*y) <= 4.0; - savx = (stay ? x : savx); - savy = (stay ? y : savy); - ccount += stay*16; -#ifdef FAST - if (!stay) - break; -#endif - } - // Handle remainder - if (!stay) { - iter = 16; - do { - x = savx; - y = savy; - stay = ((x*x+y*y) <= 4.0) && (ccount < maxIter); - tmp = x; - x = fma(-y,y, fma(x,x,x0)); - y = fma(2.0f*tmp,y,y0); - ccount += stay; - iter--; - savx = (stay ? x : savx); - savy = (stay ? y : savy); - } while (stay && iter); - } - - - out[tid] = (uint)ccount; - -}; - - -template -__global__ void double_mad_kernel(uint *out, uint width, T xPos, T yPos, T xStep, T yStep, - uint maxIter) { - -#pragma FP_CONTRACT ON - int tid = (blockIdx.x * blockDim.x + threadIdx.x); - int i = tid % width; - int j = tid / width; - double x0 = (double)(xPos + xStep*i); - double y0 = (double)(yPos + yStep*j); - - double x = x0; - double y = y0; - - uint iter = 0; - double tmp; - for (iter = 0; (x*x + y*y <= 4.0f) && (iter < maxIter); iter++) { - tmp = x; - x = fma(-y,y,fma(x,x,x0)); - y = fma(2.0f*tmp,y,y0); - } - out[tid] = iter; -}; - - -template -__global__ void double_mandel_unroll_kernel(uint *out, uint width, T xPos, - T yPos, T xStep, T yStep, uint maxIter) { - -#pragma FP_CONTRACT ON - int tid = (blockIdx.x * blockDim.x + threadIdx.x); - - int i = tid % width; - int j = tid / width; - double x0 = (double)(xPos + xStep*(double)i); - double y0 = (double)(yPos + yStep*(double)j); - - double x = x0; - double y = y0; - -#define FAST - uint iter = 0; - double tmp; - int stay; - int ccount = 0; - stay = (x*x+y*y) <= 4.0; - double savx = x; - double savy = y; -#ifdef FAST - for (iter = 0; (iter < maxIter); iter+=16) -#else - for (iter = 0; stay && (iter < maxIter); iter+=16) -#endif - { - x = savx; - y = savy; - - // Two iterations - tmp = fma(-y,y, fma(x,x,x0)); - y = fma(2.0f*x,y,y0); - x = fma(-y,y, fma(tmp,tmp,x0)); - y = fma(2.0f*tmp,y,y0); - - // Two iterations - tmp = fma(-y,y, fma(x,x,x0)); - y = fma(2.0f*x,y,y0); - x = fma(-y,y, fma(tmp,tmp,x0)); - y = fma(2.0f*tmp,y,y0); - - // Two iterations - tmp = fma(-y,y, fma(x,x,x0)); - y = fma(2.0f*x,y,y0); - x = fma(-y,y, fma(tmp,tmp,x0)); - y = fma(2.0f*tmp,y,y0); - - // Two iterations - tmp = fma(-y,y, fma(x,x,x0)); - y = fma(2.0f*x,y,y0); - x = fma(-y,y, fma(tmp,tmp,x0)); - y = fma(2.0f*tmp,y,y0); - - // Two iterations - tmp = fma(-y,y, fma(x,x,x0)); - y = fma(2.0f*x,y,y0); - x = fma(-y,y, fma(tmp,tmp,x0)); - y = fma(2.0f*tmp,y,y0); - - // Two iterations - tmp = fma(-y,y, fma(x,x,x0)); - y = fma(2.0f*x,y,y0); - x = fma(-y,y, fma(tmp,tmp,x0)); - y = fma(2.0f*tmp,y,y0); - - // Two iterations - tmp = fma(-y,y, fma(x,x,x0)); - y = fma(2.0f*x,y,y0); - x = fma(-y,y, fma(tmp,tmp,x0)); - y = fma(2.0f*tmp,y,y0); - - // Two iterations - tmp = fma(-y,y, fma(x,x,x0)); - y = fma(2.0f*x,y,y0); - x = fma(-y,y, fma(tmp,tmp,x0)); - y = fma(2.0f*tmp,y,y0); - - stay = (x*x+y*y) <= 4.0; - savx = (stay ? x : savx); - savy = (stay ? y : savy); - ccount += stay*16; -#ifdef FAST - if (!stay) - break; -#endif - } - // Handle remainder - if (!stay) { - iter = 16; - do { - x = savx; - y = savy; - stay = ((x*x+y*y) <= 4.0) && (ccount < maxIter); - tmp = x; - x = fma(-y,y, fma(x,x,x0)); - y = fma(2.0f*tmp,y,y0); - ccount += stay; - iter--; - savx = (stay ? x : savx); - savy = (stay ? y : savy); - } - while (stay && iter); - - } - out[tid] = (uint)ccount; -}; - -static const unsigned int FMA_EXPECTEDVALUES_INDEX = 15; - -// Expected results for each kernel run at each coord -unsigned long long expectedIters[] = { - 203277748ull, 2147483648ull, 120254651ull, 203277748ull, 2147483648ull, - 120254651ull, 203277748ull, 2147483648ull, 120254651ull, 203315114ull, - 2147483648ull, 120042599ull, 203315114ull, 2147483648ull, 120042599ull, - 203280620ull, 2147483648ull, 120485704ull, 203280620ull, 2147483648ull, - 120485704ull, 203280620ull, 2147483648ull, 120485704ull, 203315114ull, - 2147483648ull, 120042599ull, 203315114ull, 2147483648ull, 120042599ull}; - -class hipPerfMandelBrot { - public: - hipPerfMandelBrot(); - ~hipPerfMandelBrot(); - - void setNumKernels(unsigned int num) { - numKernels = num; - } - - unsigned int getNumKernels() { - return numKernels; - } - - void setNumStreams(unsigned int num) { - numStreams = num; - } - unsigned int getNumStreams() { - return numStreams; - } - - void open(int deviceID); - void run(unsigned int testCase, unsigned int deviceId); - void printResults(void); - - // array of funtion pointers - typedef void (hipPerfMandelBrot::*funPtr)(uint *out, uint width, float xPos, float yPos, - float xStep, float yStep, uint maxIter, hipStream_t* streams, int blocks, - int threads_per_block, int kernelCnt); - - // Wrappers - void float_mad(uint *out, uint width, float xPos, float yPos, - float xStep, float yStep, uint maxIter, hipStream_t* streams, - int blocks, int threads_per_block, int kernelCnt); - - void float_mandel_unroll(uint *out, uint width, float xPos, float yPos, - float xStep, float yStep, uint maxIter, hipStream_t* streams, - int blocks, int threads_per_block, int kernelCnt); - - void double_mad(uint *out, uint width, float xPos, float yPos, float xStep, - float yStep, uint maxIter, hipStream_t* streams, int blocks, - int threads_per_block, int kernelCnt); - - void double_mandel_unroll(uint *out, uint width, float xPos, float yPos, float xStep, - float yStep, uint maxIter, hipStream_t* streams, int blocks, - int threads_per_block, int kernelCnt); - - hipStream_t streams[2]; - - private: - void setData(void *ptr, unsigned int value); - void checkData(uint *ptr); - - unsigned int numKernels; - unsigned int numStreams; - - std::map> results; - unsigned int width_; - unsigned int bufSize; - unsigned int maxIter; - unsigned int coordIdx; - volatile unsigned long long totalIters = 0; - int numCUs; - static const unsigned int numLoops = 10; -}; - - -hipPerfMandelBrot::hipPerfMandelBrot() {} - -hipPerfMandelBrot::~hipPerfMandelBrot() {} - -void hipPerfMandelBrot::open(int deviceId) { - - - int nGpu = 0; - HIPCHECK(hipGetDeviceCount(&nGpu)); - if (nGpu < 1) { - std::cout << "info: didn't find any GPU! skipping the test!\n"; - passed(); - return; - } - - - HIPCHECK(hipSetDevice(deviceId)); - hipDeviceProp_t props = {0}; - HIPCHECK(hipGetDeviceProperties(&props, deviceId)); - std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name - << " with " << props.multiProcessorCount << " CUs" << " and device id: " << deviceId - << std::endl; - - numCUs = props.multiProcessorCount; -} - - -void hipPerfMandelBrot::printResults() { - - int numkernels = getNumKernels(); - int numStreams = getNumStreams(); - - std::cout << "\n" <<"Measured perf for kernels in GFLOPS on " - << numStreams << " streams (s)" << std::endl; - - std::map>:: iterator itr; - for (itr = results.begin(); itr != results.end(); itr++) { - std::cout << "\n" << std::setw(20) << itr->first << " "; - for(auto i : results[itr->first]) { - std::cout << std::setw(10) << i << " "; - } - } - results.clear(); - - std::cout << std::endl; -} - - -// Wrappers for the kernel launches -void hipPerfMandelBrot::float_mad(uint *out, uint width, float xPos, float yPos, float xStep, - float yStep, uint maxIter, hipStream_t* streams, - int blocks, int threads_per_block, int kernelCnt) { - - int streamCnt = getNumStreams(); - hipLaunchKernelGGL(float_mad_kernel, dim3(blocks), dim3(threads_per_block), 0, - streams[kernelCnt % streamCnt], out, width_, xPos, yPos, xStep, yStep, - maxIter); - - -} - - -void hipPerfMandelBrot::float_mandel_unroll(uint *out, uint width, float xPos, float yPos, - float xStep, float yStep, uint maxIter, hipStream_t * streams, - int blocks, int threads_per_block, int kernelCnt) { - - int streamCnt = getNumStreams(); - hipLaunchKernelGGL(float_mandel_unroll_kernel, dim3(blocks), dim3(threads_per_block), 0, - streams[kernelCnt % streamCnt], out, width_, xPos, yPos, xStep, yStep, maxIter); - -} - - -void hipPerfMandelBrot::double_mad(uint *out, uint width, float xPos, float yPos, - float xStep, float yStep, uint maxIter, hipStream_t * streams, - int blocks, int threads_per_block, int kernelCnt) { - - int streamCnt = getNumStreams(); - hipLaunchKernelGGL(double_mad_kernel, dim3(blocks), dim3(threads_per_block), 0, - streams[kernelCnt % streamCnt], out, width_, xPos, yPos, xStep, yStep, maxIter); - -} - - -void hipPerfMandelBrot::double_mandel_unroll(uint *out, uint width, float xPos, float yPos, - float xStep, float yStep, uint maxIter, hipStream_t * streams, - int blocks, int threads_per_block, int kernelCnt) { - - int streamCnt = getNumStreams(); - hipLaunchKernelGGL(float_mandel_unroll_kernel, dim3(blocks), dim3(threads_per_block), 0, - streams[kernelCnt % streamCnt], out, width_, xPos, yPos, xStep, yStep, maxIter); - -} - - -void hipPerfMandelBrot::run(unsigned int testCase,unsigned int deviceId) { - - unsigned int numStreams = getNumStreams(); - - funPtr p[] = {&hipPerfMandelBrot::float_mad, &hipPerfMandelBrot::float_mandel_unroll, - &hipPerfMandelBrot::double_mad, &hipPerfMandelBrot::double_mandel_unroll}; - - // Maximum iteration count - maxIter = 32768; - - uint * hPtr[numKernels]; - uint * dPtr[numKernels]; - - // Width is divisible by 4 because the mandelbrot kernel processes 4 pixels at once. - width_ = 256; - - bufSize = width_ * width_ * sizeof(uint); - - // Create streams for concurrency - for (uint i = 0; i < numStreams; i++) { - HIPCHECK(hipStreamCreate(&streams[i])); - } - - - // Allocate memory on the host and device - for (uint i = 0; i < numKernels; i++) { - HIPCHECK(hipHostMalloc((void **)&hPtr[i], bufSize, hipHostMallocDefault)); - setData(hPtr[i], 0xdeadbeef); - HIPCHECK(hipMalloc((uint **)&dPtr[i], bufSize)) - } - - - // Prepare kernel launch parameters - int threads = (bufSize/sizeof(uint)); - int threads_per_block = 64; - int blocks = (threads/threads_per_block) + (threads % threads_per_block); - - float xStep = (float)(coords[coordIdx].width / (double)width_); - float yStep = (float)(-coords[coordIdx].width / (double)width_); - float xPos = (float)(coords[coordIdx].x - 0.5 * coords[coordIdx].width); - float yPos = (float)(coords[coordIdx].y + 0.5 * coords[coordIdx].width); - - // Copy memory asynchronously and concurrently from host to device - for (uint i = 0; i < numKernels; i++) { - HIPCHECK(hipMemcpy(dPtr[i], hPtr[i], bufSize, hipMemcpyHostToDevice)); - } - - // Synchronize to make sure all the copies are completed - HIPCHECK(hipStreamSynchronize(0)); - - int kernelIdx; - if(testCase == 0 || testCase == 5 || testCase == 10) { - kernelIdx = 0; - } - - else if(testCase == 1 || testCase == 6 || testCase == 11) { - kernelIdx = 1; - } - else if(testCase == 2 || testCase == 7 || testCase == 12) { - kernelIdx = 2; - } - else if(testCase == 3 || testCase == 8 || testCase == 13){ - kernelIdx = 3; - } - - - double totalTime = 0.0; - - for (unsigned int k = 0; k < numLoops; k++) { - - coordIdx = testCase % numCoords; - - if ((testCase == 0 || testCase == 1 || testCase == 2 || - testCase == 5 || testCase == 6 || testCase == 7 || - testCase == 10 || testCase == 11 || testCase == 12)) { - float xStep = (float)(coords[coordIdx].width / (double)width_); - float yStep = (float)(-coords[coordIdx].width / (double)width_); - float xPos = (float)(coords[coordIdx].x - 0.5 * coords[coordIdx].width); - float yPos = (float)(coords[coordIdx].y + 0.5 * coords[coordIdx].width); - - // Time the kernel execution - auto all_start = std::chrono::steady_clock::now(); - - for (uint i = 0; i < numKernels; i++) { - (this->*p[kernelIdx])(dPtr[i], width_, xPos, yPos, xStep, yStep, maxIter, streams, blocks, - threads_per_block, i); - } - - - // Synchronize all the concurrent streams to have completed execution - HIPCHECK(hipStreamSynchronize(0)); - - auto all_end = std::chrono::steady_clock::now(); - std::chrono::duration all_kernel_time = all_end - all_start; - totalTime += all_kernel_time.count(); - - } - - - else { - double xStep = coords[coordIdx].width / (double)width_; - double yStep = -coords[coordIdx].width / (double)width_; - double xPos = coords[coordIdx].x - 0.5 * coords[coordIdx].width; - double yPos = coords[coordIdx].y + 0.5 * coords[coordIdx].width; - - // Time the kernel execution - auto all_start = std::chrono::steady_clock::now(); - - for (uint i = 0; i < numKernels; i++) { - (this->*p[kernelIdx])(dPtr[i], width_, xPos, yPos, xStep, yStep, maxIter, streams, blocks, - threads_per_block, i); - } - - - // Synchronize all the concurrent streams to have completed execution - HIPCHECK(hipStreamSynchronize(0)); - - auto all_end = std::chrono::steady_clock::now(); - std::chrono::duration all_kernel_time = all_end - all_start; - totalTime += all_kernel_time.count(); - } - - - } - - // Copy data back from device to the host - for(uint i = 0; i < numKernels; i++) { - HIPCHECK(hipMemcpy(hPtr[i] ,dPtr[i], bufSize, hipMemcpyDeviceToHost)); - } - - - for(uint i = 0; i < numKernels; i++) { - checkData(hPtr[i]); - - int j =0; - while((totalIters != expectedIters[j] && totalIters > expectedIters[j]) && j < 30) { - j++; - } - - if(j==30) { - std::cout << "Incorrect iteration count detected. "; - } - - } - - - // Compute GFLOPS. There are 7 FLOPs per iteration - double perf = ((double)(totalIters*numKernels) * 7 * (double)(1e-09)) / - (totalTime / (double)numLoops); - - - std::vector kernelName = {"float", "float_unroll", - "double", "double_unroll"}; - - // Print results except for Warm-up kernel - if(testCase!=100) { - results[kernelName[testCase % 4]].push_back(perf); - } - - - for(uint i = 0 ; i < numStreams; i++) { - HIPCHECK(hipStreamDestroy(streams[i])); - } - - - // Free host and device memory - for (uint i = 0; i < numKernels; i++) { - HIPCHECK(hipFree(hPtr[i])); - HIPCHECK(hipFree(dPtr[i])); - } - - -} - - -void hipPerfMandelBrot::setData(void *ptr, unsigned int value) { - unsigned int *ptr2 = (unsigned int *)ptr; - for (unsigned int i = 0; i < width_ * width_; i++) { - ptr2[i] = value; - } -} - - -void hipPerfMandelBrot::checkData(uint *ptr) { - totalIters = 0; - for (unsigned int i = 0; i < width_ * width_; i++) { - totalIters += ptr[i]; - } -} - - -int main(int argc, char* argv[]) { - hipPerfMandelBrot mandelbrotCompute; - int deviceId = 0; - - mandelbrotCompute.open(deviceId); - - for (unsigned int testCase = 0; testCase < 3; testCase++) { - - - switch (testCase) { - - - case 0: { - // Warmup-kernel - default stream executes serially - mandelbrotCompute.setNumStreams(1); - mandelbrotCompute.setNumKernels(1); - mandelbrotCompute.run(100/*Random number*/, deviceId); - break; - } - - - case 1: { - // run all - sync - int i = 0; - do { - mandelbrotCompute.setNumStreams(1); - mandelbrotCompute.setNumKernels(1); - mandelbrotCompute.run(i, deviceId); - i++; - }while(i < 12); - mandelbrotCompute.printResults(); - - break; - } - - - case 2: { - // run all - async - int i = 0; - do { - mandelbrotCompute.setNumStreams(2); - mandelbrotCompute.setNumKernels(2); - mandelbrotCompute.run(i, deviceId); - i++; - }while(i < 12); - mandelbrotCompute.printResults(); - - break; - - } - - - default: { - break; - } - - - } - - - - } - - - passed(); -} diff --git a/projects/hip-tests/perftests/stream/hipPerfDeviceConcurrency.cpp b/projects/hip-tests/perftests/stream/hipPerfDeviceConcurrency.cpp deleted file mode 100644 index 7d6699a9a2..0000000000 --- a/projects/hip-tests/perftests/stream/hipPerfDeviceConcurrency.cpp +++ /dev/null @@ -1,289 +0,0 @@ -/* - Copyright (c) 2015-2020 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ - -/* HIT_START - * BUILD: %t %s ../../src/test_common.cpp EXCLUDE_HIP_PLATFORM nvcc - * TEST: %t - * HIT_END - */ - -#include -#include -#include "test_common.h" - -typedef struct { - double x; - double y; - double width; -} coordRec; - -static coordRec coords[] = { - {0.0, 0.0, 0.00001}, // All black -}; - -static unsigned int numCoords = sizeof(coords) / sizeof(coordRec); - -__global__ void mandelbrot(uint *out, uint width, float xPos, float yPos, float xStep, - float yStep, uint maxIter) { - - int tid = (blockIdx.x * blockDim.x + threadIdx.x); - int i = tid % width; - int j = tid / width; - float x0 = (float)(xPos + xStep*i); - float y0 = (float)(yPos + yStep*j); - - float x = x0; - float y = y0; - - uint iter = 0; - float tmp; - for (iter = 0; (x*x + y*y <= 4.0f) && (iter < maxIter); iter++) { - tmp = x; - x = fma(-y,y,fma(x,x,x0)); - y = fma(2.0f*tmp,y,y0); - } - - out[tid] = iter; -}; - -class hipPerfDeviceConcurrency { - public: - hipPerfDeviceConcurrency(); - ~hipPerfDeviceConcurrency(); - - void setNumGpus(unsigned int num) { - numDevices = num; - } - unsigned int getNumGpus() { - return numDevices; - } - - void open(void); - void close(void); - void run(unsigned int testCase, int numGpus); - - private: - void setData(void *ptr, unsigned int value); - void checkData(uint *ptr); - - unsigned int numDevices; - unsigned int width_; - unsigned int bufSize; - unsigned int coordIdx; - unsigned long long totalIters = 0; -}; - - -hipPerfDeviceConcurrency::hipPerfDeviceConcurrency() {} - -hipPerfDeviceConcurrency::~hipPerfDeviceConcurrency() {} - -void hipPerfDeviceConcurrency::open(void) { - - - int nGpu = 0; - HIPCHECK(hipGetDeviceCount(&nGpu)); - setNumGpus(nGpu); - if (nGpu < 1) { - std::cout << "info: didn't find any GPU! skipping the test!\n"; - passed(); - } - - -} - - -void hipPerfDeviceConcurrency::close() { -} - -void hipPerfDeviceConcurrency::run(unsigned int testCase, int numGpus) { - - - static int deviceId; - uint * hPtr[numGpus]; - uint * dPtr[numGpus]; - hipStream_t streams[numGpus]; - int numCUs[numGpus]; - unsigned int maxIter[numGpus]; - unsigned long long expectedIters[numGpus]; - - int threads, threads_per_block, blocks; - float xStep, yStep, xPos, yPos; - - for(int i = 0; i < numGpus; i++) { - - if(testCase != 0) { - deviceId = i; - } - - HIPCHECK(hipSetDevice(deviceId)); - - hipDeviceProp_t props = {0}; - HIPCHECK(hipGetDeviceProperties(&props, i)); - - if (testCase != 0) { - std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name - << " with " << props.multiProcessorCount << " CUs" << " and device ID: " - << i << std::endl; - } - - numCUs[i] = props.multiProcessorCount; - int clkFrequency = 0; - HIPCHECK(hipDeviceGetAttribute(&clkFrequency, hipDeviceAttributeClockRate, i)); - - clkFrequency =(unsigned int)clkFrequency/1000; - - // Maximum iteration count - // maxIter = 8388608 * (engine_clock / 1000).serial execution - maxIter[i] = (unsigned int)(((8388608 * ((float)clkFrequency / 1000)) * numCUs[i]) / 128); - maxIter[i] = (maxIter[i] + 15) & ~15; - - // Width is divisible by 4 because the mandelbrot kernel processes 4 pixels at once. - width_ = 256; - - bufSize = width_ * width_ * sizeof(uint); - - // Create streams for concurrency - HIPCHECK(hipStreamCreate(&streams[i])); - - // Allocate memory on the host and device - HIPCHECK(hipHostMalloc((void **)&hPtr[i], bufSize, hipHostMallocDefault)); - setData(hPtr[i], 0xdeadbeef); - HIPCHECK(hipMalloc((uint **)&dPtr[i], bufSize)) - - // Prepare kernel launch parameters - threads = (bufSize/sizeof(uint)); - threads_per_block = 64; - blocks = (threads/threads_per_block) + (threads % threads_per_block); - - coordIdx = testCase % numCoords; - xStep = (float)(coords[coordIdx].width / (double)width_); - yStep = (float)(-coords[coordIdx].width / (double)width_); - xPos = (float)(coords[coordIdx].x - 0.5 * coords[coordIdx].width); - yPos = (float)(coords[coordIdx].y + 0.5 * coords[coordIdx].width); - - // Copy memory from host to device - HIPCHECK(hipMemcpy(dPtr[i], hPtr[i], bufSize, hipMemcpyHostToDevice)); - - } - - // Time the kernel execution - auto all_start = std::chrono::steady_clock::now(); - - for(int i = 0; i < numGpus; i++) { - - if(testCase != 0) { - deviceId = i; - } - - HIPCHECK(hipSetDevice(deviceId)); - - hipLaunchKernelGGL(mandelbrot, dim3(blocks), dim3(threads_per_block), 0, streams[i], - dPtr[i], width_, xPos, yPos, xStep, yStep, maxIter[i]); - - } - - for(int i = 0; i < numGpus; i++) { - HIPCHECK(hipStreamSynchronize(0)); - } - - - auto all_end = std::chrono::steady_clock::now(); - std::chrono::duration all_kernel_time = all_end - all_start; - - for(int i = 0; i < numGpus; i++) { - - if(testCase != 0) { - deviceId = i; - } - HIPCHECK(hipSetDevice(deviceId)); - - // Copy data back from device to the host - HIPCHECK(hipMemcpy(hPtr[i], dPtr[i], bufSize, hipMemcpyDeviceToHost)); - - checkData(hPtr[i]); - expectedIters[i] = width_ * width_ * (unsigned long long) maxIter[i]; - - if (testCase != 0) { - checkData(hPtr[i]); - if(totalIters != expectedIters[i]) { - std::cout << "Incorrect iteration count detected" << std::endl; - } - } - - - HIPCHECK(hipStreamDestroy(streams[i])); - - // Free host and device memory - HIPCHECK(hipFree(hPtr[i])); - HIPCHECK(hipFree(dPtr[i])); - } - - if (testCase != 0) { - std::cout << '\n' << "Measured time for kernel computation on " << numGpus << " device (s): " - << all_kernel_time.count() << " (s) " << '\n' << std::endl; - } - - if(testCase == 0) { - deviceId++; - } - - -} - - -void hipPerfDeviceConcurrency::setData(void *ptr, unsigned int value) { - unsigned int *ptr2 = (unsigned int *)ptr; - for (unsigned int i = 0; i < width_ * width_ ; i++) { - ptr2[i] = value; - } -} - - -void hipPerfDeviceConcurrency::checkData(uint *ptr) { - totalIters = 0; - for (unsigned int i = 0; i < width_ * width_; i++) { - totalIters += ptr[i]; - } -} - - -int main(int argc, char* argv[]) { - hipPerfDeviceConcurrency deviceConcurrency; - - deviceConcurrency.open(); - - int nGpu = deviceConcurrency.getNumGpus(); - - // testCase = 0 refers to warmup kernel run - int testCase = 0; - - for (int i = 0; i < nGpu; i++) { - // Warm-up kernel on all devices - deviceConcurrency.run(testCase, 1); - } - - // Time for kernel on 1 device - deviceConcurrency.run(++testCase, 1); - - // Time for kernel on all available devices - deviceConcurrency.run(++testCase, nGpu); - - passed(); -} diff --git a/projects/hip-tests/samples/0_Intro/bit_extract/CMakeLists.txt b/projects/hip-tests/samples/0_Intro/bit_extract/CMakeLists.txt deleted file mode 100644 index c9b13be812..0000000000 --- a/projects/hip-tests/samples/0_Intro/bit_extract/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -project(bit_extract) - -cmake_minimum_required(VERSION 3.10) - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) - -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) - -# Create the excutable -add_executable(bit_extract bit_extract.cpp) - -# Link with HIP -target_link_libraries(bit_extract hip::host) \ No newline at end of file diff --git a/projects/hip-tests/samples/0_Intro/bit_extract/Makefile b/projects/hip-tests/samples/0_Intro/bit_extract/Makefile index 3427815ffc..4a3a0bb4fe 100644 --- a/projects/hip-tests/samples/0_Intro/bit_extract/Makefile +++ b/projects/hip-tests/samples/0_Intro/bit_extract/Makefile @@ -9,15 +9,19 @@ HIPCC=$(HIP_PATH)/bin/hipcc # Show how to use PLATFORM to specify different options for each compiler: ifeq (${HIP_PLATFORM}, nvcc) - HIPCC_FLAGS = -gencode=arch=compute_20,code=sm_20 + HIPCC_FLAGS = -gencode=arch=compute_20,code=sm_20 endif EXE=bit_extract +EXE_STATIC=bit_extract_static $(EXE): bit_extract.cpp $(HIPCC) $(HIPCC_FLAGS) $< -o $@ -all: $(EXE) +$(EXE_STATIC): bit_extract.cpp + $(HIPCC) -use-staticlib $(HIPCC_FLAGS) $< -o $@ + +all: $(EXE) $(EXE_STATIC) clean: - rm -f *.o $(EXE) + rm -f *.o $(EXE) $(EXE_STATIC) diff --git a/projects/hip-tests/samples/0_Intro/module_api/CMakeLists.txt b/projects/hip-tests/samples/0_Intro/module_api/CMakeLists.txt deleted file mode 100644 index 0f5cc32f91..0000000000 --- a/projects/hip-tests/samples/0_Intro/module_api/CMakeLists.txt +++ /dev/null @@ -1,36 +0,0 @@ -project(module_api) - -cmake_minimum_required(VERSION 3.10) - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) - -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) - -# Create the excutable -add_executable(runKernel.hip.out runKernel.cpp) -add_executable(launchKernelHcc.hip.out launchKernelHcc.cpp) -add_executable(defaultDriver.hip.out defaultDriver.cpp) - -# Generate code object -add_custom_target( - codeobj - ALL - COMMAND ${HIP_HIPCC_EXECUTABLE} --genco ../vcpy_kernel.cpp -o vcpy_kernel.code - COMMENT "codeobj generated" -) - -add_dependencies(runKernel.hip.out codeobj) -add_dependencies(launchKernelHcc.hip.out codeobj) -add_dependencies(defaultDriver.hip.out codeobj) - -# Link with HIP -target_link_libraries(runKernel.hip.out hip::host) -target_link_libraries(launchKernelHcc.hip.out hip::host) -target_link_libraries(defaultDriver.hip.out hip::host) diff --git a/projects/hip-tests/samples/0_Intro/module_api_global/CMakeLists.txt b/projects/hip-tests/samples/0_Intro/module_api_global/CMakeLists.txt deleted file mode 100644 index 00caa79cfa..0000000000 --- a/projects/hip-tests/samples/0_Intro/module_api_global/CMakeLists.txt +++ /dev/null @@ -1,30 +0,0 @@ -project(modile_api_global) - -cmake_minimum_required(VERSION 3.10) - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) - -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) - -# Create the excutable -add_executable(runKernel.hip.out runKernel.cpp) - -# Generate code object -add_custom_target( - codeobj - ALL - COMMAND ${HIP_HIPCC_EXECUTABLE} --genco ../vcpy_kernel.cpp -o vcpy_kernel.code - COMMENT "codeobj generated" -) - -add_dependencies(runKernel.hip.out codeobj) - -# Link with HIP -target_link_libraries(runKernel.hip.out hip::host) \ No newline at end of file diff --git a/projects/hip-tests/samples/0_Intro/square/CMakeLists.txt b/projects/hip-tests/samples/0_Intro/square/CMakeLists.txt deleted file mode 100644 index 845c43fd1f..0000000000 --- a/projects/hip-tests/samples/0_Intro/square/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -#Follow "README.md" to generate square.cpp if it's missing - -project(square) - -cmake_minimum_required(VERSION 3.10) - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) - -# Create the excutable -add_executable(square square.cpp) - -# Link with HIP -target_link_libraries(square hip::host) \ No newline at end of file diff --git a/projects/hip-tests/samples/0_Intro/square/Makefile b/projects/hip-tests/samples/0_Intro/square/Makefile index 9bb0dd8205..aa046eeaaa 100644 --- a/projects/hip-tests/samples/0_Intro/square/Makefile +++ b/projects/hip-tests/samples/0_Intro/square/Makefile @@ -11,7 +11,7 @@ else SOURCES=square.cpp endif -all: square.out +all: square.out square.out.static # Step square.cpp: square.cu @@ -20,5 +20,8 @@ square.cpp: square.cu square.out: $(SOURCES) $(HIPCC) $(CXXFLAGS) $(SOURCES) -o $@ +square.out.static: $(SOURCES) + $(HIPCC) -use-staticlib $(CXXFLAGS) $(SOURCES) -o $@ + clean: - rm -f *.o *.out square.cpp + rm -f *.o *.out *.out.static square.cpp diff --git a/projects/hip-tests/samples/0_Intro/square/README.md b/projects/hip-tests/samples/0_Intro/square/README.md index 0bbb2f7e39..c185903993 100644 --- a/projects/hip-tests/samples/0_Intro/square/README.md +++ b/projects/hip-tests/samples/0_Intro/square/README.md @@ -1,39 +1,13 @@ # Square.md -Simple test which shows how to use hipify-perl to port CUDA code to HIP. -See related [blog](http://gpuopen.com/hip-to-be-squared-an-introductory-hip-tutorial) that explains the example. +Simple test which shows how to use hipify-perl to port CUDA code to HIP. +See related [blog](http://gpuopen.com/hip-to-be-squared-an-introductory-hip-tutorial) that explains the example. Now it is even simpler and requires no manual modification to the hipified source code - just hipify and compile: -- Add hip/bin path to the PATH +1. Add hip/bin path to the PATH : + export PATH=$PATH:[MYHIP]/bin -``` -$ export PATH=$PATH:[MYHIP]/bin -``` - -- Define environment variable - -``` -$ export HIP_PATH=[MYHIP] -``` - -- Build executible file - -``` -$ cd ~/hip/samples/0_Intro/square -$ make -/home/user/hip/bin/hipify-perl square.cu > square.cpp -/home/user/hip/bin/hipcc square.cpp -o square.out -/home/user/hip/bin/hipcc -use-staticlib square.cpp -o square.out.static -``` -- Execute file -``` -$ ./square.out -info: running on device Navi 14 [Radeon Pro W5500] -info: allocate host mem ( 7.63 MB) -info: allocate device mem ( 7.63 MB) -info: copy Host2Device -info: launch 'vector_square' kernel -info: copy Device2Host -info: check result -PASSED! -``` +2. $ make + Make runs these steps. This can be performed on either CUDA or AMD platform: + hipify-perl square.cu > square.cpp # convert cuda code to hip code + hipcc square.cpp # compile into executable diff --git a/projects/hip-tests/samples/1_Utils/hipBusBandwidth/CMakeLists.txt b/projects/hip-tests/samples/1_Utils/hipBusBandwidth/CMakeLists.txt deleted file mode 100644 index df01c31d97..0000000000 --- a/projects/hip-tests/samples/1_Utils/hipBusBandwidth/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -project(hipBusBandwidth) - -cmake_minimum_required(VERSION 3.10) - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_BUILD_TYPE Release) - -# Create the excutable -add_executable(hipBusBandwidth hipBusBandwidth.cpp ResultDatabase.cpp) - -# Link with HIP -target_link_libraries(hipBusBandwidth hip::host) \ No newline at end of file diff --git a/projects/hip-tests/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp b/projects/hip-tests/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp index 8032bd0a20..6181c49afe 100644 --- a/projects/hip-tests/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp +++ b/projects/hip-tests/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp @@ -12,7 +12,7 @@ enum MallocMode { MallocPinned, MallocUnpinned, MallocRegistered }; bool p_verbose = false; MallocMode p_malloc_mode = MallocPinned; int p_numa_ctl = -1; -int p_iterations = 0; +int p_iterations = 10; int p_beatsperiteration = 1; int p_device = 0; int p_detailed = 0; @@ -89,9 +89,7 @@ hipError_t memcopy(void* dst, const void* src, size_t sizeBytes, enum hipMemcpyK int sizes[] = {-64, -256, -512, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288}; int nSizes = sizeof(sizes) / sizeof(int); -// iterations to be run for the corresponding sizes, less number as the size increases -int iterations[] = {1000, 1000, 1000, 1000, 500, 500, 500, 500, 500, 200, 200, 200, - 200, 200, 100, 100, 100, 100, 50, 50, 50, 20, 20}; + // **************************************************************************** // Function: RunBenchmark_H2D @@ -176,48 +174,53 @@ void RunBenchmark_H2D(ResultDatabase& resultDB) { hipEventCreate(&stop); CHECK_HIP_ERROR(); - // store the times temporarily to estimate latency - // float times[nSizes]; - for (int i = 0; i < nSizes; i++) { - int sizeIndex, iterIndex; - sizeIndex = i; - iterIndex = i; + // Three passes, forward and backward both + for (int pass = 0; pass < p_iterations; pass++) { + // store the times temporarily to estimate latency + // float times[nSizes]; + // Step through sizes forward on even passes and backward on odd + for (int i = 0; i < nSizes; i++) { + int sizeIndex; + if ((pass % 2) == 0) + sizeIndex = i; + else + sizeIndex = (nSizes - 1) - i; - const int thisSize = p_onesize ? p_onesize : sizes[sizeIndex]; - const int nbytes = sizeToBytes(thisSize); - const int niter = p_iterations ? p_iterations : iterations[iterIndex]; - for (int pass = 0; pass < niter; pass++) { + const int thisSize = p_onesize ? p_onesize : sizes[sizeIndex]; + const int nbytes = sizeToBytes(thisSize); - hipEventRecord(start, 0); - for (int j = 0; j < p_beatsperiteration; j++) { - memcopy(device, hostMem, nbytes, hipMemcpyHostToDevice); + hipEventRecord(start, 0); + for (int j = 0; j < p_beatsperiteration; j++) { + memcopy(device, hostMem, nbytes, hipMemcpyHostToDevice); + } + hipEventRecord(stop, 0); + hipEventSynchronize(stop); + float t = 0; + hipEventElapsedTime(&t, start, stop); + // times[sizeIndex] = t; + + // Convert to GB/sec + if (p_verbose) { + std::cerr << "size " << sizeToString(thisSize) << " took " << t << " ms\n"; + } + + double speed = + (double(double(sizeToBytes(thisSize)/1000) * p_beatsperiteration) / 1000) / t; + char sizeStr[256]; + if (p_beatsperiteration > 1) { + sprintf(sizeStr, "%9sx%d", sizeToString(thisSize).c_str(), p_beatsperiteration); + } else { + sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str()); + } + resultDB.AddResult(std::string("H2D_Bandwidth") + "_" + mallocModeString(p_malloc_mode), + sizeStr, "GB/sec", speed); + resultDB.AddResult(std::string("H2D_Time") + mallocModeString(p_malloc_mode), sizeStr, + "ms", t); + + if (p_onesize) { + break; + } } - hipEventRecord(stop, 0); - hipEventSynchronize(stop); - float t = 0; - hipEventElapsedTime(&t, start, stop); - // times[sizeIndex] = t; - // Convert to GB/sec - if (p_verbose) { - std::cerr << "size " << sizeToString(thisSize) << " took " << t << " ms\n"; - } - - double speed = - (double(double(sizeToBytes(thisSize)/1000) * p_beatsperiteration) / 1000) / t; - char sizeStr[256]; - if (p_beatsperiteration > 1) { - sprintf(sizeStr, "%9sx%d", sizeToString(thisSize).c_str(), p_beatsperiteration); - } else { - sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str()); - } - resultDB.AddResult(std::string("H2D_Bandwidth") + "_" + mallocModeString(p_malloc_mode), - sizeStr, "GB/sec", speed); - resultDB.AddResult(std::string("H2D_Time") + mallocModeString(p_malloc_mode), sizeStr, "ms", t); - - } - if (p_onesize) { - break; - } } if (p_onesize) { @@ -344,50 +347,53 @@ void RunBenchmark_D2H(ResultDatabase& resultDB) { hipEventCreate(&stop); CHECK_HIP_ERROR(); - // store the times temporarily to estimate latency - // float times[nSizes]; - for (int i = 0; i < nSizes; i++) { - int sizeIndex, iterIndex; - sizeIndex = i; - iterIndex = i; + // Three passes, forward and backward both + for (int pass = 0; pass < p_iterations; pass++) { + // store the times temporarily to estimate latency + // float times[nSizes]; + // Step through sizes forward on even passes and backward on odd + for (int i = 0; i < nSizes; i++) { + int sizeIndex; + if ((pass % 2) == 0) + sizeIndex = i; + else + sizeIndex = (nSizes - 1) - i; - const int thisSize = p_onesize ? p_onesize : sizes[sizeIndex]; - const int nbytes = sizeToBytes(thisSize); - const int niter = p_iterations ? p_iterations : iterations[iterIndex]; - for (int pass = 0; pass < niter; pass++) { + const int thisSize = p_onesize ? p_onesize : sizes[sizeIndex]; + const int nbytes = sizeToBytes(thisSize); - hipEventRecord(start, 0); - for (int j = 0; j < p_beatsperiteration; j++) { - memcopy(hostMem2, device, nbytes, hipMemcpyDeviceToHost); + hipEventRecord(start, 0); + for (int j = 0; j < p_beatsperiteration; j++) { + memcopy(hostMem2, device, nbytes, hipMemcpyDeviceToHost); + } + hipEventRecord(stop, 0); + hipEventSynchronize(stop); + float t = 0; + hipEventElapsedTime(&t, start, stop); + // times[sizeIndex] = t; + + // Convert to GB/sec + if (p_verbose) { + std::cerr << "size " << sizeToString(thisSize) << " took " << t << " ms\n"; + } + + double speed = + (double(double(sizeToBytes(thisSize)/1000) * p_beatsperiteration) / 1000) / t; + char sizeStr[256]; + sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str()); + if (p_beatsperiteration > 1) { + sprintf(sizeStr, "%9sx%d", sizeToString(thisSize).c_str(), p_beatsperiteration); + } else { + sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str()); + } + resultDB.AddResult(std::string("D2H_Bandwidth") + "_" + mallocModeString(p_malloc_mode), + sizeStr, "GB/sec", speed); + resultDB.AddResult(std::string("D2H_Time") + "_" + mallocModeString(p_malloc_mode), + sizeStr, "ms", t); + if (p_onesize) { + break; + } } - hipEventRecord(stop, 0); - hipEventSynchronize(stop); - float t = 0; - hipEventElapsedTime(&t, start, stop); - // times[sizeIndex] = t; - // Convert to GB/sec - if (p_verbose) { - std::cerr << "size " << sizeToString(thisSize) << " took " << t << " ms\n"; - } - - double speed = - (double(double(sizeToBytes(thisSize)/1000) * p_beatsperiteration) / 1000) / t; - char sizeStr[256]; - sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str()); - if (p_beatsperiteration > 1) { - sprintf(sizeStr, "%9sx%d", sizeToString(thisSize).c_str(), p_beatsperiteration); - } else { - sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str()); - } - resultDB.AddResult(std::string("D2H_Bandwidth") + "_" + mallocModeString(p_malloc_mode), - sizeStr, "GB/sec", speed); - resultDB.AddResult(std::string("D2H_Time") + "_" + mallocModeString(p_malloc_mode), - sizeStr, "ms", t); - - } - if (p_onesize) { - break; - } } if (p_onesize) { @@ -516,43 +522,43 @@ void RunBenchmark_Bidir(ResultDatabase& resultDB) { hipStreamCreate(&stream[0]); hipStreamCreate(&stream[1]); - // store the times temporarily to estimate latency - // float times[nSizes]; - for (int i = 0; i < nSizes; i++) { - int sizeIndex, iterIndex; - sizeIndex = i; - iterIndex = i; + // Three passes, forward and backward both + for (int pass = 0; pass < p_iterations; pass++) { + // store the times temporarily to estimate latency + // float times[nSizes]; + // Step through sizes forward on even passes and backward on odd + for (int i = 0; i < nSizes; i++) { + int sizeIndex; + if ((pass % 2) == 0) + sizeIndex = i; + else + sizeIndex = (nSizes - 1) - i; - const int thisSize = p_onesize ? p_onesize : sizes[sizeIndex]; - const int nbytes = sizeToBytes(thisSize); - const int niter = p_iterations ? p_iterations : iterations[iterIndex]; - for (int pass = 0; pass < niter; pass++) { + const int thisSize = p_onesize ? p_onesize : sizes[sizeIndex]; + const int nbytes = sizeToBytes(thisSize); - hipEventRecord(start, 0); - hipMemcpyAsync(deviceMem[0], hostMem[0], nbytes, hipMemcpyHostToDevice, stream[0]); - hipMemcpyAsync(hostMem[1], deviceMem[1], nbytes, hipMemcpyDeviceToHost, stream[1]); - hipEventRecord(stop, 0); - hipEventSynchronize(stop); - float t = 0; - hipEventElapsedTime(&t, start, stop); + hipEventRecord(start, 0); + hipMemcpyAsync(deviceMem[0], hostMem[0], nbytes, hipMemcpyHostToDevice, stream[0]); + hipMemcpyAsync(hostMem[1], deviceMem[1], nbytes, hipMemcpyDeviceToHost, stream[1]); + hipEventRecord(stop, 0); + hipEventSynchronize(stop); + float t = 0; + hipEventElapsedTime(&t, start, stop); - // Convert to GB/sec - if (p_verbose) { - std::cerr << "size " << sizeToString(thisSize) << " took " << t << " ms\n"; + // Convert to GB/sec + if (p_verbose) { + std::cerr << "size " << sizeToString(thisSize) << " took " << t << " ms\n"; + } + + double speed = (double(sizeToBytes(2 * thisSize)) / (1000 * 1000)) / t; + char sizeStr[256]; + sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str()); + resultDB.AddResult( + std::string("Bidir_Bandwidth") + "_" + mallocModeString(p_malloc_mode), sizeStr, + "GB/sec", speed); + resultDB.AddResult(std::string("Bidir_Time") + "_" + mallocModeString(p_malloc_mode), + sizeStr, "ms", t); } - - double speed = (double(sizeToBytes(2 * thisSize)) / (1000 * 1000)) / t; - char sizeStr[256]; - sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str()); - resultDB.AddResult( - std::string("Bidir_Bandwidth") + "_" + mallocModeString(p_malloc_mode), sizeStr, - "GB/sec", speed); - resultDB.AddResult(std::string("Bidir_Time") + "_" + mallocModeString(p_malloc_mode), - sizeStr, "ms", t); - } - if (p_onesize) { - break; - } } // Cleanup @@ -702,63 +708,66 @@ void RunBenchmark_P2P_Unidir(ResultDatabase& resultDB) { hipEventCreate(&stop); CHECK_HIP_ERROR(); - // store the times temporarily to estimate latency - // float times[nSizes]; - for (int i = 0; i < nSizes; i++) { - int sizeIndex, iterIndex; - sizeIndex = i; - iterIndex = i; + // Three passes, forward and backward both + for (int pass = 0; pass < p_iterations; pass++) { + // store the times temporarily to estimate latency + // float times[nSizes]; + // Step through sizes forward on even passes and backward on odd + for (int i = 0; i < nSizes; i++) { + int sizeIndex; + if ((pass % 2) == 0) + sizeIndex = i; + else + sizeIndex = (nSizes - 1) - i; - const int thisSize = p_onesize ? p_onesize : sizes[sizeIndex]; - const int nbytes = sizeToBytes(thisSize); - const int niter = p_iterations ? p_iterations : iterations[iterIndex]; - for (int pass = 0; pass < niter; pass++) { + const int thisSize = p_onesize ? p_onesize : sizes[sizeIndex]; + const int nbytes = sizeToBytes(thisSize); - hipDeviceSynchronize(); + hipDeviceSynchronize(); - hipEventRecord(start, 0); + hipEventRecord(start, 0); - for (int j = 0; j < p_beatsperiteration; j++) { - hipMemcpy(peerGpuMem, currentGpuMem, nbytes, hipMemcpyDeviceToDevice); - } + for (int j = 0; j < p_beatsperiteration; j++) { + hipMemcpy(peerGpuMem, currentGpuMem, nbytes, hipMemcpyDeviceToDevice); + } - hipEventRecord(stop, 0); + hipEventRecord(stop, 0); - hipEventSynchronize(stop); + hipEventSynchronize(stop); - float t = 0; - hipEventElapsedTime(&t, start, stop); - // times[sizeIndex] = t; + float t = 0; + hipEventElapsedTime(&t, start, stop); + // times[sizeIndex] = t; - // Convert to GB/sec - if (p_verbose) { - std::cerr << "size " << sizeToString(thisSize) << " took " << t << " ms\n"; - } + // Convert to GB/sec + if (p_verbose) { + std::cerr << "size " << sizeToString(thisSize) << " took " << t << " ms\n"; + } - double speed = - (double(double(sizeToBytes(thisSize)/1000) * p_beatsperiteration) / 1000) / t; - char sizeStr[256]; - if (p_beatsperiteration > 1) { - sprintf(sizeStr, "%9sx%d", sizeToString(thisSize).c_str(), - p_beatsperiteration); - } else { - sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str()); - } + double speed = + (double(double(sizeToBytes(thisSize)/1000) * p_beatsperiteration) / 1000) / t; + char sizeStr[256]; + if (p_beatsperiteration > 1) { + sprintf(sizeStr, "%9sx%d", sizeToString(thisSize).c_str(), + p_beatsperiteration); + } else { + sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str()); + } - string cGpu, pGpu; - cGpu = gpuIDToString(currentGpu); - pGpu = gpuIDToString(peerGpu); + string cGpu, pGpu; + cGpu = gpuIDToString(currentGpu); + pGpu = gpuIDToString(peerGpu); - resultDB.AddResult(std::string("p2p_uni") + "_gpu" + std::string(cGpu) + - "_gpu" + std::string(pGpu), + resultDB.AddResult(std::string("p2p_uni") + "_gpu" + std::string(cGpu) + + "_gpu" + std::string(pGpu), sizeStr, "GB/sec", speed); - resultDB.AddResult(std::string("P2P_uni") + "_gpu" + std::string(cGpu) + - "_gpu" + std::string(pGpu), + resultDB.AddResult(std::string("P2P_uni") + "_gpu" + std::string(cGpu) + + "_gpu" + std::string(pGpu), sizeStr, "ms", t); - } - if (p_onesize) { - break; + if (p_onesize) { + break; + } } } @@ -820,68 +829,71 @@ void RunBenchmark_P2P_Bidir(ResultDatabase& resultDB) { hipStreamCreate(&stream[0]); hipStreamCreate(&stream[1]); - // store the times temporarily to estimate latency - // float times[nSizes]; - for (int i = 0; i < nSizes; i++) { - int sizeIndex, iterIndex; - sizeIndex = i; - iterIndex = i; + // Three passes, forward and backward both + for (int pass = 0; pass < p_iterations; pass++) { + // store the times temporarily to estimate latency + // float times[nSizes]; + // Step through sizes forward on even passes and backward on odd + for (int i = 0; i < nSizes; i++) { + int sizeIndex; + if ((pass % 2) == 0) + sizeIndex = i; + else + sizeIndex = (nSizes - 1) - i; - const int thisSize = p_onesize ? p_onesize : sizes[sizeIndex]; - const int nbytes = sizeToBytes(thisSize); - const int niter = p_iterations ? p_iterations : iterations[iterIndex]; - for (int pass = 0; pass < niter; pass++) { + const int thisSize = p_onesize ? p_onesize : sizes[sizeIndex]; + const int nbytes = sizeToBytes(thisSize); - hipDeviceSynchronize(); + hipDeviceSynchronize(); - hipEventRecord(start, 0); + hipEventRecord(start, 0); - for (int j = 0; j < p_beatsperiteration; j++) { - hipMemcpyAsync(peerGpuMem[0], currentGpuMem[0], nbytes, - hipMemcpyDeviceToDevice, stream[0]); - hipMemcpyAsync(currentGpuMem[1], peerGpuMem[1], nbytes, - hipMemcpyDeviceToDevice, stream[1]); + for (int j = 0; j < p_beatsperiteration; j++) { + hipMemcpyAsync(peerGpuMem[0], currentGpuMem[0], nbytes, + hipMemcpyDeviceToDevice, stream[0]); + hipMemcpyAsync(currentGpuMem[1], peerGpuMem[1], nbytes, + hipMemcpyDeviceToDevice, stream[1]); + } + + hipEventRecord(stop, 0); + + hipEventSynchronize(stop); + + float t = 0; + hipEventElapsedTime(&t, start, stop); + // times[sizeIndex] = t; + + // Convert to GB/sec + if (p_verbose) { + std::cerr << "size " << sizeToString(thisSize) << " took " << t << " ms\n"; + } + + double speed = + (double(double(sizeToBytes(2 * thisSize)/1000) * p_beatsperiteration) / 1000) / + t; + char sizeStr[256]; + if (p_beatsperiteration > 1) { + sprintf(sizeStr, "%9sx%d", sizeToString(thisSize).c_str(), + p_beatsperiteration); + } else { + sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str()); + } + + string cGpu, pGpu; + cGpu = gpuIDToString(currentGpu); + pGpu = gpuIDToString(peerGpu); + + resultDB.AddResult(std::string("p2p_bi") + "_gpu" + std::string(cGpu) + "_gpu" + + std::string(pGpu), + sizeStr, "GB/sec", speed); + resultDB.AddResult(std::string("P2P_bi") + "_gpu" + std::string(cGpu) + "_gpu" + + std::string(pGpu), + sizeStr, "ms", t); + + if (p_onesize) { + break; + } } - - hipEventRecord(stop, 0); - - hipEventSynchronize(stop); - - float t = 0; - hipEventElapsedTime(&t, start, stop); - // times[sizeIndex] = t; - - // Convert to GB/sec - if (p_verbose) { - std::cerr << "size " << sizeToString(thisSize) << " took " << t << " ms\n"; - } - - double speed = - (double(double(sizeToBytes(2 * thisSize)/1000) * p_beatsperiteration) / 1000) / - t; - char sizeStr[256]; - if (p_beatsperiteration > 1) { - sprintf(sizeStr, "%9sx%d", sizeToString(thisSize).c_str(), - p_beatsperiteration); - } else { - sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str()); - } - - string cGpu, pGpu; - cGpu = gpuIDToString(currentGpu); - pGpu = gpuIDToString(peerGpu); - - resultDB.AddResult(std::string("p2p_bi") + "_gpu" + std::string(cGpu) + "_gpu" + - std::string(pGpu), - sizeStr, "GB/sec", speed); - resultDB.AddResult(std::string("P2P_bi") + "_gpu" + std::string(cGpu) + "_gpu" + - std::string(pGpu), - sizeStr, "ms", t); - - } - if (p_onesize) { - break; - } } if (p_onesize) { diff --git a/projects/hip-tests/samples/1_Utils/hipCommander/CMakeLists.txt b/projects/hip-tests/samples/1_Utils/hipCommander/CMakeLists.txt deleted file mode 100644 index 2592020c66..0000000000 --- a/projects/hip-tests/samples/1_Utils/hipCommander/CMakeLists.txt +++ /dev/null @@ -1,31 +0,0 @@ -project(hipCommander) - -cmake_minimum_required(VERSION 3.10) - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_BUILD_TYPE Release) - -# Create the excutable -add_executable(hipCommander hipCommander.cpp) - -# Generate code object -add_custom_target( - codeobj - ALL - COMMAND ${HIP_HIPCC_EXECUTABLE} --genco ../nullkernel.hip.cpp -o nullkernel.hsaco - COMMENT "codeobj generated" -) - -add_dependencies(hipCommander codeobj) - -# Link with HIP -target_link_libraries(hipCommander hip::host) -set_property(TARGET hipCommander PROPERTY CXX_STANDARD 11) diff --git a/projects/hip-tests/samples/1_Utils/hipDispatchLatency/CMakeLists.txt b/projects/hip-tests/samples/1_Utils/hipDispatchLatency/CMakeLists.txt deleted file mode 100644 index b267f91905..0000000000 --- a/projects/hip-tests/samples/1_Utils/hipDispatchLatency/CMakeLists.txt +++ /dev/null @@ -1,35 +0,0 @@ -project(hipDispatchLatency) - -cmake_minimum_required(VERSION 3.10) - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_BUILD_TYPE Release) - -# Create the excutable -add_executable(hipDispatchLatency hipDispatchLatency.cpp) -add_executable(hipDispatchEnqueueRateMT hipDispatchEnqueueRateMT.cpp) - -# Generate code object -add_custom_target( - codeobj - ALL - COMMAND ${HIP_HIPCC_EXECUTABLE} --genco ../test_kernel.cpp -o test_kernel.code - COMMENT "codeobj generated" -) - -add_dependencies(hipDispatchLatency codeobj) -add_dependencies(hipDispatchEnqueueRateMT codeobj) - -# Link with HIP -target_link_libraries(hipDispatchLatency hip::host) -target_link_libraries(hipDispatchEnqueueRateMT hip::host) -set_property(TARGET hipDispatchLatency PROPERTY CXX_STANDARD 11) -set_property(TARGET hipDispatchEnqueueRateMT PROPERTY CXX_STANDARD 11) diff --git a/projects/hip-tests/samples/1_Utils/hipInfo/CMakeLists.txt b/projects/hip-tests/samples/1_Utils/hipInfo/CMakeLists.txt deleted file mode 100644 index f3678d3160..0000000000 --- a/projects/hip-tests/samples/1_Utils/hipInfo/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -project(hipInfo) - -cmake_minimum_required(VERSION 3.10) - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_BUILD_TYPE Release) - -# Create the excutable -add_executable(hipInfo hipInfo.cpp) - -# Link with HIP -target_link_libraries(hipInfo hip::host) diff --git a/projects/hip-tests/samples/2_Cookbook/0_MatrixTranspose/CMakeLists.txt b/projects/hip-tests/samples/2_Cookbook/0_MatrixTranspose/CMakeLists.txt deleted file mode 100644 index de5bb0b5ea..0000000000 --- a/projects/hip-tests/samples/2_Cookbook/0_MatrixTranspose/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -project(MatrixTranspose) - -cmake_minimum_required(VERSION 3.10) - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_BUILD_TYPE Release) - -# Create the excutable -add_executable(MatrixTranspose MatrixTranspose.cpp) - -# Link with HIP -target_link_libraries(MatrixTranspose hip::host) diff --git a/projects/hip-tests/samples/2_Cookbook/10_inline_asm/CMakeLists.txt b/projects/hip-tests/samples/2_Cookbook/10_inline_asm/CMakeLists.txt deleted file mode 100644 index 7adb51f5de..0000000000 --- a/projects/hip-tests/samples/2_Cookbook/10_inline_asm/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -project(inline_asm) - -cmake_minimum_required(VERSION 3.10) - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_BUILD_TYPE Release) - -# Create the excutable -add_executable(inline_asm inline_asm.cpp) - -# Link with HIP -target_link_libraries(inline_asm hip::host) diff --git a/projects/hip-tests/samples/2_Cookbook/11_texture_driver/CMakeLists.txt b/projects/hip-tests/samples/2_Cookbook/11_texture_driver/CMakeLists.txt deleted file mode 100644 index 8ff242c993..0000000000 --- a/projects/hip-tests/samples/2_Cookbook/11_texture_driver/CMakeLists.txt +++ /dev/null @@ -1,30 +0,0 @@ -project(texture2dDrv) - -cmake_minimum_required(VERSION 3.10) - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_BUILD_TYPE Release) - -# Create the excutable -add_executable(texture2dDrv texture2dDrv.cpp) - -# Generate code object -add_custom_target( - codeobj - ALL - COMMAND ${HIP_HIPCC_EXECUTABLE} --genco ../tex2dKernel.cpp -o tex2dKernel.code - COMMENT "codeobj generated" -) - -add_dependencies(texture2dDrv codeobj) - -# Link with HIP -target_link_libraries(texture2dDrv hip::host) diff --git a/projects/hip-tests/samples/2_Cookbook/13_occupancy/CMakeLists.txt b/projects/hip-tests/samples/2_Cookbook/13_occupancy/CMakeLists.txt deleted file mode 100644 index 6cad76a395..0000000000 --- a/projects/hip-tests/samples/2_Cookbook/13_occupancy/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -project(occupancy) - -cmake_minimum_required(VERSION 3.10) - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_BUILD_TYPE Release) - -# Create the excutable -add_executable(occupancy occupancy.cpp) - -# Link with HIP -target_link_libraries(occupancy hip::host) diff --git a/projects/hip-tests/samples/2_Cookbook/1_hipEvent/CMakeLists.txt b/projects/hip-tests/samples/2_Cookbook/1_hipEvent/CMakeLists.txt deleted file mode 100644 index 6f6ee4e050..0000000000 --- a/projects/hip-tests/samples/2_Cookbook/1_hipEvent/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -project(hipEvent) - -cmake_minimum_required(VERSION 3.10) - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_BUILD_TYPE Release) - -# Create the excutable -add_executable(hipEvent hipEvent.cpp) - -# Link with HIP -target_link_libraries(hipEvent hip::host) diff --git a/projects/hip-tests/samples/2_Cookbook/3_shared_memory/CMakeLists.txt b/projects/hip-tests/samples/2_Cookbook/3_shared_memory/CMakeLists.txt deleted file mode 100644 index 6401488628..0000000000 --- a/projects/hip-tests/samples/2_Cookbook/3_shared_memory/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -project(sharedMemory) - -cmake_minimum_required(VERSION 3.10) - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_BUILD_TYPE Release) - -# Create the excutable -add_executable(sharedMemory sharedMemory.cpp) - -# Link with HIP -target_link_libraries(sharedMemory hip::host) diff --git a/projects/hip-tests/samples/2_Cookbook/4_shfl/CMakeLists.txt b/projects/hip-tests/samples/2_Cookbook/4_shfl/CMakeLists.txt deleted file mode 100644 index 9d142eeb02..0000000000 --- a/projects/hip-tests/samples/2_Cookbook/4_shfl/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -project(shfl) - -cmake_minimum_required(VERSION 3.10) - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_BUILD_TYPE Release) - -# Create the excutable -add_executable(shfl shfl.cpp) - -# Link with HIP -target_link_libraries(shfl hip::host) diff --git a/projects/hip-tests/samples/2_Cookbook/5_2dshfl/CMakeLists.txt b/projects/hip-tests/samples/2_Cookbook/5_2dshfl/CMakeLists.txt deleted file mode 100644 index adc0e3595d..0000000000 --- a/projects/hip-tests/samples/2_Cookbook/5_2dshfl/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ -project(2dshfl) - -cmake_minimum_required(VERSION 3.10) - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) - -# Create the excutable -add_executable(2dshfl 2dshfl.cpp) - -# Link with HIP -target_link_libraries(2dshfl hip::host) diff --git a/projects/hip-tests/samples/2_Cookbook/6_dynamic_shared/CMakeLists.txt b/projects/hip-tests/samples/2_Cookbook/6_dynamic_shared/CMakeLists.txt deleted file mode 100644 index f177952d5a..0000000000 --- a/projects/hip-tests/samples/2_Cookbook/6_dynamic_shared/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ -project(dynamic_shared) - -cmake_minimum_required(VERSION 3.10) - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) - -# Create the excutable -add_executable(dynamic_shared dynamic_shared.cpp) - -# Link with HIP -target_link_libraries(dynamic_shared hip::host) diff --git a/projects/hip-tests/samples/2_Cookbook/7_streams/CMakeLists.txt b/projects/hip-tests/samples/2_Cookbook/7_streams/CMakeLists.txt deleted file mode 100644 index fac4187b47..0000000000 --- a/projects/hip-tests/samples/2_Cookbook/7_streams/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ -project(stream) - -cmake_minimum_required(VERSION 3.10) - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) - -# Create the excutable -add_executable(stream stream.cpp) - -# Link with HIP -target_link_libraries(stream hip::host) diff --git a/projects/hip-tests/samples/2_Cookbook/8_peer2peer/CMakeLists.txt b/projects/hip-tests/samples/2_Cookbook/8_peer2peer/CMakeLists.txt deleted file mode 100644 index 7c38373911..0000000000 --- a/projects/hip-tests/samples/2_Cookbook/8_peer2peer/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ -project(peer2peer) - -cmake_minimum_required(VERSION 3.10) - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) - -# Create the excutable -add_executable(peer2peer peer2peer.cpp) - -# Link with HIP -target_link_libraries(peer2peer hip::host) diff --git a/projects/hip-tests/samples/2_Cookbook/9_unroll/CMakeLists.txt b/projects/hip-tests/samples/2_Cookbook/9_unroll/CMakeLists.txt deleted file mode 100644 index fc1b740e33..0000000000 --- a/projects/hip-tests/samples/2_Cookbook/9_unroll/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ -project(unroll) - -cmake_minimum_required(VERSION 3.10) - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) - -# Create the excutable -add_executable(unroll unroll.cpp) - -# Link with HIP -target_link_libraries(unroll hip::host) diff --git a/projects/hip-tests/samples/README.md b/projects/hip-tests/samples/README.md deleted file mode 100644 index 739045382e..0000000000 --- a/projects/hip-tests/samples/README.md +++ /dev/null @@ -1,27 +0,0 @@ -Build procedure - -We provide Makefile and CMakeLists.txt to build the samples seperately. - -1.Makefile supports shared lib of hip-rocclr runtime and nvcc. - -To build a sample, just type in sample folder, - -make - - - -2.CMakeLists.txt can support shared and static libs of hip-rocclr runtime. - -To build a sample, type in sample folder, - -mkdir build (if build folder is missing) - -cd build - -cmake .. - -make - -If you want debug version, follow, - -cmake -DCMAKE_BUILD_TYPE=Debug .. \ No newline at end of file