diff --git a/hipamd/tests/performance/memory/hipPerfSampleRate.cpp b/hipamd/tests/performance/memory/hipPerfSampleRate.cpp index 040903e67a..15141998d8 100644 --- a/hipamd/tests/performance/memory/hipPerfSampleRate.cpp +++ b/hipamd/tests/performance/memory/hipPerfSampleRate.cpp @@ -1,308 +1,308 @@ -/* - Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ - -/* HIT_START - * BUILD: %t %s ../../src/test_common.cpp EXCLUDE_HIP_PLATFORM nvidia - * TEST: %t - * HIT_END - */ - -#include -#include -#include "test_common.h" -#include -#include - -using namespace std; - -#define NUM_TYPES 3 -vector types= {"float", "float2", "float4"}; -vector typeSizes = {4, 8, 16}; - -#define NUM_SIZES 12 -vector sizes = {1, 2, 4, 8, 16, 32, - 64, 128, 256, 512, 1024, 2048}; - -#define NUM_BUFS 6 -#define MAX_BUFS (1 << (NUM_BUFS - 1)) - -template -__global__ void sampleRate(T * outBuffer, unsigned int inBufSize, unsigned int writeIt, - T **inBuffer, int numBufs) { - - uint gid = (blockIdx.x * blockDim.x + threadIdx.x); - uint inputIdx = gid % inBufSize; - - T tmp = (T)0.0f; - for(int i = 0; i < numBufs; i++) { - tmp += *(*(inBuffer+i)+inputIdx); - } - - if (writeIt*(unsigned int)tmp.x) { - outBuffer[gid] = tmp; - } -}; - -template -__global__ void sampleRateFloat(T * outBuffer, unsigned int inBufSize, unsigned int writeIt, - T ** inBuffer, int numBufs) { - - uint gid = (blockIdx.x * blockDim.x + threadIdx.x); - uint inputIdx = gid % inBufSize; - - T tmp = (T)0.0f; - - for(int i = 0; i < numBufs; i++) { - tmp += *((*inBuffer+i)+inputIdx); - } - - if (writeIt*(unsigned int)tmp) { - outBuffer[gid] = tmp; - } -}; - -class hipPerfSampleRate { - public: - hipPerfSampleRate(); - ~hipPerfSampleRate(); - - void open(void); - void run(unsigned int testCase); - void close(void); - - // array of funtion pointers - typedef void (hipPerfSampleRate::*funPtr)(void * outBuffer, unsigned int - inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks, - int threads_per_block); - - // Wrappers - void float_kernel(void * outBuffer, unsigned int - inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks, - int threads_per_block); - - void float2_kernel(void * outBuffer, unsigned int - inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks, - int threads_per_block); - - void float4_kernel(void * outBuffer, unsigned int - inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks, - int threads_per_block); - - private: - void setData(void *ptr, unsigned int value); - void checkData(uint *ptr); - - unsigned int width_; - unsigned int bufSize_; - unsigned long long totalIters = 0; - int numCUs; - - unsigned int outBufSize_; - static const unsigned int MAX_ITERATIONS = 25; - unsigned int numBufs_; - unsigned int typeIdx_; -}; - - -hipPerfSampleRate::hipPerfSampleRate() {} - -hipPerfSampleRate::~hipPerfSampleRate() {} - -void hipPerfSampleRate::open(void) { - - int nGpu = 0; - HIPCHECK(hipGetDeviceCount(&nGpu)); - if (nGpu < 1) { - std::cout << "info: didn't find any GPU! skipping the test!\n"; - passed(); - return; - } - - int deviceId = 0; - hipDeviceProp_t props = {0}; - props = {0}; - HIPCHECK(hipSetDevice(deviceId)); - HIPCHECK(hipGetDeviceProperties(&props, deviceId)); - std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name - << " with " << props.multiProcessorCount << " CUs" << " and device id: " << deviceId - << std::endl; - numCUs = props.multiProcessorCount; - } - - -void hipPerfSampleRate::close() { - -} - - -// Wrappers for the kernel launches -void hipPerfSampleRate::float_kernel(void * outBuffer, unsigned int inBufSize, - unsigned int writeIt, void **inBuffer, - int numBufs, int grids, int blocks, int threads_per_block) { - - hipLaunchKernelGGL(sampleRateFloat, dim3(grids, grids, grids), dim3 (blocks), 0, 0, - (float*)outBuffer, inBufSize, writeIt, (float**)inBuffer, numBufs); - -} - -void hipPerfSampleRate::float2_kernel(void * outBuffer, unsigned int inBufSize, - unsigned int writeIt, void **inBuffer, - int grids, int blocks, int threads_per_block, int numBufs) { - - hipLaunchKernelGGL(sampleRate, dim3(grids, grids, grids), dim3(blocks), 0, 0, - (float2 *)outBuffer, inBufSize, writeIt, (float2**)inBuffer, numBufs); -} - -void hipPerfSampleRate::float4_kernel(void * outBuffer, unsigned int inBufSize, - unsigned int writeIt, void **inBuffer, - int grids, int blocks, int threads_per_block, int numBufs) { - - hipLaunchKernelGGL(sampleRate, dim3(grids, grids, grids), dim3(blocks), 0, 0, - (float4 *) outBuffer, inBufSize, writeIt, (float4**)inBuffer, numBufs); -} - -void hipPerfSampleRate::run(unsigned int test) { - - funPtr p[] = {&hipPerfSampleRate::float_kernel, &hipPerfSampleRate::float2_kernel, - &hipPerfSampleRate::float4_kernel}; - - // We compute a square domain - width_ = sizes[test % NUM_SIZES]; - typeIdx_ = (test / NUM_SIZES) % NUM_TYPES; - bufSize_ = width_ * width_ * typeSizes[typeIdx_]; - numBufs_ = (1 << (test / (NUM_SIZES * NUM_TYPES))); - - void * hOutPtr; - void * dOutPtr; - void * hInPtr[numBufs_]; - void ** dPtr; - void * dInPtr[numBufs_]; - - outBufSize_ = - sizes[NUM_SIZES - 1] * sizes[NUM_SIZES - 1] * typeSizes[NUM_TYPES - 1]; - - // Allocate memory on the host and device - HIPCHECK(hipHostMalloc((void **)&hOutPtr, outBufSize_, hipHostMallocDefault)); - setData((void *)hOutPtr, 0xdeadbeef); - HIPCHECK(hipMalloc((uint **)&dOutPtr, outBufSize_)); - - // Allocate 2D array in Device - hipMalloc((void **)&dPtr, numBufs_* sizeof(void *)); - - for (uint i = 0; i < numBufs_; i++) { - HIPCHECK(hipHostMalloc((void **)&hInPtr[i], bufSize_, hipHostMallocDefault)); - HIPCHECK(hipMalloc((uint **)&dInPtr[i], bufSize_)); - setData(hInPtr[i], 0x3f800000); - } - - // Populate array of pointers with array addresses - hipMemcpy(dPtr, dInPtr, numBufs_* sizeof(void *), hipMemcpyHostToDevice); - - // Copy memory from host to device - for (uint i = 0; i < numBufs_; i++) { - HIPCHECK(hipMemcpy(dInPtr[i], hInPtr[i], bufSize_, hipMemcpyHostToDevice)); - } - - HIPCHECK(hipMemcpy(dOutPtr, hOutPtr, outBufSize_, hipMemcpyHostToDevice)); - - // Prepare kernel launch parameters - // outBufSize_/sizeof(uint) - Grid size in 3D - int grids = 64; - int blocks = 64; - int threads_per_block = 1; - - unsigned int maxIter = MAX_ITERATIONS * (MAX_BUFS / numBufs_); - unsigned int sizeDW = width_ * width_; - unsigned int writeIt = 0; - - int idx = 0; - - if (!types[typeIdx_].compare("float")) { - idx = 0; - } - else if(!types[typeIdx_].compare("float2")) { - idx = 1; - } - else if(!types[typeIdx_].compare("float4")) { - idx = 2; - } - - - // Time the kernel execution - auto all_start = std::chrono::steady_clock::now(); - for (uint i = 0; i < maxIter; i++) { - (this->*p[idx]) ((void *)dOutPtr, sizeDW, writeIt, dPtr, numBufs_, grids, blocks, - threads_per_block); - } - - hipDeviceSynchronize(); - auto all_end = std::chrono::steady_clock::now(); - std::chrono::duration all_kernel_time = all_end - all_start; - - double perf = ((double)outBufSize_ * numBufs_ * (double)maxIter * (double)(1e-09)) / - all_kernel_time.count(); - - cout << "Domain " << sizes[NUM_SIZES - 1] << "x"<< sizes[NUM_SIZES - 1] << " bufs " - << numBufs_ << " " << types[typeIdx_] << " " << width_<<"x"< +#include +#include "test_common.h" +#include +#include + +using namespace std; + +#define NUM_TYPES 3 +vector types= {"float", "float2", "float4"}; +vector typeSizes = {4, 8, 16}; + +#define NUM_SIZES 12 +vector sizes = {1, 2, 4, 8, 16, 32, + 64, 128, 256, 512, 1024, 2048}; + +#define NUM_BUFS 6 +#define MAX_BUFS (1 << (NUM_BUFS - 1)) + +template +__global__ void sampleRate(T * outBuffer, unsigned int inBufSize, unsigned int writeIt, + T **inBuffer, int numBufs) { + + uint gid = (blockIdx.x * blockDim.x + threadIdx.x); + uint inputIdx = gid % inBufSize; + + T tmp = (T)0.0f; + for(int i = 0; i < numBufs; i++) { + tmp += *(*(inBuffer+i)+inputIdx); + } + + if (writeIt*(unsigned int)tmp.x) { + outBuffer[gid] = tmp; + } +}; + +template +__global__ void sampleRateFloat(T * outBuffer, unsigned int inBufSize, unsigned int writeIt, + T ** inBuffer, int numBufs) { + + uint gid = (blockIdx.x * blockDim.x + threadIdx.x); + uint inputIdx = gid % inBufSize; + + T tmp = (T)0.0f; + + for(int i = 0; i < numBufs; i++) { + tmp += *((*inBuffer+i)+inputIdx); + } + + if (writeIt*(unsigned int)tmp) { + outBuffer[gid] = tmp; + } +}; + +class hipPerfSampleRate { + public: + hipPerfSampleRate(); + ~hipPerfSampleRate(); + + void open(void); + void run(unsigned int testCase); + void close(void); + + // array of funtion pointers + typedef void (hipPerfSampleRate::*funPtr)(void * outBuffer, unsigned int + inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks, + int threads_per_block); + + // Wrappers + void float_kernel(void * outBuffer, unsigned int + inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks, + int threads_per_block); + + void float2_kernel(void * outBuffer, unsigned int + inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks, + int threads_per_block); + + void float4_kernel(void * outBuffer, unsigned int + inBufSize, unsigned int writeIt, void **inBuffer, int numBufs, int grids, int blocks, + int threads_per_block); + + private: + void setData(void *ptr, unsigned int value); + void checkData(uint *ptr); + + unsigned int width_; + unsigned int bufSize_; + unsigned long long totalIters = 0; + int numCUs; + + unsigned int outBufSize_; + static const unsigned int MAX_ITERATIONS = 25; + unsigned int numBufs_; + unsigned int typeIdx_; +}; + + +hipPerfSampleRate::hipPerfSampleRate() {} + +hipPerfSampleRate::~hipPerfSampleRate() {} + +void hipPerfSampleRate::open(void) { + + int nGpu = 0; + HIPCHECK(hipGetDeviceCount(&nGpu)); + if (nGpu < 1) { + std::cout << "info: didn't find any GPU! skipping the test!\n"; + passed(); + return; + } + + int deviceId = 0; + hipDeviceProp_t props = {0}; + props = {0}; + HIPCHECK(hipSetDevice(deviceId)); + HIPCHECK(hipGetDeviceProperties(&props, deviceId)); + std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name + << " with " << props.multiProcessorCount << " CUs" << " and device id: " << deviceId + << std::endl; + numCUs = props.multiProcessorCount; + } + + +void hipPerfSampleRate::close() { + +} + + +// Wrappers for the kernel launches +void hipPerfSampleRate::float_kernel(void * outBuffer, unsigned int inBufSize, + unsigned int writeIt, void **inBuffer, + int numBufs, int grids, int blocks, int threads_per_block) { + + hipLaunchKernelGGL(sampleRateFloat, dim3(grids, grids, grids), dim3 (blocks), 0, 0, + (float*)outBuffer, inBufSize, writeIt, (float**)inBuffer, numBufs); + +} + +void hipPerfSampleRate::float2_kernel(void * outBuffer, unsigned int inBufSize, + unsigned int writeIt, void **inBuffer, + int grids, int blocks, int threads_per_block, int numBufs) { + + hipLaunchKernelGGL(sampleRate, dim3(grids, grids, grids), dim3(blocks), 0, 0, + (float2 *)outBuffer, inBufSize, writeIt, (float2**)inBuffer, numBufs); +} + +void hipPerfSampleRate::float4_kernel(void * outBuffer, unsigned int inBufSize, + unsigned int writeIt, void **inBuffer, + int grids, int blocks, int threads_per_block, int numBufs) { + + hipLaunchKernelGGL(sampleRate, dim3(grids, grids, grids), dim3(blocks), 0, 0, + (float4 *) outBuffer, inBufSize, writeIt, (float4**)inBuffer, numBufs); +} + +void hipPerfSampleRate::run(unsigned int test) { + + funPtr p[] = {&hipPerfSampleRate::float_kernel, &hipPerfSampleRate::float2_kernel, + &hipPerfSampleRate::float4_kernel}; + + // We compute a square domain + width_ = sizes[test % NUM_SIZES]; + typeIdx_ = (test / NUM_SIZES) % NUM_TYPES; + bufSize_ = width_ * width_ * typeSizes[typeIdx_]; + numBufs_ = (1 << (test / (NUM_SIZES * NUM_TYPES))); + + void * hOutPtr; + void * dOutPtr; + void * hInPtr[numBufs_]; + void ** dPtr; + void * dInPtr[numBufs_]; + + outBufSize_ = + sizes[NUM_SIZES - 1] * sizes[NUM_SIZES - 1] * typeSizes[NUM_TYPES - 1]; + + // Allocate memory on the host and device + HIPCHECK(hipHostMalloc((void **)&hOutPtr, outBufSize_, hipHostMallocDefault)); + setData((void *)hOutPtr, 0xdeadbeef); + HIPCHECK(hipMalloc((uint **)&dOutPtr, outBufSize_)); + + // Allocate 2D array in Device + hipMalloc((void **)&dPtr, numBufs_* sizeof(void *)); + + for (uint i = 0; i < numBufs_; i++) { + HIPCHECK(hipHostMalloc((void **)&hInPtr[i], bufSize_, hipHostMallocDefault)); + HIPCHECK(hipMalloc((uint **)&dInPtr[i], bufSize_)); + setData(hInPtr[i], 0x3f800000); + } + + // Populate array of pointers with array addresses + hipMemcpy(dPtr, dInPtr, numBufs_* sizeof(void *), hipMemcpyHostToDevice); + + // Copy memory from host to device + for (uint i = 0; i < numBufs_; i++) { + HIPCHECK(hipMemcpy(dInPtr[i], hInPtr[i], bufSize_, hipMemcpyHostToDevice)); + } + + HIPCHECK(hipMemcpy(dOutPtr, hOutPtr, outBufSize_, hipMemcpyHostToDevice)); + + // Prepare kernel launch parameters + // outBufSize_/sizeof(uint) - Grid size in 3D + int grids = 64; + int blocks = 64; + int threads_per_block = 1; + + unsigned int maxIter = MAX_ITERATIONS * (MAX_BUFS / numBufs_); + unsigned int sizeDW = width_ * width_; + unsigned int writeIt = 0; + + int idx = 0; + + if (!types[typeIdx_].compare("float")) { + idx = 0; + } + else if(!types[typeIdx_].compare("float2")) { + idx = 1; + } + else if(!types[typeIdx_].compare("float4")) { + idx = 2; + } + + + // Time the kernel execution + auto all_start = std::chrono::steady_clock::now(); + for (uint i = 0; i < maxIter; i++) { + (this->*p[idx]) ((void *)dOutPtr, sizeDW, writeIt, dPtr, numBufs_, grids, blocks, + threads_per_block); + } + + hipDeviceSynchronize(); + auto all_end = std::chrono::steady_clock::now(); + std::chrono::duration all_kernel_time = all_end - all_start; + + double perf = ((double)outBufSize_ * numBufs_ * (double)maxIter * (double)(1e-09)) / + all_kernel_time.count(); + + cout << "Domain " << sizes[NUM_SIZES - 1] << "x"<< sizes[NUM_SIZES - 1] << " bufs " + << numBufs_ << " " << types[typeIdx_] << " " << width_<<"x"< -#include -#include "test_common.h" - -typedef struct { - double x; - double y; - double width; -} coordRec; - -static coordRec coords[] = { - {0.0, 0.0, 0.00001}, // All black -}; - -static unsigned int numCoords = sizeof(coords) / sizeof(coordRec); - -__global__ void mandelbrot(uint *out, uint width, float xPos, float yPos, float xStep, - float yStep, uint maxIter) { - - int tid = (blockIdx.x * blockDim.x + threadIdx.x); - int i = tid % width; - int j = tid / width; - float x0 = (float)(xPos + xStep*i); - float y0 = (float)(yPos + yStep*j); - - float x = x0; - float y = y0; - - uint iter = 0; - float tmp; - for (iter = 0; (x*x + y*y <= 4.0f) && (iter < maxIter); iter++) { - tmp = x; - x = fma(-y,y,fma(x,x,x0)); - y = fma(2.0f*tmp,y,y0); - } - - out[tid] = iter; -}; - -class hipPerfDeviceConcurrency { - public: - hipPerfDeviceConcurrency(); - ~hipPerfDeviceConcurrency(); - - void setNumGpus(unsigned int num) { - numDevices = num; - } - unsigned int getNumGpus() { - return numDevices; - } - - void open(void); - void close(void); - void run(unsigned int testCase, int numGpus); - - private: - void setData(void *ptr, unsigned int value); - void checkData(uint *ptr); - - unsigned int numDevices; - unsigned int width_; - unsigned int bufSize; - unsigned int coordIdx; - unsigned long long totalIters = 0; -}; - - -hipPerfDeviceConcurrency::hipPerfDeviceConcurrency() {} - -hipPerfDeviceConcurrency::~hipPerfDeviceConcurrency() {} - -void hipPerfDeviceConcurrency::open(void) { - - - int nGpu = 0; - HIPCHECK(hipGetDeviceCount(&nGpu)); - setNumGpus(nGpu); - if (nGpu < 1) { - std::cout << "info: didn't find any GPU! skipping the test!\n"; - passed(); - } - - -} - - -void hipPerfDeviceConcurrency::close() { -} - -void hipPerfDeviceConcurrency::run(unsigned int testCase, int numGpus) { - - - static int deviceId; - uint * hPtr[numGpus]; - uint * dPtr[numGpus]; - hipStream_t streams[numGpus]; - int numCUs[numGpus]; - unsigned int maxIter[numGpus]; - unsigned long long expectedIters[numGpus]; - - int threads, threads_per_block, blocks; - float xStep, yStep, xPos, yPos; - - for(int i = 0; i < numGpus; i++) { - - if(testCase != 0) { - deviceId = i; - } - - HIPCHECK(hipSetDevice(deviceId)); - - hipDeviceProp_t props = {0}; - HIPCHECK(hipGetDeviceProperties(&props, i)); - - if (testCase != 0) { - std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name - << " with " << props.multiProcessorCount << " CUs" << " and device ID: " - << i << std::endl; - } - - numCUs[i] = props.multiProcessorCount; - int clkFrequency = 0; - HIPCHECK(hipDeviceGetAttribute(&clkFrequency, hipDeviceAttributeClockRate, i)); - - clkFrequency =(unsigned int)clkFrequency/1000; - - // Maximum iteration count - // maxIter = 8388608 * (engine_clock / 1000).serial execution - maxIter[i] = (unsigned int)(((8388608 * ((float)clkFrequency / 1000)) * numCUs[i]) / 128); - maxIter[i] = (maxIter[i] + 15) & ~15; - - // Width is divisible by 4 because the mandelbrot kernel processes 4 pixels at once. - width_ = 256; - - bufSize = width_ * width_ * sizeof(uint); - - // Create streams for concurrency - HIPCHECK(hipStreamCreate(&streams[i])); - - // Allocate memory on the host and device - HIPCHECK(hipHostMalloc((void **)&hPtr[i], bufSize, hipHostMallocDefault)); - setData(hPtr[i], 0xdeadbeef); - HIPCHECK(hipMalloc((uint **)&dPtr[i], bufSize)) - - // Prepare kernel launch parameters - threads = (bufSize/sizeof(uint)); - threads_per_block = 64; - blocks = (threads/threads_per_block) + (threads % threads_per_block); - - coordIdx = testCase % numCoords; - xStep = (float)(coords[coordIdx].width / (double)width_); - yStep = (float)(-coords[coordIdx].width / (double)width_); - xPos = (float)(coords[coordIdx].x - 0.5 * coords[coordIdx].width); - yPos = (float)(coords[coordIdx].y + 0.5 * coords[coordIdx].width); - - // Copy memory from host to device - HIPCHECK(hipMemcpy(dPtr[i], hPtr[i], bufSize, hipMemcpyHostToDevice)); - - } - - // Time the kernel execution - auto all_start = std::chrono::steady_clock::now(); - - for(int i = 0; i < numGpus; i++) { - - if(testCase != 0) { - deviceId = i; - } - - HIPCHECK(hipSetDevice(deviceId)); - - hipLaunchKernelGGL(mandelbrot, dim3(blocks), dim3(threads_per_block), 0, streams[i], - dPtr[i], width_, xPos, yPos, xStep, yStep, maxIter[i]); - - } - - for(int i = 0; i < numGpus; i++) { - HIPCHECK(hipStreamSynchronize(0)); - } - - - auto all_end = std::chrono::steady_clock::now(); - std::chrono::duration all_kernel_time = all_end - all_start; - - for(int i = 0; i < numGpus; i++) { - - if(testCase != 0) { - deviceId = i; - } - HIPCHECK(hipSetDevice(deviceId)); - - // Copy data back from device to the host - HIPCHECK(hipMemcpy(hPtr[i], dPtr[i], bufSize, hipMemcpyDeviceToHost)); - - checkData(hPtr[i]); - expectedIters[i] = width_ * width_ * (unsigned long long) maxIter[i]; - - if (testCase != 0) { - checkData(hPtr[i]); - if(totalIters != expectedIters[i]) { - std::cout << "Incorrect iteration count detected" << std::endl; - } - } - - - HIPCHECK(hipStreamDestroy(streams[i])); - - // Free host and device memory - HIPCHECK(hipFree(hPtr[i])); - HIPCHECK(hipFree(dPtr[i])); - } - - if (testCase != 0) { - std::cout << '\n' << "Measured time for kernel computation on " << numGpus << " device (s): " - << all_kernel_time.count() << " (s) " << '\n' << std::endl; - } - - if(testCase == 0) { - deviceId++; - } - - -} - - -void hipPerfDeviceConcurrency::setData(void *ptr, unsigned int value) { - unsigned int *ptr2 = (unsigned int *)ptr; - for (unsigned int i = 0; i < width_ * width_ ; i++) { - ptr2[i] = value; - } -} - - -void hipPerfDeviceConcurrency::checkData(uint *ptr) { - totalIters = 0; - for (unsigned int i = 0; i < width_ * width_; i++) { - totalIters += ptr[i]; - } -} - - -int main(int argc, char* argv[]) { - hipPerfDeviceConcurrency deviceConcurrency; - - deviceConcurrency.open(); - - int nGpu = deviceConcurrency.getNumGpus(); - - // testCase = 0 refers to warmup kernel run - int testCase = 0; - - for (int i = 0; i < nGpu; i++) { - // Warm-up kernel on all devices - deviceConcurrency.run(testCase, 1); - } - - // Time for kernel on 1 device - deviceConcurrency.run(++testCase, 1); - - // Time for kernel on all available devices - deviceConcurrency.run(++testCase, nGpu); - - passed(); -} +/* + Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + */ + +/* HIT_START + * BUILD: %t %s ../../src/test_common.cpp EXCLUDE_HIP_PLATFORM nvidia + * TEST: %t + * HIT_END + */ + +#include +#include +#include "test_common.h" + +typedef struct { + double x; + double y; + double width; +} coordRec; + +static coordRec coords[] = { + {0.0, 0.0, 0.00001}, // All black +}; + +static unsigned int numCoords = sizeof(coords) / sizeof(coordRec); + +__global__ void mandelbrot(uint *out, uint width, float xPos, float yPos, float xStep, + float yStep, uint maxIter) { + + int tid = (blockIdx.x * blockDim.x + threadIdx.x); + int i = tid % width; + int j = tid / width; + float x0 = (float)(xPos + xStep*i); + float y0 = (float)(yPos + yStep*j); + + float x = x0; + float y = y0; + + uint iter = 0; + float tmp; + for (iter = 0; (x*x + y*y <= 4.0f) && (iter < maxIter); iter++) { + tmp = x; + x = fma(-y,y,fma(x,x,x0)); + y = fma(2.0f*tmp,y,y0); + } + + out[tid] = iter; +}; + +class hipPerfDeviceConcurrency { + public: + hipPerfDeviceConcurrency(); + ~hipPerfDeviceConcurrency(); + + void setNumGpus(unsigned int num) { + numDevices = num; + } + unsigned int getNumGpus() { + return numDevices; + } + + void open(void); + void close(void); + void run(unsigned int testCase, int numGpus); + + private: + void setData(void *ptr, unsigned int value); + void checkData(uint *ptr); + + unsigned int numDevices; + unsigned int width_; + unsigned int bufSize; + unsigned int coordIdx; + unsigned long long totalIters = 0; +}; + + +hipPerfDeviceConcurrency::hipPerfDeviceConcurrency() {} + +hipPerfDeviceConcurrency::~hipPerfDeviceConcurrency() {} + +void hipPerfDeviceConcurrency::open(void) { + + + int nGpu = 0; + HIPCHECK(hipGetDeviceCount(&nGpu)); + setNumGpus(nGpu); + if (nGpu < 1) { + std::cout << "info: didn't find any GPU! skipping the test!\n"; + passed(); + } + + +} + + +void hipPerfDeviceConcurrency::close() { +} + +void hipPerfDeviceConcurrency::run(unsigned int testCase, int numGpus) { + + + static int deviceId; + uint * hPtr[numGpus]; + uint * dPtr[numGpus]; + hipStream_t streams[numGpus]; + int numCUs[numGpus]; + unsigned int maxIter[numGpus]; + unsigned long long expectedIters[numGpus]; + + int threads, threads_per_block, blocks; + float xStep, yStep, xPos, yPos; + + for(int i = 0; i < numGpus; i++) { + + if(testCase != 0) { + deviceId = i; + } + + HIPCHECK(hipSetDevice(deviceId)); + + hipDeviceProp_t props = {0}; + HIPCHECK(hipGetDeviceProperties(&props, i)); + + if (testCase != 0) { + std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name + << " with " << props.multiProcessorCount << " CUs" << " and device ID: " + << i << std::endl; + } + + numCUs[i] = props.multiProcessorCount; + int clkFrequency = 0; + HIPCHECK(hipDeviceGetAttribute(&clkFrequency, hipDeviceAttributeClockRate, i)); + + clkFrequency =(unsigned int)clkFrequency/1000; + + // Maximum iteration count + // maxIter = 8388608 * (engine_clock / 1000).serial execution + maxIter[i] = (unsigned int)(((8388608 * ((float)clkFrequency / 1000)) * numCUs[i]) / 128); + maxIter[i] = (maxIter[i] + 15) & ~15; + + // Width is divisible by 4 because the mandelbrot kernel processes 4 pixels at once. + width_ = 256; + + bufSize = width_ * width_ * sizeof(uint); + + // Create streams for concurrency + HIPCHECK(hipStreamCreate(&streams[i])); + + // Allocate memory on the host and device + HIPCHECK(hipHostMalloc((void **)&hPtr[i], bufSize, hipHostMallocDefault)); + setData(hPtr[i], 0xdeadbeef); + HIPCHECK(hipMalloc((uint **)&dPtr[i], bufSize)) + + // Prepare kernel launch parameters + threads = (bufSize/sizeof(uint)); + threads_per_block = 64; + blocks = (threads/threads_per_block) + (threads % threads_per_block); + + coordIdx = testCase % numCoords; + xStep = (float)(coords[coordIdx].width / (double)width_); + yStep = (float)(-coords[coordIdx].width / (double)width_); + xPos = (float)(coords[coordIdx].x - 0.5 * coords[coordIdx].width); + yPos = (float)(coords[coordIdx].y + 0.5 * coords[coordIdx].width); + + // Copy memory from host to device + HIPCHECK(hipMemcpy(dPtr[i], hPtr[i], bufSize, hipMemcpyHostToDevice)); + + } + + // Time the kernel execution + auto all_start = std::chrono::steady_clock::now(); + + for(int i = 0; i < numGpus; i++) { + + if(testCase != 0) { + deviceId = i; + } + + HIPCHECK(hipSetDevice(deviceId)); + + hipLaunchKernelGGL(mandelbrot, dim3(blocks), dim3(threads_per_block), 0, streams[i], + dPtr[i], width_, xPos, yPos, xStep, yStep, maxIter[i]); + + } + + for(int i = 0; i < numGpus; i++) { + HIPCHECK(hipStreamSynchronize(0)); + } + + + auto all_end = std::chrono::steady_clock::now(); + std::chrono::duration all_kernel_time = all_end - all_start; + + for(int i = 0; i < numGpus; i++) { + + if(testCase != 0) { + deviceId = i; + } + HIPCHECK(hipSetDevice(deviceId)); + + // Copy data back from device to the host + HIPCHECK(hipMemcpy(hPtr[i], dPtr[i], bufSize, hipMemcpyDeviceToHost)); + + checkData(hPtr[i]); + expectedIters[i] = width_ * width_ * (unsigned long long) maxIter[i]; + + if (testCase != 0) { + checkData(hPtr[i]); + if(totalIters != expectedIters[i]) { + std::cout << "Incorrect iteration count detected" << std::endl; + } + } + + + HIPCHECK(hipStreamDestroy(streams[i])); + + // Free host and device memory + HIPCHECK(hipFree(hPtr[i])); + HIPCHECK(hipFree(dPtr[i])); + } + + if (testCase != 0) { + std::cout << '\n' << "Measured time for kernel computation on " << numGpus << " device (s): " + << all_kernel_time.count() << " (s) " << '\n' << std::endl; + } + + if(testCase == 0) { + deviceId++; + } + + +} + + +void hipPerfDeviceConcurrency::setData(void *ptr, unsigned int value) { + unsigned int *ptr2 = (unsigned int *)ptr; + for (unsigned int i = 0; i < width_ * width_ ; i++) { + ptr2[i] = value; + } +} + + +void hipPerfDeviceConcurrency::checkData(uint *ptr) { + totalIters = 0; + for (unsigned int i = 0; i < width_ * width_; i++) { + totalIters += ptr[i]; + } +} + + +int main(int argc, char* argv[]) { + hipPerfDeviceConcurrency deviceConcurrency; + + deviceConcurrency.open(); + + int nGpu = deviceConcurrency.getNumGpus(); + + // testCase = 0 refers to warmup kernel run + int testCase = 0; + + for (int i = 0; i < nGpu; i++) { + // Warm-up kernel on all devices + deviceConcurrency.run(testCase, 1); + } + + // Time for kernel on 1 device + deviceConcurrency.run(++testCase, 1); + + // Time for kernel on all available devices + deviceConcurrency.run(++testCase, nGpu); + + passed(); +} diff --git a/hipamd/tests/src/ipc/MultiProcess.h b/hipamd/tests/src/ipc/MultiProcess.h index f996887ed5..40b42e5ce2 100755 --- a/hipamd/tests/src/ipc/MultiProcess.h +++ b/hipamd/tests/src/ipc/MultiProcess.h @@ -1,157 +1,157 @@ -#pragma once - -#ifdef __unix__ - -#include -#include -#include -#include -#include - -template -struct Shmem { - std::atomic handle_; - std::atomic done_counter_; -}; - -template -struct ShmemMeta { - std::string shmem_name_; - int shmem_fd_; - Shmem* shmem_; -}; - -template -class MultiProcess { -public: - MultiProcess(size_t num_proc) : num_proc_(num_proc) {} - ~MultiProcess(); - - void DebugInfo(pid_t pid); - - pid_t SpawnProcess(bool debug_bkpt); - bool CreateShmem(); - - bool WriteHandleToShmem(T ipc_handle); - bool WaitTillAllChildReads(); - - bool ReadHandleFromShmem(T& ipc_handle); - bool NotifyParentDone(); - -private: - const size_t num_proc_; - bool debug_proc_; - ShmemMeta shmem_meta_obj_; -}; - -// Template Implementations -template -MultiProcess::~MultiProcess() { - if(munmap(shmem_meta_obj_.shmem_, sizeof(Shmem)) < 0) { - std::cout<<"Error Unmapping shared memory "< -void MultiProcess::DebugInfo(pid_t pid) { - const int delay = 1; - - if (pid == 0) { - std::cout<<" Child Process with ID: "< -pid_t MultiProcess::SpawnProcess(bool debug_bkpt) { - if (num_proc_ < 0) { - std::cout<<"Num Process cannot be less than 1"< -bool MultiProcess::CreateShmem() { - if (num_proc_ < 0) { - std::cout<<"Num Process cannot be less than 1"<)) != 0) { - std::cout<<"Cannot FTruncate "<*)mmap(0, sizeof(Shmem), PROT_READ | PROT_WRITE, - MAP_SHARED, shmem_meta_obj_.shmem_fd_, 0); - memset(&shmem_meta_obj_.shmem_->handle_, 0x00, sizeof(T)); - shmem_meta_obj_.shmem_->done_counter_ = -1; - - return true; -} - -template -bool MultiProcess::WriteHandleToShmem(T ipc_handle) { - memcpy(&shmem_meta_obj_.shmem_->handle_, &ipc_handle, sizeof(T)); - shmem_meta_obj_.shmem_->done_counter_ = 0; - return true; -} - -template -bool MultiProcess::WaitTillAllChildReads() { - size_t write_count = 0; - while (shmem_meta_obj_.shmem_->done_counter_ != num_proc_) { - ++write_count; - } - return true; -} - -template -bool MultiProcess::ReadHandleFromShmem(T& ipc_handle) { - size_t read_count = 0; - while (shmem_meta_obj_.shmem_->done_counter_ == -1) { - ++read_count; - } - memcpy(&ipc_handle, &shmem_meta_obj_.shmem_->handle_, sizeof(T)); - return true; -} - -template -bool MultiProcess::NotifyParentDone() { - ++shmem_meta_obj_.shmem_->done_counter_; - return true; -} - -#endif /* __unix__ */ +#pragma once + +#ifdef __unix__ + +#include +#include +#include +#include +#include + +template +struct Shmem { + std::atomic handle_; + std::atomic done_counter_; +}; + +template +struct ShmemMeta { + std::string shmem_name_; + int shmem_fd_; + Shmem* shmem_; +}; + +template +class MultiProcess { +public: + MultiProcess(size_t num_proc) : num_proc_(num_proc) {} + ~MultiProcess(); + + void DebugInfo(pid_t pid); + + pid_t SpawnProcess(bool debug_bkpt); + bool CreateShmem(); + + bool WriteHandleToShmem(T ipc_handle); + bool WaitTillAllChildReads(); + + bool ReadHandleFromShmem(T& ipc_handle); + bool NotifyParentDone(); + +private: + const size_t num_proc_; + bool debug_proc_; + ShmemMeta shmem_meta_obj_; +}; + +// Template Implementations +template +MultiProcess::~MultiProcess() { + if(munmap(shmem_meta_obj_.shmem_, sizeof(Shmem)) < 0) { + std::cout<<"Error Unmapping shared memory "< +void MultiProcess::DebugInfo(pid_t pid) { + const int delay = 1; + + if (pid == 0) { + std::cout<<" Child Process with ID: "< +pid_t MultiProcess::SpawnProcess(bool debug_bkpt) { + if (num_proc_ < 0) { + std::cout<<"Num Process cannot be less than 1"< +bool MultiProcess::CreateShmem() { + if (num_proc_ < 0) { + std::cout<<"Num Process cannot be less than 1"<)) != 0) { + std::cout<<"Cannot FTruncate "<*)mmap(0, sizeof(Shmem), PROT_READ | PROT_WRITE, + MAP_SHARED, shmem_meta_obj_.shmem_fd_, 0); + memset(&shmem_meta_obj_.shmem_->handle_, 0x00, sizeof(T)); + shmem_meta_obj_.shmem_->done_counter_ = -1; + + return true; +} + +template +bool MultiProcess::WriteHandleToShmem(T ipc_handle) { + memcpy(&shmem_meta_obj_.shmem_->handle_, &ipc_handle, sizeof(T)); + shmem_meta_obj_.shmem_->done_counter_ = 0; + return true; +} + +template +bool MultiProcess::WaitTillAllChildReads() { + size_t write_count = 0; + while (shmem_meta_obj_.shmem_->done_counter_ != num_proc_) { + ++write_count; + } + return true; +} + +template +bool MultiProcess::ReadHandleFromShmem(T& ipc_handle) { + size_t read_count = 0; + while (shmem_meta_obj_.shmem_->done_counter_ == -1) { + ++read_count; + } + memcpy(&ipc_handle, &shmem_meta_obj_.shmem_->handle_, sizeof(T)); + return true; +} + +template +bool MultiProcess::NotifyParentDone() { + ++shmem_meta_obj_.shmem_->done_counter_; + return true; +} + +#endif /* __unix__ */ diff --git a/hipamd/tests/src/ipc/hipMultiProcIpcEvent.cpp b/hipamd/tests/src/ipc/hipMultiProcIpcEvent.cpp index ae1ce35a48..ed7db7beba 100755 --- a/hipamd/tests/src/ipc/hipMultiProcIpcEvent.cpp +++ b/hipamd/tests/src/ipc/hipMultiProcIpcEvent.cpp @@ -1,126 +1,126 @@ -/* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include "MultiProcess.h" - -void multi_process(int num_process, bool debug_process) { - -#ifdef __unix__ - - float *A_h, *B_h, *C_h; - float *A_d, *B_d, *C_d; - hipEvent_t start, stop; - size_t Nbytes = N * sizeof(float); - - MultiProcess* mProcess = new MultiProcess(num_process); - mProcess->CreateShmem(); - pid_t pid = mProcess->SpawnProcess(debug_process); - - // Parent Process - if (pid != 0) { - - unsigned blocks = (N + threadsPerBlock - 1) / threadsPerBlock; - if (blocks > 1024) blocks = 1024; - if (blocks == 0) blocks = 1; - - printf("N=%zu (A+B+C= %6.1f MB total) blocks=%u threadsPerBlock=%u iterations=%d\n", N, - ((double)3 * N * sizeof(float)) / 1024 / 1024, blocks, threadsPerBlock, iterations); - printf("iterations=%d\n", iterations); - - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N); - - // NULL stream check: - HIPCHECK(hipEventCreateWithFlags(&start, hipEventDisableTiming|hipEventInterprocess)); - HIPCHECK(hipEventCreateWithFlags(&stop, hipEventDisableTiming|hipEventInterprocess)); - - - HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - - - for (int i = 0; i < iterations; i++) { - //--- START TIMED REGION - long long hostStart = HipTest::get_time(); - // Record the start event - HIPCHECK(hipEventRecord(start, NULL)); - - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, - static_cast(A_d), static_cast(B_d), C_d, N); - - - HIPCHECK(hipEventRecord(stop, NULL)); - HIPCHECK(hipEventSynchronize(stop)); - HIPCHECK(hipEventQuery(stop)); - long long hostStop = HipTest::get_time(); - //--- STOP TIMED REGION - - float eventMs = 1.0f; - // should fail - HIPASSERT(hipSuccess != hipEventElapsedTime(&eventMs, start, stop)); - float hostMs = HipTest::elapsed_time(hostStart, hostStop); - - printf("host_time (gettimeofday) =%6.3fms\n", hostMs); - printf("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); - printf("\n"); - - } - - hipIpcEventHandle_t ipc_handle; - HIPCHECK(hipIpcGetEventHandle(&ipc_handle, start)); - - mProcess->WriteHandleToShmem(ipc_handle); - mProcess->WaitTillAllChildReads(); - - } else { - hipEvent_t ipc_event; - hipIpcEventHandle_t ipc_handle; - mProcess->ReadHandleFromShmem(ipc_handle); - HIPCHECK(hipIpcOpenEventHandle(&ipc_event, ipc_handle)); - - HIPCHECK(hipEventSynchronize(ipc_event)); - HIPCHECK(hipEventDestroy(ipc_event)); - mProcess->NotifyParentDone(); - } - - if (pid != 0) { - HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - printf("check:\n"); - HipTest::checkVectorADD(A_h, B_h, C_h, N, true); - - HIPCHECK(hipEventDestroy(start)); - HIPCHECK(hipEventDestroy(stop)); - delete mProcess; - } - -#endif /* __unix__ */ - -} - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - multi_process((N < 64) ? N : 64, debug_test); - passed(); -} +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia + * TEST: %t + * HIT_END + */ + +#include "test_common.h" +#include "MultiProcess.h" + +void multi_process(int num_process, bool debug_process) { + +#ifdef __unix__ + + float *A_h, *B_h, *C_h; + float *A_d, *B_d, *C_d; + hipEvent_t start, stop; + size_t Nbytes = N * sizeof(float); + + MultiProcess* mProcess = new MultiProcess(num_process); + mProcess->CreateShmem(); + pid_t pid = mProcess->SpawnProcess(debug_process); + + // Parent Process + if (pid != 0) { + + unsigned blocks = (N + threadsPerBlock - 1) / threadsPerBlock; + if (blocks > 1024) blocks = 1024; + if (blocks == 0) blocks = 1; + + printf("N=%zu (A+B+C= %6.1f MB total) blocks=%u threadsPerBlock=%u iterations=%d\n", N, + ((double)3 * N * sizeof(float)) / 1024 / 1024, blocks, threadsPerBlock, iterations); + printf("iterations=%d\n", iterations); + + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N); + + // NULL stream check: + HIPCHECK(hipEventCreateWithFlags(&start, hipEventDisableTiming|hipEventInterprocess)); + HIPCHECK(hipEventCreateWithFlags(&stop, hipEventDisableTiming|hipEventInterprocess)); + + + HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); + + + for (int i = 0; i < iterations; i++) { + //--- START TIMED REGION + long long hostStart = HipTest::get_time(); + // Record the start event + HIPCHECK(hipEventRecord(start, NULL)); + + hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, + static_cast(A_d), static_cast(B_d), C_d, N); + + + HIPCHECK(hipEventRecord(stop, NULL)); + HIPCHECK(hipEventSynchronize(stop)); + HIPCHECK(hipEventQuery(stop)); + long long hostStop = HipTest::get_time(); + //--- STOP TIMED REGION + + float eventMs = 1.0f; + // should fail + HIPASSERT(hipSuccess != hipEventElapsedTime(&eventMs, start, stop)); + float hostMs = HipTest::elapsed_time(hostStart, hostStop); + + printf("host_time (gettimeofday) =%6.3fms\n", hostMs); + printf("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); + printf("\n"); + + } + + hipIpcEventHandle_t ipc_handle; + HIPCHECK(hipIpcGetEventHandle(&ipc_handle, start)); + + mProcess->WriteHandleToShmem(ipc_handle); + mProcess->WaitTillAllChildReads(); + + } else { + hipEvent_t ipc_event; + hipIpcEventHandle_t ipc_handle; + mProcess->ReadHandleFromShmem(ipc_handle); + HIPCHECK(hipIpcOpenEventHandle(&ipc_event, ipc_handle)); + + HIPCHECK(hipEventSynchronize(ipc_event)); + HIPCHECK(hipEventDestroy(ipc_event)); + mProcess->NotifyParentDone(); + } + + if (pid != 0) { + HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + printf("check:\n"); + HipTest::checkVectorADD(A_h, B_h, C_h, N, true); + + HIPCHECK(hipEventDestroy(start)); + HIPCHECK(hipEventDestroy(stop)); + delete mProcess; + } + +#endif /* __unix__ */ + +} + +int main(int argc, char* argv[]) { + HipTest::parseStandardArguments(argc, argv, true); + multi_process((N < 64) ? N : 64, debug_test); + passed(); +} diff --git a/hipamd/tests/src/ipc/hipMultiProcIpcMem.cpp b/hipamd/tests/src/ipc/hipMultiProcIpcMem.cpp index 9301697aa0..f44cec6296 100755 --- a/hipamd/tests/src/ipc/hipMultiProcIpcMem.cpp +++ b/hipamd/tests/src/ipc/hipMultiProcIpcMem.cpp @@ -1,103 +1,103 @@ -/* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include "MultiProcess.h" - -#define NUM_ELEMS 1024 -#define OFFSET 128 - -void multi_process(int num_process, bool debug_process) { - -#ifdef __unix__ - - int* ipc_dptr = nullptr; - int* ipc_hptr = nullptr; - int* ipc_out_dptr = nullptr; - int* ipc_out_hptr = nullptr; - int* ipc_offset_dptr = nullptr; - - MultiProcess* mProcess = new MultiProcess(num_process); - mProcess->CreateShmem(); - pid_t pid = mProcess->SpawnProcess(debug_process); - - // Parent Process - if (pid != 0) { - hipIpcMemHandle_t ipc_handle; - memset(&ipc_handle, 0x00, sizeof(hipIpcMemHandle_t)); - - HIPCHECK(hipMalloc((void**)&ipc_dptr, NUM_ELEMS * sizeof(int))); - // Add offset to the dev_ptr - ipc_offset_dptr = ipc_dptr + OFFSET; - // Get handle for the offsetted device_ptr - HIPCHECK(hipIpcGetMemHandle(&ipc_handle, ipc_offset_dptr)); - - ipc_hptr = new int[NUM_ELEMS]; - for (size_t idx = 0; idx < NUM_ELEMS; ++idx) { - ipc_hptr[idx] = idx; - } - - HIPCHECK(hipMemset(ipc_dptr, 0x00, (NUM_ELEMS * sizeof(int)))); - HIPCHECK(hipMemcpy(ipc_dptr, ipc_hptr, (NUM_ELEMS * sizeof(int)), hipMemcpyHostToDevice)); - - mProcess->WriteHandleToShmem(ipc_handle); - - mProcess->WaitTillAllChildReads(); - - } else { - ipc_out_hptr = new int[NUM_ELEMS]; - memset(ipc_out_hptr, 0x00, (NUM_ELEMS * sizeof(int))); - - hipIpcMemHandle_t ipc_handle; - mProcess->ReadHandleFromShmem(ipc_handle); - // Open handle to get dev_ptr - HIPCHECK(hipIpcOpenMemHandle((void**)&ipc_out_dptr, ipc_handle, hipIpcMemLazyEnablePeerAccess)); - - HIPCHECK(hipMemcpy(ipc_out_hptr, ipc_out_dptr, (NUM_ELEMS * sizeof(int)), - hipMemcpyDeviceToHost)); - for (size_t idx = 0; idx < NUM_ELEMS; ++idx) { - if (ipc_out_hptr[idx] != idx) { - std::cout<<"Failing @ idx: "<< idx << std::endl; - } - } - mProcess->NotifyParentDone(); - HIPCHECK(hipIpcCloseMemHandle(ipc_out_dptr)); - delete[] ipc_out_hptr; - } - - if (pid != 0) { - delete mProcess; - } - -#endif /* __unix__ */ - -} - - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - multi_process((N < 64) ? N : 64, debug_test); - passed(); -} +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../test_common.cpp + * TEST: %t + * HIT_END + */ + +#include "test_common.h" +#include "MultiProcess.h" + +#define NUM_ELEMS 1024 +#define OFFSET 128 + +void multi_process(int num_process, bool debug_process) { + +#ifdef __unix__ + + int* ipc_dptr = nullptr; + int* ipc_hptr = nullptr; + int* ipc_out_dptr = nullptr; + int* ipc_out_hptr = nullptr; + int* ipc_offset_dptr = nullptr; + + MultiProcess* mProcess = new MultiProcess(num_process); + mProcess->CreateShmem(); + pid_t pid = mProcess->SpawnProcess(debug_process); + + // Parent Process + if (pid != 0) { + hipIpcMemHandle_t ipc_handle; + memset(&ipc_handle, 0x00, sizeof(hipIpcMemHandle_t)); + + HIPCHECK(hipMalloc((void**)&ipc_dptr, NUM_ELEMS * sizeof(int))); + // Add offset to the dev_ptr + ipc_offset_dptr = ipc_dptr + OFFSET; + // Get handle for the offsetted device_ptr + HIPCHECK(hipIpcGetMemHandle(&ipc_handle, ipc_offset_dptr)); + + ipc_hptr = new int[NUM_ELEMS]; + for (size_t idx = 0; idx < NUM_ELEMS; ++idx) { + ipc_hptr[idx] = idx; + } + + HIPCHECK(hipMemset(ipc_dptr, 0x00, (NUM_ELEMS * sizeof(int)))); + HIPCHECK(hipMemcpy(ipc_dptr, ipc_hptr, (NUM_ELEMS * sizeof(int)), hipMemcpyHostToDevice)); + + mProcess->WriteHandleToShmem(ipc_handle); + + mProcess->WaitTillAllChildReads(); + + } else { + ipc_out_hptr = new int[NUM_ELEMS]; + memset(ipc_out_hptr, 0x00, (NUM_ELEMS * sizeof(int))); + + hipIpcMemHandle_t ipc_handle; + mProcess->ReadHandleFromShmem(ipc_handle); + // Open handle to get dev_ptr + HIPCHECK(hipIpcOpenMemHandle((void**)&ipc_out_dptr, ipc_handle, hipIpcMemLazyEnablePeerAccess)); + + HIPCHECK(hipMemcpy(ipc_out_hptr, ipc_out_dptr, (NUM_ELEMS * sizeof(int)), + hipMemcpyDeviceToHost)); + for (size_t idx = 0; idx < NUM_ELEMS; ++idx) { + if (ipc_out_hptr[idx] != idx) { + std::cout<<"Failing @ idx: "<< idx << std::endl; + } + } + mProcess->NotifyParentDone(); + HIPCHECK(hipIpcCloseMemHandle(ipc_out_dptr)); + delete[] ipc_out_hptr; + } + + if (pid != 0) { + delete mProcess; + } + +#endif /* __unix__ */ + +} + + +int main(int argc, char* argv[]) { + HipTest::parseStandardArguments(argc, argv, true); + multi_process((N < 64) ? N : 64, debug_test); + passed(); +} diff --git a/hipamd/tests/src/runtimeApi/device/hipChooseDevice.cpp b/hipamd/tests/src/runtimeApi/device/hipChooseDevice.cpp index 3868f9dc9e..351d7e98a7 100644 --- a/hipamd/tests/src/runtimeApi/device/hipChooseDevice.cpp +++ b/hipamd/tests/src/runtimeApi/device/hipChooseDevice.cpp @@ -1,49 +1,49 @@ -/* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include "hip/hip_runtime.h" -#include "test_common.h" - -int main(void) { - hipDeviceProp_t prop; - int dev; - - hipGetDevice(&dev); - printf("ID of current HIP device: %d\n", dev); - - memset(&prop, 0, sizeof(hipDeviceProp_t)); - prop.major = 1; - prop.minor = 3; - hipChooseDevice(&dev, &prop); - printf("ID of hip device closest to revision 1.3: %d\n", dev); - - hipSetDevice(dev); - - passed(); -} +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * TEST: %t + * HIT_END + */ + +#include +#include "hip/hip_runtime.h" +#include "test_common.h" + +int main(void) { + hipDeviceProp_t prop; + int dev; + + hipGetDevice(&dev); + printf("ID of current HIP device: %d\n", dev); + + memset(&prop, 0, sizeof(hipDeviceProp_t)); + prop.major = 1; + prop.minor = 3; + hipChooseDevice(&dev, &prop); + printf("ID of hip device closest to revision 1.3: %d\n", dev); + + hipSetDevice(dev); + + passed(); +} diff --git a/hipamd/tests/src/texture/hipTextureMipmapObj2D.cpp b/hipamd/tests/src/texture/hipTextureMipmapObj2D.cpp index 13428ca776..a4980806f1 100644 --- a/hipamd/tests/src/texture/hipTextureMipmapObj2D.cpp +++ b/hipamd/tests/src/texture/hipTextureMipmapObj2D.cpp @@ -1,161 +1,161 @@ -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ -#include -#include -#include -#include - -#include -#include "test_common.h" - -// Height Width Vector -std::vector hw_vector = {2048, 1024, 512, 256, 64}; -std::vector mip_vector = {8, 4, 2, 1}; - -__global__ void tex2DKernel(float* outputData, hipTextureObject_t textureObject, int width, - int height, float level) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - outputData[y * width + x] = tex2DLod(textureObject, x, y, level); -} - -bool runMipMapTest(unsigned int width, unsigned int height, unsigned int mipmap_level) { - bool testResult = true; - - printf("Width: %u Height: %u mip: %u \n", width, height, mipmap_level); - - // Create new width & height to be tested - unsigned int orig_width = width; - unsigned int orig_height = height; - width /= pow(2, mipmap_level); - height /= pow(2, mipmap_level); - unsigned int size = width * height * sizeof(float); - - - float* hData = (float*)malloc(size); - memset(hData, 0, size); - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - hData[i * width + j] = i * width + j; - } - } - printf("hData: "); - for (int i = 0; i < 64; i++) { - printf("%f ", hData[i]); - if (i % width == 0) { - printf("\n"); - } - } - printf("\n"); - - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat); - HIP_ARRAY3D_DESCRIPTOR mipmapped_array_desc; - memset(&mipmapped_array_desc, 0x00, sizeof(HIP_ARRAY3D_DESCRIPTOR)); - mipmapped_array_desc.Width = orig_width; - mipmapped_array_desc.Height = orig_height; - mipmapped_array_desc.Depth = 0; - mipmapped_array_desc.Format = HIP_AD_FORMAT_FLOAT; - mipmapped_array_desc.NumChannels = ((channelDesc.x != 0) + (channelDesc.y != 0) - + (channelDesc.z != 0) + (channelDesc.w != 0)); - mipmapped_array_desc.Flags = 0; - - - hipMipmappedArray* mip_array_ptr; - hipMipmappedArrayCreate(&mip_array_ptr, &mipmapped_array_desc, 2 * mipmap_level); - - hipArray *hipArray = nullptr; - HIPCHECK(hipMipmappedArrayGetLevel(&hipArray, mip_array_ptr, mipmap_level)); - HIPCHECK(hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice)); - - hipResourceDesc resDesc; - memset(&resDesc, 0, sizeof(resDesc)); - resDesc.resType = hipResourceTypeArray; - resDesc.res.array.array = hipArray; - - // Specify texture object parameters - hipTextureDesc texDesc; - memset(&texDesc, 0, sizeof(texDesc)); - texDesc.addressMode[0] = hipAddressModeWrap; - texDesc.addressMode[1] = hipAddressModeWrap; - texDesc.filterMode = hipFilterModePoint; - texDesc.readMode = hipReadModeElementType; - texDesc.normalizedCoords = 0; - - // Create texture object - hipTextureObject_t textureObject = 0; - hipCreateTextureObject(&textureObject, &resDesc, &texDesc, NULL); - - float* dData = NULL; - hipMalloc((void**)&dData, size); - - dim3 dimBlock(16, 16, 1); - dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1); - - hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, textureObject, - width, height, (2 * mipmap_level)); - - hipDeviceSynchronize(); - - float* hOutputData = (float*)malloc(size); - memset(hOutputData, 0, size); - hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost); - - printf("dData: "); - for (int i = 0; i < 64; i++) { - printf("%f ", hOutputData[i]); - if (i % width == 0) { - printf("\n"); - } - } - printf("\n"); - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - if (hData[i * width + j] != hOutputData[i * width + j]) { - printf("Difference [ %d %d ]:%f ----%f\n", i, j, hData[i * width + j], - hOutputData[i * width + j]); - testResult = false; - break; - } - } - } - hipDestroyTextureObject(textureObject); - hipFree(dData); - hipFreeArray(hipArray); - return testResult; -} - - -bool runTest(int argc, char** argv) { - bool testResult = true; - - for (auto& hw: hw_vector) { - for (auto& mip: mip_vector) { - if ((hw / static_cast(pow (2,(mip * 2)))) > 0) { - testResult |= runMipMapTest(hw, hw, mip); - } - } - } - - printf("\n"); - return testResult; -} - -int main(int argc, char** argv) { - bool testResult = true; - -#ifdef _WIN32 - testResult = runTest(argc, argv); -#else - std::cout<<"Mipmaps are Supported only on windows, skipping the test"< +#include +#include +#include + +#include +#include "test_common.h" + +// Height Width Vector +std::vector hw_vector = {2048, 1024, 512, 256, 64}; +std::vector mip_vector = {8, 4, 2, 1}; + +__global__ void tex2DKernel(float* outputData, hipTextureObject_t textureObject, int width, + int height, float level) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + outputData[y * width + x] = tex2DLod(textureObject, x, y, level); +} + +bool runMipMapTest(unsigned int width, unsigned int height, unsigned int mipmap_level) { + bool testResult = true; + + printf("Width: %u Height: %u mip: %u \n", width, height, mipmap_level); + + // Create new width & height to be tested + unsigned int orig_width = width; + unsigned int orig_height = height; + width /= pow(2, mipmap_level); + height /= pow(2, mipmap_level); + unsigned int size = width * height * sizeof(float); + + + float* hData = (float*)malloc(size); + memset(hData, 0, size); + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + hData[i * width + j] = i * width + j; + } + } + printf("hData: "); + for (int i = 0; i < 64; i++) { + printf("%f ", hData[i]); + if (i % width == 0) { + printf("\n"); + } + } + printf("\n"); + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat); + HIP_ARRAY3D_DESCRIPTOR mipmapped_array_desc; + memset(&mipmapped_array_desc, 0x00, sizeof(HIP_ARRAY3D_DESCRIPTOR)); + mipmapped_array_desc.Width = orig_width; + mipmapped_array_desc.Height = orig_height; + mipmapped_array_desc.Depth = 0; + mipmapped_array_desc.Format = HIP_AD_FORMAT_FLOAT; + mipmapped_array_desc.NumChannels = ((channelDesc.x != 0) + (channelDesc.y != 0) + + (channelDesc.z != 0) + (channelDesc.w != 0)); + mipmapped_array_desc.Flags = 0; + + + hipMipmappedArray* mip_array_ptr; + hipMipmappedArrayCreate(&mip_array_ptr, &mipmapped_array_desc, 2 * mipmap_level); + + hipArray *hipArray = nullptr; + HIPCHECK(hipMipmappedArrayGetLevel(&hipArray, mip_array_ptr, mipmap_level)); + HIPCHECK(hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Specify texture object parameters + hipTextureDesc texDesc; + memset(&texDesc, 0, sizeof(texDesc)); + texDesc.addressMode[0] = hipAddressModeWrap; + texDesc.addressMode[1] = hipAddressModeWrap; + texDesc.filterMode = hipFilterModePoint; + texDesc.readMode = hipReadModeElementType; + texDesc.normalizedCoords = 0; + + // Create texture object + hipTextureObject_t textureObject = 0; + hipCreateTextureObject(&textureObject, &resDesc, &texDesc, NULL); + + float* dData = NULL; + hipMalloc((void**)&dData, size); + + dim3 dimBlock(16, 16, 1); + dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1); + + hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, textureObject, + width, height, (2 * mipmap_level)); + + hipDeviceSynchronize(); + + float* hOutputData = (float*)malloc(size); + memset(hOutputData, 0, size); + hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost); + + printf("dData: "); + for (int i = 0; i < 64; i++) { + printf("%f ", hOutputData[i]); + if (i % width == 0) { + printf("\n"); + } + } + printf("\n"); + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + if (hData[i * width + j] != hOutputData[i * width + j]) { + printf("Difference [ %d %d ]:%f ----%f\n", i, j, hData[i * width + j], + hOutputData[i * width + j]); + testResult = false; + break; + } + } + } + hipDestroyTextureObject(textureObject); + hipFree(dData); + hipFreeArray(hipArray); + return testResult; +} + + +bool runTest(int argc, char** argv) { + bool testResult = true; + + for (auto& hw: hw_vector) { + for (auto& mip: mip_vector) { + if ((hw / static_cast(pow (2,(mip * 2)))) > 0) { + testResult |= runMipMapTest(hw, hw, mip); + } + } + } + + printf("\n"); + return testResult; +} + +int main(int argc, char** argv) { + bool testResult = true; + +#ifdef _WIN32 + testResult = runTest(argc, argv); +#else + std::cout<<"Mipmaps are Supported only on windows, skipping the test"<