From 4fbb6d8fa1d88f7abb47ea0aaa891cfd23f3d1aa Mon Sep 17 00:00:00 2001 From: Tao Sang Date: Mon, 4 May 2020 18:06:34 -0400 Subject: [PATCH] Support performance tests Support performance tests while direct tests commands keep unchanged. To build performance tests, run "make build_perf". To run all performance testis, run "make perf". To run specific tests, for example, run /usr/bin/ctest -C performance -R performance_tests/perfDispatch --verbose To run individual test, for example, run performance_tests/memory/hipPerfMemMallocCpyFree Change-Id: I168c1b9ef1ec21b392d48648d0c71e8fbd37d57b [ROCm/hip-tests commit: ec700116bcfe35cb69b3d754316bf669c240fe43] --- .../memory/hipPerfMemMallocCpyFree.cpp | 114 +++++++ .../hipPerfBufferCopyRectSpeed.cpp | 281 +++++++++++++++++ .../perfDispatch/hipPerfBufferCopySpeed.cpp | 287 ++++++++++++++++++ .../perfDispatch/hipPerfDispatchSpeed.cpp | 210 +++++++++++++ .../perftests/perfDispatch/timer.cpp | 116 +++++++ .../hip-tests/perftests/perfDispatch/timer.h | 28 ++ 6 files changed, 1036 insertions(+) create mode 100644 projects/hip-tests/perftests/memory/hipPerfMemMallocCpyFree.cpp create mode 100644 projects/hip-tests/perftests/perfDispatch/hipPerfBufferCopyRectSpeed.cpp create mode 100644 projects/hip-tests/perftests/perfDispatch/hipPerfBufferCopySpeed.cpp create mode 100644 projects/hip-tests/perftests/perfDispatch/hipPerfDispatchSpeed.cpp create mode 100644 projects/hip-tests/perftests/perfDispatch/timer.cpp create mode 100644 projects/hip-tests/perftests/perfDispatch/timer.h diff --git a/projects/hip-tests/perftests/memory/hipPerfMemMallocCpyFree.cpp b/projects/hip-tests/perftests/memory/hipPerfMemMallocCpyFree.cpp new file mode 100644 index 0000000000..d58fdb381e --- /dev/null +++ b/projects/hip-tests/perftests/memory/hipPerfMemMallocCpyFree.cpp @@ -0,0 +1,114 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "test_common.h" +#include +#include + +/* HIT_START + * BUILD: %t %s ../../src/test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * TEST: %t + * HIT_END + */ + +#define NUM_SIZE 19 //size up to 16M +#define NUM_ITER 500 //Total GPU memory up to 16M*500=8G + +void valSet(int* A, int val, size_t size) { + size_t len = size / sizeof(int); + for (int i = 0; i < len; i++) { + A[i] = val; + } +} + +void setup(size_t *size, const int num, int **pA) { + std::cout << "size: "; + for (int i = 0; i < num; i++) { + size[i] = 1 << (i + 6); + std::cout << size[i] << " "; + } + std::cout << std::endl; + *pA = (int*)malloc(size[num - 1]); + valSet(*pA, 1, size[num - 1]); +} + +void testInit(size_t size, int *A) { + int *Ad; + clock_t start = clock(); + hipMalloc(&Ad, size); //hip::init() will be called + clock_t end = clock(); + double uS = (end - start) * 1000000. / CLOCKS_PER_SEC; + std::cout << "Initial" << std::endl; + std::cout << "hipMalloc(" << size << ") cost " << uS << "us" << std::endl; + + start = clock(); + hipMemcpy(Ad, A, size, hipMemcpyHostToDevice); + hipDeviceSynchronize(); + end = clock(); + uS = (end - start) * 1000000. / CLOCKS_PER_SEC; + std::cout << "hipMemcpy(" << size << ") cost " << uS << "us" << std::endl; + + start = clock(); + hipFree(Ad); + end = clock(); + uS = (end - start) * 1000000. / CLOCKS_PER_SEC; + std::cout << "hipFree(" << size << ") cost " << uS << "us" << std::endl; +} + +int main() { + double uS; + clock_t start, end; + size_t size[NUM_SIZE] = { 0 }; + int *Ad[NUM_ITER] = { nullptr }; + int *A; + + setup(size, NUM_SIZE, &A); + testInit(size[0], A); + + for (int i = 0; i < NUM_SIZE; i++) { + std::cout << size[i] << std::endl; + start = clock(); + for (int j = 0; j < NUM_ITER; j++) { + HIPCHECK(hipMalloc(&Ad[j], size[i])); + } + end = clock(); + uS = (end - start) * 1000000. / (NUM_ITER * CLOCKS_PER_SEC); + std::cout << "hipMalloc(" << size[i] << ") cost " << uS << "us" << std::endl; + + start = clock(); + for (int j = 0; j < NUM_ITER; j++) { + HIPCHECK(hipMemcpy(Ad[j], A, size[i], hipMemcpyHostToDevice)); + } + hipDeviceSynchronize(); + end = clock(); + uS = (end - start) * 1000000. / (NUM_ITER * CLOCKS_PER_SEC); + std::cout << "hipMemcpy(" << size[i] << ") cost " << uS << "us" << std::endl; + + start = clock(); + for (int j = 0; j < NUM_ITER; j++) { + HIPCHECK(hipFree(Ad[j])); + Ad[j] = nullptr; + } + end = clock(); + double uS = (end - start) * 1000000. / (NUM_ITER * CLOCKS_PER_SEC); + std::cout << "hipFree(" << size[i] << ") cost " << uS << "us" << std::endl; + } + free(A); + passed(); +} diff --git a/projects/hip-tests/perftests/perfDispatch/hipPerfBufferCopyRectSpeed.cpp b/projects/hip-tests/perftests/perfDispatch/hipPerfBufferCopyRectSpeed.cpp new file mode 100644 index 0000000000..5000904af9 --- /dev/null +++ b/projects/hip-tests/perftests/perfDispatch/hipPerfBufferCopyRectSpeed.cpp @@ -0,0 +1,281 @@ +#include +#include +#include +#include + +#include "timer.h" +#include "test_common.h" + +/* HIT_START + * BUILD: %t %s ../../src/test_common.cpp timer.cpp EXCLUDE_HIP_PLATFORM nvcc + * TEST: %t + * HIT_END + */ + +// Quiet pesky warnings +#ifdef WIN_OS +#define SNPRINTF sprintf_s +#else +#define SNPRINTF snprintf +#endif + +#define NUM_SIZES 8 +//4KB, 8KB, 64KB, 256KB, 1 MB, 4MB, 16 MB, 16MB+10 +static const unsigned int Sizes[NUM_SIZES] = {4096, 8192, 65536, 262144, 1048576, 4194304, 16777216, 16777216+10}; + +static const unsigned int Iterations[2] = {1, 1000}; + +#define BUF_TYPES 4 +// 16 ways to combine 4 different buffer types +#define NUM_SUBTESTS (BUF_TYPES*BUF_TYPES) + +#define CHECK_RESULT(test, msg) \ + if ((test)) \ + { \ + printf("\n%s\n", msg); \ + abort(); \ + } + +void setData(void *ptr, unsigned int size, char value) +{ + char *ptr2 = (char *)ptr; + for (unsigned int i = 0; i < size ; i++) + { + ptr2[i] = value; + } +} + +void checkData(void *ptr, unsigned int size, char value) +{ + char *ptr2 = (char *)ptr; + for (unsigned int i = 0; i < size; i++) + { + if (ptr2[i] != value) + { + printf("Data validation failed at %d! Got 0x%08x\n", i, ptr2[i]); + printf("Expected 0x%08x\n", value); + CHECK_RESULT(true, "Data validation failed!"); + break; + } + } +} + + +int main(int argc, char* argv[]) { + HipTest::parseStandardArguments(argc, argv, true); + + hipError_t err = hipSuccess; + hipDeviceProp_t props = {0}; + hipGetDeviceProperties(&props, p_gpuDevice); + CHECK_RESULT(err != hipSuccess, "hipGetDeviceProperties failed" ); + printf("Set device to %d : %s\n", p_gpuDevice, props.name); + printf("Legend: unp - unpinned(malloc), hM - hipMalloc(device)\n"); + printf(" hHR - hipHostRegister(pinned), hHM - hipHostMalloc(prePinned)\n"); + err = hipSetDevice(p_gpuDevice); + CHECK_RESULT(err != hipSuccess, "hipSetDevice failed" ); + + unsigned int bufSize_; + bool hostMalloc[2] = {false}; + bool hostRegister[2] = {false}; + bool unpinnedMalloc[2] = {false}; + unsigned int numIter; + void *memptr[2] = {NULL}; + void *alignedmemptr[2] = {NULL}; + void* srcBuffer = NULL; + void* dstBuffer = NULL; + + int numTests = (p_tests == -1) ? (NUM_SIZES*NUM_SUBTESTS*2 - 1) : p_tests; + int test = (p_tests == -1) ? 0 : p_tests; + + for(;test <= numTests; test++) + { + unsigned int srcTest = (test / NUM_SIZES) % BUF_TYPES; + unsigned int dstTest = (test / (NUM_SIZES*BUF_TYPES)) % BUF_TYPES; + bufSize_ = Sizes[test % NUM_SIZES]; + hostMalloc[0] = hostMalloc[1] = false; + hostRegister[0] = hostRegister[1] = false; + unpinnedMalloc[0] = unpinnedMalloc[1] = false; + srcBuffer = dstBuffer = 0; + memptr[0] = memptr[1] = NULL; + alignedmemptr[0] = alignedmemptr[1] = NULL; + + size_t width = static_cast(sqrt(static_cast(bufSize_))); + + if (srcTest == 3) + { + hostRegister[0] = true; + } + else if (srcTest == 2) + { + hostMalloc[0] = true; + } + else if (srcTest == 1) + { + unpinnedMalloc[0] = true; + } + + if (dstTest == 1) + { + unpinnedMalloc[1] = true; + } + else if (dstTest == 2) + { + hostMalloc[1] = true; + } + else if (dstTest == 3) + { + hostRegister[1] = true; + } + + numIter = Iterations[test / (NUM_SIZES * NUM_SUBTESTS)]; + + if (hostMalloc[0]) + { + err = hipHostMalloc((void**)&srcBuffer, bufSize_, 0); + setData(srcBuffer, bufSize_, 0xd0); + CHECK_RESULT(err != hipSuccess, "hipHostMalloc failed"); + } + else if (hostRegister[0]) + { + memptr[0] = malloc(bufSize_ + 4096); + alignedmemptr[0] = (void*)(((size_t)memptr[0] + 4095) & ~4095); + srcBuffer = alignedmemptr[0]; + setData(srcBuffer, bufSize_, 0xd0); + err = hipHostRegister(srcBuffer, bufSize_, 0); + CHECK_RESULT(err != hipSuccess, "hipHostRegister failed"); + } + else if (unpinnedMalloc[0]) + { + memptr[0] = malloc(bufSize_ + 4096); + alignedmemptr[0] = (void*)(((size_t)memptr[0] + 4095) & ~4095); + srcBuffer = alignedmemptr[0]; + setData(srcBuffer, bufSize_, 0xd0); + } + else + { + err = hipMalloc(&srcBuffer, bufSize_); + CHECK_RESULT(err != hipSuccess, "hipMalloc failed"); + err = hipMemset(srcBuffer, 0xd0, bufSize_); + CHECK_RESULT(err != hipSuccess, "hipMemset failed"); + } + + if (hostMalloc[1]) + { + err = hipHostMalloc((void**)&dstBuffer, bufSize_, 0); + CHECK_RESULT(err != hipSuccess, "hipHostMalloc failed"); + } + else if (hostRegister[1]) + { + memptr[1] = malloc(bufSize_ + 4096); + alignedmemptr[1] = (void*)(((size_t)memptr[1] + 4095) & ~4095); + dstBuffer = alignedmemptr[1]; + err = hipHostRegister(dstBuffer, bufSize_, 0); + CHECK_RESULT(err != hipSuccess, "hipHostRegister failed"); + } + else if (unpinnedMalloc[1]) + { + memptr[1] = malloc(bufSize_ + 4096); + alignedmemptr[1] = (void*)(((size_t)memptr[1] + 4095) & ~4095); + dstBuffer = alignedmemptr[1]; + } + else + { + err = hipMalloc(&dstBuffer, bufSize_); + CHECK_RESULT(err != hipSuccess, "hipMalloc failed"); + } + + CPerfCounter timer; + + //warm up + err = hipMemcpy2D(dstBuffer, width, srcBuffer, width, width, width, hipMemcpyDefault); + CHECK_RESULT(err, "hipMemcpy2D failed"); + + timer.Reset(); + timer.Start(); + for (unsigned int i = 0; i < numIter; i++) + { + err = hipMemcpy2DAsync(dstBuffer, width, srcBuffer, width, width, width, hipMemcpyDefault, NULL); + CHECK_RESULT(err, "hipMemcpyAsync2D failed"); + } + err = hipDeviceSynchronize(); + CHECK_RESULT(err, "hipDeviceSynchronize failed"); + timer.Stop(); + double sec = timer.GetElapsedTime(); + + // Buffer copy bandwidth in GB/s + double perf = ((double)bufSize_*numIter*(double)(1e-09)) / sec; + + const char *strSrc = NULL; + const char *strDst = NULL; + if (hostMalloc[0]) + strSrc = "hHM"; + else if (hostRegister[0]) + strSrc = "hHR"; + else if (unpinnedMalloc[0]) + strSrc = "unp"; + else + strSrc = "hM"; + + if (hostMalloc[1]) + strDst = "hHM"; + else if (hostRegister[1]) + strDst = "hHR"; + else if (unpinnedMalloc[1]) + strDst = "unp"; + else + strDst = "hM"; + // Double results when src and dst are both on device + if ((!hostMalloc[0] && !hostRegister[0] && !unpinnedMalloc[0]) && + (!hostMalloc[1] && !hostRegister[1] && !unpinnedMalloc[1])) + perf *= 2.0; + // Double results when src and dst are both in sysmem + if ((hostMalloc[0] || hostRegister[0] || unpinnedMalloc[0]) && + (hostMalloc[1] || hostRegister[1] || unpinnedMalloc[1])) + perf *= 2.0; + + char buf[256]; + SNPRINTF(buf, sizeof(buf), "HIPPerfBufferCopyRectSpeed[%d]\t(%8d bytes)\ts:%s d:%s\ti:%4d\t(GB/s) perf\t%f", + test, bufSize_, strSrc, strDst, numIter, (float)perf); + printf("%s\n", buf); + + //Free src + if (hostMalloc[0]) + { + hipHostFree(srcBuffer); + } + else if (hostRegister[0]) + { + hipHostUnregister(srcBuffer); + free(memptr[0]); + } + else if (unpinnedMalloc[0]) + { + free(memptr[0]); + } + else + { + hipFree(srcBuffer); + } + + //Free dst + if (hostMalloc[1]) + { + hipHostFree(dstBuffer); + } + else if (hostRegister[1]) + { + hipHostUnregister(dstBuffer); + free(memptr[1]); + } + else if (unpinnedMalloc[1]) + { + free(memptr[1]); + } + else + { + hipFree(dstBuffer); + } + } + + passed(); +} diff --git a/projects/hip-tests/perftests/perfDispatch/hipPerfBufferCopySpeed.cpp b/projects/hip-tests/perftests/perfDispatch/hipPerfBufferCopySpeed.cpp new file mode 100644 index 0000000000..6f284ae7fb --- /dev/null +++ b/projects/hip-tests/perftests/perfDispatch/hipPerfBufferCopySpeed.cpp @@ -0,0 +1,287 @@ +#include +#include +#include +#include + +#include "timer.h" +#include "test_common.h" + +/* HIT_START + * BUILD: %t %s ../../src/test_common.cpp timer.cpp EXCLUDE_HIP_PLATFORM nvcc + * TEST: %t + * HIT_END + */ + +// Quiet pesky warnings +#ifdef WIN_OS +#define SNPRINTF sprintf_s +#else +#define SNPRINTF snprintf +#endif + +#define NUM_SIZES 8 +//4KB, 8KB, 64KB, 256KB, 1 MB, 4MB, 16 MB, 16MB+10 +static const unsigned int Sizes[NUM_SIZES] = {4096, 8192, 65536, 262144, 1048576, 4194304, 16777216, 16777216+10}; + +static const unsigned int Iterations[2] = {1, 1000}; + +#define BUF_TYPES 4 +// 16 ways to combine 4 different buffer types +#define NUM_SUBTESTS (BUF_TYPES*BUF_TYPES) + +#define CHECK_RESULT(test, msg) \ + if ((test)) \ + { \ + printf("\n%s\n", msg); \ + abort(); \ + } + +void setData(void *ptr, unsigned int size, char value) +{ + char *ptr2 = (char *)ptr; + for (unsigned int i = 0; i < size ; i++) + { + ptr2[i] = value; + } +} + +void checkData(void *ptr, unsigned int size, char value) +{ + char *ptr2 = (char *)ptr; + for (unsigned int i = 0; i < size; i++) + { + if (ptr2[i] != value) + { + printf("Data validation failed at %d! Got 0x%08x\n", i, ptr2[i]); + printf("Expected 0x%08x\n", value); + CHECK_RESULT(true, "Data validation failed!"); + break; + } + } +} + + +int main(int argc, char* argv[]) { + HipTest::parseStandardArguments(argc, argv, true); + + hipError_t err = hipSuccess; + hipDeviceProp_t props = {0}; + hipGetDeviceProperties(&props, p_gpuDevice); + CHECK_RESULT(err != hipSuccess, "hipGetDeviceProperties failed" ); + printf("Set device to %d : %s\n", p_gpuDevice, props.name); + printf("Legend: unp - unpinned(malloc), hM - hipMalloc(device)\n"); + printf(" hHR - hipHostRegister(pinned), hHM - hipHostMalloc(prePinned)\n"); + err = hipSetDevice(p_gpuDevice); + CHECK_RESULT(err != hipSuccess, "hipSetDevice failed" ); + + unsigned int bufSize_; + bool hostMalloc[2] = {false}; + bool hostRegister[2] = {false}; + bool unpinnedMalloc[2] = {false}; + unsigned int numIter; + void *memptr[2] = {NULL}; + void *alignedmemptr[2] = {NULL}; + void* srcBuffer = NULL; + void* dstBuffer = NULL; + + int numTests = (p_tests == -1) ? (NUM_SIZES*NUM_SUBTESTS*2 - 1) : p_tests; + int test = (p_tests == -1) ? 0 : p_tests; + + for(;test <= numTests; test++) + { + unsigned int srcTest = (test / NUM_SIZES) % BUF_TYPES; + unsigned int dstTest = (test / (NUM_SIZES*BUF_TYPES)) % BUF_TYPES; + bufSize_ = Sizes[test % NUM_SIZES]; + hostMalloc[0] = hostMalloc[1] = false; + hostRegister[0] = hostRegister[1] = false; + unpinnedMalloc[0] = unpinnedMalloc[1] = false; + srcBuffer = dstBuffer = 0; + memptr[0] = memptr[1] = NULL; + alignedmemptr[0] = alignedmemptr[1] = NULL; + + if (srcTest == 3) + { + hostRegister[0] = true; + } + else if (srcTest == 2) + { + hostMalloc[0] = true; + } + else if (srcTest == 1) + { + unpinnedMalloc[0] = true; + } + + if (dstTest == 1) + { + unpinnedMalloc[1] = true; + } + else if (dstTest == 2) + { + hostMalloc[1] = true; + } + else if (dstTest == 3) + { + hostRegister[1] = true; + } + + numIter = Iterations[test / (NUM_SIZES * NUM_SUBTESTS)]; + + if (hostMalloc[0]) + { + err = hipHostMalloc((void**)&srcBuffer, bufSize_, 0); + setData(srcBuffer, bufSize_, 0xd0); + CHECK_RESULT(err != hipSuccess, "hipHostMalloc failed"); + } + else if (hostRegister[0]) + { + memptr[0] = malloc(bufSize_ + 4096); + alignedmemptr[0] = (void*)(((size_t)memptr[0] + 4095) & ~4095); + srcBuffer = alignedmemptr[0]; + setData(srcBuffer, bufSize_, 0xd0); + err = hipHostRegister(srcBuffer, bufSize_, 0); + CHECK_RESULT(err != hipSuccess, "hipHostRegister failed"); + } + else if (unpinnedMalloc[0]) + { + memptr[0] = malloc(bufSize_ + 4096); + alignedmemptr[0] = (void*)(((size_t)memptr[0] + 4095) & ~4095); + srcBuffer = alignedmemptr[0]; + setData(srcBuffer, bufSize_, 0xd0); + } + else + { + err = hipMalloc(&srcBuffer, bufSize_); + CHECK_RESULT(err != hipSuccess, "hipMalloc failed"); + err = hipMemset(srcBuffer, 0xd0, bufSize_); + CHECK_RESULT(err != hipSuccess, "hipMemset failed"); + } + + if (hostMalloc[1]) + { + err = hipHostMalloc((void**)&dstBuffer, bufSize_, 0); + CHECK_RESULT(err != hipSuccess, "hipHostMalloc failed"); + } + else if (hostRegister[1]) + { + memptr[1] = malloc(bufSize_ + 4096); + alignedmemptr[1] = (void*)(((size_t)memptr[1] + 4095) & ~4095); + dstBuffer = alignedmemptr[1]; + err = hipHostRegister(dstBuffer, bufSize_, 0); + CHECK_RESULT(err != hipSuccess, "hipHostRegister failed"); + } + else if (unpinnedMalloc[1]) + { + memptr[1] = malloc(bufSize_ + 4096); + alignedmemptr[1] = (void*)(((size_t)memptr[1] + 4095) & ~4095); + dstBuffer = alignedmemptr[1]; + } + else + { + err = hipMalloc(&dstBuffer, bufSize_); + CHECK_RESULT(err != hipSuccess, "hipMalloc failed"); + } + + CPerfCounter timer; + + //warm up + err = hipMemcpy(dstBuffer, srcBuffer, bufSize_, hipMemcpyDefault); + CHECK_RESULT(err, "hipMemcpy failed"); + + timer.Reset(); + timer.Start(); + for (unsigned int i = 0; i < numIter; i++) + { + err = hipMemcpyAsync(dstBuffer, srcBuffer, bufSize_, hipMemcpyDefault, NULL); + CHECK_RESULT(err, "hipMemcpyAsync failed"); + } + err = hipDeviceSynchronize(); + CHECK_RESULT(err, "hipDeviceSynchronize failed"); + timer.Stop(); + double sec = timer.GetElapsedTime(); + + // Buffer copy bandwidth in GB/s + double perf = ((double)bufSize_*numIter*(double)(1e-09)) / sec; + + const char *strSrc = NULL; + const char *strDst = NULL; + if (hostMalloc[0]) + strSrc = "hHM"; + else if (hostRegister[0]) + strSrc = "hHR"; + else if (unpinnedMalloc[0]) + strSrc = "unp"; + else + strSrc = "hM"; + + if (hostMalloc[1]) + strDst = "hHM"; + else if (hostRegister[1]) + strDst = "hHR"; + else if (unpinnedMalloc[1]) + strDst = "unp"; + else + strDst = "hM"; + // Double results when src and dst are both on device + if ((!hostMalloc[0] && !hostRegister[0] && !unpinnedMalloc[0]) && + (!hostMalloc[1] && !hostRegister[1] && !unpinnedMalloc[1])) + perf *= 2.0; + // Double results when src and dst are both in sysmem + if ((hostMalloc[0] || hostRegister[0] || unpinnedMalloc[0]) && + (hostMalloc[1] || hostRegister[1] || unpinnedMalloc[1])) + perf *= 2.0; + + char buf[256]; + SNPRINTF(buf, sizeof(buf), "HIPPerfBufferCopySpeed[%d]\t(%8d bytes)\ts:%s d:%s\ti:%4d\t(GB/s) perf\t%f", + test, bufSize_, strSrc, strDst, numIter, (float)perf); + printf("%s\n", buf); + + // Verification + void* temp = malloc(bufSize_ + 4096); + void* chkBuf = (void*)(((size_t)temp + 4095) & ~4095); + err = hipMemcpy(chkBuf, dstBuffer, bufSize_, hipMemcpyDefault); + CHECK_RESULT(err, "hipMemcpy failed"); + checkData(chkBuf, bufSize_, 0xd0); + free(temp); + + //Free src + if (hostMalloc[0]) + { + hipHostFree(srcBuffer); + } + else if (hostRegister[0]) + { + hipHostUnregister(srcBuffer); + free(memptr[0]); + } + else if (unpinnedMalloc[0]) + { + free(memptr[0]); + } + else + { + hipFree(srcBuffer); + } + + //Free dst + if (hostMalloc[1]) + { + hipHostFree(dstBuffer); + } + else if (hostRegister[1]) + { + hipHostUnregister(dstBuffer); + free(memptr[1]); + } + else if (unpinnedMalloc[1]) + { + free(memptr[1]); + } + else + { + hipFree(dstBuffer); + } + } + + passed(); +} diff --git a/projects/hip-tests/perftests/perfDispatch/hipPerfDispatchSpeed.cpp b/projects/hip-tests/perftests/perfDispatch/hipPerfDispatchSpeed.cpp new file mode 100644 index 0000000000..84ba73c3aa --- /dev/null +++ b/projects/hip-tests/perftests/perfDispatch/hipPerfDispatchSpeed.cpp @@ -0,0 +1,210 @@ +#include +#include +#include +#include + +#include "timer.h" +#include "test_common.h" + +/* HIT_START + * BUILD: %t %s ../../src/test_common.cpp timer.cpp EXCLUDE_HIP_PLATFORM nvcc + * TEST: %t + * HIT_END + */ + +// Quiet pesky warnings +#ifdef WIN_OS +#define SNPRINTF sprintf_s +#else +#define SNPRINTF snprintf +#endif + +#define CHAR_BUF_SIZE 512 + +#define CHECK_RESULT(test, msg) \ + if ((test)) \ + { \ + printf("\n%s\n", msg); \ + abort(); \ + } + +typedef struct { + unsigned int iterations; + int flushEvery; +} testStruct; + +testStruct testList[] = +{ + { 1, -1}, + { 1, -1}, + { 10, 1}, + { 10, -1}, + { 100, 1}, + { 100, 10}, + { 100, -1}, + { 1000, 1}, + { 1000, 10}, + { 1000, 100}, + { 1000, -1}, + { 10000, 1}, + { 10000, 10}, + { 10000, 100}, + { 10000, 1000}, + { 10000, -1}, + { 100000, 1}, + { 100000, 10}, + { 100000, 100}, + { 100000, 1000}, + { 100000, 10000}, + { 100000, -1}, +}; + +unsigned int mapTestList[] = {1, 1, 10, 100, 1000, 10000, 100000}; + +__global__ void _dispatchSpeed(float *outBuf) +{ + int i = (blockIdx.x * blockDim.x + threadIdx.x); + if (i < 0) + outBuf[i] = 0.0f; +}; + + +int main(int argc, char* argv[]) { + HipTest::parseStandardArguments(argc, argv, true); + + hipError_t err = hipSuccess; + hipDeviceProp_t props = {0}; + hipGetDeviceProperties(&props, p_gpuDevice); + CHECK_RESULT(err != hipSuccess, "hipGetDeviceProperties failed" ); + printf("Set device to %d : %s\n", p_gpuDevice, props.name); + + unsigned int testListSize = sizeof(testList) / sizeof(testStruct); + int numTests = (p_tests == -1) ? (2*2*testListSize - 1) : p_tests; + int test = (p_tests == -1) ? 0 : p_tests; + + float* srcBuffer = NULL; + unsigned int bufSize_ = 64*sizeof(float); + err = hipMalloc(&srcBuffer, bufSize_); + CHECK_RESULT(err != hipSuccess, "hipMalloc failed"); + + for(;test <= numTests; test++) + { + int openTest = test % testListSize; + bool sleep = false; + bool doWarmup = false; + + if ((test / testListSize) % 2) + { + doWarmup = true; + } + if (test >= (testListSize * 2)) + { + sleep = true; + } + + int threads = (bufSize_ / sizeof(float)); + int threads_per_block = 64; + int blocks = (threads/threads_per_block) + (threads % threads_per_block); + hipEvent_t start, stop; + + // NULL stream check: + err = hipEventCreate(&start); + err = hipEventCreate(&stop); + + CHECK_RESULT(err != hipSuccess, "hipEventCreate failed"); + + if (doWarmup) + { + hipLaunchKernelGGL(_dispatchSpeed, dim3(blocks), dim3(threads_per_block), 0, hipStream_t(0), srcBuffer); + err = hipDeviceSynchronize(); + CHECK_RESULT(err != hipSuccess, "hipDeviceSynchronize failed"); + } + + CPerfCounter timer; + + timer.Reset(); + timer.Start(); + for (unsigned int i = 0; i < testList[openTest].iterations; i++) + { + hipEventRecord(start, NULL); + hipLaunchKernelGGL(_dispatchSpeed, dim3(blocks), dim3(threads_per_block), 0, hipStream_t(0), srcBuffer); + hipEventRecord(stop, NULL); + + if ((testList[openTest].flushEvery > 0) && + (((i + 1) % testList[openTest].flushEvery) == 0)) + { + if (sleep) + { + err = hipDeviceSynchronize(); + CHECK_RESULT(err != hipSuccess, "hipDeviceSynchronize failed"); + } + else + { + do { + err = hipEventQuery(stop); + } while (err == hipErrorNotReady); + } + } + } + if (sleep) + { + err = hipDeviceSynchronize(); + CHECK_RESULT(err != hipSuccess, "hipDeviceSynchronize failed"); + } + else + { + do { + err = hipEventQuery(stop); + } while (err == hipErrorNotReady); + } + timer.Stop(); + + hipEventDestroy(start); + hipEventDestroy(stop); + double sec = timer.GetElapsedTime(); + + // microseconds per launch + double perf = (1000000.f*sec/testList[openTest].iterations); + const char *waitType; + const char *extraChar; + const char *n; + const char *warmup; + if (sleep) + { + waitType = "sleep"; + extraChar = ""; + n = ""; + } + else + { + waitType = "spin"; + n = "n"; + extraChar = " "; + } + if (doWarmup) + { + warmup = "warmup"; + } + else + { + warmup = ""; + } + + + char buf[256]; + if (testList[openTest].flushEvery > 0) + { + SNPRINTF(buf, sizeof(buf), "HIPPerfDispatchSpeed[%3d] %7d dispatches %s%sing every %5d %6s (us/disp) %3f", test, testList[openTest].iterations, + waitType, n, testList[openTest].flushEvery, warmup, (float)perf); + } + else + { + SNPRINTF(buf, sizeof(buf), "HIPPerfDispatchSpeed[%3d] %7d dispatches (%s%s) %6s (us/disp) %3f", test, testList[openTest].iterations, + waitType, extraChar, warmup, (float)perf); + } + printf("%s\n", buf); + } + + hipFree(srcBuffer); + passed(); +} diff --git a/projects/hip-tests/perftests/perfDispatch/timer.cpp b/projects/hip-tests/perftests/perfDispatch/timer.cpp new file mode 100644 index 0000000000..ea9c6ea1d9 --- /dev/null +++ b/projects/hip-tests/perftests/perfDispatch/timer.cpp @@ -0,0 +1,116 @@ +#include "timer.h" + +#include + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#define VC_EXTRALEAN +#include +#pragma comment(lib, "user32") +#endif + +#ifdef __linux__ +#include +#define NANOSECONDS_PER_SEC 1000000000 +#endif + +CPerfCounter::CPerfCounter() : _clocks(0), _start(0) +{ + +#ifdef _WIN32 + + QueryPerformanceFrequency((LARGE_INTEGER *)&_freq); + +#endif + +#ifdef __linux__ + _freq = NANOSECONDS_PER_SEC; +#endif + +} + +CPerfCounter::~CPerfCounter() +{ + // EMPTY! +} + +void +CPerfCounter::Start(void) +{ + +#ifdef _WIN32 + + if( _start ) + { + MessageBox(NULL, "Bad Perf Counter Start", "Error", MB_OK); + exit(0); + } + QueryPerformanceCounter((LARGE_INTEGER *)&_start); + +#endif +#ifdef __linux__ + + struct timespec s; + clock_gettime(CLOCK_MONOTONIC, &s); + _start = (i64)s.tv_sec * NANOSECONDS_PER_SEC + (i64)s.tv_nsec ; + +#endif + +} + +void +CPerfCounter::Stop(void) +{ + i64 n; + +#ifdef _WIN32 + + if( !_start ) + { + MessageBox(NULL, "Bad Perf Counter Stop", "Error", MB_OK); + exit(0); + } + + QueryPerformanceCounter((LARGE_INTEGER *)&n); + +#endif +#ifdef __linux__ + + struct timespec s; + clock_gettime(CLOCK_MONOTONIC, &s); + n = (i64)s.tv_sec * NANOSECONDS_PER_SEC + (i64)s.tv_nsec ; + +#endif + + n -= _start; + _start = 0; + _clocks += n; +} + +void +CPerfCounter::Reset(void) +{ + +#ifdef _WIN32 + if( _start ) + { + MessageBox(NULL, "Bad Perf Counter Reset", "Error", MB_OK); + exit(0); + } +#endif + _clocks = 0; +} + +double +CPerfCounter::GetElapsedTime(void) +{ +#ifdef _WIN32 + if( _start ) { + MessageBox(NULL, "Trying to get time while still running.", "Error", MB_OK); + exit(0); + } +#endif + + return (double)_clocks / (double)_freq; + +} diff --git a/projects/hip-tests/perftests/perfDispatch/timer.h b/projects/hip-tests/perftests/perfDispatch/timer.h new file mode 100644 index 0000000000..28bfeff74b --- /dev/null +++ b/projects/hip-tests/perftests/perfDispatch/timer.h @@ -0,0 +1,28 @@ +#ifndef _TIMER_H_ +#define _TIMER_H_ + +#ifdef _WIN32 +typedef __int64 i64 ; +#endif +#ifdef __linux__ +typedef long long i64; +#endif + +class CPerfCounter { + +public: + CPerfCounter(); + ~CPerfCounter(); + void Start(void); + void Stop(void); + void Reset(void); + double GetElapsedTime(void); + +private: + + i64 _freq; + i64 _clocks; + i64 _start; +}; + +#endif // _TIMER_H_