Support performance tests
Support performance tests while direct tests commands keep unchanged.
To build performance tests, run "make build_perf".
To run all performance testis, run "make perf".
To run specific tests, for example, run
/usr/bin/ctest -C performance -R performance_tests/perfDispatch --verbose
To run individual test, for example, run
performance_tests/memory/hipPerfMemMallocCpyFree
Change-Id: I168c1b9ef1ec21b392d48648d0c71e8fbd37d57b
[ROCm/hip-tests commit: ec700116bc]
This commit is contained in:
@@ -0,0 +1,114 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
#include <iostream>
|
||||
#include <time.h>
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../src/test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#define NUM_SIZE 19 //size up to 16M
|
||||
#define NUM_ITER 500 //Total GPU memory up to 16M*500=8G
|
||||
|
||||
void valSet(int* A, int val, size_t size) {
|
||||
size_t len = size / sizeof(int);
|
||||
for (int i = 0; i < len; i++) {
|
||||
A[i] = val;
|
||||
}
|
||||
}
|
||||
|
||||
void setup(size_t *size, const int num, int **pA) {
|
||||
std::cout << "size: ";
|
||||
for (int i = 0; i < num; i++) {
|
||||
size[i] = 1 << (i + 6);
|
||||
std::cout << size[i] << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
*pA = (int*)malloc(size[num - 1]);
|
||||
valSet(*pA, 1, size[num - 1]);
|
||||
}
|
||||
|
||||
void testInit(size_t size, int *A) {
|
||||
int *Ad;
|
||||
clock_t start = clock();
|
||||
hipMalloc(&Ad, size); //hip::init() will be called
|
||||
clock_t end = clock();
|
||||
double uS = (end - start) * 1000000. / CLOCKS_PER_SEC;
|
||||
std::cout << "Initial" << std::endl;
|
||||
std::cout << "hipMalloc(" << size << ") cost " << uS << "us" << std::endl;
|
||||
|
||||
start = clock();
|
||||
hipMemcpy(Ad, A, size, hipMemcpyHostToDevice);
|
||||
hipDeviceSynchronize();
|
||||
end = clock();
|
||||
uS = (end - start) * 1000000. / CLOCKS_PER_SEC;
|
||||
std::cout << "hipMemcpy(" << size << ") cost " << uS << "us" << std::endl;
|
||||
|
||||
start = clock();
|
||||
hipFree(Ad);
|
||||
end = clock();
|
||||
uS = (end - start) * 1000000. / CLOCKS_PER_SEC;
|
||||
std::cout << "hipFree(" << size << ") cost " << uS << "us" << std::endl;
|
||||
}
|
||||
|
||||
int main() {
|
||||
double uS;
|
||||
clock_t start, end;
|
||||
size_t size[NUM_SIZE] = { 0 };
|
||||
int *Ad[NUM_ITER] = { nullptr };
|
||||
int *A;
|
||||
|
||||
setup(size, NUM_SIZE, &A);
|
||||
testInit(size[0], A);
|
||||
|
||||
for (int i = 0; i < NUM_SIZE; i++) {
|
||||
std::cout << size[i] << std::endl;
|
||||
start = clock();
|
||||
for (int j = 0; j < NUM_ITER; j++) {
|
||||
HIPCHECK(hipMalloc(&Ad[j], size[i]));
|
||||
}
|
||||
end = clock();
|
||||
uS = (end - start) * 1000000. / (NUM_ITER * CLOCKS_PER_SEC);
|
||||
std::cout << "hipMalloc(" << size[i] << ") cost " << uS << "us" << std::endl;
|
||||
|
||||
start = clock();
|
||||
for (int j = 0; j < NUM_ITER; j++) {
|
||||
HIPCHECK(hipMemcpy(Ad[j], A, size[i], hipMemcpyHostToDevice));
|
||||
}
|
||||
hipDeviceSynchronize();
|
||||
end = clock();
|
||||
uS = (end - start) * 1000000. / (NUM_ITER * CLOCKS_PER_SEC);
|
||||
std::cout << "hipMemcpy(" << size[i] << ") cost " << uS << "us" << std::endl;
|
||||
|
||||
start = clock();
|
||||
for (int j = 0; j < NUM_ITER; j++) {
|
||||
HIPCHECK(hipFree(Ad[j]));
|
||||
Ad[j] = nullptr;
|
||||
}
|
||||
end = clock();
|
||||
double uS = (end - start) * 1000000. / (NUM_ITER * CLOCKS_PER_SEC);
|
||||
std::cout << "hipFree(" << size[i] << ") cost " << uS << "us" << std::endl;
|
||||
}
|
||||
free(A);
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,281 @@
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <complex>
|
||||
|
||||
#include "timer.h"
|
||||
#include "test_common.h"
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../src/test_common.cpp timer.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
// Quiet pesky warnings
|
||||
#ifdef WIN_OS
|
||||
#define SNPRINTF sprintf_s
|
||||
#else
|
||||
#define SNPRINTF snprintf
|
||||
#endif
|
||||
|
||||
#define NUM_SIZES 8
|
||||
//4KB, 8KB, 64KB, 256KB, 1 MB, 4MB, 16 MB, 16MB+10
|
||||
static const unsigned int Sizes[NUM_SIZES] = {4096, 8192, 65536, 262144, 1048576, 4194304, 16777216, 16777216+10};
|
||||
|
||||
static const unsigned int Iterations[2] = {1, 1000};
|
||||
|
||||
#define BUF_TYPES 4
|
||||
// 16 ways to combine 4 different buffer types
|
||||
#define NUM_SUBTESTS (BUF_TYPES*BUF_TYPES)
|
||||
|
||||
#define CHECK_RESULT(test, msg) \
|
||||
if ((test)) \
|
||||
{ \
|
||||
printf("\n%s\n", msg); \
|
||||
abort(); \
|
||||
}
|
||||
|
||||
void setData(void *ptr, unsigned int size, char value)
|
||||
{
|
||||
char *ptr2 = (char *)ptr;
|
||||
for (unsigned int i = 0; i < size ; i++)
|
||||
{
|
||||
ptr2[i] = value;
|
||||
}
|
||||
}
|
||||
|
||||
void checkData(void *ptr, unsigned int size, char value)
|
||||
{
|
||||
char *ptr2 = (char *)ptr;
|
||||
for (unsigned int i = 0; i < size; i++)
|
||||
{
|
||||
if (ptr2[i] != value)
|
||||
{
|
||||
printf("Data validation failed at %d! Got 0x%08x\n", i, ptr2[i]);
|
||||
printf("Expected 0x%08x\n", value);
|
||||
CHECK_RESULT(true, "Data validation failed!");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
|
||||
hipError_t err = hipSuccess;
|
||||
hipDeviceProp_t props = {0};
|
||||
hipGetDeviceProperties(&props, p_gpuDevice);
|
||||
CHECK_RESULT(err != hipSuccess, "hipGetDeviceProperties failed" );
|
||||
printf("Set device to %d : %s\n", p_gpuDevice, props.name);
|
||||
printf("Legend: unp - unpinned(malloc), hM - hipMalloc(device)\n");
|
||||
printf(" hHR - hipHostRegister(pinned), hHM - hipHostMalloc(prePinned)\n");
|
||||
err = hipSetDevice(p_gpuDevice);
|
||||
CHECK_RESULT(err != hipSuccess, "hipSetDevice failed" );
|
||||
|
||||
unsigned int bufSize_;
|
||||
bool hostMalloc[2] = {false};
|
||||
bool hostRegister[2] = {false};
|
||||
bool unpinnedMalloc[2] = {false};
|
||||
unsigned int numIter;
|
||||
void *memptr[2] = {NULL};
|
||||
void *alignedmemptr[2] = {NULL};
|
||||
void* srcBuffer = NULL;
|
||||
void* dstBuffer = NULL;
|
||||
|
||||
int numTests = (p_tests == -1) ? (NUM_SIZES*NUM_SUBTESTS*2 - 1) : p_tests;
|
||||
int test = (p_tests == -1) ? 0 : p_tests;
|
||||
|
||||
for(;test <= numTests; test++)
|
||||
{
|
||||
unsigned int srcTest = (test / NUM_SIZES) % BUF_TYPES;
|
||||
unsigned int dstTest = (test / (NUM_SIZES*BUF_TYPES)) % BUF_TYPES;
|
||||
bufSize_ = Sizes[test % NUM_SIZES];
|
||||
hostMalloc[0] = hostMalloc[1] = false;
|
||||
hostRegister[0] = hostRegister[1] = false;
|
||||
unpinnedMalloc[0] = unpinnedMalloc[1] = false;
|
||||
srcBuffer = dstBuffer = 0;
|
||||
memptr[0] = memptr[1] = NULL;
|
||||
alignedmemptr[0] = alignedmemptr[1] = NULL;
|
||||
|
||||
size_t width = static_cast<size_t>(sqrt(static_cast<float>(bufSize_)));
|
||||
|
||||
if (srcTest == 3)
|
||||
{
|
||||
hostRegister[0] = true;
|
||||
}
|
||||
else if (srcTest == 2)
|
||||
{
|
||||
hostMalloc[0] = true;
|
||||
}
|
||||
else if (srcTest == 1)
|
||||
{
|
||||
unpinnedMalloc[0] = true;
|
||||
}
|
||||
|
||||
if (dstTest == 1)
|
||||
{
|
||||
unpinnedMalloc[1] = true;
|
||||
}
|
||||
else if (dstTest == 2)
|
||||
{
|
||||
hostMalloc[1] = true;
|
||||
}
|
||||
else if (dstTest == 3)
|
||||
{
|
||||
hostRegister[1] = true;
|
||||
}
|
||||
|
||||
numIter = Iterations[test / (NUM_SIZES * NUM_SUBTESTS)];
|
||||
|
||||
if (hostMalloc[0])
|
||||
{
|
||||
err = hipHostMalloc((void**)&srcBuffer, bufSize_, 0);
|
||||
setData(srcBuffer, bufSize_, 0xd0);
|
||||
CHECK_RESULT(err != hipSuccess, "hipHostMalloc failed");
|
||||
}
|
||||
else if (hostRegister[0])
|
||||
{
|
||||
memptr[0] = malloc(bufSize_ + 4096);
|
||||
alignedmemptr[0] = (void*)(((size_t)memptr[0] + 4095) & ~4095);
|
||||
srcBuffer = alignedmemptr[0];
|
||||
setData(srcBuffer, bufSize_, 0xd0);
|
||||
err = hipHostRegister(srcBuffer, bufSize_, 0);
|
||||
CHECK_RESULT(err != hipSuccess, "hipHostRegister failed");
|
||||
}
|
||||
else if (unpinnedMalloc[0])
|
||||
{
|
||||
memptr[0] = malloc(bufSize_ + 4096);
|
||||
alignedmemptr[0] = (void*)(((size_t)memptr[0] + 4095) & ~4095);
|
||||
srcBuffer = alignedmemptr[0];
|
||||
setData(srcBuffer, bufSize_, 0xd0);
|
||||
}
|
||||
else
|
||||
{
|
||||
err = hipMalloc(&srcBuffer, bufSize_);
|
||||
CHECK_RESULT(err != hipSuccess, "hipMalloc failed");
|
||||
err = hipMemset(srcBuffer, 0xd0, bufSize_);
|
||||
CHECK_RESULT(err != hipSuccess, "hipMemset failed");
|
||||
}
|
||||
|
||||
if (hostMalloc[1])
|
||||
{
|
||||
err = hipHostMalloc((void**)&dstBuffer, bufSize_, 0);
|
||||
CHECK_RESULT(err != hipSuccess, "hipHostMalloc failed");
|
||||
}
|
||||
else if (hostRegister[1])
|
||||
{
|
||||
memptr[1] = malloc(bufSize_ + 4096);
|
||||
alignedmemptr[1] = (void*)(((size_t)memptr[1] + 4095) & ~4095);
|
||||
dstBuffer = alignedmemptr[1];
|
||||
err = hipHostRegister(dstBuffer, bufSize_, 0);
|
||||
CHECK_RESULT(err != hipSuccess, "hipHostRegister failed");
|
||||
}
|
||||
else if (unpinnedMalloc[1])
|
||||
{
|
||||
memptr[1] = malloc(bufSize_ + 4096);
|
||||
alignedmemptr[1] = (void*)(((size_t)memptr[1] + 4095) & ~4095);
|
||||
dstBuffer = alignedmemptr[1];
|
||||
}
|
||||
else
|
||||
{
|
||||
err = hipMalloc(&dstBuffer, bufSize_);
|
||||
CHECK_RESULT(err != hipSuccess, "hipMalloc failed");
|
||||
}
|
||||
|
||||
CPerfCounter timer;
|
||||
|
||||
//warm up
|
||||
err = hipMemcpy2D(dstBuffer, width, srcBuffer, width, width, width, hipMemcpyDefault);
|
||||
CHECK_RESULT(err, "hipMemcpy2D failed");
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
for (unsigned int i = 0; i < numIter; i++)
|
||||
{
|
||||
err = hipMemcpy2DAsync(dstBuffer, width, srcBuffer, width, width, width, hipMemcpyDefault, NULL);
|
||||
CHECK_RESULT(err, "hipMemcpyAsync2D failed");
|
||||
}
|
||||
err = hipDeviceSynchronize();
|
||||
CHECK_RESULT(err, "hipDeviceSynchronize failed");
|
||||
timer.Stop();
|
||||
double sec = timer.GetElapsedTime();
|
||||
|
||||
// Buffer copy bandwidth in GB/s
|
||||
double perf = ((double)bufSize_*numIter*(double)(1e-09)) / sec;
|
||||
|
||||
const char *strSrc = NULL;
|
||||
const char *strDst = NULL;
|
||||
if (hostMalloc[0])
|
||||
strSrc = "hHM";
|
||||
else if (hostRegister[0])
|
||||
strSrc = "hHR";
|
||||
else if (unpinnedMalloc[0])
|
||||
strSrc = "unp";
|
||||
else
|
||||
strSrc = "hM";
|
||||
|
||||
if (hostMalloc[1])
|
||||
strDst = "hHM";
|
||||
else if (hostRegister[1])
|
||||
strDst = "hHR";
|
||||
else if (unpinnedMalloc[1])
|
||||
strDst = "unp";
|
||||
else
|
||||
strDst = "hM";
|
||||
// Double results when src and dst are both on device
|
||||
if ((!hostMalloc[0] && !hostRegister[0] && !unpinnedMalloc[0]) &&
|
||||
(!hostMalloc[1] && !hostRegister[1] && !unpinnedMalloc[1]))
|
||||
perf *= 2.0;
|
||||
// Double results when src and dst are both in sysmem
|
||||
if ((hostMalloc[0] || hostRegister[0] || unpinnedMalloc[0]) &&
|
||||
(hostMalloc[1] || hostRegister[1] || unpinnedMalloc[1]))
|
||||
perf *= 2.0;
|
||||
|
||||
char buf[256];
|
||||
SNPRINTF(buf, sizeof(buf), "HIPPerfBufferCopyRectSpeed[%d]\t(%8d bytes)\ts:%s d:%s\ti:%4d\t(GB/s) perf\t%f",
|
||||
test, bufSize_, strSrc, strDst, numIter, (float)perf);
|
||||
printf("%s\n", buf);
|
||||
|
||||
//Free src
|
||||
if (hostMalloc[0])
|
||||
{
|
||||
hipHostFree(srcBuffer);
|
||||
}
|
||||
else if (hostRegister[0])
|
||||
{
|
||||
hipHostUnregister(srcBuffer);
|
||||
free(memptr[0]);
|
||||
}
|
||||
else if (unpinnedMalloc[0])
|
||||
{
|
||||
free(memptr[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
hipFree(srcBuffer);
|
||||
}
|
||||
|
||||
//Free dst
|
||||
if (hostMalloc[1])
|
||||
{
|
||||
hipHostFree(dstBuffer);
|
||||
}
|
||||
else if (hostRegister[1])
|
||||
{
|
||||
hipHostUnregister(dstBuffer);
|
||||
free(memptr[1]);
|
||||
}
|
||||
else if (unpinnedMalloc[1])
|
||||
{
|
||||
free(memptr[1]);
|
||||
}
|
||||
else
|
||||
{
|
||||
hipFree(dstBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,287 @@
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <complex>
|
||||
|
||||
#include "timer.h"
|
||||
#include "test_common.h"
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../src/test_common.cpp timer.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
// Quiet pesky warnings
|
||||
#ifdef WIN_OS
|
||||
#define SNPRINTF sprintf_s
|
||||
#else
|
||||
#define SNPRINTF snprintf
|
||||
#endif
|
||||
|
||||
#define NUM_SIZES 8
|
||||
//4KB, 8KB, 64KB, 256KB, 1 MB, 4MB, 16 MB, 16MB+10
|
||||
static const unsigned int Sizes[NUM_SIZES] = {4096, 8192, 65536, 262144, 1048576, 4194304, 16777216, 16777216+10};
|
||||
|
||||
static const unsigned int Iterations[2] = {1, 1000};
|
||||
|
||||
#define BUF_TYPES 4
|
||||
// 16 ways to combine 4 different buffer types
|
||||
#define NUM_SUBTESTS (BUF_TYPES*BUF_TYPES)
|
||||
|
||||
#define CHECK_RESULT(test, msg) \
|
||||
if ((test)) \
|
||||
{ \
|
||||
printf("\n%s\n", msg); \
|
||||
abort(); \
|
||||
}
|
||||
|
||||
void setData(void *ptr, unsigned int size, char value)
|
||||
{
|
||||
char *ptr2 = (char *)ptr;
|
||||
for (unsigned int i = 0; i < size ; i++)
|
||||
{
|
||||
ptr2[i] = value;
|
||||
}
|
||||
}
|
||||
|
||||
void checkData(void *ptr, unsigned int size, char value)
|
||||
{
|
||||
char *ptr2 = (char *)ptr;
|
||||
for (unsigned int i = 0; i < size; i++)
|
||||
{
|
||||
if (ptr2[i] != value)
|
||||
{
|
||||
printf("Data validation failed at %d! Got 0x%08x\n", i, ptr2[i]);
|
||||
printf("Expected 0x%08x\n", value);
|
||||
CHECK_RESULT(true, "Data validation failed!");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
|
||||
hipError_t err = hipSuccess;
|
||||
hipDeviceProp_t props = {0};
|
||||
hipGetDeviceProperties(&props, p_gpuDevice);
|
||||
CHECK_RESULT(err != hipSuccess, "hipGetDeviceProperties failed" );
|
||||
printf("Set device to %d : %s\n", p_gpuDevice, props.name);
|
||||
printf("Legend: unp - unpinned(malloc), hM - hipMalloc(device)\n");
|
||||
printf(" hHR - hipHostRegister(pinned), hHM - hipHostMalloc(prePinned)\n");
|
||||
err = hipSetDevice(p_gpuDevice);
|
||||
CHECK_RESULT(err != hipSuccess, "hipSetDevice failed" );
|
||||
|
||||
unsigned int bufSize_;
|
||||
bool hostMalloc[2] = {false};
|
||||
bool hostRegister[2] = {false};
|
||||
bool unpinnedMalloc[2] = {false};
|
||||
unsigned int numIter;
|
||||
void *memptr[2] = {NULL};
|
||||
void *alignedmemptr[2] = {NULL};
|
||||
void* srcBuffer = NULL;
|
||||
void* dstBuffer = NULL;
|
||||
|
||||
int numTests = (p_tests == -1) ? (NUM_SIZES*NUM_SUBTESTS*2 - 1) : p_tests;
|
||||
int test = (p_tests == -1) ? 0 : p_tests;
|
||||
|
||||
for(;test <= numTests; test++)
|
||||
{
|
||||
unsigned int srcTest = (test / NUM_SIZES) % BUF_TYPES;
|
||||
unsigned int dstTest = (test / (NUM_SIZES*BUF_TYPES)) % BUF_TYPES;
|
||||
bufSize_ = Sizes[test % NUM_SIZES];
|
||||
hostMalloc[0] = hostMalloc[1] = false;
|
||||
hostRegister[0] = hostRegister[1] = false;
|
||||
unpinnedMalloc[0] = unpinnedMalloc[1] = false;
|
||||
srcBuffer = dstBuffer = 0;
|
||||
memptr[0] = memptr[1] = NULL;
|
||||
alignedmemptr[0] = alignedmemptr[1] = NULL;
|
||||
|
||||
if (srcTest == 3)
|
||||
{
|
||||
hostRegister[0] = true;
|
||||
}
|
||||
else if (srcTest == 2)
|
||||
{
|
||||
hostMalloc[0] = true;
|
||||
}
|
||||
else if (srcTest == 1)
|
||||
{
|
||||
unpinnedMalloc[0] = true;
|
||||
}
|
||||
|
||||
if (dstTest == 1)
|
||||
{
|
||||
unpinnedMalloc[1] = true;
|
||||
}
|
||||
else if (dstTest == 2)
|
||||
{
|
||||
hostMalloc[1] = true;
|
||||
}
|
||||
else if (dstTest == 3)
|
||||
{
|
||||
hostRegister[1] = true;
|
||||
}
|
||||
|
||||
numIter = Iterations[test / (NUM_SIZES * NUM_SUBTESTS)];
|
||||
|
||||
if (hostMalloc[0])
|
||||
{
|
||||
err = hipHostMalloc((void**)&srcBuffer, bufSize_, 0);
|
||||
setData(srcBuffer, bufSize_, 0xd0);
|
||||
CHECK_RESULT(err != hipSuccess, "hipHostMalloc failed");
|
||||
}
|
||||
else if (hostRegister[0])
|
||||
{
|
||||
memptr[0] = malloc(bufSize_ + 4096);
|
||||
alignedmemptr[0] = (void*)(((size_t)memptr[0] + 4095) & ~4095);
|
||||
srcBuffer = alignedmemptr[0];
|
||||
setData(srcBuffer, bufSize_, 0xd0);
|
||||
err = hipHostRegister(srcBuffer, bufSize_, 0);
|
||||
CHECK_RESULT(err != hipSuccess, "hipHostRegister failed");
|
||||
}
|
||||
else if (unpinnedMalloc[0])
|
||||
{
|
||||
memptr[0] = malloc(bufSize_ + 4096);
|
||||
alignedmemptr[0] = (void*)(((size_t)memptr[0] + 4095) & ~4095);
|
||||
srcBuffer = alignedmemptr[0];
|
||||
setData(srcBuffer, bufSize_, 0xd0);
|
||||
}
|
||||
else
|
||||
{
|
||||
err = hipMalloc(&srcBuffer, bufSize_);
|
||||
CHECK_RESULT(err != hipSuccess, "hipMalloc failed");
|
||||
err = hipMemset(srcBuffer, 0xd0, bufSize_);
|
||||
CHECK_RESULT(err != hipSuccess, "hipMemset failed");
|
||||
}
|
||||
|
||||
if (hostMalloc[1])
|
||||
{
|
||||
err = hipHostMalloc((void**)&dstBuffer, bufSize_, 0);
|
||||
CHECK_RESULT(err != hipSuccess, "hipHostMalloc failed");
|
||||
}
|
||||
else if (hostRegister[1])
|
||||
{
|
||||
memptr[1] = malloc(bufSize_ + 4096);
|
||||
alignedmemptr[1] = (void*)(((size_t)memptr[1] + 4095) & ~4095);
|
||||
dstBuffer = alignedmemptr[1];
|
||||
err = hipHostRegister(dstBuffer, bufSize_, 0);
|
||||
CHECK_RESULT(err != hipSuccess, "hipHostRegister failed");
|
||||
}
|
||||
else if (unpinnedMalloc[1])
|
||||
{
|
||||
memptr[1] = malloc(bufSize_ + 4096);
|
||||
alignedmemptr[1] = (void*)(((size_t)memptr[1] + 4095) & ~4095);
|
||||
dstBuffer = alignedmemptr[1];
|
||||
}
|
||||
else
|
||||
{
|
||||
err = hipMalloc(&dstBuffer, bufSize_);
|
||||
CHECK_RESULT(err != hipSuccess, "hipMalloc failed");
|
||||
}
|
||||
|
||||
CPerfCounter timer;
|
||||
|
||||
//warm up
|
||||
err = hipMemcpy(dstBuffer, srcBuffer, bufSize_, hipMemcpyDefault);
|
||||
CHECK_RESULT(err, "hipMemcpy failed");
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
for (unsigned int i = 0; i < numIter; i++)
|
||||
{
|
||||
err = hipMemcpyAsync(dstBuffer, srcBuffer, bufSize_, hipMemcpyDefault, NULL);
|
||||
CHECK_RESULT(err, "hipMemcpyAsync failed");
|
||||
}
|
||||
err = hipDeviceSynchronize();
|
||||
CHECK_RESULT(err, "hipDeviceSynchronize failed");
|
||||
timer.Stop();
|
||||
double sec = timer.GetElapsedTime();
|
||||
|
||||
// Buffer copy bandwidth in GB/s
|
||||
double perf = ((double)bufSize_*numIter*(double)(1e-09)) / sec;
|
||||
|
||||
const char *strSrc = NULL;
|
||||
const char *strDst = NULL;
|
||||
if (hostMalloc[0])
|
||||
strSrc = "hHM";
|
||||
else if (hostRegister[0])
|
||||
strSrc = "hHR";
|
||||
else if (unpinnedMalloc[0])
|
||||
strSrc = "unp";
|
||||
else
|
||||
strSrc = "hM";
|
||||
|
||||
if (hostMalloc[1])
|
||||
strDst = "hHM";
|
||||
else if (hostRegister[1])
|
||||
strDst = "hHR";
|
||||
else if (unpinnedMalloc[1])
|
||||
strDst = "unp";
|
||||
else
|
||||
strDst = "hM";
|
||||
// Double results when src and dst are both on device
|
||||
if ((!hostMalloc[0] && !hostRegister[0] && !unpinnedMalloc[0]) &&
|
||||
(!hostMalloc[1] && !hostRegister[1] && !unpinnedMalloc[1]))
|
||||
perf *= 2.0;
|
||||
// Double results when src and dst are both in sysmem
|
||||
if ((hostMalloc[0] || hostRegister[0] || unpinnedMalloc[0]) &&
|
||||
(hostMalloc[1] || hostRegister[1] || unpinnedMalloc[1]))
|
||||
perf *= 2.0;
|
||||
|
||||
char buf[256];
|
||||
SNPRINTF(buf, sizeof(buf), "HIPPerfBufferCopySpeed[%d]\t(%8d bytes)\ts:%s d:%s\ti:%4d\t(GB/s) perf\t%f",
|
||||
test, bufSize_, strSrc, strDst, numIter, (float)perf);
|
||||
printf("%s\n", buf);
|
||||
|
||||
// Verification
|
||||
void* temp = malloc(bufSize_ + 4096);
|
||||
void* chkBuf = (void*)(((size_t)temp + 4095) & ~4095);
|
||||
err = hipMemcpy(chkBuf, dstBuffer, bufSize_, hipMemcpyDefault);
|
||||
CHECK_RESULT(err, "hipMemcpy failed");
|
||||
checkData(chkBuf, bufSize_, 0xd0);
|
||||
free(temp);
|
||||
|
||||
//Free src
|
||||
if (hostMalloc[0])
|
||||
{
|
||||
hipHostFree(srcBuffer);
|
||||
}
|
||||
else if (hostRegister[0])
|
||||
{
|
||||
hipHostUnregister(srcBuffer);
|
||||
free(memptr[0]);
|
||||
}
|
||||
else if (unpinnedMalloc[0])
|
||||
{
|
||||
free(memptr[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
hipFree(srcBuffer);
|
||||
}
|
||||
|
||||
//Free dst
|
||||
if (hostMalloc[1])
|
||||
{
|
||||
hipHostFree(dstBuffer);
|
||||
}
|
||||
else if (hostRegister[1])
|
||||
{
|
||||
hipHostUnregister(dstBuffer);
|
||||
free(memptr[1]);
|
||||
}
|
||||
else if (unpinnedMalloc[1])
|
||||
{
|
||||
free(memptr[1]);
|
||||
}
|
||||
else
|
||||
{
|
||||
hipFree(dstBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,210 @@
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <complex>
|
||||
|
||||
#include "timer.h"
|
||||
#include "test_common.h"
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../src/test_common.cpp timer.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
// Quiet pesky warnings
|
||||
#ifdef WIN_OS
|
||||
#define SNPRINTF sprintf_s
|
||||
#else
|
||||
#define SNPRINTF snprintf
|
||||
#endif
|
||||
|
||||
#define CHAR_BUF_SIZE 512
|
||||
|
||||
#define CHECK_RESULT(test, msg) \
|
||||
if ((test)) \
|
||||
{ \
|
||||
printf("\n%s\n", msg); \
|
||||
abort(); \
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
unsigned int iterations;
|
||||
int flushEvery;
|
||||
} testStruct;
|
||||
|
||||
testStruct testList[] =
|
||||
{
|
||||
{ 1, -1},
|
||||
{ 1, -1},
|
||||
{ 10, 1},
|
||||
{ 10, -1},
|
||||
{ 100, 1},
|
||||
{ 100, 10},
|
||||
{ 100, -1},
|
||||
{ 1000, 1},
|
||||
{ 1000, 10},
|
||||
{ 1000, 100},
|
||||
{ 1000, -1},
|
||||
{ 10000, 1},
|
||||
{ 10000, 10},
|
||||
{ 10000, 100},
|
||||
{ 10000, 1000},
|
||||
{ 10000, -1},
|
||||
{ 100000, 1},
|
||||
{ 100000, 10},
|
||||
{ 100000, 100},
|
||||
{ 100000, 1000},
|
||||
{ 100000, 10000},
|
||||
{ 100000, -1},
|
||||
};
|
||||
|
||||
unsigned int mapTestList[] = {1, 1, 10, 100, 1000, 10000, 100000};
|
||||
|
||||
__global__ void _dispatchSpeed(float *outBuf)
|
||||
{
|
||||
int i = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
if (i < 0)
|
||||
outBuf[i] = 0.0f;
|
||||
};
|
||||
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
|
||||
hipError_t err = hipSuccess;
|
||||
hipDeviceProp_t props = {0};
|
||||
hipGetDeviceProperties(&props, p_gpuDevice);
|
||||
CHECK_RESULT(err != hipSuccess, "hipGetDeviceProperties failed" );
|
||||
printf("Set device to %d : %s\n", p_gpuDevice, props.name);
|
||||
|
||||
unsigned int testListSize = sizeof(testList) / sizeof(testStruct);
|
||||
int numTests = (p_tests == -1) ? (2*2*testListSize - 1) : p_tests;
|
||||
int test = (p_tests == -1) ? 0 : p_tests;
|
||||
|
||||
float* srcBuffer = NULL;
|
||||
unsigned int bufSize_ = 64*sizeof(float);
|
||||
err = hipMalloc(&srcBuffer, bufSize_);
|
||||
CHECK_RESULT(err != hipSuccess, "hipMalloc failed");
|
||||
|
||||
for(;test <= numTests; test++)
|
||||
{
|
||||
int openTest = test % testListSize;
|
||||
bool sleep = false;
|
||||
bool doWarmup = false;
|
||||
|
||||
if ((test / testListSize) % 2)
|
||||
{
|
||||
doWarmup = true;
|
||||
}
|
||||
if (test >= (testListSize * 2))
|
||||
{
|
||||
sleep = true;
|
||||
}
|
||||
|
||||
int threads = (bufSize_ / sizeof(float));
|
||||
int threads_per_block = 64;
|
||||
int blocks = (threads/threads_per_block) + (threads % threads_per_block);
|
||||
hipEvent_t start, stop;
|
||||
|
||||
// NULL stream check:
|
||||
err = hipEventCreate(&start);
|
||||
err = hipEventCreate(&stop);
|
||||
|
||||
CHECK_RESULT(err != hipSuccess, "hipEventCreate failed");
|
||||
|
||||
if (doWarmup)
|
||||
{
|
||||
hipLaunchKernelGGL(_dispatchSpeed, dim3(blocks), dim3(threads_per_block), 0, hipStream_t(0), srcBuffer);
|
||||
err = hipDeviceSynchronize();
|
||||
CHECK_RESULT(err != hipSuccess, "hipDeviceSynchronize failed");
|
||||
}
|
||||
|
||||
CPerfCounter timer;
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
for (unsigned int i = 0; i < testList[openTest].iterations; i++)
|
||||
{
|
||||
hipEventRecord(start, NULL);
|
||||
hipLaunchKernelGGL(_dispatchSpeed, dim3(blocks), dim3(threads_per_block), 0, hipStream_t(0), srcBuffer);
|
||||
hipEventRecord(stop, NULL);
|
||||
|
||||
if ((testList[openTest].flushEvery > 0) &&
|
||||
(((i + 1) % testList[openTest].flushEvery) == 0))
|
||||
{
|
||||
if (sleep)
|
||||
{
|
||||
err = hipDeviceSynchronize();
|
||||
CHECK_RESULT(err != hipSuccess, "hipDeviceSynchronize failed");
|
||||
}
|
||||
else
|
||||
{
|
||||
do {
|
||||
err = hipEventQuery(stop);
|
||||
} while (err == hipErrorNotReady);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (sleep)
|
||||
{
|
||||
err = hipDeviceSynchronize();
|
||||
CHECK_RESULT(err != hipSuccess, "hipDeviceSynchronize failed");
|
||||
}
|
||||
else
|
||||
{
|
||||
do {
|
||||
err = hipEventQuery(stop);
|
||||
} while (err == hipErrorNotReady);
|
||||
}
|
||||
timer.Stop();
|
||||
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
double sec = timer.GetElapsedTime();
|
||||
|
||||
// microseconds per launch
|
||||
double perf = (1000000.f*sec/testList[openTest].iterations);
|
||||
const char *waitType;
|
||||
const char *extraChar;
|
||||
const char *n;
|
||||
const char *warmup;
|
||||
if (sleep)
|
||||
{
|
||||
waitType = "sleep";
|
||||
extraChar = "";
|
||||
n = "";
|
||||
}
|
||||
else
|
||||
{
|
||||
waitType = "spin";
|
||||
n = "n";
|
||||
extraChar = " ";
|
||||
}
|
||||
if (doWarmup)
|
||||
{
|
||||
warmup = "warmup";
|
||||
}
|
||||
else
|
||||
{
|
||||
warmup = "";
|
||||
}
|
||||
|
||||
|
||||
char buf[256];
|
||||
if (testList[openTest].flushEvery > 0)
|
||||
{
|
||||
SNPRINTF(buf, sizeof(buf), "HIPPerfDispatchSpeed[%3d] %7d dispatches %s%sing every %5d %6s (us/disp) %3f", test, testList[openTest].iterations,
|
||||
waitType, n, testList[openTest].flushEvery, warmup, (float)perf);
|
||||
}
|
||||
else
|
||||
{
|
||||
SNPRINTF(buf, sizeof(buf), "HIPPerfDispatchSpeed[%3d] %7d dispatches (%s%s) %6s (us/disp) %3f", test, testList[openTest].iterations,
|
||||
waitType, extraChar, warmup, (float)perf);
|
||||
}
|
||||
printf("%s\n", buf);
|
||||
}
|
||||
|
||||
hipFree(srcBuffer);
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,116 @@
|
||||
#include "timer.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#define VC_EXTRALEAN
|
||||
#include <windows.h>
|
||||
#pragma comment(lib, "user32")
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
#include <time.h>
|
||||
#define NANOSECONDS_PER_SEC 1000000000
|
||||
#endif
|
||||
|
||||
CPerfCounter::CPerfCounter() : _clocks(0), _start(0)
|
||||
{
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
QueryPerformanceFrequency((LARGE_INTEGER *)&_freq);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
_freq = NANOSECONDS_PER_SEC;
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
CPerfCounter::~CPerfCounter()
|
||||
{
|
||||
// EMPTY!
|
||||
}
|
||||
|
||||
void
|
||||
CPerfCounter::Start(void)
|
||||
{
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
if( _start )
|
||||
{
|
||||
MessageBox(NULL, "Bad Perf Counter Start", "Error", MB_OK);
|
||||
exit(0);
|
||||
}
|
||||
QueryPerformanceCounter((LARGE_INTEGER *)&_start);
|
||||
|
||||
#endif
|
||||
#ifdef __linux__
|
||||
|
||||
struct timespec s;
|
||||
clock_gettime(CLOCK_MONOTONIC, &s);
|
||||
_start = (i64)s.tv_sec * NANOSECONDS_PER_SEC + (i64)s.tv_nsec ;
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
CPerfCounter::Stop(void)
|
||||
{
|
||||
i64 n;
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
if( !_start )
|
||||
{
|
||||
MessageBox(NULL, "Bad Perf Counter Stop", "Error", MB_OK);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
QueryPerformanceCounter((LARGE_INTEGER *)&n);
|
||||
|
||||
#endif
|
||||
#ifdef __linux__
|
||||
|
||||
struct timespec s;
|
||||
clock_gettime(CLOCK_MONOTONIC, &s);
|
||||
n = (i64)s.tv_sec * NANOSECONDS_PER_SEC + (i64)s.tv_nsec ;
|
||||
|
||||
#endif
|
||||
|
||||
n -= _start;
|
||||
_start = 0;
|
||||
_clocks += n;
|
||||
}
|
||||
|
||||
void
|
||||
CPerfCounter::Reset(void)
|
||||
{
|
||||
|
||||
#ifdef _WIN32
|
||||
if( _start )
|
||||
{
|
||||
MessageBox(NULL, "Bad Perf Counter Reset", "Error", MB_OK);
|
||||
exit(0);
|
||||
}
|
||||
#endif
|
||||
_clocks = 0;
|
||||
}
|
||||
|
||||
double
|
||||
CPerfCounter::GetElapsedTime(void)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
if( _start ) {
|
||||
MessageBox(NULL, "Trying to get time while still running.", "Error", MB_OK);
|
||||
exit(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return (double)_clocks / (double)_freq;
|
||||
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
#ifndef _TIMER_H_
|
||||
#define _TIMER_H_
|
||||
|
||||
#ifdef _WIN32
|
||||
typedef __int64 i64 ;
|
||||
#endif
|
||||
#ifdef __linux__
|
||||
typedef long long i64;
|
||||
#endif
|
||||
|
||||
class CPerfCounter {
|
||||
|
||||
public:
|
||||
CPerfCounter();
|
||||
~CPerfCounter();
|
||||
void Start(void);
|
||||
void Stop(void);
|
||||
void Reset(void);
|
||||
double GetElapsedTime(void);
|
||||
|
||||
private:
|
||||
|
||||
i64 _freq;
|
||||
i64 _clocks;
|
||||
i64 _start;
|
||||
};
|
||||
|
||||
#endif // _TIMER_H_
|
||||
Fai riferimento in un nuovo problema
Block a user