use codexl marker interface to mark HIP function/begin end.

- Creates markers in HIP group and they show up in CodeXL trace
- Marker text includes HIP functioin arguments
- (Add trace_helper to convert arguments to strings)
- Still need to add HIP_INIT_API for ~30 HIP functions.


[ROCm/clr commit: 54704b59dd]
This commit is contained in:
Ben Sander
2016-03-23 01:17:53 -05:00
parent acfe7cf1cc
commit f418fe4dae
5 changed files with 192 additions and 57 deletions
Binary file not shown.
+12 -1
View File
@@ -36,6 +36,10 @@ $HIP_PATH=dirname (dirname $0) unless defined $HIP_PATH; # use parent dir
$CUDA_PATH=$ENV{'CUDA_PATH'};
$CUDA_PATH='/usr/local/cuda' unless defined $CUDA_PATH;
$CODEXL_PATH = $ENV{'CODEXL_PATH'};
$CODEXL_PATH = "/opt/AMD/CodeXL" unless defined $CODEXL_PATH;
$marker_path = "$CODEXL_PATH/SDK/AMDTActivityLogger";
#---
#HIP_PLATFORM controls whether to use NVCC or HCC for compilation:
@@ -73,6 +77,10 @@ if ($HIP_PLATFORM eq "hcc") {
# Suppress linker warnings in case HCC distribution contains OpenCL/SPIR symbols
$HIPLDFLAGS .= " -Wl,--defsym=_binary_kernel_spir_end=0 -Wl,--defsym=_binary_kernel_spir_start=0 -Wl,--defsym=_binary_kernel_cl_start=0 -Wl,--defsym=_binary_kernel_cl_end=0";
$HIPLDFLAGS .= " -L$HSA_PATH/lib -lhsa-runtime64 -lhc_am";
# Add trace marker library:
$HIPLDFLAGS .= " -L$marker_path/bin/x86_64 -lAMDTActivityLogger";
# Add C++ libs for GCC.
$HIPLDFLAGS .= " -lstdc++";
$HIPLDFLAGS .= " -lm";
@@ -139,7 +147,7 @@ foreach $arg (@ARGV)
$needCXXFLAGS = 1;
}
if (($arg =~ /hip_hcc\.o$/) or ($arg =~ /hip_hcc\.cpp/)) {
if (($arg =~ /hip_hcc\.o$/) or ($arg =~ /hip_hcc\.cpp$/)) {
$needHipHcc = 0;
}
@@ -173,6 +181,9 @@ if ($needHipHcc) {
$CMD .= " -O3" ;
}
$CMD .= " -I$marker_path/include";
if ($verbose & 0x1) {
print "remake-deps:", $CMD, "\n";
}
+118 -45
View File
@@ -49,6 +49,7 @@ THE SOFTWARE.
#define INLINE static inline
//---
@@ -97,12 +98,49 @@ int HIP_DISABLE_HW_COPY_DEP = 1;
// Compile debug trace mode - this prints debug messages to stderr when env var HIP_DB is set.
// May be set to 0 to remove debug if checks - possible code size and performance difference?
#define COMPILE_DB_TRACE 1
#define COMPILE_HIP_DB 1
// Compile HIP tracing capability.
// 0x1 = print a string at function entry with arguments.
// 0x2 = prints a simple message with function name + return code when function exits.
// 0x3 = print both.
// Must be enabled at runtime with HIP_TRACE_API
#define COMPILE_HIP_TRACE_API 0x3
// Compile code that generate
#define COMPILE_TRACE_MARKER 1
// #include CPP files to produce one object file
#define ONE_OBJECT_FILE 1
// TODO, re-org header order.
extern const char *ihipErrorString(hipError_t hip_error);
// Compile support for trace markers that are displayed on CodeXL GUI at start/stop of each function boundary.
// TODO - currently we print the trace message at the beginning. if we waited, we could also include return codes, and any values returned
// through ptr-to-args (ie the pointers allocated by hipMalloc).
#ifdef COMPILE_TRACE_MARKER
#include "AMDTActivityLogger.h"
#include "hcc_detail/trace_helper.h"
#define API_TRACE(...)\
{\
std::string s = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')';\
printf ("API_TRACE=%s\n", s.c_str());\
amdtScopedMarker(s.c_str(), "HIP", NULL);\
}
#else
#define API_TRACE()
#endif
#define HIP_INIT_API(...) \
std::call_once(hip_initialized, ihipInit);\
API_TRACE(__VA_ARGS__);
// Color defs for debug messages:
#define KNRM "\x1B[0m"
#define KRED "\x1B[31m"
@@ -115,7 +153,7 @@ int HIP_DISABLE_HW_COPY_DEP = 1;
//---
//Debug flags:
//HIP_DB Debug flags:
#define DB_API 0 /* 0x01 - shortcut to enable HIP_TRACE_API on single switch */
#define DB_SYNC 1 /* 0x02 - trace synchronization pieces */
#define DB_MEM 2 /* 0x04 - trace memory allocation / deallocation */
@@ -134,7 +172,7 @@ const char *dbName [] =
KNRM "hip-copy2",
};
#if COMPILE_DB_TRACE
#if COMPILE_HIP_DB
#define tprintf(trace_level, ...) {\
if (HIP_DB & (1<<(trace_level))) {\
fprintf (stderr, " %s:", dbName[trace_level]); \
@@ -443,10 +481,11 @@ void ihipStream_t::waitCopy(ihipSignal_t *signal)
void ihipStream_t::wait(bool assertQueueEmpty)
{
if (! assertQueueEmpty) {
tprintf (DB_SYNC, "stream %p wait for queue-empty and lastCopy:#%lu...\n", this, _last_copy_signal ? _last_copy_signal->_sig_id: 0x0 );
tprintf (DB_SYNC, "stream %p wait for queue-empty..\n", this);
_av.wait();
}
if (_last_copy_signal) {
tprintf (DB_SYNC, "stream %p wait for lastCopy:#%lu...\n", this, _last_copy_signal ? _last_copy_signal->_sig_id: 0x0 );
this->waitCopy(_last_copy_signal);
}
@@ -961,8 +1000,8 @@ void ihipDevice_t::waitAllStreams()
({\
tls_lastHipError = _hip_status;\
\
if (HIP_TRACE_API) {\
fprintf(stderr, "==hip-api: %-30s ret=%2d\n", __func__, _hip_status);\
if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API) {\
fprintf(stderr, "]]hip-api: %-30s ret=%2d\n", __func__, _hip_status);\
}\
_hip_status;\
})
@@ -1044,6 +1083,10 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c
//It is called with C++11 call_once, which provided thread-safety.
void ihipInit()
{
#ifdef COMPILE_TRACE_MARKER
amdtInitializeActivityLogger();
amdtScopedMarker("ihipInit", "HIP", NULL);
#endif
/*
* Environment variables
*/
@@ -1069,6 +1112,14 @@ void ihipInit()
READ_ENV_I(release, HIP_DISABLE_HW_KERNEL_DEP, 0, "Disable HW dependencies before kernel commands - instead wait for dependency on host. -1 means ignore these dependencies. (debug mode)");
READ_ENV_I(release, HIP_DISABLE_HW_COPY_DEP, 0, "Disable HW dependencies before copy commands - instead wait for dependency on host. -1 means ifnore these dependencies (debug mode)");
if (HIP_DB && !COMPILE_HIP_DB) {
fprintf (stderr, "warning: env var HIP_DB=0x%x but COMPILE_HIP_DB=0. (perhaps enable COMPILE_HIP_DB in src code before compiling?)", HIP_DB);
}
if (HIP_TRACE_API && !COMPILE_HIP_TRACE_API) {
fprintf (stderr, "warning: env var HIP_TRACE_API=0x%x but COMPILE_HIP_TRACE_API=0. (perhaps enable COMPILE_HIP_DB in src code before compiling?)", HIP_DB);
}
/*
* Build a table of valid compute devices.
@@ -1110,6 +1161,7 @@ void ihipInit()
if(!g_visible_device)
assert(deviceCnt == g_deviceCnt);
tprintf(DB_SYNC, "pid=%u %-30s\n", getpid(), "<ihipInit>");
}
@@ -1177,8 +1229,6 @@ inline hipStream_t ihipSyncAndResolveStream(hipStream_t stream)
}
// TODO - data-up to data-down:
// Called just before a kernel is launched from hipLaunchKernel.
// Allows runtime to track some information about the stream.
@@ -1229,7 +1279,7 @@ void ihipPostLaunchKernel(hipStream_t stream, hc::completion_future &kernelFutur
*/
hipError_t hipGetDevice(int *device)
{
std::call_once(hip_initialized, ihipInit);
HIP_INIT_API(device);
*device = tls_defaultDevice;
return ihipLogStatus(hipSuccess);
@@ -1242,7 +1292,7 @@ hipError_t hipGetDevice(int *device)
*/
hipError_t hipGetDeviceCount(int *count)
{
std::call_once(hip_initialized, ihipInit);
HIP_INIT_API(count);
*count = g_deviceCnt;
@@ -1331,7 +1381,7 @@ hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig )
*/
hipError_t hipSetDevice(int device)
{
std::call_once(hip_initialized, ihipInit);
HIP_INIT_API(device);
if ((device < 0) || (device >= g_deviceCnt)) {
return ihipLogStatus(hipErrorInvalidDevice);
@@ -1348,10 +1398,11 @@ hipError_t hipSetDevice(int device)
*/
hipError_t hipDeviceSynchronize(void)
{
std::call_once(hip_initialized, ihipInit);
HIP_INIT_API();
ihipGetTlsDefaultDevice()->waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish.
return ihipLogStatus(hipSuccess);
}
@@ -1362,7 +1413,7 @@ hipError_t hipDeviceSynchronize(void)
*/
hipError_t hipDeviceReset(void)
{
std::call_once(hip_initialized, ihipInit);
HIP_INIT_API();
ihipDevice_t *device = ihipGetTlsDefaultDevice();
@@ -1468,7 +1519,7 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
*/
hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device)
{
std::call_once(hip_initialized, ihipInit);
HIP_INIT_API(props, device);
hipError_t e;
@@ -1496,29 +1547,28 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device)
*/
hipError_t hipGetLastError()
{
std::call_once(hip_initialized, ihipInit);
HIP_INIT_API();
// Return last error, but then reset the state:
return tls_lastHipError;
ihipLogStatus(hipSuccess);
hipError_t e = ihipLogStatus(tls_lastHipError);
tls_lastHipError = hipSuccess;
return e;
}
//---
hipError_t hipPeakAtLastError()
{
std::call_once(hip_initialized, ihipInit);
HIP_INIT_API();
return tls_lastHipError;
ihipLogStatus(tls_lastHipError);
// peak at last error, but don't reset it.
return ihipLogStatus(tls_lastHipError);
}
//---
const char *hipGetErrorName(hipError_t hip_error)
const char *ihipErrorString(hipError_t hip_error)
{
std::call_once(hip_initialized, ihipInit);
switch (hip_error) {
case hipSuccess : return "hipSuccess";
case hipErrorMemoryAllocation : return "hipErrorMemoryAllocation";
@@ -1537,6 +1587,16 @@ const char *hipGetErrorName(hipError_t hip_error)
case hipErrorTbd : return "hipErrorTbd";
default : return "hipErrorUnknown";
};
};
//---
const char *hipGetErrorName(hipError_t hip_error)
{
HIP_INIT_API(hip_error);
return ihipErrorString(hip_error);
}
@@ -2038,7 +2098,7 @@ ihipMemsetKernel(hipStream_t stream, T * ptr, T val, size_t sizeBytes)
*/
hipError_t hipMalloc(void** ptr, size_t sizeBytes)
{
std::call_once(hip_initialized, ihipInit);
HIP_INIT_API(ptr, sizeBytes);
hipError_t hip_status = hipSuccess;
@@ -2063,7 +2123,7 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes)
hipError_t hipMallocHost(void** ptr, size_t sizeBytes)
{
std::call_once(hip_initialized, ihipInit);
HIP_INIT_API(ptr, sizeBytes);
hipError_t hip_status = hipSuccess;
@@ -2085,8 +2145,9 @@ hipError_t hipMallocHost(void** ptr, size_t sizeBytes)
}
hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags){
std::call_once(hip_initialized, ihipInit);
hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
{
HIP_INIT_API(ptr, sizeBytes, flags);
hipError_t hip_status = hipSuccess;
@@ -2123,8 +2184,9 @@ hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags)
hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, size_t size){
std::call_once(hip_initialized, ihipInit);
hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, size_t size)
{
HIP_INIT_API(devPtr, hstPtr, size);
hipError_t hip_status = hipSuccess;
@@ -2148,7 +2210,8 @@ hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, size_t size){
hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr)
{
std::call_once(hip_initialized, ihipInit);
HIP_INIT_API(*flagsPtr, hostPtr);
hipError_t hip_status = hipSuccess;
hc::accelerator acc;
@@ -2171,7 +2234,8 @@ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr)
hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
{
std::call_once(hip_initialized, ihipInit);
HIP_INIT_API(hostPtr, sizeBytes, flags);
hipError_t hip_status = hipSuccess;
auto device = ihipGetTlsDefaultDevice();
@@ -2199,8 +2263,10 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
return ihipLogStatus(hip_status);
}
hipError_t hipHostUnregister(void *hostPtr){
std::call_once(hip_initialized, ihipInit);
hipError_t hipHostUnregister(void *hostPtr)
{
HIP_INIT_API(hostPtr);
hipError_t hip_status = hipSuccess;
if(hostPtr == NULL){
hip_status = hipErrorInvalidValue;
@@ -2218,7 +2284,7 @@ hipError_t hipHostUnregister(void *hostPtr){
//---
hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t count, size_t offset, hipMemcpyKind kind)
{
std::call_once(hip_initialized, ihipInit);
HIP_INIT_API(symbolName, src, count, offset, kind);
#ifdef USE_MEMCPYTOSYMBOL
if(kind != hipMemcpyHostToDevice)
@@ -2444,7 +2510,7 @@ void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, hipMe
//---
hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind)
{
std::call_once(hip_initialized, ihipInit);
HIP_INIT_API(dst, src, sizeBytes, kind);
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
@@ -2478,7 +2544,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind
//---
hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream)
{
std::call_once(hip_initialized, ihipInit);
HIP_INIT_API(dst, src, sizeBytes, kind, stream);
hipError_t e = hipSuccess;
@@ -2507,7 +2573,7 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp
*/
hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream )
{
std::call_once(hip_initialized, ihipInit);
HIP_INIT_API(dst, value, sizeBytes, stream);
hipError_t e = hipSuccess;
@@ -2557,7 +2623,7 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t s
hipError_t hipMemset(void* dst, int value, size_t sizeBytes )
{
std::call_once(hip_initialized, ihipInit);
HIP_INIT_API(dst, value, sizeBytes);
// TODO - call an ihip memset so HIP_TRACE is correct.
return hipMemsetAsync(dst, value, sizeBytes, hipStreamNull);
@@ -2570,7 +2636,7 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes )
*/
hipError_t hipMemGetInfo (size_t *free, size_t *total)
{
std::call_once(hip_initialized, ihipInit);
HIP_INIT_API(free, total);
hipError_t e = hipSuccess;
@@ -2600,9 +2666,9 @@ hipError_t hipMemGetInfo (size_t *free, size_t *total)
//---
hipError_t hipFree(void* ptr)
{
// TODO - ensure this pointer was created by hipMalloc and not hipMallocHost
std::call_once(hip_initialized, ihipInit);
HIP_INIT_API(ptr);
// TODO - ensure this pointer was created by hipMalloc and not hipMallocHost
// Synchronize to ensure all work has finished.
ihipGetTlsDefaultDevice()->waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish.
@@ -2617,6 +2683,8 @@ hipError_t hipFree(void* ptr)
hipError_t hipHostFree(void* ptr)
{
HIP_INIT_API(ptr);
// TODO - ensure this pointer was created by hipMallocHost and not hipMalloc
std::call_once(hip_initialized, ihipInit);
@@ -2643,7 +2711,8 @@ hipError_t hipFreeHost(void* ptr)
//---
hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int device, int peerDevice )
{
std::call_once(hip_initialized, ihipInit);
HIP_INIT_API(canAccessPeer, device, peerDevice);
*canAccessPeer = false;
return ihipLogStatus(hipSuccess);
}
@@ -2655,7 +2724,8 @@ hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int device, int peerDe
//---
hipError_t hipDeviceDisablePeerAccess ( int peerDevice )
{
std::call_once(hip_initialized, ihipInit);
HIP_INIT_API(peerDevice);
// TODO-p2p
return ihipLogStatus(hipSuccess);
};
@@ -2701,7 +2771,10 @@ hipError_t hipMemcpyPeerAsync ( void* dst, int dstDevice, const void* src, int
hipError_t hipDriverGetVersion(int *driverVersion)
{
std::call_once(hip_initialized, ihipInit);
*driverVersion = 4;
if (driverVersion) {
*driverVersion = 4;
}
return ihipLogStatus(hipSuccess);
}
+15 -3
View File
@@ -19,11 +19,18 @@ MESSAGE ("HIP_PATH=" ${HIP_PATH})
if (${HIP_PLATFORM} STREQUAL "hcc")
MESSAGE ("HIP_PLATFORM=hcc")
set (HSA_PATH $ENV{HSA_PATH})
if (NOT DEFINED HSA_PATH)
set (HSA_PATH /opt/hsa)
endif()
set (CODEXL_PATH $ENV{CODEXL_PATH})
if (NOT DEFINED CODEXL_PATH)
set (CODEXL_PATH /opt/AMD/CodeXL)
endif()
set (CODEXL_SDK_ATAL_PATH ${CODEXL_PATH}/SDK/AMDTActivityLogger)
#---
# Add HSA library:
add_library(hsa-runtime64 SHARED IMPORTED)
@@ -37,7 +44,7 @@ if (${HIP_PLATFORM} STREQUAL "hcc")
# hip_hcc.o:
#add_library(hip_hcc STATIC ${HIP_PATH}/src/hip_hcc.cpp )
add_library(hip_hcc STATIC ${HIP_PATH}/src/hip_hcc.cpp ${HIP_PATH}/src/staging_buffer.cpp)
target_include_directories(hip_hcc PRIVATE ${HSA_PATH}/include)
target_include_directories(hip_hcc PRIVATE ${HSA_PATH}/include ${CODEXL_SDK_ATAL_PATH}/include)
elseif (${HIP_PLATFORM} STREQUAL "nvcc")
@@ -132,7 +139,8 @@ make_hip_executable (hipSimpleAtomicsTest hipSimpleAtomicsTest.cpp)
make_hip_executable (hipMathFunctionsHost hipMathFunctions.cpp hipSinglePrecisionMathHost.cpp hipDoublePrecisionMathHost.cpp)
make_hip_executable (hipMathFunctionsDevice hipMathFunctions.cpp hipSinglePrecisionMathDevice.cpp hipDoublePrecisionMathDevice.cpp)
make_hip_executable (hipIntrinsics hipMathFunctions.cpp hipSinglePrecisionIntrinsics.cpp hipDoublePrecisionIntrinsics.cpp hipIntegerIntrinsics.cpp)
make_hip_executable (hipPointerAttrib hipPointerAttrib.cpp)
#TODO - re-enable. This uses the pointer add feature.
#make_hip_executable (hipPointerAttrib hipPointerAttrib.cpp)
make_hip_executable (hipMultiThreadStreams1 hipMultiThreadStreams1.cpp)
make_hip_executable (hipMultiThreadStreams2 hipMultiThreadStreams2.cpp)
make_hip_executable (hipHostAlloc hipHostAlloc.cpp)
@@ -153,7 +161,8 @@ make_test(hipMemset --N 10013 --memsetval 0x5a ) # oddball size.
make_test(hipMemset --N 256M --memsetval 0xa6 ) # big copy
make_test(hipGridLaunch " " )
make_test(hipEnvVarDriver " " )
make_test(hipPointerAttrib " " )
#TODO -reenable
#make_test(hipPointerAttrib " " )
#make_test(hipMultiThreadStreams1 " " )
#make_test(hipMultiThreadStreams2 " " )
make_test(hipMemcpy_simple " " )
@@ -161,6 +170,9 @@ make_named_test(hipMemcpy "hipMemcpy-modes" --tests 0x1 )
make_named_test(hipMemcpy "hipMemcpy-size" --tests 0x6 )
make_named_test(hipMemcpy "hipMemcpy-multithreaded" --tests 0x8 )
# Debug synchronization, then enable.
#make_named_test(hipMemcpy_simple "hipMemcpyAsync-simple" --async)
make_test(hipHostAlloc " ")
make_test(hipMemcpyAsync " " )
make_test(hipHostGetFlags " ")
@@ -22,6 +22,18 @@ THE SOFTWARE.
#include "hip_runtime.h"
#include "test_common.h"
bool p_async = false;
// ****************************************************************************
hipError_t memcopy(void * dst, const void *src, size_t sizeBytes, enum hipMemcpyKind kind)
{
if (p_async) {
return hipMemcpyAsync(dst, src, sizeBytes, kind, NULL);
} else {
return hipMemcpy(dst, src, sizeBytes, kind);
}
}
//---
// Test simple H2D copies and back.
@@ -40,12 +52,12 @@ void simpleTest1()
printf ("A_d=%p B_d=%p C_d=%p A_h=%p B_h=%p C_h=%p\n", A_d, B_d, C_d, A_h, B_d, C_h);
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
HIPCHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
HIPCHECK ( hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
HIPCHECK ( memcopy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
HIPCHECK ( memcopy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, N);
HIPCHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
HIPCHECK ( memcopy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
HIPCHECK (hipDeviceSynchronize());
@@ -86,9 +98,9 @@ void simpleTest2(size_t numElements, bool usePinnedHost)
A_h2[i] = 12345678.0 + i; // init output with something distincctive, to ensure we replace it.
}
HIPCHECK(hipMemcpy(A_d, A_h1, sizeElements, hipMemcpyHostToDevice));
HIPCHECK(memcopy(A_d, A_h1, sizeElements, hipMemcpyHostToDevice));
HIPCHECK(hipDeviceSynchronize());
HIPCHECK(hipMemcpy(A_h2, A_d, sizeElements, hipMemcpyDeviceToHost));
HIPCHECK(memcopy(A_h2, A_d, sizeElements, hipMemcpyDeviceToHost));
HIPCHECK(hipDeviceSynchronize());
for (size_t i=0; i<numElements; i++) {
@@ -104,9 +116,30 @@ void simpleTest2(size_t numElements, bool usePinnedHost)
free(A_h2);
}
}
//Parse arguments specific to this test.
void parseMyArguments(int argc, char *argv[])
{
int more_argc = HipTest::parseStandardArguments(argc, argv, false);
// parse args for this test:
for (int i = 1; i < more_argc; i++) {
const char *arg = argv[i];
if (!strcmp(arg, "--async")) {
p_async = true;
} else {
failed("Bad argument '%s'", arg);
}
}
};
int main(int argc, char *argv[])
{
HipTest::parseStandardArguments(argc, argv, true);
parseMyArguments(argc, argv);
printf ("info: set device to %d, tests=%x\n", p_gpuDevice, p_tests);
HIPCHECK(hipSetDevice(p_gpuDevice));
@@ -120,16 +153,22 @@ int main(int argc, char *argv[])
}
if (p_tests & 0x2) {
printf ("\n\n=== tests&2 (copy pin-pong, pinned host)\n");
printf ("\n\n=== tests&2 (copy ping-pong, pinned host)\n");
simpleTest2<float>(N, true/*usePinnedHost*/);
simpleTest2<char>(N, true/*usePinnedHost*/);
}
if (p_tests & 0x4) {
printf ("\n\n=== tests&2 (copy pin-pong, unpinned host)\n");
printf ("\n\n=== tests&4 (copy ping-pong, unpinned host)\n");
simpleTest2<char>(N, false/*usePinnedHost*/);
simpleTest2<float>(N, false/*usePinnedHost*/);
}
hipDeviceSynchronize();
hipDeviceReset();
int v;
hipDriverGetVersion(&v);
passed();
};