diff --git a/projects/clr/hipamd/bin/hipBusBandwidth b/projects/clr/hipamd/bin/hipBusBandwidth new file mode 100755 index 0000000000..6745f81039 Binary files /dev/null and b/projects/clr/hipamd/bin/hipBusBandwidth differ diff --git a/projects/clr/hipamd/bin/hipcc b/projects/clr/hipamd/bin/hipcc index 042b6e1f74..fbb8aa9775 100755 --- a/projects/clr/hipamd/bin/hipcc +++ b/projects/clr/hipamd/bin/hipcc @@ -36,6 +36,10 @@ $HIP_PATH=dirname (dirname $0) unless defined $HIP_PATH; # use parent dir $CUDA_PATH=$ENV{'CUDA_PATH'}; $CUDA_PATH='/usr/local/cuda' unless defined $CUDA_PATH; +$CODEXL_PATH = $ENV{'CODEXL_PATH'}; +$CODEXL_PATH = "/opt/AMD/CodeXL" unless defined $CODEXL_PATH; +$marker_path = "$CODEXL_PATH/SDK/AMDTActivityLogger"; + #--- #HIP_PLATFORM controls whether to use NVCC or HCC for compilation: @@ -73,6 +77,10 @@ if ($HIP_PLATFORM eq "hcc") { # Suppress linker warnings in case HCC distribution contains OpenCL/SPIR symbols $HIPLDFLAGS .= " -Wl,--defsym=_binary_kernel_spir_end=0 -Wl,--defsym=_binary_kernel_spir_start=0 -Wl,--defsym=_binary_kernel_cl_start=0 -Wl,--defsym=_binary_kernel_cl_end=0"; $HIPLDFLAGS .= " -L$HSA_PATH/lib -lhsa-runtime64 -lhc_am"; + + # Add trace marker library: + $HIPLDFLAGS .= " -L$marker_path/bin/x86_64 -lAMDTActivityLogger"; + # Add C++ libs for GCC. $HIPLDFLAGS .= " -lstdc++"; $HIPLDFLAGS .= " -lm"; @@ -139,7 +147,7 @@ foreach $arg (@ARGV) $needCXXFLAGS = 1; } - if (($arg =~ /hip_hcc\.o$/) or ($arg =~ /hip_hcc\.cpp/)) { + if (($arg =~ /hip_hcc\.o$/) or ($arg =~ /hip_hcc\.cpp$/)) { $needHipHcc = 0; } @@ -173,6 +181,9 @@ if ($needHipHcc) { $CMD .= " -O3" ; } + $CMD .= " -I$marker_path/include"; + + if ($verbose & 0x1) { print "remake-deps:", $CMD, "\n"; } diff --git a/projects/clr/hipamd/src/hip_hcc.cpp b/projects/clr/hipamd/src/hip_hcc.cpp index 7aa7ae4482..9e61a29d86 100644 --- a/projects/clr/hipamd/src/hip_hcc.cpp +++ b/projects/clr/hipamd/src/hip_hcc.cpp @@ -49,6 +49,7 @@ THE SOFTWARE. + #define INLINE static inline //--- @@ -97,12 +98,49 @@ int HIP_DISABLE_HW_COPY_DEP = 1; // Compile debug trace mode - this prints debug messages to stderr when env var HIP_DB is set. // May be set to 0 to remove debug if checks - possible code size and performance difference? -#define COMPILE_DB_TRACE 1 +#define COMPILE_HIP_DB 1 + + +// Compile HIP tracing capability. +// 0x1 = print a string at function entry with arguments. +// 0x2 = prints a simple message with function name + return code when function exits. +// 0x3 = print both. +// Must be enabled at runtime with HIP_TRACE_API +#define COMPILE_HIP_TRACE_API 0x3 + + +// Compile code that generate +#define COMPILE_TRACE_MARKER 1 // #include CPP files to produce one object file #define ONE_OBJECT_FILE 1 + +// TODO, re-org header order. +extern const char *ihipErrorString(hipError_t hip_error); + +// Compile support for trace markers that are displayed on CodeXL GUI at start/stop of each function boundary. +// TODO - currently we print the trace message at the beginning. if we waited, we could also include return codes, and any values returned +// through ptr-to-args (ie the pointers allocated by hipMalloc). +#ifdef COMPILE_TRACE_MARKER +#include "AMDTActivityLogger.h" +#include "hcc_detail/trace_helper.h" +#define API_TRACE(...)\ +{\ + std::string s = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')';\ + printf ("API_TRACE=%s\n", s.c_str());\ + amdtScopedMarker(s.c_str(), "HIP", NULL);\ +} +#else +#define API_TRACE() +#endif + +#define HIP_INIT_API(...) \ + std::call_once(hip_initialized, ihipInit);\ + API_TRACE(__VA_ARGS__); + + // Color defs for debug messages: #define KNRM "\x1B[0m" #define KRED "\x1B[31m" @@ -115,7 +153,7 @@ int HIP_DISABLE_HW_COPY_DEP = 1; //--- -//Debug flags: +//HIP_DB Debug flags: #define DB_API 0 /* 0x01 - shortcut to enable HIP_TRACE_API on single switch */ #define DB_SYNC 1 /* 0x02 - trace synchronization pieces */ #define DB_MEM 2 /* 0x04 - trace memory allocation / deallocation */ @@ -134,7 +172,7 @@ const char *dbName [] = KNRM "hip-copy2", }; -#if COMPILE_DB_TRACE +#if COMPILE_HIP_DB #define tprintf(trace_level, ...) {\ if (HIP_DB & (1<<(trace_level))) {\ fprintf (stderr, " %s:", dbName[trace_level]); \ @@ -443,10 +481,11 @@ void ihipStream_t::waitCopy(ihipSignal_t *signal) void ihipStream_t::wait(bool assertQueueEmpty) { if (! assertQueueEmpty) { - tprintf (DB_SYNC, "stream %p wait for queue-empty and lastCopy:#%lu...\n", this, _last_copy_signal ? _last_copy_signal->_sig_id: 0x0 ); + tprintf (DB_SYNC, "stream %p wait for queue-empty..\n", this); _av.wait(); } if (_last_copy_signal) { + tprintf (DB_SYNC, "stream %p wait for lastCopy:#%lu...\n", this, _last_copy_signal ? _last_copy_signal->_sig_id: 0x0 ); this->waitCopy(_last_copy_signal); } @@ -961,8 +1000,8 @@ void ihipDevice_t::waitAllStreams() ({\ tls_lastHipError = _hip_status;\ \ - if (HIP_TRACE_API) {\ - fprintf(stderr, "==hip-api: %-30s ret=%2d\n", __func__, _hip_status);\ + if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API) {\ + fprintf(stderr, "]]hip-api: %-30s ret=%2d\n", __func__, _hip_status);\ }\ _hip_status;\ }) @@ -1044,6 +1083,10 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c //It is called with C++11 call_once, which provided thread-safety. void ihipInit() { +#ifdef COMPILE_TRACE_MARKER + amdtInitializeActivityLogger(); + amdtScopedMarker("ihipInit", "HIP", NULL); +#endif /* * Environment variables */ @@ -1069,6 +1112,14 @@ void ihipInit() READ_ENV_I(release, HIP_DISABLE_HW_KERNEL_DEP, 0, "Disable HW dependencies before kernel commands - instead wait for dependency on host. -1 means ignore these dependencies. (debug mode)"); READ_ENV_I(release, HIP_DISABLE_HW_COPY_DEP, 0, "Disable HW dependencies before copy commands - instead wait for dependency on host. -1 means ifnore these dependencies (debug mode)"); + if (HIP_DB && !COMPILE_HIP_DB) { + fprintf (stderr, "warning: env var HIP_DB=0x%x but COMPILE_HIP_DB=0. (perhaps enable COMPILE_HIP_DB in src code before compiling?)", HIP_DB); + } + + if (HIP_TRACE_API && !COMPILE_HIP_TRACE_API) { + fprintf (stderr, "warning: env var HIP_TRACE_API=0x%x but COMPILE_HIP_TRACE_API=0. (perhaps enable COMPILE_HIP_DB in src code before compiling?)", HIP_DB); + } + /* * Build a table of valid compute devices. @@ -1110,6 +1161,7 @@ void ihipInit() if(!g_visible_device) assert(deviceCnt == g_deviceCnt); + tprintf(DB_SYNC, "pid=%u %-30s\n", getpid(), ""); } @@ -1177,8 +1229,6 @@ inline hipStream_t ihipSyncAndResolveStream(hipStream_t stream) } - - // TODO - data-up to data-down: // Called just before a kernel is launched from hipLaunchKernel. // Allows runtime to track some information about the stream. @@ -1229,7 +1279,7 @@ void ihipPostLaunchKernel(hipStream_t stream, hc::completion_future &kernelFutur */ hipError_t hipGetDevice(int *device) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(device); *device = tls_defaultDevice; return ihipLogStatus(hipSuccess); @@ -1242,7 +1292,7 @@ hipError_t hipGetDevice(int *device) */ hipError_t hipGetDeviceCount(int *count) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(count); *count = g_deviceCnt; @@ -1331,7 +1381,7 @@ hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) */ hipError_t hipSetDevice(int device) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(device); if ((device < 0) || (device >= g_deviceCnt)) { return ihipLogStatus(hipErrorInvalidDevice); @@ -1348,10 +1398,11 @@ hipError_t hipSetDevice(int device) */ hipError_t hipDeviceSynchronize(void) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(); ihipGetTlsDefaultDevice()->waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish. + return ihipLogStatus(hipSuccess); } @@ -1362,7 +1413,7 @@ hipError_t hipDeviceSynchronize(void) */ hipError_t hipDeviceReset(void) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(); ihipDevice_t *device = ihipGetTlsDefaultDevice(); @@ -1468,7 +1519,7 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) */ hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(props, device); hipError_t e; @@ -1496,29 +1547,28 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device) */ hipError_t hipGetLastError() { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(); // Return last error, but then reset the state: - return tls_lastHipError; - ihipLogStatus(hipSuccess); + hipError_t e = ihipLogStatus(tls_lastHipError); + tls_lastHipError = hipSuccess; + return e; } //--- hipError_t hipPeakAtLastError() { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(); - return tls_lastHipError; - ihipLogStatus(tls_lastHipError); + + // peak at last error, but don't reset it. + return ihipLogStatus(tls_lastHipError); } -//--- -const char *hipGetErrorName(hipError_t hip_error) +const char *ihipErrorString(hipError_t hip_error) { - std::call_once(hip_initialized, ihipInit); - switch (hip_error) { case hipSuccess : return "hipSuccess"; case hipErrorMemoryAllocation : return "hipErrorMemoryAllocation"; @@ -1537,6 +1587,16 @@ const char *hipGetErrorName(hipError_t hip_error) case hipErrorTbd : return "hipErrorTbd"; default : return "hipErrorUnknown"; }; +}; + + + +//--- +const char *hipGetErrorName(hipError_t hip_error) +{ + HIP_INIT_API(hip_error); + + return ihipErrorString(hip_error); } @@ -2038,7 +2098,7 @@ ihipMemsetKernel(hipStream_t stream, T * ptr, T val, size_t sizeBytes) */ hipError_t hipMalloc(void** ptr, size_t sizeBytes) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(ptr, sizeBytes); hipError_t hip_status = hipSuccess; @@ -2063,7 +2123,7 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) hipError_t hipMallocHost(void** ptr, size_t sizeBytes) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(ptr, sizeBytes); hipError_t hip_status = hipSuccess; @@ -2085,8 +2145,9 @@ hipError_t hipMallocHost(void** ptr, size_t sizeBytes) } -hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags){ - std::call_once(hip_initialized, ihipInit); +hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) +{ + HIP_INIT_API(ptr, sizeBytes, flags); hipError_t hip_status = hipSuccess; @@ -2123,8 +2184,9 @@ hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) -hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, size_t size){ - std::call_once(hip_initialized, ihipInit); +hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, size_t size) +{ + HIP_INIT_API(devPtr, hstPtr, size); hipError_t hip_status = hipSuccess; @@ -2148,7 +2210,8 @@ hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, size_t size){ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(*flagsPtr, hostPtr); + hipError_t hip_status = hipSuccess; hc::accelerator acc; @@ -2171,7 +2234,8 @@ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(hostPtr, sizeBytes, flags); + hipError_t hip_status = hipSuccess; auto device = ihipGetTlsDefaultDevice(); @@ -2199,8 +2263,10 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags) return ihipLogStatus(hip_status); } -hipError_t hipHostUnregister(void *hostPtr){ - std::call_once(hip_initialized, ihipInit); +hipError_t hipHostUnregister(void *hostPtr) +{ + HIP_INIT_API(hostPtr); + hipError_t hip_status = hipSuccess; if(hostPtr == NULL){ hip_status = hipErrorInvalidValue; @@ -2218,7 +2284,7 @@ hipError_t hipHostUnregister(void *hostPtr){ //--- hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t count, size_t offset, hipMemcpyKind kind) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(symbolName, src, count, offset, kind); #ifdef USE_MEMCPYTOSYMBOL if(kind != hipMemcpyHostToDevice) @@ -2444,7 +2510,7 @@ void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, hipMe //--- hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(dst, src, sizeBytes, kind); hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); @@ -2478,7 +2544,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind //--- hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(dst, src, sizeBytes, kind, stream); hipError_t e = hipSuccess; @@ -2507,7 +2573,7 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp */ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream ) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(dst, value, sizeBytes, stream); hipError_t e = hipSuccess; @@ -2557,7 +2623,7 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t s hipError_t hipMemset(void* dst, int value, size_t sizeBytes ) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(dst, value, sizeBytes); // TODO - call an ihip memset so HIP_TRACE is correct. return hipMemsetAsync(dst, value, sizeBytes, hipStreamNull); @@ -2570,7 +2636,7 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes ) */ hipError_t hipMemGetInfo (size_t *free, size_t *total) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(free, total); hipError_t e = hipSuccess; @@ -2600,9 +2666,9 @@ hipError_t hipMemGetInfo (size_t *free, size_t *total) //--- hipError_t hipFree(void* ptr) { - // TODO - ensure this pointer was created by hipMalloc and not hipMallocHost - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(ptr); + // TODO - ensure this pointer was created by hipMalloc and not hipMallocHost // Synchronize to ensure all work has finished. ihipGetTlsDefaultDevice()->waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish. @@ -2617,6 +2683,8 @@ hipError_t hipFree(void* ptr) hipError_t hipHostFree(void* ptr) { + HIP_INIT_API(ptr); + // TODO - ensure this pointer was created by hipMallocHost and not hipMalloc std::call_once(hip_initialized, ihipInit); @@ -2643,7 +2711,8 @@ hipError_t hipFreeHost(void* ptr) //--- hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int device, int peerDevice ) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(canAccessPeer, device, peerDevice); + *canAccessPeer = false; return ihipLogStatus(hipSuccess); } @@ -2655,7 +2724,8 @@ hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int device, int peerDe //--- hipError_t hipDeviceDisablePeerAccess ( int peerDevice ) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(peerDevice); + // TODO-p2p return ihipLogStatus(hipSuccess); }; @@ -2701,7 +2771,10 @@ hipError_t hipMemcpyPeerAsync ( void* dst, int dstDevice, const void* src, int hipError_t hipDriverGetVersion(int *driverVersion) { std::call_once(hip_initialized, ihipInit); - *driverVersion = 4; + if (driverVersion) { + *driverVersion = 4; + } + return ihipLogStatus(hipSuccess); } diff --git a/projects/clr/hipamd/tests/src/CMakeLists.txt b/projects/clr/hipamd/tests/src/CMakeLists.txt index 84b1dfee0b..4f63ac33a5 100644 --- a/projects/clr/hipamd/tests/src/CMakeLists.txt +++ b/projects/clr/hipamd/tests/src/CMakeLists.txt @@ -19,11 +19,18 @@ MESSAGE ("HIP_PATH=" ${HIP_PATH}) if (${HIP_PLATFORM} STREQUAL "hcc") MESSAGE ("HIP_PLATFORM=hcc") + set (HSA_PATH $ENV{HSA_PATH}) if (NOT DEFINED HSA_PATH) set (HSA_PATH /opt/hsa) endif() + set (CODEXL_PATH $ENV{CODEXL_PATH}) + if (NOT DEFINED CODEXL_PATH) + set (CODEXL_PATH /opt/AMD/CodeXL) + endif() + set (CODEXL_SDK_ATAL_PATH ${CODEXL_PATH}/SDK/AMDTActivityLogger) + #--- # Add HSA library: add_library(hsa-runtime64 SHARED IMPORTED) @@ -37,7 +44,7 @@ if (${HIP_PLATFORM} STREQUAL "hcc") # hip_hcc.o: #add_library(hip_hcc STATIC ${HIP_PATH}/src/hip_hcc.cpp ) add_library(hip_hcc STATIC ${HIP_PATH}/src/hip_hcc.cpp ${HIP_PATH}/src/staging_buffer.cpp) - target_include_directories(hip_hcc PRIVATE ${HSA_PATH}/include) + target_include_directories(hip_hcc PRIVATE ${HSA_PATH}/include ${CODEXL_SDK_ATAL_PATH}/include) elseif (${HIP_PLATFORM} STREQUAL "nvcc") @@ -132,7 +139,8 @@ make_hip_executable (hipSimpleAtomicsTest hipSimpleAtomicsTest.cpp) make_hip_executable (hipMathFunctionsHost hipMathFunctions.cpp hipSinglePrecisionMathHost.cpp hipDoublePrecisionMathHost.cpp) make_hip_executable (hipMathFunctionsDevice hipMathFunctions.cpp hipSinglePrecisionMathDevice.cpp hipDoublePrecisionMathDevice.cpp) make_hip_executable (hipIntrinsics hipMathFunctions.cpp hipSinglePrecisionIntrinsics.cpp hipDoublePrecisionIntrinsics.cpp hipIntegerIntrinsics.cpp) -make_hip_executable (hipPointerAttrib hipPointerAttrib.cpp) +#TODO - re-enable. This uses the pointer add feature. +#make_hip_executable (hipPointerAttrib hipPointerAttrib.cpp) make_hip_executable (hipMultiThreadStreams1 hipMultiThreadStreams1.cpp) make_hip_executable (hipMultiThreadStreams2 hipMultiThreadStreams2.cpp) make_hip_executable (hipHostAlloc hipHostAlloc.cpp) @@ -153,7 +161,8 @@ make_test(hipMemset --N 10013 --memsetval 0x5a ) # oddball size. make_test(hipMemset --N 256M --memsetval 0xa6 ) # big copy make_test(hipGridLaunch " " ) make_test(hipEnvVarDriver " " ) -make_test(hipPointerAttrib " " ) +#TODO -reenable +#make_test(hipPointerAttrib " " ) #make_test(hipMultiThreadStreams1 " " ) #make_test(hipMultiThreadStreams2 " " ) make_test(hipMemcpy_simple " " ) @@ -161,6 +170,9 @@ make_named_test(hipMemcpy "hipMemcpy-modes" --tests 0x1 ) make_named_test(hipMemcpy "hipMemcpy-size" --tests 0x6 ) make_named_test(hipMemcpy "hipMemcpy-multithreaded" --tests 0x8 ) +# Debug synchronization, then enable. +#make_named_test(hipMemcpy_simple "hipMemcpyAsync-simple" --async) + make_test(hipHostAlloc " ") make_test(hipMemcpyAsync " " ) make_test(hipHostGetFlags " ") diff --git a/projects/clr/hipamd/tests/src/hipMemcpy_simple.cpp b/projects/clr/hipamd/tests/src/hipMemcpy_simple.cpp index 528fa41700..f348e5f0a6 100644 --- a/projects/clr/hipamd/tests/src/hipMemcpy_simple.cpp +++ b/projects/clr/hipamd/tests/src/hipMemcpy_simple.cpp @@ -22,6 +22,18 @@ THE SOFTWARE. #include "hip_runtime.h" #include "test_common.h" +bool p_async = false; + +// **************************************************************************** +hipError_t memcopy(void * dst, const void *src, size_t sizeBytes, enum hipMemcpyKind kind) +{ + if (p_async) { + return hipMemcpyAsync(dst, src, sizeBytes, kind, NULL); + } else { + return hipMemcpy(dst, src, sizeBytes, kind); + } +} + //--- // Test simple H2D copies and back. @@ -40,12 +52,12 @@ void simpleTest1() printf ("A_d=%p B_d=%p C_d=%p A_h=%p B_h=%p C_h=%p\n", A_d, B_d, C_d, A_h, B_d, C_h); unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - HIPCHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK ( hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); + HIPCHECK ( memcopy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIPCHECK ( memcopy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, N); - HIPCHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + HIPCHECK ( memcopy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); HIPCHECK (hipDeviceSynchronize()); @@ -86,9 +98,9 @@ void simpleTest2(size_t numElements, bool usePinnedHost) A_h2[i] = 12345678.0 + i; // init output with something distincctive, to ensure we replace it. } - HIPCHECK(hipMemcpy(A_d, A_h1, sizeElements, hipMemcpyHostToDevice)); + HIPCHECK(memcopy(A_d, A_h1, sizeElements, hipMemcpyHostToDevice)); HIPCHECK(hipDeviceSynchronize()); - HIPCHECK(hipMemcpy(A_h2, A_d, sizeElements, hipMemcpyDeviceToHost)); + HIPCHECK(memcopy(A_h2, A_d, sizeElements, hipMemcpyDeviceToHost)); HIPCHECK(hipDeviceSynchronize()); for (size_t i=0; i(N, true/*usePinnedHost*/); simpleTest2(N, true/*usePinnedHost*/); } if (p_tests & 0x4) { - printf ("\n\n=== tests&2 (copy pin-pong, unpinned host)\n"); + printf ("\n\n=== tests&4 (copy ping-pong, unpinned host)\n"); simpleTest2(N, false/*usePinnedHost*/); simpleTest2(N, false/*usePinnedHost*/); } + hipDeviceSynchronize(); + hipDeviceReset(); + + int v; + hipDriverGetVersion(&v); + passed(); };