From 160f01d772bdb83723f44b2339bb0e46d8920f64 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Fri, 1 Apr 2016 16:45:42 -0500 Subject: [PATCH 01/40] update proposed release notes [ROCm/hip commit: 4e452cf1f12653288119abaeeaca1007d2d5b479] --- projects/hip/RELEASE.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/projects/hip/RELEASE.md b/projects/hip/RELEASE.md index 8041657b8e..3fdc529571 100644 --- a/projects/hip/RELEASE.md +++ b/projects/hip/RELEASE.md @@ -18,9 +18,14 @@ Next: - Move to CMake. - Split source into multiple modular .cpp and .h files. - Create static library and link. -- Deprecate hipDeviceGetProp, replace with hipGetDeviceProp + - Set HIP_PATH to install. +- Make hipDevice and hipStream thread-safe. + - Prefered hipStream usage is still to create new streams for each new thread, but it works even if you don;t. +- Improve automated platform detection: If AMD GPU is installed and detected by driver, default HIP_PLATFORM to hcc. +- HIP_TRACE_API now prints arguments to the HIP function (in addition to name of function). +- Deprecate hipDeviceGetProp (Replace with hipGetDeviceProp) - Deprecate hipMallocHost (Replace with hipHostMalloc) -- Deprecate hipFreeHost (Replace with hipHostFree). +- Deprecate hipFreeHost (Replace with hipHostFree) ## Revision History: From 8db4a6d46ec47beb8f4e6b9a13a0de45ce320075 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Wed, 6 Apr 2016 11:28:23 -0500 Subject: [PATCH 02/40] Checkpoint initial peer2peer implementation. [ROCm/hip commit: b02e9163ab896c4af45012c4c51006c22446d24f] --- projects/hip/include/hcc_detail/hip_hcc.h | 27 ++++++++-- .../hip/include/hcc_detail/hip_runtime_api.h | 10 ++-- projects/hip/src/hip_hcc.cpp | 40 ++++++++++++++- projects/hip/src/hip_peer.cpp | 51 ++++++++++++++++--- 4 files changed, 111 insertions(+), 17 deletions(-) diff --git a/projects/hip/include/hcc_detail/hip_hcc.h b/projects/hip/include/hcc_detail/hip_hcc.h index 42dd3b6df5..5c8aaf5659 100644 --- a/projects/hip/include/hcc_detail/hip_hcc.h +++ b/projects/hip/include/hcc_detail/hip_hcc.h @@ -494,11 +494,23 @@ struct ihipEvent_t { // will lock the mutex on construction and unlock on destruction. // // MUTEX_TYPE is template argument so can easily convert to FakeMutex for performance or stress testing. -template +template class ihipDeviceCriticalBase_t : LockedBase { public: - ihipDeviceCriticalBase_t() : _stream_id(0) {}; + ihipDeviceCriticalBase_t() : _stream_id(0), _peerAgents(nullptr) {}; + + void init(unsigned deviceCnt) { + assert(_peerAgents == nullptr); + _peerAgents = new hsa_agent_t[deviceCnt]; + }; + + ~ihipDeviceCriticalBase_t() { + if (_peerAgents != nullptr) { + delete _peerAgents; + _peerAgents = nullptr; + } + } friend class LockedAccessor; std::list &streams() { return _streams; }; @@ -507,10 +519,19 @@ public: // "Allocate" a stream ID: ihipStream_t::SeqNum_t incStreamId() { return _stream_id++; }; + void recomputePeerAgents(); + void addPeer(ihipDevice_t *peer); + void removePeer(ihipDevice_t *peer); + private: std::list _streams; // streams associated with this device. ihipStream_t::SeqNum_t _stream_id; + + // These reflect the currently Enabled set of peers for this GPU: + std::list _peers; // list of enabled peer devices. + uint32_t _peerCnt; // number of enabled peers + hsa_agent_t *_peerAgents; // efficient packed array of enabled agents (to use for allocations.) }; // Note Mutex selected based on DeviceMutex @@ -530,7 +551,7 @@ class ihipDevice_t { public: // Functions: ihipDevice_t() {}; // note: calls constructor for _criticalData - void init(unsigned device_index, hc::accelerator &acc, unsigned flags); + void init(unsigned device_index, unsigned deviceCnt, hc::accelerator &acc, unsigned flags); ~ihipDevice_t(); void locked_addStream(ihipStream_t *s); diff --git a/projects/hip/include/hcc_detail/hip_runtime_api.h b/projects/hip/include/hcc_detail/hip_runtime_api.h index b5fdb312a4..56b3560427 100644 --- a/projects/hip/include/hcc_detail/hip_runtime_api.h +++ b/projects/hip/include/hcc_detail/hip_runtime_api.h @@ -908,12 +908,12 @@ hipError_t hipMemGetInfo (size_t * free, size_t * total) ; * Returns "1" in @p canAccessPeer if the specified @p device is capable * of directly accessing memory physically located on peerDevice , or "0" if not. */ -hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int device, int peerDevice ); +hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDeviceId); /** - * @brief Disables registering memory on peerDevice for direct access from the current device. + * @brief Disable registering memory on peerDevice for direct access from the current device. * * If there are any allocations on peerDevice which were registered in the current device using hipPeerRegister() then these allocations will be automatically unregistered. * Returns hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been enabled from the current device. @@ -922,10 +922,10 @@ hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int device, int peerDe * TODO:cudaErrorPeerAccessNotEnabled and cudaErrorInvalidDevice error not supported in HIP, return hipErrorUnknown * Returns #hipSuccess, #hipErrorUnknown */ -hipError_t hipDeviceDisablePeerAccess ( int peerDevice ); +hipError_t hipDeviceDisablePeerAccess (int peerDeviceId); /** - * @brief Enables registering memory on peerDevice for direct access from the current device. + * @brief Enable registering memory on peerDevice for direct access from the current device. * * @param [in] peerDevice * @param [in] flags @@ -933,7 +933,7 @@ hipError_t hipDeviceDisablePeerAccess ( int peerDevice ); * TODO:cudaErrorInvalidDevice error not supported in HIP, return hipErrorUnknown * Returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, #hipErrorUnknown */ -hipError_t hipDeviceEnablePeerAccess ( int peerDevice, unsigned int flags ); +hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags); /** * @brief Copies memory from one device to memory on another device. diff --git a/projects/hip/src/hip_hcc.cpp b/projects/hip/src/hip_hcc.cpp index 6c80b84e0e..bbf6538817 100644 --- a/projects/hip/src/hip_hcc.cpp +++ b/projects/hip/src/hip_hcc.cpp @@ -198,6 +198,40 @@ void ihipStream_t::locked_wait(bool assertQueueEmpty) }; +// Recompute the peercnt and the packed _peerAgents whenever a peer is added or deleted. +// The packed _peerAgents can efficiently be used on each memory allocation. +template<> +void ihipDeviceCriticalBase_t::recomputePeerAgents() +{ + _peerCnt = 0; + std::for_each (_peers.begin(), _peers.end(), [this](ihipDevice_t* device) { + _peerAgents[_peerCnt++] = device->_hsa_agent; + }); +} + + +template<> +void ihipDeviceCriticalBase_t::addPeer(ihipDevice_t *peer) +{ + auto match = std::find(_peers.begin(), _peers.end(), peer); + if (match != std::end(_peers)) { + _peers.push_back(peer); + recomputePeerAgents(); + return; + } + + // If we get here - peer was already on list, silently ignore. +} + + +template<> +void ihipDeviceCriticalBase_t::removePeer(ihipDevice_t *peer) +{ + _peers.remove(peer); + recomputePeerAgents(); +} + +//------------------------------------------------------------------------------------------------- //--- ihipDevice_t * ihipStream_t::getDevice() const @@ -408,7 +442,7 @@ void ihipDevice_t::locked_reset() //--- -void ihipDevice_t::init(unsigned device_index, hc::accelerator &acc, unsigned flags) +void ihipDevice_t::init(unsigned device_index, unsigned deviceCnt, hc::accelerator &acc, unsigned flags) { _device_index = device_index; _device_flags = flags; @@ -431,6 +465,8 @@ void ihipDevice_t::init(unsigned device_index, hc::accelerator &acc, unsigned fl _default_stream = new ihipStream_t(device_index, acc.get_default_view(), hipStreamDefault); locked_addStream(_default_stream); + _criticalData.init(deviceCnt); + tprintf(DB_SYNC, "created device with default_stream=%p\n", _default_stream); hsa_region_t *pinnedHostRegion; @@ -905,7 +941,7 @@ void ihipInit() //If device is not in visible devices list, ignore continue; } - g_devices[g_deviceCnt].init(g_deviceCnt, accs[i], hipDeviceMapHost); + g_devices[g_deviceCnt].init(g_deviceCnt, deviceCnt, accs[i], hipDeviceMapHost); g_deviceCnt++; } } diff --git a/projects/hip/src/hip_peer.cpp b/projects/hip/src/hip_peer.cpp index d4de6053de..c4a8a1bcf3 100644 --- a/projects/hip/src/hip_peer.cpp +++ b/projects/hip/src/hip_peer.cpp @@ -20,22 +20,44 @@ THE SOFTWARE. #include "hip_runtime.h" #include "hcc_detail/hip_hcc.h" #include "hcc_detail/trace_helper.h" +#define USE_PEER_TO_PEER 1 /** * @warning HCC returns 0 in *canAccessPeer ; Need to update this function when RT supports P2P */ //--- -hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int device, int peerDevice ) +hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int deviceId, int peerDeviceId) { - HIP_INIT_API(canAccessPeer, device, peerDevice); + HIP_INIT_API(canAccessPeer, deviceId, peerDeviceId); + hipError_t err = hipSuccess; + +#if USE_PEER_TO_PEER + auto device = ihipGetDevice(deviceId); + auto peerDevice = ihipGetDevice(peerDeviceId); + + if ((device != NULL) && (peerDevice != NULL)) { +#if USE_PEER_TO_PEER==2 + *canAccessPeer = peerDevice->_acc.is_peer(device->_acc); +#else + *canAccessPeer = 0; +#endif + + } else { + *canAccessPeer = false; + err = hipErrorInvalidDevice; + } + + +#else *canAccessPeer = false; - return ihipLogStatus(hipSuccess); +#endif + return ihipLogStatus(err); } /** - * @warning Need to update this function when RT supports P2P + * warning Need to update this function when RT supports P2P */ //--- hipError_t hipDeviceDisablePeerAccess ( int peerDevice ) @@ -51,11 +73,26 @@ hipError_t hipDeviceDisablePeerAccess ( int peerDevice ) * @warning Need to update this function when RT supports P2P */ //--- -hipError_t hipDeviceEnablePeerAccess ( int peerDevice, unsigned int flags ) + // Enable registering memory on peerDevice for direct access from the current device. +hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags) { std::call_once(hip_initialized, ihipInit); - // TODO-p2p - return ihipLogStatus(hipSuccess); + + hipError_t err = hipSuccess; +#if USE_PEER_TO_PEER + if (flags != 0) { + err = hipErrorInvalidValue; + } else { + auto peerDevice = ihipGetDevice(peerDeviceId); + if (peerDevice != NULL) { + + } else { + err = hipErrorInvalidDevice; + } + } +#endif + + return ihipLogStatus(err); } From c5240bd0791f4709caf7402d975e391ffb47b5dd Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Wed, 6 Apr 2016 15:49:32 -0500 Subject: [PATCH 03/40] P2P checkpoint. Maintain enabled peer tables for each device. [ROCm/hip commit: 36926e62335eb6ba0bd0954cd805f2e9b756a339] --- projects/hip/include/hcc_detail/hip_hcc.h | 12 +++- .../hip/include/hcc_detail/hip_runtime_api.h | 18 +++--- projects/hip/include/hip_runtime_api.h | 4 +- projects/hip/src/hip_error.cpp | 2 +- projects/hip/src/hip_hcc.cpp | 52 +++++++++------- projects/hip/src/hip_peer.cpp | 60 +++++++++++++------ 6 files changed, 96 insertions(+), 52 deletions(-) diff --git a/projects/hip/include/hcc_detail/hip_hcc.h b/projects/hip/include/hcc_detail/hip_hcc.h index 5c8aaf5659..05fa2818b6 100644 --- a/projects/hip/include/hcc_detail/hip_hcc.h +++ b/projects/hip/include/hcc_detail/hip_hcc.h @@ -519,9 +519,11 @@ public: // "Allocate" a stream ID: ihipStream_t::SeqNum_t incStreamId() { return _stream_id++; }; - void recomputePeerAgents(); - void addPeer(ihipDevice_t *peer); - void removePeer(ihipDevice_t *peer); + bool addPeer(ihipDevice_t *peer); + bool removePeer(ihipDevice_t *peer); + + uint32_t peerCnt() const { return _peerCnt; }; + uint32_t peerAgents() const { return _peerAgents; }; private: @@ -532,6 +534,8 @@ private: std::list _peers; // list of enabled peer devices. uint32_t _peerCnt; // number of enabled peers hsa_agent_t *_peerAgents; // efficient packed array of enabled agents (to use for allocations.) +private: + void recomputePeerAgents(); }; // Note Mutex selected based on DeviceMutex @@ -560,6 +564,8 @@ public: // Functions: void locked_waitAllStreams(); void locked_syncDefaultStream(bool waitOnSelf); + ihipDeviceCritical_t &criticalData() { return _criticalData; }; // TODO, move private. Fix P2P. + public: // Data, set at initialization: unsigned _device_index; // index into g_devices. diff --git a/projects/hip/include/hcc_detail/hip_runtime_api.h b/projects/hip/include/hcc_detail/hip_runtime_api.h index 56b3560427..f33895cd98 100644 --- a/projects/hip/include/hcc_detail/hip_runtime_api.h +++ b/projects/hip/include/hcc_detail/hip_runtime_api.h @@ -913,21 +913,21 @@ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDev /** - * @brief Disable registering memory on peerDevice for direct access from the current device. + * @brief Disable direct access from current device's virtual address space to memory allocations physically located on a peer device. * - * If there are any allocations on peerDevice which were registered in the current device using hipPeerRegister() then these allocations will be automatically unregistered. * Returns hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been enabled from the current device. * - * @param [in] peerDevice + * @param [in] peerDeviceId + * * TODO:cudaErrorPeerAccessNotEnabled and cudaErrorInvalidDevice error not supported in HIP, return hipErrorUnknown * Returns #hipSuccess, #hipErrorUnknown */ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId); /** - * @brief Enable registering memory on peerDevice for direct access from the current device. + * @brief Enable direct access from current device's virtual address space to memory allocations physically located on a peer device. * - * @param [in] peerDevice + * @param [in] peerDeviceId * @param [in] flags * * TODO:cudaErrorInvalidDevice error not supported in HIP, return hipErrorUnknown @@ -939,14 +939,14 @@ hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags); * @brief Copies memory from one device to memory on another device. * * @param [out] dst - Destination device pointer. - * @param [in] dstDevice - Destination device + * @param [in] dstDeviceId - Destination device * @param [in] src - Source device pointer - * @param [in] srcDevice - Source device + * @param [in] srcDeviceId - Source device * @param [in] sizeBytes - Size of memory copy in bytes * * Returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice */ -hipError_t hipMemcpyPeer ( void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes ); +hipError_t hipMemcpyPeer (void* dst, int dstDeviceId, const void* src, int srcDeviceId, size_t sizeBytes); /** * @brief Copies memory from one device to memory on another device. @@ -961,7 +961,7 @@ hipError_t hipMemcpyPeer ( void* dst, int dstDevice, const void* src, int srcD * Returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice */ #if __cplusplus -hipError_t hipMemcpyPeerAsync ( void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes, hipStream_t stream=0 ); +hipError_t hipMemcpyPeerAsync ( void* dst, int dstDeviceId, const void* src, int srcDevice, size_t sizeBytes, hipStream_t stream=0 ); #else hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes, hipStream_t stream); #endif diff --git a/projects/hip/include/hip_runtime_api.h b/projects/hip/include/hip_runtime_api.h index bef881d066..ca49ab5d13 100644 --- a/projects/hip/include/hip_runtime_api.h +++ b/projects/hip/include/hip_runtime_api.h @@ -138,7 +138,7 @@ typedef struct hipPointerAttribute_t { * @enum * @ingroup Enumerations */ -// Developer note - when updating these, update the hipErrorName and hipErrorString functions +// Developer note - when updating these, update the hipErrorName and hipErrorString functions in NVCC and HCC paths typedef enum hipError_t { hipSuccess = 0 ///< Successful completion. ,hipErrorMemoryAllocation ///< Memory allocation error. @@ -155,6 +155,8 @@ typedef enum hipError_t { ,hipErrorNoDevice ///< Call to hipGetDeviceCount returned 0 devices ,hipErrorNotReady ///< Indicates that asynchronous operations enqueued earlier are not ready. This is not actually an error, but is used to distinguish from hipSuccess (which indicates completion). APIs that return this error include hipEventQuery and hipStreamQuery. ,hipErrorUnknown ///< Unknown error. + ,hipErrorPeerAccessNotEnabled ///< Peer access was never enabled from the current device. + ,hipErrorPeerAccessAlreadyEnabled ///< Peer access was already enabled from the current device. ,hipErrorRuntimeMemory ///< HSA runtime memory call returned error. Typically not seen in production systems. ,hipErrorRuntimeOther ///< HSA runtime call other than memory returned error. Typically not seen in production systems. ,hipErrorTbd ///< Marker that more error codes are needed. diff --git a/projects/hip/src/hip_error.cpp b/projects/hip/src/hip_error.cpp index 655ab3b8c7..7c723b1aa2 100644 --- a/projects/hip/src/hip_error.cpp +++ b/projects/hip/src/hip_error.cpp @@ -65,7 +65,7 @@ const char *hipGetErrorName(hipError_t hip_error) //--- const char *hipGetErrorString(hipError_t hip_error) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(hip_error); // TODO - return a message explaining the error. // TODO - This should be set up to return the same string reported in the the doxygen comments, somehow. diff --git a/projects/hip/src/hip_hcc.cpp b/projects/hip/src/hip_hcc.cpp index bbf6538817..992dbca724 100644 --- a/projects/hip/src/hip_hcc.cpp +++ b/projects/hip/src/hip_hcc.cpp @@ -211,24 +211,31 @@ void ihipDeviceCriticalBase_t::recomputePeerAgents() template<> -void ihipDeviceCriticalBase_t::addPeer(ihipDevice_t *peer) +bool ihipDeviceCriticalBase_t::addPeer(ihipDevice_t *peer) { auto match = std::find(_peers.begin(), _peers.end(), peer); if (match != std::end(_peers)) { _peers.push_back(peer); recomputePeerAgents(); - return; + return true; } // If we get here - peer was already on list, silently ignore. + return false; } template<> -void ihipDeviceCriticalBase_t::removePeer(ihipDevice_t *peer) +bool ihipDeviceCriticalBase_t::removePeer(ihipDevice_t *peer) { - _peers.remove(peer); - recomputePeerAgents(); + auto match = std::find(_peers.begin(), _peers.end(), peer); + if (match != std::end(_peers)) { + _peers.remove(peer); + recomputePeerAgents(); + return true; + } else { + return false; + } } //------------------------------------------------------------------------------------------------- @@ -1073,22 +1080,25 @@ void ihipPostLaunchKernel(hipStream_t stream, hc::completion_future &kernelFutur const char *ihipErrorString(hipError_t hip_error) { switch (hip_error) { - case hipSuccess : return "hipSuccess"; - case hipErrorMemoryAllocation : return "hipErrorMemoryAllocation"; - case hipErrorMemoryFree : return "hipErrorMemoryFree"; - case hipErrorUnknownSymbol : return "hipErrorUnknownSymbol"; - case hipErrorOutOfResources : return "hipErrorOutOfResources"; - case hipErrorInvalidValue : return "hipErrorInvalidValue"; - case hipErrorInvalidResourceHandle : return "hipErrorInvalidResourceHandle"; - case hipErrorInvalidDevice : return "hipErrorInvalidDevice"; - case hipErrorInvalidMemcpyDirection : return "hipErrorInvalidMemcpyDirection"; - case hipErrorNoDevice : return "hipErrorNoDevice"; - case hipErrorNotReady : return "hipErrorNotReady"; - case hipErrorRuntimeMemory : return "hipErrorRuntimeMemory"; - case hipErrorRuntimeOther : return "hipErrorRuntimeOther"; - case hipErrorUnknown : return "hipErrorUnknown"; - case hipErrorTbd : return "hipErrorTbd"; - default : return "hipErrorUnknown"; + case hipSuccess : return "hipSuccess"; + case hipErrorMemoryAllocation : return "hipErrorMemoryAllocation"; + case hipErrorMemoryFree : return "hipErrorMemoryFree"; + case hipErrorUnknownSymbol : return "hipErrorUnknownSymbol"; + case hipErrorOutOfResources : return "hipErrorOutOfResources"; + case hipErrorInvalidValue : return "hipErrorInvalidValue"; + case hipErrorInvalidResourceHandle : return "hipErrorInvalidResourceHandle"; + case hipErrorInvalidDevice : return "hipErrorInvalidDevice"; + case hipErrorInvalidMemcpyDirection : return "hipErrorInvalidMemcpyDirection"; + case hipErrorNoDevice : return "hipErrorNoDevice"; + case hipErrorNotReady : return "hipErrorNotReady"; + case hipErrorPeerAccessNotEnabled : return "hipErrorPeerAccessNotEnabled"; + case hipErrorPeerAccessAlreadyEnabled : return "hipErrorPeerAccessAlreadyEnabled"; + + case hipErrorRuntimeMemory : return "hipErrorRuntimeMemory"; + case hipErrorRuntimeOther : return "hipErrorRuntimeOther"; + case hipErrorUnknown : return "hipErrorUnknown"; + case hipErrorTbd : return "hipErrorTbd"; + default : return "hipErrorUnknown"; }; }; diff --git a/projects/hip/src/hip_peer.cpp b/projects/hip/src/hip_peer.cpp index c4a8a1bcf3..de86688ede 100644 --- a/projects/hip/src/hip_peer.cpp +++ b/projects/hip/src/hip_peer.cpp @@ -26,7 +26,7 @@ THE SOFTWARE. * @warning HCC returns 0 in *canAccessPeer ; Need to update this function when RT supports P2P */ //--- -hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int deviceId, int peerDeviceId) +hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDeviceId) { HIP_INIT_API(canAccessPeer, deviceId, peerDeviceId); @@ -38,7 +38,7 @@ hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int deviceId, int peerD if ((device != NULL) && (peerDevice != NULL)) { #if USE_PEER_TO_PEER==2 - *canAccessPeer = peerDevice->_acc.is_peer(device->_acc); + *canAccessPeer = peerDevice->_acc.get_is_peer(device->_acc); #else *canAccessPeer = 0; #endif @@ -56,16 +56,32 @@ hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int deviceId, int peerD } -/** - * warning Need to update this function when RT supports P2P - */ //--- -hipError_t hipDeviceDisablePeerAccess ( int peerDevice ) +hipError_t hipDeviceDisablePeerAccess (int peerDeviceId) { - HIP_INIT_API(peerDevice); + HIP_INIT_API(peerDeviceId); - // TODO-p2p - return ihipLogStatus(hipSuccess); + hipError_t err = hipSuccess; +#if USE_PEER_TO_PEER + + auto thisDevice = ihipGetTlsDefaultDevice(); + auto peerDevice = ihipGetDevice(peerDeviceId); + if ((thisDevice != NULL) && (peerDevice != NULL)) { + LockedAccessor_DeviceCrit_t crit(thisDevice->criticalData()); + bool changed = crit->removePeer(peerDevice); + if (changed) { +#if USE_PEER_TO_PEER==2 + am_memtracker_update_peers(device->_acc, crit->peerCnt(), crit->peerAgents()); +#endif + } else { + err = hipErrorPeerAccessNotEnabled; // never enabled P2P access. + } + } else { + err = hipErrorInvalidDevice; + } +#endif + + return ihipLogStatus(err); }; @@ -74,18 +90,27 @@ hipError_t hipDeviceDisablePeerAccess ( int peerDevice ) */ //--- // Enable registering memory on peerDevice for direct access from the current device. -hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags) +hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(peerDeviceId, flags); hipError_t err = hipSuccess; #if USE_PEER_TO_PEER if (flags != 0) { err = hipErrorInvalidValue; } else { + auto thisDevice = ihipGetTlsDefaultDevice(); auto peerDevice = ihipGetDevice(peerDeviceId); - if (peerDevice != NULL) { - + if ((thisDevice != NULL) && (peerDevice != NULL)) { + LockedAccessor_DeviceCrit_t crit(thisDevice->criticalData()); + bool isNewPeer = crit->addPeer(peerDevice); + if (isNewPeer) { +#if USE_PEER_TO_PEER==2 + am_memtracker_update_peers(device->_acc, crit->peerCnt(), crit->peerAgents()); +#endif + } else { + err = hipErrorPeerAccessAlreadyEnabled; + } } else { err = hipErrorInvalidDevice; } @@ -97,9 +122,10 @@ hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags) //--- -hipError_t hipMemcpyPeer ( void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes ) +hipError_t hipMemcpyPeer (void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(dst, dstDevice, src, srcDevice, sizeBytes); + // HCC has a unified memory architecture so device specifiers are not required. return hipMemcpy(dst, src, sizeBytes, hipMemcpyDefault); }; @@ -109,9 +135,9 @@ hipError_t hipMemcpyPeer ( void* dst, int dstDevice, const void* src, int srcD * @bug This function uses a synchronous copy */ //--- -hipError_t hipMemcpyPeerAsync ( void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes, hipStream_t stream ) +hipError_t hipMemcpyPeerAsync (void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes, hipStream_t stream) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(dst, dstDevice, src, srcDevice, sizeBytes, stream); // HCC has a unified memory architecture so device specifiers are not required. return hipMemcpyAsync(dst, src, sizeBytes, hipMemcpyDefault, stream); }; From 84b5016a79b9e9dd27d2c382e3a83333ea8798d0 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Wed, 6 Apr 2016 16:44:31 -0500 Subject: [PATCH 04/40] For P2P, use the peer list when allocating Device memory or pinned host. Each new allocation is automatically mapped into the address space of all enabled peers. [ROCm/hip commit: 41f7317fb5737697944eb38393f0179f3d639993] --- projects/hip/CONTRIBUTING.md | 4 +- projects/hip/include/hcc_detail/hip_hcc.h | 6 ++- .../hip/include/hcc_detail/hip_runtime_api.h | 11 +++-- projects/hip/src/hip_hcc.cpp | 5 +- projects/hip/src/hip_memory.cpp | 47 ++++++++++--------- projects/hip/src/hip_peer.cpp | 24 +++++++--- 6 files changed, 59 insertions(+), 38 deletions(-) diff --git a/projects/hip/CONTRIBUTING.md b/projects/hip/CONTRIBUTING.md index b593cadd64..4d5050ca89 100644 --- a/projects/hip/CONTRIBUTING.md +++ b/projects/hip/CONTRIBUTING.md @@ -55,8 +55,8 @@ When adding a new HIP feature, add a new unit test as well. See [tests/README.md](README.md) for more information. ## Development Flow -The Unit testing environment automatically rebuilds libhip_hcc.a and the tests when a change it made to the HIP source, and this -is a great place to develop new features alongside the associated test. +It is recommended that developers set the flag HIP_BUILD_LOCAL=1 so that the unit testing environment automatically rebuilds libhip_hcc.a and the tests when a change it made to the HIP source. +Directed tests provide a great place to develop new features alongside the associated test. For applications and benchmarks outside the directed test environment, developments should use a two-step development flow: - #1. Compile, link, and install HCC. See [Installation](README.md#Installation) notes. diff --git a/projects/hip/include/hcc_detail/hip_hcc.h b/projects/hip/include/hcc_detail/hip_hcc.h index 05fa2818b6..23c66759f4 100644 --- a/projects/hip/include/hcc_detail/hip_hcc.h +++ b/projects/hip/include/hcc_detail/hip_hcc.h @@ -35,6 +35,10 @@ THE SOFTWARE. //Use the new HCC accelerator_view::copy instead of am_copy #define USE_AV_COPY 0 +// Compile peer-to-peer support. +// 2= use upcoming HCC APIs. +#define USE_PEER_TO_PEER 1 + //#define INLINE static inline //--- @@ -523,7 +527,7 @@ public: bool removePeer(ihipDevice_t *peer); uint32_t peerCnt() const { return _peerCnt; }; - uint32_t peerAgents() const { return _peerAgents; }; + hsa_agent_t *peerAgents() const { return _peerAgents; }; private: diff --git a/projects/hip/include/hcc_detail/hip_runtime_api.h b/projects/hip/include/hcc_detail/hip_runtime_api.h index f33895cd98..2f70ae47c3 100644 --- a/projects/hip/include/hcc_detail/hip_runtime_api.h +++ b/projects/hip/include/hcc_detail/hip_runtime_api.h @@ -919,19 +919,22 @@ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDev * * @param [in] peerDeviceId * - * TODO:cudaErrorPeerAccessNotEnabled and cudaErrorInvalidDevice error not supported in HIP, return hipErrorUnknown - * Returns #hipSuccess, #hipErrorUnknown + * Returns #hipSuccess, #hipErrorPeerAccessNotEnabled */ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId); /** * @brief Enable direct access from current device's virtual address space to memory allocations physically located on a peer device. * + * Memory which already allocated on peer device will be mapped into the address space of the current device. In addition, all + * future memory allocations on peerDeviceId will be mapped into the address space of the current device when the memory is allocated. + * The peer memory remains accessible from the current device until a call to hipDeviceDisablePeerAccess or @hipDeviceReset. + * + * * @param [in] peerDeviceId * @param [in] flags * - * TODO:cudaErrorInvalidDevice error not supported in HIP, return hipErrorUnknown - * Returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, #hipErrorUnknown + * Returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, #hipErrorPeerAccessAlreadyEnabled */ hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags); diff --git a/projects/hip/src/hip_hcc.cpp b/projects/hip/src/hip_hcc.cpp index 992dbca724..20c6f42451 100644 --- a/projects/hip/src/hip_hcc.cpp +++ b/projects/hip/src/hip_hcc.cpp @@ -442,9 +442,12 @@ void ihipDevice_t::locked_reset() // Reset and remove streams: crit->streams().clear(); +#if USE_PEER_TO_PEER==2 + // remove peer mappings to this device? Call removePeer on all other devices? +#endif + // Reset and release all memory stored in the tracker: am_memtracker_reset(_acc); - }; diff --git a/projects/hip/src/hip_memory.cpp b/projects/hip/src/hip_memory.cpp index 4d75a1d7a1..4cbec08ba0 100644 --- a/projects/hip/src/hip_memory.cpp +++ b/projects/hip/src/hip_memory.cpp @@ -130,6 +130,12 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) hip_status = hipErrorMemoryAllocation; } else { hc::am_memtracker_update(*ptr, device->_device_index, 0); + { + LockedAccessor_DeviceCrit_t crit(device->criticalData()); + if (crit->peerCnt()) { + hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); + } + } } } else { hip_status = hipErrorMemoryAllocation; @@ -139,29 +145,6 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) } -hipError_t hipMallocHost(void** ptr, size_t sizeBytes) -{ - HIP_INIT_API(ptr, sizeBytes); - - hipError_t hip_status = hipSuccess; - - const unsigned am_flags = amHostPinned; - auto device = ihipGetTlsDefaultDevice(); - - if (device) { - *ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags); - if (sizeBytes && (*ptr == NULL)) { - hip_status = hipErrorMemoryAllocation; - } else { - hc::am_memtracker_update(*ptr, device->_device_index, 0); - } - - tprintf (DB_MEM, " %s: pinned ptr=%p\n", __func__, *ptr); - } - - return ihipLogStatus(hip_status); -} - hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { @@ -186,6 +169,12 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) hip_status = hipErrorMemoryAllocation; }else{ hc::am_memtracker_update(*ptr, device->_device_index, flags); + { + LockedAccessor_DeviceCrit_t crit(device->criticalData()); + if (crit->peerCnt()) { + hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); + } + } } tprintf(DB_MEM, " %s: pinned ptr=%p\n", __func__, *ptr); } @@ -194,6 +183,7 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) } +//--- // TODO - remove me, this is deprecated. hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) { @@ -201,6 +191,15 @@ hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) }; +//--- +// TODO - remove me, this is deprecated. +hipError_t hipMallocHost(void** ptr, size_t sizeBytes) +{ + return hipHostMalloc(ptr, sizeBytes, 0); +} + + +//--- hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { HIP_INIT_API(flagsPtr, hostPtr); @@ -225,6 +224,8 @@ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) return ihipLogStatus(hip_status); } + +//--- hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags) { HIP_INIT_API(hostPtr, sizeBytes, flags); diff --git a/projects/hip/src/hip_peer.cpp b/projects/hip/src/hip_peer.cpp index de86688ede..eaac47d82f 100644 --- a/projects/hip/src/hip_peer.cpp +++ b/projects/hip/src/hip_peer.cpp @@ -20,7 +20,6 @@ THE SOFTWARE. #include "hip_runtime.h" #include "hcc_detail/hip_hcc.h" #include "hcc_detail/trace_helper.h" -#define USE_PEER_TO_PEER 1 /** * @warning HCC returns 0 in *canAccessPeer ; Need to update this function when RT supports P2P @@ -67,15 +66,26 @@ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId) auto thisDevice = ihipGetTlsDefaultDevice(); auto peerDevice = ihipGetDevice(peerDeviceId); if ((thisDevice != NULL) && (peerDevice != NULL)) { - LockedAccessor_DeviceCrit_t crit(thisDevice->criticalData()); - bool changed = crit->removePeer(peerDevice); - if (changed) { #if USE_PEER_TO_PEER==2 - am_memtracker_update_peers(device->_acc, crit->peerCnt(), crit->peerAgents()); + bool canAccessPeer = peerDevice->_acc.get_is_peer(device->_acc); +#else + bool canAccessPeer = 0; #endif + if (! canAccessPeer) { + err = hipErrorInvalidDevice; // P2P not allowed between these devices. } else { - err = hipErrorPeerAccessNotEnabled; // never enabled P2P access. - } + + + LockedAccessor_DeviceCrit_t crit(thisDevice->criticalData()); + bool changed = crit->removePeer(peerDevice); + if (changed) { +#if USE_PEER_TO_PEER==2 + am_memtracker_update_peers(device->_acc, crit->peerCnt(), crit->peerAgents()); +#endif + } else { + err = hipErrorPeerAccessNotEnabled; // never enabled P2P access. + } + } } else { err = hipErrorInvalidDevice; } From c0d88c2f6df18614fc6c19a465dd9552715e9c50 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Thu, 7 Apr 2016 14:28:42 -0500 Subject: [PATCH 05/40] Use HCC get_is_pool, add USE_PEER_TO_PEER level 2, 3. [ROCm/hip commit: 15b4b1f81ff2e2729c881b839c48c27c82996202] --- projects/hip/include/hcc_detail/hip_hcc.h | 5 +++-- projects/hip/src/hip_peer.cpp | 17 +++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/projects/hip/include/hcc_detail/hip_hcc.h b/projects/hip/include/hcc_detail/hip_hcc.h index 23c66759f4..1dd8777b1c 100644 --- a/projects/hip/include/hcc_detail/hip_hcc.h +++ b/projects/hip/include/hcc_detail/hip_hcc.h @@ -36,8 +36,9 @@ THE SOFTWARE. #define USE_AV_COPY 0 // Compile peer-to-peer support. -// 2= use upcoming HCC APIs. -#define USE_PEER_TO_PEER 1 +// >= 2 : use HCC hc:accelerator::get_is_peer +// >= 3 : use hc::am_memtracker_update_peers(...) +#define USE_PEER_TO_PEER 2 //#define INLINE static inline diff --git a/projects/hip/src/hip_peer.cpp b/projects/hip/src/hip_peer.cpp index eaac47d82f..2e52f8e586 100644 --- a/projects/hip/src/hip_peer.cpp +++ b/projects/hip/src/hip_peer.cpp @@ -32,12 +32,12 @@ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDe hipError_t err = hipSuccess; #if USE_PEER_TO_PEER - auto device = ihipGetDevice(deviceId); + auto thisDevice = ihipGetDevice(deviceId); auto peerDevice = ihipGetDevice(peerDeviceId); - if ((device != NULL) && (peerDevice != NULL)) { -#if USE_PEER_TO_PEER==2 - *canAccessPeer = peerDevice->_acc.get_is_peer(device->_acc); + if ((thisDevice != NULL) && (peerDevice != NULL)) { +#if USE_PEER_TO_PEER>=2 + *canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc); #else *canAccessPeer = 0; #endif @@ -66,8 +66,8 @@ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId) auto thisDevice = ihipGetTlsDefaultDevice(); auto peerDevice = ihipGetDevice(peerDeviceId); if ((thisDevice != NULL) && (peerDevice != NULL)) { -#if USE_PEER_TO_PEER==2 - bool canAccessPeer = peerDevice->_acc.get_is_peer(device->_acc); +#if USE_PEER_TO_PEER>=2 + bool canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc); #else bool canAccessPeer = 0; #endif @@ -79,7 +79,8 @@ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId) LockedAccessor_DeviceCrit_t crit(thisDevice->criticalData()); bool changed = crit->removePeer(peerDevice); if (changed) { -#if USE_PEER_TO_PEER==2 +#if USE_PEER_TO_PEER>=3 + // Update the peers for all memory already saved in the tracker: am_memtracker_update_peers(device->_acc, crit->peerCnt(), crit->peerAgents()); #endif } else { @@ -115,7 +116,7 @@ hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags) LockedAccessor_DeviceCrit_t crit(thisDevice->criticalData()); bool isNewPeer = crit->addPeer(peerDevice); if (isNewPeer) { -#if USE_PEER_TO_PEER==2 +#if USE_PEER_TO_PEER>=3 am_memtracker_update_peers(device->_acc, crit->peerCnt(), crit->peerAgents()); #endif } else { From 4648855acfbc94394f063817574bdcf43efe8780 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Thu, 7 Apr 2016 15:51:08 -0500 Subject: [PATCH 06/40] Add simple P2P test [ROCm/hip commit: 1a5b1557686311a412c5ff896ac268a95c9fa09b] --- projects/hip/tests/src/CMakeLists.txt | 2 + .../hip/tests/src/hipPeerToPeer_simple.cpp | 87 +++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 projects/hip/tests/src/hipPeerToPeer_simple.cpp diff --git a/projects/hip/tests/src/CMakeLists.txt b/projects/hip/tests/src/CMakeLists.txt index a0413eaad8..28ea1bf10b 100644 --- a/projects/hip/tests/src/CMakeLists.txt +++ b/projects/hip/tests/src/CMakeLists.txt @@ -170,6 +170,7 @@ make_hip_executable (hipFuncSetDeviceFlags hipFuncSetDeviceFlags.cpp) make_hip_executable (hipFuncGetDevice hipFuncGetDevice.cpp) make_hip_executable (hipFuncSetDevice hipFuncSetDevice.cpp) make_hip_executable (hipFuncDeviceSynchronize hipFuncDeviceSynchronize.cpp) +make_hip_executable (hipPeerToPeer_simple hipPeerToPeer_simple.cpp) make_hip_executable (hipMultiThreadDevice hipMultiThreadDevice.cpp) @@ -217,5 +218,6 @@ make_test(hipFuncDeviceSynchronize " ") make_named_test (hipMultiThreadDevice "hipMultiThreadDevice-serial" --tests 0x1) make_named_test (hipMultiThreadDevice "hipMultiThreadDevice-pyramid" --tests 0x4) make_named_test (hipMultiThreadDevice "hipMultiThreadDevice-nearzero" --tests 0x10) +make_test(hipPeerToPeer_simple " " ) make_hipify_test(specialFunc.cu ) diff --git a/projects/hip/tests/src/hipPeerToPeer_simple.cpp b/projects/hip/tests/src/hipPeerToPeer_simple.cpp new file mode 100644 index 0000000000..79401d28b0 --- /dev/null +++ b/projects/hip/tests/src/hipPeerToPeer_simple.cpp @@ -0,0 +1,87 @@ + +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +// Simple test for memset. +// Also serves as a template for other tests. + +#include "hip_runtime.h" +#include "test_common.h" + + +int main(int argc, char *argv[]) +{ + + HipTest::parseStandardArguments(argc, argv, true); + + int deviceCnt; + + HIPCHECK(hipGetDeviceCount(&deviceCnt)); + HIPCHECK(hipSetDevice(p_gpuDevice)); + + int peerDevice = ((p_gpuDevice + 1) % deviceCnt); + + printf ("N=%zu device=%d peerDevice=%d (%d devices total)\n", N, p_gpuDevice, peerDevice, deviceCnt); + + // Must be on a multi-gpu system: + assert (p_gpuDevice != peerDevice); + + int canAccessPeer; + HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, p_gpuDevice, peerDevice)); + printf ("dev#%d canAccessPeer:#%d=%d\n", p_gpuDevice, peerDevice, canAccessPeer); + + assert(canAccessPeer); + + HIPCHECK(hipDeviceEnablePeerAccess(peerDevice, 0)); + + size_t Nbytes = N*sizeof(float); + + float *A_d0, *A_d1; + float *A_h; + + A_h = (float*)malloc(Nbytes); + HIPCHECK (hipSetDevice(peerDevice)); + HIPCHECK (hipMalloc(&A_d1, Nbytes) ); + + HIPCHECK (hipSetDevice(p_gpuDevice)); + HIPCHECK (hipMalloc(&A_d0, Nbytes) ); + + + // Set memory on first device. + HIPCHECK (hipSetDevice(p_gpuDevice)); + HIPCHECK ( hipMemset(A_d0, memsetval, Nbytes) ); + + // Device0 push to device1, using P2P: + HIPCHECK ( hipMemcpy(A_d0, A_d1, Nbytes, hipMemcpyDefault)); + + // Copy data back to host: + HIPCHECK ( hipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost)); + + // Check host data: + for (int i=0; i Date: Fri, 8 Apr 2016 02:15:46 -0500 Subject: [PATCH 07/40] Print peers in hipConfig. Also include peer APIs in vim hilighting. [ROCm/hip commit: 01108b63ae94169cb2b965e4aa93a1336e20e623] --- .../hip/samples/1_Utils/hipInfo/hipInfo.cpp | 17 +++++++++++++++++ projects/hip/util/vim/hip.vim | 6 ++++++ 2 files changed, 23 insertions(+) diff --git a/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp b/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp index 146d17e015..581194f624 100644 --- a/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp +++ b/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp @@ -120,8 +120,25 @@ void printDeviceProp (int deviceId) cout << setw(w1) << "arch.hasSurfaceFuncs: " << props.arch.hasSurfaceFuncs << endl; cout << setw(w1) << "arch.has3dGrid: " << props.arch.has3dGrid << endl; cout << setw(w1) << "arch.hasDynamicParallelism: " << props.arch.hasDynamicParallelism << endl; + + int deviceCnt; + hipGetDeviceCount(&deviceCnt); + cout << setw(w1) << "peers: "; + for (int i=0; i Date: Fri, 8 Apr 2016 02:17:29 -0500 Subject: [PATCH 08/40] Use HIP_PATH if set else use relative ../... [ROCm/hip commit: 597f3ed86d118490ce244b0e0b35443906877c25] --- projects/hip/samples/0_Intro/bit_extract/Makefile | 2 +- projects/hip/samples/0_Intro/square/Makefile | 2 +- projects/hip/samples/1_Utils/hipBusBandwidth/Makefile | 2 +- projects/hip/samples/1_Utils/hipDispatchLatency/Makefile | 2 +- projects/hip/samples/1_Utils/hipInfo/Makefile | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/projects/hip/samples/0_Intro/bit_extract/Makefile b/projects/hip/samples/0_Intro/bit_extract/Makefile index 39fb5cf8c6..cdf793363b 100644 --- a/projects/hip/samples/0_Intro/bit_extract/Makefile +++ b/projects/hip/samples/0_Intro/bit_extract/Makefile @@ -1,6 +1,6 @@ #Dependencies : [MYHIP]/bin must be in user's path. -HIP_PATH=../../.. +HIP_PATH=?../../.. HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform) HIPCC=$(HIP_PATH)/bin/hipcc diff --git a/projects/hip/samples/0_Intro/square/Makefile b/projects/hip/samples/0_Intro/square/Makefile index 98ee0be4f6..817c556b26 100644 --- a/projects/hip/samples/0_Intro/square/Makefile +++ b/projects/hip/samples/0_Intro/square/Makefile @@ -1,4 +1,4 @@ -HIP_PATH=../../.. +HIP_PATH?=../../.. HIPCC=$(HIP_PATH)/bin/hipcc all: square.hip.out diff --git a/projects/hip/samples/1_Utils/hipBusBandwidth/Makefile b/projects/hip/samples/1_Utils/hipBusBandwidth/Makefile index a713379d8d..4599cacba2 100644 --- a/projects/hip/samples/1_Utils/hipBusBandwidth/Makefile +++ b/projects/hip/samples/1_Utils/hipBusBandwidth/Makefile @@ -1,4 +1,4 @@ -HIP_PATH=../../.. +HIP_PATH?=../../.. HIPCC=$(HIP_PATH)/bin/hipcc EXE=hipBusBandwidth diff --git a/projects/hip/samples/1_Utils/hipDispatchLatency/Makefile b/projects/hip/samples/1_Utils/hipDispatchLatency/Makefile index 9b2d558114..87e707923d 100644 --- a/projects/hip/samples/1_Utils/hipDispatchLatency/Makefile +++ b/projects/hip/samples/1_Utils/hipDispatchLatency/Makefile @@ -1,4 +1,4 @@ -HIP_PATH=../../.. +HIP_PATH?=../../.. HIPCC=$(HIP_PATH)/bin/hipcc EXE=hipDispatchLatency diff --git a/projects/hip/samples/1_Utils/hipInfo/Makefile b/projects/hip/samples/1_Utils/hipInfo/Makefile index f38f157bcb..d69067388e 100644 --- a/projects/hip/samples/1_Utils/hipInfo/Makefile +++ b/projects/hip/samples/1_Utils/hipInfo/Makefile @@ -1,4 +1,4 @@ -HIP_PATH=../../.. +HIP_PATH?=../../.. HIPCC=$(HIP_PATH)/bin/hipcc EXE=hipInfo From b98a6bd1cd974e75aa87485bc391f3e076d186b5 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Fri, 8 Apr 2016 09:22:24 -0500 Subject: [PATCH 09/40] Fix simple P2P test, had device ptrs swapped [ROCm/hip commit: 4d394694ae30acdab9258c57e43c434f9d929022] --- projects/hip/tests/src/hipPeerToPeer_simple.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/projects/hip/tests/src/hipPeerToPeer_simple.cpp b/projects/hip/tests/src/hipPeerToPeer_simple.cpp index 79401d28b0..ca2501438d 100644 --- a/projects/hip/tests/src/hipPeerToPeer_simple.cpp +++ b/projects/hip/tests/src/hipPeerToPeer_simple.cpp @@ -52,12 +52,12 @@ int main(int argc, char *argv[]) HIPCHECK(hipDeviceEnablePeerAccess(peerDevice, 0)); - size_t Nbytes = N*sizeof(float); + size_t Nbytes = N*sizeof(char); - float *A_d0, *A_d1; - float *A_h; + char *A_d0, *A_d1; + char *A_h; - A_h = (float*)malloc(Nbytes); + A_h = (char*)malloc(Nbytes); HIPCHECK (hipSetDevice(peerDevice)); HIPCHECK (hipMalloc(&A_d1, Nbytes) ); @@ -70,7 +70,7 @@ int main(int argc, char *argv[]) HIPCHECK ( hipMemset(A_d0, memsetval, Nbytes) ); // Device0 push to device1, using P2P: - HIPCHECK ( hipMemcpy(A_d0, A_d1, Nbytes, hipMemcpyDefault)); + HIPCHECK ( hipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyDefault)); // Copy data back to host: HIPCHECK ( hipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost)); From 70108729bab176f2e1a3b4fde94396a6010db764 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Sat, 9 Apr 2016 04:10:57 -0500 Subject: [PATCH 10/40] fix bugs in P2P implementation - addPeers polarity reversed, would never add. - check allow_access return value, pipe error to hipMalloc. [ROCm/hip commit: 7886c9e3d9e103a5c6f1d3b2c404321355d14eb3] --- projects/hip/src/hip_hcc.cpp | 4 +++- projects/hip/src/hip_memory.cpp | 10 ++++++++-- projects/hip/src/hip_peer.cpp | 7 ++----- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/projects/hip/src/hip_hcc.cpp b/projects/hip/src/hip_hcc.cpp index 20c6f42451..ddf6bb1691 100644 --- a/projects/hip/src/hip_hcc.cpp +++ b/projects/hip/src/hip_hcc.cpp @@ -214,7 +214,8 @@ template<> bool ihipDeviceCriticalBase_t::addPeer(ihipDevice_t *peer) { auto match = std::find(_peers.begin(), _peers.end(), peer); - if (match != std::end(_peers)) { + if (match == std::end(_peers)) { + // Not already a peer, let's update the list: _peers.push_back(peer); recomputePeerAgents(); return true; @@ -230,6 +231,7 @@ bool ihipDeviceCriticalBase_t::removePeer(ihipDevice_t *peer) { auto match = std::find(_peers.begin(), _peers.end(), peer); if (match != std::end(_peers)) { + // Found a valid peer, let's remove it. _peers.remove(peer); recomputePeerAgents(); return true; diff --git a/projects/hip/src/hip_memory.cpp b/projects/hip/src/hip_memory.cpp index 4cbec08ba0..2a4ec205e2 100644 --- a/projects/hip/src/hip_memory.cpp +++ b/projects/hip/src/hip_memory.cpp @@ -133,7 +133,10 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) { LockedAccessor_DeviceCrit_t crit(device->criticalData()); if (crit->peerCnt()) { - hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); + hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); + if (hsa_status != HSA_STATUS_SUCCESS) { + hip_status = hipErrorMemoryAllocation; + } } } } @@ -172,7 +175,10 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { LockedAccessor_DeviceCrit_t crit(device->criticalData()); if (crit->peerCnt()) { - hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); + hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); + if (hsa_status != HSA_STATUS_SUCCESS) { + hip_status = hipErrorMemoryAllocation; + } } } } diff --git a/projects/hip/src/hip_peer.cpp b/projects/hip/src/hip_peer.cpp index 2e52f8e586..ad9cf14634 100644 --- a/projects/hip/src/hip_peer.cpp +++ b/projects/hip/src/hip_peer.cpp @@ -31,26 +31,23 @@ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDe hipError_t err = hipSuccess; -#if USE_PEER_TO_PEER auto thisDevice = ihipGetDevice(deviceId); auto peerDevice = ihipGetDevice(peerDeviceId); if ((thisDevice != NULL) && (peerDevice != NULL)) { #if USE_PEER_TO_PEER>=2 *canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc); + printf ("canAccessPeer=%d\n", *canAccessPeer); #else *canAccessPeer = 0; #endif } else { - *canAccessPeer = false; + *canAccessPeer = 0; err = hipErrorInvalidDevice; } -#else - *canAccessPeer = false; -#endif return ihipLogStatus(err); } From 772983e547d00128fcb9056fa94ff782e1970281 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Sat, 9 Apr 2016 04:35:06 -0500 Subject: [PATCH 11/40] Remove stray debug msgs, hipInfo don't print self as peer. [ROCm/hip commit: 816de09842f711812ad807a5da8036e0df4d8679] --- projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp | 2 +- projects/hip/src/hip_memory.cpp | 2 -- projects/hip/src/hip_peer.cpp | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp b/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp index 581194f624..9151d5058e 100644 --- a/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp +++ b/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp @@ -127,7 +127,7 @@ void printDeviceProp (int deviceId) for (int i=0; i_acc, &deviceMemSize, &hostMemSize, &userMemSize); - printf ("deviceMemSize=%zu\n", deviceMemSize); - *free = hipDevice->_props.totalGlobalMem - deviceMemSize; } diff --git a/projects/hip/src/hip_peer.cpp b/projects/hip/src/hip_peer.cpp index ad9cf14634..aeb9c7fd49 100644 --- a/projects/hip/src/hip_peer.cpp +++ b/projects/hip/src/hip_peer.cpp @@ -37,7 +37,6 @@ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDe if ((thisDevice != NULL) && (peerDevice != NULL)) { #if USE_PEER_TO_PEER>=2 *canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc); - printf ("canAccessPeer=%d\n", *canAccessPeer); #else *canAccessPeer = 0; #endif From 09f4fc418862e53ef016e7350898e042a25800c6 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Sat, 9 Apr 2016 04:47:07 -0500 Subject: [PATCH 12/40] Improve P2P test. Add option to select which device does the copy. [ROCm/hip commit: 466ab79f1600907c6b10795732c04c4ce280ca74] --- .../hip/tests/src/hipPeerToPeer_simple.cpp | 41 ++++++++++++++----- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/projects/hip/tests/src/hipPeerToPeer_simple.cpp b/projects/hip/tests/src/hipPeerToPeer_simple.cpp index ca2501438d..aa305f92fa 100644 --- a/projects/hip/tests/src/hipPeerToPeer_simple.cpp +++ b/projects/hip/tests/src/hipPeerToPeer_simple.cpp @@ -26,11 +26,26 @@ THE SOFTWARE. #include "hip_runtime.h" #include "test_common.h" +bool p_memcpyWithPeer = false; + +void parseMyArguments(int argc, char *argv[]) +{ + int more_argc = HipTest::parseStandardArguments(argc, argv, false); + // parse args for this test: + for (int i = 1; i < more_argc; i++) { + const char *arg = argv[i]; + + if (!strcmp(arg, "--memcpyWithPeer")) { + p_memcpyWithPeer = true; + } else { + failed("Bad argument '%s'", arg); + } + }; +}; int main(int argc, char *argv[]) { - - HipTest::parseStandardArguments(argc, argv, true); + parseMyArguments(argc, argv); int deviceCnt; @@ -58,27 +73,31 @@ int main(int argc, char *argv[]) char *A_h; A_h = (char*)malloc(Nbytes); - HIPCHECK (hipSetDevice(peerDevice)); - HIPCHECK (hipMalloc(&A_d1, Nbytes) ); + // allocate and initialize memory on device0 HIPCHECK (hipSetDevice(p_gpuDevice)); HIPCHECK (hipMalloc(&A_d0, Nbytes) ); - - - // Set memory on first device. - HIPCHECK (hipSetDevice(p_gpuDevice)); HIPCHECK ( hipMemset(A_d0, memsetval, Nbytes) ); + // allocate and initialize memory on peer device + HIPCHECK (hipSetDevice(peerDevice)); + HIPCHECK (hipMalloc(&A_d1, Nbytes) ); + HIPCHECK ( hipMemset(A_d1, 0x13, Nbytes) ); + + + // Device0 push to device1, using P2P: - HIPCHECK ( hipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyDefault)); + HIPCHECK (hipSetDevice(p_memcpyWithPeer ? peerDevice : p_gpuDevice)); + HIPCHECK (hipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyDefault)); // Copy data back to host: - HIPCHECK ( hipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost)); + HIPCHECK (hipSetDevice(peerDevice)); + HIPCHECK (hipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost)); // Check host data: for (int i=0; i Date: Sat, 9 Apr 2016 05:03:08 -0500 Subject: [PATCH 13/40] Improve P2P test. - Ensure proper device is set before each command. - Add command line switches: +bool p_memcpyWithPeer = false; // use the peer device for the P2P copy +bool p_mirrorPeers = false; // in addition to mapping current to peer space, map peer to current space. +int p_peerDevice = -1; // explicly specify which peer to use, else use p_gpuDevice + 1. [ROCm/hip commit: b11bd9bbe3fe4b962329f261d0f5dd2c61812028] --- .../hip/tests/src/hipPeerToPeer_simple.cpp | 37 ++++++++++++++----- projects/hip/tests/src/test_common.cpp | 2 +- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/projects/hip/tests/src/hipPeerToPeer_simple.cpp b/projects/hip/tests/src/hipPeerToPeer_simple.cpp index aa305f92fa..4aaa6a452b 100644 --- a/projects/hip/tests/src/hipPeerToPeer_simple.cpp +++ b/projects/hip/tests/src/hipPeerToPeer_simple.cpp @@ -26,7 +26,9 @@ THE SOFTWARE. #include "hip_runtime.h" #include "test_common.h" -bool p_memcpyWithPeer = false; +bool p_memcpyWithPeer = false; // use the peer device for the P2P copy +bool p_mirrorPeers = false; // in addition to mapping current to peer space, map peer to current space. +int p_peerDevice = -1; // explicly specify which peer to use, else use p_gpuDevice + 1. void parseMyArguments(int argc, char *argv[]) { @@ -37,12 +39,19 @@ void parseMyArguments(int argc, char *argv[]) if (!strcmp(arg, "--memcpyWithPeer")) { p_memcpyWithPeer = true; + } else if (!strcmp(arg, "--mirrorPeers")) { + p_mirrorPeers = true; + } else if (!strcmp(arg, "--peerDevice")) { + if (++i >= argc || !HipTest::parseInt(argv[i], &p_peerDevice)) { + failed("Bad peerDevice argument"); + } } else { failed("Bad argument '%s'", arg); } }; }; + int main(int argc, char *argv[]) { parseMyArguments(argc, argv); @@ -50,23 +59,33 @@ int main(int argc, char *argv[]) int deviceCnt; HIPCHECK(hipGetDeviceCount(&deviceCnt)); - HIPCHECK(hipSetDevice(p_gpuDevice)); - int peerDevice = ((p_gpuDevice + 1) % deviceCnt); + int currentDevice = p_gpuDevice; + int peerDevice = (p_peerDevice == -1) ? ((currentDevice + 1) % deviceCnt) : p_peerDevice; - printf ("N=%zu device=%d peerDevice=%d (%d devices total)\n", N, p_gpuDevice, peerDevice, deviceCnt); + printf ("N=%zu device=%d peerDevice=%d (%d devices total)\n", N, currentDevice, peerDevice, deviceCnt); // Must be on a multi-gpu system: - assert (p_gpuDevice != peerDevice); + assert (currentDevice != peerDevice); int canAccessPeer; - HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, p_gpuDevice, peerDevice)); - printf ("dev#%d canAccessPeer:#%d=%d\n", p_gpuDevice, peerDevice, canAccessPeer); + HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, currentDevice, peerDevice)); + printf ("dev#%d canAccessPeer:#%d=%d\n", currentDevice, peerDevice, canAccessPeer); assert(canAccessPeer); + HIPCHECK(hipSetDevice(currentDevice)); HIPCHECK(hipDeviceEnablePeerAccess(peerDevice, 0)); + if (p_mirrorPeers) { + int canAccessPeer; + HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, peerDevice, currentDevice)); + assert(canAccessPeer); + + HIPCHECK(hipSetDevice(peerDevice)); + HIPCHECK(hipDeviceEnablePeerAccess(currentDevice, 0)); + } + size_t Nbytes = N*sizeof(char); char *A_d0, *A_d1; @@ -75,7 +94,7 @@ int main(int argc, char *argv[]) A_h = (char*)malloc(Nbytes); // allocate and initialize memory on device0 - HIPCHECK (hipSetDevice(p_gpuDevice)); + HIPCHECK (hipSetDevice(currentDevice)); HIPCHECK (hipMalloc(&A_d0, Nbytes) ); HIPCHECK ( hipMemset(A_d0, memsetval, Nbytes) ); @@ -87,7 +106,7 @@ int main(int argc, char *argv[]) // Device0 push to device1, using P2P: - HIPCHECK (hipSetDevice(p_memcpyWithPeer ? peerDevice : p_gpuDevice)); + HIPCHECK (hipSetDevice(p_memcpyWithPeer ? peerDevice : currentDevice)); HIPCHECK (hipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyDefault)); // Copy data back to host: diff --git a/projects/hip/tests/src/test_common.cpp b/projects/hip/tests/src/test_common.cpp index 332c2856d3..35e3d6d3f2 100644 --- a/projects/hip/tests/src/test_common.cpp +++ b/projects/hip/tests/src/test_common.cpp @@ -111,7 +111,7 @@ int parseStandardArguments(int argc, char *argv[], bool failOnUndefinedArg) failed("Bad iterations argument"); } - } else if (!strcmp(arg, "--gpu") || (!strcmp(arg, "-g"))) { + } else if (!strcmp(arg, "--gpu") || (!strcmp(arg, "-gpuDevice")) || (!strcmp(arg, "-g"))) { if (++i >= argc || !HipTest::parseInt(argv[i], &p_gpuDevice)) { failed("Bad gpuDevice argument"); } From 56d684e2fe0d4baa113cbb8389f80de12a0c9284 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Wed, 6 Apr 2016 11:28:23 -0500 Subject: [PATCH 14/40] Checkpoint initial peer2peer implementation. [ROCm/hip commit: 69f2469cbb8082dce2da16171e1e7cf1e6acab29] --- projects/hip/include/hcc_detail/hip_hcc.h | 27 ++++++++-- .../hip/include/hcc_detail/hip_runtime_api.h | 10 ++-- projects/hip/src/hip_hcc.cpp | 40 ++++++++++++++- projects/hip/src/hip_peer.cpp | 51 ++++++++++++++++--- 4 files changed, 111 insertions(+), 17 deletions(-) diff --git a/projects/hip/include/hcc_detail/hip_hcc.h b/projects/hip/include/hcc_detail/hip_hcc.h index 42dd3b6df5..5c8aaf5659 100644 --- a/projects/hip/include/hcc_detail/hip_hcc.h +++ b/projects/hip/include/hcc_detail/hip_hcc.h @@ -494,11 +494,23 @@ struct ihipEvent_t { // will lock the mutex on construction and unlock on destruction. // // MUTEX_TYPE is template argument so can easily convert to FakeMutex for performance or stress testing. -template +template class ihipDeviceCriticalBase_t : LockedBase { public: - ihipDeviceCriticalBase_t() : _stream_id(0) {}; + ihipDeviceCriticalBase_t() : _stream_id(0), _peerAgents(nullptr) {}; + + void init(unsigned deviceCnt) { + assert(_peerAgents == nullptr); + _peerAgents = new hsa_agent_t[deviceCnt]; + }; + + ~ihipDeviceCriticalBase_t() { + if (_peerAgents != nullptr) { + delete _peerAgents; + _peerAgents = nullptr; + } + } friend class LockedAccessor; std::list &streams() { return _streams; }; @@ -507,10 +519,19 @@ public: // "Allocate" a stream ID: ihipStream_t::SeqNum_t incStreamId() { return _stream_id++; }; + void recomputePeerAgents(); + void addPeer(ihipDevice_t *peer); + void removePeer(ihipDevice_t *peer); + private: std::list _streams; // streams associated with this device. ihipStream_t::SeqNum_t _stream_id; + + // These reflect the currently Enabled set of peers for this GPU: + std::list _peers; // list of enabled peer devices. + uint32_t _peerCnt; // number of enabled peers + hsa_agent_t *_peerAgents; // efficient packed array of enabled agents (to use for allocations.) }; // Note Mutex selected based on DeviceMutex @@ -530,7 +551,7 @@ class ihipDevice_t { public: // Functions: ihipDevice_t() {}; // note: calls constructor for _criticalData - void init(unsigned device_index, hc::accelerator &acc, unsigned flags); + void init(unsigned device_index, unsigned deviceCnt, hc::accelerator &acc, unsigned flags); ~ihipDevice_t(); void locked_addStream(ihipStream_t *s); diff --git a/projects/hip/include/hcc_detail/hip_runtime_api.h b/projects/hip/include/hcc_detail/hip_runtime_api.h index b5fdb312a4..56b3560427 100644 --- a/projects/hip/include/hcc_detail/hip_runtime_api.h +++ b/projects/hip/include/hcc_detail/hip_runtime_api.h @@ -908,12 +908,12 @@ hipError_t hipMemGetInfo (size_t * free, size_t * total) ; * Returns "1" in @p canAccessPeer if the specified @p device is capable * of directly accessing memory physically located on peerDevice , or "0" if not. */ -hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int device, int peerDevice ); +hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDeviceId); /** - * @brief Disables registering memory on peerDevice for direct access from the current device. + * @brief Disable registering memory on peerDevice for direct access from the current device. * * If there are any allocations on peerDevice which were registered in the current device using hipPeerRegister() then these allocations will be automatically unregistered. * Returns hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been enabled from the current device. @@ -922,10 +922,10 @@ hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int device, int peerDe * TODO:cudaErrorPeerAccessNotEnabled and cudaErrorInvalidDevice error not supported in HIP, return hipErrorUnknown * Returns #hipSuccess, #hipErrorUnknown */ -hipError_t hipDeviceDisablePeerAccess ( int peerDevice ); +hipError_t hipDeviceDisablePeerAccess (int peerDeviceId); /** - * @brief Enables registering memory on peerDevice for direct access from the current device. + * @brief Enable registering memory on peerDevice for direct access from the current device. * * @param [in] peerDevice * @param [in] flags @@ -933,7 +933,7 @@ hipError_t hipDeviceDisablePeerAccess ( int peerDevice ); * TODO:cudaErrorInvalidDevice error not supported in HIP, return hipErrorUnknown * Returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, #hipErrorUnknown */ -hipError_t hipDeviceEnablePeerAccess ( int peerDevice, unsigned int flags ); +hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags); /** * @brief Copies memory from one device to memory on another device. diff --git a/projects/hip/src/hip_hcc.cpp b/projects/hip/src/hip_hcc.cpp index 3c087467d7..8e87b96d70 100644 --- a/projects/hip/src/hip_hcc.cpp +++ b/projects/hip/src/hip_hcc.cpp @@ -198,6 +198,40 @@ void ihipStream_t::locked_wait(bool assertQueueEmpty) }; +// Recompute the peercnt and the packed _peerAgents whenever a peer is added or deleted. +// The packed _peerAgents can efficiently be used on each memory allocation. +template<> +void ihipDeviceCriticalBase_t::recomputePeerAgents() +{ + _peerCnt = 0; + std::for_each (_peers.begin(), _peers.end(), [this](ihipDevice_t* device) { + _peerAgents[_peerCnt++] = device->_hsa_agent; + }); +} + + +template<> +void ihipDeviceCriticalBase_t::addPeer(ihipDevice_t *peer) +{ + auto match = std::find(_peers.begin(), _peers.end(), peer); + if (match != std::end(_peers)) { + _peers.push_back(peer); + recomputePeerAgents(); + return; + } + + // If we get here - peer was already on list, silently ignore. +} + + +template<> +void ihipDeviceCriticalBase_t::removePeer(ihipDevice_t *peer) +{ + _peers.remove(peer); + recomputePeerAgents(); +} + +//------------------------------------------------------------------------------------------------- //--- ihipDevice_t * ihipStream_t::getDevice() const @@ -408,7 +442,7 @@ void ihipDevice_t::locked_reset() //--- -void ihipDevice_t::init(unsigned device_index, hc::accelerator &acc, unsigned flags) +void ihipDevice_t::init(unsigned device_index, unsigned deviceCnt, hc::accelerator &acc, unsigned flags) { _device_index = device_index; _device_flags = flags; @@ -431,6 +465,8 @@ void ihipDevice_t::init(unsigned device_index, hc::accelerator &acc, unsigned fl _default_stream = new ihipStream_t(device_index, acc.get_default_view(), hipStreamDefault); locked_addStream(_default_stream); + _criticalData.init(deviceCnt); + tprintf(DB_SYNC, "created device with default_stream=%p\n", _default_stream); hsa_region_t *pinnedHostRegion; @@ -904,7 +940,7 @@ void ihipInit() //If device is not in visible devices list, ignore continue; } - g_devices[g_deviceCnt].init(g_deviceCnt, accs[i], hipDeviceMapHost); + g_devices[g_deviceCnt].init(g_deviceCnt, deviceCnt, accs[i], hipDeviceMapHost); g_deviceCnt++; } } diff --git a/projects/hip/src/hip_peer.cpp b/projects/hip/src/hip_peer.cpp index d4de6053de..c4a8a1bcf3 100644 --- a/projects/hip/src/hip_peer.cpp +++ b/projects/hip/src/hip_peer.cpp @@ -20,22 +20,44 @@ THE SOFTWARE. #include "hip_runtime.h" #include "hcc_detail/hip_hcc.h" #include "hcc_detail/trace_helper.h" +#define USE_PEER_TO_PEER 1 /** * @warning HCC returns 0 in *canAccessPeer ; Need to update this function when RT supports P2P */ //--- -hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int device, int peerDevice ) +hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int deviceId, int peerDeviceId) { - HIP_INIT_API(canAccessPeer, device, peerDevice); + HIP_INIT_API(canAccessPeer, deviceId, peerDeviceId); + hipError_t err = hipSuccess; + +#if USE_PEER_TO_PEER + auto device = ihipGetDevice(deviceId); + auto peerDevice = ihipGetDevice(peerDeviceId); + + if ((device != NULL) && (peerDevice != NULL)) { +#if USE_PEER_TO_PEER==2 + *canAccessPeer = peerDevice->_acc.is_peer(device->_acc); +#else + *canAccessPeer = 0; +#endif + + } else { + *canAccessPeer = false; + err = hipErrorInvalidDevice; + } + + +#else *canAccessPeer = false; - return ihipLogStatus(hipSuccess); +#endif + return ihipLogStatus(err); } /** - * @warning Need to update this function when RT supports P2P + * warning Need to update this function when RT supports P2P */ //--- hipError_t hipDeviceDisablePeerAccess ( int peerDevice ) @@ -51,11 +73,26 @@ hipError_t hipDeviceDisablePeerAccess ( int peerDevice ) * @warning Need to update this function when RT supports P2P */ //--- -hipError_t hipDeviceEnablePeerAccess ( int peerDevice, unsigned int flags ) + // Enable registering memory on peerDevice for direct access from the current device. +hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags) { std::call_once(hip_initialized, ihipInit); - // TODO-p2p - return ihipLogStatus(hipSuccess); + + hipError_t err = hipSuccess; +#if USE_PEER_TO_PEER + if (flags != 0) { + err = hipErrorInvalidValue; + } else { + auto peerDevice = ihipGetDevice(peerDeviceId); + if (peerDevice != NULL) { + + } else { + err = hipErrorInvalidDevice; + } + } +#endif + + return ihipLogStatus(err); } From ac11c83d76601b82670f83cc906cee94cce033c6 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Wed, 6 Apr 2016 15:49:32 -0500 Subject: [PATCH 15/40] P2P checkpoint. Maintain enabled peer tables for each device. [ROCm/hip commit: f2aa470f7fd7e976da69c42ddd1d5b3d4388a143] --- projects/hip/include/hcc_detail/hip_hcc.h | 12 +++- .../hip/include/hcc_detail/hip_runtime_api.h | 18 +++--- projects/hip/include/hip_runtime_api.h | 4 +- projects/hip/src/hip_error.cpp | 2 +- projects/hip/src/hip_hcc.cpp | 52 +++++++++------- projects/hip/src/hip_peer.cpp | 60 +++++++++++++------ 6 files changed, 96 insertions(+), 52 deletions(-) diff --git a/projects/hip/include/hcc_detail/hip_hcc.h b/projects/hip/include/hcc_detail/hip_hcc.h index 5c8aaf5659..05fa2818b6 100644 --- a/projects/hip/include/hcc_detail/hip_hcc.h +++ b/projects/hip/include/hcc_detail/hip_hcc.h @@ -519,9 +519,11 @@ public: // "Allocate" a stream ID: ihipStream_t::SeqNum_t incStreamId() { return _stream_id++; }; - void recomputePeerAgents(); - void addPeer(ihipDevice_t *peer); - void removePeer(ihipDevice_t *peer); + bool addPeer(ihipDevice_t *peer); + bool removePeer(ihipDevice_t *peer); + + uint32_t peerCnt() const { return _peerCnt; }; + uint32_t peerAgents() const { return _peerAgents; }; private: @@ -532,6 +534,8 @@ private: std::list _peers; // list of enabled peer devices. uint32_t _peerCnt; // number of enabled peers hsa_agent_t *_peerAgents; // efficient packed array of enabled agents (to use for allocations.) +private: + void recomputePeerAgents(); }; // Note Mutex selected based on DeviceMutex @@ -560,6 +564,8 @@ public: // Functions: void locked_waitAllStreams(); void locked_syncDefaultStream(bool waitOnSelf); + ihipDeviceCritical_t &criticalData() { return _criticalData; }; // TODO, move private. Fix P2P. + public: // Data, set at initialization: unsigned _device_index; // index into g_devices. diff --git a/projects/hip/include/hcc_detail/hip_runtime_api.h b/projects/hip/include/hcc_detail/hip_runtime_api.h index 56b3560427..f33895cd98 100644 --- a/projects/hip/include/hcc_detail/hip_runtime_api.h +++ b/projects/hip/include/hcc_detail/hip_runtime_api.h @@ -913,21 +913,21 @@ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDev /** - * @brief Disable registering memory on peerDevice for direct access from the current device. + * @brief Disable direct access from current device's virtual address space to memory allocations physically located on a peer device. * - * If there are any allocations on peerDevice which were registered in the current device using hipPeerRegister() then these allocations will be automatically unregistered. * Returns hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been enabled from the current device. * - * @param [in] peerDevice + * @param [in] peerDeviceId + * * TODO:cudaErrorPeerAccessNotEnabled and cudaErrorInvalidDevice error not supported in HIP, return hipErrorUnknown * Returns #hipSuccess, #hipErrorUnknown */ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId); /** - * @brief Enable registering memory on peerDevice for direct access from the current device. + * @brief Enable direct access from current device's virtual address space to memory allocations physically located on a peer device. * - * @param [in] peerDevice + * @param [in] peerDeviceId * @param [in] flags * * TODO:cudaErrorInvalidDevice error not supported in HIP, return hipErrorUnknown @@ -939,14 +939,14 @@ hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags); * @brief Copies memory from one device to memory on another device. * * @param [out] dst - Destination device pointer. - * @param [in] dstDevice - Destination device + * @param [in] dstDeviceId - Destination device * @param [in] src - Source device pointer - * @param [in] srcDevice - Source device + * @param [in] srcDeviceId - Source device * @param [in] sizeBytes - Size of memory copy in bytes * * Returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice */ -hipError_t hipMemcpyPeer ( void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes ); +hipError_t hipMemcpyPeer (void* dst, int dstDeviceId, const void* src, int srcDeviceId, size_t sizeBytes); /** * @brief Copies memory from one device to memory on another device. @@ -961,7 +961,7 @@ hipError_t hipMemcpyPeer ( void* dst, int dstDevice, const void* src, int srcD * Returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice */ #if __cplusplus -hipError_t hipMemcpyPeerAsync ( void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes, hipStream_t stream=0 ); +hipError_t hipMemcpyPeerAsync ( void* dst, int dstDeviceId, const void* src, int srcDevice, size_t sizeBytes, hipStream_t stream=0 ); #else hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes, hipStream_t stream); #endif diff --git a/projects/hip/include/hip_runtime_api.h b/projects/hip/include/hip_runtime_api.h index bef881d066..ca49ab5d13 100644 --- a/projects/hip/include/hip_runtime_api.h +++ b/projects/hip/include/hip_runtime_api.h @@ -138,7 +138,7 @@ typedef struct hipPointerAttribute_t { * @enum * @ingroup Enumerations */ -// Developer note - when updating these, update the hipErrorName and hipErrorString functions +// Developer note - when updating these, update the hipErrorName and hipErrorString functions in NVCC and HCC paths typedef enum hipError_t { hipSuccess = 0 ///< Successful completion. ,hipErrorMemoryAllocation ///< Memory allocation error. @@ -155,6 +155,8 @@ typedef enum hipError_t { ,hipErrorNoDevice ///< Call to hipGetDeviceCount returned 0 devices ,hipErrorNotReady ///< Indicates that asynchronous operations enqueued earlier are not ready. This is not actually an error, but is used to distinguish from hipSuccess (which indicates completion). APIs that return this error include hipEventQuery and hipStreamQuery. ,hipErrorUnknown ///< Unknown error. + ,hipErrorPeerAccessNotEnabled ///< Peer access was never enabled from the current device. + ,hipErrorPeerAccessAlreadyEnabled ///< Peer access was already enabled from the current device. ,hipErrorRuntimeMemory ///< HSA runtime memory call returned error. Typically not seen in production systems. ,hipErrorRuntimeOther ///< HSA runtime call other than memory returned error. Typically not seen in production systems. ,hipErrorTbd ///< Marker that more error codes are needed. diff --git a/projects/hip/src/hip_error.cpp b/projects/hip/src/hip_error.cpp index 655ab3b8c7..7c723b1aa2 100644 --- a/projects/hip/src/hip_error.cpp +++ b/projects/hip/src/hip_error.cpp @@ -65,7 +65,7 @@ const char *hipGetErrorName(hipError_t hip_error) //--- const char *hipGetErrorString(hipError_t hip_error) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(hip_error); // TODO - return a message explaining the error. // TODO - This should be set up to return the same string reported in the the doxygen comments, somehow. diff --git a/projects/hip/src/hip_hcc.cpp b/projects/hip/src/hip_hcc.cpp index 8e87b96d70..0cae125509 100644 --- a/projects/hip/src/hip_hcc.cpp +++ b/projects/hip/src/hip_hcc.cpp @@ -211,24 +211,31 @@ void ihipDeviceCriticalBase_t::recomputePeerAgents() template<> -void ihipDeviceCriticalBase_t::addPeer(ihipDevice_t *peer) +bool ihipDeviceCriticalBase_t::addPeer(ihipDevice_t *peer) { auto match = std::find(_peers.begin(), _peers.end(), peer); if (match != std::end(_peers)) { _peers.push_back(peer); recomputePeerAgents(); - return; + return true; } // If we get here - peer was already on list, silently ignore. + return false; } template<> -void ihipDeviceCriticalBase_t::removePeer(ihipDevice_t *peer) +bool ihipDeviceCriticalBase_t::removePeer(ihipDevice_t *peer) { - _peers.remove(peer); - recomputePeerAgents(); + auto match = std::find(_peers.begin(), _peers.end(), peer); + if (match != std::end(_peers)) { + _peers.remove(peer); + recomputePeerAgents(); + return true; + } else { + return false; + } } //------------------------------------------------------------------------------------------------- @@ -1065,22 +1072,25 @@ void ihipPostLaunchKernel(hipStream_t stream, hc::completion_future &kernelFutur const char *ihipErrorString(hipError_t hip_error) { switch (hip_error) { - case hipSuccess : return "hipSuccess"; - case hipErrorMemoryAllocation : return "hipErrorMemoryAllocation"; - case hipErrorMemoryFree : return "hipErrorMemoryFree"; - case hipErrorUnknownSymbol : return "hipErrorUnknownSymbol"; - case hipErrorOutOfResources : return "hipErrorOutOfResources"; - case hipErrorInvalidValue : return "hipErrorInvalidValue"; - case hipErrorInvalidResourceHandle : return "hipErrorInvalidResourceHandle"; - case hipErrorInvalidDevice : return "hipErrorInvalidDevice"; - case hipErrorInvalidMemcpyDirection : return "hipErrorInvalidMemcpyDirection"; - case hipErrorNoDevice : return "hipErrorNoDevice"; - case hipErrorNotReady : return "hipErrorNotReady"; - case hipErrorRuntimeMemory : return "hipErrorRuntimeMemory"; - case hipErrorRuntimeOther : return "hipErrorRuntimeOther"; - case hipErrorUnknown : return "hipErrorUnknown"; - case hipErrorTbd : return "hipErrorTbd"; - default : return "hipErrorUnknown"; + case hipSuccess : return "hipSuccess"; + case hipErrorMemoryAllocation : return "hipErrorMemoryAllocation"; + case hipErrorMemoryFree : return "hipErrorMemoryFree"; + case hipErrorUnknownSymbol : return "hipErrorUnknownSymbol"; + case hipErrorOutOfResources : return "hipErrorOutOfResources"; + case hipErrorInvalidValue : return "hipErrorInvalidValue"; + case hipErrorInvalidResourceHandle : return "hipErrorInvalidResourceHandle"; + case hipErrorInvalidDevice : return "hipErrorInvalidDevice"; + case hipErrorInvalidMemcpyDirection : return "hipErrorInvalidMemcpyDirection"; + case hipErrorNoDevice : return "hipErrorNoDevice"; + case hipErrorNotReady : return "hipErrorNotReady"; + case hipErrorPeerAccessNotEnabled : return "hipErrorPeerAccessNotEnabled"; + case hipErrorPeerAccessAlreadyEnabled : return "hipErrorPeerAccessAlreadyEnabled"; + + case hipErrorRuntimeMemory : return "hipErrorRuntimeMemory"; + case hipErrorRuntimeOther : return "hipErrorRuntimeOther"; + case hipErrorUnknown : return "hipErrorUnknown"; + case hipErrorTbd : return "hipErrorTbd"; + default : return "hipErrorUnknown"; }; }; diff --git a/projects/hip/src/hip_peer.cpp b/projects/hip/src/hip_peer.cpp index c4a8a1bcf3..de86688ede 100644 --- a/projects/hip/src/hip_peer.cpp +++ b/projects/hip/src/hip_peer.cpp @@ -26,7 +26,7 @@ THE SOFTWARE. * @warning HCC returns 0 in *canAccessPeer ; Need to update this function when RT supports P2P */ //--- -hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int deviceId, int peerDeviceId) +hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDeviceId) { HIP_INIT_API(canAccessPeer, deviceId, peerDeviceId); @@ -38,7 +38,7 @@ hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int deviceId, int peerD if ((device != NULL) && (peerDevice != NULL)) { #if USE_PEER_TO_PEER==2 - *canAccessPeer = peerDevice->_acc.is_peer(device->_acc); + *canAccessPeer = peerDevice->_acc.get_is_peer(device->_acc); #else *canAccessPeer = 0; #endif @@ -56,16 +56,32 @@ hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int deviceId, int peerD } -/** - * warning Need to update this function when RT supports P2P - */ //--- -hipError_t hipDeviceDisablePeerAccess ( int peerDevice ) +hipError_t hipDeviceDisablePeerAccess (int peerDeviceId) { - HIP_INIT_API(peerDevice); + HIP_INIT_API(peerDeviceId); - // TODO-p2p - return ihipLogStatus(hipSuccess); + hipError_t err = hipSuccess; +#if USE_PEER_TO_PEER + + auto thisDevice = ihipGetTlsDefaultDevice(); + auto peerDevice = ihipGetDevice(peerDeviceId); + if ((thisDevice != NULL) && (peerDevice != NULL)) { + LockedAccessor_DeviceCrit_t crit(thisDevice->criticalData()); + bool changed = crit->removePeer(peerDevice); + if (changed) { +#if USE_PEER_TO_PEER==2 + am_memtracker_update_peers(device->_acc, crit->peerCnt(), crit->peerAgents()); +#endif + } else { + err = hipErrorPeerAccessNotEnabled; // never enabled P2P access. + } + } else { + err = hipErrorInvalidDevice; + } +#endif + + return ihipLogStatus(err); }; @@ -74,18 +90,27 @@ hipError_t hipDeviceDisablePeerAccess ( int peerDevice ) */ //--- // Enable registering memory on peerDevice for direct access from the current device. -hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags) +hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(peerDeviceId, flags); hipError_t err = hipSuccess; #if USE_PEER_TO_PEER if (flags != 0) { err = hipErrorInvalidValue; } else { + auto thisDevice = ihipGetTlsDefaultDevice(); auto peerDevice = ihipGetDevice(peerDeviceId); - if (peerDevice != NULL) { - + if ((thisDevice != NULL) && (peerDevice != NULL)) { + LockedAccessor_DeviceCrit_t crit(thisDevice->criticalData()); + bool isNewPeer = crit->addPeer(peerDevice); + if (isNewPeer) { +#if USE_PEER_TO_PEER==2 + am_memtracker_update_peers(device->_acc, crit->peerCnt(), crit->peerAgents()); +#endif + } else { + err = hipErrorPeerAccessAlreadyEnabled; + } } else { err = hipErrorInvalidDevice; } @@ -97,9 +122,10 @@ hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags) //--- -hipError_t hipMemcpyPeer ( void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes ) +hipError_t hipMemcpyPeer (void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(dst, dstDevice, src, srcDevice, sizeBytes); + // HCC has a unified memory architecture so device specifiers are not required. return hipMemcpy(dst, src, sizeBytes, hipMemcpyDefault); }; @@ -109,9 +135,9 @@ hipError_t hipMemcpyPeer ( void* dst, int dstDevice, const void* src, int srcD * @bug This function uses a synchronous copy */ //--- -hipError_t hipMemcpyPeerAsync ( void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes, hipStream_t stream ) +hipError_t hipMemcpyPeerAsync (void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes, hipStream_t stream) { - std::call_once(hip_initialized, ihipInit); + HIP_INIT_API(dst, dstDevice, src, srcDevice, sizeBytes, stream); // HCC has a unified memory architecture so device specifiers are not required. return hipMemcpyAsync(dst, src, sizeBytes, hipMemcpyDefault, stream); }; From 06e2dbd128177cf87ca2693c2cc5b49e4af69acd Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Wed, 6 Apr 2016 16:44:31 -0500 Subject: [PATCH 16/40] For P2P, use the peer list when allocating Device memory or pinned host. Each new allocation is automatically mapped into the address space of all enabled peers. [ROCm/hip commit: 813b0638884e484e2e7a2a28c349932c29e37593] --- projects/hip/CONTRIBUTING.md | 4 +- projects/hip/include/hcc_detail/hip_hcc.h | 6 ++- .../hip/include/hcc_detail/hip_runtime_api.h | 11 +++-- projects/hip/src/hip_hcc.cpp | 5 +- projects/hip/src/hip_memory.cpp | 47 ++++++++++--------- projects/hip/src/hip_peer.cpp | 24 +++++++--- 6 files changed, 59 insertions(+), 38 deletions(-) diff --git a/projects/hip/CONTRIBUTING.md b/projects/hip/CONTRIBUTING.md index b593cadd64..4d5050ca89 100644 --- a/projects/hip/CONTRIBUTING.md +++ b/projects/hip/CONTRIBUTING.md @@ -55,8 +55,8 @@ When adding a new HIP feature, add a new unit test as well. See [tests/README.md](README.md) for more information. ## Development Flow -The Unit testing environment automatically rebuilds libhip_hcc.a and the tests when a change it made to the HIP source, and this -is a great place to develop new features alongside the associated test. +It is recommended that developers set the flag HIP_BUILD_LOCAL=1 so that the unit testing environment automatically rebuilds libhip_hcc.a and the tests when a change it made to the HIP source. +Directed tests provide a great place to develop new features alongside the associated test. For applications and benchmarks outside the directed test environment, developments should use a two-step development flow: - #1. Compile, link, and install HCC. See [Installation](README.md#Installation) notes. diff --git a/projects/hip/include/hcc_detail/hip_hcc.h b/projects/hip/include/hcc_detail/hip_hcc.h index 05fa2818b6..23c66759f4 100644 --- a/projects/hip/include/hcc_detail/hip_hcc.h +++ b/projects/hip/include/hcc_detail/hip_hcc.h @@ -35,6 +35,10 @@ THE SOFTWARE. //Use the new HCC accelerator_view::copy instead of am_copy #define USE_AV_COPY 0 +// Compile peer-to-peer support. +// 2= use upcoming HCC APIs. +#define USE_PEER_TO_PEER 1 + //#define INLINE static inline //--- @@ -523,7 +527,7 @@ public: bool removePeer(ihipDevice_t *peer); uint32_t peerCnt() const { return _peerCnt; }; - uint32_t peerAgents() const { return _peerAgents; }; + hsa_agent_t *peerAgents() const { return _peerAgents; }; private: diff --git a/projects/hip/include/hcc_detail/hip_runtime_api.h b/projects/hip/include/hcc_detail/hip_runtime_api.h index f33895cd98..2f70ae47c3 100644 --- a/projects/hip/include/hcc_detail/hip_runtime_api.h +++ b/projects/hip/include/hcc_detail/hip_runtime_api.h @@ -919,19 +919,22 @@ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDev * * @param [in] peerDeviceId * - * TODO:cudaErrorPeerAccessNotEnabled and cudaErrorInvalidDevice error not supported in HIP, return hipErrorUnknown - * Returns #hipSuccess, #hipErrorUnknown + * Returns #hipSuccess, #hipErrorPeerAccessNotEnabled */ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId); /** * @brief Enable direct access from current device's virtual address space to memory allocations physically located on a peer device. * + * Memory which already allocated on peer device will be mapped into the address space of the current device. In addition, all + * future memory allocations on peerDeviceId will be mapped into the address space of the current device when the memory is allocated. + * The peer memory remains accessible from the current device until a call to hipDeviceDisablePeerAccess or @hipDeviceReset. + * + * * @param [in] peerDeviceId * @param [in] flags * - * TODO:cudaErrorInvalidDevice error not supported in HIP, return hipErrorUnknown - * Returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, #hipErrorUnknown + * Returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, #hipErrorPeerAccessAlreadyEnabled */ hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags); diff --git a/projects/hip/src/hip_hcc.cpp b/projects/hip/src/hip_hcc.cpp index 0cae125509..e3239c4119 100644 --- a/projects/hip/src/hip_hcc.cpp +++ b/projects/hip/src/hip_hcc.cpp @@ -442,9 +442,12 @@ void ihipDevice_t::locked_reset() // Reset and remove streams: crit->streams().clear(); +#if USE_PEER_TO_PEER==2 + // remove peer mappings to this device? Call removePeer on all other devices? +#endif + // Reset and release all memory stored in the tracker: am_memtracker_reset(_acc); - }; diff --git a/projects/hip/src/hip_memory.cpp b/projects/hip/src/hip_memory.cpp index b78eb7af6e..dd7f374cba 100644 --- a/projects/hip/src/hip_memory.cpp +++ b/projects/hip/src/hip_memory.cpp @@ -130,6 +130,12 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) hip_status = hipErrorMemoryAllocation; } else { hc::am_memtracker_update(*ptr, device->_device_index, 0); + { + LockedAccessor_DeviceCrit_t crit(device->criticalData()); + if (crit->peerCnt()) { + hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); + } + } } } else { hip_status = hipErrorMemoryAllocation; @@ -139,29 +145,6 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) } -hipError_t hipMallocHost(void** ptr, size_t sizeBytes) -{ - HIP_INIT_API(ptr, sizeBytes); - - hipError_t hip_status = hipSuccess; - - const unsigned am_flags = amHostPinned; - auto device = ihipGetTlsDefaultDevice(); - - if (device) { - *ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags); - if (sizeBytes && (*ptr == NULL)) { - hip_status = hipErrorMemoryAllocation; - } else { - hc::am_memtracker_update(*ptr, device->_device_index, 0); - } - - tprintf (DB_MEM, " %s: pinned ptr=%p\n", __func__, *ptr); - } - - return ihipLogStatus(hip_status); -} - hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { @@ -186,6 +169,12 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) hip_status = hipErrorMemoryAllocation; }else{ hc::am_memtracker_update(*ptr, device->_device_index, flags); + { + LockedAccessor_DeviceCrit_t crit(device->criticalData()); + if (crit->peerCnt()) { + hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); + } + } } tprintf(DB_MEM, " %s: pinned ptr=%p\n", __func__, *ptr); } @@ -194,6 +183,7 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) } +//--- // TODO - remove me, this is deprecated. hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) { @@ -201,6 +191,15 @@ hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) }; +//--- +// TODO - remove me, this is deprecated. +hipError_t hipMallocHost(void** ptr, size_t sizeBytes) +{ + return hipHostMalloc(ptr, sizeBytes, 0); +} + + +//--- hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { HIP_INIT_API(flagsPtr, hostPtr); @@ -225,6 +224,8 @@ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) return ihipLogStatus(hip_status); } + +//--- hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags) { HIP_INIT_API(hostPtr, sizeBytes, flags); diff --git a/projects/hip/src/hip_peer.cpp b/projects/hip/src/hip_peer.cpp index de86688ede..eaac47d82f 100644 --- a/projects/hip/src/hip_peer.cpp +++ b/projects/hip/src/hip_peer.cpp @@ -20,7 +20,6 @@ THE SOFTWARE. #include "hip_runtime.h" #include "hcc_detail/hip_hcc.h" #include "hcc_detail/trace_helper.h" -#define USE_PEER_TO_PEER 1 /** * @warning HCC returns 0 in *canAccessPeer ; Need to update this function when RT supports P2P @@ -67,15 +66,26 @@ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId) auto thisDevice = ihipGetTlsDefaultDevice(); auto peerDevice = ihipGetDevice(peerDeviceId); if ((thisDevice != NULL) && (peerDevice != NULL)) { - LockedAccessor_DeviceCrit_t crit(thisDevice->criticalData()); - bool changed = crit->removePeer(peerDevice); - if (changed) { #if USE_PEER_TO_PEER==2 - am_memtracker_update_peers(device->_acc, crit->peerCnt(), crit->peerAgents()); + bool canAccessPeer = peerDevice->_acc.get_is_peer(device->_acc); +#else + bool canAccessPeer = 0; #endif + if (! canAccessPeer) { + err = hipErrorInvalidDevice; // P2P not allowed between these devices. } else { - err = hipErrorPeerAccessNotEnabled; // never enabled P2P access. - } + + + LockedAccessor_DeviceCrit_t crit(thisDevice->criticalData()); + bool changed = crit->removePeer(peerDevice); + if (changed) { +#if USE_PEER_TO_PEER==2 + am_memtracker_update_peers(device->_acc, crit->peerCnt(), crit->peerAgents()); +#endif + } else { + err = hipErrorPeerAccessNotEnabled; // never enabled P2P access. + } + } } else { err = hipErrorInvalidDevice; } From 216c9a0c1681d4abef4e42d8482583ab40b419bb Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Thu, 7 Apr 2016 14:28:42 -0500 Subject: [PATCH 17/40] Use HCC get_is_pool, add USE_PEER_TO_PEER level 2, 3. [ROCm/hip commit: 7a11a2178ddd0d10e89319ca6ed988203040f7f6] --- projects/hip/include/hcc_detail/hip_hcc.h | 5 +++-- projects/hip/src/hip_peer.cpp | 17 +++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/projects/hip/include/hcc_detail/hip_hcc.h b/projects/hip/include/hcc_detail/hip_hcc.h index 23c66759f4..1dd8777b1c 100644 --- a/projects/hip/include/hcc_detail/hip_hcc.h +++ b/projects/hip/include/hcc_detail/hip_hcc.h @@ -36,8 +36,9 @@ THE SOFTWARE. #define USE_AV_COPY 0 // Compile peer-to-peer support. -// 2= use upcoming HCC APIs. -#define USE_PEER_TO_PEER 1 +// >= 2 : use HCC hc:accelerator::get_is_peer +// >= 3 : use hc::am_memtracker_update_peers(...) +#define USE_PEER_TO_PEER 2 //#define INLINE static inline diff --git a/projects/hip/src/hip_peer.cpp b/projects/hip/src/hip_peer.cpp index eaac47d82f..2e52f8e586 100644 --- a/projects/hip/src/hip_peer.cpp +++ b/projects/hip/src/hip_peer.cpp @@ -32,12 +32,12 @@ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDe hipError_t err = hipSuccess; #if USE_PEER_TO_PEER - auto device = ihipGetDevice(deviceId); + auto thisDevice = ihipGetDevice(deviceId); auto peerDevice = ihipGetDevice(peerDeviceId); - if ((device != NULL) && (peerDevice != NULL)) { -#if USE_PEER_TO_PEER==2 - *canAccessPeer = peerDevice->_acc.get_is_peer(device->_acc); + if ((thisDevice != NULL) && (peerDevice != NULL)) { +#if USE_PEER_TO_PEER>=2 + *canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc); #else *canAccessPeer = 0; #endif @@ -66,8 +66,8 @@ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId) auto thisDevice = ihipGetTlsDefaultDevice(); auto peerDevice = ihipGetDevice(peerDeviceId); if ((thisDevice != NULL) && (peerDevice != NULL)) { -#if USE_PEER_TO_PEER==2 - bool canAccessPeer = peerDevice->_acc.get_is_peer(device->_acc); +#if USE_PEER_TO_PEER>=2 + bool canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc); #else bool canAccessPeer = 0; #endif @@ -79,7 +79,8 @@ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId) LockedAccessor_DeviceCrit_t crit(thisDevice->criticalData()); bool changed = crit->removePeer(peerDevice); if (changed) { -#if USE_PEER_TO_PEER==2 +#if USE_PEER_TO_PEER>=3 + // Update the peers for all memory already saved in the tracker: am_memtracker_update_peers(device->_acc, crit->peerCnt(), crit->peerAgents()); #endif } else { @@ -115,7 +116,7 @@ hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags) LockedAccessor_DeviceCrit_t crit(thisDevice->criticalData()); bool isNewPeer = crit->addPeer(peerDevice); if (isNewPeer) { -#if USE_PEER_TO_PEER==2 +#if USE_PEER_TO_PEER>=3 am_memtracker_update_peers(device->_acc, crit->peerCnt(), crit->peerAgents()); #endif } else { From 00c7d8dbbcb84edad42d4d3dd3bdfdd2117e4d0d Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Thu, 7 Apr 2016 15:51:08 -0500 Subject: [PATCH 18/40] Add simple P2P test [ROCm/hip commit: 71dac8e37c378f3994d667c1deaeaf82ce00455c] --- projects/hip/tests/src/CMakeLists.txt | 2 + .../hip/tests/src/hipPeerToPeer_simple.cpp | 87 +++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 projects/hip/tests/src/hipPeerToPeer_simple.cpp diff --git a/projects/hip/tests/src/CMakeLists.txt b/projects/hip/tests/src/CMakeLists.txt index 6ed7e58be2..1bcdac2783 100644 --- a/projects/hip/tests/src/CMakeLists.txt +++ b/projects/hip/tests/src/CMakeLists.txt @@ -170,6 +170,7 @@ make_hip_executable (hipFuncSetDeviceFlags hipFuncSetDeviceFlags.cpp) make_hip_executable (hipFuncGetDevice hipFuncGetDevice.cpp) make_hip_executable (hipFuncSetDevice hipFuncSetDevice.cpp) make_hip_executable (hipFuncDeviceSynchronize hipFuncDeviceSynchronize.cpp) +make_hip_executable (hipPeerToPeer_simple hipPeerToPeer_simple.cpp) make_hip_executable (hipMultiThreadDevice hipMultiThreadDevice.cpp) @@ -217,5 +218,6 @@ make_test(hipFuncDeviceSynchronize " ") make_named_test (hipMultiThreadDevice "hipMultiThreadDevice-serial" --tests 0x1) make_named_test (hipMultiThreadDevice "hipMultiThreadDevice-pyramid" --tests 0x4) make_named_test (hipMultiThreadDevice "hipMultiThreadDevice-nearzero" --tests 0x10) +make_test(hipPeerToPeer_simple " " ) make_hipify_test(specialFunc.cu ) diff --git a/projects/hip/tests/src/hipPeerToPeer_simple.cpp b/projects/hip/tests/src/hipPeerToPeer_simple.cpp new file mode 100644 index 0000000000..79401d28b0 --- /dev/null +++ b/projects/hip/tests/src/hipPeerToPeer_simple.cpp @@ -0,0 +1,87 @@ + +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +// Simple test for memset. +// Also serves as a template for other tests. + +#include "hip_runtime.h" +#include "test_common.h" + + +int main(int argc, char *argv[]) +{ + + HipTest::parseStandardArguments(argc, argv, true); + + int deviceCnt; + + HIPCHECK(hipGetDeviceCount(&deviceCnt)); + HIPCHECK(hipSetDevice(p_gpuDevice)); + + int peerDevice = ((p_gpuDevice + 1) % deviceCnt); + + printf ("N=%zu device=%d peerDevice=%d (%d devices total)\n", N, p_gpuDevice, peerDevice, deviceCnt); + + // Must be on a multi-gpu system: + assert (p_gpuDevice != peerDevice); + + int canAccessPeer; + HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, p_gpuDevice, peerDevice)); + printf ("dev#%d canAccessPeer:#%d=%d\n", p_gpuDevice, peerDevice, canAccessPeer); + + assert(canAccessPeer); + + HIPCHECK(hipDeviceEnablePeerAccess(peerDevice, 0)); + + size_t Nbytes = N*sizeof(float); + + float *A_d0, *A_d1; + float *A_h; + + A_h = (float*)malloc(Nbytes); + HIPCHECK (hipSetDevice(peerDevice)); + HIPCHECK (hipMalloc(&A_d1, Nbytes) ); + + HIPCHECK (hipSetDevice(p_gpuDevice)); + HIPCHECK (hipMalloc(&A_d0, Nbytes) ); + + + // Set memory on first device. + HIPCHECK (hipSetDevice(p_gpuDevice)); + HIPCHECK ( hipMemset(A_d0, memsetval, Nbytes) ); + + // Device0 push to device1, using P2P: + HIPCHECK ( hipMemcpy(A_d0, A_d1, Nbytes, hipMemcpyDefault)); + + // Copy data back to host: + HIPCHECK ( hipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost)); + + // Check host data: + for (int i=0; i Date: Fri, 8 Apr 2016 02:15:46 -0500 Subject: [PATCH 19/40] Print peers in hipConfig. Also include peer APIs in vim hilighting. [ROCm/hip commit: 0ac41ad1430a0cc97528260a4591f59557c22d57] --- .../hip/samples/1_Utils/hipInfo/hipInfo.cpp | 17 +++++++++++++++++ projects/hip/util/vim/hip.vim | 6 ++++++ 2 files changed, 23 insertions(+) diff --git a/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp b/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp index 146d17e015..581194f624 100644 --- a/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp +++ b/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp @@ -120,8 +120,25 @@ void printDeviceProp (int deviceId) cout << setw(w1) << "arch.hasSurfaceFuncs: " << props.arch.hasSurfaceFuncs << endl; cout << setw(w1) << "arch.has3dGrid: " << props.arch.has3dGrid << endl; cout << setw(w1) << "arch.hasDynamicParallelism: " << props.arch.hasDynamicParallelism << endl; + + int deviceCnt; + hipGetDeviceCount(&deviceCnt); + cout << setw(w1) << "peers: "; + for (int i=0; i Date: Fri, 8 Apr 2016 02:17:29 -0500 Subject: [PATCH 20/40] Use HIP_PATH if set else use relative ../... [ROCm/hip commit: 40e72dcd4a99bbfb3c2e5246a5f964a8053c8ec1] --- projects/hip/samples/0_Intro/bit_extract/Makefile | 2 +- projects/hip/samples/0_Intro/square/Makefile | 2 +- projects/hip/samples/1_Utils/hipBusBandwidth/Makefile | 2 +- projects/hip/samples/1_Utils/hipDispatchLatency/Makefile | 2 +- projects/hip/samples/1_Utils/hipInfo/Makefile | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/projects/hip/samples/0_Intro/bit_extract/Makefile b/projects/hip/samples/0_Intro/bit_extract/Makefile index 39fb5cf8c6..cdf793363b 100644 --- a/projects/hip/samples/0_Intro/bit_extract/Makefile +++ b/projects/hip/samples/0_Intro/bit_extract/Makefile @@ -1,6 +1,6 @@ #Dependencies : [MYHIP]/bin must be in user's path. -HIP_PATH=../../.. +HIP_PATH=?../../.. HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform) HIPCC=$(HIP_PATH)/bin/hipcc diff --git a/projects/hip/samples/0_Intro/square/Makefile b/projects/hip/samples/0_Intro/square/Makefile index 98ee0be4f6..817c556b26 100644 --- a/projects/hip/samples/0_Intro/square/Makefile +++ b/projects/hip/samples/0_Intro/square/Makefile @@ -1,4 +1,4 @@ -HIP_PATH=../../.. +HIP_PATH?=../../.. HIPCC=$(HIP_PATH)/bin/hipcc all: square.hip.out diff --git a/projects/hip/samples/1_Utils/hipBusBandwidth/Makefile b/projects/hip/samples/1_Utils/hipBusBandwidth/Makefile index a713379d8d..4599cacba2 100644 --- a/projects/hip/samples/1_Utils/hipBusBandwidth/Makefile +++ b/projects/hip/samples/1_Utils/hipBusBandwidth/Makefile @@ -1,4 +1,4 @@ -HIP_PATH=../../.. +HIP_PATH?=../../.. HIPCC=$(HIP_PATH)/bin/hipcc EXE=hipBusBandwidth diff --git a/projects/hip/samples/1_Utils/hipDispatchLatency/Makefile b/projects/hip/samples/1_Utils/hipDispatchLatency/Makefile index 9b2d558114..87e707923d 100644 --- a/projects/hip/samples/1_Utils/hipDispatchLatency/Makefile +++ b/projects/hip/samples/1_Utils/hipDispatchLatency/Makefile @@ -1,4 +1,4 @@ -HIP_PATH=../../.. +HIP_PATH?=../../.. HIPCC=$(HIP_PATH)/bin/hipcc EXE=hipDispatchLatency diff --git a/projects/hip/samples/1_Utils/hipInfo/Makefile b/projects/hip/samples/1_Utils/hipInfo/Makefile index f38f157bcb..d69067388e 100644 --- a/projects/hip/samples/1_Utils/hipInfo/Makefile +++ b/projects/hip/samples/1_Utils/hipInfo/Makefile @@ -1,4 +1,4 @@ -HIP_PATH=../../.. +HIP_PATH?=../../.. HIPCC=$(HIP_PATH)/bin/hipcc EXE=hipInfo From fc4af30e16c7252bad57e621489237c8db6ade80 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Fri, 8 Apr 2016 09:22:24 -0500 Subject: [PATCH 21/40] Fix simple P2P test, had device ptrs swapped [ROCm/hip commit: eeb41955e401c8ca13259e41cee16356f0cd0c34] --- projects/hip/tests/src/hipPeerToPeer_simple.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/projects/hip/tests/src/hipPeerToPeer_simple.cpp b/projects/hip/tests/src/hipPeerToPeer_simple.cpp index 79401d28b0..ca2501438d 100644 --- a/projects/hip/tests/src/hipPeerToPeer_simple.cpp +++ b/projects/hip/tests/src/hipPeerToPeer_simple.cpp @@ -52,12 +52,12 @@ int main(int argc, char *argv[]) HIPCHECK(hipDeviceEnablePeerAccess(peerDevice, 0)); - size_t Nbytes = N*sizeof(float); + size_t Nbytes = N*sizeof(char); - float *A_d0, *A_d1; - float *A_h; + char *A_d0, *A_d1; + char *A_h; - A_h = (float*)malloc(Nbytes); + A_h = (char*)malloc(Nbytes); HIPCHECK (hipSetDevice(peerDevice)); HIPCHECK (hipMalloc(&A_d1, Nbytes) ); @@ -70,7 +70,7 @@ int main(int argc, char *argv[]) HIPCHECK ( hipMemset(A_d0, memsetval, Nbytes) ); // Device0 push to device1, using P2P: - HIPCHECK ( hipMemcpy(A_d0, A_d1, Nbytes, hipMemcpyDefault)); + HIPCHECK ( hipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyDefault)); // Copy data back to host: HIPCHECK ( hipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost)); From 69a9feb1fbab0dd7a625d1c5b933a74ceda52037 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Sat, 9 Apr 2016 04:10:57 -0500 Subject: [PATCH 22/40] fix bugs in P2P implementation - addPeers polarity reversed, would never add. - check allow_access return value, pipe error to hipMalloc. [ROCm/hip commit: fb31eaf07b5d9cdda25da441770cc4236b5f70c5] --- projects/hip/src/hip_hcc.cpp | 4 +++- projects/hip/src/hip_memory.cpp | 10 ++++++++-- projects/hip/src/hip_peer.cpp | 7 ++----- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/projects/hip/src/hip_hcc.cpp b/projects/hip/src/hip_hcc.cpp index e3239c4119..9457e4edde 100644 --- a/projects/hip/src/hip_hcc.cpp +++ b/projects/hip/src/hip_hcc.cpp @@ -214,7 +214,8 @@ template<> bool ihipDeviceCriticalBase_t::addPeer(ihipDevice_t *peer) { auto match = std::find(_peers.begin(), _peers.end(), peer); - if (match != std::end(_peers)) { + if (match == std::end(_peers)) { + // Not already a peer, let's update the list: _peers.push_back(peer); recomputePeerAgents(); return true; @@ -230,6 +231,7 @@ bool ihipDeviceCriticalBase_t::removePeer(ihipDevice_t *peer) { auto match = std::find(_peers.begin(), _peers.end(), peer); if (match != std::end(_peers)) { + // Found a valid peer, let's remove it. _peers.remove(peer); recomputePeerAgents(); return true; diff --git a/projects/hip/src/hip_memory.cpp b/projects/hip/src/hip_memory.cpp index dd7f374cba..0775d59489 100644 --- a/projects/hip/src/hip_memory.cpp +++ b/projects/hip/src/hip_memory.cpp @@ -133,7 +133,10 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) { LockedAccessor_DeviceCrit_t crit(device->criticalData()); if (crit->peerCnt()) { - hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); + hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); + if (hsa_status != HSA_STATUS_SUCCESS) { + hip_status = hipErrorMemoryAllocation; + } } } } @@ -172,7 +175,10 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { LockedAccessor_DeviceCrit_t crit(device->criticalData()); if (crit->peerCnt()) { - hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); + hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); + if (hsa_status != HSA_STATUS_SUCCESS) { + hip_status = hipErrorMemoryAllocation; + } } } } diff --git a/projects/hip/src/hip_peer.cpp b/projects/hip/src/hip_peer.cpp index 2e52f8e586..ad9cf14634 100644 --- a/projects/hip/src/hip_peer.cpp +++ b/projects/hip/src/hip_peer.cpp @@ -31,26 +31,23 @@ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDe hipError_t err = hipSuccess; -#if USE_PEER_TO_PEER auto thisDevice = ihipGetDevice(deviceId); auto peerDevice = ihipGetDevice(peerDeviceId); if ((thisDevice != NULL) && (peerDevice != NULL)) { #if USE_PEER_TO_PEER>=2 *canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc); + printf ("canAccessPeer=%d\n", *canAccessPeer); #else *canAccessPeer = 0; #endif } else { - *canAccessPeer = false; + *canAccessPeer = 0; err = hipErrorInvalidDevice; } -#else - *canAccessPeer = false; -#endif return ihipLogStatus(err); } From 80e2e37c51ccd59e2c25c173577868353fd7ef1d Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Sat, 9 Apr 2016 04:35:06 -0500 Subject: [PATCH 23/40] Remove stray debug msgs, hipInfo don't print self as peer. [ROCm/hip commit: d89539d40f47b754c59e203999c00b9a4e74a5b7] --- projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp | 2 +- projects/hip/src/hip_memory.cpp | 2 -- projects/hip/src/hip_peer.cpp | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp b/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp index 581194f624..9151d5058e 100644 --- a/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp +++ b/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp @@ -127,7 +127,7 @@ void printDeviceProp (int deviceId) for (int i=0; i_acc, &deviceMemSize, &hostMemSize, &userMemSize); - printf ("deviceMemSize=%zu\n", deviceMemSize); - *free = hipDevice->_props.totalGlobalMem - deviceMemSize; } diff --git a/projects/hip/src/hip_peer.cpp b/projects/hip/src/hip_peer.cpp index ad9cf14634..aeb9c7fd49 100644 --- a/projects/hip/src/hip_peer.cpp +++ b/projects/hip/src/hip_peer.cpp @@ -37,7 +37,6 @@ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDe if ((thisDevice != NULL) && (peerDevice != NULL)) { #if USE_PEER_TO_PEER>=2 *canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc); - printf ("canAccessPeer=%d\n", *canAccessPeer); #else *canAccessPeer = 0; #endif From 07de6498339cc40e1d916a64f34e7602399db3c5 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Sat, 9 Apr 2016 04:47:07 -0500 Subject: [PATCH 24/40] Improve P2P test. Add option to select which device does the copy. [ROCm/hip commit: 40d24f67f5767d0256320ce3f9610bc8ba4a7f33] --- .../hip/tests/src/hipPeerToPeer_simple.cpp | 41 ++++++++++++++----- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/projects/hip/tests/src/hipPeerToPeer_simple.cpp b/projects/hip/tests/src/hipPeerToPeer_simple.cpp index ca2501438d..aa305f92fa 100644 --- a/projects/hip/tests/src/hipPeerToPeer_simple.cpp +++ b/projects/hip/tests/src/hipPeerToPeer_simple.cpp @@ -26,11 +26,26 @@ THE SOFTWARE. #include "hip_runtime.h" #include "test_common.h" +bool p_memcpyWithPeer = false; + +void parseMyArguments(int argc, char *argv[]) +{ + int more_argc = HipTest::parseStandardArguments(argc, argv, false); + // parse args for this test: + for (int i = 1; i < more_argc; i++) { + const char *arg = argv[i]; + + if (!strcmp(arg, "--memcpyWithPeer")) { + p_memcpyWithPeer = true; + } else { + failed("Bad argument '%s'", arg); + } + }; +}; int main(int argc, char *argv[]) { - - HipTest::parseStandardArguments(argc, argv, true); + parseMyArguments(argc, argv); int deviceCnt; @@ -58,27 +73,31 @@ int main(int argc, char *argv[]) char *A_h; A_h = (char*)malloc(Nbytes); - HIPCHECK (hipSetDevice(peerDevice)); - HIPCHECK (hipMalloc(&A_d1, Nbytes) ); + // allocate and initialize memory on device0 HIPCHECK (hipSetDevice(p_gpuDevice)); HIPCHECK (hipMalloc(&A_d0, Nbytes) ); - - - // Set memory on first device. - HIPCHECK (hipSetDevice(p_gpuDevice)); HIPCHECK ( hipMemset(A_d0, memsetval, Nbytes) ); + // allocate and initialize memory on peer device + HIPCHECK (hipSetDevice(peerDevice)); + HIPCHECK (hipMalloc(&A_d1, Nbytes) ); + HIPCHECK ( hipMemset(A_d1, 0x13, Nbytes) ); + + + // Device0 push to device1, using P2P: - HIPCHECK ( hipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyDefault)); + HIPCHECK (hipSetDevice(p_memcpyWithPeer ? peerDevice : p_gpuDevice)); + HIPCHECK (hipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyDefault)); // Copy data back to host: - HIPCHECK ( hipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost)); + HIPCHECK (hipSetDevice(peerDevice)); + HIPCHECK (hipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost)); // Check host data: for (int i=0; i Date: Sat, 9 Apr 2016 05:03:08 -0500 Subject: [PATCH 25/40] Improve P2P test. - Ensure proper device is set before each command. - Add command line switches: +bool p_memcpyWithPeer = false; // use the peer device for the P2P copy +bool p_mirrorPeers = false; // in addition to mapping current to peer space, map peer to current space. +int p_peerDevice = -1; // explicly specify which peer to use, else use p_gpuDevice + 1. [ROCm/hip commit: c2d37b4bbdfd4045029e5667bce6d41e3e827378] --- .../hip/tests/src/hipPeerToPeer_simple.cpp | 37 ++++++++++++++----- projects/hip/tests/src/test_common.cpp | 2 +- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/projects/hip/tests/src/hipPeerToPeer_simple.cpp b/projects/hip/tests/src/hipPeerToPeer_simple.cpp index aa305f92fa..4aaa6a452b 100644 --- a/projects/hip/tests/src/hipPeerToPeer_simple.cpp +++ b/projects/hip/tests/src/hipPeerToPeer_simple.cpp @@ -26,7 +26,9 @@ THE SOFTWARE. #include "hip_runtime.h" #include "test_common.h" -bool p_memcpyWithPeer = false; +bool p_memcpyWithPeer = false; // use the peer device for the P2P copy +bool p_mirrorPeers = false; // in addition to mapping current to peer space, map peer to current space. +int p_peerDevice = -1; // explicly specify which peer to use, else use p_gpuDevice + 1. void parseMyArguments(int argc, char *argv[]) { @@ -37,12 +39,19 @@ void parseMyArguments(int argc, char *argv[]) if (!strcmp(arg, "--memcpyWithPeer")) { p_memcpyWithPeer = true; + } else if (!strcmp(arg, "--mirrorPeers")) { + p_mirrorPeers = true; + } else if (!strcmp(arg, "--peerDevice")) { + if (++i >= argc || !HipTest::parseInt(argv[i], &p_peerDevice)) { + failed("Bad peerDevice argument"); + } } else { failed("Bad argument '%s'", arg); } }; }; + int main(int argc, char *argv[]) { parseMyArguments(argc, argv); @@ -50,23 +59,33 @@ int main(int argc, char *argv[]) int deviceCnt; HIPCHECK(hipGetDeviceCount(&deviceCnt)); - HIPCHECK(hipSetDevice(p_gpuDevice)); - int peerDevice = ((p_gpuDevice + 1) % deviceCnt); + int currentDevice = p_gpuDevice; + int peerDevice = (p_peerDevice == -1) ? ((currentDevice + 1) % deviceCnt) : p_peerDevice; - printf ("N=%zu device=%d peerDevice=%d (%d devices total)\n", N, p_gpuDevice, peerDevice, deviceCnt); + printf ("N=%zu device=%d peerDevice=%d (%d devices total)\n", N, currentDevice, peerDevice, deviceCnt); // Must be on a multi-gpu system: - assert (p_gpuDevice != peerDevice); + assert (currentDevice != peerDevice); int canAccessPeer; - HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, p_gpuDevice, peerDevice)); - printf ("dev#%d canAccessPeer:#%d=%d\n", p_gpuDevice, peerDevice, canAccessPeer); + HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, currentDevice, peerDevice)); + printf ("dev#%d canAccessPeer:#%d=%d\n", currentDevice, peerDevice, canAccessPeer); assert(canAccessPeer); + HIPCHECK(hipSetDevice(currentDevice)); HIPCHECK(hipDeviceEnablePeerAccess(peerDevice, 0)); + if (p_mirrorPeers) { + int canAccessPeer; + HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, peerDevice, currentDevice)); + assert(canAccessPeer); + + HIPCHECK(hipSetDevice(peerDevice)); + HIPCHECK(hipDeviceEnablePeerAccess(currentDevice, 0)); + } + size_t Nbytes = N*sizeof(char); char *A_d0, *A_d1; @@ -75,7 +94,7 @@ int main(int argc, char *argv[]) A_h = (char*)malloc(Nbytes); // allocate and initialize memory on device0 - HIPCHECK (hipSetDevice(p_gpuDevice)); + HIPCHECK (hipSetDevice(currentDevice)); HIPCHECK (hipMalloc(&A_d0, Nbytes) ); HIPCHECK ( hipMemset(A_d0, memsetval, Nbytes) ); @@ -87,7 +106,7 @@ int main(int argc, char *argv[]) // Device0 push to device1, using P2P: - HIPCHECK (hipSetDevice(p_memcpyWithPeer ? peerDevice : p_gpuDevice)); + HIPCHECK (hipSetDevice(p_memcpyWithPeer ? peerDevice : currentDevice)); HIPCHECK (hipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyDefault)); // Copy data back to host: diff --git a/projects/hip/tests/src/test_common.cpp b/projects/hip/tests/src/test_common.cpp index 332c2856d3..35e3d6d3f2 100644 --- a/projects/hip/tests/src/test_common.cpp +++ b/projects/hip/tests/src/test_common.cpp @@ -111,7 +111,7 @@ int parseStandardArguments(int argc, char *argv[], bool failOnUndefinedArg) failed("Bad iterations argument"); } - } else if (!strcmp(arg, "--gpu") || (!strcmp(arg, "-g"))) { + } else if (!strcmp(arg, "--gpu") || (!strcmp(arg, "-gpuDevice")) || (!strcmp(arg, "-g"))) { if (++i >= argc || !HipTest::parseInt(argv[i], &p_gpuDevice)) { failed("Bad gpuDevice argument"); } From c964e3c75aa670a09bb81e674114cbfde1a29b3b Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Mon, 11 Apr 2016 12:52:18 -0500 Subject: [PATCH 26/40] P2p checkpoint. - set USE_PEER_TO_PEER=3 (requires HCC "am_memtracker_update_peers") - when enabling peer, turn it on for previously allocated memory. - hipDeviceCanAccessPeer is no longer self-ware (self does not qualify as a peer) - device peerlist always includes self, so when we call allow_access we never remove self access. - hipDeviceReset() removes old peer mappings. [ROCm/hip commit: 83f0de7806851d214e119076c4cbc2bcecb74121] --- projects/hip/include/hcc_detail/hip_hcc.h | 3 +- .../hip/include/hcc_detail/hip_runtime_api.h | 37 +++-- .../hip/samples/1_Utils/hipInfo/hipInfo.cpp | 2 +- projects/hip/src/hip_device.cpp | 1 + projects/hip/src/hip_hcc.cpp | 26 ++- projects/hip/src/hip_memory.cpp | 5 +- projects/hip/src/hip_peer.cpp | 21 +-- projects/hip/tests/src/CMakeLists.txt | 12 +- .../hip/tests/src/hipPeerToPeer_simple.cpp | 149 +++++++++++++++--- 9 files changed, 202 insertions(+), 54 deletions(-) diff --git a/projects/hip/include/hcc_detail/hip_hcc.h b/projects/hip/include/hcc_detail/hip_hcc.h index 1dd8777b1c..0d24862764 100644 --- a/projects/hip/include/hcc_detail/hip_hcc.h +++ b/projects/hip/include/hcc_detail/hip_hcc.h @@ -38,7 +38,7 @@ THE SOFTWARE. // Compile peer-to-peer support. // >= 2 : use HCC hc:accelerator::get_is_peer // >= 3 : use hc::am_memtracker_update_peers(...) -#define USE_PEER_TO_PEER 2 +#define USE_PEER_TO_PEER 3 //#define INLINE static inline @@ -526,6 +526,7 @@ public: bool addPeer(ihipDevice_t *peer); bool removePeer(ihipDevice_t *peer); + void resetPeers(ihipDevice_t *thisDevice); uint32_t peerCnt() const { return _peerCnt; }; hsa_agent_t *peerAgents() const { return _peerAgents; }; diff --git a/projects/hip/include/hcc_detail/hip_runtime_api.h b/projects/hip/include/hcc_detail/hip_runtime_api.h index 2f70ae47c3..58df3a2068 100644 --- a/projects/hip/include/hcc_detail/hip_runtime_api.h +++ b/projects/hip/include/hcc_detail/hip_runtime_api.h @@ -907,10 +907,33 @@ hipError_t hipMemGetInfo (size_t * free, size_t * total) ; * * Returns "1" in @p canAccessPeer if the specified @p device is capable * of directly accessing memory physically located on peerDevice , or "0" if not. + * + * Returns "0" in @p canAccessPeer if deviceId == peerDeviceId, and both are valid devices : a device is not a peer of itself. + * + * + * + * @returns #hipSuccess, + * @returns #hipErrorInvalidDevice if deviceId or peerDeviceId are not valid devices */ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDeviceId); +/** + * @brief Enable direct access from current device's virtual address space to memory allocations physically located on a peer device. + * + * Memory which already allocated on peer device will be mapped into the address space of the current device. In addition, all + * future memory allocations on peerDeviceId will be mapped into the address space of the current device when the memory is allocated. + * The peer memory remains accessible from the current device until a call to hipDeviceDisablePeerAccess or hipDeviceReset. + * + * + * @param [in] peerDeviceId + * @param [in] flags + * + * Returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, + * @returns #hipErrorPeerAccessAlreadyEnabled if peer access is already enabled for this device. + */ +hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags); + /** * @brief Disable direct access from current device's virtual address space to memory allocations physically located on a peer device. @@ -923,20 +946,6 @@ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDev */ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId); -/** - * @brief Enable direct access from current device's virtual address space to memory allocations physically located on a peer device. - * - * Memory which already allocated on peer device will be mapped into the address space of the current device. In addition, all - * future memory allocations on peerDeviceId will be mapped into the address space of the current device when the memory is allocated. - * The peer memory remains accessible from the current device until a call to hipDeviceDisablePeerAccess or @hipDeviceReset. - * - * - * @param [in] peerDeviceId - * @param [in] flags - * - * Returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, #hipErrorPeerAccessAlreadyEnabled - */ -hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags); /** * @brief Copies memory from one device to memory on another device. diff --git a/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp b/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp index 9151d5058e..581194f624 100644 --- a/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp +++ b/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp @@ -127,7 +127,7 @@ void printDeviceProp (int deviceId) for (int i=0; ilocked_waitAllStreams(); diff --git a/projects/hip/src/hip_hcc.cpp b/projects/hip/src/hip_hcc.cpp index 9457e4edde..aa4912dc22 100644 --- a/projects/hip/src/hip_hcc.cpp +++ b/projects/hip/src/hip_hcc.cpp @@ -240,6 +240,15 @@ bool ihipDeviceCriticalBase_t::removePeer(ihipDevice_t *peer) } } + +template<> +void ihipDeviceCriticalBase_t::resetPeers(ihipDevice_t *thisDevice) +{ + _peers.clear(); + _peerCnt = 0; + addPeer(thisDevice); // peer-list always contains self agent. +} + //------------------------------------------------------------------------------------------------- //--- @@ -444,12 +453,18 @@ void ihipDevice_t::locked_reset() // Reset and remove streams: crit->streams().clear(); -#if USE_PEER_TO_PEER==2 - // remove peer mappings to this device? Call removePeer on all other devices? + + +#if USE_PEER_TO_PEER>=2 + // This resest peer list to just me: + crit->resetPeers(this); + #endif // Reset and release all memory stored in the tracker: + // Reset will remove peer mapping so don't need to do this explicitly. am_memtracker_reset(_acc); + }; @@ -474,10 +489,13 @@ void ihipDevice_t::init(unsigned device_index, unsigned deviceCnt, hc::accelerat getProperties(&_props); + _criticalData.init(deviceCnt); + + locked_reset(); + _default_stream = new ihipStream_t(device_index, acc.get_default_view(), hipStreamDefault); locked_addStream(_default_stream); - - _criticalData.init(deviceCnt); + tprintf(DB_SYNC, "created device with default_stream=%p\n", _default_stream); diff --git a/projects/hip/src/hip_memory.cpp b/projects/hip/src/hip_memory.cpp index 42b4d0628f..497c832242 100644 --- a/projects/hip/src/hip_memory.cpp +++ b/projects/hip/src/hip_memory.cpp @@ -132,7 +132,7 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) hc::am_memtracker_update(*ptr, device->_device_index, 0); { LockedAccessor_DeviceCrit_t crit(device->criticalData()); - if (crit->peerCnt()) { + if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved: hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); if (hsa_status != HSA_STATUS_SUCCESS) { hip_status = hipErrorMemoryAllocation; @@ -173,8 +173,9 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) }else{ hc::am_memtracker_update(*ptr, device->_device_index, flags); { + // TODO - allow_access only works for device memory, need to change am_alloc to allocate host directly. LockedAccessor_DeviceCrit_t crit(device->criticalData()); - if (crit->peerCnt()) { + if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved: hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); if (hsa_status != HSA_STATUS_SUCCESS) { hip_status = hipErrorMemoryAllocation; diff --git a/projects/hip/src/hip_peer.cpp b/projects/hip/src/hip_peer.cpp index aeb9c7fd49..612cd6e309 100644 --- a/projects/hip/src/hip_peer.cpp +++ b/projects/hip/src/hip_peer.cpp @@ -17,6 +17,8 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include + #include "hip_runtime.h" #include "hcc_detail/hip_hcc.h" #include "hcc_detail/trace_helper.h" @@ -35,11 +37,15 @@ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDe auto peerDevice = ihipGetDevice(peerDeviceId); if ((thisDevice != NULL) && (peerDevice != NULL)) { + if (deviceId == peerDeviceId) { + *canAccessPeer = 0; + } else { #if USE_PEER_TO_PEER>=2 - *canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc); + *canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc); #else - *canAccessPeer = 0; + *canAccessPeer = 0; #endif + } } else { *canAccessPeer = 0; @@ -69,15 +75,15 @@ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId) #endif if (! canAccessPeer) { err = hipErrorInvalidDevice; // P2P not allowed between these devices. + } else if (thisDevice == peerDevice) { + err = hipErrorInvalidDevice; // Can't disable peer access to self. } else { - - LockedAccessor_DeviceCrit_t crit(thisDevice->criticalData()); bool changed = crit->removePeer(peerDevice); if (changed) { #if USE_PEER_TO_PEER>=3 // Update the peers for all memory already saved in the tracker: - am_memtracker_update_peers(device->_acc, crit->peerCnt(), crit->peerAgents()); + am_memtracker_update_peers(thisDevice->_acc, crit->peerCnt(), crit->peerAgents()); #endif } else { err = hipErrorPeerAccessNotEnabled; // never enabled P2P access. @@ -92,9 +98,6 @@ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId) }; -/** - * @warning Need to update this function when RT supports P2P - */ //--- // Enable registering memory on peerDevice for direct access from the current device. hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags) @@ -113,7 +116,7 @@ hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags) bool isNewPeer = crit->addPeer(peerDevice); if (isNewPeer) { #if USE_PEER_TO_PEER>=3 - am_memtracker_update_peers(device->_acc, crit->peerCnt(), crit->peerAgents()); + am_memtracker_update_peers(thisDevice->_acc, crit->peerCnt(), crit->peerAgents()); #endif } else { err = hipErrorPeerAccessAlreadyEnabled; diff --git a/projects/hip/tests/src/CMakeLists.txt b/projects/hip/tests/src/CMakeLists.txt index 1bcdac2783..6f9af072e0 100644 --- a/projects/hip/tests/src/CMakeLists.txt +++ b/projects/hip/tests/src/CMakeLists.txt @@ -8,6 +8,10 @@ include_directories( ${PROJECT_SOURCE_DIR}/include ) set (HIP_Unit_Test_VERSION_MAJOR 1) set (HIP_Unit_Test_VERSION_MINOR 0) +if(NOT DEFINED HIP_MULTI_GPU) + set(HIP_MULTI_GPU 0 CACHE BOOL "Run tests requiring more than one GPU") +endif() + if(NOT DEFINED HIP_BUILD_LOCAL) if(NOT DEFINED ENV{HIP_BUILD_LOCAL}) set(HIP_BUILD_LOCAL 1 CACHE BOOL "Build HIP in local folder") @@ -218,6 +222,12 @@ make_test(hipFuncDeviceSynchronize " ") make_named_test (hipMultiThreadDevice "hipMultiThreadDevice-serial" --tests 0x1) make_named_test (hipMultiThreadDevice "hipMultiThreadDevice-pyramid" --tests 0x4) make_named_test (hipMultiThreadDevice "hipMultiThreadDevice-nearzero" --tests 0x10) -make_test(hipPeerToPeer_simple " " ) + +if (${HIP_MULTI_GPU}) + make_test(hipPeerToPeer_simple ) # use current device for copy, this fails. + make_test(hipPeerToPeer_simple --memcpyWithPeer) + make_test(hipPeerToPeer_simple --mirrorPeers) # mirror mapping: test to ensure mirror doesn't destroy orig mapping. + +endif() make_hipify_test(specialFunc.cu ) diff --git a/projects/hip/tests/src/hipPeerToPeer_simple.cpp b/projects/hip/tests/src/hipPeerToPeer_simple.cpp index 4aaa6a452b..5bfb583f3f 100644 --- a/projects/hip/tests/src/hipPeerToPeer_simple.cpp +++ b/projects/hip/tests/src/hipPeerToPeer_simple.cpp @@ -1,4 +1,3 @@ - /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. @@ -52,9 +51,11 @@ void parseMyArguments(int argc, char *argv[]) }; -int main(int argc, char *argv[]) +//--- +// Test which enables peer2peer first, then allocates the memory. +void enablePeerFirst() { - parseMyArguments(argc, argv); + printf ("\n==testing: %s\n", __func__); int deviceCnt; @@ -74,6 +75,110 @@ int main(int argc, char *argv[]) assert(canAccessPeer); + HIPCHECK (hipSetDevice(currentDevice)); + HIPCHECK(hipDeviceReset()); + HIPCHECK (hipSetDevice(peerDevice)); + HIPCHECK(hipDeviceReset()); + + HIPCHECK(hipSetDevice(currentDevice)); + HIPCHECK(hipDeviceEnablePeerAccess(peerDevice, 0)); + + if (p_mirrorPeers) { + int canAccessPeer; + HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, peerDevice, currentDevice)); + assert(canAccessPeer); + + HIPCHECK(hipSetDevice(peerDevice)); + HIPCHECK(hipDeviceEnablePeerAccess(currentDevice, 0)); + } + + size_t Nbytes = N*sizeof(char); + + char *A_d0, *A_d1; + char *A_h; + + A_h = (char*)malloc(Nbytes); + + // allocate and initialize memory on device0 + HIPCHECK (hipSetDevice(currentDevice)); + HIPCHECK (hipMalloc(&A_d0, Nbytes) ); + HIPCHECK ( hipMemset(A_d0, memsetval, Nbytes) ); + + // allocate and initialize memory on peer device + HIPCHECK (hipSetDevice(peerDevice)); + HIPCHECK (hipMalloc(&A_d1, Nbytes) ); + HIPCHECK ( hipMemset(A_d1, 0x13, Nbytes) ); + + + + // Device0 push to device1, using P2P: + HIPCHECK (hipSetDevice(p_memcpyWithPeer ? peerDevice : currentDevice)); + HIPCHECK (hipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyDefault)); + + // Copy data back to host: + HIPCHECK (hipSetDevice(peerDevice)); + HIPCHECK (hipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost)); + + // Check host data: + for (int i=0; i Date: Mon, 11 Apr 2016 13:46:53 -0500 Subject: [PATCH 27/40] add simple negative P2P tests [ROCm/hip commit: 4a0c0479d4f172b362423360d52a85c50c7940dd] --- projects/hip/tests/src/CMakeLists.txt | 2 +- .../hip/tests/src/hipMultiThreadDevice.cpp | 4 +- .../hip/tests/src/hipPeerToPeer_simple.cpp | 162 ++++++++++-------- 3 files changed, 97 insertions(+), 71 deletions(-) diff --git a/projects/hip/tests/src/CMakeLists.txt b/projects/hip/tests/src/CMakeLists.txt index 6f9af072e0..9037027654 100644 --- a/projects/hip/tests/src/CMakeLists.txt +++ b/projects/hip/tests/src/CMakeLists.txt @@ -224,7 +224,7 @@ make_named_test (hipMultiThreadDevice "hipMultiThreadDevice-pyramid" --tests 0x4 make_named_test (hipMultiThreadDevice "hipMultiThreadDevice-nearzero" --tests 0x10) if (${HIP_MULTI_GPU}) - make_test(hipPeerToPeer_simple ) # use current device for copy, this fails. + make_test(hipPeerToPeer_simple " ") # use current device for copy, this fails. make_test(hipPeerToPeer_simple --memcpyWithPeer) make_test(hipPeerToPeer_simple --mirrorPeers) # mirror mapping: test to ensure mirror doesn't destroy orig mapping. diff --git a/projects/hip/tests/src/hipMultiThreadDevice.cpp b/projects/hip/tests/src/hipMultiThreadDevice.cpp index a1f64aceb3..d9afda59d0 100644 --- a/projects/hip/tests/src/hipMultiThreadDevice.cpp +++ b/projects/hip/tests/src/hipMultiThreadDevice.cpp @@ -116,12 +116,12 @@ int main(int argc, char *argv[]) /*disable, this takess a while and if the next one works then no need to run serial*/ if (1 && (p_tests & 0x2)) { printf ("\ntest 0x2 : serialized multiThread_pyramid(1) \n"); - multiThread_pyramid(true, 10); + multiThread_pyramid(true, 3); } if (p_tests & 0x4) { printf ("\ntest 0x4 : parallel multiThread_pyramid(1) \n"); - multiThread_pyramid(false, 10); + multiThread_pyramid(false, 3); } //if (p_tests & 0x8) { diff --git a/projects/hip/tests/src/hipPeerToPeer_simple.cpp b/projects/hip/tests/src/hipPeerToPeer_simple.cpp index 5bfb583f3f..2c0dd95b36 100644 --- a/projects/hip/tests/src/hipPeerToPeer_simple.cpp +++ b/projects/hip/tests/src/hipPeerToPeer_simple.cpp @@ -29,6 +29,10 @@ bool p_memcpyWithPeer = false; // use the peer device for the P2P copy bool p_mirrorPeers = false; // in addition to mapping current to peer space, map peer to current space. int p_peerDevice = -1; // explicly specify which peer to use, else use p_gpuDevice + 1. + +int g_currentDevice; +int g_peerDevice; + void parseMyArguments(int argc, char *argv[]) { int more_argc = HipTest::parseStandardArguments(argc, argv, false); @@ -51,45 +55,50 @@ void parseMyArguments(int argc, char *argv[]) }; +// Sets globals g_currentDevice, g_peerDevice +void setupPeerTests() +{ + int deviceCnt; + + HIPCHECK(hipGetDeviceCount(&deviceCnt)); + + g_currentDevice = p_gpuDevice; + g_peerDevice = (p_peerDevice == -1) ? ((g_currentDevice + 1) % deviceCnt) : p_peerDevice; + + printf ("N=%zu device=%d peerDevice=%d (%d devices total)\n", N, g_currentDevice, g_peerDevice, deviceCnt); + + // Must be on a multi-gpu system: + assert (g_currentDevice != g_peerDevice); + + int canAccessPeer; + HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, g_currentDevice, g_peerDevice)); + printf ("dev#%d canAccessPeer:#%d=%d\n", g_currentDevice, g_peerDevice, canAccessPeer); + + assert(canAccessPeer); + + HIPCHECK (hipSetDevice(g_currentDevice)); + HIPCHECK(hipDeviceReset()); + HIPCHECK (hipSetDevice(g_peerDevice)); + HIPCHECK(hipDeviceReset()); +} + //--- // Test which enables peer2peer first, then allocates the memory. void enablePeerFirst() { printf ("\n==testing: %s\n", __func__); - int deviceCnt; - HIPCHECK(hipGetDeviceCount(&deviceCnt)); - - int currentDevice = p_gpuDevice; - int peerDevice = (p_peerDevice == -1) ? ((currentDevice + 1) % deviceCnt) : p_peerDevice; - - printf ("N=%zu device=%d peerDevice=%d (%d devices total)\n", N, currentDevice, peerDevice, deviceCnt); - - // Must be on a multi-gpu system: - assert (currentDevice != peerDevice); - - int canAccessPeer; - HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, currentDevice, peerDevice)); - printf ("dev#%d canAccessPeer:#%d=%d\n", currentDevice, peerDevice, canAccessPeer); - - assert(canAccessPeer); - - HIPCHECK (hipSetDevice(currentDevice)); - HIPCHECK(hipDeviceReset()); - HIPCHECK (hipSetDevice(peerDevice)); - HIPCHECK(hipDeviceReset()); - - HIPCHECK(hipSetDevice(currentDevice)); - HIPCHECK(hipDeviceEnablePeerAccess(peerDevice, 0)); + HIPCHECK(hipSetDevice(g_currentDevice)); + HIPCHECK(hipDeviceEnablePeerAccess(g_peerDevice, 0)); if (p_mirrorPeers) { int canAccessPeer; - HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, peerDevice, currentDevice)); + HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, g_peerDevice, g_currentDevice)); assert(canAccessPeer); - HIPCHECK(hipSetDevice(peerDevice)); - HIPCHECK(hipDeviceEnablePeerAccess(currentDevice, 0)); + HIPCHECK(hipSetDevice(g_peerDevice)); + HIPCHECK(hipDeviceEnablePeerAccess(g_currentDevice, 0)); } size_t Nbytes = N*sizeof(char); @@ -100,23 +109,23 @@ void enablePeerFirst() A_h = (char*)malloc(Nbytes); // allocate and initialize memory on device0 - HIPCHECK (hipSetDevice(currentDevice)); + HIPCHECK (hipSetDevice(g_currentDevice)); HIPCHECK (hipMalloc(&A_d0, Nbytes) ); HIPCHECK ( hipMemset(A_d0, memsetval, Nbytes) ); // allocate and initialize memory on peer device - HIPCHECK (hipSetDevice(peerDevice)); + HIPCHECK (hipSetDevice(g_peerDevice)); HIPCHECK (hipMalloc(&A_d1, Nbytes) ); HIPCHECK ( hipMemset(A_d1, 0x13, Nbytes) ); // Device0 push to device1, using P2P: - HIPCHECK (hipSetDevice(p_memcpyWithPeer ? peerDevice : currentDevice)); + HIPCHECK (hipSetDevice(p_memcpyWithPeer ? g_peerDevice : g_currentDevice)); HIPCHECK (hipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyDefault)); // Copy data back to host: - HIPCHECK (hipSetDevice(peerDevice)); + HIPCHECK (hipSetDevice(g_peerDevice)); HIPCHECK (hipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost)); // Check host data: @@ -128,35 +137,14 @@ void enablePeerFirst() } - //--- - // Test which allocated memory first, then enables peer2peer. - // Enabling peer needs to scan all allocated memory and enable peer access. - void allocMemoryFirst() - { - printf ("\n==testing: %s\n", __func__); - int deviceCnt; - - HIPCHECK(hipGetDeviceCount(&deviceCnt)); - - int currentDevice = p_gpuDevice; - int peerDevice = (p_peerDevice == -1) ? ((currentDevice + 1) % deviceCnt) : p_peerDevice; - - printf ("N=%zu device=%d peerDevice=%d (%d devices total)\n", N, currentDevice, peerDevice, deviceCnt); - - // Must be on a multi-gpu system: - assert (currentDevice != peerDevice); - - int canAccessPeer; - HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, currentDevice, peerDevice)); - printf ("dev#%d canAccessPeer:#%d=%d\n", currentDevice, peerDevice, canAccessPeer); - - assert(canAccessPeer); - - HIPCHECK (hipSetDevice(currentDevice)); - HIPCHECK(hipDeviceReset()); - HIPCHECK (hipSetDevice(peerDevice)); - HIPCHECK(hipDeviceReset()); +//--- +// Test which allocated memory first, then enables peer2peer. +// Enabling peer needs to scan all allocated memory and enable peer access. +void allocMemoryFirst() +{ + printf ("\n==testing: %s\n", __func__); + setupPeerTests(); size_t Nbytes = N*sizeof(char); @@ -167,39 +155,39 @@ void enablePeerFirst() //--- // allocate and initialize memory on device0 - HIPCHECK (hipSetDevice(currentDevice)); + HIPCHECK (hipSetDevice(g_currentDevice)); HIPCHECK (hipMalloc(&A_d0, Nbytes) ); HIPCHECK ( hipMemset(A_d0, memsetval, Nbytes) ); // allocate and initialize memory on peer device - HIPCHECK (hipSetDevice(peerDevice)); + HIPCHECK (hipSetDevice(g_peerDevice)); HIPCHECK (hipMalloc(&A_d1, Nbytes) ); HIPCHECK ( hipMemset(A_d1, 0x13, Nbytes) ); //--- //Enable peer access, for memory already allocated: - HIPCHECK(hipSetDevice(currentDevice)); - HIPCHECK(hipDeviceEnablePeerAccess(peerDevice, 0)); + HIPCHECK(hipSetDevice(g_currentDevice)); + HIPCHECK(hipDeviceEnablePeerAccess(g_peerDevice, 0)); if (p_mirrorPeers) { int canAccessPeer; - HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, peerDevice, currentDevice)); + HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, g_peerDevice, g_currentDevice)); assert(canAccessPeer); - HIPCHECK(hipSetDevice(peerDevice)); - HIPCHECK(hipDeviceEnablePeerAccess(currentDevice, 0)); + HIPCHECK(hipSetDevice(g_peerDevice)); + HIPCHECK(hipDeviceEnablePeerAccess(g_currentDevice, 0)); } //--- // Copies to test functionality: // Device0 push to device1, using P2P: - HIPCHECK (hipSetDevice(p_memcpyWithPeer ? peerDevice : currentDevice)); + HIPCHECK (hipSetDevice(p_memcpyWithPeer ? g_peerDevice : g_currentDevice)); HIPCHECK (hipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyDefault)); // Copy data back to host: - HIPCHECK (hipSetDevice(peerDevice)); + HIPCHECK (hipSetDevice(g_peerDevice)); HIPCHECK (hipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost)); @@ -212,6 +200,40 @@ void enablePeerFirst() } } +void simpleNegative() +{ + printf ("\n==testing: %s\n", __func__); + + setupPeerTests(); + + int deviceId; + HIPCHECK (hipGetDevice(&deviceId)); + + //--- + //-- self is not a peer + int canAccessPeer; + hipError_t e = hipDeviceCanAccessPeer(&canAccessPeer, deviceId, deviceId); + HIPASSERT( e == hipSuccess); // no error returned, it doesn't hurt to ask. + HIPASSERT (canAccessPeer == 0); // but self is not a peer. + + e = hipSuccess; + //--- + // Enable same device twice in a row: + HIPCHECK(hipSetDevice(g_currentDevice)); + HIPCHECK(hipDeviceEnablePeerAccess(g_peerDevice, 0)); + e =(hipDeviceEnablePeerAccess(g_peerDevice, 0)); + HIPASSERT (e == hipErrorPeerAccessAlreadyEnabled); + + //--- + // try disabling twice in a row + HIPCHECK(hipDeviceDisablePeerAccess(g_peerDevice)); + e =(hipDeviceDisablePeerAccess(g_peerDevice)); + HIPASSERT (e == hipErrorPeerAccessNotEnabled); + + + // More tests here: +} + int main(int argc, char *argv[]) @@ -226,5 +248,9 @@ int main(int argc, char *argv[]) allocMemoryFirst(); } + if (p_tests & 0x4) { + simpleNegative(); + } + passed(); } From 775e484517ba4fe6431a42e7a8e493824930e7ac Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Mon, 11 Apr 2016 07:47:22 -0500 Subject: [PATCH 28/40] fix peer query order [ROCm/hip commit: e4d1863ce8f9329f42edf96ce88dd8b337b17fd0] --- projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp b/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp index 581194f624..2b6e129900 100644 --- a/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp +++ b/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp @@ -126,7 +126,7 @@ void printDeviceProp (int deviceId) cout << setw(w1) << "peers: "; for (int i=0; i Date: Mon, 11 Apr 2016 09:09:36 -0500 Subject: [PATCH 29/40] Clean up disable. Add USE_HCC_LOCK (disabled) Disable USE_PEER_TO_PEER. [ROCm/hip commit: b0529e04f1814ac52e0248db685973458f5f19f8] --- projects/hip/include/hcc_detail/hip_hcc.h | 5 ++++- projects/hip/src/hip_hcc.cpp | 4 ---- projects/hip/src/hip_memory.cpp | 4 ++++ projects/hip/src/hip_peer.cpp | 4 ---- projects/hip/tests/src/CMakeLists.txt | 4 ++-- 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/projects/hip/include/hcc_detail/hip_hcc.h b/projects/hip/include/hcc_detail/hip_hcc.h index 0d24862764..b7d57ed317 100644 --- a/projects/hip/include/hcc_detail/hip_hcc.h +++ b/projects/hip/include/hcc_detail/hip_hcc.h @@ -38,7 +38,10 @@ THE SOFTWARE. // Compile peer-to-peer support. // >= 2 : use HCC hc:accelerator::get_is_peer // >= 3 : use hc::am_memtracker_update_peers(...) -#define USE_PEER_TO_PEER 3 +#define USE_PEER_TO_PEER 1 + +// Use new lock API in HCC: +#define USE_HCC_LOCK 0 //#define INLINE static inline diff --git a/projects/hip/src/hip_hcc.cpp b/projects/hip/src/hip_hcc.cpp index aa4912dc22..6c86cde747 100644 --- a/projects/hip/src/hip_hcc.cpp +++ b/projects/hip/src/hip_hcc.cpp @@ -454,13 +454,9 @@ void ihipDevice_t::locked_reset() crit->streams().clear(); - -#if USE_PEER_TO_PEER>=2 // This resest peer list to just me: crit->resetPeers(this); -#endif - // Reset and release all memory stored in the tracker: // Reset will remove peer mapping so don't need to do this explicitly. am_memtracker_reset(_acc); diff --git a/projects/hip/src/hip_memory.cpp b/projects/hip/src/hip_memory.cpp index 497c832242..46704f6277 100644 --- a/projects/hip/src/hip_memory.cpp +++ b/projects/hip/src/hip_memory.cpp @@ -246,7 +246,11 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags) } if(device){ if(flags == hipHostRegisterDefault){ +#if USE_HCC_LOCK am_status_t am_status = hc::am_memtracker_host_memory_lock(device->_acc, hostPtr, sizeBytes); +#else + am_status_t am_status = AM_ERROR_MISC; +#endif // hsa_status_t hsa_status = hsa_amd_memory_lock(hostPtr, sizeBytes, &device->_hsa_agent, 1, &srcPtr); if(am_status == AM_SUCCESS){ hip_status = hipSuccess; diff --git a/projects/hip/src/hip_peer.cpp b/projects/hip/src/hip_peer.cpp index 612cd6e309..d45f95dc6c 100644 --- a/projects/hip/src/hip_peer.cpp +++ b/projects/hip/src/hip_peer.cpp @@ -63,7 +63,6 @@ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId) HIP_INIT_API(peerDeviceId); hipError_t err = hipSuccess; -#if USE_PEER_TO_PEER auto thisDevice = ihipGetTlsDefaultDevice(); auto peerDevice = ihipGetDevice(peerDeviceId); @@ -92,7 +91,6 @@ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId) } else { err = hipErrorInvalidDevice; } -#endif return ihipLogStatus(err); }; @@ -105,7 +103,6 @@ hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags) HIP_INIT_API(peerDeviceId, flags); hipError_t err = hipSuccess; -#if USE_PEER_TO_PEER if (flags != 0) { err = hipErrorInvalidValue; } else { @@ -125,7 +122,6 @@ hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags) err = hipErrorInvalidDevice; } } -#endif return ihipLogStatus(err); } diff --git a/projects/hip/tests/src/CMakeLists.txt b/projects/hip/tests/src/CMakeLists.txt index 9037027654..a6a4472d18 100644 --- a/projects/hip/tests/src/CMakeLists.txt +++ b/projects/hip/tests/src/CMakeLists.txt @@ -160,14 +160,14 @@ make_hip_executable (hipSimpleAtomicsTest hipSimpleAtomicsTest.cpp) make_hip_executable (hipMathFunctionsHost hipMathFunctions.cpp hipSinglePrecisionMathHost.cpp hipDoublePrecisionMathHost.cpp) make_hip_executable (hipMathFunctionsDevice hipMathFunctions.cpp hipSinglePrecisionMathDevice.cpp hipDoublePrecisionMathDevice.cpp) make_hip_executable (hipIntrinsics hipMathFunctions.cpp hipSinglePrecisionIntrinsics.cpp hipDoublePrecisionIntrinsics.cpp hipIntegerIntrinsics.cpp) -#TODO - re-enable. This uses the pointer add feature. make_hip_executable (hipPointerAttrib hipPointerAttrib.cpp) make_hip_executable (hipMultiThreadStreams1 hipMultiThreadStreams1.cpp) make_hip_executable (hipMultiThreadStreams2 hipMultiThreadStreams2.cpp) make_hip_executable (hipHostAlloc hipHostAlloc.cpp) make_hip_executable (hipStreamL5 hipStreamL5.cpp) make_hip_executable (hipHostGetFlags hipHostGetFlags.cpp) -make_hip_executable (hipHostRegister hipHostRegister.cpp) +#TODO - re-enable. This requires working hipHostRegister call, waiting on HCC feature. +#make_hip_executable (hipHostRegister hipHostRegister.cpp) make_hip_executable (hipRandomMemcpyAsync hipRandomMemcpyAsync.cpp) make_hip_executable (hipMemoryAllocate hipMemoryAllocate.cpp) make_hip_executable (hipFuncSetDeviceFlags hipFuncSetDeviceFlags.cpp) From 7c92c9cc22afc4fd05d984b66256fdb53bb00bb3 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Mon, 11 Apr 2016 12:52:18 -0500 Subject: [PATCH 30/40] P2p checkpoint. - set USE_PEER_TO_PEER=3 (requires HCC "am_memtracker_update_peers") - when enabling peer, turn it on for previously allocated memory. - hipDeviceCanAccessPeer is no longer self-ware (self does not qualify as a peer) - device peerlist always includes self, so when we call allow_access we never remove self access. - hipDeviceReset() removes old peer mappings. [ROCm/hip commit: 1f53c55d3e5564e5d71160a4ef4dcc31ef9b0591] --- projects/hip/include/hcc_detail/hip_hcc.h | 3 +- .../hip/include/hcc_detail/hip_runtime_api.h | 37 +++-- .../hip/samples/1_Utils/hipInfo/hipInfo.cpp | 2 +- projects/hip/src/hip_device.cpp | 1 + projects/hip/src/hip_hcc.cpp | 26 ++- projects/hip/src/hip_memory.cpp | 5 +- projects/hip/src/hip_peer.cpp | 21 +-- projects/hip/tests/src/CMakeLists.txt | 12 +- .../hip/tests/src/hipPeerToPeer_simple.cpp | 149 +++++++++++++++--- 9 files changed, 202 insertions(+), 54 deletions(-) diff --git a/projects/hip/include/hcc_detail/hip_hcc.h b/projects/hip/include/hcc_detail/hip_hcc.h index 1dd8777b1c..0d24862764 100644 --- a/projects/hip/include/hcc_detail/hip_hcc.h +++ b/projects/hip/include/hcc_detail/hip_hcc.h @@ -38,7 +38,7 @@ THE SOFTWARE. // Compile peer-to-peer support. // >= 2 : use HCC hc:accelerator::get_is_peer // >= 3 : use hc::am_memtracker_update_peers(...) -#define USE_PEER_TO_PEER 2 +#define USE_PEER_TO_PEER 3 //#define INLINE static inline @@ -526,6 +526,7 @@ public: bool addPeer(ihipDevice_t *peer); bool removePeer(ihipDevice_t *peer); + void resetPeers(ihipDevice_t *thisDevice); uint32_t peerCnt() const { return _peerCnt; }; hsa_agent_t *peerAgents() const { return _peerAgents; }; diff --git a/projects/hip/include/hcc_detail/hip_runtime_api.h b/projects/hip/include/hcc_detail/hip_runtime_api.h index 2f70ae47c3..58df3a2068 100644 --- a/projects/hip/include/hcc_detail/hip_runtime_api.h +++ b/projects/hip/include/hcc_detail/hip_runtime_api.h @@ -907,10 +907,33 @@ hipError_t hipMemGetInfo (size_t * free, size_t * total) ; * * Returns "1" in @p canAccessPeer if the specified @p device is capable * of directly accessing memory physically located on peerDevice , or "0" if not. + * + * Returns "0" in @p canAccessPeer if deviceId == peerDeviceId, and both are valid devices : a device is not a peer of itself. + * + * + * + * @returns #hipSuccess, + * @returns #hipErrorInvalidDevice if deviceId or peerDeviceId are not valid devices */ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDeviceId); +/** + * @brief Enable direct access from current device's virtual address space to memory allocations physically located on a peer device. + * + * Memory which already allocated on peer device will be mapped into the address space of the current device. In addition, all + * future memory allocations on peerDeviceId will be mapped into the address space of the current device when the memory is allocated. + * The peer memory remains accessible from the current device until a call to hipDeviceDisablePeerAccess or hipDeviceReset. + * + * + * @param [in] peerDeviceId + * @param [in] flags + * + * Returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, + * @returns #hipErrorPeerAccessAlreadyEnabled if peer access is already enabled for this device. + */ +hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags); + /** * @brief Disable direct access from current device's virtual address space to memory allocations physically located on a peer device. @@ -923,20 +946,6 @@ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDev */ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId); -/** - * @brief Enable direct access from current device's virtual address space to memory allocations physically located on a peer device. - * - * Memory which already allocated on peer device will be mapped into the address space of the current device. In addition, all - * future memory allocations on peerDeviceId will be mapped into the address space of the current device when the memory is allocated. - * The peer memory remains accessible from the current device until a call to hipDeviceDisablePeerAccess or @hipDeviceReset. - * - * - * @param [in] peerDeviceId - * @param [in] flags - * - * Returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, #hipErrorPeerAccessAlreadyEnabled - */ -hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags); /** * @brief Copies memory from one device to memory on another device. diff --git a/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp b/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp index 9151d5058e..581194f624 100644 --- a/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp +++ b/projects/hip/samples/1_Utils/hipInfo/hipInfo.cpp @@ -127,7 +127,7 @@ void printDeviceProp (int deviceId) for (int i=0; ilocked_waitAllStreams(); diff --git a/projects/hip/src/hip_hcc.cpp b/projects/hip/src/hip_hcc.cpp index ddf6bb1691..a9194c266a 100644 --- a/projects/hip/src/hip_hcc.cpp +++ b/projects/hip/src/hip_hcc.cpp @@ -240,6 +240,15 @@ bool ihipDeviceCriticalBase_t::removePeer(ihipDevice_t *peer) } } + +template<> +void ihipDeviceCriticalBase_t::resetPeers(ihipDevice_t *thisDevice) +{ + _peers.clear(); + _peerCnt = 0; + addPeer(thisDevice); // peer-list always contains self agent. +} + //------------------------------------------------------------------------------------------------- //--- @@ -444,12 +453,18 @@ void ihipDevice_t::locked_reset() // Reset and remove streams: crit->streams().clear(); -#if USE_PEER_TO_PEER==2 - // remove peer mappings to this device? Call removePeer on all other devices? + + +#if USE_PEER_TO_PEER>=2 + // This resest peer list to just me: + crit->resetPeers(this); + #endif // Reset and release all memory stored in the tracker: + // Reset will remove peer mapping so don't need to do this explicitly. am_memtracker_reset(_acc); + }; @@ -474,10 +489,13 @@ void ihipDevice_t::init(unsigned device_index, unsigned deviceCnt, hc::accelerat getProperties(&_props); + _criticalData.init(deviceCnt); + + locked_reset(); + _default_stream = new ihipStream_t(device_index, acc.get_default_view(), hipStreamDefault); locked_addStream(_default_stream); - - _criticalData.init(deviceCnt); + tprintf(DB_SYNC, "created device with default_stream=%p\n", _default_stream); diff --git a/projects/hip/src/hip_memory.cpp b/projects/hip/src/hip_memory.cpp index 9e8c963433..a3409a3e2e 100644 --- a/projects/hip/src/hip_memory.cpp +++ b/projects/hip/src/hip_memory.cpp @@ -132,7 +132,7 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) hc::am_memtracker_update(*ptr, device->_device_index, 0); { LockedAccessor_DeviceCrit_t crit(device->criticalData()); - if (crit->peerCnt()) { + if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved: hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); if (hsa_status != HSA_STATUS_SUCCESS) { hip_status = hipErrorMemoryAllocation; @@ -173,8 +173,9 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) }else{ hc::am_memtracker_update(*ptr, device->_device_index, flags); { + // TODO - allow_access only works for device memory, need to change am_alloc to allocate host directly. LockedAccessor_DeviceCrit_t crit(device->criticalData()); - if (crit->peerCnt()) { + if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved: hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); if (hsa_status != HSA_STATUS_SUCCESS) { hip_status = hipErrorMemoryAllocation; diff --git a/projects/hip/src/hip_peer.cpp b/projects/hip/src/hip_peer.cpp index aeb9c7fd49..612cd6e309 100644 --- a/projects/hip/src/hip_peer.cpp +++ b/projects/hip/src/hip_peer.cpp @@ -17,6 +17,8 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include + #include "hip_runtime.h" #include "hcc_detail/hip_hcc.h" #include "hcc_detail/trace_helper.h" @@ -35,11 +37,15 @@ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDe auto peerDevice = ihipGetDevice(peerDeviceId); if ((thisDevice != NULL) && (peerDevice != NULL)) { + if (deviceId == peerDeviceId) { + *canAccessPeer = 0; + } else { #if USE_PEER_TO_PEER>=2 - *canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc); + *canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc); #else - *canAccessPeer = 0; + *canAccessPeer = 0; #endif + } } else { *canAccessPeer = 0; @@ -69,15 +75,15 @@ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId) #endif if (! canAccessPeer) { err = hipErrorInvalidDevice; // P2P not allowed between these devices. + } else if (thisDevice == peerDevice) { + err = hipErrorInvalidDevice; // Can't disable peer access to self. } else { - - LockedAccessor_DeviceCrit_t crit(thisDevice->criticalData()); bool changed = crit->removePeer(peerDevice); if (changed) { #if USE_PEER_TO_PEER>=3 // Update the peers for all memory already saved in the tracker: - am_memtracker_update_peers(device->_acc, crit->peerCnt(), crit->peerAgents()); + am_memtracker_update_peers(thisDevice->_acc, crit->peerCnt(), crit->peerAgents()); #endif } else { err = hipErrorPeerAccessNotEnabled; // never enabled P2P access. @@ -92,9 +98,6 @@ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId) }; -/** - * @warning Need to update this function when RT supports P2P - */ //--- // Enable registering memory on peerDevice for direct access from the current device. hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags) @@ -113,7 +116,7 @@ hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags) bool isNewPeer = crit->addPeer(peerDevice); if (isNewPeer) { #if USE_PEER_TO_PEER>=3 - am_memtracker_update_peers(device->_acc, crit->peerCnt(), crit->peerAgents()); + am_memtracker_update_peers(thisDevice->_acc, crit->peerCnt(), crit->peerAgents()); #endif } else { err = hipErrorPeerAccessAlreadyEnabled; diff --git a/projects/hip/tests/src/CMakeLists.txt b/projects/hip/tests/src/CMakeLists.txt index 28ea1bf10b..a75a48076c 100644 --- a/projects/hip/tests/src/CMakeLists.txt +++ b/projects/hip/tests/src/CMakeLists.txt @@ -8,6 +8,10 @@ include_directories( ${PROJECT_SOURCE_DIR}/include ) set (HIP_Unit_Test_VERSION_MAJOR 1) set (HIP_Unit_Test_VERSION_MINOR 0) +if(NOT DEFINED HIP_MULTI_GPU) + set(HIP_MULTI_GPU 0 CACHE BOOL "Run tests requiring more than one GPU") +endif() + if(NOT DEFINED HIP_BUILD_LOCAL) if(NOT DEFINED ENV{HIP_BUILD_LOCAL}) set(HIP_BUILD_LOCAL 0 CACHE BOOL "Build HIP in local folder") @@ -218,6 +222,12 @@ make_test(hipFuncDeviceSynchronize " ") make_named_test (hipMultiThreadDevice "hipMultiThreadDevice-serial" --tests 0x1) make_named_test (hipMultiThreadDevice "hipMultiThreadDevice-pyramid" --tests 0x4) make_named_test (hipMultiThreadDevice "hipMultiThreadDevice-nearzero" --tests 0x10) -make_test(hipPeerToPeer_simple " " ) + +if (${HIP_MULTI_GPU}) + make_test(hipPeerToPeer_simple ) # use current device for copy, this fails. + make_test(hipPeerToPeer_simple --memcpyWithPeer) + make_test(hipPeerToPeer_simple --mirrorPeers) # mirror mapping: test to ensure mirror doesn't destroy orig mapping. + +endif() make_hipify_test(specialFunc.cu ) diff --git a/projects/hip/tests/src/hipPeerToPeer_simple.cpp b/projects/hip/tests/src/hipPeerToPeer_simple.cpp index 4aaa6a452b..5bfb583f3f 100644 --- a/projects/hip/tests/src/hipPeerToPeer_simple.cpp +++ b/projects/hip/tests/src/hipPeerToPeer_simple.cpp @@ -1,4 +1,3 @@ - /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. @@ -52,9 +51,11 @@ void parseMyArguments(int argc, char *argv[]) }; -int main(int argc, char *argv[]) +//--- +// Test which enables peer2peer first, then allocates the memory. +void enablePeerFirst() { - parseMyArguments(argc, argv); + printf ("\n==testing: %s\n", __func__); int deviceCnt; @@ -74,6 +75,110 @@ int main(int argc, char *argv[]) assert(canAccessPeer); + HIPCHECK (hipSetDevice(currentDevice)); + HIPCHECK(hipDeviceReset()); + HIPCHECK (hipSetDevice(peerDevice)); + HIPCHECK(hipDeviceReset()); + + HIPCHECK(hipSetDevice(currentDevice)); + HIPCHECK(hipDeviceEnablePeerAccess(peerDevice, 0)); + + if (p_mirrorPeers) { + int canAccessPeer; + HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, peerDevice, currentDevice)); + assert(canAccessPeer); + + HIPCHECK(hipSetDevice(peerDevice)); + HIPCHECK(hipDeviceEnablePeerAccess(currentDevice, 0)); + } + + size_t Nbytes = N*sizeof(char); + + char *A_d0, *A_d1; + char *A_h; + + A_h = (char*)malloc(Nbytes); + + // allocate and initialize memory on device0 + HIPCHECK (hipSetDevice(currentDevice)); + HIPCHECK (hipMalloc(&A_d0, Nbytes) ); + HIPCHECK ( hipMemset(A_d0, memsetval, Nbytes) ); + + // allocate and initialize memory on peer device + HIPCHECK (hipSetDevice(peerDevice)); + HIPCHECK (hipMalloc(&A_d1, Nbytes) ); + HIPCHECK ( hipMemset(A_d1, 0x13, Nbytes) ); + + + + // Device0 push to device1, using P2P: + HIPCHECK (hipSetDevice(p_memcpyWithPeer ? peerDevice : currentDevice)); + HIPCHECK (hipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyDefault)); + + // Copy data back to host: + HIPCHECK (hipSetDevice(peerDevice)); + HIPCHECK (hipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost)); + + // Check host data: + for (int i=0; i Date: Mon, 11 Apr 2016 13:46:53 -0500 Subject: [PATCH 31/40] add simple negative P2P tests [ROCm/hip commit: c40c36aff7831461f77181035fd9aa6d110d2935] --- projects/hip/tests/src/CMakeLists.txt | 2 +- .../hip/tests/src/hipMultiThreadDevice.cpp | 4 +- .../hip/tests/src/hipPeerToPeer_simple.cpp | 162 ++++++++++-------- 3 files changed, 97 insertions(+), 71 deletions(-) diff --git a/projects/hip/tests/src/CMakeLists.txt b/projects/hip/tests/src/CMakeLists.txt index a75a48076c..c13f90a8fe 100644 --- a/projects/hip/tests/src/CMakeLists.txt +++ b/projects/hip/tests/src/CMakeLists.txt @@ -224,7 +224,7 @@ make_named_test (hipMultiThreadDevice "hipMultiThreadDevice-pyramid" --tests 0x4 make_named_test (hipMultiThreadDevice "hipMultiThreadDevice-nearzero" --tests 0x10) if (${HIP_MULTI_GPU}) - make_test(hipPeerToPeer_simple ) # use current device for copy, this fails. + make_test(hipPeerToPeer_simple " ") # use current device for copy, this fails. make_test(hipPeerToPeer_simple --memcpyWithPeer) make_test(hipPeerToPeer_simple --mirrorPeers) # mirror mapping: test to ensure mirror doesn't destroy orig mapping. diff --git a/projects/hip/tests/src/hipMultiThreadDevice.cpp b/projects/hip/tests/src/hipMultiThreadDevice.cpp index a1f64aceb3..d9afda59d0 100644 --- a/projects/hip/tests/src/hipMultiThreadDevice.cpp +++ b/projects/hip/tests/src/hipMultiThreadDevice.cpp @@ -116,12 +116,12 @@ int main(int argc, char *argv[]) /*disable, this takess a while and if the next one works then no need to run serial*/ if (1 && (p_tests & 0x2)) { printf ("\ntest 0x2 : serialized multiThread_pyramid(1) \n"); - multiThread_pyramid(true, 10); + multiThread_pyramid(true, 3); } if (p_tests & 0x4) { printf ("\ntest 0x4 : parallel multiThread_pyramid(1) \n"); - multiThread_pyramid(false, 10); + multiThread_pyramid(false, 3); } //if (p_tests & 0x8) { diff --git a/projects/hip/tests/src/hipPeerToPeer_simple.cpp b/projects/hip/tests/src/hipPeerToPeer_simple.cpp index 5bfb583f3f..2c0dd95b36 100644 --- a/projects/hip/tests/src/hipPeerToPeer_simple.cpp +++ b/projects/hip/tests/src/hipPeerToPeer_simple.cpp @@ -29,6 +29,10 @@ bool p_memcpyWithPeer = false; // use the peer device for the P2P copy bool p_mirrorPeers = false; // in addition to mapping current to peer space, map peer to current space. int p_peerDevice = -1; // explicly specify which peer to use, else use p_gpuDevice + 1. + +int g_currentDevice; +int g_peerDevice; + void parseMyArguments(int argc, char *argv[]) { int more_argc = HipTest::parseStandardArguments(argc, argv, false); @@ -51,45 +55,50 @@ void parseMyArguments(int argc, char *argv[]) }; +// Sets globals g_currentDevice, g_peerDevice +void setupPeerTests() +{ + int deviceCnt; + + HIPCHECK(hipGetDeviceCount(&deviceCnt)); + + g_currentDevice = p_gpuDevice; + g_peerDevice = (p_peerDevice == -1) ? ((g_currentDevice + 1) % deviceCnt) : p_peerDevice; + + printf ("N=%zu device=%d peerDevice=%d (%d devices total)\n", N, g_currentDevice, g_peerDevice, deviceCnt); + + // Must be on a multi-gpu system: + assert (g_currentDevice != g_peerDevice); + + int canAccessPeer; + HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, g_currentDevice, g_peerDevice)); + printf ("dev#%d canAccessPeer:#%d=%d\n", g_currentDevice, g_peerDevice, canAccessPeer); + + assert(canAccessPeer); + + HIPCHECK (hipSetDevice(g_currentDevice)); + HIPCHECK(hipDeviceReset()); + HIPCHECK (hipSetDevice(g_peerDevice)); + HIPCHECK(hipDeviceReset()); +} + //--- // Test which enables peer2peer first, then allocates the memory. void enablePeerFirst() { printf ("\n==testing: %s\n", __func__); - int deviceCnt; - HIPCHECK(hipGetDeviceCount(&deviceCnt)); - - int currentDevice = p_gpuDevice; - int peerDevice = (p_peerDevice == -1) ? ((currentDevice + 1) % deviceCnt) : p_peerDevice; - - printf ("N=%zu device=%d peerDevice=%d (%d devices total)\n", N, currentDevice, peerDevice, deviceCnt); - - // Must be on a multi-gpu system: - assert (currentDevice != peerDevice); - - int canAccessPeer; - HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, currentDevice, peerDevice)); - printf ("dev#%d canAccessPeer:#%d=%d\n", currentDevice, peerDevice, canAccessPeer); - - assert(canAccessPeer); - - HIPCHECK (hipSetDevice(currentDevice)); - HIPCHECK(hipDeviceReset()); - HIPCHECK (hipSetDevice(peerDevice)); - HIPCHECK(hipDeviceReset()); - - HIPCHECK(hipSetDevice(currentDevice)); - HIPCHECK(hipDeviceEnablePeerAccess(peerDevice, 0)); + HIPCHECK(hipSetDevice(g_currentDevice)); + HIPCHECK(hipDeviceEnablePeerAccess(g_peerDevice, 0)); if (p_mirrorPeers) { int canAccessPeer; - HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, peerDevice, currentDevice)); + HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, g_peerDevice, g_currentDevice)); assert(canAccessPeer); - HIPCHECK(hipSetDevice(peerDevice)); - HIPCHECK(hipDeviceEnablePeerAccess(currentDevice, 0)); + HIPCHECK(hipSetDevice(g_peerDevice)); + HIPCHECK(hipDeviceEnablePeerAccess(g_currentDevice, 0)); } size_t Nbytes = N*sizeof(char); @@ -100,23 +109,23 @@ void enablePeerFirst() A_h = (char*)malloc(Nbytes); // allocate and initialize memory on device0 - HIPCHECK (hipSetDevice(currentDevice)); + HIPCHECK (hipSetDevice(g_currentDevice)); HIPCHECK (hipMalloc(&A_d0, Nbytes) ); HIPCHECK ( hipMemset(A_d0, memsetval, Nbytes) ); // allocate and initialize memory on peer device - HIPCHECK (hipSetDevice(peerDevice)); + HIPCHECK (hipSetDevice(g_peerDevice)); HIPCHECK (hipMalloc(&A_d1, Nbytes) ); HIPCHECK ( hipMemset(A_d1, 0x13, Nbytes) ); // Device0 push to device1, using P2P: - HIPCHECK (hipSetDevice(p_memcpyWithPeer ? peerDevice : currentDevice)); + HIPCHECK (hipSetDevice(p_memcpyWithPeer ? g_peerDevice : g_currentDevice)); HIPCHECK (hipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyDefault)); // Copy data back to host: - HIPCHECK (hipSetDevice(peerDevice)); + HIPCHECK (hipSetDevice(g_peerDevice)); HIPCHECK (hipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost)); // Check host data: @@ -128,35 +137,14 @@ void enablePeerFirst() } - //--- - // Test which allocated memory first, then enables peer2peer. - // Enabling peer needs to scan all allocated memory and enable peer access. - void allocMemoryFirst() - { - printf ("\n==testing: %s\n", __func__); - int deviceCnt; - - HIPCHECK(hipGetDeviceCount(&deviceCnt)); - - int currentDevice = p_gpuDevice; - int peerDevice = (p_peerDevice == -1) ? ((currentDevice + 1) % deviceCnt) : p_peerDevice; - - printf ("N=%zu device=%d peerDevice=%d (%d devices total)\n", N, currentDevice, peerDevice, deviceCnt); - - // Must be on a multi-gpu system: - assert (currentDevice != peerDevice); - - int canAccessPeer; - HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, currentDevice, peerDevice)); - printf ("dev#%d canAccessPeer:#%d=%d\n", currentDevice, peerDevice, canAccessPeer); - - assert(canAccessPeer); - - HIPCHECK (hipSetDevice(currentDevice)); - HIPCHECK(hipDeviceReset()); - HIPCHECK (hipSetDevice(peerDevice)); - HIPCHECK(hipDeviceReset()); +//--- +// Test which allocated memory first, then enables peer2peer. +// Enabling peer needs to scan all allocated memory and enable peer access. +void allocMemoryFirst() +{ + printf ("\n==testing: %s\n", __func__); + setupPeerTests(); size_t Nbytes = N*sizeof(char); @@ -167,39 +155,39 @@ void enablePeerFirst() //--- // allocate and initialize memory on device0 - HIPCHECK (hipSetDevice(currentDevice)); + HIPCHECK (hipSetDevice(g_currentDevice)); HIPCHECK (hipMalloc(&A_d0, Nbytes) ); HIPCHECK ( hipMemset(A_d0, memsetval, Nbytes) ); // allocate and initialize memory on peer device - HIPCHECK (hipSetDevice(peerDevice)); + HIPCHECK (hipSetDevice(g_peerDevice)); HIPCHECK (hipMalloc(&A_d1, Nbytes) ); HIPCHECK ( hipMemset(A_d1, 0x13, Nbytes) ); //--- //Enable peer access, for memory already allocated: - HIPCHECK(hipSetDevice(currentDevice)); - HIPCHECK(hipDeviceEnablePeerAccess(peerDevice, 0)); + HIPCHECK(hipSetDevice(g_currentDevice)); + HIPCHECK(hipDeviceEnablePeerAccess(g_peerDevice, 0)); if (p_mirrorPeers) { int canAccessPeer; - HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, peerDevice, currentDevice)); + HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, g_peerDevice, g_currentDevice)); assert(canAccessPeer); - HIPCHECK(hipSetDevice(peerDevice)); - HIPCHECK(hipDeviceEnablePeerAccess(currentDevice, 0)); + HIPCHECK(hipSetDevice(g_peerDevice)); + HIPCHECK(hipDeviceEnablePeerAccess(g_currentDevice, 0)); } //--- // Copies to test functionality: // Device0 push to device1, using P2P: - HIPCHECK (hipSetDevice(p_memcpyWithPeer ? peerDevice : currentDevice)); + HIPCHECK (hipSetDevice(p_memcpyWithPeer ? g_peerDevice : g_currentDevice)); HIPCHECK (hipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyDefault)); // Copy data back to host: - HIPCHECK (hipSetDevice(peerDevice)); + HIPCHECK (hipSetDevice(g_peerDevice)); HIPCHECK (hipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost)); @@ -212,6 +200,40 @@ void enablePeerFirst() } } +void simpleNegative() +{ + printf ("\n==testing: %s\n", __func__); + + setupPeerTests(); + + int deviceId; + HIPCHECK (hipGetDevice(&deviceId)); + + //--- + //-- self is not a peer + int canAccessPeer; + hipError_t e = hipDeviceCanAccessPeer(&canAccessPeer, deviceId, deviceId); + HIPASSERT( e == hipSuccess); // no error returned, it doesn't hurt to ask. + HIPASSERT (canAccessPeer == 0); // but self is not a peer. + + e = hipSuccess; + //--- + // Enable same device twice in a row: + HIPCHECK(hipSetDevice(g_currentDevice)); + HIPCHECK(hipDeviceEnablePeerAccess(g_peerDevice, 0)); + e =(hipDeviceEnablePeerAccess(g_peerDevice, 0)); + HIPASSERT (e == hipErrorPeerAccessAlreadyEnabled); + + //--- + // try disabling twice in a row + HIPCHECK(hipDeviceDisablePeerAccess(g_peerDevice)); + e =(hipDeviceDisablePeerAccess(g_peerDevice)); + HIPASSERT (e == hipErrorPeerAccessNotEnabled); + + + // More tests here: +} + int main(int argc, char *argv[]) @@ -226,5 +248,9 @@ int main(int argc, char *argv[]) allocMemoryFirst(); } + if (p_tests & 0x4) { + simpleNegative(); + } + passed(); } From 7dbb93af4e774c384b31f614a58db74ad268f11f Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Tue, 12 Apr 2016 12:42:12 +0530 Subject: [PATCH 32/40] Refactor and add support for nvcc path in cmake [ROCm/hip commit: 119de66cd28fa1125b1e8ab6376ddbafeab58191] --- projects/hip/CMakeLists.txt | 158 +++++++++++++++++++++++------------- 1 file changed, 101 insertions(+), 57 deletions(-) diff --git a/projects/hip/CMakeLists.txt b/projects/hip/CMakeLists.txt index e4041a5468..124295fbf7 100644 --- a/projects/hip/CMakeLists.txt +++ b/projects/hip/CMakeLists.txt @@ -1,40 +1,65 @@ -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.8.3) project(hip_hcc) +############################# +# Setup version information +############################# set(HIP_VERSION_MAJOR "0") set(HIP_VERSION_MINOR "84") set(HIP_VERSION_PATCH "0") +############################# +# Configure variables +############################# +# Determine HIP_PLATFORM +if(NOT DEFINED HIP_PLATFORM) + if(NOT DEFINED ENV{HIP_PLATFORM}) + execute_process(COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bin/hipconfig --platform + OUTPUT_VARIABLE HIP_PLATFORM + OUTPUT_STRIP_TRAILING_WHITESPACE) + else() + set(HIP_PLATFORM $ENV{HIP_PLATFORM} CACHE STRING "HIP Platform") + endif() +endif() +message(STATUS "HIP Platform: " ${HIP_PLATFORM}) + +# If HIP_PLATFORM is hcc, we need HCC_HOME and HSA_PATH to be defined +if(HIP_PLATFORM STREQUAL "hcc") + # Determine HCC_HOME + if(NOT DEFINED HCC_HOME) + if(NOT DEFINED ENV{HCC_HOME}) + set(HCC_HOME "/opt/hcc" CACHE PATH "Path to which HCC has been installed") + else() + set(HCC_HOME $ENV{HCC_HOME} CACHE PATH "Path to which HCC has been installed") + endif() + endif() + if(IS_ABSOLUTE ${HCC_HOME} AND EXISTS ${HCC_HOME} AND IS_DIRECTORY ${HCC_HOME}) + message(STATUS "Looking for HCC in: " ${HCC_HOME}) + else() + message(FATAL_ERROR "Don't know where to find HCC. Please specify abolute path using -DHCC_HOME") + endif() + + # Determine HSA_PATH + if(NOT DEFINED HSA_PATH) + if(NOT DEFINED ENV{HSA_PATH}) + set(HSA_PATH "/opt/hsa" CACHE PATH "Path to which HSA runtime has been installed") + else() + set(HSA_PATH $ENV{HSA_PATH} CACHE PATH "Path to which HSA runtime has been installed") + endif() + endif() + if(IS_ABSOLUTE ${HSA_PATH} AND EXISTS ${HSA_PATH} AND IS_DIRECTORY ${HSA_PATH}) + message(STATUS "Looking for HSA runtime in: " ${HSA_PATH}) + else() + message(FATAL_ERROR "Don't know where to find HSA runtime. Please specify absolute path using -DHSA_PATH") + endif() +endif() + +# Set default build type if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "Release") endif() -if(NOT DEFINED HCC_HOME) - if(NOT DEFINED ENV{HCC_HOME}) - set(HCC_HOME "/opt/hcc" CACHE PATH "Path to which HCC has been installed") - else() - set(HCC_HOME $ENV{HCC_HOME} CACHE PATH "Path to which HCC has been installed") - endif() -endif() -if(IS_ABSOLUTE ${HCC_HOME} AND EXISTS ${HCC_HOME} AND IS_DIRECTORY ${HCC_HOME}) - message(STATUS "Looking for HCC in: " ${HCC_HOME}) -else() - message(FATAL_ERROR "Don't know where to find HCC. Please specify abolute path using -DHCC_HOME") -endif() - -if(NOT DEFINED HSA_PATH) - if(NOT DEFINED ENV{HSA_PATH}) - set(HSA_PATH "/opt/hsa" CACHE PATH "Path to which HSA runtime has been installed") - else() - set(HSA_PATH $ENV{HSA_PATH} CACHE PATH "Path to which HSA runtime has been installed") - endif() -endif() -if(IS_ABSOLUTE ${HSA_PATH} AND EXISTS ${HSA_PATH} AND IS_DIRECTORY ${HSA_PATH}) - message(STATUS "Looking for HSA runtime in: " ${HSA_PATH}) -else() - message(FATAL_ERROR "Don't know where to find HSA runtime. Please specify absolute path using -DHSA_PATH") -endif() - +# Determine HIP install path if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT AND CMAKE_INSTALL_PREFIX MATCHES "/usr/local") if(CMAKE_BUILD_TYPE MATCHES Debug) set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_SOURCE_DIR} CACHE PATH "Installation path for HIP" FORCE) @@ -50,51 +75,70 @@ else() message(FATAL_ERROR "Don't know where to install HIP. Please specify absolute path using -DCMAKE_INSTALL_PREFIX") endif() -include_directories(${PROJECT_SOURCE_DIR}/include) - -set(CMAKE_CXX_COMPILER "${HCC_HOME}/bin/hcc") -set(CMAKE_C_COMPILER "${HCC_HOME}/bin/hcc") - -set(CMAKE_CXX_FLAGS " -hc -I${HCC_HOME}/include -I${HSA_PATH}/include -stdlib=libc++ ") -set(CMAKE_C_FLAGS " -hc -I${HCC_HOME}/include -I${HSA_PATH}/include -stdlib=libc++ ") - -set(SOURCE_FILES src/device_util.cpp -src/hip_hcc.cpp -src/hip_device.cpp -src/hip_error.cpp -src/hip_event.cpp -src/hip_memory.cpp -src/hip_peer.cpp -src/hip_stream.cpp -src/staging_buffer.cpp) - +# Set if we need to build shared or static library if(NOT DEFINED ENV{HIP_USE_SHARED_LIBRARY}) set(HIP_USE_SHARED_LIBRARY 0) else() set(HIP_USE_SHARED_LIBRARY $ENV{HIP_USE_SHARED_LIBRARY}) endif() -#add_library(hip_hcc STATIC ${SOURCE_FILES}) -if(${HIP_USE_SHARED_LIBRARY} EQUAL 1) - add_library(hip_hcc SHARED ${SOURCE_FILES}) -else() - add_library(hip_hcc OBJECT ${SOURCE_FILES}) +############################# +# Build steps +############################# +# Build hip_hcc if platform is hcc +if(HIP_PLATFORM STREQUAL "hcc") + include_directories(${PROJECT_SOURCE_DIR}/include) + + set(CMAKE_CXX_COMPILER "${HCC_HOME}/bin/hcc") + set(CMAKE_C_COMPILER "${HCC_HOME}/bin/hcc") + + set(CMAKE_CXX_FLAGS " -hc -I${HCC_HOME}/include -I${HSA_PATH}/include -stdlib=libc++ ") + set(CMAKE_C_FLAGS " -hc -I${HCC_HOME}/include -I${HSA_PATH}/include -stdlib=libc++ ") + + set(SOURCE_FILES src/device_util.cpp + src/hip_hcc.cpp + src/hip_device.cpp + src/hip_error.cpp + src/hip_event.cpp + src/hip_memory.cpp + src/hip_peer.cpp + src/hip_stream.cpp + src/staging_buffer.cpp) + + if(${HIP_USE_SHARED_LIBRARY} EQUAL 1) + add_library(hip_hcc SHARED ${SOURCE_FILES}) + else() + #add_library(hip_hcc STATIC ${SOURCE_FILES}) + add_library(hip_hcc OBJECT ${SOURCE_FILES}) + endif() + endif() -#install(TARGETS hip_hcc DESTINATION lib) -if(${HIP_USE_SHARED_LIBRARY} EQUAL 1) - install(TARGETS hip_hcc DESTINATION lib) -else() - install(DIRECTORY ${PROJECT_BINARY_DIR}/CMakeFiles/hip_hcc.dir/src/ DESTINATION lib) +############################# +# Install steps +############################# +# Install hip_hcc if platform is hcc +if(HIP_PLATFORM STREQUAL "hcc") + if(${HIP_USE_SHARED_LIBRARY} EQUAL 1) + install(TARGETS hip_hcc DESTINATION lib) + else() + #install(TARGETS hip_hcc DESTINATION lib) + install(DIRECTORY ${PROJECT_BINARY_DIR}/CMakeFiles/hip_hcc.dir/src/ DESTINATION lib) + endif() endif() -set(EXECUTE_COMMAND test ${CMAKE_INSTALL_PREFIX} -ef ${CMAKE_CURRENT_SOURCE_DIR}) -execute_process(COMMAND ${EXECUTE_COMMAND} RESULT_VARIABLE INSTALL_SOURCE) + +# Install src, bin, include if necessary +execute_process(COMMAND test ${CMAKE_INSTALL_PREFIX} -ef ${CMAKE_CURRENT_SOURCE_DIR} + RESULT_VARIABLE INSTALL_SOURCE) if(NOT ${INSTALL_SOURCE} EQUAL 0) install(DIRECTORY src DESTINATION .) install(DIRECTORY bin DESTINATION . USE_SOURCE_PERMISSIONS) install(DIRECTORY include DESTINATION .) endif() +############################# +# Packaging steps +############################# set(CPACK_SET_DESTDIR TRUE) set(CPACK_INSTALL_PREFIX "/opt/rocm/hip") set(CPACK_PACKAGE_NAME "hip") From 10bc7e683f3537b8e87aae5095ff532fdab6115b Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Tue, 12 Apr 2016 09:22:03 -0500 Subject: [PATCH 33/40] Removed pragma once [ROCm/hip commit: 4dfe27e8b51339e188c467f7b5883c353f570e3c] --- projects/hip/include/hcc_detail/hip_runtime.h | 6 ++++-- projects/hip/include/hcc_detail/hip_runtime_api.h | 7 +++++-- projects/hip/include/hcc_detail/hip_texture.h | 10 +++++++++- projects/hip/include/hcc_detail/hip_vector_types.h | 6 ++++++ projects/hip/include/hcc_detail/host_defines.h | 5 +++++ projects/hip/include/hcc_detail/staging_buffer.h | 6 +++++- projects/hip/include/hcc_detail/trace_helper.h | 7 ++++++- 7 files changed, 40 insertions(+), 7 deletions(-) diff --git a/projects/hip/include/hcc_detail/hip_runtime.h b/projects/hip/include/hcc_detail/hip_runtime.h index 6a69c0441b..aa420e992d 100644 --- a/projects/hip/include/hcc_detail/hip_runtime.h +++ b/projects/hip/include/hcc_detail/hip_runtime.h @@ -24,7 +24,9 @@ THE SOFTWARE. * @brief Contains definitions of APIs for HIP runtime. */ -#pragma once +//#pragma once +#ifndef HIP_RUNTIME_H +#define HIP_RUNTIME_H //--- // Top part of file can be compiled with any compiler @@ -574,4 +576,4 @@ do {\ */ - +#endif diff --git a/projects/hip/include/hcc_detail/hip_runtime_api.h b/projects/hip/include/hcc_detail/hip_runtime_api.h index 58df3a2068..013597bba6 100644 --- a/projects/hip/include/hcc_detail/hip_runtime_api.h +++ b/projects/hip/include/hcc_detail/hip_runtime_api.h @@ -19,8 +19,9 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#pragma once - +//#pragma once +#ifndef HIP_RUNTIME_API_H +#define HIP_RUNTIME_API_H /** * @file hcc_detail/hip_runtime_api.h * @brief Contains C function APIs for HIP runtime. This file does not use any HCC builtin or special language extensions (-hc mode) ; those functions in hip_runtime.h. @@ -1065,3 +1066,5 @@ hipError_t hipDriverGetVersion(int *driverVersion) ; /** * @} */ + +#endif diff --git a/projects/hip/include/hcc_detail/hip_texture.h b/projects/hip/include/hcc_detail/hip_texture.h index 1a20f1960b..53a6acf2bf 100644 --- a/projects/hip/include/hcc_detail/hip_texture.h +++ b/projects/hip/include/hcc_detail/hip_texture.h @@ -19,7 +19,12 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#pragma once + +//#pragma once + +#ifndef HIP_TEXTURE_H +#define HIP_TEXTURE_H + /** * @file hcc_detail/hip_texture.h * @brief HIP C++ Texture API for hcc compiler @@ -201,3 +206,6 @@ hipError_t hipUnbindTexture(struct texture *tex) /** * @} */ + +#endif + diff --git a/projects/hip/include/hcc_detail/hip_vector_types.h b/projects/hip/include/hcc_detail/hip_vector_types.h index 7d3ed98431..50030e4756 100644 --- a/projects/hip/include/hcc_detail/hip_vector_types.h +++ b/projects/hip/include/hcc_detail/hip_vector_types.h @@ -25,6 +25,9 @@ THE SOFTWARE. * @brief Defines the different newt vector types for HIP runtime. */ +#ifndef HIP_VECTOR_TYPES_H +#define HIP_VECTOR_TYPES_H + #if defined (__HCC__) && (__hcc_workweek__ < 16032) #error("This version of HIP requires a newer version of HCC."); #endif @@ -196,3 +199,6 @@ TWO_COMPONENT_ACCESS (double, double2); THREE_COMPONENT_ACCESS(double, double3); FOUR_COMPONENT_ACCESS (double, double4); */ + +#endif + diff --git a/projects/hip/include/hcc_detail/host_defines.h b/projects/hip/include/hcc_detail/host_defines.h index 6e12e26e0b..dd9d60fc27 100644 --- a/projects/hip/include/hcc_detail/host_defines.h +++ b/projects/hip/include/hcc_detail/host_defines.h @@ -25,6 +25,9 @@ THE SOFTWARE. * @brief TODO-doc */ +#ifndef HOST_DEFINES_H +#define HOST_DEFINES_H + #ifdef __HCC__ /** * Function and kernel markers @@ -67,3 +70,5 @@ THE SOFTWARE. #define __constant__ #endif + +#endif diff --git a/projects/hip/include/hcc_detail/staging_buffer.h b/projects/hip/include/hcc_detail/staging_buffer.h index 7152bb2216..fe53f8474d 100644 --- a/projects/hip/include/hcc_detail/staging_buffer.h +++ b/projects/hip/include/hcc_detail/staging_buffer.h @@ -17,7 +17,9 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#pragma once +//#pragma once +#ifndef STAGING_BUFFER_H +#define STAGING_BUFFER_H #include "hsa.h" @@ -58,3 +60,5 @@ private: hsa_signal_t _completion_signal[_max_buffers]; std::mutex _copy_lock; // provide thread-safe access }; + +#endif diff --git a/projects/hip/include/hcc_detail/trace_helper.h b/projects/hip/include/hcc_detail/trace_helper.h index 13af1eab26..3740d8b9a7 100644 --- a/projects/hip/include/hcc_detail/trace_helper.h +++ b/projects/hip/include/hcc_detail/trace_helper.h @@ -16,7 +16,10 @@ LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#pragma once +//#pragma once + +#ifndef TRACE_HELPER_H +#define TRACE_HELPER_H #include #include @@ -116,3 +119,5 @@ inline std::string ToString(T first, Args... args) { return ToString(first) + ", " + ToString(args...) ; } + +#endif From 181b44df3fc9fad3781edc6aac53eea19058ea67 Mon Sep 17 00:00:00 2001 From: Jack Chung Date: Wed, 13 Apr 2016 13:56:26 +0800 Subject: [PATCH 34/40] Add missing USE_PEER_TO_PEER macro definition [ROCm/hip commit: 210ba67b84996c040cbaf8c121cab4b5370fa928] --- projects/hip/include/hcc_detail/hip_hcc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/projects/hip/include/hcc_detail/hip_hcc.h b/projects/hip/include/hcc_detail/hip_hcc.h index 66388d3b2c..deb9fd0b04 100644 --- a/projects/hip/include/hcc_detail/hip_hcc.h +++ b/projects/hip/include/hcc_detail/hip_hcc.h @@ -38,6 +38,7 @@ THE SOFTWARE. // Compile peer-to-peer support. // >= 2 : use HCC hc:accelerator::get_is_peer // >= 3 : use hc::am_memtracker_update_peers(...) +#define USE_PEER_TO_PEER 0 // Use new lock API in HCC: #define USE_HCC_LOCK 0 From 0618c39648e41a8bd39fa8b0a18f248b360df197 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Wed, 13 Apr 2016 17:32:38 -0500 Subject: [PATCH 35/40] add hcc dialects sample [ROCm/hip commit: 624b2f35ffdc09fc757a63a8be59f336266439ef] --- .../hip/samples/0_Intro/hcc_dialects/Makefile | 66 +++++++++++++++++++ .../hcc_dialects/vadd_amp_arrayview.cpp | 48 ++++++++++++++ .../0_Intro/hcc_dialects/vadd_hc_am.cpp | 59 +++++++++++++++++ .../0_Intro/hcc_dialects/vadd_hc_array.cpp | 53 +++++++++++++++ .../0_Intro/hcc_dialects/vadd_hc_array.hc | 33 ++++++++++ .../hcc_dialects/vadd_hc_arrayview.cpp | 48 ++++++++++++++ .../samples/0_Intro/hcc_dialects/vadd_hip.cpp | 51 ++++++++++++++ 7 files changed, 358 insertions(+) create mode 100644 projects/hip/samples/0_Intro/hcc_dialects/Makefile create mode 100644 projects/hip/samples/0_Intro/hcc_dialects/vadd_amp_arrayview.cpp create mode 100644 projects/hip/samples/0_Intro/hcc_dialects/vadd_hc_am.cpp create mode 100644 projects/hip/samples/0_Intro/hcc_dialects/vadd_hc_array.cpp create mode 100644 projects/hip/samples/0_Intro/hcc_dialects/vadd_hc_array.hc create mode 100644 projects/hip/samples/0_Intro/hcc_dialects/vadd_hc_arrayview.cpp create mode 100644 projects/hip/samples/0_Intro/hcc_dialects/vadd_hip.cpp diff --git a/projects/hip/samples/0_Intro/hcc_dialects/Makefile b/projects/hip/samples/0_Intro/hcc_dialects/Makefile new file mode 100644 index 0000000000..108d30201c --- /dev/null +++ b/projects/hip/samples/0_Intro/hcc_dialects/Makefile @@ -0,0 +1,66 @@ +HCC_HOME?=/opt/rocm/hcc +HCC = $(HCC_HOME)/bin/hcc + +HCC_CFLAGS= `$(HCC_HOME)/bin/hcc-config --cxxflags` +HCC_LDFLAGS= `$(HCC_HOME)/bin/hcc-config --ldflags` + +CPPAMP_CFLAGS= -std=c++amp -stdlib=libc++ -I/opt/hcc/include +CPPAMP_LDFLAGS= -std=c++amp -L/opt/hcc/lib -Wl,--rpath=/opt/hcc/lib -lc++ -lc++abi -ldl -lpthread -Wl,--whole-archive -lmcwamp -Wl,--no-whole-archive + +HIP_PATH?=/opt/rocm/hip +HIPCC=$(HIP_PATH)/bin/hipcc +HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform) + +ifneq (${HIP_PLATFORM}, hcc) +$(error hcc_dialects requires hcc compiler and only runs on hcc platform) +endif + + +TARGETS=vadd_hc_arrayview vadd_hc_array vadd_amp_arrayview vadd_hip + +all: $(TARGETS) + +clean: + rm -f $(TARGETS) *.o + +run: $(TARGETS) + @for t in $(TARGETS); do\ + echo "Running $$t"; \ + ./$$t; \ + done + + +# HCC version: +vadd_hc_arrayview.o: vadd_hc_arrayview.cpp + $(HCC) $(HCC_CFLAGS) -c $< -o $@ +vadd_hc_arrayview: vadd_hc_arrayview.o + $(HCC) $(HCC_LDFLAGS) $< -o $@ + + +# HCC version, using explicit arrays: +vadd_hc_array.o: vadd_hc_array.cpp + $(HCC) $(HCC_CFLAGS) -c $< -o $@ +vadd_hc_array: vadd_hc_array.o + $(HCC) $(HCC_LDFLAGS) $< -o $@ + + +# HCC version, using AM (accelerator memory) pointer +vadd_hc_am.o: vadd_hc_am.cpp + $(HCC) $(HCC_CFLAGS) -c $< -o $@ +vadd_hc_am: vadd_hc_am.o + $(HCC) $(HCC_LDFLAGS) $< -o $@ + + + +# HIP version: +vadd_hip.o: vadd_hip.cpp + $(HIPCC) -c $< -o $@ +vadd_hip: vadd_hip.o + $(HIPCC) $< -o $@ + + +# AMP version: +vadd_amp_arrayview.o: vadd_amp_arrayview.cpp + $(HCC) $(CPPAMP_CFLAGS) -c $< -o $@ +vadd_amp_arrayview: vadd_amp_arrayview.o + $(HCC) $(CPPAMP_LDFLAGS) $< -o $@ diff --git a/projects/hip/samples/0_Intro/hcc_dialects/vadd_amp_arrayview.cpp b/projects/hip/samples/0_Intro/hcc_dialects/vadd_amp_arrayview.cpp new file mode 100644 index 0000000000..6fdea5d830 --- /dev/null +++ b/projects/hip/samples/0_Intro/hcc_dialects/vadd_amp_arrayview.cpp @@ -0,0 +1,48 @@ +// Simple test showing how to use C++AMP syntax with array_view. +// The code uses AMP's array_view class, which provides automatic data synchronization +// of data between the host and the accelerator. As noted below, the HCC runtime +// will automatically copy data to and from the host, without the user needing +// to manually perform such copies. This is an excellent mode for developers +// new to GPU programming and matches the memory models provided by recent systems where +// CPU and GPU share the same memory pool. Advanced programmers may prefer +// more explicit control over the data movement - shown in the other vadd_hc_array and +// vadd_hc_am examples. +// This example shows the similarity between C++AMP and and HC for simple cases where +// implicit data transfer is used - really the only difference is the namespace. +// Other examples show some of the more advanced controls. + +#include + +int main(int argc, char *argv[]) +{ + int sizeElements = 1000000; + + // Allocate auto-managed host/device views of data: + concurrency::array_view A(sizeElements); + concurrency::array_view B(sizeElements); + concurrency::array_view C(sizeElements); + + // Initialize host data + for (int i=0; i (sizeElements), + [=] (concurrency::index<1> idx) restrict(amp) { + int i = idx[0]; + C[i] = A[i] + B[i]; + }); + + for (int i=0; i +#include + +int main(int argc, char *argv[]) +{ + int sizeElements = 1000000; + size_t sizeBytes = sizeElements * sizeof(float); + + // Allocate host memory + float *A_h = (float*)malloc(sizeBytes); + float *B_h = (float*)malloc(sizeBytes); + float *C_h = (float*)malloc(sizeBytes); + + // Allocate device pointers: + // Unlike array_view, these must be explicitly managed by user: + hc::accelerator acc; // grab default accelerator where we want to allocate memory: + hc::accelerator_view av = acc.get_default_view(); + + float *A_d, *B_d, *C_d; + A_d = hc::am_alloc(sizeBytes, acc, 0); + B_d = hc::am_alloc(sizeBytes, acc, 0); + C_d = hc::am_alloc(sizeBytes, acc, 0); + + // Initialize host data + for (int i=0; i (sizeElements), + [&] (hc::index<1> idx) [[hc]] { + int i = idx[0]; + C_d[i] = A_d[i] + B_d[i]; + }); + + + // This copy is in same AV as the kernel and thus will wait for the kernel to finish before executing. + av.copy(C_d, C_h); // C++ copy D2H + + + for (int i=0; i + +int main(int argc, char *argv[]) +{ + int sizeElements = 1000000; + size_t sizeBytes = sizeElements * sizeof(float); + + // Allocate host memory + float *A_h = (float*)malloc(sizeBytes); + float *B_h = (float*)malloc(sizeBytes); + float *C_h = (float*)malloc(sizeBytes); + + // Allocate device arrays<> + // Unlike array_view, these must be explicitly managed by user: + hc::array A_d(sizeElements); + hc::array B_d(sizeElements); + hc::array C_d(sizeElements); + + // Initialize host data + for (int i=0; i types are not implicitly copied, so we performed copies above. + hc::parallel_for_each(hc::extent<1> (sizeElements), + [&] (hc::index<1> idx) [[hc]] { + int i = idx[0]; + C_d[i] = A_d[i] + B_d[i]; + }); + + // HCC runtime knows that C_d depends on previous PFE and will force the copy to wait for the PFE to complte. + hc::copy(C_d, C_h); // C++ copy D2H + + + for (int i=0; i + +int main(int argc, char *argv[]) +{ + int size = 1000000; + + // Allocate auto-managed host/device views of data: + hc::array_view A(size); + hc::array_view B(size); + hc::array_view C(size); + + // Initialize host data + for (int i=0; i (size), + [=] (hc::index<1> idx) [[hc]] { + int i = idx[0]; + C[i] = A[i] + B[i]; + }); + + for (int i=0; i + +int main(int argc, char *argv[]) +{ + int sizeElements = 1000000; + + // Allocate auto-managed host/device views of data: + hc::array_view A(sizeElements); + hc::array_view B(sizeElements); + hc::array_view C(sizeElements); + + // Initialize host data + for (int i=0; i (sizeElements), + [=] (hc::index<1> idx) [[hc]] { + int i = idx[0]; + C[i] = A[i] + B[i]; + }); + + for (int i=0; i + +__global__ void vadd_hip(hipLaunchParm lp, const float *a, const float *b, float *c, int N) +{ + int idx = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + + if (idx < N) { + c[idx] = a[idx] + b[idx]; + } +} + + +int main(int argc, char *argv[]) +{ + int sizeElements = 1000000; + size_t sizeBytes = sizeElements * sizeof(float); + + // Allocate host memory + float *A_h = (float*)malloc(sizeBytes); + float *B_h = (float*)malloc(sizeBytes); + float *C_h = (float*)malloc(sizeBytes); + + // Allocate device memory: + float *A_d, *B_d, *C_d; + hipMalloc(&A_d, sizeBytes); + hipMalloc(&B_d, sizeBytes); + hipMalloc(&C_d, sizeBytes); + + // Initialize host data + for (int i=0; i Date: Wed, 13 Apr 2016 17:37:39 -0500 Subject: [PATCH 36/40] Fix HIP_PATH, CHECK macro in samples. [ROCm/hip commit: 8bbe32a708267bdabe1d94c91be5e83b1aa8edd4] --- projects/hip/samples/0_Intro/bit_extract/Makefile | 2 +- projects/hip/samples/0_Intro/bit_extract/bit_extract.cpp | 7 +++++-- projects/hip/samples/0_Intro/square/square.cu | 7 +++++-- projects/hip/samples/0_Intro/square/square.hipref.cpp | 7 +++++-- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/projects/hip/samples/0_Intro/bit_extract/Makefile b/projects/hip/samples/0_Intro/bit_extract/Makefile index cdf793363b..b71828f5fa 100644 --- a/projects/hip/samples/0_Intro/bit_extract/Makefile +++ b/projects/hip/samples/0_Intro/bit_extract/Makefile @@ -1,6 +1,6 @@ #Dependencies : [MYHIP]/bin must be in user's path. -HIP_PATH=?../../.. +HIP_PATH?=../../.. HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform) HIPCC=$(HIP_PATH)/bin/hipcc diff --git a/projects/hip/samples/0_Intro/bit_extract/bit_extract.cpp b/projects/hip/samples/0_Intro/bit_extract/bit_extract.cpp index 5545a99c0f..bdc8182c38 100644 --- a/projects/hip/samples/0_Intro/bit_extract/bit_extract.cpp +++ b/projects/hip/samples/0_Intro/bit_extract/bit_extract.cpp @@ -24,11 +24,14 @@ THE SOFTWARE. #include -#define CHECK(error) \ +#define CHECK(cmd) \ +{\ + hipError_t error = cmd;\ if (error != hipSuccess) { \ fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \ exit(EXIT_FAILURE);\ - } + }\ +} void __global__ bit_extract_kernel(hipLaunchParm lp, uint32_t *C_d, const uint32_t *A_d, size_t N) diff --git a/projects/hip/samples/0_Intro/square/square.cu b/projects/hip/samples/0_Intro/square/square.cu index 996344ed49..8b6980cd02 100644 --- a/projects/hip/samples/0_Intro/square/square.cu +++ b/projects/hip/samples/0_Intro/square/square.cu @@ -22,11 +22,14 @@ THE SOFTWARE. #include #include -#define CHECK(error) \ +#define CHECK(cmd) \ +{\ + hipError_t error = cmd;\ if (error != cudaSuccess) { \ fprintf(stderr, "error: '%s'(%d) at %s:%d\n", cudaGetErrorString(error), error,__FILE__, __LINE__); \ exit(EXIT_FAILURE);\ - } + }\ +} /* diff --git a/projects/hip/samples/0_Intro/square/square.hipref.cpp b/projects/hip/samples/0_Intro/square/square.hipref.cpp index 5d53a8d584..aa14077738 100644 --- a/projects/hip/samples/0_Intro/square/square.hipref.cpp +++ b/projects/hip/samples/0_Intro/square/square.hipref.cpp @@ -22,11 +22,14 @@ THE SOFTWARE. #include #include -#define CHECK(error) \ +#define CHECK(cmd) \ +{\ + hipError_t error = cmd;\ if (error != hipSuccess) { \ fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \ exit(EXIT_FAILURE);\ - } + }\ +} /* From 4ee0549d4fba6875203ec13ae49232cbe139a041 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Thu, 14 Apr 2016 16:36:55 +0530 Subject: [PATCH 37/40] Change default HIP installation to /opt/rocm/hip [ROCm/hip commit: 18af18476b69c89b481e0cbf412028d5f4e25124] --- projects/hip/CMakeLists.txt | 2 +- projects/hip/bin/hipcc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/hip/CMakeLists.txt b/projects/hip/CMakeLists.txt index 124295fbf7..e7eee21b01 100644 --- a/projects/hip/CMakeLists.txt +++ b/projects/hip/CMakeLists.txt @@ -64,7 +64,7 @@ if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT AND CMAKE_INSTALL_PREFIX MATCHES if(CMAKE_BUILD_TYPE MATCHES Debug) set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_SOURCE_DIR} CACHE PATH "Installation path for HIP" FORCE) elseif(CMAKE_BUILD_TYPE MATCHES Release) - set(CMAKE_INSTALL_PREFIX "/opt/hip" CACHE PATH "Installation path for HIP" FORCE) + set(CMAKE_INSTALL_PREFIX "/opt/rocm/hip" CACHE PATH "Installation path for HIP" FORCE) else() message(FATAL_ERROR "Invalid CMAKE_BUILD_TYPE specified. Valid values are Debug and Release") endif() diff --git a/projects/hip/bin/hipcc b/projects/hip/bin/hipcc index ecf03e24ee..33bc1d9eca 100755 --- a/projects/hip/bin/hipcc +++ b/projects/hip/bin/hipcc @@ -196,7 +196,7 @@ if ($needHipHcc) { $HIP_USE_SHARED_LIBRARY = $ENV{'HIP_USE_SHARED_LIBRARY'}; $HIP_USE_SHARED_LIBRARY = 0 unless defined $HIP_USE_SHARED_LIBRARY; - #$HIPLDFLAGS .= " -L/opt/hip/lib -lhip_hcc" ; + #$HIPLDFLAGS .= " -L/opt/rocm/hip/lib -lhip_hcc" ; if ($HIP_USE_SHARED_LIBRARY) { $HIPLDFLAGS .= " -L$HIP_PATH/lib -Wl,--rpath=$HIP_PATH/lib -lhip_hcc"; } else { From 3f0256a3b11f92676a9acd232d2a36cedbda4b66 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Thu, 14 Apr 2016 16:37:27 +0530 Subject: [PATCH 38/40] Add hip soft-link to include for migrating to new include format [ROCm/hip commit: 011a0335d4e4ff61d36f88f114587afe7fcd8112] --- projects/hip/include/hip | 1 + 1 file changed, 1 insertion(+) create mode 120000 projects/hip/include/hip diff --git a/projects/hip/include/hip b/projects/hip/include/hip new file mode 120000 index 0000000000..f5030fe889 --- /dev/null +++ b/projects/hip/include/hip @@ -0,0 +1 @@ +../include \ No newline at end of file From 4eb447f7faeab4b1060917b4456a91bf48fb2265 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Thu, 14 Apr 2016 07:19:07 -0500 Subject: [PATCH 39/40] update README for new make/installation steps, new FAQ [ROCm/hip commit: 5bb753acdef9bc040aeb302294d982ec0794923d] --- projects/hip/README.md | 24 ++++++++++++++---------- projects/hip/docs/markdown/hip_faq.md | 6 ++++++ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/projects/hip/README.md b/projects/hip/README.md index 810c378436..39c1092c63 100644 --- a/projects/hip/README.md +++ b/projects/hip/README.md @@ -34,30 +34,34 @@ Make sure HIP_PATH is pointed to `/where/to/install/hip` and PATH includes `$HIP ## How do I get set up? ### Prerequisites - Choose Your Platform -HIP code can be developed either on AMD HSA or Boltzmann platform using hcc compiler, or a CUDA platform with nvcc installed: +HIP code can be developed either on AMD ROCm platform using hcc compiler, or a CUDA platform with nvcc installed: #### AMD (hcc): * Install [hcc](https://bitbucket.org/multicoreware/hcc/wiki/Home) including supporting HSA kernel and runtime driver stack -* By default HIP looks for hcc in /opt/hcc (can be overridden by setting HCC_HOME environment variable) -* By default HIP looks for HSA in /opt/hsa (can be overridden by setting HSA_PATH environment variable) +* By default HIP looks for hcc in /opt/rocm/hcc (can be overridden by setting HCC_HOME environment variable) +* By default HIP looks for HSA in /opt/rocm/hsa (can be overridden by setting HSA_PATH environment variable) * Ensure that ROCR runtime is installed and added to LD_LIBRARY_PATH +* Install HIP (from this GitHub repot). By default HIP is installed into /opt/rocm/hip (can be overridden by setting HIP_PATH environment variable). + +* Optionally, consider adding /opt/rocm/bin to your path to make it easier to use the tools. #### NVIDIA (nvcc) * Install CUDA SDK from manufacturer website * By default HIP looks for CUDA SDK in /usr/local/cuda (can be overriden by setting CUDA_PATH env variable) -### Add HIP/bin to your path. -For example, if this repot is cloned to ~/HIP, and you are running bash: ``` -> export PATH=$PATH:~/HIP/bin + +#### Verify your installation +Run hipconfig (instructions below assume default installation path) : ``` -Verify your can find hipconfig (one of the hip tools in bin dir): -``` -> hipconfig -pn -/home/me/HIP +> /opt/rocm/bin/hipconfig --full ``` +Compile and run the [square sample](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/tree/master/samples/0_Intro/square). + + + ### HCC Options #### Compiling CodeXL markers for HIP Functions diff --git a/projects/hip/docs/markdown/hip_faq.md b/projects/hip/docs/markdown/hip_faq.md index 1a62784100..f1d8c607ea 100644 --- a/projects/hip/docs/markdown/hip_faq.md +++ b/projects/hip/docs/markdown/hip_faq.md @@ -107,9 +107,15 @@ HIP is a portable C++ language that supports a strong subset of the CUDA run-tim A C++ dialect, hc is supported by the AMD HCC compiler. It provides C++ run time, C++ kernel-launch APIs (parallel_for_each), C++ kernel language, and several memory-management options, including pointers, arrays and array_view (with implicit data synchronization). It's intended to be a leading indicator of the ISO C++ standard. + ### HIP detected my platform (hcc vs nvcc) incorrectly - what should I do? + HIP will set the platform to HCC if it sees that the AMD graphics driver is installed and has detected an AMD GPU. Sometimes this isn't what you want - you can force HIP to recognize the platform by setting HIP_PLATFORM to hcc (or nvcc) ``` export HIP_PLATFORM=hcc + ``` +One symptom of this problem is the message "error: 'unknown error'(11) at square.hipref.cpp:56". This can occur if you have a CUDA installation on an AMD platform, and HIP incorrectly detects the platform as nvcc. HIP may be able to compile the application using the nvcc tool-chain, but will generate this error at runtime since the platform does not have a CUDA device. The fix is to set HIP_PLATFORM=hcc and rebuild the issue. + +If you see issues related to incorrect platform detection, please file an issue with the GitHub issue tracker so we can improve HIP's platform detection logic. From 3b82076adf12f85bd5781cd157a45755cad0fbb1 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Thu, 14 Apr 2016 09:17:13 -0500 Subject: [PATCH 40/40] Enabling make_* device code [ROCm/hip commit: 573224e55f30a87e1e04f159d28c157b8cfa27b1] --- projects/hip/include/hcc_detail/hip_vector_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/hip/include/hcc_detail/hip_vector_types.h b/projects/hip/include/hcc_detail/hip_vector_types.h index 50030e4756..f18bad8b89 100644 --- a/projects/hip/include/hcc_detail/hip_vector_types.h +++ b/projects/hip/include/hcc_detail/hip_vector_types.h @@ -115,7 +115,7 @@ typedef hc::short_vector::double2 double2; typedef hc::short_vector::double3 double3; typedef hc::short_vector::double4 double4; -/* + ///--- // Inline functions for creating vector types from basic types #define ONE_COMPONENT_ACCESS(T, VT) inline VT make_ ##VT (T x) { VT t; t.x = x; return t; }; @@ -198,7 +198,7 @@ ONE_COMPONENT_ACCESS (double, double1); TWO_COMPONENT_ACCESS (double, double2); THREE_COMPONENT_ACCESS(double, double3); FOUR_COMPONENT_ACCESS (double, double4); -*/ + #endif