From 3389e6077adc729cd73a063fb659bc77e1d20fe4 Mon Sep 17 00:00:00 2001 From: sdashmiz Date: Mon, 20 Jun 2022 15:40:43 -0400 Subject: [PATCH] SWDEV-334233 - add support for p2p in windows Signed-off-by: sdashmiz Change-Id: I9109120b5444c400e65cfff869cb36e876ffd1fc [ROCm/clr commit: e176e27bf7cbaa41a1645af0043bb16be24913fb] --- projects/clr/rocclr/device/device.cpp | 27 +++++++++++++++++++ projects/clr/rocclr/device/device.hpp | 13 +++------ projects/clr/rocclr/device/pal/paldevice.cpp | 6 +++++ projects/clr/rocclr/device/pal/paldevice.hpp | 4 +++ projects/clr/rocclr/device/rocm/rocdevice.cpp | 26 ------------------ projects/clr/rocclr/device/rocm/rocdevice.hpp | 15 ----------- 6 files changed, 41 insertions(+), 50 deletions(-) diff --git a/projects/clr/rocclr/device/device.cpp b/projects/clr/rocclr/device/device.cpp index 54689a209f..e7c86be86d 100644 --- a/projects/clr/rocclr/device/device.cpp +++ b/projects/clr/rocclr/device/device.cpp @@ -85,6 +85,7 @@ bool VirtualDevice::ActiveWait() const { namespace amd { +amd::Monitor Device::lockP2P_("Lock P2P ON/OFF"); std::pair Isa::supportedIsas() { constexpr amd::Isa::Feature NONE = amd::Isa::Feature::Unsupported; constexpr amd::Isa::Feature ANY = amd::Isa::Feature::Any; @@ -709,6 +710,32 @@ bool Device::getDeviceIDs(cl_device_type deviceType, uint32_t numEntries, cl_dev return true; } + +bool Device::enableP2P(amd::Device* ptrDev) { + assert(ptrDev != nullptr); + amd::ScopedLock lock(lockP2P_); + Device* peerDev = static_cast(ptrDev); + if (std::find(enabled_p2p_devices_.begin(), enabled_p2p_devices_.end(), peerDev) == + enabled_p2p_devices_.end()) { + enabled_p2p_devices_.push_back(peerDev); + // Update access to all old allocations + amd::MemObjMap::UpdateAccess(static_cast(this)); + } + return true; +} + +bool Device::disableP2P(amd::Device* ptrDev) { + assert(ptrDev != nullptr); + amd::ScopedLock lock(lockP2P_); + Device* peerDev = static_cast(ptrDev); + //if device is present then remove + auto it = std::find(enabled_p2p_devices_.begin(), enabled_p2p_devices_.end(), peerDev); + if (it != enabled_p2p_devices_.end()) { + enabled_p2p_devices_.erase(it); + } + return true; +} + bool Device::UpdateStackSize(uint64_t stackSize) { uint32_t maxMemPerThread = info().localMemSizePerCU_ / info().maxThreadsPerCU_; if (maxMemPerThread < stackSize) { diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index 03fa65ec0a..0be48a7ad6 100644 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -1715,15 +1715,9 @@ class Device : public RuntimeObject { return true; } - virtual bool enableP2P(amd::Device* ptrDev) { - ShouldNotCallThis(); - return true; - } + bool enableP2P(amd::Device* ptrDev); - virtual bool disableP2P(amd::Device* ptrDev) { - ShouldNotCallThis(); - return true; - } + bool disableP2P(amd::Device* ptrDev); /** * @copydoc amd::Context::hostFree @@ -1973,6 +1967,7 @@ class Device : public RuntimeObject { static amd::Context* glb_ctx_; //!< Global context with all devices static amd::Monitor p2p_stage_ops_; //!< Lock to serialise cache for the P2P resources static Memory* p2p_stage_; //!< Staging resources + std::vector enabled_p2p_devices_; //!< List of user enabled P2P devices for this device std::once_flag heap_initialized_; //!< Heap buffer initialization flag device::Memory* heap_buffer_; //!< Preallocated heap buffer for memory allocations on device @@ -1989,7 +1984,7 @@ class Device : public RuntimeObject { #endif static std::vector* devices_; //!< All known devices - + static amd::Monitor lockP2P_; Monitor* vaCacheAccess_; //!< Lock to serialize VA caching access std::map* vaCacheMap_; //!< VA cache map uint32_t index_; //!< Unique device index diff --git a/projects/clr/rocclr/device/pal/paldevice.cpp b/projects/clr/rocclr/device/pal/paldevice.cpp index 72e5a24645..554ef27429 100644 --- a/projects/clr/rocclr/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/device/pal/paldevice.cpp @@ -2191,6 +2191,12 @@ void Device::hostFree(void* ptr, size_t size) const { amd::Os::releaseMemory(ptr, size); } +bool Device::deviceAllowAccess(void* ptr) const { + std::lock_guard lock(lockAllowAccess_); + // Empty function for now. + return true; +} + void* Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_mem_flags flags, void* svmPtr) const { alignment = std::max(alignment, static_cast(info_.memBaseAddrAlign_)); diff --git a/projects/clr/rocclr/device/pal/paldevice.hpp b/projects/clr/rocclr/device/pal/paldevice.hpp index 833ad9a493..7b8b92e8ad 100644 --- a/projects/clr/rocclr/device/pal/paldevice.hpp +++ b/projects/clr/rocclr/device/pal/paldevice.hpp @@ -543,6 +543,9 @@ class Device : public NullDevice { //! Returns PAL device interface Pal::IDevice* iDev() const { return device_; } + //! Allow access for peer device + bool deviceAllowAccess(void* dst) const; + RgpCaptureMgr* rgpCaptureMgr() const { return rgpCaptureMgr_; } //! Update free memory for OCL extension @@ -680,6 +683,7 @@ class Device : public NullDevice { mutable amd::Monitor scratchAlloc_; //!< Lock to serialise scratch allocation mutable amd::Monitor mapCacheOps_; //!< Lock to serialise cache for the map resources mutable amd::Monitor lockResourceOps_; //!< Lock to serialise resource access + mutable std::mutex lockAllowAccess_; //!< To serialize allow_access calls XferBuffers* xferRead_; //!< Transfer buffers read std::vector* mapCache_; //!< Map cache info structure ResourceCache* resourceCache_; //!< Resource cache diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index c211b95c3d..0b8e7f9930 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -106,7 +106,6 @@ std::vector roc::Device::gpu_agents_; std::vector roc::Device::cpu_agents_; address Device::mg_sync_ = nullptr; -amd::Monitor Device::lockP2P_("Lock P2P ON/OFF"); bool NullDevice::create(const amd::Isa &isa) { if (!isa.runtimeRocSupported()) { @@ -2020,31 +2019,6 @@ void* Device::hostNumaAlloc(size_t size, size_t alignment, bool atomics) const { void Device::hostFree(void* ptr, size_t size) const { memFree(ptr, size); } -bool Device::enableP2P(amd::Device* ptrDev) { - assert(ptrDev != nullptr); - amd::ScopedLock lock(lockP2P_); - Device* peerDev = static_cast(ptrDev); - if (std::find(enabled_p2p_devices_.begin(), enabled_p2p_devices_.end(), peerDev) == - enabled_p2p_devices_.end()) { - enabled_p2p_devices_.push_back(peerDev); - // Update access to all old allocations - amd::MemObjMap::UpdateAccess(static_cast(this)); - } - return true; -} - -bool Device::disableP2P(amd::Device* ptrDev) { - assert(ptrDev != nullptr); - amd::ScopedLock lock(lockP2P_); - Device* peerDev = static_cast(ptrDev); - //if device is present then remove - auto it = std::find(enabled_p2p_devices_.begin(), enabled_p2p_devices_.end(), peerDev); - if (it != enabled_p2p_devices_.end()) { - enabled_p2p_devices_.erase(it); - } - return true; -} - bool Device::deviceAllowAccess(void* ptr) const { std::lock_guard lock(lock_allow_access_); if (!p2pAgents().empty()) { diff --git a/projects/clr/rocclr/device/rocm/rocdevice.hpp b/projects/clr/rocclr/device/rocm/rocdevice.hpp index 44f416bbef..06cf8e1560 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.hpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.hpp @@ -253,16 +253,6 @@ class NullDevice : public amd::Device { return false; } - virtual bool disableP2P(amd::Device* peerDev) { - ShouldNotReachHere(); - return true; - } - - virtual bool enableP2P(amd::Device* peerDev) { - ShouldNotReachHere(); - return true; - } - virtual bool SetClockMode( const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; } @@ -430,9 +420,6 @@ class Device : public NullDevice { virtual void hostFree(void* ptr, size_t size = 0) const; - virtual bool enableP2P(amd::Device* peerDev); - virtual bool disableP2P(amd::Device* peerDev); - bool deviceAllowAccess(void* dst) const; void* deviceLocalAlloc(size_t size, bool atomics = false) const; @@ -589,11 +576,9 @@ class Device : public NullDevice { static std::vector gpu_agents_; static std::vector cpu_agents_; - static amd::Monitor lockP2P_; hsa_agent_t cpu_agent_; uint32_t preferred_numa_node_; std::vector p2p_agents_; //!< List of P2P agents available for this device - std::vector enabled_p2p_devices_; //!< List of user enabled P2P devices for this device mutable std::mutex lock_allow_access_; //!< To serialize allow_access calls hsa_agent_t bkendDevice_; uint32_t pciDeviceId_;