diff --git a/projects/clr/rocclr/device/device.cpp b/projects/clr/rocclr/device/device.cpp index 2f5c717545..e620ff6308 100644 --- a/projects/clr/rocclr/device/device.cpp +++ b/projects/clr/rocclr/device/device.cpp @@ -443,6 +443,7 @@ Device::Device() blitProgram_(nullptr), hwDebugMgr_(nullptr), context_(nullptr), + arena_mem_obj_(nullptr), vaCacheAccess_(nullptr), vaCacheMap_(nullptr), index_(0) { @@ -459,6 +460,10 @@ Device::~Device() { delete vaCacheAccess_; } + if (arena_mem_obj_ != nullptr) { + arena_mem_obj_->release(); + } + // Destroy device settings if (settings_ != nullptr) { delete settings_; diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index 9adac471b6..003f2f92f7 100644 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -1819,6 +1819,11 @@ class Device : public RuntimeObject { void SetActiveWait(bool state) { activeWait_ = state; } + virtual amd::Memory* GetArenaMemObj(const void* ptr, size_t& offset) { + ShouldNotReachHere(); + return nullptr; + } + protected: //! Enable the specified extension char* getExtensionString(); @@ -1842,6 +1847,8 @@ class Device : public RuntimeObject { static amd::Monitor p2p_stage_ops_; //!< Lock to serialise cache for the P2P resources static Memory* p2p_stage_; //!< Staging resources + amd::Memory* arena_mem_obj_; //!< Arena memory object + private: const Isa *isa_; //!< Device isa bool IsTypeMatching(cl_device_type type, bool offlineDevices); diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index aaa6b61b65..6fe6dd2032 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -790,6 +790,16 @@ bool Device::create() { return false; } + // only create arena_mem_object if CPU memory is accessible. + if (info_.hmmCpuMemoryAccessible_) { + arena_mem_obj_ = new (context()) amd::ArenaMemory(context()); + if (!arena_mem_obj_->create(nullptr)) { + LogError("Arena Memory Creation failed!"); + arena_mem_obj_->release(); + arena_mem_obj_ = nullptr; + } + } + return true; } @@ -2166,7 +2176,8 @@ void* Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_ if (nullptr == svmPtr) { // create a hidden buffer, which will allocated on the device later - mem = new (context) amd::Buffer(context, flags, size, reinterpret_cast(1)); + mem = new (context) amd::Buffer(context, flags, size, + reinterpret_cast(amd::Memory::MemoryType::kSvmMemoryPtr)); if (mem == nullptr) { LogError("failed to create a svm mem object!"); return nullptr; @@ -2940,5 +2951,19 @@ device::Signal* Device::createSignal() const { return new roc::Signal(); } +amd::Memory* Device::GetArenaMemObj(const void* ptr, size_t& offset) { + // If arena_mem_obj_ is null, then HMM and Xnack is disabled. Return nullptr. + if (arena_mem_obj_ == nullptr) { + return nullptr; + } + + // Calculate the offset of the pointer. + const void* dev_ptr = reinterpret_cast(arena_mem_obj_->getDeviceMemory( + *arena_mem_obj_->getContext().devices()[0])->virtualAddress()); + offset = reinterpret_cast(ptr) - reinterpret_cast(dev_ptr); + + return arena_mem_obj_; +} + } // namespace roc #endif // WITHOUT_HSA_BACKEND diff --git a/projects/clr/rocclr/device/rocm/rocdevice.hpp b/projects/clr/rocclr/device/rocm/rocdevice.hpp index 8188b40359..b3d009c523 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.hpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.hpp @@ -497,6 +497,8 @@ class Device : public NullDevice { void getGlobalCUMask(std::string cuMaskStr); + virtual amd::Memory* GetArenaMemObj(const void* ptr, size_t& offset); + private: bool create(); diff --git a/projects/clr/rocclr/device/rocm/rocmemory.cpp b/projects/clr/rocclr/device/rocm/rocmemory.cpp index c9c2d7bc6f..9980661185 100644 --- a/projects/clr/rocclr/device/rocm/rocmemory.cpp +++ b/projects/clr/rocclr/device/rocm/rocmemory.cpp @@ -724,7 +724,7 @@ bool Buffer::create() { flags_ |= HostMemoryDirectAccess; } - if (owner()->getSvmPtr() == reinterpret_cast(1)) { + if (owner()->getSvmPtr() == reinterpret_cast(amd::Memory::MemoryType::kSvmMemoryPtr)) { if (isFineGrain) { if (memFlags & CL_MEM_ALLOC_HOST_PTR) { if (dev().info().hmmSupported_) { @@ -777,10 +777,16 @@ bool Buffer::create() { owner()->setSvmPtr(deviceMemory_); } else { deviceMemory_ = owner()->getSvmPtr(); - kind_ = MEMORY_KIND_PTRGIVEN; + if (owner()->getSvmPtr() == reinterpret_cast(amd::Memory::MemoryType + ::kArenaMemoryPtr)) { + kind_ = MEMORY_KIND_ARENA; + } else { + kind_ = MEMORY_KIND_PTRGIVEN; + } } - if ((deviceMemory_ != nullptr) && (dev().settings().apuSystem_ || !isFineGrain)) { + if ((deviceMemory_ != nullptr) && (dev().settings().apuSystem_ || !isFineGrain) + && (kind_ != MEMORY_KIND_ARENA)) { const_cast(dev()).updateFreeMemory(size(), false); } diff --git a/projects/clr/rocclr/device/rocm/rocmemory.hpp b/projects/clr/rocclr/device/rocm/rocmemory.hpp index b1f5345f74..6f9468e417 100644 --- a/projects/clr/rocclr/device/rocm/rocmemory.hpp +++ b/projects/clr/rocclr/device/rocm/rocmemory.hpp @@ -39,7 +39,10 @@ class Memory : public device::Memory { MEMORY_KIND_HOST, MEMORY_KIND_INTEROP, - MEMORY_KIND_PTRGIVEN + + MEMORY_KIND_PTRGIVEN, + + MEMORY_KIND_ARENA }; Memory(const roc::Device& dev, amd::Memory& owner); diff --git a/projects/clr/rocclr/platform/memory.hpp b/projects/clr/rocclr/platform/memory.hpp index f718a91031..4943d6a7db 100644 --- a/projects/clr/rocclr/platform/memory.hpp +++ b/projects/clr/rocclr/platform/memory.hpp @@ -36,6 +36,7 @@ #include #include #include +#include #define CL_MEM_FOLLOW_USER_NUMA_POLICY (1u << 31) #define ROCCLR_MEM_HSA_SIGNAL_MEMORY (1u << 30) @@ -133,6 +134,12 @@ class Memory : public amd::RuntimeObject { : callback_(callback), data_(data) {} }; + public: + enum MemoryType { + kSvmMemoryPtr = 0x1, + kArenaMemoryPtr = 0x2 + }; + protected: typedef cl_mem_object_type Type; typedef cl_mem_flags Flags; @@ -639,6 +646,13 @@ class LiquidFlashFile : public RuntimeObject { virtual ObjectType objectType() const { return ObjectTypeLiquidFlashFile; } }; + +class ArenaMemory: public Buffer { +public: + ArenaMemory(Context& context) + : Buffer(context, 0, std::numeric_limits::max(), kArenaMemoryPtr) {} +}; + } // namespace amd #endif // MEMORY_H_