diff --git a/hipamd/vdi/hip_device.cpp b/hipamd/vdi/hip_device.cpp index 65c09398fe..3476ac14fc 100644 --- a/hipamd/vdi/hip_device.cpp +++ b/hipamd/vdi/hip_device.cpp @@ -24,8 +24,9 @@ namespace hip { -amd::HostQueue* Device::NullStream() { - amd::HostQueue* null_queue = null_stream_.asHostQueue(); +// ================================================================================================ +amd::HostQueue* Device::NullStream(bool skip_alloc) { + amd::HostQueue* null_queue = null_stream_.asHostQueue(skip_alloc); if (null_queue == nullptr) { return nullptr; } @@ -34,7 +35,7 @@ amd::HostQueue* Device::NullStream() { return null_queue; } -}; +} hipError_t hipDeviceGet(hipDevice_t *device, int deviceId) { HIP_INIT_API(hipDeviceGet, device, deviceId); diff --git a/hipamd/vdi/hip_internal.hpp b/hipamd/vdi/hip_internal.hpp index 4f85e7b7d2..eda87cea7b 100755 --- a/hipamd/vdi/hip_internal.hpp +++ b/hipamd/vdi/hip_internal.hpp @@ -90,8 +90,13 @@ namespace hip { public: Stream(Device* dev, amd::CommandQueue::Priority p, unsigned int f = 0, bool null_stream = false); + + /// Creates the hip stream object, including AMD host queue bool Create(); - amd::HostQueue* asHostQueue(); + + /// Get device AMD host queue object. The method can allocate the queue + amd::HostQueue* asHostQueue(bool skip_alloc = false); + void Destroy(); void Finish() const; /// Get device ID associated with the current stream; @@ -147,7 +152,7 @@ namespace hip { return hipErrorPeerAccessNotEnabled; } } - amd::HostQueue* NullStream(); + amd::HostQueue* NullStream(bool skip_alloc = false); }; extern std::once_flag g_ihipInitialized; @@ -182,7 +187,6 @@ namespace hip { static Function* asFunction(hipFunction_t f) { return reinterpret_cast(f); } }; - }; struct ihipExec_t { diff --git a/hipamd/vdi/hip_memory.cpp b/hipamd/vdi/hip_memory.cpp index 54eacff530..593513c98d 100755 --- a/hipamd/vdi/hip_memory.cpp +++ b/hipamd/vdi/hip_memory.cpp @@ -25,6 +25,7 @@ #include "platform/command.hpp" #include "platform/memory.hpp" +// ================================================================================================ amd::Memory* getMemoryObject(const void* ptr, size_t& offset) { amd::Memory *memObj = amd::MemObjMap::FindMemObj(ptr); if (memObj != nullptr) { @@ -41,21 +42,39 @@ amd::Memory* getMemoryObject(const void* ptr, size_t& offset) { return memObj; } +// ================================================================================================ hipError_t ihipFree(void *ptr) { if (ptr == nullptr) { return hipSuccess; } - if (amd::SvmBuffer::malloced(ptr)) { - for (auto& dev : g_devices) { - dev->NullStream()->finish(); + + size_t offset = 0; + amd::Memory* memory_object = getMemoryObject(ptr, offset); + + if (memory_object != nullptr) { + // Check if it's an allocation in system memory and can be shared across all devices + if (memory_object->getMemFlags() & CL_MEM_SVM_FINE_GRAIN_BUFFER) { + for (auto& dev : g_devices) { + // Skip stream allocation, since if it wasn't allocated until free, then the device + // wasn't used + constexpr bool SkipStreamAlloc = true; + amd::HostQueue* queue = dev->NullStream(SkipStreamAlloc); + if (queue != nullptr) { + queue->finish(); + } + } + } else { + // Wait on the device, associated with the current memory object + hip::getNullStream(memory_object->getContext())->finish(); } - amd::SvmBuffer::free(*hip::getCurrentDevice()->asContext(), ptr); + amd::SvmBuffer::free(memory_object->getContext(), ptr); return hipSuccess; } return hipErrorInvalidValue; } +// ================================================================================================ hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { if (sizeBytes == 0) { diff --git a/hipamd/vdi/hip_stream.cpp b/hipamd/vdi/hip_stream.cpp index fbcd223ed2..e4bf4fe192 100644 --- a/hipamd/vdi/hip_stream.cpp +++ b/hipamd/vdi/hip_stream.cpp @@ -68,13 +68,13 @@ bool Stream::Create() { } // ================================================================================================ -amd::HostQueue* Stream::asHostQueue() { +amd::HostQueue* Stream::asHostQueue(bool skip_alloc) { // Access to the stream object is lock protected, because possible allocation amd::ScopedLock l(Lock()); if (queue_ == nullptr) { // Create the host queue for the first time - if (!Create()) { - return nullptr; + if (!skip_alloc) { + Create(); } } return queue_;