diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index 6cad0c696a..96a38c3d66 100644 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -84,6 +84,7 @@ class SvmUnmapMemoryCommand; class SvmPrefetchAsyncCommand; class TransferBufferFileCommand; class StreamOperationCommand; +class ExternalSemaphoreCmd; class HwDebugManager; class Isa; class Device; @@ -1171,6 +1172,7 @@ class VirtualDevice : public amd::HeapObject { virtual void submitKernel(amd::NDRangeKernelCommand& command) = 0; virtual void submitNativeFn(amd::NativeFnCommand& cmd) = 0; virtual void submitMarker(amd::Marker& cmd) = 0; + virtual void submitExternalSemaphoreCmd(amd::ExternalSemaphoreCmd& cmd) = 0; virtual void submitFillMemory(amd::FillMemoryCommand& cmd) = 0; virtual void submitMigrateMemObjects(amd::MigrateMemObjectsCommand& cmd) = 0; virtual void submitAcquireExtObjects(amd::AcquireExtObjectsCommand& cmd) = 0; @@ -1616,6 +1618,8 @@ class Device : public RuntimeObject { virtual bool globalFreeMemory(size_t* freeMemory //!< Free memory information on a GPU device ) const = 0; + virtual bool importExtSemaphore(void** extSemaphore, void* handle) = 0; + virtual void DestroyExtSemaphore(void* extSemaphore) = 0; /** * @return True if the device has its own custom host allocator to be used * instead of the generic OS allocation routines diff --git a/projects/clr/rocclr/device/pal/paldevice.cpp b/projects/clr/rocclr/device/pal/paldevice.cpp index c530a765f9..2a2eebb0ff 100644 --- a/projects/clr/rocclr/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/device/pal/paldevice.cpp @@ -35,6 +35,7 @@ #include "palLib.h" #include "palPlatform.h" #include "palDevice.h" +#include "palQueueSemaphore.h" #include "hsailctx.hpp" #include "vdi_common.hpp" @@ -2408,4 +2409,33 @@ bool Device::SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeI return result; } + +bool Device::importExtSemaphore(void** extSemaphore, void* 
handle) { + Pal::ExternalQueueSemaphoreOpenInfo palOpenInfo = {}; + palOpenInfo.externalSemaphore = handle; + palOpenInfo.flags.crossProcess = false; + palOpenInfo.flags.isReference = true; + Pal::Result result; + + size_t semaphoreSize = iDev()->GetExternalSharedQueueSemaphoreSize( + palOpenInfo, &result); + if (result != Pal::Result::Success) { + return false; + } + void* mem = amd::Os::alignedMalloc(semaphoreSize, 16); + result = iDev()->OpenExternalSharedQueueSemaphore( + palOpenInfo, mem, reinterpret_cast<Pal::IQueueSemaphore**> (extSemaphore)); + if (result != Pal::Result::Success) { + amd::Os::alignedFree(mem); + return false; + } + return true; +} + +void Device::DestroyExtSemaphore(void* extSemaphore) { + Pal::IQueueSemaphore* sem = reinterpret_cast<Pal::IQueueSemaphore*>(extSemaphore); + sem->Destroy(); + amd::Os::alignedFree(extSemaphore); +} + } // namespace pal diff --git a/projects/clr/rocclr/device/pal/paldevice.hpp b/projects/clr/rocclr/device/pal/paldevice.hpp index 51cc25331b..4c060a2dd7 100644 --- a/projects/clr/rocclr/device/pal/paldevice.hpp +++ b/projects/clr/rocclr/device/pal/paldevice.hpp @@ -136,6 +136,8 @@ class NullDevice : public amd::Device { return NULL; } virtual void svmFree(void* ptr) const { return; } + virtual bool importExtSemaphore(void** extSemaphore, void* handle) { return false; } + virtual void DestroyExtSemaphore(void* extSemaphore) { } void* Alloc(const Util::AllocInfo& allocInfo) { return allocator_.Alloc(allocInfo); } void Free(const Util::FreeInfo& freeInfo) { allocator_.Free(freeInfo); } @@ -592,6 +594,9 @@ class Device : public NullDevice { return false; } + virtual bool importExtSemaphore(void** extSemaphore, void* handle); + virtual void DestroyExtSemaphore(void* extSemaphore); + private: static void PAL_STDCALL PalDeveloperCallback(void* pPrivateData, const Pal::uint32 deviceIndex, Pal::Developer::CallbackType type, void* pCbData); diff --git a/projects/clr/rocclr/device/pal/palvirtual.cpp b/projects/clr/rocclr/device/pal/palvirtual.cpp index 
c9229c40f9..7260e0d244 100644 --- a/projects/clr/rocclr/device/pal/palvirtual.cpp +++ b/projects/clr/rocclr/device/pal/palvirtual.cpp @@ -42,6 +42,7 @@ #include #include "palQueue.h" #include "palFence.h" +#include "palQueueSemaphore.h" #ifdef _WIN32 #include @@ -2639,6 +2640,22 @@ void VirtualGPU::submitMarker(amd::Marker& vcmd) { } } +void VirtualGPU::submitExternalSemaphoreCmd(amd::ExternalSemaphoreCmd& cmd) { + + const Pal::IQueueSemaphore* sem = reinterpret_cast<const Pal::IQueueSemaphore*>(cmd.sem_ptr()); + + if (cmd.semaphoreCmd() == + amd::ExternalSemaphoreCmd::COMMAND_SIGNAL_EXTSEMAPHORE) { + queues_[MainEngine]->iQueue_->SignalQueueSemaphore(const_cast<Pal::IQueueSemaphore*>(sem), + cmd.fence()); + } else { + queues_[MainEngine]->iQueue_->WaitQueueSemaphore(const_cast<Pal::IQueueSemaphore*>(sem), + cmd.fence()); + } + +} + + void VirtualGPU::releaseMemory(GpuMemoryReference* mem) { queues_[MainEngine]->removeCmdMemRef(mem); if (!dev().settings().disableSdma_) { diff --git a/projects/clr/rocclr/device/pal/palvirtual.hpp b/projects/clr/rocclr/device/pal/palvirtual.hpp index 4910615658..ed15a64930 100644 --- a/projects/clr/rocclr/device/pal/palvirtual.hpp +++ b/projects/clr/rocclr/device/pal/palvirtual.hpp @@ -349,6 +349,8 @@ class VirtualGPU : public device::VirtualDevice { virtual void submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& cmd); virtual void submitTransferBufferFromFile(amd::TransferBufferFileCommand& cmd); + void submitExternalSemaphoreCmd(amd::ExternalSemaphoreCmd& cmd); + void releaseMemory(GpuMemoryReference* mem); void flush(amd::Command* list = nullptr, bool wait = false); diff --git a/projects/clr/rocclr/device/rocm/rocdevice.hpp b/projects/clr/rocclr/device/rocm/rocdevice.hpp index b5e323378e..00f914fbf7 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.hpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.hpp @@ -191,6 +191,13 @@ class NullDevice : public amd::Device { return !settings().enableCoarseGrainSVM_ || (memory->getContext().devices().size() > 1); } + virtual bool importExtSemaphore(void** 
extSemaphore, void* handle) { + ShouldNotReachHere(); + return false; + } + + virtual void DestroyExtSemaphore(void* extSemaphore) { ShouldNotReachHere(); } + //! Acquire external graphics API object in the host thread //! Needed for OpenGL objects on CPU device diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.hpp b/projects/clr/rocclr/device/rocm/rocvirtual.hpp index dff3d841b4..d3263b7c46 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.hpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.hpp @@ -284,6 +284,7 @@ class VirtualGPU : public device::VirtualDevice { void submitThreadTraceMemObjects(amd::ThreadTraceMemObjectsCommand& cmd) {} void submitThreadTrace(amd::ThreadTraceCommand& vcmd) {} + virtual void submitExternalSemaphoreCmd(amd::ExternalSemaphoreCmd& cmd){} /** * @brief Waits on an outstanding kernel without regard to how * it was dispatched - with or without a signal diff --git a/projects/clr/rocclr/platform/command.hpp b/projects/clr/rocclr/platform/command.hpp index f27feb2bf9..68c249b1a6 100644 --- a/projects/clr/rocclr/platform/command.hpp +++ b/projects/clr/rocclr/platform/command.hpp @@ -956,6 +956,31 @@ class NativeFnCommand : public Command { int32_t invoke(); }; + +class ExternalSemaphoreCmd : public Command { + public: + enum ExternalSemaphoreCmdType { COMMAND_WAIT_EXTSEMAPHORE, COMMAND_SIGNAL_EXTSEMAPHORE }; + + private: + const void* sem_ptr_; //!< Pointer to external semaphore + int fence_; //!< Semaphore value to signal or wait on + ExternalSemaphoreCmdType cmd_type_; //!< Signal or Wait semaphore command + + public: + ExternalSemaphoreCmd(HostQueue& queue, const void* sem_ptr, int fence, + ExternalSemaphoreCmdType cmd_type) + : Command::Command(queue, CL_COMMAND_USER), sem_ptr_(sem_ptr), fence_(fence), cmd_type_(cmd_type) {} + + virtual void submit(device::VirtualDevice& device) { + device.submitExternalSemaphoreCmd(*this); + } + const void* sem_ptr() const { return sem_ptr_; } + const int fence() { return fence_; } + const 
ExternalSemaphoreCmdType semaphoreCmd() { return cmd_type_; } + +}; + + class Marker : public Command { public: //! Create a new Marker