diff --git a/projects/clr/rocclr/runtime/device/cpu/cpudevice.hpp b/projects/clr/rocclr/runtime/device/cpu/cpudevice.hpp index 03dd1a8e12..64a767a87f 100644 --- a/projects/clr/rocclr/runtime/device/cpu/cpudevice.hpp +++ b/projects/clr/rocclr/runtime/device/cpu/cpudevice.hpp @@ -71,12 +71,7 @@ public: //! Instantiate a new virtual device virtual device::VirtualDevice* createVirtualDevice( - bool profiling, - bool interopQueue -#if cl_amd_open_video - , void* calVideoProperties = NULL -#endif // cl_amd_open_video - , uint deviceQueueSize = 0 + amd::CommandQueue* queue = NULL ) { VirtualCPU* virtualCpu = new VirtualCPU(*this); diff --git a/projects/clr/rocclr/runtime/device/device.hpp b/projects/clr/rocclr/runtime/device/device.hpp index b56bc64ffc..82dfc0ffd6 100644 --- a/projects/clr/rocclr/runtime/device/device.hpp +++ b/projects/clr/rocclr/runtime/device/device.hpp @@ -550,6 +550,10 @@ struct Info : public amd::EmbeddedObject cl_uint gfxipVersion_; //! Number of available async queues cl_uint numAsyncQueues_; + //! Number of available real time queues + cl_uint numRTQueues_; + //! Number of available real time compute units + cl_uint numRTCUs_; //! Thread trace enable cl_bool threadTraceEnable_; @@ -1577,12 +1581,7 @@ public: //! Create a new virtual device environment. virtual device::VirtualDevice* createVirtualDevice( - bool profiling, - bool interopQueue -#if cl_amd_open_video - , void* calVideoProperties = NULL -#endif // cl_amd_open_video - , uint deviceQueueSize = 0 + CommandQueue* queue = NULL ) = 0; //! Compile the given source code. 
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp index 81b78cc6c7..585925a44b 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp @@ -543,6 +543,8 @@ void NullDevice::fillDeviceInfo( info_.localMemBanks_ = hwInfo()->localMemBanks_; info_.gfxipVersion_ = hwInfo()->gfxipVersion_; info_.numAsyncQueues_ = numComputeRings; + info_.numRTQueues_ = 2; + info_.numRTCUs_ = 4; info_.threadTraceEnable_ = settings().threadTraceEnable_; } } @@ -1091,14 +1093,31 @@ Device::initializeHeapResources() device::VirtualDevice* Device::createVirtualDevice( - bool profiling, - bool interopQueue -#if cl_amd_open_video - , void* calVideoProperties -#endif // cl_amd_open_video - , uint deviceQueueSize + amd::CommandQueue* queue ) { + bool profiling = false; + bool interopQueue = false; + uint rtCUs = 0; + uint deviceQueueSize = 0; +#if cl_amd_open_video + void* calVideoProperties = NULL; +#endif // cl_amd_open_video + + if (queue != NULL) { + profiling = queue->properties().test(CL_QUEUE_PROFILING_ENABLE); + if (queue->asHostQueue() != NULL) { + interopQueue = (0 != (queue->context().info().flags_ & + (amd::Context::GLDeviceKhr | + amd::Context::D3D10DeviceKhr | + amd::Context::D3D11DeviceKhr))); + rtCUs = queue->rtCUs(); + } + else if (queue->asDeviceQueue() != NULL) { + deviceQueueSize = queue->asDeviceQueue()->size(); + } + } + // Not safe to add a queue. So lock the device amd::ScopedLock k(lockAsyncOps()); amd::ScopedLock lock(vgpusAccess()); diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp index f2cc8969dc..3b159d04c0 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp @@ -63,12 +63,7 @@ public: //! 
Instantiate a new virtual device virtual device::VirtualDevice* createVirtualDevice( - bool profiling, - bool interopQueue -#if cl_amd_open_video - , void* calVideoProperties = NULL -#endif // cl_amd_open_video - , uint deviceQueueSize = 0 + amd::CommandQueue* queue = NULL ) { return NULL; } //! Compile the given source code. @@ -394,12 +389,7 @@ public: //! Instantiate a new virtual device device::VirtualDevice* createVirtualDevice( - bool profiling, - bool interopQueue -#if cl_amd_open_video - , void* calVideoProperties = NULL -#endif // cl_amd_open_video - , uint deviceQueueSize = 0 + amd::CommandQueue* queue = NULL ); //! Memory allocation diff --git a/projects/clr/rocclr/runtime/device/hsa/hsadevice.cpp b/projects/clr/rocclr/runtime/device/hsa/hsadevice.cpp index 5c58bec63e..0297f75db6 100644 --- a/projects/clr/rocclr/runtime/device/hsa/hsadevice.cpp +++ b/projects/clr/rocclr/runtime/device/hsa/hsadevice.cpp @@ -648,15 +648,14 @@ Device::populateOCLDeviceConstants() } device::VirtualDevice* -Device::createVirtualDevice( - bool profiling, - bool interopQueue -#if cl_amd_open_video - , void *calVideoProperties -#endif - , uint deviceQueueSize -) +Device::createVirtualDevice(amd::CommandQueue* queue) { + bool interopQueue = (queue != NULL) && + (0 != (queue->context().info().flags_ & + (amd::Context::GLDeviceKhr | + amd::Context::D3D10DeviceKhr | + amd::Context::D3D11DeviceKhr))); + // Initialization of heap and other resources occur during the command // queue creation time. 
HsaQueueType type = kHsaQueueTypeCompute; @@ -884,7 +883,7 @@ Device::xferQueue() const // Create virtual device for internal memory transfer Device* thisDevice = const_cast(this); thisDevice->xferQueue_ = reinterpret_cast( - thisDevice->createVirtualDevice(false, false)); + thisDevice->createVirtualDevice()); if (!xferQueue_) { LogError("Couldn't create the device transfer manager!"); } diff --git a/projects/clr/rocclr/runtime/device/hsa/hsadevice.hpp b/projects/clr/rocclr/runtime/device/hsa/hsadevice.hpp index 3b7c3cdbba..ce1fe3a230 100644 --- a/projects/clr/rocclr/runtime/device/hsa/hsadevice.hpp +++ b/projects/clr/rocclr/runtime/device/hsa/hsadevice.hpp @@ -100,16 +100,7 @@ public: //! Create a new virtual device environment. virtual device::VirtualDevice* createVirtualDevice( - bool profiling, - bool interopQueue -#if cl_amd_open_video - , void* calVideoProperties = NULL -#endif // cl_amd_open_video - , uint deviceQueueSize = 0 - ) { - ShouldNotReachHere(); - return NULL; - }; + amd::CommandQueue* queue = NULL) { return NULL; } virtual bool registerSvmMemory(void* ptr, size_t size) const { ShouldNotReachHere(); @@ -252,12 +243,8 @@ public: //! Instantiate a new virtual device virtual device::VirtualDevice *createVirtualDevice( - bool profiling, bool interopQueue -#if cl_amd_open_video - , void *calVideoProperties = NULL -#endif // cl_amd_open_vide - , uint deviceQueueSize = 0 - ); + amd::CommandQueue* queue = NULL); + //! 
Construct an HSAIL program object from the ELF assuming it is valid virtual device::Program *createProgram(int oclVer = 120); diff --git a/projects/clr/rocclr/runtime/platform/commandqueue.cpp b/projects/clr/rocclr/runtime/platform/commandqueue.cpp index d6724aab9f..acb6c4b170 100644 --- a/projects/clr/rocclr/runtime/platform/commandqueue.cpp +++ b/projects/clr/rocclr/runtime/platform/commandqueue.cpp @@ -18,13 +18,13 @@ namespace amd { HostQueue::HostQueue( - Context& context, Device& device, cl_command_queue_properties properties + Context& context, Device& device, cl_command_queue_properties properties, uint queueRTCUs #if cl_amd_open_video , void* calVideoProperties #endif // cl_amd_open_video ) : CommandQueue(context, device, properties, device.info().queueProperties_ - | CL_QUEUE_COMMAND_INTERCEPT_ENABLE_AMD) + | CL_QUEUE_COMMAND_INTERCEPT_ENABLE_AMD, queueRTCUs) #if cl_amd_open_video , calVideoProperties_(calVideoProperties) #endif // cl_amd_open_video @@ -182,14 +182,7 @@ DeviceQueue::create() const bool defaultDeviceQueue = properties().test(CL_QUEUE_ON_DEVICE_DEFAULT); bool result = false; - virtualDevice_ = device().createVirtualDevice( - properties().test(CL_QUEUE_PROFILING_ENABLE), - !InteropQueue -#if cl_amd_open_video - , NULL -#endif // cl_amd_open_video - , size_); - + virtualDevice_ = device().createVirtualDevice(this); if (virtualDevice_ != NULL) { result = true; context().addDeviceQueue(device(), this, defaultDeviceQueue); diff --git a/projects/clr/rocclr/runtime/platform/commandqueue.hpp b/projects/clr/rocclr/runtime/platform/commandqueue.hpp index 9df958e17e..6a47633556 100644 --- a/projects/clr/rocclr/runtime/platform/commandqueue.hpp +++ b/projects/clr/rocclr/runtime/platform/commandqueue.hpp @@ -89,6 +89,9 @@ public: //! Returns DeviceQueue object virtual DeviceQueue* asDeviceQueue() { return NULL; } + //! Returns the number of requested real time CUs + uint rtCUs() const { return rtCUs_; } + protected: //! 
CommandQueue constructor is protected //! to keep the CommandQueue class as a virtual interface @@ -96,14 +99,17 @@ protected: Context& context, //!< Context object Device& device, //!< Device object cl_command_queue_properties properties, //!< Queue properties - cl_command_queue_properties propMask //!< Queue properties mask + cl_command_queue_properties propMask, //!< Queue properties mask + uint rtCUs = 0 //!< Available real time compute units ) : properties_(propMask, properties) + , rtCUs_(rtCUs) , queueLock_("CommandQueue::queueLock") , device_(device) , context_(context) {} Properties properties_; //!< Queue properties + uint rtCUs_; //!< The number of used RT compute units Monitor queueLock_; //!< Lock protecting the queue Device& device_; //!< The device SharedReference<Context> context_; //!< The context of this command queue @@ -133,16 +139,7 @@ class HostQueue : public CommandQueue //! The command queue thread entry point. void run(void *data) { HostQueue* queue = static_cast<HostQueue*>(data); - - bool interopQueue = (0 != (queue->context().info().flags_ & (Context::GLDeviceKhr | Context::D3D10DeviceKhr | Context::D3D11DeviceKhr))); - - virtualDevice_ = queue->device().createVirtualDevice( - queue->properties().test(CL_QUEUE_PROFILING_ENABLE), - interopQueue -#if cl_amd_open_video - , queue->calVideoProperties_ -#endif // cl_amd_open_video - ); + virtualDevice_ = queue->device().createVirtualDevice(queue); if (virtualDevice_ != NULL) { queue->loop(virtualDevice_); if (virtualDevice_->terminate()) { @@ -185,7 +182,8 @@ public: HostQueue( Context& context, Device& device, - cl_command_queue_properties properties + cl_command_queue_properties properties, + uint queueRTCUs = 0 #if cl_amd_open_video , void* calVideoProperties = NULL #endif // cl_amd_open_video