From 97fbdd7f585287242e16eb31f6f352fa653dfcb8 Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 11 Jun 2015 19:17:47 -0400
Subject: [PATCH] P4 to Git Change 1160575 by gandryey@gera-dev-w7 on
2015/06/11 18:31:59
ECR #304775 - Real time queue support
- Add the new interface for the real time queue creation
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_command.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_device.cpp#58 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.hpp#90 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#246 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#511 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#144 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsadevice.cpp#91 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsadevice.hpp#48 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.cpp#33 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.hpp#14 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.cpp#20 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.hpp#15 edit
[ROCm/clr commit: e919691665359cab8beb88e852983fd0f6e77807]
---
.../rocclr/runtime/device/cpu/cpudevice.hpp | 7 +----
projects/clr/rocclr/runtime/device/device.hpp | 11 +++----
.../rocclr/runtime/device/gpu/gpudevice.cpp | 31 +++++++++++++++----
.../rocclr/runtime/device/gpu/gpudevice.hpp | 14 ++-------
.../rocclr/runtime/device/hsa/hsadevice.cpp | 17 +++++-----
.../rocclr/runtime/device/hsa/hsadevice.hpp | 19 ++----------
.../rocclr/runtime/platform/commandqueue.cpp | 13 ++------
.../rocclr/runtime/platform/commandqueue.hpp | 22 ++++++-------
8 files changed, 57 insertions(+), 77 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/cpu/cpudevice.hpp b/projects/clr/rocclr/runtime/device/cpu/cpudevice.hpp
index 03dd1a8e12..64a767a87f 100644
--- a/projects/clr/rocclr/runtime/device/cpu/cpudevice.hpp
+++ b/projects/clr/rocclr/runtime/device/cpu/cpudevice.hpp
@@ -71,12 +71,7 @@ public:
//! Instantiate a new virtual device
virtual device::VirtualDevice* createVirtualDevice(
- bool profiling,
- bool interopQueue
-#if cl_amd_open_video
- , void* calVideoProperties = NULL
-#endif // cl_amd_open_video
- , uint deviceQueueSize = 0
+ amd::CommandQueue* queue = NULL
)
{
VirtualCPU* virtualCpu = new VirtualCPU(*this);
diff --git a/projects/clr/rocclr/runtime/device/device.hpp b/projects/clr/rocclr/runtime/device/device.hpp
index b56bc64ffc..82dfc0ffd6 100644
--- a/projects/clr/rocclr/runtime/device/device.hpp
+++ b/projects/clr/rocclr/runtime/device/device.hpp
@@ -550,6 +550,10 @@ struct Info : public amd::EmbeddedObject
cl_uint gfxipVersion_;
//! Number of available async queues
cl_uint numAsyncQueues_;
+ //! Number of available real time queues
+ cl_uint numRTQueues_;
+ //! Number of available real time compute units
+ cl_uint numRTCUs_;
//! Thread trace enable
cl_bool threadTraceEnable_;
@@ -1577,12 +1581,7 @@ public:
//! Create a new virtual device environment.
virtual device::VirtualDevice* createVirtualDevice(
- bool profiling,
- bool interopQueue
-#if cl_amd_open_video
- , void* calVideoProperties = NULL
-#endif // cl_amd_open_video
- , uint deviceQueueSize = 0
+ CommandQueue* queue = NULL
) = 0;
//! Compile the given source code.
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp
index 81b78cc6c7..585925a44b 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp
@@ -543,6 +543,8 @@ void NullDevice::fillDeviceInfo(
info_.localMemBanks_ = hwInfo()->localMemBanks_;
info_.gfxipVersion_ = hwInfo()->gfxipVersion_;
info_.numAsyncQueues_ = numComputeRings;
+ info_.numRTQueues_ = 2;
+ info_.numRTCUs_ = 4;
info_.threadTraceEnable_ = settings().threadTraceEnable_;
}
}
@@ -1091,14 +1093,31 @@ Device::initializeHeapResources()
device::VirtualDevice*
Device::createVirtualDevice(
- bool profiling,
- bool interopQueue
-#if cl_amd_open_video
- , void* calVideoProperties
-#endif // cl_amd_open_video
- , uint deviceQueueSize
+ amd::CommandQueue* queue
)
{
+ bool profiling = false;
+ bool interopQueue = false;
+ uint rtCUs = 0;
+ uint deviceQueueSize = 0;
+#if cl_amd_open_video
+ void* calVideoProperties = NULL;
+#endif // cl_amd_open_video
+
+ if (queue != NULL) {
+ profiling = queue->properties().test(CL_QUEUE_PROFILING_ENABLE);
+ if (queue->asHostQueue() != NULL) {
+ interopQueue = (0 != (queue->context().info().flags_ &
+ (amd::Context::GLDeviceKhr |
+ amd::Context::D3D10DeviceKhr |
+ amd::Context::D3D11DeviceKhr)));
+ rtCUs = queue->rtCUs();
+ }
+ else if (queue->asDeviceQueue() != NULL) {
+ deviceQueueSize = queue->asDeviceQueue()->size();
+ }
+ }
+
// Not safe to add a queue. So lock the device
amd::ScopedLock k(lockAsyncOps());
amd::ScopedLock lock(vgpusAccess());
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp
index f2cc8969dc..3b159d04c0 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp
@@ -63,12 +63,7 @@ public:
//! Instantiate a new virtual device
virtual device::VirtualDevice* createVirtualDevice(
- bool profiling,
- bool interopQueue
-#if cl_amd_open_video
- , void* calVideoProperties = NULL
-#endif // cl_amd_open_video
- , uint deviceQueueSize = 0
+ amd::CommandQueue* queue = NULL
) { return NULL; }
//! Compile the given source code.
@@ -394,12 +389,7 @@ public:
//! Instantiate a new virtual device
device::VirtualDevice* createVirtualDevice(
- bool profiling,
- bool interopQueue
-#if cl_amd_open_video
- , void* calVideoProperties = NULL
-#endif // cl_amd_open_video
- , uint deviceQueueSize = 0
+ amd::CommandQueue* queue = NULL
);
//! Memory allocation
diff --git a/projects/clr/rocclr/runtime/device/hsa/hsadevice.cpp b/projects/clr/rocclr/runtime/device/hsa/hsadevice.cpp
index 5c58bec63e..0297f75db6 100644
--- a/projects/clr/rocclr/runtime/device/hsa/hsadevice.cpp
+++ b/projects/clr/rocclr/runtime/device/hsa/hsadevice.cpp
@@ -648,15 +648,14 @@ Device::populateOCLDeviceConstants()
}
device::VirtualDevice*
-Device::createVirtualDevice(
- bool profiling,
- bool interopQueue
-#if cl_amd_open_video
- , void *calVideoProperties
-#endif
- , uint deviceQueueSize
-)
+Device::createVirtualDevice(amd::CommandQueue* queue)
{
+ bool interopQueue = (queue != NULL) &&
+ (0 != (queue->context().info().flags_ &
+ (amd::Context::GLDeviceKhr |
+ amd::Context::D3D10DeviceKhr |
+ amd::Context::D3D11DeviceKhr)));
+
// Initialization of heap and other resources occur during the command
// queue creation time.
HsaQueueType type = kHsaQueueTypeCompute;
@@ -884,7 +883,7 @@ Device::xferQueue() const
// Create virtual device for internal memory transfer
Device* thisDevice = const_cast<Device*>(this);
thisDevice->xferQueue_ = reinterpret_cast(
- thisDevice->createVirtualDevice(false, false));
+ thisDevice->createVirtualDevice());
if (!xferQueue_) {
LogError("Couldn't create the device transfer manager!");
}
diff --git a/projects/clr/rocclr/runtime/device/hsa/hsadevice.hpp b/projects/clr/rocclr/runtime/device/hsa/hsadevice.hpp
index 3b7c3cdbba..ce1fe3a230 100644
--- a/projects/clr/rocclr/runtime/device/hsa/hsadevice.hpp
+++ b/projects/clr/rocclr/runtime/device/hsa/hsadevice.hpp
@@ -100,16 +100,7 @@ public:
//! Create a new virtual device environment.
virtual device::VirtualDevice* createVirtualDevice(
- bool profiling,
- bool interopQueue
-#if cl_amd_open_video
- , void* calVideoProperties = NULL
-#endif // cl_amd_open_video
- , uint deviceQueueSize = 0
- ) {
- ShouldNotReachHere();
- return NULL;
- };
+ amd::CommandQueue* queue = NULL) { return NULL; }
virtual bool registerSvmMemory(void* ptr, size_t size) const {
ShouldNotReachHere();
@@ -252,12 +243,8 @@ public:
//! Instantiate a new virtual device
virtual device::VirtualDevice *createVirtualDevice(
- bool profiling, bool interopQueue
-#if cl_amd_open_video
- , void *calVideoProperties = NULL
-#endif // cl_amd_open_vide
- , uint deviceQueueSize = 0
- );
+ amd::CommandQueue* queue = NULL);
+
//! Construct an HSAIL program object from the ELF assuming it is valid
virtual device::Program *createProgram(int oclVer = 120);
diff --git a/projects/clr/rocclr/runtime/platform/commandqueue.cpp b/projects/clr/rocclr/runtime/platform/commandqueue.cpp
index d6724aab9f..acb6c4b170 100644
--- a/projects/clr/rocclr/runtime/platform/commandqueue.cpp
+++ b/projects/clr/rocclr/runtime/platform/commandqueue.cpp
@@ -18,13 +18,13 @@
namespace amd {
HostQueue::HostQueue(
- Context& context, Device& device, cl_command_queue_properties properties
+ Context& context, Device& device, cl_command_queue_properties properties, uint queueRTCUs
#if cl_amd_open_video
, void* calVideoProperties
#endif // cl_amd_open_video
)
: CommandQueue(context, device, properties, device.info().queueProperties_
- | CL_QUEUE_COMMAND_INTERCEPT_ENABLE_AMD)
+ | CL_QUEUE_COMMAND_INTERCEPT_ENABLE_AMD, queueRTCUs)
#if cl_amd_open_video
, calVideoProperties_(calVideoProperties)
#endif // cl_amd_open_video
@@ -182,14 +182,7 @@ DeviceQueue::create()
const bool defaultDeviceQueue = properties().test(CL_QUEUE_ON_DEVICE_DEFAULT);
bool result = false;
- virtualDevice_ = device().createVirtualDevice(
- properties().test(CL_QUEUE_PROFILING_ENABLE),
- !InteropQueue
-#if cl_amd_open_video
- , NULL
-#endif // cl_amd_open_video
- , size_);
-
+ virtualDevice_ = device().createVirtualDevice(this);
if (virtualDevice_ != NULL) {
result = true;
context().addDeviceQueue(device(), this, defaultDeviceQueue);
diff --git a/projects/clr/rocclr/runtime/platform/commandqueue.hpp b/projects/clr/rocclr/runtime/platform/commandqueue.hpp
index 9df958e17e..6a47633556 100644
--- a/projects/clr/rocclr/runtime/platform/commandqueue.hpp
+++ b/projects/clr/rocclr/runtime/platform/commandqueue.hpp
@@ -89,6 +89,9 @@ public:
//! Returns DeviceQueue object
virtual DeviceQueue* asDeviceQueue() { return NULL; }
+ //! Returns the number of requested real time CUs
+ uint rtCUs() const { return rtCUs_; }
+
protected:
//! CommandQueue constructor is protected
//! to keep the CommandQueue class as a virtual interface
@@ -96,14 +99,17 @@ protected:
Context& context, //!< Context object
Device& device, //!< Device object
cl_command_queue_properties properties, //!< Queue properties
- cl_command_queue_properties propMask //!< Queue properties mask
+ cl_command_queue_properties propMask, //!< Queue properties mask
+ uint rtCUs = 0 //!< Available real time compute units
)
: properties_(propMask, properties)
+ , rtCUs_(rtCUs)
, queueLock_("CommandQueue::queueLock")
, device_(device)
, context_(context) {}
Properties properties_; //!< Queue properties
+ uint rtCUs_; //!< The number of used RT compute units
Monitor queueLock_; //!< Lock protecting the queue
Device& device_; //!< The device
SharedReference context_; //!< The context of this command queue
@@ -133,16 +139,7 @@ class HostQueue : public CommandQueue
//! The command queue thread entry point.
void run(void *data) {
HostQueue* queue = static_cast<HostQueue*>(data);
-
- bool interopQueue = (0 != (queue->context().info().flags_ & (Context::GLDeviceKhr | Context::D3D10DeviceKhr | Context::D3D11DeviceKhr)));
-
- virtualDevice_ = queue->device().createVirtualDevice(
- queue->properties().test(CL_QUEUE_PROFILING_ENABLE),
- interopQueue
-#if cl_amd_open_video
- , queue->calVideoProperties_
-#endif // cl_amd_open_video
- );
+ virtualDevice_ = queue->device().createVirtualDevice(queue);
if (virtualDevice_ != NULL) {
queue->loop(virtualDevice_);
if (virtualDevice_->terminate()) {
@@ -185,7 +182,8 @@ public:
HostQueue(
Context& context,
Device& device,
- cl_command_queue_properties properties
+ cl_command_queue_properties properties,
+ uint queueRTCUs = 0
#if cl_amd_open_video
, void* calVideoProperties = NULL
#endif // cl_amd_open_video