SWDEV-480209 - Make internal callbacks non-blocking

Change-Id: Ic918d08f341abfd9a7c167d09f9c723cdc43157f


[ROCm/clr commit: 683a942364]
Этот коммит содержится в:
Anusha GodavarthySurya
2024-10-28 09:23:25 +00:00
коммит произвёл Anusha Godavarthy Surya
родитель 16f14e4b00
Коммит 08c92f4793
11 изменённых файлов: 38 добавлений и 34 удалений
+2
Просмотреть файл
@@ -1476,6 +1476,8 @@ hipError_t hipGraphExecDestroy(hipGraphExec_t pGraphExec) {
}
hip::GraphExec* ge = reinterpret_cast<hip::GraphExec*>(pGraphExec);
ge->release();
amd::ScopedLock lock(GraphExec::graphExecSetLock_);
GraphExec::graphExecSet_.erase(ge);
HIP_RETURN(hipSuccess);
}
+2 -12
Просмотреть файл
@@ -732,21 +732,11 @@ hipError_t GraphExec::Run(hipStream_t graph_launch_stream) {
// we may not need to flush any caches.
CallbackCommand->setEventScope(amd::Device::kCacheStateIgnore);
amd::Event& event = CallbackCommand->event();
if (!event.setCallback(CL_COMPLETE, GraphExec::DecrementRefCount, this)) {
constexpr bool kBlocking = false;
if (!event.setCallback(CL_COMPLETE, GraphExec::DecrementRefCount, this, kBlocking)) {
return hipErrorInvalidHandle;
}
CallbackCommand->enqueue();
// Add the new barrier to stall the stream, until the callback is done
amd::Command::EventWaitList eventWaitList;
eventWaitList.push_back(CallbackCommand);
amd::Command* block_command = new amd::Marker(*launch_stream, kMarkerDisableFlush, eventWaitList);
// we may not need to flush any caches.
block_command->setEventScope(amd::Device::kCacheStateIgnore);
if (block_command == nullptr) {
return hipErrorInvalidValue;
}
block_command->enqueue();
block_command->release();
CallbackCommand->release();
return status;
}
+2 -4
Просмотреть файл
@@ -750,12 +750,10 @@ struct GraphExec : public amd::ReferenceCountedObject, public Graph {
~GraphExec() {
for (auto stream : parallel_streams_) {
if (stream != nullptr) {
stream->finish();
hip::Stream::Destroy(stream);
constexpr bool kForceDestroy = true;
hip::Stream::Destroy(stream, kForceDestroy);
}
}
amd::ScopedLock lock(graphExecSetLock_);
graphExecSet_.erase(this);
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
if (kernArgManager_ != nullptr) {
kernArgManager_->release();
+1 -1
Просмотреть файл
@@ -372,7 +372,7 @@ public:
/// Check whether any blocking stream running
static bool StreamCaptureBlocking();
static void Destroy(hip::Stream* stream);
static void Destroy(hip::Stream* stream, bool forceDestroy = false);
virtual bool terminate();
+2 -1
Просмотреть файл
@@ -72,8 +72,9 @@ bool Stream::Create() {
}
// ================================================================================================
void Stream::Destroy(hip::Stream* stream) {
// Tears down a stream: unregisters it from its device, records whether the
// teardown is forced, and gives up this reference.
//
// stream       - stream to destroy; it is dereferenced unconditionally, so the
//                caller must pass a valid (non-null) pointer.
// forceDestroy - forwarded to SetForceDestroy() before the reference is dropped.
//
// The pointer is received by value, so the caller's copy is NOT cleared here;
// the caller must simply stop using it after this call.
// NOTE(review): presumably release() may free the object once the last
// reference is gone - confirm against amd::ReferenceCountedObject.
void Stream::Destroy(hip::Stream* stream, bool forceDestroy) {
  stream->device_->RemoveStream(stream);
  // The flag is set before release() so it is already in place for whatever
  // teardown the release triggers (NOTE(review): assumed to reach terminate()).
  stream->SetForceDestroy(forceDestroy);
  stream->release();
}
+7 -4
Просмотреть файл
@@ -239,7 +239,7 @@ bool HsaAmdSignalHandler(hsa_signal_value_t value, void* arg) {
gpu->updateCommandsState(ts->command().GetBatchHead());
// Reset API callback signal. It will release AQL queue and start commands processing
if (callback_signal.handle != 0) {
if (callback_signal.handle != 0 && ts->GetBlocking()) {
hsa_signal_subtract_relaxed(callback_signal, 1);
}
@@ -515,10 +515,13 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal(
// If API callback is enabled, then use a blocking signal for AQL queue.
// HSA signal will be acquired in SW and released after HSA signal callback
if (ts->command().Callback() != nullptr) {
ts->SetCallbackSignal(prof_signal->signal_);
bool blocking = ts->command().Callback()->blocking_;
ts->SetCallbackSignal(prof_signal->signal_, blocking);
// Blocks AQL queue from further processing
hsa_signal_add_relaxed(prof_signal->signal_, 1);
init_value += 1;
if (blocking) {
hsa_signal_add_relaxed(prof_signal->signal_, 1);
init_value += 1;
}
}
gpu_.QueuedAsyncHandlers()++;
hsa_status_t result = hsa_amd_signal_async_handler(prof_signal->signal_,
+8 -4
Просмотреть файл
@@ -112,8 +112,9 @@ class Timestamp : public amd::ReferenceCountedObject {
std::vector<ProfilingSignal*> signals_; //!< The list of all signals, associated with the TS
hsa_signal_t callback_signal_; //!< Signal associated with a callback for possible later update
amd::Monitor lock_; //!< Serialize timestamp update
bool accum_ena_ = false; //!< If TRUE then the accumulation of execution times has started
bool hasHwProfiling_ = false; //!< If TRUE then HwProfiling is enabled for the command
bool accum_ena_ = false; //!< If TRUE then the accumulation of execution times has started
bool hasHwProfiling_ = false; //!< If TRUE then HwProfiling is enabled for the command
bool blocking_ = true; //!< If TRUE callback is blocking
Timestamp(const Timestamp&) = delete;
Timestamp& operator=(const Timestamp&) = delete;
@@ -177,12 +178,15 @@ class Timestamp : public amd::ReferenceCountedObject {
//! Accessor for the VirtualGPU pointer held in gpu_
VirtualGPU* gpu() const {
  return gpu_;
}
//! Updates the callback signal
void SetCallbackSignal(hsa_signal_t callback_signal) {
void SetCallbackSignal(hsa_signal_t callback_signal, bool blocking = true) {
// Remember the HSA signal associated with the callback so it can be updated later.
callback_signal_ = callback_signal;
// Record whether the callback is blocking; the default (true) keeps callers that
// pass only the signal on the blocking behavior.
blocking_ = blocking;
}
//! Accessor for the HSA signal currently held in callback_signal_
hsa_signal_t GetCallbackSignal() const {
  return callback_signal_;
}
//! Return if callback is blocking/non-blocking
bool GetBlocking() { return blocking_; }
};
class VirtualGPU : public device::VirtualDevice {
+3 -2
Просмотреть файл
@@ -190,10 +190,11 @@ bool Event::resetStatus(int32_t status) {
}
// ================================================================================================
bool Event::setCallback(int32_t status, Event::CallBackFunction callback, void* data) {
bool Event::setCallback(int32_t status, Event::CallBackFunction callback, void* data,
bool blocking) {
assert(status >= CL_COMPLETE && status <= CL_QUEUED && "invalid status");
CallBackEntry* entry = new CallBackEntry(status, callback, data);
CallBackEntry* entry = new CallBackEntry(status, callback, data, blocking);
if (entry == NULL) {
return false;
}
+5 -5
Просмотреть файл
@@ -72,10 +72,10 @@ class Event : public RuntimeObject {
std::atomic<CallBackFunction> callback_; //!< callback function pointer.
void* data_; //!< user data passed to the callback function.
int32_t status_; //!< execution status triggering the callback.
CallBackEntry(int32_t status, CallBackFunction callback, void* data)
: callback_(callback), data_(data), status_(status) {}
int32_t status_; //!< execution status triggering the callback.
bool blocking_; //!< TRUE if callback is blocking
//! Builds an entry from the triggering status, the callback pointer,
//! the user data handed to that callback, and the blocking flag.
CallBackEntry(int32_t status, CallBackFunction callback, void* data, bool blocking)
    : callback_(callback),
      data_(data),
      status_(status),
      blocking_(blocking) {
}
};
public:
@@ -173,7 +173,7 @@ class Event : public RuntimeObject {
//! Reads the current value of status_ using relaxed memory ordering
int32_t status() const {
  const int32_t current = status_.load(std::memory_order_relaxed);
  return current;
}
//! Insert the given \a callback into the callback stack.
bool setCallback(int32_t status, CallBackFunction callback, void* data);
bool setCallback(int32_t status, CallBackFunction callback, void* data, bool blocking = true);
/*! \brief Set the event status.
*
+2 -1
Просмотреть файл
@@ -57,8 +57,9 @@ HostQueue::HostQueue(Context& context, Device& device, cl_command_queue_properti
}
bool HostQueue::terminate() {
// In case of force destroy, skip checking on the last command
if (AMD_DIRECT_DISPATCH) {
if (vdev() != nullptr) {
if (!forceDestroy_ && vdev() != nullptr) {
// If the queue still has the last command, then wait and release it
// We must be in protected way to get last command when calling
// awaitCompletion() where lastCommand will be released and possibly
+4
Просмотреть файл
@@ -294,6 +294,9 @@ class HostQueue : public CommandQueue {
//! Reports the value of the isActive_ flag of this queue
bool GetQueueStatus() {
  return isActive_;
}
//! Sets the force-destroy flag, which lets the queue be terminated
//! without checking the last command
void SetForceDestroy(bool forceDestroy) {
  forceDestroy_ = forceDestroy;
}
//! Returns the queue ID reported by the virtual device of this queue's thread
uint64_t getQueueID() {
  auto&& device = thread_.vdev();
  return device->getQueueID();
}
@@ -305,6 +308,7 @@ private:
//! True if this command queue is active
bool isActive_;
bool forceDestroy_ = false; //!< Destroy the queue in the current state
};
class DeviceQueue : public CommandQueue {