SWDEV-480209 - Make internal callbacks non-blocking
Change-Id: Ic918d08f341abfd9a7c167d09f9c723cdc43157f
Этот коммит содержится в:
коммит произвёл
Anusha Godavarthy Surya
родитель
c9dd95bf6c
Коммит
683a942364
@@ -1476,6 +1476,8 @@ hipError_t hipGraphExecDestroy(hipGraphExec_t pGraphExec) {
|
||||
}
|
||||
hip::GraphExec* ge = reinterpret_cast<hip::GraphExec*>(pGraphExec);
|
||||
ge->release();
|
||||
amd::ScopedLock lock(GraphExec::graphExecSetLock_);
|
||||
GraphExec::graphExecSet_.erase(ge);
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
|
||||
@@ -732,21 +732,11 @@ hipError_t GraphExec::Run(hipStream_t graph_launch_stream) {
|
||||
// we may not need to flush any caches.
|
||||
CallbackCommand->setEventScope(amd::Device::kCacheStateIgnore);
|
||||
amd::Event& event = CallbackCommand->event();
|
||||
if (!event.setCallback(CL_COMPLETE, GraphExec::DecrementRefCount, this)) {
|
||||
constexpr bool kBlocking = false;
|
||||
if (!event.setCallback(CL_COMPLETE, GraphExec::DecrementRefCount, this, kBlocking)) {
|
||||
return hipErrorInvalidHandle;
|
||||
}
|
||||
CallbackCommand->enqueue();
|
||||
// Add the new barrier to stall the stream, until the callback is done
|
||||
amd::Command::EventWaitList eventWaitList;
|
||||
eventWaitList.push_back(CallbackCommand);
|
||||
amd::Command* block_command = new amd::Marker(*launch_stream, kMarkerDisableFlush, eventWaitList);
|
||||
// we may not need to flush any caches.
|
||||
block_command->setEventScope(amd::Device::kCacheStateIgnore);
|
||||
if (block_command == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
block_command->enqueue();
|
||||
block_command->release();
|
||||
CallbackCommand->release();
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -750,12 +750,10 @@ struct GraphExec : public amd::ReferenceCountedObject, public Graph {
|
||||
~GraphExec() {
|
||||
for (auto stream : parallel_streams_) {
|
||||
if (stream != nullptr) {
|
||||
stream->finish();
|
||||
hip::Stream::Destroy(stream);
|
||||
constexpr bool kForceDestroy = true;
|
||||
hip::Stream::Destroy(stream, kForceDestroy);
|
||||
}
|
||||
}
|
||||
amd::ScopedLock lock(graphExecSetLock_);
|
||||
graphExecSet_.erase(this);
|
||||
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
|
||||
if (kernArgManager_ != nullptr) {
|
||||
kernArgManager_->release();
|
||||
|
||||
@@ -372,7 +372,7 @@ public:
|
||||
/// Check whether any blocking stream running
|
||||
static bool StreamCaptureBlocking();
|
||||
|
||||
static void Destroy(hip::Stream* stream);
|
||||
static void Destroy(hip::Stream* stream, bool forceDestroy = false);
|
||||
|
||||
virtual bool terminate();
|
||||
|
||||
|
||||
@@ -72,8 +72,9 @@ bool Stream::Create() {
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
void Stream::Destroy(hip::Stream* stream) {
|
||||
// Removes `stream` from its device's stream list, records the force-destroy
// request on the stream, and drops one reference to it.
//
// \param stream       stream to destroy; dereferenced unconditionally, so it
//                     must not be null
// \param forceDestroy forwarded to SetForceDestroy() on the stream
//
// The pointer is passed by value, so the caller's copy is not modified here;
// nulling the local parameter would have no observable effect and is omitted.
void Stream::Destroy(hip::Stream* stream, bool forceDestroy) {
  stream->device_->RemoveStream(stream);
  // Set the flag before dropping the reference.
  // NOTE(review): presumably release() can lead to HostQueue::terminate(),
  // which reads the flag -- confirm.
  stream->SetForceDestroy(forceDestroy);
  stream->release();
}
|
||||
|
||||
@@ -239,7 +239,7 @@ bool HsaAmdSignalHandler(hsa_signal_value_t value, void* arg) {
|
||||
gpu->updateCommandsState(ts->command().GetBatchHead());
|
||||
|
||||
// Reset API callback signal. It will release AQL queue and start commands processing
|
||||
if (callback_signal.handle != 0) {
|
||||
if (callback_signal.handle != 0 && ts->GetBlocking()) {
|
||||
hsa_signal_subtract_relaxed(callback_signal, 1);
|
||||
}
|
||||
|
||||
@@ -515,10 +515,13 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal(
|
||||
// If API callback is enabled, then use a blocking signal for AQL queue.
|
||||
// HSA signal will be acquired in SW and released after HSA signal callback
|
||||
if (ts->command().Callback() != nullptr) {
|
||||
ts->SetCallbackSignal(prof_signal->signal_);
|
||||
bool blocking = ts->command().Callback()->blocking_;
|
||||
ts->SetCallbackSignal(prof_signal->signal_, blocking);
|
||||
// Blocks AQL queue from further processing
|
||||
hsa_signal_add_relaxed(prof_signal->signal_, 1);
|
||||
init_value += 1;
|
||||
if (blocking) {
|
||||
hsa_signal_add_relaxed(prof_signal->signal_, 1);
|
||||
init_value += 1;
|
||||
}
|
||||
}
|
||||
gpu_.QueuedAsyncHandlers()++;
|
||||
hsa_status_t result = hsa_amd_signal_async_handler(prof_signal->signal_,
|
||||
|
||||
@@ -112,8 +112,9 @@ class Timestamp : public amd::ReferenceCountedObject {
|
||||
std::vector<ProfilingSignal*> signals_; //!< The list of all signals, associated with the TS
|
||||
hsa_signal_t callback_signal_; //!< Signal associated with a callback for possible later update
|
||||
amd::Monitor lock_; //!< Serialize timestamp update
|
||||
bool accum_ena_ = false; //!< If TRUE then the accumulation of execution times has started
|
||||
bool hasHwProfiling_ = false; //!< If TRUE then HwProfiling is enabled for the command
|
||||
bool accum_ena_ = false; //!< If TRUE then the accumulation of execution times has started
|
||||
bool hasHwProfiling_ = false; //!< If TRUE then HwProfiling is enabled for the command
|
||||
bool blocking_ = true; //!< If TRUE callback is blocking
|
||||
|
||||
Timestamp(const Timestamp&) = delete;
|
||||
Timestamp& operator=(const Timestamp&) = delete;
|
||||
@@ -177,12 +178,15 @@ class Timestamp : public amd::ReferenceCountedObject {
|
||||
VirtualGPU* gpu() const { return gpu_; }
|
||||
|
||||
//! Updates the callback signal
|
||||
void SetCallbackSignal(hsa_signal_t callback_signal) {
|
||||
//! Stores the signal associated with the callback together with a flag that
//! tells whether the callback is blocking (blocking by default)
void SetCallbackSignal(hsa_signal_t callback_signal, bool blocking = true) {
  blocking_ = blocking;
  callback_signal_ = callback_signal;
}
|
||||
|
||||
//! Returns the callback signal
|
||||
hsa_signal_t GetCallbackSignal() const { return callback_signal_; }
|
||||
|
||||
//! Return if callback is blocking/non-blocking
|
||||
bool GetBlocking() { return blocking_; }
|
||||
};
|
||||
|
||||
class VirtualGPU : public device::VirtualDevice {
|
||||
|
||||
@@ -190,10 +190,11 @@ bool Event::resetStatus(int32_t status) {
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
bool Event::setCallback(int32_t status, Event::CallBackFunction callback, void* data) {
|
||||
bool Event::setCallback(int32_t status, Event::CallBackFunction callback, void* data,
|
||||
bool blocking) {
|
||||
assert(status >= CL_COMPLETE && status <= CL_QUEUED && "invalid status");
|
||||
|
||||
CallBackEntry* entry = new CallBackEntry(status, callback, data);
|
||||
CallBackEntry* entry = new CallBackEntry(status, callback, data, blocking);
|
||||
if (entry == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -72,10 +72,10 @@ class Event : public RuntimeObject {
|
||||
|
||||
std::atomic<CallBackFunction> callback_; //!< callback function pointer.
|
||||
void* data_; //!< user data passed to the callback function.
|
||||
int32_t status_; //!< execution status triggering the callback.
|
||||
|
||||
CallBackEntry(int32_t status, CallBackFunction callback, void* data)
|
||||
: callback_(callback), data_(data), status_(status) {}
|
||||
int32_t status_; //!< execution status triggering the callback.
|
||||
bool blocking_; //!< TRUE if callback is blocking
|
||||
CallBackEntry(int32_t status, CallBackFunction callback, void* data, bool blocking)
|
||||
: callback_(callback), data_(data), status_(status), blocking_(blocking) {}
|
||||
};
|
||||
|
||||
public:
|
||||
@@ -173,7 +173,7 @@ class Event : public RuntimeObject {
|
||||
int32_t status() const { return status_.load(std::memory_order_relaxed); }
|
||||
|
||||
//! Insert the given \a callback into the callback stack.
|
||||
bool setCallback(int32_t status, CallBackFunction callback, void* data);
|
||||
bool setCallback(int32_t status, CallBackFunction callback, void* data, bool blocking = true);
|
||||
|
||||
/*! \brief Set the event status.
|
||||
*
|
||||
|
||||
@@ -57,8 +57,9 @@ HostQueue::HostQueue(Context& context, Device& device, cl_command_queue_properti
|
||||
}
|
||||
|
||||
bool HostQueue::terminate() {
|
||||
// In case of a force destroy, skip the check on the last command
|
||||
if (AMD_DIRECT_DISPATCH) {
|
||||
if (vdev() != nullptr) {
|
||||
if (!forceDestroy_ && vdev() != nullptr) {
|
||||
// If the queue still has the last command, then wait and release it
|
||||
// We must be in protected way to get last command when calling
|
||||
// awaitCompletion() where lastCommand will be released and possibly
|
||||
|
||||
@@ -294,6 +294,9 @@ class HostQueue : public CommandQueue {
|
||||
//! Get queue status
|
||||
bool GetQueueStatus() { return isActive_; }
|
||||
|
||||
//! Set the force destory to terminate queue without checking last command
|
||||
void SetForceDestroy(bool forceDestroy) { forceDestroy_ = forceDestroy; }
|
||||
|
||||
uint64_t getQueueID() {
|
||||
return thread_.vdev()->getQueueID();
|
||||
}
|
||||
@@ -305,6 +308,7 @@ private:
|
||||
|
||||
//! True if this command queue is active
|
||||
bool isActive_;
|
||||
bool forceDestroy_ = false; //!< Destroy the queue in the current state
|
||||
};
|
||||
|
||||
class DeviceQueue : public CommandQueue {
|
||||
|
||||
Ссылка в новой задаче
Block a user