SWDEV-486602 - Add tracking of HSA handlers
Add an atomic counter to track the outstanding HSA handlers. Wait on the CPU for the callbacks if the number exceeds the value of the DEBUG_HIP_BLOCK_SYNC environment variable. Change-Id: I95dc8c4bf0258c7e59411b7504220709ed6898c5
Этот коммит содержится в:
@@ -359,14 +359,9 @@ hipError_t hipStreamSynchronize_common(hipStream_t stream) {
|
||||
}
|
||||
bool wait = (stream == nullptr || stream == hipStreamLegacy) ? true : false;
|
||||
auto hip_stream = hip::getStream(stream, wait);
|
||||
bool wait_for_cpu = false;
|
||||
// Force a blocking wait if requested. This avoids a build-up of unreleased CPU commands
|
||||
if (DEBUG_HIP_BLOCK_SYNC != 0) {
|
||||
static std::atomic<uint64_t> flush = 0;
|
||||
wait_for_cpu = ((++flush % DEBUG_HIP_BLOCK_SYNC) == 0) ? true : false;
|
||||
}
|
||||
|
||||
// Wait for the current host queue
|
||||
hip_stream->finish(wait_for_cpu);
|
||||
hip_stream->finish();
|
||||
// Release freed memory for all memory pools on the device
|
||||
hip_stream->GetDevice()->ReleaseFreedMemory();
|
||||
return hipSuccess;
|
||||
|
||||
@@ -1332,6 +1332,9 @@ class VirtualDevice : public amd::HeapObject {
|
||||
const std::string& kernelName,
|
||||
amd::AccumulateCommand* vcmd = nullptr) = 0;
|
||||
|
||||
//! Returns a reference to the atomic counter of outstanding HSA async handlers
|
||||
std::atomic<uint64_t>& QueuedAsyncHandlers() const {
  // The counter member is declared mutable, so a writable reference can be handed out
  // even though this accessor is const.
  return queued_async_handlers_;
}
|
||||
|
||||
private:
|
||||
//! Disable default copy constructor
|
||||
VirtualDevice& operator=(const VirtualDevice&);
|
||||
@@ -1347,6 +1350,7 @@ class VirtualDevice : public amd::HeapObject {
|
||||
|
||||
amd::Monitor execution_; //!< Lock to serialise access to all device objects
|
||||
uint index_; //!< The virtual device unique index
|
||||
mutable std::atomic<uint64_t> queued_async_handlers_ = 0; //!< Outstanding HSA async handlers
|
||||
};
|
||||
|
||||
} // namespace amd::device
|
||||
|
||||
@@ -232,11 +232,14 @@ bool HsaAmdSignalHandler(hsa_signal_value_t value, void* arg) {
|
||||
// Save callback signal
|
||||
hsa_signal_t callback_signal = ts->GetCallbackSignal();
|
||||
|
||||
auto gpu = ts->gpu();
|
||||
gpu->QueuedAsyncHandlers()--;
|
||||
|
||||
// Reset last used SDMA engine mask
|
||||
ts->gpu()->setLastUsedSdmaEngine(0);
|
||||
gpu->setLastUsedSdmaEngine(0);
|
||||
|
||||
// Update the batch, since signal is complete
|
||||
ts->gpu()->updateCommandsState(ts->command().GetBatchHead());
|
||||
gpu->updateCommandsState(ts->command().GetBatchHead());
|
||||
|
||||
// Reset API callback signal. It will release AQL queue and start commands processing
|
||||
if (callback_signal.handle != 0) {
|
||||
@@ -474,6 +477,7 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal(
|
||||
hsa_signal_add_relaxed(prof_signal->signal_, 1);
|
||||
init_value += 1;
|
||||
}
|
||||
gpu_.QueuedAsyncHandlers()++;
|
||||
hsa_status_t result = hsa_amd_signal_async_handler(prof_signal->signal_,
|
||||
HSA_SIGNAL_CONDITION_LT, init_value, &HsaAmdSignalHandler, ts);
|
||||
if (HSA_STATUS_SUCCESS != result) {
|
||||
|
||||
@@ -141,6 +141,11 @@ void HostQueue::finish(bool cpu_wait) {
|
||||
"Can't claim the queue is finished with the active batch!");
|
||||
return;
|
||||
}
|
||||
// Force a blocking wait if requested. This avoids a build-up of unreleased CPU commands
|
||||
if ((DEBUG_HIP_BLOCK_SYNC > 0) &&
|
||||
(vdev()->QueuedAsyncHandlers().load() > DEBUG_HIP_BLOCK_SYNC)) {
|
||||
cpu_wait = true;
|
||||
}
|
||||
}
|
||||
// Force marker if the batch wasn't sent for CPU update or fence is dirty
|
||||
if (nullptr == command || (GetSubmissionBatch() != nullptr) || vdev()->isFenceDirty()) {
|
||||
|
||||
Ссылка в новой задаче
Block a user