From 7cd6e366edaf9cbdbe92c5be5aeb58a8c838ec59 Mon Sep 17 00:00:00 2001 From: Sean Keely Date: Thu, 26 Apr 2018 01:54:03 -0500 Subject: [PATCH] Workaround SDMA poll packet preemption. Use async. signal handler to satisfy dependencies for SDMA blits. Change-Id: Ifa8d3ee6810509f400a568ca2387ac6ab3ab7c36 --- runtime/hsa-runtime/core/inc/amd_blit_sdma.h | 1 + runtime/hsa-runtime/core/inc/blit.h | 3 ++ .../core/runtime/amd_gpu_agent.cpp | 48 +++++++++++++++++++ 3 files changed, 52 insertions(+) diff --git a/runtime/hsa-runtime/core/inc/amd_blit_sdma.h b/runtime/hsa-runtime/core/inc/amd_blit_sdma.h index 463a83c34b..8bf05d006e 100644 --- a/runtime/hsa-runtime/core/inc/amd_blit_sdma.h +++ b/runtime/hsa-runtime/core/inc/amd_blit_sdma.h @@ -61,6 +61,7 @@ class BlitSdmaBase : public core::Blit { static const size_t kCopyPacketSize; static const size_t kMaxSingleCopySize; static const size_t kMaxSingleFillSize; + /* Overrides core::Blit::isSDMA(): every BlitSdmaBase reports true. */ virtual bool isSDMA() const override { return true; } }; // RingIndexTy: 32/64-bit monotonic ring index, counting in bytes. diff --git a/runtime/hsa-runtime/core/inc/blit.h b/runtime/hsa-runtime/core/inc/blit.h index 48aebaa64b..571893615c 100644 --- a/runtime/hsa-runtime/core/inc/blit.h +++ b/runtime/hsa-runtime/core/inc/blit.h @@ -113,6 +113,9 @@ class Blit { /// @return HSA_STATUS_SUCCESS if the request to enable/disable profiling is /// successful. virtual hsa_status_t EnableProfiling(bool enable) = 0; + + /// @brief Whether this blit's operations use SDMA. The default implementation returns false; BlitSdmaBase overrides it to return true. 
+ virtual bool isSDMA() const { return false; } }; } // namespace core diff --git a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index bc91b564de..7d5727e182 100644 --- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -49,6 +49,8 @@ #include #include #include +#include +#include #include "core/inc/amd_aql_queue.h" #include "core/inc/amd_blit_kernel.h" @@ -599,6 +601,39 @@ hsa_status_t GpuAgent::PostToolsInit() { return HSA_STATUS_SUCCESS; } +/* Arguments of a copy deferred by GpuAgent::DmaCopy. An instance is allocated there, handed to DmaDeps as its void* arg, and deleted by DmaDeps after the copy has been submitted. */ struct DmaDeps_t { + void* dst; + const void* src; + size_t size; + core::Signal* out_signal; /* address of the out_signal given to DmaCopy; NOTE(review): it is dereferenced later from DmaDeps, so it has to stay valid until then - confirm with callers */ + core::Blit* blit; + std::unique_ptr> deps; /* dependency signals taken over from DmaCopy's dep_signals; the handler is always registered on deps->back() */ +}; + +/* Handler installed with SetAsyncSignalHandler (condition HSA_SIGNAL_CONDITION_EQ, value 0) on the last signal in Args->deps. val: signal value supplied to the handler. arg: the DmaDeps_t built in GpuAgent::DmaCopy. Returns true immediately when val != 0. Otherwise returns false after either re-registering itself on an earlier dependency that still reads non-zero, or submitting the copy and deleting the DmaDeps_t. NOTE(review): what the bool result means to the runtime is not visible in this patch - presumably true keeps the handler armed; confirm against SetAsyncSignalHandler. */ static bool DmaDeps(hsa_signal_value_t val, void* arg) { + DmaDeps_t* Args = (DmaDeps_t*)arg; + std::vector& deps = *(Args->deps.get()); + if (val != 0) return true; + /* deps.back() is the signal this handler was registered on, so scan the entries before it, last to first, for one that is still non-zero. Relies on deps being non-empty, which DmaCopy checks before registering. */ for (int i = deps.size() - 1; i != 0; i--) { + if (deps[i - 1]->LoadRelaxed() != 0) { + deps.resize(i); /* drop the entries already seen at zero so the pending signal becomes deps.back() */ + hsa_status_t err = core::Runtime::runtime_singleton_->SetAsyncSignalHandler( + core::Signal::Convert(deps.back()), HSA_SIGNAL_CONDITION_EQ, 0, DmaDeps, arg); + assert(err == HSA_STATUS_SUCCESS && "Failed to update dependency handler."); /* NOTE(review): checked by assert only, so a failed re-registration goes unhandled when NDEBUG is defined */ + return false; + } + } + deps.clear(); /* every dependency read zero: the copy is submitted with an empty dependency list */ + hsa_status_t stat; + do { // Retry until the submit reports success: only ready to run copies are on the SDMA queue so if resources are busy they will soon be free. NOTE(review): retries on every non-success status, with no bound.
+ stat = Args->blit->SubmitLinearCopyCommand(Args->dst, Args->src, Args->size, deps, + *(Args->out_signal)); + } while (stat != HSA_STATUS_SUCCESS); + delete Args; /* the handler releases the DmaDeps_t allocated in GpuAgent::DmaCopy once the submit has succeeded */ + return false; +} + hsa_status_t GpuAgent::DmaCopy(void* dst, const void* src, size_t size) { return blits_[BlitDevToDev]->SubmitLinearCopyCommand(dst, src, size); } @@ -624,6 +659,19 @@ hsa_status_t GpuAgent::DmaCopy(void* dst, core::Agent& dst_agent, out_signal.async_copy_agent(core::Agent::Convert(this->public_handle())); } + /* A copy that has dependency signals and goes through an SDMA blit is not submitted here: its arguments are packaged and DmaDeps submits it once the dependency signals read zero. */ if ((dep_signals.size() != 0) && blit->isSDMA()) { + DmaDeps_t* Arg = new DmaDeps_t; + Arg->dst = dst; + Arg->src = src; + Arg->size = size; + Arg->out_signal = &out_signal; + Arg->blit = (*blit).get(); + Arg->deps.reset(new std::vector(std::move(dep_signals))); + hsa_status_t stat = core::Runtime::runtime_singleton_->SetAsyncSignalHandler( + core::Signal::Convert(Arg->deps->back()), HSA_SIGNAL_CONDITION_EQ, 0, DmaDeps, Arg); /* NOTE(review): Arg is not freed on this path if the registration fails, and dep_signals has already been passed through std::move - confirm whether a failure path is needed */ + return stat; /* status of the handler registration, not of the copy itself */ + } + hsa_status_t stat = blit->SubmitLinearCopyCommand(dst, src, size, dep_signals, out_signal); return stat;