SWDEV-283726 - Workaround for rocprof hang

This addresses the rocprof hang seen with direct dispatch. The
workaround requeues the handler if any signal value in the batch
has not yet been decremented. To remember the last position in the list, we
save the parsed command in the current timestamp struct.

Change-Id: I02959e463cfe3cee83c54808ffd6e6f48f43b4e8


[ROCm/clr commit: e5e635f9bf]
This commit is contained in:
Saleel Kudchadker
2021-05-11 16:55:15 -07:00
orang tua 6cd1f5854e
melakukan eeea586a2a
5 mengubah file dengan 49 tambahan dan 1 penghapusan
+2
Melihat File
@@ -1192,6 +1192,8 @@ class VirtualDevice : public amd::HeapObject {
ShouldNotReachHere();
}
//! Base implementation for stream operation commands: it only invokes ShouldNotReachHere()
virtual void submitStreamOperation(amd::StreamOperationCommand& cmd) {
  ShouldNotReachHere();
}
//! Pure virtual hook that receives the profiler enable flag; every concrete
//! virtual device has to provide its own implementation
virtual void profilerAttach(bool enable) = 0;
//! Returns a reference to the blit manager object held through blitMgr_
device::BlitManager& blitMgr() const {
  return *blitMgr_;
}
@@ -309,6 +309,9 @@ class VirtualGPU : public device::VirtualDevice, public CALGSLContext {
//! End the command profiling
void profilingEnd(amd::Command& command);
//! Attach profiler (empty implementation here: the enable flag is ignored)
void profilerAttach(bool enable = false) {}
//! Collect the profiling results
bool profilingCollectResults(CommandBatch* cb, //!< Command batch
const amd::Event* waitingEvent //!< Waiting event
@@ -355,6 +355,8 @@ class VirtualGPU : public device::VirtualDevice {
void flush(amd::Command* list = nullptr, bool wait = false);
//! Profiler attach hook; the implementation here is empty, so the enable flag is ignored
void profilerAttach(bool enable = false) {}
//! Accessor for the GPU device object (gpuDevice_) this instance is associated with
const Device& dev() const {
  return gpuDevice_;
}
@@ -147,12 +147,41 @@ void Timestamp::checkGpuTime() {
// ================================================================================================
bool HsaAmdSignalHandler(hsa_signal_value_t value, void* arg) {
Timestamp* ts = reinterpret_cast<Timestamp*>(arg);
amd::Thread* thread = amd::Thread::current();
if (!(thread != nullptr ||
((thread = new amd::HostThread()) != nullptr && thread == amd::Thread::current()))) {
return false;
}
amd::ScopedLock sl(ts->gpu()->execution());
// Profiler workaround: while a profiler is attached, walk the commands of the batch and, if any
// of their signals still holds a positive value, register this handler again and bail out.
if (ts->gpu()->isProfilerAttached()) {
  // Resume from the command recorded by an earlier invocation, if there is one;
  // otherwise start at the head of the batch.
  amd::Command* head = ts->getParsedCommand();
  if (head == nullptr) {
    head = ts->command().GetBatchHead();
  }
  while (head != nullptr) {
    if (head->data() != nullptr) {
      Timestamp* headTs = reinterpret_cast<Timestamp*>(head->data());
      // Record the current position so that a requeued handler continues from this command
      ts->setParsedCommand(head);
      for (auto it : headTs->Signals()) {
        // Load the value first and compare afterwards. Writing
        // `int64_t val = hsa_signal_load_relaxed(...) > 0` stores the result of the comparison
        // (0 or 1) in val, so the log message below would never show the real signal value.
        const hsa_signal_value_t val = hsa_signal_load_relaxed(it->signal_);
        if (val > 0) {
          // NOTE(review): the handler is registered on the first signal of headTs rather than
          // on the signal found pending (it->signal_) - confirm this is intended.
          hsa_status_t result = hsa_amd_signal_async_handler(headTs->Signals()[0]->signal_,
                                  HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne,
                                  &HsaAmdSignalHandler, ts);
          if (HSA_STATUS_SUCCESS != result) {
            LogError("hsa_amd_signal_async_handler() failed to requeue the handler!");
          } else {
            ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Requeue handler : value(%d), timestamp(%p),"
                    "handle(0x%lx)", static_cast<uint32_t>(val), headTs,
                    headTs->HwProfiling() ? headTs->Signals()[0]->signal_.handle : 0);
          }
          // Stop here; the rest of the handler is skipped for this invocation.
          // NOTE(review): presumably `false` also tells the runtime not to keep this
          // registration alive - verify against the HSA handler contract.
          return false;
        }
      }
    }
    head = head->getNext();
  }
}
ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Handler: value(%d), timestamp(%p), handle(0x%lx)",
static_cast<uint32_t>(value), arg, ts->HwProfiling() ? ts->Signals()[0]->signal_.handle : 0);
@@ -89,6 +89,7 @@ class Timestamp : public amd::HeapObject {
uint64_t end_;
VirtualGPU* gpu_; //!< Virtual GPU, associated with this timestamp
const amd::Command& command_; //!< Command, associated with this timestamp
amd::Command* parsedCommand_; //!< Command down the list, considering command_ as head
std::vector<ProfilingSignal*> signals_;
public:
@@ -96,7 +97,8 @@ class Timestamp : public amd::HeapObject {
: start_(std::numeric_limits<uint64_t>::max())
, end_(0)
, gpu_(gpu)
, command_(command) {}
, command_(command)
, parsedCommand_(nullptr) {}
//! Destructor; the body is empty
~Timestamp() {}
@@ -131,6 +133,12 @@ class Timestamp : public amd::HeapObject {
//! Accessor for the amd::Command this timestamp was constructed with
const amd::Command& command() const {
  return command_;
}
//! Stores the given command pointer as the parsed command of this timestamp
void setParsedCommand(amd::Command* command) {
  parsedCommand_ = command;
}
//! Returns the parsed command pointer last stored in this timestamp
amd::Command* getParsedCommand() const {
  return parsedCommand_;
}
//! Accessor for the virtual GPU pointer (gpu_) used with this timestamp
VirtualGPU* gpu() const {
  return gpu_;
}
};
@@ -344,6 +352,9 @@ class VirtualGPU : public device::VirtualDevice {
//! Returns the timestamp pointer currently held in timestamp_
Timestamp* timestamp() const {
  return timestamp_;
}
//! Records in the profilerAttached_ state bit whether a profiler is attached
void profilerAttach(bool enable = false) {
  profilerAttached_ = enable;
}
//! Reports the current value of the profilerAttached_ state bit
bool isProfilerAttached() {
  return profilerAttached_;
}
// } roc OpenCL integration
private:
bool dispatchAqlPacket(hsa_kernel_dispatch_packet_t* packet, uint16_t header,
@@ -413,6 +424,7 @@ class VirtualGPU : public device::VirtualDevice {
uint32_t cooperative_ : 1; //!< Cooperative launch is enabled
uint32_t addSystemScope_ : 1; //!< Insert a system scope to the next aql
uint32_t tracking_created_ : 1; //!< Enabled if tracking object was properly initialized
uint32_t profilerAttached_ : 1; //!< Indicates if profiler is attached
};
uint32_t state_;
};