SWDEV-404889 - Enable debugger interface in PAL

Add GPU_DEBUG_ENABLE to control ttmp behavior. If enabled,
then HW will collect more debug info at some perf cost.

Change-Id: Icee0686b903a7b1bd483710b9d611877cd43c6aa
This commit is contained in:
German
2023-11-22 14:25:25 -05:00
committed by German Andryeyev
parent dc8f66b86f
commit 7d661bc7df
5 changed files with 28 additions and 26 deletions
+21 -17
View File
@@ -1141,15 +1141,15 @@ bool Device::initializeHeapResources() {
if (iDev()->Finalize(finalizeInfo) != Pal::Result::Success) {
return false;
}
#ifdef PAL_DEBUGGER
Pal::RuntimeSetup setup;
setup.r_debug = reinterpret_cast<uint64_t>(_amdgpu_r_debug_ptr);
if (iDev()->RegisterRuntimeState(&setup) != Pal::Result::Success) {
Pal::HipRuntimeSetup setup {.pRdebug = _amdgpu_r_debug_ptr,
.runtimeState = 1, // Always valid debug state
.ttmpSetupHint = GPU_DEBUG_ENABLE};
setup.pRdebug = _amdgpu_r_debug_ptr;
if (iDev()->RegisterHipRuntimeState(setup) != Pal::Result::Success) {
LogError("Couldn't register debug state from the loader!");
// Note: ignore debug state error, since it's not a critical
// error for the execution
}
#endif
heapInitComplete_ = true;
@@ -1211,10 +1211,11 @@ bool Device::initializeHeapResources() {
// Find an offset in memory for the trap handler.
// Loader returns an absolute address, but PAL accepts base + offset, hence find offset
auto offset = program->GetTrapHandlerAddress() - memRef.pGpuMemory->Desc().gpuVirtAddr;
#ifdef PAL_DEBUGGER
// Bind trap handler to the kernel mode driver
iDev()->BindTrapHandler(Pal::PipelineBindPoint::Compute, memRef.pGpuMemory, offset);
#endif
// Bind the trap handler's executable to the kernel mode driver
result = iDev()->SetHipTrapHandler(memRef.pGpuMemory, offset, nullptr, 0);
if (result != Pal::Result::Success) {
LogError("KMD failed to setup the trap handler");
}
} else {
LogError("Failed to make trap handler resident in memory");
}
@@ -2607,28 +2608,31 @@ bool Device::createBlitProgram() {
result = false;
}
#ifdef PAL_DEBUGGER
if (settings().useLightning_) {
const std::string TrapHandlerAsm = TrapHandlerCode;
// Create a program for trap handler
// note: It's not critical for runtime functionality to fail trap handler initialization
trap_handler_ = new amd::Program(*context_, TrapHandlerAsm.c_str(), amd::Program::Assembly);
if (trap_handler_ != nullptr) {
auto asm_program = new amd::Program(*context_, TrapHandlerAsm.c_str(), amd::Program::Assembly);
if (asm_program != nullptr) {
std::vector<amd::Device*> devices;
devices.push_back(this);
std::string opt = "-cl-internal-kernel ";
if (auto retval =
trap_handler_->build(devices, opt.c_str(), nullptr, nullptr, false) != CL_SUCCESS) {
asm_program->build(devices, opt.c_str(), nullptr, nullptr, false) != CL_SUCCESS) {
DevLogPrintfError("Build failed for trap handler with error code: %d\n", retval);
}
if (!trap_handler_->load()) {
DevLogPrintfError("Could not load the trap handler \n");
asm_program->release();
} else {
if (asm_program->load()) {
trap_handler_ = asm_program;
} else {
DevLogPrintfError("Could not load the trap handler \n");
asm_program->release();
}
}
} else {
DevLogPrintfError("Trap handler creation failed\n");
}
}
#endif
return result;
}
+5
View File
@@ -247,6 +247,11 @@ class Device : public NullDevice {
AqlPacketMgmt aql_packet_mgmt_; //!< AQL packets management class for debugger support
//! Default constructor: starts counter_ at 1, selects Pal::EngineTypeCompute as the
//! engine type, sets index_ to 0 and constructs queue_lock_ with a descriptive name.
//! NOTE(review): the meaning of the `true` argument passed to queue_lock_ is not
//! visible here (presumably a lock option such as recursion) -- confirm.
QueueRecycleInfo() : counter_(1), engineType_(Pal::EngineTypeCompute), index_(0),
queue_lock_("Queue lock for sharing", true) {}
//! Exposes the address of the AQL packet array held by aql_packet_mgmt_
//! as an integer value.
uintptr_t AqlPacketList() const {
  const auto list_addr = reinterpret_cast<uintptr_t>(&aql_packet_mgmt_.aql_packets_);
  return list_addr;
}
};
//! Locks any access to the virtual GPUs
-6
View File
@@ -156,9 +156,7 @@ VirtualGPU::Queue* VirtualGPU::Queue::Create(VirtualGPU& gpu, Pal::QueueType que
return nullptr;
}
addrQ = reinterpret_cast<address>(&info[1]);
#ifdef PAL_DEBUGGER
qCreateInfo.aqlPacketList = info->AqlPacketList();
#endif
result = palDev->CreateQueue(qCreateInfo, addrQ, &queue->iQueue_);
if (result == Pal::Result::Success) {
const_cast<Device&>(gpu.dev()).QueuePool().insert({queue->iQueue_, info});
@@ -203,9 +201,7 @@ VirtualGPU::Queue* VirtualGPU::Queue::Create(VirtualGPU& gpu, Pal::QueueType que
queue->aql_mgmt_ = &info->aql_packet_mgmt_;
// Exclusive compute path
addrQ = reinterpret_cast<address>(&queue[1]);
#ifdef PAL_DEBUGGER
qCreateInfo.aqlPacketList = info->AqlPacketList();
#endif
result = palDev->CreateQueue(qCreateInfo, addrQ, &queue->iQueue_);
}
if (result != Pal::Result::Success) {
@@ -2668,9 +2664,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes,
dispatchParam.wavesPerSh = 0;
dispatchParam.useAtc = dev().settings().svmFineGrainSystem_ ? true : false;
dispatchParam.kernargSegmentSize = hsaKernel.argsBufferSize();
#ifdef PAL_DEBUGGER
dispatchParam.aqlPacketIndex = aql_index;
#endif
// Run AQL dispatch in HW
eventBegin(MainEngine);
iCmd()->CmdDispatchAql(dispatchParam);
-3
View File
@@ -60,9 +60,6 @@ struct AqlPacketMgmt : public amd::EmbeddedObject {
memset(aql_vgpus_, 0, sizeof(aql_vgpus_));
}
//! Exposes the address of the aql_packets_ array as an integer value.
uintptr_t AqlPacketList() const {
  const auto list_addr = reinterpret_cast<uintptr_t>(&aql_packets_);
  return list_addr;
}
hsa_kernel_dispatch_packet_t aql_packets_[kAqlPacketsListSize]; //!< The list of AQL packets
GpuEvent aql_events_[kAqlPacketsListSize]; //!< The list of gpu for each AQL packet
VirtualGPU* aql_vgpus_[kAqlPacketsListSize]; //!< The list of vgpus which had submissions
+2
View File
@@ -237,6 +237,8 @@ release(bool, HIP_FORCE_DEV_KERNARG, 0, \
"Force device mem for kernel args.") \
release(bool, DEBUG_CLR_GRAPH_PACKET_CAPTURE, false, \
"Enable/Disable graph packet capturing") \
release(bool, GPU_DEBUG_ENABLE, false, \
"Enables collection of extra info for debugger at some perf cost") \
release(cstring, HIPRTC_COMPILE_OPTIONS_APPEND, "", \
"Set compile options needed for hiprtc compilation") \
release(cstring, HIPRTC_LINK_OPTIONS_APPEND, "", \