SWDEV-404889 - Enable debugger interface in PAL
Add GPU_DEBUG_ENABLE to control ttmp behavior. If enabled, then HW will collect more debug info at some perf cost. Change-Id: Icee0686b903a7b1bd483710b9d611877cd43c6aa
This commit is contained in:
@@ -1141,15 +1141,15 @@ bool Device::initializeHeapResources() {
|
||||
if (iDev()->Finalize(finalizeInfo) != Pal::Result::Success) {
|
||||
return false;
|
||||
}
|
||||
#ifdef PAL_DEBUGGER
|
||||
Pal::RuntimeSetup setup;
|
||||
setup.r_debug = reinterpret_cast<uint64_t>(_amdgpu_r_debug_ptr);
|
||||
if (iDev()->RegisterRuntimeState(&setup) != Pal::Result::Success) {
|
||||
Pal::HipRuntimeSetup setup {.pRdebug = _amdgpu_r_debug_ptr,
|
||||
.runtimeState = 1, // Always valid debug state
|
||||
.ttmpSetupHint = GPU_DEBUG_ENABLE};
|
||||
setup.pRdebug = _amdgpu_r_debug_ptr;
|
||||
if (iDev()->RegisterHipRuntimeState(setup) != Pal::Result::Success) {
|
||||
LogError("Couldn't register debug state from the loader!");
|
||||
// Note: ignore debug state error, since it's not a critical
|
||||
// error for the execution
|
||||
}
|
||||
#endif
|
||||
|
||||
heapInitComplete_ = true;
|
||||
|
||||
@@ -1211,10 +1211,11 @@ bool Device::initializeHeapResources() {
|
||||
// Find an offset in memory for the trap handler.
|
||||
// Loader returns an absolute address, but PAL accepts base + offset, hence find offset
|
||||
auto offset = program->GetTrapHandlerAddress() - memRef.pGpuMemory->Desc().gpuVirtAddr;
|
||||
#ifdef PAL_DEBUGGER
|
||||
// Bind trap handler to the kernel mode driver
|
||||
iDev()->BindTrapHandler(Pal::PipelineBindPoint::Compute, memRef.pGpuMemory, offset);
|
||||
#endif
|
||||
// Bind the trap handler's executable to the kernel mode driver
|
||||
result = iDev()->SetHipTrapHandler(memRef.pGpuMemory, offset, nullptr, 0);
|
||||
if (result != Pal::Result::Success) {
|
||||
LogError("KMD failed to setup the trap handler");
|
||||
}
|
||||
} else {
|
||||
LogError("Failed to make trap handler resident in memory");
|
||||
}
|
||||
@@ -2607,28 +2608,31 @@ bool Device::createBlitProgram() {
|
||||
result = false;
|
||||
}
|
||||
|
||||
#ifdef PAL_DEBUGGER
|
||||
if (settings().useLightning_) {
|
||||
const std::string TrapHandlerAsm = TrapHandlerCode;
|
||||
// Create a program for trap handler
|
||||
// note: It's not critical for runtime functionality to fail trap handler initialization
|
||||
trap_handler_ = new amd::Program(*context_, TrapHandlerAsm.c_str(), amd::Program::Assembly);
|
||||
if (trap_handler_ != nullptr) {
|
||||
auto asm_program = new amd::Program(*context_, TrapHandlerAsm.c_str(), amd::Program::Assembly);
|
||||
if (asm_program != nullptr) {
|
||||
std::vector<amd::Device*> devices;
|
||||
devices.push_back(this);
|
||||
std::string opt = "-cl-internal-kernel ";
|
||||
if (auto retval =
|
||||
trap_handler_->build(devices, opt.c_str(), nullptr, nullptr, false) != CL_SUCCESS) {
|
||||
asm_program->build(devices, opt.c_str(), nullptr, nullptr, false) != CL_SUCCESS) {
|
||||
DevLogPrintfError("Build failed for trap handler with error code: %d\n", retval);
|
||||
}
|
||||
if (!trap_handler_->load()) {
|
||||
DevLogPrintfError("Could not load the trap handler \n");
|
||||
asm_program->release();
|
||||
} else {
|
||||
if (asm_program->load()) {
|
||||
trap_handler_ = asm_program;
|
||||
} else {
|
||||
DevLogPrintfError("Could not load the trap handler \n");
|
||||
asm_program->release();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
DevLogPrintfError("Trap handler creation failed\n");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@@ -247,6 +247,11 @@ class Device : public NullDevice {
|
||||
AqlPacketMgmt aql_packet_mgmt_; //!< AQL packets management class for debugger support
|
||||
QueueRecycleInfo() : counter_(1), engineType_(Pal::EngineTypeCompute), index_(0),
|
||||
queue_lock_("Queue lock for sharing", true) {}
|
||||
|
||||
//! Returns the address of aql_packet_mgmt_.aql_packets_ (the AQL packet list) as a uintptr_t
|
||||
uintptr_t AqlPacketList() const {
|
||||
return reinterpret_cast<uintptr_t>(&aql_packet_mgmt_.aql_packets_);
|
||||
}
|
||||
};
|
||||
|
||||
//! Locks any access to the virtual GPUs
|
||||
|
||||
@@ -156,9 +156,7 @@ VirtualGPU::Queue* VirtualGPU::Queue::Create(VirtualGPU& gpu, Pal::QueueType que
|
||||
return nullptr;
|
||||
}
|
||||
addrQ = reinterpret_cast<address>(&info[1]);
|
||||
#ifdef PAL_DEBUGGER
|
||||
qCreateInfo.aqlPacketList = info->AqlPacketList();
|
||||
#endif
|
||||
result = palDev->CreateQueue(qCreateInfo, addrQ, &queue->iQueue_);
|
||||
if (result == Pal::Result::Success) {
|
||||
const_cast<Device&>(gpu.dev()).QueuePool().insert({queue->iQueue_, info});
|
||||
@@ -203,9 +201,7 @@ VirtualGPU::Queue* VirtualGPU::Queue::Create(VirtualGPU& gpu, Pal::QueueType que
|
||||
queue->aql_mgmt_ = &info->aql_packet_mgmt_;
|
||||
// Exclusive compute path
|
||||
addrQ = reinterpret_cast<address>(&queue[1]);
|
||||
#ifdef PAL_DEBUGGER
|
||||
qCreateInfo.aqlPacketList = info->AqlPacketList();
|
||||
#endif
|
||||
result = palDev->CreateQueue(qCreateInfo, addrQ, &queue->iQueue_);
|
||||
}
|
||||
if (result != Pal::Result::Success) {
|
||||
@@ -2668,9 +2664,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes,
|
||||
dispatchParam.wavesPerSh = 0;
|
||||
dispatchParam.useAtc = dev().settings().svmFineGrainSystem_ ? true : false;
|
||||
dispatchParam.kernargSegmentSize = hsaKernel.argsBufferSize();
|
||||
#ifdef PAL_DEBUGGER
|
||||
dispatchParam.aqlPacketIndex = aql_index;
|
||||
#endif
|
||||
// Run AQL dispatch in HW
|
||||
eventBegin(MainEngine);
|
||||
iCmd()->CmdDispatchAql(dispatchParam);
|
||||
|
||||
@@ -60,9 +60,6 @@ struct AqlPacketMgmt : public amd::EmbeddedObject {
|
||||
memset(aql_vgpus_, 0, sizeof(aql_vgpus_));
|
||||
}
|
||||
|
||||
//! Returns the aql packet list
|
||||
uintptr_t AqlPacketList() const { return reinterpret_cast<uintptr_t>(&aql_packets_); }
|
||||
|
||||
hsa_kernel_dispatch_packet_t aql_packets_[kAqlPacketsListSize]; //!< The list of AQL packets
|
||||
GpuEvent aql_events_[kAqlPacketsListSize]; //!< The list of gpu for each AQL packet
|
||||
VirtualGPU* aql_vgpus_[kAqlPacketsListSize]; //!< The list of vgpus which had submissions
|
||||
|
||||
@@ -237,6 +237,8 @@ release(bool, HIP_FORCE_DEV_KERNARG, 0, \
|
||||
"Force device mem for kernel args.") \
|
||||
release(bool, DEBUG_CLR_GRAPH_PACKET_CAPTURE, false, \
|
||||
"Enable/Disable graph packet capturing") \
|
||||
release(bool, GPU_DEBUG_ENABLE, false, \
|
||||
"Enables collection of extra info for debugger at some perf cost") \
|
||||
release(cstring, HIPRTC_COMPILE_OPTIONS_APPEND, "", \
|
||||
"Set compile options needed for hiprtc compilation") \
|
||||
release(cstring, HIPRTC_LINK_OPTIONS_APPEND, "", \
|
||||
|
||||
Reference in New Issue
Block a user