fe098712df
ECR #399840 - OpenCL Runtime HW Debug support development - set aclBinary & event in the pre-dispatch callback function Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugger.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#347 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#125 edit
365 строки
11 KiB
C++
365 строки
11 KiB
C++
/*******************************************************************************
|
|
*
|
|
* Copyright (c) 2014 Advanced Micro Devices, Inc. (unpublished)
|
|
*
|
|
* All rights reserved. This notice is intended as a precaution against
|
|
* inadvertent publication and does not imply publication or any waiver
|
|
* of confidentiality. The year included in the foregoing notice is the
|
|
* year of creation of the work.
|
|
*
|
|
******************************************************************************/
|
|
|
|
#include "gpudebugmanager.hpp"
|
|
#include "gpudevice.hpp"
|
|
#include "platform/commandqueue.hpp"
|
|
|
|
#include "device/device.hpp"
|
|
#include "device/gpu/gpumemory.hpp"
|
|
#include <iostream>
|
|
#include <sstream>
|
|
#include <fstream>
|
|
|
|
namespace gpu {
|
|
|
|
class VirtualGPU;
|
|
class Device;
|
|
class Memory;
|
|
|
|
/*
|
|
***************************************************************************
|
|
* Implementation of GPU Debug Manager class
|
|
***************************************************************************
|
|
*/
|
|
|
|
//! Constructs the GPU debug manager for the given device.
//!
//! Every handle owned by the manager starts out empty (no virtual GPU,
//! no message storage, no address-watch buffer, no OpenCL event handle,
//! no AQL packet), the runtime trap handler/buffer are cleared, and the
//! exception policy and execution mode receive their initial values.
GpuDebugManager::GpuDebugManager(amd::Device* device)
    : HwDebugManager(device),
      vGpu_(NULL),
      debugMessages_(0),
      addressWatch_(NULL),
      addressWatchSize_(0),
      oclEventHandle_(NULL)
{
    // No dispatch has been intercepted yet, so there is no AQL packet
    aqlPacket_ = static_cast<hsa_kernel_dispatch_packet_t*>(NULL);

    // Runtime trap handler and trap buffer are not installed yet
    rtTrapHandlerInfo_.trap_.trapHandler_ = NULL;
    rtTrapHandlerInfo_.trap_.trapBuffer_  = NULL;

    // Kernel execution mode: all bits cleared
    execMode_.ui32All = 0;

    // Initial exception policy: no exceptions masked in, waves resume,
    // the host ignores the exception, broadcast wave mode
    excpPolicy_.exceptionMask = 0x0;
    excpPolicy_.waveAction    = CL_DBG_WAVES_RESUME;
    excpPolicy_.hostAction    = CL_DBG_HOST_IGNORE;
    excpPolicy_.waveMode      = CL_DBG_WAVEMODE_BROADCAST;
}
|
|
|
|
//! Releases the address-watch storage allocated by setAddressWatch(), if any.
GpuDebugManager::~GpuDebugManager()
{
    // delete [] on a null pointer is a no-op, so no explicit check is needed
    delete [] addressWatch_;
}
|
|
|
|
void
|
|
GpuDebugManager::executePreDispatchCallBack(void* aqlPacket,
|
|
void* toolInfo)
|
|
{
|
|
DebugToolInfo* info = reinterpret_cast<DebugToolInfo*>(toolInfo);
|
|
|
|
aqlPacket_ = reinterpret_cast<hsa_kernel_dispatch_packet_t*>(aqlPacket);
|
|
|
|
// Only if the pre-dispatch callback is set, will we update cache
|
|
// flush configuration and build the memory descriptor.
|
|
if (NULL != preDispatchCallBackFunc_) {
|
|
// Build the scratch memory descriptor
|
|
device()->gslCtx()->BuildScratchBufferResource(debugInfo_.scratchMemoryDescriptor_,
|
|
info->scratchAddress_,
|
|
info->scratchSize_);
|
|
|
|
// Build the global memory descriptor
|
|
device()->gslCtx()->BuildHeapBufferResource(debugInfo_.globalMemoryDescriptor_,
|
|
info->globalAddress_);
|
|
|
|
// // for invalidate cache (BuildEndOfKernelNotifyCommands)
|
|
// aqlPacket->release_fence_scope = 2;
|
|
|
|
aclBinary_ = reinterpret_cast<void*>(info->aclBinary_);
|
|
oclEventHandle_ = reinterpret_cast<void*>(as_cl(info->event_));
|
|
|
|
cl_device_id clDeviceId = as_cl(device_);
|
|
preDispatchCallBackFunc_(clDeviceId,
|
|
oclEventHandle_,
|
|
aqlPacket_,
|
|
aclBinary_,
|
|
deviceTrapInfo_,
|
|
preDispatchCallBackArgs_);
|
|
}
|
|
|
|
// Copy the various info set by the debugger/profiler to the tool info structure
|
|
setupTrapInformation(info);
|
|
}
|
|
|
|
void
|
|
GpuDebugManager::executePostDispatchCallBack()
|
|
{
|
|
if (NULL != postDispatchCallBackFunc_) {
|
|
cl_device_id clDeviceId = as_cl(device_);
|
|
postDispatchCallBackFunc_(clDeviceId,
|
|
aqlPacket_->completion_signal.handle,
|
|
postDispatchCallBackArgs_);
|
|
}
|
|
}
|
|
|
|
|
|
//! Records the debugger's context and message storage.
//!
//! Registration is refused when HW debug is disabled in the device settings.
//! While the debugger is not yet registered, the message storage is stored
//! and flagged as ready; the registered flag itself stays cleared here and
//! is raised by registerDebuggerOnQueue().
//!
//! @param context         Context used by the debugger
//! @param messageStorage  Address of the debug message storage
//! @return CL_SUCCESS, or CL_DEBUGGER_REGISTER_FAILURE_AMD when HW debug
//!         is not enabled
cl_int
GpuDebugManager::registerDebugger(amd::Context* context, uintptr_t messageStorage)
{
    //! @todo: obtain the global mutex of HW debug to make sure only one debugger process exist

    const bool hwDebugEnabled = device()->settings().enableHwDebug_;
    if (!hwDebugEnabled) {
        LogError("debugmanager: Register debugger error - HW DEBUG is not enable");
        return CL_DEBUGGER_REGISTER_FAILURE_AMD;
    }

    // Not registered yet: remember where the debug messages go and mark the
    // buffer as ready
    const bool notYetRegistered = !isRegistered();
    if (notYetRegistered) {
        debugMessages_     = messageStorage;
        dbgMsgBufferReady_ = true;
        isRegistered_      = false;
    }

    context_ = context;

    return CL_SUCCESS;
}
|
|
|
|
void
|
|
GpuDebugManager::unregisterDebugger()
|
|
{
|
|
if (isRegistered()) {
|
|
//! @todo: release the global mutex of HW debug
|
|
|
|
// reset the debugger registration flag
|
|
isRegistered_ = false;
|
|
dbgMsgBufferReady_ = false;
|
|
|
|
context_ = NULL;
|
|
}
|
|
}
|
|
|
|
//! Registers the HW debugger on the given virtual device (queue).
//!
//! Requires the message buffer to be ready (see registerDebugger()). When
//! the debugger is already registered nothing is done. Otherwise the queue
//! is asked to register the debugger with the stored message storage; on
//! success the queue is remembered and the debugger is marked registered.
//!
//! @param vDevice  Virtual device, treated as a gpu::VirtualGPU
//! @return CL_SUCCESS, or CL_DEBUGGER_REGISTER_FAILURE_AMD when the message
//!         buffer is not ready or the queue refuses the registration
cl_int
GpuDebugManager::registerDebuggerOnQueue(device::VirtualDevice* vDevice)
{
    if (!isMsgBufferReady()) {
        return CL_DEBUGGER_REGISTER_FAILURE_AMD;
    }

    // Already registered - nothing to be done
    if (isRegistered()) {
        return CL_SUCCESS;
    }

    VirtualGPU* queueGpu = reinterpret_cast<gpu::VirtualGPU*>(vDevice);

    // Populate the fields in the debugMessages structure used by the GPU
    // exception notification
    if (!queueGpu->RegisterHwDebugger(debugMessages_)) {
        return CL_DEBUGGER_REGISTER_FAILURE_AMD;
    }

    vGpu_         = queueGpu;
    isRegistered_ = true;

    return CL_SUCCESS;
}
|
|
|
|
void
|
|
GpuDebugManager::flushCache(uint32_t mask)
|
|
{
|
|
HwDbgGpuCacheMask cacheMask(mask);
|
|
device()->xferQueue()->flushCuCaches(cacheMask);
|
|
}
|
|
|
|
|
|
void
|
|
GpuDebugManager::setupTrapInformation(DebugToolInfo* toolInfo)
|
|
{
|
|
toolInfo->scratchAddress_ = 0;
|
|
toolInfo->scratchSize_ = 0;
|
|
toolInfo->globalAddress_ = 0;
|
|
toolInfo->sqPerfcounterEnable_ = false;
|
|
|
|
// Set up trap related info in the kernel info structure to be
|
|
// used in the kernel dispatch.
|
|
toolInfo->exceptionMask_ = excpPolicy_.exceptionMask;
|
|
toolInfo->gpuSingleStepMode_ = execMode_.gpuSingleStepMode;
|
|
toolInfo->monitorMode_ = execMode_.monitorMode;
|
|
|
|
// The order of these three bits is determined by the definition
|
|
// of the register COMPUTE_DISPATCH_INITIATOR
|
|
toolInfo->cacheDisableMask_ = ((execMode_.disableL1Scalar << 2)
|
|
| (execMode_.disableL2Cache << 1)
|
|
| (execMode_.disableL1Vector));
|
|
|
|
toolInfo->reservedCuNum_ = execMode_.reservedCuNum;
|
|
|
|
toolInfo->trapHandler_ =
|
|
as_amd(reinterpret_cast<cl_mem>(deviceTrapInfo_[kDebugTrapHandlerLocation]));
|
|
toolInfo->trapBuffer_ =
|
|
as_amd(reinterpret_cast<cl_mem>(deviceTrapInfo_[kDebugTrapBufferLocation]));
|
|
}
|
|
|
|
|
|
void
|
|
GpuDebugManager::getPacketAmdInfo(
|
|
const void* aqlCodeInfo,
|
|
void* packetInfo) const
|
|
|
|
{
|
|
const AqlCodeInfo* codeInfo =
|
|
reinterpret_cast<const AqlCodeInfo*>(aqlCodeInfo);
|
|
|
|
const amd_kernel_code_t* hostAqlCode = codeInfo->aqlCode_;
|
|
|
|
PacketAmdInfo* packet =
|
|
reinterpret_cast<PacketAmdInfo*>(packetInfo);
|
|
|
|
const amd_kernel_code_t* akc = hostAqlCode;
|
|
|
|
packet->numberOfSgprs_ = akc->wavefront_sgpr_count;
|
|
packet->numberOfVgprs_ = akc->workitem_vgpr_count;
|
|
|
|
// use mapped kernel_object_address for host accessing of ISA buffer
|
|
packet->pointerToIsaBuffer_ = (char*) (hostAqlCode) +
|
|
akc->kernel_code_entry_byte_offset;
|
|
|
|
packet->scratchBufferWaveOffset_ =
|
|
akc->debug_wavefront_private_segment_offset_sgpr;
|
|
|
|
packet->sizeOfIsaBuffer_ = codeInfo->aqlCodeSize_;
|
|
|
|
packet->sizeOfStaticGroupMemory_ = akc->workgroup_group_segment_byte_size;
|
|
|
|
// The trap_reserved_vgpr_index will be 4 less the original
|
|
// This value must be used only by the debugger
|
|
packet->trapReservedVgprIndex_ = akc->workitem_vgpr_count - NumberReserveVgprs;
|
|
}
|
|
|
|
//! Creates an OS event used for GPU exception notification and installs it
//! on the registered virtual GPU.
//!
//! The event is created with manual-reset semantics inverted from
//! @p autoReset (osEventCreate receives !autoReset) and is reset to the
//! non-signaled state before it is handed to the GPU.
//!
//! @param autoReset  Requested auto-reset behaviour of the event
//! @return The event handle, or 0 when the debugger is not registered, the
//!         event cannot be created, or the GPU rejects the notification
DebugEvent
GpuDebugManager::createDebugEvent(
    const bool autoReset)
{
    if (!isRegistered()) {
        LogError("debugmanager: Failed to create debug event - hw debug is not available");
        return 0;
    }

    // create the event object
    osEventHandle shaderEvent = osEventCreate(!autoReset);
    if (shaderEvent == 0) {
        return 0;
    }

    // event object has been created, set the initial state
    osEventReset(shaderEvent);      // initial state is non-signaled

    if (!vGpu_->ExceptionNotification(shaderEvent)) {
        // The GPU did not accept the event: destroy it here, since the
        // caller only receives 0 and could never release it.
        osEventDestroy(shaderEvent);
        return 0;
    }

    isRegistered_ = true;
    return shaderEvent;
}
|
|
|
|
//! Waits on a debug event for at most @p timeOut.
//!
//! @param pEvent   Event to wait on
//! @param timeOut  Time-out value passed to osEventTimedWait
//! @return CL_SUCCESS when the timed wait succeeds, CL_EVENT_TIMEOUT_AMD
//!         otherwise
cl_int
GpuDebugManager::waitDebugEvent(
    DebugEvent pEvent,
    uint32_t   timeOut) const
{
    return osEventTimedWait(pEvent, timeOut) ? CL_SUCCESS
                                             : CL_EVENT_TIMEOUT_AMD;
}
|
|
|
|
void
|
|
GpuDebugManager::destroyDebugEvent(DebugEvent* pEvent)
|
|
{
|
|
osEventDestroy(*pEvent);
|
|
*pEvent = 0;
|
|
|
|
vGpu_->ExceptionNotification(0);
|
|
|
|
}
|
|
|
|
void
|
|
GpuDebugManager::wavefrontControl(
|
|
uint32_t waveAction,
|
|
uint32_t waveMode,
|
|
uint32_t trapId,
|
|
void* waveAddr) const
|
|
{
|
|
device()->gslCtx()->executeSqCommand(waveAction, waveMode, trapId, waveAddr);
|
|
}
|
|
|
|
void
|
|
GpuDebugManager::setAddressWatch(
|
|
uint32_t numWatchPoints,
|
|
void** watchAddress,
|
|
uint64_t* watchMask,
|
|
uint64_t* watchMode,
|
|
DebugEvent* event)
|
|
{
|
|
size_t requiredSize = numWatchPoints * sizeof(HwDbgAddressWatch);
|
|
|
|
// previously allocated size is not big enough, allocate new memory
|
|
if (addressWatchSize_ < requiredSize) {
|
|
if (NULL != addressWatch_) { // free the smaller address watch storage
|
|
delete [] addressWatch_;
|
|
}
|
|
addressWatch_ = new HwDbgAddressWatch[numWatchPoints];
|
|
addressWatchSize_ = requiredSize;
|
|
}
|
|
|
|
// fill in the address watch structure
|
|
memset(addressWatch_, 0, addressWatchSize_);
|
|
|
|
for (uint32_t i = 0; i < numWatchPoints; i++)
|
|
{
|
|
amd::Memory* watchMem = as_amd(reinterpret_cast<cl_mem>(watchAddress[i]));
|
|
Memory* watchMemAddress = device()->getGpuMemory(watchMem);
|
|
|
|
addressWatch_[i].watchAddress_ = reinterpret_cast<void*>(watchMemAddress->vmAddress());
|
|
addressWatch_[i].watchMask_ = watchMask[i];
|
|
addressWatch_[i].watchMode_ = (cl_dbg_address_watch_mode_amd) watchMode[i];
|
|
addressWatch_[i].event_ = (0 != event) ? event[i] : 0;
|
|
}
|
|
|
|
// setup the watch addresses
|
|
device()->gslCtx()->setAddressWatch(numWatchPoints, (void*) addressWatch_);
|
|
|
|
}
|
|
|
|
void
|
|
GpuDebugManager::setGlobalMemory(
|
|
amd::Memory* memObj,
|
|
uint32_t offset,
|
|
void* srcPtr,
|
|
uint32_t size)
|
|
{
|
|
gpu::Memory* globalMem = device()->getGpuMemory(memObj);
|
|
|
|
address mappedMem = static_cast<address>(globalMem->map(NULL,0));
|
|
assert(mappedMem != 0);
|
|
|
|
void* dest_ptr = reinterpret_cast<void*>(mappedMem + offset);
|
|
memcpy(dest_ptr, srcPtr, size);
|
|
|
|
globalMem->unmap(NULL);
|
|
}
|
|
|
|
|
|
} // namespace gpu
|