Files
rocm-systems/rocclr/runtime/device/pal/paldebugmanager.cpp
T
foreman 59ed7d2445 P4 to Git Change 1469850 by gandryey@gera-w8 on 2017/10/13 13:56:50
SWDEV-79445 - OCL generic changes and code clean-up
	- Remove obsolete/unused code

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#62 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#34 edit
2017-10-13 14:10:40 -04:00

354 řádky
11 KiB
C++

//
// Copyright (c) 2015 Advanced Micro Devices, Inc. All rights reserved.
//
#include "platform/commandqueue.hpp"
#include "device/device.hpp"
#include "device/pal/paldevice.hpp"
#include "device/pal/palmemory.hpp"
#include "device/pal/paltrap.hpp"
#include "device/pal/paldebugmanager.hpp"
#include <iostream>
#include <sstream>
#include <fstream>
namespace pal {
// Forward declarations of classes in the pal namespace.
class VirtualGPU;
class Device;
class Memory;
/*
***************************************************************************
* Implementation of GPU Debug Manager class
***************************************************************************
*/
// Constructs the debug manager for the given device with all of its own
// debugger state cleared.
GpuDebugManager::GpuDebugManager(amd::Device* device)
    : HwDebugManager(device),
      vGpu_(nullptr),
      debugMessages_(0),
      addressWatch_(nullptr),
      addressWatchSize_(0),
      oclEventHandle_(nullptr) {
  // Default exception policy
  excpPolicy_.exceptionMask = 0x0;
  excpPolicy_.waveAction = CL_DBG_WAVES_RESUME;
  excpPolicy_.hostAction = CL_DBG_HOST_IGNORE;
  excpPolicy_.waveMode = CL_DBG_WAVEMODE_BROADCAST;

  // Kernel execution mode: every bit cleared
  execMode_.ui32All = 0;

  // No runtime trap handler or trap buffer yet
  rtTrapHandlerInfo_.trap_.trapHandler_ = nullptr;
  rtTrapHandlerInfo_.trap_.trapBuffer_ = nullptr;

  // No dispatch packet has been captured yet
  aqlPacket_ = nullptr;
}
// Releases the address watch storage allocated by setAddressWatch().
GpuDebugManager::~GpuDebugManager() {
  // delete[] on a null pointer is a no-op, so no explicit guard is required
  delete[] addressWatch_;
}
void GpuDebugManager::executePreDispatchCallBack(void* aqlPacket, void* toolInfo) {
DebugToolInfo* info = reinterpret_cast<DebugToolInfo*>(toolInfo);
aqlPacket_ = reinterpret_cast<hsa_kernel_dispatch_packet_t*>(aqlPacket);
Unimplemented();
// Only if the pre-dispatch callback is set, will we update cache
// flush configuration and build the memory descriptor.
if (nullptr != preDispatchCallBackFunc_) {
/*
// Build the scratch memory descriptor
device()->gslCtx()->BuildScratchBufferResource(debugInfo_.scratchMemoryDescriptor_,
info->scratchAddress_,
info->scratchSize_);
// Build the global memory descriptor
device()->gslCtx()->BuildHeapBufferResource(debugInfo_.globalMemoryDescriptor_,
info->globalAddress_);
*/
// // for invalidate cache (BuildEndOfKernelNotifyCommands)
// aqlPacket->release_fence_scope = 2;
aclBinary_ = reinterpret_cast<void*>(info->aclBinary_);
oclEventHandle_ = reinterpret_cast<void*>(as_cl(info->event_));
cl_device_id clDeviceId = as_cl(device_);
preDispatchCallBackFunc_(clDeviceId, oclEventHandle_, aqlPacket_, aclBinary_,
preDispatchCallBackArgs_);
}
// setup the trap handler information only if the debugger has been registered
if (isRegistered()) {
// Copy the various info set by the debugger/profiler to the tool info structure
setupTrapInformation(info);
}
}
void GpuDebugManager::executePostDispatchCallBack() {
if (nullptr != postDispatchCallBackFunc_) {
cl_device_id clDeviceId = as_cl(device_);
postDispatchCallBackFunc_(clDeviceId, aqlPacket_->completion_signal.handle,
postDispatchCallBackArgs_);
}
}
//! Map the kernel code for host access
void GpuDebugManager::mapKernelCode(void* aqlCodeInfo) const {
AqlCodeInfo* codeInfo = reinterpret_cast<AqlCodeInfo*>(aqlCodeInfo);
codeInfo->aqlCode_ = reinterpret_cast<amd_kernel_code_t*>(aqlCodeAddr_);
codeInfo->aqlCodeSize_ = aqlCodeSize_;
}
// Registers a debugger with this manager.
//
// context        - context the debugger is attached to; stored on success
// messageStorage - address of the debugger message storage; stored on the
//                  first registration only
//
// Returns CL_DEBUGGER_REGISTER_FAILURE_AMD when HW debug is disabled in the
// device settings, CL_OUT_OF_RESOURCES when the runtime trap handler cannot be
// created, and CL_SUCCESS otherwise.
cl_int GpuDebugManager::registerDebugger(amd::Context* context, uintptr_t messageStorage) {
  if (!device()->settings().enableHwDebug_) {
    LogError("debugmanager: Register debugger error - HW DEBUG is not enable");
    return CL_DEBUGGER_REGISTER_FAILURE_AMD;
  }

  // first time register - set the message storage, flush queue and enable hw debug
  if (!isRegistered()) {
    debugMessages_ = messageStorage;
    Unimplemented();
    /*
    if (!device()->gslCtx()->registerHwDebugger(debugMessages_)) {
      LogError("debugmanager: Register debugger failed");
      return CL_OUT_OF_RESOURCES;
    }
    */
    isRegistered_ = true;

    if (CL_SUCCESS != createRuntimeTrapHandler()) {
      LogError("debugmanager: Create runtime trap handler failed");
      // Roll the flag back: otherwise the manager would keep reporting itself
      // as registered and the next registerDebugger() call would skip this
      // block and return CL_SUCCESS without a usable trap handler.
      // NOTE(review): a retry calls createRuntimeTrapHandler() again, which
      // overwrites runtimeTBA_/runtimeTMA_ - confirm who releases the objects
      // from the failed attempt.
      isRegistered_ = false;
      return CL_OUT_OF_RESOURCES;
    }
  }

  context_ = context;
  return CL_SUCCESS;
}
// Drops the debugger registration; does nothing when no debugger is registered.
void GpuDebugManager::unregisterDebugger() {
  if (!isRegistered()) {
    return;
  }

  // Clear the registration flag and forget the attached context
  isRegistered_ = false;
  context_ = nullptr;
}
// Wraps the raw mask bits in a HwDbgGpuCacheMask. The call that would perform
// the flush is currently commented out, so nothing is flushed here.
void GpuDebugManager::flushCache(uint32_t mask) {
  HwDbgGpuCacheMask cuCacheMask(mask);
  // Disabled: device()->xferQueue()->flushCuCaches(cuCacheMask);
}
// Fills the tool info structure with the exception policy, the execution mode
// and the runtime trap handler/buffer entries, for use in the kernel dispatch.
//
// toolInfo - structure to populate
void GpuDebugManager::setupTrapInformation(DebugToolInfo* toolInfo) {
  // Scratch/global memory information and the SQ perf counters start cleared
  toolInfo->scratchAddress_ = 0;
  toolInfo->scratchSize_ = 0;
  toolInfo->globalAddress_ = 0;
  toolInfo->sqPerfcounterEnable_ = false;

  // Exception policy and execution mode chosen through the debug manager
  toolInfo->exceptionMask_ = excpPolicy_.exceptionMask;
  toolInfo->gpuSingleStepMode_ = execMode_.gpuSingleStepMode;
  toolInfo->monitorMode_ = execMode_.monitorMode;

  // Bit layout follows the definition of the COMPUTE_DISPATCH_INITIATOR
  // register: bit 2 = L1 scalar, bit 1 = L2 cache, bit 0 = L1 vector.
  toolInfo->cacheDisableMask_ =
      ((execMode_.disableL1Scalar << 2) |
       (execMode_.disableL2Cache << 1) |
       (execMode_.disableL1Vector));

  toolInfo->reservedCuNum_ = execMode_.reservedCuNum;

  // Trap handler and trap buffer entries of the runtime trap info table
  toolInfo->trapHandler_ = rtTrapInfo_[kDebugTrapHandlerLocation];
  toolInfo->trapBuffer_ = rtTrapInfo_[kDebugTrapBufferLocation];
}
void GpuDebugManager::getPacketAmdInfo(const void* aqlCodeInfo, void* packetInfo) const
{
const AqlCodeInfo* codeInfo = reinterpret_cast<const AqlCodeInfo*>(aqlCodeInfo);
const amd_kernel_code_t* hostAqlCode = codeInfo->aqlCode_;
PacketAmdInfo* packet = reinterpret_cast<PacketAmdInfo*>(packetInfo);
const amd_kernel_code_t* akc = hostAqlCode;
packet->numberOfSgprs_ = akc->wavefront_sgpr_count;
packet->numberOfVgprs_ = akc->workitem_vgpr_count;
// use mapped kernel_object_address for host accessing of ISA buffer
packet->pointerToIsaBuffer_ = (char*)(hostAqlCode) + akc->kernel_code_entry_byte_offset;
packet->scratchBufferWaveOffset_ = akc->debug_wavefront_private_segment_offset_sgpr;
packet->sizeOfIsaBuffer_ = codeInfo->aqlCodeSize_;
packet->sizeOfStaticGroupMemory_ = akc->workgroup_group_segment_byte_size;
// The trap_reserved_vgpr_index will be 4 less the original
// This value must be used only by the debugger
packet->trapReservedVgprIndex_ = akc->workitem_vgpr_count - NumberReserveVgprs;
}
// Stub: event creation is not available in this backend. Calls
// Unimplemented() and returns 0; autoReset is currently unused.
DebugEvent GpuDebugManager::createDebugEvent(const bool autoReset) {
  Unimplemented();

  // Disabled legacy implementation, kept for reference:
  //
  //   // create the event object
  //   osEventHandle shaderEvent = osEventCreate(!autoReset);
  //   // event object has been created, set the initial state
  //   if (shaderEvent != 0) {
  //     osEventReset(shaderEvent);  // initial state is non-signaled
  //     if (device()->gslCtx()->exceptionNotification(shaderEvent)) {
  //       return shaderEvent;
  //     }
  //   }

  return 0;
}
// Stub: no wait is performed. Calls Unimplemented() and returns CL_SUCCESS
// regardless of pEvent and timeOut.
cl_int GpuDebugManager::waitDebugEvent(DebugEvent pEvent, uint32_t timeOut) const {
  Unimplemented();

  // Disabled legacy implementation, kept for reference:
  //
  //   if (osEventTimedWait(pEvent, timeOut)) {
  //     return CL_SUCCESS;
  //   }
  //   else {
  //     return CL_EVENT_TIMEOUT_AMD;
  //   }

  return CL_SUCCESS;
}
// Stub: calls Unimplemented() and leaves *pEvent untouched.
void GpuDebugManager::destroyDebugEvent(DebugEvent* pEvent) {
  Unimplemented();

  // Disabled legacy implementation, kept for reference:
  //
  //   osEventDestroy(*pEvent);
  //   *pEvent = 0;
  //   device()->gslCtx()->exceptionNotification(0);
}
// Stub: calls Unimplemented(); no command is issued for the given wave
// action, wave mode, trap id or wave address.
void GpuDebugManager::wavefrontControl(uint32_t waveAction, uint32_t waveMode, uint32_t trapId,
                                       void* waveAddr) const {
  Unimplemented();

  // Disabled legacy implementation, kept for reference:
  //   device()->gslCtx()->executeSqCommand(waveAction, waveMode, trapId, waveAddr);
}
void GpuDebugManager::setAddressWatch(uint32_t numWatchPoints, void** watchAddress,
uint64_t* watchMask, uint64_t* watchMode, DebugEvent* event) {
size_t requiredSize = numWatchPoints * sizeof(HwDbgAddressWatch);
// previously allocated size is not big enough, allocate new memory
if (addressWatchSize_ < requiredSize) {
if (nullptr != addressWatch_) { // free the smaller address watch storage
delete[] addressWatch_;
}
addressWatch_ = new HwDbgAddressWatch[numWatchPoints];
addressWatchSize_ = requiredSize;
}
// fill in the address watch structure
memset(addressWatch_, 0, addressWatchSize_);
for (uint32_t i = 0; i < numWatchPoints; i++) {
amd::Memory* watchMem = as_amd(reinterpret_cast<cl_mem>(watchAddress[i]));
Memory* watchMemAddress = device()->getGpuMemory(watchMem);
addressWatch_[i].watchAddress_ = reinterpret_cast<void*>(watchMemAddress->vmAddress());
addressWatch_[i].watchMask_ = watchMask[i];
addressWatch_[i].watchMode_ = (cl_dbg_address_watch_mode_amd)watchMode[i];
addressWatch_[i].event_ = (0 != event) ? event[i] : 0;
}
Unimplemented();
// setup the watch addresses
// device()->gslCtx()->setAddressWatch(numWatchPoints, (void*) addressWatch_);
}
// Copies host data into a memory object through a host mapping.
//
// memObj - destination memory object
// offset - byte offset into the mapped memory where the copy starts
// srcPtr - host source buffer
// size   - number of bytes to copy
//
// The copy is skipped, with an error logged, when the device memory object
// cannot be obtained or mapped. The original code only asserted on the mapping
// result, which is compiled out under NDEBUG and let release builds copy to an
// invalid address.
// NOTE(review): offset + size is not validated against the size of the memory
// object - callers must pass an in-range region.
void GpuDebugManager::setGlobalMemory(amd::Memory* memObj, uint32_t offset, void* srcPtr,
                                      uint32_t size) {
  Memory* globalMem = device()->getGpuMemory(memObj);
  if (globalMem == nullptr) {
    LogError("debugmanager: Set global memory failed - no GPU memory object");
    return;
  }

  address mappedMem = static_cast<address>(globalMem->map(nullptr, 0));
  if (mappedMem == nullptr) {
    LogError("debugmanager: Set global memory failed - cannot map the memory");
    return;
  }

  void* dest_ptr = reinterpret_cast<void*>(mappedMem + offset);
  memcpy(dest_ptr, srcPtr, size);

  globalMem->unmap(nullptr);
}
// Allocates the runtime trap handler buffer (TBA) and the trap buffer (TMA),
// and copies the runtime trap handler code into the TBA.
//
// Returns CL_SUCCESS on success, CL_OUT_OF_RESOURCES when an allocation or the
// host mapping fails, and CL_INVALID_VALUE when either buffer is not 256-byte
// aligned. The host mapping of the TBA is released on every return path taken
// after it has been established.
cl_int GpuDebugManager::createRuntimeTrapHandler() {
  // Select the trap handler code matching the device generation
  size_t codeSize = 0;
  const uint32_t* rtTrapCode = nullptr;
  if (device()->settings().viPlus_) {
    codeSize = sizeof(RuntimeTrapCodeVi);
    rtTrapCode = RuntimeTrapCodeVi;
  } else {
    codeSize = sizeof(RuntimeTrapCode);
    rtTrapCode = RuntimeTrapCode;
  }
  uint32_t numCodes = codeSize / sizeof(uint32_t);

  // Handle TMA corruption hw bug workaround -
  // The trap handler buffer has extra 256 bytes allocated, the TMA address
  // is stored in the first two DWORDs and the actual trap handler code
  // is stored starting at the location of 256 bytes (TbaStartOffset).
  //
  // allocate memory for the runtime trap handler (TBA) + TMA address
  uint32_t allocSize = codeSize + TbaStartOffset;
  Memory* rtTBA = new Memory(*device(), allocSize);
  runtimeTBA_ = rtTBA;
  if ((rtTBA == nullptr) || !rtTBA->create(Resource::RemoteUSWC)) {
    return CL_OUT_OF_RESOURCES;
  }

  address tbaAddress = reinterpret_cast<address>(rtTBA->map(nullptr));
  if (tbaAddress == nullptr) {
    // Nothing was mapped, so there is nothing to unmap
    LogError("debugmanager: Cannot map the runtime trap handler buffer");
    return CL_OUT_OF_RESOURCES;
  }

  // allocate buffer for the runtime trap handler buffer (TMA)
  uint32_t tmaSize = 0x100;
  Memory* rtTMA = new Memory(*device(), tmaSize);
  runtimeTMA_ = rtTMA;
  if ((rtTMA == nullptr) || !rtTMA->create(Resource::RemoteUSWC)) {
    rtTBA->unmap(nullptr);  // do not leave the TBA mapped on failure
    return CL_OUT_OF_RESOURCES;
  }

  uint64_t rtTmaAddress = rtTMA->vmAddress();
  if ((rtTBA->vmAddress() & 0xFF) != 0 || (rtTmaAddress & 0xFF) != 0) {
    LogError("debugmanager: Trap handler/buffer is not 256-byte aligned");
    rtTBA->unmap(nullptr);  // do not leave the TBA mapped on failure
    return CL_INVALID_VALUE;
  }

  // store the TMA address at the beginning of trap handler buffer
  uint64_t* tbaStorage = reinterpret_cast<uint64_t*>(tbaAddress);
  tbaStorage[0] = rtTmaAddress;

  // save the trap handler code
  uint32_t* trapHandlerPtr = (uint32_t*)(tbaAddress + TbaStartOffset);
  for (uint32_t i = 0; i < numCodes; i++) {
    trapHandlerPtr[i] = rtTrapCode[i];
  }

  rtTBA->unmap(nullptr);
  return CL_SUCCESS;
}
} // namespace pal