SWDEV-368308 - Remove HW debug extension
Change-Id: If0c68023c09f0dac9111d52ecc0ad63719aa4e70
This commit is contained in:
committed by
German Andryeyev
parent
80d444b2f1
commit
e5a36ab1ad
@@ -67,7 +67,6 @@ target_sources(rocclr PRIVATE
|
||||
${ROCCLR_SRC_DIR}/device/devprogram.cpp
|
||||
${ROCCLR_SRC_DIR}/device/devwavelimiter.cpp
|
||||
${ROCCLR_SRC_DIR}/device/hsailctx.cpp
|
||||
${ROCCLR_SRC_DIR}/device/hwdebug.cpp
|
||||
${ROCCLR_SRC_DIR}/elf/elf.cpp
|
||||
${ROCCLR_SRC_DIR}/os/alloc.cpp
|
||||
${ROCCLR_SRC_DIR}/os/os_posix.cpp
|
||||
|
||||
@@ -57,7 +57,6 @@ target_sources(rocclr PRIVATE
|
||||
${ROCCLR_SRC_DIR}/device/pal/palblit.cpp
|
||||
${ROCCLR_SRC_DIR}/device/pal/palconstbuf.cpp
|
||||
${ROCCLR_SRC_DIR}/device/pal/palcounters.cpp
|
||||
${ROCCLR_SRC_DIR}/device/pal/paldebugmanager.cpp
|
||||
${ROCCLR_SRC_DIR}/device/pal/paldevice.cpp
|
||||
${ROCCLR_SRC_DIR}/device/pal/paldeviced3d10.cpp
|
||||
${ROCCLR_SRC_DIR}/device/pal/paldeviced3d11.cpp
|
||||
|
||||
@@ -502,7 +502,6 @@ Device::Device()
|
||||
online_(true),
|
||||
activeWait_(false),
|
||||
blitProgram_(nullptr),
|
||||
hwDebugMgr_(nullptr),
|
||||
context_(nullptr),
|
||||
heap_buffer_(nullptr),
|
||||
arena_mem_obj_(nullptr),
|
||||
@@ -791,7 +790,6 @@ Settings::Settings() : value_(0) {
|
||||
customHostAllocator_ = false;
|
||||
waitCommand_ = AMD_OCL_WAIT_COMMAND;
|
||||
supportDepthsRGB_ = false;
|
||||
enableHwDebug_ = false;
|
||||
commandQueues_ = 200; //!< Field value set to maximum number
|
||||
//!< concurrent Virtual GPUs for default
|
||||
|
||||
|
||||
@@ -37,7 +37,6 @@
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
#include "hsailctx.hpp"
|
||||
#endif
|
||||
#include "hwdebug.hpp"
|
||||
#include "devsignal.hpp"
|
||||
|
||||
#if defined(__clang__)
|
||||
@@ -94,7 +93,6 @@ class TransferBufferFileCommand;
|
||||
class StreamOperationCommand;
|
||||
class VirtualMapCommand;
|
||||
class ExternalSemaphoreCmd;
|
||||
class HwDebugManager;
|
||||
class Isa;
|
||||
class Device;
|
||||
struct KernelParameterDescriptor;
|
||||
@@ -630,7 +628,6 @@ class Settings : public amd::HeapObject {
|
||||
uint customHostAllocator_ : 1; //!< True if device has custom host allocator
|
||||
// that replaces generic OS allocation routines
|
||||
uint supportDepthsRGB_ : 1; //!< Support DEPTH and sRGB channel order format
|
||||
uint enableHwDebug_ : 1; //!< Enable HW debug support
|
||||
uint reportFMAF_ : 1; //!< Report FP_FAST_FMAF define in CL program
|
||||
uint reportFMA_ : 1; //!< Report FP_FAST_FMA define in CL program
|
||||
uint singleFpDenorm_ : 1; //!< Support Single FP Denorm
|
||||
@@ -644,7 +641,7 @@ class Settings : public amd::HeapObject {
|
||||
uint enableCoopMultiDeviceGroups_ : 1; //!< Enable cooperative groups multi device
|
||||
uint fenceScopeAgent_ : 1; //!< Enable fence scope agent in AQL dispatch packet
|
||||
uint rocr_backend_ : 1; //!< Device uses ROCr backend for submissions
|
||||
uint reserved_ : 10;
|
||||
uint reserved_ : 11;
|
||||
};
|
||||
uint value_;
|
||||
};
|
||||
@@ -1844,17 +1841,6 @@ class Device : public RuntimeObject {
|
||||
//! Returns app profile
|
||||
static const AppProfile* appProfile() { return &appProfile_; }
|
||||
|
||||
//! Register a hardware debugger manager
|
||||
HwDebugManager* hwDebugMgr() const { return hwDebugMgr_; }
|
||||
|
||||
//! Initialize the Hardware Debug Manager
|
||||
virtual int32_t hwDebugManagerInit(amd::Context* context, uintptr_t messageStorage) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//! Remove the Hardware Debug Manager
|
||||
virtual void hwDebugManagerRemove() {}
|
||||
|
||||
//! Adds GPU memory to the VA cache list
|
||||
void addVACache(device::Memory* memory) const;
|
||||
|
||||
@@ -1963,7 +1949,6 @@ class Device : public RuntimeObject {
|
||||
|
||||
BlitProgram* blitProgram_; //!< Blit program info
|
||||
static AppProfile appProfile_; //!< application profile
|
||||
HwDebugManager* hwDebugMgr_; //!< Hardware Debug manager
|
||||
amd::Context* context_; //!< Context
|
||||
|
||||
static amd::Context* glb_ctx_; //!< Global context with all devices
|
||||
|
||||
@@ -1,154 +0,0 @@
|
||||
/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "hwdebug.hpp"
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
|
||||
namespace amd {
|
||||
|
||||
class Device;
|
||||
|
||||
/*
|
||||
***************************************************************************
|
||||
* Implementation of GPU Debug Manager class
|
||||
***************************************************************************
|
||||
*/
|
||||
|
||||
//! Constructor of the debug manager class
|
||||
HwDebugManager::HwDebugManager(amd::Device* device)
|
||||
: context_(NULL),
|
||||
device_(device),
|
||||
preDispatchCallBackFunc_(NULL),
|
||||
postDispatchCallBackFunc_(NULL),
|
||||
preDispatchCallBackArgs_(NULL),
|
||||
postDispatchCallBackArgs_(NULL),
|
||||
paramMemory_(NULL),
|
||||
numParams_(0),
|
||||
aclBinary_(NULL),
|
||||
aqlCodeAddr_(NULL),
|
||||
aqlCodeSize_(0),
|
||||
scratchRingAddr_(NULL),
|
||||
scratchRingSize_(0),
|
||||
isRegistered_(false),
|
||||
runtimeTBA_(NULL),
|
||||
runtimeTMA_(NULL) {
|
||||
memset(&debugInfo_, 0, sizeof(debugInfo_));
|
||||
|
||||
for (int i = 0; i < kDebugTrapLocationMax; i++) {
|
||||
rtTrapInfo_[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
//! Destructor: releases the kernel parameter pointer list and the runtime
//! trap handler (TBA) / trap buffer (TMA) memory objects.
HwDebugManager::~HwDebugManager() {
  // Only the pointer array itself is freed, not the objects it points to
  delete[] paramMemory_;

  // The trap buffer (TMA) goes first, then the trap handler (TBA)
  delete runtimeTMA_;
  delete runtimeTBA_;
}
|
||||
|
||||
//! Setup the call back function pointer
|
||||
void HwDebugManager::setCallBackFunctions(cl_PreDispatchCallBackFunctionAMD preDispatchFunction,
|
||||
cl_PostDispatchCallBackFunctionAMD postDispatchFunction) {
|
||||
preDispatchCallBackFunc_ = preDispatchFunction;
|
||||
postDispatchCallBackFunc_ = postDispatchFunction;
|
||||
}
|
||||
|
||||
//! Setup the call back argument pointers
|
||||
void HwDebugManager::setCallBackArguments(void* preDispatchArgs, void* postDispatchArgs) {
|
||||
preDispatchCallBackArgs_ = preDispatchArgs;
|
||||
postDispatchCallBackArgs_ = postDispatchArgs;
|
||||
}
|
||||
|
||||
//! Get dispatch debug info
|
||||
void HwDebugManager::getDispatchDebugInfo(void* debugInfo) const {
|
||||
memcpy(debugInfo, (void*)&debugInfo_, sizeof(DispatchDebugInfo));
|
||||
}
|
||||
|
||||
//! Set the kernel code address and its size
|
||||
void HwDebugManager::setKernelCodeInfo(address aqlCodeAddr, uint32_t aqlCodeSize) {
|
||||
aqlCodeAddr_ = aqlCodeAddr;
|
||||
aqlCodeSize_ = aqlCodeSize;
|
||||
}
|
||||
|
||||
//! Get the scratch ring
|
||||
void HwDebugManager::setScratchRing(address scratchRingAddr, uint32_t scratchRingSize) {
|
||||
scratchRingAddr_ = scratchRingAddr;
|
||||
scratchRingSize_ = scratchRingSize;
|
||||
}
|
||||
|
||||
//! Map the scratch ring for host access
|
||||
void HwDebugManager::mapScratchRing(uint64_t* scratchRingAddr, uint32_t* scratchRingSize) const {
|
||||
*scratchRingAddr = reinterpret_cast<uint64_t>(scratchRingAddr_);
|
||||
*scratchRingSize = scratchRingSize_;
|
||||
}
|
||||
|
||||
//! Replace the stored exception policy with a byte copy of the
//! cl_dbg_exception_policy_amd structure that \p exceptionPolicy points to
void HwDebugManager::setExceptionPolicy(void* exceptionPolicy) {
  // excpPolicy_ is a cl_dbg_exception_policy_amd, so its size is the copy size
  memcpy(&excpPolicy_, exceptionPolicy, sizeof(excpPolicy_));
}
|
||||
|
||||
//! Copy the stored exception policy into the cl_dbg_exception_policy_amd
//! sized buffer that \p exceptionPolicy points to
void HwDebugManager::getExceptionPolicy(void* exceptionPolicy) const {
  // excpPolicy_ is a cl_dbg_exception_policy_amd, so its size is the copy size
  memcpy(exceptionPolicy, &excpPolicy_, sizeof(excpPolicy_));
}
|
||||
|
||||
void HwDebugManager::setKernelExecutionMode(void* mode) {
|
||||
cl_dbg_kernel_exec_mode_amd* execMode = reinterpret_cast<cl_dbg_kernel_exec_mode_amd*>(mode);
|
||||
execMode_.ui32All = execMode->ui32All;
|
||||
}
|
||||
|
||||
|
||||
void HwDebugManager::getKernelExecutionMode(void* mode) const {
|
||||
cl_dbg_kernel_exec_mode_amd* execMode = reinterpret_cast<cl_dbg_kernel_exec_mode_amd*>(mode);
|
||||
execMode->ui32All = execMode_.ui32All;
|
||||
}
|
||||
|
||||
//! Remember the (opaque) pointer to the ACL binary; the pointer is stored
//! as is and nothing is copied
void HwDebugManager::setAclBinary(void* aclBinary) {
  aclBinary_ = aclBinary;
}
|
||||
|
||||
void HwDebugManager::allocParamMemList(uint32_t numParams) {
|
||||
if (NULL != paramMemory_) {
|
||||
delete[] paramMemory_;
|
||||
}
|
||||
|
||||
numParams_ = numParams;
|
||||
paramMemory_ = new amd::Memory*[numParams];
|
||||
}
|
||||
|
||||
//! Get the kernel parameter memory object stored at \p paramIdx as a cl_mem.
//! The index is only range-checked by an assert.
cl_mem HwDebugManager::getKernelParamMem(uint32_t paramIdx) const {
  assert((paramIdx < numParams_) && "Invalid kernel parameter index too big");

  amd::Memory* const paramMem = paramMemory_[paramIdx];
  return as_cl(paramMem);
}
|
||||
|
||||
void HwDebugManager::assignKernelParamMem(uint32_t paramIdx, amd::Memory* mem) {
|
||||
assert((paramIdx < numParams_) && "Invalid kernel parameter index too big");
|
||||
|
||||
paramMemory_[paramIdx] = mem;
|
||||
}
|
||||
|
||||
//! Install the trap handler and the trap buffer of a trap type.
//!
//! The handler is stored at slot (trapType << 2) of rtTrapInfo_ and the
//! buffer in the slot right after it. rtTrapInfo_ only has
//! kDebugTrapLocationMax entries, so a trap type whose slots fall outside of
//! the array is rejected (assert in debug builds, ignored otherwise) instead
//! of writing past the end of the array.
//!
//! \param trapType     Trap type that selects the slot pair
//! \param pTrapHandler Memory object of the trap handler
//! \param pTrapBuffer  Memory object of the trap buffer
void HwDebugManager::installTrap(cl_dbg_trap_type_amd trapType, amd::Memory* pTrapHandler,
                                 amd::Memory* pTrapBuffer) {
  // 64-bit unsigned arithmetic: a negative or very large trap type ends up
  // as a huge slot index and fails the range check below.
  const uint64_t handlerSlot = static_cast<uint64_t>(trapType) << 2;
  const uint64_t bufferSlot = handlerSlot + 1;

  if (bufferSlot >= static_cast<uint64_t>(kDebugTrapLocationMax)) {
    assert(false && "Invalid trap type: slot is outside of the trap info array");
    return;
  }

  rtTrapInfo_[handlerSlot] = pTrapHandler;
  rtTrapInfo_[bufferSlot] = pTrapBuffer;
}
|
||||
|
||||
|
||||
} // namespace amd
|
||||
@@ -1,272 +0,0 @@
|
||||
/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef HWDEBUG_H_
|
||||
#define HWDEBUG_H_
|
||||
|
||||
#include "device.hpp"
|
||||
#include "amdocl/cl_debugger_amd.h"
|
||||
|
||||
// NOTE(review): presumably the offset of the trap handler code inside the
// TBA - not used in the visible code, confirm before relying on it
static constexpr int TbaStartOffset = 256;

// Dimensions of the VGPR backup area in the runtime trap buffer: one row per
// wave (RtTrapBufferTotalWaveNum rows), RtTrapBufferWaveSize entries per row
static constexpr int RtTrapBufferWaveSize = 64;
static constexpr int RtTrapBufferSeNum = 4;
static constexpr int RtTrapBufferShNum = 2;
static constexpr int RtTrapBufferCuNum = 16;
static constexpr int RtTrapBufferSimdNum = 4;
static constexpr int RtTrapBufferWaveNum = 16;

// Total wave count: product of the SE, SH, CU, SIMD and wave counts above
static constexpr int RtTrapBufferTotalWaveNum = RtTrapBufferSeNum * RtTrapBufferShNum *
    RtTrapBufferCuNum * RtTrapBufferSimdNum * RtTrapBufferWaveNum;
|
||||
|
||||
|
||||
/*! \brief Debug trap handler location in the runtime trap buffer
 *
 * Identifies the slot in the device trap buffer that holds the debug trap
 * handler and the slot that holds the debug trap buffer.
 */
enum DebugTrapLocation {
  kDebugTrapHandlerLocation = 0,  //!< Slot of the debug trap handler, must be 0
  kDebugTrapBufferLocation = 1,   //!< Slot of the debug trap buffer, must be 1
  kDebugTrapLocationMax = 2       //!< Number of slots
};
|
||||
|
||||
|
||||
/*! \brief Debug info of a single kernel dispatch
 *
 * Carries two memory descriptors of four 32-bit words each: one for the
 * scratch memory and one for the global memory.
 */
struct DispatchDebugInfo {
  uint32_t scratchMemoryDescriptor_[4];  //!< Scratch memory descriptor
  uint32_t globalMemoryDescriptor_[4];   //!< Global memory descriptor
};
|
||||
|
||||
/*! \brief Trap handler descriptor
|
||||
*
|
||||
* The trap handler descriptor contains the details of a given trap handler.
|
||||
*/
|
||||
struct TrapHandlerInfo {
|
||||
amd::Memory* trapHandler_; //!< Device memory for the trap handler
|
||||
amd::Memory* trapBuffer_; //!< Device memory for the trap buffer
|
||||
};
|
||||
|
||||
/*! \brief Structure of the runtime trap handler buffer, which includes the following
|
||||
* information: information of the runtime trap handler and buffer, information of
|
||||
* the level-2 trap handlers and buffers.
|
||||
*/
|
||||
struct RuntimeTrapInfo {
|
||||
TrapHandlerInfo trap_; //!< Structure of the address of all trap handlers
|
||||
uint32_t dispatchId_; //!< Dispatch ID that signals the shader event
|
||||
uint32_t vgpr_backup_[RtTrapBufferTotalWaveNum][RtTrapBufferWaveSize];
|
||||
//!< Buffer to backup the VGPR used by the runtime trap handler
|
||||
};
|
||||
|
||||
/**
 * Opaque handle to a trap event, stored as a pointer-sized integer
 */
using DebugEvent = uintptr_t;
|
||||
|
||||
namespace amd {
|
||||
|
||||
|
||||
class Context;
|
||||
class Device;
|
||||
class HostQueue;
|
||||
|
||||
|
||||
/*! \class HwDebugManager
|
||||
*
|
||||
* \brief The device interface class for the hardware debug manager
|
||||
*/
|
||||
class HwDebugManager {
|
||||
public:
|
||||
//! Constructor for the Hardware Debug Manager
|
||||
HwDebugManager(amd::Device* device);
|
||||
|
||||
//! Destructor for Hardware Debug Manager
|
||||
virtual ~HwDebugManager();
|
||||
|
||||
//! Setup the call back function pointer
|
||||
void setCallBackFunctions(cl_PreDispatchCallBackFunctionAMD preDispatchFuncion,
|
||||
cl_PostDispatchCallBackFunctionAMD postDispatchFunction);
|
||||
|
||||
//! Setup the call back argument pointers
|
||||
void setCallBackArguments(void* preDispatchArgs, void* postDispatchArgs);
|
||||
|
||||
//! Get dispatch debug info
|
||||
void getDispatchDebugInfo(void* debugInfo) const;
|
||||
|
||||
//! Set the kernel code address and its size
|
||||
void setKernelCodeInfo(address aqlCodeAddr, uint32_t aqlCodeSize);
|
||||
|
||||
//! Get the scratch ring
|
||||
void setScratchRing(address scratchRingAddr, uint32_t scratchRingSize);
|
||||
|
||||
//! Map the scratch ring for host access
|
||||
void mapScratchRing(uint64_t* scratchRingAddr, uint32_t* scratchRingSize) const;
|
||||
|
||||
//! Retrieve the pre-dispatch callback function
|
||||
cl_PreDispatchCallBackFunctionAMD preDispatchCallBackFunc() const {
|
||||
return preDispatchCallBackFunc_;
|
||||
}
|
||||
|
||||
//! Retrieve the post-dispatch callback function
|
||||
cl_PostDispatchCallBackFunctionAMD postDispatchCallBackFunc() const {
|
||||
return postDispatchCallBackFunc_;
|
||||
}
|
||||
|
||||
//! Retrieve the pre-dispatch callback function arguments
|
||||
void* preDispatchCallBackArgs() const { return preDispatchCallBackArgs_; }
|
||||
|
||||
//! Retrieve the post-dispatch callback function arguments
|
||||
void* postDispatchCallBackArgs() const { return postDispatchCallBackArgs_; }
|
||||
|
||||
//! Retrieve the memory pointer of the runtime trap handler code
|
||||
device::Memory* runtimeTBA() const { return runtimeTBA_; }
|
||||
|
||||
//! Retrieve the memory pointer of the runtime trap handler buffer
|
||||
device::Memory* runtimeTMA() const { return runtimeTMA_; }
|
||||
|
||||
//! Set exception policy
|
||||
void setExceptionPolicy(void* exceptionPolicy);
|
||||
|
||||
//! Get exception policy
|
||||
void getExceptionPolicy(void* exceptionPolicy) const;
|
||||
|
||||
//! Set the kernel execution mode
|
||||
void setKernelExecutionMode(void* mode);
|
||||
|
||||
//! Get the kernel execution mode
|
||||
void getKernelExecutionMode(void* mode) const;
|
||||
|
||||
//! Setup the pointer to the aclBinary within the debug manager
|
||||
void setAclBinary(void* aclBinary);
|
||||
|
||||
//! Allocate storage to keep the memory pointers of the kernel parameters
|
||||
void allocParamMemList(uint32_t numParams);
|
||||
|
||||
//! Assign the kernel parameter memory
|
||||
void assignKernelParamMem(uint32_t paramIdx, amd::Memory* mem);
|
||||
|
||||
//! Get kernel parameter memory object
|
||||
cl_mem getKernelParamMem(uint32_t paramIdx) const;
|
||||
|
||||
//! Install trap handler
|
||||
void installTrap(cl_dbg_trap_type_amd trapType, amd::Memory* pTrapHandler,
|
||||
amd::Memory* pTrapBuffer);
|
||||
|
||||
//! Flush cache
|
||||
virtual void flushCache(uint32_t mask) = 0;
|
||||
|
||||
//! Create the debug event
|
||||
virtual DebugEvent createDebugEvent(const bool autoReset) = 0;
|
||||
|
||||
//! Wait for the debug event
|
||||
virtual int32_t waitDebugEvent(DebugEvent pEvent, uint32_t timeOut) const = 0;
|
||||
|
||||
//! Destroy the debug event
|
||||
virtual void destroyDebugEvent(DebugEvent* pEvent) = 0;
|
||||
|
||||
//! Register the debugger
|
||||
virtual int32_t registerDebugger(amd::Context* context, uintptr_t pMessageStorage) = 0;
|
||||
|
||||
//! Unregister the debugger
|
||||
virtual void unregisterDebugger() = 0;
|
||||
|
||||
//! Send the wavefront control cmmand
|
||||
virtual void wavefrontControl(uint32_t waveAction, uint32_t waveMode, uint32_t trapId,
|
||||
void* waveAddr) const = 0;
|
||||
|
||||
//! Set address watching point
|
||||
virtual void setAddressWatch(uint32_t numWatchPoints, void** watchAddress, uint64_t* watchMask,
|
||||
uint64_t* watchMode, DebugEvent* event) = 0;
|
||||
|
||||
//! Map the shader (AQL code) for host access
|
||||
virtual void mapKernelCode(void* aqlCodeInfo) const = 0;
|
||||
|
||||
//! Get the packet information for dispatch
|
||||
virtual void getPacketAmdInfo(const void* aqlCodeInfo, void* packetInfo) const = 0;
|
||||
|
||||
//! Set global memory values
|
||||
virtual void setGlobalMemory(amd::Memory* memObj, uint32_t offset, void* srcPtr,
|
||||
uint32_t size) = 0;
|
||||
|
||||
//! Execute the post-dispatch callback function
|
||||
virtual void executePostDispatchCallBack() = 0;
|
||||
|
||||
//! Execute the pre-dispatch callback function
|
||||
virtual void executePreDispatchCallBack(void* aqlPacket, void* toolInfo) = 0;
|
||||
|
||||
protected:
|
||||
//! Return the context
|
||||
const amd::Context* context() const { return context_; }
|
||||
|
||||
//! Get the debug device
|
||||
const amd::Device* device() const { return device_; }
|
||||
|
||||
//! Return the register flag
|
||||
bool isRegistered() const { return isRegistered_; }
|
||||
|
||||
protected:
|
||||
const amd::Context* context_; ///< context that used to create host queue for the debugger
|
||||
amd::Device* device_; ///< Device to run the debugger
|
||||
|
||||
cl_PreDispatchCallBackFunctionAMD preDispatchCallBackFunc_; //!< pre-dispatch callback function
|
||||
cl_PostDispatchCallBackFunctionAMD
|
||||
postDispatchCallBackFunc_; //!< post-dispatch callback function
|
||||
void* preDispatchCallBackArgs_; //!< pre-dispatch callback function arguments
|
||||
void* postDispatchCallBackArgs_; //!< post-dispatch callback function arguments
|
||||
|
||||
DispatchDebugInfo debugInfo_; //!< Debug setting/information for kernel dispatch
|
||||
amd::Memory* rtTrapInfo_[kDebugTrapLocationMax]; //!< Device trap buffer, to store various trap
|
||||
//!handlers on the device
|
||||
|
||||
amd::Memory** paramMemory_; //!< list of memory pointers for kernel parameters
|
||||
uint32_t numParams_; //!< number of kernel parameters
|
||||
|
||||
void* aclBinary_; //!< ACL binary
|
||||
|
||||
address aqlCodeAddr_; //!< The mapped AQL code to allow host access
|
||||
uint32_t aqlCodeSize_; //!< The size of the AQL code info
|
||||
|
||||
address scratchRingAddr_; //!< The mapped address of the scratch buffer
|
||||
uint32_t scratchRingSize_; //!< The size of the scratch ring
|
||||
|
||||
bool isRegistered_; //! flag to indicate the debugger has been registered
|
||||
|
||||
cl_dbg_exception_policy_amd excpPolicy_; //!< exception policy
|
||||
cl_dbg_kernel_exec_mode_amd execMode_; //!< kernel execution mode
|
||||
RuntimeTrapInfo rtTrapHandlerInfo_; //!< Runtime trap information
|
||||
|
||||
//! Runtime Trap handler pointer (TBA) & its buffer (TMA)
|
||||
device::Memory* runtimeTBA_; //! runtime trap handler pointer
|
||||
device::Memory* runtimeTMA_; //! runtime trap handler buffer
|
||||
};
|
||||
|
||||
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* @}
|
||||
*/
|
||||
} // namespace amd
|
||||
|
||||
#endif // HWDEBUG_H_
|
||||
@@ -1,129 +0,0 @@
|
||||
/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include "hsa.h"
|
||||
#include "amd_hsa_kernel_code.h"
|
||||
#include "device/device.hpp"
|
||||
#include "device/hwdebug.hpp"
|
||||
#include "acl.h"
|
||||
|
||||
// NOTE(review): presumably the number of VGPRs reserved for the trap handler;
// the constant is not referenced in the visible code - confirm
static constexpr int NumberReserveVgprs = 4;
|
||||
|
||||
namespace pal {
|
||||
|
||||
/**
|
||||
* \defgroup Services_API OCL Runtime Services API
|
||||
* @{
|
||||
*/
|
||||
|
||||
/*! \brief Dispatch packet information
 *
 * Plain data record with the packet information of a kernel dispatch:
 * reserved register / scratch offsets, the ISA buffer and the resource
 * usage of the kernel.
 */
struct PacketAmdInfo {
  //! Reserved VGPR index, -1 when they are not valid
  uint32_t trapReservedVgprIndex_;
  //! Scratch buffer wave offset, -1 when no scratch buffer
  uint32_t scratchBufferWaveOffset_;
  //! Pointer to the buffer containing ISA
  void* pointerToIsaBuffer_;
  //! Size of the ISA buffer
  size_t sizeOfIsaBuffer_;
  //! Number of VGPRs used by the kernel
  uint32_t numberOfVgprs_;
  //! Number of SGPRs used by the kernel
  uint32_t numberOfSgprs_;
  //! Static local memory used by the kernel
  size_t sizeOfStaticGroupMemory_;
};
|
||||
|
||||
/*! \brief Cache mask for invalidation
 *
 * A 32-bit mask that is accessible either as a whole (ui32All_) or through
 * one bit per cache.
 */
struct HwDbgGpuCacheMask {
  //! Creates a mask with no bit set
  HwDbgGpuCacheMask() : HwDbgGpuCacheMask(0u) {}

  //! Creates a mask from the raw 32-bit value \p mask
  HwDbgGpuCacheMask(uint32_t mask) : ui32All_(mask) {}

  union {
    struct {
      uint32_t sqICache_ : 1;  //!< Instruction cache
      uint32_t sqKCache_ : 1;  //!< Data cache
      uint32_t tcL1_ : 1;      //!< tcL1 cache
      uint32_t tcL2_ : 1;      //!< tcL2 cache
      uint32_t reserved_ : 28;
    };
    uint32_t ui32All_;  //!< All bits of the mask at once
  };
};
|
||||
|
||||
/*! \brief Address watch information
|
||||
*
|
||||
* Information about each watch point - address, mask, mode and event
|
||||
*/
|
||||
struct HwDbgAddressWatch {
|
||||
void* watchAddress_; //! The address of watch point
|
||||
uint64_t watchMask_; //! The mask for watch point (lower 24 bits)
|
||||
cl_dbg_address_watch_mode_amd watchMode_; //! The watch mode for this watch
|
||||
DebugEvent event_; //! Event of the watch point (not used for now)
|
||||
};
|
||||
|
||||
/*! \brief Runtime structure used to communicate debug information
|
||||
* between Ocl services and core for a kernel dispatch.
|
||||
*/
|
||||
struct DebugToolInfo {
|
||||
uint64_t scratchAddress_; //! Scratch memory address
|
||||
size_t scratchSize_; //! Scratch memory size
|
||||
uint64_t globalAddress_; //! Global memory address
|
||||
uint32_t cacheDisableMask_; //! Cache mask, indicating caches disabled
|
||||
uint32_t exceptionMask_; //! Exception mask
|
||||
uint32_t reservedCuNum_; //! Number of reserved CUs for display,
|
||||
//! which ranges from 0 to 7 in the current implementation.
|
||||
bool monitorMode_; //! Debug or profiler mode
|
||||
bool gpuSingleStepMode_; //! SQ debug mode
|
||||
amd::Memory* trapHandler_; //! Trap handler address
|
||||
amd::Memory* trapBuffer_; //! Trap buffer address
|
||||
bool sqPerfcounterEnable_; //! whether SQ perf counters are enabled
|
||||
aclBinary* aclBinary_; //! pointer of the kernel ACL binary
|
||||
amd::Event* event_; //! pointer of the kernel event in the enqueue command
|
||||
};
|
||||
|
||||
/*! \brief Message used by the KFD wave control for CI
 *
 * Bit-field record with the fields that the wave control function uses to
 * address a wave.
 */
struct HwDebugWaveAddr {
  uint32_t VMID_ : 4;  //!< Virtual memory id
  uint32_t wave_ : 4;  //!< Wave id
  uint32_t SIMD_ : 2;  //!< SIMD id
  uint32_t CU_ : 4;    //!< Compute unit
  uint32_t SH_ : 1;    //!< Shader array
  uint32_t SE_ : 1;    //!< Shader engine
};
|
||||
|
||||
/*! \brief Kernel code information
|
||||
*
|
||||
* This structure contains the pointer of mapped kernel code for host access
|
||||
* and its size (in bytes)
|
||||
*/
|
||||
struct AqlCodeInfo {
|
||||
amd_kernel_code_t* aqlCode_; //! pointer of AQL code to allow host access
|
||||
uint32_t aqlCodeSize_; //! size of AQL code
|
||||
};
|
||||
|
||||
/**@}*/
|
||||
|
||||
} // namespace pal
|
||||
@@ -1,366 +0,0 @@
|
||||
/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "platform/commandqueue.hpp"
|
||||
#include "device/device.hpp"
|
||||
#include "device/pal/paldevice.hpp"
|
||||
#include "device/pal/palmemory.hpp"
|
||||
#include "device/pal/paltrap.hpp"
|
||||
#include "device/pal/paldebugmanager.hpp"
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
|
||||
namespace pal {
|
||||
|
||||
class VirtualGPU;
|
||||
class Device;
|
||||
class Memory;
|
||||
|
||||
/*
|
||||
***************************************************************************
|
||||
* Implementation of GPU Debug Manager class
|
||||
***************************************************************************
|
||||
*/
|
||||
|
||||
//! Constructor of the PAL debug manager.
//! Starts without a virtual GPU, message storage, address watch list or
//! event handle, and sets the default exception policy and execution mode.
GpuDebugManager::GpuDebugManager(amd::Device* device)
    : HwDebugManager(device),
      vGpu_(nullptr),
      debugMessages_(0),
      addressWatch_(nullptr),
      addressWatchSize_(0),
      oclEventHandle_(nullptr) {
  // Default exception policy: no exception enabled, waves resume, the host
  // ignores the exception, wave mode is broadcast
  excpPolicy_.exceptionMask = 0x0;
  excpPolicy_.waveAction = CL_DBG_WAVES_RESUME;
  excpPolicy_.hostAction = CL_DBG_HOST_IGNORE;
  excpPolicy_.waveMode = CL_DBG_WAVEMODE_BROADCAST;

  // Default kernel execution mode: all bits cleared
  execMode_.ui32All = 0;

  // No runtime trap handler / trap buffer yet
  rtTrapHandlerInfo_.trap_.trapHandler_ = nullptr;
  rtTrapHandlerInfo_.trap_.trapBuffer_ = nullptr;

  // No dispatch packet has been seen yet
  aqlPacket_ = nullptr;
}
|
||||
|
||||
//! Destructor: releases the address watch array
GpuDebugManager::~GpuDebugManager() {
  // Array form of delete; a nullptr (the constructor default) is a no-op
  delete[] addressWatch_;
}
|
||||
|
||||
void GpuDebugManager::executePreDispatchCallBack(void* aqlPacket, void* toolInfo) {
|
||||
DebugToolInfo* info = reinterpret_cast<DebugToolInfo*>(toolInfo);
|
||||
|
||||
aqlPacket_ = reinterpret_cast<hsa_kernel_dispatch_packet_t*>(aqlPacket);
|
||||
Unimplemented();
|
||||
// Only if the pre-dispatch callback is set, will we update cache
|
||||
// flush configuration and build the memory descriptor.
|
||||
if (nullptr != preDispatchCallBackFunc_) {
|
||||
/*
|
||||
// Build the scratch memory descriptor
|
||||
device()->gslCtx()->BuildScratchBufferResource(debugInfo_.scratchMemoryDescriptor_,
|
||||
info->scratchAddress_,
|
||||
info->scratchSize_);
|
||||
|
||||
// Build the global memory descriptor
|
||||
device()->gslCtx()->BuildHeapBufferResource(debugInfo_.globalMemoryDescriptor_,
|
||||
info->globalAddress_);
|
||||
*/
|
||||
// // for invalidate cache (BuildEndOfKernelNotifyCommands)
|
||||
// aqlPacket->release_fence_scope = 2;
|
||||
|
||||
aclBinary_ = reinterpret_cast<void*>(info->aclBinary_);
|
||||
oclEventHandle_ = reinterpret_cast<void*>(as_cl(info->event_));
|
||||
|
||||
cl_device_id clDeviceId = as_cl(device_);
|
||||
preDispatchCallBackFunc_(clDeviceId, oclEventHandle_, aqlPacket_, aclBinary_,
|
||||
preDispatchCallBackArgs_);
|
||||
}
|
||||
|
||||
// setup the trap handler information only if the debugger has been registered
|
||||
if (isRegistered()) {
|
||||
// Copy the various info set by the debugger/profiler to the tool info structure
|
||||
setupTrapInformation(info);
|
||||
}
|
||||
}
|
||||
|
||||
void GpuDebugManager::executePostDispatchCallBack() {
|
||||
if (nullptr != postDispatchCallBackFunc_) {
|
||||
cl_device_id clDeviceId = as_cl(device_);
|
||||
postDispatchCallBackFunc_(clDeviceId, aqlPacket_->completion_signal.handle,
|
||||
postDispatchCallBackArgs_);
|
||||
}
|
||||
}
|
||||
|
||||
//! Map the kernel code for host access
|
||||
void GpuDebugManager::mapKernelCode(void* aqlCodeInfo) const {
|
||||
AqlCodeInfo* codeInfo = reinterpret_cast<AqlCodeInfo*>(aqlCodeInfo);
|
||||
|
||||
codeInfo->aqlCode_ = reinterpret_cast<amd_kernel_code_t*>(aqlCodeAddr_);
|
||||
codeInfo->aqlCodeSize_ = aqlCodeSize_;
|
||||
}
|
||||
|
||||
//! Register the debugger.
//!
//! \param context        Context to associate with the debugger
//! \param messageStorage Message storage handed over on first registration
//!
//! \return CL_SUCCESS on success, CL_DEBUGGER_REGISTER_FAILURE_AMD when HW
//!         debug is not enabled in the device settings, CL_OUT_OF_RESOURCES
//!         when the runtime trap handler cannot be created. After a failure
//!         the manager is left unregistered.
int32_t GpuDebugManager::registerDebugger(amd::Context* context, uintptr_t messageStorage) {
  if (!device()->settings().enableHwDebug_) {
    LogError("debugmanager: Register debugger error - HW DEBUG is not enable");
    return CL_DEBUGGER_REGISTER_FAILURE_AMD;
  }

  // first time register - set the message storage, flush queue and enable hw debug
  if (!isRegistered()) {
    const auto previousMessages = debugMessages_;
    debugMessages_ = messageStorage;
    Unimplemented();
    /*
    if (!device()->gslCtx()->registerHwDebugger(debugMessages_)) {
      LogError("debugmanager: Register debugger failed");
      return CL_OUT_OF_RESOURCES;
    }
    */
    isRegistered_ = true;

    if (CL_SUCCESS != createRuntimeTrapHandler()) {
      LogError("debugmanager: Create runtime trap handler failed");
      // Undo the registration: otherwise the manager keeps reporting itself
      // as registered although this call failed, and a later call would
      // skip the trap handler creation and return CL_SUCCESS.
      isRegistered_ = false;
      debugMessages_ = previousMessages;
      return CL_OUT_OF_RESOURCES;
    }
  }

  context_ = context;

  return CL_SUCCESS;
}
|
||||
|
||||
void GpuDebugManager::unregisterDebugger() {
|
||||
if (isRegistered()) {
|
||||
// reset the debugger registration flag
|
||||
isRegistered_ = false;
|
||||
context_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
//! Flush cache
//!
//! \param mask raw cache mask, wrapped into a HwDbgGpuCacheMask.
//!
//! The flush itself is currently disabled (see the commented-out call), so
//! only the mask object is constructed.
void GpuDebugManager::flushCache(uint32_t mask) {
  HwDbgGpuCacheMask requestedCaches(mask);
  // device()->xferQueue()->flushCuCaches(requestedCaches);
}
|
||||
|
||||
|
||||
//! Copy the trap related settings held by the debug manager into the tool
//! info structure that is used in the kernel dispatch.
//!
//! \param toolInfo structure to fill; the scratch/global addresses, scratch
//!                 size and SQ perf counter flag are reset here
void GpuDebugManager::setupTrapInformation(DebugToolInfo* toolInfo) {
  // Fields that are reset rather than copied
  toolInfo->sqPerfcounterEnable_ = false;
  toolInfo->globalAddress_ = 0;
  toolInfo->scratchSize_ = 0;
  toolInfo->scratchAddress_ = 0;

  // Exception policy and execution mode
  toolInfo->exceptionMask_ = excpPolicy_.exceptionMask;
  toolInfo->gpuSingleStepMode_ = execMode_.gpuSingleStepMode;
  toolInfo->monitorMode_ = execMode_.monitorMode;
  toolInfo->reservedCuNum_ = execMode_.reservedCuNum;

  // The order of these three bits is determined by the definition
  // of the register COMPUTE_DISPATCH_INITIATOR
  const auto l1ScalarBit = execMode_.disableL1Scalar << 2;
  const auto l2CacheBit = execMode_.disableL2Cache << 1;
  const auto l1VectorBit = execMode_.disableL1Vector;
  toolInfo->cacheDisableMask_ = (l1ScalarBit | l2CacheBit | l1VectorBit);

  // Trap handler and trap buffer recorded for the debugger
  toolInfo->trapHandler_ = rtTrapInfo_[kDebugTrapHandlerLocation];
  toolInfo->trapBuffer_ = rtTrapInfo_[kDebugTrapBufferLocation];
}
|
||||
|
||||
void GpuDebugManager::getPacketAmdInfo(const void* aqlCodeInfo, void* packetInfo) const
|
||||
|
||||
{
|
||||
const AqlCodeInfo* codeInfo = reinterpret_cast<const AqlCodeInfo*>(aqlCodeInfo);
|
||||
|
||||
const amd_kernel_code_t* hostAqlCode = codeInfo->aqlCode_;
|
||||
|
||||
PacketAmdInfo* packet = reinterpret_cast<PacketAmdInfo*>(packetInfo);
|
||||
|
||||
const amd_kernel_code_t* akc = hostAqlCode;
|
||||
|
||||
packet->numberOfSgprs_ = akc->wavefront_sgpr_count;
|
||||
packet->numberOfVgprs_ = akc->workitem_vgpr_count;
|
||||
|
||||
// use mapped kernel_object_address for host accessing of ISA buffer
|
||||
packet->pointerToIsaBuffer_ = (char*)(hostAqlCode) + akc->kernel_code_entry_byte_offset;
|
||||
|
||||
packet->scratchBufferWaveOffset_ = akc->debug_wavefront_private_segment_offset_sgpr;
|
||||
|
||||
packet->sizeOfIsaBuffer_ = codeInfo->aqlCodeSize_;
|
||||
|
||||
packet->sizeOfStaticGroupMemory_ = akc->workgroup_group_segment_byte_size;
|
||||
|
||||
// The trap_reserved_vgpr_index will be 4 less the original
|
||||
// This value must be used only by the debugger
|
||||
packet->trapReservedVgprIndex_ = akc->workitem_vgpr_count - NumberReserveVgprs;
|
||||
}
|
||||
|
||||
//! Create the debug event
//!
//! Not implemented for this backend: the call goes through Unimplemented()
//! and always returns 0. The earlier implementation is kept below for
//! reference only.
DebugEvent GpuDebugManager::createDebugEvent(const bool autoReset) {
  Unimplemented();

  // Reference implementation (disabled):
  //
  //   // create the event object
  //   osEventHandle shaderEvent = osEventCreate(!autoReset);
  //
  //   // event object has been created, set the initial state
  //   if (shaderEvent != 0) {
  //     osEventReset(shaderEvent);  // initial state is non-signaled
  //
  //     if (device()->gslCtx()->exceptionNotification(shaderEvent)) {
  //       return shaderEvent;
  //     }
  //   }

  return 0;
}
|
||||
|
||||
//! Wait for the debug event
//!
//! Not implemented for this backend: the call goes through Unimplemented()
//! and always returns CL_SUCCESS. The earlier implementation is kept below
//! for reference only.
int32_t GpuDebugManager::waitDebugEvent(DebugEvent pEvent, uint32_t timeOut) const {
  Unimplemented();

  // Reference implementation (disabled):
  //
  //   if (osEventTimedWait(pEvent, timeOut)) {
  //     return CL_SUCCESS;
  //   }
  //   else {
  //     return CL_EVENT_TIMEOUT_AMD;
  //   }

  return CL_SUCCESS;
}
|
||||
|
||||
//! Destroy the debug event
//!
//! Not implemented for this backend: only Unimplemented() is called and
//! *pEvent is left untouched. The earlier implementation is kept below for
//! reference only.
void GpuDebugManager::destroyDebugEvent(DebugEvent* pEvent) {
  Unimplemented();

  // Reference implementation (disabled):
  //
  //   osEventDestroy(*pEvent);
  //   *pEvent = 0;
  //
  //   device()->gslCtx()->exceptionNotification(0);
}
|
||||
|
||||
void GpuDebugManager::wavefrontControl(uint32_t waveAction, uint32_t waveMode, uint32_t trapId,
|
||||
void* waveAddr) const {
|
||||
Unimplemented();
|
||||
// device()->gslCtx()->executeSqCommand(waveAction, waveMode, trapId, waveAddr);
|
||||
}
|
||||
|
||||
void GpuDebugManager::setAddressWatch(uint32_t numWatchPoints, void** watchAddress,
|
||||
uint64_t* watchMask, uint64_t* watchMode, DebugEvent* event) {
|
||||
size_t requiredSize = numWatchPoints * sizeof(HwDbgAddressWatch);
|
||||
|
||||
// previously allocated size is not big enough, allocate new memory
|
||||
if (addressWatchSize_ < requiredSize) {
|
||||
delete[] addressWatch_;
|
||||
addressWatch_ = new HwDbgAddressWatch[numWatchPoints];
|
||||
addressWatchSize_ = requiredSize;
|
||||
}
|
||||
|
||||
// fill in the address watch structure
|
||||
memset(addressWatch_, 0, addressWatchSize_);
|
||||
|
||||
for (uint32_t i = 0; i < numWatchPoints; i++) {
|
||||
amd::Memory* watchMem = as_amd(reinterpret_cast<cl_mem>(watchAddress[i]));
|
||||
Memory* watchMemAddress = device()->getGpuMemory(watchMem);
|
||||
|
||||
addressWatch_[i].watchAddress_ = reinterpret_cast<void*>(watchMemAddress->vmAddress());
|
||||
addressWatch_[i].watchMask_ = watchMask[i];
|
||||
addressWatch_[i].watchMode_ = (cl_dbg_address_watch_mode_amd)watchMode[i];
|
||||
addressWatch_[i].event_ = (0 != event) ? event[i] : 0;
|
||||
}
|
||||
|
||||
Unimplemented();
|
||||
// setup the watch addresses
|
||||
// device()->gslCtx()->setAddressWatch(numWatchPoints, (void*) addressWatch_);
|
||||
}
|
||||
|
||||
//! Set global memory values
//!
//! Copies \p size bytes from \p srcPtr into the device memory of \p memObj,
//! starting \p offset bytes into its host mapping.
//!
//! \param memObj memory object to update
//! \param offset byte offset into the mapped memory
//! \param srcPtr host source data
//! \param size   number of bytes to copy
//!
//! The range [offset, offset + size) is not validated against the size of the
//! memory object; the caller is responsible for staying in bounds.
void GpuDebugManager::setGlobalMemory(amd::Memory* memObj, uint32_t offset, void* srcPtr,
                                      uint32_t size) {
  Memory* globalMem = device()->getGpuMemory(memObj);
  if (globalMem == nullptr) {
    LogError("debugmanager: Set global memory failed - no GPU memory object");
    return;
  }

  address mappedMem = static_cast<address>(globalMem->map(nullptr, 0));
  // An assert alone disappears in release builds; fail explicitly instead of
  // copying through a null mapping.
  if (mappedMem == nullptr) {
    LogError("debugmanager: Set global memory failed - cannot map the memory");
    return;
  }

  void* dest_ptr = reinterpret_cast<void*>(mappedMem + offset);
  memcpy(dest_ptr, srcPtr, size);

  globalMem->unmap(nullptr);
}
|
||||
|
||||
int32_t GpuDebugManager::createRuntimeTrapHandler() {
|
||||
size_t codeSize = 0;
|
||||
const uint32_t* rtTrapCode = nullptr;
|
||||
|
||||
if (device()->settings().viPlus_) {
|
||||
codeSize = sizeof(RuntimeTrapCodeVi);
|
||||
rtTrapCode = RuntimeTrapCodeVi;
|
||||
} else {
|
||||
codeSize = sizeof(RuntimeTrapCode);
|
||||
rtTrapCode = RuntimeTrapCode;
|
||||
}
|
||||
|
||||
uint32_t numCodes = codeSize / sizeof(uint32_t);
|
||||
|
||||
// Handle TMA corruption hw bug workaround -
|
||||
// The trap handler buffer has extra 256 bytes allocated, the TMA address
|
||||
// is stored in the first two DWORDs and the actual trap handler code
|
||||
// is stored starting at the location of 256 bytes (TbaStartOffset).
|
||||
//
|
||||
// allocate memory for the runtime trap handler (TBA) + TMA address
|
||||
uint32_t allocSize = codeSize + TbaStartOffset;
|
||||
|
||||
Memory* rtTBA = new Memory(*device(), allocSize);
|
||||
runtimeTBA_ = rtTBA;
|
||||
|
||||
if ((rtTBA == nullptr) || !rtTBA->create(Resource::RemoteUSWC)) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
address tbaAddress = reinterpret_cast<address>(rtTBA->map(nullptr));
|
||||
|
||||
// allocate buffer for the runtime trap handler buffer (TMA)
|
||||
uint32_t tmaSize = 0x100;
|
||||
Memory* rtTMA = new Memory(*device(), tmaSize);
|
||||
runtimeTMA_ = rtTMA;
|
||||
|
||||
if ((rtTMA == nullptr) || !rtTMA->create(Resource::RemoteUSWC)) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
uint64_t rtTmaAddress = rtTMA->vmAddress();
|
||||
if ((rtTBA->vmAddress() & 0xFF) != 0 || (rtTmaAddress & 0xFF) != 0) {
|
||||
LogError("debugmanager: Trap handler/buffer is not 256-byte aligned");
|
||||
return CL_INVALID_VALUE;
|
||||
}
|
||||
|
||||
// store the TMA address at the beginning of trap handler buffer
|
||||
uint64_t* tbaStorage = reinterpret_cast<uint64_t*>(tbaAddress);
|
||||
tbaStorage[0] = rtTmaAddress;
|
||||
|
||||
// save the trap handler code
|
||||
uint32_t* trapHandlerPtr = (uint32_t*)(tbaAddress + TbaStartOffset);
|
||||
for (uint32_t i = 0; i < numCodes; i++) {
|
||||
trapHandlerPtr[i] = rtTrapCode[i];
|
||||
}
|
||||
|
||||
rtTBA->unmap(nullptr);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
} // namespace pal
|
||||
@@ -1,123 +0,0 @@
|
||||
/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "device/pal/palvirtual.hpp"
|
||||
#include "device/pal/paldebugger.hpp"
|
||||
|
||||
namespace pal {
|
||||
|
||||
class GpuDebugManager;
|
||||
class Device;
|
||||
class Memory;
|
||||
|
||||
|
||||
/*! \brief Debug Manager Class
|
||||
*
|
||||
* The debug manager class is used to pass all the trap info to the
|
||||
* kernel dispatch and then the kernel execution can use such trap information
|
||||
* for kernel execution. This class contains the trap handler and shader event
|
||||
* objects. The trap handler is setup by users and passed to the kernel dispatch.
|
||||
* The shader event is to receive interrupts from the GPU and then users can
|
||||
* perform various operations.
|
||||
*
|
||||
* This class also provides the interface for setting up the pre-dispatch
|
||||
* callback functions used by the profiler and debugger. It also provides
|
||||
* a way to retrieve various debug information for the kernel execution.
|
||||
*
|
||||
*/
|
||||
class GpuDebugManager : public amd::HwDebugManager {
|
||||
public:
|
||||
//! Constructor of the debug manager class
|
||||
GpuDebugManager(amd::Device* device);
|
||||
|
||||
//! Destructor of the debug manager class
|
||||
~GpuDebugManager();
|
||||
|
||||
//! Get the single instance of the GpuDebugManager class
|
||||
static GpuDebugManager* getDefaultInstance();
|
||||
|
||||
//! Destroy the GpuDebugManager class object
|
||||
static void destroyInstances();
|
||||
|
||||
//! Flush cache
|
||||
void flushCache(uint32_t mask);
|
||||
|
||||
//! Create the debug event
|
||||
DebugEvent createDebugEvent(const bool autoReset);
|
||||
|
||||
//! Wait for the debug event
|
||||
int32_t waitDebugEvent(DebugEvent pEvent, uint32_t timeOut) const;
|
||||
|
||||
//! Destroy the debug event
|
||||
void destroyDebugEvent(DebugEvent* pEvent);
|
||||
|
||||
//! Register the debugger
|
||||
int32_t registerDebugger(amd::Context* context, uintptr_t messageStorage);
|
||||
|
||||
//! Unregister the debugger
|
||||
void unregisterDebugger();
|
||||
|
||||
//! Send the wavefront control cmmand
|
||||
void wavefrontControl(uint32_t waveAction, uint32_t waveMode, uint32_t trapId,
|
||||
void* waveAddr) const;
|
||||
|
||||
//! Set address watching point
|
||||
void setAddressWatch(uint32_t numWatchPoints, void** watchAddress, uint64_t* watchMask,
|
||||
uint64_t* watchMode, DebugEvent* pEvent);
|
||||
|
||||
//! Map the kernel code for host access
|
||||
void mapKernelCode(void* aqlCodeInfo) const;
|
||||
|
||||
//! Get the packet information for dispatch
|
||||
void getPacketAmdInfo(const void* aqlCodeInfo, void* packetInfo) const;
|
||||
|
||||
//! Set global memory values
|
||||
void setGlobalMemory(amd::Memory* memObj, uint32_t offset, void* srcPtr, uint32_t size);
|
||||
|
||||
//! Execute the post-dispatch callback function
|
||||
void executePostDispatchCallBack();
|
||||
|
||||
//! Execute the pre-dispatch callback function
|
||||
void executePreDispatchCallBack(void* aqlPacket, void* toolInfo);
|
||||
|
||||
protected:
|
||||
const VirtualGPU* vGpu() const { return vGpu_; }
|
||||
|
||||
private:
|
||||
//! Setup trap handler info for kernel execution
|
||||
void setupTrapInformation(DebugToolInfo* toolInfo);
|
||||
|
||||
//! Create runtime trap handler
|
||||
int32_t createRuntimeTrapHandler();
|
||||
|
||||
const pal::Device* device() const { return reinterpret_cast<const pal::Device*>(device_); }
|
||||
|
||||
VirtualGPU* vGpu_; //!< the virtual GPU
|
||||
uintptr_t debugMessages_; //!< Pointer to a SHARED_DEBUG_MESSAGES pass to the KMD
|
||||
HwDbgAddressWatch* addressWatch_; //!< Address watch data
|
||||
size_t addressWatchSize_; //!< Size of address watch data
|
||||
//! Arguments used by the callback function
|
||||
void* oclEventHandle_; //!< event handler
|
||||
const hsa_kernel_dispatch_packet_t* aqlPacket_; //!< AQL packet
|
||||
};
|
||||
|
||||
} // namespace pal
|
||||
@@ -41,22 +41,6 @@ enum PalGpuMemoryType {
|
||||
PAL_TEXTURE_BUFFER, ///< "buffer" texture inside VBO
|
||||
};
|
||||
|
||||
//! Per-dispatch kernel information handed to the HW debug path.
struct HwDbgKernelInfo {
  //! Handle of GPU local memory for kernel private scratch space
  uint64_t scratchBufAddr;
  //! Size in bytes of the scratch buffer
  size_t scratchBufferSizeInBytes;
  //! Address of the global heap base
  uint64_t heapBufAddr;
  //! Pointer to the dispatch packet
  const void* pAqlDispatchPacket;
  //! Pointer to the AQL queue
  const void* pAqlQueuePtr;
  //! Address of the trap handler (TBA)
  void* trapHandler;
  //! Address of the trap handler buffer (TMA)
  void* trapHandlerBuffer;
  //! Exception mask
  uint32_t excpEn;
  //! Trap present flag
  bool trapPresent;
  //! Debug mode flag (GPU single step mode)
  bool sqDebugMode;
  //! Mask for SE0 (reserving CU for display)
  uint32_t mgmtSe0Mask;
  //! Mask for SE1 (reserving CU for display)
  uint32_t mgmtSe1Mask;
  //! Cache disable mask
  uint32_t cacheDisableMask;
};
|
||||
|
||||
//! Engine types
|
||||
enum EngineType { MainEngine = 0, SdmaEngine, AllEngines };
|
||||
|
||||
|
||||
@@ -31,7 +31,6 @@
|
||||
#include "device/pal/palprogram.hpp"
|
||||
#include "device/pal/palsettings.hpp"
|
||||
#include "device/pal/palblit.hpp"
|
||||
#include "device/pal/paldebugmanager.hpp"
|
||||
#include "palLib.h"
|
||||
#include "palPlatform.h"
|
||||
#include "palDevice.h"
|
||||
@@ -761,10 +760,6 @@ Device::Device()
|
||||
rgpCaptureMgr_(nullptr) {}
|
||||
|
||||
Device::~Device() {
|
||||
// remove the HW debug manager
|
||||
delete hwDebugMgr_;
|
||||
hwDebugMgr_ = nullptr;
|
||||
|
||||
if (p2p_stage_ != nullptr) {
|
||||
p2p_stage_->release();
|
||||
p2p_stage_ = nullptr;
|
||||
@@ -1025,11 +1020,6 @@ bool Device::create(Pal::IDevice* device) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// create the HW debug manager if needed
|
||||
if (settings().enableHwDebug_) {
|
||||
hwDebugMgr_ = new GpuDebugManager(this);
|
||||
}
|
||||
|
||||
if ((glb_ctx_ == nullptr) && (gNumDevices > 1) && (device == gDeviceList[gNumDevices - 1])) {
|
||||
std::vector<amd::Device*> devices;
|
||||
uint32_t numDevices = amd::Device::numDevices(CL_DEVICE_TYPE_GPU, true);
|
||||
@@ -2511,17 +2501,6 @@ void Device::SrdManager::fillResourceList(VirtualGPU& gpu) {
|
||||
}
|
||||
}
|
||||
|
||||
//! Initialize the Hardware Debug Manager by registering the debugger with it.
//!
//! \param context        context passed on to the debug manager
//! \param messageStorage debug message storage passed on to the debug manager
//! \return the status of registerDebugger(), or CL_DEBUGGER_REGISTER_FAILURE_AMD
//!         when this device has no debug manager. On failure the debug manager
//!         is destroyed.
int32_t Device::hwDebugManagerInit(amd::Context* context, uintptr_t messageStorage) {
  // The debug manager is only created when HW debug is enabled in the
  // settings, so it may legitimately be absent here.
  if (hwDebugMgr_ == nullptr) {
    LogError("debugmanager: HW debug manager is not available");
    return CL_DEBUGGER_REGISTER_FAILURE_AMD;
  }

  int32_t status = hwDebugMgr_->registerDebugger(context, messageStorage);

  if (CL_SUCCESS != status) {
    delete hwDebugMgr_;
    hwDebugMgr_ = nullptr;
  }

  return status;
}
|
||||
|
||||
bool Device::SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput,
|
||||
cl_set_device_clock_mode_output_amd* pSetClockModeOutput) {
|
||||
Pal::SetClockModeInput setClockMode = {};
|
||||
|
||||
@@ -533,9 +533,6 @@ class Device : public NullDevice {
|
||||
//! Returns SRD manger object
|
||||
SrdManager& srds() const { return *srdManager_; }
|
||||
|
||||
//! Initialize the Hardware Debug Manager
|
||||
int32_t hwDebugManagerInit(amd::Context* context, uintptr_t messageStorage);
|
||||
|
||||
//! Returns PAL device properties
|
||||
const Pal::DeviceProperties& properties() const { return properties_; }
|
||||
|
||||
|
||||
@@ -88,7 +88,7 @@ bool Segment::alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t
|
||||
}
|
||||
|
||||
if (!amd_mem_obj->create(nullptr)) {
|
||||
LogError("[OCL] failed to create a svm hidden buffer!");
|
||||
LogError("[OCL] fisThreadAliveailed to create a svm hidden buffer!");
|
||||
amd_mem_obj->release();
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1157,17 +1157,14 @@ bool Resource::create(MemoryType memType, CreateParams* params, bool forceLinear
|
||||
} else {
|
||||
memType = Local;
|
||||
}
|
||||
// force to use remote memory for HW DEBUG or use
|
||||
// local memory once we determine if FGS is supported
|
||||
// memType = (!dev().settings().enableHwDebug_) ? Local : RemoteUSWC;
|
||||
}
|
||||
|
||||
// Get the element size
|
||||
elementSize_ = Pal::Formats::BytesPerPixel(format);
|
||||
desc_.type_ = memType;
|
||||
if (memType == Scratch) {
|
||||
// use local memory for scratch buffer unless it is using HW DEBUG
|
||||
desc_.type_ = (!dev().settings().enableHwDebug_) ? Local : RemoteUSWC;
|
||||
// use local memory for scratch buffer
|
||||
desc_.type_ = Local;
|
||||
desc_.scratch_ = true;
|
||||
}
|
||||
|
||||
|
||||
@@ -509,11 +509,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
|
||||
enableExtension(ClKhrMipMapImageWrites);
|
||||
}
|
||||
|
||||
// Enable HW debug
|
||||
if (GPU_ENABLE_HW_DEBUG) {
|
||||
enableHwDebug_ = true;
|
||||
}
|
||||
|
||||
#if defined(_WIN32)
|
||||
enableExtension(ClAmdPlanarYuv);
|
||||
#endif
|
||||
|
||||
@@ -1,159 +0,0 @@
|
||||
/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
/*******************************************************************************
|
||||
* The source of the runtime trap handler, "runtimetraphandler.sp3".
|
||||
* The binary is created by the SP3 tool with the following command:
|
||||
*
|
||||
* sp3.exe runtimetraphandler.sp3 -hex runtimeTrapCode.hex
|
||||
*
|
||||
*******************************************************************************
|
||||
|
||||
shader main
|
||||
asic(TAHITI) // for SI/CI or asic(VI) for VI
|
||||
type(CS)
|
||||
|
||||
// clear wave exception state
|
||||
v_clrexcp
|
||||
s_waitcnt 0
|
||||
//==========================================================================
|
||||
// Handle the workaround for HW bug that causes the incorrect TMA value.
|
||||
// Retrieve the TMA values, which are stored at TBA buffer at location
|
||||
// 256 (0x100).
|
||||
|
||||
// Construct the memory descriptor with TBA as the start address
|
||||
// we are using the registers ttmp[8:11] for that.
|
||||
s_mov_b32 ttmp8, tba_lo
|
||||
s_and_b32 ttmp9, tba_hi, 0xffff
|
||||
|
||||
// 0x100=256 bytes, which is the size of the buffer to
|
||||
// store all the level 2 trap handler info
|
||||
s_or_b32 ttmp9, ttmp9, 0x01000000
|
||||
s_mov_b32 ttmp10, 0x00002000
|
||||
s_mov_b32 ttmp11, 0x00024fac
|
||||
|
||||
// TMA is stored 256 (0x100) bytes before the TBA value
|
||||
s_sub_u32 ttmp8, ttmp8, 0x100
|
||||
|
||||
// Backup the s0 since ttmp registers cannot be target of
|
||||
// buffer read instruction
|
||||
s_mov_b32 ttmp7, s0
|
||||
s_buffer_load_dword s0, ttmp8, 0x0 // VI: offset=0x0 (bytes)
|
||||
s_waitcnt 0
|
||||
s_mov_b32 tma_lo, s0
|
||||
s_buffer_load_dword s0, ttmp8, 0x1 // VI: offset=0x4 (bytes)
|
||||
s_waitcnt 0
|
||||
s_mov_b32 tma_hi, s0
|
||||
s_mov_b32 s0, ttmp7
|
||||
|
||||
//===================================================
|
||||
// setup the memory descriptor for TMA
|
||||
s_mov_b32 ttmp6, 0x18
|
||||
s_add_u32 ttmp8, tma_lo, ttmp6
|
||||
s_and_b32 ttmp9, tma_hi, 0xffff
|
||||
//0x68=104 bytes, which is the size of the buffer to
|
||||
//store all the level2 trap handler info
|
||||
s_or_b32 ttmp9, ttmp9, 0x00680000
|
||||
s_mov_b32 ttmp10, 0x00002000
|
||||
s_mov_b32 ttmp11, 0x00024fac
|
||||
|
||||
//===================================================
|
||||
// backup the TMA values to be restored later
|
||||
// level-one TMA saved in the ttmp6,ttmp7
|
||||
s_mov_b32 ttmp6, tma_lo
|
||||
s_mov_b32 ttmp7, tma_hi
|
||||
|
||||
//===================================================
|
||||
// setup the TMA for the level-two trap handler
|
||||
// level-two TMA saved in tma_hi, tma_lo
|
||||
s_mov_b32 ttmp3, s0
|
||||
s_buffer_load_dword s0, ttmp8, 0x2 // VI: offset=0x8 (bytes)
|
||||
s_waitcnt 0x0000
|
||||
s_mov_b32 tma_lo, s0
|
||||
|
||||
s_buffer_load_dword s0, ttmp8, 0x3 // VI: offset=0xc (bytes)
|
||||
s_waitcnt 0x0000
|
||||
s_mov_b32 tma_hi, s0
|
||||
|
||||
//===================================================
|
||||
// setup the TBA for the level-two trap handler
|
||||
// level-two TBA saved in ttmp9, ttmp8
|
||||
s_buffer_load_dword s0, ttmp8, 0x0 // VI: offset=0x0 (bytes)
|
||||
s_waitcnt 0x0000
|
||||
s_mov_b32 ttmp2, s0
|
||||
|
||||
s_buffer_load_dword s0, ttmp8, 0x1 // VI: offset=0x4 (bytes)
|
||||
s_waitcnt 0x0000
|
||||
|
||||
//swap the values of s0 and ttmp3 without using other registers
|
||||
s_xor_b32 ttmp3, s0, ttmp3
|
||||
s_xor_b32 s0, s0, ttmp3
|
||||
s_xor_b32 ttmp3, s0, ttmp3
|
||||
|
||||
//store the debug trap handler start address in ttmp8,9
|
||||
s_mov_b32 ttmp8, ttmp2
|
||||
s_mov_b32 ttmp9, ttmp3
|
||||
|
||||
//===================================================
|
||||
// get the pc value to resume execution
|
||||
s_getpc_b64 [ttmp2, ttmp3]
|
||||
s_add_u32 ttmp2, ttmp2, 0x8
|
||||
|
||||
//===================================================
|
||||
//set the pc value to jump to the debug trap handler
|
||||
s_setpc_b64 [ttmp8, ttmp9]
|
||||
|
||||
//===================================================
|
||||
// restore the tma values
|
||||
s_mov_b32 tma_hi, ttmp7
|
||||
s_mov_b32 tma_lo, ttmp6
|
||||
|
||||
label_return:
|
||||
//===================================================
|
||||
// return from the trap handler to the saved PC
|
||||
s_and_b32 ttmp1, ttmp1, 0xffff
|
||||
s_rfe_b64 [ttmp0,ttmp1]
|
||||
|
||||
end
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
/// Runtime trap handler binary assembled with the "asic(TAHITI)" directive
/// (58 DWORDs; see the SP3 source listed above).
static constexpr uint32_t RuntimeTrapCode[] = {
    0x7e008200, 0xbf8c0000, 0xbef8036c, 0x8779ff6d, 0x0000ffff, 0x8879ff79, 0x01000000, 0xbefa03ff,
    0x00002000, 0xbefb03ff, 0x00024fac, 0x80f8ff78, 0x00000100, 0xbef70300, 0xc2007900, 0xbf8c0000,
    0xbeee0300, 0xc2007901, 0xbf8c0000, 0xbeef0300, 0xbe800377, 0xbef60398, 0x8078766e, 0x8779ff6f,
    0x0000ffff, 0x8879ff79, 0x00680000, 0xbefa03ff, 0x00002000, 0xbefb03ff, 0x00024fac, 0xbef6036e,
    0xbef7036f, 0xbef30300, 0xc2007902, 0xbf8c0000, 0xbeee0300, 0xc2007903, 0xbf8c0000, 0xbeef0300,
    0xc2007900, 0xbf8c0000, 0xbef20300, 0xc2007901, 0xbf8c0000, 0x89737300, 0x89007300, 0x89737300,
    0xbef80372, 0xbef90373, 0xbef21f00, 0x80728872, 0xbe802078, 0xbeef0377, 0xbeee0376, 0x8771ff71,
    0x0000ffff, 0xbe802270};
|
||||
|
||||
|
||||
/// Runtime trap handler binary assembled with the "asic(VI)" directive
/// (64 DWORDs; see the SP3 source listed above).
static constexpr uint32_t RuntimeTrapCodeVi[] = {
    0x7e006a00, 0xbf8c0000, 0xbef8006c, 0x8679ff6d, 0x0000ffff, 0x8779ff79, 0x01000000, 0xbefa00ff,
    0x00002000, 0xbefb00ff, 0x00024fac, 0x80f8ff78, 0x00000100, 0xbef70000, 0xc022003c, 0x00000000,
    0xbf8c0000, 0xbeee0000, 0xc022003c, 0x00000004, 0xbf8c0000, 0xbeef0000, 0xbe800077, 0xbef60098,
    0x8078766e, 0x8679ff6f, 0x0000ffff, 0x8779ff79, 0x00680000, 0xbefa00ff, 0x00002000, 0xbefb00ff,
    0x00024fac, 0xbef6006e, 0xbef7006f, 0xbef30000, 0xc022003c, 0x00000008, 0xbf8c0000, 0xbeee0000,
    0xc022003c, 0x0000000c, 0xbf8c0000, 0xbeef0000, 0xc022003c, 0x00000000, 0xbf8c0000, 0xbef20000,
    0xc022003c, 0x00000004, 0xbf8c0000, 0x88737300, 0x88007300, 0x88737300, 0xbef80072, 0xbef90073,
    0xbef21c00, 0x80728872, 0xbe801d78, 0xbeef0077, 0xbeee0076, 0x8671ff71, 0x0000ffff, 0xbe801f70};
|
||||
@@ -30,7 +30,6 @@
|
||||
#include "device/pal/palthreadtrace.hpp"
|
||||
#include "device/pal/paltimestamp.hpp"
|
||||
#include "device/pal/palblit.hpp"
|
||||
#include "device/pal/paldebugger.hpp"
|
||||
#include "device/appprofile.hpp"
|
||||
#include "device/devhostcall.hpp"
|
||||
#include "hsa.h"
|
||||
@@ -1024,11 +1023,6 @@ bool VirtualGPU::create(bool profiling, uint deviceQueueSize, uint rtCUs,
|
||||
// Fall through ...
|
||||
case Settings::BlitEngineCAL:
|
||||
case Settings::BlitEngineKernel:
|
||||
// use host blit for HW debug
|
||||
if (dev().settings().enableHwDebug_) {
|
||||
blitSetup.disableCopyImageToBuffer_ = true;
|
||||
blitSetup.disableCopyBufferToImage_ = true;
|
||||
}
|
||||
blitMgr_ = new KernelBlitManager(*this, blitSetup);
|
||||
break;
|
||||
}
|
||||
@@ -3719,141 +3713,6 @@ void VirtualGPU::writeVQueueHeader(VirtualGPU& hostQ, const Memory* kernelTable)
|
||||
virtualQueue_->writeRawData(hostQ, 0, sizeof(AmdVQueueHeader), vqHeader_, true);
|
||||
}
|
||||
|
||||
void VirtualGPU::buildKernelInfo(const HSAILKernel& hsaKernel, hsa_kernel_dispatch_packet_t* aqlPkt,
|
||||
HwDbgKernelInfo& kernelInfo, amd::Event* enqueueEvent) {
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
amd::HwDebugManager* dbgManager = dev().hwDebugMgr();
|
||||
assert(dbgManager && "No HW Debug Manager!");
|
||||
|
||||
// Initialize structure with default values
|
||||
|
||||
if (hsaKernel.prog().maxScratchRegs() > 0) {
|
||||
pal::Memory* scratchBuf = dev().scratch(hwRing())->memObj_;
|
||||
kernelInfo.scratchBufAddr = scratchBuf->vmAddress();
|
||||
kernelInfo.scratchBufferSizeInBytes = scratchBuf->size();
|
||||
|
||||
// Get the address of the scratch buffer and its size for CPU access
|
||||
address scratchRingAddr = static_cast<address>(scratchBuf->map(nullptr, 0));
|
||||
dbgManager->setScratchRing(scratchRingAddr, scratchBuf->size());
|
||||
scratchBuf->unmap(nullptr);
|
||||
} else {
|
||||
kernelInfo.scratchBufAddr = 0;
|
||||
kernelInfo.scratchBufferSizeInBytes = 0;
|
||||
dbgManager->setScratchRing(nullptr, 0);
|
||||
}
|
||||
|
||||
//! @todo: need to verify what is wanted for the global memory
|
||||
Unimplemented();
|
||||
kernelInfo.heapBufAddr = 0;
|
||||
|
||||
kernelInfo.pAqlDispatchPacket = aqlPkt;
|
||||
kernelInfo.pAqlQueuePtr = reinterpret_cast<void*>(hsaQueueMem_->vmAddress());
|
||||
|
||||
// Get the address of the kernel code and its size for CPU access
|
||||
/* pal::Memory* aqlCode = hsaKernel.gpuAqlCode();
|
||||
if (nullptr != aqlCode) {
|
||||
address aqlCodeAddr = static_cast<address>(aqlCode->map(nullptr, 0));
|
||||
dbgManager->setKernelCodeInfo(aqlCodeAddr, hsaKernel.aqlCodeSize());
|
||||
aqlCode->unmap(nullptr);
|
||||
}
|
||||
else {
|
||||
dbgManager->setKernelCodeInfo(nullptr, 0);
|
||||
}
|
||||
*/
|
||||
kernelInfo.trapPresent = false;
|
||||
kernelInfo.trapHandler = nullptr;
|
||||
kernelInfo.trapHandlerBuffer = nullptr;
|
||||
|
||||
kernelInfo.excpEn = 0;
|
||||
kernelInfo.cacheDisableMask = 0;
|
||||
kernelInfo.sqDebugMode = 0;
|
||||
|
||||
kernelInfo.mgmtSe0Mask = 0xFFFFFFFF;
|
||||
kernelInfo.mgmtSe1Mask = 0xFFFFFFFF;
|
||||
|
||||
// set kernel info for HW debug and call the callback function
|
||||
if (nullptr != dbgManager->preDispatchCallBackFunc()) {
|
||||
DebugToolInfo dbgSetting = {0};
|
||||
dbgSetting.scratchAddress_ = kernelInfo.scratchBufAddr;
|
||||
dbgSetting.scratchSize_ = kernelInfo.scratchBufferSizeInBytes;
|
||||
dbgSetting.globalAddress_ = kernelInfo.heapBufAddr;
|
||||
dbgSetting.aclBinary_ = hsaKernel.prog().binaryElf();
|
||||
dbgSetting.event_ = enqueueEvent;
|
||||
|
||||
// Execute the pre-dispatch call back function
|
||||
dbgManager->executePreDispatchCallBack(reinterpret_cast<void*>(aqlPkt), &dbgSetting);
|
||||
|
||||
// assign the debug TMA and TBA for kernel dispatch
|
||||
if (nullptr != dbgSetting.trapHandler_ && nullptr != dbgSetting.trapBuffer_) {
|
||||
assignDebugTrapHandler(dbgSetting, kernelInfo);
|
||||
}
|
||||
|
||||
kernelInfo.trapPresent = (kernelInfo.trapHandler) ? true : false;
|
||||
|
||||
// Exception policy
|
||||
kernelInfo.excpEn = dbgSetting.exceptionMask_;
|
||||
kernelInfo.cacheDisableMask = dbgSetting.cacheDisableMask_;
|
||||
kernelInfo.sqDebugMode = dbgSetting.gpuSingleStepMode_;
|
||||
|
||||
// Compute the mask for reserved CUs. These two dwords correspond to
|
||||
// two registers used for reserving CUs for display. In the current
|
||||
// implementation, the number of CUs reserved can be 0 to 7, and it
|
||||
// is set by debugger users.
|
||||
if (dbgSetting.monitorMode_) {
|
||||
uint32_t i = dbgSetting.reservedCuNum_ / 2;
|
||||
kernelInfo.mgmtSe0Mask <<= i;
|
||||
i = dbgSetting.reservedCuNum_ - i;
|
||||
kernelInfo.mgmtSe1Mask <<= i;
|
||||
}
|
||||
Unimplemented();
|
||||
/*
|
||||
// flush/invalidate the instruction, data, L1 and L2 caches
|
||||
InvalidateSqCaches();
|
||||
*/
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Points a kernel dispatch at the runtime trap handler and records the
// debugger-supplied trap handler (TBA) and trap buffer (TMA) addresses inside
// the runtime trap buffer.
//
// dbgSetting - debug settings; trapHandler_ and trapBuffer_ are looked up
//              through dev().getGpuMemory().
// kernelInfo - receives trapHandler and trapHandlerBuffer for the dispatch.
//
// NOTE(review): the results of hwDebugMgr(), runtimeTBA()/runtimeTMA(),
// getGpuMemory() and map() are dereferenced without a null check here;
// presumably the caller guarantees they are valid - confirm.
void VirtualGPU::assignDebugTrapHandler(const DebugToolInfo& dbgSetting,
|
||||
HwDbgKernelInfo& kernelInfo) {
|
||||
// Set up the runtime trap handler code and trap buffer to be assigned before kernel dispatching
|
||||
//
|
||||
Memory* rtTrapHandlerMem = static_cast<Memory*>(dev().hwDebugMgr()->runtimeTBA());
|
||||
Memory* rtTrapBufferMem = static_cast<Memory*>(dev().hwDebugMgr()->runtimeTMA());
|
||||
|
||||
// The dispatch uses the runtime trap handler, entered at TbaStartOffset from its base address
kernelInfo.trapHandler = reinterpret_cast<void*>(rtTrapHandlerMem->vmAddress() + TbaStartOffset);
|
||||
// With the TMA corruption hw bug workaround, the trap handler buffer can be set to zero.
|
||||
// However, by setting the runtime trap buffer (TMA) correctly, the runtime trap handler
|
||||
// without the workaround can still function correctly.
|
||||
kernelInfo.trapHandlerBuffer = reinterpret_cast<void*>(rtTrapBufferMem->vmAddress());
|
||||
|
||||
// Map the runtime trap buffer so that it can be written below
address rtTrapBufferAddress = static_cast<address>(rtTrapBufferMem->map(this));
|
||||
|
||||
// Memory objects backing the debugger-provided trap handler and trap buffer
Memory* trapHandlerMem = dev().getGpuMemory(dbgSetting.trapHandler_);
|
||||
Memory* trapBufferMem = dev().getGpuMemory(dbgSetting.trapBuffer_);
|
||||
|
||||
// Address of the trap handler code/buffer should be 256-byte aligned
|
||||
uint64_t tbaAddress = trapHandlerMem->vmAddress();
|
||||
uint64_t tmaAddress = trapBufferMem->vmAddress();
|
||||
// Misalignment is only reported through assert(); execution continues past this check
if ((tbaAddress & 0xFF) != 0 || (tmaAddress & 0xFF) != 0) {
|
||||
assert(false && "Trap handler/buffer is not 256-byte aligned");
|
||||
}
|
||||
|
||||
// The addresses of the debug trap handler code (TBA) and buffer (TMA) are
|
||||
// stored in the runtime trap handler buffer with offset location of 0x18-19
|
||||
// and 0x20-21, respectively.
|
||||
// rtTmaPtr[0] and rtTmaPtr[1] are consecutive 64-bit slots starting at
// rtTrapBufferAddress + 0x18 (assumes `address` is a byte pointer - TODO confirm).
uint64_t* rtTmaPtr = reinterpret_cast<uint64_t*>(rtTrapBufferAddress + 0x18);
|
||||
rtTmaPtr[0] = tbaAddress;
|
||||
rtTmaPtr[1] = tmaAddress;
|
||||
|
||||
// NOTE(review): mapped with `this` above but unmapped with nullptr - confirm this is intended
rtTrapBufferMem->unmap(nullptr);
|
||||
// Add GPU mem handles to the memory list for VidMM
|
||||
addVmMemory(trapHandlerMem);
|
||||
addVmMemory(trapBufferMem);
|
||||
addVmMemory(rtTrapHandlerMem);
|
||||
addVmMemory(rtTrapBufferMem);
|
||||
}
|
||||
|
||||
bool VirtualGPU::validateSdmaOverlap(const Resource& src, const Resource& dst) {
|
||||
uint64_t srcVmEnd = src.vmAddress() + src.vmSize();
|
||||
if (((src.vmAddress() >= sdmaRange_.start_) && (src.vmAddress() <= sdmaRange_.end_)) ||
|
||||
|
||||
@@ -26,7 +26,6 @@
|
||||
#include "device/pal/palprintf.hpp"
|
||||
#include "device/pal/paltimestamp.hpp"
|
||||
#include "device/pal/palsched.hpp"
|
||||
#include "device/pal/paldebugger.hpp"
|
||||
#include "device/pal/palgpuopen.hpp"
|
||||
#include "platform/commandqueue.hpp"
|
||||
#include "device/blit.hpp"
|
||||
@@ -634,16 +633,6 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
const amd::BufferRect& dstRect //!< region of destination for copy
|
||||
);
|
||||
|
||||
//! NOTE(review): the definition is not visible in this hunk; presumably fills
//! kernelInfo with the HW debug information for one kernel dispatch - confirm.
void buildKernelInfo(const HSAILKernel& hsaKernel, //!< HSA kernel
|
||||
hsa_kernel_dispatch_packet_t* aqlPkt, //!< AQL packet for the dispatch
|
||||
HwDbgKernelInfo& kernelInfo, //!< kernel info for the dispatch
|
||||
amd::Event* enqueueEvent //!< Event provided in the enqueue kernel command
|
||||
);
|
||||
|
||||
//! Sets the runtime trap handler and trap buffer in kernelInfo and stores the
//! addresses of the debugger's trap handler/buffer in the runtime trap buffer
void assignDebugTrapHandler(const DebugToolInfo& dbgSetting, //!< debug settings
|
||||
HwDbgKernelInfo& kernelInfo //!< kernel info for the dispatch
|
||||
);
|
||||
|
||||
void PrintChildren(const HSAILKernel& hsaKernel, //!< The parent HSAIL kernel
|
||||
VirtualGPU* gpuDefQueue //!< Device queue for children execution
|
||||
);
|
||||
|
||||
@@ -154,8 +154,6 @@ release(uint, OCL_SET_SVM_SIZE, 4*16384, \
|
||||
"set SVM space size for discrete GPU") \
|
||||
debug(uint, OCL_SYSMEM_REQUIREMENT, 2, \
|
||||
"Use flag to change the minimum requirement of system memory not to downgrade") \
|
||||
debug(bool, GPU_ENABLE_HW_DEBUG, false, \
|
||||
"Enable HW DEBUG for GPU") \
|
||||
release(uint, GPU_WAVES_PER_SIMD, 0, \
|
||||
"Force the number of waves per SIMD (1-10)") \
|
||||
release(bool, GPU_WAVE_LIMIT_ENABLE, false, \
|
||||
|
||||
Reference in New Issue
Block a user