SWDEV-368308 - Remove HW debug extension

Change-Id: If0c68023c09f0dac9111d52ecc0ad63719aa4e70
This commit is contained in:
German
2022-11-17 16:27:21 -05:00
committed by German Andryeyev
orang tua 80d444b2f1
melakukan e5a36ab1ad
19 mengubah file dengan 4 tambahan dan 1428 penghapusan
-1
Melihat File
@@ -67,7 +67,6 @@ target_sources(rocclr PRIVATE
${ROCCLR_SRC_DIR}/device/devprogram.cpp
${ROCCLR_SRC_DIR}/device/devwavelimiter.cpp
${ROCCLR_SRC_DIR}/device/hsailctx.cpp
${ROCCLR_SRC_DIR}/device/hwdebug.cpp
${ROCCLR_SRC_DIR}/elf/elf.cpp
${ROCCLR_SRC_DIR}/os/alloc.cpp
${ROCCLR_SRC_DIR}/os/os_posix.cpp
-1
Melihat File
@@ -57,7 +57,6 @@ target_sources(rocclr PRIVATE
${ROCCLR_SRC_DIR}/device/pal/palblit.cpp
${ROCCLR_SRC_DIR}/device/pal/palconstbuf.cpp
${ROCCLR_SRC_DIR}/device/pal/palcounters.cpp
${ROCCLR_SRC_DIR}/device/pal/paldebugmanager.cpp
${ROCCLR_SRC_DIR}/device/pal/paldevice.cpp
${ROCCLR_SRC_DIR}/device/pal/paldeviced3d10.cpp
${ROCCLR_SRC_DIR}/device/pal/paldeviced3d11.cpp
-2
Melihat File
@@ -502,7 +502,6 @@ Device::Device()
online_(true),
activeWait_(false),
blitProgram_(nullptr),
hwDebugMgr_(nullptr),
context_(nullptr),
heap_buffer_(nullptr),
arena_mem_obj_(nullptr),
@@ -791,7 +790,6 @@ Settings::Settings() : value_(0) {
customHostAllocator_ = false;
waitCommand_ = AMD_OCL_WAIT_COMMAND;
supportDepthsRGB_ = false;
enableHwDebug_ = false;
commandQueues_ = 200; //!< Field value set to maximum number
//!< concurrent Virtual GPUs for default
+1 -16
Melihat File
@@ -37,7 +37,6 @@
#if defined(WITH_COMPILER_LIB)
#include "hsailctx.hpp"
#endif
#include "hwdebug.hpp"
#include "devsignal.hpp"
#if defined(__clang__)
@@ -94,7 +93,6 @@ class TransferBufferFileCommand;
class StreamOperationCommand;
class VirtualMapCommand;
class ExternalSemaphoreCmd;
class HwDebugManager;
class Isa;
class Device;
struct KernelParameterDescriptor;
@@ -630,7 +628,6 @@ class Settings : public amd::HeapObject {
uint customHostAllocator_ : 1; //!< True if device has custom host allocator
// that replaces generic OS allocation routines
uint supportDepthsRGB_ : 1; //!< Support DEPTH and sRGB channel order format
uint enableHwDebug_ : 1; //!< Enable HW debug support
uint reportFMAF_ : 1; //!< Report FP_FAST_FMAF define in CL program
uint reportFMA_ : 1; //!< Report FP_FAST_FMA define in CL program
uint singleFpDenorm_ : 1; //!< Support Single FP Denorm
@@ -644,7 +641,7 @@ class Settings : public amd::HeapObject {
uint enableCoopMultiDeviceGroups_ : 1; //!< Enable cooperative groups multi device
uint fenceScopeAgent_ : 1; //!< Enable fence scope agent in AQL dispatch packet
uint rocr_backend_ : 1; //!< Device uses ROCr backend for submissions
uint reserved_ : 10;
uint reserved_ : 11;
};
uint value_;
};
@@ -1844,17 +1841,6 @@ class Device : public RuntimeObject {
//! Returns app profile
static const AppProfile* appProfile() { return &appProfile_; }
//! Register a hardware debugger manager
HwDebugManager* hwDebugMgr() const { return hwDebugMgr_; }
//! Initialize the Hardware Debug Manager
virtual int32_t hwDebugManagerInit(amd::Context* context, uintptr_t messageStorage) {
return CL_SUCCESS;
}
//! Remove the Hardware Debug Manager
virtual void hwDebugManagerRemove() {}
//! Adds GPU memory to the VA cache list
void addVACache(device::Memory* memory) const;
@@ -1963,7 +1949,6 @@ class Device : public RuntimeObject {
BlitProgram* blitProgram_; //!< Blit program info
static AppProfile appProfile_; //!< application profile
HwDebugManager* hwDebugMgr_; //!< Hardware Debug manager
amd::Context* context_; //!< Context
static amd::Context* glb_ctx_; //!< Global context with all devices
-154
Melihat File
@@ -1,154 +0,0 @@
/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "hwdebug.hpp"
#include <iostream>
#include <sstream>
#include <fstream>
namespace amd {
class Device;
/*
***************************************************************************
* Implementation of GPU Debug Manager class
***************************************************************************
*/
//! Constructs the debug manager for \p device.
//!
//! Every member that an accessor can read before the matching setter has run
//! is given a defined value here. In particular the exception policy and the
//! kernel execution mode are zeroed, so getExceptionPolicy() and
//! getKernelExecutionMode() never copy indeterminate bytes out to the caller.
HwDebugManager::HwDebugManager(amd::Device* device)
    : context_(nullptr),
      device_(device),
      preDispatchCallBackFunc_(nullptr),
      postDispatchCallBackFunc_(nullptr),
      preDispatchCallBackArgs_(nullptr),
      postDispatchCallBackArgs_(nullptr),
      debugInfo_(),   // value-initialization zeroes both descriptors
      rtTrapInfo_(),  // value-initialization nulls every trap slot
      paramMemory_(nullptr),
      numParams_(0),
      aclBinary_(nullptr),
      aqlCodeAddr_(nullptr),
      aqlCodeSize_(0),
      scratchRingAddr_(nullptr),
      scratchRingSize_(0),
      isRegistered_(false),
      excpPolicy_(),
      runtimeTBA_(nullptr),
      runtimeTMA_(nullptr) {
  // The execution mode is cleared through its 32-bit view, the same way the
  // setter writes it.
  execMode_.ui32All = 0;

  // Only the small header fields of the runtime trap info are cleared; the
  // VGPR backup array is deliberately left untouched because of its size.
  rtTrapHandlerInfo_.trap_.trapHandler_ = nullptr;
  rtTrapHandlerInfo_.trap_.trapBuffer_ = nullptr;
  rtTrapHandlerInfo_.dispatchId_ = 0;
}
//! Releases the kernel-parameter pointer list and the runtime trap handler
//! code (TBA) and buffer (TMA) objects held by the manager.
HwDebugManager::~HwDebugManager() {
  // Array form: the list is created with new[] in allocParamMemList().
  delete[] paramMemory_;

  delete runtimeTMA_;
  delete runtimeTBA_;
}
//! Records the pre-dispatch and post-dispatch callback entry points.
//! Either pointer may be null; the stored values are returned unchanged by
//! preDispatchCallBackFunc() / postDispatchCallBackFunc().
void HwDebugManager::setCallBackFunctions(cl_PreDispatchCallBackFunctionAMD preDispatchFunction,
                                          cl_PostDispatchCallBackFunctionAMD postDispatchFunction) {
  postDispatchCallBackFunc_ = postDispatchFunction;
  preDispatchCallBackFunc_ = preDispatchFunction;
}
//! Records the opaque argument pointers that are handed back to the
//! pre-dispatch and post-dispatch callbacks. The manager only stores them.
void HwDebugManager::setCallBackArguments(void* preDispatchArgs, void* postDispatchArgs) {
  postDispatchCallBackArgs_ = postDispatchArgs;
  preDispatchCallBackArgs_ = preDispatchArgs;
}
//! Get dispatch debug info
void HwDebugManager::getDispatchDebugInfo(void* debugInfo) const {
memcpy(debugInfo, (void*)&debugInfo_, sizeof(DispatchDebugInfo));
}
//! Remembers where the kernel (AQL) code lives and how large it is.
//! \param aqlCodeAddr address of the kernel code
//! \param aqlCodeSize size of the kernel code
void HwDebugManager::setKernelCodeInfo(address aqlCodeAddr, uint32_t aqlCodeSize) {
  aqlCodeSize_ = aqlCodeSize;
  aqlCodeAddr_ = aqlCodeAddr;
}
//! Remembers the scratch ring address and size (this is a setter; the values
//! are read back through mapScratchRing()).
void HwDebugManager::setScratchRing(address scratchRingAddr, uint32_t scratchRingSize) {
  scratchRingSize_ = scratchRingSize;
  scratchRingAddr_ = scratchRingAddr;
}
//! Reports the stored scratch ring to the caller.
//! \param scratchRingAddr receives the ring address as a 64-bit integer
//! \param scratchRingSize receives the ring size
void HwDebugManager::mapScratchRing(uint64_t* scratchRingAddr, uint32_t* scratchRingSize) const {
  const uint64_t ringAddress = reinterpret_cast<uint64_t>(scratchRingAddr_);
  *scratchRingAddr = ringAddress;
  *scratchRingSize = scratchRingSize_;
}
void HwDebugManager::setExceptionPolicy(void* exceptionPolicy) {
memcpy(&excpPolicy_, exceptionPolicy, sizeof(cl_dbg_exception_policy_amd));
}
void HwDebugManager::getExceptionPolicy(void* exceptionPolicy) const {
memcpy(exceptionPolicy, &excpPolicy_, sizeof(cl_dbg_exception_policy_amd));
}
void HwDebugManager::setKernelExecutionMode(void* mode) {
cl_dbg_kernel_exec_mode_amd* execMode = reinterpret_cast<cl_dbg_kernel_exec_mode_amd*>(mode);
execMode_.ui32All = execMode->ui32All;
}
//! Writes the stored kernel execution mode into the caller's structure.
//! \param mode points at a cl_dbg_kernel_exec_mode_amd; its 32-bit aggregate
//!             view (ui32All) is overwritten.
void HwDebugManager::getKernelExecutionMode(void* mode) const {
  auto* const destination = static_cast<cl_dbg_kernel_exec_mode_amd*>(mode);
  destination->ui32All = execMode_.ui32All;
}
//! Stores the (non-owning) pointer to the kernel's ACL binary.
void HwDebugManager::setAclBinary(void* aclBinary) {
  aclBinary_ = aclBinary;
}
void HwDebugManager::allocParamMemList(uint32_t numParams) {
if (NULL != paramMemory_) {
delete[] paramMemory_;
}
numParams_ = numParams;
paramMemory_ = new amd::Memory*[numParams];
}
//! Returns the memory object recorded for kernel parameter \p paramIdx as a
//! cl_mem handle. The index is validated by assertion only.
cl_mem HwDebugManager::getKernelParamMem(uint32_t paramIdx) const {
  assert((paramIdx < numParams_) && "Invalid kernel parameter index too big");
  amd::Memory* const paramObject = paramMemory_[paramIdx];
  return as_cl(paramObject);
}
void HwDebugManager::assignKernelParamMem(uint32_t paramIdx, amd::Memory* mem) {
assert((paramIdx < numParams_) && "Invalid kernel parameter index too big");
paramMemory_[paramIdx] = mem;
}
//! Installs a trap handler and its buffer into the device trap table.
//!
//! The handler goes to slot (trapType * 4) and the buffer to the slot right
//! after it. The table only has kDebugTrapLocationMax entries, so a trap type
//! whose slots do not fit is rejected instead of writing past the array.
//!
//! \param trapType     trap type selecting the pair of slots
//! \param pTrapHandler device memory holding the trap handler
//! \param pTrapBuffer  device memory holding the trap buffer
void HwDebugManager::installTrap(cl_dbg_trap_type_amd trapType, amd::Memory* pTrapHandler,
                                 amd::Memory* pTrapBuffer) {
  const int type = static_cast<int>(trapType);

  // Largest type for which (type * 4 + 1) is still a valid table index.
  const int maxType = (kDebugTrapLocationMax - 2) / 4;
  if ((type < 0) || (type > maxType)) {
    assert(false && "Trap type does not fit in the device trap table");
    return;
  }

  const int handlerSlot = type << 2;
  rtTrapInfo_[handlerSlot] = pTrapHandler;
  rtTrapInfo_[handlerSlot + 1] = pTrapBuffer;
}
} // namespace amd
-272
Melihat File
@@ -1,272 +0,0 @@
/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef HWDEBUG_H_
#define HWDEBUG_H_
#include "device.hpp"
#include "amdocl/cl_debugger_amd.h"
//! Byte offset inside the trap handler allocation at which the handler code starts.
static constexpr int TbaStartOffset = 256;

//! Entries reserved per wave in the VGPR backup area.
static constexpr int RtTrapBufferWaveSize = 64;

// Per-level counts whose product sizes the VGPR backup area.
static constexpr int RtTrapBufferSeNum = 4;
static constexpr int RtTrapBufferShNum = 2;
static constexpr int RtTrapBufferCuNum = 16;
static constexpr int RtTrapBufferSimdNum = 4;
static constexpr int RtTrapBufferWaveNum = 16;

//! Total number of waves the VGPR backup area provides room for.
static constexpr int RtTrapBufferTotalWaveNum = RtTrapBufferSeNum * RtTrapBufferShNum *
    RtTrapBufferCuNum * RtTrapBufferSimdNum * RtTrapBufferWaveNum;
/*! \brief Slot indices inside the device trap table.
 *
 * Identifies where the debug trap handler and the debug trap buffer are
 * stored in the device trap table; the last enumerator is the table size.
 */
enum DebugTrapLocation {
  kDebugTrapHandlerLocation = 0,  //!< Debug trap handler slot, must be 0
  kDebugTrapBufferLocation,       //!< Debug trap buffer slot, must be 1
  kDebugTrapLocationMax           //!< Number of slots (2)
};
/*! \brief Debug information attached to each kernel dispatch.
*
* Holds the memory descriptors of the scratch memory and of the global
* memory. The structure is copied byte-for-byte to callers, so its member
* order and sizes must not change.
*/
struct DispatchDebugInfo {
uint32_t scratchMemoryDescriptor_[4]; //!< Scratch memory descriptor (four 32-bit words)
uint32_t globalMemoryDescriptor_[4]; //!< Global memory descriptor (four 32-bit words)
};
/*! \brief Trap handler descriptor
*
* Pairs the device memory of one trap handler with the device memory of the
* buffer that belongs to it. Both pointers are plain (non-owning) pointers.
*/
struct TrapHandlerInfo {
amd::Memory* trapHandler_; //!< Device memory for the trap handler
amd::Memory* trapBuffer_; //!< Device memory for the trap buffer
};
/*! \brief Layout of the runtime trap handler buffer.
*
* Contains the trap handler/buffer pair, the id of the dispatch that signals
* the shader event, and a VGPR backup area with one row of
* RtTrapBufferWaveSize entries for each of the RtTrapBufferTotalWaveNum waves.
* NOTE(review): the backup array makes this structure very large; avoid
* placing instances on the stack.
*/
struct RuntimeTrapInfo {
TrapHandlerInfo trap_; //!< Structure of the address of all trap handlers
uint32_t dispatchId_; //!< Dispatch ID that signals the shader event
uint32_t vgpr_backup_[RtTrapBufferTotalWaveNum][RtTrapBufferWaveSize];
//!< Buffer to backup the VGPR used by the runtime trap handler
};
/**
 * Opaque handle to a trap event, stored as a pointer-sized integer.
 */
using DebugEvent = uintptr_t;
namespace amd {
class Context;
class Device;
class HostQueue;
/*! \class HwDebugManager
 *
 * \brief The device interface class for the hardware debug manager
 *
 * Stores the debugger state that is independent of the device back end
 * (callbacks, kernel code and scratch ring locations, exception policy,
 * execution mode, kernel parameter memory, trap table) and declares the
 * operations a back end has to implement as pure virtual functions.
 *
 * The manager owns the kernel-parameter pointer list and the runtime trap
 * handler code/buffer objects and frees them in its destructor, so it is
 * neither copyable nor assignable.
 */
class HwDebugManager {
 public:
  //! Constructor for the Hardware Debug Manager
  explicit HwDebugManager(amd::Device* device);

  //! Destructor for Hardware Debug Manager
  virtual ~HwDebugManager();

  //! Copying would make two managers free the same allocations.
  HwDebugManager(const HwDebugManager&) = delete;
  HwDebugManager& operator=(const HwDebugManager&) = delete;

  //! Setup the call back function pointers
  void setCallBackFunctions(cl_PreDispatchCallBackFunctionAMD preDispatchFunction,
                            cl_PostDispatchCallBackFunctionAMD postDispatchFunction);

  //! Setup the call back argument pointers
  void setCallBackArguments(void* preDispatchArgs, void* postDispatchArgs);

  //! Get dispatch debug info
  void getDispatchDebugInfo(void* debugInfo) const;

  //! Set the kernel code address and its size
  void setKernelCodeInfo(address aqlCodeAddr, uint32_t aqlCodeSize);

  //! Set the scratch ring address and its size
  void setScratchRing(address scratchRingAddr, uint32_t scratchRingSize);

  //! Map the scratch ring for host access
  void mapScratchRing(uint64_t* scratchRingAddr, uint32_t* scratchRingSize) const;

  //! Retrieve the pre-dispatch callback function
  cl_PreDispatchCallBackFunctionAMD preDispatchCallBackFunc() const {
    return preDispatchCallBackFunc_;
  }

  //! Retrieve the post-dispatch callback function
  cl_PostDispatchCallBackFunctionAMD postDispatchCallBackFunc() const {
    return postDispatchCallBackFunc_;
  }

  //! Retrieve the pre-dispatch callback function arguments
  void* preDispatchCallBackArgs() const { return preDispatchCallBackArgs_; }

  //! Retrieve the post-dispatch callback function arguments
  void* postDispatchCallBackArgs() const { return postDispatchCallBackArgs_; }

  //! Retrieve the memory pointer of the runtime trap handler code
  device::Memory* runtimeTBA() const { return runtimeTBA_; }

  //! Retrieve the memory pointer of the runtime trap handler buffer
  device::Memory* runtimeTMA() const { return runtimeTMA_; }

  //! Set exception policy
  void setExceptionPolicy(void* exceptionPolicy);

  //! Get exception policy
  void getExceptionPolicy(void* exceptionPolicy) const;

  //! Set the kernel execution mode
  void setKernelExecutionMode(void* mode);

  //! Get the kernel execution mode
  void getKernelExecutionMode(void* mode) const;

  //! Setup the pointer to the aclBinary within the debug manager
  void setAclBinary(void* aclBinary);

  //! Allocate storage to keep the memory pointers of the kernel parameters
  void allocParamMemList(uint32_t numParams);

  //! Assign the kernel parameter memory
  void assignKernelParamMem(uint32_t paramIdx, amd::Memory* mem);

  //! Get kernel parameter memory object
  cl_mem getKernelParamMem(uint32_t paramIdx) const;

  //! Install trap handler
  void installTrap(cl_dbg_trap_type_amd trapType, amd::Memory* pTrapHandler,
                   amd::Memory* pTrapBuffer);

  //! Flush cache
  virtual void flushCache(uint32_t mask) = 0;

  //! Create the debug event
  virtual DebugEvent createDebugEvent(const bool autoReset) = 0;

  //! Wait for the debug event
  virtual int32_t waitDebugEvent(DebugEvent pEvent, uint32_t timeOut) const = 0;

  //! Destroy the debug event
  virtual void destroyDebugEvent(DebugEvent* pEvent) = 0;

  //! Register the debugger
  virtual int32_t registerDebugger(amd::Context* context, uintptr_t pMessageStorage) = 0;

  //! Unregister the debugger
  virtual void unregisterDebugger() = 0;

  //! Send the wavefront control command
  virtual void wavefrontControl(uint32_t waveAction, uint32_t waveMode, uint32_t trapId,
                                void* waveAddr) const = 0;

  //! Set address watching point
  virtual void setAddressWatch(uint32_t numWatchPoints, void** watchAddress, uint64_t* watchMask,
                               uint64_t* watchMode, DebugEvent* event) = 0;

  //! Map the shader (AQL code) for host access
  virtual void mapKernelCode(void* aqlCodeInfo) const = 0;

  //! Get the packet information for dispatch
  virtual void getPacketAmdInfo(const void* aqlCodeInfo, void* packetInfo) const = 0;

  //! Set global memory values
  virtual void setGlobalMemory(amd::Memory* memObj, uint32_t offset, void* srcPtr,
                               uint32_t size) = 0;

  //! Execute the post-dispatch callback function
  virtual void executePostDispatchCallBack() = 0;

  //! Execute the pre-dispatch callback function
  virtual void executePreDispatchCallBack(void* aqlPacket, void* toolInfo) = 0;

 protected:
  //! Return the context
  const amd::Context* context() const { return context_; }

  //! Get the debug device
  const amd::Device* device() const { return device_; }

  //! Return the register flag
  bool isRegistered() const { return isRegistered_; }

 protected:
  const amd::Context* context_;  //!< context that used to create host queue for the debugger
  amd::Device* device_;          //!< Device to run the debugger

  cl_PreDispatchCallBackFunctionAMD preDispatchCallBackFunc_;  //!< pre-dispatch callback function
  cl_PostDispatchCallBackFunctionAMD
      postDispatchCallBackFunc_;    //!< post-dispatch callback function
  void* preDispatchCallBackArgs_;   //!< pre-dispatch callback function arguments
  void* postDispatchCallBackArgs_;  //!< post-dispatch callback function arguments

  DispatchDebugInfo debugInfo_;  //!< Debug setting/information for kernel dispatch

  amd::Memory* rtTrapInfo_[kDebugTrapLocationMax];  //!< Device trap buffer, to store various trap
                                                    //!< handlers on the device

  amd::Memory** paramMemory_;  //!< list of memory pointers for kernel parameters (owned)
  uint32_t numParams_;         //!< number of kernel parameters

  void* aclBinary_;  //!< ACL binary

  address aqlCodeAddr_;    //!< The mapped AQL code to allow host access
  uint32_t aqlCodeSize_;   //!< The size of the AQL code info

  address scratchRingAddr_;   //!< The mapped address of the scratch buffer
  uint32_t scratchRingSize_;  //!< The size of the scratch ring

  bool isRegistered_;  //!< flag to indicate the debugger has been registered

  cl_dbg_exception_policy_amd excpPolicy_;  //!< exception policy
  cl_dbg_kernel_exec_mode_amd execMode_;    //!< kernel execution mode

  RuntimeTrapInfo rtTrapHandlerInfo_;  //!< Runtime trap information

  //! Runtime Trap handler pointer (TBA) & its buffer (TMA)
  device::Memory* runtimeTBA_;  //!< runtime trap handler pointer (owned)
  device::Memory* runtimeTMA_;  //!< runtime trap handler buffer (owned)
};
/**@}*/
/**
* @}
*/
} // namespace amd
#endif // HWDEBUG_H_
-129
Melihat File
@@ -1,129 +0,0 @@
/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#pragma once
#include <cstddef>
#include <cstdint>
#include "hsa.h"
#include "amd_hsa_kernel_code.h"
#include "device/device.hpp"
#include "device/hwdebug.hpp"
#include "acl.h"
//! Number of VGPRs subtracted from a kernel's VGPR count to obtain the
//! index of the VGPRs reserved for the trap handler
//! (see PacketAmdInfo::trapReservedVgprIndex_).
static constexpr int NumberReserveVgprs = 4;
namespace pal {
/**
* \defgroup Services_API OCL Runtime Services API
* @{
*/
/*! \brief Dispatch packet information
*
* Describes the kernel behind a dispatch packet: where its ISA lives, how
* many registers it uses and how much static group memory it needs.
* The "-1" sentinels below are stored in unsigned 32-bit fields, i.e. they
* read back as 0xFFFFFFFF.
*/
struct PacketAmdInfo {
uint32_t trapReservedVgprIndex_; //!< reserved VGPR index, -1 when they are not valid
uint32_t scratchBufferWaveOffset_; //!< scratch buffer wave offset, -1 when no scratch buffer
void* pointerToIsaBuffer_; //!< pointer to the buffer containing ISA
size_t sizeOfIsaBuffer_; //!< size of the ISA buffer
uint32_t numberOfVgprs_; //!< number of VGPRs used by the kernel
uint32_t numberOfSgprs_; //!< number of SGPRs used by the kernel
size_t sizeOfStaticGroupMemory_; //!< Static local memory used by the kernel
};
/*! \brief Cache selection mask used for invalidation.
 *
 * The same 32 bits can be accessed either as one flag per cache or as the
 * raw value \c ui32All_.
 */
struct HwDbgGpuCacheMask {
  //! Creates a mask that selects no cache.
  HwDbgGpuCacheMask() : HwDbgGpuCacheMask(0u) {}

  //! Creates a mask from its raw 32-bit encoding.
  HwDbgGpuCacheMask(uint32_t mask) : ui32All_(mask) {}

  union {
    struct {
      uint32_t sqICache_ : 1;   //!< Instruction cache
      uint32_t sqKCache_ : 1;   //!< Data cache
      uint32_t tcL1_ : 1;       //!< tcL1 cache
      uint32_t tcL2_ : 1;       //!< tcL2 cache
      uint32_t reserved_ : 28;  //!< Unused bits
    };
    uint32_t ui32All_;  //!< All flags as a single value
  };
};
/*! \brief Address watch information
*
* One entry per watch point: the watched address, the mask applied to it,
* the watch mode and the event associated with the watch point.
*/
struct HwDbgAddressWatch {
void* watchAddress_; //!< The address of watch point
uint64_t watchMask_; //!< The mask for watch point (lower 24 bits)
cl_dbg_address_watch_mode_amd watchMode_; //!< The watch mode for this watch
DebugEvent event_; //!< Event of the watch point (not used for now)
};
/*! \brief Runtime structure used to communicate debug information
* between Ocl services and core for a kernel dispatch.
*
* The pointer members are plain pointers; nothing in this structure
* releases the objects they refer to.
*/
struct DebugToolInfo {
uint64_t scratchAddress_; //!< Scratch memory address
size_t scratchSize_; //!< Scratch memory size
uint64_t globalAddress_; //!< Global memory address
uint32_t cacheDisableMask_; //!< Cache mask, indicating caches disabled
uint32_t exceptionMask_; //!< Exception mask
uint32_t reservedCuNum_; //!< Number of reserved CUs for display,
//!< which ranges from 0 to 7 in the current implementation.
bool monitorMode_; //!< Debug or profiler mode
bool gpuSingleStepMode_; //!< SQ debug mode
amd::Memory* trapHandler_; //!< Trap handler address
amd::Memory* trapBuffer_; //!< Trap buffer address
bool sqPerfcounterEnable_; //!< whether SQ perf counters are enabled
aclBinary* aclBinary_; //!< pointer of the kernel ACL binary
amd::Event* event_; //!< pointer of the kernel event in the enqueue command
};
/*! \brief Message used by the KFD wave control for CI
*
* Bit-field encoding of the wave targeted by the wave control function.
* The fields add up to 16 bits; their order and widths define the encoding
* and must not be changed.
*/
struct HwDebugWaveAddr {
uint32_t VMID_ : 4; //!< Virtual memory id
uint32_t wave_ : 4; //!< Wave id
uint32_t SIMD_ : 2; //!< SIMD id
uint32_t CU_ : 4; //!< Compute unit
uint32_t SH_ : 1; //!< Shader array
uint32_t SE_ : 1; //!< Shader engine
};
/*! \brief Kernel code information
*
* Carries the pointer to the mapped kernel code for host access together
* with the size of that code (in bytes). The pointer is not owned.
*/
struct AqlCodeInfo {
amd_kernel_code_t* aqlCode_; //!< pointer of AQL code to allow host access
uint32_t aqlCodeSize_; //!< size of AQL code
};
/**@}*/
} // namespace pal
-366
Melihat File
@@ -1,366 +0,0 @@
/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "platform/commandqueue.hpp"
#include "device/device.hpp"
#include "device/pal/paldevice.hpp"
#include "device/pal/palmemory.hpp"
#include "device/pal/paltrap.hpp"
#include "device/pal/paldebugmanager.hpp"
#include <iostream>
#include <sstream>
#include <fstream>
namespace pal {
class VirtualGPU;
class Device;
class Memory;
/*
***************************************************************************
* Implementation of GPU Debug Manager class
***************************************************************************
*/
//! Creates the PAL debug manager for \p device with no virtual GPU, no
//! message storage, no address-watch table and no pending dispatch packet,
//! and resets the exception policy and execution mode to their defaults.
GpuDebugManager::GpuDebugManager(amd::Device* device)
    : HwDebugManager(device),
      vGpu_(nullptr),
      debugMessages_(0),
      addressWatch_(nullptr),
      addressWatchSize_(0),
      oclEventHandle_(nullptr) {
  // No dispatch has been observed yet.
  aqlPacket_ = nullptr;

  // No runtime trap handler or buffer is installed yet.
  rtTrapHandlerInfo_.trap_.trapBuffer_ = nullptr;
  rtTrapHandlerInfo_.trap_.trapHandler_ = nullptr;

  // Default kernel execution mode: all bits clear.
  execMode_.ui32All = 0;

  // Default exception policy: no exception enabled, waves resume, the host
  // ignores the exception, wave commands are broadcast.
  excpPolicy_.waveMode = CL_DBG_WAVEMODE_BROADCAST;
  excpPolicy_.hostAction = CL_DBG_HOST_IGNORE;
  excpPolicy_.waveAction = CL_DBG_WAVES_RESUME;
  excpPolicy_.exceptionMask = 0x0;
}
//! Frees the address-watch table allocated by setAddressWatch().
GpuDebugManager::~GpuDebugManager() {
  // Array form matches the new[] in setAddressWatch(); null is a no-op.
  delete[] addressWatch_;
}
void GpuDebugManager::executePreDispatchCallBack(void* aqlPacket, void* toolInfo) {
DebugToolInfo* info = reinterpret_cast<DebugToolInfo*>(toolInfo);
aqlPacket_ = reinterpret_cast<hsa_kernel_dispatch_packet_t*>(aqlPacket);
Unimplemented();
// Only if the pre-dispatch callback is set, will we update cache
// flush configuration and build the memory descriptor.
if (nullptr != preDispatchCallBackFunc_) {
/*
// Build the scratch memory descriptor
device()->gslCtx()->BuildScratchBufferResource(debugInfo_.scratchMemoryDescriptor_,
info->scratchAddress_,
info->scratchSize_);
// Build the global memory descriptor
device()->gslCtx()->BuildHeapBufferResource(debugInfo_.globalMemoryDescriptor_,
info->globalAddress_);
*/
// // for invalidate cache (BuildEndOfKernelNotifyCommands)
// aqlPacket->release_fence_scope = 2;
aclBinary_ = reinterpret_cast<void*>(info->aclBinary_);
oclEventHandle_ = reinterpret_cast<void*>(as_cl(info->event_));
cl_device_id clDeviceId = as_cl(device_);
preDispatchCallBackFunc_(clDeviceId, oclEventHandle_, aqlPacket_, aclBinary_,
preDispatchCallBackArgs_);
}
// setup the trap handler information only if the debugger has been registered
if (isRegistered()) {
// Copy the various info set by the debugger/profiler to the tool info structure
setupTrapInformation(info);
}
}
void GpuDebugManager::executePostDispatchCallBack() {
if (nullptr != postDispatchCallBackFunc_) {
cl_device_id clDeviceId = as_cl(device_);
postDispatchCallBackFunc_(clDeviceId, aqlPacket_->completion_signal.handle,
postDispatchCallBackArgs_);
}
}
//! Map the kernel code for host access
void GpuDebugManager::mapKernelCode(void* aqlCodeInfo) const {
AqlCodeInfo* codeInfo = reinterpret_cast<AqlCodeInfo*>(aqlCodeInfo);
codeInfo->aqlCode_ = reinterpret_cast<amd_kernel_code_t*>(aqlCodeAddr_);
codeInfo->aqlCodeSize_ = aqlCodeSize_;
}
//! Registers a debugger for \p context.
//!
//! On the first registration the message storage is recorded and the runtime
//! trap handler is created. The manager is only marked as registered once
//! that has succeeded, so a failed attempt is not later mistaken for a
//! completed registration.
//!
//! \param context        context used by the debugger
//! \param messageStorage storage for the debug messages
//! \return CL_SUCCESS, CL_DEBUGGER_REGISTER_FAILURE_AMD when HW debug is
//!         disabled in the device settings, or CL_OUT_OF_RESOURCES when the
//!         runtime trap handler cannot be created.
int32_t GpuDebugManager::registerDebugger(amd::Context* context, uintptr_t messageStorage) {
  if (!device()->settings().enableHwDebug_) {
    LogError("debugmanager: Register debugger error - HW DEBUG is not enable");
    return CL_DEBUGGER_REGISTER_FAILURE_AMD;
  }

  // first time register - set the message storage, flush queue and enable hw debug
  if (!isRegistered()) {
    debugMessages_ = messageStorage;

    Unimplemented();
    /*
    if (!device()->gslCtx()->registerHwDebugger(debugMessages_)) {
      LogError("debugmanager: Register debugger failed");
      return CL_OUT_OF_RESOURCES;
    }
    */

    if (CL_SUCCESS != createRuntimeTrapHandler()) {
      LogError("debugmanager: Create runtime trap handler failed");
      return CL_OUT_OF_RESOURCES;
    }

    // Set only after every registration step succeeded.
    isRegistered_ = true;
  }

  context_ = context;

  return CL_SUCCESS;
}
//! Drops the debugger registration and forgets its context.
//! Does nothing when no debugger is registered.
void GpuDebugManager::unregisterDebugger() {
  if (!isRegistered()) {
    return;
  }

  context_ = nullptr;
  isRegistered_ = false;
}
void GpuDebugManager::flushCache(uint32_t mask) {
HwDbgGpuCacheMask cacheMask(mask);
// device()->xferQueue()->flushCuCaches(cacheMask);
}
//! Fills \p toolInfo with the trap-related settings used for the kernel
//! dispatch: exception mask, execution mode flags, cache-disable mask,
//! reserved CU count and the installed trap handler/buffer.
void GpuDebugManager::setupTrapInformation(DebugToolInfo* toolInfo) {
  // Memory locations and the perf-counter flag are reset here.
  toolInfo->sqPerfcounterEnable_ = false;
  toolInfo->globalAddress_ = 0;
  toolInfo->scratchSize_ = 0;
  toolInfo->scratchAddress_ = 0;

  // Exception policy and execution mode chosen by the debugger.
  toolInfo->exceptionMask_ = excpPolicy_.exceptionMask;
  toolInfo->monitorMode_ = execMode_.monitorMode;
  toolInfo->gpuSingleStepMode_ = execMode_.gpuSingleStepMode;
  toolInfo->reservedCuNum_ = execMode_.reservedCuNum;

  // Bit positions follow the register COMPUTE_DISPATCH_INITIATOR:
  // bit 2 = L1 scalar, bit 1 = L2, bit 0 = L1 vector.
  const uint32_t l1ScalarBit = execMode_.disableL1Scalar << 2;
  const uint32_t l2CacheBit = execMode_.disableL2Cache << 1;
  const uint32_t l1VectorBit = execMode_.disableL1Vector;
  toolInfo->cacheDisableMask_ = l1ScalarBit | l2CacheBit | l1VectorBit;

  // Trap handler and buffer as installed through installTrap().
  toolInfo->trapBuffer_ = rtTrapInfo_[kDebugTrapBufferLocation];
  toolInfo->trapHandler_ = rtTrapInfo_[kDebugTrapHandlerLocation];
}
void GpuDebugManager::getPacketAmdInfo(const void* aqlCodeInfo, void* packetInfo) const
{
const AqlCodeInfo* codeInfo = reinterpret_cast<const AqlCodeInfo*>(aqlCodeInfo);
const amd_kernel_code_t* hostAqlCode = codeInfo->aqlCode_;
PacketAmdInfo* packet = reinterpret_cast<PacketAmdInfo*>(packetInfo);
const amd_kernel_code_t* akc = hostAqlCode;
packet->numberOfSgprs_ = akc->wavefront_sgpr_count;
packet->numberOfVgprs_ = akc->workitem_vgpr_count;
// use mapped kernel_object_address for host accessing of ISA buffer
packet->pointerToIsaBuffer_ = (char*)(hostAqlCode) + akc->kernel_code_entry_byte_offset;
packet->scratchBufferWaveOffset_ = akc->debug_wavefront_private_segment_offset_sgpr;
packet->sizeOfIsaBuffer_ = codeInfo->aqlCodeSize_;
packet->sizeOfStaticGroupMemory_ = akc->workgroup_group_segment_byte_size;
// The trap_reserved_vgpr_index will be 4 less the original
// This value must be used only by the debugger
packet->trapReservedVgprIndex_ = akc->workitem_vgpr_count - NumberReserveVgprs;
}
//! Creates a debug event. Not implemented for this back end: the call is
//! flagged through Unimplemented() and the null event (0) is returned.
DebugEvent GpuDebugManager::createDebugEvent(const bool autoReset) {
  Unimplemented();

  // Disabled reference implementation:
  //
  //   // create the event object
  //   osEventHandle shaderEvent = osEventCreate(!autoReset);
  //   // event object has been created, set the initial state
  //   if (shaderEvent != 0) {
  //     osEventReset(shaderEvent);  // initial state is non-signaled
  //     if (device()->gslCtx()->exceptionNotification(shaderEvent)) {
  //       return shaderEvent;
  //     }
  //   }

  return 0;
}
//! Waits for a debug event. Not implemented for this back end: the call is
//! flagged through Unimplemented() and CL_SUCCESS is returned without waiting.
int32_t GpuDebugManager::waitDebugEvent(DebugEvent pEvent, uint32_t timeOut) const {
  Unimplemented();

  // Disabled reference implementation:
  //
  //   if (osEventTimedWait(pEvent, timeOut)) {
  //     return CL_SUCCESS;
  //   }
  //   else {
  //     return CL_EVENT_TIMEOUT_AMD;
  //   }

  return CL_SUCCESS;
}
//! Destroys a debug event. Not implemented for this back end: the call is
//! only flagged through Unimplemented() and \p pEvent is left untouched.
void GpuDebugManager::destroyDebugEvent(DebugEvent* pEvent) {
  Unimplemented();

  // Disabled reference implementation:
  //
  //   osEventDestroy(*pEvent);
  //   *pEvent = 0;
  //   device()->gslCtx()->exceptionNotification(0);
}
//! Sends a wavefront control command. Not implemented for this back end:
//! the call is only flagged through Unimplemented().
void GpuDebugManager::wavefrontControl(uint32_t waveAction, uint32_t waveMode, uint32_t trapId,
                                       void* waveAddr) const {
  Unimplemented();

  // Disabled reference implementation:
  //   device()->gslCtx()->executeSqCommand(waveAction, waveMode, trapId, waveAddr);
}
void GpuDebugManager::setAddressWatch(uint32_t numWatchPoints, void** watchAddress,
uint64_t* watchMask, uint64_t* watchMode, DebugEvent* event) {
size_t requiredSize = numWatchPoints * sizeof(HwDbgAddressWatch);
// previously allocated size is not big enough, allocate new memory
if (addressWatchSize_ < requiredSize) {
delete[] addressWatch_;
addressWatch_ = new HwDbgAddressWatch[numWatchPoints];
addressWatchSize_ = requiredSize;
}
// fill in the address watch structure
memset(addressWatch_, 0, addressWatchSize_);
for (uint32_t i = 0; i < numWatchPoints; i++) {
amd::Memory* watchMem = as_amd(reinterpret_cast<cl_mem>(watchAddress[i]));
Memory* watchMemAddress = device()->getGpuMemory(watchMem);
addressWatch_[i].watchAddress_ = reinterpret_cast<void*>(watchMemAddress->vmAddress());
addressWatch_[i].watchMask_ = watchMask[i];
addressWatch_[i].watchMode_ = (cl_dbg_address_watch_mode_amd)watchMode[i];
addressWatch_[i].event_ = (0 != event) ? event[i] : 0;
}
Unimplemented();
// setup the watch addresses
// device()->gslCtx()->setAddressWatch(numWatchPoints, (void*) addressWatch_);
}
//! Copies \p size bytes from \p srcPtr into \p memObj at byte \p offset.
//!
//! The device memory is mapped for the copy and unmapped afterwards. When the
//! object has no device memory or the mapping fails, the error is logged and
//! nothing is written; this check is also active in builds where assertions
//! are compiled out.
//! NOTE(review): offset + size is not validated against the size of the
//! memory object - the caller must guarantee that the range fits.
//!
//! \param memObj destination memory object
//! \param offset byte offset inside the destination
//! \param srcPtr host source buffer
//! \param size   number of bytes to copy
void GpuDebugManager::setGlobalMemory(amd::Memory* memObj, uint32_t offset, void* srcPtr,
                                      uint32_t size) {
  Memory* globalMem = device()->getGpuMemory(memObj);
  if (globalMem == nullptr) {
    LogError("debugmanager: Set global memory failed - no device memory");
    return;
  }

  address mappedMem = static_cast<address>(globalMem->map(nullptr, 0));
  if (mappedMem == nullptr) {
    LogError("debugmanager: Set global memory failed - cannot map device memory");
    return;
  }

  void* dest_ptr = reinterpret_cast<void*>(mappedMem + offset);
  memcpy(dest_ptr, srcPtr, size);

  globalMem->unmap(nullptr);
}
//! Create the runtime trap handler buffer (TBA) and the trap handler data buffer (TMA).
//!
//! Both buffers are stored in runtimeTBA_ / runtimeTMA_ as soon as they are allocated,
//! so they stay owned by the manager even when this function fails.
//!
//! \return CL_SUCCESS on success, CL_OUT_OF_RESOURCES if a buffer cannot be created or
//!         mapped, CL_INVALID_VALUE if a buffer address is not 256-byte aligned
int32_t GpuDebugManager::createRuntimeTrapHandler() {
  // Pick the pre-assembled trap handler binary for the ASIC generation
  size_t codeSize = 0;
  const uint32_t* rtTrapCode = nullptr;
  if (device()->settings().viPlus_) {
    codeSize = sizeof(RuntimeTrapCodeVi);
    rtTrapCode = RuntimeTrapCodeVi;
  } else {
    codeSize = sizeof(RuntimeTrapCode);
    rtTrapCode = RuntimeTrapCode;
  }

  // Handle TMA corruption hw bug workaround -
  // The trap handler buffer has extra 256 bytes allocated, the TMA address
  // is stored in the first two DWORDs and the actual trap handler code
  // is stored starting at the location of 256 bytes (TbaStartOffset).
  //
  // allocate memory for the runtime trap handler (TBA) + TMA address
  uint32_t allocSize = codeSize + TbaStartOffset;
  Memory* rtTBA = new Memory(*device(), allocSize);
  runtimeTBA_ = rtTBA;
  if ((rtTBA == nullptr) || !rtTBA->create(Resource::RemoteUSWC)) {
    return CL_OUT_OF_RESOURCES;
  }

  // allocate buffer for the runtime trap handler buffer (TMA)
  uint32_t tmaSize = 0x100;
  Memory* rtTMA = new Memory(*device(), tmaSize);
  runtimeTMA_ = rtTMA;
  if ((rtTMA == nullptr) || !rtTMA->create(Resource::RemoteUSWC)) {
    return CL_OUT_OF_RESOURCES;
  }

  const uint64_t rtTmaAddress = rtTMA->vmAddress();
  if ((rtTBA->vmAddress() & 0xFF) != 0 || (rtTmaAddress & 0xFF) != 0) {
    LogError("debugmanager: Trap handler/buffer is not 256-byte aligned");
    return CL_INVALID_VALUE;
  }

  // Map the TBA buffer only after every other failure point has been passed,
  // so that no error return can leave the buffer mapped
  address tbaAddress = reinterpret_cast<address>(rtTBA->map(nullptr));
  if (tbaAddress == nullptr) {
    LogError("debugmanager: failed to map the runtime trap handler buffer");
    return CL_OUT_OF_RESOURCES;
  }

  // store the TMA address at the beginning of trap handler buffer
  uint64_t* tbaStorage = reinterpret_cast<uint64_t*>(tbaAddress);
  tbaStorage[0] = rtTmaAddress;

  // save the trap handler code behind the reserved TbaStartOffset bytes
  memcpy(tbaAddress + TbaStartOffset, rtTrapCode, codeSize);

  rtTBA->unmap(nullptr);
  return CL_SUCCESS;
}
} // namespace pal
-123
Melihat File
@@ -1,123 +0,0 @@
/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#pragma once
#include "device/pal/palvirtual.hpp"
#include "device/pal/paldebugger.hpp"
namespace pal {
class GpuDebugManager;
class Device;
class Memory;
/*! \brief Debug Manager Class
*
* The debug manager class is used to pass all the trap info to the
* kernel dispatch and then the kernel execution can use such trap information
* for kernel execution. This class contains the trap handler and shader event
* objects. The trap handler is setup by users and passed to the kernel dispatch.
* The shader event is to receive interrupts from the GPU and then users can
* perform various operations.
*
* This class also provides the interface for setting up the pre-dispatch
* callback functions used by the profiler and debugger. It also provides
* a way to retrieve various debug information for the kernel execution.
*
* Several entry points (destroyDebugEvent(), wavefrontControl() and the hardware
* programming step of setAddressWatch()) only call Unimplemented() in their
* current implementation.
*
*/
class GpuDebugManager : public amd::HwDebugManager {
public:
//! Constructor of the debug manager class
GpuDebugManager(amd::Device* device);
//! Destructor of the debug manager class
~GpuDebugManager();
//! Get the single instance of the GpuDebugManager class
static GpuDebugManager* getDefaultInstance();
//! Destroy the GpuDebugManager class object
static void destroyInstances();
//! Flush cache
void flushCache(uint32_t mask);
//! Create the debug event
DebugEvent createDebugEvent(const bool autoReset);
//! Wait for the debug event
int32_t waitDebugEvent(DebugEvent pEvent, uint32_t timeOut) const;
//! Destroy the debug event (the current implementation only calls Unimplemented())
void destroyDebugEvent(DebugEvent* pEvent);
//! Register the debugger
int32_t registerDebugger(amd::Context* context, uintptr_t messageStorage);
//! Unregister the debugger
void unregisterDebugger();
//! Send the wavefront control command (the current implementation only calls Unimplemented())
void wavefrontControl(uint32_t waveAction, uint32_t waveMode, uint32_t trapId,
void* waveAddr) const;
//! Set the address watch points; the entries are staged in addressWatch_
void setAddressWatch(uint32_t numWatchPoints, void** watchAddress, uint64_t* watchMask,
uint64_t* watchMode, DebugEvent* pEvent);
//! Map the kernel code for host access
void mapKernelCode(void* aqlCodeInfo) const;
//! Get the packet information for dispatch
void getPacketAmdInfo(const void* aqlCodeInfo, void* packetInfo) const;
//! Set global memory values: copies size bytes from srcPtr to the given offset of memObj
void setGlobalMemory(amd::Memory* memObj, uint32_t offset, void* srcPtr, uint32_t size);
//! Execute the post-dispatch callback function
void executePostDispatchCallBack();
//! Execute the pre-dispatch callback function
void executePreDispatchCallBack(void* aqlPacket, void* toolInfo);
protected:
//! Returns the virtual GPU stored in vGpu_
const VirtualGPU* vGpu() const { return vGpu_; }
private:
//! Setup trap handler info for kernel execution
void setupTrapInformation(DebugToolInfo* toolInfo);
//! Create runtime trap handler; returns CL_SUCCESS or an OpenCL error code
int32_t createRuntimeTrapHandler();
//! Returns device_ viewed as the PAL device.
//! NOTE(review): pal::Device is only forward-declared in this header, which is
//! presumably why reinterpret_cast is used here -- confirm before changing the cast.
const pal::Device* device() const { return reinterpret_cast<const pal::Device*>(device_); }
VirtualGPU* vGpu_; //!< the virtual GPU
uintptr_t debugMessages_; //!< Pointer to a SHARED_DEBUG_MESSAGES passed to the KMD
HwDbgAddressWatch* addressWatch_; //!< Address watch data (array allocated with new[])
size_t addressWatchSize_; //!< Size of the address watch data in bytes
//! Arguments used by the callback function
void* oclEventHandle_; //!< event handle
const hsa_kernel_dispatch_packet_t* aqlPacket_; //!< AQL packet
};
} // namespace pal
-16
Melihat File
@@ -41,22 +41,6 @@ enum PalGpuMemoryType {
PAL_TEXTURE_BUFFER, ///< "buffer" texture inside VBO
};
//! Per-dispatch information handed to the HW debug support; filled in by
//! VirtualGPU::buildKernelInfo() and VirtualGPU::assignDebugTrapHandler().
struct HwDbgKernelInfo {
uint64_t scratchBufAddr; ///< GPU virtual address of the kernel private scratch buffer (0 if unused)
size_t scratchBufferSizeInBytes; ///< Size in bytes of the scratch buffer at scratchBufAddr
uint64_t heapBufAddr; ///< Address of the global heap base
const void* pAqlDispatchPacket; ///< Pointer to the dispatch packet
const void* pAqlQueuePtr; ///< Pointer to the AQL queue
void* trapHandler; ///< Address of the trap handler (TBA)
void* trapHandlerBuffer; ///< Address of the trap handler buffer (TMA)
uint32_t excpEn; ///< Exception mask
bool trapPresent; ///< Trap present flag (set when trapHandler is non-null)
bool sqDebugMode; ///< Debug mode flag (GPU single step mode)
uint32_t mgmtSe0Mask; ///< Mask for SE0 (reserving CU for display)
uint32_t mgmtSe1Mask; ///< Mask for SE1 (reserving CU for display)
uint32_t cacheDisableMask; ///< Cache disable mask
};
//! Engine types
enum EngineType { MainEngine = 0, SdmaEngine, AllEngines };
-21
Melihat File
@@ -31,7 +31,6 @@
#include "device/pal/palprogram.hpp"
#include "device/pal/palsettings.hpp"
#include "device/pal/palblit.hpp"
#include "device/pal/paldebugmanager.hpp"
#include "palLib.h"
#include "palPlatform.h"
#include "palDevice.h"
@@ -761,10 +760,6 @@ Device::Device()
rgpCaptureMgr_(nullptr) {}
Device::~Device() {
// remove the HW debug manager
delete hwDebugMgr_;
hwDebugMgr_ = nullptr;
if (p2p_stage_ != nullptr) {
p2p_stage_->release();
p2p_stage_ = nullptr;
@@ -1025,11 +1020,6 @@ bool Device::create(Pal::IDevice* device) {
return false;
}
// create the HW debug manager if needed
if (settings().enableHwDebug_) {
hwDebugMgr_ = new GpuDebugManager(this);
}
if ((glb_ctx_ == nullptr) && (gNumDevices > 1) && (device == gDeviceList[gNumDevices - 1])) {
std::vector<amd::Device*> devices;
uint32_t numDevices = amd::Device::numDevices(CL_DEVICE_TYPE_GPU, true);
@@ -2511,17 +2501,6 @@ void Device::SrdManager::fillResourceList(VirtualGPU& gpu) {
}
}
//! Register the debugger with the HW debug manager.
//!
//! \param context        context the debugger is registered for
//! \param messageStorage storage for the debug messages, forwarded to the manager
//! \return the status of registerDebugger(), or CL_INVALID_OPERATION when no HW debug
//!         manager exists
//!
//! The manager is destroyed if the registration fails.
int32_t Device::hwDebugManagerInit(amd::Context* context, uintptr_t messageStorage) {
  // The manager is created only when HW debug is enabled and is deleted after a
  // failed registration, so it may legitimately be missing here
  if (hwDebugMgr_ == nullptr) {
    LogError("HW debug manager is not available");
    return CL_INVALID_OPERATION;
  }

  const int32_t status = hwDebugMgr_->registerDebugger(context, messageStorage);
  if (status != CL_SUCCESS) {
    delete hwDebugMgr_;
    hwDebugMgr_ = nullptr;
  }
  return status;
}
bool Device::SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput,
cl_set_device_clock_mode_output_amd* pSetClockModeOutput) {
Pal::SetClockModeInput setClockMode = {};
-3
Melihat File
@@ -533,9 +533,6 @@ class Device : public NullDevice {
//! Returns SRD manger object
SrdManager& srds() const { return *srdManager_; }
//! Initial the Hardware Debug Manager
int32_t hwDebugManagerInit(amd::Context* context, uintptr_t messageStorage);
//! Returns PAL device properties
const Pal::DeviceProperties& properties() const { return properties_; }
+1 -1
Melihat File
@@ -88,7 +88,7 @@ bool Segment::alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t
}
if (!amd_mem_obj->create(nullptr)) {
LogError("[OCL] failed to create a svm hidden buffer!");
LogError("[OCL] fisThreadAliveailed to create a svm hidden buffer!");
amd_mem_obj->release();
return false;
}
+2 -5
Melihat File
@@ -1157,17 +1157,14 @@ bool Resource::create(MemoryType memType, CreateParams* params, bool forceLinear
} else {
memType = Local;
}
// force to use remote memory for HW DEBUG or use
// local memory once we determine if FGS is supported
// memType = (!dev().settings().enableHwDebug_) ? Local : RemoteUSWC;
}
// Get the element size
elementSize_ = Pal::Formats::BytesPerPixel(format);
desc_.type_ = memType;
if (memType == Scratch) {
// use local memory for scratch buffer unless it is using HW DEBUG
desc_.type_ = (!dev().settings().enableHwDebug_) ? Local : RemoteUSWC;
// use local memory for scratch buffer
desc_.type_ = Local;
desc_.scratch_ = true;
}
-5
Melihat File
@@ -509,11 +509,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
enableExtension(ClKhrMipMapImageWrites);
}
// Enable HW debug
if (GPU_ENABLE_HW_DEBUG) {
enableHwDebug_ = true;
}
#if defined(_WIN32)
enableExtension(ClAmdPlanarYuv);
#endif
-159
Melihat File
@@ -1,159 +0,0 @@
/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
/*******************************************************************************
* The source of the runtime trap handler, "runtimetraphandler.sp3".
* The binary is created by the SP3 tool with the following command:
*
* sp3.exe runtimetraphandler.sp3 -hex runtimeTrapCode.hex
*
*******************************************************************************
shader main
asic(TAHITI) // for SI/CI or asic(VI) for VI
type(CS)
// clear wave exception state
v_clrexcp
s_waitcnt 0
//==========================================================================
// Handle the workaround for HW bug that causes the incorrect TMA value.
// Retrieve the TMA values, which are stored at TBA buffer at location
// 256 (0x100).
// Construct the memory descriptor with TBA as the start address
// we are using the registers ttmp[8:11] for that.
s_mov_b32 ttmp8, tba_lo
s_and_b32 ttmp9, tba_hi, 0xffff
// 0x100=256 bytes, which is the size of the buffer to
// store all the level 2 trap handler info
s_or_b32 ttmp9, ttmp9, 0x01000000
s_mov_b32 ttmp10, 0x00002000
s_mov_b32 ttmp11, 0x00024fac
// TMA is stored 256 (0x100) bytes before the TBA value
s_sub_u32 ttmp8, ttmp8, 0x100
// Backup the s0 since ttmp registers cannot be target of
// buffer read instruction
s_mov_b32 ttmp7, s0
s_buffer_load_dword s0, ttmp8, 0x0 // VI: offset=0x0 (bytes)
s_waitcnt 0
s_mov_b32 tma_lo, s0
s_buffer_load_dword s0, ttmp8, 0x1 // VI: offset=0x4 (bytes)
s_waitcnt 0
s_mov_b32 tma_hi, s0
s_mov_b32 s0, ttmp7
//===================================================
// setup the memory descriptor for TMA
s_mov_b32 ttmp6, 0x18
s_add_u32 ttmp8, tma_lo, ttmp6
s_and_b32 ttmp9, tma_hi, 0xffff
//0x68=104 bytes, which is the size of the buffer to
//store all the level2 trap handler info
s_or_b32 ttmp9, ttmp9, 0x00680000
s_mov_b32 ttmp10, 0x00002000
s_mov_b32 ttmp11, 0x00024fac
//===================================================
// backup the TMA values to be restored later
// level-one TMA saved in the ttmp6,ttmp7
s_mov_b32 ttmp6, tma_lo
s_mov_b32 ttmp7, tma_hi
//===================================================
// setup the TMA for the level-two trap handler
// level-two TMA saved in tma_hi, tma_lo
s_mov_b32 ttmp3, s0
s_buffer_load_dword s0, ttmp8, 0x2 // VI: offset=0x8 (bytes)
s_waitcnt 0x0000
s_mov_b32 tma_lo, s0
s_buffer_load_dword s0, ttmp8, 0x3 // VI: offset=0xc (bytes)
s_waitcnt 0x0000
s_mov_b32 tma_hi, s0
//===================================================
// setup the TBA for the level-two trap handler
// level-two TBA saved in ttmp9, ttmp8
s_buffer_load_dword s0, ttmp8, 0x0 // VI: offset=0x0 (bytes)
s_waitcnt 0x0000
s_mov_b32 ttmp2, s0
s_buffer_load_dword s0, ttmp8, 0x1 // VI: offset=0x4 (bytes)
s_waitcnt 0x0000
//swap the values of s0 and ttmp3 without using other registers
s_xor_b32 ttmp3, s0, ttmp3
s_xor_b32 s0, s0, ttmp3
s_xor_b32 ttmp3, s0, ttmp3
//store the debug trap handler start address in ttmp8,9
s_mov_b32 ttmp8, ttmp2
s_mov_b32 ttmp9, ttmp3
//===================================================
// get the pc value to resume execution
s_getpc_b64 [ttmp2, ttmp3]
s_add_u32 ttmp2, ttmp2, 0x8
//===================================================
//set the pc value to jump to the debug trap handler
s_setpc_b64 [ttmp8, ttmp9]
//===================================================
// restore the TMA values
s_mov_b32 tma_hi, ttmp7
s_mov_b32 tma_lo, ttmp6
label_return:
//===================================================
// return from the trap handler to the saved PC
s_and_b32 ttmp1, ttmp1, 0xffff
s_rfe_b64 [ttmp0,ttmp1]
end
*******************************************************************************/
/// Pre-assembled runtime trap handler, built from the shader listing above with the
/// "asic(TAHITI)" directive (the listing names this variant for SI/CI).
/// Stored as 32-bit words; do not edit by hand -- regenerate it with the SP3 tool.
static constexpr uint32_t RuntimeTrapCode[] = {
0x7e008200, 0xbf8c0000, 0xbef8036c, 0x8779ff6d, 0x0000ffff, 0x8879ff79, 0x01000000, 0xbefa03ff,
0x00002000, 0xbefb03ff, 0x00024fac, 0x80f8ff78, 0x00000100, 0xbef70300, 0xc2007900, 0xbf8c0000,
0xbeee0300, 0xc2007901, 0xbf8c0000, 0xbeef0300, 0xbe800377, 0xbef60398, 0x8078766e, 0x8779ff6f,
0x0000ffff, 0x8879ff79, 0x00680000, 0xbefa03ff, 0x00002000, 0xbefb03ff, 0x00024fac, 0xbef6036e,
0xbef7036f, 0xbef30300, 0xc2007902, 0xbf8c0000, 0xbeee0300, 0xc2007903, 0xbf8c0000, 0xbeef0300,
0xc2007900, 0xbf8c0000, 0xbef20300, 0xc2007901, 0xbf8c0000, 0x89737300, 0x89007300, 0x89737300,
0xbef80372, 0xbef90373, 0xbef21f00, 0x80728872, 0xbe802078, 0xbeef0377, 0xbeee0376, 0x8771ff71,
0x0000ffff, 0xbe802270};
/// Pre-assembled runtime trap handler, built from the shader listing above with the
/// "asic(VI)" directive; selected when the device settings report viPlus_.
/// Stored as 32-bit words; do not edit by hand -- regenerate it with the SP3 tool.
static constexpr uint32_t RuntimeTrapCodeVi[] = {
0x7e006a00, 0xbf8c0000, 0xbef8006c, 0x8679ff6d, 0x0000ffff, 0x8779ff79, 0x01000000, 0xbefa00ff,
0x00002000, 0xbefb00ff, 0x00024fac, 0x80f8ff78, 0x00000100, 0xbef70000, 0xc022003c, 0x00000000,
0xbf8c0000, 0xbeee0000, 0xc022003c, 0x00000004, 0xbf8c0000, 0xbeef0000, 0xbe800077, 0xbef60098,
0x8078766e, 0x8679ff6f, 0x0000ffff, 0x8779ff79, 0x00680000, 0xbefa00ff, 0x00002000, 0xbefb00ff,
0x00024fac, 0xbef6006e, 0xbef7006f, 0xbef30000, 0xc022003c, 0x00000008, 0xbf8c0000, 0xbeee0000,
0xc022003c, 0x0000000c, 0xbf8c0000, 0xbeef0000, 0xc022003c, 0x00000000, 0xbf8c0000, 0xbef20000,
0xc022003c, 0x00000004, 0xbf8c0000, 0x88737300, 0x88007300, 0x88737300, 0xbef80072, 0xbef90073,
0xbef21c00, 0x80728872, 0xbe801d78, 0xbeef0077, 0xbeee0076, 0x8671ff71, 0x0000ffff, 0xbe801f70};
-141
Melihat File
@@ -30,7 +30,6 @@
#include "device/pal/palthreadtrace.hpp"
#include "device/pal/paltimestamp.hpp"
#include "device/pal/palblit.hpp"
#include "device/pal/paldebugger.hpp"
#include "device/appprofile.hpp"
#include "device/devhostcall.hpp"
#include "hsa.h"
@@ -1024,11 +1023,6 @@ bool VirtualGPU::create(bool profiling, uint deviceQueueSize, uint rtCUs,
// Fall through ...
case Settings::BlitEngineCAL:
case Settings::BlitEngineKernel:
// use host blit for HW debug
if (dev().settings().enableHwDebug_) {
blitSetup.disableCopyImageToBuffer_ = true;
blitSetup.disableCopyBufferToImage_ = true;
}
blitMgr_ = new KernelBlitManager(*this, blitSetup);
break;
}
@@ -3719,141 +3713,6 @@ void VirtualGPU::writeVQueueHeader(VirtualGPU& hostQ, const Memory* kernelTable)
virtualQueue_->writeRawData(hostQ, 0, sizeof(AmdVQueueHeader), vqHeader_, true);
}
//! Fill the HW debug kernel info for a dispatch and run the pre-dispatch callback.
//!
//! \param hsaKernel    kernel about to be dispatched
//! \param aqlPkt       AQL packet of the dispatch
//! \param kernelInfo   structure to fill
//! \param enqueueEvent event provided with the enqueue kernel command
//!
//! The whole body is compiled only when WITH_COMPILER_LIB is defined.
void VirtualGPU::buildKernelInfo(const HSAILKernel& hsaKernel, hsa_kernel_dispatch_packet_t* aqlPkt,
                                 HwDbgKernelInfo& kernelInfo, amd::Event* enqueueEvent) {
#if defined(WITH_COMPILER_LIB)
  amd::HwDebugManager* debugMgr = dev().hwDebugMgr();
  assert(debugMgr && "No HW Debug Manager!");

  // Scratch ring: publish both the GPU address and the host view of the buffer
  const bool usesScratch = hsaKernel.prog().maxScratchRegs() > 0;
  if (!usesScratch) {
    kernelInfo.scratchBufAddr = 0;
    kernelInfo.scratchBufferSizeInBytes = 0;
    debugMgr->setScratchRing(nullptr, 0);
  } else {
    pal::Memory* scratchMem = dev().scratch(hwRing())->memObj_;
    kernelInfo.scratchBufAddr = scratchMem->vmAddress();
    kernelInfo.scratchBufferSizeInBytes = scratchMem->size();

    // The host address is handed to the debug manager, then the buffer is unmapped again
    address scratchHostAddr = static_cast<address>(scratchMem->map(nullptr, 0));
    debugMgr->setScratchRing(scratchHostAddr, scratchMem->size());
    scratchMem->unmap(nullptr);
  }

  //! @todo: need to verify what is wanted for the global memory
  Unimplemented();
  kernelInfo.heapBufAddr = 0;

  kernelInfo.pAqlDispatchPacket = aqlPkt;
  kernelInfo.pAqlQueuePtr = reinterpret_cast<void*>(hsaQueueMem_->vmAddress());

  // Publishing the kernel code for CPU access is currently disabled:
  /* pal::Memory* aqlCode = hsaKernel.gpuAqlCode();
  if (nullptr != aqlCode) {
  address aqlCodeAddr = static_cast<address>(aqlCode->map(nullptr, 0));
  dbgManager->setKernelCodeInfo(aqlCodeAddr, hsaKernel.aqlCodeSize());
  aqlCode->unmap(nullptr);
  }
  else {
  dbgManager->setKernelCodeInfo(nullptr, 0);
  }
  */

  // Defaults used when no debugger tool is attached
  kernelInfo.trapPresent = false;
  kernelInfo.trapHandler = nullptr;
  kernelInfo.trapHandlerBuffer = nullptr;
  kernelInfo.excpEn = 0;
  kernelInfo.cacheDisableMask = 0;
  kernelInfo.sqDebugMode = false;
  kernelInfo.mgmtSe0Mask = 0xFFFFFFFF;
  kernelInfo.mgmtSe1Mask = 0xFFFFFFFF;

  // Nothing else to do without a registered pre-dispatch callback
  if (nullptr == debugMgr->preDispatchCallBackFunc()) {
    return;
  }

  // Describe the dispatch to the tool and let it adjust the debug settings
  DebugToolInfo toolInfo = {0};
  toolInfo.scratchAddress_ = kernelInfo.scratchBufAddr;
  toolInfo.scratchSize_ = kernelInfo.scratchBufferSizeInBytes;
  toolInfo.globalAddress_ = kernelInfo.heapBufAddr;
  toolInfo.aclBinary_ = hsaKernel.prog().binaryElf();
  toolInfo.event_ = enqueueEvent;
  debugMgr->executePreDispatchCallBack(reinterpret_cast<void*>(aqlPkt), &toolInfo);

  // Install the debug TBA/TMA only if the tool supplied both of them
  const bool toolHasTrapHandler =
      (nullptr != toolInfo.trapHandler_) && (nullptr != toolInfo.trapBuffer_);
  if (toolHasTrapHandler) {
    assignDebugTrapHandler(toolInfo, kernelInfo);
  }
  kernelInfo.trapPresent = (kernelInfo.trapHandler != nullptr);

  // Exception policy
  kernelInfo.excpEn = toolInfo.exceptionMask_;
  kernelInfo.cacheDisableMask = toolInfo.cacheDisableMask_;
  kernelInfo.sqDebugMode = toolInfo.gpuSingleStepMode_;

  // Compute the mask for reserved CUs. These two dwords correspond to
  // two registers used for reserving CUs for display. The reserved CU count
  // (0 to 7, chosen by the debugger user) is split between SE0 and SE1,
  // with SE1 taking the extra CU when the count is odd.
  if (toolInfo.monitorMode_) {
    const uint32_t se0Reserved = toolInfo.reservedCuNum_ / 2;
    const uint32_t se1Reserved = toolInfo.reservedCuNum_ - se0Reserved;
    kernelInfo.mgmtSe0Mask <<= se0Reserved;
    kernelInfo.mgmtSe1Mask <<= se1Reserved;
  }

  Unimplemented();
  /*
  // flush/invalidate the instruction, data, L1 and L2 caches
  InvalidateSqCaches();
  */
#endif
}
void VirtualGPU::assignDebugTrapHandler(const DebugToolInfo& dbgSetting,
HwDbgKernelInfo& kernelInfo) {
// setup the runtime trap handler code and trap buffer to be assigned before kernel dispatching
//
Memory* rtTrapHandlerMem = static_cast<Memory*>(dev().hwDebugMgr()->runtimeTBA());
Memory* rtTrapBufferMem = static_cast<Memory*>(dev().hwDebugMgr()->runtimeTMA());
kernelInfo.trapHandler = reinterpret_cast<void*>(rtTrapHandlerMem->vmAddress() + TbaStartOffset);
// With the TMA corruption hw bug workaround, the trap handler buffer can be set to zero.
// However, by setting the runtime trap buffer (TMA) correct, the runtime trap hander
// without the workaround can still function correctly.
kernelInfo.trapHandlerBuffer = reinterpret_cast<void*>(rtTrapBufferMem->vmAddress());
address rtTrapBufferAddress = static_cast<address>(rtTrapBufferMem->map(this));
Memory* trapHandlerMem = dev().getGpuMemory(dbgSetting.trapHandler_);
Memory* trapBufferMem = dev().getGpuMemory(dbgSetting.trapBuffer_);
// Address of the trap handler code/buffer should be 256-byte aligned
uint64_t tbaAddress = trapHandlerMem->vmAddress();
uint64_t tmaAddress = trapBufferMem->vmAddress();
if ((tbaAddress & 0xFF) != 0 || (tmaAddress & 0xFF) != 0) {
assert(false && "Trap handler/buffer is not 256-byte aligned");
}
// The addresses of the debug trap handler code (TBA) and buffer (TMA) are
// stored in the runtime trap handler buffer with offset location of 0x18-19
// and 0x20-21, respectively.
uint64_t* rtTmaPtr = reinterpret_cast<uint64_t*>(rtTrapBufferAddress + 0x18);
rtTmaPtr[0] = tbaAddress;
rtTmaPtr[1] = tmaAddress;
rtTrapBufferMem->unmap(nullptr);
// Add GPU mem handles to the memory list for VidMM
addVmMemory(trapHandlerMem);
addVmMemory(trapBufferMem);
addVmMemory(rtTrapHandlerMem);
addVmMemory(rtTrapBufferMem);
}
bool VirtualGPU::validateSdmaOverlap(const Resource& src, const Resource& dst) {
uint64_t srcVmEnd = src.vmAddress() + src.vmSize();
if (((src.vmAddress() >= sdmaRange_.start_) && (src.vmAddress() <= sdmaRange_.end_)) ||
-11
Melihat File
@@ -26,7 +26,6 @@
#include "device/pal/palprintf.hpp"
#include "device/pal/paltimestamp.hpp"
#include "device/pal/palsched.hpp"
#include "device/pal/paldebugger.hpp"
#include "device/pal/palgpuopen.hpp"
#include "platform/commandqueue.hpp"
#include "device/blit.hpp"
@@ -634,16 +633,6 @@ class VirtualGPU : public device::VirtualDevice {
const amd::BufferRect& dstRect //!< region of destination for copy
);
void buildKernelInfo(const HSAILKernel& hsaKernel, //!< hsa kernel
hsa_kernel_dispatch_packet_t* aqlPkt, //!< aql packet for dispatch
HwDbgKernelInfo& kernelInfo, //!< kernel info for the dispatch
amd::Event* enqueueEvent //!< Event provided in the enqueue kernel command
);
void assignDebugTrapHandler(const DebugToolInfo& dbgSetting, //!< debug settings
HwDbgKernelInfo& kernelInfo //!< kernel info for the dispatch
);
void PrintChildren(const HSAILKernel& hsaKernel, //!< The parent HSAIL kernel
VirtualGPU* gpuDefQueue //!< Device queue for children execution
);
-2
Melihat File
@@ -154,8 +154,6 @@ release(uint, OCL_SET_SVM_SIZE, 4*16384, \
"set SVM space size for discrete GPU") \
debug(uint, OCL_SYSMEM_REQUIREMENT, 2, \
"Use flag to change the minimum requirement of system memory not to downgrade") \
debug(bool, GPU_ENABLE_HW_DEBUG, false, \
"Enable HW DEBUG for GPU") \
release(uint, GPU_WAVES_PER_SIMD, 0, \
"Force the number of waves per SIMD (1-10)") \
release(bool, GPU_WAVE_LIMIT_ENABLE, false, \