From e5a36ab1ade71dbf19b065892b88311667b9c485 Mon Sep 17 00:00:00 2001 From: German Date: Thu, 17 Nov 2022 16:27:21 -0500 Subject: [PATCH] SWDEV-368308 - Remove HW debug extension Change-Id: If0c68023c09f0dac9111d52ecc0ad63719aa4e70 --- rocclr/cmake/ROCclr.cmake | 1 - rocclr/cmake/ROCclrPAL.cmake | 1 - rocclr/device/device.cpp | 2 - rocclr/device/device.hpp | 17 +- rocclr/device/hwdebug.cpp | 154 ----------- rocclr/device/hwdebug.hpp | 272 ------------------- rocclr/device/pal/paldebugger.hpp | 129 --------- rocclr/device/pal/paldebugmanager.cpp | 366 -------------------------- rocclr/device/pal/paldebugmanager.hpp | 123 --------- rocclr/device/pal/paldefs.hpp | 16 -- rocclr/device/pal/paldevice.cpp | 21 -- rocclr/device/pal/paldevice.hpp | 3 - rocclr/device/pal/palprogram.cpp | 2 +- rocclr/device/pal/palresource.cpp | 7 +- rocclr/device/pal/palsettings.cpp | 5 - rocclr/device/pal/paltrap.hpp | 159 ----------- rocclr/device/pal/palvirtual.cpp | 141 ---------- rocclr/device/pal/palvirtual.hpp | 11 - rocclr/utils/flags.hpp | 2 - 19 files changed, 4 insertions(+), 1428 deletions(-) delete mode 100644 rocclr/device/hwdebug.cpp delete mode 100644 rocclr/device/hwdebug.hpp delete mode 100644 rocclr/device/pal/paldebugger.hpp delete mode 100644 rocclr/device/pal/paldebugmanager.cpp delete mode 100644 rocclr/device/pal/paldebugmanager.hpp delete mode 100644 rocclr/device/pal/paltrap.hpp diff --git a/rocclr/cmake/ROCclr.cmake b/rocclr/cmake/ROCclr.cmake index f859030c77..4d8f6a2575 100644 --- a/rocclr/cmake/ROCclr.cmake +++ b/rocclr/cmake/ROCclr.cmake @@ -67,7 +67,6 @@ target_sources(rocclr PRIVATE ${ROCCLR_SRC_DIR}/device/devprogram.cpp ${ROCCLR_SRC_DIR}/device/devwavelimiter.cpp ${ROCCLR_SRC_DIR}/device/hsailctx.cpp - ${ROCCLR_SRC_DIR}/device/hwdebug.cpp ${ROCCLR_SRC_DIR}/elf/elf.cpp ${ROCCLR_SRC_DIR}/os/alloc.cpp ${ROCCLR_SRC_DIR}/os/os_posix.cpp diff --git a/rocclr/cmake/ROCclrPAL.cmake b/rocclr/cmake/ROCclrPAL.cmake index daf72da446..9feae7e54f 100644 --- 
a/rocclr/cmake/ROCclrPAL.cmake +++ b/rocclr/cmake/ROCclrPAL.cmake @@ -57,7 +57,6 @@ target_sources(rocclr PRIVATE ${ROCCLR_SRC_DIR}/device/pal/palblit.cpp ${ROCCLR_SRC_DIR}/device/pal/palconstbuf.cpp ${ROCCLR_SRC_DIR}/device/pal/palcounters.cpp - ${ROCCLR_SRC_DIR}/device/pal/paldebugmanager.cpp ${ROCCLR_SRC_DIR}/device/pal/paldevice.cpp ${ROCCLR_SRC_DIR}/device/pal/paldeviced3d10.cpp ${ROCCLR_SRC_DIR}/device/pal/paldeviced3d11.cpp diff --git a/rocclr/device/device.cpp b/rocclr/device/device.cpp index afe8d64db8..ba98b28497 100644 --- a/rocclr/device/device.cpp +++ b/rocclr/device/device.cpp @@ -502,7 +502,6 @@ Device::Device() online_(true), activeWait_(false), blitProgram_(nullptr), - hwDebugMgr_(nullptr), context_(nullptr), heap_buffer_(nullptr), arena_mem_obj_(nullptr), @@ -791,7 +790,6 @@ Settings::Settings() : value_(0) { customHostAllocator_ = false; waitCommand_ = AMD_OCL_WAIT_COMMAND; supportDepthsRGB_ = false; - enableHwDebug_ = false; commandQueues_ = 200; //!< Field value set to maximum number //!< concurrent Virtual GPUs for default diff --git a/rocclr/device/device.hpp b/rocclr/device/device.hpp index 87fd7af76a..ebddc7c5cd 100644 --- a/rocclr/device/device.hpp +++ b/rocclr/device/device.hpp @@ -37,7 +37,6 @@ #if defined(WITH_COMPILER_LIB) #include "hsailctx.hpp" #endif -#include "hwdebug.hpp" #include "devsignal.hpp" #if defined(__clang__) @@ -94,7 +93,6 @@ class TransferBufferFileCommand; class StreamOperationCommand; class VirtualMapCommand; class ExternalSemaphoreCmd; -class HwDebugManager; class Isa; class Device; struct KernelParameterDescriptor; @@ -630,7 +628,6 @@ class Settings : public amd::HeapObject { uint customHostAllocator_ : 1; //!< True if device has custom host allocator // that replaces generic OS allocation routines uint supportDepthsRGB_ : 1; //!< Support DEPTH and sRGB channel order format - uint enableHwDebug_ : 1; //!< Enable HW debug support uint reportFMAF_ : 1; //!< Report FP_FAST_FMAF define in CL program uint reportFMA_ : 
1; //!< Report FP_FAST_FMA define in CL program uint singleFpDenorm_ : 1; //!< Support Single FP Denorm @@ -644,7 +641,7 @@ class Settings : public amd::HeapObject { uint enableCoopMultiDeviceGroups_ : 1; //!< Enable cooperative groups multi device uint fenceScopeAgent_ : 1; //!< Enable fence scope agent in AQL dispatch packet uint rocr_backend_ : 1; //!< Device uses ROCr backend for submissions - uint reserved_ : 10; + uint reserved_ : 11; }; uint value_; }; @@ -1844,17 +1841,6 @@ class Device : public RuntimeObject { //! Returns app profile static const AppProfile* appProfile() { return &appProfile_; } - //! Register a hardware debugger manager - HwDebugManager* hwDebugMgr() const { return hwDebugMgr_; } - - //! Initialize the Hardware Debug Manager - virtual int32_t hwDebugManagerInit(amd::Context* context, uintptr_t messageStorage) { - return CL_SUCCESS; - } - - //! Remove the Hardware Debug Manager - virtual void hwDebugManagerRemove() {} - //! Adds GPU memory to the VA cache list void addVACache(device::Memory* memory) const; @@ -1963,7 +1949,6 @@ class Device : public RuntimeObject { BlitProgram* blitProgram_; //!< Blit program info static AppProfile appProfile_; //!< application profile - HwDebugManager* hwDebugMgr_; //!< Hardware Debug manager amd::Context* context_; //!< Context static amd::Context* glb_ctx_; //!< Global context with all devices diff --git a/rocclr/device/hwdebug.cpp b/rocclr/device/hwdebug.cpp deleted file mode 100644 index 63e06789a4..0000000000 --- a/rocclr/device/hwdebug.cpp +++ /dev/null @@ -1,154 +0,0 @@ -/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc. 
- - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#include "hwdebug.hpp" - -#include -#include -#include - -namespace amd { - -class Device; - -/* - *************************************************************************** - * Implementation of GPU Debug Manager class - *************************************************************************** - */ - -//! 
Constructor of the debug manager class -HwDebugManager::HwDebugManager(amd::Device* device) - : context_(NULL), - device_(device), - preDispatchCallBackFunc_(NULL), - postDispatchCallBackFunc_(NULL), - preDispatchCallBackArgs_(NULL), - postDispatchCallBackArgs_(NULL), - paramMemory_(NULL), - numParams_(0), - aclBinary_(NULL), - aqlCodeAddr_(NULL), - aqlCodeSize_(0), - scratchRingAddr_(NULL), - scratchRingSize_(0), - isRegistered_(false), - runtimeTBA_(NULL), - runtimeTMA_(NULL) { - memset(&debugInfo_, 0, sizeof(debugInfo_)); - - for (int i = 0; i < kDebugTrapLocationMax; i++) { - rtTrapInfo_[i] = NULL; - } -} - -HwDebugManager::~HwDebugManager() { - delete[] paramMemory_; - - delete runtimeTMA_; - delete runtimeTBA_; -} - -//! Setup the call back function pointer -void HwDebugManager::setCallBackFunctions(cl_PreDispatchCallBackFunctionAMD preDispatchFunction, - cl_PostDispatchCallBackFunctionAMD postDispatchFunction) { - preDispatchCallBackFunc_ = preDispatchFunction; - postDispatchCallBackFunc_ = postDispatchFunction; -} - -//! Setup the call back argument pointers -void HwDebugManager::setCallBackArguments(void* preDispatchArgs, void* postDispatchArgs) { - preDispatchCallBackArgs_ = preDispatchArgs; - postDispatchCallBackArgs_ = postDispatchArgs; -} - -//! Get dispatch debug info -void HwDebugManager::getDispatchDebugInfo(void* debugInfo) const { - memcpy(debugInfo, (void*)&debugInfo_, sizeof(DispatchDebugInfo)); -} - -//! Set the kernel code address and its size -void HwDebugManager::setKernelCodeInfo(address aqlCodeAddr, uint32_t aqlCodeSize) { - aqlCodeAddr_ = aqlCodeAddr; - aqlCodeSize_ = aqlCodeSize; -} - -//! Get the scratch ring -void HwDebugManager::setScratchRing(address scratchRingAddr, uint32_t scratchRingSize) { - scratchRingAddr_ = scratchRingAddr; - scratchRingSize_ = scratchRingSize; -} - -//! 
Map the scratch ring for host access -void HwDebugManager::mapScratchRing(uint64_t* scratchRingAddr, uint32_t* scratchRingSize) const { - *scratchRingAddr = reinterpret_cast(scratchRingAddr_); - *scratchRingSize = scratchRingSize_; -} - -void HwDebugManager::setExceptionPolicy(void* exceptionPolicy) { - memcpy(&excpPolicy_, exceptionPolicy, sizeof(cl_dbg_exception_policy_amd)); -} - -void HwDebugManager::getExceptionPolicy(void* exceptionPolicy) const { - memcpy(exceptionPolicy, &excpPolicy_, sizeof(cl_dbg_exception_policy_amd)); -} - -void HwDebugManager::setKernelExecutionMode(void* mode) { - cl_dbg_kernel_exec_mode_amd* execMode = reinterpret_cast(mode); - execMode_.ui32All = execMode->ui32All; -} - - -void HwDebugManager::getKernelExecutionMode(void* mode) const { - cl_dbg_kernel_exec_mode_amd* execMode = reinterpret_cast(mode); - execMode->ui32All = execMode_.ui32All; -} - -void HwDebugManager::setAclBinary(void* aclBinary) { aclBinary_ = aclBinary; } - -void HwDebugManager::allocParamMemList(uint32_t numParams) { - if (NULL != paramMemory_) { - delete[] paramMemory_; - } - - numParams_ = numParams; - paramMemory_ = new amd::Memory*[numParams]; -} - -cl_mem HwDebugManager::getKernelParamMem(uint32_t paramIdx) const { - assert((paramIdx < numParams_) && "Invalid kernel parameter index too big"); - - return as_cl(paramMemory_[paramIdx]); -} - -void HwDebugManager::assignKernelParamMem(uint32_t paramIdx, amd::Memory* mem) { - assert((paramIdx < numParams_) && "Invalid kernel parameter index too big"); - - paramMemory_[paramIdx] = mem; -} - -void HwDebugManager::installTrap(cl_dbg_trap_type_amd trapType, amd::Memory* pTrapHandler, - amd::Memory* pTrapBuffer) { - rtTrapInfo_[trapType << 2] = pTrapHandler; - rtTrapInfo_[(trapType << 2) + 1] = pTrapBuffer; -} - - -} // namespace amd diff --git a/rocclr/device/hwdebug.hpp b/rocclr/device/hwdebug.hpp deleted file mode 100644 index 4be0855baf..0000000000 --- a/rocclr/device/hwdebug.hpp +++ /dev/null @@ -1,272 +0,0 @@ 
-/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#ifndef HWDEBUG_H_ -#define HWDEBUG_H_ - -#include "device.hpp" -#include "amdocl/cl_debugger_amd.h" - -static constexpr int TbaStartOffset = 256; - -static constexpr int RtTrapBufferWaveSize = 64; -static constexpr int RtTrapBufferSeNum = 4; -static constexpr int RtTrapBufferShNum = 2; -static constexpr int RtTrapBufferCuNum = 16; -static constexpr int RtTrapBufferSimdNum = 4; -static constexpr int RtTrapBufferWaveNum = 16; -static constexpr int RtTrapBufferTotalWaveNum = - ((RtTrapBufferSeNum) * (RtTrapBufferShNum) * (RtTrapBufferCuNum) * (RtTrapBufferSimdNum) * - (RtTrapBufferWaveNum)); - - -/*! \brief Debug trap handler location in the runtime trap buffer - * - * This enumeration is used to indicate the location where the debug - * trap handler and debug trap buffer are set in the device trap buffer. 
- */ -enum DebugTrapLocation { - kDebugTrapHandlerLocation = 0, //! Debug Trap handler location, this location must be 0 - kDebugTrapBufferLocation = 1, //! Debug Trap buffer location, this location must be 1 - kDebugTrapLocationMax = 2 -}; - - -/*! \brief This structure is for the debug info in each kernel dispatch. - * - * Contains the memory descriptor information of the scratch memory and the global - * memory - */ -struct DispatchDebugInfo { - uint32_t scratchMemoryDescriptor_[4]; //! Scratch memory descriptor - uint32_t globalMemoryDescriptor_[4]; //! Global memory descriptor -}; - -/*! \brief Trap handler descriptor - * - * The trap handler descriptor contains the details of a given trap handler. - */ -struct TrapHandlerInfo { - amd::Memory* trapHandler_; //!< Device memory for the trap handler - amd::Memory* trapBuffer_; //!< Device memory for the trap buffer -}; - -/*! \brief Structure of the runtime trap handler buffer, which includes the following - * information: information of the runtime trap handler and buffer, information of - * the level-2 trap handlers and buffers. - */ -struct RuntimeTrapInfo { - TrapHandlerInfo trap_; //!< Structure of the address of all trap handlers - uint32_t dispatchId_; //!< Dispatch ID that signals the shader event - uint32_t vgpr_backup_[RtTrapBufferTotalWaveNum][RtTrapBufferWaveSize]; - //!< Buffer to backup the VGPR used by the runtime trap handler -}; - -/** - * Opaque pointer to trap event - */ -typedef uintptr_t DebugEvent; - -namespace amd { - - -class Context; -class Device; -class HostQueue; - - -/*! \class HwDebugManager - * - * \brief The device interface class for the hardware debug manager - */ -class HwDebugManager { - public: - //! Constructor for the Hardware Debug Manager - HwDebugManager(amd::Device* device); - - //! Destructor for Hardware Debug Manager - virtual ~HwDebugManager(); - - //! 
Setup the call back function pointer - void setCallBackFunctions(cl_PreDispatchCallBackFunctionAMD preDispatchFuncion, - cl_PostDispatchCallBackFunctionAMD postDispatchFunction); - - //! Setup the call back argument pointers - void setCallBackArguments(void* preDispatchArgs, void* postDispatchArgs); - - //! Get dispatch debug info - void getDispatchDebugInfo(void* debugInfo) const; - - //! Set the kernel code address and its size - void setKernelCodeInfo(address aqlCodeAddr, uint32_t aqlCodeSize); - - //! Get the scratch ring - void setScratchRing(address scratchRingAddr, uint32_t scratchRingSize); - - //! Map the scratch ring for host access - void mapScratchRing(uint64_t* scratchRingAddr, uint32_t* scratchRingSize) const; - - //! Retrieve the pre-dispatch callback function - cl_PreDispatchCallBackFunctionAMD preDispatchCallBackFunc() const { - return preDispatchCallBackFunc_; - } - - //! Retrieve the post-dispatch callback function - cl_PostDispatchCallBackFunctionAMD postDispatchCallBackFunc() const { - return postDispatchCallBackFunc_; - } - - //! Retrieve the pre-dispatch callback function arguments - void* preDispatchCallBackArgs() const { return preDispatchCallBackArgs_; } - - //! Retrieve the post-dispatch callback function arguments - void* postDispatchCallBackArgs() const { return postDispatchCallBackArgs_; } - - //! Retrieve the memory pointer of the runtime trap handler code - device::Memory* runtimeTBA() const { return runtimeTBA_; } - - //! Retrieve the memory pointer of the runtime trap handler buffer - device::Memory* runtimeTMA() const { return runtimeTMA_; } - - //! Set exception policy - void setExceptionPolicy(void* exceptionPolicy); - - //! Get exception policy - void getExceptionPolicy(void* exceptionPolicy) const; - - //! Set the kernel execution mode - void setKernelExecutionMode(void* mode); - - //! Get the kernel execution mode - void getKernelExecutionMode(void* mode) const; - - //! 
Setup the pointer to the aclBinary within the debug manager - void setAclBinary(void* aclBinary); - - //! Allocate storage to keep the memory pointers of the kernel parameters - void allocParamMemList(uint32_t numParams); - - //! Assign the kernel parameter memory - void assignKernelParamMem(uint32_t paramIdx, amd::Memory* mem); - - //! Get kernel parameter memory object - cl_mem getKernelParamMem(uint32_t paramIdx) const; - - //! Install trap handler - void installTrap(cl_dbg_trap_type_amd trapType, amd::Memory* pTrapHandler, - amd::Memory* pTrapBuffer); - - //! Flush cache - virtual void flushCache(uint32_t mask) = 0; - - //! Create the debug event - virtual DebugEvent createDebugEvent(const bool autoReset) = 0; - - //! Wait for the debug event - virtual int32_t waitDebugEvent(DebugEvent pEvent, uint32_t timeOut) const = 0; - - //! Destroy the debug event - virtual void destroyDebugEvent(DebugEvent* pEvent) = 0; - - //! Register the debugger - virtual int32_t registerDebugger(amd::Context* context, uintptr_t pMessageStorage) = 0; - - //! Unregister the debugger - virtual void unregisterDebugger() = 0; - - //! Send the wavefront control cmmand - virtual void wavefrontControl(uint32_t waveAction, uint32_t waveMode, uint32_t trapId, - void* waveAddr) const = 0; - - //! Set address watching point - virtual void setAddressWatch(uint32_t numWatchPoints, void** watchAddress, uint64_t* watchMask, - uint64_t* watchMode, DebugEvent* event) = 0; - - //! Map the shader (AQL code) for host access - virtual void mapKernelCode(void* aqlCodeInfo) const = 0; - - //! Get the packet information for dispatch - virtual void getPacketAmdInfo(const void* aqlCodeInfo, void* packetInfo) const = 0; - - //! Set global memory values - virtual void setGlobalMemory(amd::Memory* memObj, uint32_t offset, void* srcPtr, - uint32_t size) = 0; - - //! Execute the post-dispatch callback function - virtual void executePostDispatchCallBack() = 0; - - //! 
Execute the pre-dispatch callback function - virtual void executePreDispatchCallBack(void* aqlPacket, void* toolInfo) = 0; - - protected: - //! Return the context - const amd::Context* context() const { return context_; } - - //! Get the debug device - const amd::Device* device() const { return device_; } - - //! Return the register flag - bool isRegistered() const { return isRegistered_; } - - protected: - const amd::Context* context_; ///< context that used to create host queue for the debugger - amd::Device* device_; ///< Device to run the debugger - - cl_PreDispatchCallBackFunctionAMD preDispatchCallBackFunc_; //!< pre-dispatch callback function - cl_PostDispatchCallBackFunctionAMD - postDispatchCallBackFunc_; //!< post-dispatch callback function - void* preDispatchCallBackArgs_; //!< pre-dispatch callback function arguments - void* postDispatchCallBackArgs_; //!< post-dispatch callback function arguments - - DispatchDebugInfo debugInfo_; //!< Debug setting/information for kernel dispatch - amd::Memory* rtTrapInfo_[kDebugTrapLocationMax]; //!< Device trap buffer, to store various trap - //!handlers on the device - - amd::Memory** paramMemory_; //!< list of memory pointers for kernel parameters - uint32_t numParams_; //!< number of kernel parameters - - void* aclBinary_; //!< ACL binary - - address aqlCodeAddr_; //!< The mapped AQL code to allow host access - uint32_t aqlCodeSize_; //!< The size of the AQL code info - - address scratchRingAddr_; //!< The mapped address of the scratch buffer - uint32_t scratchRingSize_; //!< The size of the scratch ring - - bool isRegistered_; //! flag to indicate the debugger has been registered - - cl_dbg_exception_policy_amd excpPolicy_; //!< exception policy - cl_dbg_kernel_exec_mode_amd execMode_; //!< kernel execution mode - RuntimeTrapInfo rtTrapHandlerInfo_; //!< Runtime trap information - - //! Runtime Trap handler pointer (TBA) & its buffer (TMA) - device::Memory* runtimeTBA_; //! 
runtime trap handler pointer - device::Memory* runtimeTMA_; //! runtime trap handler buffer -}; - - -/**@}*/ - -/** - * @} - */ -} // namespace amd - -#endif // HWDEBUG_H_ diff --git a/rocclr/device/pal/paldebugger.hpp b/rocclr/device/pal/paldebugger.hpp deleted file mode 100644 index 33cf9dee74..0000000000 --- a/rocclr/device/pal/paldebugger.hpp +++ /dev/null @@ -1,129 +0,0 @@ -/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#pragma once - -#include -#include -#include "hsa.h" -#include "amd_hsa_kernel_code.h" -#include "device/device.hpp" -#include "device/hwdebug.hpp" -#include "acl.h" - -static constexpr int NumberReserveVgprs = 4; - -namespace pal { - -/** - * \defgroup Services_API OCL Runtime Services API - * @{ - */ - -/*! 
\brief Dispatch packet information - * - * This structure contains the packet information for kernel dispatch - */ -struct PacketAmdInfo { - uint32_t trapReservedVgprIndex_; //!< reserved VGPR index, -1 when they are not valid - uint32_t scratchBufferWaveOffset_; //!< scratch buffer wave offset, -1 when no scratch buffer - void* pointerToIsaBuffer_; //!< pointer to the buffer containing ISA - size_t sizeOfIsaBuffer_; //!< size of the ISA buffer - uint32_t numberOfVgprs_; //!< number of VGPRs used by the kernel - uint32_t numberOfSgprs_; //!< number of SGPRs used by the kernel - size_t sizeOfStaticGroupMemory_; //!< Static local memory used by the kernel -}; - -/*! \brief Cache mask for invalidation - */ -struct HwDbgGpuCacheMask { - HwDbgGpuCacheMask() : ui32All_(0) {} - - HwDbgGpuCacheMask(uint32_t mask) : ui32All_(mask) {} - - union { - struct { - uint32_t sqICache_ : 1; //!< Instruction cache - uint32_t sqKCache_ : 1; //!< Data cache - uint32_t tcL1_ : 1; //!< tcL1 cache - uint32_t tcL2_ : 1; //!< tcL2 cache - uint32_t reserved_ : 28; - }; - uint32_t ui32All_; - }; -}; - -/*! \brief Address watch information - * - * Information about each watch point - address, mask, mode and event - */ -struct HwDbgAddressWatch { - void* watchAddress_; //! The address of watch point - uint64_t watchMask_; //! The mask for watch point (lower 24 bits) - cl_dbg_address_watch_mode_amd watchMode_; //! The watch mode for this watch - DebugEvent event_; //! Event of the watch point (not used for now) -}; - -/*! \brief Runtime structure used to communicate debug information - * between Ocl services and core for a kernel dispatch. - */ -struct DebugToolInfo { - uint64_t scratchAddress_; //! Scratch memory address - size_t scratchSize_; //! Scratch memory size - uint64_t globalAddress_; //! Global memory address - uint32_t cacheDisableMask_; //! Cache mask, indicating caches disabled - uint32_t exceptionMask_; //! Exception mask - uint32_t reservedCuNum_; //! 
Number of reserved CUs for display, - //! which ranges from 0 to 7 in the current implementation. - bool monitorMode_; //! Debug or profiler mode - bool gpuSingleStepMode_; //! SQ debug mode - amd::Memory* trapHandler_; //! Trap handler address - amd::Memory* trapBuffer_; //! Trap buffer address - bool sqPerfcounterEnable_; //! whether SQ perf counters are enabled - aclBinary* aclBinary_; //! pointer of the kernel ACL binary - amd::Event* event_; //! pointer of the kernel event in the enqueue command -}; - -/*! \brief Message used by the KFD wave control for CI - * - * Structure indicates the various information used by the wave control function. - */ -struct HwDebugWaveAddr { - uint32_t VMID_ : 4; //! Virtual memory id - uint32_t wave_ : 4; //! Wave id - uint32_t SIMD_ : 2; //! SIMD id - uint32_t CU_ : 4; //! Compute unit - uint32_t SH_ : 1; //! Shader array - uint32_t SE_ : 1; //! Shader engine -}; - -/*! \brief Kernel code information - * - * This structure contains the pointer of mapped kernel code for host access - * and its size (in bytes) - */ -struct AqlCodeInfo { - amd_kernel_code_t* aqlCode_; //! pointer of AQL code to allow host access - uint32_t aqlCodeSize_; //! size of AQL code -}; - -/**@}*/ - -} // namespace pal diff --git a/rocclr/device/pal/paldebugmanager.cpp b/rocclr/device/pal/paldebugmanager.cpp deleted file mode 100644 index 7c00abac9e..0000000000 --- a/rocclr/device/pal/paldebugmanager.cpp +++ /dev/null @@ -1,366 +0,0 @@ -/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
- - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. 
*/ - -#include "platform/commandqueue.hpp" -#include "device/device.hpp" -#include "device/pal/paldevice.hpp" -#include "device/pal/palmemory.hpp" -#include "device/pal/paltrap.hpp" -#include "device/pal/paldebugmanager.hpp" -#include -#include -#include - -namespace pal { - -class VirtualGPU; -class Device; -class Memory; - -/* - *************************************************************************** - * Implementation of GPU Debug Manager class - *************************************************************************** - */ - -GpuDebugManager::GpuDebugManager(amd::Device* device) - : HwDebugManager(device), - vGpu_(nullptr), - debugMessages_(0), - addressWatch_(nullptr), - addressWatchSize_(0), - oclEventHandle_(nullptr) { - // Initialize the exception info and the kernel execution mode - excpPolicy_.exceptionMask = 0x0; - excpPolicy_.waveAction = CL_DBG_WAVES_RESUME; - excpPolicy_.hostAction = CL_DBG_HOST_IGNORE; - excpPolicy_.waveMode = CL_DBG_WAVEMODE_BROADCAST; - - execMode_.ui32All = 0; - - rtTrapHandlerInfo_.trap_.trapHandler_ = nullptr; - rtTrapHandlerInfo_.trap_.trapBuffer_ = nullptr; - - aqlPacket_ = (hsa_kernel_dispatch_packet_t*)nullptr; - - return; -} - -GpuDebugManager::~GpuDebugManager() { - delete[] addressWatch_; -} - -void GpuDebugManager::executePreDispatchCallBack(void* aqlPacket, void* toolInfo) { - DebugToolInfo* info = reinterpret_cast(toolInfo); - - aqlPacket_ = reinterpret_cast(aqlPacket); - Unimplemented(); - // Only if the pre-dispatch callback is set, will we update cache - // flush configuration and build the memory descriptor. 
- if (nullptr != preDispatchCallBackFunc_) { - /* - // Build the scratch memory descriptor - device()->gslCtx()->BuildScratchBufferResource(debugInfo_.scratchMemoryDescriptor_, - info->scratchAddress_, - info->scratchSize_); - - // Build the global memory descriptor - device()->gslCtx()->BuildHeapBufferResource(debugInfo_.globalMemoryDescriptor_, - info->globalAddress_); - */ - // // for invalidate cache (BuildEndOfKernelNotifyCommands) - // aqlPacket->release_fence_scope = 2; - - aclBinary_ = reinterpret_cast(info->aclBinary_); - oclEventHandle_ = reinterpret_cast(as_cl(info->event_)); - - cl_device_id clDeviceId = as_cl(device_); - preDispatchCallBackFunc_(clDeviceId, oclEventHandle_, aqlPacket_, aclBinary_, - preDispatchCallBackArgs_); - } - - // setup the trap handler information only if the debugger has been registered - if (isRegistered()) { - // Copy the various info set by the debugger/profiler to the tool info structure - setupTrapInformation(info); - } -} - -void GpuDebugManager::executePostDispatchCallBack() { - if (nullptr != postDispatchCallBackFunc_) { - cl_device_id clDeviceId = as_cl(device_); - postDispatchCallBackFunc_(clDeviceId, aqlPacket_->completion_signal.handle, - postDispatchCallBackArgs_); - } -} - -//! 
Map the kernel code for host access -void GpuDebugManager::mapKernelCode(void* aqlCodeInfo) const { - AqlCodeInfo* codeInfo = reinterpret_cast(aqlCodeInfo); - - codeInfo->aqlCode_ = reinterpret_cast(aqlCodeAddr_); - codeInfo->aqlCodeSize_ = aqlCodeSize_; -} - -int32_t GpuDebugManager::registerDebugger(amd::Context* context, uintptr_t messageStorage) { - if (!device()->settings().enableHwDebug_) { - LogError("debugmanager: Register debugger error - HW DEBUG is not enable"); - return CL_DEBUGGER_REGISTER_FAILURE_AMD; - } - - // first time register - set the message storage, flush queue and enable hw debug - if (!isRegistered()) { - debugMessages_ = messageStorage; - Unimplemented(); - /* - if (!device()->gslCtx()->registerHwDebugger(debugMessages_)) { - LogError("debugmanager: Register debugger failed"); - return CL_OUT_OF_RESOURCES; - } - */ - isRegistered_ = true; - - if (CL_SUCCESS != createRuntimeTrapHandler()) { - LogError("debugmanager: Create runtime trap handler failed"); - return CL_OUT_OF_RESOURCES; - } - } - - context_ = context; - - return CL_SUCCESS; -} - -void GpuDebugManager::unregisterDebugger() { - if (isRegistered()) { - // reset the debugger registration flag - isRegistered_ = false; - context_ = nullptr; - } -} - -void GpuDebugManager::flushCache(uint32_t mask) { - HwDbgGpuCacheMask cacheMask(mask); - // device()->xferQueue()->flushCuCaches(cacheMask); -} - - -void GpuDebugManager::setupTrapInformation(DebugToolInfo* toolInfo) { - toolInfo->scratchAddress_ = 0; - toolInfo->scratchSize_ = 0; - toolInfo->globalAddress_ = 0; - toolInfo->sqPerfcounterEnable_ = false; - - // Set up trap related info in the kernel info structure to be - // used in the kernel dispatch. 
- toolInfo->exceptionMask_ = excpPolicy_.exceptionMask; - toolInfo->gpuSingleStepMode_ = execMode_.gpuSingleStepMode; - toolInfo->monitorMode_ = execMode_.monitorMode; - - // The order of these three bits is determined by the definition - // of the register COMPUTE_DISPATCH_INITIATOR - toolInfo->cacheDisableMask_ = ((execMode_.disableL1Scalar << 2) | - (execMode_.disableL2Cache << 1) | (execMode_.disableL1Vector)); - - toolInfo->reservedCuNum_ = execMode_.reservedCuNum; - - toolInfo->trapHandler_ = rtTrapInfo_[kDebugTrapHandlerLocation]; - toolInfo->trapBuffer_ = rtTrapInfo_[kDebugTrapBufferLocation]; -} - -void GpuDebugManager::getPacketAmdInfo(const void* aqlCodeInfo, void* packetInfo) const - -{ - const AqlCodeInfo* codeInfo = reinterpret_cast(aqlCodeInfo); - - const amd_kernel_code_t* hostAqlCode = codeInfo->aqlCode_; - - PacketAmdInfo* packet = reinterpret_cast(packetInfo); - - const amd_kernel_code_t* akc = hostAqlCode; - - packet->numberOfSgprs_ = akc->wavefront_sgpr_count; - packet->numberOfVgprs_ = akc->workitem_vgpr_count; - - // use mapped kernel_object_address for host accessing of ISA buffer - packet->pointerToIsaBuffer_ = (char*)(hostAqlCode) + akc->kernel_code_entry_byte_offset; - - packet->scratchBufferWaveOffset_ = akc->debug_wavefront_private_segment_offset_sgpr; - - packet->sizeOfIsaBuffer_ = codeInfo->aqlCodeSize_; - - packet->sizeOfStaticGroupMemory_ = akc->workgroup_group_segment_byte_size; - - // The trap_reserved_vgpr_index will be 4 less the original - // This value must be used only by the debugger - packet->trapReservedVgprIndex_ = akc->workitem_vgpr_count - NumberReserveVgprs; -} - -DebugEvent GpuDebugManager::createDebugEvent(const bool autoReset) { - Unimplemented(); - /* - // create the event object - osEventHandle shaderEvent = osEventCreate(!autoReset); - - // event object has been created, set the initial state - if (shaderEvent != 0) { - - osEventReset(shaderEvent); // initial state is non-signaled - - if 
(device()->gslCtx()->exceptionNotification(shaderEvent)) { - return shaderEvent; - } - } - */ - return 0; -} - -int32_t GpuDebugManager::waitDebugEvent(DebugEvent pEvent, uint32_t timeOut) const { - Unimplemented(); - /* - if (osEventTimedWait(pEvent, timeOut)) { - return CL_SUCCESS; - } - else { - return CL_EVENT_TIMEOUT_AMD; - } - */ - return CL_SUCCESS; -} - -void GpuDebugManager::destroyDebugEvent(DebugEvent* pEvent) { - Unimplemented(); - /* - osEventDestroy(*pEvent); - *pEvent = 0; - - device()->gslCtx()->exceptionNotification(0); - */ -} - -void GpuDebugManager::wavefrontControl(uint32_t waveAction, uint32_t waveMode, uint32_t trapId, - void* waveAddr) const { - Unimplemented(); - // device()->gslCtx()->executeSqCommand(waveAction, waveMode, trapId, waveAddr); -} - -void GpuDebugManager::setAddressWatch(uint32_t numWatchPoints, void** watchAddress, - uint64_t* watchMask, uint64_t* watchMode, DebugEvent* event) { - size_t requiredSize = numWatchPoints * sizeof(HwDbgAddressWatch); - - // previously allocated size is not big enough, allocate new memory - if (addressWatchSize_ < requiredSize) { - delete[] addressWatch_; - addressWatch_ = new HwDbgAddressWatch[numWatchPoints]; - addressWatchSize_ = requiredSize; - } - - // fill in the address watch structure - memset(addressWatch_, 0, addressWatchSize_); - - for (uint32_t i = 0; i < numWatchPoints; i++) { - amd::Memory* watchMem = as_amd(reinterpret_cast(watchAddress[i])); - Memory* watchMemAddress = device()->getGpuMemory(watchMem); - - addressWatch_[i].watchAddress_ = reinterpret_cast(watchMemAddress->vmAddress()); - addressWatch_[i].watchMask_ = watchMask[i]; - addressWatch_[i].watchMode_ = (cl_dbg_address_watch_mode_amd)watchMode[i]; - addressWatch_[i].event_ = (0 != event) ? 
event[i] : 0; - } - - Unimplemented(); - // setup the watch addresses - // device()->gslCtx()->setAddressWatch(numWatchPoints, (void*) addressWatch_); -} - -void GpuDebugManager::setGlobalMemory(amd::Memory* memObj, uint32_t offset, void* srcPtr, - uint32_t size) { - Memory* globalMem = device()->getGpuMemory(memObj); - - address mappedMem = static_cast
(globalMem->map(nullptr, 0)); - assert(mappedMem != 0); - - void* dest_ptr = reinterpret_cast(mappedMem + offset); - memcpy(dest_ptr, srcPtr, size); - - globalMem->unmap(nullptr); -} - -int32_t GpuDebugManager::createRuntimeTrapHandler() { - size_t codeSize = 0; - const uint32_t* rtTrapCode = nullptr; - - if (device()->settings().viPlus_) { - codeSize = sizeof(RuntimeTrapCodeVi); - rtTrapCode = RuntimeTrapCodeVi; - } else { - codeSize = sizeof(RuntimeTrapCode); - rtTrapCode = RuntimeTrapCode; - } - - uint32_t numCodes = codeSize / sizeof(uint32_t); - - // Handle TMA corruption hw bug workaround - - // The trap handler buffer has extra 256 bytes allocated, the TMA address - // is stored in the first two DWORDs and the actual trap handler code - // is stored starting at the location of 256 bytes (TbaStartOffset). - // - // allocate memory for the runtime trap handler (TBA) + TMA address - uint32_t allocSize = codeSize + TbaStartOffset; - - Memory* rtTBA = new Memory(*device(), allocSize); - runtimeTBA_ = rtTBA; - - if ((rtTBA == nullptr) || !rtTBA->create(Resource::RemoteUSWC)) { - return CL_OUT_OF_RESOURCES; - } - address tbaAddress = reinterpret_cast
(rtTBA->map(nullptr)); - - // allocate buffer for the runtime trap handler buffer (TMA) - uint32_t tmaSize = 0x100; - Memory* rtTMA = new Memory(*device(), tmaSize); - runtimeTMA_ = rtTMA; - - if ((rtTMA == nullptr) || !rtTMA->create(Resource::RemoteUSWC)) { - return CL_OUT_OF_RESOURCES; - } - - uint64_t rtTmaAddress = rtTMA->vmAddress(); - if ((rtTBA->vmAddress() & 0xFF) != 0 || (rtTmaAddress & 0xFF) != 0) { - LogError("debugmanager: Trap handler/buffer is not 256-byte aligned"); - return CL_INVALID_VALUE; - } - - // store the TMA address at the beginning of trap handler buffer - uint64_t* tbaStorage = reinterpret_cast(tbaAddress); - tbaStorage[0] = rtTmaAddress; - - // save the trap handler code - uint32_t* trapHandlerPtr = (uint32_t*)(tbaAddress + TbaStartOffset); - for (uint32_t i = 0; i < numCodes; i++) { - trapHandlerPtr[i] = rtTrapCode[i]; - } - - rtTBA->unmap(nullptr); - - return CL_SUCCESS; -} - -} // namespace pal diff --git a/rocclr/device/pal/paldebugmanager.hpp b/rocclr/device/pal/paldebugmanager.hpp deleted file mode 100644 index 1c85a045bc..0000000000 --- a/rocclr/device/pal/paldebugmanager.hpp +++ /dev/null @@ -1,123 +0,0 @@ -/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#pragma once - -#include "device/pal/palvirtual.hpp" -#include "device/pal/paldebugger.hpp" - -namespace pal { - -class GpuDebugManager; -class Device; -class Memory; - - -/*! \brief Debug Manager Class - * - * The debug manager class is used to pass all the trap info to the - * kernel dispatch and then the kernel execution can use such trap information - * for kernel execution. This class contains the trap handler and shader event - * objects. The trap handler is setup by users and passed to the kernel dispatch. - * The shader event is to receive interrupts from the GPU and then users can - * perform various operations. - * - * This class also provides the interface for setting up the pre-dispatch - * callback functions used by the profiler and debugger. It also provides - * a way to retrieve various debug information for the kernel execution. - * - */ -class GpuDebugManager : public amd::HwDebugManager { - public: - //! Constructor of the debug manager class - GpuDebugManager(amd::Device* device); - - //! Destructor of the debug manager class - ~GpuDebugManager(); - - //! Get the single instance of the GpuDebugManager class - static GpuDebugManager* getDefaultInstance(); - - //! Destroy the GpuDebugManager class object - static void destroyInstances(); - - //! Flush cache - void flushCache(uint32_t mask); - - //! Create the debug event - DebugEvent createDebugEvent(const bool autoReset); - - //! 
Wait for the debug event - int32_t waitDebugEvent(DebugEvent pEvent, uint32_t timeOut) const; - - //! Destroy the debug event - void destroyDebugEvent(DebugEvent* pEvent); - - //! Register the debugger - int32_t registerDebugger(amd::Context* context, uintptr_t messageStorage); - - //! Unregister the debugger - void unregisterDebugger(); - - //! Send the wavefront control cmmand - void wavefrontControl(uint32_t waveAction, uint32_t waveMode, uint32_t trapId, - void* waveAddr) const; - - //! Set address watching point - void setAddressWatch(uint32_t numWatchPoints, void** watchAddress, uint64_t* watchMask, - uint64_t* watchMode, DebugEvent* pEvent); - - //! Map the kernel code for host access - void mapKernelCode(void* aqlCodeInfo) const; - - //! Get the packet information for dispatch - void getPacketAmdInfo(const void* aqlCodeInfo, void* packetInfo) const; - - //! Set global memory values - void setGlobalMemory(amd::Memory* memObj, uint32_t offset, void* srcPtr, uint32_t size); - - //! Execute the post-dispatch callback function - void executePostDispatchCallBack(); - - //! Execute the pre-dispatch callback function - void executePreDispatchCallBack(void* aqlPacket, void* toolInfo); - - protected: - const VirtualGPU* vGpu() const { return vGpu_; } - - private: - //! Setup trap handler info for kernel execution - void setupTrapInformation(DebugToolInfo* toolInfo); - - //! Create runtime trap handler - int32_t createRuntimeTrapHandler(); - - const pal::Device* device() const { return reinterpret_cast(device_); } - - VirtualGPU* vGpu_; //!< the virtual GPU - uintptr_t debugMessages_; //!< Pointer to a SHARED_DEBUG_MESSAGES pass to the KMD - HwDbgAddressWatch* addressWatch_; //!< Address watch data - size_t addressWatchSize_; //!< Size of address watch data - //! 
Arguments used by the callback function - void* oclEventHandle_; //!< event handler - const hsa_kernel_dispatch_packet_t* aqlPacket_; //!< AQL packet -}; - -} // namespace pal diff --git a/rocclr/device/pal/paldefs.hpp b/rocclr/device/pal/paldefs.hpp index 68c36873e4..acada8ed90 100644 --- a/rocclr/device/pal/paldefs.hpp +++ b/rocclr/device/pal/paldefs.hpp @@ -41,22 +41,6 @@ enum PalGpuMemoryType { PAL_TEXTURE_BUFFER, ///< "buffer" texture inside VBO }; -struct HwDbgKernelInfo { - uint64_t scratchBufAddr; ///< Handle of GPU local memory for kernel private scratch space - size_t scratchBufferSizeInBytes; ///< size of memory pointed to by pScratchBuffer, - uint64_t heapBufAddr; ///< Address of the global heap base - const void* pAqlDispatchPacket; ///< Pointer to the dispatch packet - const void* pAqlQueuePtr; ///< pointer to the AQL Queue - void* trapHandler; ///< address of the trap handler (TBA) - void* trapHandlerBuffer; ///< address of the trap handler buffer (TMA) - uint32_t excpEn; ///< exception mask - bool trapPresent; ///< trap present flag - bool sqDebugMode; ///< debug mode flag (GPU single step mode) - uint32_t mgmtSe0Mask; ///< mask for SE0 (reserving CU for display) - uint32_t mgmtSe1Mask; ///< mask for SE1 (reserving CU for display) - uint32_t cacheDisableMask; ///< cache disable mask -}; - //! 
Engine types enum EngineType { MainEngine = 0, SdmaEngine, AllEngines }; diff --git a/rocclr/device/pal/paldevice.cpp b/rocclr/device/pal/paldevice.cpp index ab911867ae..328bf29282 100644 --- a/rocclr/device/pal/paldevice.cpp +++ b/rocclr/device/pal/paldevice.cpp @@ -31,7 +31,6 @@ #include "device/pal/palprogram.hpp" #include "device/pal/palsettings.hpp" #include "device/pal/palblit.hpp" -#include "device/pal/paldebugmanager.hpp" #include "palLib.h" #include "palPlatform.h" #include "palDevice.h" @@ -761,10 +760,6 @@ Device::Device() rgpCaptureMgr_(nullptr) {} Device::~Device() { - // remove the HW debug manager - delete hwDebugMgr_; - hwDebugMgr_ = nullptr; - if (p2p_stage_ != nullptr) { p2p_stage_->release(); p2p_stage_ = nullptr; @@ -1025,11 +1020,6 @@ bool Device::create(Pal::IDevice* device) { return false; } - // create the HW debug manager if needed - if (settings().enableHwDebug_) { - hwDebugMgr_ = new GpuDebugManager(this); - } - if ((glb_ctx_ == nullptr) && (gNumDevices > 1) && (device == gDeviceList[gNumDevices - 1])) { std::vector devices; uint32_t numDevices = amd::Device::numDevices(CL_DEVICE_TYPE_GPU, true); @@ -2511,17 +2501,6 @@ void Device::SrdManager::fillResourceList(VirtualGPU& gpu) { } } -int32_t Device::hwDebugManagerInit(amd::Context* context, uintptr_t messageStorage) { - int32_t status = hwDebugMgr_->registerDebugger(context, messageStorage); - - if (CL_SUCCESS != status) { - delete hwDebugMgr_; - hwDebugMgr_ = nullptr; - } - - return status; -} - bool Device::SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { Pal::SetClockModeInput setClockMode = {}; diff --git a/rocclr/device/pal/paldevice.hpp b/rocclr/device/pal/paldevice.hpp index aaae295fea..47ceb0cb57 100644 --- a/rocclr/device/pal/paldevice.hpp +++ b/rocclr/device/pal/paldevice.hpp @@ -533,9 +533,6 @@ class Device : public NullDevice { //! 
Returns SRD manger object SrdManager& srds() const { return *srdManager_; } - //! Initial the Hardware Debug Manager - int32_t hwDebugManagerInit(amd::Context* context, uintptr_t messageStorage); - //! Returns PAL device properties const Pal::DeviceProperties& properties() const { return properties_; } diff --git a/rocclr/device/pal/palprogram.cpp b/rocclr/device/pal/palprogram.cpp index 4b3c75d881..cbd517eea8 100644 --- a/rocclr/device/pal/palprogram.cpp +++ b/rocclr/device/pal/palprogram.cpp @@ -88,7 +88,7 @@ bool Segment::alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t } if (!amd_mem_obj->create(nullptr)) { - LogError("[OCL] failed to create a svm hidden buffer!"); + LogError("[OCL] failed to create a svm hidden buffer!"); amd_mem_obj->release(); return false; } diff --git a/rocclr/device/pal/palresource.cpp b/rocclr/device/pal/palresource.cpp index 00a2a017f4..aa239f934e 100644 --- a/rocclr/device/pal/palresource.cpp +++ b/rocclr/device/pal/palresource.cpp @@ -1157,17 +1157,14 @@ bool Resource::create(MemoryType memType, CreateParams* params, bool forceLinear } else { memType = Local; } - // force to use remote memory for HW DEBUG or use - // local memory once we determine if FGS is supported - // memType = (!dev().settings().enableHwDebug_) ? Local : RemoteUSWC; } // Get the element size elementSize_ = Pal::Formats::BytesPerPixel(format); desc_.type_ = memType; if (memType == Scratch) { - // use local memory for scratch buffer unless it is using HW DEBUG - desc_.type_ = (!dev().settings().enableHwDebug_) ? 
Local : RemoteUSWC; + // use local memory for scratch buffer + desc_.type_ = Local; desc_.scratch_ = true; } diff --git a/rocclr/device/pal/palsettings.cpp b/rocclr/device/pal/palsettings.cpp index dd1ab9700b..f5ad8a1987 100644 --- a/rocclr/device/pal/palsettings.cpp +++ b/rocclr/device/pal/palsettings.cpp @@ -509,11 +509,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp, enableExtension(ClKhrMipMapImageWrites); } - // Enable HW debug - if (GPU_ENABLE_HW_DEBUG) { - enableHwDebug_ = true; - } - #if defined(_WIN32) enableExtension(ClAmdPlanarYuv); #endif diff --git a/rocclr/device/pal/paltrap.hpp b/rocclr/device/pal/paltrap.hpp deleted file mode 100644 index 783ec5fe4d..0000000000 --- a/rocclr/device/pal/paltrap.hpp +++ /dev/null @@ -1,159 +0,0 @@ -/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. 
*/ - -/******************************************************************************* - * The source of the runtime trap handler, "runtimetraphandler.sp3". - * The binary is created by the SP3 tool with the following command: - * - * sp3.exe runtimetraphandler.sp3 -hex runtimeTrapCode.hex - * - ******************************************************************************* - -shader main - asic(TAHITI) // for SI/CI or asic(VI) for VI - type(CS) - - // clear wave exception state - v_clrexcp - s_waitcnt 0 - //========================================================================== - // Handle the workaround for HW bug that causes the incorrect TMA value. - // Retrieve the TMA values, which are stored at TBA buffer at location - // 256 (0x100). - - // Construct the memory descriptor with TBA as the start address - // we are using the registers ttmp[8:11] for that. - s_mov_b32 ttmp8, tba_lo - s_and_b32 ttmp9, tba_hi, 0xffff - - // 0x100=256 bytes, which is the size of the buffer to - // store all the level 2 trap handler info - s_or_b32 ttmp9, ttmp9, 0x01000000 - s_mov_b32 ttmp10, 0x00002000 - s_mov_b32 ttmp11, 0x00024fac - - // TMA is stored 256 (0x100) bytes before the TBA value - s_sub_u32 ttmp8, ttmp8, 0x100 - - // Backup the s0 since ttmp registers cannot be target of - // buffer read instruction - s_mov_b32 ttmp7, s0 - s_buffer_load_dword s0, ttmp8, 0x0 // VI: offset=0x0 (bytes) - s_waitcnt 0 - s_mov_b32 tma_lo, s0 - s_buffer_load_dword s0, ttmp8, 0x1 // VI: offset=0x4 (bytes) - s_waitcnt 0 - s_mov_b32 tma_hi, s0 - s_mov_b32 s0, ttmp7 - - //=================================================== - // setup the mmeory descriptor for TMA - s_mov_b32 ttmp6, 0x18 - s_add_u32 ttmp8, tma_lo, ttmp6 - s_and_b32 ttmp9, tma_hi, 0xffff - //0x68=104 bytes, which is the size of the buffer to - //store all the level2 trap handler info - s_or_b32 ttmp9, ttmp9, 0x00680000 - s_mov_b32 ttmp10, 0x00002000 - s_mov_b32 ttmp11, 0x00024fac - - 
//=================================================== - // backup the TMA values to be restored later - // level-one TMA saved in the ttmp6,ttmp7 - s_mov_b32 ttmp6, tma_lo - s_mov_b32 ttmp7, tma_hi - - //=================================================== - // setup the TMA for the level-two trap handler - // level-two TMA saved in tma_hi, tma_lo - s_mov_b32 ttmp3, s0 - s_buffer_load_dword s0, ttmp8, 0x2 // VI: offset=0x8 (bytes) - s_waitcnt 0x0000 - s_mov_b32 tma_lo, s0 - - s_buffer_load_dword s0, ttmp8, 0x3 // VI: offset=0xc (bytes) - s_waitcnt 0x0000 - s_mov_b32 tma_hi, s0 - - //=================================================== - // setup the TBA for the level-two trap handler - // level-two TBA saved in ttmp9, ttmp8 - s_buffer_load_dword s0, ttmp8, 0x0 // VI: offset=0x0 (bytes) - s_waitcnt 0x0000 - s_mov_b32 ttmp2, s0 - - s_buffer_load_dword s0, ttmp8, 0x1 // VI: offset=0x4 (bytes) - s_waitcnt 0x0000 - - //swap the values of s0 and ttmp3 without using other registers - s_xor_b32 ttmp3, s0, ttmp3 - s_xor_b32 s0, s0, ttmp3 - s_xor_b32 ttmp3, s0, ttmp3 - - //store the debug trap handler start address in ttmp8,9 - s_mov_b32 ttmp8, ttmp2 - s_mov_b32 ttmp9, ttmp3 - - //=================================================== - // get the pc value to resume execution - s_getpc_b64 [ttmp2, ttmp3] - s_add_u32 ttmp2, ttmp2, 0x8 - - //=================================================== - //set the pc value to jump to the debug trap handler - s_setpc_b64 [ttmp8, ttmp9] - - //=================================================== - // restore the tamp values - s_mov_b32 tma_hi, ttmp7 - s_mov_b32 tma_lo, ttmp6 - - label_return: - //=================================================== - // return from the trap handler to the saved PC - s_and_b32 ttmp1, ttmp1, 0xffff - s_rfe_b64 [ttmp0,ttmp1] - -end - -*******************************************************************************/ - -/// shader codes with "asic(TAHITI)" instruction -static constexpr uint32_t RuntimeTrapCode[] = { - 
0x7e008200, 0xbf8c0000, 0xbef8036c, 0x8779ff6d, 0x0000ffff, 0x8879ff79, 0x01000000, 0xbefa03ff, - 0x00002000, 0xbefb03ff, 0x00024fac, 0x80f8ff78, 0x00000100, 0xbef70300, 0xc2007900, 0xbf8c0000, - 0xbeee0300, 0xc2007901, 0xbf8c0000, 0xbeef0300, 0xbe800377, 0xbef60398, 0x8078766e, 0x8779ff6f, - 0x0000ffff, 0x8879ff79, 0x00680000, 0xbefa03ff, 0x00002000, 0xbefb03ff, 0x00024fac, 0xbef6036e, - 0xbef7036f, 0xbef30300, 0xc2007902, 0xbf8c0000, 0xbeee0300, 0xc2007903, 0xbf8c0000, 0xbeef0300, - 0xc2007900, 0xbf8c0000, 0xbef20300, 0xc2007901, 0xbf8c0000, 0x89737300, 0x89007300, 0x89737300, - 0xbef80372, 0xbef90373, 0xbef21f00, 0x80728872, 0xbe802078, 0xbeef0377, 0xbeee0376, 0x8771ff71, - 0x0000ffff, 0xbe802270}; - - -/// shader codes with "asic(VI)" instruction -static constexpr uint32_t RuntimeTrapCodeVi[] = { - 0x7e006a00, 0xbf8c0000, 0xbef8006c, 0x8679ff6d, 0x0000ffff, 0x8779ff79, 0x01000000, 0xbefa00ff, - 0x00002000, 0xbefb00ff, 0x00024fac, 0x80f8ff78, 0x00000100, 0xbef70000, 0xc022003c, 0x00000000, - 0xbf8c0000, 0xbeee0000, 0xc022003c, 0x00000004, 0xbf8c0000, 0xbeef0000, 0xbe800077, 0xbef60098, - 0x8078766e, 0x8679ff6f, 0x0000ffff, 0x8779ff79, 0x00680000, 0xbefa00ff, 0x00002000, 0xbefb00ff, - 0x00024fac, 0xbef6006e, 0xbef7006f, 0xbef30000, 0xc022003c, 0x00000008, 0xbf8c0000, 0xbeee0000, - 0xc022003c, 0x0000000c, 0xbf8c0000, 0xbeef0000, 0xc022003c, 0x00000000, 0xbf8c0000, 0xbef20000, - 0xc022003c, 0x00000004, 0xbf8c0000, 0x88737300, 0x88007300, 0x88737300, 0xbef80072, 0xbef90073, - 0xbef21c00, 0x80728872, 0xbe801d78, 0xbeef0077, 0xbeee0076, 0x8671ff71, 0x0000ffff, 0xbe801f70}; diff --git a/rocclr/device/pal/palvirtual.cpp b/rocclr/device/pal/palvirtual.cpp index dc68a82dca..9c917681d1 100644 --- a/rocclr/device/pal/palvirtual.cpp +++ b/rocclr/device/pal/palvirtual.cpp @@ -30,7 +30,6 @@ #include "device/pal/palthreadtrace.hpp" #include "device/pal/paltimestamp.hpp" #include "device/pal/palblit.hpp" -#include "device/pal/paldebugger.hpp" #include "device/appprofile.hpp" 
#include "device/devhostcall.hpp" #include "hsa.h" @@ -1024,11 +1023,6 @@ bool VirtualGPU::create(bool profiling, uint deviceQueueSize, uint rtCUs, // Fall through ... case Settings::BlitEngineCAL: case Settings::BlitEngineKernel: - // use host blit for HW debug - if (dev().settings().enableHwDebug_) { - blitSetup.disableCopyImageToBuffer_ = true; - blitSetup.disableCopyBufferToImage_ = true; - } blitMgr_ = new KernelBlitManager(*this, blitSetup); break; } @@ -3719,141 +3713,6 @@ void VirtualGPU::writeVQueueHeader(VirtualGPU& hostQ, const Memory* kernelTable) virtualQueue_->writeRawData(hostQ, 0, sizeof(AmdVQueueHeader), vqHeader_, true); } -void VirtualGPU::buildKernelInfo(const HSAILKernel& hsaKernel, hsa_kernel_dispatch_packet_t* aqlPkt, - HwDbgKernelInfo& kernelInfo, amd::Event* enqueueEvent) { -#if defined(WITH_COMPILER_LIB) - amd::HwDebugManager* dbgManager = dev().hwDebugMgr(); - assert(dbgManager && "No HW Debug Manager!"); - - // Initialize structure with default values - - if (hsaKernel.prog().maxScratchRegs() > 0) { - pal::Memory* scratchBuf = dev().scratch(hwRing())->memObj_; - kernelInfo.scratchBufAddr = scratchBuf->vmAddress(); - kernelInfo.scratchBufferSizeInBytes = scratchBuf->size(); - - // Get the address of the scratch buffer and its size for CPU access - address scratchRingAddr = static_cast
(scratchBuf->map(nullptr, 0)); - dbgManager->setScratchRing(scratchRingAddr, scratchBuf->size()); - scratchBuf->unmap(nullptr); - } else { - kernelInfo.scratchBufAddr = 0; - kernelInfo.scratchBufferSizeInBytes = 0; - dbgManager->setScratchRing(nullptr, 0); - } - - //! @todo: need to verify what is wanted for the global memory - Unimplemented(); - kernelInfo.heapBufAddr = 0; - - kernelInfo.pAqlDispatchPacket = aqlPkt; - kernelInfo.pAqlQueuePtr = reinterpret_cast(hsaQueueMem_->vmAddress()); - - // Get the address of the kernel code and its size for CPU access - /* pal::Memory* aqlCode = hsaKernel.gpuAqlCode(); - if (nullptr != aqlCode) { - address aqlCodeAddr = static_cast
(aqlCode->map(nullptr, 0)); - dbgManager->setKernelCodeInfo(aqlCodeAddr, hsaKernel.aqlCodeSize()); - aqlCode->unmap(nullptr); - } - else { - dbgManager->setKernelCodeInfo(nullptr, 0); - } - */ - kernelInfo.trapPresent = false; - kernelInfo.trapHandler = nullptr; - kernelInfo.trapHandlerBuffer = nullptr; - - kernelInfo.excpEn = 0; - kernelInfo.cacheDisableMask = 0; - kernelInfo.sqDebugMode = 0; - - kernelInfo.mgmtSe0Mask = 0xFFFFFFFF; - kernelInfo.mgmtSe1Mask = 0xFFFFFFFF; - - // set kernel info for HW debug and call the callback function - if (nullptr != dbgManager->preDispatchCallBackFunc()) { - DebugToolInfo dbgSetting = {0}; - dbgSetting.scratchAddress_ = kernelInfo.scratchBufAddr; - dbgSetting.scratchSize_ = kernelInfo.scratchBufferSizeInBytes; - dbgSetting.globalAddress_ = kernelInfo.heapBufAddr; - dbgSetting.aclBinary_ = hsaKernel.prog().binaryElf(); - dbgSetting.event_ = enqueueEvent; - - // Execute the pre-dispatch call back function - dbgManager->executePreDispatchCallBack(reinterpret_cast(aqlPkt), &dbgSetting); - - // assign the debug TMA and TBA for kernel dispatch - if (nullptr != dbgSetting.trapHandler_ && nullptr != dbgSetting.trapBuffer_) { - assignDebugTrapHandler(dbgSetting, kernelInfo); - } - - kernelInfo.trapPresent = (kernelInfo.trapHandler) ? true : false; - - // Exception policy - kernelInfo.excpEn = dbgSetting.exceptionMask_; - kernelInfo.cacheDisableMask = dbgSetting.cacheDisableMask_; - kernelInfo.sqDebugMode = dbgSetting.gpuSingleStepMode_; - - // Compute the mask for reserved CUs. These two dwords correspond to - // two registers used for reserving CUs for display. In the current - // implementation, the number of CUs reserved can be 0 to 7, and it - // is set by debugger users. 
- if (dbgSetting.monitorMode_) { - uint32_t i = dbgSetting.reservedCuNum_ / 2; - kernelInfo.mgmtSe0Mask <<= i; - i = dbgSetting.reservedCuNum_ - i; - kernelInfo.mgmtSe1Mask <<= i; - } - Unimplemented(); - /* - // flush/invalidate the instruction, data, L1 and L2 caches - InvalidateSqCaches(); - */ - } -#endif -} - -void VirtualGPU::assignDebugTrapHandler(const DebugToolInfo& dbgSetting, - HwDbgKernelInfo& kernelInfo) { - // setup the runtime trap handler code and trap buffer to be assigned before kernel dispatching - // - Memory* rtTrapHandlerMem = static_cast(dev().hwDebugMgr()->runtimeTBA()); - Memory* rtTrapBufferMem = static_cast(dev().hwDebugMgr()->runtimeTMA()); - - kernelInfo.trapHandler = reinterpret_cast(rtTrapHandlerMem->vmAddress() + TbaStartOffset); - // With the TMA corruption hw bug workaround, the trap handler buffer can be set to zero. - // However, by setting the runtime trap buffer (TMA) correct, the runtime trap hander - // without the workaround can still function correctly. - kernelInfo.trapHandlerBuffer = reinterpret_cast(rtTrapBufferMem->vmAddress()); - - address rtTrapBufferAddress = static_cast
(rtTrapBufferMem->map(this)); - - Memory* trapHandlerMem = dev().getGpuMemory(dbgSetting.trapHandler_); - Memory* trapBufferMem = dev().getGpuMemory(dbgSetting.trapBuffer_); - - // Address of the trap handler code/buffer should be 256-byte aligned - uint64_t tbaAddress = trapHandlerMem->vmAddress(); - uint64_t tmaAddress = trapBufferMem->vmAddress(); - if ((tbaAddress & 0xFF) != 0 || (tmaAddress & 0xFF) != 0) { - assert(false && "Trap handler/buffer is not 256-byte aligned"); - } - - // The addresses of the debug trap handler code (TBA) and buffer (TMA) are - // stored in the runtime trap handler buffer with offset location of 0x18-19 - // and 0x20-21, respectively. - uint64_t* rtTmaPtr = reinterpret_cast(rtTrapBufferAddress + 0x18); - rtTmaPtr[0] = tbaAddress; - rtTmaPtr[1] = tmaAddress; - - rtTrapBufferMem->unmap(nullptr); - // Add GPU mem handles to the memory list for VidMM - addVmMemory(trapHandlerMem); - addVmMemory(trapBufferMem); - addVmMemory(rtTrapHandlerMem); - addVmMemory(rtTrapBufferMem); -} - bool VirtualGPU::validateSdmaOverlap(const Resource& src, const Resource& dst) { uint64_t srcVmEnd = src.vmAddress() + src.vmSize(); if (((src.vmAddress() >= sdmaRange_.start_) && (src.vmAddress() <= sdmaRange_.end_)) || diff --git a/rocclr/device/pal/palvirtual.hpp b/rocclr/device/pal/palvirtual.hpp index 5bfad81e39..7a3b562d8f 100644 --- a/rocclr/device/pal/palvirtual.hpp +++ b/rocclr/device/pal/palvirtual.hpp @@ -26,7 +26,6 @@ #include "device/pal/palprintf.hpp" #include "device/pal/paltimestamp.hpp" #include "device/pal/palsched.hpp" -#include "device/pal/paldebugger.hpp" #include "device/pal/palgpuopen.hpp" #include "platform/commandqueue.hpp" #include "device/blit.hpp" @@ -634,16 +633,6 @@ class VirtualGPU : public device::VirtualDevice { const amd::BufferRect& dstRect //!< region of destination for copy ); - void buildKernelInfo(const HSAILKernel& hsaKernel, //!< hsa kernel - hsa_kernel_dispatch_packet_t* aqlPkt, //!< aql packet for dispatch - 
HwDbgKernelInfo& kernelInfo, //!< kernel info for the dispatch - amd::Event* enqueueEvent //!< Event provided in the enqueue kernel command - ); - - void assignDebugTrapHandler(const DebugToolInfo& dbgSetting, //!< debug settings - HwDbgKernelInfo& kernelInfo //!< kernel info for the dispatch - ); - void PrintChildren(const HSAILKernel& hsaKernel, //!< The parent HSAIL kernel VirtualGPU* gpuDefQueue //!< Device queue for children execution ); diff --git a/rocclr/utils/flags.hpp b/rocclr/utils/flags.hpp index 66ae1a0b27..901d078d72 100644 --- a/rocclr/utils/flags.hpp +++ b/rocclr/utils/flags.hpp @@ -154,8 +154,6 @@ release(uint, OCL_SET_SVM_SIZE, 4*16384, \ "set SVM space size for discrete GPU") \ debug(uint, OCL_SYSMEM_REQUIREMENT, 2, \ "Use flag to change the minimum requirement of system memory not to downgrade") \ -debug(bool, GPU_ENABLE_HW_DEBUG, false, \ - "Enable HW DEBUG for GPU") \ release(uint, GPU_WAVES_PER_SIMD, 0, \ "Force the number of waves per SIMD (1-10)") \ release(bool, GPU_WAVE_LIMIT_ENABLE, false, \