P4 to Git Change 1114420 by wchau@wchau_WINDOWS7_OCL on 2015/01/22 16:45:46
ECR #399840 - OpenCL Runtime HW Debug support development - set aclBinary & event in the pre-dispatch callback function
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugger.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#347 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#125 edit
[ROCm/clr commit: fe098712df]
This commit is contained in:
@@ -18,6 +18,7 @@
|
||||
#include "sc-hsa/Interface/SCHSAInterface.h"
|
||||
#include "device/device.hpp"
|
||||
#include "device/hwdebug.hpp"
|
||||
#include "acl.h"
|
||||
|
||||
static const int NumberReserveVgprs = 4;
|
||||
|
||||
@@ -93,6 +94,8 @@ struct DebugToolInfo
|
||||
amd::Memory* trapHandler_; //! Trap handler address
|
||||
amd::Memory* trapBuffer_; //! Trap buffer address
|
||||
bool sqPerfcounterEnable_; //! whether SQ perf counters are enabled
|
||||
aclBinary* aclBinary_; //! pointer of the kernel ACL binary
|
||||
amd::Event* event_; //! pointer of the kernel event in the enqueue command
|
||||
};
|
||||
|
||||
/*! \brief Message used by the KFD wave control for CI
|
||||
|
||||
@@ -85,6 +85,9 @@ GpuDebugManager::executePreDispatchCallBack(void* aqlPacket,
|
||||
// // for invalidate cache (BuildEndOfKernelNotifyCommands)
|
||||
// aqlPacket->release_fence_scope = 2;
|
||||
|
||||
aclBinary_ = reinterpret_cast<void*>(info->aclBinary_);
|
||||
oclEventHandle_ = reinterpret_cast<void*>(as_cl(info->event_));
|
||||
|
||||
cl_device_id clDeviceId = as_cl(device_);
|
||||
preDispatchCallBackFunc_(clDeviceId,
|
||||
oclEventHandle_,
|
||||
|
||||
@@ -1627,7 +1627,8 @@ VirtualGPU::submitKernel(amd::NDRangeKernelCommand& vcmd)
|
||||
profilingBegin(vcmd);
|
||||
|
||||
// Submit kernel to HW
|
||||
if (!submitKernelInternal(vcmd.sizes(), vcmd.kernel(), vcmd.parameters(), false)) {
|
||||
if (!submitKernelInternal(vcmd.sizes(), vcmd.kernel(), vcmd.parameters(), false,
|
||||
&vcmd.event())) {
|
||||
vcmd.setStatus(CL_INVALID_OPERATION);
|
||||
}
|
||||
|
||||
@@ -1639,7 +1640,8 @@ VirtualGPU::submitKernelInternalHSA(
|
||||
const amd::NDRangeContainer& sizes,
|
||||
const amd::Kernel& kernel,
|
||||
const_address parameters,
|
||||
bool nativeMem)
|
||||
bool nativeMem,
|
||||
amd::Event* enqueueEvent)
|
||||
{
|
||||
uint64_t vmParentWrap = 0;
|
||||
uint64_t vmDefQueue = 0;
|
||||
@@ -1766,7 +1768,7 @@ VirtualGPU::submitKernelInternalHSA(
|
||||
HwDbgKernelInfo *pKernelInfo = NULL;
|
||||
|
||||
if (useHwDebug_) {
|
||||
buildKernelInfo(hsaKernel, aqlPkt, kernelInfo);
|
||||
buildKernelInfo(hsaKernel, aqlPkt, kernelInfo, enqueueEvent);
|
||||
pKernelInfo = &kernelInfo;
|
||||
}
|
||||
|
||||
@@ -1982,7 +1984,8 @@ VirtualGPU::submitKernelInternal(
|
||||
const amd::NDRangeContainer& sizes,
|
||||
const amd::Kernel& kernel,
|
||||
const_address parameters,
|
||||
bool nativeMem)
|
||||
bool nativeMem,
|
||||
amd::Event* enqueueEvent)
|
||||
{
|
||||
bool result = true;
|
||||
uint i;
|
||||
@@ -1999,7 +2002,7 @@ VirtualGPU::submitKernelInternal(
|
||||
Kernel& gpuKernelOpt = static_cast<gpu::Kernel&>(*devKernel);
|
||||
|
||||
if (gpuKernelOpt.hsa()) {
|
||||
return submitKernelInternalHSA(sizes, kernel, parameters, nativeMem);
|
||||
return submitKernelInternalHSA(sizes, kernel, parameters, nativeMem, enqueueEvent);
|
||||
}
|
||||
else if (state_.hsailKernel_) {
|
||||
// Reload GSL state to HW, so runtime could run AMDIL kernel
|
||||
@@ -3458,7 +3461,8 @@ VirtualGPU::flushCuCaches(HwDbgGpuCacheMask cache_mask)
|
||||
void
|
||||
VirtualGPU::buildKernelInfo(const HSAILKernel& hsaKernel,
|
||||
hsa_kernel_dispatch_packet_t* aqlPkt,
|
||||
HwDbgKernelInfo& kernelInfo)
|
||||
HwDbgKernelInfo& kernelInfo,
|
||||
amd::Event* enqueueEvent)
|
||||
{
|
||||
amd::HwDebugManager * dbgManager = dev().hwDebugMgr();
|
||||
assert (dbgManager && "No HW Debug Manager!");
|
||||
@@ -3517,6 +3521,8 @@ VirtualGPU::buildKernelInfo(const HSAILKernel& hsaKernel,
|
||||
dbgSetting.scratchAddress_ = kernelInfo.scratchBufAddr;
|
||||
dbgSetting.scratchSize_ = kernelInfo.scratchBufferSizeInBytes;
|
||||
dbgSetting.globalAddress_ = kernelInfo.heapBufAddr;
|
||||
dbgSetting.aclBinary_ = hsaKernel.prog().binaryElf();
|
||||
dbgSetting.event_ = enqueueEvent;
|
||||
|
||||
// Call the predispatch callback function & set the trap info
|
||||
AqlCodeInfo aqlCodeInfo;
|
||||
|
||||
@@ -223,13 +223,15 @@ public:
|
||||
const amd::NDRangeContainer& sizes, //!< Workload sizes
|
||||
const amd::Kernel& kernel, //!< Kernel for execution
|
||||
const_address parameters, //!< Parameters for the kernel
|
||||
bool nativeMem = true //!< Native memory objects
|
||||
bool nativeMem = true, //!< Native memory objects
|
||||
amd::Event* enqueueEvent = NULL //!< Event provided in the enqueue kernel command
|
||||
);
|
||||
bool submitKernelInternalHSA(
|
||||
const amd::NDRangeContainer& sizes, //!< Workload sizes
|
||||
const amd::Kernel& kernel, //!< Kernel for execution
|
||||
const_address parameters, //!< Parameters for the kernel
|
||||
bool nativeMem = true //!< Native memory objects
|
||||
bool nativeMem = true, //!< Native memory objects
|
||||
amd::Event* enqueueEvent = NULL //!< Event provided in the enqueue kernel command
|
||||
);
|
||||
void submitNativeFn(amd::NativeFnCommand& vcmd);
|
||||
void submitFillMemory(amd::FillMemoryCommand& vcmd);
|
||||
@@ -505,7 +507,8 @@ private:
|
||||
void buildKernelInfo(
|
||||
const HSAILKernel& hsaKernel, //!< hsa kernel
|
||||
hsa_kernel_dispatch_packet_t* aqlPkt, //!< aql packet for dispatch
|
||||
HwDbgKernelInfo& kernelInfo //!< kernel info for the dispatch
|
||||
HwDbgKernelInfo& kernelInfo, //!< kernel info for the dispatch
|
||||
amd::Event* enqueueEvent //!< Event provided in the enqueue kernel command
|
||||
);
|
||||
|
||||
void assignTrapHandler(
|
||||
|
||||
Reference in new issue
Block a user