From 9df56bf9f007ce6d2bf06ff8718204b7706ee7e9 Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 22 Jan 2015 17:05:58 -0500
Subject: [PATCH] P4 to Git Change 1114420 by wchau@wchau_WINDOWS7_OCL on
2015/01/22 16:45:46
ECR #399840 - OpenCL Runtime HW Debug support development - set aclBinary & event in the pre-dispatch callback function
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugger.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#347 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#125 edit
[ROCm/clr commit: fe098712df7bfc7dec780904bdaf8ba58b64913c]
---
.../rocclr/runtime/device/gpu/gpudebugger.hpp | 3 +++
.../runtime/device/gpu/gpudebugmanager.cpp | 3 +++
.../rocclr/runtime/device/gpu/gpuvirtual.cpp | 18 ++++++++++++------
.../rocclr/runtime/device/gpu/gpuvirtual.hpp | 9 ++++++---
4 files changed, 24 insertions(+), 9 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudebugger.hpp b/projects/clr/rocclr/runtime/device/gpu/gpudebugger.hpp
index 34a78b50d0..2098808c6e 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpudebugger.hpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpudebugger.hpp
@@ -18,6 +18,7 @@
#include "sc-hsa/Interface/SCHSAInterface.h"
#include "device/device.hpp"
#include "device/hwdebug.hpp"
+#include "acl.h"
static const int NumberReserveVgprs = 4;
@@ -93,6 +94,8 @@ struct DebugToolInfo
amd::Memory* trapHandler_; //! Trap handler address
amd::Memory* trapBuffer_; //! Trap buffer address
bool sqPerfcounterEnable_; //! whether SQ perf counters are enabled
+ aclBinary* aclBinary_; //! pointer to the kernel ACL binary
+ amd::Event* event_; //! pointer to the event of the enqueue kernel command
};
/*! \brief Message used by the KFD wave control for CI
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudebugmanager.cpp b/projects/clr/rocclr/runtime/device/gpu/gpudebugmanager.cpp
index 426f58e13e..c08ffcf1ce 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpudebugmanager.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpudebugmanager.cpp
@@ -85,6 +85,9 @@ GpuDebugManager::executePreDispatchCallBack(void* aqlPacket,
// // for invalidate cache (BuildEndOfKernelNotifyCommands)
// aqlPacket->release_fence_scope = 2;
+ aclBinary_ = reinterpret_cast(info->aclBinary_);
+ oclEventHandle_ = reinterpret_cast(as_cl(info->event_));
+
cl_device_id clDeviceId = as_cl(device_);
preDispatchCallBackFunc_(clDeviceId,
oclEventHandle_,
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
index cad8d7e4dc..f4e656413e 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
@@ -1627,7 +1627,8 @@ VirtualGPU::submitKernel(amd::NDRangeKernelCommand& vcmd)
profilingBegin(vcmd);
// Submit kernel to HW
- if (!submitKernelInternal(vcmd.sizes(), vcmd.kernel(), vcmd.parameters(), false)) {
+ if (!submitKernelInternal(vcmd.sizes(), vcmd.kernel(), vcmd.parameters(), false,
+ &vcmd.event())) {
vcmd.setStatus(CL_INVALID_OPERATION);
}
@@ -1639,7 +1640,8 @@ VirtualGPU::submitKernelInternalHSA(
const amd::NDRangeContainer& sizes,
const amd::Kernel& kernel,
const_address parameters,
- bool nativeMem)
+ bool nativeMem,
+ amd::Event* enqueueEvent)
{
uint64_t vmParentWrap = 0;
uint64_t vmDefQueue = 0;
@@ -1766,7 +1768,7 @@ VirtualGPU::submitKernelInternalHSA(
HwDbgKernelInfo *pKernelInfo = NULL;
if (useHwDebug_) {
- buildKernelInfo(hsaKernel, aqlPkt, kernelInfo);
+ buildKernelInfo(hsaKernel, aqlPkt, kernelInfo, enqueueEvent);
pKernelInfo = &kernelInfo;
}
@@ -1982,7 +1984,8 @@ VirtualGPU::submitKernelInternal(
const amd::NDRangeContainer& sizes,
const amd::Kernel& kernel,
const_address parameters,
- bool nativeMem)
+ bool nativeMem,
+ amd::Event* enqueueEvent)
{
bool result = true;
uint i;
@@ -1999,7 +2002,7 @@ VirtualGPU::submitKernelInternal(
Kernel& gpuKernelOpt = static_cast(*devKernel);
if (gpuKernelOpt.hsa()) {
- return submitKernelInternalHSA(sizes, kernel, parameters, nativeMem);
+ return submitKernelInternalHSA(sizes, kernel, parameters, nativeMem, enqueueEvent);
}
else if (state_.hsailKernel_) {
// Reload GSL state to HW, so runtime could run AMDIL kernel
@@ -3458,7 +3461,8 @@ VirtualGPU::flushCuCaches(HwDbgGpuCacheMask cache_mask)
void
VirtualGPU::buildKernelInfo(const HSAILKernel& hsaKernel,
hsa_kernel_dispatch_packet_t* aqlPkt,
- HwDbgKernelInfo& kernelInfo)
+ HwDbgKernelInfo& kernelInfo,
+ amd::Event* enqueueEvent)
{
amd::HwDebugManager * dbgManager = dev().hwDebugMgr();
assert (dbgManager && "No HW Debug Manager!");
@@ -3517,6 +3521,8 @@ VirtualGPU::buildKernelInfo(const HSAILKernel& hsaKernel,
dbgSetting.scratchAddress_ = kernelInfo.scratchBufAddr;
dbgSetting.scratchSize_ = kernelInfo.scratchBufferSizeInBytes;
dbgSetting.globalAddress_ = kernelInfo.heapBufAddr;
+ dbgSetting.aclBinary_ = hsaKernel.prog().binaryElf();
+ dbgSetting.event_ = enqueueEvent;
// Call the predispatch callback function & set the trap info
AqlCodeInfo aqlCodeInfo;
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.hpp b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.hpp
index 5585f51823..76d156cfac 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.hpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.hpp
@@ -223,13 +223,15 @@ public:
const amd::NDRangeContainer& sizes, //!< Workload sizes
const amd::Kernel& kernel, //!< Kernel for execution
const_address parameters, //!< Parameters for the kernel
- bool nativeMem = true //!< Native memory objects
+ bool nativeMem = true, //!< Native memory objects
+ amd::Event* enqueueEvent = NULL //!< Event provided in the enqueue kernel command
);
bool submitKernelInternalHSA(
const amd::NDRangeContainer& sizes, //!< Workload sizes
const amd::Kernel& kernel, //!< Kernel for execution
const_address parameters, //!< Parameters for the kernel
- bool nativeMem = true //!< Native memory objects
+ bool nativeMem = true, //!< Native memory objects
+ amd::Event* enqueueEvent = NULL //!< Event provided in the enqueue kernel command
);
void submitNativeFn(amd::NativeFnCommand& vcmd);
void submitFillMemory(amd::FillMemoryCommand& vcmd);
@@ -505,7 +507,8 @@ private:
void buildKernelInfo(
const HSAILKernel& hsaKernel, //!< hsa kernel
hsa_kernel_dispatch_packet_t* aqlPkt, //!< aql packet for dispatch
- HwDbgKernelInfo& kernelInfo //!< kernel info for the dispatch
+ HwDbgKernelInfo& kernelInfo, //!< kernel info for the dispatch
+ amd::Event* enqueueEvent //!< Event provided in the enqueue kernel command
);
void assignTrapHandler(