P4 to Git Change 1114420 by wchau@wchau_WINDOWS7_OCL on 2015/01/22 16:45:46

ECR #399840 - OpenCL Runtime HW Debug support development - set aclBinary & event in the pre-dispatch callback function

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugger.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#347 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#125 edit
Cette révision appartient à :
foreman
2015-01-22 17:05:58 -05:00
Parent 9049f11ef4
révision fe098712df
4 fichiers modifiés avec 24 ajouts et 9 suppressions
+3
Voir le fichier
@@ -18,6 +18,7 @@
#include "sc-hsa/Interface/SCHSAInterface.h"
#include "device/device.hpp"
#include "device/hwdebug.hpp"
+#include "acl.h"
static const int NumberReserveVgprs = 4;
@@ -93,6 +94,8 @@ struct DebugToolInfo
amd::Memory* trapHandler_; //! Trap handler address
amd::Memory* trapBuffer_; //! Trap buffer address
bool sqPerfcounterEnable_; //! whether SQ perf counters are enabled
+aclBinary* aclBinary_; //! pointer of the kernel ACL binary
+amd::Event* event_; //! pointer of the kernel event in the enqueue command
};
/*! \brief Message used by the KFD wave control for CI
+3
Voir le fichier
@@ -85,6 +85,9 @@ GpuDebugManager::executePreDispatchCallBack(void* aqlPacket,
// // for invalidate cache (BuildEndOfKernelNotifyCommands)
// aqlPacket->release_fence_scope = 2;
+aclBinary_ = reinterpret_cast<void*>(info->aclBinary_);
+oclEventHandle_ = reinterpret_cast<void*>(as_cl(info->event_));
cl_device_id clDeviceId = as_cl(device_);
preDispatchCallBackFunc_(clDeviceId,
oclEventHandle_,
+12 -6
Voir le fichier
@@ -1627,7 +1627,8 @@ VirtualGPU::submitKernel(amd::NDRangeKernelCommand& vcmd)
profilingBegin(vcmd);
// Submit kernel to HW
-if (!submitKernelInternal(vcmd.sizes(), vcmd.kernel(), vcmd.parameters(), false)) {
+if (!submitKernelInternal(vcmd.sizes(), vcmd.kernel(), vcmd.parameters(), false,
+&vcmd.event())) {
vcmd.setStatus(CL_INVALID_OPERATION);
}
@@ -1639,7 +1640,8 @@ VirtualGPU::submitKernelInternalHSA(
const amd::NDRangeContainer& sizes,
const amd::Kernel& kernel,
const_address parameters,
-bool nativeMem)
+bool nativeMem,
+amd::Event* enqueueEvent)
{
uint64_t vmParentWrap = 0;
uint64_t vmDefQueue = 0;
@@ -1766,7 +1768,7 @@ VirtualGPU::submitKernelInternalHSA(
HwDbgKernelInfo *pKernelInfo = NULL;
if (useHwDebug_) {
-buildKernelInfo(hsaKernel, aqlPkt, kernelInfo);
+buildKernelInfo(hsaKernel, aqlPkt, kernelInfo, enqueueEvent);
pKernelInfo = &kernelInfo;
}
@@ -1982,7 +1984,8 @@ VirtualGPU::submitKernelInternal(
const amd::NDRangeContainer& sizes,
const amd::Kernel& kernel,
const_address parameters,
-bool nativeMem)
+bool nativeMem,
+amd::Event* enqueueEvent)
{
bool result = true;
uint i;
@@ -1999,7 +2002,7 @@ VirtualGPU::submitKernelInternal(
Kernel& gpuKernelOpt = static_cast<gpu::Kernel&>(*devKernel);
if (gpuKernelOpt.hsa()) {
-return submitKernelInternalHSA(sizes, kernel, parameters, nativeMem);
+return submitKernelInternalHSA(sizes, kernel, parameters, nativeMem, enqueueEvent);
}
else if (state_.hsailKernel_) {
// Reload GSL state to HW, so runtime could run AMDIL kernel
@@ -3458,7 +3461,8 @@ VirtualGPU::flushCuCaches(HwDbgGpuCacheMask cache_mask)
void
VirtualGPU::buildKernelInfo(const HSAILKernel& hsaKernel,
hsa_kernel_dispatch_packet_t* aqlPkt,
-HwDbgKernelInfo& kernelInfo)
+HwDbgKernelInfo& kernelInfo,
+amd::Event* enqueueEvent)
{
amd::HwDebugManager * dbgManager = dev().hwDebugMgr();
assert (dbgManager && "No HW Debug Manager!");
@@ -3517,6 +3521,8 @@ VirtualGPU::buildKernelInfo(const HSAILKernel& hsaKernel,
dbgSetting.scratchAddress_ = kernelInfo.scratchBufAddr;
dbgSetting.scratchSize_ = kernelInfo.scratchBufferSizeInBytes;
dbgSetting.globalAddress_ = kernelInfo.heapBufAddr;
+dbgSetting.aclBinary_ = hsaKernel.prog().binaryElf();
+dbgSetting.event_ = enqueueEvent;
// Call the predispatch callback function & set the trap info
AqlCodeInfo aqlCodeInfo;
+6 -3
Voir le fichier
@@ -223,13 +223,15 @@ public:
const amd::NDRangeContainer& sizes, //!< Workload sizes
const amd::Kernel& kernel, //!< Kernel for execution
const_address parameters, //!< Parameters for the kernel
-bool nativeMem = true //!< Native memory objects
+bool nativeMem = true, //!< Native memory objects
+amd::Event* enqueueEvent = NULL //!< Event provided in the enqueue kernel command
);
bool submitKernelInternalHSA(
const amd::NDRangeContainer& sizes, //!< Workload sizes
const amd::Kernel& kernel, //!< Kernel for execution
const_address parameters, //!< Parameters for the kernel
-bool nativeMem = true //!< Native memory objects
+bool nativeMem = true, //!< Native memory objects
+amd::Event* enqueueEvent = NULL //!< Event provided in the enqueue kernel command
);
void submitNativeFn(amd::NativeFnCommand& vcmd);
void submitFillMemory(amd::FillMemoryCommand& vcmd);
@@ -505,7 +507,8 @@ private:
void buildKernelInfo(
const HSAILKernel& hsaKernel, //!< hsa kernel
hsa_kernel_dispatch_packet_t* aqlPkt, //!< aql packet for dispatch
-HwDbgKernelInfo& kernelInfo //!< kernel info for the dispatch
+HwDbgKernelInfo& kernelInfo, //!< kernel info for the dispatch
+amd::Event* enqueueEvent //!< Event provided in the enqueue kernel command
);
void assignTrapHandler(