From 436ca8ad4d9cd750675cbc3cee700e2598edd4c4 Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 2 Jun 2016 14:26:40 -0400
Subject: [PATCH] P4 to Git Change 1275745 by gujin@gujin-sc-win on 2016/06/02
13:34:16
SWDEV-76911 - Set the output pipeline of multi-shader compilation the same way the driver sets it. Fill the output shader pointers only if they will be generated based on the input shaders; otherwise set them to NULL.
ReviewBoardURL = http://dxreview.amd.com/r/20059/
Affected files ...
... //depot/stg/sc/Src/Dev/TestEngine.cpp#512 edit
---
rocclr/runtime/device/gpu/gpukernel.cpp | 12 +++--------
rocclr/runtime/device/gpu/gpukernel.hpp | 3 +--
rocclr/runtime/device/gpu/gpuvirtual.cpp | 20 ++++---------------
.../device/gpu/gslbe/src/rt/GSLDeviceGL.cpp | 11 ----------
rocclr/runtime/platform/kernel.hpp | 11 +---------
5 files changed, 9 insertions(+), 48 deletions(-)
diff --git a/rocclr/runtime/device/gpu/gpukernel.cpp b/rocclr/runtime/device/gpu/gpukernel.cpp
index 50b5d5c827..4aa8b539ae 100644
--- a/rocclr/runtime/device/gpu/gpukernel.cpp
+++ b/rocclr/runtime/device/gpu/gpukernel.cpp
@@ -1567,7 +1567,7 @@ Kernel::loadParameters(
}
bool
-Kernel::run(VirtualGPU& gpu, GpuEvent* calEvent, bool lastRun, bool lastDoppCmd) const
+Kernel::run(VirtualGPU& gpu, GpuEvent* calEvent, bool lastRun) const
{
const VirtualGPU::CalVirtualDesc* dispatch = gpu.cal();
@@ -1576,7 +1576,7 @@ Kernel::run(VirtualGPU& gpu, GpuEvent* calEvent, bool lastRun, bool lastDoppCmd)
gpu.eventBegin(MainEngine);
gpu.rs()->Dispatch(gpu.cs(), &dispatch->gridBlock, &dispatch->partialGridBlock,
- &dispatch->gridSize, dispatch->localSize, gpu.vmMems(), dispatch->memCount_, lastDoppCmd);
+ &dispatch->gridSize, dispatch->localSize, gpu.vmMems(), dispatch->memCount_);
gpu.eventEnd(MainEngine, *calEvent);
// Unbind all resources
@@ -1889,12 +1889,6 @@ Kernel::setArgument(
copyImageConstants(gpuMem->owner()->asImage(),
reinterpret_cast(memory + arg->cbPos_));
}
-
- // Handle DOPP texture resource
- gslMemObject gslMem = gpuMem->gslResource();
- if (gslMem->getAttribs().isDOPPDesktopTexture) {
- gpu.addVmMemory(gpuMem);
- }
}
break;
case KernelArg::Sampler:
@@ -3464,7 +3458,7 @@ HSAILKernel::init(amd::hsa::loader::Symbol *sym, bool finalize)
// Copy wavefront size
workGroupInfo_.wavefrontSize_ = prog().isNull() ? 64 : dev().getAttribs().wavefrontSize;
-
+
// Find total workgroup size
if (workGroupInfo_.compileSize_[0] != 0) {
workGroupInfo_.size_ =
diff --git a/rocclr/runtime/device/gpu/gpukernel.hpp b/rocclr/runtime/device/gpu/gpukernel.hpp
index fd383d757a..bdb5455ef6 100644
--- a/rocclr/runtime/device/gpu/gpukernel.hpp
+++ b/rocclr/runtime/device/gpu/gpukernel.hpp
@@ -620,8 +620,7 @@ public:
bool run(
VirtualGPU& gpu, //!< virtual GPU device object
GpuEvent* gpuEvent, //!< Pointer to the GPU event
- bool lastRun, //!< Last run in the split execution
- bool lastDoppCmd //!< info for kernel dispatch
+ bool lastRun //!< Last run in the split execution
) const;
//! Help function to debug the kernel output
diff --git a/rocclr/runtime/device/gpu/gpuvirtual.cpp b/rocclr/runtime/device/gpu/gpuvirtual.cpp
index a5a8757b42..2ec9386fae 100644
--- a/rocclr/runtime/device/gpu/gpuvirtual.cpp
+++ b/rocclr/runtime/device/gpu/gpuvirtual.cpp
@@ -472,7 +472,7 @@ VirtualGPU::create(bool profiling, uint rtCUs, uint deviceQueueSize,
}
//!@todo This is not a generic solution and
// may have issues with > 8 queues
- idx = index() % (dev().engines().numComputeRings() +
+ idx = index() % (dev().engines().numComputeRings() +
dev().engines().numComputeRingsRT());
}
// hwRing_ should be set 0 if forced to have single scratch buffer
@@ -1839,23 +1839,11 @@ VirtualGPU::submitKernelInternalHSA(
pKernelInfo = &kernelInfo;
}
- // Set up the dispatch information
- KernelDispatchInfo dispatchInfo;
- dispatchInfo.aqlPacket = aqlPkt;
- dispatchInfo.mems = vmMems();
- dispatchInfo.numMems = cal_.memCount_;
- dispatchInfo.scratch = scratch;
- dispatchInfo.scratchOffset = scratchOffset;
- dispatchInfo.cpuAqlCode = hsaKernel.cpuAqlCode();
- dispatchInfo.hsaQueueVA = hsaQueueMem_->vmAddress();
- dispatchInfo.kernelInfo = pKernelInfo;
- dispatchInfo.wavesPerSH = hsaKernel.getWavesPerSH(this);
- dispatchInfo.lastDoppSubmission = kernel.parameters().getExecNewVcop();
-
GpuEvent gpuEvent;
// Run AQL dispatch in HW
eventBegin(MainEngine);
- cs()->AqlDispatch(&dispatchInfo);
+ cs()->AqlDispatch(aqlPkt, vmMems(), cal_.memCount_, scratch, scratchOffset,
+ hsaKernel.cpuAqlCode(), hsaQueueMem_->vmAddress(), pKernelInfo, hsaKernel.getWavesPerSH(this));
eventEnd(MainEngine, gpuEvent);
if (dbgManager && (NULL != dbgManager->postDispatchCallBackFunc())) {
@@ -2155,7 +2143,7 @@ VirtualGPU::submitKernelInternal(
}
// Execute the kernel
- if (gpuKernel.run(*this, &gpuEvent, lastRun, kernel.parameters().getExecNewVcop())) {
+ if (gpuKernel.run(*this, &gpuEvent, lastRun)) {
//! @todo A flush is necessary to make sure
// that 2 consecutive runs won't access to the same
// private/local memory. CAL has to generate cache flush
diff --git a/rocclr/runtime/device/gpu/gslbe/src/rt/GSLDeviceGL.cpp b/rocclr/runtime/device/gpu/gslbe/src/rt/GSLDeviceGL.cpp
index a8bdf336c1..ef8b14070e 100644
--- a/rocclr/runtime/device/gpu/gslbe/src/rt/GSLDeviceGL.cpp
+++ b/rocclr/runtime/device/gpu/gslbe/src/rt/GSLDeviceGL.cpp
@@ -745,17 +745,6 @@ CALGSLDevice::resGLAssociate(GLResAssociate & resData) const
attribs.cpu_address = (void*)hData->handle;
attribs.alias_subtile = hData->tilingMode;
attribs.mcaddress = hData->cardAddr;
- if (hData->isDoppDesktopTexture == GL_TRUE)
- {
- attribs.isDOPPDesktopTexture = ATIGL_TRUE;
- attribs.displayable = GSL_MOA_DISPLAYABLE_LAYOUT;
- }
- if (hData->isDoppPresentTexture == GL_TRUE)
- {
- attribs.displayable = GSL_MOA_DISPLAYABLE_LAYOUT;
- }
-
-
// VBOs are hardcoded to have a UINT8 type format
if (hRes.type == GL_RESOURCE_ATTACH_VERTEXBUFFER_AMD)
{
diff --git a/rocclr/runtime/platform/kernel.hpp b/rocclr/runtime/platform/kernel.hpp
index 86293b3135..cf679e83bf 100644
--- a/rocclr/runtime/platform/kernel.hpp
+++ b/rocclr/runtime/platform/kernel.hpp
@@ -83,13 +83,11 @@ private:
std::vector execSvmPtr_; //!< The non argument svm pointers for kernel
FGSStatus svmSystemPointersSupport_; //!< The flag for the status of the kernel
// support of fine-grain system sharing.
- bool execNewVcop_; //!< special new VCOP for kernel execution
public:
//! Construct a new instance of parameters for the given signature.
KernelParameters(const KernelSignature& signature) :
- signature_(signature), validated_(false), execInfoOffset_(0), svmSystemPointersSupport_(FGS_DEFAULT),
- execNewVcop_(false)
+ signature_(signature), validated_(false), execInfoOffset_(0), svmSystemPointersSupport_(FGS_DEFAULT)
{
values_ = (address) this + alignUp(sizeof(KernelParameters), 16);
defined_ = (bool*) (values_ + signature.paramsSize());
@@ -168,13 +166,6 @@ public:
//! return the status of kernel support fine-grained SVM system pointer sharing
FGSStatus getSvmSystemPointersSupport() const { return svmSystemPointersSupport_; }
-
- //! set the new VCOP in the execInfo container
- void setExecNewVcop(const bool newVcop) { execNewVcop_ = newVcop; }
-
- //! get the new VCOP in the execInfo container
- bool getExecNewVcop() const { return execNewVcop_; }
-
};
/*! \brief Encapsulates a __kernel function and the argument values