From 436ca8ad4d9cd750675cbc3cee700e2598edd4c4 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 2 Jun 2016 14:26:40 -0400 Subject: [PATCH] P4 to Git Change 1275745 by gujin@gujin-sc-win on 2016/06/02 13:34:16 SWDEV-76911 - Set output pipeline of multi-shader compilation in the same way driver sets. Fill output shader pointers only if they will be generated based on the input shaders, otherwise NULL. ReviewBoardURL = http://dxreview.amd.com/r/20059/ Affected files ... ... //depot/stg/sc/Src/Dev/TestEngine.cpp#512 edit --- rocclr/runtime/device/gpu/gpukernel.cpp | 12 +++-------- rocclr/runtime/device/gpu/gpukernel.hpp | 3 +-- rocclr/runtime/device/gpu/gpuvirtual.cpp | 20 ++++--------------- .../device/gpu/gslbe/src/rt/GSLDeviceGL.cpp | 11 ---------- rocclr/runtime/platform/kernel.hpp | 11 +--------- 5 files changed, 9 insertions(+), 48 deletions(-) diff --git a/rocclr/runtime/device/gpu/gpukernel.cpp b/rocclr/runtime/device/gpu/gpukernel.cpp index 50b5d5c827..4aa8b539ae 100644 --- a/rocclr/runtime/device/gpu/gpukernel.cpp +++ b/rocclr/runtime/device/gpu/gpukernel.cpp @@ -1567,7 +1567,7 @@ Kernel::loadParameters( } bool -Kernel::run(VirtualGPU& gpu, GpuEvent* calEvent, bool lastRun, bool lastDoppCmd) const +Kernel::run(VirtualGPU& gpu, GpuEvent* calEvent, bool lastRun) const { const VirtualGPU::CalVirtualDesc* dispatch = gpu.cal(); @@ -1576,7 +1576,7 @@ Kernel::run(VirtualGPU& gpu, GpuEvent* calEvent, bool lastRun, bool lastDoppCmd) gpu.eventBegin(MainEngine); gpu.rs()->Dispatch(gpu.cs(), &dispatch->gridBlock, &dispatch->partialGridBlock, - &dispatch->gridSize, dispatch->localSize, gpu.vmMems(), dispatch->memCount_, lastDoppCmd); + &dispatch->gridSize, dispatch->localSize, gpu.vmMems(), dispatch->memCount_); gpu.eventEnd(MainEngine, *calEvent); // Unbind all resources @@ -1889,12 +1889,6 @@ Kernel::setArgument( copyImageConstants(gpuMem->owner()->asImage(), reinterpret_cast(memory + arg->cbPos_)); } - - // Handle DOPP texture resource - gslMemObject gslMem = gpuMem->gslResource(); - if (gslMem->getAttribs().isDOPPDesktopTexture) { - gpu.addVmMemory(gpuMem); - } } break; case KernelArg::Sampler: @@ -3464,7 +3458,7 @@ HSAILKernel::init(amd::hsa::loader::Symbol *sym, bool finalize) // Copy wavefront size workGroupInfo_.wavefrontSize_ = prog().isNull() ? 64 : dev().getAttribs().wavefrontSize; - + // Find total workgroup size if (workGroupInfo_.compileSize_[0] != 0) { workGroupInfo_.size_ = diff --git a/rocclr/runtime/device/gpu/gpukernel.hpp b/rocclr/runtime/device/gpu/gpukernel.hpp index fd383d757a..bdb5455ef6 100644 --- a/rocclr/runtime/device/gpu/gpukernel.hpp +++ b/rocclr/runtime/device/gpu/gpukernel.hpp @@ -620,8 +620,7 @@ public: bool run( VirtualGPU& gpu, //!< virtual GPU device object GpuEvent* gpuEvent, //!< Pointer to the GPU event - bool lastRun, //!< Last run in the split execution - bool lastDoppCmd //!< info for kernel dispatch + bool lastRun //!< Last run in the split execution ) const; //! Help function to debug the kernel output diff --git a/rocclr/runtime/device/gpu/gpuvirtual.cpp b/rocclr/runtime/device/gpu/gpuvirtual.cpp index a5a8757b42..2ec9386fae 100644 --- a/rocclr/runtime/device/gpu/gpuvirtual.cpp +++ b/rocclr/runtime/device/gpu/gpuvirtual.cpp @@ -472,7 +472,7 @@ VirtualGPU::create(bool profiling, uint rtCUs, uint deviceQueueSize, } //!@todo This is not a generic solution and // may have issues with > 8 queues - idx = index() % (dev().engines().numComputeRings() + + idx = index() % (dev().engines().numComputeRings() + dev().engines().numComputeRingsRT()); } // hwRing_ should be set 0 if forced to have single scratch buffer @@ -1839,23 +1839,11 @@ VirtualGPU::submitKernelInternalHSA( pKernelInfo = &kernelInfo; } - // Set up the dispatch information - KernelDispatchInfo dispatchInfo; - dispatchInfo.aqlPacket = aqlPkt; - dispatchInfo.mems = vmMems(); - dispatchInfo.numMems = cal_.memCount_; - dispatchInfo.scratch = scratch; - dispatchInfo.scratchOffset = scratchOffset; - dispatchInfo.cpuAqlCode = hsaKernel.cpuAqlCode(); - dispatchInfo.hsaQueueVA = hsaQueueMem_->vmAddress(); - dispatchInfo.kernelInfo = pKernelInfo; - dispatchInfo.wavesPerSH = hsaKernel.getWavesPerSH(this); - dispatchInfo.lastDoppSubmission = kernel.parameters().getExecNewVcop(); - GpuEvent gpuEvent; // Run AQL dispatch in HW eventBegin(MainEngine); - cs()->AqlDispatch(&dispatchInfo); + cs()->AqlDispatch(aqlPkt, vmMems(), cal_.memCount_, scratch, scratchOffset, + hsaKernel.cpuAqlCode(), hsaQueueMem_->vmAddress(), pKernelInfo, hsaKernel.getWavesPerSH(this)); eventEnd(MainEngine, gpuEvent); if (dbgManager && (NULL != dbgManager->postDispatchCallBackFunc())) { @@ -2155,7 +2143,7 @@ VirtualGPU::submitKernelInternal( } // Execute the kernel - if (gpuKernel.run(*this, &gpuEvent, lastRun, kernel.parameters().getExecNewVcop())) { + if (gpuKernel.run(*this, &gpuEvent, lastRun)) { //! @todo A flush is necessary to make sure // that 2 consecutive runs won't access to the same // private/local memory. CAL has to generate cache flush diff --git a/rocclr/runtime/device/gpu/gslbe/src/rt/GSLDeviceGL.cpp b/rocclr/runtime/device/gpu/gslbe/src/rt/GSLDeviceGL.cpp index a8bdf336c1..ef8b14070e 100644 --- a/rocclr/runtime/device/gpu/gslbe/src/rt/GSLDeviceGL.cpp +++ b/rocclr/runtime/device/gpu/gslbe/src/rt/GSLDeviceGL.cpp @@ -745,17 +745,6 @@ CALGSLDevice::resGLAssociate(GLResAssociate & resData) const attribs.cpu_address = (void*)hData->handle; attribs.alias_subtile = hData->tilingMode; attribs.mcaddress = hData->cardAddr; - if (hData->isDoppDesktopTexture == GL_TRUE) - { - attribs.isDOPPDesktopTexture = ATIGL_TRUE; - attribs.displayable = GSL_MOA_DISPLAYABLE_LAYOUT; - } - if (hData->isDoppPresentTexture == GL_TRUE) - { - attribs.displayable = GSL_MOA_DISPLAYABLE_LAYOUT; - } - - // VBOs are hardcoded to have a UINT8 type format if (hRes.type == GL_RESOURCE_ATTACH_VERTEXBUFFER_AMD) { diff --git a/rocclr/runtime/platform/kernel.hpp b/rocclr/runtime/platform/kernel.hpp index 86293b3135..cf679e83bf 100644 --- a/rocclr/runtime/platform/kernel.hpp +++ b/rocclr/runtime/platform/kernel.hpp @@ -83,13 +83,11 @@ private: std::vector execSvmPtr_; //!< The non argument svm pointers for kernel FGSStatus svmSystemPointersSupport_; //!< The flag for the status of the kernel // support of fine-grain system sharing. - bool execNewVcop_; //!< special new VCOP for kernel execution public: //! Construct a new instance of parameters for the given signature. KernelParameters(const KernelSignature& signature) : - signature_(signature), validated_(false), execInfoOffset_(0), svmSystemPointersSupport_(FGS_DEFAULT), - execNewVcop_(false) + signature_(signature), validated_(false), execInfoOffset_(0), svmSystemPointersSupport_(FGS_DEFAULT) { values_ = (address) this + alignUp(sizeof(KernelParameters), 16); defined_ = (bool*) (values_ + signature.paramsSize()); @@ -168,13 +166,6 @@ public: //! return the status of kernel support fine-grained SVM system pointer sharing FGSStatus getSvmSystemPointersSupport() const { return svmSystemPointersSupport_; } - - //! set the new VCOP in the execInfo container - void setExecNewVcop(const bool newVcop) { execNewVcop_ = newVcop; } - - //! get the new VCOP in the execInfo container - bool getExecNewVcop() const { return execNewVcop_; } - }; /*! \brief Encapsulates a __kernel function and the argument values