From d939bb5f128a6db00bf1a2b5e568cdff228506ca Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 6 Aug 2015 17:39:55 -0400
Subject: [PATCH] P4 to Git Change 1177889 by weizhang@msdnweizhang-opencl on
2015/08/06 17:16:54
EPR #421017 - For IOMMU2/SVM on CZ Win10, the INST_ATC bit of COMPUTE_PGM_HI needs to be set for device enqueue.
Affected files ...
... //depot/stg/opencl/drivers/opencl/library/hsa/hsail/src/devenq/schedule.cl#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusched.hpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#370 edit
[ROCm/clr commit: 7a54d367f3412b1ea599c474f35f9d4174fa12fd]
---
projects/clr/rocclr/runtime/device/gpu/gpusched.hpp | 2 +-
projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpusched.hpp b/projects/clr/rocclr/runtime/device/gpu/gpusched.hpp
index 01985190cd..7304351f6b 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpusched.hpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpusched.hpp
@@ -62,7 +62,7 @@ struct SchedulerParam {
uint32_t eng_clk; //!< Engine clock in Mhz
uint64_t hw_queue; //!< Address to HW queue
uint64_t hsa_queue; //!< Address to HSA dummy queue
- uint32_t reserved; //!< Reserved
+ uint32_t useATC; //!< GPU access to shader program by ATC.
uint32_t scratchSize; //!< Scratch buffer size
uint64_t scratch; //!< GPU address to the scratch buffer
uint32_t numMaxWaves; //!< The max number of possible waves
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
index 399a5fa462..daae61edcc 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
@@ -1926,6 +1926,7 @@ VirtualGPU::submitKernelInternalHSA(
param->releaseHostCP = 0;
param->parentAQL = vmParentWrap;
param->dedicatedQueue = dev().settings().useDeviceQueue_;
+ param->useATC = dev().settings().svmFineGrainSystem_;
// Fill the scratch buffer information
if (hsaKernel.prog().maxScratchRegs() > 0) {