From 46c35fca09faa21dff9d98eb8f985ce25c451ba6 Mon Sep 17 00:00:00 2001
From: foreman
Date: Fri, 28 Jun 2019 15:01:03 -0400
Subject: [PATCH] P4 to Git Change 1918811 by
vsytchen@vsytchen-remote-ocl-win10 on 2019/06/28 14:47:49
SWDEV-193973 - [OpenCL][NV21] Add support for Navi21
1. Enable OCLOfflineCompilation for gfx1010/1011/1012.
2. Skip gfx1030 as there is no support for this target in the device libraries for now.
ReviewBoardURL = http://ocltc.amd.com/reviews/r/17611/diff/
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#24 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#80 edit
... //depot/stg/opencl/drivers/opencl/tests/ocltst/module/runtime/OCLOfflineCompilation.cpp#21 edit
[ROCm/clr commit: 35b8eb2f0e160b36b4d06b07508e728485120f7c]
---
.../clr/rocclr/runtime/device/devkernel.cpp | 1 -
.../rocclr/runtime/device/pal/palkernel.cpp | 38 ++++++++++---------
2 files changed, 20 insertions(+), 19 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/devkernel.cpp b/projects/clr/rocclr/runtime/device/devkernel.cpp
index b7f5d5a83b..6fcab423aa 100644
--- a/projects/clr/rocclr/runtime/device/devkernel.cpp
+++ b/projects/clr/rocclr/runtime/device/devkernel.cpp
@@ -1271,7 +1271,6 @@ bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMeta
// Set the workgroup information for the kernel
workGroupInfo_.availableLDSSize_ = dev().info().localMemSizePerCU_;
- assert(workGroupInfo_.availableLDSSize_ > 0);
workGroupInfo_.availableSGPRs_ = 104;
workGroupInfo_.availableVGPRs_ = 256;
diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
index de15c52a93..34c5c5257f 100644
--- a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
@@ -441,29 +441,31 @@ bool LightningKernel::init() {
return false;
}
- codeSize_ = prog().codeSegGpu().owner()->getSize();
+ if (!prog().isNull()) {
+ codeSize_ = prog().codeSegGpu().owner()->getSize();
- // handle device enqueue
- if (!kernelMD.mAttrs.mRuntimeHandle.empty()) {
- hsa_agent_t agent;
- agent.handle = 1;
- amd::hsa::loader::Symbol* rth_symbol;
+ // handle device enqueue
+ if (!kernelMD.mAttrs.mRuntimeHandle.empty()) {
+ hsa_agent_t agent;
+ agent.handle = 1;
+ amd::hsa::loader::Symbol* rth_symbol;
- // Get the runtime handle symbol GPU address
- rth_symbol = prog().GetSymbol(const_cast<char*>(kernelMD.mAttrs.mRuntimeHandle.c_str()),
- const_cast<hsa_agent_t*>(&agent));
- uint64_t symbol_address;
- rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address);
+ // Get the runtime handle symbol GPU address
+ rth_symbol = prog().GetSymbol(const_cast<char*>(kernelMD.mAttrs.mRuntimeHandle.c_str()),
+ const_cast<hsa_agent_t*>(&agent));
+ uint64_t symbol_address;
+ rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address);
- // Copy the kernel_object pointer to the runtime handle symbol GPU address
- const Memory& codeSegGpu = prog().codeSegGpu();
- uint64_t offset = symbol_address - codeSegGpu.vmAddress();
- uint64_t kernel_object = gpuAqlCode();
- VirtualGPU* gpu = codeSegGpu.dev().xferQueue();
+ // Copy the kernel_object pointer to the runtime handle symbol GPU address
+ const Memory& codeSegGpu = prog().codeSegGpu();
+ uint64_t offset = symbol_address - codeSegGpu.vmAddress();
+ uint64_t kernel_object = gpuAqlCode();
+ VirtualGPU* gpu = codeSegGpu.dev().xferQueue();
- const struct RuntimeHandle runtime_handle = {gpuAqlCode(), spillSegSize(), ldsSize()};
+ const struct RuntimeHandle runtime_handle = {gpuAqlCode(), spillSegSize(), ldsSize()};
- codeSegGpu.writeRawData(*gpu, offset, sizeof(runtime_handle), &runtime_handle, true);
+ codeSegGpu.writeRawData(*gpu, offset, sizeof(runtime_handle), &runtime_handle, true);
+ }
}
// Setup the the workgroup info