From 46c35fca09faa21dff9d98eb8f985ce25c451ba6 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 28 Jun 2019 15:01:03 -0400 Subject: [PATCH] P4 to Git Change 1918811 by vsytchen@vsytchen-remote-ocl-win10 on 2019/06/28 14:47:49 SWDEV-193973 - [OpenCL][NV21] Add support for Navi21 1. Enable OCLOfflineCompilation for gfx1010/1011/1012. 2. Skip gfx1030 as there is no support for this target in the device libraries for now. ReviewBoardURL = http://ocltc.amd.com/reviews/r/17611/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#24 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#80 edit ... //depot/stg/opencl/drivers/opencl/tests/ocltst/module/runtime/OCLOfflineCompilation.cpp#21 edit [ROCm/clr commit: 35b8eb2f0e160b36b4d06b07508e728485120f7c] --- .../clr/rocclr/runtime/device/devkernel.cpp | 1 - .../rocclr/runtime/device/pal/palkernel.cpp | 38 ++++++++++--------- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/projects/clr/rocclr/runtime/device/devkernel.cpp b/projects/clr/rocclr/runtime/device/devkernel.cpp index b7f5d5a83b..6fcab423aa 100644 --- a/projects/clr/rocclr/runtime/device/devkernel.cpp +++ b/projects/clr/rocclr/runtime/device/devkernel.cpp @@ -1271,7 +1271,6 @@ bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMeta // Set the workgroup information for the kernel workGroupInfo_.availableLDSSize_ = dev().info().localMemSizePerCU_; - assert(workGroupInfo_.availableLDSSize_ > 0); workGroupInfo_.availableSGPRs_ = 104; workGroupInfo_.availableVGPRs_ = 256; diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp index de15c52a93..34c5c5257f 100644 --- a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp @@ -441,29 +441,31 @@ bool LightningKernel::init() { return false; } - codeSize_ = prog().codeSegGpu().owner()->getSize(); + if (!prog().isNull()) { + codeSize_ = prog().codeSegGpu().owner()->getSize(); - // handle device enqueue - if (!kernelMD.mAttrs.mRuntimeHandle.empty()) { - hsa_agent_t agent; - agent.handle = 1; - amd::hsa::loader::Symbol* rth_symbol; + // handle device enqueue + if (!kernelMD.mAttrs.mRuntimeHandle.empty()) { + hsa_agent_t agent; + agent.handle = 1; + amd::hsa::loader::Symbol* rth_symbol; - // Get the runtime handle symbol GPU address - rth_symbol = prog().GetSymbol(const_cast(kernelMD.mAttrs.mRuntimeHandle.c_str()), - const_cast(&agent)); - uint64_t symbol_address; - rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address); + // Get the runtime handle symbol GPU address + rth_symbol = prog().GetSymbol(const_cast(kernelMD.mAttrs.mRuntimeHandle.c_str()), + const_cast(&agent)); + uint64_t symbol_address; + rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address); - // Copy the kernel_object pointer to the runtime handle symbol GPU address - const Memory& codeSegGpu = prog().codeSegGpu(); - uint64_t offset = symbol_address - codeSegGpu.vmAddress(); - uint64_t kernel_object = gpuAqlCode(); - VirtualGPU* gpu = codeSegGpu.dev().xferQueue(); + // Copy the kernel_object pointer to the runtime handle symbol GPU address + const Memory& codeSegGpu = prog().codeSegGpu(); + uint64_t offset = symbol_address - codeSegGpu.vmAddress(); + uint64_t kernel_object = gpuAqlCode(); + VirtualGPU* gpu = codeSegGpu.dev().xferQueue(); - const struct RuntimeHandle runtime_handle = {gpuAqlCode(), spillSegSize(), ldsSize()}; + const struct RuntimeHandle runtime_handle = {gpuAqlCode(), spillSegSize(), ldsSize()}; - codeSegGpu.writeRawData(*gpu, offset, sizeof(runtime_handle), &runtime_handle, true); + codeSegGpu.writeRawData(*gpu, offset, sizeof(runtime_handle), &runtime_handle, true); + } } // Setup the the workgroup info