P4 to Git Change 1918811 by vsytchen@vsytchen-remote-ocl-win10 on 2019/06/28 14:47:49
SWDEV-193973 - [OpenCL][NV21] Add support for Navi21
1. Enable OCLOfflineCompilation for gfx1010/1011/1012.
2. Skip gfx1030 as there is no support for this target in the device libraries for now.
ReviewBoardURL = http://ocltc.amd.com/reviews/r/17611/diff/
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#24 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#80 edit
... //depot/stg/opencl/drivers/opencl/tests/ocltst/module/runtime/OCLOfflineCompilation.cpp#21 edit
[ROCm/clr commit: 35b8eb2f0e]
This commit is contained in:
@@ -1271,7 +1271,6 @@ bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMeta
|
||||
|
||||
// Set the workgroup information for the kernel
|
||||
workGroupInfo_.availableLDSSize_ = dev().info().localMemSizePerCU_;
|
||||
assert(workGroupInfo_.availableLDSSize_ > 0);
|
||||
workGroupInfo_.availableSGPRs_ = 104;
|
||||
workGroupInfo_.availableVGPRs_ = 256;
|
||||
|
||||
|
||||
@@ -441,29 +441,31 @@ bool LightningKernel::init() {
|
||||
return false;
|
||||
}
|
||||
|
||||
codeSize_ = prog().codeSegGpu().owner()->getSize();
|
||||
if (!prog().isNull()) {
|
||||
codeSize_ = prog().codeSegGpu().owner()->getSize();
|
||||
|
||||
// handle device enqueue
|
||||
if (!kernelMD.mAttrs.mRuntimeHandle.empty()) {
|
||||
hsa_agent_t agent;
|
||||
agent.handle = 1;
|
||||
amd::hsa::loader::Symbol* rth_symbol;
|
||||
// handle device enqueue
|
||||
if (!kernelMD.mAttrs.mRuntimeHandle.empty()) {
|
||||
hsa_agent_t agent;
|
||||
agent.handle = 1;
|
||||
amd::hsa::loader::Symbol* rth_symbol;
|
||||
|
||||
// Get the runtime handle symbol GPU address
|
||||
rth_symbol = prog().GetSymbol(const_cast<char*>(kernelMD.mAttrs.mRuntimeHandle.c_str()),
|
||||
const_cast<hsa_agent_t*>(&agent));
|
||||
uint64_t symbol_address;
|
||||
rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address);
|
||||
// Get the runtime handle symbol GPU address
|
||||
rth_symbol = prog().GetSymbol(const_cast<char*>(kernelMD.mAttrs.mRuntimeHandle.c_str()),
|
||||
const_cast<hsa_agent_t*>(&agent));
|
||||
uint64_t symbol_address;
|
||||
rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address);
|
||||
|
||||
// Copy the kernel_object pointer to the runtime handle symbol GPU address
|
||||
const Memory& codeSegGpu = prog().codeSegGpu();
|
||||
uint64_t offset = symbol_address - codeSegGpu.vmAddress();
|
||||
uint64_t kernel_object = gpuAqlCode();
|
||||
VirtualGPU* gpu = codeSegGpu.dev().xferQueue();
|
||||
// Copy the kernel_object pointer to the runtime handle symbol GPU address
|
||||
const Memory& codeSegGpu = prog().codeSegGpu();
|
||||
uint64_t offset = symbol_address - codeSegGpu.vmAddress();
|
||||
uint64_t kernel_object = gpuAqlCode();
|
||||
VirtualGPU* gpu = codeSegGpu.dev().xferQueue();
|
||||
|
||||
const struct RuntimeHandle runtime_handle = {gpuAqlCode(), spillSegSize(), ldsSize()};
|
||||
const struct RuntimeHandle runtime_handle = {gpuAqlCode(), spillSegSize(), ldsSize()};
|
||||
|
||||
codeSegGpu.writeRawData(*gpu, offset, sizeof(runtime_handle), &runtime_handle, true);
|
||||
codeSegGpu.writeRawData(*gpu, offset, sizeof(runtime_handle), &runtime_handle, true);
|
||||
}
|
||||
}
|
||||
|
||||
// Set up the workgroup info
|
||||
|
||||
Reference in new issue
Block a user