Files
rocm-systems/rocclr/runtime/device/rocm/rockernel.cpp
T
foreman e51a27e182 P4 to Git Change 1753035 by wchau@wchau_OCL_Linux on 2019/03/07 15:02:39
SWDEV-182054 - Allow building OpenCL-Runtime with COMGR enabled without OpenCL-Driver being present

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/hip/build/Makefile.hip#12 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/build/Makefile.api#184 edit
... //depot/stg/opencl/drivers/opencl/compiler/Makefile#71 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#242 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#335 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#30 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#18 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#126 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#87 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#120 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#51 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#100 edit
2019-03-07 15:48:33 -05:00

386 wiersze
14 KiB
C++

//
// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
//
#include "rockernel.hpp"
#include "amd_hsa_kernel_code.h"
#include <algorithm>
#ifndef WITHOUT_HSA_BACKEND
#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
#ifndef USE_COMGR_LIBRARY
#include "driver/AmdCompiler.h"
#endif
#include "llvm/Support/AMDGPUMetadata.h"
typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD;
typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
namespace roc {
// Constructs a ROCm kernel object.
//
// Forwards the owning program's device (prog->dev()) and the kernel name to the
// device::Kernel base class, then records the owning program, the kernel code
// handle and the segment sizes/alignment exactly as they were passed in.
//
//   name                            - kernel name, handed to device::Kernel
//   prog                            - owning program; dereferenced here, so it must not be null
//   kernelCodeHandle                - kernel code object handle, stored in kernelCodeHandle_
//   workgroupGroupSegmentByteSize   - stored in workgroupGroupSegmentByteSize_
//   workitemPrivateSegmentByteSize  - stored in workitemPrivateSegmentByteSize_
//   kernargSegmentByteSize          - stored in kernargSegmentByteSize_
//   kernargSegmentAlignment         - stored in kernargSegmentAlignment_
//
// No validation is performed; the body is intentionally empty.
Kernel::Kernel(std::string name, Program* prog, const uint64_t& kernelCodeHandle,
const uint32_t workgroupGroupSegmentByteSize,
const uint32_t workitemPrivateSegmentByteSize, const uint32_t kernargSegmentByteSize,
const uint32_t kernargSegmentAlignment)
: device::Kernel(prog->dev(), name),
program_(prog),
kernelCodeHandle_(kernelCodeHandle),
workgroupGroupSegmentByteSize_(workgroupGroupSegmentByteSize),
workitemPrivateSegmentByteSize_(workitemPrivateSegmentByteSize),
kernargSegmentByteSize_(kernargSegmentByteSize),
kernargSegmentAlignment_(kernargSegmentAlignment) {}
#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
#if defined(USE_COMGR_LIBRARY)
// Initializes the kernel from the COMGR metadata of the owning program.
//
// Steps, in order:
//   1. Look up this kernel's metadata node by name and decode its attributes and
//      code properties into a KernelMD.
//   2. Fill in the available LDS size and the available SGPR/VGPR counts for the
//      target (machine target plus optional "+xnack" / "+sram-ecc" suffixes).
//   3. If the kernel carries RuntimeHandle metadata, publish its RuntimeHandle
//      into the device variable named by that metadata.
//   4. Fill in the remaining workGroupInfo_ fields from the agent's wavefront
//      size and the code properties.
//   5. Register any printf format strings found in the program metadata.
//
// Returns true on success; false if any metadata lookup or HSA call fails, or if
// the metadata reports a maximum flat work-group size of zero.
bool LightningKernel::init() {
  const amd_comgr_metadata_node_t* kernelMetaNode =
      static_cast<LightningProgram*>(program_)->getKernelMetadata(name());
  if (kernelMetaNode == nullptr) {
    return false;
  }

  KernelMD kernelMD;
  if (!GetAttrCodePropMetadata(*kernelMetaNode, KernargSegmentByteSize(), &kernelMD)) {
    return false;
  }

  // Set the workgroup information for the kernel
  workGroupInfo_.availableLDSSize_ = dev().info().localMemSizePerCU_;
  assert(workGroupInfo_.availableLDSSize_ > 0);

  // Get the available SGPRs and VGPRs
  std::string targetIdent = std::string("amdgcn-amd-amdhsa--")+program_->machineTarget();
  if (program_->xnackEnable()) {
    targetIdent.append("+xnack");
  }
  if (program_->sramEccEnable()) {
    targetIdent.append("+sram-ecc");
  }
  if (!SetAvailableSgprVgpr(targetIdent)) {
    return false;
  }

  if (!kernelMD.mAttrs.mRuntimeHandle.empty()) {
    hsa_agent_t agent = program_->hsaDevice();
    hsa_executable_symbol_t kernelSymbol;
    // HSA reports VARIABLE_SIZE as a uint32_t and VARIABLE_ADDRESS as a uint64_t;
    // the locals must match those types exactly.
    uint32_t variable_size = 0;
    uint64_t variable_address = 0;

    // Only kernels that could be enqueued by another kernel have the RuntimeHandle
    // metadata. The RuntimeHandle metadata is a string that names a variable from
    // which the library code can retrieve the kernel code object handle of such a
    // kernel. The address of the variable and the kernel code object handle are
    // known only after the HSA executable is loaded. The code below copies the
    // kernel code object handle to the address of that variable.
    hsa_status_t hsaStatus = hsa_executable_get_symbol_by_name(
        program_->hsaExecutable(), kernelMD.mAttrs.mRuntimeHandle.c_str(), &agent, &kernelSymbol);

    if (hsaStatus == HSA_STATUS_SUCCESS) {
      hsaStatus = hsa_executable_symbol_get_info(
          kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, &variable_size);
    }

    if (hsaStatus == HSA_STATUS_SUCCESS) {
      hsaStatus = hsa_executable_symbol_get_info(
          kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &variable_address);
    }

    if (hsaStatus == HSA_STATUS_SUCCESS) {
      const struct RuntimeHandle runtime_handle = {
        kernelCodeHandle_,
        workitemPrivateSegmentByteSize(),
        WorkgroupGroupSegmentByteSize()
      };
      // Never read past the local RuntimeHandle, even if the device-side symbol
      // is declared larger than the host-side structure.
      const size_t copySize = std::min<size_t>(variable_size, sizeof(runtime_handle));
      hsaStatus = hsa_memory_copy(reinterpret_cast<void*>(variable_address),
                                  &runtime_handle, copySize);
    }

    if (hsaStatus != HSA_STATUS_SUCCESS) {
      return false;
    }
  }

  uint32_t wavefront_size = 0;
  if (hsa_agent_get_info(program_->hsaDevice(), HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size) !=
      HSA_STATUS_SUCCESS) {
    return false;
  }
  assert(wavefront_size > 0);

  workGroupInfo_.privateMemSize_ = workitemPrivateSegmentByteSize_;
  workGroupInfo_.localMemSize_ = workgroupGroupSegmentByteSize_;
  workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_;
  workGroupInfo_.preferredSizeMultiple_ = wavefront_size;
  workGroupInfo_.usedSGPRs_ = kernelMD.mCodeProps.mNumSGPRs;
  workGroupInfo_.usedVGPRs_ = kernelMD.mCodeProps.mNumVGPRs;
  workGroupInfo_.usedStackSize_ = 0;
  workGroupInfo_.wavefrontPerSIMD_ = program_->dev().info().maxWorkItemSizes_[0] / wavefront_size;
  workGroupInfo_.wavefrontSize_ = wavefront_size;
  workGroupInfo_.size_ = kernelMD.mCodeProps.mMaxFlatWorkGroupSize;
  if (workGroupInfo_.size_ == 0) {
    return false;
  }

  // handle the printf metadata if any
  const amd_comgr_metadata_node_t* programMD = static_cast<LightningProgram*>(program_)->metadata();
  assert(programMD != nullptr);

  std::vector<std::string> printfStr;
  if (!GetPrintfStr(*programMD, &printfStr)) {
    return false;
  }
  if (!printfStr.empty()) {
    InitPrintf(printfStr);
  }

  return true;
}
#else
// Searches the program-level code object metadata for the kernel whose mName
// equals `name`.
//
// Returns a pointer to the first matching entry inside programMD->mKernels, or
// nullptr when no kernel has that name. The returned pointer refers to storage
// inside *programMD; `programMD` itself is dereferenced unconditionally.
static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const std::string& name) {
  const auto& kernels = programMD->mKernels;
  const auto match = std::find_if(kernels.begin(), kernels.end(),
                                  [&name](const KernelMD& candidate) {
                                    return candidate.mName == name;
                                  });
  return (match != kernels.end()) ? &(*match) : nullptr;
}
// Initializes the kernel from the LLVM code object metadata of the owning
// program (non-COMGR path).
//
// Steps, in order:
//   1. Locate this kernel's metadata by name and initialize the kernel
//      parameters from it.
//   2. Fill in the available LDS size and fixed available SGPR/VGPR counts.
//   3. Copy the optional reqd_work_group_size, work_group_size_hint and
//      vec_type_hint attributes into workGroupInfo_.
//   4. If the kernel carries RuntimeHandle metadata, publish its RuntimeHandle
//      into the device variable named by that metadata.
//   5. Fill in the remaining workGroupInfo_ fields from the agent's wavefront
//      size and the code properties, then register the printf metadata.
//
// Returns true on success; false if the kernel metadata is missing, an HSA call
// fails, or the metadata reports a maximum flat work-group size of zero.
bool LightningKernel::init() {
  // Pull out metadata from the ELF
  const CodeObjectMD* programMD = static_cast<LightningProgram*>(program_)->metadata();
  assert(programMD != nullptr);

  const KernelMD* kernelMD = FindKernelMetadata(programMD, name());
  if (kernelMD == nullptr) {
    return false;
  }
  InitParameters(*kernelMD, KernargSegmentByteSize());

  // Set the workgroup information for the kernel
  workGroupInfo_.availableLDSSize_ = program_->dev().info().localMemSizePerCU_;
  assert(workGroupInfo_.availableLDSSize_ > 0);
  workGroupInfo_.availableSGPRs_ = 104;
  workGroupInfo_.availableVGPRs_ = 256;

  if (!kernelMD->mAttrs.mReqdWorkGroupSize.empty()) {
    const auto& requiredWorkgroupSize = kernelMD->mAttrs.mReqdWorkGroupSize;
    workGroupInfo_.compileSize_[0] = requiredWorkgroupSize[0];
    workGroupInfo_.compileSize_[1] = requiredWorkgroupSize[1];
    workGroupInfo_.compileSize_[2] = requiredWorkgroupSize[2];
  }

  if (!kernelMD->mAttrs.mWorkGroupSizeHint.empty()) {
    const auto& workgroupSizeHint = kernelMD->mAttrs.mWorkGroupSizeHint;
    workGroupInfo_.compileSizeHint_[0] = workgroupSizeHint[0];
    workGroupInfo_.compileSizeHint_[1] = workgroupSizeHint[1];
    workGroupInfo_.compileSizeHint_[2] = workgroupSizeHint[2];
  }

  if (!kernelMD->mAttrs.mVecTypeHint.empty()) {
    workGroupInfo_.compileVecTypeHint_ = kernelMD->mAttrs.mVecTypeHint.c_str();
  }

  if (!kernelMD->mAttrs.mRuntimeHandle.empty()) {
    hsa_agent_t agent = program_->hsaDevice();
    hsa_executable_symbol_t kernelSymbol;
    // HSA reports VARIABLE_SIZE as a uint32_t and VARIABLE_ADDRESS as a uint64_t;
    // the locals must match those types exactly.
    uint32_t variable_size = 0;
    uint64_t variable_address = 0;

    // Only kernels that could be enqueued by another kernel have the RuntimeHandle
    // metadata. The RuntimeHandle metadata is a string that names a variable from
    // which the library code can retrieve the kernel code object handle of such a
    // kernel. The address of the variable and the kernel code object handle are
    // known only after the HSA executable is loaded. The code below copies the
    // kernel code object handle to the address of that variable.
    hsa_status_t status = hsa_executable_get_symbol_by_name(
        program_->hsaExecutable(), kernelMD->mAttrs.mRuntimeHandle.c_str(), &agent, &kernelSymbol);
    if (status != HSA_STATUS_SUCCESS) {
      return false;
    }

    status = hsa_executable_symbol_get_info(kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE,
                                            &variable_size);
    if (status != HSA_STATUS_SUCCESS) {
      return false;
    }

    status = hsa_executable_symbol_get_info(kernelSymbol,
                                            HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS,
                                            &variable_address);
    if (status != HSA_STATUS_SUCCESS) {
      return false;
    }

    const struct RuntimeHandle runtime_handle = {
      kernelCodeHandle_,
      workitemPrivateSegmentByteSize(),
      WorkgroupGroupSegmentByteSize()
    };
    // Never read past the local RuntimeHandle, even if the device-side symbol
    // is declared larger than the host-side structure.
    const size_t copySize = std::min<size_t>(variable_size, sizeof(runtime_handle));
    status = hsa_memory_copy(reinterpret_cast<void*>(variable_address), &runtime_handle, copySize);
    if (status != HSA_STATUS_SUCCESS) {
      return false;
    }
  }

  uint32_t wavefront_size = 0;
  if (hsa_agent_get_info(program_->hsaDevice(), HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size) !=
      HSA_STATUS_SUCCESS) {
    return false;
  }
  assert(wavefront_size > 0);

  workGroupInfo_.privateMemSize_ = workitemPrivateSegmentByteSize_;
  workGroupInfo_.localMemSize_ = workgroupGroupSegmentByteSize_;
  workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_;
  workGroupInfo_.preferredSizeMultiple_ = wavefront_size;

  /// TODO: Are there any other fields that are getting queried from akc?
  /// If so, code properties metadata should be used instead.
  workGroupInfo_.usedSGPRs_ = kernelMD->mCodeProps.mNumSGPRs;
  workGroupInfo_.usedVGPRs_ = kernelMD->mCodeProps.mNumVGPRs;

  workGroupInfo_.usedStackSize_ = 0;
  workGroupInfo_.wavefrontPerSIMD_ = program_->dev().info().maxWorkItemSizes_[0] / wavefront_size;
  workGroupInfo_.wavefrontSize_ = wavefront_size;
  workGroupInfo_.size_ = kernelMD->mCodeProps.mMaxFlatWorkGroupSize;
  if (workGroupInfo_.size_ == 0) {
    return false;
  }

  InitPrintf(programMD->mPrintf);
  return true;
}
#endif // defined(USE_COMGR_LIBRARY)
#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
#if defined(WITH_COMPILER_LIB)
bool HSAILKernel::init() {
acl_error errorCode;
// compile kernel down to ISA
hsa_agent_t hsaDevice = program_->hsaDevice();
// Pull out metadata from the ELF
size_t sizeOfArgList;
aclCompiler* compileHandle = program_->dev().compiler();
std::string openClKernelName("&__OpenCL_" + name() + "_kernel");
errorCode = aclQueryInfo(compileHandle, program_->binaryElf(), RT_ARGUMENT_ARRAY,
openClKernelName.c_str(), nullptr, &sizeOfArgList);
if (errorCode != ACL_SUCCESS) {
return false;
}
std::unique_ptr<char[]> argList(new char[sizeOfArgList]);
errorCode = aclQueryInfo(compileHandle, program_->binaryElf(), RT_ARGUMENT_ARRAY,
openClKernelName.c_str(), argList.get(), &sizeOfArgList);
if (errorCode != ACL_SUCCESS) {
return false;
}
// Set the argList
InitParameters((const aclArgData*)argList.get(), KernargSegmentByteSize());
// Set the workgroup information for the kernel
memset(&workGroupInfo_, 0, sizeof(workGroupInfo_));
workGroupInfo_.availableLDSSize_ = program_->dev().info().localMemSizePerCU_;
assert(workGroupInfo_.availableLDSSize_ > 0);
workGroupInfo_.availableSGPRs_ = 104;
workGroupInfo_.availableVGPRs_ = 256;
size_t sizeOfWorkGroupSize;
errorCode = aclQueryInfo(compileHandle, program_->binaryElf(), RT_WORK_GROUP_SIZE,
openClKernelName.c_str(), nullptr, &sizeOfWorkGroupSize);
if (errorCode != ACL_SUCCESS) {
return false;
}
errorCode = aclQueryInfo(compileHandle, program_->binaryElf(), RT_WORK_GROUP_SIZE,
openClKernelName.c_str(), workGroupInfo_.compileSize_,
&sizeOfWorkGroupSize);
if (errorCode != ACL_SUCCESS) {
return false;
}
uint32_t wavefront_size = 0;
if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(program_->hsaDevice(), HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size)) {
return false;
}
assert(wavefront_size > 0);
// Setting it the same as used LDS.
workGroupInfo_.localMemSize_ = workgroupGroupSegmentByteSize_;
workGroupInfo_.privateMemSize_ = workitemPrivateSegmentByteSize_;
workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_;
workGroupInfo_.preferredSizeMultiple_ = wavefront_size;
// Query kernel header object to initialize the number of
// SGPR's and VGPR's used by the kernel
const void* kernelHostPtr = nullptr;
if (Device::loaderQueryHostAddress(reinterpret_cast<const void*>(kernelCodeHandle_),
&kernelHostPtr) == HSA_STATUS_SUCCESS) {
auto akc = reinterpret_cast<const amd_kernel_code_t*>(kernelHostPtr);
workGroupInfo_.usedSGPRs_ = akc->wavefront_sgpr_count;
workGroupInfo_.usedVGPRs_ = akc->workitem_vgpr_count;
} else {
workGroupInfo_.usedSGPRs_ = 0;
workGroupInfo_.usedVGPRs_ = 0;
}
workGroupInfo_.usedStackSize_ = 0;
workGroupInfo_.wavefrontPerSIMD_ = program_->dev().info().maxWorkItemSizes_[0] / wavefront_size;
workGroupInfo_.wavefrontSize_ = wavefront_size;
if (workGroupInfo_.compileSize_[0] != 0) {
workGroupInfo_.size_ = workGroupInfo_.compileSize_[0] * workGroupInfo_.compileSize_[1] *
workGroupInfo_.compileSize_[2];
} else {
workGroupInfo_.size_ = program_->dev().info().preferredWorkGroupSize_;
}
// Pull out printf metadata from the ELF
size_t sizeOfPrintfList;
errorCode = aclQueryInfo(compileHandle, program_->binaryElf(), RT_GPU_PRINTF_ARRAY,
openClKernelName.c_str(), nullptr, &sizeOfPrintfList);
if (errorCode != ACL_SUCCESS) {
return false;
}
// Make sure kernel has any printf info
if (0 != sizeOfPrintfList) {
std::unique_ptr<char[]> aclPrintfList(new char[sizeOfPrintfList]);
if (!aclPrintfList) {
return false;
}
errorCode = aclQueryInfo(compileHandle, program_->binaryElf(),
RT_GPU_PRINTF_ARRAY, openClKernelName.c_str(),
aclPrintfList.get(), &sizeOfPrintfList);
if (errorCode != ACL_SUCCESS) {
return false;
}
// Set the Printf List
InitPrintf(reinterpret_cast<aclPrintfFmt*>(aclPrintfList.get()));
}
return true;
}
#endif // defined(WITH_COMPILER_LIB)
} // namespace roc
#endif // WITHOUT_HSA_BACKEND