Files
rocm-systems/projects/clr/rocclr/device/rocm/rockernel.cpp
T
Tony Tye 2cb73c8b82 Rename device access functions
Rename functions that access devices to reflect the derived device
they return. This includes the base device::Device and the derived
gpu/pal/roc device classes in both NullDevice and Device forms. Change
to use the least derived versions to clarify what operations will be
available.

Change-Id: I1abb6bfed7efa24852bc8d0d49acaea357d8b5d0


[ROCm/clr commit: 001fd66cac]
2021-01-13 08:24:28 +00:00

293 строки
12 KiB
C++

/* Copyright (c) 2009-present Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "rockernel.hpp"
#include "amd_hsa_kernel_code.h"
#include <algorithm>
#ifndef WITHOUT_HSA_BACKEND
namespace roc {
// Builds a ROCm kernel whose code handle and segment properties are already known.
//
// name                           - kernel name, forwarded to device::Kernel
// prog                           - owning program; it is dereferenced here, so it must not be null
// kernelCodeHandle               - value stored in kernelCodeHandle_
// workgroupGroupSegmentByteSize  - value stored in workgroupGroupSegmentByteSize_
// workitemPrivateSegmentByteSize - value stored in workitemPrivateSegmentByteSize_
// kernargSegmentByteSize         - value stored in kernargSegmentByteSize_
// kernargSegmentAlignment        - value stored in kernargSegmentAlignment_
Kernel::Kernel(std::string name, Program* prog, const uint64_t& kernelCodeHandle,
               const uint32_t workgroupGroupSegmentByteSize,
               const uint32_t workitemPrivateSegmentByteSize,
               const uint32_t kernargSegmentByteSize, const uint32_t kernargSegmentAlignment)
    : device::Kernel(prog->device(), name, *prog) {
  // Kernel argument segment description.
  this->kernargSegmentAlignment_ = kernargSegmentAlignment;
  this->kernargSegmentByteSize_ = kernargSegmentByteSize;

  // Per-work-item and per-work-group segment sizes.
  this->workitemPrivateSegmentByteSize_ = workitemPrivateSegmentByteSize;
  this->workgroupGroupSegmentByteSize_ = workgroupGroupSegmentByteSize;

  // Handle of the kernel code.
  this->kernelCodeHandle_ = kernelCodeHandle;
}
// Builds a ROCm kernel from just its name and owning program. Everything is
// forwarded to device::Kernel; this constructor body assigns no members itself.
// prog is dereferenced here, so it must not be null.
Kernel::Kernel(std::string name, Program* prog) : device::Kernel(prog->device(), name, *prog) {}
#if defined(USE_COMGR_LIBRARY)
// Initializes the kernel from its code-property metadata and the loaded HSA executable.
//
// Steps, in order:
//   1. read the code-property metadata (GetAttrCodePropMetadata),
//   2. fix up the symbol name and kernarg alignment,
//   3. look up the kernel symbol and fetch its kernel object handle,
//   4. if the kernel carries RuntimeHandle metadata, publish its handle and segment
//      sizes into the corresponding device variable,
//   5. fill in workGroupInfo_ and the printf descriptors.
//
// Returns true on success and false as soon as any step fails.
bool LightningKernel::init() {
  if (!GetAttrCodePropMetadata()) {
    LogError("[ROC][Kernel] Could not get Code Prop Meta Data \n");
    return false;
  }

  // Set the kernel symbol name and size/alignment based on the kernel metadata
  // NOTE: kernel name is used to get the kernel code handle in V2,
  // but kernel symbol name is used in V3
  if (codeObjectVer() == 2) {
    symbolName_ = name();
  }

  // Use at least 128 bytes of alignment, rounded up to the global memory cache line size.
  kernargSegmentAlignment_ =
      amd::alignUp(std::max(kernargSegmentAlignment_, 128u), device().info().globalMemCacheLineSize_);

  // Set the workgroup information for the kernel
  workGroupInfo_.availableLDSSize_ = device().info().localMemSizePerCU_;
  assert(workGroupInfo_.availableLDSSize_ > 0);

  if (!SetAvailableSgprVgpr()) {
    DevLogError("Cannot set available SGPR/VGPR\n");
    return false;
  }

  // Get the kernel code handle
  hsa_status_t hsaStatus;
  hsa_executable_symbol_t symbol;
  hsa_agent_t agent = program()->hsaDevice();
  hsaStatus = hsa_executable_get_symbol_by_name(program()->hsaExecutable(),
                                                symbolName().c_str(),
                                                &agent, &symbol);
  if (hsaStatus != HSA_STATUS_SUCCESS) {
    DevLogPrintfError("Cannot Get Symbol : %s, failed with hsa_status: %d \n",
                      symbolName().c_str(), hsaStatus);
    return false;
  }

  hsaStatus = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
                                             &kernelCodeHandle_);
  if (hsaStatus != HSA_STATUS_SUCCESS) {
    DevLogPrintfError(" Cannot Get Symbol Info: %s, failed with hsa_status: %d \n ",
                      symbolName().c_str(), hsaStatus);
    return false;
  }

  if (!RuntimeHandle().empty()) {
    hsa_executable_symbol_t kernelSymbol;
    // HSA reports the variable size as a uint32_t.
    uint32_t variable_size = 0;
    uint64_t variable_address = 0;

    // Only kernels that could be enqueued by another kernel have the RuntimeHandle metadata. The
    // RuntimeHandle metadata is a string that represents a variable from which the library code can
    // retrieve the kernel code object handle of such a kernel. The address of the variable and the
    // kernel code object handle are known only after the hsa executable is loaded. The below code
    // copies the kernel code object handle to the address of the variable.
    hsaStatus = hsa_executable_get_symbol_by_name(program()->hsaExecutable(),
                                                  RuntimeHandle().c_str(),
                                                  &agent, &kernelSymbol);
    if (hsaStatus != HSA_STATUS_SUCCESS) {
      DevLogPrintfError("Cannot get Kernel Symbol by name: %s, failed with hsa_status: %d \n",
                        RuntimeHandle().c_str(), hsaStatus);
      return false;
    }

    hsaStatus = hsa_executable_symbol_get_info(kernelSymbol,
                                               HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE,
                                               &variable_size);
    if (hsaStatus != HSA_STATUS_SUCCESS) {
      DevLogPrintfError("[ROC][Kernel] Cannot get Kernel Symbol Info, failed with hsa_status: %d \n",
                        hsaStatus);
      return false;
    }

    hsaStatus = hsa_executable_symbol_get_info(kernelSymbol,
                                               HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS,
                                               &variable_address);
    if (hsaStatus != HSA_STATUS_SUCCESS) {
      DevLogPrintfError("[ROC][Kernel] Cannot get Kernel Address, failed with hsa_status: %d \n",
                        hsaStatus);
      return false;
    }

    const struct RuntimeHandle runtime_handle = {
      kernelCodeHandle_,
      WorkitemPrivateSegmentByteSize(),
      WorkgroupGroupSegmentByteSize()
    };

    // Never read past the end of the local runtime_handle object, even if the
    // device-side variable is reported to be larger than the host-side struct.
    const size_t copy_size =
        std::min(static_cast<size_t>(variable_size), sizeof(runtime_handle));
    hsaStatus = hsa_memory_copy(reinterpret_cast<void*>(variable_address),
                                &runtime_handle, copy_size);
    if (hsaStatus != HSA_STATUS_SUCCESS) {
      DevLogPrintfError("[ROC][Kernel] HSA Memory copy failed, failed with hsa_status: %d \n",
                        hsaStatus);
      return false;
    }
  }

  uint32_t wavefront_size = 0;
  // Keep the status of this call so that the error log below reports it rather
  // than the status left over from an earlier HSA call.
  hsaStatus = hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size);
  if (hsaStatus != HSA_STATUS_SUCCESS) {
    DevLogPrintfError("[ROC][Kernel] Cannot get Wavefront Size, failed with hsa_status: %d \n",
                      hsaStatus);
    return false;
  }
  // wavefront_size is used as a divisor below.
  assert(wavefront_size > 0);

  workGroupInfo_.privateMemSize_ = workitemPrivateSegmentByteSize_;
  workGroupInfo_.localMemSize_ = workgroupGroupSegmentByteSize_;
  workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_;
  workGroupInfo_.preferredSizeMultiple_ = wavefront_size;
  workGroupInfo_.usedStackSize_ = 0;
  workGroupInfo_.wavefrontPerSIMD_ =
      program()->rocDevice().info().maxWorkItemSizes_[0] / wavefront_size;
  workGroupInfo_.wavefrontSize_ = wavefront_size;

  // workGroupInfo_.size_ is not assigned in this function; a value of 0 at this
  // point is treated as an initialization failure.
  if (workGroupInfo_.size_ == 0) {
    DevLogError("[ROC][Kernel] workGroupInfo_.size_ is 0\n");
    return false;
  }

  // handle the printf metadata if any
  std::vector<std::string> printfStr;
  if (!GetPrintfStr(&printfStr)) {
    return false;
  }

  if (!printfStr.empty()) {
    InitPrintf(printfStr);
  }
  return true;
}
#endif // defined(USE_COMGR_LIBRARY)
#if defined(WITH_COMPILER_LIB)
bool HSAILKernel::init() {
acl_error errorCode;
// compile kernel down to ISA
hsa_agent_t hsaDevice = program()->hsaDevice();
// Pull out metadata from the ELF
size_t sizeOfArgList;
aclCompiler* compileHandle = program()->rocDevice().compiler();
std::string openClKernelName("&__OpenCL_" + name() + "_kernel");
errorCode = aclQueryInfo(compileHandle, program()->binaryElf(), RT_ARGUMENT_ARRAY,
openClKernelName.c_str(), nullptr, &sizeOfArgList);
if (errorCode != ACL_SUCCESS) {
LogPrintfError("Query Info failed with error code: %d \n", errorCode);
return false;
}
std::unique_ptr<char[]> argList(new char[sizeOfArgList]);
errorCode = aclQueryInfo(compileHandle, program()->binaryElf(), RT_ARGUMENT_ARRAY,
openClKernelName.c_str(), argList.get(), &sizeOfArgList);
if (errorCode != ACL_SUCCESS) {
LogPrintfError("Query Info failed with error code: %d \n", errorCode);
return false;
}
// Set the argList
InitParameters((const aclArgData*)argList.get(), KernargSegmentByteSize());
// Set the workgroup information for the kernel
memset(&workGroupInfo_, 0, sizeof(workGroupInfo_));
workGroupInfo_.availableLDSSize_ = program()->rocDevice().info().localMemSizePerCU_;
assert(workGroupInfo_.availableLDSSize_ > 0);
workGroupInfo_.availableSGPRs_ = 104;
workGroupInfo_.availableVGPRs_ = 256;
size_t sizeOfWorkGroupSize;
errorCode = aclQueryInfo(compileHandle, program()->binaryElf(), RT_WORK_GROUP_SIZE,
openClKernelName.c_str(), nullptr, &sizeOfWorkGroupSize);
if (errorCode != ACL_SUCCESS) {
LogPrintfError("Query Info failed with error code: %d \n", errorCode);
return false;
}
errorCode = aclQueryInfo(compileHandle, program()->binaryElf(), RT_WORK_GROUP_SIZE,
openClKernelName.c_str(), workGroupInfo_.compileSize_,
&sizeOfWorkGroupSize);
if (errorCode != ACL_SUCCESS) {
LogPrintfError("Query Info failed with error code: %d \n ", errorCode);
return false;
}
uint32_t wavefront_size = 0;
hsa_status_t hsaStatus = hsa_agent_get_info(program()->hsaDevice(), HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size);
if (HSA_STATUS_SUCCESS != hsaStatus) {
DevLogPrintfError("Could not get Wave Info Size: %d, failed with hsa_status: %d \n",
errorCode, hsaStatus);
return false;
}
assert(wavefront_size > 0);
// Setting it the same as used LDS.
workGroupInfo_.localMemSize_ = workgroupGroupSegmentByteSize_;
workGroupInfo_.privateMemSize_ = workitemPrivateSegmentByteSize_;
workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_;
workGroupInfo_.preferredSizeMultiple_ = wavefront_size;
// Query kernel header object to initialize the number of
// SGPR's and VGPR's used by the kernel
const void* kernelHostPtr = nullptr;
if (Device::loaderQueryHostAddress(reinterpret_cast<const void*>(kernelCodeHandle_),
&kernelHostPtr) == HSA_STATUS_SUCCESS) {
auto akc = reinterpret_cast<const amd_kernel_code_t*>(kernelHostPtr);
workGroupInfo_.usedSGPRs_ = akc->wavefront_sgpr_count;
workGroupInfo_.usedVGPRs_ = akc->workitem_vgpr_count;
} else {
workGroupInfo_.usedSGPRs_ = 0;
workGroupInfo_.usedVGPRs_ = 0;
}
workGroupInfo_.usedStackSize_ = 0;
workGroupInfo_.wavefrontPerSIMD_ = program()->rocDevice().info().maxWorkItemSizes_[0] / wavefront_size;
workGroupInfo_.wavefrontSize_ = wavefront_size;
if (workGroupInfo_.compileSize_[0] != 0) {
workGroupInfo_.size_ = workGroupInfo_.compileSize_[0] * workGroupInfo_.compileSize_[1] *
workGroupInfo_.compileSize_[2];
} else {
workGroupInfo_.size_ = program()->rocDevice().info().preferredWorkGroupSize_;
}
// Pull out printf metadata from the ELF
size_t sizeOfPrintfList;
errorCode = aclQueryInfo(compileHandle, program()->binaryElf(), RT_GPU_PRINTF_ARRAY,
openClKernelName.c_str(), nullptr, &sizeOfPrintfList);
if (errorCode != ACL_SUCCESS) {
LogPrintfError("Query Info failed with error code: %d \n", errorCode);
return false;
}
// Make sure kernel has any printf info
if (0 != sizeOfPrintfList) {
std::unique_ptr<char[]> aclPrintfList(new char[sizeOfPrintfList]);
if (!aclPrintfList) {
return false;
}
errorCode = aclQueryInfo(compileHandle, program()->binaryElf(),
RT_GPU_PRINTF_ARRAY, openClKernelName.c_str(),
aclPrintfList.get(), &sizeOfPrintfList);
if (errorCode != ACL_SUCCESS) {
return false;
}
// Set the Printf List
InitPrintf(reinterpret_cast<aclPrintfFmt*>(aclPrintfList.get()));
}
return true;
}
#endif // defined(WITH_COMPILER_LIB)
} // namespace roc
#endif // WITHOUT_HSA_BACKEND