/* Copyright (c) 2009 - 2025 Advanced Micro Devices, Inc.

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE. */
|
2016-07-21 12:41:26 -04:00
|
|
|
|
|
|
|
|
#include "rockernel.hpp"
|
|
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
|
|
2024-06-06 18:40:49 +01:00
|
|
|
namespace amd::roc {
|
2016-07-21 12:41:26 -04:00
|
|
|
|
2021-03-26 15:29:05 -04:00
|
|
|
// Initializes the kernel object. The result is exactly the result of
// GetAttrCodePropMetadata(); no other work is done here.
bool Kernel::init() {
  const bool metadataLoaded = GetAttrCodePropMetadata();
  return metadataLoaded;
}
|
2018-11-22 14:04:51 -05:00
|
|
|
|
2025-01-22 14:47:02 -05:00
|
|
|
bool Kernel::postLoad() {
|
2019-04-09 23:24:10 -04:00
|
|
|
// Set the kernel symbol name and size/alignment based on the kernel metadata
|
|
|
|
|
// NOTE: kernel name is used to get the kernel code handle in V2,
|
|
|
|
|
// but kernel symbol name is used in V3
|
2020-01-21 18:24:20 -05:00
|
|
|
if (codeObjectVer() == 2) {
|
|
|
|
|
symbolName_ = name();
|
|
|
|
|
}
|
2022-02-10 23:50:32 -08:00
|
|
|
kernargSegmentAlignment_ = amd::alignUp(std::max(kernargSegmentAlignment_, 128u),
|
2024-11-08 05:51:17 +00:00
|
|
|
device().info().globalMemCacheLineSize_);
|
2019-04-09 23:24:10 -04:00
|
|
|
|
2018-11-22 14:04:51 -05:00
|
|
|
// Set the workgroup information for the kernel
|
2021-01-10 01:12:54 +00:00
|
|
|
workGroupInfo_.availableLDSSize_ = device().info().localMemSizePerCU_;
|
2018-11-22 14:04:51 -05:00
|
|
|
assert(workGroupInfo_.availableLDSSize_ > 0);
|
|
|
|
|
|
2019-04-09 23:24:10 -04:00
|
|
|
// Get the kernel code handle
|
|
|
|
|
hsa_status_t hsaStatus;
|
|
|
|
|
hsa_executable_symbol_t symbol;
|
2021-01-10 02:32:45 +00:00
|
|
|
hsa_agent_t agent = program()->rocDevice().getBackendDevice();
|
2025-09-19 11:25:30 -04:00
|
|
|
hsaStatus = Hsa::executable_get_symbol_by_name(program()->hsaExecutable(), symbolName().c_str(),
|
2019-04-09 23:24:10 -04:00
|
|
|
&agent, &symbol);
|
2020-04-02 22:13:26 -04:00
|
|
|
if (hsaStatus != HSA_STATUS_SUCCESS) {
|
2020-04-13 22:41:44 -04:00
|
|
|
DevLogPrintfError("Cannot Get Symbol : %s, failed with hsa_status: %d \n", symbolName().c_str(),
|
|
|
|
|
hsaStatus);
|
2020-04-02 22:13:26 -04:00
|
|
|
return false;
|
2019-04-09 23:24:10 -04:00
|
|
|
}
|
2020-04-02 22:13:26 -04:00
|
|
|
|
2025-09-19 11:25:30 -04:00
|
|
|
hsaStatus = Hsa::executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
|
2020-04-02 22:13:26 -04:00
|
|
|
&kernelCodeHandle_);
|
2019-04-09 23:24:10 -04:00
|
|
|
if (hsaStatus != HSA_STATUS_SUCCESS) {
|
2020-04-13 22:41:44 -04:00
|
|
|
DevLogPrintfError(" Cannot Get Symbol Info: %s, failed with hsa_status: %d \n ",
|
|
|
|
|
symbolName().c_str(), hsaStatus);
|
2019-04-09 23:24:10 -04:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2025-09-19 11:25:30 -04:00
|
|
|
hsaStatus = Hsa::executable_symbol_get_info(
|
2022-05-16 11:31:44 +00:00
|
|
|
symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK, &kernelHasDynamicCallStack_);
|
|
|
|
|
if (hsaStatus != HSA_STATUS_SUCCESS) {
|
|
|
|
|
DevLogPrintfError(" Cannot Get Dynamic callstack info, failed with hsa_status: %d \n ",
|
|
|
|
|
hsaStatus);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2020-01-21 18:24:20 -05:00
|
|
|
if (!RuntimeHandle().empty()) {
|
2018-11-22 14:04:51 -05:00
|
|
|
hsa_executable_symbol_t kernelSymbol;
|
|
|
|
|
int variable_size;
|
|
|
|
|
uint64_t variable_address;
|
2025-08-20 16:28:06 +02:00
|
|
|
|
2018-11-22 14:04:51 -05:00
|
|
|
// Only kernels that could be enqueued by another kernel has the RuntimeHandle metadata. The
|
|
|
|
|
// RuntimeHandle metadata is a string that represents a variable from which the library code can
|
|
|
|
|
// retrieve the kernel code object handle of such a kernel. The address of the variable and the
|
|
|
|
|
// kernel code object handle are known only after the hsa executable is loaded. The below code
|
|
|
|
|
// copies the kernel code object handle to the address of the variable.
|
2025-09-19 11:25:30 -04:00
|
|
|
hsaStatus = Hsa::executable_get_symbol_by_name(program()->hsaExecutable(),
|
2020-01-21 18:24:20 -05:00
|
|
|
RuntimeHandle().c_str(), &agent, &kernelSymbol);
|
2020-04-02 22:13:26 -04:00
|
|
|
if (hsaStatus != HSA_STATUS_SUCCESS) {
|
2020-04-13 22:41:44 -04:00
|
|
|
DevLogPrintfError("Cannot get Kernel Symbol by name: %s, failed with hsa_status: %d \n",
|
|
|
|
|
RuntimeHandle().c_str(), hsaStatus);
|
2020-04-02 22:13:26 -04:00
|
|
|
return false;
|
2018-11-22 14:04:51 -05:00
|
|
|
}
|
2020-04-02 22:13:26 -04:00
|
|
|
|
2025-09-19 11:25:30 -04:00
|
|
|
hsaStatus = Hsa::executable_symbol_get_info(
|
2020-04-02 22:13:26 -04:00
|
|
|
kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, &variable_size);
|
|
|
|
|
if (hsaStatus != HSA_STATUS_SUCCESS) {
|
2020-04-13 22:41:44 -04:00
|
|
|
DevLogPrintfError(
|
|
|
|
|
"[ROC][Kernel] Cannot get Kernel Symbol Info, failed with hsa_status: %d \n", hsaStatus);
|
2020-04-02 22:13:26 -04:00
|
|
|
return false;
|
2018-11-22 14:04:51 -05:00
|
|
|
}
|
2018-12-12 15:53:41 -05:00
|
|
|
|
2025-09-19 11:25:30 -04:00
|
|
|
hsaStatus = Hsa::executable_symbol_get_info(
|
2020-04-02 22:13:26 -04:00
|
|
|
kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &variable_address);
|
|
|
|
|
if (hsaStatus != HSA_STATUS_SUCCESS) {
|
2020-04-13 22:41:44 -04:00
|
|
|
DevLogPrintfError("[ROC][Kernel] Cannot get Kernel Address, failed with hsa_status: %d \n",
|
|
|
|
|
hsaStatus);
|
2020-04-02 22:13:26 -04:00
|
|
|
return false;
|
2018-11-22 14:04:51 -05:00
|
|
|
}
|
|
|
|
|
|
2020-04-02 22:13:26 -04:00
|
|
|
const struct RuntimeHandle runtime_handle = {
|
|
|
|
|
kernelCodeHandle_, WorkitemPrivateSegmentByteSize(), WorkgroupGroupSegmentByteSize()};
|
|
|
|
|
hsaStatus =
|
2025-09-19 11:25:30 -04:00
|
|
|
Hsa::memory_copy(reinterpret_cast<void*>(variable_address), &runtime_handle, variable_size);
|
2020-04-02 22:13:26 -04:00
|
|
|
|
2018-11-22 14:04:51 -05:00
|
|
|
if (hsaStatus != HSA_STATUS_SUCCESS) {
|
2020-04-13 22:41:44 -04:00
|
|
|
DevLogPrintfError("[ROC][Kernel] HSA Memory copy failed, failed with hsa_status: %d \n",
|
|
|
|
|
hsaStatus);
|
2018-11-22 14:04:51 -05:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-08-11 05:47:27 +01:00
|
|
|
// This can be set in code object and the value might be different than what HSA reports
|
|
|
|
|
// For example on Navi GPUs someone using -mwavefrontsize64
|
|
|
|
|
// We set the value to HSA if the value is uninitialized
|
|
|
|
|
uint32_t wavefront_size = workGroupInfo_.wavefrontPerSIMD_;
|
|
|
|
|
if (wavefront_size == 0 &&
|
2025-09-19 11:25:30 -04:00
|
|
|
Hsa::agent_get_info(program()->rocDevice().getBackendDevice(), HSA_AGENT_INFO_WAVEFRONT_SIZE,
|
|
|
|
|
&wavefront_size) != HSA_STATUS_SUCCESS) {
|
2020-04-13 22:41:44 -04:00
|
|
|
DevLogPrintfError("[ROC][Kernel] Cannot get Wavefront Size, failed with hsa_status: %d \n",
|
|
|
|
|
hsaStatus);
|
2018-11-22 14:04:51 -05:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
assert(wavefront_size > 0);
|
|
|
|
|
|
2025-09-21 19:23:12 -04:00
|
|
|
workGroupInfo_.availableVGPRs_ = device().info().availableVGPRs_;
|
2025-09-02 15:05:18 +01:00
|
|
|
workGroupInfo_.availableSGPRs_ = device().info().availableSGPRs_;
|
2018-11-22 14:04:51 -05:00
|
|
|
workGroupInfo_.privateMemSize_ = workitemPrivateSegmentByteSize_;
|
|
|
|
|
workGroupInfo_.localMemSize_ = workgroupGroupSegmentByteSize_;
|
|
|
|
|
workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_;
|
|
|
|
|
workGroupInfo_.preferredSizeMultiple_ = wavefront_size;
|
2022-05-16 11:31:44 +00:00
|
|
|
workGroupInfo_.usedStackSize_ = kernelHasDynamicCallStack_;
|
2021-01-10 01:12:54 +00:00
|
|
|
workGroupInfo_.wavefrontPerSIMD_ =
|
|
|
|
|
program()->rocDevice().info().maxWorkItemSizes_[0] / wavefront_size;
|
2025-08-13 13:13:27 -07:00
|
|
|
workGroupInfo_.constMemSize_ = 0;
|
2024-06-04 12:53:52 -07:00
|
|
|
workGroupInfo_.maxDynamicSharedSizeBytes_ =
|
|
|
|
|
static_cast<int>(workGroupInfo_.availableLDSSize_ - workGroupInfo_.localMemSize_);
|
2018-11-22 14:04:51 -05:00
|
|
|
if (workGroupInfo_.size_ == 0) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// handle the printf metadata if any
|
|
|
|
|
std::vector<std::string> printfStr;
|
2019-10-28 18:13:35 -04:00
|
|
|
if (!GetPrintfStr(&printfStr)) {
|
2018-11-22 14:04:51 -05:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!printfStr.empty()) {
|
|
|
|
|
InitPrintf(printfStr);
|
|
|
|
|
}
|
2025-01-22 14:47:02 -05:00
|
|
|
// Add kernel to the map of all kernels on the device
|
|
|
|
|
program()->rocDevice().AddKernel(*this);
|
2018-11-22 14:04:51 -05:00
|
|
|
return true;
|
|
|
|
|
}
|
2016-08-15 18:51:49 -04:00
|
|
|
|
2024-06-06 18:40:49 +01:00
|
|
|
} // namespace amd::roc
|