SWDEV-543366 - Bump PAL_CLIENT_INTERFACE_MAJOR_VERSION 916 --> 932 (#725)
Co-authored-by: Lin, Qun <Quentin.Lin@amd.com>
Co-authored-by: Lin,Qun <qlin@amd.com>
[ROCm/clr commit: ed903e8889]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
b3ae0230a5
Коммит
bfcf0ef4e8
@@ -20,7 +20,7 @@
|
||||
|
||||
set(PAL_CLIENT "OCL")
|
||||
|
||||
set(PAL_CLIENT_INTERFACE_MAJOR_VERSION 916)
|
||||
set(PAL_CLIENT_INTERFACE_MAJOR_VERSION 932)
|
||||
set(GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION 42)
|
||||
set(GPUOPEN_CLIENT_INTERFACE_MINOR_VERSION 0)
|
||||
set(AMD_DK_ROOT $ENV{DK_ROOT})
|
||||
|
||||
@@ -336,6 +336,7 @@ class Kernel : public amd::HeapObject {
|
||||
|
||||
//! Returns the value currently stored in workitemPrivateSegmentByteSize_.
const uint32_t WorkitemPrivateSegmentByteSize() const {
  return workitemPrivateSegmentByteSize_;
}
|
||||
//! Overwrites workitemPrivateSegmentByteSize_ with \p size.
void SetWorkitemPrivateSegmentByteSize(uint32_t size) {
  workitemPrivateSegmentByteSize_ = size;
}
|
||||
|
||||
//! Returns the stored kernelHasDynamicCallStack_ flag.
//! NOTE(review): the method name is spelled "Kernal"; it is left untouched here
//! to keep the interface unchanged.
const bool KernalHasDynamicCallStack() const {
  return kernelHasDynamicCallStack_;
}
|
||||
|
||||
//! Returns the value currently stored in kernargSegmentByteSize_.
const uint32_t KernargSegmentByteSize() const {
  return kernargSegmentByteSize_;
}
|
||||
|
||||
@@ -483,8 +483,10 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(
|
||||
hsaDisp->completion_signal.handle = 0;
|
||||
memcpy(aqlArgBuf + argsBufferSize(), hsaDisp, sizeof(hsa_kernel_dispatch_packet_t));
|
||||
|
||||
if (AMD_HSA_BITS_GET(akc_.kernel_code_properties,
|
||||
AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_QUEUE_PTR)) {
|
||||
static_assert(offsetof(amd_kernel_code_t, kernel_code_properties) ==
|
||||
offsetof(llvm::amdhsa::kernel_descriptor_t, kernel_code_properties));
|
||||
if (AMD_HSA_BITS_GET(akd_.kernel_code_properties,
|
||||
llvm::amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR)) {
|
||||
gpu.addVmMemory(gpu.hsaQueueMem());
|
||||
}
|
||||
|
||||
@@ -511,7 +513,7 @@ bool LightningKernel::postLoad() {
|
||||
|
||||
auto sym = prog().getSymbol(symbolName().c_str(), &agent);
|
||||
|
||||
if (!setKernelCode(sym, &akc_)) {
|
||||
if (!setKernelDescriptor(sym, &akd_)) {
|
||||
return false;
|
||||
}
|
||||
if (!sym->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK,
|
||||
@@ -571,6 +573,23 @@ bool LightningKernel::postLoad() {
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//! Queries the kernel object address for \p sym, stores it in code_, and copies
//! the kernel descriptor found at the matching host address into \p akd.
//!
//! \param sym  Loader symbol of the kernel. A null pointer makes the call fail.
//! \param akd  Destination buffer; receives sizeof(llvm::amdhsa::kernel_descriptor_t)
//!             bytes. A null pointer makes the call fail.
//! \return true when the descriptor was copied; false when an argument is null,
//!         the symbol query fails, or no host address could be resolved.
bool LightningKernel::setKernelDescriptor(amd::hsa::loader::Symbol* sym,
                                          llvm::amdhsa::kernel_descriptor_t* akd) {
  if (sym == nullptr || akd == nullptr) {
    return false;
  }

  // code_ receives the kernel object value reported by the loader symbol.
  if (!sym->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, reinterpret_cast<void*>(&code_))) {
    return false;
  }

  // Resolve where the descriptor lives in the program CPU segment.
  // NOTE(review): assumes findHostKernelAddress() yields 0/null when it has no
  // mapping for code_ -- confirm against its implementation. Passing a null
  // source to memcpy is undefined behaviour, so bail out instead.
  void* const hostDescriptor = reinterpret_cast<void*>(prog().findHostKernelAddress(code_));
  if (hostDescriptor == nullptr) {
    return false;
  }

  // Copy the kernel descriptor of this kernel from the program CPU segment
  memcpy(akd, hostDescriptor, sizeof(llvm::amdhsa::kernel_descriptor_t));

  return true;
}
|
||||
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
|
||||
} // namespace amd::pal
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
#include "device/pal/paldevice.hpp"
|
||||
#include "device/pal/palvirtual.hpp"
|
||||
#include "amd_hsa_kernel_code.h"
|
||||
#include "AMDHSAKernelDescriptor.h"
|
||||
#include "device/pal/palprintf.hpp"
|
||||
#include "hsa.h"
|
||||
|
||||
@@ -87,6 +88,9 @@ class HSAILKernel : public device::Kernel {
|
||||
//! Returns pointer on CPU to AQL code info
|
||||
const amd_kernel_code_t* cpuAqlCode() const {
  // Hands out the address of the embedded akc_ member; nothing is copied.
  return &akc_;
}
|
||||
|
||||
//! Returns pointer on CPU to AQL kernel descriptor info
|
||||
const llvm::amdhsa::kernel_descriptor_t* cpuAqlKd() const {
  // Hands out the address of the embedded akd_ member; nothing is copied.
  return &akd_;
}
|
||||
|
||||
//! Returns memory object with AQL code
|
||||
uint64_t gpuAqlCode() const {
  // code_ is returned by value.
  return code_;
}
|
||||
|
||||
@@ -130,8 +134,11 @@ class HSAILKernel : public device::Kernel {
|
||||
void setWorkGroupInfo(const uint32_t privateSegmentSize, const uint32_t groupSegmentSize,
|
||||
const uint16_t numSGPRs, const uint16_t numVGPRs);
|
||||
|
||||
amd_kernel_code_t akc_; //!< AQL kernel code on CPU
|
||||
uint index_; //!< Kernel index in the program
|
||||
union {
|
||||
amd_kernel_code_t akc_; //!< AQL kernel code on CPU, used by HSAIL
|
||||
llvm::amdhsa::kernel_descriptor_t akd_; //!< AQL kernel descriptor on CPU, used by LC
|
||||
};
|
||||
uint index_; //!< Kernel index in the program
|
||||
|
||||
uint64_t code_; //!< GPU memory pointer to the kernel
|
||||
size_t codeSize_; //!< Size of ISA code
|
||||
@@ -146,6 +153,8 @@ class LightningKernel : public HSAILKernel {
|
||||
const LightningProgram& prog() const;
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
//! Queries the kernel object address for the symbol and copies the kernel descriptor from the program CPU segment
|
||||
bool setKernelDescriptor(amd::hsa::loader::Symbol* sym, llvm::amdhsa::kernel_descriptor_t* akd);
|
||||
//! Initializes the metadata required for this kernel
|
||||
bool init();
|
||||
|
||||
|
||||
@@ -2718,7 +2718,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes,
|
||||
dispatchParam.scratchOffset = scratch->offset_;
|
||||
dispatchParam.workitemPrivateSegmentSize = privateMemSize;
|
||||
}
|
||||
dispatchParam.pCpuAqlCode = hsaKernel.cpuAqlCode();
|
||||
dispatchParam.pCpuAqlCode = hsaKernel.cpuAqlKd();
|
||||
dispatchParam.hsaQueueVa = hsaQueueMem_->vmAddress();
|
||||
if (!hsaKernel.prog().isLC() && hsaKernel.workGroupInfo()->wavesPerSimdHint_ != 0) {
|
||||
constexpr uint32_t kWavesPerSimdLimit = 4;
|
||||
|
||||
Ссылка в новой задаче
Block a user