// Listing metadata captured from the repository file viewer:
//   File: rocm-systems/projects/clr/rocclr/device/pal/palkernel.hpp
//   2025-10-23 11:21:49 -07:00, 151 lines, 5.3 KiB, C++

/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#pragma once
#include "device/device.hpp"
#include "utils/macros.hpp"
#include "platform/command.hpp"
#include "platform/program.hpp"
#include "platform/kernel.hpp"
#include "platform/sampler.hpp"
#include "device/pal/paldevice.hpp"
#include "device/pal/palvirtual.hpp"
#include "amd_hsa_kernel_code.h"
#include "AMDHSAKernelDescriptor.h"
#include "device/pal/palprintf.hpp"
#include "hsa.h"
// Forward declarations of amd::hsa types, written with C++17 nested-namespace
// definitions so that each declaration names its full enclosing scope.
namespace amd::hsa::loader {
class Symbol;
}  // namespace amd::hsa::loader

namespace amd::hsa::code::Kernel {
class Metadata;
}  // namespace amd::hsa::code::Kernel
//! \namespace amd::pal PAL Device Implementation
namespace amd::pal {
// Forward declarations of the PAL backend types named by this header
// (listed alphabetically; the order of forward declarations is irrelevant).
class Device;      // cast target of Kernel::palDevice()
class NullDevice;  // cast target of Kernel::palNullDevice()
class Program;     // program type taken by the Kernel constructor
class VirtualGPU;  // GPU context type taken by Kernel::loadArguments()
/*! \addtogroup pal PAL Device Implementation
* @{
*/
//! PAL kernel object: extends device::Kernel with the CPU-side AQL kernel
//! code / kernel descriptor and the GPU address and size of the kernel's ISA.
class Kernel : public device::Kernel {
 public:
  //! Constructs the kernel from its name, its PAL program and the internal-kernel flag
  Kernel(std::string name, pal::Program* prog, bool internalKernel);

  virtual ~Kernel();

  //! Initializes the metadata required for this kernel
  //! \return true on success
  bool init();

  //! Setup after code object loading
  //! \return true on success
  bool postLoad();

  //! Returns PAL, possibly null, device object, associated with this kernel.
  const NullDevice& palNullDevice() const { return reinterpret_cast<const NullDevice&>(dev_); }

  //! Returns PAL device object, associated with this kernel which must not be the null device.
  //! Asserts that the underlying device is online before casting.
  const Device& palDevice() const {
    assert(dev_.isOnline());
    return reinterpret_cast<const Device&>(dev_);
  }

  //! Returns the program associated with this kernel
  const pal::Program& prog() const;

  //! Returns LDS size used in this kernel (the workgroup group-segment byte size)
  uint32_t ldsSize() const { return WorkgroupGroupSegmentByteSize(); }

  //! Returns a CPU pointer to the AQL kernel code info.
  //! The storage is shared with the kernel descriptor (see the anonymous union below).
  const amd_kernel_code_t* cpuAqlCode() const { return &akc_; }

  //! Returns a CPU pointer to the AQL kernel descriptor info.
  //! The storage is shared with the kernel code info (see the anonymous union below).
  const llvm::amdhsa::kernel_descriptor_t* cpuAqlKd() const { return &akd_; }

  //! Returns the GPU memory address of the AQL code
  uint64_t gpuAqlCode() const { return code_; }

  //! Returns size of AQL code
  size_t aqlCodeSize() const { return codeSize_; }

  //! Returns the size of argument buffer
  size_t argsBufferSize() const { return kernargSegmentByteSize_; }

  //! Returns spill reg size per workitem
  uint32_t spillSegSize() const { return workGroupInfo_.privateMemSize_; }

  //! Returns AQL packet in CPU memory
  //! if the kernel arguments were successfully loaded, otherwise NULL
  hsa_kernel_dispatch_packet_t* loadArguments(
      VirtualGPU& gpu,                     //!< Running GPU context
      const amd::Kernel& kernel,           //!< AMD kernel object
      const amd::NDRangeContainer& sizes,  //!< NDrange container
      const_address params,                //!< Application arguments for the kernel
      size_t ldsAddress,                   //!< LDS address that includes all arguments.
      uint64_t vmDefQueue,                 //!< GPU VM default queue pointer
      uint64_t* vmParentWrap,              //!< GPU VM parent aql wrap object
      uint32_t* aql_index  //!< AQL packet index in the packets array for debugger
  ) const;

  //! Returns the kernel index in the program
  uint index() const { return index_; }

  //! Get the kernel descriptor and copy the code object from the program CPU segment
  bool setKernelDescriptor(amd::hsa::loader::Symbol* sym, llvm::amdhsa::kernel_descriptor_t* akd);

 private:
  //! Copy construction is disabled (deleted so misuse is diagnosed at compile time)
  Kernel(const Kernel&) = delete;

  //! Copy assignment is disabled (deleted so misuse is diagnosed at compile time)
  Kernel& operator=(const Kernel&) = delete;

 protected:
  //! Get the kernel code and copy the code object from the program CPU segment
  bool setKernelCode(amd::hsa::loader::Symbol* sym, amd_kernel_code_t* akc);

  //! Set up the workgroup info based on the kernel metadata
  void setWorkGroupInfo(const uint32_t privateSegmentSize, const uint32_t groupSegmentSize,
                        const uint16_t numSGPRs, const uint16_t numVGPRs);

  //! Both representations occupy the same storage; only one is meaningful for a given kernel.
  union {
    amd_kernel_code_t akc_;                  //!< AQL kernel code on CPU, used by HSAIL
    llvm::amdhsa::kernel_descriptor_t akd_;  //!< AQL kernel descriptor on CPU, used by LC
  };

  uint index_;       //!< Kernel index in the program
  uint64_t code_;    //!< GPU memory pointer to the kernel
  size_t codeSize_;  //!< Size of ISA code
};
/*! @} */
} // namespace amd::pal