// // Copyright (c) 2015 Advanced Micro Devices, Inc. All rights reserved. // #pragma once #include "device/pal/palkernel.hpp" #include "amd_hsa_loader.hpp" namespace amd { namespace option { class Options; } // option namespace hsa { namespace loader { class Loader; class Executable; class Context; } // loader } // hsa } // amd //! \namespace pal PAL Device Implementation namespace pal { /*! \addtogroup pal PAL Device Implementation * @{ */ using namespace amd::hsa::loader; class HSAILProgram; class Segment : public amd::HeapObject { public: Segment(); ~Segment(); //! Allocates a segment bool alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t size, size_t align, bool zero); //! Copies data from host to the segment void copy(size_t offset, const void* src, size_t size); //! Segment freeze bool freeze(bool destroySysmem); //! Returns address for GPU access in the segment uint64_t gpuAddress(size_t offset) const { return gpuAccess_->vmAddress() + offset; } //! Returns address for CPU access in the segment void* cpuAddress(size_t offset) const { return ((cpuAccess_ != nullptr) ? cpuAccess_->data() : cpuMem_) + offset; } void DestroyCpuAccess(); private: Memory* gpuAccess_; //!< GPU memory for segment access Memory* cpuAccess_; //!< CPU memory for segment (backing store) address cpuMem_; //!< CPU memory for segment without GPU direct access (backing store) }; class PALHSALoaderContext final : public Context { public: PALHSALoaderContext(HSAILProgram* program) : program_(program) {} virtual ~PALHSALoaderContext() {} hsa_isa_t IsaFromName(const char* name) override; bool IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) override; void* SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, size_t size, size_t align, bool zero) override; bool SegmentCopy(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* dst, size_t offset, const void* src, size_t size) override; void SegmentFree(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size = 0) override; void* SegmentAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) override; void* SegmentHostAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) override; bool SegmentFreeze(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size) override; bool ImageExtensionSupported() override { return false; } hsa_status_t ImageCreate(hsa_agent_t agent, hsa_access_permission_t image_permission, const hsa_ext_image_descriptor_t* image_descriptor, const void* image_data, hsa_ext_image_t* image_handle) override { // not supported assert(false); return HSA_STATUS_ERROR; } hsa_status_t ImageDestroy(hsa_agent_t agent, hsa_ext_image_t image_handle) override { // not supported assert(false); return HSA_STATUS_ERROR; } hsa_status_t SamplerCreate(hsa_agent_t agent, const hsa_ext_sampler_descriptor_t* sampler_descriptor, hsa_ext_sampler_t* sampler_handle) override; //! All samplers are owned by HSAILProgram and are deleted in its destructor. hsa_status_t SamplerDestroy(hsa_agent_t agent, hsa_ext_sampler_t sampler_handle) override; private: PALHSALoaderContext(const PALHSALoaderContext& c); PALHSALoaderContext& operator=(const PALHSALoaderContext& c); pal::HSAILProgram* program_; }; //! \class HSAIL program class HSAILProgram : public device::Program { friend class ClBinary; public: //! Default constructor HSAILProgram(Device& device); HSAILProgram(NullDevice& device); //! Default destructor virtual ~HSAILProgram(); void addGlobalStore(Memory* mem) { globalStores_.push_back(mem); } void setCodeObjects(Segment* seg, Memory* codeGpu, address codeCpu) { codeSegGpu_ = codeGpu; codeSegment_ = seg; } const std::vector& globalStores() const { return globalStores_; } //! Return a typecasted GPU device pal::Device& dev() { return const_cast(static_cast(device())); } //! Returns GPU kernel table const Memory* kernelTable() const { return kernels_; } //! Adds all kernels to the mem handle lists void fillResListWithKernels(VirtualGPU& gpu) const; //! Returns the maximum number of scratch regs used in the program uint maxScratchRegs() const { return maxScratchRegs_; } //! Add internal static sampler void addSampler(Sampler* sampler) { staticSamplers_.push_back(sampler); } //! Returns TRUE if the program contains static samplers bool isStaticSampler() const { return (staticSamplers_.size() != 0); } //! Returns code segement on GPU const Memory& codeSegGpu() const { return *codeSegGpu_; } //! Returns CPU address for a kernel uint64_t findHostKernelAddress(uint64_t devAddr) const { return loader_->FindHostAddress(devAddr); } //! Get symbol by name amd::hsa::loader::Symbol* GetSymbol(const char* symbol_name, const hsa_agent_t *agent) const { return executable_->GetSymbol(symbol_name, agent); } protected: bool saveBinaryAndSetType(type_t type); virtual bool createBinary(amd::option::Options* options); virtual const aclTargetInfo& info(const char* str = ""); virtual bool setKernels(amd::option::Options* options, void* binary, size_t binSize) override; //! Destroys CPU allocations in the code segment void DestroySegmentCpuAccess() const { if (codeSegment_ != nullptr) { codeSegment_->DestroyCpuAccess(); } } private: //! Disable default copy constructor HSAILProgram(const HSAILProgram&); //! Disable operator= HSAILProgram& operator=(const HSAILProgram&); protected: //! Allocate kernel table bool allocKernelTable(); void* rawBinary_; //!< Pointer to the raw binary std::vector globalStores_; //!< Global memory for the program Memory* kernels_; //!< Table with kernel object pointers Memory* codeSegGpu_; //!< GPU memory with code objects Segment* codeSegment_; //!< Pointer to the code segment for this program uint maxScratchRegs_; //!< Maximum number of scratch regs used in the program by individual kernel std::list staticSamplers_; //!< List od internal static samplers amd::hsa::loader::Loader* loader_; //!< Loader object amd::hsa::loader::Executable* executable_; //!< Executable for HSA Loader PALHSALoaderContext loaderContext_; //!< Context for HSA Loader }; //! \class Lightning Compiler Program class LightningProgram : public HSAILProgram { public: LightningProgram(NullDevice& device) : HSAILProgram(device) { isLC_ = true; xnackEnabled_ = dev().hwInfo()->xnackEnabled_; machineTarget_ = dev().hwInfo()->machineTargetLC_; } LightningProgram(Device& device) : HSAILProgram(device) { isLC_ = true; xnackEnabled_ = dev().hwInfo()->xnackEnabled_; machineTarget_ = dev().hwInfo()->machineTargetLC_; } virtual ~LightningProgram() {} protected: virtual bool setKernels(amd::option::Options* options, void* binary, size_t binSize) override; virtual bool createBinary(amd::option::Options* options) override; }; /*@}*/} // namespace pal