P4 to Git Change 1330796 by gandryey@gera-w8 on 2016/10/24 15:12:41

SWDEV-86035 - Add PAL backend to OpenCL
	- Use the loader to load code objects and avoid a separate allocation for each individual kernel

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/pal/Makefile#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.hpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#33 edit


[ROCm/clr commit: 4e1f32bd71]
Этот коммит содержится в:
foreman
2016-10-24 15:25:54 -04:00
родитель eea91c7d61
Коммит bc18cf7a72
5 изменённых файлов: 185 добавлений и 94 удалений
+6 -27
Просмотреть файл
@@ -377,11 +377,11 @@ HSAILKernel::aqlCreateHWInfo(amd::hsa::loader::Symbol *sym)
if (!sym) {
return false;
}
uint64_t akc_addr = 0;
if (!sym->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, reinterpret_cast<void*>(&akc_addr))) {
if (!sym->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, reinterpret_cast<void*>(&code_))) {
return false;
}
amd_kernel_code_t *akc = reinterpret_cast<amd_kernel_code_t*>(akc_addr);
amd_kernel_code_t *akc = reinterpret_cast<amd_kernel_code_t*>(prog().findHostKernelAddress(code_));
cpuAqlCode_ = akc;
if (!sym->GetInfo(HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_SIZE, reinterpret_cast<void*>(&codeSize_))) {
return false;
@@ -390,22 +390,6 @@ HSAILKernel::aqlCreateHWInfo(amd::hsa::loader::Symbol *sym)
if (!sym->GetInfo(HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_ALIGN, reinterpret_cast<void*>(&akc_align))) {
return false;
}
// Allocate HW resources for the real program only
if (!prog().isNull()) {
code_ = new Memory(dev(), amd::alignUp(codeSize_, akc_align));
Resource::MemoryType type = Resource::Local;
// Initialize kernel ISA code
if (code_ && code_->create(type)) {
constexpr bool WaitForUpload = true;
code_->writeRawData(*code_->dev().xferQueue(), 0, codeSize_,
reinterpret_cast<void*>(akc), WaitForUpload);
}
else {
LogError("Failed to allocate ISA code!");
return false;
}
}
assert((akc->workitem_private_segment_byte_size & 3) == 0 &&
"Scratch must be DWORD aligned");
@@ -591,9 +575,8 @@ HSAILKernel::HSAILKernel(std::string name,
, dev_(prog->dev())
, prog_(*prog)
, index_(0)
, code_(nullptr)
, code_(0)
, codeSize_(0)
, hwMetaData_(nullptr)
, extraArgumentsNum_(extraArgsNum)
, waveLimiter_(this, (prog->isNull() ? 1 :
dev().properties().gfxipProperties.shaderCore.numCusPerShaderArray) * dev().hwInfo()->simdPerCU_)
@@ -608,10 +591,6 @@ HSAILKernel::~HSAILKernel()
delete arg;
arguments_.pop_back();
}
delete [] hwMetaData_;
delete code_;
}
bool
@@ -1217,7 +1196,7 @@ HSAILKernel::loadArguments(
// Initialize kernel ISA and execution buffer requirements
hsaDisp->private_segment_size = spillSegSize();
hsaDisp->group_segment_size = ldsAddress - ldsSize();
hsaDisp->kernel_object = gpuAqlCode()->vmAddress();
hsaDisp->kernel_object = gpuAqlCode();
ConstBuffer* cb = gpu.constBufs_[0];
cb->uploadDataToHw(argsBufferSize() + sizeof(hsa_kernel_dispatch_packet_t));
@@ -1228,7 +1207,7 @@ HSAILKernel::loadArguments(
hsaDisp->completion_signal.handle = 0;
memList.push_back(cb);
memList.push_back(gpuAqlCode());
memList.push_back(&prog().codeSegGpu());
for (pal::Memory * mem : prog().globalStores()) {
memList.push_back(mem);
}
+3 -5
Просмотреть файл
@@ -154,7 +154,7 @@ public:
const amd_kernel_code_t* cpuAqlCode() const { return cpuAqlCode_; }
//! Returns memory object with AQL code
pal::Memory* gpuAqlCode() const { return code_; }
uint64_t gpuAqlCode() const { return code_; }
//! Returns size of AQL code
size_t aqlCodeSize() const { return codeSize_; }
@@ -249,12 +249,10 @@ protected:
std::vector<PrintfInfo> printf_; //!< Format strings for GPU printf support
uint index_; //!< Kernel index in the program
pal::Memory* code_; //!< Memory object with ISA code
uint64_t code_; //!< GPU memory pointer to the kernel
size_t codeSize_; //!< Size of ISA code
char* hwMetaData_; //!< SI metadata
uint extraArgumentsNum_; //! Number of extra (hidden) kernel arguments
uint extraArgumentsNum_; //! Number of extra (hidden) kernel arguments
union Flags {
struct {
+141 -39
Просмотреть файл
@@ -34,6 +34,8 @@ HSAILProgram::HSAILProgram(Device& device)
, binaryElf_(nullptr)
, rawBinary_(nullptr)
, kernels_(nullptr)
, codeSegGpu_(nullptr)
, codeSegCpu_(nullptr)
, maxScratchRegs_(0)
, flags_(0)
, executable_(nullptr)
@@ -54,6 +56,8 @@ HSAILProgram::HSAILProgram(NullDevice& device)
, binaryElf_(nullptr)
, rawBinary_(nullptr)
, kernels_(nullptr)
, codeSegGpu_(nullptr)
, codeSegCpu_(nullptr)
, maxScratchRegs_(0)
, flags_(0)
, executable_(nullptr)
@@ -93,6 +97,8 @@ HSAILProgram::~HSAILProgram()
}
delete kernels_;
amd::hsa::loader::Loader::Destroy(loader_);
assert((codeSegGpu_ == nullptr) && "Loader didn't destroy code!");
assert((codeSegCpu_ == nullptr) && "Loader didn't destroy code!");
}
bool
@@ -470,6 +476,9 @@ HSAILProgram::linkImpl(amd::option::Options* options)
aclType continueCompileFrom = ACL_TYPE_LLVMIR_BINARY;
bool finalize = true;
bool hsaLoad = true;
internal_ = (compileOptions_.find("-cl-internal-kernel") !=
std::string::npos) ? true : false;
// If !binaryElf_ then program must have been created using clCreateProgramWithBinary
if (!binaryElf_) {
@@ -545,6 +554,11 @@ HSAILProgram::linkImpl(amd::option::Options* options)
buildLog_ += "Error: AMD HSA Code Object loading failed.\n";
return false;
}
status = executable_->Freeze(nullptr);
if (status != HSA_STATUS_SUCCESS) {
buildLog_ += "Error: AMD HSA Code Object freeze failed.\n";
return false;
}
}
size_t kernelNamesSize = 0;
errorCode = aclQueryInfo(dev().compiler(), binaryElf_, RT_KERNEL_NAMES, nullptr, nullptr, &kernelNamesSize);
@@ -687,8 +701,7 @@ HSAILProgram::allocKernelTable()
kernels_->map(nullptr, pal::Resource::WriteOnly));
for (auto& it : kernels()) {
HSAILKernel* kernel = static_cast<HSAILKernel*>(it.second);
table[kernel->index()] = static_cast<size_t>(
kernel->gpuAqlCode()->vmAddress());
table[kernel->index()] = static_cast<size_t>(kernel->gpuAqlCode());
}
kernels_->unmap(nullptr);
}
@@ -699,10 +712,7 @@ void
HSAILProgram::fillResListWithKernels(
std::vector<const Memory*>& memList) const
{
for (auto& it : kernels()) {
memList.push_back(
static_cast<HSAILKernel*>(it.second)->gpuAqlCode());
}
memList.push_back(&codeSegGpu());
}
const aclTargetInfo &
@@ -749,7 +759,7 @@ HSAILProgram::saveBinaryAndSetType(type_t type)
return true;
}
hsa_isa_t ORCAHSALoaderContext::IsaFromName(const char *name) {
hsa_isa_t PALHSALoaderContext::IsaFromName(const char *name) {
hsa_isa_t isa = {0};
if (!strcmp(Gfx700, name)) { isa.handle = gfx700; return isa; }
if (!strcmp(Gfx701, name)) { isa.handle = gfx701; return isa; }
@@ -762,7 +772,7 @@ hsa_isa_t ORCAHSALoaderContext::IsaFromName(const char *name) {
return isa;
}
bool ORCAHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) {
bool PALHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) {
switch (program_->dev().hwInfo()->gfxipVersion_) {
default:
LogError("Unsupported gfxip version");
@@ -785,7 +795,7 @@ bool ORCAHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa)
}
}
void* ORCAHSALoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment,
void* PALHSALoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment,
hsa_agent_t agent, size_t size, size_t align, bool zero) {
assert(size);
assert(align);
@@ -795,13 +805,13 @@ void* ORCAHSALoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment,
case AMDGPU_HSA_SEGMENT_READONLY_AGENT:
return AgentGlobalAlloc(agent, size, align, zero);
case AMDGPU_HSA_SEGMENT_CODE_AGENT:
return KernelCodeAlloc(agent, size, align, zero);
return KernelCodeAlloc(size, align, zero);
default:
assert(false); return 0;
}
}
bool ORCAHSALoaderContext::SegmentCopy(amdgpu_hsa_elf_segment_t segment,
bool PALHSALoaderContext::SegmentCopy(amdgpu_hsa_elf_segment_t segment,
hsa_agent_t agent, void* dst, size_t offset, const void* src, size_t size) {
switch (segment) {
case AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM:
@@ -815,8 +825,9 @@ bool ORCAHSALoaderContext::SegmentCopy(amdgpu_hsa_elf_segment_t segment,
}
}
void ORCAHSALoaderContext::SegmentFree(amdgpu_hsa_elf_segment_t segment,
hsa_agent_t agent, void* seg, size_t size) {
void PALHSALoaderContext::SegmentFree(
amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size)
{
switch (segment) {
case AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM:
case AMDGPU_HSA_SEGMENT_GLOBAL_AGENT:
@@ -827,25 +838,72 @@ void ORCAHSALoaderContext::SegmentFree(amdgpu_hsa_elf_segment_t segment,
}
}
void* ORCAHSALoaderContext::SegmentAddress(amdgpu_hsa_elf_segment_t segment,
hsa_agent_t agent, void* seg, size_t offset) {
void* PALHSALoaderContext::SegmentAddress(
amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset)
{
assert(seg);
switch (segment) {
case AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM:
case AMDGPU_HSA_SEGMENT_GLOBAL_AGENT:
case AMDGPU_HSA_SEGMENT_READONLY_AGENT: {
case AMDGPU_HSA_SEGMENT_CODE_AGENT:
if (!program_->isNull()) {
pal::Memory *gpuMem = reinterpret_cast<pal::Memory*>(seg);
return reinterpret_cast<void*>(gpuMem->vmAddress() + offset);
}
else {
return reinterpret_cast<address>(seg) + offset;
}
}
case AMDGPU_HSA_SEGMENT_CODE_AGENT: return (char*) seg + offset;
default:
assert(false); return nullptr;
}
}
hsa_status_t ORCAHSALoaderContext::SamplerCreate(
//! Returns the host (CPU) address that backs a loader segment.
//!
//! Only the kernel code segment has a host-side copy: the result is the
//! program's CPU code segment pointer advanced by \p offset. Every other
//! segment kind yields nullptr.
//!
//! \param segment  kind of the segment being queried
//! \param agent    unused
//! \param seg      segment handle; only checked to be non-null
//! \param offset   byte offset inside the segment
void* PALHSALoaderContext::SegmentHostAddress(
    amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset)
{
    assert(seg);
    if (segment == AMDGPU_HSA_SEGMENT_CODE_AGENT) {
        return program_->codeSegCpu() + offset;
    }
    // Global program/agent, read-only agent and unknown segments
    // have no host address
    return nullptr;
}
//! Finalizes a loader segment once the executable is frozen.
//!
//! For the kernel code segment of a real (non-null) program, \p size bytes
//! of the program's CPU code copy are written into the GPU code segment
//! through the device transfer queue. Global and read-only segments need
//! no work.
//!
//! \return true for every known segment kind; false (after asserting) for
//!         an unknown one
bool PALHSALoaderContext::SegmentFreeze(
    amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size)
{
    assert(seg);

    if (segment == AMDGPU_HSA_SEGMENT_CODE_AGENT) {
        if (!program_->isNull()) {
            // The flag name suggests the write blocks until the copy is done
            constexpr bool WaitForCopy = true;
            const pal::Memory& gpuCode = program_->codeSegGpu();
            gpuCode.writeRawData(*gpuCode.dev().xferQueue(), 0, size,
                program_->codeSegCpu(), WaitForCopy);
        }
        return true;
    }

    switch (segment) {
    case AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM:
    case AMDGPU_HSA_SEGMENT_GLOBAL_AGENT:
    case AMDGPU_HSA_SEGMENT_READONLY_AGENT:
        // Nothing to do for the data segments
        return true;
    default:
        assert(false);
        return false;
    }
}
hsa_status_t PALHSALoaderContext::SamplerCreate(
hsa_agent_t agent,
const hsa_ext_sampler_descriptor_t *sampler_descriptor,
hsa_ext_sampler_t *sampler_handle)
@@ -897,8 +955,9 @@ hsa_status_t ORCAHSALoaderContext::SamplerCreate(
return HSA_STATUS_SUCCESS;
}
hsa_status_t ORCAHSALoaderContext::SamplerDestroy(
hsa_agent_t agent, hsa_ext_sampler_t sampler_handle) {
hsa_status_t PALHSALoaderContext::SamplerDestroy(
hsa_agent_t agent, hsa_ext_sampler_t sampler_handle)
{
if (!agent.handle) {
return HSA_STATUS_ERROR_INVALID_AGENT;
}
@@ -908,7 +967,8 @@ hsa_status_t ORCAHSALoaderContext::SamplerDestroy(
return HSA_STATUS_SUCCESS;
}
void* ORCAHSALoaderContext::CpuMemAlloc(size_t size, size_t align, bool zero) {
address PALHSALoaderContext::CpuMemAlloc(size_t size, size_t align, bool zero)
{
assert(size);
assert(align);
assert(sizeof(void*) == 8 || sizeof(void*) == 4);
@@ -917,26 +977,21 @@ void* ORCAHSALoaderContext::CpuMemAlloc(size_t size, size_t align, bool zero) {
if (zero) {
memset(ptr, 0, size);
}
return ptr;
return reinterpret_cast<address>(ptr);
}
bool ORCAHSALoaderContext::CpuMemCopy(void *dst, size_t offset, const void* src, size_t size) {
if (!dst || !src || dst == src) {
return false;
}
if (0 == size) {
return true;
}
amd::Os::fastMemcpy((char*)dst + offset, src, size);
return true;
bool PALHSALoaderContext::CpuMemCopy(void *dst, size_t offset, const void* src, size_t size)
{
amd::Os::fastMemcpy((char*)dst + offset, src, size);
return true;
}
void* ORCAHSALoaderContext::GpuMemAlloc(size_t size, size_t align, bool zero) {
void* PALHSALoaderContext::GpuMemAlloc(size_t size, size_t align, bool zero) {
assert(size);
assert(align);
assert(sizeof(void*) == 8 || sizeof(void*) == 4);
if (program_->isNull()) {
return new char[size];
return CpuMemAlloc(size, align, zero);
}
pal::Memory* mem = new pal::Memory(program_->dev(), amd::alignUp(size, align));
@@ -945,7 +1000,7 @@ void* ORCAHSALoaderContext::GpuMemAlloc(size_t size, size_t align, bool zero) {
return nullptr;
}
assert(program_->dev().xferQueue());
if (zero) {
if (zero && !program_->isInternal()) {
char pattern = 0;
program_->dev().xferMgr().fillBuffer(*mem, &pattern, sizeof(pattern), amd::Coord3D(0), amd::Coord3D(size));
}
@@ -954,7 +1009,7 @@ void* ORCAHSALoaderContext::GpuMemAlloc(size_t size, size_t align, bool zero) {
return mem;
}
bool ORCAHSALoaderContext::GpuMemCopy(void *dst, size_t offset, const void *src, size_t size) {
bool PALHSALoaderContext::GpuMemCopy(void *dst, size_t offset, const void *src, size_t size) {
if (!dst || !src || dst == src) {
return false;
}
@@ -962,7 +1017,7 @@ bool ORCAHSALoaderContext::GpuMemCopy(void *dst, size_t offset, const void *src,
return true;
}
if (program_->isNull()) {
memcpy(reinterpret_cast<address>(dst) + offset, src, size);
CpuMemCopy(dst, offset, src, size);
return true;
}
assert(program_->dev().xferQueue());
@@ -972,16 +1027,62 @@ bool ORCAHSALoaderContext::GpuMemCopy(void *dst, size_t offset, const void *src,
return true;
}
void ORCAHSALoaderContext::GpuMemFree(void *ptr, size_t size)
void PALHSALoaderContext::GpuMemFree(void *ptr, size_t size)
{
if (program_->isNull()) {
delete[] reinterpret_cast<char*>(ptr);
CpuMemFree(ptr, size);
}
else {
delete reinterpret_cast<pal::Memory*>(ptr);
}
}
//! Allocates the storage for the program's kernel code segment.
//!
//! A host buffer is allocated for every program. A real (non-null) program
//! additionally gets a pal::Memory object of amd::alignUp(size, align) bytes
//! created as pal::Resource::Local. On success both objects are registered
//! with the program through setCodeObjects().
//!
//! \param size   size of the code segment in bytes
//! \param align  required alignment of the segment
//! \param zero   forwarded to CpuMemAlloc() for the host buffer
//!
//! \return the segment handle: the pal::Memory object for a real program or
//!         the host buffer for a null program; nullptr if any allocation
//!         failed, in which case nothing stays allocated or registered
void* PALHSALoaderContext::KernelCodeAlloc(
    size_t size, size_t align, bool zero)
{
    const bool nullProgram = program_->isNull();

    // GPU memory exists for the real program only
    pal::Memory* mem = nullptr;
    if (!nullProgram) {
        mem = new pal::Memory(program_->dev(), amd::alignUp(size, align));
        if ((mem == nullptr) || !mem->create(pal::Resource::Local)) {
            delete mem;
            program_->setCodeObjects(nullptr, nullptr);
            return nullptr;
        }
    }

    address host = CpuMemAlloc(size, align, zero);
    if (host == nullptr) {
        // Don't leave a half-initialized code segment behind
        delete mem;
        program_->setCodeObjects(nullptr, nullptr);
        return nullptr;
    }

    program_->setCodeObjects(mem, host);

    // A null program has no GPU memory, so its segment handle is the host
    // buffer. The null-program paths of KernelCodeCopy() and SegmentAddress()
    // treat the handle as a plain CPU pointer.
    if (nullProgram) {
        return host;
    }
    return mem;
}
bool PALHSALoaderContext::KernelCodeCopy(void *dst, size_t offset, const void *src, size_t size)
{
if (!dst || !src || dst == src) {
return false;
}
if (0 == size) {
return true;
}
if (program_->isNull()) {
return CpuMemCopy(dst, offset, src, size);
}
assert(program_->dev().xferQueue());
pal::Memory* mem = reinterpret_cast<pal::Memory*>(dst);
if (mem == &program_->codeSegGpu()) {
return CpuMemCopy(program_->codeSegCpu(), offset, src, size);
}
assert(!"The segement doesn't match code segment in the program!");
return false;
}
void PALHSALoaderContext::KernelCodeFree(void *ptr, size_t size)
{
CpuMemFree(program_->codeSegCpu(), size);
if (!program_->isNull()) {
delete reinterpret_cast<pal::Memory*>(ptr);
}
program_->setCodeObjects(nullptr, nullptr);
}
#if defined(WITH_LIGHTNING_COMPILER)
static hsa_status_t
@@ -1019,6 +1120,8 @@ bool
LightningProgram::linkImpl(amd::option::Options *options)
{
using namespace amd::opencl_driver;
internal_ = (compileOptions_.find("-cl-internal-kernel") !=
std::string::npos) ? true : false;
aclType continueCompileFrom = llvmBinary_.empty()
? getNextCompilationStageFromBinary(options)
@@ -1270,12 +1373,11 @@ LightningProgram::setKernels(
return false;
}
/* FIXME_lmoriche: We need to call this!
status = executable_->Freeze(nullptr);
if (status != HSA_STATUS_SUCCESS) {
buildLog_ += "Error: Freezing the executable failed: ";
return false;
}*/
}
size_t progvarsTotalSize = 0;
+33 -21
Просмотреть файл
@@ -34,11 +34,11 @@ namespace pal {
using namespace amd::hsa::loader;
class HSAILProgram;
class ORCAHSALoaderContext final: public Context {
class PALHSALoaderContext final: public Context {
public:
ORCAHSALoaderContext(HSAILProgram* program): program_(program) {}
PALHSALoaderContext(HSAILProgram* program): program_(program) {}
virtual ~ORCAHSALoaderContext() {}
virtual ~PALHSALoaderContext() {}
hsa_isa_t IsaFromName(const char *name) override;
@@ -58,12 +58,10 @@ public:
hsa_agent_t agent, void* seg, size_t offset) override;
void* SegmentHostAddress(amdgpu_hsa_elf_segment_t segment,
hsa_agent_t agent, void* seg, size_t offset) override {
return nullptr;
}
hsa_agent_t agent, void* seg, size_t offset) override;
bool SegmentFreeze(amdgpu_hsa_elf_segment_t segment,
hsa_agent_t agent, void* seg, size_t size) override { return false; }
hsa_agent_t agent, void* seg, size_t size) override;
bool ImageExtensionSupported() override { return false; }
@@ -109,20 +107,13 @@ private:
GpuMemFree(ptr, size);
}
void* KernelCodeAlloc(
hsa_agent_t agent, size_t size, size_t align, bool zero) {
return CpuMemAlloc(size, align, zero);
}
void* KernelCodeAlloc(size_t size, size_t align, bool zero);
bool KernelCodeCopy(void *dst, size_t offset, const void *src, size_t size) {
return CpuMemCopy(dst, offset, src, size);
}
bool KernelCodeCopy(void *dst, size_t offset, const void *src, size_t size);
void KernelCodeFree(void *ptr, size_t size) {
CpuMemFree(ptr, size);
}
void KernelCodeFree(void *ptr, size_t size);
void* CpuMemAlloc(size_t size, size_t align, bool zero);
address CpuMemAlloc(size_t size, size_t align, bool zero);
bool CpuMemCopy(void *dst, size_t offset, const void* src, size_t size);
@@ -136,9 +127,9 @@ private:
void GpuMemFree(void *ptr, size_t size = 0);
ORCAHSALoaderContext(const ORCAHSALoaderContext &c);
PALHSALoaderContext(const PALHSALoaderContext &c);
ORCAHSALoaderContext& operator=(const ORCAHSALoaderContext &c);
PALHSALoaderContext& operator=(const PALHSALoaderContext &c);
pal::HSAILProgram* program_;
};
@@ -160,6 +151,9 @@ public:
void addGlobalStore(Memory* mem) { globalStores_.push_back(mem); }
void setCodeObjects(Memory* codeGpu, address codeCpu)
{ codeSegGpu_ = codeGpu; codeSegCpu_ = codeCpu; }
const std::vector<Memory*>& globalStores() const { return globalStores_; }
//! Return a typecasted GPU device
@@ -182,9 +176,24 @@ public:
//! Returns TRUE if the program just compiled
bool isNull() const { return isNull_; }
//! Returns TRUE if the program is used internally by the runtime
bool isInternal() const { return internal_; }
//! Returns TRUE if the program contains static samplers
bool isStaticSampler() const { return (staticSamplers_.size() != 0); }
//! Returns code segment on GPU
const Memory& codeSegGpu() const { return *codeSegGpu_; }
//! Returns code segment on CPU
address codeSegCpu() const { return codeSegCpu_; }
//! Returns CPU address for a kernel
uint64_t findHostKernelAddress(uint64_t devAddr) const
{
return loader_->FindHostAddress(devAddr);
}
protected:
//! pre-compile setup for GPU
virtual bool initBuild(amd::option::Options* options);
@@ -270,17 +279,20 @@ protected:
aclBinaryOptions binOpts_; //!< Binary options to create aclBinary
std::vector<Memory*> globalStores_; //!< Global memory for the program
Memory* kernels_; //!< Table with kernel object pointers
Memory* codeSegGpu_; //!< GPU memory with code objects
address codeSegCpu_; //!< CPU memory with code objects
uint maxScratchRegs_; //!< Maximum number of scratch regs used in the program by individual kernel
std::list<Sampler*> staticSamplers_; //!< List of internal static samplers
union {
struct {
uint32_t isNull_ : 1; //!< Null program no memory allocations
uint32_t internal_ : 1; //!< Internal blit program
};
uint32_t flags_; //!< Program flags
};
amd::hsa::loader::Loader* loader_; //!< Loader object
amd::hsa::loader::Executable* executable_; //!< Executable for HSA Loader
ORCAHSALoaderContext loaderContext_; //!< Context for HSA Loader
PALHSALoaderContext loaderContext_; //!< Context for HSA Loader
};
#if defined(WITH_LIGHTNING_COMPILER)
+2 -2
Просмотреть файл
@@ -3300,7 +3300,7 @@ VirtualGPU::buildKernelInfo(const HSAILKernel& hsaKernel,
kernelInfo.pAqlQueuePtr = reinterpret_cast<void*>(hsaQueueMem_->vmAddress());
// Get the address of the kernel code and its size for CPU access
pal::Memory* aqlCode = hsaKernel.gpuAqlCode();
/* pal::Memory* aqlCode = hsaKernel.gpuAqlCode();
if (nullptr != aqlCode) {
address aqlCodeAddr = static_cast<address>(aqlCode->map(nullptr, 0));
dbgManager->setKernelCodeInfo(aqlCodeAddr, hsaKernel.aqlCodeSize());
@@ -3309,7 +3309,7 @@ VirtualGPU::buildKernelInfo(const HSAILKernel& hsaKernel,
else {
dbgManager->setKernelCodeInfo(nullptr, 0);
}
*/
kernelInfo.trapPresent = false;
kernelInfo.trapHandler = nullptr;
kernelInfo.trapHandlerBuffer = nullptr;