Rename device access functions

Rename the functions that access devices so that each name reflects the
derived device type it returns. This includes the base device::Device and
the derived gpu/pal/roc device classes, in both NullDevice and Device forms.
Call sites are changed to use the least derived version that suffices, to
clarify what operations will be available.

Change-Id: I1abb6bfed7efa24852bc8d0d49acaea357d8b5d0
This commit is contained in:
Tony Tye
2021-01-10 01:12:54 +00:00
parent f679b05df7
commit 001fd66cac
13 changed files with 147 additions and 127 deletions
+4 -4
View File
@@ -674,7 +674,7 @@ void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize,
if (workGroupInfo()->compileSize_[0] == 0) {
// Find the default local workgroup size, if it wasn't specified
if (lclWorkSize[0] == 0) {
if ((dev().settings().overrideLclSet & (1 << (workDim - 1))) == 0) {
if ((device().settings().overrideLclSet & (1 << (workDim - 1))) == 0) {
// Find threads per group
size_t thrPerGrp = workGroupInfo()->size_;
@@ -685,7 +685,7 @@ void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize,
// and it's 2 or 3-dimensional workload
(workDim > 1) && (((gblWorkSize[0] % 16) == 0) && ((gblWorkSize[1] % 16) == 0))) {
// Use 8x8 workgroup size if kernel has image writes
if (flags_.imageWriteEna_ || (thrPerGrp != dev().info().preferredWorkGroupSize_)) {
if (flags_.imageWriteEna_ || (thrPerGrp != device().info().preferredWorkGroupSize_)) {
lclWorkSize[0] = 8;
lclWorkSize[1] = 8;
}
@@ -709,7 +709,7 @@ void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize,
}
// Assuming DWORD access
const uint cacheLineMatch = dev().info().globalMemCacheLineSize_ >> 2;
const uint cacheLineMatch = device().info().globalMemCacheLineSize_ >> 2;
// Check if we couldn't find optimal workload
if (((lclWorkSize.product() % workGroupInfo()->wavefrontSize_) != 0) ||
@@ -1095,7 +1095,7 @@ bool Kernel::GetAttrCodePropMetadata() {
InitParameters(kernelMetaNode);
// Set the workgroup information for the kernel
workGroupInfo_.availableLDSSize_ = dev().info().localMemSizePerCU_;
workGroupInfo_.availableLDSSize_ = device().info().localMemSizePerCU_;
workGroupInfo_.availableSGPRs_ = 104;
workGroupInfo_.availableVGPRs_ = 256;
+1 -1
View File
@@ -407,7 +407,7 @@ class Kernel : public amd::HeapObject {
};
//! Returns GPU device object, associated with this kernel
const amd::Device& dev() const { return dev_; }
const amd::Device& device() const { return dev_; }
//! Records the vector type hint string in the kernel's workgroup info
void setVecTypeHint(const std::string& hint) {
  workGroupInfo_.compileVecTypeHint_ = hint;
}
+15 -15
View File
@@ -42,7 +42,7 @@ bool NullProgram::compileImpl(const std::string& src,
const char** headerIncludeNames, amd::option::Options* options) {
std::string sourceCode = src;
if (dev().settings().debugFlags_ & Settings::CheckForILSource) {
if (gpuNullDevice().settings().debugFlags_ & Settings::CheckForILSource) {
size_t inc = sourceCode.find("il_cs_", 0);
if (inc != std::string::npos) {
// CL program is an IL program
@@ -55,7 +55,7 @@ bool NullProgram::compileImpl(const std::string& src,
std::string tempFolder = amd::Os::getTempPath();
std::string tempFileName = amd::Os::getTempFileName();
if (dev().settings().debugFlags_ & Settings::StubCLPrograms) {
if (gpuNullDevice().settings().debugFlags_ & Settings::StubCLPrograms) {
std::stringstream fileName;
std::fstream stubRead;
// Dump the IL function
@@ -130,7 +130,7 @@ bool NullProgram::compileImpl(const std::string& src,
}
if (ACL_SUCCESS !=
aclInsertSection(dev().amdilCompiler(), bin, sourceCode.c_str(), sourceCode.size(), aclSOURCE)) {
aclInsertSection(gpuNullDevice().amdilCompiler(), bin, sourceCode.c_str(), sourceCode.size(), aclSOURCE)) {
LogWarning("aclInsertSection failed");
aclBinaryFini(bin);
return false;
@@ -145,7 +145,7 @@ bool NullProgram::compileImpl(const std::string& src,
opts << options->origOptionStr.c_str();
if (options->origOptionStr.find("-cl-std=CL") == std::string::npos) {
switch (dev().settings().oclVersion_) {
switch (gpuNullDevice().settings().oclVersion_) {
case OpenCL10:
opts << " -cl-std=CL1.0";
break;
@@ -179,15 +179,15 @@ bool NullProgram::compileImpl(const std::string& src,
opts << " -I" << tempFolder;
}
if (!dev().settings().imageSupport_) {
if (!gpuNullDevice().settings().imageSupport_) {
opts << " -fno-image-support";
}
if (dev().settings().reportFMAF_) {
if (gpuNullDevice().settings().reportFMAF_) {
opts << " -mfast-fmaf";
}
if (dev().settings().reportFMA_) {
if (gpuNullDevice().settings().reportFMA_) {
opts << " -mfast-fma";
}
@@ -206,10 +206,10 @@ bool NullProgram::compileImpl(const std::string& src,
pos = newOpt.find("-fno-bin-llvmir");
}
err = aclCompile(dev().amdilCompiler(), bin, newOpt.c_str(), ACL_TYPE_OPENCL, ACL_TYPE_LLVMIR_BINARY,
err = aclCompile(gpuNullDevice().amdilCompiler(), bin, newOpt.c_str(), ACL_TYPE_OPENCL, ACL_TYPE_LLVMIR_BINARY,
NULL);
buildLog_ += aclGetCompilerLog(dev().amdilCompiler());
buildLog_ += aclGetCompilerLog(gpuNullDevice().amdilCompiler());
if (err != ACL_SUCCESS) {
LogWarning("aclCompile failed");
@@ -218,7 +218,7 @@ bool NullProgram::compileImpl(const std::string& src,
}
size_t len = 0;
const void* ir = aclExtractSection(dev().amdilCompiler(), bin, &len, aclLLVMIR, &err);
const void* ir = aclExtractSection(gpuNullDevice().amdilCompiler(), bin, &len, aclLLVMIR, &err);
if (err != ACL_SUCCESS) {
LogWarning("aclExtractSection failed");
aclBinaryFini(bin);
@@ -284,7 +284,7 @@ int NullProgram::compileBinaryToIL(amd::option::Options* options) {
}
if (ACL_SUCCESS !=
aclInsertSection(dev().amdilCompiler(), bin, llvmBinary_.data(), llvmBinary_.size(), spirFlag)) {
aclInsertSection(gpuNullDevice().amdilCompiler(), bin, llvmBinary_.data(), llvmBinary_.size(), spirFlag)) {
LogWarning("aclInsertSection failed");
aclBinaryFini(bin);
return CL_BUILD_PROGRAM_FAILURE;
@@ -294,7 +294,7 @@ int NullProgram::compileBinaryToIL(amd::option::Options* options) {
std::string optionStr = options->origOptionStr;
if (options->origOptionStr.find("kernel-arg-alignment") == std::string::npos) {
char s[256];
sprintf(s, " -Wb,-kernel-arg-alignment=%d", dev().info().memBaseAddrAlign_ / 8);
sprintf(s, " -Wb,-kernel-arg-alignment=%d", gpuNullDevice().info().memBaseAddrAlign_ / 8);
optionStr += s;
}
@@ -308,8 +308,8 @@ int NullProgram::compileBinaryToIL(amd::option::Options* options) {
type = ACL_TYPE_ISA;
}
err = aclCompile(dev().amdilCompiler(), bin, optionStr.c_str(), aclTypeBinaryUsed, type, NULL);
buildLog_ += aclGetCompilerLog(dev().amdilCompiler());
err = aclCompile(gpuNullDevice().amdilCompiler(), bin, optionStr.c_str(), aclTypeBinaryUsed, type, NULL);
buildLog_ += aclGetCompilerLog(gpuNullDevice().amdilCompiler());
if (err != ACL_SUCCESS) {
LogWarning("aclCompile failed");
@@ -332,7 +332,7 @@ int NullProgram::compileBinaryToIL(amd::option::Options* options) {
}
size_t len = 0;
const void* amdil = aclExtractSection(dev().amdilCompiler(), bin, &len, aclCODEGEN, &err);
const void* amdil = aclExtractSection(gpuNullDevice().amdilCompiler(), bin, &len, aclCODEGEN, &err);
if (err != ACL_SUCCESS) {
LogWarning("aclExtractSection failed");
aclBinaryFini(bin);
+1 -1
View File
@@ -3045,7 +3045,7 @@ void HSAILKernel::initHsailArgs(const aclArgData* aclArg) {
HSAILKernel::HSAILKernel(std::string name, HSAILProgram* prog, std::string compileOptions,
uint extraArgsNum)
: device::Kernel(prog->dev(), name, *prog),
: device::Kernel(prog->device(), name, *prog),
compileOptions_(compileOptions),
index_(0),
code_(NULL),
+34 -34
View File
@@ -115,7 +115,7 @@ NullKernel* Program::createKernel(const std::string& name, const Kernel::InitDat
*created = false;
// Create a GPU kernel
Kernel* gpuKernel = new Kernel(name, static_cast<const gpu::Device&>(device()), *this, initData);
Kernel* gpuKernel = new Kernel(name, gpuDevice(), *this, initData);
if (gpuKernel == NULL) {
buildLog_ += "new Kernel() failed";
@@ -512,7 +512,7 @@ bool NullProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
} else {
aclTypeUsed = aclLLVMIR;
}
err = aclInsertSection(dev().amdilCompiler(), libs[i], llvmBinaries[i]->data(),
err = aclInsertSection(gpuNullDevice().amdilCompiler(), libs[i], llvmBinaries[i]->data(),
llvmBinaries[i]->size(), aclTypeUsed);
if (err != ACL_SUCCESS) {
LogWarning("aclInsertSection failed");
@@ -529,10 +529,10 @@ bool NullProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
unsigned int numLibs = libs.size() - 1;
if (numLibs > 0) {
err = aclLink(dev().amdilCompiler(), libs[0], numLibs, &libs[1], ACL_TYPE_LLVMIR_BINARY,
err = aclLink(gpuNullDevice().amdilCompiler(), libs[0], numLibs, &libs[1], ACL_TYPE_LLVMIR_BINARY,
"-create-library", NULL);
buildLog_ += aclGetCompilerLog(dev().amdilCompiler());
buildLog_ += aclGetCompilerLog(gpuNullDevice().amdilCompiler());
if (err != ACL_SUCCESS) {
LogWarning("aclLink failed");
@@ -549,7 +549,7 @@ bool NullProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
} else {
aclTypeUsed = aclLLVMIR;
}
const void* llvmir = aclExtractSection(dev().amdilCompiler(), libs[0], &size, aclTypeUsed, &err);
const void* llvmir = aclExtractSection(gpuNullDevice().amdilCompiler(), libs[0], &size, aclTypeUsed, &err);
if (err != ACL_SUCCESS) {
LogWarning("aclExtractSection failed");
break;
@@ -1456,13 +1456,13 @@ bool Program::allocGlobalData(const void* globalData, size_t dataSize, uint inde
// so possible reallocation won't occur twice or
// another thread could destroy a heap block,
// while we didn't finish allocation
amd::ScopedLock k(dev().lockAsyncOps());
amd::ScopedLock k(gpuDevice().lockAsyncOps());
// Allocate memory for the global data store
glbData_ = dev().createScratchBuffer(amd::alignUp(dataSize, 0x1000));
glbData_ = gpuDevice().createScratchBuffer(amd::alignUp(dataSize, 0x1000));
dataStore = glbData_;
} else {
dataStore = new Memory(dev(), amd::alignUp(dataSize, ConstBuffer::VectorSize));
dataStore = new Memory(gpuDevice(), amd::alignUp(dataSize, ConstBuffer::VectorSize));
// Initialize constant buffer
if ((dataStore == NULL) || !dataStore->create(Resource::RemoteUSWC)) {
@@ -1478,7 +1478,7 @@ bool Program::allocGlobalData(const void* globalData, size_t dataSize, uint inde
static const bool Entire = true;
amd::Coord3D origin(0, 0, 0);
amd::Coord3D region(dataSize);
result = dev().xferMgr().writeBuffer(globalData, *dataStore, origin, region, Entire);
result = gpuDevice().xferMgr().writeBuffer(globalData, *dataStore, origin, region, Entire);
}
return result;
@@ -1505,7 +1505,7 @@ HSAILProgram::HSAILProgram(Device& device, amd::Program& owner)
maxScratchRegs_(0),
executable_(NULL),
loaderContext_(this) {
machineTarget_ = dev().hwInfo()->targetName_;
machineTarget_ = gpuNullDevice().hwInfo()->targetName_;
loader_ = amd::hsa::loader::Loader::Create(&loaderContext_);
}
@@ -1517,7 +1517,7 @@ HSAILProgram::HSAILProgram(NullDevice& device, amd::Program& owner)
executable_(NULL),
loaderContext_(this) {
isNull_ = true;
machineTarget_ = dev().hwInfo()->targetName_;
machineTarget_ = gpuNullDevice().hwInfo()->targetName_;
loader_ = amd::hsa::loader::Loader::Create(&loaderContext_);
}
@@ -1578,9 +1578,9 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
// 1. if the program is created with binary and contains only hsail text
case ACL_TYPE_HSAIL_TEXT: {
std::string curOptions = options->origOptionStr + hsailOptions();
errorCode = aclCompile(dev().hsaCompiler(), binaryElf_, curOptions.c_str(),
errorCode = aclCompile(gpuNullDevice().hsaCompiler(), binaryElf_, curOptions.c_str(),
continueCompileFrom, ACL_TYPE_CG, NULL);
buildLog_ += aclGetCompilerLog(dev().hsaCompiler());
buildLog_ += aclGetCompilerLog(gpuNullDevice().hsaCompiler());
if (errorCode != ACL_SUCCESS) {
buildLog_ += "Error: BRIG code generation failed.\n";
return false;
@@ -1602,12 +1602,12 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
std::string fin_options(options->origOptionStr + hsailOptions());
// Append an option so that we can selectively enable a SCOption on CZ
// whenever IOMMUv2 is enabled.
if (dev().settings().svmFineGrainSystem_) {
if (gpuNullDevice().settings().svmFineGrainSystem_) {
fin_options.append(" -sc-xnack-iommu");
}
errorCode = aclCompile(dev().hsaCompiler(), binaryElf_, fin_options.c_str(), ACL_TYPE_CG,
errorCode = aclCompile(gpuNullDevice().hsaCompiler(), binaryElf_, fin_options.c_str(), ACL_TYPE_CG,
ACL_TYPE_ISA, NULL);
buildLog_ += aclGetCompilerLog(dev().hsaCompiler());
buildLog_ += aclGetCompilerLog(gpuNullDevice().hsaCompiler());
if (errorCode != ACL_SUCCESS) {
buildLog_ += "Error: BRIG finalization to ISA failed.\n";
return false;
@@ -1625,7 +1625,7 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
size_t size = 0;
hsa_code_object_t code_object;
code_object.handle = reinterpret_cast<uint64_t>(
aclExtractSection(dev().hsaCompiler(), binaryElf_, &size, aclTEXT, &errorCode));
aclExtractSection(gpuNullDevice().hsaCompiler(), binaryElf_, &size, aclTEXT, &errorCode));
if (errorCode != ACL_SUCCESS) {
buildLog_ += "Error: Extracting AMD HSA Code Object from binary failed.\n";
return false;
@@ -1638,14 +1638,14 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
}
size_t kernelNamesSize = 0;
errorCode =
aclQueryInfo(dev().hsaCompiler(), binaryElf_, RT_KERNEL_NAMES, NULL, NULL, &kernelNamesSize);
aclQueryInfo(gpuNullDevice().hsaCompiler(), binaryElf_, RT_KERNEL_NAMES, NULL, NULL, &kernelNamesSize);
if (errorCode != ACL_SUCCESS) {
buildLog_ += "Error: Querying of kernel names size from the binary failed.\n";
return false;
}
if (kernelNamesSize > 0) {
char* kernelNames = new char[kernelNamesSize];
errorCode = aclQueryInfo(dev().hsaCompiler(), binaryElf_, RT_KERNEL_NAMES, NULL, kernelNames,
errorCode = aclQueryInfo(gpuNullDevice().hsaCompiler(), binaryElf_, RT_KERNEL_NAMES, NULL, kernelNames,
&kernelNamesSize);
if (errorCode != ACL_SUCCESS) {
buildLog_ += "Error: Querying of kernel names from the binary failed.\n";
@@ -1661,7 +1661,7 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
for (const auto& it : vKernels) {
std::string kernelName(it);
std::string openclKernelName = Kernel::openclMangledName(kernelName);
errorCode = aclQueryInfo(dev().hsaCompiler(), binaryElf_, RT_NUM_KERNEL_HIDDEN_ARGS,
errorCode = aclQueryInfo(gpuNullDevice().hsaCompiler(), binaryElf_, RT_NUM_KERNEL_HIDDEN_ARGS,
openclKernelName.c_str(), &md.numHiddenKernelArgs,
&sizeOfnumHiddenKernelArgs);
if (errorCode != ACL_SUCCESS) {
@@ -1698,7 +1698,7 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
}
// Save the binary in the interface class
saveBinaryAndSetType(TYPE_EXECUTABLE);
buildLog_ += aclGetCompilerLog(dev().hsaCompiler());
buildLog_ += aclGetCompilerLog(gpuNullDevice().hsaCompiler());
return true;
}
@@ -1708,13 +1708,13 @@ std::string HSAILProgram::hsailOptions() {
std::string hsailOptions;
// Set options for the standard device specific options
// All our devices support these options now
if (dev().settings().reportFMAF_) {
if (gpuNullDevice().settings().reportFMAF_) {
hsailOptions.append(" -DFP_FAST_FMAF=1");
}
if (dev().settings().reportFMA_) {
if (gpuNullDevice().settings().reportFMA_) {
hsailOptions.append(" -DFP_FAST_FMA=1");
}
if (!dev().settings().singleFpDenorm_) {
if (!gpuNullDevice().settings().singleFpDenorm_) {
hsailOptions.append(" -cl-denorms-are-zero");
}
@@ -1738,7 +1738,7 @@ std::string HSAILProgram::hsailOptions() {
bool HSAILProgram::allocKernelTable() {
uint size = kernels().size() * sizeof(size_t);
kernels_ = new gpu::Memory(dev(), size);
kernels_ = new gpu::Memory(gpuDevice(), size);
// Initialize kernel table
if ((kernels_ == NULL) || !kernels_->create(Resource::RemoteUSWC)) {
delete kernels_;
@@ -1767,7 +1767,7 @@ const aclTargetInfo& HSAILProgram::info(const char* str) {
arch = "hsail64";
}
info_ = aclGetTargetInfo(arch.c_str(),
(str && str[0] == '\0' ? dev().hwInfo()->targetName_ : str), &err);
(str && str[0] == '\0' ? gpuNullDevice().hwInfo()->targetName_ : str), &err);
if (err != ACL_SUCCESS) {
LogWarning("aclGetTargetInfo failed");
}
@@ -1867,7 +1867,7 @@ hsa_isa_t ORCAHSALoaderContext::IsaFromName(const char* name) {
}
bool ORCAHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) {
uint dev_gfxip = program_->dev().hwInfo()->gfxipVersion_;
uint dev_gfxip = program_->gpuNullDevice().hwInfo()->gfxipVersion_;
uint isa_gfxip = isa.handle;
switch (dev_gfxip) {
case gfx700:
@@ -1892,7 +1892,7 @@ bool ORCAHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa)
case gfx602:
default:
LogPrintfError("Unsupported gfxip version gfx%d", dev_gfxip);
return false;
return false;
}
}
@@ -2024,7 +2024,7 @@ hsa_status_t ORCAHSALoaderContext::SamplerCreate(
assert(false);
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
gpu::Sampler* sampler = new gpu::Sampler(program_->dev());
gpu::Sampler* sampler = new gpu::Sampler(program_->gpuDevice());
if (!sampler || !sampler->create(state)) {
delete sampler;
return HSA_STATUS_ERROR;
@@ -2075,15 +2075,15 @@ void* ORCAHSALoaderContext::GpuMemAlloc(size_t size, size_t align, bool zero) {
return new char[size];
}
gpu::Memory* mem = new gpu::Memory(program_->dev(), amd::alignUp(size, align));
gpu::Memory* mem = new gpu::Memory(program_->gpuDevice(), amd::alignUp(size, align));
if (!mem || !mem->create(gpu::Resource::Local)) {
delete mem;
return NULL;
}
assert(program_->dev().xferQueue());
assert(program_->gpuDevice().xferQueue());
if (zero) {
char pattern = 0;
program_->dev().xferMgr().fillBuffer(*mem, &pattern, sizeof(pattern), amd::Coord3D(0),
program_->gpuDevice().xferMgr().fillBuffer(*mem, &pattern, sizeof(pattern), amd::Coord3D(0),
amd::Coord3D(size));
}
program_->addGlobalStore(mem);
@@ -2102,9 +2102,9 @@ bool ORCAHSALoaderContext::GpuMemCopy(void* dst, size_t offset, const void* src,
memcpy(reinterpret_cast<address>(dst) + offset, src, size);
return true;
}
assert(program_->dev().xferQueue());
assert(program_->gpuDevice().xferQueue());
gpu::Memory* mem = reinterpret_cast<gpu::Memory*>(dst);
return program_->dev().xferMgr().writeBuffer(src, *mem, amd::Coord3D(offset), amd::Coord3D(size),
return program_->gpuDevice().xferMgr().writeBuffer(src, *mem, amd::Coord3D(offset), amd::Coord3D(size),
true);
return true;
}
+16 -7
View File
@@ -289,7 +289,7 @@ class NullProgram : public device::Program {
);
//! Return a typecasted GPU device
gpu::NullDevice& dev() {
gpu::NullDevice& gpuNullDevice() {
return const_cast<gpu::NullDevice&>(static_cast<const gpu::NullDevice&>(device()));
}
@@ -339,10 +339,7 @@ class Program : public NullProgram {
//! Returns the printf info array (read-only view of printf_)
const std::vector<device::PrintfInfo>& printfInfo() const {
  return printf_;
}
//! Return a typecasted GPU device
gpu::Device& dev() { return const_cast<gpu::Device&>(static_cast<const gpu::Device&>(device())); }
protected:
protected:
private:
//! Disable copy constructor
Program(const Program&);
@@ -350,6 +347,11 @@ protected:
//! Disable operator=
Program& operator=(const Program&);
//! Returns device() downcast to gpu::Device, with constness removed
gpu::Device& gpuDevice() {
  const gpu::Device& typedDevice = static_cast<const gpu::Device&>(device());
  return const_cast<gpu::Device&>(typedDevice);
}
HwConstBuffers constBufs_; //!< Constant buffers for the global store
gpu::Memory* glbData_; //!< Global data store
};
@@ -467,8 +469,15 @@ class HSAILProgram : public device::Program {
//! Returns the vector of global stores held by this program (globalStores_)
const std::vector<Memory*>& globalStores() const {
  return globalStores_;
}
//! Return a typecasted GPU device
gpu::Device& dev() { return const_cast<gpu::Device&>(static_cast<const gpu::Device&>(device())); }
//! Returns device() downcast to gpu::NullDevice, with constness removed.
gpu::NullDevice& gpuNullDevice() {
  const gpu::NullDevice& typedDevice = static_cast<const gpu::NullDevice&>(device());
  return const_cast<gpu::NullDevice&>(typedDevice);
}
//! Returns device() downcast to gpu::Device, with constness removed.
//! The device must not be the NullDevice.
gpu::Device& gpuDevice() {
  const gpu::Device& typedDevice = static_cast<const gpu::Device&>(device());
  return const_cast<gpu::Device&>(typedDevice);
}
//! Returns the GPU kernel table memory object (kernels_)
const Memory* kernelTable() const {
  return kernels_;
}
+25 -27
View File
@@ -43,8 +43,8 @@ void HSAILKernel::setWorkGroupInfo(const uint32_t privateSegmentSize,
// Make sure runtime matches HW alignment, which is 256 scratch regs (DWORDs) per wave
constexpr uint32_t ScratchRegAlignment = 256;
workGroupInfo_.scratchRegs_ =
amd::alignUp((workGroupInfo_.scratchRegs_ * dev().info().wavefrontWidth_),
ScratchRegAlignment) / dev().info().wavefrontWidth_;
amd::alignUp((workGroupInfo_.scratchRegs_ * device().info().wavefrontWidth_),
ScratchRegAlignment) / device().info().wavefrontWidth_;
workGroupInfo_.privateMemSize_ = workGroupInfo_.scratchRegs_ * sizeof(uint32_t);
workGroupInfo_.localMemSize_ = workGroupInfo_.usedLDSSize_ = groupSegmentSize;
workGroupInfo_.usedSGPRs_ = numSGPRs;
@@ -52,13 +52,13 @@ void HSAILKernel::setWorkGroupInfo(const uint32_t privateSegmentSize,
workGroupInfo_.usedVGPRs_ = numVGPRs;
if (!prog().isNull()) {
workGroupInfo_.availableLDSSize_ = dev().properties().gfxipProperties.shaderCore.ldsSizePerCu;
workGroupInfo_.availableLDSSize_ = palDevice().properties().gfxipProperties.shaderCore.ldsSizePerCu;
workGroupInfo_.availableSGPRs_ =
dev().properties().gfxipProperties.shaderCore.numAvailableSgprs;
palDevice().properties().gfxipProperties.shaderCore.numAvailableSgprs;
workGroupInfo_.availableVGPRs_ =
dev().properties().gfxipProperties.shaderCore.numAvailableVgprs;
palDevice().properties().gfxipProperties.shaderCore.numAvailableVgprs;
workGroupInfo_.preferredSizeMultiple_ = workGroupInfo_.wavefrontPerSIMD_ =
dev().info().wavefrontWidth_;
device().info().wavefrontWidth_;
} else {
workGroupInfo_.availableLDSSize_ = 64 * Ki;
workGroupInfo_.availableSGPRs_ = 104;
@@ -105,7 +105,7 @@ bool HSAILKernel::aqlCreateHWInfo(amd::hsa::loader::Symbol* sym) {
}
HSAILKernel::HSAILKernel(std::string name, HSAILProgram* prog, std::string compileOptions)
: device::Kernel(prog->dev(), name, *prog),
: device::Kernel(prog->device(), name, *prog),
compileOptions_(compileOptions),
index_(0),
code_(0),
@@ -128,12 +128,12 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
options.append(openClKernelName.c_str());
// Append an option so that we can selectively enable a SCOption on CZ
// whenever IOMMUv2 is enabled.
if (dev().settings().svmFineGrainSystem_) {
if (palNullDevice().settings().svmFineGrainSystem_) {
options.append(" -sc-xnack-iommu");
}
error = aclCompile(dev().compiler(), prog().binaryElf(), options.c_str(), ACL_TYPE_CG,
error = aclCompile(palNullDevice().compiler(), prog().binaryElf(), options.c_str(), ACL_TYPE_CG,
ACL_TYPE_ISA, nullptr);
buildLog_ += aclGetCompilerLog(dev().compiler());
buildLog_ += aclGetCompilerLog(palNullDevice().compiler());
if (error != ACL_SUCCESS) {
LogError("Failed to finalize kernel");
return false;
@@ -144,7 +144,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
// Pull out metadata from the ELF
size_t sizeOfArgList;
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_ARGUMENT_ARRAY,
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_ARGUMENT_ARRAY,
openClKernelName.c_str(), nullptr, &sizeOfArgList);
if (error != ACL_SUCCESS) {
return false;
@@ -154,7 +154,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
if (nullptr == aclArgList) {
return false;
}
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_ARGUMENT_ARRAY,
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_ARGUMENT_ARRAY,
openClKernelName.c_str(), aclArgList, &sizeOfArgList);
if (error != ACL_SUCCESS) {
return false;
@@ -164,30 +164,30 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
delete[] aclArgList;
size_t sizeOfWorkGroupSize;
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE,
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE,
openClKernelName.c_str(), nullptr, &sizeOfWorkGroupSize);
if (error != ACL_SUCCESS) {
return false;
}
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE,
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE,
openClKernelName.c_str(), workGroupInfo_.compileSize_, &sizeOfWorkGroupSize);
if (error != ACL_SUCCESS) {
return false;
}
// Copy wavefront size
workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_;
workGroupInfo_.wavefrontSize_ = device().info().wavefrontWidth_;
// Find total workgroup size
if (workGroupInfo_.compileSize_[0] != 0) {
workGroupInfo_.size_ = workGroupInfo_.compileSize_[0] * workGroupInfo_.compileSize_[1] *
workGroupInfo_.compileSize_[2];
} else {
workGroupInfo_.size_ = dev().info().preferredWorkGroupSize_;
workGroupInfo_.size_ = device().info().preferredWorkGroupSize_;
}
// Pull out printf metadata from the ELF
size_t sizeOfPrintfList;
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_GPU_PRINTF_ARRAY,
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_GPU_PRINTF_ARRAY,
openClKernelName.c_str(), nullptr, &sizeOfPrintfList);
if (error != ACL_SUCCESS) {
return false;
@@ -199,7 +199,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
if (nullptr == aclPrintfList) {
return false;
}
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_GPU_PRINTF_ARRAY,
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_GPU_PRINTF_ARRAY,
openClKernelName.c_str(), aclPrintfList, &sizeOfPrintfList);
if (error != ACL_SUCCESS) {
return false;
@@ -213,7 +213,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
aclMetadata md;
md.enqueue_kernel = false;
size_t sizeOfDeviceEnqueue = sizeof(md.enqueue_kernel);
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_DEVICE_ENQUEUE,
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_DEVICE_ENQUEUE,
openClKernelName.c_str(), &md.enqueue_kernel, &sizeOfDeviceEnqueue);
if (error != ACL_SUCCESS) {
return false;
@@ -222,7 +222,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
md.kernel_index = -1;
size_t sizeOfIndex = sizeof(md.kernel_index);
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_KERNEL_INDEX,
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_KERNEL_INDEX,
openClKernelName.c_str(), &md.kernel_index, &sizeOfIndex);
if (error != ACL_SUCCESS) {
return false;
@@ -230,7 +230,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
index_ = md.kernel_index;
size_t sizeOfWavesPerSimdHint = sizeof(workGroupInfo_.wavesPerSimdHint_);
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_WAVES_PER_SIMD_HINT,
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_WAVES_PER_SIMD_HINT,
openClKernelName.c_str(), &workGroupInfo_.wavesPerSimdHint_,
&sizeOfWavesPerSimdHint);
if (error != ACL_SUCCESS) {
@@ -240,7 +240,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
waveLimiter_.enable();
size_t sizeOfWorkGroupSizeHint = sizeof(workGroupInfo_.compileSizeHint_);
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE_HINT,
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE_HINT,
openClKernelName.c_str(), workGroupInfo_.compileSizeHint_,
&sizeOfWorkGroupSizeHint);
if (error != ACL_SUCCESS) {
@@ -248,7 +248,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
}
size_t sizeOfVecTypeHint;
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_VEC_TYPE_HINT,
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_VEC_TYPE_HINT,
openClKernelName.c_str(), NULL, &sizeOfVecTypeHint);
if (error != ACL_SUCCESS) {
return false;
@@ -259,7 +259,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
if (NULL == VecTypeHint) {
return false;
}
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_VEC_TYPE_HINT,
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_VEC_TYPE_HINT,
openClKernelName.c_str(), VecTypeHint, &sizeOfVecTypeHint);
if (error != ACL_SUCCESS) {
return false;
@@ -273,8 +273,6 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
return true;
}
const Device& HSAILKernel::dev() const { return reinterpret_cast<const Device&>(dev_); }
//! Returns the HSA program associated with this kernel.
//! The downcast uses static_cast, the named cast intended for conversions
//! inside a class hierarchy; unlike reinterpret_cast it is checked by the
//! compiler and stays correct if the base subobject is not at offset zero.
//! Assumes prog_ refers to a public base of HSAILProgram (device::Program)
//! and that the object really is an HSAILProgram -- TODO confirm.
const HSAILProgram& HSAILKernel::prog() const {
  return static_cast<const HSAILProgram&>(prog_);
}
@@ -476,7 +474,7 @@ bool LightningKernel::init() {
workGroupInfo()->usedSGPRs_, workGroupInfo()->usedVGPRs_);
// Copy wavefront size
workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_;
workGroupInfo_.wavefrontSize_ = device().info().wavefrontWidth_;
if (workGroupInfo_.size_ == 0) {
return false;
+5 -2
View File
@@ -68,8 +68,11 @@ class HSAILKernel : public device::Kernel {
//! finalizes the kernel if needed
bool init(amd::hsa::loader::Symbol* sym, bool finalize = false);
//! Returns PAL device object, associated with this kernel
const Device& dev() const;
//! Returns the PAL device object associated with this kernel, viewed as a
//! NullDevice; the underlying device may be the null device.
//! NOTE(review): reinterpret_cast is used for what looks like a hierarchy
//! downcast of dev_; static_cast would be preferable if NullDevice is a
//! complete type here -- confirm.
const NullDevice& palNullDevice() const {
  return reinterpret_cast<const NullDevice&>(dev_);
}
//! Returns the PAL device object associated with this kernel.
//! The device must not be the null device.
//! NOTE(review): reinterpret_cast is used for what looks like a hierarchy
//! downcast of dev_; static_cast would be preferable if Device is a
//! complete type here -- confirm.
const Device& palDevice() const {
  return reinterpret_cast<const Device&>(dev_);
}
//! Returns HSA program associated with this kernel
const HSAILProgram& prog() const;
+14 -14
View File
@@ -70,8 +70,8 @@ bool Segment::alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t
bool zero) {
align = amd::alignUp(align, sizeof(uint32_t));
amd::Memory* amd_mem_obj = new (prog.dev().context())
amd::Buffer(prog.dev().context(), 0, amd::alignUp(size, align),
amd::Memory* amd_mem_obj = new (prog.palDevice().context())
amd::Buffer(prog.palDevice().context(), 0, amd::alignUp(size, align),
// HIP requires SVM allocation for segment code due to possible global variable
// access and global variables are a part of code segment with the latest loader
amd::IS_HIP ? reinterpret_cast<void*>(1) : nullptr);
@@ -87,11 +87,11 @@ bool Segment::alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t
return false;
}
gpuAccess_ = static_cast<pal::Memory*>(amd_mem_obj->getDeviceMemory(prog.dev(), false));
gpuAccess_ = static_cast<pal::Memory*>(amd_mem_obj->getDeviceMemory(prog.palDevice(), false));
if (segment == AMDGPU_HSA_SEGMENT_CODE_AGENT) {
void* ptr = nullptr;
cpuAccess_ = new pal::Memory(prog.dev(), amd::alignUp(size, align));
cpuAccess_ = new pal::Memory(prog.palDevice(), amd::alignUp(size, align));
if ((cpuAccess_ == nullptr) || !cpuAccess_->create(pal::Resource::Remote)) {
delete cpuAccess_;
cpuAccess_ = nullptr;
@@ -111,8 +111,8 @@ bool Segment::alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t
if ((cpuAccess_ == nullptr) && zero && !prog.isInternal()) {
uint64_t pattern = 0;
size_t patternSize = ((size % sizeof(pattern)) == 0) ? sizeof(pattern) : 1;
prog.dev().xferMgr().fillBuffer(*gpuAccess_, &pattern, patternSize, amd::Coord3D(0),
amd::Coord3D(size));
prog.palDevice().xferMgr().fillBuffer(*gpuAccess_, &pattern, patternSize, amd::Coord3D(0),
amd::Coord3D(size));
}
switch (segment) {
@@ -266,7 +266,7 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_
}
size_t kernelNamesSize = 0;
acl_error errorCode = aclQueryInfo(dev().compiler(), binaryElf_, RT_KERNEL_NAMES, nullptr,
acl_error errorCode = aclQueryInfo(palNullDevice().compiler(), binaryElf_, RT_KERNEL_NAMES, nullptr,
nullptr, &kernelNamesSize);
if (errorCode != ACL_SUCCESS) {
buildLog_ += "Error: Querying of kernel names size from the binary failed.\n";
@@ -274,7 +274,7 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_
}
if (kernelNamesSize > 0) {
char* kernelNames = new char[kernelNamesSize];
errorCode = aclQueryInfo(dev().compiler(), binaryElf_, RT_KERNEL_NAMES, nullptr, kernelNames,
errorCode = aclQueryInfo(palNullDevice().compiler(), binaryElf_, RT_KERNEL_NAMES, nullptr, kernelNames,
&kernelNamesSize);
if (errorCode != ACL_SUCCESS) {
buildLog_ += "Error: Querying of kernel names from the binary failed.\n";
@@ -326,7 +326,7 @@ bool HSAILProgram::createBinary(amd::option::Options* options) { return true; }
bool HSAILProgram::allocKernelTable() {
uint size = kernels().size() * sizeof(size_t);
kernels_ = new pal::Memory(dev(), size);
kernels_ = new pal::Memory(palDevice(), size);
// Initialize kernel table
if ((kernels_ == nullptr) || !kernels_->create(Resource::RemoteUSWC)) {
delete kernels_;
@@ -352,7 +352,7 @@ const aclTargetInfo& HSAILProgram::info(const char* str) {
arch = "hsail64";
}
info_ = aclGetTargetInfo(arch.c_str(),
(str && str[0] == '\0' ? dev().hwInfo()->machineTarget_ : str), &err);
(str && str[0] == '\0' ? palNullDevice().hwInfo()->machineTarget_ : str), &err);
if (err != ACL_SUCCESS) {
LogWarning("aclGetTargetInfo failed");
}
@@ -523,9 +523,9 @@ hsa_isa_t PALHSALoaderContext::IsaFromName(const char* name) {
}
bool PALHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) {
uint32_t gfxipVersion = program_->dev().settings().useLightning_
? program_->dev().hwInfo()->gfxipVersionLC_
: program_->dev().hwInfo()->gfxipVersion_;
uint32_t gfxipVersion = program_->palNullDevice().settings().useLightning_
? program_->palNullDevice().hwInfo()->gfxipVersionLC_
: program_->palNullDevice().hwInfo()->gfxipVersion_;
uint32_t majorSrc = gfxipVersion / 10;
uint32_t minorSrc = gfxipVersion % 10;
@@ -671,7 +671,7 @@ hsa_status_t PALHSALoaderContext::SamplerCreate(
assert(false);
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
pal::Sampler* sampler = new pal::Sampler(program_->dev());
pal::Sampler* sampler = new pal::Sampler(program_->palDevice());
if (!sampler || !sampler->create(state)) {
delete sampler;
return HSA_STATUS_ERROR;
+10 -3
View File
@@ -157,8 +157,15 @@ class HSAILProgram : public device::Program {
const std::vector<Memory*>& globalStores() const { return globalStores_; }
//! Return the program's device typecasted to a PAL device.
//! The downcast is an unchecked static_cast; constness is then removed with
//! const_cast, so the caller receives a mutable reference.
pal::Device& dev() { return const_cast<pal::Device&>(static_cast<const pal::Device&>(device())); }
//! Return the program's device typecasted to a PAL null device.
//! The downcast is an unchecked static_cast; constness is then removed with
//! const_cast, so the caller receives a mutable reference.
pal::NullDevice& palNullDevice() {
return const_cast<pal::NullDevice&>(static_cast<const pal::NullDevice&>(device()));
}
//! Return a typecasted PAL device. The device must not be the NullDevice.
//! The downcast is an unchecked static_cast, so nothing in this accessor
//! verifies that precondition. Constness is then removed with const_cast,
//! so the caller receives a mutable reference.
pal::Device& palDevice() {
return const_cast<pal::Device&>(static_cast<const pal::Device&>(device()));
}
//! Returns GPU kernel table (the kernels_ member, as a pointer to const Memory)
const Memory* kernelTable() const { return kernels_; }
@@ -241,7 +248,7 @@ class LightningProgram : public HSAILProgram {
//! Construct a Lightning program for a NullDevice.
//! Both arguments are forwarded unchanged to the HSAILProgram constructor.
LightningProgram(NullDevice& device, amd::Program& owner) : HSAILProgram(device, owner) {
isLC_ = true;
// The HIP flag follows the source language reported by the owning program
isHIP_ = (owner.language() == amd::Program::HIP);
// The machine target comes from the machineTargetLC_ field of the device's hwInfo()
machineTarget_ = dev().hwInfo()->machineTargetLC_;
machineTarget_ = palNullDevice().hwInfo()->machineTargetLC_;
}
LightningProgram(Device& device, amd::Program& owner) : HSAILProgram(device, owner) {
+9 -9
View File
@@ -31,7 +31,7 @@ Kernel::Kernel(std::string name, Program* prog, const uint64_t& kernelCodeHandle
const uint32_t workgroupGroupSegmentByteSize,
const uint32_t workitemPrivateSegmentByteSize, const uint32_t kernargSegmentByteSize,
const uint32_t kernargSegmentAlignment)
: device::Kernel(prog->dev(), name, *prog) {
: device::Kernel(prog->device(), name, *prog) {
kernelCodeHandle_ = kernelCodeHandle;
workgroupGroupSegmentByteSize_ = workgroupGroupSegmentByteSize;
workitemPrivateSegmentByteSize_ = workitemPrivateSegmentByteSize;
@@ -40,7 +40,7 @@ Kernel::Kernel(std::string name, Program* prog, const uint64_t& kernelCodeHandle
}
Kernel::Kernel(std::string name, Program* prog)
: device::Kernel(prog->dev(), name, *prog) {
: device::Kernel(prog->device(), name, *prog) {
}
#if defined(USE_COMGR_LIBRARY)
@@ -57,10 +57,10 @@ bool LightningKernel::init() {
symbolName_ = name();
}
kernargSegmentAlignment_ =
amd::alignUp(std::max(kernargSegmentAlignment_, 128u), dev().info().globalMemCacheLineSize_);
amd::alignUp(std::max(kernargSegmentAlignment_, 128u), device().info().globalMemCacheLineSize_);
// Set the workgroup information for the kernel
workGroupInfo_.availableLDSSize_ = dev().info().localMemSizePerCU_;
workGroupInfo_.availableLDSSize_ = device().info().localMemSizePerCU_;
assert(workGroupInfo_.availableLDSSize_ > 0);
if (!SetAvailableSgprVgpr()) {
@@ -155,7 +155,7 @@ bool LightningKernel::init() {
workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_;
workGroupInfo_.preferredSizeMultiple_ = wavefront_size;
workGroupInfo_.usedStackSize_ = 0;
workGroupInfo_.wavefrontPerSIMD_ = program()->dev().info().maxWorkItemSizes_[0] / wavefront_size;
workGroupInfo_.wavefrontPerSIMD_ = program()->rocDevice().info().maxWorkItemSizes_[0] / wavefront_size;
workGroupInfo_.wavefrontSize_ = wavefront_size;
if (workGroupInfo_.size_ == 0) {
return false;
@@ -181,7 +181,7 @@ bool HSAILKernel::init() {
hsa_agent_t hsaDevice = program()->hsaDevice();
// Pull out metadata from the ELF
size_t sizeOfArgList;
aclCompiler* compileHandle = program()->dev().compiler();
aclCompiler* compileHandle = program()->rocDevice().compiler();
std::string openClKernelName("&__OpenCL_" + name() + "_kernel");
errorCode = aclQueryInfo(compileHandle, program()->binaryElf(), RT_ARGUMENT_ARRAY,
openClKernelName.c_str(), nullptr, &sizeOfArgList);
@@ -202,7 +202,7 @@ bool HSAILKernel::init() {
// Set the workgroup information for the kernel
memset(&workGroupInfo_, 0, sizeof(workGroupInfo_));
workGroupInfo_.availableLDSSize_ = program()->dev().info().localMemSizePerCU_;
workGroupInfo_.availableLDSSize_ = program()->rocDevice().info().localMemSizePerCU_;
assert(workGroupInfo_.availableLDSSize_ > 0);
workGroupInfo_.availableSGPRs_ = 104;
workGroupInfo_.availableVGPRs_ = 256;
@@ -250,13 +250,13 @@ bool HSAILKernel::init() {
}
workGroupInfo_.usedStackSize_ = 0;
workGroupInfo_.wavefrontPerSIMD_ = program()->dev().info().maxWorkItemSizes_[0] / wavefront_size;
workGroupInfo_.wavefrontPerSIMD_ = program()->rocDevice().info().maxWorkItemSizes_[0] / wavefront_size;
workGroupInfo_.wavefrontSize_ = wavefront_size;
if (workGroupInfo_.compileSize_[0] != 0) {
workGroupInfo_.size_ = workGroupInfo_.compileSize_[0] * workGroupInfo_.compileSize_[1] *
workGroupInfo_.compileSize_[2];
} else {
workGroupInfo_.size_ = program()->dev().info().preferredWorkGroupSize_;
workGroupInfo_.size_ = program()->rocDevice().info().preferredWorkGroupSize_;
}
// Pull out printf metadata from the ELF
+8 -8
View File
@@ -127,7 +127,7 @@ bool Program::initClBinary(char* binaryIn, size_t size) {
bool Program::defineGlobalVar(const char* name, void* dptr) {
hsa_status_t status = HSA_STATUS_SUCCESS;
hsa_agent_t hsa_device = dev().getBackendDevice();
hsa_agent_t hsa_device = rocDevice().getBackendDevice();
status = hsa_executable_agent_global_variable_define(hsaExecutable_, hsa_device, name, dptr);
if (status != HSA_STATUS_SUCCESS) {
@@ -153,7 +153,7 @@ bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr,
return false;
}
hsa_device= dev().getBackendDevice();
hsa_device = rocDevice().getBackendDevice();
/* Find HSA Symbol by name */
status = hsa_executable_get_symbol_by_name(hsaExecutable_, global_name, &hsa_device,
@@ -206,7 +206,7 @@ bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr,
return false;
}
roc_device = static_cast<const roc::Device*>(&dev());
roc_device = &(rocDevice());
*amd_mem_obj = new(roc_device->context()) amd::Buffer(roc_device->context(), 0, *bytes,
*device_pptr);
@@ -227,7 +227,7 @@ bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr,
}
//! Construct an HSAIL program for a roc::NullDevice.
//! Both arguments are forwarded unchanged to the roc::Program constructor.
HSAILProgram::HSAILProgram(roc::NullDevice& device, amd::Program& owner) : roc::Program(device, owner) {
// The machine target comes from the machineTarget_ field of the device's deviceInfo()
machineTarget_ = dev().deviceInfo().machineTarget_;
machineTarget_ = rocNullDevice().deviceInfo().machineTarget_;
}
HSAILProgram::~HSAILProgram() {
@@ -268,7 +268,7 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_
#if defined(WITH_COMPILER_LIB)
// Stop compilation if it is an offline device - HSA runtime does not
// support ISA compiled offline
if (!dev().isOnline()) {
if (!device().isOnline()) {
return true;
}
@@ -294,7 +294,7 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_
return false;
}
hsa_agent_t hsaDevice = dev().getBackendDevice();
hsa_agent_t hsaDevice = rocDevice().getBackendDevice();
status = hsa_executable_load_agent_code_object(hsaExecutable_, hsaDevice, hsaCodeObjectReader_,
nullptr, nullptr);
if (status != HSA_STATUS_SUCCESS) {
@@ -432,7 +432,7 @@ LightningProgram::LightningProgram(roc::NullDevice& device, amd::Program& owner)
: roc::Program(device, owner) {
isLC_ = true;
isHIP_ = (owner.language() == amd::Program::HIP);
machineTarget_ = dev().deviceInfo().machineTargetLC_;
machineTarget_ = rocNullDevice().deviceInfo().machineTargetLC_;
}
bool LightningProgram::createBinary(amd::option::Options* options) {
@@ -477,7 +477,7 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s
return false;
}
hsa_agent_t agent = dev().getBackendDevice();
hsa_agent_t agent = rocDevice().getBackendDevice();
hsa_status_t status;
status = hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
+5 -2
View File
@@ -49,10 +49,13 @@ class Program : public device::Program {
virtual bool initClBinary(char* binaryIn, size_t size);
//! Return the program's device typecasted to a const NullDevice reference.
//! The downcast is an unchecked static_cast.
const NullDevice& dev() const { return static_cast<const NullDevice&>(device()); }
const NullDevice& rocNullDevice() const { return static_cast<const NullDevice&>(device()); }
//! Return the program's device typecasted to a const Device reference.
//! The downcast is an unchecked static_cast.
//! NOTE(review): presumably only valid when the program was created for a
//! real Device rather than a NullDevice - confirm against callers.
const Device& rocDevice() const { return static_cast<const Device&>(device()); }
//! Returns the HSA agent handle (hsa_agent_t) that the program's device
//! reports through getBackendDevice()
hsa_agent_t hsaDevice() const { return dev().getBackendDevice(); }
hsa_agent_t hsaDevice() const { return rocNullDevice().getBackendDevice(); }
//! Returns the HSA executable handle stored in hsaExecutable_
hsa_executable_t hsaExecutable() const { return hsaExecutable_; }