Rename device access functions
Rename the functions that access devices so that each name reflects the derived device type it returns. This includes the base device::Device and the derived gpu/pal/roc device classes, in both their NullDevice and Device forms. Call sites are changed to use the least derived version, to clarify what operations will be available.

Change-Id: I1abb6bfed7efa24852bc8d0d49acaea357d8b5d0
This commit is contained in:
@@ -674,7 +674,7 @@ void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize,
|
||||
if (workGroupInfo()->compileSize_[0] == 0) {
|
||||
// Find the default local workgroup size, if it wasn't specified
|
||||
if (lclWorkSize[0] == 0) {
|
||||
if ((dev().settings().overrideLclSet & (1 << (workDim - 1))) == 0) {
|
||||
if ((device().settings().overrideLclSet & (1 << (workDim - 1))) == 0) {
|
||||
// Find threads per group
|
||||
size_t thrPerGrp = workGroupInfo()->size_;
|
||||
|
||||
@@ -685,7 +685,7 @@ void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize,
|
||||
// and it's 2 or 3-dimensional workload
|
||||
(workDim > 1) && (((gblWorkSize[0] % 16) == 0) && ((gblWorkSize[1] % 16) == 0))) {
|
||||
// Use 8x8 workgroup size if kernel has image writes
|
||||
if (flags_.imageWriteEna_ || (thrPerGrp != dev().info().preferredWorkGroupSize_)) {
|
||||
if (flags_.imageWriteEna_ || (thrPerGrp != device().info().preferredWorkGroupSize_)) {
|
||||
lclWorkSize[0] = 8;
|
||||
lclWorkSize[1] = 8;
|
||||
}
|
||||
@@ -709,7 +709,7 @@ void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize,
|
||||
}
|
||||
|
||||
// Assuming DWORD access
|
||||
const uint cacheLineMatch = dev().info().globalMemCacheLineSize_ >> 2;
|
||||
const uint cacheLineMatch = device().info().globalMemCacheLineSize_ >> 2;
|
||||
|
||||
// Check if we couldn't find optimal workload
|
||||
if (((lclWorkSize.product() % workGroupInfo()->wavefrontSize_) != 0) ||
|
||||
@@ -1095,7 +1095,7 @@ bool Kernel::GetAttrCodePropMetadata() {
|
||||
InitParameters(kernelMetaNode);
|
||||
|
||||
// Set the workgroup information for the kernel
|
||||
workGroupInfo_.availableLDSSize_ = dev().info().localMemSizePerCU_;
|
||||
workGroupInfo_.availableLDSSize_ = device().info().localMemSizePerCU_;
|
||||
workGroupInfo_.availableSGPRs_ = 104;
|
||||
workGroupInfo_.availableVGPRs_ = 256;
|
||||
|
||||
|
||||
@@ -407,7 +407,7 @@ class Kernel : public amd::HeapObject {
|
||||
};
|
||||
|
||||
//! Returns GPU device object, associated with this kernel
|
||||
const amd::Device& dev() const { return dev_; }
|
||||
const amd::Device& device() const { return dev_; }
|
||||
|
||||
void setVecTypeHint(const std::string& hint) { workGroupInfo_.compileVecTypeHint_ = hint; }
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@ bool NullProgram::compileImpl(const std::string& src,
|
||||
const char** headerIncludeNames, amd::option::Options* options) {
|
||||
std::string sourceCode = src;
|
||||
|
||||
if (dev().settings().debugFlags_ & Settings::CheckForILSource) {
|
||||
if (gpuNullDevice().settings().debugFlags_ & Settings::CheckForILSource) {
|
||||
size_t inc = sourceCode.find("il_cs_", 0);
|
||||
if (inc != std::string::npos) {
|
||||
// CL program is an IL program
|
||||
@@ -55,7 +55,7 @@ bool NullProgram::compileImpl(const std::string& src,
|
||||
std::string tempFolder = amd::Os::getTempPath();
|
||||
std::string tempFileName = amd::Os::getTempFileName();
|
||||
|
||||
if (dev().settings().debugFlags_ & Settings::StubCLPrograms) {
|
||||
if (gpuNullDevice().settings().debugFlags_ & Settings::StubCLPrograms) {
|
||||
std::stringstream fileName;
|
||||
std::fstream stubRead;
|
||||
// Dump the IL function
|
||||
@@ -130,7 +130,7 @@ bool NullProgram::compileImpl(const std::string& src,
|
||||
}
|
||||
|
||||
if (ACL_SUCCESS !=
|
||||
aclInsertSection(dev().amdilCompiler(), bin, sourceCode.c_str(), sourceCode.size(), aclSOURCE)) {
|
||||
aclInsertSection(gpuNullDevice().amdilCompiler(), bin, sourceCode.c_str(), sourceCode.size(), aclSOURCE)) {
|
||||
LogWarning("aclInsertSection failed");
|
||||
aclBinaryFini(bin);
|
||||
return false;
|
||||
@@ -145,7 +145,7 @@ bool NullProgram::compileImpl(const std::string& src,
|
||||
opts << options->origOptionStr.c_str();
|
||||
|
||||
if (options->origOptionStr.find("-cl-std=CL") == std::string::npos) {
|
||||
switch (dev().settings().oclVersion_) {
|
||||
switch (gpuNullDevice().settings().oclVersion_) {
|
||||
case OpenCL10:
|
||||
opts << " -cl-std=CL1.0";
|
||||
break;
|
||||
@@ -179,15 +179,15 @@ bool NullProgram::compileImpl(const std::string& src,
|
||||
opts << " -I" << tempFolder;
|
||||
}
|
||||
|
||||
if (!dev().settings().imageSupport_) {
|
||||
if (!gpuNullDevice().settings().imageSupport_) {
|
||||
opts << " -fno-image-support";
|
||||
}
|
||||
|
||||
if (dev().settings().reportFMAF_) {
|
||||
if (gpuNullDevice().settings().reportFMAF_) {
|
||||
opts << " -mfast-fmaf";
|
||||
}
|
||||
|
||||
if (dev().settings().reportFMA_) {
|
||||
if (gpuNullDevice().settings().reportFMA_) {
|
||||
opts << " -mfast-fma";
|
||||
}
|
||||
|
||||
@@ -206,10 +206,10 @@ bool NullProgram::compileImpl(const std::string& src,
|
||||
pos = newOpt.find("-fno-bin-llvmir");
|
||||
}
|
||||
|
||||
err = aclCompile(dev().amdilCompiler(), bin, newOpt.c_str(), ACL_TYPE_OPENCL, ACL_TYPE_LLVMIR_BINARY,
|
||||
err = aclCompile(gpuNullDevice().amdilCompiler(), bin, newOpt.c_str(), ACL_TYPE_OPENCL, ACL_TYPE_LLVMIR_BINARY,
|
||||
NULL);
|
||||
|
||||
buildLog_ += aclGetCompilerLog(dev().amdilCompiler());
|
||||
buildLog_ += aclGetCompilerLog(gpuNullDevice().amdilCompiler());
|
||||
|
||||
if (err != ACL_SUCCESS) {
|
||||
LogWarning("aclCompile failed");
|
||||
@@ -218,7 +218,7 @@ bool NullProgram::compileImpl(const std::string& src,
|
||||
}
|
||||
|
||||
size_t len = 0;
|
||||
const void* ir = aclExtractSection(dev().amdilCompiler(), bin, &len, aclLLVMIR, &err);
|
||||
const void* ir = aclExtractSection(gpuNullDevice().amdilCompiler(), bin, &len, aclLLVMIR, &err);
|
||||
if (err != ACL_SUCCESS) {
|
||||
LogWarning("aclExtractSection failed");
|
||||
aclBinaryFini(bin);
|
||||
@@ -284,7 +284,7 @@ int NullProgram::compileBinaryToIL(amd::option::Options* options) {
|
||||
}
|
||||
|
||||
if (ACL_SUCCESS !=
|
||||
aclInsertSection(dev().amdilCompiler(), bin, llvmBinary_.data(), llvmBinary_.size(), spirFlag)) {
|
||||
aclInsertSection(gpuNullDevice().amdilCompiler(), bin, llvmBinary_.data(), llvmBinary_.size(), spirFlag)) {
|
||||
LogWarning("aclInsertSection failed");
|
||||
aclBinaryFini(bin);
|
||||
return CL_BUILD_PROGRAM_FAILURE;
|
||||
@@ -294,7 +294,7 @@ int NullProgram::compileBinaryToIL(amd::option::Options* options) {
|
||||
std::string optionStr = options->origOptionStr;
|
||||
if (options->origOptionStr.find("kernel-arg-alignment") == std::string::npos) {
|
||||
char s[256];
|
||||
sprintf(s, " -Wb,-kernel-arg-alignment=%d", dev().info().memBaseAddrAlign_ / 8);
|
||||
sprintf(s, " -Wb,-kernel-arg-alignment=%d", gpuNullDevice().info().memBaseAddrAlign_ / 8);
|
||||
optionStr += s;
|
||||
}
|
||||
|
||||
@@ -308,8 +308,8 @@ int NullProgram::compileBinaryToIL(amd::option::Options* options) {
|
||||
type = ACL_TYPE_ISA;
|
||||
}
|
||||
|
||||
err = aclCompile(dev().amdilCompiler(), bin, optionStr.c_str(), aclTypeBinaryUsed, type, NULL);
|
||||
buildLog_ += aclGetCompilerLog(dev().amdilCompiler());
|
||||
err = aclCompile(gpuNullDevice().amdilCompiler(), bin, optionStr.c_str(), aclTypeBinaryUsed, type, NULL);
|
||||
buildLog_ += aclGetCompilerLog(gpuNullDevice().amdilCompiler());
|
||||
|
||||
if (err != ACL_SUCCESS) {
|
||||
LogWarning("aclCompile failed");
|
||||
@@ -332,7 +332,7 @@ int NullProgram::compileBinaryToIL(amd::option::Options* options) {
|
||||
}
|
||||
|
||||
size_t len = 0;
|
||||
const void* amdil = aclExtractSection(dev().amdilCompiler(), bin, &len, aclCODEGEN, &err);
|
||||
const void* amdil = aclExtractSection(gpuNullDevice().amdilCompiler(), bin, &len, aclCODEGEN, &err);
|
||||
if (err != ACL_SUCCESS) {
|
||||
LogWarning("aclExtractSection failed");
|
||||
aclBinaryFini(bin);
|
||||
|
||||
@@ -3045,7 +3045,7 @@ void HSAILKernel::initHsailArgs(const aclArgData* aclArg) {
|
||||
|
||||
HSAILKernel::HSAILKernel(std::string name, HSAILProgram* prog, std::string compileOptions,
|
||||
uint extraArgsNum)
|
||||
: device::Kernel(prog->dev(), name, *prog),
|
||||
: device::Kernel(prog->device(), name, *prog),
|
||||
compileOptions_(compileOptions),
|
||||
index_(0),
|
||||
code_(NULL),
|
||||
|
||||
@@ -115,7 +115,7 @@ NullKernel* Program::createKernel(const std::string& name, const Kernel::InitDat
|
||||
|
||||
*created = false;
|
||||
// Create a GPU kernel
|
||||
Kernel* gpuKernel = new Kernel(name, static_cast<const gpu::Device&>(device()), *this, initData);
|
||||
Kernel* gpuKernel = new Kernel(name, gpuDevice(), *this, initData);
|
||||
|
||||
if (gpuKernel == NULL) {
|
||||
buildLog_ += "new Kernel() failed";
|
||||
@@ -512,7 +512,7 @@ bool NullProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
|
||||
} else {
|
||||
aclTypeUsed = aclLLVMIR;
|
||||
}
|
||||
err = aclInsertSection(dev().amdilCompiler(), libs[i], llvmBinaries[i]->data(),
|
||||
err = aclInsertSection(gpuNullDevice().amdilCompiler(), libs[i], llvmBinaries[i]->data(),
|
||||
llvmBinaries[i]->size(), aclTypeUsed);
|
||||
if (err != ACL_SUCCESS) {
|
||||
LogWarning("aclInsertSection failed");
|
||||
@@ -529,10 +529,10 @@ bool NullProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
|
||||
unsigned int numLibs = libs.size() - 1;
|
||||
|
||||
if (numLibs > 0) {
|
||||
err = aclLink(dev().amdilCompiler(), libs[0], numLibs, &libs[1], ACL_TYPE_LLVMIR_BINARY,
|
||||
err = aclLink(gpuNullDevice().amdilCompiler(), libs[0], numLibs, &libs[1], ACL_TYPE_LLVMIR_BINARY,
|
||||
"-create-library", NULL);
|
||||
|
||||
buildLog_ += aclGetCompilerLog(dev().amdilCompiler());
|
||||
buildLog_ += aclGetCompilerLog(gpuNullDevice().amdilCompiler());
|
||||
|
||||
if (err != ACL_SUCCESS) {
|
||||
LogWarning("aclLink failed");
|
||||
@@ -549,7 +549,7 @@ bool NullProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
|
||||
} else {
|
||||
aclTypeUsed = aclLLVMIR;
|
||||
}
|
||||
const void* llvmir = aclExtractSection(dev().amdilCompiler(), libs[0], &size, aclTypeUsed, &err);
|
||||
const void* llvmir = aclExtractSection(gpuNullDevice().amdilCompiler(), libs[0], &size, aclTypeUsed, &err);
|
||||
if (err != ACL_SUCCESS) {
|
||||
LogWarning("aclExtractSection failed");
|
||||
break;
|
||||
@@ -1456,13 +1456,13 @@ bool Program::allocGlobalData(const void* globalData, size_t dataSize, uint inde
|
||||
// so possible reallocation won't occur twice or
|
||||
// another thread could destroy a heap block,
|
||||
// while we didn't finish allocation
|
||||
amd::ScopedLock k(dev().lockAsyncOps());
|
||||
amd::ScopedLock k(gpuDevice().lockAsyncOps());
|
||||
|
||||
// Allocate memory for the global data store
|
||||
glbData_ = dev().createScratchBuffer(amd::alignUp(dataSize, 0x1000));
|
||||
glbData_ = gpuDevice().createScratchBuffer(amd::alignUp(dataSize, 0x1000));
|
||||
dataStore = glbData_;
|
||||
} else {
|
||||
dataStore = new Memory(dev(), amd::alignUp(dataSize, ConstBuffer::VectorSize));
|
||||
dataStore = new Memory(gpuDevice(), amd::alignUp(dataSize, ConstBuffer::VectorSize));
|
||||
|
||||
// Initialize constant buffer
|
||||
if ((dataStore == NULL) || !dataStore->create(Resource::RemoteUSWC)) {
|
||||
@@ -1478,7 +1478,7 @@ bool Program::allocGlobalData(const void* globalData, size_t dataSize, uint inde
|
||||
static const bool Entire = true;
|
||||
amd::Coord3D origin(0, 0, 0);
|
||||
amd::Coord3D region(dataSize);
|
||||
result = dev().xferMgr().writeBuffer(globalData, *dataStore, origin, region, Entire);
|
||||
result = gpuDevice().xferMgr().writeBuffer(globalData, *dataStore, origin, region, Entire);
|
||||
}
|
||||
|
||||
return result;
|
||||
@@ -1505,7 +1505,7 @@ HSAILProgram::HSAILProgram(Device& device, amd::Program& owner)
|
||||
maxScratchRegs_(0),
|
||||
executable_(NULL),
|
||||
loaderContext_(this) {
|
||||
machineTarget_ = dev().hwInfo()->targetName_;
|
||||
machineTarget_ = gpuNullDevice().hwInfo()->targetName_;
|
||||
loader_ = amd::hsa::loader::Loader::Create(&loaderContext_);
|
||||
}
|
||||
|
||||
@@ -1517,7 +1517,7 @@ HSAILProgram::HSAILProgram(NullDevice& device, amd::Program& owner)
|
||||
executable_(NULL),
|
||||
loaderContext_(this) {
|
||||
isNull_ = true;
|
||||
machineTarget_ = dev().hwInfo()->targetName_;
|
||||
machineTarget_ = gpuNullDevice().hwInfo()->targetName_;
|
||||
loader_ = amd::hsa::loader::Loader::Create(&loaderContext_);
|
||||
}
|
||||
|
||||
@@ -1578,9 +1578,9 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
|
||||
// 1. if the program is created with binary and contains only hsail text
|
||||
case ACL_TYPE_HSAIL_TEXT: {
|
||||
std::string curOptions = options->origOptionStr + hsailOptions();
|
||||
errorCode = aclCompile(dev().hsaCompiler(), binaryElf_, curOptions.c_str(),
|
||||
errorCode = aclCompile(gpuNullDevice().hsaCompiler(), binaryElf_, curOptions.c_str(),
|
||||
continueCompileFrom, ACL_TYPE_CG, NULL);
|
||||
buildLog_ += aclGetCompilerLog(dev().hsaCompiler());
|
||||
buildLog_ += aclGetCompilerLog(gpuNullDevice().hsaCompiler());
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
buildLog_ += "Error: BRIG code generation failed.\n";
|
||||
return false;
|
||||
@@ -1602,12 +1602,12 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
|
||||
std::string fin_options(options->origOptionStr + hsailOptions());
|
||||
// Append an option so that we can selectively enable a SCOption on CZ
|
||||
// whenever IOMMUv2 is enabled.
|
||||
if (dev().settings().svmFineGrainSystem_) {
|
||||
if (gpuNullDevice().settings().svmFineGrainSystem_) {
|
||||
fin_options.append(" -sc-xnack-iommu");
|
||||
}
|
||||
errorCode = aclCompile(dev().hsaCompiler(), binaryElf_, fin_options.c_str(), ACL_TYPE_CG,
|
||||
errorCode = aclCompile(gpuNullDevice().hsaCompiler(), binaryElf_, fin_options.c_str(), ACL_TYPE_CG,
|
||||
ACL_TYPE_ISA, NULL);
|
||||
buildLog_ += aclGetCompilerLog(dev().hsaCompiler());
|
||||
buildLog_ += aclGetCompilerLog(gpuNullDevice().hsaCompiler());
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
buildLog_ += "Error: BRIG finalization to ISA failed.\n";
|
||||
return false;
|
||||
@@ -1625,7 +1625,7 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
|
||||
size_t size = 0;
|
||||
hsa_code_object_t code_object;
|
||||
code_object.handle = reinterpret_cast<uint64_t>(
|
||||
aclExtractSection(dev().hsaCompiler(), binaryElf_, &size, aclTEXT, &errorCode));
|
||||
aclExtractSection(gpuNullDevice().hsaCompiler(), binaryElf_, &size, aclTEXT, &errorCode));
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
buildLog_ += "Error: Extracting AMD HSA Code Object from binary failed.\n";
|
||||
return false;
|
||||
@@ -1638,14 +1638,14 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
|
||||
}
|
||||
size_t kernelNamesSize = 0;
|
||||
errorCode =
|
||||
aclQueryInfo(dev().hsaCompiler(), binaryElf_, RT_KERNEL_NAMES, NULL, NULL, &kernelNamesSize);
|
||||
aclQueryInfo(gpuNullDevice().hsaCompiler(), binaryElf_, RT_KERNEL_NAMES, NULL, NULL, &kernelNamesSize);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
buildLog_ += "Error: Querying of kernel names size from the binary failed.\n";
|
||||
return false;
|
||||
}
|
||||
if (kernelNamesSize > 0) {
|
||||
char* kernelNames = new char[kernelNamesSize];
|
||||
errorCode = aclQueryInfo(dev().hsaCompiler(), binaryElf_, RT_KERNEL_NAMES, NULL, kernelNames,
|
||||
errorCode = aclQueryInfo(gpuNullDevice().hsaCompiler(), binaryElf_, RT_KERNEL_NAMES, NULL, kernelNames,
|
||||
&kernelNamesSize);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
buildLog_ += "Error: Querying of kernel names from the binary failed.\n";
|
||||
@@ -1661,7 +1661,7 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
|
||||
for (const auto& it : vKernels) {
|
||||
std::string kernelName(it);
|
||||
std::string openclKernelName = Kernel::openclMangledName(kernelName);
|
||||
errorCode = aclQueryInfo(dev().hsaCompiler(), binaryElf_, RT_NUM_KERNEL_HIDDEN_ARGS,
|
||||
errorCode = aclQueryInfo(gpuNullDevice().hsaCompiler(), binaryElf_, RT_NUM_KERNEL_HIDDEN_ARGS,
|
||||
openclKernelName.c_str(), &md.numHiddenKernelArgs,
|
||||
&sizeOfnumHiddenKernelArgs);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
@@ -1698,7 +1698,7 @@ bool HSAILProgram::linkImpl(amd::option::Options* options) {
|
||||
}
|
||||
// Save the binary in the interface class
|
||||
saveBinaryAndSetType(TYPE_EXECUTABLE);
|
||||
buildLog_ += aclGetCompilerLog(dev().hsaCompiler());
|
||||
buildLog_ += aclGetCompilerLog(gpuNullDevice().hsaCompiler());
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1708,13 +1708,13 @@ std::string HSAILProgram::hsailOptions() {
|
||||
std::string hsailOptions;
|
||||
// Set options for the standard device specific options
|
||||
// All our devices support these options now
|
||||
if (dev().settings().reportFMAF_) {
|
||||
if (gpuNullDevice().settings().reportFMAF_) {
|
||||
hsailOptions.append(" -DFP_FAST_FMAF=1");
|
||||
}
|
||||
if (dev().settings().reportFMA_) {
|
||||
if (gpuNullDevice().settings().reportFMA_) {
|
||||
hsailOptions.append(" -DFP_FAST_FMA=1");
|
||||
}
|
||||
if (!dev().settings().singleFpDenorm_) {
|
||||
if (!gpuNullDevice().settings().singleFpDenorm_) {
|
||||
hsailOptions.append(" -cl-denorms-are-zero");
|
||||
}
|
||||
|
||||
@@ -1738,7 +1738,7 @@ std::string HSAILProgram::hsailOptions() {
|
||||
bool HSAILProgram::allocKernelTable() {
|
||||
uint size = kernels().size() * sizeof(size_t);
|
||||
|
||||
kernels_ = new gpu::Memory(dev(), size);
|
||||
kernels_ = new gpu::Memory(gpuDevice(), size);
|
||||
// Initialize kernel table
|
||||
if ((kernels_ == NULL) || !kernels_->create(Resource::RemoteUSWC)) {
|
||||
delete kernels_;
|
||||
@@ -1767,7 +1767,7 @@ const aclTargetInfo& HSAILProgram::info(const char* str) {
|
||||
arch = "hsail64";
|
||||
}
|
||||
info_ = aclGetTargetInfo(arch.c_str(),
|
||||
(str && str[0] == '\0' ? dev().hwInfo()->targetName_ : str), &err);
|
||||
(str && str[0] == '\0' ? gpuNullDevice().hwInfo()->targetName_ : str), &err);
|
||||
if (err != ACL_SUCCESS) {
|
||||
LogWarning("aclGetTargetInfo failed");
|
||||
}
|
||||
@@ -1867,7 +1867,7 @@ hsa_isa_t ORCAHSALoaderContext::IsaFromName(const char* name) {
|
||||
}
|
||||
|
||||
bool ORCAHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) {
|
||||
uint dev_gfxip = program_->dev().hwInfo()->gfxipVersion_;
|
||||
uint dev_gfxip = program_->gpuNullDevice().hwInfo()->gfxipVersion_;
|
||||
uint isa_gfxip = isa.handle;
|
||||
switch (dev_gfxip) {
|
||||
case gfx700:
|
||||
@@ -1892,7 +1892,7 @@ bool ORCAHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa)
|
||||
case gfx602:
|
||||
default:
|
||||
LogPrintfError("Unsupported gfxip version gfx%d", dev_gfxip);
|
||||
return false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2024,7 +2024,7 @@ hsa_status_t ORCAHSALoaderContext::SamplerCreate(
|
||||
assert(false);
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
gpu::Sampler* sampler = new gpu::Sampler(program_->dev());
|
||||
gpu::Sampler* sampler = new gpu::Sampler(program_->gpuDevice());
|
||||
if (!sampler || !sampler->create(state)) {
|
||||
delete sampler;
|
||||
return HSA_STATUS_ERROR;
|
||||
@@ -2075,15 +2075,15 @@ void* ORCAHSALoaderContext::GpuMemAlloc(size_t size, size_t align, bool zero) {
|
||||
return new char[size];
|
||||
}
|
||||
|
||||
gpu::Memory* mem = new gpu::Memory(program_->dev(), amd::alignUp(size, align));
|
||||
gpu::Memory* mem = new gpu::Memory(program_->gpuDevice(), amd::alignUp(size, align));
|
||||
if (!mem || !mem->create(gpu::Resource::Local)) {
|
||||
delete mem;
|
||||
return NULL;
|
||||
}
|
||||
assert(program_->dev().xferQueue());
|
||||
assert(program_->gpuDevice().xferQueue());
|
||||
if (zero) {
|
||||
char pattern = 0;
|
||||
program_->dev().xferMgr().fillBuffer(*mem, &pattern, sizeof(pattern), amd::Coord3D(0),
|
||||
program_->gpuDevice().xferMgr().fillBuffer(*mem, &pattern, sizeof(pattern), amd::Coord3D(0),
|
||||
amd::Coord3D(size));
|
||||
}
|
||||
program_->addGlobalStore(mem);
|
||||
@@ -2102,9 +2102,9 @@ bool ORCAHSALoaderContext::GpuMemCopy(void* dst, size_t offset, const void* src,
|
||||
memcpy(reinterpret_cast<address>(dst) + offset, src, size);
|
||||
return true;
|
||||
}
|
||||
assert(program_->dev().xferQueue());
|
||||
assert(program_->gpuDevice().xferQueue());
|
||||
gpu::Memory* mem = reinterpret_cast<gpu::Memory*>(dst);
|
||||
return program_->dev().xferMgr().writeBuffer(src, *mem, amd::Coord3D(offset), amd::Coord3D(size),
|
||||
return program_->gpuDevice().xferMgr().writeBuffer(src, *mem, amd::Coord3D(offset), amd::Coord3D(size),
|
||||
true);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -289,7 +289,7 @@ class NullProgram : public device::Program {
|
||||
);
|
||||
|
||||
//! Return a typecasted GPU device
|
||||
gpu::NullDevice& dev() {
|
||||
gpu::NullDevice& gpuNullDevice() {
|
||||
return const_cast<gpu::NullDevice&>(static_cast<const gpu::NullDevice&>(device()));
|
||||
}
|
||||
|
||||
@@ -339,10 +339,7 @@ class Program : public NullProgram {
|
||||
//! Returns printf info array
|
||||
const std::vector<device::PrintfInfo>& printfInfo() const { return printf_; }
|
||||
|
||||
//! Return a typecasted GPU device
|
||||
gpu::Device& dev() { return const_cast<gpu::Device&>(static_cast<const gpu::Device&>(device())); }
|
||||
|
||||
protected:
|
||||
protected:
|
||||
private:
|
||||
//! Disable copy constructor
|
||||
Program(const Program&);
|
||||
@@ -350,6 +347,11 @@ protected:
|
||||
//! Disable operator=
|
||||
Program& operator=(const Program&);
|
||||
|
||||
//! Return the result of device() typecasted to a mutable gpu::Device reference.
//! The downcast is an unchecked static_cast; the const_cast then removes the
//! const qualifier so callers can invoke non-const gpu::Device operations.
//! NOTE(review): presumably only valid when this program was created for a real
//! gpu::Device rather than a gpu::NullDevice - confirm against callers.
|
||||
gpu::Device& gpuDevice() {
|
||||
return const_cast<gpu::Device&>(static_cast<const gpu::Device&>(device()));
|
||||
}
|
||||
|
||||
HwConstBuffers constBufs_; //!< Constant buffers for the global store
|
||||
gpu::Memory* glbData_; //!< Global data store
|
||||
};
|
||||
@@ -467,8 +469,15 @@ class HSAILProgram : public device::Program {
|
||||
|
||||
const std::vector<Memory*>& globalStores() const { return globalStores_; }
|
||||
|
||||
//! Return a typecasted GPU device
|
||||
gpu::Device& dev() { return const_cast<gpu::Device&>(static_cast<const gpu::Device&>(device())); }
|
||||
//! Return a typecasted GPU null device.
//! The result of device() is downcast with an unchecked static_cast to
//! gpu::NullDevice, and the const qualifier is then removed with const_cast.
|
||||
gpu::NullDevice& gpuNullDevice() {
|
||||
return const_cast<gpu::NullDevice&>(static_cast<const gpu::NullDevice&>(device()));
|
||||
}
|
||||
|
||||
//! Return a typecasted GPU device. The device must not be the NullDevice.
//! The downcast from device() is an unchecked static_cast, so nothing here
//! enforces that precondition; the const qualifier is removed with const_cast.
|
||||
gpu::Device& gpuDevice() {
|
||||
return const_cast<gpu::Device&>(static_cast<const gpu::Device&>(device()));
|
||||
}
|
||||
|
||||
//! Returns GPU kernel table
|
||||
const Memory* kernelTable() const { return kernels_; }
|
||||
|
||||
@@ -43,8 +43,8 @@ void HSAILKernel::setWorkGroupInfo(const uint32_t privateSegmentSize,
|
||||
// Make sure runtime matches HW alignment, which is 256 scratch regs (DWORDs) per wave
|
||||
constexpr uint32_t ScratchRegAlignment = 256;
|
||||
workGroupInfo_.scratchRegs_ =
|
||||
amd::alignUp((workGroupInfo_.scratchRegs_ * dev().info().wavefrontWidth_),
|
||||
ScratchRegAlignment) / dev().info().wavefrontWidth_;
|
||||
amd::alignUp((workGroupInfo_.scratchRegs_ * device().info().wavefrontWidth_),
|
||||
ScratchRegAlignment) / device().info().wavefrontWidth_;
|
||||
workGroupInfo_.privateMemSize_ = workGroupInfo_.scratchRegs_ * sizeof(uint32_t);
|
||||
workGroupInfo_.localMemSize_ = workGroupInfo_.usedLDSSize_ = groupSegmentSize;
|
||||
workGroupInfo_.usedSGPRs_ = numSGPRs;
|
||||
@@ -52,13 +52,13 @@ void HSAILKernel::setWorkGroupInfo(const uint32_t privateSegmentSize,
|
||||
workGroupInfo_.usedVGPRs_ = numVGPRs;
|
||||
|
||||
if (!prog().isNull()) {
|
||||
workGroupInfo_.availableLDSSize_ = dev().properties().gfxipProperties.shaderCore.ldsSizePerCu;
|
||||
workGroupInfo_.availableLDSSize_ = palDevice().properties().gfxipProperties.shaderCore.ldsSizePerCu;
|
||||
workGroupInfo_.availableSGPRs_ =
|
||||
dev().properties().gfxipProperties.shaderCore.numAvailableSgprs;
|
||||
palDevice().properties().gfxipProperties.shaderCore.numAvailableSgprs;
|
||||
workGroupInfo_.availableVGPRs_ =
|
||||
dev().properties().gfxipProperties.shaderCore.numAvailableVgprs;
|
||||
palDevice().properties().gfxipProperties.shaderCore.numAvailableVgprs;
|
||||
workGroupInfo_.preferredSizeMultiple_ = workGroupInfo_.wavefrontPerSIMD_ =
|
||||
dev().info().wavefrontWidth_;
|
||||
device().info().wavefrontWidth_;
|
||||
} else {
|
||||
workGroupInfo_.availableLDSSize_ = 64 * Ki;
|
||||
workGroupInfo_.availableSGPRs_ = 104;
|
||||
@@ -105,7 +105,7 @@ bool HSAILKernel::aqlCreateHWInfo(amd::hsa::loader::Symbol* sym) {
|
||||
}
|
||||
|
||||
HSAILKernel::HSAILKernel(std::string name, HSAILProgram* prog, std::string compileOptions)
|
||||
: device::Kernel(prog->dev(), name, *prog),
|
||||
: device::Kernel(prog->device(), name, *prog),
|
||||
compileOptions_(compileOptions),
|
||||
index_(0),
|
||||
code_(0),
|
||||
@@ -128,12 +128,12 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
|
||||
options.append(openClKernelName.c_str());
|
||||
// Append an option so that we can selectively enable a SCOption on CZ
|
||||
// whenever IOMMUv2 is enabled.
|
||||
if (dev().settings().svmFineGrainSystem_) {
|
||||
if (palNullDevice().settings().svmFineGrainSystem_) {
|
||||
options.append(" -sc-xnack-iommu");
|
||||
}
|
||||
error = aclCompile(dev().compiler(), prog().binaryElf(), options.c_str(), ACL_TYPE_CG,
|
||||
error = aclCompile(palNullDevice().compiler(), prog().binaryElf(), options.c_str(), ACL_TYPE_CG,
|
||||
ACL_TYPE_ISA, nullptr);
|
||||
buildLog_ += aclGetCompilerLog(dev().compiler());
|
||||
buildLog_ += aclGetCompilerLog(palNullDevice().compiler());
|
||||
if (error != ACL_SUCCESS) {
|
||||
LogError("Failed to finalize kernel");
|
||||
return false;
|
||||
@@ -144,7 +144,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
|
||||
|
||||
// Pull out metadata from the ELF
|
||||
size_t sizeOfArgList;
|
||||
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_ARGUMENT_ARRAY,
|
||||
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_ARGUMENT_ARRAY,
|
||||
openClKernelName.c_str(), nullptr, &sizeOfArgList);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
@@ -154,7 +154,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
|
||||
if (nullptr == aclArgList) {
|
||||
return false;
|
||||
}
|
||||
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_ARGUMENT_ARRAY,
|
||||
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_ARGUMENT_ARRAY,
|
||||
openClKernelName.c_str(), aclArgList, &sizeOfArgList);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
@@ -164,30 +164,30 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
|
||||
delete[] aclArgList;
|
||||
|
||||
size_t sizeOfWorkGroupSize;
|
||||
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE,
|
||||
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE,
|
||||
openClKernelName.c_str(), nullptr, &sizeOfWorkGroupSize);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE,
|
||||
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE,
|
||||
openClKernelName.c_str(), workGroupInfo_.compileSize_, &sizeOfWorkGroupSize);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Copy wavefront size
|
||||
workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_;
|
||||
workGroupInfo_.wavefrontSize_ = device().info().wavefrontWidth_;
|
||||
// Find total workgroup size
|
||||
if (workGroupInfo_.compileSize_[0] != 0) {
|
||||
workGroupInfo_.size_ = workGroupInfo_.compileSize_[0] * workGroupInfo_.compileSize_[1] *
|
||||
workGroupInfo_.compileSize_[2];
|
||||
} else {
|
||||
workGroupInfo_.size_ = dev().info().preferredWorkGroupSize_;
|
||||
workGroupInfo_.size_ = device().info().preferredWorkGroupSize_;
|
||||
}
|
||||
|
||||
// Pull out printf metadata from the ELF
|
||||
size_t sizeOfPrintfList;
|
||||
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_GPU_PRINTF_ARRAY,
|
||||
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_GPU_PRINTF_ARRAY,
|
||||
openClKernelName.c_str(), nullptr, &sizeOfPrintfList);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
@@ -199,7 +199,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
|
||||
if (nullptr == aclPrintfList) {
|
||||
return false;
|
||||
}
|
||||
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_GPU_PRINTF_ARRAY,
|
||||
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_GPU_PRINTF_ARRAY,
|
||||
openClKernelName.c_str(), aclPrintfList, &sizeOfPrintfList);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
@@ -213,7 +213,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
|
||||
aclMetadata md;
|
||||
md.enqueue_kernel = false;
|
||||
size_t sizeOfDeviceEnqueue = sizeof(md.enqueue_kernel);
|
||||
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_DEVICE_ENQUEUE,
|
||||
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_DEVICE_ENQUEUE,
|
||||
openClKernelName.c_str(), &md.enqueue_kernel, &sizeOfDeviceEnqueue);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
@@ -222,7 +222,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
|
||||
|
||||
md.kernel_index = -1;
|
||||
size_t sizeOfIndex = sizeof(md.kernel_index);
|
||||
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_KERNEL_INDEX,
|
||||
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_KERNEL_INDEX,
|
||||
openClKernelName.c_str(), &md.kernel_index, &sizeOfIndex);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
@@ -230,7 +230,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
|
||||
index_ = md.kernel_index;
|
||||
|
||||
size_t sizeOfWavesPerSimdHint = sizeof(workGroupInfo_.wavesPerSimdHint_);
|
||||
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_WAVES_PER_SIMD_HINT,
|
||||
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_WAVES_PER_SIMD_HINT,
|
||||
openClKernelName.c_str(), &workGroupInfo_.wavesPerSimdHint_,
|
||||
&sizeOfWavesPerSimdHint);
|
||||
if (error != ACL_SUCCESS) {
|
||||
@@ -240,7 +240,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
|
||||
waveLimiter_.enable();
|
||||
|
||||
size_t sizeOfWorkGroupSizeHint = sizeof(workGroupInfo_.compileSizeHint_);
|
||||
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE_HINT,
|
||||
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE_HINT,
|
||||
openClKernelName.c_str(), workGroupInfo_.compileSizeHint_,
|
||||
&sizeOfWorkGroupSizeHint);
|
||||
if (error != ACL_SUCCESS) {
|
||||
@@ -248,7 +248,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
|
||||
}
|
||||
|
||||
size_t sizeOfVecTypeHint;
|
||||
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_VEC_TYPE_HINT,
|
||||
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_VEC_TYPE_HINT,
|
||||
openClKernelName.c_str(), NULL, &sizeOfVecTypeHint);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
@@ -259,7 +259,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
|
||||
if (NULL == VecTypeHint) {
|
||||
return false;
|
||||
}
|
||||
error = aclQueryInfo(dev().compiler(), prog().binaryElf(), RT_VEC_TYPE_HINT,
|
||||
error = aclQueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_VEC_TYPE_HINT,
|
||||
openClKernelName.c_str(), VecTypeHint, &sizeOfVecTypeHint);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
@@ -273,8 +273,6 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const Device& HSAILKernel::dev() const { return reinterpret_cast<const Device&>(dev_); }
|
||||
|
||||
const HSAILProgram& HSAILKernel::prog() const {
|
||||
return reinterpret_cast<const HSAILProgram&>(prog_);
|
||||
}
|
||||
@@ -476,7 +474,7 @@ bool LightningKernel::init() {
|
||||
workGroupInfo()->usedSGPRs_, workGroupInfo()->usedVGPRs_);
|
||||
|
||||
// Copy wavefront size
|
||||
workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_;
|
||||
workGroupInfo_.wavefrontSize_ = device().info().wavefrontWidth_;
|
||||
|
||||
if (workGroupInfo_.size_ == 0) {
|
||||
return false;
|
||||
|
||||
@@ -68,8 +68,11 @@ class HSAILKernel : public device::Kernel {
|
||||
//! finalizes the kernel if needed
|
||||
bool init(amd::hsa::loader::Symbol* sym, bool finalize = false);
|
||||
|
||||
//! Returns PAL device object, associated with this kernel
|
||||
const Device& dev() const;
|
||||
//! Returns PAL, possibly null, device object, associated with this kernel.
|
||||
const NullDevice& palNullDevice() const {
  // dev_ is reinterpreted (not converted) as a NullDevice reference.
  // NOTE(review): the program-level accessors in this change use static_cast for the
  // equivalent downcast of device(); confirm whether reinterpret_cast is required here.
  const NullDevice& nullDev = reinterpret_cast<const NullDevice&>(dev_);
  return nullDev;
}
|
||||
|
||||
//! Returns PAL device object, associated with this kernel which must not be the null device.
|
||||
const Device& palDevice() const {
  // dev_ is reinterpreted (not converted) as a Device reference; no check is made here
  // that the underlying object really is a Device rather than a NullDevice.
  // NOTE(review): the program-level accessors in this change use static_cast for the
  // equivalent downcast of device(); confirm whether reinterpret_cast is required here.
  const Device& realDev = reinterpret_cast<const Device&>(dev_);
  return realDev;
}
|
||||
|
||||
//! Returns HSA program associated with this kernel
|
||||
const HSAILProgram& prog() const;
|
||||
|
||||
@@ -70,8 +70,8 @@ bool Segment::alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t
|
||||
bool zero) {
|
||||
align = amd::alignUp(align, sizeof(uint32_t));
|
||||
|
||||
amd::Memory* amd_mem_obj = new (prog.dev().context())
|
||||
amd::Buffer(prog.dev().context(), 0, amd::alignUp(size, align),
|
||||
amd::Memory* amd_mem_obj = new (prog.palDevice().context())
|
||||
amd::Buffer(prog.palDevice().context(), 0, amd::alignUp(size, align),
|
||||
// HIP requires SVM allocation for segment code due to possible global variable
|
||||
// access and global variables are a part of code segment with the latest loader
|
||||
amd::IS_HIP ? reinterpret_cast<void*>(1) : nullptr);
|
||||
@@ -87,11 +87,11 @@ bool Segment::alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t
|
||||
return false;
|
||||
}
|
||||
|
||||
gpuAccess_ = static_cast<pal::Memory*>(amd_mem_obj->getDeviceMemory(prog.dev(), false));
|
||||
gpuAccess_ = static_cast<pal::Memory*>(amd_mem_obj->getDeviceMemory(prog.palDevice(), false));
|
||||
|
||||
if (segment == AMDGPU_HSA_SEGMENT_CODE_AGENT) {
|
||||
void* ptr = nullptr;
|
||||
cpuAccess_ = new pal::Memory(prog.dev(), amd::alignUp(size, align));
|
||||
cpuAccess_ = new pal::Memory(prog.palDevice(), amd::alignUp(size, align));
|
||||
if ((cpuAccess_ == nullptr) || !cpuAccess_->create(pal::Resource::Remote)) {
|
||||
delete cpuAccess_;
|
||||
cpuAccess_ = nullptr;
|
||||
@@ -111,8 +111,8 @@ bool Segment::alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t
|
||||
if ((cpuAccess_ == nullptr) && zero && !prog.isInternal()) {
|
||||
uint64_t pattern = 0;
|
||||
size_t patternSize = ((size % sizeof(pattern)) == 0) ? sizeof(pattern) : 1;
|
||||
prog.dev().xferMgr().fillBuffer(*gpuAccess_, &pattern, patternSize, amd::Coord3D(0),
|
||||
amd::Coord3D(size));
|
||||
prog.palDevice().xferMgr().fillBuffer(*gpuAccess_, &pattern, patternSize, amd::Coord3D(0),
|
||||
amd::Coord3D(size));
|
||||
}
|
||||
|
||||
switch (segment) {
|
||||
@@ -266,7 +266,7 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_
|
||||
}
|
||||
|
||||
size_t kernelNamesSize = 0;
|
||||
acl_error errorCode = aclQueryInfo(dev().compiler(), binaryElf_, RT_KERNEL_NAMES, nullptr,
|
||||
acl_error errorCode = aclQueryInfo(palNullDevice().compiler(), binaryElf_, RT_KERNEL_NAMES, nullptr,
|
||||
nullptr, &kernelNamesSize);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
buildLog_ += "Error: Querying of kernel names size from the binary failed.\n";
|
||||
@@ -274,7 +274,7 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_
|
||||
}
|
||||
if (kernelNamesSize > 0) {
|
||||
char* kernelNames = new char[kernelNamesSize];
|
||||
errorCode = aclQueryInfo(dev().compiler(), binaryElf_, RT_KERNEL_NAMES, nullptr, kernelNames,
|
||||
errorCode = aclQueryInfo(palNullDevice().compiler(), binaryElf_, RT_KERNEL_NAMES, nullptr, kernelNames,
|
||||
&kernelNamesSize);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
buildLog_ += "Error: Querying of kernel names from the binary failed.\n";
|
||||
@@ -326,7 +326,7 @@ bool HSAILProgram::createBinary(amd::option::Options* options) { return true; }
|
||||
bool HSAILProgram::allocKernelTable() {
|
||||
uint size = kernels().size() * sizeof(size_t);
|
||||
|
||||
kernels_ = new pal::Memory(dev(), size);
|
||||
kernels_ = new pal::Memory(palDevice(), size);
|
||||
// Initialize kernel table
|
||||
if ((kernels_ == nullptr) || !kernels_->create(Resource::RemoteUSWC)) {
|
||||
delete kernels_;
|
||||
@@ -352,7 +352,7 @@ const aclTargetInfo& HSAILProgram::info(const char* str) {
|
||||
arch = "hsail64";
|
||||
}
|
||||
info_ = aclGetTargetInfo(arch.c_str(),
|
||||
(str && str[0] == '\0' ? dev().hwInfo()->machineTarget_ : str), &err);
|
||||
(str && str[0] == '\0' ? palNullDevice().hwInfo()->machineTarget_ : str), &err);
|
||||
if (err != ACL_SUCCESS) {
|
||||
LogWarning("aclGetTargetInfo failed");
|
||||
}
|
||||
@@ -523,9 +523,9 @@ hsa_isa_t PALHSALoaderContext::IsaFromName(const char* name) {
|
||||
}
|
||||
|
||||
bool PALHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) {
|
||||
uint32_t gfxipVersion = program_->dev().settings().useLightning_
|
||||
? program_->dev().hwInfo()->gfxipVersionLC_
|
||||
: program_->dev().hwInfo()->gfxipVersion_;
|
||||
uint32_t gfxipVersion = program_->palNullDevice().settings().useLightning_
|
||||
? program_->palNullDevice().hwInfo()->gfxipVersionLC_
|
||||
: program_->palNullDevice().hwInfo()->gfxipVersion_;
|
||||
uint32_t majorSrc = gfxipVersion / 10;
|
||||
uint32_t minorSrc = gfxipVersion % 10;
|
||||
|
||||
@@ -671,7 +671,7 @@ hsa_status_t PALHSALoaderContext::SamplerCreate(
|
||||
assert(false);
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
pal::Sampler* sampler = new pal::Sampler(program_->dev());
|
||||
pal::Sampler* sampler = new pal::Sampler(program_->palDevice());
|
||||
if (!sampler || !sampler->create(state)) {
|
||||
delete sampler;
|
||||
return HSA_STATUS_ERROR;
|
||||
|
||||
@@ -157,8 +157,15 @@ class HSAILProgram : public device::Program {
|
||||
|
||||
//! Returns a read-only reference to the list of global stores kept in globalStores_
const std::vector<Memory*>& globalStores() const {
  return globalStores_;
}
|
||||
|
||||
//! Return a typecasted PAL device
|
||||
pal::Device& dev() { return const_cast<pal::Device&>(static_cast<const pal::Device&>(device())); }
|
||||
//! Return the device typecasted to a mutable PAL null device (constness of device() is cast away).
|
||||
pal::NullDevice& palNullDevice() {
|
||||
return const_cast<pal::NullDevice&>(static_cast<const pal::NullDevice&>(device()));
|
||||
}
|
||||
|
||||
//! Return the device typecasted to a mutable PAL device (constness of device() is cast away). The device must not be the NullDevice.
|
||||
pal::Device& palDevice() {
|
||||
return const_cast<pal::Device&>(static_cast<const pal::Device&>(device()));
|
||||
}
|
||||
|
||||
//! Returns GPU kernel table
|
||||
const Memory* kernelTable() const {
  // kernels_ is exposed through a pointer-to-const, so callers cannot modify the table.
  return kernels_;
}
|
||||
@@ -241,7 +248,7 @@ class LightningProgram : public HSAILProgram {
|
||||
LightningProgram(NullDevice& device, amd::Program& owner) : HSAILProgram(device, owner) {
|
||||
isLC_ = true;
|
||||
isHIP_ = (owner.language() == amd::Program::HIP);
|
||||
machineTarget_ = dev().hwInfo()->machineTargetLC_;
|
||||
machineTarget_ = palNullDevice().hwInfo()->machineTargetLC_;
|
||||
}
|
||||
|
||||
LightningProgram(Device& device, amd::Program& owner) : HSAILProgram(device, owner) {
|
||||
|
||||
@@ -31,7 +31,7 @@ Kernel::Kernel(std::string name, Program* prog, const uint64_t& kernelCodeHandle
|
||||
const uint32_t workgroupGroupSegmentByteSize,
|
||||
const uint32_t workitemPrivateSegmentByteSize, const uint32_t kernargSegmentByteSize,
|
||||
const uint32_t kernargSegmentAlignment)
|
||||
: device::Kernel(prog->dev(), name, *prog) {
|
||||
: device::Kernel(prog->device(), name, *prog) {
|
||||
kernelCodeHandle_ = kernelCodeHandle;
|
||||
workgroupGroupSegmentByteSize_ = workgroupGroupSegmentByteSize;
|
||||
workitemPrivateSegmentByteSize_ = workitemPrivateSegmentByteSize;
|
||||
@@ -40,7 +40,7 @@ Kernel::Kernel(std::string name, Program* prog, const uint64_t& kernelCodeHandle
|
||||
}
|
||||
|
||||
Kernel::Kernel(std::string name, Program* prog)
|
||||
: device::Kernel(prog->dev(), name, *prog) {
|
||||
: device::Kernel(prog->device(), name, *prog) {
|
||||
}
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
@@ -57,10 +57,10 @@ bool LightningKernel::init() {
|
||||
symbolName_ = name();
|
||||
}
|
||||
kernargSegmentAlignment_ =
|
||||
amd::alignUp(std::max(kernargSegmentAlignment_, 128u), dev().info().globalMemCacheLineSize_);
|
||||
amd::alignUp(std::max(kernargSegmentAlignment_, 128u), device().info().globalMemCacheLineSize_);
|
||||
|
||||
// Set the workgroup information for the kernel
|
||||
workGroupInfo_.availableLDSSize_ = dev().info().localMemSizePerCU_;
|
||||
workGroupInfo_.availableLDSSize_ = device().info().localMemSizePerCU_;
|
||||
assert(workGroupInfo_.availableLDSSize_ > 0);
|
||||
|
||||
if (!SetAvailableSgprVgpr()) {
|
||||
@@ -155,7 +155,7 @@ bool LightningKernel::init() {
|
||||
workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_;
|
||||
workGroupInfo_.preferredSizeMultiple_ = wavefront_size;
|
||||
workGroupInfo_.usedStackSize_ = 0;
|
||||
workGroupInfo_.wavefrontPerSIMD_ = program()->dev().info().maxWorkItemSizes_[0] / wavefront_size;
|
||||
workGroupInfo_.wavefrontPerSIMD_ = program()->rocDevice().info().maxWorkItemSizes_[0] / wavefront_size;
|
||||
workGroupInfo_.wavefrontSize_ = wavefront_size;
|
||||
if (workGroupInfo_.size_ == 0) {
|
||||
return false;
|
||||
@@ -181,7 +181,7 @@ bool HSAILKernel::init() {
|
||||
hsa_agent_t hsaDevice = program()->hsaDevice();
|
||||
// Pull out metadata from the ELF
|
||||
size_t sizeOfArgList;
|
||||
aclCompiler* compileHandle = program()->dev().compiler();
|
||||
aclCompiler* compileHandle = program()->rocDevice().compiler();
|
||||
std::string openClKernelName("&__OpenCL_" + name() + "_kernel");
|
||||
errorCode = aclQueryInfo(compileHandle, program()->binaryElf(), RT_ARGUMENT_ARRAY,
|
||||
openClKernelName.c_str(), nullptr, &sizeOfArgList);
|
||||
@@ -202,7 +202,7 @@ bool HSAILKernel::init() {
|
||||
|
||||
// Set the workgroup information for the kernel
|
||||
memset(&workGroupInfo_, 0, sizeof(workGroupInfo_));
|
||||
workGroupInfo_.availableLDSSize_ = program()->dev().info().localMemSizePerCU_;
|
||||
workGroupInfo_.availableLDSSize_ = program()->rocDevice().info().localMemSizePerCU_;
|
||||
assert(workGroupInfo_.availableLDSSize_ > 0);
|
||||
workGroupInfo_.availableSGPRs_ = 104;
|
||||
workGroupInfo_.availableVGPRs_ = 256;
|
||||
@@ -250,13 +250,13 @@ bool HSAILKernel::init() {
|
||||
}
|
||||
|
||||
workGroupInfo_.usedStackSize_ = 0;
|
||||
workGroupInfo_.wavefrontPerSIMD_ = program()->dev().info().maxWorkItemSizes_[0] / wavefront_size;
|
||||
workGroupInfo_.wavefrontPerSIMD_ = program()->rocDevice().info().maxWorkItemSizes_[0] / wavefront_size;
|
||||
workGroupInfo_.wavefrontSize_ = wavefront_size;
|
||||
if (workGroupInfo_.compileSize_[0] != 0) {
|
||||
workGroupInfo_.size_ = workGroupInfo_.compileSize_[0] * workGroupInfo_.compileSize_[1] *
|
||||
workGroupInfo_.compileSize_[2];
|
||||
} else {
|
||||
workGroupInfo_.size_ = program()->dev().info().preferredWorkGroupSize_;
|
||||
workGroupInfo_.size_ = program()->rocDevice().info().preferredWorkGroupSize_;
|
||||
}
|
||||
|
||||
// Pull out printf metadata from the ELF
|
||||
|
||||
@@ -127,7 +127,7 @@ bool Program::initClBinary(char* binaryIn, size_t size) {
|
||||
|
||||
bool Program::defineGlobalVar(const char* name, void* dptr) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
hsa_agent_t hsa_device = dev().getBackendDevice();
|
||||
hsa_agent_t hsa_device = rocDevice().getBackendDevice();
|
||||
|
||||
status = hsa_executable_agent_global_variable_define(hsaExecutable_, hsa_device, name, dptr);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
@@ -153,7 +153,7 @@ bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr,
|
||||
return false;
|
||||
}
|
||||
|
||||
hsa_device= dev().getBackendDevice();
|
||||
hsa_device = rocDevice().getBackendDevice();
|
||||
|
||||
/* Find HSA Symbol by name */
|
||||
status = hsa_executable_get_symbol_by_name(hsaExecutable_, global_name, &hsa_device,
|
||||
@@ -206,7 +206,7 @@ bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr,
|
||||
return false;
|
||||
}
|
||||
|
||||
roc_device = static_cast<const roc::Device*>(&dev());
|
||||
roc_device = &(rocDevice());
|
||||
*amd_mem_obj = new(roc_device->context()) amd::Buffer(roc_device->context(), 0, *bytes,
|
||||
*device_pptr);
|
||||
|
||||
@@ -227,7 +227,7 @@ bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr,
|
||||
}
|
||||
|
||||
HSAILProgram::HSAILProgram(roc::NullDevice& device, amd::Program& owner) : roc::Program(device, owner) {
|
||||
machineTarget_ = dev().deviceInfo().machineTarget_;
|
||||
machineTarget_ = rocNullDevice().deviceInfo().machineTarget_;
|
||||
}
|
||||
|
||||
HSAILProgram::~HSAILProgram() {
|
||||
@@ -268,7 +268,7 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
// Stop compilation if it is an offline device - HSA runtime does not
|
||||
// support ISA compiled offline
|
||||
if (!dev().isOnline()) {
|
||||
if (!device().isOnline()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -294,7 +294,7 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_
|
||||
return false;
|
||||
}
|
||||
|
||||
hsa_agent_t hsaDevice = dev().getBackendDevice();
|
||||
hsa_agent_t hsaDevice = rocDevice().getBackendDevice();
|
||||
status = hsa_executable_load_agent_code_object(hsaExecutable_, hsaDevice, hsaCodeObjectReader_,
|
||||
nullptr, nullptr);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
@@ -432,7 +432,7 @@ LightningProgram::LightningProgram(roc::NullDevice& device, amd::Program& owner)
|
||||
: roc::Program(device, owner) {
|
||||
isLC_ = true;
|
||||
isHIP_ = (owner.language() == amd::Program::HIP);
|
||||
machineTarget_ = dev().deviceInfo().machineTargetLC_;
|
||||
machineTarget_ = rocNullDevice().deviceInfo().machineTargetLC_;
|
||||
}
|
||||
|
||||
bool LightningProgram::createBinary(amd::option::Options* options) {
|
||||
@@ -477,7 +477,7 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s
|
||||
return false;
|
||||
}
|
||||
|
||||
hsa_agent_t agent = dev().getBackendDevice();
|
||||
hsa_agent_t agent = rocDevice().getBackendDevice();
|
||||
hsa_status_t status;
|
||||
|
||||
status = hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
|
||||
|
||||
@@ -49,10 +49,13 @@ class Program : public device::Program {
|
||||
virtual bool initClBinary(char* binaryIn, size_t size);
|
||||
|
||||
//! Return the device typecasted to a ROC null device
|
||||
const NullDevice& dev() const { return static_cast<const NullDevice&>(device()); }
|
||||
const NullDevice& rocNullDevice() const {
  // Downcast of the device() reference to the NullDevice type.
  const NullDevice& nullDev = static_cast<const NullDevice&>(device());
  return nullDev;
}
|
||||
|
||||
//! Return the device typecasted to a ROC device
|
||||
const Device& rocDevice() const {
  // Unchecked downcast of the device() reference to the Device type.
  // NOTE(review): presumably only valid when the program was created on a real Device
  // rather than a NullDevice -- confirm against callers.
  const Device& realDev = static_cast<const Device&>(device());
  return realDev;
}
|
||||
|
||||
//! Returns the HSA agent of the device associated with the program
|
||||
hsa_agent_t hsaDevice() const { return dev().getBackendDevice(); }
|
||||
hsa_agent_t hsaDevice() const {
  // The agent handle is queried from the device viewed as a NullDevice.
  const NullDevice& nullDev = rocNullDevice();
  return nullDev.getBackendDevice();
}
|
||||
|
||||
//! Returns the HSA executable handle stored in hsaExecutable_
hsa_executable_t hsaExecutable() const {
  return hsaExecutable_;
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user