P4 to Git Change 1250949 by gandryey@gera-w8 on 2016/03/24 12:06:49
SWDEV-90618 - cl_kernel_info_amd always returns 0 when working via HSAIL path
- Allow null kernel creation for offline compilation

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#312 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#224 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#66 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuscsi.cpp#36 edit
Этот коммит содержится в:
@@ -3422,10 +3422,7 @@ HSAILKernel::init(amd::hsa::loader::Symbol *sym, bool finalize)
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate HW resources for the real program only
|
||||
if (!prog().isNull()) {
|
||||
aqlCreateHWInfo(sym);
|
||||
}
|
||||
aqlCreateHWInfo(sym);
|
||||
|
||||
// Pull out metadata from the ELF
|
||||
size_t sizeOfArgList;
|
||||
|
||||
@@ -2151,7 +2151,7 @@ HSAILProgram::linkImpl(amd::option::Options* options)
|
||||
// ACL_TYPE_CG stage is not performed for offline compilation
|
||||
hsa_agent_t agent;
|
||||
agent.handle = 1;
|
||||
if (!isNull() && hsaLoad) {
|
||||
if (hsaLoad) {
|
||||
executable_ = loader_->CreateExecutable(HSA_PROFILE_FULL, NULL);
|
||||
if (executable_ == NULL) {
|
||||
buildLog_ += "Error: Executable for AMD HSA Code Object isn't created.\n";
|
||||
@@ -2176,7 +2176,7 @@ HSAILProgram::linkImpl(amd::option::Options* options)
|
||||
buildLog_ += "Error: Querying of kernel names size from the binary failed.\n";
|
||||
return false;
|
||||
}
|
||||
if (!isNull() && kernelNamesSize > 0) {
|
||||
if (kernelNamesSize > 0) {
|
||||
char* kernelNames = new char[kernelNamesSize];
|
||||
errorCode = aclQueryInfo(dev().hsaCompiler(), binaryElf_, RT_KERNEL_NAMES, NULL, kernelNames, &kernelNamesSize);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
@@ -2447,8 +2447,10 @@ void* ORCAHSALoaderContext::SegmentAddress(amdgpu_hsa_elf_segment_t segment,
|
||||
case AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM:
|
||||
case AMDGPU_HSA_SEGMENT_GLOBAL_AGENT:
|
||||
case AMDGPU_HSA_SEGMENT_READONLY_AGENT: {
|
||||
gpu::Memory *gpuMem = reinterpret_cast<gpu::Memory*>(seg);
|
||||
return reinterpret_cast<void*>(gpuMem->vmAddress() + offset);
|
||||
if (!program_->isNull()) {
|
||||
gpu::Memory *gpuMem = reinterpret_cast<gpu::Memory*>(seg);
|
||||
return reinterpret_cast<void*>(gpuMem->vmAddress() + offset);
|
||||
}
|
||||
}
|
||||
case AMDGPU_HSA_SEGMENT_CODE_AGENT: return (char*) seg + offset;
|
||||
default:
|
||||
@@ -2487,7 +2489,7 @@ hsa_status_t ORCAHSALoaderContext::SamplerCreate(
|
||||
case HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER: state |= amd::Sampler::StateAddressClamp; break;
|
||||
case HSA_EXT_SAMPLER_ADDRESSING_MODE_REPEAT: state |= amd::Sampler::StateAddressRepeat; break;
|
||||
case HSA_EXT_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT: state |= amd::Sampler::StateAddressMirroredRepeat; break;
|
||||
case HSA_EXT_SAMPLER_ADDRESSING_MODE_UNDEFINED: state |= amd::Sampler::StateAddressNone; break;
|
||||
case HSA_EXT_SAMPLER_ADDRESSING_MODE_UNDEFINED: state |= amd::Sampler::StateAddressNone; break;
|
||||
default:
|
||||
assert(false);
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
@@ -2540,6 +2542,10 @@ void* ORCAHSALoaderContext::GpuMemAlloc(size_t size, size_t align, bool zero) {
|
||||
assert(size);
|
||||
assert(align);
|
||||
assert(sizeof(void*) == 8 || sizeof(void*) == 4);
|
||||
if (program_->isNull()) {
|
||||
return new char [size];
|
||||
}
|
||||
|
||||
gpu::Memory* mem = new gpu::Memory(program_->dev(), amd::alignUp(size, align));
|
||||
if (!mem || !mem->create(gpu::Resource::Local)) {
|
||||
delete mem;
|
||||
@@ -2562,10 +2568,24 @@ bool ORCAHSALoaderContext::GpuMemCopy(void *dst, size_t offset, const void *src,
|
||||
if (0 == size) {
|
||||
return true;
|
||||
}
|
||||
if (program_->isNull()) {
|
||||
memcpy(reinterpret_cast<address>(dst) + offset, src, size);
|
||||
return true;
|
||||
}
|
||||
assert(program_->dev().xferQueue());
|
||||
gpu::Memory* mem = reinterpret_cast<gpu::Memory*>(dst);
|
||||
return program_->dev().xferMgr().writeBuffer(src, *mem, amd::Coord3D(offset), amd::Coord3D(size), true);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Frees memory referenced by `ptr`, choosing the deallocation form from the
// state of the owning program. The `size` argument is not used here.
// NOTE(review): presumably `ptr` always originates from GpuMemAlloc, whose
// visible hunk returns `new char[size]` when program_->isNull() and a
// gpu::Memory object otherwise -- confirm no other allocator feeds this path.
void ORCAHSALoaderContext::GpuMemFree(void *ptr, size_t size)
|
||||
{
|
||||
// Null program: the pointer is treated as a plain char array, so it must be
// released with the array form of delete.
if (program_->isNull()) {
|
||||
delete [] reinterpret_cast<char*>(ptr);
|
||||
}
|
||||
// Real program: the pointer is treated as a gpu::Memory object.
else {
|
||||
delete reinterpret_cast<gpu::Memory*>(ptr);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace gpu
|
||||
|
||||
@@ -480,9 +480,7 @@ private:
|
||||
|
||||
bool GpuMemCopy(void *dst, size_t offset, const void *src, size_t size);
|
||||
|
||||
void GpuMemFree(void *ptr, size_t size = 0) {
|
||||
delete reinterpret_cast<gpu::Memory*>(ptr);
|
||||
}
|
||||
void GpuMemFree(void *ptr, size_t size = 0);
|
||||
|
||||
ORCAHSALoaderContext(const ORCAHSALoaderContext &c);
|
||||
|
||||
|
||||
@@ -153,26 +153,27 @@ HSAILKernel::aqlCreateHWInfo(amd::hsa::loader::Symbol *sym)
|
||||
if (!sym->GetInfo(HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_ALIGN, reinterpret_cast<void*>(&akc_align))) {
|
||||
return false;
|
||||
}
|
||||
code_ = new gpu::Memory(dev(), amd::alignUp(codeSize_, akc_align));
|
||||
// Initialize kernel ISA code
|
||||
if (code_ && code_->create(Resource::Shader)) {
|
||||
address cpuCodePtr = static_cast<address>(code_->map(NULL, Resource::WriteOnly));
|
||||
// Copy only amd_kernel_code_t
|
||||
memcpy(cpuCodePtr, reinterpret_cast<address>(akc), codeSize_);
|
||||
code_->unmap(NULL);
|
||||
}
|
||||
else {
|
||||
LogError("Failed to allocate ISA code!");
|
||||
return false;
|
||||
|
||||
// Allocate HW resources for the real program only
|
||||
if (!prog().isNull()) {
|
||||
code_ = new gpu::Memory(dev(), amd::alignUp(codeSize_, akc_align));
|
||||
// Initialize kernel ISA code
|
||||
if (code_ && code_->create(Resource::Shader)) {
|
||||
address cpuCodePtr = static_cast<address>(code_->map(NULL, Resource::WriteOnly));
|
||||
// Copy only amd_kernel_code_t
|
||||
memcpy(cpuCodePtr, reinterpret_cast<address>(akc), codeSize_);
|
||||
code_->unmap(NULL);
|
||||
}
|
||||
else {
|
||||
LogError("Failed to allocate ISA code!");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
assert((akc->workitem_private_segment_byte_size & 3) == 0 &&
|
||||
"Scratch must be DWORD aligned");
|
||||
workGroupInfo_.scratchRegs_ =
|
||||
amd::alignUp(akc->workitem_private_segment_byte_size, 16) / sizeof(uint);
|
||||
workGroupInfo_.availableSGPRs_ = dev().gslCtx()->getNumSGPRsAvailable();
|
||||
workGroupInfo_.availableVGPRs_ = dev().gslCtx()->getNumVGPRsAvailable();
|
||||
workGroupInfo_.preferredSizeMultiple_ = dev().getAttribs().wavefrontSize;
|
||||
workGroupInfo_.privateMemSize_ = akc->workitem_private_segment_byte_size;
|
||||
workGroupInfo_.availableLDSSize_ = dev().info().localMemSize_;
|
||||
workGroupInfo_.localMemSize_ =
|
||||
@@ -180,8 +181,19 @@ HSAILKernel::aqlCreateHWInfo(amd::hsa::loader::Symbol *sym)
|
||||
workGroupInfo_.usedSGPRs_ = akc->wavefront_sgpr_count;
|
||||
workGroupInfo_.usedStackSize_ = 0;
|
||||
workGroupInfo_.usedVGPRs_ = akc->workitem_vgpr_count;
|
||||
workGroupInfo_.wavefrontPerSIMD_ = dev().getAttribs().wavefrontSize;
|
||||
|
||||
if (!prog().isNull()) {
|
||||
workGroupInfo_.availableSGPRs_ = dev().gslCtx()->getNumSGPRsAvailable();
|
||||
workGroupInfo_.availableVGPRs_ = dev().gslCtx()->getNumVGPRsAvailable();
|
||||
workGroupInfo_.preferredSizeMultiple_ = dev().getAttribs().wavefrontSize;
|
||||
workGroupInfo_.wavefrontPerSIMD_ = dev().getAttribs().wavefrontSize;
|
||||
}
|
||||
else {
|
||||
workGroupInfo_.availableSGPRs_ = 104;
|
||||
workGroupInfo_.availableVGPRs_ = 256;
|
||||
workGroupInfo_.preferredSizeMultiple_ =
|
||||
workGroupInfo_.wavefrontPerSIMD_ = 64;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
} // namespace gpu
|
||||
|
||||
Ссылка в новой задаче
Block a user