P4 to Git Change 1312566 by lmoriche@lmoriche_opencl_dev on 2016/09/08 18:25:02

SWDEV-94610 - Make sure each kernarg segment sits on a different cache line (align the kernargs on cache lines at minimum). Minor misc cleanups.

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#14 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#27 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#13 edit
This commit is contained in:
foreman
2016-09-08 19:52:04 -04:00
parent 8705eca425
commit 3a61b24dd5
5 changed files with 38 additions and 28 deletions
@@ -817,8 +817,18 @@ Device::populateOCLDeviceConstants()
info_.maxSamplers_ = 16;
info_.bufferFromImageSupport_ = CL_FALSE;
info_.oclcVersion_ = "OpenCL C " OPENCL_VERSION_STR " ";
strcpy(info_.driverVersion_, "1.0 Provisional (hsa)");
info_.version_ = "OpenCL " OPENCL_VERSION_STR " ";
uint16_t major, minor;
if (hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_VERSION_MAJOR, &major)
!= HSA_STATUS_SUCCESS
|| hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_VERSION_MINOR, &minor)
!= HSA_STATUS_SUCCESS) {
return false;
}
std::stringstream ss;
ss << major << "." << minor << " (hsa)";
strcpy(info_.driverVersion_, ss.str().c_str());
info_.version_ = "OpenCL " /*OPENCL_VERSION_STR*/"1.2" " ";
info_.builtInKernels_ = "";
info_.linkerAvailable_ = true;
@@ -689,16 +689,14 @@ Kernel::Kernel(
const uint32_t workgroupGroupSegmentByteSize,
const uint32_t workitemPrivateSegmentByteSize,
const uint32_t kernargSegmentByteSize,
const uint32_t kernargSegmentAlignment,
uint extraArgsNum)
const uint32_t kernargSegmentAlignment)
: device::Kernel(name),
program_(prog),
kernelCodeHandle_(kernelCodeHandle),
workgroupGroupSegmentByteSize_(workgroupGroupSegmentByteSize),
workitemPrivateSegmentByteSize_(workitemPrivateSegmentByteSize),
kernargSegmentByteSize_(kernargSegmentByteSize),
kernargSegmentAlignment_(kernargSegmentAlignment),
extraArgumentsNum_(extraArgsNum) {}
kernargSegmentAlignment_(kernargSegmentAlignment) {}
#if defined(WITH_LIGHTNING_COMPILER)
bool Kernel::init_LC()
@@ -96,8 +96,7 @@ public:
const uint32_t workgroupGroupSegmentByteSize,
const uint32_t workitemPrivateSegmentByteSize,
const uint32_t kernargSegmentByteSize,
const uint32_t kernargSegmentAlignment,
uint extraArgsNum);
const uint32_t kernargSegmentAlignment);
const uint64_t& KernelCodeHandle() {
return kernelCodeHandle_;
@@ -144,11 +143,6 @@ public:
return NULL;
}
//! Max number of possible extra (hidden) kernel arguments
static const uint MaxExtraArgumentsNum = 6;
uint extraArgumentsNum() const { return extraArgumentsNum_; }
//! Return a read-only reference to the printf info array (printf_)
const std::vector<PrintfInfo>& printfInfo() const {return printf_;}
@@ -172,7 +166,6 @@ private:
const uint32_t kernargSegmentByteSize_;
const uint32_t kernargSegmentAlignment_;
size_t kernelDirectiveOffset_;
const uint extraArgumentsNum_; // Number of arguments in Kernenv
std::vector<PrintfInfo> printf_;
};
@@ -1093,9 +1093,6 @@ HSAILProgram::linkImpl_LC(amd::option::Options *options)
return false;
}
// for OpenCL default hidden kernel arguments assuming there is no printf
size_t numHiddenKernelArgs = 3; // FIXME_Wilkin
Kernel *aKernel = new roc::Kernel(
kernelName,
this,
@@ -1105,10 +1102,8 @@ HSAILProgram::linkImpl_LC(amd::option::Options *options)
// TODO: remove the workaround
// add 24 bytes for global offsets as workaround for LC reporting
// excluded the hidden arguments
amd::alignUp(kernargSegmentByteSize, sizeof(size_t)) + numHiddenKernelArgs * sizeof(size_t),
kernargSegmentAlignment,
numHiddenKernelArgs
);
amd::alignUp(kernargSegmentByteSize, sizeof(size_t)) + 3 * sizeof(size_t),
amd::alignUp(kernargSegmentAlignment,device().info().globalMemCacheLineSize_));
if (!aKernel->init()) {
return false;
}
@@ -1450,11 +1445,30 @@ std::string
HSAILProgram::hsailOptions(amd::option::Options* options)
{
std::string hsailOptions;
//Set options for the standard device specific options
hsailOptions.append(" -D__AMD__");
int major, minor;
::sscanf(device().info().version_, "OpenCL %d.%d ", &major, &minor);
std::stringstream ss;
ss << " -D__OPENCL_VERSION__=" << (major * 100 + minor * 10);
hsailOptions.append(ss.str());
if (device().info().imageSupport_ && options->oVariables->ImageSupport) {
hsailOptions.append(" -D__IMAGE_SUPPORT__");
}
//This is just for legacy compiler code
// All our devices support these options now
hsailOptions.append(" -DFP_FAST_FMAF");
hsailOptions.append(" -DFP_FAST_FMA");
if (options->oVariables->FastFMA) {
hsailOptions.append(" -DFP_FAST_FMA");
}
if (options->oVariables->FastFMAF) {
hsailOptions.append(" -DFP_FAST_FMAF");
}
if (dev().deviceInfo().gfxipVersion_ < 900) {
hsailOptions.append(" -cl-denorms-are-zero");
@@ -519,11 +519,6 @@ VirtualGPU::create(bool profilingEna)
}
device::BlitManager::Setup blitSetup;
#if defined(WITH_LIGHTNING_COMPILER)
// TODO: Wilkin - remove the setting of value_ after image kernels are available
blitSetup.value_ = 0x3724; // disable the image related BLIT kernels for now
#endif // defined(WITH_LIGHTNING_COMPILER)
blitMgr_ = new KernelBlitManager(*this, blitSetup);
if ((NULL == blitMgr_) || !blitMgr_->create(roc_device_)) {
LogError("Could not create BlitManager!");